js-stream-sas7bdat 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +58 -0
- package/package.json +4 -2
- package/src/binding/ReadStat/LICENSE +19 -0
- package/src/binding/ReadStat/README.md +483 -0
- package/src/binding/ReadStat/src/CKHashTable.c +309 -0
- package/src/binding/ReadStat/src/CKHashTable.h +37 -0
- package/src/binding/ReadStat/src/readstat.h +627 -0
- package/src/binding/ReadStat/src/readstat_bits.c +69 -0
- package/src/binding/ReadStat/src/readstat_bits.h +20 -0
- package/src/binding/ReadStat/src/readstat_convert.c +36 -0
- package/src/binding/ReadStat/src/readstat_convert.h +2 -0
- package/src/binding/ReadStat/src/readstat_error.c +126 -0
- package/src/binding/ReadStat/src/readstat_iconv.h +15 -0
- package/src/binding/ReadStat/src/readstat_io_unistd.c +147 -0
- package/src/binding/ReadStat/src/readstat_io_unistd.h +11 -0
- package/src/binding/ReadStat/src/readstat_malloc.c +34 -0
- package/src/binding/ReadStat/src/readstat_malloc.h +4 -0
- package/src/binding/ReadStat/src/readstat_metadata.c +53 -0
- package/src/binding/ReadStat/src/readstat_parser.c +121 -0
- package/src/binding/ReadStat/src/readstat_strings.h +6 -0
- package/src/binding/ReadStat/src/readstat_value.c +178 -0
- package/src/binding/ReadStat/src/readstat_variable.c +123 -0
- package/src/binding/ReadStat/src/readstat_writer.c +677 -0
- package/src/binding/ReadStat/src/readstat_writer.h +21 -0
- package/src/binding/ReadStat/src/sas/ieee.c +420 -0
- package/src/binding/ReadStat/src/sas/ieee.h +6 -0
- package/src/binding/ReadStat/src/sas/readstat_sas.c +528 -0
- package/src/binding/ReadStat/src/sas/readstat_sas.h +131 -0
- package/src/binding/ReadStat/src/sas/readstat_sas7bcat_read.c +515 -0
- package/src/binding/ReadStat/src/sas/readstat_sas7bcat_write.c +218 -0
- package/src/binding/ReadStat/src/sas/readstat_sas7bdat_read.c +1304 -0
- package/src/binding/ReadStat/src/sas/readstat_sas7bdat_write.c +812 -0
- package/src/binding/ReadStat/src/sas/readstat_sas_rle.c +286 -0
- package/src/binding/ReadStat/src/sas/readstat_sas_rle.h +8 -0
- package/src/binding/ReadStat/src/sas/readstat_xport.c +28 -0
- package/src/binding/ReadStat/src/sas/readstat_xport.h +47 -0
- package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.c +265 -0
- package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.h +4 -0
- package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.rl +68 -0
- package/src/binding/ReadStat/src/sas/readstat_xport_read.c +777 -0
- package/src/binding/ReadStat/src/sas/readstat_xport_write.c +561 -0
- package/src/binding/readstat_binding.cc +393 -0
|
@@ -0,0 +1,515 @@
|
|
|
1
|
+
#include <stdio.h>
|
|
2
|
+
#include <stdlib.h>
|
|
3
|
+
#include <errno.h>
|
|
4
|
+
#include <string.h>
|
|
5
|
+
#include <math.h>
|
|
6
|
+
#include "readstat_sas.h"
|
|
7
|
+
#include "../readstat_iconv.h"
|
|
8
|
+
#include "../readstat_convert.h"
|
|
9
|
+
#include "../readstat_malloc.h"
|
|
10
|
+
|
|
11
|
+
/* Zero-based index of the page that is read first when building the block index. */
#define SAS_CATALOG_FIRST_INDEX_PAGE 1

/* Zero-based page index at which the scan for additional XLSR index pages starts. */
#define SAS_CATALOG_USELESS_PAGES 3
|
|
13
|
+
|
|
14
|
+
typedef struct sas7bcat_ctx_s {
|
|
15
|
+
readstat_metadata_handler metadata_handler;
|
|
16
|
+
readstat_value_label_handler value_label_handler;
|
|
17
|
+
void *user_ctx;
|
|
18
|
+
readstat_io_t *io;
|
|
19
|
+
int u64;
|
|
20
|
+
int pad1;
|
|
21
|
+
int bswap;
|
|
22
|
+
int64_t xlsr_size;
|
|
23
|
+
int64_t xlsr_offset;
|
|
24
|
+
int64_t xlsr_O_offset;
|
|
25
|
+
int64_t page_count;
|
|
26
|
+
int64_t page_size;
|
|
27
|
+
int64_t header_size;
|
|
28
|
+
uint64_t *block_pointers;
|
|
29
|
+
int block_pointers_used;
|
|
30
|
+
int block_pointers_capacity;
|
|
31
|
+
const char *input_encoding;
|
|
32
|
+
const char *output_encoding;
|
|
33
|
+
iconv_t converter;
|
|
34
|
+
} sas7bcat_ctx_t;
|
|
35
|
+
|
|
36
|
+
/* Release the resources owned by a catalog context, then the context itself. */
static void sas7bcat_ctx_free(sas7bcat_ctx_t *ctx) {
    if (ctx->converter) {
        iconv_close(ctx->converter);
    }

    /* free(NULL) is a no-op, so the pointer array needs no guard */
    free(ctx->block_pointers);
    free(ctx);
}
|
|
44
|
+
|
|
45
|
+
/*
 * Walk the value-label area of one catalog block and hand every
 * (value, label) pair of the format `name` to ctx->value_label_handler.
 *
 * value_start, value_labels_len: start and byte length of the area; the
 *     offset checks below are made against this length.
 * label_count_used: number of entries that carry a label position.
 * label_count_capacity: number of value entries walked in pass 1.
 * name: format name; a leading '$' selects string values, otherwise doubles.
 *
 * Returns READSTAT_OK on success, READSTAT_ERROR_MALLOC when an allocation
 * fails, READSTAT_ERROR_PARSE when an offset check fails,
 * READSTAT_ERROR_USER_ABORT when the handler does not return
 * READSTAT_HANDLER_OK, or the error reported by readstat_convert.
 */
static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, size_t value_labels_len,
        int label_count_used, int label_count_capacity, const char *name, sas7bcat_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    int i;
    const char *lbp1 = value_start;
    uint32_t *value_offset = readstat_calloc(label_count_used, sizeof(uint32_t));
    /* Doubles appear to be stored as big-endian, always */
    int bswap_doubles = machine_is_little_endian();
    int is_string = (name[0] == '$');
    char *label = NULL;

    if (value_offset == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    /* Pass 1 -- find out the offset of the labels */
    for (i=0; i<label_count_capacity; i++) {
        if (&lbp1[4] - value_start > value_labels_len || sas_read2(&lbp1[2], ctx->bswap) < 0) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        if (i<label_count_used) {
            if (&lbp1[10+ctx->pad1+4] - value_start > value_labels_len) {
                retval = READSTAT_ERROR_PARSE;
                goto cleanup;
            }
            uint32_t label_pos = sas_read4(&lbp1[10+ctx->pad1], ctx->bswap);
            if (label_pos >= label_count_used) {
                retval = READSTAT_ERROR_PARSE;
                goto cleanup;
            }
            /* Remember where the value entry for the label_pos-th label lives */
            value_offset[label_pos] = lbp1 - value_start;
        }
        /* Each entry is 6 bytes plus the length stored at offset 2 */
        lbp1 += 6 + sas_read2(&lbp1[2], ctx->bswap);
    }

    /* The labels start right after the last value entry */
    const char *lbp2 = lbp1;

    /* Pass 2 -- parse pairs of values & labels */
    for (i=0; i<label_count_used && i<label_count_capacity; i++) {
        lbp1 = value_start + value_offset[i];

        if (&lbp1[30] - value_start > value_labels_len ||
                &lbp2[10] - value_start > value_labels_len) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        readstat_value_t value = { .type = is_string ? READSTAT_TYPE_STRING : READSTAT_TYPE_DOUBLE };
        char string_val[4*16+1];
        if (is_string) {
            /* The string value occupies the last 16 bytes of the entry */
            size_t value_entry_len = 6 + sas_read2(&lbp1[2], ctx->bswap);
            retval = readstat_convert(string_val, sizeof(string_val),
                    &lbp1[value_entry_len-16], 16, ctx->converter);
            if (retval != READSTAT_OK)
                goto cleanup;

            value.v.string_value = string_val;
        } else {
            uint64_t val = sas_read8(&lbp1[22], bswap_doubles);
            double dval = NAN;
            if ((val | 0xFF0000000000) == 0xFFFFFFFFFFFF) {
                /* Tagged value: the tag is taken from bits 40 and up */
                sas_assign_tag(&value, (val >> 40));
            } else {
                memcpy(&dval, &val, 8);
                dval *= -1.0;
            }

            value.v.double_value = dval;
        }
        size_t label_len = sas_read2(&lbp2[8], ctx->bswap);
        if (&lbp2[10] + label_len - value_start > value_labels_len) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        if (ctx->value_label_handler) {
            /* Grow through a temporary so a failed realloc neither leaks the
             * previous buffer nor passes NULL on to readstat_convert */
            char *grown_label = realloc(label, 4 * label_len + 1);
            if (grown_label == NULL) {
                retval = READSTAT_ERROR_MALLOC;
                goto cleanup;
            }
            label = grown_label;

            retval = readstat_convert(label, 4 * label_len + 1,
                    &lbp2[10], label_len, ctx->converter);
            if (retval != READSTAT_OK)
                goto cleanup;

            if (ctx->value_label_handler(name, value, label, ctx->user_ctx) != READSTAT_HANDLER_OK) {
                retval = READSTAT_ERROR_USER_ABORT;
                goto cleanup;
            }
        }

        /* Step over the 8+2 byte label header, the label text and one trailing byte */
        lbp2 += 8 + 2 + label_len + 1;
    }

cleanup:
    free(label);
    free(value_offset);
    return retval;
}
|
|
141
|
+
|
|
142
|
+
/*
 * Decode the header of one assembled catalog block (format name and label
 * counts) and pass the remaining bytes to sas7bcat_parse_value_labels.
 *
 * Blocks that are too short for the fields being read, or that report zero
 * used labels, are skipped and READSTAT_OK is returned. Otherwise the result
 * of readstat_convert or sas7bcat_parse_value_labels is returned.
 */
static readstat_error_t sas7bcat_parse_block(const char *data, size_t data_size, sas7bcat_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    char name[4*32+1];
    uint64_t label_count_capacity = 0;
    uint64_t label_count_used = 0;
    int payload_offset = 106;
    size_t pad = 0;
    uint16_t flags = 0;
    int has_long_name;

    if (data_size < payload_offset)
        return READSTAT_OK;

    flags = sas_read2(&data[2], ctx->bswap);
    pad = (flags & 0x08) ? 4 : 0; // might be 0x10, not sure

    if (!ctx->u64) {
        label_count_capacity = sas_read4(&data[38+pad], ctx->bswap);
        label_count_used = sas_read4(&data[42+pad], ctx->bswap);
    } else {
        label_count_capacity = sas_read8(&data[42+pad], ctx->bswap);
        label_count_used = sas_read8(&data[50+pad], ctx->bswap);

        payload_offset += 32;
    }

    /* Short (8-byte) name; replaced below when a long name is present */
    retval = readstat_convert(name, sizeof(name), &data[8], 8, ctx->converter);
    if (retval != READSTAT_OK)
        return retval;

    if (pad != 0)
        pad += 16;

    /* The long-name flag bit depends on the file layout */
    has_long_name = ctx->u64 ? (flags & 0x20) : (flags & 0x80);
    if (has_long_name) {
        if (data_size < payload_offset + pad + 32)
            return READSTAT_OK;

        retval = readstat_convert(name, sizeof(name), &data[payload_offset+pad], 32, ctx->converter);
        if (retval != READSTAT_OK)
            return retval;

        pad += 32;
    }

    if (data_size < payload_offset + pad || label_count_used == 0)
        return READSTAT_OK;

    return sas7bcat_parse_value_labels(&data[payload_offset+pad], data_size - payload_offset - pad,
            label_count_used, label_count_capacity, name, ctx);
}
|
|
197
|
+
|
|
198
|
+
/*
 * Scan a run of XLSR entries starting at `index` (at most `len` bytes) and
 * append a block pointer, encoded as (page << 32) + position, for every entry
 * whose byte at ctx->xlsr_O_offset is 'O'. The pointer array is doubled
 * whenever it becomes full.
 *
 * Scanning stops at the first entry that does not begin with "XLSR", even
 * after allowing for 8 bytes of padding. Returns READSTAT_OK, or
 * READSTAT_ERROR_MALLOC when the array cannot be grown.
 */
static readstat_error_t sas7bcat_augment_index(const char *index, size_t len, sas7bcat_ctx_t *ctx) {
    const char *entry = index;

    while (entry + ctx->xlsr_size <= index + len) {
        if (memcmp(entry, "XLSR", 4) != 0) {
            // some block pointers seem to have 8 bytes of extra padding
            entry += 8;
            if (memcmp(entry, "XLSR", 4) != 0)
                break;
        }

        if (entry[ctx->xlsr_O_offset] == 'O') {
            uint64_t page = 0, pos = 0;
            if (ctx->u64) {
                page = sas_read8(&entry[8], ctx->bswap);
                pos = sas_read2(&entry[16], ctx->bswap);
            } else {
                page = sas_read4(&entry[4], ctx->bswap);
                pos = sas_read2(&entry[8], ctx->bswap);
            }
            ctx->block_pointers[ctx->block_pointers_used] = (page << 32) + pos;
            ctx->block_pointers_used++;
        }

        /* Keep at least one free slot for the next entry */
        if (ctx->block_pointers_used == ctx->block_pointers_capacity) {
            ctx->block_pointers_capacity *= 2;
            ctx->block_pointers = readstat_realloc(ctx->block_pointers,
                    ctx->block_pointers_capacity * sizeof(uint64_t));
            if (ctx->block_pointers == NULL)
                return READSTAT_ERROR_MALLOC;
        }

        entry += ctx->xlsr_size;
    }

    return READSTAT_OK;
}
|
|
232
|
+
|
|
233
|
+
/*
 * qsort comparator for 64-bit block pointers.
 *
 * Returns a negative value, zero, or a positive value when *elem1 is less
 * than, equal to, or greater than *elem2. The result comes from explicit
 * comparisons: subtracting the two uint64_t values and converting to int
 * would discard the high bits and could report the wrong order, or equality,
 * for keys that differ above bit 31.
 */
static int compare_block_pointers(const void *elem1, const void *elem2) {
    uint64_t v1 = *(const uint64_t *)elem1;
    uint64_t v2 = *(const uint64_t *)elem2;
    return (v1 > v2) - (v1 < v2);
}
|
|
238
|
+
|
|
239
|
+
/*
 * Put the collected block pointers into ascending order. qsort is only
 * called once an adjacent pair is found out of order, so an already sorted
 * (or empty) array is left untouched.
 */
static void sas7bcat_sort_index(sas7bcat_ctx_t *ctx) {
    int i;

    for (i=1; i<ctx->block_pointers_used; i++) {
        if (ctx->block_pointers[i-1] > ctx->block_pointers[i]) {
            qsort(ctx->block_pointers, ctx->block_pointers_used, sizeof(uint64_t), &compare_block_pointers);
            return;
        }
    }
}
|
|
251
|
+
|
|
252
|
+
/*
 * Collapse runs of equal neighbouring block pointers in place and shrink
 * block_pointers_used to the number of entries kept. An empty array is left
 * as it is.
 */
static void sas7bcat_uniq_index(sas7bcat_ctx_t *ctx) {
    uint64_t *pointers = ctx->block_pointers;
    int total = ctx->block_pointers_used;
    int kept = 1;
    int i;

    if (total == 0)
        return;

    for (i=1; i<total; i++) {
        if (pointers[i] == pointers[i-1])
            continue;

        /* When kept == i this stores the element onto itself */
        pointers[kept] = pointers[i];
        kept++;
    }

    ctx->block_pointers_used = kept;
}
|
|
268
|
+
|
|
269
|
+
/*
 * Follow the chain of links that starts at (start_page, start_page_pos) and
 * add up the payload length recorded in each link header.
 *
 * The walk ends when the next page or position is not positive, when the next
 * page exceeds ctx->page_count, or after ctx->page_count links.
 *
 * Returns the summed length, or -1 on a seek/read failure. When outError is
 * non-NULL it receives READSTAT_OK or the error code.
 */
static int sas7bcat_block_size(int start_page, int start_page_pos, sas7bcat_ctx_t *ctx, readstat_error_t *outError) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    /* Link header size and field offsets depend on the file layout */
    int chain_link_header_len = ctx->u64 ? 32 : 16;
    int pos_field = ctx->u64 ? 8 : 4;
    char chain_link[32];

    int page_no = start_page;
    int page_pos = start_page_pos;
    int links_seen = 0;
    int total_len = 0;

    // calculate buffer size needed
    for (;;) {
        int64_t link_offset;

        if (page_no <= 0 || page_pos <= 0 || page_no > ctx->page_count)
            break;
        if (links_seen++ >= ctx->page_count)
            break;

        link_offset = ctx->header_size + (page_no-1)*ctx->page_size + page_pos;
        if (io->seek(link_offset, READSTAT_SEEK_SET, io->io_ctx) == -1) {
            retval = READSTAT_ERROR_SEEK;
            break;
        }
        if (io->read(chain_link, chain_link_header_len, io->io_ctx) < chain_link_header_len) {
            retval = READSTAT_ERROR_READ;
            break;
        }

        page_no = sas_read4(&chain_link[0], ctx->bswap);
        page_pos = sas_read2(&chain_link[pos_field], ctx->bswap);
        total_len += sas_read2(&chain_link[pos_field+2], ctx->bswap);
    }

    if (outError)
        *outError = retval;

    if (retval != READSTAT_OK)
        return -1;

    return total_len;
}
|
|
315
|
+
|
|
316
|
+
/*
 * Follow the chain of links that starts at (start_page, start_page_pos) and
 * copy each link's payload into `buffer`, one after another.
 *
 * The walk ends under the same conditions as in sas7bcat_block_size. Returns
 * READSTAT_OK, READSTAT_ERROR_SEEK or READSTAT_ERROR_READ on I/O failure, or
 * READSTAT_ERROR_PARSE when a payload would not fit in buffer_len bytes.
 */
static readstat_error_t sas7bcat_read_block(char *buffer, size_t buffer_len,
        int start_page, int start_page_pos, sas7bcat_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    /* Link header size and field offsets depend on the file layout */
    int chain_link_header_len = ctx->u64 ? 32 : 16;
    int pos_field = ctx->u64 ? 8 : 4;
    char chain_link[32];

    int page_no = start_page;
    int page_pos = start_page_pos;
    int links_seen = 0;
    int filled = 0;

    for (;;) {
        int64_t link_offset;
        int payload_len;

        if (page_no <= 0 || page_pos <= 0 || page_no > ctx->page_count)
            break;
        if (links_seen++ >= ctx->page_count)
            break;

        link_offset = ctx->header_size + (page_no-1)*ctx->page_size + page_pos;
        if (io->seek(link_offset, READSTAT_SEEK_SET, io->io_ctx) == -1)
            return READSTAT_ERROR_SEEK;
        if (io->read(chain_link, chain_link_header_len, io->io_ctx) < chain_link_header_len)
            return READSTAT_ERROR_READ;

        page_no = sas_read4(&chain_link[0], ctx->bswap);
        page_pos = sas_read2(&chain_link[pos_field], ctx->bswap);
        payload_len = sas_read2(&chain_link[pos_field+2], ctx->bswap);

        /* The payload follows the link header directly in the file */
        if (filled + payload_len > buffer_len)
            return READSTAT_ERROR_PARSE;
        if (io->read(buffer + filled, payload_len, io->io_ctx) < payload_len)
            return READSTAT_ERROR_READ;

        filled += payload_len;
    }

    return READSTAT_OK;
}
|
|
365
|
+
|
|
366
|
+
/*
 * Parse the sas7bcat catalog at `path`.
 *
 * Steps: read the file header, report metadata through the parser's metadata
 * handler (if set), collect block pointers from the XLSR index entries, sort
 * and de-duplicate them, then read each block and report its value labels
 * through the parser's value-label handler.
 *
 * parser: supplies the I/O interface, handlers and encodings.
 * path: passed to the I/O interface's open callback.
 * user_ctx: opaque pointer passed to every handler.
 *
 * Returns READSTAT_OK on success or the first error met. Allocation failures
 * are reported as READSTAT_ERROR_MALLOC.
 */
readstat_error_t readstat_parse_sas7bcat(readstat_parser_t *parser, const char *path, void *user_ctx) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = parser->io;
    int64_t i;
    char *page = NULL;
    char *buffer = NULL;

    sas7bcat_ctx_t *ctx = calloc(1, sizeof(sas7bcat_ctx_t));
    sas_header_info_t *hinfo = calloc(1, sizeof(sas_header_info_t));

    /* Nothing has been opened yet, so release directly instead of going
     * through the cleanup path, which closes the I/O handle */
    if (ctx == NULL || hinfo == NULL) {
        free(hinfo);
        free(ctx);
        return READSTAT_ERROR_MALLOC;
    }

    ctx->block_pointers_capacity = 200;
    ctx->block_pointers = malloc(ctx->block_pointers_capacity * sizeof(uint64_t));
    if (ctx->block_pointers == NULL) {
        free(hinfo);
        sas7bcat_ctx_free(ctx);
        return READSTAT_ERROR_MALLOC;
    }

    ctx->value_label_handler = parser->handlers.value_label;
    ctx->metadata_handler = parser->handlers.metadata;
    ctx->input_encoding = parser->input_encoding;
    ctx->output_encoding = parser->output_encoding;
    ctx->user_ctx = user_ctx;
    ctx->io = io;

    if (io->open(path, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_OPEN;
        goto cleanup;
    }

    if ((retval = sas_read_header(io, hinfo, parser->handlers.error, user_ctx)) != READSTAT_OK) {
        goto cleanup;
    }

    ctx->u64 = hinfo->u64;
    ctx->pad1 = hinfo->pad1;
    ctx->bswap = machine_is_little_endian() ^ hinfo->little_endian;
    ctx->header_size = hinfo->header_size;
    ctx->page_count = hinfo->page_count;
    ctx->page_size = hinfo->page_size;
    /* An encoding set on the parser takes precedence over the one in the header */
    if (ctx->input_encoding == NULL) {
        ctx->input_encoding = hinfo->encoding;
    }

    ctx->xlsr_size = 212 + ctx->pad1;
    ctx->xlsr_offset = 856 + 2 * ctx->pad1;
    ctx->xlsr_O_offset = 50 + ctx->pad1;
    if (ctx->u64) {
        ctx->xlsr_offset += 144;
        ctx->xlsr_size += 72;
        ctx->xlsr_O_offset += 24;
    }

    /* A converter is only needed when the two encodings differ */
    if (ctx->input_encoding && ctx->output_encoding && strcmp(ctx->input_encoding, ctx->output_encoding) != 0) {
        iconv_t converter = iconv_open(ctx->output_encoding, ctx->input_encoding);
        if (converter == (iconv_t)-1) {
            retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
            goto cleanup;
        }
        ctx->converter = converter;
    }

    if (ctx->metadata_handler) {
        char table_name[4*32+1];
        readstat_metadata_t metadata = {
            .file_encoding = ctx->input_encoding, /* orig encoding? */
            .modified_time = hinfo->modification_time,
            .creation_time = hinfo->creation_time,
            .file_format_version = hinfo->major_version,
            .endianness = hinfo->little_endian ? READSTAT_ENDIAN_LITTLE : READSTAT_ENDIAN_BIG,
            .is64bit = ctx->u64
        };
        retval = readstat_convert(table_name, sizeof(table_name),
                hinfo->table_name, sizeof(hinfo->table_name), ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;

        metadata.table_name = table_name;

        if (ctx->metadata_handler(&metadata, ctx->user_ctx) != READSTAT_HANDLER_OK) {
            retval = READSTAT_ERROR_USER_ABORT;
            goto cleanup;
        }
    }

    /* The first index page is addressed at xlsr_offset; a page smaller than
     * that would put the index start, and its computed length, out of range */
    if (ctx->page_size < ctx->xlsr_offset) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if ((page = readstat_malloc(ctx->page_size)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }
    if (io->seek(ctx->header_size+SAS_CATALOG_FIRST_INDEX_PAGE*ctx->page_size, READSTAT_SEEK_SET, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }
    if (io->read(page, ctx->page_size, io->io_ctx) < ctx->page_size) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    retval = sas7bcat_augment_index(&page[ctx->xlsr_offset], ctx->page_size - ctx->xlsr_offset, ctx);
    if (retval != READSTAT_OK)
        goto cleanup;

    // Pass 1 -- find the XLSR entries
    for (i=SAS_CATALOG_USELESS_PAGES; i<ctx->page_count; i++) {
        if (io->seek(ctx->header_size+i*ctx->page_size, READSTAT_SEEK_SET, io->io_ctx) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (io->read(page, ctx->page_size, io->io_ctx) < ctx->page_size) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        /* Only pages with "XLSR" at offset 16 contribute index entries */
        if (memcmp(&page[16], "XLSR", sizeof("XLSR")-1) == 0) {
            retval = sas7bcat_augment_index(&page[16], ctx->page_size - 16, ctx);
            if (retval != READSTAT_OK)
                goto cleanup;
        }
    }

    sas7bcat_sort_index(ctx);
    sas7bcat_uniq_index(ctx);

    // Pass 2 -- look up the individual block pointers
    for (i=0; i<ctx->block_pointers_used; i++) {
        /* Block pointers are stored as (page << 32) + position */
        int start_page = ctx->block_pointers[i] >> 32;
        int start_page_pos = (ctx->block_pointers[i]) & 0xFFFF;

        int buffer_len = sas7bcat_block_size(start_page, start_page_pos, ctx, &retval);
        if (buffer_len == -1) {
            goto cleanup;
        } else if (buffer_len == 0) {
            continue;
        }
        if ((buffer = readstat_realloc(buffer, buffer_len)) == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        if ((retval = sas7bcat_read_block(buffer, buffer_len, start_page, start_page_pos, ctx)) != READSTAT_OK)
            goto cleanup;
        if ((retval = sas7bcat_parse_block(buffer, buffer_len, ctx)) != READSTAT_OK)
            goto cleanup;
    }

cleanup:
    io->close(io->io_ctx);
    if (page)
        free(page);
    if (buffer)
        free(buffer);
    if (ctx)
        sas7bcat_ctx_free(ctx);
    if (hinfo)
        free(hinfo);

    return retval;
}
|