js-stream-sas7bdat 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/binding.gyp +58 -0
  2. package/package.json +4 -2
  3. package/src/binding/ReadStat/LICENSE +19 -0
  4. package/src/binding/ReadStat/README.md +483 -0
  5. package/src/binding/ReadStat/src/CKHashTable.c +309 -0
  6. package/src/binding/ReadStat/src/CKHashTable.h +37 -0
  7. package/src/binding/ReadStat/src/readstat.h +627 -0
  8. package/src/binding/ReadStat/src/readstat_bits.c +69 -0
  9. package/src/binding/ReadStat/src/readstat_bits.h +20 -0
  10. package/src/binding/ReadStat/src/readstat_convert.c +36 -0
  11. package/src/binding/ReadStat/src/readstat_convert.h +2 -0
  12. package/src/binding/ReadStat/src/readstat_error.c +126 -0
  13. package/src/binding/ReadStat/src/readstat_iconv.h +15 -0
  14. package/src/binding/ReadStat/src/readstat_io_unistd.c +147 -0
  15. package/src/binding/ReadStat/src/readstat_io_unistd.h +11 -0
  16. package/src/binding/ReadStat/src/readstat_malloc.c +34 -0
  17. package/src/binding/ReadStat/src/readstat_malloc.h +4 -0
  18. package/src/binding/ReadStat/src/readstat_metadata.c +53 -0
  19. package/src/binding/ReadStat/src/readstat_parser.c +121 -0
  20. package/src/binding/ReadStat/src/readstat_strings.h +6 -0
  21. package/src/binding/ReadStat/src/readstat_value.c +178 -0
  22. package/src/binding/ReadStat/src/readstat_variable.c +123 -0
  23. package/src/binding/ReadStat/src/readstat_writer.c +677 -0
  24. package/src/binding/ReadStat/src/readstat_writer.h +21 -0
  25. package/src/binding/ReadStat/src/sas/ieee.c +420 -0
  26. package/src/binding/ReadStat/src/sas/ieee.h +6 -0
  27. package/src/binding/ReadStat/src/sas/readstat_sas.c +528 -0
  28. package/src/binding/ReadStat/src/sas/readstat_sas.h +131 -0
  29. package/src/binding/ReadStat/src/sas/readstat_sas7bcat_read.c +515 -0
  30. package/src/binding/ReadStat/src/sas/readstat_sas7bcat_write.c +218 -0
  31. package/src/binding/ReadStat/src/sas/readstat_sas7bdat_read.c +1304 -0
  32. package/src/binding/ReadStat/src/sas/readstat_sas7bdat_write.c +812 -0
  33. package/src/binding/ReadStat/src/sas/readstat_sas_rle.c +286 -0
  34. package/src/binding/ReadStat/src/sas/readstat_sas_rle.h +8 -0
  35. package/src/binding/ReadStat/src/sas/readstat_xport.c +28 -0
  36. package/src/binding/ReadStat/src/sas/readstat_xport.h +47 -0
  37. package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.c +265 -0
  38. package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.h +4 -0
  39. package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.rl +68 -0
  40. package/src/binding/ReadStat/src/sas/readstat_xport_read.c +777 -0
  41. package/src/binding/ReadStat/src/sas/readstat_xport_write.c +561 -0
  42. package/src/binding/readstat_binding.cc +393 -0
@@ -0,0 +1,777 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <string.h>
4
+ #include <sys/types.h>
5
+ #include <stdint.h>
6
+ #include <time.h>
7
+
8
+ #include "../readstat.h"
9
+ #include "../readstat_iconv.h"
10
+ #include "../readstat_convert.h"
11
+ #include "../readstat_malloc.h"
12
+ #include "readstat_sas.h"
13
+ #include "readstat_xport.h"
14
+ #include "ieee.h"
15
+
16
+ #define LINE_LEN 80
17
+
18
+ typedef struct xport_ctx_s {
19
+ readstat_callbacks_t handle;
20
+ size_t file_size;
21
+ void *user_ctx;
22
+ const char *input_encoding;
23
+ const char *output_encoding;
24
+ iconv_t converter;
25
+
26
+ readstat_io_t *io;
27
+ time_t timestamp;
28
+
29
+ int obs_count;
30
+ int var_count;
31
+ int row_limit;
32
+ int row_offset;
33
+ size_t row_length;
34
+ int parsed_row_count;
35
+ char file_label[256*4+1];
36
+ char table_name[32*4+1];
37
+
38
+ readstat_variable_t **variables;
39
+
40
+ int version;
41
+ } xport_ctx_t;
42
+
43
+ static readstat_error_t xport_update_progress(xport_ctx_t *ctx) {
44
+ readstat_io_t *io = ctx->io;
45
+ return io->update(ctx->file_size, ctx->handle.progress, ctx->user_ctx, io->io_ctx);
46
+ }
47
+
48
+ static xport_ctx_t *xport_ctx_init(void) {
49
+ xport_ctx_t *ctx = calloc(1, sizeof(xport_ctx_t));
50
+ return ctx;
51
+ }
52
+
53
+ static void xport_ctx_free(xport_ctx_t *ctx) {
54
+ if (ctx->variables) {
55
+ int i;
56
+ for (i=0; i<ctx->var_count; i++) {
57
+ if (ctx->variables[i])
58
+ free(ctx->variables[i]);
59
+ }
60
+ free(ctx->variables);
61
+ }
62
+ if (ctx->converter) {
63
+ iconv_close(ctx->converter);
64
+ }
65
+
66
+ free(ctx);
67
+ }
68
+
69
+ static ssize_t read_bytes(xport_ctx_t *ctx, void *dst, size_t dst_len) {
70
+ readstat_io_t *io = (readstat_io_t *)ctx->io;
71
+ return io->read(dst, dst_len, io->io_ctx);
72
+ }
73
+
74
+ static readstat_error_t xport_skip_record(xport_ctx_t *ctx) {
75
+ readstat_io_t *io = (readstat_io_t *)ctx->io;
76
+ if (io->seek(LINE_LEN, READSTAT_SEEK_CUR, io->io_ctx) == -1)
77
+ return READSTAT_ERROR_SEEK;
78
+
79
+ return READSTAT_OK;
80
+ }
81
+
82
+ static readstat_error_t xport_skip_rest_of_record(xport_ctx_t *ctx) {
83
+ readstat_io_t *io = (readstat_io_t *)ctx->io;
84
+ off_t pos = io->seek(0, READSTAT_SEEK_CUR, io->io_ctx);
85
+ if (pos == -1)
86
+ return READSTAT_ERROR_SEEK;
87
+
88
+ if (pos % LINE_LEN) {
89
+ if (io->seek(LINE_LEN - (pos % LINE_LEN), READSTAT_SEEK_CUR, io->io_ctx) == -1)
90
+ return READSTAT_ERROR_SEEK;
91
+ }
92
+
93
+ return READSTAT_OK;
94
+ }
95
+
96
+ static readstat_error_t xport_read_record(xport_ctx_t *ctx, char *record) {
97
+ ssize_t bytes_read = read_bytes(ctx, record, LINE_LEN);
98
+ if (bytes_read < LINE_LEN)
99
+ return READSTAT_ERROR_READ;
100
+
101
+ record[LINE_LEN] = '\0';
102
+
103
+ return READSTAT_OK;
104
+ }
105
+
106
+ static readstat_error_t xport_read_header_record(xport_ctx_t *ctx, xport_header_record_t *xrecord) {
107
+ char line[LINE_LEN+1];
108
+ readstat_error_t retval = READSTAT_OK;
109
+
110
+ retval = xport_read_record(ctx, line);
111
+ if (retval != READSTAT_OK)
112
+ return retval;
113
+
114
+ memset(xrecord, 0, sizeof(xport_header_record_t));
115
+ int matches = sscanf(line,
116
+ "HEADER RECORD*******%8s HEADER RECORD!!!!!!!"
117
+ "%05d%05d%05d" "%05d%05d%05d", xrecord->name,
118
+ &xrecord->num1, &xrecord->num2, &xrecord->num3,
119
+ &xrecord->num4, &xrecord->num5, &xrecord->num6);
120
+
121
+ if (matches < 2) {
122
+ return READSTAT_ERROR_PARSE;
123
+ }
124
+
125
+ return READSTAT_OK;
126
+ }
127
+
128
+ static readstat_error_t xport_expect_header_record(xport_ctx_t *ctx,
129
+ const char *v5_name, const char *v8_name) {
130
+ readstat_error_t retval = READSTAT_OK;
131
+ xport_header_record_t xrecord;
132
+
133
+ retval = xport_read_header_record(ctx, &xrecord);
134
+ if (retval != READSTAT_OK)
135
+ goto cleanup;
136
+
137
+ if (ctx->version == 5 && strcmp(xrecord.name, v5_name) != 0) {
138
+ retval = READSTAT_ERROR_PARSE;
139
+ goto cleanup;
140
+ } else if (ctx->version == 8 && strcmp(xrecord.name, v8_name) != 0) {
141
+ retval = READSTAT_ERROR_PARSE;
142
+ goto cleanup;
143
+ }
144
+
145
+ cleanup:
146
+ return retval;
147
+ }
148
+
149
+ static readstat_error_t xport_read_table_name_record(xport_ctx_t *ctx) {
150
+ char line[LINE_LEN+1];
151
+ readstat_error_t retval = READSTAT_OK;
152
+
153
+ retval = xport_read_record(ctx, line);
154
+ if (retval != READSTAT_OK)
155
+ goto cleanup;
156
+
157
+ retval = readstat_convert(ctx->table_name, sizeof(ctx->table_name), &line[8],
158
+ ctx->version == 5 ? 8 : 32, ctx->converter);
159
+ if (retval != READSTAT_OK)
160
+ goto cleanup;
161
+
162
+ cleanup:
163
+ return retval;
164
+ }
165
+
166
+ static readstat_error_t xport_read_file_label_record(xport_ctx_t *ctx) {
167
+ char line[LINE_LEN+1];
168
+ readstat_error_t retval = READSTAT_OK;
169
+
170
+ retval = xport_read_record(ctx, line);
171
+ if (retval != READSTAT_OK)
172
+ goto cleanup;
173
+
174
+ retval = readstat_convert(ctx->file_label, sizeof(ctx->file_label), &line[32],
175
+ 40, ctx->converter);
176
+ if (retval != READSTAT_OK)
177
+ goto cleanup;
178
+
179
+ cleanup:
180
+ return retval;
181
+ }
182
+
183
+ static readstat_error_t xport_read_library_record(xport_ctx_t *ctx) {
184
+ xport_header_record_t xrecord;
185
+ readstat_error_t retval = xport_read_header_record(ctx, &xrecord);
186
+ if (retval != READSTAT_OK)
187
+ goto cleanup;
188
+
189
+ if (strcmp(xrecord.name, "LIBRARY") == 0) {
190
+ ctx->version = 5;
191
+ } else if (strcmp(xrecord.name, "LIBV8") == 0) {
192
+ ctx->version = 8;
193
+ } else {
194
+ retval = READSTAT_ERROR_UNSUPPORTED_FILE_FORMAT_VERSION;
195
+ goto cleanup;
196
+ }
197
+
198
+ cleanup:
199
+ return retval;
200
+ }
201
+
202
+ static readstat_error_t xport_read_timestamp_record(xport_ctx_t *ctx) {
203
+ char line[LINE_LEN+1];
204
+ readstat_error_t retval = READSTAT_OK;
205
+ struct tm ts = { .tm_isdst = -1 };
206
+ char month[4];
207
+ int i;
208
+
209
+ retval = xport_read_record(ctx, line);
210
+ if (retval != READSTAT_OK)
211
+ goto cleanup;
212
+
213
+ sscanf(line,
214
+ "%02d%3s%02d:%02d:%02d:%02d",
215
+ &ts.tm_mday, month, &ts.tm_year, &ts.tm_hour, &ts.tm_min, &ts.tm_sec);
216
+
217
+ for (i=0; i<sizeof(_xport_months)/sizeof(_xport_months[0]); i++) {
218
+ if (strcmp(month, _xport_months[i]) == 0) {
219
+ ts.tm_mon = i;
220
+ break;
221
+ }
222
+ }
223
+
224
+ if (ts.tm_year < 60) {
225
+ ts.tm_year += 100;
226
+ }
227
+
228
+ ctx->timestamp = mktime(&ts);
229
+
230
+ cleanup:
231
+ return retval;
232
+ }
233
+
234
+ static readstat_error_t xport_read_namestr_header_record(xport_ctx_t *ctx) {
235
+ xport_header_record_t xrecord;
236
+ readstat_error_t retval = READSTAT_OK;
237
+
238
+ retval = xport_read_header_record(ctx, &xrecord);
239
+ if (retval != READSTAT_OK)
240
+ goto cleanup;
241
+
242
+ if (ctx->version == 5 && strcmp(xrecord.name, "NAMESTR") != 0) {
243
+ retval = READSTAT_ERROR_PARSE;
244
+ goto cleanup;
245
+ } else if (ctx->version == 8 && strcmp(xrecord.name, "NAMSTV8") != 0) {
246
+ retval = READSTAT_ERROR_PARSE;
247
+ goto cleanup;
248
+ }
249
+
250
+ ctx->var_count = xrecord.num2;
251
+ ctx->variables = readstat_calloc(ctx->var_count, sizeof(readstat_variable_t *));
252
+ if (ctx->variables == NULL) {
253
+ retval = READSTAT_ERROR_MALLOC;
254
+ goto cleanup;
255
+ }
256
+
257
+ if (ctx->handle.metadata) {
258
+ readstat_metadata_t metadata = {
259
+ .row_count = -1,
260
+ .var_count = ctx->var_count,
261
+ .file_label = ctx->file_label,
262
+ .table_name = ctx->table_name,
263
+ .creation_time = ctx->timestamp,
264
+ .modified_time = ctx->timestamp,
265
+ .file_format_version = ctx->version
266
+ };
267
+ if (ctx->handle.metadata(&metadata, ctx->user_ctx) != READSTAT_HANDLER_OK) {
268
+ retval = READSTAT_ERROR_USER_ABORT;
269
+ goto cleanup;
270
+ }
271
+ }
272
+
273
+ cleanup:
274
+ return retval;
275
+ }
276
+
277
+ static readstat_error_t xport_read_obs_header_record(xport_ctx_t *ctx) {
278
+ return xport_expect_header_record(ctx, "OBS", "OBSV8");
279
+ }
280
+
281
+ static readstat_error_t xport_construct_format(char *dst, size_t dst_len,
282
+ const char *src, size_t src_len, int width, int decimals) {
283
+ char *format = malloc(4 * src_len + 1);
284
+ readstat_error_t retval = readstat_convert(format, 4 * src_len + 1, src, src_len, NULL);
285
+
286
+ if (retval != READSTAT_OK) {
287
+ free(format);
288
+ return retval;
289
+ }
290
+
291
+ char *pos = dst;
292
+ *dst = '\0';
293
+ if (format[0]) {
294
+ pos += snprintf(dst, dst_len, "%s", format);
295
+ }
296
+ if (width) {
297
+ pos += snprintf(pos, dst_len-(pos-dst), "%d", width);
298
+ }
299
+ if (decimals) {
300
+ pos += snprintf(pos, dst_len-(pos-dst), ".%d", decimals);
301
+ }
302
+
303
+ free(format);
304
+ return retval;
305
+ }
306
+
307
+ static readstat_error_t xport_read_labels_v8(xport_ctx_t *ctx, int label_count) {
308
+ readstat_error_t retval = READSTAT_OK;
309
+ uint16_t labeldef[3];
310
+ char *name = NULL;
311
+ char *label = NULL;
312
+ int i;
313
+ for (i=0; i<label_count; i++) {
314
+ int index, name_len, label_len;
315
+ if (read_bytes(ctx, labeldef, sizeof(labeldef)) != sizeof(labeldef)) {
316
+ retval = READSTAT_ERROR_READ;
317
+ goto cleanup;
318
+ }
319
+
320
+ if (machine_is_little_endian()) {
321
+ index = byteswap2(labeldef[0]);
322
+ name_len = byteswap2(labeldef[1]);
323
+ label_len = byteswap2(labeldef[2]);
324
+ } else {
325
+ index = labeldef[0];
326
+ name_len = labeldef[1];
327
+ label_len = labeldef[2];
328
+ }
329
+
330
+ if (index > ctx->var_count || index == 0) {
331
+ retval = READSTAT_ERROR_PARSE;
332
+ goto cleanup;
333
+ }
334
+
335
+ name = realloc(name, name_len + 1);
336
+ label = realloc(label, label_len + 1);
337
+ readstat_variable_t *variable = ctx->variables[index-1];
338
+
339
+ if (read_bytes(ctx, name, name_len) != name_len ||
340
+ read_bytes(ctx, label, label_len) != label_len) {
341
+ retval = READSTAT_ERROR_READ;
342
+ goto cleanup;
343
+ }
344
+
345
+ retval = readstat_convert(variable->name, sizeof(variable->name),
346
+ name, name_len, ctx->converter);
347
+ if (retval != READSTAT_OK)
348
+ goto cleanup;
349
+
350
+ retval = readstat_convert(variable->label, sizeof(variable->label),
351
+ label, label_len, ctx->converter);
352
+ if (retval != READSTAT_OK)
353
+ goto cleanup;
354
+ }
355
+
356
+ retval = xport_skip_rest_of_record(ctx);
357
+ if (retval != READSTAT_OK)
358
+ goto cleanup;
359
+
360
+ retval = xport_read_obs_header_record(ctx);
361
+ if (retval != READSTAT_OK)
362
+ goto cleanup;
363
+
364
+ cleanup:
365
+ free(name);
366
+ free(label);
367
+ return retval;
368
+ }
369
+
370
+ static readstat_error_t xport_read_labels_v9(xport_ctx_t *ctx, int label_count) {
371
+ readstat_error_t retval = READSTAT_OK;
372
+ uint16_t labeldef[5];
373
+ int i;
374
+ char *name = NULL;
375
+ char *label = NULL;
376
+ char *format = NULL;
377
+ char *informat = NULL;
378
+
379
+ for (i=0; i<label_count; i++) {
380
+ int index, name_len, label_len, format_len, informat_len;
381
+ if (read_bytes(ctx, labeldef, sizeof(labeldef)) != sizeof(labeldef)) {
382
+ retval = READSTAT_ERROR_READ;
383
+ goto cleanup;
384
+ }
385
+
386
+ if (machine_is_little_endian()) {
387
+ index = byteswap2(labeldef[0]);
388
+ name_len = byteswap2(labeldef[1]);
389
+ label_len = byteswap2(labeldef[2]);
390
+ format_len = byteswap2(labeldef[3]);
391
+ informat_len = byteswap2(labeldef[4]);
392
+ } else {
393
+ index = labeldef[0];
394
+ name_len = labeldef[1];
395
+ label_len = labeldef[2];
396
+ format_len = labeldef[3];
397
+ informat_len = labeldef[4];
398
+ }
399
+
400
+ if (index > ctx->var_count || index == 0) {
401
+ retval = READSTAT_ERROR_PARSE;
402
+ goto cleanup;
403
+ }
404
+
405
+ name = realloc(name, name_len + 1);
406
+ label = realloc(label, label_len + 1);
407
+ format = realloc(format, format_len + 1);
408
+ informat = realloc(informat, informat_len + 1);
409
+
410
+ readstat_variable_t *variable = ctx->variables[index-1];
411
+
412
+ if (read_bytes(ctx, name, name_len) != name_len ||
413
+ read_bytes(ctx, label, label_len) != label_len ||
414
+ read_bytes(ctx, format, format_len) != format_len ||
415
+ read_bytes(ctx, informat, informat_len) != informat_len) {
416
+ retval = READSTAT_ERROR_READ;
417
+ goto cleanup;
418
+ }
419
+
420
+ retval = readstat_convert(variable->name, sizeof(variable->name),
421
+ name, name_len, ctx->converter);
422
+ if (retval != READSTAT_OK)
423
+ goto cleanup;
424
+
425
+ retval = readstat_convert(variable->label, sizeof(variable->label),
426
+ label, label_len, ctx->converter);
427
+ if (retval != READSTAT_OK)
428
+ goto cleanup;
429
+
430
+ retval = readstat_convert(variable->format, sizeof(variable->format),
431
+ format, format_len, ctx->converter);
432
+ if (retval != READSTAT_OK)
433
+ goto cleanup;
434
+ }
435
+
436
+ retval = xport_skip_rest_of_record(ctx);
437
+ if (retval != READSTAT_OK)
438
+ goto cleanup;
439
+
440
+ retval = xport_read_obs_header_record(ctx);
441
+ if (retval != READSTAT_OK)
442
+ goto cleanup;
443
+
444
+ cleanup:
445
+ free(name);
446
+ free(format);
447
+ free(informat);
448
+ free(label);
449
+ return retval;
450
+ }
451
+
452
+ static readstat_error_t xport_read_variables(xport_ctx_t *ctx) {
453
+ int i;
454
+ readstat_error_t retval = READSTAT_OK;
455
+ for (i=0; i<ctx->var_count; i++) {
456
+ xport_namestr_t namestr;
457
+ ssize_t bytes_read = read_bytes(ctx, &namestr, sizeof(xport_namestr_t));
458
+ if (bytes_read < sizeof(xport_namestr_t)) {
459
+ retval = READSTAT_ERROR_READ;
460
+ goto cleanup;
461
+ }
462
+ xport_namestr_bswap(&namestr);
463
+
464
+ readstat_variable_t *variable = calloc(1, sizeof(readstat_variable_t));
465
+
466
+ variable->index = i;
467
+ variable->type = namestr.ntype == SAS_COLUMN_TYPE_CHR ? READSTAT_TYPE_STRING : READSTAT_TYPE_DOUBLE;
468
+ variable->storage_width = namestr.nlng;
469
+ variable->display_width = namestr.nfl;
470
+ variable->decimals = namestr.nfd;
471
+ variable->alignment = namestr.nfj ? READSTAT_ALIGNMENT_RIGHT : READSTAT_ALIGNMENT_LEFT;
472
+
473
+ if (ctx->version == 5) {
474
+ retval = readstat_convert(variable->name, sizeof(variable->name),
475
+ namestr.nname, sizeof(namestr.nname), ctx->converter);
476
+ } else {
477
+ retval = readstat_convert(variable->name, sizeof(variable->name),
478
+ namestr.longname, sizeof(namestr.longname), ctx->converter);
479
+ }
480
+ if (retval != READSTAT_OK)
481
+ goto cleanup;
482
+
483
+ retval = readstat_convert(variable->label, sizeof(variable->label),
484
+ namestr.nlabel, sizeof(namestr.nlabel), ctx->converter);
485
+ if (retval != READSTAT_OK)
486
+ goto cleanup;
487
+
488
+ retval = xport_construct_format(variable->format, sizeof(variable->format),
489
+ namestr.nform, sizeof(namestr.nform),
490
+ variable->display_width, variable->decimals);
491
+ if (retval != READSTAT_OK)
492
+ goto cleanup;
493
+
494
+ ctx->variables[i] = variable;
495
+ }
496
+
497
+ retval = xport_skip_rest_of_record(ctx);
498
+ if (retval != READSTAT_OK)
499
+ goto cleanup;
500
+
501
+ if (ctx->version == 5) {
502
+ retval = xport_read_obs_header_record(ctx);
503
+ if (retval != READSTAT_OK)
504
+ goto cleanup;
505
+ } else {
506
+ xport_header_record_t xrecord;
507
+ retval = xport_read_header_record(ctx, &xrecord);
508
+ if (retval != READSTAT_OK)
509
+ goto cleanup;
510
+
511
+ if (strcmp(xrecord.name, "OBSV8") == 0) {
512
+ /* void */
513
+ } else if (strcmp(xrecord.name, "LABELV8") == 0) {
514
+ retval = xport_read_labels_v8(ctx, xrecord.num1);
515
+ } else if (strcmp(xrecord.name, "LABELV9") == 0) {
516
+ retval = xport_read_labels_v9(ctx, xrecord.num1);
517
+ }
518
+ if (retval != READSTAT_OK)
519
+ goto cleanup;
520
+ }
521
+
522
+ ctx->row_length = 0;
523
+
524
+ int index_after_skipping = 0;
525
+
526
+ for (i=0; i<ctx->var_count; i++) {
527
+ readstat_variable_t *variable = ctx->variables[i];
528
+ variable->index_after_skipping = index_after_skipping;
529
+
530
+ int cb_retval = READSTAT_HANDLER_OK;
531
+ if (ctx->handle.variable) {
532
+ cb_retval = ctx->handle.variable(i, variable, variable->format, ctx->user_ctx);
533
+ }
534
+ if (cb_retval == READSTAT_HANDLER_ABORT) {
535
+ retval = READSTAT_ERROR_USER_ABORT;
536
+ goto cleanup;
537
+ }
538
+ if (cb_retval == READSTAT_HANDLER_SKIP_VARIABLE) {
539
+ variable->skip = 1;
540
+ } else {
541
+ index_after_skipping++;
542
+ }
543
+
544
+ ctx->row_length += variable->storage_width;
545
+ }
546
+
547
+ cleanup:
548
+ return retval;
549
+ }
550
+
551
+ static readstat_error_t xport_process_row(xport_ctx_t *ctx, const char *row, size_t row_length) {
552
+ readstat_error_t retval = READSTAT_OK;
553
+ int i;
554
+ off_t pos = 0;
555
+ char *string = NULL;
556
+ for (i=0; i<ctx->var_count; i++) {
557
+ readstat_variable_t *variable = ctx->variables[i];
558
+ readstat_value_t value = { .type = variable->type };
559
+
560
+ if (variable->type == READSTAT_TYPE_STRING) {
561
+ string = readstat_realloc(string, 4*variable->storage_width+1);
562
+ if (string == NULL) {
563
+ retval = READSTAT_ERROR_MALLOC;
564
+ goto cleanup;
565
+ }
566
+ retval = readstat_convert(string, 4*variable->storage_width+1,
567
+ &row[pos], variable->storage_width, ctx->converter);
568
+ if (retval != READSTAT_OK)
569
+ goto cleanup;
570
+
571
+ value.v.string_value = string;
572
+ } else {
573
+ double dval = NAN;
574
+ if (variable->storage_width <= XPORT_MAX_DOUBLE_SIZE &&
575
+ variable->storage_width >= XPORT_MIN_DOUBLE_SIZE) {
576
+ char full_value[8] = { 0 };
577
+ if (memcmp(&full_value[1], &row[pos+1], variable->storage_width - 1) == 0 &&
578
+ (row[pos] == '.' || sas_validate_tag(row[pos]) == READSTAT_OK)) {
579
+ if (row[pos] == '.') {
580
+ value.is_system_missing = 1;
581
+ } else {
582
+ value.tag = row[pos];
583
+ value.is_tagged_missing = 1;
584
+ }
585
+ } else {
586
+ memcpy(full_value, &row[pos], variable->storage_width);
587
+ int rc = cnxptiee(full_value, CN_TYPE_XPORT, &dval, CN_TYPE_NATIVE);
588
+ if (rc != 0) {
589
+ retval = READSTAT_ERROR_CONVERT;
590
+ goto cleanup;
591
+ }
592
+ }
593
+ }
594
+
595
+ value.v.double_value = dval;
596
+ }
597
+ pos += variable->storage_width;
598
+
599
+ if (ctx->handle.value && !ctx->variables[i]->skip && !ctx->row_offset) {
600
+ if (ctx->handle.value(ctx->parsed_row_count, variable, value, ctx->user_ctx) != READSTAT_HANDLER_OK) {
601
+ retval = READSTAT_ERROR_USER_ABORT;
602
+ goto cleanup;
603
+ }
604
+ }
605
+ }
606
+ if (ctx->row_offset) {
607
+ ctx->row_offset--;
608
+ } else {
609
+ ctx->parsed_row_count++;
610
+ }
611
+
612
+ cleanup:
613
+ free(string);
614
+ return retval;
615
+ }
616
+
617
+ static readstat_error_t xport_read_data(xport_ctx_t *ctx) {
618
+ if (!ctx->row_length)
619
+ return READSTAT_OK;
620
+
621
+ if (!ctx->handle.value)
622
+ return READSTAT_OK;
623
+
624
+ readstat_error_t retval = READSTAT_OK;
625
+ char *row = readstat_malloc(ctx->row_length);
626
+ char *blank_row = readstat_malloc(ctx->row_length);
627
+ int num_blank_rows = 0;
628
+
629
+ if (row == NULL || blank_row == NULL) {
630
+ retval = READSTAT_ERROR_MALLOC;
631
+ goto cleanup;
632
+ }
633
+
634
+ memset(blank_row, ' ', ctx->row_length);
635
+ while (1) {
636
+ ssize_t bytes_read = read_bytes(ctx, row, ctx->row_length);
637
+ if (bytes_read == -1) {
638
+ retval = READSTAT_ERROR_READ;
639
+ goto cleanup;
640
+ } else if (bytes_read < ctx->row_length) {
641
+ break;
642
+ }
643
+
644
+ off_t pos = 0;
645
+
646
+ int row_is_blank = 1;
647
+
648
+ for (pos=0; pos<ctx->row_length; pos++) {
649
+ if (row[pos] != ' ') {
650
+ row_is_blank = 0;
651
+ break;
652
+ }
653
+ }
654
+
655
+ if (row_is_blank) {
656
+ num_blank_rows++;
657
+ continue;
658
+ }
659
+
660
+ while (num_blank_rows) {
661
+ retval = xport_process_row(ctx, blank_row, ctx->row_length);
662
+ if (retval != READSTAT_OK)
663
+ goto cleanup;
664
+
665
+ if (ctx->row_limit > 0 && ctx->parsed_row_count == ctx->row_limit)
666
+ goto cleanup;
667
+
668
+ num_blank_rows--;
669
+ }
670
+
671
+ retval = xport_process_row(ctx, row, ctx->row_length);
672
+ if (retval != READSTAT_OK)
673
+ goto cleanup;
674
+
675
+ retval = xport_update_progress(ctx);
676
+ if (retval != READSTAT_OK)
677
+ goto cleanup;
678
+
679
+ if (ctx->row_limit > 0 && ctx->parsed_row_count == ctx->row_limit)
680
+ break;
681
+ }
682
+
683
+ cleanup:
684
+ if (row)
685
+ free(row);
686
+ if (blank_row)
687
+ free(blank_row);
688
+ return retval;
689
+ }
690
+
691
+ readstat_error_t readstat_parse_xport(readstat_parser_t *parser, const char *path, void *user_ctx) {
692
+ readstat_error_t retval = READSTAT_OK;
693
+ readstat_io_t *io = parser->io;
694
+
695
+ xport_ctx_t *ctx = xport_ctx_init();
696
+ ctx->handle = parser->handlers;
697
+ ctx->input_encoding = parser->input_encoding;
698
+ ctx->output_encoding = parser->output_encoding;
699
+ ctx->user_ctx = user_ctx;
700
+ ctx->io = io;
701
+ ctx->row_limit = parser->row_limit;
702
+ if (parser->row_offset > 0)
703
+ ctx->row_offset = parser->row_offset;
704
+
705
+ if (io->open(path, io->io_ctx) == -1) {
706
+ retval = READSTAT_ERROR_OPEN;
707
+ goto cleanup;
708
+ }
709
+
710
+ if ((ctx->file_size = io->seek(0, READSTAT_SEEK_END, io->io_ctx)) == -1) {
711
+ retval = READSTAT_ERROR_SEEK;
712
+ goto cleanup;
713
+ }
714
+
715
+ if (io->seek(0, READSTAT_SEEK_SET, io->io_ctx) == -1) {
716
+ retval = READSTAT_ERROR_SEEK;
717
+ goto cleanup;
718
+ }
719
+
720
+ if (ctx->input_encoding && ctx->output_encoding && strcmp(ctx->input_encoding, ctx->output_encoding) != 0) {
721
+ iconv_t converter = iconv_open(ctx->output_encoding, ctx->input_encoding);
722
+ if (converter == (iconv_t)-1) {
723
+ retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
724
+ goto cleanup;
725
+ }
726
+ ctx->converter = converter;
727
+ }
728
+
729
+ retval = xport_read_library_record(ctx);
730
+ if (retval != READSTAT_OK)
731
+ goto cleanup;
732
+
733
+ retval = xport_skip_record(ctx);
734
+ if (retval != READSTAT_OK)
735
+ goto cleanup;
736
+
737
+ retval = xport_read_timestamp_record(ctx);
738
+ if (retval != READSTAT_OK)
739
+ goto cleanup;
740
+
741
+ retval = xport_expect_header_record(ctx, "MEMBER", "MEMBV8");
742
+ if (retval != READSTAT_OK)
743
+ goto cleanup;
744
+
745
+ retval = xport_expect_header_record(ctx, "DSCRPTR", "DSCPTV8");
746
+ if (retval != READSTAT_OK)
747
+ goto cleanup;
748
+
749
+ retval = xport_read_table_name_record(ctx);
750
+ if (retval != READSTAT_OK)
751
+ goto cleanup;
752
+
753
+ retval = xport_read_file_label_record(ctx);
754
+ if (retval != READSTAT_OK)
755
+ goto cleanup;
756
+
757
+ retval = xport_read_namestr_header_record(ctx);
758
+ if (retval != READSTAT_OK)
759
+ goto cleanup;
760
+
761
+ retval = xport_read_variables(ctx);
762
+ if (retval != READSTAT_OK)
763
+ goto cleanup;
764
+
765
+ if (ctx->row_length) {
766
+ retval = xport_read_data(ctx);
767
+ if (retval != READSTAT_OK)
768
+ goto cleanup;
769
+ }
770
+
771
+ cleanup:
772
+ io->close(io->io_ctx);
773
+ xport_ctx_free(ctx);
774
+
775
+ return retval;
776
+ }
777
+