js-stream-sas7bdat 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/binding.gyp +58 -0
  2. package/package.json +4 -2
  3. package/src/binding/ReadStat/LICENSE +19 -0
  4. package/src/binding/ReadStat/README.md +483 -0
  5. package/src/binding/ReadStat/src/CKHashTable.c +309 -0
  6. package/src/binding/ReadStat/src/CKHashTable.h +37 -0
  7. package/src/binding/ReadStat/src/readstat.h +627 -0
  8. package/src/binding/ReadStat/src/readstat_bits.c +69 -0
  9. package/src/binding/ReadStat/src/readstat_bits.h +20 -0
  10. package/src/binding/ReadStat/src/readstat_convert.c +36 -0
  11. package/src/binding/ReadStat/src/readstat_convert.h +2 -0
  12. package/src/binding/ReadStat/src/readstat_error.c +126 -0
  13. package/src/binding/ReadStat/src/readstat_iconv.h +15 -0
  14. package/src/binding/ReadStat/src/readstat_io_unistd.c +147 -0
  15. package/src/binding/ReadStat/src/readstat_io_unistd.h +11 -0
  16. package/src/binding/ReadStat/src/readstat_malloc.c +34 -0
  17. package/src/binding/ReadStat/src/readstat_malloc.h +4 -0
  18. package/src/binding/ReadStat/src/readstat_metadata.c +53 -0
  19. package/src/binding/ReadStat/src/readstat_parser.c +121 -0
  20. package/src/binding/ReadStat/src/readstat_strings.h +6 -0
  21. package/src/binding/ReadStat/src/readstat_value.c +178 -0
  22. package/src/binding/ReadStat/src/readstat_variable.c +123 -0
  23. package/src/binding/ReadStat/src/readstat_writer.c +677 -0
  24. package/src/binding/ReadStat/src/readstat_writer.h +21 -0
  25. package/src/binding/ReadStat/src/sas/ieee.c +420 -0
  26. package/src/binding/ReadStat/src/sas/ieee.h +6 -0
  27. package/src/binding/ReadStat/src/sas/readstat_sas.c +528 -0
  28. package/src/binding/ReadStat/src/sas/readstat_sas.h +131 -0
  29. package/src/binding/ReadStat/src/sas/readstat_sas7bcat_read.c +515 -0
  30. package/src/binding/ReadStat/src/sas/readstat_sas7bcat_write.c +218 -0
  31. package/src/binding/ReadStat/src/sas/readstat_sas7bdat_read.c +1304 -0
  32. package/src/binding/ReadStat/src/sas/readstat_sas7bdat_write.c +812 -0
  33. package/src/binding/ReadStat/src/sas/readstat_sas_rle.c +286 -0
  34. package/src/binding/ReadStat/src/sas/readstat_sas_rle.h +8 -0
  35. package/src/binding/ReadStat/src/sas/readstat_xport.c +28 -0
  36. package/src/binding/ReadStat/src/sas/readstat_xport.h +47 -0
  37. package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.c +265 -0
  38. package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.h +4 -0
  39. package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.rl +68 -0
  40. package/src/binding/ReadStat/src/sas/readstat_xport_read.c +777 -0
  41. package/src/binding/ReadStat/src/sas/readstat_xport_write.c +561 -0
  42. package/src/binding/readstat_binding.cc +393 -0
@@ -0,0 +1,528 @@
1
+
2
+ #include <stdio.h>
3
+ #include <stdlib.h>
4
+ #include <errno.h>
5
+ #include <string.h>
6
+ #include <math.h>
7
+ #include <time.h>
8
+ #include <limits.h>
9
+ #include <inttypes.h>
10
+
11
+ #include "readstat_sas.h"
12
+ #include "../readstat_iconv.h"
13
+ #include "../readstat_convert.h"
14
+ #include "../readstat_writer.h"
15
+
16
+ #define SAS_FILE_HEADER_SIZE_32BIT 1024
17
+ #define SAS_FILE_HEADER_SIZE_64BIT 8192
18
+ #define SAS_DEFAULT_PAGE_SIZE 4096
19
+
20
+ #define SAS_DEFAULT_STRING_ENCODING "WINDOWS-1252"
21
+
22
+ unsigned char sas7bdat_magic_number[32] = { /* one of the two 32-byte magic values sas_read_header() accepts at the start of a file */
23
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
24
+ 0x00, 0x00, 0x00, 0x00, 0xc2, 0xea, 0x81, 0x60, /* last byte of this row (0x60) is the only byte that differs from sas7bcat_magic_number (0x63) */
25
+ 0xb3, 0x14, 0x11, 0xcf, 0xbd, 0x92, 0x08, 0x00,
26
+ 0x09, 0xc7, 0x31, 0x8c, 0x18, 0x1f, 0x10, 0x11
27
+ };
28
+
29
+ unsigned char sas7bcat_magic_number[32] = { /* one of the two 32-byte magic values sas_read_header() accepts at the start of a file */
30
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
31
+ 0x00, 0x00, 0x00, 0x00, 0xc2, 0xea, 0x81, 0x63, /* last byte of this row (0x63) is the only byte that differs from sas7bdat_magic_number (0x60) */
32
+ 0xb3, 0x14, 0x11, 0xcf, 0xbd, 0x92, 0x08, 0x00,
33
+ 0x09, 0xc7, 0x31, 0x8c, 0x18, 0x1f, 0x10, 0x11
34
+ };
35
+
36
+ /* This table is cobbled together from extant files and:
37
+ * https://support.sas.com/documentation/cdl/en/nlsref/61893/HTML/default/viewer.htm#a002607278.htm
38
+ * https://support.sas.com/documentation/onlinedoc/dfdmstudio/2.6/dmpdmsug/Content/dfU_Encodings_SAS.html
39
+ *
40
+ * Discrepancies from the official documentation are noted with a comment. It
41
+ * appears that in some instances SAS software uses a newer encoding than
42
+ * what's listed in the docs. In these cases the encoding used by ReadStat
43
+ * represents the author's best guess.
44
+ */
45
+ static readstat_charset_entry_t _charset_table[] = {
46
+ { .code = 0, .name = SAS_DEFAULT_STRING_ENCODING },
47
+ { .code = 20, .name = "UTF-8" },
48
+ { .code = 28, .name = "US-ASCII" },
49
+ { .code = 29, .name = "ISO-8859-1" },
50
+ { .code = 30, .name = "ISO-8859-2" },
51
+ { .code = 31, .name = "ISO-8859-3" },
52
+ { .code = 32, .name = "ISO-8859-4" },
53
+ { .code = 33, .name = "ISO-8859-5" },
54
+ { .code = 34, .name = "ISO-8859-6" },
55
+ { .code = 35, .name = "ISO-8859-7" },
56
+ { .code = 36, .name = "ISO-8859-8" },
57
+ { .code = 37, .name = "ISO-8859-9" },
58
+ { .code = 39, .name = "ISO-8859-11" },
59
+ { .code = 40, .name = "ISO-8859-15" },
60
+ { .code = 41, .name = "CP437" },
61
+ { .code = 42, .name = "CP850" },
62
+ { .code = 43, .name = "CP852" },
63
+ { .code = 44, .name = "CP857" },
64
+ { .code = 45, .name = "CP858" },
65
+ { .code = 46, .name = "CP862" },
66
+ { .code = 47, .name = "CP864" },
67
+ { .code = 48, .name = "CP865" },
68
+ { .code = 49, .name = "CP866" },
69
+ { .code = 50, .name = "CP869" },
70
+ { .code = 51, .name = "CP874" },
71
+ { .code = 52, .name = "CP921" },
72
+ { .code = 53, .name = "CP922" },
73
+ { .code = 54, .name = "CP1129" },
74
+ { .code = 55, .name = "CP720" },
75
+ { .code = 56, .name = "CP737" },
76
+ { .code = 57, .name = "CP775" },
77
+ { .code = 58, .name = "CP860" },
78
+ { .code = 59, .name = "CP863" },
79
+ { .code = 60, .name = "WINDOWS-1250" },
80
+ { .code = 61, .name = "WINDOWS-1251" },
81
+ { .code = 62, .name = "WINDOWS-1252" },
82
+ { .code = 63, .name = "WINDOWS-1253" },
83
+ { .code = 64, .name = "WINDOWS-1254" },
84
+ { .code = 65, .name = "WINDOWS-1255" },
85
+ { .code = 66, .name = "WINDOWS-1256" },
86
+ { .code = 67, .name = "WINDOWS-1257" },
87
+ { .code = 68, .name = "WINDOWS-1258" },
88
+ { .code = 69, .name = "MACROMAN" },
89
+ { .code = 70, .name = "MACARABIC" },
90
+ { .code = 71, .name = "MACHEBREW" },
91
+ { .code = 72, .name = "MACGREEK" },
92
+ { .code = 73, .name = "MACTHAI" },
93
+ { .code = 75, .name = "MACTURKISH" },
94
+ { .code = 76, .name = "MACUKRAINE" },
95
+ { .code = 118, .name = "CP950" },
96
+ { .code = 119, .name = "EUC-TW" },
97
+ { .code = 123, .name = "BIG-5" },
98
+ { .code = 125, .name = "GB18030" }, // "euc-cn" in SAS
99
+ { .code = 126, .name = "WINDOWS-936" }, // "zwin"
100
+ { .code = 128, .name = "CP1381" }, // "zpce"
101
+ { .code = 134, .name = "EUC-JP" },
102
+ { .code = 136, .name = "CP949" },
103
+ { .code = 137, .name = "CP942" },
104
+ { .code = 138, .name = "CP932" }, // "shift-jis" in SAS
105
+ { .code = 140, .name = "EUC-KR" },
106
+ { .code = 141, .name = "CP949" }, // "kpce"
107
+ { .code = 142, .name = "CP949" }, // "kwin"
108
+ { .code = 163, .name = "MACICELAND" },
109
+ { .code = 167, .name = "ISO-2022-JP" },
110
+ { .code = 168, .name = "ISO-2022-KR" },
111
+ { .code = 169, .name = "ISO-2022-CN" },
112
+ { .code = 172, .name = "ISO-2022-CN-EXT" },
113
+ { .code = 204, .name = SAS_DEFAULT_STRING_ENCODING }, // "any" in SAS
114
+ { .code = 205, .name = "GB18030" },
115
+ { .code = 227, .name = "ISO-8859-14" },
116
+ { .code = 242, .name = "ISO-8859-13" },
117
+ { .code = 245, .name = "MACCROATIAN" },
118
+ { .code = 246, .name = "MACCYRILLIC" },
119
+ { .code = 247, .name = "MACROMANIA" },
120
+ { .code = 248, .name = "SHIFT_JISX0213" },
121
+ };
122
+
123
/* Offset, in seconds, of the SAS epoch (01-01-1960) relative to the Unix
 * epoch (01-01-1970). The value is negative because 1960 precedes 1970. */
static time_t sas_epoch(void) {
    const int days_between_epochs = 3653; /* 01-01-1960 .. 01-01-1970 */
    const int seconds_per_day = 86400;

    return -(days_between_epochs * seconds_per_day);
}
126
+
127
/* Convert a timestamp read from a SAS header into a time_t.
 *
 * time      - raw timestamp from the file (seconds)
 * time_diff - offset stored alongside the timestamp; subtracted from it
 * epoch     - offset added to re-base the value (see sas_epoch())
 *
 * Returns 0 for NaN input and clamps the result to [LONG_MIN, LONG_MAX].
 *
 * The bounds are inclusive on purpose: where long is 64 bits,
 * (double)LONG_MAX rounds up to 2^63, which is NOT representable as a long.
 * With a strict '>' comparison a value of exactly 2^63 would slip through to
 * the double -> integer conversion below, which is undefined behaviour for
 * out-of-range values.
 */
static time_t sas_convert_time(double time, double time_diff, time_t epoch) {
    time -= time_diff;
    time += epoch;
    if (isnan(time))
        return 0;
    if (time >= (double)LONG_MAX)
        return LONG_MAX;
    if (time <= (double)LONG_MIN)
        return LONG_MIN;
    return time;
}
138
+
139
/* Load an 8-byte unsigned integer from a possibly unaligned buffer.
 * memcpy is used instead of a pointer cast so the access is alignment-safe.
 * When bswap is nonzero the loaded value is byte-swapped before returning. */
uint64_t sas_read8(const char *data, int bswap) {
    uint64_t value;

    memcpy(&value, data, sizeof(value));
    if (bswap)
        value = byteswap8(value);
    return value;
}
144
+
145
/* Load a 4-byte unsigned integer from a possibly unaligned buffer.
 * memcpy is used instead of a pointer cast so the access is alignment-safe.
 * When bswap is nonzero the loaded value is byte-swapped before returning. */
uint32_t sas_read4(const char *data, int bswap) {
    uint32_t value;

    memcpy(&value, data, sizeof(value));
    if (bswap)
        value = byteswap4(value);
    return value;
}
150
+
151
/* Load a 2-byte unsigned integer from a possibly unaligned buffer.
 * memcpy is used instead of a pointer cast so the access is alignment-safe.
 * When bswap is nonzero the loaded value is byte-swapped before returning. */
uint16_t sas_read2(const char *data, int bswap) {
    uint16_t value;

    memcpy(&value, data, sizeof(value));
    if (bswap)
        value = byteswap2(value);
    return value;
}
156
+
157
/* Number of bytes left in a subheader of total length `len` after removing
 * 4 bytes plus two signature-sized fields. The arithmetic is unsigned:
 * callers must ensure len is at least 4 + 2*signature_len, otherwise the
 * result wraps around. */
size_t sas_subheader_remainder(size_t len, size_t signature_len) {
    size_t overhead = 2 * signature_len + 4;

    return len - overhead;
}
160
+
161
+ readstat_error_t sas_read_header(readstat_io_t *io, sas_header_info_t *hinfo,
162
+ readstat_error_handler error_handler, void *user_ctx) {
163
+ sas_header_start_t header_start;
164
+ sas_header_end_t header_end;
165
+ int retval = READSTAT_OK;
166
+ char error_buf[1024];
167
+ time_t epoch = sas_epoch();
168
+
169
+ if (io->read(&header_start, sizeof(sas_header_start_t), io->io_ctx) < sizeof(sas_header_start_t)) {
170
+ retval = READSTAT_ERROR_READ;
171
+ goto cleanup;
172
+ }
173
+ if (memcmp(header_start.magic, sas7bdat_magic_number, sizeof(sas7bdat_magic_number)) != 0 &&
174
+ memcmp(header_start.magic, sas7bcat_magic_number, sizeof(sas7bcat_magic_number)) != 0) {
175
+ retval = READSTAT_ERROR_PARSE;
176
+ goto cleanup;
177
+ }
178
+ if (header_start.a1 == SAS_ALIGNMENT_OFFSET_4) {
179
+ hinfo->pad1 = 4;
180
+ }
181
+ if (header_start.a2 == SAS_ALIGNMENT_OFFSET_4) {
182
+ hinfo->u64 = 1;
183
+ }
184
+ int bswap = 0;
185
+ if (header_start.endian == SAS_ENDIAN_BIG) {
186
+ bswap = machine_is_little_endian();
187
+ hinfo->little_endian = 0;
188
+ } else if (header_start.endian == SAS_ENDIAN_LITTLE) {
189
+ bswap = !machine_is_little_endian();
190
+ hinfo->little_endian = 1;
191
+ } else {
192
+ retval = READSTAT_ERROR_PARSE;
193
+ goto cleanup;
194
+ }
195
+ int i;
196
+ for (i=0; i<sizeof(_charset_table)/sizeof(_charset_table[0]); i++) {
197
+ if (header_start.encoding == _charset_table[i].code) {
198
+ hinfo->encoding = _charset_table[i].name;
199
+ break;
200
+ }
201
+ }
202
+ if (hinfo->encoding == NULL) {
203
+ if (error_handler) {
204
+ snprintf(error_buf, sizeof(error_buf), "Unsupported character set code: %d", header_start.encoding);
205
+ error_handler(error_buf, user_ctx);
206
+ }
207
+ retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
208
+ goto cleanup;
209
+ }
210
+ memcpy(hinfo->table_name, header_start.table_name, sizeof(header_start.table_name));
211
+ if (io->seek(hinfo->pad1, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
212
+ retval = READSTAT_ERROR_SEEK;
213
+ goto cleanup;
214
+ }
215
+
216
+ double creation_time, modification_time, creation_time_diff, modification_time_diff;
217
+
218
+ if (io->read(&creation_time, sizeof(double), io->io_ctx) < sizeof(double)) {
219
+ retval = READSTAT_ERROR_READ;
220
+ goto cleanup;
221
+ }
222
+ if (bswap)
223
+ creation_time = byteswap_double(creation_time);
224
+
225
+ if (io->read(&modification_time, sizeof(double), io->io_ctx) < sizeof(double)) {
226
+ retval = READSTAT_ERROR_READ;
227
+ goto cleanup;
228
+ }
229
+ if (bswap)
230
+ modification_time = byteswap_double(modification_time);
231
+
232
+ if (io->read(&creation_time_diff, sizeof(double), io->io_ctx) < sizeof(double)) {
233
+ retval = READSTAT_ERROR_READ;
234
+ goto cleanup;
235
+ }
236
+ if (bswap)
237
+ creation_time_diff = byteswap_double(creation_time_diff);
238
+
239
+ if (io->read(&modification_time_diff, sizeof(double), io->io_ctx) < sizeof(double)) {
240
+ retval = READSTAT_ERROR_READ;
241
+ goto cleanup;
242
+ }
243
+ if (bswap)
244
+ modification_time_diff = byteswap_double(modification_time_diff);
245
+
246
+ hinfo->creation_time = sas_convert_time(creation_time, creation_time_diff, epoch);
247
+ hinfo->modification_time = sas_convert_time(modification_time, modification_time_diff, epoch);
248
+
249
+ uint32_t header_size, page_size;
250
+
251
+ if (io->read(&header_size, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
252
+ retval = READSTAT_ERROR_READ;
253
+ goto cleanup;
254
+ }
255
+ if (io->read(&page_size, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
256
+ retval = READSTAT_ERROR_READ;
257
+ goto cleanup;
258
+ }
259
+
260
+ hinfo->header_size = bswap ? byteswap4(header_size) : header_size;
261
+ hinfo->page_size = bswap ? byteswap4(page_size) : page_size;
262
+
263
+ if (hinfo->header_size < 1024 || hinfo->page_size < 1024) {
264
+ retval = READSTAT_ERROR_PARSE;
265
+ goto cleanup;
266
+ }
267
+ if (hinfo->header_size > (1<<24) || hinfo->page_size > (1<<24)) {
268
+ retval = READSTAT_ERROR_PARSE;
269
+ goto cleanup;
270
+ }
271
+
272
+ if (hinfo->u64) {
273
+ hinfo->page_header_size = SAS_PAGE_HEADER_SIZE_64BIT;
274
+ hinfo->subheader_pointer_size = SAS_SUBHEADER_POINTER_SIZE_64BIT;
275
+ } else {
276
+ hinfo->page_header_size = SAS_PAGE_HEADER_SIZE_32BIT;
277
+ hinfo->subheader_pointer_size = SAS_SUBHEADER_POINTER_SIZE_32BIT;
278
+ }
279
+
280
+ if (hinfo->u64) {
281
+ uint64_t page_count;
282
+ if (io->read(&page_count, sizeof(uint64_t), io->io_ctx) < sizeof(uint64_t)) {
283
+ retval = READSTAT_ERROR_READ;
284
+ goto cleanup;
285
+ }
286
+ hinfo->page_count = bswap ? byteswap8(page_count) : page_count;
287
+ } else {
288
+ uint32_t page_count;
289
+ if (io->read(&page_count, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
290
+ retval = READSTAT_ERROR_READ;
291
+ goto cleanup;
292
+ }
293
+ hinfo->page_count = bswap ? byteswap4(page_count) : page_count;
294
+ }
295
+ if (hinfo->page_count > (1<<24)) {
296
+ retval = READSTAT_ERROR_PARSE;
297
+ goto cleanup;
298
+ }
299
+
300
+ if (io->seek(8, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
301
+ retval = READSTAT_ERROR_SEEK;
302
+ if (error_handler) {
303
+ snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek forward by %d", 8);
304
+ error_handler(error_buf, user_ctx);
305
+ }
306
+ goto cleanup;
307
+ }
308
+ if (io->read(&header_end, sizeof(sas_header_end_t), io->io_ctx) < sizeof(sas_header_end_t)) {
309
+ retval = READSTAT_ERROR_READ;
310
+ goto cleanup;
311
+ }
312
+ char major;
313
+ int minor, revision;
314
+ if (sscanf(header_end.release, "%c.%04dM%1d", &major, &minor, &revision) != 3) {
315
+ retval = READSTAT_ERROR_PARSE;
316
+ goto cleanup;
317
+ }
318
+
319
+ if (major >= '1' && major <= '9') {
320
+ hinfo->major_version = major - '0';
321
+ } else if (major == 'V') {
322
+ // It appears that SAS Visual Forecaster reports the major version as "V"
323
+ // Treat it as version 9 for all intents and purposes
324
+ hinfo->major_version = 9;
325
+ } else {
326
+ retval = READSTAT_ERROR_PARSE;
327
+ goto cleanup;
328
+ }
329
+ hinfo->minor_version = minor;
330
+ hinfo->revision = revision;
331
+
332
+ if ((major == '8' || major == '9') && minor == 0 && revision == 0) {
333
+ /* A bit of a hack, but most SAS installations are running a minor update */
334
+ hinfo->vendor = READSTAT_VENDOR_STAT_TRANSFER;
335
+ } else {
336
+ hinfo->vendor = READSTAT_VENDOR_SAS;
337
+ }
338
+ if (io->seek(hinfo->header_size, READSTAT_SEEK_SET, io->io_ctx) == -1) {
339
+ retval = READSTAT_ERROR_SEEK;
340
+ if (error_handler) {
341
+ snprintf(error_buf, sizeof(error_buf),
342
+ "ReadStat: Failed to seek to position %" PRId64, hinfo->header_size);
343
+ error_handler(error_buf, user_ctx);
344
+ }
345
+ goto cleanup;
346
+ }
347
+
348
+ cleanup:
349
+ return retval;
350
+ }
351
+
352
+ readstat_error_t sas_write_header(readstat_writer_t *writer, sas_header_info_t *hinfo, sas_header_start_t header_start) {
353
+ readstat_error_t retval = READSTAT_OK;
354
+ time_t epoch = sas_epoch();
355
+
356
+ memset(header_start.table_name, ' ', sizeof(header_start.table_name));
357
+
358
+ size_t table_name_len = strlen(writer->table_name);
359
+ if (table_name_len > sizeof(header_start.table_name))
360
+ table_name_len = sizeof(header_start.table_name);
361
+
362
+ if (table_name_len) {
363
+ memcpy(header_start.table_name, writer->table_name, table_name_len);
364
+ } else {
365
+ memcpy(header_start.table_name, "DATASET", sizeof("DATASET")-1);
366
+ }
367
+
368
+ retval = readstat_write_bytes(writer, &header_start, sizeof(sas_header_start_t));
369
+ if (retval != READSTAT_OK)
370
+ goto cleanup;
371
+
372
+ retval = readstat_write_zeros(writer, hinfo->pad1);
373
+ if (retval != READSTAT_OK)
374
+ goto cleanup;
375
+
376
+ double creation_time = hinfo->creation_time - epoch;
377
+
378
+ retval = readstat_write_bytes(writer, &creation_time, sizeof(double));
379
+ if (retval != READSTAT_OK)
380
+ goto cleanup;
381
+
382
+ double modification_time = hinfo->modification_time - epoch;
383
+
384
+ retval = readstat_write_bytes(writer, &modification_time, sizeof(double));
385
+ if (retval != READSTAT_OK)
386
+ goto cleanup;
387
+
388
+ retval = readstat_write_zeros(writer, 16);
389
+ if (retval != READSTAT_OK)
390
+ goto cleanup;
391
+
392
+ uint32_t header_size = hinfo->header_size;
393
+ uint32_t page_size = hinfo->page_size;
394
+
395
+ retval = readstat_write_bytes(writer, &header_size, sizeof(uint32_t));
396
+ if (retval != READSTAT_OK)
397
+ goto cleanup;
398
+
399
+ retval = readstat_write_bytes(writer, &page_size, sizeof(uint32_t));
400
+ if (retval != READSTAT_OK)
401
+ goto cleanup;
402
+
403
+ if (hinfo->u64) {
404
+ uint64_t page_count = hinfo->page_count;
405
+ retval = readstat_write_bytes(writer, &page_count, sizeof(uint64_t));
406
+ } else {
407
+ uint32_t page_count = hinfo->page_count;
408
+ retval = readstat_write_bytes(writer, &page_count, sizeof(uint32_t));
409
+ }
410
+ if (retval != READSTAT_OK)
411
+ goto cleanup;
412
+
413
+ retval = readstat_write_zeros(writer, 8);
414
+ if (retval != READSTAT_OK)
415
+ goto cleanup;
416
+
417
+ sas_header_end_t header_end = {
418
+ .host = "9.0401M6Linux"
419
+ };
420
+
421
+ char release[sizeof(header_end.release)+1] = { 0 };
422
+ snprintf(release, sizeof(release), "%1d.%04dM0", (unsigned int)writer->version % 10, 101);
423
+ memcpy(header_end.release, release, sizeof(header_end.release));
424
+
425
+ retval = readstat_write_bytes(writer, &header_end, sizeof(sas_header_end_t));
426
+ if (retval != READSTAT_OK)
427
+ goto cleanup;
428
+
429
+ retval = readstat_write_zeros(writer, hinfo->header_size-writer->bytes_written);
430
+ if (retval != READSTAT_OK)
431
+ goto cleanup;
432
+
433
+ cleanup:
434
+ return retval;
435
+ }
436
+
437
+ sas_header_info_t *sas_header_info_init(readstat_writer_t *writer, int is_64bit) {
438
+ sas_header_info_t *hinfo = calloc(1, sizeof(sas_header_info_t));
439
+ hinfo->creation_time = writer->timestamp;
440
+ hinfo->modification_time = writer->timestamp;
441
+ hinfo->page_size = SAS_DEFAULT_PAGE_SIZE;
442
+ hinfo->u64 = !!is_64bit;
443
+
444
+ if (hinfo->u64) {
445
+ hinfo->header_size = SAS_FILE_HEADER_SIZE_64BIT;
446
+ hinfo->page_header_size = SAS_PAGE_HEADER_SIZE_64BIT;
447
+ hinfo->subheader_pointer_size = SAS_SUBHEADER_POINTER_SIZE_64BIT;
448
+ } else {
449
+ hinfo->header_size = SAS_FILE_HEADER_SIZE_32BIT;
450
+ hinfo->page_header_size = SAS_PAGE_HEADER_SIZE_32BIT;
451
+ hinfo->subheader_pointer_size = SAS_SUBHEADER_POINTER_SIZE_32BIT;
452
+ }
453
+
454
+ return hinfo;
455
+ }
456
+
457
+ readstat_error_t sas_fill_page(readstat_writer_t *writer, sas_header_info_t *hinfo) {
458
+ if ((writer->bytes_written - hinfo->header_size) % hinfo->page_size) {
459
+ size_t num_zeros = (hinfo->page_size -
460
+ (writer->bytes_written - hinfo->header_size) % hinfo->page_size);
461
+ return readstat_write_zeros(writer, num_zeros);
462
+ }
463
+ return READSTAT_OK;
464
+ }
465
+
466
+ readstat_error_t sas_validate_name(const char *name, size_t max_len) {
467
+ int j;
468
+ for (j=0; name[j]; j++) {
469
+ if (name[j] != '_' &&
470
+ !(name[j] >= 'a' && name[j] <= 'z') &&
471
+ !(name[j] >= 'A' && name[j] <= 'Z') &&
472
+ !(name[j] >= '0' && name[j] <= '9')) {
473
+ return READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER;
474
+ }
475
+ }
476
+ char first_char = name[0];
477
+
478
+ if (!first_char)
479
+ return READSTAT_ERROR_NAME_IS_ZERO_LENGTH;
480
+
481
+ if (first_char != '_' &&
482
+ !(first_char >= 'a' && first_char <= 'z') &&
483
+ !(first_char >= 'A' && first_char <= 'Z')) {
484
+ return READSTAT_ERROR_NAME_BEGINS_WITH_ILLEGAL_CHARACTER;
485
+ }
486
+ if (strcmp(name, "_N_") == 0 || strcmp(name, "_ERROR_") == 0 ||
487
+ strcmp(name, "_NUMERIC_") == 0 || strcmp(name, "_CHARACTER_") == 0 ||
488
+ strcmp(name, "_ALL_") == 0) {
489
+ return READSTAT_ERROR_NAME_IS_RESERVED_WORD;
490
+ }
491
+
492
+ if (strlen(name) > max_len)
493
+ return READSTAT_ERROR_NAME_IS_TOO_LONG;
494
+
495
+ return READSTAT_OK;
496
+ }
497
+
498
+ readstat_error_t sas_validate_variable(const readstat_variable_t *variable) {
499
+ return sas_validate_name(readstat_variable_get_name(variable), 32);
500
+ }
501
+
502
+ readstat_error_t sas_validate_tag(char tag) {
503
+ if (tag == '_' || (tag >= 'A' && tag <= 'Z'))
504
+ return READSTAT_OK;
505
+
506
+ return READSTAT_ERROR_TAGGED_VALUE_IS_OUT_OF_RANGE;
507
+ }
508
+
509
+ void sas_assign_tag(readstat_value_t *value, uint8_t tag) {
510
+ /* We accommodate two tag schemes. In the first, the tag is an ASCII code
511
+ * given by uint8_t tag above. System missing is represented by an ASCII
512
+ * period. In the second scheme, (tag-2) is an offset from 'A', except when
513
+ * tag == 0, in which case it represents an underscore, or tag == 1, in
514
+ * which case it represents system-missing.
515
+ */
516
+ if (tag == 0) {
517
+ tag = '_';
518
+ } else if (tag >= 2 && tag < 28) {
519
+ tag = 'A' + (tag - 2);
520
+ }
521
+ if (sas_validate_tag(tag) == READSTAT_OK) {
522
+ value->tag = tag;
523
+ value->is_tagged_missing = 1;
524
+ } else {
525
+ value->tag = 0;
526
+ value->is_system_missing = 1;
527
+ }
528
+ }
@@ -0,0 +1,131 @@
1
+
2
+ #include "../readstat.h"
3
+ #include "../readstat_bits.h"
4
+
5
+ #pragma pack(push, 1)
6
+
7
+ typedef struct sas_header_start_s { /* first part of the file header; declared under #pragma pack(1) and read/written as raw bytes by sas_read_header()/sas_write_header() */
8
+ unsigned char magic[32]; /* must equal sas7bdat_magic_number or sas7bcat_magic_number, else sas_read_header() fails with a parse error */
9
+ unsigned char a2; /* SAS_ALIGNMENT_OFFSET_4 here makes sas_read_header() set hinfo->u64 */
10
+ unsigned char mystery1[2];
11
+ unsigned char a1; /* SAS_ALIGNMENT_OFFSET_4 here makes sas_read_header() set hinfo->pad1 to 4 */
12
+ unsigned char mystery2[1];
13
+ unsigned char endian; /* SAS_ENDIAN_BIG or SAS_ENDIAN_LITTLE; any other value is a parse error in sas_read_header() */
14
+ unsigned char mystery3[1];
15
+ char file_format;
16
+ unsigned char mystery4[30];
17
+ unsigned char encoding; /* character set code; sas_read_header() looks it up in its charset table */
18
+ unsigned char mystery5[13];
19
+ char file_type[8];
20
+ char table_name[32]; /* fixed width, no NUL terminator; sas_write_header() pads it with spaces */
21
+ unsigned char mystery6[32];
22
+ char file_info[8];
23
+ } sas_header_start_t;
24
+
25
+ typedef struct sas_header_end_s { /* trailing part of the file header; declared under #pragma pack(1) and read/written as raw bytes */
26
+ char release[8]; /* fixed width, no NUL terminator; sas_read_header() parses it with "%c.%04dM%1d" into major, minor and revision */
27
+ char host[16]; /* sas_write_header() stores "9.0401M6Linux" here */
28
+ char version[16];
29
+ char os_vendor[16];
30
+ char os_name[16];
31
+ char extra[48];
32
+ } sas_header_end_t;
33
+
34
+ #pragma pack(pop)
35
+
36
+ typedef struct sas_header_info_s { /* decoded header fields; filled by sas_read_header() when reading, by sas_header_info_init() when writing */
37
+ int little_endian; /* 1 when the header's endian byte is SAS_ENDIAN_LITTLE, 0 for SAS_ENDIAN_BIG */
38
+ int u64; /* nonzero selects the 64-bit page header / subheader pointer sizes and an 8-byte page count */
39
+ int vendor; /* READSTAT_VENDOR_STAT_TRANSFER or READSTAT_VENDOR_SAS */
40
+ int major_version; /* from the header's release string; a major of 'V' is stored as 9 */
41
+ int minor_version; /* from the header's release string */
42
+ int revision; /* from the header's release string */
43
+ int pad1; /* bytes skipped (read) or zero-filled (write) right after sas_header_start_t */
44
+ int64_t page_size; /* sas_read_header() rejects values below 1024 or above 1<<24 */
45
+ int64_t page_header_size; /* SAS_PAGE_HEADER_SIZE_32BIT or SAS_PAGE_HEADER_SIZE_64BIT, chosen by u64 */
46
+ int64_t subheader_pointer_size; /* SAS_SUBHEADER_POINTER_SIZE_32BIT or SAS_SUBHEADER_POINTER_SIZE_64BIT, chosen by u64 */
47
+ int64_t page_count; /* sas_read_header() rejects values above 1<<24 */
48
+ int64_t header_size; /* sas_read_header() rejects values below 1024 or above 1<<24; it seeks to this offset when done */
49
+ time_t creation_time; /* sas_read_header() re-bases the file's value by sas_epoch(); sas_write_header() subtracts that offset again */
50
+ time_t modification_time; /* handled the same way as creation_time */
51
+ char table_name[32]; /* raw copy of the header's table_name field; not NUL-terminated by sas_read_header() */
52
+ char file_label[256];
53
+ char *encoding; /* sas_read_header() points this at a name in its static charset table, so it is not owned by this struct */
54
+ } sas_header_info_t;
55
+
56
+ enum {
57
+ READSTAT_VENDOR_STAT_TRANSFER,
58
+ READSTAT_VENDOR_SAS
59
+ };
60
+
61
+ typedef struct sas_text_ref_s {
62
+ uint16_t index;
63
+ uint16_t offset;
64
+ uint16_t length;
65
+ } sas_text_ref_t;
66
+
67
+ #define SAS_ENDIAN_BIG 0x00
68
+ #define SAS_ENDIAN_LITTLE 0x01
69
+
70
+ #define SAS_FILE_FORMAT_UNIX '1'
71
+ #define SAS_FILE_FORMAT_WINDOWS '2'
72
+
73
+ #define SAS_ALIGNMENT_OFFSET_0 0x22
74
+ #define SAS_ALIGNMENT_OFFSET_4 0x33
75
+
76
+ #define SAS_COLUMN_TYPE_NUM 0x01
77
+ #define SAS_COLUMN_TYPE_CHR 0x02
78
+
79
+ #define SAS_SUBHEADER_SIGNATURE_ROW_SIZE 0xF7F7F7F7
80
+ #define SAS_SUBHEADER_SIGNATURE_COLUMN_SIZE 0xF6F6F6F6
81
+ #define SAS_SUBHEADER_SIGNATURE_COUNTS 0xFFFFFC00
82
+ #define SAS_SUBHEADER_SIGNATURE_COLUMN_FORMAT 0xFFFFFBFE
83
+
84
+ #define SAS_SUBHEADER_SIGNATURE_COLUMN_MASK 0xFFFFFFF8
85
+ /* Seen in the wild: FA (unknown), F8 (locale?) */
86
+
87
+ #define SAS_SUBHEADER_SIGNATURE_COLUMN_ATTRS 0xFFFFFFFC
88
+ #define SAS_SUBHEADER_SIGNATURE_COLUMN_TEXT 0xFFFFFFFD
89
+ #define SAS_SUBHEADER_SIGNATURE_COLUMN_LIST 0xFFFFFFFE
90
+ #define SAS_SUBHEADER_SIGNATURE_COLUMN_NAME 0xFFFFFFFF
91
+
92
+ #define SAS_PAGE_TYPE_META 0x0000
93
+ #define SAS_PAGE_TYPE_DATA 0x0100
94
+ #define SAS_PAGE_TYPE_MIX 0x0200
95
+ #define SAS_PAGE_TYPE_AMD 0x0400
96
+ #define SAS_PAGE_TYPE_MASK 0x0F00
97
+
98
+ #define SAS_PAGE_TYPE_META2 0x4000
99
+ #define SAS_PAGE_TYPE_COMP 0x9000
100
+
101
+ #define SAS_SUBHEADER_POINTER_SIZE_32BIT 12
102
+ #define SAS_SUBHEADER_POINTER_SIZE_64BIT 24
103
+
104
+ #define SAS_PAGE_HEADER_SIZE_32BIT 24
105
+ #define SAS_PAGE_HEADER_SIZE_64BIT 40
106
+
107
+ #define SAS_COMPRESSION_NONE 0x00
108
+ #define SAS_COMPRESSION_TRUNC 0x01
109
+ #define SAS_COMPRESSION_ROW 0x04
110
+
111
+ #define SAS_COMPRESSION_SIGNATURE_RLE "SASYZCRL"
112
+ #define SAS_COMPRESSION_SIGNATURE_RDC "SASYZCR2"
113
+
114
+ #define SAS_DEFAULT_FILE_VERSION 9
115
+
116
+ extern unsigned char sas7bdat_magic_number[32];
117
+ extern unsigned char sas7bcat_magic_number[32];
118
+
119
+ uint64_t sas_read8(const char *data, int bswap);
120
+ uint32_t sas_read4(const char *data, int bswap);
121
+ uint16_t sas_read2(const char *data, int bswap);
122
+ readstat_error_t sas_read_header(readstat_io_t *io, sas_header_info_t *ctx, readstat_error_handler error_handler, void *user_ctx);
123
+ size_t sas_subheader_remainder(size_t len, size_t signature_len);
124
+
125
+ sas_header_info_t *sas_header_info_init(readstat_writer_t *writer, int is_64bit);
126
+ readstat_error_t sas_write_header(readstat_writer_t *writer, sas_header_info_t *hinfo, sas_header_start_t header_start);
127
+ readstat_error_t sas_fill_page(readstat_writer_t *writer, sas_header_info_t *hinfo);
128
+ readstat_error_t sas_validate_variable(const readstat_variable_t *variable);
129
+ readstat_error_t sas_validate_name(const char *name, size_t max_len);
130
+ readstat_error_t sas_validate_tag(char tag);
131
+ void sas_assign_tag(readstat_value_t *value, uint8_t tag);