js-stream-sas7bdat 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/binding.gyp +58 -0
  2. package/package.json +4 -2
  3. package/src/binding/ReadStat/LICENSE +19 -0
  4. package/src/binding/ReadStat/README.md +483 -0
  5. package/src/binding/ReadStat/src/CKHashTable.c +309 -0
  6. package/src/binding/ReadStat/src/CKHashTable.h +37 -0
  7. package/src/binding/ReadStat/src/readstat.h +627 -0
  8. package/src/binding/ReadStat/src/readstat_bits.c +69 -0
  9. package/src/binding/ReadStat/src/readstat_bits.h +20 -0
  10. package/src/binding/ReadStat/src/readstat_convert.c +36 -0
  11. package/src/binding/ReadStat/src/readstat_convert.h +2 -0
  12. package/src/binding/ReadStat/src/readstat_error.c +126 -0
  13. package/src/binding/ReadStat/src/readstat_iconv.h +15 -0
  14. package/src/binding/ReadStat/src/readstat_io_unistd.c +147 -0
  15. package/src/binding/ReadStat/src/readstat_io_unistd.h +11 -0
  16. package/src/binding/ReadStat/src/readstat_malloc.c +34 -0
  17. package/src/binding/ReadStat/src/readstat_malloc.h +4 -0
  18. package/src/binding/ReadStat/src/readstat_metadata.c +53 -0
  19. package/src/binding/ReadStat/src/readstat_parser.c +121 -0
  20. package/src/binding/ReadStat/src/readstat_strings.h +6 -0
  21. package/src/binding/ReadStat/src/readstat_value.c +178 -0
  22. package/src/binding/ReadStat/src/readstat_variable.c +123 -0
  23. package/src/binding/ReadStat/src/readstat_writer.c +677 -0
  24. package/src/binding/ReadStat/src/readstat_writer.h +21 -0
  25. package/src/binding/ReadStat/src/sas/ieee.c +420 -0
  26. package/src/binding/ReadStat/src/sas/ieee.h +6 -0
  27. package/src/binding/ReadStat/src/sas/readstat_sas.c +528 -0
  28. package/src/binding/ReadStat/src/sas/readstat_sas.h +131 -0
  29. package/src/binding/ReadStat/src/sas/readstat_sas7bcat_read.c +515 -0
  30. package/src/binding/ReadStat/src/sas/readstat_sas7bcat_write.c +218 -0
  31. package/src/binding/ReadStat/src/sas/readstat_sas7bdat_read.c +1304 -0
  32. package/src/binding/ReadStat/src/sas/readstat_sas7bdat_write.c +812 -0
  33. package/src/binding/ReadStat/src/sas/readstat_sas_rle.c +286 -0
  34. package/src/binding/ReadStat/src/sas/readstat_sas_rle.h +8 -0
  35. package/src/binding/ReadStat/src/sas/readstat_xport.c +28 -0
  36. package/src/binding/ReadStat/src/sas/readstat_xport.h +47 -0
  37. package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.c +265 -0
  38. package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.h +4 -0
  39. package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.rl +68 -0
  40. package/src/binding/ReadStat/src/sas/readstat_xport_read.c +777 -0
  41. package/src/binding/ReadStat/src/sas/readstat_xport_write.c +561 -0
  42. package/src/binding/readstat_binding.cc +393 -0
@@ -0,0 +1,627 @@
1
+ //
2
+ // readstat.h - API and internal data structures for ReadStat
3
+ //
4
+ // Copyright Evan Miller and ReadStat authors (see LICENSE)
5
+ //
6
+
7
+ #ifndef INCLUDE_READSTAT_H
8
+ #define INCLUDE_READSTAT_H
9
+
10
+ #ifdef __cplusplus
11
+ extern "C" {
12
+ #endif
13
+
14
+ #include <stdint.h>
15
+ #include <sys/types.h>
16
+ #include <string.h>
17
+ #include <math.h>
18
+ #include <stdio.h>
19
+
20
+ enum { /* Return codes for user-supplied parser callbacks. */
21
+ READSTAT_HANDLER_OK, /* 0: success, parsing continues */
22
+ READSTAT_HANDLER_ABORT, /* 1: abort the parse */
23
+ READSTAT_HANDLER_SKIP_VARIABLE /* 2: returned by a variable handler so the value handler is not called for that variable */
24
+ };
25
+
26
+ typedef enum readstat_type_e {
27
+ READSTAT_TYPE_STRING,
28
+ READSTAT_TYPE_INT8,
29
+ READSTAT_TYPE_INT16,
30
+ READSTAT_TYPE_INT32,
31
+ READSTAT_TYPE_FLOAT,
32
+ READSTAT_TYPE_DOUBLE,
33
+ READSTAT_TYPE_STRING_REF
34
+ } readstat_type_t;
35
+
36
+ typedef enum readstat_type_class_e {
37
+ READSTAT_TYPE_CLASS_STRING,
38
+ READSTAT_TYPE_CLASS_NUMERIC
39
+ } readstat_type_class_t;
40
+
41
+ typedef enum readstat_measure_e {
42
+ READSTAT_MEASURE_UNKNOWN,
43
+ READSTAT_MEASURE_NOMINAL = 1,
44
+ READSTAT_MEASURE_ORDINAL,
45
+ READSTAT_MEASURE_SCALE
46
+ } readstat_measure_t;
47
+
48
+ typedef enum readstat_alignment_e {
49
+ READSTAT_ALIGNMENT_UNKNOWN,
50
+ READSTAT_ALIGNMENT_LEFT = 1,
51
+ READSTAT_ALIGNMENT_CENTER,
52
+ READSTAT_ALIGNMENT_RIGHT
53
+ } readstat_alignment_t;
54
+
55
+ typedef enum readstat_compress_e {
56
+ READSTAT_COMPRESS_NONE,
57
+ READSTAT_COMPRESS_ROWS,
58
+ READSTAT_COMPRESS_BINARY
59
+ } readstat_compress_t;
60
+
61
+ typedef enum readstat_endian_e {
62
+ READSTAT_ENDIAN_NONE,
63
+ READSTAT_ENDIAN_LITTLE,
64
+ READSTAT_ENDIAN_BIG
65
+ } readstat_endian_t;
66
+
67
+ typedef enum readstat_error_e {
68
+ READSTAT_OK,
69
+ READSTAT_ERROR_OPEN = 1,
70
+ READSTAT_ERROR_READ,
71
+ READSTAT_ERROR_MALLOC,
72
+ READSTAT_ERROR_USER_ABORT,
73
+ READSTAT_ERROR_PARSE,
74
+ READSTAT_ERROR_UNSUPPORTED_COMPRESSION,
75
+ READSTAT_ERROR_UNSUPPORTED_CHARSET,
76
+ READSTAT_ERROR_COLUMN_COUNT_MISMATCH,
77
+ READSTAT_ERROR_ROW_COUNT_MISMATCH,
78
+ READSTAT_ERROR_ROW_WIDTH_MISMATCH,
79
+ READSTAT_ERROR_BAD_FORMAT_STRING,
80
+ READSTAT_ERROR_VALUE_TYPE_MISMATCH,
81
+ READSTAT_ERROR_WRITE,
82
+ READSTAT_ERROR_WRITER_NOT_INITIALIZED,
83
+ READSTAT_ERROR_SEEK,
84
+ READSTAT_ERROR_CONVERT,
85
+ READSTAT_ERROR_CONVERT_BAD_STRING,
86
+ READSTAT_ERROR_CONVERT_SHORT_STRING,
87
+ READSTAT_ERROR_CONVERT_LONG_STRING,
88
+ READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE,
89
+ READSTAT_ERROR_TAGGED_VALUE_IS_OUT_OF_RANGE,
90
+ READSTAT_ERROR_STRING_VALUE_IS_TOO_LONG,
91
+ READSTAT_ERROR_TAGGED_VALUES_NOT_SUPPORTED,
92
+ READSTAT_ERROR_UNSUPPORTED_FILE_FORMAT_VERSION,
93
+ READSTAT_ERROR_NAME_BEGINS_WITH_ILLEGAL_CHARACTER,
94
+ READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER,
95
+ READSTAT_ERROR_NAME_IS_RESERVED_WORD,
96
+ READSTAT_ERROR_NAME_IS_TOO_LONG,
97
+ READSTAT_ERROR_BAD_TIMESTAMP_STRING,
98
+ READSTAT_ERROR_BAD_FREQUENCY_WEIGHT,
99
+ READSTAT_ERROR_TOO_MANY_MISSING_VALUE_DEFINITIONS,
100
+ READSTAT_ERROR_NOTE_IS_TOO_LONG,
101
+ READSTAT_ERROR_STRING_REFS_NOT_SUPPORTED,
102
+ READSTAT_ERROR_STRING_REF_IS_REQUIRED,
103
+ READSTAT_ERROR_ROW_IS_TOO_WIDE_FOR_PAGE,
104
+ READSTAT_ERROR_TOO_FEW_COLUMNS,
105
+ READSTAT_ERROR_TOO_MANY_COLUMNS,
106
+ READSTAT_ERROR_NAME_IS_ZERO_LENGTH,
107
+ READSTAT_ERROR_BAD_TIMESTAMP_VALUE,
108
+ READSTAT_ERROR_BAD_MR_STRING
109
+ } readstat_error_t;
110
+
111
+ const char *readstat_error_message(readstat_error_t error_code);
112
+
113
+ typedef struct mr_set_s {
114
+ char type;
115
+ char *name;
116
+ char *label;
117
+ int is_dichotomy;
118
+ int counted_value;
119
+ char **subvariables;
120
+ int num_subvars;
121
+ } mr_set_t;
122
+
123
+ typedef struct readstat_metadata_s {
124
+ int64_t row_count;
125
+ int64_t var_count;
126
+ time_t creation_time;
127
+ time_t modified_time;
128
+ int64_t file_format_version;
129
+ readstat_compress_t compression;
130
+ readstat_endian_t endianness;
131
+ const char *table_name;
132
+ const char *file_label;
133
+ const char *file_encoding;
134
+ unsigned int is64bit:1;
135
+ size_t multiple_response_sets_length;
136
+ mr_set_t *mr_sets;
137
+ } readstat_metadata_t;
138
+
139
+ /* If the row count is unknown (e.g. it's an XPORT or POR file, or an SAV
140
+ * file created with non-conforming software), then readstat_get_row_count
141
+ * returns -1.
142
+ */
143
+ int readstat_get_row_count(readstat_metadata_t *metadata);
144
+ int readstat_get_var_count(readstat_metadata_t *metadata);
145
+ time_t readstat_get_creation_time(readstat_metadata_t *metadata);
146
+ time_t readstat_get_modified_time(readstat_metadata_t *metadata);
147
+ int readstat_get_file_format_version(readstat_metadata_t *metadata);
148
+ int readstat_get_file_format_is_64bit(readstat_metadata_t *metadata);
149
+ readstat_compress_t readstat_get_compression(readstat_metadata_t *metadata);
150
+ readstat_endian_t readstat_get_endianness(readstat_metadata_t *metadata);
151
+ const char *readstat_get_table_name(readstat_metadata_t *metadata);
152
+ const char *readstat_get_file_label(readstat_metadata_t *metadata);
153
+ const char *readstat_get_file_encoding(readstat_metadata_t *metadata);
154
+ const mr_set_t *readstat_get_multiple_response_sets(readstat_metadata_t *metadata);
155
+ size_t readstat_get_multiple_response_sets_length(readstat_metadata_t *metadata);
156
+
157
+ typedef struct readstat_value_s { /* One cell value; passed by value to value handlers and to the readstat_*_value() accessors. */
158
+ union { /* Payload storage; one member per readstat_type_t storage kind. */
159
+ float float_value;
160
+ double double_value;
161
+ int8_t i8_value;
162
+ int16_t i16_value;
163
+ int32_t i32_value;
164
+ const char *string_value; /* NOTE(review): ownership/lifetime of the string is not visible here - confirm */
165
+ } v;
166
+ readstat_type_t type; /* presumably selects which member of v is valid - TODO confirm in readstat_value.c */
167
+ char tag; /* presumably the character returned by readstat_value_tag() for tagged-missing values - confirm */
168
+ unsigned int is_system_missing:1; /* 1-bit flag; presumably backs readstat_value_is_system_missing() - confirm */
169
+ unsigned int is_tagged_missing:1; /* 1-bit flag; presumably backs readstat_value_is_tagged_missing() - confirm */
170
+ } readstat_value_t;
171
+
172
+ /* Internal data structures */
173
+ typedef struct readstat_value_label_s {
174
+ double double_key;
175
+ int32_t int32_key;
176
+ char tag;
177
+
178
+ char *string_key;
179
+ size_t string_key_len;
180
+
181
+ char *label;
182
+ size_t label_len;
183
+ } readstat_value_label_t;
184
+
185
+ typedef struct readstat_label_set_s {
186
+ readstat_type_t type;
187
+ char name[256];
188
+
189
+ readstat_value_label_t *value_labels;
190
+ long value_labels_count;
191
+ long value_labels_capacity;
192
+
193
+ void *variables;
194
+ long variables_count;
195
+ long variables_capacity;
196
+ } readstat_label_set_t;
197
+
198
+ typedef struct readstat_missingness_s {
199
+ readstat_value_t missing_ranges[32];
200
+ long missing_ranges_count;
201
+ } readstat_missingness_t;
202
+
203
+ typedef struct readstat_variable_s {
204
+ readstat_type_t type;
205
+ int index;
206
+ char name[300];
207
+ char format[256];
208
+ char label[1024];
209
+ readstat_label_set_t *label_set;
210
+ off_t offset;
211
+ size_t storage_width;
212
+ size_t user_width;
213
+ readstat_missingness_t missingness;
214
+ readstat_measure_t measure;
215
+ readstat_alignment_t alignment;
216
+ int display_width;
217
+ int decimals;
218
+ int skip;
219
+ int index_after_skipping;
220
+ } readstat_variable_t;
221
+
222
+ typedef struct readstat_schema_entry_s {
223
+ uint32_t row;
224
+ uint32_t col;
225
+ uint32_t len;
226
+ int skip;
227
+ readstat_variable_t variable;
228
+ char labelset[32];
229
+ char decimal_separator;
230
+ } readstat_schema_entry_t;
231
+
232
+ typedef struct readstat_schema_s {
233
+ char filename[255];
234
+ uint32_t rows_per_observation;
235
+ uint32_t cols_per_observation;
236
+ int first_line;
237
+ int entry_count;
238
+ char field_delimiter;
239
+ readstat_schema_entry_t *entries;
240
+ } readstat_schema_t;
241
+
242
+ /* Value accessors */
243
+ readstat_type_t readstat_value_type(readstat_value_t value);
244
+ readstat_type_class_t readstat_value_type_class(readstat_value_t value);
245
+
246
+ /* Values can be missing in one of three ways:
247
+ * 1. "System missing", delivered to value handlers as NaN. Occurs in all file
248
+ * types. The most common kind of missing value.
249
+ * 2. Tagged missing, also delivered as NaN, but with a single character tag
250
+ * accessible via readstat_value_tag(). The tag might be 'a', 'b', etc,
251
+ * corresponding to Stata's .a, .b, values etc. Occurs only in Stata and
252
+ * SAS files.
253
+ * 3. Defined missing. The value is a real number but is to be treated as
254
+ * missing according to the variable's missingness rules (such as "value < 0 ||
255
+ * value == 999"). Occurs only in SPSS files. Access the rules via:
256
+ *
257
+ * readstat_variable_get_missing_ranges_count()
258
+ * readstat_variable_get_missing_range_lo()
259
+ * readstat_variable_get_missing_range_hi()
260
+ *
261
+ * Note that "ranges" include individual values where lo == hi.
262
+ *
263
+ * readstat_value_is_missing() is equivalent to:
264
+ *
265
+ * (readstat_value_is_system_missing()
266
+ * || readstat_value_is_tagged_missing()
267
+ * || readstat_value_is_defined_missing())
268
+ */
269
+ int readstat_value_is_missing(readstat_value_t value, readstat_variable_t *variable);
270
+ int readstat_value_is_system_missing(readstat_value_t value);
271
+ int readstat_value_is_tagged_missing(readstat_value_t value);
272
+ int readstat_value_is_defined_missing(readstat_value_t value, readstat_variable_t *variable);
273
+ char readstat_value_tag(readstat_value_t value);
274
+
275
+ char readstat_int8_value(readstat_value_t value);
276
+ int16_t readstat_int16_value(readstat_value_t value);
277
+ int32_t readstat_int32_value(readstat_value_t value);
278
+ float readstat_float_value(readstat_value_t value);
279
+ double readstat_double_value(readstat_value_t value);
280
+ const char *readstat_string_value(readstat_value_t value);
281
+
282
+ readstat_type_class_t readstat_type_class(readstat_type_t type);
283
+
284
+ /* Accessor methods for use inside variable handlers */
285
+ int readstat_variable_get_index(const readstat_variable_t *variable);
286
+ int readstat_variable_get_index_after_skipping(const readstat_variable_t *variable);
287
+ const char *readstat_variable_get_name(const readstat_variable_t *variable);
288
+ const char *readstat_variable_get_label(const readstat_variable_t *variable);
289
+ const char *readstat_variable_get_format(const readstat_variable_t *variable);
290
+ readstat_type_t readstat_variable_get_type(const readstat_variable_t *variable);
291
+ readstat_type_class_t readstat_variable_get_type_class(const readstat_variable_t *variable);
292
+ size_t readstat_variable_get_storage_width(const readstat_variable_t *variable);
293
+ int readstat_variable_get_display_width(const readstat_variable_t *variable);
294
+ readstat_measure_t readstat_variable_get_measure(const readstat_variable_t *variable);
295
+ readstat_alignment_t readstat_variable_get_alignment(const readstat_variable_t *variable);
296
+
297
+ int readstat_variable_get_missing_ranges_count(const readstat_variable_t *variable);
298
+ readstat_value_t readstat_variable_get_missing_range_lo(const readstat_variable_t *variable, int i);
299
+ readstat_value_t readstat_variable_get_missing_range_hi(const readstat_variable_t *variable, int i);
300
+
301
+ /* Callbacks should return 0 (aka READSTAT_HANDLER_OK) on success and 1 (aka READSTAT_HANDLER_ABORT) to abort. */
302
+ /* If the variable handler returns READSTAT_HANDLER_SKIP_VARIABLE, the value handler will not be called on
303
+ * the associated variable. (Note that subsequent variables will retain their original index values.)
304
+ */
305
+ typedef int (*readstat_metadata_handler)(readstat_metadata_t *metadata, void *ctx);
306
+ typedef int (*readstat_note_handler)(int note_index, const char *note, void *ctx);
307
+ typedef int (*readstat_variable_handler)(int index, readstat_variable_t *variable,
308
+ const char *val_labels, void *ctx);
309
+ typedef int (*readstat_fweight_handler)(readstat_variable_t *variable, void *ctx);
310
+ typedef int (*readstat_value_handler)(int obs_index, readstat_variable_t *variable,
311
+ readstat_value_t value, void *ctx);
312
+ typedef int (*readstat_value_label_handler)(const char *val_labels,
313
+ readstat_value_t value, const char *label, void *ctx);
314
+ typedef void (*readstat_error_handler)(const char *error_message, void *ctx);
315
+ typedef int (*readstat_progress_handler)(double progress, void *ctx);
316
+
317
+ #if defined(_MSC_VER) /* Per-platform choice of ssize_t and of readstat_off_t, the offset type used by readstat_seek_handler. */
318
+ #include <BaseTsd.h>
319
+ typedef SSIZE_T ssize_t; /* this branch supplies ssize_t itself, aliased from SSIZE_T (BaseTsd.h) */
320
+ typedef __int64 readstat_off_t; /* 64-bit offset type on MSVC */
321
+ #elif defined _WIN32 || defined __CYGWIN__ /* non-MSVC Windows toolchains and Cygwin */
322
+ typedef _off64_t readstat_off_t;
323
+ #elif defined _AIX
324
+ typedef off64_t readstat_off_t;
325
+ #else /* everything else: whatever off_t the platform headers provide */
326
+ typedef off_t readstat_off_t;
327
+ #endif
328
+
329
+ typedef enum readstat_io_flags_e { /* Values for the `whence` argument of readstat_seek_handler. */
330
+ READSTAT_SEEK_SET, /* 0; presumably mirrors SEEK_SET (offset from start) - confirm in the I/O implementation */
331
+ READSTAT_SEEK_CUR, /* 1; presumably mirrors SEEK_CUR (offset from current position) - confirm */
332
+ READSTAT_SEEK_END /* 2; presumably mirrors SEEK_END (offset from end of file) - confirm */
333
+ } readstat_io_flags_t;
334
+
335
+ typedef int (*readstat_open_handler)(const char *path, void *io_ctx);
336
+ typedef int (*readstat_close_handler)(void *io_ctx);
337
+ typedef readstat_off_t (*readstat_seek_handler)(readstat_off_t offset, readstat_io_flags_t whence, void *io_ctx);
338
+ typedef ssize_t (*readstat_read_handler)(void *buf, size_t nbyte, void *io_ctx);
339
+ typedef readstat_error_t (*readstat_update_handler)(long file_size, readstat_progress_handler progress_handler, void *user_ctx, void *io_ctx);
340
+
341
+ typedef struct readstat_io_s {
342
+ readstat_open_handler open;
343
+ readstat_close_handler close;
344
+ readstat_seek_handler seek;
345
+ readstat_read_handler read;
346
+ readstat_update_handler update;
347
+ void *io_ctx;
348
+ int io_ctx_needs_free;
349
+ } readstat_io_t;
350
+
351
+ typedef struct readstat_callbacks_s {
352
+ readstat_metadata_handler metadata;
353
+ readstat_note_handler note;
354
+ readstat_variable_handler variable;
355
+ readstat_fweight_handler fweight;
356
+ readstat_value_handler value;
357
+ readstat_value_label_handler value_label;
358
+ readstat_error_handler error;
359
+ readstat_progress_handler progress;
360
+ } readstat_callbacks_t;
361
+
362
+ typedef struct readstat_parser_s {
363
+ readstat_callbacks_t handlers;
364
+ readstat_io_t *io;
365
+ const char *input_encoding;
366
+ const char *output_encoding;
367
+ long row_limit;
368
+ long row_offset;
369
+ } readstat_parser_t;
370
+
371
+ readstat_parser_t *readstat_parser_init(void);
372
+ void readstat_parser_free(readstat_parser_t *parser);
373
+ void readstat_io_free(readstat_io_t *io);
374
+
375
+ readstat_error_t readstat_set_metadata_handler(readstat_parser_t *parser, readstat_metadata_handler metadata_handler);
376
+ readstat_error_t readstat_set_note_handler(readstat_parser_t *parser, readstat_note_handler note_handler);
377
+ readstat_error_t readstat_set_variable_handler(readstat_parser_t *parser, readstat_variable_handler variable_handler);
378
+ readstat_error_t readstat_set_fweight_handler(readstat_parser_t *parser, readstat_fweight_handler fweight_handler);
379
+ readstat_error_t readstat_set_value_handler(readstat_parser_t *parser, readstat_value_handler value_handler);
380
+ readstat_error_t readstat_set_value_label_handler(readstat_parser_t *parser, readstat_value_label_handler value_label_handler);
381
+ readstat_error_t readstat_set_error_handler(readstat_parser_t *parser, readstat_error_handler error_handler);
382
+ readstat_error_t readstat_set_progress_handler(readstat_parser_t *parser, readstat_progress_handler progress_handler);
383
+
384
+ readstat_error_t readstat_set_open_handler(readstat_parser_t *parser, readstat_open_handler open_handler);
385
+ readstat_error_t readstat_set_close_handler(readstat_parser_t *parser, readstat_close_handler close_handler);
386
+ readstat_error_t readstat_set_seek_handler(readstat_parser_t *parser, readstat_seek_handler seek_handler);
387
+ readstat_error_t readstat_set_read_handler(readstat_parser_t *parser, readstat_read_handler read_handler);
388
+ readstat_error_t readstat_set_update_handler(readstat_parser_t *parser, readstat_update_handler update_handler);
389
+ readstat_error_t readstat_set_io_ctx(readstat_parser_t *parser, void *io_ctx);
390
+
391
+ // Usually inferred from the file, but sometimes a manual override is desirable.
392
+ // In particular, pre-14 Stata uses the system encoding, which is usually Win 1252
393
+ // but could be anything. `encoding' should be an iconv-compatible name.
394
+ readstat_error_t readstat_set_file_character_encoding(readstat_parser_t *parser, const char *encoding);
395
+
396
+ // Defaults to UTF-8. Pass in NULL to disable transliteration.
397
+ readstat_error_t readstat_set_handler_character_encoding(readstat_parser_t *parser, const char *encoding);
398
+
399
+ readstat_error_t readstat_set_row_limit(readstat_parser_t *parser, long row_limit);
400
+ readstat_error_t readstat_set_row_offset(readstat_parser_t *parser, long row_offset);
401
+
402
+ /* Parse binary / portable files */
403
+ readstat_error_t readstat_parse_dta(readstat_parser_t *parser, const char *path, void *user_ctx);
404
+ readstat_error_t readstat_parse_sav(readstat_parser_t *parser, const char *path, void *user_ctx);
405
+ readstat_error_t readstat_parse_por(readstat_parser_t *parser, const char *path, void *user_ctx);
406
+ readstat_error_t readstat_parse_sas7bdat(readstat_parser_t *parser, const char *path, void *user_ctx);
407
+ readstat_error_t readstat_parse_sas7bcat(readstat_parser_t *parser, const char *path, void *user_ctx);
408
+ readstat_error_t readstat_parse_xport(readstat_parser_t *parser, const char *path, void *user_ctx);
409
+
410
+ /* Parse a schema file... */
411
+ readstat_schema_t *readstat_parse_sas_commands(readstat_parser_t *parser,
412
+ const char *filepath, void *user_ctx, readstat_error_t *outError);
413
+ readstat_schema_t *readstat_parse_spss_commands(readstat_parser_t *parser,
414
+ const char *filepath, void *user_ctx, readstat_error_t *outError);
415
+ readstat_schema_t *readstat_parse_stata_dictionary(readstat_parser_t *parser,
416
+ const char *filepath, void *user_ctx, readstat_error_t *outError);
417
+
418
+ /* ... then pass the schema to the plain-text parser ... */
419
+ readstat_error_t readstat_parse_txt(readstat_parser_t *parser, const char *filename,
420
+ readstat_schema_t *schema, void *user_ctx);
421
+
422
+ /* ... and free the schema structure */
423
+ void readstat_schema_free(readstat_schema_t *schema);
424
+
425
+ /* Internal module callbacks */
426
+ typedef struct readstat_string_ref_s {
427
+ int64_t first_v;
428
+ int64_t first_o;
429
+ size_t len;
430
+ char data[1]; // Flexible array; using [1] for C++98 compatibility
431
+ } readstat_string_ref_t;
432
+
433
+ typedef size_t (*readstat_variable_width_callback)(readstat_type_t type, size_t user_width);
434
+ typedef readstat_error_t (*readstat_variable_ok_callback)(const readstat_variable_t *variable);
435
+
436
+ typedef readstat_error_t (*readstat_write_int8_callback)(void *row_data, const readstat_variable_t *variable, int8_t value);
437
+ typedef readstat_error_t (*readstat_write_int16_callback)(void *row_data, const readstat_variable_t *variable, int16_t value);
438
+ typedef readstat_error_t (*readstat_write_int32_callback)(void *row_data, const readstat_variable_t *variable, int32_t value);
439
+ typedef readstat_error_t (*readstat_write_float_callback)(void *row_data, const readstat_variable_t *variable, float value);
440
+ typedef readstat_error_t (*readstat_write_double_callback)(void *row_data, const readstat_variable_t *variable, double value);
441
+ typedef readstat_error_t (*readstat_write_string_callback)(void *row_data, const readstat_variable_t *variable, const char *value);
442
+ typedef readstat_error_t (*readstat_write_string_ref_callback)(void *row_data, const readstat_variable_t *variable, readstat_string_ref_t *ref);
443
+ typedef readstat_error_t (*readstat_write_missing_callback)(void *row_data, const readstat_variable_t *variable);
444
+ typedef readstat_error_t (*readstat_write_tagged_callback)(void *row_data, const readstat_variable_t *variable, char tag);
445
+
446
+ typedef readstat_error_t (*readstat_begin_data_callback)(void *writer);
447
+ typedef readstat_error_t (*readstat_write_row_callback)(void *writer, void *row_data, size_t row_len);
448
+ typedef readstat_error_t (*readstat_end_data_callback)(void *writer);
449
+ typedef void (*readstat_module_ctx_free_callback)(void *module_ctx);
450
+ typedef readstat_error_t (*readstat_metadata_ok_callback)(void *writer);
451
+
452
+ typedef struct readstat_writer_callbacks_s {
453
+ readstat_variable_width_callback variable_width;
454
+ readstat_variable_ok_callback variable_ok;
455
+ readstat_write_int8_callback write_int8;
456
+ readstat_write_int16_callback write_int16;
457
+ readstat_write_int32_callback write_int32;
458
+ readstat_write_float_callback write_float;
459
+ readstat_write_double_callback write_double;
460
+ readstat_write_string_callback write_string;
461
+ readstat_write_string_ref_callback write_string_ref;
462
+ readstat_write_missing_callback write_missing_string;
463
+ readstat_write_missing_callback write_missing_number;
464
+ readstat_write_tagged_callback write_missing_tagged;
465
+ readstat_begin_data_callback begin_data;
466
+ readstat_write_row_callback write_row;
467
+ readstat_end_data_callback end_data;
468
+ readstat_module_ctx_free_callback module_ctx_free;
469
+ readstat_metadata_ok_callback metadata_ok;
470
+ } readstat_writer_callbacks_t;
471
+
472
+ /* You'll need to define one of these to get going. Should return # bytes written,
473
+ * or -1 on error, a la write(2) */
474
+ typedef ssize_t (*readstat_data_writer)(const void *data, size_t len, void *ctx);
475
+
476
+ typedef struct readstat_writer_s {
477
+ readstat_data_writer data_writer;
478
+ size_t bytes_written;
479
+ long version;
480
+ int is_64bit; // SAS only
481
+ readstat_compress_t compression;
482
+ time_t timestamp;
483
+
484
+ readstat_variable_t **variables;
485
+ long variables_count;
486
+ long variables_capacity;
487
+
488
+ readstat_label_set_t **label_sets;
489
+ long label_sets_count;
490
+ long label_sets_capacity;
491
+
492
+ char **notes;
493
+ long notes_count;
494
+ long notes_capacity;
495
+
496
+ readstat_string_ref_t **string_refs;
497
+ long string_refs_count;
498
+ long string_refs_capacity;
499
+
500
+ unsigned char *row;
501
+ size_t row_len;
502
+
503
+ int row_count;
504
+ int current_row;
505
+ char file_label[257];
506
+ char table_name[33];
507
+ const readstat_variable_t *fweight_variable;
508
+
509
+ readstat_writer_callbacks_t callbacks;
510
+ readstat_error_handler error_handler;
511
+
512
+ void *module_ctx;
513
+ void *user_ctx;
514
+
515
+ int initialized;
516
+ } readstat_writer_t;
517
+
518
+ /* Writer API */
519
+
520
+
521
+ // First call this...
522
+ readstat_writer_t *readstat_writer_init(void);
523
+
524
+ // Then specify a function that will handle the output bytes...
525
+ readstat_error_t readstat_set_data_writer(readstat_writer_t *writer, readstat_data_writer data_writer);
526
+
527
+ // Next define your value labels, if any. Create as many named sets as you'd like.
528
+ readstat_label_set_t *readstat_add_label_set(readstat_writer_t *writer, readstat_type_t type, const char *name);
529
+ void readstat_label_double_value(readstat_label_set_t *label_set, double value, const char *label);
530
+ void readstat_label_int32_value(readstat_label_set_t *label_set, int32_t value, const char *label);
531
+ void readstat_label_string_value(readstat_label_set_t *label_set, const char *value, const char *label);
532
+ void readstat_label_tagged_value(readstat_label_set_t *label_set, char tag, const char *label);
533
+
534
+ // Now define your variables. Note that `storage_width' is used for:
535
+ // * READSTAT_TYPE_STRING variables in all formats
536
+ // * READSTAT_TYPE_DOUBLE variables, but only in the SAS XPORT format (valid values 3-8, defaults to 8)
537
+ readstat_variable_t *readstat_add_variable(readstat_writer_t *writer, const char *name, readstat_type_t type,
538
+ size_t storage_width);
539
+ void readstat_variable_set_label(readstat_variable_t *variable, const char *label);
540
+ void readstat_variable_set_format(readstat_variable_t *variable, const char *format);
541
+ void readstat_variable_set_label_set(readstat_variable_t *variable, readstat_label_set_t *label_set);
542
+ void readstat_variable_set_measure(readstat_variable_t *variable, readstat_measure_t measure);
543
+ void readstat_variable_set_alignment(readstat_variable_t *variable, readstat_alignment_t alignment);
544
+ void readstat_variable_set_display_width(readstat_variable_t *variable, int display_width);
545
+ readstat_error_t readstat_variable_add_missing_double_value(readstat_variable_t *variable, double value);
546
+ readstat_error_t readstat_variable_add_missing_double_range(readstat_variable_t *variable, double lo, double hi);
547
+ readstat_error_t readstat_variable_add_missing_string_value(readstat_variable_t *variable, const char *value);
548
+ readstat_error_t readstat_variable_add_missing_string_range(readstat_variable_t *variable, const char *lo, const char *hi);
549
+ readstat_variable_t *readstat_get_variable(readstat_writer_t *writer, int index);
550
+
551
+ // "Notes" appear in the file metadata. In SPSS these are stored as
552
+ // lines in the Document Record; in Stata these are stored using
553
+ // the "notes" feature.
554
+ //
555
+ // Note that the line length in SPSS is 80 characters; ReadStat will
556
+ // produce a write error if a note is longer than this limit.
557
+ void readstat_add_note(readstat_writer_t *writer, const char *note);
558
+
559
+ // String refs are used for creating a READSTAT_TYPE_STRING_REF column,
560
+ // which is only supported in Stata. String references can be shared
561
+ // across columns, and inserted with readstat_insert_string_ref().
562
+ readstat_string_ref_t *readstat_add_string_ref(readstat_writer_t *writer, const char *string);
563
+ readstat_string_ref_t *readstat_get_string_ref(readstat_writer_t *writer, int index);
564
+
565
+ // Optional metadata
566
+ readstat_error_t readstat_writer_set_file_label(readstat_writer_t *writer, const char *file_label);
567
+ readstat_error_t readstat_writer_set_file_timestamp(readstat_writer_t *writer, time_t timestamp);
568
+ readstat_error_t readstat_writer_set_fweight_variable(readstat_writer_t *writer, const readstat_variable_t *variable);
569
+
570
+ readstat_error_t readstat_writer_set_file_format_version(readstat_writer_t *writer,
571
+ uint8_t file_format_version);
572
+ // e.g. 104-119 for DTA; 5 or 8 for SAS Transport.
573
+ // SAV files support 2 or 3, where 3 is equivalent to setting
574
+ // readstat_writer_set_compression(READSTAT_COMPRESS_BINARY)
575
+
576
+ readstat_error_t readstat_writer_set_table_name(readstat_writer_t *writer, const char *table_name);
577
+ // Only used in XPORT files at the moment (defaults to DATASET)
578
+
579
+ readstat_error_t readstat_writer_set_file_format_is_64bit(readstat_writer_t *writer,
580
+ int is_64bit); // applies only to SAS files; defaults to 1=true
581
+ readstat_error_t readstat_writer_set_compression(readstat_writer_t *writer,
582
+ readstat_compress_t compression);
583
+ // READSTAT_COMPRESS_BINARY is supported only with SAV files (i.e. ZSAV files)
584
+ // READSTAT_COMPRESS_ROWS is supported only with sas7bdat and SAV files
585
+
586
+ // Optional error handler
587
+ readstat_error_t readstat_writer_set_error_handler(readstat_writer_t *writer,
588
+ readstat_error_handler error_handler);
589
+
590
+ // Call one of these at any time before the first invocation of readstat_begin_row
591
+ readstat_error_t readstat_begin_writing_dta(readstat_writer_t *writer, void *user_ctx, long row_count);
592
+ readstat_error_t readstat_begin_writing_por(readstat_writer_t *writer, void *user_ctx, long row_count);
593
+ readstat_error_t readstat_begin_writing_sas7bcat(readstat_writer_t *writer, void *user_ctx);
594
+ readstat_error_t readstat_begin_writing_sas7bdat(readstat_writer_t *writer, void *user_ctx, long row_count);
595
+ readstat_error_t readstat_begin_writing_sav(readstat_writer_t *writer, void *user_ctx, long row_count);
596
+ readstat_error_t readstat_begin_writing_xport(readstat_writer_t *writer, void *user_ctx, long row_count);
597
+
598
+ // Optional, file-specific validation routines, to be called AFTER readstat_begin_writing_XXX
599
+ readstat_error_t readstat_validate_metadata(readstat_writer_t *writer);
600
+ readstat_error_t readstat_validate_variable(readstat_writer_t *writer, const readstat_variable_t *variable);
601
+
602
+ // Start a row of data (that is, a case or observation)
603
+ readstat_error_t readstat_begin_row(readstat_writer_t *writer);
604
+
605
+ // Then call one of these for each variable
606
+ readstat_error_t readstat_insert_int8_value(readstat_writer_t *writer, const readstat_variable_t *variable, int8_t value);
607
+ readstat_error_t readstat_insert_int16_value(readstat_writer_t *writer, const readstat_variable_t *variable, int16_t value);
608
+ readstat_error_t readstat_insert_int32_value(readstat_writer_t *writer, const readstat_variable_t *variable, int32_t value);
609
+ readstat_error_t readstat_insert_float_value(readstat_writer_t *writer, const readstat_variable_t *variable, float value);
610
+ readstat_error_t readstat_insert_double_value(readstat_writer_t *writer, const readstat_variable_t *variable, double value);
611
+ readstat_error_t readstat_insert_string_value(readstat_writer_t *writer, const readstat_variable_t *variable, const char *value);
612
+ readstat_error_t readstat_insert_string_ref(readstat_writer_t *writer, const readstat_variable_t *variable, readstat_string_ref_t *ref);
613
+ readstat_error_t readstat_insert_missing_value(readstat_writer_t *writer, const readstat_variable_t *variable);
614
+ readstat_error_t readstat_insert_tagged_missing_value(readstat_writer_t *writer, const readstat_variable_t *variable, char tag);
615
+
616
+ // Finally, close out the row
617
+ readstat_error_t readstat_end_row(readstat_writer_t *writer);
618
+
619
+ // Once you've written all the rows, clean up after yourself
620
+ readstat_error_t readstat_end_writing(readstat_writer_t *writer);
621
+ void readstat_writer_free(readstat_writer_t *writer);
622
+
623
+ #ifdef __cplusplus
624
+ }
625
+ #endif
626
+
627
+ #endif
@@ -0,0 +1,69 @@
1
+ //
2
+ // readstat_bits.c - Bit-twiddling utility functions
3
+ //
4
+
5
+ #include <sys/types.h>
6
+ #include <stdint.h>
7
+ #include <string.h>
8
+
9
+ #include "readstat_bits.h"
10
+
11
/* Reports the host byte order by inspecting the first byte in memory of an
 * int holding the value 1. Returns 1 when that byte is the least-significant
 * one (little-endian host) and 0 otherwise. */
int machine_is_little_endian(void) {
    int probe = 1;
    const char *first_byte = (const char *)&probe;

    return *first_byte;
}
15
+
16
/* Converts an 8-bit ones'-complement bit pattern to the equivalent
 * two's-complement value: negative inputs are incremented by one,
 * non-negative inputs are returned unchanged.
 *
 * The signedness of plain `char` is implementation-defined. The input is
 * therefore reinterpreted as `signed char` before the sign test, so the
 * adjustment is also applied on platforms where `char` is unsigned. */
char ones_to_twos_complement1(char num) {
    signed char value = (signed char)num;

    if (value < 0) {
        return (char)(value + 1);
    }
    return (char)value;
}
19
+
20
/* Converts a 16-bit ones'-complement bit pattern to the equivalent
 * two's-complement value: negative inputs are incremented by one,
 * non-negative inputs pass through untouched. */
int16_t ones_to_twos_complement2(int16_t num) {
    if (num >= 0) {
        return num;
    }
    return num + 1;
}
23
+
24
/* Converts a 32-bit ones'-complement bit pattern to the equivalent
 * two's-complement value: negative inputs are incremented by one,
 * non-negative inputs pass through untouched. */
int32_t ones_to_twos_complement4(int32_t num) {
    if (num >= 0) {
        return num;
    }
    return num + 1;
}
27
+
28
/* Converts an 8-bit two's-complement value to the equivalent
 * ones'-complement bit pattern: negative inputs are decremented by one,
 * non-negative inputs are returned unchanged.
 *
 * The signedness of plain `char` is implementation-defined. The input is
 * therefore reinterpreted as `signed char` before the sign test, so the
 * adjustment is also applied on platforms where `char` is unsigned. */
char twos_to_ones_complement1(char num) {
    signed char value = (signed char)num;

    if (value < 0) {
        /* value - 1 is evaluated as int, so the subtraction itself cannot overflow */
        return (char)(value - 1);
    }
    return (char)value;
}
31
+
32
/* Converts a 16-bit two's-complement value to the equivalent
 * ones'-complement bit pattern: negative inputs are decremented by one,
 * non-negative inputs pass through untouched. */
int16_t twos_to_ones_complement2(int16_t num) {
    if (num >= 0) {
        return num;
    }
    return num - 1;
}
35
+
36
/* Converts a 32-bit two's-complement value to the equivalent
 * ones'-complement bit pattern: negative inputs are decremented by one,
 * non-negative inputs are returned unchanged.
 *
 * INT32_MIN has no ones'-complement counterpart, and `INT32_MIN - 1` is
 * signed overflow (undefined behaviour) when int is 32 bits wide. That
 * input is handled explicitly and yields INT32_MAX, the value wraparound
 * subtraction would produce. */
int32_t twos_to_ones_complement4(int32_t num) {
    if (num >= 0) {
        return num;
    }
    if (num == INT32_MIN) {
        return INT32_MAX;
    }
    return num - 1;
}
39
+
40
/* Returns `num` with its two bytes exchanged. */
uint16_t byteswap2(uint16_t num) {
    uint16_t high_moved_down = (num & 0xFF00) >> 8;
    uint16_t low_moved_up = (num & 0x00FF) << 8;

    return high_moved_down | low_moved_up;
}
43
+
44
/* Returns `num` with its four bytes in reverse order. Works in two passes:
 * first the 16-bit halves are exchanged, then the bytes inside each half. */
uint32_t byteswap4(uint32_t num) {
    uint32_t upper_half = (num & 0xFFFF0000) >> 16;
    uint32_t lower_half = (num & 0x0000FFFF) << 16;
    uint32_t halves_swapped = upper_half | lower_half;

    uint32_t odd_bytes = (halves_swapped & 0xFF00FF00) >> 8;
    uint32_t even_bytes = (halves_swapped & 0x00FF00FF) << 8;

    return odd_bytes | even_bytes;
}
48
+
49
/* Returns `num` with its eight bytes in reverse order. Works in three
 * passes: exchange the 32-bit halves, then the 16-bit quarters within each
 * half, then the bytes within each quarter. */
uint64_t byteswap8(uint64_t num) {
    uint64_t moved_down;
    uint64_t moved_up;

    moved_down = (num & 0xFFFFFFFF00000000) >> 32;
    moved_up = (num & 0x00000000FFFFFFFF) << 32;
    num = moved_down | moved_up;

    moved_down = (num & 0xFFFF0000FFFF0000) >> 16;
    moved_up = (num & 0x0000FFFF0000FFFF) << 16;
    num = moved_down | moved_up;

    moved_down = (num & 0xFF00FF00FF00FF00) >> 8;
    moved_up = (num & 0x00FF00FF00FF00FF) << 8;
    return moved_down | moved_up;
}
54
+
55
/* Returns `num` with the byte order of its 4-byte object representation
 * reversed. The bits are moved through a uint32_t with memcpy so the swap
 * is done on an integer (via byteswap4) without pointer type-punning. */
float byteswap_float(float num) {
    uint32_t raw_bits = 0;

    memcpy(&raw_bits, &num, sizeof raw_bits);
    raw_bits = byteswap4(raw_bits);
    memcpy(&num, &raw_bits, sizeof raw_bits);

    return num;
}
62
+
63
/* Returns `num` with the byte order of its 8-byte object representation
 * reversed. The bits are moved through a uint64_t with memcpy so the swap
 * is done on an integer (via byteswap8) without pointer type-punning. */
double byteswap_double(double num) {
    uint64_t raw_bits = 0;

    memcpy(&raw_bits, &num, sizeof raw_bits);
    raw_bits = byteswap8(raw_bits);
    memcpy(&num, &raw_bits, sizeof raw_bits);

    return num;
}