js-stream-sas7bdat 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/binding.gyp +58 -0
  2. package/dist/class/datasetSas7BDat.d.ts.map +1 -1
  3. package/dist/class/datasetSas7BDat.js +4 -3
  4. package/dist/class/datasetSas7BDat.js.map +1 -1
  5. package/dist/tsconfig.tsbuildinfo +1 -1
  6. package/package.json +16 -7
  7. package/prebuilds/darwin-arm64/js-stream-sas7bdat.node +0 -0
  8. package/prebuilds/darwin-x64/js-stream-sas7bdat.node +0 -0
  9. package/prebuilds/linux-arm64/js-stream-sas7bdat.node +0 -0
  10. package/prebuilds/linux-x64/js-stream-sas7bdat.node +0 -0
  11. package/prebuilds/win32-ia32/js-stream-sas7bdat.node +0 -0
  12. package/prebuilds/win32-x64/js-stream-sas7bdat.node +0 -0
  13. package/src/binding/ReadStat/LICENSE +19 -0
  14. package/src/binding/ReadStat/README.md +483 -0
  15. package/src/binding/ReadStat/src/CKHashTable.c +309 -0
  16. package/src/binding/ReadStat/src/CKHashTable.h +37 -0
  17. package/src/binding/ReadStat/src/readstat.h +627 -0
  18. package/src/binding/ReadStat/src/readstat_bits.c +69 -0
  19. package/src/binding/ReadStat/src/readstat_bits.h +20 -0
  20. package/src/binding/ReadStat/src/readstat_convert.c +36 -0
  21. package/src/binding/ReadStat/src/readstat_convert.h +2 -0
  22. package/src/binding/ReadStat/src/readstat_error.c +126 -0
  23. package/src/binding/ReadStat/src/readstat_iconv.h +15 -0
  24. package/src/binding/ReadStat/src/readstat_io_unistd.c +147 -0
  25. package/src/binding/ReadStat/src/readstat_io_unistd.h +11 -0
  26. package/src/binding/ReadStat/src/readstat_malloc.c +34 -0
  27. package/src/binding/ReadStat/src/readstat_malloc.h +4 -0
  28. package/src/binding/ReadStat/src/readstat_metadata.c +53 -0
  29. package/src/binding/ReadStat/src/readstat_parser.c +121 -0
  30. package/src/binding/ReadStat/src/readstat_strings.h +6 -0
  31. package/src/binding/ReadStat/src/readstat_value.c +178 -0
  32. package/src/binding/ReadStat/src/readstat_variable.c +123 -0
  33. package/src/binding/ReadStat/src/readstat_writer.c +677 -0
  34. package/src/binding/ReadStat/src/readstat_writer.h +21 -0
  35. package/src/binding/ReadStat/src/sas/ieee.c +420 -0
  36. package/src/binding/ReadStat/src/sas/ieee.h +6 -0
  37. package/src/binding/ReadStat/src/sas/readstat_sas.c +528 -0
  38. package/src/binding/ReadStat/src/sas/readstat_sas.h +131 -0
  39. package/src/binding/ReadStat/src/sas/readstat_sas7bcat_read.c +515 -0
  40. package/src/binding/ReadStat/src/sas/readstat_sas7bcat_write.c +218 -0
  41. package/src/binding/ReadStat/src/sas/readstat_sas7bdat_read.c +1304 -0
  42. package/src/binding/ReadStat/src/sas/readstat_sas7bdat_write.c +812 -0
  43. package/src/binding/ReadStat/src/sas/readstat_sas_rle.c +286 -0
  44. package/src/binding/ReadStat/src/sas/readstat_sas_rle.h +8 -0
  45. package/src/binding/ReadStat/src/sas/readstat_xport.c +28 -0
  46. package/src/binding/ReadStat/src/sas/readstat_xport.h +47 -0
  47. package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.c +265 -0
  48. package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.h +4 -0
  49. package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.rl +68 -0
  50. package/src/binding/ReadStat/src/sas/readstat_xport_read.c +777 -0
  51. package/src/binding/ReadStat/src/sas/readstat_xport_write.c +561 -0
  52. package/src/binding/readstat_binding.cc +393 -0
@@ -0,0 +1,812 @@
1
+
2
+ #include <stdio.h>
3
+ #include <stdlib.h>
4
+ #include <time.h>
5
+ #include <iconv.h>
6
+
7
+ #include "../readstat.h"
8
+ #include "../readstat_writer.h"
9
+ #include "readstat_sas.h"
10
+ #include "readstat_sas_rle.h"
11
+
12
/* One subheader queued for emission on a meta page: either a metadata
 * subheader identified by its signature, or a buffered data row. */
typedef struct sas7bdat_subheader_s {
    uint32_t    signature;              /* SAS_SUBHEADER_SIGNATURE_* value; row subheaders are created with 0 */
    char       *data;                   /* heap buffer holding `len` bytes of payload */
    size_t      len;                    /* size of `data` in bytes */
    int         is_row_data;            /* non-zero when the payload is a data row */
    int         is_row_data_compressed; /* non-zero when that row was RLE-compressed */
} sas7bdat_subheader_t;
19
+
20
+ typedef struct sas7bdat_subheader_array_s {
21
+ int64_t count;
22
+ int64_t capacity;
23
+ sas7bdat_subheader_t **subheaders;
24
+ } sas7bdat_subheader_array_t;
25
+
26
/* One column-text buffer: a pool of 4-byte-padded strings that text
 * references point into. */
typedef struct sas7bdat_column_text_s {
    char       *data;       /* heap buffer of `capacity` bytes */
    size_t      capacity;   /* bytes allocated for `data` */
    size_t      used;       /* bytes of `data` already filled */
    int64_t     index;      /* position of this buffer in the column text array */
} sas7bdat_column_text_t;
32
+
33
+ typedef struct sas7bdat_column_text_array_s {
34
+ int64_t count;
35
+ sas7bdat_column_text_t **column_texts;
36
+ } sas7bdat_column_text_array_t;
37
+
38
+ typedef struct sas7bdat_write_ctx_s {
39
+ sas_header_info_t *hinfo;
40
+ sas7bdat_subheader_array_t *sarray;
41
+ } sas7bdat_write_ctx_t;
42
+
43
+ static size_t sas7bdat_variable_width(readstat_type_t type, size_t user_width);
44
+
45
+ static int32_t sas7bdat_count_meta_pages(readstat_writer_t *writer) {
46
+ sas7bdat_write_ctx_t *ctx = (sas7bdat_write_ctx_t *)writer->module_ctx;
47
+ sas_header_info_t *hinfo = ctx->hinfo;
48
+ sas7bdat_subheader_array_t *sarray = ctx->sarray;
49
+ int i;
50
+ int pages = 1;
51
+ size_t bytes_left = hinfo->page_size - hinfo->page_header_size;
52
+ size_t shp_ptr_size = hinfo->subheader_pointer_size;
53
+ for (i=sarray->count-1; i>=0; i--) {
54
+ sas7bdat_subheader_t *subheader = sarray->subheaders[i];
55
+ if (subheader->len + shp_ptr_size > bytes_left) {
56
+ bytes_left = hinfo->page_size - hinfo->page_header_size;
57
+ pages++;
58
+ }
59
+ bytes_left -= (subheader->len + shp_ptr_size);
60
+ }
61
+ return pages;
62
+ }
63
+
64
+ static size_t sas7bdat_row_length(readstat_writer_t *writer) {
65
+ int i;
66
+ size_t len = 0;
67
+ for (i=0; i<writer->variables_count; i++) {
68
+ readstat_variable_t *variable = readstat_get_variable(writer, i);
69
+ len += sas7bdat_variable_width(readstat_variable_get_type(variable),
70
+ readstat_variable_get_storage_width(variable));
71
+ }
72
+ return len;
73
+ }
74
+
75
+ static int32_t sas7bdat_rows_per_page(readstat_writer_t *writer, sas_header_info_t *hinfo) {
76
+ size_t row_length = sas7bdat_row_length(writer);
77
+ return (hinfo->page_size - hinfo->page_header_size) / row_length;
78
+ }
79
+
80
+ static int32_t sas7bdat_count_data_pages(readstat_writer_t *writer, sas_header_info_t *hinfo) {
81
+ if (writer->compression == READSTAT_COMPRESS_ROWS)
82
+ return 0;
83
+
84
+ int32_t rows_per_page = sas7bdat_rows_per_page(writer, hinfo);
85
+ return (writer->row_count + (rows_per_page - 1)) / rows_per_page;
86
+ }
87
+
88
+ static sas7bdat_column_text_t *sas7bdat_column_text_init(int64_t index, size_t len) {
89
+ sas7bdat_column_text_t *column_text = calloc(1, sizeof(sas7bdat_column_text_t));
90
+ column_text->data = malloc(len);
91
+ column_text->capacity = len;
92
+ column_text->index = index;
93
+ return column_text;
94
+ }
95
+
96
+ static void sas7bdat_column_text_free(sas7bdat_column_text_t *column_text) {
97
+ free(column_text->data);
98
+ free(column_text);
99
+ }
100
+
101
+ static void sas7bdat_column_text_array_free(sas7bdat_column_text_array_t *column_text_array) {
102
+ int i;
103
+ for (i=0; i<column_text_array->count; i++) {
104
+ sas7bdat_column_text_free(column_text_array->column_texts[i]);
105
+ }
106
+ free(column_text_array->column_texts);
107
+ free(column_text_array);
108
+ }
109
+
110
+ static sas_text_ref_t sas7bdat_make_text_ref(sas7bdat_column_text_array_t *column_text_array,
111
+ const char *string) {
112
+ size_t len = strlen(string);
113
+ size_t padded_len = (len + 3) / 4 * 4;
114
+ sas7bdat_column_text_t *column_text = column_text_array->column_texts[
115
+ column_text_array->count-1];
116
+ if (column_text->used + padded_len > column_text->capacity) {
117
+ column_text_array->count++;
118
+ column_text_array->column_texts = realloc(column_text_array->column_texts,
119
+ sizeof(sas7bdat_column_text_t *) * column_text_array->count);
120
+
121
+ column_text = sas7bdat_column_text_init(column_text_array->count-1,
122
+ column_text->capacity);
123
+ column_text_array->column_texts[column_text_array->count-1] = column_text;
124
+ }
125
+ sas_text_ref_t text_ref = {
126
+ .index = column_text->index,
127
+ .offset = column_text->used + 28,
128
+ .length = len
129
+ };
130
+ strncpy(&column_text->data[column_text->used], string, padded_len);
131
+ column_text->used += padded_len;
132
+ return text_ref;
133
+ }
134
+
135
+ static readstat_error_t sas7bdat_emit_header(readstat_writer_t *writer, sas_header_info_t *hinfo) {
136
+ sas_header_start_t header_start = {
137
+ .a2 = hinfo->u64 ? SAS_ALIGNMENT_OFFSET_4 : SAS_ALIGNMENT_OFFSET_0,
138
+ .a1 = SAS_ALIGNMENT_OFFSET_0,
139
+ .endian = machine_is_little_endian() ? SAS_ENDIAN_LITTLE : SAS_ENDIAN_BIG,
140
+ .file_format = SAS_FILE_FORMAT_UNIX,
141
+ .encoding = 20, /* UTF-8 */
142
+ .file_type = "SAS FILE",
143
+ .file_info = "DATA "
144
+ };
145
+
146
+ memcpy(&header_start.magic, sas7bdat_magic_number, sizeof(header_start.magic));
147
+
148
+ return sas_write_header(writer, hinfo, header_start);
149
+ }
150
+
151
+ static sas7bdat_subheader_t *sas7bdat_subheader_init(uint32_t signature, size_t len) {
152
+ sas7bdat_subheader_t *subheader = calloc(1, sizeof(sas7bdat_subheader_t));
153
+ subheader->signature = signature;
154
+ subheader->len = len;
155
+ subheader->data = calloc(1, len);
156
+
157
+ return subheader;
158
+ }
159
+
160
+ static sas7bdat_subheader_t *sas7bdat_row_size_subheader_init(readstat_writer_t *writer,
161
+ sas_header_info_t *hinfo, sas7bdat_column_text_array_t *column_text_array) {
162
+ sas7bdat_subheader_t *subheader = sas7bdat_subheader_init(
163
+ SAS_SUBHEADER_SIGNATURE_ROW_SIZE,
164
+ hinfo->u64 ? 808 : 480);
165
+
166
+ if (hinfo->u64) {
167
+ int64_t row_length = sas7bdat_row_length(writer);
168
+ int64_t row_count = writer->row_count;
169
+ int64_t ncfl1 = writer->variables_count;
170
+ int64_t page_size = hinfo->page_size;
171
+
172
+ memcpy(&subheader->data[40], &row_length, sizeof(int64_t));
173
+ memcpy(&subheader->data[48], &row_count, sizeof(int64_t));
174
+ memcpy(&subheader->data[72], &ncfl1, sizeof(int64_t));
175
+ memcpy(&subheader->data[104], &page_size, sizeof(int64_t));
176
+ memset(&subheader->data[128], 0xFF, 16);
177
+ } else {
178
+ int32_t row_length = sas7bdat_row_length(writer);
179
+ int32_t row_count = writer->row_count;
180
+ int32_t ncfl1 = writer->variables_count;
181
+ int32_t page_size = hinfo->page_size;
182
+
183
+ memcpy(&subheader->data[20], &row_length, sizeof(int32_t));
184
+ memcpy(&subheader->data[24], &row_count, sizeof(int32_t));
185
+ memcpy(&subheader->data[36], &ncfl1, sizeof(int32_t));
186
+ memcpy(&subheader->data[52], &page_size, sizeof(int32_t));
187
+ memset(&subheader->data[64], 0xFF, 8);
188
+ }
189
+
190
+ sas_text_ref_t text_ref = { 0 };
191
+
192
+ if (writer->file_label[0]) {
193
+ text_ref = sas7bdat_make_text_ref(column_text_array, writer->file_label);
194
+ memcpy(&subheader->data[subheader->len-130], &text_ref, sizeof(sas_text_ref_t));
195
+ }
196
+
197
+ if (writer->compression == READSTAT_COMPRESS_ROWS) {
198
+ text_ref = sas7bdat_make_text_ref(column_text_array, SAS_COMPRESSION_SIGNATURE_RLE);
199
+ memcpy(&subheader->data[subheader->len-118], &text_ref, sizeof(sas_text_ref_t));
200
+ }
201
+
202
+ return subheader;
203
+ }
204
+
205
+ static sas7bdat_subheader_t *sas7bdat_col_size_subheader_init(readstat_writer_t *writer,
206
+ sas_header_info_t *hinfo) {
207
+ sas7bdat_subheader_t *subheader = sas7bdat_subheader_init(
208
+ SAS_SUBHEADER_SIGNATURE_COLUMN_SIZE,
209
+ hinfo->u64 ? 24 : 12);
210
+ if (hinfo->u64) {
211
+ int64_t col_count = writer->variables_count;
212
+ memcpy(&subheader->data[8], &col_count, sizeof(int64_t));
213
+ } else {
214
+ int32_t col_count = writer->variables_count;
215
+ memcpy(&subheader->data[4], &col_count, sizeof(int32_t));
216
+ }
217
+ return subheader;
218
+ }
219
+
220
+ static size_t sas7bdat_col_name_subheader_length(readstat_writer_t *writer,
221
+ sas_header_info_t *hinfo) {
222
+ return (hinfo->u64 ? 28+8*writer->variables_count : 20+8*writer->variables_count);
223
+ }
224
+
225
+ static sas7bdat_subheader_t *sas7bdat_col_name_subheader_init(readstat_writer_t *writer,
226
+ sas_header_info_t *hinfo, sas7bdat_column_text_array_t *column_text_array) {
227
+ size_t len = sas7bdat_col_name_subheader_length(writer, hinfo);
228
+ size_t signature_len = hinfo->u64 ? 8 : 4;
229
+ uint16_t remainder = sas_subheader_remainder(len, signature_len);
230
+ sas7bdat_subheader_t *subheader = sas7bdat_subheader_init(
231
+ SAS_SUBHEADER_SIGNATURE_COLUMN_NAME, len);
232
+ memcpy(&subheader->data[signature_len], &remainder, sizeof(uint16_t));
233
+
234
+ int i;
235
+ char *ptrs = &subheader->data[signature_len+8];
236
+ for (i=0; i<writer->variables_count; i++) {
237
+ readstat_variable_t *variable = readstat_get_variable(writer, i);
238
+ const char *name = readstat_variable_get_name(variable);
239
+ sas_text_ref_t text_ref = sas7bdat_make_text_ref(column_text_array, name);
240
+ memcpy(ptrs, &text_ref, sizeof(sas_text_ref_t));
241
+ ptrs += 8;
242
+ }
243
+ return subheader;
244
+ }
245
+
246
+ static size_t sas7bdat_col_attrs_subheader_length(readstat_writer_t *writer,
247
+ sas_header_info_t *hinfo) {
248
+ return (hinfo->u64 ? 28+16*writer->variables_count : 20+12*writer->variables_count);
249
+ }
250
+
251
+ static sas7bdat_subheader_t *sas7bdat_col_attrs_subheader_init(readstat_writer_t *writer,
252
+ sas_header_info_t *hinfo) {
253
+ size_t len = sas7bdat_col_attrs_subheader_length(writer, hinfo);
254
+ size_t signature_len = hinfo->u64 ? 8 : 4;
255
+ uint16_t remainder = sas_subheader_remainder(len, signature_len);
256
+ sas7bdat_subheader_t *subheader = sas7bdat_subheader_init(
257
+ SAS_SUBHEADER_SIGNATURE_COLUMN_ATTRS, len);
258
+ memcpy(&subheader->data[signature_len], &remainder, sizeof(uint16_t));
259
+
260
+ char *ptrs = &subheader->data[signature_len+8];
261
+ uint64_t offset = 0;
262
+ int i;
263
+ for (i=0; i<writer->variables_count; i++) {
264
+ readstat_variable_t *variable = readstat_get_variable(writer, i);
265
+ const char *name = readstat_variable_get_name(variable);
266
+ readstat_type_t type = readstat_variable_get_type(variable);
267
+ uint16_t name_length_flag = strlen(name) <= 8 ? 4 : 2048;
268
+ uint32_t width = 0;
269
+ if (hinfo->u64) {
270
+ memcpy(&ptrs[0], &offset, sizeof(uint64_t));
271
+ ptrs += sizeof(uint64_t);
272
+ } else {
273
+ uint32_t offset32 = offset;
274
+ memcpy(&ptrs[0], &offset32, sizeof(uint32_t));
275
+ ptrs += sizeof(uint32_t);
276
+ }
277
+ if (type == READSTAT_TYPE_STRING) {
278
+ ptrs[6] = SAS_COLUMN_TYPE_CHR;
279
+ width = readstat_variable_get_storage_width(variable);
280
+ } else {
281
+ ptrs[6] = SAS_COLUMN_TYPE_NUM;
282
+ width = 8;
283
+ }
284
+ memcpy(&ptrs[0], &width, sizeof(uint32_t));
285
+ memcpy(&ptrs[4], &name_length_flag, sizeof(uint16_t));
286
+ offset += width;
287
+ ptrs += 8;
288
+ }
289
+ return subheader;
290
+ }
291
+
292
+ static sas7bdat_subheader_t *sas7bdat_col_format_subheader_init(readstat_variable_t *variable,
293
+ sas_header_info_t *hinfo, sas7bdat_column_text_array_t *column_text_array) {
294
+ sas7bdat_subheader_t *subheader = sas7bdat_subheader_init(
295
+ SAS_SUBHEADER_SIGNATURE_COLUMN_FORMAT,
296
+ hinfo->u64 ? 64 : 52);
297
+ const char *format = readstat_variable_get_format(variable);
298
+ const char *label = readstat_variable_get_label(variable);
299
+ off_t format_offset = hinfo->u64 ? 46 : 34;
300
+ off_t label_offset = hinfo->u64 ? 52 : 40;
301
+ if (format) {
302
+ sas_text_ref_t text_ref = sas7bdat_make_text_ref(column_text_array, format);
303
+ memcpy(&subheader->data[format_offset+0], &text_ref.index, sizeof(uint16_t));
304
+ memcpy(&subheader->data[format_offset+2], &text_ref.offset, sizeof(uint16_t));
305
+ memcpy(&subheader->data[format_offset+4], &text_ref.length, sizeof(uint16_t));
306
+ }
307
+ if (label) {
308
+ sas_text_ref_t text_ref = sas7bdat_make_text_ref(column_text_array, label);
309
+ memcpy(&subheader->data[label_offset+0], &text_ref.index, sizeof(uint16_t));
310
+ memcpy(&subheader->data[label_offset+2], &text_ref.offset, sizeof(uint16_t));
311
+ memcpy(&subheader->data[label_offset+4], &text_ref.length, sizeof(uint16_t));
312
+ }
313
+ return subheader;
314
+ }
315
+
316
+ static size_t sas7bdat_col_text_subheader_length(sas_header_info_t *hinfo,
317
+ sas7bdat_column_text_t *column_text) {
318
+ size_t signature_len = hinfo->u64 ? 8 : 4;
319
+ size_t text_len = column_text ? column_text->used : 0;
320
+ return signature_len + 28 + text_len;
321
+ }
322
+
323
+ static sas7bdat_subheader_t *sas7bdat_col_text_subheader_init(readstat_writer_t *writer,
324
+ sas_header_info_t *hinfo, sas7bdat_column_text_t *column_text) {
325
+ size_t signature_len = hinfo->u64 ? 8 : 4;
326
+ size_t len = sas7bdat_col_text_subheader_length(hinfo, column_text);
327
+ sas7bdat_subheader_t *subheader = sas7bdat_subheader_init(
328
+ SAS_SUBHEADER_SIGNATURE_COLUMN_TEXT, len);
329
+
330
+ uint16_t used = sas_subheader_remainder(len, signature_len);
331
+ memcpy(&subheader->data[signature_len], &used, sizeof(uint16_t));
332
+ memset(&subheader->data[signature_len+12], ' ', 8);
333
+ memcpy(&subheader->data[signature_len+28], column_text->data, column_text->used);
334
+ return subheader;
335
+ }
336
+
337
+ static sas7bdat_subheader_array_t *sas7bdat_subheader_array_init(readstat_writer_t *writer,
338
+ sas_header_info_t *hinfo) {
339
+ sas7bdat_column_text_array_t *column_text_array = calloc(1, sizeof(sas7bdat_column_text_array_t));
340
+ column_text_array->count = 1;
341
+ column_text_array->column_texts = malloc(sizeof(sas7bdat_column_text_t *));
342
+ column_text_array->column_texts[0] = sas7bdat_column_text_init(0,
343
+ hinfo->page_size - hinfo->page_header_size - hinfo->subheader_pointer_size -
344
+ sas7bdat_col_text_subheader_length(hinfo, NULL));
345
+
346
+ sas7bdat_subheader_array_t *sarray = calloc(1, sizeof(sas7bdat_subheader_array_t));
347
+ sarray->count = 4+writer->variables_count;
348
+ sarray->subheaders = calloc(sarray->count, sizeof(sas7bdat_subheader_t *));
349
+
350
+ long idx = 0;
351
+ int i;
352
+ sas7bdat_subheader_t *col_name_subheader = NULL;
353
+ sas7bdat_subheader_t *col_attrs_subheader = NULL;
354
+ sas7bdat_subheader_t **col_format_subheaders = NULL;
355
+
356
+ col_name_subheader = sas7bdat_col_name_subheader_init(writer, hinfo, column_text_array);
357
+ col_attrs_subheader = sas7bdat_col_attrs_subheader_init(writer, hinfo);
358
+
359
+ sarray->subheaders[idx++] = sas7bdat_row_size_subheader_init(writer, hinfo, column_text_array);
360
+ sarray->subheaders[idx++] = sas7bdat_col_size_subheader_init(writer, hinfo);
361
+
362
+ col_format_subheaders = calloc(writer->variables_count, sizeof(sas7bdat_subheader_t *));
363
+ for (i=0; i<writer->variables_count; i++) {
364
+ readstat_variable_t *variable = readstat_get_variable(writer, i);
365
+ col_format_subheaders[i] = sas7bdat_col_format_subheader_init(variable, hinfo, column_text_array);
366
+ }
367
+ sarray->count += column_text_array->count;
368
+ sarray->subheaders = realloc(sarray->subheaders, sarray->count * sizeof(sas7bdat_subheader_t *));
369
+ for (i=0; i<column_text_array->count; i++) {
370
+ sarray->subheaders[idx++] = sas7bdat_col_text_subheader_init(writer, hinfo,
371
+ column_text_array->column_texts[i]);
372
+ }
373
+ sas7bdat_column_text_array_free(column_text_array);
374
+
375
+ sarray->subheaders[idx++] = col_name_subheader;
376
+ sarray->subheaders[idx++] = col_attrs_subheader;
377
+
378
+ for (i=0; i<writer->variables_count; i++) {
379
+ sarray->subheaders[idx++] = col_format_subheaders[i];
380
+ }
381
+ free(col_format_subheaders);
382
+
383
+ sarray->capacity = sarray->count;
384
+
385
+ if (writer->compression == READSTAT_COMPRESS_ROWS) {
386
+ sarray->capacity = (sarray->count + writer->row_count);
387
+ sarray->subheaders = realloc(sarray->subheaders,
388
+ sarray->capacity * sizeof(sas7bdat_subheader_t *));
389
+ }
390
+
391
+ return sarray;
392
+ }
393
+
394
+ static void sas7bdat_subheader_free(sas7bdat_subheader_t *subheader) {
395
+ if (!subheader)
396
+ return;
397
+ if (subheader->data)
398
+ free(subheader->data);
399
+ free(subheader);
400
+ }
401
+
402
+ static void sas7bdat_subheader_array_free(sas7bdat_subheader_array_t *sarray) {
403
+ int i;
404
+ for (i=0; i<sarray->count; i++) {
405
+ sas7bdat_subheader_free(sarray->subheaders[i]);
406
+ }
407
+ free(sarray->subheaders);
408
+ free(sarray);
409
+ }
410
+
411
+ static int sas7bdat_subheader_type(uint32_t signature) {
412
+ return (signature == SAS_SUBHEADER_SIGNATURE_COLUMN_TEXT ||
413
+ signature == SAS_SUBHEADER_SIGNATURE_COLUMN_NAME ||
414
+ signature == SAS_SUBHEADER_SIGNATURE_COLUMN_ATTRS ||
415
+ signature == SAS_SUBHEADER_SIGNATURE_COLUMN_LIST);
416
+ }
417
+
418
+ static readstat_error_t sas7bdat_emit_meta_pages(readstat_writer_t *writer) {
419
+ sas7bdat_write_ctx_t *ctx = (sas7bdat_write_ctx_t *)writer->module_ctx;
420
+ sas_header_info_t *hinfo = ctx->hinfo;
421
+ sas7bdat_subheader_array_t *sarray = ctx->sarray;
422
+ readstat_error_t retval = READSTAT_OK;
423
+ int16_t page_type = SAS_PAGE_TYPE_META;
424
+ char *page = malloc(hinfo->page_size);
425
+ int64_t shp_written = 0;
426
+
427
+ while (sarray->count > shp_written) {
428
+ memset(page, 0, hinfo->page_size);
429
+ int16_t shp_count = 0;
430
+ size_t shp_data_offset = hinfo->page_size;
431
+ size_t shp_ptr_offset = hinfo->page_header_size;
432
+ size_t shp_ptr_size = hinfo->subheader_pointer_size;
433
+
434
+ memcpy(&page[hinfo->page_header_size-8], &page_type, sizeof(int16_t));
435
+
436
+ if (sarray->subheaders[shp_written]->len + shp_ptr_size >
437
+ shp_data_offset - shp_ptr_offset) {
438
+ retval = READSTAT_ERROR_ROW_IS_TOO_WIDE_FOR_PAGE;
439
+ goto cleanup;
440
+ }
441
+
442
+ while (sarray->count > shp_written &&
443
+ sarray->subheaders[shp_written]->len + shp_ptr_size <=
444
+ shp_data_offset - shp_ptr_offset) {
445
+ sas7bdat_subheader_t *subheader = sarray->subheaders[shp_written];
446
+ uint32_t signature32 = subheader->signature;
447
+
448
+ /* copy ptr */
449
+ if (hinfo->u64) {
450
+ uint64_t offset = shp_data_offset - subheader->len;
451
+ uint64_t len = subheader->len;
452
+ memcpy(&page[shp_ptr_offset], &offset, sizeof(uint64_t));
453
+ memcpy(&page[shp_ptr_offset+8], &len, sizeof(uint64_t));
454
+ if (subheader->is_row_data) {
455
+ if (subheader->is_row_data_compressed) {
456
+ page[shp_ptr_offset+16] = SAS_COMPRESSION_ROW;
457
+ } else {
458
+ page[shp_ptr_offset+16] = SAS_COMPRESSION_NONE;
459
+ }
460
+ page[shp_ptr_offset+17] = 1;
461
+ } else {
462
+ page[shp_ptr_offset+17] = sas7bdat_subheader_type(subheader->signature);
463
+ if (signature32 >= 0xFF000000) {
464
+ int64_t signature64 = (int32_t)signature32;
465
+ memcpy(&subheader->data[0], &signature64, sizeof(int64_t));
466
+ } else {
467
+ memcpy(&subheader->data[0], &signature32, sizeof(int32_t));
468
+ }
469
+ }
470
+ } else {
471
+ uint32_t offset = shp_data_offset - subheader->len;
472
+ uint32_t len = subheader->len;
473
+ memcpy(&page[shp_ptr_offset], &offset, sizeof(uint32_t));
474
+ memcpy(&page[shp_ptr_offset+4], &len, sizeof(uint32_t));
475
+ if (subheader->is_row_data) {
476
+ if (subheader->is_row_data_compressed) {
477
+ page[shp_ptr_offset+8] = SAS_COMPRESSION_ROW;
478
+ } else {
479
+ page[shp_ptr_offset+8] = SAS_COMPRESSION_NONE;
480
+ }
481
+ page[shp_ptr_offset+9] = 1;
482
+ } else {
483
+ page[shp_ptr_offset+9] = sas7bdat_subheader_type(subheader->signature);
484
+ memcpy(&subheader->data[0], &signature32, sizeof(int32_t));
485
+ }
486
+ }
487
+ shp_ptr_offset += shp_ptr_size;
488
+
489
+ /* copy data */
490
+ shp_data_offset -= subheader->len;
491
+ memcpy(&page[shp_data_offset], subheader->data, subheader->len);
492
+
493
+ shp_written++;
494
+ shp_count++;
495
+ }
496
+
497
+ if (hinfo->u64) {
498
+ memcpy(&page[34], &shp_count, sizeof(int16_t));
499
+ memcpy(&page[36], &shp_count, sizeof(int16_t));
500
+ } else {
501
+ memcpy(&page[18], &shp_count, sizeof(int16_t));
502
+ memcpy(&page[20], &shp_count, sizeof(int16_t));
503
+ }
504
+
505
+ retval = readstat_write_bytes(writer, page, hinfo->page_size);
506
+ if (retval != READSTAT_OK)
507
+ goto cleanup;
508
+ }
509
+
510
+ cleanup:
511
+ free(page);
512
+
513
+ return retval;
514
+ }
515
+
516
+ static int sas7bdat_page_is_too_small(readstat_writer_t *writer, sas_header_info_t *hinfo, size_t row_length) {
517
+ size_t page_length = hinfo->page_size - hinfo->page_header_size;
518
+
519
+ if (writer->compression == READSTAT_COMPRESS_NONE && page_length < row_length)
520
+ return 1;
521
+
522
+ if (writer->compression == READSTAT_COMPRESS_ROWS && page_length < row_length + hinfo->subheader_pointer_size)
523
+ return 1;
524
+
525
+ if (page_length < sas7bdat_col_name_subheader_length(writer, hinfo) + hinfo->subheader_pointer_size)
526
+ return 1;
527
+
528
+ if (page_length < sas7bdat_col_attrs_subheader_length(writer, hinfo) + hinfo->subheader_pointer_size)
529
+ return 1;
530
+
531
+ return 0;
532
+ }
533
+
534
+ static sas7bdat_write_ctx_t *sas7bdat_write_ctx_init(readstat_writer_t *writer) {
535
+ sas7bdat_write_ctx_t *ctx = calloc(1, sizeof(sas7bdat_write_ctx_t));
536
+
537
+ sas_header_info_t *hinfo = sas_header_info_init(writer, writer->is_64bit);
538
+
539
+ size_t row_length = sas7bdat_row_length(writer);
540
+
541
+ while (sas7bdat_page_is_too_small(writer, hinfo, row_length)) {
542
+ hinfo->page_size <<= 1;
543
+ }
544
+
545
+ ctx->hinfo = hinfo;
546
+ ctx->sarray = sas7bdat_subheader_array_init(writer, hinfo);
547
+
548
+ return ctx;
549
+ }
550
+
551
+ static void sas7bdat_write_ctx_free(sas7bdat_write_ctx_t *ctx) {
552
+ free(ctx->hinfo);
553
+ sas7bdat_subheader_array_free(ctx->sarray);
554
+ free(ctx);
555
+ }
556
+
557
+ static readstat_error_t sas7bdat_emit_header_and_meta_pages(readstat_writer_t *writer) {
558
+ sas7bdat_write_ctx_t *ctx = (sas7bdat_write_ctx_t *)writer->module_ctx;
559
+ readstat_error_t retval = READSTAT_OK;
560
+
561
+ if (sas7bdat_row_length(writer) == 0) {
562
+ retval = READSTAT_ERROR_TOO_FEW_COLUMNS;
563
+ goto cleanup;
564
+ }
565
+
566
+ if (writer->compression == READSTAT_COMPRESS_NONE &&
567
+ sas7bdat_rows_per_page(writer, ctx->hinfo) == 0) {
568
+ retval = READSTAT_ERROR_ROW_IS_TOO_WIDE_FOR_PAGE;
569
+ goto cleanup;
570
+ }
571
+
572
+ ctx->hinfo->page_count = sas7bdat_count_meta_pages(writer) + sas7bdat_count_data_pages(writer, ctx->hinfo);
573
+
574
+ retval = sas7bdat_emit_header(writer, ctx->hinfo);
575
+ if (retval != READSTAT_OK)
576
+ goto cleanup;
577
+
578
+ retval = sas7bdat_emit_meta_pages(writer);
579
+ if (retval != READSTAT_OK)
580
+ goto cleanup;
581
+
582
+ cleanup:
583
+ return retval;
584
+ }
585
+
586
+ static readstat_error_t sas7bdat_begin_data(void *writer_ctx) {
587
+ readstat_writer_t *writer = (readstat_writer_t *)writer_ctx;
588
+ readstat_error_t retval = READSTAT_OK;
589
+
590
+ writer->module_ctx = sas7bdat_write_ctx_init(writer);
591
+
592
+ if (writer->compression == READSTAT_COMPRESS_NONE) {
593
+ retval = sas7bdat_emit_header_and_meta_pages(writer);
594
+ if (retval != READSTAT_OK)
595
+ goto cleanup;
596
+ }
597
+
598
+ cleanup:
599
+ if (retval != READSTAT_OK) {
600
+ if (writer->module_ctx) {
601
+ sas7bdat_write_ctx_free(writer->module_ctx);
602
+ writer->module_ctx = NULL;
603
+ }
604
+ }
605
+
606
+ return retval;
607
+ }
608
+
609
+ static readstat_error_t sas7bdat_end_data(void *writer_ctx) {
610
+ readstat_error_t retval = READSTAT_OK;
611
+ readstat_writer_t *writer = (readstat_writer_t *)writer_ctx;
612
+ sas7bdat_write_ctx_t *ctx = (sas7bdat_write_ctx_t *)writer->module_ctx;
613
+
614
+ if (writer->compression == READSTAT_COMPRESS_ROWS) {
615
+ retval = sas7bdat_emit_header_and_meta_pages(writer);
616
+ } else {
617
+ retval = sas_fill_page(writer, ctx->hinfo);
618
+ }
619
+
620
+ return retval;
621
+ }
622
+
623
+ static void sas7bdat_module_ctx_free(void *module_ctx) {
624
+ sas7bdat_write_ctx_free(module_ctx);
625
+ }
626
+
627
+ static readstat_error_t sas7bdat_write_double(void *row, const readstat_variable_t *var, double value) {
628
+ memcpy(row, &value, sizeof(double));
629
+ return READSTAT_OK;
630
+ }
631
+
632
+ static readstat_error_t sas7bdat_write_float(void *row, const readstat_variable_t *var, float value) {
633
+ return sas7bdat_write_double(row, var, value);
634
+ }
635
+
636
+ static readstat_error_t sas7bdat_write_int32(void *row, const readstat_variable_t *var, int32_t value) {
637
+ return sas7bdat_write_double(row, var, value);
638
+ }
639
+
640
+ static readstat_error_t sas7bdat_write_int16(void *row, const readstat_variable_t *var, int16_t value) {
641
+ return sas7bdat_write_double(row, var, value);
642
+ }
643
+
644
+ static readstat_error_t sas7bdat_write_int8(void *row, const readstat_variable_t *var, int8_t value) {
645
+ return sas7bdat_write_double(row, var, value);
646
+ }
647
+
648
+ static readstat_error_t sas7bdat_write_missing_tagged_raw(void *row, const readstat_variable_t *var, char tag) {
649
+ union {
650
+ double dval;
651
+ char chars[8];
652
+ } nan_value;
653
+
654
+ nan_value.dval = NAN;
655
+ nan_value.chars[machine_is_little_endian() ? 5 : 2] = ~tag;
656
+ return sas7bdat_write_double(row, var, nan_value.dval);
657
+ }
658
+
659
+ static readstat_error_t sas7bdat_write_missing_tagged(void *row, const readstat_variable_t *var, char tag) {
660
+ readstat_error_t error = sas_validate_tag(tag);
661
+ if (error == READSTAT_OK)
662
+ return sas7bdat_write_missing_tagged_raw(row, var, tag);
663
+
664
+ return error;
665
+ }
666
+
667
+ static readstat_error_t sas7bdat_write_missing_numeric(void *row, const readstat_variable_t *var) {
668
+ return sas7bdat_write_missing_tagged_raw(row, var, '.');
669
+ }
670
+
671
+ static readstat_error_t sas7bdat_write_string(void *row, const readstat_variable_t *var, const char *value) {
672
+ size_t max_len = readstat_variable_get_storage_width(var);
673
+ if (value == NULL || value[0] == '\0') {
674
+ memset(row, '\0', max_len);
675
+ } else {
676
+ size_t value_len = strlen(value);
677
+ if (value_len > max_len)
678
+ return READSTAT_ERROR_STRING_VALUE_IS_TOO_LONG;
679
+
680
+ strncpy((char *)row, value, max_len);
681
+ }
682
+ return READSTAT_OK;
683
+ }
684
+
685
+ static readstat_error_t sas7bdat_write_missing_string(void *row, const readstat_variable_t *var) {
686
+ return sas7bdat_write_string(row, var, NULL);
687
+ }
688
+
689
+ static size_t sas7bdat_variable_width(readstat_type_t type, size_t user_width) {
690
+ if (type == READSTAT_TYPE_STRING) {
691
+ return user_width;
692
+ }
693
+ return 8;
694
+ }
695
+
696
+ static readstat_error_t sas7bdat_write_row_uncompressed(readstat_writer_t *writer, sas7bdat_write_ctx_t *ctx,
697
+ void *bytes, size_t len) {
698
+ readstat_error_t retval = READSTAT_OK;
699
+ sas_header_info_t *hinfo = ctx->hinfo;
700
+
701
+ int32_t rows_per_page = sas7bdat_rows_per_page(writer, hinfo);
702
+ if (writer->current_row % rows_per_page == 0) {
703
+ retval = sas_fill_page(writer, ctx->hinfo);
704
+ if (retval != READSTAT_OK)
705
+ goto cleanup;
706
+
707
+ int16_t page_type = SAS_PAGE_TYPE_DATA;
708
+ int16_t page_row_count = (writer->row_count - writer->current_row < rows_per_page
709
+ ? writer->row_count - writer->current_row
710
+ : rows_per_page);
711
+ char *header = calloc(hinfo->page_header_size, 1);
712
+ memcpy(&header[hinfo->page_header_size-6], &page_row_count, sizeof(int16_t));
713
+ memcpy(&header[hinfo->page_header_size-8], &page_type, sizeof(int16_t));
714
+ retval = readstat_write_bytes(writer, header, hinfo->page_header_size);
715
+ free(header);
716
+ if (retval != READSTAT_OK)
717
+ goto cleanup;
718
+ }
719
+
720
+ retval = readstat_write_bytes(writer, bytes, len);
721
+
722
+ cleanup:
723
+ return retval;
724
+ }
725
+
726
+ /* We don't actually write compressed data out at this point; the file header
727
+ * requires a page count, so instead we collect the compressed subheaders in
728
+ * memory and write the entire file at the end, once the page count can be
729
+ * determined.
730
+ */
731
+ static readstat_error_t sas7bdat_write_row_compressed(readstat_writer_t *writer, sas7bdat_write_ctx_t *ctx,
732
+ void *bytes, size_t len) {
733
+ readstat_error_t retval = READSTAT_OK;
734
+ size_t compressed_len = sas_rle_compressed_len(bytes, len);
735
+
736
+ sas7bdat_subheader_t *subheader = NULL;
737
+ if (compressed_len < len) {
738
+ subheader = sas7bdat_subheader_init(0, compressed_len);
739
+ subheader->is_row_data = 1;
740
+ subheader->is_row_data_compressed = 1;
741
+ size_t actual_len = sas_rle_compress(subheader->data, subheader->len, bytes, len);
742
+ if (actual_len != compressed_len) {
743
+ retval = READSTAT_ERROR_ROW_WIDTH_MISMATCH;
744
+ goto cleanup;
745
+ }
746
+ } else {
747
+ subheader = sas7bdat_subheader_init(0, len);
748
+ subheader->is_row_data = 1;
749
+ memcpy(subheader->data, bytes, len);
750
+ }
751
+
752
+ ctx->sarray->subheaders[ctx->sarray->count++] = subheader;
753
+
754
+ cleanup:
755
+ if (retval != READSTAT_OK)
756
+ sas7bdat_subheader_free(subheader);
757
+
758
+ return retval;
759
+ }
760
+
761
+ static readstat_error_t sas7bdat_write_row(void *writer_ctx, void *bytes, size_t len) {
762
+ readstat_writer_t *writer = (readstat_writer_t *)writer_ctx;
763
+ sas7bdat_write_ctx_t *ctx = (sas7bdat_write_ctx_t *)writer->module_ctx;
764
+ readstat_error_t retval = READSTAT_OK;
765
+
766
+ if (writer->compression == READSTAT_COMPRESS_NONE) {
767
+ retval = sas7bdat_write_row_uncompressed(writer, ctx, bytes, len);
768
+ } else if (writer->compression == READSTAT_COMPRESS_ROWS) {
769
+ retval = sas7bdat_write_row_compressed(writer, ctx, bytes, len);
770
+ }
771
+
772
+ return retval;
773
+ }
774
+
775
+ static readstat_error_t sas7bdat_metadata_ok(void *writer_ctx) {
776
+ readstat_writer_t *writer = (readstat_writer_t *)writer_ctx;
777
+
778
+ if (writer->compression != READSTAT_COMPRESS_NONE &&
779
+ writer->compression != READSTAT_COMPRESS_ROWS)
780
+ return READSTAT_ERROR_UNSUPPORTED_COMPRESSION;
781
+
782
+ return READSTAT_OK;
783
+ }
784
+
785
+ readstat_error_t readstat_begin_writing_sas7bdat(readstat_writer_t *writer, void *user_ctx, long row_count) {
786
+
787
+ if (writer->version == 0)
788
+ writer->version = SAS_DEFAULT_FILE_VERSION;
789
+
790
+ writer->callbacks.metadata_ok = &sas7bdat_metadata_ok;
791
+ writer->callbacks.write_int8 = &sas7bdat_write_int8;
792
+ writer->callbacks.write_int16 = &sas7bdat_write_int16;
793
+ writer->callbacks.write_int32 = &sas7bdat_write_int32;
794
+ writer->callbacks.write_float = &sas7bdat_write_float;
795
+ writer->callbacks.write_double = &sas7bdat_write_double;
796
+
797
+ writer->callbacks.write_string = &sas7bdat_write_string;
798
+ writer->callbacks.write_missing_string = &sas7bdat_write_missing_string;
799
+ writer->callbacks.write_missing_number = &sas7bdat_write_missing_numeric;
800
+ writer->callbacks.write_missing_tagged = &sas7bdat_write_missing_tagged;
801
+
802
+ writer->callbacks.variable_width = &sas7bdat_variable_width;
803
+ writer->callbacks.variable_ok = &sas_validate_variable;
804
+
805
+ writer->callbacks.begin_data = &sas7bdat_begin_data;
806
+ writer->callbacks.end_data = &sas7bdat_end_data;
807
+ writer->callbacks.module_ctx_free = &sas7bdat_module_ctx_free;
808
+
809
+ writer->callbacks.write_row = &sas7bdat_write_row;
810
+
811
+ return readstat_begin_writing_file(writer, user_ctx, row_count);
812
+ }