js-stream-sas7bdat 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/binding.gyp +58 -0
  2. package/package.json +4 -2
  3. package/src/binding/ReadStat/LICENSE +19 -0
  4. package/src/binding/ReadStat/README.md +483 -0
  5. package/src/binding/ReadStat/src/CKHashTable.c +309 -0
  6. package/src/binding/ReadStat/src/CKHashTable.h +37 -0
  7. package/src/binding/ReadStat/src/readstat.h +627 -0
  8. package/src/binding/ReadStat/src/readstat_bits.c +69 -0
  9. package/src/binding/ReadStat/src/readstat_bits.h +20 -0
  10. package/src/binding/ReadStat/src/readstat_convert.c +36 -0
  11. package/src/binding/ReadStat/src/readstat_convert.h +2 -0
  12. package/src/binding/ReadStat/src/readstat_error.c +126 -0
  13. package/src/binding/ReadStat/src/readstat_iconv.h +15 -0
  14. package/src/binding/ReadStat/src/readstat_io_unistd.c +147 -0
  15. package/src/binding/ReadStat/src/readstat_io_unistd.h +11 -0
  16. package/src/binding/ReadStat/src/readstat_malloc.c +34 -0
  17. package/src/binding/ReadStat/src/readstat_malloc.h +4 -0
  18. package/src/binding/ReadStat/src/readstat_metadata.c +53 -0
  19. package/src/binding/ReadStat/src/readstat_parser.c +121 -0
  20. package/src/binding/ReadStat/src/readstat_strings.h +6 -0
  21. package/src/binding/ReadStat/src/readstat_value.c +178 -0
  22. package/src/binding/ReadStat/src/readstat_variable.c +123 -0
  23. package/src/binding/ReadStat/src/readstat_writer.c +677 -0
  24. package/src/binding/ReadStat/src/readstat_writer.h +21 -0
  25. package/src/binding/ReadStat/src/sas/ieee.c +420 -0
  26. package/src/binding/ReadStat/src/sas/ieee.h +6 -0
  27. package/src/binding/ReadStat/src/sas/readstat_sas.c +528 -0
  28. package/src/binding/ReadStat/src/sas/readstat_sas.h +131 -0
  29. package/src/binding/ReadStat/src/sas/readstat_sas7bcat_read.c +515 -0
  30. package/src/binding/ReadStat/src/sas/readstat_sas7bcat_write.c +218 -0
  31. package/src/binding/ReadStat/src/sas/readstat_sas7bdat_read.c +1304 -0
  32. package/src/binding/ReadStat/src/sas/readstat_sas7bdat_write.c +812 -0
  33. package/src/binding/ReadStat/src/sas/readstat_sas_rle.c +286 -0
  34. package/src/binding/ReadStat/src/sas/readstat_sas_rle.h +8 -0
  35. package/src/binding/ReadStat/src/sas/readstat_xport.c +28 -0
  36. package/src/binding/ReadStat/src/sas/readstat_xport.h +47 -0
  37. package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.c +265 -0
  38. package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.h +4 -0
  39. package/src/binding/ReadStat/src/sas/readstat_xport_parse_format.rl +68 -0
  40. package/src/binding/ReadStat/src/sas/readstat_xport_read.c +777 -0
  41. package/src/binding/ReadStat/src/sas/readstat_xport_write.c +561 -0
  42. package/src/binding/readstat_binding.cc +393 -0
@@ -0,0 +1,393 @@
1
+ // binding/readstat_binding.cc
2
+ #include <napi.h>
3
+ #include <vector>
4
+ #include <map>
5
+ #include <string>
6
+ #include "./ReadStat/src/readstat.h"
7
+ #include <ctime>
8
+ #include <iomanip>
9
+ #include <sstream>
10
+
11
+ // Helper function to convert readstat_compress_t to a string
12
+ const char* compressionTypeToString(readstat_compress_t compression) {
13
+ switch(compression) {
14
+ case READSTAT_COMPRESS_NONE:
15
+ return "NONE";
16
+ case READSTAT_COMPRESS_ROWS:
17
+ return "ROWS";
18
+ case READSTAT_COMPRESS_BINARY:
19
+ return "BINARY";
20
+ default:
21
+ return "UNKNOWN";
22
+ }
23
+ }
24
+
25
+ // Context structure to pass data between callbacks
26
+ struct context_t {
27
+ int var_count; // Store the total number of variables
28
+ int current_var; // Track the current variable in each row
29
+ int current_record; // Current record being processed
30
+
31
+ // Storage for variable metadata
32
+ std::vector<std::string> var_names;
33
+ std::vector<readstat_type_t> var_types;
34
+
35
+ // Storage for data rows - changed from map to vector for array-based output
36
+ std::vector<std::vector<Napi::Value>> rows;
37
+ Napi::Env env;
38
+
39
+ // Constructor to fix the initialization issue
40
+ context_t(Napi::Env e) :
41
+ var_count(0),
42
+ current_var(0),
43
+ current_record(0),
44
+ env(e) {}
45
+ };
46
+
47
+ // Enhanced metadata context structure
48
+ struct metadata_context_t {
49
+ readstat_metadata_t *metadata;
50
+ std::vector<readstat_variable_t*> variables;
51
+ Napi::Env env;
52
+ Napi::Object dataset; // Store the result directly in the context
53
+ Napi::Array columns; // Store columns array directly
54
+
55
+ // Constructor with enhanced initialization
56
+ metadata_context_t(Napi::Env e) :
57
+ metadata(nullptr),
58
+ env(e),
59
+ dataset(Napi::Object::New(e)),
60
+ columns(Napi::Array::New(e)) {}
61
+ };
62
+
63
+ // Convert SAS format to a simplified type format
64
+ std::string getSASDataType(readstat_type_t type) {
65
+ switch(type) {
66
+ case READSTAT_TYPE_STRING:
67
+ return "text";
68
+ case READSTAT_TYPE_INT8:
69
+ case READSTAT_TYPE_INT16:
70
+ case READSTAT_TYPE_INT32:
71
+ return "integer";
72
+ case READSTAT_TYPE_FLOAT:
73
+ case READSTAT_TYPE_DOUBLE:
74
+ return "double";
75
+ default:
76
+ // Return character representation of the type
77
+ return std::string(1, (char)type);
78
+ }
79
+ }
80
+
81
+ // Callback for ReadStat
82
+ static int handle_metadata(readstat_metadata_t *metadata, void *ctx) {
83
+ context_t *context = (context_t *)ctx;
84
+ context->current_record = 0;
85
+ context->var_count = readstat_get_var_count(metadata);
86
+ context->current_var = 0;
87
+
88
+ // Initialize storage for variable metadata
89
+ context->var_names.resize(context->var_count);
90
+ context->var_types.resize(context->var_count);
91
+
92
+ return READSTAT_HANDLER_OK;
93
+ }
94
+
95
+ static int handle_variable(int index, readstat_variable_t *variable, const char *val_labels, void *ctx) {
96
+ context_t *context = (context_t *)ctx;
97
+ if (index < context->var_count) {
98
+ context->var_names[index] = readstat_variable_get_name(variable);
99
+ context->var_types[index] = readstat_variable_get_type(variable);
100
+ }
101
+ return READSTAT_HANDLER_OK;
102
+ }
103
+
104
+ static int handle_value(int obs_index, readstat_variable_t *variable,
105
+ readstat_value_t value, void *ctx) {
106
+ context_t *context = (context_t *)ctx;
107
+
108
+ // Update current variable index
109
+ int var_idx = readstat_variable_get_index(variable);
110
+ context->current_var = var_idx;
111
+
112
+ // Create new row if this is the first variable in a row
113
+ if (var_idx == 0) {
114
+ context->rows.push_back(std::vector<Napi::Value>(context->var_count));
115
+ }
116
+
117
+ // Process the value
118
+ Napi::Value jsValue;
119
+ if (!readstat_value_is_missing(value, variable)) {
120
+ switch(readstat_value_type(value)) {
121
+ case READSTAT_TYPE_STRING:
122
+ jsValue = Napi::String::New(context->env, readstat_string_value(value));
123
+ break;
124
+ case READSTAT_TYPE_INT8:
125
+ case READSTAT_TYPE_INT16:
126
+ case READSTAT_TYPE_INT32:
127
+ jsValue = Napi::Number::New(context->env, readstat_int32_value(value));
128
+ break;
129
+ case READSTAT_TYPE_FLOAT:
130
+ case READSTAT_TYPE_DOUBLE:
131
+ jsValue = Napi::Number::New(context->env, readstat_double_value(value));
132
+ break;
133
+ default:
134
+ jsValue = context->env.Null();
135
+ }
136
+ } else {
137
+ jsValue = context->env.Null();
138
+ }
139
+
140
+ // Store in the array at the correct position
141
+ context->rows.back()[var_idx] = jsValue;
142
+
143
+ return READSTAT_HANDLER_OK;
144
+ }
145
+
146
+ // Enhanced metadata handler that processes most metadata directly
147
+ static int handle_metadata_only(readstat_metadata_t *metadata, void *ctx) {
148
+ metadata_context_t *context = (metadata_context_t *)ctx;
149
+ context->metadata = metadata;
150
+
151
+ // Store the dataset properties directly in the context's dataset object
152
+
153
+ // Set record count
154
+ context->dataset.Set("records", Napi::Number::New(context->env,
155
+ readstat_get_row_count(metadata)));
156
+
157
+ // Set dataset label if available
158
+ const char* fileLabel = readstat_get_file_label(metadata);
159
+ const char* tableLabel = readstat_get_table_name(metadata);
160
+ context->dataset.Set("label", Napi::String::New(context->env, tableLabel ? tableLabel : fileLabel));
161
+
162
+ // Creation time
163
+ time_t creationTime = readstat_get_creation_time(metadata);
164
+ context->dataset.Set("CreationDateTime", Napi::Number::New(context->env, static_cast<double>(creationTime)));
165
+
166
+ // Modification time
167
+ time_t modTime = readstat_get_modified_time(metadata);
168
+ context->dataset.Set("ModifiedDateTime", Napi::Number::New(context->env, static_cast<double>(modTime)));
169
+
170
+ // Create and initialize columns array
171
+ int var_count = readstat_get_var_count(metadata);
172
+ context->columns = Napi::Array::New(context->env, var_count);
173
+ context->dataset.Set("columns", context->columns);
174
+
175
+ // Add optional fields from SAS metadata if available
176
+ const int format_version = readstat_get_file_format_version(metadata);
177
+ Napi::Object sourceSystem = Napi::Object::New(context->env);
178
+ sourceSystem.Set("name", Napi::String::New(context->env, "SAS"));
179
+ if (format_version) {
180
+ sourceSystem.Set("version", Napi::String::New(context->env,
181
+ std::to_string(format_version)));
182
+ }
183
+ context->dataset.Set("sourceSystem", sourceSystem);
184
+
185
+ // Add compression info if available
186
+ const readstat_compress_t compression = readstat_get_compression(metadata);
187
+ const char* compression_str = compressionTypeToString(compression);
188
+ context->dataset.Set("compression", Napi::String::New(context->env, compression_str));
189
+
190
+ // Add character encoding information
191
+ const char* encoding = readstat_get_file_encoding(metadata);
192
+ if (encoding != NULL) {
193
+ context->dataset.Set("encoding", Napi::String::New(context->env, encoding));
194
+ }
195
+ // Add bit level information (32/64 bit)
196
+ context->dataset.Set("is64Bit", Napi::Boolean::New(context->env,
197
+ readstat_get_file_format_is_64bit(metadata)));
198
+ // Add file version information
199
+ int file_format_version = readstat_get_file_format_version(metadata);
200
+ if (file_format_version > 0) {
201
+ context->dataset.Set("fileFormatVersion", Napi::Number::New(context->env, file_format_version));
202
+ }
203
+
204
+ return READSTAT_HANDLER_OK;
205
+ }
206
+
207
+ static int handle_variable_metadata(int index, readstat_variable_t *variable, const char *val_labels, void *ctx) {
208
+ metadata_context_t *context = (metadata_context_t *)ctx;
209
+
210
+ // Create column object directly and add it to the columns array
211
+ Napi::Object column = Napi::Object::New(context->env);
212
+
213
+ // Generate an OID based on the column name
214
+ std::string oid = "IT." + std::string(readstat_variable_get_name(variable));
215
+ column.Set("itemOID", Napi::String::New(context->env, oid));
216
+ column.Set("name", Napi::String::New(context->env, readstat_variable_get_name(variable)));
217
+
218
+ const char* label = readstat_variable_get_label(variable);
219
+ column.Set("label", Napi::String::New(context->env,
220
+ label ? label : readstat_variable_get_name(variable)));
221
+
222
+ std::string dataType = getSASDataType(readstat_variable_get_type(variable));
223
+ column.Set("dataType", Napi::String::New(context->env, dataType));
224
+
225
+ // Include length if available
226
+ size_t length = readstat_variable_get_storage_width(variable);
227
+ if (length > 0) {
228
+ column.Set("length", Napi::Number::New(context->env, length));
229
+ }
230
+
231
+ // Include format if available
232
+ const char* format = readstat_variable_get_format(variable);
233
+ if (format && strlen(format) > 0) {
234
+ column.Set("displayFormat", Napi::String::New(context->env, format));
235
+ }
236
+
237
+ // Store the column in the array
238
+ context->columns[index] = column;
239
+
240
+ return READSTAT_HANDLER_OK;
241
+ }
242
+
243
+ // Get metadata from SAS7BDAT file - enhanced version with comprehensive metadata
244
+ Napi::Value GetSAS7BDATMetadata(const Napi::CallbackInfo& info) {
245
+ Napi::Env env = info.Env();
246
+
247
+ if (info.Length() < 1) {
248
+ Napi::TypeError::New(env, "Wrong number of arguments").ThrowAsJavaScriptException();
249
+ return env.Null();
250
+ }
251
+
252
+ std::string filePath = info[0].As<Napi::String>().Utf8Value();
253
+ metadata_context_t context(env);
254
+
255
+ readstat_parser_t *parser = readstat_parser_init();
256
+ readstat_set_metadata_handler(parser, &handle_metadata_only);
257
+ readstat_set_variable_handler(parser, &handle_variable_metadata);
258
+
259
+ readstat_error_t error = readstat_parse_sas7bdat(parser, filePath.c_str(), &context);
260
+
261
+ if (error != READSTAT_OK) {
262
+ std::string errorMsg = "Failed to parse SAS7BDAT metadata: ";
263
+ errorMsg += readstat_error_message(error);
264
+ readstat_parser_free(parser);
265
+ Napi::Error::New(env, errorMsg).ThrowAsJavaScriptException();
266
+ return env.Null();
267
+ }
268
+
269
+ // Get file name from path for the name field
270
+ std::string fileName = filePath.substr(filePath.find_last_of("/\\") + 1);
271
+ fileName = fileName.substr(0, fileName.find_last_of("."));
272
+ context.dataset.Set("name", Napi::String::New(env, fileName));
273
+
274
+ // Add file path for reference
275
+ context.dataset.Set("filePath", Napi::String::New(env, filePath));
276
+
277
+ // Add file format information
278
+ context.dataset.Set("fileFormat", Napi::String::New(env, "SAS7BDAT"));
279
+
280
+ // Add modification time as timestamp number if available
281
+ if (context.metadata) {
282
+ // Add compression info if available
283
+ // const readstat_compress_t compression = readstat_get_compression(context.metadata);
284
+ // const char* compression_str = compressionTypeToString(compression);
285
+ // printf("DEBUG: pointer = %p\n", (void*)compression_str);
286
+ // context.dataset.Set("compression", Napi::String::New(env, compression_str));
287
+
288
+ // // Add character encoding information
289
+ // const char* encoding = readstat_get_file_encoding(context.metadata);
290
+ // printf("DEBUG: encoding pointer = %p\n", (void*)encoding);
291
+ // if (encoding != NULL) {
292
+ // context.dataset.Set("encoding", Napi::String::New(env, encoding));
293
+ // }
294
+
295
+ // Add table display label if different from file label
296
+ // const char* tableLabel = readstat_get_table_name(context.metadata);
297
+ // printf("DEBUG: pointer = %p\n", (void*)tableLabel);
298
+ // if (tableLabel && strlen(tableLabel) > 0 &&
299
+ // strcmp(tableLabel, readstat_get_file_label(context.metadata)) != 0) {
300
+ // context.dataset.Set("tableLabel", Napi::String::New(env, tableLabel));
301
+ // }
302
+
303
+ // Add file version information
304
+ // int file_format_version = readstat_get_file_format_version(context.metadata);
305
+ // if (file_format_version > 0) {
306
+ // context.dataset.Set("fileFormatVersion", Napi::Number::New(env, file_format_version));
307
+ // }
308
+
309
+ // Add bit level information (32/64 bit)
310
+ // context.dataset.Set("is64Bit", Napi::Boolean::New(env, readstat_get_file_format_is_64bit(context.metadata)));
311
+ }
312
+
313
+ // Cleanup
314
+ readstat_parser_free(parser);
315
+
316
+ return context.dataset;
317
+ }
318
+
319
+ // Node.js binding
320
+ Napi::Value ReadSas7bdat(const Napi::CallbackInfo& info) {
321
+ Napi::Env env = info.Env();
322
+
323
+ if (info.Length() < 1) {
324
+ Napi::TypeError::New(env, "Wrong number of arguments").ThrowAsJavaScriptException();
325
+ return env.Null();
326
+ }
327
+
328
+ std::string filePath = info[0].As<Napi::String>().Utf8Value();
329
+
330
+ // Create context with proper initialization
331
+ context_t context(env);
332
+
333
+ // Initialize the parser
334
+ readstat_parser_t *parser = readstat_parser_init();
335
+
336
+ // Parse optional row offset parameter
337
+ if (info.Length() > 1 && info[1].IsNumber()) {
338
+ long row_offset = info[1].As<Napi::Number>().Int32Value();
339
+ if (row_offset < 0) {
340
+ Napi::RangeError::New(env, "Row offset must be non-negative").ThrowAsJavaScriptException();
341
+ readstat_parser_free(parser);
342
+ return env.Null();
343
+ }
344
+ readstat_set_row_offset(parser, row_offset);
345
+ }
346
+
347
+ // Parse optional row limit parameter
348
+ if (info.Length() > 2 && info[2].IsNumber()) {
349
+ long row_limit = info[2].As<Napi::Number>().Int32Value();
350
+ if (row_limit < -1) {
351
+ Napi::RangeError::New(env, "Row limit must be positive or -1 (for all records)").ThrowAsJavaScriptException();
352
+ readstat_parser_free(parser);
353
+ return env.Null();
354
+ }
355
+ if (row_limit != -1) {
356
+ readstat_set_row_limit(parser, row_limit);
357
+ }
358
+ }
359
+
360
+ readstat_set_metadata_handler(parser, &handle_metadata);
361
+ readstat_set_variable_handler(parser, &handle_variable);
362
+ readstat_set_value_handler(parser, &handle_value);
363
+
364
+ readstat_error_t error = readstat_parse_sas7bdat(parser, filePath.c_str(), &context);
365
+ readstat_parser_free(parser);
366
+
367
+ if (error != READSTAT_OK) {
368
+ std::string errorMsg = "Failed to parse SAS7BDAT file: ";
369
+ errorMsg += readstat_error_message(error);
370
+ Napi::Error::New(env, errorMsg).ThrowAsJavaScriptException();
371
+ return env.Null();
372
+ }
373
+
374
+ // Create result array
375
+ Napi::Array result = Napi::Array::New(env, context.rows.size());
376
+ for (size_t i = 0; i < context.rows.size(); i++) {
377
+ Napi::Array row = Napi::Array::New(env, context.rows[i].size());
378
+ for (size_t j = 0; j < context.rows[i].size(); j++) {
379
+ row[j] = context.rows[i][j];
380
+ }
381
+ result[i] = row;
382
+ }
383
+
384
+ return result;
385
+ }
386
+
387
+ Napi::Object Init(Napi::Env env, Napi::Object exports) {
388
+ exports.Set("readSas7bdat", Napi::Function::New(env, ReadSas7bdat));
389
+ exports.Set("getSAS7BDATMetadata", Napi::Function::New(env, GetSAS7BDATMetadata));
390
+ return exports;
391
+ }
392
+
393
+ NODE_API_MODULE(readstat_binding, Init)