@elaraai/east-node-io 0.0.1-beta.1 → 0.0.1-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/compression/gzip.d.ts +108 -0
- package/dist/compression/gzip.d.ts.map +1 -0
- package/dist/compression/gzip.js +153 -0
- package/dist/compression/gzip.js.map +1 -0
- package/dist/compression/index.d.ts +308 -0
- package/dist/compression/index.d.ts.map +1 -0
- package/dist/compression/index.js +289 -0
- package/dist/compression/index.js.map +1 -0
- package/dist/compression/tar.d.ts +115 -0
- package/dist/compression/tar.d.ts.map +1 -0
- package/dist/compression/tar.js +254 -0
- package/dist/compression/tar.js.map +1 -0
- package/dist/compression/types.d.ts +124 -0
- package/dist/compression/types.d.ts.map +1 -0
- package/dist/compression/types.js +106 -0
- package/dist/compression/types.js.map +1 -0
- package/dist/compression/zip.d.ts +121 -0
- package/dist/compression/zip.d.ts.map +1 -0
- package/dist/compression/zip.js +200 -0
- package/dist/compression/zip.js.map +1 -0
- package/dist/connection/index.d.ts +78 -0
- package/dist/connection/index.d.ts.map +1 -0
- package/dist/connection/index.js +134 -0
- package/dist/connection/index.js.map +1 -0
- package/dist/format/csv.d.ts +210 -0
- package/dist/format/csv.d.ts.map +1 -0
- package/dist/format/csv.js +729 -0
- package/dist/format/csv.js.map +1 -0
- package/dist/format/index.d.ts +641 -0
- package/dist/format/index.d.ts.map +1 -0
- package/dist/format/index.js +463 -0
- package/dist/format/index.js.map +1 -0
- package/dist/format/types.d.ts +176 -0
- package/dist/format/types.d.ts.map +1 -0
- package/dist/format/types.js +122 -0
- package/dist/format/types.js.map +1 -0
- package/dist/format/xlsx.d.ts +178 -0
- package/dist/format/xlsx.d.ts.map +1 -0
- package/dist/format/xlsx.js +313 -0
- package/dist/format/xlsx.js.map +1 -0
- package/dist/format/xml.d.ts +302 -0
- package/dist/format/xml.d.ts.map +1 -0
- package/dist/format/xml.js +602 -0
- package/dist/format/xml.js.map +1 -0
- package/dist/index.d.ts +25 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +32 -0
- package/dist/index.js.map +1 -0
- package/dist/nosql/index.d.ts +503 -0
- package/dist/nosql/index.d.ts.map +1 -0
- package/dist/nosql/index.js +483 -0
- package/dist/nosql/index.js.map +1 -0
- package/dist/nosql/mongodb.d.ts +306 -0
- package/dist/nosql/mongodb.d.ts.map +1 -0
- package/dist/nosql/mongodb.js +552 -0
- package/dist/nosql/mongodb.js.map +1 -0
- package/dist/nosql/redis.d.ts +268 -0
- package/dist/nosql/redis.d.ts.map +1 -0
- package/dist/nosql/redis.js +371 -0
- package/dist/nosql/redis.js.map +1 -0
- package/dist/nosql/types.d.ts +70 -0
- package/dist/nosql/types.d.ts.map +1 -0
- package/dist/nosql/types.js +79 -0
- package/dist/nosql/types.js.map +1 -0
- package/dist/sql/index.d.ts +777 -0
- package/dist/sql/index.d.ts.map +1 -0
- package/dist/sql/index.js +515 -0
- package/dist/sql/index.js.map +1 -0
- package/dist/sql/mysql.d.ts +238 -0
- package/dist/sql/mysql.d.ts.map +1 -0
- package/dist/sql/mysql.js +396 -0
- package/dist/sql/mysql.js.map +1 -0
- package/dist/sql/postgres.d.ts +237 -0
- package/dist/sql/postgres.d.ts.map +1 -0
- package/dist/sql/postgres.js +381 -0
- package/dist/sql/postgres.js.map +1 -0
- package/dist/sql/sqlite.d.ts +217 -0
- package/dist/sql/sqlite.d.ts.map +1 -0
- package/dist/sql/sqlite.js +366 -0
- package/dist/sql/sqlite.js.map +1 -0
- package/dist/sql/types.d.ts +205 -0
- package/dist/sql/types.d.ts.map +1 -0
- package/dist/sql/types.js +175 -0
- package/dist/sql/types.js.map +1 -0
- package/dist/storage/index.d.ts +304 -0
- package/dist/storage/index.d.ts.map +1 -0
- package/dist/storage/index.js +234 -0
- package/dist/storage/index.js.map +1 -0
- package/dist/storage/s3.d.ts +373 -0
- package/dist/storage/s3.d.ts.map +1 -0
- package/dist/storage/s3.js +502 -0
- package/dist/storage/s3.js.map +1 -0
- package/dist/storage/types.d.ts +117 -0
- package/dist/storage/types.d.ts.map +1 -0
- package/dist/storage/types.js +94 -0
- package/dist/storage/types.js.map +1 -0
- package/dist/transfer/ftp.d.ts +333 -0
- package/dist/transfer/ftp.d.ts.map +1 -0
- package/dist/transfer/ftp.js +437 -0
- package/dist/transfer/ftp.js.map +1 -0
- package/dist/transfer/index.d.ts +456 -0
- package/dist/transfer/index.d.ts.map +1 -0
- package/dist/transfer/index.js +414 -0
- package/dist/transfer/index.js.map +1 -0
- package/dist/transfer/sftp.d.ts +333 -0
- package/dist/transfer/sftp.d.ts.map +1 -0
- package/dist/transfer/sftp.js +436 -0
- package/dist/transfer/sftp.js.map +1 -0
- package/dist/transfer/types.d.ts +108 -0
- package/dist/transfer/types.d.ts.map +1 -0
- package/dist/transfer/types.js +110 -0
- package/dist/transfer/types.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/package.json +6 -6
|
@@ -0,0 +1,729 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) 2025 Elara AI Pty Ltd
|
|
3
|
+
* Dual-licensed under AGPL-3.0 and commercial license. See LICENSE for details.
|
|
4
|
+
*/
|
|
5
|
+
import { East, BlobType, ArrayType, DictType, StringType, OptionType, BooleanType, StructType, variant, VariantType, NullType, match } from "@elaraai/east";
|
|
6
|
+
import { EastError, LiteralValueType } from "@elaraai/east/internal";
|
|
7
|
+
// CSV Configuration Types
/**
 * Variant type enumerating the cell types a CSV column may be parsed as.
 *
 * Each case carries no payload (NullType); the variant tag alone selects the
 * conversion applied by convertNativeToCell (e.g. "Integer" -> BigInt parse,
 * "Blob" -> 0x-prefixed hex decode). Columns not listed in a parse config
 * default to "String".
 */
export const CsvColumnType = VariantType({
    "Null": NullType,
    "Boolean": NullType,
    "Integer": NullType,
    "Float": NullType,
    "String": NullType,
    "DateTime": NullType,
    "Blob": NullType,
});
|
|
17
|
+
/**
 * Configuration struct for csv_parse.
 *
 * Option-typed fields fall back to defaults in parseCsv when 'none':
 * delimiter ',', quoteChar '"', escapeChar '"', nullString '', and
 * auto-detected newline (\r\n, \n, or \r).
 */
export const CsvParseConfig = StructType({
    // Optional per-column type map (column name -> CsvColumnType); unlisted columns parse as String.
    columns: OptionType(DictType(StringType, CsvColumnType)),
    delimiter: OptionType(StringType),
    quoteChar: OptionType(StringType),
    escapeChar: OptionType(StringType),
    newline: OptionType(StringType),
    // When true, the first record supplies column names and fixes the column count.
    hasHeader: BooleanType,
    // Fields equal to this string (after optional trim) become Null cells.
    nullString: OptionType(StringType),
    skipEmptyLines: BooleanType,
    trimFields: BooleanType,
});
|
|
28
|
+
/**
 * Configuration struct for csv_serialize.
 *
 * Unlike CsvParseConfig, all fields are required — callers must supply
 * explicit delimiter/quote/escape/newline strings and formatting flags.
 */
export const CsvSerializeConfig = StructType({
    delimiter: StringType,
    quoteChar: StringType,
    escapeChar: StringType,
    newline: StringType,
    includeHeader: BooleanType,
    // String emitted for Null cells.
    nullString: StringType,
    // Force quoting of every field, not just those containing special characters.
    alwaysQuote: BooleanType,
});
|
|
37
|
+
// CSV Platform Functions
/** Represents a single CSV row as a dictionary mapping column names to typed literal values (LiteralValueType variants such as Null, String, Integer, Float, Boolean, DateTime, Blob). */
export const CsvRowType = DictType(StringType, LiteralValueType);
/** Represents CSV data as an array of row dictionaries. */
export const CsvDataType = ArrayType(CsvRowType);
|
|
42
|
+
/**
 * Parses CSV data from a binary blob into structured row data.
 *
 * Converts CSV-formatted binary data into an array of row dictionaries,
 * where each dictionary maps column names to typed literal values
 * (LiteralValueType variants — Null, String, Integer, Float, Boolean,
 * DateTime, or Blob, per the configured column types).
 * Supports configurable delimiters, quote characters, escape sequences,
 * and header handling.
 *
 * This is a platform function for the East language, enabling CSV parsing
 * in East programs running on Node.js.
 *
 * @param blob - The CSV data as a binary blob (UTF-8 encoded)
 * @param config - Parsing configuration including delimiter, quote characters, and header options
 * @returns An array of row dictionaries, each mapping column names to typed literal values
 *
 * @throws {EastError} When CSV is malformed with specific error messages:
 * - "Unclosed quote in row N, column M" - Quote not properly closed
 * - "Too many fields in row N (expected X columns, found at least Y)" - More fields than header
 * - "Too few fields in row N (expected X columns, got Y)" - Fewer fields than header
 * - "Invalid escape sequence in row N, column M" - Invalid escape character usage
 * - "Expected delimiter or newline after closing quote in row N, column M" - Invalid data after quote
 * - "quoteChar must have length 1" - Invalid configuration
 * - "escapeChar must have length 1" - Invalid configuration
 * - "delimiter must not be empty" - Invalid configuration
 *
 * @example
 * ```ts
 * const parseCSV = East.function([BlobType], CsvDataType, ($, csvBlob) => {
 *     const config = $.const(East.value({
 *         delimiter: variant('some', ','),
 *         quoteChar: variant('some', '"'),
 *         escapeChar: variant('some', '"'),
 *         newline: variant('none', null),
 *         hasHeader: true,
 *         nullString: variant('some', ''),
 *         skipEmptyLines: true,
 *         trimFields: false,
 *     }, CsvParseConfig));
 *
 *     return csv_parse(csvBlob, config);
 *     // Returns: [{"name": String("Alice"), "age": String("30")}, ...]
 * });
 * ```
 *
 * @remarks
 * - Handles quoted fields with embedded delimiters and newlines
 * - Supports both quote-as-escape ("") and backslash-escape (\") modes
 * - Auto-detects newline format (CRLF, LF, or CR) when newline option is none
 * - Validates column counts when hasHeader is true
 * - Skips UTF-8 BOM (0xEF 0xBB 0xBF) if present at start
 * - When hasHeader is false, generates column names as "column_0", "column_1", etc.
 */
export const csv_parse = East.platform("csv_parse", [BlobType, CsvParseConfig], CsvDataType);
|
|
95
|
+
/**
 * Serializes structured row data into CSV-formatted binary data.
 *
 * Converts an array of row dictionaries into CSV-formatted binary data.
 * Supports configurable delimiters, quote characters, escape sequences,
 * and formatting options.
 *
 * This is a platform function for the East language, enabling CSV serialization
 * in East programs running on Node.js.
 *
 * @param data - An array of row dictionaries to serialize
 * @param config - Serialization configuration including delimiter, quote characters, and formatting options
 * @returns A binary blob containing the CSV-formatted data (UTF-8 encoded)
 *
 * @throws {EastError} When configuration is invalid:
 * - "quoteChar must have length 1" - Invalid configuration
 * - "escapeChar must have length 1" - Invalid configuration
 * - "delimiter must not be empty" - Invalid configuration
 *
 * @example
 * ```ts
 * const serializeCSV = East.function([CsvDataType], BlobType, ($, data) => {
 *     const config = $.const(East.value({
 *         delimiter: ',',
 *         quoteChar: '"',
 *         escapeChar: '"',
 *         newline: '\n',
 *         includeHeader: true,
 *         nullString: '',
 *         alwaysQuote: false,
 *     }, CsvSerializeConfig));
 *
 *     return csv_serialize(data, config);
 *     // Returns blob that decodes to: "name,age\nAlice,30\nBob,25"
 * });
 * ```
 *
 * @remarks
 * - Automatically quotes fields containing delimiter, quote char, newline, or null string
 * - Escapes quote characters within quoted fields using escapeChar
 * - Column order is determined by the first row's dictionary keys
 * - Null values are serialized as nullString
 * - Cell variants are rendered by type: Integer as decimal, Boolean as 'true'/'false',
 *   DateTime as millisecond-precision ISO (no 'Z'), Blob as 0x-prefixed hex
 * - Use alwaysQuote: true to force quoting of all fields
 */
export const csv_serialize = East.platform("csv_serialize", [CsvDataType, CsvSerializeConfig], BlobType);
|
|
140
|
+
/**
 * Node.js implementation of CSV platform functions.
 *
 * Pass this array to {@link East.compile} to enable CSV operations.
 *
 * Each implementation wraps its helper in a try/catch that re-throws
 * EastError unchanged (it already carries location info) and wraps any
 * other throwable in an EastError with a synthetic location and `cause`.
 */
export const CsvImpl = [
    csv_parse.implement((blob, config) => {
        try {
            return parseCsv(blob, config);
        }
        catch (err) {
            if (err instanceof EastError)
                throw err;
            // Non-Error throwables (strings, plain objects) have no .message;
            // stringify them instead of interpolating `undefined`. This matches
            // the `err instanceof Error ? err.message : String(err)` pattern
            // used in convertNativeToCell.
            const message = err instanceof Error ? err.message : String(err);
            throw new EastError(`CSV parsing failed: ${message}`, {
                location: { filename: "csv_parse", line: 0n, column: 0n },
                cause: err
            });
        }
    }),
    csv_serialize.implement((data, config) => {
        try {
            return serializeCsv(data, config);
        }
        catch (err) {
            if (err instanceof EastError)
                throw err;
            const message = err instanceof Error ? err.message : String(err);
            throw new EastError(`CSV serialization failed: ${message}`, {
                location: { filename: "csv_serialize", line: 0n, column: 0n },
                cause: err
            });
        }
    }),
];
|
|
173
|
+
// Helper Functions
/**
 * Parses a UTF-8 CSV blob into an array of row Maps (column name -> LiteralValueType variant).
 *
 * Operates directly on the byte array: field boundaries are located by matching
 * the encoded delimiter/quote/escape/newline byte patterns, and only the bytes
 * of each field are decoded back to a string. Quoted fields may contain embedded
 * delimiters and newlines; both quote-as-escape ("") and distinct-escape (\")
 * modes are supported.
 *
 * @param blob - UTF-8 encoded CSV bytes (indexable, with .length and .slice)
 * @param config - CsvParseConfig value (option-typed settings plus booleans)
 * @returns Array of Map rows whose cells are variants from convertNativeToCell
 * @throws {EastError} on invalid configuration or malformed CSV
 */
function parseCsv(blob, config) {
    const encoder = new TextEncoder();
    const decoder = new TextDecoder();
    // Extract config with defaults: option 'none' falls back to RFC 4180-style values.
    const delimiter = config.delimiter.type === 'some' ? config.delimiter.value : ',';
    const quoteChar = config.quoteChar.type === 'some' ? config.quoteChar.value : '"';
    const escapeChar = config.escapeChar.type === 'some' ? config.escapeChar.value : '"';
    const nullString = config.nullString.type === 'some' ? config.nullString.value : '';
    const hasHeader = config.hasHeader;
    const skipEmptyLines = config.skipEmptyLines;
    const trimFields = config.trimFields;
    // Resolve a column's declared type; anything unspecified parses as String.
    const getColumnType = (col) => {
        if (config.columns.type === 'none') {
            return variant('String', null);
        }
        else if (!col) {
            return variant('String', null);
        }
        else if (config.columns.value?.has(col)) {
            return config.columns.value?.get(col);
        }
        else {
            return variant('String', null);
        }
    };
    // Auto-detect newline if not specified.
    let newlines;
    if (config.newline.type === 'some') {
        newlines = [config.newline.value];
    }
    else {
        // Auto-detect: try \r\n first so CRLF is not split into CR + LF.
        newlines = ['\r\n', '\n', '\r'];
    }
    // Validation of single-character / non-empty constraints.
    if (quoteChar.length !== 1) {
        throw new EastError(`quoteChar must have length 1, got ${JSON.stringify(quoteChar)}`, {
            location: { filename: "csv_parse", line: 0n, column: 0n }
        });
    }
    if (escapeChar.length !== 1) {
        throw new EastError(`escapeChar must have length 1, got ${JSON.stringify(escapeChar)}`, {
            location: { filename: "csv_parse", line: 0n, column: 0n }
        });
    }
    if (delimiter.length === 0) {
        throw new EastError(`delimiter must not be empty`, {
            location: { filename: "csv_parse", line: 0n, column: 0n }
        });
    }
    // Pre-encode the special tokens so matching happens on raw bytes.
    const delimiterBuffer = encoder.encode(delimiter);
    const newlineBuffers = newlines.map(nl => encoder.encode(nl));
    const quoteCharBuffer = encoder.encode(quoteChar);
    const escapeCharBuffer = encoder.encode(escapeChar);
    // Skip UTF-8 BOM if present.
    let offset = 0;
    if (blob.length >= 3 && blob[0] === 0xef && blob[1] === 0xbb && blob[2] === 0xbf) {
        offset = 3;
    }
    // Helper: does `pattern` occur at byte offset `pos`?
    // NOTE(review): this local `match` shadows the `match` imported from
    // "@elaraai/east"; intentional here, but easy to trip over when editing.
    const match = (pattern, pos) => {
        if (pos + pattern.length > blob.length)
            return false;
        for (let i = 0; i < pattern.length; i++) {
            if (blob[pos + i] !== pattern[i])
                return false;
        }
        return true;
    };
    // Helper: scan the next field starting at `pos`.
    // Returns: [terminator, contentStart, contentEnd, nextOffset] where terminator
    // is 'field' (delimiter follows), 'record' (newline follows), or 'eof'.
    // For quoted fields: contentStart is after the opening quote, contentEnd before
    // the closing quote (escapes NOT yet resolved — see parseFieldValue).
    // For unquoted fields: contentStart equals the input pos, contentEnd is before
    // the delimiter/newline.
    const nextField = (pos, row, col) => {
        if (pos >= blob.length) {
            return ['eof', blob.length, blob.length, blob.length];
        }
        // Check if field is quoted.
        if (match(quoteCharBuffer, pos)) {
            pos += quoteCharBuffer.length;
            const fieldStart = pos; // Content starts after opening quote
            while (pos < blob.length) {
                if (match(escapeCharBuffer, pos)) {
                    pos += escapeCharBuffer.length;
                    if (match(quoteCharBuffer, pos)) {
                        // Escaped quote: consume and keep scanning.
                        pos += quoteCharBuffer.length;
                    }
                    else if (quoteChar === escapeChar) {
                        // Quote-as-escape mode: a doubled quote is a literal quote
                        // (handled above); a single quote here closes the field, so
                        // whatever follows must be a newline, delimiter, or EOF.
                        const fieldEnd = pos - escapeCharBuffer.length; // Before the closing quote
                        for (const newlineBuffer of newlineBuffers) {
                            if (match(newlineBuffer, pos)) {
                                return ['record', fieldStart, fieldEnd, pos + newlineBuffer.length];
                            }
                        }
                        if (match(delimiterBuffer, pos)) {
                            return ['field', fieldStart, fieldEnd, pos + delimiterBuffer.length];
                        }
                        if (pos >= blob.length) {
                            return ['eof', fieldStart, fieldEnd, blob.length];
                        }
                        // Closing quote followed by ordinary data = malformed CSV.
                        throw new EastError(`Expected delimiter or newline after closing quote in row ${row}, column ${col}`, {
                            location: { filename: "csv_parse", line: row, column: BigInt(col) }
                        });
                    }
                    else if (match(escapeCharBuffer, pos)) {
                        // Escaped escape char (e.g. \\) — consume the second one.
                        pos += escapeCharBuffer.length;
                    }
                    else {
                        // Distinct-escape mode only allows escaping the quote or the escape char itself.
                        throw new EastError(`Invalid escape sequence in row ${row}, column ${col}`, {
                            location: { filename: "csv_parse", line: row, column: BigInt(col) }
                        });
                    }
                }
                else if (match(quoteCharBuffer, pos)) {
                    // End of quoted field (distinct-escape mode).
                    const fieldEnd = pos; // Before the closing quote
                    pos += quoteCharBuffer.length;
                    // Only a newline, delimiter, or EOF may follow the closing quote.
                    for (const newlineBuffer of newlineBuffers) {
                        if (match(newlineBuffer, pos)) {
                            return ['record', fieldStart, fieldEnd, pos + newlineBuffer.length];
                        }
                    }
                    if (match(delimiterBuffer, pos)) {
                        return ['field', fieldStart, fieldEnd, pos + delimiterBuffer.length];
                    }
                    if (pos >= blob.length) {
                        return ['eof', fieldStart, fieldEnd, blob.length];
                    }
                    throw new EastError(`Expected delimiter or newline after closing quote in row ${row}, column ${col}`, {
                        location: { filename: "csv_parse", line: row, column: BigInt(col) }
                    });
                }
                else {
                    // Advance one UTF-8 code point (lead byte determines sequence length).
                    const charByte = blob[pos] || 0;
                    if (charByte >= 240) {
                        pos += 4;
                    }
                    else if (charByte >= 224) {
                        pos += 3;
                    }
                    else if (charByte >= 192) {
                        pos += 2;
                    }
                    else {
                        pos += 1;
                    }
                }
            }
            // Ran off the end of the blob while still inside the quotes.
            throw new EastError(`Unclosed quote in row ${row}, column ${col}`, {
                location: { filename: "csv_parse", line: row, column: BigInt(col) }
            });
        }
        else {
            // Unquoted field: scan until delimiter, newline, or EOF.
            const fieldStart = pos;
            while (pos < blob.length) {
                for (const newlineBuffer of newlineBuffers) {
                    if (match(newlineBuffer, pos)) {
                        return ['record', fieldStart, pos, pos + newlineBuffer.length];
                    }
                }
                if (match(delimiterBuffer, pos)) {
                    return ['field', fieldStart, pos, pos + delimiterBuffer.length];
                }
                // Advance one UTF-8 code point.
                const charByte = blob[pos] || 0;
                if (charByte >= 240) {
                    pos += 4;
                }
                else if (charByte >= 224) {
                    pos += 3;
                }
                else if (charByte >= 192) {
                    pos += 2;
                }
                else {
                    pos += 1;
                }
            }
            return ['eof', fieldStart, pos, blob.length];
        }
    };
    // Helper: decode a field's bytes, resolve escapes, apply trim and null handling.
    // Note: start/end positions come from nextField, which already strips quotes for
    // quoted fields; this function detects "was quoted" by peeking at the byte just
    // before `start`.
    const parseFieldValue = (start, end, allowNull = true) => {
        let str = decoder.decode(blob.slice(start, end));
        // Determine if this was a quoted field by checking if the byte before start is a quote.
        const wasQuoted = start > 0 && match(quoteCharBuffer, start - quoteCharBuffer.length);
        if (wasQuoted) {
            // Unescape (nextField only located the content; escapes are still encoded).
            if (quoteChar === escapeChar) {
                // Quote-as-escape: every doubled quote collapses to one.
                str = str.replaceAll(quoteChar + quoteChar, quoteChar);
            }
            else {
                // Distinct escape char: resolve escChar+quote and escChar+escChar pairs.
                let result = '';
                let i = 0;
                while (i < str.length) {
                    if (str[i] === escapeChar && i + 1 < str.length) {
                        if (str[i + 1] === quoteChar || str[i + 1] === escapeChar) {
                            result += str[i + 1];
                            i += 2;
                        }
                        else {
                            result += str[i];
                            i += 1;
                        }
                    }
                    else {
                        result += str[i];
                        i += 1;
                    }
                }
                str = result;
            }
        }
        if (trimFields) {
            str = str.trim();
        }
        // Fields equal to nullString become null (disabled for header names).
        if (allowNull && str === nullString) {
            return null;
        }
        return str;
    };
    // Parse header row (if configured) to establish column names and count.
    let headers = [];
    let rowNumber = 0n;
    if (hasHeader) {
        rowNumber = 1n; // Header is row 1
        let headerEnd = false;
        let columnIndex = 0;
        while (!headerEnd && offset < blob.length) {
            const [terminator, start, end, newOffset] = nextField(offset, rowNumber, columnIndex + 1);
            const value = parseFieldValue(start, end, false); // Headers cannot be null
            if (value === null) {
                throw new EastError(`Header column name cannot be null (row 1, column ${columnIndex + 1})`, {
                    location: { filename: "csv_parse", line: 1n, column: BigInt(columnIndex + 1) }
                });
            }
            headers.push(value);
            offset = newOffset;
            columnIndex++;
            if (terminator === 'record' || terminator === 'eof') {
                headerEnd = true;
            }
        }
        rowNumber = 2n; // Data starts at row 2 after header
    }
    else {
        rowNumber = 1n; // No header, data starts at row 1
    }
    // Pre-compute column types for header columns (avoids a lookup per cell).
    const columnTypes = headers.map(h => getColumnType(h));
    // Parse data rows.
    const rows = [];
    let fieldIndex = 0;
    let currentRow = new Map();
    while (offset < blob.length) {
        const [terminator, start, end, newOffset] = nextField(offset, rowNumber, fieldIndex + 1);
        // Skip empty lines if configured (a zero-length sole field ending the record).
        if (skipEmptyLines && start === end && terminator === 'record') {
            offset = newOffset;
            rowNumber++;
            continue;
        }
        const value = parseFieldValue(start, end);
        // Determine column name: from header, or synthesized "column_N".
        let columnName;
        if (hasHeader) {
            if (fieldIndex >= headers.length) {
                throw new EastError(`Too many fields in row ${rowNumber} (expected ${headers.length} columns, found at least ${fieldIndex + 1})`, {
                    location: { filename: "csv_parse", line: rowNumber, column: BigInt(fieldIndex + 1) }
                });
            }
            columnName = headers[fieldIndex];
        }
        else {
            columnName = `column_${fieldIndex}`;
        }
        // Add the cell to the current row.
        if (value === null) {
            currentRow.set(columnName, variant('Null', null));
        }
        else {
            try {
                // Convert raw CSV value to East variant using convertNativeToCell.
                // For headers, use the pre-computed columnTypes; for no-header, compute on-the-fly.
                const colType = hasHeader ? columnTypes[fieldIndex] : getColumnType(columnName);
                const cellValue = convertNativeToCell(value, colType);
                currentRow.set(columnName, cellValue);
            }
            catch (err) {
                // NOTE(review): err may not be an Error here, in which case
                // err.message reads as 'undefined' — consider String(err).
                throw new EastError(`Failed to parse value for header ${columnName} in row ${rowNumber}, column ${fieldIndex + 1}: ${err.message}`, {
                    location: { filename: "csv_parse", line: rowNumber, column: BigInt(fieldIndex + 1) }
                });
            }
        }
        fieldIndex++;
        offset = newOffset;
        if (terminator === 'record' || terminator === 'eof') {
            // Validate field count against the header.
            if (hasHeader && fieldIndex < headers.length) {
                throw new EastError(`Too few fields in row ${rowNumber} (expected ${headers.length} columns, got ${fieldIndex})`, {
                    location: { filename: "csv_parse", line: rowNumber, column: BigInt(fieldIndex + 1) }
                });
            }
            // Add row if not empty.
            if (fieldIndex > 0) {
                rows.push(currentRow);
            }
            // Reset accumulators for the next row.
            currentRow = new Map();
            fieldIndex = 0;
            rowNumber++;
        }
    }
    return rows;
}
|
|
500
|
+
/**
 * Converts native CSV string to East LiteralValueType variant for parsing.
 *
 * Dispatches on the declared column type and converts the raw field text
 * accordingly: BigInt for Integer (64-bit range checked), Number for Float
 * (with NaN/Infinity spellings), strict 'true'/'false' for Boolean, Date for
 * DateTime, and 0x-prefixed hex decoding for Blob.
 *
 * @param value - Raw CSV string value
 * @param colType - Column type specification
 * @returns East variant value
 * @throws {Error} When value cannot be parsed as the specified type
 * @internal
 */
function convertNativeToCell(value, colType) {
    const typeName = colType.type;
    switch (typeName) {
        case 'Null':
            return variant('Null', null);
        case 'String':
            // Strings pass through untouched.
            return variant('String', value);
        case 'Integer': {
            // Integer parsing with edge cases (matching East's parseInteger logic).
            const trimmed = value.trim();
            if (trimmed === '') {
                throw new Error(`Cannot parse empty string as Integer`);
            }
            try {
                const parsed = BigInt(trimmed);
                // Enforce 64-bit signed integer range: -2^63 .. 2^63-1.
                if (parsed < -9223372036854775808n || parsed > 9223372036854775807n) {
                    throw new Error(`Integer out of range (must be 64-bit signed)`);
                }
                return variant('Integer', parsed);
            }
            catch (err) {
                // Our own range error propagates as-is; BigInt syntax errors get wrapped.
                if (err instanceof Error && err.message.includes('Integer out of range')) {
                    throw err;
                }
                throw new Error(`Cannot parse "${value}" as Integer: ${err instanceof Error ? err.message : String(err)}`);
            }
        }
        case 'Float': {
            // Float parsing with edge cases (matching East's parseFloat logic).
            const trimmed = value.trim();
            if (trimmed === '') {
                throw new Error(`Cannot parse empty string as Float`);
            }
            // Special IEEE values use their canonical spellings.
            if (trimmed === 'NaN') {
                return variant('Float', NaN);
            }
            if (trimmed === 'Infinity') {
                return variant('Float', Infinity);
            }
            if (trimmed === '-Infinity') {
                return variant('Float', -Infinity);
            }
            const parsed = Number(trimmed);
            if (Number.isNaN(parsed)) {
                throw new Error(`Cannot parse "${value}" as Float`);
            }
            return variant('Float', parsed);
        }
        case 'Boolean': {
            // Only lowercase 'true'/'false' are accepted (matching East's parseBoolean logic).
            const trimmed = value.trim();
            if (trimmed === 'true') {
                return variant('Boolean', true);
            }
            if (trimmed === 'false') {
                return variant('Boolean', false);
            }
            throw new Error(`Cannot parse "${value}" as Boolean (expected 'true' or 'false')`);
        }
        case 'DateTime': {
            // DateTime parsing via the Date constructor (ISO 8601 expected).
            const trimmed = value.trim();
            if (trimmed === '') {
                throw new Error(`Cannot parse empty string as DateTime`);
            }
            const parsed = new Date(trimmed);
            if (Number.isNaN(parsed.getTime())) {
                throw new Error(`Cannot parse "${value}" as DateTime (expected ISO 8601 format)`);
            }
            return variant('DateTime', parsed);
        }
        case 'Blob': {
            // Blob parsing from a 0x-prefixed hex string (matching East format).
            const trimmed = value.trim();
            if (trimmed === '') {
                throw new Error(`Cannot parse empty string as Blob`);
            }
            if (!trimmed.startsWith('0x')) {
                throw new Error(`Cannot parse "${value}" as Blob (expected 0x-prefixed hex string)`);
            }
            const hexStr = trimmed.slice(2); // Remove 0x prefix
            if (hexStr.length % 2 !== 0) {
                throw new Error(`Cannot parse "${value}" as Blob (odd length hex string)`);
            }
            // Validate each hex pair and decode directly into the output buffer.
            const out = new Uint8Array(hexStr.length / 2);
            for (let i = 0; i < hexStr.length; i += 2) {
                const pair = hexStr.slice(i, i + 2);
                if (!/^[0-9a-fA-F]{2}$/.test(pair)) {
                    throw new Error(`Cannot parse "${value}" as Blob (invalid hex character)`);
                }
                out[i / 2] = parseInt(pair, 16);
            }
            return variant('Blob', out);
        }
        default:
            throw new Error(`Unknown column type: ${typeName}`);
    }
}
|
|
616
|
+
/**
 * Converts East LiteralValueType variant to native string for CSV serialization.
 *
 * Inverse of convertNativeToCell: each variant case is rendered in the textual
 * form that the parser accepts back.
 *
 * @param cell - East variant value
 * @param nullString - String to use for null values
 * @returns Native string value for CSV
 * @internal
 */
function convertCellToNative(cell, nullString) {
    return match(cell, {
        // Null cells serialize as the configured null marker.
        Null: () => nullString,
        // String values are used as-is.
        String: (value) => value,
        // BigInt renders as a plain decimal string (safe for the full 64-bit range).
        Integer: (value) => String(value),
        Float: (value) => {
            // Special IEEE values use the spellings the parser recognizes.
            if (Number.isNaN(value)) {
                return 'NaN';
            }
            if (!Number.isFinite(value)) {
                return value > 0 ? 'Infinity' : '-Infinity';
            }
            return String(value);
        },
        // Lowercase 'true'/'false' for consistency with parsing.
        Boolean: (value) => String(value),
        // ISO format truncated to millisecond precision (YYYY-MM-DDTHH:MM:SS.sss, no 'Z').
        DateTime: (value) => value.toISOString().substring(0, 23),
        // Binary data as 0x-prefixed lowercase hex, two digits per byte (matching East format).
        Blob: (value) => `0x${[...value].map(b => b.toString(16).padStart(2, '0')).join('')}`,
    });
}
|
|
659
|
+
/**
 * Serializes tabular data to CSV bytes.
 *
 * Column order is taken from the first row; cells missing (or undefined) in
 * later rows are emitted as `nullString`. Records are separated by `newline`
 * and the output ends with a trailing newline (standard CSV format). An empty
 * data array yields an empty byte buffer.
 *
 * @param data - Array of rows, each a Map from column name to an East variant cell
 * @param config - Options: delimiter, quoteChar, escapeChar, newline,
 *   includeHeader, nullString, alwaysQuote
 * @returns UTF-8 encoded CSV content
 * @throws EastError if quoteChar/escapeChar are not exactly one character or
 *   delimiter is empty
 */
function serializeCsv(data, config) {
    const { delimiter, quoteChar, escapeChar, newline, includeHeader, nullString, alwaysQuote } = config;
    // Validation
    if (quoteChar.length !== 1) {
        throw new EastError(`quoteChar must have length 1, got ${JSON.stringify(quoteChar)}`, {
            location: { filename: "csv_serialize", line: 0n, column: 0n }
        });
    }
    if (escapeChar.length !== 1) {
        throw new EastError(`escapeChar must have length 1, got ${JSON.stringify(escapeChar)}`, {
            location: { filename: "csv_serialize", line: 0n, column: 0n }
        });
    }
    if (delimiter.length === 0) {
        throw new EastError(`delimiter must not be empty`, {
            location: { filename: "csv_serialize", line: 0n, column: 0n }
        });
    }
    const lines = [];
    // Get column names from first row
    if (data.length === 0) {
        return new TextEncoder().encode('');
    }
    const columns = Array.from(data[0].keys());
    // Quote/escape a field when required (or unconditionally, if configured).
    const escapeField = (value) => {
        // BUGFIX: quote on any bare CR or LF, not only the exact configured
        // `newline` sequence — previously a field containing '\n' while
        // newline was '\r\n' was left unquoted and split the record.
        // The original `includes(newline)` check is kept for compatibility
        // with non-CR/LF newline configurations.
        const needsQuoting = alwaysQuote ||
            value.includes(delimiter) ||
            value.includes(quoteChar) ||
            value.includes(escapeChar) ||
            value.includes(newline) ||
            value.includes('\n') ||
            value.includes('\r') ||
            value === nullString; // quote real values that collide with the null marker
        if (!needsQuoting) {
            return value;
        }
        // Escape quotes and escape chars
        let escaped = value;
        if (quoteChar === escapeChar) {
            // RFC 4180 style: double the quote character.
            escaped = escaped.replaceAll(quoteChar, quoteChar + quoteChar);
        }
        else {
            // Escape the escape char first so inserted escapes are not re-escaped.
            escaped = escaped.replaceAll(escapeChar, escapeChar + escapeChar);
            escaped = escaped.replaceAll(quoteChar, escapeChar + quoteChar);
        }
        return quoteChar + escaped + quoteChar;
    };
    // Write header
    if (includeHeader) {
        const headerFields = columns.map(col => escapeField(col));
        lines.push(headerFields.join(delimiter));
    }
    // Write rows
    for (const row of data) {
        const fields = columns.map(col => {
            const cell = row.get(col);
            // Missing cell → null marker, emitted verbatim.
            if (!cell) {
                return nullString;
            }
            const value = convertCellToNative(cell, nullString);
            // Don't escape/quote null values - output as-is
            if (value === nullString) {
                return nullString;
            }
            return escapeField(value);
        });
        lines.push(fields.join(delimiter));
    }
    // Join lines with newline and add trailing newline (standard CSV format)
    return new TextEncoder().encode(lines.join(newline) + newline);
}
|
|
729
|
+
//# sourceMappingURL=csv.js.map
|