@procwire/codec-arrow 0.1.3 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +230 -115
- package/dist/codec.d.ts +654 -0
- package/dist/codec.d.ts.map +1 -0
- package/dist/codec.js +598 -0
- package/dist/codec.js.map +1 -0
- package/dist/index.d.ts +118 -40
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +73 -56
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/codec.js
ADDED
|
@@ -0,0 +1,598 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Apache Arrow IPC codec implementation for @procwire/transport.
|
|
3
|
+
*
|
|
4
|
+
* @remarks
|
|
5
|
+
* This is an internal module. Import from `@procwire/codec-arrow` instead.
|
|
6
|
+
*
|
|
7
|
+
* @internal
|
|
8
|
+
*/
|
|
9
|
+
import { tableFromIPC, tableToIPC } from "apache-arrow";
|
|
10
|
+
import { SerializationError } from "@procwire/transport";
|
|
11
|
+
/**
|
|
12
|
+
* High-performance Apache Arrow IPC serialization codec.
|
|
13
|
+
*
|
|
14
|
+
* Implements the {@link SerializationCodec} interface for use with
|
|
15
|
+
* @procwire/transport channels. Optimized for columnar data transfer,
|
|
16
|
+
* analytics workloads, and interoperability with data science tools.
|
|
17
|
+
*
|
|
18
|
+
* @remarks
|
|
19
|
+
* This codec uses zero-copy optimization where possible, wrapping the
|
|
20
|
+
* underlying ArrayBuffer instead of copying data. For maximum performance
|
|
21
|
+
* in trusted environments, disable input validation with `validateInput: false`.
|
|
22
|
+
*
|
|
23
|
+
* @example Basic usage
|
|
24
|
+
* ```ts
|
|
25
|
+
* import { tableFromArrays } from 'apache-arrow';
|
|
26
|
+
* import { ArrowCodec } from '@procwire/codec-arrow';
|
|
27
|
+
*
|
|
28
|
+
* const codec = new ArrowCodec();
|
|
29
|
+
*
|
|
30
|
+
* // Create a table with typed columns
|
|
31
|
+
* const table = tableFromArrays({
|
|
32
|
+
* id: Int32Array.from([1, 2, 3, 4, 5]),
|
|
33
|
+
* name: ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
|
|
34
|
+
* score: Float64Array.from([95.5, 87.3, 92.1, 88.7, 91.2]),
|
|
35
|
+
* active: [true, false, true, true, false]
|
|
36
|
+
* });
|
|
37
|
+
*
|
|
38
|
+
* // Serialize to Arrow IPC format
|
|
39
|
+
* const buffer = codec.serialize(table);
|
|
40
|
+
* console.log(`Serialized ${table.numRows} rows to ${buffer.length} bytes`);
|
|
41
|
+
*
|
|
42
|
+
* // Deserialize back to Table
|
|
43
|
+
* const decoded = codec.deserialize(buffer);
|
|
44
|
+
* console.log(decoded.numRows); // 5
|
|
45
|
+
* console.log(decoded.numCols); // 4
|
|
46
|
+
* console.log(decoded.schema.fields.map(f => f.name)); // ['id', 'name', 'score', 'active']
|
|
47
|
+
* ```
|
|
48
|
+
*
|
|
49
|
+
* @example With configuration options
|
|
50
|
+
* ```ts
|
|
51
|
+
* const codec = new ArrowCodec({
|
|
52
|
+
* format: 'stream', // Streaming format for IPC
|
|
53
|
+
* validateInput: false, // Skip validation for performance
|
|
54
|
+
* collectMetrics: true, // Track throughput
|
|
55
|
+
* });
|
|
56
|
+
*
|
|
57
|
+
* // Process batches
|
|
58
|
+
* for (const batch of batches) {
|
|
59
|
+
* channel.send(codec.serialize(batch));
|
|
60
|
+
* }
|
|
61
|
+
*
|
|
62
|
+
* // Check metrics
|
|
63
|
+
* console.log(`Processed ${codec.metrics?.rowsSerialized} rows`);
|
|
64
|
+
* ```
|
|
65
|
+
*
|
|
66
|
+
* @example File format for random access
|
|
67
|
+
* ```ts
|
|
68
|
+
* import { writeFileSync, readFileSync } from 'fs';
|
|
69
|
+
*
|
|
70
|
+
* const codec = new ArrowCodec({ format: 'file' });
|
|
71
|
+
*
|
|
72
|
+
* // Write to file
|
|
73
|
+
* const buffer = codec.serialize(table);
|
|
74
|
+
* writeFileSync('data.arrow', buffer);
|
|
75
|
+
*
|
|
76
|
+
* // Read from file (supports random access)
|
|
77
|
+
* const fileBuffer = readFileSync('data.arrow');
|
|
78
|
+
* const loadedTable = codec.deserialize(fileBuffer);
|
|
79
|
+
* ```
|
|
80
|
+
*
|
|
81
|
+
* @example Cross-language interoperability
|
|
82
|
+
* ```ts
|
|
83
|
+
* // Node.js side
|
|
84
|
+
* const codec = new ArrowCodec();
|
|
85
|
+
* const buffer = codec.serialize(table);
|
|
86
|
+
* socket.write(buffer);
|
|
87
|
+
*
|
|
88
|
+
* // Python side (PyArrow)
|
|
89
|
+
* // import pyarrow as pa
|
|
90
|
+
* // reader = pa.ipc.open_stream(buffer)
|
|
91
|
+
* // table = reader.read_all()
|
|
92
|
+
* ```
|
|
93
|
+
*
|
|
94
|
+
* @example Integration with @procwire/transport
|
|
95
|
+
* ```ts
|
|
96
|
+
* import { ArrowCodec } from '@procwire/codec-arrow';
|
|
97
|
+
* import { StreamChannel } from '@procwire/transport/channel';
|
|
98
|
+
*
|
|
99
|
+
* const codec = new ArrowCodec({ collectMetrics: true });
|
|
100
|
+
*
|
|
101
|
+
* const channel = new StreamChannel({
|
|
102
|
+
* transport,
|
|
103
|
+
* framing,
|
|
104
|
+
* serialization: codec,
|
|
105
|
+
* protocol
|
|
106
|
+
* });
|
|
107
|
+
*
|
|
108
|
+
* // Send Arrow tables through the channel
|
|
109
|
+
* channel.send(table);
|
|
110
|
+
* ```
|
|
111
|
+
*
|
|
112
|
+
* @see {@link ArrowCodecOptions} for configuration options
|
|
113
|
+
* @see {@link ArrowCodecMetrics} for metrics structure
|
|
114
|
+
* @see {@link createFastArrowCodec} for maximum performance
|
|
115
|
+
* @see {@link createMonitoredArrowCodec} for monitoring
|
|
116
|
+
* @see {@link createFileArrowCodec} for file format
|
|
117
|
+
*/
|
|
118
|
+
export class ArrowCodec {
    /**
     * Unique identifier for this codec.
     *
     * Used by codec registries to identify and look up codecs by name.
     * The value `"arrow"` identifies this as an Apache Arrow codec.
     *
     * @readonly
     */
    name = "arrow";
    /**
     * MIME type for Arrow IPC encoded data, derived from {@link format}:
     * - Stream format: `application/vnd.apache.arrow.stream`
     * - File format: `application/vnd.apache.arrow.file`
     *
     * @readonly
     * @see {@link https://arrow.apache.org/docs/format/IPC.html | Apache Arrow IPC specification}
     */
    contentType;
    /** IPC format passed to `tableToIPC`: `"stream"` (default) or `"file"`. */
    format;
    /** When `true` (default), inputs are type-checked before encoding/decoding. */
    validateInput;
    /** When `true`, counters are kept in `_metrics`; defaults to `false`. */
    collectMetrics;
    /** Live counters object, or `null` when metrics collection is disabled. */
    _metrics = null;
    /**
     * Creates a new ArrowCodec instance.
     *
     * @param options - Optional configuration.
     * @param options.format - `"stream"` (default) or `"file"`.
     * @param options.validateInput - Validate inputs before processing (default `true`).
     * @param options.collectMetrics - Track counts, bytes, rows and errors (default `false`).
     *
     * @throws {TypeError} When `options.format` is neither `"stream"` nor `"file"`.
     *
     * @example Default configuration
     * ```ts
     * const codec = new ArrowCodec();
     * ```
     *
     * @example With options
     * ```ts
     * const codec = new ArrowCodec({
     *   format: 'file',
     *   validateInput: true,
     *   collectMetrics: true,
     * });
     * ```
     */
    constructor(options) {
        const format = options?.format ?? "stream";
        // Reject unknown formats up front. Otherwise the content type below would
        // fall back to "stream" while apache-arrow's tableToIPC selects the file
        // writer for any value other than "stream", so the advertised MIME type
        // and the produced bytes would disagree.
        if (format !== "stream" && format !== "file") {
            throw new TypeError(`Invalid format: expected "stream" or "file", got ${String(format)}`);
        }
        this.format = format;
        this.validateInput = options?.validateInput ?? true;
        this.collectMetrics = options?.collectMetrics ?? false;
        // Set content type based on format
        this.contentType =
            format === "file"
                ? "application/vnd.apache.arrow.file"
                : "application/vnd.apache.arrow.stream";
        if (this.collectMetrics) {
            // NOTE: `bytesSerialised` keeps its existing spelling; it is part of
            // the metrics shape returned to callers via the `metrics` getter.
            this._metrics = {
                serializeCount: 0,
                deserializeCount: 0,
                bytesSerialised: 0,
                bytesDeserialized: 0,
                rowsSerialized: 0,
                rowsDeserialized: 0,
                serializeErrors: 0,
                deserializeErrors: 0,
            };
        }
    }
    /**
     * Returns current metrics if `collectMetrics` is enabled.
     *
     * A shallow copy is returned so callers cannot modify the live counters.
     *
     * @returns A copy of the metrics, or `null` if metrics are disabled.
     *
     * @example
     * ```ts
     * const codec = new ArrowCodec({ collectMetrics: true });
     * codec.serialize(table1);
     * codec.serialize(table2);
     *
     * const metrics = codec.metrics;
     * if (metrics) {
     *   console.log(`Serialized ${metrics.serializeCount} tables`);
     *   console.log(`Total rows: ${metrics.rowsSerialized}`);
     *   console.log(`Total bytes: ${metrics.bytesSerialised}`);
     * }
     * ```
     *
     * @see {@link resetMetrics} to reset all metrics to zero
     */
    get metrics() {
        return this._metrics ? { ...this._metrics } : null;
    }
    /**
     * Resets all metrics to zero.
     *
     * Use this to start a fresh measurement period. Has no effect if metrics
     * collection is disabled.
     *
     * @example
     * ```ts
     * const codec = new ArrowCodec({ collectMetrics: true });
     * for (const table of batch1) {
     *   codec.serialize(table);
     * }
     * console.log('Batch 1:', codec.metrics);
     * codec.resetMetrics();
     * ```
     */
    resetMetrics() {
        if (this._metrics) {
            this._metrics.serializeCount = 0;
            this._metrics.deserializeCount = 0;
            this._metrics.bytesSerialised = 0;
            this._metrics.bytesDeserialized = 0;
            this._metrics.rowsSerialized = 0;
            this._metrics.rowsDeserialized = 0;
            this._metrics.serializeErrors = 0;
            this._metrics.deserializeErrors = 0;
        }
    }
    /**
     * Serializes an Apache Arrow Table to IPC format.
     *
     * Encodes the table with `tableToIPC` using the configured format and wraps
     * the resulting bytes in a `Buffer` that shares the same memory.
     *
     * @param value - Apache Arrow Table to serialize.
     * @returns Buffer containing the Arrow IPC encoded data.
     *
     * @throws {SerializationError} When input does not look like an Arrow Table
     *   (if validation is enabled).
     * @throws {SerializationError} When encoding fails; the original error is
     *   passed as the second constructor argument.
     *
     * @example Basic serialization
     * ```ts
     * import { tableFromArrays } from 'apache-arrow';
     *
     * const codec = new ArrowCodec();
     * const table = tableFromArrays({
     *   id: [1, 2, 3],
     *   name: ['Alice', 'Bob', 'Charlie']
     * });
     *
     * const buffer = codec.serialize(table);
     * console.log(`Serialized to ${buffer.length} bytes`);
     * ```
     *
     * @see {@link deserialize} for the reverse operation
     */
    serialize(value) {
        // Input validation (can be disabled for performance). This runs outside
        // the try block so a rejected input is counted as exactly one error.
        if (this.validateInput && !this.isTable(value)) {
            if (this._metrics) {
                this._metrics.serializeErrors++;
            }
            throw new SerializationError("Invalid input: expected Apache Arrow Table", new TypeError("Input is not an Arrow Table"));
        }
        try {
            const bytes = tableToIPC(value, this.format);
            // ZERO-COPY: Buffer.from(arrayBuffer, offset, length) creates a view
            // over the same memory instead of copying it.
            const buffer = Buffer.from(bytes.buffer, bytes.byteOffset, bytes.byteLength);
            if (this._metrics) {
                this._metrics.serializeCount++;
                this._metrics.bytesSerialised += buffer.length;
                this._metrics.rowsSerialized += value.numRows;
            }
            return buffer;
        }
        catch (error) {
            if (this._metrics) {
                this._metrics.serializeErrors++;
            }
            throw new SerializationError(`Failed to encode Arrow table: ${error instanceof Error ? error.message : String(error)}`, error);
        }
    }
    /**
     * Deserializes Arrow IPC data back to an Apache Arrow Table.
     *
     * @param buffer - Buffer or Uint8Array containing Arrow IPC encoded data.
     * @returns The Table produced by `tableFromIPC`.
     *
     * @throws {SerializationError} When input is null, undefined, empty,
     *   or not a Buffer/Uint8Array (if validation is enabled).
     * @throws {SerializationError} When decoding fails; the original error is
     *   passed as the second constructor argument.
     *
     * @example Basic deserialization
     * ```ts
     * const codec = new ArrowCodec();
     * const original = tableFromArrays({ id: [1, 2, 3], name: ['A', 'B', 'C'] });
     * const decoded = codec.deserialize(codec.serialize(original));
     *
     * console.log(decoded.numRows); // 3
     * console.log(decoded.numCols); // 2
     * ```
     *
     * @example Error handling
     * ```ts
     * try {
     *   codec.deserialize(Buffer.from('invalid data'));
     * } catch (error) {
     *   if (error instanceof SerializationError) {
     *     console.error('Decode failed:', error.message);
     *   }
     * }
     * ```
     *
     * @see {@link serialize} for the reverse operation
     */
    deserialize(buffer) {
        // Input validation (can be disabled for performance)
        if (this.validateInput) {
            if (buffer === null || buffer === undefined) {
                if (this._metrics) {
                    this._metrics.deserializeErrors++;
                }
                throw new SerializationError(`Invalid input: expected Buffer or Uint8Array, got ${buffer === null ? "null" : "undefined"}`, new TypeError("Invalid input type"));
            }
            if (!Buffer.isBuffer(buffer) && !(buffer instanceof Uint8Array)) {
                if (this._metrics) {
                    this._metrics.deserializeErrors++;
                }
                throw new SerializationError(`Invalid input: expected Buffer or Uint8Array, got ${typeof buffer}`, new TypeError("Invalid input type"));
            }
            if (buffer.length === 0) {
                if (this._metrics) {
                    this._metrics.deserializeErrors++;
                }
                throw new SerializationError("Invalid input: buffer is empty", new Error("Empty buffer"));
            }
        }
        try {
            const table = tableFromIPC(buffer);
            if (this._metrics) {
                this._metrics.deserializeCount++;
                // Prefer byteLength: with validation disabled the input may be an
                // ArrayBuffer, which has no `length`, and `+= undefined` would
                // turn the counter into NaN until the next reset.
                this._metrics.bytesDeserialized += buffer.byteLength ?? buffer.length;
                this._metrics.rowsDeserialized += table.numRows;
            }
            return table;
        }
        catch (error) {
            if (this._metrics) {
                this._metrics.deserializeErrors++;
            }
            throw new SerializationError(`Failed to decode Arrow table: ${error instanceof Error ? error.message : String(error)}`, error);
        }
    }
    /**
     * Checks if value looks like an Apache Arrow Table.
     *
     * Uses duck-typing rather than `instanceof`: the value must be a non-null
     * object with numeric `numRows` and `numCols`, a non-null object `schema`,
     * and a `getChild` function.
     *
     * @param value - Value to check.
     * @returns `true` if value appears to be an Arrow Table.
     *
     * @internal
     */
    isTable(value) {
        if (!value || typeof value !== "object") {
            return false;
        }
        return (typeof value.numRows === "number" &&
            typeof value.numCols === "number" &&
            typeof value.schema === "object" &&
            value.schema !== null &&
            typeof value.getChild === "function");
    }
}
|
|
451
|
+
/**
 * Builds an ArrowCodec tuned for raw throughput.
 *
 * The returned codec is constructed with `validateInput: false` and
 * `collectMetrics: false`, so no per-call input checks or counters are
 * applied. Invalid input will therefore surface as unclear errors or
 * undefined behavior rather than a descriptive validation error.
 *
 * @param format - IPC format to use. Defaults to `'stream'`.
 *
 * @returns An ArrowCodec with validation and metrics disabled.
 *
 * @example
 * ```ts
 * // For trusted internal IPC between your own processes
 * const codec = createFastArrowCodec('stream');
 *
 * // Maximum performance - no validation overhead
 * const buffer = codec.serialize(table);
 * ```
 *
 * @remarks
 * Only use this in trusted environments where:
 * - Input always comes from your own code
 * - Tables are guaranteed to be valid Arrow Tables
 * - Buffers are guaranteed to be valid Arrow IPC data
 *
 * @see {@link ArrowCodec} for the full codec with validation
 * @see {@link createMonitoredArrowCodec} for monitoring support
 */
export function createFastArrowCodec(format = "stream") {
    const fastOptions = { format, validateInput: false, collectMetrics: false };
    return new ArrowCodec(fastOptions);
}
|
|
488
|
+
/**
 * Builds an ArrowCodec that always collects metrics.
 *
 * The supplied options are copied and `collectMetrics` is forced to `true`,
 * so the resulting codec tracks serialize/deserialize counts, bytes, rows
 * and error counts. Useful for monitoring throughput, debugging issues,
 * and capacity planning.
 *
 * @param options - Additional codec options. Any `collectMetrics` value
 *   supplied here is overridden with `true`.
 *
 * @returns An ArrowCodec with metrics collection enabled.
 *
 * @example Basic monitoring
 * ```ts
 * const codec = createMonitoredArrowCodec();
 *
 * for (const table of tables) {
 *   codec.serialize(table);
 * }
 *
 * const metrics = codec.metrics!;
 * console.log(`Tables: ${metrics.serializeCount}`);
 * console.log(`Rows: ${metrics.rowsSerialized}`);
 * console.log(`Bytes: ${metrics.bytesSerialised}`);
 * console.log(`Errors: ${metrics.serializeErrors}`);
 * ```
 *
 * @example With additional options
 * ```ts
 * const codec = createMonitoredArrowCodec({
 *   format: 'file',
 *   validateInput: false, // Trust input for performance
 * });
 * ```
 *
 * @example Periodic reporting
 * ```ts
 * const codec = createMonitoredArrowCodec();
 *
 * setInterval(() => {
 *   const m = codec.metrics;
 *   if (m) {
 *     console.log(`Throughput: ${m.rowsSerialized} rows, ${m.bytesSerialised} bytes`);
 *     codec.resetMetrics(); // Reset for next interval
 *   }
 * }, 60000); // Report every minute
 * ```
 *
 * @see {@link ArrowCodecMetrics} for the metrics structure
 * @see {@link ArrowCodec.metrics} for accessing metrics
 * @see {@link ArrowCodec.resetMetrics} for resetting metrics
 */
export function createMonitoredArrowCodec(options) {
    // Later sources win, so the caller cannot switch metrics off.
    const monitoredOptions = Object.assign({}, options, { collectMetrics: true });
    return new ArrowCodec(monitoredOptions);
}
|
|
548
|
+
/**
 * Builds an ArrowCodec that always uses the Arrow file format.
 *
 * The supplied options are copied and `format` is forced to `'file'`.
 * The file format includes a footer with schema and record batch offsets
 * for random access; use it when writing Arrow data to disk or when
 * random access to record batches is required.
 *
 * @param options - Additional codec options. Any `format` value supplied
 *   here is overridden with `'file'`.
 *
 * @returns An ArrowCodec configured for file format.
 *
 * @example Writing to disk
 * ```ts
 * import { writeFileSync, readFileSync } from 'fs';
 *
 * const codec = createFileArrowCodec();
 *
 * // Serialize with file format (includes footer)
 * const buffer = codec.serialize(table);
 * writeFileSync('data.arrow', buffer);
 *
 * // Read back
 * const loaded = codec.deserialize(readFileSync('data.arrow'));
 * ```
 *
 * @example With additional options
 * ```ts
 * const codec = createFileArrowCodec({
 *   validateInput: true,
 *   collectMetrics: true,
 * });
 * ```
 *
 * @remarks
 * The file format is larger than stream format due to the footer,
 * but enables random access to record batches without reading the
 * entire file. For IPC where you read data sequentially, prefer
 * the default stream format.
 *
 * @see {@link ArrowCodec} for stream format (default)
 * @see {@link ArrowIPCFormat} for format differences
 */
export function createFileArrowCodec(options) {
    // Later sources win, so the caller cannot select another format.
    const fileOptions = Object.assign({}, options, { format: "file" });
    return new ArrowCodec(fileOptions);
}
|
|
598
|
+
//# sourceMappingURL=codec.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"codec.js","sourceRoot":"","sources":["../src/codec.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAExD,OAAO,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AAgMzD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0GG;AACH,MAAM,OAAO,UAAU;IACrB;;;;;;;OAOG;IACM,IAAI,GAAG,OAAO,CAAC;IAExB;;;;;;;;;;;OAWG;IACM,WAAW,CAAS;IAEZ,MAAM,CAAiB;IACvB,aAAa,CAAU;IACvB,cAAc,CAAU;IACjC,QAAQ,GAA6B,IAAI,CAAC;IAElD;;;;;;;;;;;;;;;;;;;OAmBG;IACH,YAAY,OAA2B;QACrC,IAAI,CAAC,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,QAAQ,CAAC;QAC1C,IAAI,CAAC,aAAa,GAAG,OAAO,EAAE,aAAa,IAAI,IAAI,CAAC;QACpD,IAAI,CAAC,cAAc,GAAG,OAAO,EAAE,cAAc,IAAI,KAAK,CAAC;QAEvD,mCAAmC;QACnC,IAAI,CAAC,WAAW;YACd,IAAI,CAAC,MAAM,KAAK,MAAM;gBACpB,CAAC,CAAC,mCAAmC;gBACrC,CAAC,CAAC,qCAAqC,CAAC;QAE5C,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,IAAI,CAAC,QAAQ,GAAG;gBACd,cAAc,EAAE,CAAC;gBACjB,gBAAgB,EAAE,CAAC;gBACnB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,cAAc,EAAE,CAAC;gBACjB,gBAAgB,EAAE,CAAC;gBACnB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;aACrB,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;;;;OA0BG;IACH,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IACrD,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;;;;OA0BG;IACH,YAAY;QACV,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,IAAI,CAAC,QAAQ,CAAC,cAAc,GAAG,CAAC,CAAC;YACjC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,GAAG,CAAC,CAAC;YACnC,IAAI,CAAC,QAAQ,CAAC,eAAe,GAAG,CAAC,CAAC;YAClC,IAAI,CAAC,QAAQ,CAAC,iBAAiB,GAAG,CAAC,CAAC;YACpC,IAAI,CAAC,QAAQ,CAAC,cAAc,GAAG,CAAC,CAAC;YACjC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,GAAG,CAAC,CAAC;YACnC,IAAI,CAAC,QAAQ,CAAC,eAAe,GAAG,CAAC,CAAC;YAClC,IAAI,CAAC,QAAQ,CAAC,iBAAiB,GAAG,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA4CG;IACH,SAAS,CAAC,KAAY;QACpB,qDAAqD;QACrD,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;gBACzB,IAAI,IAAI,CAAC,QAAQ;oBAAE,IAAI,CAAC,QAAQ,CAAC,eAAe,EAAE,CAAC;gBACnD,MA
AM,IAAI,kBAAkB,CAC1B,4CAA4C,EAC5C,IAAI,SAAS,CAAC,6BAA6B,CAAC,CAC7C,CAAC;YACJ,CAAC;QACH,CAAC;QAED,IAAI,CAAC;YACH,0BAA0B;YAC1B,MAAM,UAAU,GAAG,UAAU,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;YAElD,yDAAyD;YACzD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,UAAU,CAAC,UAAU,EAAE,UAAU,CAAC,UAAU,CAAC,CAAC;YAE5F,iBAAiB;YACjB,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAClB,IAAI,CAAC,QAAQ,CAAC,cAAc,EAAE,CAAC;gBAC/B,IAAI,CAAC,QAAQ,CAAC,eAAe,IAAI,MAAM,CAAC,MAAM,CAAC;gBAC/C,IAAI,CAAC,QAAQ,CAAC,cAAc,IAAI,KAAK,CAAC,OAAO,CAAC;YAChD,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,IAAI,CAAC,QAAQ;gBAAE,IAAI,CAAC,QAAQ,CAAC,eAAe,EAAE,CAAC;YACnD,MAAM,IAAI,kBAAkB,CAC1B,iCAAiC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,EACzF,KAAK,CACN,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA8DG;IACH,WAAW,CAAC,MAAc;QACxB,qDAAqD;QACrD,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,6CAA6C;YAC7C,MAAM,KAAK,GAAG,MAAiB,CAAC;YAEhC,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;gBAC1C,IAAI,IAAI,CAAC,QAAQ;oBAAE,IAAI,CAAC,QAAQ,CAAC,iBAAiB,EAAE,CAAC;gBACrD,MAAM,IAAI,kBAAkB,CAC1B,qDAAqD,KAAK,KAAK,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,WAAW,EAAE,EAC5F,IAAI,SAAS,CAAC,oBAAoB,CAAC,CACpC,CAAC;YACJ,CAAC;YAED,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,YAAY,UAAU,CAAC,EAAE,CAAC;gBAC9D,IAAI,IAAI,CAAC,QAAQ;oBAAE,IAAI,CAAC,QAAQ,CAAC,iBAAiB,EAAE,CAAC;gBACrD,MAAM,IAAI,kBAAkB,CAC1B,qDAAqD,OAAO,KAAK,EAAE,EACnE,IAAI,SAAS,CAAC,oBAAoB,CAAC,CACpC,CAAC;YACJ,CAAC;YAED,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACxB,IAAI,IAAI,CAAC,QAAQ;oBAAE,IAAI,CAAC,QAAQ,CAAC,iBAAiB,EAAE,CAAC;gBACrD,MAAM,IAAI,kBAAkB,CAAC,gCAAgC,EAAE,IAAI,KAAK,CAAC,cAAc,CAAC,CAAC,CAAC;YAC5F,CAAC;QACH,CAAC;QAED,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;YAEnC,iBAAiB;YACjB,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAClB,IAAI,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;gBACjC,IAAI,CAAC,QAAQ,CAAC,iBAAiB,IAAI,MAAM,CAAC,MAAM,CAAC;gBACjD,IAAI,CAAC,QAAQ,CAAC,gBAAgB,IAAI,KAAK
,CAAC,OAAO,CAAC;YAClD,CAAC;YAED,OAAO,KAAK,CAAC;QACf,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,IAAI,CAAC,QAAQ;gBAAE,IAAI,CAAC,QAAQ,CAAC,iBAAiB,EAAE,CAAC;YACrD,MAAM,IAAI,kBAAkB,CAC1B,iCAAiC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,EACzF,KAAK,CACN,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;;;;;;;;;OAUG;IACK,OAAO,CAAC,KAAc;QAC5B,IAAI,CAAC,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ;YAAE,OAAO,KAAK,CAAC;QACtD,MAAM,KAAK,GAAG,KAAc,CAAC;QAC7B,OAAO,CACL,OAAO,KAAK,CAAC,OAAO,KAAK,QAAQ;YACjC,OAAO,KAAK,CAAC,OAAO,KAAK,QAAQ;YACjC,OAAO,KAAK,CAAC,MAAM,KAAK,QAAQ;YAChC,KAAK,CAAC,MAAM,KAAK,IAAI;YACrB,OAAO,KAAK,CAAC,QAAQ,KAAK,UAAU,CACrC,CAAC;IACJ,CAAC;CACF;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACH,MAAM,UAAU,oBAAoB,CAAC,SAAyB,QAAQ;IACpE,OAAO,IAAI,UAAU,CAAC;QACpB,MAAM;QACN,aAAa,EAAE,KAAK;QACpB,cAAc,EAAE,KAAK;KACtB,CAAC,CAAC;AACL,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqDG;AACH,MAAM,UAAU,yBAAyB,CACvC,OAAmD;IAEnD,OAAO,IAAI,UAAU,CAAC;QACpB,GAAG,OAAO;QACV,cAAc,EAAE,IAAI;KACrB,CAAC,CAAC;AACL,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2CG;AACH,MAAM,UAAU,oBAAoB,CAAC,OAA2C;IAC9E,OAAO,IAAI,UAAU,CAAC;QACpB,GAAG,OAAO;QACV,MAAM,EAAE,MAAM;KACf,CAAC,CAAC;AACL,CAAC"}
|