@procwire/codec-arrow 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +217 -104
- package/dist/codec.d.ts +654 -0
- package/dist/codec.d.ts.map +1 -0
- package/dist/codec.js +598 -0
- package/dist/codec.js.map +1 -0
- package/dist/index.d.ts +118 -40
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +73 -56
- package/dist/index.js.map +1 -1
- package/package.json +6 -5
package/dist/codec.d.ts
ADDED
|
@@ -0,0 +1,654 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Apache Arrow IPC codec implementation for @procwire/transport.
|
|
3
|
+
*
|
|
4
|
+
* @remarks
|
|
5
|
+
* This is an internal module. Import from `@procwire/codec-arrow` instead.
|
|
6
|
+
*
|
|
7
|
+
* @internal
|
|
8
|
+
*/
|
|
9
|
+
import type { Table } from "apache-arrow";
|
|
10
|
+
import type { SerializationCodec } from "@procwire/transport/serialization";
|
|
11
|
+
/**
|
|
12
|
+
* IPC format type for Arrow serialization.
|
|
13
|
+
*
|
|
14
|
+
* Apache Arrow supports two IPC formats with different characteristics:
|
|
15
|
+
*
|
|
16
|
+
* - `'stream'` - Streaming format optimized for sequential access.
|
|
17
|
+
* Smaller size, no footer, ideal for IPC and streaming scenarios.
|
|
18
|
+
* - `'file'` - File format with footer for random access.
|
|
19
|
+
* Larger size, includes schema and metadata footer, suitable for files.
|
|
20
|
+
*
|
|
21
|
+
* @example
|
|
22
|
+
* ```ts
|
|
23
|
+
* import { ArrowIPCFormat } from '@procwire/codec-arrow';
|
|
24
|
+
*
|
|
25
|
+
* const format: ArrowIPCFormat = 'stream';
|
|
26
|
+
* const codec = new ArrowCodec({ format });
|
|
27
|
+
* ```
|
|
28
|
+
*/
|
|
29
|
+
export type ArrowIPCFormat = "stream" | "file";
|
|
30
|
+
/**
|
|
31
|
+
* Configuration options for {@link ArrowCodec}.
|
|
32
|
+
*
|
|
33
|
+
* All options are optional and have sensible defaults optimized for
|
|
34
|
+
* typical IPC scenarios.
|
|
35
|
+
*
|
|
36
|
+
* @example Default configuration
|
|
37
|
+
* ```ts
|
|
38
|
+
* const codec = new ArrowCodec();
|
|
39
|
+
* // Equivalent to: { format: 'stream', validateInput: true, collectMetrics: false }
|
|
40
|
+
* ```
|
|
41
|
+
*
|
|
42
|
+
* @example Performance configuration
|
|
43
|
+
* ```ts
|
|
44
|
+
* const codec = new ArrowCodec({
|
|
45
|
+
* format: 'stream',
|
|
46
|
+
* validateInput: false, // Skip validation in trusted environments
|
|
47
|
+
* collectMetrics: true, // Monitor throughput
|
|
48
|
+
* });
|
|
49
|
+
* ```
|
|
50
|
+
*
|
|
51
|
+
* @see {@link ArrowCodec} for the main codec class
|
|
52
|
+
*/
|
|
53
|
+
export interface ArrowCodecOptions {
|
|
54
|
+
/**
|
|
55
|
+
* IPC format to use for serialization.
|
|
56
|
+
*
|
|
57
|
+
* - `'stream'` (default): Optimized for streaming and IPC. Smaller size,
|
|
58
|
+
* no footer, data can be read sequentially as it arrives.
|
|
59
|
+
* - `'file'`: With footer for random access. Larger size, includes
|
|
60
|
+
* schema and record batch offsets at the end for seekable reads.
|
|
61
|
+
*
|
|
62
|
+
* Use `'stream'` for inter-process communication and `'file'` when
|
|
63
|
+
* writing to disk or when random access is needed.
|
|
64
|
+
*
|
|
65
|
+
* @default 'stream'
|
|
66
|
+
*
|
|
67
|
+
* @example
|
|
68
|
+
* ```ts
|
|
69
|
+
* // Stream format for IPC (default)
|
|
70
|
+
* const ipcCodec = new ArrowCodec({ format: 'stream' });
|
|
71
|
+
*
|
|
72
|
+
* // File format for disk storage
|
|
73
|
+
* const fileCodec = new ArrowCodec({ format: 'file' });
|
|
74
|
+
* ```
|
|
75
|
+
*/
|
|
76
|
+
format?: ArrowIPCFormat;
|
|
77
|
+
/**
|
|
78
|
+
* Whether to validate input types before serialization/deserialization.
|
|
79
|
+
*
|
|
80
|
+
* - `true` (default): Validate that inputs are valid Arrow Tables or
|
|
81
|
+
* non-empty Buffers. Provides clear error messages for invalid data.
|
|
82
|
+
* - `false`: Skip validation for maximum performance. Only use in
|
|
83
|
+
* trusted environments where input is guaranteed to be valid.
|
|
84
|
+
*
|
|
85
|
+
* @default true
|
|
86
|
+
*
|
|
87
|
+
* @example
|
|
88
|
+
* ```ts
|
|
89
|
+
* // With validation (recommended for external data)
|
|
90
|
+
* const safeCodec = new ArrowCodec({ validateInput: true });
|
|
91
|
+
*
|
|
92
|
+
* // Without validation (for trusted internal IPC)
|
|
93
|
+
* const fastCodec = new ArrowCodec({ validateInput: false });
|
|
94
|
+
* ```
|
|
95
|
+
*/
|
|
96
|
+
validateInput?: boolean;
|
|
97
|
+
/**
|
|
98
|
+
* Whether to collect basic metrics for monitoring.
|
|
99
|
+
*
|
|
100
|
+
* When enabled, the codec tracks:
|
|
101
|
+
* - Serialize/deserialize counts
|
|
102
|
+
* - Bytes processed
|
|
103
|
+
* - Rows processed
|
|
104
|
+
* - Error counts
|
|
105
|
+
*
|
|
106
|
+
* Metrics have minimal overhead and are useful for monitoring
|
|
107
|
+
* throughput and debugging issues.
|
|
108
|
+
*
|
|
109
|
+
* @default false
|
|
110
|
+
*
|
|
111
|
+
* @example
|
|
112
|
+
* ```ts
|
|
113
|
+
* const codec = new ArrowCodec({ collectMetrics: true });
|
|
114
|
+
*
|
|
115
|
+
* // Process data...
|
|
116
|
+
* for (const batch of batches) {
|
|
117
|
+
* codec.serialize(batch);
|
|
118
|
+
* }
|
|
119
|
+
*
|
|
120
|
+
* // Check metrics
|
|
121
|
+
* console.log(codec.metrics);
|
|
122
|
+
* // { serializeCount: 100, bytesSerialised: 10485760, rowsSerialized: 1000000, ... }
|
|
123
|
+
* ```
|
|
124
|
+
*
|
|
125
|
+
* @see {@link ArrowCodecMetrics} for the metrics structure
|
|
126
|
+
* @see {@link ArrowCodec.metrics} for accessing metrics
|
|
127
|
+
* @see {@link ArrowCodec.resetMetrics} for resetting metrics
|
|
128
|
+
*/
|
|
129
|
+
collectMetrics?: boolean;
|
|
130
|
+
}
|
|
131
|
+
/**
|
|
132
|
+
* Metrics collected by ArrowCodec when `collectMetrics` is enabled.
|
|
133
|
+
*
|
|
134
|
+
* These metrics provide visibility into codec performance and can be
|
|
135
|
+
* used for monitoring, debugging, and capacity planning.
|
|
136
|
+
*
|
|
137
|
+
* @example
|
|
138
|
+
* ```ts
|
|
139
|
+
* const codec = new ArrowCodec({ collectMetrics: true });
|
|
140
|
+
*
|
|
141
|
+
* // Process data...
|
|
142
|
+
* codec.serialize(table);
|
|
143
|
+
*
|
|
144
|
+
* const metrics = codec.metrics;
|
|
145
|
+
* if (metrics) {
|
|
146
|
+
* console.log(`Serialized ${metrics.rowsSerialized} rows`);
|
|
147
|
+
* console.log(`Total bytes: ${metrics.bytesSerialised}`);
|
|
148
|
+
* console.log(`Errors: ${metrics.serializeErrors}`);
|
|
149
|
+
* }
|
|
150
|
+
* ```
|
|
151
|
+
*
|
|
152
|
+
* @see {@link ArrowCodecOptions.collectMetrics} to enable metrics
|
|
153
|
+
* @see {@link ArrowCodec.metrics} for accessing metrics
|
|
154
|
+
*/
|
|
155
|
+
export interface ArrowCodecMetrics {
|
|
156
|
+
/**
|
|
157
|
+
* Number of successful `serialize()` calls.
|
|
158
|
+
*/
|
|
159
|
+
serializeCount: number;
|
|
160
|
+
/**
|
|
161
|
+
* Number of successful `deserialize()` calls.
|
|
162
|
+
*/
|
|
163
|
+
deserializeCount: number;
|
|
164
|
+
/**
|
|
165
|
+
* Total bytes produced by serialization (note the "-ised" spelling of this property, unlike `bytesDeserialized`).
|
|
166
|
+
*/
|
|
167
|
+
bytesSerialised: number;
|
|
168
|
+
/**
|
|
169
|
+
* Total bytes consumed by deserialization.
|
|
170
|
+
*/
|
|
171
|
+
bytesDeserialized: number;
|
|
172
|
+
/**
|
|
173
|
+
* Total number of rows serialized across all tables.
|
|
174
|
+
*/
|
|
175
|
+
rowsSerialized: number;
|
|
176
|
+
/**
|
|
177
|
+
* Total number of rows deserialized across all tables.
|
|
178
|
+
*/
|
|
179
|
+
rowsDeserialized: number;
|
|
180
|
+
/**
|
|
181
|
+
* Number of serialization errors encountered.
|
|
182
|
+
*/
|
|
183
|
+
serializeErrors: number;
|
|
184
|
+
/**
|
|
185
|
+
* Number of deserialization errors encountered.
|
|
186
|
+
*/
|
|
187
|
+
deserializeErrors: number;
|
|
188
|
+
}
|
|
189
|
+
/**
|
|
190
|
+
* High-performance Apache Arrow IPC serialization codec.
|
|
191
|
+
*
|
|
192
|
+
* Implements the {@link SerializationCodec} interface for use with
|
|
193
|
+
* `@procwire/transport` channels. Optimized for columnar data transfer,
|
|
194
|
+
* analytics workloads, and interoperability with data science tools.
|
|
195
|
+
*
|
|
196
|
+
* @remarks
|
|
197
|
+
* This codec uses zero-copy optimization where possible, wrapping the
|
|
198
|
+
* underlying ArrayBuffer instead of copying data. For maximum performance
|
|
199
|
+
* in trusted environments, disable input validation with `validateInput: false`.
|
|
200
|
+
*
|
|
201
|
+
* @example Basic usage
|
|
202
|
+
* ```ts
|
|
203
|
+
* import { tableFromArrays } from 'apache-arrow';
|
|
204
|
+
* import { ArrowCodec } from '@procwire/codec-arrow';
|
|
205
|
+
*
|
|
206
|
+
* const codec = new ArrowCodec();
|
|
207
|
+
*
|
|
208
|
+
* // Create a table with typed columns
|
|
209
|
+
* const table = tableFromArrays({
|
|
210
|
+
* id: Int32Array.from([1, 2, 3, 4, 5]),
|
|
211
|
+
* name: ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
|
|
212
|
+
* score: Float64Array.from([95.5, 87.3, 92.1, 88.7, 91.2]),
|
|
213
|
+
* active: [true, false, true, true, false]
|
|
214
|
+
* });
|
|
215
|
+
*
|
|
216
|
+
* // Serialize to Arrow IPC format
|
|
217
|
+
* const buffer = codec.serialize(table);
|
|
218
|
+
* console.log(`Serialized ${table.numRows} rows to ${buffer.length} bytes`);
|
|
219
|
+
*
|
|
220
|
+
* // Deserialize back to Table
|
|
221
|
+
* const decoded = codec.deserialize(buffer);
|
|
222
|
+
* console.log(decoded.numRows); // 5
|
|
223
|
+
* console.log(decoded.numCols); // 4
|
|
224
|
+
* console.log(decoded.schema.fields.map(f => f.name)); // ['id', 'name', 'score', 'active']
|
|
225
|
+
* ```
|
|
226
|
+
*
|
|
227
|
+
* @example With configuration options
|
|
228
|
+
* ```ts
|
|
229
|
+
* const codec = new ArrowCodec({
|
|
230
|
+
* format: 'stream', // Streaming format for IPC
|
|
231
|
+
* validateInput: false, // Skip validation for performance
|
|
232
|
+
* collectMetrics: true, // Track throughput
|
|
233
|
+
* });
|
|
234
|
+
*
|
|
235
|
+
* // Process batches
|
|
236
|
+
* for (const batch of batches) {
|
|
237
|
+
* channel.send(codec.serialize(batch));
|
|
238
|
+
* }
|
|
239
|
+
*
|
|
240
|
+
* // Check metrics
|
|
241
|
+
* console.log(`Processed ${codec.metrics?.rowsSerialized} rows`);
|
|
242
|
+
* ```
|
|
243
|
+
*
|
|
244
|
+
* @example File format for random access
|
|
245
|
+
* ```ts
|
|
246
|
+
* import { writeFileSync, readFileSync } from 'fs';
|
|
247
|
+
*
|
|
248
|
+
* const codec = new ArrowCodec({ format: 'file' });
|
|
249
|
+
*
|
|
250
|
+
* // Write to file
|
|
251
|
+
* const buffer = codec.serialize(table);
|
|
252
|
+
* writeFileSync('data.arrow', buffer);
|
|
253
|
+
*
|
|
254
|
+
* // Read from file (supports random access)
|
|
255
|
+
* const fileBuffer = readFileSync('data.arrow');
|
|
256
|
+
* const loadedTable = codec.deserialize(fileBuffer);
|
|
257
|
+
* ```
|
|
258
|
+
*
|
|
259
|
+
* @example Cross-language interoperability
|
|
260
|
+
* ```ts
|
|
261
|
+
* // Node.js side
|
|
262
|
+
* const codec = new ArrowCodec();
|
|
263
|
+
* const buffer = codec.serialize(table);
|
|
264
|
+
* socket.write(buffer);
|
|
265
|
+
*
|
|
266
|
+
* // Python side (PyArrow)
|
|
267
|
+
* // import pyarrow as pa
|
|
268
|
+
* // reader = pa.ipc.open_stream(buffer)
|
|
269
|
+
* // table = reader.read_all()
|
|
270
|
+
* ```
|
|
271
|
+
*
|
|
272
|
+
* @example Integration with @procwire/transport
|
|
273
|
+
* ```ts
|
|
274
|
+
* import { ArrowCodec } from '@procwire/codec-arrow';
|
|
275
|
+
* import { StreamChannel } from '@procwire/transport/channel';
|
|
276
|
+
*
|
|
277
|
+
* const codec = new ArrowCodec({ collectMetrics: true });
|
|
278
|
+
*
|
|
279
|
+
* const channel = new StreamChannel({
|
|
280
|
+
* transport,
|
|
281
|
+
* framing,
|
|
282
|
+
* serialization: codec,
|
|
283
|
+
* protocol
|
|
284
|
+
* });
|
|
285
|
+
*
|
|
286
|
+
* // Send Arrow tables through the channel
|
|
287
|
+
* channel.send(table);
|
|
288
|
+
* ```
|
|
289
|
+
*
|
|
290
|
+
* @see {@link ArrowCodecOptions} for configuration options
|
|
291
|
+
* @see {@link ArrowCodecMetrics} for metrics structure
|
|
292
|
+
* @see {@link createFastArrowCodec} for maximum performance
|
|
293
|
+
* @see {@link createMonitoredArrowCodec} for monitoring
|
|
294
|
+
* @see {@link createFileArrowCodec} for file format
|
|
295
|
+
*/
|
|
296
|
+
export declare class ArrowCodec implements SerializationCodec<Table> {
|
|
297
|
+
/**
|
|
298
|
+
* Unique identifier for this codec.
|
|
299
|
+
*
|
|
300
|
+
* Used by codec registries to identify and lookup codecs by name.
|
|
301
|
+
* The value `"arrow"` identifies this as an Apache Arrow codec.
|
|
302
|
+
*
|
|
303
|
+
* @readonly
|
|
304
|
+
*/
|
|
305
|
+
readonly name = "arrow";
|
|
306
|
+
/**
|
|
307
|
+
* MIME type for Arrow IPC encoded data.
|
|
308
|
+
*
|
|
309
|
+
* The content type varies based on the format:
|
|
310
|
+
* - Stream format: `application/vnd.apache.arrow.stream`
|
|
311
|
+
* - File format: `application/vnd.apache.arrow.file`
|
|
312
|
+
*
|
|
313
|
+
* Used in HTTP Content-Type headers and content negotiation.
|
|
314
|
+
*
|
|
315
|
+
* @readonly
|
|
316
|
+
* @see {@link https://arrow.apache.org/docs/format/IPC.html | Apache Arrow IPC specification}
|
|
317
|
+
*/
|
|
318
|
+
readonly contentType: string;
|
|
319
|
+
private readonly format;
|
|
320
|
+
private readonly validateInput;
|
|
321
|
+
private readonly collectMetrics;
|
|
322
|
+
private _metrics;
|
|
323
|
+
/**
|
|
324
|
+
* Creates a new ArrowCodec instance.
|
|
325
|
+
*
|
|
326
|
+
* @param options - Optional configuration for serialization behavior.
|
|
327
|
+
* See {@link ArrowCodecOptions} for available options.
|
|
328
|
+
*
|
|
329
|
+
* @example Default configuration
|
|
330
|
+
* ```ts
|
|
331
|
+
* const codec = new ArrowCodec();
|
|
332
|
+
* ```
|
|
333
|
+
*
|
|
334
|
+
* @example With options
|
|
335
|
+
* ```ts
|
|
336
|
+
* const codec = new ArrowCodec({
|
|
337
|
+
* format: 'file',
|
|
338
|
+
* validateInput: true,
|
|
339
|
+
* collectMetrics: true,
|
|
340
|
+
* });
|
|
341
|
+
* ```
|
|
342
|
+
*/
|
|
343
|
+
constructor(options?: ArrowCodecOptions);
|
|
344
|
+
/**
|
|
345
|
+
* Returns current metrics if `collectMetrics` is enabled.
|
|
346
|
+
*
|
|
347
|
+
* Returns a copy of the metrics object to prevent external modification.
|
|
348
|
+
* Returns `null` if metrics collection is disabled.
|
|
349
|
+
*
|
|
350
|
+
* @returns A readonly copy of metrics, or `null` if metrics are disabled.
|
|
351
|
+
*
|
|
352
|
+
* @example
|
|
353
|
+
* ```ts
|
|
354
|
+
* const codec = new ArrowCodec({ collectMetrics: true });
|
|
355
|
+
*
|
|
356
|
+
* // Process some data
|
|
357
|
+
* codec.serialize(table1);
|
|
358
|
+
* codec.serialize(table2);
|
|
359
|
+
*
|
|
360
|
+
* const metrics = codec.metrics;
|
|
361
|
+
* if (metrics) {
|
|
362
|
+
* console.log(`Serialized ${metrics.serializeCount} tables`);
|
|
363
|
+
* console.log(`Total rows: ${metrics.rowsSerialized}`);
|
|
364
|
+
* console.log(`Total bytes: ${metrics.bytesSerialised}`);
|
|
365
|
+
* }
|
|
366
|
+
* ```
|
|
367
|
+
*
|
|
368
|
+
* @see {@link ArrowCodecMetrics} for the metrics structure
|
|
369
|
+
* @see {@link resetMetrics} to reset all metrics to zero
|
|
370
|
+
*/
|
|
371
|
+
get metrics(): Readonly<ArrowCodecMetrics> | null;
|
|
372
|
+
/**
|
|
373
|
+
* Resets all metrics to zero.
|
|
374
|
+
*
|
|
375
|
+
* Use this method to start fresh measurement periods, for example
|
|
376
|
+
* at the beginning of a new batch processing run or time window.
|
|
377
|
+
* Has no effect if metrics collection is disabled.
|
|
378
|
+
*
|
|
379
|
+
* @example
|
|
380
|
+
* ```ts
|
|
381
|
+
* const codec = new ArrowCodec({ collectMetrics: true });
|
|
382
|
+
*
|
|
383
|
+
* // Process batch 1
|
|
384
|
+
* for (const table of batch1) {
|
|
385
|
+
* codec.serialize(table);
|
|
386
|
+
* }
|
|
387
|
+
* console.log('Batch 1:', codec.metrics);
|
|
388
|
+
*
|
|
389
|
+
* // Reset for batch 2
|
|
390
|
+
* codec.resetMetrics();
|
|
391
|
+
*
|
|
392
|
+
* // Process batch 2
|
|
393
|
+
* for (const table of batch2) {
|
|
394
|
+
* codec.serialize(table);
|
|
395
|
+
* }
|
|
396
|
+
* console.log('Batch 2:', codec.metrics);
|
|
397
|
+
* ```
|
|
398
|
+
*/
|
|
399
|
+
resetMetrics(): void;
|
|
400
|
+
/**
|
|
401
|
+
* Serializes an Apache Arrow Table to IPC format.
|
|
402
|
+
*
|
|
403
|
+
* Converts the input Table to Arrow IPC binary format using the configured
|
|
404
|
+
* format (stream or file). Uses zero-copy optimization to avoid unnecessary
|
|
405
|
+
* memory allocations.
|
|
406
|
+
*
|
|
407
|
+
* @param value - Apache Arrow Table to serialize. Must be a valid Table
|
|
408
|
+
* instance with at least one column.
|
|
409
|
+
* @returns Buffer containing the Arrow IPC encoded data.
|
|
410
|
+
*
|
|
411
|
+
* @throws {SerializationError} When input is not a valid Arrow Table
|
|
412
|
+
* (if validation is enabled).
|
|
413
|
+
* @throws {SerializationError} When encoding fails due to internal
|
|
414
|
+
* Arrow library errors.
|
|
415
|
+
*
|
|
416
|
+
* @example Basic serialization
|
|
417
|
+
* ```ts
|
|
418
|
+
* import { tableFromArrays } from 'apache-arrow';
|
|
419
|
+
*
|
|
420
|
+
* const codec = new ArrowCodec();
|
|
421
|
+
* const table = tableFromArrays({
|
|
422
|
+
* id: [1, 2, 3],
|
|
423
|
+
* name: ['Alice', 'Bob', 'Charlie']
|
|
424
|
+
* });
|
|
425
|
+
*
|
|
426
|
+
* const buffer = codec.serialize(table);
|
|
427
|
+
* console.log(`Serialized to ${buffer.length} bytes`);
|
|
428
|
+
* ```
|
|
429
|
+
*
|
|
430
|
+
* @example Error handling
|
|
431
|
+
* ```ts
|
|
432
|
+
* const codec = new ArrowCodec();
|
|
433
|
+
*
|
|
434
|
+
* try {
|
|
435
|
+
* codec.serialize({ notATable: true } as any);
|
|
436
|
+
* } catch (error) {
|
|
437
|
+
* if (error instanceof SerializationError) {
|
|
438
|
+
* console.error('Invalid input:', error.message);
|
|
439
|
+
* }
|
|
440
|
+
* }
|
|
441
|
+
* ```
|
|
442
|
+
*
|
|
443
|
+
* @see {@link deserialize} for the reverse operation
|
|
444
|
+
*/
|
|
445
|
+
serialize(value: Table): Buffer;
|
|
446
|
+
/**
|
|
447
|
+
* Deserializes Arrow IPC data back to an Apache Arrow Table.
|
|
448
|
+
*
|
|
449
|
+
* Parses the binary Arrow IPC data and reconstructs the Table with
|
|
450
|
+
* its full schema and column data.
|
|
451
|
+
*
|
|
452
|
+
* @param buffer - Buffer or Uint8Array containing Arrow IPC encoded data.
|
|
453
|
+
* Must be valid Arrow IPC format (stream or file).
|
|
454
|
+
* @returns The deserialized Apache Arrow Table.
|
|
455
|
+
*
|
|
456
|
+
* @throws {SerializationError} When input is null, undefined, empty,
|
|
457
|
+
* or not a Buffer/Uint8Array (if validation is enabled).
|
|
458
|
+
* @throws {SerializationError} When the buffer contains invalid or
|
|
459
|
+
* corrupted Arrow IPC data.
|
|
460
|
+
*
|
|
461
|
+
* @example Basic deserialization
|
|
462
|
+
* ```ts
|
|
463
|
+
* const codec = new ArrowCodec();
|
|
464
|
+
*
|
|
465
|
+
* // Roundtrip
|
|
466
|
+
* const original = tableFromArrays({ id: [1, 2, 3], name: ['A', 'B', 'C'] });
|
|
467
|
+
* const buffer = codec.serialize(original);
|
|
468
|
+
* const decoded = codec.deserialize(buffer);
|
|
469
|
+
*
|
|
470
|
+
* console.log(decoded.numRows); // 3
|
|
471
|
+
* console.log(decoded.numCols); // 2
|
|
472
|
+
* console.log(decoded.getChild('id')?.toArray()); // Float64Array [1, 2, 3] (plain number arrays are inferred as Float64)
|
|
473
|
+
* ```
|
|
474
|
+
*
|
|
475
|
+
* @example Accessing column data
|
|
476
|
+
* ```ts
|
|
477
|
+
* const table = codec.deserialize(buffer);
|
|
478
|
+
*
|
|
479
|
+
* // Get column by name
|
|
480
|
+
* const idColumn = table.getChild('id');
|
|
481
|
+
* const ids = idColumn?.toArray();
|
|
482
|
+
*
|
|
483
|
+
* // Iterate rows
|
|
484
|
+
* for (const row of table) {
|
|
485
|
+
* console.log(row.id, row.name);
|
|
486
|
+
* }
|
|
487
|
+
*
|
|
488
|
+
* // Access schema
|
|
489
|
+
* for (const field of table.schema.fields) {
|
|
490
|
+
* console.log(`${field.name}: ${field.type}`);
|
|
491
|
+
* }
|
|
492
|
+
* ```
|
|
493
|
+
*
|
|
494
|
+
* @example Error handling
|
|
495
|
+
* ```ts
|
|
496
|
+
* const codec = new ArrowCodec();
|
|
497
|
+
*
|
|
498
|
+
* try {
|
|
499
|
+
* codec.deserialize(Buffer.from('invalid data'));
|
|
500
|
+
* } catch (error) {
|
|
501
|
+
* if (error instanceof SerializationError) {
|
|
502
|
+
* console.error('Decode failed:', error.message);
|
|
503
|
+
* }
|
|
504
|
+
* }
|
|
505
|
+
* ```
|
|
506
|
+
*
|
|
507
|
+
* @see {@link serialize} for the reverse operation
|
|
508
|
+
*/
|
|
509
|
+
deserialize(buffer: Buffer): Table;
|
|
510
|
+
/**
|
|
511
|
+
* Checks if value is an Apache Arrow Table.
|
|
512
|
+
*
|
|
513
|
+
* Uses duck-typing for performance, avoiding instanceof checks that
|
|
514
|
+
* can fail across module boundaries or different package versions.
|
|
515
|
+
*
|
|
516
|
+
* @param value - Value to check.
|
|
517
|
+
* @returns `true` if value appears to be an Arrow Table.
|
|
518
|
+
*
|
|
519
|
+
* @internal
|
|
520
|
+
*/
|
|
521
|
+
private isTable;
|
|
522
|
+
}
|
|
523
|
+
/**
|
|
524
|
+
* Creates an ArrowCodec optimized for maximum throughput.
|
|
525
|
+
*
|
|
526
|
+
* Returns a codec with input validation disabled for use in trusted
|
|
527
|
+
* environments where input is guaranteed to be valid. This provides
|
|
528
|
+
* the best possible performance but will produce unclear errors or
|
|
529
|
+
* undefined behavior if given invalid input.
|
|
530
|
+
*
|
|
531
|
+
* @param format - IPC format to use. Defaults to `'stream'`.
|
|
532
|
+
*
|
|
533
|
+
* @returns A configured ArrowCodec with validation disabled.
|
|
534
|
+
*
|
|
535
|
+
* @example
|
|
536
|
+
* ```ts
|
|
537
|
+
* // For trusted internal IPC between your own processes
|
|
538
|
+
* const codec = createFastArrowCodec('stream');
|
|
539
|
+
*
|
|
540
|
+
* // Maximum performance - no validation overhead
|
|
541
|
+
* const buffer = codec.serialize(table);
|
|
542
|
+
* ```
|
|
543
|
+
*
|
|
544
|
+
* @remarks
|
|
545
|
+
* Only use this in trusted environments where:
|
|
546
|
+
* - Input always comes from your own code
|
|
547
|
+
* - Tables are guaranteed to be valid Arrow Tables
|
|
548
|
+
* - Buffers are guaranteed to be valid Arrow IPC data
|
|
549
|
+
*
|
|
550
|
+
* @see {@link ArrowCodec} for the full codec with validation
|
|
551
|
+
* @see {@link createMonitoredArrowCodec} for monitoring support
|
|
552
|
+
*/
|
|
553
|
+
export declare function createFastArrowCodec(format?: ArrowIPCFormat): ArrowCodec;
|
|
554
|
+
/**
|
|
555
|
+
* Creates an ArrowCodec with metrics collection enabled.
|
|
556
|
+
*
|
|
557
|
+
* Returns a codec that tracks serialize/deserialize counts, bytes processed,
|
|
558
|
+
* rows processed, and error counts. Useful for monitoring throughput,
|
|
559
|
+
* debugging issues, and capacity planning.
|
|
560
|
+
*
|
|
561
|
+
* @param options - Additional codec options. The `collectMetrics` option
|
|
562
|
+
* will always be set to `true`.
|
|
563
|
+
*
|
|
564
|
+
* @returns A configured ArrowCodec with metrics collection enabled.
|
|
565
|
+
*
|
|
566
|
+
* @example Basic monitoring
|
|
567
|
+
* ```ts
|
|
568
|
+
* const codec = createMonitoredArrowCodec();
|
|
569
|
+
*
|
|
570
|
+
* // Process data
|
|
571
|
+
* for (const table of tables) {
|
|
572
|
+
* codec.serialize(table);
|
|
573
|
+
* }
|
|
574
|
+
*
|
|
575
|
+
* // Check throughput
|
|
576
|
+
* const metrics = codec.metrics!;
|
|
577
|
+
* console.log(`Tables: ${metrics.serializeCount}`);
|
|
578
|
+
* console.log(`Rows: ${metrics.rowsSerialized}`);
|
|
579
|
+
* console.log(`Bytes: ${metrics.bytesSerialised}`);
|
|
580
|
+
* console.log(`Errors: ${metrics.serializeErrors}`);
|
|
581
|
+
* ```
|
|
582
|
+
*
|
|
583
|
+
* @example With additional options
|
|
584
|
+
* ```ts
|
|
585
|
+
* const codec = createMonitoredArrowCodec({
|
|
586
|
+
* format: 'file',
|
|
587
|
+
* validateInput: false, // Trust input for performance
|
|
588
|
+
* });
|
|
589
|
+
* ```
|
|
590
|
+
*
|
|
591
|
+
* @example Periodic reporting
|
|
592
|
+
* ```ts
|
|
593
|
+
* const codec = createMonitoredArrowCodec();
|
|
594
|
+
*
|
|
595
|
+
* setInterval(() => {
|
|
596
|
+
* const m = codec.metrics;
|
|
597
|
+
* if (m) {
|
|
598
|
+
* console.log(`Throughput: ${m.rowsSerialized} rows, ${m.bytesSerialised} bytes`);
|
|
599
|
+
* codec.resetMetrics(); // Reset for next interval
|
|
600
|
+
* }
|
|
601
|
+
* }, 60000); // Report every minute
|
|
602
|
+
* ```
|
|
603
|
+
*
|
|
604
|
+
* @see {@link ArrowCodecMetrics} for the metrics structure
|
|
605
|
+
* @see {@link ArrowCodec.metrics} for accessing metrics
|
|
606
|
+
* @see {@link ArrowCodec.resetMetrics} for resetting metrics
|
|
607
|
+
*/
|
|
608
|
+
export declare function createMonitoredArrowCodec(options?: Omit<ArrowCodecOptions, "collectMetrics">): ArrowCodec;
|
|
609
|
+
/**
|
|
610
|
+
* Creates an ArrowCodec configured for file format.
|
|
611
|
+
*
|
|
612
|
+
* Returns a codec using the Arrow file format, which includes a footer
|
|
613
|
+
* with schema and record batch offsets for random access. Use this when
|
|
614
|
+
* you need to write Arrow data to disk or when random access to record
|
|
615
|
+
* batches is required.
|
|
616
|
+
*
|
|
617
|
+
* @param options - Additional codec options. The `format` option
|
|
618
|
+
* will always be set to `'file'`.
|
|
619
|
+
*
|
|
620
|
+
* @returns A configured ArrowCodec for file format.
|
|
621
|
+
*
|
|
622
|
+
* @example Writing to disk
|
|
623
|
+
* ```ts
|
|
624
|
+
* import { writeFileSync, readFileSync } from 'fs';
|
|
625
|
+
*
|
|
626
|
+
* const codec = createFileArrowCodec();
|
|
627
|
+
*
|
|
628
|
+
* // Serialize with file format (includes footer)
|
|
629
|
+
* const buffer = codec.serialize(table);
|
|
630
|
+
* writeFileSync('data.arrow', buffer);
|
|
631
|
+
*
|
|
632
|
+
* // Read back
|
|
633
|
+
* const loaded = codec.deserialize(readFileSync('data.arrow'));
|
|
634
|
+
* ```
|
|
635
|
+
*
|
|
636
|
+
* @example With additional options
|
|
637
|
+
* ```ts
|
|
638
|
+
* const codec = createFileArrowCodec({
|
|
639
|
+
* validateInput: true,
|
|
640
|
+
* collectMetrics: true,
|
|
641
|
+
* });
|
|
642
|
+
* ```
|
|
643
|
+
*
|
|
644
|
+
* @remarks
|
|
645
|
+
* The file format is larger than stream format due to the footer,
|
|
646
|
+
* but enables random access to record batches without reading the
|
|
647
|
+
* entire file. For IPC where you read data sequentially, prefer
|
|
648
|
+
* the default stream format.
|
|
649
|
+
*
|
|
650
|
+
* @see {@link ArrowCodec} for stream format (default)
|
|
651
|
+
* @see {@link ArrowIPCFormat} for format differences
|
|
652
|
+
*/
|
|
653
|
+
export declare function createFileArrowCodec(options?: Omit<ArrowCodecOptions, "format">): ArrowCodec;
|
|
654
|
+
//# sourceMappingURL=codec.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"codec.d.ts","sourceRoot":"","sources":["../src/codec.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,cAAc,CAAC;AAE1C,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,mCAAmC,CAAC;AAG5E;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,MAAM,cAAc,GAAG,QAAQ,GAAG,MAAM,CAAC;AAE/C;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,WAAW,iBAAiB;IAChC;;;;;;;;;;;;;;;;;;;;;OAqBG;IACH,MAAM,CAAC,EAAE,cAAc,CAAC;IAExB;;;;;;;;;;;;;;;;;;OAkBG;IACH,aAAa,CAAC,EAAE,OAAO,CAAC;IAExB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA+BG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,cAAc,EAAE,MAAM,CAAC;IAEvB;;OAEG;IACH,gBAAgB,EAAE,MAAM,CAAC;IAEzB;;OAEG;IACH,eAAe,EAAE,MAAM,CAAC;IAExB;;OAEG;IACH,iBAAiB,EAAE,MAAM,CAAC;IAE1B;;OAEG;IACH,cAAc,EAAE,MAAM,CAAC;IAEvB;;OAEG;IACH,gBAAgB,EAAE,MAAM,CAAC;IAEzB;;OAEG;IACH,eAAe,EAAE,MAAM,CAAC;IAExB;;OAEG;IACH,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0GG;AACH,qBAAa,UAAW,YAAW,kBAAkB,CAAC,KAAK,CAAC;IAC1D;;;;;;;OAOG;IACH,QAAQ,CAAC,IAAI,WAAW;IAExB;;;;;;;;;;;OAWG;IACH,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAE7B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAiB;IACxC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAU;IACxC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAU;IACzC,OAAO,CAAC,QAAQ,CAAkC;IAElD;;;;;;;;;;;;;;;;;;;OAmBG;gBACS,OAAO,CAAC,EAAE,iBAAiB;IAyBvC;;;;;;;;;;;;;;;;;;;;;;;;;;OA0BG;IACH,IAAI,OAAO,IAAI,QAAQ,CAAC,iBAAiB,CAAC,GAAG,IAAI,CAEhD;IAED;;;;;;;;;;;;;;;;;;;;;;;;;;OA0BG;IACH,YAAY,IAAI,IAAI;IAapB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA4CG;IACH,SAAS,CAAC,KAAK,EAAE,KAAK,GAAG,MAAM;IAoC/B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA8DG;IACH,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,KAAK;IAgDlC;;;;;;;;;;OAUG;IACH,OAAO,CAAC,OAAO;CAWhB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,GAAE,cAAyB,GAAG,UAAU,CAMlF;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqDG;AACH,wBAAgB,yBAAyB,CACvC,OAAO,CAAC,EAAE,IAAI,CAAC,iBAAiB,EAAE,gBAAgB,CAAC,GACl
D,UAAU,CAKZ;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2CG;AACH,wBAAgB,oBAAoB,CAAC,OAAO,CAAC,EAAE,IAAI,CAAC,iBAAiB,EAAE,QAAQ,CAAC,GAAG,UAAU,CAK5F"}
|