@procwire/codec-arrow 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +69 -67
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -15,12 +15,12 @@ Provides efficient columnar data serialization using [apache-arrow](https://gith
|
|
|
15
15
|
|
|
16
16
|
## Performance
|
|
17
17
|
|
|
18
|
-
| Metric
|
|
19
|
-
|
|
20
|
-
| Throughput
|
|
21
|
-
| Serialization overhead | Near-zero (zero-copy)
|
|
22
|
-
| Memory overhead
|
|
23
|
-
| Stream format overhead | ~100-200 bytes
|
|
18
|
+
| Metric | Value |
|
|
19
|
+
| ---------------------- | ------------------------ |
|
|
20
|
+
| Throughput | >1M rows/second |
|
|
21
|
+
| Serialization overhead | Near-zero (zero-copy) |
|
|
22
|
+
| Memory overhead | Minimal (reuses buffers) |
|
|
23
|
+
| Stream format overhead | ~100-200 bytes |
|
|
24
24
|
|
|
25
25
|
## Installation
|
|
26
26
|
|
|
@@ -35,15 +35,15 @@ Note: `apache-arrow` is a peer dependency and must be installed separately.
|
|
|
35
35
|
### Basic Usage
|
|
36
36
|
|
|
37
37
|
```ts
|
|
38
|
-
import { tableFromArrays } from 'apache-arrow';
|
|
39
|
-
import { ArrowCodec } from '@procwire/codec-arrow';
|
|
38
|
+
import { tableFromArrays } from "apache-arrow";
|
|
39
|
+
import { ArrowCodec } from "@procwire/codec-arrow";
|
|
40
40
|
|
|
41
41
|
const codec = new ArrowCodec();
|
|
42
42
|
|
|
43
43
|
const table = tableFromArrays({
|
|
44
44
|
id: [1, 2, 3],
|
|
45
|
-
name: ['Alice', 'Bob', 'Charlie'],
|
|
46
|
-
score: [95.5, 87.3, 92.1]
|
|
45
|
+
name: ["Alice", "Bob", "Charlie"],
|
|
46
|
+
score: [95.5, 87.3, 92.1],
|
|
47
47
|
});
|
|
48
48
|
|
|
49
49
|
// Serialize (zero-copy!)
|
|
@@ -57,10 +57,10 @@ console.log(decoded.numRows); // 3
|
|
|
57
57
|
### High-Performance Mode
|
|
58
58
|
|
|
59
59
|
```ts
|
|
60
|
-
import { createFastArrowCodec } from
|
|
60
|
+
import { createFastArrowCodec } from "@procwire/codec-arrow";
|
|
61
61
|
|
|
62
62
|
// For trusted environments - validation disabled
|
|
63
|
-
const codec = createFastArrowCodec('stream');
|
|
63
|
+
const codec = createFastArrowCodec("stream");
|
|
64
64
|
|
|
65
65
|
// Process data at maximum throughput
|
|
66
66
|
for (const table of tables) {
|
|
@@ -72,7 +72,7 @@ for (const table of tables) {
|
|
|
72
72
|
### With Metrics
|
|
73
73
|
|
|
74
74
|
```ts
|
|
75
|
-
import { createMonitoredArrowCodec } from
|
|
75
|
+
import { createMonitoredArrowCodec } from "@procwire/codec-arrow";
|
|
76
76
|
|
|
77
77
|
const codec = createMonitoredArrowCodec();
|
|
78
78
|
|
|
@@ -91,14 +91,14 @@ console.log(`Errors: ${metrics.serializeErrors}`);
|
|
|
91
91
|
### File Format (Random Access)
|
|
92
92
|
|
|
93
93
|
```ts
|
|
94
|
-
import { createFileArrowCodec } from
|
|
95
|
-
import { writeFileSync } from
|
|
94
|
+
import { createFileArrowCodec } from "@procwire/codec-arrow";
|
|
95
|
+
import { writeFileSync } from "fs";
|
|
96
96
|
|
|
97
97
|
const codec = createFileArrowCodec();
|
|
98
98
|
const buffer = codec.serialize(table);
|
|
99
99
|
|
|
100
100
|
// Write to disk - format supports random access
|
|
101
|
-
writeFileSync('data.arrow', buffer);
|
|
101
|
+
writeFileSync("data.arrow", buffer);
|
|
102
102
|
```
|
|
103
103
|
|
|
104
104
|
## API Reference
|
|
@@ -113,11 +113,11 @@ const codec = new ArrowCodec(options?: ArrowCodecOptions);
|
|
|
113
113
|
|
|
114
114
|
#### Properties
|
|
115
115
|
|
|
116
|
-
| Property
|
|
117
|
-
|
|
118
|
-
| `name`
|
|
119
|
-
| `contentType` | `string`
|
|
120
|
-
| `metrics`
|
|
116
|
+
| Property | Type | Description |
|
|
117
|
+
| ------------- | --------------------------- | ------------------------- |
|
|
118
|
+
| `name` | `"arrow"` | Codec identifier |
|
|
119
|
+
| `contentType` | `string` | MIME type based on format |
|
|
120
|
+
| `metrics` | `ArrowCodecMetrics \| null` | Current metrics or null |
|
|
121
121
|
|
|
122
122
|
#### Methods
|
|
123
123
|
|
|
@@ -126,6 +126,7 @@ const codec = new ArrowCodec(options?: ArrowCodecOptions);
|
|
|
126
126
|
Serializes an Apache Arrow Table to IPC format using zero-copy optimization.
|
|
127
127
|
|
|
128
128
|
**Parameters:**
|
|
129
|
+
|
|
129
130
|
- `value` - Arrow Table to serialize
|
|
130
131
|
|
|
131
132
|
**Returns:** `Buffer` containing Arrow IPC data
|
|
@@ -137,6 +138,7 @@ Serializes an Apache Arrow Table to IPC format using zero-copy optimization.
|
|
|
137
138
|
Deserializes Arrow IPC data to an Apache Arrow Table.
|
|
138
139
|
|
|
139
140
|
**Parameters:**
|
|
141
|
+
|
|
140
142
|
- `buffer` - Buffer containing Arrow IPC data
|
|
141
143
|
|
|
142
144
|
**Returns:** Deserialized Arrow Table
|
|
@@ -149,26 +151,26 @@ Resets all collected metrics to zero. No-op if metrics collection is disabled.
|
|
|
149
151
|
|
|
150
152
|
### ArrowCodecOptions
|
|
151
153
|
|
|
152
|
-
| Option
|
|
153
|
-
|
|
154
|
-
| `format`
|
|
155
|
-
| `validateInput`
|
|
156
|
-
| `collectMetrics` | `boolean`
|
|
154
|
+
| Option | Type | Default | Description |
|
|
155
|
+
| ---------------- | -------------------- | ---------- | ---------------------------- |
|
|
156
|
+
| `format` | `'stream' \| 'file'` | `'stream'` | IPC format to use |
|
|
157
|
+
| `validateInput` | `boolean` | `true` | Enable input type validation |
|
|
158
|
+
| `collectMetrics` | `boolean` | `false` | Enable metrics collection |
|
|
157
159
|
|
|
158
160
|
### ArrowCodecMetrics
|
|
159
161
|
|
|
160
162
|
Metrics collected when `collectMetrics: true`:
|
|
161
163
|
|
|
162
|
-
| Metric
|
|
163
|
-
|
|
164
|
-
| `serializeCount`
|
|
165
|
-
| `deserializeCount`
|
|
166
|
-
| `bytesSerialised`
|
|
167
|
-
| `bytesDeserialized` | `number` | Total bytes deserialized
|
|
168
|
-
| `rowsSerialized`
|
|
169
|
-
| `rowsDeserialized`
|
|
170
|
-
| `serializeErrors`
|
|
171
|
-
| `deserializeErrors` | `number` | Failed deserialize() calls
|
|
164
|
+
| Metric | Type | Description |
|
|
165
|
+
| ------------------- | -------- | ------------------------------ |
|
|
166
|
+
| `serializeCount` | `number` | Successful serialize() calls |
|
|
167
|
+
| `deserializeCount` | `number` | Successful deserialize() calls |
|
|
168
|
+
| `bytesSerialised` | `number` | Total bytes serialized |
|
|
169
|
+
| `bytesDeserialized` | `number` | Total bytes deserialized |
|
|
170
|
+
| `rowsSerialized` | `number` | Total rows serialized |
|
|
171
|
+
| `rowsDeserialized` | `number` | Total rows deserialized |
|
|
172
|
+
| `serializeErrors` | `number` | Failed serialize() calls |
|
|
173
|
+
| `deserializeErrors` | `number` | Failed deserialize() calls |
|
|
172
174
|
|
|
173
175
|
### Helper Functions
|
|
174
176
|
|
|
@@ -194,16 +196,16 @@ For maximum performance in trusted environments:
|
|
|
194
196
|
|
|
195
197
|
```ts
|
|
196
198
|
const codec = new ArrowCodec({
|
|
197
|
-
format:
|
|
198
|
-
validateInput: false,
|
|
199
|
-
collectMetrics: false
|
|
199
|
+
format: "stream", // Smaller, no footer overhead
|
|
200
|
+
validateInput: false, // Skip type checks
|
|
201
|
+
collectMetrics: false, // Skip metric collection
|
|
200
202
|
});
|
|
201
203
|
```
|
|
202
204
|
|
|
203
205
|
Or use the helper:
|
|
204
206
|
|
|
205
207
|
```ts
|
|
206
|
-
const codec = createFastArrowCodec(
|
|
208
|
+
const codec = createFastArrowCodec("stream");
|
|
207
209
|
```
|
|
208
210
|
|
|
209
211
|
### Memory Optimization
|
|
@@ -212,28 +214,28 @@ The codec uses zero-copy serialization by wrapping the underlying ArrayBuffer:
|
|
|
212
214
|
|
|
213
215
|
```ts
|
|
214
216
|
// Internally uses:
|
|
215
|
-
Buffer.from(uint8array.buffer, uint8array.byteOffset, uint8array.byteLength)
|
|
217
|
+
Buffer.from(uint8array.buffer, uint8array.byteOffset, uint8array.byteLength);
|
|
216
218
|
// Instead of:
|
|
217
|
-
Buffer.from(uint8array) // This copies data!
|
|
219
|
+
Buffer.from(uint8array); // This copies data!
|
|
218
220
|
```
|
|
219
221
|
|
|
220
222
|
This reduces memory allocation by ~50% during serialization.
|
|
221
223
|
|
|
222
224
|
### Format Selection
|
|
223
225
|
|
|
224
|
-
| Use Case
|
|
225
|
-
|
|
226
|
-
| IPC streaming
|
|
227
|
-
| Network transfer
|
|
228
|
-
| File storage
|
|
229
|
-
| Random access needed | `'file'`
|
|
230
|
-
| Smallest size
|
|
226
|
+
| Use Case | Recommended Format |
|
|
227
|
+
| -------------------- | -------------------- |
|
|
228
|
+
| IPC streaming | `'stream'` (default) |
|
|
229
|
+
| Network transfer | `'stream'` |
|
|
230
|
+
| File storage | `'file'` |
|
|
231
|
+
| Random access needed | `'file'` |
|
|
232
|
+
| Smallest size | `'stream'` |
|
|
231
233
|
|
|
232
234
|
## Integration with @procwire/transport
|
|
233
235
|
|
|
234
236
|
```ts
|
|
235
|
-
import { ChannelBuilder } from
|
|
236
|
-
import { ArrowCodec } from
|
|
237
|
+
import { ChannelBuilder } from "@procwire/transport";
|
|
238
|
+
import { ArrowCodec } from "@procwire/codec-arrow";
|
|
237
239
|
|
|
238
240
|
const channel = new ChannelBuilder()
|
|
239
241
|
.withTransport(transport)
|
|
@@ -243,7 +245,7 @@ const channel = new ChannelBuilder()
|
|
|
243
245
|
.build();
|
|
244
246
|
|
|
245
247
|
// Send Arrow tables over the channel
|
|
246
|
-
await channel.request('processAnalytics', analyticsTable);
|
|
248
|
+
await channel.request("processAnalytics", analyticsTable);
|
|
247
249
|
```
|
|
248
250
|
|
|
249
251
|
## Type System Support
|
|
@@ -251,8 +253,8 @@ await channel.request('processAnalytics', analyticsTable);
|
|
|
251
253
|
The codec provides full TypeScript support:
|
|
252
254
|
|
|
253
255
|
```ts
|
|
254
|
-
import type { Table, Schema, Field, RecordBatch } from
|
|
255
|
-
import { ArrowCodec, ArrowCodecOptions, ArrowCodecMetrics } from '@procwire/codec-arrow';
|
|
256
|
+
import type { Table, Schema, Field, RecordBatch } from "@procwire/codec-arrow";
|
|
257
|
+
import { ArrowCodec, ArrowCodecOptions, ArrowCodecMetrics } from "@procwire/codec-arrow";
|
|
256
258
|
```
|
|
257
259
|
|
|
258
260
|
## Error Handling
|
|
@@ -260,14 +262,14 @@ import { ArrowCodec, ArrowCodecOptions, ArrowCodecMetrics } from '@procwire/code
|
|
|
260
262
|
All errors are wrapped in `SerializationError` from `@procwire/transport`:
|
|
261
263
|
|
|
262
264
|
```ts
|
|
263
|
-
import { SerializationError } from '@procwire/transport';
|
|
265
|
+
import { SerializationError } from "@procwire/transport";
|
|
264
266
|
|
|
265
267
|
try {
|
|
266
268
|
codec.serialize(invalidTable);
|
|
267
269
|
} catch (error) {
|
|
268
270
|
if (error instanceof SerializationError) {
|
|
269
|
-
console.error('Serialization failed:', error.message);
|
|
270
|
-
console.error('Cause:', error.cause);
|
|
271
|
+
console.error("Serialization failed:", error.message);
|
|
272
|
+
console.error("Cause:", error.cause);
|
|
271
273
|
}
|
|
272
274
|
}
|
|
273
275
|
```
|
|
@@ -277,14 +279,14 @@ try {
|
|
|
277
279
|
### Creating Tables from Arrays
|
|
278
280
|
|
|
279
281
|
```ts
|
|
280
|
-
import { tableFromArrays } from
|
|
282
|
+
import { tableFromArrays } from "apache-arrow";
|
|
281
283
|
|
|
282
284
|
const table = tableFromArrays({
|
|
283
285
|
// Integer column
|
|
284
286
|
id: [1, 2, 3],
|
|
285
287
|
|
|
286
288
|
// String column
|
|
287
|
-
name: [
|
|
289
|
+
name: ["Alice", "Bob", "Charlie"],
|
|
288
290
|
|
|
289
291
|
// Float column
|
|
290
292
|
score: [95.5, 87.3, 92.1],
|
|
@@ -293,19 +295,19 @@ const table = tableFromArrays({
|
|
|
293
295
|
active: [true, false, true],
|
|
294
296
|
|
|
295
297
|
// Column with nulls
|
|
296
|
-
email: [
|
|
298
|
+
email: ["alice@example.com", null, "charlie@example.com"],
|
|
297
299
|
});
|
|
298
300
|
```
|
|
299
301
|
|
|
300
302
|
### Typed Arrays for Performance
|
|
301
303
|
|
|
302
304
|
```ts
|
|
303
|
-
import { tableFromArrays } from
|
|
305
|
+
import { tableFromArrays } from "apache-arrow";
|
|
304
306
|
|
|
305
307
|
const table = tableFromArrays({
|
|
306
308
|
int32_col: new Int32Array([1, 2, 3, 4, 5]),
|
|
307
309
|
float64_col: new Float64Array([1.1, 2.2, 3.3, 4.4, 5.5]),
|
|
308
|
-
uint8_col: new Uint8Array([255, 128, 64, 32, 0])
|
|
310
|
+
uint8_col: new Uint8Array([255, 128, 64, 32, 0]),
|
|
309
311
|
});
|
|
310
312
|
```
|
|
311
313
|
|
|
@@ -314,11 +316,11 @@ const table = tableFromArrays({
|
|
|
314
316
|
```ts
|
|
315
317
|
const table = tableFromArrays({
|
|
316
318
|
id: [1, 2, 3],
|
|
317
|
-
name: [
|
|
319
|
+
name: ["Alice", "Bob", "Charlie"],
|
|
318
320
|
});
|
|
319
321
|
|
|
320
322
|
// Get column
|
|
321
|
-
const idColumn = table.getChild(
|
|
323
|
+
const idColumn = table.getChild("id");
|
|
322
324
|
const ids = idColumn?.toArray(); // [1, 2, 3]
|
|
323
325
|
|
|
324
326
|
// Iterate rows
|
|
@@ -348,7 +350,7 @@ Tables serialized in one language can be deserialized in another seamlessly.
|
|
|
348
350
|
const timeSeries = tableFromArrays({
|
|
349
351
|
timestamp: timestamps,
|
|
350
352
|
value: values,
|
|
351
|
-
quality: qualities
|
|
353
|
+
quality: qualities,
|
|
352
354
|
});
|
|
353
355
|
```
|
|
354
356
|
|
|
@@ -359,7 +361,7 @@ const analyticsData = tableFromArrays({
|
|
|
359
361
|
user_id: userIds,
|
|
360
362
|
event_type: eventTypes,
|
|
361
363
|
timestamp: timestamps,
|
|
362
|
-
properties: jsonProperties
|
|
364
|
+
properties: jsonProperties,
|
|
363
365
|
});
|
|
364
366
|
```
|
|
365
367
|
|
|
@@ -369,7 +371,7 @@ const analyticsData = tableFromArrays({
|
|
|
369
371
|
const features = tableFromArrays({
|
|
370
372
|
feature1: feature1Data,
|
|
371
373
|
feature2: feature2Data,
|
|
372
|
-
label: labels
|
|
374
|
+
label: labels,
|
|
373
375
|
});
|
|
374
376
|
```
|
|
375
377
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@procwire/codec-arrow",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.2.2",
|
|
4
4
|
"description": "Apache Arrow IPC codec for @procwire/transport.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ipc",
|
|
@@ -47,7 +47,7 @@
|
|
|
47
47
|
"provenance": true
|
|
48
48
|
},
|
|
49
49
|
"dependencies": {
|
|
50
|
-
"@procwire/transport": "0.
|
|
50
|
+
"@procwire/transport": "0.3.0"
|
|
51
51
|
},
|
|
52
52
|
"peerDependencies": {
|
|
53
53
|
"apache-arrow": "^21.0.0"
|