@procwire/codec-arrow 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/README.md +69 -67
  2. package/package.json +2 -2
package/README.md CHANGED
@@ -15,12 +15,12 @@ Provides efficient columnar data serialization using [apache-arrow](https://gith

  ## Performance

- | Metric | Value |
- |--------|-------|
- | Throughput | >1M rows/second |
- | Serialization overhead | Near-zero (zero-copy) |
- | Memory overhead | Minimal (reuses buffers) |
- | Stream format overhead | ~100-200 bytes |
+ | Metric                 | Value                    |
+ | ---------------------- | ------------------------ |
+ | Throughput             | >1M rows/second          |
+ | Serialization overhead | Near-zero (zero-copy)    |
+ | Memory overhead        | Minimal (reuses buffers) |
+ | Stream format overhead | ~100-200 bytes           |

  ## Installation

@@ -35,15 +35,15 @@ Note: `apache-arrow` is a peer dependency and must be installed separately.
  ### Basic Usage

  ```ts
- import { tableFromArrays } from 'apache-arrow';
- import { ArrowCodec } from '@procwire/codec-arrow';
+ import { tableFromArrays } from "apache-arrow";
+ import { ArrowCodec } from "@procwire/codec-arrow";

  const codec = new ArrowCodec();

  const table = tableFromArrays({
    id: [1, 2, 3],
-   name: ['Alice', 'Bob', 'Charlie'],
-   score: [95.5, 87.3, 92.1]
+   name: ["Alice", "Bob", "Charlie"],
+   score: [95.5, 87.3, 92.1],
  });

  // Serialize (zero-copy!)
@@ -57,10 +57,10 @@ console.log(decoded.numRows); // 3
  ### High-Performance Mode

  ```ts
- import { createFastArrowCodec } from '@procwire/codec-arrow';
+ import { createFastArrowCodec } from "@procwire/codec-arrow";

  // For trusted environments - validation disabled
- const codec = createFastArrowCodec('stream');
+ const codec = createFastArrowCodec("stream");

  // Process data at maximum throughput
  for (const table of tables) {
@@ -72,7 +72,7 @@ for (const table of tables) {
  ### With Metrics

  ```ts
- import { createMonitoredArrowCodec } from '@procwire/codec-arrow';
+ import { createMonitoredArrowCodec } from "@procwire/codec-arrow";

  const codec = createMonitoredArrowCodec();

@@ -91,14 +91,14 @@ console.log(`Errors: ${metrics.serializeErrors}`);
  ### File Format (Random Access)

  ```ts
- import { createFileArrowCodec } from '@procwire/codec-arrow';
- import { writeFileSync } from 'fs';
+ import { createFileArrowCodec } from "@procwire/codec-arrow";
+ import { writeFileSync } from "fs";

  const codec = createFileArrowCodec();
  const buffer = codec.serialize(table);

  // Write to disk - format supports random access
- writeFileSync('data.arrow', buffer);
+ writeFileSync("data.arrow", buffer);
  ```

  ## API Reference
@@ -113,11 +113,11 @@ const codec = new ArrowCodec(options?: ArrowCodecOptions);

  #### Properties

- | Property | Type | Description |
- |----------|------|-------------|
- | `name` | `"arrow"` | Codec identifier |
- | `contentType` | `string` | MIME type based on format |
- | `metrics` | `ArrowCodecMetrics \| null` | Current metrics or null |
+ | Property      | Type                        | Description                |
+ | ------------- | --------------------------- | -------------------------- |
+ | `name`        | `"arrow"`                   | Codec identifier           |
+ | `contentType` | `string`                    | MIME type based on format  |
+ | `metrics`     | `ArrowCodecMetrics \| null` | Current metrics or null    |

  #### Methods

@@ -126,6 +126,7 @@ const codec = new ArrowCodec(options?: ArrowCodecOptions);
  Serializes an Apache Arrow Table to IPC format using zero-copy optimization.

  **Parameters:**
+
  - `value` - Arrow Table to serialize

  **Returns:** `Buffer` containing Arrow IPC data
@@ -137,6 +138,7 @@ Serializes an Apache Arrow Table to IPC format using zero-copy optimization.
  Deserializes Arrow IPC data to an Apache Arrow Table.

  **Parameters:**
+
  - `buffer` - Buffer containing Arrow IPC data

  **Returns:** Deserialized Arrow Table
@@ -149,26 +151,26 @@ Resets all collected metrics to zero. No-op if metrics collection is disabled.

  ### ArrowCodecOptions

- | Option | Type | Default | Description |
- |--------|------|---------|-------------|
- | `format` | `'stream' \| 'file'` | `'stream'` | IPC format to use |
- | `validateInput` | `boolean` | `true` | Enable input type validation |
- | `collectMetrics` | `boolean` | `false` | Enable metrics collection |
+ | Option           | Type                 | Default    | Description                  |
+ | ---------------- | -------------------- | ---------- | ---------------------------- |
+ | `format`         | `'stream' \| 'file'` | `'stream'` | IPC format to use            |
+ | `validateInput`  | `boolean`            | `true`     | Enable input type validation |
+ | `collectMetrics` | `boolean`            | `false`    | Enable metrics collection    |

  ### ArrowCodecMetrics

  Metrics collected when `collectMetrics: true`:

- | Metric | Type | Description |
- |--------|------|-------------|
- | `serializeCount` | `number` | Successful serialize() calls |
- | `deserializeCount` | `number` | Successful deserialize() calls |
- | `bytesSerialised` | `number` | Total bytes serialized |
- | `bytesDeserialized` | `number` | Total bytes deserialized |
- | `rowsSerialized` | `number` | Total rows serialized |
- | `rowsDeserialized` | `number` | Total rows deserialized |
- | `serializeErrors` | `number` | Failed serialize() calls |
- | `deserializeErrors` | `number` | Failed deserialize() calls |
+ | Metric              | Type     | Description                    |
+ | ------------------- | -------- | ------------------------------ |
+ | `serializeCount`    | `number` | Successful serialize() calls   |
+ | `deserializeCount`  | `number` | Successful deserialize() calls |
+ | `bytesSerialised`   | `number` | Total bytes serialized         |
+ | `bytesDeserialized` | `number` | Total bytes deserialized       |
+ | `rowsSerialized`    | `number` | Total rows serialized          |
+ | `rowsDeserialized`  | `number` | Total rows deserialized        |
+ | `serializeErrors`   | `number` | Failed serialize() calls       |
+ | `deserializeErrors` | `number` | Failed deserialize() calls     |
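
A minimal sketch (illustrative only, not taken from the package README) of how the options and metric counters documented in the two tables above fit together; it uses only names shown in this file, and the column data is invented for the example:

```ts
import { tableFromArrays } from "apache-arrow";
import { ArrowCodec } from "@procwire/codec-arrow";

// Enable validation and metrics collection via ArrowCodecOptions
const codec = new ArrowCodec({
  format: "stream",
  validateInput: true,
  collectMetrics: true,
});

const table = tableFromArrays({ id: [1, 2, 3] });
const buffer = codec.serialize(table);
codec.deserialize(buffer);

// `metrics` is non-null only while collectMetrics is enabled
const metrics = codec.metrics;
if (metrics) {
  console.log(metrics.serializeCount); // 1
  console.log(metrics.rowsSerialized); // 3
  console.log(metrics.bytesSerialised); // size of the IPC payload in bytes
}

codec.resetMetrics(); // counters back to zero (no-op when metrics are disabled)
```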

  ### Helper Functions
 
@@ -194,16 +196,16 @@ For maximum performance in trusted environments:

  ```ts
  const codec = new ArrowCodec({
-   format: 'stream', // Smaller, no footer overhead
-   validateInput: false, // Skip type checks
-   collectMetrics: false // Skip metric collection
+   format: "stream", // Smaller, no footer overhead
+   validateInput: false, // Skip type checks
+   collectMetrics: false, // Skip metric collection
  });
  ```

  Or use the helper:

  ```ts
- const codec = createFastArrowCodec('stream');
+ const codec = createFastArrowCodec("stream");
  ```

  ### Memory Optimization
@@ -212,28 +214,28 @@ The codec uses zero-copy serialization by wrapping the underlying ArrayBuffer:

  ```ts
  // Internally uses:
- Buffer.from(uint8array.buffer, uint8array.byteOffset, uint8array.byteLength)
+ Buffer.from(uint8array.buffer, uint8array.byteOffset, uint8array.byteLength);
  // Instead of:
- Buffer.from(uint8array) // This copies data!
+ Buffer.from(uint8array); // This copies data!
  ```

  This reduces memory allocation by ~50% during serialization.
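
To make the distinction concrete, here is a small standalone Node.js sketch (illustrative only, not from the package) showing why wrapping the underlying ArrayBuffer avoids a copy while `Buffer.from(uint8array)` allocates new memory:

```ts
import { Buffer } from "node:buffer";

const bytes = new Uint8Array([1, 2, 3, 4]);

// Wraps the existing ArrayBuffer - no copy, shares memory with `bytes`
const shared = Buffer.from(bytes.buffer, bytes.byteOffset, bytes.byteLength);

// Allocates fresh memory and copies the contents
const copied = Buffer.from(bytes);

bytes[0] = 42;
console.log(shared[0]); // 42 - the view observes the mutation
console.log(copied[0]); // 1  - the copy does not
```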

  ### Format Selection

- | Use Case | Recommended Format |
- |----------|-------------------|
- | IPC streaming | `'stream'` (default) |
- | Network transfer | `'stream'` |
- | File storage | `'file'` |
- | Random access needed | `'file'` |
- | Smallest size | `'stream'` |
+ | Use Case             | Recommended Format   |
+ | -------------------- | -------------------- |
+ | IPC streaming        | `'stream'` (default) |
+ | Network transfer     | `'stream'`           |
+ | File storage         | `'file'`             |
+ | Random access needed | `'file'`             |
+ | Smallest size        | `'stream'`           |

  ## Integration with @procwire/transport

  ```ts
- import { ChannelBuilder } from '@procwire/transport';
- import { ArrowCodec } from '@procwire/codec-arrow';
+ import { ChannelBuilder } from "@procwire/transport";
+ import { ArrowCodec } from "@procwire/codec-arrow";

  const channel = new ChannelBuilder()
    .withTransport(transport)
@@ -243,7 +245,7 @@ const channel = new ChannelBuilder()
    .build();

  // Send Arrow tables over the channel
- await channel.request('processAnalytics', analyticsTable);
+ await channel.request("processAnalytics", analyticsTable);
  ```

  ## Type System Support
@@ -251,8 +253,8 @@ await channel.request('processAnalytics', analyticsTable);
  The codec provides full TypeScript support:

  ```ts
- import type { Table, Schema, Field, RecordBatch } from '@procwire/codec-arrow';
- import { ArrowCodec, ArrowCodecOptions, ArrowCodecMetrics } from '@procwire/codec-arrow';
+ import type { Table, Schema, Field, RecordBatch } from "@procwire/codec-arrow";
+ import { ArrowCodec, ArrowCodecOptions, ArrowCodecMetrics } from "@procwire/codec-arrow";
  ```

  ## Error Handling
@@ -260,14 +262,14 @@ import { ArrowCodec, ArrowCodecOptions, ArrowCodecMetrics } from '@procwire/code
  All errors are wrapped in `SerializationError` from `@procwire/transport`:

  ```ts
- import { SerializationError } from '@procwire/transport';
+ import { SerializationError } from "@procwire/transport";

  try {
    codec.serialize(invalidTable);
  } catch (error) {
    if (error instanceof SerializationError) {
-     console.error('Serialization failed:', error.message);
-     console.error('Cause:', error.cause);
+     console.error("Serialization failed:", error.message);
+     console.error("Cause:", error.cause);
    }
  }
  ```
@@ -277,14 +279,14 @@ try {
  ### Creating Tables from Arrays

  ```ts
- import { tableFromArrays } from 'apache-arrow';
+ import { tableFromArrays } from "apache-arrow";

  const table = tableFromArrays({
    // Integer column
    id: [1, 2, 3],

    // String column
-   name: ['Alice', 'Bob', 'Charlie'],
+   name: ["Alice", "Bob", "Charlie"],

    // Float column
    score: [95.5, 87.3, 92.1],
@@ -293,19 +295,19 @@ const table = tableFromArrays({
    active: [true, false, true],

    // Column with nulls
-   email: ['alice@example.com', null, 'charlie@example.com']
+   email: ["alice@example.com", null, "charlie@example.com"],
  });
  ```

  ### Typed Arrays for Performance

  ```ts
- import { tableFromArrays } from 'apache-arrow';
+ import { tableFromArrays } from "apache-arrow";

  const table = tableFromArrays({
    int32_col: new Int32Array([1, 2, 3, 4, 5]),
    float64_col: new Float64Array([1.1, 2.2, 3.3, 4.4, 5.5]),
-   uint8_col: new Uint8Array([255, 128, 64, 32, 0])
+   uint8_col: new Uint8Array([255, 128, 64, 32, 0]),
  });
  ```

@@ -314,11 +316,11 @@ const table = tableFromArrays({
  ```ts
  const table = tableFromArrays({
    id: [1, 2, 3],
-   name: ['Alice', 'Bob', 'Charlie']
+   name: ["Alice", "Bob", "Charlie"],
  });

  // Get column
- const idColumn = table.getChild('id');
+ const idColumn = table.getChild("id");
  const ids = idColumn?.toArray(); // [1, 2, 3]

  // Iterate rows
@@ -348,7 +350,7 @@ Tables serialized in one language can be deserialized in another seamlessly.
  const timeSeries = tableFromArrays({
    timestamp: timestamps,
    value: values,
-   quality: qualities
+   quality: qualities,
  });
  ```

@@ -359,7 +361,7 @@ const analyticsData = tableFromArrays({
    user_id: userIds,
    event_type: eventTypes,
    timestamp: timestamps,
-   properties: jsonProperties
+   properties: jsonProperties,
  });
  ```

@@ -369,7 +371,7 @@ const analyticsData = tableFromArrays({
  const features = tableFromArrays({
    feature1: feature1Data,
    feature2: feature2Data,
-   label: labels
+   label: labels,
  });
  ```

package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@procwire/codec-arrow",
-   "version": "0.2.0",
+   "version": "0.2.2",
    "description": "Apache Arrow IPC codec for @procwire/transport.",
    "keywords": [
      "ipc",
@@ -47,7 +47,7 @@
      "provenance": true
    },
    "dependencies": {
-     "@procwire/transport": "0.1.3"
+     "@procwire/transport": "0.3.0"
    },
    "peerDependencies": {
      "apache-arrow": "^21.0.0"