@procwire/codec-arrow 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Sebastian Webdev
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,265 @@
1
+ # @procwire/codec-arrow
2
+
3
+ Apache Arrow serialization codec for `@procwire/transport`.
4
+
5
+ Provides efficient columnar data serialization using [apache-arrow](https://github.com/apache/arrow/tree/main/js), ideal for analytical workloads and large datasets.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ npm install @procwire/codec-arrow apache-arrow
11
+ ```
12
+
13
+ Note: `apache-arrow` is a peer dependency and must be installed separately.
14
+
15
+ ## Usage
16
+
17
+ ### Basic Usage
18
+
19
+ ```ts
20
+ import { tableFromArrays } from 'apache-arrow';
21
+ import { ArrowCodec } from '@procwire/codec-arrow';
22
+ import { ChannelBuilder } from '@procwire/transport';
23
+
24
+ const codec = new ArrowCodec();
25
+
26
+ // Create a table
27
+ const table = tableFromArrays({
28
+ id: [1, 2, 3, 4, 5],
29
+ name: ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
30
+ score: [95.5, 87.3, 92.1, 88.7, 94.2]
31
+ });
32
+
33
+ // Use with ChannelBuilder
34
+ const channel = new ChannelBuilder()
35
+ .withTransport(transport)
36
+ .withFraming(framing)
37
+ .withSerialization(codec)
38
+ .withProtocol(protocol)
39
+ .build();
40
+
41
+ // Send table over channel
42
+ await channel.request('processData', table);
43
+ ```
44
+
45
+ ### Standalone Usage
46
+
47
+ ```ts
48
+ import { tableFromArrays } from 'apache-arrow';
49
+ import { ArrowCodec } from '@procwire/codec-arrow';
50
+
51
+ const codec = new ArrowCodec();
52
+
53
+ // Serialize
54
+ const table = tableFromArrays({
55
+ id: [1, 2, 3],
56
+ value: [10.5, 20.3, 30.1]
57
+ });
58
+
59
+ const buffer = codec.serialize(table);
60
+
61
+ // Deserialize
62
+ const decoded = codec.deserialize(buffer);
63
+ console.log(decoded.numRows); // 3
64
+ console.log(decoded.getChild('id')?.toArray()); // [1, 2, 3]
65
+ ```
66
+
67
+ ### Working with Large Datasets
68
+
69
+ ```ts
70
+ import { tableFromArrays } from 'apache-arrow';
71
+ import { ArrowCodec } from '@procwire/codec-arrow';
72
+
73
+ const codec = new ArrowCodec();
74
+
75
+ // Create large dataset (100K rows)
76
+ const size = 100000;
77
+ const table = tableFromArrays({
78
+ timestamp: Array.from({ length: size }, (_, i) => Date.now() + i * 1000),
79
+ sensor_id: Array.from({ length: size }, (_, i) => i % 100),
80
+ temperature: Array.from({ length: size }, () => 20 + Math.random() * 10),
81
+ humidity: Array.from({ length: size }, () => 40 + Math.random() * 20)
82
+ });
83
+
84
+ // Efficient serialization of columnar data
85
+ const buffer = codec.serialize(table);
86
+ console.log(`Serialized ${size} rows in ${buffer.length} bytes`);
87
+
88
+ // Fast deserialization
89
+ const decoded = codec.deserialize(buffer);
90
+ console.log(`Deserialized table with ${decoded.numRows} rows`);
91
+ ```
92
+
93
+ ## Features
94
+
95
+ - **Columnar Format**: Optimized for analytical queries and large datasets
96
+ - **Type Preservation**: Full type system support (integers, floats, strings, booleans, etc.)
97
+ - **Null Handling**: Native support for null values
98
+ - **Zero-Copy**: Efficient memory usage with zero-copy reads where possible
99
+ - **Error Handling**: Wraps encoding/decoding errors in `SerializationError` from `@procwire/transport`
100
+ - **IPC Stream Format**: Uses Arrow IPC streaming format for efficient transmission
101
+
102
+ ## API
103
+
104
+ ### `ArrowCodec`
105
+
106
+ Implements `SerializationCodec<Table>` interface.
107
+
108
+ #### Properties
109
+
110
+ - `name: "arrow"` - Codec identifier
111
+ - `contentType: "application/vnd.apache.arrow.stream"` - MIME type
112
+
113
+ #### Methods
114
+
115
+ ##### `serialize(value: Table): Buffer`
116
+
117
+ Serializes an Apache Arrow Table to IPC stream format.
118
+
119
+ **Parameters:**
120
+ - `value` - Arrow Table to serialize
121
+
122
+ **Returns:** `Buffer` containing Arrow IPC stream data
123
+
124
+ **Throws:** `SerializationError` if encoding fails
125
+
126
+ ##### `deserialize(buffer: Buffer): Table`
127
+
128
+ Deserializes Arrow IPC stream data to an Apache Arrow Table.
129
+
130
+ **Parameters:**
131
+ - `buffer` - Buffer containing Arrow IPC stream data
132
+
133
+ **Returns:** Deserialized Arrow Table
134
+
135
+ **Throws:** `SerializationError` if decoding fails
136
+
137
+ ## Advanced Usage
138
+
139
+ ### Creating Tables from Arrays
140
+
141
+ ```ts
142
+ import { tableFromArrays } from 'apache-arrow';
143
+
144
+ const table = tableFromArrays({
145
+ // Integer column
146
+ id: [1, 2, 3],
147
+
148
+ // String column
149
+ name: ['Alice', 'Bob', 'Charlie'],
150
+
151
+ // Float column
152
+ score: [95.5, 87.3, 92.1],
153
+
154
+ // Boolean column
155
+ active: [true, false, true],
156
+
157
+ // Column with nulls
158
+ email: ['alice@example.com', null, 'charlie@example.com']
159
+ });
160
+ ```
161
+
162
+ ### Typed Arrays for Performance
163
+
164
+ ```ts
165
+ import { tableFromArrays } from 'apache-arrow';
166
+
167
+ const table = tableFromArrays({
168
+ int32_col: new Int32Array([1, 2, 3, 4, 5]),
169
+ float64_col: new Float64Array([1.1, 2.2, 3.3, 4.4, 5.5]),
170
+ uint8_col: new Uint8Array([255, 128, 64, 32, 0])
171
+ });
172
+ ```
173
+
174
+ ### Accessing Column Data
175
+
176
+ ```ts
177
+ const table = tableFromArrays({
178
+ id: [1, 2, 3],
179
+ name: ['Alice', 'Bob', 'Charlie']
180
+ });
181
+
182
+ // Get column
183
+ const idColumn = table.getChild('id');
184
+ const ids = idColumn?.toArray(); // [1, 2, 3]
185
+
186
+ // Iterate rows
187
+ for (let i = 0; i < table.numRows; i++) {
188
+ const row = table.get(i);
189
+ console.log(row);
190
+ }
191
+ ```
192
+
193
+ ## Performance
194
+
195
+ Apache Arrow provides exceptional performance for columnar data:
196
+
197
+ - **Columnar Storage**: Data stored in columns, not rows - ideal for analytical queries
198
+ - **Zero-Copy Reads**: Direct memory access without deserialization overhead
199
+ - **Compression**: Built-in dictionary encoding for repeated values
200
+ - **Vectorized Operations**: SIMD-friendly data layout for fast processing
201
+ - **Cross-Language**: Same binary format used in Python, R, Java, C++, etc.
202
+
203
+ ### Performance Characteristics
204
+
205
+ Compared to JSON:
206
+ - **5-50x faster** serialization/deserialization for large datasets
207
+ - **2-10x smaller** binary size for numeric-heavy data
208
+ - **Zero-copy** operations for in-memory analytics
209
+
210
+ Ideal for:
211
+ - Time-series data
212
+ - Analytics and data science workloads
213
+ - Large datasets (millions of rows)
214
+ - High-throughput data streaming
215
+ - Cross-language data exchange
216
+ - Machine learning pipelines
217
+
218
+ ## Use Cases
219
+
220
+ ### Time-Series Data
221
+
222
+ ```ts
223
+ const timeSeries = tableFromArrays({
224
+ timestamp: timestamps, // millions of timestamps
225
+ value: values, // sensor readings
226
+ quality: qualities // quality flags
227
+ });
228
+ ```
229
+
230
+ ### Data Analytics
231
+
232
+ ```ts
233
+ const analyticsData = tableFromArrays({
234
+ user_id: userIds,
235
+ event_type: eventTypes,
236
+ timestamp: timestamps,
237
+ properties: jsonProperties
238
+ });
239
+ ```
240
+
241
+ ### Machine Learning
242
+
243
+ ```ts
244
+ const features = tableFromArrays({
245
+ feature1: feature1Data,
246
+ feature2: feature2Data,
247
+ // ... many features
248
+ label: labels
249
+ });
250
+ ```
251
+
252
+ ## Compatibility
253
+
254
+ Arrow IPC format is cross-platform and cross-language:
255
+ - **Python**: PyArrow
256
+ - **R**: arrow R package
257
+ - **Java**: Arrow Java
258
+ - **C++**: Arrow C++
259
+ - **Rust**: arrow-rs
260
+
261
+ Tables can be serialized in one language and deserialized in another seamlessly.
262
+
263
+ ## License
264
+
265
+ MIT
package/dist/index.d.ts ADDED
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Apache Arrow codec for @procwire/transport.
3
+ * Provides columnar data serialization using apache-arrow.
4
+ *
5
+ * @module
6
+ */
7
+ import type { Table } from "apache-arrow";
8
+ import type { SerializationCodec } from "@procwire/transport/serialization";
9
+ /**
10
+ * Apache Arrow serialization codec.
11
+ * Implements efficient columnar data serialization ideal for analytical workloads.
12
+ *
13
+ * @example
14
+ * ```ts
15
+ * import { tableFromArrays } from 'apache-arrow';
16
+ * import { ArrowCodec } from '@procwire/codec-arrow';
17
+ * import { ChannelBuilder } from '@procwire/transport';
18
+ *
19
+ * const codec = new ArrowCodec();
20
+ *
21
+ * // Create a table
22
+ * const table = tableFromArrays({
23
+ * id: [1, 2, 3],
24
+ * name: ['Alice', 'Bob', 'Charlie']
25
+ * });
26
+ *
27
+ * // Use with channel
28
+ * const channel = new ChannelBuilder()
29
+ * .withSerialization(codec)
30
+ * // ... other configuration
31
+ * .build();
32
+ *
33
+ * // Send table over channel
34
+ * await channel.request('process', table);
35
+ * ```
36
+ */
37
+ export declare class ArrowCodec implements SerializationCodec<Table> {
38
+ readonly name = "arrow";
39
+ readonly contentType = "application/vnd.apache.arrow.stream";
40
+ /**
41
+ * Serializes an Apache Arrow Table to IPC stream format.
42
+ *
43
+ * @param value - Arrow Table to serialize
44
+ * @returns Buffer containing Arrow IPC stream data
45
+ * @throws {SerializationError} if encoding fails
46
+ */
47
+ serialize(value: Table): Buffer;
48
+ /**
49
+ * Deserializes Arrow IPC stream data to an Apache Arrow Table.
50
+ *
51
+ * @param buffer - Buffer containing Arrow IPC stream data
52
+ * @returns Deserialized Arrow Table
53
+ * @throws {SerializationError} if decoding fails
54
+ */
55
+ deserialize(buffer: Buffer): Table;
56
+ }
57
+ //# sourceMappingURL=index.d.ts.map
package/dist/index.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,cAAc,CAAC;AAE1C,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,mCAAmC,CAAC;AAG5E;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,qBAAa,UAAW,YAAW,kBAAkB,CAAC,KAAK,CAAC;IAC1D,QAAQ,CAAC,IAAI,WAAW;IACxB,QAAQ,CAAC,WAAW,yCAAyC;IAE7D;;;;;;OAMG;IACH,SAAS,CAAC,KAAK,EAAE,KAAK,GAAG,MAAM;IAY/B;;;;;;OAMG;IACH,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,KAAK;CAUnC"}
package/dist/index.js ADDED
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Apache Arrow codec for @procwire/transport.
3
+ * Provides columnar data serialization using apache-arrow.
4
+ *
5
+ * @module
6
+ */
7
+ import { tableFromIPC, tableToIPC } from "apache-arrow";
8
+ import { SerializationError } from "@procwire/transport";
9
+ /**
10
+ * Apache Arrow serialization codec.
11
+ * Implements efficient columnar data serialization ideal for analytical workloads.
12
+ *
13
+ * @example
14
+ * ```ts
15
+ * import { tableFromArrays } from 'apache-arrow';
16
+ * import { ArrowCodec } from '@procwire/codec-arrow';
17
+ * import { ChannelBuilder } from '@procwire/transport';
18
+ *
19
+ * const codec = new ArrowCodec();
20
+ *
21
+ * // Create a table
22
+ * const table = tableFromArrays({
23
+ * id: [1, 2, 3],
24
+ * name: ['Alice', 'Bob', 'Charlie']
25
+ * });
26
+ *
27
+ * // Use with channel
28
+ * const channel = new ChannelBuilder()
29
+ * .withSerialization(codec)
30
+ * // ... other configuration
31
+ * .build();
32
+ *
33
+ * // Send table over channel
34
+ * await channel.request('process', table);
35
+ * ```
36
+ */
37
+ export class ArrowCodec {
38
+ name = "arrow";
39
+ contentType = "application/vnd.apache.arrow.stream";
40
+ /**
41
+ * Serializes an Apache Arrow Table to IPC stream format.
42
+ *
43
+ * @param value - Arrow Table to serialize
44
+ * @returns Buffer containing Arrow IPC stream data
45
+ * @throws {SerializationError} if encoding fails
46
+ */
47
+ serialize(value) {
48
+ try {
49
+ const uint8array = tableToIPC(value);
50
+ return Buffer.from(uint8array);
51
+ }
52
+ catch (error) {
53
+ throw new SerializationError(`Failed to encode Arrow table: ${error instanceof Error ? error.message : String(error)}`, error);
54
+ }
55
+ }
56
+ /**
57
+ * Deserializes Arrow IPC stream data to an Apache Arrow Table.
58
+ *
59
+ * @param buffer - Buffer containing Arrow IPC stream data
60
+ * @returns Deserialized Arrow Table
61
+ * @throws {SerializationError} if decoding fails
62
+ */
63
+ deserialize(buffer) {
64
+ try {
65
+ return tableFromIPC(buffer);
66
+ }
67
+ catch (error) {
68
+ throw new SerializationError(`Failed to decode Arrow table: ${error instanceof Error ? error.message : String(error)}`, error);
69
+ }
70
+ }
71
+ }
72
+ //# sourceMappingURL=index.js.map
package/dist/index.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAExD,OAAO,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AAEzD;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,MAAM,OAAO,UAAU;IACZ,IAAI,GAAG,OAAO,CAAC;IACf,WAAW,GAAG,qCAAqC,CAAC;IAE7D;;;;;;OAMG;IACH,SAAS,CAAC,KAAY;QACpB,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;YACrC,OAAO,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACjC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,kBAAkB,CAC1B,iCAAiC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,EACzF,KAAK,CACN,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACH,WAAW,CAAC,MAAc;QACxB,IAAI,CAAC;YACH,OAAO,YAAY,CAAC,MAAM,CAAC,CAAC;QAC9B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,kBAAkB,CAC1B,iCAAiC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,EACzF,KAAK,CACN,CAAC;QACJ,CAAC;IACH,CAAC;CACF"}
package/package.json ADDED
@@ -0,0 +1,67 @@
1
+ {
2
+ "name": "@procwire/codec-arrow",
3
+ "version": "0.1.1",
4
+ "description": "Apache Arrow IPC codec for @procwire/transport.",
5
+ "keywords": [
6
+ "ipc",
7
+ "arrow",
8
+ "apache-arrow",
9
+ "serialization",
10
+ "codec",
11
+ "columnar",
12
+ "binary",
13
+ "node",
14
+ "typescript"
15
+ ],
16
+ "author": "Sebastian Webdev",
17
+ "repository": {
18
+ "type": "git",
19
+ "url": "https://github.com/SebastianWebdev/procwire.git",
20
+ "directory": "codec-arrow"
21
+ },
22
+ "bugs": {
23
+ "url": "https://github.com/SebastianWebdev/procwire/issues"
24
+ },
25
+ "homepage": "https://github.com/SebastianWebdev/procwire/tree/main/codec-arrow#readme",
26
+ "type": "module",
27
+ "sideEffects": false,
28
+ "license": "MIT",
29
+ "engines": {
30
+ "node": ">=18"
31
+ },
32
+ "exports": {
33
+ ".": {
34
+ "types": "./dist/index.d.ts",
35
+ "default": "./dist/index.js"
36
+ }
37
+ },
38
+ "main": "./dist/index.js",
39
+ "types": "./dist/index.d.ts",
40
+ "files": [
41
+ "dist",
42
+ "README.md",
43
+ "LICENSE"
44
+ ],
45
+ "publishConfig": {
46
+ "access": "public"
47
+ },
48
+ "dependencies": {
49
+ "@procwire/transport": "0.1.1"
50
+ },
51
+ "peerDependencies": {
52
+ "apache-arrow": "^21.0.0"
53
+ },
54
+ "devDependencies": {
55
+ "@types/node": "^22.0.0",
56
+ "apache-arrow": "^21.0.0",
57
+ "rimraf": "^6.0.1",
58
+ "typescript": "^5.9.3",
59
+ "vitest": "^2.1.8"
60
+ },
61
+ "scripts": {
62
+ "clean": "rimraf dist \"*.tsbuildinfo\"",
63
+ "typecheck": "tsc -p tsconfig.json --noEmit",
64
+ "build": "tsc -p tsconfig.build.json",
65
+ "test": "vitest run"
66
+ }
67
+ }