@procwire/codec-arrow 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +217 -104
- package/dist/codec.d.ts +654 -0
- package/dist/codec.d.ts.map +1 -0
- package/dist/codec.js +598 -0
- package/dist/codec.js.map +1 -0
- package/dist/index.d.ts +118 -40
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +73 -56
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -1,57 +1,135 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Apache Arrow codec for @procwire/transport.
|
|
3
|
-
* Provides columnar data serialization using apache-arrow.
|
|
2
|
+
* Apache Arrow IPC serialization codec for @procwire/transport.
|
|
4
3
|
*
|
|
5
|
-
*
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
*
|
|
11
|
-
*
|
|
4
|
+
* Provides high-performance columnar data serialization using Apache Arrow,
|
|
5
|
+
* optimized for analytics workloads and large datasets. This codec implements
|
|
6
|
+
* the {@link SerializationCodec} interface for seamless integration with
|
|
7
|
+
* @procwire/transport channels.
|
|
8
|
+
*
|
|
9
|
+
* ## Features
|
|
10
|
+
*
|
|
11
|
+
* - **Zero-copy serialization** - Minimizes memory allocations and copies
|
|
12
|
+
* - **Columnar format** - Optimized for analytics and batch processing
|
|
13
|
+
* - **Large dataset support** - Efficiently handles millions of rows
|
|
14
|
+
* - **Cross-language compatibility** - Works with Python (PyArrow), R, Java, etc.
|
|
15
|
+
* - **Built-in metrics** - Optional monitoring of throughput and errors
|
|
16
|
+
* - **Configurable formats** - Stream (default) or file format
|
|
17
|
+
*
|
|
18
|
+
* ## When to Use Arrow
|
|
19
|
+
*
|
|
20
|
+
* Apache Arrow is ideal for:
|
|
21
|
+
* - Data analytics and processing pipelines
|
|
22
|
+
* - Transferring tabular data between processes
|
|
23
|
+
* - Interoperability with data science tools (pandas, R, Spark)
|
|
24
|
+
* - High-throughput, low-latency data transfer
|
|
25
|
+
* - Large datasets where columnar access patterns dominate
|
|
26
|
+
*
|
|
27
|
+
* For small messages or non-tabular data, consider {@link @procwire/codec-msgpack}
|
|
28
|
+
* or {@link @procwire/codec-protobuf} instead.
|
|
29
|
+
*
|
|
30
|
+
* ## Quick Start
|
|
12
31
|
*
|
|
13
|
-
* @example
|
|
14
32
|
* ```ts
|
|
15
33
|
* import { tableFromArrays } from 'apache-arrow';
|
|
16
34
|
* import { ArrowCodec } from '@procwire/codec-arrow';
|
|
17
|
-
* import { ChannelBuilder } from '@procwire/transport';
|
|
18
35
|
*
|
|
19
36
|
* const codec = new ArrowCodec();
|
|
20
37
|
*
|
|
21
|
-
* // Create
|
|
38
|
+
* // Create an Arrow table
|
|
22
39
|
* const table = tableFromArrays({
|
|
40
|
+
* id: [1, 2, 3, 4, 5],
|
|
41
|
+
* name: ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
|
|
42
|
+
* score: [95.5, 87.3, 92.1, 88.7, 91.2]
|
|
43
|
+
* });
|
|
44
|
+
*
|
|
45
|
+
* // Serialize to IPC format
|
|
46
|
+
* const buffer = codec.serialize(table);
|
|
47
|
+
*
|
|
48
|
+
* // Deserialize back to Table
|
|
49
|
+
* const decoded = codec.deserialize(buffer);
|
|
50
|
+
* console.log(decoded.numRows); // 5
|
|
51
|
+
* ```
|
|
52
|
+
*
|
|
53
|
+
* ## IPC Formats
|
|
54
|
+
*
|
|
55
|
+
* Arrow supports two IPC formats:
|
|
56
|
+
*
|
|
57
|
+
* - **Stream format** (default): Smaller size, no footer, ideal for streaming/IPC
|
|
58
|
+
* - **File format**: Includes footer for random access, suitable for file storage
|
|
59
|
+
*
|
|
60
|
+
* ```ts
|
|
61
|
+
* // Stream format (default) - for IPC
|
|
62
|
+
* const streamCodec = new ArrowCodec({ format: 'stream' });
|
|
63
|
+
*
|
|
64
|
+
* // File format - for random access
|
|
65
|
+
* const fileCodec = new ArrowCodec({ format: 'file' });
|
|
66
|
+
* ```
|
|
67
|
+
*
|
|
68
|
+
* ## Integration with @procwire/transport
|
|
69
|
+
*
|
|
70
|
+
* ```ts
|
|
71
|
+
* import { ArrowCodec } from '@procwire/codec-arrow';
|
|
72
|
+
* import { RequestChannel } from '@procwire/transport/channel';
|
|
73
|
+
*
|
|
74
|
+
* const channel = new RequestChannel({
|
|
75
|
+
* transport,
|
|
76
|
+
* framing,
|
|
77
|
+
* serialization: new ArrowCodec(),
|
|
78
|
+
* protocol
|
|
79
|
+
* });
|
|
80
|
+
* ```
|
|
81
|
+
*
|
|
82
|
+
* @packageDocumentation
|
|
83
|
+
* @module codec-arrow
|
|
84
|
+
*/
|
|
85
|
+
export { ArrowCodec } from "./codec.js";
|
|
86
|
+
export type { ArrowCodecOptions, ArrowCodecMetrics, ArrowIPCFormat } from "./codec.js";
|
|
87
|
+
export { createFastArrowCodec, createMonitoredArrowCodec, createFileArrowCodec } from "./codec.js";
|
|
88
|
+
/**
|
|
89
|
+
* Re-export of Table from apache-arrow.
|
|
90
|
+
*
|
|
91
|
+
* The Table class is the primary data structure for Apache Arrow.
|
|
92
|
+
* It represents a two-dimensional dataset with named columns,
|
|
93
|
+
* similar to a DataFrame in pandas or R.
|
|
94
|
+
*
|
|
95
|
+
* @example Creating a Table
|
|
96
|
+
* ```ts
|
|
97
|
+
* import { tableFromArrays, Table } from 'apache-arrow';
|
|
98
|
+
*
|
|
99
|
+
* const table: Table = tableFromArrays({
|
|
23
100
|
* id: [1, 2, 3],
|
|
24
101
|
* name: ['Alice', 'Bob', 'Charlie']
|
|
25
102
|
* });
|
|
103
|
+
* ```
|
|
26
104
|
*
|
|
27
|
-
*
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
*
|
|
105
|
+
* @see {@link https://arrow.apache.org/docs/js/classes/Arrow_dom.Table.html | Apache Arrow Table documentation}
|
|
106
|
+
*/
|
|
107
|
+
export type { Table } from "apache-arrow";
|
|
108
|
+
/**
|
|
109
|
+
* Re-export of Schema from apache-arrow.
|
|
32
110
|
*
|
|
33
|
-
*
|
|
34
|
-
*
|
|
35
|
-
*
|
|
111
|
+
* The Schema class describes the structure of an Arrow Table,
|
|
112
|
+
* including column names, types, and metadata.
|
|
113
|
+
*
|
|
114
|
+
* @see {@link https://arrow.apache.org/docs/js/classes/Arrow_dom.Schema.html | Apache Arrow Schema documentation}
|
|
115
|
+
*/
|
|
116
|
+
export type { Schema } from "apache-arrow";
|
|
117
|
+
/**
|
|
118
|
+
* Re-export of Field from apache-arrow.
|
|
119
|
+
*
|
|
120
|
+
* The Field class represents a single column definition in a Schema,
|
|
121
|
+
* including the column name, data type, and nullability.
|
|
122
|
+
*
|
|
123
|
+
* @see {@link https://arrow.apache.org/docs/js/classes/Arrow_dom.Field.html | Apache Arrow Field documentation}
|
|
124
|
+
*/
|
|
125
|
+
export type { Field } from "apache-arrow";
|
|
126
|
+
/**
|
|
127
|
+
* Re-export of RecordBatch from apache-arrow.
|
|
128
|
+
*
|
|
129
|
+
* A RecordBatch is a chunk of a Table, containing a fixed number of rows
|
|
130
|
+
* with the same schema. Tables are composed of one or more RecordBatches.
|
|
131
|
+
*
|
|
132
|
+
* @see {@link https://arrow.apache.org/docs/js/classes/Arrow_dom.RecordBatch.html | Apache Arrow RecordBatch documentation}
|
|
36
133
|
*/
|
|
37
|
-
export
|
|
38
|
-
readonly name = "arrow";
|
|
39
|
-
readonly contentType = "application/vnd.apache.arrow.stream";
|
|
40
|
-
/**
|
|
41
|
-
* Serializes an Apache Arrow Table to IPC stream format.
|
|
42
|
-
*
|
|
43
|
-
* @param value - Arrow Table to serialize
|
|
44
|
-
* @returns Buffer containing Arrow IPC stream data
|
|
45
|
-
* @throws {SerializationError} if encoding fails
|
|
46
|
-
*/
|
|
47
|
-
serialize(value: Table): Buffer;
|
|
48
|
-
/**
|
|
49
|
-
* Deserializes Arrow IPC stream data to an Apache Arrow Table.
|
|
50
|
-
*
|
|
51
|
-
* @param buffer - Buffer containing Arrow IPC stream data
|
|
52
|
-
* @returns Deserialized Arrow Table
|
|
53
|
-
* @throws {SerializationError} if decoding fails
|
|
54
|
-
*/
|
|
55
|
-
deserialize(buffer: Buffer): Table;
|
|
56
|
-
}
|
|
134
|
+
export type { RecordBatch } from "apache-arrow";
|
|
57
135
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAmFG;AAGH,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AACxC,YAAY,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAGvF,OAAO,EAAE,oBAAoB,EAAE,yBAAyB,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AAEnG;;;;;;;;;;;;;;;;;;GAkBG;AACH,YAAY,EAAE,KAAK,EAAE,MAAM,cAAc,CAAC;AAE1C;;;;;;;GAOG;AACH,YAAY,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAE3C;;;;;;;GAOG;AACH,YAAY,EAAE,KAAK,EAAE,MAAM,cAAc,CAAC;AAE1C;;;;;;;GAOG;AACH,YAAY,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,72 +1,89 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Apache Arrow codec for @procwire/transport.
|
|
3
|
-
* Provides columnar data serialization using apache-arrow.
|
|
2
|
+
* Apache Arrow IPC serialization codec for @procwire/transport.
|
|
4
3
|
*
|
|
5
|
-
*
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
*
|
|
11
|
-
*
|
|
4
|
+
* Provides high-performance columnar data serialization using Apache Arrow,
|
|
5
|
+
* optimized for analytics workloads and large datasets. This codec implements
|
|
6
|
+
* the {@link SerializationCodec} interface for seamless integration with
|
|
7
|
+
* @procwire/transport channels.
|
|
8
|
+
*
|
|
9
|
+
* ## Features
|
|
10
|
+
*
|
|
11
|
+
* - **Zero-copy serialization** - Minimizes memory allocations and copies
|
|
12
|
+
* - **Columnar format** - Optimized for analytics and batch processing
|
|
13
|
+
* - **Large dataset support** - Efficiently handles millions of rows
|
|
14
|
+
* - **Cross-language compatibility** - Works with Python (PyArrow), R, Java, etc.
|
|
15
|
+
* - **Built-in metrics** - Optional monitoring of throughput and errors
|
|
16
|
+
* - **Configurable formats** - Stream (default) or file format
|
|
17
|
+
*
|
|
18
|
+
* ## When to Use Arrow
|
|
19
|
+
*
|
|
20
|
+
* Apache Arrow is ideal for:
|
|
21
|
+
* - Data analytics and processing pipelines
|
|
22
|
+
* - Transferring tabular data between processes
|
|
23
|
+
* - Interoperability with data science tools (pandas, R, Spark)
|
|
24
|
+
* - High-throughput, low-latency data transfer
|
|
25
|
+
* - Large datasets where columnar access patterns dominate
|
|
26
|
+
*
|
|
27
|
+
* For small messages or non-tabular data, consider {@link @procwire/codec-msgpack}
|
|
28
|
+
* or {@link @procwire/codec-protobuf} instead.
|
|
29
|
+
*
|
|
30
|
+
* ## Quick Start
|
|
12
31
|
*
|
|
13
|
-
* @example
|
|
14
32
|
* ```ts
|
|
15
33
|
* import { tableFromArrays } from 'apache-arrow';
|
|
16
34
|
* import { ArrowCodec } from '@procwire/codec-arrow';
|
|
17
|
-
* import { ChannelBuilder } from '@procwire/transport';
|
|
18
35
|
*
|
|
19
36
|
* const codec = new ArrowCodec();
|
|
20
37
|
*
|
|
21
|
-
* // Create
|
|
38
|
+
* // Create an Arrow table
|
|
22
39
|
* const table = tableFromArrays({
|
|
23
|
-
* id: [1, 2, 3],
|
|
24
|
-
* name: ['Alice', 'Bob', 'Charlie']
|
|
40
|
+
* id: [1, 2, 3, 4, 5],
|
|
41
|
+
* name: ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
|
|
42
|
+
* score: [95.5, 87.3, 92.1, 88.7, 91.2]
|
|
25
43
|
* });
|
|
26
44
|
*
|
|
27
|
-
* //
|
|
28
|
-
* const
|
|
29
|
-
* .withSerialization(codec)
|
|
30
|
-
* // ... other configuration
|
|
31
|
-
* .build();
|
|
45
|
+
* // Serialize to IPC format
|
|
46
|
+
* const buffer = codec.serialize(table);
|
|
32
47
|
*
|
|
33
|
-
* //
|
|
34
|
-
*
|
|
48
|
+
* // Deserialize back to Table
|
|
49
|
+
* const decoded = codec.deserialize(buffer);
|
|
50
|
+
* console.log(decoded.numRows); // 5
|
|
35
51
|
* ```
|
|
52
|
+
*
|
|
53
|
+
* ## IPC Formats
|
|
54
|
+
*
|
|
55
|
+
* Arrow supports two IPC formats:
|
|
56
|
+
*
|
|
57
|
+
* - **Stream format** (default): Smaller size, no footer, ideal for streaming/IPC
|
|
58
|
+
* - **File format**: Includes footer for random access, suitable for file storage
|
|
59
|
+
*
|
|
60
|
+
* ```ts
|
|
61
|
+
* // Stream format (default) - for IPC
|
|
62
|
+
* const streamCodec = new ArrowCodec({ format: 'stream' });
|
|
63
|
+
*
|
|
64
|
+
* // File format - for random access
|
|
65
|
+
* const fileCodec = new ArrowCodec({ format: 'file' });
|
|
66
|
+
* ```
|
|
67
|
+
*
|
|
68
|
+
* ## Integration with @procwire/transport
|
|
69
|
+
*
|
|
70
|
+
* ```ts
|
|
71
|
+
* import { ArrowCodec } from '@procwire/codec-arrow';
|
|
72
|
+
* import { RequestChannel } from '@procwire/transport/channel';
|
|
73
|
+
*
|
|
74
|
+
* const channel = new RequestChannel({
|
|
75
|
+
* transport,
|
|
76
|
+
* framing,
|
|
77
|
+
* serialization: new ArrowCodec(),
|
|
78
|
+
* protocol
|
|
79
|
+
* });
|
|
80
|
+
* ```
|
|
81
|
+
*
|
|
82
|
+
* @packageDocumentation
|
|
83
|
+
* @module codec-arrow
|
|
36
84
|
*/
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
* Serializes an Apache Arrow Table to IPC stream format.
|
|
42
|
-
*
|
|
43
|
-
* @param value - Arrow Table to serialize
|
|
44
|
-
* @returns Buffer containing Arrow IPC stream data
|
|
45
|
-
* @throws {SerializationError} if encoding fails
|
|
46
|
-
*/
|
|
47
|
-
serialize(value) {
|
|
48
|
-
try {
|
|
49
|
-
const uint8array = tableToIPC(value);
|
|
50
|
-
return Buffer.from(uint8array);
|
|
51
|
-
}
|
|
52
|
-
catch (error) {
|
|
53
|
-
throw new SerializationError(`Failed to encode Arrow table: ${error instanceof Error ? error.message : String(error)}`, error);
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
/**
|
|
57
|
-
* Deserializes Arrow IPC stream data to an Apache Arrow Table.
|
|
58
|
-
*
|
|
59
|
-
* @param buffer - Buffer containing Arrow IPC stream data
|
|
60
|
-
* @returns Deserialized Arrow Table
|
|
61
|
-
* @throws {SerializationError} if decoding fails
|
|
62
|
-
*/
|
|
63
|
-
deserialize(buffer) {
|
|
64
|
-
try {
|
|
65
|
-
return tableFromIPC(buffer);
|
|
66
|
-
}
|
|
67
|
-
catch (error) {
|
|
68
|
-
throw new SerializationError(`Failed to decode Arrow table: ${error instanceof Error ? error.message : String(error)}`, error);
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
}
|
|
85
|
+
// Main codec class and options
|
|
86
|
+
export { ArrowCodec } from "./codec.js";
|
|
87
|
+
// Helper functions
|
|
88
|
+
export { createFastArrowCodec, createMonitoredArrowCodec, createFileArrowCodec } from "./codec.js";
|
|
72
89
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAmFG;AAEH,+BAA+B;AAC/B,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAGxC,mBAAmB;AACnB,OAAO,EAAE,oBAAoB,EAAE,yBAAyB,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC"}
|