@procwire/codec-arrow 0.1.3 → 0.2.1

package/dist/index.d.ts CHANGED
@@ -1,57 +1,135 @@
  /**
- * Apache Arrow codec for @procwire/transport.
- * Provides columnar data serialization using apache-arrow.
+ * Apache Arrow IPC serialization codec for @procwire/transport.
  *
- * @module Codec Arrow
- */
- import type { Table } from "apache-arrow";
- import type { SerializationCodec } from "@procwire/transport/serialization";
- /**
- * Apache Arrow serialization codec.
- * Implements efficient columnar data serialization ideal for analytical workloads.
+ * Provides high-performance columnar data serialization using Apache Arrow,
+ * optimized for analytics workloads and large datasets. This codec implements
+ * the {@link SerializationCodec} interface for seamless integration with
+ * @procwire/transport channels.
+ *
+ * ## Features
+ *
+ * - **Zero-copy serialization** - Minimizes memory allocations and copies
+ * - **Columnar format** - Optimized for analytics and batch processing
+ * - **Large dataset support** - Efficiently handles millions of rows
+ * - **Cross-language compatibility** - Works with Python (PyArrow), R, Java, etc.
+ * - **Built-in metrics** - Optional monitoring of throughput and errors
+ * - **Configurable formats** - Stream (default) or file format
+ *
+ * ## When to Use Arrow
+ *
+ * Apache Arrow is ideal for:
+ * - Data analytics and processing pipelines
+ * - Transferring tabular data between processes
+ * - Interoperability with data science tools (pandas, R, Spark)
+ * - High-throughput, low-latency data transfer
+ * - Large datasets where columnar access patterns dominate
+ *
+ * For small messages or non-tabular data, consider {@link @procwire/codec-msgpack}
+ * or {@link @procwire/codec-protobuf} instead.
+ *
+ * ## Quick Start
  *
- * @example
  * ```ts
  * import { tableFromArrays } from 'apache-arrow';
  * import { ArrowCodec } from '@procwire/codec-arrow';
- * import { ChannelBuilder } from '@procwire/transport';
  *
  * const codec = new ArrowCodec();
  *
- * // Create a table
+ * // Create an Arrow table
  * const table = tableFromArrays({
+ * id: [1, 2, 3, 4, 5],
+ * name: ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
+ * score: [95.5, 87.3, 92.1, 88.7, 91.2]
+ * });
+ *
+ * // Serialize to IPC format
+ * const buffer = codec.serialize(table);
+ *
+ * // Deserialize back to Table
+ * const decoded = codec.deserialize(buffer);
+ * console.log(decoded.numRows); // 5
+ * ```
+ *
+ * ## IPC Formats
+ *
+ * Arrow supports two IPC formats:
+ *
+ * - **Stream format** (default): Smaller size, no footer, ideal for streaming/IPC
+ * - **File format**: Includes footer for random access, suitable for file storage
+ *
+ * ```ts
+ * // Stream format (default) - for IPC
+ * const streamCodec = new ArrowCodec({ format: 'stream' });
+ *
+ * // File format - for random access
+ * const fileCodec = new ArrowCodec({ format: 'file' });
+ * ```
+ *
+ * ## Integration with @procwire/transport
+ *
+ * ```ts
+ * import { ArrowCodec } from '@procwire/codec-arrow';
+ * import { RequestChannel } from '@procwire/transport/channel';
+ *
+ * const channel = new RequestChannel({
+ * transport,
+ * framing,
+ * serialization: new ArrowCodec(),
+ * protocol
+ * });
+ * ```
+ *
+ * @packageDocumentation
+ * @module codec-arrow
+ */
+ export { ArrowCodec } from "./codec.js";
+ export type { ArrowCodecOptions, ArrowCodecMetrics, ArrowIPCFormat } from "./codec.js";
+ export { createFastArrowCodec, createMonitoredArrowCodec, createFileArrowCodec } from "./codec.js";
+ /**
+ * Re-export of Table from apache-arrow.
+ *
+ * The Table class is the primary data structure for Apache Arrow.
+ * It represents a two-dimensional dataset with named columns,
+ * similar to a DataFrame in pandas or R.
+ *
+ * @example Creating a Table
+ * ```ts
+ * import { tableFromArrays, Table } from 'apache-arrow';
+ *
+ * const table: Table = tableFromArrays({
  * id: [1, 2, 3],
  * name: ['Alice', 'Bob', 'Charlie']
  * });
+ * ```
  *
- * // Use with channel
- * const channel = new ChannelBuilder()
- * .withSerialization(codec)
- * // ... other configuration
- * .build();
+ * @see {@link https://arrow.apache.org/docs/js/classes/Arrow_dom.Table.html | Apache Arrow Table documentation}
+ */
+ export type { Table } from "apache-arrow";
+ /**
+ * Re-export of Schema from apache-arrow.
  *
- * // Send table over channel
- * await channel.request('process', table);
- * ```
+ * The Schema class describes the structure of an Arrow Table,
+ * including column names, types, and metadata.
+ *
+ * @see {@link https://arrow.apache.org/docs/js/classes/Arrow_dom.Schema.html | Apache Arrow Schema documentation}
+ */
+ export type { Schema } from "apache-arrow";
+ /**
+ * Re-export of Field from apache-arrow.
+ *
+ * The Field class represents a single column definition in a Schema,
+ * including the column name, data type, and nullability.
+ *
+ * @see {@link https://arrow.apache.org/docs/js/classes/Arrow_dom.Field.html | Apache Arrow Field documentation}
+ */
+ export type { Field } from "apache-arrow";
+ /**
+ * Re-export of RecordBatch from apache-arrow.
+ *
+ * A RecordBatch is a chunk of a Table, containing a fixed number of rows
+ * with the same schema. Tables are composed of one or more RecordBatches.
+ *
+ * @see {@link https://arrow.apache.org/docs/js/classes/Arrow_dom.RecordBatch.html | Apache Arrow RecordBatch documentation}
  */
- export declare class ArrowCodec implements SerializationCodec<Table> {
- readonly name = "arrow";
- readonly contentType = "application/vnd.apache.arrow.stream";
- /**
- * Serializes an Apache Arrow Table to IPC stream format.
- *
- * @param value - Arrow Table to serialize
- * @returns Buffer containing Arrow IPC stream data
- * @throws {SerializationError} if encoding fails
- */
- serialize(value: Table): Buffer;
- /**
- * Deserializes Arrow IPC stream data to an Apache Arrow Table.
- *
- * @param buffer - Buffer containing Arrow IPC stream data
- * @returns Deserialized Arrow Table
- * @throws {SerializationError} if decoding fails
- */
- deserialize(buffer: Buffer): Table;
- }
+ export type { RecordBatch } from "apache-arrow";
  //# sourceMappingURL=index.d.ts.map
package/dist/index.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,cAAc,CAAC;AAE1C,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,mCAAmC,CAAC;AAG5E;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,qBAAa,UAAW,YAAW,kBAAkB,CAAC,KAAK,CAAC;IAC1D,QAAQ,CAAC,IAAI,WAAW;IACxB,QAAQ,CAAC,WAAW,yCAAyC;IAE7D;;;;;;OAMG;IACH,SAAS,CAAC,KAAK,EAAE,KAAK,GAAG,MAAM;IAY/B;;;;;;OAMG;IACH,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,KAAK;CAUnC"}
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAmFG;AAGH,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AACxC,YAAY,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAGvF,OAAO,EAAE,oBAAoB,EAAE,yBAAyB,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AAEnG;;;;;;;;;;;;;;;;;;GAkBG;AACH,YAAY,EAAE,KAAK,EAAE,MAAM,cAAc,CAAC;AAE1C;;;;;;;GAOG;AACH,YAAY,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAE3C;;;;;;;GAOG;AACH,YAAY,EAAE,KAAK,EAAE,MAAM,cAAc,CAAC;AAE1C;;;;;;;GAOG;AACH,YAAY,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC"}
package/dist/index.js CHANGED
@@ -1,72 +1,89 @@
  /**
- * Apache Arrow codec for @procwire/transport.
- * Provides columnar data serialization using apache-arrow.
+ * Apache Arrow IPC serialization codec for @procwire/transport.
  *
- * @module Codec Arrow
- */
- import { tableFromIPC, tableToIPC } from "apache-arrow";
- import { SerializationError } from "@procwire/transport";
- /**
- * Apache Arrow serialization codec.
- * Implements efficient columnar data serialization ideal for analytical workloads.
+ * Provides high-performance columnar data serialization using Apache Arrow,
+ * optimized for analytics workloads and large datasets. This codec implements
+ * the {@link SerializationCodec} interface for seamless integration with
+ * @procwire/transport channels.
+ *
+ * ## Features
+ *
+ * - **Zero-copy serialization** - Minimizes memory allocations and copies
+ * - **Columnar format** - Optimized for analytics and batch processing
+ * - **Large dataset support** - Efficiently handles millions of rows
+ * - **Cross-language compatibility** - Works with Python (PyArrow), R, Java, etc.
+ * - **Built-in metrics** - Optional monitoring of throughput and errors
+ * - **Configurable formats** - Stream (default) or file format
+ *
+ * ## When to Use Arrow
+ *
+ * Apache Arrow is ideal for:
+ * - Data analytics and processing pipelines
+ * - Transferring tabular data between processes
+ * - Interoperability with data science tools (pandas, R, Spark)
+ * - High-throughput, low-latency data transfer
+ * - Large datasets where columnar access patterns dominate
+ *
+ * For small messages or non-tabular data, consider {@link @procwire/codec-msgpack}
+ * or {@link @procwire/codec-protobuf} instead.
+ *
+ * ## Quick Start
  *
- * @example
  * ```ts
  * import { tableFromArrays } from 'apache-arrow';
  * import { ArrowCodec } from '@procwire/codec-arrow';
- * import { ChannelBuilder } from '@procwire/transport';
  *
  * const codec = new ArrowCodec();
  *
- * // Create a table
+ * // Create an Arrow table
  * const table = tableFromArrays({
- * id: [1, 2, 3],
- * name: ['Alice', 'Bob', 'Charlie']
+ * id: [1, 2, 3, 4, 5],
+ * name: ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
+ * score: [95.5, 87.3, 92.1, 88.7, 91.2]
  * });
  *
- * // Use with channel
- * const channel = new ChannelBuilder()
- * .withSerialization(codec)
- * // ... other configuration
- * .build();
+ * // Serialize to IPC format
+ * const buffer = codec.serialize(table);
  *
- * // Send table over channel
- * await channel.request('process', table);
+ * // Deserialize back to Table
+ * const decoded = codec.deserialize(buffer);
+ * console.log(decoded.numRows); // 5
  * ```
+ *
+ * ## IPC Formats
+ *
+ * Arrow supports two IPC formats:
+ *
+ * - **Stream format** (default): Smaller size, no footer, ideal for streaming/IPC
+ * - **File format**: Includes footer for random access, suitable for file storage
+ *
+ * ```ts
+ * // Stream format (default) - for IPC
+ * const streamCodec = new ArrowCodec({ format: 'stream' });
+ *
+ * // File format - for random access
+ * const fileCodec = new ArrowCodec({ format: 'file' });
+ * ```
+ *
+ * ## Integration with @procwire/transport
+ *
+ * ```ts
+ * import { ArrowCodec } from '@procwire/codec-arrow';
+ * import { RequestChannel } from '@procwire/transport/channel';
+ *
+ * const channel = new RequestChannel({
+ * transport,
+ * framing,
+ * serialization: new ArrowCodec(),
+ * protocol
+ * });
+ * ```
+ *
+ * @packageDocumentation
+ * @module codec-arrow
  */
- export class ArrowCodec {
- name = "arrow";
- contentType = "application/vnd.apache.arrow.stream";
- /**
- * Serializes an Apache Arrow Table to IPC stream format.
- *
- * @param value - Arrow Table to serialize
- * @returns Buffer containing Arrow IPC stream data
- * @throws {SerializationError} if encoding fails
- */
- serialize(value) {
- try {
- const uint8array = tableToIPC(value);
- return Buffer.from(uint8array);
- }
- catch (error) {
- throw new SerializationError(`Failed to encode Arrow table: ${error instanceof Error ? error.message : String(error)}`, error);
- }
- }
- /**
- * Deserializes Arrow IPC stream data to an Apache Arrow Table.
- *
- * @param buffer - Buffer containing Arrow IPC stream data
- * @returns Deserialized Arrow Table
- * @throws {SerializationError} if decoding fails
- */
- deserialize(buffer) {
- try {
- return tableFromIPC(buffer);
- }
- catch (error) {
- throw new SerializationError(`Failed to decode Arrow table: ${error instanceof Error ? error.message : String(error)}`, error);
- }
- }
- }
+ // Main codec class and options
+ export { ArrowCodec } from "./codec.js";
+ // Helper functions
+ export { createFastArrowCodec, createMonitoredArrowCodec, createFileArrowCodec } from "./codec.js";
  //# sourceMappingURL=index.js.map
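In 0.1.3, `index.js` defined `ArrowCodec` inline as a thin wrapper over `tableToIPC`/`tableFromIPC` that wrapped failures in `SerializationError`; in 0.2.1 that implementation moves into `./codec.js` and the entry point only re-exports it plus the helper factories. Existing call sites should therefore carry over unchanged, as in this sketch (assuming the zero-argument constructor still defaults to the stream IPC format, as the JSDoc above states):

```ts
import { tableFromArrays, type Table } from "apache-arrow";
import { ArrowCodec } from "@procwire/codec-arrow";

// 0.1.3-style usage: zero-argument constructor, serialize/deserialize round trip.
const codec = new ArrowCodec();
const table: Table = tableFromArrays({ id: [1, 2, 3] });
const streamBytes = codec.serialize(table);   // Arrow IPC stream bytes (a Buffer in 0.1.3)
const decoded = codec.deserialize(streamBytes);
console.log(decoded.numRows); // 3

// New in 0.2.x per the JSDoc: an options object selecting the IPC format.
// The file format adds a footer so readers can do random access.
const fileCodec = new ArrowCodec({ format: "file" });
const fileBytes = fileCodec.serialize(table);
console.log(fileCodec.deserialize(fileBytes).numRows); // 3
```

Error handling is not shown: whether 0.2.1 still throws `SerializationError` on malformed input is decided in `codec.js`, which this diff does not include.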
package/dist/index.js.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAExD,OAAO,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AAEzD;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,MAAM,OAAO,UAAU;IACZ,IAAI,GAAG,OAAO,CAAC;IACf,WAAW,GAAG,qCAAqC,CAAC;IAE7D;;;;;;OAMG;IACH,SAAS,CAAC,KAAY;QACpB,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;YACrC,OAAO,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACjC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,kBAAkB,CAC1B,iCAAiC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,EACzF,KAAK,CACN,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACH,WAAW,CAAC,MAAc;QACxB,IAAI,CAAC;YACH,OAAO,YAAY,CAAC,MAAM,CAAC,CAAC;QAC9B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,kBAAkB,CAC1B,iCAAiC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,EACzF,KAAK,CACN,CAAC;QACJ,CAAC;IACH,CAAC;CACF"}
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAmFG;AAEH,+BAA+B;AAC/B,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAGxC,mBAAmB;AACnB,OAAO,EAAE,oBAAoB,EAAE,yBAAyB,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@procwire/codec-arrow",
- "version": "0.1.3",
+ "version": "0.2.1",
  "description": "Apache Arrow IPC codec for @procwire/transport.",
  "keywords": [
  "ipc",
@@ -47,7 +47,7 @@
  "provenance": true
  },
  "dependencies": {
- "@procwire/transport": "0.1.3"
+ "@procwire/transport": "0.2.0"
  },
  "peerDependencies": {
  "apache-arrow": "^21.0.0"