@query-farm/vgi-rpc 0.6.4 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/access-log.d.ts +55 -0
- package/dist/access-log.d.ts.map +1 -0
- package/dist/arrow/impl-arrowjs/index.d.ts +96 -0
- package/dist/arrow/impl-arrowjs/index.d.ts.map +1 -0
- package/dist/arrow/impl-flechette/index.d.ts +102 -0
- package/dist/arrow/impl-flechette/index.d.ts.map +1 -0
- package/dist/arrow/impl-flechette/message-meta.d.ts +11 -0
- package/dist/arrow/impl-flechette/message-meta.d.ts.map +1 -0
- package/dist/arrow/index.d.ts +4 -0
- package/dist/arrow/index.d.ts.map +1 -0
- package/dist/arrow/predicates.d.ts +44 -0
- package/dist/arrow/predicates.d.ts.map +1 -0
- package/dist/arrow/types.d.ts +62 -0
- package/dist/arrow/types.d.ts.map +1 -0
- package/dist/auth.d.ts +5 -0
- package/dist/auth.d.ts.map +1 -1
- package/dist/client/capabilities.d.ts +25 -0
- package/dist/client/capabilities.d.ts.map +1 -0
- package/dist/client/connect.d.ts +10 -0
- package/dist/client/connect.d.ts.map +1 -1
- package/dist/client/introspect.d.ts +21 -0
- package/dist/client/introspect.d.ts.map +1 -1
- package/dist/client/ipc.d.ts +8 -2
- package/dist/client/ipc.d.ts.map +1 -1
- package/dist/client/oauth.d.ts +9 -0
- package/dist/client/oauth.d.ts.map +1 -1
- package/dist/client/pipe.d.ts +24 -0
- package/dist/client/pipe.d.ts.map +1 -1
- package/dist/client/stream.d.ts +19 -2
- package/dist/client/stream.d.ts.map +1 -1
- package/dist/client/types.d.ts +23 -0
- package/dist/client/types.d.ts.map +1 -1
- package/dist/client/uploadUrl.d.ts +25 -0
- package/dist/client/uploadUrl.d.ts.map +1 -0
- package/dist/constants.d.ts +30 -2
- package/dist/constants.d.ts.map +1 -1
- package/dist/crypto.d.ts +22 -0
- package/dist/crypto.d.ts.map +1 -0
- package/dist/dispatch/describe.d.ts +10 -6
- package/dist/dispatch/describe.d.ts.map +1 -1
- package/dist/dispatch/stream.d.ts +2 -2
- package/dist/dispatch/stream.d.ts.map +1 -1
- package/dist/dispatch/unary.d.ts +2 -2
- package/dist/dispatch/unary.d.ts.map +1 -1
- package/dist/errors.d.ts +64 -1
- package/dist/errors.d.ts.map +1 -1
- package/dist/external.d.ts +27 -5
- package/dist/external.d.ts.map +1 -1
- package/dist/http/auth.d.ts +13 -0
- package/dist/http/auth.d.ts.map +1 -1
- package/dist/http/bearer.d.ts.map +1 -1
- package/dist/http/common.d.ts +43 -7
- package/dist/http/common.d.ts.map +1 -1
- package/dist/http/dispatch.d.ts +20 -2
- package/dist/http/dispatch.d.ts.map +1 -1
- package/dist/http/handler.d.ts.map +1 -1
- package/dist/http/index.d.ts +1 -0
- package/dist/http/index.d.ts.map +1 -1
- package/dist/http/jwt.d.ts +1 -0
- package/dist/http/jwt.d.ts.map +1 -1
- package/dist/http/mtls.d.ts +9 -1
- package/dist/http/mtls.d.ts.map +1 -1
- package/dist/http/oauth-pkce.d.ts +141 -0
- package/dist/http/oauth-pkce.d.ts.map +1 -0
- package/dist/http/pages.d.ts +3 -0
- package/dist/http/pages.d.ts.map +1 -1
- package/dist/http/sticky.d.ts +124 -0
- package/dist/http/sticky.d.ts.map +1 -0
- package/dist/http/token.d.ts +43 -12
- package/dist/http/token.d.ts.map +1 -1
- package/dist/http/types.d.ts +68 -5
- package/dist/http/types.d.ts.map +1 -1
- package/dist/index.d.ts +6 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1275 -3511
- package/dist/index.js.map +20 -38
- package/dist/launcher/hash.d.ts +22 -0
- package/dist/launcher/hash.d.ts.map +1 -0
- package/dist/launcher/index.d.ts +23 -0
- package/dist/launcher/index.d.ts.map +1 -0
- package/dist/launcher/launch.d.ts +27 -0
- package/dist/launcher/launch.d.ts.map +1 -0
- package/dist/launcher/lock.d.ts +19 -0
- package/dist/launcher/lock.d.ts.map +1 -0
- package/dist/launcher/serve-unix.d.ts +55 -0
- package/dist/launcher/serve-unix.d.ts.map +1 -0
- package/dist/launcher/state.d.ts +71 -0
- package/dist/launcher/state.d.ts.map +1 -0
- package/dist/otel.d.ts.map +1 -1
- package/dist/protocol.d.ts +19 -2
- package/dist/protocol.d.ts.map +1 -1
- package/dist/schema.d.ts +45 -18
- package/dist/schema.d.ts.map +1 -1
- package/dist/server.d.ts +23 -2
- package/dist/server.d.ts.map +1 -1
- package/dist/types.d.ts +270 -12
- package/dist/types.d.ts.map +1 -1
- package/dist/util/gzip.d.ts +10 -0
- package/dist/util/gzip.d.ts.map +1 -0
- package/dist/util/schema.d.ts +3 -15
- package/dist/util/schema.d.ts.map +1 -1
- package/dist/util/web-crypto.d.ts +22 -0
- package/dist/util/web-crypto.d.ts.map +1 -0
- package/dist/util/zstd.d.ts +26 -3
- package/dist/util/zstd.d.ts.map +1 -1
- package/dist/wire/opaque.d.ts +11 -0
- package/dist/wire/opaque.d.ts.map +1 -0
- package/dist/wire/reader.d.ts +5 -5
- package/dist/wire/reader.d.ts.map +1 -1
- package/dist/wire/request.d.ts +11 -3
- package/dist/wire/request.d.ts.map +1 -1
- package/dist/wire/response.d.ts +6 -6
- package/dist/wire/response.d.ts.map +1 -1
- package/dist/wire/writer.d.ts +49 -39
- package/dist/wire/writer.d.ts.map +1 -1
- package/package.json +35 -21
- package/src/access-log.ts +200 -0
- package/src/arrow/impl-arrowjs/index.ts +433 -0
- package/src/arrow/impl-flechette/index.ts +414 -0
- package/src/arrow/impl-flechette/message-meta.ts +174 -0
- package/src/arrow/index.ts +89 -0
- package/src/arrow/predicates.ts +56 -0
- package/src/arrow/types.ts +73 -0
- package/src/auth.ts +5 -0
- package/src/client/capabilities.ts +84 -0
- package/src/client/connect.ts +113 -26
- package/src/client/introspect.ts +74 -38
- package/src/client/ipc.ts +37 -27
- package/src/client/oauth.ts +9 -0
- package/src/client/pipe.ts +36 -9
- package/src/client/stream.ts +43 -20
- package/src/client/types.ts +23 -0
- package/src/client/uploadUrl.ts +169 -0
- package/src/constants.ts +34 -2
- package/src/crypto.ts +95 -0
- package/src/dispatch/describe.ts +146 -107
- package/src/dispatch/stream.ts +53 -24
- package/src/dispatch/unary.ts +5 -4
- package/src/errors.ts +87 -0
- package/src/external.ts +49 -30
- package/src/http/auth.ts +13 -0
- package/src/http/bearer.ts +2 -5
- package/src/http/common.ts +91 -23
- package/src/http/dispatch.ts +373 -46
- package/src/http/handler.ts +790 -68
- package/src/http/index.ts +1 -0
- package/src/http/jwt.ts +1 -0
- package/src/http/mtls.ts +25 -3
- package/src/http/oauth-pkce.ts +1035 -0
- package/src/http/pages.ts +30 -15
- package/src/http/sticky.ts +429 -0
- package/src/http/token.ts +170 -75
- package/src/http/types.ts +69 -5
- package/src/index.ts +40 -1
- package/src/launcher/hash.ts +104 -0
- package/src/launcher/index.ts +35 -0
- package/src/launcher/launch.ts +284 -0
- package/src/launcher/lock.ts +171 -0
- package/src/launcher/serve-unix.ts +386 -0
- package/src/launcher/state.ts +257 -0
- package/src/otel.ts +39 -33
- package/src/protocol.ts +30 -3
- package/src/schema.ts +107 -56
- package/src/server.ts +196 -20
- package/src/types.ts +376 -18
- package/src/util/gzip.ts +63 -0
- package/src/util/schema.ts +4 -22
- package/src/util/web-crypto.ts +98 -0
- package/src/util/zstd.ts +133 -14
- package/src/wire/opaque.ts +37 -0
- package/src/wire/reader.ts +5 -4
- package/src/wire/request.ts +67 -8
- package/src/wire/response.ts +51 -85
- package/src/wire/writer.ts +165 -69
- package/dist/util/conform.d.ts +0 -18
- package/dist/util/conform.d.ts.map +0 -1
- package/src/util/conform.ts +0 -94
|
@@ -0,0 +1,433 @@
|
|
|
1
|
+
// arrow-js backend for vgi-rpc-typescript's Arrow facade.
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
Binary as A_Binary,
|
|
5
|
+
Bool as A_Bool,
|
|
6
|
+
Data as A_Data,
|
|
7
|
+
type DataType as A_DataType,
|
|
8
|
+
DataType as A_DataTypeNS,
|
|
9
|
+
DateDay as A_DateDay,
|
|
10
|
+
Decimal as A_Decimal,
|
|
11
|
+
Dictionary as A_Dictionary,
|
|
12
|
+
DurationMicrosecond as A_DurationMicrosecond,
|
|
13
|
+
Field as A_Field,
|
|
14
|
+
FixedSizeBinary as A_FixedSizeBinary,
|
|
15
|
+
Float32 as A_Float32,
|
|
16
|
+
Float64 as A_Float64,
|
|
17
|
+
Int8 as A_Int8,
|
|
18
|
+
Int16 as A_Int16,
|
|
19
|
+
Int32 as A_Int32,
|
|
20
|
+
Int64 as A_Int64,
|
|
21
|
+
LargeBinary as A_LargeBinary,
|
|
22
|
+
LargeUtf8 as A_LargeUtf8,
|
|
23
|
+
List as A_List,
|
|
24
|
+
Map_ as A_Map,
|
|
25
|
+
Null as A_Null,
|
|
26
|
+
RecordBatch as A_RecordBatch,
|
|
27
|
+
Schema as A_Schema,
|
|
28
|
+
Struct as A_Struct,
|
|
29
|
+
TimeMicrosecond as A_TimeMicrosecond,
|
|
30
|
+
Timestamp as A_Timestamp,
|
|
31
|
+
TimeUnit as A_TimeUnit,
|
|
32
|
+
Type as A_Type,
|
|
33
|
+
Uint8 as A_Uint8,
|
|
34
|
+
Uint16 as A_Uint16,
|
|
35
|
+
Uint32 as A_Uint32,
|
|
36
|
+
Uint64 as A_Uint64,
|
|
37
|
+
Utf8 as A_Utf8,
|
|
38
|
+
makeData as a_makeData,
|
|
39
|
+
vectorFromArray as a_vectorFromArray,
|
|
40
|
+
RecordBatchReader,
|
|
41
|
+
RecordBatchStreamWriter,
|
|
42
|
+
} from "@query-farm/apache-arrow";
|
|
43
|
+
|
|
44
|
+
// Type-only helper: signature of the predicate that conformBatchToSchema
// consults to decide whether a column's values must be re-encoded.
type _NeedsCast = (source: A_DataType, target: A_DataType) => boolean;
|
|
46
|
+
|
|
47
|
+
import type {
|
|
48
|
+
IncrementalEncoder,
|
|
49
|
+
VgiBackendInfo,
|
|
50
|
+
VgiBatch,
|
|
51
|
+
VgiColumnData,
|
|
52
|
+
VgiDataType,
|
|
53
|
+
VgiField,
|
|
54
|
+
VgiSchema,
|
|
55
|
+
} from "../types.js";
|
|
56
|
+
|
|
57
|
+
/** Descriptor for this backend: its name plus the `opaquePassthrough` flag (true here). */
export const backend: VgiBackendInfo = {
  name: "arrow-js",
  opaquePassthrough: true,
};
|
|
58
|
+
|
|
59
|
+
// ----- Type factories ------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
/** Re-brand an arrow-js type instance as the facade's `VgiDataType` handle (cast only). */
const asVgiType = (t: unknown): VgiDataType => t as VgiDataType;

/** Null type. */
export const nullType = (): VgiDataType => asVgiType(new A_Null());
/** Boolean type. */
export const bool = (): VgiDataType => asVgiType(new A_Bool());
/** Signed 8-bit integer type. */
export const int8 = (): VgiDataType => asVgiType(new A_Int8());
/** Signed 16-bit integer type. */
export const int16 = (): VgiDataType => asVgiType(new A_Int16());
/** Signed 32-bit integer type. */
export const int32 = (): VgiDataType => asVgiType(new A_Int32());
/** Signed 64-bit integer type. */
export const int64 = (): VgiDataType => asVgiType(new A_Int64());
/** Unsigned 8-bit integer type. */
export const uint8 = (): VgiDataType => asVgiType(new A_Uint8());
/** Unsigned 16-bit integer type. */
export const uint16 = (): VgiDataType => asVgiType(new A_Uint16());
/** Unsigned 32-bit integer type. */
export const uint32 = (): VgiDataType => asVgiType(new A_Uint32());
/** Unsigned 64-bit integer type. */
export const uint64 = (): VgiDataType => asVgiType(new A_Uint64());
/** 32-bit floating point type. */
export const float32 = (): VgiDataType => asVgiType(new A_Float32());
/** 64-bit floating point type. */
export const float64 = (): VgiDataType => asVgiType(new A_Float64());
/** UTF-8 string type. */
export const utf8 = (): VgiDataType => asVgiType(new A_Utf8());
/** Binary blob type. */
export const binary = (): VgiDataType => asVgiType(new A_Binary());
|
|
75
|
+
|
|
76
|
+
/** Timestamp in microseconds; `timezone` is optional and defaults to `null`. */
export const timestampMicro = (timezone: string | null = null): VgiDataType => {
  const ts = new A_Timestamp(A_TimeUnit.MICROSECOND, timezone);
  return ts as unknown as VgiDataType;
};

/** Date32 with day resolution. */
export const dateDay = (): VgiDataType => {
  const day = new A_DateDay();
  return day as unknown as VgiDataType;
};

/** Time64 with microsecond resolution. */
export const timeMicro = (): VgiDataType => {
  const time = new A_TimeMicrosecond();
  return time as unknown as VgiDataType;
};

/** Duration with microsecond resolution. */
export const durationMicro = (): VgiDataType => {
  const duration = new A_DurationMicrosecond();
  return duration as unknown as VgiDataType;
};
|
|
86
|
+
/**
 * Decimal type factory. Produces Decimal128 by default; pass `bitWidth = 256`
 * for Decimal256.
 *
 * @param precision Decimal precision.
 * @param scale Decimal scale.
 * @param bitWidth Storage width, 128 (default) or 256.
 */
export const decimal = (precision: number, scale: number, bitWidth: 128 | 256 = 128): VgiDataType => {
  // Note the argument order: the arrow-js constructor is called with scale
  // first and precision second, the reverse of this factory's parameters.
  const dec = new A_Decimal(scale, precision, bitWidth);
  return dec as unknown as VgiDataType;
};
|
|
89
|
+
/**
 * FixedSizeBinary type factory.
 *
 * @param byteWidth Width in bytes of every value.
 */
export const fixedSizeBinary = (byteWidth: number): VgiDataType => {
  const fixed = new A_FixedSizeBinary(byteWidth);
  return fixed as unknown as VgiDataType;
};
|
|
92
|
+
/** LargeUtf8 — UTF-8 string with 64-bit offsets. */
export const largeUtf8 = (): VgiDataType => {
  const largeString = new A_LargeUtf8();
  return largeString as unknown as VgiDataType;
};

/** LargeBinary — binary blob with 64-bit offsets. */
export const largeBinary = (): VgiDataType => {
  const largeBlob = new A_LargeBinary();
  return largeBlob as unknown as VgiDataType;
};
|
|
96
|
+
/**
 * List type factory.
 *
 * @param child Field describing the list items (its name, nullability and type).
 */
export const list = (child: VgiField): VgiDataType => {
  const itemField = child as unknown as A_Field;
  return new A_List(itemField) as unknown as VgiDataType;
};
|
|
98
|
+
/**
 * Struct type factory.
 *
 * @param fields Member fields of the struct, in order.
 */
export const struct = (fields: readonly VgiField[]): VgiDataType => {
  const members = fields as unknown as A_Field[];
  return new A_Struct(members) as unknown as VgiDataType;
};
|
|
101
|
+
/**
 * Map (key → value) type factory, carried as a List<Struct<key,value>>.
 *
 * arrow-js's `Map_` constructor takes a single child Field whose type is a
 * Struct of [key, value]; that field is built here, named "entries" and
 * marked non-nullable.
 *
 * @param keyField Field describing the map keys.
 * @param valueField Field describing the map values.
 * @param keysSorted Forwarded to `Map_` as its keys-sorted flag (default false).
 */
export const map = (keyField: VgiField, valueField: VgiField, keysSorted = false): VgiDataType => {
  const entryStruct = new A_Struct([keyField as unknown as A_Field, valueField as unknown as A_Field]);
  const entries = new A_Field("entries", entryStruct, /* nullable */ false);
  return new A_Map(entries, keysSorted) as unknown as VgiDataType;
};
|
|
109
|
+
/**
 * Dictionary-encoded type factory. `indices` must be an integer type.
 *
 * `id` is deliberately left undefined by default so that arrow-js's internal
 * `getId()` counter hands out a fresh unique id per Dictionary instance.
 * Supplying `-1` (or any concrete number) would bypass that counter and can
 * produce id collisions when several Dictionary types are in use.
 *
 * @param indices Index type.
 * @param values Type of the dictionary values.
 * @param id Optional explicit dictionary id.
 * @param ordered Forwarded as the dictionary's ordered flag (default false).
 */
export const dictionary = (indices: VgiDataType, values: VgiDataType, id?: number, ordered = false): VgiDataType => {
  // arrow-js takes the value type first and the index type second.
  const dict = new A_Dictionary(values as A_DataType, indices as any, id, ordered);
  return dict as unknown as VgiDataType;
};
|
|
117
|
+
|
|
118
|
+
/**
 * Build a field handle.
 *
 * @param name Field name.
 * @param type Field data type.
 * @param nullable Whether the field is nullable (default true).
 * @param metadata Optional field metadata; an empty Map is used when omitted.
 */
export function field(name: string, type: VgiDataType, nullable = true, metadata?: Map<string, string>): VgiField {
  const arrowField = new A_Field(name, type as A_DataType, nullable, metadata ?? new Map());
  return arrowField as unknown as VgiField;
}
|
|
121
|
+
|
|
122
|
+
/**
 * Build a schema handle.
 *
 * @param fields Fields of the schema, in order.
 * @param metadata Optional schema metadata; an empty Map is used when omitted.
 */
export function schema(fields: readonly VgiField[], metadata?: Map<string, string>): VgiSchema {
  const arrowSchema = new A_Schema(fields as A_Field[], metadata ?? new Map());
  return arrowSchema as unknown as VgiSchema;
}
|
|
125
|
+
|
|
126
|
+
// ----- IPC -----------------------------------------------------------------
|
|
127
|
+
|
|
128
|
+
/**
 * Serialize a schema as an IPC stream that contains no record batches.
 *
 * @param s Schema to encode.
 * @returns The bytes produced by the stream writer after it is closed.
 */
export function serializeSchema(s: VgiSchema): Uint8Array {
  const arrowSchema = s as unknown as A_Schema;
  const ipcWriter = new RecordBatchStreamWriter();
  // Resetting with a schema (and no sink override) primes the writer with it.
  ipcWriter.reset(undefined, arrowSchema);
  ipcWriter.close();
  return ipcWriter.toUint8Array(true);
}
|
|
134
|
+
|
|
135
|
+
/**
 * Recover the schema from an IPC stream.
 *
 * The stream is read to the end first. The schema of the first decoded batch
 * is preferred; otherwise the reader's own schema is used.
 *
 * @param bytes Encoded IPC stream.
 * @throws Error when neither a batch nor a reader-level schema is available.
 */
export function deserializeSchema(bytes: Uint8Array): VgiSchema {
  const ipcReader = RecordBatchReader.from(bytes);
  // Consume the whole reader before looking at `ipcReader.schema`.
  const decoded = Array.from(ipcReader);
  if (decoded.length > 0) {
    return decoded[0].schema as unknown as VgiSchema;
  }
  const readerSchema = ipcReader.schema;
  if (readerSchema) {
    return readerSchema as unknown as VgiSchema;
  }
  throw new Error("Cannot deserialize schema from empty IPC stream");
}
|
|
142
|
+
|
|
143
|
+
/**
 * Serialize one batch as a complete IPC stream, using the batch's own schema.
 *
 * @param batch Batch to encode.
 * @returns The bytes produced by the stream writer after it is closed.
 */
export function serializeBatch(batch: VgiBatch): Uint8Array {
  const arrowBatch = batch as unknown as A_RecordBatch;
  const ipcWriter = new RecordBatchStreamWriter();
  ipcWriter.reset(undefined, arrowBatch.schema);
  // The writer's internal `_writeRecordBatch` is invoked directly instead of
  // the public `write`.
  (ipcWriter as any)._writeRecordBatch(arrowBatch);
  ipcWriter.close();
  return ipcWriter.toUint8Array(true);
}
|
|
151
|
+
|
|
152
|
+
/**
 * Incremental IPC encoder built on arrow-js's `RecordBatchStreamWriter`.
 *
 * Every call empties the writer's internal sink queue and hands back the
 * bytes accumulated since the previous call, so the caller can flush them
 * synchronously between lockstep turns.
 *
 * Batches go through `_writeRecordBatch` rather than the public `write`:
 * `write` compares schemas and would auto-close the writer, silently dropping
 * a batch whose schema differs only in nullability. The output schema is
 * fixed when the encoder is created and all batches are structurally
 * compatible, so skipping that comparison is intended.
 *
 * @param s Schema the writer is primed with.
 */
export function createIncrementalEncoder(s: VgiSchema): IncrementalEncoder {
  const ipcWriter = new RecordBatchStreamWriter();
  ipcWriter.reset(undefined, s as unknown as A_Schema);

  /** Concatenate and clear whatever chunks are queued in the writer's sink. */
  function takePending(): Uint8Array {
    const queue = (ipcWriter as any)._sink._values as Uint8Array[];
    let size = 0;
    for (const chunk of queue) size += chunk.length;
    const merged = new Uint8Array(size);
    let cursor = 0;
    for (const chunk of queue) {
      merged.set(chunk, cursor);
      cursor += chunk.length;
    }
    // Truncate in place so the writer's own queue is emptied.
    queue.length = 0;
    return merged;
  }

  return {
    start: () => takePending(),
    writeBatch(batch: VgiBatch): Uint8Array {
      (ipcWriter as any)._writeRecordBatch(batch as unknown as A_RecordBatch);
      return takePending();
    },
    // EOS marker: continuation (0xFFFFFFFF) + metadata length (0x00000000).
    finish: () => Uint8Array.of(0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00),
  };
}
|
|
188
|
+
|
|
189
|
+
/**
 * Decode an IPC stream and return its first record batch.
 *
 * When the stream carries no batches, a 0-row batch is synthesized from the
 * reader's schema. If the reader exposes no schema either, a schema with no
 * fields is used.
 *
 * @param bytes Encoded IPC stream.
 * @returns The first decoded batch, or an empty batch matching the schema.
 */
export function deserializeBatch(bytes: Uint8Array): VgiBatch {
  const reader = RecordBatchReader.from(bytes);
  const batches = [...reader];
  if (batches.length > 0) {
    return batches[0] as unknown as VgiBatch;
  }

  // arrow-js quirk: an empty IPC stream may not give us a Schema at all, so
  // fall back to a schema with no fields.
  const sch = reader.schema ?? new A_Schema([]);
  // Give the struct one empty child per schema field. Building it with no
  // children would leave the batch's child count out of step with its schema
  // for every non-empty schema.
  const children = sch.fields.map((f) => makeEmptyDataRecursive(f.type));
  const structType = new A_Struct(sch.fields);
  const data = a_makeData({ type: structType, length: 0, children, nullCount: 0 });
  return new A_RecordBatch(sch, data) as unknown as VgiBatch;
}
|
|
202
|
+
|
|
203
|
+
// ----- Construction --------------------------------------------------------
|
|
204
|
+
|
|
205
|
+
/**
 * Build a column-data handle from plain JS values.
 *
 * @param values Values for the column.
 * @param type Target data type.
 * @returns The first Data chunk of the vector built from `values`.
 */
export function columnFromArray(values: any[], type: VgiDataType): VgiColumnData {
  const vector = a_vectorFromArray(values, type as A_DataType);
  return vector.data[0] as VgiColumnData;
}
|
|
208
|
+
|
|
209
|
+
/**
 * Build a 1-row batch from a `{ columnName: value }` record.
 *
 * For integer fields whose bit width is 64, a JS `number` value is converted
 * to `bigint` before the column is built; all other values pass through
 * untouched.
 *
 * @param s Schema of the batch.
 * @param values Cell values keyed by field name.
 */
export function singleRowBatch(s: VgiSchema, values: Record<string, any>): VgiBatch {
  const arrowSchema = s as unknown as A_Schema;
  const columns = arrowSchema.fields.map((fld) => {
    const raw = values[fld.name];
    const is64BitInt = fld.type.typeId === A_Type.Int && (fld.type as any).bitWidth === 64;
    const cell = is64BitInt && typeof raw === "number" ? BigInt(raw) : raw;
    return a_vectorFromArray([cell], fld.type).data[0];
  });
  const rowData = a_makeData({
    type: new A_Struct(arrowSchema.fields),
    length: 1,
    children: columns,
    nullCount: 0,
  });
  return new A_RecordBatch(arrowSchema, rowData) as unknown as VgiBatch;
}
|
|
223
|
+
|
|
224
|
+
/**
 * Build an N-row batch from columnar arrays keyed by field name.
 *
 * The row count is the length of the first schema field (in schema order)
 * for which a column was supplied, or 0 when none was. Fields without a
 * supplied column get a placeholder Data of that length whose null count
 * equals the row count.
 *
 * @param s Schema of the batch.
 * @param columns Column values keyed by field name.
 */
export function batchFromColumns(s: VgiSchema, columns: Record<string, any[]>): VgiBatch {
  const a = s as unknown as A_Schema;

  // Use the first column that is actually present. Looking only at the first
  // schema field would give a length-0 batch over non-empty children whenever
  // that one column is omitted but later ones are supplied.
  let numRows = 0;
  for (const f of a.fields) {
    const present = columns[f.name];
    if (present) {
      numRows = present.length;
      break;
    }
  }

  const children = a.fields.map((f) => {
    const vals = columns[f.name];
    if (!vals) return a_makeData({ type: f.type, length: numRows, nullCount: numRows });
    return a_vectorFromArray(vals, f.type).data[0];
  });
  const structType = new A_Struct(a.fields);
  const data = a_makeData({ type: structType, length: numRows, children, nullCount: 0 });
  return new A_RecordBatch(a, data) as unknown as VgiBatch;
}
|
|
237
|
+
|
|
238
|
+
/**
 * Build a batch from pre-built column-data handles plus a schema.
 *
 * When `metadata` is present and non-empty, the batch is created against a
 * new Schema that has the same fields and carries that metadata; otherwise
 * the given schema is used as-is.
 *
 * @param s Schema of the batch.
 * @param numRows Row count recorded on the batch.
 * @param columnData One column-data handle per schema field.
 * @param metadata Optional metadata to attach via the schema.
 */
export function batchFromColumnData(
  s: VgiSchema,
  numRows: number,
  columnData: VgiColumnData[],
  metadata?: Map<string, string>,
): VgiBatch {
  const arrowSchema = s as unknown as A_Schema;
  const rows = a_makeData({
    type: new A_Struct(arrowSchema.fields),
    length: numRows,
    children: columnData as any[],
    nullCount: 0,
  });
  if (metadata && metadata.size > 0) {
    const withMetadata = new A_Schema(arrowSchema.fields, metadata);
    return new A_RecordBatch(withMetadata, rows) as unknown as VgiBatch;
  }
  return new A_RecordBatch(arrowSchema, rows) as unknown as VgiBatch;
}
|
|
258
|
+
|
|
259
|
+
/**
 * Zero-length column data for `type`, for assembling batches from pre-built
 * children. Delegates to the recursive empty-Data builder.
 *
 * @param type Data type of the column.
 */
export function emptyColumnData(type: VgiDataType): VgiColumnData {
  const emptyData = makeEmptyDataRecursive(type as A_DataType);
  return emptyData as VgiColumnData;
}
|
|
263
|
+
|
|
264
|
+
/**
 * Build a 0-row batch, optionally carrying batch-level metadata (the wire
 * layer uses these for log/error/empty tombstone batches).
 *
 * @param s Schema of the batch.
 * @param metadata Optional metadata passed to the RecordBatch constructor.
 */
export function emptyBatchWithMetadata(s: VgiSchema, metadata?: Map<string, string>): VgiBatch {
  const arrowSchema = s as unknown as A_Schema;
  // One zero-length child per field, built recursively for nested types.
  const emptyColumns = arrowSchema.fields.map((fld) => makeEmptyDataRecursive(fld.type));
  const emptyRows = a_makeData({
    type: new A_Struct(arrowSchema.fields),
    length: 0,
    children: emptyColumns,
    nullCount: 0,
  });
  return new A_RecordBatch(arrowSchema, emptyRows, metadata) as unknown as VgiBatch;
}
|
|
275
|
+
|
|
276
|
+
/**
 * Recursively build zero-length Data for `type`.
 *
 * Nested types need their children populated so arrow-js's IPC writer does
 * not crash on List/Map/Struct/Union values with absent children. Any type
 * not matched below gets a plain zero-length Data.
 *
 * @param type arrow-js data type to build empty Data for.
 */
function makeEmptyDataRecursive(type: A_DataType): any {
  const nested = type as any;

  if (A_DataTypeNS.isStruct(type)) {
    const children = nested.children.map((f: any) => makeEmptyDataRecursive(f.type));
    return a_makeData({ type, length: 0, children, nullCount: 0 } as any);
  }

  if (A_DataTypeNS.isList(type)) {
    const child = makeEmptyDataRecursive(nested.children[0].type);
    // A single offset of 0 describes a list column with no rows.
    return a_makeData({ type, length: 0, child, nullCount: 0, valueOffsets: new Int32Array([0]) } as any);
  }

  if (A_DataTypeNS.isFixedSizeList(type)) {
    const child = makeEmptyDataRecursive(nested.children[0].type);
    return a_makeData({ type, length: 0, child, nullCount: 0 } as any);
  }

  if (A_DataTypeNS.isMap(type)) {
    const entryType = nested.children[0]?.type;
    // Fall back to an empty field-less struct if the map has no entries child.
    const child = entryType
      ? makeEmptyDataRecursive(entryType)
      : a_makeData({ type: new A_Struct([]), length: 0, children: [], nullCount: 0 });
    return a_makeData({ type, length: 0, child, nullCount: 0, valueOffsets: new Int32Array([0]) } as any);
  }

  if (A_DataTypeNS.isUnion(type)) {
    const children = nested.children.map((f: any) => makeEmptyDataRecursive(f.type));
    const props: any = { type, length: 0, typeIds: new Int8Array(0), children, nullCount: 0 };
    // Only dense unions are given an (empty) offsets buffer.
    if (A_DataTypeNS.isDenseUnion(type)) {
      props.valueOffsets = new Int32Array(0);
    }
    return a_makeData(props);
  }

  return a_makeData({ type, length: 0, nullCount: 0 });
}
|
|
321
|
+
|
|
322
|
+
/**
 * Build a 1-row result batch with optional batch-level metadata.
 *
 * Each value is turned into a column via `vectorFromArray`, except values
 * that are already arrow-js Data instances: those are used as the column
 * directly (raw passthrough for Map/opaque types whose `.get(0)` is
 * unreliable).
 *
 * @param s Schema of the batch.
 * @param values Cell values (or pre-built Data) keyed by field name.
 * @param metadata Optional metadata passed to the RecordBatch constructor.
 */
export function singleRowBatchWithMetadata(
  s: VgiSchema,
  values: Record<string, any>,
  metadata?: Map<string, string>,
): VgiBatch {
  const arrowSchema = s as unknown as A_Schema;
  const columns = arrowSchema.fields.map((fld) => {
    const cell = values[fld.name];
    return cell instanceof A_Data ? cell : a_vectorFromArray([cell], fld.type).data[0];
  });
  const rowData = a_makeData({
    type: new A_Struct(arrowSchema.fields),
    length: 1,
    children: columns,
    nullCount: 0,
  });
  return new A_RecordBatch(arrowSchema, rowData, metadata) as unknown as VgiBatch;
}
|
|
340
|
+
|
|
341
|
+
/**
 * Report whether `val` should be treated as a raw Data passthrough.
 *
 * In this (arrow-js) backend that means `val` is an arrow-js Data instance.
 * Per the original note, the flechette backend's counterpart always returns
 * false because it does not surface this opaque-type quirk.
 */
export function isOpaqueData(val: unknown): boolean {
  return val instanceof A_Data;
}
|
|
348
|
+
|
|
349
|
+
/**
 * Re-emit a batch with a different metadata map, reusing its schema and data.
 *
 * @param batch Source batch.
 * @param metadata Metadata for the new batch.
 */
export function withBatchMetadata(batch: VgiBatch, metadata: Map<string, string>): VgiBatch {
  const { schema: batchSchema, data: batchData } = batch as unknown as A_RecordBatch;
  return new A_RecordBatch(batchSchema, batchData, metadata) as unknown as VgiBatch;
}
|
|
354
|
+
|
|
355
|
+
/**
 * Serialize a sequence of batches into one multi-batch IPC stream.
 *
 * arrow-js's `RecordBatchStreamWriter` emits schema + N batches + EOS
 * incrementally, which is what consumers expect from a producer/exchange
 * response that yields more than one batch.
 *
 * @param schema Schema the writer is primed with (this parameter shadows the
 *   module-level `schema` factory inside the function).
 * @param batches Batches to write, in order.
 */
export function serializeBatches(schema: VgiSchema, batches: VgiBatch[]): Uint8Array {
  const ipcWriter = new RecordBatchStreamWriter();
  ipcWriter.reset(undefined, schema as unknown as A_Schema);
  // Each batch goes through the writer's internal `_writeRecordBatch`.
  batches.forEach((item) => {
    (ipcWriter as any)._writeRecordBatch(item as unknown as A_RecordBatch);
  });
  ipcWriter.close();
  return ipcWriter.toUint8Array(true);
}
|
|
370
|
+
|
|
371
|
+
// ----- conformBatchToSchema and its helpers --------------------------------
//
// conformBatchToSchema rebuilds a batch's data to match a target schema's
// field types. It lives on the backend because every line touches arrow-js
// internals (Data.children, Data.clone, vectorFromArray, makeData with
// nullBitmap passthrough). flechette's IPC reader produces specific types
// upfront, so its `conformBatchToSchema` is a no-op — keeping this code out
// of the shared util/ tree is what lets the worker-cf bundle drop arrow-js.

/**
 * Decide whether a column's values must be re-encoded to move from `src` to
 * `dst`. No cast is needed when the two types share a typeId or share a
 * constructor; otherwise one is.
 */
const _needsValueCast: _NeedsCast = (src, dst) =>
  src.typeId !== dst.typeId && src.constructor !== dst.constructor;

/** True when the type's typeId is Int or Float. */
const _isNumeric = (t: A_DataType): boolean => {
  const id = t.typeId;
  return id === A_Type.Int || id === A_Type.Float;
};
|
|
387
|
+
|
|
388
|
+
/**
 * Rebuild a batch so its columns carry the field types of `schema`.
 *
 * A 0-row batch is returned unchanged, before any validation. Otherwise the
 * batch must have the same number of fields as `schema`, with the same names
 * in the same order. Each column is then either re-typed in place via
 * `Data.clone(targetType)`, or — only when a value cast is needed and both
 * source and target types are numeric — re-encoded value by value (bigint
 * values are converted to number first).
 *
 * @param batch Source batch.
 * @param schema Target schema.
 * @returns A batch built against `schema`, keeping the source batch's null
 *   count, null bitmap and metadata.
 * @throws TypeError on a field count or field name mismatch.
 */
export function conformBatchToSchema(batch: VgiBatch, schema: VgiSchema): VgiBatch {
  const source = batch as unknown as A_RecordBatch;
  if (source.numRows === 0) return batch;

  const target = schema as unknown as A_Schema;
  const expected = target.fields;
  const actual = source.schema.fields;

  if (actual.length !== expected.length) {
    throw new TypeError(`Field count mismatch: expected ${expected.length}, got ${actual.length}`);
  }
  expected.forEach((wanted, i) => {
    if (actual[i].name !== wanted.name) {
      throw new TypeError(`Field name mismatch at index ${i}: expected '${wanted.name}', got '${actual[i].name}'`);
    }
  });

  const children = expected.map((wanted, i) => {
    const srcChild = source.data.children[i];
    const mustRecode =
      _needsValueCast(srcChild.type, wanted.type) && _isNumeric(srcChild.type) && _isNumeric(wanted.type);
    if (!mustRecode) {
      // Keep the existing buffers and only swap the type object.
      return srcChild.clone(wanted.type);
    }
    const column = source.getChildAt(i)!;
    const values: number[] = Array.from({ length: source.numRows }, (_, row) => {
      const v = column.get(row);
      return typeof v === "bigint" ? Number(v) : (v as number);
    });
    return a_vectorFromArray(values, wanted.type).data[0];
  });

  const conformed = a_makeData({
    type: new A_Struct(expected),
    length: source.numRows,
    children,
    nullCount: source.data.nullCount,
    nullBitmap: source.data.nullBitmap,
  });
  return new A_RecordBatch(target, conformed, source.metadata) as unknown as VgiBatch;
}
|