@query-farm/vgi-rpc 0.7.2 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/arrow/impl-flechette/index.d.ts.map +1 -1
- package/dist/arrow/impl-flechette/normalize-type.d.ts +12 -0
- package/dist/arrow/impl-flechette/normalize-type.d.ts.map +1 -0
- package/dist/dispatch/describe.d.ts.map +1 -1
- package/dist/index.js +2 -8
- package/dist/index.js.map +3 -3
- package/dist/util/conform.d.ts +18 -0
- package/dist/util/conform.d.ts.map +1 -0
- package/package.json +3 -3
- package/src/arrow/impl-flechette/index.ts +10 -10
- package/src/arrow/impl-flechette/normalize-type.ts +116 -0
- package/src/dispatch/describe.ts +6 -4
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { VgiBatch, VgiSchema } from "../arrow/index.js";
|
|
2
|
+
/**
|
|
3
|
+
* Rebuild a batch's data to match the given schema's field types.
|
|
4
|
+
*
|
|
5
|
+
* Batches deserialized from IPC streams (e.g., from PyArrow) may use generic
|
|
6
|
+
* types (Float) instead of specific ones (Float64). Arrow-JS's
|
|
7
|
+
* RecordBatchStreamWriter silently drops batches whose child Data types don't
|
|
8
|
+
* match the writer's schema. Cloning each child Data with the schema's field
|
|
9
|
+
* type fixes the type metadata while preserving the underlying buffers.
|
|
10
|
+
*
|
|
11
|
+
* This is also used to cast compatible input types (e.g., int32→float64,
|
|
12
|
+
* float32→float64) when the input batch schema doesn't exactly match the
|
|
13
|
+
* method's declared input schema. When the underlying buffer layout differs
|
|
14
|
+
* (e.g., 4-byte int32 vs 8-byte float64), we read the values and build a
|
|
15
|
+
* new vector with the target type.
|
|
16
|
+
*/
|
|
17
|
+
export declare function conformBatchToSchema(batch: VgiBatch, schema: VgiSchema): VgiBatch;
|
|
18
|
+
//# sourceMappingURL=conform.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"conform.d.ts","sourceRoot":"","sources":["../../src/util/conform.ts"],"names":[],"mappings":"AAYA,OAAO,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAkB7D;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,GAAG,QAAQ,CAuDjF"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@query-farm/vgi-rpc",
|
|
3
|
-
"version": "0.7.2",
|
|
3
|
+
"version": "0.7.4",
|
|
4
4
|
"license": "Apache-2.0",
|
|
5
5
|
"homepage": "https://vgi-rpc-typescript.query.farm",
|
|
6
6
|
"repository": {
|
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
"dependencies": {
|
|
51
51
|
"@noble/ciphers": "^2.2.0",
|
|
52
52
|
"@query-farm/apache-arrow": "^21.1.1",
|
|
53
|
-
"@
|
|
53
|
+
"@query-farm/flechette": "^2.4.0",
|
|
54
54
|
"fzstd": "^0.1.1",
|
|
55
55
|
"oauth4webapi": "^3.8.6"
|
|
56
56
|
},
|
|
@@ -90,7 +90,7 @@
|
|
|
90
90
|
"build": "bun run build:types && bun run build:js",
|
|
91
91
|
"build:types": "bunx tsc -p tsconfig.build.json",
|
|
92
92
|
"build:js": "bun build ./src/index.ts --outdir dist --target node --format esm --sourcemap=external --external @query-farm/apache-arrow",
|
|
93
|
-
"postinstall": "cd node_modules/@query-farm/apache-arrow && node -e \"const fs=require('fs');const p=JSON.parse(fs.readFileSync('package.json','utf8'));p.main='src/Arrow.node.ts';fs.writeFileSync('package.json',JSON.stringify(p,null,2)+'\\n')\"
|
|
93
|
+
"postinstall": "cd node_modules/@query-farm/apache-arrow && node -e \"const fs=require('fs');const p=JSON.parse(fs.readFileSync('package.json','utf8'));p.main='src/Arrow.node.ts';fs.writeFileSync('package.json',JSON.stringify(p,null,2)+'\\n')\"",
|
|
94
94
|
"test": "bun test",
|
|
95
95
|
"lint": "biome check .",
|
|
96
96
|
"lint:fix": "biome check --write .",
|
|
@@ -36,8 +36,7 @@ import {
|
|
|
36
36
|
tableFromIPC,
|
|
37
37
|
tablesToIPC,
|
|
38
38
|
tableToIPC,
|
|
39
|
-
} from "@
|
|
40
|
-
|
|
39
|
+
} from "@query-farm/flechette";
|
|
41
40
|
import type {
|
|
42
41
|
IncrementalEncoder,
|
|
43
42
|
VgiBackendInfo,
|
|
@@ -48,6 +47,7 @@ import type {
|
|
|
48
47
|
VgiSchema,
|
|
49
48
|
} from "../types.js";
|
|
50
49
|
import { readFirstRecordBatchMeta } from "./message-meta.js";
|
|
50
|
+
import { toFlechetteType } from "./normalize-type.js";
|
|
51
51
|
|
|
52
52
|
export const backend: VgiBackendInfo = { name: "flechette", opaquePassthrough: false };
|
|
53
53
|
|
|
@@ -129,7 +129,7 @@ export function serializeSchema(s: VgiSchema): Uint8Array {
|
|
|
129
129
|
// empty schemas, which then leaked into state-token-embedded schema
|
|
130
130
|
// bytes and made `exchange_zero_columns` etc. see a 1-column schema
|
|
131
131
|
// on the next round-trip.
|
|
132
|
-
const cols = s.fields.map((f) => f_columnFromArray([], f.type as any));
|
|
132
|
+
const cols = s.fields.map((f) => f_columnFromArray([], toFlechetteType(f.type) as any));
|
|
133
133
|
const table = buildTablePreservingNullable(s, cols) as any;
|
|
134
134
|
return tableToIPC(table, { format: "stream" }) as Uint8Array;
|
|
135
135
|
}
|
|
@@ -169,7 +169,7 @@ export function deserializeBatch(bytes: Uint8Array): VgiBatch {
|
|
|
169
169
|
// ----- Construction --------------------------------------------------------
|
|
170
170
|
|
|
171
171
|
export function columnFromArray(values: any[], type: VgiDataType): VgiColumnData {
|
|
172
|
-
return f_columnFromArray(values, type as any, EXTRACT_OPTS) as VgiColumnData;
|
|
172
|
+
return f_columnFromArray(values, toFlechetteType(type) as any, EXTRACT_OPTS) as VgiColumnData;
|
|
173
173
|
}
|
|
174
174
|
|
|
175
175
|
// flechette's `tableFromColumns` discards per-field `nullable`/`metadata` —
|
|
@@ -213,7 +213,7 @@ export function singleRowBatch(s: VgiSchema, values: Record<string, any>): VgiBa
|
|
|
213
213
|
if (f.type.typeId === 2 /* Int */ && (f.type as any).bitWidth === 64 && typeof val === "number") {
|
|
214
214
|
val = BigInt(val);
|
|
215
215
|
}
|
|
216
|
-
cols.push(f_columnFromArray(coerceValuesForType([val], f.type), f.type as any, EXTRACT_OPTS));
|
|
216
|
+
cols.push(f_columnFromArray(coerceValuesForType([val], f.type), toFlechetteType(f.type) as any, EXTRACT_OPTS));
|
|
217
217
|
}
|
|
218
218
|
return buildTablePreservingNullable(s, cols);
|
|
219
219
|
}
|
|
@@ -223,7 +223,7 @@ export function batchFromColumns(s: VgiSchema, columns: Record<string, any[]>):
|
|
|
223
223
|
const cols: Column<any>[] = [];
|
|
224
224
|
for (const f of s.fields) {
|
|
225
225
|
const vals = columns[f.name] ?? new Array(numRows).fill(null);
|
|
226
|
-
cols.push(f_columnFromArray(coerceValuesForType(vals, f.type), f.type as any, EXTRACT_OPTS));
|
|
226
|
+
cols.push(f_columnFromArray(coerceValuesForType(vals, f.type), toFlechetteType(f.type) as any, EXTRACT_OPTS));
|
|
227
227
|
}
|
|
228
228
|
return buildTablePreservingNullable(s, cols);
|
|
229
229
|
}
|
|
@@ -240,7 +240,7 @@ export function batchFromColumnData(
|
|
|
240
240
|
}
|
|
241
241
|
|
|
242
242
|
export function emptyColumnData(type: VgiDataType): VgiColumnData {
|
|
243
|
-
return f_columnFromArray([], type as any, EXTRACT_OPTS) as VgiColumnData;
|
|
243
|
+
return f_columnFromArray([], toFlechetteType(type) as any, EXTRACT_OPTS) as VgiColumnData;
|
|
244
244
|
}
|
|
245
245
|
|
|
246
246
|
/**
|
|
@@ -255,7 +255,7 @@ export function emptyColumnData(type: VgiDataType): VgiColumnData {
|
|
|
255
255
|
export function emptyBatchWithMetadata(s: VgiSchema, metadata?: Map<string, string>): VgiBatch {
|
|
256
256
|
const cols: Record<string, Column<any>> = {};
|
|
257
257
|
for (const f of s.fields) {
|
|
258
|
-
cols[f.name] = f_columnFromArray([], f.type as any, EXTRACT_OPTS);
|
|
258
|
+
cols[f.name] = f_columnFromArray([], toFlechetteType(f.type) as any, EXTRACT_OPTS);
|
|
259
259
|
}
|
|
260
260
|
const t = tableFromColumns(cols) as any;
|
|
261
261
|
attachBatchMetadata(t, metadata);
|
|
@@ -274,7 +274,7 @@ export function singleRowBatchWithMetadata(
|
|
|
274
274
|
if (f.type.typeId === 2 /* Int */ && (f.type as any).bitWidth === 64 && typeof val === "number") {
|
|
275
275
|
val = BigInt(val);
|
|
276
276
|
}
|
|
277
|
-
cols[f.name] = f_columnFromArray([val], f.type as any, EXTRACT_OPTS);
|
|
277
|
+
cols[f.name] = f_columnFromArray([val], toFlechetteType(f.type) as any, EXTRACT_OPTS);
|
|
278
278
|
}
|
|
279
279
|
const t = tableFromColumns(cols) as any;
|
|
280
280
|
attachBatchMetadata(t, metadata);
|
|
@@ -386,7 +386,7 @@ export function conformBatchToSchema(batch: VgiBatch, schema: VgiSchema): VgiBat
|
|
|
386
386
|
if (srcType.typeId === dstType.typeId) return srcCol;
|
|
387
387
|
if (isNumericTypeId(srcType.typeId) && isNumericTypeId(dstType.typeId)) {
|
|
388
388
|
mutated = true;
|
|
389
|
-
return f_columnFromArray(castNumericValues(srcCol, dstType), dstType as any, EXTRACT_OPTS);
|
|
389
|
+
return f_columnFromArray(castNumericValues(srcCol, dstType), toFlechetteType(dstType) as any, EXTRACT_OPTS);
|
|
390
390
|
}
|
|
391
391
|
return srcCol;
|
|
392
392
|
});
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
// Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
//
|
|
3
|
+
// Normalize a (possibly foreign) Arrow DataType into a flechette-native type.
|
|
4
|
+
//
|
|
5
|
+
// App/worker code routinely builds schemas with concrete arrow-js DataType
|
|
6
|
+
// instances (e.g. `new Int64()` from @query-farm/apache-arrow) rather than the
|
|
7
|
+
// facade constructors. flechette's `columnFromArray` *tolerates* such foreign
|
|
8
|
+
// type objects when reading values, but the resulting `Column.type` is the
|
|
9
|
+
// foreign object — and flechette's IPC writer then serializes it incorrectly
|
|
10
|
+
// (e.g. Utf8 round-trips to ["", "xy\0\0…"], Int64 throws BigInt→number on a
|
|
11
|
+
// stale build). Reconstructing the type from its structural fields via
|
|
12
|
+
// flechette's own constructors yields a writer-safe native type.
|
|
13
|
+
//
|
|
14
|
+
// Both arrow-js and flechette encode `typeId` with the same Arrow Type enum, so
|
|
15
|
+
// we switch on it and read the shared structural props (with the one rename
|
|
16
|
+
// flechette differs on: Int uses `signed`, arrow-js uses `isSigned`).
|
|
17
|
+
|
|
18
|
+
import {
|
|
19
|
+
binary,
|
|
20
|
+
bool,
|
|
21
|
+
date,
|
|
22
|
+
decimal,
|
|
23
|
+
dictionary,
|
|
24
|
+
duration,
|
|
25
|
+
field as f_field,
|
|
26
|
+
fixedSizeBinary,
|
|
27
|
+
fixedSizeList,
|
|
28
|
+
float,
|
|
29
|
+
int,
|
|
30
|
+
interval,
|
|
31
|
+
largeList,
|
|
32
|
+
list,
|
|
33
|
+
map,
|
|
34
|
+
nullType,
|
|
35
|
+
struct,
|
|
36
|
+
Type,
|
|
37
|
+
time,
|
|
38
|
+
timestamp,
|
|
39
|
+
union,
|
|
40
|
+
utf8,
|
|
41
|
+
} from "@query-farm/flechette";
|
|
42
|
+
|
|
43
|
+
/**
 * Re-create a field-like object as a flechette field whose type has been
 * rebuilt through `toFlechetteType`.
 *
 * A missing `nullable` falls back to `true` and a missing `metadata` falls
 * back to `null`; the field name is carried over unchanged.
 */
function fField(f: any): any {
  const { name, type, nullable, metadata } = f;
  const nativeType = toFlechetteType(type);
  return f_field(name, nativeType, nullable ?? true, metadata ?? null);
}
|
|
46
|
+
|
|
47
|
+
/**
 * Rebuild `type` as a flechette-native DataType by switching on `typeId` and
 * re-invoking the matching flechette constructor with the type's structural
 * properties.
 *
 * Where arrow-js and flechette spell a property differently, both spellings
 * are consulted (`isSigned`/`signed`, `byteWidth`/`stride`, `listSize`/`stride`,
 * `isOrdered`/`ordered`) so that foreign and already-native types alike keep
 * their parameters across the rebuild.
 *
 * @param type - A DataType-like object exposing `typeId`; `null`/`undefined`
 *   is treated as the Null type.
 * @returns A type produced by flechette's constructors, or `type` itself when
 *   its `typeId` is not handled here (pass-through safety net).
 */
export function toFlechetteType(type: any): any {
  if (type == null) return nullType();
  switch (type.typeId) {
    case Type.Null:
      return nullType();
    case Type.Bool:
      return bool();
    case Type.Int:
      // arrow-js names the flag `isSigned`, flechette names it `signed`.
      return int(type.bitWidth, type.isSigned ?? type.signed ?? true);
    case Type.Float:
      return float(type.precision);
    // NOTE(review): the Large* variants are collapsed onto binary()/utf8(),
    // so the "large" distinction is not preserved — confirm this is intended.
    case Type.Binary:
    case Type.LargeBinary:
      return binary();
    case Type.Utf8:
    case Type.LargeUtf8:
      return utf8();
    case Type.FixedSizeBinary:
      return fixedSizeBinary(type.byteWidth ?? type.stride);
    case Type.Decimal:
      return decimal(type.precision, type.scale, type.bitWidth ?? 128);
    case Type.Date:
      return date(type.unit);
    case Type.Time:
      // Forward the source bit width as well as the unit; without it the
      // constructor falls back to its default width, which would mis-describe
      // 64-bit times. An undefined `bitWidth` still selects that default.
      return time(type.unit, type.bitWidth);
    case Type.Timestamp:
      return timestamp(type.unit, type.timezone ?? null);
    case Type.Duration:
      return duration(type.unit);
    case Type.Interval:
      return interval(type.unit);
    case Type.List:
    case Type.LargeList:
      return (type.typeId === Type.LargeList ? largeList : list)(fField(type.children[0]));
    case Type.FixedSizeList:
      // `listSize` is the arrow-js spelling; fall back to `stride` (the same
      // fallback FixedSizeBinary uses above) so a type that is already native
      // does not lose its size when rebuilt.
      return fixedSizeList(fField(type.children[0]), type.listSize ?? type.stride);
    case Type.Struct:
      return struct(type.children.map(fField));
    case Type.Map: {
      // children[0] is the "entries" struct field holding [key, value].
      const entries = type.children[0];
      const [k, v] = entries.type.children;
      return map(fField(k), fField(v), type.keysSorted ?? false);
    }
    case Type.Dictionary:
      return dictionary(
        toFlechetteType(type.dictionary),
        toFlechetteType(type.indices),
        type.ordered ?? type.isOrdered ?? false,
        type.id,
      );
    case Type.Union:
      return union(type.mode, type.children.map(fField), type.typeIds);
    default:
      // Unknown typeId: hand the original object back untouched.
      return type;
  }
}
|
|
109
|
+
|
|
110
|
+
/**
 * Produce a new array in which every entry of `fields` has been rebuilt as a
 * flechette field (via `fField`), so each field's type is flechette-native
 * while its name, nullability and metadata are carried across.
 *
 * @param fields - Field-like objects from a schema; the input array is not
 *   modified.
 * @returns A freshly allocated array of rebuilt fields, in the same order.
 */
export function normalizeSchemaFields(fields: readonly any[]): any[] {
  const rebuilt = fields.map((entry) => fField(entry));
  return rebuilt;
}
|
package/src/dispatch/describe.ts
CHANGED
|
@@ -156,10 +156,12 @@ export async function buildDescribeBatch(
|
|
|
156
156
|
const headerIpc = method.headerSchema ? serializeSchema(method.headerSchema) : null;
|
|
157
157
|
headerSchemas.push(headerIpc);
|
|
158
158
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
159
|
+
// v4 slim schema: `is_exchange` is vestigial and always null — producer
|
|
160
|
+
// vs exchange is already derivable from `method_type` + the presence of an
|
|
161
|
+
// input/params schema. The cross-language describe spec asserts null for
|
|
162
|
+
// every stream method (and the Python reference emits null for all), so we
|
|
163
|
+
// never populate it. See vgi-rpc conformance describe_stream_properties.
|
|
164
|
+
const isExchange: boolean | null = null;
|
|
163
165
|
isExchanges.push(isExchange);
|
|
164
166
|
|
|
165
167
|
hashRows.push({
|