@query-farm/vgi-rpc 0.7.2 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ import type { VgiBatch, VgiSchema } from "../arrow/index.js";
2
+ /**
3
+ * Rebuild a batch's data to match the given schema's field types.
4
+ *
5
+ * Batches deserialized from IPC streams (e.g., from PyArrow) may use generic
6
+ * types (Float) instead of specific ones (Float64). Arrow-JS's
7
+ * RecordBatchStreamWriter silently drops batches whose child Data types don't
8
+ * match the writer's schema. Cloning each child Data with the schema's field
9
+ * type fixes the type metadata while preserving the underlying buffers.
10
+ *
11
+ * This is also used to cast compatible input types (e.g., int32→float64,
12
+ * float32→float64) when the input batch schema doesn't exactly match the
13
+ * method's declared input schema. When the underlying buffer layout differs
14
+ * (e.g., 4-byte int32 vs 8-byte float64), we read the values and build a
15
+ * new vector with the target type.
16
+ */
17
+ export declare function conformBatchToSchema(batch: VgiBatch, schema: VgiSchema): VgiBatch;
18
+ //# sourceMappingURL=conform.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"conform.d.ts","sourceRoot":"","sources":["../../src/util/conform.ts"],"names":[],"mappings":"AAYA,OAAO,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAkB7D;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,GAAG,QAAQ,CAuDjF"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@query-farm/vgi-rpc",
3
- "version": "0.7.2",
3
+ "version": "0.7.4",
4
4
  "license": "Apache-2.0",
5
5
  "homepage": "https://vgi-rpc-typescript.query.farm",
6
6
  "repository": {
@@ -50,7 +50,7 @@
50
50
  "dependencies": {
51
51
  "@noble/ciphers": "^2.2.0",
52
52
  "@query-farm/apache-arrow": "^21.1.1",
53
- "@uwdata/flechette": "github:Query-farm/flechette#fix/timestamp-bigint-encode",
53
+ "@query-farm/flechette": "^2.4.0",
54
54
  "fzstd": "^0.1.1",
55
55
  "oauth4webapi": "^3.8.6"
56
56
  },
@@ -90,7 +90,7 @@
90
90
  "build": "bun run build:types && bun run build:js",
91
91
  "build:types": "bunx tsc -p tsconfig.build.json",
92
92
  "build:js": "bun build ./src/index.ts --outdir dist --target node --format esm --sourcemap=external --external @query-farm/apache-arrow",
93
- "postinstall": "cd node_modules/@query-farm/apache-arrow && node -e \"const fs=require('fs');const p=JSON.parse(fs.readFileSync('package.json','utf8'));p.main='src/Arrow.node.ts';fs.writeFileSync('package.json',JSON.stringify(p,null,2)+'\\n')\" && cd ../../.. && bun scripts/patch-flechette.mjs",
93
+ "postinstall": "cd node_modules/@query-farm/apache-arrow && node -e \"const fs=require('fs');const p=JSON.parse(fs.readFileSync('package.json','utf8'));p.main='src/Arrow.node.ts';fs.writeFileSync('package.json',JSON.stringify(p,null,2)+'\\n')\"",
94
94
  "test": "bun test",
95
95
  "lint": "biome check .",
96
96
  "lint:fix": "biome check --write .",
@@ -36,8 +36,7 @@ import {
36
36
  tableFromIPC,
37
37
  tablesToIPC,
38
38
  tableToIPC,
39
- } from "@uwdata/flechette";
40
-
39
+ } from "@query-farm/flechette";
41
40
  import type {
42
41
  IncrementalEncoder,
43
42
  VgiBackendInfo,
@@ -48,6 +47,7 @@ import type {
48
47
  VgiSchema,
49
48
  } from "../types.js";
50
49
  import { readFirstRecordBatchMeta } from "./message-meta.js";
50
+ import { toFlechetteType } from "./normalize-type.js";
51
51
 
52
52
  export const backend: VgiBackendInfo = { name: "flechette", opaquePassthrough: false };
53
53
 
@@ -129,7 +129,7 @@ export function serializeSchema(s: VgiSchema): Uint8Array {
129
129
  // empty schemas, which then leaked into state-token-embedded schema
130
130
  // bytes and made `exchange_zero_columns` etc. see a 1-column schema
131
131
  // on the next round-trip.
132
- const cols = s.fields.map((f) => f_columnFromArray([], f.type as any));
132
+ const cols = s.fields.map((f) => f_columnFromArray([], toFlechetteType(f.type) as any));
133
133
  const table = buildTablePreservingNullable(s, cols) as any;
134
134
  return tableToIPC(table, { format: "stream" }) as Uint8Array;
135
135
  }
@@ -169,7 +169,7 @@ export function deserializeBatch(bytes: Uint8Array): VgiBatch {
169
169
  // ----- Construction --------------------------------------------------------
170
170
 
171
171
  export function columnFromArray(values: any[], type: VgiDataType): VgiColumnData {
172
- return f_columnFromArray(values, type as any, EXTRACT_OPTS) as VgiColumnData;
172
+ return f_columnFromArray(values, toFlechetteType(type) as any, EXTRACT_OPTS) as VgiColumnData;
173
173
  }
174
174
 
175
175
  // flechette's `tableFromColumns` discards per-field `nullable`/`metadata` —
@@ -213,7 +213,7 @@ export function singleRowBatch(s: VgiSchema, values: Record<string, any>): VgiBa
213
213
  if (f.type.typeId === 2 /* Int */ && (f.type as any).bitWidth === 64 && typeof val === "number") {
214
214
  val = BigInt(val);
215
215
  }
216
- cols.push(f_columnFromArray(coerceValuesForType([val], f.type), f.type as any, EXTRACT_OPTS));
216
+ cols.push(f_columnFromArray(coerceValuesForType([val], f.type), toFlechetteType(f.type) as any, EXTRACT_OPTS));
217
217
  }
218
218
  return buildTablePreservingNullable(s, cols);
219
219
  }
@@ -223,7 +223,7 @@ export function batchFromColumns(s: VgiSchema, columns: Record<string, any[]>):
223
223
  const cols: Column<any>[] = [];
224
224
  for (const f of s.fields) {
225
225
  const vals = columns[f.name] ?? new Array(numRows).fill(null);
226
- cols.push(f_columnFromArray(coerceValuesForType(vals, f.type), f.type as any, EXTRACT_OPTS));
226
+ cols.push(f_columnFromArray(coerceValuesForType(vals, f.type), toFlechetteType(f.type) as any, EXTRACT_OPTS));
227
227
  }
228
228
  return buildTablePreservingNullable(s, cols);
229
229
  }
@@ -240,7 +240,7 @@ export function batchFromColumnData(
240
240
  }
241
241
 
242
242
  export function emptyColumnData(type: VgiDataType): VgiColumnData {
243
- return f_columnFromArray([], type as any, EXTRACT_OPTS) as VgiColumnData;
243
+ return f_columnFromArray([], toFlechetteType(type) as any, EXTRACT_OPTS) as VgiColumnData;
244
244
  }
245
245
 
246
246
  /**
@@ -255,7 +255,7 @@ export function emptyColumnData(type: VgiDataType): VgiColumnData {
255
255
  export function emptyBatchWithMetadata(s: VgiSchema, metadata?: Map<string, string>): VgiBatch {
256
256
  const cols: Record<string, Column<any>> = {};
257
257
  for (const f of s.fields) {
258
- cols[f.name] = f_columnFromArray([], f.type as any, EXTRACT_OPTS);
258
+ cols[f.name] = f_columnFromArray([], toFlechetteType(f.type) as any, EXTRACT_OPTS);
259
259
  }
260
260
  const t = tableFromColumns(cols) as any;
261
261
  attachBatchMetadata(t, metadata);
@@ -274,7 +274,7 @@ export function singleRowBatchWithMetadata(
274
274
  if (f.type.typeId === 2 /* Int */ && (f.type as any).bitWidth === 64 && typeof val === "number") {
275
275
  val = BigInt(val);
276
276
  }
277
- cols[f.name] = f_columnFromArray([val], f.type as any, EXTRACT_OPTS);
277
+ cols[f.name] = f_columnFromArray([val], toFlechetteType(f.type) as any, EXTRACT_OPTS);
278
278
  }
279
279
  const t = tableFromColumns(cols) as any;
280
280
  attachBatchMetadata(t, metadata);
@@ -386,7 +386,7 @@ export function conformBatchToSchema(batch: VgiBatch, schema: VgiSchema): VgiBat
386
386
  if (srcType.typeId === dstType.typeId) return srcCol;
387
387
  if (isNumericTypeId(srcType.typeId) && isNumericTypeId(dstType.typeId)) {
388
388
  mutated = true;
389
- return f_columnFromArray(castNumericValues(srcCol, dstType), dstType as any, EXTRACT_OPTS);
389
+ return f_columnFromArray(castNumericValues(srcCol, dstType), toFlechetteType(dstType) as any, EXTRACT_OPTS);
390
390
  }
391
391
  return srcCol;
392
392
  });
@@ -0,0 +1,116 @@
1
+ // Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+ //
3
+ // Normalize a (possibly foreign) Arrow DataType into a flechette-native type.
4
+ //
5
+ // App/worker code routinely builds schemas with concrete arrow-js DataType
6
+ // instances (e.g. `new Int64()` from @query-farm/apache-arrow) rather than the
7
+ // facade constructors. flechette's `columnFromArray` *tolerates* such foreign
8
+ // type objects when reading values, but the resulting `Column.type` is the
9
+ // foreign object — and flechette's IPC writer then serializes it incorrectly
10
+ // (e.g. Utf8 round-trips to ["", "xy\0\0…"], Int64 throws BigInt→number on a
11
+ // stale build). Reconstructing the type from its structural fields via
12
+ // flechette's own constructors yields a writer-safe native type.
13
+ //
14
+ // Both arrow-js and flechette encode `typeId` with the same Arrow Type enum, so
15
+ // we switch on it and read the shared structural props, accepting either
16
+ // spelling where the two differ (Int `isSigned`/`signed`, FixedSizeBinary `byteWidth`/`stride`, Dictionary `isOrdered`/`ordered`).
17
+
18
+ import {
19
+ binary,
20
+ bool,
21
+ date,
22
+ decimal,
23
+ dictionary,
24
+ duration,
25
+ field as f_field,
26
+ fixedSizeBinary,
27
+ fixedSizeList,
28
+ float,
29
+ int,
30
+ interval,
31
+ largeList,
32
+ list,
33
+ map,
34
+ nullType,
35
+ struct,
36
+ Type,
37
+ time,
38
+ timestamp,
39
+ union,
40
+ utf8,
41
+ } from "@query-farm/flechette";
42
+ /** Rebuild field `f` through flechette's `field()` with its type normalized; a nullish `nullable` becomes true and a nullish `metadata` becomes null. */
43
+ function fField(f: any): any {
44
+ return f_field(f.name, toFlechetteType(f.type), f.nullable ?? true, f.metadata ?? null);
45
+ }
46
+
47
+ /**
48
+ * Rebuild `type` as a flechette-native DataType. Idempotent for types that are
49
+ * already native (reconstructed from the same structural props). Unknown
50
+ * typeIds pass through unchanged as a safety net.
51
+ */
52
+ export function toFlechetteType(type: any): any {
53
+ if (type == null) return nullType();
54
+ switch (type.typeId) {
55
+ case Type.Null:
56
+ return nullType();
57
+ case Type.Bool:
58
+ return bool();
59
+ case Type.Int:
60
+ return int(type.bitWidth, type.isSigned ?? type.signed ?? true);
61
+ case Type.Float:
62
+ return float(type.precision);
63
+ case Type.Binary:
64
+ case Type.LargeBinary:
65
+ return binary();
66
+ case Type.Utf8:
67
+ case Type.LargeUtf8:
68
+ return utf8();
69
+ case Type.FixedSizeBinary:
70
+ return fixedSizeBinary(type.byteWidth ?? type.stride);
71
+ case Type.Decimal:
72
+ return decimal(type.precision, type.scale, type.bitWidth ?? 128);
73
+ case Type.Date:
74
+ return date(type.unit);
75
+ case Type.Time:
76
+ return time(type.unit);
77
+ case Type.Timestamp:
78
+ return timestamp(type.unit, type.timezone ?? null);
79
+ case Type.Duration:
80
+ return duration(type.unit);
81
+ case Type.Interval:
82
+ return interval(type.unit);
83
+ case Type.List:
84
+ case Type.LargeList:
85
+ return (type.typeId === Type.LargeList ? largeList : list)(fField(type.children[0]));
86
+ case Type.FixedSizeList:
87
+ return fixedSizeList(fField(type.children[0]), type.listSize);
88
+ case Type.Struct:
89
+ return struct(type.children.map(fField));
90
+ case Type.Map: {
91
+ // children[0] is the "entries" struct field holding [key, value].
92
+ const entries = type.children[0];
93
+ const [k, v] = entries.type.children;
94
+ return map(fField(k), fField(v), type.keysSorted ?? false);
95
+ }
96
+ case Type.Dictionary:
97
+ return dictionary(
98
+ toFlechetteType(type.dictionary),
99
+ toFlechetteType(type.indices),
100
+ type.ordered ?? type.isOrdered ?? false,
101
+ type.id,
102
+ );
103
+ case Type.Union:
104
+ return union(type.mode, type.children.map(fField), type.typeIds);
105
+ default:
106
+ return type;
107
+ }
108
+ }
109
+
110
+ /**
111
+ * Return a new array in which every entry of `fields` has been rebuilt by
112
+ * `fField`, i.e. the same field name with a flechette-native type.
113
+ */
114
+ export function normalizeSchemaFields(fields: readonly any[]): any[] {
115
+ return fields.map((entry) => fField(entry));
116
+ }
@@ -156,10 +156,12 @@ export async function buildDescribeBatch(
156
156
  const headerIpc = method.headerSchema ? serializeSchema(method.headerSchema) : null;
157
157
  headerSchemas.push(headerIpc);
158
158
 
159
- let isExchange: boolean | null;
160
- if (method.exchangeFn) isExchange = true;
161
- else if (method.producerFn) isExchange = false;
162
- else isExchange = null;
159
+ // v4 slim schema: `is_exchange` is vestigial and always null — producer
160
+ // vs exchange is already derivable from `method_type` + the presence of an
161
+ // input/params schema. The cross-language describe spec asserts null for
162
+ // every stream method (and the Python reference emits null for all), so we
163
+ // never populate it. See vgi-rpc conformance describe_stream_properties.
164
+ const isExchange: boolean | null = null;
163
165
  isExchanges.push(isExchange);
164
166
 
165
167
  hashRows.push({