@lancedb/lancedb 0.15.1-beta.2 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -32,7 +32,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
32
32
  console.log(results);
33
33
  ```
34
34
 
35
- The [quickstart](../basic.md) contains a more complete example.
35
+ The [quickstart](https://lancedb.github.io/lancedb/basic/) contains a more complete example.
36
36
 
37
37
  ## Development
38
38
 
package/dist/arrow.d.ts CHANGED
@@ -91,8 +91,6 @@ export declare class MakeArrowTableOptions {
91
91
  * This function converts an array of Record<String, any> (row-major JS objects)
92
92
  * to an Arrow Table (a columnar structure)
93
93
  *
94
- * Note that it currently does not support nulls.
95
- *
96
94
  * If a schema is provided then it will be used to determine the resulting array
97
95
  * types. Fields will also be reordered to fit the order defined by the schema.
98
96
  *
@@ -100,6 +98,9 @@ export declare class MakeArrowTableOptions {
100
98
  * will be controlled by the order of properties in the first record. If a type
101
99
  * is inferred it will always be nullable.
102
100
  *
101
+ * If not all fields are found in the data, then a subset of the schema will be
102
+ * returned.
103
+ *
103
104
  * If the input is empty then a schema must be provided to create an empty table.
104
105
  *
105
106
  * When a schema is not specified then data types will be inferred. The inference
@@ -107,11 +108,13 @@ export declare class MakeArrowTableOptions {
107
108
  *
108
109
  * - boolean => Bool
109
110
  * - number => Float64
111
+ * - bigint => Int64
110
112
  * - String => Utf8
111
113
  * - Buffer => Binary
112
114
  * - Record<String, any> => Struct
113
115
  * - Array<any> => List
114
116
  * @example
117
+ * ```ts
115
118
  * import { fromTableToBuffer, makeArrowTable } from "../arrow";
116
119
  * import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
117
120
  *
@@ -133,43 +136,41 @@ export declare class MakeArrowTableOptions {
133
136
  * names and data types.
134
137
  *
135
138
  * ```ts
136
- *
137
139
  * const schema = new Schema([
138
- new Field("a", new Float64()),
139
- new Field("b", new Float64()),
140
- new Field(
141
- "vector",
142
- new FixedSizeList(3, new Field("item", new Float32()))
143
- ),
144
- ]);
145
- const table = makeArrowTable([
146
- { a: 1, b: 2, vector: [1, 2, 3] },
147
- { a: 4, b: 5, vector: [4, 5, 6] },
148
- { a: 7, b: 8, vector: [7, 8, 9] },
149
- ]);
150
- assert.deepEqual(table.schema, schema);
140
+ * new Field("a", new Float64()),
141
+ * new Field("b", new Float64()),
142
+ * new Field(
143
+ * "vector",
144
+ * new FixedSizeList(3, new Field("item", new Float32()))
145
+ * ),
146
+ * ]);
147
+ * const table = makeArrowTable([
148
+ * { a: 1, b: 2, vector: [1, 2, 3] },
149
+ * { a: 4, b: 5, vector: [4, 5, 6] },
150
+ * { a: 7, b: 8, vector: [7, 8, 9] },
151
+ * ]);
152
+ * assert.deepEqual(table.schema, schema);
151
153
  * ```
152
154
  *
153
155
  * You can specify the vector column types and names using the options as well
154
156
  *
155
- * ```typescript
156
- *
157
+ * ```ts
157
158
  * const schema = new Schema([
158
- new Field('a', new Float64()),
159
- new Field('b', new Float64()),
160
- new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
161
- new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
162
- ]);
159
+ * new Field('a', new Float64()),
160
+ * new Field('b', new Float64()),
161
+ * new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
162
+ * new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
163
+ * ]);
163
164
  * const table = makeArrowTable([
164
- { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
165
- { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
166
- { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
167
- ], {
168
- vectorColumns: {
169
- vec1: { type: new Float16() },
170
- vec2: { type: new Float16() }
171
- }
172
- }
165
+ * { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
166
+ * { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
167
+ * { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
168
+ * ], {
169
+ * vectorColumns: {
170
+ * vec1: { type: new Float16() },
171
+ * vec2: { type: new Float16() }
172
+ * }
173
+ * }
173
174
  * assert.deepEqual(table.schema, schema)
174
175
  * ```
175
176
  */
package/dist/arrow.js CHANGED
@@ -186,8 +186,6 @@ exports.MakeArrowTableOptions = MakeArrowTableOptions;
186
186
  * This function converts an array of Record<String, any> (row-major JS objects)
187
187
  * to an Arrow Table (a columnar structure)
188
188
  *
189
- * Note that it currently does not support nulls.
190
- *
191
189
  * If a schema is provided then it will be used to determine the resulting array
192
190
  * types. Fields will also be reordered to fit the order defined by the schema.
193
191
  *
@@ -195,6 +193,9 @@ exports.MakeArrowTableOptions = MakeArrowTableOptions;
195
193
  * will be controlled by the order of properties in the first record. If a type
196
194
  * is inferred it will always be nullable.
197
195
  *
196
+ * If not all fields are found in the data, then a subset of the schema will be
197
+ * returned.
198
+ *
198
199
  * If the input is empty then a schema must be provided to create an empty table.
199
200
  *
200
201
  * When a schema is not specified then data types will be inferred. The inference
@@ -202,11 +203,13 @@ exports.MakeArrowTableOptions = MakeArrowTableOptions;
202
203
  *
203
204
  * - boolean => Bool
204
205
  * - number => Float64
206
+ * - bigint => Int64
205
207
  * - String => Utf8
206
208
  * - Buffer => Binary
207
209
  * - Record<String, any> => Struct
208
210
  * - Array<any> => List
209
211
  * @example
212
+ * ```ts
210
213
  * import { fromTableToBuffer, makeArrowTable } from "../arrow";
211
214
  * import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
212
215
  *
@@ -228,149 +231,325 @@ exports.MakeArrowTableOptions = MakeArrowTableOptions;
228
231
  * names and data types.
229
232
  *
230
233
  * ```ts
231
- *
232
234
  * const schema = new Schema([
233
- new Field("a", new Float64()),
234
- new Field("b", new Float64()),
235
- new Field(
236
- "vector",
237
- new FixedSizeList(3, new Field("item", new Float32()))
238
- ),
239
- ]);
240
- const table = makeArrowTable([
241
- { a: 1, b: 2, vector: [1, 2, 3] },
242
- { a: 4, b: 5, vector: [4, 5, 6] },
243
- { a: 7, b: 8, vector: [7, 8, 9] },
244
- ]);
245
- assert.deepEqual(table.schema, schema);
235
+ * new Field("a", new Float64()),
236
+ * new Field("b", new Float64()),
237
+ * new Field(
238
+ * "vector",
239
+ * new FixedSizeList(3, new Field("item", new Float32()))
240
+ * ),
241
+ * ]);
242
+ * const table = makeArrowTable([
243
+ * { a: 1, b: 2, vector: [1, 2, 3] },
244
+ * { a: 4, b: 5, vector: [4, 5, 6] },
245
+ * { a: 7, b: 8, vector: [7, 8, 9] },
246
+ * ]);
247
+ * assert.deepEqual(table.schema, schema);
246
248
  * ```
247
249
  *
248
250
  * You can specify the vector column types and names using the options as well
249
251
  *
250
- * ```typescript
251
- *
252
+ * ```ts
252
253
  * const schema = new Schema([
253
- new Field('a', new Float64()),
254
- new Field('b', new Float64()),
255
- new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
256
- new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
257
- ]);
254
+ * new Field('a', new Float64()),
255
+ * new Field('b', new Float64()),
256
+ * new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
257
+ * new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
258
+ * ]);
258
259
  * const table = makeArrowTable([
259
- { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
260
- { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
261
- { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
262
- ], {
263
- vectorColumns: {
264
- vec1: { type: new Float16() },
265
- vec2: { type: new Float16() }
266
- }
267
- }
260
+ * { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
261
+ * { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
262
+ * { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
263
+ * ], {
264
+ * vectorColumns: {
265
+ * vec1: { type: new Float16() },
266
+ * vec2: { type: new Float16() }
267
+ * }
268
+ * }
268
269
  * assert.deepEqual(table.schema, schema)
269
270
  * ```
270
271
  */
271
272
  function makeArrowTable(data, options, metadata) {
273
+ const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
274
+ let schema = undefined;
275
+ if (opt.schema !== undefined && opt.schema !== null) {
276
+ schema = (0, sanitize_1.sanitizeSchema)(opt.schema);
277
+ schema = validateSchemaEmbeddings(schema, data, options?.embeddingFunction);
278
+ }
279
+ let schemaMetadata = schema?.metadata || new Map();
280
+ if (metadata !== undefined) {
281
+ schemaMetadata = new Map([...schemaMetadata, ...metadata]);
282
+ }
272
283
  if (data.length === 0 &&
273
284
  (options?.schema === undefined || options?.schema === null)) {
274
285
  throw new Error("At least one record or a schema needs to be provided");
275
286
  }
276
- const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
277
- if (opt.schema !== undefined && opt.schema !== null) {
278
- opt.schema = (0, sanitize_1.sanitizeSchema)(opt.schema);
279
- opt.schema = validateSchemaEmbeddings(opt.schema, data, options?.embeddingFunction);
280
- }
281
- const columns = {};
282
- // TODO: sample dataset to find missing columns
283
- // Prefer the field ordering of the schema, if present
284
- const columnNames = opt.schema != null ? opt.schema.names : Object.keys(data[0]);
285
- for (const colName of columnNames) {
286
- if (data.length !== 0 &&
287
- !Object.prototype.hasOwnProperty.call(data[0], colName)) {
288
- // The field is present in the schema, but not in the data, skip it
289
- continue;
290
- }
291
- // Extract a single column from the records (transpose from row-major to col-major)
292
- let values = data.map((datum) => datum[colName]);
293
- // By default (type === undefined) arrow will infer the type from the JS type
294
- let type;
295
- if (opt.schema !== undefined) {
296
- // If there is a schema provided, then use that for the type instead
297
- type = opt.schema?.fields.filter((f) => f.name === colName)[0]?.type;
298
- if (apache_arrow_1.DataType.isInt(type) && type.bitWidth === 64) {
299
- // wrap in BigInt to avoid bug: https://github.com/apache/arrow/issues/40051
300
- values = values.map((v) => {
301
- if (v === null) {
302
- return v;
287
+ else if (data.length === 0) {
288
+ if (schema === undefined) {
289
+ throw new Error("A schema must be provided if data is empty");
290
+ }
291
+ else {
292
+ schema = new apache_arrow_1.Schema(schema.fields, schemaMetadata);
293
+ return new apache_arrow_1.Table(schema);
294
+ }
295
+ }
296
+ let inferredSchema = inferSchema(data, schema, opt);
297
+ inferredSchema = new apache_arrow_1.Schema(inferredSchema.fields, schemaMetadata);
298
+ const finalColumns = {};
299
+ for (const field of inferredSchema.fields) {
300
+ finalColumns[field.name] = transposeData(data, field);
301
+ }
302
+ return new apache_arrow_1.Table(inferredSchema, finalColumns);
303
+ }
304
+ function inferSchema(data, schema, opts) {
305
+ // We will collect all fields we see in the data.
306
+ const pathTree = new PathTree();
307
+ for (const [rowI, row] of data.entries()) {
308
+ for (const [path, value] of rowPathsAndValues(row)) {
309
+ if (!pathTree.has(path)) {
310
+ // First time seeing this field.
311
+ if (schema !== undefined) {
312
+ const field = getFieldForPath(schema, path);
313
+ if (field === undefined) {
314
+ throw new Error(`Found field not in schema: ${path.join(".")} at row ${rowI}`);
303
315
  }
304
- if (typeof v === "bigint") {
305
- return v;
316
+ else {
317
+ pathTree.set(path, field.type);
306
318
  }
307
- if (typeof v === "number") {
308
- return BigInt(v);
319
+ }
320
+ else {
321
+ const inferredType = inferType(value, path, opts);
322
+ if (inferredType === undefined) {
323
+ throw new Error(`Failed to infer data type for field ${path.join(".")} at row ${rowI}. \
324
+ Consider providing an explicit schema.`);
309
325
  }
310
- throw new Error(`Expected BigInt or number for column ${colName}, got ${typeof v}`);
311
- });
326
+ pathTree.set(path, inferredType);
327
+ }
328
+ }
329
+ else if (schema === undefined) {
330
+ const currentType = pathTree.get(path);
331
+ const newType = inferType(value, path, opts);
332
+ if (currentType !== newType) {
333
+ new Error(`Failed to infer schema for data. Previously inferred type \
334
+ ${currentType} but found ${newType} at row ${rowI}. Consider \
335
+ providing an explicit schema.`);
336
+ }
312
337
  }
313
338
  }
314
- else {
315
- // Otherwise, check to see if this column is one of the vector columns
316
- // defined by opt.vectorColumns and, if so, use the fixed size list type
317
- const vectorColumnOptions = opt.vectorColumns[colName];
318
- if (vectorColumnOptions !== undefined) {
319
- const firstNonNullValue = values.find((v) => v !== null);
320
- if (Array.isArray(firstNonNullValue)) {
321
- type = newVectorType(firstNonNullValue.length, vectorColumnOptions.type);
339
+ }
340
+ if (schema === undefined) {
341
+ function fieldsFromPathTree(pathTree) {
342
+ const fields = [];
343
+ for (const [name, value] of pathTree.map.entries()) {
344
+ if (value instanceof PathTree) {
345
+ const children = fieldsFromPathTree(value);
346
+ fields.push(new apache_arrow_1.Field(name, new apache_arrow_1.Struct(children), true));
322
347
  }
323
348
  else {
324
- throw new Error(`Column ${colName} is expected to be a vector column but first non-null value is not an array. Could not determine size of vector column`);
349
+ fields.push(new apache_arrow_1.Field(name, value, true));
325
350
  }
326
351
  }
352
+ return fields;
327
353
  }
328
- try {
329
- // Convert an Array of JS values to an arrow vector
330
- columns[colName] = makeVector(values, type, opt.dictionaryEncodeStrings);
331
- }
332
- catch (error) {
333
- // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
334
- throw Error(`Could not convert column "${colName}" to Arrow: ${error}`);
335
- }
336
- }
337
- if (opt.schema != null) {
338
- // `new ArrowTable(columns)` infers a schema which may sometimes have
339
- // incorrect nullability (it assumes nullable=true always)
340
- //
341
- // `new ArrowTable(schema, columns)` will also fail because it will create a
342
- // batch with an inferred schema and then complain that the batch schema
343
- // does not match the provided schema.
344
- //
345
- // To work around this we first create a table with the wrong schema and
346
- // then patch the schema of the batches so we can use
347
- // `new ArrowTable(schema, batches)` which does not do any schema inference
348
- const firstTable = new apache_arrow_1.Table(columns);
349
- const batchesFixed = firstTable.batches.map((batch) => new apache_arrow_1.RecordBatch(opt.schema, batch.data));
350
- let schema;
351
- if (metadata !== undefined) {
352
- let schemaMetadata = opt.schema.metadata;
353
- if (schemaMetadata.size === 0) {
354
- schemaMetadata = metadata;
355
- }
356
- else {
357
- for (const [key, entry] of schemaMetadata.entries()) {
358
- schemaMetadata.set(key, entry);
354
+ const fields = fieldsFromPathTree(pathTree);
355
+ return new apache_arrow_1.Schema(fields);
356
+ }
357
+ else {
358
+ function takeMatchingFields(fields, pathTree) {
359
+ const outFields = [];
360
+ for (const field of fields) {
361
+ if (pathTree.map.has(field.name)) {
362
+ const value = pathTree.get([field.name]);
363
+ if (value instanceof PathTree) {
364
+ const struct = field.type;
365
+ const children = takeMatchingFields(struct.children, value);
366
+ outFields.push(new apache_arrow_1.Field(field.name, new apache_arrow_1.Struct(children), field.nullable));
367
+ }
368
+ else {
369
+ outFields.push(new apache_arrow_1.Field(field.name, value, field.nullable));
370
+ }
359
371
  }
360
372
  }
361
- schema = new apache_arrow_1.Schema(opt.schema.fields, schemaMetadata);
373
+ return outFields;
374
+ }
375
+ const fields = takeMatchingFields(schema.fields, pathTree);
376
+ return new apache_arrow_1.Schema(fields);
377
+ }
378
+ }
379
+ function* rowPathsAndValues(row, basePath = []) {
380
+ for (const [key, value] of Object.entries(row)) {
381
+ if (isObject(value)) {
382
+ yield* rowPathsAndValues(value, [...basePath, key]);
362
383
  }
363
384
  else {
364
- schema = opt.schema;
385
+ yield [[...basePath, key], value];
365
386
  }
366
- return new apache_arrow_1.Table(schema, batchesFixed);
367
387
  }
368
- const tbl = new apache_arrow_1.Table(columns);
369
- if (metadata !== undefined) {
370
- // biome-ignore lint/suspicious/noExplicitAny: <explanation>
371
- tbl.schema.metadata = metadata;
388
+ }
389
+ function isObject(value) {
390
+ return (typeof value === "object" &&
391
+ value !== null &&
392
+ !Array.isArray(value) &&
393
+ !(value instanceof RegExp) &&
394
+ !(value instanceof Date) &&
395
+ !(value instanceof Set) &&
396
+ !(value instanceof Map) &&
397
+ !(value instanceof Buffer));
398
+ }
399
+ function getFieldForPath(schema, path) {
400
+ let current = schema;
401
+ for (const key of path) {
402
+ if (current instanceof apache_arrow_1.Schema) {
403
+ const field = current.fields.find((f) => f.name === key);
404
+ if (field === undefined) {
405
+ return undefined;
406
+ }
407
+ current = field;
408
+ }
409
+ else if (current instanceof apache_arrow_1.Field && apache_arrow_1.DataType.isStruct(current.type)) {
410
+ const struct = current.type;
411
+ const field = struct.children.find((f) => f.name === key);
412
+ if (field === undefined) {
413
+ return undefined;
414
+ }
415
+ current = field;
416
+ }
417
+ else {
418
+ return undefined;
419
+ }
420
+ }
421
+ if (current instanceof apache_arrow_1.Field) {
422
+ return current;
423
+ }
424
+ else {
425
+ return undefined;
426
+ }
427
+ }
428
+ /**
429
+ * Try to infer which Arrow type to use for a given value.
430
+ *
431
+ * May return undefined if the type cannot be inferred.
432
+ */
433
+ function inferType(value, path, opts) {
434
+ if (typeof value === "bigint") {
435
+ return new apache_arrow_1.Int64();
436
+ }
437
+ else if (typeof value === "number") {
438
+ // Even if it's an integer, it's safer to assume Float64. Users can
439
+ // always provide an explicit schema or use BigInt if they mean integer.
440
+ return new apache_arrow_1.Float64();
441
+ }
442
+ else if (typeof value === "string") {
443
+ if (opts.dictionaryEncodeStrings) {
444
+ return new apache_arrow_1.Dictionary(new apache_arrow_1.Utf8(), new apache_arrow_1.Int32());
445
+ }
446
+ else {
447
+ return new apache_arrow_1.Utf8();
448
+ }
449
+ }
450
+ else if (typeof value === "boolean") {
451
+ return new apache_arrow_1.Bool();
452
+ }
453
+ else if (value instanceof Buffer) {
454
+ return new apache_arrow_1.Binary();
455
+ }
456
+ else if (Array.isArray(value)) {
457
+ if (value.length === 0) {
458
+ return undefined; // Without any values we can't infer the type
459
+ }
460
+ if (path.length === 1 && Object.hasOwn(opts.vectorColumns, path[0])) {
461
+ const floatType = (0, sanitize_1.sanitizeType)(opts.vectorColumns[path[0]].type);
462
+ return new apache_arrow_1.FixedSizeList(value.length, new apache_arrow_1.Field("item", floatType, true));
463
+ }
464
+ const valueType = inferType(value[0], path, opts);
465
+ if (valueType === undefined) {
466
+ return undefined;
467
+ }
468
+ // Try to automatically detect embedding columns.
469
+ if (valueType instanceof apache_arrow_1.Float && path[path.length - 1] === "vector") {
470
+ // We default to Float32 for vectors.
471
+ const child = new apache_arrow_1.Field("item", new apache_arrow_1.Float32(), true);
472
+ return new apache_arrow_1.FixedSizeList(value.length, child);
473
+ }
474
+ else {
475
+ const child = new apache_arrow_1.Field("item", valueType, true);
476
+ return new apache_arrow_1.List(child);
477
+ }
478
+ }
479
+ else {
480
+ // TODO: timestamp
481
+ return undefined;
482
+ }
483
+ }
484
+ class PathTree {
485
+ map;
486
+ constructor(entries) {
487
+ this.map = new Map();
488
+ if (entries !== undefined) {
489
+ for (const [path, value] of entries) {
490
+ this.set(path, value);
491
+ }
492
+ }
493
+ }
494
+ has(path) {
495
+ let ref = this;
496
+ for (const part of path) {
497
+ if (!(ref instanceof PathTree) || !ref.map.has(part)) {
498
+ return false;
499
+ }
500
+ ref = ref.map.get(part);
501
+ }
502
+ return true;
503
+ }
504
+ get(path) {
505
+ let ref = this;
506
+ for (const part of path) {
507
+ if (!(ref instanceof PathTree) || !ref.map.has(part)) {
508
+ return undefined;
509
+ }
510
+ ref = ref.map.get(part);
511
+ }
512
+ return ref;
513
+ }
514
+ set(path, value) {
515
+ let ref = this;
516
+ for (const part of path.slice(0, path.length - 1)) {
517
+ if (!ref.map.has(part)) {
518
+ ref.map.set(part, new PathTree());
519
+ }
520
+ ref = ref.map.get(part);
521
+ }
522
+ ref.map.set(path[path.length - 1], value);
523
+ }
524
+ }
525
+ function transposeData(data, field, path = []) {
526
+ if (field.type instanceof apache_arrow_1.Struct) {
527
+ const childFields = field.type.children;
528
+ const childVectors = childFields.map((child) => {
529
+ return transposeData(data, child, [...path, child.name]);
530
+ });
531
+ const structData = (0, apache_arrow_1.makeData)({
532
+ type: field.type,
533
+ children: childVectors,
534
+ });
535
+ return (0, apache_arrow_1.makeVector)(structData);
536
+ }
537
+ else {
538
+ const valuesPath = [...path, field.name];
539
+ const values = data.map((datum) => {
540
+ let current = datum;
541
+ for (const key of valuesPath) {
542
+ if (isObject(current) && Object.hasOwn(current, key)) {
543
+ current = current[key];
544
+ }
545
+ else {
546
+ return null;
547
+ }
548
+ }
549
+ return current;
550
+ });
551
+ return makeVector(values, field.type);
372
552
  }
373
- return tbl;
374
553
  }
375
554
  /**
376
555
  * Create an empty Arrow table with the provided schema
@@ -409,6 +588,36 @@ function makeListVector(lists) {
409
588
  function makeVector(values, type, stringAsDictionary) {
410
589
  if (type !== undefined) {
411
590
  // No need for inference, let Arrow create it
591
+ if (type instanceof apache_arrow_1.Int) {
592
+ if (apache_arrow_1.DataType.isInt(type) && type.bitWidth === 64) {
593
+ // wrap in BigInt to avoid bug: https://github.com/apache/arrow/issues/40051
594
+ values = values.map((v) => {
595
+ if (v === null) {
596
+ return v;
597
+ }
598
+ else if (typeof v === "bigint") {
599
+ return v;
600
+ }
601
+ else if (typeof v === "number") {
602
+ return BigInt(v);
603
+ }
604
+ else {
605
+ return v;
606
+ }
607
+ });
608
+ }
609
+ else {
610
+ // Similarly, bigint isn't supported for 16 or 32-bit ints.
611
+ values = values.map((v) => {
612
+ if (typeof v == "bigint") {
613
+ return Number(v);
614
+ }
615
+ else {
616
+ return v;
617
+ }
618
+ });
619
+ }
620
+ }
412
621
  return (0, apache_arrow_1.vectorFromArray)(values, type);
413
622
  }
414
623
  if (values.length === 0) {