@lancedb/lancedb 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/biome.json +8 -2
  2. package/dist/arrow.d.ts +36 -9
  3. package/dist/arrow.js +222 -24
  4. package/dist/connection.d.ts +10 -1
  5. package/dist/connection.js +13 -7
  6. package/dist/embedding/embedding_function.d.ts +54 -28
  7. package/dist/embedding/embedding_function.js +89 -10
  8. package/dist/embedding/index.d.ts +28 -2
  9. package/dist/embedding/index.js +111 -4
  10. package/dist/embedding/openai.d.ts +16 -7
  11. package/dist/embedding/openai.js +62 -12
  12. package/dist/embedding/registry.d.ts +58 -0
  13. package/dist/embedding/registry.js +127 -0
  14. package/dist/native.d.ts +5 -4
  15. package/dist/query.d.ts +19 -7
  16. package/dist/query.js +27 -13
  17. package/dist/sanitize.d.ts +22 -1
  18. package/dist/sanitize.js +123 -110
  19. package/dist/table.d.ts +18 -3
  20. package/dist/table.js +33 -3
  21. package/lancedb/arrow.ts +243 -41
  22. package/lancedb/connection.ts +35 -6
  23. package/lancedb/embedding/embedding_function.ts +147 -42
  24. package/lancedb/embedding/index.ts +113 -2
  25. package/lancedb/embedding/openai.ts +62 -16
  26. package/lancedb/embedding/registry.ts +176 -0
  27. package/lancedb/query.ts +58 -14
  28. package/lancedb/sanitize.ts +22 -22
  29. package/lancedb/table.ts +67 -5
  30. package/nodejs-artifacts/arrow.d.ts +36 -9
  31. package/nodejs-artifacts/arrow.js +222 -24
  32. package/nodejs-artifacts/connection.d.ts +10 -1
  33. package/nodejs-artifacts/connection.js +13 -7
  34. package/nodejs-artifacts/embedding/embedding_function.d.ts +54 -28
  35. package/nodejs-artifacts/embedding/embedding_function.js +89 -10
  36. package/nodejs-artifacts/embedding/index.d.ts +28 -2
  37. package/nodejs-artifacts/embedding/index.js +111 -4
  38. package/nodejs-artifacts/embedding/openai.d.ts +16 -7
  39. package/nodejs-artifacts/embedding/openai.js +62 -12
  40. package/nodejs-artifacts/embedding/registry.d.ts +58 -0
  41. package/nodejs-artifacts/embedding/registry.js +127 -0
  42. package/nodejs-artifacts/native.d.ts +5 -4
  43. package/nodejs-artifacts/query.d.ts +19 -7
  44. package/nodejs-artifacts/query.js +27 -13
  45. package/nodejs-artifacts/sanitize.d.ts +22 -1
  46. package/nodejs-artifacts/sanitize.js +123 -110
  47. package/nodejs-artifacts/table.d.ts +18 -3
  48. package/nodejs-artifacts/table.js +33 -3
  49. package/package.json +14 -9
  50. package/tsconfig.json +3 -1
package/lancedb/arrow.ts CHANGED
@@ -17,24 +17,122 @@ import {
17
17
  Binary,
18
18
  DataType,
19
19
  Field,
20
+ FixedSizeBinary,
20
21
  FixedSizeList,
21
- type Float,
22
+ Float,
22
23
  Float32,
24
+ Int,
25
+ LargeBinary,
23
26
  List,
27
+ Null,
24
28
  RecordBatch,
25
29
  RecordBatchFileWriter,
26
30
  RecordBatchStreamWriter,
27
31
  Schema,
28
32
  Struct,
29
33
  Utf8,
30
- type Vector,
34
+ Vector,
31
35
  makeBuilder,
32
36
  makeData,
33
37
  type makeTable,
34
38
  vectorFromArray,
35
39
  } from "apache-arrow";
36
40
  import { type EmbeddingFunction } from "./embedding/embedding_function";
37
- import { sanitizeSchema } from "./sanitize";
41
+ import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
42
+ import { sanitizeField, sanitizeSchema, sanitizeType } from "./sanitize";
43
+ export * from "apache-arrow";
44
+
45
+ export type IntoVector = Float32Array | Float64Array | number[];
46
+
47
+ export function isArrowTable(value: object): value is ArrowTable {
48
+ if (value instanceof ArrowTable) return true;
49
+ return "schema" in value && "batches" in value;
50
+ }
51
+
52
+ export function isDataType(value: unknown): value is DataType {
53
+ return (
54
+ value instanceof DataType ||
55
+ DataType.isNull(value) ||
56
+ DataType.isInt(value) ||
57
+ DataType.isFloat(value) ||
58
+ DataType.isBinary(value) ||
59
+ DataType.isLargeBinary(value) ||
60
+ DataType.isUtf8(value) ||
61
+ DataType.isLargeUtf8(value) ||
62
+ DataType.isBool(value) ||
63
+ DataType.isDecimal(value) ||
64
+ DataType.isDate(value) ||
65
+ DataType.isTime(value) ||
66
+ DataType.isTimestamp(value) ||
67
+ DataType.isInterval(value) ||
68
+ DataType.isDuration(value) ||
69
+ DataType.isList(value) ||
70
+ DataType.isStruct(value) ||
71
+ DataType.isUnion(value) ||
72
+ DataType.isFixedSizeBinary(value) ||
73
+ DataType.isFixedSizeList(value) ||
74
+ DataType.isMap(value) ||
75
+ DataType.isDictionary(value)
76
+ );
77
+ }
78
+ export function isNull(value: unknown): value is Null {
79
+ return value instanceof Null || DataType.isNull(value);
80
+ }
81
+ export function isInt(value: unknown): value is Int {
82
+ return value instanceof Int || DataType.isInt(value);
83
+ }
84
+ export function isFloat(value: unknown): value is Float {
85
+ return value instanceof Float || DataType.isFloat(value);
86
+ }
87
+ export function isBinary(value: unknown): value is Binary {
88
+ return value instanceof Binary || DataType.isBinary(value);
89
+ }
90
+ export function isLargeBinary(value: unknown): value is LargeBinary {
91
+ return value instanceof LargeBinary || DataType.isLargeBinary(value);
92
+ }
93
+ export function isUtf8(value: unknown): value is Utf8 {
94
+ return value instanceof Utf8 || DataType.isUtf8(value);
95
+ }
96
+ export function isLargeUtf8(value: unknown): value is Utf8 {
97
+ return value instanceof Utf8 || DataType.isLargeUtf8(value);
98
+ }
99
+ export function isBool(value: unknown): value is Utf8 {
100
+ return value instanceof Utf8 || DataType.isBool(value);
101
+ }
102
+ export function isDecimal(value: unknown): value is Utf8 {
103
+ return value instanceof Utf8 || DataType.isDecimal(value);
104
+ }
105
+ export function isDate(value: unknown): value is Utf8 {
106
+ return value instanceof Utf8 || DataType.isDate(value);
107
+ }
108
+ export function isTime(value: unknown): value is Utf8 {
109
+ return value instanceof Utf8 || DataType.isTime(value);
110
+ }
111
+ export function isTimestamp(value: unknown): value is Utf8 {
112
+ return value instanceof Utf8 || DataType.isTimestamp(value);
113
+ }
114
+ export function isInterval(value: unknown): value is Utf8 {
115
+ return value instanceof Utf8 || DataType.isInterval(value);
116
+ }
117
+ export function isDuration(value: unknown): value is Utf8 {
118
+ return value instanceof Utf8 || DataType.isDuration(value);
119
+ }
120
+ export function isList(value: unknown): value is List {
121
+ return value instanceof List || DataType.isList(value);
122
+ }
123
+ export function isStruct(value: unknown): value is Struct {
124
+ return value instanceof Struct || DataType.isStruct(value);
125
+ }
126
+ export function isUnion(value: unknown): value is Struct {
127
+ return value instanceof Struct || DataType.isUnion(value);
128
+ }
129
+ export function isFixedSizeBinary(value: unknown): value is FixedSizeBinary {
130
+ return value instanceof FixedSizeBinary || DataType.isFixedSizeBinary(value);
131
+ }
132
+
133
+ export function isFixedSizeList(value: unknown): value is FixedSizeList {
134
+ return value instanceof FixedSizeList || DataType.isFixedSizeList(value);
135
+ }
38
136
 
39
137
  /** Data type accepted by NodeJS SDK */
40
138
  export type Data = Record<string, unknown>[] | ArrowTable;
@@ -86,6 +184,7 @@ export class MakeArrowTableOptions {
86
184
  vector: new VectorColumnOptions(),
87
185
  };
88
186
  embeddings?: EmbeddingFunction<unknown>;
187
+ embeddingFunction?: EmbeddingFunctionConfig;
89
188
 
90
189
  /**
91
190
  * If true then string columns will be encoded with dictionary encoding
@@ -198,6 +297,7 @@ export class MakeArrowTableOptions {
198
297
  export function makeArrowTable(
199
298
  data: Array<Record<string, unknown>>,
200
299
  options?: Partial<MakeArrowTableOptions>,
300
+ metadata?: Map<string, string>,
201
301
  ): ArrowTable {
202
302
  if (
203
303
  data.length === 0 &&
@@ -209,7 +309,11 @@ export function makeArrowTable(
209
309
  const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
210
310
  if (opt.schema !== undefined && opt.schema !== null) {
211
311
  opt.schema = sanitizeSchema(opt.schema);
212
- opt.schema = validateSchemaEmbeddings(opt.schema, data, opt.embeddings);
312
+ opt.schema = validateSchemaEmbeddings(
313
+ opt.schema,
314
+ data,
315
+ options?.embeddingFunction,
316
+ );
213
317
  }
214
318
  const columns: Record<string, Vector> = {};
215
319
  // TODO: sample dataset to find missing columns
@@ -290,20 +394,41 @@ export function makeArrowTable(
290
394
  // `new ArrowTable(schema, batches)` which does not do any schema inference
291
395
  const firstTable = new ArrowTable(columns);
292
396
  const batchesFixed = firstTable.batches.map(
293
- // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
294
397
  (batch) => new RecordBatch(opt.schema!, batch.data),
295
398
  );
296
- return new ArrowTable(opt.schema, batchesFixed);
297
- } else {
298
- return new ArrowTable(columns);
399
+ let schema: Schema;
400
+ if (metadata !== undefined) {
401
+ let schemaMetadata = opt.schema.metadata;
402
+ if (schemaMetadata.size === 0) {
403
+ schemaMetadata = metadata;
404
+ } else {
405
+ for (const [key, entry] of schemaMetadata.entries()) {
406
+ schemaMetadata.set(key, entry);
407
+ }
408
+ }
409
+
410
+ schema = new Schema(opt.schema.fields, schemaMetadata);
411
+ } else {
412
+ schema = opt.schema;
413
+ }
414
+ return new ArrowTable(schema, batchesFixed);
299
415
  }
416
+ const tbl = new ArrowTable(columns);
417
+ if (metadata !== undefined) {
418
+ // biome-ignore lint/suspicious/noExplicitAny: <explanation>
419
+ (<any>tbl.schema).metadata = metadata;
420
+ }
421
+ return tbl;
300
422
  }
301
423
 
302
424
  /**
303
425
  * Create an empty Arrow table with the provided schema
304
426
  */
305
- export function makeEmptyTable(schema: Schema): ArrowTable {
306
- return makeArrowTable([], { schema });
427
+ export function makeEmptyTable(
428
+ schema: Schema,
429
+ metadata?: Map<string, string>,
430
+ ): ArrowTable {
431
+ return makeArrowTable([], { schema }, metadata);
307
432
  }
308
433
 
309
434
  /**
@@ -375,13 +500,74 @@ function makeVector(
375
500
  }
376
501
  }
377
502
 
503
+ /** Helper function to apply embeddings from metadata to an input table */
504
+ async function applyEmbeddingsFromMetadata(
505
+ table: ArrowTable,
506
+ schema: Schema,
507
+ ): Promise<ArrowTable> {
508
+ const registry = getRegistry();
509
+ const functions = registry.parseFunctions(schema.metadata);
510
+
511
+ const columns = Object.fromEntries(
512
+ table.schema.fields.map((field) => [
513
+ field.name,
514
+ table.getChild(field.name)!,
515
+ ]),
516
+ );
517
+
518
+ for (const functionEntry of functions.values()) {
519
+ const sourceColumn = columns[functionEntry.sourceColumn];
520
+ const destColumn = functionEntry.vectorColumn ?? "vector";
521
+ if (sourceColumn === undefined) {
522
+ throw new Error(
523
+ `Cannot apply embedding function because the source column '${functionEntry.sourceColumn}' was not present in the data`,
524
+ );
525
+ }
526
+ if (columns[destColumn] !== undefined) {
527
+ throw new Error(
528
+ `Attempt to apply embeddings to table failed because column ${destColumn} already existed`,
529
+ );
530
+ }
531
+ if (table.batches.length > 1) {
532
+ throw new Error(
533
+ "Internal error: `makeArrowTable` unexpectedly created a table with more than one batch",
534
+ );
535
+ }
536
+ const values = sourceColumn.toArray();
537
+
538
+ const vectors =
539
+ await functionEntry.function.computeSourceEmbeddings(values);
540
+ if (vectors.length !== values.length) {
541
+ throw new Error(
542
+ "Embedding function did not return an embedding for each input element",
543
+ );
544
+ }
545
+ let destType: DataType;
546
+ const dtype = schema.fields.find((f) => f.name === destColumn)!.type;
547
+ if (isFixedSizeList(dtype)) {
548
+ destType = sanitizeType(dtype);
549
+ } else {
550
+ throw new Error(
551
+ "Expected FixedSizeList as datatype for vector field, instead got: " +
552
+ dtype,
553
+ );
554
+ }
555
+ const vector = makeVector(vectors, destType);
556
+ columns[destColumn] = vector;
557
+ }
558
+ const newTable = new ArrowTable(columns);
559
+ return alignTable(newTable, schema);
560
+ }
561
+
378
562
  /** Helper function to apply embeddings to an input table */
379
563
  async function applyEmbeddings<T>(
380
564
  table: ArrowTable,
381
- embeddings?: EmbeddingFunction<T>,
565
+ embeddings?: EmbeddingFunctionConfig,
382
566
  schema?: Schema,
383
567
  ): Promise<ArrowTable> {
384
- if (embeddings == null) {
568
+ if (schema?.metadata.has("embedding_functions")) {
569
+ return applyEmbeddingsFromMetadata(table, schema!);
570
+ } else if (embeddings == null || embeddings === undefined) {
385
571
  return table;
386
572
  }
387
573
 
@@ -399,8 +585,9 @@ async function applyEmbeddings<T>(
399
585
  const newColumns = Object.fromEntries(colEntries);
400
586
 
401
587
  const sourceColumn = newColumns[embeddings.sourceColumn];
402
- const destColumn = embeddings.destColumn ?? "vector";
403
- const innerDestType = embeddings.embeddingDataType ?? new Float32();
588
+ const destColumn = embeddings.vectorColumn ?? "vector";
589
+ const innerDestType =
590
+ embeddings.function.embeddingDataType() ?? new Float32();
404
591
  if (sourceColumn === undefined) {
405
592
  throw new Error(
406
593
  `Cannot apply embedding function because the source column '${embeddings.sourceColumn}' was not present in the data`,
@@ -414,11 +601,9 @@ async function applyEmbeddings<T>(
414
601
  // if we call convertToTable with 0 records and a schema that includes the embedding
415
602
  return table;
416
603
  }
417
- if (embeddings.embeddingDimension !== undefined) {
418
- const destType = newVectorType(
419
- embeddings.embeddingDimension,
420
- innerDestType,
421
- );
604
+ const dimensions = embeddings.function.ndims();
605
+ if (dimensions !== undefined) {
606
+ const destType = newVectorType(dimensions, innerDestType);
422
607
  newColumns[destColumn] = makeVector([], destType);
423
608
  } else if (schema != null) {
424
609
  const destField = schema.fields.find((f) => f.name === destColumn);
@@ -446,7 +631,9 @@ async function applyEmbeddings<T>(
446
631
  );
447
632
  }
448
633
  const values = sourceColumn.toArray();
449
- const vectors = await embeddings.embed(values as T[]);
634
+ const vectors = await embeddings.function.computeSourceEmbeddings(
635
+ values as T[],
636
+ );
450
637
  if (vectors.length !== values.length) {
451
638
  throw new Error(
452
639
  "Embedding function did not return an embedding for each input element",
@@ -486,9 +673,9 @@ async function applyEmbeddings<T>(
486
673
  * embedding columns. If no schema is provded then embedding columns will
487
674
  * be placed at the end of the table, after all of the input columns.
488
675
  */
489
- export async function convertToTable<T>(
676
+ export async function convertToTable(
490
677
  data: Array<Record<string, unknown>>,
491
- embeddings?: EmbeddingFunction<T>,
678
+ embeddings?: EmbeddingFunctionConfig,
492
679
  makeTableOptions?: Partial<MakeArrowTableOptions>,
493
680
  ): Promise<ArrowTable> {
494
681
  const table = makeArrowTable(data, makeTableOptions);
@@ -496,13 +683,13 @@ export async function convertToTable<T>(
496
683
  }
497
684
 
498
685
  /** Creates the Arrow Type for a Vector column with dimension `dim` */
499
- function newVectorType<T extends Float>(
686
+ export function newVectorType<T extends Float>(
500
687
  dim: number,
501
688
  innerType: T,
502
689
  ): FixedSizeList<T> {
503
690
  // in Lance we always default to have the elements nullable, so we need to set it to true
504
691
  // otherwise we often get schema mismatches because the stored data always has schema with nullable elements
505
- const children = new Field<T>("item", innerType, true);
692
+ const children = new Field("item", <T>sanitizeType(innerType), true);
506
693
  return new FixedSizeList(dim, children);
507
694
  }
508
695
 
@@ -513,9 +700,9 @@ function newVectorType<T extends Float>(
513
700
  *
514
701
  * `schema` is required if data is empty
515
702
  */
516
- export async function fromRecordsToBuffer<T>(
703
+ export async function fromRecordsToBuffer(
517
704
  data: Array<Record<string, unknown>>,
518
- embeddings?: EmbeddingFunction<T>,
705
+ embeddings?: EmbeddingFunctionConfig,
519
706
  schema?: Schema,
520
707
  ): Promise<Buffer> {
521
708
  if (schema !== undefined && schema !== null) {
@@ -533,9 +720,9 @@ export async function fromRecordsToBuffer<T>(
533
720
  *
534
721
  * `schema` is required if data is empty
535
722
  */
536
- export async function fromRecordsToStreamBuffer<T>(
723
+ export async function fromRecordsToStreamBuffer(
537
724
  data: Array<Record<string, unknown>>,
538
- embeddings?: EmbeddingFunction<T>,
725
+ embeddings?: EmbeddingFunctionConfig,
539
726
  schema?: Schema,
540
727
  ): Promise<Buffer> {
541
728
  if (schema !== undefined && schema !== null) {
@@ -554,9 +741,9 @@ export async function fromRecordsToStreamBuffer<T>(
554
741
  *
555
742
  * `schema` is required if the table is empty
556
743
  */
557
- export async function fromTableToBuffer<T>(
744
+ export async function fromTableToBuffer(
558
745
  table: ArrowTable,
559
- embeddings?: EmbeddingFunction<T>,
746
+ embeddings?: EmbeddingFunctionConfig,
560
747
  schema?: Schema,
561
748
  ): Promise<Buffer> {
562
749
  if (schema !== undefined && schema !== null) {
@@ -575,19 +762,19 @@ export async function fromTableToBuffer<T>(
575
762
  *
576
763
  * `schema` is required if the table is empty
577
764
  */
578
- export async function fromDataToBuffer<T>(
765
+ export async function fromDataToBuffer(
579
766
  data: Data,
580
- embeddings?: EmbeddingFunction<T>,
767
+ embeddings?: EmbeddingFunctionConfig,
581
768
  schema?: Schema,
582
769
  ): Promise<Buffer> {
583
770
  if (schema !== undefined && schema !== null) {
584
771
  schema = sanitizeSchema(schema);
585
772
  }
586
- if (data instanceof ArrowTable) {
773
+ if (isArrowTable(data)) {
587
774
  return fromTableToBuffer(data, embeddings, schema);
588
775
  } else {
589
- const table = await convertToTable(data);
590
- return fromTableToBuffer(table, embeddings, schema);
776
+ const table = await convertToTable(data, embeddings, { schema });
777
+ return fromTableToBuffer(table);
591
778
  }
592
779
  }
593
780
 
@@ -599,9 +786,9 @@ export async function fromDataToBuffer<T>(
599
786
  *
600
787
  * `schema` is required if the table is empty
601
788
  */
602
- export async function fromTableToStreamBuffer<T>(
789
+ export async function fromTableToStreamBuffer(
603
790
  table: ArrowTable,
604
- embeddings?: EmbeddingFunction<T>,
791
+ embeddings?: EmbeddingFunctionConfig,
605
792
  schema?: Schema,
606
793
  ): Promise<Buffer> {
607
794
  const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
@@ -654,7 +841,7 @@ export function createEmptyTable(schema: Schema): ArrowTable {
654
841
  function validateSchemaEmbeddings(
655
842
  schema: Schema,
656
843
  data: Array<Record<string, unknown>>,
657
- embeddings: EmbeddingFunction<unknown> | undefined,
844
+ embeddings: EmbeddingFunctionConfig | undefined,
658
845
  ) {
659
846
  const fields = [];
660
847
  const missingEmbeddingFields = [];
@@ -664,10 +851,25 @@ function validateSchemaEmbeddings(
664
851
  // if it does not, we add it to the list of missing embedding fields
665
852
  // Finally, we check if those missing embedding fields are `this._embeddings`
666
853
  // if they are not, we throw an error
667
- for (const field of schema.fields) {
668
- if (field.type instanceof FixedSizeList) {
854
+ for (let field of schema.fields) {
855
+ if (isFixedSizeList(field.type)) {
856
+ field = sanitizeField(field);
857
+
669
858
  if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
670
- missingEmbeddingFields.push(field);
859
+ if (schema.metadata.has("embedding_functions")) {
860
+ const embeddings = JSON.parse(
861
+ schema.metadata.get("embedding_functions")!,
862
+ );
863
+ if (
864
+ // biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
865
+ embeddings.find((f: any) => f["vectorColumn"] === field.name) ===
866
+ undefined
867
+ ) {
868
+ missingEmbeddingFields.push(field);
869
+ }
870
+ } else {
871
+ missingEmbeddingFields.push(field);
872
+ }
671
873
  } else {
672
874
  fields.push(field);
673
875
  }
@@ -12,8 +12,14 @@
12
12
  // See the License for the specific language governing permissions and
13
13
  // limitations under the License.
14
14
 
15
- import { Table as ArrowTable, Schema } from "apache-arrow";
16
- import { fromTableToBuffer, makeArrowTable, makeEmptyTable } from "./arrow";
15
+ import { Table as ArrowTable, Schema } from "./arrow";
16
+ import {
17
+ fromTableToBuffer,
18
+ isArrowTable,
19
+ makeArrowTable,
20
+ makeEmptyTable,
21
+ } from "./arrow";
22
+ import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
17
23
  import { ConnectionOptions, Connection as LanceDbConnection } from "./native";
18
24
  import { Table } from "./table";
19
25
 
@@ -65,6 +71,14 @@ export interface CreateTableOptions {
65
71
  * The available options are described at https://lancedb.github.io/lancedb/guides/storage/
66
72
  */
67
73
  storageOptions?: Record<string, string>;
74
+ /**
75
+ * If true then data files will be written with the legacy format
76
+ *
77
+ * The default is true while the new format is in beta
78
+ */
79
+ useLegacyFormat?: boolean;
80
+ schema?: Schema;
81
+ embeddingFunction?: EmbeddingFunctionConfig;
68
82
  }
69
83
 
70
84
  export interface OpenTableOptions {
@@ -174,6 +188,7 @@ export class Connection {
174
188
  cleanseStorageOptions(options?.storageOptions),
175
189
  options?.indexCacheSize,
176
190
  );
191
+
177
192
  return new Table(innerTable);
178
193
  }
179
194
 
@@ -196,18 +211,25 @@ export class Connection {
196
211
  }
197
212
 
198
213
  let table: ArrowTable;
199
- if (data instanceof ArrowTable) {
214
+ if (isArrowTable(data)) {
200
215
  table = data;
201
216
  } else {
202
- table = makeArrowTable(data);
217
+ table = makeArrowTable(data, options);
203
218
  }
204
- const buf = await fromTableToBuffer(table);
219
+
220
+ const buf = await fromTableToBuffer(
221
+ table,
222
+ options?.embeddingFunction,
223
+ options?.schema,
224
+ );
205
225
  const innerTable = await this.inner.createTable(
206
226
  name,
207
227
  buf,
208
228
  mode,
209
229
  cleanseStorageOptions(options?.storageOptions),
230
+ options?.useLegacyFormat,
210
231
  );
232
+
211
233
  return new Table(innerTable);
212
234
  }
213
235
 
@@ -227,14 +249,21 @@ export class Connection {
227
249
  if (mode === "create" && existOk) {
228
250
  mode = "exist_ok";
229
251
  }
252
+ let metadata: Map<string, string> | undefined = undefined;
253
+ if (options?.embeddingFunction !== undefined) {
254
+ const embeddingFunction = options.embeddingFunction;
255
+ const registry = getRegistry();
256
+ metadata = registry.getTableMetadata([embeddingFunction]);
257
+ }
230
258
 
231
- const table = makeEmptyTable(schema);
259
+ const table = makeEmptyTable(schema, metadata);
232
260
  const buf = await fromTableToBuffer(table);
233
261
  const innerTable = await this.inner.createEmptyTable(
234
262
  name,
235
263
  buf,
236
264
  mode,
237
265
  cleanseStorageOptions(options?.storageOptions),
266
+ options?.useLegacyFormat,
238
267
  );
239
268
  return new Table(innerTable);
240
269
  }