@lancedb/lancedb 0.5.2 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/Cargo.toml +3 -3
  2. package/biome.json +19 -3
  3. package/dist/arrow.d.ts +41 -8
  4. package/dist/arrow.js +4 -4
  5. package/dist/connection.d.ts +49 -29
  6. package/dist/connection.js +21 -73
  7. package/dist/embedding/embedding_function.d.ts +9 -1
  8. package/dist/embedding/embedding_function.js +6 -0
  9. package/dist/embedding/openai.d.ts +6 -5
  10. package/dist/embedding/openai.js +4 -2
  11. package/dist/embedding/registry.d.ts +6 -11
  12. package/dist/index.d.ts +51 -3
  13. package/dist/index.js +28 -4
  14. package/dist/merge.d.ts +54 -0
  15. package/dist/merge.js +64 -0
  16. package/dist/native.d.ts +29 -3
  17. package/dist/native.js +26 -9
  18. package/dist/query.d.ts +33 -10
  19. package/dist/query.js +100 -13
  20. package/dist/remote/client.d.ts +28 -0
  21. package/dist/remote/client.js +172 -0
  22. package/dist/remote/connection.d.ts +25 -0
  23. package/dist/remote/connection.js +110 -0
  24. package/dist/remote/index.d.ts +3 -0
  25. package/dist/remote/index.js +9 -0
  26. package/dist/remote/table.d.ts +42 -0
  27. package/dist/remote/table.js +179 -0
  28. package/dist/sanitize.d.ts +3 -2
  29. package/dist/sanitize.js +55 -1
  30. package/dist/table.d.ts +105 -30
  31. package/dist/table.js +94 -237
  32. package/dist/util.d.ts +14 -0
  33. package/dist/util.js +65 -0
  34. package/examples/ann_indexes.ts +49 -0
  35. package/examples/basic.ts +149 -0
  36. package/examples/embedding.ts +83 -0
  37. package/examples/filtering.ts +34 -0
  38. package/examples/jsconfig.json +27 -0
  39. package/examples/package-lock.json +79 -0
  40. package/examples/package.json +18 -0
  41. package/examples/search.ts +37 -0
  42. package/lancedb/arrow.ts +80 -23
  43. package/lancedb/connection.ts +107 -92
  44. package/lancedb/embedding/embedding_function.ts +12 -1
  45. package/lancedb/embedding/openai.ts +11 -6
  46. package/lancedb/embedding/registry.ts +34 -22
  47. package/lancedb/index.ts +101 -2
  48. package/lancedb/merge.ts +70 -0
  49. package/lancedb/query.ts +114 -28
  50. package/lancedb/remote/client.ts +221 -0
  51. package/lancedb/remote/connection.ts +201 -0
  52. package/lancedb/remote/index.ts +3 -0
  53. package/lancedb/remote/table.ts +226 -0
  54. package/lancedb/sanitize.ts +73 -1
  55. package/lancedb/table.ts +320 -132
  56. package/lancedb/util.ts +69 -0
  57. package/native.d.ts +208 -0
  58. package/nodejs-artifacts/arrow.d.ts +41 -8
  59. package/nodejs-artifacts/arrow.js +4 -4
  60. package/nodejs-artifacts/connection.d.ts +49 -29
  61. package/nodejs-artifacts/connection.js +21 -73
  62. package/nodejs-artifacts/embedding/embedding_function.d.ts +9 -1
  63. package/nodejs-artifacts/embedding/embedding_function.js +6 -0
  64. package/nodejs-artifacts/embedding/openai.d.ts +6 -5
  65. package/nodejs-artifacts/embedding/openai.js +4 -2
  66. package/nodejs-artifacts/embedding/registry.d.ts +6 -11
  67. package/nodejs-artifacts/index.d.ts +51 -3
  68. package/nodejs-artifacts/index.js +28 -4
  69. package/nodejs-artifacts/merge.d.ts +54 -0
  70. package/nodejs-artifacts/merge.js +64 -0
  71. package/nodejs-artifacts/native.d.ts +29 -3
  72. package/nodejs-artifacts/native.js +26 -9
  73. package/nodejs-artifacts/query.d.ts +33 -10
  74. package/nodejs-artifacts/query.js +100 -13
  75. package/nodejs-artifacts/remote/client.d.ts +28 -0
  76. package/nodejs-artifacts/remote/client.js +172 -0
  77. package/nodejs-artifacts/remote/connection.d.ts +25 -0
  78. package/nodejs-artifacts/remote/connection.js +110 -0
  79. package/nodejs-artifacts/remote/index.d.ts +3 -0
  80. package/nodejs-artifacts/remote/index.js +9 -0
  81. package/nodejs-artifacts/remote/table.d.ts +42 -0
  82. package/nodejs-artifacts/remote/table.js +179 -0
  83. package/nodejs-artifacts/sanitize.d.ts +3 -2
  84. package/nodejs-artifacts/sanitize.js +55 -1
  85. package/nodejs-artifacts/table.d.ts +105 -30
  86. package/nodejs-artifacts/table.js +94 -237
  87. package/nodejs-artifacts/util.d.ts +14 -0
  88. package/nodejs-artifacts/util.js +65 -0
  89. package/package.json +25 -11
@@ -0,0 +1,149 @@
1
+ // --8<-- [start:imports]
2
+ import * as lancedb from "@lancedb/lancedb";
3
+ import * as arrow from "apache-arrow";
4
+ import { Field, FixedSizeList, Float16, Int32, Schema } from "apache-arrow";
5
+
6
+ // --8<-- [end:imports]
7
+
8
+ // --8<-- [start:connect]
9
+ const uri = "/tmp/lancedb/";
10
+ const db = await lancedb.connect(uri);
11
+ // --8<-- [end:connect]
12
+ {
13
+ // --8<-- [start:create_table]
14
+ const data = [
15
+ { vector: [3.1, 4.1], item: "foo", price: 10.0 },
16
+ { vector: [5.9, 26.5], item: "bar", price: 20.0 },
17
+ ];
18
+ const _tbl = await db.createTable("myTable", data);
19
+ // --8<-- [end:create_table]
20
+ {
21
+ // --8<-- [start:create_table_exists_ok]
22
+ const _tbl = await db.createTable("myTable", data, {
23
+ existsOk: true,
24
+ });
25
+ // --8<-- [end:create_table_exists_ok]
26
+ }
27
+ {
28
+ // --8<-- [start:create_table_overwrite]
29
+ const _tbl = await db.createTable("myTable", data, {
30
+ mode: "overwrite",
31
+ });
32
+ // --8<-- [end:create_table_overwrite]
33
+ }
34
+ }
35
+
36
+ {
37
+ // --8<-- [start:create_table_with_schema]
38
+ const schema = new arrow.Schema([
39
+ new arrow.Field(
40
+ "vector",
41
+ new arrow.FixedSizeList(
42
+ 2,
43
+ new arrow.Field("item", new arrow.Float32(), true),
44
+ ),
45
+ ),
46
+ new arrow.Field("item", new arrow.Utf8(), true),
47
+ new arrow.Field("price", new arrow.Float32(), true),
48
+ ]);
49
+ const data = [
50
+ { vector: [3.1, 4.1], item: "foo", price: 10.0 },
51
+ { vector: [5.9, 26.5], item: "bar", price: 20.0 },
52
+ ];
53
+ const _tbl = await db.createTable("myTable", data, {
54
+ schema,
55
+ });
56
+ // --8<-- [end:create_table_with_schema]
57
+ }
58
+
59
+ {
60
+ // --8<-- [start:create_empty_table]
61
+ const schema = new arrow.Schema([
62
+ new arrow.Field(
63
+ "vector",
64
+ new arrow.FixedSizeList(
65
+ 2,
66
+ new arrow.Field("item", new arrow.Float32(), true),
67
+ ),
68
+ ),
69
+ ]);
70
+ const _tbl = await db.createEmptyTable("empty_table", schema);
71
+ // --8<-- [end:create_empty_table]
72
+ }
73
+ {
74
+ // --8<-- [start:open_table]
75
+ const _tbl = await db.openTable("myTable");
76
+ // --8<-- [end:open_table]
77
+ }
78
+
79
+ {
80
+ // --8<-- [start:table_names]
81
+ const tableNames = await db.tableNames();
82
+ console.log(tableNames);
83
+ // --8<-- [end:table_names]
84
+ }
85
+
86
+ const tbl = await db.openTable("myTable");
87
+ {
88
+ // --8<-- [start:add_data]
89
+ const data = [
90
+ { vector: [1.3, 1.4], item: "fizz", price: 100.0 },
91
+ { vector: [9.5, 56.2], item: "buzz", price: 200.0 },
92
+ ];
93
+ await tbl.add(data);
94
+ // --8<-- [end:add_data]
95
+ }
96
+ {
97
+ // --8<-- [start:vector_search]
98
+ const _res = await tbl.search([100, 100]).limit(2).toArray();
99
+ // --8<-- [end:vector_search]
100
+ }
101
+ {
102
+ const data = Array.from({ length: 1000 })
103
+ .fill(null)
104
+ .map(() => ({
105
+ vector: [Math.random(), Math.random()],
106
+ item: "autogen",
107
+ price: Math.round(Math.random() * 100),
108
+ }));
109
+
110
+ await tbl.add(data);
111
+ }
112
+
113
+ // --8<-- [start:create_index]
114
+ await tbl.createIndex("vector");
115
+ // --8<-- [end:create_index]
116
+
117
+ // --8<-- [start:delete_rows]
118
+ await tbl.delete('item = "fizz"');
119
+ // --8<-- [end:delete_rows]
120
+
121
+ // --8<-- [start:drop_table]
122
+ await db.dropTable("myTable");
123
+ // --8<-- [end:drop_table]
124
+ await db.dropTable("empty_table");
125
+
126
+ {
127
+ // --8<-- [start:create_f16_table]
128
+ const db = await lancedb.connect("/tmp/lancedb");
129
+ const dim = 16;
130
+ const total = 10;
131
+ const f16Schema = new Schema([
132
+ new Field("id", new Int32()),
133
+ new Field(
134
+ "vector",
135
+ new FixedSizeList(dim, new Field("item", new Float16(), true)),
136
+ false,
137
+ ),
138
+ ]);
139
+ const data = lancedb.makeArrowTable(
140
+ Array.from(Array(total), (_, i) => ({
141
+ id: i,
142
+ vector: Array.from(Array(dim), Math.random),
143
+ })),
144
+ { schema: f16Schema },
145
+ );
146
+ const _table = await db.createTable("f16_tbl", data);
147
+ // --8<-- [end:create_f16_table]
148
+ await db.dropTable("f16_tbl");
149
+ }
@@ -0,0 +1,83 @@
1
+ // --8<-- [start:imports]
2
+ import * as lancedb from "@lancedb/lancedb";
3
+ import { LanceSchema, getRegistry, register } from "@lancedb/lancedb/embedding";
4
+ import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
5
+ import { type Float, Float32, Utf8 } from "apache-arrow";
6
+ // --8<-- [end:imports]
7
+
8
+ {
9
+ // --8<-- [start:openai_embeddings]
10
+
11
+ const db = await lancedb.connect("/tmp/db");
12
+ const func = getRegistry()
13
+ .get("openai")
14
+ ?.create({ model: "text-embedding-ada-002" }) as EmbeddingFunction;
15
+
16
+ const wordsSchema = LanceSchema({
17
+ text: func.sourceField(new Utf8()),
18
+ vector: func.vectorField(),
19
+ });
20
+ const tbl = await db.createEmptyTable("words", wordsSchema, {
21
+ mode: "overwrite",
22
+ });
23
+ await tbl.add([{ text: "hello world" }, { text: "goodbye world" }]);
24
+
25
+ const query = "greetings";
26
+ const actual = (await (await tbl.search(query)).limit(1).toArray())[0];
27
+
28
+ // --8<-- [end:openai_embeddings]
29
+ console.log("result = ", actual.text);
30
+ }
31
+
32
+ {
33
+ // --8<-- [start:embedding_function]
34
+ const db = await lancedb.connect("/tmp/db");
35
+
36
+ @register("my_embedding")
37
+ class MyEmbeddingFunction extends EmbeddingFunction<string> {
38
+ toJSON(): object {
39
+ return {};
40
+ }
41
+ ndims() {
42
+ return 3;
43
+ }
44
+ embeddingDataType(): Float {
45
+ return new Float32();
46
+ }
47
+ async computeQueryEmbeddings(_data: string) {
48
+ // This is a placeholder for a real embedding function
49
+ return [1, 2, 3];
50
+ }
51
+ async computeSourceEmbeddings(data: string[]) {
52
+ // This is a placeholder for a real embedding function; each row gets its own array
53
+ return data.map(() => [1, 2, 3]);
54
+ }
55
+ }
56
+
57
+ const func = new MyEmbeddingFunction();
58
+
59
+ const data = [{ text: "pepperoni" }, { text: "pineapple" }];
60
+
61
+ // Option 1: manually specify the embedding function
62
+ const table = await db.createTable("vectors", data, {
63
+ embeddingFunction: {
64
+ function: func,
65
+ sourceColumn: "text",
66
+ vectorColumn: "vector",
67
+ },
68
+ mode: "overwrite",
69
+ });
70
+
71
+ // Option 2: provide the embedding function through a schema
72
+
73
+ const schema = LanceSchema({
74
+ text: func.sourceField(new Utf8()),
75
+ vector: func.vectorField(),
76
+ });
77
+
78
+ const table2 = await db.createTable("vectors2", data, {
79
+ schema,
80
+ mode: "overwrite",
81
+ });
82
+ // --8<-- [end:embedding_function]
83
+ }
@@ -0,0 +1,34 @@
1
+ import * as lancedb from "@lancedb/lancedb";
2
+
3
+ const db = await lancedb.connect("data/sample-lancedb");
4
+
5
+ const data = Array.from({ length: 10_000 }, (_, i) => ({
6
+ vector: Array(1536).fill(i),
7
+ id: i,
8
+ item: `item ${i}`,
9
+ strId: `${i}`,
10
+ }));
11
+
12
+ const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
13
+
14
+ // --8<-- [start:search]
15
+ const _result = await tbl
16
+ .search(Array(1536).fill(0.5))
17
+ .limit(1)
18
+ .where("id = 10")
19
+ .toArray();
20
+ // --8<-- [end:search]
21
+
22
+ // --8<-- [start:vec_search]
23
+ await tbl
24
+ .search(Array(1536).fill(0))
25
+ .where("(item IN ('item 0', 'item 2')) AND (id > 10)")
26
+ .postfilter()
27
+ .toArray();
28
+ // --8<-- [end:vec_search]
29
+
30
+ // --8<-- [start:sql_search]
31
+ await tbl.query().where("id = 10").limit(10).toArray();
32
+ // --8<-- [end:sql_search]
33
+
34
+ console.log("SQL search: done");
@@ -0,0 +1,27 @@
1
+ {
2
+ "compilerOptions": {
3
+ // Enable latest features
4
+ "lib": ["ESNext", "DOM"],
5
+ "target": "ESNext",
6
+ "module": "ESNext",
7
+ "moduleDetection": "force",
8
+ "jsx": "react-jsx",
9
+ "allowJs": true,
10
+
11
+ // Bundler mode
12
+ "moduleResolution": "bundler",
13
+ "allowImportingTsExtensions": true,
14
+ "verbatimModuleSyntax": true,
15
+ "noEmit": true,
16
+
17
+ // Best practices
18
+ "strict": true,
19
+ "skipLibCheck": true,
20
+ "noFallthroughCasesInSwitch": true,
21
+
22
+ // Some stricter flags (disabled by default)
23
+ "noUnusedLocals": false,
24
+ "noUnusedParameters": false,
25
+ "noPropertyAccessFromIndexSignature": false
26
+ }
27
+ }
@@ -0,0 +1,79 @@
1
+ {
2
+ "name": "examples",
3
+ "version": "1.0.0",
4
+ "lockfileVersion": 3,
5
+ "requires": true,
6
+ "packages": {
7
+ "": {
8
+ "name": "examples",
9
+ "version": "1.0.0",
10
+ "license": "Apache-2.0",
11
+ "dependencies": {
12
+ "@lancedb/lancedb": "file:../"
13
+ },
14
+ "peerDependencies": {
15
+ "typescript": "^5.0.0"
16
+ }
17
+ },
18
+ "..": {
19
+ "name": "@lancedb/lancedb",
20
+ "version": "0.6.0",
21
+ "cpu": [
22
+ "x64",
23
+ "arm64"
24
+ ],
25
+ "license": "Apache 2.0",
26
+ "os": [
27
+ "darwin",
28
+ "linux",
29
+ "win32"
30
+ ],
31
+ "dependencies": {
32
+ "apache-arrow": "^15.0.0",
33
+ "axios": "^1.7.2",
34
+ "openai": "^4.29.2",
35
+ "reflect-metadata": "^0.2.2"
36
+ },
37
+ "devDependencies": {
38
+ "@aws-sdk/client-kms": "^3.33.0",
39
+ "@aws-sdk/client-s3": "^3.33.0",
40
+ "@biomejs/biome": "^1.7.3",
41
+ "@jest/globals": "^29.7.0",
42
+ "@napi-rs/cli": "^2.18.0",
43
+ "@types/axios": "^0.14.0",
44
+ "@types/jest": "^29.1.2",
45
+ "@types/tmp": "^0.2.6",
46
+ "apache-arrow-old": "npm:apache-arrow@13.0.0",
47
+ "eslint": "^8.57.0",
48
+ "jest": "^29.7.0",
49
+ "shx": "^0.3.4",
50
+ "tmp": "^0.2.3",
51
+ "ts-jest": "^29.1.2",
52
+ "typedoc": "^0.25.7",
53
+ "typedoc-plugin-markdown": "^3.17.1",
54
+ "typescript": "^5.3.3",
55
+ "typescript-eslint": "^7.1.0"
56
+ },
57
+ "engines": {
58
+ "node": ">= 18"
59
+ }
60
+ },
61
+ "node_modules/@lancedb/lancedb": {
62
+ "resolved": "..",
63
+ "link": true
64
+ },
65
+ "node_modules/typescript": {
66
+ "version": "5.5.2",
67
+ "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.2.tgz",
68
+ "integrity": "sha512-NcRtPEOsPFFWjobJEtfihkLCZCXZt/os3zf8nTxjVH3RvTSxjrCamJpbExGvYOF+tFHc3pA65qpdwPbzjohhew==",
69
+ "peer": true,
70
+ "bin": {
71
+ "tsc": "bin/tsc",
72
+ "tsserver": "bin/tsserver"
73
+ },
74
+ "engines": {
75
+ "node": ">=14.17"
76
+ }
77
+ }
78
+ }
79
+ }
@@ -0,0 +1,18 @@
1
+ {
2
+ "name": "examples",
3
+ "version": "1.0.0",
4
+ "description": "Examples for LanceDB",
5
+ "main": "index.js",
6
+ "type": "module",
7
+ "scripts": {
8
+ "test": "echo \"Error: no test specified\" && exit 1"
9
+ },
10
+ "author": "Lance Devs",
11
+ "license": "Apache-2.0",
12
+ "dependencies": {
13
+ "@lancedb/lancedb": "file:../"
14
+ },
15
+ "peerDependencies": {
16
+ "typescript": "^5.0.0"
17
+ }
18
+ }
@@ -0,0 +1,37 @@
1
+ import * as fs from "node:fs";
2
+ // --8<-- [start:import]
3
+ import * as lancedb from "@lancedb/lancedb";
4
+ // --8<-- [end:import]
5
+
6
+ async function setup() {
7
+ fs.rmSync("data/sample-lancedb", { recursive: true, force: true });
8
+ const db = await lancedb.connect("data/sample-lancedb");
9
+
10
+ const data = Array.from({ length: 10_000 }, (_, i) => ({
11
+ vector: Array(1536).fill(i),
12
+ id: `${i}`,
13
+ content: "",
14
+ longId: `${i}`,
15
+ }));
16
+
17
+ await db.createTable("my_vectors", data);
18
+ }
19
+
20
+ await setup();
21
+
22
+ // --8<-- [start:search1]
23
+ const db = await lancedb.connect("data/sample-lancedb");
24
+ const tbl = await db.openTable("my_vectors");
25
+
26
+ const _results1 = await tbl.search(Array(1536).fill(1.2)).limit(10).toArray();
27
+ // --8<-- [end:search1]
28
+
29
+ // --8<-- [start:search2]
30
+ const _results2 = await tbl
31
+ .search(Array(1536).fill(1.2))
32
+ .distanceType("cosine")
33
+ .limit(10)
34
+ .toArray();
35
+ // --8<-- [end:search2]
36
+
37
+ console.log("search: done");
package/lancedb/arrow.ts CHANGED
@@ -15,6 +15,7 @@
15
15
  import {
16
16
  Table as ArrowTable,
17
17
  Binary,
18
+ BufferType,
18
19
  DataType,
19
20
  Field,
20
21
  FixedSizeBinary,
@@ -37,14 +38,72 @@ import {
37
38
  type makeTable,
38
39
  vectorFromArray,
39
40
  } from "apache-arrow";
41
+ import { Buffers } from "apache-arrow/data";
40
42
  import { type EmbeddingFunction } from "./embedding/embedding_function";
41
43
  import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
42
- import { sanitizeField, sanitizeSchema, sanitizeType } from "./sanitize";
44
+ import {
45
+ sanitizeField,
46
+ sanitizeSchema,
47
+ sanitizeTable,
48
+ sanitizeType,
49
+ } from "./sanitize";
43
50
  export * from "apache-arrow";
44
-
45
- export type IntoVector = Float32Array | Float64Array | number[];
46
-
47
- export function isArrowTable(value: object): value is ArrowTable {
51
+ export type SchemaLike =
52
+ | Schema
53
+ | {
54
+ fields: FieldLike[];
55
+ metadata: Map<string, string>;
56
+ get names(): unknown[];
57
+ };
58
+ export type FieldLike =
59
+ | Field
60
+ | {
61
+ type: string;
62
+ name: string;
63
+ nullable?: boolean;
64
+ metadata?: Map<string, string>;
65
+ };
66
+
67
+ export type DataLike =
68
+ // biome-ignore lint/suspicious/noExplicitAny: <explanation>
69
+ | import("apache-arrow").Data<Struct<any>>
70
+ | {
71
+ // biome-ignore lint/suspicious/noExplicitAny: <explanation>
72
+ type: any;
73
+ length: number;
74
+ offset: number;
75
+ stride: number;
76
+ nullable: boolean;
77
+ children: DataLike[];
78
+ get nullCount(): number;
79
+ // biome-ignore lint/suspicious/noExplicitAny: <explanation>
80
+ values: Buffers<any>[BufferType.DATA];
81
+ // biome-ignore lint/suspicious/noExplicitAny: <explanation>
82
+ typeIds: Buffers<any>[BufferType.TYPE];
83
+ // biome-ignore lint/suspicious/noExplicitAny: <explanation>
84
+ nullBitmap: Buffers<any>[BufferType.VALIDITY];
85
+ // biome-ignore lint/suspicious/noExplicitAny: <explanation>
86
+ valueOffsets: Buffers<any>[BufferType.OFFSET];
87
+ };
88
+
89
+ export type RecordBatchLike =
90
+ | RecordBatch
91
+ | {
92
+ schema: SchemaLike;
93
+ data: DataLike;
94
+ };
95
+
96
+ export type TableLike =
97
+ | ArrowTable
98
+ | { schema: SchemaLike; batches: RecordBatchLike[] };
99
+
100
+ export type IntoVector =
101
+ | Float32Array
102
+ | Float64Array
103
+ | number[]
104
+ | Promise<Float32Array | Float64Array | number[]>;
105
+
106
+ export function isArrowTable(value: object): value is TableLike {
48
107
  if (value instanceof ArrowTable) return true;
49
108
  return "schema" in value && "batches" in value;
50
109
  }
@@ -135,7 +194,7 @@ export function isFixedSizeList(value: unknown): value is FixedSizeList {
135
194
  }
136
195
 
137
196
  /** Data type accepted by NodeJS SDK */
138
- export type Data = Record<string, unknown>[] | ArrowTable;
197
+ export type Data = Record<string, unknown>[] | TableLike;
139
198
 
140
199
  /*
141
200
  * Options to control how a column should be converted to a vector array
@@ -162,7 +221,7 @@ export class MakeArrowTableOptions {
162
221
  * The schema must be specified if there are no records (e.g. to make
163
222
  * an empty table)
164
223
  */
165
- schema?: Schema;
224
+ schema?: SchemaLike;
166
225
 
167
226
  /*
168
227
  * Mapping from vector column name to expected type
@@ -310,7 +369,7 @@ export function makeArrowTable(
310
369
  if (opt.schema !== undefined && opt.schema !== null) {
311
370
  opt.schema = sanitizeSchema(opt.schema);
312
371
  opt.schema = validateSchemaEmbeddings(
313
- opt.schema,
372
+ opt.schema as Schema,
314
373
  data,
315
374
  options?.embeddingFunction,
316
375
  );
@@ -394,7 +453,7 @@ export function makeArrowTable(
394
453
  // `new ArrowTable(schema, batches)` which does not do any schema inference
395
454
  const firstTable = new ArrowTable(columns);
396
455
  const batchesFixed = firstTable.batches.map(
397
- (batch) => new RecordBatch(opt.schema!, batch.data),
456
+ (batch) => new RecordBatch(opt.schema as Schema, batch.data),
398
457
  );
399
458
  let schema: Schema;
400
459
  if (metadata !== undefined) {
@@ -407,9 +466,9 @@ export function makeArrowTable(
407
466
  }
408
467
  }
409
468
 
410
- schema = new Schema(opt.schema.fields, schemaMetadata);
469
+ schema = new Schema(opt.schema.fields as Field[], schemaMetadata);
411
470
  } else {
412
- schema = opt.schema;
471
+ schema = opt.schema as Schema;
413
472
  }
414
473
  return new ArrowTable(schema, batchesFixed);
415
474
  }
@@ -425,7 +484,7 @@ export function makeArrowTable(
425
484
  * Create an empty Arrow table with the provided schema
426
485
  */
427
486
  export function makeEmptyTable(
428
- schema: Schema,
487
+ schema: SchemaLike,
429
488
  metadata?: Map<string, string>,
430
489
  ): ArrowTable {
431
490
  return makeArrowTable([], { schema }, metadata);
@@ -563,18 +622,17 @@ async function applyEmbeddingsFromMetadata(
563
622
  async function applyEmbeddings<T>(
564
623
  table: ArrowTable,
565
624
  embeddings?: EmbeddingFunctionConfig,
566
- schema?: Schema,
625
+ schema?: SchemaLike,
567
626
  ): Promise<ArrowTable> {
627
+ if (schema !== undefined && schema !== null) {
628
+ schema = sanitizeSchema(schema);
629
+ }
568
630
  if (schema?.metadata.has("embedding_functions")) {
569
- return applyEmbeddingsFromMetadata(table, schema!);
631
+ return applyEmbeddingsFromMetadata(table, schema! as Schema);
570
632
  } else if (embeddings == null || embeddings === undefined) {
571
633
  return table;
572
634
  }
573
635
 
574
- if (schema !== undefined && schema !== null) {
575
- schema = sanitizeSchema(schema);
576
- }
577
-
578
636
  // Convert from ArrowTable to Record<String, Vector>
579
637
  const colEntries = [...Array(table.numCols).keys()].map((_, idx) => {
580
638
  const name = table.schema.fields[idx].name;
@@ -650,7 +708,7 @@ async function applyEmbeddings<T>(
650
708
  `When using embedding functions and specifying a schema the schema should include the embedding column but the column ${destColumn} was missing`,
651
709
  );
652
710
  }
653
- return alignTable(newTable, schema);
711
+ return alignTable(newTable, schema as Schema);
654
712
  }
655
713
  return newTable;
656
714
  }
@@ -744,7 +802,7 @@ export async function fromRecordsToStreamBuffer(
744
802
  export async function fromTableToBuffer(
745
803
  table: ArrowTable,
746
804
  embeddings?: EmbeddingFunctionConfig,
747
- schema?: Schema,
805
+ schema?: SchemaLike,
748
806
  ): Promise<Buffer> {
749
807
  if (schema !== undefined && schema !== null) {
750
808
  schema = sanitizeSchema(schema);
@@ -771,7 +829,7 @@ export async function fromDataToBuffer(
771
829
  schema = sanitizeSchema(schema);
772
830
  }
773
831
  if (isArrowTable(data)) {
774
- return fromTableToBuffer(data, embeddings, schema);
832
+ return fromTableToBuffer(sanitizeTable(data), embeddings, schema);
775
833
  } else {
776
834
  const table = await convertToTable(data, embeddings, { schema });
777
835
  return fromTableToBuffer(table);
@@ -789,7 +847,7 @@ export async function fromDataToBuffer(
789
847
  export async function fromTableToStreamBuffer(
790
848
  table: ArrowTable,
791
849
  embeddings?: EmbeddingFunctionConfig,
792
- schema?: Schema,
850
+ schema?: SchemaLike,
793
851
  ): Promise<Buffer> {
794
852
  const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
795
853
  const writer = RecordBatchStreamWriter.writeAll(tableWithEmbeddings);
@@ -854,7 +912,6 @@ function validateSchemaEmbeddings(
854
912
  for (let field of schema.fields) {
855
913
  if (isFixedSizeList(field.type)) {
856
914
  field = sanitizeField(field);
857
-
858
915
  if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
859
916
  if (schema.metadata.has("embedding_functions")) {
860
917
  const embeddings = JSON.parse(