@lancedb/lancedb 0.5.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +3 -3
- package/biome.json +19 -3
- package/dist/arrow.d.ts +41 -8
- package/dist/arrow.js +4 -4
- package/dist/connection.d.ts +49 -29
- package/dist/connection.js +21 -73
- package/dist/embedding/embedding_function.d.ts +9 -1
- package/dist/embedding/embedding_function.js +6 -0
- package/dist/embedding/openai.d.ts +6 -5
- package/dist/embedding/openai.js +4 -2
- package/dist/embedding/registry.d.ts +6 -11
- package/dist/index.d.ts +51 -3
- package/dist/index.js +28 -4
- package/dist/merge.d.ts +54 -0
- package/dist/merge.js +64 -0
- package/dist/native.d.ts +29 -3
- package/dist/native.js +26 -9
- package/dist/query.d.ts +33 -10
- package/dist/query.js +100 -13
- package/dist/remote/client.d.ts +28 -0
- package/dist/remote/client.js +172 -0
- package/dist/remote/connection.d.ts +25 -0
- package/dist/remote/connection.js +110 -0
- package/dist/remote/index.d.ts +3 -0
- package/dist/remote/index.js +9 -0
- package/dist/remote/table.d.ts +42 -0
- package/dist/remote/table.js +179 -0
- package/dist/sanitize.d.ts +3 -2
- package/dist/sanitize.js +55 -1
- package/dist/table.d.ts +105 -30
- package/dist/table.js +94 -237
- package/dist/util.d.ts +14 -0
- package/dist/util.js +65 -0
- package/examples/ann_indexes.ts +49 -0
- package/examples/basic.ts +149 -0
- package/examples/embedding.ts +83 -0
- package/examples/filtering.ts +34 -0
- package/examples/jsconfig.json +27 -0
- package/examples/package-lock.json +79 -0
- package/examples/package.json +18 -0
- package/examples/search.ts +37 -0
- package/lancedb/arrow.ts +80 -23
- package/lancedb/connection.ts +107 -92
- package/lancedb/embedding/embedding_function.ts +12 -1
- package/lancedb/embedding/openai.ts +11 -6
- package/lancedb/embedding/registry.ts +34 -22
- package/lancedb/index.ts +101 -2
- package/lancedb/merge.ts +70 -0
- package/lancedb/query.ts +114 -28
- package/lancedb/remote/client.ts +221 -0
- package/lancedb/remote/connection.ts +201 -0
- package/lancedb/remote/index.ts +3 -0
- package/lancedb/remote/table.ts +226 -0
- package/lancedb/sanitize.ts +73 -1
- package/lancedb/table.ts +320 -132
- package/lancedb/util.ts +69 -0
- package/native.d.ts +208 -0
- package/nodejs-artifacts/arrow.d.ts +41 -8
- package/nodejs-artifacts/arrow.js +4 -4
- package/nodejs-artifacts/connection.d.ts +49 -29
- package/nodejs-artifacts/connection.js +21 -73
- package/nodejs-artifacts/embedding/embedding_function.d.ts +9 -1
- package/nodejs-artifacts/embedding/embedding_function.js +6 -0
- package/nodejs-artifacts/embedding/openai.d.ts +6 -5
- package/nodejs-artifacts/embedding/openai.js +4 -2
- package/nodejs-artifacts/embedding/registry.d.ts +6 -11
- package/nodejs-artifacts/index.d.ts +51 -3
- package/nodejs-artifacts/index.js +28 -4
- package/nodejs-artifacts/merge.d.ts +54 -0
- package/nodejs-artifacts/merge.js +64 -0
- package/nodejs-artifacts/native.d.ts +29 -3
- package/nodejs-artifacts/native.js +26 -9
- package/nodejs-artifacts/query.d.ts +33 -10
- package/nodejs-artifacts/query.js +100 -13
- package/nodejs-artifacts/remote/client.d.ts +28 -0
- package/nodejs-artifacts/remote/client.js +172 -0
- package/nodejs-artifacts/remote/connection.d.ts +25 -0
- package/nodejs-artifacts/remote/connection.js +110 -0
- package/nodejs-artifacts/remote/index.d.ts +3 -0
- package/nodejs-artifacts/remote/index.js +9 -0
- package/nodejs-artifacts/remote/table.d.ts +42 -0
- package/nodejs-artifacts/remote/table.js +179 -0
- package/nodejs-artifacts/sanitize.d.ts +3 -2
- package/nodejs-artifacts/sanitize.js +55 -1
- package/nodejs-artifacts/table.d.ts +105 -30
- package/nodejs-artifacts/table.js +94 -237
- package/nodejs-artifacts/util.d.ts +14 -0
- package/nodejs-artifacts/util.js +65 -0
- package/package.json +25 -11
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
// --8<-- [start:imports]
|
|
2
|
+
import * as lancedb from "@lancedb/lancedb";
|
|
3
|
+
import * as arrow from "apache-arrow";
|
|
4
|
+
import { Field, FixedSizeList, Float16, Int32, Schema } from "apache-arrow";
|
|
5
|
+
|
|
6
|
+
// --8<-- [end:imports]
|
|
7
|
+
|
|
8
|
+
// --8<-- [start:connect]
|
|
9
|
+
const uri = "/tmp/lancedb/";
|
|
10
|
+
const db = await lancedb.connect(uri);
|
|
11
|
+
// --8<-- [end:connect]
|
|
12
|
+
{
|
|
13
|
+
// --8<-- [start:create_table]
|
|
14
|
+
const data = [
|
|
15
|
+
{ vector: [3.1, 4.1], item: "foo", price: 10.0 },
|
|
16
|
+
{ vector: [5.9, 26.5], item: "bar", price: 20.0 },
|
|
17
|
+
];
|
|
18
|
+
const _tbl = await db.createTable("myTable", data);
|
|
19
|
+
// --8<-- [end:create_table]
|
|
20
|
+
{
|
|
21
|
+
// --8<-- [start:create_table_exists_ok]
|
|
22
|
+
const _tbl = await db.createTable("myTable", data, {
|
|
23
|
+
existsOk: true,
|
|
24
|
+
});
|
|
25
|
+
// --8<-- [end:create_table_exists_ok]
|
|
26
|
+
}
|
|
27
|
+
{
|
|
28
|
+
// --8<-- [start:create_table_overwrite]
|
|
29
|
+
const _tbl = await db.createTable("myTable", data, {
|
|
30
|
+
mode: "overwrite",
|
|
31
|
+
});
|
|
32
|
+
// --8<-- [end:create_table_overwrite]
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
{
|
|
37
|
+
// --8<-- [start:create_table_with_schema]
|
|
38
|
+
const schema = new arrow.Schema([
|
|
39
|
+
new arrow.Field(
|
|
40
|
+
"vector",
|
|
41
|
+
new arrow.FixedSizeList(
|
|
42
|
+
2,
|
|
43
|
+
new arrow.Field("item", new arrow.Float32(), true),
|
|
44
|
+
),
|
|
45
|
+
),
|
|
46
|
+
new arrow.Field("item", new arrow.Utf8(), true),
|
|
47
|
+
new arrow.Field("price", new arrow.Float32(), true),
|
|
48
|
+
]);
|
|
49
|
+
const data = [
|
|
50
|
+
{ vector: [3.1, 4.1], item: "foo", price: 10.0 },
|
|
51
|
+
{ vector: [5.9, 26.5], item: "bar", price: 20.0 },
|
|
52
|
+
];
|
|
53
|
+
const _tbl = await db.createTable("myTable", data, {
|
|
54
|
+
schema,
|
|
55
|
+
});
|
|
56
|
+
// --8<-- [end:create_table_with_schema]
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
{
|
|
60
|
+
// --8<-- [start:create_empty_table]
|
|
61
|
+
const schema = new arrow.Schema([
|
|
62
|
+
new arrow.Field(
|
|
63
|
+
"vector",
|
|
64
|
+
new arrow.FixedSizeList(
|
|
65
|
+
2,
|
|
66
|
+
new arrow.Field("item", new arrow.Float32(), true),
|
|
67
|
+
),
|
|
68
|
+
),
|
|
69
|
+
]);
|
|
70
|
+
const _tbl = await db.createEmptyTable("empty_table", schema);
|
|
71
|
+
// --8<-- [end:create_empty_table]
|
|
72
|
+
}
|
|
73
|
+
{
|
|
74
|
+
// --8<-- [start:open_table]
|
|
75
|
+
const _tbl = await db.openTable("myTable");
|
|
76
|
+
// --8<-- [end:open_table]
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
{
|
|
80
|
+
// --8<-- [start:table_names]
|
|
81
|
+
const tableNames = await db.tableNames();
|
|
82
|
+
console.log(tableNames);
|
|
83
|
+
// --8<-- [end:table_names]
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
const tbl = await db.openTable("myTable");
|
|
87
|
+
{
|
|
88
|
+
// --8<-- [start:add_data]
|
|
89
|
+
const data = [
|
|
90
|
+
{ vector: [1.3, 1.4], item: "fizz", price: 100.0 },
|
|
91
|
+
{ vector: [9.5, 56.2], item: "buzz", price: 200.0 },
|
|
92
|
+
];
|
|
93
|
+
await tbl.add(data);
|
|
94
|
+
// --8<-- [end:add_data]
|
|
95
|
+
}
|
|
96
|
+
{
|
|
97
|
+
// --8<-- [start:vector_search]
|
|
98
|
+
const _res = await tbl.search([100, 100]).limit(2).toArray(); // await: toArray() resolves to the result rows
|
|
99
|
+
// --8<-- [end:vector_search]
|
|
100
|
+
}
|
|
101
|
+
{
|
|
102
|
+
const data = Array.from({ length: 1000 })
|
|
103
|
+
.fill(null)
|
|
104
|
+
.map(() => ({
|
|
105
|
+
vector: [Math.random(), Math.random()],
|
|
106
|
+
item: "autogen",
|
|
107
|
+
price: Math.round(Math.random() * 100),
|
|
108
|
+
}));
|
|
109
|
+
|
|
110
|
+
await tbl.add(data);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// --8<-- [start:create_index]
|
|
114
|
+
await tbl.createIndex("vector");
|
|
115
|
+
// --8<-- [end:create_index]
|
|
116
|
+
|
|
117
|
+
// --8<-- [start:delete_rows]
|
|
118
|
+
await tbl.delete('item = "fizz"');
|
|
119
|
+
// --8<-- [end:delete_rows]
|
|
120
|
+
|
|
121
|
+
// --8<-- [start:drop_table]
|
|
122
|
+
await db.dropTable("myTable");
|
|
123
|
+
// --8<-- [end:drop_table]
|
|
124
|
+
await db.dropTable("empty_table");
|
|
125
|
+
|
|
126
|
+
{
|
|
127
|
+
// --8<-- [start:create_f16_table]
|
|
128
|
+
const db = await lancedb.connect("/tmp/lancedb");
|
|
129
|
+
const dim = 16;
|
|
130
|
+
const total = 10;
|
|
131
|
+
const f16Schema = new Schema([
|
|
132
|
+
new Field("id", new Int32()),
|
|
133
|
+
new Field(
|
|
134
|
+
"vector",
|
|
135
|
+
new FixedSizeList(dim, new Field("item", new Float16(), true)),
|
|
136
|
+
false,
|
|
137
|
+
),
|
|
138
|
+
]);
|
|
139
|
+
const data = lancedb.makeArrowTable(
|
|
140
|
+
Array.from(Array(total), (_, i) => ({
|
|
141
|
+
id: i,
|
|
142
|
+
vector: Array.from(Array(dim), Math.random),
|
|
143
|
+
})),
|
|
144
|
+
{ schema: f16Schema },
|
|
145
|
+
);
|
|
146
|
+
const _table = await db.createTable("f16_tbl", data);
|
|
147
|
+
// --8<-- [end:create_f16_table]
|
|
148
|
+
await db.dropTable("f16_tbl");
|
|
149
|
+
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
// --8<-- [start:imports]
|
|
2
|
+
import * as lancedb from "@lancedb/lancedb";
|
|
3
|
+
import { LanceSchema, getRegistry, register } from "@lancedb/lancedb/embedding";
|
|
4
|
+
import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
|
|
5
|
+
import { type Float, Float32, Utf8 } from "apache-arrow";
|
|
6
|
+
// --8<-- [end:imports]
|
|
7
|
+
|
|
8
|
+
{
|
|
9
|
+
// --8<-- [start:openai_embeddings]
|
|
10
|
+
|
|
11
|
+
const db = await lancedb.connect("/tmp/db");
|
|
12
|
+
const func = getRegistry()
|
|
13
|
+
.get("openai")
|
|
14
|
+
?.create({ model: "text-embedding-ada-002" }) as EmbeddingFunction;
|
|
15
|
+
|
|
16
|
+
const wordsSchema = LanceSchema({
|
|
17
|
+
text: func.sourceField(new Utf8()),
|
|
18
|
+
vector: func.vectorField(),
|
|
19
|
+
});
|
|
20
|
+
const tbl = await db.createEmptyTable("words", wordsSchema, {
|
|
21
|
+
mode: "overwrite",
|
|
22
|
+
});
|
|
23
|
+
await tbl.add([{ text: "hello world" }, { text: "goodbye world" }]);
|
|
24
|
+
|
|
25
|
+
const query = "greetings";
|
|
26
|
+
const actual = (await (await tbl.search(query)).limit(1).toArray())[0];
|
|
27
|
+
|
|
28
|
+
// --8<-- [end:openai_embeddings]
|
|
29
|
+
console.log("result = ", actual.text);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
{
|
|
33
|
+
// --8<-- [start:embedding_function]
|
|
34
|
+
const db = await lancedb.connect("/tmp/db");
|
|
35
|
+
|
|
36
|
+
@register("my_embedding")
|
|
37
|
+
class MyEmbeddingFunction extends EmbeddingFunction<string> {
|
|
38
|
+
toJSON(): object {
|
|
39
|
+
return {};
|
|
40
|
+
}
|
|
41
|
+
ndims() {
|
|
42
|
+
return 3;
|
|
43
|
+
}
|
|
44
|
+
embeddingDataType(): Float {
|
|
45
|
+
return new Float32();
|
|
46
|
+
}
|
|
47
|
+
async computeQueryEmbeddings(_data: string) {
|
|
48
|
+
// This is a placeholder for a real embedding function
|
|
49
|
+
return [1, 2, 3];
|
|
50
|
+
}
|
|
51
|
+
async computeSourceEmbeddings(data: string[]) {
|
|
52
|
+
// This is a placeholder for a real embedding function
|
|
53
|
+
return Array.from({ length: data.length }).fill([1, 2, 3]) as number[][];
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
const func = new MyEmbeddingFunction();
|
|
58
|
+
|
|
59
|
+
const data = [{ text: "pepperoni" }, { text: "pineapple" }];
|
|
60
|
+
|
|
61
|
+
// Option 1: manually specify the embedding function
|
|
62
|
+
const table = await db.createTable("vectors", data, {
|
|
63
|
+
embeddingFunction: {
|
|
64
|
+
function: func,
|
|
65
|
+
sourceColumn: "text",
|
|
66
|
+
vectorColumn: "vector",
|
|
67
|
+
},
|
|
68
|
+
mode: "overwrite",
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
// Option 2: provide the embedding function through a schema
|
|
72
|
+
|
|
73
|
+
const schema = LanceSchema({
|
|
74
|
+
text: func.sourceField(new Utf8()),
|
|
75
|
+
vector: func.vectorField(),
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
const table2 = await db.createTable("vectors2", data, {
|
|
79
|
+
schema,
|
|
80
|
+
mode: "overwrite",
|
|
81
|
+
});
|
|
82
|
+
// --8<-- [end:embedding_function]
|
|
83
|
+
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import * as lancedb from "@lancedb/lancedb";
|
|
2
|
+
|
|
3
|
+
const db = await lancedb.connect("data/sample-lancedb");
|
|
4
|
+
|
|
5
|
+
const data = Array.from({ length: 10_000 }, (_, i) => ({
|
|
6
|
+
vector: Array(1536).fill(i),
|
|
7
|
+
id: i,
|
|
8
|
+
item: `item ${i}`,
|
|
9
|
+
strId: `${i}`,
|
|
10
|
+
}));
|
|
11
|
+
|
|
12
|
+
const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
|
|
13
|
+
|
|
14
|
+
// --8<-- [start:search]
|
|
15
|
+
const _result = await tbl
|
|
16
|
+
.search(Array(1536).fill(0.5))
|
|
17
|
+
.limit(1)
|
|
18
|
+
.where("id = 10")
|
|
19
|
+
.toArray();
|
|
20
|
+
// --8<-- [end:search]
|
|
21
|
+
|
|
22
|
+
// --8<-- [start:vec_search]
|
|
23
|
+
await tbl
|
|
24
|
+
.search(Array(1536).fill(0))
|
|
25
|
+
.where("(item IN ('item 0', 'item 2')) AND (id > 10)")
|
|
26
|
+
.postfilter()
|
|
27
|
+
.toArray();
|
|
28
|
+
// --8<-- [end:vec_search]
|
|
29
|
+
|
|
30
|
+
// --8<-- [start:sql_search]
|
|
31
|
+
await tbl.query().where("id = 10").limit(10).toArray();
|
|
32
|
+
// --8<-- [end:sql_search]
|
|
33
|
+
|
|
34
|
+
console.log("SQL search: done");
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
// Enable latest features
|
|
4
|
+
"lib": ["ESNext", "DOM"],
|
|
5
|
+
"target": "ESNext",
|
|
6
|
+
"module": "ESNext",
|
|
7
|
+
"moduleDetection": "force",
|
|
8
|
+
"jsx": "react-jsx",
|
|
9
|
+
"allowJs": true,
|
|
10
|
+
|
|
11
|
+
// Bundler mode
|
|
12
|
+
"moduleResolution": "bundler",
|
|
13
|
+
"allowImportingTsExtensions": true,
|
|
14
|
+
"verbatimModuleSyntax": true,
|
|
15
|
+
"noEmit": true,
|
|
16
|
+
|
|
17
|
+
// Best practices
|
|
18
|
+
"strict": true,
|
|
19
|
+
"skipLibCheck": true,
|
|
20
|
+
"noFallthroughCasesInSwitch": true,
|
|
21
|
+
|
|
22
|
+
// Some stricter flags (disabled by default)
|
|
23
|
+
"noUnusedLocals": false,
|
|
24
|
+
"noUnusedParameters": false,
|
|
25
|
+
"noPropertyAccessFromIndexSignature": false
|
|
26
|
+
}
|
|
27
|
+
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "examples",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"lockfileVersion": 3,
|
|
5
|
+
"requires": true,
|
|
6
|
+
"packages": {
|
|
7
|
+
"": {
|
|
8
|
+
"name": "examples",
|
|
9
|
+
"version": "1.0.0",
|
|
10
|
+
"license": "Apache-2.0",
|
|
11
|
+
"dependencies": {
|
|
12
|
+
"@lancedb/lancedb": "file:../"
|
|
13
|
+
},
|
|
14
|
+
"peerDependencies": {
|
|
15
|
+
"typescript": "^5.0.0"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"..": {
|
|
19
|
+
"name": "@lancedb/lancedb",
|
|
20
|
+
"version": "0.6.0",
|
|
21
|
+
"cpu": [
|
|
22
|
+
"x64",
|
|
23
|
+
"arm64"
|
|
24
|
+
],
|
|
25
|
+
"license": "Apache 2.0",
|
|
26
|
+
"os": [
|
|
27
|
+
"darwin",
|
|
28
|
+
"linux",
|
|
29
|
+
"win32"
|
|
30
|
+
],
|
|
31
|
+
"dependencies": {
|
|
32
|
+
"apache-arrow": "^15.0.0",
|
|
33
|
+
"axios": "^1.7.2",
|
|
34
|
+
"openai": "^4.29.2",
|
|
35
|
+
"reflect-metadata": "^0.2.2"
|
|
36
|
+
},
|
|
37
|
+
"devDependencies": {
|
|
38
|
+
"@aws-sdk/client-kms": "^3.33.0",
|
|
39
|
+
"@aws-sdk/client-s3": "^3.33.0",
|
|
40
|
+
"@biomejs/biome": "^1.7.3",
|
|
41
|
+
"@jest/globals": "^29.7.0",
|
|
42
|
+
"@napi-rs/cli": "^2.18.0",
|
|
43
|
+
"@types/axios": "^0.14.0",
|
|
44
|
+
"@types/jest": "^29.1.2",
|
|
45
|
+
"@types/tmp": "^0.2.6",
|
|
46
|
+
"apache-arrow-old": "npm:apache-arrow@13.0.0",
|
|
47
|
+
"eslint": "^8.57.0",
|
|
48
|
+
"jest": "^29.7.0",
|
|
49
|
+
"shx": "^0.3.4",
|
|
50
|
+
"tmp": "^0.2.3",
|
|
51
|
+
"ts-jest": "^29.1.2",
|
|
52
|
+
"typedoc": "^0.25.7",
|
|
53
|
+
"typedoc-plugin-markdown": "^3.17.1",
|
|
54
|
+
"typescript": "^5.3.3",
|
|
55
|
+
"typescript-eslint": "^7.1.0"
|
|
56
|
+
},
|
|
57
|
+
"engines": {
|
|
58
|
+
"node": ">= 18"
|
|
59
|
+
}
|
|
60
|
+
},
|
|
61
|
+
"node_modules/@lancedb/lancedb": {
|
|
62
|
+
"resolved": "..",
|
|
63
|
+
"link": true
|
|
64
|
+
},
|
|
65
|
+
"node_modules/typescript": {
|
|
66
|
+
"version": "5.5.2",
|
|
67
|
+
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.2.tgz",
|
|
68
|
+
"integrity": "sha512-NcRtPEOsPFFWjobJEtfihkLCZCXZt/os3zf8nTxjVH3RvTSxjrCamJpbExGvYOF+tFHc3pA65qpdwPbzjohhew==",
|
|
69
|
+
"peer": true,
|
|
70
|
+
"bin": {
|
|
71
|
+
"tsc": "bin/tsc",
|
|
72
|
+
"tsserver": "bin/tsserver"
|
|
73
|
+
},
|
|
74
|
+
"engines": {
|
|
75
|
+
"node": ">=14.17"
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "examples",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Examples for LanceDB",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"scripts": {
|
|
8
|
+
"test": "echo \"Error: no test specified\" && exit 1"
|
|
9
|
+
},
|
|
10
|
+
"author": "Lance Devs",
|
|
11
|
+
"license": "Apache-2.0",
|
|
12
|
+
"dependencies": {
|
|
13
|
+
"@lancedb/lancedb": "file:../"
|
|
14
|
+
},
|
|
15
|
+
"peerDependencies": {
|
|
16
|
+
"typescript": "^5.0.0"
|
|
17
|
+
}
|
|
18
|
+
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import * as fs from "node:fs";
|
|
2
|
+
// --8<-- [start:import]
|
|
3
|
+
import * as lancedb from "@lancedb/lancedb";
|
|
4
|
+
// --8<-- [end:import]
|
|
5
|
+
|
|
6
|
+
async function setup() {
|
|
7
|
+
fs.rmSync("data/sample-lancedb", { recursive: true, force: true });
|
|
8
|
+
const db = await lancedb.connect("data/sample-lancedb");
|
|
9
|
+
|
|
10
|
+
const data = Array.from({ length: 10_000 }, (_, i) => ({
|
|
11
|
+
vector: Array(1536).fill(i),
|
|
12
|
+
id: `${i}`,
|
|
13
|
+
content: "",
|
|
14
|
+
longId: `${i}`,
|
|
15
|
+
}));
|
|
16
|
+
|
|
17
|
+
await db.createTable("my_vectors", data);
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
await setup();
|
|
21
|
+
|
|
22
|
+
// --8<-- [start:search1]
|
|
23
|
+
const db = await lancedb.connect("data/sample-lancedb");
|
|
24
|
+
const tbl = await db.openTable("my_vectors");
|
|
25
|
+
|
|
26
|
+
const _results1 = await tbl.search(Array(1536).fill(1.2)).limit(10).toArray();
|
|
27
|
+
// --8<-- [end:search1]
|
|
28
|
+
|
|
29
|
+
// --8<-- [start:search2]
|
|
30
|
+
const _results2 = await tbl
|
|
31
|
+
.search(Array(1536).fill(1.2))
|
|
32
|
+
.distanceType("cosine")
|
|
33
|
+
.limit(10)
|
|
34
|
+
.toArray();
|
|
35
|
+
// --8<-- [end:search2]
|
|
36
|
+
|
|
37
|
+
console.log("search: done");
|
package/lancedb/arrow.ts
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
import {
|
|
16
16
|
Table as ArrowTable,
|
|
17
17
|
Binary,
|
|
18
|
+
BufferType,
|
|
18
19
|
DataType,
|
|
19
20
|
Field,
|
|
20
21
|
FixedSizeBinary,
|
|
@@ -37,14 +38,72 @@ import {
|
|
|
37
38
|
type makeTable,
|
|
38
39
|
vectorFromArray,
|
|
39
40
|
} from "apache-arrow";
|
|
41
|
+
import { Buffers } from "apache-arrow/data";
|
|
40
42
|
import { type EmbeddingFunction } from "./embedding/embedding_function";
|
|
41
43
|
import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
|
|
42
|
-
import {
|
|
44
|
+
import {
|
|
45
|
+
sanitizeField,
|
|
46
|
+
sanitizeSchema,
|
|
47
|
+
sanitizeTable,
|
|
48
|
+
sanitizeType,
|
|
49
|
+
} from "./sanitize";
|
|
43
50
|
export * from "apache-arrow";
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
51
|
+
export type SchemaLike =
|
|
52
|
+
| Schema
|
|
53
|
+
| {
|
|
54
|
+
fields: FieldLike[];
|
|
55
|
+
metadata: Map<string, string>;
|
|
56
|
+
get names(): unknown[];
|
|
57
|
+
};
|
|
58
|
+
export type FieldLike =
|
|
59
|
+
| Field
|
|
60
|
+
| {
|
|
61
|
+
type: string;
|
|
62
|
+
name: string;
|
|
63
|
+
nullable?: boolean;
|
|
64
|
+
metadata?: Map<string, string>;
|
|
65
|
+
};
|
|
66
|
+
|
|
67
|
+
export type DataLike =
|
|
68
|
+
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
69
|
+
| import("apache-arrow").Data<Struct<any>>
|
|
70
|
+
| {
|
|
71
|
+
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
72
|
+
type: any;
|
|
73
|
+
length: number;
|
|
74
|
+
offset: number;
|
|
75
|
+
stride: number;
|
|
76
|
+
nullable: boolean;
|
|
77
|
+
children: DataLike[];
|
|
78
|
+
get nullCount(): number;
|
|
79
|
+
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
80
|
+
values: Buffers<any>[BufferType.DATA];
|
|
81
|
+
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
82
|
+
typeIds: Buffers<any>[BufferType.TYPE];
|
|
83
|
+
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
84
|
+
nullBitmap: Buffers<any>[BufferType.VALIDITY];
|
|
85
|
+
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
86
|
+
valueOffsets: Buffers<any>[BufferType.OFFSET];
|
|
87
|
+
};
|
|
88
|
+
|
|
89
|
+
export type RecordBatchLike =
|
|
90
|
+
| RecordBatch
|
|
91
|
+
| {
|
|
92
|
+
schema: SchemaLike;
|
|
93
|
+
data: DataLike;
|
|
94
|
+
};
|
|
95
|
+
|
|
96
|
+
export type TableLike =
|
|
97
|
+
| ArrowTable
|
|
98
|
+
| { schema: SchemaLike; batches: RecordBatchLike[] };
|
|
99
|
+
|
|
100
|
+
export type IntoVector =
|
|
101
|
+
| Float32Array
|
|
102
|
+
| Float64Array
|
|
103
|
+
| number[]
|
|
104
|
+
| Promise<Float32Array | Float64Array | number[]>;
|
|
105
|
+
|
|
106
|
+
export function isArrowTable(value: object): value is TableLike {
|
|
48
107
|
if (value instanceof ArrowTable) return true;
|
|
49
108
|
return "schema" in value && "batches" in value;
|
|
50
109
|
}
|
|
@@ -135,7 +194,7 @@ export function isFixedSizeList(value: unknown): value is FixedSizeList {
|
|
|
135
194
|
}
|
|
136
195
|
|
|
137
196
|
/** Data type accepted by NodeJS SDK */
|
|
138
|
-
export type Data = Record<string, unknown>[] |
|
|
197
|
+
export type Data = Record<string, unknown>[] | TableLike;
|
|
139
198
|
|
|
140
199
|
/*
|
|
141
200
|
* Options to control how a column should be converted to a vector array
|
|
@@ -162,7 +221,7 @@ export class MakeArrowTableOptions {
|
|
|
162
221
|
* The schema must be specified if there are no records (e.g. to make
|
|
163
222
|
* an empty table)
|
|
164
223
|
*/
|
|
165
|
-
schema?:
|
|
224
|
+
schema?: SchemaLike;
|
|
166
225
|
|
|
167
226
|
/*
|
|
168
227
|
* Mapping from vector column name to expected type
|
|
@@ -310,7 +369,7 @@ export function makeArrowTable(
|
|
|
310
369
|
if (opt.schema !== undefined && opt.schema !== null) {
|
|
311
370
|
opt.schema = sanitizeSchema(opt.schema);
|
|
312
371
|
opt.schema = validateSchemaEmbeddings(
|
|
313
|
-
opt.schema,
|
|
372
|
+
opt.schema as Schema,
|
|
314
373
|
data,
|
|
315
374
|
options?.embeddingFunction,
|
|
316
375
|
);
|
|
@@ -394,7 +453,7 @@ export function makeArrowTable(
|
|
|
394
453
|
// `new ArrowTable(schema, batches)` which does not do any schema inference
|
|
395
454
|
const firstTable = new ArrowTable(columns);
|
|
396
455
|
const batchesFixed = firstTable.batches.map(
|
|
397
|
-
(batch) => new RecordBatch(opt.schema
|
|
456
|
+
(batch) => new RecordBatch(opt.schema as Schema, batch.data),
|
|
398
457
|
);
|
|
399
458
|
let schema: Schema;
|
|
400
459
|
if (metadata !== undefined) {
|
|
@@ -407,9 +466,9 @@ export function makeArrowTable(
|
|
|
407
466
|
}
|
|
408
467
|
}
|
|
409
468
|
|
|
410
|
-
schema = new Schema(opt.schema.fields, schemaMetadata);
|
|
469
|
+
schema = new Schema(opt.schema.fields as Field[], schemaMetadata);
|
|
411
470
|
} else {
|
|
412
|
-
schema = opt.schema;
|
|
471
|
+
schema = opt.schema as Schema;
|
|
413
472
|
}
|
|
414
473
|
return new ArrowTable(schema, batchesFixed);
|
|
415
474
|
}
|
|
@@ -425,7 +484,7 @@ export function makeArrowTable(
|
|
|
425
484
|
* Create an empty Arrow table with the provided schema
|
|
426
485
|
*/
|
|
427
486
|
export function makeEmptyTable(
|
|
428
|
-
schema:
|
|
487
|
+
schema: SchemaLike,
|
|
429
488
|
metadata?: Map<string, string>,
|
|
430
489
|
): ArrowTable {
|
|
431
490
|
return makeArrowTable([], { schema }, metadata);
|
|
@@ -563,18 +622,17 @@ async function applyEmbeddingsFromMetadata(
|
|
|
563
622
|
async function applyEmbeddings<T>(
|
|
564
623
|
table: ArrowTable,
|
|
565
624
|
embeddings?: EmbeddingFunctionConfig,
|
|
566
|
-
schema?:
|
|
625
|
+
schema?: SchemaLike,
|
|
567
626
|
): Promise<ArrowTable> {
|
|
627
|
+
if (schema !== undefined && schema !== null) {
|
|
628
|
+
schema = sanitizeSchema(schema);
|
|
629
|
+
}
|
|
568
630
|
if (schema?.metadata.has("embedding_functions")) {
|
|
569
|
-
return applyEmbeddingsFromMetadata(table, schema!);
|
|
631
|
+
return applyEmbeddingsFromMetadata(table, schema! as Schema);
|
|
570
632
|
} else if (embeddings == null || embeddings === undefined) {
|
|
571
633
|
return table;
|
|
572
634
|
}
|
|
573
635
|
|
|
574
|
-
if (schema !== undefined && schema !== null) {
|
|
575
|
-
schema = sanitizeSchema(schema);
|
|
576
|
-
}
|
|
577
|
-
|
|
578
636
|
// Convert from ArrowTable to Record<String, Vector>
|
|
579
637
|
const colEntries = [...Array(table.numCols).keys()].map((_, idx) => {
|
|
580
638
|
const name = table.schema.fields[idx].name;
|
|
@@ -650,7 +708,7 @@ async function applyEmbeddings<T>(
|
|
|
650
708
|
`When using embedding functions and specifying a schema the schema should include the embedding column but the column ${destColumn} was missing`,
|
|
651
709
|
);
|
|
652
710
|
}
|
|
653
|
-
return alignTable(newTable, schema);
|
|
711
|
+
return alignTable(newTable, schema as Schema);
|
|
654
712
|
}
|
|
655
713
|
return newTable;
|
|
656
714
|
}
|
|
@@ -744,7 +802,7 @@ export async function fromRecordsToStreamBuffer(
|
|
|
744
802
|
export async function fromTableToBuffer(
|
|
745
803
|
table: ArrowTable,
|
|
746
804
|
embeddings?: EmbeddingFunctionConfig,
|
|
747
|
-
schema?:
|
|
805
|
+
schema?: SchemaLike,
|
|
748
806
|
): Promise<Buffer> {
|
|
749
807
|
if (schema !== undefined && schema !== null) {
|
|
750
808
|
schema = sanitizeSchema(schema);
|
|
@@ -771,7 +829,7 @@ export async function fromDataToBuffer(
|
|
|
771
829
|
schema = sanitizeSchema(schema);
|
|
772
830
|
}
|
|
773
831
|
if (isArrowTable(data)) {
|
|
774
|
-
return fromTableToBuffer(data, embeddings, schema);
|
|
832
|
+
return fromTableToBuffer(sanitizeTable(data), embeddings, schema);
|
|
775
833
|
} else {
|
|
776
834
|
const table = await convertToTable(data, embeddings, { schema });
|
|
777
835
|
return fromTableToBuffer(table);
|
|
@@ -789,7 +847,7 @@ export async function fromDataToBuffer(
|
|
|
789
847
|
export async function fromTableToStreamBuffer(
|
|
790
848
|
table: ArrowTable,
|
|
791
849
|
embeddings?: EmbeddingFunctionConfig,
|
|
792
|
-
schema?:
|
|
850
|
+
schema?: SchemaLike,
|
|
793
851
|
): Promise<Buffer> {
|
|
794
852
|
const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
|
|
795
853
|
const writer = RecordBatchStreamWriter.writeAll(tableWithEmbeddings);
|
|
@@ -854,7 +912,6 @@ function validateSchemaEmbeddings(
|
|
|
854
912
|
for (let field of schema.fields) {
|
|
855
913
|
if (isFixedSizeList(field.type)) {
|
|
856
914
|
field = sanitizeField(field);
|
|
857
|
-
|
|
858
915
|
if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
|
|
859
916
|
if (schema.metadata.has("embedding_functions")) {
|
|
860
917
|
const embeddings = JSON.parse(
|