@lancedb/lancedb 0.5.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +3 -3
- package/biome.json +19 -3
- package/dist/arrow.d.ts +42 -7
- package/dist/arrow.js +6 -5
- package/dist/connection.d.ts +55 -29
- package/dist/connection.js +22 -74
- package/dist/embedding/embedding_function.d.ts +11 -3
- package/dist/embedding/embedding_function.js +36 -12
- package/dist/embedding/openai.d.ts +6 -5
- package/dist/embedding/openai.js +4 -2
- package/dist/embedding/registry.d.ts +10 -11
- package/dist/embedding/registry.js +4 -0
- package/dist/index.d.ts +51 -3
- package/dist/index.js +28 -4
- package/dist/merge.d.ts +54 -0
- package/dist/merge.js +64 -0
- package/dist/native.d.ts +34 -7
- package/dist/native.js +26 -9
- package/dist/query.d.ts +51 -16
- package/dist/query.js +122 -21
- package/dist/remote/client.d.ts +28 -0
- package/dist/remote/client.js +172 -0
- package/dist/remote/connection.d.ts +25 -0
- package/dist/remote/connection.js +110 -0
- package/dist/remote/index.d.ts +3 -0
- package/dist/remote/index.js +9 -0
- package/dist/remote/table.d.ts +42 -0
- package/dist/remote/table.js +179 -0
- package/dist/sanitize.d.ts +3 -2
- package/dist/sanitize.js +55 -1
- package/dist/table.d.ts +116 -25
- package/dist/table.js +117 -233
- package/dist/util.d.ts +14 -0
- package/dist/util.js +65 -0
- package/examples/ann_indexes.ts +49 -0
- package/examples/basic.ts +149 -0
- package/examples/embedding.ts +83 -0
- package/examples/filtering.ts +34 -0
- package/examples/jsconfig.json +27 -0
- package/examples/package-lock.json +79 -0
- package/examples/package.json +18 -0
- package/examples/search.ts +37 -0
- package/lancedb/arrow.ts +87 -24
- package/lancedb/connection.ts +115 -92
- package/lancedb/embedding/embedding_function.ts +48 -16
- package/lancedb/embedding/openai.ts +11 -6
- package/lancedb/embedding/registry.ts +38 -22
- package/lancedb/index.ts +101 -2
- package/lancedb/merge.ts +70 -0
- package/lancedb/query.ts +168 -39
- package/lancedb/remote/client.ts +221 -0
- package/lancedb/remote/connection.ts +201 -0
- package/lancedb/remote/index.ts +3 -0
- package/lancedb/remote/table.ts +226 -0
- package/lancedb/sanitize.ts +73 -1
- package/lancedb/table.ts +344 -101
- package/lancedb/util.ts +69 -0
- package/native.d.ts +208 -0
- package/nodejs-artifacts/arrow.d.ts +42 -7
- package/nodejs-artifacts/arrow.js +6 -5
- package/nodejs-artifacts/connection.d.ts +55 -29
- package/nodejs-artifacts/connection.js +22 -74
- package/nodejs-artifacts/embedding/embedding_function.d.ts +11 -3
- package/nodejs-artifacts/embedding/embedding_function.js +36 -12
- package/nodejs-artifacts/embedding/openai.d.ts +6 -5
- package/nodejs-artifacts/embedding/openai.js +4 -2
- package/nodejs-artifacts/embedding/registry.d.ts +10 -11
- package/nodejs-artifacts/embedding/registry.js +4 -0
- package/nodejs-artifacts/index.d.ts +51 -3
- package/nodejs-artifacts/index.js +28 -4
- package/nodejs-artifacts/merge.d.ts +54 -0
- package/nodejs-artifacts/merge.js +64 -0
- package/nodejs-artifacts/native.d.ts +34 -7
- package/nodejs-artifacts/native.js +26 -9
- package/nodejs-artifacts/query.d.ts +51 -16
- package/nodejs-artifacts/query.js +122 -21
- package/nodejs-artifacts/remote/client.d.ts +28 -0
- package/nodejs-artifacts/remote/client.js +172 -0
- package/nodejs-artifacts/remote/connection.d.ts +25 -0
- package/nodejs-artifacts/remote/connection.js +110 -0
- package/nodejs-artifacts/remote/index.d.ts +3 -0
- package/nodejs-artifacts/remote/index.js +9 -0
- package/nodejs-artifacts/remote/table.d.ts +42 -0
- package/nodejs-artifacts/remote/table.js +179 -0
- package/nodejs-artifacts/sanitize.d.ts +3 -2
- package/nodejs-artifacts/sanitize.js +55 -1
- package/nodejs-artifacts/table.d.ts +116 -25
- package/nodejs-artifacts/table.js +117 -233
- package/nodejs-artifacts/util.d.ts +14 -0
- package/nodejs-artifacts/util.js +65 -0
- package/package.json +25 -11
package/dist/table.js
CHANGED
|
@@ -13,10 +13,13 @@
|
|
|
13
13
|
// See the License for the specific language governing permissions and
|
|
14
14
|
// limitations under the License.
|
|
15
15
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
-
exports.Table = void 0;
|
|
16
|
+
exports.LocalTable = exports.Table = void 0;
|
|
17
17
|
const arrow_1 = require("./arrow");
|
|
18
18
|
const registry_1 = require("./embedding/registry");
|
|
19
|
+
const merge_1 = require("./merge");
|
|
19
20
|
const query_1 = require("./query");
|
|
21
|
+
const sanitize_1 = require("./sanitize");
|
|
22
|
+
const util_1 = require("./util");
|
|
20
23
|
/**
|
|
21
24
|
* A Table is a collection of Records in a LanceDB Database.
|
|
22
25
|
*
|
|
@@ -30,305 +33,173 @@ const query_1 = require("./query");
|
|
|
30
33
|
* collected.
|
|
31
34
|
*/
|
|
32
35
|
class Table {
    /**
     * Custom inspection hook, registered under the
     * `nodejs.util.inspect.custom` symbol. It simply forwards to
     * `display()`, which this base class does not define itself, so a
     * subclass is expected to provide it.
     */
    [Symbol.for("nodejs.util.inspect.custom")]() {
        return this.display();
    }
    /**
     * Turn user supplied data into a serialized Arrow buffer plus the
     * effective write mode.
     *
     * @param data Either an Arrow table (detected with `isArrowTable`, then
     *   passed through `sanitizeTable`) or anything `makeArrowTable` accepts.
     * @param options Optional settings. The fields read here are `mode`
     *   (defaults to "create"), `existOk` (defaults to false),
     *   `embeddingFunction` and `schema`; the whole object is also handed
     *   to `makeArrowTable`.
     * @param streaming When true the table is serialized with
     *   `fromTableToStreamBuffer`, otherwise with `fromTableToBuffer`.
     * @returns An object `{ buf, mode }`, where `mode` is "exist_ok" when
     *   the requested mode is "create" and `existOk` is set.
     */
    static async parseTableData(data, options, streaming = false) {
        const requestedMode = options?.mode ?? "create";
        const existOk = options?.existOk ?? false;
        // "create" combined with existOk is reported as its own mode.
        const mode = requestedMode === "create" && existOk ? "exist_ok" : requestedMode;
        const table = (0, arrow_1.isArrowTable)(data)
            ? (0, sanitize_1.sanitizeTable)(data)
            : (0, arrow_1.makeArrowTable)(data, options);
        // Both serializers take the same arguments, so only the choice of
        // function depends on `streaming`.
        const serialize = streaming
            ? arrow_1.fromTableToStreamBuffer
            : arrow_1.fromTableToBuffer;
        const buf = await serialize(table, options?.embeddingFunction, options?.schema);
        return { buf, mode };
    }
}
|
|
62
|
+
exports.Table = Table;
|
|
63
|
+
class LocalTable extends Table {
|
|
33
64
|
inner;
|
|
34
|
-
/** Construct a Table. Internal use only. */
|
|
35
65
|
constructor(inner) {
|
|
66
|
+
super();
|
|
36
67
|
this.inner = inner;
|
|
37
68
|
}
|
|
38
|
-
|
|
69
|
+
get name() {
|
|
70
|
+
return this.inner.name;
|
|
71
|
+
}
|
|
39
72
|
isOpen() {
|
|
40
73
|
return this.inner.isOpen();
|
|
41
74
|
}
|
|
42
|
-
/**
|
|
43
|
-
* Close the table, releasing any underlying resources.
|
|
44
|
-
*
|
|
45
|
-
* It is safe to call this method multiple times.
|
|
46
|
-
*
|
|
47
|
-
* Any attempt to use the table after it is closed will result in an error.
|
|
48
|
-
*/
|
|
49
75
|
close() {
|
|
50
76
|
this.inner.close();
|
|
51
77
|
}
|
|
52
|
-
/** Return a brief description of the table */
|
|
53
78
|
display() {
|
|
54
79
|
return this.inner.display();
|
|
55
80
|
}
|
|
81
|
+
async getEmbeddingFunctions() {
|
|
82
|
+
const schema = await this.schema();
|
|
83
|
+
const registry = (0, registry_1.getRegistry)();
|
|
84
|
+
return registry.parseFunctions(schema.metadata);
|
|
85
|
+
}
|
|
56
86
|
/** Get the schema of the table. */
|
|
57
87
|
async schema() {
|
|
58
88
|
const schemaBuf = await this.inner.schema();
|
|
59
89
|
const tbl = (0, arrow_1.tableFromIPC)(schemaBuf);
|
|
60
90
|
return tbl.schema;
|
|
61
91
|
}
|
|
62
|
-
/**
|
|
63
|
-
* Insert records into this Table.
|
|
64
|
-
* @param {Data} data Records to be inserted into the Table
|
|
65
|
-
*/
|
|
66
92
|
async add(data, options) {
|
|
67
93
|
const mode = options?.mode ?? "append";
|
|
68
94
|
const schema = await this.schema();
|
|
69
95
|
const registry = (0, registry_1.getRegistry)();
|
|
70
96
|
const functions = registry.parseFunctions(schema.metadata);
|
|
71
|
-
const buffer = await (0, arrow_1.fromDataToBuffer)(data, functions.values().next().value);
|
|
97
|
+
const buffer = await (0, arrow_1.fromDataToBuffer)(data, functions.values().next().value, schema);
|
|
72
98
|
await this.inner.add(buffer, mode);
|
|
73
99
|
}
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
* An optional condition can be specified (e.g. "only update if the old
|
|
83
|
-
* value is 0")
|
|
84
|
-
*
|
|
85
|
-
* Note: if your condition is something like "some_id_column == 7" and
|
|
86
|
-
* you are updating many rows (with different ids) then you will get
|
|
87
|
-
* better performance with a single [`merge_insert`] call instead of
|
|
88
|
-
* repeatedly calling this method.
|
|
89
|
-
* @param {Map<string, string> | Record<string, string>} updates - the
|
|
90
|
-
* columns to update
|
|
91
|
-
*
|
|
92
|
-
* Keys in the map should specify the name of the column to update.
|
|
93
|
-
* Values in the map provide the new value of the column. These can
|
|
94
|
-
* be SQL literal strings (e.g. "7" or "'foo'") or they can be expressions
|
|
95
|
-
* based on the row being updated (e.g. "my_col + 1")
|
|
96
|
-
* @param {Partial<UpdateOptions>} options - additional options to control
|
|
97
|
-
* the update behavior
|
|
98
|
-
*/
|
|
99
|
-
async update(updates, options) {
|
|
100
|
-
const onlyIf = options?.where;
|
|
100
|
+
async update(optsOrUpdates, options) {
|
|
101
|
+
const isValues = "values" in optsOrUpdates && typeof optsOrUpdates.values !== "string";
|
|
102
|
+
const isValuesSql = "valuesSql" in optsOrUpdates &&
|
|
103
|
+
typeof optsOrUpdates.valuesSql !== "string";
|
|
104
|
+
const isMap = (obj) => {
|
|
105
|
+
return obj instanceof Map;
|
|
106
|
+
};
|
|
107
|
+
let predicate;
|
|
101
108
|
let columns;
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
109
|
+
switch (true) {
|
|
110
|
+
case isMap(optsOrUpdates):
|
|
111
|
+
columns = Array.from(optsOrUpdates.entries());
|
|
112
|
+
predicate = options?.where;
|
|
113
|
+
break;
|
|
114
|
+
case isValues && isMap(optsOrUpdates.values):
|
|
115
|
+
columns = Array.from(optsOrUpdates.values.entries()).map(([k, v]) => [
|
|
116
|
+
k,
|
|
117
|
+
(0, util_1.toSQL)(v),
|
|
118
|
+
]);
|
|
119
|
+
predicate = optsOrUpdates.where;
|
|
120
|
+
break;
|
|
121
|
+
case isValues && !isMap(optsOrUpdates.values):
|
|
122
|
+
columns = Object.entries(optsOrUpdates.values).map(([k, v]) => [
|
|
123
|
+
k,
|
|
124
|
+
(0, util_1.toSQL)(v),
|
|
125
|
+
]);
|
|
126
|
+
predicate = optsOrUpdates.where;
|
|
127
|
+
break;
|
|
128
|
+
case isValuesSql && isMap(optsOrUpdates.valuesSql):
|
|
129
|
+
columns = Array.from(optsOrUpdates.valuesSql.entries());
|
|
130
|
+
predicate = optsOrUpdates.where;
|
|
131
|
+
break;
|
|
132
|
+
case isValuesSql && !isMap(optsOrUpdates.valuesSql):
|
|
133
|
+
columns = Object.entries(optsOrUpdates.valuesSql).map(([k, v]) => [
|
|
134
|
+
k,
|
|
135
|
+
v,
|
|
136
|
+
]);
|
|
137
|
+
predicate = optsOrUpdates.where;
|
|
138
|
+
break;
|
|
139
|
+
default:
|
|
140
|
+
columns = Object.entries(optsOrUpdates);
|
|
141
|
+
predicate = options?.where;
|
|
107
142
|
}
|
|
108
|
-
await this.inner.update(
|
|
143
|
+
await this.inner.update(predicate, columns);
|
|
109
144
|
}
|
|
110
|
-
/** Count the total number of rows in the dataset. */
|
|
111
145
|
async countRows(filter) {
|
|
112
146
|
return await this.inner.countRows(filter);
|
|
113
147
|
}
|
|
114
|
-
/** Delete the rows that satisfy the predicate. */
|
|
115
148
|
async delete(predicate) {
|
|
116
149
|
await this.inner.delete(predicate);
|
|
117
150
|
}
|
|
118
|
-
/**
|
|
119
|
-
* Create an index to speed up queries.
|
|
120
|
-
*
|
|
121
|
-
* Indices can be created on vector columns or scalar columns.
|
|
122
|
-
* Indices on vector columns will speed up vector searches.
|
|
123
|
-
* Indices on scalar columns will speed up filtering (in both
|
|
124
|
-
* vector and non-vector searches)
|
|
125
|
-
* @example
|
|
126
|
-
* // If the column has a vector (fixed size list) data type then
|
|
127
|
-
* // an IvfPq vector index will be created.
|
|
128
|
-
* const table = await conn.openTable("my_table");
|
|
129
|
-
* await table.createIndex("vector");
|
|
130
|
-
* @example
|
|
131
|
-
* // For advanced control over vector index creation you can specify
|
|
132
|
-
* // the index type and options.
|
|
133
|
-
* const table = await conn.openTable("my_table");
|
|
134
|
-
* await table.createIndex("vector", {
|
|
135
|
-
* config: lancedb.Index.ivfPq({
|
|
136
|
-
* numPartitions: 128,
|
|
137
|
-
* numSubVectors: 16,
|
|
138
|
-
* }),
|
|
139
|
-
* });
|
|
140
|
-
* @example
|
|
141
|
-
* // Or create a Scalar index
|
|
142
|
-
* await table.createIndex("my_float_col");
|
|
143
|
-
*/
|
|
144
151
|
async createIndex(column, options) {
|
|
145
152
|
// Bit of a hack to get around the fact that TS has no package-scope.
|
|
146
153
|
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
147
154
|
const nativeIndex = options?.config?.inner;
|
|
148
155
|
await this.inner.createIndex(nativeIndex, column, options?.replace);
|
|
149
156
|
}
|
|
150
|
-
/**
|
|
151
|
-
* Create a {@link Query} Builder.
|
|
152
|
-
*
|
|
153
|
-
* Queries allow you to search your existing data. By default the query will
|
|
154
|
-
* return all the data in the table in no particular order. The builder
|
|
155
|
-
* returned by this method can be used to control the query using filtering,
|
|
156
|
-
* vector similarity, sorting, and more.
|
|
157
|
-
*
|
|
158
|
-
* Note: By default, all columns are returned. For best performance, you should
|
|
159
|
-
* only fetch the columns you need.
|
|
160
|
-
*
|
|
161
|
-
* When appropriate, various indices and statistics based pruning will be used to
|
|
162
|
-
* accelerate the query.
|
|
163
|
-
* @example
|
|
164
|
-
* // SQL-style filtering
|
|
165
|
-
* //
|
|
166
|
-
* // This query will return up to 1000 rows whose value in the `id` column
|
|
167
|
-
* // is greater than 5. LanceDb supports a broad set of filtering functions.
|
|
168
|
-
* for await (const batch of table
|
|
169
|
-
* .query()
|
|
170
|
-
* .where("id > 1")
|
|
171
|
-
* .select(["id"])
|
|
172
|
-
* .limit(20)) {
|
|
173
|
-
* console.log(batch);
|
|
174
|
-
* }
|
|
175
|
-
* @example
|
|
176
|
-
* // Vector Similarity Search
|
|
177
|
-
* //
|
|
178
|
-
* // This example will find the 10 rows whose value in the "vector" column are
|
|
179
|
-
* // closest to the query vector [1.0, 2.0, 3.0]. If an index has been created
|
|
180
|
-
* // on the "vector" column then this will perform an ANN search.
|
|
181
|
-
* //
|
|
182
|
-
* // The `refineFactor` and `nprobes` methods are used to control the recall /
|
|
183
|
-
* // latency tradeoff of the search.
|
|
184
|
-
* for await (const batch of table
|
|
185
|
-
* .query()
|
|
186
|
-
* .where("id > 1")
|
|
187
|
-
* .select(["id"])
|
|
188
|
-
* .limit(20)) {
|
|
189
|
-
* console.log(batch);
|
|
190
|
-
* }
|
|
191
|
-
* @example
|
|
192
|
-
* // Scan the full dataset
|
|
193
|
-
* //
|
|
194
|
-
* // This query will return everything in the table in no particular order.
|
|
195
|
-
* for await (const batch of table.query()) {
|
|
196
|
-
* console.log(batch);
|
|
197
|
-
* }
|
|
198
|
-
* @returns {Query} A builder that can be used to parameterize the query
|
|
199
|
-
*/
|
|
200
157
|
query() {
|
|
201
158
|
return new query_1.Query(this.inner);
|
|
202
159
|
}
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
160
|
+
search(query) {
|
|
161
|
+
if (typeof query !== "string") {
|
|
162
|
+
return this.vectorSearch(query);
|
|
163
|
+
}
|
|
164
|
+
else {
|
|
165
|
+
const queryPromise = this.getEmbeddingFunctions().then(async (functions) => {
|
|
166
|
+
// TODO: Support multiple embedding functions
|
|
167
|
+
const embeddingFunc = functions
|
|
168
|
+
.values()
|
|
169
|
+
.next().value;
|
|
170
|
+
if (!embeddingFunc) {
|
|
171
|
+
return Promise.reject(new Error("No embedding functions are defined in the table"));
|
|
172
|
+
}
|
|
173
|
+
return await embeddingFunc.function.computeQueryEmbeddings(query);
|
|
174
|
+
});
|
|
175
|
+
return this.query().nearestTo(queryPromise);
|
|
176
|
+
}
|
|
177
|
+
}
|
|
210
178
|
vectorSearch(vector) {
|
|
211
179
|
return this.query().nearestTo(vector);
|
|
212
180
|
}
|
|
213
181
|
// TODO: Support BatchUDF
|
|
214
|
-
/**
|
|
215
|
-
* Add new columns with defined values.
|
|
216
|
-
* @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
|
|
217
|
-
* the SQL expression to use to calculate the value of the new column. These
|
|
218
|
-
* expressions will be evaluated for each row in the table, and can
|
|
219
|
-
* reference existing columns in the table.
|
|
220
|
-
*/
|
|
221
182
|
async addColumns(newColumnTransforms) {
|
|
222
183
|
await this.inner.addColumns(newColumnTransforms);
|
|
223
184
|
}
|
|
224
|
-
/**
|
|
225
|
-
* Alter the name or nullability of columns.
|
|
226
|
-
* @param {ColumnAlteration[]} columnAlterations One or more alterations to
|
|
227
|
-
* apply to columns.
|
|
228
|
-
*/
|
|
229
185
|
async alterColumns(columnAlterations) {
|
|
230
186
|
await this.inner.alterColumns(columnAlterations);
|
|
231
187
|
}
|
|
232
|
-
/**
|
|
233
|
-
* Drop one or more columns from the dataset
|
|
234
|
-
*
|
|
235
|
-
* This is a metadata-only operation and does not remove the data from the
|
|
236
|
-
* underlying storage. In order to remove the data, you must subsequently
|
|
237
|
-
* call ``compact_files`` to rewrite the data without the removed columns and
|
|
238
|
-
* then call ``cleanup_files`` to remove the old files.
|
|
239
|
-
* @param {string[]} columnNames The names of the columns to drop. These can
|
|
240
|
-
* be nested column references (e.g. "a.b.c") or top-level column names
|
|
241
|
-
* (e.g. "a").
|
|
242
|
-
*/
|
|
243
188
|
async dropColumns(columnNames) {
|
|
244
189
|
await this.inner.dropColumns(columnNames);
|
|
245
190
|
}
|
|
246
|
-
/** Retrieve the version of the table */
|
|
247
191
|
async version() {
|
|
248
192
|
return await this.inner.version();
|
|
249
193
|
}
|
|
250
|
-
/**
|
|
251
|
-
* Checks out a specific version of the table _This is an in-place operation._
|
|
252
|
-
*
|
|
253
|
-
* This allows viewing previous versions of the table. If you wish to
|
|
254
|
-
* keep writing to the dataset starting from an old version, then use
|
|
255
|
-
* the `restore` function.
|
|
256
|
-
*
|
|
257
|
-
* Calling this method will set the table into time-travel mode. If you
|
|
258
|
-
* wish to return to standard mode, call `checkoutLatest`.
|
|
259
|
-
* @param {number} version The version to checkout
|
|
260
|
-
* @example
|
|
261
|
-
* ```typescript
|
|
262
|
-
* import * as lancedb from "@lancedb/lancedb"
|
|
263
|
-
* const db = await lancedb.connect("./.lancedb");
|
|
264
|
-
* const table = await db.createTable("my_table", [
|
|
265
|
-
* { vector: [1.1, 0.9], type: "vector" },
|
|
266
|
-
* ]);
|
|
267
|
-
*
|
|
268
|
-
* console.log(await table.version()); // 1
|
|
269
|
-
* console.log(table.display());
|
|
270
|
-
* await table.add([{ vector: [0.5, 0.2], type: "vector" }]);
|
|
271
|
-
* await table.checkout(1);
|
|
272
|
-
* console.log(await table.version()); // 2
|
|
273
|
-
* ```
|
|
274
|
-
*/
|
|
275
194
|
async checkout(version) {
|
|
276
195
|
await this.inner.checkout(version);
|
|
277
196
|
}
|
|
278
|
-
/**
|
|
279
|
-
* Checkout the latest version of the table. _This is an in-place operation._
|
|
280
|
-
*
|
|
281
|
-
* The table will be set back into standard mode, and will track the latest
|
|
282
|
-
* version of the table.
|
|
283
|
-
*/
|
|
284
197
|
async checkoutLatest() {
|
|
285
198
|
await this.inner.checkoutLatest();
|
|
286
199
|
}
|
|
287
|
-
/**
|
|
288
|
-
* Restore the table to the currently checked out version
|
|
289
|
-
*
|
|
290
|
-
* This operation will fail if checkout has not been called previously
|
|
291
|
-
*
|
|
292
|
-
* This operation will overwrite the latest version of the table with a
|
|
293
|
-
* previous version. Any changes made since the checked out version will
|
|
294
|
-
* no longer be visible.
|
|
295
|
-
*
|
|
296
|
-
* Once the operation concludes the table will no longer be in a checked
|
|
297
|
-
* out state and the read_consistency_interval, if any, will apply.
|
|
298
|
-
*/
|
|
299
200
|
async restore() {
|
|
300
201
|
await this.inner.restore();
|
|
301
202
|
}
|
|
302
|
-
/**
|
|
303
|
-
* Optimize the on-disk data and indices for better performance.
|
|
304
|
-
*
|
|
305
|
-
* Modeled after ``VACUUM`` in PostgreSQL.
|
|
306
|
-
*
|
|
307
|
-
* Optimization covers three operations:
|
|
308
|
-
*
|
|
309
|
-
* - Compaction: Merges small files into larger ones
|
|
310
|
-
* - Prune: Removes old versions of the dataset
|
|
311
|
-
* - Index: Optimizes the indices, adding new data to existing indices
|
|
312
|
-
*
|
|
313
|
-
*
|
|
314
|
-
* Experimental API
|
|
315
|
-
* ----------------
|
|
316
|
-
*
|
|
317
|
-
* The optimization process is undergoing active development and may change.
|
|
318
|
-
* Our goal with these changes is to improve the performance of optimization and
|
|
319
|
-
* reduce the complexity.
|
|
320
|
-
*
|
|
321
|
-
* That being said, it is essential today to run optimize if you want the best
|
|
322
|
-
* performance. It should be stable and safe to use in production, but it is our
|
|
323
|
-
* hope that the API may be simplified (or not even need to be called) in the
|
|
324
|
-
* future.
|
|
325
|
-
*
|
|
326
|
-
* The frequency an application should call optimize is based on the frequency of
|
|
327
|
-
* data modifications. If data is frequently added, deleted, or updated then
|
|
328
|
-
* optimize should be run frequently. A good rule of thumb is to run optimize if
|
|
329
|
-
* you have added or modified 100,000 or more records or run more than 20 data
|
|
330
|
-
* modification operations.
|
|
331
|
-
*/
|
|
332
203
|
async optimize(options) {
|
|
333
204
|
let cleanupOlderThanMs;
|
|
334
205
|
if (options?.cleanupOlderThan !== undefined &&
|
|
@@ -338,9 +209,22 @@ class Table {
|
|
|
338
209
|
}
|
|
339
210
|
return await this.inner.optimize(cleanupOlderThanMs);
|
|
340
211
|
}
|
|
341
|
-
/** List all indices that have been created with {@link Table.createIndex} */
|
|
342
212
|
async listIndices() {
|
|
343
213
|
return await this.inner.listIndices();
|
|
344
214
|
}
|
|
215
|
+
async toArrow() {
|
|
216
|
+
return await this.query().toArrow();
|
|
217
|
+
}
|
|
218
|
+
async indexStats(name) {
|
|
219
|
+
const stats = await this.inner.indexStats(name);
|
|
220
|
+
if (stats === null) {
|
|
221
|
+
return undefined;
|
|
222
|
+
}
|
|
223
|
+
return stats;
|
|
224
|
+
}
|
|
225
|
+
mergeInsert(on) {
|
|
226
|
+
on = Array.isArray(on) ? on : [on];
|
|
227
|
+
return new merge_1.MergeInsertBuilder(this.inner.mergeInsert(on));
|
|
228
|
+
}
|
|
345
229
|
}
|
|
346
|
-
exports.
|
|
230
|
+
exports.LocalTable = LocalTable;
|
package/dist/util.d.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/// <reference types="node" />
|
|
2
|
+
export type IntoSql = string | number | boolean | null | Date | ArrayBufferLike | Buffer | IntoSql[];
|
|
3
|
+
export declare function toSQL(value: IntoSql): string;
|
|
4
|
+
/**
 * String-keyed cache whose entries stop being returned once their
 * time-to-live has elapsed.
 */
export declare class TTLCache {
    private readonly ttl;
    private readonly cache;
    /**
     * @param ttl Time to live in milliseconds
     */
    constructor(ttl: number);
    /**
     * Look up `key`. Returns `undefined` when the key is absent or its entry
     * has expired.
     */
    get(key: string): any | undefined;
    /** Store `value` under `key`, replacing any previous entry for that key. */
    set(key: string, value: any): void;
    /** Remove the entry stored under `key`, if there is one. */
    delete(key: string): void;
}
|
package/dist/util.js
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.TTLCache = exports.toSQL = void 0;
|
|
4
|
+
/**
 * Render a JavaScript value as a SQL literal.
 *
 * Supported inputs and their output:
 * - string: single-quoted, with embedded single quotes doubled
 * - number: the result of `toString()`
 * - boolean: `TRUE` / `FALSE`
 * - null: `NULL`
 * - Date: single-quoted `toISOString()` text
 * - array: `[a, b, ...]` with every element converted recursively
 * - Buffer, ArrayBuffer, SharedArrayBuffer: hex literal `X'..'`
 *
 * @param value The value to convert.
 * @returns {string} The SQL text for `value`.
 * @throws {Error} When `value` is none of the supported kinds
 *   (for example `undefined`, a plain object or a symbol).
 */
function toSQL(value) {
    if (typeof value === "string") {
        // A quote inside the string is escaped by doubling it.
        return `'${value.replace(/'/g, "''")}'`;
    }
    if (typeof value === "number") {
        return value.toString();
    }
    if (typeof value === "boolean") {
        return value ? "TRUE" : "FALSE";
    }
    if (value === null) {
        return "NULL";
    }
    if (value instanceof Date) {
        return `'${value.toISOString()}'`;
    }
    if (Array.isArray(value)) {
        return `[${value.map((item) => toSQL(item)).join(", ")}]`;
    }
    if (Buffer.isBuffer(value)) {
        return `X'${value.toString("hex")}'`;
    }
    // The accepted input type includes shared buffers as well as plain ones;
    // the guard keeps this working where SharedArrayBuffer is not defined.
    const isRawBuffer = value instanceof ArrayBuffer ||
        (typeof SharedArrayBuffer !== "undefined" &&
            value instanceof SharedArrayBuffer);
    if (isRawBuffer) {
        return `X'${Buffer.from(value).toString("hex")}'`;
    }
    // String() is used instead of template interpolation so that a symbol
    // produces this error rather than a TypeError from the conversion.
    throw new Error(`Unsupported value type: ${typeof value} value: (${String(value)})`);
}
|
|
33
|
+
exports.toSQL = toSQL;
|
|
34
|
+
/**
 * Key/value cache backed by a Map in which every entry carries an expiry
 * timestamp. Expired entries are only dropped when they are looked up with
 * `get`; nothing removes them in the background.
 */
class TTLCache {
    ttl;
    cache;
    /**
     * @param ttl Time to live in milliseconds
     */
    constructor(ttl) {
        this.ttl = ttl;
        this.cache = new Map();
    }
    /**
     * Return the value stored under `key`, or `undefined` when there is no
     * entry or the entry's expiry time lies before the current time. An
     * expired entry is deleted as part of the lookup.
     */
    get(key) {
        const entry = this.cache.get(key);
        if (entry === undefined) {
            return undefined;
        }
        const isExpired = entry.expires < Date.now();
        if (isExpired) {
            this.cache.delete(key);
            return undefined;
        }
        return entry.value;
    }
    /**
     * Store `value` under `key`. The entry expires `ttl` milliseconds after
     * the moment of this call.
     */
    set(key, value) {
        const expires = Date.now() + this.ttl;
        this.cache.set(key, { value, expires });
    }
    /** Remove the entry stored under `key`, if any. */
    delete(key) {
        this.cache.delete(key);
    }
}
|
|
65
|
+
exports.TTLCache = TTLCache;
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
// Example script: writes 10,000 rows with 1536-element vectors to a local
// database, builds an IVF-PQ index on the "vector" column and runs three
// vector searches. The `--8<--` start/end comments delimit named sections
// and are kept exactly as they were.
// --8<-- [start:import]
import * as lancedb from "@lancedb/lancedb";
// --8<-- [end:import]

// --8<-- [start:ingest]
const db = await lancedb.connect("/tmp/lancedb/");

const data = Array.from({ length: 10_000 }, (_, i) => ({
  vector: Array(1536).fill(i),
  id: `${i}`,
  content: "",
  longId: `${i}`,
}));

const table = await db.createTable("my_vectors", data, { mode: "overwrite" });
await table.createIndex("vector", {
  config: lancedb.Index.ivfPq({
    numPartitions: 16,
    numSubVectors: 48,
  }),
});
// --8<-- [end:ingest]

// --8<-- [start:search1]
const _results1 = await table
  .search(Array(1536).fill(1.2))
  .limit(2)
  .nprobes(20)
  .refineFactor(10)
  .toArray();
// --8<-- [end:search1]

// --8<-- [start:search2]
const _results2 = await table
  .search(Array(1536).fill(1.2))
  .where("id != '1141'")
  .limit(2)
  .toArray();
// --8<-- [end:search2]

// --8<-- [start:search3]
const _results3 = await table
  .search(Array(1536).fill(1.2))
  .select(["id"])
  .limit(2)
  .toArray();
// --8<-- [end:search3]

console.log("Ann indexes: done");
|