@lancedb/lancedb 0.14.0-beta.0 → 0.14.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DEVELOPMENT.md +42 -0
- package/package.json +12 -9
- package/dist/arrow.d.ts +0 -248
- package/dist/arrow.js +0 -743
- package/dist/connection.d.ts +0 -188
- package/dist/connection.js +0 -149
- package/dist/embedding/embedding_function.d.ts +0 -90
- package/dist/embedding/embedding_function.js +0 -133
- package/dist/embedding/index.d.ts +0 -27
- package/dist/embedding/index.js +0 -112
- package/dist/embedding/openai.d.ts +0 -18
- package/dist/embedding/openai.js +0 -105
- package/dist/embedding/registry.d.ts +0 -55
- package/dist/embedding/registry.js +0 -151
- package/dist/embedding/transformers.d.ts +0 -37
- package/dist/embedding/transformers.js +0 -148
- package/dist/index.d.ts +0 -54
- package/dist/index.js +0 -55
- package/dist/indices.d.ts +0 -429
- package/dist/indices.js +0 -131
- package/dist/merge.d.ts +0 -54
- package/dist/merge.js +0 -64
- package/dist/native.d.ts +0 -328
- package/dist/native.js +0 -330
- package/dist/query.d.ts +0 -324
- package/dist/query.js +0 -544
- package/dist/sanitize.d.ts +0 -31
- package/dist/sanitize.js +0 -437
- package/dist/table.d.ts +0 -425
- package/dist/table.js +0 -276
- package/dist/util.d.ts +0 -13
- package/dist/util.js +0 -65
package/dist/query.js
DELETED
|
@@ -1,544 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// Copyright 2024 Lance Developers.
|
|
3
|
-
//
|
|
4
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
// you may not use this file except in compliance with the License.
|
|
6
|
-
// You may obtain a copy of the License at
|
|
7
|
-
//
|
|
8
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
//
|
|
10
|
-
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
// See the License for the specific language governing permissions and
|
|
14
|
-
// limitations under the License.
|
|
15
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
-
exports.Query = exports.VectorQuery = exports.QueryBase = exports.RecordBatchIterator = void 0;
|
|
17
|
-
const arrow_1 = require("./arrow");
|
|
18
|
-
class RecordBatchIterator {
|
|
19
|
-
promisedInner;
|
|
20
|
-
inner;
|
|
21
|
-
constructor(promise) {
|
|
22
|
-
// TODO: check promise reliably so we don't need to pass two arguments.
|
|
23
|
-
this.promisedInner = promise;
|
|
24
|
-
}
|
|
25
|
-
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
26
|
-
async next() {
|
|
27
|
-
if (this.inner === undefined) {
|
|
28
|
-
this.inner = await this.promisedInner;
|
|
29
|
-
}
|
|
30
|
-
if (this.inner === undefined) {
|
|
31
|
-
throw new Error("Invalid iterator state state");
|
|
32
|
-
}
|
|
33
|
-
const n = await this.inner.next();
|
|
34
|
-
if (n == null) {
|
|
35
|
-
return Promise.resolve({ done: true, value: null });
|
|
36
|
-
}
|
|
37
|
-
const tbl = (0, arrow_1.tableFromIPC)(n);
|
|
38
|
-
if (tbl.batches.length != 1) {
|
|
39
|
-
throw new Error("Expected only one batch");
|
|
40
|
-
}
|
|
41
|
-
return Promise.resolve({ done: false, value: tbl.batches[0] });
|
|
42
|
-
}
|
|
43
|
-
}
|
|
44
|
-
exports.RecordBatchIterator = RecordBatchIterator;
|
|
45
|
-
/* eslint-enable */
|
|
46
|
-
class RecordBatchIterable {
|
|
47
|
-
inner;
|
|
48
|
-
options;
|
|
49
|
-
constructor(inner, options) {
|
|
50
|
-
this.inner = inner;
|
|
51
|
-
this.options = options;
|
|
52
|
-
}
|
|
53
|
-
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
54
|
-
[Symbol.asyncIterator]() {
|
|
55
|
-
return new RecordBatchIterator(this.inner.execute(this.options?.maxBatchLength));
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
/** Common methods supported by all query types */
|
|
59
|
-
class QueryBase {
|
|
60
|
-
inner;
|
|
61
|
-
constructor(inner) {
|
|
62
|
-
this.inner = inner;
|
|
63
|
-
// intentionally empty
|
|
64
|
-
}
|
|
65
|
-
// call a function on the inner (either a promise or the actual object)
|
|
66
|
-
doCall(fn) {
|
|
67
|
-
if (this.inner instanceof Promise) {
|
|
68
|
-
this.inner = this.inner.then((inner) => {
|
|
69
|
-
fn(inner);
|
|
70
|
-
return inner;
|
|
71
|
-
});
|
|
72
|
-
}
|
|
73
|
-
else {
|
|
74
|
-
fn(this.inner);
|
|
75
|
-
}
|
|
76
|
-
}
|
|
77
|
-
/**
|
|
78
|
-
* A filter statement to be applied to this query.
|
|
79
|
-
*
|
|
80
|
-
* The filter should be supplied as an SQL query string. For example:
|
|
81
|
-
* @example
|
|
82
|
-
* x > 10
|
|
83
|
-
* y > 0 AND y < 100
|
|
84
|
-
* x > 5 OR y = 'test'
|
|
85
|
-
*
|
|
86
|
-
* Filtering performance can often be improved by creating a scalar index
|
|
87
|
-
* on the filter column(s).
|
|
88
|
-
*/
|
|
89
|
-
where(predicate) {
|
|
90
|
-
this.doCall((inner) => inner.onlyIf(predicate));
|
|
91
|
-
return this;
|
|
92
|
-
}
|
|
93
|
-
/**
|
|
94
|
-
* A filter statement to be applied to this query.
|
|
95
|
-
* @alias where
|
|
96
|
-
* @deprecated Use `where` instead
|
|
97
|
-
*/
|
|
98
|
-
filter(predicate) {
|
|
99
|
-
return this.where(predicate);
|
|
100
|
-
}
|
|
101
|
-
fullTextSearch(query, options) {
|
|
102
|
-
let columns = null;
|
|
103
|
-
if (options) {
|
|
104
|
-
if (typeof options.columns === "string") {
|
|
105
|
-
columns = [options.columns];
|
|
106
|
-
}
|
|
107
|
-
else if (Array.isArray(options.columns)) {
|
|
108
|
-
columns = options.columns;
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
this.doCall((inner) => inner.fullTextSearch(query, columns));
|
|
112
|
-
return this;
|
|
113
|
-
}
|
|
114
|
-
/**
|
|
115
|
-
* Return only the specified columns.
|
|
116
|
-
*
|
|
117
|
-
* By default a query will return all columns from the table. However, this can have
|
|
118
|
-
* a very significant impact on latency. LanceDb stores data in a columnar fashion. This
|
|
119
|
-
* means we can finely tune our I/O to select exactly the columns we need.
|
|
120
|
-
*
|
|
121
|
-
* As a best practice you should always limit queries to the columns that you need. If you
|
|
122
|
-
* pass in an array of column names then only those columns will be returned.
|
|
123
|
-
*
|
|
124
|
-
* You can also use this method to create new "dynamic" columns based on your existing columns.
|
|
125
|
-
* For example, you may not care about "a" or "b" but instead simply want "a + b". This is often
|
|
126
|
-
* seen in the SELECT clause of an SQL query (e.g. `SELECT a+b FROM my_table`).
|
|
127
|
-
*
|
|
128
|
-
* To create dynamic columns you can pass in a Map<string, string>. A column will be returned
|
|
129
|
-
* for each entry in the map. The key provides the name of the column. The value is
|
|
130
|
-
* an SQL string used to specify how the column is calculated.
|
|
131
|
-
*
|
|
132
|
-
* For example, an SQL query might state `SELECT a + b AS combined, c`. The equivalent
|
|
133
|
-
* input to this method would be:
|
|
134
|
-
* @example
|
|
135
|
-
* new Map([["combined", "a + b"], ["c", "c"]])
|
|
136
|
-
*
|
|
137
|
-
* Columns will always be returned in the order given, even if that order is different than
|
|
138
|
-
* the order used when adding the data.
|
|
139
|
-
*
|
|
140
|
-
* Note that you can pass in a `Record<string, string>` (e.g. an object literal). This method
|
|
141
|
-
* uses `Object.entries` which should preserve the insertion order of the object. However,
|
|
142
|
-
* object insertion order is easy to get wrong and `Map` is more foolproof.
|
|
143
|
-
*/
|
|
144
|
-
select(columns) {
|
|
145
|
-
const selectColumns = (columnArray) => {
|
|
146
|
-
this.doCall((inner) => {
|
|
147
|
-
inner.selectColumns(columnArray);
|
|
148
|
-
});
|
|
149
|
-
};
|
|
150
|
-
const selectMapping = (columnTuples) => {
|
|
151
|
-
this.doCall((inner) => {
|
|
152
|
-
inner.select(columnTuples);
|
|
153
|
-
});
|
|
154
|
-
};
|
|
155
|
-
if (typeof columns === "string") {
|
|
156
|
-
selectColumns([columns]);
|
|
157
|
-
}
|
|
158
|
-
else if (Array.isArray(columns)) {
|
|
159
|
-
selectColumns(columns);
|
|
160
|
-
}
|
|
161
|
-
else if (columns instanceof Map) {
|
|
162
|
-
selectMapping(Array.from(columns.entries()));
|
|
163
|
-
}
|
|
164
|
-
else {
|
|
165
|
-
selectMapping(Object.entries(columns));
|
|
166
|
-
}
|
|
167
|
-
return this;
|
|
168
|
-
}
|
|
169
|
-
/**
|
|
170
|
-
* Set the maximum number of results to return.
|
|
171
|
-
*
|
|
172
|
-
* By default, a plain search has no limit. If this method is not
|
|
173
|
-
* called then every valid row from the table will be returned.
|
|
174
|
-
*/
|
|
175
|
-
limit(limit) {
|
|
176
|
-
this.doCall((inner) => inner.limit(limit));
|
|
177
|
-
return this;
|
|
178
|
-
}
|
|
179
|
-
offset(offset) {
|
|
180
|
-
this.doCall((inner) => inner.offset(offset));
|
|
181
|
-
return this;
|
|
182
|
-
}
|
|
183
|
-
/**
|
|
184
|
-
* Skip searching un-indexed data. This can make search faster, but will miss
|
|
185
|
-
* any data that is not yet indexed.
|
|
186
|
-
*
|
|
187
|
-
* Use {@link lancedb.Table#optimize} to index all un-indexed data.
|
|
188
|
-
*/
|
|
189
|
-
fastSearch() {
|
|
190
|
-
this.doCall((inner) => inner.fastSearch());
|
|
191
|
-
return this;
|
|
192
|
-
}
|
|
193
|
-
/**
|
|
194
|
-
* Whether to return the row id in the results.
|
|
195
|
-
*
|
|
196
|
-
* This column can be used to match results between different queries. For
|
|
197
|
-
* example, to match results from a full text search and a vector search in
|
|
198
|
-
* order to perform hybrid search.
|
|
199
|
-
*/
|
|
200
|
-
withRowId() {
|
|
201
|
-
this.doCall((inner) => inner.withRowId());
|
|
202
|
-
return this;
|
|
203
|
-
}
|
|
204
|
-
nativeExecute(options) {
|
|
205
|
-
if (this.inner instanceof Promise) {
|
|
206
|
-
return this.inner.then((inner) => inner.execute(options?.maxBatchLength));
|
|
207
|
-
}
|
|
208
|
-
else {
|
|
209
|
-
return this.inner.execute(options?.maxBatchLength);
|
|
210
|
-
}
|
|
211
|
-
}
|
|
212
|
-
/**
|
|
213
|
-
* Execute the query and return the results as an @see {@link AsyncIterator}
|
|
214
|
-
* of @see {@link RecordBatch}.
|
|
215
|
-
*
|
|
216
|
-
* By default, LanceDb will use many threads to calculate results and, when
|
|
217
|
-
* the result set is large, multiple batches will be processed at one time.
|
|
218
|
-
* This readahead is limited however and backpressure will be applied if this
|
|
219
|
-
* stream is consumed slowly (this constrains the maximum memory used by a
|
|
220
|
-
* single query)
|
|
221
|
-
*
|
|
222
|
-
*/
|
|
223
|
-
execute(options) {
|
|
224
|
-
return new RecordBatchIterator(this.nativeExecute(options));
|
|
225
|
-
}
|
|
226
|
-
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
227
|
-
[Symbol.asyncIterator]() {
|
|
228
|
-
const promise = this.nativeExecute();
|
|
229
|
-
return new RecordBatchIterator(promise);
|
|
230
|
-
}
|
|
231
|
-
/** Collect the results as an Arrow @see {@link ArrowTable}. */
|
|
232
|
-
async toArrow(options) {
|
|
233
|
-
const batches = [];
|
|
234
|
-
let inner;
|
|
235
|
-
if (this.inner instanceof Promise) {
|
|
236
|
-
inner = await this.inner;
|
|
237
|
-
}
|
|
238
|
-
else {
|
|
239
|
-
inner = this.inner;
|
|
240
|
-
}
|
|
241
|
-
for await (const batch of new RecordBatchIterable(inner, options)) {
|
|
242
|
-
batches.push(batch);
|
|
243
|
-
}
|
|
244
|
-
return new arrow_1.Table(batches);
|
|
245
|
-
}
|
|
246
|
-
/** Collect the results as an array of objects. */
|
|
247
|
-
// biome-ignore lint/suspicious/noExplicitAny: arrow.toArrow() returns any[]
|
|
248
|
-
async toArray(options) {
|
|
249
|
-
const tbl = await this.toArrow(options);
|
|
250
|
-
return tbl.toArray();
|
|
251
|
-
}
|
|
252
|
-
/**
|
|
253
|
-
* Generates an explanation of the query execution plan.
|
|
254
|
-
*
|
|
255
|
-
* @example
|
|
256
|
-
* import * as lancedb from "@lancedb/lancedb"
|
|
257
|
-
* const db = await lancedb.connect("./.lancedb");
|
|
258
|
-
* const table = await db.createTable("my_table", [
|
|
259
|
-
* { vector: [1.1, 0.9], id: "1" },
|
|
260
|
-
* ]);
|
|
261
|
-
* const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();
|
|
262
|
-
*
|
|
263
|
-
* @param verbose - If true, provides a more detailed explanation. Defaults to false.
|
|
264
|
-
* @returns A Promise that resolves to a string containing the query execution plan explanation.
|
|
265
|
-
*/
|
|
266
|
-
async explainPlan(verbose = false) {
|
|
267
|
-
if (this.inner instanceof Promise) {
|
|
268
|
-
return this.inner.then((inner) => inner.explainPlan(verbose));
|
|
269
|
-
}
|
|
270
|
-
else {
|
|
271
|
-
return this.inner.explainPlan(verbose);
|
|
272
|
-
}
|
|
273
|
-
}
|
|
274
|
-
}
|
|
275
|
-
exports.QueryBase = QueryBase;
|
|
276
|
-
/**
|
|
277
|
-
* A builder used to construct a vector search
|
|
278
|
-
*
|
|
279
|
-
* This builder can be reused to execute the query many times.
|
|
280
|
-
*/
|
|
281
|
-
class VectorQuery extends QueryBase {
|
|
282
|
-
constructor(inner) {
|
|
283
|
-
super(inner);
|
|
284
|
-
}
|
|
285
|
-
/**
|
|
286
|
-
* Set the number of partitions to search (probe)
|
|
287
|
-
*
|
|
288
|
-
* This argument is only used when the vector column has an IVF PQ index.
|
|
289
|
-
* If there is no index then this value is ignored.
|
|
290
|
-
*
|
|
291
|
-
* The IVF stage of IVF PQ divides the input into partitions (clusters) of
|
|
292
|
-
* related values.
|
|
293
|
-
*
|
|
294
|
-
* The partition whose centroids are closest to the query vector will be
|
|
295
|
-
* exhaustively searched to find matches. This parameter controls how many
|
|
296
|
-
* partitions should be searched.
|
|
297
|
-
*
|
|
298
|
-
* Increasing this value will increase the recall of your query but will
|
|
299
|
-
* also increase the latency of your query. The default value is 20. This
|
|
300
|
-
* default is good for many cases but the best value to use will depend on
|
|
301
|
-
* your data and the recall that you need to achieve.
|
|
302
|
-
*
|
|
303
|
-
* For best results we recommend tuning this parameter with a benchmark against
|
|
304
|
-
* your actual data to find the smallest possible value that will still give
|
|
305
|
-
* you the desired recall.
|
|
306
|
-
*/
|
|
307
|
-
nprobes(nprobes) {
|
|
308
|
-
super.doCall((inner) => inner.nprobes(nprobes));
|
|
309
|
-
return this;
|
|
310
|
-
}
|
|
311
|
-
/**
|
|
312
|
-
* Set the number of candidates to consider during the search
|
|
313
|
-
*
|
|
314
|
-
* This argument is only used when the vector column has an HNSW index.
|
|
315
|
-
* If there is no index then this value is ignored.
|
|
316
|
-
*
|
|
317
|
-
* Increasing this value will increase the recall of your query but will
|
|
318
|
-
* also increase the latency of your query. The default value is 1.5*limit.
|
|
319
|
-
*/
|
|
320
|
-
ef(ef) {
|
|
321
|
-
super.doCall((inner) => inner.ef(ef));
|
|
322
|
-
return this;
|
|
323
|
-
}
|
|
324
|
-
/**
|
|
325
|
-
* Set the vector column to query
|
|
326
|
-
*
|
|
327
|
-
* This controls which column is compared to the query vector supplied in
|
|
328
|
-
* the call to @see {@link Query#nearestTo}
|
|
329
|
-
*
|
|
330
|
-
* This parameter must be specified if the table has more than one column
|
|
331
|
-
* whose data type is a fixed-size-list of floats.
|
|
332
|
-
*/
|
|
333
|
-
column(column) {
|
|
334
|
-
super.doCall((inner) => inner.column(column));
|
|
335
|
-
return this;
|
|
336
|
-
}
|
|
337
|
-
/**
|
|
338
|
-
* Set the distance metric to use
|
|
339
|
-
*
|
|
340
|
-
* When performing a vector search we try and find the "nearest" vectors according
|
|
341
|
-
* to some kind of distance metric. This parameter controls which distance metric to
|
|
342
|
-
* use. See @see {@link IvfPqOptions.distanceType} for more details on the different
|
|
343
|
-
* distance metrics available.
|
|
344
|
-
*
|
|
345
|
-
* Note: if there is a vector index then the distance type used MUST match the distance
|
|
346
|
-
* type used to train the vector index. If this is not done then the results will be
|
|
347
|
-
* invalid.
|
|
348
|
-
*
|
|
349
|
-
* By default "l2" is used.
|
|
350
|
-
*/
|
|
351
|
-
distanceType(distanceType) {
|
|
352
|
-
super.doCall((inner) => inner.distanceType(distanceType));
|
|
353
|
-
return this;
|
|
354
|
-
}
|
|
355
|
-
/**
|
|
356
|
-
* A multiplier to control how many additional rows are taken during the refine step
|
|
357
|
-
*
|
|
358
|
-
* This argument is only used when the vector column has an IVF PQ index.
|
|
359
|
-
* If there is no index then this value is ignored.
|
|
360
|
-
*
|
|
361
|
-
* An IVF PQ index stores compressed (quantized) values. The query vector is compared
|
|
362
|
-
* against these values and, since they are compressed, the comparison is inaccurate.
|
|
363
|
-
*
|
|
364
|
-
* This parameter can be used to refine the results. It can both improve recall
|
|
365
|
-
* and correct the ordering of the nearest results.
|
|
366
|
-
*
|
|
367
|
-
* To refine results LanceDb will first perform an ANN search to find the nearest
|
|
368
|
-
* `limit` * `refine_factor` results. In other words, if `refine_factor` is 3 and
|
|
369
|
-
* `limit` is the default (10) then the first 30 results will be selected. LanceDb
|
|
370
|
-
* then fetches the full, uncompressed, values for these 30 results. The results are
|
|
371
|
-
* then reordered by the true distance and only the nearest 10 are kept.
|
|
372
|
-
*
|
|
373
|
-
* Note: there is a difference between calling this method with a value of 1 and never
|
|
374
|
-
* calling this method at all. Calling this method with any value will have an impact
|
|
375
|
-
* on your search latency. When you call this method with a `refine_factor` of 1 then
|
|
376
|
-
* LanceDb still needs to fetch the full, uncompressed, values so that it can potentially
|
|
377
|
-
* reorder the results.
|
|
378
|
-
*
|
|
379
|
-
* Note: if this method is NOT called then the distances returned in the _distance column
|
|
380
|
-
* will be approximate distances based on the comparison of the quantized query vector
|
|
381
|
-
* and the quantized result vectors. This can be considerably different than the true
|
|
382
|
-
* distance between the query vector and the actual uncompressed vector.
|
|
383
|
-
*/
|
|
384
|
-
refineFactor(refineFactor) {
|
|
385
|
-
super.doCall((inner) => inner.refineFactor(refineFactor));
|
|
386
|
-
return this;
|
|
387
|
-
}
|
|
388
|
-
/**
|
|
389
|
-
* If this is called then filtering will happen after the vector search instead of
|
|
390
|
-
* before.
|
|
391
|
-
*
|
|
392
|
-
* By default filtering will be performed before the vector search. This is how
|
|
393
|
-
* filtering is typically understood to work. This prefilter step does add some
|
|
394
|
-
* additional latency. Creating a scalar index on the filter column(s) can
|
|
395
|
-
* often improve this latency. However, sometimes a filter is too complex or scalar
|
|
396
|
-
* indices cannot be applied to the column. In these cases postfiltering can be
|
|
397
|
-
* used instead of prefiltering to improve latency.
|
|
398
|
-
*
|
|
399
|
-
* Post filtering applies the filter to the results of the vector search. This means
|
|
400
|
-
* we only run the filter on a much smaller set of data. However, it can cause the
|
|
401
|
-
* query to return fewer than `limit` results (or even no results) if none of the nearest
|
|
402
|
-
* results match the filter.
|
|
403
|
-
*
|
|
404
|
-
* Post filtering happens during the "refine stage" (described in more detail in
|
|
405
|
-
* @see {@link VectorQuery#refineFactor}). This means that setting a higher refine
|
|
406
|
-
* factor can often help restore some of the results lost by post filtering.
|
|
407
|
-
*/
|
|
408
|
-
postfilter() {
|
|
409
|
-
super.doCall((inner) => inner.postfilter());
|
|
410
|
-
return this;
|
|
411
|
-
}
|
|
412
|
-
/**
|
|
413
|
-
* If this is called then any vector index is skipped
|
|
414
|
-
*
|
|
415
|
-
* An exhaustive (flat) search will be performed. The query vector will
|
|
416
|
-
* be compared to every vector in the table. At high scales this can be
|
|
417
|
-
* expensive. However, this is often still useful. For example, skipping
|
|
418
|
-
* the vector index can give you ground truth results which you can use to
|
|
419
|
-
* calculate your recall to select an appropriate value for nprobes.
|
|
420
|
-
*/
|
|
421
|
-
bypassVectorIndex() {
|
|
422
|
-
super.doCall((inner) => inner.bypassVectorIndex());
|
|
423
|
-
return this;
|
|
424
|
-
}
|
|
425
|
-
/**
|
|
426
|
-
* Add a query vector to the search
|
|
427
|
-
*
|
|
428
|
-
* This method can be called multiple times to add multiple query vectors
|
|
429
|
-
* to the search. If multiple query vectors are added, then they will be searched
|
|
430
|
-
* in parallel, and the results will be concatenated. A column called `query_index`
|
|
431
|
-
* will be added to indicate the index of the query vector that produced the result.
|
|
432
|
-
*
|
|
433
|
-
* Performance wise, this is equivalent to running multiple queries concurrently.
|
|
434
|
-
*/
|
|
435
|
-
addQueryVector(vector) {
|
|
436
|
-
if (vector instanceof Promise) {
|
|
437
|
-
const res = (async () => {
|
|
438
|
-
try {
|
|
439
|
-
const v = await vector;
|
|
440
|
-
const arr = Float32Array.from(v);
|
|
441
|
-
//
|
|
442
|
-
// biome-ignore lint/suspicious/noExplicitAny: we need to get the `inner`, but js has no package scoping
|
|
443
|
-
const value = this.addQueryVector(arr);
|
|
444
|
-
const inner = value.inner;
|
|
445
|
-
return inner;
|
|
446
|
-
}
|
|
447
|
-
catch (e) {
|
|
448
|
-
return Promise.reject(e);
|
|
449
|
-
}
|
|
450
|
-
})();
|
|
451
|
-
return new VectorQuery(res);
|
|
452
|
-
}
|
|
453
|
-
else {
|
|
454
|
-
super.doCall((inner) => {
|
|
455
|
-
inner.addQueryVector(Float32Array.from(vector));
|
|
456
|
-
});
|
|
457
|
-
return this;
|
|
458
|
-
}
|
|
459
|
-
}
|
|
460
|
-
}
|
|
461
|
-
exports.VectorQuery = VectorQuery;
|
|
462
|
-
/** A builder for LanceDB queries. */
|
|
463
|
-
class Query extends QueryBase {
|
|
464
|
-
constructor(tbl) {
|
|
465
|
-
super(tbl.query());
|
|
466
|
-
}
|
|
467
|
-
/**
|
|
468
|
-
* Find the nearest vectors to the given query vector.
|
|
469
|
-
*
|
|
470
|
-
* This converts the query from a plain query to a vector query.
|
|
471
|
-
*
|
|
472
|
-
* This method will attempt to convert the input to the query vector
|
|
473
|
-
* expected by the embedding model. If the input cannot be converted
|
|
474
|
-
* then an error will be thrown.
|
|
475
|
-
*
|
|
476
|
-
* By default, there is no embedding model, and the input should be
|
|
477
|
-
* an array-like object of numbers (something that can be used as input
|
|
478
|
-
* to Float32Array.from)
|
|
479
|
-
*
|
|
480
|
-
* If there is only one vector column (a column whose data type is a
|
|
481
|
-
* fixed size list of floats) then the column does not need to be specified.
|
|
482
|
-
* If there is more than one vector column you must use
|
|
483
|
-
* @see {@link VectorQuery#column} to specify which column you would like
|
|
484
|
-
* to compare with.
|
|
485
|
-
*
|
|
486
|
-
* If no index has been created on the vector column then a vector query
|
|
487
|
-
* will perform a distance comparison between the query vector and every
|
|
488
|
-
* vector in the database and then sort the results. This is sometimes
|
|
489
|
-
* called a "flat search"
|
|
490
|
-
*
|
|
491
|
-
* For small databases, with a few hundred thousand vectors or less, this can
|
|
492
|
-
* be reasonably fast. In larger databases you should create a vector index
|
|
493
|
-
* on the column. If there is a vector index then an "approximate" nearest
|
|
494
|
-
* neighbor search (frequently called an ANN search) will be performed. This
|
|
495
|
-
* search is much faster, but the results will be approximate.
|
|
496
|
-
*
|
|
497
|
-
* The query can be further parameterized using the returned builder. There
|
|
498
|
-
* are various ANN search parameters that will let you fine tune your recall
|
|
499
|
-
* accuracy vs search latency.
|
|
500
|
-
*
|
|
501
|
-
* Vector searches always have a `limit`. If `limit` has not been called then
|
|
502
|
-
* a default `limit` of 10 will be used. @see {@link Query#limit}
|
|
503
|
-
*/
|
|
504
|
-
nearestTo(vector) {
|
|
505
|
-
if (this.inner instanceof Promise) {
|
|
506
|
-
const nativeQuery = this.inner.then(async (inner) => {
|
|
507
|
-
if (vector instanceof Promise) {
|
|
508
|
-
const arr = await vector.then((v) => Float32Array.from(v));
|
|
509
|
-
return inner.nearestTo(arr);
|
|
510
|
-
}
|
|
511
|
-
else {
|
|
512
|
-
return inner.nearestTo(Float32Array.from(vector));
|
|
513
|
-
}
|
|
514
|
-
});
|
|
515
|
-
return new VectorQuery(nativeQuery);
|
|
516
|
-
}
|
|
517
|
-
if (vector instanceof Promise) {
|
|
518
|
-
const res = (async () => {
|
|
519
|
-
try {
|
|
520
|
-
const v = await vector;
|
|
521
|
-
const arr = Float32Array.from(v);
|
|
522
|
-
//
|
|
523
|
-
// biome-ignore lint/suspicious/noExplicitAny: we need to get the `inner`, but js has no package scoping
|
|
524
|
-
const value = this.nearestTo(arr);
|
|
525
|
-
const inner = value.inner;
|
|
526
|
-
return inner;
|
|
527
|
-
}
|
|
528
|
-
catch (e) {
|
|
529
|
-
return Promise.reject(e);
|
|
530
|
-
}
|
|
531
|
-
})();
|
|
532
|
-
return new VectorQuery(res);
|
|
533
|
-
}
|
|
534
|
-
else {
|
|
535
|
-
const vectorQuery = this.inner.nearestTo(Float32Array.from(vector));
|
|
536
|
-
return new VectorQuery(vectorQuery);
|
|
537
|
-
}
|
|
538
|
-
}
|
|
539
|
-
nearestToText(query, columns) {
|
|
540
|
-
this.doCall((inner) => inner.fullTextSearch(query, columns));
|
|
541
|
-
return this;
|
|
542
|
-
}
|
|
543
|
-
}
|
|
544
|
-
exports.Query = Query;
|
package/dist/sanitize.d.ts
DELETED
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
import type { TKeys } from "apache-arrow/type";
|
|
2
|
-
import { DataType, Date_, Decimal, DenseUnion, Dictionary, Duration, Field, FixedSizeBinary, FixedSizeList, Float, Int, Interval, List, Map_, Schema, SchemaLike, SparseUnion, Struct, Table, TableLike, Time, Timestamp, TimestampMicrosecond, TimestampMillisecond, TimestampNanosecond, TimestampSecond, Type, Union } from "./arrow";
|
|
3
|
-
export declare function sanitizeMetadata(metadataLike?: unknown): Map<string, string> | undefined;
|
|
4
|
-
export declare function sanitizeInt(typeLike: object): Int<Type.Int | Type.Int8 | Type.Int16 | Type.Int32 | Type.Int64 | Type.Uint8 | Type.Uint16 | Type.Uint32 | Type.Uint64>;
|
|
5
|
-
export declare function sanitizeFloat(typeLike: object): Float<Type.Float | Type.Float16 | Type.Float32 | Type.Float64>;
|
|
6
|
-
export declare function sanitizeDecimal(typeLike: object): Decimal;
|
|
7
|
-
export declare function sanitizeDate(typeLike: object): Date_<import("apache-arrow/type").Dates>;
|
|
8
|
-
export declare function sanitizeTime(typeLike: object): Time<Type.Time | Type.TimeSecond | Type.TimeMillisecond | Type.TimeMicrosecond | Type.TimeNanosecond>;
|
|
9
|
-
export declare function sanitizeTimestamp(typeLike: object): Timestamp<Type.Timestamp | Type.TimestampSecond | Type.TimestampMillisecond | Type.TimestampMicrosecond | Type.TimestampNanosecond>;
|
|
10
|
-
export declare function sanitizeTypedTimestamp(typeLike: object, Datatype: typeof TimestampNanosecond | typeof TimestampMicrosecond | typeof TimestampMillisecond | typeof TimestampSecond): TimestampSecond | TimestampMillisecond | TimestampMicrosecond | TimestampNanosecond;
|
|
11
|
-
export declare function sanitizeInterval(typeLike: object): Interval<Type.Interval | Type.IntervalDayTime | Type.IntervalYearMonth>;
|
|
12
|
-
export declare function sanitizeList(typeLike: object): List<any>;
|
|
13
|
-
export declare function sanitizeStruct(typeLike: object): Struct<any>;
|
|
14
|
-
export declare function sanitizeUnion(typeLike: object): Union<Type.Union | Type.DenseUnion | Type.SparseUnion>;
|
|
15
|
-
export declare function sanitizeTypedUnion(typeLike: object, UnionType: typeof DenseUnion | typeof SparseUnion): SparseUnion | DenseUnion;
|
|
16
|
-
export declare function sanitizeFixedSizeBinary(typeLike: object): FixedSizeBinary;
|
|
17
|
-
export declare function sanitizeFixedSizeList(typeLike: object): FixedSizeList<any>;
|
|
18
|
-
export declare function sanitizeMap(typeLike: object): Map_<any, any>;
|
|
19
|
-
export declare function sanitizeDuration(typeLike: object): Duration<Type.Duration | Type.DurationSecond | Type.DurationMillisecond | Type.DurationMicrosecond | Type.DurationNanosecond>;
|
|
20
|
-
export declare function sanitizeDictionary(typeLike: object): Dictionary<DataType<any, any>, TKeys>;
|
|
21
|
-
export declare function sanitizeType(typeLike: unknown): DataType<any>;
|
|
22
|
-
export declare function sanitizeField(fieldLike: unknown): Field;
|
|
23
|
-
/**
|
|
24
|
-
* Convert something schemaLike into a Schema instance
|
|
25
|
-
*
|
|
26
|
-
* This method is often needed even when the caller is using a Schema
|
|
27
|
-
* instance because they might be using a different instance of apache-arrow
|
|
28
|
-
* than lancedb is using.
|
|
29
|
-
*/
|
|
30
|
-
export declare function sanitizeSchema(schemaLike: SchemaLike): Schema;
|
|
31
|
-
export declare function sanitizeTable(tableLike: TableLike): Table;
|