@dengxifeng/lancedb 0.26.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/AGENTS.md +13 -0
  2. package/CONTRIBUTING.md +76 -0
  3. package/README.md +37 -0
  4. package/dist/arrow.d.ts +279 -0
  5. package/dist/arrow.js +1316 -0
  6. package/dist/connection.d.ts +259 -0
  7. package/dist/connection.js +224 -0
  8. package/dist/embedding/embedding_function.d.ts +103 -0
  9. package/dist/embedding/embedding_function.js +192 -0
  10. package/dist/embedding/index.d.ts +27 -0
  11. package/dist/embedding/index.js +101 -0
  12. package/dist/embedding/openai.d.ts +16 -0
  13. package/dist/embedding/openai.js +93 -0
  14. package/dist/embedding/registry.d.ts +74 -0
  15. package/dist/embedding/registry.js +165 -0
  16. package/dist/embedding/transformers.d.ts +36 -0
  17. package/dist/embedding/transformers.js +122 -0
  18. package/dist/header.d.ts +162 -0
  19. package/dist/header.js +217 -0
  20. package/dist/index.d.ts +85 -0
  21. package/dist/index.js +106 -0
  22. package/dist/indices.d.ts +692 -0
  23. package/dist/indices.js +156 -0
  24. package/dist/merge.d.ts +80 -0
  25. package/dist/merge.js +92 -0
  26. package/dist/native.d.ts +585 -0
  27. package/dist/native.js +339 -0
  28. package/dist/permutation.d.ts +143 -0
  29. package/dist/permutation.js +184 -0
  30. package/dist/query.d.ts +581 -0
  31. package/dist/query.js +853 -0
  32. package/dist/rerankers/index.d.ts +5 -0
  33. package/dist/rerankers/index.js +19 -0
  34. package/dist/rerankers/rrf.d.ts +14 -0
  35. package/dist/rerankers/rrf.js +28 -0
  36. package/dist/sanitize.d.ts +32 -0
  37. package/dist/sanitize.js +473 -0
  38. package/dist/table.d.ts +581 -0
  39. package/dist/table.js +321 -0
  40. package/dist/util.d.ts +14 -0
  41. package/dist/util.js +77 -0
  42. package/license_header.txt +2 -0
  43. package/package.json +122 -0
package/dist/query.js ADDED
@@ -0,0 +1,853 @@
1
+ "use strict";
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ // SPDX-FileCopyrightText: Copyright The LanceDB Authors
4
+ Object.defineProperty(exports, "__esModule", { value: true });
5
+ exports.BooleanQuery = exports.MultiMatchQuery = exports.BoostQuery = exports.PhraseQuery = exports.MatchQuery = exports.Occur = exports.Operator = exports.FullTextQueryType = exports.Query = exports.TakeQuery = exports.VectorQuery = exports.StandardQueryBase = exports.QueryBase = void 0;
6
+ exports.RecordBatchIterator = RecordBatchIterator;
7
+ exports.instanceOfFullTextQuery = instanceOfFullTextQuery;
8
+ const arrow_1 = require("./arrow");
9
+ const native_1 = require("./native");
10
/**
 * Adapt a native batch stream into an async generator of Arrow record batches.
 *
 * @param promisedInner - The native iterator, or a promise resolving to it.
 *   Its `next()` method is awaited repeatedly until it produces a falsy value.
 * @yields The single record batch decoded from each IPC buffer.
 * @throws {Error} "Invalid iterator state" when the resolved iterator is
 *   `undefined`; "Expected only one batch" when a buffer does not decode to
 *   exactly one batch.
 */
async function* RecordBatchIterator(promisedInner) {
    const nativeIter = await promisedInner;
    if (nativeIter === undefined) {
        throw new Error("Invalid iterator state");
    }
    while (true) {
        const ipcBuffer = await nativeIter.next();
        // A falsy buffer marks the end of the native stream.
        if (!ipcBuffer) {
            return;
        }
        const { batches: decoded } = (0, arrow_1.tableFromIPC)(ipcBuffer);
        if (decoded.length !== 1) {
            throw new Error("Expected only one batch");
        }
        yield decoded[0];
    }
}
23
/**
 * Async-iterable view over a native query object.
 *
 * Every iteration request calls `inner.execute(...)` with the stored options
 * and streams the result through `RecordBatchIterator`.
 */
class RecordBatchIterable {
    inner;
    options;
    /**
     * @param inner - Native query object exposing `execute(maxBatchLength, timeoutMs)`.
     * @param options - Optional execution options (`maxBatchLength`, `timeoutMs`).
     */
    constructor(inner, options) {
        this.inner = inner;
        this.options = options;
    }
    // biome-ignore lint/suspicious/noExplicitAny: skip
    [Symbol.asyncIterator]() {
        // Missing options yield `undefined` for both arguments.
        const { maxBatchLength, timeoutMs } = this.options ?? {};
        const nativeStream = this.inner.execute(maxBatchLength, timeoutMs);
        return RecordBatchIterator(nativeStream);
    }
}
35
/**
 * Behaviour shared by every query type.
 *
 * The wrapped native query (`inner`) may be either the native object itself
 * or a promise that resolves to it; every method handles both forms.
 *
 * @see {@link Query}
 * @see {@link VectorQuery}
 *
 * @hideconstructor
 */
class QueryBase {
    inner;
    /**
     * @hidden
     */
    constructor(inner) {
        this.inner = inner;
    }
    /**
     * Apply `fn` to the native query.
     *
     * When `inner` is a promise the call is chained onto it and `inner` is
     * replaced by the chained promise; otherwise `fn` runs synchronously.
     *
     * @hidden
     */
    doCall(fn) {
        if (!(this.inner instanceof Promise)) {
            fn(this.inner);
            return;
        }
        this.inner = this.inner.then((resolved) => {
            fn(resolved);
            return resolved;
        });
    }
    /**
     * Restrict the query output to the given columns.
     *
     * A query returns every column of the table by default, which can hurt
     * latency considerably. LanceDb stores data column by column, so I/O can
     * be limited to exactly the columns requested. As a best practice, always
     * select only the columns you need.
     *
     * Accepted inputs:
     * - a single column name or an array of column names: only those columns
     *   are returned;
     * - a `Map<string, string>` describing "dynamic" columns: each key is the
     *   output column name and each value is the SQL expression that computes
     *   it (comparable to `SELECT a + b AS combined, c` in SQL);
     * - a `Record<string, string>` (object literal), converted with
     *   `Object.entries`. Insertion order of objects is easy to get wrong, so
     *   `Map` is the more foolproof choice.
     *
     * @example
     * new Map([["combined", "a + b"], ["c", "c"]])
     *
     * Columns are returned in the order given, even when that differs from
     * the order used when the data was added.
     *
     * @returns This query, for chaining.
     */
    select(columns) {
        if (typeof columns === "string" || Array.isArray(columns)) {
            const names = typeof columns === "string" ? [columns] : columns;
            this.doCall((native) => {
                native.selectColumns(names);
            });
        }
        else {
            const pairs = columns instanceof Map
                ? Array.from(columns.entries())
                : Object.entries(columns);
            this.doCall((native) => {
                native.select(pairs);
            });
        }
        return this;
    }
    /**
     * Include the row id in the results.
     *
     * The row id column can be used to match results between different
     * queries, for example to combine a full text search with a vector
     * search when performing hybrid search.
     *
     * @returns This query, for chaining.
     */
    withRowId() {
        this.doCall((native) => native.withRowId());
        return this;
    }
    /**
     * Invoke `execute` on the native query, waiting for `inner` first when it
     * is a promise.
     *
     * @hidden
     */
    nativeExecute(options) {
        const run = (native) => native.execute(options?.maxBatchLength, options?.timeoutMs);
        return this.inner instanceof Promise ? this.inner.then(run) : run(this.inner);
    }
    /**
     * Run the query and stream the results as an async iterator of Arrow
     * record batches.
     *
     * By default, LanceDb uses many threads to calculate results and, for
     * large result sets, processes several batches at once. That readahead is
     * limited, and backpressure is applied when the stream is consumed
     * slowly, which bounds the memory used by a single query.
     */
    execute(options) {
        const nativeStream = this.nativeExecute(options);
        return RecordBatchIterator(nativeStream);
    }
    /**
     * @hidden
     */
    // biome-ignore lint/suspicious/noExplicitAny: skip
    [Symbol.asyncIterator]() {
        const nativeStream = this.nativeExecute();
        return RecordBatchIterator(nativeStream);
    }
    /** Run the query and gather every batch into a single Arrow table. */
    async toArrow(options) {
        const native = this.inner instanceof Promise ? await this.inner : this.inner;
        const collected = [];
        for await (const batch of new RecordBatchIterable(native, options)) {
            collected.push(batch);
        }
        return new arrow_1.Table(collected);
    }
    /** Run the query and gather the results into an array of objects. */
    // biome-ignore lint/suspicious/noExplicitAny: arrow.toArrow() returns any[]
    async toArray(options) {
        const table = await this.toArrow(options);
        return table.toArray();
    }
    /**
     * Produce an explanation of the query execution plan.
     *
     * @example
     * import * as lancedb from "@lancedb/lancedb"
     * const db = await lancedb.connect("./.lancedb");
     * const table = await db.createTable("my_table", [
     *   { vector: [1.1, 0.9], id: "1" },
     * ]);
     * const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();
     *
     * @param verbose - When true, a more detailed explanation is requested. Defaults to false.
     * @returns A promise resolving to the plan explanation string.
     */
    async explainPlan(verbose = false) {
        const explain = (native) => native.explainPlan(verbose);
        return this.inner instanceof Promise ? this.inner.then(explain) : explain(this.inner);
    }
    /**
     * Execute the query and return the physical plan annotated with runtime
     * metrics.
     *
     * Useful for debugging and performance analysis: the output shows how the
     * query ran together with metrics such as elapsed time, rows processed
     * and I/O statistics.
     *
     * @example
     * import * as lancedb from "@lancedb/lancedb"
     *
     * const db = await lancedb.connect("./.lancedb");
     * const table = await db.createTable("my_table", [
     *   { vector: [1.1, 0.9], id: "1" },
     * ]);
     *
     * const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
     *
     * Example output (with runtime metrics inlined):
     * AnalyzeExec verbose=true, metrics=[]
     *  ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
     *   Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
     *    CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
     *     GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
     *      FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
     *       SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
     *        KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
     *         LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
     *
     * @returns A query execution plan with runtime metrics for each step.
     */
    async analyzePlan() {
        const analyze = (native) => native.analyzePlan();
        return this.inner instanceof Promise ? this.inner.then(analyze) : analyze(this.inner);
    }
    /**
     * Describe the output this query will produce.
     *
     * Lets callers inspect the names and types of the result columns before
     * running the query.
     *
     * @returns An Arrow Schema describing the output columns.
     */
    async outputSchema() {
        const fetchSchema = (native) => native.outputSchema();
        const schemaBuffer = await (this.inner instanceof Promise
            ? this.inner.then(fetchSchema)
            : fetchSchema(this.inner));
        // The native layer hands back an IPC buffer; only its schema is needed.
        return (0, arrow_1.tableFromIPC)(schemaBuffer).schema;
    }
}
264
+ exports.QueryBase = QueryBase;
265
/**
 * Query operations shared by plain and vector queries: filtering, full text
 * search, limit/offset and fast search.
 */
class StandardQueryBase extends QueryBase {
    constructor(inner) {
        super(inner);
    }
    /**
     * Apply a filter to this query.
     *
     * The filter is an SQL predicate string, for example:
     * @example
     * x > 10
     * y > 0 AND y < 100
     * x > 5 OR y = 'test'
     *
     * Creating a scalar index on the filter column(s) can often improve
     * filtering performance.
     *
     * @returns This query, for chaining.
     */
    where(predicate) {
        this.doCall((native) => {
            native.onlyIf(predicate);
        });
        return this;
    }
    /**
     * Apply a filter to this query.
     * @see where
     * @deprecated Use `where` instead
     */
    filter(predicate) {
        return this.where(predicate);
    }
    /**
     * Add a full text search to this query.
     *
     * @param query - Either a plain search string or a full text query object
     *   (its `inner` native query is forwarded).
     * @param options - Optional settings. `columns` may be a single column
     *   name or an array of names; it is only used with a string `query`.
     * @returns This query, for chaining.
     */
    fullTextSearch(query, options) {
        // Normalise `options.columns` to an array, or null when not usable.
        const requested = options?.columns;
        let columns = null;
        if (typeof requested === "string") {
            columns = [requested];
        }
        else if (Array.isArray(requested)) {
            columns = requested;
        }
        this.doCall((native) => {
            const request = typeof query === "string"
                ? { query: query, columns: columns }
                : { query: query.inner };
            native.fullTextSearch(request);
        });
        return this;
    }
    /**
     * Cap the number of results returned.
     *
     * A plain search has no limit by default: unless this method is called,
     * every valid row from the table is returned.
     *
     * @returns This query, for chaining.
     */
    limit(limit) {
        this.doCall((native) => {
            native.limit(limit);
        });
        return this;
    }
    /**
     * Skip the given number of rows before returning results.
     *
     * This is useful for pagination.
     *
     * @returns This query, for chaining.
     */
    offset(offset) {
        this.doCall((native) => {
            native.offset(offset);
        });
        return this;
    }
    /**
     * Skip un-indexed data. The search can be faster, but any data that has
     * not been indexed yet will be missed.
     *
     * Use {@link Table#optimize} to index all un-indexed data.
     *
     * @returns This query, for chaining.
     */
    fastSearch() {
        this.doCall((native) => {
            native.fastSearch();
        });
        return this;
    }
}
346
+ exports.StandardQueryBase = StandardQueryBase;
347
/**
 * Builder for a vector (nearest neighbour) search.
 *
 * The builder can be reused to execute the query many times.
 *
 * @see {@link Query#nearestTo}
 *
 * @hideconstructor
 */
class VectorQuery extends StandardQueryBase {
    /**
     * @hidden
     */
    constructor(inner) {
        super(inner);
    }
    /**
     * Set the number of partitions to search (probe).
     *
     * Only used when the vector column has an IVF PQ index; ignored when
     * there is no index.
     *
     * The IVF stage of IVF PQ divides the input into partitions (clusters) of
     * related values. The partitions whose centroids are closest to the query
     * vector are exhaustively searched for matches, and this parameter
     * controls how many partitions are searched.
     *
     * A larger value increases recall but also latency. The default value is
     * 20, which suits many cases, but the best value depends on your data and
     * the recall you need. We recommend tuning it with a benchmark against
     * your actual data to find the smallest value that still gives the
     * desired recall.
     *
     * For finer control when you have a very narrow filter, use
     * `minimumNprobes` and `maximumNprobes`. This method sets both the
     * minimum and the maximum to the same value.
     *
     * @returns This query, for chaining.
     */
    nprobes(nprobes) {
        super.doCall((native) => {
            native.nprobes(nprobes);
        });
        return this;
    }
    /**
     * Set the minimum number of probes used.
     *
     * Controls the minimum number of partitions searched. It affects every
     * query against a vector index, regardless of the filter. See `nprobes`
     * for details. Higher values increase recall but also latency.
     *
     * @returns This query, for chaining.
     */
    minimumNprobes(minimumNprobes) {
        super.doCall((native) => {
            native.minimumNprobes(minimumNprobes);
        });
        return this;
    }
    /**
     * Set the maximum number of probes used.
     *
     * Controls the maximum number of partitions searched. When this number is
     * greater than minimumNprobes the excess partitions are _only_ searched
     * if not enough results have been found. This helps with narrow filters:
     * such queries may spend more time searching and avoid potential false
     * negatives.
     *
     * @returns This query, for chaining.
     */
    maximumNprobes(maximumNprobes) {
        super.doCall((native) => {
            native.maximumNprobes(maximumNprobes);
        });
        return this;
    }
    /**
     * Set the distance range to use.
     *
     * Only rows with distances within range [lower_bound, upper_bound) are
     * returned.
     *
     * `undefined` means no lower or upper bound.
     *
     * @returns This query, for chaining.
     */
    distanceRange(lowerBound, upperBound) {
        super.doCall((native) => {
            native.distanceRange(lowerBound, upperBound);
        });
        return this;
    }
    /**
     * Set the number of candidates to consider during the search.
     *
     * Only used when the vector column has an HNSW index; ignored when there
     * is no index.
     *
     * A larger value increases recall but also latency. The default value is
     * 1.5*limit.
     *
     * @returns This query, for chaining.
     */
    ef(ef) {
        super.doCall((native) => {
            native.ef(ef);
        });
        return this;
    }
    /**
     * Set the vector column to query.
     *
     * Controls which column is compared to the query vector supplied in the
     * call to @see {@link Query#nearestTo}
     *
     * Must be specified when the table has more than one column whose data
     * type is a fixed-size-list of floats.
     *
     * @returns This query, for chaining.
     */
    column(column) {
        super.doCall((native) => {
            native.column(column);
        });
        return this;
    }
    /**
     * Set the distance metric to use.
     *
     * A vector search looks for the "nearest" vectors according to some
     * distance metric; this parameter selects that metric. See
     * @see {@link IvfPqOptions.distanceType} for details on the metrics
     * available.
     *
     * Note: if there is a vector index then the distance type used MUST match
     * the distance type used to train the vector index. Otherwise the results
     * will be invalid.
     *
     * By default "l2" is used.
     *
     * @returns This query, for chaining.
     */
    distanceType(distanceType) {
        super.doCall((native) => {
            native.distanceType(distanceType);
        });
        return this;
    }
    /**
     * A multiplier controlling how many additional rows are taken during the
     * refine step.
     *
     * Only used when the vector column has an IVF PQ index; ignored when
     * there is no index.
     *
     * An IVF PQ index stores compressed (quantized) values. The query vector
     * is compared against these values and, since they are compressed, the
     * comparison is inaccurate.
     *
     * This parameter can be used to refine the results. It can both improve
     * recall and correct the ordering of the nearest results.
     *
     * To refine results LanceDb first performs an ANN search to find the
     * nearest `limit` * `refine_factor` results. In other words, if
     * `refine_factor` is 3 and `limit` is the default (10) then the first 30
     * results are selected. LanceDb then fetches the full, uncompressed,
     * values for these 30 results, reorders them by the true distance and
     * keeps only the nearest 10.
     *
     * Note: calling this method with a value of 1 is not the same as never
     * calling it. Any value has an impact on search latency: with a
     * `refine_factor` of 1 LanceDb still needs to fetch the full,
     * uncompressed, values so that it can potentially reorder the results.
     *
     * Note: if this method is NOT called then the distances returned in the
     * _distance column are approximate distances based on the comparison of
     * the quantized query vector and the quantized result vectors. These can
     * differ considerably from the true distance between the query vector and
     * the actual uncompressed vector.
     *
     * @returns This query, for chaining.
     */
    refineFactor(refineFactor) {
        super.doCall((native) => {
            native.refineFactor(refineFactor);
        });
        return this;
    }
    /**
     * Filter after the vector search instead of before it.
     *
     * By default filtering is performed before the vector search, which is
     * how filtering is typically understood to work. That prefilter step adds
     * some latency. A scalar index on the filter column(s) can often improve
     * it, but sometimes a filter is too complex or scalar indices cannot be
     * applied to the column. In those cases postfiltering can be used instead
     * of prefiltering to improve latency.
     *
     * Post filtering applies the filter to the results of the vector search,
     * so the filter only runs on a much smaller set of data. However, the
     * query may return fewer than `limit` results (or even none) if none of
     * the nearest results match the filter.
     *
     * Post filtering happens during the "refine stage" (described in more
     * detail in @see {@link VectorQuery#refineFactor}). Setting a higher
     * refine factor can therefore often help restore some of the results lost
     * by post filtering.
     *
     * @returns This query, for chaining.
     */
    postfilter() {
        super.doCall((native) => {
            native.postfilter();
        });
        return this;
    }
    /**
     * Skip any vector index.
     *
     * An exhaustive (flat) search is performed: the query vector is compared
     * to every vector in the table. At high scales this can be expensive, but
     * it is often still useful. For example, skipping the vector index gives
     * ground truth results which you can use to calculate your recall and
     * select an appropriate value for nprobes.
     *
     * @returns This query, for chaining.
     */
    bypassVectorIndex() {
        super.doCall((native) => {
            native.bypassVectorIndex();
        });
        return this;
    }
    /**
     * Add a query vector to the search.
     *
     * May be called several times to add several query vectors. When
     * multiple query vectors are added they are searched in parallel and the
     * results are concatenated. A column called `query_index` is added to
     * indicate the index of the query vector that produced each result.
     *
     * Performance wise, this is equivalent to running multiple queries
     * concurrently.
     *
     * @param vector - The query vector, or a promise resolving to it. The
     *   value is converted with `Float32Array.from`.
     * @returns This query when `vector` is available immediately; otherwise a
     *   new `VectorQuery` wrapping a promise of the native query.
     */
    addQueryVector(vector) {
        if (!(vector instanceof Promise)) {
            super.doCall((native) => {
                native.addQueryVector(Float32Array.from(vector));
            });
            return this;
        }
        // Wait for the vector, register it on this builder, then hand this
        // builder's native query to the new wrapper. Any failure surfaces as
        // a rejection of the wrapped promise.
        const pendingNative = (async () => {
            const resolved = await vector;
            const updated = this.addQueryVector(Float32Array.from(resolved));
            return updated.inner;
        })();
        return new VectorQuery(pendingNative);
    }
    /**
     * Attach a reranker used to merge vector and full text search results.
     *
     * The native layer passes result sets as buffers; they are decoded to
     * record batches, given to `reranker.rerankHybrid` together with the
     * query, and the reranked batch is encoded back into a buffer.
     *
     * @param reranker - Object exposing `rerankHybrid(query, vecResults, ftsResults)`.
     * @returns This query, for chaining.
     */
    rerank(reranker) {
        const nativeReranker = {
            rerankHybrid: async (_, args) => {
                const vecResults = await (0, arrow_1.fromBufferToRecordBatch)(args.vecResults);
                const ftsResults = await (0, arrow_1.fromBufferToRecordBatch)(args.ftsResults);
                const reranked = await reranker.rerankHybrid(args.query, vecResults, ftsResults);
                return (0, arrow_1.fromRecordBatchToBuffer)(reranked);
            },
        };
        super.doCall((native) => {
            native.rerank(nativeReranker);
        });
        return this;
    }
}
592
+ exports.VectorQuery = VectorQuery;
593
/**
 * A query returning a subset of the table's rows.
 *
 * Adds nothing of its own: only the operations inherited from
 * {@link QueryBase} are available.
 *
 * @hideconstructor
 */
class TakeQuery extends QueryBase {
    /**
     * @param inner - The native query, or a promise resolving to it.
     */
    constructor(inner) {
        super(inner);
    }
}
603
+ exports.TakeQuery = TakeQuery;
604
/** Builder for LanceDB queries.
 *
 * @see {@link Table#query}, {@link Table#search}
 *
 * @hideconstructor
 */
class Query extends StandardQueryBase {
    /**
     * @hidden
     */
    constructor(tbl) {
        super(tbl.query());
    }
    /**
     * Find the vectors nearest to the given query vector.
     *
     * Converts this plain query into a vector query.
     *
     * The method attempts to convert the input to the query vector expected
     * by the embedding model and throws if that is not possible. By default
     * there is no embedding model, and the input should be an array-like
     * object of numbers (something accepted by Float32Array.from).
     *
     * When the table has only one vector column (a column whose data type is
     * a fixed size list of floats) the column need not be specified. With
     * more than one vector column, use @see {@link VectorQuery#column} to
     * choose the column to compare with.
     *
     * Without an index on the vector column, a vector query compares the
     * query vector with every vector in the database and then sorts the
     * results; this is sometimes called a "flat search". For small databases,
     * with a few hundred thousand vectors or less, this can be reasonably
     * fast. In larger databases you should create a vector index on the
     * column. With a vector index an "approximate" nearest neighbor search
     * (frequently called an ANN search) is performed, which is much faster
     * but yields approximate results.
     *
     * The returned builder offers further ANN search parameters for tuning
     * recall accuracy against search latency.
     *
     * Vector searches always have a `limit`. If `limit` has not been called
     * then a default `limit` of 10 will be used. @see {@link Query#limit}
     *
     * @param vector - The query vector, or a promise resolving to it.
     * @returns A new `VectorQuery`.
     */
    nearestTo(vector) {
        const vectorIsPending = vector instanceof Promise;
        if (this.inner instanceof Promise) {
            // Native query not ready yet: chain the conversion onto it.
            const pendingNative = this.inner.then(async (native) => {
                const queryVector = vectorIsPending
                    ? await vector.then((v) => Float32Array.from(v))
                    : Float32Array.from(vector);
                return native.nearestTo(queryVector);
            });
            return new VectorQuery(pendingNative);
        }
        if (!vectorIsPending) {
            const nativeVectorQuery = this.inner.nearestTo(Float32Array.from(vector));
            return new VectorQuery(nativeVectorQuery);
        }
        // Only the vector is pending: resolve it, then reuse the synchronous
        // path and unwrap the native query of the resulting builder. Any
        // failure surfaces as a rejection of the wrapped promise.
        const pendingNative = (async () => {
            const resolved = await vector;
            const built = this.nearestTo(Float32Array.from(resolved));
            return built.inner;
        })();
        return new VectorQuery(pendingNative);
    }
    /**
     * Add a full text search to this query.
     *
     * @param query - Either a plain search string or a full text query object
     *   (its `inner` native query is forwarded).
     * @param columns - Columns to search; forwarded unchanged and only used
     *   with a string `query`.
     * @returns This query, for chaining.
     */
    nearestToText(query, columns) {
        this.doCall((native) => {
            const request = typeof query === "string"
                ? { query: query, columns: columns }
                : { query: query.inner };
            native.fullTextSearch(request);
        });
        return this;
    }
}
704
+ exports.Query = Query;
705
/**
 * Enum of the supported full-text query types.
 *
 * - `Match`: Performs a full-text search for terms in the query string.
 * - `MatchPhrase`: Searches for an exact phrase match in the text.
 * - `Boost`: Boosts the relevance score of specific terms in the query.
 * - `MultiMatch`: Searches across multiple fields for the query terms.
 * - `Boolean`: Combines several full-text queries (see `BooleanQuery`).
 */
const FullTextQueryType = {
    Match: "match",
    MatchPhrase: "match_phrase",
    Boost: "boost",
    MultiMatch: "multi_match",
    Boolean: "boolean",
};
exports.FullTextQueryType = FullTextQueryType;
721
/**
 * Enum of the logical operators available in full-text queries.
 *
 * - `And`: All terms must match.
 * - `Or`: At least one term must match.
 */
const Operator = {
    And: "AND",
    Or: "OR",
};
exports.Operator = Operator;
732
/**
 * Enum describing how a term must occur in full-text queries.
 *
 * - `Must`: The term must be present in the document.
 * - `Should`: The term should contribute to the document score, but is not required.
 * - `MustNot`: The term must not be present in the document.
 */
const Occur = {
    Should: "SHOULD",
    Must: "MUST",
    MustNot: "MUST_NOT",
};
exports.Occur = Occur;
745
/**
 * Check whether `obj` wraps a native full-text query.
 *
 * @param obj - Any value.
 * @returns `true` when `obj` is non-nullish and its `inner` property is a
 *   `JsFullTextQuery` instance, `false` otherwise.
 */
// biome-ignore lint/suspicious/noExplicitAny: we want any here
function instanceOfFullTextQuery(obj) {
    // Loose equality deliberately rejects both null and undefined.
    if (obj == null) {
        return false;
    }
    return obj.inner instanceof native_1.JsFullTextQuery;
}
749
/** Full-text query matching the terms of a query string in one column. */
class MatchQuery {
    /** @ignore */
    inner;
    /**
     * Creates an instance of MatchQuery.
     *
     * @param query - The text query to search for.
     * @param column - The name of the column to search within.
     * @param options - Optional parameters for the match query.
     * - `boost`: The boost factor for the query (default is 1.0).
     * - `fuzziness`: The fuzziness level for the query (default is 0).
     * - `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
     * - `operator`: The logical operator to use for combining terms in the query (default is "OR").
     * - `prefixLength`: The number of beginning characters being unchanged for fuzzy matching.
     */
    constructor(query, column, options) {
        // `fuzziness` falls back to 0 only when undefined, so an explicit
        // null is forwarded as-is; the other options treat null and
        // undefined alike.
        const { fuzziness = 0, boost, maxExpansions, operator, prefixLength } = options ?? {};
        this.inner = native_1.JsFullTextQuery.matchQuery(
            query,
            column,
            boost ?? 1.0,
            fuzziness,
            maxExpansions ?? 50,
            operator ?? Operator.Or,
            prefixLength ?? 0,
        );
    }
    /** @returns `FullTextQueryType.Match`. */
    queryType() {
        return FullTextQueryType.Match;
    }
}
775
+ exports.MatchQuery = MatchQuery;
776
/** Full-text query matching a phrase in one column. */
class PhraseQuery {
    /** @ignore */
    inner;
    /**
     * Creates an instance of `PhraseQuery`.
     *
     * @param query - The phrase to search for in the specified column.
     * @param column - The name of the column to search within.
     * @param options - Optional parameters for the phrase query.
     * - `slop`: The maximum number of intervening unmatched positions allowed between words in the phrase (default is 0).
     */
    constructor(query, column, options) {
        const slop = options?.slop ?? 0;
        this.inner = native_1.JsFullTextQuery.phraseQuery(query, column, slop);
    }
    /** @returns `FullTextQueryType.MatchPhrase`. */
    queryType() {
        return FullTextQueryType.MatchPhrase;
    }
}
794
+ exports.PhraseQuery = PhraseQuery;
795
/** Full-text query combining a positive query with a penalising negative query. */
class BoostQuery {
    /** @ignore */
    inner;
    /**
     * Creates an instance of BoostQuery.
     * The boost returns documents that match the positive query, but
     * penalizes those that match the negative query. The penalty is
     * controlled by the `negativeBoost` parameter.
     *
     * @param positive - The positive query that boosts the relevance score.
     * @param negative - The negative query that reduces the relevance score.
     * @param options - Optional parameters for the boost query.
     * - `negativeBoost`: The boost factor for the negative query. It is
     *   forwarded unchanged; when omitted, `undefined` is passed and the
     *   native implementation decides the value.
     */
    constructor(positive, negative, options) {
        const negativeBoost = options?.negativeBoost;
        this.inner = native_1.JsFullTextQuery.boostQuery(positive.inner, negative.inner, negativeBoost);
    }
    /** @returns `FullTextQueryType.Boost`. */
    queryType() {
        return FullTextQueryType.Boost;
    }
}
816
+ exports.BoostQuery = BoostQuery;
817
/** Full-text query matching the terms of a query string across several columns. */
class MultiMatchQuery {
    /** @ignore */
    inner;
    /**
     * Creates an instance of MultiMatchQuery.
     *
     * @param query - The text query to search for across multiple columns.
     * @param columns - An array of column names to search within.
     * @param options - Optional parameters for the multi-match query.
     * - `boosts`: An array of boost factors for each column (default is 1.0 for all).
     * - `operator`: The logical operator to use for combining terms in the query (default is "OR").
     */
    constructor(query, columns, options) {
        const boosts = options?.boosts;
        const operator = options?.operator ?? Operator.Or;
        this.inner = native_1.JsFullTextQuery.multiMatchQuery(query, columns, boosts, operator);
    }
    /** @returns `FullTextQueryType.MultiMatch`. */
    queryType() {
        return FullTextQueryType.MultiMatch;
    }
}
836
+ exports.MultiMatchQuery = MultiMatchQuery;
837
/** Full-text query combining several sub-queries, each tagged with an `Occur` value. */
class BooleanQuery {
    /** @ignore */
    inner;
    /**
     * Creates an instance of BooleanQuery.
     *
     * @param queries - An array of (Occur, FullTextQuery objects) to combine.
     * Occur specifies whether the query must match, or should match.
     */
    constructor(queries) {
        // Swap each wrapper for its native query before crossing into native code.
        const nativeClauses = queries.map((clause) => {
            const [occur, subQuery] = clause;
            return [occur, subQuery.inner];
        });
        this.inner = native_1.JsFullTextQuery.booleanQuery(nativeClauses);
    }
    /** @returns `FullTextQueryType.Boolean`. */
    queryType() {
        return FullTextQueryType.Boolean;
    }
}
853
+ exports.BooleanQuery = BooleanQuery;