@dengxifeng/lancedb 0.26.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/AGENTS.md +13 -0
  2. package/CONTRIBUTING.md +76 -0
  3. package/README.md +37 -0
  4. package/dist/arrow.d.ts +279 -0
  5. package/dist/arrow.js +1316 -0
  6. package/dist/connection.d.ts +259 -0
  7. package/dist/connection.js +224 -0
  8. package/dist/embedding/embedding_function.d.ts +103 -0
  9. package/dist/embedding/embedding_function.js +192 -0
  10. package/dist/embedding/index.d.ts +27 -0
  11. package/dist/embedding/index.js +101 -0
  12. package/dist/embedding/openai.d.ts +16 -0
  13. package/dist/embedding/openai.js +93 -0
  14. package/dist/embedding/registry.d.ts +74 -0
  15. package/dist/embedding/registry.js +165 -0
  16. package/dist/embedding/transformers.d.ts +36 -0
  17. package/dist/embedding/transformers.js +122 -0
  18. package/dist/header.d.ts +162 -0
  19. package/dist/header.js +217 -0
  20. package/dist/index.d.ts +85 -0
  21. package/dist/index.js +106 -0
  22. package/dist/indices.d.ts +692 -0
  23. package/dist/indices.js +156 -0
  24. package/dist/merge.d.ts +80 -0
  25. package/dist/merge.js +92 -0
  26. package/dist/native.d.ts +585 -0
  27. package/dist/native.js +339 -0
  28. package/dist/permutation.d.ts +143 -0
  29. package/dist/permutation.js +184 -0
  30. package/dist/query.d.ts +581 -0
  31. package/dist/query.js +853 -0
  32. package/dist/rerankers/index.d.ts +5 -0
  33. package/dist/rerankers/index.js +19 -0
  34. package/dist/rerankers/rrf.d.ts +14 -0
  35. package/dist/rerankers/rrf.js +28 -0
  36. package/dist/sanitize.d.ts +32 -0
  37. package/dist/sanitize.js +473 -0
  38. package/dist/table.d.ts +581 -0
  39. package/dist/table.js +321 -0
  40. package/dist/util.d.ts +14 -0
  41. package/dist/util.js +77 -0
  42. package/license_header.txt +2 -0
  43. package/package.json +122 -0
@@ -0,0 +1,156 @@
1
+ "use strict";
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ // SPDX-FileCopyrightText: Copyright The LanceDB Authors
4
+ Object.defineProperty(exports, "__esModule", { value: true });
5
+ exports.Index = void 0;
6
+ const native_1 = require("./native");
7
/**
 * Factory class for the index definitions accepted by LanceDB.
 *
 * Each static method builds a thin wrapper around the corresponding
 * native index definition; the wrapper is later consumed by table
 * index-creation calls.
 */
class Index {
    // Underlying native index definition (opaque handle).
    inner;
    /** Wrap a native index definition. __Internal use only.__ */
    constructor(inner) {
        this.inner = inner;
    }
    /**
     * Create an IvfPq index
     *
     * This index stores a compressed (quantized) copy of every vector. These vectors
     * are grouped into partitions of similar vectors. Each partition keeps track of
     * a centroid which is the average value of all vectors in the group.
     *
     * During a query the centroids are compared with the query vector to find the closest
     * partitions. The compressed vectors in these partitions are then searched to find
     * the closest vectors.
     *
     * The compression scheme is called product quantization. Each vector is divided into
     * subvectors and then each subvector is quantized into a small number of bits. The
     * parameters `num_bits` and `num_subvectors` control this process, providing a tradeoff
     * between index size (and thus search speed) and index accuracy.
     *
     * The partitioning process is called IVF and the `num_partitions` parameter controls how
     * many groups to create.
     *
     * Note that training an IVF PQ index on a large dataset is a slow operation and
     * currently is also a memory intensive operation.
     */
    static ivfPq(options) {
        const { distanceType, numPartitions, numSubVectors, numBits, maxIterations, sampleRate } = options ?? {};
        return new Index(native_1.Index.ivfPq(distanceType, numPartitions, numSubVectors, numBits, maxIterations, sampleRate));
    }
    /**
     * Create an IvfRq index
     *
     * IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
     * and organizes them into IVF partitions.
     *
     * The compression scheme is called RabitQ quantization. Each dimension is quantized
     * into a small number of bits. The parameters `num_bits` and `num_partitions` control
     * this process, providing a tradeoff between index size (and thus search speed) and
     * index accuracy.
     *
     * The partitioning process is called IVF and the `num_partitions` parameter controls how
     * many groups to create.
     *
     * Note that training an IVF RQ index on a large dataset is a slow operation and
     * currently is also a memory intensive operation.
     */
    static ivfRq(options) {
        const { distanceType, numPartitions, numBits, maxIterations, sampleRate } = options ?? {};
        return new Index(native_1.Index.ivfRq(distanceType, numPartitions, numBits, maxIterations, sampleRate));
    }
    /**
     * Create an IvfFlat index
     *
     * This index groups vectors into partitions of similar vectors. Each partition keeps
     * track of a centroid which is the average value of all vectors in the group.
     *
     * During a query the centroids are compared with the query vector to find the closest
     * partitions. The vectors in these partitions are then searched to find
     * the closest vectors.
     *
     * The partitioning process is called IVF and the `num_partitions` parameter controls how
     * many groups to create.
     *
     * Note that training an IVF FLAT index on a large dataset is a slow operation and
     * currently is also a memory intensive operation.
     */
    static ivfFlat(options) {
        const { distanceType, numPartitions, maxIterations, sampleRate } = options ?? {};
        return new Index(native_1.Index.ivfFlat(distanceType, numPartitions, maxIterations, sampleRate));
    }
    /**
     * Create a btree index
     *
     * A btree index is an index on scalar columns. The index stores a copy of the column
     * in sorted order. A header entry is created for each block of rows (currently the
     * block size is fixed at 4096). These header entries are stored in a separate
     * cacheable structure (a btree). To search for data the header is used to determine
     * which blocks need to be read from disk.
     *
     * For example, a btree index in a table with 1Bi rows requires sizeof(Scalar) * 256Ki
     * bytes of memory and will generally need to read sizeof(Scalar) * 4096 bytes to find
     * the correct row ids.
     *
     * This index is good for scalar columns with mostly distinct values and does best when
     * the query is highly selective.
     *
     * The btree index does not currently have any parameters though parameters such as the
     * block size may be added in the future.
     */
    static btree() {
        return new Index(native_1.Index.btree());
    }
    /**
     * Create a bitmap index.
     *
     * A `Bitmap` index stores a bitmap for each distinct value in the column for every row.
     *
     * This index works best for low-cardinality columns, where the number of unique values
     * is small (i.e., less than a few hundreds).
     */
    static bitmap() {
        return new Index(native_1.Index.bitmap());
    }
    /**
     * Create a label list index.
     *
     * LabelList index is a scalar index that can be used on `List<T>` columns to
     * support queries with `array_contains_all` and `array_contains_any`
     * using an underlying bitmap index.
     */
    static labelList() {
        return new Index(native_1.Index.labelList());
    }
    /**
     * Create a full text search index
     *
     * A full text search index is an index on a string column, so that you can conduct full
     * text searches on the column.
     *
     * The results of a full text search are ordered by relevance measured by BM25.
     *
     * You can combine filters with full text search.
     */
    static fts(options) {
        const { withPosition, baseTokenizer, language, maxTokenLength, lowercase, stem, removeStopWords, asciiFolding, ngramMinLength, ngramMaxLength, prefixOnly } = options ?? {};
        return new Index(native_1.Index.fts(withPosition, baseTokenizer, language, maxTokenLength, lowercase, stem, removeStopWords, asciiFolding, ngramMinLength, ngramMaxLength, prefixOnly));
    }
    /**
     * Create a hnswPq index
     *
     * HNSW-PQ stands for Hierarchical Navigable Small World - Product Quantization.
     * It is a variant of the HNSW algorithm that uses product quantization to compress
     * the vectors.
     */
    static hnswPq(options) {
        const { distanceType, numPartitions, numSubVectors, maxIterations, sampleRate, m, efConstruction } = options ?? {};
        return new Index(native_1.Index.hnswPq(distanceType, numPartitions, numSubVectors, maxIterations, sampleRate, m, efConstruction));
    }
    /**
     * Create a hnswSq index
     *
     * HNSW-SQ stands for Hierarchical Navigable Small World - Scalar Quantization.
     * It is a variant of the HNSW algorithm that uses scalar quantization to compress
     * the vectors.
     */
    static hnswSq(options) {
        const { distanceType, numPartitions, maxIterations, sampleRate, m, efConstruction } = options ?? {};
        return new Index(native_1.Index.hnswSq(distanceType, numPartitions, maxIterations, sampleRate, m, efConstruction));
    }
}
exports.Index = Index;
@@ -0,0 +1,80 @@
1
+ import { Data, Schema } from "./arrow";
2
+ import { MergeResult, NativeMergeInsertBuilder } from "./native";
3
+ /** A builder used to create and run a merge insert operation */
4
/** A builder used to create and run a merge insert operation */
export declare class MergeInsertBuilder {
    #private;
    /** Construct a MergeInsertBuilder. __Internal use only.__ */
    constructor(native: NativeMergeInsertBuilder, schema: Schema | Promise<Schema>);
    /**
     * Rows that exist in both the source table (new data) and
     * the target table (old data) will be updated, replacing
     * the old row with the corresponding matching row.
     *
     * If there are multiple matches then the behavior is undefined.
     * Currently this causes multiple copies of the row to be created
     * but that behavior is subject to change.
     *
     * An optional condition may be specified. If it is, then only
     * matched rows that satisfy the condition will be updated. Any
     * rows that do not satisfy the condition will be left as they
     * are. Failing to satisfy the condition does not cause a
     * "matched row" to become a "not matched" row.
     *
     * The condition should be an SQL string. Use the prefix
     * `target.` to refer to rows in the target table (old data)
     * and the prefix `source.` to refer to rows in the source
     * table (new data).
     *
     * For example, "target.last_update < source.last_update"
     */
    whenMatchedUpdateAll(options?: {
        where: string;
    }): MergeInsertBuilder;
    /**
     * Rows that exist only in the source table (new data) should
     * be inserted into the target table.
     */
    whenNotMatchedInsertAll(): MergeInsertBuilder;
    /**
     * Rows that exist only in the target table (old data) will be
     * deleted. An optional condition can be provided to limit what
     * data is deleted.
     *
     * @param options.where - An optional condition to limit what data is deleted
     */
    whenNotMatchedBySourceDelete(options?: {
        where: string;
    }): MergeInsertBuilder;
    /**
     * Controls whether to use indexes for the merge operation.
     *
     * When set to `true` (the default), the operation will use an index if available
     * on the join key for improved performance. When set to `false`, it forces a full
     * table scan even if an index exists. This can be useful for benchmarking or when
     * the query optimizer chooses a suboptimal path.
     *
     * @param useIndex - Whether to use indices for the merge operation. Defaults to `true`.
     */
    useIndex(useIndex: boolean): MergeInsertBuilder;
    /**
     * Executes the merge insert operation
     *
     * @param data - The source data to merge into the target table
     * @param execOptions - Optional execution options (e.g. a timeout)
     * @returns {Promise<MergeResult>} the merge result
     */
    execute(data: Data, execOptions?: Partial<WriteExecutionOptions>): Promise<MergeResult>;
}
66
/** Options controlling how a write operation is executed. */
export interface WriteExecutionOptions {
    /**
     * Maximum time (in milliseconds) to run the operation before cancelling it.
     *
     * By default, there is a 30-second timeout that is only enforced after the
     * first attempt. This is to prevent spending too long retrying to resolve
     * conflicts. For example, if a write attempt takes 20 seconds and fails,
     * the second attempt will be cancelled after 10 seconds, hitting the
     * 30-second timeout. However, a write that takes one hour and succeeds on the
     * first attempt will not be cancelled.
     *
     * When this is set, the timeout is enforced on all attempts, including the first.
     */
    timeoutMs?: number;
}
package/dist/merge.js ADDED
@@ -0,0 +1,92 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.MergeInsertBuilder = void 0;
4
+ // SPDX-License-Identifier: Apache-2.0
5
+ // SPDX-FileCopyrightText: Copyright The LanceDB Authors
6
+ const arrow_1 = require("./arrow");
7
+ /** A builder used to create and run a merge insert operation */
8
/** A builder used to create and run a merge insert operation */
class MergeInsertBuilder {
    #native;
    #schema;
    /** Construct a MergeInsertBuilder. __Internal use only.__ */
    constructor(native, schema) {
        this.#native = native;
        this.#schema = schema;
    }
    // Wrap an updated native builder, carrying the schema forward.
    #chain(native) {
        return new MergeInsertBuilder(native, this.#schema);
    }
    /**
     * Rows that exist in both the source table (new data) and
     * the target table (old data) will be updated, replacing
     * the old row with the corresponding matching row.
     *
     * If there are multiple matches then the behavior is undefined.
     * Currently this causes multiple copies of the row to be created
     * but that behavior is subject to change.
     *
     * An optional condition may be specified. If it is, then only
     * matched rows that satisfy the condition will be updated. Any
     * rows that do not satisfy the condition will be left as they
     * are. Failing to satisfy the condition does not cause a
     * "matched row" to become a "not matched" row.
     *
     * The condition should be an SQL string. Use the prefix
     * `target.` to refer to rows in the target table (old data)
     * and the prefix `source.` to refer to rows in the source
     * table (new data).
     *
     * For example, "target.last_update < source.last_update"
     */
    whenMatchedUpdateAll(options) {
        return this.#chain(this.#native.whenMatchedUpdateAll(options?.where));
    }
    /**
     * Rows that exist only in the source table (new data) should
     * be inserted into the target table.
     */
    whenNotMatchedInsertAll() {
        return this.#chain(this.#native.whenNotMatchedInsertAll());
    }
    /**
     * Rows that exist only in the target table (old data) will be
     * deleted. An optional condition can be provided to limit what
     * data is deleted.
     *
     * @param options.where - An optional condition to limit what data is deleted
     */
    whenNotMatchedBySourceDelete(options) {
        return this.#chain(this.#native.whenNotMatchedBySourceDelete(options?.where));
    }
    /**
     * Controls whether to use indexes for the merge operation.
     *
     * When set to `true` (the default), the operation will use an index if available
     * on the join key for improved performance. When set to `false`, it forces a full
     * table scan even if an index exists. This can be useful for benchmarking or when
     * the query optimizer chooses a suboptimal path.
     *
     * @param useIndex - Whether to use indices for the merge operation. Defaults to `true`.
     */
    useIndex(useIndex) {
        return this.#chain(this.#native.useIndex(useIndex));
    }
    /**
     * Executes the merge insert operation
     *
     * @param data - The source data to merge into the target table
     * @param execOptions - Optional execution options (e.g. a timeout)
     * @returns {Promise<MergeResult>} the merge result
     */
    async execute(data, execOptions) {
        // `await` is a no-op on a plain Schema and resolves a Promise<Schema>;
        // cache the resolved value so later calls skip the await's promise path.
        const schema = await this.#schema;
        this.#schema = schema;
        const timeoutMs = execOptions?.timeoutMs;
        if (timeoutMs !== undefined) {
            this.#native.setTimeout(timeoutMs);
        }
        const buffer = await (0, arrow_1.fromDataToBuffer)(data, undefined, schema);
        return await this.#native.execute(buffer);
    }
}
exports.MergeInsertBuilder = MergeInsertBuilder;