@lancedb/lancedb 0.14.0-beta.2 → 0.14.1-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,55 @@
1
+ "use strict";
2
+ // Copyright 2024 Lance Developers.
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+ Object.defineProperty(exports, "__esModule", { value: true });
16
+ exports.embedding = exports.Table = exports.Index = exports.RecordBatchIterator = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = exports.WriteMode = void 0;
17
+ exports.connect = connect;
18
+ const connection_1 = require("./connection");
19
+ const native_js_1 = require("./native.js");
20
+ var native_js_2 = require("./native.js");
21
+ Object.defineProperty(exports, "WriteMode", { enumerable: true, get: function () { return native_js_2.WriteMode; } });
22
+ var arrow_1 = require("./arrow");
23
+ Object.defineProperty(exports, "makeArrowTable", { enumerable: true, get: function () { return arrow_1.makeArrowTable; } });
24
+ Object.defineProperty(exports, "MakeArrowTableOptions", { enumerable: true, get: function () { return arrow_1.MakeArrowTableOptions; } });
25
+ Object.defineProperty(exports, "VectorColumnOptions", { enumerable: true, get: function () { return arrow_1.VectorColumnOptions; } });
26
+ var connection_2 = require("./connection");
27
+ Object.defineProperty(exports, "Connection", { enumerable: true, get: function () { return connection_2.Connection; } });
28
+ var query_1 = require("./query");
29
+ Object.defineProperty(exports, "Query", { enumerable: true, get: function () { return query_1.Query; } });
30
+ Object.defineProperty(exports, "QueryBase", { enumerable: true, get: function () { return query_1.QueryBase; } });
31
+ Object.defineProperty(exports, "VectorQuery", { enumerable: true, get: function () { return query_1.VectorQuery; } });
32
+ Object.defineProperty(exports, "RecordBatchIterator", { enumerable: true, get: function () { return query_1.RecordBatchIterator; } });
33
+ var indices_1 = require("./indices");
34
+ Object.defineProperty(exports, "Index", { enumerable: true, get: function () { return indices_1.Index; } });
35
+ var table_1 = require("./table");
36
+ Object.defineProperty(exports, "Table", { enumerable: true, get: function () { return table_1.Table; } });
37
+ exports.embedding = require("./embedding");
38
+ async function connect(uriOrOptions, opts = {}) {
39
+ let uri;
40
+ if (typeof uriOrOptions !== "string") {
41
+ const { uri: uri_, ...options } = uriOrOptions;
42
+ uri = uri_;
43
+ opts = options;
44
+ }
45
+ else {
46
+ uri = uriOrOptions;
47
+ }
48
+ if (!uri) {
49
+ throw new Error("uri is required");
50
+ }
51
+ opts = opts ?? {};
52
+ opts.storageOptions = (0, connection_1.cleanseStorageOptions)(opts.storageOptions);
53
+ const nativeConn = await native_js_1.Connection.new(uri, opts);
54
+ return new connection_1.LocalConnection(nativeConn);
55
+ }
@@ -0,0 +1,429 @@
1
+ /**
2
+ * Options to create an `IVF_PQ` index
3
+ */
4
+ export interface IvfPqOptions {
5
+ /**
6
+ * The number of IVF partitions to create.
7
+ *
8
+ * This value should generally scale with the number of rows in the dataset.
9
+ * By default the number of partitions is the square root of the number of
10
+ * rows.
11
+ *
12
+ * If this value is too large then the first part of the search (picking the
13
+ * right partition) will be slow. If this value is too small then the second
14
+ * part of the search (searching within a partition) will be slow.
15
+ */
16
+ numPartitions?: number;
17
+ /**
18
+ * Number of sub-vectors of PQ.
19
+ *
20
+ * This value controls how much the vector is compressed during the quantization step.
21
+ * The more sub vectors there are the less the vector is compressed. The default is
22
+ * the dimension of the vector divided by 16. If the dimension is not evenly divisible
23
+ * by 16 we use the dimension divided by 8.
24
+ *
25
+ * The above two cases are highly preferred. Having 8 or 16 values per subvector allows
26
+ * us to use efficient SIMD instructions.
27
+ *
28
+ * If the dimension is not divisible by 8 then we use 1 subvector. This is not ideal and
29
+ * will likely result in poor performance.
30
+ */
31
+ numSubVectors?: number;
32
+ /**
33
+ * Distance type to use to build the index.
34
+ *
35
+ * Default value is "l2".
36
+ *
37
+ * This is used when training the index to calculate the IVF partitions
38
+ * (vectors are grouped in partitions with similar vectors according to this
39
+ * distance type) and to calculate a subvector's code during quantization.
40
+ *
41
+ * The distance type used to train an index MUST match the distance type used
42
+ * to search the index. Failure to do so will yield inaccurate results.
43
+ *
44
+ * The following distance types are available:
45
+ *
46
+ * "l2" - Euclidean distance. This is a very common distance metric that
47
+ * accounts for both magnitude and direction when determining the distance
48
+ * between vectors. L2 distance has a range of [0, ∞).
49
+ *
50
+ * "cosine" - Cosine distance. Cosine distance is a distance metric
51
+ * calculated from the cosine similarity between two vectors. Cosine
52
+ * similarity is a measure of similarity between two non-zero vectors of an
53
+ * inner product space. It is defined to equal the cosine of the angle
54
+ * between them. Unlike L2, the cosine distance is not affected by the
55
+ * magnitude of the vectors. Cosine distance has a range of [0, 2].
56
+ *
57
+ * Note: the cosine distance is undefined when one (or both) of the vectors
58
+ * are all zeros (there is no direction). These vectors are invalid and may
59
+ * never be returned from a vector search.
60
+ *
61
+ * "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
62
+ * distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
63
+ * L2 norm is 1), then dot distance is equivalent to the cosine distance.
64
+ */
65
+ distanceType?: "l2" | "cosine" | "dot";
66
+ /**
67
+ * Max iteration to train IVF kmeans.
68
+ *
69
+ * When training an IVF PQ index we use kmeans to calculate the partitions. This parameter
70
+ * controls how many iterations of kmeans to run.
71
+ *
72
+ * Increasing this might improve the quality of the index but in most cases these extra
73
+ * iterations have diminishing returns.
74
+ *
75
+ * The default value is 50.
76
+ */
77
+ maxIterations?: number;
78
+ /**
79
+ * The number of vectors, per partition, to sample when training IVF kmeans.
80
+ *
81
+ * When an IVF PQ index is trained, we need to calculate partitions. These are groups
82
+ * of vectors that are similar to each other. To do this we use an algorithm called kmeans.
83
+ *
84
+ * Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
85
+ * random sample of the data. This parameter controls the size of the sample. The total
86
+ * number of vectors used to train the index is `sample_rate * num_partitions`.
87
+ *
88
+ * Increasing this value might improve the quality of the index but in most cases the
89
+ * default should be sufficient.
90
+ *
91
+ * The default value is 256.
92
+ */
93
+ sampleRate?: number;
94
+ }
95
+ /**
96
+ * Options to create an `HNSW_PQ` index
97
+ */
98
+ export interface HnswPqOptions {
99
+ /**
100
+ * The distance metric used to train the index.
101
+ *
102
+ * Default value is "l2".
103
+ *
104
+ * The following distance types are available:
105
+ *
106
+ * "l2" - Euclidean distance. This is a very common distance metric that
107
+ * accounts for both magnitude and direction when determining the distance
108
+ * between vectors. L2 distance has a range of [0, ∞).
109
+ *
110
+ * "cosine" - Cosine distance. Cosine distance is a distance metric
111
+ * calculated from the cosine similarity between two vectors. Cosine
112
+ * similarity is a measure of similarity between two non-zero vectors of an
113
+ * inner product space. It is defined to equal the cosine of the angle
114
+ * between them. Unlike L2, the cosine distance is not affected by the
115
+ * magnitude of the vectors. Cosine distance has a range of [0, 2].
116
+ *
117
+ * "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
118
+ * distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
119
+ * L2 norm is 1), then dot distance is equivalent to the cosine distance.
120
+ */
121
+ distanceType?: "l2" | "cosine" | "dot";
122
+ /**
123
+ * The number of IVF partitions to create.
124
+ *
125
+ * For HNSW, we recommend a small number of partitions. Setting this to 1 works
126
+ * well for most tables. For very large tables, training just one HNSW graph
127
+ * will require too much memory. Each partition becomes its own HNSW graph, so
128
+ * setting this value higher reduces the peak memory use of training.
129
+ *
130
+ */
131
+ numPartitions?: number;
132
+ /**
133
+ * Number of sub-vectors of PQ.
134
+ *
135
+ * This value controls how much the vector is compressed during the quantization step.
136
+ * The more sub vectors there are the less the vector is compressed. The default is
137
+ * the dimension of the vector divided by 16. If the dimension is not evenly divisible
138
+ * by 16 we use the dimension divided by 8.
139
+ *
140
+ * The above two cases are highly preferred. Having 8 or 16 values per subvector allows
141
+ * us to use efficient SIMD instructions.
142
+ *
143
+ * If the dimension is not divisible by 8 then we use 1 subvector. This is not ideal and
144
+ * will likely result in poor performance.
145
+ *
146
+ */
147
+ numSubVectors?: number;
148
+ /**
149
+ * Max iterations to train kmeans.
150
+ *
151
+ * The default value is 50.
152
+ *
153
+ * When training an IVF index we use kmeans to calculate the partitions. This parameter
154
+ * controls how many iterations of kmeans to run.
155
+ *
156
+ * Increasing this might improve the quality of the index but in most cases the parameter
157
+ * is unused because kmeans will converge with fewer iterations. The parameter is only
158
+ * used in cases where kmeans does not appear to converge. In those cases it is unlikely
159
+ * that setting this larger will lead to the index converging anyways.
160
+ *
161
+ */
162
+ maxIterations?: number;
163
+ /**
164
+ * The rate used to calculate the number of training vectors for kmeans.
165
+ *
166
+ * Default value is 256.
167
+ *
168
+ * When an IVF index is trained, we need to calculate partitions. These are groups
169
+ * of vectors that are similar to each other. To do this we use an algorithm called kmeans.
170
+ *
171
+ * Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
172
+ * random sample of the data. This parameter controls the size of the sample. The total
173
+ * number of vectors used to train the index is `sample_rate * num_partitions`.
174
+ *
175
+ * Increasing this value might improve the quality of the index but in most cases the
176
+ * default should be sufficient.
177
+ *
178
+ */
179
+ sampleRate?: number;
180
+ /**
181
+ * The number of neighbors to select for each vector in the HNSW graph.
182
+ *
183
+ * The default value is 20.
184
+ *
185
+ * This value controls the tradeoff between search speed and accuracy.
186
+ * The higher the value the more accurate the search but the slower it will be.
187
+ *
188
+ */
189
+ m?: number;
190
+ /**
191
+ * The number of candidates to evaluate during the construction of the HNSW graph.
192
+ *
193
+ * The default value is 300.
194
+ *
195
+ * This value controls the tradeoff between build speed and accuracy.
196
+ * The higher the value the more accurate the build but the slower it will be.
197
+ * 150 to 300 is the typical range. 100 is a minimum for good quality search
198
+ * results. In most cases, there is no benefit to setting this higher than 500.
199
+ * This value should be set to a value that is not less than `ef` in the search phase.
200
+ *
201
+ */
202
+ efConstruction?: number;
203
+ }
204
+ /**
205
+ * Options to create an `HNSW_SQ` index
206
+ */
207
+ export interface HnswSqOptions {
208
+ /**
209
+ * The distance metric used to train the index.
210
+ *
211
+ * Default value is "l2".
212
+ *
213
+ * The following distance types are available:
214
+ *
215
+ * "l2" - Euclidean distance. This is a very common distance metric that
216
+ * accounts for both magnitude and direction when determining the distance
217
+ * between vectors. L2 distance has a range of [0, ∞).
218
+ *
219
+ * "cosine" - Cosine distance. Cosine distance is a distance metric
220
+ * calculated from the cosine similarity between two vectors. Cosine
221
+ * similarity is a measure of similarity between two non-zero vectors of an
222
+ * inner product space. It is defined to equal the cosine of the angle
223
+ * between them. Unlike L2, the cosine distance is not affected by the
224
+ * magnitude of the vectors. Cosine distance has a range of [0, 2].
225
+ *
226
+ * "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
227
+ * distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
228
+ * L2 norm is 1), then dot distance is equivalent to the cosine distance.
229
+ */
230
+ distanceType?: "l2" | "cosine" | "dot";
231
+ /**
232
+ * The number of IVF partitions to create.
233
+ *
234
+ * For HNSW, we recommend a small number of partitions. Setting this to 1 works
235
+ * well for most tables. For very large tables, training just one HNSW graph
236
+ * will require too much memory. Each partition becomes its own HNSW graph, so
237
+ * setting this value higher reduces the peak memory use of training.
238
+ *
239
+ */
240
+ numPartitions?: number;
241
+ /**
242
+ * Max iterations to train kmeans.
243
+ *
244
+ * The default value is 50.
245
+ *
246
+ * When training an IVF index we use kmeans to calculate the partitions. This parameter
247
+ * controls how many iterations of kmeans to run.
248
+ *
249
+ * Increasing this might improve the quality of the index but in most cases the parameter
250
+ * is unused because kmeans will converge with fewer iterations. The parameter is only
251
+ * used in cases where kmeans does not appear to converge. In those cases it is unlikely
252
+ * that setting this larger will lead to the index converging anyways.
253
+ *
254
+ */
255
+ maxIterations?: number;
256
+ /**
257
+ * The rate used to calculate the number of training vectors for kmeans.
258
+ *
259
+ * Default value is 256.
260
+ *
261
+ * When an IVF index is trained, we need to calculate partitions. These are groups
262
+ * of vectors that are similar to each other. To do this we use an algorithm called kmeans.
263
+ *
264
+ * Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
265
+ * random sample of the data. This parameter controls the size of the sample. The total
266
+ * number of vectors used to train the index is `sample_rate * num_partitions`.
267
+ *
268
+ * Increasing this value might improve the quality of the index but in most cases the
269
+ * default should be sufficient.
270
+ *
271
+ */
272
+ sampleRate?: number;
273
+ /**
274
+ * The number of neighbors to select for each vector in the HNSW graph.
275
+ *
276
+ * The default value is 20.
277
+ *
278
+ * This value controls the tradeoff between search speed and accuracy.
279
+ * The higher the value the more accurate the search but the slower it will be.
280
+ *
281
+ */
282
+ m?: number;
283
+ /**
284
+ * The number of candidates to evaluate during the construction of the HNSW graph.
285
+ *
286
+ * The default value is 300.
287
+ *
288
+ * This value controls the tradeoff between build speed and accuracy.
289
+ * The higher the value the more accurate the build but the slower it will be.
290
+ * 150 to 300 is the typical range. 100 is a minimum for good quality search
291
+ * results. In most cases, there is no benefit to setting this higher than 500.
292
+ * This value should be set to a value that is not less than `ef` in the search phase.
293
+ *
294
+ */
295
+ efConstruction?: number;
296
+ }
297
+ /**
298
+ * Options to create a full text search index
299
+ */
300
+ export interface FtsOptions {
301
+ /**
302
+ * Whether to build the index with positions.
303
+ * True by default.
304
+ * If set to false, the index will not store the positions of the tokens in the text,
305
+ * which will make the index smaller and faster to build, but will not support phrase queries.
306
+ */
307
+ withPosition?: boolean;
308
+ }
309
+ export declare class Index {
310
+ private readonly inner;
311
+ private constructor();
312
+ /**
313
+ * Create an IvfPq index
314
+ *
315
+ * This index stores a compressed (quantized) copy of every vector. These vectors
316
+ * are grouped into partitions of similar vectors. Each partition keeps track of
317
+ * a centroid which is the average value of all vectors in the group.
318
+ *
319
+ * During a query the centroids are compared with the query vector to find the closest
320
+ * partitions. The compressed vectors in these partitions are then searched to find
321
+ * the closest vectors.
322
+ *
323
+ * The compression scheme is called product quantization. Each vector is divided into
324
+ * subvectors and then each subvector is quantized into a small number of bits. The
325
+ * parameters `num_bits` and `num_subvectors` control this process, providing a tradeoff
326
+ * between index size (and thus search speed) and index accuracy.
327
+ *
328
+ * The partitioning process is called IVF and the `num_partitions` parameter controls how
329
+ * many groups to create.
330
+ *
331
+ * Note that training an IVF PQ index on a large dataset is a slow operation and
332
+ * currently is also a memory intensive operation.
333
+ */
334
+ static ivfPq(options?: Partial<IvfPqOptions>): Index;
335
+ /**
336
+ * Create a btree index
337
+ *
338
+ * A btree index is an index on a scalar column. The index stores a copy of the column
339
+ * in sorted order. A header entry is created for each block of rows (currently the
340
+ * block size is fixed at 4096). These header entries are stored in a separate
341
+ * cacheable structure (a btree). To search for data the header is used to determine
342
+ * which blocks need to be read from disk.
343
+ *
344
+ * For example, a btree index in a table with 1Bi rows requires sizeof(Scalar) * 256Ki
345
+ * bytes of memory and will generally need to read sizeof(Scalar) * 4096 bytes to find
346
+ * the correct row ids.
347
+ *
348
+ * This index is good for scalar columns with mostly distinct values and does best when
349
+ * the query is highly selective.
350
+ *
351
+ * The btree index does not currently have any parameters though parameters such as the
352
+ * block size may be added in the future.
353
+ */
354
+ static btree(): Index;
355
+ /**
356
+ * Create a bitmap index.
357
+ *
358
+ * A `Bitmap` index stores a bitmap for each distinct value in the column for every row.
359
+ *
360
+ * This index works best for low-cardinality columns, where the number of unique values
361
+ * is small (i.e., fewer than a few hundred).
362
+ */
363
+ static bitmap(): Index;
364
+ /**
365
+ * Create a label list index.
366
+ *
367
+ * LabelList index is a scalar index that can be used on `List<T>` columns to
368
+ * support queries with `array_contains_all` and `array_contains_any`
369
+ * using an underlying bitmap index.
370
+ */
371
+ static labelList(): Index;
372
+ /**
373
+ * Create a full text search index
374
+ *
375
+ * A full text search index is an index on a string column, so that you can conduct full
376
+ * text searches on the column.
377
+ *
378
+ * The results of a full text search are ordered by relevance measured by BM25.
379
+ *
380
+ * You can combine filters with full text search.
381
+ *
382
+ * For now, the full text search index only supports English, and doesn't support phrase search.
383
+ */
384
+ static fts(options?: Partial<FtsOptions>): Index;
385
+ /**
386
+ *
387
+ * Create a hnswPq index
388
+ *
389
+ * HNSW-PQ stands for Hierarchical Navigable Small World - Product Quantization.
390
+ * It is a variant of the HNSW algorithm that uses product quantization to compress
391
+ * the vectors.
392
+ *
393
+ */
394
+ static hnswPq(options?: Partial<HnswPqOptions>): Index;
395
+ /**
396
+ *
397
+ * Create a hnswSq index
398
+ *
399
+ * HNSW-SQ stands for Hierarchical Navigable Small World - Scalar Quantization.
400
+ * It is a variant of the HNSW algorithm that uses scalar quantization to compress
401
+ * the vectors.
402
+ *
403
+ */
404
+ static hnswSq(options?: Partial<HnswSqOptions>): Index;
405
+ }
406
+ export interface IndexOptions {
407
+ /**
408
+ * Advanced index configuration
409
+ *
410
+ * This option allows you to specify a specific index to create and also
411
+ * allows you to pass in configuration for training the index.
412
+ *
413
+ * See the static methods on Index for details on the various index types.
414
+ *
415
+ * If this is not supplied then column data type(s) and column statistics
416
+ * will be used to determine the most useful kind of index to create.
417
+ */
418
+ config?: Index;
419
+ /**
420
+ * Whether to replace the existing index
421
+ *
422
+ * If this is false, and another index already exists on the same columns
423
+ * and the same name, then an error will be returned. This is true even if
424
+ * that index is out of date.
425
+ *
426
+ * The default is true
427
+ */
428
+ replace?: boolean;
429
+ }
@@ -0,0 +1,131 @@
1
+ "use strict";
2
+ // Copyright 2024 Lance Developers.
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+ Object.defineProperty(exports, "__esModule", { value: true });
16
+ exports.Index = void 0;
17
+ const native_1 = require("./native");
18
+ class Index {
19
+ inner;
20
+ constructor(inner) {
21
+ this.inner = inner;
22
+ }
23
+ /**
24
+ * Create an IvfPq index
25
+ *
26
+ * This index stores a compressed (quantized) copy of every vector. These vectors
27
+ * are grouped into partitions of similar vectors. Each partition keeps track of
28
+ * a centroid which is the average value of all vectors in the group.
29
+ *
30
+ * During a query the centroids are compared with the query vector to find the closest
31
+ * partitions. The compressed vectors in these partitions are then searched to find
32
+ * the closest vectors.
33
+ *
34
+ * The compression scheme is called product quantization. Each vector is divided into
35
+ * subvectors and then each subvector is quantized into a small number of bits. The
36
+ * parameters `num_bits` and `num_subvectors` control this process, providing a tradeoff
37
+ * between index size (and thus search speed) and index accuracy.
38
+ *
39
+ * The partitioning process is called IVF and the `num_partitions` parameter controls how
40
+ * many groups to create.
41
+ *
42
+ * Note that training an IVF PQ index on a large dataset is a slow operation and
43
+ * currently is also a memory intensive operation.
44
+ */
45
+ static ivfPq(options) {
46
+ return new Index(native_1.Index.ivfPq(options?.distanceType, options?.numPartitions, options?.numSubVectors, options?.maxIterations, options?.sampleRate));
47
+ }
48
+ /**
49
+ * Create a btree index
50
+ *
51
+ * A btree index is an index on a scalar column. The index stores a copy of the column
52
+ * in sorted order. A header entry is created for each block of rows (currently the
53
+ * block size is fixed at 4096). These header entries are stored in a separate
54
+ * cacheable structure (a btree). To search for data the header is used to determine
55
+ * which blocks need to be read from disk.
56
+ *
57
+ * For example, a btree index in a table with 1Bi rows requires sizeof(Scalar) * 256Ki
58
+ * bytes of memory and will generally need to read sizeof(Scalar) * 4096 bytes to find
59
+ * the correct row ids.
60
+ *
61
+ * This index is good for scalar columns with mostly distinct values and does best when
62
+ * the query is highly selective.
63
+ *
64
+ * The btree index does not currently have any parameters though parameters such as the
65
+ * block size may be added in the future.
66
+ */
67
+ static btree() {
68
+ return new Index(native_1.Index.btree());
69
+ }
70
+ /**
71
+ * Create a bitmap index.
72
+ *
73
+ * A `Bitmap` index stores a bitmap for each distinct value in the column for every row.
74
+ *
75
+ * This index works best for low-cardinality columns, where the number of unique values
76
+ * is small (i.e., fewer than a few hundred).
77
+ */
78
+ static bitmap() {
79
+ return new Index(native_1.Index.bitmap());
80
+ }
81
+ /**
82
+ * Create a label list index.
83
+ *
84
+ * LabelList index is a scalar index that can be used on `List<T>` columns to
85
+ * support queries with `array_contains_all` and `array_contains_any`
86
+ * using an underlying bitmap index.
87
+ */
88
+ static labelList() {
89
+ return new Index(native_1.Index.labelList());
90
+ }
91
+ /**
92
+ * Create a full text search index
93
+ *
94
+ * A full text search index is an index on a string column, so that you can conduct full
95
+ * text searches on the column.
96
+ *
97
+ * The results of a full text search are ordered by relevance measured by BM25.
98
+ *
99
+ * You can combine filters with full text search.
100
+ *
101
+ * For now, the full text search index only supports English, and doesn't support phrase search.
102
+ */
103
+ static fts(options) {
104
+ return new Index(native_1.Index.fts(options?.withPosition));
105
+ }
106
+ /**
107
+ *
108
+ * Create a hnswPq index
109
+ *
110
+ * HNSW-PQ stands for Hierarchical Navigable Small World - Product Quantization.
111
+ * It is a variant of the HNSW algorithm that uses product quantization to compress
112
+ * the vectors.
113
+ *
114
+ */
115
+ static hnswPq(options) {
116
+ return new Index(native_1.Index.hnswPq(options?.distanceType, options?.numPartitions, options?.numSubVectors, options?.maxIterations, options?.sampleRate, options?.m, options?.efConstruction));
117
+ }
118
+ /**
119
+ *
120
+ * Create a hnswSq index
121
+ *
122
+ * HNSW-SQ stands for Hierarchical Navigable Small World - Scalar Quantization.
123
+ * It is a variant of the HNSW algorithm that uses scalar quantization to compress
124
+ * the vectors.
125
+ *
126
+ */
127
+ static hnswSq(options) {
128
+ return new Index(native_1.Index.hnswSq(options?.distanceType, options?.numPartitions, options?.maxIterations, options?.sampleRate, options?.m, options?.efConstruction));
129
+ }
130
+ }
131
+ exports.Index = Index;
@@ -0,0 +1,54 @@
1
+ import { Data } from "./arrow";
2
+ import { NativeMergeInsertBuilder } from "./native";
3
+ /** A builder used to create and run a merge insert operation */
4
+ export declare class MergeInsertBuilder {
5
+ #private;
6
+ /** Construct a MergeInsertBuilder. __Internal use only.__ */
7
+ constructor(native: NativeMergeInsertBuilder);
8
+ /**
9
+ * Rows that exist in both the source table (new data) and
10
+ * the target table (old data) will be updated, replacing
11
+ * the old row with the corresponding matching row.
12
+ *
13
+ * If there are multiple matches then the behavior is undefined.
14
+ * Currently this causes multiple copies of the row to be created
15
+ * but that behavior is subject to change.
16
+ *
17
+ * An optional condition may be specified. If it is, then only
18
+ * matched rows that satisfy the condition will be updated. Any
19
+ * rows that do not satisfy the condition will be left as they
20
+ * are. Failing to satisfy the condition does not cause a
21
+ * "matched row" to become a "not matched" row.
22
+ *
23
+ * The condition should be an SQL string. Use the prefix
24
+ * target. to refer to rows in the target table (old data)
25
+ * and the prefix source. to refer to rows in the source
26
+ * table (new data).
27
+ *
28
+ * For example, "target.last_update < source.last_update"
29
+ */
30
+ whenMatchedUpdateAll(options?: {
31
+ where: string;
32
+ }): MergeInsertBuilder;
33
+ /**
34
+ * Rows that exist only in the source table (new data) should
35
+ * be inserted into the target table.
36
+ */
37
+ whenNotMatchedInsertAll(): MergeInsertBuilder;
38
+ /**
39
+ * Rows that exist only in the target table (old data) will be
40
+ * deleted. An optional condition can be provided to limit what
41
+ * data is deleted.
42
+ *
43
+ * @param options.where - An optional condition to limit what data is deleted
44
+ */
45
+ whenNotMatchedBySourceDelete(options?: {
46
+ where: string;
47
+ }): MergeInsertBuilder;
48
+ /**
49
+ * Executes the merge insert operation
50
+ *
51
+ * Nothing is returned but the `Table` is updated
52
+ */
53
+ execute(data: Data): Promise<void>;
54
+ }