@lancedb/lancedb 0.10.0 → 0.11.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/connection.d.ts +4 -4
- package/dist/connection.js +2 -2
- package/dist/index.js +3 -1
- package/dist/indices.d.ts +196 -3
- package/dist/indices.js +11 -3
- package/dist/native.d.ts +2 -2
- package/package.json +8 -8
- package/native.d.ts +0 -208
package/dist/connection.d.ts
CHANGED
|
@@ -30,8 +30,8 @@ export interface CreateTableOptions {
|
|
|
30
30
|
/**
|
|
31
31
|
* The version of the data storage format to use.
|
|
32
32
|
*
|
|
33
|
-
* The default is `legacy`.
|
|
34
|
-
*
|
|
33
|
+
* The default is `stable`.
|
|
34
|
+
* Set to "legacy" to use the old format.
|
|
35
35
|
*/
|
|
36
36
|
dataStorageVersion?: string;
|
|
37
37
|
/**
|
|
@@ -45,9 +45,9 @@ export interface CreateTableOptions {
|
|
|
45
45
|
/**
|
|
46
46
|
* If true then data files will be written with the legacy format
|
|
47
47
|
*
|
|
48
|
-
* The default is
|
|
48
|
+
* The default is false.
|
|
49
49
|
*
|
|
50
|
-
* Deprecated.
|
|
50
|
+
* Deprecated. Use data storage version instead.
|
|
51
51
|
*/
|
|
52
52
|
useLegacyFormat?: boolean;
|
|
53
53
|
schema?: SchemaLike;
|
package/dist/connection.js
CHANGED
|
@@ -73,7 +73,7 @@ class LocalConnection extends Connection {
|
|
|
73
73
|
throw new Error("data is required");
|
|
74
74
|
}
|
|
75
75
|
const { buf, mode } = await table_1.Table.parseTableData(data, options);
|
|
76
|
-
let dataStorageVersion = "legacy";
|
|
76
|
+
let dataStorageVersion = "stable";
|
|
77
77
|
if (options?.dataStorageVersion !== undefined) {
|
|
78
78
|
dataStorageVersion = options.dataStorageVersion;
|
|
79
79
|
}
|
|
@@ -95,7 +95,7 @@ class LocalConnection extends Connection {
|
|
|
95
95
|
const registry = (0, registry_1.getRegistry)();
|
|
96
96
|
metadata = registry.getTableMetadata([embeddingFunction]);
|
|
97
97
|
}
|
|
98
|
-
let dataStorageVersion = "legacy";
|
|
98
|
+
let dataStorageVersion = "stable";
|
|
99
99
|
if (options?.dataStorageVersion !== undefined) {
|
|
100
100
|
dataStorageVersion = options.dataStorageVersion;
|
|
101
101
|
}
|
package/dist/index.js
CHANGED
|
@@ -13,11 +13,13 @@
|
|
|
13
13
|
// See the License for the specific language governing permissions and
|
|
14
14
|
// limitations under the License.
|
|
15
15
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
-
exports.embedding = exports.Table = exports.Index = exports.RecordBatchIterator = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = void 0;
|
|
16
|
+
exports.embedding = exports.Table = exports.Index = exports.RecordBatchIterator = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = exports.WriteMode = void 0;
|
|
17
17
|
exports.connect = connect;
|
|
18
18
|
const connection_1 = require("./connection");
|
|
19
19
|
const native_js_1 = require("./native.js");
|
|
20
20
|
const remote_1 = require("./remote");
|
|
21
|
+
var native_js_2 = require("./native.js");
|
|
22
|
+
Object.defineProperty(exports, "WriteMode", { enumerable: true, get: function () { return native_js_2.WriteMode; } });
|
|
21
23
|
var arrow_1 = require("./arrow");
|
|
22
24
|
Object.defineProperty(exports, "makeArrowTable", { enumerable: true, get: function () { return arrow_1.makeArrowTable; } });
|
|
23
25
|
Object.defineProperty(exports, "MakeArrowTableOptions", { enumerable: true, get: function () { return arrow_1.MakeArrowTableOptions; } });
|
package/dist/indices.d.ts
CHANGED
|
@@ -92,21 +92,206 @@ export interface IvfPqOptions {
|
|
|
92
92
|
*/
|
|
93
93
|
sampleRate?: number;
|
|
94
94
|
}
|
|
95
|
+
/**
|
|
96
|
+
* Options to create an `HNSW_PQ` index
|
|
97
|
+
*/
|
|
95
98
|
export interface HnswPqOptions {
|
|
99
|
+
/**
|
|
100
|
+
* The distance metric used to train the index.
|
|
101
|
+
*
|
|
102
|
+
* Default value is "l2".
|
|
103
|
+
*
|
|
104
|
+
* The following distance types are available:
|
|
105
|
+
*
|
|
106
|
+
* "l2" - Euclidean distance. This is a very common distance metric that
|
|
107
|
+
* accounts for both magnitude and direction when determining the distance
|
|
108
|
+
* between vectors. L2 distance has a range of [0, ∞).
|
|
109
|
+
*
|
|
110
|
+
* "cosine" - Cosine distance. Cosine distance is a distance metric
|
|
111
|
+
* calculated from the cosine similarity between two vectors. Cosine
|
|
112
|
+
* similarity is a measure of similarity between two non-zero vectors of an
|
|
113
|
+
* inner product space. It is defined to equal the cosine of the angle
|
|
114
|
+
* between them. Unlike L2, the cosine distance is not affected by the
|
|
115
|
+
* magnitude of the vectors. Cosine distance has a range of [0, 2].
|
|
116
|
+
*
|
|
117
|
+
* "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
|
118
|
+
* distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
|
119
|
+
* L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
|
120
|
+
*/
|
|
96
121
|
distanceType?: "l2" | "cosine" | "dot";
|
|
122
|
+
/**
|
|
123
|
+
* The number of IVF partitions to create.
|
|
124
|
+
*
|
|
125
|
+
* For HNSW, we recommend a small number of partitions. Setting this to 1 works
|
|
126
|
+
* well for most tables. For very large tables, training just one HNSW graph
|
|
127
|
+
* will require too much memory. Each partition becomes its own HNSW graph, so
|
|
128
|
+
* setting this value higher reduces the peak memory use of training.
|
|
129
|
+
*
|
|
130
|
+
*/
|
|
97
131
|
numPartitions?: number;
|
|
132
|
+
/**
|
|
133
|
+
* Number of sub-vectors of PQ.
|
|
134
|
+
*
|
|
135
|
+
* This value controls how much the vector is compressed during the quantization step.
|
|
136
|
+
* The more sub vectors there are the less the vector is compressed. The default is
|
|
137
|
+
* the dimension of the vector divided by 16. If the dimension is not evenly divisible
|
|
138
|
+
* by 16 we use the dimension divided by 8.
|
|
139
|
+
*
|
|
140
|
+
* The above two cases are highly preferred. Having 8 or 16 values per subvector allows
|
|
141
|
+
* us to use efficient SIMD instructions.
|
|
142
|
+
*
|
|
143
|
+
* If the dimension is not divisible by 8 then we use 1 subvector. This is not ideal and
|
|
144
|
+
* will likely result in poor performance.
|
|
145
|
+
*
|
|
146
|
+
*/
|
|
98
147
|
numSubVectors?: number;
|
|
148
|
+
/**
|
|
149
|
+
* Max iterations to train kmeans.
|
|
150
|
+
*
|
|
151
|
+
* The default value is 50.
|
|
152
|
+
*
|
|
153
|
+
* When training an IVF index we use kmeans to calculate the partitions. This parameter
|
|
154
|
+
* controls how many iterations of kmeans to run.
|
|
155
|
+
*
|
|
156
|
+
* Increasing this might improve the quality of the index but in most cases the parameter
|
|
157
|
+
* is unused because kmeans will converge with fewer iterations. The parameter is only
|
|
158
|
+
* used in cases where kmeans does not appear to converge. In those cases it is unlikely
|
|
159
|
+
* that setting this larger will lead to the index converging anyways.
|
|
160
|
+
*
|
|
161
|
+
*/
|
|
99
162
|
maxIterations?: number;
|
|
163
|
+
/**
|
|
164
|
+
* The rate used to calculate the number of training vectors for kmeans.
|
|
165
|
+
*
|
|
166
|
+
* Default value is 256.
|
|
167
|
+
*
|
|
168
|
+
* When an IVF index is trained, we need to calculate partitions. These are groups
|
|
169
|
+
* of vectors that are similar to each other. To do this we use an algorithm called kmeans.
|
|
170
|
+
*
|
|
171
|
+
* Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
|
|
172
|
+
* random sample of the data. This parameter controls the size of the sample. The total
|
|
173
|
+
* number of vectors used to train the index is `sample_rate * num_partitions`.
|
|
174
|
+
*
|
|
175
|
+
* Increasing this value might improve the quality of the index but in most cases the
|
|
176
|
+
* default should be sufficient.
|
|
177
|
+
*
|
|
178
|
+
*/
|
|
100
179
|
sampleRate?: number;
|
|
180
|
+
/**
|
|
181
|
+
* The number of neighbors to select for each vector in the HNSW graph.
|
|
182
|
+
*
|
|
183
|
+
* The default value is 20.
|
|
184
|
+
*
|
|
185
|
+
* This value controls the tradeoff between search speed and accuracy.
|
|
186
|
+
* The higher the value the more accurate the search but the slower it will be.
|
|
187
|
+
*
|
|
188
|
+
*/
|
|
101
189
|
m?: number;
|
|
190
|
+
/**
|
|
191
|
+
* The number of candidates to evaluate during the construction of the HNSW graph.
|
|
192
|
+
*
|
|
193
|
+
* The default value is 300.
|
|
194
|
+
*
|
|
195
|
+
* This value controls the tradeoff between build speed and accuracy.
|
|
196
|
+
* The higher the value the more accurate the build but the slower it will be.
|
|
197
|
+
* 150 to 300 is the typical range. 100 is a minimum for good quality search
|
|
198
|
+
* results. In most cases, there is no benefit to setting this higher than 500.
|
|
199
|
+
* This value should be set to a value that is not less than `ef` in the search phase.
|
|
200
|
+
*
|
|
201
|
+
*/
|
|
102
202
|
efConstruction?: number;
|
|
103
203
|
}
|
|
204
|
+
/**
|
|
205
|
+
* Options to create an `HNSW_SQ` index
|
|
206
|
+
*/
|
|
104
207
|
export interface HnswSqOptions {
|
|
208
|
+
/**
|
|
209
|
+
* The distance metric used to train the index.
|
|
210
|
+
*
|
|
211
|
+
* Default value is "l2".
|
|
212
|
+
*
|
|
213
|
+
* The following distance types are available:
|
|
214
|
+
*
|
|
215
|
+
* "l2" - Euclidean distance. This is a very common distance metric that
|
|
216
|
+
* accounts for both magnitude and direction when determining the distance
|
|
217
|
+
* between vectors. L2 distance has a range of [0, ∞).
|
|
218
|
+
*
|
|
219
|
+
* "cosine" - Cosine distance. Cosine distance is a distance metric
|
|
220
|
+
* calculated from the cosine similarity between two vectors. Cosine
|
|
221
|
+
* similarity is a measure of similarity between two non-zero vectors of an
|
|
222
|
+
* inner product space. It is defined to equal the cosine of the angle
|
|
223
|
+
* between them. Unlike L2, the cosine distance is not affected by the
|
|
224
|
+
* magnitude of the vectors. Cosine distance has a range of [0, 2].
|
|
225
|
+
*
|
|
226
|
+
* "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
|
227
|
+
* distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
|
228
|
+
* L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
|
229
|
+
*/
|
|
105
230
|
distanceType?: "l2" | "cosine" | "dot";
|
|
231
|
+
/**
|
|
232
|
+
* The number of IVF partitions to create.
|
|
233
|
+
*
|
|
234
|
+
* For HNSW, we recommend a small number of partitions. Setting this to 1 works
|
|
235
|
+
* well for most tables. For very large tables, training just one HNSW graph
|
|
236
|
+
* will require too much memory. Each partition becomes its own HNSW graph, so
|
|
237
|
+
* setting this value higher reduces the peak memory use of training.
|
|
238
|
+
*
|
|
239
|
+
*/
|
|
106
240
|
numPartitions?: number;
|
|
241
|
+
/**
|
|
242
|
+
* Max iterations to train kmeans.
|
|
243
|
+
*
|
|
244
|
+
* The default value is 50.
|
|
245
|
+
*
|
|
246
|
+
* When training an IVF index we use kmeans to calculate the partitions. This parameter
|
|
247
|
+
* controls how many iterations of kmeans to run.
|
|
248
|
+
*
|
|
249
|
+
* Increasing this might improve the quality of the index but in most cases the parameter
|
|
250
|
+
* is unused because kmeans will converge with fewer iterations. The parameter is only
|
|
251
|
+
* used in cases where kmeans does not appear to converge. In those cases it is unlikely
|
|
252
|
+
* that setting this larger will lead to the index converging anyways.
|
|
253
|
+
*
|
|
254
|
+
*/
|
|
107
255
|
maxIterations?: number;
|
|
256
|
+
/**
|
|
257
|
+
* The rate used to calculate the number of training vectors for kmeans.
|
|
258
|
+
*
|
|
259
|
+
* Default value is 256.
|
|
260
|
+
*
|
|
261
|
+
* When an IVF index is trained, we need to calculate partitions. These are groups
|
|
262
|
+
* of vectors that are similar to each other. To do this we use an algorithm called kmeans.
|
|
263
|
+
*
|
|
264
|
+
* Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
|
|
265
|
+
* random sample of the data. This parameter controls the size of the sample. The total
|
|
266
|
+
* number of vectors used to train the index is `sample_rate * num_partitions`.
|
|
267
|
+
*
|
|
268
|
+
* Increasing this value might improve the quality of the index but in most cases the
|
|
269
|
+
* default should be sufficient.
|
|
270
|
+
*
|
|
271
|
+
*/
|
|
108
272
|
sampleRate?: number;
|
|
273
|
+
/**
|
|
274
|
+
* The number of neighbors to select for each vector in the HNSW graph.
|
|
275
|
+
*
|
|
276
|
+
* The default value is 20.
|
|
277
|
+
*
|
|
278
|
+
* This value controls the tradeoff between search speed and accuracy.
|
|
279
|
+
* The higher the value the more accurate the search but the slower it will be.
|
|
280
|
+
*
|
|
281
|
+
*/
|
|
109
282
|
m?: number;
|
|
283
|
+
/**
|
|
284
|
+
* The number of candidates to evaluate during the construction of the HNSW graph.
|
|
285
|
+
*
|
|
286
|
+
* The default value is 300.
|
|
287
|
+
*
|
|
288
|
+
* This value controls the tradeoff between build speed and accuracy.
|
|
289
|
+
* The higher the value the more accurate the build but the slower it will be.
|
|
290
|
+
* 150 to 300 is the typical range. 100 is a minimum for good quality search
|
|
291
|
+
* results. In most cases, there is no benefit to setting this higher than 500.
|
|
292
|
+
* This value should be set to a value that is not less than `ef` in the search phase.
|
|
293
|
+
*
|
|
294
|
+
*/
|
|
110
295
|
efConstruction?: number;
|
|
111
296
|
}
|
|
112
297
|
/**
|
|
@@ -119,7 +304,7 @@ export interface FtsOptions {
|
|
|
119
304
|
* If set to false, the index will not store the positions of the tokens in the text,
|
|
120
305
|
* which will make the index smaller and faster to build, but will not support phrase queries.
|
|
121
306
|
*/
|
|
122
|
-
|
|
307
|
+
withPosition?: boolean;
|
|
123
308
|
}
|
|
124
309
|
export declare class Index {
|
|
125
310
|
private readonly inner;
|
|
@@ -199,13 +384,21 @@ export declare class Index {
|
|
|
199
384
|
static fts(options?: Partial<FtsOptions>): Index;
|
|
200
385
|
/**
|
|
201
386
|
*
|
|
202
|
-
* Create a
|
|
387
|
+
* Create a hnswPq index
|
|
388
|
+
*
|
|
389
|
+
* HNSW-PQ stands for Hierarchical Navigable Small World - Product Quantization.
|
|
390
|
+
* It is a variant of the HNSW algorithm that uses product quantization to compress
|
|
391
|
+
* the vectors.
|
|
203
392
|
*
|
|
204
393
|
*/
|
|
205
394
|
static hnswPq(options?: Partial<HnswPqOptions>): Index;
|
|
206
395
|
/**
|
|
207
396
|
*
|
|
208
|
-
* Create a
|
|
397
|
+
* Create a hnswSq index
|
|
398
|
+
*
|
|
399
|
+
* HNSW-SQ stands for Hierarchical Navigable Small World - Scalar Quantization.
|
|
400
|
+
* It is a variant of the HNSW algorithm that uses scalar quantization to compress
|
|
401
|
+
* the vectors.
|
|
209
402
|
*
|
|
210
403
|
*/
|
|
211
404
|
static hnswSq(options?: Partial<HnswSqOptions>): Index;
|
package/dist/indices.js
CHANGED
|
@@ -101,11 +101,15 @@ class Index {
|
|
|
101
101
|
* For now, the full text search index only supports English, and doesn't support phrase search.
|
|
102
102
|
*/
|
|
103
103
|
static fts(options) {
|
|
104
|
-
return new Index(native_1.Index.fts(options?.
|
|
104
|
+
return new Index(native_1.Index.fts(options?.withPosition));
|
|
105
105
|
}
|
|
106
106
|
/**
|
|
107
107
|
*
|
|
108
|
-
* Create a
|
|
108
|
+
* Create a hnswPq index
|
|
109
|
+
*
|
|
110
|
+
* HNSW-PQ stands for Hierarchical Navigable Small World - Product Quantization.
|
|
111
|
+
* It is a variant of the HNSW algorithm that uses product quantization to compress
|
|
112
|
+
* the vectors.
|
|
109
113
|
*
|
|
110
114
|
*/
|
|
111
115
|
static hnswPq(options) {
|
|
@@ -113,7 +117,11 @@ class Index {
|
|
|
113
117
|
}
|
|
114
118
|
/**
|
|
115
119
|
*
|
|
116
|
-
* Create a
|
|
120
|
+
* Create a hnswSq index
|
|
121
|
+
*
|
|
122
|
+
* HNSW-SQ stands for Hierarchical Navigable Small World - Scalar Quantization.
|
|
123
|
+
* It is a variant of the HNSW algorithm that uses scalar quantization to compress
|
|
124
|
+
* the vectors.
|
|
117
125
|
*
|
|
118
126
|
*/
|
|
119
127
|
static hnswSq(options) {
|
package/dist/native.d.ts
CHANGED
|
@@ -109,7 +109,7 @@ export interface ConnectionOptions {
|
|
|
109
109
|
storageOptions?: Record<string, string>
|
|
110
110
|
}
|
|
111
111
|
/** Write mode for writing a table. */
|
|
112
|
-
export const enum WriteMode {
|
|
112
|
+
export enum WriteMode {
|
|
113
113
|
Create = 'Create',
|
|
114
114
|
Append = 'Append',
|
|
115
115
|
Overwrite = 'Overwrite'
|
|
@@ -202,7 +202,7 @@ export class Table {
|
|
|
202
202
|
countRows(filter?: string | undefined | null): Promise<number>
|
|
203
203
|
delete(predicate: string): Promise<void>
|
|
204
204
|
createIndex(index: Index | undefined | null, column: string, replace?: boolean | undefined | null): Promise<void>
|
|
205
|
-
update(onlyIf: string | undefined | null, columns: Array<[string, string]>): Promise<void>
|
|
205
|
+
update(onlyIf: string | undefined | null, columns: Array<[string, string]>): Promise<bigint>
|
|
206
206
|
query(): Query
|
|
207
207
|
vectorSearch(vector: Float32Array): VectorQuery
|
|
208
208
|
addColumns(transforms: Array<AddColumnsSql>): Promise<void>
|
package/package.json
CHANGED
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
"vector database",
|
|
11
11
|
"ann"
|
|
12
12
|
],
|
|
13
|
-
"version": "0.10.0",
|
|
13
|
+
"version": "0.11.0-beta.1",
|
|
14
14
|
"main": "dist/index.js",
|
|
15
15
|
"exports": {
|
|
16
16
|
".": "./dist/index.js",
|
|
@@ -73,8 +73,8 @@
|
|
|
73
73
|
],
|
|
74
74
|
"scripts": {
|
|
75
75
|
"artifacts": "napi artifacts",
|
|
76
|
-
"build:debug": "napi build --platform --dts ../lancedb/native.d.ts --js ../lancedb/native.js lancedb",
|
|
77
|
-
"build:release": "napi build --platform --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js dist/",
|
|
76
|
+
"build:debug": "napi build --platform --no-const-enum --dts ../lancedb/native.d.ts --js ../lancedb/native.js lancedb",
|
|
77
|
+
"build:release": "napi build --platform --no-const-enum --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js dist/",
|
|
78
78
|
"build": "npm run build:debug && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts && shx cp lancedb/*.node dist/",
|
|
79
79
|
"build-release": "npm run build:release && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts",
|
|
80
80
|
"lint-ci": "biome ci .",
|
|
@@ -92,11 +92,11 @@
|
|
|
92
92
|
"reflect-metadata": "^0.2.2"
|
|
93
93
|
},
|
|
94
94
|
"optionalDependencies": {
|
|
95
|
-
"@lancedb/lancedb-darwin-arm64": "0.10.0",
|
|
96
|
-
"@lancedb/lancedb-linux-arm64-gnu": "0.10.0",
|
|
97
|
-
"@lancedb/lancedb-darwin-x64": "0.10.0",
|
|
98
|
-
"@lancedb/lancedb-linux-x64-gnu": "0.10.0",
|
|
99
|
-
"@lancedb/lancedb-win32-x64-msvc": "0.10.0"
|
|
95
|
+
"@lancedb/lancedb-darwin-arm64": "0.11.0-beta.1",
|
|
96
|
+
"@lancedb/lancedb-linux-arm64-gnu": "0.11.0-beta.1",
|
|
97
|
+
"@lancedb/lancedb-darwin-x64": "0.11.0-beta.1",
|
|
98
|
+
"@lancedb/lancedb-linux-x64-gnu": "0.11.0-beta.1",
|
|
99
|
+
"@lancedb/lancedb-win32-x64-msvc": "0.11.0-beta.1"
|
|
100
100
|
},
|
|
101
101
|
"peerDependencies": {
|
|
102
102
|
"apache-arrow": ">=13.0.0 <=17.0.0"
|
package/native.d.ts
DELETED
|
@@ -1,208 +0,0 @@
|
|
|
1
|
-
/* tslint:disable */
|
|
2
|
-
/* eslint-disable */
|
|
3
|
-
|
|
4
|
-
/* auto-generated by NAPI-RS */
|
|
5
|
-
|
|
6
|
-
/** A description of an index currently configured on a column */
|
|
7
|
-
export interface IndexConfig {
|
|
8
|
-
/** The name of the index */
|
|
9
|
-
name: string
|
|
10
|
-
/** The type of the index */
|
|
11
|
-
indexType: string
|
|
12
|
-
/**
|
|
13
|
-
* The columns in the index
|
|
14
|
-
*
|
|
15
|
-
* Currently this is always an array of size 1. In the future there may
|
|
16
|
-
* be more columns to represent composite indices.
|
|
17
|
-
*/
|
|
18
|
-
columns: Array<string>
|
|
19
|
-
}
|
|
20
|
-
/** Statistics about a compaction operation. */
|
|
21
|
-
export interface CompactionStats {
|
|
22
|
-
/** The number of fragments removed */
|
|
23
|
-
fragmentsRemoved: number
|
|
24
|
-
/** The number of new, compacted fragments added */
|
|
25
|
-
fragmentsAdded: number
|
|
26
|
-
/** The number of data files removed */
|
|
27
|
-
filesRemoved: number
|
|
28
|
-
/** The number of new, compacted data files added */
|
|
29
|
-
filesAdded: number
|
|
30
|
-
}
|
|
31
|
-
/** Statistics about a cleanup operation */
|
|
32
|
-
export interface RemovalStats {
|
|
33
|
-
/** The number of bytes removed */
|
|
34
|
-
bytesRemoved: number
|
|
35
|
-
/** The number of old versions removed */
|
|
36
|
-
oldVersionsRemoved: number
|
|
37
|
-
}
|
|
38
|
-
/** Statistics about an optimize operation */
|
|
39
|
-
export interface OptimizeStats {
|
|
40
|
-
/** Statistics about the compaction operation */
|
|
41
|
-
compaction: CompactionStats
|
|
42
|
-
/** Statistics about the removal operation */
|
|
43
|
-
prune: RemovalStats
|
|
44
|
-
}
|
|
45
|
-
/**
|
|
46
|
-
* A definition of a column alteration. The alteration changes the column at
|
|
47
|
-
* `path` to have the new name `name`, to be nullable if `nullable` is true,
|
|
48
|
-
* and to have the data type `data_type`. At least one of `rename` or `nullable`
|
|
49
|
-
* must be provided.
|
|
50
|
-
*/
|
|
51
|
-
export interface ColumnAlteration {
|
|
52
|
-
/**
|
|
53
|
-
* The path to the column to alter. This is a dot-separated path to the column.
|
|
54
|
-
* If it is a top-level column then it is just the name of the column. If it is
|
|
55
|
-
* a nested column then it is the path to the column, e.g. "a.b.c" for a column
|
|
56
|
-
* `c` nested inside a column `b` nested inside a column `a`.
|
|
57
|
-
*/
|
|
58
|
-
path: string
|
|
59
|
-
/**
|
|
60
|
-
* The new name of the column. If not provided then the name will not be changed.
|
|
61
|
-
* This must be distinct from the names of all other columns in the table.
|
|
62
|
-
*/
|
|
63
|
-
rename?: string
|
|
64
|
-
/** Set the new nullability. Note that a nullable column cannot be made non-nullable. */
|
|
65
|
-
nullable?: boolean
|
|
66
|
-
}
|
|
67
|
-
/** A definition of a new column to add to a table. */
|
|
68
|
-
export interface AddColumnsSql {
|
|
69
|
-
/** The name of the new column. */
|
|
70
|
-
name: string
|
|
71
|
-
/**
|
|
72
|
-
* The values to populate the new column with, as a SQL expression.
|
|
73
|
-
* The expression can reference other columns in the table.
|
|
74
|
-
*/
|
|
75
|
-
valueSql: string
|
|
76
|
-
}
|
|
77
|
-
export interface IndexStatistics {
|
|
78
|
-
/** The number of rows indexed by the index */
|
|
79
|
-
numIndexedRows: number
|
|
80
|
-
/** The number of rows not indexed */
|
|
81
|
-
numUnindexedRows: number
|
|
82
|
-
/** The type of the index */
|
|
83
|
-
indexType?: string
|
|
84
|
-
/** The metadata for each index */
|
|
85
|
-
indices: Array<IndexMetadata>
|
|
86
|
-
}
|
|
87
|
-
export interface IndexMetadata {
|
|
88
|
-
metricType?: string
|
|
89
|
-
indexType?: string
|
|
90
|
-
}
|
|
91
|
-
export interface ConnectionOptions {
|
|
92
|
-
/**
|
|
93
|
-
* (For LanceDB OSS only): The interval, in seconds, at which to check for
|
|
94
|
-
* updates to the table from other processes. If None, then consistency is not
|
|
95
|
-
* checked. For performance reasons, this is the default. For strong
|
|
96
|
-
* consistency, set this to zero seconds. Then every read will check for
|
|
97
|
-
* updates from other processes. As a compromise, you can set this to a
|
|
98
|
-
* non-zero value for eventual consistency. If more than that interval
|
|
99
|
-
* has passed since the last check, then the table will be checked for updates.
|
|
100
|
-
* Note: this consistency only applies to read operations. Write operations are
|
|
101
|
-
* always consistent.
|
|
102
|
-
*/
|
|
103
|
-
readConsistencyInterval?: number
|
|
104
|
-
/**
|
|
105
|
-
* (For LanceDB OSS only): configuration for object storage.
|
|
106
|
-
*
|
|
107
|
-
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
|
108
|
-
*/
|
|
109
|
-
storageOptions?: Record<string, string>
|
|
110
|
-
}
|
|
111
|
-
/** Write mode for writing a table. */
|
|
112
|
-
export const enum WriteMode {
|
|
113
|
-
Create = 'Create',
|
|
114
|
-
Append = 'Append',
|
|
115
|
-
Overwrite = 'Overwrite'
|
|
116
|
-
}
|
|
117
|
-
/** Write options when creating a Table. */
|
|
118
|
-
export interface WriteOptions {
|
|
119
|
-
/** Write mode for writing to a table. */
|
|
120
|
-
mode?: WriteMode
|
|
121
|
-
}
|
|
122
|
-
export interface OpenTableOptions {
|
|
123
|
-
storageOptions?: Record<string, string>
|
|
124
|
-
}
|
|
125
|
-
export class Connection {
|
|
126
|
-
/** Create a new Connection instance from the given URI. */
|
|
127
|
-
static new(uri: string, options: ConnectionOptions): Promise<Connection>
|
|
128
|
-
display(): string
|
|
129
|
-
isOpen(): boolean
|
|
130
|
-
close(): void
|
|
131
|
-
/** List all tables in the dataset. */
|
|
132
|
-
tableNames(startAfter?: string | undefined | null, limit?: number | undefined | null): Promise<Array<string>>
|
|
133
|
-
/**
|
|
134
|
-
* Create table from an Apache Arrow IPC (file) buffer.
|
|
135
|
-
*
|
|
136
|
-
* Parameters:
|
|
137
|
-
* - name: The name of the table.
|
|
138
|
-
* - buf: The buffer containing the IPC file.
|
|
139
|
-
*
|
|
140
|
-
*/
|
|
141
|
-
createTable(name: string, buf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
|
|
142
|
-
createEmptyTable(name: string, schemaBuf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
|
|
143
|
-
openTable(name: string, storageOptions?: Record<string, string> | undefined | null, indexCacheSize?: number | undefined | null): Promise<Table>
|
|
144
|
-
/** Drop table with the name. Or raise an error if the table does not exist. */
|
|
145
|
-
dropTable(name: string): Promise<void>
|
|
146
|
-
}
|
|
147
|
-
export class Index {
|
|
148
|
-
static ivfPq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
|
|
149
|
-
static btree(): Index
|
|
150
|
-
}
|
|
151
|
-
/** Typescript-style Async Iterator over RecordBatches */
|
|
152
|
-
export class RecordBatchIterator {
|
|
153
|
-
next(): Promise<Buffer | null>
|
|
154
|
-
}
|
|
155
|
-
/** A builder used to create and run a merge insert operation */
|
|
156
|
-
export class NativeMergeInsertBuilder {
|
|
157
|
-
whenMatchedUpdateAll(condition?: string | undefined | null): NativeMergeInsertBuilder
|
|
158
|
-
whenNotMatchedInsertAll(): NativeMergeInsertBuilder
|
|
159
|
-
whenNotMatchedBySourceDelete(filter?: string | undefined | null): NativeMergeInsertBuilder
|
|
160
|
-
execute(buf: Buffer): Promise<void>
|
|
161
|
-
}
|
|
162
|
-
export class Query {
|
|
163
|
-
onlyIf(predicate: string): void
|
|
164
|
-
select(columns: Array<[string, string]>): void
|
|
165
|
-
limit(limit: number): void
|
|
166
|
-
nearestTo(vector: Float32Array): VectorQuery
|
|
167
|
-
execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
|
|
168
|
-
explainPlan(verbose: boolean): Promise<string>
|
|
169
|
-
}
|
|
170
|
-
export class VectorQuery {
|
|
171
|
-
column(column: string): void
|
|
172
|
-
distanceType(distanceType: string): void
|
|
173
|
-
postfilter(): void
|
|
174
|
-
refineFactor(refineFactor: number): void
|
|
175
|
-
nprobes(nprobe: number): void
|
|
176
|
-
bypassVectorIndex(): void
|
|
177
|
-
onlyIf(predicate: string): void
|
|
178
|
-
select(columns: Array<[string, string]>): void
|
|
179
|
-
limit(limit: number): void
|
|
180
|
-
execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
|
|
181
|
-
explainPlan(verbose: boolean): Promise<string>
|
|
182
|
-
}
|
|
183
|
-
export class Table {
|
|
184
|
-
name: string
|
|
185
|
-
display(): string
|
|
186
|
-
isOpen(): boolean
|
|
187
|
-
close(): void
|
|
188
|
-
/** Return Schema as empty Arrow IPC file. */
|
|
189
|
-
schema(): Promise<Buffer>
|
|
190
|
-
add(buf: Buffer, mode: string): Promise<void>
|
|
191
|
-
countRows(filter?: string | undefined | null): Promise<number>
|
|
192
|
-
delete(predicate: string): Promise<void>
|
|
193
|
-
createIndex(index: Index | undefined | null, column: string, replace?: boolean | undefined | null): Promise<void>
|
|
194
|
-
update(onlyIf: string | undefined | null, columns: Array<[string, string]>): Promise<void>
|
|
195
|
-
query(): Query
|
|
196
|
-
vectorSearch(vector: Float32Array): VectorQuery
|
|
197
|
-
addColumns(transforms: Array<AddColumnsSql>): Promise<void>
|
|
198
|
-
alterColumns(alterations: Array<ColumnAlteration>): Promise<void>
|
|
199
|
-
dropColumns(columns: Array<string>): Promise<void>
|
|
200
|
-
version(): Promise<number>
|
|
201
|
-
checkout(version: number): Promise<void>
|
|
202
|
-
checkoutLatest(): Promise<void>
|
|
203
|
-
restore(): Promise<void>
|
|
204
|
-
optimize(olderThanMs?: number | undefined | null): Promise<OptimizeStats>
|
|
205
|
-
listIndices(): Promise<Array<IndexConfig>>
|
|
206
|
-
indexStats(indexName: string): Promise<IndexStatistics | null>
|
|
207
|
-
mergeInsert(on: Array<string>): NativeMergeInsertBuilder
|
|
208
|
-
}
|