@lancedb/lancedb 0.14.0-beta.2 → 0.14.1-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/merge.js ADDED
@@ -0,0 +1,64 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.MergeInsertBuilder = void 0;
4
+ const arrow_1 = require("./arrow");
5
+ /** A builder used to create and run a merge insert operation */
6
+ class MergeInsertBuilder {
7
+ #native;
8
+ /** Construct a MergeInsertBuilder around the given native builder. __Internal use only.__ */
9
+ constructor(native) {
10
+ this.#native = native;
11
+ }
12
+ /**
13
+ * Rows that exist in both the source table (new data) and
14
+ * the target table (old data) will be updated, replacing
15
+ * the old row with the corresponding matching row.
16
+ *
17
+ * If there are multiple matches then the behavior is undefined.
18
+ * Currently this causes multiple copies of the row to be created
19
+ * but that behavior is subject to change.
20
+ *
21
+ * An optional condition may be specified. If it is, then only
22
+ * matched rows that satisfy the condition will be updated. Any
23
+ * rows that do not satisfy the condition will be left as they
24
+ * are. Failing to satisfy the condition does not cause a
25
+ * "matched row" to become a "not matched" row.
26
+ *
27
+ * The condition should be an SQL string, passed as `options.where`.
28
+ * Use the prefix `target.` to refer to rows in the target table (old data)
29
+ * and the prefix `source.` to refer to rows in the source
30
+ * table (new data). A new builder wrapping the result is returned.
31
+ *
32
+ * For example, "target.last_update < source.last_update"
33
+ */
34
+ whenMatchedUpdateAll(options) {
35
+ return new MergeInsertBuilder(this.#native.whenMatchedUpdateAll(options?.where));
36
+ }
37
+ /**
38
+ * Rows that exist only in the source table (new data) should
39
+ * be inserted into the target table. Returns a new builder.
40
+ */
41
+ whenNotMatchedInsertAll() {
42
+ return new MergeInsertBuilder(this.#native.whenNotMatchedInsertAll());
43
+ }
44
+ /**
45
+ * Rows that exist only in the target table (old data) will be
46
+ * deleted. An optional condition can be provided to limit what
47
+ * data is deleted. Returns a new builder.
48
+ *
49
+ * @param options.where - An optional condition to limit what data is deleted
50
+ */
51
+ whenNotMatchedBySourceDelete(options) {
52
+ return new MergeInsertBuilder(this.#native.whenNotMatchedBySourceDelete(options?.where));
53
+ }
54
+ /**
55
+ * Executes the merge insert operation with `data`, which is first
56
+ * converted to a buffer with `fromDataToBuffer`.
57
+ * Nothing is returned but the `Table` is updated
58
+ */
59
+ async execute(data) {
60
+ const buffer = await (0, arrow_1.fromDataToBuffer)(data);
61
+ await this.#native.execute(buffer);
62
+ }
63
+ }
64
+ exports.MergeInsertBuilder = MergeInsertBuilder;
@@ -0,0 +1,340 @@
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+
4
+ /* auto-generated by NAPI-RS */
5
+
6
+ /** Timeout configuration for the remote HTTP client (see `ClientConfig.timeoutConfig`). */
7
+ export interface TimeoutConfig {
8
+ /**
9
+ * The timeout for establishing a connection in seconds. Default is 120
10
+ * seconds (2 minutes). This can also be set via the environment variable
11
+ * `LANCE_CLIENT_CONNECT_TIMEOUT`, as an integer number of seconds.
12
+ */
13
+ connectTimeout?: number
14
+ /**
15
+ * The timeout for reading data from the server in seconds. Default is 300
16
+ * seconds (5 minutes). This can also be set via the environment variable
17
+ * `LANCE_CLIENT_READ_TIMEOUT`, as an integer number of seconds.
18
+ */
19
+ readTimeout?: number
20
+ /**
21
+ * The timeout for keeping idle connections in the connection pool in seconds.
22
+ * Default is 300 seconds (5 minutes). This can also be set via the
23
+ * environment variable `LANCE_CLIENT_CONNECTION_TIMEOUT`, as an integer
24
+ * number of seconds.
25
+ */
26
+ poolIdleTimeout?: number
27
+ }
28
+ /** Retry configuration for the remote HTTP client (see `ClientConfig.retryConfig`). */
29
+ export interface RetryConfig {
30
+ /**
31
+ * The maximum number of retries for a request. Default is 3. You can also
32
+ * set this via the environment variable `LANCE_CLIENT_MAX_RETRIES`.
33
+ */
34
+ retries?: number
35
+ /**
36
+ * The maximum number of retries for connection errors. Default is 3. You
37
+ * can also set this via the environment variable `LANCE_CLIENT_CONNECT_RETRIES`.
38
+ */
39
+ connectRetries?: number
40
+ /**
41
+ * The maximum number of retries for read errors. Default is 3. You can also
42
+ * set this via the environment variable `LANCE_CLIENT_READ_RETRIES`.
43
+ */
44
+ readRetries?: number
45
+ /**
46
+ * The backoff factor to apply between retries. Default is 0.25. Between each retry
47
+ * the client will wait for this number of seconds:
48
+ * `{backoff factor} * (2 ** ({number of previous retries}))`. So for the default
49
+ * of 0.25, the first retry will wait 0.25 seconds, the second retry will wait 0.5
50
+ * seconds, the third retry will wait 1 second, etc.
51
+ *
52
+ * You can also set this via the environment variable
53
+ * `LANCE_CLIENT_RETRY_BACKOFF_FACTOR`.
54
+ */
55
+ backoffFactor?: number
56
+ /**
57
+ * The jitter to apply to the backoff factor, in seconds. Default is 0.25.
58
+ *
59
+ * A random value between 0 and `backoffJitter` will be added to the backoff
60
+ * factor in seconds. So for the default of 0.25 seconds, between 0 and 250
61
+ * milliseconds will be added to the sleep between each retry.
62
+ *
63
+ * You can also set this via the environment variable
64
+ * `LANCE_CLIENT_RETRY_BACKOFF_JITTER`.
65
+ */
66
+ backoffJitter?: number
67
+ /**
68
+ * The HTTP status codes for which to retry the request. Default is
69
+ * [429, 500, 502, 503].
70
+ *
71
+ * You can also set this via the environment variable
72
+ * `LANCE_CLIENT_RETRY_STATUSES`. Use a comma-separated list of integers.
73
+ */
74
+ statuses?: Array<number>
75
+ }
76
+ export interface ClientConfig { // Remote HTTP client settings (see `ConnectionOptions.clientConfig`); every field is optional.
77
+ userAgent?: string
78
+ retryConfig?: RetryConfig
79
+ timeoutConfig?: TimeoutConfig
80
+ }
81
+ /** A description of an index currently configured on a column. Returned (as an array) by `Table.listIndices`. */
82
+ export interface IndexConfig {
83
+ /** The name of the index */
84
+ name: string
85
+ /** The type of the index */
86
+ indexType: string
87
+ /**
88
+ * The columns in the index
89
+ *
90
+ * Currently this is always an array of size 1. In the future there may
91
+ * be more columns to represent composite indices.
92
+ */
93
+ columns: Array<string>
94
+ }
95
+ /** Statistics about a compaction operation. Exposed as `OptimizeStats.compaction`. */
96
+ export interface CompactionStats {
97
+ /** The number of fragments removed */
98
+ fragmentsRemoved: number
99
+ /** The number of new, compacted fragments added */
100
+ fragmentsAdded: number
101
+ /** The number of data files removed */
102
+ filesRemoved: number
103
+ /** The number of new, compacted data files added */
104
+ filesAdded: number
105
+ }
106
+ /** Statistics about a cleanup operation. Exposed as `OptimizeStats.prune`. */
107
+ export interface RemovalStats {
108
+ /** The number of bytes removed */
109
+ bytesRemoved: number
110
+ /** The number of old versions removed */
111
+ oldVersionsRemoved: number
112
+ }
113
+ /** Statistics about an optimize operation. This is the value `Table.optimize` resolves to. */
114
+ export interface OptimizeStats {
115
+ /** Statistics about the compaction operation */
116
+ compaction: CompactionStats
117
+ /** Statistics about the removal operation */
118
+ prune: RemovalStats
119
+ }
120
+ /**
121
+ * A definition of a column alteration. The alteration changes the column at
122
+ * `path` to have the new name `rename`, to be nullable if `nullable` is true,
123
+ * and to have the data type `dataType`. At least one of `rename` or `nullable`
124
+ * must be provided. Passed (as an array) to `Table.alterColumns`.
125
+ */
126
+ export interface ColumnAlteration {
127
+ /**
128
+ * The path to the column to alter. This is a dot-separated path to the column.
129
+ * If it is a top-level column then it is just the name of the column. If it is
130
+ * a nested column then it is the path to the column, e.g. "a.b.c" for a column
131
+ * `c` nested inside a column `b` nested inside a column `a`.
132
+ */
133
+ path: string
134
+ /**
135
+ * The new name of the column. If not provided then the name will not be changed.
136
+ * This must be distinct from the names of all other columns in the table.
137
+ */
138
+ rename?: string
139
+ /**
140
+ * A new data type for the column. If not provided then the data type will not be changed.
141
+ * Changing data types is limited to casting to the same general type. For example, these
142
+ * changes are valid:
143
+ * * `int32` -> `int64` (integers)
144
+ * * `double` -> `float` (floats)
145
+ * * `string` -> `large_string` (strings)
146
+ * But these changes are not:
147
+ * * `int32` -> `double` (mix integers and floats)
148
+ * * `string` -> `int32` (mix strings and integers)
149
+ */
150
+ dataType?: string
151
+ /** Set the new nullability. Note that a nullable column cannot be made non-nullable. */
152
+ nullable?: boolean
153
+ }
154
+ /** A definition of a new column to add to a table. Passed (as an array) to `Table.addColumns`. */
155
+ export interface AddColumnsSql {
156
+ /** The name of the new column. */
157
+ name: string
158
+ /**
159
+ * The values to populate the new column with, as a SQL expression.
160
+ * The expression can reference other columns in the table.
161
+ */
162
+ valueSql: string
163
+ }
164
+ export interface IndexStatistics { // Statistics for a single index; `Table.indexStats` resolves to this or to `null`.
165
+ /** The number of rows indexed by the index */
166
+ numIndexedRows: number
167
+ /** The number of rows not indexed */
168
+ numUnindexedRows: number
169
+ /** The type of the index */
170
+ indexType: string
171
+ /**
172
+ * The type of the distance function used by the index. This is only
173
+ * present for vector indices. Scalar and full text search indices do
174
+ * not have a distance function.
175
+ */
176
+ distanceType?: string
177
+ /** The number of parts this index is split into. */
178
+ numIndices?: number
179
+ }
180
+ export interface Version { // One entry of the array that `Table.listVersions` resolves to.
181
+ version: number
182
+ timestamp: number // NOTE(review): unit/epoch is not stated in this declaration — confirm against the native implementation.
183
+ metadata: Record<string, string>
184
+ }
185
+ export interface ConnectionOptions { // Options passed as the second argument of `Connection.new`; every field is optional.
186
+ /**
187
+ * (For LanceDB OSS only): The interval, in seconds, at which to check for
188
+ * updates to the table from other processes. If not set, then consistency is not
189
+ * checked. For performance reasons, this is the default. For strong
190
+ * consistency, set this to zero seconds. Then every read will check for
191
+ * updates from other processes. As a compromise, you can set this to a
192
+ * non-zero value for eventual consistency. If more than that interval
193
+ * has passed since the last check, then the table will be checked for updates.
194
+ * Note: this consistency only applies to read operations. Write operations are
195
+ * always consistent.
196
+ */
197
+ readConsistencyInterval?: number
198
+ /**
199
+ * (For LanceDB OSS only): configuration for object storage.
200
+ *
201
+ * The available options are described at https://lancedb.github.io/lancedb/guides/storage/
202
+ */
203
+ storageOptions?: Record<string, string>
204
+ /** (For LanceDB Cloud only): configuration for the remote HTTP client. */
205
+ clientConfig?: ClientConfig
206
+ /**
207
+ * (For LanceDB Cloud only): the API key to use with LanceDB Cloud.
208
+ *
209
+ * Can also be set via the environment variable `LANCEDB_API_KEY`.
210
+ */
211
+ apiKey?: string
212
+ /**
213
+ * (For LanceDB Cloud only): the region to use for LanceDB Cloud.
214
+ * Defaults to 'us-east-1'.
215
+ */
216
+ region?: string
217
+ /**
218
+ * (For LanceDB Cloud only): the host to use for LanceDB Cloud. Used
219
+ * for testing purposes.
220
+ */
221
+ hostOverride?: string
222
+ }
223
+ /** Write mode for writing a table. Used as the type of `WriteOptions.mode`; each member's value is its own name as a string. */
224
+ export enum WriteMode {
225
+ Create = 'Create',
226
+ Append = 'Append',
227
+ Overwrite = 'Overwrite'
228
+ }
229
+ /** Write options when creating a Table. */
230
+ export interface WriteOptions {
231
+ /** Write mode for writing to a table: one of the `WriteMode` values. Optional. */
232
+ mode?: WriteMode
233
+ }
234
+ export interface OpenTableOptions { // Holds one optional string-to-string `storageOptions` map; presumably used when opening a table — confirm against callers.
235
+ storageOptions?: Record<string, string>
236
+ }
237
+ export class Connection { // Native connection handle; its table methods resolve to `Table` instances.
238
+ /** Create a new Connection instance from the given URI. */
239
+ static new(uri: string, options: ConnectionOptions): Promise<Connection>
240
+ display(): string
241
+ isOpen(): boolean
242
+ close(): void
243
+ /** List all tables in the dataset. */
244
+ tableNames(startAfter?: string | undefined | null, limit?: number | undefined | null): Promise<Array<string>>
245
+ /**
246
+ * Create a table from an Apache Arrow IPC (file) buffer.
247
+ *
248
+ * Parameters:
249
+ * - name: The name of the table.
250
+ * - buf: The buffer containing the IPC file.
251
+ * (The remaining parameters are not described in this declaration.)
252
+ */
253
+ createTable(name: string, buf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, dataStorageOptions?: string | undefined | null, enableV2ManifestPaths?: boolean | undefined | null): Promise<Table>
254
+ createEmptyTable(name: string, schemaBuf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, dataStorageOptions?: string | undefined | null, enableV2ManifestPaths?: boolean | undefined | null): Promise<Table>
255
+ openTable(name: string, storageOptions?: Record<string, string> | undefined | null, indexCacheSize?: number | undefined | null): Promise<Table>
256
+ /** Drop the table with the given name, or raise an error if the table does not exist. */
257
+ dropTable(name: string): Promise<void>
258
+ }
259
+ export class Index { // Only static factory methods are declared, each returning an `Index`; an `Index` is accepted by `Table.createIndex`.
260
+ static ivfPq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
261
+ static btree(): Index
262
+ static bitmap(): Index
263
+ static labelList(): Index
264
+ static fts(withPosition?: boolean | undefined | null): Index
265
+ static hnswPq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null, m?: number | undefined | null, efConstruction?: number | undefined | null): Index
266
+ static hnswSq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null, m?: number | undefined | null, efConstruction?: number | undefined | null): Index
267
+ }
268
+ /** TypeScript-style async iterator over RecordBatches. `Query.execute` and `VectorQuery.execute` resolve to one. */
269
+ export class RecordBatchIterator {
270
+ next(): Promise<Buffer | null> // NOTE(review): `null` presumably signals the end of the stream — confirm.
271
+ }
272
+ /** A builder used to create and run a merge insert operation. Returned by `Table.mergeInsert`; each `when*` method returns a `NativeMergeInsertBuilder`. */
273
+ export class NativeMergeInsertBuilder {
274
+ whenMatchedUpdateAll(condition?: string | undefined | null): NativeMergeInsertBuilder
275
+ whenNotMatchedInsertAll(): NativeMergeInsertBuilder
276
+ whenNotMatchedBySourceDelete(filter?: string | undefined | null): NativeMergeInsertBuilder
277
+ execute(buf: Buffer): Promise<void>
278
+ }
279
+ export class Query { // Returned by `Table.query()`. Configuration methods return `void`; `nearestTo` returns a `VectorQuery`.
280
+ onlyIf(predicate: string): void
281
+ fullTextSearch(query: string, columns?: Array<string> | undefined | null): void
282
+ select(columns: Array<[string, string]>): void
283
+ selectColumns(columns: Array<string>): void
284
+ limit(limit: number): void
285
+ offset(offset: number): void
286
+ nearestTo(vector: Float32Array): VectorQuery
287
+ fastSearch(): void
288
+ withRowId(): void
289
+ execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
290
+ explainPlan(verbose: boolean): Promise<string>
291
+ }
292
+ export class VectorQuery { // Returned by `Query.nearestTo` and `Table.vectorSearch`. Configuration methods return `void`.
293
+ column(column: string): void
294
+ addQueryVector(vector: Float32Array): void
295
+ distanceType(distanceType: string): void
296
+ postfilter(): void
297
+ refineFactor(refineFactor: number): void
298
+ nprobes(nprobe: number): void
299
+ ef(ef: number): void
300
+ bypassVectorIndex(): void
301
+ onlyIf(predicate: string): void
302
+ fullTextSearch(query: string, columns?: Array<string> | undefined | null): void
303
+ select(columns: Array<[string, string]>): void
304
+ selectColumns(columns: Array<string>): void
305
+ limit(limit: number): void
306
+ offset(offset: number): void
307
+ fastSearch(): void
308
+ withRowId(): void
309
+ execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
310
+ explainPlan(verbose: boolean): Promise<string>
311
+ }
312
+ export class Table { // Native table handle; `Connection.createTable`, `createEmptyTable` and `openTable` resolve to one.
313
+ name: string
314
+ display(): string
315
+ isOpen(): boolean
316
+ close(): void
317
+ /** Return Schema as empty Arrow IPC file. */
318
+ schema(): Promise<Buffer>
319
+ add(buf: Buffer, mode: string): Promise<void>
320
+ countRows(filter?: string | undefined | null): Promise<number>
321
+ delete(predicate: string): Promise<void>
322
+ createIndex(index: Index | undefined | null, column: string, replace?: boolean | undefined | null): Promise<void>
323
+ update(onlyIf: string | undefined | null, columns: Array<[string, string]>): Promise<bigint>
324
+ query(): Query
325
+ vectorSearch(vector: Float32Array): VectorQuery
326
+ addColumns(transforms: Array<AddColumnsSql>): Promise<void>
327
+ alterColumns(alterations: Array<ColumnAlteration>): Promise<void>
328
+ dropColumns(columns: Array<string>): Promise<void>
329
+ version(): Promise<number>
330
+ checkout(version: number): Promise<void>
331
+ checkoutLatest(): Promise<void>
332
+ listVersions(): Promise<Array<Version>>
333
+ restore(): Promise<void>
334
+ optimize(olderThanMs?: number | undefined | null, deleteUnverified?: boolean | undefined | null): Promise<OptimizeStats>
335
+ listIndices(): Promise<Array<IndexConfig>>
336
+ indexStats(indexName: string): Promise<IndexStatistics | null>
337
+ mergeInsert(on: Array<string>): NativeMergeInsertBuilder
338
+ usesV2ManifestPaths(): Promise<boolean>
339
+ migrateManifestPathsV2(): Promise<void>
340
+ }