@lancedb/lancedb 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/.eslintignore +3 -0
  2. package/Cargo.toml +28 -0
  3. package/README.md +49 -0
  4. package/build.rs +5 -0
  5. package/eslint.config.js +28 -0
  6. package/examples/js/index.mjs +40 -0
  7. package/examples/js/package.json +14 -0
  8. package/examples/js-openai/index.mjs +43 -0
  9. package/examples/js-openai/package-lock.json +256 -0
  10. package/examples/js-openai/package.json +15 -0
  11. package/examples/js-transformers/index.mjs +65 -0
  12. package/examples/js-transformers/package-lock.json +1418 -0
  13. package/examples/js-transformers/package.json +15 -0
  14. package/examples/js-youtube-transcripts/index.mjs +135 -0
  15. package/examples/js-youtube-transcripts/package.json +15 -0
  16. package/examples/ts/data/sample-lancedb/vectors.lance/_latest.manifest +0 -0
  17. package/examples/ts/data/sample-lancedb/vectors.lance/_transactions/0-adde4e05-fcfc-415c-86a6-5b252cb9e79a.txn +0 -0
  18. package/examples/ts/data/sample-lancedb/vectors.lance/_versions/1.manifest +0 -0
  19. package/examples/ts/data/sample-lancedb/vectors.lance/data/3618b33e-3eea-4b5e-a0fc-7d1f718d551e.lance +0 -0
  20. package/examples/ts/package-lock.json +1340 -0
  21. package/examples/ts/package.json +22 -0
  22. package/examples/ts/tsconfig.json +10 -0
  23. package/jest.config.js +7 -0
  24. package/lancedb/arrow.ts +650 -0
  25. package/lancedb/connection.ts +176 -0
  26. package/lancedb/embedding/embedding_function.ts +78 -0
  27. package/lancedb/embedding/index.ts +2 -0
  28. package/lancedb/embedding/openai.ts +62 -0
  29. package/lancedb/index.ts +69 -0
  30. package/lancedb/indices.ts +203 -0
  31. package/lancedb/query.ts +375 -0
  32. package/lancedb/sanitize.ts +516 -0
  33. package/lancedb/table.ts +353 -0
  34. package/package.json +82 -0
  35. package/tsconfig.json +23 -0
@@ -0,0 +1,353 @@
1
+ // Copyright 2024 Lance Developers.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ import { Schema, tableFromIPC } from "apache-arrow";
16
+ import {
17
+ AddColumnsSql,
18
+ ColumnAlteration,
19
+ IndexConfig,
20
+ Table as _NativeTable,
21
+ } from "./native";
22
+ import { Query, VectorQuery } from "./query";
23
+ import { IndexOptions } from "./indices";
24
+ import { Data, fromDataToBuffer } from "./arrow";
25
+
26
+ export { IndexConfig } from "./native";
27
+ /**
28
+ * Options for adding data to a table.
29
+ */
30
export interface AddDataOptions {
  /**
   * How the incoming rows are combined with the rows already in the table.
   *
   * - `"append"`: the new rows are added to the table. This is the mode used
   *   when none is specified.
   * - `"overwrite"`: the new rows replace the existing data in the table.
   */
  mode: "append" | "overwrite";
}
38
+
39
export interface UpdateOptions {
  /**
   * SQL filter expression restricting which rows are updated.
   *
   * Rows that do not satisfy the expression are left untouched.
   *
   * Example: `'my_col == 0'` limits the update to rows where `my_col` is 0,
   * which is useful for replacing every 0 in a column with some other
   * default value.
   */
  where: string;
}
52
+
53
+ /**
54
+ * A Table is a collection of Records in a LanceDB Database.
55
+ *
56
+ * A Table object is expected to be long lived and reused for multiple operations.
57
+ * Table objects will cache a certain amount of index data in memory. This cache
58
+ * will be freed when the Table is garbage collected. To eagerly free the cache you
59
+ * can call the `close` method. Once the Table is closed, it cannot be used for any
60
+ * further operations.
61
+ *
62
+ * Closing a table is optional. If not closed, it will be closed when it is garbage
63
+ * collected.
64
+ */
65
export class Table {
  /** Handle to the native (napi) table that every method delegates to. */
  private readonly inner: _NativeTable;

  /**
   * Construct a Table. Internal use only.
   * @param inner - the native table handle to wrap
   */
  constructor(inner: _NativeTable) {
    this.inner = inner;
  }

  /**
   * Return true if the table has not been closed.
   * @returns whether the underlying native table reports itself as open
   */
  isOpen(): boolean {
    return this.inner.isOpen();
  }

  /**
   * Close the table, releasing any underlying resources.
   *
   * It is safe to call this method multiple times.
   *
   * Any attempt to use the table after it is closed will result in an error.
   */
  close(): void {
    this.inner.close();
  }

  /**
   * Return a brief description of the table.
   * @returns the description string produced by the native table
   */
  display(): string {
    return this.inner.display();
  }

  /**
   * Get the schema of the table.
   * @returns the Arrow schema decoded from the IPC buffer supplied by the
   * native table
   */
  async schema(): Promise<Schema> {
    const schemaBuf = await this.inner.schema();
    // The native layer hands back an Arrow IPC buffer; decoding it as a table
    // is how the schema is recovered.
    return tableFromIPC(schemaBuf).schema;
  }

  /**
   * Insert records into this Table.
   * @param data - records to be inserted into the Table
   * @param options - additional options; `mode` defaults to `"append"`
   */
  async add(data: Data, options?: Partial<AddDataOptions>): Promise<void> {
    const mode = options?.mode ?? "append";
    const buffer = await fromDataToBuffer(data);
    await this.inner.add(buffer, mode);
  }

  /**
   * Update existing records in the Table.
   *
   * An update operation can be used to adjust existing values. The new value
   * can be a literal value (e.g. replacing nulls with some default value)
   * or an expression applied to the old value (e.g. incrementing a value).
   *
   * An optional condition can be specified (e.g. "only update if the old
   * value is 0").
   *
   * Note: if your condition is something like "some_id_column == 7" and
   * you are updating many rows (with different ids) then you will get
   * better performance with a single `merge_insert` call instead of
   * repeatedly calling this method.
   * @param updates - the columns to update
   *
   * Keys specify the name of the column to update. Values provide the new
   * value of the column. These can be SQL literal strings (e.g. "7" or
   * "'foo'") or they can be expressions based on the row being updated
   * (e.g. "my_col + 1").
   * @param options - additional options to control the update behavior;
   * `where` limits the update to rows matching an SQL filter
   */
  async update(
    updates: Map<string, string> | Record<string, string>,
    options?: Partial<UpdateOptions>,
  ): Promise<void> {
    const onlyIf = options?.where;
    // The native call takes [column, expression] pairs, so both accepted
    // input shapes are normalized to that form.
    const columns: [string, string][] =
      updates instanceof Map
        ? Array.from(updates.entries())
        : Object.entries(updates);
    await this.inner.update(onlyIf, columns);
  }

  /**
   * Count the number of rows in the dataset.
   * @param filter - optional filter forwarded to the native table; when
   * omitted every row is counted
   * @returns the row count reported by the native table
   */
  async countRows(filter?: string): Promise<number> {
    return await this.inner.countRows(filter);
  }

  /**
   * Delete the rows that satisfy the predicate.
   * @param predicate - filter expression selecting the rows to delete
   */
  async delete(predicate: string): Promise<void> {
    await this.inner.delete(predicate);
  }

  /**
   * Create an index to speed up queries.
   *
   * Indices can be created on vector columns or scalar columns.
   * Indices on vector columns will speed up vector searches.
   * Indices on scalar columns will speed up filtering (in both
   * vector and non-vector searches).
   * @param column - name of the column to index
   * @param options - optional index settings; `config` selects the index
   * type and its parameters and `replace` is forwarded unchanged to the
   * native layer
   * @example
   * // If the column has a vector (fixed size list) data type then
   * // an IvfPq vector index will be created.
   * const table = await conn.openTable("my_table");
   * await table.createIndex("vector");
   * @example
   * // For advanced control over vector index creation you can specify
   * // the index type and options.
   * const table = await conn.openTable("my_table");
   * await table.createIndex("vector", {
   *   config: lancedb.Index.ivfPq({
   *     numPartitions: 128,
   *     numSubVectors: 16,
   *   }),
   * });
   * @example
   * // Or create a Scalar index
   * await table.createIndex("my_float_col");
   */
  async createIndex(
    column: string,
    options?: Partial<IndexOptions>,
  ): Promise<void> {
    // Bit of a hack to get around the fact that TS has no package-scope:
    // the native index object is read from the config's `inner` field.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const nativeIndex = (options?.config as any)?.inner;
    await this.inner.createIndex(nativeIndex, column, options?.replace);
  }

  /**
   * Create a {@link Query} Builder.
   *
   * Queries allow you to search your existing data. By default the query will
   * return all the data in the table in no particular order. The builder
   * returned by this method can be used to control the query using filtering,
   * vector similarity, sorting, and more.
   *
   * Note: By default, all columns are returned. For best performance, you should
   * only fetch the columns you need (e.g. with `select`, as in the first
   * example below).
   *
   * When appropriate, various indices and statistics based pruning will be used to
   * accelerate the query.
   * @example
   * // SQL-style filtering
   * //
   * // This query will return up to 20 rows whose value in the `id` column
   * // is greater than 1. LanceDb supports a broad set of filtering functions.
   * for await (const batch of table.query()
   *                          .filter("id > 1").select(["id"]).limit(20)) {
   *   console.log(batch);
   * }
   * @example
   * // Vector Similarity Search
   * //
   * // This example will find the 10 rows whose value in the "vector" column are
   * // closest to the query vector [1.0, 2.0, 3.0]. If an index has been created
   * // on the "vector" column then this will perform an ANN search.
   * //
   * // The `refineFactor` and `nprobes` methods are used to control the recall /
   * // latency tradeoff of the search.
   * for await (const batch of table.query()
   *                          .nearestTo([1, 2, 3])
   *                          .refineFactor(5).nprobes(10)
   *                          .limit(10)) {
   *   console.log(batch);
   * }
   * @example
   * // Scan the full dataset
   * //
   * // This query will return everything in the table in no particular order.
   * for await (const batch of table.query()) {
   *   console.log(batch);
   * }
   * @returns {Query} A builder that can be used to parameterize the query
   */
  query(): Query {
    return new Query(this.inner);
  }

  /**
   * Search the table with a given query vector.
   *
   * This is a convenience method for preparing a vector query and
   * is the same thing as calling `nearestTo` on the builder returned
   * by `query`.
   * @param vector - the query vector, passed through to `nearestTo`
   * @returns the vector query builder returned by `nearestTo`
   * @see {@link Query#nearestTo} for more details.
   */
  vectorSearch(vector: unknown): VectorQuery {
    return this.query().nearestTo(vector);
  }

  // TODO: Support BatchUDF
  /**
   * Add new columns with defined values.
   * @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
   * the SQL expression to use to calculate the value of the new column. These
   * expressions will be evaluated for each row in the table, and can
   * reference existing columns in the table.
   */
  async addColumns(newColumnTransforms: AddColumnsSql[]): Promise<void> {
    await this.inner.addColumns(newColumnTransforms);
  }

  /**
   * Alter the name or nullability of columns.
   * @param {ColumnAlteration[]} columnAlterations One or more alterations to
   * apply to columns.
   */
  async alterColumns(columnAlterations: ColumnAlteration[]): Promise<void> {
    await this.inner.alterColumns(columnAlterations);
  }

  /**
   * Drop one or more columns from the dataset.
   *
   * This is a metadata-only operation and does not remove the data from the
   * underlying storage. In order to remove the data, you must subsequently
   * call `compact_files` to rewrite the data without the removed columns and
   * then call `cleanup_files` to remove the old files.
   * @param {string[]} columnNames The names of the columns to drop. These can
   * be nested column references (e.g. "a.b.c") or top-level column names
   * (e.g. "a").
   */
  async dropColumns(columnNames: string[]): Promise<void> {
    await this.inner.dropColumns(columnNames);
  }

  /**
   * Retrieve the version of the table.
   *
   * LanceDb supports versioning. Every operation that modifies the table increases
   * version. As long as a version hasn't been deleted you can
   * {@link Table#checkout} that version to view the data at that point. In
   * addition, you can {@link Table#restore} the version to replace the current
   * table with a previous version.
   * @returns the version number reported by the native table
   */
  async version(): Promise<number> {
    return await this.inner.version();
  }

  /**
   * Checks out a specific version of the Table.
   *
   * Any read operation on the table will now access the data at the checked out version.
   * As a consequence, calling this method will disable any read consistency interval
   * that was previously set.
   *
   * This is a read-only operation that turns the table into a sort of "view"
   * or "detached head". Other table instances will not be affected. To make the change
   * permanent you can use the {@link Table#restore} method.
   *
   * Any operation that modifies the table will fail while the table is in a checked
   * out state.
   *
   * To return the table to a normal state use {@link Table#checkoutLatest}.
   * @param version - the table version to check out
   */
  async checkout(version: number): Promise<void> {
    await this.inner.checkout(version);
  }

  /**
   * Ensures the table is pointing at the latest version.
   *
   * This can be used to manually update a table when no read consistency
   * interval is configured. It can also be used to undo a
   * {@link Table#checkout} operation.
   */
  async checkoutLatest(): Promise<void> {
    await this.inner.checkoutLatest();
  }

  /**
   * Restore the table to the currently checked out version.
   *
   * This operation will fail if checkout has not been called previously.
   *
   * This operation will overwrite the latest version of the table with a
   * previous version. Any changes made since the checked out version will
   * no longer be visible.
   *
   * Once the operation concludes the table will no longer be in a checked
   * out state and the read consistency interval, if any, will apply.
   */
  async restore(): Promise<void> {
    await this.inner.restore();
  }

  /**
   * List all indices that have been created with {@link Table#createIndex}.
   * @returns the index configurations reported by the native table
   */
  async listIndices(): Promise<IndexConfig[]> {
    return await this.inner.listIndices();
  }
}
package/package.json ADDED
@@ -0,0 +1,82 @@
1
+ {
2
+ "name": "@lancedb/lancedb",
3
+ "version": "0.4.3",
4
+ "main": "./dist/index.js",
5
+ "types": "./dist/index.d.ts",
6
+ "napi": {
7
+ "name": "lancedb",
8
+ "triples": {
9
+ "defaults": false,
10
+ "additional": [
11
+ "aarch64-apple-darwin",
12
+ "aarch64-unknown-linux-gnu",
13
+ "x86_64-apple-darwin",
14
+ "x86_64-unknown-linux-gnu",
15
+ "x86_64-pc-windows-msvc"
16
+ ]
17
+ }
18
+ },
19
+ "license": "Apache-2.0",
20
+ "devDependencies": {
21
+ "@napi-rs/cli": "^2.18.0",
22
+ "@types/jest": "^29.1.2",
23
+ "@types/tmp": "^0.2.6",
24
+ "@typescript-eslint/eslint-plugin": "^6.19.0",
25
+ "@typescript-eslint/parser": "^6.19.0",
26
+ "apache-arrow-old": "npm:apache-arrow@13.0.0",
27
+ "eslint": "^8.57.0",
28
+ "eslint-config-prettier": "^9.1.0",
29
+ "eslint-plugin-jsdoc": "^48.2.1",
30
+ "jest": "^29.7.0",
31
+ "prettier": "^3.1.0",
32
+ "shx": "^0.3.4",
33
+ "tmp": "^0.2.3",
34
+ "ts-jest": "^29.1.2",
35
+ "typedoc": "^0.25.7",
36
+ "typedoc-plugin-markdown": "^3.17.1",
37
+ "typescript": "^5.3.3",
38
+ "typescript-eslint": "^7.1.0"
39
+ },
40
+ "ava": {
41
+ "timeout": "3m"
42
+ },
43
+ "engines": {
44
+ "node": ">= 18"
45
+ },
46
+ "cpu": [
47
+ "x64",
48
+ "arm64"
49
+ ],
50
+ "os": [
51
+ "darwin",
52
+ "linux",
53
+ "win32"
54
+ ],
55
+ "scripts": {
56
+ "artifacts": "napi artifacts",
57
+ "build:debug": "napi build --platform --dts ../lancedb/native.d.ts --js ../lancedb/native.js dist/",
58
+ "build:release": "napi build --platform --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js dist/",
59
+ "build": "npm run build:debug && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts",
60
+ "build-release": "npm run build:release && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts",
61
+ "chkformat": "prettier . --check",
62
+ "docs": "typedoc --plugin typedoc-plugin-markdown lancedb/index.ts",
63
+ "lint": "eslint lancedb && eslint __test__",
64
+ "prepublishOnly": "napi prepublish -t npm",
65
+ "test": "npm run build && jest --verbose",
66
+ "universal": "napi universal",
67
+ "version": "napi version"
68
+ },
69
+ "optionalDependencies": {
70
+ "@lancedb/lancedb-darwin-arm64": "0.4.3",
71
+ "@lancedb/lancedb-linux-arm64-gnu": "0.4.3",
72
+ "@lancedb/lancedb-darwin-x64": "0.4.3",
73
+ "@lancedb/lancedb-linux-x64-gnu": "0.4.3",
74
+ "@lancedb/lancedb-win32-x64-msvc": "0.4.3"
75
+ },
76
+ "peerDependencies": {
77
+ "apache-arrow": "^15.0.0"
78
+ },
79
+ "dependencies": {
80
+ "openai": "^4.29.2"
81
+ }
82
+ }
package/tsconfig.json ADDED
@@ -0,0 +1,23 @@
1
+ {
2
+ "include": ["lancedb/*.ts", "lancedb/**/*.ts", "lancedb/*.js"],
3
+ "compilerOptions": {
4
+ "target": "es2022",
5
+ "module": "commonjs",
6
+ "declaration": true,
7
+ "outDir": "./dist",
8
+ "strict": true,
9
+ "allowJs": true,
10
+ "resolveJsonModule": true
11
+ },
12
+ "exclude": ["./dist/*"],
13
+ "typedocOptions": {
14
+ "entryPoints": ["lancedb/index.ts"],
15
+ "out": "../docs/src/javascript/",
16
+ "visibilityFilters": {
17
+ "protected": false,
18
+ "private": false,
19
+ "inherited": true,
20
+ "external": false
21
+ }
22
+ }
23
+ }