@lancedb/lancedb 0.13.0-beta.1 → 0.13.1-beta.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,6 @@
1
1
  import { Schema } from "../arrow";
2
2
  import { EmbeddingFunction } from "./embedding_function";
3
3
  export { EmbeddingFunction, TextEmbeddingFunction } from "./embedding_function";
4
- export * from "./openai";
5
- export * from "./transformers";
6
4
  export * from "./registry";
7
5
  /**
8
6
  * Create a schema with embedding functions.
@@ -35,9 +35,6 @@ const registry_1 = require("./registry");
35
35
  var embedding_function_1 = require("./embedding_function");
36
36
  Object.defineProperty(exports, "EmbeddingFunction", { enumerable: true, get: function () { return embedding_function_1.EmbeddingFunction; } });
37
37
  Object.defineProperty(exports, "TextEmbeddingFunction", { enumerable: true, get: function () { return embedding_function_1.TextEmbeddingFunction; } });
38
- // We need to explicitly export '*' so that the `register` decorator actually registers the class.
39
- __exportStar(require("./openai"), exports);
40
- __exportStar(require("./transformers"), exports);
41
38
  __exportStar(require("./registry"), exports);
42
39
  /**
43
40
  * Create a schema with embedding functions.
@@ -1,7 +1,5 @@
1
1
  import { type EmbeddingFunction, type EmbeddingFunctionConstructor } from "./embedding_function";
2
2
  import "reflect-metadata";
3
- import { OpenAIEmbeddingFunction } from "./openai";
4
- import { TransformersEmbeddingFunction } from "./transformers";
5
3
  type CreateReturnType<T> = T extends {
6
4
  init: () => Promise<void>;
7
5
  } ? Promise<T> : T;
@@ -27,8 +25,6 @@ export declare class EmbeddingFunctionRegistry {
27
25
  * @throws Error if the function is already registered
28
26
  */
29
27
  register<T extends EmbeddingFunctionConstructor = EmbeddingFunctionConstructor>(this: EmbeddingFunctionRegistry, alias?: string): (ctor: T) => any;
30
- get(name: "openai"): EmbeddingFunctionCreate<OpenAIEmbeddingFunction>;
31
- get(name: "huggingface"): EmbeddingFunctionCreate<TransformersEmbeddingFunction>;
32
28
  get<T extends EmbeddingFunction<unknown>>(name: string): EmbeddingFunctionCreate<T> | undefined;
33
29
  /**
34
30
  * reset the registry to the initial state
@@ -66,17 +66,17 @@ let TransformersEmbeddingFunction = class TransformersEmbeddingFunction extends
66
66
  try {
67
67
  // SAFETY:
68
68
  // since typescript transpiles `import` to `require`, we need to do this in an unsafe way
69
- // We can't use `require` because `@xenova/transformers` is an ESM module
69
+ // We can't use `require` because `@huggingface/transformers` is an ESM module
70
70
  // and we can't use `import` directly because typescript will transpile it to `require`.
71
71
  // and we want to remain compatible with both ESM and CJS modules
72
72
  // so we use `eval` to bypass typescript for this specific import.
73
- transformers = await eval('import("@xenova/transformers")');
73
+ transformers = await eval('import("@huggingface/transformers")');
74
74
  }
75
75
  catch (e) {
76
- throw new Error(`error loading @xenova/transformers\nReason: ${e}`);
76
+ throw new Error(`error loading @huggingface/transformers\nReason: ${e}`);
77
77
  }
78
78
  try {
79
- this.#model = await transformers.AutoModel.from_pretrained(this.#modelName);
79
+ this.#model = await transformers.AutoModel.from_pretrained(this.#modelName, { dtype: "fp32" });
80
80
  }
81
81
  catch (e) {
82
82
  throw new Error(`error loading model ${this.#modelName}. Make sure you are using a wasm compatible model.\nReason: ${e}`);
@@ -95,7 +95,8 @@ let TransformersEmbeddingFunction = class TransformersEmbeddingFunction extends
95
95
  }
96
96
  else {
97
97
  const config = this.#model.config;
98
- const ndims = config["hidden_size"];
98
+ // biome-ignore lint/style/useNamingConvention: we don't control this name.
99
+ const ndims = config.hidden_size;
99
100
  if (!ndims) {
100
101
  throw new Error("hidden_size not found in model config, you may need to manually specify the embedding dimensions. ");
101
102
  }
package/dist/native.d.ts CHANGED
@@ -165,6 +165,11 @@ export interface IndexStatistics {
165
165
  /** The number of parts this index is split into. */
166
166
  numIndices?: number
167
167
  }
168
+ export interface Version {
169
+ version: number
170
+ timestamp: number
171
+ metadata: Record<string, string>
172
+ }
168
173
  export interface ConnectionOptions {
169
174
  /**
170
175
  * (For LanceDB OSS only): The interval, in seconds, at which to check for
@@ -274,10 +279,12 @@ export class Query {
274
279
  }
275
280
  export class VectorQuery {
276
281
  column(column: string): void
282
+ addQueryVector(vector: Float32Array): void
277
283
  distanceType(distanceType: string): void
278
284
  postfilter(): void
279
285
  refineFactor(refineFactor: number): void
280
286
  nprobes(nprobe: number): void
287
+ ef(ef: number): void
281
288
  bypassVectorIndex(): void
282
289
  onlyIf(predicate: string): void
283
290
  fullTextSearch(query: string, columns?: Array<string> | undefined | null): void
@@ -310,6 +317,7 @@ export class Table {
310
317
  version(): Promise<number>
311
318
  checkout(version: number): Promise<void>
312
319
  checkoutLatest(): Promise<void>
320
+ listVersions(): Promise<Array<Version>>
313
321
  restore(): Promise<void>
314
322
  optimize(olderThanMs?: number | undefined | null, deleteUnverified?: boolean | undefined | null): Promise<OptimizeStats>
315
323
  listIndices(): Promise<Array<IndexConfig>>
package/dist/query.d.ts CHANGED
@@ -181,6 +181,16 @@ export declare class VectorQuery extends QueryBase<NativeVectorQuery> {
181
181
  * you the desired recall.
182
182
  */
183
183
  nprobes(nprobes: number): VectorQuery;
184
+ /**
185
+ * Set the number of candidates to consider during the search
186
+ *
187
+ * This argument is only used when the vector column has an HNSW index.
188
+ * If there is no index then this value is ignored.
189
+ *
190
+ * Increasing this value will increase the recall of your query but will
191
+ * also increase the latency of your query. The default value is 1.5*limit.
192
+ */
193
+ ef(ef: number): VectorQuery;
184
194
  /**
185
195
  * Set the vector column to query
186
196
  *
@@ -267,6 +277,7 @@ export declare class VectorQuery extends QueryBase<NativeVectorQuery> {
267
277
  * calculate your recall to select an appropriate value for nprobes.
268
278
  */
269
279
  bypassVectorIndex(): VectorQuery;
280
+ addQueryVector(vector: IntoVector): VectorQuery;
270
281
  }
271
282
  /** A builder for LanceDB queries. */
272
283
  export declare class Query extends QueryBase<NativeQuery> {
@@ -309,4 +320,5 @@ export declare class Query extends QueryBase<NativeQuery> {
309
320
  * a default `limit` of 10 will be used. @see {@link Query#limit}
310
321
  */
311
322
  nearestTo(vector: IntoVector): VectorQuery;
323
+ nearestToText(query: string, columns?: string[]): Query;
312
324
  }
package/dist/query.js CHANGED
@@ -308,6 +308,19 @@ class VectorQuery extends QueryBase {
308
308
  super.doCall((inner) => inner.nprobes(nprobes));
309
309
  return this;
310
310
  }
311
+ /**
312
+ * Set the number of candidates to consider during the search
313
+ *
314
+ * This argument is only used when the vector column has an HNSW index.
315
+ * If there is no index then this value is ignored.
316
+ *
317
+ * Increasing this value will increase the recall of your query but will
318
+ * also increase the latency of your query. The default value is 1.5*limit.
319
+ */
320
+ ef(ef) {
321
+ super.doCall((inner) => inner.ef(ef));
322
+ return this;
323
+ }
311
324
  /**
312
325
  * Set the vector column to query
313
326
  *
@@ -409,6 +422,41 @@ class VectorQuery extends QueryBase {
409
422
  super.doCall((inner) => inner.bypassVectorIndex());
410
423
  return this;
411
424
  }
425
+ /*
426
+ * Add a query vector to the search
427
+ *
428
+ * This method can be called multiple times to add multiple query vectors
429
+ * to the search. If multiple query vectors are added, then they will be searched
430
+ * in parallel, and the results will be concatenated. A column called `query_index`
431
+ * will be added to indicate the index of the query vector that produced the result.
432
+ *
433
+ * Performance wise, this is equivalent to running multiple queries concurrently.
434
+ */
435
+ addQueryVector(vector) {
436
+ if (vector instanceof Promise) {
437
+ const res = (async () => {
438
+ try {
439
+ const v = await vector;
440
+ const arr = Float32Array.from(v);
441
+ //
442
+ // biome-ignore lint/suspicious/noExplicitAny: we need to get the `inner`, but js has no package scoping
443
+ const value = this.addQueryVector(arr);
444
+ const inner = value.inner;
445
+ return inner;
446
+ }
447
+ catch (e) {
448
+ return Promise.reject(e);
449
+ }
450
+ })();
451
+ return new VectorQuery(res);
452
+ }
453
+ else {
454
+ super.doCall((inner) => {
455
+ inner.addQueryVector(Float32Array.from(vector));
456
+ });
457
+ return this;
458
+ }
459
+ }
412
460
  }
413
461
  exports.VectorQuery = VectorQuery;
414
462
  /** A builder for LanceDB queries. */
@@ -488,5 +536,9 @@ class Query extends QueryBase {
488
536
  return new VectorQuery(vectorQuery);
489
537
  }
490
538
  }
539
+ nearestToText(query, columns) {
540
+ this.doCall((inner) => inner.fullTextSearch(query, columns));
541
+ return this;
542
+ }
491
543
  }
492
544
  exports.Query = Query;
package/dist/table.d.ts CHANGED
@@ -47,6 +47,11 @@ export interface OptimizeOptions {
47
47
  cleanupOlderThan: Date;
48
48
  deleteUnverified: boolean;
49
49
  }
50
+ export interface Version {
51
+ version: number;
52
+ timestamp: Date;
53
+ metadata: Record<string, string>;
54
+ }
50
55
  /**
51
56
  * A Table is a collection of Records in a LanceDB Database.
52
57
  *
@@ -297,6 +302,10 @@ export declare abstract class Table {
297
302
  * version of the table.
298
303
  */
299
304
  abstract checkoutLatest(): Promise<void>;
305
+ /**
306
+ * List all the versions of the table
307
+ */
308
+ abstract listVersions(): Promise<Version[]>;
300
309
  /**
301
310
  * Restore the table to the currently checked out version
302
311
  *
@@ -385,6 +394,7 @@ export declare class LocalTable extends Table {
385
394
  version(): Promise<number>;
386
395
  checkout(version: number): Promise<void>;
387
396
  checkoutLatest(): Promise<void>;
397
+ listVersions(): Promise<Version[]>;
388
398
  restore(): Promise<void>;
389
399
  optimize(options?: Partial<OptimizeOptions>): Promise<OptimizeStats>;
390
400
  listIndices(): Promise<IndexConfig[]>;
package/dist/table.js CHANGED
@@ -211,6 +211,13 @@ class LocalTable extends Table {
211
211
  async checkoutLatest() {
212
212
  await this.inner.checkoutLatest();
213
213
  }
214
+ async listVersions() {
215
+ return (await this.inner.listVersions()).map((version) => ({
216
+ version: version.version,
217
+ timestamp: new Date(version.timestamp / 1000),
218
+ metadata: version.metadata,
219
+ }));
220
+ }
214
221
  async restore() {
215
222
  await this.inner.restore();
216
223
  }
package/package.json CHANGED
@@ -10,11 +10,13 @@
10
10
  "vector database",
11
11
  "ann"
12
12
  ],
13
- "version": "0.13.0-beta.1",
13
+ "version": "0.13.1-beta.0",
14
14
  "main": "dist/index.js",
15
15
  "exports": {
16
16
  ".": "./dist/index.js",
17
- "./embedding": "./dist/embedding/index.js"
17
+ "./embedding": "./dist/embedding/index.js",
18
+ "./embedding/openai": "./dist/embedding/openai.js",
19
+ "./embedding/transformers": "./dist/embedding/transformers.js"
18
20
  },
19
21
  "types": "dist/index.d.ts",
20
22
  "napi": {
@@ -22,10 +24,12 @@
22
24
  "triples": {
23
25
  "defaults": false,
24
26
  "additional": [
25
- "aarch64-apple-darwin",
26
- "aarch64-unknown-linux-gnu",
27
27
  "x86_64-apple-darwin",
28
+ "aarch64-apple-darwin",
28
29
  "x86_64-unknown-linux-gnu",
30
+ "aarch64-unknown-linux-gnu",
31
+ "x86_64-unknown-linux-musl",
32
+ "aarch64-unknown-linux-musl",
29
33
  "x86_64-pc-windows-msvc"
30
34
  ]
31
35
  }
@@ -92,11 +96,13 @@
92
96
  "reflect-metadata": "^0.2.2"
93
97
  },
94
98
  "optionalDependencies": {
95
- "@lancedb/lancedb-darwin-arm64": "0.13.0-beta.1",
96
- "@lancedb/lancedb-linux-arm64-gnu": "0.13.0-beta.1",
97
- "@lancedb/lancedb-darwin-x64": "0.13.0-beta.1",
98
- "@lancedb/lancedb-linux-x64-gnu": "0.13.0-beta.1",
99
- "@lancedb/lancedb-win32-x64-msvc": "0.13.0-beta.1"
99
+ "@lancedb/lancedb-darwin-x64": "0.13.1-beta.0",
100
+ "@lancedb/lancedb-darwin-arm64": "0.13.1-beta.0",
101
+ "@lancedb/lancedb-linux-x64-gnu": "0.13.1-beta.0",
102
+ "@lancedb/lancedb-linux-arm64-gnu": "0.13.1-beta.0",
103
+ "@lancedb/lancedb-linux-x64-musl": "0.13.1-beta.0",
104
+ "@lancedb/lancedb-linux-arm64-musl": "0.13.1-beta.0",
105
+ "@lancedb/lancedb-win32-x64-msvc": "0.13.1-beta.0"
100
106
  },
101
107
  "peerDependencies": {
102
108
  "apache-arrow": ">=13.0.0 <=17.0.0"