@lancedb/lancedb 0.14.1 → 0.15.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,76 @@
1
+ # Contributing to LanceDB TypeScript
2
+
3
+ This document outlines the process for contributing to LanceDB TypeScript.
4
+ For general contribution guidelines, see [CONTRIBUTING.md](../CONTRIBUTING.md).
5
+
6
+ ## Project layout
7
+
8
+ The TypeScript package is a wrapper around the Rust library, `lancedb`. We use
9
+ the [napi-rs](https://napi.rs/) library to create the bindings between Rust and
10
+ TypeScript.
11
+
12
+ * `src/`: Rust bindings source code
13
+ * `lancedb/`: TypeScript package source code
14
+ * `__test__/`: Unit tests
15
+ * `examples/`: An npm package with the examples shown in the documentation
16
+
17
+ ## Development environment
18
+
19
+ To set up your development environment, you will need to install the following:
20
+
21
+ 1. Node.js 14 or later
22
+ 2. Rust's package manager, Cargo. Use [rustup](https://rustup.rs/) to install.
23
+ 3. [protoc](https://grpc.io/docs/protoc-installation/) (Protocol Buffers compiler)
24
+
25
+ Initial setup:
26
+
27
+ ```shell
28
+ npm install
29
+ ```
30
+
31
+ ### Commit Hooks
32
+
33
+ It is **highly recommended** to install the [pre-commit](https://pre-commit.com/) hooks to ensure that your
34
+ code is formatted correctly and passes basic checks before committing:
35
+
36
+ ```shell
37
+ pre-commit install
38
+ ```
39
+
40
+ ## Development
41
+
42
+ Most common development commands can be run using the npm scripts.
43
+
44
+ Build the package:
45
+
46
+ ```shell
47
+ npm install
48
+ npm run build
49
+ ```
50
+
51
+ Lint:
52
+
53
+ ```shell
54
+ npm run lint
55
+ ```
56
+
57
+ Format and fix lints:
58
+
59
+ ```shell
60
+ npm run lint-fix
61
+ ```
62
+
63
+ Run tests:
64
+
65
+ ```shell
66
+ npm test
67
+ ```
68
+
69
+ To run a single test:
70
+
71
+ ```shell
72
+ # Single file: table.test.ts
73
+ npm test -- table.test.ts
74
+ # Single test: 'merge insert' in table.test.ts
75
+ npm test -- table.test.ts --testNamePattern=merge\ insert
76
+ ```
package/README.md CHANGED
@@ -36,37 +36,4 @@ The [quickstart](../basic.md) contains a more complete example.
36
36
 
37
37
  ## Development
38
38
 
39
- ```sh
40
- npm run build
41
- npm run test
42
- ```
43
-
44
- ### Running lint / format
45
-
46
- LanceDB uses [biome](https://biomejs.dev/) for linting and formatting. If you are using VS Code, you will need to install the official [Biome](https://marketplace.visualstudio.com/items?itemName=biomejs.biome) extension.
47
- To manually lint your code you can run:
48
-
49
- ```sh
50
- npm run lint
51
- ```
52
-
53
- To automatically fix all fixable issues:
54
-
55
- ```sh
56
- npm run lint-fix
57
- ```
58
-
59
- If you do not have your workspace root set to the `nodejs` directory, unfortunately the extension will not work. You can still run the linting and formatting commands manually.
60
-
61
- ### Generating docs
62
-
63
- ```sh
64
- npm run docs
65
-
66
- cd ../docs
67
- # Assume the virtual environment was created
68
- # python3 -m venv venv
69
- # pip install -r requirements.txt
70
- . ./venv/bin/activate
71
- mkdocs build
72
- ```
39
+ See [CONTRIBUTING.md](./CONTRIBUTING.md) for information on how to contribute to LanceDB.
package/dist/arrow.d.ts CHANGED
@@ -233,6 +233,16 @@ export declare function fromTableToBuffer(table: ArrowTable, embeddings?: Embedd
233
233
  * `schema` is required if the table is empty
234
234
  */
235
235
  export declare function fromDataToBuffer(data: Data, embeddings?: EmbeddingFunctionConfig, schema?: Schema): Promise<Buffer>;
236
+ /**
237
+ * Read a single record batch from a buffer.
238
+ *
239
+ * Returns null if the buffer does not contain a record batch
240
+ */
241
+ export declare function fromBufferToRecordBatch(data: Buffer): Promise<RecordBatch | null>;
242
+ /**
243
+ * Create a buffer containing a single record batch
244
+ */
245
+ export declare function fromRecordBatchToBuffer(batch: RecordBatch): Promise<Buffer>;
236
246
  /**
237
247
  * Serialize an Arrow Table into a buffer using the Arrow IPC Stream serialization
238
248
  *
package/dist/arrow.js CHANGED
@@ -56,6 +56,8 @@ exports.fromRecordsToBuffer = fromRecordsToBuffer;
56
56
  exports.fromRecordsToStreamBuffer = fromRecordsToStreamBuffer;
57
57
  exports.fromTableToBuffer = fromTableToBuffer;
58
58
  exports.fromDataToBuffer = fromDataToBuffer;
59
+ exports.fromBufferToRecordBatch = fromBufferToRecordBatch;
60
+ exports.fromRecordBatchToBuffer = fromRecordBatchToBuffer;
59
61
  exports.fromTableToStreamBuffer = fromTableToStreamBuffer;
60
62
  exports.createEmptyTable = createEmptyTable;
61
63
  const apache_arrow_1 = require("apache-arrow");
@@ -655,6 +657,24 @@ async function fromDataToBuffer(data, embeddings, schema) {
655
657
  return fromTableToBuffer(table);
656
658
  }
657
659
  }
660
+ /**
661
+ * Read a single record batch from a buffer.
662
+ *
663
+ * Returns null if the buffer does not contain a record batch
664
+ */
665
+ async function fromBufferToRecordBatch(data) {
666
+ const iter = await apache_arrow_1.RecordBatchFileReader.readAll(Buffer.from(data)).next()
667
+ .value;
668
+ const recordBatch = iter?.next().value;
669
+ return recordBatch || null;
670
+ }
671
+ /**
672
+ * Create a buffer containing a single record batch
673
+ */
674
+ async function fromRecordBatchToBuffer(batch) {
675
+ const writer = new apache_arrow_1.RecordBatchFileWriter().writeAll([batch]);
676
+ return Buffer.from(await writer.toUint8Array());
677
+ }
658
678
  /**
659
679
  * Serialize an Arrow Table into a buffer using the Arrow IPC Stream serialization
660
680
  *
package/dist/index.d.ts CHANGED
@@ -7,6 +7,7 @@ export { ExecutableQuery, Query, QueryBase, VectorQuery, RecordBatchIterator, }
7
7
  export { Index, IndexOptions, IvfPqOptions } from "./indices";
8
8
  export { Table, AddDataOptions, UpdateOptions, OptimizeOptions } from "./table";
9
9
  export * as embedding from "./embedding";
10
+ export * as rerankers from "./rerankers";
10
11
  /**
11
12
  * Connect to a LanceDB instance at the given URI.
12
13
  *
package/dist/index.js CHANGED
@@ -13,7 +13,7 @@
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
- exports.embedding = exports.Table = exports.Index = exports.RecordBatchIterator = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = exports.WriteMode = void 0;
16
+ exports.rerankers = exports.embedding = exports.Table = exports.Index = exports.RecordBatchIterator = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = exports.WriteMode = void 0;
17
17
  exports.connect = connect;
18
18
  const connection_1 = require("./connection");
19
19
  const native_js_1 = require("./native.js");
@@ -35,6 +35,7 @@ Object.defineProperty(exports, "Index", { enumerable: true, get: function () { r
35
35
  var table_1 = require("./table");
36
36
  Object.defineProperty(exports, "Table", { enumerable: true, get: function () { return table_1.Table; } });
37
37
  exports.embedding = require("./embedding");
38
+ exports.rerankers = require("./rerankers");
38
39
  async function connect(uriOrOptions, opts = {}) {
39
40
  let uri;
40
41
  if (typeof uriOrOptions !== "string") {
package/dist/native.d.ts CHANGED
@@ -78,6 +78,14 @@ export interface ClientConfig {
78
78
  retryConfig?: RetryConfig
79
79
  timeoutConfig?: TimeoutConfig
80
80
  }
81
+ export interface RerankerCallbacks {
82
+ rerankHybrid: (...args: any[]) => any
83
+ }
84
+ export interface RerankHybridCallbackArgs {
85
+ query: string
86
+ vecResults: Array<number>
87
+ ftsResults: Array<number>
88
+ }
81
89
  /** A description of an index currently configured on a column */
82
90
  export interface IndexConfig {
83
91
  /** The name of the index */
@@ -296,6 +304,7 @@ export class VectorQuery {
296
304
  postfilter(): void
297
305
  refineFactor(refineFactor: number): void
298
306
  nprobes(nprobe: number): void
307
+ distanceRange(lowerBound?: number | undefined | null, upperBound?: number | undefined | null): void
299
308
  ef(ef: number): void
300
309
  bypassVectorIndex(): void
301
310
  onlyIf(predicate: string): void
@@ -306,9 +315,25 @@ export class VectorQuery {
306
315
  offset(offset: number): void
307
316
  fastSearch(): void
308
317
  withRowId(): void
318
+ rerank(callbacks: RerankerCallbacks): void
309
319
  execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
310
320
  explainPlan(verbose: boolean): Promise<string>
311
321
  }
322
+ /**
323
+ * Reranker implementation that "wraps" a NodeJS Reranker implementation.
324
+ * This contains references to the callbacks that can be used to invoke the
325
+ * reranking methods on the NodeJS implementation and handles serializing the
326
+ * record batches to Arrow IPC buffers.
327
+ */
328
+ export class Reranker {
329
+ static new(callbacks: RerankerCallbacks): Reranker
330
+ }
331
+ export type RRFReranker = RrfReranker
332
+ /** Wrapper around rust RRFReranker */
333
+ export class RrfReranker {
334
+ static tryNew(k: Float32Array): Promise<RrfReranker>
335
+ rerankHybrid(query: string, vecResults: Buffer, ftsResults: Buffer): Promise<Buffer>
336
+ }
312
337
  export class Table {
313
338
  name: string
314
339
  display(): string
package/dist/native.js CHANGED
@@ -319,12 +319,14 @@ if (!nativeBinding) {
319
319
  }
320
320
  throw new Error(`Failed to load native binding`);
321
321
  }
322
- const { Connection, Index, RecordBatchIterator, NativeMergeInsertBuilder, Query, VectorQuery, Table, WriteMode } = nativeBinding;
322
+ const { Connection, Index, RecordBatchIterator, NativeMergeInsertBuilder, Query, VectorQuery, Reranker, RrfReranker, Table, WriteMode } = nativeBinding;
323
323
  module.exports.Connection = Connection;
324
324
  module.exports.Index = Index;
325
325
  module.exports.RecordBatchIterator = RecordBatchIterator;
326
326
  module.exports.NativeMergeInsertBuilder = NativeMergeInsertBuilder;
327
327
  module.exports.Query = Query;
328
328
  module.exports.VectorQuery = VectorQuery;
329
+ module.exports.Reranker = Reranker;
330
+ module.exports.RrfReranker = RrfReranker;
329
331
  module.exports.Table = Table;
330
332
  module.exports.WriteMode = WriteMode;
package/dist/query.d.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  import { Table as ArrowTable, type IntoVector, RecordBatch } from "./arrow";
2
2
  import { type IvfPqOptions } from "./indices";
3
3
  import { RecordBatchIterator as NativeBatchIterator, Query as NativeQuery, Table as NativeTable, VectorQuery as NativeVectorQuery } from "./native";
4
+ import { Reranker } from "./rerankers";
4
5
  export declare class RecordBatchIterator implements AsyncIterator<RecordBatch> {
5
6
  private promisedInner?;
6
7
  private inner?;
@@ -181,6 +182,7 @@ export declare class VectorQuery extends QueryBase<NativeVectorQuery> {
181
182
  * you the desired recall.
182
183
  */
183
184
  nprobes(nprobes: number): VectorQuery;
185
+ distanceRange(lowerBound?: number, upperBound?: number): VectorQuery;
184
186
  /**
185
187
  * Set the number of candidates to consider during the search
186
188
  *
@@ -278,6 +280,7 @@ export declare class VectorQuery extends QueryBase<NativeVectorQuery> {
278
280
  */
279
281
  bypassVectorIndex(): VectorQuery;
280
282
  addQueryVector(vector: IntoVector): VectorQuery;
283
+ rerank(reranker: Reranker): VectorQuery;
281
284
  }
282
285
  /** A builder for LanceDB queries. */
283
286
  export declare class Query extends QueryBase<NativeQuery> {
package/dist/query.js CHANGED
@@ -308,6 +308,18 @@ class VectorQuery extends QueryBase {
308
308
  super.doCall((inner) => inner.nprobes(nprobes));
309
309
  return this;
310
310
  }
311
+ /**
312
+ * Set the distance range to use
313
+ *
314
+ * Only rows with distances within range [lowerBound, upperBound)
315
+ * will be returned.
316
+ *
317
+ * `undefined` means no lower or upper bound.
318
+ */
319
+ distanceRange(lowerBound, upperBound) {
320
+ super.doCall((inner) => inner.distanceRange(lowerBound, upperBound));
321
+ return this;
322
+ }
311
323
  /**
312
324
  * Set the number of candidates to consider during the search
313
325
  *
@@ -457,6 +469,18 @@ class VectorQuery extends QueryBase {
457
469
  return this;
458
470
  }
459
471
  }
472
+ rerank(reranker) {
473
+ super.doCall((inner) => inner.rerank({
474
+ rerankHybrid: async (_, args) => {
475
+ const vecResults = await (0, arrow_1.fromBufferToRecordBatch)(args.vecResults);
476
+ const ftsResults = await (0, arrow_1.fromBufferToRecordBatch)(args.ftsResults);
477
+ const result = await reranker.rerankHybrid(args.query, vecResults, ftsResults);
478
+ const buffer = (0, arrow_1.fromRecordBatchToBuffer)(result);
479
+ return buffer;
480
+ },
481
+ }));
482
+ return this;
483
+ }
460
484
  }
461
485
  exports.VectorQuery = VectorQuery;
462
486
  /** A builder for LanceDB queries. */
@@ -0,0 +1,5 @@
1
+ import { RecordBatch } from "apache-arrow";
2
+ export * from "./rrf";
3
+ export interface Reranker {
4
+ rerankHybrid(query: string, vecResults: RecordBatch, ftsResults: RecordBatch): Promise<RecordBatch>;
5
+ }
@@ -0,0 +1,19 @@
1
+ "use strict";
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ // SPDX-FileCopyrightText: Copyright The LanceDB Authors
4
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
5
+ if (k2 === undefined) k2 = k;
6
+ var desc = Object.getOwnPropertyDescriptor(m, k);
7
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
8
+ desc = { enumerable: true, get: function() { return m[k]; } };
9
+ }
10
+ Object.defineProperty(o, k2, desc);
11
+ }) : (function(o, m, k, k2) {
12
+ if (k2 === undefined) k2 = k;
13
+ o[k2] = m[k];
14
+ }));
15
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
16
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
17
+ };
18
+ Object.defineProperty(exports, "__esModule", { value: true });
19
+ __exportStar(require("./rrf"), exports);
@@ -0,0 +1,13 @@
1
+ import { RecordBatch } from "apache-arrow";
2
+ import { RrfReranker as NativeRRFReranker } from "../native";
3
+ /**
4
+ * Reranks the results using the Reciprocal Rank Fusion (RRF) algorithm.
5
+ *
6
+ * Internally this uses the Rust implementation
7
+ */
8
+ export declare class RRFReranker {
9
+ private inner;
10
+ constructor(inner: NativeRRFReranker);
11
+ static create(k?: number): Promise<RRFReranker>;
12
+ rerankHybrid(query: string, vecResults: RecordBatch, ftsResults: RecordBatch): Promise<RecordBatch>;
13
+ }
@@ -0,0 +1,27 @@
1
+ "use strict";
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ // SPDX-FileCopyrightText: Copyright The LanceDB Authors
4
+ Object.defineProperty(exports, "__esModule", { value: true });
5
+ exports.RRFReranker = void 0;
6
+ const arrow_1 = require("../arrow");
7
+ const native_1 = require("../native");
8
+ /**
9
+ * Reranks the results using the Reciprocal Rank Fusion (RRF) algorithm.
10
+ *
11
+ * Internally this uses the Rust implementation
12
+ */
13
+ class RRFReranker {
14
+ inner;
15
+ constructor(inner) {
16
+ this.inner = inner;
17
+ }
18
+ static async create(k = 60) {
19
+ return new RRFReranker(await native_1.RrfReranker.tryNew(new Float32Array([k])));
20
+ }
21
+ async rerankHybrid(query, vecResults, ftsResults) {
22
+ const buffer = await this.inner.rerankHybrid(query, await (0, arrow_1.fromRecordBatchToBuffer)(vecResults), await (0, arrow_1.fromRecordBatchToBuffer)(ftsResults));
23
+ const recordBatch = await (0, arrow_1.fromBufferToRecordBatch)(buffer);
24
+ return recordBatch;
25
+ }
26
+ }
27
+ exports.RRFReranker = RRFReranker;
package/package.json CHANGED
@@ -11,7 +11,7 @@
11
11
  "ann"
12
12
  ],
13
13
  "private": false,
14
- "version": "0.14.1",
14
+ "version": "0.15.0-beta.0",
15
15
  "main": "dist/index.js",
16
16
  "exports": {
17
17
  ".": "./dist/index.js",
@@ -98,14 +98,14 @@
98
98
  "reflect-metadata": "^0.2.2"
99
99
  },
100
100
  "optionalDependencies": {
101
- "@lancedb/lancedb-darwin-x64": "0.14.1",
102
- "@lancedb/lancedb-darwin-arm64": "0.14.1",
103
- "@lancedb/lancedb-linux-x64-gnu": "0.14.1",
104
- "@lancedb/lancedb-linux-arm64-gnu": "0.14.1",
105
- "@lancedb/lancedb-linux-x64-musl": "0.14.1",
106
- "@lancedb/lancedb-linux-arm64-musl": "0.14.1",
107
- "@lancedb/lancedb-win32-x64-msvc": "0.14.1",
108
- "@lancedb/lancedb-win32-arm64-msvc": "0.14.1"
101
+ "@lancedb/lancedb-darwin-x64": "0.15.0-beta.0",
102
+ "@lancedb/lancedb-darwin-arm64": "0.15.0-beta.0",
103
+ "@lancedb/lancedb-linux-x64-gnu": "0.15.0-beta.0",
104
+ "@lancedb/lancedb-linux-arm64-gnu": "0.15.0-beta.0",
105
+ "@lancedb/lancedb-linux-x64-musl": "0.15.0-beta.0",
106
+ "@lancedb/lancedb-linux-arm64-musl": "0.15.0-beta.0",
107
+ "@lancedb/lancedb-win32-x64-msvc": "0.15.0-beta.0",
108
+ "@lancedb/lancedb-win32-arm64-msvc": "0.15.0-beta.0"
109
109
  },
110
110
  "peerDependencies": {
111
111
  "apache-arrow": ">=15.0.0 <=18.1.0"