@dengxifeng/lancedb 0.26.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/AGENTS.md +13 -0
  2. package/CONTRIBUTING.md +76 -0
  3. package/README.md +37 -0
  4. package/dist/arrow.d.ts +279 -0
  5. package/dist/arrow.js +1316 -0
  6. package/dist/connection.d.ts +259 -0
  7. package/dist/connection.js +224 -0
  8. package/dist/embedding/embedding_function.d.ts +103 -0
  9. package/dist/embedding/embedding_function.js +192 -0
  10. package/dist/embedding/index.d.ts +27 -0
  11. package/dist/embedding/index.js +101 -0
  12. package/dist/embedding/openai.d.ts +16 -0
  13. package/dist/embedding/openai.js +93 -0
  14. package/dist/embedding/registry.d.ts +74 -0
  15. package/dist/embedding/registry.js +165 -0
  16. package/dist/embedding/transformers.d.ts +36 -0
  17. package/dist/embedding/transformers.js +122 -0
  18. package/dist/header.d.ts +162 -0
  19. package/dist/header.js +217 -0
  20. package/dist/index.d.ts +85 -0
  21. package/dist/index.js +106 -0
  22. package/dist/indices.d.ts +692 -0
  23. package/dist/indices.js +156 -0
  24. package/dist/merge.d.ts +80 -0
  25. package/dist/merge.js +92 -0
  26. package/dist/native.d.ts +585 -0
  27. package/dist/native.js +339 -0
  28. package/dist/permutation.d.ts +143 -0
  29. package/dist/permutation.js +184 -0
  30. package/dist/query.d.ts +581 -0
  31. package/dist/query.js +853 -0
  32. package/dist/rerankers/index.d.ts +5 -0
  33. package/dist/rerankers/index.js +19 -0
  34. package/dist/rerankers/rrf.d.ts +14 -0
  35. package/dist/rerankers/rrf.js +28 -0
  36. package/dist/sanitize.d.ts +32 -0
  37. package/dist/sanitize.js +473 -0
  38. package/dist/table.d.ts +581 -0
  39. package/dist/table.js +321 -0
  40. package/dist/util.d.ts +14 -0
  41. package/dist/util.js +77 -0
  42. package/license_header.txt +2 -0
  43. package/package.json +122 -0
package/AGENTS.md ADDED
@@ -0,0 +1,13 @@
1
+ These are the TypeScript bindings of LanceDB.
2
+ The core Rust library is in the `../rust/lancedb` directory, the Rust binding
3
+ code is in the `src/` directory, and the TypeScript bindings are in
4
+ the `lancedb/` directory.
5
+
6
+ Whenever you change the Rust code, you will need to recompile: `npm run build`.
7
+
8
+ Common commands:
9
+ * Build: `npm run build`
10
+ * Lint: `npm run lint`
11
+ * Fix lints: `npm run lint-fix`
12
+ * Test: `npm test`
13
+ * Run single test file: `npm test __test__/arrow.test.ts`
package/CONTRIBUTING.md ADDED
@@ -0,0 +1,76 @@
1
+ # Contributing to LanceDB TypeScript
2
+
3
+ This document outlines the process for contributing to LanceDB TypeScript.
4
+ For general contribution guidelines, see [CONTRIBUTING.md](../CONTRIBUTING.md).
5
+
6
+ ## Project layout
7
+
8
+ The TypeScript package is a wrapper around the Rust library, `lancedb`. We use
9
+ the [napi-rs](https://napi.rs/) library to create the bindings between Rust and
10
+ TypeScript.
11
+
12
+ * `src/`: Rust bindings source code
13
+ * `lancedb/`: TypeScript package source code
14
+ * `__test__/`: Unit tests
15
+ * `examples/`: An npm package with the examples shown in the documentation
16
+
17
+ ## Development environment
18
+
19
+ To set up your development environment, you will need to install the following:
20
+
21
+ 1. Node.js 14 or later
22
+ 2. Rust's package manager, Cargo. Use [rustup](https://rustup.rs/) to install.
23
+ 3. [protoc](https://grpc.io/docs/protoc-installation/) (Protocol Buffers compiler)
24
+
25
+ Initial setup:
26
+
27
+ ```shell
28
+ npm install
29
+ ```
30
+
31
+ ### Commit Hooks
32
+
33
+ It is **highly recommended** to install the [pre-commit](https://pre-commit.com/) hooks to ensure that your
34
+ code is formatted correctly and passes basic checks before committing:
35
+
36
+ ```shell
37
+ pre-commit install
38
+ ```
39
+
40
+ ## Development
41
+
42
+ Most common development commands can be run using the npm scripts.
43
+
44
+ Build the package:
45
+
46
+ ```shell
47
+ npm install
48
+ npm run build
49
+ ```
50
+
51
+ Lint:
52
+
53
+ ```shell
54
+ npm run lint
55
+ ```
56
+
57
+ Format and fix lints:
58
+
59
+ ```shell
60
+ npm run lint-fix
61
+ ```
62
+
63
+ Run tests:
64
+
65
+ ```shell
66
+ npm test
67
+ ```
68
+
69
+ To run a single test:
70
+
71
+ ```shell
72
+ # Single file: table.test.ts
73
+ npm test -- table.test.ts
74
+ # Single test: 'merge insert' in table.test.ts
75
+ npm test -- table.test.ts --testNamePattern=merge\ insert
76
+ ```
package/README.md ADDED
@@ -0,0 +1,37 @@
1
+ # LanceDB JavaScript SDK
2
+
3
+ A JavaScript library for [LanceDB](https://github.com/lancedb/lancedb).
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install @lancedb/lancedb
9
+ ```
10
+
11
+ This will download the appropriate native library for your platform. We currently
12
+ support:
13
+
14
+ - Linux (x86_64 and aarch64 on glibc and musl)
15
+ - macOS (Intel and ARM/M1/M2)
16
+ - Windows (x86_64 and aarch64)
17
+
18
+ ## Usage
19
+
20
+ ### Basic Example
21
+
22
+ ```javascript
23
+ import * as lancedb from "@lancedb/lancedb";
24
+ const db = await lancedb.connect("data/sample-lancedb");
25
+ const table = await db.createTable("my_table", [
26
+ { id: 1, vector: [0.1, 1.0], item: "foo", price: 10.0 },
27
+ { id: 2, vector: [3.9, 0.5], item: "bar", price: 20.0 },
28
+ ]);
29
+ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
30
+ console.log(results);
31
+ ```
32
+
33
+ The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
34
+
35
+ ## Development
36
+
37
+ See [CONTRIBUTING.md](./CONTRIBUTING.md) for information on how to contribute to LanceDB.
package/dist/arrow.d.ts ADDED
@@ -0,0 +1,279 @@
1
+ import { Table as ArrowTable, Binary, BufferType, DataType, Field, FixedSizeBinary, FixedSizeList, Float, Int, LargeBinary, List, Null, RecordBatch, Schema, Struct, Utf8 } from "apache-arrow";
2
+ import { Buffers } from "apache-arrow/data";
3
+ import { type EmbeddingFunction } from "./embedding/embedding_function";
4
+ import { EmbeddingFunctionConfig } from "./embedding/registry";
5
+ export * from "apache-arrow";
6
+ export type SchemaLike = Schema | {
7
+ fields: FieldLike[];
8
+ metadata: Map<string, string>;
9
+ get names(): unknown[];
10
+ };
11
+ export type FieldLike = Field | {
12
+ type: string;
13
+ name: string;
14
+ nullable: boolean;
15
+ metadata?: Map<string, string>;
16
+ };
17
+ export type DataLike = import("apache-arrow").Data<Struct<any>> | {
18
+ type: any;
19
+ length: number;
20
+ offset: number;
21
+ stride: number;
22
+ nullable: boolean;
23
+ children: DataLike[];
24
+ get nullCount(): number;
25
+ values: Buffers<any>[BufferType.DATA];
26
+ typeIds: Buffers<any>[BufferType.TYPE];
27
+ nullBitmap: Buffers<any>[BufferType.VALIDITY];
28
+ valueOffsets: Buffers<any>[BufferType.OFFSET];
29
+ };
30
+ export type RecordBatchLike = RecordBatch | {
31
+ schema: SchemaLike;
32
+ data: DataLike;
33
+ };
34
+ export type TableLike = ArrowTable | {
35
+ schema: SchemaLike;
36
+ batches: RecordBatchLike[];
37
+ };
38
+ export type IntoVector = Float32Array | Float64Array | number[] | Promise<Float32Array | Float64Array | number[]>;
39
+ export type MultiVector = IntoVector[];
40
+ export declare function isMultiVector(value: unknown): value is MultiVector;
41
+ export declare function isIntoVector(value: unknown): value is IntoVector;
42
+ export declare function isArrowTable(value: object): value is TableLike;
43
+ export declare function isNull(value: unknown): value is Null;
44
+ export declare function isInt(value: unknown): value is Int;
45
+ export declare function isFloat(value: unknown): value is Float;
46
+ export declare function isBinary(value: unknown): value is Binary;
47
+ export declare function isLargeBinary(value: unknown): value is LargeBinary;
48
+ export declare function isUtf8(value: unknown): value is Utf8;
49
+ export declare function isLargeUtf8(value: unknown): value is Utf8;
50
+ export declare function isBool(value: unknown): value is Utf8;
51
+ export declare function isDecimal(value: unknown): value is Utf8;
52
+ export declare function isDate(value: unknown): value is Utf8;
53
+ export declare function isTime(value: unknown): value is Utf8;
54
+ export declare function isTimestamp(value: unknown): value is Utf8;
55
+ export declare function isInterval(value: unknown): value is Utf8;
56
+ export declare function isDuration(value: unknown): value is Utf8;
57
+ export declare function isList(value: unknown): value is List;
58
+ export declare function isStruct(value: unknown): value is Struct;
59
+ export declare function isUnion(value: unknown): value is Struct;
60
+ export declare function isFixedSizeBinary(value: unknown): value is FixedSizeBinary;
61
+ export declare function isFixedSizeList(value: unknown): value is FixedSizeList;
62
+ /** Data type accepted by NodeJS SDK */
63
+ export type Data = Record<string, unknown>[] | TableLike;
64
+ export declare class VectorColumnOptions {
65
+ /** Vector column type. */
66
+ type: Float;
67
+ constructor(values?: Partial<VectorColumnOptions>);
68
+ }
69
+ /** Options to control the makeArrowTable call. */
70
+ export declare class MakeArrowTableOptions {
71
+ schema?: SchemaLike;
72
+ vectorColumns: Record<string, VectorColumnOptions>;
73
+ embeddings?: EmbeddingFunction<unknown>;
74
+ embeddingFunction?: EmbeddingFunctionConfig;
75
+ /**
76
+ * If true then string columns will be encoded with dictionary encoding
77
+ *
78
+ * Set this to true if your string columns tend to repeat the same values
79
+ * often. For more precise control use the `schema` property to specify the
80
+ * data type for individual columns.
81
+ *
82
+ * If `schema` is provided then this property is ignored.
83
+ */
84
+ dictionaryEncodeStrings: boolean;
85
+ constructor(values?: Partial<MakeArrowTableOptions>);
86
+ }
87
+ /**
88
+ * An enhanced version of the apache-arrow makeTable function from Apache Arrow
89
+ * that supports nested fields and embeddings columns.
90
+ *
91
+ * (typically you do not need to call this function. It will be called automatically
92
+ * when creating a table or adding data to it)
93
+ *
94
+ * This function converts an array of Record<String, any> (row-major JS objects)
95
+ * to an Arrow Table (a columnar structure)
96
+ *
97
+ * If a schema is provided then it will be used to determine the resulting array
98
+ * types. Fields will also be reordered to fit the order defined by the schema.
99
+ *
100
+ * If a schema is not provided then the types will be inferred and the field order
101
+ * will be controlled by the order of properties in the first record. If a type
102
+ * is inferred it will always be nullable.
103
+ *
104
+ * If not all fields are found in the data, then a subset of the schema will be
105
+ * returned.
106
+ *
107
+ * If the input is empty then a schema must be provided to create an empty table.
108
+ *
109
+ * When a schema is not specified then data types will be inferred. The inference
110
+ * rules are as follows:
111
+ *
112
+ * - boolean => Bool
113
+ * - number => Float64
114
+ * - bigint => Int64
115
+ * - String => Utf8
116
+ * - Buffer => Binary
117
+ * - Record<String, any> => Struct
118
+ * - Array<any> => List
119
+ * @example
120
+ * ```ts
121
+ * import { fromTableToBuffer, makeArrowTable } from "../arrow";
122
+ * import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
123
+ *
124
+ * const schema = new Schema([
125
+ * new Field("a", new Int32()),
126
+ * new Field("b", new Float32()),
127
+ * new Field("c", new FixedSizeList(3, new Field("item", new Float16()))),
128
+ * ]);
129
+ * const table = makeArrowTable([
130
+ * { a: 1, b: 2, c: [1, 2, 3] },
131
+ * { a: 4, b: 5, c: [4, 5, 6] },
132
+ * { a: 7, b: 8, c: [7, 8, 9] },
133
+ * ], { schema });
134
+ * ```
135
+ *
136
+ * By default it assumes that the column named `vector` is a vector column
137
+ * and it will be converted into a fixed size list array of type float32.
138
+ * The `vectorColumns` option can be used to support other vector column
139
+ * names and data types.
140
+ *
141
+ * ```ts
142
+ * const schema = new Schema([
143
+ * new Field("a", new Float64()),
144
+ * new Field("b", new Float64()),
145
+ * new Field(
146
+ * "vector",
147
+ * new FixedSizeList(3, new Field("item", new Float32()))
148
+ * ),
149
+ * ]);
150
+ * const table = makeArrowTable([
151
+ * { a: 1, b: 2, vector: [1, 2, 3] },
152
+ * { a: 4, b: 5, vector: [4, 5, 6] },
153
+ * { a: 7, b: 8, vector: [7, 8, 9] },
154
+ * ]);
155
+ * assert.deepEqual(table.schema, schema);
156
+ * ```
157
+ *
158
+ * You can specify the vector column types and names using the options as well
159
+ *
160
+ * ```ts
161
+ * const schema = new Schema([
162
+ * new Field('a', new Float64()),
163
+ * new Field('b', new Float64()),
164
+ * new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
165
+ * new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
166
+ * ]);
167
+ * const table = makeArrowTable([
168
+ * { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
169
+ * { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
170
+ * { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
171
+ * ], {
172
+ * vectorColumns: {
173
+ * vec1: { type: new Float16() },
174
+ * vec2: { type: new Float16() }
175
+ * }
176
+ * }
177
+ * assert.deepEqual(table.schema, schema)
178
+ * ```
179
+ */
180
+ export declare function makeArrowTable(data: Array<Record<string, unknown>>, options?: Partial<MakeArrowTableOptions>, metadata?: Map<string, string>): ArrowTable;
181
+ /**
182
+ * Create an empty Arrow table with the provided schema
183
+ */
184
+ export declare function makeEmptyTable(schema: SchemaLike, metadata?: Map<string, string>): ArrowTable;
185
+ /**
186
+ * Convert an Array of records into an Arrow Table, optionally applying an
187
+ * embeddings function to it.
188
+ *
189
+ * This function calls `makeArrowTable` first to create the Arrow Table.
190
+ * Any provided `makeTableOptions` (e.g. a schema) will be passed on to
191
+ * that call.
192
+ *
193
+ * The embedding function will be passed a column of values (based on the
194
+ * `sourceColumn` of the embedding function) and expects to receive back
195
+ * number[][] which will be converted into a fixed size list column. By
196
+ * default this will be a fixed size list of Float32 but that can be
197
+ * customized by the `embeddingDataType` property of the embedding function.
198
+ *
199
+ * If a schema is provided in `makeTableOptions` then it should include the
200
+ * embedding columns. If no schema is provded then embedding columns will
201
+ * be placed at the end of the table, after all of the input columns.
202
+ */
203
+ export declare function convertToTable(data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunctionConfig, makeTableOptions?: Partial<MakeArrowTableOptions>): Promise<ArrowTable>;
204
+ /** Creates the Arrow Type for a Vector column with dimension `dim` */
205
+ export declare function newVectorType<T extends Float>(dim: number, innerType: unknown): FixedSizeList<T>;
206
+ /**
207
+ * Serialize an Array of records into a buffer using the Arrow IPC File serialization
208
+ *
209
+ * This function will call `convertToTable` and pass on `embeddings` and `schema`
210
+ *
211
+ * `schema` is required if data is empty
212
+ */
213
+ export declare function fromRecordsToBuffer(data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunctionConfig, schema?: Schema): Promise<Buffer>;
214
+ /**
215
+ * Serialize an Array of records into a buffer using the Arrow IPC Stream serialization
216
+ *
217
+ * This function will call `convertToTable` and pass on `embeddings` and `schema`
218
+ *
219
+ * `schema` is required if data is empty
220
+ */
221
+ export declare function fromRecordsToStreamBuffer(data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunctionConfig, schema?: Schema): Promise<Buffer>;
222
+ /**
223
+ * Serialize an Arrow Table into a buffer using the Arrow IPC File serialization
224
+ *
225
+ * This function will apply `embeddings` to the table in a manner similar to
226
+ * `convertToTable`.
227
+ *
228
+ * `schema` is required if the table is empty
229
+ */
230
+ export declare function fromTableToBuffer(table: ArrowTable, embeddings?: EmbeddingFunctionConfig, schema?: SchemaLike): Promise<Buffer>;
231
+ /**
232
+ * Serialize an Arrow Table into a buffer using the Arrow IPC File serialization
233
+ *
234
+ * This function will apply `embeddings` to the table in a manner similar to
235
+ * `convertToTable`.
236
+ *
237
+ * `schema` is required if the table is empty
238
+ */
239
+ export declare function fromDataToBuffer(data: Data, embeddings?: EmbeddingFunctionConfig, schema?: Schema): Promise<Buffer>;
240
+ /**
241
+ * Read a single record batch from a buffer.
242
+ *
243
+ * Returns null if the buffer does not contain a record batch
244
+ */
245
+ export declare function fromBufferToRecordBatch(data: Buffer): Promise<RecordBatch | null>;
246
+ /**
247
+ * Create a buffer containing a single record batch
248
+ */
249
+ export declare function fromRecordBatchToBuffer(batch: RecordBatch): Promise<Buffer>;
250
+ /**
251
+ * Serialize an Arrow Table into a buffer using the Arrow IPC Stream serialization
252
+ *
253
+ * This function will apply `embeddings` to the table in a manner similar to
254
+ * `convertToTable`.
255
+ *
256
+ * `schema` is required if the table is empty
257
+ */
258
+ export declare function fromTableToStreamBuffer(table: ArrowTable, embeddings?: EmbeddingFunctionConfig, schema?: SchemaLike): Promise<Buffer>;
259
+ /**
260
+ * Create an empty table with the given schema
261
+ */
262
+ export declare function createEmptyTable(schema: Schema): ArrowTable;
263
+ /**
264
+ * Ensures that all nested fields defined in the schema exist in the data,
265
+ * filling missing fields with null values.
266
+ */
267
+ export declare function ensureNestedFieldsExist(data: Array<Record<string, unknown>>, schema: Schema): Array<Record<string, unknown>>;
268
+ interface JsonDataType {
269
+ type: string;
270
+ fields?: JsonField[];
271
+ length?: number;
272
+ }
273
+ interface JsonField {
274
+ name: string;
275
+ type: JsonDataType;
276
+ nullable: boolean;
277
+ metadata: Map<string, string>;
278
+ }
279
+ export declare function dataTypeToJson(dataType: DataType): JsonDataType;