@lancedb/lancedb 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/biome.json +8 -2
  2. package/dist/arrow.d.ts +36 -9
  3. package/dist/arrow.js +222 -24
  4. package/dist/connection.d.ts +10 -1
  5. package/dist/connection.js +13 -7
  6. package/dist/embedding/embedding_function.d.ts +54 -28
  7. package/dist/embedding/embedding_function.js +89 -10
  8. package/dist/embedding/index.d.ts +28 -2
  9. package/dist/embedding/index.js +111 -4
  10. package/dist/embedding/openai.d.ts +16 -7
  11. package/dist/embedding/openai.js +62 -12
  12. package/dist/embedding/registry.d.ts +58 -0
  13. package/dist/embedding/registry.js +127 -0
  14. package/dist/native.d.ts +5 -4
  15. package/dist/query.d.ts +19 -7
  16. package/dist/query.js +27 -13
  17. package/dist/sanitize.d.ts +22 -1
  18. package/dist/sanitize.js +123 -110
  19. package/dist/table.d.ts +18 -3
  20. package/dist/table.js +33 -3
  21. package/lancedb/arrow.ts +243 -41
  22. package/lancedb/connection.ts +35 -6
  23. package/lancedb/embedding/embedding_function.ts +147 -42
  24. package/lancedb/embedding/index.ts +113 -2
  25. package/lancedb/embedding/openai.ts +62 -16
  26. package/lancedb/embedding/registry.ts +176 -0
  27. package/lancedb/query.ts +58 -14
  28. package/lancedb/sanitize.ts +22 -22
  29. package/lancedb/table.ts +67 -5
  30. package/nodejs-artifacts/arrow.d.ts +36 -9
  31. package/nodejs-artifacts/arrow.js +222 -24
  32. package/nodejs-artifacts/connection.d.ts +10 -1
  33. package/nodejs-artifacts/connection.js +13 -7
  34. package/nodejs-artifacts/embedding/embedding_function.d.ts +54 -28
  35. package/nodejs-artifacts/embedding/embedding_function.js +89 -10
  36. package/nodejs-artifacts/embedding/index.d.ts +28 -2
  37. package/nodejs-artifacts/embedding/index.js +111 -4
  38. package/nodejs-artifacts/embedding/openai.d.ts +16 -7
  39. package/nodejs-artifacts/embedding/openai.js +62 -12
  40. package/nodejs-artifacts/embedding/registry.d.ts +58 -0
  41. package/nodejs-artifacts/embedding/registry.js +127 -0
  42. package/nodejs-artifacts/native.d.ts +5 -4
  43. package/nodejs-artifacts/query.d.ts +19 -7
  44. package/nodejs-artifacts/query.js +27 -13
  45. package/nodejs-artifacts/sanitize.d.ts +22 -1
  46. package/nodejs-artifacts/sanitize.js +123 -110
  47. package/nodejs-artifacts/table.d.ts +18 -3
  48. package/nodejs-artifacts/table.js +33 -3
  49. package/package.json +14 -9
  50. package/tsconfig.json +3 -1
@@ -1,5 +1,5 @@
1
1
  "use strict";
2
- // Copyright 2023 Lance Developers.
2
+ // Copyright 2024 Lance Developers.
3
3
  //
4
4
  // Licensed under the Apache License, Version 2.0 (the "License");
5
5
  // you may not use this file except in compliance with the License.
@@ -13,15 +13,94 @@
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
- exports.isEmbeddingFunction = void 0;
17
- /** Test if the input seems to be an embedding function */
18
- function isEmbeddingFunction(value) {
19
- if (typeof value !== "object" || value === null) {
20
- return false;
16
+ exports.EmbeddingFunction = void 0;
17
+ require("reflect-metadata");
18
+ const arrow_1 = require("../arrow");
19
+ const sanitize_1 = require("../sanitize");
20
+ /**
21
+ * An embedding function that automatically creates vector representation for a given column.
22
+ */
23
+ class EmbeddingFunction {
24
+ /**
25
+ * sourceField is used in combination with `LanceSchema` to provide a declarative data model
26
+ *
27
+ * @param optionsOrDatatype - The options for the field or the datatype
28
+ *
29
+ * @see {@link lancedb.LanceSchema}
30
+ */
31
+ sourceField(optionsOrDatatype) {
32
+ let datatype = (0, arrow_1.isDataType)(optionsOrDatatype)
33
+ ? optionsOrDatatype
34
+ : optionsOrDatatype?.datatype;
35
+ if (!datatype) {
36
+ throw new Error("Datatype is required");
37
+ }
38
+ datatype = (0, sanitize_1.sanitizeType)(datatype);
39
+ const metadata = new Map();
40
+ metadata.set("source_column_for", this);
41
+ return [datatype, metadata];
21
42
  }
22
- if (!("sourceColumn" in value) || !("embed" in value)) {
23
- return false;
43
+ /**
44
+ * vectorField is used in combination with `LanceSchema` to provide a declarative data model
45
+ *
46
+ * @param optionsOrDatatype - The options for the field or the datatype
47
+ *
48
+ * @see {@link lancedb.LanceSchema}
49
+ */
50
+ vectorField(optionsOrDatatype) {
51
+ let dtype;
52
+ let vectorType;
53
+ let dims = this.ndims();
54
+ // `func.vectorField(new Float32())`
55
+ if ((0, arrow_1.isDataType)(optionsOrDatatype)) {
56
+ dtype = optionsOrDatatype;
57
+ }
58
+ else {
59
+ // `func.vectorField({
60
+ // datatype: new Float32(),
61
+ // dims: 10
62
+ // })`
63
+ dims = dims ?? optionsOrDatatype?.dims;
64
+ dtype = optionsOrDatatype?.datatype;
65
+ }
66
+ if (dtype !== undefined) {
67
+ // `func.vectorField(new FixedSizeList(dims, new Field("item", new Float32(), true)))`
68
+ // or `func.vectorField({datatype: new FixedSizeList(dims, new Field("item", new Float32(), true))})`
69
+ if ((0, arrow_1.isFixedSizeList)(dtype)) {
70
+ vectorType = dtype;
71
+ // `func.vectorField(new Float32())`
72
+ // or `func.vectorField({datatype: new Float32()})`
73
+ }
74
+ else if ((0, arrow_1.isFloat)(dtype)) {
75
+ // No `ndims` impl and no `{dims: n}` provided;
76
+ if (dims === undefined) {
77
+ throw new Error("ndims is required for vector field");
78
+ }
79
+ vectorType = (0, arrow_1.newVectorType)(dims, dtype);
80
+ }
81
+ else {
82
+ throw new Error("Expected FixedSizeList or Float as datatype for vector field");
83
+ }
84
+ }
85
+ else {
86
+ if (dims === undefined) {
87
+ throw new Error("ndims is required for vector field");
88
+ }
89
+ vectorType = new arrow_1.FixedSizeList(dims, new arrow_1.Field("item", new arrow_1.Float32(), true));
90
+ }
91
+ const metadata = new Map();
92
+ metadata.set("vector_column_for", this);
93
+ return [vectorType, metadata];
94
+ }
95
+ /** The number of dimensions of the embeddings */
96
+ ndims() {
97
+ return undefined;
98
+ }
99
+ /**
100
+ * Compute the embeddings for a single query
101
+ */
102
+ async computeQueryEmbeddings(data) {
103
+ return this.computeSourceEmbeddings([data]).then((embeddings) => embeddings[0]);
24
104
  }
25
- return (typeof value.sourceColumn === "string" && typeof value.embed === "function");
26
105
  }
27
- exports.isEmbeddingFunction = isEmbeddingFunction;
106
+ exports.EmbeddingFunction = EmbeddingFunction;
@@ -1,2 +1,28 @@
1
- export { EmbeddingFunction, isEmbeddingFunction } from "./embedding_function";
2
- export { OpenAIEmbeddingFunction } from "./openai";
1
+ import { Schema } from "../arrow";
2
+ import { EmbeddingFunction } from "./embedding_function";
3
+ export { EmbeddingFunction } from "./embedding_function";
4
+ export * from "./openai";
5
+ export * from "./registry";
6
+ /**
7
+ * Create a schema with embedding functions.
8
+ *
9
+ * @param fields
10
+ * @returns Schema
11
+ * @example
12
+ * ```ts
13
+ * class MyEmbeddingFunction extends EmbeddingFunction {
14
+ * // ...
15
+ * }
16
+ * const func = new MyEmbeddingFunction();
17
+ * const schema = LanceSchema({
18
+ * id: new Int32(),
19
+ * text: func.sourceField(new Utf8()),
20
+ * vector: func.vectorField(),
21
+ * // optional: specify the datatype and/or dimensions
22
+ * vector2: func.vectorField({ datatype: new Float32(), dims: 3}),
23
+ * });
24
+ *
25
+ * const table = await db.createTable("my_table", data, { schema });
26
+ * ```
27
+ */
28
+ export declare function LanceSchema(fields: Record<string, [object, Map<string, EmbeddingFunction>] | object>): Schema;
@@ -1,7 +1,114 @@
1
1
  "use strict";
2
+ // Copyright 2023 Lance Developers.
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
16
+ if (k2 === undefined) k2 = k;
17
+ var desc = Object.getOwnPropertyDescriptor(m, k);
18
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
19
+ desc = { enumerable: true, get: function() { return m[k]; } };
20
+ }
21
+ Object.defineProperty(o, k2, desc);
22
+ }) : (function(o, m, k, k2) {
23
+ if (k2 === undefined) k2 = k;
24
+ o[k2] = m[k];
25
+ }));
26
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
27
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
28
+ };
2
29
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.OpenAIEmbeddingFunction = exports.isEmbeddingFunction = void 0;
30
+ exports.LanceSchema = exports.EmbeddingFunction = void 0;
31
+ const arrow_1 = require("../arrow");
32
+ const arrow_2 = require("../arrow");
33
+ const sanitize_1 = require("../sanitize");
34
+ const registry_1 = require("./registry");
4
35
  var embedding_function_1 = require("./embedding_function");
5
- Object.defineProperty(exports, "isEmbeddingFunction", { enumerable: true, get: function () { return embedding_function_1.isEmbeddingFunction; } });
6
- var openai_1 = require("./openai");
7
- Object.defineProperty(exports, "OpenAIEmbeddingFunction", { enumerable: true, get: function () { return openai_1.OpenAIEmbeddingFunction; } });
36
+ Object.defineProperty(exports, "EmbeddingFunction", { enumerable: true, get: function () { return embedding_function_1.EmbeddingFunction; } });
37
+ // We need to explicitly export '*' so that the `register` decorator actually registers the class.
38
+ __exportStar(require("./openai"), exports);
39
+ __exportStar(require("./registry"), exports);
40
+ /**
41
+ * Create a schema with embedding functions.
42
+ *
43
+ * @param fields
44
+ * @returns Schema
45
+ * @example
46
+ * ```ts
47
+ * class MyEmbeddingFunction extends EmbeddingFunction {
48
+ * // ...
49
+ * }
50
+ * const func = new MyEmbeddingFunction();
51
+ * const schema = LanceSchema({
52
+ * id: new Int32(),
53
+ * text: func.sourceField(new Utf8()),
54
+ * vector: func.vectorField(),
55
+ * // optional: specify the datatype and/or dimensions
56
+ * vector2: func.vectorField({ datatype: new Float32(), dims: 3}),
57
+ * });
58
+ *
59
+ * const table = await db.createTable("my_table", data, { schema });
60
+ * ```
61
+ */
62
+ function LanceSchema(fields) {
63
+ const arrowFields = [];
64
+ const embeddingFunctions = new Map();
65
+ Object.entries(fields).forEach(([key, value]) => {
66
+ if ((0, arrow_2.isDataType)(value)) {
67
+ arrowFields.push(new arrow_1.Field(key, (0, sanitize_1.sanitizeType)(value), true));
68
+ }
69
+ else {
70
+ const [dtype, metadata] = value;
71
+ arrowFields.push(new arrow_1.Field(key, (0, sanitize_1.sanitizeType)(dtype), true));
72
+ parseEmbeddingFunctions(embeddingFunctions, key, metadata);
73
+ }
74
+ });
75
+ const registry = (0, registry_1.getRegistry)();
76
+ const metadata = registry.getTableMetadata(Array.from(embeddingFunctions.values()));
77
+ const schema = new arrow_1.Schema(arrowFields, metadata);
78
+ return schema;
79
+ }
80
+ exports.LanceSchema = LanceSchema;
81
+ function parseEmbeddingFunctions(embeddingFunctions, key, metadata) {
82
+ if (metadata.has("source_column_for")) {
83
+ const embedFunction = metadata.get("source_column_for");
84
+ const current = embeddingFunctions.get(embedFunction);
85
+ if (current !== undefined) {
86
+ embeddingFunctions.set(embedFunction, {
87
+ ...current,
88
+ sourceColumn: key,
89
+ });
90
+ }
91
+ else {
92
+ embeddingFunctions.set(embedFunction, {
93
+ sourceColumn: key,
94
+ function: embedFunction,
95
+ });
96
+ }
97
+ }
98
+ else if (metadata.has("vector_column_for")) {
99
+ const embedFunction = metadata.get("vector_column_for");
100
+ const current = embeddingFunctions.get(embedFunction);
101
+ if (current !== undefined) {
102
+ embeddingFunctions.set(embedFunction, {
103
+ ...current,
104
+ vectorColumn: key,
105
+ });
106
+ }
107
+ else {
108
+ embeddingFunctions.set(embedFunction, {
109
+ vectorColumn: key,
110
+ function: embedFunction,
111
+ });
112
+ }
113
+ }
114
+ }
@@ -1,8 +1,17 @@
1
- import { type EmbeddingFunction } from "./embedding_function";
2
- export declare class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
3
- private readonly _openai;
4
- private readonly _modelName;
5
- constructor(sourceColumn: string, openAIKey: string, modelName?: string);
6
- embed(data: string[]): Promise<number[][]>;
7
- sourceColumn: string;
1
+ import { Float } from "../arrow";
2
+ import { EmbeddingFunction } from "./embedding_function";
3
+ export type OpenAIOptions = {
4
+ apiKey?: string;
5
+ model?: string;
6
+ };
7
+ export declare class OpenAIEmbeddingFunction extends EmbeddingFunction<string, OpenAIOptions> {
8
+ #private;
9
+ constructor(options?: OpenAIOptions);
10
+ toJSON(): {
11
+ model: string;
12
+ };
13
+ ndims(): number;
14
+ embeddingDataType(): Float;
15
+ computeSourceEmbeddings(data: string[]): Promise<number[][]>;
16
+ computeQueryEmbeddings(data: string): Promise<number[]>;
8
17
  }
@@ -12,12 +12,30 @@
12
12
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
+ var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
16
+ var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
17
+ if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
18
+ else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
19
+ return c > 3 && r && Object.defineProperty(target, key, r), r;
20
+ };
21
+ var __metadata = (this && this.__metadata) || function (k, v) {
22
+ if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
23
+ };
15
24
  Object.defineProperty(exports, "__esModule", { value: true });
16
25
  exports.OpenAIEmbeddingFunction = void 0;
17
- class OpenAIEmbeddingFunction {
18
- _openai;
19
- _modelName;
20
- constructor(sourceColumn, openAIKey, modelName = "text-embedding-ada-002") {
26
+ const arrow_1 = require("../arrow");
27
+ const embedding_function_1 = require("./embedding_function");
28
+ const registry_1 = require("./registry");
29
+ let OpenAIEmbeddingFunction = class OpenAIEmbeddingFunction extends embedding_function_1.EmbeddingFunction {
30
+ #openai;
31
+ #modelName;
32
+ constructor(options = { model: "text-embedding-ada-002" }) {
33
+ super();
34
+ const openAIKey = options?.apiKey ?? process.env.OPENAI_API_KEY;
35
+ if (!openAIKey) {
36
+ throw new Error("OpenAI API key is required");
37
+ }
38
+ const modelName = options?.model ?? "text-embedding-ada-002";
21
39
  /**
22
40
  * @type {import("openai").default}
23
41
  */
@@ -30,16 +48,35 @@ class OpenAIEmbeddingFunction {
30
48
  catch {
31
49
  throw new Error("please install openai@^4.24.1 using npm install openai");
32
50
  }
33
- this.sourceColumn = sourceColumn;
34
51
  const configuration = {
35
52
  apiKey: openAIKey,
36
53
  };
37
- this._openai = new Openai(configuration);
38
- this._modelName = modelName;
54
+ this.#openai = new Openai(configuration);
55
+ this.#modelName = modelName;
56
+ }
57
+ toJSON() {
58
+ return {
59
+ model: this.#modelName,
60
+ };
61
+ }
62
+ ndims() {
63
+ switch (this.#modelName) {
64
+ case "text-embedding-ada-002":
65
+ return 1536;
66
+ case "text-embedding-3-large":
67
+ return 3072;
68
+ case "text-embedding-3-small":
69
+ return 1536;
70
+ default:
71
+ return null;
72
+ }
39
73
  }
40
- async embed(data) {
41
- const response = await this._openai.embeddings.create({
42
- model: this._modelName,
74
+ embeddingDataType() {
75
+ return new arrow_1.Float32();
76
+ }
77
+ async computeSourceEmbeddings(data) {
78
+ const response = await this.#openai.embeddings.create({
79
+ model: this.#modelName,
43
80
  input: data,
44
81
  });
45
82
  const embeddings = [];
@@ -48,6 +85,19 @@ class OpenAIEmbeddingFunction {
48
85
  }
49
86
  return embeddings;
50
87
  }
51
- sourceColumn;
52
- }
88
+ async computeQueryEmbeddings(data) {
89
+ if (typeof data !== "string") {
90
+ throw new Error("Data must be a string");
91
+ }
92
+ const response = await this.#openai.embeddings.create({
93
+ model: this.#modelName,
94
+ input: data,
95
+ });
96
+ return response.data[0].embedding;
97
+ }
98
+ };
53
99
  exports.OpenAIEmbeddingFunction = OpenAIEmbeddingFunction;
100
+ exports.OpenAIEmbeddingFunction = OpenAIEmbeddingFunction = __decorate([
101
+ (0, registry_1.register)("openai"),
102
+ __metadata("design:paramtypes", [Object])
103
+ ], OpenAIEmbeddingFunction);
@@ -0,0 +1,58 @@
1
+ import type { EmbeddingFunction } from "./embedding_function";
2
+ import "reflect-metadata";
3
+ export interface EmbeddingFunctionOptions {
4
+ [key: string]: unknown;
5
+ }
6
+ export interface EmbeddingFunctionFactory<T extends EmbeddingFunction = EmbeddingFunction> {
7
+ new (modelOptions?: EmbeddingFunctionOptions): T;
8
+ }
9
+ interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
10
+ create(options?: EmbeddingFunctionOptions): T;
11
+ }
12
+ /**
13
+ * This is a singleton class used to register embedding functions
14
+ * and fetch them by name. It also handles serializing and deserializing.
15
+ * You can implement your own embedding function by subclassing EmbeddingFunction
16
+ * or TextEmbeddingFunction and registering it with the registry
17
+ */
18
+ export declare class EmbeddingFunctionRegistry {
19
+ #private;
20
+ /**
21
+ * Register an embedding function
22
+ * @param alias The name to register the function under; defaults to the constructor's name
23
+ * @returns A decorator that registers the constructor it is applied to
24
+ * @throws Error if the function is already registered
25
+ */
26
+ register<T extends EmbeddingFunctionFactory = EmbeddingFunctionFactory>(this: EmbeddingFunctionRegistry, alias?: string): (ctor: T) => any;
27
+ /**
28
+ * Fetch an embedding function by name
29
+ * @param name The name of the function
30
+ */
31
+ get<T extends EmbeddingFunction<unknown> = EmbeddingFunction>(name: string): EmbeddingFunctionCreate<T> | undefined;
32
+ /**
33
+ * reset the registry to the initial state
34
+ */
35
+ reset(this: EmbeddingFunctionRegistry): void;
36
+ /**
37
+ * @ignore
38
+ */
39
+ parseFunctions(this: EmbeddingFunctionRegistry, metadata: Map<string, string>): Map<string, EmbeddingFunctionConfig>;
40
+ functionToMetadata(conf: EmbeddingFunctionConfig): Record<string, any>;
41
+ getTableMetadata(functions: EmbeddingFunctionConfig[]): Map<string, string>;
42
+ }
43
+ export declare function register(name?: string): (ctor: EmbeddingFunctionFactory<EmbeddingFunction<any, import("./embedding_function").FunctionOptions>>) => any;
44
+ /**
45
+ * Utility function to get the global instance of the registry
46
+ * @returns `EmbeddingFunctionRegistry` The global instance of the registry
47
+ * @example
48
+ * ```ts
49
+ * const registry = getRegistry();
50
+ * const openai = registry.get("openai").create();
51
+ */
52
+ export declare function getRegistry(): EmbeddingFunctionRegistry;
53
+ export interface EmbeddingFunctionConfig {
54
+ sourceColumn: string;
55
+ vectorColumn?: string;
56
+ function: EmbeddingFunction;
57
+ }
58
+ export {};
@@ -0,0 +1,127 @@
1
+ "use strict";
2
+ // Copyright 2024 Lance Developers.
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+ Object.defineProperty(exports, "__esModule", { value: true });
16
+ exports.getRegistry = exports.register = exports.EmbeddingFunctionRegistry = void 0;
17
+ require("reflect-metadata");
18
+ /**
19
+ * This is a singleton class used to register embedding functions
20
+ * and fetch them by name. It also handles serializing and deserializing.
21
+ * You can implement your own embedding function by subclassing EmbeddingFunction
22
+ * or TextEmbeddingFunction and registering it with the registry
23
+ */
24
+ class EmbeddingFunctionRegistry {
25
+ #functions = new Map();
26
+ /**
27
+ * Register an embedding function
28
+ * @param alias The name to register the function under; defaults to the constructor's name
29
+ * @returns A decorator that registers the constructor it is applied to
30
+ * @throws Error if the function is already registered
31
+ */
32
+ register(alias) {
33
+ const self = this;
34
+ return function (ctor) {
35
+ if (!alias) {
36
+ alias = ctor.name;
37
+ }
38
+ if (self.#functions.has(alias)) {
39
+ throw new Error(`Embedding function with alias "${alias}" already exists`);
40
+ }
41
+ self.#functions.set(alias, ctor);
42
+ Reflect.defineMetadata("lancedb::embedding::name", alias, ctor);
43
+ return ctor;
44
+ };
45
+ }
46
+ /**
47
+ * Fetch an embedding function by name
48
+ * @param name The name of the function
49
+ */
50
+ get(name) {
51
+ const factory = this.#functions.get(name);
52
+ if (!factory) {
53
+ return undefined;
54
+ }
55
+ return {
56
+ create: function (options) {
57
+ return new factory(options);
58
+ },
59
+ };
60
+ }
61
+ /**
62
+ * reset the registry to the initial state
63
+ */
64
+ reset() {
65
+ this.#functions.clear();
66
+ }
67
+ /**
68
+ * @ignore
69
+ */
70
+ parseFunctions(metadata) {
71
+ if (!metadata.has("embedding_functions")) {
72
+ return new Map();
73
+ }
74
+ else {
75
+ const functions = (JSON.parse(metadata.get("embedding_functions")));
76
+ return new Map(functions.map((f) => {
77
+ const fn = this.get(f.name);
78
+ if (!fn) {
79
+ throw new Error(`Function "${f.name}" not found in registry`);
80
+ }
81
+ return [
82
+ f.name,
83
+ {
84
+ sourceColumn: f.sourceColumn,
85
+ vectorColumn: f.vectorColumn,
86
+ function: this.get(f.name).create(f.model),
87
+ },
88
+ ];
89
+ }));
90
+ }
91
+ }
92
+ // biome-ignore lint/suspicious/noExplicitAny: <explanation>
93
+ functionToMetadata(conf) {
94
+ // biome-ignore lint/suspicious/noExplicitAny: <explanation>
95
+ const metadata = {};
96
+ const name = Reflect.getMetadata("lancedb::embedding::name", conf.function.constructor);
97
+ metadata["sourceColumn"] = conf.sourceColumn;
98
+ metadata["vectorColumn"] = conf.vectorColumn ?? "vector";
99
+ metadata["name"] = name ?? conf.function.constructor.name;
100
+ metadata["model"] = conf.function.toJSON();
101
+ return metadata;
102
+ }
103
+ getTableMetadata(functions) {
104
+ const metadata = new Map();
105
+ const jsonData = functions.map((conf) => this.functionToMetadata(conf));
106
+ metadata.set("embedding_functions", JSON.stringify(jsonData));
107
+ return metadata;
108
+ }
109
+ }
110
+ exports.EmbeddingFunctionRegistry = EmbeddingFunctionRegistry;
111
+ const _REGISTRY = new EmbeddingFunctionRegistry();
112
+ function register(name) {
113
+ return _REGISTRY.register(name);
114
+ }
115
+ exports.register = register;
116
+ /**
117
+ * Utility function to get the global instance of the registry
118
+ * @returns `EmbeddingFunctionRegistry` The global instance of the registry
119
+ * @example
120
+ * ```ts
121
+ * const registry = getRegistry();
122
+ * const openai = registry.get("openai").create();
123
+ */
124
+ function getRegistry() {
125
+ return _REGISTRY;
126
+ }
127
+ exports.getRegistry = getRegistry;
package/dist/native.d.ts CHANGED
@@ -102,6 +102,7 @@ export const enum WriteMode {
102
102
  }
103
103
  /** Write options when creating a Table. */
104
104
  export interface WriteOptions {
105
+ /** Write mode for writing to a table. */
105
106
  mode?: WriteMode
106
107
  }
107
108
  export interface OpenTableOptions {
@@ -123,8 +124,8 @@ export class Connection {
123
124
  * - buf: The buffer containing the IPC file.
124
125
  *
125
126
  */
126
- createTable(name: string, buf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null): Promise<Table>
127
- createEmptyTable(name: string, schemaBuf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null): Promise<Table>
127
+ createTable(name: string, buf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
128
+ createEmptyTable(name: string, schemaBuf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
128
129
  openTable(name: string, storageOptions?: Record<string, string> | undefined | null, indexCacheSize?: number | undefined | null): Promise<Table>
129
130
  /** Drop table with the name. Or raise an error if the table does not exist. */
130
131
  dropTable(name: string): Promise<void>
@@ -142,7 +143,7 @@ export class Query {
142
143
  select(columns: Array<[string, string]>): void
143
144
  limit(limit: number): void
144
145
  nearestTo(vector: Float32Array): VectorQuery
145
- execute(): Promise<RecordBatchIterator>
146
+ execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
146
147
  }
147
148
  export class VectorQuery {
148
149
  column(column: string): void
@@ -154,7 +155,7 @@ export class VectorQuery {
154
155
  onlyIf(predicate: string): void
155
156
  select(columns: Array<[string, string]>): void
156
157
  limit(limit: number): void
157
- execute(): Promise<RecordBatchIterator>
158
+ execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
158
159
  }
159
160
  export class Table {
160
161
  display(): string
package/dist/query.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { Table as ArrowTable, RecordBatch } from "apache-arrow";
1
+ import { Table as ArrowTable, type IntoVector, RecordBatch } from "./arrow";
2
2
  import { RecordBatchIterator as NativeBatchIterator, Query as NativeQuery, Table as NativeTable, VectorQuery as NativeVectorQuery } from "./native";
3
3
  export declare class RecordBatchIterator implements AsyncIterator<RecordBatch> {
4
4
  private promisedInner?;
@@ -6,6 +6,18 @@ export declare class RecordBatchIterator implements AsyncIterator<RecordBatch> {
6
6
  constructor(promise?: Promise<NativeBatchIterator>);
7
7
  next(): Promise<IteratorResult<RecordBatch<any>>>;
8
8
  }
9
+ /**
10
+ * Options that control the behavior of a particular query execution
11
+ */
12
+ export interface QueryExecutionOptions {
13
+ /**
14
+ * The maximum number of rows to return in a single batch
15
+ *
16
+ * Batches may have fewer rows if the underlying data is stored
17
+ * in smaller chunks.
18
+ */
19
+ maxBatchLength?: number;
20
+ }
9
21
  /** Common methods supported by all query types */
10
22
  export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery, QueryType> implements AsyncIterable<RecordBatch> {
11
23
  protected inner: NativeQueryType;
@@ -53,7 +65,7 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
53
65
  * uses `Object.entries` which should preserve the insertion order of the object. However,
54
66
  * object insertion order is easy to get wrong and `Map` is more foolproof.
55
67
  */
56
- select(columns: string[] | Map<string, string> | Record<string, string>): QueryType;
68
+ select(columns: string[] | Map<string, string> | Record<string, string> | string): QueryType;
57
69
  /**
58
70
  * Set the maximum number of results to return.
59
71
  *
@@ -61,7 +73,7 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
61
73
  * called then every valid row from the table will be returned.
62
74
  */
63
75
  limit(limit: number): QueryType;
64
- protected nativeExecute(): Promise<NativeBatchIterator>;
76
+ protected nativeExecute(options?: Partial<QueryExecutionOptions>): Promise<NativeBatchIterator>;
65
77
  /**
66
78
  * Execute the query and return the results as an @see {@link AsyncIterator}
67
79
  * of @see {@link RecordBatch}.
@@ -73,12 +85,12 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
73
85
  * single query)
74
86
  *
75
87
  */
76
- protected execute(): RecordBatchIterator;
88
+ protected execute(options?: Partial<QueryExecutionOptions>): RecordBatchIterator;
77
89
  [Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>>;
78
90
  /** Collect the results as an Arrow @see {@link ArrowTable}. */
79
- toArrow(): Promise<ArrowTable>;
91
+ toArrow(options?: Partial<QueryExecutionOptions>): Promise<ArrowTable>;
80
92
  /** Collect the results as an array of objects. */
81
- toArray(): Promise<unknown[]>;
93
+ toArray(options?: Partial<QueryExecutionOptions>): Promise<any[]>;
82
94
  }
83
95
  /**
84
96
  * An interface for a query that can be executed
@@ -244,5 +256,5 @@ export declare class Query extends QueryBase<NativeQuery, Query> {
244
256
  * Vector searches always have a `limit`. If `limit` has not been called then
245
257
  * a default `limit` of 10 will be used. @see {@link Query#limit}
246
258
  */
247
- nearestTo(vector: unknown): VectorQuery;
259
+ nearestTo(vector: IntoVector): VectorQuery;
248
260
  }