@lancedb/lancedb 0.4.20 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/README.md +5 -14
  2. package/biome.json +142 -0
  3. package/dist/arrow.d.ts +35 -9
  4. package/dist/arrow.js +247 -19
  5. package/dist/connection.d.ts +4 -1
  6. package/dist/connection.js +11 -5
  7. package/dist/embedding/embedding_function.d.ts +54 -28
  8. package/dist/embedding/embedding_function.js +71 -10
  9. package/dist/embedding/index.d.ts +28 -2
  10. package/dist/embedding/index.js +111 -4
  11. package/dist/embedding/openai.d.ts +16 -7
  12. package/dist/embedding/openai.js +62 -12
  13. package/dist/embedding/registry.d.ts +54 -0
  14. package/dist/embedding/registry.js +123 -0
  15. package/dist/native.d.ts +26 -0
  16. package/dist/query.d.ts +1 -1
  17. package/dist/query.js +7 -6
  18. package/dist/sanitize.d.ts +22 -1
  19. package/dist/sanitize.js +126 -113
  20. package/dist/table.d.ts +50 -4
  21. package/dist/table.js +47 -5
  22. package/lancedb/arrow.ts +283 -49
  23. package/lancedb/connection.ts +27 -6
  24. package/lancedb/embedding/embedding_function.ts +126 -42
  25. package/lancedb/embedding/index.ts +113 -2
  26. package/lancedb/embedding/openai.ts +62 -16
  27. package/lancedb/embedding/registry.ts +172 -0
  28. package/lancedb/query.ts +9 -6
  29. package/lancedb/sanitize.ts +62 -62
  30. package/lancedb/table.ts +72 -5
  31. package/nodejs-artifacts/arrow.d.ts +35 -9
  32. package/nodejs-artifacts/arrow.js +247 -19
  33. package/nodejs-artifacts/connection.d.ts +4 -1
  34. package/nodejs-artifacts/connection.js +11 -5
  35. package/nodejs-artifacts/embedding/embedding_function.d.ts +54 -28
  36. package/nodejs-artifacts/embedding/embedding_function.js +71 -10
  37. package/nodejs-artifacts/embedding/index.d.ts +28 -2
  38. package/nodejs-artifacts/embedding/index.js +111 -4
  39. package/nodejs-artifacts/embedding/openai.d.ts +16 -7
  40. package/nodejs-artifacts/embedding/openai.js +62 -12
  41. package/nodejs-artifacts/embedding/registry.d.ts +54 -0
  42. package/nodejs-artifacts/embedding/registry.js +123 -0
  43. package/nodejs-artifacts/native.d.ts +26 -0
  44. package/nodejs-artifacts/query.d.ts +1 -1
  45. package/nodejs-artifacts/query.js +7 -6
  46. package/nodejs-artifacts/sanitize.d.ts +22 -1
  47. package/nodejs-artifacts/sanitize.js +126 -113
  48. package/nodejs-artifacts/table.d.ts +50 -4
  49. package/nodejs-artifacts/table.js +47 -5
  50. package/package.json +23 -21
  51. package/tsconfig.json +3 -1
  52. package/.eslintignore +0 -3
  53. package/eslint.config.js +0 -28
@@ -15,9 +15,9 @@
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
16
  exports.Connection = exports.connect = void 0;
17
17
  const arrow_1 = require("./arrow");
18
+ const registry_1 = require("./embedding/registry");
18
19
  const native_1 = require("./native");
19
20
  const table_1 = require("./table");
20
- const apache_arrow_1 = require("apache-arrow");
21
21
  /**
22
22
  * Connect to a LanceDB instance at the given URI.
23
23
  *
@@ -109,13 +109,13 @@ class Connection {
109
109
  mode = "exist_ok";
110
110
  }
111
111
  let table;
112
- if (data instanceof apache_arrow_1.Table) {
112
+ if ((0, arrow_1.isArrowTable)(data)) {
113
113
  table = data;
114
114
  }
115
115
  else {
116
- table = (0, arrow_1.makeArrowTable)(data);
116
+ table = (0, arrow_1.makeArrowTable)(data, options);
117
117
  }
118
- const buf = await (0, arrow_1.fromTableToBuffer)(table);
118
+ const buf = await (0, arrow_1.fromTableToBuffer)(table, options?.embeddingFunction, options?.schema);
119
119
  const innerTable = await this.inner.createTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions));
120
120
  return new table_1.Table(innerTable);
121
121
  }
@@ -130,7 +130,13 @@ class Connection {
130
130
  if (mode === "create" && existOk) {
131
131
  mode = "exist_ok";
132
132
  }
133
- const table = (0, arrow_1.makeEmptyTable)(schema);
133
+ let metadata = undefined;
134
+ if (options?.embeddingFunction !== undefined) {
135
+ const embeddingFunction = options.embeddingFunction;
136
+ const registry = (0, registry_1.getRegistry)();
137
+ metadata = registry.getTableMetadata([embeddingFunction]);
138
+ }
139
+ const table = (0, arrow_1.makeEmptyTable)(schema, metadata);
134
140
  const buf = await (0, arrow_1.fromTableToBuffer)(table);
135
141
  const innerTable = await this.inner.createEmptyTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions));
136
142
  return new table_1.Table(innerTable);
@@ -1,45 +1,71 @@
1
- import { type Float } from "apache-arrow";
1
+ import "reflect-metadata";
2
+ import { DataType, Float } from "../arrow";
3
+ /**
4
+ * Options for a given embedding function
5
+ */
6
+ export interface FunctionOptions {
7
+ [key: string]: any;
8
+ }
2
9
  /**
3
10
  * An embedding function that automatically creates vector representation for a given column.
4
11
  */
5
- export interface EmbeddingFunction<T> {
12
+ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptions = FunctionOptions> {
6
13
  /**
7
- * The name of the column that will be used as input for the Embedding Function.
8
- */
9
- sourceColumn: string;
10
- /**
11
- * The data type of the embedding
14
+ * Convert the embedding function to a JSON object
15
+ * It is used to serialize the embedding function to the schema
16
+ * It's important that any object returned by this method contains all the necessary
17
+ * information to recreate the embedding function
12
18
  *
13
- * The embedding function should return `number`. This will be converted into
14
- * an Arrow float array. By default this will be Float32 but this property can
15
- * be used to control the conversion.
16
- */
17
- embeddingDataType?: Float;
18
- /**
19
- * The dimension of the embedding
19
+ * It should return the same object that was passed to the constructor
20
+ * If it does not, the embedding function will not be able to be recreated, or could be recreated incorrectly
20
21
  *
21
- * This is optional, normally this can be determined by looking at the results of
22
- * `embed`. If this is not specified, and there is an attempt to apply the embedding
23
- * to an empty table, then that process will fail.
22
+ * @example
23
+ * ```ts
24
+ * class MyEmbeddingFunction extends EmbeddingFunction {
25
+ * constructor(options: {model: string, timeout: number}) {
26
+ * super();
27
+ * this.model = options.model;
28
+ * this.timeout = options.timeout;
29
+ * }
30
+ * toJSON() {
31
+ * return {
32
+ * model: this.model,
33
+ * timeout: this.timeout,
34
+ * };
35
+ * }
36
+ * ```
24
37
  */
25
- embeddingDimension?: number;
38
+ abstract toJSON(): Partial<M>;
26
39
  /**
27
- * The name of the column that will contain the embedding
40
+ * sourceField is used in combination with `LanceSchema` to provide a declarative data model
41
+ *
42
+ * @param optionsOrDatatype - The options for the field or the datatype
28
43
  *
29
- * By default this is "vector"
44
+ * @see {@link lancedb.LanceSchema}
30
45
  */
31
- destColumn?: string;
46
+ sourceField(optionsOrDatatype: Partial<FieldOptions> | DataType): [DataType, Map<string, EmbeddingFunction>];
32
47
  /**
33
- * Should the source column be excluded from the resulting table
48
+ * vectorField is used in combination with `LanceSchema` to provide a declarative data model
49
+ *
50
+ * @param options - The options for the field
34
51
  *
35
- * By default the source column is included. Set this to true and
36
- * only the embedding will be stored.
52
+ * @see {@link lancedb.LanceSchema}
37
53
  */
38
- excludeSource?: boolean;
54
+ vectorField(options?: Partial<FieldOptions>): [DataType, Map<string, EmbeddingFunction>];
55
+ /** The number of dimensions of the embeddings */
56
+ ndims(): number | undefined;
57
+ /** The datatype of the embeddings */
58
+ abstract embeddingDataType(): Float;
39
59
  /**
40
60
  * Creates a vector representation for the given values.
41
61
  */
42
- embed: (data: T[]) => Promise<number[][]>;
62
+ abstract computeSourceEmbeddings(data: T[]): Promise<number[][] | Float32Array[] | Float64Array[]>;
63
+ /**
64
+ Compute the embeddings for a single query
65
+ */
66
+ computeQueryEmbeddings(data: T): Promise<number[] | Float32Array | Float64Array>;
67
+ }
68
+ export interface FieldOptions<T extends DataType = DataType> {
69
+ datatype: T;
70
+ dims?: number;
43
71
  }
44
- /** Test if the input seems to be an embedding function */
45
- export declare function isEmbeddingFunction<T>(value: unknown): value is EmbeddingFunction<T>;
@@ -1,5 +1,5 @@
1
1
  "use strict";
2
- // Copyright 2023 Lance Developers.
2
+ // Copyright 2024 Lance Developers.
3
3
  //
4
4
  // Licensed under the Apache License, Version 2.0 (the "License");
5
5
  // you may not use this file except in compliance with the License.
@@ -13,15 +13,76 @@
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
- exports.isEmbeddingFunction = void 0;
17
- /** Test if the input seems to be an embedding function */
18
- function isEmbeddingFunction(value) {
19
- if (typeof value !== "object" || value === null) {
20
- return false;
16
+ exports.EmbeddingFunction = void 0;
17
+ require("reflect-metadata");
18
+ const arrow_1 = require("../arrow");
19
+ const sanitize_1 = require("../sanitize");
20
+ /**
21
+ * An embedding function that automatically creates vector representation for a given column.
22
+ */
23
+ class EmbeddingFunction {
24
+ /**
25
+ * sourceField is used in combination with `LanceSchema` to provide a declarative data model
26
+ *
27
+ * @param optionsOrDatatype - The options for the field or the datatype
28
+ *
29
+ * @see {@link lancedb.LanceSchema}
30
+ */
31
+ sourceField(optionsOrDatatype) {
32
+ let datatype = (0, arrow_1.isDataType)(optionsOrDatatype)
33
+ ? optionsOrDatatype
34
+ : optionsOrDatatype?.datatype;
35
+ if (!datatype) {
36
+ throw new Error("Datatype is required");
37
+ }
38
+ datatype = (0, sanitize_1.sanitizeType)(datatype);
39
+ const metadata = new Map();
40
+ metadata.set("source_column_for", this);
41
+ return [datatype, metadata];
21
42
  }
22
- if (!("sourceColumn" in value) || !("embed" in value)) {
23
- return false;
43
+ /**
44
+ * vectorField is used in combination with `LanceSchema` to provide a declarative data model
45
+ *
46
+ * @param options - The options for the field
47
+ *
48
+ * @see {@link lancedb.LanceSchema}
49
+ */
50
+ vectorField(options) {
51
+ let dtype;
52
+ const dims = this.ndims() ?? options?.dims;
53
+ if (!options?.datatype) {
54
+ if (dims === undefined) {
55
+ throw new Error("ndims is required for vector field");
56
+ }
57
+ dtype = new arrow_1.FixedSizeList(dims, new arrow_1.Field("item", new arrow_1.Float32(), true));
58
+ }
59
+ else {
60
+ if ((0, arrow_1.isFixedSizeList)(options.datatype)) {
61
+ dtype = options.datatype;
62
+ }
63
+ else if ((0, arrow_1.isFloat)(options.datatype)) {
64
+ if (dims === undefined) {
65
+ throw new Error("ndims is required for vector field");
66
+ }
67
+ dtype = (0, arrow_1.newVectorType)(dims, options.datatype);
68
+ }
69
+ else {
70
+ throw new Error("Expected FixedSizeList or Float as datatype for vector field");
71
+ }
72
+ }
73
+ const metadata = new Map();
74
+ metadata.set("vector_column_for", this);
75
+ return [dtype, metadata];
76
+ }
77
+ /** The number of dimensions of the embeddings */
78
+ ndims() {
79
+ return undefined;
80
+ }
81
+ /**
82
+ Compute the embeddings for a single query
83
+ */
84
+ async computeQueryEmbeddings(data) {
85
+ return this.computeSourceEmbeddings([data]).then((embeddings) => embeddings[0]);
24
86
  }
25
- return (typeof value.sourceColumn === "string" && typeof value.embed === "function");
26
87
  }
27
- exports.isEmbeddingFunction = isEmbeddingFunction;
88
+ exports.EmbeddingFunction = EmbeddingFunction;
@@ -1,2 +1,28 @@
1
- export { EmbeddingFunction, isEmbeddingFunction } from "./embedding_function";
2
- export { OpenAIEmbeddingFunction } from "./openai";
1
+ import { Schema } from "../arrow";
2
+ import { EmbeddingFunction } from "./embedding_function";
3
+ export { EmbeddingFunction } from "./embedding_function";
4
+ export * from "./openai";
5
+ export * from "./registry";
6
+ /**
7
+ * Create a schema with embedding functions.
8
+ *
9
+ * @param fields
10
+ * @returns Schema
11
+ * @example
12
+ * ```ts
13
+ * class MyEmbeddingFunction extends EmbeddingFunction {
14
+ * // ...
15
+ * }
16
+ * const func = new MyEmbeddingFunction();
17
+ * const schema = LanceSchema({
18
+ * id: new Int32(),
19
+ * text: func.sourceField(new Utf8()),
20
+ * vector: func.vectorField(),
21
+ * // optional: specify the datatype and/or dimensions
22
+ * vector2: func.vectorField({ datatype: new Float32(), dims: 3}),
23
+ * });
24
+ *
25
+ * const table = await db.createTable("my_table", data, { schema });
26
+ * ```
27
+ */
28
+ export declare function LanceSchema(fields: Record<string, [object, Map<string, EmbeddingFunction>] | object>): Schema;
@@ -1,7 +1,114 @@
1
1
  "use strict";
2
+ // Copyright 2023 Lance Developers.
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
16
+ if (k2 === undefined) k2 = k;
17
+ var desc = Object.getOwnPropertyDescriptor(m, k);
18
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
19
+ desc = { enumerable: true, get: function() { return m[k]; } };
20
+ }
21
+ Object.defineProperty(o, k2, desc);
22
+ }) : (function(o, m, k, k2) {
23
+ if (k2 === undefined) k2 = k;
24
+ o[k2] = m[k];
25
+ }));
26
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
27
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
28
+ };
2
29
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.OpenAIEmbeddingFunction = exports.isEmbeddingFunction = void 0;
30
+ exports.LanceSchema = exports.EmbeddingFunction = void 0;
31
+ const arrow_1 = require("../arrow");
32
+ const arrow_2 = require("../arrow");
33
+ const sanitize_1 = require("../sanitize");
34
+ const registry_1 = require("./registry");
4
35
  var embedding_function_1 = require("./embedding_function");
5
- Object.defineProperty(exports, "isEmbeddingFunction", { enumerable: true, get: function () { return embedding_function_1.isEmbeddingFunction; } });
6
- var openai_1 = require("./openai");
7
- Object.defineProperty(exports, "OpenAIEmbeddingFunction", { enumerable: true, get: function () { return openai_1.OpenAIEmbeddingFunction; } });
36
+ Object.defineProperty(exports, "EmbeddingFunction", { enumerable: true, get: function () { return embedding_function_1.EmbeddingFunction; } });
37
+ // We need to explicitly export '*' so that the `register` decorator actually registers the class.
38
+ __exportStar(require("./openai"), exports);
39
+ __exportStar(require("./registry"), exports);
40
+ /**
41
+ * Create a schema with embedding functions.
42
+ *
43
+ * @param fields
44
+ * @returns Schema
45
+ * @example
46
+ * ```ts
47
+ * class MyEmbeddingFunction extends EmbeddingFunction {
48
+ * // ...
49
+ * }
50
+ * const func = new MyEmbeddingFunction();
51
+ * const schema = LanceSchema({
52
+ * id: new Int32(),
53
+ * text: func.sourceField(new Utf8()),
54
+ * vector: func.vectorField(),
55
+ * // optional: specify the datatype and/or dimensions
56
+ * vector2: func.vectorField({ datatype: new Float32(), dims: 3}),
57
+ * });
58
+ *
59
+ * const table = await db.createTable("my_table", data, { schema });
60
+ * ```
61
+ */
62
+ function LanceSchema(fields) {
63
+ const arrowFields = [];
64
+ const embeddingFunctions = new Map();
65
+ Object.entries(fields).forEach(([key, value]) => {
66
+ if ((0, arrow_2.isDataType)(value)) {
67
+ arrowFields.push(new arrow_1.Field(key, (0, sanitize_1.sanitizeType)(value), true));
68
+ }
69
+ else {
70
+ const [dtype, metadata] = value;
71
+ arrowFields.push(new arrow_1.Field(key, (0, sanitize_1.sanitizeType)(dtype), true));
72
+ parseEmbeddingFunctions(embeddingFunctions, key, metadata);
73
+ }
74
+ });
75
+ const registry = (0, registry_1.getRegistry)();
76
+ const metadata = registry.getTableMetadata(Array.from(embeddingFunctions.values()));
77
+ const schema = new arrow_1.Schema(arrowFields, metadata);
78
+ return schema;
79
+ }
80
+ exports.LanceSchema = LanceSchema;
81
+ function parseEmbeddingFunctions(embeddingFunctions, key, metadata) {
82
+ if (metadata.has("source_column_for")) {
83
+ const embedFunction = metadata.get("source_column_for");
84
+ const current = embeddingFunctions.get(embedFunction);
85
+ if (current !== undefined) {
86
+ embeddingFunctions.set(embedFunction, {
87
+ ...current,
88
+ sourceColumn: key,
89
+ });
90
+ }
91
+ else {
92
+ embeddingFunctions.set(embedFunction, {
93
+ sourceColumn: key,
94
+ function: embedFunction,
95
+ });
96
+ }
97
+ }
98
+ else if (metadata.has("vector_column_for")) {
99
+ const embedFunction = metadata.get("vector_column_for");
100
+ const current = embeddingFunctions.get(embedFunction);
101
+ if (current !== undefined) {
102
+ embeddingFunctions.set(embedFunction, {
103
+ ...current,
104
+ vectorColumn: key,
105
+ });
106
+ }
107
+ else {
108
+ embeddingFunctions.set(embedFunction, {
109
+ vectorColumn: key,
110
+ function: embedFunction,
111
+ });
112
+ }
113
+ }
114
+ }
@@ -1,8 +1,17 @@
1
- import { type EmbeddingFunction } from "./embedding_function";
2
- export declare class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
3
- private readonly _openai;
4
- private readonly _modelName;
5
- constructor(sourceColumn: string, openAIKey: string, modelName?: string);
6
- embed(data: string[]): Promise<number[][]>;
7
- sourceColumn: string;
1
+ import { Float } from "../arrow";
2
+ import { EmbeddingFunction } from "./embedding_function";
3
+ export type OpenAIOptions = {
4
+ apiKey?: string;
5
+ model?: string;
6
+ };
7
+ export declare class OpenAIEmbeddingFunction extends EmbeddingFunction<string, OpenAIOptions> {
8
+ #private;
9
+ constructor(options?: OpenAIOptions);
10
+ toJSON(): {
11
+ model: string;
12
+ };
13
+ ndims(): number;
14
+ embeddingDataType(): Float;
15
+ computeSourceEmbeddings(data: string[]): Promise<number[][]>;
16
+ computeQueryEmbeddings(data: string): Promise<number[]>;
8
17
  }
@@ -12,12 +12,30 @@
12
12
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
+ var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
16
+ var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
17
+ if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
18
+ else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
19
+ return c > 3 && r && Object.defineProperty(target, key, r), r;
20
+ };
21
+ var __metadata = (this && this.__metadata) || function (k, v) {
22
+ if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
23
+ };
15
24
  Object.defineProperty(exports, "__esModule", { value: true });
16
25
  exports.OpenAIEmbeddingFunction = void 0;
17
- class OpenAIEmbeddingFunction {
18
- _openai;
19
- _modelName;
20
- constructor(sourceColumn, openAIKey, modelName = "text-embedding-ada-002") {
26
+ const arrow_1 = require("../arrow");
27
+ const embedding_function_1 = require("./embedding_function");
28
+ const registry_1 = require("./registry");
29
+ let OpenAIEmbeddingFunction = class OpenAIEmbeddingFunction extends embedding_function_1.EmbeddingFunction {
30
+ #openai;
31
+ #modelName;
32
+ constructor(options = { model: "text-embedding-ada-002" }) {
33
+ super();
34
+ const openAIKey = options?.apiKey ?? process.env.OPENAI_API_KEY;
35
+ if (!openAIKey) {
36
+ throw new Error("OpenAI API key is required");
37
+ }
38
+ const modelName = options?.model ?? "text-embedding-ada-002";
21
39
  /**
22
40
  * @type {import("openai").default}
23
41
  */
@@ -30,16 +48,35 @@ class OpenAIEmbeddingFunction {
30
48
  catch {
31
49
  throw new Error("please install openai@^4.24.1 using npm install openai");
32
50
  }
33
- this.sourceColumn = sourceColumn;
34
51
  const configuration = {
35
52
  apiKey: openAIKey,
36
53
  };
37
- this._openai = new Openai(configuration);
38
- this._modelName = modelName;
54
+ this.#openai = new Openai(configuration);
55
+ this.#modelName = modelName;
56
+ }
57
+ toJSON() {
58
+ return {
59
+ model: this.#modelName,
60
+ };
61
+ }
62
+ ndims() {
63
+ switch (this.#modelName) {
64
+ case "text-embedding-ada-002":
65
+ return 1536;
66
+ case "text-embedding-3-large":
67
+ return 3072;
68
+ case "text-embedding-3-small":
69
+ return 1536;
70
+ default:
71
+ return null;
72
+ }
39
73
  }
40
- async embed(data) {
41
- const response = await this._openai.embeddings.create({
42
- model: this._modelName,
74
+ embeddingDataType() {
75
+ return new arrow_1.Float32();
76
+ }
77
+ async computeSourceEmbeddings(data) {
78
+ const response = await this.#openai.embeddings.create({
79
+ model: this.#modelName,
43
80
  input: data,
44
81
  });
45
82
  const embeddings = [];
@@ -48,6 +85,19 @@ class OpenAIEmbeddingFunction {
48
85
  }
49
86
  return embeddings;
50
87
  }
51
- sourceColumn;
52
- }
88
+ async computeQueryEmbeddings(data) {
89
+ if (typeof data !== "string") {
90
+ throw new Error("Data must be a string");
91
+ }
92
+ const response = await this.#openai.embeddings.create({
93
+ model: this.#modelName,
94
+ input: data,
95
+ });
96
+ return response.data[0].embedding;
97
+ }
98
+ };
53
99
  exports.OpenAIEmbeddingFunction = OpenAIEmbeddingFunction;
100
+ exports.OpenAIEmbeddingFunction = OpenAIEmbeddingFunction = __decorate([
101
+ (0, registry_1.register)("openai"),
102
+ __metadata("design:paramtypes", [Object])
103
+ ], OpenAIEmbeddingFunction);
@@ -0,0 +1,54 @@
1
+ import type { EmbeddingFunction } from "./embedding_function";
2
+ import "reflect-metadata";
3
+ export interface EmbeddingFunctionOptions {
4
+ [key: string]: unknown;
5
+ }
6
+ export interface EmbeddingFunctionFactory<T extends EmbeddingFunction = EmbeddingFunction> {
7
+ new (modelOptions?: EmbeddingFunctionOptions): T;
8
+ }
9
+ interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
10
+ create(options?: EmbeddingFunctionOptions): T;
11
+ }
12
+ /**
13
+ * This is a singleton class used to register embedding functions
14
+ * and fetch them by name. It also handles serializing and deserializing.
15
+ * You can implement your own embedding function by subclassing EmbeddingFunction
16
+ * or TextEmbeddingFunction and registering it with the registry
17
+ */
18
+ export declare class EmbeddingFunctionRegistry {
19
+ #private;
20
+ /**
21
+ * Register an embedding function
22
+ * @param name The name of the function
23
+ * @param func The function to register
24
+ */
25
+ register<T extends EmbeddingFunctionFactory = EmbeddingFunctionFactory>(this: EmbeddingFunctionRegistry, alias?: string): (ctor: T) => any;
26
+ /**
27
+ * Fetch an embedding function by name
28
+ * @param name The name of the function
29
+ */
30
+ get<T extends EmbeddingFunction<unknown> = EmbeddingFunction>(name: string): EmbeddingFunctionCreate<T> | undefined;
31
+ /**
32
+ * reset the registry to the initial state
33
+ */
34
+ reset(this: EmbeddingFunctionRegistry): void;
35
+ parseFunctions(this: EmbeddingFunctionRegistry, metadata: Map<string, string>): Map<string, EmbeddingFunctionConfig>;
36
+ functionToMetadata(conf: EmbeddingFunctionConfig): Record<string, any>;
37
+ getTableMetadata(functions: EmbeddingFunctionConfig[]): Map<string, string>;
38
+ }
39
+ export declare function register(name?: string): (ctor: EmbeddingFunctionFactory<EmbeddingFunction<any, import("./embedding_function").FunctionOptions>>) => any;
40
+ /**
41
+ * Utility function to get the global instance of the registry
42
+ * @returns `EmbeddingFunctionRegistry` The global instance of the registry
43
+ * @example
44
+ * ```ts
45
+ * const registry = getRegistry();
46
+ * const openai = registry.get("openai").create();
47
+ */
48
+ export declare function getRegistry(): EmbeddingFunctionRegistry;
49
+ export interface EmbeddingFunctionConfig {
50
+ sourceColumn: string;
51
+ vectorColumn?: string;
52
+ function: EmbeddingFunction;
53
+ }
54
+ export {};