@lancedb/lancedb 0.4.20 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +5 -14
  2. package/biome.json +142 -0
  3. package/dist/arrow.d.ts +35 -9
  4. package/dist/arrow.js +247 -19
  5. package/dist/connection.d.ts +4 -1
  6. package/dist/connection.js +11 -5
  7. package/dist/embedding/embedding_function.d.ts +54 -28
  8. package/dist/embedding/embedding_function.js +71 -10
  9. package/dist/embedding/index.d.ts +28 -2
  10. package/dist/embedding/index.js +111 -4
  11. package/dist/embedding/openai.d.ts +16 -7
  12. package/dist/embedding/openai.js +62 -12
  13. package/dist/embedding/registry.d.ts +54 -0
  14. package/dist/embedding/registry.js +123 -0
  15. package/dist/native.d.ts +26 -0
  16. package/dist/query.d.ts +1 -1
  17. package/dist/query.js +7 -6
  18. package/dist/sanitize.d.ts +22 -1
  19. package/dist/sanitize.js +126 -113
  20. package/dist/table.d.ts +50 -4
  21. package/dist/table.js +47 -5
  22. package/lancedb/arrow.ts +283 -49
  23. package/lancedb/connection.ts +27 -6
  24. package/lancedb/embedding/embedding_function.ts +126 -42
  25. package/lancedb/embedding/index.ts +113 -2
  26. package/lancedb/embedding/openai.ts +62 -16
  27. package/lancedb/embedding/registry.ts +172 -0
  28. package/lancedb/query.ts +9 -6
  29. package/lancedb/sanitize.ts +62 -62
  30. package/lancedb/table.ts +72 -5
  31. package/nodejs-artifacts/arrow.d.ts +35 -9
  32. package/nodejs-artifacts/arrow.js +247 -19
  33. package/nodejs-artifacts/connection.d.ts +4 -1
  34. package/nodejs-artifacts/connection.js +11 -5
  35. package/nodejs-artifacts/embedding/embedding_function.d.ts +54 -28
  36. package/nodejs-artifacts/embedding/embedding_function.js +71 -10
  37. package/nodejs-artifacts/embedding/index.d.ts +28 -2
  38. package/nodejs-artifacts/embedding/index.js +111 -4
  39. package/nodejs-artifacts/embedding/openai.d.ts +16 -7
  40. package/nodejs-artifacts/embedding/openai.js +62 -12
  41. package/nodejs-artifacts/embedding/registry.d.ts +54 -0
  42. package/nodejs-artifacts/embedding/registry.js +123 -0
  43. package/nodejs-artifacts/native.d.ts +26 -0
  44. package/nodejs-artifacts/query.d.ts +1 -1
  45. package/nodejs-artifacts/query.js +7 -6
  46. package/nodejs-artifacts/sanitize.d.ts +22 -1
  47. package/nodejs-artifacts/sanitize.js +126 -113
  48. package/nodejs-artifacts/table.d.ts +50 -4
  49. package/nodejs-artifacts/table.js +47 -5
  50. package/package.json +23 -21
  51. package/tsconfig.json +3 -1
  52. package/.eslintignore +0 -3
  53. package/eslint.config.js +0 -28
@@ -1,4 +1,4 @@
1
- // Copyright 2023 Lance Developers.
1
+ // Copyright 2024 Lance Developers.
2
2
  //
3
3
  // Licensed under the Apache License, Version 2.0 (the "License");
4
4
  // you may not use this file except in compliance with the License.
@@ -12,67 +12,151 @@
12
12
  // See the License for the specific language governing permissions and
13
13
  // limitations under the License.
14
14
 
15
- import { type Float } from "apache-arrow";
15
+ import "reflect-metadata";
16
+ import {
17
+ DataType,
18
+ Field,
19
+ FixedSizeList,
20
+ Float,
21
+ Float32,
22
+ isDataType,
23
+ isFixedSizeList,
24
+ isFloat,
25
+ newVectorType,
26
+ } from "../arrow";
27
+ import { sanitizeType } from "../sanitize";
16
28
 
17
29
  /**
18
- * An embedding function that automatically creates vector representation for a given column.
30
+ * Options for a given embedding function
19
31
  */
20
- export interface EmbeddingFunction<T> {
21
- /**
22
- * The name of the column that will be used as input for the Embedding Function.
23
- */
24
- sourceColumn: string;
32
+ export interface FunctionOptions {
33
+ // biome-ignore lint/suspicious/noExplicitAny: options can be anything
34
+ [key: string]: any;
35
+ }
25
36
 
37
+ /**
38
+ * An embedding function that automatically creates vector representation for a given column.
39
+ */
40
+ export abstract class EmbeddingFunction<
41
+ // biome-ignore lint/suspicious/noExplicitAny: we don't know what the implementor will do
42
+ T = any,
43
+ M extends FunctionOptions = FunctionOptions,
44
+ > {
26
45
  /**
27
- * The data type of the embedding
46
+ * Convert the embedding function to a JSON object
47
+ * It is used to serialize the embedding function to the schema
48
+ * It's important that any object returned by this method contains all the necessary
49
+ * information to recreate the embedding function
28
50
  *
29
- * The embedding function should return `number`. This will be converted into
30
- * an Arrow float array. By default this will be Float32 but this property can
31
- * be used to control the conversion.
32
- */
33
- embeddingDataType?: Float;
34
-
35
- /**
36
- * The dimension of the embedding
51
+ * It should return the same object that was passed to the constructor
52
+ * If it does not, the embedding function will not be able to be recreated, or could be recreated incorrectly
37
53
  *
38
- * This is optional, normally this can be determined by looking at the results of
39
- * `embed`. If this is not specified, and there is an attempt to apply the embedding
40
- * to an empty table, then that process will fail.
54
+ * @example
55
+ * ```ts
56
+ * class MyEmbeddingFunction extends EmbeddingFunction {
57
+ * constructor(options: {model: string, timeout: number}) {
58
+ * super();
59
+ * this.model = options.model;
60
+ * this.timeout = options.timeout;
61
+ * }
62
+ * toJSON() {
63
+ * return {
64
+ * model: this.model,
65
+ * timeout: this.timeout,
66
+ * };
67
+ * }
68
+ * ```
41
69
  */
42
- embeddingDimension?: number;
70
+ abstract toJSON(): Partial<M>;
43
71
 
44
72
  /**
45
- * The name of the column that will contain the embedding
73
+ * sourceField is used in combination with `LanceSchema` to provide a declarative data model
46
74
  *
47
- * By default this is "vector"
75
+ * @param optionsOrDatatype - The options for the field or the datatype
76
+ *
77
+ * @see {@link lancedb.LanceSchema}
48
78
  */
49
- destColumn?: string;
79
+ sourceField(
80
+ optionsOrDatatype: Partial<FieldOptions> | DataType,
81
+ ): [DataType, Map<string, EmbeddingFunction>] {
82
+ let datatype = isDataType(optionsOrDatatype)
83
+ ? optionsOrDatatype
84
+ : optionsOrDatatype?.datatype;
85
+ if (!datatype) {
86
+ throw new Error("Datatype is required");
87
+ }
88
+ datatype = sanitizeType(datatype);
89
+ const metadata = new Map<string, EmbeddingFunction>();
90
+ metadata.set("source_column_for", this);
91
+
92
+ return [datatype, metadata];
93
+ }
50
94
 
51
95
  /**
52
- * Should the source column be excluded from the resulting table
96
+ * vectorField is used in combination with `LanceSchema` to provide a declarative data model
53
97
  *
54
- * By default the source column is included. Set this to true and
55
- * only the embedding will be stored.
98
+ * @param options - The options for the field
99
+ *
100
+ * @see {@link lancedb.LanceSchema}
56
101
  */
57
- excludeSource?: boolean;
102
+ vectorField(
103
+ options?: Partial<FieldOptions>,
104
+ ): [DataType, Map<string, EmbeddingFunction>] {
105
+ let dtype: DataType;
106
+ const dims = this.ndims() ?? options?.dims;
107
+ if (!options?.datatype) {
108
+ if (dims === undefined) {
109
+ throw new Error("ndims is required for vector field");
110
+ }
111
+ dtype = new FixedSizeList(dims, new Field("item", new Float32(), true));
112
+ } else {
113
+ if (isFixedSizeList(options.datatype)) {
114
+ dtype = options.datatype;
115
+ } else if (isFloat(options.datatype)) {
116
+ if (dims === undefined) {
117
+ throw new Error("ndims is required for vector field");
118
+ }
119
+ dtype = newVectorType(dims, options.datatype);
120
+ } else {
121
+ throw new Error(
122
+ "Expected FixedSizeList or Float as datatype for vector field",
123
+ );
124
+ }
125
+ }
126
+ const metadata = new Map<string, EmbeddingFunction>();
127
+ metadata.set("vector_column_for", this);
128
+
129
+ return [dtype, metadata];
130
+ }
131
+
132
+ /** The number of dimensions of the embeddings */
133
+ ndims(): number | undefined {
134
+ return undefined;
135
+ }
136
+
137
+ /** The datatype of the embeddings */
138
+ abstract embeddingDataType(): Float;
58
139
 
59
140
  /**
60
141
  * Creates a vector representation for the given values.
61
142
  */
62
- embed: (data: T[]) => Promise<number[][]>;
63
- }
143
+ abstract computeSourceEmbeddings(
144
+ data: T[],
145
+ ): Promise<number[][] | Float32Array[] | Float64Array[]>;
64
146
 
65
- /** Test if the input seems to be an embedding function */
66
- export function isEmbeddingFunction<T>(
67
- value: unknown,
68
- ): value is EmbeddingFunction<T> {
69
- if (typeof value !== "object" || value === null) {
70
- return false;
71
- }
72
- if (!("sourceColumn" in value) || !("embed" in value)) {
73
- return false;
147
+ /**
148
+ * Compute the embeddings for a single query
149
+ */
150
+ async computeQueryEmbeddings(
151
+ data: T,
152
+ ): Promise<number[] | Float32Array | Float64Array> {
153
+ return this.computeSourceEmbeddings([data]).then(
154
+ (embeddings) => embeddings[0],
155
+ );
74
156
  }
75
- return (
76
- typeof value.sourceColumn === "string" && typeof value.embed === "function"
77
- );
157
+ }
158
+
159
+ export interface FieldOptions<T extends DataType = DataType> {
160
+ datatype: T;
161
+ dims?: number;
78
162
  }
@@ -1,2 +1,113 @@
1
- export { EmbeddingFunction, isEmbeddingFunction } from "./embedding_function";
2
- export { OpenAIEmbeddingFunction } from "./openai";
1
+ // Copyright 2023 Lance Developers.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ import { DataType, Field, Schema } from "../arrow";
16
+ import { isDataType } from "../arrow";
17
+ import { sanitizeType } from "../sanitize";
18
+ import { EmbeddingFunction } from "./embedding_function";
19
+ import { EmbeddingFunctionConfig, getRegistry } from "./registry";
20
+
21
+ export { EmbeddingFunction } from "./embedding_function";
22
+
23
+ // We need to explicitly export '*' so that the `register` decorator actually registers the class.
24
+ export * from "./openai";
25
+ export * from "./registry";
26
+
27
+ /**
28
+ * Create a schema with embedding functions.
29
+ *
30
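+ * @param fields - Maps each column name to either an Arrow data type or the `[datatype, metadata]` tuple returned by `sourceField` / `vectorField`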
31
+ * @returns Schema
32
+ * @example
33
+ * ```ts
34
+ * class MyEmbeddingFunction extends EmbeddingFunction {
35
+ * // ...
36
+ * }
37
+ * const func = new MyEmbeddingFunction();
38
+ * const schema = LanceSchema({
39
+ * id: new Int32(),
40
+ * text: func.sourceField(new Utf8()),
41
+ * vector: func.vectorField(),
42
+ * // optional: specify the datatype and/or dimensions
43
+ * vector2: func.vectorField({ datatype: new Float32(), dims: 3}),
44
+ * });
45
+ *
46
+ * const table = await db.createTable("my_table", data, { schema });
47
+ * ```
48
+ */
49
+ export function LanceSchema(
50
+ fields: Record<string, [object, Map<string, EmbeddingFunction>] | object>,
51
+ ): Schema {
52
+ const arrowFields: Field[] = [];
53
+
54
+ const embeddingFunctions = new Map<
55
+ EmbeddingFunction,
56
+ Partial<EmbeddingFunctionConfig>
57
+ >();
58
+ Object.entries(fields).forEach(([key, value]) => {
59
+ if (isDataType(value)) {
60
+ arrowFields.push(new Field(key, sanitizeType(value), true));
61
+ } else {
62
+ const [dtype, metadata] = value as [
63
+ object,
64
+ Map<string, EmbeddingFunction>,
65
+ ];
66
+ arrowFields.push(new Field(key, sanitizeType(dtype), true));
67
+ parseEmbeddingFunctions(embeddingFunctions, key, metadata);
68
+ }
69
+ });
70
+ const registry = getRegistry();
71
+ const metadata = registry.getTableMetadata(
72
+ Array.from(embeddingFunctions.values()) as EmbeddingFunctionConfig[],
73
+ );
74
+ const schema = new Schema(arrowFields, metadata);
75
+ return schema;
76
+ }
77
+
78
+ function parseEmbeddingFunctions(
79
+ embeddingFunctions: Map<EmbeddingFunction, Partial<EmbeddingFunctionConfig>>,
80
+ key: string,
81
+ metadata: Map<string, EmbeddingFunction>,
82
+ ): void {
83
+ if (metadata.has("source_column_for")) {
84
+ const embedFunction = metadata.get("source_column_for")!;
85
+ const current = embeddingFunctions.get(embedFunction);
86
+ if (current !== undefined) {
87
+ embeddingFunctions.set(embedFunction, {
88
+ ...current,
89
+ sourceColumn: key,
90
+ });
91
+ } else {
92
+ embeddingFunctions.set(embedFunction, {
93
+ sourceColumn: key,
94
+ function: embedFunction,
95
+ });
96
+ }
97
+ } else if (metadata.has("vector_column_for")) {
98
+ const embedFunction = metadata.get("vector_column_for")!;
99
+
100
+ const current = embeddingFunctions.get(embedFunction);
101
+ if (current !== undefined) {
102
+ embeddingFunctions.set(embedFunction, {
103
+ ...current,
104
+ vectorColumn: key,
105
+ });
106
+ } else {
107
+ embeddingFunctions.set(embedFunction, {
108
+ vectorColumn: key,
109
+ function: embedFunction,
110
+ });
111
+ }
112
+ }
113
+ }
@@ -12,18 +12,32 @@
12
12
  // See the License for the specific language governing permissions and
13
13
  // limitations under the License.
14
14
 
15
- import { type EmbeddingFunction } from "./embedding_function";
16
15
  import type OpenAI from "openai";
16
+ import { Float, Float32 } from "../arrow";
17
+ import { EmbeddingFunction } from "./embedding_function";
18
+ import { register } from "./registry";
17
19
 
18
- export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
19
- private readonly _openai: OpenAI;
20
- private readonly _modelName: string;
20
+ export type OpenAIOptions = {
21
+ apiKey?: string;
22
+ model?: string;
23
+ };
24
+
25
+ @register("openai")
26
+ export class OpenAIEmbeddingFunction extends EmbeddingFunction<
27
+ string,
28
+ OpenAIOptions
29
+ > {
30
+ #openai: OpenAI;
31
+ #modelName: string;
32
+
33
+ constructor(options: OpenAIOptions = { model: "text-embedding-ada-002" }) {
34
+ super();
35
+ const openAIKey = options?.apiKey ?? process.env.OPENAI_API_KEY;
36
+ if (!openAIKey) {
37
+ throw new Error("OpenAI API key is required");
38
+ }
39
+ const modelName = options?.model ?? "text-embedding-ada-002";
21
40
 
22
- constructor(
23
- sourceColumn: string,
24
- openAIKey: string,
25
- modelName: string = "text-embedding-ada-002",
26
- ) {
27
41
  /**
28
42
  * @type {import("openai").default}
29
43
  */
@@ -36,18 +50,40 @@ export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
36
50
  throw new Error("please install openai@^4.24.1 using npm install openai");
37
51
  }
38
52
 
39
- this.sourceColumn = sourceColumn;
40
53
  const configuration = {
41
54
  apiKey: openAIKey,
42
55
  };
43
56
 
44
- this._openai = new Openai(configuration);
45
- this._modelName = modelName;
57
+ this.#openai = new Openai(configuration);
58
+ this.#modelName = modelName;
59
+ }
60
+
61
+ toJSON() {
62
+ return {
63
+ model: this.#modelName,
64
+ };
65
+ }
66
+
67
+ ndims(): number {
68
+ switch (this.#modelName) {
69
+ case "text-embedding-ada-002":
70
+ return 1536;
71
+ case "text-embedding-3-large":
72
+ return 3072;
73
+ case "text-embedding-3-small":
74
+ return 1536;
75
+ default:
76
+ return null as never;
77
+ }
78
+ }
79
+
80
+ embeddingDataType(): Float {
81
+ return new Float32();
46
82
  }
47
83
 
48
- async embed(data: string[]): Promise<number[][]> {
49
- const response = await this._openai.embeddings.create({
50
- model: this._modelName,
84
+ async computeSourceEmbeddings(data: string[]): Promise<number[][]> {
85
+ const response = await this.#openai.embeddings.create({
86
+ model: this.#modelName,
51
87
  input: data,
52
88
  });
53
89
 
@@ -58,5 +94,15 @@ export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
58
94
  return embeddings;
59
95
  }
60
96
 
61
- sourceColumn: string;
97
+ async computeQueryEmbeddings(data: string): Promise<number[]> {
98
+ if (typeof data !== "string") {
99
+ throw new Error("Data must be a string");
100
+ }
101
+ const response = await this.#openai.embeddings.create({
102
+ model: this.#modelName,
103
+ input: data,
104
+ });
105
+
106
+ return response.data[0].embedding;
107
+ }
62
108
  }
@@ -0,0 +1,172 @@
1
+ // Copyright 2024 Lance Developers.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ import type { EmbeddingFunction } from "./embedding_function";
16
+ import "reflect-metadata";
17
+
18
+ export interface EmbeddingFunctionOptions {
19
+ [key: string]: unknown;
20
+ }
21
+
22
+ export interface EmbeddingFunctionFactory<
23
+ T extends EmbeddingFunction = EmbeddingFunction,
24
+ > {
25
+ new (modelOptions?: EmbeddingFunctionOptions): T;
26
+ }
27
+
28
+ interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
29
+ create(options?: EmbeddingFunctionOptions): T;
30
+ }
31
+
32
+ /**
33
+ * This is a singleton class used to register embedding functions
34
+ * and fetch them by name. It also handles serializing and deserializing.
35
+ * You can implement your own embedding function by subclassing EmbeddingFunction
36
+ * or TextEmbeddingFunction and registering it with the registry
37
+ */
38
+ export class EmbeddingFunctionRegistry {
39
+ #functions: Map<string, EmbeddingFunctionFactory> = new Map();
40
+
41
+ /**
42
+ * Register an embedding function
43
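+ * @param alias The name to register the function under; defaults to the class name when omitted
44
+ * @returns A class decorator that registers the decorated constructor and throws if the alias is already registered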
45
+ */
46
+ register<T extends EmbeddingFunctionFactory = EmbeddingFunctionFactory>(
47
+ this: EmbeddingFunctionRegistry,
48
+ alias?: string,
49
+ // biome-ignore lint/suspicious/noExplicitAny: <explanation>
50
+ ): (ctor: T) => any {
51
+ const self = this;
52
+ return function (ctor: T) {
53
+ if (!alias) {
54
+ alias = ctor.name;
55
+ }
56
+ if (self.#functions.has(alias)) {
57
+ throw new Error(
58
+ `Embedding function with alias "${alias}" already exists`,
59
+ );
60
+ }
61
+ self.#functions.set(alias, ctor);
62
+ Reflect.defineMetadata("lancedb::embedding::name", alias, ctor);
63
+ return ctor;
64
+ };
65
+ }
66
+
67
+ /**
68
+ * Fetch an embedding function by name
69
+ * @param name The name of the function
70
+ */
71
+ get<T extends EmbeddingFunction<unknown> = EmbeddingFunction>(
72
+ name: string,
73
+ ): EmbeddingFunctionCreate<T> | undefined {
74
+ const factory = this.#functions.get(name);
75
+ if (!factory) {
76
+ return undefined;
77
+ }
78
+ return {
79
+ create: function (options: EmbeddingFunctionOptions) {
80
+ return new factory(options) as unknown as T;
81
+ },
82
+ };
83
+ }
84
+
85
+ /**
86
+ * reset the registry to the initial state
87
+ */
88
+ reset(this: EmbeddingFunctionRegistry) {
89
+ this.#functions.clear();
90
+ }
91
+
92
+ parseFunctions(
93
+ this: EmbeddingFunctionRegistry,
94
+ metadata: Map<string, string>,
95
+ ): Map<string, EmbeddingFunctionConfig> {
96
+ if (!metadata.has("embedding_functions")) {
97
+ return new Map();
98
+ } else {
99
+ type FunctionConfig = {
100
+ name: string;
101
+ sourceColumn: string;
102
+ vectorColumn: string;
103
+ model: EmbeddingFunctionOptions;
104
+ };
105
+ const functions = <FunctionConfig[]>(
106
+ JSON.parse(metadata.get("embedding_functions")!)
107
+ );
108
+ return new Map(
109
+ functions.map((f) => {
110
+ const fn = this.get(f.name);
111
+ if (!fn) {
112
+ throw new Error(`Function "${f.name}" not found in registry`);
113
+ }
114
+ return [
115
+ f.name,
116
+ {
117
+ sourceColumn: f.sourceColumn,
118
+ vectorColumn: f.vectorColumn,
119
+ function: this.get(f.name)!.create(f.model),
120
+ },
121
+ ];
122
+ }),
123
+ );
124
+ }
125
+ }
126
+ // biome-ignore lint/suspicious/noExplicitAny: <explanation>
127
+ functionToMetadata(conf: EmbeddingFunctionConfig): Record<string, any> {
128
+ // biome-ignore lint/suspicious/noExplicitAny: <explanation>
129
+ const metadata: Record<string, any> = {};
130
+ const name = Reflect.getMetadata(
131
+ "lancedb::embedding::name",
132
+ conf.function.constructor,
133
+ );
134
+ metadata["sourceColumn"] = conf.sourceColumn;
135
+ metadata["vectorColumn"] = conf.vectorColumn ?? "vector";
136
+ metadata["name"] = name ?? conf.function.constructor.name;
137
+ metadata["model"] = conf.function.toJSON();
138
+ return metadata;
139
+ }
140
+
141
+ getTableMetadata(functions: EmbeddingFunctionConfig[]): Map<string, string> {
142
+ const metadata = new Map<string, string>();
143
+ const jsonData = functions.map((conf) => this.functionToMetadata(conf));
144
+ metadata.set("embedding_functions", JSON.stringify(jsonData));
145
+
146
+ return metadata;
147
+ }
148
+ }
149
+
150
+ const _REGISTRY = new EmbeddingFunctionRegistry();
151
+
152
+ export function register(name?: string) {
153
+ return _REGISTRY.register(name);
154
+ }
155
+
156
+ /**
157
+ * Utility function to get the global instance of the registry
158
+ * @returns `EmbeddingFunctionRegistry` The global instance of the registry
159
+ * @example
160
+ * ```ts
161
+ * const registry = getRegistry();
162
+ * const openai = registry.get("openai")?.create(); // `get` returns undefined for an unknown name
163
+ */
164
+ export function getRegistry(): EmbeddingFunctionRegistry {
165
+ return _REGISTRY;
166
+ }
167
+
168
+ export interface EmbeddingFunctionConfig {
169
+ sourceColumn: string;
170
+ vectorColumn?: string;
171
+ function: EmbeddingFunction;
172
+ }
package/lancedb/query.ts CHANGED
@@ -12,14 +12,14 @@
12
12
  // See the License for the specific language governing permissions and
13
13
  // limitations under the License.
14
14
 
15
- import { RecordBatch, tableFromIPC, Table as ArrowTable } from "apache-arrow";
15
+ import { Table as ArrowTable, RecordBatch, tableFromIPC } from "./arrow";
16
+ import { type IvfPqOptions } from "./indices";
16
17
  import {
17
18
  RecordBatchIterator as NativeBatchIterator,
18
19
  Query as NativeQuery,
19
20
  Table as NativeTable,
20
21
  VectorQuery as NativeVectorQuery,
21
22
  } from "./native";
22
- import { type IvfPqOptions } from "./indices";
23
23
  export class RecordBatchIterator implements AsyncIterator<RecordBatch> {
24
24
  private promisedInner?: Promise<NativeBatchIterator>;
25
25
  private inner?: NativeBatchIterator;
@@ -29,7 +29,7 @@ export class RecordBatchIterator implements AsyncIterator<RecordBatch> {
29
29
  this.promisedInner = promise;
30
30
  }
31
31
 
32
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
32
+ // biome-ignore lint/suspicious/noExplicitAny: skip
33
33
  async next(): Promise<IteratorResult<RecordBatch<any>>> {
34
34
  if (this.inner === undefined) {
35
35
  this.inner = await this.promisedInner;
@@ -56,7 +56,9 @@ export class QueryBase<
56
56
  QueryType,
57
57
  > implements AsyncIterable<RecordBatch>
58
58
  {
59
- protected constructor(protected inner: NativeQueryType) {}
59
+ protected constructor(protected inner: NativeQueryType) {
60
+ // intentionally empty
61
+ }
60
62
 
61
63
  /**
62
64
  * A filter statement to be applied to this query.
@@ -150,7 +152,7 @@ export class QueryBase<
150
152
  return new RecordBatchIterator(this.nativeExecute());
151
153
  }
152
154
 
153
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
155
+ // biome-ignore lint/suspicious/noExplicitAny: skip
154
156
  [Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>> {
155
157
  const promise = this.nativeExecute();
156
158
  return new RecordBatchIterator(promise);
@@ -168,6 +170,7 @@ export class QueryBase<
168
170
  /** Collect the results as an array of objects. */
169
171
  async toArray(): Promise<unknown[]> {
170
172
  const tbl = await this.toArrow();
173
+
171
174
  // eslint-disable-next-line @typescript-eslint/no-unsafe-return
172
175
  return tbl.toArray();
173
176
  }
@@ -368,7 +371,7 @@ export class Query extends QueryBase<NativeQuery, Query> {
368
371
  * a default `limit` of 10 will be used. @see {@link Query#limit}
369
372
  */
370
373
  nearestTo(vector: unknown): VectorQuery {
371
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
374
+ // biome-ignore lint/suspicious/noExplicitAny: skip
372
375
  const vectorQuery = this.inner.nearestTo(Float32Array.from(vector as any));
373
376
  return new VectorQuery(vectorQuery);
374
377
  }