@lancedb/lancedb 0.7.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/dist/arrow.d.ts +2 -3
  2. package/dist/arrow.js +2 -27
  3. package/dist/connection.d.ts +9 -0
  4. package/dist/connection.js +16 -2
  5. package/dist/embedding/embedding_function.d.ts +11 -0
  6. package/dist/embedding/embedding_function.js +28 -7
  7. package/dist/embedding/index.d.ts +2 -1
  8. package/dist/embedding/index.js +7 -6
  9. package/dist/embedding/openai.d.ts +1 -1
  10. package/dist/embedding/registry.d.ts +9 -7
  11. package/dist/embedding/registry.js +24 -6
  12. package/dist/embedding/transformers.d.ts +37 -0
  13. package/dist/embedding/transformers.js +147 -0
  14. package/dist/index.d.ts +1 -1
  15. package/dist/query.js +15 -9
  16. package/dist/remote/client.d.ts +1 -1
  17. package/dist/remote/client.js +6 -8
  18. package/dist/remote/connection.d.ts +2 -3
  19. package/dist/remote/connection.js +2 -2
  20. package/dist/sanitize.js +4 -2
  21. package/dist/table.d.ts +3 -0
  22. package/dist/table.js +1 -1
  23. package/package.json +17 -13
  24. package/Cargo.toml +0 -28
  25. package/biome.json +0 -158
  26. package/build.rs +0 -5
  27. package/dist/native.d.ts +0 -208
  28. package/examples/ann_indexes.ts +0 -49
  29. package/examples/basic.ts +0 -149
  30. package/examples/embedding.ts +0 -83
  31. package/examples/filtering.ts +0 -34
  32. package/examples/jsconfig.json +0 -27
  33. package/examples/package-lock.json +0 -79
  34. package/examples/package.json +0 -18
  35. package/examples/search.ts +0 -37
  36. package/jest.config.js +0 -7
  37. package/lancedb/arrow.ts +0 -947
  38. package/lancedb/connection.ts +0 -333
  39. package/lancedb/embedding/embedding_function.ts +0 -194
  40. package/lancedb/embedding/index.ts +0 -113
  41. package/lancedb/embedding/openai.ts +0 -113
  42. package/lancedb/embedding/registry.ts +0 -188
  43. package/lancedb/index.ts +0 -142
  44. package/lancedb/indices.ts +0 -203
  45. package/lancedb/merge.ts +0 -70
  46. package/lancedb/query.ts +0 -507
  47. package/lancedb/remote/client.ts +0 -221
  48. package/lancedb/remote/connection.ts +0 -201
  49. package/lancedb/remote/index.ts +0 -3
  50. package/lancedb/remote/table.ts +0 -226
  51. package/lancedb/sanitize.ts +0 -588
  52. package/lancedb/table.ts +0 -669
  53. package/lancedb/util.ts +0 -69
  54. package/native.d.ts +0 -208
  55. package/nodejs-artifacts/arrow.d.ts +0 -250
  56. package/nodejs-artifacts/arrow.js +0 -768
  57. package/nodejs-artifacts/connection.d.ts +0 -171
  58. package/nodejs-artifacts/connection.js +0 -135
  59. package/nodejs-artifacts/embedding/embedding_function.d.ts +0 -79
  60. package/nodejs-artifacts/embedding/embedding_function.js +0 -112
  61. package/nodejs-artifacts/embedding/index.d.ts +0 -28
  62. package/nodejs-artifacts/embedding/index.js +0 -114
  63. package/nodejs-artifacts/embedding/openai.d.ts +0 -18
  64. package/nodejs-artifacts/embedding/openai.js +0 -105
  65. package/nodejs-artifacts/embedding/registry.d.ts +0 -53
  66. package/nodejs-artifacts/embedding/registry.js +0 -127
  67. package/nodejs-artifacts/index.d.ts +0 -55
  68. package/nodejs-artifacts/index.js +0 -57
  69. package/nodejs-artifacts/indices.d.ts +0 -165
  70. package/nodejs-artifacts/indices.js +0 -71
  71. package/nodejs-artifacts/merge.d.ts +0 -54
  72. package/nodejs-artifacts/merge.js +0 -64
  73. package/nodejs-artifacts/native.d.ts +0 -208
  74. package/nodejs-artifacts/native.js +0 -330
  75. package/nodejs-artifacts/query.d.ts +0 -283
  76. package/nodejs-artifacts/query.js +0 -448
  77. package/nodejs-artifacts/remote/client.d.ts +0 -28
  78. package/nodejs-artifacts/remote/client.js +0 -172
  79. package/nodejs-artifacts/remote/connection.d.ts +0 -25
  80. package/nodejs-artifacts/remote/connection.js +0 -110
  81. package/nodejs-artifacts/remote/index.d.ts +0 -3
  82. package/nodejs-artifacts/remote/index.js +0 -9
  83. package/nodejs-artifacts/remote/table.d.ts +0 -42
  84. package/nodejs-artifacts/remote/table.js +0 -179
  85. package/nodejs-artifacts/sanitize.d.ts +0 -31
  86. package/nodejs-artifacts/sanitize.js +0 -436
  87. package/nodejs-artifacts/table.d.ts +0 -395
  88. package/nodejs-artifacts/table.js +0 -230
  89. package/nodejs-artifacts/util.d.ts +0 -14
  90. package/nodejs-artifacts/util.js +0 -65
  91. package/tsconfig.json +0 -25
  92. package/typedoc.json +0 -10
@@ -1,333 +0,0 @@
1
- // Copyright 2024 Lance Developers.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
-
15
- import { Data, Schema, SchemaLike, TableLike } from "./arrow";
16
- import { fromTableToBuffer, makeEmptyTable } from "./arrow";
17
- import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
18
- import { Connection as LanceDbConnection } from "./native";
19
- import { LocalTable, Table } from "./table";
20
-
21
- export interface CreateTableOptions {
22
- /**
23
- * The mode to use when creating the table.
24
- *
25
- * If this is set to "create" and the table already exists then either
26
- * an error will be thrown or, if existOk is true, then nothing will
27
- * happen. Any provided data will be ignored.
28
- *
29
- * If this is set to "overwrite" then any existing table will be replaced.
30
- */
31
- mode: "create" | "overwrite";
32
- /**
33
- * If this is true and the table already exists and the mode is "create"
34
- * then no error will be raised.
35
- */
36
- existOk: boolean;
37
-
38
- /**
39
- * Configuration for object storage.
40
- *
41
- * Options already set on the connection will be inherited by the table,
42
- * but can be overridden here.
43
- *
44
- * The available options are described at https://lancedb.github.io/lancedb/guides/storage/
45
- */
46
- storageOptions?: Record<string, string>;
47
- /**
48
- * If true then data files will be written with the legacy format
49
- *
50
- * The default is true while the new format is in beta
51
- */
52
- useLegacyFormat?: boolean;
53
- schema?: SchemaLike;
54
- embeddingFunction?: EmbeddingFunctionConfig;
55
- }
56
-
57
- export interface OpenTableOptions {
58
- /**
59
- * Configuration for object storage.
60
- *
61
- * Options already set on the connection will be inherited by the table,
62
- * but can be overridden here.
63
- *
64
- * The available options are described at https://lancedb.github.io/lancedb/guides/storage/
65
- */
66
- storageOptions?: Record<string, string>;
67
- /**
68
- * Set the size of the index cache, specified as a number of entries
69
- *
70
- * The exact meaning of an "entry" will depend on the type of index:
71
- * - IVF: there is one entry for each IVF partition
72
- * - BTREE: there is one entry for the entire index
73
- *
74
- * This cache applies to the entire opened table, across all indices.
75
- * Setting this value higher will increase performance on larger datasets
76
- * at the expense of more RAM
77
- */
78
- indexCacheSize?: number;
79
- }
80
-
81
- export interface TableNamesOptions {
82
- /**
83
- * If present, only return names that come lexicographically after the
84
- * supplied value.
85
- *
86
- * This can be combined with limit to implement pagination by setting this to
87
- * the last table name from the previous page.
88
- */
89
- startAfter?: string;
90
- /** An optional limit to the number of results to return. */
91
- limit?: number;
92
- }
93
- /**
94
- * A LanceDB Connection that allows you to open tables and create new ones.
95
- *
96
- * Connection could be local against filesystem or remote against a server.
97
- *
98
- * A Connection is intended to be a long lived object and may hold open
99
- * resources such as HTTP connection pools. This is generally fine and
100
- * a single connection should be shared if it is going to be used many
101
- * times. However, if you are finished with a connection, you may call
102
- * close to eagerly free these resources. Any call to a Connection
103
- * method after it has been closed will result in an error.
104
- *
105
- * Closing a connection is optional. Connections will automatically
106
- * be closed when they are garbage collected.
107
- *
108
- * Any created tables are independent and will continue to work even if
109
- * the underlying connection has been closed.
110
- */
111
- export abstract class Connection {
112
- [Symbol.for("nodejs.util.inspect.custom")](): string {
113
- return this.display();
114
- }
115
-
116
- /**
117
- * Return true if the connection has not been closed
118
- */
119
- abstract isOpen(): boolean;
120
-
121
- /**
122
- * Close the connection, releasing any underlying resources.
123
- *
124
- * It is safe to call this method multiple times.
125
- *
126
- * Any attempt to use the connection after it is closed will result in an error.
127
- */
128
- abstract close(): void;
129
-
130
- /**
131
- * Return a brief description of the connection
132
- */
133
- abstract display(): string;
134
-
135
- /**
136
- * List all the table names in this database.
137
- *
138
- * Tables will be returned in lexicographical order.
139
- * @param {Partial<TableNamesOptions>} options - options to control the
140
- * paging / start point
141
- *
142
- */
143
- abstract tableNames(options?: Partial<TableNamesOptions>): Promise<string[]>;
144
-
145
- /**
146
- * Open a table in the database.
147
- * @param {string} name - The name of the table
148
- */
149
- abstract openTable(
150
- name: string,
151
- options?: Partial<OpenTableOptions>,
152
- ): Promise<Table>;
153
-
154
- /**
155
- * Creates a new Table and initialize it with new data.
156
- * @param {object} options - The options object.
157
- * @param {string} options.name - The name of the table.
158
- * @param {Data} options.data - Non-empty Array of Records to be inserted into the table
159
- *
160
- */
161
- abstract createTable(
162
- options: {
163
- name: string;
164
- data: Data;
165
- } & Partial<CreateTableOptions>,
166
- ): Promise<Table>;
167
- /**
168
- * Creates a new Table and initialize it with new data.
169
- * @param {string} name - The name of the table.
170
- * @param {Record<string, unknown>[] | TableLike} data - Non-empty Array of Records
171
- * to be inserted into the table
172
- */
173
- abstract createTable(
174
- name: string,
175
- data: Record<string, unknown>[] | TableLike,
176
- options?: Partial<CreateTableOptions>,
177
- ): Promise<Table>;
178
-
179
- /**
180
- * Creates a new empty Table
181
- * @param {string} name - The name of the table.
182
- * @param {Schema} schema - The schema of the table
183
- */
184
- abstract createEmptyTable(
185
- name: string,
186
- schema: import("./arrow").SchemaLike,
187
- options?: Partial<CreateTableOptions>,
188
- ): Promise<Table>;
189
-
190
- /**
191
- * Drop an existing table.
192
- * @param {string} name The name of the table to drop.
193
- */
194
- abstract dropTable(name: string): Promise<void>;
195
- }
196
-
197
- export class LocalConnection extends Connection {
198
- readonly inner: LanceDbConnection;
199
-
200
- constructor(inner: LanceDbConnection) {
201
- super();
202
- this.inner = inner;
203
- }
204
-
205
- isOpen(): boolean {
206
- return this.inner.isOpen();
207
- }
208
-
209
- close(): void {
210
- this.inner.close();
211
- }
212
-
213
- display(): string {
214
- return this.inner.display();
215
- }
216
-
217
- async tableNames(options?: Partial<TableNamesOptions>): Promise<string[]> {
218
- return this.inner.tableNames(options?.startAfter, options?.limit);
219
- }
220
-
221
- async openTable(
222
- name: string,
223
- options?: Partial<OpenTableOptions>,
224
- ): Promise<Table> {
225
- const innerTable = await this.inner.openTable(
226
- name,
227
- cleanseStorageOptions(options?.storageOptions),
228
- options?.indexCacheSize,
229
- );
230
-
231
- return new LocalTable(innerTable);
232
- }
233
-
234
- async createTable(
235
- nameOrOptions:
236
- | string
237
- | ({ name: string; data: Data } & Partial<CreateTableOptions>),
238
- data?: Record<string, unknown>[] | TableLike,
239
- options?: Partial<CreateTableOptions>,
240
- ): Promise<Table> {
241
- if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
242
- const { name, data, ...options } = nameOrOptions;
243
- return this.createTable(name, data, options);
244
- }
245
- if (data === undefined) {
246
- throw new Error("data is required");
247
- }
248
- const { buf, mode } = await Table.parseTableData(data, options);
249
- const innerTable = await this.inner.createTable(
250
- nameOrOptions,
251
- buf,
252
- mode,
253
- cleanseStorageOptions(options?.storageOptions),
254
- options?.useLegacyFormat,
255
- );
256
-
257
- return new LocalTable(innerTable);
258
- }
259
-
260
- async createEmptyTable(
261
- name: string,
262
- schema: import("./arrow").SchemaLike,
263
- options?: Partial<CreateTableOptions>,
264
- ): Promise<Table> {
265
- let mode: string = options?.mode ?? "create";
266
- const existOk = options?.existOk ?? false;
267
-
268
- if (mode === "create" && existOk) {
269
- mode = "exist_ok";
270
- }
271
- let metadata: Map<string, string> | undefined = undefined;
272
- if (options?.embeddingFunction !== undefined) {
273
- const embeddingFunction = options.embeddingFunction;
274
- const registry = getRegistry();
275
- metadata = registry.getTableMetadata([embeddingFunction]);
276
- }
277
-
278
- const table = makeEmptyTable(schema, metadata);
279
- const buf = await fromTableToBuffer(table);
280
- const innerTable = await this.inner.createEmptyTable(
281
- name,
282
- buf,
283
- mode,
284
- cleanseStorageOptions(options?.storageOptions),
285
- options?.useLegacyFormat,
286
- );
287
- return new LocalTable(innerTable);
288
- }
289
-
290
- async dropTable(name: string): Promise<void> {
291
- return this.inner.dropTable(name);
292
- }
293
- }
294
-
295
- /**
296
- * Takes storage options and makes all the keys snake case.
297
- */
298
- export function cleanseStorageOptions(
299
- options?: Record<string, string>,
300
- ): Record<string, string> | undefined {
301
- if (options === undefined) {
302
- return undefined;
303
- }
304
- const result: Record<string, string> = {};
305
- for (const [key, value] of Object.entries(options)) {
306
- if (value !== undefined) {
307
- const newKey = camelToSnakeCase(key);
308
- result[newKey] = value;
309
- }
310
- }
311
- return result;
312
- }
313
-
314
- /**
315
- * Convert a string to snake case. It might already be snake case, in which case it is
316
- * returned unchanged.
317
- */
318
- function camelToSnakeCase(camel: string): string {
319
- if (camel.includes("_")) {
320
- // Assume if there is at least one underscore, it is already snake case
321
- return camel;
322
- }
323
- if (camel.toLocaleUpperCase() === camel) {
324
- // Assume if the string is all uppercase, it is already snake case
325
- return camel;
326
- }
327
-
328
- let result = camel.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`);
329
- if (result.startsWith("_")) {
330
- result = result.slice(1);
331
- }
332
- return result;
333
- }
@@ -1,194 +0,0 @@
1
- // Copyright 2024 Lance Developers.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
-
15
- import "reflect-metadata";
16
- import {
17
- DataType,
18
- Field,
19
- FixedSizeList,
20
- Float,
21
- Float32,
22
- type IntoVector,
23
- isDataType,
24
- isFixedSizeList,
25
- isFloat,
26
- newVectorType,
27
- } from "../arrow";
28
- import { sanitizeType } from "../sanitize";
29
-
30
- /**
31
- * Options for a given embedding function
32
- */
33
- export interface FunctionOptions {
34
- // biome-ignore lint/suspicious/noExplicitAny: options can be anything
35
- [key: string]: any;
36
- }
37
-
38
- export interface EmbeddingFunctionConstructor<
39
- T extends EmbeddingFunction = EmbeddingFunction,
40
- > {
41
- new (modelOptions?: T["TOptions"]): T;
42
- }
43
- /**
44
- * An embedding function that automatically creates vector representation for a given column.
45
- */
46
- export abstract class EmbeddingFunction<
47
- // biome-ignore lint/suspicious/noExplicitAny: we don't know what the implementor will do
48
- T = any,
49
- M extends FunctionOptions = FunctionOptions,
50
- > {
51
- /**
52
- * @ignore
53
- * This is only used for associating the options type with the class for type checking
54
- */
55
- // biome-ignore lint/style/useNamingConvention: we want to keep the name as it is
56
- readonly TOptions!: M;
57
- /**
58
- * Convert the embedding function to a JSON object
59
- * It is used to serialize the embedding function to the schema
60
- * It's important that any object returned by this method contains all the necessary
61
- * information to recreate the embedding function
62
- *
63
- * It should return the same object that was passed to the constructor
64
- * If it does not, the embedding function will not be able to be recreated, or could be recreated incorrectly
65
- *
66
- * @example
67
- * ```ts
68
- * class MyEmbeddingFunction extends EmbeddingFunction {
69
- * constructor(options: {model: string, timeout: number}) {
70
- * super();
71
- * this.model = options.model;
72
- * this.timeout = options.timeout;
73
- * }
74
- * toJSON() {
75
- * return {
76
- * model: this.model,
77
- * timeout: this.timeout,
78
- * };
79
- * }
80
- * ```
81
- */
82
- abstract toJSON(): Partial<M>;
83
-
84
- /**
85
- * sourceField is used in combination with `LanceSchema` to provide a declarative data model
86
- *
87
- * @param optionsOrDatatype - The options for the field or the datatype
88
- *
89
- * @see {@link lancedb.LanceSchema}
90
- */
91
- sourceField(
92
- optionsOrDatatype: Partial<FieldOptions> | DataType,
93
- ): [DataType, Map<string, EmbeddingFunction>] {
94
- let datatype = isDataType(optionsOrDatatype)
95
- ? optionsOrDatatype
96
- : optionsOrDatatype?.datatype;
97
- if (!datatype) {
98
- throw new Error("Datatype is required");
99
- }
100
- datatype = sanitizeType(datatype);
101
- const metadata = new Map<string, EmbeddingFunction>();
102
- metadata.set("source_column_for", this);
103
-
104
- return [datatype, metadata];
105
- }
106
-
107
- /**
108
- * vectorField is used in combination with `LanceSchema` to provide a declarative data model
109
- *
110
- * @param options - The options for the field
111
- *
112
- * @see {@link lancedb.LanceSchema}
113
- */
114
- vectorField(
115
- optionsOrDatatype?: Partial<FieldOptions> | DataType,
116
- ): [DataType, Map<string, EmbeddingFunction>] {
117
- let dtype: DataType | undefined;
118
- let vectorType: DataType;
119
- let dims: number | undefined = this.ndims();
120
-
121
- // `func.vectorField(new Float32())`
122
- if (isDataType(optionsOrDatatype)) {
123
- dtype = optionsOrDatatype;
124
- } else {
125
- // `func.vectorField({
126
- // datatype: new Float32(),
127
- // dims: 10
128
- // })`
129
- dims = dims ?? optionsOrDatatype?.dims;
130
- dtype = optionsOrDatatype?.datatype;
131
- }
132
-
133
- if (dtype !== undefined) {
134
- // `func.vectorField(new FixedSizeList(dims, new Field("item", new Float32(), true)))`
135
- // or `func.vectorField({datatype: new FixedSizeList(dims, new Field("item", new Float32(), true))})`
136
- if (isFixedSizeList(dtype)) {
137
- vectorType = dtype;
138
- // `func.vectorField(new Float32())`
139
- // or `func.vectorField({datatype: new Float32()})`
140
- } else if (isFloat(dtype)) {
141
- // No `ndims` impl and no `{dims: n}` provided;
142
- if (dims === undefined) {
143
- throw new Error("ndims is required for vector field");
144
- }
145
- vectorType = newVectorType(dims, dtype);
146
- } else {
147
- throw new Error(
148
- "Expected FixedSizeList or Float as datatype for vector field",
149
- );
150
- }
151
- } else {
152
- if (dims === undefined) {
153
- throw new Error("ndims is required for vector field");
154
- }
155
- vectorType = new FixedSizeList(
156
- dims,
157
- new Field("item", new Float32(), true),
158
- );
159
- }
160
- const metadata = new Map<string, EmbeddingFunction>();
161
- metadata.set("vector_column_for", this);
162
-
163
- return [vectorType, metadata];
164
- }
165
-
166
- /** The number of dimensions of the embeddings */
167
- ndims(): number | undefined {
168
- return undefined;
169
- }
170
-
171
- /** The datatype of the embeddings */
172
- abstract embeddingDataType(): Float;
173
-
174
- /**
175
- * Creates a vector representation for the given values.
176
- */
177
- abstract computeSourceEmbeddings(
178
- data: T[],
179
- ): Promise<number[][] | Float32Array[] | Float64Array[]>;
180
-
181
- /**
182
- Compute the embeddings for a single query
183
- */
184
- async computeQueryEmbeddings(data: T): Promise<Awaited<IntoVector>> {
185
- return this.computeSourceEmbeddings([data]).then(
186
- (embeddings) => embeddings[0],
187
- );
188
- }
189
- }
190
-
191
- export interface FieldOptions<T extends DataType = DataType> {
192
- datatype: T;
193
- dims?: number;
194
- }
@@ -1,113 +0,0 @@
1
- // Copyright 2023 Lance Developers.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
-
15
- import { DataType, Field, Schema } from "../arrow";
16
- import { isDataType } from "../arrow";
17
- import { sanitizeType } from "../sanitize";
18
- import { EmbeddingFunction } from "./embedding_function";
19
- import { EmbeddingFunctionConfig, getRegistry } from "./registry";
20
-
21
- export { EmbeddingFunction } from "./embedding_function";
22
-
23
- // We need to explicitly export '*' so that the `register` decorator actually registers the class.
24
- export * from "./openai";
25
- export * from "./registry";
26
-
27
- /**
28
- * Create a schema with embedding functions.
29
- *
30
- * @param fields
31
- * @returns Schema
32
- * @example
33
- * ```ts
34
- * class MyEmbeddingFunction extends EmbeddingFunction {
35
- * // ...
36
- * }
37
- * const func = new MyEmbeddingFunction();
38
- * const schema = LanceSchema({
39
- * id: new Int32(),
40
- * text: func.sourceField(new Utf8()),
41
- * vector: func.vectorField(),
42
- * // optional: specify the datatype and/or dimensions
43
- * vector2: func.vectorField({ datatype: new Float32(), dims: 3}),
44
- * });
45
- *
46
- * const table = await db.createTable("my_table", data, { schema });
47
- * ```
48
- */
49
- export function LanceSchema(
50
- fields: Record<string, [object, Map<string, EmbeddingFunction>] | object>,
51
- ): Schema {
52
- const arrowFields: Field[] = [];
53
-
54
- const embeddingFunctions = new Map<
55
- EmbeddingFunction,
56
- Partial<EmbeddingFunctionConfig>
57
- >();
58
- Object.entries(fields).forEach(([key, value]) => {
59
- if (isDataType(value)) {
60
- arrowFields.push(new Field(key, sanitizeType(value), true));
61
- } else {
62
- const [dtype, metadata] = value as [
63
- object,
64
- Map<string, EmbeddingFunction>,
65
- ];
66
- arrowFields.push(new Field(key, sanitizeType(dtype), true));
67
- parseEmbeddingFunctions(embeddingFunctions, key, metadata);
68
- }
69
- });
70
- const registry = getRegistry();
71
- const metadata = registry.getTableMetadata(
72
- Array.from(embeddingFunctions.values()) as EmbeddingFunctionConfig[],
73
- );
74
- const schema = new Schema(arrowFields, metadata);
75
- return schema;
76
- }
77
-
78
- function parseEmbeddingFunctions(
79
- embeddingFunctions: Map<EmbeddingFunction, Partial<EmbeddingFunctionConfig>>,
80
- key: string,
81
- metadata: Map<string, EmbeddingFunction>,
82
- ): void {
83
- if (metadata.has("source_column_for")) {
84
- const embedFunction = metadata.get("source_column_for")!;
85
- const current = embeddingFunctions.get(embedFunction);
86
- if (current !== undefined) {
87
- embeddingFunctions.set(embedFunction, {
88
- ...current,
89
- sourceColumn: key,
90
- });
91
- } else {
92
- embeddingFunctions.set(embedFunction, {
93
- sourceColumn: key,
94
- function: embedFunction,
95
- });
96
- }
97
- } else if (metadata.has("vector_column_for")) {
98
- const embedFunction = metadata.get("vector_column_for")!;
99
-
100
- const current = embeddingFunctions.get(embedFunction);
101
- if (current !== undefined) {
102
- embeddingFunctions.set(embedFunction, {
103
- ...current,
104
- vectorColumn: key,
105
- });
106
- } else {
107
- embeddingFunctions.set(embedFunction, {
108
- vectorColumn: key,
109
- function: embedFunction,
110
- });
111
- }
112
- }
113
- }