@lancedb/lancedb 0.5.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +3 -3
- package/biome.json +19 -3
- package/dist/arrow.d.ts +42 -7
- package/dist/arrow.js +6 -5
- package/dist/connection.d.ts +55 -29
- package/dist/connection.js +22 -74
- package/dist/embedding/embedding_function.d.ts +11 -3
- package/dist/embedding/embedding_function.js +36 -12
- package/dist/embedding/openai.d.ts +6 -5
- package/dist/embedding/openai.js +4 -2
- package/dist/embedding/registry.d.ts +10 -11
- package/dist/embedding/registry.js +4 -0
- package/dist/index.d.ts +51 -3
- package/dist/index.js +28 -4
- package/dist/merge.d.ts +54 -0
- package/dist/merge.js +64 -0
- package/dist/native.d.ts +34 -7
- package/dist/native.js +26 -9
- package/dist/query.d.ts +51 -16
- package/dist/query.js +122 -21
- package/dist/remote/client.d.ts +28 -0
- package/dist/remote/client.js +172 -0
- package/dist/remote/connection.d.ts +25 -0
- package/dist/remote/connection.js +110 -0
- package/dist/remote/index.d.ts +3 -0
- package/dist/remote/index.js +9 -0
- package/dist/remote/table.d.ts +42 -0
- package/dist/remote/table.js +179 -0
- package/dist/sanitize.d.ts +3 -2
- package/dist/sanitize.js +55 -1
- package/dist/table.d.ts +116 -25
- package/dist/table.js +117 -233
- package/dist/util.d.ts +14 -0
- package/dist/util.js +65 -0
- package/examples/ann_indexes.ts +49 -0
- package/examples/basic.ts +149 -0
- package/examples/embedding.ts +83 -0
- package/examples/filtering.ts +34 -0
- package/examples/jsconfig.json +27 -0
- package/examples/package-lock.json +79 -0
- package/examples/package.json +18 -0
- package/examples/search.ts +37 -0
- package/lancedb/arrow.ts +87 -24
- package/lancedb/connection.ts +115 -92
- package/lancedb/embedding/embedding_function.ts +48 -16
- package/lancedb/embedding/openai.ts +11 -6
- package/lancedb/embedding/registry.ts +38 -22
- package/lancedb/index.ts +101 -2
- package/lancedb/merge.ts +70 -0
- package/lancedb/query.ts +168 -39
- package/lancedb/remote/client.ts +221 -0
- package/lancedb/remote/connection.ts +201 -0
- package/lancedb/remote/index.ts +3 -0
- package/lancedb/remote/table.ts +226 -0
- package/lancedb/sanitize.ts +73 -1
- package/lancedb/table.ts +344 -101
- package/lancedb/util.ts +69 -0
- package/native.d.ts +208 -0
- package/nodejs-artifacts/arrow.d.ts +42 -7
- package/nodejs-artifacts/arrow.js +6 -5
- package/nodejs-artifacts/connection.d.ts +55 -29
- package/nodejs-artifacts/connection.js +22 -74
- package/nodejs-artifacts/embedding/embedding_function.d.ts +11 -3
- package/nodejs-artifacts/embedding/embedding_function.js +36 -12
- package/nodejs-artifacts/embedding/openai.d.ts +6 -5
- package/nodejs-artifacts/embedding/openai.js +4 -2
- package/nodejs-artifacts/embedding/registry.d.ts +10 -11
- package/nodejs-artifacts/embedding/registry.js +4 -0
- package/nodejs-artifacts/index.d.ts +51 -3
- package/nodejs-artifacts/index.js +28 -4
- package/nodejs-artifacts/merge.d.ts +54 -0
- package/nodejs-artifacts/merge.js +64 -0
- package/nodejs-artifacts/native.d.ts +34 -7
- package/nodejs-artifacts/native.js +26 -9
- package/nodejs-artifacts/query.d.ts +51 -16
- package/nodejs-artifacts/query.js +122 -21
- package/nodejs-artifacts/remote/client.d.ts +28 -0
- package/nodejs-artifacts/remote/client.js +172 -0
- package/nodejs-artifacts/remote/connection.d.ts +25 -0
- package/nodejs-artifacts/remote/connection.js +110 -0
- package/nodejs-artifacts/remote/index.d.ts +3 -0
- package/nodejs-artifacts/remote/index.js +9 -0
- package/nodejs-artifacts/remote/table.d.ts +42 -0
- package/nodejs-artifacts/remote/table.js +179 -0
- package/nodejs-artifacts/sanitize.d.ts +3 -2
- package/nodejs-artifacts/sanitize.js +55 -1
- package/nodejs-artifacts/table.d.ts +116 -25
- package/nodejs-artifacts/table.js +117 -233
- package/nodejs-artifacts/util.d.ts +14 -0
- package/nodejs-artifacts/util.js +65 -0
- package/package.json +25 -11
package/lancedb/connection.ts
CHANGED
|
@@ -12,38 +12,11 @@
|
|
|
12
12
|
// See the License for the specific language governing permissions and
|
|
13
13
|
// limitations under the License.
|
|
14
14
|
|
|
15
|
-
import {
|
|
16
|
-
import {
|
|
17
|
-
fromTableToBuffer,
|
|
18
|
-
isArrowTable,
|
|
19
|
-
makeArrowTable,
|
|
20
|
-
makeEmptyTable,
|
|
21
|
-
} from "./arrow";
|
|
15
|
+
import { Data, Schema, SchemaLike, TableLike } from "./arrow";
|
|
16
|
+
import { fromTableToBuffer, makeEmptyTable } from "./arrow";
|
|
22
17
|
import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
|
|
23
|
-
import {
|
|
24
|
-
import { Table } from "./table";
|
|
25
|
-
|
|
26
|
-
/**
|
|
27
|
-
* Connect to a LanceDB instance at the given URI.
|
|
28
|
-
*
|
|
29
|
-
* Accepted formats:
|
|
30
|
-
*
|
|
31
|
-
* - `/path/to/database` - local database
|
|
32
|
-
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
|
33
|
-
* - `db://host:port` - remote database (LanceDB cloud)
|
|
34
|
-
* @param {string} uri - The uri of the database. If the database uri starts
|
|
35
|
-
* with `db://` then it connects to a remote database.
|
|
36
|
-
* @see {@link ConnectionOptions} for more details on the URI format.
|
|
37
|
-
*/
|
|
38
|
-
export async function connect(
|
|
39
|
-
uri: string,
|
|
40
|
-
opts?: Partial<ConnectionOptions>,
|
|
41
|
-
): Promise<Connection> {
|
|
42
|
-
opts = opts ?? {};
|
|
43
|
-
opts.storageOptions = cleanseStorageOptions(opts.storageOptions);
|
|
44
|
-
const nativeConn = await LanceDbConnection.new(uri, opts);
|
|
45
|
-
return new Connection(nativeConn);
|
|
46
|
-
}
|
|
18
|
+
import { Connection as LanceDbConnection } from "./native";
|
|
19
|
+
import { LocalTable, Table } from "./table";
|
|
47
20
|
|
|
48
21
|
export interface CreateTableOptions {
|
|
49
22
|
/**
|
|
@@ -71,7 +44,13 @@ export interface CreateTableOptions {
|
|
|
71
44
|
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
|
72
45
|
*/
|
|
73
46
|
storageOptions?: Record<string, string>;
|
|
74
|
-
|
|
47
|
+
/**
|
|
48
|
+
* If true then data files will be written with the legacy format
|
|
49
|
+
*
|
|
50
|
+
* The default is true while the new format is in beta
|
|
51
|
+
*/
|
|
52
|
+
useLegacyFormat?: boolean;
|
|
53
|
+
schema?: SchemaLike;
|
|
75
54
|
embeddingFunction?: EmbeddingFunctionConfig;
|
|
76
55
|
}
|
|
77
56
|
|
|
@@ -111,7 +90,6 @@ export interface TableNamesOptions {
|
|
|
111
90
|
/** An optional limit to the number of results to return. */
|
|
112
91
|
limit?: number;
|
|
113
92
|
}
|
|
114
|
-
|
|
115
93
|
/**
|
|
116
94
|
* A LanceDB Connection that allows you to open tables and create new ones.
|
|
117
95
|
*
|
|
@@ -130,17 +108,15 @@ export interface TableNamesOptions {
|
|
|
130
108
|
* Any created tables are independent and will continue to work even if
|
|
131
109
|
* the underlying connection has been closed.
|
|
132
110
|
*/
|
|
133
|
-
export class Connection {
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
constructor(inner: LanceDbConnection) {
|
|
137
|
-
this.inner = inner;
|
|
111
|
+
export abstract class Connection {
|
|
112
|
+
[Symbol.for("nodejs.util.inspect.custom")](): string {
|
|
113
|
+
return this.display();
|
|
138
114
|
}
|
|
139
115
|
|
|
140
|
-
/**
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
116
|
+
/**
|
|
117
|
+
* Return true if the connection has not been closed
|
|
118
|
+
*/
|
|
119
|
+
abstract isOpen(): boolean;
|
|
144
120
|
|
|
145
121
|
/**
|
|
146
122
|
* Close the connection, releasing any underlying resources.
|
|
@@ -149,14 +125,12 @@ export class Connection {
|
|
|
149
125
|
*
|
|
150
126
|
* Any attempt to use the connection after it is closed will result in an error.
|
|
151
127
|
*/
|
|
152
|
-
close(): void
|
|
153
|
-
this.inner.close();
|
|
154
|
-
}
|
|
128
|
+
abstract close(): void;
|
|
155
129
|
|
|
156
|
-
/**
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
130
|
+
/**
|
|
131
|
+
* Return a brief description of the connection
|
|
132
|
+
*/
|
|
133
|
+
abstract display(): string;
|
|
160
134
|
|
|
161
135
|
/**
|
|
162
136
|
* List all the table names in this database.
|
|
@@ -164,15 +138,86 @@ export class Connection {
|
|
|
164
138
|
* Tables will be returned in lexicographical order.
|
|
165
139
|
* @param {Partial<TableNamesOptions>} options - options to control the
|
|
166
140
|
* paging / start point
|
|
141
|
+
*
|
|
167
142
|
*/
|
|
168
|
-
|
|
169
|
-
return this.inner.tableNames(options?.startAfter, options?.limit);
|
|
170
|
-
}
|
|
143
|
+
abstract tableNames(options?: Partial<TableNamesOptions>): Promise<string[]>;
|
|
171
144
|
|
|
172
145
|
/**
|
|
173
146
|
* Open a table in the database.
|
|
174
147
|
* @param {string} name - The name of the table
|
|
175
148
|
*/
|
|
149
|
+
abstract openTable(
|
|
150
|
+
name: string,
|
|
151
|
+
options?: Partial<OpenTableOptions>,
|
|
152
|
+
): Promise<Table>;
|
|
153
|
+
|
|
154
|
+
/**
|
|
155
|
+
* Creates a new Table and initialize it with new data.
|
|
156
|
+
* @param {object} options - The options object.
|
|
157
|
+
* @param {string} options.name - The name of the table.
|
|
158
|
+
* @param {Data} options.data - Non-empty Array of Records to be inserted into the table
|
|
159
|
+
*
|
|
160
|
+
*/
|
|
161
|
+
abstract createTable(
|
|
162
|
+
options: {
|
|
163
|
+
name: string;
|
|
164
|
+
data: Data;
|
|
165
|
+
} & Partial<CreateTableOptions>,
|
|
166
|
+
): Promise<Table>;
|
|
167
|
+
/**
|
|
168
|
+
* Creates a new Table and initialize it with new data.
|
|
169
|
+
* @param {string} name - The name of the table.
|
|
170
|
+
* @param {Record<string, unknown>[] | TableLike} data - Non-empty Array of Records
|
|
171
|
+
* to be inserted into the table
|
|
172
|
+
*/
|
|
173
|
+
abstract createTable(
|
|
174
|
+
name: string,
|
|
175
|
+
data: Record<string, unknown>[] | TableLike,
|
|
176
|
+
options?: Partial<CreateTableOptions>,
|
|
177
|
+
): Promise<Table>;
|
|
178
|
+
|
|
179
|
+
/**
|
|
180
|
+
* Creates a new empty Table
|
|
181
|
+
* @param {string} name - The name of the table.
|
|
182
|
+
* @param {Schema} schema - The schema of the table
|
|
183
|
+
*/
|
|
184
|
+
abstract createEmptyTable(
|
|
185
|
+
name: string,
|
|
186
|
+
schema: import("./arrow").SchemaLike,
|
|
187
|
+
options?: Partial<CreateTableOptions>,
|
|
188
|
+
): Promise<Table>;
|
|
189
|
+
|
|
190
|
+
/**
|
|
191
|
+
* Drop an existing table.
|
|
192
|
+
* @param {string} name The name of the table to drop.
|
|
193
|
+
*/
|
|
194
|
+
abstract dropTable(name: string): Promise<void>;
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
export class LocalConnection extends Connection {
|
|
198
|
+
readonly inner: LanceDbConnection;
|
|
199
|
+
|
|
200
|
+
constructor(inner: LanceDbConnection) {
|
|
201
|
+
super();
|
|
202
|
+
this.inner = inner;
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
isOpen(): boolean {
|
|
206
|
+
return this.inner.isOpen();
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
close(): void {
|
|
210
|
+
this.inner.close();
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
display(): string {
|
|
214
|
+
return this.inner.display();
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
async tableNames(options?: Partial<TableNamesOptions>): Promise<string[]> {
|
|
218
|
+
return this.inner.tableNames(options?.startAfter, options?.limit);
|
|
219
|
+
}
|
|
220
|
+
|
|
176
221
|
async openTable(
|
|
177
222
|
name: string,
|
|
178
223
|
options?: Partial<OpenTableOptions>,
|
|
@@ -183,57 +228,38 @@ export class Connection {
|
|
|
183
228
|
options?.indexCacheSize,
|
|
184
229
|
);
|
|
185
230
|
|
|
186
|
-
return new
|
|
231
|
+
return new LocalTable(innerTable);
|
|
187
232
|
}
|
|
188
233
|
|
|
189
|
-
/**
|
|
190
|
-
* Creates a new Table and initialize it with new data.
|
|
191
|
-
* @param {string} name - The name of the table.
|
|
192
|
-
* @param {Record<string, unknown>[] | ArrowTable} data - Non-empty Array of Records
|
|
193
|
-
* to be inserted into the table
|
|
194
|
-
*/
|
|
195
234
|
async createTable(
|
|
196
|
-
|
|
197
|
-
|
|
235
|
+
nameOrOptions:
|
|
236
|
+
| string
|
|
237
|
+
| ({ name: string; data: Data } & Partial<CreateTableOptions>),
|
|
238
|
+
data?: Record<string, unknown>[] | TableLike,
|
|
198
239
|
options?: Partial<CreateTableOptions>,
|
|
199
240
|
): Promise<Table> {
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
if (mode === "create" && existOk) {
|
|
204
|
-
mode = "exist_ok";
|
|
241
|
+
if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
|
|
242
|
+
const { name, data, ...options } = nameOrOptions;
|
|
243
|
+
return this.createTable(name, data, options);
|
|
205
244
|
}
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
if (isArrowTable(data)) {
|
|
209
|
-
table = data;
|
|
210
|
-
} else {
|
|
211
|
-
table = makeArrowTable(data, options);
|
|
245
|
+
if (data === undefined) {
|
|
246
|
+
throw new Error("data is required");
|
|
212
247
|
}
|
|
213
|
-
|
|
214
|
-
const buf = await fromTableToBuffer(
|
|
215
|
-
table,
|
|
216
|
-
options?.embeddingFunction,
|
|
217
|
-
options?.schema,
|
|
218
|
-
);
|
|
248
|
+
const { buf, mode } = await Table.parseTableData(data, options);
|
|
219
249
|
const innerTable = await this.inner.createTable(
|
|
220
|
-
|
|
250
|
+
nameOrOptions,
|
|
221
251
|
buf,
|
|
222
252
|
mode,
|
|
223
253
|
cleanseStorageOptions(options?.storageOptions),
|
|
254
|
+
options?.useLegacyFormat,
|
|
224
255
|
);
|
|
225
256
|
|
|
226
|
-
return new
|
|
257
|
+
return new LocalTable(innerTable);
|
|
227
258
|
}
|
|
228
259
|
|
|
229
|
-
/**
|
|
230
|
-
* Creates a new empty Table
|
|
231
|
-
* @param {string} name - The name of the table.
|
|
232
|
-
* @param {Schema} schema - The schema of the table
|
|
233
|
-
*/
|
|
234
260
|
async createEmptyTable(
|
|
235
261
|
name: string,
|
|
236
|
-
schema:
|
|
262
|
+
schema: import("./arrow").SchemaLike,
|
|
237
263
|
options?: Partial<CreateTableOptions>,
|
|
238
264
|
): Promise<Table> {
|
|
239
265
|
let mode: string = options?.mode ?? "create";
|
|
@@ -256,14 +282,11 @@ export class Connection {
|
|
|
256
282
|
buf,
|
|
257
283
|
mode,
|
|
258
284
|
cleanseStorageOptions(options?.storageOptions),
|
|
285
|
+
options?.useLegacyFormat,
|
|
259
286
|
);
|
|
260
|
-
return new
|
|
287
|
+
return new LocalTable(innerTable);
|
|
261
288
|
}
|
|
262
289
|
|
|
263
|
-
/**
|
|
264
|
-
* Drop an existing table.
|
|
265
|
-
* @param {string} name The name of the table to drop.
|
|
266
|
-
*/
|
|
267
290
|
async dropTable(name: string): Promise<void> {
|
|
268
291
|
return this.inner.dropTable(name);
|
|
269
292
|
}
|
|
@@ -272,7 +295,7 @@ export class Connection {
|
|
|
272
295
|
/**
|
|
273
296
|
* Takes storage options and makes all the keys snake case.
|
|
274
297
|
*/
|
|
275
|
-
function cleanseStorageOptions(
|
|
298
|
+
export function cleanseStorageOptions(
|
|
276
299
|
options?: Record<string, string>,
|
|
277
300
|
): Record<string, string> | undefined {
|
|
278
301
|
if (options === undefined) {
|
|
@@ -19,6 +19,7 @@ import {
|
|
|
19
19
|
FixedSizeList,
|
|
20
20
|
Float,
|
|
21
21
|
Float32,
|
|
22
|
+
type IntoVector,
|
|
22
23
|
isDataType,
|
|
23
24
|
isFixedSizeList,
|
|
24
25
|
isFloat,
|
|
@@ -34,6 +35,11 @@ export interface FunctionOptions {
|
|
|
34
35
|
[key: string]: any;
|
|
35
36
|
}
|
|
36
37
|
|
|
38
|
+
export interface EmbeddingFunctionConstructor<
|
|
39
|
+
T extends EmbeddingFunction = EmbeddingFunction,
|
|
40
|
+
> {
|
|
41
|
+
new (modelOptions?: T["TOptions"]): T;
|
|
42
|
+
}
|
|
37
43
|
/**
|
|
38
44
|
* An embedding function that automatically creates vector representation for a given column.
|
|
39
45
|
*/
|
|
@@ -42,6 +48,12 @@ export abstract class EmbeddingFunction<
|
|
|
42
48
|
T = any,
|
|
43
49
|
M extends FunctionOptions = FunctionOptions,
|
|
44
50
|
> {
|
|
51
|
+
/**
|
|
52
|
+
* @ignore
|
|
53
|
+
* This is only used for associating the options type with the class for type checking
|
|
54
|
+
*/
|
|
55
|
+
// biome-ignore lint/style/useNamingConvention: we want to keep the name as it is
|
|
56
|
+
readonly TOptions!: M;
|
|
45
57
|
/**
|
|
46
58
|
* Convert the embedding function to a JSON object
|
|
47
59
|
* It is used to serialize the embedding function to the schema
|
|
@@ -100,33 +112,55 @@ export abstract class EmbeddingFunction<
|
|
|
100
112
|
* @see {@link lancedb.LanceSchema}
|
|
101
113
|
*/
|
|
102
114
|
vectorField(
|
|
103
|
-
|
|
115
|
+
optionsOrDatatype?: Partial<FieldOptions> | DataType,
|
|
104
116
|
): [DataType, Map<string, EmbeddingFunction>] {
|
|
105
|
-
let dtype: DataType;
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
dtype =
|
|
117
|
+
let dtype: DataType | undefined;
|
|
118
|
+
let vectorType: DataType;
|
|
119
|
+
let dims: number | undefined = this.ndims();
|
|
120
|
+
|
|
121
|
+
// `func.vectorField(new Float32())`
|
|
122
|
+
if (isDataType(optionsOrDatatype)) {
|
|
123
|
+
dtype = optionsOrDatatype;
|
|
112
124
|
} else {
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
125
|
+
// `func.vectorField({
|
|
126
|
+
// datatype: new Float32(),
|
|
127
|
+
// dims: 10
|
|
128
|
+
// })`
|
|
129
|
+
dims = dims ?? optionsOrDatatype?.dims;
|
|
130
|
+
dtype = optionsOrDatatype?.datatype;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
if (dtype !== undefined) {
|
|
134
|
+
// `func.vectorField(new FixedSizeList(dims, new Field("item", new Float32(), true)))`
|
|
135
|
+
// or `func.vectorField({datatype: new FixedSizeList(dims, new Field("item", new Float32(), true))})`
|
|
136
|
+
if (isFixedSizeList(dtype)) {
|
|
137
|
+
vectorType = dtype;
|
|
138
|
+
// `func.vectorField(new Float32())`
|
|
139
|
+
// or `func.vectorField({datatype: new Float32()})`
|
|
140
|
+
} else if (isFloat(dtype)) {
|
|
141
|
+
// No `ndims` impl and no `{dims: n}` provided;
|
|
116
142
|
if (dims === undefined) {
|
|
117
143
|
throw new Error("ndims is required for vector field");
|
|
118
144
|
}
|
|
119
|
-
|
|
145
|
+
vectorType = newVectorType(dims, dtype);
|
|
120
146
|
} else {
|
|
121
147
|
throw new Error(
|
|
122
148
|
"Expected FixedSizeList or Float as datatype for vector field",
|
|
123
149
|
);
|
|
124
150
|
}
|
|
151
|
+
} else {
|
|
152
|
+
if (dims === undefined) {
|
|
153
|
+
throw new Error("ndims is required for vector field");
|
|
154
|
+
}
|
|
155
|
+
vectorType = new FixedSizeList(
|
|
156
|
+
dims,
|
|
157
|
+
new Field("item", new Float32(), true),
|
|
158
|
+
);
|
|
125
159
|
}
|
|
126
160
|
const metadata = new Map<string, EmbeddingFunction>();
|
|
127
161
|
metadata.set("vector_column_for", this);
|
|
128
162
|
|
|
129
|
-
return [
|
|
163
|
+
return [vectorType, metadata];
|
|
130
164
|
}
|
|
131
165
|
|
|
132
166
|
/** The number of dimensions of the embeddings */
|
|
@@ -147,9 +181,7 @@ export abstract class EmbeddingFunction<
|
|
|
147
181
|
/**
|
|
148
182
|
Compute the embeddings for a single query
|
|
149
183
|
*/
|
|
150
|
-
async computeQueryEmbeddings(
|
|
151
|
-
data: T,
|
|
152
|
-
): Promise<number[] | Float32Array | Float64Array> {
|
|
184
|
+
async computeQueryEmbeddings(data: T): Promise<Awaited<IntoVector>> {
|
|
153
185
|
return this.computeSourceEmbeddings([data]).then(
|
|
154
186
|
(embeddings) => embeddings[0],
|
|
155
187
|
);
|
|
@@ -13,24 +13,29 @@
|
|
|
13
13
|
// limitations under the License.
|
|
14
14
|
|
|
15
15
|
import type OpenAI from "openai";
|
|
16
|
+
import { type EmbeddingCreateParams } from "openai/resources";
|
|
16
17
|
import { Float, Float32 } from "../arrow";
|
|
17
18
|
import { EmbeddingFunction } from "./embedding_function";
|
|
18
19
|
import { register } from "./registry";
|
|
19
20
|
|
|
20
21
|
export type OpenAIOptions = {
|
|
21
|
-
apiKey
|
|
22
|
-
model
|
|
22
|
+
apiKey: string;
|
|
23
|
+
model: EmbeddingCreateParams["model"];
|
|
23
24
|
};
|
|
24
25
|
|
|
25
26
|
@register("openai")
|
|
26
27
|
export class OpenAIEmbeddingFunction extends EmbeddingFunction<
|
|
27
28
|
string,
|
|
28
|
-
OpenAIOptions
|
|
29
|
+
Partial<OpenAIOptions>
|
|
29
30
|
> {
|
|
30
31
|
#openai: OpenAI;
|
|
31
|
-
#modelName:
|
|
32
|
+
#modelName: OpenAIOptions["model"];
|
|
32
33
|
|
|
33
|
-
constructor(
|
|
34
|
+
constructor(
|
|
35
|
+
options: Partial<OpenAIOptions> = {
|
|
36
|
+
model: "text-embedding-ada-002",
|
|
37
|
+
},
|
|
38
|
+
) {
|
|
34
39
|
super();
|
|
35
40
|
const openAIKey = options?.apiKey ?? process.env.OPENAI_API_KEY;
|
|
36
41
|
if (!openAIKey) {
|
|
@@ -73,7 +78,7 @@ export class OpenAIEmbeddingFunction extends EmbeddingFunction<
|
|
|
73
78
|
case "text-embedding-3-small":
|
|
74
79
|
return 1536;
|
|
75
80
|
default:
|
|
76
|
-
|
|
81
|
+
throw new Error(`Unknown model: ${this.#modelName}`);
|
|
77
82
|
}
|
|
78
83
|
}
|
|
79
84
|
|
|
@@ -12,21 +12,15 @@
|
|
|
12
12
|
// See the License for the specific language governing permissions and
|
|
13
13
|
// limitations under the License.
|
|
14
14
|
|
|
15
|
-
import
|
|
15
|
+
import {
|
|
16
|
+
type EmbeddingFunction,
|
|
17
|
+
type EmbeddingFunctionConstructor,
|
|
18
|
+
} from "./embedding_function";
|
|
16
19
|
import "reflect-metadata";
|
|
17
|
-
|
|
18
|
-
export interface EmbeddingFunctionOptions {
|
|
19
|
-
[key: string]: unknown;
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
export interface EmbeddingFunctionFactory<
|
|
23
|
-
T extends EmbeddingFunction = EmbeddingFunction,
|
|
24
|
-
> {
|
|
25
|
-
new (modelOptions?: EmbeddingFunctionOptions): T;
|
|
26
|
-
}
|
|
20
|
+
import { OpenAIEmbeddingFunction } from "./openai";
|
|
27
21
|
|
|
28
22
|
interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
|
|
29
|
-
create(options?:
|
|
23
|
+
create(options?: T["TOptions"]): T;
|
|
30
24
|
}
|
|
31
25
|
|
|
32
26
|
/**
|
|
@@ -36,14 +30,17 @@ interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
|
|
|
36
30
|
* or TextEmbeddingFunction and registering it with the registry
|
|
37
31
|
*/
|
|
38
32
|
export class EmbeddingFunctionRegistry {
|
|
39
|
-
#functions
|
|
33
|
+
#functions = new Map<string, EmbeddingFunctionConstructor>();
|
|
40
34
|
|
|
41
35
|
/**
|
|
42
36
|
* Register an embedding function
|
|
43
37
|
* @param name The name of the function
|
|
44
38
|
* @param func The function to register
|
|
39
|
+
* @throws Error if the function is already registered
|
|
45
40
|
*/
|
|
46
|
-
register<
|
|
41
|
+
register<
|
|
42
|
+
T extends EmbeddingFunctionConstructor = EmbeddingFunctionConstructor,
|
|
43
|
+
>(
|
|
47
44
|
this: EmbeddingFunctionRegistry,
|
|
48
45
|
alias?: string,
|
|
49
46
|
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
@@ -68,18 +65,34 @@ export class EmbeddingFunctionRegistry {
|
|
|
68
65
|
* Fetch an embedding function by name
|
|
69
66
|
* @param name The name of the function
|
|
70
67
|
*/
|
|
71
|
-
get<T extends EmbeddingFunction<unknown
|
|
72
|
-
name: string,
|
|
73
|
-
|
|
68
|
+
get<T extends EmbeddingFunction<unknown>, Name extends string = "">(
|
|
69
|
+
name: Name extends "openai" ? "openai" : string,
|
|
70
|
+
//This makes it so that you can use string constants as "types", or use an explicitly supplied type
|
|
71
|
+
// ex:
|
|
72
|
+
// `registry.get("openai") -> EmbeddingFunctionCreate<OpenAIEmbeddingFunction>`
|
|
73
|
+
// `registry.get<MyCustomEmbeddingFunction>("my_func") -> EmbeddingFunctionCreate<MyCustomEmbeddingFunction> | undefined`
|
|
74
|
+
//
|
|
75
|
+
// the reason this is important is that we always know our built in functions are defined so the user isnt forced to do a non null/undefined
|
|
76
|
+
// ```ts
|
|
77
|
+
// const openai: OpenAIEmbeddingFunction = registry.get("openai").create()
|
|
78
|
+
// ```
|
|
79
|
+
): Name extends "openai"
|
|
80
|
+
? EmbeddingFunctionCreate<OpenAIEmbeddingFunction>
|
|
81
|
+
: EmbeddingFunctionCreate<T> | undefined {
|
|
82
|
+
type Output = Name extends "openai"
|
|
83
|
+
? EmbeddingFunctionCreate<OpenAIEmbeddingFunction>
|
|
84
|
+
: EmbeddingFunctionCreate<T> | undefined;
|
|
85
|
+
|
|
74
86
|
const factory = this.#functions.get(name);
|
|
75
87
|
if (!factory) {
|
|
76
|
-
return undefined;
|
|
88
|
+
return undefined as Output;
|
|
77
89
|
}
|
|
90
|
+
|
|
78
91
|
return {
|
|
79
|
-
create: function (options
|
|
80
|
-
return new factory(options)
|
|
92
|
+
create: function (options?: T["TOptions"]) {
|
|
93
|
+
return new factory(options);
|
|
81
94
|
},
|
|
82
|
-
};
|
|
95
|
+
} as Output;
|
|
83
96
|
}
|
|
84
97
|
|
|
85
98
|
/**
|
|
@@ -89,6 +102,9 @@ export class EmbeddingFunctionRegistry {
|
|
|
89
102
|
this.#functions.clear();
|
|
90
103
|
}
|
|
91
104
|
|
|
105
|
+
/**
|
|
106
|
+
* @ignore
|
|
107
|
+
*/
|
|
92
108
|
parseFunctions(
|
|
93
109
|
this: EmbeddingFunctionRegistry,
|
|
94
110
|
metadata: Map<string, string>,
|
|
@@ -100,7 +116,7 @@ export class EmbeddingFunctionRegistry {
|
|
|
100
116
|
name: string;
|
|
101
117
|
sourceColumn: string;
|
|
102
118
|
vectorColumn: string;
|
|
103
|
-
model:
|
|
119
|
+
model: EmbeddingFunction["TOptions"];
|
|
104
120
|
};
|
|
105
121
|
const functions = <FunctionConfig[]>(
|
|
106
122
|
JSON.parse(metadata.get("embedding_functions")!)
|
package/lancedb/index.ts
CHANGED
|
@@ -12,25 +12,43 @@
|
|
|
12
12
|
// See the License for the specific language governing permissions and
|
|
13
13
|
// limitations under the License.
|
|
14
14
|
|
|
15
|
+
import {
|
|
16
|
+
Connection,
|
|
17
|
+
LocalConnection,
|
|
18
|
+
cleanseStorageOptions,
|
|
19
|
+
} from "./connection";
|
|
20
|
+
|
|
21
|
+
import {
|
|
22
|
+
ConnectionOptions,
|
|
23
|
+
Connection as LanceDbConnection,
|
|
24
|
+
} from "./native.js";
|
|
25
|
+
|
|
26
|
+
import { RemoteConnection, RemoteConnectionOptions } from "./remote";
|
|
27
|
+
|
|
15
28
|
export {
|
|
16
29
|
WriteOptions,
|
|
17
30
|
WriteMode,
|
|
18
31
|
AddColumnsSql,
|
|
19
32
|
ColumnAlteration,
|
|
20
33
|
ConnectionOptions,
|
|
34
|
+
IndexStatistics,
|
|
35
|
+
IndexMetadata,
|
|
36
|
+
IndexConfig,
|
|
21
37
|
} from "./native.js";
|
|
38
|
+
|
|
22
39
|
export {
|
|
23
40
|
makeArrowTable,
|
|
24
41
|
MakeArrowTableOptions,
|
|
25
42
|
Data,
|
|
26
43
|
VectorColumnOptions,
|
|
27
44
|
} from "./arrow";
|
|
45
|
+
|
|
28
46
|
export {
|
|
29
|
-
connect,
|
|
30
47
|
Connection,
|
|
31
48
|
CreateTableOptions,
|
|
32
49
|
TableNamesOptions,
|
|
33
50
|
} from "./connection";
|
|
51
|
+
|
|
34
52
|
export {
|
|
35
53
|
ExecutableQuery,
|
|
36
54
|
Query,
|
|
@@ -38,6 +56,87 @@ export {
|
|
|
38
56
|
VectorQuery,
|
|
39
57
|
RecordBatchIterator,
|
|
40
58
|
} from "./query";
|
|
59
|
+
|
|
41
60
|
export { Index, IndexOptions, IvfPqOptions } from "./indices";
|
|
42
|
-
|
|
61
|
+
|
|
62
|
+
export { Table, AddDataOptions, UpdateOptions } from "./table";
|
|
63
|
+
|
|
43
64
|
export * as embedding from "./embedding";
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Connect to a LanceDB instance at the given URI.
|
|
68
|
+
*
|
|
69
|
+
* Accepted formats:
|
|
70
|
+
*
|
|
71
|
+
* - `/path/to/database` - local database
|
|
72
|
+
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
|
73
|
+
* - `db://host:port` - remote database (LanceDB cloud)
|
|
74
|
+
* @param {string} uri - The uri of the database. If the database uri starts
|
|
75
|
+
* with `db://` then it connects to a remote database.
|
|
76
|
+
* @see {@link ConnectionOptions} for more details on the URI format.
|
|
77
|
+
* @example
|
|
78
|
+
* ```ts
|
|
79
|
+
* const conn = await connect("/path/to/database");
|
|
80
|
+
* ```
|
|
81
|
+
* @example
|
|
82
|
+
* ```ts
|
|
83
|
+
* const conn = await connect(
|
|
84
|
+
* "s3://bucket/path/to/database",
|
|
85
|
+
* {storageOptions: {timeout: "60s"}
|
|
86
|
+
* });
|
|
87
|
+
* ```
|
|
88
|
+
*/
|
|
89
|
+
export async function connect(
|
|
90
|
+
uri: string,
|
|
91
|
+
opts?: Partial<ConnectionOptions | RemoteConnectionOptions>,
|
|
92
|
+
): Promise<Connection>;
|
|
93
|
+
/**
|
|
94
|
+
* Connect to a LanceDB instance at the given URI.
|
|
95
|
+
*
|
|
96
|
+
* Accepted formats:
|
|
97
|
+
*
|
|
98
|
+
* - `/path/to/database` - local database
|
|
99
|
+
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
|
100
|
+
* - `db://host:port` - remote database (LanceDB cloud)
|
|
101
|
+
* @param options - The options to use when connecting to the database
|
|
102
|
+
* @see {@link ConnectionOptions} for more details on the URI format.
|
|
103
|
+
* @example
|
|
104
|
+
* ```ts
|
|
105
|
+
* const conn = await connect({
|
|
106
|
+
* uri: "/path/to/database",
|
|
107
|
+
* storageOptions: {timeout: "60s"}
|
|
108
|
+
* });
|
|
109
|
+
* ```
|
|
110
|
+
*/
|
|
111
|
+
export async function connect(
|
|
112
|
+
opts: Partial<RemoteConnectionOptions | ConnectionOptions> & { uri: string },
|
|
113
|
+
): Promise<Connection>;
|
|
114
|
+
export async function connect(
|
|
115
|
+
uriOrOptions:
|
|
116
|
+
| string
|
|
117
|
+
| (Partial<RemoteConnectionOptions | ConnectionOptions> & { uri: string }),
|
|
118
|
+
opts: Partial<ConnectionOptions | RemoteConnectionOptions> = {},
|
|
119
|
+
): Promise<Connection> {
|
|
120
|
+
let uri: string | undefined;
|
|
121
|
+
if (typeof uriOrOptions !== "string") {
|
|
122
|
+
const { uri: uri_, ...options } = uriOrOptions;
|
|
123
|
+
uri = uri_;
|
|
124
|
+
opts = options;
|
|
125
|
+
} else {
|
|
126
|
+
uri = uriOrOptions;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
if (!uri) {
|
|
130
|
+
throw new Error("uri is required");
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
if (uri?.startsWith("db://")) {
|
|
134
|
+
return new RemoteConnection(uri, opts as RemoteConnectionOptions);
|
|
135
|
+
}
|
|
136
|
+
opts = (opts as ConnectionOptions) ?? {};
|
|
137
|
+
(<ConnectionOptions>opts).storageOptions = cleanseStorageOptions(
|
|
138
|
+
(<ConnectionOptions>opts).storageOptions,
|
|
139
|
+
);
|
|
140
|
+
const nativeConn = await LanceDbConnection.new(uri, opts);
|
|
141
|
+
return new LocalConnection(nativeConn);
|
|
142
|
+
}
|