@lancedb/lancedb 0.5.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +3 -3
- package/biome.json +19 -3
- package/dist/arrow.d.ts +41 -8
- package/dist/arrow.js +4 -4
- package/dist/connection.d.ts +49 -29
- package/dist/connection.js +21 -73
- package/dist/embedding/embedding_function.d.ts +9 -1
- package/dist/embedding/embedding_function.js +6 -0
- package/dist/embedding/openai.d.ts +6 -5
- package/dist/embedding/openai.js +4 -2
- package/dist/embedding/registry.d.ts +6 -11
- package/dist/index.d.ts +51 -3
- package/dist/index.js +28 -4
- package/dist/merge.d.ts +54 -0
- package/dist/merge.js +64 -0
- package/dist/native.d.ts +29 -3
- package/dist/native.js +26 -9
- package/dist/query.d.ts +33 -10
- package/dist/query.js +100 -13
- package/dist/remote/client.d.ts +28 -0
- package/dist/remote/client.js +172 -0
- package/dist/remote/connection.d.ts +25 -0
- package/dist/remote/connection.js +110 -0
- package/dist/remote/index.d.ts +3 -0
- package/dist/remote/index.js +9 -0
- package/dist/remote/table.d.ts +42 -0
- package/dist/remote/table.js +179 -0
- package/dist/sanitize.d.ts +3 -2
- package/dist/sanitize.js +55 -1
- package/dist/table.d.ts +105 -30
- package/dist/table.js +94 -237
- package/dist/util.d.ts +14 -0
- package/dist/util.js +65 -0
- package/examples/ann_indexes.ts +49 -0
- package/examples/basic.ts +149 -0
- package/examples/embedding.ts +83 -0
- package/examples/filtering.ts +34 -0
- package/examples/jsconfig.json +27 -0
- package/examples/package-lock.json +79 -0
- package/examples/package.json +18 -0
- package/examples/search.ts +37 -0
- package/lancedb/arrow.ts +80 -23
- package/lancedb/connection.ts +107 -92
- package/lancedb/embedding/embedding_function.ts +12 -1
- package/lancedb/embedding/openai.ts +11 -6
- package/lancedb/embedding/registry.ts +34 -22
- package/lancedb/index.ts +101 -2
- package/lancedb/merge.ts +70 -0
- package/lancedb/query.ts +114 -28
- package/lancedb/remote/client.ts +221 -0
- package/lancedb/remote/connection.ts +201 -0
- package/lancedb/remote/index.ts +3 -0
- package/lancedb/remote/table.ts +226 -0
- package/lancedb/sanitize.ts +73 -1
- package/lancedb/table.ts +320 -132
- package/lancedb/util.ts +69 -0
- package/native.d.ts +208 -0
- package/nodejs-artifacts/arrow.d.ts +41 -8
- package/nodejs-artifacts/arrow.js +4 -4
- package/nodejs-artifacts/connection.d.ts +49 -29
- package/nodejs-artifacts/connection.js +21 -73
- package/nodejs-artifacts/embedding/embedding_function.d.ts +9 -1
- package/nodejs-artifacts/embedding/embedding_function.js +6 -0
- package/nodejs-artifacts/embedding/openai.d.ts +6 -5
- package/nodejs-artifacts/embedding/openai.js +4 -2
- package/nodejs-artifacts/embedding/registry.d.ts +6 -11
- package/nodejs-artifacts/index.d.ts +51 -3
- package/nodejs-artifacts/index.js +28 -4
- package/nodejs-artifacts/merge.d.ts +54 -0
- package/nodejs-artifacts/merge.js +64 -0
- package/nodejs-artifacts/native.d.ts +29 -3
- package/nodejs-artifacts/native.js +26 -9
- package/nodejs-artifacts/query.d.ts +33 -10
- package/nodejs-artifacts/query.js +100 -13
- package/nodejs-artifacts/remote/client.d.ts +28 -0
- package/nodejs-artifacts/remote/client.js +172 -0
- package/nodejs-artifacts/remote/connection.d.ts +25 -0
- package/nodejs-artifacts/remote/connection.js +110 -0
- package/nodejs-artifacts/remote/index.d.ts +3 -0
- package/nodejs-artifacts/remote/index.js +9 -0
- package/nodejs-artifacts/remote/table.d.ts +42 -0
- package/nodejs-artifacts/remote/table.js +179 -0
- package/nodejs-artifacts/sanitize.d.ts +3 -2
- package/nodejs-artifacts/sanitize.js +55 -1
- package/nodejs-artifacts/table.d.ts +105 -30
- package/nodejs-artifacts/table.js +94 -237
- package/nodejs-artifacts/util.d.ts +14 -0
- package/nodejs-artifacts/util.js +65 -0
- package/package.json +25 -11
package/lancedb/connection.ts
CHANGED
|
@@ -12,38 +12,11 @@
|
|
|
12
12
|
// See the License for the specific language governing permissions and
|
|
13
13
|
// limitations under the License.
|
|
14
14
|
|
|
15
|
-
import {
|
|
16
|
-
import {
|
|
17
|
-
fromTableToBuffer,
|
|
18
|
-
isArrowTable,
|
|
19
|
-
makeArrowTable,
|
|
20
|
-
makeEmptyTable,
|
|
21
|
-
} from "./arrow";
|
|
15
|
+
import { Data, Schema, SchemaLike, TableLike } from "./arrow";
|
|
16
|
+
import { fromTableToBuffer, makeEmptyTable } from "./arrow";
|
|
22
17
|
import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
|
|
23
|
-
import {
|
|
24
|
-
import { Table } from "./table";
|
|
25
|
-
|
|
26
|
-
/**
|
|
27
|
-
* Connect to a LanceDB instance at the given URI.
|
|
28
|
-
*
|
|
29
|
-
* Accepted formats:
|
|
30
|
-
*
|
|
31
|
-
* - `/path/to/database` - local database
|
|
32
|
-
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
|
33
|
-
* - `db://host:port` - remote database (LanceDB cloud)
|
|
34
|
-
* @param {string} uri - The uri of the database. If the database uri starts
|
|
35
|
-
* with `db://` then it connects to a remote database.
|
|
36
|
-
* @see {@link ConnectionOptions} for more details on the URI format.
|
|
37
|
-
*/
|
|
38
|
-
export async function connect(
|
|
39
|
-
uri: string,
|
|
40
|
-
opts?: Partial<ConnectionOptions>,
|
|
41
|
-
): Promise<Connection> {
|
|
42
|
-
opts = opts ?? {};
|
|
43
|
-
opts.storageOptions = cleanseStorageOptions(opts.storageOptions);
|
|
44
|
-
const nativeConn = await LanceDbConnection.new(uri, opts);
|
|
45
|
-
return new Connection(nativeConn);
|
|
46
|
-
}
|
|
18
|
+
import { Connection as LanceDbConnection } from "./native";
|
|
19
|
+
import { LocalTable, Table } from "./table";
|
|
47
20
|
|
|
48
21
|
export interface CreateTableOptions {
|
|
49
22
|
/**
|
|
@@ -77,7 +50,7 @@ export interface CreateTableOptions {
|
|
|
77
50
|
* The default is true while the new format is in beta
|
|
78
51
|
*/
|
|
79
52
|
useLegacyFormat?: boolean;
|
|
80
|
-
schema?:
|
|
53
|
+
schema?: SchemaLike;
|
|
81
54
|
embeddingFunction?: EmbeddingFunctionConfig;
|
|
82
55
|
}
|
|
83
56
|
|
|
@@ -117,7 +90,6 @@ export interface TableNamesOptions {
|
|
|
117
90
|
/** An optional limit to the number of results to return. */
|
|
118
91
|
limit?: number;
|
|
119
92
|
}
|
|
120
|
-
|
|
121
93
|
/**
|
|
122
94
|
* A LanceDB Connection that allows you to open tables and create new ones.
|
|
123
95
|
*
|
|
@@ -136,17 +108,15 @@ export interface TableNamesOptions {
|
|
|
136
108
|
* Any created tables are independent and will continue to work even if
|
|
137
109
|
* the underlying connection has been closed.
|
|
138
110
|
*/
|
|
139
|
-
export class Connection {
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
constructor(inner: LanceDbConnection) {
|
|
143
|
-
this.inner = inner;
|
|
111
|
+
export abstract class Connection {
|
|
112
|
+
[Symbol.for("nodejs.util.inspect.custom")](): string {
|
|
113
|
+
return this.display();
|
|
144
114
|
}
|
|
145
115
|
|
|
146
|
-
/**
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
116
|
+
/**
|
|
117
|
+
* Return true if the connection has not been closed
|
|
118
|
+
*/
|
|
119
|
+
abstract isOpen(): boolean;
|
|
150
120
|
|
|
151
121
|
/**
|
|
152
122
|
* Close the connection, releasing any underlying resources.
|
|
@@ -155,14 +125,12 @@ export class Connection {
|
|
|
155
125
|
*
|
|
156
126
|
* Any attempt to use the connection after it is closed will result in an error.
|
|
157
127
|
*/
|
|
158
|
-
close(): void
|
|
159
|
-
this.inner.close();
|
|
160
|
-
}
|
|
128
|
+
abstract close(): void;
|
|
161
129
|
|
|
162
|
-
/**
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
130
|
+
/**
|
|
131
|
+
* Return a brief description of the connection
|
|
132
|
+
*/
|
|
133
|
+
abstract display(): string;
|
|
166
134
|
|
|
167
135
|
/**
|
|
168
136
|
* List all the table names in this database.
|
|
@@ -170,15 +138,86 @@ export class Connection {
|
|
|
170
138
|
* Tables will be returned in lexicographical order.
|
|
171
139
|
* @param {Partial<TableNamesOptions>} options - options to control the
|
|
172
140
|
* paging / start point
|
|
141
|
+
*
|
|
173
142
|
*/
|
|
174
|
-
|
|
175
|
-
return this.inner.tableNames(options?.startAfter, options?.limit);
|
|
176
|
-
}
|
|
143
|
+
abstract tableNames(options?: Partial<TableNamesOptions>): Promise<string[]>;
|
|
177
144
|
|
|
178
145
|
/**
|
|
179
146
|
* Open a table in the database.
|
|
180
147
|
* @param {string} name - The name of the table
|
|
181
148
|
*/
|
|
149
|
+
abstract openTable(
|
|
150
|
+
name: string,
|
|
151
|
+
options?: Partial<OpenTableOptions>,
|
|
152
|
+
): Promise<Table>;
|
|
153
|
+
|
|
154
|
+
/**
|
|
155
|
+
* Creates a new Table and initialize it with new data.
|
|
156
|
+
* @param {object} options - The options object.
|
|
157
|
+
* @param {string} options.name - The name of the table.
|
|
158
|
+
* @param {Data} options.data - Non-empty Array of Records to be inserted into the table
|
|
159
|
+
*
|
|
160
|
+
*/
|
|
161
|
+
abstract createTable(
|
|
162
|
+
options: {
|
|
163
|
+
name: string;
|
|
164
|
+
data: Data;
|
|
165
|
+
} & Partial<CreateTableOptions>,
|
|
166
|
+
): Promise<Table>;
|
|
167
|
+
/**
|
|
168
|
+
* Creates a new Table and initialize it with new data.
|
|
169
|
+
* @param {string} name - The name of the table.
|
|
170
|
+
* @param {Record<string, unknown>[] | TableLike} data - Non-empty Array of Records
|
|
171
|
+
* to be inserted into the table
|
|
172
|
+
*/
|
|
173
|
+
abstract createTable(
|
|
174
|
+
name: string,
|
|
175
|
+
data: Record<string, unknown>[] | TableLike,
|
|
176
|
+
options?: Partial<CreateTableOptions>,
|
|
177
|
+
): Promise<Table>;
|
|
178
|
+
|
|
179
|
+
/**
|
|
180
|
+
* Creates a new empty Table
|
|
181
|
+
* @param {string} name - The name of the table.
|
|
182
|
+
* @param {Schema} schema - The schema of the table
|
|
183
|
+
*/
|
|
184
|
+
abstract createEmptyTable(
|
|
185
|
+
name: string,
|
|
186
|
+
schema: import("./arrow").SchemaLike,
|
|
187
|
+
options?: Partial<CreateTableOptions>,
|
|
188
|
+
): Promise<Table>;
|
|
189
|
+
|
|
190
|
+
/**
|
|
191
|
+
* Drop an existing table.
|
|
192
|
+
* @param {string} name The name of the table to drop.
|
|
193
|
+
*/
|
|
194
|
+
abstract dropTable(name: string): Promise<void>;
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
export class LocalConnection extends Connection {
|
|
198
|
+
readonly inner: LanceDbConnection;
|
|
199
|
+
|
|
200
|
+
constructor(inner: LanceDbConnection) {
|
|
201
|
+
super();
|
|
202
|
+
this.inner = inner;
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
isOpen(): boolean {
|
|
206
|
+
return this.inner.isOpen();
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
close(): void {
|
|
210
|
+
this.inner.close();
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
display(): string {
|
|
214
|
+
return this.inner.display();
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
async tableNames(options?: Partial<TableNamesOptions>): Promise<string[]> {
|
|
218
|
+
return this.inner.tableNames(options?.startAfter, options?.limit);
|
|
219
|
+
}
|
|
220
|
+
|
|
182
221
|
async openTable(
|
|
183
222
|
name: string,
|
|
184
223
|
options?: Partial<OpenTableOptions>,
|
|
@@ -189,58 +228,38 @@ export class Connection {
|
|
|
189
228
|
options?.indexCacheSize,
|
|
190
229
|
);
|
|
191
230
|
|
|
192
|
-
return new
|
|
231
|
+
return new LocalTable(innerTable);
|
|
193
232
|
}
|
|
194
233
|
|
|
195
|
-
/**
|
|
196
|
-
* Creates a new Table and initialize it with new data.
|
|
197
|
-
* @param {string} name - The name of the table.
|
|
198
|
-
* @param {Record<string, unknown>[] | ArrowTable} data - Non-empty Array of Records
|
|
199
|
-
* to be inserted into the table
|
|
200
|
-
*/
|
|
201
234
|
async createTable(
|
|
202
|
-
|
|
203
|
-
|
|
235
|
+
nameOrOptions:
|
|
236
|
+
| string
|
|
237
|
+
| ({ name: string; data: Data } & Partial<CreateTableOptions>),
|
|
238
|
+
data?: Record<string, unknown>[] | TableLike,
|
|
204
239
|
options?: Partial<CreateTableOptions>,
|
|
205
240
|
): Promise<Table> {
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
if (mode === "create" && existOk) {
|
|
210
|
-
mode = "exist_ok";
|
|
241
|
+
if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
|
|
242
|
+
const { name, data, ...options } = nameOrOptions;
|
|
243
|
+
return this.createTable(name, data, options);
|
|
211
244
|
}
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
if (isArrowTable(data)) {
|
|
215
|
-
table = data;
|
|
216
|
-
} else {
|
|
217
|
-
table = makeArrowTable(data, options);
|
|
245
|
+
if (data === undefined) {
|
|
246
|
+
throw new Error("data is required");
|
|
218
247
|
}
|
|
219
|
-
|
|
220
|
-
const buf = await fromTableToBuffer(
|
|
221
|
-
table,
|
|
222
|
-
options?.embeddingFunction,
|
|
223
|
-
options?.schema,
|
|
224
|
-
);
|
|
248
|
+
const { buf, mode } = await Table.parseTableData(data, options);
|
|
225
249
|
const innerTable = await this.inner.createTable(
|
|
226
|
-
|
|
250
|
+
nameOrOptions,
|
|
227
251
|
buf,
|
|
228
252
|
mode,
|
|
229
253
|
cleanseStorageOptions(options?.storageOptions),
|
|
230
254
|
options?.useLegacyFormat,
|
|
231
255
|
);
|
|
232
256
|
|
|
233
|
-
return new
|
|
257
|
+
return new LocalTable(innerTable);
|
|
234
258
|
}
|
|
235
259
|
|
|
236
|
-
/**
|
|
237
|
-
* Creates a new empty Table
|
|
238
|
-
* @param {string} name - The name of the table.
|
|
239
|
-
* @param {Schema} schema - The schema of the table
|
|
240
|
-
*/
|
|
241
260
|
async createEmptyTable(
|
|
242
261
|
name: string,
|
|
243
|
-
schema:
|
|
262
|
+
schema: import("./arrow").SchemaLike,
|
|
244
263
|
options?: Partial<CreateTableOptions>,
|
|
245
264
|
): Promise<Table> {
|
|
246
265
|
let mode: string = options?.mode ?? "create";
|
|
@@ -265,13 +284,9 @@ export class Connection {
|
|
|
265
284
|
cleanseStorageOptions(options?.storageOptions),
|
|
266
285
|
options?.useLegacyFormat,
|
|
267
286
|
);
|
|
268
|
-
return new
|
|
287
|
+
return new LocalTable(innerTable);
|
|
269
288
|
}
|
|
270
289
|
|
|
271
|
-
/**
|
|
272
|
-
* Drop an existing table.
|
|
273
|
-
* @param {string} name The name of the table to drop.
|
|
274
|
-
*/
|
|
275
290
|
async dropTable(name: string): Promise<void> {
|
|
276
291
|
return this.inner.dropTable(name);
|
|
277
292
|
}
|
|
@@ -280,7 +295,7 @@ export class Connection {
|
|
|
280
295
|
/**
|
|
281
296
|
* Takes storage options and makes all the keys snake case.
|
|
282
297
|
*/
|
|
283
|
-
function cleanseStorageOptions(
|
|
298
|
+
export function cleanseStorageOptions(
|
|
284
299
|
options?: Record<string, string>,
|
|
285
300
|
): Record<string, string> | undefined {
|
|
286
301
|
if (options === undefined) {
|
|
@@ -35,6 +35,11 @@ export interface FunctionOptions {
|
|
|
35
35
|
[key: string]: any;
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
export interface EmbeddingFunctionConstructor<
|
|
39
|
+
T extends EmbeddingFunction = EmbeddingFunction,
|
|
40
|
+
> {
|
|
41
|
+
new (modelOptions?: T["TOptions"]): T;
|
|
42
|
+
}
|
|
38
43
|
/**
|
|
39
44
|
* An embedding function that automatically creates vector representation for a given column.
|
|
40
45
|
*/
|
|
@@ -43,6 +48,12 @@ export abstract class EmbeddingFunction<
|
|
|
43
48
|
T = any,
|
|
44
49
|
M extends FunctionOptions = FunctionOptions,
|
|
45
50
|
> {
|
|
51
|
+
/**
|
|
52
|
+
* @ignore
|
|
53
|
+
* This is only used for associating the options type with the class for type checking
|
|
54
|
+
*/
|
|
55
|
+
// biome-ignore lint/style/useNamingConvention: we want to keep the name as it is
|
|
56
|
+
readonly TOptions!: M;
|
|
46
57
|
/**
|
|
47
58
|
* Convert the embedding function to a JSON object
|
|
48
59
|
* It is used to serialize the embedding function to the schema
|
|
@@ -170,7 +181,7 @@ export abstract class EmbeddingFunction<
|
|
|
170
181
|
/**
|
|
171
182
|
Compute the embeddings for a single query
|
|
172
183
|
*/
|
|
173
|
-
async computeQueryEmbeddings(data: T): Promise<IntoVector
|
|
184
|
+
async computeQueryEmbeddings(data: T): Promise<Awaited<IntoVector>> {
|
|
174
185
|
return this.computeSourceEmbeddings([data]).then(
|
|
175
186
|
(embeddings) => embeddings[0],
|
|
176
187
|
);
|
|
@@ -13,24 +13,29 @@
|
|
|
13
13
|
// limitations under the License.
|
|
14
14
|
|
|
15
15
|
import type OpenAI from "openai";
|
|
16
|
+
import { type EmbeddingCreateParams } from "openai/resources";
|
|
16
17
|
import { Float, Float32 } from "../arrow";
|
|
17
18
|
import { EmbeddingFunction } from "./embedding_function";
|
|
18
19
|
import { register } from "./registry";
|
|
19
20
|
|
|
20
21
|
export type OpenAIOptions = {
|
|
21
|
-
apiKey
|
|
22
|
-
model
|
|
22
|
+
apiKey: string;
|
|
23
|
+
model: EmbeddingCreateParams["model"];
|
|
23
24
|
};
|
|
24
25
|
|
|
25
26
|
@register("openai")
|
|
26
27
|
export class OpenAIEmbeddingFunction extends EmbeddingFunction<
|
|
27
28
|
string,
|
|
28
|
-
OpenAIOptions
|
|
29
|
+
Partial<OpenAIOptions>
|
|
29
30
|
> {
|
|
30
31
|
#openai: OpenAI;
|
|
31
|
-
#modelName:
|
|
32
|
+
#modelName: OpenAIOptions["model"];
|
|
32
33
|
|
|
33
|
-
constructor(
|
|
34
|
+
constructor(
|
|
35
|
+
options: Partial<OpenAIOptions> = {
|
|
36
|
+
model: "text-embedding-ada-002",
|
|
37
|
+
},
|
|
38
|
+
) {
|
|
34
39
|
super();
|
|
35
40
|
const openAIKey = options?.apiKey ?? process.env.OPENAI_API_KEY;
|
|
36
41
|
if (!openAIKey) {
|
|
@@ -73,7 +78,7 @@ export class OpenAIEmbeddingFunction extends EmbeddingFunction<
|
|
|
73
78
|
case "text-embedding-3-small":
|
|
74
79
|
return 1536;
|
|
75
80
|
default:
|
|
76
|
-
|
|
81
|
+
throw new Error(`Unknown model: ${this.#modelName}`);
|
|
77
82
|
}
|
|
78
83
|
}
|
|
79
84
|
|
|
@@ -12,21 +12,15 @@
|
|
|
12
12
|
// See the License for the specific language governing permissions and
|
|
13
13
|
// limitations under the License.
|
|
14
14
|
|
|
15
|
-
import
|
|
15
|
+
import {
|
|
16
|
+
type EmbeddingFunction,
|
|
17
|
+
type EmbeddingFunctionConstructor,
|
|
18
|
+
} from "./embedding_function";
|
|
16
19
|
import "reflect-metadata";
|
|
17
|
-
|
|
18
|
-
export interface EmbeddingFunctionOptions {
|
|
19
|
-
[key: string]: unknown;
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
export interface EmbeddingFunctionFactory<
|
|
23
|
-
T extends EmbeddingFunction = EmbeddingFunction,
|
|
24
|
-
> {
|
|
25
|
-
new (modelOptions?: EmbeddingFunctionOptions): T;
|
|
26
|
-
}
|
|
20
|
+
import { OpenAIEmbeddingFunction } from "./openai";
|
|
27
21
|
|
|
28
22
|
interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
|
|
29
|
-
create(options?:
|
|
23
|
+
create(options?: T["TOptions"]): T;
|
|
30
24
|
}
|
|
31
25
|
|
|
32
26
|
/**
|
|
@@ -36,7 +30,7 @@ interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
|
|
|
36
30
|
* or TextEmbeddingFunction and registering it with the registry
|
|
37
31
|
*/
|
|
38
32
|
export class EmbeddingFunctionRegistry {
|
|
39
|
-
#functions
|
|
33
|
+
#functions = new Map<string, EmbeddingFunctionConstructor>();
|
|
40
34
|
|
|
41
35
|
/**
|
|
42
36
|
* Register an embedding function
|
|
@@ -44,7 +38,9 @@ export class EmbeddingFunctionRegistry {
|
|
|
44
38
|
* @param func The function to register
|
|
45
39
|
* @throws Error if the function is already registered
|
|
46
40
|
*/
|
|
47
|
-
register<
|
|
41
|
+
register<
|
|
42
|
+
T extends EmbeddingFunctionConstructor = EmbeddingFunctionConstructor,
|
|
43
|
+
>(
|
|
48
44
|
this: EmbeddingFunctionRegistry,
|
|
49
45
|
alias?: string,
|
|
50
46
|
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
@@ -69,18 +65,34 @@ export class EmbeddingFunctionRegistry {
|
|
|
69
65
|
* Fetch an embedding function by name
|
|
70
66
|
* @param name The name of the function
|
|
71
67
|
*/
|
|
72
|
-
get<T extends EmbeddingFunction<unknown
|
|
73
|
-
name: string,
|
|
74
|
-
|
|
68
|
+
get<T extends EmbeddingFunction<unknown>, Name extends string = "">(
|
|
69
|
+
name: Name extends "openai" ? "openai" : string,
|
|
70
|
+
//This makes it so that you can use string constants as "types", or use an explicitly supplied type
|
|
71
|
+
// ex:
|
|
72
|
+
// `registry.get("openai") -> EmbeddingFunctionCreate<OpenAIEmbeddingFunction>`
|
|
73
|
+
// `registry.get<MyCustomEmbeddingFunction>("my_func") -> EmbeddingFunctionCreate<MyCustomEmbeddingFunction> | undefined`
|
|
74
|
+
//
|
|
75
|
+
// the reason this is important is that we always know our built-in functions are defined, so the user isn't forced to do a non-null/undefined check
|
|
76
|
+
// ```ts
|
|
77
|
+
// const openai: OpenAIEmbeddingFunction = registry.get("openai").create()
|
|
78
|
+
// ```
|
|
79
|
+
): Name extends "openai"
|
|
80
|
+
? EmbeddingFunctionCreate<OpenAIEmbeddingFunction>
|
|
81
|
+
: EmbeddingFunctionCreate<T> | undefined {
|
|
82
|
+
type Output = Name extends "openai"
|
|
83
|
+
? EmbeddingFunctionCreate<OpenAIEmbeddingFunction>
|
|
84
|
+
: EmbeddingFunctionCreate<T> | undefined;
|
|
85
|
+
|
|
75
86
|
const factory = this.#functions.get(name);
|
|
76
87
|
if (!factory) {
|
|
77
|
-
return undefined;
|
|
88
|
+
return undefined as Output;
|
|
78
89
|
}
|
|
90
|
+
|
|
79
91
|
return {
|
|
80
|
-
create: function (options
|
|
81
|
-
return new factory(options)
|
|
92
|
+
create: function (options?: T["TOptions"]) {
|
|
93
|
+
return new factory(options);
|
|
82
94
|
},
|
|
83
|
-
};
|
|
95
|
+
} as Output;
|
|
84
96
|
}
|
|
85
97
|
|
|
86
98
|
/**
|
|
@@ -104,7 +116,7 @@ export class EmbeddingFunctionRegistry {
|
|
|
104
116
|
name: string;
|
|
105
117
|
sourceColumn: string;
|
|
106
118
|
vectorColumn: string;
|
|
107
|
-
model:
|
|
119
|
+
model: EmbeddingFunction["TOptions"];
|
|
108
120
|
};
|
|
109
121
|
const functions = <FunctionConfig[]>(
|
|
110
122
|
JSON.parse(metadata.get("embedding_functions")!)
|
package/lancedb/index.ts
CHANGED
|
@@ -12,25 +12,43 @@
|
|
|
12
12
|
// See the License for the specific language governing permissions and
|
|
13
13
|
// limitations under the License.
|
|
14
14
|
|
|
15
|
+
import {
|
|
16
|
+
Connection,
|
|
17
|
+
LocalConnection,
|
|
18
|
+
cleanseStorageOptions,
|
|
19
|
+
} from "./connection";
|
|
20
|
+
|
|
21
|
+
import {
|
|
22
|
+
ConnectionOptions,
|
|
23
|
+
Connection as LanceDbConnection,
|
|
24
|
+
} from "./native.js";
|
|
25
|
+
|
|
26
|
+
import { RemoteConnection, RemoteConnectionOptions } from "./remote";
|
|
27
|
+
|
|
15
28
|
export {
|
|
16
29
|
WriteOptions,
|
|
17
30
|
WriteMode,
|
|
18
31
|
AddColumnsSql,
|
|
19
32
|
ColumnAlteration,
|
|
20
33
|
ConnectionOptions,
|
|
34
|
+
IndexStatistics,
|
|
35
|
+
IndexMetadata,
|
|
36
|
+
IndexConfig,
|
|
21
37
|
} from "./native.js";
|
|
38
|
+
|
|
22
39
|
export {
|
|
23
40
|
makeArrowTable,
|
|
24
41
|
MakeArrowTableOptions,
|
|
25
42
|
Data,
|
|
26
43
|
VectorColumnOptions,
|
|
27
44
|
} from "./arrow";
|
|
45
|
+
|
|
28
46
|
export {
|
|
29
|
-
connect,
|
|
30
47
|
Connection,
|
|
31
48
|
CreateTableOptions,
|
|
32
49
|
TableNamesOptions,
|
|
33
50
|
} from "./connection";
|
|
51
|
+
|
|
34
52
|
export {
|
|
35
53
|
ExecutableQuery,
|
|
36
54
|
Query,
|
|
@@ -38,6 +56,87 @@ export {
|
|
|
38
56
|
VectorQuery,
|
|
39
57
|
RecordBatchIterator,
|
|
40
58
|
} from "./query";
|
|
59
|
+
|
|
41
60
|
export { Index, IndexOptions, IvfPqOptions } from "./indices";
|
|
42
|
-
|
|
61
|
+
|
|
62
|
+
export { Table, AddDataOptions, UpdateOptions } from "./table";
|
|
63
|
+
|
|
43
64
|
export * as embedding from "./embedding";
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Connect to a LanceDB instance at the given URI.
|
|
68
|
+
*
|
|
69
|
+
* Accepted formats:
|
|
70
|
+
*
|
|
71
|
+
* - `/path/to/database` - local database
|
|
72
|
+
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
|
73
|
+
* - `db://host:port` - remote database (LanceDB cloud)
|
|
74
|
+
* @param {string} uri - The uri of the database. If the database uri starts
|
|
75
|
+
* with `db://` then it connects to a remote database.
|
|
76
|
+
* @see {@link ConnectionOptions} for more details on the URI format.
|
|
77
|
+
* @example
|
|
78
|
+
* ```ts
|
|
79
|
+
* const conn = await connect("/path/to/database");
|
|
80
|
+
* ```
|
|
81
|
+
* @example
|
|
82
|
+
* ```ts
|
|
83
|
+
* const conn = await connect(
|
|
84
|
+
* "s3://bucket/path/to/database",
|
|
85
|
+
* {storageOptions: {timeout: "60s"}
|
|
86
|
+
* });
|
|
87
|
+
* ```
|
|
88
|
+
*/
|
|
89
|
+
export async function connect(
|
|
90
|
+
uri: string,
|
|
91
|
+
opts?: Partial<ConnectionOptions | RemoteConnectionOptions>,
|
|
92
|
+
): Promise<Connection>;
|
|
93
|
+
/**
|
|
94
|
+
* Connect to a LanceDB instance at the given URI.
|
|
95
|
+
*
|
|
96
|
+
* Accepted formats:
|
|
97
|
+
*
|
|
98
|
+
* - `/path/to/database` - local database
|
|
99
|
+
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
|
100
|
+
* - `db://host:port` - remote database (LanceDB cloud)
|
|
101
|
+
* @param options - The options to use when connecting to the database
|
|
102
|
+
* @see {@link ConnectionOptions} for more details on the URI format.
|
|
103
|
+
* @example
|
|
104
|
+
* ```ts
|
|
105
|
+
* const conn = await connect({
|
|
106
|
+
* uri: "/path/to/database",
|
|
107
|
+
* storageOptions: {timeout: "60s"}
|
|
108
|
+
* });
|
|
109
|
+
* ```
|
|
110
|
+
*/
|
|
111
|
+
export async function connect(
|
|
112
|
+
opts: Partial<RemoteConnectionOptions | ConnectionOptions> & { uri: string },
|
|
113
|
+
): Promise<Connection>;
|
|
114
|
+
export async function connect(
|
|
115
|
+
uriOrOptions:
|
|
116
|
+
| string
|
|
117
|
+
| (Partial<RemoteConnectionOptions | ConnectionOptions> & { uri: string }),
|
|
118
|
+
opts: Partial<ConnectionOptions | RemoteConnectionOptions> = {},
|
|
119
|
+
): Promise<Connection> {
|
|
120
|
+
let uri: string | undefined;
|
|
121
|
+
if (typeof uriOrOptions !== "string") {
|
|
122
|
+
const { uri: uri_, ...options } = uriOrOptions;
|
|
123
|
+
uri = uri_;
|
|
124
|
+
opts = options;
|
|
125
|
+
} else {
|
|
126
|
+
uri = uriOrOptions;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
if (!uri) {
|
|
130
|
+
throw new Error("uri is required");
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
if (uri?.startsWith("db://")) {
|
|
134
|
+
return new RemoteConnection(uri, opts as RemoteConnectionOptions);
|
|
135
|
+
}
|
|
136
|
+
opts = (opts as ConnectionOptions) ?? {};
|
|
137
|
+
(<ConnectionOptions>opts).storageOptions = cleanseStorageOptions(
|
|
138
|
+
(<ConnectionOptions>opts).storageOptions,
|
|
139
|
+
);
|
|
140
|
+
const nativeConn = await LanceDbConnection.new(uri, opts);
|
|
141
|
+
return new LocalConnection(nativeConn);
|
|
142
|
+
}
|
package/lancedb/merge.ts
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { Data, fromDataToBuffer } from "./arrow";
|
|
2
|
+
import { NativeMergeInsertBuilder } from "./native";
|
|
3
|
+
|
|
4
|
+
/** A builder used to create and run a merge insert operation */
|
|
5
|
+
export class MergeInsertBuilder {
|
|
6
|
+
#native: NativeMergeInsertBuilder;
|
|
7
|
+
|
|
8
|
+
/** Construct a MergeInsertBuilder. __Internal use only.__ */
|
|
9
|
+
constructor(native: NativeMergeInsertBuilder) {
|
|
10
|
+
this.#native = native;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Rows that exist in both the source table (new data) and
|
|
15
|
+
* the target table (old data) will be updated, replacing
|
|
16
|
+
* the old row with the corresponding matching row.
|
|
17
|
+
*
|
|
18
|
+
* If there are multiple matches then the behavior is undefined.
|
|
19
|
+
* Currently this causes multiple copies of the row to be created
|
|
20
|
+
* but that behavior is subject to change.
|
|
21
|
+
*
|
|
22
|
+
* An optional condition may be specified. If it is, then only
|
|
23
|
+
* matched rows that satisfy the condition will be updated. Any
|
|
24
|
+
* rows that do not satisfy the condition will be left as they
|
|
25
|
+
* are. Failing to satisfy the condition does not cause a
|
|
26
|
+
* "matched row" to become a "not matched" row.
|
|
27
|
+
*
|
|
28
|
+
* The condition should be an SQL string. Use the prefix
|
|
29
|
+
* target. to refer to rows in the target table (old data)
|
|
30
|
+
* and the prefix source. to refer to rows in the source
|
|
31
|
+
* table (new data).
|
|
32
|
+
*
|
|
33
|
+
* For example, "target.last_update < source.last_update"
|
|
34
|
+
*/
|
|
35
|
+
whenMatchedUpdateAll(options?: { where: string }): MergeInsertBuilder {
|
|
36
|
+
return new MergeInsertBuilder(
|
|
37
|
+
this.#native.whenMatchedUpdateAll(options?.where),
|
|
38
|
+
);
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Rows that exist only in the source table (new data) should
|
|
42
|
+
* be inserted into the target table.
|
|
43
|
+
*/
|
|
44
|
+
whenNotMatchedInsertAll(): MergeInsertBuilder {
|
|
45
|
+
return new MergeInsertBuilder(this.#native.whenNotMatchedInsertAll());
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Rows that exist only in the target table (old data) will be
|
|
49
|
+
* deleted. An optional condition can be provided to limit what
|
|
50
|
+
* data is deleted.
|
|
51
|
+
*
|
|
52
|
+
* @param options.where - An optional condition to limit what data is deleted
|
|
53
|
+
*/
|
|
54
|
+
whenNotMatchedBySourceDelete(options?: {
|
|
55
|
+
where: string;
|
|
56
|
+
}): MergeInsertBuilder {
|
|
57
|
+
return new MergeInsertBuilder(
|
|
58
|
+
this.#native.whenNotMatchedBySourceDelete(options?.where),
|
|
59
|
+
);
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* Executes the merge insert operation
|
|
63
|
+
*
|
|
64
|
+
* Nothing is returned but the `Table` is updated
|
|
65
|
+
*/
|
|
66
|
+
async execute(data: Data): Promise<void> {
|
|
67
|
+
const buffer = await fromDataToBuffer(data);
|
|
68
|
+
await this.#native.execute(buffer);
|
|
69
|
+
}
|
|
70
|
+
}
|