@lancedb/lancedb 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/Cargo.toml +3 -3
  2. package/biome.json +19 -3
  3. package/dist/arrow.d.ts +42 -7
  4. package/dist/arrow.js +6 -5
  5. package/dist/connection.d.ts +55 -29
  6. package/dist/connection.js +22 -74
  7. package/dist/embedding/embedding_function.d.ts +11 -3
  8. package/dist/embedding/embedding_function.js +36 -12
  9. package/dist/embedding/openai.d.ts +6 -5
  10. package/dist/embedding/openai.js +4 -2
  11. package/dist/embedding/registry.d.ts +10 -11
  12. package/dist/embedding/registry.js +4 -0
  13. package/dist/index.d.ts +51 -3
  14. package/dist/index.js +28 -4
  15. package/dist/merge.d.ts +54 -0
  16. package/dist/merge.js +64 -0
  17. package/dist/native.d.ts +34 -7
  18. package/dist/native.js +26 -9
  19. package/dist/query.d.ts +51 -16
  20. package/dist/query.js +122 -21
  21. package/dist/remote/client.d.ts +28 -0
  22. package/dist/remote/client.js +172 -0
  23. package/dist/remote/connection.d.ts +25 -0
  24. package/dist/remote/connection.js +110 -0
  25. package/dist/remote/index.d.ts +3 -0
  26. package/dist/remote/index.js +9 -0
  27. package/dist/remote/table.d.ts +42 -0
  28. package/dist/remote/table.js +179 -0
  29. package/dist/sanitize.d.ts +3 -2
  30. package/dist/sanitize.js +55 -1
  31. package/dist/table.d.ts +116 -25
  32. package/dist/table.js +117 -233
  33. package/dist/util.d.ts +14 -0
  34. package/dist/util.js +65 -0
  35. package/examples/ann_indexes.ts +49 -0
  36. package/examples/basic.ts +149 -0
  37. package/examples/embedding.ts +83 -0
  38. package/examples/filtering.ts +34 -0
  39. package/examples/jsconfig.json +27 -0
  40. package/examples/package-lock.json +79 -0
  41. package/examples/package.json +18 -0
  42. package/examples/search.ts +37 -0
  43. package/lancedb/arrow.ts +87 -24
  44. package/lancedb/connection.ts +115 -92
  45. package/lancedb/embedding/embedding_function.ts +48 -16
  46. package/lancedb/embedding/openai.ts +11 -6
  47. package/lancedb/embedding/registry.ts +38 -22
  48. package/lancedb/index.ts +101 -2
  49. package/lancedb/merge.ts +70 -0
  50. package/lancedb/query.ts +168 -39
  51. package/lancedb/remote/client.ts +221 -0
  52. package/lancedb/remote/connection.ts +201 -0
  53. package/lancedb/remote/index.ts +3 -0
  54. package/lancedb/remote/table.ts +226 -0
  55. package/lancedb/sanitize.ts +73 -1
  56. package/lancedb/table.ts +344 -101
  57. package/lancedb/util.ts +69 -0
  58. package/native.d.ts +208 -0
  59. package/nodejs-artifacts/arrow.d.ts +42 -7
  60. package/nodejs-artifacts/arrow.js +6 -5
  61. package/nodejs-artifacts/connection.d.ts +55 -29
  62. package/nodejs-artifacts/connection.js +22 -74
  63. package/nodejs-artifacts/embedding/embedding_function.d.ts +11 -3
  64. package/nodejs-artifacts/embedding/embedding_function.js +36 -12
  65. package/nodejs-artifacts/embedding/openai.d.ts +6 -5
  66. package/nodejs-artifacts/embedding/openai.js +4 -2
  67. package/nodejs-artifacts/embedding/registry.d.ts +10 -11
  68. package/nodejs-artifacts/embedding/registry.js +4 -0
  69. package/nodejs-artifacts/index.d.ts +51 -3
  70. package/nodejs-artifacts/index.js +28 -4
  71. package/nodejs-artifacts/merge.d.ts +54 -0
  72. package/nodejs-artifacts/merge.js +64 -0
  73. package/nodejs-artifacts/native.d.ts +34 -7
  74. package/nodejs-artifacts/native.js +26 -9
  75. package/nodejs-artifacts/query.d.ts +51 -16
  76. package/nodejs-artifacts/query.js +122 -21
  77. package/nodejs-artifacts/remote/client.d.ts +28 -0
  78. package/nodejs-artifacts/remote/client.js +172 -0
  79. package/nodejs-artifacts/remote/connection.d.ts +25 -0
  80. package/nodejs-artifacts/remote/connection.js +110 -0
  81. package/nodejs-artifacts/remote/index.d.ts +3 -0
  82. package/nodejs-artifacts/remote/index.js +9 -0
  83. package/nodejs-artifacts/remote/table.d.ts +42 -0
  84. package/nodejs-artifacts/remote/table.js +179 -0
  85. package/nodejs-artifacts/sanitize.d.ts +3 -2
  86. package/nodejs-artifacts/sanitize.js +55 -1
  87. package/nodejs-artifacts/table.d.ts +116 -25
  88. package/nodejs-artifacts/table.js +117 -233
  89. package/nodejs-artifacts/util.d.ts +14 -0
  90. package/nodejs-artifacts/util.js +65 -0
  91. package/package.json +25 -11
@@ -12,38 +12,11 @@
12
12
  // See the License for the specific language governing permissions and
13
13
  // limitations under the License.
14
14
 
15
- import { Table as ArrowTable, Schema } from "./arrow";
16
- import {
17
- fromTableToBuffer,
18
- isArrowTable,
19
- makeArrowTable,
20
- makeEmptyTable,
21
- } from "./arrow";
15
+ import { Data, Schema, SchemaLike, TableLike } from "./arrow";
16
+ import { fromTableToBuffer, makeEmptyTable } from "./arrow";
22
17
  import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
23
- import { ConnectionOptions, Connection as LanceDbConnection } from "./native";
24
- import { Table } from "./table";
25
-
26
- /**
27
- * Connect to a LanceDB instance at the given URI.
28
- *
29
- * Accepted formats:
30
- *
31
- * - `/path/to/database` - local database
32
- * - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
33
- * - `db://host:port` - remote database (LanceDB cloud)
34
- * @param {string} uri - The uri of the database. If the database uri starts
35
- * with `db://` then it connects to a remote database.
36
- * @see {@link ConnectionOptions} for more details on the URI format.
37
- */
38
- export async function connect(
39
- uri: string,
40
- opts?: Partial<ConnectionOptions>,
41
- ): Promise<Connection> {
42
- opts = opts ?? {};
43
- opts.storageOptions = cleanseStorageOptions(opts.storageOptions);
44
- const nativeConn = await LanceDbConnection.new(uri, opts);
45
- return new Connection(nativeConn);
46
- }
18
+ import { Connection as LanceDbConnection } from "./native";
19
+ import { LocalTable, Table } from "./table";
47
20
 
48
21
  export interface CreateTableOptions {
49
22
  /**
@@ -71,7 +44,13 @@ export interface CreateTableOptions {
71
44
  * The available options are described at https://lancedb.github.io/lancedb/guides/storage/
72
45
  */
73
46
  storageOptions?: Record<string, string>;
74
- schema?: Schema;
47
+ /**
48
+ * If true then data files will be written with the legacy format
49
+ *
50
+ * The default is true while the new format is in beta
51
+ */
52
+ useLegacyFormat?: boolean;
53
+ schema?: SchemaLike;
75
54
  embeddingFunction?: EmbeddingFunctionConfig;
76
55
  }
77
56
 
@@ -111,7 +90,6 @@ export interface TableNamesOptions {
111
90
  /** An optional limit to the number of results to return. */
112
91
  limit?: number;
113
92
  }
114
-
115
93
  /**
116
94
  * A LanceDB Connection that allows you to open tables and create new ones.
117
95
  *
@@ -130,17 +108,15 @@ export interface TableNamesOptions {
130
108
  * Any created tables are independent and will continue to work even if
131
109
  * the underlying connection has been closed.
132
110
  */
133
- export class Connection {
134
- readonly inner: LanceDbConnection;
135
-
136
- constructor(inner: LanceDbConnection) {
137
- this.inner = inner;
111
+ export abstract class Connection {
112
+ [Symbol.for("nodejs.util.inspect.custom")](): string {
113
+ return this.display();
138
114
  }
139
115
 
140
- /** Return true if the connection has not been closed */
141
- isOpen(): boolean {
142
- return this.inner.isOpen();
143
- }
116
+ /**
117
+ * Return true if the connection has not been closed
118
+ */
119
+ abstract isOpen(): boolean;
144
120
 
145
121
  /**
146
122
  * Close the connection, releasing any underlying resources.
@@ -149,14 +125,12 @@ export class Connection {
149
125
  *
150
126
  * Any attempt to use the connection after it is closed will result in an error.
151
127
  */
152
- close(): void {
153
- this.inner.close();
154
- }
128
+ abstract close(): void;
155
129
 
156
- /** Return a brief description of the connection */
157
- display(): string {
158
- return this.inner.display();
159
- }
130
+ /**
131
+ * Return a brief description of the connection
132
+ */
133
+ abstract display(): string;
160
134
 
161
135
  /**
162
136
  * List all the table names in this database.
@@ -164,15 +138,86 @@ export class Connection {
164
138
  * Tables will be returned in lexicographical order.
165
139
  * @param {Partial<TableNamesOptions>} options - options to control the
166
140
  * paging / start point
141
+ *
167
142
  */
168
- async tableNames(options?: Partial<TableNamesOptions>): Promise<string[]> {
169
- return this.inner.tableNames(options?.startAfter, options?.limit);
170
- }
143
+ abstract tableNames(options?: Partial<TableNamesOptions>): Promise<string[]>;
171
144
 
172
145
  /**
173
146
  * Open a table in the database.
174
147
  * @param {string} name - The name of the table
175
148
  */
149
+ abstract openTable(
150
+ name: string,
151
+ options?: Partial<OpenTableOptions>,
152
+ ): Promise<Table>;
153
+
154
+ /**
155
+ * Creates a new Table and initialize it with new data.
156
+ * @param {object} options - The options object.
157
+ * @param {string} options.name - The name of the table.
158
+ * @param {Data} options.data - Non-empty Array of Records to be inserted into the table
159
+ *
160
+ */
161
+ abstract createTable(
162
+ options: {
163
+ name: string;
164
+ data: Data;
165
+ } & Partial<CreateTableOptions>,
166
+ ): Promise<Table>;
167
+ /**
168
+ * Creates a new Table and initialize it with new data.
169
+ * @param {string} name - The name of the table.
170
+ * @param {Record<string, unknown>[] | TableLike} data - Non-empty Array of Records
171
+ * to be inserted into the table
172
+ */
173
+ abstract createTable(
174
+ name: string,
175
+ data: Record<string, unknown>[] | TableLike,
176
+ options?: Partial<CreateTableOptions>,
177
+ ): Promise<Table>;
178
+
179
+ /**
180
+ * Creates a new empty Table
181
+ * @param {string} name - The name of the table.
182
+ * @param {Schema} schema - The schema of the table
183
+ */
184
+ abstract createEmptyTable(
185
+ name: string,
186
+ schema: import("./arrow").SchemaLike,
187
+ options?: Partial<CreateTableOptions>,
188
+ ): Promise<Table>;
189
+
190
+ /**
191
+ * Drop an existing table.
192
+ * @param {string} name The name of the table to drop.
193
+ */
194
+ abstract dropTable(name: string): Promise<void>;
195
+ }
196
+
197
+ export class LocalConnection extends Connection {
198
+ readonly inner: LanceDbConnection;
199
+
200
+ constructor(inner: LanceDbConnection) {
201
+ super();
202
+ this.inner = inner;
203
+ }
204
+
205
+ isOpen(): boolean {
206
+ return this.inner.isOpen();
207
+ }
208
+
209
+ close(): void {
210
+ this.inner.close();
211
+ }
212
+
213
+ display(): string {
214
+ return this.inner.display();
215
+ }
216
+
217
+ async tableNames(options?: Partial<TableNamesOptions>): Promise<string[]> {
218
+ return this.inner.tableNames(options?.startAfter, options?.limit);
219
+ }
220
+
176
221
  async openTable(
177
222
  name: string,
178
223
  options?: Partial<OpenTableOptions>,
@@ -183,57 +228,38 @@ export class Connection {
183
228
  options?.indexCacheSize,
184
229
  );
185
230
 
186
- return new Table(innerTable);
231
+ return new LocalTable(innerTable);
187
232
  }
188
233
 
189
- /**
190
- * Creates a new Table and initialize it with new data.
191
- * @param {string} name - The name of the table.
192
- * @param {Record<string, unknown>[] | ArrowTable} data - Non-empty Array of Records
193
- * to be inserted into the table
194
- */
195
234
  async createTable(
196
- name: string,
197
- data: Record<string, unknown>[] | ArrowTable,
235
+ nameOrOptions:
236
+ | string
237
+ | ({ name: string; data: Data } & Partial<CreateTableOptions>),
238
+ data?: Record<string, unknown>[] | TableLike,
198
239
  options?: Partial<CreateTableOptions>,
199
240
  ): Promise<Table> {
200
- let mode: string = options?.mode ?? "create";
201
- const existOk = options?.existOk ?? false;
202
-
203
- if (mode === "create" && existOk) {
204
- mode = "exist_ok";
241
+ if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
242
+ const { name, data, ...options } = nameOrOptions;
243
+ return this.createTable(name, data, options);
205
244
  }
206
-
207
- let table: ArrowTable;
208
- if (isArrowTable(data)) {
209
- table = data;
210
- } else {
211
- table = makeArrowTable(data, options);
245
+ if (data === undefined) {
246
+ throw new Error("data is required");
212
247
  }
213
-
214
- const buf = await fromTableToBuffer(
215
- table,
216
- options?.embeddingFunction,
217
- options?.schema,
218
- );
248
+ const { buf, mode } = await Table.parseTableData(data, options);
219
249
  const innerTable = await this.inner.createTable(
220
- name,
250
+ nameOrOptions,
221
251
  buf,
222
252
  mode,
223
253
  cleanseStorageOptions(options?.storageOptions),
254
+ options?.useLegacyFormat,
224
255
  );
225
256
 
226
- return new Table(innerTable);
257
+ return new LocalTable(innerTable);
227
258
  }
228
259
 
229
- /**
230
- * Creates a new empty Table
231
- * @param {string} name - The name of the table.
232
- * @param {Schema} schema - The schema of the table
233
- */
234
260
  async createEmptyTable(
235
261
  name: string,
236
- schema: Schema,
262
+ schema: import("./arrow").SchemaLike,
237
263
  options?: Partial<CreateTableOptions>,
238
264
  ): Promise<Table> {
239
265
  let mode: string = options?.mode ?? "create";
@@ -256,14 +282,11 @@ export class Connection {
256
282
  buf,
257
283
  mode,
258
284
  cleanseStorageOptions(options?.storageOptions),
285
+ options?.useLegacyFormat,
259
286
  );
260
- return new Table(innerTable);
287
+ return new LocalTable(innerTable);
261
288
  }
262
289
 
263
- /**
264
- * Drop an existing table.
265
- * @param {string} name The name of the table to drop.
266
- */
267
290
  async dropTable(name: string): Promise<void> {
268
291
  return this.inner.dropTable(name);
269
292
  }
@@ -272,7 +295,7 @@ export class Connection {
272
295
  /**
273
296
  * Takes storage options and makes all the keys snake case.
274
297
  */
275
- function cleanseStorageOptions(
298
+ export function cleanseStorageOptions(
276
299
  options?: Record<string, string>,
277
300
  ): Record<string, string> | undefined {
278
301
  if (options === undefined) {
@@ -19,6 +19,7 @@ import {
19
19
  FixedSizeList,
20
20
  Float,
21
21
  Float32,
22
+ type IntoVector,
22
23
  isDataType,
23
24
  isFixedSizeList,
24
25
  isFloat,
@@ -34,6 +35,11 @@ export interface FunctionOptions {
34
35
  [key: string]: any;
35
36
  }
36
37
 
38
+ export interface EmbeddingFunctionConstructor<
39
+ T extends EmbeddingFunction = EmbeddingFunction,
40
+ > {
41
+ new (modelOptions?: T["TOptions"]): T;
42
+ }
37
43
  /**
38
44
  * An embedding function that automatically creates vector representation for a given column.
39
45
  */
@@ -42,6 +48,12 @@ export abstract class EmbeddingFunction<
42
48
  T = any,
43
49
  M extends FunctionOptions = FunctionOptions,
44
50
  > {
51
+ /**
52
+ * @ignore
53
+ * This is only used for associating the options type with the class for type checking
54
+ */
55
+ // biome-ignore lint/style/useNamingConvention: we want to keep the name as it is
56
+ readonly TOptions!: M;
45
57
  /**
46
58
  * Convert the embedding function to a JSON object
47
59
  * It is used to serialize the embedding function to the schema
@@ -100,33 +112,55 @@ export abstract class EmbeddingFunction<
100
112
  * @see {@link lancedb.LanceSchema}
101
113
  */
102
114
  vectorField(
103
- options?: Partial<FieldOptions>,
115
+ optionsOrDatatype?: Partial<FieldOptions> | DataType,
104
116
  ): [DataType, Map<string, EmbeddingFunction>] {
105
- let dtype: DataType;
106
- const dims = this.ndims() ?? options?.dims;
107
- if (!options?.datatype) {
108
- if (dims === undefined) {
109
- throw new Error("ndims is required for vector field");
110
- }
111
- dtype = new FixedSizeList(dims, new Field("item", new Float32(), true));
117
+ let dtype: DataType | undefined;
118
+ let vectorType: DataType;
119
+ let dims: number | undefined = this.ndims();
120
+
121
+ // `func.vectorField(new Float32())`
122
+ if (isDataType(optionsOrDatatype)) {
123
+ dtype = optionsOrDatatype;
112
124
  } else {
113
- if (isFixedSizeList(options.datatype)) {
114
- dtype = options.datatype;
115
- } else if (isFloat(options.datatype)) {
125
+ // `func.vectorField({
126
+ // datatype: new Float32(),
127
+ // dims: 10
128
+ // })`
129
+ dims = dims ?? optionsOrDatatype?.dims;
130
+ dtype = optionsOrDatatype?.datatype;
131
+ }
132
+
133
+ if (dtype !== undefined) {
134
+ // `func.vectorField(new FixedSizeList(dims, new Field("item", new Float32(), true)))`
135
+ // or `func.vectorField({datatype: new FixedSizeList(dims, new Field("item", new Float32(), true))})`
136
+ if (isFixedSizeList(dtype)) {
137
+ vectorType = dtype;
138
+ // `func.vectorField(new Float32())`
139
+ // or `func.vectorField({datatype: new Float32()})`
140
+ } else if (isFloat(dtype)) {
141
+ // No `ndims` impl and no `{dims: n}` provided;
116
142
  if (dims === undefined) {
117
143
  throw new Error("ndims is required for vector field");
118
144
  }
119
- dtype = newVectorType(dims, options.datatype);
145
+ vectorType = newVectorType(dims, dtype);
120
146
  } else {
121
147
  throw new Error(
122
148
  "Expected FixedSizeList or Float as datatype for vector field",
123
149
  );
124
150
  }
151
+ } else {
152
+ if (dims === undefined) {
153
+ throw new Error("ndims is required for vector field");
154
+ }
155
+ vectorType = new FixedSizeList(
156
+ dims,
157
+ new Field("item", new Float32(), true),
158
+ );
125
159
  }
126
160
  const metadata = new Map<string, EmbeddingFunction>();
127
161
  metadata.set("vector_column_for", this);
128
162
 
129
- return [dtype, metadata];
163
+ return [vectorType, metadata];
130
164
  }
131
165
 
132
166
  /** The number of dimensions of the embeddings */
@@ -147,9 +181,7 @@ export abstract class EmbeddingFunction<
147
181
  /**
148
182
  Compute the embeddings for a single query
149
183
  */
150
- async computeQueryEmbeddings(
151
- data: T,
152
- ): Promise<number[] | Float32Array | Float64Array> {
184
+ async computeQueryEmbeddings(data: T): Promise<Awaited<IntoVector>> {
153
185
  return this.computeSourceEmbeddings([data]).then(
154
186
  (embeddings) => embeddings[0],
155
187
  );
@@ -13,24 +13,29 @@
13
13
  // limitations under the License.
14
14
 
15
15
  import type OpenAI from "openai";
16
+ import { type EmbeddingCreateParams } from "openai/resources";
16
17
  import { Float, Float32 } from "../arrow";
17
18
  import { EmbeddingFunction } from "./embedding_function";
18
19
  import { register } from "./registry";
19
20
 
20
21
  export type OpenAIOptions = {
21
- apiKey?: string;
22
- model?: string;
22
+ apiKey: string;
23
+ model: EmbeddingCreateParams["model"];
23
24
  };
24
25
 
25
26
  @register("openai")
26
27
  export class OpenAIEmbeddingFunction extends EmbeddingFunction<
27
28
  string,
28
- OpenAIOptions
29
+ Partial<OpenAIOptions>
29
30
  > {
30
31
  #openai: OpenAI;
31
- #modelName: string;
32
+ #modelName: OpenAIOptions["model"];
32
33
 
33
- constructor(options: OpenAIOptions = { model: "text-embedding-ada-002" }) {
34
+ constructor(
35
+ options: Partial<OpenAIOptions> = {
36
+ model: "text-embedding-ada-002",
37
+ },
38
+ ) {
34
39
  super();
35
40
  const openAIKey = options?.apiKey ?? process.env.OPENAI_API_KEY;
36
41
  if (!openAIKey) {
@@ -73,7 +78,7 @@ export class OpenAIEmbeddingFunction extends EmbeddingFunction<
73
78
  case "text-embedding-3-small":
74
79
  return 1536;
75
80
  default:
76
- return null as never;
81
+ throw new Error(`Unknown model: ${this.#modelName}`);
77
82
  }
78
83
  }
79
84
 
@@ -12,21 +12,15 @@
12
12
  // See the License for the specific language governing permissions and
13
13
  // limitations under the License.
14
14
 
15
- import type { EmbeddingFunction } from "./embedding_function";
15
+ import {
16
+ type EmbeddingFunction,
17
+ type EmbeddingFunctionConstructor,
18
+ } from "./embedding_function";
16
19
  import "reflect-metadata";
17
-
18
- export interface EmbeddingFunctionOptions {
19
- [key: string]: unknown;
20
- }
21
-
22
- export interface EmbeddingFunctionFactory<
23
- T extends EmbeddingFunction = EmbeddingFunction,
24
- > {
25
- new (modelOptions?: EmbeddingFunctionOptions): T;
26
- }
20
+ import { OpenAIEmbeddingFunction } from "./openai";
27
21
 
28
22
  interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
29
- create(options?: EmbeddingFunctionOptions): T;
23
+ create(options?: T["TOptions"]): T;
30
24
  }
31
25
 
32
26
  /**
@@ -36,14 +30,17 @@ interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
36
30
  * or TextEmbeddingFunction and registering it with the registry
37
31
  */
38
32
  export class EmbeddingFunctionRegistry {
39
- #functions: Map<string, EmbeddingFunctionFactory> = new Map();
33
+ #functions = new Map<string, EmbeddingFunctionConstructor>();
40
34
 
41
35
  /**
42
36
  * Register an embedding function
43
37
  * @param name The name of the function
44
38
  * @param func The function to register
39
+ * @throws Error if the function is already registered
45
40
  */
46
- register<T extends EmbeddingFunctionFactory = EmbeddingFunctionFactory>(
41
+ register<
42
+ T extends EmbeddingFunctionConstructor = EmbeddingFunctionConstructor,
43
+ >(
47
44
  this: EmbeddingFunctionRegistry,
48
45
  alias?: string,
49
46
  // biome-ignore lint/suspicious/noExplicitAny: <explanation>
@@ -68,18 +65,34 @@ export class EmbeddingFunctionRegistry {
68
65
  * Fetch an embedding function by name
69
66
  * @param name The name of the function
70
67
  */
71
- get<T extends EmbeddingFunction<unknown> = EmbeddingFunction>(
72
- name: string,
73
- ): EmbeddingFunctionCreate<T> | undefined {
68
+ get<T extends EmbeddingFunction<unknown>, Name extends string = "">(
69
+ name: Name extends "openai" ? "openai" : string,
70
+ //This makes it so that you can use string constants as "types", or use an explicitly supplied type
71
+ // ex:
72
+ // `registry.get("openai") -> EmbeddingFunctionCreate<OpenAIEmbeddingFunction>`
73
+ // `registry.get<MyCustomEmbeddingFunction>("my_func") -> EmbeddingFunctionCreate<MyCustomEmbeddingFunction> | undefined`
74
+ //
75
+ // the reason this is important is that we always know our built in functions are defined so the user isnt forced to do a non null/undefined
76
+ // ```ts
77
+ // const openai: OpenAIEmbeddingFunction = registry.get("openai").create()
78
+ // ```
79
+ ): Name extends "openai"
80
+ ? EmbeddingFunctionCreate<OpenAIEmbeddingFunction>
81
+ : EmbeddingFunctionCreate<T> | undefined {
82
+ type Output = Name extends "openai"
83
+ ? EmbeddingFunctionCreate<OpenAIEmbeddingFunction>
84
+ : EmbeddingFunctionCreate<T> | undefined;
85
+
74
86
  const factory = this.#functions.get(name);
75
87
  if (!factory) {
76
- return undefined;
88
+ return undefined as Output;
77
89
  }
90
+
78
91
  return {
79
- create: function (options: EmbeddingFunctionOptions) {
80
- return new factory(options) as unknown as T;
92
+ create: function (options?: T["TOptions"]) {
93
+ return new factory(options);
81
94
  },
82
- };
95
+ } as Output;
83
96
  }
84
97
 
85
98
  /**
@@ -89,6 +102,9 @@ export class EmbeddingFunctionRegistry {
89
102
  this.#functions.clear();
90
103
  }
91
104
 
105
+ /**
106
+ * @ignore
107
+ */
92
108
  parseFunctions(
93
109
  this: EmbeddingFunctionRegistry,
94
110
  metadata: Map<string, string>,
@@ -100,7 +116,7 @@ export class EmbeddingFunctionRegistry {
100
116
  name: string;
101
117
  sourceColumn: string;
102
118
  vectorColumn: string;
103
- model: EmbeddingFunctionOptions;
119
+ model: EmbeddingFunction["TOptions"];
104
120
  };
105
121
  const functions = <FunctionConfig[]>(
106
122
  JSON.parse(metadata.get("embedding_functions")!)
package/lancedb/index.ts CHANGED
@@ -12,25 +12,43 @@
12
12
  // See the License for the specific language governing permissions and
13
13
  // limitations under the License.
14
14
 
15
+ import {
16
+ Connection,
17
+ LocalConnection,
18
+ cleanseStorageOptions,
19
+ } from "./connection";
20
+
21
+ import {
22
+ ConnectionOptions,
23
+ Connection as LanceDbConnection,
24
+ } from "./native.js";
25
+
26
+ import { RemoteConnection, RemoteConnectionOptions } from "./remote";
27
+
15
28
  export {
16
29
  WriteOptions,
17
30
  WriteMode,
18
31
  AddColumnsSql,
19
32
  ColumnAlteration,
20
33
  ConnectionOptions,
34
+ IndexStatistics,
35
+ IndexMetadata,
36
+ IndexConfig,
21
37
  } from "./native.js";
38
+
22
39
  export {
23
40
  makeArrowTable,
24
41
  MakeArrowTableOptions,
25
42
  Data,
26
43
  VectorColumnOptions,
27
44
  } from "./arrow";
45
+
28
46
  export {
29
- connect,
30
47
  Connection,
31
48
  CreateTableOptions,
32
49
  TableNamesOptions,
33
50
  } from "./connection";
51
+
34
52
  export {
35
53
  ExecutableQuery,
36
54
  Query,
@@ -38,6 +56,87 @@ export {
38
56
  VectorQuery,
39
57
  RecordBatchIterator,
40
58
  } from "./query";
59
+
41
60
  export { Index, IndexOptions, IvfPqOptions } from "./indices";
42
- export { Table, AddDataOptions, IndexConfig, UpdateOptions } from "./table";
61
+
62
+ export { Table, AddDataOptions, UpdateOptions } from "./table";
63
+
43
64
  export * as embedding from "./embedding";
65
+
66
+ /**
67
+ * Connect to a LanceDB instance at the given URI.
68
+ *
69
+ * Accepted formats:
70
+ *
71
+ * - `/path/to/database` - local database
72
+ * - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
73
+ * - `db://host:port` - remote database (LanceDB cloud)
74
+ * @param {string} uri - The uri of the database. If the database uri starts
75
+ * with `db://` then it connects to a remote database.
76
+ * @see {@link ConnectionOptions} for more details on the URI format.
77
+ * @example
78
+ * ```ts
79
+ * const conn = await connect("/path/to/database");
80
+ * ```
81
+ * @example
82
+ * ```ts
83
+ * const conn = await connect(
84
+ * "s3://bucket/path/to/database",
85
+ * {storageOptions: {timeout: "60s"}
86
+ * });
87
+ * ```
88
+ */
89
+ export async function connect(
90
+ uri: string,
91
+ opts?: Partial<ConnectionOptions | RemoteConnectionOptions>,
92
+ ): Promise<Connection>;
93
+ /**
94
+ * Connect to a LanceDB instance at the given URI.
95
+ *
96
+ * Accepted formats:
97
+ *
98
+ * - `/path/to/database` - local database
99
+ * - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
100
+ * - `db://host:port` - remote database (LanceDB cloud)
101
+ * @param options - The options to use when connecting to the database
102
+ * @see {@link ConnectionOptions} for more details on the URI format.
103
+ * @example
104
+ * ```ts
105
+ * const conn = await connect({
106
+ * uri: "/path/to/database",
107
+ * storageOptions: {timeout: "60s"}
108
+ * });
109
+ * ```
110
+ */
111
+ export async function connect(
112
+ opts: Partial<RemoteConnectionOptions | ConnectionOptions> & { uri: string },
113
+ ): Promise<Connection>;
114
+ export async function connect(
115
+ uriOrOptions:
116
+ | string
117
+ | (Partial<RemoteConnectionOptions | ConnectionOptions> & { uri: string }),
118
+ opts: Partial<ConnectionOptions | RemoteConnectionOptions> = {},
119
+ ): Promise<Connection> {
120
+ let uri: string | undefined;
121
+ if (typeof uriOrOptions !== "string") {
122
+ const { uri: uri_, ...options } = uriOrOptions;
123
+ uri = uri_;
124
+ opts = options;
125
+ } else {
126
+ uri = uriOrOptions;
127
+ }
128
+
129
+ if (!uri) {
130
+ throw new Error("uri is required");
131
+ }
132
+
133
+ if (uri?.startsWith("db://")) {
134
+ return new RemoteConnection(uri, opts as RemoteConnectionOptions);
135
+ }
136
+ opts = (opts as ConnectionOptions) ?? {};
137
+ (<ConnectionOptions>opts).storageOptions = cleanseStorageOptions(
138
+ (<ConnectionOptions>opts).storageOptions,
139
+ );
140
+ const nativeConn = await LanceDbConnection.new(uri, opts);
141
+ return new LocalConnection(nativeConn);
142
+ }