@lancedb/lancedb 0.5.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/Cargo.toml +3 -3
  2. package/biome.json +19 -3
  3. package/dist/arrow.d.ts +41 -8
  4. package/dist/arrow.js +4 -4
  5. package/dist/connection.d.ts +49 -29
  6. package/dist/connection.js +21 -73
  7. package/dist/embedding/embedding_function.d.ts +9 -1
  8. package/dist/embedding/embedding_function.js +6 -0
  9. package/dist/embedding/openai.d.ts +6 -5
  10. package/dist/embedding/openai.js +4 -2
  11. package/dist/embedding/registry.d.ts +6 -11
  12. package/dist/index.d.ts +51 -3
  13. package/dist/index.js +28 -4
  14. package/dist/merge.d.ts +54 -0
  15. package/dist/merge.js +64 -0
  16. package/dist/native.d.ts +29 -3
  17. package/dist/native.js +26 -9
  18. package/dist/query.d.ts +33 -10
  19. package/dist/query.js +100 -13
  20. package/dist/remote/client.d.ts +28 -0
  21. package/dist/remote/client.js +172 -0
  22. package/dist/remote/connection.d.ts +25 -0
  23. package/dist/remote/connection.js +110 -0
  24. package/dist/remote/index.d.ts +3 -0
  25. package/dist/remote/index.js +9 -0
  26. package/dist/remote/table.d.ts +42 -0
  27. package/dist/remote/table.js +179 -0
  28. package/dist/sanitize.d.ts +3 -2
  29. package/dist/sanitize.js +55 -1
  30. package/dist/table.d.ts +105 -30
  31. package/dist/table.js +94 -237
  32. package/dist/util.d.ts +14 -0
  33. package/dist/util.js +65 -0
  34. package/examples/ann_indexes.ts +49 -0
  35. package/examples/basic.ts +149 -0
  36. package/examples/embedding.ts +83 -0
  37. package/examples/filtering.ts +34 -0
  38. package/examples/jsconfig.json +27 -0
  39. package/examples/package-lock.json +79 -0
  40. package/examples/package.json +18 -0
  41. package/examples/search.ts +37 -0
  42. package/lancedb/arrow.ts +80 -23
  43. package/lancedb/connection.ts +107 -92
  44. package/lancedb/embedding/embedding_function.ts +12 -1
  45. package/lancedb/embedding/openai.ts +11 -6
  46. package/lancedb/embedding/registry.ts +34 -22
  47. package/lancedb/index.ts +101 -2
  48. package/lancedb/merge.ts +70 -0
  49. package/lancedb/query.ts +114 -28
  50. package/lancedb/remote/client.ts +221 -0
  51. package/lancedb/remote/connection.ts +201 -0
  52. package/lancedb/remote/index.ts +3 -0
  53. package/lancedb/remote/table.ts +226 -0
  54. package/lancedb/sanitize.ts +73 -1
  55. package/lancedb/table.ts +320 -132
  56. package/lancedb/util.ts +69 -0
  57. package/native.d.ts +208 -0
  58. package/nodejs-artifacts/arrow.d.ts +41 -8
  59. package/nodejs-artifacts/arrow.js +4 -4
  60. package/nodejs-artifacts/connection.d.ts +49 -29
  61. package/nodejs-artifacts/connection.js +21 -73
  62. package/nodejs-artifacts/embedding/embedding_function.d.ts +9 -1
  63. package/nodejs-artifacts/embedding/embedding_function.js +6 -0
  64. package/nodejs-artifacts/embedding/openai.d.ts +6 -5
  65. package/nodejs-artifacts/embedding/openai.js +4 -2
  66. package/nodejs-artifacts/embedding/registry.d.ts +6 -11
  67. package/nodejs-artifacts/index.d.ts +51 -3
  68. package/nodejs-artifacts/index.js +28 -4
  69. package/nodejs-artifacts/merge.d.ts +54 -0
  70. package/nodejs-artifacts/merge.js +64 -0
  71. package/nodejs-artifacts/native.d.ts +29 -3
  72. package/nodejs-artifacts/native.js +26 -9
  73. package/nodejs-artifacts/query.d.ts +33 -10
  74. package/nodejs-artifacts/query.js +100 -13
  75. package/nodejs-artifacts/remote/client.d.ts +28 -0
  76. package/nodejs-artifacts/remote/client.js +172 -0
  77. package/nodejs-artifacts/remote/connection.d.ts +25 -0
  78. package/nodejs-artifacts/remote/connection.js +110 -0
  79. package/nodejs-artifacts/remote/index.d.ts +3 -0
  80. package/nodejs-artifacts/remote/index.js +9 -0
  81. package/nodejs-artifacts/remote/table.d.ts +42 -0
  82. package/nodejs-artifacts/remote/table.js +179 -0
  83. package/nodejs-artifacts/sanitize.d.ts +3 -2
  84. package/nodejs-artifacts/sanitize.js +55 -1
  85. package/nodejs-artifacts/table.d.ts +105 -30
  86. package/nodejs-artifacts/table.js +94 -237
  87. package/nodejs-artifacts/util.d.ts +14 -0
  88. package/nodejs-artifacts/util.js +65 -0
  89. package/package.json +25 -11
package/native.d.ts ADDED
@@ -0,0 +1,208 @@
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+
4
+ /* auto-generated by NAPI-RS */
5
+
6
+ /** A description of an index currently configured on a column */
7
+ export interface IndexConfig {
8
+ /** The name of the index */
9
+ name: string
10
+ /** The type of the index */
11
+ indexType: string
12
+ /**
13
+ * The columns in the index
14
+ *
15
+ * Currently this is always an array of size 1. In the future there may
16
+ * be more columns to represent composite indices.
17
+ */
18
+ columns: Array<string>
19
+ }
20
+ /** Statistics about a compaction operation. */
21
+ export interface CompactionStats {
22
+ /** The number of fragments removed */
23
+ fragmentsRemoved: number
24
+ /** The number of new, compacted fragments added */
25
+ fragmentsAdded: number
26
+ /** The number of data files removed */
27
+ filesRemoved: number
28
+ /** The number of new, compacted data files added */
29
+ filesAdded: number
30
+ }
31
+ /** Statistics about a cleanup operation */
32
+ export interface RemovalStats {
33
+ /** The number of bytes removed */
34
+ bytesRemoved: number
35
+ /** The number of old versions removed */
36
+ oldVersionsRemoved: number
37
+ }
38
+ /** Statistics about an optimize operation */
39
+ export interface OptimizeStats {
40
+ /** Statistics about the compaction operation */
41
+ compaction: CompactionStats
42
+ /** Statistics about the removal operation */
43
+ prune: RemovalStats
44
+ }
45
+ /**
46
+ * A definition of a column alteration. The alteration changes the column at
47
+ * `path` to have the new name `rename` (when given) and to be nullable if
48
+ * `nullable` is true. At least one of `rename` or `nullable`
49
+ * must be provided.
50
+ */
51
+ export interface ColumnAlteration {
52
+ /**
53
+ * The path to the column to alter. This is a dot-separated path to the column.
54
+ * If it is a top-level column then it is just the name of the column. If it is
55
+ * a nested column then it is the path to the column, e.g. "a.b.c" for a column
56
+ * `c` nested inside a column `b` nested inside a column `a`.
57
+ */
58
+ path: string
59
+ /**
60
+ * The new name of the column. If not provided then the name will not be changed.
61
+ * This must be distinct from the names of all other columns in the table.
62
+ */
63
+ rename?: string
64
+ /** Set the new nullability. Note that a nullable column cannot be made non-nullable. */
65
+ nullable?: boolean
66
+ }
67
+ /** A definition of a new column to add to a table. */
68
+ export interface AddColumnsSql {
69
+ /** The name of the new column. */
70
+ name: string
71
+ /**
72
+ * The values to populate the new column with, as a SQL expression.
73
+ * The expression can reference other columns in the table.
74
+ */
75
+ valueSql: string
76
+ }
77
+ export interface IndexStatistics {
78
+ /** The number of rows indexed by the index */
79
+ numIndexedRows: number
80
+ /** The number of rows not indexed */
81
+ numUnindexedRows: number
82
+ /** The type of the index */
83
+ indexType?: string
84
+ /** The metadata for each index */
85
+ indices: Array<IndexMetadata>
86
+ }
87
+ export interface IndexMetadata {
88
+ metricType?: string
89
+ indexType?: string
90
+ }
91
+ export interface ConnectionOptions {
92
+ /**
93
+ * (For LanceDB OSS only): The interval, in seconds, at which to check for
94
+ * updates to the table from other processes. If unset, then consistency is not
95
+ * checked. For performance reasons, this is the default. For strong
96
+ * consistency, set this to zero seconds. Then every read will check for
97
+ * updates from other processes. As a compromise, you can set this to a
98
+ * non-zero value for eventual consistency. If more than that interval
99
+ * has passed since the last check, then the table will be checked for updates.
100
+ * Note: this consistency only applies to read operations. Write operations are
101
+ * always consistent.
102
+ */
103
+ readConsistencyInterval?: number
104
+ /**
105
+ * (For LanceDB OSS only): configuration for object storage.
106
+ *
107
+ * The available options are described at https://lancedb.github.io/lancedb/guides/storage/
108
+ */
109
+ storageOptions?: Record<string, string>
110
+ }
111
+ /** Write mode for writing a table. */
112
+ export const enum WriteMode {
113
+ Create = 'Create',
114
+ Append = 'Append',
115
+ Overwrite = 'Overwrite'
116
+ }
117
+ /** Write options when creating a Table. */
118
+ export interface WriteOptions {
119
+ /** Write mode for writing to a table. */
120
+ mode?: WriteMode
121
+ }
122
+ export interface OpenTableOptions {
123
+ storageOptions?: Record<string, string>
124
+ }
125
+ export class Connection {
126
+ /** Create a new Connection instance from the given URI. */
127
+ static new(uri: string, options: ConnectionOptions): Promise<Connection>
128
+ display(): string
129
+ isOpen(): boolean
130
+ close(): void
131
+ /** List all tables in the dataset. */
132
+ tableNames(startAfter?: string | undefined | null, limit?: number | undefined | null): Promise<Array<string>>
133
+ /**
134
+ * Create table from an Apache Arrow IPC (file) buffer.
135
+ *
136
+ * Parameters:
137
+ * - name: The name of the table.
138
+ * - buf: The buffer containing the IPC file.
139
+ *
140
+ */
141
+ createTable(name: string, buf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
142
+ createEmptyTable(name: string, schemaBuf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
143
+ openTable(name: string, storageOptions?: Record<string, string> | undefined | null, indexCacheSize?: number | undefined | null): Promise<Table>
144
+ /** Drop the table with the given name, or raise an error if the table does not exist. */
145
+ dropTable(name: string): Promise<void>
146
+ }
147
+ export class Index {
148
+ static ivfPq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
149
+ static btree(): Index
150
+ }
151
+ /** TypeScript-style Async Iterator over RecordBatches */
152
+ export class RecordBatchIterator {
153
+ next(): Promise<Buffer | null>
154
+ }
155
+ /** A builder used to create and run a merge insert operation */
156
+ export class NativeMergeInsertBuilder {
157
+ whenMatchedUpdateAll(condition?: string | undefined | null): NativeMergeInsertBuilder
158
+ whenNotMatchedInsertAll(): NativeMergeInsertBuilder
159
+ whenNotMatchedBySourceDelete(filter?: string | undefined | null): NativeMergeInsertBuilder
160
+ execute(buf: Buffer): Promise<void>
161
+ }
162
+ export class Query {
163
+ onlyIf(predicate: string): void
164
+ select(columns: Array<[string, string]>): void
165
+ limit(limit: number): void
166
+ nearestTo(vector: Float32Array): VectorQuery
167
+ execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
168
+ explainPlan(verbose: boolean): Promise<string>
169
+ }
170
+ export class VectorQuery {
171
+ column(column: string): void
172
+ distanceType(distanceType: string): void
173
+ postfilter(): void
174
+ refineFactor(refineFactor: number): void
175
+ nprobes(nprobe: number): void
176
+ bypassVectorIndex(): void
177
+ onlyIf(predicate: string): void
178
+ select(columns: Array<[string, string]>): void
179
+ limit(limit: number): void
180
+ execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
181
+ explainPlan(verbose: boolean): Promise<string>
182
+ }
183
+ export class Table {
184
+ name: string
185
+ display(): string
186
+ isOpen(): boolean
187
+ close(): void
188
+ /** Return Schema as empty Arrow IPC file. */
189
+ schema(): Promise<Buffer>
190
+ add(buf: Buffer, mode: string): Promise<void>
191
+ countRows(filter?: string | undefined | null): Promise<number>
192
+ delete(predicate: string): Promise<void>
193
+ createIndex(index: Index | undefined | null, column: string, replace?: boolean | undefined | null): Promise<void>
194
+ update(onlyIf: string | undefined | null, columns: Array<[string, string]>): Promise<void>
195
+ query(): Query
196
+ vectorSearch(vector: Float32Array): VectorQuery
197
+ addColumns(transforms: Array<AddColumnsSql>): Promise<void>
198
+ alterColumns(alterations: Array<ColumnAlteration>): Promise<void>
199
+ dropColumns(columns: Array<string>): Promise<void>
200
+ version(): Promise<number>
201
+ checkout(version: number): Promise<void>
202
+ checkoutLatest(): Promise<void>
203
+ restore(): Promise<void>
204
+ optimize(olderThanMs?: number | undefined | null): Promise<OptimizeStats>
205
+ listIndices(): Promise<Array<IndexConfig>>
206
+ indexStats(indexName: string): Promise<IndexStatistics | null>
207
+ mergeInsert(on: Array<string>): NativeMergeInsertBuilder
208
+ }
@@ -1,10 +1,43 @@
1
1
  /// <reference types="node" />
2
- import { Table as ArrowTable, Binary, DataType, FixedSizeBinary, FixedSizeList, Float, Int, LargeBinary, List, Null, Schema, Struct, Utf8 } from "apache-arrow";
2
+ import { Table as ArrowTable, Binary, BufferType, DataType, Field, FixedSizeBinary, FixedSizeList, Float, Int, LargeBinary, List, Null, RecordBatch, Schema, Struct, Utf8 } from "apache-arrow";
3
+ import { Buffers } from "apache-arrow/data";
3
4
  import { type EmbeddingFunction } from "./embedding/embedding_function";
4
5
  import { EmbeddingFunctionConfig } from "./embedding/registry";
5
6
  export * from "apache-arrow";
6
- export type IntoVector = Float32Array | Float64Array | number[];
7
- export declare function isArrowTable(value: object): value is ArrowTable;
7
+ export type SchemaLike = Schema | {
8
+ fields: FieldLike[];
9
+ metadata: Map<string, string>;
10
+ get names(): unknown[];
11
+ };
12
+ export type FieldLike = Field | {
13
+ type: string;
14
+ name: string;
15
+ nullable?: boolean;
16
+ metadata?: Map<string, string>;
17
+ };
18
+ export type DataLike = import("apache-arrow").Data<Struct<any>> | {
19
+ type: any;
20
+ length: number;
21
+ offset: number;
22
+ stride: number;
23
+ nullable: boolean;
24
+ children: DataLike[];
25
+ get nullCount(): number;
26
+ values: Buffers<any>[BufferType.DATA];
27
+ typeIds: Buffers<any>[BufferType.TYPE];
28
+ nullBitmap: Buffers<any>[BufferType.VALIDITY];
29
+ valueOffsets: Buffers<any>[BufferType.OFFSET];
30
+ };
31
+ export type RecordBatchLike = RecordBatch | {
32
+ schema: SchemaLike;
33
+ data: DataLike;
34
+ };
35
+ export type TableLike = ArrowTable | {
36
+ schema: SchemaLike;
37
+ batches: RecordBatchLike[];
38
+ };
39
+ export type IntoVector = Float32Array | Float64Array | number[] | Promise<Float32Array | Float64Array | number[]>;
40
+ export declare function isArrowTable(value: object): value is TableLike;
8
41
  export declare function isDataType(value: unknown): value is DataType;
9
42
  export declare function isNull(value: unknown): value is Null;
10
43
  export declare function isInt(value: unknown): value is Int;
@@ -26,7 +59,7 @@ export declare function isUnion(value: unknown): value is Struct;
26
59
  export declare function isFixedSizeBinary(value: unknown): value is FixedSizeBinary;
27
60
  export declare function isFixedSizeList(value: unknown): value is FixedSizeList;
28
61
  /** Data type accepted by NodeJS SDK */
29
- export type Data = Record<string, unknown>[] | ArrowTable;
62
+ export type Data = Record<string, unknown>[] | TableLike;
30
63
  export declare class VectorColumnOptions {
31
64
  /** Vector column type. */
32
65
  type: Float;
@@ -34,7 +67,7 @@ export declare class VectorColumnOptions {
34
67
  }
35
68
  /** Options to control the makeArrowTable call. */
36
69
  export declare class MakeArrowTableOptions {
37
- schema?: Schema;
70
+ schema?: SchemaLike;
38
71
  vectorColumns: Record<string, VectorColumnOptions>;
39
72
  embeddings?: EmbeddingFunction<unknown>;
40
73
  embeddingFunction?: EmbeddingFunctionConfig;
@@ -146,7 +179,7 @@ export declare function makeArrowTable(data: Array<Record<string, unknown>>, opt
146
179
  /**
147
180
  * Create an empty Arrow table with the provided schema
148
181
  */
149
- export declare function makeEmptyTable(schema: Schema, metadata?: Map<string, string>): ArrowTable;
182
+ export declare function makeEmptyTable(schema: SchemaLike, metadata?: Map<string, string>): ArrowTable;
150
183
  /**
151
184
  * Convert an Array of records into an Arrow Table, optionally applying an
152
185
  * embeddings function to it.
@@ -192,7 +225,7 @@ export declare function fromRecordsToStreamBuffer(data: Array<Record<string, unk
192
225
  *
193
226
  * `schema` is required if the table is empty
194
227
  */
195
- export declare function fromTableToBuffer(table: ArrowTable, embeddings?: EmbeddingFunctionConfig, schema?: Schema): Promise<Buffer>;
228
+ export declare function fromTableToBuffer(table: ArrowTable, embeddings?: EmbeddingFunctionConfig, schema?: SchemaLike): Promise<Buffer>;
196
229
  /**
197
230
  * Serialize an Arrow Table into a buffer using the Arrow IPC File serialization
198
231
  *
@@ -210,7 +243,7 @@ export declare function fromDataToBuffer(data: Data, embeddings?: EmbeddingFunct
210
243
  *
211
244
  * `schema` is required if the table is empty
212
245
  */
213
- export declare function fromTableToStreamBuffer(table: ArrowTable, embeddings?: EmbeddingFunctionConfig, schema?: Schema): Promise<Buffer>;
246
+ export declare function fromTableToStreamBuffer(table: ArrowTable, embeddings?: EmbeddingFunctionConfig, schema?: SchemaLike): Promise<Buffer>;
214
247
  /**
215
248
  * Create an empty table with the given schema
216
249
  */
@@ -505,15 +505,15 @@ async function applyEmbeddingsFromMetadata(table, schema) {
505
505
  }
506
506
  /** Helper function to apply embeddings to an input table */
507
507
  async function applyEmbeddings(table, embeddings, schema) {
508
+ if (schema !== undefined && schema !== null) {
509
+ schema = (0, sanitize_1.sanitizeSchema)(schema);
510
+ }
508
511
  if (schema?.metadata.has("embedding_functions")) {
509
512
  return applyEmbeddingsFromMetadata(table, schema);
510
513
  }
511
514
  else if (embeddings == null || embeddings === undefined) {
512
515
  return table;
513
516
  }
514
- if (schema !== undefined && schema !== null) {
515
- schema = (0, sanitize_1.sanitizeSchema)(schema);
516
- }
517
517
  // Convert from ArrowTable to Record<String, Vector>
518
518
  const colEntries = [...Array(table.numCols).keys()].map((_, idx) => {
519
519
  const name = table.schema.fields[idx].name;
@@ -670,7 +670,7 @@ async function fromDataToBuffer(data, embeddings, schema) {
670
670
  schema = (0, sanitize_1.sanitizeSchema)(schema);
671
671
  }
672
672
  if (isArrowTable(data)) {
673
- return fromTableToBuffer(data, embeddings, schema);
673
+ return fromTableToBuffer((0, sanitize_1.sanitizeTable)(data), embeddings, schema);
674
674
  }
675
675
  else {
676
676
  const table = await convertToTable(data, embeddings, { schema });
@@ -1,20 +1,7 @@
1
- import { Table as ArrowTable, Schema } from "./arrow";
1
+ import { Data, SchemaLike, TableLike } from "./arrow";
2
2
  import { EmbeddingFunctionConfig } from "./embedding/registry";
3
- import { ConnectionOptions, Connection as LanceDbConnection } from "./native";
3
+ import { Connection as LanceDbConnection } from "./native";
4
4
  import { Table } from "./table";
5
- /**
6
- * Connect to a LanceDB instance at the given URI.
7
- *
8
- * Accepted formats:
9
- *
10
- * - `/path/to/database` - local database
11
- * - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
12
- * - `db://host:port` - remote database (LanceDB cloud)
13
- * @param {string} uri - The uri of the database. If the database uri starts
14
- * with `db://` then it connects to a remote database.
15
- * @see {@link ConnectionOptions} for more details on the URI format.
16
- */
17
- export declare function connect(uri: string, opts?: Partial<ConnectionOptions>): Promise<Connection>;
18
5
  export interface CreateTableOptions {
19
6
  /**
20
7
  * The mode to use when creating the table.
@@ -46,7 +33,7 @@ export interface CreateTableOptions {
46
33
  * The default is true while the new format is in beta
47
34
  */
48
35
  useLegacyFormat?: boolean;
49
- schema?: Schema;
36
+ schema?: SchemaLike;
50
37
  embeddingFunction?: EmbeddingFunctionConfig;
51
38
  }
52
39
  export interface OpenTableOptions {
@@ -102,11 +89,11 @@ export interface TableNamesOptions {
102
89
  * Any created tables are independent and will continue to work even if
103
90
  * the underlying connection has been closed.
104
91
  */
105
- export declare class Connection {
106
- readonly inner: LanceDbConnection;
107
- constructor(inner: LanceDbConnection);
108
- /** Return true if the connection has not been closed */
109
- isOpen(): boolean;
92
+ export declare abstract class Connection {
93
+ /**
94
+ * Return true if the connection has not been closed
95
+ */
96
+ abstract isOpen(): boolean;
110
97
  /**
111
98
  * Close the connection, releasing any underlying resources.
112
99
  *
@@ -114,38 +101,71 @@ export declare class Connection {
114
101
  *
115
102
  * Any attempt to use the connection after it is closed will result in an error.
116
103
  */
117
- close(): void;
118
- /** Return a brief description of the connection */
119
- display(): string;
104
+ abstract close(): void;
105
+ /**
106
+ * Return a brief description of the connection
107
+ */
108
+ abstract display(): string;
120
109
  /**
121
110
  * List all the table names in this database.
122
111
  *
123
112
  * Tables will be returned in lexicographical order.
124
113
  * @param {Partial<TableNamesOptions>} options - options to control the
125
114
  * paging / start point
115
+ *
126
116
  */
127
- tableNames(options?: Partial<TableNamesOptions>): Promise<string[]>;
117
+ abstract tableNames(options?: Partial<TableNamesOptions>): Promise<string[]>;
128
118
  /**
129
119
  * Open a table in the database.
130
120
  * @param {string} name - The name of the table
131
121
  */
132
- openTable(name: string, options?: Partial<OpenTableOptions>): Promise<Table>;
122
+ abstract openTable(name: string, options?: Partial<OpenTableOptions>): Promise<Table>;
123
+ /**
124
+ * Creates a new Table and initializes it with new data.
125
+ * @param {object} options - The options object.
126
+ * @param {string} options.name - The name of the table.
127
+ * @param {Data} options.data - Non-empty Array of Records to be inserted into the table
128
+ *
129
+ */
130
+ abstract createTable(options: {
131
+ name: string;
132
+ data: Data;
133
+ } & Partial<CreateTableOptions>): Promise<Table>;
133
134
  /**
134
135
  * Creates a new Table and initializes it with new data.
135
136
  * @param {string} name - The name of the table.
136
- * @param {Record<string, unknown>[] | ArrowTable} data - Non-empty Array of Records
137
+ * @param {Record<string, unknown>[] | TableLike} data - Non-empty Array of Records
137
138
  * to be inserted into the table
138
139
  */
139
- createTable(name: string, data: Record<string, unknown>[] | ArrowTable, options?: Partial<CreateTableOptions>): Promise<Table>;
140
+ abstract createTable(name: string, data: Record<string, unknown>[] | TableLike, options?: Partial<CreateTableOptions>): Promise<Table>;
140
141
  /**
141
142
  * Creates a new empty Table
142
143
  * @param {string} name - The name of the table.
143
144
  * @param {Schema} schema - The schema of the table
144
145
  */
145
- createEmptyTable(name: string, schema: Schema, options?: Partial<CreateTableOptions>): Promise<Table>;
146
+ abstract createEmptyTable(name: string, schema: import("./arrow").SchemaLike, options?: Partial<CreateTableOptions>): Promise<Table>;
146
147
  /**
147
148
  * Drop an existing table.
148
149
  * @param {string} name The name of the table to drop.
149
150
  */
151
+ abstract dropTable(name: string): Promise<void>;
152
+ }
153
+ export declare class LocalConnection extends Connection {
154
+ readonly inner: LanceDbConnection;
155
+ constructor(inner: LanceDbConnection);
156
+ isOpen(): boolean;
157
+ close(): void;
158
+ display(): string;
159
+ tableNames(options?: Partial<TableNamesOptions>): Promise<string[]>;
160
+ openTable(name: string, options?: Partial<OpenTableOptions>): Promise<Table>;
161
+ createTable(nameOrOptions: string | ({
162
+ name: string;
163
+ data: Data;
164
+ } & Partial<CreateTableOptions>), data?: Record<string, unknown>[] | TableLike, options?: Partial<CreateTableOptions>): Promise<Table>;
165
+ createEmptyTable(name: string, schema: import("./arrow").SchemaLike, options?: Partial<CreateTableOptions>): Promise<Table>;
150
166
  dropTable(name: string): Promise<void>;
151
167
  }
168
+ /**
169
+ * Takes storage options and makes all the keys snake case.
170
+ */
171
+ export declare function cleanseStorageOptions(options?: Record<string, string>): Record<string, string> | undefined;
@@ -13,30 +13,10 @@
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
- exports.Connection = exports.connect = void 0;
16
+ exports.cleanseStorageOptions = exports.LocalConnection = exports.Connection = void 0;
17
17
  const arrow_1 = require("./arrow");
18
18
  const registry_1 = require("./embedding/registry");
19
- const native_1 = require("./native");
20
19
  const table_1 = require("./table");
21
- /**
22
- * Connect to a LanceDB instance at the given URI.
23
- *
24
- * Accepted formats:
25
- *
26
- * - `/path/to/database` - local database
27
- * - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
28
- * - `db://host:port` - remote database (LanceDB cloud)
29
- * @param {string} uri - The uri of the database. If the database uri starts
30
- * with `db://` then it connects to a remote database.
31
- * @see {@link ConnectionOptions} for more details on the URI format.
32
- */
33
- async function connect(uri, opts) {
34
- opts = opts ?? {};
35
- opts.storageOptions = cleanseStorageOptions(opts.storageOptions);
36
- const nativeConn = await native_1.Connection.new(uri, opts);
37
- return new Connection(nativeConn);
38
- }
39
- exports.connect = connect;
40
20
  /**
41
21
  * A LanceDB Connection that allows you to open tables and create new ones.
42
22
  *
@@ -56,74 +36,45 @@ exports.connect = connect;
56
36
  * the underlying connection has been closed.
57
37
  */
58
38
  class Connection {
39
+ [Symbol.for("nodejs.util.inspect.custom")]() {
40
+ return this.display();
41
+ }
42
+ }
43
+ exports.Connection = Connection;
44
+ class LocalConnection extends Connection {
59
45
  inner;
60
46
  constructor(inner) {
47
+ super();
61
48
  this.inner = inner;
62
49
  }
63
- /** Return true if the connection has not been closed */
64
50
  isOpen() {
65
51
  return this.inner.isOpen();
66
52
  }
67
- /**
68
- * Close the connection, releasing any underlying resources.
69
- *
70
- * It is safe to call this method multiple times.
71
- *
72
- * Any attempt to use the connection after it is closed will result in an error.
73
- */
74
53
  close() {
75
54
  this.inner.close();
76
55
  }
77
- /** Return a brief description of the connection */
78
56
  display() {
79
57
  return this.inner.display();
80
58
  }
81
- /**
82
- * List all the table names in this database.
83
- *
84
- * Tables will be returned in lexicographical order.
85
- * @param {Partial<TableNamesOptions>} options - options to control the
86
- * paging / start point
87
- */
88
59
  async tableNames(options) {
89
60
  return this.inner.tableNames(options?.startAfter, options?.limit);
90
61
  }
91
- /**
92
- * Open a table in the database.
93
- * @param {string} name - The name of the table
94
- */
95
62
  async openTable(name, options) {
96
63
  const innerTable = await this.inner.openTable(name, cleanseStorageOptions(options?.storageOptions), options?.indexCacheSize);
97
- return new table_1.Table(innerTable);
64
+ return new table_1.LocalTable(innerTable);
98
65
  }
99
- /**
100
- * Creates a new Table and initialize it with new data.
101
- * @param {string} name - The name of the table.
102
- * @param {Record<string, unknown>[] | ArrowTable} data - Non-empty Array of Records
103
- * to be inserted into the table
104
- */
105
- async createTable(name, data, options) {
106
- let mode = options?.mode ?? "create";
107
- const existOk = options?.existOk ?? false;
108
- if (mode === "create" && existOk) {
109
- mode = "exist_ok";
110
- }
111
- let table;
112
- if ((0, arrow_1.isArrowTable)(data)) {
113
- table = data;
66
+ async createTable(nameOrOptions, data, options) {
67
+ if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
68
+ const { name, data, ...options } = nameOrOptions;
69
+ return this.createTable(name, data, options);
114
70
  }
115
- else {
116
- table = (0, arrow_1.makeArrowTable)(data, options);
71
+ if (data === undefined) {
72
+ throw new Error("data is required");
117
73
  }
118
- const buf = await (0, arrow_1.fromTableToBuffer)(table, options?.embeddingFunction, options?.schema);
119
- const innerTable = await this.inner.createTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions), options?.useLegacyFormat);
120
- return new table_1.Table(innerTable);
74
+ const { buf, mode } = await table_1.Table.parseTableData(data, options);
75
+ const innerTable = await this.inner.createTable(nameOrOptions, buf, mode, cleanseStorageOptions(options?.storageOptions), options?.useLegacyFormat);
76
+ return new table_1.LocalTable(innerTable);
121
77
  }
122
- /**
123
- * Creates a new empty Table
124
- * @param {string} name - The name of the table.
125
- * @param {Schema} schema - The schema of the table
126
- */
127
78
  async createEmptyTable(name, schema, options) {
128
79
  let mode = options?.mode ?? "create";
129
80
  const existOk = options?.existOk ?? false;
@@ -139,17 +90,13 @@ class Connection {
139
90
  const table = (0, arrow_1.makeEmptyTable)(schema, metadata);
140
91
  const buf = await (0, arrow_1.fromTableToBuffer)(table);
141
92
  const innerTable = await this.inner.createEmptyTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions), options?.useLegacyFormat);
142
- return new table_1.Table(innerTable);
93
+ return new table_1.LocalTable(innerTable);
143
94
  }
144
- /**
145
- * Drop an existing table.
146
- * @param {string} name The name of the table to drop.
147
- */
148
95
  async dropTable(name) {
149
96
  return this.inner.dropTable(name);
150
97
  }
151
98
  }
152
- exports.Connection = Connection;
99
+ exports.LocalConnection = LocalConnection;
153
100
  /**
154
101
  * Takes storage options and makes all the keys snake case.
155
102
  */
@@ -166,6 +113,7 @@ function cleanseStorageOptions(options) {
166
113
  }
167
114
  return result;
168
115
  }
116
+ exports.cleanseStorageOptions = cleanseStorageOptions;
169
117
  /**
170
118
  * Convert a string to snake case. It might already be snake case, in which case it is
171
119
  * returned unchanged.
@@ -6,10 +6,18 @@ import { DataType, Float, type IntoVector } from "../arrow";
6
6
  export interface FunctionOptions {
7
7
  [key: string]: any;
8
8
  }
9
+ export interface EmbeddingFunctionConstructor<T extends EmbeddingFunction = EmbeddingFunction> {
10
+ new (modelOptions?: T["TOptions"]): T;
11
+ }
9
12
  /**
10
13
  * An embedding function that automatically creates vector representation for a given column.
11
14
  */
12
15
  export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptions = FunctionOptions> {
16
+ /**
17
+ * @ignore
18
+ * This is only used for associating the options type with the class for type checking
19
+ */
20
+ readonly TOptions: M;
13
21
  /**
14
22
  * Convert the embedding function to a JSON object
15
23
  * It is used to serialize the embedding function to the schema
@@ -63,7 +71,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
63
71
  /**
64
72
  Compute the embeddings for a single query
65
73
  */
66
- computeQueryEmbeddings(data: T): Promise<IntoVector>;
74
+ computeQueryEmbeddings(data: T): Promise<Awaited<IntoVector>>;
67
75
  }
68
76
  export interface FieldOptions<T extends DataType = DataType> {
69
77
  datatype: T;
@@ -21,6 +21,12 @@ const sanitize_1 = require("../sanitize");
21
21
  * An embedding function that automatically creates vector representation for a given column.
22
22
  */
23
23
  class EmbeddingFunction {
24
+ /**
25
+ * @ignore
26
+ * This is only used for associating the options type with the class for type checking
27
+ */
28
+ // biome-ignore lint/style/useNamingConvention: we want to keep the name as it is
29
+ TOptions;
24
30
  /**
25
31
  * sourceField is used in combination with `LanceSchema` to provide a declarative data model
26
32
  *
@@ -1,14 +1,15 @@
1
+ import { type EmbeddingCreateParams } from "openai/resources";
1
2
  import { Float } from "../arrow";
2
3
  import { EmbeddingFunction } from "./embedding_function";
3
4
  export type OpenAIOptions = {
4
- apiKey?: string;
5
- model?: string;
5
+ apiKey: string;
6
+ model: EmbeddingCreateParams["model"];
6
7
  };
7
- export declare class OpenAIEmbeddingFunction extends EmbeddingFunction<string, OpenAIOptions> {
8
+ export declare class OpenAIEmbeddingFunction extends EmbeddingFunction<string, Partial<OpenAIOptions>> {
8
9
  #private;
9
- constructor(options?: OpenAIOptions);
10
+ constructor(options?: Partial<OpenAIOptions>);
10
11
  toJSON(): {
11
- model: string;
12
+ model: (string & {}) | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large";
12
13
  };
13
14
  ndims(): number;
14
15
  embeddingDataType(): Float;