@lancedb/lancedb 0.5.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +3 -3
- package/biome.json +19 -3
- package/dist/arrow.d.ts +42 -7
- package/dist/arrow.js +6 -5
- package/dist/connection.d.ts +55 -29
- package/dist/connection.js +22 -74
- package/dist/embedding/embedding_function.d.ts +11 -3
- package/dist/embedding/embedding_function.js +36 -12
- package/dist/embedding/openai.d.ts +6 -5
- package/dist/embedding/openai.js +4 -2
- package/dist/embedding/registry.d.ts +10 -11
- package/dist/embedding/registry.js +4 -0
- package/dist/index.d.ts +51 -3
- package/dist/index.js +28 -4
- package/dist/merge.d.ts +54 -0
- package/dist/merge.js +64 -0
- package/dist/native.d.ts +34 -7
- package/dist/native.js +26 -9
- package/dist/query.d.ts +51 -16
- package/dist/query.js +122 -21
- package/dist/remote/client.d.ts +28 -0
- package/dist/remote/client.js +172 -0
- package/dist/remote/connection.d.ts +25 -0
- package/dist/remote/connection.js +110 -0
- package/dist/remote/index.d.ts +3 -0
- package/dist/remote/index.js +9 -0
- package/dist/remote/table.d.ts +42 -0
- package/dist/remote/table.js +179 -0
- package/dist/sanitize.d.ts +3 -2
- package/dist/sanitize.js +55 -1
- package/dist/table.d.ts +116 -25
- package/dist/table.js +117 -233
- package/dist/util.d.ts +14 -0
- package/dist/util.js +65 -0
- package/examples/ann_indexes.ts +49 -0
- package/examples/basic.ts +149 -0
- package/examples/embedding.ts +83 -0
- package/examples/filtering.ts +34 -0
- package/examples/jsconfig.json +27 -0
- package/examples/package-lock.json +79 -0
- package/examples/package.json +18 -0
- package/examples/search.ts +37 -0
- package/lancedb/arrow.ts +87 -24
- package/lancedb/connection.ts +115 -92
- package/lancedb/embedding/embedding_function.ts +48 -16
- package/lancedb/embedding/openai.ts +11 -6
- package/lancedb/embedding/registry.ts +38 -22
- package/lancedb/index.ts +101 -2
- package/lancedb/merge.ts +70 -0
- package/lancedb/query.ts +168 -39
- package/lancedb/remote/client.ts +221 -0
- package/lancedb/remote/connection.ts +201 -0
- package/lancedb/remote/index.ts +3 -0
- package/lancedb/remote/table.ts +226 -0
- package/lancedb/sanitize.ts +73 -1
- package/lancedb/table.ts +344 -101
- package/lancedb/util.ts +69 -0
- package/native.d.ts +208 -0
- package/nodejs-artifacts/arrow.d.ts +42 -7
- package/nodejs-artifacts/arrow.js +6 -5
- package/nodejs-artifacts/connection.d.ts +55 -29
- package/nodejs-artifacts/connection.js +22 -74
- package/nodejs-artifacts/embedding/embedding_function.d.ts +11 -3
- package/nodejs-artifacts/embedding/embedding_function.js +36 -12
- package/nodejs-artifacts/embedding/openai.d.ts +6 -5
- package/nodejs-artifacts/embedding/openai.js +4 -2
- package/nodejs-artifacts/embedding/registry.d.ts +10 -11
- package/nodejs-artifacts/embedding/registry.js +4 -0
- package/nodejs-artifacts/index.d.ts +51 -3
- package/nodejs-artifacts/index.js +28 -4
- package/nodejs-artifacts/merge.d.ts +54 -0
- package/nodejs-artifacts/merge.js +64 -0
- package/nodejs-artifacts/native.d.ts +34 -7
- package/nodejs-artifacts/native.js +26 -9
- package/nodejs-artifacts/query.d.ts +51 -16
- package/nodejs-artifacts/query.js +122 -21
- package/nodejs-artifacts/remote/client.d.ts +28 -0
- package/nodejs-artifacts/remote/client.js +172 -0
- package/nodejs-artifacts/remote/connection.d.ts +25 -0
- package/nodejs-artifacts/remote/connection.js +110 -0
- package/nodejs-artifacts/remote/index.d.ts +3 -0
- package/nodejs-artifacts/remote/index.js +9 -0
- package/nodejs-artifacts/remote/table.d.ts +42 -0
- package/nodejs-artifacts/remote/table.js +179 -0
- package/nodejs-artifacts/sanitize.d.ts +3 -2
- package/nodejs-artifacts/sanitize.js +55 -1
- package/nodejs-artifacts/table.d.ts +116 -25
- package/nodejs-artifacts/table.js +117 -233
- package/nodejs-artifacts/util.d.ts +14 -0
- package/nodejs-artifacts/util.js +65 -0
- package/package.json +25 -11
package/native.d.ts
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
/* tslint:disable */
|
|
2
|
+
/* eslint-disable */
|
|
3
|
+
|
|
4
|
+
/* auto-generated by NAPI-RS */
|
|
5
|
+
|
|
6
|
+
/** A description of an index currently configured on a column */
|
|
7
|
+
export interface IndexConfig {
|
|
8
|
+
/** The name of the index */
|
|
9
|
+
name: string
|
|
10
|
+
/** The type of the index */
|
|
11
|
+
indexType: string
|
|
12
|
+
/**
|
|
13
|
+
* The columns in the index
|
|
14
|
+
*
|
|
15
|
+
* Currently this is always an array of size 1. In the future there may
|
|
16
|
+
* be more columns to represent composite indices.
|
|
17
|
+
*/
|
|
18
|
+
columns: Array<string>
|
|
19
|
+
}
|
|
20
|
+
/** Statistics about a compaction operation. */
|
|
21
|
+
export interface CompactionStats {
|
|
22
|
+
/** The number of fragments removed */
|
|
23
|
+
fragmentsRemoved: number
|
|
24
|
+
/** The number of new, compacted fragments added */
|
|
25
|
+
fragmentsAdded: number
|
|
26
|
+
/** The number of data files removed */
|
|
27
|
+
filesRemoved: number
|
|
28
|
+
/** The number of new, compacted data files added */
|
|
29
|
+
filesAdded: number
|
|
30
|
+
}
|
|
31
|
+
/** Statistics about a cleanup operation */
|
|
32
|
+
export interface RemovalStats {
|
|
33
|
+
/** The number of bytes removed */
|
|
34
|
+
bytesRemoved: number
|
|
35
|
+
/** The number of old versions removed */
|
|
36
|
+
oldVersionsRemoved: number
|
|
37
|
+
}
|
|
38
|
+
/** Statistics about an optimize operation */
|
|
39
|
+
export interface OptimizeStats {
|
|
40
|
+
/** Statistics about the compaction operation */
|
|
41
|
+
compaction: CompactionStats
|
|
42
|
+
/** Statistics about the removal operation */
|
|
43
|
+
prune: RemovalStats
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* A definition of a column alteration. The alteration changes the column at
|
|
47
|
+
* `path` to have the new name `rename` (if provided) and to be nullable if
|
|
48
|
+
* `nullable` is true. At least one of `rename` or `nullable`
|
|
49
|
+
* must be provided.
|
|
50
|
+
*/
|
|
51
|
+
export interface ColumnAlteration {
|
|
52
|
+
/**
|
|
53
|
+
* The path to the column to alter. This is a dot-separated path to the column.
|
|
54
|
+
* If it is a top-level column then it is just the name of the column. If it is
|
|
55
|
+
* a nested column then it is the path to the column, e.g. "a.b.c" for a column
|
|
56
|
+
* `c` nested inside a column `b` nested inside a column `a`.
|
|
57
|
+
*/
|
|
58
|
+
path: string
|
|
59
|
+
/**
|
|
60
|
+
* The new name of the column. If not provided then the name will not be changed.
|
|
61
|
+
* This must be distinct from the names of all other columns in the table.
|
|
62
|
+
*/
|
|
63
|
+
rename?: string
|
|
64
|
+
/** Set the new nullability. Note that a nullable column cannot be made non-nullable. */
|
|
65
|
+
nullable?: boolean
|
|
66
|
+
}
|
|
67
|
+
/** A definition of a new column to add to a table. */
|
|
68
|
+
export interface AddColumnsSql {
|
|
69
|
+
/** The name of the new column. */
|
|
70
|
+
name: string
|
|
71
|
+
/**
|
|
72
|
+
* The values to populate the new column with, as a SQL expression.
|
|
73
|
+
* The expression can reference other columns in the table.
|
|
74
|
+
*/
|
|
75
|
+
valueSql: string
|
|
76
|
+
}
|
|
77
|
+
export interface IndexStatistics {
|
|
78
|
+
/** The number of rows indexed by the index */
|
|
79
|
+
numIndexedRows: number
|
|
80
|
+
/** The number of rows not indexed */
|
|
81
|
+
numUnindexedRows: number
|
|
82
|
+
/** The type of the index */
|
|
83
|
+
indexType?: string
|
|
84
|
+
/** The metadata for each index */
|
|
85
|
+
indices: Array<IndexMetadata>
|
|
86
|
+
}
|
|
87
|
+
export interface IndexMetadata {
|
|
88
|
+
metricType?: string
|
|
89
|
+
indexType?: string
|
|
90
|
+
}
|
|
91
|
+
export interface ConnectionOptions {
|
|
92
|
+
/**
|
|
93
|
+
* (For LanceDB OSS only): The interval, in seconds, at which to check for
|
|
94
|
+
* updates to the table from other processes. If unset, then consistency is not
|
|
95
|
+
* checked. For performance reasons, this is the default. For strong
|
|
96
|
+
* consistency, set this to zero seconds. Then every read will check for
|
|
97
|
+
* updates from other processes. As a compromise, you can set this to a
|
|
98
|
+
* non-zero value for eventual consistency. If more than that interval
|
|
99
|
+
* has passed since the last check, then the table will be checked for updates.
|
|
100
|
+
* Note: this consistency only applies to read operations. Write operations are
|
|
101
|
+
* always consistent.
|
|
102
|
+
*/
|
|
103
|
+
readConsistencyInterval?: number
|
|
104
|
+
/**
|
|
105
|
+
* (For LanceDB OSS only): configuration for object storage.
|
|
106
|
+
*
|
|
107
|
+
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
|
108
|
+
*/
|
|
109
|
+
storageOptions?: Record<string, string>
|
|
110
|
+
}
|
|
111
|
+
/** Write mode for writing a table. */
|
|
112
|
+
export const enum WriteMode {
|
|
113
|
+
Create = 'Create',
|
|
114
|
+
Append = 'Append',
|
|
115
|
+
Overwrite = 'Overwrite'
|
|
116
|
+
}
|
|
117
|
+
/** Write options when creating a Table. */
|
|
118
|
+
export interface WriteOptions {
|
|
119
|
+
/** Write mode for writing to a table. */
|
|
120
|
+
mode?: WriteMode
|
|
121
|
+
}
|
|
122
|
+
export interface OpenTableOptions {
|
|
123
|
+
storageOptions?: Record<string, string>
|
|
124
|
+
}
|
|
125
|
+
export class Connection {
|
|
126
|
+
/** Create a new Connection instance from the given URI. */
|
|
127
|
+
static new(uri: string, options: ConnectionOptions): Promise<Connection>
|
|
128
|
+
display(): string
|
|
129
|
+
isOpen(): boolean
|
|
130
|
+
close(): void
|
|
131
|
+
/** List all tables in the dataset. */
|
|
132
|
+
tableNames(startAfter?: string | undefined | null, limit?: number | undefined | null): Promise<Array<string>>
|
|
133
|
+
/**
|
|
134
|
+
* Create table from an Apache Arrow IPC (file) buffer.
|
|
135
|
+
*
|
|
136
|
+
* Parameters:
|
|
137
|
+
* - name: The name of the table.
|
|
138
|
+
* - buf: The buffer containing the IPC file.
|
|
139
|
+
*
|
|
140
|
+
*/
|
|
141
|
+
createTable(name: string, buf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
|
|
142
|
+
createEmptyTable(name: string, schemaBuf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
|
|
143
|
+
openTable(name: string, storageOptions?: Record<string, string> | undefined | null, indexCacheSize?: number | undefined | null): Promise<Table>
|
|
144
|
+
/** Drop the table with the given name, or raise an error if the table does not exist. */
|
|
145
|
+
dropTable(name: string): Promise<void>
|
|
146
|
+
}
|
|
147
|
+
export class Index {
|
|
148
|
+
static ivfPq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
|
|
149
|
+
static btree(): Index
|
|
150
|
+
}
|
|
151
|
+
/** TypeScript-style Async Iterator over RecordBatches */
|
|
152
|
+
export class RecordBatchIterator {
|
|
153
|
+
next(): Promise<Buffer | null>
|
|
154
|
+
}
|
|
155
|
+
/** A builder used to create and run a merge insert operation */
|
|
156
|
+
export class NativeMergeInsertBuilder {
|
|
157
|
+
whenMatchedUpdateAll(condition?: string | undefined | null): NativeMergeInsertBuilder
|
|
158
|
+
whenNotMatchedInsertAll(): NativeMergeInsertBuilder
|
|
159
|
+
whenNotMatchedBySourceDelete(filter?: string | undefined | null): NativeMergeInsertBuilder
|
|
160
|
+
execute(buf: Buffer): Promise<void>
|
|
161
|
+
}
|
|
162
|
+
export class Query {
|
|
163
|
+
onlyIf(predicate: string): void
|
|
164
|
+
select(columns: Array<[string, string]>): void
|
|
165
|
+
limit(limit: number): void
|
|
166
|
+
nearestTo(vector: Float32Array): VectorQuery
|
|
167
|
+
execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
|
|
168
|
+
explainPlan(verbose: boolean): Promise<string>
|
|
169
|
+
}
|
|
170
|
+
export class VectorQuery {
|
|
171
|
+
column(column: string): void
|
|
172
|
+
distanceType(distanceType: string): void
|
|
173
|
+
postfilter(): void
|
|
174
|
+
refineFactor(refineFactor: number): void
|
|
175
|
+
nprobes(nprobe: number): void
|
|
176
|
+
bypassVectorIndex(): void
|
|
177
|
+
onlyIf(predicate: string): void
|
|
178
|
+
select(columns: Array<[string, string]>): void
|
|
179
|
+
limit(limit: number): void
|
|
180
|
+
execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
|
|
181
|
+
explainPlan(verbose: boolean): Promise<string>
|
|
182
|
+
}
|
|
183
|
+
export class Table {
|
|
184
|
+
name: string
|
|
185
|
+
display(): string
|
|
186
|
+
isOpen(): boolean
|
|
187
|
+
close(): void
|
|
188
|
+
/** Return Schema as empty Arrow IPC file. */
|
|
189
|
+
schema(): Promise<Buffer>
|
|
190
|
+
add(buf: Buffer, mode: string): Promise<void>
|
|
191
|
+
countRows(filter?: string | undefined | null): Promise<number>
|
|
192
|
+
delete(predicate: string): Promise<void>
|
|
193
|
+
createIndex(index: Index | undefined | null, column: string, replace?: boolean | undefined | null): Promise<void>
|
|
194
|
+
update(onlyIf: string | undefined | null, columns: Array<[string, string]>): Promise<void>
|
|
195
|
+
query(): Query
|
|
196
|
+
vectorSearch(vector: Float32Array): VectorQuery
|
|
197
|
+
addColumns(transforms: Array<AddColumnsSql>): Promise<void>
|
|
198
|
+
alterColumns(alterations: Array<ColumnAlteration>): Promise<void>
|
|
199
|
+
dropColumns(columns: Array<string>): Promise<void>
|
|
200
|
+
version(): Promise<number>
|
|
201
|
+
checkout(version: number): Promise<void>
|
|
202
|
+
checkoutLatest(): Promise<void>
|
|
203
|
+
restore(): Promise<void>
|
|
204
|
+
optimize(olderThanMs?: number | undefined | null): Promise<OptimizeStats>
|
|
205
|
+
listIndices(): Promise<Array<IndexConfig>>
|
|
206
|
+
indexStats(indexName: string): Promise<IndexStatistics | null>
|
|
207
|
+
mergeInsert(on: Array<string>): NativeMergeInsertBuilder
|
|
208
|
+
}
|
|
@@ -1,9 +1,43 @@
|
|
|
1
1
|
/// <reference types="node" />
|
|
2
|
-
import { Table as ArrowTable, Binary, DataType, FixedSizeBinary, FixedSizeList, Float, Int, LargeBinary, List, Null, Schema, Struct, Utf8 } from "apache-arrow";
|
|
2
|
+
import { Table as ArrowTable, Binary, BufferType, DataType, Field, FixedSizeBinary, FixedSizeList, Float, Int, LargeBinary, List, Null, RecordBatch, Schema, Struct, Utf8 } from "apache-arrow";
|
|
3
|
+
import { Buffers } from "apache-arrow/data";
|
|
3
4
|
import { type EmbeddingFunction } from "./embedding/embedding_function";
|
|
4
5
|
import { EmbeddingFunctionConfig } from "./embedding/registry";
|
|
5
6
|
export * from "apache-arrow";
|
|
6
|
-
export
|
|
7
|
+
export type SchemaLike = Schema | {
|
|
8
|
+
fields: FieldLike[];
|
|
9
|
+
metadata: Map<string, string>;
|
|
10
|
+
get names(): unknown[];
|
|
11
|
+
};
|
|
12
|
+
export type FieldLike = Field | {
|
|
13
|
+
type: string;
|
|
14
|
+
name: string;
|
|
15
|
+
nullable?: boolean;
|
|
16
|
+
metadata?: Map<string, string>;
|
|
17
|
+
};
|
|
18
|
+
export type DataLike = import("apache-arrow").Data<Struct<any>> | {
|
|
19
|
+
type: any;
|
|
20
|
+
length: number;
|
|
21
|
+
offset: number;
|
|
22
|
+
stride: number;
|
|
23
|
+
nullable: boolean;
|
|
24
|
+
children: DataLike[];
|
|
25
|
+
get nullCount(): number;
|
|
26
|
+
values: Buffers<any>[BufferType.DATA];
|
|
27
|
+
typeIds: Buffers<any>[BufferType.TYPE];
|
|
28
|
+
nullBitmap: Buffers<any>[BufferType.VALIDITY];
|
|
29
|
+
valueOffsets: Buffers<any>[BufferType.OFFSET];
|
|
30
|
+
};
|
|
31
|
+
export type RecordBatchLike = RecordBatch | {
|
|
32
|
+
schema: SchemaLike;
|
|
33
|
+
data: DataLike;
|
|
34
|
+
};
|
|
35
|
+
export type TableLike = ArrowTable | {
|
|
36
|
+
schema: SchemaLike;
|
|
37
|
+
batches: RecordBatchLike[];
|
|
38
|
+
};
|
|
39
|
+
export type IntoVector = Float32Array | Float64Array | number[] | Promise<Float32Array | Float64Array | number[]>;
|
|
40
|
+
export declare function isArrowTable(value: object): value is TableLike;
|
|
7
41
|
export declare function isDataType(value: unknown): value is DataType;
|
|
8
42
|
export declare function isNull(value: unknown): value is Null;
|
|
9
43
|
export declare function isInt(value: unknown): value is Int;
|
|
@@ -25,7 +59,7 @@ export declare function isUnion(value: unknown): value is Struct;
|
|
|
25
59
|
export declare function isFixedSizeBinary(value: unknown): value is FixedSizeBinary;
|
|
26
60
|
export declare function isFixedSizeList(value: unknown): value is FixedSizeList;
|
|
27
61
|
/** Data type accepted by NodeJS SDK */
|
|
28
|
-
export type Data = Record<string, unknown>[] |
|
|
62
|
+
export type Data = Record<string, unknown>[] | TableLike;
|
|
29
63
|
export declare class VectorColumnOptions {
|
|
30
64
|
/** Vector column type. */
|
|
31
65
|
type: Float;
|
|
@@ -33,9 +67,10 @@ export declare class VectorColumnOptions {
|
|
|
33
67
|
}
|
|
34
68
|
/** Options to control the makeArrowTable call. */
|
|
35
69
|
export declare class MakeArrowTableOptions {
|
|
36
|
-
schema?:
|
|
70
|
+
schema?: SchemaLike;
|
|
37
71
|
vectorColumns: Record<string, VectorColumnOptions>;
|
|
38
72
|
embeddings?: EmbeddingFunction<unknown>;
|
|
73
|
+
embeddingFunction?: EmbeddingFunctionConfig;
|
|
39
74
|
/**
|
|
40
75
|
* If true then string columns will be encoded with dictionary encoding
|
|
41
76
|
*
|
|
@@ -144,7 +179,7 @@ export declare function makeArrowTable(data: Array<Record<string, unknown>>, opt
|
|
|
144
179
|
/**
|
|
145
180
|
* Create an empty Arrow table with the provided schema
|
|
146
181
|
*/
|
|
147
|
-
export declare function makeEmptyTable(schema:
|
|
182
|
+
export declare function makeEmptyTable(schema: SchemaLike, metadata?: Map<string, string>): ArrowTable;
|
|
148
183
|
/**
|
|
149
184
|
* Convert an Array of records into an Arrow Table, optionally applying an
|
|
150
185
|
* embeddings function to it.
|
|
@@ -190,7 +225,7 @@ export declare function fromRecordsToStreamBuffer(data: Array<Record<string, unk
|
|
|
190
225
|
*
|
|
191
226
|
* `schema` is required if the table is empty
|
|
192
227
|
*/
|
|
193
|
-
export declare function fromTableToBuffer(table: ArrowTable, embeddings?: EmbeddingFunctionConfig, schema?:
|
|
228
|
+
export declare function fromTableToBuffer(table: ArrowTable, embeddings?: EmbeddingFunctionConfig, schema?: SchemaLike): Promise<Buffer>;
|
|
194
229
|
/**
|
|
195
230
|
* Serialize an Arrow Table into a buffer using the Arrow IPC File serialization
|
|
196
231
|
*
|
|
@@ -208,7 +243,7 @@ export declare function fromDataToBuffer(data: Data, embeddings?: EmbeddingFunct
|
|
|
208
243
|
*
|
|
209
244
|
* `schema` is required if the table is empty
|
|
210
245
|
*/
|
|
211
|
-
export declare function fromTableToStreamBuffer(table: ArrowTable, embeddings?: EmbeddingFunctionConfig, schema?:
|
|
246
|
+
export declare function fromTableToStreamBuffer(table: ArrowTable, embeddings?: EmbeddingFunctionConfig, schema?: SchemaLike): Promise<Buffer>;
|
|
212
247
|
/**
|
|
213
248
|
* Create an empty table with the given schema
|
|
214
249
|
*/
|
|
@@ -184,6 +184,7 @@ class MakeArrowTableOptions {
|
|
|
184
184
|
vector: new VectorColumnOptions(),
|
|
185
185
|
};
|
|
186
186
|
embeddings;
|
|
187
|
+
embeddingFunction;
|
|
187
188
|
/**
|
|
188
189
|
* If true then string columns will be encoded with dictionary encoding
|
|
189
190
|
*
|
|
@@ -299,7 +300,7 @@ function makeArrowTable(data, options, metadata) {
|
|
|
299
300
|
const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
|
|
300
301
|
if (opt.schema !== undefined && opt.schema !== null) {
|
|
301
302
|
opt.schema = (0, sanitize_1.sanitizeSchema)(opt.schema);
|
|
302
|
-
opt.schema = validateSchemaEmbeddings(opt.schema, data,
|
|
303
|
+
opt.schema = validateSchemaEmbeddings(opt.schema, data, options?.embeddingFunction);
|
|
303
304
|
}
|
|
304
305
|
const columns = {};
|
|
305
306
|
// TODO: sample dataset to find missing columns
|
|
@@ -504,15 +505,15 @@ async function applyEmbeddingsFromMetadata(table, schema) {
|
|
|
504
505
|
}
|
|
505
506
|
/** Helper function to apply embeddings to an input table */
|
|
506
507
|
async function applyEmbeddings(table, embeddings, schema) {
|
|
508
|
+
if (schema !== undefined && schema !== null) {
|
|
509
|
+
schema = (0, sanitize_1.sanitizeSchema)(schema);
|
|
510
|
+
}
|
|
507
511
|
if (schema?.metadata.has("embedding_functions")) {
|
|
508
512
|
return applyEmbeddingsFromMetadata(table, schema);
|
|
509
513
|
}
|
|
510
514
|
else if (embeddings == null || embeddings === undefined) {
|
|
511
515
|
return table;
|
|
512
516
|
}
|
|
513
|
-
if (schema !== undefined && schema !== null) {
|
|
514
|
-
schema = (0, sanitize_1.sanitizeSchema)(schema);
|
|
515
|
-
}
|
|
516
517
|
// Convert from ArrowTable to Record<String, Vector>
|
|
517
518
|
const colEntries = [...Array(table.numCols).keys()].map((_, idx) => {
|
|
518
519
|
const name = table.schema.fields[idx].name;
|
|
@@ -669,7 +670,7 @@ async function fromDataToBuffer(data, embeddings, schema) {
|
|
|
669
670
|
schema = (0, sanitize_1.sanitizeSchema)(schema);
|
|
670
671
|
}
|
|
671
672
|
if (isArrowTable(data)) {
|
|
672
|
-
return fromTableToBuffer(data, embeddings, schema);
|
|
673
|
+
return fromTableToBuffer((0, sanitize_1.sanitizeTable)(data), embeddings, schema);
|
|
673
674
|
}
|
|
674
675
|
else {
|
|
675
676
|
const table = await convertToTable(data, embeddings, { schema });
|
|
@@ -1,20 +1,7 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { Data, SchemaLike, TableLike } from "./arrow";
|
|
2
2
|
import { EmbeddingFunctionConfig } from "./embedding/registry";
|
|
3
|
-
import {
|
|
3
|
+
import { Connection as LanceDbConnection } from "./native";
|
|
4
4
|
import { Table } from "./table";
|
|
5
|
-
/**
|
|
6
|
-
* Connect to a LanceDB instance at the given URI.
|
|
7
|
-
*
|
|
8
|
-
* Accepted formats:
|
|
9
|
-
*
|
|
10
|
-
* - `/path/to/database` - local database
|
|
11
|
-
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
|
12
|
-
* - `db://host:port` - remote database (LanceDB cloud)
|
|
13
|
-
* @param {string} uri - The uri of the database. If the database uri starts
|
|
14
|
-
* with `db://` then it connects to a remote database.
|
|
15
|
-
* @see {@link ConnectionOptions} for more details on the URI format.
|
|
16
|
-
*/
|
|
17
|
-
export declare function connect(uri: string, opts?: Partial<ConnectionOptions>): Promise<Connection>;
|
|
18
5
|
export interface CreateTableOptions {
|
|
19
6
|
/**
|
|
20
7
|
* The mode to use when creating the table.
|
|
@@ -40,7 +27,13 @@ export interface CreateTableOptions {
|
|
|
40
27
|
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
|
41
28
|
*/
|
|
42
29
|
storageOptions?: Record<string, string>;
|
|
43
|
-
|
|
30
|
+
/**
|
|
31
|
+
* If true then data files will be written with the legacy format
|
|
32
|
+
*
|
|
33
|
+
* The default is true while the new format is in beta
|
|
34
|
+
*/
|
|
35
|
+
useLegacyFormat?: boolean;
|
|
36
|
+
schema?: SchemaLike;
|
|
44
37
|
embeddingFunction?: EmbeddingFunctionConfig;
|
|
45
38
|
}
|
|
46
39
|
export interface OpenTableOptions {
|
|
@@ -96,11 +89,11 @@ export interface TableNamesOptions {
|
|
|
96
89
|
* Any created tables are independent and will continue to work even if
|
|
97
90
|
* the underlying connection has been closed.
|
|
98
91
|
*/
|
|
99
|
-
export declare class Connection {
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
isOpen(): boolean;
|
|
92
|
+
export declare abstract class Connection {
|
|
93
|
+
/**
|
|
94
|
+
* Return true if the connection has not been closed
|
|
95
|
+
*/
|
|
96
|
+
abstract isOpen(): boolean;
|
|
104
97
|
/**
|
|
105
98
|
* Close the connection, releasing any underlying resources.
|
|
106
99
|
*
|
|
@@ -108,38 +101,71 @@ export declare class Connection {
|
|
|
108
101
|
*
|
|
109
102
|
* Any attempt to use the connection after it is closed will result in an error.
|
|
110
103
|
*/
|
|
111
|
-
close(): void;
|
|
112
|
-
/**
|
|
113
|
-
|
|
104
|
+
abstract close(): void;
|
|
105
|
+
/**
|
|
106
|
+
* Return a brief description of the connection
|
|
107
|
+
*/
|
|
108
|
+
abstract display(): string;
|
|
114
109
|
/**
|
|
115
110
|
* List all the table names in this database.
|
|
116
111
|
*
|
|
117
112
|
* Tables will be returned in lexicographical order.
|
|
118
113
|
* @param {Partial<TableNamesOptions>} options - options to control the
|
|
119
114
|
* paging / start point
|
|
115
|
+
*
|
|
120
116
|
*/
|
|
121
|
-
tableNames(options?: Partial<TableNamesOptions>): Promise<string[]>;
|
|
117
|
+
abstract tableNames(options?: Partial<TableNamesOptions>): Promise<string[]>;
|
|
122
118
|
/**
|
|
123
119
|
* Open a table in the database.
|
|
124
120
|
* @param {string} name - The name of the table
|
|
125
121
|
*/
|
|
126
|
-
openTable(name: string, options?: Partial<OpenTableOptions>): Promise<Table>;
|
|
122
|
+
abstract openTable(name: string, options?: Partial<OpenTableOptions>): Promise<Table>;
|
|
123
|
+
/**
|
|
124
|
+
* Creates a new Table and initializes it with new data.
|
|
125
|
+
* @param {object} options - The options object.
|
|
126
|
+
* @param {string} options.name - The name of the table.
|
|
127
|
+
* @param {Data} options.data - Non-empty Array of Records to be inserted into the table
|
|
128
|
+
*
|
|
129
|
+
*/
|
|
130
|
+
abstract createTable(options: {
|
|
131
|
+
name: string;
|
|
132
|
+
data: Data;
|
|
133
|
+
} & Partial<CreateTableOptions>): Promise<Table>;
|
|
127
134
|
/**
|
|
128
135
|
* Creates a new Table and initializes it with new data.
|
|
129
136
|
* @param {string} name - The name of the table.
|
|
130
|
-
* @param {Record<string, unknown>[] |
|
|
137
|
+
* @param {Record<string, unknown>[] | TableLike} data - Non-empty Array of Records
|
|
131
138
|
* to be inserted into the table
|
|
132
139
|
*/
|
|
133
|
-
createTable(name: string, data: Record<string, unknown>[] |
|
|
140
|
+
abstract createTable(name: string, data: Record<string, unknown>[] | TableLike, options?: Partial<CreateTableOptions>): Promise<Table>;
|
|
134
141
|
/**
|
|
135
142
|
* Creates a new empty Table
|
|
136
143
|
* @param {string} name - The name of the table.
|
|
137
144
|
* @param {Schema} schema - The schema of the table
|
|
138
145
|
*/
|
|
139
|
-
createEmptyTable(name: string, schema:
|
|
146
|
+
abstract createEmptyTable(name: string, schema: import("./arrow").SchemaLike, options?: Partial<CreateTableOptions>): Promise<Table>;
|
|
140
147
|
/**
|
|
141
148
|
* Drop an existing table.
|
|
142
149
|
* @param {string} name The name of the table to drop.
|
|
143
150
|
*/
|
|
151
|
+
abstract dropTable(name: string): Promise<void>;
|
|
152
|
+
}
|
|
153
|
+
export declare class LocalConnection extends Connection {
|
|
154
|
+
readonly inner: LanceDbConnection;
|
|
155
|
+
constructor(inner: LanceDbConnection);
|
|
156
|
+
isOpen(): boolean;
|
|
157
|
+
close(): void;
|
|
158
|
+
display(): string;
|
|
159
|
+
tableNames(options?: Partial<TableNamesOptions>): Promise<string[]>;
|
|
160
|
+
openTable(name: string, options?: Partial<OpenTableOptions>): Promise<Table>;
|
|
161
|
+
createTable(nameOrOptions: string | ({
|
|
162
|
+
name: string;
|
|
163
|
+
data: Data;
|
|
164
|
+
} & Partial<CreateTableOptions>), data?: Record<string, unknown>[] | TableLike, options?: Partial<CreateTableOptions>): Promise<Table>;
|
|
165
|
+
createEmptyTable(name: string, schema: import("./arrow").SchemaLike, options?: Partial<CreateTableOptions>): Promise<Table>;
|
|
144
166
|
dropTable(name: string): Promise<void>;
|
|
145
167
|
}
|
|
168
|
+
/**
|
|
169
|
+
* Takes storage options and makes all the keys snake case.
|
|
170
|
+
*/
|
|
171
|
+
export declare function cleanseStorageOptions(options?: Record<string, string>): Record<string, string> | undefined;
|
|
@@ -13,30 +13,10 @@
|
|
|
13
13
|
// See the License for the specific language governing permissions and
|
|
14
14
|
// limitations under the License.
|
|
15
15
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
-
exports.
|
|
16
|
+
exports.cleanseStorageOptions = exports.LocalConnection = exports.Connection = void 0;
|
|
17
17
|
const arrow_1 = require("./arrow");
|
|
18
18
|
const registry_1 = require("./embedding/registry");
|
|
19
|
-
const native_1 = require("./native");
|
|
20
19
|
const table_1 = require("./table");
|
|
21
|
-
/**
|
|
22
|
-
* Connect to a LanceDB instance at the given URI.
|
|
23
|
-
*
|
|
24
|
-
* Accepted formats:
|
|
25
|
-
*
|
|
26
|
-
* - `/path/to/database` - local database
|
|
27
|
-
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
|
28
|
-
* - `db://host:port` - remote database (LanceDB cloud)
|
|
29
|
-
* @param {string} uri - The uri of the database. If the database uri starts
|
|
30
|
-
* with `db://` then it connects to a remote database.
|
|
31
|
-
* @see {@link ConnectionOptions} for more details on the URI format.
|
|
32
|
-
*/
|
|
33
|
-
async function connect(uri, opts) {
|
|
34
|
-
opts = opts ?? {};
|
|
35
|
-
opts.storageOptions = cleanseStorageOptions(opts.storageOptions);
|
|
36
|
-
const nativeConn = await native_1.Connection.new(uri, opts);
|
|
37
|
-
return new Connection(nativeConn);
|
|
38
|
-
}
|
|
39
|
-
exports.connect = connect;
|
|
40
20
|
/**
|
|
41
21
|
* A LanceDB Connection that allows you to open tables and create new ones.
|
|
42
22
|
*
|
|
@@ -56,74 +36,45 @@ exports.connect = connect;
|
|
|
56
36
|
* the underlying connection has been closed.
|
|
57
37
|
*/
|
|
58
38
|
class Connection {
|
|
39
|
+
[Symbol.for("nodejs.util.inspect.custom")]() {
|
|
40
|
+
return this.display();
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
exports.Connection = Connection;
|
|
44
|
+
class LocalConnection extends Connection {
|
|
59
45
|
inner;
|
|
60
46
|
constructor(inner) {
|
|
47
|
+
super();
|
|
61
48
|
this.inner = inner;
|
|
62
49
|
}
|
|
63
|
-
/** Return true if the connection has not been closed */
|
|
64
50
|
isOpen() {
|
|
65
51
|
return this.inner.isOpen();
|
|
66
52
|
}
|
|
67
|
-
/**
|
|
68
|
-
* Close the connection, releasing any underlying resources.
|
|
69
|
-
*
|
|
70
|
-
* It is safe to call this method multiple times.
|
|
71
|
-
*
|
|
72
|
-
* Any attempt to use the connection after it is closed will result in an error.
|
|
73
|
-
*/
|
|
74
53
|
close() {
|
|
75
54
|
this.inner.close();
|
|
76
55
|
}
|
|
77
|
-
/** Return a brief description of the connection */
|
|
78
56
|
display() {
|
|
79
57
|
return this.inner.display();
|
|
80
58
|
}
|
|
81
|
-
/**
|
|
82
|
-
* List all the table names in this database.
|
|
83
|
-
*
|
|
84
|
-
* Tables will be returned in lexicographical order.
|
|
85
|
-
* @param {Partial<TableNamesOptions>} options - options to control the
|
|
86
|
-
* paging / start point
|
|
87
|
-
*/
|
|
88
59
|
async tableNames(options) {
|
|
89
60
|
return this.inner.tableNames(options?.startAfter, options?.limit);
|
|
90
61
|
}
|
|
91
|
-
/**
|
|
92
|
-
* Open a table in the database.
|
|
93
|
-
* @param {string} name - The name of the table
|
|
94
|
-
*/
|
|
95
62
|
async openTable(name, options) {
|
|
96
63
|
const innerTable = await this.inner.openTable(name, cleanseStorageOptions(options?.storageOptions), options?.indexCacheSize);
|
|
97
|
-
return new table_1.
|
|
64
|
+
return new table_1.LocalTable(innerTable);
|
|
98
65
|
}
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
* to be inserted into the table
|
|
104
|
-
*/
|
|
105
|
-
async createTable(name, data, options) {
|
|
106
|
-
let mode = options?.mode ?? "create";
|
|
107
|
-
const existOk = options?.existOk ?? false;
|
|
108
|
-
if (mode === "create" && existOk) {
|
|
109
|
-
mode = "exist_ok";
|
|
110
|
-
}
|
|
111
|
-
let table;
|
|
112
|
-
if ((0, arrow_1.isArrowTable)(data)) {
|
|
113
|
-
table = data;
|
|
66
|
+
async createTable(nameOrOptions, data, options) {
|
|
67
|
+
if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
|
|
68
|
+
const { name, data, ...options } = nameOrOptions;
|
|
69
|
+
return this.createTable(name, data, options);
|
|
114
70
|
}
|
|
115
|
-
|
|
116
|
-
|
|
71
|
+
if (data === undefined) {
|
|
72
|
+
throw new Error("data is required");
|
|
117
73
|
}
|
|
118
|
-
const buf = await
|
|
119
|
-
const innerTable = await this.inner.createTable(
|
|
120
|
-
return new table_1.
|
|
74
|
+
const { buf, mode } = await table_1.Table.parseTableData(data, options);
|
|
75
|
+
const innerTable = await this.inner.createTable(nameOrOptions, buf, mode, cleanseStorageOptions(options?.storageOptions), options?.useLegacyFormat);
|
|
76
|
+
return new table_1.LocalTable(innerTable);
|
|
121
77
|
}
|
|
122
|
-
/**
|
|
123
|
-
* Creates a new empty Table
|
|
124
|
-
* @param {string} name - The name of the table.
|
|
125
|
-
* @param {Schema} schema - The schema of the table
|
|
126
|
-
*/
|
|
127
78
|
async createEmptyTable(name, schema, options) {
|
|
128
79
|
let mode = options?.mode ?? "create";
|
|
129
80
|
const existOk = options?.existOk ?? false;
|
|
@@ -138,18 +89,14 @@ class Connection {
|
|
|
138
89
|
}
|
|
139
90
|
const table = (0, arrow_1.makeEmptyTable)(schema, metadata);
|
|
140
91
|
const buf = await (0, arrow_1.fromTableToBuffer)(table);
|
|
141
|
-
const innerTable = await this.inner.createEmptyTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions));
|
|
142
|
-
return new table_1.
|
|
92
|
+
const innerTable = await this.inner.createEmptyTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions), options?.useLegacyFormat);
|
|
93
|
+
return new table_1.LocalTable(innerTable);
|
|
143
94
|
}
|
|
144
|
-
/**
|
|
145
|
-
* Drop an existing table.
|
|
146
|
-
* @param {string} name The name of the table to drop.
|
|
147
|
-
*/
|
|
148
95
|
async dropTable(name) {
|
|
149
96
|
return this.inner.dropTable(name);
|
|
150
97
|
}
|
|
151
98
|
}
|
|
152
|
-
exports.
|
|
99
|
+
exports.LocalConnection = LocalConnection;
|
|
153
100
|
/**
|
|
154
101
|
* Takes storage options and makes all the keys snake case.
|
|
155
102
|
*/
|
|
@@ -166,6 +113,7 @@ function cleanseStorageOptions(options) {
|
|
|
166
113
|
}
|
|
167
114
|
return result;
|
|
168
115
|
}
|
|
116
|
+
exports.cleanseStorageOptions = cleanseStorageOptions;
|
|
169
117
|
/**
|
|
170
118
|
* Convert a string to snake case. It might already be snake case, in which case it is
|
|
171
119
|
* returned unchanged.
|
|
@@ -1,15 +1,23 @@
|
|
|
1
1
|
import "reflect-metadata";
|
|
2
|
-
import { DataType, Float } from "../arrow";
|
|
2
|
+
import { DataType, Float, type IntoVector } from "../arrow";
|
|
3
3
|
/**
|
|
4
4
|
* Options for a given embedding function
|
|
5
5
|
*/
|
|
6
6
|
export interface FunctionOptions {
|
|
7
7
|
[key: string]: any;
|
|
8
8
|
}
|
|
9
|
+
export interface EmbeddingFunctionConstructor<T extends EmbeddingFunction = EmbeddingFunction> {
|
|
10
|
+
new (modelOptions?: T["TOptions"]): T;
|
|
11
|
+
}
|
|
9
12
|
/**
|
|
10
13
|
* An embedding function that automatically creates vector representation for a given column.
|
|
11
14
|
*/
|
|
12
15
|
export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptions = FunctionOptions> {
|
|
16
|
+
/**
|
|
17
|
+
* @ignore
|
|
18
|
+
* This is only used for associating the options type with the class for type checking
|
|
19
|
+
*/
|
|
20
|
+
readonly TOptions: M;
|
|
13
21
|
/**
|
|
14
22
|
* Convert the embedding function to a JSON object
|
|
15
23
|
* It is used to serialize the embedding function to the schema
|
|
@@ -51,7 +59,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
|
|
|
51
59
|
*
|
|
52
60
|
* @see {@link lancedb.LanceSchema}
|
|
53
61
|
*/
|
|
54
|
-
vectorField(
|
|
62
|
+
vectorField(optionsOrDatatype?: Partial<FieldOptions> | DataType): [DataType, Map<string, EmbeddingFunction>];
|
|
55
63
|
/** The number of dimensions of the embeddings */
|
|
56
64
|
ndims(): number | undefined;
|
|
57
65
|
/** The datatype of the embeddings */
|
|
@@ -63,7 +71,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
|
|
|
63
71
|
/**
|
|
64
72
|
Compute the embeddings for a single query
|
|
65
73
|
*/
|
|
66
|
-
computeQueryEmbeddings(data: T): Promise<
|
|
74
|
+
computeQueryEmbeddings(data: T): Promise<Awaited<IntoVector>>;
|
|
67
75
|
}
|
|
68
76
|
export interface FieldOptions<T extends DataType = DataType> {
|
|
69
77
|
datatype: T;
|