@lancedb/lancedb 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/arrow.d.ts +2 -0
- package/dist/arrow.js +2 -1
- package/dist/connection.d.ts +6 -0
- package/dist/connection.js +2 -2
- package/dist/embedding/embedding_function.d.ts +3 -3
- package/dist/embedding/embedding_function.js +30 -12
- package/dist/embedding/registry.d.ts +4 -0
- package/dist/embedding/registry.js +4 -0
- package/dist/native.d.ts +5 -4
- package/dist/query.d.ts +19 -7
- package/dist/query.js +24 -10
- package/dist/table.d.ts +18 -2
- package/dist/table.js +28 -1
- package/lancedb/arrow.ts +10 -4
- package/lancedb/connection.ts +8 -0
- package/lancedb/embedding/embedding_function.ts +37 -16
- package/lancedb/embedding/registry.ts +4 -0
- package/lancedb/query.ts +58 -15
- package/lancedb/table.ts +59 -4
- package/nodejs-artifacts/arrow.d.ts +2 -0
- package/nodejs-artifacts/arrow.js +2 -1
- package/nodejs-artifacts/connection.d.ts +6 -0
- package/nodejs-artifacts/connection.js +2 -2
- package/nodejs-artifacts/embedding/embedding_function.d.ts +3 -3
- package/nodejs-artifacts/embedding/embedding_function.js +30 -12
- package/nodejs-artifacts/embedding/registry.d.ts +4 -0
- package/nodejs-artifacts/embedding/registry.js +4 -0
- package/nodejs-artifacts/native.d.ts +5 -4
- package/nodejs-artifacts/query.d.ts +19 -7
- package/nodejs-artifacts/query.js +24 -10
- package/nodejs-artifacts/table.d.ts +18 -2
- package/nodejs-artifacts/table.js +28 -1
- package/package.json +6 -6
package/lancedb/query.ts
CHANGED
|
@@ -12,7 +12,12 @@
|
|
|
12
12
|
// See the License for the specific language governing permissions and
|
|
13
13
|
// limitations under the License.
|
|
14
14
|
|
|
15
|
-
import {
|
|
15
|
+
import {
|
|
16
|
+
Table as ArrowTable,
|
|
17
|
+
type IntoVector,
|
|
18
|
+
RecordBatch,
|
|
19
|
+
tableFromIPC,
|
|
20
|
+
} from "./arrow";
|
|
16
21
|
import { type IvfPqOptions } from "./indices";
|
|
17
22
|
import {
|
|
18
23
|
RecordBatchIterator as NativeBatchIterator,
|
|
@@ -50,6 +55,39 @@ export class RecordBatchIterator implements AsyncIterator<RecordBatch> {
|
|
|
50
55
|
}
|
|
51
56
|
/* eslint-enable */
|
|
52
57
|
|
|
58
|
+
class RecordBatchIterable<
|
|
59
|
+
NativeQueryType extends NativeQuery | NativeVectorQuery,
|
|
60
|
+
> implements AsyncIterable<RecordBatch>
|
|
61
|
+
{
|
|
62
|
+
private inner: NativeQueryType;
|
|
63
|
+
private options?: QueryExecutionOptions;
|
|
64
|
+
|
|
65
|
+
constructor(inner: NativeQueryType, options?: QueryExecutionOptions) {
|
|
66
|
+
this.inner = inner;
|
|
67
|
+
this.options = options;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
71
|
+
[Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>, any, undefined> {
|
|
72
|
+
return new RecordBatchIterator(
|
|
73
|
+
this.inner.execute(this.options?.maxBatchLength),
|
|
74
|
+
);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
* Options that control the behavior of a particular query execution
|
|
80
|
+
*/
|
|
81
|
+
export interface QueryExecutionOptions {
|
|
82
|
+
/**
|
|
83
|
+
* The maximum number of rows to return in a single batch
|
|
84
|
+
*
|
|
85
|
+
* Batches may have fewer rows if the underlying data is stored
|
|
86
|
+
* in smaller chunks.
|
|
87
|
+
*/
|
|
88
|
+
maxBatchLength?: number;
|
|
89
|
+
}
|
|
90
|
+
|
|
53
91
|
/** Common methods supported by all query types */
|
|
54
92
|
export class QueryBase<
|
|
55
93
|
NativeQueryType extends NativeQuery | NativeVectorQuery,
|
|
@@ -108,9 +146,12 @@ export class QueryBase<
|
|
|
108
146
|
* object insertion order is easy to get wrong and `Map` is more foolproof.
|
|
109
147
|
*/
|
|
110
148
|
select(
|
|
111
|
-
columns: string[] | Map<string, string> | Record<string, string
|
|
149
|
+
columns: string[] | Map<string, string> | Record<string, string> | string,
|
|
112
150
|
): QueryType {
|
|
113
151
|
let columnTuples: [string, string][];
|
|
152
|
+
if (typeof columns === "string") {
|
|
153
|
+
columns = [columns];
|
|
154
|
+
}
|
|
114
155
|
if (Array.isArray(columns)) {
|
|
115
156
|
columnTuples = columns.map((c) => [c, c]);
|
|
116
157
|
} else if (columns instanceof Map) {
|
|
@@ -133,8 +174,10 @@ export class QueryBase<
|
|
|
133
174
|
return this as unknown as QueryType;
|
|
134
175
|
}
|
|
135
176
|
|
|
136
|
-
protected nativeExecute(
|
|
137
|
-
|
|
177
|
+
protected nativeExecute(
|
|
178
|
+
options?: Partial<QueryExecutionOptions>,
|
|
179
|
+
): Promise<NativeBatchIterator> {
|
|
180
|
+
return this.inner.execute(options?.maxBatchLength);
|
|
138
181
|
}
|
|
139
182
|
|
|
140
183
|
/**
|
|
@@ -148,8 +191,10 @@ export class QueryBase<
|
|
|
148
191
|
* single query)
|
|
149
192
|
*
|
|
150
193
|
*/
|
|
151
|
-
protected execute(
|
|
152
|
-
|
|
194
|
+
protected execute(
|
|
195
|
+
options?: Partial<QueryExecutionOptions>,
|
|
196
|
+
): RecordBatchIterator {
|
|
197
|
+
return new RecordBatchIterator(this.nativeExecute(options));
|
|
153
198
|
}
|
|
154
199
|
|
|
155
200
|
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
@@ -159,19 +204,18 @@ export class QueryBase<
|
|
|
159
204
|
}
|
|
160
205
|
|
|
161
206
|
/** Collect the results as an Arrow @see {@link ArrowTable}. */
|
|
162
|
-
async toArrow(): Promise<ArrowTable> {
|
|
207
|
+
async toArrow(options?: Partial<QueryExecutionOptions>): Promise<ArrowTable> {
|
|
163
208
|
const batches = [];
|
|
164
|
-
for await (const batch of this) {
|
|
209
|
+
for await (const batch of new RecordBatchIterable(this.inner, options)) {
|
|
165
210
|
batches.push(batch);
|
|
166
211
|
}
|
|
167
212
|
return new ArrowTable(batches);
|
|
168
213
|
}
|
|
169
214
|
|
|
170
215
|
/** Collect the results as an array of objects. */
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
// eslint-disable-next-line @typescript-eslint/no-unsafe-return
|
|
216
|
+
// biome-ignore lint/suspicious/noExplicitAny: arrow.toArrow() returns any[]
|
|
217
|
+
async toArray(options?: Partial<QueryExecutionOptions>): Promise<any[]> {
|
|
218
|
+
const tbl = await this.toArrow(options);
|
|
175
219
|
return tbl.toArray();
|
|
176
220
|
}
|
|
177
221
|
}
|
|
@@ -370,9 +414,8 @@ export class Query extends QueryBase<NativeQuery, Query> {
|
|
|
370
414
|
* Vector searches always have a `limit`. If `limit` has not been called then
|
|
371
415
|
* a default `limit` of 10 will be used. @see {@link Query#limit}
|
|
372
416
|
*/
|
|
373
|
-
nearestTo(vector:
|
|
374
|
-
|
|
375
|
-
const vectorQuery = this.inner.nearestTo(Float32Array.from(vector as any));
|
|
417
|
+
nearestTo(vector: IntoVector): VectorQuery {
|
|
418
|
+
const vectorQuery = this.inner.nearestTo(Float32Array.from(vector));
|
|
376
419
|
return new VectorQuery(vectorQuery);
|
|
377
420
|
}
|
|
378
421
|
}
|
package/lancedb/table.ts
CHANGED
|
@@ -12,9 +12,16 @@
|
|
|
12
12
|
// See the License for the specific language governing permissions and
|
|
13
13
|
// limitations under the License.
|
|
14
14
|
|
|
15
|
-
import {
|
|
15
|
+
import {
|
|
16
|
+
Table as ArrowTable,
|
|
17
|
+
Data,
|
|
18
|
+
IntoVector,
|
|
19
|
+
Schema,
|
|
20
|
+
fromDataToBuffer,
|
|
21
|
+
tableFromIPC,
|
|
22
|
+
} from "./arrow";
|
|
16
23
|
|
|
17
|
-
import { getRegistry } from "./embedding/registry";
|
|
24
|
+
import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
|
|
18
25
|
import { IndexOptions } from "./indices";
|
|
19
26
|
import {
|
|
20
27
|
AddColumnsSql,
|
|
@@ -24,8 +31,8 @@ import {
|
|
|
24
31
|
Table as _NativeTable,
|
|
25
32
|
} from "./native";
|
|
26
33
|
import { Query, VectorQuery } from "./query";
|
|
27
|
-
|
|
28
34
|
export { IndexConfig } from "./native";
|
|
35
|
+
|
|
29
36
|
/**
|
|
30
37
|
* Options for adding data to a table.
|
|
31
38
|
*/
|
|
@@ -110,6 +117,14 @@ export class Table {
|
|
|
110
117
|
return this.inner.display();
|
|
111
118
|
}
|
|
112
119
|
|
|
120
|
+
async #getEmbeddingFunctions(): Promise<
|
|
121
|
+
Map<string, EmbeddingFunctionConfig>
|
|
122
|
+
> {
|
|
123
|
+
const schema = await this.schema();
|
|
124
|
+
const registry = getRegistry();
|
|
125
|
+
return registry.parseFunctions(schema.metadata);
|
|
126
|
+
}
|
|
127
|
+
|
|
113
128
|
/** Get the schema of the table. */
|
|
114
129
|
async schema(): Promise<Schema> {
|
|
115
130
|
const schemaBuf = await this.inner.schema();
|
|
@@ -130,6 +145,7 @@ export class Table {
|
|
|
130
145
|
const buffer = await fromDataToBuffer(
|
|
131
146
|
data,
|
|
132
147
|
functions.values().next().value,
|
|
148
|
+
schema,
|
|
133
149
|
);
|
|
134
150
|
await this.inner.add(buffer, mode);
|
|
135
151
|
}
|
|
@@ -270,6 +286,40 @@ export class Table {
|
|
|
270
286
|
return new Query(this.inner);
|
|
271
287
|
}
|
|
272
288
|
|
|
289
|
+
/**
|
|
290
|
+
* Create a search query to find the nearest neighbors
|
|
291
|
+
* of the given query vector
|
|
292
|
+
* @param {string} query - the query. This will be converted to a vector using the table's provided embedding function
|
|
293
|
+
* @rejects {Error} If no embedding functions are defined in the table
|
|
294
|
+
*/
|
|
295
|
+
search(query: string): Promise<VectorQuery>;
|
|
296
|
+
/**
|
|
297
|
+
* Create a search query to find the nearest neighbors
|
|
298
|
+
* of the given query vector
|
|
299
|
+
* @param {IntoVector} query - the query vector
|
|
300
|
+
*/
|
|
301
|
+
search(query: IntoVector): VectorQuery;
|
|
302
|
+
search(query: string | IntoVector): Promise<VectorQuery> | VectorQuery {
|
|
303
|
+
if (typeof query !== "string") {
|
|
304
|
+
return this.vectorSearch(query);
|
|
305
|
+
} else {
|
|
306
|
+
return this.#getEmbeddingFunctions().then(async (functions) => {
|
|
307
|
+
// TODO: Support multiple embedding functions
|
|
308
|
+
const embeddingFunc: EmbeddingFunctionConfig | undefined = functions
|
|
309
|
+
.values()
|
|
310
|
+
.next().value;
|
|
311
|
+
if (!embeddingFunc) {
|
|
312
|
+
return Promise.reject(
|
|
313
|
+
new Error("No embedding functions are defined in the table"),
|
|
314
|
+
);
|
|
315
|
+
}
|
|
316
|
+
const embeddings =
|
|
317
|
+
await embeddingFunc.function.computeQueryEmbeddings(query);
|
|
318
|
+
return this.query().nearestTo(embeddings);
|
|
319
|
+
});
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
|
|
273
323
|
/**
|
|
274
324
|
* Search the table with a given query vector.
|
|
275
325
|
*
|
|
@@ -277,7 +327,7 @@ export class Table {
|
|
|
277
327
|
* is the same thing as calling `nearestTo` on the builder returned
|
|
278
328
|
* by `query`. @see {@link Query#nearestTo} for more details.
|
|
279
329
|
*/
|
|
280
|
-
vectorSearch(vector:
|
|
330
|
+
vectorSearch(vector: IntoVector): VectorQuery {
|
|
281
331
|
return this.query().nearestTo(vector);
|
|
282
332
|
}
|
|
283
333
|
|
|
@@ -423,4 +473,9 @@ export class Table {
|
|
|
423
473
|
async listIndices(): Promise<IndexConfig[]> {
|
|
424
474
|
return await this.inner.listIndices();
|
|
425
475
|
}
|
|
476
|
+
|
|
477
|
+
/** Return the table as an arrow table */
|
|
478
|
+
async toArrow(): Promise<ArrowTable> {
|
|
479
|
+
return await this.query().toArrow();
|
|
480
|
+
}
|
|
426
481
|
}
|
|
@@ -3,6 +3,7 @@ import { Table as ArrowTable, Binary, DataType, FixedSizeBinary, FixedSizeList,
|
|
|
3
3
|
import { type EmbeddingFunction } from "./embedding/embedding_function";
|
|
4
4
|
import { EmbeddingFunctionConfig } from "./embedding/registry";
|
|
5
5
|
export * from "apache-arrow";
|
|
6
|
+
export type IntoVector = Float32Array | Float64Array | number[];
|
|
6
7
|
export declare function isArrowTable(value: object): value is ArrowTable;
|
|
7
8
|
export declare function isDataType(value: unknown): value is DataType;
|
|
8
9
|
export declare function isNull(value: unknown): value is Null;
|
|
@@ -36,6 +37,7 @@ export declare class MakeArrowTableOptions {
|
|
|
36
37
|
schema?: Schema;
|
|
37
38
|
vectorColumns: Record<string, VectorColumnOptions>;
|
|
38
39
|
embeddings?: EmbeddingFunction<unknown>;
|
|
40
|
+
embeddingFunction?: EmbeddingFunctionConfig;
|
|
39
41
|
/**
|
|
40
42
|
* If true then string columns will be encoded with dictionary encoding
|
|
41
43
|
*
|
|
@@ -184,6 +184,7 @@ class MakeArrowTableOptions {
|
|
|
184
184
|
vector: new VectorColumnOptions(),
|
|
185
185
|
};
|
|
186
186
|
embeddings;
|
|
187
|
+
embeddingFunction;
|
|
187
188
|
/**
|
|
188
189
|
* If true then string columns will be encoded with dictionary encoding
|
|
189
190
|
*
|
|
@@ -299,7 +300,7 @@ function makeArrowTable(data, options, metadata) {
|
|
|
299
300
|
const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
|
|
300
301
|
if (opt.schema !== undefined && opt.schema !== null) {
|
|
301
302
|
opt.schema = (0, sanitize_1.sanitizeSchema)(opt.schema);
|
|
302
|
-
opt.schema = validateSchemaEmbeddings(opt.schema, data,
|
|
303
|
+
opt.schema = validateSchemaEmbeddings(opt.schema, data, options?.embeddingFunction);
|
|
303
304
|
}
|
|
304
305
|
const columns = {};
|
|
305
306
|
// TODO: sample dataset to find missing columns
|
|
@@ -40,6 +40,12 @@ export interface CreateTableOptions {
|
|
|
40
40
|
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
|
41
41
|
*/
|
|
42
42
|
storageOptions?: Record<string, string>;
|
|
43
|
+
/**
|
|
44
|
+
* If true then data files will be written with the legacy format
|
|
45
|
+
*
|
|
46
|
+
* The default is true while the new format is in beta
|
|
47
|
+
*/
|
|
48
|
+
useLegacyFormat?: boolean;
|
|
43
49
|
schema?: Schema;
|
|
44
50
|
embeddingFunction?: EmbeddingFunctionConfig;
|
|
45
51
|
}
|
|
@@ -116,7 +116,7 @@ class Connection {
|
|
|
116
116
|
table = (0, arrow_1.makeArrowTable)(data, options);
|
|
117
117
|
}
|
|
118
118
|
const buf = await (0, arrow_1.fromTableToBuffer)(table, options?.embeddingFunction, options?.schema);
|
|
119
|
-
const innerTable = await this.inner.createTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions));
|
|
119
|
+
const innerTable = await this.inner.createTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions), options?.useLegacyFormat);
|
|
120
120
|
return new table_1.Table(innerTable);
|
|
121
121
|
}
|
|
122
122
|
/**
|
|
@@ -138,7 +138,7 @@ class Connection {
|
|
|
138
138
|
}
|
|
139
139
|
const table = (0, arrow_1.makeEmptyTable)(schema, metadata);
|
|
140
140
|
const buf = await (0, arrow_1.fromTableToBuffer)(table);
|
|
141
|
-
const innerTable = await this.inner.createEmptyTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions));
|
|
141
|
+
const innerTable = await this.inner.createEmptyTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions), options?.useLegacyFormat);
|
|
142
142
|
return new table_1.Table(innerTable);
|
|
143
143
|
}
|
|
144
144
|
/**
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import "reflect-metadata";
|
|
2
|
-
import { DataType, Float } from "../arrow";
|
|
2
|
+
import { DataType, Float, type IntoVector } from "../arrow";
|
|
3
3
|
/**
|
|
4
4
|
* Options for a given embedding function
|
|
5
5
|
*/
|
|
@@ -51,7 +51,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
|
|
|
51
51
|
*
|
|
52
52
|
* @see {@link lancedb.LanceSchema}
|
|
53
53
|
*/
|
|
54
|
-
vectorField(
|
|
54
|
+
vectorField(optionsOrDatatype?: Partial<FieldOptions> | DataType): [DataType, Map<string, EmbeddingFunction>];
|
|
55
55
|
/** The number of dimensions of the embeddings */
|
|
56
56
|
ndims(): number | undefined;
|
|
57
57
|
/** The datatype of the embeddings */
|
|
@@ -63,7 +63,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
|
|
|
63
63
|
/**
|
|
64
64
|
Compute the embeddings for a single query
|
|
65
65
|
*/
|
|
66
|
-
computeQueryEmbeddings(data: T): Promise<
|
|
66
|
+
computeQueryEmbeddings(data: T): Promise<IntoVector>;
|
|
67
67
|
}
|
|
68
68
|
export interface FieldOptions<T extends DataType = DataType> {
|
|
69
69
|
datatype: T;
|
|
@@ -47,32 +47,50 @@ class EmbeddingFunction {
|
|
|
47
47
|
*
|
|
48
48
|
* @see {@link lancedb.LanceSchema}
|
|
49
49
|
*/
|
|
50
|
-
vectorField(
|
|
50
|
+
vectorField(optionsOrDatatype) {
|
|
51
51
|
let dtype;
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
dtype = new arrow_1.FixedSizeList(dims, new arrow_1.Field("item", new arrow_1.Float32(), true));
|
|
52
|
+
let vectorType;
|
|
53
|
+
let dims = this.ndims();
|
|
54
|
+
// `func.vectorField(new Float32())`
|
|
55
|
+
if ((0, arrow_1.isDataType)(optionsOrDatatype)) {
|
|
56
|
+
dtype = optionsOrDatatype;
|
|
58
57
|
}
|
|
59
58
|
else {
|
|
60
|
-
|
|
61
|
-
|
|
59
|
+
// `func.vectorField({
|
|
60
|
+
// datatype: new Float32(),
|
|
61
|
+
// dims: 10
|
|
62
|
+
// })`
|
|
63
|
+
dims = dims ?? optionsOrDatatype?.dims;
|
|
64
|
+
dtype = optionsOrDatatype?.datatype;
|
|
65
|
+
}
|
|
66
|
+
if (dtype !== undefined) {
|
|
67
|
+
// `func.vectorField(new FixedSizeList(dims, new Field("item", new Float32(), true)))`
|
|
68
|
+
// or `func.vectorField({datatype: new FixedSizeList(dims, new Field("item", new Float32(), true))})`
|
|
69
|
+
if ((0, arrow_1.isFixedSizeList)(dtype)) {
|
|
70
|
+
vectorType = dtype;
|
|
71
|
+
// `func.vectorField(new Float32())`
|
|
72
|
+
// or `func.vectorField({datatype: new Float32()})`
|
|
62
73
|
}
|
|
63
|
-
else if ((0, arrow_1.isFloat)(
|
|
74
|
+
else if ((0, arrow_1.isFloat)(dtype)) {
|
|
75
|
+
// No `ndims` impl and no `{dims: n}` provided;
|
|
64
76
|
if (dims === undefined) {
|
|
65
77
|
throw new Error("ndims is required for vector field");
|
|
66
78
|
}
|
|
67
|
-
|
|
79
|
+
vectorType = (0, arrow_1.newVectorType)(dims, dtype);
|
|
68
80
|
}
|
|
69
81
|
else {
|
|
70
82
|
throw new Error("Expected FixedSizeList or Float as datatype for vector field");
|
|
71
83
|
}
|
|
72
84
|
}
|
|
85
|
+
else {
|
|
86
|
+
if (dims === undefined) {
|
|
87
|
+
throw new Error("ndims is required for vector field");
|
|
88
|
+
}
|
|
89
|
+
vectorType = new arrow_1.FixedSizeList(dims, new arrow_1.Field("item", new arrow_1.Float32(), true));
|
|
90
|
+
}
|
|
73
91
|
const metadata = new Map();
|
|
74
92
|
metadata.set("vector_column_for", this);
|
|
75
|
-
return [
|
|
93
|
+
return [vectorType, metadata];
|
|
76
94
|
}
|
|
77
95
|
/** The number of dimensions of the embeddings */
|
|
78
96
|
ndims() {
|
|
@@ -21,6 +21,7 @@ export declare class EmbeddingFunctionRegistry {
|
|
|
21
21
|
* Register an embedding function
|
|
22
22
|
* @param name The name of the function
|
|
23
23
|
* @param func The function to register
|
|
24
|
+
* @throws Error if the function is already registered
|
|
24
25
|
*/
|
|
25
26
|
register<T extends EmbeddingFunctionFactory = EmbeddingFunctionFactory>(this: EmbeddingFunctionRegistry, alias?: string): (ctor: T) => any;
|
|
26
27
|
/**
|
|
@@ -32,6 +33,9 @@ export declare class EmbeddingFunctionRegistry {
|
|
|
32
33
|
* reset the registry to the initial state
|
|
33
34
|
*/
|
|
34
35
|
reset(this: EmbeddingFunctionRegistry): void;
|
|
36
|
+
/**
|
|
37
|
+
* @ignore
|
|
38
|
+
*/
|
|
35
39
|
parseFunctions(this: EmbeddingFunctionRegistry, metadata: Map<string, string>): Map<string, EmbeddingFunctionConfig>;
|
|
36
40
|
functionToMetadata(conf: EmbeddingFunctionConfig): Record<string, any>;
|
|
37
41
|
getTableMetadata(functions: EmbeddingFunctionConfig[]): Map<string, string>;
|
|
@@ -27,6 +27,7 @@ class EmbeddingFunctionRegistry {
|
|
|
27
27
|
* Register an embedding function
|
|
28
28
|
* @param name The name of the function
|
|
29
29
|
* @param func The function to register
|
|
30
|
+
* @throws Error if the function is already registered
|
|
30
31
|
*/
|
|
31
32
|
register(alias) {
|
|
32
33
|
const self = this;
|
|
@@ -63,6 +64,9 @@ class EmbeddingFunctionRegistry {
|
|
|
63
64
|
reset() {
|
|
64
65
|
this.#functions.clear();
|
|
65
66
|
}
|
|
67
|
+
/**
|
|
68
|
+
* @ignore
|
|
69
|
+
*/
|
|
66
70
|
parseFunctions(metadata) {
|
|
67
71
|
if (!metadata.has("embedding_functions")) {
|
|
68
72
|
return new Map();
|
|
@@ -102,6 +102,7 @@ export const enum WriteMode {
|
|
|
102
102
|
}
|
|
103
103
|
/** Write options when creating a Table. */
|
|
104
104
|
export interface WriteOptions {
|
|
105
|
+
/** Write mode for writing to a table. */
|
|
105
106
|
mode?: WriteMode
|
|
106
107
|
}
|
|
107
108
|
export interface OpenTableOptions {
|
|
@@ -123,8 +124,8 @@ export class Connection {
|
|
|
123
124
|
* - buf: The buffer containing the IPC file.
|
|
124
125
|
*
|
|
125
126
|
*/
|
|
126
|
-
createTable(name: string, buf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null): Promise<Table>
|
|
127
|
-
createEmptyTable(name: string, schemaBuf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null): Promise<Table>
|
|
127
|
+
createTable(name: string, buf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
|
|
128
|
+
createEmptyTable(name: string, schemaBuf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
|
|
128
129
|
openTable(name: string, storageOptions?: Record<string, string> | undefined | null, indexCacheSize?: number | undefined | null): Promise<Table>
|
|
129
130
|
/** Drop table with the name. Or raise an error if the table does not exist. */
|
|
130
131
|
dropTable(name: string): Promise<void>
|
|
@@ -142,7 +143,7 @@ export class Query {
|
|
|
142
143
|
select(columns: Array<[string, string]>): void
|
|
143
144
|
limit(limit: number): void
|
|
144
145
|
nearestTo(vector: Float32Array): VectorQuery
|
|
145
|
-
execute(): Promise<RecordBatchIterator>
|
|
146
|
+
execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
|
|
146
147
|
}
|
|
147
148
|
export class VectorQuery {
|
|
148
149
|
column(column: string): void
|
|
@@ -154,7 +155,7 @@ export class VectorQuery {
|
|
|
154
155
|
onlyIf(predicate: string): void
|
|
155
156
|
select(columns: Array<[string, string]>): void
|
|
156
157
|
limit(limit: number): void
|
|
157
|
-
execute(): Promise<RecordBatchIterator>
|
|
158
|
+
execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
|
|
158
159
|
}
|
|
159
160
|
export class Table {
|
|
160
161
|
display(): string
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Table as ArrowTable, RecordBatch } from "./arrow";
|
|
1
|
+
import { Table as ArrowTable, type IntoVector, RecordBatch } from "./arrow";
|
|
2
2
|
import { RecordBatchIterator as NativeBatchIterator, Query as NativeQuery, Table as NativeTable, VectorQuery as NativeVectorQuery } from "./native";
|
|
3
3
|
export declare class RecordBatchIterator implements AsyncIterator<RecordBatch> {
|
|
4
4
|
private promisedInner?;
|
|
@@ -6,6 +6,18 @@ export declare class RecordBatchIterator implements AsyncIterator<RecordBatch> {
|
|
|
6
6
|
constructor(promise?: Promise<NativeBatchIterator>);
|
|
7
7
|
next(): Promise<IteratorResult<RecordBatch<any>>>;
|
|
8
8
|
}
|
|
9
|
+
/**
|
|
10
|
+
* Options that control the behavior of a particular query execution
|
|
11
|
+
*/
|
|
12
|
+
export interface QueryExecutionOptions {
|
|
13
|
+
/**
|
|
14
|
+
* The maximum number of rows to return in a single batch
|
|
15
|
+
*
|
|
16
|
+
* Batches may have fewer rows if the underlying data is stored
|
|
17
|
+
* in smaller chunks.
|
|
18
|
+
*/
|
|
19
|
+
maxBatchLength?: number;
|
|
20
|
+
}
|
|
9
21
|
/** Common methods supported by all query types */
|
|
10
22
|
export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery, QueryType> implements AsyncIterable<RecordBatch> {
|
|
11
23
|
protected inner: NativeQueryType;
|
|
@@ -53,7 +65,7 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
|
|
|
53
65
|
* uses `Object.entries` which should preserve the insertion order of the object. However,
|
|
54
66
|
* object insertion order is easy to get wrong and `Map` is more foolproof.
|
|
55
67
|
*/
|
|
56
|
-
select(columns: string[] | Map<string, string> | Record<string, string>): QueryType;
|
|
68
|
+
select(columns: string[] | Map<string, string> | Record<string, string> | string): QueryType;
|
|
57
69
|
/**
|
|
58
70
|
* Set the maximum number of results to return.
|
|
59
71
|
*
|
|
@@ -61,7 +73,7 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
|
|
|
61
73
|
* called then every valid row from the table will be returned.
|
|
62
74
|
*/
|
|
63
75
|
limit(limit: number): QueryType;
|
|
64
|
-
protected nativeExecute(): Promise<NativeBatchIterator>;
|
|
76
|
+
protected nativeExecute(options?: Partial<QueryExecutionOptions>): Promise<NativeBatchIterator>;
|
|
65
77
|
/**
|
|
66
78
|
* Execute the query and return the results as an @see {@link AsyncIterator}
|
|
67
79
|
* of @see {@link RecordBatch}.
|
|
@@ -73,12 +85,12 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
|
|
|
73
85
|
* single query)
|
|
74
86
|
*
|
|
75
87
|
*/
|
|
76
|
-
protected execute(): RecordBatchIterator;
|
|
88
|
+
protected execute(options?: Partial<QueryExecutionOptions>): RecordBatchIterator;
|
|
77
89
|
[Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>>;
|
|
78
90
|
/** Collect the results as an Arrow @see {@link ArrowTable}. */
|
|
79
|
-
toArrow(): Promise<ArrowTable>;
|
|
91
|
+
toArrow(options?: Partial<QueryExecutionOptions>): Promise<ArrowTable>;
|
|
80
92
|
/** Collect the results as an array of objects. */
|
|
81
|
-
toArray(): Promise<
|
|
93
|
+
toArray(options?: Partial<QueryExecutionOptions>): Promise<any[]>;
|
|
82
94
|
}
|
|
83
95
|
/**
|
|
84
96
|
* An interface for a query that can be executed
|
|
@@ -244,5 +256,5 @@ export declare class Query extends QueryBase<NativeQuery, Query> {
|
|
|
244
256
|
* Vector searches always have a `limit`. If `limit` has not been called then
|
|
245
257
|
* a default `limit` of 10 will be used. @see {@link Query#limit}
|
|
246
258
|
*/
|
|
247
|
-
nearestTo(vector:
|
|
259
|
+
nearestTo(vector: IntoVector): VectorQuery;
|
|
248
260
|
}
|
|
@@ -43,6 +43,18 @@ class RecordBatchIterator {
|
|
|
43
43
|
}
|
|
44
44
|
exports.RecordBatchIterator = RecordBatchIterator;
|
|
45
45
|
/* eslint-enable */
|
|
46
|
+
class RecordBatchIterable {
|
|
47
|
+
inner;
|
|
48
|
+
options;
|
|
49
|
+
constructor(inner, options) {
|
|
50
|
+
this.inner = inner;
|
|
51
|
+
this.options = options;
|
|
52
|
+
}
|
|
53
|
+
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
54
|
+
[Symbol.asyncIterator]() {
|
|
55
|
+
return new RecordBatchIterator(this.inner.execute(this.options?.maxBatchLength));
|
|
56
|
+
}
|
|
57
|
+
}
|
|
46
58
|
/** Common methods supported by all query types */
|
|
47
59
|
class QueryBase {
|
|
48
60
|
inner;
|
|
@@ -98,6 +110,9 @@ class QueryBase {
|
|
|
98
110
|
*/
|
|
99
111
|
select(columns) {
|
|
100
112
|
let columnTuples;
|
|
113
|
+
if (typeof columns === "string") {
|
|
114
|
+
columns = [columns];
|
|
115
|
+
}
|
|
101
116
|
if (Array.isArray(columns)) {
|
|
102
117
|
columnTuples = columns.map((c) => [c, c]);
|
|
103
118
|
}
|
|
@@ -120,8 +135,8 @@ class QueryBase {
|
|
|
120
135
|
this.inner.limit(limit);
|
|
121
136
|
return this;
|
|
122
137
|
}
|
|
123
|
-
nativeExecute() {
|
|
124
|
-
return this.inner.execute();
|
|
138
|
+
nativeExecute(options) {
|
|
139
|
+
return this.inner.execute(options?.maxBatchLength);
|
|
125
140
|
}
|
|
126
141
|
/**
|
|
127
142
|
* Execute the query and return the results as an @see {@link AsyncIterator}
|
|
@@ -134,8 +149,8 @@ class QueryBase {
|
|
|
134
149
|
* single query)
|
|
135
150
|
*
|
|
136
151
|
*/
|
|
137
|
-
execute() {
|
|
138
|
-
return new RecordBatchIterator(this.nativeExecute());
|
|
152
|
+
execute(options) {
|
|
153
|
+
return new RecordBatchIterator(this.nativeExecute(options));
|
|
139
154
|
}
|
|
140
155
|
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
141
156
|
[Symbol.asyncIterator]() {
|
|
@@ -143,17 +158,17 @@ class QueryBase {
|
|
|
143
158
|
return new RecordBatchIterator(promise);
|
|
144
159
|
}
|
|
145
160
|
/** Collect the results as an Arrow @see {@link ArrowTable}. */
|
|
146
|
-
async toArrow() {
|
|
161
|
+
async toArrow(options) {
|
|
147
162
|
const batches = [];
|
|
148
|
-
for await (const batch of this) {
|
|
163
|
+
for await (const batch of new RecordBatchIterable(this.inner, options)) {
|
|
149
164
|
batches.push(batch);
|
|
150
165
|
}
|
|
151
166
|
return new arrow_1.Table(batches);
|
|
152
167
|
}
|
|
153
168
|
/** Collect the results as an array of objects. */
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
169
|
+
// biome-ignore lint/suspicious/noExplicitAny: arrow.toArrow() returns any[]
|
|
170
|
+
async toArray(options) {
|
|
171
|
+
const tbl = await this.toArrow(options);
|
|
157
172
|
return tbl.toArray();
|
|
158
173
|
}
|
|
159
174
|
}
|
|
@@ -339,7 +354,6 @@ class Query extends QueryBase {
|
|
|
339
354
|
* a default `limit` of 10 will be used. @see {@link Query#limit}
|
|
340
355
|
*/
|
|
341
356
|
nearestTo(vector) {
|
|
342
|
-
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
343
357
|
const vectorQuery = this.inner.nearestTo(Float32Array.from(vector));
|
|
344
358
|
return new VectorQuery(vectorQuery);
|
|
345
359
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Data, Schema } from "./arrow";
|
|
1
|
+
import { Table as ArrowTable, Data, IntoVector, Schema } from "./arrow";
|
|
2
2
|
import { IndexOptions } from "./indices";
|
|
3
3
|
import { AddColumnsSql, ColumnAlteration, IndexConfig, OptimizeStats, Table as _NativeTable } from "./native";
|
|
4
4
|
import { Query, VectorQuery } from "./query";
|
|
@@ -56,6 +56,7 @@ export interface OptimizeOptions {
|
|
|
56
56
|
* collected.
|
|
57
57
|
*/
|
|
58
58
|
export declare class Table {
|
|
59
|
+
#private;
|
|
59
60
|
private readonly inner;
|
|
60
61
|
/** Construct a Table. Internal use only. */
|
|
61
62
|
constructor(inner: _NativeTable);
|
|
@@ -186,6 +187,19 @@ export declare class Table {
|
|
|
186
187
|
* @returns {Query} A builder that can be used to parameterize the query
|
|
187
188
|
*/
|
|
188
189
|
query(): Query;
|
|
190
|
+
/**
|
|
191
|
+
* Create a search query to find the nearest neighbors
|
|
192
|
+
* of the given query vector
|
|
193
|
+
* @param {string} query - the query. This will be converted to a vector using the table's provided embedding function
|
|
194
|
+
* @rejects {Error} If no embedding functions are defined in the table
|
|
195
|
+
*/
|
|
196
|
+
search(query: string): Promise<VectorQuery>;
|
|
197
|
+
/**
|
|
198
|
+
* Create a search query to find the nearest neighbors
|
|
199
|
+
* of the given query vector
|
|
200
|
+
* @param {IntoVector} query - the query vector
|
|
201
|
+
*/
|
|
202
|
+
search(query: IntoVector): VectorQuery;
|
|
189
203
|
/**
|
|
190
204
|
* Search the table with a given query vector.
|
|
191
205
|
*
|
|
@@ -193,7 +207,7 @@ export declare class Table {
|
|
|
193
207
|
* is the same thing as calling `nearestTo` on the builder returned
|
|
194
208
|
* by `query`. @see {@link Query#nearestTo} for more details.
|
|
195
209
|
*/
|
|
196
|
-
vectorSearch(vector:
|
|
210
|
+
vectorSearch(vector: IntoVector): VectorQuery;
|
|
197
211
|
/**
|
|
198
212
|
* Add new columns with defined values.
|
|
199
213
|
* @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
|
|
@@ -301,4 +315,6 @@ export declare class Table {
|
|
|
301
315
|
optimize(options?: Partial<OptimizeOptions>): Promise<OptimizeStats>;
|
|
302
316
|
/** List all indices that have been created with {@link Table.createIndex} */
|
|
303
317
|
listIndices(): Promise<IndexConfig[]>;
|
|
318
|
+
/** Return the table as an arrow table */
|
|
319
|
+
toArrow(): Promise<ArrowTable>;
|
|
304
320
|
}
|