@lancedb/lancedb 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/arrow.d.ts +2 -0
- package/dist/arrow.js +2 -1
- package/dist/connection.d.ts +6 -0
- package/dist/connection.js +2 -2
- package/dist/embedding/embedding_function.d.ts +3 -3
- package/dist/embedding/embedding_function.js +30 -12
- package/dist/embedding/registry.d.ts +4 -0
- package/dist/embedding/registry.js +4 -0
- package/dist/native.d.ts +5 -4
- package/dist/query.d.ts +19 -7
- package/dist/query.js +24 -10
- package/dist/table.d.ts +18 -2
- package/dist/table.js +28 -1
- package/lancedb/arrow.ts +10 -4
- package/lancedb/connection.ts +8 -0
- package/lancedb/embedding/embedding_function.ts +37 -16
- package/lancedb/embedding/registry.ts +4 -0
- package/lancedb/query.ts +58 -15
- package/lancedb/table.ts +59 -4
- package/nodejs-artifacts/arrow.d.ts +2 -0
- package/nodejs-artifacts/arrow.js +2 -1
- package/nodejs-artifacts/connection.d.ts +6 -0
- package/nodejs-artifacts/connection.js +2 -2
- package/nodejs-artifacts/embedding/embedding_function.d.ts +3 -3
- package/nodejs-artifacts/embedding/embedding_function.js +30 -12
- package/nodejs-artifacts/embedding/registry.d.ts +4 -0
- package/nodejs-artifacts/embedding/registry.js +4 -0
- package/nodejs-artifacts/native.d.ts +5 -4
- package/nodejs-artifacts/query.d.ts +19 -7
- package/nodejs-artifacts/query.js +24 -10
- package/nodejs-artifacts/table.d.ts +18 -2
- package/nodejs-artifacts/table.js +28 -1
- package/package.json +6 -6
package/dist/arrow.d.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { Table as ArrowTable, Binary, DataType, FixedSizeBinary, FixedSizeList,
|
|
|
3
3
|
import { type EmbeddingFunction } from "./embedding/embedding_function";
|
|
4
4
|
import { EmbeddingFunctionConfig } from "./embedding/registry";
|
|
5
5
|
export * from "apache-arrow";
|
|
6
|
+
export type IntoVector = Float32Array | Float64Array | number[];
|
|
6
7
|
export declare function isArrowTable(value: object): value is ArrowTable;
|
|
7
8
|
export declare function isDataType(value: unknown): value is DataType;
|
|
8
9
|
export declare function isNull(value: unknown): value is Null;
|
|
@@ -36,6 +37,7 @@ export declare class MakeArrowTableOptions {
|
|
|
36
37
|
schema?: Schema;
|
|
37
38
|
vectorColumns: Record<string, VectorColumnOptions>;
|
|
38
39
|
embeddings?: EmbeddingFunction<unknown>;
|
|
40
|
+
embeddingFunction?: EmbeddingFunctionConfig;
|
|
39
41
|
/**
|
|
40
42
|
* If true then string columns will be encoded with dictionary encoding
|
|
41
43
|
*
|
package/dist/arrow.js
CHANGED
|
@@ -184,6 +184,7 @@ class MakeArrowTableOptions {
|
|
|
184
184
|
vector: new VectorColumnOptions(),
|
|
185
185
|
};
|
|
186
186
|
embeddings;
|
|
187
|
+
embeddingFunction;
|
|
187
188
|
/**
|
|
188
189
|
* If true then string columns will be encoded with dictionary encoding
|
|
189
190
|
*
|
|
@@ -299,7 +300,7 @@ function makeArrowTable(data, options, metadata) {
|
|
|
299
300
|
const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
|
|
300
301
|
if (opt.schema !== undefined && opt.schema !== null) {
|
|
301
302
|
opt.schema = (0, sanitize_1.sanitizeSchema)(opt.schema);
|
|
302
|
-
opt.schema = validateSchemaEmbeddings(opt.schema, data,
|
|
303
|
+
opt.schema = validateSchemaEmbeddings(opt.schema, data, options?.embeddingFunction);
|
|
303
304
|
}
|
|
304
305
|
const columns = {};
|
|
305
306
|
// TODO: sample dataset to find missing columns
|
package/dist/connection.d.ts
CHANGED
|
@@ -40,6 +40,12 @@ export interface CreateTableOptions {
|
|
|
40
40
|
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
|
41
41
|
*/
|
|
42
42
|
storageOptions?: Record<string, string>;
|
|
43
|
+
/**
|
|
44
|
+
* If true then data files will be written with the legacy format
|
|
45
|
+
*
|
|
46
|
+
* The default is true while the new format is in beta
|
|
47
|
+
*/
|
|
48
|
+
useLegacyFormat?: boolean;
|
|
43
49
|
schema?: Schema;
|
|
44
50
|
embeddingFunction?: EmbeddingFunctionConfig;
|
|
45
51
|
}
|
package/dist/connection.js
CHANGED
|
@@ -116,7 +116,7 @@ class Connection {
|
|
|
116
116
|
table = (0, arrow_1.makeArrowTable)(data, options);
|
|
117
117
|
}
|
|
118
118
|
const buf = await (0, arrow_1.fromTableToBuffer)(table, options?.embeddingFunction, options?.schema);
|
|
119
|
-
const innerTable = await this.inner.createTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions));
|
|
119
|
+
const innerTable = await this.inner.createTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions), options?.useLegacyFormat);
|
|
120
120
|
return new table_1.Table(innerTable);
|
|
121
121
|
}
|
|
122
122
|
/**
|
|
@@ -138,7 +138,7 @@ class Connection {
|
|
|
138
138
|
}
|
|
139
139
|
const table = (0, arrow_1.makeEmptyTable)(schema, metadata);
|
|
140
140
|
const buf = await (0, arrow_1.fromTableToBuffer)(table);
|
|
141
|
-
const innerTable = await this.inner.createEmptyTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions));
|
|
141
|
+
const innerTable = await this.inner.createEmptyTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions), options?.useLegacyFormat);
|
|
142
142
|
return new table_1.Table(innerTable);
|
|
143
143
|
}
|
|
144
144
|
/**
|
package/dist/embedding/embedding_function.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import "reflect-metadata";
|
|
2
|
-
import { DataType, Float } from "../arrow";
|
|
2
|
+
import { DataType, Float, type IntoVector } from "../arrow";
|
|
3
3
|
/**
|
|
4
4
|
* Options for a given embedding function
|
|
5
5
|
*/
|
|
@@ -51,7 +51,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
|
|
|
51
51
|
*
|
|
52
52
|
* @see {@link lancedb.LanceSchema}
|
|
53
53
|
*/
|
|
54
|
-
vectorField(
|
|
54
|
+
vectorField(optionsOrDatatype?: Partial<FieldOptions> | DataType): [DataType, Map<string, EmbeddingFunction>];
|
|
55
55
|
/** The number of dimensions of the embeddings */
|
|
56
56
|
ndims(): number | undefined;
|
|
57
57
|
/** The datatype of the embeddings */
|
|
@@ -63,7 +63,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
|
|
|
63
63
|
/**
|
|
64
64
|
Compute the embeddings for a single query
|
|
65
65
|
*/
|
|
66
|
-
computeQueryEmbeddings(data: T): Promise<number[] | Float32Array | Float64Array>;
|
|
66
|
+
computeQueryEmbeddings(data: T): Promise<IntoVector>;
|
|
67
67
|
}
|
|
68
68
|
export interface FieldOptions<T extends DataType = DataType> {
|
|
69
69
|
datatype: T;
|
package/dist/embedding/embedding_function.js
CHANGED
|
@@ -47,32 +47,50 @@ class EmbeddingFunction {
|
|
|
47
47
|
*
|
|
48
48
|
* @see {@link lancedb.LanceSchema}
|
|
49
49
|
*/
|
|
50
|
-
vectorField(
|
|
50
|
+
vectorField(optionsOrDatatype) {
|
|
51
51
|
let dtype;
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
dtype = new arrow_1.FixedSizeList(dims, new arrow_1.Field("item", new arrow_1.Float32(), true));
|
|
52
|
+
let vectorType;
|
|
53
|
+
let dims = this.ndims();
|
|
54
|
+
// `func.vectorField(new Float32())`
|
|
55
|
+
if ((0, arrow_1.isDataType)(optionsOrDatatype)) {
|
|
56
|
+
dtype = optionsOrDatatype;
|
|
58
57
|
}
|
|
59
58
|
else {
|
|
60
|
-
|
|
61
|
-
|
|
59
|
+
// `func.vectorField({
|
|
60
|
+
// datatype: new Float32(),
|
|
61
|
+
// dims: 10
|
|
62
|
+
// })`
|
|
63
|
+
dims = dims ?? optionsOrDatatype?.dims;
|
|
64
|
+
dtype = optionsOrDatatype?.datatype;
|
|
65
|
+
}
|
|
66
|
+
if (dtype !== undefined) {
|
|
67
|
+
// `func.vectorField(new FixedSizeList(dims, new Field("item", new Float32(), true)))`
|
|
68
|
+
// or `func.vectorField({datatype: new FixedSizeList(dims, new Field("item", new Float32(), true))})`
|
|
69
|
+
if ((0, arrow_1.isFixedSizeList)(dtype)) {
|
|
70
|
+
vectorType = dtype;
|
|
71
|
+
// `func.vectorField(new Float32())`
|
|
72
|
+
// or `func.vectorField({datatype: new Float32()})`
|
|
62
73
|
}
|
|
63
|
-
else if ((0, arrow_1.isFloat)(
|
|
74
|
+
else if ((0, arrow_1.isFloat)(dtype)) {
|
|
75
|
+
// No `ndims` impl and no `{dims: n}` provided;
|
|
64
76
|
if (dims === undefined) {
|
|
65
77
|
throw new Error("ndims is required for vector field");
|
|
66
78
|
}
|
|
67
|
-
|
|
79
|
+
vectorType = (0, arrow_1.newVectorType)(dims, dtype);
|
|
68
80
|
}
|
|
69
81
|
else {
|
|
70
82
|
throw new Error("Expected FixedSizeList or Float as datatype for vector field");
|
|
71
83
|
}
|
|
72
84
|
}
|
|
85
|
+
else {
|
|
86
|
+
if (dims === undefined) {
|
|
87
|
+
throw new Error("ndims is required for vector field");
|
|
88
|
+
}
|
|
89
|
+
vectorType = new arrow_1.FixedSizeList(dims, new arrow_1.Field("item", new arrow_1.Float32(), true));
|
|
90
|
+
}
|
|
73
91
|
const metadata = new Map();
|
|
74
92
|
metadata.set("vector_column_for", this);
|
|
75
|
-
return [
|
|
93
|
+
return [vectorType, metadata];
|
|
76
94
|
}
|
|
77
95
|
/** The number of dimensions of the embeddings */
|
|
78
96
|
ndims() {
|
package/dist/embedding/registry.d.ts
CHANGED
|
@@ -21,6 +21,7 @@ export declare class EmbeddingFunctionRegistry {
|
|
|
21
21
|
* Register an embedding function
|
|
22
22
|
* @param name The name of the function
|
|
23
23
|
* @param func The function to register
|
|
24
|
+
* @throws Error if the function is already registered
|
|
24
25
|
*/
|
|
25
26
|
register<T extends EmbeddingFunctionFactory = EmbeddingFunctionFactory>(this: EmbeddingFunctionRegistry, alias?: string): (ctor: T) => any;
|
|
26
27
|
/**
|
|
@@ -32,6 +33,9 @@ export declare class EmbeddingFunctionRegistry {
|
|
|
32
33
|
* reset the registry to the initial state
|
|
33
34
|
*/
|
|
34
35
|
reset(this: EmbeddingFunctionRegistry): void;
|
|
36
|
+
/**
|
|
37
|
+
* @ignore
|
|
38
|
+
*/
|
|
35
39
|
parseFunctions(this: EmbeddingFunctionRegistry, metadata: Map<string, string>): Map<string, EmbeddingFunctionConfig>;
|
|
36
40
|
functionToMetadata(conf: EmbeddingFunctionConfig): Record<string, any>;
|
|
37
41
|
getTableMetadata(functions: EmbeddingFunctionConfig[]): Map<string, string>;
|
package/dist/embedding/registry.js
CHANGED
|
@@ -27,6 +27,7 @@ class EmbeddingFunctionRegistry {
|
|
|
27
27
|
* Register an embedding function
|
|
28
28
|
* @param name The name of the function
|
|
29
29
|
* @param func The function to register
|
|
30
|
+
* @throws Error if the function is already registered
|
|
30
31
|
*/
|
|
31
32
|
register(alias) {
|
|
32
33
|
const self = this;
|
|
@@ -63,6 +64,9 @@ class EmbeddingFunctionRegistry {
|
|
|
63
64
|
reset() {
|
|
64
65
|
this.#functions.clear();
|
|
65
66
|
}
|
|
67
|
+
/**
|
|
68
|
+
* @ignore
|
|
69
|
+
*/
|
|
66
70
|
parseFunctions(metadata) {
|
|
67
71
|
if (!metadata.has("embedding_functions")) {
|
|
68
72
|
return new Map();
|
package/dist/native.d.ts
CHANGED
|
@@ -102,6 +102,7 @@ export const enum WriteMode {
|
|
|
102
102
|
}
|
|
103
103
|
/** Write options when creating a Table. */
|
|
104
104
|
export interface WriteOptions {
|
|
105
|
+
/** Write mode for writing to a table. */
|
|
105
106
|
mode?: WriteMode
|
|
106
107
|
}
|
|
107
108
|
export interface OpenTableOptions {
|
|
@@ -123,8 +124,8 @@ export class Connection {
|
|
|
123
124
|
* - buf: The buffer containing the IPC file.
|
|
124
125
|
*
|
|
125
126
|
*/
|
|
126
|
-
createTable(name: string, buf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null): Promise<Table>
|
|
127
|
-
createEmptyTable(name: string, schemaBuf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null): Promise<Table>
|
|
127
|
+
createTable(name: string, buf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
|
|
128
|
+
createEmptyTable(name: string, schemaBuf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
|
|
128
129
|
openTable(name: string, storageOptions?: Record<string, string> | undefined | null, indexCacheSize?: number | undefined | null): Promise<Table>
|
|
129
130
|
/** Drop table with the name. Or raise an error if the table does not exist. */
|
|
130
131
|
dropTable(name: string): Promise<void>
|
|
@@ -142,7 +143,7 @@ export class Query {
|
|
|
142
143
|
select(columns: Array<[string, string]>): void
|
|
143
144
|
limit(limit: number): void
|
|
144
145
|
nearestTo(vector: Float32Array): VectorQuery
|
|
145
|
-
execute(): Promise<RecordBatchIterator>
|
|
146
|
+
execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
|
|
146
147
|
}
|
|
147
148
|
export class VectorQuery {
|
|
148
149
|
column(column: string): void
|
|
@@ -154,7 +155,7 @@ export class VectorQuery {
|
|
|
154
155
|
onlyIf(predicate: string): void
|
|
155
156
|
select(columns: Array<[string, string]>): void
|
|
156
157
|
limit(limit: number): void
|
|
157
|
-
execute(): Promise<RecordBatchIterator>
|
|
158
|
+
execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
|
|
158
159
|
}
|
|
159
160
|
export class Table {
|
|
160
161
|
display(): string
|
package/dist/query.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Table as ArrowTable, RecordBatch } from "./arrow";
|
|
1
|
+
import { Table as ArrowTable, type IntoVector, RecordBatch } from "./arrow";
|
|
2
2
|
import { RecordBatchIterator as NativeBatchIterator, Query as NativeQuery, Table as NativeTable, VectorQuery as NativeVectorQuery } from "./native";
|
|
3
3
|
export declare class RecordBatchIterator implements AsyncIterator<RecordBatch> {
|
|
4
4
|
private promisedInner?;
|
|
@@ -6,6 +6,18 @@ export declare class RecordBatchIterator implements AsyncIterator<RecordBatch> {
|
|
|
6
6
|
constructor(promise?: Promise<NativeBatchIterator>);
|
|
7
7
|
next(): Promise<IteratorResult<RecordBatch<any>>>;
|
|
8
8
|
}
|
|
9
|
+
/**
|
|
10
|
+
* Options that control the behavior of a particular query execution
|
|
11
|
+
*/
|
|
12
|
+
export interface QueryExecutionOptions {
|
|
13
|
+
/**
|
|
14
|
+
* The maximum number of rows to return in a single batch
|
|
15
|
+
*
|
|
16
|
+
* Batches may have fewer rows if the underlying data is stored
|
|
17
|
+
* in smaller chunks.
|
|
18
|
+
*/
|
|
19
|
+
maxBatchLength?: number;
|
|
20
|
+
}
|
|
9
21
|
/** Common methods supported by all query types */
|
|
10
22
|
export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery, QueryType> implements AsyncIterable<RecordBatch> {
|
|
11
23
|
protected inner: NativeQueryType;
|
|
@@ -53,7 +65,7 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
|
|
|
53
65
|
* uses `Object.entries` which should preserve the insertion order of the object. However,
|
|
54
66
|
* object insertion order is easy to get wrong and `Map` is more foolproof.
|
|
55
67
|
*/
|
|
56
|
-
select(columns: string[] | Map<string, string> | Record<string, string>): QueryType;
|
|
68
|
+
select(columns: string[] | Map<string, string> | Record<string, string> | string): QueryType;
|
|
57
69
|
/**
|
|
58
70
|
* Set the maximum number of results to return.
|
|
59
71
|
*
|
|
@@ -61,7 +73,7 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
|
|
|
61
73
|
* called then every valid row from the table will be returned.
|
|
62
74
|
*/
|
|
63
75
|
limit(limit: number): QueryType;
|
|
64
|
-
protected nativeExecute(): Promise<NativeBatchIterator>;
|
|
76
|
+
protected nativeExecute(options?: Partial<QueryExecutionOptions>): Promise<NativeBatchIterator>;
|
|
65
77
|
/**
|
|
66
78
|
* Execute the query and return the results as an @see {@link AsyncIterator}
|
|
67
79
|
* of @see {@link RecordBatch}.
|
|
@@ -73,12 +85,12 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
|
|
|
73
85
|
* single query)
|
|
74
86
|
*
|
|
75
87
|
*/
|
|
76
|
-
protected execute(): RecordBatchIterator;
|
|
88
|
+
protected execute(options?: Partial<QueryExecutionOptions>): RecordBatchIterator;
|
|
77
89
|
[Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>>;
|
|
78
90
|
/** Collect the results as an Arrow @see {@link ArrowTable}. */
|
|
79
|
-
toArrow(): Promise<ArrowTable>;
|
|
91
|
+
toArrow(options?: Partial<QueryExecutionOptions>): Promise<ArrowTable>;
|
|
80
92
|
/** Collect the results as an array of objects. */
|
|
81
|
-
toArray(): Promise<
|
|
93
|
+
toArray(options?: Partial<QueryExecutionOptions>): Promise<any[]>;
|
|
82
94
|
}
|
|
83
95
|
/**
|
|
84
96
|
* An interface for a query that can be executed
|
|
@@ -244,5 +256,5 @@ export declare class Query extends QueryBase<NativeQuery, Query> {
|
|
|
244
256
|
* Vector searches always have a `limit`. If `limit` has not been called then
|
|
245
257
|
* a default `limit` of 10 will be used. @see {@link Query#limit}
|
|
246
258
|
*/
|
|
247
|
-
nearestTo(vector:
|
|
259
|
+
nearestTo(vector: IntoVector): VectorQuery;
|
|
248
260
|
}
|
package/dist/query.js
CHANGED
|
@@ -43,6 +43,18 @@ class RecordBatchIterator {
|
|
|
43
43
|
}
|
|
44
44
|
exports.RecordBatchIterator = RecordBatchIterator;
|
|
45
45
|
/* eslint-enable */
|
|
46
|
+
class RecordBatchIterable {
|
|
47
|
+
inner;
|
|
48
|
+
options;
|
|
49
|
+
constructor(inner, options) {
|
|
50
|
+
this.inner = inner;
|
|
51
|
+
this.options = options;
|
|
52
|
+
}
|
|
53
|
+
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
54
|
+
[Symbol.asyncIterator]() {
|
|
55
|
+
return new RecordBatchIterator(this.inner.execute(this.options?.maxBatchLength));
|
|
56
|
+
}
|
|
57
|
+
}
|
|
46
58
|
/** Common methods supported by all query types */
|
|
47
59
|
class QueryBase {
|
|
48
60
|
inner;
|
|
@@ -98,6 +110,9 @@ class QueryBase {
|
|
|
98
110
|
*/
|
|
99
111
|
select(columns) {
|
|
100
112
|
let columnTuples;
|
|
113
|
+
if (typeof columns === "string") {
|
|
114
|
+
columns = [columns];
|
|
115
|
+
}
|
|
101
116
|
if (Array.isArray(columns)) {
|
|
102
117
|
columnTuples = columns.map((c) => [c, c]);
|
|
103
118
|
}
|
|
@@ -120,8 +135,8 @@ class QueryBase {
|
|
|
120
135
|
this.inner.limit(limit);
|
|
121
136
|
return this;
|
|
122
137
|
}
|
|
123
|
-
nativeExecute() {
|
|
124
|
-
return this.inner.execute();
|
|
138
|
+
nativeExecute(options) {
|
|
139
|
+
return this.inner.execute(options?.maxBatchLength);
|
|
125
140
|
}
|
|
126
141
|
/**
|
|
127
142
|
* Execute the query and return the results as an @see {@link AsyncIterator}
|
|
@@ -134,8 +149,8 @@ class QueryBase {
|
|
|
134
149
|
* single query)
|
|
135
150
|
*
|
|
136
151
|
*/
|
|
137
|
-
execute() {
|
|
138
|
-
return new RecordBatchIterator(this.nativeExecute());
|
|
152
|
+
execute(options) {
|
|
153
|
+
return new RecordBatchIterator(this.nativeExecute(options));
|
|
139
154
|
}
|
|
140
155
|
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
141
156
|
[Symbol.asyncIterator]() {
|
|
@@ -143,17 +158,17 @@ class QueryBase {
|
|
|
143
158
|
return new RecordBatchIterator(promise);
|
|
144
159
|
}
|
|
145
160
|
/** Collect the results as an Arrow @see {@link ArrowTable}. */
|
|
146
|
-
async toArrow() {
|
|
161
|
+
async toArrow(options) {
|
|
147
162
|
const batches = [];
|
|
148
|
-
for await (const batch of this) {
|
|
163
|
+
for await (const batch of new RecordBatchIterable(this.inner, options)) {
|
|
149
164
|
batches.push(batch);
|
|
150
165
|
}
|
|
151
166
|
return new arrow_1.Table(batches);
|
|
152
167
|
}
|
|
153
168
|
/** Collect the results as an array of objects. */
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
169
|
+
// biome-ignore lint/suspicious/noExplicitAny: arrow.toArrow() returns any[]
|
|
170
|
+
async toArray(options) {
|
|
171
|
+
const tbl = await this.toArrow(options);
|
|
157
172
|
return tbl.toArray();
|
|
158
173
|
}
|
|
159
174
|
}
|
|
@@ -339,7 +354,6 @@ class Query extends QueryBase {
|
|
|
339
354
|
* a default `limit` of 10 will be used. @see {@link Query#limit}
|
|
340
355
|
*/
|
|
341
356
|
nearestTo(vector) {
|
|
342
|
-
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
343
357
|
const vectorQuery = this.inner.nearestTo(Float32Array.from(vector));
|
|
344
358
|
return new VectorQuery(vectorQuery);
|
|
345
359
|
}
|
package/dist/table.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Data, Schema } from "./arrow";
|
|
1
|
+
import { Table as ArrowTable, Data, IntoVector, Schema } from "./arrow";
|
|
2
2
|
import { IndexOptions } from "./indices";
|
|
3
3
|
import { AddColumnsSql, ColumnAlteration, IndexConfig, OptimizeStats, Table as _NativeTable } from "./native";
|
|
4
4
|
import { Query, VectorQuery } from "./query";
|
|
@@ -56,6 +56,7 @@ export interface OptimizeOptions {
|
|
|
56
56
|
* collected.
|
|
57
57
|
*/
|
|
58
58
|
export declare class Table {
|
|
59
|
+
#private;
|
|
59
60
|
private readonly inner;
|
|
60
61
|
/** Construct a Table. Internal use only. */
|
|
61
62
|
constructor(inner: _NativeTable);
|
|
@@ -186,6 +187,19 @@ export declare class Table {
|
|
|
186
187
|
* @returns {Query} A builder that can be used to parameterize the query
|
|
187
188
|
*/
|
|
188
189
|
query(): Query;
|
|
190
|
+
/**
|
|
191
|
+
* Create a search query to find the nearest neighbors
|
|
192
|
+
* of the given query vector
|
|
193
|
+
* @param {string} query - the query. This will be converted to a vector using the table's provided embedding function
|
|
194
|
+
* @rejects {Error} If no embedding functions are defined in the table
|
|
195
|
+
*/
|
|
196
|
+
search(query: string): Promise<VectorQuery>;
|
|
197
|
+
/**
|
|
198
|
+
* Create a search query to find the nearest neighbors
|
|
199
|
+
* of the given query vector
|
|
200
|
+
* @param {IntoVector} query - the query vector
|
|
201
|
+
*/
|
|
202
|
+
search(query: IntoVector): VectorQuery;
|
|
189
203
|
/**
|
|
190
204
|
* Search the table with a given query vector.
|
|
191
205
|
*
|
|
@@ -193,7 +207,7 @@ export declare class Table {
|
|
|
193
207
|
* is the same thing as calling `nearestTo` on the builder returned
|
|
194
208
|
* by `query`. @see {@link Query#nearestTo} for more details.
|
|
195
209
|
*/
|
|
196
|
-
vectorSearch(vector:
|
|
210
|
+
vectorSearch(vector: IntoVector): VectorQuery;
|
|
197
211
|
/**
|
|
198
212
|
* Add new columns with defined values.
|
|
199
213
|
* @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
|
|
@@ -301,4 +315,6 @@ export declare class Table {
|
|
|
301
315
|
optimize(options?: Partial<OptimizeOptions>): Promise<OptimizeStats>;
|
|
302
316
|
/** List all indices that have been created with {@link Table.createIndex} */
|
|
303
317
|
listIndices(): Promise<IndexConfig[]>;
|
|
318
|
+
/** Return the table as an arrow table */
|
|
319
|
+
toArrow(): Promise<ArrowTable>;
|
|
304
320
|
}
|
package/dist/table.js
CHANGED
|
@@ -53,6 +53,11 @@ class Table {
|
|
|
53
53
|
display() {
|
|
54
54
|
return this.inner.display();
|
|
55
55
|
}
|
|
56
|
+
async #getEmbeddingFunctions() {
|
|
57
|
+
const schema = await this.schema();
|
|
58
|
+
const registry = (0, registry_1.getRegistry)();
|
|
59
|
+
return registry.parseFunctions(schema.metadata);
|
|
60
|
+
}
|
|
56
61
|
/** Get the schema of the table. */
|
|
57
62
|
async schema() {
|
|
58
63
|
const schemaBuf = await this.inner.schema();
|
|
@@ -68,7 +73,7 @@ class Table {
|
|
|
68
73
|
const schema = await this.schema();
|
|
69
74
|
const registry = (0, registry_1.getRegistry)();
|
|
70
75
|
const functions = registry.parseFunctions(schema.metadata);
|
|
71
|
-
const buffer = await (0, arrow_1.fromDataToBuffer)(data, functions.values().next().value);
|
|
76
|
+
const buffer = await (0, arrow_1.fromDataToBuffer)(data, functions.values().next().value, schema);
|
|
72
77
|
await this.inner.add(buffer, mode);
|
|
73
78
|
}
|
|
74
79
|
/**
|
|
@@ -200,6 +205,24 @@ class Table {
|
|
|
200
205
|
query() {
|
|
201
206
|
return new query_1.Query(this.inner);
|
|
202
207
|
}
|
|
208
|
+
search(query) {
|
|
209
|
+
if (typeof query !== "string") {
|
|
210
|
+
return this.vectorSearch(query);
|
|
211
|
+
}
|
|
212
|
+
else {
|
|
213
|
+
return this.#getEmbeddingFunctions().then(async (functions) => {
|
|
214
|
+
// TODO: Support multiple embedding functions
|
|
215
|
+
const embeddingFunc = functions
|
|
216
|
+
.values()
|
|
217
|
+
.next().value;
|
|
218
|
+
if (!embeddingFunc) {
|
|
219
|
+
return Promise.reject(new Error("No embedding functions are defined in the table"));
|
|
220
|
+
}
|
|
221
|
+
const embeddings = await embeddingFunc.function.computeQueryEmbeddings(query);
|
|
222
|
+
return this.query().nearestTo(embeddings);
|
|
223
|
+
});
|
|
224
|
+
}
|
|
225
|
+
}
|
|
203
226
|
/**
|
|
204
227
|
* Search the table with a given query vector.
|
|
205
228
|
*
|
|
@@ -342,5 +365,9 @@ class Table {
|
|
|
342
365
|
async listIndices() {
|
|
343
366
|
return await this.inner.listIndices();
|
|
344
367
|
}
|
|
368
|
+
/** Return the table as an arrow table */
|
|
369
|
+
async toArrow() {
|
|
370
|
+
return await this.query().toArrow();
|
|
371
|
+
}
|
|
345
372
|
}
|
|
346
373
|
exports.Table = Table;
|
package/lancedb/arrow.ts
CHANGED
|
@@ -31,7 +31,7 @@ import {
|
|
|
31
31
|
Schema,
|
|
32
32
|
Struct,
|
|
33
33
|
Utf8,
|
|
34
|
-
|
|
34
|
+
Vector,
|
|
35
35
|
makeBuilder,
|
|
36
36
|
makeData,
|
|
37
37
|
type makeTable,
|
|
@@ -42,6 +42,8 @@ import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
|
|
|
42
42
|
import { sanitizeField, sanitizeSchema, sanitizeType } from "./sanitize";
|
|
43
43
|
export * from "apache-arrow";
|
|
44
44
|
|
|
45
|
+
export type IntoVector = Float32Array | Float64Array | number[];
|
|
46
|
+
|
|
45
47
|
export function isArrowTable(value: object): value is ArrowTable {
|
|
46
48
|
if (value instanceof ArrowTable) return true;
|
|
47
49
|
return "schema" in value && "batches" in value;
|
|
@@ -182,6 +184,7 @@ export class MakeArrowTableOptions {
|
|
|
182
184
|
vector: new VectorColumnOptions(),
|
|
183
185
|
};
|
|
184
186
|
embeddings?: EmbeddingFunction<unknown>;
|
|
187
|
+
embeddingFunction?: EmbeddingFunctionConfig;
|
|
185
188
|
|
|
186
189
|
/**
|
|
187
190
|
* If true then string columns will be encoded with dictionary encoding
|
|
@@ -306,7 +309,11 @@ export function makeArrowTable(
|
|
|
306
309
|
const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
|
|
307
310
|
if (opt.schema !== undefined && opt.schema !== null) {
|
|
308
311
|
opt.schema = sanitizeSchema(opt.schema);
|
|
309
|
-
opt.schema = validateSchemaEmbeddings(
|
|
312
|
+
opt.schema = validateSchemaEmbeddings(
|
|
313
|
+
opt.schema,
|
|
314
|
+
data,
|
|
315
|
+
options?.embeddingFunction,
|
|
316
|
+
);
|
|
310
317
|
}
|
|
311
318
|
const columns: Record<string, Vector> = {};
|
|
312
319
|
// TODO: sample dataset to find missing columns
|
|
@@ -545,7 +552,6 @@ async function applyEmbeddingsFromMetadata(
|
|
|
545
552
|
dtype,
|
|
546
553
|
);
|
|
547
554
|
}
|
|
548
|
-
|
|
549
555
|
const vector = makeVector(vectors, destType);
|
|
550
556
|
columns[destColumn] = vector;
|
|
551
557
|
}
|
|
@@ -835,7 +841,7 @@ export function createEmptyTable(schema: Schema): ArrowTable {
|
|
|
835
841
|
function validateSchemaEmbeddings(
|
|
836
842
|
schema: Schema,
|
|
837
843
|
data: Array<Record<string, unknown>>,
|
|
838
|
-
embeddings:
|
|
844
|
+
embeddings: EmbeddingFunctionConfig | undefined,
|
|
839
845
|
) {
|
|
840
846
|
const fields = [];
|
|
841
847
|
const missingEmbeddingFields = [];
|
package/lancedb/connection.ts
CHANGED
|
@@ -71,6 +71,12 @@ export interface CreateTableOptions {
|
|
|
71
71
|
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
|
72
72
|
*/
|
|
73
73
|
storageOptions?: Record<string, string>;
|
|
74
|
+
/**
|
|
75
|
+
* If true then data files will be written with the legacy format
|
|
76
|
+
*
|
|
77
|
+
* The default is true while the new format is in beta
|
|
78
|
+
*/
|
|
79
|
+
useLegacyFormat?: boolean;
|
|
74
80
|
schema?: Schema;
|
|
75
81
|
embeddingFunction?: EmbeddingFunctionConfig;
|
|
76
82
|
}
|
|
@@ -221,6 +227,7 @@ export class Connection {
|
|
|
221
227
|
buf,
|
|
222
228
|
mode,
|
|
223
229
|
cleanseStorageOptions(options?.storageOptions),
|
|
230
|
+
options?.useLegacyFormat,
|
|
224
231
|
);
|
|
225
232
|
|
|
226
233
|
return new Table(innerTable);
|
|
@@ -256,6 +263,7 @@ export class Connection {
|
|
|
256
263
|
buf,
|
|
257
264
|
mode,
|
|
258
265
|
cleanseStorageOptions(options?.storageOptions),
|
|
266
|
+
options?.useLegacyFormat,
|
|
259
267
|
);
|
|
260
268
|
return new Table(innerTable);
|
|
261
269
|
}
|
package/lancedb/embedding/embedding_function.ts
CHANGED
|
@@ -19,6 +19,7 @@ import {
|
|
|
19
19
|
FixedSizeList,
|
|
20
20
|
Float,
|
|
21
21
|
Float32,
|
|
22
|
+
type IntoVector,
|
|
22
23
|
isDataType,
|
|
23
24
|
isFixedSizeList,
|
|
24
25
|
isFloat,
|
|
@@ -100,33 +101,55 @@ export abstract class EmbeddingFunction<
|
|
|
100
101
|
* @see {@link lancedb.LanceSchema}
|
|
101
102
|
*/
|
|
102
103
|
vectorField(
|
|
103
|
-
|
|
104
|
+
optionsOrDatatype?: Partial<FieldOptions> | DataType,
|
|
104
105
|
): [DataType, Map<string, EmbeddingFunction>] {
|
|
105
|
-
let dtype: DataType;
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
dtype =
|
|
106
|
+
let dtype: DataType | undefined;
|
|
107
|
+
let vectorType: DataType;
|
|
108
|
+
let dims: number | undefined = this.ndims();
|
|
109
|
+
|
|
110
|
+
// `func.vectorField(new Float32())`
|
|
111
|
+
if (isDataType(optionsOrDatatype)) {
|
|
112
|
+
dtype = optionsOrDatatype;
|
|
112
113
|
} else {
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
114
|
+
// `func.vectorField({
|
|
115
|
+
// datatype: new Float32(),
|
|
116
|
+
// dims: 10
|
|
117
|
+
// })`
|
|
118
|
+
dims = dims ?? optionsOrDatatype?.dims;
|
|
119
|
+
dtype = optionsOrDatatype?.datatype;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
if (dtype !== undefined) {
|
|
123
|
+
// `func.vectorField(new FixedSizeList(dims, new Field("item", new Float32(), true)))`
|
|
124
|
+
// or `func.vectorField({datatype: new FixedSizeList(dims, new Field("item", new Float32(), true))})`
|
|
125
|
+
if (isFixedSizeList(dtype)) {
|
|
126
|
+
vectorType = dtype;
|
|
127
|
+
// `func.vectorField(new Float32())`
|
|
128
|
+
// or `func.vectorField({datatype: new Float32()})`
|
|
129
|
+
} else if (isFloat(dtype)) {
|
|
130
|
+
// No `ndims` impl and no `{dims: n}` provided;
|
|
116
131
|
if (dims === undefined) {
|
|
117
132
|
throw new Error("ndims is required for vector field");
|
|
118
133
|
}
|
|
119
|
-
|
|
134
|
+
vectorType = newVectorType(dims, dtype);
|
|
120
135
|
} else {
|
|
121
136
|
throw new Error(
|
|
122
137
|
"Expected FixedSizeList or Float as datatype for vector field",
|
|
123
138
|
);
|
|
124
139
|
}
|
|
140
|
+
} else {
|
|
141
|
+
if (dims === undefined) {
|
|
142
|
+
throw new Error("ndims is required for vector field");
|
|
143
|
+
}
|
|
144
|
+
vectorType = new FixedSizeList(
|
|
145
|
+
dims,
|
|
146
|
+
new Field("item", new Float32(), true),
|
|
147
|
+
);
|
|
125
148
|
}
|
|
126
149
|
const metadata = new Map<string, EmbeddingFunction>();
|
|
127
150
|
metadata.set("vector_column_for", this);
|
|
128
151
|
|
|
129
|
-
return [
|
|
152
|
+
return [vectorType, metadata];
|
|
130
153
|
}
|
|
131
154
|
|
|
132
155
|
/** The number of dimensions of the embeddings */
|
|
@@ -147,9 +170,7 @@ export abstract class EmbeddingFunction<
|
|
|
147
170
|
/**
|
|
148
171
|
Compute the embeddings for a single query
|
|
149
172
|
*/
|
|
150
|
-
async computeQueryEmbeddings(
|
|
151
|
-
data: T,
|
|
152
|
-
): Promise<number[] | Float32Array | Float64Array> {
|
|
173
|
+
async computeQueryEmbeddings(data: T): Promise<IntoVector> {
|
|
153
174
|
return this.computeSourceEmbeddings([data]).then(
|
|
154
175
|
(embeddings) => embeddings[0],
|
|
155
176
|
);
|
package/lancedb/embedding/registry.ts
CHANGED
|
@@ -42,6 +42,7 @@ export class EmbeddingFunctionRegistry {
|
|
|
42
42
|
* Register an embedding function
|
|
43
43
|
* @param name The name of the function
|
|
44
44
|
* @param func The function to register
|
|
45
|
+
* @throws Error if the function is already registered
|
|
45
46
|
*/
|
|
46
47
|
register<T extends EmbeddingFunctionFactory = EmbeddingFunctionFactory>(
|
|
47
48
|
this: EmbeddingFunctionRegistry,
|
|
@@ -89,6 +90,9 @@ export class EmbeddingFunctionRegistry {
|
|
|
89
90
|
this.#functions.clear();
|
|
90
91
|
}
|
|
91
92
|
|
|
93
|
+
/**
|
|
94
|
+
* @ignore
|
|
95
|
+
*/
|
|
92
96
|
parseFunctions(
|
|
93
97
|
this: EmbeddingFunctionRegistry,
|
|
94
98
|
metadata: Map<string, string>,
|