@lancedb/lancedb 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/biome.json +8 -2
- package/dist/arrow.d.ts +36 -9
- package/dist/arrow.js +222 -24
- package/dist/connection.d.ts +10 -1
- package/dist/connection.js +13 -7
- package/dist/embedding/embedding_function.d.ts +54 -28
- package/dist/embedding/embedding_function.js +89 -10
- package/dist/embedding/index.d.ts +28 -2
- package/dist/embedding/index.js +111 -4
- package/dist/embedding/openai.d.ts +16 -7
- package/dist/embedding/openai.js +62 -12
- package/dist/embedding/registry.d.ts +58 -0
- package/dist/embedding/registry.js +127 -0
- package/dist/native.d.ts +5 -4
- package/dist/query.d.ts +19 -7
- package/dist/query.js +27 -13
- package/dist/sanitize.d.ts +22 -1
- package/dist/sanitize.js +123 -110
- package/dist/table.d.ts +18 -3
- package/dist/table.js +33 -3
- package/lancedb/arrow.ts +243 -41
- package/lancedb/connection.ts +35 -6
- package/lancedb/embedding/embedding_function.ts +147 -42
- package/lancedb/embedding/index.ts +113 -2
- package/lancedb/embedding/openai.ts +62 -16
- package/lancedb/embedding/registry.ts +176 -0
- package/lancedb/query.ts +58 -14
- package/lancedb/sanitize.ts +22 -22
- package/lancedb/table.ts +67 -5
- package/nodejs-artifacts/arrow.d.ts +36 -9
- package/nodejs-artifacts/arrow.js +222 -24
- package/nodejs-artifacts/connection.d.ts +10 -1
- package/nodejs-artifacts/connection.js +13 -7
- package/nodejs-artifacts/embedding/embedding_function.d.ts +54 -28
- package/nodejs-artifacts/embedding/embedding_function.js +89 -10
- package/nodejs-artifacts/embedding/index.d.ts +28 -2
- package/nodejs-artifacts/embedding/index.js +111 -4
- package/nodejs-artifacts/embedding/openai.d.ts +16 -7
- package/nodejs-artifacts/embedding/openai.js +62 -12
- package/nodejs-artifacts/embedding/registry.d.ts +58 -0
- package/nodejs-artifacts/embedding/registry.js +127 -0
- package/nodejs-artifacts/native.d.ts +5 -4
- package/nodejs-artifacts/query.d.ts +19 -7
- package/nodejs-artifacts/query.js +27 -13
- package/nodejs-artifacts/sanitize.d.ts +22 -1
- package/nodejs-artifacts/sanitize.js +123 -110
- package/nodejs-artifacts/table.d.ts +18 -3
- package/nodejs-artifacts/table.js +33 -3
- package/package.json +14 -9
- package/tsconfig.json +3 -1
package/lancedb/query.ts
CHANGED
|
@@ -12,7 +12,12 @@
|
|
|
12
12
|
// See the License for the specific language governing permissions and
|
|
13
13
|
// limitations under the License.
|
|
14
14
|
|
|
15
|
-
import {
|
|
15
|
+
import {
|
|
16
|
+
Table as ArrowTable,
|
|
17
|
+
type IntoVector,
|
|
18
|
+
RecordBatch,
|
|
19
|
+
tableFromIPC,
|
|
20
|
+
} from "./arrow";
|
|
16
21
|
import { type IvfPqOptions } from "./indices";
|
|
17
22
|
import {
|
|
18
23
|
RecordBatchIterator as NativeBatchIterator,
|
|
@@ -50,6 +55,39 @@ export class RecordBatchIterator implements AsyncIterator<RecordBatch> {
|
|
|
50
55
|
}
|
|
51
56
|
/* eslint-enable */
|
|
52
57
|
|
|
58
|
+
class RecordBatchIterable<
|
|
59
|
+
NativeQueryType extends NativeQuery | NativeVectorQuery,
|
|
60
|
+
> implements AsyncIterable<RecordBatch>
|
|
61
|
+
{
|
|
62
|
+
private inner: NativeQueryType;
|
|
63
|
+
private options?: QueryExecutionOptions;
|
|
64
|
+
|
|
65
|
+
constructor(inner: NativeQueryType, options?: QueryExecutionOptions) {
|
|
66
|
+
this.inner = inner;
|
|
67
|
+
this.options = options;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
71
|
+
[Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>, any, undefined> {
|
|
72
|
+
return new RecordBatchIterator(
|
|
73
|
+
this.inner.execute(this.options?.maxBatchLength),
|
|
74
|
+
);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
* Options that control the behavior of a particular query execution
|
|
80
|
+
*/
|
|
81
|
+
export interface QueryExecutionOptions {
|
|
82
|
+
/**
|
|
83
|
+
* The maximum number of rows to return in a single batch
|
|
84
|
+
*
|
|
85
|
+
* Batches may have fewer rows if the underlying data is stored
|
|
86
|
+
* in smaller chunks.
|
|
87
|
+
*/
|
|
88
|
+
maxBatchLength?: number;
|
|
89
|
+
}
|
|
90
|
+
|
|
53
91
|
/** Common methods supported by all query types */
|
|
54
92
|
export class QueryBase<
|
|
55
93
|
NativeQueryType extends NativeQuery | NativeVectorQuery,
|
|
@@ -108,9 +146,12 @@ export class QueryBase<
|
|
|
108
146
|
* object insertion order is easy to get wrong and `Map` is more foolproof.
|
|
109
147
|
*/
|
|
110
148
|
select(
|
|
111
|
-
columns: string[] | Map<string, string> | Record<string, string
|
|
149
|
+
columns: string[] | Map<string, string> | Record<string, string> | string,
|
|
112
150
|
): QueryType {
|
|
113
151
|
let columnTuples: [string, string][];
|
|
152
|
+
if (typeof columns === "string") {
|
|
153
|
+
columns = [columns];
|
|
154
|
+
}
|
|
114
155
|
if (Array.isArray(columns)) {
|
|
115
156
|
columnTuples = columns.map((c) => [c, c]);
|
|
116
157
|
} else if (columns instanceof Map) {
|
|
@@ -133,8 +174,10 @@ export class QueryBase<
|
|
|
133
174
|
return this as unknown as QueryType;
|
|
134
175
|
}
|
|
135
176
|
|
|
136
|
-
protected nativeExecute(
|
|
137
|
-
|
|
177
|
+
protected nativeExecute(
|
|
178
|
+
options?: Partial<QueryExecutionOptions>,
|
|
179
|
+
): Promise<NativeBatchIterator> {
|
|
180
|
+
return this.inner.execute(options?.maxBatchLength);
|
|
138
181
|
}
|
|
139
182
|
|
|
140
183
|
/**
|
|
@@ -148,8 +191,10 @@ export class QueryBase<
|
|
|
148
191
|
* single query)
|
|
149
192
|
*
|
|
150
193
|
*/
|
|
151
|
-
protected execute(
|
|
152
|
-
|
|
194
|
+
protected execute(
|
|
195
|
+
options?: Partial<QueryExecutionOptions>,
|
|
196
|
+
): RecordBatchIterator {
|
|
197
|
+
return new RecordBatchIterator(this.nativeExecute(options));
|
|
153
198
|
}
|
|
154
199
|
|
|
155
200
|
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
@@ -159,18 +204,18 @@ export class QueryBase<
|
|
|
159
204
|
}
|
|
160
205
|
|
|
161
206
|
/** Collect the results as an Arrow @see {@link ArrowTable}. */
|
|
162
|
-
async toArrow(): Promise<ArrowTable> {
|
|
207
|
+
async toArrow(options?: Partial<QueryExecutionOptions>): Promise<ArrowTable> {
|
|
163
208
|
const batches = [];
|
|
164
|
-
for await (const batch of this) {
|
|
209
|
+
for await (const batch of new RecordBatchIterable(this.inner, options)) {
|
|
165
210
|
batches.push(batch);
|
|
166
211
|
}
|
|
167
212
|
return new ArrowTable(batches);
|
|
168
213
|
}
|
|
169
214
|
|
|
170
215
|
/** Collect the results as an array of objects. */
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
216
|
+
// biome-ignore lint/suspicious/noExplicitAny: arrow.toArrow() returns any[]
|
|
217
|
+
async toArray(options?: Partial<QueryExecutionOptions>): Promise<any[]> {
|
|
218
|
+
const tbl = await this.toArrow(options);
|
|
174
219
|
return tbl.toArray();
|
|
175
220
|
}
|
|
176
221
|
}
|
|
@@ -369,9 +414,8 @@ export class Query extends QueryBase<NativeQuery, Query> {
|
|
|
369
414
|
* Vector searches always have a `limit`. If `limit` has not been called then
|
|
370
415
|
* a default `limit` of 10 will be used. @see {@link Query#limit}
|
|
371
416
|
*/
|
|
372
|
-
nearestTo(vector:
|
|
373
|
-
|
|
374
|
-
const vectorQuery = this.inner.nearestTo(Float32Array.from(vector as any));
|
|
417
|
+
nearestTo(vector: IntoVector): VectorQuery {
|
|
418
|
+
const vectorQuery = this.inner.nearestTo(Float32Array.from(vector));
|
|
375
419
|
return new VectorQuery(vectorQuery);
|
|
376
420
|
}
|
|
377
421
|
}
|
package/lancedb/sanitize.ts
CHANGED
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
// comes from the exact same library instance. This is not always the case
|
|
21
21
|
// and so we must sanitize the input to ensure that it is compatible.
|
|
22
22
|
|
|
23
|
+
import type { IntBitWidth, TKeys, TimeBitWidth } from "apache-arrow/type";
|
|
23
24
|
import {
|
|
24
25
|
Binary,
|
|
25
26
|
Bool,
|
|
@@ -75,10 +76,9 @@ import {
|
|
|
75
76
|
Uint64,
|
|
76
77
|
Union,
|
|
77
78
|
Utf8,
|
|
78
|
-
} from "
|
|
79
|
-
import type { IntBitWidth, TKeys, TimeBitWidth } from "apache-arrow/type";
|
|
79
|
+
} from "./arrow";
|
|
80
80
|
|
|
81
|
-
function sanitizeMetadata(
|
|
81
|
+
export function sanitizeMetadata(
|
|
82
82
|
metadataLike?: unknown,
|
|
83
83
|
): Map<string, string> | undefined {
|
|
84
84
|
if (metadataLike === undefined || metadataLike === null) {
|
|
@@ -97,7 +97,7 @@ function sanitizeMetadata(
|
|
|
97
97
|
return metadataLike as Map<string, string>;
|
|
98
98
|
}
|
|
99
99
|
|
|
100
|
-
function sanitizeInt(typeLike: object) {
|
|
100
|
+
export function sanitizeInt(typeLike: object) {
|
|
101
101
|
if (
|
|
102
102
|
!("bitWidth" in typeLike) ||
|
|
103
103
|
typeof typeLike.bitWidth !== "number" ||
|
|
@@ -111,14 +111,14 @@ function sanitizeInt(typeLike: object) {
|
|
|
111
111
|
return new Int(typeLike.isSigned, typeLike.bitWidth as IntBitWidth);
|
|
112
112
|
}
|
|
113
113
|
|
|
114
|
-
function sanitizeFloat(typeLike: object) {
|
|
114
|
+
export function sanitizeFloat(typeLike: object) {
|
|
115
115
|
if (!("precision" in typeLike) || typeof typeLike.precision !== "number") {
|
|
116
116
|
throw Error("Expected a Float Type to have a `precision` property");
|
|
117
117
|
}
|
|
118
118
|
return new Float(typeLike.precision as Precision);
|
|
119
119
|
}
|
|
120
120
|
|
|
121
|
-
function sanitizeDecimal(typeLike: object) {
|
|
121
|
+
export function sanitizeDecimal(typeLike: object) {
|
|
122
122
|
if (
|
|
123
123
|
!("scale" in typeLike) ||
|
|
124
124
|
typeof typeLike.scale !== "number" ||
|
|
@@ -134,14 +134,14 @@ function sanitizeDecimal(typeLike: object) {
|
|
|
134
134
|
return new Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth);
|
|
135
135
|
}
|
|
136
136
|
|
|
137
|
-
function sanitizeDate(typeLike: object) {
|
|
137
|
+
export function sanitizeDate(typeLike: object) {
|
|
138
138
|
if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
|
|
139
139
|
throw Error("Expected a Date type to have a `unit` property");
|
|
140
140
|
}
|
|
141
141
|
return new Date_(typeLike.unit as DateUnit);
|
|
142
142
|
}
|
|
143
143
|
|
|
144
|
-
function sanitizeTime(typeLike: object) {
|
|
144
|
+
export function sanitizeTime(typeLike: object) {
|
|
145
145
|
if (
|
|
146
146
|
!("unit" in typeLike) ||
|
|
147
147
|
typeof typeLike.unit !== "number" ||
|
|
@@ -155,7 +155,7 @@ function sanitizeTime(typeLike: object) {
|
|
|
155
155
|
return new Time(typeLike.unit, typeLike.bitWidth as TimeBitWidth);
|
|
156
156
|
}
|
|
157
157
|
|
|
158
|
-
function sanitizeTimestamp(typeLike: object) {
|
|
158
|
+
export function sanitizeTimestamp(typeLike: object) {
|
|
159
159
|
if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
|
|
160
160
|
throw Error("Expected a Timestamp type to have a `unit` property");
|
|
161
161
|
}
|
|
@@ -166,7 +166,7 @@ function sanitizeTimestamp(typeLike: object) {
|
|
|
166
166
|
return new Timestamp(typeLike.unit, timezone);
|
|
167
167
|
}
|
|
168
168
|
|
|
169
|
-
function sanitizeTypedTimestamp(
|
|
169
|
+
export function sanitizeTypedTimestamp(
|
|
170
170
|
typeLike: object,
|
|
171
171
|
// eslint-disable-next-line @typescript-eslint/naming-convention
|
|
172
172
|
Datatype:
|
|
@@ -182,14 +182,14 @@ function sanitizeTypedTimestamp(
|
|
|
182
182
|
return new Datatype(timezone);
|
|
183
183
|
}
|
|
184
184
|
|
|
185
|
-
function sanitizeInterval(typeLike: object) {
|
|
185
|
+
export function sanitizeInterval(typeLike: object) {
|
|
186
186
|
if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
|
|
187
187
|
throw Error("Expected an Interval type to have a `unit` property");
|
|
188
188
|
}
|
|
189
189
|
return new Interval(typeLike.unit);
|
|
190
190
|
}
|
|
191
191
|
|
|
192
|
-
function sanitizeList(typeLike: object) {
|
|
192
|
+
export function sanitizeList(typeLike: object) {
|
|
193
193
|
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
|
|
194
194
|
throw Error(
|
|
195
195
|
"Expected a List type to have an array-like `children` property",
|
|
@@ -201,7 +201,7 @@ function sanitizeList(typeLike: object) {
|
|
|
201
201
|
return new List(sanitizeField(typeLike.children[0]));
|
|
202
202
|
}
|
|
203
203
|
|
|
204
|
-
function sanitizeStruct(typeLike: object) {
|
|
204
|
+
export function sanitizeStruct(typeLike: object) {
|
|
205
205
|
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
|
|
206
206
|
throw Error(
|
|
207
207
|
"Expected a Struct type to have an array-like `children` property",
|
|
@@ -210,7 +210,7 @@ function sanitizeStruct(typeLike: object) {
|
|
|
210
210
|
return new Struct(typeLike.children.map((child) => sanitizeField(child)));
|
|
211
211
|
}
|
|
212
212
|
|
|
213
|
-
function sanitizeUnion(typeLike: object) {
|
|
213
|
+
export function sanitizeUnion(typeLike: object) {
|
|
214
214
|
if (
|
|
215
215
|
!("typeIds" in typeLike) ||
|
|
216
216
|
!("mode" in typeLike) ||
|
|
@@ -234,7 +234,7 @@ function sanitizeUnion(typeLike: object) {
|
|
|
234
234
|
);
|
|
235
235
|
}
|
|
236
236
|
|
|
237
|
-
function sanitizeTypedUnion(
|
|
237
|
+
export function sanitizeTypedUnion(
|
|
238
238
|
typeLike: object,
|
|
239
239
|
// eslint-disable-next-line @typescript-eslint/naming-convention
|
|
240
240
|
UnionType: typeof DenseUnion | typeof SparseUnion,
|
|
@@ -256,7 +256,7 @@ function sanitizeTypedUnion(
|
|
|
256
256
|
);
|
|
257
257
|
}
|
|
258
258
|
|
|
259
|
-
function sanitizeFixedSizeBinary(typeLike: object) {
|
|
259
|
+
export function sanitizeFixedSizeBinary(typeLike: object) {
|
|
260
260
|
if (!("byteWidth" in typeLike) || typeof typeLike.byteWidth !== "number") {
|
|
261
261
|
throw Error(
|
|
262
262
|
"Expected a FixedSizeBinary type to have a `byteWidth` property",
|
|
@@ -265,7 +265,7 @@ function sanitizeFixedSizeBinary(typeLike: object) {
|
|
|
265
265
|
return new FixedSizeBinary(typeLike.byteWidth);
|
|
266
266
|
}
|
|
267
267
|
|
|
268
|
-
function sanitizeFixedSizeList(typeLike: object) {
|
|
268
|
+
export function sanitizeFixedSizeList(typeLike: object) {
|
|
269
269
|
if (!("listSize" in typeLike) || typeof typeLike.listSize !== "number") {
|
|
270
270
|
throw Error("Expected a FixedSizeList type to have a `listSize` property");
|
|
271
271
|
}
|
|
@@ -283,7 +283,7 @@ function sanitizeFixedSizeList(typeLike: object) {
|
|
|
283
283
|
);
|
|
284
284
|
}
|
|
285
285
|
|
|
286
|
-
function sanitizeMap(typeLike: object) {
|
|
286
|
+
export function sanitizeMap(typeLike: object) {
|
|
287
287
|
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
|
|
288
288
|
throw Error(
|
|
289
289
|
"Expected a Map type to have an array-like `children` property",
|
|
@@ -300,14 +300,14 @@ function sanitizeMap(typeLike: object) {
|
|
|
300
300
|
);
|
|
301
301
|
}
|
|
302
302
|
|
|
303
|
-
function sanitizeDuration(typeLike: object) {
|
|
303
|
+
export function sanitizeDuration(typeLike: object) {
|
|
304
304
|
if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
|
|
305
305
|
throw Error("Expected a Duration type to have a `unit` property");
|
|
306
306
|
}
|
|
307
307
|
return new Duration(typeLike.unit);
|
|
308
308
|
}
|
|
309
309
|
|
|
310
|
-
function sanitizeDictionary(typeLike: object) {
|
|
310
|
+
export function sanitizeDictionary(typeLike: object) {
|
|
311
311
|
if (!("id" in typeLike) || typeof typeLike.id !== "number") {
|
|
312
312
|
throw Error("Expected a Dictionary type to have an `id` property");
|
|
313
313
|
}
|
|
@@ -329,7 +329,7 @@ function sanitizeDictionary(typeLike: object) {
|
|
|
329
329
|
}
|
|
330
330
|
|
|
331
331
|
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
332
|
-
function sanitizeType(typeLike: unknown): DataType<any> {
|
|
332
|
+
export function sanitizeType(typeLike: unknown): DataType<any> {
|
|
333
333
|
if (typeof typeLike !== "object" || typeLike === null) {
|
|
334
334
|
throw Error("Expected a Type but object was null/undefined");
|
|
335
335
|
}
|
|
@@ -449,7 +449,7 @@ function sanitizeType(typeLike: unknown): DataType<any> {
|
|
|
449
449
|
}
|
|
450
450
|
}
|
|
451
451
|
|
|
452
|
-
function sanitizeField(fieldLike: unknown): Field {
|
|
452
|
+
export function sanitizeField(fieldLike: unknown): Field {
|
|
453
453
|
if (fieldLike instanceof Field) {
|
|
454
454
|
return fieldLike;
|
|
455
455
|
}
|
package/lancedb/table.ts
CHANGED
|
@@ -12,8 +12,16 @@
|
|
|
12
12
|
// See the License for the specific language governing permissions and
|
|
13
13
|
// limitations under the License.
|
|
14
14
|
|
|
15
|
-
import {
|
|
16
|
-
|
|
15
|
+
import {
|
|
16
|
+
Table as ArrowTable,
|
|
17
|
+
Data,
|
|
18
|
+
IntoVector,
|
|
19
|
+
Schema,
|
|
20
|
+
fromDataToBuffer,
|
|
21
|
+
tableFromIPC,
|
|
22
|
+
} from "./arrow";
|
|
23
|
+
|
|
24
|
+
import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
|
|
17
25
|
import { IndexOptions } from "./indices";
|
|
18
26
|
import {
|
|
19
27
|
AddColumnsSql,
|
|
@@ -23,8 +31,8 @@ import {
|
|
|
23
31
|
Table as _NativeTable,
|
|
24
32
|
} from "./native";
|
|
25
33
|
import { Query, VectorQuery } from "./query";
|
|
26
|
-
|
|
27
34
|
export { IndexConfig } from "./native";
|
|
35
|
+
|
|
28
36
|
/**
|
|
29
37
|
* Options for adding data to a table.
|
|
30
38
|
*/
|
|
@@ -109,6 +117,14 @@ export class Table {
|
|
|
109
117
|
return this.inner.display();
|
|
110
118
|
}
|
|
111
119
|
|
|
120
|
+
async #getEmbeddingFunctions(): Promise<
|
|
121
|
+
Map<string, EmbeddingFunctionConfig>
|
|
122
|
+
> {
|
|
123
|
+
const schema = await this.schema();
|
|
124
|
+
const registry = getRegistry();
|
|
125
|
+
return registry.parseFunctions(schema.metadata);
|
|
126
|
+
}
|
|
127
|
+
|
|
112
128
|
/** Get the schema of the table. */
|
|
113
129
|
async schema(): Promise<Schema> {
|
|
114
130
|
const schemaBuf = await this.inner.schema();
|
|
@@ -122,8 +138,15 @@ export class Table {
|
|
|
122
138
|
*/
|
|
123
139
|
async add(data: Data, options?: Partial<AddDataOptions>): Promise<void> {
|
|
124
140
|
const mode = options?.mode ?? "append";
|
|
141
|
+
const schema = await this.schema();
|
|
142
|
+
const registry = getRegistry();
|
|
143
|
+
const functions = registry.parseFunctions(schema.metadata);
|
|
125
144
|
|
|
126
|
-
const buffer = await fromDataToBuffer(
|
|
145
|
+
const buffer = await fromDataToBuffer(
|
|
146
|
+
data,
|
|
147
|
+
functions.values().next().value,
|
|
148
|
+
schema,
|
|
149
|
+
);
|
|
127
150
|
await this.inner.add(buffer, mode);
|
|
128
151
|
}
|
|
129
152
|
|
|
@@ -263,6 +286,40 @@ export class Table {
|
|
|
263
286
|
return new Query(this.inner);
|
|
264
287
|
}
|
|
265
288
|
|
|
289
|
+
/**
|
|
290
|
+
* Create a search query to find the nearest neighbors
|
|
291
|
+
* of the given query vector
|
|
292
|
+
* @param {string} query - the query. This will be converted to a vector using the table's provided embedding function
|
|
293
|
+
* @rejects {Error} If no embedding functions are defined in the table
|
|
294
|
+
*/
|
|
295
|
+
search(query: string): Promise<VectorQuery>;
|
|
296
|
+
/**
|
|
297
|
+
* Create a search query to find the nearest neighbors
|
|
298
|
+
* of the given query vector
|
|
299
|
+
* @param {IntoVector} query - the query vector
|
|
300
|
+
*/
|
|
301
|
+
search(query: IntoVector): VectorQuery;
|
|
302
|
+
search(query: string | IntoVector): Promise<VectorQuery> | VectorQuery {
|
|
303
|
+
if (typeof query !== "string") {
|
|
304
|
+
return this.vectorSearch(query);
|
|
305
|
+
} else {
|
|
306
|
+
return this.#getEmbeddingFunctions().then(async (functions) => {
|
|
307
|
+
// TODO: Support multiple embedding functions
|
|
308
|
+
const embeddingFunc: EmbeddingFunctionConfig | undefined = functions
|
|
309
|
+
.values()
|
|
310
|
+
.next().value;
|
|
311
|
+
if (!embeddingFunc) {
|
|
312
|
+
return Promise.reject(
|
|
313
|
+
new Error("No embedding functions are defined in the table"),
|
|
314
|
+
);
|
|
315
|
+
}
|
|
316
|
+
const embeddings =
|
|
317
|
+
await embeddingFunc.function.computeQueryEmbeddings(query);
|
|
318
|
+
return this.query().nearestTo(embeddings);
|
|
319
|
+
});
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
|
|
266
323
|
/**
|
|
267
324
|
* Search the table with a given query vector.
|
|
268
325
|
*
|
|
@@ -270,7 +327,7 @@ export class Table {
|
|
|
270
327
|
* is the same thing as calling `nearestTo` on the builder returned
|
|
271
328
|
* by `query`. @see {@link Query#nearestTo} for more details.
|
|
272
329
|
*/
|
|
273
|
-
vectorSearch(vector:
|
|
330
|
+
vectorSearch(vector: IntoVector): VectorQuery {
|
|
274
331
|
return this.query().nearestTo(vector);
|
|
275
332
|
}
|
|
276
333
|
|
|
@@ -416,4 +473,9 @@ export class Table {
|
|
|
416
473
|
async listIndices(): Promise<IndexConfig[]> {
|
|
417
474
|
return await this.inner.listIndices();
|
|
418
475
|
}
|
|
476
|
+
|
|
477
|
+
/** Return the table as an arrow table */
|
|
478
|
+
async toArrow(): Promise<ArrowTable> {
|
|
479
|
+
return await this.query().toArrow();
|
|
480
|
+
}
|
|
419
481
|
}
|
|
@@ -1,6 +1,30 @@
|
|
|
1
1
|
/// <reference types="node" />
|
|
2
|
-
import { Table as ArrowTable,
|
|
2
|
+
import { Table as ArrowTable, Binary, DataType, FixedSizeBinary, FixedSizeList, Float, Int, LargeBinary, List, Null, Schema, Struct, Utf8 } from "apache-arrow";
|
|
3
3
|
import { type EmbeddingFunction } from "./embedding/embedding_function";
|
|
4
|
+
import { EmbeddingFunctionConfig } from "./embedding/registry";
|
|
5
|
+
export * from "apache-arrow";
|
|
6
|
+
export type IntoVector = Float32Array | Float64Array | number[];
|
|
7
|
+
export declare function isArrowTable(value: object): value is ArrowTable;
|
|
8
|
+
export declare function isDataType(value: unknown): value is DataType;
|
|
9
|
+
export declare function isNull(value: unknown): value is Null;
|
|
10
|
+
export declare function isInt(value: unknown): value is Int;
|
|
11
|
+
export declare function isFloat(value: unknown): value is Float;
|
|
12
|
+
export declare function isBinary(value: unknown): value is Binary;
|
|
13
|
+
export declare function isLargeBinary(value: unknown): value is LargeBinary;
|
|
14
|
+
export declare function isUtf8(value: unknown): value is Utf8;
|
|
15
|
+
export declare function isLargeUtf8(value: unknown): value is Utf8;
|
|
16
|
+
export declare function isBool(value: unknown): value is Utf8;
|
|
17
|
+
export declare function isDecimal(value: unknown): value is Utf8;
|
|
18
|
+
export declare function isDate(value: unknown): value is Utf8;
|
|
19
|
+
export declare function isTime(value: unknown): value is Utf8;
|
|
20
|
+
export declare function isTimestamp(value: unknown): value is Utf8;
|
|
21
|
+
export declare function isInterval(value: unknown): value is Utf8;
|
|
22
|
+
export declare function isDuration(value: unknown): value is Utf8;
|
|
23
|
+
export declare function isList(value: unknown): value is List;
|
|
24
|
+
export declare function isStruct(value: unknown): value is Struct;
|
|
25
|
+
export declare function isUnion(value: unknown): value is Struct;
|
|
26
|
+
export declare function isFixedSizeBinary(value: unknown): value is FixedSizeBinary;
|
|
27
|
+
export declare function isFixedSizeList(value: unknown): value is FixedSizeList;
|
|
4
28
|
/** Data type accepted by NodeJS SDK */
|
|
5
29
|
export type Data = Record<string, unknown>[] | ArrowTable;
|
|
6
30
|
export declare class VectorColumnOptions {
|
|
@@ -13,6 +37,7 @@ export declare class MakeArrowTableOptions {
|
|
|
13
37
|
schema?: Schema;
|
|
14
38
|
vectorColumns: Record<string, VectorColumnOptions>;
|
|
15
39
|
embeddings?: EmbeddingFunction<unknown>;
|
|
40
|
+
embeddingFunction?: EmbeddingFunctionConfig;
|
|
16
41
|
/**
|
|
17
42
|
* If true then string columns will be encoded with dictionary encoding
|
|
18
43
|
*
|
|
@@ -117,11 +142,11 @@ export declare class MakeArrowTableOptions {
|
|
|
117
142
|
* assert.deepEqual(table.schema, schema)
|
|
118
143
|
* ```
|
|
119
144
|
*/
|
|
120
|
-
export declare function makeArrowTable(data: Array<Record<string, unknown>>, options?: Partial<MakeArrowTableOptions>): ArrowTable;
|
|
145
|
+
export declare function makeArrowTable(data: Array<Record<string, unknown>>, options?: Partial<MakeArrowTableOptions>, metadata?: Map<string, string>): ArrowTable;
|
|
121
146
|
/**
|
|
122
147
|
* Create an empty Arrow table with the provided schema
|
|
123
148
|
*/
|
|
124
|
-
export declare function makeEmptyTable(schema: Schema): ArrowTable;
|
|
149
|
+
export declare function makeEmptyTable(schema: Schema, metadata?: Map<string, string>): ArrowTable;
|
|
125
150
|
/**
|
|
126
151
|
* Convert an Array of records into an Arrow Table, optionally applying an
|
|
127
152
|
* embeddings function to it.
|
|
@@ -140,7 +165,9 @@ export declare function makeEmptyTable(schema: Schema): ArrowTable;
|
|
|
140
165
|
* embedding columns. If no schema is provded then embedding columns will
|
|
141
166
|
* be placed at the end of the table, after all of the input columns.
|
|
142
167
|
*/
|
|
143
|
-
export declare function convertToTable
|
|
168
|
+
export declare function convertToTable(data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunctionConfig, makeTableOptions?: Partial<MakeArrowTableOptions>): Promise<ArrowTable>;
|
|
169
|
+
/** Creates the Arrow Type for a Vector column with dimension `dim` */
|
|
170
|
+
export declare function newVectorType<T extends Float>(dim: number, innerType: T): FixedSizeList<T>;
|
|
144
171
|
/**
|
|
145
172
|
* Serialize an Array of records into a buffer using the Arrow IPC File serialization
|
|
146
173
|
*
|
|
@@ -148,7 +175,7 @@ export declare function convertToTable<T>(data: Array<Record<string, unknown>>,
|
|
|
148
175
|
*
|
|
149
176
|
* `schema` is required if data is empty
|
|
150
177
|
*/
|
|
151
|
-
export declare function fromRecordsToBuffer
|
|
178
|
+
export declare function fromRecordsToBuffer(data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunctionConfig, schema?: Schema): Promise<Buffer>;
|
|
152
179
|
/**
|
|
153
180
|
* Serialize an Array of records into a buffer using the Arrow IPC Stream serialization
|
|
154
181
|
*
|
|
@@ -156,7 +183,7 @@ export declare function fromRecordsToBuffer<T>(data: Array<Record<string, unknow
|
|
|
156
183
|
*
|
|
157
184
|
* `schema` is required if data is empty
|
|
158
185
|
*/
|
|
159
|
-
export declare function fromRecordsToStreamBuffer
|
|
186
|
+
export declare function fromRecordsToStreamBuffer(data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunctionConfig, schema?: Schema): Promise<Buffer>;
|
|
160
187
|
/**
|
|
161
188
|
* Serialize an Arrow Table into a buffer using the Arrow IPC File serialization
|
|
162
189
|
*
|
|
@@ -165,7 +192,7 @@ export declare function fromRecordsToStreamBuffer<T>(data: Array<Record<string,
|
|
|
165
192
|
*
|
|
166
193
|
* `schema` is required if the table is empty
|
|
167
194
|
*/
|
|
168
|
-
export declare function fromTableToBuffer
|
|
195
|
+
export declare function fromTableToBuffer(table: ArrowTable, embeddings?: EmbeddingFunctionConfig, schema?: Schema): Promise<Buffer>;
|
|
169
196
|
/**
|
|
170
197
|
* Serialize an Arrow Table into a buffer using the Arrow IPC File serialization
|
|
171
198
|
*
|
|
@@ -174,7 +201,7 @@ export declare function fromTableToBuffer<T>(table: ArrowTable, embeddings?: Emb
|
|
|
174
201
|
*
|
|
175
202
|
* `schema` is required if the table is empty
|
|
176
203
|
*/
|
|
177
|
-
export declare function fromDataToBuffer
|
|
204
|
+
export declare function fromDataToBuffer(data: Data, embeddings?: EmbeddingFunctionConfig, schema?: Schema): Promise<Buffer>;
|
|
178
205
|
/**
|
|
179
206
|
* Serialize an Arrow Table into a buffer using the Arrow IPC Stream serialization
|
|
180
207
|
*
|
|
@@ -183,7 +210,7 @@ export declare function fromDataToBuffer<T>(data: Data, embeddings?: EmbeddingFu
|
|
|
183
210
|
*
|
|
184
211
|
* `schema` is required if the table is empty
|
|
185
212
|
*/
|
|
186
|
-
export declare function fromTableToStreamBuffer
|
|
213
|
+
export declare function fromTableToStreamBuffer(table: ArrowTable, embeddings?: EmbeddingFunctionConfig, schema?: Schema): Promise<Buffer>;
|
|
187
214
|
/**
|
|
188
215
|
* Create an empty table with the given schema
|
|
189
216
|
*/
|