@lancedb/lancedb 0.5.2 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +3 -3
- package/biome.json +19 -3
- package/dist/arrow.d.ts +41 -8
- package/dist/arrow.js +4 -4
- package/dist/connection.d.ts +49 -29
- package/dist/connection.js +21 -73
- package/dist/embedding/embedding_function.d.ts +9 -1
- package/dist/embedding/embedding_function.js +6 -0
- package/dist/embedding/openai.d.ts +6 -5
- package/dist/embedding/openai.js +4 -2
- package/dist/embedding/registry.d.ts +6 -11
- package/dist/index.d.ts +51 -3
- package/dist/index.js +28 -4
- package/dist/merge.d.ts +54 -0
- package/dist/merge.js +64 -0
- package/dist/native.d.ts +29 -3
- package/dist/native.js +26 -9
- package/dist/query.d.ts +33 -10
- package/dist/query.js +100 -13
- package/dist/remote/client.d.ts +28 -0
- package/dist/remote/client.js +172 -0
- package/dist/remote/connection.d.ts +25 -0
- package/dist/remote/connection.js +110 -0
- package/dist/remote/index.d.ts +3 -0
- package/dist/remote/index.js +9 -0
- package/dist/remote/table.d.ts +42 -0
- package/dist/remote/table.js +179 -0
- package/dist/sanitize.d.ts +3 -2
- package/dist/sanitize.js +55 -1
- package/dist/table.d.ts +105 -30
- package/dist/table.js +94 -237
- package/dist/util.d.ts +14 -0
- package/dist/util.js +65 -0
- package/examples/ann_indexes.ts +49 -0
- package/examples/basic.ts +149 -0
- package/examples/embedding.ts +83 -0
- package/examples/filtering.ts +34 -0
- package/examples/jsconfig.json +27 -0
- package/examples/package-lock.json +79 -0
- package/examples/package.json +18 -0
- package/examples/search.ts +37 -0
- package/lancedb/arrow.ts +80 -23
- package/lancedb/connection.ts +107 -92
- package/lancedb/embedding/embedding_function.ts +12 -1
- package/lancedb/embedding/openai.ts +11 -6
- package/lancedb/embedding/registry.ts +34 -22
- package/lancedb/index.ts +101 -2
- package/lancedb/merge.ts +70 -0
- package/lancedb/query.ts +114 -28
- package/lancedb/remote/client.ts +221 -0
- package/lancedb/remote/connection.ts +201 -0
- package/lancedb/remote/index.ts +3 -0
- package/lancedb/remote/table.ts +226 -0
- package/lancedb/sanitize.ts +73 -1
- package/lancedb/table.ts +320 -132
- package/lancedb/util.ts +69 -0
- package/native.d.ts +208 -0
- package/nodejs-artifacts/arrow.d.ts +41 -8
- package/nodejs-artifacts/arrow.js +4 -4
- package/nodejs-artifacts/connection.d.ts +49 -29
- package/nodejs-artifacts/connection.js +21 -73
- package/nodejs-artifacts/embedding/embedding_function.d.ts +9 -1
- package/nodejs-artifacts/embedding/embedding_function.js +6 -0
- package/nodejs-artifacts/embedding/openai.d.ts +6 -5
- package/nodejs-artifacts/embedding/openai.js +4 -2
- package/nodejs-artifacts/embedding/registry.d.ts +6 -11
- package/nodejs-artifacts/index.d.ts +51 -3
- package/nodejs-artifacts/index.js +28 -4
- package/nodejs-artifacts/merge.d.ts +54 -0
- package/nodejs-artifacts/merge.js +64 -0
- package/nodejs-artifacts/native.d.ts +29 -3
- package/nodejs-artifacts/native.js +26 -9
- package/nodejs-artifacts/query.d.ts +33 -10
- package/nodejs-artifacts/query.js +100 -13
- package/nodejs-artifacts/remote/client.d.ts +28 -0
- package/nodejs-artifacts/remote/client.js +172 -0
- package/nodejs-artifacts/remote/connection.d.ts +25 -0
- package/nodejs-artifacts/remote/connection.js +110 -0
- package/nodejs-artifacts/remote/index.d.ts +3 -0
- package/nodejs-artifacts/remote/index.js +9 -0
- package/nodejs-artifacts/remote/table.d.ts +42 -0
- package/nodejs-artifacts/remote/table.js +179 -0
- package/nodejs-artifacts/sanitize.d.ts +3 -2
- package/nodejs-artifacts/sanitize.js +55 -1
- package/nodejs-artifacts/table.d.ts +105 -30
- package/nodejs-artifacts/table.js +94 -237
- package/nodejs-artifacts/util.d.ts +14 -0
- package/nodejs-artifacts/util.js +65 -0
- package/package.json +25 -11
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
// Copyright 2023 LanceDB Developers.
|
|
2
|
+
//
|
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
// you may not use this file except in compliance with the License.
|
|
5
|
+
// You may obtain a copy of the License at
|
|
6
|
+
//
|
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
//
|
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
// See the License for the specific language governing permissions and
|
|
13
|
+
// limitations under the License.
|
|
14
|
+
|
|
15
|
+
import { Table as ArrowTable } from "apache-arrow";
|
|
16
|
+
|
|
17
|
+
import { Data, IntoVector } from "../arrow";
|
|
18
|
+
|
|
19
|
+
import { IndexStatistics } from "..";
|
|
20
|
+
import { CreateTableOptions } from "../connection";
|
|
21
|
+
import { IndexOptions } from "../indices";
|
|
22
|
+
import { MergeInsertBuilder } from "../merge";
|
|
23
|
+
import { VectorQuery } from "../query";
|
|
24
|
+
import { AddDataOptions, Table, UpdateOptions } from "../table";
|
|
25
|
+
import { IntoSql, toSQL } from "../util";
|
|
26
|
+
import { RestfulLanceDBClient } from "./client";
|
|
27
|
+
|
|
28
|
+
/**
 * A `Table` implementation that forwards operations to a remote LanceDB
 * service through a `RestfulLanceDBClient`.
 *
 * Only a subset of the `Table` API is implemented. Every other method throws
 * an `Error` whose message says the operation is not yet supported on the
 * LanceDB cloud.
 */
export class RemoteTable extends Table {
  #client: RestfulLanceDBClient;
  #name: string;

  // Used in the display() method
  #dbName: string;

  /**
   * Base request path for this table, with the table name URI-encoded.
   *
   * Deliberately has no trailing slash: every caller appends
   * `/<action>/`, so a trailing slash here would produce `//` in the path.
   */
  get #tablePrefix() {
    return `/v1/table/${encodeURIComponent(this.#name)}`;
  }

  /** The table name this instance was constructed with. */
  get name(): string {
    return this.#name;
  }

  /**
   * @param client - REST client used for every request made by this table.
   * @param tableName - Name of the table; URI-encoded when building paths.
   * @param dbName - Database name, only used by `display()`.
   */
  public constructor(
    client: RestfulLanceDBClient,
    tableName: string,
    dbName: string,
  ) {
    super();
    this.#client = client;
    this.#name = tableName;
    this.#dbName = dbName;
  }

  /** Whether the table is usable; mirrors the state of the underlying client. */
  isOpen(): boolean {
    // The table has no state of its own: it is open exactly when its client is.
    return this.#client.isOpen();
  }

  /** Closes the underlying client. */
  close(): void {
    this.#client.close();
  }

  /** Human-readable description of the form `RemoteTable(<db>; <table>)`. */
  display(): string {
    return `RemoteTable(${this.#dbName}; ${this.#name})`;
  }

  /**
   * Fetches the table description and returns its `schema` field.
   *
   * The value is returned exactly as received; it is not yet converted into
   * an arrow `Schema` instance (see TODO below).
   */
  async schema(): Promise<import("apache-arrow").Schema> {
    const resp = await this.#client.post(`${this.#tablePrefix}/describe/`);
    // TODO: parse this into a valid arrow schema
    return resp.schema;
  }

  /**
   * Serializes `data` with `Table.parseTableData` and posts the resulting
   * buffer to the insert endpoint as an Arrow stream.
   *
   * @param data - Rows to insert.
   * @param options - Forwarded to `Table.parseTableData`; the `mode` it
   *   yields is sent as a query parameter.
   */
  async add(data: Data, options?: Partial<AddDataOptions>): Promise<void> {
    const { buf, mode } = await Table.parseTableData(
      data,
      options as CreateTableOptions,
      true,
    );
    await this.#client.post(`${this.#tablePrefix}/insert/`, buf, {
      params: {
        mode,
      },
      headers: {
        "Content-Type": "application/vnd.apache.arrow.stream",
      },
    });
  }

  /**
   * Posts a column update to the update endpoint.
   *
   * `optsOrUpdates` may take three shapes:
   * - a `Map` or plain record of column name to SQL expression string, with
   *   the filter taken from `options.where`;
   * - `{ values, where? }`, where each value is converted with `toSQL`;
   * - `{ valuesSql, where? }`, where each value is already a SQL string.
   *
   * A missing filter is sent as `predicate: null`.
   */
  async update(
    optsOrUpdates:
      | (Map<string, string> | Record<string, string>)
      | ({
          values: Map<string, IntoSql> | Record<string, IntoSql>;
        } & Partial<UpdateOptions>)
      | ({
          valuesSql: Map<string, string> | Record<string, string>;
        } & Partial<UpdateOptions>),
    options?: Partial<UpdateOptions>,
  ): Promise<void> {
    // A string-valued `values`/`valuesSql` key is treated as an ordinary
    // column named "values"/"valuesSql" rather than as the options form.
    const isValues =
      "values" in optsOrUpdates && typeof optsOrUpdates.values !== "string";
    const isValuesSql =
      "valuesSql" in optsOrUpdates &&
      typeof optsOrUpdates.valuesSql !== "string";
    const isMap = (obj: unknown): obj is Map<string, string> => {
      return obj instanceof Map;
    };

    let predicate;
    let columns: [string, string][];
    // The Map check must come first: `"values" in map` is true for any Map
    // because of `Map.prototype.values`.
    switch (true) {
      case isMap(optsOrUpdates):
        columns = Array.from(optsOrUpdates.entries());
        predicate = options?.where;
        break;
      case isValues && isMap(optsOrUpdates.values):
        columns = Array.from(optsOrUpdates.values.entries()).map(([k, v]) => [
          k,
          toSQL(v),
        ]);
        predicate = optsOrUpdates.where;
        break;
      case isValues && !isMap(optsOrUpdates.values):
        columns = Object.entries(optsOrUpdates.values).map(([k, v]) => [
          k,
          toSQL(v),
        ]);
        predicate = optsOrUpdates.where;
        break;

      case isValuesSql && isMap(optsOrUpdates.valuesSql):
        columns = Array.from(optsOrUpdates.valuesSql.entries());
        predicate = optsOrUpdates.where;
        break;
      case isValuesSql && !isMap(optsOrUpdates.valuesSql):
        columns = Object.entries(optsOrUpdates.valuesSql).map(([k, v]) => [
          k,
          v,
        ]);
        predicate = optsOrUpdates.where;
        break;
      default:
        columns = Object.entries(optsOrUpdates as Record<string, string>);
        predicate = options?.where;
    }

    await this.#client.post(`${this.#tablePrefix}/update/`, {
      predicate: predicate ?? null,
      updates: columns,
    });
  }

  /**
   * Posts `filter` as the predicate to the count endpoint and returns the
   * response unchanged.
   */
  async countRows(filter?: unknown): Promise<number> {
    const payload = { predicate: filter };
    return await this.#client.post(`${this.#tablePrefix}/count_rows/`, payload);
  }

  /** Posts `predicate` to the delete endpoint. */
  async delete(predicate: unknown): Promise<void> {
    const payload = { predicate };
    await this.#client.post(`${this.#tablePrefix}/delete/`, payload);
  }

  /**
   * Requests an index on `column`.
   *
   * `options` are not forwarded: a warning is logged when they are supplied,
   * and the request always asks for a `vector` index with the `L2` metric.
   */
  async createIndex(
    column: string,
    options?: Partial<IndexOptions>,
  ): Promise<void> {
    if (options !== undefined) {
      console.warn("options are not yet supported on the LanceDB cloud");
    }
    const indexType = "vector";
    const metric = "L2";
    const data = {
      column,
      // biome-ignore lint/style/useNamingConvention: external API
      index_type: indexType,
      // biome-ignore lint/style/useNamingConvention: external API
      metric_type: metric,
    };
    await this.#client.post(`${this.#tablePrefix}/create_index`, data);
  }

  /** Not implemented for remote tables; always throws. */
  query(): import("..").Query {
    throw new Error("query() is not yet supported on the LanceDB cloud");
  }

  /** Not implemented for remote tables; always throws. */
  search(_query: string | IntoVector): VectorQuery {
    throw new Error("search() is not yet supported on the LanceDB cloud");
  }

  /** Not implemented for remote tables; always throws. */
  vectorSearch(_vector: unknown): import("..").VectorQuery {
    throw new Error("vectorSearch() is not yet supported on the LanceDB cloud");
  }

  /** Not implemented for remote tables; throws synchronously. */
  addColumns(_newColumnTransforms: unknown): Promise<void> {
    throw new Error("addColumns() is not yet supported on the LanceDB cloud");
  }

  /** Not implemented for remote tables; throws synchronously. */
  alterColumns(_columnAlterations: unknown): Promise<void> {
    throw new Error("alterColumns() is not yet supported on the LanceDB cloud");
  }

  /** Not implemented for remote tables; throws synchronously. */
  dropColumns(_columnNames: unknown): Promise<void> {
    throw new Error("dropColumns() is not yet supported on the LanceDB cloud");
  }

  /** Fetches the table description and returns its `version` field. */
  async version(): Promise<number> {
    const resp = await this.#client.post(`${this.#tablePrefix}/describe/`);
    return resp.version;
  }

  /** Not implemented for remote tables; throws synchronously. */
  checkout(_version: unknown): Promise<void> {
    throw new Error("checkout() is not yet supported on the LanceDB cloud");
  }

  /** Not implemented for remote tables; throws synchronously. */
  checkoutLatest(): Promise<void> {
    throw new Error(
      "checkoutLatest() is not yet supported on the LanceDB cloud",
    );
  }

  /** Not implemented for remote tables; throws synchronously. */
  restore(): Promise<void> {
    throw new Error("restore() is not yet supported on the LanceDB cloud");
  }

  /** Not implemented for remote tables; throws synchronously. */
  optimize(_options?: unknown): Promise<import("../native").OptimizeStats> {
    throw new Error("optimize() is not yet supported on the LanceDB cloud");
  }

  /** Posts to the index-list endpoint and returns the response unchanged. */
  async listIndices(): Promise<import("../native").IndexConfig[]> {
    return await this.#client.post(`${this.#tablePrefix}/index/list/`);
  }

  /** Not implemented for remote tables; throws synchronously. */
  toArrow(): Promise<ArrowTable> {
    throw new Error("toArrow() is not yet supported on the LanceDB cloud");
  }

  /** Not implemented for remote tables; always throws. */
  mergeInsert(_on: string | string[]): MergeInsertBuilder {
    throw new Error("mergeInsert() is not yet supported on the LanceDB cloud");
  }

  /** Not implemented for remote tables; the returned promise rejects. */
  async indexStats(_name: string): Promise<IndexStatistics | undefined> {
    throw new Error("indexStats() is not yet supported on the LanceDB cloud");
  }
}
|
package/lancedb/sanitize.ts
CHANGED
|
@@ -20,10 +20,12 @@
|
|
|
20
20
|
// comes from the exact same library instance. This is not always the case
|
|
21
21
|
// and so we must sanitize the input to ensure that it is compatible.
|
|
22
22
|
|
|
23
|
+
import { BufferType, Data } from "apache-arrow";
|
|
23
24
|
import type { IntBitWidth, TKeys, TimeBitWidth } from "apache-arrow/type";
|
|
24
25
|
import {
|
|
25
26
|
Binary,
|
|
26
27
|
Bool,
|
|
28
|
+
DataLike,
|
|
27
29
|
DataType,
|
|
28
30
|
DateDay,
|
|
29
31
|
DateMillisecond,
|
|
@@ -56,9 +58,14 @@ import {
|
|
|
56
58
|
Map_,
|
|
57
59
|
Null,
|
|
58
60
|
type Precision,
|
|
61
|
+
RecordBatch,
|
|
62
|
+
RecordBatchLike,
|
|
59
63
|
Schema,
|
|
64
|
+
SchemaLike,
|
|
60
65
|
SparseUnion,
|
|
61
66
|
Struct,
|
|
67
|
+
Table,
|
|
68
|
+
TableLike,
|
|
62
69
|
Time,
|
|
63
70
|
TimeMicrosecond,
|
|
64
71
|
TimeMillisecond,
|
|
@@ -488,7 +495,7 @@ export function sanitizeField(fieldLike: unknown): Field {
|
|
|
488
495
|
* instance because they might be using a different instance of apache-arrow
|
|
489
496
|
* than lancedb is using.
|
|
490
497
|
*/
|
|
491
|
-
export function sanitizeSchema(schemaLike:
|
|
498
|
+
export function sanitizeSchema(schemaLike: SchemaLike): Schema {
|
|
492
499
|
if (schemaLike instanceof Schema) {
|
|
493
500
|
return schemaLike;
|
|
494
501
|
}
|
|
@@ -514,3 +521,68 @@ export function sanitizeSchema(schemaLike: unknown): Schema {
|
|
|
514
521
|
);
|
|
515
522
|
return new Schema(sanitizedFields, metadata);
|
|
516
523
|
}
|
|
524
|
+
|
|
525
|
+
/**
 * Convert a table-like object into a `Table` from the apache-arrow instance
 * this module imports.
 *
 * A value that is already an instance of that `Table` class is returned
 * unchanged. Otherwise the value must be a non-null object exposing `schema`
 * and `batches`; the schema is passed through `sanitizeSchema`, each batch
 * through `sanitizeRecordBatch`, and a new `Table` is built from the results.
 *
 * @param tableLike - The table-like value to convert.
 * @returns A `Table` instance.
 * @throws Error if `tableLike` is not a non-null object, or lacks a `schema`
 *   or `batches` property.
 */
export function sanitizeTable(tableLike: TableLike): Table {
  if (tableLike instanceof Table) {
    return tableLike;
  }
  if (typeof tableLike !== "object" || tableLike === null) {
    throw Error("Expected a Table but object was null/undefined");
  }
  if (!("schema" in tableLike)) {
    throw Error(
      "The table passed in does not appear to be a table (no 'schema' property)",
    );
  }
  if (!("batches" in tableLike)) {
    // Name the property that is actually probed so the message is actionable.
    throw Error(
      "The table passed in does not appear to be a table (no 'batches' property)",
    );
  }
  const schema = sanitizeSchema(tableLike.schema);

  const batches = tableLike.batches.map(sanitizeRecordBatch);
  return new Table(schema, batches);
}
|
|
547
|
+
|
|
548
|
+
/**
 * Convert a record-batch-like object into a `RecordBatch` from the
 * apache-arrow instance this module imports.
 *
 * Values that are already such a `RecordBatch` pass through untouched. Any
 * other value must be a non-null object with `schema` and `data` properties,
 * which are run through `sanitizeSchema` and `sanitizeData` respectively.
 *
 * @throws Error if the value is not a non-null object, or if it is missing
 *   the `schema` or `data` property.
 */
function sanitizeRecordBatch(batchLike: RecordBatchLike): RecordBatch {
  if (batchLike instanceof RecordBatch) {
    return batchLike;
  }

  // Reject primitives and null before probing for properties with `in`.
  if (batchLike === null || typeof batchLike !== "object") {
    throw Error("Expected a RecordBatch but object was null/undefined");
  }

  if (!("schema" in batchLike)) {
    throw Error(
      "The record batch passed in does not appear to be a record batch (no 'schema' property)",
    );
  }

  if (!("data" in batchLike)) {
    throw Error(
      "The record batch passed in does not appear to be a record batch (no 'data' property)",
    );
  }

  // Arguments are evaluated left to right: schema first, then data.
  return new RecordBatch(
    sanitizeSchema(batchLike.schema),
    sanitizeData(batchLike.data),
  );
}
|
|
569
|
+
/**
 * Convert a data-like object into a `Data` from the apache-arrow instance
 * this module imports.
 *
 * An existing `Data` instance is returned as-is. Otherwise a new `Data` is
 * constructed from the object's type, offset, length, null count and its
 * four buffers (value offsets, values, null bitmap, type ids).
 */
function sanitizeData(
  dataLike: DataLike,
  // biome-ignore lint/suspicious/noExplicitAny: <explanation>
): import("apache-arrow").Data<Struct<any>> {
  if (dataLike instanceof Data) {
    return dataLike;
  }

  const {
    type,
    offset,
    length,
    nullCount,
    valueOffsets,
    values,
    nullBitmap,
    typeIds,
  } = dataLike;

  // Each buffer is keyed by the BufferType slot it belongs to.
  return new Data(type, offset, length, nullCount, {
    [BufferType.OFFSET]: valueOffsets,
    [BufferType.DATA]: values,
    [BufferType.VALIDITY]: nullBitmap,
    [BufferType.TYPE]: typeIds,
  });
}
|