@lancedb/lancedb 0.4.3 → 0.4.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -3
- package/dist/arrow.d.ts +189 -0
- package/dist/arrow.js +539 -0
- package/dist/connection.d.ts +97 -0
- package/dist/connection.js +126 -0
- package/dist/embedding/embedding_function.d.ts +45 -0
- package/dist/embedding/embedding_function.js +27 -0
- package/dist/embedding/index.d.ts +2 -0
- package/dist/embedding/index.js +7 -0
- package/dist/embedding/openai.d.ts +8 -0
- package/dist/embedding/openai.js +53 -0
- package/dist/index.d.ts +22 -0
- package/dist/index.js +52 -0
- package/dist/indices.d.ts +165 -0
- package/dist/indices.js +71 -0
- package/dist/native.d.ts +147 -0
- package/dist/native.js +314 -0
- package/dist/query.d.ts +248 -0
- package/dist/query.js +346 -0
- package/dist/sanitize.d.ts +9 -0
- package/dist/sanitize.js +369 -0
- package/dist/table.d.ts +252 -0
- package/dist/table.js +298 -0
- package/nodejs-artifacts/arrow.d.ts +189 -0
- package/nodejs-artifacts/arrow.js +539 -0
- package/nodejs-artifacts/connection.d.ts +97 -0
- package/nodejs-artifacts/connection.js +126 -0
- package/nodejs-artifacts/embedding/embedding_function.d.ts +45 -0
- package/nodejs-artifacts/embedding/embedding_function.js +27 -0
- package/nodejs-artifacts/embedding/index.d.ts +2 -0
- package/nodejs-artifacts/embedding/index.js +7 -0
- package/nodejs-artifacts/embedding/openai.d.ts +8 -0
- package/nodejs-artifacts/embedding/openai.js +53 -0
- package/nodejs-artifacts/index.d.ts +22 -0
- package/nodejs-artifacts/index.js +52 -0
- package/nodejs-artifacts/indices.d.ts +165 -0
- package/nodejs-artifacts/indices.js +71 -0
- package/nodejs-artifacts/native.d.ts +147 -0
- package/nodejs-artifacts/native.js +314 -0
- package/nodejs-artifacts/query.d.ts +248 -0
- package/nodejs-artifacts/query.js +346 -0
- package/nodejs-artifacts/sanitize.d.ts +9 -0
- package/nodejs-artifacts/sanitize.js +369 -0
- package/nodejs-artifacts/table.d.ts +252 -0
- package/nodejs-artifacts/table.js +298 -0
- package/package.json +9 -11
- package/typedoc.json +10 -0
- package/examples/js/index.mjs +0 -40
- package/examples/js/package.json +0 -14
- package/examples/js-openai/index.mjs +0 -43
- package/examples/js-openai/package-lock.json +0 -256
- package/examples/js-openai/package.json +0 -15
- package/examples/js-transformers/index.mjs +0 -65
- package/examples/js-transformers/package-lock.json +0 -1418
- package/examples/js-transformers/package.json +0 -15
- package/examples/js-youtube-transcripts/index.mjs +0 -135
- package/examples/js-youtube-transcripts/package.json +0 -15
- package/examples/ts/data/sample-lancedb/vectors.lance/_latest.manifest +0 -0
- package/examples/ts/data/sample-lancedb/vectors.lance/_transactions/0-adde4e05-fcfc-415c-86a6-5b252cb9e79a.txn +0 -0
- package/examples/ts/data/sample-lancedb/vectors.lance/_versions/1.manifest +0 -0
- package/examples/ts/data/sample-lancedb/vectors.lance/data/3618b33e-3eea-4b5e-a0fc-7d1f718d551e.lance +0 -0
- package/examples/ts/package-lock.json +0 -1340
- package/examples/ts/package.json +0 -22
- package/examples/ts/tsconfig.json +0 -10
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Copyright 2023 LanceDB Developers.
|
|
3
|
+
//
|
|
4
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
// you may not use this file except in compliance with the License.
|
|
6
|
+
// You may obtain a copy of the License at
|
|
7
|
+
//
|
|
8
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
//
|
|
10
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
// See the License for the specific language governing permissions and
|
|
14
|
+
// limitations under the License.
|
|
15
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
+
exports.sanitizeSchema = void 0;
|
|
17
|
+
// The utilities in this file help sanitize data from the user's arrow
|
|
18
|
+
// library into the types expected by vectordb's arrow library. Node
|
|
19
|
+
// generally allows for multiple versions of the same library (and sometimes
|
|
20
|
+
// even multiple copies of the same version) to be installed at the same
|
|
21
|
+
// time. However, arrow-js uses instanceof which expects that the input
|
|
22
|
+
// comes from the exact same library instance. This is not always the case
|
|
23
|
+
// and so we must sanitize the input to ensure that it is compatible.
|
|
24
|
+
const apache_arrow_1 = require("apache-arrow");
|
|
25
|
+
function sanitizeMetadata(metadataLike) {
|
|
26
|
+
if (metadataLike === undefined || metadataLike === null) {
|
|
27
|
+
return undefined;
|
|
28
|
+
}
|
|
29
|
+
if (!(metadataLike instanceof Map)) {
|
|
30
|
+
throw Error("Expected metadata, if present, to be a Map<string, string>");
|
|
31
|
+
}
|
|
32
|
+
for (const item of metadataLike) {
|
|
33
|
+
if (!(typeof item[0] === "string" || !(typeof item[1] === "string"))) {
|
|
34
|
+
throw Error("Expected metadata, if present, to be a Map<string, string> but it had non-string keys or values");
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
return metadataLike;
|
|
38
|
+
}
|
|
39
|
+
function sanitizeInt(typeLike) {
|
|
40
|
+
if (!("bitWidth" in typeLike) ||
|
|
41
|
+
typeof typeLike.bitWidth !== "number" ||
|
|
42
|
+
!("isSigned" in typeLike) ||
|
|
43
|
+
typeof typeLike.isSigned !== "boolean") {
|
|
44
|
+
throw Error("Expected an Int Type to have a `bitWidth` and `isSigned` property");
|
|
45
|
+
}
|
|
46
|
+
return new apache_arrow_1.Int(typeLike.isSigned, typeLike.bitWidth);
|
|
47
|
+
}
|
|
48
|
+
function sanitizeFloat(typeLike) {
|
|
49
|
+
if (!("precision" in typeLike) || typeof typeLike.precision !== "number") {
|
|
50
|
+
throw Error("Expected a Float Type to have a `precision` property");
|
|
51
|
+
}
|
|
52
|
+
return new apache_arrow_1.Float(typeLike.precision);
|
|
53
|
+
}
|
|
54
|
+
function sanitizeDecimal(typeLike) {
|
|
55
|
+
if (!("scale" in typeLike) ||
|
|
56
|
+
typeof typeLike.scale !== "number" ||
|
|
57
|
+
!("precision" in typeLike) ||
|
|
58
|
+
typeof typeLike.precision !== "number" ||
|
|
59
|
+
!("bitWidth" in typeLike) ||
|
|
60
|
+
typeof typeLike.bitWidth !== "number") {
|
|
61
|
+
throw Error("Expected a Decimal Type to have `scale`, `precision`, and `bitWidth` properties");
|
|
62
|
+
}
|
|
63
|
+
return new apache_arrow_1.Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth);
|
|
64
|
+
}
|
|
65
|
+
function sanitizeDate(typeLike) {
|
|
66
|
+
if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
|
|
67
|
+
throw Error("Expected a Date type to have a `unit` property");
|
|
68
|
+
}
|
|
69
|
+
return new apache_arrow_1.Date_(typeLike.unit);
|
|
70
|
+
}
|
|
71
|
+
function sanitizeTime(typeLike) {
|
|
72
|
+
if (!("unit" in typeLike) ||
|
|
73
|
+
typeof typeLike.unit !== "number" ||
|
|
74
|
+
!("bitWidth" in typeLike) ||
|
|
75
|
+
typeof typeLike.bitWidth !== "number") {
|
|
76
|
+
throw Error("Expected a Time type to have `unit` and `bitWidth` properties");
|
|
77
|
+
}
|
|
78
|
+
return new apache_arrow_1.Time(typeLike.unit, typeLike.bitWidth);
|
|
79
|
+
}
|
|
80
|
+
function sanitizeTimestamp(typeLike) {
|
|
81
|
+
if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
|
|
82
|
+
throw Error("Expected a Timestamp type to have a `unit` property");
|
|
83
|
+
}
|
|
84
|
+
let timezone = null;
|
|
85
|
+
if ("timezone" in typeLike && typeof typeLike.timezone === "string") {
|
|
86
|
+
timezone = typeLike.timezone;
|
|
87
|
+
}
|
|
88
|
+
return new apache_arrow_1.Timestamp(typeLike.unit, timezone);
|
|
89
|
+
}
|
|
90
|
+
function sanitizeTypedTimestamp(typeLike,
|
|
91
|
+
// eslint-disable-next-line @typescript-eslint/naming-convention
|
|
92
|
+
Datatype) {
|
|
93
|
+
let timezone = null;
|
|
94
|
+
if ("timezone" in typeLike && typeof typeLike.timezone === "string") {
|
|
95
|
+
timezone = typeLike.timezone;
|
|
96
|
+
}
|
|
97
|
+
return new Datatype(timezone);
|
|
98
|
+
}
|
|
99
|
+
function sanitizeInterval(typeLike) {
|
|
100
|
+
if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
|
|
101
|
+
throw Error("Expected an Interval type to have a `unit` property");
|
|
102
|
+
}
|
|
103
|
+
return new apache_arrow_1.Interval(typeLike.unit);
|
|
104
|
+
}
|
|
105
|
+
/**
 * Build a `List` from this module's apache-arrow import.
 *
 * @param typeLike - object expected to carry a `children` array holding
 *   exactly one field-like element.
 * @returns a new `List` wrapping the sanitized child field.
 * @throws Error when `children` is missing, is not an array, or does not
 *   contain exactly one element.
 */
function sanitizeList(typeLike) {
    const hasChildArray = "children" in typeLike && Array.isArray(typeLike.children);
    if (!hasChildArray) {
        throw new Error("Expected a List type to have an array-like `children` property");
    }
    if (typeLike.children.length !== 1) {
        throw new Error("Expected a List type to have exactly one child");
    }
    const onlyChild = sanitizeField(typeLike.children[0]);
    return new apache_arrow_1.List(onlyChild);
}
|
|
114
|
+
function sanitizeStruct(typeLike) {
|
|
115
|
+
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
|
|
116
|
+
throw Error("Expected a Struct type to have an array-like `children` property");
|
|
117
|
+
}
|
|
118
|
+
return new apache_arrow_1.Struct(typeLike.children.map((child) => sanitizeField(child)));
|
|
119
|
+
}
|
|
120
|
+
function sanitizeUnion(typeLike) {
|
|
121
|
+
if (!("typeIds" in typeLike) ||
|
|
122
|
+
!("mode" in typeLike) ||
|
|
123
|
+
typeof typeLike.mode !== "number") {
|
|
124
|
+
throw Error("Expected a Union type to have `typeIds` and `mode` properties");
|
|
125
|
+
}
|
|
126
|
+
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
|
|
127
|
+
throw Error("Expected a Union type to have an array-like `children` property");
|
|
128
|
+
}
|
|
129
|
+
return new apache_arrow_1.Union(typeLike.mode,
|
|
130
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
131
|
+
typeLike.typeIds, typeLike.children.map((child) => sanitizeField(child)));
|
|
132
|
+
}
|
|
133
|
+
function sanitizeTypedUnion(typeLike,
|
|
134
|
+
// eslint-disable-next-line @typescript-eslint/naming-convention
|
|
135
|
+
UnionType) {
|
|
136
|
+
if (!("typeIds" in typeLike)) {
|
|
137
|
+
throw Error("Expected a DenseUnion/SparseUnion type to have a `typeIds` property");
|
|
138
|
+
}
|
|
139
|
+
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
|
|
140
|
+
throw Error("Expected a DenseUnion/SparseUnion type to have an array-like `children` property");
|
|
141
|
+
}
|
|
142
|
+
return new UnionType(typeLike.typeIds, typeLike.children.map((child) => sanitizeField(child)));
|
|
143
|
+
}
|
|
144
|
+
function sanitizeFixedSizeBinary(typeLike) {
|
|
145
|
+
if (!("byteWidth" in typeLike) || typeof typeLike.byteWidth !== "number") {
|
|
146
|
+
throw Error("Expected a FixedSizeBinary type to have a `byteWidth` property");
|
|
147
|
+
}
|
|
148
|
+
return new apache_arrow_1.FixedSizeBinary(typeLike.byteWidth);
|
|
149
|
+
}
|
|
150
|
+
/**
 * Build a `FixedSizeList` from this module's apache-arrow import.
 *
 * @param typeLike - object expected to carry a numeric `listSize` and a
 *   `children` array holding exactly one field-like element.
 * @returns a new `FixedSizeList` constructed with `(listSize, child)`, the
 *   child having been passed through `sanitizeField`.
 * @throws Error when `listSize` is absent or not a number, when `children` is
 *   missing or not an array, or when it does not hold exactly one element.
 */
function sanitizeFixedSizeList(typeLike) {
    const hasListSize = "listSize" in typeLike && typeof typeLike.listSize === "number";
    if (!hasListSize) {
        throw new Error("Expected a FixedSizeList type to have a `listSize` property");
    }
    const hasChildArray = "children" in typeLike && Array.isArray(typeLike.children);
    if (!hasChildArray) {
        throw new Error("Expected a FixedSizeList type to have an array-like `children` property");
    }
    if (typeLike.children.length !== 1) {
        throw new Error("Expected a FixedSizeList type to have exactly one child");
    }
    return new apache_arrow_1.FixedSizeList(typeLike.listSize, sanitizeField(typeLike.children[0]));
}
|
|
162
|
+
function sanitizeMap(typeLike) {
|
|
163
|
+
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
|
|
164
|
+
throw Error("Expected a Map type to have an array-like `children` property");
|
|
165
|
+
}
|
|
166
|
+
if (!("keysSorted" in typeLike) || typeof typeLike.keysSorted !== "boolean") {
|
|
167
|
+
throw Error("Expected a Map type to have a `keysSorted` property");
|
|
168
|
+
}
|
|
169
|
+
return new apache_arrow_1.Map_(
|
|
170
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
171
|
+
typeLike.children.map((field) => sanitizeField(field)), typeLike.keysSorted);
|
|
172
|
+
}
|
|
173
|
+
function sanitizeDuration(typeLike) {
|
|
174
|
+
if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
|
|
175
|
+
throw Error("Expected a Duration type to have a `unit` property");
|
|
176
|
+
}
|
|
177
|
+
return new apache_arrow_1.Duration(typeLike.unit);
|
|
178
|
+
}
|
|
179
|
+
/**
 * Build a `Dictionary` from this module's apache-arrow import.
 *
 * @param typeLike - object expected to carry a numeric `id`, `indices` and
 *   `dictionary` values whose `typeof` is "object", and a boolean `isOrdered`.
 * @returns a new `Dictionary` constructed with
 *   `(sanitizeType(dictionary), sanitizeType(indices), id, isOrdered)`.
 * @throws Error when any of the four properties is missing or has the wrong
 *   JS type. The checks run in the order id, indices, dictionary, isOrdered.
 */
function sanitizeDictionary(typeLike) {
    const hasTypedProp = (key, expected) => key in typeLike && typeof typeLike[key] === expected;
    if (!hasTypedProp("id", "number")) {
        throw new Error("Expected a Dictionary type to have an `id` property");
    }
    if (!hasTypedProp("indices", "object")) {
        throw new Error("Expected a Dictionary type to have an `indices` property");
    }
    if (!hasTypedProp("dictionary", "object")) {
        throw new Error("Expected a Dictionary type to have an `dictionary` property");
    }
    if (!hasTypedProp("isOrdered", "boolean")) {
        throw new Error("Expected a Dictionary type to have an `isOrdered` property");
    }
    return new apache_arrow_1.Dictionary(sanitizeType(typeLike.dictionary), sanitizeType(typeLike.indices), typeLike.id, typeLike.isOrdered);
}
|
|
194
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
195
|
+
function sanitizeType(typeLike) {
|
|
196
|
+
if (typeof typeLike !== "object" || typeLike === null) {
|
|
197
|
+
throw Error("Expected a Type but object was null/undefined");
|
|
198
|
+
}
|
|
199
|
+
if (!("typeId" in typeLike) || !(typeof typeLike.typeId !== "function")) {
|
|
200
|
+
throw Error("Expected a Type to have a typeId function");
|
|
201
|
+
}
|
|
202
|
+
let typeId;
|
|
203
|
+
if (typeof typeLike.typeId === "function") {
|
|
204
|
+
typeId = typeLike.typeId();
|
|
205
|
+
}
|
|
206
|
+
else if (typeof typeLike.typeId === "number") {
|
|
207
|
+
typeId = typeLike.typeId;
|
|
208
|
+
}
|
|
209
|
+
else {
|
|
210
|
+
throw Error("Type's typeId property was not a function or number");
|
|
211
|
+
}
|
|
212
|
+
switch (typeId) {
|
|
213
|
+
case apache_arrow_1.Type.NONE:
|
|
214
|
+
throw Error("Received a Type with a typeId of NONE");
|
|
215
|
+
case apache_arrow_1.Type.Null:
|
|
216
|
+
return new apache_arrow_1.Null();
|
|
217
|
+
case apache_arrow_1.Type.Int:
|
|
218
|
+
return sanitizeInt(typeLike);
|
|
219
|
+
case apache_arrow_1.Type.Float:
|
|
220
|
+
return sanitizeFloat(typeLike);
|
|
221
|
+
case apache_arrow_1.Type.Binary:
|
|
222
|
+
return new apache_arrow_1.Binary();
|
|
223
|
+
case apache_arrow_1.Type.Utf8:
|
|
224
|
+
return new apache_arrow_1.Utf8();
|
|
225
|
+
case apache_arrow_1.Type.Bool:
|
|
226
|
+
return new apache_arrow_1.Bool();
|
|
227
|
+
case apache_arrow_1.Type.Decimal:
|
|
228
|
+
return sanitizeDecimal(typeLike);
|
|
229
|
+
case apache_arrow_1.Type.Date:
|
|
230
|
+
return sanitizeDate(typeLike);
|
|
231
|
+
case apache_arrow_1.Type.Time:
|
|
232
|
+
return sanitizeTime(typeLike);
|
|
233
|
+
case apache_arrow_1.Type.Timestamp:
|
|
234
|
+
return sanitizeTimestamp(typeLike);
|
|
235
|
+
case apache_arrow_1.Type.Interval:
|
|
236
|
+
return sanitizeInterval(typeLike);
|
|
237
|
+
case apache_arrow_1.Type.List:
|
|
238
|
+
return sanitizeList(typeLike);
|
|
239
|
+
case apache_arrow_1.Type.Struct:
|
|
240
|
+
return sanitizeStruct(typeLike);
|
|
241
|
+
case apache_arrow_1.Type.Union:
|
|
242
|
+
return sanitizeUnion(typeLike);
|
|
243
|
+
case apache_arrow_1.Type.FixedSizeBinary:
|
|
244
|
+
return sanitizeFixedSizeBinary(typeLike);
|
|
245
|
+
case apache_arrow_1.Type.FixedSizeList:
|
|
246
|
+
return sanitizeFixedSizeList(typeLike);
|
|
247
|
+
case apache_arrow_1.Type.Map:
|
|
248
|
+
return sanitizeMap(typeLike);
|
|
249
|
+
case apache_arrow_1.Type.Duration:
|
|
250
|
+
return sanitizeDuration(typeLike);
|
|
251
|
+
case apache_arrow_1.Type.Dictionary:
|
|
252
|
+
return sanitizeDictionary(typeLike);
|
|
253
|
+
case apache_arrow_1.Type.Int8:
|
|
254
|
+
return new apache_arrow_1.Int8();
|
|
255
|
+
case apache_arrow_1.Type.Int16:
|
|
256
|
+
return new apache_arrow_1.Int16();
|
|
257
|
+
case apache_arrow_1.Type.Int32:
|
|
258
|
+
return new apache_arrow_1.Int32();
|
|
259
|
+
case apache_arrow_1.Type.Int64:
|
|
260
|
+
return new apache_arrow_1.Int64();
|
|
261
|
+
case apache_arrow_1.Type.Uint8:
|
|
262
|
+
return new apache_arrow_1.Uint8();
|
|
263
|
+
case apache_arrow_1.Type.Uint16:
|
|
264
|
+
return new apache_arrow_1.Uint16();
|
|
265
|
+
case apache_arrow_1.Type.Uint32:
|
|
266
|
+
return new apache_arrow_1.Uint32();
|
|
267
|
+
case apache_arrow_1.Type.Uint64:
|
|
268
|
+
return new apache_arrow_1.Uint64();
|
|
269
|
+
case apache_arrow_1.Type.Float16:
|
|
270
|
+
return new apache_arrow_1.Float16();
|
|
271
|
+
case apache_arrow_1.Type.Float32:
|
|
272
|
+
return new apache_arrow_1.Float32();
|
|
273
|
+
case apache_arrow_1.Type.Float64:
|
|
274
|
+
return new apache_arrow_1.Float64();
|
|
275
|
+
case apache_arrow_1.Type.DateMillisecond:
|
|
276
|
+
return new apache_arrow_1.DateMillisecond();
|
|
277
|
+
case apache_arrow_1.Type.DateDay:
|
|
278
|
+
return new apache_arrow_1.DateDay();
|
|
279
|
+
case apache_arrow_1.Type.TimeNanosecond:
|
|
280
|
+
return new apache_arrow_1.TimeNanosecond();
|
|
281
|
+
case apache_arrow_1.Type.TimeMicrosecond:
|
|
282
|
+
return new apache_arrow_1.TimeMicrosecond();
|
|
283
|
+
case apache_arrow_1.Type.TimeMillisecond:
|
|
284
|
+
return new apache_arrow_1.TimeMillisecond();
|
|
285
|
+
case apache_arrow_1.Type.TimeSecond:
|
|
286
|
+
return new apache_arrow_1.TimeSecond();
|
|
287
|
+
case apache_arrow_1.Type.TimestampNanosecond:
|
|
288
|
+
return sanitizeTypedTimestamp(typeLike, apache_arrow_1.TimestampNanosecond);
|
|
289
|
+
case apache_arrow_1.Type.TimestampMicrosecond:
|
|
290
|
+
return sanitizeTypedTimestamp(typeLike, apache_arrow_1.TimestampMicrosecond);
|
|
291
|
+
case apache_arrow_1.Type.TimestampMillisecond:
|
|
292
|
+
return sanitizeTypedTimestamp(typeLike, apache_arrow_1.TimestampMillisecond);
|
|
293
|
+
case apache_arrow_1.Type.TimestampSecond:
|
|
294
|
+
return sanitizeTypedTimestamp(typeLike, apache_arrow_1.TimestampSecond);
|
|
295
|
+
case apache_arrow_1.Type.DenseUnion:
|
|
296
|
+
return sanitizeTypedUnion(typeLike, apache_arrow_1.DenseUnion);
|
|
297
|
+
case apache_arrow_1.Type.SparseUnion:
|
|
298
|
+
return sanitizeTypedUnion(typeLike, apache_arrow_1.SparseUnion);
|
|
299
|
+
case apache_arrow_1.Type.IntervalDayTime:
|
|
300
|
+
return new apache_arrow_1.IntervalDayTime();
|
|
301
|
+
case apache_arrow_1.Type.IntervalYearMonth:
|
|
302
|
+
return new apache_arrow_1.IntervalYearMonth();
|
|
303
|
+
case apache_arrow_1.Type.DurationNanosecond:
|
|
304
|
+
return new apache_arrow_1.DurationNanosecond();
|
|
305
|
+
case apache_arrow_1.Type.DurationMicrosecond:
|
|
306
|
+
return new apache_arrow_1.DurationMicrosecond();
|
|
307
|
+
case apache_arrow_1.Type.DurationMillisecond:
|
|
308
|
+
return new apache_arrow_1.DurationMillisecond();
|
|
309
|
+
case apache_arrow_1.Type.DurationSecond:
|
|
310
|
+
return new apache_arrow_1.DurationSecond();
|
|
311
|
+
default:
|
|
312
|
+
throw new Error("Unrecoginized type id in schema: " + typeId);
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
function sanitizeField(fieldLike) {
|
|
316
|
+
if (fieldLike instanceof apache_arrow_1.Field) {
|
|
317
|
+
return fieldLike;
|
|
318
|
+
}
|
|
319
|
+
if (typeof fieldLike !== "object" || fieldLike === null) {
|
|
320
|
+
throw Error("Expected a Field but object was null/undefined");
|
|
321
|
+
}
|
|
322
|
+
if (!("type" in fieldLike) ||
|
|
323
|
+
!("name" in fieldLike) ||
|
|
324
|
+
!("nullable" in fieldLike)) {
|
|
325
|
+
throw Error("The field passed in is missing a `type`/`name`/`nullable` property");
|
|
326
|
+
}
|
|
327
|
+
const type = sanitizeType(fieldLike.type);
|
|
328
|
+
const name = fieldLike.name;
|
|
329
|
+
if (!(typeof name === "string")) {
|
|
330
|
+
throw Error("The field passed in had a non-string `name` property");
|
|
331
|
+
}
|
|
332
|
+
const nullable = fieldLike.nullable;
|
|
333
|
+
if (!(typeof nullable === "boolean")) {
|
|
334
|
+
throw Error("The field passed in had a non-boolean `nullable` property");
|
|
335
|
+
}
|
|
336
|
+
let metadata;
|
|
337
|
+
if ("metadata" in fieldLike) {
|
|
338
|
+
metadata = sanitizeMetadata(fieldLike.metadata);
|
|
339
|
+
}
|
|
340
|
+
return new apache_arrow_1.Field(name, type, nullable, metadata);
|
|
341
|
+
}
|
|
342
|
+
/**
|
|
343
|
+
* Convert something schemaLike into a Schema instance
|
|
344
|
+
*
|
|
345
|
+
* This method is often needed even when the caller is using a Schema
|
|
346
|
+
* instance because they might be using a different instance of apache-arrow
|
|
347
|
+
* than lancedb is using.
|
|
348
|
+
*/
|
|
349
|
+
function sanitizeSchema(schemaLike) {
|
|
350
|
+
if (schemaLike instanceof apache_arrow_1.Schema) {
|
|
351
|
+
return schemaLike;
|
|
352
|
+
}
|
|
353
|
+
if (typeof schemaLike !== "object" || schemaLike === null) {
|
|
354
|
+
throw Error("Expected a Schema but object was null/undefined");
|
|
355
|
+
}
|
|
356
|
+
if (!("fields" in schemaLike)) {
|
|
357
|
+
throw Error("The schema passed in does not appear to be a schema (no 'fields' property)");
|
|
358
|
+
}
|
|
359
|
+
let metadata;
|
|
360
|
+
if ("metadata" in schemaLike) {
|
|
361
|
+
metadata = sanitizeMetadata(schemaLike.metadata);
|
|
362
|
+
}
|
|
363
|
+
if (!Array.isArray(schemaLike.fields)) {
|
|
364
|
+
throw Error("The schema passed in had a 'fields' property but it was not an array");
|
|
365
|
+
}
|
|
366
|
+
const sanitizedFields = schemaLike.fields.map((field) => sanitizeField(field));
|
|
367
|
+
return new apache_arrow_1.Schema(sanitizedFields, metadata);
|
|
368
|
+
}
|
|
369
|
+
exports.sanitizeSchema = sanitizeSchema;
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
import { Schema } from "apache-arrow";
|
|
2
|
+
import { AddColumnsSql, ColumnAlteration, IndexConfig, Table as _NativeTable } from "./native";
|
|
3
|
+
import { Query, VectorQuery } from "./query";
|
|
4
|
+
import { IndexOptions } from "./indices";
|
|
5
|
+
import { Data } from "./arrow";
|
|
6
|
+
export { IndexConfig } from "./native";
|
|
7
|
+
/**
|
|
8
|
+
* Options for adding data to a table.
|
|
9
|
+
*/
|
|
10
|
+
export interface AddDataOptions {
|
|
11
|
+
/**
|
|
12
|
+
* If "append" (the default) then the new data will be added to the table
|
|
13
|
+
*
|
|
14
|
+
* If "overwrite" then the new data will replace the existing data in the table.
|
|
15
|
+
*/
|
|
16
|
+
mode: "append" | "overwrite";
|
|
17
|
+
}
|
|
18
|
+
export interface UpdateOptions {
|
|
19
|
+
/**
|
|
20
|
+
* A filter that limits the scope of the update.
|
|
21
|
+
*
|
|
22
|
+
* This should be an SQL filter expression.
|
|
23
|
+
*
|
|
24
|
+
* Only rows that satisfy the expression will be updated.
|
|
25
|
+
*
|
|
26
|
+
* For example, this could be 'my_col == 0' to replace all instances
|
|
27
|
+
* of 0 in a column with some other default value.
|
|
28
|
+
*/
|
|
29
|
+
where: string;
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* A Table is a collection of Records in a LanceDB Database.
|
|
33
|
+
*
|
|
34
|
+
* A Table object is expected to be long lived and reused for multiple operations.
|
|
35
|
+
* Table objects will cache a certain amount of index data in memory. This cache
|
|
36
|
+
* will be freed when the Table is garbage collected. To eagerly free the cache you
|
|
37
|
+
* can call the `close` method. Once the Table is closed, it cannot be used for any
|
|
38
|
+
* further operations.
|
|
39
|
+
*
|
|
40
|
+
* Closing a table is optional. If not closed, it will be closed when it is garbage
|
|
41
|
+
* collected.
|
|
42
|
+
*/
|
|
43
|
+
export declare class Table {
|
|
44
|
+
private readonly inner;
|
|
45
|
+
/** Construct a Table. Internal use only. */
|
|
46
|
+
constructor(inner: _NativeTable);
|
|
47
|
+
/** Return true if the table has not been closed */
|
|
48
|
+
isOpen(): boolean;
|
|
49
|
+
/**
|
|
50
|
+
* Close the table, releasing any underlying resources.
|
|
51
|
+
*
|
|
52
|
+
* It is safe to call this method multiple times.
|
|
53
|
+
*
|
|
54
|
+
* Any attempt to use the table after it is closed will result in an error.
|
|
55
|
+
*/
|
|
56
|
+
close(): void;
|
|
57
|
+
/** Return a brief description of the table */
|
|
58
|
+
display(): string;
|
|
59
|
+
/** Get the schema of the table. */
|
|
60
|
+
schema(): Promise<Schema>;
|
|
61
|
+
/**
|
|
62
|
+
* Insert records into this Table.
|
|
63
|
+
* @param {Data} data Records to be inserted into the Table
|
|
64
|
+
*/
|
|
65
|
+
add(data: Data, options?: Partial<AddDataOptions>): Promise<void>;
|
|
66
|
+
/**
|
|
67
|
+
* Update existing records in the Table
|
|
68
|
+
*
|
|
69
|
+
* An update operation can be used to adjust existing values. Use the
|
|
70
|
+
* returned builder to specify which columns to update. The new value
|
|
71
|
+
* can be a literal value (e.g. replacing nulls with some default value)
|
|
72
|
+
* or an expression applied to the old value (e.g. incrementing a value)
|
|
73
|
+
*
|
|
74
|
+
* An optional condition can be specified (e.g. "only update if the old
|
|
75
|
+
* value is 0")
|
|
76
|
+
*
|
|
77
|
+
* Note: if your condition is something like "some_id_column == 7" and
|
|
78
|
+
* you are updating many rows (with different ids) then you will get
|
|
79
|
+
* better performance with a single [`merge_insert`] call instead of
|
|
80
|
+
* repeatedly calling this method.
|
|
81
|
+
* @param {Map<string, string> | Record<string, string>} updates - the
|
|
82
|
+
* columns to update
|
|
83
|
+
*
|
|
84
|
+
* Keys in the map should specify the name of the column to update.
|
|
85
|
+
* Values in the map provide the new value of the column. These can
|
|
86
|
+
* be SQL literal strings (e.g. "7" or "'foo'") or they can be expressions
|
|
87
|
+
* based on the row being updated (e.g. "my_col + 1")
|
|
88
|
+
* @param {Partial<UpdateOptions>} options - additional options to control
|
|
89
|
+
* the update behavior
|
|
90
|
+
*/
|
|
91
|
+
update(updates: Map<string, string> | Record<string, string>, options?: Partial<UpdateOptions>): Promise<void>;
|
|
92
|
+
/** Count the total number of rows in the dataset. */
|
|
93
|
+
countRows(filter?: string): Promise<number>;
|
|
94
|
+
/** Delete the rows that satisfy the predicate. */
|
|
95
|
+
delete(predicate: string): Promise<void>;
|
|
96
|
+
/**
|
|
97
|
+
* Create an index to speed up queries.
|
|
98
|
+
*
|
|
99
|
+
* Indices can be created on vector columns or scalar columns.
|
|
100
|
+
* Indices on vector columns will speed up vector searches.
|
|
101
|
+
* Indices on scalar columns will speed up filtering (in both
|
|
102
|
+
* vector and non-vector searches)
|
|
103
|
+
* @example
|
|
104
|
+
* // If the column has a vector (fixed size list) data type then
|
|
105
|
+
* // an IvfPq vector index will be created.
|
|
106
|
+
* const table = await conn.openTable("my_table");
|
|
107
|
+
* await table.createIndex(["vector"]);
|
|
108
|
+
* @example
|
|
109
|
+
* // For advanced control over vector index creation you can specify
|
|
110
|
+
* // the index type and options.
|
|
111
|
+
* const table = await conn.openTable("my_table");
|
|
112
|
+
* await table.createIndex(["vector"], I)
|
|
113
|
+
* .ivf_pq({ num_partitions: 128, num_sub_vectors: 16 })
|
|
114
|
+
* .build();
|
|
115
|
+
* @example
|
|
116
|
+
* // Or create a Scalar index
|
|
117
|
+
* await table.createIndex("my_float_col").build();
|
|
118
|
+
*/
|
|
119
|
+
createIndex(column: string, options?: Partial<IndexOptions>): Promise<void>;
|
|
120
|
+
/**
|
|
121
|
+
* Create a {@link Query} Builder.
|
|
122
|
+
*
|
|
123
|
+
* Queries allow you to search your existing data. By default the query will
|
|
124
|
+
* return all the data in the table in no particular order. The builder
|
|
125
|
+
* returned by this method can be used to control the query using filtering,
|
|
126
|
+
* vector similarity, sorting, and more.
|
|
127
|
+
*
|
|
128
|
+
* Note: By default, all columns are returned. For best performance, you should
|
|
129
|
+
* only fetch the columns you need. See [`Query::select_with_projection`] for
|
|
130
|
+
* more details.
|
|
131
|
+
*
|
|
132
|
+
* When appropriate, various indices and statistics based pruning will be used to
|
|
133
|
+
* accelerate the query.
|
|
134
|
+
* @example
|
|
135
|
+
* // SQL-style filtering
|
|
136
|
+
* //
|
|
137
|
+
* // This query will return up to 20 rows whose value in the `id` column
|
|
138
|
+
* // is greater than 1. LanceDb supports a broad set of filtering functions.
|
|
139
|
+
* for await (const batch of table.query()
|
|
140
|
+
* .filter("id > 1").select(["id"]).limit(20)) {
|
|
141
|
+
* console.log(batch);
|
|
142
|
+
* }
|
|
143
|
+
* @example
|
|
144
|
+
* // Vector Similarity Search
|
|
145
|
+
* //
|
|
146
|
+
* // This example will find the 10 rows whose value in the "vector" column are
|
|
147
|
+
* // closest to the query vector [1.0, 2.0, 3.0]. If an index has been created
|
|
148
|
+
* // on the "vector" column then this will perform an ANN search.
|
|
149
|
+
* //
|
|
150
|
+
* // The `refine_factor` and `nprobes` methods are used to control the recall /
|
|
151
|
+
* // latency tradeoff of the search.
|
|
152
|
+
* for await (const batch of table.query()
|
|
153
|
+
* .nearestTo([1, 2, 3])
|
|
154
|
+
* .refineFactor(5).nprobe(10)
|
|
155
|
+
* .limit(10)) {
|
|
156
|
+
* console.log(batch);
|
|
157
|
+
* }
|
|
158
|
+
* @example
|
|
159
|
+
* // Scan the full dataset
|
|
160
|
+
* //
|
|
161
|
+
* // This query will return everything in the table in no particular order.
|
|
162
|
+
* for await (const batch of table.query()) {
|
|
163
|
+
* console.log(batch);
|
|
164
|
+
* }
|
|
165
|
+
* @returns {Query} A builder that can be used to parameterize the query
|
|
166
|
+
*/
|
|
167
|
+
query(): Query;
|
|
168
|
+
/**
|
|
169
|
+
* Search the table with a given query vector.
|
|
170
|
+
*
|
|
171
|
+
* This is a convenience method for preparing a vector query and
|
|
172
|
+
* is the same thing as calling `nearestTo` on the builder returned
|
|
173
|
+
* by `query`. @see {@link Query#nearestTo} for more details.
|
|
174
|
+
*/
|
|
175
|
+
vectorSearch(vector: unknown): VectorQuery;
|
|
176
|
+
/**
|
|
177
|
+
* Add new columns with defined values.
|
|
178
|
+
* @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
|
|
179
|
+
* the SQL expression to use to calculate the value of the new column. These
|
|
180
|
+
* expressions will be evaluated for each row in the table, and can
|
|
181
|
+
* reference existing columns in the table.
|
|
182
|
+
*/
|
|
183
|
+
addColumns(newColumnTransforms: AddColumnsSql[]): Promise<void>;
|
|
184
|
+
/**
|
|
185
|
+
* Alter the name or nullability of columns.
|
|
186
|
+
* @param {ColumnAlteration[]} columnAlterations One or more alterations to
|
|
187
|
+
* apply to columns.
|
|
188
|
+
*/
|
|
189
|
+
alterColumns(columnAlterations: ColumnAlteration[]): Promise<void>;
|
|
190
|
+
/**
|
|
191
|
+
* Drop one or more columns from the dataset
|
|
192
|
+
*
|
|
193
|
+
* This is a metadata-only operation and does not remove the data from the
|
|
194
|
+
* underlying storage. In order to remove the data, you must subsequently
|
|
195
|
+
* call ``compact_files`` to rewrite the data without the removed columns and
|
|
196
|
+
* then call ``cleanup_files`` to remove the old files.
|
|
197
|
+
* @param {string[]} columnNames The names of the columns to drop. These can
|
|
198
|
+
* be nested column references (e.g. "a.b.c") or top-level column names
|
|
199
|
+
* (e.g. "a").
|
|
200
|
+
*/
|
|
201
|
+
dropColumns(columnNames: string[]): Promise<void>;
|
|
202
|
+
/**
|
|
203
|
+
* Retrieve the version of the table
|
|
204
|
+
*
|
|
205
|
+
* LanceDb supports versioning. Every operation that modifies the table increases
|
|
206
|
+
* version. As long as a version hasn't been deleted you can `[Self::checkout]` that
|
|
207
|
+
* version to view the data at that point. In addition, you can `[Self::restore]` the
|
|
208
|
+
* version to replace the current table with a previous version.
|
|
209
|
+
*/
|
|
210
|
+
version(): Promise<number>;
|
|
211
|
+
/**
|
|
212
|
+
* Checks out a specific version of the Table
|
|
213
|
+
*
|
|
214
|
+
* Any read operation on the table will now access the data at the checked out version.
|
|
215
|
+
* As a consequence, calling this method will disable any read consistency interval
|
|
216
|
+
* that was previously set.
|
|
217
|
+
*
|
|
218
|
+
* This is a read-only operation that turns the table into a sort of "view"
|
|
219
|
+
* or "detached head". Other table instances will not be affected. To make the change
|
|
220
|
+
* permanent you can use the `[Self::restore]` method.
|
|
221
|
+
*
|
|
222
|
+
* Any operation that modifies the table will fail while the table is in a checked
|
|
223
|
+
* out state.
|
|
224
|
+
*
|
|
225
|
+
* To return the table to a normal state use `[Self::checkout_latest]`
|
|
226
|
+
*/
|
|
227
|
+
checkout(version: number): Promise<void>;
|
|
228
|
+
/**
|
|
229
|
+
* Ensures the table is pointing at the latest version
|
|
230
|
+
*
|
|
231
|
+
* This can be used to manually update a table when the read_consistency_interval is None
|
|
232
|
+
* It can also be used to undo a `[Self::checkout]` operation
|
|
233
|
+
*/
|
|
234
|
+
checkoutLatest(): Promise<void>;
|
|
235
|
+
/**
|
|
236
|
+
* Restore the table to the currently checked out version
|
|
237
|
+
*
|
|
238
|
+
* This operation will fail if checkout has not been called previously
|
|
239
|
+
*
|
|
240
|
+
* This operation will overwrite the latest version of the table with a
|
|
241
|
+
* previous version. Any changes made since the checked out version will
|
|
242
|
+
* no longer be visible.
|
|
243
|
+
*
|
|
244
|
+
* Once the operation concludes the table will no longer be in a checked
|
|
245
|
+
* out state and the read_consistency_interval, if any, will apply.
|
|
246
|
+
*/
|
|
247
|
+
restore(): Promise<void>;
|
|
248
|
+
/**
|
|
249
|
+
* List all indices that have been created with {@link Table#createIndex}
|
|
250
|
+
*/
|
|
251
|
+
listIndices(): Promise<IndexConfig[]>;
|
|
252
|
+
}
|