@lancedb/lancedb 0.22.2-beta.1 → 0.22.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/arrow.d.ts +1 -1
- package/dist/arrow.js +21 -5
- package/dist/sanitize.d.ts +1 -0
- package/dist/sanitize.js +49 -2
- package/package.json +9 -9
package/dist/arrow.d.ts
CHANGED
|
@@ -11,7 +11,7 @@ export type SchemaLike = Schema | {
|
|
|
11
11
|
export type FieldLike = Field | {
|
|
12
12
|
type: string;
|
|
13
13
|
name: string;
|
|
14
|
-
nullable
|
|
14
|
+
nullable: boolean;
|
|
15
15
|
metadata?: Map<string, string>;
|
|
16
16
|
};
|
|
17
17
|
export type DataLike = import("apache-arrow").Data<Struct<any>> | {
|
package/dist/arrow.js
CHANGED
|
@@ -1048,17 +1048,33 @@ function validateSchemaEmbeddings(schema, data, embeddings) {
|
|
|
1048
1048
|
if (isFixedSizeList(field.type)) {
|
|
1049
1049
|
field = (0, sanitize_1.sanitizeField)(field);
|
|
1050
1050
|
if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
|
|
1051
|
+
// Check if there's an embedding function registered for this field
|
|
1052
|
+
let hasEmbeddingFunction = false;
|
|
1053
|
+
// Check schema metadata for embedding functions
|
|
1051
1054
|
if (schema.metadata.has("embedding_functions")) {
|
|
1052
1055
|
const embeddings = JSON.parse(schema.metadata.get("embedding_functions"));
|
|
1053
|
-
if (
|
|
1054
1056
|
// biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
|
|
1055
|
-
embeddings.find((f) => f["vectorColumn"] === field.name)
|
|
1056
|
-
|
|
1057
|
-
missingEmbeddingFields.push(field);
|
|
1057
|
+
if (embeddings.find((f) => f["vectorColumn"] === field.name)) {
|
|
1058
|
+
hasEmbeddingFunction = true;
|
|
1058
1059
|
}
|
|
1059
1060
|
}
|
|
1061
|
+
// Check passed embedding function parameter
|
|
1062
|
+
if (embeddings && embeddings.vectorColumn === field.name) {
|
|
1063
|
+
hasEmbeddingFunction = true;
|
|
1064
|
+
}
|
|
1065
|
+
// If the field is nullable AND there's no embedding function, allow undefined/omitted values
|
|
1066
|
+
if (field.nullable && !hasEmbeddingFunction) {
|
|
1067
|
+
fields.push(field);
|
|
1068
|
+
}
|
|
1060
1069
|
else {
|
|
1061
|
-
|
|
1070
|
+
// Either not nullable OR has embedding function - require explicit values
|
|
1071
|
+
if (hasEmbeddingFunction) {
|
|
1072
|
+
// Don't add to missingEmbeddingFields since this is expected to be filled by embedding function
|
|
1073
|
+
fields.push(field);
|
|
1074
|
+
}
|
|
1075
|
+
else {
|
|
1076
|
+
missingEmbeddingFields.push(field);
|
|
1077
|
+
}
|
|
1062
1078
|
}
|
|
1063
1079
|
}
|
|
1064
1080
|
else {
|
package/dist/sanitize.d.ts
CHANGED
|
@@ -29,3 +29,4 @@ export declare function sanitizeField(fieldLike: unknown): Field;
|
|
|
29
29
|
*/
|
|
30
30
|
export declare function sanitizeSchema(schemaLike: SchemaLike): Schema;
|
|
31
31
|
export declare function sanitizeTable(tableLike: TableLike): Table;
|
|
32
|
+
export declare function dataTypeFromName(typeName: string): DataType;
|
package/dist/sanitize.js
CHANGED
|
@@ -24,6 +24,7 @@ exports.sanitizeType = sanitizeType;
|
|
|
24
24
|
exports.sanitizeField = sanitizeField;
|
|
25
25
|
exports.sanitizeSchema = sanitizeSchema;
|
|
26
26
|
exports.sanitizeTable = sanitizeTable;
|
|
27
|
+
exports.dataTypeFromName = dataTypeFromName;
|
|
27
28
|
// The utilities in this file help sanitize data from the user's arrow
|
|
28
29
|
// library into the types expected by vectordb's arrow library. Node
|
|
29
30
|
// generally allows for mulitple versions of the same library (and sometimes
|
|
@@ -204,6 +205,9 @@ function sanitizeDictionary(typeLike) {
|
|
|
204
205
|
}
|
|
205
206
|
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
206
207
|
function sanitizeType(typeLike) {
|
|
208
|
+
if (typeof typeLike === "string") {
|
|
209
|
+
return dataTypeFromName(typeLike);
|
|
210
|
+
}
|
|
207
211
|
if (typeof typeLike !== "object" || typeLike === null) {
|
|
208
212
|
throw Error("Expected a Type but object was null/undefined");
|
|
209
213
|
}
|
|
@@ -322,7 +326,7 @@ function sanitizeType(typeLike) {
|
|
|
322
326
|
case arrow_1.Type.DurationSecond:
|
|
323
327
|
return new arrow_1.DurationSecond();
|
|
324
328
|
default:
|
|
325
|
-
throw new Error("
|
|
329
|
+
throw new Error("Unrecognized type id in schema: " + typeId);
|
|
326
330
|
}
|
|
327
331
|
}
|
|
328
332
|
function sanitizeField(fieldLike) {
|
|
@@ -337,7 +341,13 @@ function sanitizeField(fieldLike) {
|
|
|
337
341
|
!("nullable" in fieldLike)) {
|
|
338
342
|
throw Error("The field passed in is missing a `type`/`name`/`nullable` property");
|
|
339
343
|
}
|
|
340
|
-
|
|
344
|
+
let type;
|
|
345
|
+
try {
|
|
346
|
+
type = sanitizeType(fieldLike.type);
|
|
347
|
+
}
|
|
348
|
+
catch (error) {
|
|
349
|
+
throw Error(`Unable to sanitize type for field: ${fieldLike.name} due to error: ${error}`, { cause: error });
|
|
350
|
+
}
|
|
341
351
|
const name = fieldLike.name;
|
|
342
352
|
if (!(typeof name === "string")) {
|
|
343
353
|
throw Error("The field passed in had a non-string `name` property");
|
|
@@ -424,3 +434,40 @@ function sanitizeData(dataLike) {
|
|
|
424
434
|
[apache_arrow_1.BufferType.TYPE]: dataLike.typeIds,
|
|
425
435
|
});
|
|
426
436
|
}
|
|
437
|
+
const constructorsByTypeName = {
|
|
438
|
+
null: () => new arrow_1.Null(),
|
|
439
|
+
binary: () => new arrow_1.Binary(),
|
|
440
|
+
utf8: () => new arrow_1.Utf8(),
|
|
441
|
+
bool: () => new arrow_1.Bool(),
|
|
442
|
+
int8: () => new arrow_1.Int8(),
|
|
443
|
+
int16: () => new arrow_1.Int16(),
|
|
444
|
+
int32: () => new arrow_1.Int32(),
|
|
445
|
+
int64: () => new arrow_1.Int64(),
|
|
446
|
+
uint8: () => new arrow_1.Uint8(),
|
|
447
|
+
uint16: () => new arrow_1.Uint16(),
|
|
448
|
+
uint32: () => new arrow_1.Uint32(),
|
|
449
|
+
uint64: () => new arrow_1.Uint64(),
|
|
450
|
+
float16: () => new arrow_1.Float16(),
|
|
451
|
+
float32: () => new arrow_1.Float32(),
|
|
452
|
+
float64: () => new arrow_1.Float64(),
|
|
453
|
+
datemillisecond: () => new arrow_1.DateMillisecond(),
|
|
454
|
+
dateday: () => new arrow_1.DateDay(),
|
|
455
|
+
timenanosecond: () => new arrow_1.TimeNanosecond(),
|
|
456
|
+
timemicrosecond: () => new arrow_1.TimeMicrosecond(),
|
|
457
|
+
timemillisecond: () => new arrow_1.TimeMillisecond(),
|
|
458
|
+
timesecond: () => new arrow_1.TimeSecond(),
|
|
459
|
+
intervaldaytime: () => new arrow_1.IntervalDayTime(),
|
|
460
|
+
intervalyearmonth: () => new arrow_1.IntervalYearMonth(),
|
|
461
|
+
durationnanosecond: () => new arrow_1.DurationNanosecond(),
|
|
462
|
+
durationmicrosecond: () => new arrow_1.DurationMicrosecond(),
|
|
463
|
+
durationmillisecond: () => new arrow_1.DurationMillisecond(),
|
|
464
|
+
durationsecond: () => new arrow_1.DurationSecond(),
|
|
465
|
+
};
|
|
466
|
+
function dataTypeFromName(typeName) {
|
|
467
|
+
const normalizedTypeName = typeName.toLowerCase();
|
|
468
|
+
const _constructor = constructorsByTypeName[normalizedTypeName];
|
|
469
|
+
if (!_constructor) {
|
|
470
|
+
throw new Error("Unrecognized type name in schema: " + typeName);
|
|
471
|
+
}
|
|
472
|
+
return _constructor();
|
|
473
|
+
}
|
package/package.json
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
"ann"
|
|
12
12
|
],
|
|
13
13
|
"private": false,
|
|
14
|
-
"version": "0.22.2-beta.1",
|
|
14
|
+
"version": "0.22.2",
|
|
15
15
|
"main": "dist/index.js",
|
|
16
16
|
"exports": {
|
|
17
17
|
".": "./dist/index.js",
|
|
@@ -100,14 +100,14 @@
|
|
|
100
100
|
"reflect-metadata": "^0.2.2"
|
|
101
101
|
},
|
|
102
102
|
"optionalDependencies": {
|
|
103
|
-
"@lancedb/lancedb-darwin-x64": "0.22.2-beta.1",
|
|
104
|
-
"@lancedb/lancedb-darwin-arm64": "0.22.2-beta.1",
|
|
105
|
-
"@lancedb/lancedb-linux-x64-gnu": "0.22.2-beta.1",
|
|
106
|
-
"@lancedb/lancedb-linux-arm64-gnu": "0.22.2-beta.1",
|
|
107
|
-
"@lancedb/lancedb-linux-x64-musl": "0.22.2-beta.1",
|
|
108
|
-
"@lancedb/lancedb-linux-arm64-musl": "0.22.2-beta.1",
|
|
109
|
-
"@lancedb/lancedb-win32-x64-msvc": "0.22.2-beta.1",
|
|
110
|
-
"@lancedb/lancedb-win32-arm64-msvc": "0.22.2-beta.1"
|
|
103
|
+
"@lancedb/lancedb-darwin-x64": "0.22.2",
|
|
104
|
+
"@lancedb/lancedb-darwin-arm64": "0.22.2",
|
|
105
|
+
"@lancedb/lancedb-linux-x64-gnu": "0.22.2",
|
|
106
|
+
"@lancedb/lancedb-linux-arm64-gnu": "0.22.2",
|
|
107
|
+
"@lancedb/lancedb-linux-x64-musl": "0.22.2",
|
|
108
|
+
"@lancedb/lancedb-linux-arm64-musl": "0.22.2",
|
|
109
|
+
"@lancedb/lancedb-win32-x64-msvc": "0.22.2",
|
|
110
|
+
"@lancedb/lancedb-win32-arm64-msvc": "0.22.2"
|
|
111
111
|
},
|
|
112
112
|
"peerDependencies": {
|
|
113
113
|
"apache-arrow": ">=15.0.0 <=18.1.0"
|