@lancedb/lancedb 0.22.2-beta.1 → 0.22.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/arrow.d.ts CHANGED
@@ -11,7 +11,7 @@ export type SchemaLike = Schema | {
11
11
  export type FieldLike = Field | {
12
12
  type: string;
13
13
  name: string;
14
- nullable?: boolean;
14
+ nullable: boolean;
15
15
  metadata?: Map<string, string>;
16
16
  };
17
17
  export type DataLike = import("apache-arrow").Data<Struct<any>> | {
package/dist/arrow.js CHANGED
@@ -1048,17 +1048,33 @@ function validateSchemaEmbeddings(schema, data, embeddings) {
1048
1048
  if (isFixedSizeList(field.type)) {
1049
1049
  field = (0, sanitize_1.sanitizeField)(field);
1050
1050
  if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
1051
+ // Check if there's an embedding function registered for this field
1052
+ let hasEmbeddingFunction = false;
1053
+ // Check schema metadata for embedding functions
1051
1054
  if (schema.metadata.has("embedding_functions")) {
1052
1055
  const embeddings = JSON.parse(schema.metadata.get("embedding_functions"));
1053
- if (
1054
1056
  // biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
1055
- embeddings.find((f) => f["vectorColumn"] === field.name) ===
1056
- undefined) {
1057
- missingEmbeddingFields.push(field);
1057
+ if (embeddings.find((f) => f["vectorColumn"] === field.name)) {
1058
+ hasEmbeddingFunction = true;
1058
1059
  }
1059
1060
  }
1061
+ // Check passed embedding function parameter
1062
+ if (embeddings && embeddings.vectorColumn === field.name) {
1063
+ hasEmbeddingFunction = true;
1064
+ }
1065
+ // If the field is nullable AND there's no embedding function, allow undefined/omitted values
1066
+ if (field.nullable && !hasEmbeddingFunction) {
1067
+ fields.push(field);
1068
+ }
1060
1069
  else {
1061
- missingEmbeddingFields.push(field);
1070
+ // Either not nullable OR has embedding function - require explicit values
1071
+ if (hasEmbeddingFunction) {
1072
+ // Don't add to missingEmbeddingFields since this is expected to be filled by embedding function
1073
+ fields.push(field);
1074
+ }
1075
+ else {
1076
+ missingEmbeddingFields.push(field);
1077
+ }
1062
1078
  }
1063
1079
  }
1064
1080
  else {
package/dist/sanitize.d.ts CHANGED
@@ -29,3 +29,4 @@ export declare function sanitizeField(fieldLike: unknown): Field;
29
29
  */
30
30
  export declare function sanitizeSchema(schemaLike: SchemaLike): Schema;
31
31
  export declare function sanitizeTable(tableLike: TableLike): Table;
32
+ export declare function dataTypeFromName(typeName: string): DataType;
package/dist/sanitize.js CHANGED
@@ -24,6 +24,7 @@ exports.sanitizeType = sanitizeType;
24
24
  exports.sanitizeField = sanitizeField;
25
25
  exports.sanitizeSchema = sanitizeSchema;
26
26
  exports.sanitizeTable = sanitizeTable;
27
+ exports.dataTypeFromName = dataTypeFromName;
27
28
  // The utilities in this file help sanitize data from the user's arrow
28
29
  // library into the types expected by vectordb's arrow library. Node
29
30
  // generally allows for mulitple versions of the same library (and sometimes
@@ -204,6 +205,9 @@ function sanitizeDictionary(typeLike) {
204
205
  }
205
206
  // biome-ignore lint/suspicious/noExplicitAny: skip
206
207
  function sanitizeType(typeLike) {
208
+ if (typeof typeLike === "string") {
209
+ return dataTypeFromName(typeLike);
210
+ }
207
211
  if (typeof typeLike !== "object" || typeLike === null) {
208
212
  throw Error("Expected a Type but object was null/undefined");
209
213
  }
@@ -322,7 +326,7 @@ function sanitizeType(typeLike) {
322
326
  case arrow_1.Type.DurationSecond:
323
327
  return new arrow_1.DurationSecond();
324
328
  default:
325
- throw new Error("Unrecoginized type id in schema: " + typeId);
329
+ throw new Error("Unrecognized type id in schema: " + typeId);
326
330
  }
327
331
  }
328
332
  function sanitizeField(fieldLike) {
@@ -337,7 +341,13 @@ function sanitizeField(fieldLike) {
337
341
  !("nullable" in fieldLike)) {
338
342
  throw Error("The field passed in is missing a `type`/`name`/`nullable` property");
339
343
  }
340
- const type = sanitizeType(fieldLike.type);
344
+ let type;
345
+ try {
346
+ type = sanitizeType(fieldLike.type);
347
+ }
348
+ catch (error) {
349
+ throw Error(`Unable to sanitize type for field: ${fieldLike.name} due to error: ${error}`, { cause: error });
350
+ }
341
351
  const name = fieldLike.name;
342
352
  if (!(typeof name === "string")) {
343
353
  throw Error("The field passed in had a non-string `name` property");
@@ -424,3 +434,40 @@ function sanitizeData(dataLike) {
424
434
  [apache_arrow_1.BufferType.TYPE]: dataLike.typeIds,
425
435
  });
426
436
  }
437
+ const constructorsByTypeName = {
438
+ null: () => new arrow_1.Null(),
439
+ binary: () => new arrow_1.Binary(),
440
+ utf8: () => new arrow_1.Utf8(),
441
+ bool: () => new arrow_1.Bool(),
442
+ int8: () => new arrow_1.Int8(),
443
+ int16: () => new arrow_1.Int16(),
444
+ int32: () => new arrow_1.Int32(),
445
+ int64: () => new arrow_1.Int64(),
446
+ uint8: () => new arrow_1.Uint8(),
447
+ uint16: () => new arrow_1.Uint16(),
448
+ uint32: () => new arrow_1.Uint32(),
449
+ uint64: () => new arrow_1.Uint64(),
450
+ float16: () => new arrow_1.Float16(),
451
+ float32: () => new arrow_1.Float32(),
452
+ float64: () => new arrow_1.Float64(),
453
+ datemillisecond: () => new arrow_1.DateMillisecond(),
454
+ dateday: () => new arrow_1.DateDay(),
455
+ timenanosecond: () => new arrow_1.TimeNanosecond(),
456
+ timemicrosecond: () => new arrow_1.TimeMicrosecond(),
457
+ timemillisecond: () => new arrow_1.TimeMillisecond(),
458
+ timesecond: () => new arrow_1.TimeSecond(),
459
+ intervaldaytime: () => new arrow_1.IntervalDayTime(),
460
+ intervalyearmonth: () => new arrow_1.IntervalYearMonth(),
461
+ durationnanosecond: () => new arrow_1.DurationNanosecond(),
462
+ durationmicrosecond: () => new arrow_1.DurationMicrosecond(),
463
+ durationmillisecond: () => new arrow_1.DurationMillisecond(),
464
+ durationsecond: () => new arrow_1.DurationSecond(),
465
+ };
466
+ function dataTypeFromName(typeName) {
467
+ const normalizedTypeName = typeName.toLowerCase();
468
+ const _constructor = constructorsByTypeName[normalizedTypeName];
469
+ if (!_constructor) {
470
+ throw new Error("Unrecognized type name in schema: " + typeName);
471
+ }
472
+ return _constructor();
473
+ }
package/package.json CHANGED
@@ -11,7 +11,7 @@
11
11
  "ann"
12
12
  ],
13
13
  "private": false,
14
- "version": "0.22.2-beta.1",
14
+ "version": "0.22.2",
15
15
  "main": "dist/index.js",
16
16
  "exports": {
17
17
  ".": "./dist/index.js",
@@ -100,14 +100,14 @@
100
100
  "reflect-metadata": "^0.2.2"
101
101
  },
102
102
  "optionalDependencies": {
103
- "@lancedb/lancedb-darwin-x64": "0.22.2-beta.1",
104
- "@lancedb/lancedb-darwin-arm64": "0.22.2-beta.1",
105
- "@lancedb/lancedb-linux-x64-gnu": "0.22.2-beta.1",
106
- "@lancedb/lancedb-linux-arm64-gnu": "0.22.2-beta.1",
107
- "@lancedb/lancedb-linux-x64-musl": "0.22.2-beta.1",
108
- "@lancedb/lancedb-linux-arm64-musl": "0.22.2-beta.1",
109
- "@lancedb/lancedb-win32-x64-msvc": "0.22.2-beta.1",
110
- "@lancedb/lancedb-win32-arm64-msvc": "0.22.2-beta.1"
103
+ "@lancedb/lancedb-darwin-x64": "0.22.2",
104
+ "@lancedb/lancedb-darwin-arm64": "0.22.2",
105
+ "@lancedb/lancedb-linux-x64-gnu": "0.22.2",
106
+ "@lancedb/lancedb-linux-arm64-gnu": "0.22.2",
107
+ "@lancedb/lancedb-linux-x64-musl": "0.22.2",
108
+ "@lancedb/lancedb-linux-arm64-musl": "0.22.2",
109
+ "@lancedb/lancedb-win32-x64-msvc": "0.22.2",
110
+ "@lancedb/lancedb-win32-arm64-msvc": "0.22.2"
111
111
  },
112
112
  "peerDependencies": {
113
113
  "apache-arrow": ">=15.0.0 <=18.1.0"