@lancedb/lancedb 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/arrow.d.ts CHANGED
@@ -37,10 +37,7 @@ export type TableLike = ArrowTable | {
37
37
  batches: RecordBatchLike[];
38
38
  };
39
39
  export type IntoVector = Float32Array | Float64Array | number[] | Promise<Float32Array | Float64Array | number[]>;
40
- export type FloatLike = import("apache-arrow-13").Float | import("apache-arrow-14").Float | import("apache-arrow-15").Float | import("apache-arrow-16").Float | import("apache-arrow-17").Float;
41
- export type DataTypeLike = import("apache-arrow-13").DataType | import("apache-arrow-14").DataType | import("apache-arrow-15").DataType | import("apache-arrow-16").DataType | import("apache-arrow-17").DataType;
42
40
  export declare function isArrowTable(value: object): value is TableLike;
43
- export declare function isDataType(value: unknown): value is DataTypeLike;
44
41
  export declare function isNull(value: unknown): value is Null;
45
42
  export declare function isInt(value: unknown): value is Int;
46
43
  export declare function isFloat(value: unknown): value is Float;
package/dist/arrow.js CHANGED
@@ -27,7 +27,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
27
27
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
28
28
  };
29
29
  Object.defineProperty(exports, "__esModule", { value: true });
30
- exports.createEmptyTable = exports.fromTableToStreamBuffer = exports.fromDataToBuffer = exports.fromTableToBuffer = exports.fromRecordsToStreamBuffer = exports.fromRecordsToBuffer = exports.newVectorType = exports.convertToTable = exports.makeEmptyTable = exports.makeArrowTable = exports.MakeArrowTableOptions = exports.VectorColumnOptions = exports.isFixedSizeList = exports.isFixedSizeBinary = exports.isUnion = exports.isStruct = exports.isList = exports.isDuration = exports.isInterval = exports.isTimestamp = exports.isTime = exports.isDate = exports.isDecimal = exports.isBool = exports.isLargeUtf8 = exports.isUtf8 = exports.isLargeBinary = exports.isBinary = exports.isFloat = exports.isInt = exports.isNull = exports.isDataType = exports.isArrowTable = void 0;
30
+ exports.createEmptyTable = exports.fromTableToStreamBuffer = exports.fromDataToBuffer = exports.fromTableToBuffer = exports.fromRecordsToStreamBuffer = exports.fromRecordsToBuffer = exports.newVectorType = exports.convertToTable = exports.makeEmptyTable = exports.makeArrowTable = exports.MakeArrowTableOptions = exports.VectorColumnOptions = exports.isFixedSizeList = exports.isFixedSizeBinary = exports.isUnion = exports.isStruct = exports.isList = exports.isDuration = exports.isInterval = exports.isTimestamp = exports.isTime = exports.isDate = exports.isDecimal = exports.isBool = exports.isLargeUtf8 = exports.isUtf8 = exports.isLargeBinary = exports.isBinary = exports.isFloat = exports.isInt = exports.isNull = exports.isArrowTable = void 0;
31
31
  const apache_arrow_1 = require("apache-arrow");
32
32
  const registry_1 = require("./embedding/registry");
33
33
  const sanitize_1 = require("./sanitize");
@@ -38,31 +38,6 @@ function isArrowTable(value) {
38
38
  return "schema" in value && "batches" in value;
39
39
  }
40
40
  exports.isArrowTable = isArrowTable;
41
- function isDataType(value) {
42
- return (value instanceof apache_arrow_1.DataType ||
43
- apache_arrow_1.DataType.isNull(value) ||
44
- apache_arrow_1.DataType.isInt(value) ||
45
- apache_arrow_1.DataType.isFloat(value) ||
46
- apache_arrow_1.DataType.isBinary(value) ||
47
- apache_arrow_1.DataType.isLargeBinary(value) ||
48
- apache_arrow_1.DataType.isUtf8(value) ||
49
- apache_arrow_1.DataType.isLargeUtf8(value) ||
50
- apache_arrow_1.DataType.isBool(value) ||
51
- apache_arrow_1.DataType.isDecimal(value) ||
52
- apache_arrow_1.DataType.isDate(value) ||
53
- apache_arrow_1.DataType.isTime(value) ||
54
- apache_arrow_1.DataType.isTimestamp(value) ||
55
- apache_arrow_1.DataType.isInterval(value) ||
56
- apache_arrow_1.DataType.isDuration(value) ||
57
- apache_arrow_1.DataType.isList(value) ||
58
- apache_arrow_1.DataType.isStruct(value) ||
59
- apache_arrow_1.DataType.isUnion(value) ||
60
- apache_arrow_1.DataType.isFixedSizeBinary(value) ||
61
- apache_arrow_1.DataType.isFixedSizeList(value) ||
62
- apache_arrow_1.DataType.isMap(value) ||
63
- apache_arrow_1.DataType.isDictionary(value));
64
- }
65
- exports.isDataType = isDataType;
66
41
  function isNull(value) {
67
42
  return value instanceof apache_arrow_1.Null || apache_arrow_1.DataType.isNull(value);
68
43
  }
@@ -27,10 +27,19 @@ export interface CreateTableOptions {
27
27
  * The available options are described at https://lancedb.github.io/lancedb/guides/storage/
28
28
  */
29
29
  storageOptions?: Record<string, string>;
30
+ /**
31
+ * The version of the data storage format to use.
32
+ *
33
+ * The default is `legacy`, which is Lance format v1.
34
+ * `stable` is the new format, which is Lance format v2.
35
+ */
36
+ dataStorageVersion?: string;
30
37
  /**
31
38
  * If true then data files will be written with the legacy format
32
39
  *
33
40
  * The default is true while the new format is in beta
41
+ *
42
+ * Deprecated.
34
43
  */
35
44
  useLegacyFormat?: boolean;
36
45
  schema?: SchemaLike;
@@ -72,7 +72,14 @@ class LocalConnection extends Connection {
72
72
  throw new Error("data is required");
73
73
  }
74
74
  const { buf, mode } = await table_1.Table.parseTableData(data, options);
75
- const innerTable = await this.inner.createTable(nameOrOptions, buf, mode, cleanseStorageOptions(options?.storageOptions), options?.useLegacyFormat);
75
+ let dataStorageVersion = "legacy";
76
+ if (options?.dataStorageVersion !== undefined) {
77
+ dataStorageVersion = options.dataStorageVersion;
78
+ }
79
+ else if (options?.useLegacyFormat !== undefined) {
80
+ dataStorageVersion = options.useLegacyFormat ? "legacy" : "stable";
81
+ }
82
+ const innerTable = await this.inner.createTable(nameOrOptions, buf, mode, cleanseStorageOptions(options?.storageOptions), dataStorageVersion);
76
83
  return new table_1.LocalTable(innerTable);
77
84
  }
78
85
  async createEmptyTable(name, schema, options) {
@@ -87,9 +94,16 @@ class LocalConnection extends Connection {
87
94
  const registry = (0, registry_1.getRegistry)();
88
95
  metadata = registry.getTableMetadata([embeddingFunction]);
89
96
  }
97
+ let dataStorageVersion = "legacy";
98
+ if (options?.dataStorageVersion !== undefined) {
99
+ dataStorageVersion = options.dataStorageVersion;
100
+ }
101
+ else if (options?.useLegacyFormat !== undefined) {
102
+ dataStorageVersion = options.useLegacyFormat ? "legacy" : "stable";
103
+ }
90
104
  const table = (0, arrow_1.makeEmptyTable)(schema, metadata);
91
105
  const buf = await (0, arrow_1.fromTableToBuffer)(table);
92
- const innerTable = await this.inner.createEmptyTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions), options?.useLegacyFormat);
106
+ const innerTable = await this.inner.createEmptyTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions), dataStorageVersion);
93
107
  return new table_1.LocalTable(innerTable);
94
108
  }
95
109
  async dropTable(name) {
@@ -1,5 +1,5 @@
1
1
  import "reflect-metadata";
2
- import { DataType, DataTypeLike, FloatLike, type IntoVector } from "../arrow";
2
+ import { DataType, Float, type IntoVector } from "../arrow";
3
3
  /**
4
4
  * Options for a given embedding function
5
5
  */
@@ -52,7 +52,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
52
52
  *
53
53
  * @see {@link lancedb.LanceSchema}
54
54
  */
55
- sourceField(optionsOrDatatype: Partial<FieldOptions> | DataTypeLike): [DataTypeLike, Map<string, EmbeddingFunction>];
55
+ sourceField(optionsOrDatatype: Partial<FieldOptions> | DataType): [DataType, Map<string, EmbeddingFunction>];
56
56
  /**
57
57
  * vectorField is used in combination with `LanceSchema` to provide a declarative data model
58
58
  *
@@ -64,7 +64,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
64
64
  /** The number of dimensions of the embeddings */
65
65
  ndims(): number | undefined;
66
66
  /** The datatype of the embeddings */
67
- abstract embeddingDataType(): FloatLike;
67
+ abstract embeddingDataType(): Float;
68
68
  /**
69
69
  * Creates a vector representation for the given values.
70
70
  */
@@ -74,6 +74,16 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
74
74
  */
75
75
  computeQueryEmbeddings(data: T): Promise<Awaited<IntoVector>>;
76
76
  }
77
+ /**
78
+ * an abstract class for implementing embedding functions that take text as input
79
+ */
80
+ export declare abstract class TextEmbeddingFunction<M extends FunctionOptions = FunctionOptions> extends EmbeddingFunction<string, M> {
81
+ abstract generateEmbeddings(texts: string[], ...args: any[]): Promise<number[][] | Float32Array[] | Float64Array[]>;
82
+ computeQueryEmbeddings(data: string): Promise<Awaited<IntoVector>>;
83
+ embeddingDataType(): Float;
84
+ sourceField(): [DataType, Map<string, EmbeddingFunction>];
85
+ computeSourceEmbeddings(data: string[]): Promise<number[][] | Float32Array[] | Float64Array[]>;
86
+ }
77
87
  export interface FieldOptions<T extends DataType = DataType> {
78
88
  datatype: T;
79
89
  dims?: number;
@@ -13,7 +13,7 @@
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
- exports.EmbeddingFunction = void 0;
16
+ exports.TextEmbeddingFunction = exports.EmbeddingFunction = void 0;
17
17
  require("reflect-metadata");
18
18
  const arrow_1 = require("../arrow");
19
19
  const sanitize_1 = require("../sanitize");
@@ -35,9 +35,9 @@ class EmbeddingFunction {
35
35
  * @see {@link lancedb.LanceSchema}
36
36
  */
37
37
  sourceField(optionsOrDatatype) {
38
- let datatype = (0, arrow_1.isDataType)(optionsOrDatatype)
39
- ? optionsOrDatatype
40
- : optionsOrDatatype?.datatype;
38
+ let datatype = "datatype" in optionsOrDatatype
39
+ ? optionsOrDatatype.datatype
40
+ : optionsOrDatatype;
41
41
  if (!datatype) {
42
42
  throw new Error("Datatype is required");
43
43
  }
@@ -58,8 +58,11 @@ class EmbeddingFunction {
58
58
  let vectorType;
59
59
  let dims = this.ndims();
60
60
  // `func.vectorField(new Float32())`
61
- if ((0, arrow_1.isDataType)(optionsOrDatatype)) {
62
- dtype = optionsOrDatatype;
61
+ if (optionsOrDatatype === undefined) {
62
+ dtype = new arrow_1.Float32();
63
+ }
64
+ else if (!("datatype" in optionsOrDatatype)) {
65
+ dtype = (0, sanitize_1.sanitizeType)(optionsOrDatatype);
63
66
  }
64
67
  else {
65
68
  // `func.vectorField({
@@ -67,7 +70,7 @@ class EmbeddingFunction {
67
70
  // dims: 10
68
71
  // })`
69
72
  dims = dims ?? optionsOrDatatype?.dims;
70
- dtype = optionsOrDatatype?.datatype;
73
+ dtype = (0, sanitize_1.sanitizeType)(optionsOrDatatype?.datatype);
71
74
  }
72
75
  if (dtype !== undefined) {
73
76
  // `func.vectorField(new FixedSizeList(dims, new Field("item", new Float32(), true)))`
@@ -110,3 +113,21 @@ class EmbeddingFunction {
110
113
  }
111
114
  }
112
115
  exports.EmbeddingFunction = EmbeddingFunction;
116
+ /**
117
+ * an abstract class for implementing embedding functions that take text as input
118
+ */
119
+ class TextEmbeddingFunction extends EmbeddingFunction {
120
+ async computeQueryEmbeddings(data) {
121
+ return this.generateEmbeddings([data]).then((data) => data[0]);
122
+ }
123
+ embeddingDataType() {
124
+ return new arrow_1.Float32();
125
+ }
126
+ sourceField() {
127
+ return super.sourceField(new arrow_1.Utf8());
128
+ }
129
+ computeSourceEmbeddings(data) {
130
+ return this.generateEmbeddings(data);
131
+ }
132
+ }
133
+ exports.TextEmbeddingFunction = TextEmbeddingFunction;
@@ -1,6 +1,6 @@
1
1
  import { Schema } from "../arrow";
2
2
  import { EmbeddingFunction } from "./embedding_function";
3
- export { EmbeddingFunction } from "./embedding_function";
3
+ export { EmbeddingFunction, TextEmbeddingFunction } from "./embedding_function";
4
4
  export * from "./openai";
5
5
  export * from "./transformers";
6
6
  export * from "./registry";
@@ -27,13 +27,13 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
27
27
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
28
28
  };
29
29
  Object.defineProperty(exports, "__esModule", { value: true });
30
- exports.LanceSchema = exports.EmbeddingFunction = void 0;
30
+ exports.LanceSchema = exports.TextEmbeddingFunction = exports.EmbeddingFunction = void 0;
31
31
  const arrow_1 = require("../arrow");
32
- const arrow_2 = require("../arrow");
33
32
  const sanitize_1 = require("../sanitize");
34
33
  const registry_1 = require("./registry");
35
34
  var embedding_function_1 = require("./embedding_function");
36
35
  Object.defineProperty(exports, "EmbeddingFunction", { enumerable: true, get: function () { return embedding_function_1.EmbeddingFunction; } });
36
+ Object.defineProperty(exports, "TextEmbeddingFunction", { enumerable: true, get: function () { return embedding_function_1.TextEmbeddingFunction; } });
37
37
  // We need to explicitly export '*' so that the `register` decorator actually registers the class.
38
38
  __exportStar(require("./openai"), exports);
39
39
  __exportStar(require("./transformers"), exports);
@@ -64,14 +64,14 @@ function LanceSchema(fields) {
64
64
  const arrowFields = [];
65
65
  const embeddingFunctions = new Map();
66
66
  Object.entries(fields).forEach(([key, value]) => {
67
- if ((0, arrow_2.isDataType)(value)) {
68
- arrowFields.push(new arrow_1.Field(key, (0, sanitize_1.sanitizeType)(value), true));
69
- }
70
- else {
67
+ if (Array.isArray(value)) {
71
68
  const [dtype, metadata] = value;
72
69
  arrowFields.push(new arrow_1.Field(key, (0, sanitize_1.sanitizeType)(dtype), true));
73
70
  parseEmbeddingFunctions(embeddingFunctions, key, metadata);
74
71
  }
72
+ else {
73
+ arrowFields.push(new arrow_1.Field(key, (0, sanitize_1.sanitizeType)(value), true));
74
+ }
75
75
  });
76
76
  const registry = (0, registry_1.getRegistry)();
77
77
  const metadata = registry.getTableMetadata(Array.from(embeddingFunctions.values()));
@@ -1,4 +1,4 @@
1
- import { type EmbeddingCreateParams } from "openai/resources";
1
+ import type { EmbeddingCreateParams } from "openai/resources/index";
2
2
  import { Float } from "../arrow";
3
3
  import { EmbeddingFunction } from "./embedding_function";
4
4
  export type OpenAIOptions = {
package/dist/index.d.ts CHANGED
@@ -6,7 +6,7 @@ export { makeArrowTable, MakeArrowTableOptions, Data, VectorColumnOptions, } fro
6
6
  export { Connection, CreateTableOptions, TableNamesOptions, } from "./connection";
7
7
  export { ExecutableQuery, Query, QueryBase, VectorQuery, RecordBatchIterator, } from "./query";
8
8
  export { Index, IndexOptions, IvfPqOptions } from "./indices";
9
- export { Table, AddDataOptions, UpdateOptions } from "./table";
9
+ export { Table, AddDataOptions, UpdateOptions, OptimizeOptions } from "./table";
10
10
  export * as embedding from "./embedding";
11
11
  /**
12
12
  * Connect to a LanceDB instance at the given URI.
package/dist/sanitize.js CHANGED
@@ -215,8 +215,10 @@ function sanitizeType(typeLike) {
215
215
  if (typeof typeLike !== "object" || typeLike === null) {
216
216
  throw Error("Expected a Type but object was null/undefined");
217
217
  }
218
- if (!("typeId" in typeLike) || !(typeof typeLike.typeId !== "function")) {
219
- throw Error("Expected a Type to have a typeId function");
218
+ if (!("typeId" in typeLike) ||
219
+ !(typeof typeLike.typeId !== "function" ||
220
+ typeof typeLike.typeId !== "number")) {
221
+ throw Error("Expected a Type to have a typeId property");
220
222
  }
221
223
  let typeId;
222
224
  if (typeof typeLike.typeId === "function") {
package/package.json CHANGED
@@ -10,7 +10,7 @@
10
10
  "vector database",
11
11
  "ann"
12
12
  ],
13
- "version": "0.8.0",
13
+ "version": "0.9.0",
14
14
  "main": "dist/index.js",
15
15
  "exports": {
16
16
  ".": "./dist/index.js",
@@ -81,6 +81,7 @@
81
81
  "docs": "typedoc --plugin typedoc-plugin-markdown --out ../docs/src/js lancedb/index.ts",
82
82
  "lint": "biome check . && biome format .",
83
83
  "lint-fix": "biome check --write . && biome format --write .",
84
+ "prepublishOnly": "napi prepublish -t npm",
84
85
  "test": "jest --verbose",
85
86
  "integration": "S3_TEST=1 npm run test",
86
87
  "universal": "napi universal",
@@ -91,11 +92,11 @@
91
92
  "reflect-metadata": "^0.2.2"
92
93
  },
93
94
  "optionalDependencies": {
94
- "@lancedb/lancedb-darwin-arm64": "0.8.0",
95
- "@lancedb/lancedb-linux-arm64-gnu": "0.8.0",
96
- "@lancedb/lancedb-darwin-x64": "0.8.0",
97
- "@lancedb/lancedb-linux-x64-gnu": "0.8.0",
98
- "@lancedb/lancedb-win32-x64-msvc": "0.8.0"
95
+ "@lancedb/lancedb-darwin-arm64": "0.9.0",
96
+ "@lancedb/lancedb-linux-arm64-gnu": "0.9.0",
97
+ "@lancedb/lancedb-darwin-x64": "0.9.0",
98
+ "@lancedb/lancedb-linux-x64-gnu": "0.9.0",
99
+ "@lancedb/lancedb-win32-x64-msvc": "0.9.0"
99
100
  },
100
101
  "peerDependencies": {
101
102
  "apache-arrow": ">=13.0.0 <=17.0.0"