@lancedb/lancedb 0.8.0 → 0.10.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/arrow.d.ts CHANGED
@@ -1,4 +1,3 @@
1
- /// <reference types="node" />
2
1
  import { Table as ArrowTable, Binary, BufferType, Field, FixedSizeBinary, FixedSizeList, Float, Int, LargeBinary, List, Null, RecordBatch, Schema, Struct, Utf8 } from "apache-arrow";
3
2
  import { Buffers } from "apache-arrow/data";
4
3
  import { type EmbeddingFunction } from "./embedding/embedding_function";
@@ -37,10 +36,7 @@ export type TableLike = ArrowTable | {
37
36
  batches: RecordBatchLike[];
38
37
  };
39
38
  export type IntoVector = Float32Array | Float64Array | number[] | Promise<Float32Array | Float64Array | number[]>;
40
- export type FloatLike = import("apache-arrow-13").Float | import("apache-arrow-14").Float | import("apache-arrow-15").Float | import("apache-arrow-16").Float | import("apache-arrow-17").Float;
41
- export type DataTypeLike = import("apache-arrow-13").DataType | import("apache-arrow-14").DataType | import("apache-arrow-15").DataType | import("apache-arrow-16").DataType | import("apache-arrow-17").DataType;
42
39
  export declare function isArrowTable(value: object): value is TableLike;
43
- export declare function isDataType(value: unknown): value is DataTypeLike;
44
40
  export declare function isNull(value: unknown): value is Null;
45
41
  export declare function isInt(value: unknown): value is Int;
46
42
  export declare function isFloat(value: unknown): value is Float;
package/dist/arrow.js CHANGED
@@ -27,7 +27,37 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
27
27
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
28
28
  };
29
29
  Object.defineProperty(exports, "__esModule", { value: true });
30
- exports.createEmptyTable = exports.fromTableToStreamBuffer = exports.fromDataToBuffer = exports.fromTableToBuffer = exports.fromRecordsToStreamBuffer = exports.fromRecordsToBuffer = exports.newVectorType = exports.convertToTable = exports.makeEmptyTable = exports.makeArrowTable = exports.MakeArrowTableOptions = exports.VectorColumnOptions = exports.isFixedSizeList = exports.isFixedSizeBinary = exports.isUnion = exports.isStruct = exports.isList = exports.isDuration = exports.isInterval = exports.isTimestamp = exports.isTime = exports.isDate = exports.isDecimal = exports.isBool = exports.isLargeUtf8 = exports.isUtf8 = exports.isLargeBinary = exports.isBinary = exports.isFloat = exports.isInt = exports.isNull = exports.isDataType = exports.isArrowTable = void 0;
30
+ exports.MakeArrowTableOptions = exports.VectorColumnOptions = void 0;
31
+ exports.isArrowTable = isArrowTable;
32
+ exports.isNull = isNull;
33
+ exports.isInt = isInt;
34
+ exports.isFloat = isFloat;
35
+ exports.isBinary = isBinary;
36
+ exports.isLargeBinary = isLargeBinary;
37
+ exports.isUtf8 = isUtf8;
38
+ exports.isLargeUtf8 = isLargeUtf8;
39
+ exports.isBool = isBool;
40
+ exports.isDecimal = isDecimal;
41
+ exports.isDate = isDate;
42
+ exports.isTime = isTime;
43
+ exports.isTimestamp = isTimestamp;
44
+ exports.isInterval = isInterval;
45
+ exports.isDuration = isDuration;
46
+ exports.isList = isList;
47
+ exports.isStruct = isStruct;
48
+ exports.isUnion = isUnion;
49
+ exports.isFixedSizeBinary = isFixedSizeBinary;
50
+ exports.isFixedSizeList = isFixedSizeList;
51
+ exports.makeArrowTable = makeArrowTable;
52
+ exports.makeEmptyTable = makeEmptyTable;
53
+ exports.convertToTable = convertToTable;
54
+ exports.newVectorType = newVectorType;
55
+ exports.fromRecordsToBuffer = fromRecordsToBuffer;
56
+ exports.fromRecordsToStreamBuffer = fromRecordsToStreamBuffer;
57
+ exports.fromTableToBuffer = fromTableToBuffer;
58
+ exports.fromDataToBuffer = fromDataToBuffer;
59
+ exports.fromTableToStreamBuffer = fromTableToStreamBuffer;
60
+ exports.createEmptyTable = createEmptyTable;
31
61
  const apache_arrow_1 = require("apache-arrow");
32
62
  const registry_1 = require("./embedding/registry");
33
63
  const sanitize_1 = require("./sanitize");
@@ -37,108 +67,63 @@ function isArrowTable(value) {
37
67
  return true;
38
68
  return "schema" in value && "batches" in value;
39
69
  }
40
- exports.isArrowTable = isArrowTable;
41
- function isDataType(value) {
42
- return (value instanceof apache_arrow_1.DataType ||
43
- apache_arrow_1.DataType.isNull(value) ||
44
- apache_arrow_1.DataType.isInt(value) ||
45
- apache_arrow_1.DataType.isFloat(value) ||
46
- apache_arrow_1.DataType.isBinary(value) ||
47
- apache_arrow_1.DataType.isLargeBinary(value) ||
48
- apache_arrow_1.DataType.isUtf8(value) ||
49
- apache_arrow_1.DataType.isLargeUtf8(value) ||
50
- apache_arrow_1.DataType.isBool(value) ||
51
- apache_arrow_1.DataType.isDecimal(value) ||
52
- apache_arrow_1.DataType.isDate(value) ||
53
- apache_arrow_1.DataType.isTime(value) ||
54
- apache_arrow_1.DataType.isTimestamp(value) ||
55
- apache_arrow_1.DataType.isInterval(value) ||
56
- apache_arrow_1.DataType.isDuration(value) ||
57
- apache_arrow_1.DataType.isList(value) ||
58
- apache_arrow_1.DataType.isStruct(value) ||
59
- apache_arrow_1.DataType.isUnion(value) ||
60
- apache_arrow_1.DataType.isFixedSizeBinary(value) ||
61
- apache_arrow_1.DataType.isFixedSizeList(value) ||
62
- apache_arrow_1.DataType.isMap(value) ||
63
- apache_arrow_1.DataType.isDictionary(value));
64
- }
65
- exports.isDataType = isDataType;
66
70
  function isNull(value) {
67
71
  return value instanceof apache_arrow_1.Null || apache_arrow_1.DataType.isNull(value);
68
72
  }
69
- exports.isNull = isNull;
70
73
  function isInt(value) {
71
74
  return value instanceof apache_arrow_1.Int || apache_arrow_1.DataType.isInt(value);
72
75
  }
73
- exports.isInt = isInt;
74
76
  function isFloat(value) {
75
77
  return value instanceof apache_arrow_1.Float || apache_arrow_1.DataType.isFloat(value);
76
78
  }
77
- exports.isFloat = isFloat;
78
79
  function isBinary(value) {
79
80
  return value instanceof apache_arrow_1.Binary || apache_arrow_1.DataType.isBinary(value);
80
81
  }
81
- exports.isBinary = isBinary;
82
82
  function isLargeBinary(value) {
83
83
  return value instanceof apache_arrow_1.LargeBinary || apache_arrow_1.DataType.isLargeBinary(value);
84
84
  }
85
- exports.isLargeBinary = isLargeBinary;
86
85
  function isUtf8(value) {
87
86
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isUtf8(value);
88
87
  }
89
- exports.isUtf8 = isUtf8;
90
88
  function isLargeUtf8(value) {
91
89
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isLargeUtf8(value);
92
90
  }
93
- exports.isLargeUtf8 = isLargeUtf8;
94
91
  function isBool(value) {
95
92
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isBool(value);
96
93
  }
97
- exports.isBool = isBool;
98
94
  function isDecimal(value) {
99
95
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isDecimal(value);
100
96
  }
101
- exports.isDecimal = isDecimal;
102
97
  function isDate(value) {
103
98
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isDate(value);
104
99
  }
105
- exports.isDate = isDate;
106
100
  function isTime(value) {
107
101
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isTime(value);
108
102
  }
109
- exports.isTime = isTime;
110
103
  function isTimestamp(value) {
111
104
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isTimestamp(value);
112
105
  }
113
- exports.isTimestamp = isTimestamp;
114
106
  function isInterval(value) {
115
107
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isInterval(value);
116
108
  }
117
- exports.isInterval = isInterval;
118
109
  function isDuration(value) {
119
110
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isDuration(value);
120
111
  }
121
- exports.isDuration = isDuration;
122
112
  function isList(value) {
123
113
  return value instanceof apache_arrow_1.List || apache_arrow_1.DataType.isList(value);
124
114
  }
125
- exports.isList = isList;
126
115
  function isStruct(value) {
127
116
  return value instanceof apache_arrow_1.Struct || apache_arrow_1.DataType.isStruct(value);
128
117
  }
129
- exports.isStruct = isStruct;
130
118
  function isUnion(value) {
131
119
  return value instanceof apache_arrow_1.Struct || apache_arrow_1.DataType.isUnion(value);
132
120
  }
133
- exports.isUnion = isUnion;
134
121
  function isFixedSizeBinary(value) {
135
122
  return value instanceof apache_arrow_1.FixedSizeBinary || apache_arrow_1.DataType.isFixedSizeBinary(value);
136
123
  }
137
- exports.isFixedSizeBinary = isFixedSizeBinary;
138
124
  function isFixedSizeList(value) {
139
125
  return value instanceof apache_arrow_1.FixedSizeList || apache_arrow_1.DataType.isFixedSizeList(value);
140
126
  }
141
- exports.isFixedSizeList = isFixedSizeList;
142
127
  /*
143
128
  * Options to control how a column should be converted to a vector array
144
129
  */
@@ -396,14 +381,12 @@ function makeArrowTable(data, options, metadata) {
396
381
  }
397
382
  return tbl;
398
383
  }
399
- exports.makeArrowTable = makeArrowTable;
400
384
  /**
401
385
  * Create an empty Arrow table with the provided schema
402
386
  */
403
387
  function makeEmptyTable(schema, metadata) {
404
388
  return makeArrowTable([], { schema }, metadata);
405
389
  }
406
- exports.makeEmptyTable = makeEmptyTable;
407
390
  /**
408
391
  * Helper function to convert Array<Array<any>> to a variable sized list array
409
392
  */
@@ -599,7 +582,6 @@ async function convertToTable(data, embeddings, makeTableOptions) {
599
582
  const table = makeArrowTable(data, makeTableOptions);
600
583
  return await applyEmbeddings(table, embeddings, makeTableOptions?.schema);
601
584
  }
602
- exports.convertToTable = convertToTable;
603
585
  /** Creates the Arrow Type for a Vector column with dimension `dim` */
604
586
  function newVectorType(dim, innerType) {
605
587
  // in Lance we always default to have the elements nullable, so we need to set it to true
@@ -607,7 +589,6 @@ function newVectorType(dim, innerType) {
607
589
  const children = new apache_arrow_1.Field("item", (0, sanitize_1.sanitizeType)(innerType), true);
608
590
  return new apache_arrow_1.FixedSizeList(dim, children);
609
591
  }
610
- exports.newVectorType = newVectorType;
611
592
  /**
612
593
  * Serialize an Array of records into a buffer using the Arrow IPC File serialization
613
594
  *
@@ -623,7 +604,6 @@ async function fromRecordsToBuffer(data, embeddings, schema) {
623
604
  const writer = apache_arrow_1.RecordBatchFileWriter.writeAll(table);
624
605
  return Buffer.from(await writer.toUint8Array());
625
606
  }
626
- exports.fromRecordsToBuffer = fromRecordsToBuffer;
627
607
  /**
628
608
  * Serialize an Array of records into a buffer using the Arrow IPC Stream serialization
629
609
  *
@@ -639,7 +619,6 @@ async function fromRecordsToStreamBuffer(data, embeddings, schema) {
639
619
  const writer = apache_arrow_1.RecordBatchStreamWriter.writeAll(table);
640
620
  return Buffer.from(await writer.toUint8Array());
641
621
  }
642
- exports.fromRecordsToStreamBuffer = fromRecordsToStreamBuffer;
643
622
  /**
644
623
  * Serialize an Arrow Table into a buffer using the Arrow IPC File serialization
645
624
  *
@@ -656,7 +635,6 @@ async function fromTableToBuffer(table, embeddings, schema) {
656
635
  const writer = apache_arrow_1.RecordBatchFileWriter.writeAll(tableWithEmbeddings);
657
636
  return Buffer.from(await writer.toUint8Array());
658
637
  }
659
- exports.fromTableToBuffer = fromTableToBuffer;
660
638
  /**
661
639
  * Serialize an Arrow Table into a buffer using the Arrow IPC File serialization
662
640
  *
@@ -677,7 +655,6 @@ async function fromDataToBuffer(data, embeddings, schema) {
677
655
  return fromTableToBuffer(table);
678
656
  }
679
657
  }
680
- exports.fromDataToBuffer = fromDataToBuffer;
681
658
  /**
682
659
  * Serialize an Arrow Table into a buffer using the Arrow IPC Stream serialization
683
660
  *
@@ -691,7 +668,6 @@ async function fromTableToStreamBuffer(table, embeddings, schema) {
691
668
  const writer = apache_arrow_1.RecordBatchStreamWriter.writeAll(tableWithEmbeddings);
692
669
  return Buffer.from(await writer.toUint8Array());
693
670
  }
694
- exports.fromTableToStreamBuffer = fromTableToStreamBuffer;
695
671
  /**
696
672
  * Reorder the columns in `batch` so that they agree with the field order in `schema`
697
673
  */
@@ -725,7 +701,6 @@ function alignTable(table, schema) {
725
701
  function createEmptyTable(schema) {
726
702
  return new apache_arrow_1.Table((0, sanitize_1.sanitizeSchema)(schema));
727
703
  }
728
- exports.createEmptyTable = createEmptyTable;
729
704
  function validateSchemaEmbeddings(schema, data, embeddings) {
730
705
  const fields = [];
731
706
  const missingEmbeddingFields = [];
@@ -27,10 +27,19 @@ export interface CreateTableOptions {
27
27
  * The available options are described at https://lancedb.github.io/lancedb/guides/storage/
28
28
  */
29
29
  storageOptions?: Record<string, string>;
30
+ /**
31
+ * The version of the data storage format to use.
32
+ *
33
+ * The default is `legacy`, which is Lance format v1.
34
+ * `stable` is the new format, which is Lance format v2.
35
+ */
36
+ dataStorageVersion?: string;
30
37
  /**
31
38
  * If true then data files will be written with the legacy format
32
39
  *
33
40
  * The default is true while the new format is in beta
41
+ *
42
+ * Deprecated.
34
43
  */
35
44
  useLegacyFormat?: boolean;
36
45
  schema?: SchemaLike;
@@ -13,7 +13,8 @@
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
- exports.cleanseStorageOptions = exports.LocalConnection = exports.Connection = void 0;
16
+ exports.LocalConnection = exports.Connection = void 0;
17
+ exports.cleanseStorageOptions = cleanseStorageOptions;
17
18
  const arrow_1 = require("./arrow");
18
19
  const registry_1 = require("./embedding/registry");
19
20
  const table_1 = require("./table");
@@ -72,7 +73,14 @@ class LocalConnection extends Connection {
72
73
  throw new Error("data is required");
73
74
  }
74
75
  const { buf, mode } = await table_1.Table.parseTableData(data, options);
75
- const innerTable = await this.inner.createTable(nameOrOptions, buf, mode, cleanseStorageOptions(options?.storageOptions), options?.useLegacyFormat);
76
+ let dataStorageVersion = "legacy";
77
+ if (options?.dataStorageVersion !== undefined) {
78
+ dataStorageVersion = options.dataStorageVersion;
79
+ }
80
+ else if (options?.useLegacyFormat !== undefined) {
81
+ dataStorageVersion = options.useLegacyFormat ? "legacy" : "stable";
82
+ }
83
+ const innerTable = await this.inner.createTable(nameOrOptions, buf, mode, cleanseStorageOptions(options?.storageOptions), dataStorageVersion);
76
84
  return new table_1.LocalTable(innerTable);
77
85
  }
78
86
  async createEmptyTable(name, schema, options) {
@@ -87,9 +95,16 @@ class LocalConnection extends Connection {
87
95
  const registry = (0, registry_1.getRegistry)();
88
96
  metadata = registry.getTableMetadata([embeddingFunction]);
89
97
  }
98
+ let dataStorageVersion = "legacy";
99
+ if (options?.dataStorageVersion !== undefined) {
100
+ dataStorageVersion = options.dataStorageVersion;
101
+ }
102
+ else if (options?.useLegacyFormat !== undefined) {
103
+ dataStorageVersion = options.useLegacyFormat ? "legacy" : "stable";
104
+ }
90
105
  const table = (0, arrow_1.makeEmptyTable)(schema, metadata);
91
106
  const buf = await (0, arrow_1.fromTableToBuffer)(table);
92
- const innerTable = await this.inner.createEmptyTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions), options?.useLegacyFormat);
107
+ const innerTable = await this.inner.createEmptyTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions), dataStorageVersion);
93
108
  return new table_1.LocalTable(innerTable);
94
109
  }
95
110
  async dropTable(name) {
@@ -113,7 +128,6 @@ function cleanseStorageOptions(options) {
113
128
  }
114
129
  return result;
115
130
  }
116
- exports.cleanseStorageOptions = cleanseStorageOptions;
117
131
  /**
118
132
  * Convert a string to snake case. It might already be snake case, in which case it is
119
133
  * returned unchanged.
@@ -1,5 +1,5 @@
1
1
  import "reflect-metadata";
2
- import { DataType, DataTypeLike, FloatLike, type IntoVector } from "../arrow";
2
+ import { DataType, Float, type IntoVector } from "../arrow";
3
3
  /**
4
4
  * Options for a given embedding function
5
5
  */
@@ -52,7 +52,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
52
52
  *
53
53
  * @see {@link lancedb.LanceSchema}
54
54
  */
55
- sourceField(optionsOrDatatype: Partial<FieldOptions> | DataTypeLike): [DataTypeLike, Map<string, EmbeddingFunction>];
55
+ sourceField(optionsOrDatatype: Partial<FieldOptions> | DataType): [DataType, Map<string, EmbeddingFunction>];
56
56
  /**
57
57
  * vectorField is used in combination with `LanceSchema` to provide a declarative data model
58
58
  *
@@ -64,7 +64,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
64
64
  /** The number of dimensions of the embeddings */
65
65
  ndims(): number | undefined;
66
66
  /** The datatype of the embeddings */
67
- abstract embeddingDataType(): FloatLike;
67
+ abstract embeddingDataType(): Float;
68
68
  /**
69
69
  * Creates a vector representation for the given values.
70
70
  */
@@ -74,6 +74,16 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
74
74
  */
75
75
  computeQueryEmbeddings(data: T): Promise<Awaited<IntoVector>>;
76
76
  }
77
+ /**
78
+ * an abstract class for implementing embedding functions that take text as input
79
+ */
80
+ export declare abstract class TextEmbeddingFunction<M extends FunctionOptions = FunctionOptions> extends EmbeddingFunction<string, M> {
81
+ abstract generateEmbeddings(texts: string[], ...args: any[]): Promise<number[][] | Float32Array[] | Float64Array[]>;
82
+ computeQueryEmbeddings(data: string): Promise<Awaited<IntoVector>>;
83
+ embeddingDataType(): Float;
84
+ sourceField(): [DataType, Map<string, EmbeddingFunction>];
85
+ computeSourceEmbeddings(data: string[]): Promise<number[][] | Float32Array[] | Float64Array[]>;
86
+ }
77
87
  export interface FieldOptions<T extends DataType = DataType> {
78
88
  datatype: T;
79
89
  dims?: number;
@@ -13,7 +13,7 @@
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
- exports.EmbeddingFunction = void 0;
16
+ exports.TextEmbeddingFunction = exports.EmbeddingFunction = void 0;
17
17
  require("reflect-metadata");
18
18
  const arrow_1 = require("../arrow");
19
19
  const sanitize_1 = require("../sanitize");
@@ -35,9 +35,9 @@ class EmbeddingFunction {
35
35
  * @see {@link lancedb.LanceSchema}
36
36
  */
37
37
  sourceField(optionsOrDatatype) {
38
- let datatype = (0, arrow_1.isDataType)(optionsOrDatatype)
39
- ? optionsOrDatatype
40
- : optionsOrDatatype?.datatype;
38
+ let datatype = "datatype" in optionsOrDatatype
39
+ ? optionsOrDatatype.datatype
40
+ : optionsOrDatatype;
41
41
  if (!datatype) {
42
42
  throw new Error("Datatype is required");
43
43
  }
@@ -58,8 +58,11 @@ class EmbeddingFunction {
58
58
  let vectorType;
59
59
  let dims = this.ndims();
60
60
  // `func.vectorField(new Float32())`
61
- if ((0, arrow_1.isDataType)(optionsOrDatatype)) {
62
- dtype = optionsOrDatatype;
61
+ if (optionsOrDatatype === undefined) {
62
+ dtype = new arrow_1.Float32();
63
+ }
64
+ else if (!("datatype" in optionsOrDatatype)) {
65
+ dtype = (0, sanitize_1.sanitizeType)(optionsOrDatatype);
63
66
  }
64
67
  else {
65
68
  // `func.vectorField({
@@ -67,7 +70,7 @@ class EmbeddingFunction {
67
70
  // dims: 10
68
71
  // })`
69
72
  dims = dims ?? optionsOrDatatype?.dims;
70
- dtype = optionsOrDatatype?.datatype;
73
+ dtype = (0, sanitize_1.sanitizeType)(optionsOrDatatype?.datatype);
71
74
  }
72
75
  if (dtype !== undefined) {
73
76
  // `func.vectorField(new FixedSizeList(dims, new Field("item", new Float32(), true)))`
@@ -110,3 +113,21 @@ class EmbeddingFunction {
110
113
  }
111
114
  }
112
115
  exports.EmbeddingFunction = EmbeddingFunction;
116
+ /**
117
+ * an abstract class for implementing embedding functions that take text as input
118
+ */
119
+ class TextEmbeddingFunction extends EmbeddingFunction {
120
+ async computeQueryEmbeddings(data) {
121
+ return this.generateEmbeddings([data]).then((data) => data[0]);
122
+ }
123
+ embeddingDataType() {
124
+ return new arrow_1.Float32();
125
+ }
126
+ sourceField() {
127
+ return super.sourceField(new arrow_1.Utf8());
128
+ }
129
+ computeSourceEmbeddings(data) {
130
+ return this.generateEmbeddings(data);
131
+ }
132
+ }
133
+ exports.TextEmbeddingFunction = TextEmbeddingFunction;
@@ -1,6 +1,6 @@
1
1
  import { Schema } from "../arrow";
2
2
  import { EmbeddingFunction } from "./embedding_function";
3
- export { EmbeddingFunction } from "./embedding_function";
3
+ export { EmbeddingFunction, TextEmbeddingFunction } from "./embedding_function";
4
4
  export * from "./openai";
5
5
  export * from "./transformers";
6
6
  export * from "./registry";
@@ -27,13 +27,14 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
27
27
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
28
28
  };
29
29
  Object.defineProperty(exports, "__esModule", { value: true });
30
- exports.LanceSchema = exports.EmbeddingFunction = void 0;
30
+ exports.TextEmbeddingFunction = exports.EmbeddingFunction = void 0;
31
+ exports.LanceSchema = LanceSchema;
31
32
  const arrow_1 = require("../arrow");
32
- const arrow_2 = require("../arrow");
33
33
  const sanitize_1 = require("../sanitize");
34
34
  const registry_1 = require("./registry");
35
35
  var embedding_function_1 = require("./embedding_function");
36
36
  Object.defineProperty(exports, "EmbeddingFunction", { enumerable: true, get: function () { return embedding_function_1.EmbeddingFunction; } });
37
+ Object.defineProperty(exports, "TextEmbeddingFunction", { enumerable: true, get: function () { return embedding_function_1.TextEmbeddingFunction; } });
37
38
  // We need to explicitly export '*' so that the `register` decorator actually registers the class.
38
39
  __exportStar(require("./openai"), exports);
39
40
  __exportStar(require("./transformers"), exports);
@@ -64,21 +65,20 @@ function LanceSchema(fields) {
64
65
  const arrowFields = [];
65
66
  const embeddingFunctions = new Map();
66
67
  Object.entries(fields).forEach(([key, value]) => {
67
- if ((0, arrow_2.isDataType)(value)) {
68
- arrowFields.push(new arrow_1.Field(key, (0, sanitize_1.sanitizeType)(value), true));
69
- }
70
- else {
68
+ if (Array.isArray(value)) {
71
69
  const [dtype, metadata] = value;
72
70
  arrowFields.push(new arrow_1.Field(key, (0, sanitize_1.sanitizeType)(dtype), true));
73
71
  parseEmbeddingFunctions(embeddingFunctions, key, metadata);
74
72
  }
73
+ else {
74
+ arrowFields.push(new arrow_1.Field(key, (0, sanitize_1.sanitizeType)(value), true));
75
+ }
75
76
  });
76
77
  const registry = (0, registry_1.getRegistry)();
77
78
  const metadata = registry.getTableMetadata(Array.from(embeddingFunctions.values()));
78
79
  const schema = new arrow_1.Schema(arrowFields, metadata);
79
80
  return schema;
80
81
  }
81
- exports.LanceSchema = LanceSchema;
82
82
  function parseEmbeddingFunctions(embeddingFunctions, key, metadata) {
83
83
  if (metadata.has("source_column_for")) {
84
84
  const embedFunction = metadata.get("source_column_for");
@@ -1,4 +1,4 @@
1
- import { type EmbeddingCreateParams } from "openai/resources";
1
+ import type { EmbeddingCreateParams } from "openai/resources/index";
2
2
  import { Float } from "../arrow";
3
3
  import { EmbeddingFunction } from "./embedding_function";
4
4
  export type OpenAIOptions = {
@@ -16,6 +16,10 @@ interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
16
16
  */
17
17
  export declare class EmbeddingFunctionRegistry {
18
18
  #private;
19
+ /**
20
+ * Get the number of registered functions
21
+ */
22
+ length(): number;
19
23
  /**
20
24
  * Register an embedding function
21
25
  * @param name The name of the function
@@ -13,7 +13,9 @@
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
- exports.getRegistry = exports.register = exports.EmbeddingFunctionRegistry = void 0;
16
+ exports.EmbeddingFunctionRegistry = void 0;
17
+ exports.register = register;
18
+ exports.getRegistry = getRegistry;
17
19
  require("reflect-metadata");
18
20
  /**
19
21
  * This is a singleton class used to register embedding functions
@@ -23,6 +25,12 @@ require("reflect-metadata");
23
25
  */
24
26
  class EmbeddingFunctionRegistry {
25
27
  #functions = new Map();
28
+ /**
29
+ * Get the number of registered functions
30
+ */
31
+ length() {
32
+ return this.#functions.size;
33
+ }
26
34
  /**
27
35
  * Register an embedding function
28
36
  * @param name The name of the function
@@ -130,7 +138,6 @@ const _REGISTRY = new EmbeddingFunctionRegistry();
130
138
  function register(name) {
131
139
  return _REGISTRY.register(name);
132
140
  }
133
- exports.register = register;
134
141
  /**
135
142
  * Utility function to get the global instance of the registry
136
143
  * @returns `EmbeddingFunctionRegistry` The global instance of the registry
@@ -142,4 +149,3 @@ exports.register = register;
142
149
  function getRegistry() {
143
150
  return _REGISTRY;
144
151
  }
145
- exports.getRegistry = getRegistry;
package/dist/index.d.ts CHANGED
@@ -6,7 +6,7 @@ export { makeArrowTable, MakeArrowTableOptions, Data, VectorColumnOptions, } fro
6
6
  export { Connection, CreateTableOptions, TableNamesOptions, } from "./connection";
7
7
  export { ExecutableQuery, Query, QueryBase, VectorQuery, RecordBatchIterator, } from "./query";
8
8
  export { Index, IndexOptions, IvfPqOptions } from "./indices";
9
- export { Table, AddDataOptions, UpdateOptions } from "./table";
9
+ export { Table, AddDataOptions, UpdateOptions, OptimizeOptions } from "./table";
10
10
  export * as embedding from "./embedding";
11
11
  /**
12
12
  * Connect to a LanceDB instance at the given URI.
package/dist/index.js CHANGED
@@ -13,7 +13,8 @@
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
- exports.connect = exports.embedding = exports.Table = exports.Index = exports.RecordBatchIterator = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = void 0;
16
+ exports.embedding = exports.Table = exports.Index = exports.RecordBatchIterator = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = void 0;
17
+ exports.connect = connect;
17
18
  const connection_1 = require("./connection");
18
19
  const native_js_1 = require("./native.js");
19
20
  const remote_1 = require("./remote");
@@ -54,4 +55,3 @@ async function connect(uriOrOptions, opts = {}) {
54
55
  const nativeConn = await native_js_1.Connection.new(uri, opts);
55
56
  return new connection_1.LocalConnection(nativeConn);
56
57
  }
57
- exports.connect = connect;
package/dist/indices.d.ts CHANGED
@@ -138,6 +138,36 @@ export declare class Index {
138
138
  * block size may be added in the future.
139
139
  */
140
140
  static btree(): Index;
141
+ /**
142
+ * Create a bitmap index.
143
+ *
144
+ * A `Bitmap` index stores a bitmap for each distinct value in the column for every row.
145
+ *
146
+ * This index works best for low-cardinality columns, where the number of unique values
147
+ * is small (i.e., less than a few hundreds).
148
+ */
149
+ static bitmap(): Index;
150
+ /**
151
+ * Create a label list index.
152
+ *
153
+ * LabelList index is a scalar index that can be used on `List<T>` columns to
154
+ * support queries with `array_contains_all` and `array_contains_any`
155
+ * using an underlying bitmap index.
156
+ */
157
+ static labelList(): Index;
158
+ /**
159
+ * Create a full text search index
160
+ *
161
+ * A full text search index is an index on a string column, so that you can conduct full
162
+ * text searches on the column.
163
+ *
164
+ * The results of a full text search are ordered by relevance measured by BM25.
165
+ *
166
+ * You can combine filters with full text search.
167
+ *
168
+ * For now, the full text search index only supports English, and doesn't support phrase search.
169
+ */
170
+ static fts(): Index;
141
171
  }
142
172
  export interface IndexOptions {
143
173
  /**
package/dist/indices.js CHANGED
@@ -67,5 +67,41 @@ class Index {
67
67
  static btree() {
68
68
  return new Index(native_1.Index.btree());
69
69
  }
70
+ /**
71
+ * Create a bitmap index.
72
+ *
73
+ * A `Bitmap` index stores a bitmap for each distinct value in the column for every row.
74
+ *
75
+ * This index works best for low-cardinality columns, where the number of unique values
76
+ * is small (i.e., less than a few hundreds).
77
+ */
78
+ static bitmap() {
79
+ return new Index(native_1.Index.bitmap());
80
+ }
81
+ /**
82
+ * Create a label list index.
83
+ *
84
+ * LabelList index is a scalar index that can be used on `List<T>` columns to
85
+ * support queries with `array_contains_all` and `array_contains_any`
86
+ * using an underlying bitmap index.
87
+ */
88
+ static labelList() {
89
+ return new Index(native_1.Index.labelList());
90
+ }
91
+ /**
92
+ * Create a full text search index
93
+ *
94
+ * A full text search index is an index on a string column, so that you can conduct full
95
+ * text searches on the column.
96
+ *
97
+ * The results of a full text search are ordered by relevance measured by BM25.
98
+ *
99
+ * You can combine filters with full text search.
100
+ *
101
+ * For now, the full text search index only supports English, and doesn't support phrase search.
102
+ */
103
+ static fts() {
104
+ return new Index(native_1.Index.fts());
105
+ }
70
106
  }
71
107
  exports.Index = Index;
package/dist/query.d.ts CHANGED
@@ -19,6 +19,18 @@ export interface QueryExecutionOptions {
19
19
  */
20
20
  maxBatchLength?: number;
21
21
  }
22
+ /**
23
+ * Options that control the behavior of a full text search
24
+ */
25
+ export interface FullTextSearchOptions {
26
+ /**
27
+ * The columns to search
28
+ *
29
+ * If not specified, all indexed columns will be searched.
30
+ * For now, only one column can be searched.
31
+ */
32
+ columns?: string | string[];
33
+ }
22
34
  /** Common methods supported by all query types */
23
35
  export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery> implements AsyncIterable<RecordBatch> {
24
36
  protected inner: NativeQueryType | Promise<NativeQueryType>;
@@ -43,6 +55,7 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
43
55
  * @deprecated Use `where` instead
44
56
  */
45
57
  filter(predicate: string): this;
58
+ fullTextSearch(query: string, options?: Partial<FullTextSearchOptions>): this;
46
59
  /**
47
60
  * Return only the specified columns.
48
61
  *
package/dist/query.js CHANGED
@@ -98,6 +98,19 @@ class QueryBase {
98
98
  filter(predicate) {
99
99
  return this.where(predicate);
100
100
  }
101
+ fullTextSearch(query, options) {
102
+ let columns = null;
103
+ if (options) {
104
+ if (typeof options.columns === "string") {
105
+ columns = [options.columns];
106
+ }
107
+ else if (Array.isArray(options.columns)) {
108
+ columns = options.columns;
109
+ }
110
+ }
111
+ this.doCall((inner) => inner.fullTextSearch(query, columns));
112
+ return this;
113
+ }
101
114
  /**
102
115
  * Return only the specified columns.
103
116
  *
@@ -1,4 +1,3 @@
1
- /// <reference types="node" />
2
1
  import { type AxiosResponse } from "axios";
3
2
  import { Table as ArrowTable } from "../arrow";
4
3
  import { VectorQuery } from "../query";
@@ -12,7 +12,7 @@ class RemoteConnection extends connection_1.Connection {
12
12
  #apiKey;
13
13
  #region;
14
14
  #client;
15
- #tableCache = new util_1.TTLCache(300000);
15
+ #tableCache = new util_1.TTLCache(300_000);
16
16
  constructor(url, { apiKey, region, hostOverride, timeout }) {
17
17
  super();
18
18
  apiKey = apiKey ?? process.env.LANCEDB_API_KEY;
package/dist/sanitize.js CHANGED
@@ -13,7 +13,28 @@
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
- exports.sanitizeTable = exports.sanitizeSchema = exports.sanitizeField = exports.sanitizeType = exports.sanitizeDictionary = exports.sanitizeDuration = exports.sanitizeMap = exports.sanitizeFixedSizeList = exports.sanitizeFixedSizeBinary = exports.sanitizeTypedUnion = exports.sanitizeUnion = exports.sanitizeStruct = exports.sanitizeList = exports.sanitizeInterval = exports.sanitizeTypedTimestamp = exports.sanitizeTimestamp = exports.sanitizeTime = exports.sanitizeDate = exports.sanitizeDecimal = exports.sanitizeFloat = exports.sanitizeInt = exports.sanitizeMetadata = void 0;
16
+ exports.sanitizeMetadata = sanitizeMetadata;
17
+ exports.sanitizeInt = sanitizeInt;
18
+ exports.sanitizeFloat = sanitizeFloat;
19
+ exports.sanitizeDecimal = sanitizeDecimal;
20
+ exports.sanitizeDate = sanitizeDate;
21
+ exports.sanitizeTime = sanitizeTime;
22
+ exports.sanitizeTimestamp = sanitizeTimestamp;
23
+ exports.sanitizeTypedTimestamp = sanitizeTypedTimestamp;
24
+ exports.sanitizeInterval = sanitizeInterval;
25
+ exports.sanitizeList = sanitizeList;
26
+ exports.sanitizeStruct = sanitizeStruct;
27
+ exports.sanitizeUnion = sanitizeUnion;
28
+ exports.sanitizeTypedUnion = sanitizeTypedUnion;
29
+ exports.sanitizeFixedSizeBinary = sanitizeFixedSizeBinary;
30
+ exports.sanitizeFixedSizeList = sanitizeFixedSizeList;
31
+ exports.sanitizeMap = sanitizeMap;
32
+ exports.sanitizeDuration = sanitizeDuration;
33
+ exports.sanitizeDictionary = sanitizeDictionary;
34
+ exports.sanitizeType = sanitizeType;
35
+ exports.sanitizeField = sanitizeField;
36
+ exports.sanitizeSchema = sanitizeSchema;
37
+ exports.sanitizeTable = sanitizeTable;
17
38
  // The utilities in this file help sanitize data from the user's arrow
18
39
  // library into the types expected by vectordb's arrow library. Node
19
40
  // generally allows for multiple versions of the same library (and sometimes
@@ -37,7 +58,6 @@ function sanitizeMetadata(metadataLike) {
37
58
  }
38
59
  return metadataLike;
39
60
  }
40
- exports.sanitizeMetadata = sanitizeMetadata;
41
61
  function sanitizeInt(typeLike) {
42
62
  if (!("bitWidth" in typeLike) ||
43
63
  typeof typeLike.bitWidth !== "number" ||
@@ -47,14 +67,12 @@ function sanitizeInt(typeLike) {
47
67
  }
48
68
  return new arrow_1.Int(typeLike.isSigned, typeLike.bitWidth);
49
69
  }
50
- exports.sanitizeInt = sanitizeInt;
51
70
  function sanitizeFloat(typeLike) {
52
71
  if (!("precision" in typeLike) || typeof typeLike.precision !== "number") {
53
72
  throw Error("Expected a Float Type to have a `precision` property");
54
73
  }
55
74
  return new arrow_1.Float(typeLike.precision);
56
75
  }
57
- exports.sanitizeFloat = sanitizeFloat;
58
76
  function sanitizeDecimal(typeLike) {
59
77
  if (!("scale" in typeLike) ||
60
78
  typeof typeLike.scale !== "number" ||
@@ -66,14 +84,12 @@ function sanitizeDecimal(typeLike) {
66
84
  }
67
85
  return new arrow_1.Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth);
68
86
  }
69
- exports.sanitizeDecimal = sanitizeDecimal;
70
87
  function sanitizeDate(typeLike) {
71
88
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
72
89
  throw Error("Expected a Date type to have a `unit` property");
73
90
  }
74
91
  return new arrow_1.Date_(typeLike.unit);
75
92
  }
76
- exports.sanitizeDate = sanitizeDate;
77
93
  function sanitizeTime(typeLike) {
78
94
  if (!("unit" in typeLike) ||
79
95
  typeof typeLike.unit !== "number" ||
@@ -83,7 +99,6 @@ function sanitizeTime(typeLike) {
83
99
  }
84
100
  return new arrow_1.Time(typeLike.unit, typeLike.bitWidth);
85
101
  }
86
- exports.sanitizeTime = sanitizeTime;
87
102
  function sanitizeTimestamp(typeLike) {
88
103
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
89
104
  throw Error("Expected a Timestamp type to have a `unit` property");
@@ -94,7 +109,6 @@ function sanitizeTimestamp(typeLike) {
94
109
  }
95
110
  return new arrow_1.Timestamp(typeLike.unit, timezone);
96
111
  }
97
- exports.sanitizeTimestamp = sanitizeTimestamp;
98
112
  function sanitizeTypedTimestamp(typeLike,
99
113
  // eslint-disable-next-line @typescript-eslint/naming-convention
100
114
  Datatype) {
@@ -104,14 +118,12 @@ Datatype) {
104
118
  }
105
119
  return new Datatype(timezone);
106
120
  }
107
- exports.sanitizeTypedTimestamp = sanitizeTypedTimestamp;
108
121
  function sanitizeInterval(typeLike) {
109
122
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
110
123
  throw Error("Expected an Interval type to have a `unit` property");
111
124
  }
112
125
  return new arrow_1.Interval(typeLike.unit);
113
126
  }
114
- exports.sanitizeInterval = sanitizeInterval;
115
127
  function sanitizeList(typeLike) {
116
128
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
117
129
  throw Error("Expected a List type to have an array-like `children` property");
@@ -121,14 +133,12 @@ function sanitizeList(typeLike) {
121
133
  }
122
134
  return new arrow_1.List(sanitizeField(typeLike.children[0]));
123
135
  }
124
- exports.sanitizeList = sanitizeList;
125
136
  function sanitizeStruct(typeLike) {
126
137
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
127
138
  throw Error("Expected a Struct type to have an array-like `children` property");
128
139
  }
129
140
  return new arrow_1.Struct(typeLike.children.map((child) => sanitizeField(child)));
130
141
  }
131
- exports.sanitizeStruct = sanitizeStruct;
132
142
  function sanitizeUnion(typeLike) {
133
143
  if (!("typeIds" in typeLike) ||
134
144
  !("mode" in typeLike) ||
@@ -142,7 +152,6 @@ function sanitizeUnion(typeLike) {
142
152
  // biome-ignore lint/suspicious/noExplicitAny: skip
143
153
  typeLike.typeIds, typeLike.children.map((child) => sanitizeField(child)));
144
154
  }
145
- exports.sanitizeUnion = sanitizeUnion;
146
155
  function sanitizeTypedUnion(typeLike,
147
156
  // eslint-disable-next-line @typescript-eslint/naming-convention
148
157
  UnionType) {
@@ -154,14 +163,12 @@ UnionType) {
154
163
  }
155
164
  return new UnionType(typeLike.typeIds, typeLike.children.map((child) => sanitizeField(child)));
156
165
  }
157
- exports.sanitizeTypedUnion = sanitizeTypedUnion;
158
166
  function sanitizeFixedSizeBinary(typeLike) {
159
167
  if (!("byteWidth" in typeLike) || typeof typeLike.byteWidth !== "number") {
160
168
  throw Error("Expected a FixedSizeBinary type to have a `byteWidth` property");
161
169
  }
162
170
  return new arrow_1.FixedSizeBinary(typeLike.byteWidth);
163
171
  }
164
- exports.sanitizeFixedSizeBinary = sanitizeFixedSizeBinary;
165
172
  function sanitizeFixedSizeList(typeLike) {
166
173
  if (!("listSize" in typeLike) || typeof typeLike.listSize !== "number") {
167
174
  throw Error("Expected a FixedSizeList type to have a `listSize` property");
@@ -174,7 +181,6 @@ function sanitizeFixedSizeList(typeLike) {
174
181
  }
175
182
  return new arrow_1.FixedSizeList(typeLike.listSize, sanitizeField(typeLike.children[0]));
176
183
  }
177
- exports.sanitizeFixedSizeList = sanitizeFixedSizeList;
178
184
  function sanitizeMap(typeLike) {
179
185
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
180
186
  throw Error("Expected a Map type to have an array-like `children` property");
@@ -186,14 +192,12 @@ function sanitizeMap(typeLike) {
186
192
  // biome-ignore lint/suspicious/noExplicitAny: skip
187
193
  typeLike.children.map((field) => sanitizeField(field)), typeLike.keysSorted);
188
194
  }
189
- exports.sanitizeMap = sanitizeMap;
190
195
  function sanitizeDuration(typeLike) {
191
196
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
192
197
  throw Error("Expected a Duration type to have a `unit` property");
193
198
  }
194
199
  return new arrow_1.Duration(typeLike.unit);
195
200
  }
196
- exports.sanitizeDuration = sanitizeDuration;
197
201
  function sanitizeDictionary(typeLike) {
198
202
  if (!("id" in typeLike) || typeof typeLike.id !== "number") {
199
203
  throw Error("Expected a Dictionary type to have an `id` property");
@@ -209,14 +213,15 @@ function sanitizeDictionary(typeLike) {
209
213
  }
210
214
  return new arrow_1.Dictionary(sanitizeType(typeLike.dictionary), sanitizeType(typeLike.indices), typeLike.id, typeLike.isOrdered);
211
215
  }
212
- exports.sanitizeDictionary = sanitizeDictionary;
213
216
  // biome-ignore lint/suspicious/noExplicitAny: skip
214
217
  function sanitizeType(typeLike) {
215
218
  if (typeof typeLike !== "object" || typeLike === null) {
216
219
  throw Error("Expected a Type but object was null/undefined");
217
220
  }
218
- if (!("typeId" in typeLike) || !(typeof typeLike.typeId !== "function")) {
219
- throw Error("Expected a Type to have a typeId function");
221
+ if (!("typeId" in typeLike) ||
222
+ !(typeof typeLike.typeId !== "function" ||
223
+ typeof typeLike.typeId !== "number")) {
224
+ throw Error("Expected a Type to have a typeId property");
220
225
  }
221
226
  let typeId;
222
227
  if (typeof typeLike.typeId === "function") {
@@ -331,7 +336,6 @@ function sanitizeType(typeLike) {
331
336
  throw new Error("Unrecoginized type id in schema: " + typeId);
332
337
  }
333
338
  }
334
- exports.sanitizeType = sanitizeType;
335
339
  function sanitizeField(fieldLike) {
336
340
  if (fieldLike instanceof arrow_1.Field) {
337
341
  return fieldLike;
@@ -359,7 +363,6 @@ function sanitizeField(fieldLike) {
359
363
  }
360
364
  return new arrow_1.Field(name, type, nullable, metadata);
361
365
  }
362
- exports.sanitizeField = sanitizeField;
363
366
  /**
364
367
  * Convert something schemaLike into a Schema instance
365
368
  *
@@ -387,7 +390,6 @@ function sanitizeSchema(schemaLike) {
387
390
  const sanitizedFields = schemaLike.fields.map((field) => sanitizeField(field));
388
391
  return new arrow_1.Schema(sanitizedFields, metadata);
389
392
  }
390
- exports.sanitizeSchema = sanitizeSchema;
391
393
  function sanitizeTable(tableLike) {
392
394
  if (tableLike instanceof arrow_1.Table) {
393
395
  return tableLike;
@@ -405,7 +407,6 @@ function sanitizeTable(tableLike) {
405
407
  const batches = tableLike.batches.map(sanitizeRecordBatch);
406
408
  return new arrow_1.Table(schema, batches);
407
409
  }
408
- exports.sanitizeTable = sanitizeTable;
409
410
  function sanitizeRecordBatch(batchLike) {
410
411
  if (batchLike instanceof arrow_1.RecordBatch) {
411
412
  return batchLike;
package/dist/table.d.ts CHANGED
@@ -1,4 +1,3 @@
1
- /// <reference types="node" />
2
1
  import { Table as ArrowTable, Data, IntoVector, Schema, TableLike } from "./arrow";
3
2
  import { CreateTableOptions } from "./connection";
4
3
  import { IndexOptions } from "./indices";
@@ -218,20 +217,16 @@ export declare abstract class Table {
218
217
  abstract query(): Query;
219
218
  /**
220
219
  * Create a search query to find the nearest neighbors
221
- * of the given query vector
222
- * @param {string} query - the query. This will be converted to a vector using the table's provided embedding function
223
- * @note If no embedding functions are defined in the table, this will error when collecting the results.
224
- *
225
- * This is just a convenience method for calling `.query().nearestTo(await myEmbeddingFunction(query))`
226
- */
227
- abstract search(query: string): VectorQuery;
228
- /**
229
- * Create a search query to find the nearest neighbors
230
- * of the given query vector
231
- * @param {IntoVector} query - the query vector
232
- * This is just a convenience method for calling `.query().nearestTo(query)`
220
+ * of the given query
221
+ * @param {string | IntoVector} query - the query, a vector or string
222
+ * @param {string} queryType - the type of the query, "vector", "fts", or "auto"
223
+ * @param {string | string[]} ftsColumns - the columns to search in for full text search
224
+ * for now, only one column can be searched at a time.
225
+ *
226
+ * when "auto" is used, if the query is a string and an embedding function is defined, it will be treated as a vector query
227
+ * if the query is a string and no embedding function is defined, it will be treated as a full text search query
233
228
  */
234
- abstract search(query: IntoVector): VectorQuery;
229
+ abstract search(query: string | IntoVector, queryType?: string, ftsColumns?: string | string[]): VectorQuery | Query;
235
230
  /**
236
231
  * Search the table with a given query vector.
237
232
  *
@@ -381,7 +376,7 @@ export declare class LocalTable extends Table {
381
376
  delete(predicate: string): Promise<void>;
382
377
  createIndex(column: string, options?: Partial<IndexOptions>): Promise<void>;
383
378
  query(): Query;
384
- search(query: string | IntoVector): VectorQuery;
379
+ search(query: string | IntoVector, queryType?: string, ftsColumns?: string | string[]): VectorQuery | Query;
385
380
  vectorSearch(vector: IntoVector): VectorQuery;
386
381
  addColumns(newColumnTransforms: AddColumnsSql[]): Promise<void>;
387
382
  alterColumns(columnAlterations: ColumnAlteration[]): Promise<void>;
package/dist/table.js CHANGED
@@ -157,23 +157,37 @@ class LocalTable extends Table {
157
157
  query() {
158
158
  return new query_1.Query(this.inner);
159
159
  }
160
- search(query) {
160
+ search(query, queryType = "auto", ftsColumns) {
161
161
  if (typeof query !== "string") {
162
+ if (queryType === "fts") {
163
+ throw new Error("Cannot perform full text search on a vector query");
164
+ }
162
165
  return this.vectorSearch(query);
163
166
  }
164
- else {
165
- const queryPromise = this.getEmbeddingFunctions().then(async (functions) => {
166
- // TODO: Support multiple embedding functions
167
- const embeddingFunc = functions
168
- .values()
169
- .next().value;
170
- if (!embeddingFunc) {
171
- return Promise.reject(new Error("No embedding functions are defined in the table"));
172
- }
173
- return await embeddingFunc.function.computeQueryEmbeddings(query);
167
+ // If the query is a string, we need to determine if it is a vector query or a full text search query
168
+ if (queryType === "fts") {
169
+ return this.query().fullTextSearch(query, {
170
+ columns: ftsColumns,
171
+ });
172
+ }
173
+ // The query type is auto or vector
174
+ // fall back to full text search if no embedding functions are defined and the query is a string
175
+ if (queryType === "auto" && (0, registry_1.getRegistry)().length() === 0) {
176
+ return this.query().fullTextSearch(query, {
177
+ columns: ftsColumns,
174
178
  });
175
- return this.query().nearestTo(queryPromise);
176
179
  }
180
+ const queryPromise = this.getEmbeddingFunctions().then(async (functions) => {
181
+ // TODO: Support multiple embedding functions
182
+ const embeddingFunc = functions
183
+ .values()
184
+ .next().value;
185
+ if (!embeddingFunc) {
186
+ return Promise.reject(new Error("No embedding functions are defined in the table"));
187
+ }
188
+ return await embeddingFunc.function.computeQueryEmbeddings(query);
189
+ });
190
+ return this.query().nearestTo(queryPromise);
177
191
  }
178
192
  vectorSearch(vector) {
179
193
  return this.query().nearestTo(vector);
package/dist/util.d.ts CHANGED
@@ -1,4 +1,3 @@
1
- /// <reference types="node" />
2
1
  export type IntoSql = string | number | boolean | null | Date | ArrayBufferLike | Buffer | IntoSql[];
3
2
  export declare function toSQL(value: IntoSql): string;
4
3
  export declare class TTLCache {
package/dist/util.js CHANGED
@@ -1,6 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.TTLCache = exports.toSQL = void 0;
3
+ exports.TTLCache = void 0;
4
+ exports.toSQL = toSQL;
4
5
  function toSQL(value) {
5
6
  if (typeof value === "string") {
6
7
  return `'${value.replace(/'/g, "''")}'`;
@@ -30,7 +31,6 @@ function toSQL(value) {
30
31
  throw new Error(`Unsupported value type: ${typeof value} value: (${value})`);
31
32
  }
32
33
  }
33
- exports.toSQL = toSQL;
34
34
  class TTLCache {
35
35
  ttl;
36
36
  // biome-ignore lint/suspicious/noExplicitAny: <explanation>
package/package.json CHANGED
@@ -10,7 +10,7 @@
10
10
  "vector database",
11
11
  "ann"
12
12
  ],
13
- "version": "0.8.0",
13
+ "version": "0.10.0-beta.0",
14
14
  "main": "dist/index.js",
15
15
  "exports": {
16
16
  ".": "./dist/index.js",
@@ -53,7 +53,7 @@
53
53
  "ts-jest": "^29.1.2",
54
54
  "typedoc": "^0.26.4",
55
55
  "typedoc-plugin-markdown": "^4.2.1",
56
- "typescript": "^5.3.3",
56
+ "typescript": "^5.5.4",
57
57
  "typescript-eslint": "^7.1.0"
58
58
  },
59
59
  "ava": {
@@ -81,6 +81,7 @@
81
81
  "docs": "typedoc --plugin typedoc-plugin-markdown --out ../docs/src/js lancedb/index.ts",
82
82
  "lint": "biome check . && biome format .",
83
83
  "lint-fix": "biome check --write . && biome format --write .",
84
+ "prepublishOnly": "napi prepublish -t npm",
84
85
  "test": "jest --verbose",
85
86
  "integration": "S3_TEST=1 npm run test",
86
87
  "universal": "napi universal",
@@ -91,11 +92,11 @@
91
92
  "reflect-metadata": "^0.2.2"
92
93
  },
93
94
  "optionalDependencies": {
94
- "@lancedb/lancedb-darwin-arm64": "0.8.0",
95
- "@lancedb/lancedb-linux-arm64-gnu": "0.8.0",
96
- "@lancedb/lancedb-darwin-x64": "0.8.0",
97
- "@lancedb/lancedb-linux-x64-gnu": "0.8.0",
98
- "@lancedb/lancedb-win32-x64-msvc": "0.8.0"
95
+ "@lancedb/lancedb-darwin-arm64": "0.10.0-beta.0",
96
+ "@lancedb/lancedb-linux-arm64-gnu": "0.10.0-beta.0",
97
+ "@lancedb/lancedb-darwin-x64": "0.10.0-beta.0",
98
+ "@lancedb/lancedb-linux-x64-gnu": "0.10.0-beta.0",
99
+ "@lancedb/lancedb-win32-x64-msvc": "0.10.0-beta.0"
99
100
  },
100
101
  "peerDependencies": {
101
102
  "apache-arrow": ">=13.0.0 <=17.0.0"