@lancedb/lancedb 0.9.0 → 0.10.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/arrow.d.ts CHANGED
@@ -1,4 +1,3 @@
1
- /// <reference types="node" />
2
1
  import { Table as ArrowTable, Binary, BufferType, Field, FixedSizeBinary, FixedSizeList, Float, Int, LargeBinary, List, Null, RecordBatch, Schema, Struct, Utf8 } from "apache-arrow";
3
2
  import { Buffers } from "apache-arrow/data";
4
3
  import { type EmbeddingFunction } from "./embedding/embedding_function";
package/dist/arrow.js CHANGED
@@ -27,7 +27,37 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
27
27
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
28
28
  };
29
29
  Object.defineProperty(exports, "__esModule", { value: true });
30
- exports.createEmptyTable = exports.fromTableToStreamBuffer = exports.fromDataToBuffer = exports.fromTableToBuffer = exports.fromRecordsToStreamBuffer = exports.fromRecordsToBuffer = exports.newVectorType = exports.convertToTable = exports.makeEmptyTable = exports.makeArrowTable = exports.MakeArrowTableOptions = exports.VectorColumnOptions = exports.isFixedSizeList = exports.isFixedSizeBinary = exports.isUnion = exports.isStruct = exports.isList = exports.isDuration = exports.isInterval = exports.isTimestamp = exports.isTime = exports.isDate = exports.isDecimal = exports.isBool = exports.isLargeUtf8 = exports.isUtf8 = exports.isLargeBinary = exports.isBinary = exports.isFloat = exports.isInt = exports.isNull = exports.isArrowTable = void 0;
30
+ exports.MakeArrowTableOptions = exports.VectorColumnOptions = void 0;
31
+ exports.isArrowTable = isArrowTable;
32
+ exports.isNull = isNull;
33
+ exports.isInt = isInt;
34
+ exports.isFloat = isFloat;
35
+ exports.isBinary = isBinary;
36
+ exports.isLargeBinary = isLargeBinary;
37
+ exports.isUtf8 = isUtf8;
38
+ exports.isLargeUtf8 = isLargeUtf8;
39
+ exports.isBool = isBool;
40
+ exports.isDecimal = isDecimal;
41
+ exports.isDate = isDate;
42
+ exports.isTime = isTime;
43
+ exports.isTimestamp = isTimestamp;
44
+ exports.isInterval = isInterval;
45
+ exports.isDuration = isDuration;
46
+ exports.isList = isList;
47
+ exports.isStruct = isStruct;
48
+ exports.isUnion = isUnion;
49
+ exports.isFixedSizeBinary = isFixedSizeBinary;
50
+ exports.isFixedSizeList = isFixedSizeList;
51
+ exports.makeArrowTable = makeArrowTable;
52
+ exports.makeEmptyTable = makeEmptyTable;
53
+ exports.convertToTable = convertToTable;
54
+ exports.newVectorType = newVectorType;
55
+ exports.fromRecordsToBuffer = fromRecordsToBuffer;
56
+ exports.fromRecordsToStreamBuffer = fromRecordsToStreamBuffer;
57
+ exports.fromTableToBuffer = fromTableToBuffer;
58
+ exports.fromDataToBuffer = fromDataToBuffer;
59
+ exports.fromTableToStreamBuffer = fromTableToStreamBuffer;
60
+ exports.createEmptyTable = createEmptyTable;
31
61
  const apache_arrow_1 = require("apache-arrow");
32
62
  const registry_1 = require("./embedding/registry");
33
63
  const sanitize_1 = require("./sanitize");
@@ -37,83 +67,63 @@ function isArrowTable(value) {
37
67
  return true;
38
68
  return "schema" in value && "batches" in value;
39
69
  }
40
- exports.isArrowTable = isArrowTable;
41
70
  function isNull(value) {
42
71
  return value instanceof apache_arrow_1.Null || apache_arrow_1.DataType.isNull(value);
43
72
  }
44
- exports.isNull = isNull;
45
73
  function isInt(value) {
46
74
  return value instanceof apache_arrow_1.Int || apache_arrow_1.DataType.isInt(value);
47
75
  }
48
- exports.isInt = isInt;
49
76
  function isFloat(value) {
50
77
  return value instanceof apache_arrow_1.Float || apache_arrow_1.DataType.isFloat(value);
51
78
  }
52
- exports.isFloat = isFloat;
53
79
  function isBinary(value) {
54
80
  return value instanceof apache_arrow_1.Binary || apache_arrow_1.DataType.isBinary(value);
55
81
  }
56
- exports.isBinary = isBinary;
57
82
  function isLargeBinary(value) {
58
83
  return value instanceof apache_arrow_1.LargeBinary || apache_arrow_1.DataType.isLargeBinary(value);
59
84
  }
60
- exports.isLargeBinary = isLargeBinary;
61
85
  function isUtf8(value) {
62
86
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isUtf8(value);
63
87
  }
64
- exports.isUtf8 = isUtf8;
65
88
  function isLargeUtf8(value) {
66
89
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isLargeUtf8(value);
67
90
  }
68
- exports.isLargeUtf8 = isLargeUtf8;
69
91
  function isBool(value) {
70
92
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isBool(value);
71
93
  }
72
- exports.isBool = isBool;
73
94
  function isDecimal(value) {
74
95
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isDecimal(value);
75
96
  }
76
- exports.isDecimal = isDecimal;
77
97
  function isDate(value) {
78
98
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isDate(value);
79
99
  }
80
- exports.isDate = isDate;
81
100
  function isTime(value) {
82
101
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isTime(value);
83
102
  }
84
- exports.isTime = isTime;
85
103
  function isTimestamp(value) {
86
104
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isTimestamp(value);
87
105
  }
88
- exports.isTimestamp = isTimestamp;
89
106
  function isInterval(value) {
90
107
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isInterval(value);
91
108
  }
92
- exports.isInterval = isInterval;
93
109
  function isDuration(value) {
94
110
  return value instanceof apache_arrow_1.Utf8 || apache_arrow_1.DataType.isDuration(value);
95
111
  }
96
- exports.isDuration = isDuration;
97
112
  function isList(value) {
98
113
  return value instanceof apache_arrow_1.List || apache_arrow_1.DataType.isList(value);
99
114
  }
100
- exports.isList = isList;
101
115
  function isStruct(value) {
102
116
  return value instanceof apache_arrow_1.Struct || apache_arrow_1.DataType.isStruct(value);
103
117
  }
104
- exports.isStruct = isStruct;
105
118
  function isUnion(value) {
106
119
  return value instanceof apache_arrow_1.Struct || apache_arrow_1.DataType.isUnion(value);
107
120
  }
108
- exports.isUnion = isUnion;
109
121
  function isFixedSizeBinary(value) {
110
122
  return value instanceof apache_arrow_1.FixedSizeBinary || apache_arrow_1.DataType.isFixedSizeBinary(value);
111
123
  }
112
- exports.isFixedSizeBinary = isFixedSizeBinary;
113
124
  function isFixedSizeList(value) {
114
125
  return value instanceof apache_arrow_1.FixedSizeList || apache_arrow_1.DataType.isFixedSizeList(value);
115
126
  }
116
- exports.isFixedSizeList = isFixedSizeList;
117
127
  /*
118
128
  * Options to control how a column should be converted to a vector array
119
129
  */
@@ -371,14 +381,12 @@ function makeArrowTable(data, options, metadata) {
371
381
  }
372
382
  return tbl;
373
383
  }
374
- exports.makeArrowTable = makeArrowTable;
375
384
  /**
376
385
  * Create an empty Arrow table with the provided schema
377
386
  */
378
387
  function makeEmptyTable(schema, metadata) {
379
388
  return makeArrowTable([], { schema }, metadata);
380
389
  }
381
- exports.makeEmptyTable = makeEmptyTable;
382
390
  /**
383
391
  * Helper function to convert Array<Array<any>> to a variable sized list array
384
392
  */
@@ -574,7 +582,6 @@ async function convertToTable(data, embeddings, makeTableOptions) {
574
582
  const table = makeArrowTable(data, makeTableOptions);
575
583
  return await applyEmbeddings(table, embeddings, makeTableOptions?.schema);
576
584
  }
577
- exports.convertToTable = convertToTable;
578
585
  /** Creates the Arrow Type for a Vector column with dimension `dim` */
579
586
  function newVectorType(dim, innerType) {
580
587
  // in Lance we always default to have the elements nullable, so we need to set it to true
@@ -582,7 +589,6 @@ function newVectorType(dim, innerType) {
582
589
  const children = new apache_arrow_1.Field("item", (0, sanitize_1.sanitizeType)(innerType), true);
583
590
  return new apache_arrow_1.FixedSizeList(dim, children);
584
591
  }
585
- exports.newVectorType = newVectorType;
586
592
  /**
587
593
  * Serialize an Array of records into a buffer using the Arrow IPC File serialization
588
594
  *
@@ -598,7 +604,6 @@ async function fromRecordsToBuffer(data, embeddings, schema) {
598
604
  const writer = apache_arrow_1.RecordBatchFileWriter.writeAll(table);
599
605
  return Buffer.from(await writer.toUint8Array());
600
606
  }
601
- exports.fromRecordsToBuffer = fromRecordsToBuffer;
602
607
  /**
603
608
  * Serialize an Array of records into a buffer using the Arrow IPC Stream serialization
604
609
  *
@@ -614,7 +619,6 @@ async function fromRecordsToStreamBuffer(data, embeddings, schema) {
614
619
  const writer = apache_arrow_1.RecordBatchStreamWriter.writeAll(table);
615
620
  return Buffer.from(await writer.toUint8Array());
616
621
  }
617
- exports.fromRecordsToStreamBuffer = fromRecordsToStreamBuffer;
618
622
  /**
619
623
  * Serialize an Arrow Table into a buffer using the Arrow IPC File serialization
620
624
  *
@@ -631,7 +635,6 @@ async function fromTableToBuffer(table, embeddings, schema) {
631
635
  const writer = apache_arrow_1.RecordBatchFileWriter.writeAll(tableWithEmbeddings);
632
636
  return Buffer.from(await writer.toUint8Array());
633
637
  }
634
- exports.fromTableToBuffer = fromTableToBuffer;
635
638
  /**
636
639
  * Serialize an Arrow Table into a buffer using the Arrow IPC File serialization
637
640
  *
@@ -652,7 +655,6 @@ async function fromDataToBuffer(data, embeddings, schema) {
652
655
  return fromTableToBuffer(table);
653
656
  }
654
657
  }
655
- exports.fromDataToBuffer = fromDataToBuffer;
656
658
  /**
657
659
  * Serialize an Arrow Table into a buffer using the Arrow IPC Stream serialization
658
660
  *
@@ -666,7 +668,6 @@ async function fromTableToStreamBuffer(table, embeddings, schema) {
666
668
  const writer = apache_arrow_1.RecordBatchStreamWriter.writeAll(tableWithEmbeddings);
667
669
  return Buffer.from(await writer.toUint8Array());
668
670
  }
669
- exports.fromTableToStreamBuffer = fromTableToStreamBuffer;
670
671
  /**
671
672
  * Reorder the columns in `batch` so that they agree with the field order in `schema`
672
673
  */
@@ -700,7 +701,6 @@ function alignTable(table, schema) {
700
701
  function createEmptyTable(schema) {
701
702
  return new apache_arrow_1.Table((0, sanitize_1.sanitizeSchema)(schema));
702
703
  }
703
- exports.createEmptyTable = createEmptyTable;
704
704
  function validateSchemaEmbeddings(schema, data, embeddings) {
705
705
  const fields = [];
706
706
  const missingEmbeddingFields = [];
@@ -13,7 +13,8 @@
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
- exports.cleanseStorageOptions = exports.LocalConnection = exports.Connection = void 0;
16
+ exports.LocalConnection = exports.Connection = void 0;
17
+ exports.cleanseStorageOptions = cleanseStorageOptions;
17
18
  const arrow_1 = require("./arrow");
18
19
  const registry_1 = require("./embedding/registry");
19
20
  const table_1 = require("./table");
@@ -127,7 +128,6 @@ function cleanseStorageOptions(options) {
127
128
  }
128
129
  return result;
129
130
  }
130
- exports.cleanseStorageOptions = cleanseStorageOptions;
131
131
  /**
132
132
  * Convert a string to snake case. It might already be snake case, in which case it is
133
133
  * returned unchanged.
@@ -27,7 +27,8 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
27
27
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
28
28
  };
29
29
  Object.defineProperty(exports, "__esModule", { value: true });
30
- exports.LanceSchema = exports.TextEmbeddingFunction = exports.EmbeddingFunction = void 0;
30
+ exports.TextEmbeddingFunction = exports.EmbeddingFunction = void 0;
31
+ exports.LanceSchema = LanceSchema;
31
32
  const arrow_1 = require("../arrow");
32
33
  const sanitize_1 = require("../sanitize");
33
34
  const registry_1 = require("./registry");
@@ -78,7 +79,6 @@ function LanceSchema(fields) {
78
79
  const schema = new arrow_1.Schema(arrowFields, metadata);
79
80
  return schema;
80
81
  }
81
- exports.LanceSchema = LanceSchema;
82
82
  function parseEmbeddingFunctions(embeddingFunctions, key, metadata) {
83
83
  if (metadata.has("source_column_for")) {
84
84
  const embedFunction = metadata.get("source_column_for");
@@ -16,6 +16,10 @@ interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
16
16
  */
17
17
  export declare class EmbeddingFunctionRegistry {
18
18
  #private;
19
+ /**
20
+ * Get the number of registered functions
21
+ */
22
+ length(): number;
19
23
  /**
20
24
  * Register an embedding function
21
25
  * @param name The name of the function
@@ -13,7 +13,9 @@
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
- exports.getRegistry = exports.register = exports.EmbeddingFunctionRegistry = void 0;
16
+ exports.EmbeddingFunctionRegistry = void 0;
17
+ exports.register = register;
18
+ exports.getRegistry = getRegistry;
17
19
  require("reflect-metadata");
18
20
  /**
19
21
  * This is a singleton class used to register embedding functions
@@ -23,6 +25,12 @@ require("reflect-metadata");
23
25
  */
24
26
  class EmbeddingFunctionRegistry {
25
27
  #functions = new Map();
28
+ /**
29
+ * Get the number of registered functions
30
+ */
31
+ length() {
32
+ return this.#functions.size;
33
+ }
26
34
  /**
27
35
  * Register an embedding function
28
36
  * @param name The name of the function
@@ -130,7 +138,6 @@ const _REGISTRY = new EmbeddingFunctionRegistry();
130
138
  function register(name) {
131
139
  return _REGISTRY.register(name);
132
140
  }
133
- exports.register = register;
134
141
  /**
135
142
  * Utility function to get the global instance of the registry
136
143
  * @returns `EmbeddingFunctionRegistry` The global instance of the registry
@@ -142,4 +149,3 @@ exports.register = register;
142
149
  function getRegistry() {
143
150
  return _REGISTRY;
144
151
  }
145
- exports.getRegistry = getRegistry;
package/dist/index.js CHANGED
@@ -13,7 +13,8 @@
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
- exports.connect = exports.embedding = exports.Table = exports.Index = exports.RecordBatchIterator = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = void 0;
16
+ exports.embedding = exports.Table = exports.Index = exports.RecordBatchIterator = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = void 0;
17
+ exports.connect = connect;
17
18
  const connection_1 = require("./connection");
18
19
  const native_js_1 = require("./native.js");
19
20
  const remote_1 = require("./remote");
@@ -54,4 +55,3 @@ async function connect(uriOrOptions, opts = {}) {
54
55
  const nativeConn = await native_js_1.Connection.new(uri, opts);
55
56
  return new connection_1.LocalConnection(nativeConn);
56
57
  }
57
- exports.connect = connect;
package/dist/indices.d.ts CHANGED
@@ -138,6 +138,36 @@ export declare class Index {
138
138
  * block size may be added in the future.
139
139
  */
140
140
  static btree(): Index;
141
+ /**
142
+ * Create a bitmap index.
143
+ *
144
+ * A `Bitmap` index stores a bitmap for each distinct value in the column for every row.
145
+ *
146
+ * This index works best for low-cardinality columns, where the number of unique values
147
+ * is small (i.e., less than a few hundreds).
148
+ */
149
+ static bitmap(): Index;
150
+ /**
151
+ * Create a label list index.
152
+ *
153
+ * LabelList index is a scalar index that can be used on `List<T>` columns to
154
+ * support queries with `array_contains_all` and `array_contains_any`
155
+ * using an underlying bitmap index.
156
+ */
157
+ static labelList(): Index;
158
+ /**
159
+ * Create a full text search index
160
+ *
161
+ * A full text search index is an index on a string column, so that you can conduct full
162
+ * text searches on the column.
163
+ *
164
+ * The results of a full text search are ordered by relevance measured by BM25.
165
+ *
166
+ * You can combine filters with full text search.
167
+ *
168
+ * For now, the full text search index only supports English, and doesn't support phrase search.
169
+ */
170
+ static fts(): Index;
141
171
  }
142
172
  export interface IndexOptions {
143
173
  /**
package/dist/indices.js CHANGED
@@ -67,5 +67,41 @@ class Index {
67
67
  static btree() {
68
68
  return new Index(native_1.Index.btree());
69
69
  }
70
+ /**
71
+ * Create a bitmap index.
72
+ *
73
+ * A `Bitmap` index stores a bitmap for each distinct value in the column for every row.
74
+ *
75
+ * This index works best for low-cardinality columns, where the number of unique values
76
+ * is small (i.e., less than a few hundreds).
77
+ */
78
+ static bitmap() {
79
+ return new Index(native_1.Index.bitmap());
80
+ }
81
+ /**
82
+ * Create a label list index.
83
+ *
84
+ * LabelList index is a scalar index that can be used on `List<T>` columns to
85
+ * support queries with `array_contains_all` and `array_contains_any`
86
+ * using an underlying bitmap index.
87
+ */
88
+ static labelList() {
89
+ return new Index(native_1.Index.labelList());
90
+ }
91
+ /**
92
+ * Create a full text search index
93
+ *
94
+ * A full text search index is an index on a string column, so that you can conduct full
95
+ * text searches on the column.
96
+ *
97
+ * The results of a full text search are ordered by relevance measured by BM25.
98
+ *
99
+ * You can combine filters with full text search.
100
+ *
101
+ * For now, the full text search index only supports English, and doesn't support phrase search.
102
+ */
103
+ static fts() {
104
+ return new Index(native_1.Index.fts());
105
+ }
70
106
  }
71
107
  exports.Index = Index;
package/dist/query.d.ts CHANGED
@@ -19,6 +19,18 @@ export interface QueryExecutionOptions {
19
19
  */
20
20
  maxBatchLength?: number;
21
21
  }
22
+ /**
23
+ * Options that control the behavior of a full text search
24
+ */
25
+ export interface FullTextSearchOptions {
26
+ /**
27
+ * The columns to search
28
+ *
29
+ * If not specified, all indexed columns will be searched.
30
+ * For now, only one column can be searched.
31
+ */
32
+ columns?: string | string[];
33
+ }
22
34
  /** Common methods supported by all query types */
23
35
  export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery> implements AsyncIterable<RecordBatch> {
24
36
  protected inner: NativeQueryType | Promise<NativeQueryType>;
@@ -43,6 +55,7 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
43
55
  * @deprecated Use `where` instead
44
56
  */
45
57
  filter(predicate: string): this;
58
+ fullTextSearch(query: string, options?: Partial<FullTextSearchOptions>): this;
46
59
  /**
47
60
  * Return only the specified columns.
48
61
  *
package/dist/query.js CHANGED
@@ -98,6 +98,19 @@ class QueryBase {
98
98
  filter(predicate) {
99
99
  return this.where(predicate);
100
100
  }
101
+ fullTextSearch(query, options) {
102
+ let columns = null;
103
+ if (options) {
104
+ if (typeof options.columns === "string") {
105
+ columns = [options.columns];
106
+ }
107
+ else if (Array.isArray(options.columns)) {
108
+ columns = options.columns;
109
+ }
110
+ }
111
+ this.doCall((inner) => inner.fullTextSearch(query, columns));
112
+ return this;
113
+ }
101
114
  /**
102
115
  * Return only the specified columns.
103
116
  *
@@ -1,4 +1,3 @@
1
- /// <reference types="node" />
2
1
  import { type AxiosResponse } from "axios";
3
2
  import { Table as ArrowTable } from "../arrow";
4
3
  import { VectorQuery } from "../query";
@@ -12,7 +12,7 @@ class RemoteConnection extends connection_1.Connection {
12
12
  #apiKey;
13
13
  #region;
14
14
  #client;
15
- #tableCache = new util_1.TTLCache(300000);
15
+ #tableCache = new util_1.TTLCache(300_000);
16
16
  constructor(url, { apiKey, region, hostOverride, timeout }) {
17
17
  super();
18
18
  apiKey = apiKey ?? process.env.LANCEDB_API_KEY;
package/dist/sanitize.js CHANGED
@@ -13,7 +13,28 @@
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
- exports.sanitizeTable = exports.sanitizeSchema = exports.sanitizeField = exports.sanitizeType = exports.sanitizeDictionary = exports.sanitizeDuration = exports.sanitizeMap = exports.sanitizeFixedSizeList = exports.sanitizeFixedSizeBinary = exports.sanitizeTypedUnion = exports.sanitizeUnion = exports.sanitizeStruct = exports.sanitizeList = exports.sanitizeInterval = exports.sanitizeTypedTimestamp = exports.sanitizeTimestamp = exports.sanitizeTime = exports.sanitizeDate = exports.sanitizeDecimal = exports.sanitizeFloat = exports.sanitizeInt = exports.sanitizeMetadata = void 0;
16
+ exports.sanitizeMetadata = sanitizeMetadata;
17
+ exports.sanitizeInt = sanitizeInt;
18
+ exports.sanitizeFloat = sanitizeFloat;
19
+ exports.sanitizeDecimal = sanitizeDecimal;
20
+ exports.sanitizeDate = sanitizeDate;
21
+ exports.sanitizeTime = sanitizeTime;
22
+ exports.sanitizeTimestamp = sanitizeTimestamp;
23
+ exports.sanitizeTypedTimestamp = sanitizeTypedTimestamp;
24
+ exports.sanitizeInterval = sanitizeInterval;
25
+ exports.sanitizeList = sanitizeList;
26
+ exports.sanitizeStruct = sanitizeStruct;
27
+ exports.sanitizeUnion = sanitizeUnion;
28
+ exports.sanitizeTypedUnion = sanitizeTypedUnion;
29
+ exports.sanitizeFixedSizeBinary = sanitizeFixedSizeBinary;
30
+ exports.sanitizeFixedSizeList = sanitizeFixedSizeList;
31
+ exports.sanitizeMap = sanitizeMap;
32
+ exports.sanitizeDuration = sanitizeDuration;
33
+ exports.sanitizeDictionary = sanitizeDictionary;
34
+ exports.sanitizeType = sanitizeType;
35
+ exports.sanitizeField = sanitizeField;
36
+ exports.sanitizeSchema = sanitizeSchema;
37
+ exports.sanitizeTable = sanitizeTable;
17
38
  // The utilities in this file help sanitize data from the user's arrow
18
39
  // library into the types expected by vectordb's arrow library. Node
19
40
  // generally allows for mulitple versions of the same library (and sometimes
@@ -37,7 +58,6 @@ function sanitizeMetadata(metadataLike) {
37
58
  }
38
59
  return metadataLike;
39
60
  }
40
- exports.sanitizeMetadata = sanitizeMetadata;
41
61
  function sanitizeInt(typeLike) {
42
62
  if (!("bitWidth" in typeLike) ||
43
63
  typeof typeLike.bitWidth !== "number" ||
@@ -47,14 +67,12 @@ function sanitizeInt(typeLike) {
47
67
  }
48
68
  return new arrow_1.Int(typeLike.isSigned, typeLike.bitWidth);
49
69
  }
50
- exports.sanitizeInt = sanitizeInt;
51
70
  function sanitizeFloat(typeLike) {
52
71
  if (!("precision" in typeLike) || typeof typeLike.precision !== "number") {
53
72
  throw Error("Expected a Float Type to have a `precision` property");
54
73
  }
55
74
  return new arrow_1.Float(typeLike.precision);
56
75
  }
57
- exports.sanitizeFloat = sanitizeFloat;
58
76
  function sanitizeDecimal(typeLike) {
59
77
  if (!("scale" in typeLike) ||
60
78
  typeof typeLike.scale !== "number" ||
@@ -66,14 +84,12 @@ function sanitizeDecimal(typeLike) {
66
84
  }
67
85
  return new arrow_1.Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth);
68
86
  }
69
- exports.sanitizeDecimal = sanitizeDecimal;
70
87
  function sanitizeDate(typeLike) {
71
88
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
72
89
  throw Error("Expected a Date type to have a `unit` property");
73
90
  }
74
91
  return new arrow_1.Date_(typeLike.unit);
75
92
  }
76
- exports.sanitizeDate = sanitizeDate;
77
93
  function sanitizeTime(typeLike) {
78
94
  if (!("unit" in typeLike) ||
79
95
  typeof typeLike.unit !== "number" ||
@@ -83,7 +99,6 @@ function sanitizeTime(typeLike) {
83
99
  }
84
100
  return new arrow_1.Time(typeLike.unit, typeLike.bitWidth);
85
101
  }
86
- exports.sanitizeTime = sanitizeTime;
87
102
  function sanitizeTimestamp(typeLike) {
88
103
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
89
104
  throw Error("Expected a Timestamp type to have a `unit` property");
@@ -94,7 +109,6 @@ function sanitizeTimestamp(typeLike) {
94
109
  }
95
110
  return new arrow_1.Timestamp(typeLike.unit, timezone);
96
111
  }
97
- exports.sanitizeTimestamp = sanitizeTimestamp;
98
112
  function sanitizeTypedTimestamp(typeLike,
99
113
  // eslint-disable-next-line @typescript-eslint/naming-convention
100
114
  Datatype) {
@@ -104,14 +118,12 @@ Datatype) {
104
118
  }
105
119
  return new Datatype(timezone);
106
120
  }
107
- exports.sanitizeTypedTimestamp = sanitizeTypedTimestamp;
108
121
  function sanitizeInterval(typeLike) {
109
122
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
110
123
  throw Error("Expected an Interval type to have a `unit` property");
111
124
  }
112
125
  return new arrow_1.Interval(typeLike.unit);
113
126
  }
114
- exports.sanitizeInterval = sanitizeInterval;
115
127
  function sanitizeList(typeLike) {
116
128
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
117
129
  throw Error("Expected a List type to have an array-like `children` property");
@@ -121,14 +133,12 @@ function sanitizeList(typeLike) {
121
133
  }
122
134
  return new arrow_1.List(sanitizeField(typeLike.children[0]));
123
135
  }
124
- exports.sanitizeList = sanitizeList;
125
136
  function sanitizeStruct(typeLike) {
126
137
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
127
138
  throw Error("Expected a Struct type to have an array-like `children` property");
128
139
  }
129
140
  return new arrow_1.Struct(typeLike.children.map((child) => sanitizeField(child)));
130
141
  }
131
- exports.sanitizeStruct = sanitizeStruct;
132
142
  function sanitizeUnion(typeLike) {
133
143
  if (!("typeIds" in typeLike) ||
134
144
  !("mode" in typeLike) ||
@@ -142,7 +152,6 @@ function sanitizeUnion(typeLike) {
142
152
  // biome-ignore lint/suspicious/noExplicitAny: skip
143
153
  typeLike.typeIds, typeLike.children.map((child) => sanitizeField(child)));
144
154
  }
145
- exports.sanitizeUnion = sanitizeUnion;
146
155
  function sanitizeTypedUnion(typeLike,
147
156
  // eslint-disable-next-line @typescript-eslint/naming-convention
148
157
  UnionType) {
@@ -154,14 +163,12 @@ UnionType) {
154
163
  }
155
164
  return new UnionType(typeLike.typeIds, typeLike.children.map((child) => sanitizeField(child)));
156
165
  }
157
- exports.sanitizeTypedUnion = sanitizeTypedUnion;
158
166
  function sanitizeFixedSizeBinary(typeLike) {
159
167
  if (!("byteWidth" in typeLike) || typeof typeLike.byteWidth !== "number") {
160
168
  throw Error("Expected a FixedSizeBinary type to have a `byteWidth` property");
161
169
  }
162
170
  return new arrow_1.FixedSizeBinary(typeLike.byteWidth);
163
171
  }
164
- exports.sanitizeFixedSizeBinary = sanitizeFixedSizeBinary;
165
172
  function sanitizeFixedSizeList(typeLike) {
166
173
  if (!("listSize" in typeLike) || typeof typeLike.listSize !== "number") {
167
174
  throw Error("Expected a FixedSizeList type to have a `listSize` property");
@@ -174,7 +181,6 @@ function sanitizeFixedSizeList(typeLike) {
174
181
  }
175
182
  return new arrow_1.FixedSizeList(typeLike.listSize, sanitizeField(typeLike.children[0]));
176
183
  }
177
- exports.sanitizeFixedSizeList = sanitizeFixedSizeList;
178
184
  function sanitizeMap(typeLike) {
179
185
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
180
186
  throw Error("Expected a Map type to have an array-like `children` property");
@@ -186,14 +192,12 @@ function sanitizeMap(typeLike) {
186
192
  // biome-ignore lint/suspicious/noExplicitAny: skip
187
193
  typeLike.children.map((field) => sanitizeField(field)), typeLike.keysSorted);
188
194
  }
189
- exports.sanitizeMap = sanitizeMap;
190
195
  function sanitizeDuration(typeLike) {
191
196
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
192
197
  throw Error("Expected a Duration type to have a `unit` property");
193
198
  }
194
199
  return new arrow_1.Duration(typeLike.unit);
195
200
  }
196
- exports.sanitizeDuration = sanitizeDuration;
197
201
  function sanitizeDictionary(typeLike) {
198
202
  if (!("id" in typeLike) || typeof typeLike.id !== "number") {
199
203
  throw Error("Expected a Dictionary type to have an `id` property");
@@ -209,7 +213,6 @@ function sanitizeDictionary(typeLike) {
209
213
  }
210
214
  return new arrow_1.Dictionary(sanitizeType(typeLike.dictionary), sanitizeType(typeLike.indices), typeLike.id, typeLike.isOrdered);
211
215
  }
212
- exports.sanitizeDictionary = sanitizeDictionary;
213
216
  // biome-ignore lint/suspicious/noExplicitAny: skip
214
217
  function sanitizeType(typeLike) {
215
218
  if (typeof typeLike !== "object" || typeLike === null) {
@@ -333,7 +336,6 @@ function sanitizeType(typeLike) {
333
336
  throw new Error("Unrecoginized type id in schema: " + typeId);
334
337
  }
335
338
  }
336
- exports.sanitizeType = sanitizeType;
337
339
  function sanitizeField(fieldLike) {
338
340
  if (fieldLike instanceof arrow_1.Field) {
339
341
  return fieldLike;
@@ -361,7 +363,6 @@ function sanitizeField(fieldLike) {
361
363
  }
362
364
  return new arrow_1.Field(name, type, nullable, metadata);
363
365
  }
364
- exports.sanitizeField = sanitizeField;
365
366
  /**
366
367
  * Convert something schemaLike into a Schema instance
367
368
  *
@@ -389,7 +390,6 @@ function sanitizeSchema(schemaLike) {
389
390
  const sanitizedFields = schemaLike.fields.map((field) => sanitizeField(field));
390
391
  return new arrow_1.Schema(sanitizedFields, metadata);
391
392
  }
392
- exports.sanitizeSchema = sanitizeSchema;
393
393
  function sanitizeTable(tableLike) {
394
394
  if (tableLike instanceof arrow_1.Table) {
395
395
  return tableLike;
@@ -407,7 +407,6 @@ function sanitizeTable(tableLike) {
407
407
  const batches = tableLike.batches.map(sanitizeRecordBatch);
408
408
  return new arrow_1.Table(schema, batches);
409
409
  }
410
- exports.sanitizeTable = sanitizeTable;
411
410
  function sanitizeRecordBatch(batchLike) {
412
411
  if (batchLike instanceof arrow_1.RecordBatch) {
413
412
  return batchLike;
package/dist/table.d.ts CHANGED
@@ -1,4 +1,3 @@
1
- /// <reference types="node" />
2
1
  import { Table as ArrowTable, Data, IntoVector, Schema, TableLike } from "./arrow";
3
2
  import { CreateTableOptions } from "./connection";
4
3
  import { IndexOptions } from "./indices";
@@ -218,20 +217,16 @@ export declare abstract class Table {
218
217
  abstract query(): Query;
219
218
  /**
220
219
  * Create a search query to find the nearest neighbors
221
- * of the given query vector
222
- * @param {string} query - the query. This will be converted to a vector using the table's provided embedding function
223
- * @note If no embedding functions are defined in the table, this will error when collecting the results.
224
- *
225
- * This is just a convenience method for calling `.query().nearestTo(await myEmbeddingFunction(query))`
226
- */
227
- abstract search(query: string): VectorQuery;
228
- /**
229
- * Create a search query to find the nearest neighbors
230
- * of the given query vector
231
- * @param {IntoVector} query - the query vector
232
- * This is just a convenience method for calling `.query().nearestTo(query)`
220
+ * of the given query
221
+ * @param {string | IntoVector} query - the query, a vector or string
222
+ * @param {string} queryType - the type of the query, "vector", "fts", or "auto"
223
+ * @param {string | string[]} ftsColumns - the columns to search in for full text search
224
+ * for now, only one column can be searched at a time.
225
+ *
226
+ * when "auto" is used, if the query is a string and an embedding function is defined, it will be treated as a vector query
227
+ * if the query is a string and no embedding function is defined, it will be treated as a full text search query
233
228
  */
234
- abstract search(query: IntoVector): VectorQuery;
229
+ abstract search(query: string | IntoVector, queryType?: string, ftsColumns?: string | string[]): VectorQuery | Query;
235
230
  /**
236
231
  * Search the table with a given query vector.
237
232
  *
@@ -381,7 +376,7 @@ export declare class LocalTable extends Table {
381
376
  delete(predicate: string): Promise<void>;
382
377
  createIndex(column: string, options?: Partial<IndexOptions>): Promise<void>;
383
378
  query(): Query;
384
- search(query: string | IntoVector): VectorQuery;
379
+ search(query: string | IntoVector, queryType?: string, ftsColumns?: string | string[]): VectorQuery | Query;
385
380
  vectorSearch(vector: IntoVector): VectorQuery;
386
381
  addColumns(newColumnTransforms: AddColumnsSql[]): Promise<void>;
387
382
  alterColumns(columnAlterations: ColumnAlteration[]): Promise<void>;
package/dist/table.js CHANGED
@@ -157,23 +157,37 @@ class LocalTable extends Table {
157
157
  query() {
158
158
  return new query_1.Query(this.inner);
159
159
  }
160
- search(query) {
160
+ search(query, queryType = "auto", ftsColumns) {
161
161
  if (typeof query !== "string") {
162
+ if (queryType === "fts") {
163
+ throw new Error("Cannot perform full text search on a vector query");
164
+ }
162
165
  return this.vectorSearch(query);
163
166
  }
164
- else {
165
- const queryPromise = this.getEmbeddingFunctions().then(async (functions) => {
166
- // TODO: Support multiple embedding functions
167
- const embeddingFunc = functions
168
- .values()
169
- .next().value;
170
- if (!embeddingFunc) {
171
- return Promise.reject(new Error("No embedding functions are defined in the table"));
172
- }
173
- return await embeddingFunc.function.computeQueryEmbeddings(query);
167
+ // If the query is a string, we need to determine if it is a vector query or a full text search query
168
+ if (queryType === "fts") {
169
+ return this.query().fullTextSearch(query, {
170
+ columns: ftsColumns,
171
+ });
172
+ }
173
+ // The query type is auto or vector
174
+ // fall back to full text search if no embedding functions are defined and the query is a string
175
+ if (queryType === "auto" && (0, registry_1.getRegistry)().length() === 0) {
176
+ return this.query().fullTextSearch(query, {
177
+ columns: ftsColumns,
174
178
  });
175
- return this.query().nearestTo(queryPromise);
176
179
  }
180
+ const queryPromise = this.getEmbeddingFunctions().then(async (functions) => {
181
+ // TODO: Support multiple embedding functions
182
+ const embeddingFunc = functions
183
+ .values()
184
+ .next().value;
185
+ if (!embeddingFunc) {
186
+ return Promise.reject(new Error("No embedding functions are defined in the table"));
187
+ }
188
+ return await embeddingFunc.function.computeQueryEmbeddings(query);
189
+ });
190
+ return this.query().nearestTo(queryPromise);
177
191
  }
178
192
  vectorSearch(vector) {
179
193
  return this.query().nearestTo(vector);
package/dist/util.d.ts CHANGED
@@ -1,4 +1,3 @@
1
- /// <reference types="node" />
2
1
  export type IntoSql = string | number | boolean | null | Date | ArrayBufferLike | Buffer | IntoSql[];
3
2
  export declare function toSQL(value: IntoSql): string;
4
3
  export declare class TTLCache {
package/dist/util.js CHANGED
@@ -1,6 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.TTLCache = exports.toSQL = void 0;
3
+ exports.TTLCache = void 0;
4
+ exports.toSQL = toSQL;
4
5
  function toSQL(value) {
5
6
  if (typeof value === "string") {
6
7
  return `'${value.replace(/'/g, "''")}'`;
@@ -30,7 +31,6 @@ function toSQL(value) {
30
31
  throw new Error(`Unsupported value type: ${typeof value} value: (${value})`);
31
32
  }
32
33
  }
33
- exports.toSQL = toSQL;
34
34
  class TTLCache {
35
35
  ttl;
36
36
  // biome-ignore lint/suspicious/noExplicitAny: <explanation>
package/package.json CHANGED
@@ -10,7 +10,7 @@
10
10
  "vector database",
11
11
  "ann"
12
12
  ],
13
- "version": "0.9.0",
13
+ "version": "0.10.0-beta.0",
14
14
  "main": "dist/index.js",
15
15
  "exports": {
16
16
  ".": "./dist/index.js",
@@ -53,7 +53,7 @@
53
53
  "ts-jest": "^29.1.2",
54
54
  "typedoc": "^0.26.4",
55
55
  "typedoc-plugin-markdown": "^4.2.1",
56
- "typescript": "^5.3.3",
56
+ "typescript": "^5.5.4",
57
57
  "typescript-eslint": "^7.1.0"
58
58
  },
59
59
  "ava": {
@@ -92,11 +92,11 @@
92
92
  "reflect-metadata": "^0.2.2"
93
93
  },
94
94
  "optionalDependencies": {
95
- "@lancedb/lancedb-darwin-arm64": "0.9.0",
96
- "@lancedb/lancedb-linux-arm64-gnu": "0.9.0",
97
- "@lancedb/lancedb-darwin-x64": "0.9.0",
98
- "@lancedb/lancedb-linux-x64-gnu": "0.9.0",
99
- "@lancedb/lancedb-win32-x64-msvc": "0.9.0"
95
+ "@lancedb/lancedb-darwin-arm64": "0.10.0-beta.0",
96
+ "@lancedb/lancedb-linux-arm64-gnu": "0.10.0-beta.0",
97
+ "@lancedb/lancedb-darwin-x64": "0.10.0-beta.0",
98
+ "@lancedb/lancedb-linux-x64-gnu": "0.10.0-beta.0",
99
+ "@lancedb/lancedb-win32-x64-msvc": "0.10.0-beta.0"
100
100
  },
101
101
  "peerDependencies": {
102
102
  "apache-arrow": ">=13.0.0 <=17.0.0"