@lancedb/lancedb 0.18.0 → 0.18.2-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,11 +11,9 @@ npm install @lancedb/lancedb
11
11
  This will download the appropriate native library for your platform. We currently
12
12
  support:
13
13
 
14
- - Linux (x86_64 and aarch64)
14
+ - Linux (x86_64 and aarch64 on glibc and musl)
15
15
  - MacOS (Intel and ARM/M1/M2)
16
- - Windows (x86_64 only)
17
-
18
- We do not yet support musl-based Linux (such as Alpine Linux) or aarch64 Windows.
16
+ - Windows (x86_64 and aarch64)
19
17
 
20
18
  ## Usage
21
19
 
package/dist/arrow.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { Table as ArrowTable, Binary, BufferType, Field, FixedSizeBinary, FixedSizeList, Float, Int, LargeBinary, List, Null, RecordBatch, Schema, Struct, Utf8 } from "apache-arrow";
1
+ import { Table as ArrowTable, Binary, BufferType, DataType, Field, FixedSizeBinary, FixedSizeList, Float, Int, LargeBinary, List, Null, RecordBatch, Schema, Struct, Utf8 } from "apache-arrow";
2
2
  import { Buffers } from "apache-arrow/data";
3
3
  import { type EmbeddingFunction } from "./embedding/embedding_function";
4
4
  import { EmbeddingFunctionConfig } from "./embedding/registry";
@@ -257,3 +257,15 @@ export declare function fromTableToStreamBuffer(table: ArrowTable, embeddings?:
257
257
  * Create an empty table with the given schema
258
258
  */
259
259
  export declare function createEmptyTable(schema: Schema): ArrowTable;
260
+ interface JsonDataType {
261
+ type: string;
262
+ fields?: JsonField[];
263
+ length?: number;
264
+ }
265
+ interface JsonField {
266
+ name: string;
267
+ type: JsonDataType;
268
+ nullable: boolean;
269
+ metadata: Map<string, string>;
270
+ }
271
+ export declare function dataTypeToJson(dataType: DataType): JsonDataType;
package/dist/arrow.js CHANGED
@@ -49,6 +49,7 @@ exports.fromBufferToRecordBatch = fromBufferToRecordBatch;
49
49
  exports.fromRecordBatchToBuffer = fromRecordBatchToBuffer;
50
50
  exports.fromTableToStreamBuffer = fromTableToStreamBuffer;
51
51
  exports.createEmptyTable = createEmptyTable;
52
+ exports.dataTypeToJson = dataTypeToJson;
52
53
  const apache_arrow_1 = require("apache-arrow");
53
54
  const registry_1 = require("./embedding/registry");
54
55
  const sanitize_1 = require("./sanitize");
@@ -126,6 +127,22 @@ class VectorColumnOptions {
126
127
  }
127
128
  }
128
129
  exports.VectorColumnOptions = VectorColumnOptions;
130
/**
 * Build an Arrow vector from a JS array, optionally with an explicit Arrow type.
 *
 * Thin wrapper around apache-arrow's `vectorFromArray` that adds a workaround
 * for fixed-size-list columns with a float value type (see the linked issue).
 *
 * @param data - the values to convert
 * @param type - optional Arrow data type; when omitted Arrow infers the type
 * @returns the Arrow vector built from `data`
 */
// biome-ignore lint/suspicious/noExplicitAny: skip
function vectorFromArray(data, type) {
    const isFloatFixedSizeList = apache_arrow_1.DataType.isFixedSizeList(type) &&
        apache_arrow_1.DataType.isFloat(type.valueType);
    if (isFloatFixedSizeList) {
        // Workaround for: https://github.com/apache/arrow/issues/45862
        // Append one extra all-zero row of `listSize` values, build the vector,
        // then slice the extra row back off so the caller only sees its own rows.
        const paddingRow = new Array(type.listSize).fill(0.0);
        const padded = [...data, paddingRow];
        const built = (0, apache_arrow_1.vectorFromArray)(padded, type);
        return built.slice(0, data.length);
    }
    if (type === undefined) {
        // No explicit type: let Arrow infer it from the values.
        return (0, apache_arrow_1.vectorFromArray)(data);
    }
    return (0, apache_arrow_1.vectorFromArray)(data, type);
}
129
146
  /** Options to control the makeArrowTable call. */
130
147
  class MakeArrowTableOptions {
131
148
  /*
@@ -618,7 +635,7 @@ function makeVector(values, type, stringAsDictionary) {
618
635
  });
619
636
  }
620
637
  }
621
- return (0, apache_arrow_1.vectorFromArray)(values, type);
638
+ return vectorFromArray(values, type);
622
639
  }
623
640
  if (values.length === 0) {
624
641
  throw Error("makeVector requires at least one value or the type must be specfied");
@@ -633,17 +650,17 @@ function makeVector(values, type, stringAsDictionary) {
633
650
  }
634
651
  else if (Buffer.isBuffer(sampleValue)) {
635
652
  // Default Arrow inference doesn't handle Buffer
636
- return (0, apache_arrow_1.vectorFromArray)(values, new apache_arrow_1.Binary());
653
+ return vectorFromArray(values, new apache_arrow_1.Binary());
637
654
  }
638
655
  else if (!(stringAsDictionary ?? false) &&
639
656
  (typeof sampleValue === "string" || sampleValue instanceof String)) {
640
657
  // If the type is string then don't use Arrow's default inference unless dictionaries are requested
641
658
  // because it will always use dictionary encoding for strings
642
- return (0, apache_arrow_1.vectorFromArray)(values, new apache_arrow_1.Utf8());
659
+ return vectorFromArray(values, new apache_arrow_1.Utf8());
643
660
  }
644
661
  else {
645
662
  // Convert a JS array of values to an arrow vector
646
- return (0, apache_arrow_1.vectorFromArray)(values);
663
+ return vectorFromArray(values);
647
664
  }
648
665
  }
649
666
  /** Helper function to apply embeddings from metadata to an input table */
@@ -966,3 +983,122 @@ function validateSchemaEmbeddings(schema, data, embeddings) {
966
983
  }
967
984
  return new apache_arrow_1.Schema(fields, schema.metadata);
968
985
  }
986
// Matches format of https://github.com/lancedb/lance/blob/main/rust/lance/src/arrow/json.rs
/**
 * Convert an Arrow data type into the JSON description used by Lance.
 *
 * Primitive types become `{ type: "<name>" }`. Nested types (list,
 * fixed-size list, struct) additionally carry their child fields in `fields`,
 * and fixed-size lists carry `length`. Parameterised types (timestamp,
 * decimal, duration, fixed-size binary, dictionary) encode their parameters
 * into the `type` string, separated by colons.
 *
 * @param dataType - the Arrow data type to describe (its `typeId` selects the branch)
 * @returns an object with a `type` string and, for nested types, `fields` / `length`
 * @throws Error if the type, its float precision, or its time unit is not supported
 */
function dataTypeToJson(dataType) {
    switch (dataType.typeId) {
        // For primitives, matches https://github.com/lancedb/lance/blob/e12bb9eff2a52f753668d4b62c52e4d72b10d294/rust/lance-core/src/datatypes.rs#L185
        case apache_arrow_1.Type.Null:
            return { type: "null" };
        case apache_arrow_1.Type.Bool:
            return { type: "bool" };
        case apache_arrow_1.Type.Int8:
            return { type: "int8" };
        case apache_arrow_1.Type.Int16:
            return { type: "int16" };
        case apache_arrow_1.Type.Int32:
            return { type: "int32" };
        case apache_arrow_1.Type.Int64:
            return { type: "int64" };
        case apache_arrow_1.Type.Uint8:
            return { type: "uint8" };
        case apache_arrow_1.Type.Uint16:
            return { type: "uint16" };
        case apache_arrow_1.Type.Uint32:
            return { type: "uint32" };
        case apache_arrow_1.Type.Uint64:
            return { type: "uint64" };
        case apache_arrow_1.Type.Int: {
            // Generic integer: derive the name from signedness and bit width,
            // e.g. "int32" or "uint8".
            const prefix = dataType.isSigned ? "" : "u";
            return { type: `${prefix}int${dataType.bitWidth}` };
        }
        case apache_arrow_1.Type.Float: {
            switch (dataType.precision) {
                case apache_arrow_1.Precision.HALF:
                    return { type: "halffloat" };
                case apache_arrow_1.Precision.SINGLE:
                    return { type: "float" };
                case apache_arrow_1.Precision.DOUBLE:
                    return { type: "double" };
            }
            throw Error("Unsupported float precision");
        }
        case apache_arrow_1.Type.Float16:
            return { type: "halffloat" };
        case apache_arrow_1.Type.Float32:
            return { type: "float" };
        case apache_arrow_1.Type.Float64:
            return { type: "double" };
        case apache_arrow_1.Type.Utf8:
            return { type: "string" };
        case apache_arrow_1.Type.Binary:
            return { type: "binary" };
        case apache_arrow_1.Type.LargeUtf8:
            return { type: "large_string" };
        case apache_arrow_1.Type.LargeBinary:
            return { type: "large_binary" };
        case apache_arrow_1.Type.List:
            return {
                type: "list",
                fields: [fieldToJson(dataType.children[0])],
            };
        case apache_arrow_1.Type.FixedSizeList:
            return {
                type: "fixed_size_list",
                fields: [fieldToJson(dataType.children[0])],
                length: dataType.listSize,
            };
        case apache_arrow_1.Type.Struct:
            return {
                type: "struct",
                fields: dataType.children.map(fieldToJson),
            };
        case apache_arrow_1.Type.Date:
            return {
                type: dataType.unit === apache_arrow_1.DateUnit.DAY ? "date32:day" : "date64:ms",
            };
        case apache_arrow_1.Type.Timestamp: {
            // "-" stands in for "no timezone".
            const timezone = dataType.timezone || "-";
            return {
                type: `timestamp:${timeUnitToJson(dataType.unit)}:${timezone}`,
            };
        }
        case apache_arrow_1.Type.Decimal:
            return {
                type: `decimal:${dataType.bitWidth}:${dataType.precision}:${dataType.scale}`,
            };
        case apache_arrow_1.Type.Duration:
            return { type: `duration:${timeUnitToJson(dataType.unit)}` };
        case apache_arrow_1.Type.FixedSizeBinary:
            return { type: `fixed_size_binary:${dataType.byteWidth}` };
        case apache_arrow_1.Type.Dictionary: {
            const indexType = dataTypeToJson(dataType.indices);
            const valueType = dataTypeToJson(dataType.valueType);
            return {
                type: `dict:${valueType.type}:${indexType.type}:false`,
            };
        }
    }
    throw new Error("Unsupported data type");
}
/**
 * Spell an Arrow `TimeUnit` as the short unit string used in the JSON type
 * name. Arrow JS stores the unit as a numeric enum member, so interpolating it
 * directly would produce e.g. "timestamp:2:-" instead of "timestamp:us:-".
 */
function timeUnitToJson(unit) {
    switch (unit) {
        case apache_arrow_1.TimeUnit.SECOND:
            return "s";
        case apache_arrow_1.TimeUnit.MILLISECOND:
            return "ms";
        case apache_arrow_1.TimeUnit.MICROSECOND:
            return "us";
        case apache_arrow_1.TimeUnit.NANOSECOND:
            return "ns";
    }
    throw new Error(`Unsupported time unit: ${unit}`);
}
1097
/**
 * Describe an Arrow field as a plain object: its name, the JSON description
 * of its data type (via `dataTypeToJson`), its nullability and its metadata.
 *
 * @param field - the Arrow field to describe
 * @returns `{ name, type, nullable, metadata }`; `metadata` is passed through as-is
 */
function fieldToJson(field) {
    const { name, type, nullable, metadata } = field;
    const jsonType = dataTypeToJson(type);
    return { name, type: jsonType, nullable, metadata };
}
package/dist/index.d.ts CHANGED
@@ -1,16 +1,16 @@
1
1
  import { Connection } from "./connection";
2
2
  import { ConnectionOptions } from "./native.js";
3
- export { AddColumnsSql, ColumnAlteration, ConnectionOptions, IndexStatistics, IndexConfig, ClientConfig, TimeoutConfig, RetryConfig, OptimizeStats, CompactionStats, RemovalStats, } from "./native.js";
3
+ export { AddColumnsSql, ConnectionOptions, IndexStatistics, IndexConfig, ClientConfig, TimeoutConfig, RetryConfig, OptimizeStats, CompactionStats, RemovalStats, } from "./native.js";
4
4
  export { makeArrowTable, MakeArrowTableOptions, Data, VectorColumnOptions, } from "./arrow";
5
5
  export { Connection, CreateTableOptions, TableNamesOptions, OpenTableOptions, } from "./connection";
6
6
  export { ExecutableQuery, Query, QueryBase, VectorQuery, QueryExecutionOptions, FullTextSearchOptions, RecordBatchIterator, } from "./query";
7
- export { Index, IndexOptions, IvfPqOptions, HnswPqOptions, HnswSqOptions, FtsOptions, } from "./indices";
8
- export { Table, AddDataOptions, UpdateOptions, OptimizeOptions, Version, } from "./table";
7
+ export { Index, IndexOptions, IvfPqOptions, IvfFlatOptions, HnswPqOptions, HnswSqOptions, FtsOptions, } from "./indices";
8
+ export { Table, AddDataOptions, UpdateOptions, OptimizeOptions, Version, ColumnAlteration, } from "./table";
9
9
  export { MergeInsertBuilder } from "./merge";
10
10
  export * as embedding from "./embedding";
11
11
  export * as rerankers from "./rerankers";
12
12
  export { SchemaLike, TableLike, FieldLike, RecordBatchLike, DataLike, IntoVector, } from "./arrow";
13
- export { IntoSql } from "./util";
13
+ export { IntoSql, packBits } from "./util";
14
14
  /**
15
15
  * Connect to a LanceDB instance at the given URI.
16
16
  *
package/dist/index.js CHANGED
@@ -2,7 +2,7 @@
2
2
  // SPDX-License-Identifier: Apache-2.0
3
3
  // SPDX-FileCopyrightText: Copyright The LanceDB Authors
4
4
  Object.defineProperty(exports, "__esModule", { value: true });
5
- exports.rerankers = exports.embedding = exports.MergeInsertBuilder = exports.Table = exports.Index = exports.RecordBatchIterator = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = void 0;
5
+ exports.packBits = exports.rerankers = exports.embedding = exports.MergeInsertBuilder = exports.Table = exports.Index = exports.RecordBatchIterator = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = void 0;
6
6
  exports.connect = connect;
7
7
  const connection_1 = require("./connection");
8
8
  const native_js_1 = require("./native.js");
@@ -25,6 +25,8 @@ var merge_1 = require("./merge");
25
25
  Object.defineProperty(exports, "MergeInsertBuilder", { enumerable: true, get: function () { return merge_1.MergeInsertBuilder; } });
26
26
  exports.embedding = require("./embedding");
27
27
  exports.rerankers = require("./rerankers");
28
+ var util_1 = require("./util");
29
+ Object.defineProperty(exports, "packBits", { enumerable: true, get: function () { return util_1.packBits; } });
28
30
  async function connect(uriOrOptions, options = {}) {
29
31
  let uri;
30
32
  if (typeof uriOrOptions !== "string") {
package/dist/indices.d.ts CHANGED
@@ -54,13 +54,13 @@ export interface IvfPqOptions {
54
54
  *
55
55
  * "l2" - Euclidean distance. This is a very common distance metric that
56
56
  * accounts for both magnitude and direction when determining the distance
57
- * between vectors. L2 distance has a range of [0, ∞).
57
+ * between vectors. l2 distance has a range of [0, ∞).
58
58
  *
59
59
  * "cosine" - Cosine distance. Cosine distance is a distance metric
60
60
  * calculated from the cosine similarity between two vectors. Cosine
61
61
  * similarity is a measure of similarity between two non-zero vectors of an
62
62
  * inner product space. It is defined to equal the cosine of the angle
63
- * between them. Unlike L2, the cosine distance is not affected by the
63
+ * between them. Unlike l2, the cosine distance is not affected by the
64
64
  * magnitude of the vectors. Cosine distance has a range of [0, 2].
65
65
  *
66
66
  * Note: the cosine distance is undefined when one (or both) of the vectors
@@ -69,7 +69,7 @@ export interface IvfPqOptions {
69
69
  *
70
70
  * "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
71
71
  * distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
72
- * L2 norm is 1), then dot distance is equivalent to the cosine distance.
72
+ * l2 norm is 1), then dot distance is equivalent to the cosine distance.
73
73
  */
74
74
  distanceType?: "l2" | "cosine" | "dot";
75
75
  /**
@@ -114,18 +114,18 @@ export interface HnswPqOptions {
114
114
  *
115
115
  * "l2" - Euclidean distance. This is a very common distance metric that
116
116
  * accounts for both magnitude and direction when determining the distance
117
- * between vectors. L2 distance has a range of [0, ∞).
117
+ * between vectors. l2 distance has a range of [0, ∞).
118
118
  *
119
119
  * "cosine" - Cosine distance. Cosine distance is a distance metric
120
120
  * calculated from the cosine similarity between two vectors. Cosine
121
121
  * similarity is a measure of similarity between two non-zero vectors of an
122
122
  * inner product space. It is defined to equal the cosine of the angle
123
- * between them. Unlike L2, the cosine distance is not affected by the
123
+ * between them. Unlike l2, the cosine distance is not affected by the
124
124
  * magnitude of the vectors. Cosine distance has a range of [0, 2].
125
125
  *
126
126
  * "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
127
127
  * distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
128
- * L2 norm is 1), then dot distance is equivalent to the cosine distance.
128
+ * l2 norm is 1), then dot distance is equivalent to the cosine distance.
129
129
  */
130
130
  distanceType?: "l2" | "cosine" | "dot";
131
131
  /**
@@ -223,18 +223,18 @@ export interface HnswSqOptions {
223
223
  *
224
224
  * "l2" - Euclidean distance. This is a very common distance metric that
225
225
  * accounts for both magnitude and direction when determining the distance
226
- * between vectors. L2 distance has a range of [0, ∞).
226
+ * between vectors. l2 distance has a range of [0, ∞).
227
227
  *
228
228
  * "cosine" - Cosine distance. Cosine distance is a distance metric
229
229
  * calculated from the cosine similarity between two vectors. Cosine
230
230
  * similarity is a measure of similarity between two non-zero vectors of an
231
231
  * inner product space. It is defined to equal the cosine of the angle
232
- * between them. Unlike L2, the cosine distance is not affected by the
232
+ * between them. Unlike l2, the cosine distance is not affected by the
233
233
  * magnitude of the vectors. Cosine distance has a range of [0, 2].
234
234
  *
235
235
  * "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
236
236
  * distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
237
- * L2 norm is 1), then dot distance is equivalent to the cosine distance.
237
+ * l2 norm is 1), then dot distance is equivalent to the cosine distance.
238
238
  */
239
239
  distanceType?: "l2" | "cosine" | "dot";
240
240
  /**
@@ -303,6 +303,90 @@ export interface HnswSqOptions {
303
303
  */
304
304
  efConstruction?: number;
305
305
  }
306
+ /**
307
+ * Options to create an `IVF_FLAT` index
308
+ */
309
+ export interface IvfFlatOptions {
310
+ /**
311
+ * The number of IVF partitions to create.
312
+ *
313
+ * This value should generally scale with the number of rows in the dataset.
314
+ * By default the number of partitions is the square root of the number of
315
+ * rows.
316
+ *
317
+ * If this value is too large then the first part of the search (picking the
318
+ * right partition) will be slow. If this value is too small then the second
319
+ * part of the search (searching within a partition) will be slow.
320
+ */
321
+ numPartitions?: number;
322
+ /**
323
+ * Distance type to use to build the index.
324
+ *
325
+ * Default value is "l2".
326
+ *
327
+ * This is used when training the index to calculate the IVF partitions
328
+ * (vectors are grouped in partitions with similar vectors according to this
329
+ * distance type).
330
+ *
331
+ * The distance type used to train an index MUST match the distance type used
332
+ * to search the index. Failure to do so will yield inaccurate results.
333
+ *
334
+ * The following distance types are available:
335
+ *
336
+ * "l2" - Euclidean distance. This is a very common distance metric that
337
+ * accounts for both magnitude and direction when determining the distance
338
+ * between vectors. l2 distance has a range of [0, ∞).
339
+ *
340
+ * "cosine" - Cosine distance. Cosine distance is a distance metric
341
+ * calculated from the cosine similarity between two vectors. Cosine
342
+ * similarity is a measure of similarity between two non-zero vectors of an
343
+ * inner product space. It is defined to equal the cosine of the angle
344
+ * between them. Unlike l2, the cosine distance is not affected by the
345
+ * magnitude of the vectors. Cosine distance has a range of [0, 2].
346
+ *
347
+ * Note: the cosine distance is undefined when one (or both) of the vectors
348
+ * are all zeros (there is no direction). These vectors are invalid and may
349
+ * never be returned from a vector search.
350
+ *
351
+ * "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
352
+ * distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
353
+ * l2 norm is 1), then dot distance is equivalent to the cosine distance.
354
+ *
355
+ * "hamming" - Hamming distance. Hamming distance is a distance metric
356
+ * calculated from the number of bits that are different between two vectors.
357
+ * Hamming distance has a range of [0, dimension]. Note that the hamming distance
358
+ * is only valid for binary vectors.
359
+ */
360
+ distanceType?: "l2" | "cosine" | "dot" | "hamming";
361
+ /**
362
+ * Max iteration to train IVF kmeans.
363
+ *
364
+ * When training an IVF FLAT index we use kmeans to calculate the partitions. This parameter
365
+ * controls how many iterations of kmeans to run.
366
+ *
367
+ * Increasing this might improve the quality of the index but in most cases these extra
368
+ * iterations have diminishing returns.
369
+ *
370
+ * The default value is 50.
371
+ */
372
+ maxIterations?: number;
373
+ /**
374
+ * The number of vectors, per partition, to sample when training IVF kmeans.
375
+ *
376
+ * When an IVF FLAT index is trained, we need to calculate partitions. These are groups
377
+ * of vectors that are similar to each other. To do this we use an algorithm called kmeans.
378
+ *
379
+ * Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
380
+ * random sample of the data. This parameter controls the size of the sample. The total
381
+ * number of vectors used to train the index is `sample_rate * num_partitions`.
382
+ *
383
+ * Increasing this value might improve the quality of the index but in most cases the
384
+ * default should be sufficient.
385
+ *
386
+ * The default value is 256.
387
+ */
388
+ sampleRate?: number;
389
+ }
306
390
  /**
307
391
  * Options to create a full text search index
308
392
  */
@@ -380,6 +464,23 @@ export declare class Index {
380
464
  * currently is also a memory intensive operation.
381
465
  */
382
466
  static ivfPq(options?: Partial<IvfPqOptions>): Index;
467
+ /**
468
+ * Create an IvfFlat index
469
+ *
470
+ * This index groups vectors into partitions of similar vectors. Each partition keeps track of
471
+ * a centroid which is the average value of all vectors in the group.
472
+ *
473
+ * During a query the centroids are compared with the query vector to find the closest
474
+ * partitions. The vectors in these partitions are then searched to find
475
+ * the closest vectors.
476
+ *
477
+ * The partitioning process is called IVF and the `numPartitions` option controls how
478
+ * many groups to create.
479
+ *
480
+ * Note that training an IVF FLAT index on a large dataset is a slow operation and
481
+ * currently is also a memory intensive operation.
482
+ */
483
+ static ivfFlat(options?: Partial<IvfFlatOptions>): Index;
383
484
  /**
384
485
  * Create a btree index
385
486
  *
package/dist/indices.js CHANGED
@@ -34,6 +34,25 @@ class Index {
34
34
  static ivfPq(options) {
35
35
  return new Index(native_1.Index.ivfPq(options?.distanceType, options?.numPartitions, options?.numSubVectors, options?.maxIterations, options?.sampleRate));
36
36
  }
37
+ /**
38
+ * Create an IvfFlat index
39
+ *
40
+ * This index groups vectors into partitions of similar vectors. Each partition keeps track of
41
+ * a centroid which is the average value of all vectors in the group.
42
+ *
43
+ * During a query the centroids are compared with the query vector to find the closest
44
+ * partitions. The vectors in these partitions are then searched to find
45
+ * the closest vectors.
46
+ *
47
+ * The partitioning process is called IVF and the `num_partitions` parameter controls how
48
+ * many groups to create.
49
+ *
50
+ * Note that training an IVF FLAT index on a large dataset is a slow operation and
51
+ * currently is also a memory intensive operation.
52
+ */
53
+ static ivfFlat(options) {
54
+ return new Index(native_1.Index.ivfFlat(options?.distanceType, options?.numPartitions, options?.maxIterations, options?.sampleRate));
55
+ }
37
56
  /**
38
57
  * Create a btree index
39
58
  *
package/dist/native.d.ts CHANGED
@@ -185,6 +185,11 @@ export interface IndexStatistics {
185
185
  distanceType?: string
186
186
  /** The number of parts this index is split into. */
187
187
  numIndices?: number
188
+ /**
189
+ * The KMeans loss value of the index,
190
+ * it is only present for vector indices.
191
+ */
192
+ loss?: number
188
193
  }
189
194
  export interface Version {
190
195
  version: number
@@ -257,6 +262,7 @@ export class Connection {
257
262
  }
258
263
  export class Index {
259
264
  static ivfPq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, numBits?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
265
+ static ivfFlat(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
260
266
  static btree(): Index
261
267
  static bitmap(): Index
262
268
  static labelList(): Index
package/dist/table.d.ts CHANGED
@@ -1,7 +1,7 @@
1
- import { Table as ArrowTable, Data, IntoVector, Schema } from "./arrow";
1
+ import { Table as ArrowTable, Data, DataType, IntoVector, Schema } from "./arrow";
2
2
  import { IndexOptions } from "./indices";
3
3
  import { MergeInsertBuilder } from "./merge";
4
- import { AddColumnsSql, ColumnAlteration, IndexConfig, IndexStatistics, OptimizeStats, Table as _NativeTable } from "./native";
4
+ import { AddColumnsSql, IndexConfig, IndexStatistics, OptimizeStats, Table as _NativeTable } from "./native";
5
5
  import { Query, VectorQuery } from "./query";
6
6
  import { IntoSql } from "./util";
7
7
  export { IndexConfig } from "./native";
@@ -439,3 +439,37 @@ export declare class LocalTable extends Table {
439
439
  */
440
440
  migrateManifestPathsV2(): Promise<void>;
441
441
  }
442
+ /**
443
+ * A definition of a column alteration. The alteration changes the column at
444
+ * `path` to have the new name `name`, to be nullable if `nullable` is true,
445
+ * and to have the data type `data_type`. At least one of `rename` or `nullable`
446
+ * must be provided.
447
+ */
448
+ export interface ColumnAlteration {
449
+ /**
450
+ * The path to the column to alter. This is a dot-separated path to the column.
451
+ * If it is a top-level column then it is just the name of the column. If it is
452
+ * a nested column then it is the path to the column, e.g. "a.b.c" for a column
453
+ * `c` nested inside a column `b` nested inside a column `a`.
454
+ */
455
+ path: string;
456
+ /**
457
+ * The new name of the column. If not provided then the name will not be changed.
458
+ * This must be distinct from the names of all other columns in the table.
459
+ */
460
+ rename?: string;
461
+ /**
462
+ * A new data type for the column. If not provided then the data type will not be changed.
463
+ * Changing data types is limited to casting to the same general type. For example, these
464
+ * changes are valid:
465
+ * * `int32` -> `int64` (integers)
466
+ * * `double` -> `float` (floats)
467
+ * * `string` -> `large_string` (strings)
468
+ * But these changes are not:
469
+ * * `int32` -> `double` (mix integers and floats)
470
+ * * `string` -> `int32` (mix strings and integers)
471
+ */
472
+ dataType?: string | DataType;
473
+ /** Set the new nullability. Note that a nullable column cannot be made non-nullable. */
474
+ nullable?: boolean;
475
+ }
package/dist/table.js CHANGED
@@ -7,6 +7,7 @@ const arrow_1 = require("./arrow");
7
7
  const registry_1 = require("./embedding/registry");
8
8
  const merge_1 = require("./merge");
9
9
  const query_1 = require("./query");
10
+ const sanitize_1 = require("./sanitize");
10
11
  const util_1 = require("./util");
11
12
  /**
12
13
  * A Table is a collection of Records in a LanceDB Database.
@@ -170,7 +171,28 @@ class LocalTable extends Table {
170
171
  await this.inner.addColumns(newColumnTransforms);
171
172
  }
172
173
  async alterColumns(columnAlterations) {
173
- await this.inner.alterColumns(columnAlterations);
174
+ const processedAlterations = columnAlterations.map((alteration) => {
175
+ if (typeof alteration.dataType === "string") {
176
+ return {
177
+ ...alteration,
178
+ dataType: JSON.stringify({ type: alteration.dataType }),
179
+ };
180
+ }
181
+ else if (alteration.dataType === undefined) {
182
+ return {
183
+ ...alteration,
184
+ dataType: undefined,
185
+ };
186
+ }
187
+ else {
188
+ const dataType = (0, sanitize_1.sanitizeType)(alteration.dataType);
189
+ return {
190
+ ...alteration,
191
+ dataType: JSON.stringify((0, arrow_1.dataTypeToJson)(dataType)),
192
+ };
193
+ }
194
+ });
195
+ await this.inner.alterColumns(processedAlterations);
174
196
  }
175
197
  async dropColumns(columnNames) {
176
198
  await this.inner.dropColumns(columnNames);
package/dist/util.d.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  export type IntoSql = string | number | boolean | null | Date | ArrayBufferLike | Buffer | IntoSql[];
2
2
  export declare function toSQL(value: IntoSql): string;
3
+ export declare function packBits(data: Array<number>): Array<number>;
3
4
  export declare class TTLCache {
4
5
  private readonly ttl;
5
6
  private readonly cache;
package/dist/util.js CHANGED
@@ -4,6 +4,7 @@
4
4
  Object.defineProperty(exports, "__esModule", { value: true });
5
5
  exports.TTLCache = void 0;
6
6
  exports.toSQL = toSQL;
7
+ exports.packBits = packBits;
7
8
  function toSQL(value) {
8
9
  if (typeof value === "string") {
9
10
  return `'${value.replace(/'/g, "''")}'`;
@@ -33,6 +34,15 @@ function toSQL(value) {
33
34
  throw new Error(`Unsupported value type: ${typeof value} value: (${value})`);
34
35
  }
35
36
  }
37
/**
 * Pack an array of bits into an array of bytes, least-significant bit first.
 *
 * Element `i` of `data` becomes bit `i % 8` of output byte `floor(i / 8)`, so
 * `[1, 0, 0, 0, 0, 0, 0, 0]` packs to `[1]` and `[0, 0, 0, 0, 0, 0, 0, 1]` to
 * `[128]`. A trailing group of fewer than 8 bits still produces a byte, with
 * the missing high bits left at 0.
 *
 * @param data - the bits to pack; each element is expected to be 0 or 1
 *               (values are not validated)
 * @returns the packed bytes, `ceil(data.length / 8)` entries long
 */
function packBits(data) {
    // Round up so a trailing partial byte has a real, zero-initialised slot
    // instead of relying on `undefined | x` coercion and implicit array growth.
    const packed = new Array(Math.ceil(data.length / 8)).fill(0);
    for (let i = 0; i < data.length; i++) {
        const byte = i >> 3;
        const bit = i & 7;
        packed[byte] |= data[i] << bit;
    }
    return packed;
}
36
46
  class TTLCache {
37
47
  ttl;
38
48
  // biome-ignore lint/suspicious/noExplicitAny: <explanation>
package/package.json CHANGED
@@ -11,7 +11,7 @@
11
11
  "ann"
12
12
  ],
13
13
  "private": false,
14
- "version": "0.18.0",
14
+ "version": "0.18.2-beta.0",
15
15
  "main": "dist/index.js",
16
16
  "exports": {
17
17
  ".": "./dist/index.js",
@@ -29,7 +29,6 @@
29
29
  "aarch64-apple-darwin",
30
30
  "x86_64-unknown-linux-gnu",
31
31
  "aarch64-unknown-linux-gnu",
32
- "x86_64-unknown-linux-musl",
33
32
  "aarch64-unknown-linux-musl",
34
33
  "x86_64-pc-windows-msvc",
35
34
  "aarch64-pc-windows-msvc"
@@ -81,8 +80,10 @@
81
80
  "artifacts": "napi artifacts",
82
81
  "build:debug": "napi build --platform --no-const-enum --dts ../lancedb/native.d.ts --js ../lancedb/native.js lancedb",
83
82
  "build:release": "napi build --platform --no-const-enum --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js dist/",
84
- "build": "npm run build:debug && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts && shx cp lancedb/*.node dist/",
85
- "build-release": "npm run build:release && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts",
83
+ "build": "npm run build:debug && npm run tsc && shx cp lancedb/*.node dist/",
84
+ "build-release": "npm run build:release && npm run tsc",
85
+ "tsc": "tsc -b",
86
+ "posttsc": "shx cp lancedb/native.d.ts dist/native.d.ts",
86
87
  "lint-ci": "biome ci .",
87
88
  "docs": "typedoc --plugin typedoc-plugin-markdown --treatWarningsAsErrors --out ../docs/src/js lancedb/index.ts",
88
89
  "postdocs": "node typedoc_post_process.js",
@@ -98,14 +99,13 @@
98
99
  "reflect-metadata": "^0.2.2"
99
100
  },
100
101
  "optionalDependencies": {
101
- "@lancedb/lancedb-darwin-x64": "0.18.0",
102
- "@lancedb/lancedb-darwin-arm64": "0.18.0",
103
- "@lancedb/lancedb-linux-x64-gnu": "0.18.0",
104
- "@lancedb/lancedb-linux-arm64-gnu": "0.18.0",
105
- "@lancedb/lancedb-linux-x64-musl": "0.18.0",
106
- "@lancedb/lancedb-linux-arm64-musl": "0.18.0",
107
- "@lancedb/lancedb-win32-x64-msvc": "0.18.0",
108
- "@lancedb/lancedb-win32-arm64-msvc": "0.18.0"
102
+ "@lancedb/lancedb-darwin-x64": "0.18.2-beta.0",
103
+ "@lancedb/lancedb-darwin-arm64": "0.18.2-beta.0",
104
+ "@lancedb/lancedb-linux-x64-gnu": "0.18.2-beta.0",
105
+ "@lancedb/lancedb-linux-arm64-gnu": "0.18.2-beta.0",
106
+ "@lancedb/lancedb-linux-arm64-musl": "0.18.2-beta.0",
107
+ "@lancedb/lancedb-win32-x64-msvc": "0.18.2-beta.0",
108
+ "@lancedb/lancedb-win32-arm64-msvc": "0.18.2-beta.0"
109
109
  },
110
110
  "peerDependencies": {
111
111
  "apache-arrow": ">=15.0.0 <=18.1.0"