@lancedb/lancedb 0.18.1 → 0.18.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -4
- package/dist/arrow.d.ts +13 -1
- package/dist/arrow.js +140 -4
- package/dist/index.d.ts +4 -4
- package/dist/index.js +3 -1
- package/dist/indices.d.ts +110 -9
- package/dist/indices.js +19 -0
- package/dist/native.d.ts +6 -0
- package/dist/table.d.ts +36 -2
- package/dist/table.js +23 -1
- package/dist/util.d.ts +1 -0
- package/dist/util.js +10 -0
- package/package.json +12 -12
- package/typedoc_post_process.js +0 -68
package/README.md
CHANGED
|
@@ -11,11 +11,9 @@ npm install @lancedb/lancedb
|
|
|
11
11
|
This will download the appropriate native library for your platform. We currently
|
|
12
12
|
support:
|
|
13
13
|
|
|
14
|
-
- Linux (x86_64 and aarch64)
|
|
14
|
+
- Linux (x86_64 and aarch64 on glibc and musl)
|
|
15
15
|
- MacOS (Intel and ARM/M1/M2)
|
|
16
|
-
- Windows (x86_64)
|
|
17
|
-
|
|
18
|
-
We do not yet support musl-based Linux (such as Alpine Linux) or aarch64 Windows.
|
|
16
|
+
- Windows (x86_64 and aarch64)
|
|
19
17
|
|
|
20
18
|
## Usage
|
|
21
19
|
|
package/dist/arrow.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Table as ArrowTable, Binary, BufferType, Field, FixedSizeBinary, FixedSizeList, Float, Int, LargeBinary, List, Null, RecordBatch, Schema, Struct, Utf8 } from "apache-arrow";
|
|
1
|
+
import { Table as ArrowTable, Binary, BufferType, DataType, Field, FixedSizeBinary, FixedSizeList, Float, Int, LargeBinary, List, Null, RecordBatch, Schema, Struct, Utf8 } from "apache-arrow";
|
|
2
2
|
import { Buffers } from "apache-arrow/data";
|
|
3
3
|
import { type EmbeddingFunction } from "./embedding/embedding_function";
|
|
4
4
|
import { EmbeddingFunctionConfig } from "./embedding/registry";
|
|
@@ -257,3 +257,15 @@ export declare function fromTableToStreamBuffer(table: ArrowTable, embeddings?:
|
|
|
257
257
|
* Create an empty table with the given schema
|
|
258
258
|
*/
|
|
259
259
|
export declare function createEmptyTable(schema: Schema): ArrowTable;
|
|
260
|
+
interface JsonDataType {
|
|
261
|
+
type: string;
|
|
262
|
+
fields?: JsonField[];
|
|
263
|
+
length?: number;
|
|
264
|
+
}
|
|
265
|
+
interface JsonField {
|
|
266
|
+
name: string;
|
|
267
|
+
type: JsonDataType;
|
|
268
|
+
nullable: boolean;
|
|
269
|
+
metadata: Map<string, string>;
|
|
270
|
+
}
|
|
271
|
+
export declare function dataTypeToJson(dataType: DataType): JsonDataType;
|
package/dist/arrow.js
CHANGED
|
@@ -49,6 +49,7 @@ exports.fromBufferToRecordBatch = fromBufferToRecordBatch;
|
|
|
49
49
|
exports.fromRecordBatchToBuffer = fromRecordBatchToBuffer;
|
|
50
50
|
exports.fromTableToStreamBuffer = fromTableToStreamBuffer;
|
|
51
51
|
exports.createEmptyTable = createEmptyTable;
|
|
52
|
+
exports.dataTypeToJson = dataTypeToJson;
|
|
52
53
|
const apache_arrow_1 = require("apache-arrow");
|
|
53
54
|
const registry_1 = require("./embedding/registry");
|
|
54
55
|
const sanitize_1 = require("./sanitize");
|
|
@@ -126,6 +127,22 @@ class VectorColumnOptions {
|
|
|
126
127
|
}
|
|
127
128
|
}
|
|
128
129
|
exports.VectorColumnOptions = VectorColumnOptions;
|
|
130
|
+
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
131
|
+
function vectorFromArray(data, type) {
|
|
132
|
+
// Workaround for: https://github.com/apache/arrow/issues/45862
|
|
133
|
+
// If FSL type with float
|
|
134
|
+
if (apache_arrow_1.DataType.isFixedSizeList(type) && apache_arrow_1.DataType.isFloat(type.valueType)) {
|
|
135
|
+
const extendedData = [...data, new Array(type.listSize).fill(0.0)];
|
|
136
|
+
const array = (0, apache_arrow_1.vectorFromArray)(extendedData, type);
|
|
137
|
+
return array.slice(0, data.length);
|
|
138
|
+
}
|
|
139
|
+
else if (type === undefined) {
|
|
140
|
+
return (0, apache_arrow_1.vectorFromArray)(data);
|
|
141
|
+
}
|
|
142
|
+
else {
|
|
143
|
+
return (0, apache_arrow_1.vectorFromArray)(data, type);
|
|
144
|
+
}
|
|
145
|
+
}
|
|
129
146
|
/** Options to control the makeArrowTable call. */
|
|
130
147
|
class MakeArrowTableOptions {
|
|
131
148
|
/*
|
|
@@ -618,7 +635,7 @@ function makeVector(values, type, stringAsDictionary) {
|
|
|
618
635
|
});
|
|
619
636
|
}
|
|
620
637
|
}
|
|
621
|
-
return
|
|
638
|
+
return vectorFromArray(values, type);
|
|
622
639
|
}
|
|
623
640
|
if (values.length === 0) {
|
|
624
641
|
throw Error("makeVector requires at least one value or the type must be specfied");
|
|
@@ -633,17 +650,17 @@ function makeVector(values, type, stringAsDictionary) {
|
|
|
633
650
|
}
|
|
634
651
|
else if (Buffer.isBuffer(sampleValue)) {
|
|
635
652
|
// Default Arrow inference doesn't handle Buffer
|
|
636
|
-
return
|
|
653
|
+
return vectorFromArray(values, new apache_arrow_1.Binary());
|
|
637
654
|
}
|
|
638
655
|
else if (!(stringAsDictionary ?? false) &&
|
|
639
656
|
(typeof sampleValue === "string" || sampleValue instanceof String)) {
|
|
640
657
|
// If the type is string then don't use Arrow's default inference unless dictionaries are requested
|
|
641
658
|
// because it will always use dictionary encoding for strings
|
|
642
|
-
return
|
|
659
|
+
return vectorFromArray(values, new apache_arrow_1.Utf8());
|
|
643
660
|
}
|
|
644
661
|
else {
|
|
645
662
|
// Convert a JS array of values to an arrow vector
|
|
646
|
-
return
|
|
663
|
+
return vectorFromArray(values);
|
|
647
664
|
}
|
|
648
665
|
}
|
|
649
666
|
/** Helper function to apply embeddings from metadata to an input table */
|
|
@@ -966,3 +983,122 @@ function validateSchemaEmbeddings(schema, data, embeddings) {
|
|
|
966
983
|
}
|
|
967
984
|
return new apache_arrow_1.Schema(fields, schema.metadata);
|
|
968
985
|
}
|
|
986
|
+
// Matches format of https://github.com/lancedb/lance/blob/main/rust/lance/src/arrow/json.rs
|
|
987
|
+
function dataTypeToJson(dataType) {
|
|
988
|
+
switch (dataType.typeId) {
|
|
989
|
+
// For primitives, matches https://github.com/lancedb/lance/blob/e12bb9eff2a52f753668d4b62c52e4d72b10d294/rust/lance-core/src/datatypes.rs#L185
|
|
990
|
+
case apache_arrow_1.Type.Null:
|
|
991
|
+
return { type: "null" };
|
|
992
|
+
case apache_arrow_1.Type.Bool:
|
|
993
|
+
return { type: "bool" };
|
|
994
|
+
case apache_arrow_1.Type.Int8:
|
|
995
|
+
return { type: "int8" };
|
|
996
|
+
case apache_arrow_1.Type.Int16:
|
|
997
|
+
return { type: "int16" };
|
|
998
|
+
case apache_arrow_1.Type.Int32:
|
|
999
|
+
return { type: "int32" };
|
|
1000
|
+
case apache_arrow_1.Type.Int64:
|
|
1001
|
+
return { type: "int64" };
|
|
1002
|
+
case apache_arrow_1.Type.Uint8:
|
|
1003
|
+
return { type: "uint8" };
|
|
1004
|
+
case apache_arrow_1.Type.Uint16:
|
|
1005
|
+
return { type: "uint16" };
|
|
1006
|
+
case apache_arrow_1.Type.Uint32:
|
|
1007
|
+
return { type: "uint32" };
|
|
1008
|
+
case apache_arrow_1.Type.Uint64:
|
|
1009
|
+
return { type: "uint64" };
|
|
1010
|
+
case apache_arrow_1.Type.Int: {
|
|
1011
|
+
const bitWidth = dataType.bitWidth;
|
|
1012
|
+
const signed = dataType.isSigned;
|
|
1013
|
+
const prefix = signed ? "" : "u";
|
|
1014
|
+
return { type: `${prefix}int${bitWidth}` };
|
|
1015
|
+
}
|
|
1016
|
+
case apache_arrow_1.Type.Float: {
|
|
1017
|
+
switch (dataType.precision) {
|
|
1018
|
+
case apache_arrow_1.Precision.HALF:
|
|
1019
|
+
return { type: "halffloat" };
|
|
1020
|
+
case apache_arrow_1.Precision.SINGLE:
|
|
1021
|
+
return { type: "float" };
|
|
1022
|
+
case apache_arrow_1.Precision.DOUBLE:
|
|
1023
|
+
return { type: "double" };
|
|
1024
|
+
}
|
|
1025
|
+
throw Error("Unsupported float precision");
|
|
1026
|
+
}
|
|
1027
|
+
case apache_arrow_1.Type.Float16:
|
|
1028
|
+
return { type: "halffloat" };
|
|
1029
|
+
case apache_arrow_1.Type.Float32:
|
|
1030
|
+
return { type: "float" };
|
|
1031
|
+
case apache_arrow_1.Type.Float64:
|
|
1032
|
+
return { type: "double" };
|
|
1033
|
+
case apache_arrow_1.Type.Utf8:
|
|
1034
|
+
return { type: "string" };
|
|
1035
|
+
case apache_arrow_1.Type.Binary:
|
|
1036
|
+
return { type: "binary" };
|
|
1037
|
+
case apache_arrow_1.Type.LargeUtf8:
|
|
1038
|
+
return { type: "large_string" };
|
|
1039
|
+
case apache_arrow_1.Type.LargeBinary:
|
|
1040
|
+
return { type: "large_binary" };
|
|
1041
|
+
case apache_arrow_1.Type.List:
|
|
1042
|
+
return {
|
|
1043
|
+
type: "list",
|
|
1044
|
+
fields: [fieldToJson(dataType.children[0])],
|
|
1045
|
+
};
|
|
1046
|
+
case apache_arrow_1.Type.FixedSizeList: {
|
|
1047
|
+
const fixedSizeList = dataType;
|
|
1048
|
+
return {
|
|
1049
|
+
type: "fixed_size_list",
|
|
1050
|
+
fields: [fieldToJson(fixedSizeList.children[0])],
|
|
1051
|
+
length: fixedSizeList.listSize,
|
|
1052
|
+
};
|
|
1053
|
+
}
|
|
1054
|
+
case apache_arrow_1.Type.Struct:
|
|
1055
|
+
return {
|
|
1056
|
+
type: "struct",
|
|
1057
|
+
fields: dataType.children.map(fieldToJson),
|
|
1058
|
+
};
|
|
1059
|
+
case apache_arrow_1.Type.Date: {
|
|
1060
|
+
const unit = dataType.unit;
|
|
1061
|
+
return {
|
|
1062
|
+
type: unit === apache_arrow_1.DateUnit.DAY ? "date32:day" : "date64:ms",
|
|
1063
|
+
};
|
|
1064
|
+
}
|
|
1065
|
+
case apache_arrow_1.Type.Timestamp: {
|
|
1066
|
+
const timestamp = dataType;
|
|
1067
|
+
const timezone = timestamp.timezone || "-";
|
|
1068
|
+
return {
|
|
1069
|
+
type: `timestamp:${timestamp.unit}:${timezone}`,
|
|
1070
|
+
};
|
|
1071
|
+
}
|
|
1072
|
+
case apache_arrow_1.Type.Decimal: {
|
|
1073
|
+
const decimal = dataType;
|
|
1074
|
+
return {
|
|
1075
|
+
type: `decimal:${decimal.bitWidth}:${decimal.precision}:${decimal.scale}`,
|
|
1076
|
+
};
|
|
1077
|
+
}
|
|
1078
|
+
case apache_arrow_1.Type.Duration: {
|
|
1079
|
+
const duration = dataType;
|
|
1080
|
+
return { type: `duration:${duration.unit}` };
|
|
1081
|
+
}
|
|
1082
|
+
case apache_arrow_1.Type.FixedSizeBinary: {
|
|
1083
|
+
const byteWidth = dataType.byteWidth;
|
|
1084
|
+
return { type: `fixed_size_binary:${byteWidth}` };
|
|
1085
|
+
}
|
|
1086
|
+
case apache_arrow_1.Type.Dictionary: {
|
|
1087
|
+
const dict = dataType;
|
|
1088
|
+
const indexType = dataTypeToJson(dict.indices);
|
|
1089
|
+
const valueType = dataTypeToJson(dict.valueType);
|
|
1090
|
+
return {
|
|
1091
|
+
type: `dict:${valueType.type}:${indexType.type}:false`,
|
|
1092
|
+
};
|
|
1093
|
+
}
|
|
1094
|
+
}
|
|
1095
|
+
throw new Error("Unsupported data type");
|
|
1096
|
+
}
|
|
1097
|
+
function fieldToJson(field) {
|
|
1098
|
+
return {
|
|
1099
|
+
name: field.name,
|
|
1100
|
+
type: dataTypeToJson(field.type),
|
|
1101
|
+
nullable: field.nullable,
|
|
1102
|
+
metadata: field.metadata,
|
|
1103
|
+
};
|
|
1104
|
+
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import { Connection } from "./connection";
|
|
2
2
|
import { ConnectionOptions } from "./native.js";
|
|
3
|
-
export { AddColumnsSql,
|
|
3
|
+
export { AddColumnsSql, ConnectionOptions, IndexStatistics, IndexConfig, ClientConfig, TimeoutConfig, RetryConfig, OptimizeStats, CompactionStats, RemovalStats, } from "./native.js";
|
|
4
4
|
export { makeArrowTable, MakeArrowTableOptions, Data, VectorColumnOptions, } from "./arrow";
|
|
5
5
|
export { Connection, CreateTableOptions, TableNamesOptions, OpenTableOptions, } from "./connection";
|
|
6
6
|
export { ExecutableQuery, Query, QueryBase, VectorQuery, QueryExecutionOptions, FullTextSearchOptions, RecordBatchIterator, } from "./query";
|
|
7
|
-
export { Index, IndexOptions, IvfPqOptions, HnswPqOptions, HnswSqOptions, FtsOptions, } from "./indices";
|
|
8
|
-
export { Table, AddDataOptions, UpdateOptions, OptimizeOptions, Version, } from "./table";
|
|
7
|
+
export { Index, IndexOptions, IvfPqOptions, IvfFlatOptions, HnswPqOptions, HnswSqOptions, FtsOptions, } from "./indices";
|
|
8
|
+
export { Table, AddDataOptions, UpdateOptions, OptimizeOptions, Version, ColumnAlteration, } from "./table";
|
|
9
9
|
export { MergeInsertBuilder } from "./merge";
|
|
10
10
|
export * as embedding from "./embedding";
|
|
11
11
|
export * as rerankers from "./rerankers";
|
|
12
12
|
export { SchemaLike, TableLike, FieldLike, RecordBatchLike, DataLike, IntoVector, } from "./arrow";
|
|
13
|
-
export { IntoSql } from "./util";
|
|
13
|
+
export { IntoSql, packBits } from "./util";
|
|
14
14
|
/**
|
|
15
15
|
* Connect to a LanceDB instance at the given URI.
|
|
16
16
|
*
|
package/dist/index.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
// SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
|
4
4
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
|
-
exports.rerankers = exports.embedding = exports.MergeInsertBuilder = exports.Table = exports.Index = exports.RecordBatchIterator = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = void 0;
|
|
5
|
+
exports.packBits = exports.rerankers = exports.embedding = exports.MergeInsertBuilder = exports.Table = exports.Index = exports.RecordBatchIterator = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = void 0;
|
|
6
6
|
exports.connect = connect;
|
|
7
7
|
const connection_1 = require("./connection");
|
|
8
8
|
const native_js_1 = require("./native.js");
|
|
@@ -25,6 +25,8 @@ var merge_1 = require("./merge");
|
|
|
25
25
|
Object.defineProperty(exports, "MergeInsertBuilder", { enumerable: true, get: function () { return merge_1.MergeInsertBuilder; } });
|
|
26
26
|
exports.embedding = require("./embedding");
|
|
27
27
|
exports.rerankers = require("./rerankers");
|
|
28
|
+
var util_1 = require("./util");
|
|
29
|
+
Object.defineProperty(exports, "packBits", { enumerable: true, get: function () { return util_1.packBits; } });
|
|
28
30
|
async function connect(uriOrOptions, options = {}) {
|
|
29
31
|
let uri;
|
|
30
32
|
if (typeof uriOrOptions !== "string") {
|
package/dist/indices.d.ts
CHANGED
|
@@ -54,13 +54,13 @@ export interface IvfPqOptions {
|
|
|
54
54
|
*
|
|
55
55
|
* "l2" - Euclidean distance. This is a very common distance metric that
|
|
56
56
|
* accounts for both magnitude and direction when determining the distance
|
|
57
|
-
* between vectors.
|
|
57
|
+
* between vectors. l2 distance has a range of [0, ∞).
|
|
58
58
|
*
|
|
59
59
|
* "cosine" - Cosine distance. Cosine distance is a distance metric
|
|
60
60
|
* calculated from the cosine similarity between two vectors. Cosine
|
|
61
61
|
* similarity is a measure of similarity between two non-zero vectors of an
|
|
62
62
|
* inner product space. It is defined to equal the cosine of the angle
|
|
63
|
-
* between them. Unlike
|
|
63
|
+
* between them. Unlike l2, the cosine distance is not affected by the
|
|
64
64
|
* magnitude of the vectors. Cosine distance has a range of [0, 2].
|
|
65
65
|
*
|
|
66
66
|
* Note: the cosine distance is undefined when one (or both) of the vectors
|
|
@@ -69,7 +69,7 @@ export interface IvfPqOptions {
|
|
|
69
69
|
*
|
|
70
70
|
* "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
|
71
71
|
* distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
|
72
|
-
*
|
|
72
|
+
* l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
|
73
73
|
*/
|
|
74
74
|
distanceType?: "l2" | "cosine" | "dot";
|
|
75
75
|
/**
|
|
@@ -114,18 +114,18 @@ export interface HnswPqOptions {
|
|
|
114
114
|
*
|
|
115
115
|
* "l2" - Euclidean distance. This is a very common distance metric that
|
|
116
116
|
* accounts for both magnitude and direction when determining the distance
|
|
117
|
-
* between vectors.
|
|
117
|
+
* between vectors. l2 distance has a range of [0, ∞).
|
|
118
118
|
*
|
|
119
119
|
* "cosine" - Cosine distance. Cosine distance is a distance metric
|
|
120
120
|
* calculated from the cosine similarity between two vectors. Cosine
|
|
121
121
|
* similarity is a measure of similarity between two non-zero vectors of an
|
|
122
122
|
* inner product space. It is defined to equal the cosine of the angle
|
|
123
|
-
* between them. Unlike
|
|
123
|
+
* between them. Unlike l2, the cosine distance is not affected by the
|
|
124
124
|
* magnitude of the vectors. Cosine distance has a range of [0, 2].
|
|
125
125
|
*
|
|
126
126
|
* "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
|
127
127
|
* distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
|
128
|
-
*
|
|
128
|
+
* l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
|
129
129
|
*/
|
|
130
130
|
distanceType?: "l2" | "cosine" | "dot";
|
|
131
131
|
/**
|
|
@@ -223,18 +223,18 @@ export interface HnswSqOptions {
|
|
|
223
223
|
*
|
|
224
224
|
* "l2" - Euclidean distance. This is a very common distance metric that
|
|
225
225
|
* accounts for both magnitude and direction when determining the distance
|
|
226
|
-
* between vectors.
|
|
226
|
+
* between vectors. l2 distance has a range of [0, ∞).
|
|
227
227
|
*
|
|
228
228
|
* "cosine" - Cosine distance. Cosine distance is a distance metric
|
|
229
229
|
* calculated from the cosine similarity between two vectors. Cosine
|
|
230
230
|
* similarity is a measure of similarity between two non-zero vectors of an
|
|
231
231
|
* inner product space. It is defined to equal the cosine of the angle
|
|
232
|
-
* between them. Unlike
|
|
232
|
+
* between them. Unlike l2, the cosine distance is not affected by the
|
|
233
233
|
* magnitude of the vectors. Cosine distance has a range of [0, 2].
|
|
234
234
|
*
|
|
235
235
|
* "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
|
236
236
|
* distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
|
237
|
-
*
|
|
237
|
+
* l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
|
238
238
|
*/
|
|
239
239
|
distanceType?: "l2" | "cosine" | "dot";
|
|
240
240
|
/**
|
|
@@ -303,6 +303,90 @@ export interface HnswSqOptions {
|
|
|
303
303
|
*/
|
|
304
304
|
efConstruction?: number;
|
|
305
305
|
}
|
|
306
|
+
/**
|
|
307
|
+
* Options to create an `IVF_FLAT` index
|
|
308
|
+
*/
|
|
309
|
+
export interface IvfFlatOptions {
|
|
310
|
+
/**
|
|
311
|
+
* The number of IVF partitions to create.
|
|
312
|
+
*
|
|
313
|
+
* This value should generally scale with the number of rows in the dataset.
|
|
314
|
+
* By default the number of partitions is the square root of the number of
|
|
315
|
+
* rows.
|
|
316
|
+
*
|
|
317
|
+
* If this value is too large then the first part of the search (picking the
|
|
318
|
+
* right partition) will be slow. If this value is too small then the second
|
|
319
|
+
* part of the search (searching within a partition) will be slow.
|
|
320
|
+
*/
|
|
321
|
+
numPartitions?: number;
|
|
322
|
+
/**
|
|
323
|
+
* Distance type to use to build the index.
|
|
324
|
+
*
|
|
325
|
+
* Default value is "l2".
|
|
326
|
+
*
|
|
327
|
+
* This is used when training the index to calculate the IVF partitions
|
|
328
|
+
* (vectors are grouped in partitions with similar vectors according to this
|
|
329
|
+
* distance type).
|
|
330
|
+
*
|
|
331
|
+
* The distance type used to train an index MUST match the distance type used
|
|
332
|
+
* to search the index. Failure to do so will yield inaccurate results.
|
|
333
|
+
*
|
|
334
|
+
* The following distance types are available:
|
|
335
|
+
*
|
|
336
|
+
* "l2" - Euclidean distance. This is a very common distance metric that
|
|
337
|
+
* accounts for both magnitude and direction when determining the distance
|
|
338
|
+
* between vectors. l2 distance has a range of [0, ∞).
|
|
339
|
+
*
|
|
340
|
+
* "cosine" - Cosine distance. Cosine distance is a distance metric
|
|
341
|
+
* calculated from the cosine similarity between two vectors. Cosine
|
|
342
|
+
* similarity is a measure of similarity between two non-zero vectors of an
|
|
343
|
+
* inner product space. It is defined to equal the cosine of the angle
|
|
344
|
+
* between them. Unlike l2, the cosine distance is not affected by the
|
|
345
|
+
* magnitude of the vectors. Cosine distance has a range of [0, 2].
|
|
346
|
+
*
|
|
347
|
+
* Note: the cosine distance is undefined when one (or both) of the vectors
|
|
348
|
+
* are all zeros (there is no direction). These vectors are invalid and may
|
|
349
|
+
* never be returned from a vector search.
|
|
350
|
+
*
|
|
351
|
+
* "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
|
352
|
+
* distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
|
353
|
+
* l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
|
354
|
+
*
|
|
355
|
+
* "hamming" - Hamming distance. Hamming distance is a distance metric
|
|
356
|
+
* calculated from the number of bits that are different between two vectors.
|
|
357
|
+
* Hamming distance has a range of [0, dimension]. Note that the hamming distance
|
|
358
|
+
* is only valid for binary vectors.
|
|
359
|
+
*/
|
|
360
|
+
distanceType?: "l2" | "cosine" | "dot" | "hamming";
|
|
361
|
+
/**
|
|
362
|
+
* Max iteration to train IVF kmeans.
|
|
363
|
+
*
|
|
364
|
+
* When training an IVF FLAT index we use kmeans to calculate the partitions. This parameter
|
|
365
|
+
* controls how many iterations of kmeans to run.
|
|
366
|
+
*
|
|
367
|
+
* Increasing this might improve the quality of the index but in most cases these extra
|
|
368
|
+
* iterations have diminishing returns.
|
|
369
|
+
*
|
|
370
|
+
* The default value is 50.
|
|
371
|
+
*/
|
|
372
|
+
maxIterations?: number;
|
|
373
|
+
/**
|
|
374
|
+
* The number of vectors, per partition, to sample when training IVF kmeans.
|
|
375
|
+
*
|
|
376
|
+
* When an IVF FLAT index is trained, we need to calculate partitions. These are groups
|
|
377
|
+
* of vectors that are similar to each other. To do this we use an algorithm called kmeans.
|
|
378
|
+
*
|
|
379
|
+
* Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
|
|
380
|
+
* random sample of the data. This parameter controls the size of the sample. The total
|
|
381
|
+
* number of vectors used to train the index is `sample_rate * num_partitions`.
|
|
382
|
+
*
|
|
383
|
+
* Increasing this value might improve the quality of the index but in most cases the
|
|
384
|
+
* default should be sufficient.
|
|
385
|
+
*
|
|
386
|
+
* The default value is 256.
|
|
387
|
+
*/
|
|
388
|
+
sampleRate?: number;
|
|
389
|
+
}
|
|
306
390
|
/**
|
|
307
391
|
* Options to create a full text search index
|
|
308
392
|
*/
|
|
@@ -380,6 +464,23 @@ export declare class Index {
|
|
|
380
464
|
* currently is also a memory intensive operation.
|
|
381
465
|
*/
|
|
382
466
|
static ivfPq(options?: Partial<IvfPqOptions>): Index;
|
|
467
|
+
/**
|
|
468
|
+
* Create an IvfFlat index
|
|
469
|
+
*
|
|
470
|
+
* This index groups vectors into partitions of similar vectors. Each partition keeps track of
|
|
471
|
+
* a centroid which is the average value of all vectors in the group.
|
|
472
|
+
*
|
|
473
|
+
* During a query the centroids are compared with the query vector to find the closest
|
|
474
|
+
* partitions. The vectors in these partitions are then searched to find
|
|
475
|
+
* the closest vectors.
|
|
476
|
+
*
|
|
477
|
+
* The partitioning process is called IVF and the `num_partitions` parameter controls how
|
|
478
|
+
* many groups to create.
|
|
479
|
+
*
|
|
480
|
+
* Note that training an IVF FLAT index on a large dataset is a slow operation and
|
|
481
|
+
* currently is also a memory intensive operation.
|
|
482
|
+
*/
|
|
483
|
+
static ivfFlat(options?: Partial<IvfFlatOptions>): Index;
|
|
383
484
|
/**
|
|
384
485
|
* Create a btree index
|
|
385
486
|
*
|
package/dist/indices.js
CHANGED
|
@@ -34,6 +34,25 @@ class Index {
|
|
|
34
34
|
static ivfPq(options) {
|
|
35
35
|
return new Index(native_1.Index.ivfPq(options?.distanceType, options?.numPartitions, options?.numSubVectors, options?.maxIterations, options?.sampleRate));
|
|
36
36
|
}
|
|
37
|
+
/**
|
|
38
|
+
* Create an IvfFlat index
|
|
39
|
+
*
|
|
40
|
+
* This index groups vectors into partitions of similar vectors. Each partition keeps track of
|
|
41
|
+
* a centroid which is the average value of all vectors in the group.
|
|
42
|
+
*
|
|
43
|
+
* During a query the centroids are compared with the query vector to find the closest
|
|
44
|
+
* partitions. The vectors in these partitions are then searched to find
|
|
45
|
+
* the closest vectors.
|
|
46
|
+
*
|
|
47
|
+
* The partitioning process is called IVF and the `num_partitions` parameter controls how
|
|
48
|
+
* many groups to create.
|
|
49
|
+
*
|
|
50
|
+
* Note that training an IVF FLAT index on a large dataset is a slow operation and
|
|
51
|
+
* currently is also a memory intensive operation.
|
|
52
|
+
*/
|
|
53
|
+
static ivfFlat(options) {
|
|
54
|
+
return new Index(native_1.Index.ivfFlat(options?.distanceType, options?.numPartitions, options?.maxIterations, options?.sampleRate));
|
|
55
|
+
}
|
|
37
56
|
/**
|
|
38
57
|
* Create a btree index
|
|
39
58
|
*
|
package/dist/native.d.ts
CHANGED
|
@@ -185,6 +185,11 @@ export interface IndexStatistics {
|
|
|
185
185
|
distanceType?: string
|
|
186
186
|
/** The number of parts this index is split into. */
|
|
187
187
|
numIndices?: number
|
|
188
|
+
/**
|
|
189
|
+
* The KMeans loss value of the index,
|
|
190
|
+
* it is only present for vector indices.
|
|
191
|
+
*/
|
|
192
|
+
loss?: number
|
|
188
193
|
}
|
|
189
194
|
export interface Version {
|
|
190
195
|
version: number
|
|
@@ -257,6 +262,7 @@ export class Connection {
|
|
|
257
262
|
}
|
|
258
263
|
export class Index {
|
|
259
264
|
static ivfPq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, numBits?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
|
|
265
|
+
static ivfFlat(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
|
|
260
266
|
static btree(): Index
|
|
261
267
|
static bitmap(): Index
|
|
262
268
|
static labelList(): Index
|
package/dist/table.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { Table as ArrowTable, Data, IntoVector, Schema } from "./arrow";
|
|
1
|
+
import { Table as ArrowTable, Data, DataType, IntoVector, Schema } from "./arrow";
|
|
2
2
|
import { IndexOptions } from "./indices";
|
|
3
3
|
import { MergeInsertBuilder } from "./merge";
|
|
4
|
-
import { AddColumnsSql,
|
|
4
|
+
import { AddColumnsSql, IndexConfig, IndexStatistics, OptimizeStats, Table as _NativeTable } from "./native";
|
|
5
5
|
import { Query, VectorQuery } from "./query";
|
|
6
6
|
import { IntoSql } from "./util";
|
|
7
7
|
export { IndexConfig } from "./native";
|
|
@@ -439,3 +439,37 @@ export declare class LocalTable extends Table {
|
|
|
439
439
|
*/
|
|
440
440
|
migrateManifestPathsV2(): Promise<void>;
|
|
441
441
|
}
|
|
442
|
+
/**
|
|
443
|
+
* A definition of a column alteration. The alteration changes the column at
|
|
444
|
+
* `path` to have the new name `name`, to be nullable if `nullable` is true,
|
|
445
|
+
* and to have the data type `data_type`. At least one of `rename` or `nullable`
|
|
446
|
+
* must be provided.
|
|
447
|
+
*/
|
|
448
|
+
export interface ColumnAlteration {
|
|
449
|
+
/**
|
|
450
|
+
* The path to the column to alter. This is a dot-separated path to the column.
|
|
451
|
+
* If it is a top-level column then it is just the name of the column. If it is
|
|
452
|
+
* a nested column then it is the path to the column, e.g. "a.b.c" for a column
|
|
453
|
+
* `c` nested inside a column `b` nested inside a column `a`.
|
|
454
|
+
*/
|
|
455
|
+
path: string;
|
|
456
|
+
/**
|
|
457
|
+
* The new name of the column. If not provided then the name will not be changed.
|
|
458
|
+
* This must be distinct from the names of all other columns in the table.
|
|
459
|
+
*/
|
|
460
|
+
rename?: string;
|
|
461
|
+
/**
|
|
462
|
+
* A new data type for the column. If not provided then the data type will not be changed.
|
|
463
|
+
* Changing data types is limited to casting to the same general type. For example, these
|
|
464
|
+
* changes are valid:
|
|
465
|
+
* * `int32` -> `int64` (integers)
|
|
466
|
+
* * `double` -> `float` (floats)
|
|
467
|
+
* * `string` -> `large_string` (strings)
|
|
468
|
+
* But these changes are not:
|
|
469
|
+
* * `int32` -> `double` (mix integers and floats)
|
|
470
|
+
* * `string` -> `int32` (mix strings and integers)
|
|
471
|
+
*/
|
|
472
|
+
dataType?: string | DataType;
|
|
473
|
+
/** Set the new nullability. Note that a nullable column cannot be made non-nullable. */
|
|
474
|
+
nullable?: boolean;
|
|
475
|
+
}
|
package/dist/table.js
CHANGED
|
@@ -7,6 +7,7 @@ const arrow_1 = require("./arrow");
|
|
|
7
7
|
const registry_1 = require("./embedding/registry");
|
|
8
8
|
const merge_1 = require("./merge");
|
|
9
9
|
const query_1 = require("./query");
|
|
10
|
+
const sanitize_1 = require("./sanitize");
|
|
10
11
|
const util_1 = require("./util");
|
|
11
12
|
/**
|
|
12
13
|
* A Table is a collection of Records in a LanceDB Database.
|
|
@@ -170,7 +171,28 @@ class LocalTable extends Table {
|
|
|
170
171
|
await this.inner.addColumns(newColumnTransforms);
|
|
171
172
|
}
|
|
172
173
|
async alterColumns(columnAlterations) {
|
|
173
|
-
|
|
174
|
+
const processedAlterations = columnAlterations.map((alteration) => {
|
|
175
|
+
if (typeof alteration.dataType === "string") {
|
|
176
|
+
return {
|
|
177
|
+
...alteration,
|
|
178
|
+
dataType: JSON.stringify({ type: alteration.dataType }),
|
|
179
|
+
};
|
|
180
|
+
}
|
|
181
|
+
else if (alteration.dataType === undefined) {
|
|
182
|
+
return {
|
|
183
|
+
...alteration,
|
|
184
|
+
dataType: undefined,
|
|
185
|
+
};
|
|
186
|
+
}
|
|
187
|
+
else {
|
|
188
|
+
const dataType = (0, sanitize_1.sanitizeType)(alteration.dataType);
|
|
189
|
+
return {
|
|
190
|
+
...alteration,
|
|
191
|
+
dataType: JSON.stringify((0, arrow_1.dataTypeToJson)(dataType)),
|
|
192
|
+
};
|
|
193
|
+
}
|
|
194
|
+
});
|
|
195
|
+
await this.inner.alterColumns(processedAlterations);
|
|
174
196
|
}
|
|
175
197
|
async dropColumns(columnNames) {
|
|
176
198
|
await this.inner.dropColumns(columnNames);
|
package/dist/util.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
export type IntoSql = string | number | boolean | null | Date | ArrayBufferLike | Buffer | IntoSql[];
|
|
2
2
|
export declare function toSQL(value: IntoSql): string;
|
|
3
|
+
export declare function packBits(data: Array<number>): Array<number>;
|
|
3
4
|
export declare class TTLCache {
|
|
4
5
|
private readonly ttl;
|
|
5
6
|
private readonly cache;
|
package/dist/util.js
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
5
|
exports.TTLCache = void 0;
|
|
6
6
|
exports.toSQL = toSQL;
|
|
7
|
+
exports.packBits = packBits;
|
|
7
8
|
function toSQL(value) {
|
|
8
9
|
if (typeof value === "string") {
|
|
9
10
|
return `'${value.replace(/'/g, "''")}'`;
|
|
@@ -33,6 +34,15 @@ function toSQL(value) {
|
|
|
33
34
|
throw new Error(`Unsupported value type: ${typeof value} value: (${value})`);
|
|
34
35
|
}
|
|
35
36
|
}
|
|
37
|
+
function packBits(data) {
|
|
38
|
+
const packed = Array(data.length >> 3).fill(0);
|
|
39
|
+
for (let i = 0; i < data.length; i++) {
|
|
40
|
+
const byte = i >> 3;
|
|
41
|
+
const bit = i & 7;
|
|
42
|
+
packed[byte] |= data[i] << bit;
|
|
43
|
+
}
|
|
44
|
+
return packed;
|
|
45
|
+
}
|
|
36
46
|
class TTLCache {
|
|
37
47
|
ttl;
|
|
38
48
|
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
package/package.json
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
"ann"
|
|
12
12
|
],
|
|
13
13
|
"private": false,
|
|
14
|
-
"version": "0.18.1",
|
|
14
|
+
"version": "0.18.2",
|
|
15
15
|
"main": "dist/index.js",
|
|
16
16
|
"exports": {
|
|
17
17
|
".": "./dist/index.js",
|
|
@@ -29,7 +29,6 @@
|
|
|
29
29
|
"aarch64-apple-darwin",
|
|
30
30
|
"x86_64-unknown-linux-gnu",
|
|
31
31
|
"aarch64-unknown-linux-gnu",
|
|
32
|
-
"x86_64-unknown-linux-musl",
|
|
33
32
|
"aarch64-unknown-linux-musl",
|
|
34
33
|
"x86_64-pc-windows-msvc",
|
|
35
34
|
"aarch64-pc-windows-msvc"
|
|
@@ -81,8 +80,10 @@
|
|
|
81
80
|
"artifacts": "napi artifacts",
|
|
82
81
|
"build:debug": "napi build --platform --no-const-enum --dts ../lancedb/native.d.ts --js ../lancedb/native.js lancedb",
|
|
83
82
|
"build:release": "napi build --platform --no-const-enum --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js dist/",
|
|
84
|
-
"build": "npm run build:debug && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts && shx cp lancedb/*.node dist/",
|
|
85
|
-
"build-release": "npm run build:release && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts",
|
|
83
|
+
"build": "npm run build:debug && npm run tsc && shx cp lancedb/*.node dist/",
|
|
84
|
+
"build-release": "npm run build:release && npm run tsc",
|
|
85
|
+
"tsc": "tsc -b",
|
|
86
|
+
"posttsc": "shx cp lancedb/native.d.ts dist/native.d.ts",
|
|
86
87
|
"lint-ci": "biome ci .",
|
|
87
88
|
"docs": "typedoc --plugin typedoc-plugin-markdown --treatWarningsAsErrors --out ../docs/src/js lancedb/index.ts",
|
|
88
89
|
"postdocs": "node typedoc_post_process.js",
|
|
@@ -98,14 +99,13 @@
|
|
|
98
99
|
"reflect-metadata": "^0.2.2"
|
|
99
100
|
},
|
|
100
101
|
"optionalDependencies": {
|
|
101
|
-
"@lancedb/lancedb-darwin-x64": "0.18.1",
|
|
102
|
-
"@lancedb/lancedb-darwin-arm64": "0.18.1",
|
|
103
|
-
"@lancedb/lancedb-linux-x64-gnu": "0.18.1",
|
|
104
|
-
"@lancedb/lancedb-linux-arm64-gnu": "0.18.1",
|
|
105
|
-
"@lancedb/lancedb-linux-x64-musl": "0.18.1",
|
|
106
|
-
"@lancedb/lancedb-linux-arm64-musl": "0.18.1",
|
|
107
|
-
"@lancedb/lancedb-win32-x64-msvc": "0.18.1",
|
|
108
|
-
"@lancedb/lancedb-win32-arm64-msvc": "0.18.1"
|
|
102
|
+
"@lancedb/lancedb-darwin-x64": "0.18.2",
|
|
103
|
+
"@lancedb/lancedb-darwin-arm64": "0.18.2",
|
|
104
|
+
"@lancedb/lancedb-linux-x64-gnu": "0.18.2",
|
|
105
|
+
"@lancedb/lancedb-linux-arm64-gnu": "0.18.2",
|
|
106
|
+
"@lancedb/lancedb-linux-arm64-musl": "0.18.2",
|
|
107
|
+
"@lancedb/lancedb-win32-x64-msvc": "0.18.2",
|
|
108
|
+
"@lancedb/lancedb-win32-arm64-msvc": "0.18.2"
|
|
109
109
|
},
|
|
110
110
|
"peerDependencies": {
|
|
111
111
|
"apache-arrow": ">=15.0.0 <=18.1.0"
|
package/typedoc_post_process.js
DELETED
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
const fs = require("fs");
|
|
2
|
-
const path = require("path");
|
|
3
|
-
|
|
4
|
-
// Read all files in the directory
|
|
5
|
-
function processDirectory(directoryPath) {
|
|
6
|
-
fs.readdir(directoryPath, { withFileTypes: true }, (err, files) => {
|
|
7
|
-
if (err) {
|
|
8
|
-
return console.error("Unable to scan directory: " + err);
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
files.forEach((file) => {
|
|
12
|
-
const filePath = path.join(directoryPath, file.name);
|
|
13
|
-
|
|
14
|
-
if (file.isDirectory()) {
|
|
15
|
-
// Recursively process subdirectory
|
|
16
|
-
processDirectory(filePath);
|
|
17
|
-
} else if (file.isFile()) {
|
|
18
|
-
// Read each file
|
|
19
|
-
fs.readFile(filePath, "utf8", (err, data) => {
|
|
20
|
-
if (err) {
|
|
21
|
-
return console.error("Unable to read file: " + err);
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
// Process the file content
|
|
25
|
-
const processedData = processContents(data);
|
|
26
|
-
|
|
27
|
-
// Write the processed content back to the file
|
|
28
|
-
fs.writeFile(filePath, processedData, "utf8", (err) => {
|
|
29
|
-
if (err) {
|
|
30
|
-
return console.error("Unable to write file: " + err);
|
|
31
|
-
}
|
|
32
|
-
console.log(`Processed file: ${filePath}`);
|
|
33
|
-
});
|
|
34
|
-
});
|
|
35
|
-
}
|
|
36
|
-
});
|
|
37
|
-
});
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
function processContents(contents) {
|
|
41
|
-
// This changes the parameters section to put the parameter description on
|
|
42
|
-
// the same line as the bullet with the parameter name and type.
|
|
43
|
-
return (
|
|
44
|
-
contents
|
|
45
|
-
.replace(/(## Parameters[\s\S]*?)(?=##|$)/g, (match) => {
|
|
46
|
-
let lines = match
|
|
47
|
-
.split("\n")
|
|
48
|
-
.map((line) => line.trim())
|
|
49
|
-
|
|
50
|
-
.filter((line) => line !== "")
|
|
51
|
-
.map((line) => {
|
|
52
|
-
if (line.startsWith("##")) {
|
|
53
|
-
return line;
|
|
54
|
-
} else if (line.startsWith("•")) {
|
|
55
|
-
return "\n*" + line.substring(1);
|
|
56
|
-
} else {
|
|
57
|
-
return " " + line;
|
|
58
|
-
}
|
|
59
|
-
});
|
|
60
|
-
return lines.join("\n") + "\n\n";
|
|
61
|
-
})
|
|
62
|
-
// Also trim trailing whitespace
|
|
63
|
-
.replace(/([^ \t])[ \t]+\n/g, "$1\n")
|
|
64
|
-
);
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
// Start processing from the root directory
|
|
68
|
-
processDirectory("../docs/src/js");
|