@lancedb/lancedb 0.4.3 → 0.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/README.md +35 -3
  2. package/dist/arrow.d.ts +189 -0
  3. package/dist/arrow.js +539 -0
  4. package/dist/connection.d.ts +97 -0
  5. package/dist/connection.js +126 -0
  6. package/dist/embedding/embedding_function.d.ts +45 -0
  7. package/dist/embedding/embedding_function.js +27 -0
  8. package/dist/embedding/index.d.ts +2 -0
  9. package/dist/embedding/index.js +7 -0
  10. package/dist/embedding/openai.d.ts +8 -0
  11. package/dist/embedding/openai.js +53 -0
  12. package/dist/index.d.ts +22 -0
  13. package/dist/index.js +52 -0
  14. package/dist/indices.d.ts +165 -0
  15. package/dist/indices.js +71 -0
  16. package/dist/native.d.ts +147 -0
  17. package/dist/native.js +314 -0
  18. package/dist/query.d.ts +248 -0
  19. package/dist/query.js +346 -0
  20. package/dist/sanitize.d.ts +9 -0
  21. package/dist/sanitize.js +369 -0
  22. package/dist/table.d.ts +252 -0
  23. package/dist/table.js +298 -0
  24. package/nodejs-artifacts/arrow.d.ts +189 -0
  25. package/nodejs-artifacts/arrow.js +539 -0
  26. package/nodejs-artifacts/connection.d.ts +97 -0
  27. package/nodejs-artifacts/connection.js +126 -0
  28. package/nodejs-artifacts/embedding/embedding_function.d.ts +45 -0
  29. package/nodejs-artifacts/embedding/embedding_function.js +27 -0
  30. package/nodejs-artifacts/embedding/index.d.ts +2 -0
  31. package/nodejs-artifacts/embedding/index.js +7 -0
  32. package/nodejs-artifacts/embedding/openai.d.ts +8 -0
  33. package/nodejs-artifacts/embedding/openai.js +53 -0
  34. package/nodejs-artifacts/index.d.ts +22 -0
  35. package/nodejs-artifacts/index.js +52 -0
  36. package/nodejs-artifacts/indices.d.ts +165 -0
  37. package/nodejs-artifacts/indices.js +71 -0
  38. package/nodejs-artifacts/native.d.ts +147 -0
  39. package/nodejs-artifacts/native.js +314 -0
  40. package/nodejs-artifacts/query.d.ts +248 -0
  41. package/nodejs-artifacts/query.js +346 -0
  42. package/nodejs-artifacts/sanitize.d.ts +9 -0
  43. package/nodejs-artifacts/sanitize.js +369 -0
  44. package/nodejs-artifacts/table.d.ts +252 -0
  45. package/nodejs-artifacts/table.js +298 -0
  46. package/package.json +9 -11
  47. package/typedoc.json +10 -0
  48. package/examples/js/index.mjs +0 -40
  49. package/examples/js/package.json +0 -14
  50. package/examples/js-openai/index.mjs +0 -43
  51. package/examples/js-openai/package-lock.json +0 -256
  52. package/examples/js-openai/package.json +0 -15
  53. package/examples/js-transformers/index.mjs +0 -65
  54. package/examples/js-transformers/package-lock.json +0 -1418
  55. package/examples/js-transformers/package.json +0 -15
  56. package/examples/js-youtube-transcripts/index.mjs +0 -135
  57. package/examples/js-youtube-transcripts/package.json +0 -15
  58. package/examples/ts/data/sample-lancedb/vectors.lance/_latest.manifest +0 -0
  59. package/examples/ts/data/sample-lancedb/vectors.lance/_transactions/0-adde4e05-fcfc-415c-86a6-5b252cb9e79a.txn +0 -0
  60. package/examples/ts/data/sample-lancedb/vectors.lance/_versions/1.manifest +0 -0
  61. package/examples/ts/data/sample-lancedb/vectors.lance/data/3618b33e-3eea-4b5e-a0fc-7d1f718d551e.lance +0 -0
  62. package/examples/ts/package-lock.json +0 -1340
  63. package/examples/ts/package.json +0 -22
  64. package/examples/ts/tsconfig.json +0 -10
@@ -0,0 +1,126 @@
1
+ "use strict";
2
+ // Copyright 2024 Lance Developers.
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+ Object.defineProperty(exports, "__esModule", { value: true });
16
+ exports.Connection = void 0;
17
+ const arrow_1 = require("./arrow");
18
+ const table_1 = require("./table");
19
+ const apache_arrow_1 = require("apache-arrow");
20
/**
 * A LanceDB Connection that allows you to open tables and create new ones.
 *
 * Connection could be local against filesystem or remote against a server.
 *
 * A Connection is intended to be a long lived object and may hold open
 * resources such as HTTP connection pools. This is generally fine and
 * a single connection should be shared if it is going to be used many
 * times. However, if you are finished with a connection, you may call
 * close to eagerly free these resources. Any call to a Connection
 * method after it has been closed will result in an error.
 *
 * Closing a connection is optional. Connections will automatically
 * be closed when they are garbage collected.
 *
 * Any created tables are independent and will continue to work even if
 * the underlying connection has been closed.
 */
class Connection {
    /** The native connection object that every method delegates to. */
    inner;
    /**
     * @param inner - The native connection to wrap.
     */
    constructor(inner) {
        this.inner = inner;
    }
    /**
     * Resolve the write mode handed to the native layer when creating a table.
     *
     * Starts from `options.mode`, falling back to "create". When the resolved
     * mode is "create" and `options.existOk` is truthy the result is
     * "exist_ok"; every other mode is returned unchanged.
     * @param options - Create options; may be `null` or `undefined`.
     * @returns {string} The mode string to pass to the native connection.
     */
    static #resolveCreateMode(options) {
        const mode = options?.mode ?? "create";
        const existOk = options?.existOk ?? false;
        return mode === "create" && existOk ? "exist_ok" : mode;
    }
    /** Return true if the connection has not been closed */
    isOpen() {
        return this.inner.isOpen();
    }
    /**
     * Close the connection, releasing any underlying resources.
     *
     * It is safe to call this method multiple times.
     *
     * Any attempt to use the connection after it is closed will result in an error.
     */
    close() {
        this.inner.close();
    }
    /** Return a brief description of the connection */
    display() {
        return this.inner.display();
    }
    /**
     * List all the table names in this database.
     *
     * Tables will be returned in lexicographical order.
     * @param {Partial<TableNamesOptions>} options - options to control the
     * paging / start point. `options.startAfter` and `options.limit` are
     * forwarded to the native connection; both are `undefined` when
     * `options` is omitted.
     */
    async tableNames(options) {
        return this.inner.tableNames(options?.startAfter, options?.limit);
    }
    /**
     * Open a table in the database.
     * @param {string} name - The name of the table
     */
    async openTable(name) {
        const innerTable = await this.inner.openTable(name);
        return new table_1.Table(innerTable);
    }
    /**
     * Creates a new Table and initialize it with new data.
     * @param {string} name - The name of the table.
     * @param {Record<string, unknown>[] | ArrowTable} data - Non-empty Array of Records
     * to be inserted into the table
     * @param options - Optional settings. `options.mode` defaults to "create";
     * when the mode is "create" and `options.existOk` is truthy the mode
     * "exist_ok" is sent to the native layer instead.
     */
    async createTable(name, data, options) {
        const mode = Connection.#resolveCreateMode(options);
        // Arrow tables are used as-is; anything else is converted first.
        const table = data instanceof apache_arrow_1.Table
            ? data
            : (0, arrow_1.makeArrowTable)(data);
        const buf = await (0, arrow_1.fromTableToBuffer)(table);
        const innerTable = await this.inner.createTable(name, buf, mode);
        return new table_1.Table(innerTable);
    }
    /**
     * Creates a new empty Table
     * @param {string} name - The name of the table.
     * @param {Schema} schema - The schema of the table
     * @param options - Optional settings, interpreted exactly as in
     * `createTable` (`mode` defaults to "create"; `existOk` turns "create"
     * into "exist_ok").
     */
    async createEmptyTable(name, schema, options) {
        const mode = Connection.#resolveCreateMode(options);
        const table = (0, arrow_1.makeEmptyTable)(schema);
        const buf = await (0, arrow_1.fromTableToBuffer)(table);
        const innerTable = await this.inner.createEmptyTable(name, buf, mode);
        return new table_1.Table(innerTable);
    }
    /**
     * Drop an existing table.
     * @param {string} name The name of the table to drop.
     */
    async dropTable(name) {
        return this.inner.dropTable(name);
    }
}
126
+ exports.Connection = Connection;
@@ -0,0 +1,45 @@
1
+ import { type Float } from "apache-arrow";
2
+ /**
3
+ * An embedding function that automatically creates vector representation for a given column.
4
+ */
5
+ export interface EmbeddingFunction<T> {
6
+ /**
7
+ * The name of the column that will be used as input for the Embedding Function.
8
+ */
9
+ sourceColumn: string;
10
+ /**
11
+ * The data type of the embedding
12
+ *
13
+ * The embedding function should return `number`. This will be converted into
14
+ * an Arrow float array. By default this will be Float32 but this property can
15
+ * be used to control the conversion.
16
+ */
17
+ embeddingDataType?: Float;
18
+ /**
19
+ * The dimension of the embedding
20
+ *
21
+ * This is optional, normally this can be determined by looking at the results of
22
+ * `embed`. If this is not specified, and there is an attempt to apply the embedding
23
+ * to an empty table, then that process will fail.
24
+ */
25
+ embeddingDimension?: number;
26
+ /**
27
+ * The name of the column that will contain the embedding
28
+ *
29
+ * By default this is "vector"
30
+ */
31
+ destColumn?: string;
32
+ /**
33
+ * Should the source column be excluded from the resulting table
34
+ *
35
+ * By default the source column is included. Set this to true and
36
+ * only the embedding will be stored.
37
+ */
38
+ excludeSource?: boolean;
39
+ /**
40
+ * Creates a vector representation for the given values.
41
+ */
42
+ embed: (data: T[]) => Promise<number[][]>;
43
+ }
44
+ /** Test if the input seems to be an embedding function */
45
+ export declare function isEmbeddingFunction<T>(value: unknown): value is EmbeddingFunction<T>;
@@ -0,0 +1,27 @@
1
+ "use strict";
2
+ // Copyright 2023 Lance Developers.
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+ Object.defineProperty(exports, "__esModule", { value: true });
16
+ exports.isEmbeddingFunction = void 0;
17
/**
 * Duck-type check for an embedding function.
 *
 * A value qualifies when it is a non-null object that exposes a string
 * `sourceColumn` property and a callable `embed` property.
 * @param {unknown} value - The candidate to inspect.
 * @returns {boolean} true when `value` has the expected shape.
 */
function isEmbeddingFunction(value) {
    const isNonNullObject = value !== null && typeof value === "object";
    if (!isNonNullObject) {
        return false;
    }
    // `in` also accepts inherited members, e.g. an `embed` method on a class prototype.
    const hasRequiredKeys = "sourceColumn" in value && "embed" in value;
    if (!hasRequiredKeys) {
        return false;
    }
    const { sourceColumn, embed } = value;
    return typeof sourceColumn === "string" && typeof embed === "function";
}
27
+ exports.isEmbeddingFunction = isEmbeddingFunction;
@@ -0,0 +1,2 @@
1
+ export { EmbeddingFunction, isEmbeddingFunction } from "./embedding_function";
2
+ export { OpenAIEmbeddingFunction } from "./openai";
@@ -0,0 +1,7 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.OpenAIEmbeddingFunction = exports.isEmbeddingFunction = void 0;
4
+ var embedding_function_1 = require("./embedding_function");
5
+ Object.defineProperty(exports, "isEmbeddingFunction", { enumerable: true, get: function () { return embedding_function_1.isEmbeddingFunction; } });
6
+ var openai_1 = require("./openai");
7
+ Object.defineProperty(exports, "OpenAIEmbeddingFunction", { enumerable: true, get: function () { return openai_1.OpenAIEmbeddingFunction; } });
@@ -0,0 +1,8 @@
1
+ import { type EmbeddingFunction } from "./embedding_function";
2
+ export declare class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
3
+ private readonly _openai;
4
+ private readonly _modelName;
5
+ constructor(sourceColumn: string, openAIKey: string, modelName?: string);
6
+ embed(data: string[]): Promise<number[][]>;
7
+ sourceColumn: string;
8
+ }
@@ -0,0 +1,53 @@
1
+ "use strict";
2
+ // Copyright 2023 Lance Developers.
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+ Object.defineProperty(exports, "__esModule", { value: true });
16
+ exports.OpenAIEmbeddingFunction = void 0;
17
/**
 * Embedding function that delegates to an OpenAI client's `embeddings.create`.
 *
 * The `openai` package is loaded inside the constructor, so it only needs to
 * be installed when this class is actually instantiated.
 */
class OpenAIEmbeddingFunction {
    _openai;
    _modelName;
    /** Name of the source column, stored as given to the constructor. */
    sourceColumn;
    /**
     * @param {string} sourceColumn - Stored on the instance as `sourceColumn`.
     * @param {string} openAIKey - Passed to the OpenAI client as `apiKey`.
     * @param {string} [modelName="text-embedding-ada-002"] - Model name sent
     * with every embedding request.
     * @throws {Error} When the `openai` package cannot be loaded. The
     * underlying failure is attached as `cause`.
     */
    constructor(sourceColumn, openAIKey, modelName = "text-embedding-ada-002") {
        // eslint-disable-next-line @typescript-eslint/naming-convention
        let Openai;
        try {
            // eslint-disable-next-line @typescript-eslint/no-var-requires
            Openai = require("openai");
        }
        catch (cause) {
            // Keep the original failure so callers can see why loading failed.
            throw new Error("please install openai@^4.24.1 using npm install openai", { cause });
        }
        this.sourceColumn = sourceColumn;
        this._openai = new Openai({ apiKey: openAIKey });
        this._modelName = modelName;
    }
    /**
     * Request one embedding per input value.
     * @param {string[]} data - Values to embed.
     * @returns {Promise<number[][]>} The `embedding` of each entry in the
     * response's `data` array, in response order. An empty input array
     * resolves to `[]` without issuing a request.
     */
    async embed(data) {
        if (Array.isArray(data) && data.length === 0) {
            // Nothing to embed; skip the API round trip.
            return [];
        }
        const response = await this._openai.embeddings.create({
            model: this._modelName,
            input: data,
        });
        return response.data.map((item) => item.embedding);
    }
}
53
+ exports.OpenAIEmbeddingFunction = OpenAIEmbeddingFunction;
@@ -0,0 +1,22 @@
1
+ import { Connection } from "./connection";
2
+ import { ConnectionOptions } from "./native.js";
3
+ export { WriteOptions, WriteMode, AddColumnsSql, ColumnAlteration, ConnectionOptions, } from "./native.js";
4
+ export { makeArrowTable, MakeArrowTableOptions, Data, VectorColumnOptions, } from "./arrow";
5
+ export { Connection, CreateTableOptions, TableNamesOptions, } from "./connection";
6
+ export { ExecutableQuery, Query, QueryBase, VectorQuery, RecordBatchIterator, } from "./query";
7
+ export { Index, IndexOptions, IvfPqOptions } from "./indices";
8
+ export { Table, AddDataOptions, IndexConfig, UpdateOptions } from "./table";
9
+ export * as embedding from "./embedding";
10
+ /**
11
+ * Connect to a LanceDB instance at the given URI.
12
+ *
13
+ * Accepted formats:
14
+ *
15
+ * - `/path/to/database` - local database
16
+ * - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
17
+ * - `db://host:port` - remote database (LanceDB cloud)
18
+ * @param {string} uri - The uri of the database. If the database uri starts
19
+ * with `db://` then it connects to a remote database.
20
+ * @see {@link ConnectionOptions} for more details on the URI format.
21
+ */
22
+ export declare function connect(uri: string, opts?: Partial<ConnectionOptions>): Promise<Connection>;
package/dist/index.js ADDED
@@ -0,0 +1,52 @@
1
+ "use strict";
2
+ // Copyright 2024 Lance Developers.
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+ Object.defineProperty(exports, "__esModule", { value: true });
16
+ exports.connect = exports.embedding = exports.Table = exports.Index = exports.RecordBatchIterator = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = void 0;
17
+ const connection_1 = require("./connection");
18
+ const native_js_1 = require("./native.js");
19
+ var arrow_1 = require("./arrow");
20
+ Object.defineProperty(exports, "makeArrowTable", { enumerable: true, get: function () { return arrow_1.makeArrowTable; } });
21
+ Object.defineProperty(exports, "MakeArrowTableOptions", { enumerable: true, get: function () { return arrow_1.MakeArrowTableOptions; } });
22
+ Object.defineProperty(exports, "VectorColumnOptions", { enumerable: true, get: function () { return arrow_1.VectorColumnOptions; } });
23
+ var connection_2 = require("./connection");
24
+ Object.defineProperty(exports, "Connection", { enumerable: true, get: function () { return connection_2.Connection; } });
25
+ var query_1 = require("./query");
26
+ Object.defineProperty(exports, "Query", { enumerable: true, get: function () { return query_1.Query; } });
27
+ Object.defineProperty(exports, "QueryBase", { enumerable: true, get: function () { return query_1.QueryBase; } });
28
+ Object.defineProperty(exports, "VectorQuery", { enumerable: true, get: function () { return query_1.VectorQuery; } });
29
+ Object.defineProperty(exports, "RecordBatchIterator", { enumerable: true, get: function () { return query_1.RecordBatchIterator; } });
30
+ var indices_1 = require("./indices");
31
+ Object.defineProperty(exports, "Index", { enumerable: true, get: function () { return indices_1.Index; } });
32
+ var table_1 = require("./table");
33
+ Object.defineProperty(exports, "Table", { enumerable: true, get: function () { return table_1.Table; } });
34
+ exports.embedding = require("./embedding");
35
/**
 * Connect to a LanceDB instance at the given URI.
 *
 * Accepted formats:
 *
 * - `/path/to/database` - local database
 * - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
 * - `db://host:port` - remote database (LanceDB cloud)
 * @param {string} uri - The uri of the database. If the database uri starts
 * with `db://` then it connects to a remote database.
 * @param {Partial<ConnectionOptions>} [opts] - Connection options. When
 * `null` or `undefined`, an empty object is passed to the native layer.
 * @returns {Promise<Connection>} A Connection wrapping the native connection.
 * @see {@link ConnectionOptions} for more details on the URI format.
 */
async function connect(uri, opts) {
    // Use a fresh binding rather than reassigning the caller-visible parameter.
    const nativeOptions = opts ?? {};
    const nativeConn = await native_js_1.Connection.new(uri, nativeOptions);
    return new connection_1.Connection(nativeConn);
}
52
+ exports.connect = connect;
@@ -0,0 +1,165 @@
1
+ /**
2
+ * Options to create an `IVF_PQ` index
3
+ */
4
+ export interface IvfPqOptions {
5
+ /**
6
+ * The number of IVF partitions to create.
7
+ *
8
+ * This value should generally scale with the number of rows in the dataset.
9
+ * By default the number of partitions is the square root of the number of
10
+ * rows.
11
+ *
12
+ * If this value is too large then the first part of the search (picking the
13
+ * right partition) will be slow. If this value is too small then the second
14
+ * part of the search (searching within a partition) will be slow.
15
+ */
16
+ numPartitions?: number;
17
+ /**
18
+ * Number of sub-vectors of PQ.
19
+ *
20
+ * This value controls how much the vector is compressed during the quantization step.
21
+ * The more sub vectors there are the less the vector is compressed. The default is
22
+ * the dimension of the vector divided by 16. If the dimension is not evenly divisible
23
+ * by 16 we use the dimension divided by 8.
24
+ *
25
+ * The above two cases are highly preferred. Having 8 or 16 values per subvector allows
26
+ * us to use efficient SIMD instructions.
27
+ *
28
+ * If the dimension is not divisible by 8 then we use 1 subvector. This is not ideal and
29
+ * will likely result in poor performance.
30
+ */
31
+ numSubVectors?: number;
32
+ /**
33
+ * Distance type to use to build the index.
34
+ *
35
+ * Default value is "l2".
36
+ *
37
+ * This is used when training the index to calculate the IVF partitions
38
+ * (vectors are grouped in partitions with similar vectors according to this
39
+ * distance type) and to calculate a subvector's code during quantization.
40
+ *
41
+ * The distance type used to train an index MUST match the distance type used
42
+ * to search the index. Failure to do so will yield inaccurate results.
43
+ *
44
+ * The following distance types are available:
45
+ *
46
+ * "l2" - Euclidean distance. This is a very common distance metric that
47
+ * accounts for both magnitude and direction when determining the distance
48
+ * between vectors. L2 distance has a range of [0, ∞).
49
+ *
50
+ * "cosine" - Cosine distance. Cosine distance is a distance metric
51
+ * calculated from the cosine similarity between two vectors. Cosine
52
+ * similarity is a measure of similarity between two non-zero vectors of an
53
+ * inner product space. It is defined to equal the cosine of the angle
54
+ * between them. Unlike L2, the cosine distance is not affected by the
55
+ * magnitude of the vectors. Cosine distance has a range of [0, 2].
56
+ *
57
+ * Note: the cosine distance is undefined when one (or both) of the vectors
58
+ * are all zeros (there is no direction). These vectors are invalid and may
59
+ * never be returned from a vector search.
60
+ *
61
+ * "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
62
+ * distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
63
+ * L2 norm is 1), then dot distance is equivalent to the cosine distance.
64
+ */
65
+ distanceType?: "l2" | "cosine" | "dot";
66
+ /**
67
+ * Max iteration to train IVF kmeans.
68
+ *
69
+ * When training an IVF PQ index we use kmeans to calculate the partitions. This parameter
70
+ * controls how many iterations of kmeans to run.
71
+ *
72
+ * Increasing this might improve the quality of the index but in most cases these extra
73
+ * iterations have diminishing returns.
74
+ *
75
+ * The default value is 50.
76
+ */
77
+ maxIterations?: number;
78
+ /**
79
+ * The number of vectors, per partition, to sample when training IVF kmeans.
80
+ *
81
+ * When an IVF PQ index is trained, we need to calculate partitions. These are groups
82
+ * of vectors that are similar to each other. To do this we use an algorithm called kmeans.
83
+ *
84
+ * Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
85
+ * random sample of the data. This parameter controls the size of the sample. The total
86
+ * number of vectors used to train the index is `sample_rate * num_partitions`.
87
+ *
88
+ * Increasing this value might improve the quality of the index but in most cases the
89
+ * default should be sufficient.
90
+ *
91
+ * The default value is 256.
92
+ */
93
+ sampleRate?: number;
94
+ }
95
+ export declare class Index {
96
+ private readonly inner;
97
+ private constructor();
98
+ /**
99
+ * Create an IvfPq index
100
+ *
101
+ * This index stores a compressed (quantized) copy of every vector. These vectors
102
+ * are grouped into partitions of similar vectors. Each partition keeps track of
103
+ * a centroid which is the average value of all vectors in the group.
104
+ *
105
+ * During a query the centroids are compared with the query vector to find the closest
106
+ * partitions. The compressed vectors in these partitions are then searched to find
107
+ * the closest vectors.
108
+ *
109
+ * The compression scheme is called product quantization. Each vector is divided into
110
+ * subvectors and then each subvector is quantized into a small number of bits. The
111
+ * parameters `num_bits` and `num_subvectors` control this process, providing a tradeoff
112
+ * between index size (and thus search speed) and index accuracy.
113
+ *
114
+ * The partitioning process is called IVF and the `num_partitions` parameter controls how
115
+ * many groups to create.
116
+ *
117
+ * Note that training an IVF PQ index on a large dataset is a slow operation and
118
+ * currently is also a memory intensive operation.
119
+ */
120
+ static ivfPq(options?: Partial<IvfPqOptions>): Index;
121
+ /**
122
+ * Create a btree index
123
+ *
124
+ * A btree index is an index on a scalar column. The index stores a copy of the column
125
+ * in sorted order. A header entry is created for each block of rows (currently the
126
+ * block size is fixed at 4096). These header entries are stored in a separate
127
+ * cacheable structure (a btree). To search for data the header is used to determine
128
+ * which blocks need to be read from disk.
129
+ *
130
+ * For example, a btree index in a table with 1Bi rows requires sizeof(Scalar) * 256Ki
131
+ * bytes of memory and will generally need to read sizeof(Scalar) * 4096 bytes to find
132
+ * the correct row ids.
133
+ *
134
+ * This index is good for scalar columns with mostly distinct values and does best when
135
+ * the query is highly selective.
136
+ *
137
+ * The btree index does not currently have any parameters though parameters such as the
138
+ * block size may be added in the future.
139
+ */
140
+ static btree(): Index;
141
+ }
142
+ export interface IndexOptions {
143
+ /**
144
+ * Advanced index configuration
145
+ *
146
+ * This option allows you to specify a specific index to create and also
147
+ * allows you to pass in configuration for training the index.
148
+ *
149
+ * See the static methods on Index for details on the various index types.
150
+ *
151
+ * If this is not supplied then column data type(s) and column statistics
152
+ * will be used to determine the most useful kind of index to create.
153
+ */
154
+ config?: Index;
155
+ /**
156
+ * Whether to replace the existing index
157
+ *
158
+ * If this is false, and another index already exists on the same columns
159
+ * and the same name, then an error will be returned. This is true even if
160
+ * that index is out of date.
161
+ *
162
+ * The default is true
163
+ */
164
+ replace?: boolean;
165
+ }
@@ -0,0 +1,71 @@
1
+ "use strict";
2
+ // Copyright 2024 Lance Developers.
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+ Object.defineProperty(exports, "__esModule", { value: true });
16
+ exports.Index = void 0;
17
+ const native_1 = require("./native");
18
/**
 * Wrapper around a native index configuration object.
 *
 * Instances are produced by the static factory methods below.
 */
class Index {
    /** The native index configuration built by one of the factories. */
    inner;
    /**
     * @param inner - Native index configuration to wrap.
     */
    constructor(inner) {
        this.inner = inner;
    }
    /**
     * Create an IvfPq index
     *
     * An IVF PQ index keeps a compressed (quantized) copy of every vector and
     * groups those vectors into partitions of similar vectors. Each partition
     * tracks a centroid, the average of all vectors in the group.
     *
     * At query time the centroids are compared with the query vector to pick
     * the closest partitions; the compressed vectors inside those partitions
     * are then searched for the closest matches.
     *
     * The compression scheme is product quantization: each vector is split
     * into subvectors and each subvector is quantized into a small number of
     * bits. The `numSubVectors` option sets the number of subvectors, trading
     * index size (and thus search speed) against index accuracy.
     *
     * The partitioning scheme is IVF; the `numPartitions` option sets how many
     * groups are created.
     *
     * Note that training an IVF PQ index on a large dataset is a slow operation and
     * currently is also a memory intensive operation.
     * @param {Partial<IvfPqOptions>} [options] - Training options. Each
     * missing option is forwarded to the native layer as `undefined`.
     */
    static ivfPq(options) {
        const {
            distanceType,
            numPartitions,
            numSubVectors,
            maxIterations,
            sampleRate,
        } = options ?? {};
        const nativeIndex = native_1.Index.ivfPq(distanceType, numPartitions, numSubVectors, maxIterations, sampleRate);
        return new Index(nativeIndex);
    }
    /**
     * Create a btree index
     *
     * A btree index is an index on a scalar column. It stores a copy of the
     * column in sorted order, with one header entry per block of rows
     * (currently the block size is fixed at 4096). The header entries live in
     * a separate cacheable structure (a btree), which is consulted during a
     * search to decide which blocks need to be read from disk.
     *
     * For example, a btree index in a table with 1Bi rows requires sizeof(Scalar) * 256Ki
     * bytes of memory and will generally need to read sizeof(Scalar) * 4096 bytes to find
     * the correct row ids.
     *
     * This index suits scalar columns with mostly distinct values and does
     * best when the query is highly selective.
     *
     * The btree index does not currently have any parameters though parameters such as the
     * block size may be added in the future.
     */
    static btree() {
        const nativeIndex = native_1.Index.btree();
        return new Index(nativeIndex);
    }
}
71
+ exports.Index = Index;