@lancedb/lancedb 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/arrow.d.ts +5 -3
- package/dist/arrow.js +1 -1
- package/dist/embedding/embedding_function.d.ts +4 -3
- package/dist/embedding/index.d.ts +1 -0
- package/dist/embedding/index.js +1 -0
- package/dist/embedding/registry.d.ts +9 -7
- package/dist/embedding/registry.js +24 -6
- package/dist/embedding/transformers.d.ts +37 -0
- package/dist/embedding/transformers.js +147 -0
- package/dist/query.js +15 -9
- package/dist/remote/client.d.ts +1 -1
- package/dist/remote/client.js +6 -8
- package/dist/remote/connection.d.ts +2 -3
- package/dist/remote/connection.js +2 -2
- package/dist/table.d.ts +3 -0
- package/dist/table.js +1 -1
- package/package.json +17 -14
- package/Cargo.toml +0 -28
- package/biome.json +0 -158
- package/build.rs +0 -5
- package/dist/native.d.ts +0 -208
- package/examples/ann_indexes.ts +0 -49
- package/examples/basic.ts +0 -149
- package/examples/embedding.ts +0 -83
- package/examples/filtering.ts +0 -34
- package/examples/jsconfig.json +0 -27
- package/examples/package-lock.json +0 -79
- package/examples/package.json +0 -18
- package/examples/search.ts +0 -37
- package/jest.config.js +0 -7
- package/lancedb/arrow.ts +0 -947
- package/lancedb/connection.ts +0 -333
- package/lancedb/embedding/embedding_function.ts +0 -194
- package/lancedb/embedding/index.ts +0 -113
- package/lancedb/embedding/openai.ts +0 -113
- package/lancedb/embedding/registry.ts +0 -188
- package/lancedb/index.ts +0 -142
- package/lancedb/indices.ts +0 -203
- package/lancedb/merge.ts +0 -70
- package/lancedb/query.ts +0 -507
- package/lancedb/remote/client.ts +0 -221
- package/lancedb/remote/connection.ts +0 -201
- package/lancedb/remote/index.ts +0 -3
- package/lancedb/remote/table.ts +0 -226
- package/lancedb/sanitize.ts +0 -588
- package/lancedb/table.ts +0 -669
- package/lancedb/util.ts +0 -69
- package/native.d.ts +0 -208
- package/nodejs-artifacts/arrow.d.ts +0 -250
- package/nodejs-artifacts/arrow.js +0 -768
- package/nodejs-artifacts/connection.d.ts +0 -171
- package/nodejs-artifacts/connection.js +0 -135
- package/nodejs-artifacts/embedding/embedding_function.d.ts +0 -79
- package/nodejs-artifacts/embedding/embedding_function.js +0 -112
- package/nodejs-artifacts/embedding/index.d.ts +0 -28
- package/nodejs-artifacts/embedding/index.js +0 -114
- package/nodejs-artifacts/embedding/openai.d.ts +0 -18
- package/nodejs-artifacts/embedding/openai.js +0 -105
- package/nodejs-artifacts/embedding/registry.d.ts +0 -53
- package/nodejs-artifacts/embedding/registry.js +0 -127
- package/nodejs-artifacts/index.d.ts +0 -55
- package/nodejs-artifacts/index.js +0 -57
- package/nodejs-artifacts/indices.d.ts +0 -165
- package/nodejs-artifacts/indices.js +0 -71
- package/nodejs-artifacts/merge.d.ts +0 -54
- package/nodejs-artifacts/merge.js +0 -64
- package/nodejs-artifacts/native.d.ts +0 -208
- package/nodejs-artifacts/native.js +0 -330
- package/nodejs-artifacts/query.d.ts +0 -283
- package/nodejs-artifacts/query.js +0 -448
- package/nodejs-artifacts/remote/client.d.ts +0 -28
- package/nodejs-artifacts/remote/client.js +0 -172
- package/nodejs-artifacts/remote/connection.d.ts +0 -25
- package/nodejs-artifacts/remote/connection.js +0 -110
- package/nodejs-artifacts/remote/index.d.ts +0 -3
- package/nodejs-artifacts/remote/index.js +0 -9
- package/nodejs-artifacts/remote/table.d.ts +0 -42
- package/nodejs-artifacts/remote/table.js +0 -179
- package/nodejs-artifacts/sanitize.d.ts +0 -31
- package/nodejs-artifacts/sanitize.js +0 -436
- package/nodejs-artifacts/table.d.ts +0 -395
- package/nodejs-artifacts/table.js +0 -230
- package/nodejs-artifacts/util.d.ts +0 -14
- package/nodejs-artifacts/util.js +0 -65
- package/tsconfig.json +0 -25
- package/typedoc.json +0 -10
|
@@ -1,105 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// Copyright 2023 Lance Developers.
|
|
3
|
-
//
|
|
4
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
// you may not use this file except in compliance with the License.
|
|
6
|
-
// You may obtain a copy of the License at
|
|
7
|
-
//
|
|
8
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
//
|
|
10
|
-
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
// See the License for the specific language governing permissions and
|
|
14
|
-
// limitations under the License.
|
|
15
|
-
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
|
|
16
|
-
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
|
|
17
|
-
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
|
|
18
|
-
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
19
|
-
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
20
|
-
};
|
|
21
|
-
var __metadata = (this && this.__metadata) || function (k, v) {
|
|
22
|
-
if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
|
|
23
|
-
};
|
|
24
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
25
|
-
exports.OpenAIEmbeddingFunction = void 0;
|
|
26
|
-
const arrow_1 = require("../arrow");
|
|
27
|
-
const embedding_function_1 = require("./embedding_function");
|
|
28
|
-
const registry_1 = require("./registry");
|
|
29
|
-
let OpenAIEmbeddingFunction = class OpenAIEmbeddingFunction extends embedding_function_1.EmbeddingFunction {
|
|
30
|
-
#openai;
|
|
31
|
-
#modelName;
|
|
32
|
-
constructor(options = {
|
|
33
|
-
model: "text-embedding-ada-002",
|
|
34
|
-
}) {
|
|
35
|
-
super();
|
|
36
|
-
const openAIKey = options?.apiKey ?? process.env.OPENAI_API_KEY;
|
|
37
|
-
if (!openAIKey) {
|
|
38
|
-
throw new Error("OpenAI API key is required");
|
|
39
|
-
}
|
|
40
|
-
const modelName = options?.model ?? "text-embedding-ada-002";
|
|
41
|
-
/**
|
|
42
|
-
* @type {import("openai").default}
|
|
43
|
-
*/
|
|
44
|
-
// eslint-disable-next-line @typescript-eslint/naming-convention
|
|
45
|
-
let Openai;
|
|
46
|
-
try {
|
|
47
|
-
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
|
48
|
-
Openai = require("openai");
|
|
49
|
-
}
|
|
50
|
-
catch {
|
|
51
|
-
throw new Error("please install openai@^4.24.1 using npm install openai");
|
|
52
|
-
}
|
|
53
|
-
const configuration = {
|
|
54
|
-
apiKey: openAIKey,
|
|
55
|
-
};
|
|
56
|
-
this.#openai = new Openai(configuration);
|
|
57
|
-
this.#modelName = modelName;
|
|
58
|
-
}
|
|
59
|
-
toJSON() {
|
|
60
|
-
return {
|
|
61
|
-
model: this.#modelName,
|
|
62
|
-
};
|
|
63
|
-
}
|
|
64
|
-
ndims() {
|
|
65
|
-
switch (this.#modelName) {
|
|
66
|
-
case "text-embedding-ada-002":
|
|
67
|
-
return 1536;
|
|
68
|
-
case "text-embedding-3-large":
|
|
69
|
-
return 3072;
|
|
70
|
-
case "text-embedding-3-small":
|
|
71
|
-
return 1536;
|
|
72
|
-
default:
|
|
73
|
-
throw new Error(`Unknown model: ${this.#modelName}`);
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
embeddingDataType() {
|
|
77
|
-
return new arrow_1.Float32();
|
|
78
|
-
}
|
|
79
|
-
async computeSourceEmbeddings(data) {
|
|
80
|
-
const response = await this.#openai.embeddings.create({
|
|
81
|
-
model: this.#modelName,
|
|
82
|
-
input: data,
|
|
83
|
-
});
|
|
84
|
-
const embeddings = [];
|
|
85
|
-
for (let i = 0; i < response.data.length; i++) {
|
|
86
|
-
embeddings.push(response.data[i].embedding);
|
|
87
|
-
}
|
|
88
|
-
return embeddings;
|
|
89
|
-
}
|
|
90
|
-
async computeQueryEmbeddings(data) {
|
|
91
|
-
if (typeof data !== "string") {
|
|
92
|
-
throw new Error("Data must be a string");
|
|
93
|
-
}
|
|
94
|
-
const response = await this.#openai.embeddings.create({
|
|
95
|
-
model: this.#modelName,
|
|
96
|
-
input: data,
|
|
97
|
-
});
|
|
98
|
-
return response.data[0].embedding;
|
|
99
|
-
}
|
|
100
|
-
};
|
|
101
|
-
exports.OpenAIEmbeddingFunction = OpenAIEmbeddingFunction;
|
|
102
|
-
exports.OpenAIEmbeddingFunction = OpenAIEmbeddingFunction = __decorate([
|
|
103
|
-
(0, registry_1.register)("openai"),
|
|
104
|
-
__metadata("design:paramtypes", [Object])
|
|
105
|
-
], OpenAIEmbeddingFunction);
|
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
import { type EmbeddingFunction, type EmbeddingFunctionConstructor } from "./embedding_function";
|
|
2
|
-
import "reflect-metadata";
|
|
3
|
-
import { OpenAIEmbeddingFunction } from "./openai";
|
|
4
|
-
interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
|
|
5
|
-
create(options?: T["TOptions"]): T;
|
|
6
|
-
}
|
|
7
|
-
/**
|
|
8
|
-
* This is a singleton class used to register embedding functions
|
|
9
|
-
* and fetch them by name. It also handles serializing and deserializing.
|
|
10
|
-
* You can implement your own embedding function by subclassing EmbeddingFunction
|
|
11
|
-
* or TextEmbeddingFunction and registering it with the registry
|
|
12
|
-
*/
|
|
13
|
-
export declare class EmbeddingFunctionRegistry {
|
|
14
|
-
#private;
|
|
15
|
-
/**
|
|
16
|
-
* Register an embedding function
|
|
17
|
-
* @param alias The alias to register the function under; defaults to the constructor's name
|
|
18
|
-
* @returns A decorator that registers the constructor it is applied to
|
|
19
|
-
* @throws Error if the function is already registered
|
|
20
|
-
*/
|
|
21
|
-
register<T extends EmbeddingFunctionConstructor = EmbeddingFunctionConstructor>(this: EmbeddingFunctionRegistry, alias?: string): (ctor: T) => any;
|
|
22
|
-
/**
|
|
23
|
-
* Fetch an embedding function by name
|
|
24
|
-
* @param name The name of the function
|
|
25
|
-
*/
|
|
26
|
-
get<T extends EmbeddingFunction<unknown>, Name extends string = "">(name: Name extends "openai" ? "openai" : string): Name extends "openai" ? EmbeddingFunctionCreate<OpenAIEmbeddingFunction> : EmbeddingFunctionCreate<T> | undefined;
|
|
27
|
-
/**
|
|
28
|
-
* reset the registry to the initial state
|
|
29
|
-
*/
|
|
30
|
-
reset(this: EmbeddingFunctionRegistry): void;
|
|
31
|
-
/**
|
|
32
|
-
* @ignore
|
|
33
|
-
*/
|
|
34
|
-
parseFunctions(this: EmbeddingFunctionRegistry, metadata: Map<string, string>): Map<string, EmbeddingFunctionConfig>;
|
|
35
|
-
functionToMetadata(conf: EmbeddingFunctionConfig): Record<string, any>;
|
|
36
|
-
getTableMetadata(functions: EmbeddingFunctionConfig[]): Map<string, string>;
|
|
37
|
-
}
|
|
38
|
-
export declare function register(name?: string): (ctor: EmbeddingFunctionConstructor<EmbeddingFunction<any, import("./embedding_function").FunctionOptions>>) => any;
|
|
39
|
-
/**
|
|
40
|
-
* Utility function to get the global instance of the registry
|
|
41
|
-
* @returns `EmbeddingFunctionRegistry` The global instance of the registry
|
|
42
|
-
* @example
|
|
43
|
-
* ```ts
|
|
44
|
-
* const registry = getRegistry();
|
|
45
|
-
* const openai = registry.get("openai").create();
|
|
46
|
-
*/
|
|
47
|
-
export declare function getRegistry(): EmbeddingFunctionRegistry;
|
|
48
|
-
export interface EmbeddingFunctionConfig {
|
|
49
|
-
sourceColumn: string;
|
|
50
|
-
vectorColumn?: string;
|
|
51
|
-
function: EmbeddingFunction;
|
|
52
|
-
}
|
|
53
|
-
export {};
|
|
@@ -1,127 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// Copyright 2024 Lance Developers.
|
|
3
|
-
//
|
|
4
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
// you may not use this file except in compliance with the License.
|
|
6
|
-
// You may obtain a copy of the License at
|
|
7
|
-
//
|
|
8
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
//
|
|
10
|
-
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
// See the License for the specific language governing permissions and
|
|
14
|
-
// limitations under the License.
|
|
15
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
-
exports.getRegistry = exports.register = exports.EmbeddingFunctionRegistry = void 0;
|
|
17
|
-
require("reflect-metadata");
|
|
18
|
-
/**
|
|
19
|
-
* This is a singleton class used to register embedding functions
|
|
20
|
-
* and fetch them by name. It also handles serializing and deserializing.
|
|
21
|
-
* You can implement your own embedding function by subclassing EmbeddingFunction
|
|
22
|
-
* or TextEmbeddingFunction and registering it with the registry
|
|
23
|
-
*/
|
|
24
|
-
class EmbeddingFunctionRegistry {
|
|
25
|
-
#functions = new Map();
|
|
26
|
-
/**
|
|
27
|
-
* Register an embedding function
|
|
28
|
-
* @param alias The alias to register the function under; defaults to the constructor's name
|
|
29
|
-
* @returns A decorator that registers the constructor it is applied to
|
|
30
|
-
* @throws Error if the function is already registered
|
|
31
|
-
*/
|
|
32
|
-
register(alias) {
|
|
33
|
-
const self = this;
|
|
34
|
-
return function (ctor) {
|
|
35
|
-
if (!alias) {
|
|
36
|
-
alias = ctor.name;
|
|
37
|
-
}
|
|
38
|
-
if (self.#functions.has(alias)) {
|
|
39
|
-
throw new Error(`Embedding function with alias "${alias}" already exists`);
|
|
40
|
-
}
|
|
41
|
-
self.#functions.set(alias, ctor);
|
|
42
|
-
Reflect.defineMetadata("lancedb::embedding::name", alias, ctor);
|
|
43
|
-
return ctor;
|
|
44
|
-
};
|
|
45
|
-
}
|
|
46
|
-
/**
|
|
47
|
-
* Fetch an embedding function by name
|
|
48
|
-
* @param name The name of the function
|
|
49
|
-
*/
|
|
50
|
-
get(name) {
|
|
51
|
-
const factory = this.#functions.get(name);
|
|
52
|
-
if (!factory) {
|
|
53
|
-
return undefined;
|
|
54
|
-
}
|
|
55
|
-
return {
|
|
56
|
-
create: function (options) {
|
|
57
|
-
return new factory(options);
|
|
58
|
-
},
|
|
59
|
-
};
|
|
60
|
-
}
|
|
61
|
-
/**
|
|
62
|
-
* reset the registry to the initial state
|
|
63
|
-
*/
|
|
64
|
-
reset() {
|
|
65
|
-
this.#functions.clear();
|
|
66
|
-
}
|
|
67
|
-
/**
|
|
68
|
-
* @ignore
|
|
69
|
-
*/
|
|
70
|
-
parseFunctions(metadata) {
|
|
71
|
-
if (!metadata.has("embedding_functions")) {
|
|
72
|
-
return new Map();
|
|
73
|
-
}
|
|
74
|
-
else {
|
|
75
|
-
const functions = (JSON.parse(metadata.get("embedding_functions")));
|
|
76
|
-
return new Map(functions.map((f) => {
|
|
77
|
-
const fn = this.get(f.name);
|
|
78
|
-
if (!fn) {
|
|
79
|
-
throw new Error(`Function "${f.name}" not found in registry`);
|
|
80
|
-
}
|
|
81
|
-
return [
|
|
82
|
-
f.name,
|
|
83
|
-
{
|
|
84
|
-
sourceColumn: f.sourceColumn,
|
|
85
|
-
vectorColumn: f.vectorColumn,
|
|
86
|
-
function: this.get(f.name).create(f.model),
|
|
87
|
-
},
|
|
88
|
-
];
|
|
89
|
-
}));
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
93
|
-
functionToMetadata(conf) {
|
|
94
|
-
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
95
|
-
const metadata = {};
|
|
96
|
-
const name = Reflect.getMetadata("lancedb::embedding::name", conf.function.constructor);
|
|
97
|
-
metadata["sourceColumn"] = conf.sourceColumn;
|
|
98
|
-
metadata["vectorColumn"] = conf.vectorColumn ?? "vector";
|
|
99
|
-
metadata["name"] = name ?? conf.function.constructor.name;
|
|
100
|
-
metadata["model"] = conf.function.toJSON();
|
|
101
|
-
return metadata;
|
|
102
|
-
}
|
|
103
|
-
getTableMetadata(functions) {
|
|
104
|
-
const metadata = new Map();
|
|
105
|
-
const jsonData = functions.map((conf) => this.functionToMetadata(conf));
|
|
106
|
-
metadata.set("embedding_functions", JSON.stringify(jsonData));
|
|
107
|
-
return metadata;
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
exports.EmbeddingFunctionRegistry = EmbeddingFunctionRegistry;
|
|
111
|
-
const _REGISTRY = new EmbeddingFunctionRegistry();
|
|
112
|
-
function register(name) {
|
|
113
|
-
return _REGISTRY.register(name);
|
|
114
|
-
}
|
|
115
|
-
exports.register = register;
|
|
116
|
-
/**
|
|
117
|
-
* Utility function to get the global instance of the registry
|
|
118
|
-
* @returns `EmbeddingFunctionRegistry` The global instance of the registry
|
|
119
|
-
* @example
|
|
120
|
-
* ```ts
|
|
121
|
-
* const registry = getRegistry();
|
|
122
|
-
* const openai = registry.get("openai").create();
|
|
123
|
-
*/
|
|
124
|
-
function getRegistry() {
|
|
125
|
-
return _REGISTRY;
|
|
126
|
-
}
|
|
127
|
-
exports.getRegistry = getRegistry;
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
import { Connection } from "./connection";
|
|
2
|
-
import { ConnectionOptions } from "./native.js";
|
|
3
|
-
import { RemoteConnectionOptions } from "./remote";
|
|
4
|
-
export { WriteOptions, WriteMode, AddColumnsSql, ColumnAlteration, ConnectionOptions, IndexStatistics, IndexMetadata, IndexConfig, } from "./native.js";
|
|
5
|
-
export { makeArrowTable, MakeArrowTableOptions, Data, VectorColumnOptions, } from "./arrow";
|
|
6
|
-
export { Connection, CreateTableOptions, TableNamesOptions, } from "./connection";
|
|
7
|
-
export { ExecutableQuery, Query, QueryBase, VectorQuery, RecordBatchIterator, } from "./query";
|
|
8
|
-
export { Index, IndexOptions, IvfPqOptions } from "./indices";
|
|
9
|
-
export { Table, AddDataOptions, UpdateOptions } from "./table";
|
|
10
|
-
export * as embedding from "./embedding";
|
|
11
|
-
/**
|
|
12
|
-
* Connect to a LanceDB instance at the given URI.
|
|
13
|
-
*
|
|
14
|
-
* Accepted formats:
|
|
15
|
-
*
|
|
16
|
-
* - `/path/to/database` - local database
|
|
17
|
-
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
|
18
|
-
* - `db://host:port` - remote database (LanceDB cloud)
|
|
19
|
-
* @param {string} uri - The uri of the database. If the database uri starts
|
|
20
|
-
* with `db://` then it connects to a remote database.
|
|
21
|
-
* @see {@link ConnectionOptions} for more details on the URI format.
|
|
22
|
-
* @example
|
|
23
|
-
* ```ts
|
|
24
|
-
* const conn = await connect("/path/to/database");
|
|
25
|
-
* ```
|
|
26
|
-
* @example
|
|
27
|
-
* ```ts
|
|
28
|
-
* const conn = await connect(
|
|
29
|
-
* "s3://bucket/path/to/database",
|
|
30
|
-
* {storageOptions: {timeout: "60s"}
|
|
31
|
-
* });
|
|
32
|
-
* ```
|
|
33
|
-
*/
|
|
34
|
-
export declare function connect(uri: string, opts?: Partial<ConnectionOptions | RemoteConnectionOptions>): Promise<Connection>;
|
|
35
|
-
/**
|
|
36
|
-
* Connect to a LanceDB instance at the given URI.
|
|
37
|
-
*
|
|
38
|
-
* Accepted formats:
|
|
39
|
-
*
|
|
40
|
-
* - `/path/to/database` - local database
|
|
41
|
-
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
|
42
|
-
* - `db://host:port` - remote database (LanceDB cloud)
|
|
43
|
-
* @param options - The options to use when connecting to the database
|
|
44
|
-
* @see {@link ConnectionOptions} for more details on the URI format.
|
|
45
|
-
* @example
|
|
46
|
-
* ```ts
|
|
47
|
-
* const conn = await connect({
|
|
48
|
-
* uri: "/path/to/database",
|
|
49
|
-
* storageOptions: {timeout: "60s"}
|
|
50
|
-
* });
|
|
51
|
-
* ```
|
|
52
|
-
*/
|
|
53
|
-
export declare function connect(opts: Partial<RemoteConnectionOptions | ConnectionOptions> & {
|
|
54
|
-
uri: string;
|
|
55
|
-
}): Promise<Connection>;
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// Copyright 2024 Lance Developers.
|
|
3
|
-
//
|
|
4
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
// you may not use this file except in compliance with the License.
|
|
6
|
-
// You may obtain a copy of the License at
|
|
7
|
-
//
|
|
8
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
//
|
|
10
|
-
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
// See the License for the specific language governing permissions and
|
|
14
|
-
// limitations under the License.
|
|
15
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
-
exports.connect = exports.embedding = exports.Table = exports.Index = exports.RecordBatchIterator = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = void 0;
|
|
17
|
-
const connection_1 = require("./connection");
|
|
18
|
-
const native_js_1 = require("./native.js");
|
|
19
|
-
const remote_1 = require("./remote");
|
|
20
|
-
var arrow_1 = require("./arrow");
|
|
21
|
-
Object.defineProperty(exports, "makeArrowTable", { enumerable: true, get: function () { return arrow_1.makeArrowTable; } });
|
|
22
|
-
Object.defineProperty(exports, "MakeArrowTableOptions", { enumerable: true, get: function () { return arrow_1.MakeArrowTableOptions; } });
|
|
23
|
-
Object.defineProperty(exports, "VectorColumnOptions", { enumerable: true, get: function () { return arrow_1.VectorColumnOptions; } });
|
|
24
|
-
var connection_2 = require("./connection");
|
|
25
|
-
Object.defineProperty(exports, "Connection", { enumerable: true, get: function () { return connection_2.Connection; } });
|
|
26
|
-
var query_1 = require("./query");
|
|
27
|
-
Object.defineProperty(exports, "Query", { enumerable: true, get: function () { return query_1.Query; } });
|
|
28
|
-
Object.defineProperty(exports, "QueryBase", { enumerable: true, get: function () { return query_1.QueryBase; } });
|
|
29
|
-
Object.defineProperty(exports, "VectorQuery", { enumerable: true, get: function () { return query_1.VectorQuery; } });
|
|
30
|
-
Object.defineProperty(exports, "RecordBatchIterator", { enumerable: true, get: function () { return query_1.RecordBatchIterator; } });
|
|
31
|
-
var indices_1 = require("./indices");
|
|
32
|
-
Object.defineProperty(exports, "Index", { enumerable: true, get: function () { return indices_1.Index; } });
|
|
33
|
-
var table_1 = require("./table");
|
|
34
|
-
Object.defineProperty(exports, "Table", { enumerable: true, get: function () { return table_1.Table; } });
|
|
35
|
-
exports.embedding = require("./embedding");
|
|
36
|
-
async function connect(uriOrOptions, opts = {}) {
|
|
37
|
-
let uri;
|
|
38
|
-
if (typeof uriOrOptions !== "string") {
|
|
39
|
-
const { uri: uri_, ...options } = uriOrOptions;
|
|
40
|
-
uri = uri_;
|
|
41
|
-
opts = options;
|
|
42
|
-
}
|
|
43
|
-
else {
|
|
44
|
-
uri = uriOrOptions;
|
|
45
|
-
}
|
|
46
|
-
if (!uri) {
|
|
47
|
-
throw new Error("uri is required");
|
|
48
|
-
}
|
|
49
|
-
if (uri?.startsWith("db://")) {
|
|
50
|
-
return new remote_1.RemoteConnection(uri, opts);
|
|
51
|
-
}
|
|
52
|
-
opts = opts ?? {};
|
|
53
|
-
opts.storageOptions = (0, connection_1.cleanseStorageOptions)(opts.storageOptions);
|
|
54
|
-
const nativeConn = await native_js_1.Connection.new(uri, opts);
|
|
55
|
-
return new connection_1.LocalConnection(nativeConn);
|
|
56
|
-
}
|
|
57
|
-
exports.connect = connect;
|
|
@@ -1,165 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Options to create an `IVF_PQ` index
|
|
3
|
-
*/
|
|
4
|
-
export interface IvfPqOptions {
|
|
5
|
-
/**
|
|
6
|
-
* The number of IVF partitions to create.
|
|
7
|
-
*
|
|
8
|
-
* This value should generally scale with the number of rows in the dataset.
|
|
9
|
-
* By default the number of partitions is the square root of the number of
|
|
10
|
-
* rows.
|
|
11
|
-
*
|
|
12
|
-
* If this value is too large then the first part of the search (picking the
|
|
13
|
-
* right partition) will be slow. If this value is too small then the second
|
|
14
|
-
* part of the search (searching within a partition) will be slow.
|
|
15
|
-
*/
|
|
16
|
-
numPartitions?: number;
|
|
17
|
-
/**
|
|
18
|
-
* Number of sub-vectors of PQ.
|
|
19
|
-
*
|
|
20
|
-
* This value controls how much the vector is compressed during the quantization step.
|
|
21
|
-
* The more sub vectors there are the less the vector is compressed. The default is
|
|
22
|
-
* the dimension of the vector divided by 16. If the dimension is not evenly divisible
|
|
23
|
-
* by 16 we use the dimension divided by 8.
|
|
24
|
-
*
|
|
25
|
-
* The above two cases are highly preferred. Having 8 or 16 values per subvector allows
|
|
26
|
-
* us to use efficient SIMD instructions.
|
|
27
|
-
*
|
|
28
|
-
* If the dimension is not divisible by 8 then we use 1 subvector. This is not ideal and
|
|
29
|
-
* will likely result in poor performance.
|
|
30
|
-
*/
|
|
31
|
-
numSubVectors?: number;
|
|
32
|
-
/**
|
|
33
|
-
* Distance type to use to build the index.
|
|
34
|
-
*
|
|
35
|
-
* Default value is "l2".
|
|
36
|
-
*
|
|
37
|
-
* This is used when training the index to calculate the IVF partitions
|
|
38
|
-
* (vectors are grouped in partitions with similar vectors according to this
|
|
39
|
-
* distance type) and to calculate a subvector's code during quantization.
|
|
40
|
-
*
|
|
41
|
-
* The distance type used to train an index MUST match the distance type used
|
|
42
|
-
* to search the index. Failure to do so will yield inaccurate results.
|
|
43
|
-
*
|
|
44
|
-
* The following distance types are available:
|
|
45
|
-
*
|
|
46
|
-
* "l2" - Euclidean distance. This is a very common distance metric that
|
|
47
|
-
* accounts for both magnitude and direction when determining the distance
|
|
48
|
-
* between vectors. L2 distance has a range of [0, ∞).
|
|
49
|
-
*
|
|
50
|
-
* "cosine" - Cosine distance. Cosine distance is a distance metric
|
|
51
|
-
* calculated from the cosine similarity between two vectors. Cosine
|
|
52
|
-
* similarity is a measure of similarity between two non-zero vectors of an
|
|
53
|
-
* inner product space. It is defined to equal the cosine of the angle
|
|
54
|
-
* between them. Unlike L2, the cosine distance is not affected by the
|
|
55
|
-
* magnitude of the vectors. Cosine distance has a range of [0, 2].
|
|
56
|
-
*
|
|
57
|
-
* Note: the cosine distance is undefined when one (or both) of the vectors
|
|
58
|
-
* are all zeros (there is no direction). These vectors are invalid and may
|
|
59
|
-
* never be returned from a vector search.
|
|
60
|
-
*
|
|
61
|
-
* "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
|
62
|
-
* distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
|
63
|
-
* L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
|
64
|
-
*/
|
|
65
|
-
distanceType?: "l2" | "cosine" | "dot";
|
|
66
|
-
/**
|
|
67
|
-
* Maximum number of iterations to train IVF kmeans.
|
|
68
|
-
*
|
|
69
|
-
* When training an IVF PQ index we use kmeans to calculate the partitions. This parameter
|
|
70
|
-
* controls how many iterations of kmeans to run.
|
|
71
|
-
*
|
|
72
|
-
* Increasing this might improve the quality of the index but in most cases these extra
|
|
73
|
-
* iterations have diminishing returns.
|
|
74
|
-
*
|
|
75
|
-
* The default value is 50.
|
|
76
|
-
*/
|
|
77
|
-
maxIterations?: number;
|
|
78
|
-
/**
|
|
79
|
-
* The number of vectors, per partition, to sample when training IVF kmeans.
|
|
80
|
-
*
|
|
81
|
-
* When an IVF PQ index is trained, we need to calculate partitions. These are groups
|
|
82
|
-
* of vectors that are similar to each other. To do this we use an algorithm called kmeans.
|
|
83
|
-
*
|
|
84
|
-
* Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
|
|
85
|
-
* random sample of the data. This parameter controls the size of the sample. The total
|
|
86
|
-
* number of vectors used to train the index is `sample_rate * num_partitions`.
|
|
87
|
-
*
|
|
88
|
-
* Increasing this value might improve the quality of the index but in most cases the
|
|
89
|
-
* default should be sufficient.
|
|
90
|
-
*
|
|
91
|
-
* The default value is 256.
|
|
92
|
-
*/
|
|
93
|
-
sampleRate?: number;
|
|
94
|
-
}
|
|
95
|
-
export declare class Index {
|
|
96
|
-
private readonly inner;
|
|
97
|
-
private constructor();
|
|
98
|
-
/**
|
|
99
|
-
* Create an IvfPq index
|
|
100
|
-
*
|
|
101
|
-
* This index stores a compressed (quantized) copy of every vector. These vectors
|
|
102
|
-
* are grouped into partitions of similar vectors. Each partition keeps track of
|
|
103
|
-
* a centroid which is the average value of all vectors in the group.
|
|
104
|
-
*
|
|
105
|
-
* During a query the centroids are compared with the query vector to find the closest
|
|
106
|
-
* partitions. The compressed vectors in these partitions are then searched to find
|
|
107
|
-
* the closest vectors.
|
|
108
|
-
*
|
|
109
|
-
* The compression scheme is called product quantization. Each vector is divided into
|
|
110
|
-
* subvectors and then each subvector is quantized into a small number of bits. The
|
|
111
|
-
* parameters `num_bits` and `num_subvectors` control this process, providing a tradeoff
|
|
112
|
-
* between index size (and thus search speed) and index accuracy.
|
|
113
|
-
*
|
|
114
|
-
* The partitioning process is called IVF and the `num_partitions` parameter controls how
|
|
115
|
-
* many groups to create.
|
|
116
|
-
*
|
|
117
|
-
* Note that training an IVF PQ index on a large dataset is a slow operation and
|
|
118
|
-
* currently is also a memory intensive operation.
|
|
119
|
-
*/
|
|
120
|
-
static ivfPq(options?: Partial<IvfPqOptions>): Index;
|
|
121
|
-
/**
|
|
122
|
-
* Create a btree index
|
|
123
|
-
*
|
|
124
|
-
* A btree index is an index on a scalar column. The index stores a copy of the column
|
|
125
|
-
* in sorted order. A header entry is created for each block of rows (currently the
|
|
126
|
-
* block size is fixed at 4096). These header entries are stored in a separate
|
|
127
|
-
* cacheable structure (a btree). To search for data the header is used to determine
|
|
128
|
-
* which blocks need to be read from disk.
|
|
129
|
-
*
|
|
130
|
-
* For example, a btree index in a table with 1Bi rows requires sizeof(Scalar) * 256Ki
|
|
131
|
-
* bytes of memory and will generally need to read sizeof(Scalar) * 4096 bytes to find
|
|
132
|
-
* the correct row ids.
|
|
133
|
-
*
|
|
134
|
-
* This index is good for scalar columns with mostly distinct values and does best when
|
|
135
|
-
* the query is highly selective.
|
|
136
|
-
*
|
|
137
|
-
* The btree index does not currently have any parameters though parameters such as the
|
|
138
|
-
* block size may be added in the future.
|
|
139
|
-
*/
|
|
140
|
-
static btree(): Index;
|
|
141
|
-
}
|
|
142
|
-
export interface IndexOptions {
|
|
143
|
-
/**
|
|
144
|
-
* Advanced index configuration
|
|
145
|
-
*
|
|
146
|
-
* This option allows you to specify a specific index to create and also
|
|
147
|
-
* allows you to pass in configuration for training the index.
|
|
148
|
-
*
|
|
149
|
-
* See the static methods on Index for details on the various index types.
|
|
150
|
-
*
|
|
151
|
-
* If this is not supplied then column data type(s) and column statistics
|
|
152
|
-
* will be used to determine the most useful kind of index to create.
|
|
153
|
-
*/
|
|
154
|
-
config?: Index;
|
|
155
|
-
/**
|
|
156
|
-
* Whether to replace the existing index
|
|
157
|
-
*
|
|
158
|
-
* If this is false, and another index already exists on the same columns
|
|
159
|
-
* and the same name, then an error will be returned. This is true even if
|
|
160
|
-
* that index is out of date.
|
|
161
|
-
*
|
|
162
|
-
* The default is true
|
|
163
|
-
*/
|
|
164
|
-
replace?: boolean;
|
|
165
|
-
}
|
|
@@ -1,71 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// Copyright 2024 Lance Developers.
|
|
3
|
-
//
|
|
4
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
// you may not use this file except in compliance with the License.
|
|
6
|
-
// You may obtain a copy of the License at
|
|
7
|
-
//
|
|
8
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
//
|
|
10
|
-
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
// See the License for the specific language governing permissions and
|
|
14
|
-
// limitations under the License.
|
|
15
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
-
exports.Index = void 0;
|
|
17
|
-
const native_1 = require("./native");
|
|
18
|
-
class Index {
|
|
19
|
-
inner;
|
|
20
|
-
constructor(inner) {
|
|
21
|
-
this.inner = inner;
|
|
22
|
-
}
|
|
23
|
-
/**
|
|
24
|
-
* Create an IvfPq index
|
|
25
|
-
*
|
|
26
|
-
* This index stores a compressed (quantized) copy of every vector. These vectors
|
|
27
|
-
* are grouped into partitions of similar vectors. Each partition keeps track of
|
|
28
|
-
* a centroid which is the average value of all vectors in the group.
|
|
29
|
-
*
|
|
30
|
-
* During a query the centroids are compared with the query vector to find the closest
|
|
31
|
-
* partitions. The compressed vectors in these partitions are then searched to find
|
|
32
|
-
* the closest vectors.
|
|
33
|
-
*
|
|
34
|
-
* The compression scheme is called product quantization. Each vector is divided into
|
|
35
|
-
* subvectors and then each subvector is quantized into a small number of bits. The
|
|
36
|
-
* parameters `num_bits` and `num_subvectors` control this process, providing a tradeoff
|
|
37
|
-
* between index size (and thus search speed) and index accuracy.
|
|
38
|
-
*
|
|
39
|
-
* The partitioning process is called IVF and the `num_partitions` parameter controls how
|
|
40
|
-
* many groups to create.
|
|
41
|
-
*
|
|
42
|
-
* Note that training an IVF PQ index on a large dataset is a slow operation and
|
|
43
|
-
* currently is also a memory intensive operation.
|
|
44
|
-
*/
|
|
45
|
-
static ivfPq(options) {
|
|
46
|
-
return new Index(native_1.Index.ivfPq(options?.distanceType, options?.numPartitions, options?.numSubVectors, options?.maxIterations, options?.sampleRate));
|
|
47
|
-
}
|
|
48
|
-
/**
|
|
49
|
-
* Create a btree index
|
|
50
|
-
*
|
|
51
|
-
* A btree index is an index on a scalar column. The index stores a copy of the column
|
|
52
|
-
* in sorted order. A header entry is created for each block of rows (currently the
|
|
53
|
-
* block size is fixed at 4096). These header entries are stored in a separate
|
|
54
|
-
* cacheable structure (a btree). To search for data the header is used to determine
|
|
55
|
-
* which blocks need to be read from disk.
|
|
56
|
-
*
|
|
57
|
-
* For example, a btree index in a table with 1Bi rows requires sizeof(Scalar) * 256Ki
|
|
58
|
-
* bytes of memory and will generally need to read sizeof(Scalar) * 4096 bytes to find
|
|
59
|
-
* the correct row ids.
|
|
60
|
-
*
|
|
61
|
-
* This index is good for scalar columns with mostly distinct values and does best when
|
|
62
|
-
* the query is highly selective.
|
|
63
|
-
*
|
|
64
|
-
* The btree index does not currently have any parameters though parameters such as the
|
|
65
|
-
* block size may be added in the future.
|
|
66
|
-
*/
|
|
67
|
-
static btree() {
|
|
68
|
-
return new Index(native_1.Index.btree());
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
exports.Index = Index;
|