@lancedb/lancedb 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/biome.json +8 -2
- package/dist/arrow.d.ts +36 -9
- package/dist/arrow.js +222 -24
- package/dist/connection.d.ts +10 -1
- package/dist/connection.js +13 -7
- package/dist/embedding/embedding_function.d.ts +54 -28
- package/dist/embedding/embedding_function.js +89 -10
- package/dist/embedding/index.d.ts +28 -2
- package/dist/embedding/index.js +111 -4
- package/dist/embedding/openai.d.ts +16 -7
- package/dist/embedding/openai.js +62 -12
- package/dist/embedding/registry.d.ts +58 -0
- package/dist/embedding/registry.js +127 -0
- package/dist/native.d.ts +5 -4
- package/dist/query.d.ts +19 -7
- package/dist/query.js +27 -13
- package/dist/sanitize.d.ts +22 -1
- package/dist/sanitize.js +123 -110
- package/dist/table.d.ts +18 -3
- package/dist/table.js +33 -3
- package/lancedb/arrow.ts +243 -41
- package/lancedb/connection.ts +35 -6
- package/lancedb/embedding/embedding_function.ts +147 -42
- package/lancedb/embedding/index.ts +113 -2
- package/lancedb/embedding/openai.ts +62 -16
- package/lancedb/embedding/registry.ts +176 -0
- package/lancedb/query.ts +58 -14
- package/lancedb/sanitize.ts +22 -22
- package/lancedb/table.ts +67 -5
- package/nodejs-artifacts/arrow.d.ts +36 -9
- package/nodejs-artifacts/arrow.js +222 -24
- package/nodejs-artifacts/connection.d.ts +10 -1
- package/nodejs-artifacts/connection.js +13 -7
- package/nodejs-artifacts/embedding/embedding_function.d.ts +54 -28
- package/nodejs-artifacts/embedding/embedding_function.js +89 -10
- package/nodejs-artifacts/embedding/index.d.ts +28 -2
- package/nodejs-artifacts/embedding/index.js +111 -4
- package/nodejs-artifacts/embedding/openai.d.ts +16 -7
- package/nodejs-artifacts/embedding/openai.js +62 -12
- package/nodejs-artifacts/embedding/registry.d.ts +58 -0
- package/nodejs-artifacts/embedding/registry.js +127 -0
- package/nodejs-artifacts/native.d.ts +5 -4
- package/nodejs-artifacts/query.d.ts +19 -7
- package/nodejs-artifacts/query.js +27 -13
- package/nodejs-artifacts/sanitize.d.ts +22 -1
- package/nodejs-artifacts/sanitize.js +123 -110
- package/nodejs-artifacts/table.d.ts +18 -3
- package/nodejs-artifacts/table.js +33 -3
- package/package.json +14 -9
- package/tsconfig.json +3 -1
|
@@ -1,7 +1,114 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
// Copyright 2023 Lance Developers.
|
|
3
|
+
//
|
|
4
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
// you may not use this file except in compliance with the License.
|
|
6
|
+
// You may obtain a copy of the License at
|
|
7
|
+
//
|
|
8
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
//
|
|
10
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
// See the License for the specific language governing permissions and
|
|
14
|
+
// limitations under the License.
|
|
15
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
16
|
+
if (k2 === undefined) k2 = k;
|
|
17
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
18
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
19
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
20
|
+
}
|
|
21
|
+
Object.defineProperty(o, k2, desc);
|
|
22
|
+
}) : (function(o, m, k, k2) {
|
|
23
|
+
if (k2 === undefined) k2 = k;
|
|
24
|
+
o[k2] = m[k];
|
|
25
|
+
}));
|
|
26
|
+
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
27
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
28
|
+
};
|
|
2
29
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.
|
|
30
|
+
exports.LanceSchema = exports.EmbeddingFunction = void 0;
|
|
31
|
+
const arrow_1 = require("../arrow");
|
|
32
|
+
const arrow_2 = require("../arrow");
|
|
33
|
+
const sanitize_1 = require("../sanitize");
|
|
34
|
+
const registry_1 = require("./registry");
|
|
4
35
|
var embedding_function_1 = require("./embedding_function");
|
|
5
|
-
Object.defineProperty(exports, "
|
|
6
|
-
|
|
7
|
-
|
|
36
|
+
Object.defineProperty(exports, "EmbeddingFunction", { enumerable: true, get: function () { return embedding_function_1.EmbeddingFunction; } });
|
|
37
|
+
// We need to explicitly export '*' so that the `register` decorator actually registers the class.
|
|
38
|
+
__exportStar(require("./openai"), exports);
|
|
39
|
+
__exportStar(require("./registry"), exports);
|
|
40
|
+
/**
 * Build an Arrow schema whose table metadata describes the embedding
 * functions attached to its fields.
 *
 * Each entry of `fields` is either a bare data type, or a
 * `[dataType, metadata]` pair as produced by an embedding function's
 * `sourceField` / `vectorField` helpers (see the example).
 *
 * @param fields Object mapping column names to field definitions.
 * @returns Schema containing one field per entry, with metadata obtained
 *   from the global embedding function registry.
 * @example
 * ```ts
 * class MyEmbeddingFunction extends EmbeddingFunction {
 *   // ...
 * }
 * const func = new MyEmbeddingFunction();
 * const schema = LanceSchema({
 *   id: new Int32(),
 *   text: func.sourceField(new Utf8()),
 *   vector: func.vectorField(),
 *   // optional: specify the datatype and/or dimensions
 *   vector2: func.vectorField({ datatype: new Float32(), dims: 3}),
 * });
 *
 * const table = await db.createTable("my_table", data, { schema });
 * ```
 */
function LanceSchema(fields) {
    const columns = [];
    const configsByFunction = new Map();
    for (const [columnName, definition] of Object.entries(fields)) {
        if ((0, arrow_2.isDataType)(definition)) {
            // Bare data type: no embedding metadata to collect.
            // The third Field argument is `true` for every column (presumably "nullable").
            columns.push(new arrow_1.Field(columnName, (0, sanitize_1.sanitizeType)(definition), true));
            continue;
        }
        // Otherwise the definition is a [dataType, metadata] pair.
        const [dataType, fieldMetadata] = definition;
        columns.push(new arrow_1.Field(columnName, (0, sanitize_1.sanitizeType)(dataType), true));
        parseEmbeddingFunctions(configsByFunction, columnName, fieldMetadata);
    }
    const tableMetadata = (0, registry_1.getRegistry)().getTableMetadata(Array.from(configsByFunction.values()));
    return new arrow_1.Schema(columns, tableMetadata);
}
|
|
80
|
+
exports.LanceSchema = LanceSchema;
|
|
81
|
+
function parseEmbeddingFunctions(embeddingFunctions, key, metadata) {
|
|
82
|
+
if (metadata.has("source_column_for")) {
|
|
83
|
+
const embedFunction = metadata.get("source_column_for");
|
|
84
|
+
const current = embeddingFunctions.get(embedFunction);
|
|
85
|
+
if (current !== undefined) {
|
|
86
|
+
embeddingFunctions.set(embedFunction, {
|
|
87
|
+
...current,
|
|
88
|
+
sourceColumn: key,
|
|
89
|
+
});
|
|
90
|
+
}
|
|
91
|
+
else {
|
|
92
|
+
embeddingFunctions.set(embedFunction, {
|
|
93
|
+
sourceColumn: key,
|
|
94
|
+
function: embedFunction,
|
|
95
|
+
});
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
else if (metadata.has("vector_column_for")) {
|
|
99
|
+
const embedFunction = metadata.get("vector_column_for");
|
|
100
|
+
const current = embeddingFunctions.get(embedFunction);
|
|
101
|
+
if (current !== undefined) {
|
|
102
|
+
embeddingFunctions.set(embedFunction, {
|
|
103
|
+
...current,
|
|
104
|
+
vectorColumn: key,
|
|
105
|
+
});
|
|
106
|
+
}
|
|
107
|
+
else {
|
|
108
|
+
embeddingFunctions.set(embedFunction, {
|
|
109
|
+
vectorColumn: key,
|
|
110
|
+
function: embedFunction,
|
|
111
|
+
});
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
@@ -1,8 +1,17 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
1
|
+
import { Float } from "../arrow";
|
|
2
|
+
import { EmbeddingFunction } from "./embedding_function";
|
|
3
|
+
export type OpenAIOptions = {
|
|
4
|
+
apiKey?: string;
|
|
5
|
+
model?: string;
|
|
6
|
+
};
|
|
7
|
+
export declare class OpenAIEmbeddingFunction extends EmbeddingFunction<string, OpenAIOptions> {
|
|
8
|
+
#private;
|
|
9
|
+
constructor(options?: OpenAIOptions);
|
|
10
|
+
toJSON(): {
|
|
11
|
+
model: string;
|
|
12
|
+
};
|
|
13
|
+
ndims(): number;
|
|
14
|
+
embeddingDataType(): Float;
|
|
15
|
+
computeSourceEmbeddings(data: string[]): Promise<number[][]>;
|
|
16
|
+
computeQueryEmbeddings(data: string): Promise<number[]>;
|
|
8
17
|
}
|
|
@@ -12,12 +12,30 @@
|
|
|
12
12
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
13
|
// See the License for the specific language governing permissions and
|
|
14
14
|
// limitations under the License.
|
|
15
|
+
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
|
|
16
|
+
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
|
|
17
|
+
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
|
|
18
|
+
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
19
|
+
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
20
|
+
};
|
|
21
|
+
var __metadata = (this && this.__metadata) || function (k, v) {
|
|
22
|
+
if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
|
|
23
|
+
};
|
|
15
24
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
25
|
exports.OpenAIEmbeddingFunction = void 0;
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
26
|
+
const arrow_1 = require("../arrow");
|
|
27
|
+
const embedding_function_1 = require("./embedding_function");
|
|
28
|
+
const registry_1 = require("./registry");
|
|
29
|
+
let OpenAIEmbeddingFunction = class OpenAIEmbeddingFunction extends embedding_function_1.EmbeddingFunction {
|
|
30
|
+
#openai;
|
|
31
|
+
#modelName;
|
|
32
|
+
constructor(options = { model: "text-embedding-ada-002" }) {
|
|
33
|
+
super();
|
|
34
|
+
const openAIKey = options?.apiKey ?? process.env.OPENAI_API_KEY;
|
|
35
|
+
if (!openAIKey) {
|
|
36
|
+
throw new Error("OpenAI API key is required");
|
|
37
|
+
}
|
|
38
|
+
const modelName = options?.model ?? "text-embedding-ada-002";
|
|
21
39
|
/**
|
|
22
40
|
* @type {import("openai").default}
|
|
23
41
|
*/
|
|
@@ -30,16 +48,35 @@ class OpenAIEmbeddingFunction {
|
|
|
30
48
|
catch {
|
|
31
49
|
throw new Error("please install openai@^4.24.1 using npm install openai");
|
|
32
50
|
}
|
|
33
|
-
this.sourceColumn = sourceColumn;
|
|
34
51
|
const configuration = {
|
|
35
52
|
apiKey: openAIKey,
|
|
36
53
|
};
|
|
37
|
-
this
|
|
38
|
-
this
|
|
54
|
+
this.#openai = new Openai(configuration);
|
|
55
|
+
this.#modelName = modelName;
|
|
56
|
+
}
|
|
57
|
+
toJSON() {
|
|
58
|
+
return {
|
|
59
|
+
model: this.#modelName,
|
|
60
|
+
};
|
|
61
|
+
}
|
|
62
|
+
ndims() {
|
|
63
|
+
switch (this.#modelName) {
|
|
64
|
+
case "text-embedding-ada-002":
|
|
65
|
+
return 1536;
|
|
66
|
+
case "text-embedding-3-large":
|
|
67
|
+
return 3072;
|
|
68
|
+
case "text-embedding-3-small":
|
|
69
|
+
return 1536;
|
|
70
|
+
default:
|
|
71
|
+
return null;
|
|
72
|
+
}
|
|
39
73
|
}
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
74
|
+
embeddingDataType() {
|
|
75
|
+
return new arrow_1.Float32();
|
|
76
|
+
}
|
|
77
|
+
async computeSourceEmbeddings(data) {
|
|
78
|
+
const response = await this.#openai.embeddings.create({
|
|
79
|
+
model: this.#modelName,
|
|
43
80
|
input: data,
|
|
44
81
|
});
|
|
45
82
|
const embeddings = [];
|
|
@@ -48,6 +85,19 @@ class OpenAIEmbeddingFunction {
|
|
|
48
85
|
}
|
|
49
86
|
return embeddings;
|
|
50
87
|
}
|
|
51
|
-
|
|
52
|
-
|
|
88
|
+
async computeQueryEmbeddings(data) {
|
|
89
|
+
if (typeof data !== "string") {
|
|
90
|
+
throw new Error("Data must be a string");
|
|
91
|
+
}
|
|
92
|
+
const response = await this.#openai.embeddings.create({
|
|
93
|
+
model: this.#modelName,
|
|
94
|
+
input: data,
|
|
95
|
+
});
|
|
96
|
+
return response.data[0].embedding;
|
|
97
|
+
}
|
|
98
|
+
};
|
|
53
99
|
exports.OpenAIEmbeddingFunction = OpenAIEmbeddingFunction;
|
|
100
|
+
exports.OpenAIEmbeddingFunction = OpenAIEmbeddingFunction = __decorate([
|
|
101
|
+
(0, registry_1.register)("openai"),
|
|
102
|
+
__metadata("design:paramtypes", [Object])
|
|
103
|
+
], OpenAIEmbeddingFunction);
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import type { EmbeddingFunction } from "./embedding_function";
|
|
2
|
+
import "reflect-metadata";
|
|
3
|
+
export interface EmbeddingFunctionOptions {
|
|
4
|
+
[key: string]: unknown;
|
|
5
|
+
}
|
|
6
|
+
export interface EmbeddingFunctionFactory<T extends EmbeddingFunction = EmbeddingFunction> {
|
|
7
|
+
new (modelOptions?: EmbeddingFunctionOptions): T;
|
|
8
|
+
}
|
|
9
|
+
interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
|
|
10
|
+
create(options?: EmbeddingFunctionOptions): T;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* This is a singleton class used to register embedding functions
|
|
14
|
+
* and fetch them by name. It also handles serializing and deserializing.
|
|
15
|
+
* You can implement your own embedding function by subclassing EmbeddingFunction
|
|
16
|
+
* or TextEmbeddingFunction and registering it with the registry
|
|
17
|
+
*/
|
|
18
|
+
export declare class EmbeddingFunctionRegistry {
|
|
19
|
+
#private;
|
|
20
|
+
/**
|
|
21
|
+
* Register an embedding function
|
|
22
|
+
* @param name The name of the function
|
|
23
|
+
* @param func The function to register
|
|
24
|
+
* @throws Error if the function is already registered
|
|
25
|
+
*/
|
|
26
|
+
register<T extends EmbeddingFunctionFactory = EmbeddingFunctionFactory>(this: EmbeddingFunctionRegistry, alias?: string): (ctor: T) => any;
|
|
27
|
+
/**
|
|
28
|
+
* Fetch an embedding function by name
|
|
29
|
+
* @param name The name of the function
|
|
30
|
+
*/
|
|
31
|
+
get<T extends EmbeddingFunction<unknown> = EmbeddingFunction>(name: string): EmbeddingFunctionCreate<T> | undefined;
|
|
32
|
+
/**
|
|
33
|
+
* reset the registry to the initial state
|
|
34
|
+
*/
|
|
35
|
+
reset(this: EmbeddingFunctionRegistry): void;
|
|
36
|
+
/**
|
|
37
|
+
* @ignore
|
|
38
|
+
*/
|
|
39
|
+
parseFunctions(this: EmbeddingFunctionRegistry, metadata: Map<string, string>): Map<string, EmbeddingFunctionConfig>;
|
|
40
|
+
functionToMetadata(conf: EmbeddingFunctionConfig): Record<string, any>;
|
|
41
|
+
getTableMetadata(functions: EmbeddingFunctionConfig[]): Map<string, string>;
|
|
42
|
+
}
|
|
43
|
+
export declare function register(name?: string): (ctor: EmbeddingFunctionFactory<EmbeddingFunction<any, import("./embedding_function").FunctionOptions>>) => any;
|
|
44
|
+
/**
|
|
45
|
+
* Utility function to get the global instance of the registry
|
|
46
|
+
* @returns `EmbeddingFunctionRegistry` The global instance of the registry
|
|
47
|
+
* @example
|
|
48
|
+
* ```ts
|
|
49
|
+
* const registry = getRegistry();
|
|
50
|
+
* const openai = registry.get("openai").create();
* ```
|
|
51
|
+
*/
|
|
52
|
+
export declare function getRegistry(): EmbeddingFunctionRegistry;
|
|
53
|
+
export interface EmbeddingFunctionConfig {
|
|
54
|
+
sourceColumn: string;
|
|
55
|
+
vectorColumn?: string;
|
|
56
|
+
function: EmbeddingFunction;
|
|
57
|
+
}
|
|
58
|
+
export {};
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Copyright 2024 Lance Developers.
|
|
3
|
+
//
|
|
4
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
// you may not use this file except in compliance with the License.
|
|
6
|
+
// You may obtain a copy of the License at
|
|
7
|
+
//
|
|
8
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
//
|
|
10
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
// See the License for the specific language governing permissions and
|
|
14
|
+
// limitations under the License.
|
|
15
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
+
exports.getRegistry = exports.register = exports.EmbeddingFunctionRegistry = void 0;
|
|
17
|
+
require("reflect-metadata");
|
|
18
|
+
/**
|
|
19
|
+
* This is a singleton class used to register embedding functions
|
|
20
|
+
* and fetch them by name. It also handles serializing and deserializing.
|
|
21
|
+
* You can implement your own embedding function by subclassing EmbeddingFunction
|
|
22
|
+
* or TextEmbeddingFunction and registering it with the registry
|
|
23
|
+
*/
|
|
24
|
+
class EmbeddingFunctionRegistry {
|
|
25
|
+
#functions = new Map();
|
|
26
|
+
/**
|
|
27
|
+
* Register an embedding function
|
|
28
|
+
* @param name The name of the function
|
|
29
|
+
* @param func The function to register
|
|
30
|
+
* @throws Error if the function is already registered
|
|
31
|
+
*/
|
|
32
|
+
register(alias) {
|
|
33
|
+
const self = this;
|
|
34
|
+
return function (ctor) {
|
|
35
|
+
if (!alias) {
|
|
36
|
+
alias = ctor.name;
|
|
37
|
+
}
|
|
38
|
+
if (self.#functions.has(alias)) {
|
|
39
|
+
throw new Error(`Embedding function with alias "${alias}" already exists`);
|
|
40
|
+
}
|
|
41
|
+
self.#functions.set(alias, ctor);
|
|
42
|
+
Reflect.defineMetadata("lancedb::embedding::name", alias, ctor);
|
|
43
|
+
return ctor;
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Fetch an embedding function by name
|
|
48
|
+
* @param name The name of the function
|
|
49
|
+
*/
|
|
50
|
+
get(name) {
|
|
51
|
+
const factory = this.#functions.get(name);
|
|
52
|
+
if (!factory) {
|
|
53
|
+
return undefined;
|
|
54
|
+
}
|
|
55
|
+
return {
|
|
56
|
+
create: function (options) {
|
|
57
|
+
return new factory(options);
|
|
58
|
+
},
|
|
59
|
+
};
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* reset the registry to the initial state
|
|
63
|
+
*/
|
|
64
|
+
reset() {
|
|
65
|
+
this.#functions.clear();
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* @ignore
|
|
69
|
+
*/
|
|
70
|
+
parseFunctions(metadata) {
|
|
71
|
+
if (!metadata.has("embedding_functions")) {
|
|
72
|
+
return new Map();
|
|
73
|
+
}
|
|
74
|
+
else {
|
|
75
|
+
const functions = (JSON.parse(metadata.get("embedding_functions")));
|
|
76
|
+
return new Map(functions.map((f) => {
|
|
77
|
+
const fn = this.get(f.name);
|
|
78
|
+
if (!fn) {
|
|
79
|
+
throw new Error(`Function "${f.name}" not found in registry`);
|
|
80
|
+
}
|
|
81
|
+
return [
|
|
82
|
+
f.name,
|
|
83
|
+
{
|
|
84
|
+
sourceColumn: f.sourceColumn,
|
|
85
|
+
vectorColumn: f.vectorColumn,
|
|
86
|
+
function: this.get(f.name).create(f.model),
|
|
87
|
+
},
|
|
88
|
+
];
|
|
89
|
+
}));
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
93
|
+
functionToMetadata(conf) {
|
|
94
|
+
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
95
|
+
const metadata = {};
|
|
96
|
+
const name = Reflect.getMetadata("lancedb::embedding::name", conf.function.constructor);
|
|
97
|
+
metadata["sourceColumn"] = conf.sourceColumn;
|
|
98
|
+
metadata["vectorColumn"] = conf.vectorColumn ?? "vector";
|
|
99
|
+
metadata["name"] = name ?? conf.function.constructor.name;
|
|
100
|
+
metadata["model"] = conf.function.toJSON();
|
|
101
|
+
return metadata;
|
|
102
|
+
}
|
|
103
|
+
getTableMetadata(functions) {
|
|
104
|
+
const metadata = new Map();
|
|
105
|
+
const jsonData = functions.map((conf) => this.functionToMetadata(conf));
|
|
106
|
+
metadata.set("embedding_functions", JSON.stringify(jsonData));
|
|
107
|
+
return metadata;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
exports.EmbeddingFunctionRegistry = EmbeddingFunctionRegistry;
|
|
111
|
+
const _REGISTRY = new EmbeddingFunctionRegistry();
|
|
112
|
+
/**
 * Create a class decorator that registers an embedding function with the
 * module-level registry instance.
 *
 * @param name Optional name to register under; forwarded unchanged to the
 *   registry's `register` method.
 * @returns The decorator produced by the registry.
 */
function register(name) {
    const decorator = _REGISTRY.register(name);
    return decorator;
}
|
|
115
|
+
exports.register = register;
|
|
116
|
+
/**
 * Return the module-level instance of the embedding function registry.
 *
 * @returns `EmbeddingFunctionRegistry` The shared registry instance.
 * @example
 * ```ts
 * const registry = getRegistry();
 * const openai = registry.get("openai").create();
 * ```
 */
function getRegistry() {
    return _REGISTRY;
}
|
|
127
|
+
exports.getRegistry = getRegistry;
|
|
@@ -102,6 +102,7 @@ export const enum WriteMode {
|
|
|
102
102
|
}
|
|
103
103
|
/** Write options when creating a Table. */
|
|
104
104
|
export interface WriteOptions {
|
|
105
|
+
/** Write mode for writing to a table. */
|
|
105
106
|
mode?: WriteMode
|
|
106
107
|
}
|
|
107
108
|
export interface OpenTableOptions {
|
|
@@ -123,8 +124,8 @@ export class Connection {
|
|
|
123
124
|
* - buf: The buffer containing the IPC file.
|
|
124
125
|
*
|
|
125
126
|
*/
|
|
126
|
-
createTable(name: string, buf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null): Promise<Table>
|
|
127
|
-
createEmptyTable(name: string, schemaBuf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null): Promise<Table>
|
|
127
|
+
createTable(name: string, buf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
|
|
128
|
+
createEmptyTable(name: string, schemaBuf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
|
|
128
129
|
openTable(name: string, storageOptions?: Record<string, string> | undefined | null, indexCacheSize?: number | undefined | null): Promise<Table>
|
|
129
130
|
/** Drop table with the name. Or raise an error if the table does not exist. */
|
|
130
131
|
dropTable(name: string): Promise<void>
|
|
@@ -142,7 +143,7 @@ export class Query {
|
|
|
142
143
|
select(columns: Array<[string, string]>): void
|
|
143
144
|
limit(limit: number): void
|
|
144
145
|
nearestTo(vector: Float32Array): VectorQuery
|
|
145
|
-
execute(): Promise<RecordBatchIterator>
|
|
146
|
+
execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
|
|
146
147
|
}
|
|
147
148
|
export class VectorQuery {
|
|
148
149
|
column(column: string): void
|
|
@@ -154,7 +155,7 @@ export class VectorQuery {
|
|
|
154
155
|
onlyIf(predicate: string): void
|
|
155
156
|
select(columns: Array<[string, string]>): void
|
|
156
157
|
limit(limit: number): void
|
|
157
|
-
execute(): Promise<RecordBatchIterator>
|
|
158
|
+
execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
|
|
158
159
|
}
|
|
159
160
|
export class Table {
|
|
160
161
|
display(): string
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Table as ArrowTable, RecordBatch } from "
|
|
1
|
+
import { Table as ArrowTable, type IntoVector, RecordBatch } from "./arrow";
|
|
2
2
|
import { RecordBatchIterator as NativeBatchIterator, Query as NativeQuery, Table as NativeTable, VectorQuery as NativeVectorQuery } from "./native";
|
|
3
3
|
export declare class RecordBatchIterator implements AsyncIterator<RecordBatch> {
|
|
4
4
|
private promisedInner?;
|
|
@@ -6,6 +6,18 @@ export declare class RecordBatchIterator implements AsyncIterator<RecordBatch> {
|
|
|
6
6
|
constructor(promise?: Promise<NativeBatchIterator>);
|
|
7
7
|
next(): Promise<IteratorResult<RecordBatch<any>>>;
|
|
8
8
|
}
|
|
9
|
+
/**
|
|
10
|
+
* Options that control the behavior of a particular query execution
|
|
11
|
+
*/
|
|
12
|
+
export interface QueryExecutionOptions {
|
|
13
|
+
/**
|
|
14
|
+
* The maximum number of rows to return in a single batch
|
|
15
|
+
*
|
|
16
|
+
* Batches may have fewer rows if the underlying data is stored
|
|
17
|
+
* in smaller chunks.
|
|
18
|
+
*/
|
|
19
|
+
maxBatchLength?: number;
|
|
20
|
+
}
|
|
9
21
|
/** Common methods supported by all query types */
|
|
10
22
|
export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery, QueryType> implements AsyncIterable<RecordBatch> {
|
|
11
23
|
protected inner: NativeQueryType;
|
|
@@ -53,7 +65,7 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
|
|
|
53
65
|
* uses `Object.entries` which should preserve the insertion order of the object. However,
|
|
54
66
|
* object insertion order is easy to get wrong and `Map` is more foolproof.
|
|
55
67
|
*/
|
|
56
|
-
select(columns: string[] | Map<string, string> | Record<string, string>): QueryType;
|
|
68
|
+
select(columns: string[] | Map<string, string> | Record<string, string> | string): QueryType;
|
|
57
69
|
/**
|
|
58
70
|
* Set the maximum number of results to return.
|
|
59
71
|
*
|
|
@@ -61,7 +73,7 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
|
|
|
61
73
|
* called then every valid row from the table will be returned.
|
|
62
74
|
*/
|
|
63
75
|
limit(limit: number): QueryType;
|
|
64
|
-
protected nativeExecute(): Promise<NativeBatchIterator>;
|
|
76
|
+
protected nativeExecute(options?: Partial<QueryExecutionOptions>): Promise<NativeBatchIterator>;
|
|
65
77
|
/**
|
|
66
78
|
* Execute the query and return the results as an @see {@link AsyncIterator}
|
|
67
79
|
* of @see {@link RecordBatch}.
|
|
@@ -73,12 +85,12 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
|
|
|
73
85
|
* single query)
|
|
74
86
|
*
|
|
75
87
|
*/
|
|
76
|
-
protected execute(): RecordBatchIterator;
|
|
88
|
+
protected execute(options?: Partial<QueryExecutionOptions>): RecordBatchIterator;
|
|
77
89
|
[Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>>;
|
|
78
90
|
/** Collect the results as an Arrow @see {@link ArrowTable}. */
|
|
79
|
-
toArrow(): Promise<ArrowTable>;
|
|
91
|
+
toArrow(options?: Partial<QueryExecutionOptions>): Promise<ArrowTable>;
|
|
80
92
|
/** Collect the results as an array of objects. */
|
|
81
|
-
toArray(): Promise<
|
|
93
|
+
toArray(options?: Partial<QueryExecutionOptions>): Promise<any[]>;
|
|
82
94
|
}
|
|
83
95
|
/**
|
|
84
96
|
* An interface for a query that can be executed
|
|
@@ -244,5 +256,5 @@ export declare class Query extends QueryBase<NativeQuery, Query> {
|
|
|
244
256
|
* Vector searches always have a `limit`. If `limit` has not been called then
|
|
245
257
|
* a default `limit` of 10 will be used. @see {@link Query#limit}
|
|
246
258
|
*/
|
|
247
|
-
nearestTo(vector:
|
|
259
|
+
nearestTo(vector: IntoVector): VectorQuery;
|
|
248
260
|
}
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
// limitations under the License.
|
|
15
15
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
16
|
exports.Query = exports.VectorQuery = exports.QueryBase = exports.RecordBatchIterator = void 0;
|
|
17
|
-
const
|
|
17
|
+
const arrow_1 = require("./arrow");
|
|
18
18
|
class RecordBatchIterator {
|
|
19
19
|
promisedInner;
|
|
20
20
|
inner;
|
|
@@ -34,7 +34,7 @@ class RecordBatchIterator {
|
|
|
34
34
|
if (n == null) {
|
|
35
35
|
return Promise.resolve({ done: true, value: null });
|
|
36
36
|
}
|
|
37
|
-
const tbl = (0,
|
|
37
|
+
const tbl = (0, arrow_1.tableFromIPC)(n);
|
|
38
38
|
if (tbl.batches.length != 1) {
|
|
39
39
|
throw new Error("Expected only one batch");
|
|
40
40
|
}
|
|
@@ -43,6 +43,18 @@ class RecordBatchIterator {
|
|
|
43
43
|
}
|
|
44
44
|
exports.RecordBatchIterator = RecordBatchIterator;
|
|
45
45
|
/* eslint-enable */
|
|
46
|
+
/**
 * Async-iterable wrapper around a native query: every iteration starts a new
 * execution of `inner`, passing along `options.maxBatchLength` when set.
 */
class RecordBatchIterable {
    inner;
    options;
    /**
     * @param inner Native query object exposing `execute(maxBatchLength)`.
     * @param options Optional execution options; only `maxBatchLength` is read.
     */
    constructor(inner, options) {
        this.inner = inner;
        this.options = options;
    }
    // biome-ignore lint/suspicious/noExplicitAny: skip
    [Symbol.asyncIterator]() {
        const batchLimit = this.options?.maxBatchLength;
        const pendingNativeIterator = this.inner.execute(batchLimit);
        return new RecordBatchIterator(pendingNativeIterator);
    }
}
|
|
46
58
|
/** Common methods supported by all query types */
|
|
47
59
|
class QueryBase {
|
|
48
60
|
inner;
|
|
@@ -98,6 +110,9 @@ class QueryBase {
|
|
|
98
110
|
*/
|
|
99
111
|
select(columns) {
|
|
100
112
|
let columnTuples;
|
|
113
|
+
if (typeof columns === "string") {
|
|
114
|
+
columns = [columns];
|
|
115
|
+
}
|
|
101
116
|
if (Array.isArray(columns)) {
|
|
102
117
|
columnTuples = columns.map((c) => [c, c]);
|
|
103
118
|
}
|
|
@@ -120,8 +135,8 @@ class QueryBase {
|
|
|
120
135
|
this.inner.limit(limit);
|
|
121
136
|
return this;
|
|
122
137
|
}
|
|
123
|
-
nativeExecute() {
|
|
124
|
-
return this.inner.execute();
|
|
138
|
+
nativeExecute(options) {
|
|
139
|
+
return this.inner.execute(options?.maxBatchLength);
|
|
125
140
|
}
|
|
126
141
|
/**
|
|
127
142
|
* Execute the query and return the results as an @see {@link AsyncIterator}
|
|
@@ -134,8 +149,8 @@ class QueryBase {
|
|
|
134
149
|
* single query)
|
|
135
150
|
*
|
|
136
151
|
*/
|
|
137
|
-
execute() {
|
|
138
|
-
return new RecordBatchIterator(this.nativeExecute());
|
|
152
|
+
execute(options) {
|
|
153
|
+
return new RecordBatchIterator(this.nativeExecute(options));
|
|
139
154
|
}
|
|
140
155
|
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
141
156
|
[Symbol.asyncIterator]() {
|
|
@@ -143,17 +158,17 @@ class QueryBase {
|
|
|
143
158
|
return new RecordBatchIterator(promise);
|
|
144
159
|
}
|
|
145
160
|
/** Collect the results as an Arrow @see {@link ArrowTable}. */
|
|
146
|
-
async toArrow() {
|
|
161
|
+
async toArrow(options) {
|
|
147
162
|
const batches = [];
|
|
148
|
-
for await (const batch of this) {
|
|
163
|
+
for await (const batch of new RecordBatchIterable(this.inner, options)) {
|
|
149
164
|
batches.push(batch);
|
|
150
165
|
}
|
|
151
|
-
return new
|
|
166
|
+
return new arrow_1.Table(batches);
|
|
152
167
|
}
|
|
153
168
|
/** Collect the results as an array of objects. */
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
169
|
+
// biome-ignore lint/suspicious/noExplicitAny: arrow.toArrow() returns any[]
|
|
170
|
+
async toArray(options) {
|
|
171
|
+
const tbl = await this.toArrow(options);
|
|
157
172
|
return tbl.toArray();
|
|
158
173
|
}
|
|
159
174
|
}
|
|
@@ -339,7 +354,6 @@ class Query extends QueryBase {
|
|
|
339
354
|
* a default `limit` of 10 will be used. @see {@link Query#limit}
|
|
340
355
|
*/
|
|
341
356
|
nearestTo(vector) {
|
|
342
|
-
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
343
357
|
const vectorQuery = this.inner.nearestTo(Float32Array.from(vector));
|
|
344
358
|
return new VectorQuery(vectorQuery);
|
|
345
359
|
}
|
|
@@ -1,4 +1,25 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import type { TKeys } from "apache-arrow/type";
|
|
2
|
+
import { DataType, Date_, Decimal, DenseUnion, Dictionary, Duration, Field, FixedSizeBinary, FixedSizeList, Float, Int, Interval, List, Map_, Schema, SparseUnion, Struct, Time, Timestamp, TimestampMicrosecond, TimestampMillisecond, TimestampNanosecond, TimestampSecond, Type, Union } from "./arrow";
|
|
3
|
+
export declare function sanitizeMetadata(metadataLike?: unknown): Map<string, string> | undefined;
|
|
4
|
+
export declare function sanitizeInt(typeLike: object): Int<Type.Int | Type.Int8 | Type.Int16 | Type.Int32 | Type.Int64 | Type.Uint8 | Type.Uint16 | Type.Uint32 | Type.Uint64>;
|
|
5
|
+
export declare function sanitizeFloat(typeLike: object): Float<Type.Float | Type.Float16 | Type.Float32 | Type.Float64>;
|
|
6
|
+
export declare function sanitizeDecimal(typeLike: object): Decimal;
|
|
7
|
+
export declare function sanitizeDate(typeLike: object): Date_<import("apache-arrow/type").Dates>;
|
|
8
|
+
export declare function sanitizeTime(typeLike: object): Time<Type.Time | Type.TimeSecond | Type.TimeMillisecond | Type.TimeMicrosecond | Type.TimeNanosecond>;
|
|
9
|
+
export declare function sanitizeTimestamp(typeLike: object): Timestamp<Type.Timestamp | Type.TimestampSecond | Type.TimestampMillisecond | Type.TimestampMicrosecond | Type.TimestampNanosecond>;
|
|
10
|
+
export declare function sanitizeTypedTimestamp(typeLike: object, Datatype: typeof TimestampNanosecond | typeof TimestampMicrosecond | typeof TimestampMillisecond | typeof TimestampSecond): TimestampSecond | TimestampMillisecond | TimestampMicrosecond | TimestampNanosecond;
|
|
11
|
+
export declare function sanitizeInterval(typeLike: object): Interval<Type.Interval | Type.IntervalDayTime | Type.IntervalYearMonth>;
|
|
12
|
+
export declare function sanitizeList(typeLike: object): List<any>;
|
|
13
|
+
export declare function sanitizeStruct(typeLike: object): Struct<any>;
|
|
14
|
+
export declare function sanitizeUnion(typeLike: object): Union<Type.Union | Type.DenseUnion | Type.SparseUnion>;
|
|
15
|
+
export declare function sanitizeTypedUnion(typeLike: object, UnionType: typeof DenseUnion | typeof SparseUnion): SparseUnion | DenseUnion;
|
|
16
|
+
export declare function sanitizeFixedSizeBinary(typeLike: object): FixedSizeBinary;
|
|
17
|
+
export declare function sanitizeFixedSizeList(typeLike: object): FixedSizeList<any>;
|
|
18
|
+
export declare function sanitizeMap(typeLike: object): Map_<any, any>;
|
|
19
|
+
export declare function sanitizeDuration(typeLike: object): Duration<Type.Duration | Type.DurationSecond | Type.DurationMillisecond | Type.DurationMicrosecond | Type.DurationNanosecond>;
|
|
20
|
+
export declare function sanitizeDictionary(typeLike: object): Dictionary<DataType<any, any>, TKeys>;
|
|
21
|
+
export declare function sanitizeType(typeLike: unknown): DataType<any>;
|
|
22
|
+
export declare function sanitizeField(fieldLike: unknown): Field;
|
|
2
23
|
/**
|
|
3
24
|
* Convert something schemaLike into a Schema instance
|
|
4
25
|
*
|