@lancedb/lancedb 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/biome.json +8 -2
- package/dist/arrow.d.ts +34 -9
- package/dist/arrow.js +220 -23
- package/dist/connection.d.ts +4 -1
- package/dist/connection.js +11 -5
- package/dist/embedding/embedding_function.d.ts +54 -28
- package/dist/embedding/embedding_function.js +71 -10
- package/dist/embedding/index.d.ts +28 -2
- package/dist/embedding/index.js +111 -4
- package/dist/embedding/openai.d.ts +16 -7
- package/dist/embedding/openai.js +62 -12
- package/dist/embedding/registry.d.ts +54 -0
- package/dist/embedding/registry.js +123 -0
- package/dist/query.d.ts +1 -1
- package/dist/query.js +3 -3
- package/dist/sanitize.d.ts +22 -1
- package/dist/sanitize.js +123 -110
- package/dist/table.d.ts +1 -2
- package/dist/table.js +6 -3
- package/lancedb/arrow.ts +234 -38
- package/lancedb/connection.ts +27 -6
- package/lancedb/embedding/embedding_function.ts +126 -42
- package/lancedb/embedding/index.ts +113 -2
- package/lancedb/embedding/openai.ts +62 -16
- package/lancedb/embedding/registry.ts +172 -0
- package/lancedb/query.ts +2 -1
- package/lancedb/sanitize.ts +22 -22
- package/lancedb/table.ts +10 -3
- package/nodejs-artifacts/arrow.d.ts +34 -9
- package/nodejs-artifacts/arrow.js +220 -23
- package/nodejs-artifacts/connection.d.ts +4 -1
- package/nodejs-artifacts/connection.js +11 -5
- package/nodejs-artifacts/embedding/embedding_function.d.ts +54 -28
- package/nodejs-artifacts/embedding/embedding_function.js +71 -10
- package/nodejs-artifacts/embedding/index.d.ts +28 -2
- package/nodejs-artifacts/embedding/index.js +111 -4
- package/nodejs-artifacts/embedding/openai.d.ts +16 -7
- package/nodejs-artifacts/embedding/openai.js +62 -12
- package/nodejs-artifacts/embedding/registry.d.ts +54 -0
- package/nodejs-artifacts/embedding/registry.js +123 -0
- package/nodejs-artifacts/query.d.ts +1 -1
- package/nodejs-artifacts/query.js +3 -3
- package/nodejs-artifacts/sanitize.d.ts +22 -1
- package/nodejs-artifacts/sanitize.js +123 -110
- package/nodejs-artifacts/table.d.ts +1 -2
- package/nodejs-artifacts/table.js +6 -3
- package/package.json +14 -9
- package/tsconfig.json +3 -1
|
@@ -1,2 +1,113 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
// Copyright 2023 Lance Developers.
|
|
2
|
+
//
|
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
// you may not use this file except in compliance with the License.
|
|
5
|
+
// You may obtain a copy of the License at
|
|
6
|
+
//
|
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
//
|
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
// See the License for the specific language governing permissions and
|
|
13
|
+
// limitations under the License.
|
|
14
|
+
|
|
15
|
+
import { DataType, Field, Schema } from "../arrow";
|
|
16
|
+
import { isDataType } from "../arrow";
|
|
17
|
+
import { sanitizeType } from "../sanitize";
|
|
18
|
+
import { EmbeddingFunction } from "./embedding_function";
|
|
19
|
+
import { EmbeddingFunctionConfig, getRegistry } from "./registry";
|
|
20
|
+
|
|
21
|
+
export { EmbeddingFunction } from "./embedding_function";
|
|
22
|
+
|
|
23
|
+
// We need to explicitly export '*' so that the `register` decorator actually registers the class.
|
|
24
|
+
export * from "./openai";
|
|
25
|
+
export * from "./registry";
|
|
26
|
+
|
|
27
|
+
/**
 * Create a schema with embedding functions.
 *
 * Every entry of `fields` is either a bare Arrow data type or a
 * `[dataType, metadata]` tuple whose metadata map marks the column as the
 * source or the vector column of an embedding function (presumably what
 * `sourceField()` / `vectorField()` in the example below return — confirm
 * against `EmbeddingFunction`).
 *
 * @param fields - Mapping from column name to its data type, or to a
 *   `[dataType, metadata]` tuple.
 * @returns A `Schema` whose fields are all created with the nullable flag set
 *   to `true`, and whose metadata is the registry's serialized description of
 *   the embedding functions referenced by `fields`.
 * @throws TypeError if a value is neither a data type nor a tuple with at
 *   least two elements.
 * @example
 * ```ts
 * class MyEmbeddingFunction extends EmbeddingFunction {
 *   // ...
 * }
 * const func = new MyEmbeddingFunction();
 * const schema = LanceSchema({
 *   id: new Int32(),
 *   text: func.sourceField(new Utf8()),
 *   vector: func.vectorField(),
 *   // optional: specify the datatype and/or dimensions
 *   vector2: func.vectorField({ datatype: new Float32(), dims: 3}),
 * });
 *
 * const table = await db.createTable("my_table", data, { schema });
 * ```
 */
export function LanceSchema(
  fields: Record<string, [object, Map<string, EmbeddingFunction>] | object>,
): Schema {
  const arrowFields: Field[] = [];
  const embeddingFunctions = new Map<
    EmbeddingFunction,
    Partial<EmbeddingFunctionConfig>
  >();

  for (const [key, value] of Object.entries(fields)) {
    if (isDataType(value)) {
      arrowFields.push(new Field(key, sanitizeType(value), true));
      continue;
    }

    // Anything that is not a data type must be a [dataType, metadata] tuple.
    // Fail with a message naming the offending column instead of letting the
    // destructuring below blow up with "value is not iterable".
    if (!Array.isArray(value) || value.length < 2) {
      throw new TypeError(
        `Field "${key}" must be an Arrow data type or a [dataType, metadata] tuple`,
      );
    }

    const [dtype, metadata] = value as [
      object,
      Map<string, EmbeddingFunction>,
    ];
    arrowFields.push(new Field(key, sanitizeType(dtype), true));
    parseEmbeddingFunctions(embeddingFunctions, key, metadata);
  }

  const metadata = getRegistry().getTableMetadata(
    Array.from(embeddingFunctions.values()) as EmbeddingFunctionConfig[],
  );
  return new Schema(arrowFields, metadata);
}
|
|
77
|
+
|
|
78
|
+
/**
 * Record which role the column `key` plays for an embedding function.
 *
 * Looks for a `"source_column_for"` or `"vector_column_for"` entry in the
 * column's metadata and merges `sourceColumn: key` or `vectorColumn: key`
 * into the accumulator entry for that embedding function, creating the entry
 * (with its `function` set) the first time the function is seen.
 *
 * @param embeddingFunctions - Accumulator keyed by embedding function
 *   instance; mutated in place.
 * @param key - Name of the column being processed.
 * @param metadata - Metadata attached to the column.
 */
function parseEmbeddingFunctions(
  embeddingFunctions: Map<EmbeddingFunction, Partial<EmbeddingFunctionConfig>>,
  key: string,
  metadata: Map<string, EmbeddingFunction>,
): void {
  const sourceFor = metadata.get("source_column_for");
  const vectorFor = metadata.get("vector_column_for");

  let embedFunction: EmbeddingFunction;
  let column: Partial<EmbeddingFunctionConfig>;
  if (sourceFor !== undefined) {
    // A source marker wins when both markers are present on the same column.
    embedFunction = sourceFor;
    column = { sourceColumn: key };
  } else if (vectorFor !== undefined) {
    embedFunction = vectorFor;
    column = { vectorColumn: key };
  } else {
    // Plain column with no embedding role: nothing to record.
    return;
  }

  const current = embeddingFunctions.get(embedFunction) ?? {
    function: embedFunction,
  };
  embeddingFunctions.set(embedFunction, { ...current, ...column });
}
|
|
@@ -13,17 +13,31 @@
|
|
|
13
13
|
// limitations under the License.
|
|
14
14
|
|
|
15
15
|
import type OpenAI from "openai";
|
|
16
|
-
import {
|
|
16
|
+
import { Float, Float32 } from "../arrow";
|
|
17
|
+
import { EmbeddingFunction } from "./embedding_function";
|
|
18
|
+
import { register } from "./registry";
|
|
17
19
|
|
|
18
|
-
export
|
|
19
|
-
|
|
20
|
-
|
|
20
|
+
export type OpenAIOptions = {
|
|
21
|
+
apiKey?: string;
|
|
22
|
+
model?: string;
|
|
23
|
+
};
|
|
24
|
+
|
|
25
|
+
@register("openai")
|
|
26
|
+
export class OpenAIEmbeddingFunction extends EmbeddingFunction<
|
|
27
|
+
string,
|
|
28
|
+
OpenAIOptions
|
|
29
|
+
> {
|
|
30
|
+
#openai: OpenAI;
|
|
31
|
+
#modelName: string;
|
|
32
|
+
|
|
33
|
+
constructor(options: OpenAIOptions = { model: "text-embedding-ada-002" }) {
|
|
34
|
+
super();
|
|
35
|
+
const openAIKey = options?.apiKey ?? process.env.OPENAI_API_KEY;
|
|
36
|
+
if (!openAIKey) {
|
|
37
|
+
throw new Error("OpenAI API key is required");
|
|
38
|
+
}
|
|
39
|
+
const modelName = options?.model ?? "text-embedding-ada-002";
|
|
21
40
|
|
|
22
|
-
constructor(
|
|
23
|
-
sourceColumn: string,
|
|
24
|
-
openAIKey: string,
|
|
25
|
-
modelName: string = "text-embedding-ada-002",
|
|
26
|
-
) {
|
|
27
41
|
/**
|
|
28
42
|
* @type {import("openai").default}
|
|
29
43
|
*/
|
|
@@ -36,18 +50,40 @@ export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
|
|
|
36
50
|
throw new Error("please install openai@^4.24.1 using npm install openai");
|
|
37
51
|
}
|
|
38
52
|
|
|
39
|
-
this.sourceColumn = sourceColumn;
|
|
40
53
|
const configuration = {
|
|
41
54
|
apiKey: openAIKey,
|
|
42
55
|
};
|
|
43
56
|
|
|
44
|
-
this
|
|
45
|
-
this
|
|
57
|
+
this.#openai = new Openai(configuration);
|
|
58
|
+
this.#modelName = modelName;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
toJSON() {
|
|
62
|
+
return {
|
|
63
|
+
model: this.#modelName,
|
|
64
|
+
};
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
/**
 * Number of dimensions of the vectors produced by the configured model.
 *
 * @returns 1536 for `text-embedding-ada-002` and `text-embedding-3-small`,
 *   3072 for `text-embedding-3-large`.
 * @throws Error if the configured model is not one of the models above.
 *   Previously this case returned `null` disguised as `never`, which handed
 *   callers a non-number despite the `number` return type.
 */
ndims(): number {
  switch (this.#modelName) {
    case "text-embedding-ada-002":
    case "text-embedding-3-small":
      return 1536;
    case "text-embedding-3-large":
      return 3072;
    default:
      throw new Error(`Unknown model: ${this.#modelName}`);
  }
}
|
|
79
|
+
|
|
80
|
+
embeddingDataType(): Float {
|
|
81
|
+
return new Float32();
|
|
46
82
|
}
|
|
47
83
|
|
|
48
|
-
async
|
|
49
|
-
const response = await this.
|
|
50
|
-
model: this
|
|
84
|
+
async computeSourceEmbeddings(data: string[]): Promise<number[][]> {
|
|
85
|
+
const response = await this.#openai.embeddings.create({
|
|
86
|
+
model: this.#modelName,
|
|
51
87
|
input: data,
|
|
52
88
|
});
|
|
53
89
|
|
|
@@ -58,5 +94,15 @@ export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
|
|
|
58
94
|
return embeddings;
|
|
59
95
|
}
|
|
60
96
|
|
|
61
|
-
|
|
97
|
+
async computeQueryEmbeddings(data: string): Promise<number[]> {
|
|
98
|
+
if (typeof data !== "string") {
|
|
99
|
+
throw new Error("Data must be a string");
|
|
100
|
+
}
|
|
101
|
+
const response = await this.#openai.embeddings.create({
|
|
102
|
+
model: this.#modelName,
|
|
103
|
+
input: data,
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
return response.data[0].embedding;
|
|
107
|
+
}
|
|
62
108
|
}
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
// Copyright 2024 Lance Developers.
|
|
2
|
+
//
|
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
// you may not use this file except in compliance with the License.
|
|
5
|
+
// You may obtain a copy of the License at
|
|
6
|
+
//
|
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
//
|
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
// See the License for the specific language governing permissions and
|
|
13
|
+
// limitations under the License.
|
|
14
|
+
|
|
15
|
+
import type { EmbeddingFunction } from "./embedding_function";
|
|
16
|
+
import "reflect-metadata";
|
|
17
|
+
|
|
18
|
+
/**
 * Free-form options bag handed to an embedding function constructor.
 * Keys are arbitrary strings; values are left untyped (`unknown`).
 */
export interface EmbeddingFunctionOptions {
  [option: string]: unknown;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Constructor shape for an embedding function class: callable with `new`
 * and an optional options bag, producing an instance of `T`.
 */
export interface EmbeddingFunctionFactory<
  T extends EmbeddingFunction = EmbeddingFunction,
> {
  new (modelOptions?: EmbeddingFunctionOptions): T;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Handle exposing a single `create` method that builds an embedding
 * function of type `T` from an optional options bag.
 */
interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
  create(options?: EmbeddingFunctionOptions): T;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Registry used to register embedding functions and fetch them by name.
 * It also handles serializing and deserializing their configuration to and
 * from table metadata.
 *
 * You can implement your own embedding function by subclassing EmbeddingFunction
 * or TextEmbeddingFunction and registering it with the registry.
 */
export class EmbeddingFunctionRegistry {
  #functions: Map<string, EmbeddingFunctionFactory> = new Map();

  /**
   * Build a class decorator that registers an embedding function class.
   *
   * @param alias - Name to register the class under. When omitted (or empty)
   *   the decorated class's own `name` is used.
   * @returns A decorator that stores the constructor under the resolved name,
   *   tags it with that name under the `"lancedb::embedding::name"` metadata
   *   key, and returns the constructor unchanged. The decorator throws an
   *   `Error` if the resolved name is already registered.
   */
  register<T extends EmbeddingFunctionFactory = EmbeddingFunctionFactory>(
    this: EmbeddingFunctionRegistry,
    alias?: string,
    // biome-ignore lint/suspicious/noExplicitAny: decorator return type
  ): (ctor: T) => any {
    return (ctor: T) => {
      // Resolve the name per decorated class rather than overwriting the
      // captured `alias`; otherwise reusing one decorator on a second class
      // would try to register it under the first class's name.
      const name = alias || ctor.name;
      if (this.#functions.has(name)) {
        throw new Error(
          `Embedding function with alias "${name}" already exists`,
        );
      }
      this.#functions.set(name, ctor);
      Reflect.defineMetadata("lancedb::embedding::name", name, ctor);
      return ctor;
    };
  }

  /**
   * Fetch an embedding function by name.
   *
   * @param name - The name the function was registered under.
   * @returns An object whose `create(options?)` instantiates the registered
   *   class, or `undefined` if nothing is registered under `name`.
   */
  get<T extends EmbeddingFunction<unknown> = EmbeddingFunction>(
    name: string,
  ): EmbeddingFunctionCreate<T> | undefined {
    const factory = this.#functions.get(name);
    if (!factory) {
      return undefined;
    }
    return {
      create: (options?: EmbeddingFunctionOptions) =>
        new factory(options) as unknown as T,
    };
  }

  /**
   * Reset the registry to the initial state by removing every registered
   * function.
   */
  reset(this: EmbeddingFunctionRegistry) {
    this.#functions.clear();
  }

  /**
   * Rebuild embedding function configurations from table metadata.
   *
   * Reads the JSON array stored under the `"embedding_functions"` key (the
   * format written by `getTableMetadata`) and instantiates each referenced
   * function through the registry.
   *
   * NOTE: the result is keyed by function name, so if several entries share
   * a name only the last one is kept.
   *
   * @param metadata - Schema metadata to read from.
   * @returns Map from function name to its configuration; empty when the
   *   metadata has no `"embedding_functions"` entry.
   * @throws TypeError if the stored value is not a JSON array.
   * @throws Error if an entry names a function that is not registered.
   */
  parseFunctions(
    this: EmbeddingFunctionRegistry,
    metadata: Map<string, string>,
  ): Map<string, EmbeddingFunctionConfig> {
    const serialized = metadata.get("embedding_functions");
    if (serialized === undefined) {
      return new Map();
    }

    type FunctionConfig = {
      name: string;
      sourceColumn: string;
      vectorColumn: string;
      model: EmbeddingFunctionOptions;
    };

    const parsed: unknown = JSON.parse(serialized);
    if (!Array.isArray(parsed)) {
      throw new TypeError(
        'Expected "embedding_functions" metadata to be a JSON array',
      );
    }

    const entries = (parsed as FunctionConfig[]).map(
      (f): [string, EmbeddingFunctionConfig] => {
        const fn = this.get(f.name);
        if (!fn) {
          throw new Error(`Function "${f.name}" not found in registry`);
        }
        return [
          f.name,
          {
            sourceColumn: f.sourceColumn,
            vectorColumn: f.vectorColumn,
            function: fn.create(f.model),
          },
        ];
      },
    );
    return new Map(entries);
  }

  /**
   * Serialize one embedding function configuration to a plain object.
   *
   * @param conf - Configuration to serialize.
   * @returns An object with `sourceColumn`, `vectorColumn` (defaulting to
   *   `"vector"`), `name` (the registered name if the class was tagged by
   *   `register`, otherwise the class name) and `model` (the function's
   *   `toJSON()` output).
   */
  // biome-ignore lint/suspicious/noExplicitAny: serialized shape is open-ended
  functionToMetadata(conf: EmbeddingFunctionConfig): Record<string, any> {
    const registeredName = Reflect.getMetadata(
      "lancedb::embedding::name",
      conf.function.constructor,
    );
    return {
      sourceColumn: conf.sourceColumn,
      vectorColumn: conf.vectorColumn ?? "vector",
      name: registeredName ?? conf.function.constructor.name,
      model: conf.function.toJSON(),
    };
  }

  /**
   * Build the table metadata describing a set of embedding functions.
   *
   * @param functions - Configurations to serialize.
   * @returns A map with a single `"embedding_functions"` entry holding the
   *   JSON-encoded array of serialized configurations (`"[]"` when
   *   `functions` is empty).
   */
  getTableMetadata(functions: EmbeddingFunctionConfig[]): Map<string, string> {
    const jsonData = functions.map((conf) => this.functionToMetadata(conf));
    return new Map<string, string>([
      ["embedding_functions", JSON.stringify(jsonData)],
    ]);
  }
}
|
|
149
|
+
|
|
150
|
+
const _REGISTRY = new EmbeddingFunctionRegistry();
|
|
151
|
+
|
|
152
|
+
/**
 * Decorator factory bound to the module-level registry instance.
 *
 * @param name - Optional alias, forwarded unchanged to the registry's
 *   `register` method.
 * @returns The class decorator produced by the registry.
 */
export function register(name?: string) {
  const decorator = _REGISTRY.register(name);
  return decorator;
}
|
|
155
|
+
|
|
156
|
+
/**
 * Utility function to get the global instance of the registry.
 *
 * @returns The global `EmbeddingFunctionRegistry` instance
 * @example
 * ```ts
 * const registry = getRegistry();
 * // `get` returns `undefined` when nothing is registered under the name.
 * const openai = registry.get("openai")?.create();
 * ```
 */
|
|
164
|
+
export function getRegistry(): EmbeddingFunctionRegistry {
|
|
165
|
+
return _REGISTRY;
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
/**
 * Binding between an embedding function and the table columns it works on.
 */
export interface EmbeddingFunctionConfig {
  /** Name of the column recorded as the function's source column. */
  sourceColumn: string;
  /** Name of the column recorded as the function's vector column; serialized as `"vector"` when omitted. */
  vectorColumn?: string;
  /** The embedding function instance itself. */
  function: EmbeddingFunction;
}
|
package/lancedb/query.ts
CHANGED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
// See the License for the specific language governing permissions and
|
|
13
13
|
// limitations under the License.
|
|
14
14
|
|
|
15
|
-
import { Table as ArrowTable, RecordBatch, tableFromIPC } from "
|
|
15
|
+
import { Table as ArrowTable, RecordBatch, tableFromIPC } from "./arrow";
|
|
16
16
|
import { type IvfPqOptions } from "./indices";
|
|
17
17
|
import {
|
|
18
18
|
RecordBatchIterator as NativeBatchIterator,
|
|
@@ -170,6 +170,7 @@ export class QueryBase<
|
|
|
170
170
|
/** Collect the results as an array of objects. */
|
|
171
171
|
async toArray(): Promise<unknown[]> {
|
|
172
172
|
const tbl = await this.toArrow();
|
|
173
|
+
|
|
173
174
|
// eslint-disable-next-line @typescript-eslint/no-unsafe-return
|
|
174
175
|
return tbl.toArray();
|
|
175
176
|
}
|
package/lancedb/sanitize.ts
CHANGED
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
// comes from the exact same library instance. This is not always the case
|
|
21
21
|
// and so we must sanitize the input to ensure that it is compatible.
|
|
22
22
|
|
|
23
|
+
import type { IntBitWidth, TKeys, TimeBitWidth } from "apache-arrow/type";
|
|
23
24
|
import {
|
|
24
25
|
Binary,
|
|
25
26
|
Bool,
|
|
@@ -75,10 +76,9 @@ import {
|
|
|
75
76
|
Uint64,
|
|
76
77
|
Union,
|
|
77
78
|
Utf8,
|
|
78
|
-
} from "
|
|
79
|
-
import type { IntBitWidth, TKeys, TimeBitWidth } from "apache-arrow/type";
|
|
79
|
+
} from "./arrow";
|
|
80
80
|
|
|
81
|
-
function sanitizeMetadata(
|
|
81
|
+
export function sanitizeMetadata(
|
|
82
82
|
metadataLike?: unknown,
|
|
83
83
|
): Map<string, string> | undefined {
|
|
84
84
|
if (metadataLike === undefined || metadataLike === null) {
|
|
@@ -97,7 +97,7 @@ function sanitizeMetadata(
|
|
|
97
97
|
return metadataLike as Map<string, string>;
|
|
98
98
|
}
|
|
99
99
|
|
|
100
|
-
function sanitizeInt(typeLike: object) {
|
|
100
|
+
export function sanitizeInt(typeLike: object) {
|
|
101
101
|
if (
|
|
102
102
|
!("bitWidth" in typeLike) ||
|
|
103
103
|
typeof typeLike.bitWidth !== "number" ||
|
|
@@ -111,14 +111,14 @@ function sanitizeInt(typeLike: object) {
|
|
|
111
111
|
return new Int(typeLike.isSigned, typeLike.bitWidth as IntBitWidth);
|
|
112
112
|
}
|
|
113
113
|
|
|
114
|
-
function sanitizeFloat(typeLike: object) {
|
|
114
|
+
export function sanitizeFloat(typeLike: object) {
|
|
115
115
|
if (!("precision" in typeLike) || typeof typeLike.precision !== "number") {
|
|
116
116
|
throw Error("Expected a Float Type to have a `precision` property");
|
|
117
117
|
}
|
|
118
118
|
return new Float(typeLike.precision as Precision);
|
|
119
119
|
}
|
|
120
120
|
|
|
121
|
-
function sanitizeDecimal(typeLike: object) {
|
|
121
|
+
export function sanitizeDecimal(typeLike: object) {
|
|
122
122
|
if (
|
|
123
123
|
!("scale" in typeLike) ||
|
|
124
124
|
typeof typeLike.scale !== "number" ||
|
|
@@ -134,14 +134,14 @@ function sanitizeDecimal(typeLike: object) {
|
|
|
134
134
|
return new Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth);
|
|
135
135
|
}
|
|
136
136
|
|
|
137
|
-
function sanitizeDate(typeLike: object) {
|
|
137
|
+
export function sanitizeDate(typeLike: object) {
|
|
138
138
|
if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
|
|
139
139
|
throw Error("Expected a Date type to have a `unit` property");
|
|
140
140
|
}
|
|
141
141
|
return new Date_(typeLike.unit as DateUnit);
|
|
142
142
|
}
|
|
143
143
|
|
|
144
|
-
function sanitizeTime(typeLike: object) {
|
|
144
|
+
export function sanitizeTime(typeLike: object) {
|
|
145
145
|
if (
|
|
146
146
|
!("unit" in typeLike) ||
|
|
147
147
|
typeof typeLike.unit !== "number" ||
|
|
@@ -155,7 +155,7 @@ function sanitizeTime(typeLike: object) {
|
|
|
155
155
|
return new Time(typeLike.unit, typeLike.bitWidth as TimeBitWidth);
|
|
156
156
|
}
|
|
157
157
|
|
|
158
|
-
function sanitizeTimestamp(typeLike: object) {
|
|
158
|
+
export function sanitizeTimestamp(typeLike: object) {
|
|
159
159
|
if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
|
|
160
160
|
throw Error("Expected a Timestamp type to have a `unit` property");
|
|
161
161
|
}
|
|
@@ -166,7 +166,7 @@ function sanitizeTimestamp(typeLike: object) {
|
|
|
166
166
|
return new Timestamp(typeLike.unit, timezone);
|
|
167
167
|
}
|
|
168
168
|
|
|
169
|
-
function sanitizeTypedTimestamp(
|
|
169
|
+
export function sanitizeTypedTimestamp(
|
|
170
170
|
typeLike: object,
|
|
171
171
|
// eslint-disable-next-line @typescript-eslint/naming-convention
|
|
172
172
|
Datatype:
|
|
@@ -182,14 +182,14 @@ function sanitizeTypedTimestamp(
|
|
|
182
182
|
return new Datatype(timezone);
|
|
183
183
|
}
|
|
184
184
|
|
|
185
|
-
function sanitizeInterval(typeLike: object) {
|
|
185
|
+
export function sanitizeInterval(typeLike: object) {
|
|
186
186
|
if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
|
|
187
187
|
throw Error("Expected an Interval type to have a `unit` property");
|
|
188
188
|
}
|
|
189
189
|
return new Interval(typeLike.unit);
|
|
190
190
|
}
|
|
191
191
|
|
|
192
|
-
function sanitizeList(typeLike: object) {
|
|
192
|
+
export function sanitizeList(typeLike: object) {
|
|
193
193
|
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
|
|
194
194
|
throw Error(
|
|
195
195
|
"Expected a List type to have an array-like `children` property",
|
|
@@ -201,7 +201,7 @@ function sanitizeList(typeLike: object) {
|
|
|
201
201
|
return new List(sanitizeField(typeLike.children[0]));
|
|
202
202
|
}
|
|
203
203
|
|
|
204
|
-
function sanitizeStruct(typeLike: object) {
|
|
204
|
+
export function sanitizeStruct(typeLike: object) {
|
|
205
205
|
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
|
|
206
206
|
throw Error(
|
|
207
207
|
"Expected a Struct type to have an array-like `children` property",
|
|
@@ -210,7 +210,7 @@ function sanitizeStruct(typeLike: object) {
|
|
|
210
210
|
return new Struct(typeLike.children.map((child) => sanitizeField(child)));
|
|
211
211
|
}
|
|
212
212
|
|
|
213
|
-
function sanitizeUnion(typeLike: object) {
|
|
213
|
+
export function sanitizeUnion(typeLike: object) {
|
|
214
214
|
if (
|
|
215
215
|
!("typeIds" in typeLike) ||
|
|
216
216
|
!("mode" in typeLike) ||
|
|
@@ -234,7 +234,7 @@ function sanitizeUnion(typeLike: object) {
|
|
|
234
234
|
);
|
|
235
235
|
}
|
|
236
236
|
|
|
237
|
-
function sanitizeTypedUnion(
|
|
237
|
+
export function sanitizeTypedUnion(
|
|
238
238
|
typeLike: object,
|
|
239
239
|
// eslint-disable-next-line @typescript-eslint/naming-convention
|
|
240
240
|
UnionType: typeof DenseUnion | typeof SparseUnion,
|
|
@@ -256,7 +256,7 @@ function sanitizeTypedUnion(
|
|
|
256
256
|
);
|
|
257
257
|
}
|
|
258
258
|
|
|
259
|
-
function sanitizeFixedSizeBinary(typeLike: object) {
|
|
259
|
+
export function sanitizeFixedSizeBinary(typeLike: object) {
|
|
260
260
|
if (!("byteWidth" in typeLike) || typeof typeLike.byteWidth !== "number") {
|
|
261
261
|
throw Error(
|
|
262
262
|
"Expected a FixedSizeBinary type to have a `byteWidth` property",
|
|
@@ -265,7 +265,7 @@ function sanitizeFixedSizeBinary(typeLike: object) {
|
|
|
265
265
|
return new FixedSizeBinary(typeLike.byteWidth);
|
|
266
266
|
}
|
|
267
267
|
|
|
268
|
-
function sanitizeFixedSizeList(typeLike: object) {
|
|
268
|
+
export function sanitizeFixedSizeList(typeLike: object) {
|
|
269
269
|
if (!("listSize" in typeLike) || typeof typeLike.listSize !== "number") {
|
|
270
270
|
throw Error("Expected a FixedSizeList type to have a `listSize` property");
|
|
271
271
|
}
|
|
@@ -283,7 +283,7 @@ function sanitizeFixedSizeList(typeLike: object) {
|
|
|
283
283
|
);
|
|
284
284
|
}
|
|
285
285
|
|
|
286
|
-
function sanitizeMap(typeLike: object) {
|
|
286
|
+
export function sanitizeMap(typeLike: object) {
|
|
287
287
|
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
|
|
288
288
|
throw Error(
|
|
289
289
|
"Expected a Map type to have an array-like `children` property",
|
|
@@ -300,14 +300,14 @@ function sanitizeMap(typeLike: object) {
|
|
|
300
300
|
);
|
|
301
301
|
}
|
|
302
302
|
|
|
303
|
-
function sanitizeDuration(typeLike: object) {
|
|
303
|
+
export function sanitizeDuration(typeLike: object) {
|
|
304
304
|
if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
|
|
305
305
|
throw Error("Expected a Duration type to have a `unit` property");
|
|
306
306
|
}
|
|
307
307
|
return new Duration(typeLike.unit);
|
|
308
308
|
}
|
|
309
309
|
|
|
310
|
-
function sanitizeDictionary(typeLike: object) {
|
|
310
|
+
export function sanitizeDictionary(typeLike: object) {
|
|
311
311
|
if (!("id" in typeLike) || typeof typeLike.id !== "number") {
|
|
312
312
|
throw Error("Expected a Dictionary type to have an `id` property");
|
|
313
313
|
}
|
|
@@ -329,7 +329,7 @@ function sanitizeDictionary(typeLike: object) {
|
|
|
329
329
|
}
|
|
330
330
|
|
|
331
331
|
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
332
|
-
function sanitizeType(typeLike: unknown): DataType<any> {
|
|
332
|
+
export function sanitizeType(typeLike: unknown): DataType<any> {
|
|
333
333
|
if (typeof typeLike !== "object" || typeLike === null) {
|
|
334
334
|
throw Error("Expected a Type but object was null/undefined");
|
|
335
335
|
}
|
|
@@ -449,7 +449,7 @@ function sanitizeType(typeLike: unknown): DataType<any> {
|
|
|
449
449
|
}
|
|
450
450
|
}
|
|
451
451
|
|
|
452
|
-
function sanitizeField(fieldLike: unknown): Field {
|
|
452
|
+
export function sanitizeField(fieldLike: unknown): Field {
|
|
453
453
|
if (fieldLike instanceof Field) {
|
|
454
454
|
return fieldLike;
|
|
455
455
|
}
|
package/lancedb/table.ts
CHANGED
|
@@ -12,8 +12,9 @@
|
|
|
12
12
|
// See the License for the specific language governing permissions and
|
|
13
13
|
// limitations under the License.
|
|
14
14
|
|
|
15
|
-
import { Schema, tableFromIPC } from "
|
|
16
|
-
|
|
15
|
+
import { Data, Schema, fromDataToBuffer, tableFromIPC } from "./arrow";
|
|
16
|
+
|
|
17
|
+
import { getRegistry } from "./embedding/registry";
|
|
17
18
|
import { IndexOptions } from "./indices";
|
|
18
19
|
import {
|
|
19
20
|
AddColumnsSql,
|
|
@@ -122,8 +123,14 @@ export class Table {
|
|
|
122
123
|
*/
|
|
123
124
|
async add(data: Data, options?: Partial<AddDataOptions>): Promise<void> {
|
|
124
125
|
const mode = options?.mode ?? "append";
|
|
126
|
+
const schema = await this.schema();
|
|
127
|
+
const registry = getRegistry();
|
|
128
|
+
const functions = registry.parseFunctions(schema.metadata);
|
|
125
129
|
|
|
126
|
-
const buffer = await fromDataToBuffer(
|
|
130
|
+
const buffer = await fromDataToBuffer(
|
|
131
|
+
data,
|
|
132
|
+
functions.values().next().value,
|
|
133
|
+
);
|
|
127
134
|
await this.inner.add(buffer, mode);
|
|
128
135
|
}
|
|
129
136
|
|