@lancedb/lancedb 0.14.0-beta.0 → 0.14.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DEVELOPMENT.md +42 -0
- package/package.json +12 -9
- package/dist/arrow.d.ts +0 -248
- package/dist/arrow.js +0 -743
- package/dist/connection.d.ts +0 -188
- package/dist/connection.js +0 -149
- package/dist/embedding/embedding_function.d.ts +0 -90
- package/dist/embedding/embedding_function.js +0 -133
- package/dist/embedding/index.d.ts +0 -27
- package/dist/embedding/index.js +0 -112
- package/dist/embedding/openai.d.ts +0 -18
- package/dist/embedding/openai.js +0 -105
- package/dist/embedding/registry.d.ts +0 -55
- package/dist/embedding/registry.js +0 -151
- package/dist/embedding/transformers.d.ts +0 -37
- package/dist/embedding/transformers.js +0 -148
- package/dist/index.d.ts +0 -54
- package/dist/index.js +0 -55
- package/dist/indices.d.ts +0 -429
- package/dist/indices.js +0 -131
- package/dist/merge.d.ts +0 -54
- package/dist/merge.js +0 -64
- package/dist/native.d.ts +0 -328
- package/dist/native.js +0 -330
- package/dist/query.d.ts +0 -324
- package/dist/query.js +0 -544
- package/dist/sanitize.d.ts +0 -31
- package/dist/sanitize.js +0 -437
- package/dist/table.d.ts +0 -425
- package/dist/table.js +0 -276
- package/dist/util.d.ts +0 -13
- package/dist/util.js +0 -65
package/dist/connection.d.ts
DELETED
|
@@ -1,188 +0,0 @@
|
|
|
1
|
-
import { Data, SchemaLike, TableLike } from "./arrow";
|
|
2
|
-
import { EmbeddingFunctionConfig } from "./embedding/registry";
|
|
3
|
-
import { Connection as LanceDbConnection } from "./native";
|
|
4
|
-
import { Table } from "./table";
|
|
5
|
-
export interface CreateTableOptions {
    /**
     * How to treat a table that already exists.
     *
     * - `"create"`: an existing table raises an error, unless `existOk` is
     *   true, in which case nothing happens and any provided data is ignored.
     * - `"overwrite"`: any existing table is replaced.
     */
    mode: "create" | "overwrite";
    /**
     * When true, and `mode` is `"create"`, an already-existing table does not
     * raise an error.
     */
    existOk: boolean;
    /**
     * Object storage configuration.
     *
     * The table inherits the options already set on the connection; entries
     * given here override them.
     *
     * Available options: https://lancedb.github.io/lancedb/guides/storage/
     */
    storageOptions?: Record<string, string>;
    /**
     * Version of the data storage format to use.
     *
     * Defaults to `stable`. Pass "legacy" to use the old format.
     */
    dataStorageVersion?: string;
    /**
     * Opt in to the V2 manifest paths, which make opening datasets with many
     * versions on object stores more efficient.
     *
     * WARNING: enabling this makes the dataset unreadable for versions of
     * LanceDB prior to 0.10.0. To migrate an existing dataset, use
     * {@link LocalTable#migrateManifestPathsV2} instead.
     */
    enableV2ManifestPaths?: boolean;
    /**
     * When true, data files are written with the legacy format.
     *
     * Defaults to false.
     *
     * @deprecated Use `dataStorageVersion` instead.
     */
    useLegacyFormat?: boolean;
    schema?: SchemaLike;
    embeddingFunction?: EmbeddingFunctionConfig;
}
|
|
56
|
-
export interface OpenTableOptions {
    /**
     * Object storage configuration.
     *
     * The table inherits the options already set on the connection; entries
     * given here override them.
     *
     * Available options: https://lancedb.github.io/lancedb/guides/storage/
     */
    storageOptions?: Record<string, string>;
    /**
     * Size of the index cache, expressed as a number of entries.
     *
     * What counts as one "entry" depends on the index type:
     * - IVF: one entry per IVF partition
     * - BTREE: one entry for the entire index
     *
     * The cache covers the whole opened table, across all of its indices.
     * A higher value improves performance on larger datasets at the expense
     * of more RAM.
     */
    indexCacheSize?: number;
}
|
|
79
|
-
export interface TableNamesOptions {
    /**
     * When set, only names that sort lexicographically after this value are
     * returned.
     *
     * Combine with `limit` to paginate: pass the last table name of the
     * previous page.
     */
    startAfter?: string;
    /** Optional cap on the number of results returned. */
    limit?: number;
}
|
|
91
|
-
/**
 * A LanceDB connection, used to open existing tables and create new ones.
 *
 * A connection may be local (against a filesystem) or remote (against a
 * server).
 *
 * Connections are meant to be long lived and may hold open resources such as
 * HTTP connection pools. That is generally fine: share a single connection
 * when it is going to be used many times. Once finished with a connection you
 * may call `close` to free those resources eagerly; any Connection method
 * called after that results in an error.
 *
 * Closing is optional. Connections are closed automatically when they are
 * garbage collected.
 *
 * Tables created from a connection are independent of it and keep working
 * even after the underlying connection has been closed.
 */
export declare abstract class Connection {
    /** Return true if the connection has not been closed. */
    abstract isOpen(): boolean;
    /**
     * Close the connection and release any underlying resources.
     *
     * Calling this method multiple times is safe. Any attempt to use the
     * connection after it is closed results in an error.
     */
    abstract close(): void;
    /** Return a brief description of the connection. */
    abstract display(): string;
    /**
     * List the names of all tables in this database, in lexicographical order.
     *
     * @param options - controls the paging / start point
     */
    abstract tableNames(options?: Partial<TableNamesOptions>): Promise<string[]>;
    /**
     * Open a table in the database.
     *
     * @param name - the name of the table
     */
    abstract openTable(name: string, options?: Partial<OpenTableOptions>): Promise<Table>;
    /**
     * Create a new table and initialize it with data.
     *
     * @param options - the options object
     * @param options.name - the name of the table
     * @param options.data - non-empty array of records to insert into the table
     */
    abstract createTable(options: {
        name: string;
        data: Data;
    } & Partial<CreateTableOptions>): Promise<Table>;
    /**
     * Create a new table and initialize it with data.
     *
     * @param name - the name of the table
     * @param data - non-empty array of records to insert into the table
     */
    abstract createTable(name: string, data: Record<string, unknown>[] | TableLike, options?: Partial<CreateTableOptions>): Promise<Table>;
    /**
     * Create a new, empty table.
     *
     * @param name - the name of the table
     * @param schema - the schema of the table
     */
    abstract createEmptyTable(name: string, schema: SchemaLike, options?: Partial<CreateTableOptions>): Promise<Table>;
    /**
     * Drop an existing table.
     *
     * @param name - the name of the table to drop
     */
    abstract dropTable(name: string): Promise<void>;
}
|
|
170
|
-
/**
 * Concrete {@link Connection} that holds a native `LanceDbConnection` in
 * `inner`.
 */
export declare class LocalConnection extends Connection {
    /** The native connection handed to the constructor. */
    readonly inner: LanceDbConnection;
    constructor(inner: LanceDbConnection);
    isOpen(): boolean;
    close(): void;
    display(): string;
    tableNames(options?: Partial<TableNamesOptions>): Promise<string[]>;
    openTable(name: string, options?: Partial<OpenTableOptions>): Promise<Table>;
    /**
     * Single signature covering both `createTable` call forms declared on
     * {@link Connection}: either a table name followed by `data` / `options`,
     * or one options object that carries `name` and `data` itself.
     */
    createTable(
        nameOrOptions: string | ({
            name: string;
            data: Data;
        } & Partial<CreateTableOptions>),
        data?: Record<string, unknown>[] | TableLike,
        options?: Partial<CreateTableOptions>,
    ): Promise<Table>;
    createEmptyTable(name: string, schema: SchemaLike, options?: Partial<CreateTableOptions>): Promise<Table>;
    dropTable(name: string): Promise<void>;
}
|
|
185
|
-
/**
 * Convert every key of a storage-options record to snake case.
 *
 * @param options - the storage options to normalise
 * @returns a record with snake-case keys, or `undefined` when no options are given
 */
export declare function cleanseStorageOptions(
    options?: Record<string, string>,
): Record<string, string> | undefined;
|
package/dist/connection.js
DELETED
|
@@ -1,149 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// Copyright 2024 Lance Developers.
|
|
3
|
-
//
|
|
4
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
// you may not use this file except in compliance with the License.
|
|
6
|
-
// You may obtain a copy of the License at
|
|
7
|
-
//
|
|
8
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
//
|
|
10
|
-
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
// See the License for the specific language governing permissions and
|
|
14
|
-
// limitations under the License.
|
|
15
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
-
exports.LocalConnection = exports.Connection = void 0;
|
|
17
|
-
exports.cleanseStorageOptions = cleanseStorageOptions;
|
|
18
|
-
const arrow_1 = require("./arrow");
|
|
19
|
-
const registry_1 = require("./embedding/registry");
|
|
20
|
-
const table_1 = require("./table");
|
|
21
|
-
/**
|
|
22
|
-
* A LanceDB Connection that allows you to open tables and create new ones.
|
|
23
|
-
*
|
|
24
|
-
* Connection could be local against filesystem or remote against a server.
|
|
25
|
-
*
|
|
26
|
-
* A Connection is intended to be a long lived object and may hold open
|
|
27
|
-
* resources such as HTTP connection pools. This is generally fine and
|
|
28
|
-
* a single connection should be shared if it is going to be used many
|
|
29
|
-
* times. However, if you are finished with a connection, you may call
|
|
30
|
-
* close to eagerly free these resources. Any call to a Connection
|
|
31
|
-
* method after it has been closed will result in an error.
|
|
32
|
-
*
|
|
33
|
-
* Closing a connection is optional. Connections will automatically
|
|
34
|
-
* be closed when they are garbage collected.
|
|
35
|
-
*
|
|
36
|
-
* Any created tables are independent and will continue to work even if
|
|
37
|
-
* the underlying connection has been closed.
|
|
38
|
-
*/
|
|
39
|
-
class Connection {
|
|
40
|
-
[Symbol.for("nodejs.util.inspect.custom")]() {
|
|
41
|
-
return this.display();
|
|
42
|
-
}
|
|
43
|
-
}
|
|
44
|
-
exports.Connection = Connection;
|
|
45
|
-
/**
 * Pick the data storage version string passed to the native layer.
 *
 * An explicit `dataStorageVersion` wins. Otherwise the deprecated
 * `useLegacyFormat` flag maps to "legacy" / "stable". With neither set the
 * result is "stable".
 */
function resolveDataStorageVersion(options) {
    if (options?.dataStorageVersion !== undefined) {
        return options.dataStorageVersion;
    }
    if (options?.useLegacyFormat !== undefined) {
        return options.useLegacyFormat ? "legacy" : "stable";
    }
    return "stable";
}
/**
 * Connection that forwards every operation to the native connection held in
 * `inner` and wraps the native tables it gets back in `LocalTable`.
 */
class LocalConnection extends Connection {
    /** The native connection all calls are delegated to. */
    inner;
    constructor(inner) {
        super();
        this.inner = inner;
    }
    /** Delegates to the native connection's `isOpen`. */
    isOpen() {
        return this.inner.isOpen();
    }
    /** Delegates to the native connection's `close`. */
    close() {
        this.inner.close();
    }
    /** Delegates to the native connection's `display`. */
    display() {
        return this.inner.display();
    }
    /** List table names, forwarding `startAfter` and `limit` positionally. */
    async tableNames(options) {
        return this.inner.tableNames(options?.startAfter, options?.limit);
    }
    /** Open a table by name; storage option keys are snake-cased first. */
    async openTable(name, options) {
        const innerTable = await this.inner.openTable(name, cleanseStorageOptions(options?.storageOptions), options?.indexCacheSize);
        return new table_1.LocalTable(innerTable);
    }
    /**
     * Create a table from data.
     *
     * Accepts either `(name, data, options)` or a single options object that
     * carries `name` and `data`; the latter is unpacked and re-dispatched to
     * the positional form.
     *
     * Throws `Error("data is required")` when no data is supplied.
     */
    async createTable(nameOrOptions, data, options) {
        if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
            // Distinct local names so the method parameters are not shadowed.
            const { name, data: inlineData, ...inlineOptions } = nameOrOptions;
            return this.createTable(name, inlineData, inlineOptions);
        }
        if (data === undefined) {
            throw new Error("data is required");
        }
        const { buf, mode } = await table_1.Table.parseTableData(data, options);
        const innerTable = await this.inner.createTable(nameOrOptions, buf, mode, cleanseStorageOptions(options?.storageOptions), resolveDataStorageVersion(options), options?.enableV2ManifestPaths);
        return new table_1.LocalTable(innerTable);
    }
    /**
     * Create an empty table with the given schema.
     *
     * `mode` defaults to "create"; together with `existOk` it is sent to the
     * native layer as "exist_ok". When an embedding function is configured,
     * the registry's table metadata for it is attached to the empty table.
     */
    async createEmptyTable(name, schema, options) {
        let mode = options?.mode ?? "create";
        const existOk = options?.existOk ?? false;
        if (mode === "create" && existOk) {
            mode = "exist_ok";
        }
        let metadata = undefined;
        if (options?.embeddingFunction !== undefined) {
            const registry = (0, registry_1.getRegistry)();
            metadata = registry.getTableMetadata([options.embeddingFunction]);
        }
        const table = (0, arrow_1.makeEmptyTable)(schema, metadata);
        const buf = await (0, arrow_1.fromTableToBuffer)(table);
        const innerTable = await this.inner.createEmptyTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions), resolveDataStorageVersion(options), options?.enableV2ManifestPaths);
        return new table_1.LocalTable(innerTable);
    }
    /** Drop the named table via the native connection. */
    async dropTable(name) {
        return this.inner.dropTable(name);
    }
}
|
|
114
|
-
exports.LocalConnection = LocalConnection;
|
|
115
|
-
/**
 * Takes storage options and makes all the keys snake case.
 *
 * Entries whose value is `undefined` are dropped. Values are copied through
 * unchanged.
 *
 * @param options - storage options; `undefined` or `null` means "none given"
 * @returns a new object with snake-case keys, or `undefined` when no options
 *   were given
 */
function cleanseStorageOptions(options) {
    // `== null` also catches null, which would otherwise make
    // Object.entries throw a TypeError.
    if (options == null) {
        return undefined;
    }
    const result = {};
    for (const [key, value] of Object.entries(options)) {
        if (value === undefined) {
            continue;
        }
        result[camelToSnakeCase(key)] = value;
    }
    return result;
}
|
|
131
|
-
/**
 * Convert a string to snake case. It might already be snake case, in which
 * case it is returned unchanged.
 *
 * A string is taken to be snake case already when it contains an underscore
 * or is unchanged by upper-casing (for example "TIMEOUT" or "").
 *
 * @param camel - the key to convert
 * @returns the snake-case form of `camel`
 */
function camelToSnakeCase(camel) {
    if (camel.includes("_")) {
        // Assume if there is at least one underscore, it is already snake case
        return camel;
    }
    // Locale-independent on purpose: key normalisation must not vary with the
    // host's default locale.
    if (camel.toUpperCase() === camel) {
        // Assume if the string is all uppercase, it is already snake case
        return camel;
    }
    // Put "_" before every capital except one at the very start, so no
    // leading underscore has to be stripped afterwards.
    return camel.replace(/[A-Z]/g, (letter, offset) => (offset === 0 ? "" : "_") + letter.toLowerCase());
}
|
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
import "reflect-metadata";
|
|
2
|
-
import { DataType, Float, type IntoVector } from "../arrow";
|
|
3
|
-
/**
 * Options accepted by an embedding function: an open-ended bag of
 * string-keyed values.
 */
export interface FunctionOptions {
    [key: string]: any;
}
|
|
9
|
-
/**
 * Constructor shape of an embedding function class: called with the class's
 * own options type (optional) and producing an instance of `T`.
 */
export interface EmbeddingFunctionConstructor<T extends EmbeddingFunction = EmbeddingFunction> {
    new (modelOptions?: T["TOptions"]): T;
}
|
|
12
|
-
/**
 * An embedding function that automatically creates vector representation for
 * a given column.
 */
export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptions = FunctionOptions> {
    /**
     * @ignore
     * Exists only to associate the options type with the class for type
     * checking.
     */
    readonly TOptions: M;
    /**
     * Convert the embedding function to a JSON object.
     *
     * This is used to serialize the embedding function to the schema, so the
     * returned object must contain everything needed to recreate the
     * embedding function.
     *
     * It should return the same object that was passed to the constructor.
     * If it does not, the embedding function cannot be recreated, or could be
     * recreated incorrectly.
     *
     * @example
     * ```ts
     * class MyEmbeddingFunction extends EmbeddingFunction {
     *   constructor(options: {model: string, timeout: number}) {
     *     super();
     *     this.model = options.model;
     *     this.timeout = options.timeout;
     *   }
     *   toJSON() {
     *     return {
     *       model: this.model,
     *       timeout: this.timeout,
     *     };
     *   }
     * }
     * ```
     */
    abstract toJSON(): Partial<M>;
    init?(): Promise<void>;
    /**
     * sourceField is used in combination with `LanceSchema` to provide a
     * declarative data model.
     *
     * @param optionsOrDatatype - the options for the field, or the datatype
     *
     * @see {@link lancedb.LanceSchema}
     */
    sourceField(optionsOrDatatype: Partial<FieldOptions> | DataType): [DataType, Map<string, EmbeddingFunction>];
    /**
     * vectorField is used in combination with `LanceSchema` to provide a
     * declarative data model.
     *
     * @param optionsOrDatatype - the options for the field, or the datatype
     *
     * @see {@link lancedb.LanceSchema}
     */
    vectorField(optionsOrDatatype?: Partial<FieldOptions> | DataType): [DataType, Map<string, EmbeddingFunction>];
    /** The number of dimensions of the embeddings. */
    ndims(): number | undefined;
    /** The datatype of the embeddings. */
    abstract embeddingDataType(): Float;
    /** Create a vector representation for each of the given values. */
    abstract computeSourceEmbeddings(data: T[]): Promise<number[][] | Float32Array[] | Float64Array[]>;
    /** Compute the embeddings for a single query. */
    computeQueryEmbeddings(data: T): Promise<Awaited<IntoVector>>;
}
|
|
77
|
-
/**
 * Abstract base class for embedding functions that take text as input.
 *
 * Subclasses supply `generateEmbeddings`.
 */
export declare abstract class TextEmbeddingFunction<M extends FunctionOptions = FunctionOptions> extends EmbeddingFunction<string, M> {
    /** Produce one embedding per entry of `texts`. */
    abstract generateEmbeddings(
        texts: string[],
        ...args: any[]
    ): Promise<number[][] | Float32Array[] | Float64Array[]>;
    computeQueryEmbeddings(data: string): Promise<Awaited<IntoVector>>;
    embeddingDataType(): Float;
    sourceField(): [DataType, Map<string, EmbeddingFunction>];
    computeSourceEmbeddings(data: string[]): Promise<number[][] | Float32Array[] | Float64Array[]>;
}
|
|
87
|
-
/**
 * Options describing a field: its datatype and, optionally, a number of
 * dimensions.
 */
export interface FieldOptions<T extends DataType = DataType> {
    datatype: T;
    dims?: number;
}
|
|
@@ -1,133 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// Copyright 2024 Lance Developers.
|
|
3
|
-
//
|
|
4
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
// you may not use this file except in compliance with the License.
|
|
6
|
-
// You may obtain a copy of the License at
|
|
7
|
-
//
|
|
8
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
//
|
|
10
|
-
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
// See the License for the specific language governing permissions and
|
|
14
|
-
// limitations under the License.
|
|
15
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
-
exports.TextEmbeddingFunction = exports.EmbeddingFunction = void 0;
|
|
17
|
-
require("reflect-metadata");
|
|
18
|
-
const arrow_1 = require("../arrow");
|
|
19
|
-
const sanitize_1 = require("../sanitize");
|
|
20
|
-
/**
|
|
21
|
-
* An embedding function that automatically creates vector representation for a given column.
|
|
22
|
-
*/
|
|
23
|
-
class EmbeddingFunction {
|
|
24
|
-
/**
|
|
25
|
-
* @ignore
|
|
26
|
-
* This is only used for associating the options type with the class for type checking
|
|
27
|
-
*/
|
|
28
|
-
// biome-ignore lint/style/useNamingConvention: we want to keep the name as it is
|
|
29
|
-
TOptions;
|
|
30
|
-
/**
|
|
31
|
-
* sourceField is used in combination with `LanceSchema` to provide a declarative data model
|
|
32
|
-
*
|
|
33
|
-
* @param optionsOrDatatype - The options for the field or the datatype
|
|
34
|
-
*
|
|
35
|
-
* @see {@link lancedb.LanceSchema}
|
|
36
|
-
*/
|
|
37
|
-
sourceField(optionsOrDatatype) {
|
|
38
|
-
let datatype = "datatype" in optionsOrDatatype
|
|
39
|
-
? optionsOrDatatype.datatype
|
|
40
|
-
: optionsOrDatatype;
|
|
41
|
-
if (!datatype) {
|
|
42
|
-
throw new Error("Datatype is required");
|
|
43
|
-
}
|
|
44
|
-
datatype = (0, sanitize_1.sanitizeType)(datatype);
|
|
45
|
-
const metadata = new Map();
|
|
46
|
-
metadata.set("source_column_for", this);
|
|
47
|
-
return [datatype, metadata];
|
|
48
|
-
}
|
|
49
|
-
/**
|
|
50
|
-
* vectorField is used in combination with `LanceSchema` to provide a declarative data model
|
|
51
|
-
*
|
|
52
|
-
* @param options - The options for the field
|
|
53
|
-
*
|
|
54
|
-
* @see {@link lancedb.LanceSchema}
|
|
55
|
-
*/
|
|
56
|
-
vectorField(optionsOrDatatype) {
|
|
57
|
-
let dtype;
|
|
58
|
-
let vectorType;
|
|
59
|
-
let dims = this.ndims();
|
|
60
|
-
// `func.vectorField(new Float32())`
|
|
61
|
-
if (optionsOrDatatype === undefined) {
|
|
62
|
-
dtype = new arrow_1.Float32();
|
|
63
|
-
}
|
|
64
|
-
else if (!("datatype" in optionsOrDatatype)) {
|
|
65
|
-
dtype = (0, sanitize_1.sanitizeType)(optionsOrDatatype);
|
|
66
|
-
}
|
|
67
|
-
else {
|
|
68
|
-
// `func.vectorField({
|
|
69
|
-
// datatype: new Float32(),
|
|
70
|
-
// dims: 10
|
|
71
|
-
// })`
|
|
72
|
-
dims = dims ?? optionsOrDatatype?.dims;
|
|
73
|
-
dtype = (0, sanitize_1.sanitizeType)(optionsOrDatatype?.datatype);
|
|
74
|
-
}
|
|
75
|
-
if (dtype !== undefined) {
|
|
76
|
-
// `func.vectorField(new FixedSizeList(dims, new Field("item", new Float32(), true)))`
|
|
77
|
-
// or `func.vectorField({datatype: new FixedSizeList(dims, new Field("item", new Float32(), true))})`
|
|
78
|
-
if ((0, arrow_1.isFixedSizeList)(dtype)) {
|
|
79
|
-
vectorType = dtype;
|
|
80
|
-
// `func.vectorField(new Float32())`
|
|
81
|
-
// or `func.vectorField({datatype: new Float32()})`
|
|
82
|
-
}
|
|
83
|
-
else if ((0, arrow_1.isFloat)(dtype)) {
|
|
84
|
-
// No `ndims` impl and no `{dims: n}` provided;
|
|
85
|
-
if (dims === undefined) {
|
|
86
|
-
throw new Error("ndims is required for vector field");
|
|
87
|
-
}
|
|
88
|
-
vectorType = (0, arrow_1.newVectorType)(dims, dtype);
|
|
89
|
-
}
|
|
90
|
-
else {
|
|
91
|
-
throw new Error("Expected FixedSizeList or Float as datatype for vector field");
|
|
92
|
-
}
|
|
93
|
-
}
|
|
94
|
-
else {
|
|
95
|
-
if (dims === undefined) {
|
|
96
|
-
throw new Error("ndims is required for vector field");
|
|
97
|
-
}
|
|
98
|
-
vectorType = new arrow_1.FixedSizeList(dims, new arrow_1.Field("item", new arrow_1.Float32(), true));
|
|
99
|
-
}
|
|
100
|
-
const metadata = new Map();
|
|
101
|
-
metadata.set("vector_column_for", this);
|
|
102
|
-
return [vectorType, metadata];
|
|
103
|
-
}
|
|
104
|
-
/** The number of dimensions of the embeddings */
|
|
105
|
-
ndims() {
|
|
106
|
-
return undefined;
|
|
107
|
-
}
|
|
108
|
-
/**
|
|
109
|
-
Compute the embeddings for a single query
|
|
110
|
-
*/
|
|
111
|
-
async computeQueryEmbeddings(data) {
|
|
112
|
-
return this.computeSourceEmbeddings([data]).then((embeddings) => embeddings[0]);
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
exports.EmbeddingFunction = EmbeddingFunction;
|
|
116
|
-
/**
 * an abstract class for implementing embedding functions that take text as input
 *
 * Subclasses provide `generateEmbeddings`; the other embedding methods here
 * are expressed in terms of it.
 */
class TextEmbeddingFunction extends EmbeddingFunction {
    /**
     * Embed a single query string: passes `[data]` to `generateEmbeddings`
     * and returns the first resulting embedding.
     */
    async computeQueryEmbeddings(data) {
        const embeddings = await this.generateEmbeddings([data]);
        return embeddings[0];
    }
    /** Embeddings are Float32. */
    embeddingDataType() {
        return new arrow_1.Float32();
    }
    /** The source column of a text embedding function is always Utf8. */
    sourceField() {
        return super.sourceField(new arrow_1.Utf8());
    }
    /** Embed a batch of strings by delegating to `generateEmbeddings`. */
    computeSourceEmbeddings(data) {
        return this.generateEmbeddings(data);
    }
}
|
|
133
|
-
exports.TextEmbeddingFunction = TextEmbeddingFunction;
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import { Schema } from "../arrow";
|
|
2
|
-
import { EmbeddingFunction } from "./embedding_function";
|
|
3
|
-
export { EmbeddingFunction, TextEmbeddingFunction } from "./embedding_function";
|
|
4
|
-
export * from "./registry";
|
|
5
|
-
/**
 * Create a schema with embedding functions.
 *
 * @param fields - a record keyed by field name; each value is either a plain
 *   object or a pair of an object and a map of embedding functions, such as
 *   the pairs returned by `sourceField` / `vectorField`
 * @returns Schema
 * @example
 * ```ts
 * class MyEmbeddingFunction extends EmbeddingFunction {
 *   // ...
 * }
 * const func = new MyEmbeddingFunction();
 * const schema = LanceSchema({
 *   id: new Int32(),
 *   text: func.sourceField(new Utf8()),
 *   vector: func.vectorField(),
 *   // optional: specify the datatype and/or dimensions
 *   vector2: func.vectorField({ datatype: new Float32(), dims: 3}),
 * });
 *
 * const table = await db.createTable("my_table", data, { schema });
 * ```
 */
export declare function LanceSchema(
    fields: Record<string, [object, Map<string, EmbeddingFunction>] | object>,
): Schema;
|