@lancedb/lancedb 0.4.20 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -14
- package/biome.json +142 -0
- package/dist/arrow.d.ts +35 -9
- package/dist/arrow.js +247 -19
- package/dist/connection.d.ts +4 -1
- package/dist/connection.js +11 -5
- package/dist/embedding/embedding_function.d.ts +54 -28
- package/dist/embedding/embedding_function.js +71 -10
- package/dist/embedding/index.d.ts +28 -2
- package/dist/embedding/index.js +111 -4
- package/dist/embedding/openai.d.ts +16 -7
- package/dist/embedding/openai.js +62 -12
- package/dist/embedding/registry.d.ts +54 -0
- package/dist/embedding/registry.js +123 -0
- package/dist/native.d.ts +26 -0
- package/dist/query.d.ts +1 -1
- package/dist/query.js +7 -6
- package/dist/sanitize.d.ts +22 -1
- package/dist/sanitize.js +126 -113
- package/dist/table.d.ts +50 -4
- package/dist/table.js +47 -5
- package/lancedb/arrow.ts +283 -49
- package/lancedb/connection.ts +27 -6
- package/lancedb/embedding/embedding_function.ts +126 -42
- package/lancedb/embedding/index.ts +113 -2
- package/lancedb/embedding/openai.ts +62 -16
- package/lancedb/embedding/registry.ts +172 -0
- package/lancedb/query.ts +9 -6
- package/lancedb/sanitize.ts +62 -62
- package/lancedb/table.ts +72 -5
- package/nodejs-artifacts/arrow.d.ts +35 -9
- package/nodejs-artifacts/arrow.js +247 -19
- package/nodejs-artifacts/connection.d.ts +4 -1
- package/nodejs-artifacts/connection.js +11 -5
- package/nodejs-artifacts/embedding/embedding_function.d.ts +54 -28
- package/nodejs-artifacts/embedding/embedding_function.js +71 -10
- package/nodejs-artifacts/embedding/index.d.ts +28 -2
- package/nodejs-artifacts/embedding/index.js +111 -4
- package/nodejs-artifacts/embedding/openai.d.ts +16 -7
- package/nodejs-artifacts/embedding/openai.js +62 -12
- package/nodejs-artifacts/embedding/registry.d.ts +54 -0
- package/nodejs-artifacts/embedding/registry.js +123 -0
- package/nodejs-artifacts/native.d.ts +26 -0
- package/nodejs-artifacts/query.d.ts +1 -1
- package/nodejs-artifacts/query.js +7 -6
- package/nodejs-artifacts/sanitize.d.ts +22 -1
- package/nodejs-artifacts/sanitize.js +126 -113
- package/nodejs-artifacts/table.d.ts +50 -4
- package/nodejs-artifacts/table.js +47 -5
- package/package.json +23 -21
- package/tsconfig.json +3 -1
- package/.eslintignore +0 -3
- package/eslint.config.js +0 -28
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Copyright 2024 Lance Developers.
|
|
3
|
+
//
|
|
4
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
// you may not use this file except in compliance with the License.
|
|
6
|
+
// You may obtain a copy of the License at
|
|
7
|
+
//
|
|
8
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
//
|
|
10
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
// See the License for the specific language governing permissions and
|
|
14
|
+
// limitations under the License.
|
|
15
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
+
exports.getRegistry = exports.register = exports.EmbeddingFunctionRegistry = void 0;
|
|
17
|
+
require("reflect-metadata");
|
|
18
|
+
/**
|
|
19
|
+
* This is a singleton class used to register embedding functions
|
|
20
|
+
* and fetch them by name. It also handles serializing and deserializing.
|
|
21
|
+
* You can implement your own embedding function by subclassing EmbeddingFunction
|
|
22
|
+
* or TextEmbeddingFunction and registering it with the registry
|
|
23
|
+
*/
|
|
24
|
+
class EmbeddingFunctionRegistry {
|
|
25
|
+
#functions = new Map();
|
|
26
|
+
/**
|
|
27
|
+
* Register an embedding function
|
|
28
|
+
* @param name The name of the function
|
|
29
|
+
* @param func The function to register
|
|
30
|
+
*/
|
|
31
|
+
register(alias) {
|
|
32
|
+
const self = this;
|
|
33
|
+
return function (ctor) {
|
|
34
|
+
if (!alias) {
|
|
35
|
+
alias = ctor.name;
|
|
36
|
+
}
|
|
37
|
+
if (self.#functions.has(alias)) {
|
|
38
|
+
throw new Error(`Embedding function with alias "${alias}" already exists`);
|
|
39
|
+
}
|
|
40
|
+
self.#functions.set(alias, ctor);
|
|
41
|
+
Reflect.defineMetadata("lancedb::embedding::name", alias, ctor);
|
|
42
|
+
return ctor;
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Fetch an embedding function by name
|
|
47
|
+
* @param name The name of the function
|
|
48
|
+
*/
|
|
49
|
+
get(name) {
|
|
50
|
+
const factory = this.#functions.get(name);
|
|
51
|
+
if (!factory) {
|
|
52
|
+
return undefined;
|
|
53
|
+
}
|
|
54
|
+
return {
|
|
55
|
+
create: function (options) {
|
|
56
|
+
return new factory(options);
|
|
57
|
+
},
|
|
58
|
+
};
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* reset the registry to the initial state
|
|
62
|
+
*/
|
|
63
|
+
reset() {
|
|
64
|
+
this.#functions.clear();
|
|
65
|
+
}
|
|
66
|
+
parseFunctions(metadata) {
|
|
67
|
+
if (!metadata.has("embedding_functions")) {
|
|
68
|
+
return new Map();
|
|
69
|
+
}
|
|
70
|
+
else {
|
|
71
|
+
const functions = (JSON.parse(metadata.get("embedding_functions")));
|
|
72
|
+
return new Map(functions.map((f) => {
|
|
73
|
+
const fn = this.get(f.name);
|
|
74
|
+
if (!fn) {
|
|
75
|
+
throw new Error(`Function "${f.name}" not found in registry`);
|
|
76
|
+
}
|
|
77
|
+
return [
|
|
78
|
+
f.name,
|
|
79
|
+
{
|
|
80
|
+
sourceColumn: f.sourceColumn,
|
|
81
|
+
vectorColumn: f.vectorColumn,
|
|
82
|
+
function: this.get(f.name).create(f.model),
|
|
83
|
+
},
|
|
84
|
+
];
|
|
85
|
+
}));
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
89
|
+
functionToMetadata(conf) {
|
|
90
|
+
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
91
|
+
const metadata = {};
|
|
92
|
+
const name = Reflect.getMetadata("lancedb::embedding::name", conf.function.constructor);
|
|
93
|
+
metadata["sourceColumn"] = conf.sourceColumn;
|
|
94
|
+
metadata["vectorColumn"] = conf.vectorColumn ?? "vector";
|
|
95
|
+
metadata["name"] = name ?? conf.function.constructor.name;
|
|
96
|
+
metadata["model"] = conf.function.toJSON();
|
|
97
|
+
return metadata;
|
|
98
|
+
}
|
|
99
|
+
getTableMetadata(functions) {
|
|
100
|
+
const metadata = new Map();
|
|
101
|
+
const jsonData = functions.map((conf) => this.functionToMetadata(conf));
|
|
102
|
+
metadata.set("embedding_functions", JSON.stringify(jsonData));
|
|
103
|
+
return metadata;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
exports.EmbeddingFunctionRegistry = EmbeddingFunctionRegistry;
|
|
107
|
+
const _REGISTRY = new EmbeddingFunctionRegistry();
|
|
108
|
+
/**
 * Module-level shortcut that registers an embedding function with the
 * shared registry instance.
 *
 * @param name Alias forwarded to the shared registry's `register` method
 * @returns Whatever the shared registry's `register` returns for that alias
 */
function register(name) {
    const decorator = _REGISTRY.register(name);
    return decorator;
}
|
|
111
|
+
exports.register = register;
|
|
112
|
+
/**
 * Utility function to get the global instance of the registry.
 *
 * @returns `EmbeddingFunctionRegistry` The global instance of the registry
 * @example
 * ```ts
 * const registry = getRegistry();
 * const openai = registry.get("openai").create();
 * ```
 */
function getRegistry() {
    return _REGISTRY;
}
|
|
123
|
+
exports.getRegistry = getRegistry;
|
package/dist/native.d.ts
CHANGED
|
@@ -15,6 +15,31 @@ export interface IndexConfig {
|
|
|
15
15
|
*/
|
|
16
16
|
columns: Array<string>
|
|
17
17
|
}
|
|
18
|
+
/** Statistics about a compaction operation. */
|
|
19
|
+
export interface CompactionStats {
|
|
20
|
+
/** The number of fragments removed */
|
|
21
|
+
fragmentsRemoved: number
|
|
22
|
+
/** The number of new, compacted fragments added */
|
|
23
|
+
fragmentsAdded: number
|
|
24
|
+
/** The number of data files removed */
|
|
25
|
+
filesRemoved: number
|
|
26
|
+
/** The number of new, compacted data files added */
|
|
27
|
+
filesAdded: number
|
|
28
|
+
}
|
|
29
|
+
/** Statistics about a cleanup operation */
|
|
30
|
+
export interface RemovalStats {
|
|
31
|
+
/** The number of bytes removed */
|
|
32
|
+
bytesRemoved: number
|
|
33
|
+
/** The number of old versions removed */
|
|
34
|
+
oldVersionsRemoved: number
|
|
35
|
+
}
|
|
36
|
+
/** Statistics about an optimize operation */
|
|
37
|
+
export interface OptimizeStats {
|
|
38
|
+
/** Statistics about the compaction operation */
|
|
39
|
+
compaction: CompactionStats
|
|
40
|
+
/** Statistics about the removal operation */
|
|
41
|
+
prune: RemovalStats
|
|
42
|
+
}
|
|
18
43
|
/**
|
|
19
44
|
* A definition of a column alteration. The alteration changes the column at
|
|
20
45
|
* `path` to have the new name `name`, to be nullable if `nullable` is true,
|
|
@@ -151,5 +176,6 @@ export class Table {
|
|
|
151
176
|
checkout(version: number): Promise<void>
|
|
152
177
|
checkoutLatest(): Promise<void>
|
|
153
178
|
restore(): Promise<void>
|
|
179
|
+
optimize(olderThanMs?: number | undefined | null): Promise<OptimizeStats>
|
|
154
180
|
listIndices(): Promise<Array<IndexConfig>>
|
|
155
181
|
}
|
package/dist/query.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { Table as ArrowTable, RecordBatch } from "./arrow";
|
|
2
2
|
import { RecordBatchIterator as NativeBatchIterator, Query as NativeQuery, Table as NativeTable, VectorQuery as NativeVectorQuery } from "./native";
|
|
3
3
|
export declare class RecordBatchIterator implements AsyncIterator<RecordBatch> {
|
|
4
4
|
private promisedInner?;
|
package/dist/query.js
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
// limitations under the License.
|
|
15
15
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
16
|
exports.Query = exports.VectorQuery = exports.QueryBase = exports.RecordBatchIterator = void 0;
|
|
17
|
-
const
|
|
17
|
+
const arrow_1 = require("./arrow");
|
|
18
18
|
class RecordBatchIterator {
|
|
19
19
|
promisedInner;
|
|
20
20
|
inner;
|
|
@@ -22,7 +22,7 @@ class RecordBatchIterator {
|
|
|
22
22
|
// TODO: check promise reliably so we dont need to pass two arguments.
|
|
23
23
|
this.promisedInner = promise;
|
|
24
24
|
}
|
|
25
|
-
//
|
|
25
|
+
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
26
26
|
async next() {
|
|
27
27
|
if (this.inner === undefined) {
|
|
28
28
|
this.inner = await this.promisedInner;
|
|
@@ -34,7 +34,7 @@ class RecordBatchIterator {
|
|
|
34
34
|
if (n == null) {
|
|
35
35
|
return Promise.resolve({ done: true, value: null });
|
|
36
36
|
}
|
|
37
|
-
const tbl = (0,
|
|
37
|
+
const tbl = (0, arrow_1.tableFromIPC)(n);
|
|
38
38
|
if (tbl.batches.length != 1) {
|
|
39
39
|
throw new Error("Expected only one batch");
|
|
40
40
|
}
|
|
@@ -48,6 +48,7 @@ class QueryBase {
|
|
|
48
48
|
inner;
|
|
49
49
|
constructor(inner) {
|
|
50
50
|
this.inner = inner;
|
|
51
|
+
// intentionally empty
|
|
51
52
|
}
|
|
52
53
|
/**
|
|
53
54
|
* A filter statement to be applied to this query.
|
|
@@ -136,7 +137,7 @@ class QueryBase {
|
|
|
136
137
|
execute() {
|
|
137
138
|
return new RecordBatchIterator(this.nativeExecute());
|
|
138
139
|
}
|
|
139
|
-
//
|
|
140
|
+
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
140
141
|
[Symbol.asyncIterator]() {
|
|
141
142
|
const promise = this.nativeExecute();
|
|
142
143
|
return new RecordBatchIterator(promise);
|
|
@@ -147,7 +148,7 @@ class QueryBase {
|
|
|
147
148
|
for await (const batch of this) {
|
|
148
149
|
batches.push(batch);
|
|
149
150
|
}
|
|
150
|
-
return new
|
|
151
|
+
return new arrow_1.Table(batches);
|
|
151
152
|
}
|
|
152
153
|
/** Collect the results as an array of objects. */
|
|
153
154
|
async toArray() {
|
|
@@ -338,7 +339,7 @@ class Query extends QueryBase {
|
|
|
338
339
|
* a default `limit` of 10 will be used. @see {@link Query#limit}
|
|
339
340
|
*/
|
|
340
341
|
nearestTo(vector) {
|
|
341
|
-
//
|
|
342
|
+
// biome-ignore lint/suspicious/noExplicitAny: skip
|
|
342
343
|
const vectorQuery = this.inner.nearestTo(Float32Array.from(vector));
|
|
343
344
|
return new VectorQuery(vectorQuery);
|
|
344
345
|
}
|
package/dist/sanitize.d.ts
CHANGED
|
@@ -1,4 +1,25 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import type { TKeys } from "apache-arrow/type";
|
|
2
|
+
import { DataType, Date_, Decimal, DenseUnion, Dictionary, Duration, Field, FixedSizeBinary, FixedSizeList, Float, Int, Interval, List, Map_, Schema, SparseUnion, Struct, Time, Timestamp, TimestampMicrosecond, TimestampMillisecond, TimestampNanosecond, TimestampSecond, Type, Union } from "./arrow";
|
|
3
|
+
export declare function sanitizeMetadata(metadataLike?: unknown): Map<string, string> | undefined;
|
|
4
|
+
export declare function sanitizeInt(typeLike: object): Int<Type.Int | Type.Int8 | Type.Int16 | Type.Int32 | Type.Int64 | Type.Uint8 | Type.Uint16 | Type.Uint32 | Type.Uint64>;
|
|
5
|
+
export declare function sanitizeFloat(typeLike: object): Float<Type.Float | Type.Float16 | Type.Float32 | Type.Float64>;
|
|
6
|
+
export declare function sanitizeDecimal(typeLike: object): Decimal;
|
|
7
|
+
export declare function sanitizeDate(typeLike: object): Date_<import("apache-arrow/type").Dates>;
|
|
8
|
+
export declare function sanitizeTime(typeLike: object): Time<Type.Time | Type.TimeSecond | Type.TimeMillisecond | Type.TimeMicrosecond | Type.TimeNanosecond>;
|
|
9
|
+
export declare function sanitizeTimestamp(typeLike: object): Timestamp<Type.Timestamp | Type.TimestampSecond | Type.TimestampMillisecond | Type.TimestampMicrosecond | Type.TimestampNanosecond>;
|
|
10
|
+
export declare function sanitizeTypedTimestamp(typeLike: object, Datatype: typeof TimestampNanosecond | typeof TimestampMicrosecond | typeof TimestampMillisecond | typeof TimestampSecond): TimestampSecond | TimestampMillisecond | TimestampMicrosecond | TimestampNanosecond;
|
|
11
|
+
export declare function sanitizeInterval(typeLike: object): Interval<Type.Interval | Type.IntervalDayTime | Type.IntervalYearMonth>;
|
|
12
|
+
export declare function sanitizeList(typeLike: object): List<any>;
|
|
13
|
+
export declare function sanitizeStruct(typeLike: object): Struct<any>;
|
|
14
|
+
export declare function sanitizeUnion(typeLike: object): Union<Type.Union | Type.DenseUnion | Type.SparseUnion>;
|
|
15
|
+
export declare function sanitizeTypedUnion(typeLike: object, UnionType: typeof DenseUnion | typeof SparseUnion): SparseUnion | DenseUnion;
|
|
16
|
+
export declare function sanitizeFixedSizeBinary(typeLike: object): FixedSizeBinary;
|
|
17
|
+
export declare function sanitizeFixedSizeList(typeLike: object): FixedSizeList<any>;
|
|
18
|
+
export declare function sanitizeMap(typeLike: object): Map_<any, any>;
|
|
19
|
+
export declare function sanitizeDuration(typeLike: object): Duration<Type.Duration | Type.DurationSecond | Type.DurationMillisecond | Type.DurationMicrosecond | Type.DurationNanosecond>;
|
|
20
|
+
export declare function sanitizeDictionary(typeLike: object): Dictionary<DataType<any, any>, TKeys>;
|
|
21
|
+
export declare function sanitizeType(typeLike: unknown): DataType<any>;
|
|
22
|
+
export declare function sanitizeField(fieldLike: unknown): Field;
|
|
2
23
|
/**
|
|
3
24
|
* Convert something schemaLike into a Schema instance
|
|
4
25
|
*
|