@lancedb/lancedb 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/arrow.d.ts +0 -3
- package/dist/arrow.js +1 -26
- package/dist/connection.d.ts +9 -0
- package/dist/connection.js +16 -2
- package/dist/embedding/embedding_function.d.ts +13 -3
- package/dist/embedding/embedding_function.js +28 -7
- package/dist/embedding/index.d.ts +1 -1
- package/dist/embedding/index.js +6 -6
- package/dist/embedding/openai.d.ts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/sanitize.js +4 -2
- package/package.json +7 -6
package/dist/arrow.d.ts
CHANGED
|
@@ -37,10 +37,7 @@ export type TableLike = ArrowTable | {
|
|
|
37
37
|
batches: RecordBatchLike[];
|
|
38
38
|
};
|
|
39
39
|
export type IntoVector = Float32Array | Float64Array | number[] | Promise<Float32Array | Float64Array | number[]>;
|
|
40
|
-
export type FloatLike = import("apache-arrow-13").Float | import("apache-arrow-14").Float | import("apache-arrow-15").Float | import("apache-arrow-16").Float | import("apache-arrow-17").Float;
|
|
41
|
-
export type DataTypeLike = import("apache-arrow-13").DataType | import("apache-arrow-14").DataType | import("apache-arrow-15").DataType | import("apache-arrow-16").DataType | import("apache-arrow-17").DataType;
|
|
42
40
|
export declare function isArrowTable(value: object): value is TableLike;
|
|
43
|
-
export declare function isDataType(value: unknown): value is DataTypeLike;
|
|
44
41
|
export declare function isNull(value: unknown): value is Null;
|
|
45
42
|
export declare function isInt(value: unknown): value is Int;
|
|
46
43
|
export declare function isFloat(value: unknown): value is Float;
|
package/dist/arrow.js
CHANGED
|
@@ -27,7 +27,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
27
27
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
28
28
|
};
|
|
29
29
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
30
|
-
exports.createEmptyTable = exports.fromTableToStreamBuffer = exports.fromDataToBuffer = exports.fromTableToBuffer = exports.fromRecordsToStreamBuffer = exports.fromRecordsToBuffer = exports.newVectorType = exports.convertToTable = exports.makeEmptyTable = exports.makeArrowTable = exports.MakeArrowTableOptions = exports.VectorColumnOptions = exports.isFixedSizeList = exports.isFixedSizeBinary = exports.isUnion = exports.isStruct = exports.isList = exports.isDuration = exports.isInterval = exports.isTimestamp = exports.isTime = exports.isDate = exports.isDecimal = exports.isBool = exports.isLargeUtf8 = exports.isUtf8 = exports.isLargeBinary = exports.isBinary = exports.isFloat = exports.isInt = exports.isNull = exports.
|
|
30
|
+
exports.createEmptyTable = exports.fromTableToStreamBuffer = exports.fromDataToBuffer = exports.fromTableToBuffer = exports.fromRecordsToStreamBuffer = exports.fromRecordsToBuffer = exports.newVectorType = exports.convertToTable = exports.makeEmptyTable = exports.makeArrowTable = exports.MakeArrowTableOptions = exports.VectorColumnOptions = exports.isFixedSizeList = exports.isFixedSizeBinary = exports.isUnion = exports.isStruct = exports.isList = exports.isDuration = exports.isInterval = exports.isTimestamp = exports.isTime = exports.isDate = exports.isDecimal = exports.isBool = exports.isLargeUtf8 = exports.isUtf8 = exports.isLargeBinary = exports.isBinary = exports.isFloat = exports.isInt = exports.isNull = exports.isArrowTable = void 0;
|
|
31
31
|
const apache_arrow_1 = require("apache-arrow");
|
|
32
32
|
const registry_1 = require("./embedding/registry");
|
|
33
33
|
const sanitize_1 = require("./sanitize");
|
|
@@ -38,31 +38,6 @@ function isArrowTable(value) {
|
|
|
38
38
|
return "schema" in value && "batches" in value;
|
|
39
39
|
}
|
|
40
40
|
exports.isArrowTable = isArrowTable;
|
|
41
|
-
function isDataType(value) {
|
|
42
|
-
return (value instanceof apache_arrow_1.DataType ||
|
|
43
|
-
apache_arrow_1.DataType.isNull(value) ||
|
|
44
|
-
apache_arrow_1.DataType.isInt(value) ||
|
|
45
|
-
apache_arrow_1.DataType.isFloat(value) ||
|
|
46
|
-
apache_arrow_1.DataType.isBinary(value) ||
|
|
47
|
-
apache_arrow_1.DataType.isLargeBinary(value) ||
|
|
48
|
-
apache_arrow_1.DataType.isUtf8(value) ||
|
|
49
|
-
apache_arrow_1.DataType.isLargeUtf8(value) ||
|
|
50
|
-
apache_arrow_1.DataType.isBool(value) ||
|
|
51
|
-
apache_arrow_1.DataType.isDecimal(value) ||
|
|
52
|
-
apache_arrow_1.DataType.isDate(value) ||
|
|
53
|
-
apache_arrow_1.DataType.isTime(value) ||
|
|
54
|
-
apache_arrow_1.DataType.isTimestamp(value) ||
|
|
55
|
-
apache_arrow_1.DataType.isInterval(value) ||
|
|
56
|
-
apache_arrow_1.DataType.isDuration(value) ||
|
|
57
|
-
apache_arrow_1.DataType.isList(value) ||
|
|
58
|
-
apache_arrow_1.DataType.isStruct(value) ||
|
|
59
|
-
apache_arrow_1.DataType.isUnion(value) ||
|
|
60
|
-
apache_arrow_1.DataType.isFixedSizeBinary(value) ||
|
|
61
|
-
apache_arrow_1.DataType.isFixedSizeList(value) ||
|
|
62
|
-
apache_arrow_1.DataType.isMap(value) ||
|
|
63
|
-
apache_arrow_1.DataType.isDictionary(value));
|
|
64
|
-
}
|
|
65
|
-
exports.isDataType = isDataType;
|
|
66
41
|
function isNull(value) {
|
|
67
42
|
return value instanceof apache_arrow_1.Null || apache_arrow_1.DataType.isNull(value);
|
|
68
43
|
}
|
package/dist/connection.d.ts
CHANGED
|
@@ -27,10 +27,19 @@ export interface CreateTableOptions {
|
|
|
27
27
|
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
|
28
28
|
*/
|
|
29
29
|
storageOptions?: Record<string, string>;
|
|
30
|
+
/**
|
|
31
|
+
* The version of the data storage format to use.
|
|
32
|
+
*
|
|
33
|
+
* The default is `legacy`, which is Lance format v1.
|
|
34
|
+
* `stable` is the new format, which is Lance format v2.
|
|
35
|
+
*/
|
|
36
|
+
dataStorageVersion?: string;
|
|
30
37
|
/**
|
|
31
38
|
* If true then data files will be written with the legacy format
|
|
32
39
|
*
|
|
33
40
|
* The default is true while the new format is in beta
|
|
41
|
+
*
|
|
42
|
+
* Deprecated.
|
|
34
43
|
*/
|
|
35
44
|
useLegacyFormat?: boolean;
|
|
36
45
|
schema?: SchemaLike;
|
package/dist/connection.js
CHANGED
|
@@ -72,7 +72,14 @@ class LocalConnection extends Connection {
|
|
|
72
72
|
throw new Error("data is required");
|
|
73
73
|
}
|
|
74
74
|
const { buf, mode } = await table_1.Table.parseTableData(data, options);
|
|
75
|
-
|
|
75
|
+
let dataStorageVersion = "legacy";
|
|
76
|
+
if (options?.dataStorageVersion !== undefined) {
|
|
77
|
+
dataStorageVersion = options.dataStorageVersion;
|
|
78
|
+
}
|
|
79
|
+
else if (options?.useLegacyFormat !== undefined) {
|
|
80
|
+
dataStorageVersion = options.useLegacyFormat ? "legacy" : "stable";
|
|
81
|
+
}
|
|
82
|
+
const innerTable = await this.inner.createTable(nameOrOptions, buf, mode, cleanseStorageOptions(options?.storageOptions), dataStorageVersion);
|
|
76
83
|
return new table_1.LocalTable(innerTable);
|
|
77
84
|
}
|
|
78
85
|
async createEmptyTable(name, schema, options) {
|
|
@@ -87,9 +94,16 @@ class LocalConnection extends Connection {
|
|
|
87
94
|
const registry = (0, registry_1.getRegistry)();
|
|
88
95
|
metadata = registry.getTableMetadata([embeddingFunction]);
|
|
89
96
|
}
|
|
97
|
+
let dataStorageVersion = "legacy";
|
|
98
|
+
if (options?.dataStorageVersion !== undefined) {
|
|
99
|
+
dataStorageVersion = options.dataStorageVersion;
|
|
100
|
+
}
|
|
101
|
+
else if (options?.useLegacyFormat !== undefined) {
|
|
102
|
+
dataStorageVersion = options.useLegacyFormat ? "legacy" : "stable";
|
|
103
|
+
}
|
|
90
104
|
const table = (0, arrow_1.makeEmptyTable)(schema, metadata);
|
|
91
105
|
const buf = await (0, arrow_1.fromTableToBuffer)(table);
|
|
92
|
-
const innerTable = await this.inner.createEmptyTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions),
|
|
106
|
+
const innerTable = await this.inner.createEmptyTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions), dataStorageVersion);
|
|
93
107
|
return new table_1.LocalTable(innerTable);
|
|
94
108
|
}
|
|
95
109
|
async dropTable(name) {
|
package/dist/embedding/embedding_function.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import "reflect-metadata";
|
|
2
|
-
import { DataType,
|
|
2
|
+
import { DataType, Float, type IntoVector } from "../arrow";
|
|
3
3
|
/**
|
|
4
4
|
* Options for a given embedding function
|
|
5
5
|
*/
|
|
@@ -52,7 +52,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
|
|
|
52
52
|
*
|
|
53
53
|
* @see {@link lancedb.LanceSchema}
|
|
54
54
|
*/
|
|
55
|
-
sourceField(optionsOrDatatype: Partial<FieldOptions> |
|
|
55
|
+
sourceField(optionsOrDatatype: Partial<FieldOptions> | DataType): [DataType, Map<string, EmbeddingFunction>];
|
|
56
56
|
/**
|
|
57
57
|
* vectorField is used in combination with `LanceSchema` to provide a declarative data model
|
|
58
58
|
*
|
|
@@ -64,7 +64,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
|
|
|
64
64
|
/** The number of dimensions of the embeddings */
|
|
65
65
|
ndims(): number | undefined;
|
|
66
66
|
/** The datatype of the embeddings */
|
|
67
|
-
abstract embeddingDataType():
|
|
67
|
+
abstract embeddingDataType(): Float;
|
|
68
68
|
/**
|
|
69
69
|
* Creates a vector representation for the given values.
|
|
70
70
|
*/
|
|
@@ -74,6 +74,16 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
|
|
|
74
74
|
*/
|
|
75
75
|
computeQueryEmbeddings(data: T): Promise<Awaited<IntoVector>>;
|
|
76
76
|
}
|
|
77
|
+
/**
|
|
78
|
+
* an abstract class for implementing embedding functions that take text as input
|
|
79
|
+
*/
|
|
80
|
+
export declare abstract class TextEmbeddingFunction<M extends FunctionOptions = FunctionOptions> extends EmbeddingFunction<string, M> {
|
|
81
|
+
abstract generateEmbeddings(texts: string[], ...args: any[]): Promise<number[][] | Float32Array[] | Float64Array[]>;
|
|
82
|
+
computeQueryEmbeddings(data: string): Promise<Awaited<IntoVector>>;
|
|
83
|
+
embeddingDataType(): Float;
|
|
84
|
+
sourceField(): [DataType, Map<string, EmbeddingFunction>];
|
|
85
|
+
computeSourceEmbeddings(data: string[]): Promise<number[][] | Float32Array[] | Float64Array[]>;
|
|
86
|
+
}
|
|
77
87
|
export interface FieldOptions<T extends DataType = DataType> {
|
|
78
88
|
datatype: T;
|
|
79
89
|
dims?: number;
|
package/dist/embedding/embedding_function.js
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
// See the License for the specific language governing permissions and
|
|
14
14
|
// limitations under the License.
|
|
15
15
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
-
exports.EmbeddingFunction = void 0;
|
|
16
|
+
exports.TextEmbeddingFunction = exports.EmbeddingFunction = void 0;
|
|
17
17
|
require("reflect-metadata");
|
|
18
18
|
const arrow_1 = require("../arrow");
|
|
19
19
|
const sanitize_1 = require("../sanitize");
|
|
@@ -35,9 +35,9 @@ class EmbeddingFunction {
|
|
|
35
35
|
* @see {@link lancedb.LanceSchema}
|
|
36
36
|
*/
|
|
37
37
|
sourceField(optionsOrDatatype) {
|
|
38
|
-
let datatype =
|
|
39
|
-
? optionsOrDatatype
|
|
40
|
-
: optionsOrDatatype
|
|
38
|
+
let datatype = "datatype" in optionsOrDatatype
|
|
39
|
+
? optionsOrDatatype.datatype
|
|
40
|
+
: optionsOrDatatype;
|
|
41
41
|
if (!datatype) {
|
|
42
42
|
throw new Error("Datatype is required");
|
|
43
43
|
}
|
|
@@ -58,8 +58,11 @@ class EmbeddingFunction {
|
|
|
58
58
|
let vectorType;
|
|
59
59
|
let dims = this.ndims();
|
|
60
60
|
// `func.vectorField(new Float32())`
|
|
61
|
-
if (
|
|
62
|
-
dtype =
|
|
61
|
+
if (optionsOrDatatype === undefined) {
|
|
62
|
+
dtype = new arrow_1.Float32();
|
|
63
|
+
}
|
|
64
|
+
else if (!("datatype" in optionsOrDatatype)) {
|
|
65
|
+
dtype = (0, sanitize_1.sanitizeType)(optionsOrDatatype);
|
|
63
66
|
}
|
|
64
67
|
else {
|
|
65
68
|
// `func.vectorField({
|
|
@@ -67,7 +70,7 @@ class EmbeddingFunction {
|
|
|
67
70
|
// dims: 10
|
|
68
71
|
// })`
|
|
69
72
|
dims = dims ?? optionsOrDatatype?.dims;
|
|
70
|
-
dtype = optionsOrDatatype?.datatype;
|
|
73
|
+
dtype = (0, sanitize_1.sanitizeType)(optionsOrDatatype?.datatype);
|
|
71
74
|
}
|
|
72
75
|
if (dtype !== undefined) {
|
|
73
76
|
// `func.vectorField(new FixedSizeList(dims, new Field("item", new Float32(), true)))`
|
|
@@ -110,3 +113,21 @@ class EmbeddingFunction {
|
|
|
110
113
|
}
|
|
111
114
|
}
|
|
112
115
|
exports.EmbeddingFunction = EmbeddingFunction;
|
|
116
|
+
/**
|
|
117
|
+
* an abstract class for implementing embedding functions that take text as input
|
|
118
|
+
*/
|
|
119
|
+
class TextEmbeddingFunction extends EmbeddingFunction {
|
|
120
|
+
async computeQueryEmbeddings(data) {
|
|
121
|
+
return this.generateEmbeddings([data]).then((data) => data[0]);
|
|
122
|
+
}
|
|
123
|
+
embeddingDataType() {
|
|
124
|
+
return new arrow_1.Float32();
|
|
125
|
+
}
|
|
126
|
+
sourceField() {
|
|
127
|
+
return super.sourceField(new arrow_1.Utf8());
|
|
128
|
+
}
|
|
129
|
+
computeSourceEmbeddings(data) {
|
|
130
|
+
return this.generateEmbeddings(data);
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
exports.TextEmbeddingFunction = TextEmbeddingFunction;
|
package/dist/embedding/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Schema } from "../arrow";
|
|
2
2
|
import { EmbeddingFunction } from "./embedding_function";
|
|
3
|
-
export { EmbeddingFunction } from "./embedding_function";
|
|
3
|
+
export { EmbeddingFunction, TextEmbeddingFunction } from "./embedding_function";
|
|
4
4
|
export * from "./openai";
|
|
5
5
|
export * from "./transformers";
|
|
6
6
|
export * from "./registry";
|
package/dist/embedding/index.js
CHANGED
|
@@ -27,13 +27,13 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
27
27
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
28
28
|
};
|
|
29
29
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
30
|
-
exports.LanceSchema = exports.EmbeddingFunction = void 0;
|
|
30
|
+
exports.LanceSchema = exports.TextEmbeddingFunction = exports.EmbeddingFunction = void 0;
|
|
31
31
|
const arrow_1 = require("../arrow");
|
|
32
|
-
const arrow_2 = require("../arrow");
|
|
33
32
|
const sanitize_1 = require("../sanitize");
|
|
34
33
|
const registry_1 = require("./registry");
|
|
35
34
|
var embedding_function_1 = require("./embedding_function");
|
|
36
35
|
Object.defineProperty(exports, "EmbeddingFunction", { enumerable: true, get: function () { return embedding_function_1.EmbeddingFunction; } });
|
|
36
|
+
Object.defineProperty(exports, "TextEmbeddingFunction", { enumerable: true, get: function () { return embedding_function_1.TextEmbeddingFunction; } });
|
|
37
37
|
// We need to explicitly export '*' so that the `register` decorator actually registers the class.
|
|
38
38
|
__exportStar(require("./openai"), exports);
|
|
39
39
|
__exportStar(require("./transformers"), exports);
|
|
@@ -64,14 +64,14 @@ function LanceSchema(fields) {
|
|
|
64
64
|
const arrowFields = [];
|
|
65
65
|
const embeddingFunctions = new Map();
|
|
66
66
|
Object.entries(fields).forEach(([key, value]) => {
|
|
67
|
-
if (
|
|
68
|
-
arrowFields.push(new arrow_1.Field(key, (0, sanitize_1.sanitizeType)(value), true));
|
|
69
|
-
}
|
|
70
|
-
else {
|
|
67
|
+
if (Array.isArray(value)) {
|
|
71
68
|
const [dtype, metadata] = value;
|
|
72
69
|
arrowFields.push(new arrow_1.Field(key, (0, sanitize_1.sanitizeType)(dtype), true));
|
|
73
70
|
parseEmbeddingFunctions(embeddingFunctions, key, metadata);
|
|
74
71
|
}
|
|
72
|
+
else {
|
|
73
|
+
arrowFields.push(new arrow_1.Field(key, (0, sanitize_1.sanitizeType)(value), true));
|
|
74
|
+
}
|
|
75
75
|
});
|
|
76
76
|
const registry = (0, registry_1.getRegistry)();
|
|
77
77
|
const metadata = registry.getTableMetadata(Array.from(embeddingFunctions.values()));
|
package/dist/index.d.ts
CHANGED
|
@@ -6,7 +6,7 @@ export { makeArrowTable, MakeArrowTableOptions, Data, VectorColumnOptions, } fro
|
|
|
6
6
|
export { Connection, CreateTableOptions, TableNamesOptions, } from "./connection";
|
|
7
7
|
export { ExecutableQuery, Query, QueryBase, VectorQuery, RecordBatchIterator, } from "./query";
|
|
8
8
|
export { Index, IndexOptions, IvfPqOptions } from "./indices";
|
|
9
|
-
export { Table, AddDataOptions, UpdateOptions } from "./table";
|
|
9
|
+
export { Table, AddDataOptions, UpdateOptions, OptimizeOptions } from "./table";
|
|
10
10
|
export * as embedding from "./embedding";
|
|
11
11
|
/**
|
|
12
12
|
* Connect to a LanceDB instance at the given URI.
|
package/dist/sanitize.js
CHANGED
|
@@ -215,8 +215,10 @@ function sanitizeType(typeLike) {
|
|
|
215
215
|
if (typeof typeLike !== "object" || typeLike === null) {
|
|
216
216
|
throw Error("Expected a Type but object was null/undefined");
|
|
217
217
|
}
|
|
218
|
-
if (!("typeId" in typeLike) ||
|
|
219
|
-
|
|
218
|
+
if (!("typeId" in typeLike) ||
|
|
219
|
+
!(typeof typeLike.typeId !== "function" ||
|
|
220
|
+
typeof typeLike.typeId !== "number")) {
|
|
221
|
+
throw Error("Expected a Type to have a typeId property");
|
|
220
222
|
}
|
|
221
223
|
let typeId;
|
|
222
224
|
if (typeof typeLike.typeId === "function") {
|
package/package.json
CHANGED
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
"vector database",
|
|
11
11
|
"ann"
|
|
12
12
|
],
|
|
13
|
-
"version": "0.8.0",
|
|
13
|
+
"version": "0.9.0",
|
|
14
14
|
"main": "dist/index.js",
|
|
15
15
|
"exports": {
|
|
16
16
|
".": "./dist/index.js",
|
|
@@ -81,6 +81,7 @@
|
|
|
81
81
|
"docs": "typedoc --plugin typedoc-plugin-markdown --out ../docs/src/js lancedb/index.ts",
|
|
82
82
|
"lint": "biome check . && biome format .",
|
|
83
83
|
"lint-fix": "biome check --write . && biome format --write .",
|
|
84
|
+
"prepublishOnly": "napi prepublish -t npm",
|
|
84
85
|
"test": "jest --verbose",
|
|
85
86
|
"integration": "S3_TEST=1 npm run test",
|
|
86
87
|
"universal": "napi universal",
|
|
@@ -91,11 +92,11 @@
|
|
|
91
92
|
"reflect-metadata": "^0.2.2"
|
|
92
93
|
},
|
|
93
94
|
"optionalDependencies": {
|
|
94
|
-
"@lancedb/lancedb-darwin-arm64": "0.
|
|
95
|
-
"@lancedb/lancedb-linux-arm64-gnu": "0.
|
|
96
|
-
"@lancedb/lancedb-darwin-x64": "0.
|
|
97
|
-
"@lancedb/lancedb-linux-x64-gnu": "0.
|
|
98
|
-
"@lancedb/lancedb-win32-x64-msvc": "0.
|
|
95
|
+
"@lancedb/lancedb-darwin-arm64": "0.9.0",
|
|
96
|
+
"@lancedb/lancedb-linux-arm64-gnu": "0.9.0",
|
|
97
|
+
"@lancedb/lancedb-darwin-x64": "0.9.0",
|
|
98
|
+
"@lancedb/lancedb-linux-x64-gnu": "0.9.0",
|
|
99
|
+
"@lancedb/lancedb-win32-x64-msvc": "0.9.0"
|
|
99
100
|
},
|
|
100
101
|
"peerDependencies": {
|
|
101
102
|
"apache-arrow": ">=13.0.0 <=17.0.0"
|