@lancedb/lancedb 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/dist/arrow.d.ts +5 -3
  2. package/dist/arrow.js +1 -1
  3. package/dist/embedding/embedding_function.d.ts +4 -3
  4. package/dist/embedding/index.d.ts +1 -0
  5. package/dist/embedding/index.js +1 -0
  6. package/dist/embedding/registry.d.ts +9 -7
  7. package/dist/embedding/registry.js +24 -6
  8. package/dist/embedding/transformers.d.ts +37 -0
  9. package/dist/embedding/transformers.js +147 -0
  10. package/dist/query.js +15 -9
  11. package/dist/remote/client.d.ts +1 -1
  12. package/dist/remote/client.js +6 -8
  13. package/dist/remote/connection.d.ts +2 -3
  14. package/dist/remote/connection.js +2 -2
  15. package/dist/table.d.ts +3 -0
  16. package/dist/table.js +1 -1
  17. package/package.json +17 -14
  18. package/Cargo.toml +0 -28
  19. package/biome.json +0 -158
  20. package/build.rs +0 -5
  21. package/dist/native.d.ts +0 -208
  22. package/examples/ann_indexes.ts +0 -49
  23. package/examples/basic.ts +0 -149
  24. package/examples/embedding.ts +0 -83
  25. package/examples/filtering.ts +0 -34
  26. package/examples/jsconfig.json +0 -27
  27. package/examples/package-lock.json +0 -79
  28. package/examples/package.json +0 -18
  29. package/examples/search.ts +0 -37
  30. package/jest.config.js +0 -7
  31. package/lancedb/arrow.ts +0 -947
  32. package/lancedb/connection.ts +0 -333
  33. package/lancedb/embedding/embedding_function.ts +0 -194
  34. package/lancedb/embedding/index.ts +0 -113
  35. package/lancedb/embedding/openai.ts +0 -113
  36. package/lancedb/embedding/registry.ts +0 -188
  37. package/lancedb/index.ts +0 -142
  38. package/lancedb/indices.ts +0 -203
  39. package/lancedb/merge.ts +0 -70
  40. package/lancedb/query.ts +0 -507
  41. package/lancedb/remote/client.ts +0 -221
  42. package/lancedb/remote/connection.ts +0 -201
  43. package/lancedb/remote/index.ts +0 -3
  44. package/lancedb/remote/table.ts +0 -226
  45. package/lancedb/sanitize.ts +0 -588
  46. package/lancedb/table.ts +0 -669
  47. package/lancedb/util.ts +0 -69
  48. package/native.d.ts +0 -208
  49. package/nodejs-artifacts/arrow.d.ts +0 -250
  50. package/nodejs-artifacts/arrow.js +0 -768
  51. package/nodejs-artifacts/connection.d.ts +0 -171
  52. package/nodejs-artifacts/connection.js +0 -135
  53. package/nodejs-artifacts/embedding/embedding_function.d.ts +0 -79
  54. package/nodejs-artifacts/embedding/embedding_function.js +0 -112
  55. package/nodejs-artifacts/embedding/index.d.ts +0 -28
  56. package/nodejs-artifacts/embedding/index.js +0 -114
  57. package/nodejs-artifacts/embedding/openai.d.ts +0 -18
  58. package/nodejs-artifacts/embedding/openai.js +0 -105
  59. package/nodejs-artifacts/embedding/registry.d.ts +0 -53
  60. package/nodejs-artifacts/embedding/registry.js +0 -127
  61. package/nodejs-artifacts/index.d.ts +0 -55
  62. package/nodejs-artifacts/index.js +0 -57
  63. package/nodejs-artifacts/indices.d.ts +0 -165
  64. package/nodejs-artifacts/indices.js +0 -71
  65. package/nodejs-artifacts/merge.d.ts +0 -54
  66. package/nodejs-artifacts/merge.js +0 -64
  67. package/nodejs-artifacts/native.d.ts +0 -208
  68. package/nodejs-artifacts/native.js +0 -330
  69. package/nodejs-artifacts/query.d.ts +0 -283
  70. package/nodejs-artifacts/query.js +0 -448
  71. package/nodejs-artifacts/remote/client.d.ts +0 -28
  72. package/nodejs-artifacts/remote/client.js +0 -172
  73. package/nodejs-artifacts/remote/connection.d.ts +0 -25
  74. package/nodejs-artifacts/remote/connection.js +0 -110
  75. package/nodejs-artifacts/remote/index.d.ts +0 -3
  76. package/nodejs-artifacts/remote/index.js +0 -9
  77. package/nodejs-artifacts/remote/table.d.ts +0 -42
  78. package/nodejs-artifacts/remote/table.js +0 -179
  79. package/nodejs-artifacts/sanitize.d.ts +0 -31
  80. package/nodejs-artifacts/sanitize.js +0 -436
  81. package/nodejs-artifacts/table.d.ts +0 -395
  82. package/nodejs-artifacts/table.js +0 -230
  83. package/nodejs-artifacts/util.d.ts +0 -14
  84. package/nodejs-artifacts/util.js +0 -65
  85. package/tsconfig.json +0 -25
  86. package/typedoc.json +0 -10
package/dist/arrow.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  /// <reference types="node" />
2
- import { Table as ArrowTable, Binary, BufferType, DataType, Field, FixedSizeBinary, FixedSizeList, Float, Int, LargeBinary, List, Null, RecordBatch, Schema, Struct, Utf8 } from "apache-arrow";
2
+ import { Table as ArrowTable, Binary, BufferType, Field, FixedSizeBinary, FixedSizeList, Float, Int, LargeBinary, List, Null, RecordBatch, Schema, Struct, Utf8 } from "apache-arrow";
3
3
  import { Buffers } from "apache-arrow/data";
4
4
  import { type EmbeddingFunction } from "./embedding/embedding_function";
5
5
  import { EmbeddingFunctionConfig } from "./embedding/registry";
@@ -37,8 +37,10 @@ export type TableLike = ArrowTable | {
37
37
  batches: RecordBatchLike[];
38
38
  };
39
39
  export type IntoVector = Float32Array | Float64Array | number[] | Promise<Float32Array | Float64Array | number[]>;
40
+ export type FloatLike = import("apache-arrow-13").Float | import("apache-arrow-14").Float | import("apache-arrow-15").Float | import("apache-arrow-16").Float | import("apache-arrow-17").Float;
41
+ export type DataTypeLike = import("apache-arrow-13").DataType | import("apache-arrow-14").DataType | import("apache-arrow-15").DataType | import("apache-arrow-16").DataType | import("apache-arrow-17").DataType;
40
42
  export declare function isArrowTable(value: object): value is TableLike;
41
- export declare function isDataType(value: unknown): value is DataType;
43
+ export declare function isDataType(value: unknown): value is DataTypeLike;
42
44
  export declare function isNull(value: unknown): value is Null;
43
45
  export declare function isInt(value: unknown): value is Int;
44
46
  export declare function isFloat(value: unknown): value is Float;
@@ -200,7 +202,7 @@ export declare function makeEmptyTable(schema: SchemaLike, metadata?: Map<string
200
202
  */
201
203
  export declare function convertToTable(data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunctionConfig, makeTableOptions?: Partial<MakeArrowTableOptions>): Promise<ArrowTable>;
202
204
  /** Creates the Arrow Type for a Vector column with dimension `dim` */
203
- export declare function newVectorType<T extends Float>(dim: number, innerType: T): FixedSizeList<T>;
205
+ export declare function newVectorType<T extends Float>(dim: number, innerType: unknown): FixedSizeList<T>;
204
206
  /**
205
207
  * Serialize an Array of records into a buffer using the Arrow IPC File serialization
206
208
  *
package/dist/arrow.js CHANGED
@@ -466,7 +466,7 @@ function makeVector(values, type, stringAsDictionary) {
466
466
  /** Helper function to apply embeddings from metadata to an input table */
467
467
  async function applyEmbeddingsFromMetadata(table, schema) {
468
468
  const registry = (0, registry_1.getRegistry)();
469
- const functions = registry.parseFunctions(schema.metadata);
469
+ const functions = await registry.parseFunctions(schema.metadata);
470
470
  const columns = Object.fromEntries(table.schema.fields.map((field) => [
471
471
  field.name,
472
472
  table.getChild(field.name),
package/dist/embedding/embedding_function.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import "reflect-metadata";
2
- import { DataType, Float, type IntoVector } from "../arrow";
2
+ import { DataType, DataTypeLike, FloatLike, type IntoVector } from "../arrow";
3
3
  /**
4
4
  * Options for a given embedding function
5
5
  */
@@ -44,6 +44,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
44
44
  * ```
45
45
  */
46
46
  abstract toJSON(): Partial<M>;
47
+ init?(): Promise<void>;
47
48
  /**
48
49
  * sourceField is used in combination with `LanceSchema` to provide a declarative data model
49
50
  *
@@ -51,7 +52,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
51
52
  *
52
53
  * @see {@link lancedb.LanceSchema}
53
54
  */
54
- sourceField(optionsOrDatatype: Partial<FieldOptions> | DataType): [DataType, Map<string, EmbeddingFunction>];
55
+ sourceField(optionsOrDatatype: Partial<FieldOptions> | DataTypeLike): [DataTypeLike, Map<string, EmbeddingFunction>];
55
56
  /**
56
57
  * vectorField is used in combination with `LanceSchema` to provide a declarative data model
57
58
  *
@@ -63,7 +64,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
63
64
  /** The number of dimensions of the embeddings */
64
65
  ndims(): number | undefined;
65
66
  /** The datatype of the embeddings */
66
- abstract embeddingDataType(): Float;
67
+ abstract embeddingDataType(): FloatLike;
67
68
  /**
68
69
  * Creates a vector representation for the given values.
69
70
  */
package/dist/embedding/index.d.ts CHANGED
@@ -2,6 +2,7 @@ import { Schema } from "../arrow";
2
2
  import { EmbeddingFunction } from "./embedding_function";
3
3
  export { EmbeddingFunction } from "./embedding_function";
4
4
  export * from "./openai";
5
+ export * from "./transformers";
5
6
  export * from "./registry";
6
7
  /**
7
8
  * Create a schema with embedding functions.
package/dist/embedding/index.js CHANGED
@@ -36,6 +36,7 @@ var embedding_function_1 = require("./embedding_function");
36
36
  Object.defineProperty(exports, "EmbeddingFunction", { enumerable: true, get: function () { return embedding_function_1.EmbeddingFunction; } });
37
37
  // We need to explicitly export '*' so that the `register` decorator actually registers the class.
38
38
  __exportStar(require("./openai"), exports);
39
+ __exportStar(require("./transformers"), exports);
39
40
  __exportStar(require("./registry"), exports);
40
41
  /**
41
42
  * Create a schema with embedding functions.
package/dist/embedding/registry.d.ts CHANGED
@@ -1,8 +1,12 @@
1
1
  import { type EmbeddingFunction, type EmbeddingFunctionConstructor } from "./embedding_function";
2
2
  import "reflect-metadata";
3
3
  import { OpenAIEmbeddingFunction } from "./openai";
4
+ import { TransformersEmbeddingFunction } from "./transformers";
5
+ type CreateReturnType<T> = T extends {
6
+ init: () => Promise<void>;
7
+ } ? Promise<T> : T;
4
8
  interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
5
- create(options?: T["TOptions"]): T;
9
+ create(options?: T["TOptions"]): CreateReturnType<T>;
6
10
  }
7
11
  /**
8
12
  * This is a singleton class used to register embedding functions
@@ -19,11 +23,9 @@ export declare class EmbeddingFunctionRegistry {
19
23
  * @throws Error if the function is already registered
20
24
  */
21
25
  register<T extends EmbeddingFunctionConstructor = EmbeddingFunctionConstructor>(this: EmbeddingFunctionRegistry, alias?: string): (ctor: T) => any;
22
- /**
23
- * Fetch an embedding function by name
24
- * @param name The name of the function
25
- */
26
- get<T extends EmbeddingFunction<unknown>, Name extends string = "">(name: Name extends "openai" ? "openai" : string): Name extends "openai" ? EmbeddingFunctionCreate<OpenAIEmbeddingFunction> : EmbeddingFunctionCreate<T> | undefined;
26
+ get(name: "openai"): EmbeddingFunctionCreate<OpenAIEmbeddingFunction>;
27
+ get(name: "huggingface"): EmbeddingFunctionCreate<TransformersEmbeddingFunction>;
28
+ get<T extends EmbeddingFunction<unknown>>(name: string): EmbeddingFunctionCreate<T> | undefined;
27
29
  /**
28
30
  * reset the registry to the initial state
29
31
  */
@@ -31,7 +33,7 @@ export declare class EmbeddingFunctionRegistry {
31
33
  /**
32
34
  * @ignore
33
35
  */
34
- parseFunctions(this: EmbeddingFunctionRegistry, metadata: Map<string, string>): Map<string, EmbeddingFunctionConfig>;
36
+ parseFunctions(this: EmbeddingFunctionRegistry, metadata: Map<string, string>): Promise<Map<string, EmbeddingFunctionConfig>>;
35
37
  functionToMetadata(conf: EmbeddingFunctionConfig): Record<string, any>;
36
38
  getTableMetadata(functions: EmbeddingFunctionConfig[]): Map<string, string>;
37
39
  }
package/dist/embedding/registry.js CHANGED
@@ -50,12 +50,28 @@ class EmbeddingFunctionRegistry {
50
50
  get(name) {
51
51
  const factory = this.#functions.get(name);
52
52
  if (!factory) {
53
+ // biome-ignore lint/suspicious/noExplicitAny: <explanation>
53
54
  return undefined;
54
55
  }
56
+ // biome-ignore lint/suspicious/noExplicitAny: <explanation>
57
+ let create;
58
+ if (factory.prototype.init) {
59
+ // biome-ignore lint/suspicious/noExplicitAny: <explanation>
60
+ create = async function (options) {
61
+ const instance = new factory(options);
62
+ await instance.init();
63
+ return instance;
64
+ };
65
+ }
66
+ else {
67
+ // biome-ignore lint/suspicious/noExplicitAny: <explanation>
68
+ create = function (options) {
69
+ const instance = new factory(options);
70
+ return instance;
71
+ };
72
+ }
55
73
  return {
56
- create: function (options) {
57
- return new factory(options);
58
- },
74
+ create,
59
75
  };
60
76
  }
61
77
  /**
@@ -67,26 +83,28 @@ class EmbeddingFunctionRegistry {
67
83
  /**
68
84
  * @ignore
69
85
  */
70
- parseFunctions(metadata) {
86
+ async parseFunctions(metadata) {
71
87
  if (!metadata.has("embedding_functions")) {
72
88
  return new Map();
73
89
  }
74
90
  else {
75
91
  const functions = (JSON.parse(metadata.get("embedding_functions")));
76
- return new Map(functions.map((f) => {
92
+ const items = await Promise.all(functions.map(async (f) => {
77
93
  const fn = this.get(f.name);
78
94
  if (!fn) {
79
95
  throw new Error(`Function "${f.name}" not found in registry`);
80
96
  }
97
+ const func = await this.get(f.name).create(f.model);
81
98
  return [
82
99
  f.name,
83
100
  {
84
101
  sourceColumn: f.sourceColumn,
85
102
  vectorColumn: f.vectorColumn,
86
- function: this.get(f.name).create(f.model),
103
+ function: func,
87
104
  },
88
105
  ];
89
106
  }));
107
+ return new Map(items);
90
108
  }
91
109
  }
92
110
  // biome-ignore lint/suspicious/noExplicitAny: <explanation>
package/dist/embedding/transformers.d.ts ADDED
@@ -0,0 +1,37 @@
1
+ import { Float } from "../arrow";
2
+ import { EmbeddingFunction } from "./embedding_function";
3
+ export type XenovaTransformerOptions = {
4
+ /** The wasm compatible model to use */
5
+ model: string;
6
+ /**
7
+ * The wasm compatible tokenizer to use
8
+ * If not provided, it will use the default tokenizer for the model
9
+ */
10
+ tokenizer?: string;
11
+ /**
12
+ * The number of dimensions of the embeddings
13
+ *
14
+ * We will attempt to infer this from the model config if not provided.
15
+ * Since there isn't a standard way to get this information from the model,
16
+ * you may need to manually specify this if using a model that doesn't have a 'hidden_size' in the config.
17
+ * */
18
+ ndims?: number;
19
+ /** Options for the tokenizer */
20
+ tokenizerOptions?: {
21
+ textPair?: string | string[];
22
+ padding?: boolean | "max_length";
23
+ addSpecialTokens?: boolean;
24
+ truncation?: boolean;
25
+ maxLength?: number;
26
+ };
27
+ };
28
+ export declare class TransformersEmbeddingFunction extends EmbeddingFunction<string, Partial<XenovaTransformerOptions>> {
29
+ #private;
30
+ constructor(options?: Partial<XenovaTransformerOptions>);
31
+ toJSON(): Record<string, any>;
32
+ init(): Promise<void>;
33
+ ndims(): number;
34
+ embeddingDataType(): Float;
35
+ computeSourceEmbeddings(data: string[]): Promise<number[][]>;
36
+ computeQueryEmbeddings(data: string): Promise<number[]>;
37
+ }
package/dist/embedding/transformers.js ADDED
@@ -0,0 +1,147 @@
1
+ "use strict";
2
+ // Copyright 2023 Lance Developers.
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+ var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
16
+ var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
17
+ if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
18
+ else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
19
+ return c > 3 && r && Object.defineProperty(target, key, r), r;
20
+ };
21
+ var __metadata = (this && this.__metadata) || function (k, v) {
22
+ if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
23
+ };
24
+ Object.defineProperty(exports, "__esModule", { value: true });
25
+ exports.TransformersEmbeddingFunction = void 0;
26
+ const arrow_1 = require("../arrow");
27
+ const embedding_function_1 = require("./embedding_function");
28
+ const registry_1 = require("./registry");
29
+ let TransformersEmbeddingFunction = class TransformersEmbeddingFunction extends embedding_function_1.EmbeddingFunction {
30
+ #model;
31
+ #tokenizer;
32
+ #modelName;
33
+ #initialized = false;
34
+ #tokenizerOptions;
35
+ #ndims;
36
+ constructor(options = {
37
+ model: "Xenova/all-MiniLM-L6-v2",
38
+ }) {
39
+ super();
40
+ const modelName = options?.model ?? "Xenova/all-MiniLM-L6-v2";
41
+ this.#tokenizerOptions = {
42
+ padding: true,
43
+ ...options.tokenizerOptions,
44
+ };
45
+ this.#ndims = options.ndims;
46
+ this.#modelName = modelName;
47
+ }
48
+ toJSON() {
49
+ // biome-ignore lint/suspicious/noExplicitAny: <explanation>
50
+ const obj = {
51
+ model: this.#modelName,
52
+ };
53
+ if (this.#ndims) {
54
+ obj["ndims"] = this.#ndims;
55
+ }
56
+ if (this.#tokenizerOptions) {
57
+ obj["tokenizerOptions"] = this.#tokenizerOptions;
58
+ }
59
+ if (this.#tokenizer) {
60
+ obj["tokenizer"] = this.#tokenizer.name;
61
+ }
62
+ return obj;
63
+ }
64
+ async init() {
65
+ let transformers;
66
+ try {
67
+ // SAFETY:
68
+ // since typescript transpiles `import` to `require`, we need to do this in an unsafe way
69
+ // We can't use `require` because `@xenova/transformers` is an ESM module
70
+ // and we can't use `import` directly because typescript will transpile it to `require`.
71
+ // and we want to remain compatible with both ESM and CJS modules
72
+ // so we use `eval` to bypass typescript for this specific import.
73
+ transformers = await eval('import("@xenova/transformers")');
74
+ }
75
+ catch (e) {
76
+ throw new Error(`error loading @xenova/transformers\nReason: ${e}`);
77
+ }
78
+ try {
79
+ this.#model = await transformers.AutoModel.from_pretrained(this.#modelName);
80
+ }
81
+ catch (e) {
82
+ throw new Error(`error loading model ${this.#modelName}. Make sure you are using a wasm compatible model.\nReason: ${e}`);
83
+ }
84
+ try {
85
+ this.#tokenizer = await transformers.AutoTokenizer.from_pretrained(this.#modelName);
86
+ }
87
+ catch (e) {
88
+ throw new Error(`error loading tokenizer for ${this.#modelName}. Make sure you are using a wasm compatible model:\nReason: ${e}`);
89
+ }
90
+ this.#initialized = true;
91
+ }
92
+ ndims() {
93
+ if (this.#ndims) {
94
+ return this.#ndims;
95
+ }
96
+ else {
97
+ const config = this.#model.config;
98
+ const ndims = config["hidden_size"];
99
+ if (!ndims) {
100
+ throw new Error("hidden_size not found in model config, you may need to manually specify the embedding dimensions. ");
101
+ }
102
+ return ndims;
103
+ }
104
+ }
105
+ embeddingDataType() {
106
+ return new arrow_1.Float32();
107
+ }
108
+ async computeSourceEmbeddings(data) {
109
+ // this should only happen if the user is trying to use the function directly.
110
+ // Anything going through the registry should already be initialized.
111
+ if (!this.#initialized) {
112
+ return Promise.reject(new Error("something went wrong: embedding function not initialized. Please call init()"));
113
+ }
114
+ const tokenizer = this.#tokenizer;
115
+ const model = this.#model;
116
+ const inputs = await tokenizer(data, this.#tokenizerOptions);
117
+ let tokens = await model.forward(inputs);
118
+ tokens = tokens[Object.keys(tokens)[0]];
119
+ const [nItems, nTokens] = tokens.dims;
120
+ tokens = tensorDiv(tokens.sum(1), nTokens);
121
+ // TODO: support other data types
122
+ const tokenData = tokens.data;
123
+ const stride = this.ndims();
124
+ const embeddings = [];
125
+ for (let i = 0; i < nItems; i++) {
126
+ const start = i * stride;
127
+ const end = start + stride;
128
+ const slice = tokenData.slice(start, end);
129
+ embeddings.push(Array.from(slice)); // TODO: Avoid copy here
130
+ }
131
+ return embeddings;
132
+ }
133
+ async computeQueryEmbeddings(data) {
134
+ return (await this.computeSourceEmbeddings([data]))[0];
135
+ }
136
+ };
137
+ exports.TransformersEmbeddingFunction = TransformersEmbeddingFunction;
138
+ exports.TransformersEmbeddingFunction = TransformersEmbeddingFunction = __decorate([
139
+ (0, registry_1.register)("huggingface"),
140
+ __metadata("design:paramtypes", [Object])
141
+ ], TransformersEmbeddingFunction);
142
+ const tensorDiv = (src, divBy) => {
143
+ for (let i = 0; i < src.data.length; ++i) {
144
+ src.data[i] /= divBy;
145
+ }
146
+ return src;
147
+ };
package/dist/query.js CHANGED
@@ -129,22 +129,28 @@ class QueryBase {
129
129
  * object insertion order is easy to get wrong and `Map` is more foolproof.
130
130
  */
131
131
  select(columns) {
132
- let columnTuples;
132
+ const selectColumns = (columnArray) => {
133
+ this.doCall((inner) => {
134
+ inner.selectColumns(columnArray);
135
+ });
136
+ };
137
+ const selectMapping = (columnTuples) => {
138
+ this.doCall((inner) => {
139
+ inner.select(columnTuples);
140
+ });
141
+ };
133
142
  if (typeof columns === "string") {
134
- columns = [columns];
143
+ selectColumns([columns]);
135
144
  }
136
- if (Array.isArray(columns)) {
137
- columnTuples = columns.map((c) => [c, c]);
145
+ else if (Array.isArray(columns)) {
146
+ selectColumns(columns);
138
147
  }
139
148
  else if (columns instanceof Map) {
140
- columnTuples = Array.from(columns.entries());
149
+ selectMapping(Array.from(columns.entries()));
141
150
  }
142
151
  else {
143
- columnTuples = Object.entries(columns);
152
+ selectMapping(Object.entries(columns));
144
153
  }
145
- this.doCall((inner) => {
146
- inner.select(columnTuples);
147
- });
148
154
  return this;
149
155
  }
150
156
  /**
package/dist/remote/client.d.ts CHANGED
@@ -4,7 +4,7 @@ import { Table as ArrowTable } from "../arrow";
4
4
  import { VectorQuery } from "../query";
5
5
  export declare class RestfulLanceDBClient {
6
6
  #private;
7
- constructor(dbName: string, apiKey: string, region: string, hostOverride?: string, connectionTimeout?: number, readTimeout?: number);
7
+ constructor(dbName: string, apiKey: string, region: string, hostOverride?: string, timeout?: number);
8
8
  get session(): import("axios").AxiosInstance;
9
9
  get url(): string;
10
10
  get headers(): {
package/dist/remote/client.js CHANGED
@@ -22,16 +22,14 @@ class RestfulLanceDBClient {
22
22
  #apiKey;
23
23
  #hostOverride;
24
24
  #closed = false;
25
- #connectionTimeout = 12 * 1000; // 12 seconds;
26
- #readTimeout = 30 * 1000; // 30 seconds;
25
+ #timeout = 12 * 1000; // 12 seconds;
27
26
  #session;
28
- constructor(dbName, apiKey, region, hostOverride, connectionTimeout, readTimeout) {
27
+ constructor(dbName, apiKey, region, hostOverride, timeout) {
29
28
  this.#dbName = dbName;
30
29
  this.#apiKey = apiKey;
31
30
  this.#region = region;
32
31
  this.#hostOverride = hostOverride ?? this.#hostOverride;
33
- this.#connectionTimeout = connectionTimeout ?? this.#connectionTimeout;
34
- this.#readTimeout = readTimeout ?? this.#readTimeout;
32
+ this.#timeout = timeout ?? this.#timeout;
35
33
  }
36
34
  // todo: cache the session.
37
35
  get session() {
@@ -46,7 +44,7 @@ class RestfulLanceDBClient {
46
44
  Authorization: `Bearer ${this.#apiKey}`,
47
45
  },
48
46
  transformResponse: decodeErrorData,
49
- timeout: this.#connectionTimeout,
47
+ timeout: this.#timeout,
50
48
  });
51
49
  }
52
50
  }
@@ -91,7 +89,7 @@ class RestfulLanceDBClient {
91
89
  });
92
90
  }
93
91
  catch (e) {
94
- if (e instanceof axios_1.AxiosError) {
92
+ if (e instanceof axios_1.AxiosError && e.response) {
95
93
  response = e.response;
96
94
  }
97
95
  else {
@@ -120,7 +118,7 @@ class RestfulLanceDBClient {
120
118
  });
121
119
  }
122
120
  catch (e) {
123
- if (e instanceof axios_1.AxiosError) {
121
+ if (e instanceof axios_1.AxiosError && e.response) {
124
122
  response = e.response;
125
123
  }
126
124
  else {
package/dist/remote/connection.d.ts CHANGED
@@ -5,12 +5,11 @@ export interface RemoteConnectionOptions {
5
5
  apiKey?: string;
6
6
  region?: string;
7
7
  hostOverride?: string;
8
- connectionTimeout?: number;
9
- readTimeout?: number;
8
+ timeout?: number;
10
9
  }
11
10
  export declare class RemoteConnection extends Connection {
12
11
  #private;
13
- constructor(url: string, { apiKey, region, hostOverride, connectionTimeout, readTimeout, }: RemoteConnectionOptions);
12
+ constructor(url: string, { apiKey, region, hostOverride, timeout }: RemoteConnectionOptions);
14
13
  isOpen(): boolean;
15
14
  close(): void;
16
15
  display(): string;
package/dist/remote/connection.js CHANGED
@@ -13,7 +13,7 @@ class RemoteConnection extends connection_1.Connection {
13
13
  #region;
14
14
  #client;
15
15
  #tableCache = new util_1.TTLCache(300000);
16
- constructor(url, { apiKey, region, hostOverride, connectionTimeout, readTimeout, }) {
16
+ constructor(url, { apiKey, region, hostOverride, timeout }) {
17
17
  super();
18
18
  apiKey = apiKey ?? process.env.LANCEDB_API_KEY;
19
19
  region = region ?? process.env.LANCEDB_REGION;
@@ -30,7 +30,7 @@ class RemoteConnection extends connection_1.Connection {
30
30
  this.#dbName = parsed.hostname;
31
31
  this.#apiKey = apiKey;
32
32
  this.#region = region;
33
- this.#client = new client_1.RestfulLanceDBClient(this.#dbName, this.#apiKey, this.#region, hostOverride, connectionTimeout, readTimeout);
33
+ this.#client = new client_1.RestfulLanceDBClient(this.#dbName, this.#apiKey, this.#region, hostOverride, timeout);
34
34
  }
35
35
  isOpen() {
36
36
  return this.#client.isOpen();
package/dist/table.d.ts CHANGED
@@ -221,12 +221,15 @@ export declare abstract class Table {
221
221
  * of the given query vector
222
222
  * @param {string} query - the query. This will be converted to a vector using the table's provided embedding function
223
223
  * @note If no embedding functions are defined in the table, this will error when collecting the results.
224
+ *
225
+ * This is just a convenience method for calling `.query().nearestTo(await myEmbeddingFunction(query))`
224
226
  */
225
227
  abstract search(query: string): VectorQuery;
226
228
  /**
227
229
  * Create a search query to find the nearest neighbors
228
230
  * of the given query vector
229
231
  * @param {IntoVector} query - the query vector
232
+ * This is just a convenience method for calling `.query().nearestTo(query)`
230
233
  */
231
234
  abstract search(query: IntoVector): VectorQuery;
232
235
  /**
package/dist/table.js CHANGED
@@ -93,7 +93,7 @@ class LocalTable extends Table {
93
93
  const mode = options?.mode ?? "append";
94
94
  const schema = await this.schema();
95
95
  const registry = (0, registry_1.getRegistry)();
96
- const functions = registry.parseFunctions(schema.metadata);
96
+ const functions = await registry.parseFunctions(schema.metadata);
97
97
  const buffer = await (0, arrow_1.fromDataToBuffer)(data, functions.values().next().value, schema);
98
98
  await this.inner.add(buffer, mode);
99
99
  }
package/package.json CHANGED
@@ -10,7 +10,7 @@
10
10
  "vector database",
11
11
  "ann"
12
12
  ],
13
- "version": "0.7.1",
13
+ "version": "0.8.0",
14
14
  "main": "dist/index.js",
15
15
  "exports": {
16
16
  ".": "./dist/index.js",
@@ -32,25 +32,29 @@
32
32
  },
33
33
  "license": "Apache 2.0",
34
34
  "devDependencies": {
35
+ "@aws-sdk/client-dynamodb": "^3.33.0",
35
36
  "@aws-sdk/client-kms": "^3.33.0",
36
37
  "@aws-sdk/client-s3": "^3.33.0",
37
- "@aws-sdk/client-dynamodb": "^3.33.0",
38
38
  "@biomejs/biome": "^1.7.3",
39
39
  "@jest/globals": "^29.7.0",
40
40
  "@napi-rs/cli": "^2.18.3",
41
+ "@types/axios": "^0.14.0",
41
42
  "@types/jest": "^29.1.2",
42
43
  "@types/tmp": "^0.2.6",
43
- "apache-arrow-old": "npm:apache-arrow@13.0.0",
44
+ "apache-arrow-13": "npm:apache-arrow@13.0.0",
45
+ "apache-arrow-14": "npm:apache-arrow@14.0.0",
46
+ "apache-arrow-15": "npm:apache-arrow@15.0.0",
47
+ "apache-arrow-16": "npm:apache-arrow@16.0.0",
48
+ "apache-arrow-17": "npm:apache-arrow@17.0.0",
44
49
  "eslint": "^8.57.0",
45
50
  "jest": "^29.7.0",
46
51
  "shx": "^0.3.4",
47
52
  "tmp": "^0.2.3",
48
53
  "ts-jest": "^29.1.2",
49
- "typedoc": "^0.25.7",
50
- "typedoc-plugin-markdown": "^3.17.1",
54
+ "typedoc": "^0.26.4",
55
+ "typedoc-plugin-markdown": "^4.2.1",
51
56
  "typescript": "^5.3.3",
52
- "typescript-eslint": "^7.1.0",
53
- "@types/axios": "^0.14.0"
57
+ "typescript-eslint": "^7.1.0"
54
58
  },
55
59
  "ava": {
56
60
  "timeout": "3m"
@@ -77,7 +81,6 @@
77
81
  "docs": "typedoc --plugin typedoc-plugin-markdown --out ../docs/src/js lancedb/index.ts",
78
82
  "lint": "biome check . && biome format .",
79
83
  "lint-fix": "biome check --write . && biome format --write .",
80
- "prepublishOnly": "napi prepublish -t npm",
81
84
  "test": "jest --verbose",
82
85
  "integration": "S3_TEST=1 npm run test",
83
86
  "universal": "napi universal",
@@ -88,13 +91,13 @@
88
91
  "reflect-metadata": "^0.2.2"
89
92
  },
90
93
  "optionalDependencies": {
91
- "@lancedb/lancedb-darwin-arm64": "0.7.1",
92
- "@lancedb/lancedb-linux-arm64-gnu": "0.7.1",
93
- "@lancedb/lancedb-darwin-x64": "0.7.1",
94
- "@lancedb/lancedb-linux-x64-gnu": "0.7.1",
95
- "@lancedb/lancedb-win32-x64-msvc": "0.7.1"
94
+ "@lancedb/lancedb-darwin-arm64": "0.8.0",
95
+ "@lancedb/lancedb-linux-arm64-gnu": "0.8.0",
96
+ "@lancedb/lancedb-darwin-x64": "0.8.0",
97
+ "@lancedb/lancedb-linux-x64-gnu": "0.8.0",
98
+ "@lancedb/lancedb-win32-x64-msvc": "0.8.0"
96
99
  },
97
100
  "peerDependencies": {
98
- "apache-arrow": "^15.0.0"
101
+ "apache-arrow": ">=13.0.0 <=17.0.0"
99
102
  }
100
103
  }
package/Cargo.toml DELETED
@@ -1,28 +0,0 @@
1
- [package]
2
- name = "lancedb-nodejs"
3
- edition.workspace = true
4
- version = "0.0.0"
5
- license.workspace = true
6
- description.workspace = true
7
- repository.workspace = true
8
- keywords.workspace = true
9
- categories.workspace = true
10
-
11
- [lib]
12
- crate-type = ["cdylib"]
13
-
14
- [dependencies]
15
- arrow-ipc.workspace = true
16
- futures.workspace = true
17
- lancedb = { path = "../rust/lancedb" }
18
- napi = { version = "2.16.8", default-features = false, features = [
19
- "napi9",
20
- "async",
21
- ] }
22
- napi-derive = "2.16.4"
23
-
24
- # Prevent dynamic linking of lzma, which comes from datafusion
25
- lzma-sys = { version = "*", features = ["static"] }
26
-
27
- [build-dependencies]
28
- napi-build = "2.1"