@lancedb/lancedb 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lancedb/query.ts CHANGED
@@ -12,7 +12,12 @@
12
12
  // See the License for the specific language governing permissions and
13
13
  // limitations under the License.
14
14
 
15
- import { Table as ArrowTable, RecordBatch, tableFromIPC } from "./arrow";
15
+ import {
16
+ Table as ArrowTable,
17
+ type IntoVector,
18
+ RecordBatch,
19
+ tableFromIPC,
20
+ } from "./arrow";
16
21
  import { type IvfPqOptions } from "./indices";
17
22
  import {
18
23
  RecordBatchIterator as NativeBatchIterator,
@@ -50,6 +55,39 @@ export class RecordBatchIterator implements AsyncIterator<RecordBatch> {
50
55
  }
51
56
  /* eslint-enable */
52
57
 
58
+ class RecordBatchIterable<
59
+ NativeQueryType extends NativeQuery | NativeVectorQuery,
60
+ > implements AsyncIterable<RecordBatch>
61
+ {
62
+ private inner: NativeQueryType;
63
+ private options?: QueryExecutionOptions;
64
+
65
+ constructor(inner: NativeQueryType, options?: QueryExecutionOptions) {
66
+ this.inner = inner;
67
+ this.options = options;
68
+ }
69
+
70
+ // biome-ignore lint/suspicious/noExplicitAny: skip
71
+ [Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>, any, undefined> {
72
+ return new RecordBatchIterator(
73
+ this.inner.execute(this.options?.maxBatchLength),
74
+ );
75
+ }
76
+ }
77
+
78
+ /**
79
+ * Options that control the behavior of a particular query execution
80
+ */
81
+ export interface QueryExecutionOptions {
82
+ /**
83
+ * The maximum number of rows to return in a single batch
84
+ *
85
+ * Batches may have fewer rows if the underlying data is stored
86
+ * in smaller chunks.
87
+ */
88
+ maxBatchLength?: number;
89
+ }
90
+
53
91
  /** Common methods supported by all query types */
54
92
  export class QueryBase<
55
93
  NativeQueryType extends NativeQuery | NativeVectorQuery,
@@ -108,9 +146,12 @@ export class QueryBase<
108
146
  * object insertion order is easy to get wrong and `Map` is more foolproof.
109
147
  */
110
148
  select(
111
- columns: string[] | Map<string, string> | Record<string, string>,
149
+ columns: string[] | Map<string, string> | Record<string, string> | string,
112
150
  ): QueryType {
113
151
  let columnTuples: [string, string][];
152
+ if (typeof columns === "string") {
153
+ columns = [columns];
154
+ }
114
155
  if (Array.isArray(columns)) {
115
156
  columnTuples = columns.map((c) => [c, c]);
116
157
  } else if (columns instanceof Map) {
@@ -133,8 +174,10 @@ export class QueryBase<
133
174
  return this as unknown as QueryType;
134
175
  }
135
176
 
136
- protected nativeExecute(): Promise<NativeBatchIterator> {
137
- return this.inner.execute();
177
+ protected nativeExecute(
178
+ options?: Partial<QueryExecutionOptions>,
179
+ ): Promise<NativeBatchIterator> {
180
+ return this.inner.execute(options?.maxBatchLength);
138
181
  }
139
182
 
140
183
  /**
@@ -148,8 +191,10 @@ export class QueryBase<
148
191
  * single query)
149
192
  *
150
193
  */
151
- protected execute(): RecordBatchIterator {
152
- return new RecordBatchIterator(this.nativeExecute());
194
+ protected execute(
195
+ options?: Partial<QueryExecutionOptions>,
196
+ ): RecordBatchIterator {
197
+ return new RecordBatchIterator(this.nativeExecute(options));
153
198
  }
154
199
 
155
200
  // biome-ignore lint/suspicious/noExplicitAny: skip
@@ -159,19 +204,18 @@ export class QueryBase<
159
204
  }
160
205
 
161
206
  /** Collect the results as an Arrow @see {@link ArrowTable}. */
162
- async toArrow(): Promise<ArrowTable> {
207
+ async toArrow(options?: Partial<QueryExecutionOptions>): Promise<ArrowTable> {
163
208
  const batches = [];
164
- for await (const batch of this) {
209
+ for await (const batch of new RecordBatchIterable(this.inner, options)) {
165
210
  batches.push(batch);
166
211
  }
167
212
  return new ArrowTable(batches);
168
213
  }
169
214
 
170
215
  /** Collect the results as an array of objects. */
171
- async toArray(): Promise<unknown[]> {
172
- const tbl = await this.toArrow();
173
-
174
- // eslint-disable-next-line @typescript-eslint/no-unsafe-return
216
+ // biome-ignore lint/suspicious/noExplicitAny: arrow.toArrow() returns any[]
217
+ async toArray(options?: Partial<QueryExecutionOptions>): Promise<any[]> {
218
+ const tbl = await this.toArrow(options);
175
219
  return tbl.toArray();
176
220
  }
177
221
  }
@@ -370,9 +414,8 @@ export class Query extends QueryBase<NativeQuery, Query> {
370
414
  * Vector searches always have a `limit`. If `limit` has not been called then
371
415
  * a default `limit` of 10 will be used. @see {@link Query#limit}
372
416
  */
373
- nearestTo(vector: unknown): VectorQuery {
374
- // biome-ignore lint/suspicious/noExplicitAny: skip
375
- const vectorQuery = this.inner.nearestTo(Float32Array.from(vector as any));
417
+ nearestTo(vector: IntoVector): VectorQuery {
418
+ const vectorQuery = this.inner.nearestTo(Float32Array.from(vector));
376
419
  return new VectorQuery(vectorQuery);
377
420
  }
378
421
  }
package/lancedb/table.ts CHANGED
@@ -12,9 +12,16 @@
12
12
  // See the License for the specific language governing permissions and
13
13
  // limitations under the License.
14
14
 
15
- import { Data, Schema, fromDataToBuffer, tableFromIPC } from "./arrow";
15
+ import {
16
+ Table as ArrowTable,
17
+ Data,
18
+ IntoVector,
19
+ Schema,
20
+ fromDataToBuffer,
21
+ tableFromIPC,
22
+ } from "./arrow";
16
23
 
17
- import { getRegistry } from "./embedding/registry";
24
+ import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
18
25
  import { IndexOptions } from "./indices";
19
26
  import {
20
27
  AddColumnsSql,
@@ -24,8 +31,8 @@ import {
24
31
  Table as _NativeTable,
25
32
  } from "./native";
26
33
  import { Query, VectorQuery } from "./query";
27
-
28
34
  export { IndexConfig } from "./native";
35
+
29
36
  /**
30
37
  * Options for adding data to a table.
31
38
  */
@@ -110,6 +117,14 @@ export class Table {
110
117
  return this.inner.display();
111
118
  }
112
119
 
120
+ async #getEmbeddingFunctions(): Promise<
121
+ Map<string, EmbeddingFunctionConfig>
122
+ > {
123
+ const schema = await this.schema();
124
+ const registry = getRegistry();
125
+ return registry.parseFunctions(schema.metadata);
126
+ }
127
+
113
128
  /** Get the schema of the table. */
114
129
  async schema(): Promise<Schema> {
115
130
  const schemaBuf = await this.inner.schema();
@@ -130,6 +145,7 @@ export class Table {
130
145
  const buffer = await fromDataToBuffer(
131
146
  data,
132
147
  functions.values().next().value,
148
+ schema,
133
149
  );
134
150
  await this.inner.add(buffer, mode);
135
151
  }
@@ -270,6 +286,40 @@ export class Table {
270
286
  return new Query(this.inner);
271
287
  }
272
288
 
289
+ /**
290
+ * Create a search query to find the nearest neighbors
291
+ * of the given query vector
292
+ * @param {string} query - the query. This will be converted to a vector using the table's provided embedding function
293
+ * @rejects {Error} If no embedding functions are defined in the table
294
+ */
295
+ search(query: string): Promise<VectorQuery>;
296
+ /**
297
+ * Create a search query to find the nearest neighbors
298
+ * of the given query vector
299
+ * @param {IntoVector} query - the query vector
300
+ */
301
+ search(query: IntoVector): VectorQuery;
302
+ search(query: string | IntoVector): Promise<VectorQuery> | VectorQuery {
303
+ if (typeof query !== "string") {
304
+ return this.vectorSearch(query);
305
+ } else {
306
+ return this.#getEmbeddingFunctions().then(async (functions) => {
307
+ // TODO: Support multiple embedding functions
308
+ const embeddingFunc: EmbeddingFunctionConfig | undefined = functions
309
+ .values()
310
+ .next().value;
311
+ if (!embeddingFunc) {
312
+ return Promise.reject(
313
+ new Error("No embedding functions are defined in the table"),
314
+ );
315
+ }
316
+ const embeddings =
317
+ await embeddingFunc.function.computeQueryEmbeddings(query);
318
+ return this.query().nearestTo(embeddings);
319
+ });
320
+ }
321
+ }
322
+
273
323
  /**
274
324
  * Search the table with a given query vector.
275
325
  *
@@ -277,7 +327,7 @@ export class Table {
277
327
  * is the same thing as calling `nearestTo` on the builder returned
278
328
  * by `query`. @see {@link Query#nearestTo} for more details.
279
329
  */
280
- vectorSearch(vector: unknown): VectorQuery {
330
+ vectorSearch(vector: IntoVector): VectorQuery {
281
331
  return this.query().nearestTo(vector);
282
332
  }
283
333
 
@@ -423,4 +473,9 @@ export class Table {
423
473
  async listIndices(): Promise<IndexConfig[]> {
424
474
  return await this.inner.listIndices();
425
475
  }
476
+
477
+ /** Return the table as an arrow table */
478
+ async toArrow(): Promise<ArrowTable> {
479
+ return await this.query().toArrow();
480
+ }
426
481
  }
@@ -3,6 +3,7 @@ import { Table as ArrowTable, Binary, DataType, FixedSizeBinary, FixedSizeList,
3
3
  import { type EmbeddingFunction } from "./embedding/embedding_function";
4
4
  import { EmbeddingFunctionConfig } from "./embedding/registry";
5
5
  export * from "apache-arrow";
6
+ export type IntoVector = Float32Array | Float64Array | number[];
6
7
  export declare function isArrowTable(value: object): value is ArrowTable;
7
8
  export declare function isDataType(value: unknown): value is DataType;
8
9
  export declare function isNull(value: unknown): value is Null;
@@ -36,6 +37,7 @@ export declare class MakeArrowTableOptions {
36
37
  schema?: Schema;
37
38
  vectorColumns: Record<string, VectorColumnOptions>;
38
39
  embeddings?: EmbeddingFunction<unknown>;
40
+ embeddingFunction?: EmbeddingFunctionConfig;
39
41
  /**
40
42
  * If true then string columns will be encoded with dictionary encoding
41
43
  *
@@ -184,6 +184,7 @@ class MakeArrowTableOptions {
184
184
  vector: new VectorColumnOptions(),
185
185
  };
186
186
  embeddings;
187
+ embeddingFunction;
187
188
  /**
188
189
  * If true then string columns will be encoded with dictionary encoding
189
190
  *
@@ -299,7 +300,7 @@ function makeArrowTable(data, options, metadata) {
299
300
  const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
300
301
  if (opt.schema !== undefined && opt.schema !== null) {
301
302
  opt.schema = (0, sanitize_1.sanitizeSchema)(opt.schema);
302
- opt.schema = validateSchemaEmbeddings(opt.schema, data, opt.embeddings);
303
+ opt.schema = validateSchemaEmbeddings(opt.schema, data, options?.embeddingFunction);
303
304
  }
304
305
  const columns = {};
305
306
  // TODO: sample dataset to find missing columns
@@ -40,6 +40,12 @@ export interface CreateTableOptions {
40
40
  * The available options are described at https://lancedb.github.io/lancedb/guides/storage/
41
41
  */
42
42
  storageOptions?: Record<string, string>;
43
+ /**
44
+ * If true then data files will be written with the legacy format
45
+ *
46
+ * The default is true while the new format is in beta
47
+ */
48
+ useLegacyFormat?: boolean;
43
49
  schema?: Schema;
44
50
  embeddingFunction?: EmbeddingFunctionConfig;
45
51
  }
@@ -116,7 +116,7 @@ class Connection {
116
116
  table = (0, arrow_1.makeArrowTable)(data, options);
117
117
  }
118
118
  const buf = await (0, arrow_1.fromTableToBuffer)(table, options?.embeddingFunction, options?.schema);
119
- const innerTable = await this.inner.createTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions));
119
+ const innerTable = await this.inner.createTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions), options?.useLegacyFormat);
120
120
  return new table_1.Table(innerTable);
121
121
  }
122
122
  /**
@@ -138,7 +138,7 @@ class Connection {
138
138
  }
139
139
  const table = (0, arrow_1.makeEmptyTable)(schema, metadata);
140
140
  const buf = await (0, arrow_1.fromTableToBuffer)(table);
141
- const innerTable = await this.inner.createEmptyTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions));
141
+ const innerTable = await this.inner.createEmptyTable(name, buf, mode, cleanseStorageOptions(options?.storageOptions), options?.useLegacyFormat);
142
142
  return new table_1.Table(innerTable);
143
143
  }
144
144
  /**
@@ -1,5 +1,5 @@
1
1
  import "reflect-metadata";
2
- import { DataType, Float } from "../arrow";
2
+ import { DataType, Float, type IntoVector } from "../arrow";
3
3
  /**
4
4
  * Options for a given embedding function
5
5
  */
@@ -51,7 +51,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
51
51
  *
52
52
  * @see {@link lancedb.LanceSchema}
53
53
  */
54
- vectorField(options?: Partial<FieldOptions>): [DataType, Map<string, EmbeddingFunction>];
54
+ vectorField(optionsOrDatatype?: Partial<FieldOptions> | DataType): [DataType, Map<string, EmbeddingFunction>];
55
55
  /** The number of dimensions of the embeddings */
56
56
  ndims(): number | undefined;
57
57
  /** The datatype of the embeddings */
@@ -63,7 +63,7 @@ export declare abstract class EmbeddingFunction<T = any, M extends FunctionOptio
63
63
  /**
64
64
  Compute the embeddings for a single query
65
65
  */
66
- computeQueryEmbeddings(data: T): Promise<number[] | Float32Array | Float64Array>;
66
+ computeQueryEmbeddings(data: T): Promise<IntoVector>;
67
67
  }
68
68
  export interface FieldOptions<T extends DataType = DataType> {
69
69
  datatype: T;
@@ -47,32 +47,50 @@ class EmbeddingFunction {
47
47
  *
48
48
  * @see {@link lancedb.LanceSchema}
49
49
  */
50
- vectorField(options) {
50
+ vectorField(optionsOrDatatype) {
51
51
  let dtype;
52
- const dims = this.ndims() ?? options?.dims;
53
- if (!options?.datatype) {
54
- if (dims === undefined) {
55
- throw new Error("ndims is required for vector field");
56
- }
57
- dtype = new arrow_1.FixedSizeList(dims, new arrow_1.Field("item", new arrow_1.Float32(), true));
52
+ let vectorType;
53
+ let dims = this.ndims();
54
+ // `func.vectorField(new Float32())`
55
+ if ((0, arrow_1.isDataType)(optionsOrDatatype)) {
56
+ dtype = optionsOrDatatype;
58
57
  }
59
58
  else {
60
- if ((0, arrow_1.isFixedSizeList)(options.datatype)) {
61
- dtype = options.datatype;
59
+ // `func.vectorField({
60
+ // datatype: new Float32(),
61
+ // dims: 10
62
+ // })`
63
+ dims = dims ?? optionsOrDatatype?.dims;
64
+ dtype = optionsOrDatatype?.datatype;
65
+ }
66
+ if (dtype !== undefined) {
67
+ // `func.vectorField(new FixedSizeList(dims, new Field("item", new Float32(), true)))`
68
+ // or `func.vectorField({datatype: new FixedSizeList(dims, new Field("item", new Float32(), true))})`
69
+ if ((0, arrow_1.isFixedSizeList)(dtype)) {
70
+ vectorType = dtype;
71
+ // `func.vectorField(new Float32())`
72
+ // or `func.vectorField({datatype: new Float32()})`
62
73
  }
63
- else if ((0, arrow_1.isFloat)(options.datatype)) {
74
+ else if ((0, arrow_1.isFloat)(dtype)) {
75
+ // No `ndims` impl and no `{dims: n}` provided;
64
76
  if (dims === undefined) {
65
77
  throw new Error("ndims is required for vector field");
66
78
  }
67
- dtype = (0, arrow_1.newVectorType)(dims, options.datatype);
79
+ vectorType = (0, arrow_1.newVectorType)(dims, dtype);
68
80
  }
69
81
  else {
70
82
  throw new Error("Expected FixedSizeList or Float as datatype for vector field");
71
83
  }
72
84
  }
85
+ else {
86
+ if (dims === undefined) {
87
+ throw new Error("ndims is required for vector field");
88
+ }
89
+ vectorType = new arrow_1.FixedSizeList(dims, new arrow_1.Field("item", new arrow_1.Float32(), true));
90
+ }
73
91
  const metadata = new Map();
74
92
  metadata.set("vector_column_for", this);
75
- return [dtype, metadata];
93
+ return [vectorType, metadata];
76
94
  }
77
95
  /** The number of dimensions of the embeddings */
78
96
  ndims() {
@@ -21,6 +21,7 @@ export declare class EmbeddingFunctionRegistry {
21
21
  * Register an embedding function
22
22
  * @param name The name of the function
23
23
  * @param func The function to register
24
+ * @throws Error if the function is already registered
24
25
  */
25
26
  register<T extends EmbeddingFunctionFactory = EmbeddingFunctionFactory>(this: EmbeddingFunctionRegistry, alias?: string): (ctor: T) => any;
26
27
  /**
@@ -32,6 +33,9 @@ export declare class EmbeddingFunctionRegistry {
32
33
  * reset the registry to the initial state
33
34
  */
34
35
  reset(this: EmbeddingFunctionRegistry): void;
36
+ /**
37
+ * @ignore
38
+ */
35
39
  parseFunctions(this: EmbeddingFunctionRegistry, metadata: Map<string, string>): Map<string, EmbeddingFunctionConfig>;
36
40
  functionToMetadata(conf: EmbeddingFunctionConfig): Record<string, any>;
37
41
  getTableMetadata(functions: EmbeddingFunctionConfig[]): Map<string, string>;
@@ -27,6 +27,7 @@ class EmbeddingFunctionRegistry {
27
27
  * Register an embedding function
28
28
  * @param name The name of the function
29
29
  * @param func The function to register
30
+ * @throws Error if the function is already registered
30
31
  */
31
32
  register(alias) {
32
33
  const self = this;
@@ -63,6 +64,9 @@ class EmbeddingFunctionRegistry {
63
64
  reset() {
64
65
  this.#functions.clear();
65
66
  }
67
+ /**
68
+ * @ignore
69
+ */
66
70
  parseFunctions(metadata) {
67
71
  if (!metadata.has("embedding_functions")) {
68
72
  return new Map();
@@ -102,6 +102,7 @@ export const enum WriteMode {
102
102
  }
103
103
  /** Write options when creating a Table. */
104
104
  export interface WriteOptions {
105
+ /** Write mode for writing to a table. */
105
106
  mode?: WriteMode
106
107
  }
107
108
  export interface OpenTableOptions {
@@ -123,8 +124,8 @@ export class Connection {
123
124
  * - buf: The buffer containing the IPC file.
124
125
  *
125
126
  */
126
- createTable(name: string, buf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null): Promise<Table>
127
- createEmptyTable(name: string, schemaBuf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null): Promise<Table>
127
+ createTable(name: string, buf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
128
+ createEmptyTable(name: string, schemaBuf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
128
129
  openTable(name: string, storageOptions?: Record<string, string> | undefined | null, indexCacheSize?: number | undefined | null): Promise<Table>
129
130
  /** Drop table with the name. Or raise an error if the table does not exist. */
130
131
  dropTable(name: string): Promise<void>
@@ -142,7 +143,7 @@ export class Query {
142
143
  select(columns: Array<[string, string]>): void
143
144
  limit(limit: number): void
144
145
  nearestTo(vector: Float32Array): VectorQuery
145
- execute(): Promise<RecordBatchIterator>
146
+ execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
146
147
  }
147
148
  export class VectorQuery {
148
149
  column(column: string): void
@@ -154,7 +155,7 @@ export class VectorQuery {
154
155
  onlyIf(predicate: string): void
155
156
  select(columns: Array<[string, string]>): void
156
157
  limit(limit: number): void
157
- execute(): Promise<RecordBatchIterator>
158
+ execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
158
159
  }
159
160
  export class Table {
160
161
  display(): string
@@ -1,4 +1,4 @@
1
- import { Table as ArrowTable, RecordBatch } from "./arrow";
1
+ import { Table as ArrowTable, type IntoVector, RecordBatch } from "./arrow";
2
2
  import { RecordBatchIterator as NativeBatchIterator, Query as NativeQuery, Table as NativeTable, VectorQuery as NativeVectorQuery } from "./native";
3
3
  export declare class RecordBatchIterator implements AsyncIterator<RecordBatch> {
4
4
  private promisedInner?;
@@ -6,6 +6,18 @@ export declare class RecordBatchIterator implements AsyncIterator<RecordBatch> {
6
6
  constructor(promise?: Promise<NativeBatchIterator>);
7
7
  next(): Promise<IteratorResult<RecordBatch<any>>>;
8
8
  }
9
+ /**
10
+ * Options that control the behavior of a particular query execution
11
+ */
12
+ export interface QueryExecutionOptions {
13
+ /**
14
+ * The maximum number of rows to return in a single batch
15
+ *
16
+ * Batches may have fewer rows if the underlying data is stored
17
+ * in smaller chunks.
18
+ */
19
+ maxBatchLength?: number;
20
+ }
9
21
  /** Common methods supported by all query types */
10
22
  export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery, QueryType> implements AsyncIterable<RecordBatch> {
11
23
  protected inner: NativeQueryType;
@@ -53,7 +65,7 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
53
65
  * uses `Object.entries` which should preserve the insertion order of the object. However,
54
66
  * object insertion order is easy to get wrong and `Map` is more foolproof.
55
67
  */
56
- select(columns: string[] | Map<string, string> | Record<string, string>): QueryType;
68
+ select(columns: string[] | Map<string, string> | Record<string, string> | string): QueryType;
57
69
  /**
58
70
  * Set the maximum number of results to return.
59
71
  *
@@ -61,7 +73,7 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
61
73
  * called then every valid row from the table will be returned.
62
74
  */
63
75
  limit(limit: number): QueryType;
64
- protected nativeExecute(): Promise<NativeBatchIterator>;
76
+ protected nativeExecute(options?: Partial<QueryExecutionOptions>): Promise<NativeBatchIterator>;
65
77
  /**
66
78
  * Execute the query and return the results as an @see {@link AsyncIterator}
67
79
  * of @see {@link RecordBatch}.
@@ -73,12 +85,12 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
73
85
  * single query)
74
86
  *
75
87
  */
76
- protected execute(): RecordBatchIterator;
88
+ protected execute(options?: Partial<QueryExecutionOptions>): RecordBatchIterator;
77
89
  [Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>>;
78
90
  /** Collect the results as an Arrow @see {@link ArrowTable}. */
79
- toArrow(): Promise<ArrowTable>;
91
+ toArrow(options?: Partial<QueryExecutionOptions>): Promise<ArrowTable>;
80
92
  /** Collect the results as an array of objects. */
81
- toArray(): Promise<unknown[]>;
93
+ toArray(options?: Partial<QueryExecutionOptions>): Promise<any[]>;
82
94
  }
83
95
  /**
84
96
  * An interface for a query that can be executed
@@ -244,5 +256,5 @@ export declare class Query extends QueryBase<NativeQuery, Query> {
244
256
  * Vector searches always have a `limit`. If `limit` has not been called then
245
257
  * a default `limit` of 10 will be used. @see {@link Query#limit}
246
258
  */
247
- nearestTo(vector: unknown): VectorQuery;
259
+ nearestTo(vector: IntoVector): VectorQuery;
248
260
  }
@@ -43,6 +43,18 @@ class RecordBatchIterator {
43
43
  }
44
44
  exports.RecordBatchIterator = RecordBatchIterator;
45
45
  /* eslint-enable */
46
+ class RecordBatchIterable {
47
+ inner;
48
+ options;
49
+ constructor(inner, options) {
50
+ this.inner = inner;
51
+ this.options = options;
52
+ }
53
+ // biome-ignore lint/suspicious/noExplicitAny: skip
54
+ [Symbol.asyncIterator]() {
55
+ return new RecordBatchIterator(this.inner.execute(this.options?.maxBatchLength));
56
+ }
57
+ }
46
58
  /** Common methods supported by all query types */
47
59
  class QueryBase {
48
60
  inner;
@@ -98,6 +110,9 @@ class QueryBase {
98
110
  */
99
111
  select(columns) {
100
112
  let columnTuples;
113
+ if (typeof columns === "string") {
114
+ columns = [columns];
115
+ }
101
116
  if (Array.isArray(columns)) {
102
117
  columnTuples = columns.map((c) => [c, c]);
103
118
  }
@@ -120,8 +135,8 @@ class QueryBase {
120
135
  this.inner.limit(limit);
121
136
  return this;
122
137
  }
123
- nativeExecute() {
124
- return this.inner.execute();
138
+ nativeExecute(options) {
139
+ return this.inner.execute(options?.maxBatchLength);
125
140
  }
126
141
  /**
127
142
  * Execute the query and return the results as an @see {@link AsyncIterator}
@@ -134,8 +149,8 @@ class QueryBase {
134
149
  * single query)
135
150
  *
136
151
  */
137
- execute() {
138
- return new RecordBatchIterator(this.nativeExecute());
152
+ execute(options) {
153
+ return new RecordBatchIterator(this.nativeExecute(options));
139
154
  }
140
155
  // biome-ignore lint/suspicious/noExplicitAny: skip
141
156
  [Symbol.asyncIterator]() {
@@ -143,17 +158,17 @@ class QueryBase {
143
158
  return new RecordBatchIterator(promise);
144
159
  }
145
160
  /** Collect the results as an Arrow @see {@link ArrowTable}. */
146
- async toArrow() {
161
+ async toArrow(options) {
147
162
  const batches = [];
148
- for await (const batch of this) {
163
+ for await (const batch of new RecordBatchIterable(this.inner, options)) {
149
164
  batches.push(batch);
150
165
  }
151
166
  return new arrow_1.Table(batches);
152
167
  }
153
168
  /** Collect the results as an array of objects. */
154
- async toArray() {
155
- const tbl = await this.toArrow();
156
- // eslint-disable-next-line @typescript-eslint/no-unsafe-return
169
+ // biome-ignore lint/suspicious/noExplicitAny: arrow.toArrow() returns any[]
170
+ async toArray(options) {
171
+ const tbl = await this.toArrow(options);
157
172
  return tbl.toArray();
158
173
  }
159
174
  }
@@ -339,7 +354,6 @@ class Query extends QueryBase {
339
354
  * a default `limit` of 10 will be used. @see {@link Query#limit}
340
355
  */
341
356
  nearestTo(vector) {
342
- // biome-ignore lint/suspicious/noExplicitAny: skip
343
357
  const vectorQuery = this.inner.nearestTo(Float32Array.from(vector));
344
358
  return new VectorQuery(vectorQuery);
345
359
  }
@@ -1,4 +1,4 @@
1
- import { Data, Schema } from "./arrow";
1
+ import { Table as ArrowTable, Data, IntoVector, Schema } from "./arrow";
2
2
  import { IndexOptions } from "./indices";
3
3
  import { AddColumnsSql, ColumnAlteration, IndexConfig, OptimizeStats, Table as _NativeTable } from "./native";
4
4
  import { Query, VectorQuery } from "./query";
@@ -56,6 +56,7 @@ export interface OptimizeOptions {
56
56
  * collected.
57
57
  */
58
58
  export declare class Table {
59
+ #private;
59
60
  private readonly inner;
60
61
  /** Construct a Table. Internal use only. */
61
62
  constructor(inner: _NativeTable);
@@ -186,6 +187,19 @@ export declare class Table {
186
187
  * @returns {Query} A builder that can be used to parameterize the query
187
188
  */
188
189
  query(): Query;
190
+ /**
191
+ * Create a search query to find the nearest neighbors
192
+ * of the given query vector
193
+ * @param {string} query - the query. This will be converted to a vector using the table's provided embedding function
194
+ * @rejects {Error} If no embedding functions are defined in the table
195
+ */
196
+ search(query: string): Promise<VectorQuery>;
197
+ /**
198
+ * Create a search query to find the nearest neighbors
199
+ * of the given query vector
200
+ * @param {IntoVector} query - the query vector
201
+ */
202
+ search(query: IntoVector): VectorQuery;
189
203
  /**
190
204
  * Search the table with a given query vector.
191
205
  *
@@ -193,7 +207,7 @@ export declare class Table {
193
207
  * is the same thing as calling `nearestTo` on the builder returned
194
208
  * by `query`. @see {@link Query#nearestTo} for more details.
195
209
  */
196
- vectorSearch(vector: unknown): VectorQuery;
210
+ vectorSearch(vector: IntoVector): VectorQuery;
197
211
  /**
198
212
  * Add new columns with defined values.
199
213
  * @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
@@ -301,4 +315,6 @@ export declare class Table {
301
315
  optimize(options?: Partial<OptimizeOptions>): Promise<OptimizeStats>;
302
316
  /** List all indices that have been created with {@link Table.createIndex} */
303
317
  listIndices(): Promise<IndexConfig[]>;
318
+ /** Return the table as an arrow table */
319
+ toArrow(): Promise<ArrowTable>;
304
320
  }