@lancedb/lancedb 0.22.2 → 0.22.3-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -2,16 +2,17 @@ import { Connection } from "./connection";
2
2
  import { ConnectionOptions, Session } from "./native.js";
3
3
  import { HeaderProvider } from "./header";
4
4
  export { JsHeaderProvider as NativeJsHeaderProvider } from "./native.js";
5
- export { AddColumnsSql, ConnectionOptions, IndexStatistics, IndexConfig, ClientConfig, TimeoutConfig, RetryConfig, TlsConfig, OptimizeStats, CompactionStats, RemovalStats, TableStatistics, FragmentStatistics, FragmentSummaryStats, Tags, TagContents, MergeResult, AddResult, AddColumnsResult, AlterColumnsResult, DeleteResult, DropColumnsResult, UpdateResult, } from "./native.js";
5
+ export { AddColumnsSql, ConnectionOptions, IndexStatistics, IndexConfig, ClientConfig, TimeoutConfig, RetryConfig, TlsConfig, OptimizeStats, CompactionStats, RemovalStats, TableStatistics, FragmentStatistics, FragmentSummaryStats, Tags, TagContents, MergeResult, AddResult, AddColumnsResult, AlterColumnsResult, DeleteResult, DropColumnsResult, UpdateResult, SplitRandomOptions, SplitHashOptions, SplitSequentialOptions, ShuffleOptions, } from "./native.js";
6
6
  export { makeArrowTable, MakeArrowTableOptions, Data, VectorColumnOptions, } from "./arrow";
7
7
  export { Connection, CreateTableOptions, TableNamesOptions, OpenTableOptions, } from "./connection";
8
8
  export { Session } from "./native.js";
9
9
  export { ExecutableQuery, Query, QueryBase, VectorQuery, TakeQuery, QueryExecutionOptions, FullTextSearchOptions, RecordBatchIterator, FullTextQuery, MatchQuery, PhraseQuery, BoostQuery, MultiMatchQuery, BooleanQuery, FullTextQueryType, Operator, Occur, } from "./query";
10
- export { Index, IndexOptions, IvfPqOptions, IvfFlatOptions, HnswPqOptions, HnswSqOptions, FtsOptions, } from "./indices";
10
+ export { Index, IndexOptions, IvfPqOptions, IvfRqOptions, IvfFlatOptions, HnswPqOptions, HnswSqOptions, FtsOptions, } from "./indices";
11
11
  export { Table, AddDataOptions, UpdateOptions, OptimizeOptions, Version, ColumnAlteration, } from "./table";
12
12
  export { HeaderProvider, StaticHeaderProvider, OAuthHeaderProvider, TokenResponse, } from "./header";
13
13
  export { MergeInsertBuilder, WriteExecutionOptions } from "./merge";
14
14
  export * as embedding from "./embedding";
15
+ export { permutationBuilder, PermutationBuilder } from "./permutation";
15
16
  export * as rerankers from "./rerankers";
16
17
  export { SchemaLike, TableLike, FieldLike, RecordBatchLike, DataLike, IntoVector, MultiVector, } from "./arrow";
17
18
  export { IntoSql, packBits } from "./util";
package/dist/index.js CHANGED
@@ -2,7 +2,7 @@
2
2
  // SPDX-License-Identifier: Apache-2.0
3
3
  // SPDX-FileCopyrightText: Copyright The LanceDB Authors
4
4
  Object.defineProperty(exports, "__esModule", { value: true });
5
- exports.packBits = exports.rerankers = exports.embedding = exports.MergeInsertBuilder = exports.OAuthHeaderProvider = exports.StaticHeaderProvider = exports.HeaderProvider = exports.Table = exports.Index = exports.Occur = exports.Operator = exports.FullTextQueryType = exports.BooleanQuery = exports.MultiMatchQuery = exports.BoostQuery = exports.PhraseQuery = exports.MatchQuery = exports.RecordBatchIterator = exports.TakeQuery = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Session = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = exports.TagContents = exports.Tags = exports.NativeJsHeaderProvider = void 0;
5
+ exports.packBits = exports.rerankers = exports.PermutationBuilder = exports.permutationBuilder = exports.embedding = exports.MergeInsertBuilder = exports.OAuthHeaderProvider = exports.StaticHeaderProvider = exports.HeaderProvider = exports.Table = exports.Index = exports.Occur = exports.Operator = exports.FullTextQueryType = exports.BooleanQuery = exports.MultiMatchQuery = exports.BoostQuery = exports.PhraseQuery = exports.MatchQuery = exports.RecordBatchIterator = exports.TakeQuery = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Session = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = exports.TagContents = exports.Tags = exports.NativeJsHeaderProvider = void 0;
6
6
  exports.connect = connect;
7
7
  const connection_1 = require("./connection");
8
8
  const native_js_1 = require("./native.js");
@@ -45,6 +45,9 @@ Object.defineProperty(exports, "OAuthHeaderProvider", { enumerable: true, get: f
45
45
  var merge_1 = require("./merge");
46
46
  Object.defineProperty(exports, "MergeInsertBuilder", { enumerable: true, get: function () { return merge_1.MergeInsertBuilder; } });
47
47
  exports.embedding = require("./embedding");
48
+ var permutation_1 = require("./permutation");
49
+ Object.defineProperty(exports, "permutationBuilder", { enumerable: true, get: function () { return permutation_1.permutationBuilder; } });
50
+ Object.defineProperty(exports, "PermutationBuilder", { enumerable: true, get: function () { return permutation_1.PermutationBuilder; } });
48
51
  exports.rerankers = require("./rerankers");
49
52
  var util_1 = require("./util");
50
53
  Object.defineProperty(exports, "packBits", { enumerable: true, get: function () { return util_1.packBits; } });
package/dist/indices.d.ts CHANGED
@@ -101,6 +101,72 @@ export interface IvfPqOptions {
101
101
  */
102
102
  sampleRate?: number;
103
103
  }
104
+ export interface IvfRqOptions {
105
+ /**
106
+ * The number of IVF partitions to create.
107
+ *
108
+ * This value should generally scale with the number of rows in the dataset.
109
+ * By default the number of partitions is the square root of the number of
110
+ * rows.
111
+ *
112
+ * If this value is too large then the first part of the search (picking the
113
+ * right partition) will be slow. If this value is too small then the second
114
+ * part of the search (searching within a partition) will be slow.
115
+ */
116
+ numPartitions?: number;
117
+ /**
118
+ * Number of bits per dimension for residual quantization.
119
+ *
120
+ * This value controls how much each residual component is compressed. The more
121
+ * bits, the more accurate the index will be but the slower search. Typical values
122
+ * are small integers; the default is 1 bit per dimension.
123
+ */
124
+ numBits?: number;
125
+ /**
126
+ * Distance type to use to build the index.
127
+ *
128
+ * Default value is "l2".
129
+ *
130
+ * This is used when training the index to calculate the IVF partitions
131
+ * (vectors are grouped in partitions with similar vectors according to this
132
+ * distance type) and during quantization.
133
+ *
134
+ * The distance type used to train an index MUST match the distance type used
135
+ * to search the index. Failure to do so will yield inaccurate results.
136
+ *
137
+ * The following distance types are available:
138
+ *
139
+ * "l2" - Euclidean distance.
140
+ * "cosine" - Cosine distance.
141
+ * "dot" - Dot product.
142
+ */
143
+ distanceType?: "l2" | "cosine" | "dot";
144
+ /**
145
+ * Max iterations to train IVF kmeans.
146
+ *
147
+ * When training an IVF index we use kmeans to calculate the partitions. This parameter
148
+ * controls how many iterations of kmeans to run.
149
+ *
150
+ * The default value is 50.
151
+ */
152
+ maxIterations?: number;
153
+ /**
154
+ * The number of vectors, per partition, to sample when training IVF kmeans.
155
+ *
156
+ * When an IVF index is trained, we need to calculate partitions. These are groups
157
+ * of vectors that are similar to each other. To do this we use an algorithm called kmeans.
158
+ *
159
+ * Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
160
+ * random sample of the data. This parameter controls the size of the sample. The total
161
+ * number of vectors used to train the index is `sampleRate * numPartitions`.
162
+ *
163
+ * Increasing this value might improve the quality of the index but in most cases the
164
+ * default should be sufficient.
165
+ *
166
+ * The default value is 256.
167
+ */
168
+ sampleRate?: number;
169
+ }
104
170
  /**
105
171
  * Options to create an `HNSW_PQ` index
106
172
  */
@@ -476,6 +542,23 @@ export declare class Index {
476
542
  * currently is also a memory intensive operation.
477
543
  */
478
544
  static ivfPq(options?: Partial<IvfPqOptions>): Index;
545
+ /**
546
+ * Create an IvfRq index
547
+ *
548
+ * IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
549
+ * and organizes them into IVF partitions.
550
+ *
551
+ * The compression scheme is called RabitQ quantization. Each dimension is quantized into a small number of bits.
552
+ * The parameters `numBits` and `numPartitions` control this process, providing a tradeoff
553
+ * between index size (and thus search speed) and index accuracy.
554
+ *
555
+ * The partitioning process is called IVF and the `numPartitions` parameter controls how
556
+ * many groups to create.
557
+ *
558
+ * Note that training an IVF RQ index on a large dataset is a slow operation and
559
+ * currently is also a memory intensive operation.
560
+ */
561
+ static ivfRq(options?: Partial<IvfRqOptions>): Index;
479
562
  /**
480
563
  * Create an IvfFlat index
481
564
  *
package/dist/indices.js CHANGED
@@ -32,7 +32,26 @@ class Index {
32
32
  * currently is also a memory intensive operation.
33
33
  */
34
34
  static ivfPq(options) {
35
- return new Index(native_1.Index.ivfPq(options?.distanceType, options?.numPartitions, options?.numSubVectors, options?.maxIterations, options?.sampleRate));
35
+ return new Index(native_1.Index.ivfPq(options?.distanceType, options?.numPartitions, options?.numSubVectors, options?.numBits, options?.maxIterations, options?.sampleRate));
36
+ }
37
+ /**
38
+ * Create an IvfRq index
39
+ *
40
+ * IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
41
+ * and organizes them into IVF partitions.
42
+ *
43
+ * The compression scheme is called RabitQ quantization. Each dimension is quantized into a small number of bits.
44
+ * The parameters `numBits` and `numPartitions` control this process, providing a tradeoff
45
+ * between index size (and thus search speed) and index accuracy.
46
+ *
47
+ * The partitioning process is called IVF and the `numPartitions` parameter controls how
48
+ * many groups to create.
49
+ *
50
+ * Note that training an IVF RQ index on a large dataset is a slow operation and
51
+ * currently is also a memory intensive operation.
52
+ */
53
+ static ivfRq(options) {
54
+ return new Index(native_1.Index.ivfRq(options?.distanceType, options?.numPartitions, options?.numBits, options?.maxIterations, options?.sampleRate));
36
55
  }
37
56
  /**
38
57
  * Create an IvfFlat index
package/dist/native.d.ts CHANGED
@@ -3,6 +3,28 @@
3
3
 
4
4
  /* auto-generated by NAPI-RS */
5
5
 
6
+ export interface SplitRandomOptions {
7
+ ratios?: Array<number>
8
+ counts?: Array<number>
9
+ fixed?: number
10
+ seed?: number
11
+ }
12
+ export interface SplitHashOptions {
13
+ columns: Array<string>
14
+ splitWeights: Array<number>
15
+ discardWeight?: number
16
+ }
17
+ export interface SplitSequentialOptions {
18
+ ratios?: Array<number>
19
+ counts?: Array<number>
20
+ fixed?: number
21
+ }
22
+ export interface ShuffleOptions {
23
+ seed?: number
24
+ clumpSize?: number
25
+ }
26
+ /** Create a permutation builder for the given table */
27
+ export declare function permutationBuilder(table: Table): PermutationBuilder
6
28
  /** Timeout configuration for remote HTTP client. */
7
29
  export interface TimeoutConfig {
8
30
  /**
@@ -357,6 +379,7 @@ export class JsHeaderProvider {
357
379
  }
358
380
  export class Index {
359
381
  static ivfPq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, numBits?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
382
+ static ivfRq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, numBits?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
360
383
  static ivfFlat(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
361
384
  static btree(): Index
362
385
  static bitmap(): Index
@@ -378,6 +401,22 @@ export class NativeMergeInsertBuilder {
378
401
  useIndex(useIndex: boolean): NativeMergeInsertBuilder
379
402
  execute(buf: Buffer): Promise<MergeResult>
380
403
  }
404
+ export class PermutationBuilder {
405
+ /** Configure random splits */
406
+ splitRandom(options: SplitRandomOptions): PermutationBuilder
407
+ /** Configure hash-based splits */
408
+ splitHash(options: SplitHashOptions): PermutationBuilder
409
+ /** Configure sequential splits */
410
+ splitSequential(options: SplitSequentialOptions): PermutationBuilder
411
+ /** Configure calculated splits */
412
+ splitCalculated(calculation: string): PermutationBuilder
413
+ /** Configure shuffling */
414
+ shuffle(options: ShuffleOptions): PermutationBuilder
415
+ /** Configure filtering */
416
+ filter(filter: string): PermutationBuilder
417
+ /** Execute the permutation builder and create the table */
418
+ execute(): Promise<Table>
419
+ }
381
420
  export class Query {
382
421
  onlyIf(predicate: string): void
383
422
  fullTextSearch(query: object): void
@@ -388,6 +427,7 @@ export class Query {
388
427
  nearestTo(vector: Float32Array): VectorQuery
389
428
  fastSearch(): void
390
429
  withRowId(): void
430
+ outputSchema(): Promise<Buffer>
391
431
  execute(maxBatchLength?: number | undefined | null, timeoutMs?: number | undefined | null): Promise<RecordBatchIterator>
392
432
  explainPlan(verbose: boolean): Promise<string>
393
433
  analyzePlan(): Promise<string>
@@ -413,6 +453,7 @@ export class VectorQuery {
413
453
  fastSearch(): void
414
454
  withRowId(): void
415
455
  rerank(callbacks: RerankerCallbacks): void
456
+ outputSchema(): Promise<Buffer>
416
457
  execute(maxBatchLength?: number | undefined | null, timeoutMs?: number | undefined | null): Promise<RecordBatchIterator>
417
458
  explainPlan(verbose: boolean): Promise<string>
418
459
  analyzePlan(): Promise<string>
@@ -421,6 +462,7 @@ export class TakeQuery {
421
462
  select(columns: Array<[string, string]>): void
422
463
  selectColumns(columns: Array<string>): void
423
464
  withRowId(): void
465
+ outputSchema(): Promise<Buffer>
424
466
  execute(maxBatchLength?: number | undefined | null, timeoutMs?: number | undefined | null): Promise<RecordBatchIterator>
425
467
  explainPlan(verbose: boolean): Promise<string>
426
468
  analyzePlan(): Promise<string>
package/dist/native.js CHANGED
@@ -319,12 +319,14 @@ if (!nativeBinding) {
319
319
  }
320
320
  throw new Error(`Failed to load native binding`);
321
321
  }
322
- const { Connection, JsHeaderProvider, Index, RecordBatchIterator, NativeMergeInsertBuilder, Query, VectorQuery, TakeQuery, JsFullTextQuery, Reranker, RrfReranker, Session, Table, TagContents, Tags } = nativeBinding;
322
+ const { Connection, JsHeaderProvider, Index, RecordBatchIterator, NativeMergeInsertBuilder, PermutationBuilder, permutationBuilder, Query, VectorQuery, TakeQuery, JsFullTextQuery, Reranker, RrfReranker, Session, Table, TagContents, Tags } = nativeBinding;
323
323
  module.exports.Connection = Connection;
324
324
  module.exports.JsHeaderProvider = JsHeaderProvider;
325
325
  module.exports.Index = Index;
326
326
  module.exports.RecordBatchIterator = RecordBatchIterator;
327
327
  module.exports.NativeMergeInsertBuilder = NativeMergeInsertBuilder;
328
+ module.exports.PermutationBuilder = PermutationBuilder;
329
+ module.exports.permutationBuilder = permutationBuilder;
328
330
  module.exports.Query = Query;
329
331
  module.exports.VectorQuery = VectorQuery;
330
332
  module.exports.TakeQuery = TakeQuery;
@@ -0,0 +1,130 @@
1
+ import { PermutationBuilder as NativePermutationBuilder, ShuffleOptions, SplitHashOptions, SplitRandomOptions, SplitSequentialOptions } from "./native.js";
2
+ import { Table } from "./table";
3
+ /**
4
+ * A PermutationBuilder for creating data permutations with splits, shuffling, and filtering.
5
+ *
6
+ * This class provides a TypeScript wrapper around the native Rust PermutationBuilder,
7
+ * offering methods to configure data splits, shuffling, and filtering before executing
8
+ * the permutation to create a new table.
9
+ */
10
+ export declare class PermutationBuilder {
11
+ private inner;
12
+ /**
13
+ * @hidden
14
+ */
15
+ constructor(inner: NativePermutationBuilder);
16
+ /**
17
+ * Configure random splits for the permutation.
18
+ *
19
+ * @param options - Configuration for random splitting
20
+ * @returns A new PermutationBuilder instance
21
+ * @example
22
+ * ```ts
23
+ * // Split by ratios
24
+ * builder.splitRandom({ ratios: [0.7, 0.3], seed: 42 });
25
+ *
26
+ * // Split by counts
27
+ * builder.splitRandom({ counts: [1000, 500], seed: 42 });
28
+ *
29
+ * // Split with fixed size
30
+ * builder.splitRandom({ fixed: 100, seed: 42 });
31
+ * ```
32
+ */
33
+ splitRandom(options: SplitRandomOptions): PermutationBuilder;
34
+ /**
35
+ * Configure hash-based splits for the permutation.
36
+ *
37
+ * @param options - Configuration for hash-based splitting
38
+ * @returns A new PermutationBuilder instance
39
+ * @example
40
+ * ```ts
41
+ * builder.splitHash({
42
+ * columns: ["user_id"],
43
+ * splitWeights: [70, 30],
44
+ * discardWeight: 0
45
+ * });
46
+ * ```
47
+ */
48
+ splitHash(options: SplitHashOptions): PermutationBuilder;
49
+ /**
50
+ * Configure sequential splits for the permutation.
51
+ *
52
+ * @param options - Configuration for sequential splitting
53
+ * @returns A new PermutationBuilder instance
54
+ * @example
55
+ * ```ts
56
+ * // Split by ratios
57
+ * builder.splitSequential({ ratios: [0.8, 0.2] });
58
+ *
59
+ * // Split by counts
60
+ * builder.splitSequential({ counts: [800, 200] });
61
+ *
62
+ * // Split with fixed size
63
+ * builder.splitSequential({ fixed: 1000 });
64
+ * ```
65
+ */
66
+ splitSequential(options: SplitSequentialOptions): PermutationBuilder;
67
+ /**
68
+ * Configure calculated splits for the permutation.
69
+ *
70
+ * @param calculation - SQL expression for calculating splits
71
+ * @returns A new PermutationBuilder instance
72
+ * @example
73
+ * ```ts
74
+ * builder.splitCalculated("user_id % 3");
75
+ * ```
76
+ */
77
+ splitCalculated(calculation: string): PermutationBuilder;
78
+ /**
79
+ * Configure shuffling for the permutation.
80
+ *
81
+ * @param options - Configuration for shuffling
82
+ * @returns A new PermutationBuilder instance
83
+ * @example
84
+ * ```ts
85
+ * // Basic shuffle
86
+ * builder.shuffle({ seed: 42 });
87
+ *
88
+ * // Shuffle with clump size
89
+ * builder.shuffle({ seed: 42, clumpSize: 10 });
90
+ * ```
91
+ */
92
+ shuffle(options: ShuffleOptions): PermutationBuilder;
93
+ /**
94
+ * Configure filtering for the permutation.
95
+ *
96
+ * @param filter - SQL filter expression
97
+ * @returns A new PermutationBuilder instance
98
+ * @example
99
+ * ```ts
100
+ * builder.filter("age > 18 AND status = 'active'");
101
+ * ```
102
+ */
103
+ filter(filter: string): PermutationBuilder;
104
+ /**
105
+ * Execute the permutation and create the destination table.
106
+ *
107
+ * @returns A Promise that resolves to the new Table instance
108
+ * @example
109
+ * ```ts
110
+ * const permutationTable = await builder.execute();
111
+ * console.log(`Created table: ${permutationTable.name}`);
112
+ * ```
113
+ */
114
+ execute(): Promise<Table>;
115
+ }
116
+ /**
117
+ * Create a permutation builder for the given table.
118
+ *
119
+ * @param table - The source table to create a permutation from
120
+ * @returns A PermutationBuilder instance
121
+ * @example
122
+ * ```ts
123
+ * const builder = permutationBuilder(sourceTable)
124
+ * .splitRandom({ ratios: [0.8, 0.2], seed: 42 })
125
+ * .shuffle({ seed: 123 });
126
+ *
127
+ * const trainingTable = await builder.execute();
128
+ * ```
129
+ */
130
+ export declare function permutationBuilder(table: Table): PermutationBuilder;
@@ -0,0 +1,168 @@
1
+ "use strict";
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ // SPDX-FileCopyrightText: Copyright The LanceDB Authors
4
+ Object.defineProperty(exports, "__esModule", { value: true });
5
+ exports.PermutationBuilder = void 0;
6
+ exports.permutationBuilder = permutationBuilder;
7
+ const native_js_1 = require("./native.js");
8
+ const table_1 = require("./table");
9
+ /**
10
+ * A PermutationBuilder for creating data permutations with splits, shuffling, and filtering.
11
+ *
12
+ * This class provides a TypeScript wrapper around the native Rust PermutationBuilder,
13
+ * offering methods to configure data splits, shuffling, and filtering before executing
14
+ * the permutation to create a new table.
15
+ */
16
+ class PermutationBuilder {
17
+ inner;
18
+ /**
19
+ * @hidden
20
+ */
21
+ constructor(inner) {
22
+ this.inner = inner;
23
+ }
24
+ /**
25
+ * Configure random splits for the permutation.
26
+ *
27
+ * @param options - Configuration for random splitting
28
+ * @returns A new PermutationBuilder instance
29
+ * @example
30
+ * ```ts
31
+ * // Split by ratios
32
+ * builder.splitRandom({ ratios: [0.7, 0.3], seed: 42 });
33
+ *
34
+ * // Split by counts
35
+ * builder.splitRandom({ counts: [1000, 500], seed: 42 });
36
+ *
37
+ * // Split with fixed size
38
+ * builder.splitRandom({ fixed: 100, seed: 42 });
39
+ * ```
40
+ */
41
+ splitRandom(options) {
42
+ const newInner = this.inner.splitRandom(options);
43
+ return new PermutationBuilder(newInner);
44
+ }
45
+ /**
46
+ * Configure hash-based splits for the permutation.
47
+ *
48
+ * @param options - Configuration for hash-based splitting
49
+ * @returns A new PermutationBuilder instance
50
+ * @example
51
+ * ```ts
52
+ * builder.splitHash({
53
+ * columns: ["user_id"],
54
+ * splitWeights: [70, 30],
55
+ * discardWeight: 0
56
+ * });
57
+ * ```
58
+ */
59
+ splitHash(options) {
60
+ const newInner = this.inner.splitHash(options);
61
+ return new PermutationBuilder(newInner);
62
+ }
63
+ /**
64
+ * Configure sequential splits for the permutation.
65
+ *
66
+ * @param options - Configuration for sequential splitting
67
+ * @returns A new PermutationBuilder instance
68
+ * @example
69
+ * ```ts
70
+ * // Split by ratios
71
+ * builder.splitSequential({ ratios: [0.8, 0.2] });
72
+ *
73
+ * // Split by counts
74
+ * builder.splitSequential({ counts: [800, 200] });
75
+ *
76
+ * // Split with fixed size
77
+ * builder.splitSequential({ fixed: 1000 });
78
+ * ```
79
+ */
80
+ splitSequential(options) {
81
+ const newInner = this.inner.splitSequential(options);
82
+ return new PermutationBuilder(newInner);
83
+ }
84
+ /**
85
+ * Configure calculated splits for the permutation.
86
+ *
87
+ * @param calculation - SQL expression for calculating splits
88
+ * @returns A new PermutationBuilder instance
89
+ * @example
90
+ * ```ts
91
+ * builder.splitCalculated("user_id % 3");
92
+ * ```
93
+ */
94
+ splitCalculated(calculation) {
95
+ const newInner = this.inner.splitCalculated(calculation);
96
+ return new PermutationBuilder(newInner);
97
+ }
98
+ /**
99
+ * Configure shuffling for the permutation.
100
+ *
101
+ * @param options - Configuration for shuffling
102
+ * @returns A new PermutationBuilder instance
103
+ * @example
104
+ * ```ts
105
+ * // Basic shuffle
106
+ * builder.shuffle({ seed: 42 });
107
+ *
108
+ * // Shuffle with clump size
109
+ * builder.shuffle({ seed: 42, clumpSize: 10 });
110
+ * ```
111
+ */
112
+ shuffle(options) {
113
+ const newInner = this.inner.shuffle(options);
114
+ return new PermutationBuilder(newInner);
115
+ }
116
+ /**
117
+ * Configure filtering for the permutation.
118
+ *
119
+ * @param filter - SQL filter expression
120
+ * @returns A new PermutationBuilder instance
121
+ * @example
122
+ * ```ts
123
+ * builder.filter("age > 18 AND status = 'active'");
124
+ * ```
125
+ */
126
+ filter(filter) {
127
+ const newInner = this.inner.filter(filter);
128
+ return new PermutationBuilder(newInner);
129
+ }
130
+ /**
131
+ * Execute the permutation and create the destination table.
132
+ *
133
+ * @returns A Promise that resolves to the new Table instance
134
+ * @example
135
+ * ```ts
136
+ * const permutationTable = await builder.execute();
137
+ * console.log(`Created table: ${permutationTable.name}`);
138
+ * ```
139
+ */
140
+ async execute() {
141
+ const nativeTable = await this.inner.execute();
142
+ return new table_1.LocalTable(nativeTable);
143
+ }
144
+ }
145
+ exports.PermutationBuilder = PermutationBuilder;
146
+ /**
147
+ * Create a permutation builder for the given table.
148
+ *
149
+ * @param table - The source table to create a permutation from
150
+ * @returns A PermutationBuilder instance
151
+ * @example
152
+ * ```ts
153
+ * const builder = permutationBuilder(sourceTable)
154
+ * .splitRandom({ ratios: [0.8, 0.2], seed: 42 })
155
+ * .shuffle({ seed: 123 });
156
+ *
157
+ * const trainingTable = await builder.execute();
158
+ * ```
159
+ */
160
+ function permutationBuilder(table) {
161
+ // Extract the inner native table from the TypeScript wrapper
162
+ const localTable = table;
163
+ // Access inner through type assertion since it's private
164
+ const nativeBuilder = (0, native_js_1.permutationBuilder)(
165
+ // biome-ignore lint/suspicious/noExplicitAny: need access to private variable
166
+ localTable.inner);
167
+ return new PermutationBuilder(nativeBuilder);
168
+ }
package/dist/query.d.ts CHANGED
@@ -161,6 +161,15 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
161
161
  * @returns A query execution plan with runtime metrics for each step.
162
162
  */
163
163
  analyzePlan(): Promise<string>;
164
+ /**
165
+ * Returns the schema of the output that will be returned by this query.
166
+ *
167
+ * This can be used to inspect the types and names of the columns that will be
168
+ * returned by the query before executing it.
169
+ *
170
+ * @returns An Arrow Schema describing the output columns.
171
+ */
172
+ outputSchema(): Promise<import("./arrow").Schema>;
164
173
  }
165
174
  export declare class StandardQueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery> extends QueryBase<NativeQueryType> implements ExecutableQuery {
166
175
  constructor(inner: NativeQueryType | Promise<NativeQueryType>);
package/dist/query.js CHANGED
@@ -256,6 +256,25 @@ class QueryBase {
256
256
  return this.inner.analyzePlan();
257
257
  }
258
258
  }
259
+ /**
260
+ * Returns the schema of the output that will be returned by this query.
261
+ *
262
+ * This can be used to inspect the types and names of the columns that will be
263
+ * returned by the query before executing it.
264
+ *
265
+ * @returns An Arrow Schema describing the output columns.
266
+ */
267
+ async outputSchema() {
268
+ let schemaBuffer;
269
+ if (this.inner instanceof Promise) {
270
+ schemaBuffer = await this.inner.then((inner) => inner.outputSchema());
271
+ }
272
+ else {
273
+ schemaBuffer = await this.inner.outputSchema();
274
+ }
275
+ const schema = (0, arrow_1.tableFromIPC)(schemaBuffer).schema;
276
+ return schema;
277
+ }
259
278
  }
260
279
  exports.QueryBase = QueryBase;
261
280
  class StandardQueryBase extends QueryBase {
package/package.json CHANGED
@@ -11,7 +11,7 @@
11
11
  "ann"
12
12
  ],
13
13
  "private": false,
14
- "version": "0.22.2",
14
+ "version": "0.22.3-beta.1",
15
15
  "main": "dist/index.js",
16
16
  "exports": {
17
17
  ".": "./dist/index.js",
@@ -100,14 +100,14 @@
100
100
  "reflect-metadata": "^0.2.2"
101
101
  },
102
102
  "optionalDependencies": {
103
- "@lancedb/lancedb-darwin-x64": "0.22.2",
104
- "@lancedb/lancedb-darwin-arm64": "0.22.2",
105
- "@lancedb/lancedb-linux-x64-gnu": "0.22.2",
106
- "@lancedb/lancedb-linux-arm64-gnu": "0.22.2",
107
- "@lancedb/lancedb-linux-x64-musl": "0.22.2",
108
- "@lancedb/lancedb-linux-arm64-musl": "0.22.2",
109
- "@lancedb/lancedb-win32-x64-msvc": "0.22.2",
110
- "@lancedb/lancedb-win32-arm64-msvc": "0.22.2"
103
+ "@lancedb/lancedb-darwin-x64": "0.22.3-beta.1",
104
+ "@lancedb/lancedb-darwin-arm64": "0.22.3-beta.1",
105
+ "@lancedb/lancedb-linux-x64-gnu": "0.22.3-beta.1",
106
+ "@lancedb/lancedb-linux-arm64-gnu": "0.22.3-beta.1",
107
+ "@lancedb/lancedb-linux-x64-musl": "0.22.3-beta.1",
108
+ "@lancedb/lancedb-linux-arm64-musl": "0.22.3-beta.1",
109
+ "@lancedb/lancedb-win32-x64-msvc": "0.22.3-beta.1",
110
+ "@lancedb/lancedb-win32-arm64-msvc": "0.22.3-beta.1"
111
111
  },
112
112
  "peerDependencies": {
113
113
  "apache-arrow": ">=15.0.0 <=18.1.0"