@lancedb/lancedb 0.22.2 → 0.22.3-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +3 -2
- package/dist/index.js +4 -1
- package/dist/indices.d.ts +83 -0
- package/dist/indices.js +20 -1
- package/dist/native.d.ts +42 -0
- package/dist/native.js +3 -1
- package/dist/permutation.d.ts +130 -0
- package/dist/permutation.js +168 -0
- package/dist/query.d.ts +9 -0
- package/dist/query.js +19 -0
- package/package.json +9 -9
package/dist/index.d.ts
CHANGED
|
@@ -2,16 +2,17 @@ import { Connection } from "./connection";
|
|
|
2
2
|
import { ConnectionOptions, Session } from "./native.js";
|
|
3
3
|
import { HeaderProvider } from "./header";
|
|
4
4
|
export { JsHeaderProvider as NativeJsHeaderProvider } from "./native.js";
|
|
5
|
-
export { AddColumnsSql, ConnectionOptions, IndexStatistics, IndexConfig, ClientConfig, TimeoutConfig, RetryConfig, TlsConfig, OptimizeStats, CompactionStats, RemovalStats, TableStatistics, FragmentStatistics, FragmentSummaryStats, Tags, TagContents, MergeResult, AddResult, AddColumnsResult, AlterColumnsResult, DeleteResult, DropColumnsResult, UpdateResult, } from "./native.js";
|
|
5
|
+
export { AddColumnsSql, ConnectionOptions, IndexStatistics, IndexConfig, ClientConfig, TimeoutConfig, RetryConfig, TlsConfig, OptimizeStats, CompactionStats, RemovalStats, TableStatistics, FragmentStatistics, FragmentSummaryStats, Tags, TagContents, MergeResult, AddResult, AddColumnsResult, AlterColumnsResult, DeleteResult, DropColumnsResult, UpdateResult, SplitRandomOptions, SplitHashOptions, SplitSequentialOptions, ShuffleOptions, } from "./native.js";
|
|
6
6
|
export { makeArrowTable, MakeArrowTableOptions, Data, VectorColumnOptions, } from "./arrow";
|
|
7
7
|
export { Connection, CreateTableOptions, TableNamesOptions, OpenTableOptions, } from "./connection";
|
|
8
8
|
export { Session } from "./native.js";
|
|
9
9
|
export { ExecutableQuery, Query, QueryBase, VectorQuery, TakeQuery, QueryExecutionOptions, FullTextSearchOptions, RecordBatchIterator, FullTextQuery, MatchQuery, PhraseQuery, BoostQuery, MultiMatchQuery, BooleanQuery, FullTextQueryType, Operator, Occur, } from "./query";
|
|
10
|
-
export { Index, IndexOptions, IvfPqOptions, IvfFlatOptions, HnswPqOptions, HnswSqOptions, FtsOptions, } from "./indices";
|
|
10
|
+
export { Index, IndexOptions, IvfPqOptions, IvfRqOptions, IvfFlatOptions, HnswPqOptions, HnswSqOptions, FtsOptions, } from "./indices";
|
|
11
11
|
export { Table, AddDataOptions, UpdateOptions, OptimizeOptions, Version, ColumnAlteration, } from "./table";
|
|
12
12
|
export { HeaderProvider, StaticHeaderProvider, OAuthHeaderProvider, TokenResponse, } from "./header";
|
|
13
13
|
export { MergeInsertBuilder, WriteExecutionOptions } from "./merge";
|
|
14
14
|
export * as embedding from "./embedding";
|
|
15
|
+
export { permutationBuilder, PermutationBuilder } from "./permutation";
|
|
15
16
|
export * as rerankers from "./rerankers";
|
|
16
17
|
export { SchemaLike, TableLike, FieldLike, RecordBatchLike, DataLike, IntoVector, MultiVector, } from "./arrow";
|
|
17
18
|
export { IntoSql, packBits } from "./util";
|
package/dist/index.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
// SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
|
4
4
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
|
-
exports.packBits = exports.rerankers = exports.embedding = exports.MergeInsertBuilder = exports.OAuthHeaderProvider = exports.StaticHeaderProvider = exports.HeaderProvider = exports.Table = exports.Index = exports.Occur = exports.Operator = exports.FullTextQueryType = exports.BooleanQuery = exports.MultiMatchQuery = exports.BoostQuery = exports.PhraseQuery = exports.MatchQuery = exports.RecordBatchIterator = exports.TakeQuery = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Session = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = exports.TagContents = exports.Tags = exports.NativeJsHeaderProvider = void 0;
|
|
5
|
+
exports.packBits = exports.rerankers = exports.PermutationBuilder = exports.permutationBuilder = exports.embedding = exports.MergeInsertBuilder = exports.OAuthHeaderProvider = exports.StaticHeaderProvider = exports.HeaderProvider = exports.Table = exports.Index = exports.Occur = exports.Operator = exports.FullTextQueryType = exports.BooleanQuery = exports.MultiMatchQuery = exports.BoostQuery = exports.PhraseQuery = exports.MatchQuery = exports.RecordBatchIterator = exports.TakeQuery = exports.VectorQuery = exports.QueryBase = exports.Query = exports.Session = exports.Connection = exports.VectorColumnOptions = exports.MakeArrowTableOptions = exports.makeArrowTable = exports.TagContents = exports.Tags = exports.NativeJsHeaderProvider = void 0;
|
|
6
6
|
exports.connect = connect;
|
|
7
7
|
const connection_1 = require("./connection");
|
|
8
8
|
const native_js_1 = require("./native.js");
|
|
@@ -45,6 +45,9 @@ Object.defineProperty(exports, "OAuthHeaderProvider", { enumerable: true, get: f
|
|
|
45
45
|
var merge_1 = require("./merge");
|
|
46
46
|
Object.defineProperty(exports, "MergeInsertBuilder", { enumerable: true, get: function () { return merge_1.MergeInsertBuilder; } });
|
|
47
47
|
exports.embedding = require("./embedding");
|
|
48
|
+
var permutation_1 = require("./permutation");
|
|
49
|
+
Object.defineProperty(exports, "permutationBuilder", { enumerable: true, get: function () { return permutation_1.permutationBuilder; } });
|
|
50
|
+
Object.defineProperty(exports, "PermutationBuilder", { enumerable: true, get: function () { return permutation_1.PermutationBuilder; } });
|
|
48
51
|
exports.rerankers = require("./rerankers");
|
|
49
52
|
var util_1 = require("./util");
|
|
50
53
|
Object.defineProperty(exports, "packBits", { enumerable: true, get: function () { return util_1.packBits; } });
|
package/dist/indices.d.ts
CHANGED
|
@@ -101,6 +101,72 @@ export interface IvfPqOptions {
|
|
|
101
101
|
*/
|
|
102
102
|
sampleRate?: number;
|
|
103
103
|
}
|
|
104
|
+
export interface IvfRqOptions {
|
|
105
|
+
/**
|
|
106
|
+
* The number of IVF partitions to create.
|
|
107
|
+
*
|
|
108
|
+
* This value should generally scale with the number of rows in the dataset.
|
|
109
|
+
* By default the number of partitions is the square root of the number of
|
|
110
|
+
* rows.
|
|
111
|
+
*
|
|
112
|
+
* If this value is too large then the first part of the search (picking the
|
|
113
|
+
* right partition) will be slow. If this value is too small then the second
|
|
114
|
+
* part of the search (searching within a partition) will be slow.
|
|
115
|
+
*/
|
|
116
|
+
numPartitions?: number;
|
|
117
|
+
/**
|
|
118
|
+
* Number of bits per dimension for residual quantization.
|
|
119
|
+
*
|
|
120
|
+
* This value controls how much each residual component is compressed. The more
|
|
121
|
+
* bits, the more accurate the index will be but the slower search. Typical values
|
|
122
|
+
* are small integers; the default is 1 bit per dimension.
|
|
123
|
+
*/
|
|
124
|
+
numBits?: number;
|
|
125
|
+
/**
|
|
126
|
+
* Distance type to use to build the index.
|
|
127
|
+
*
|
|
128
|
+
* Default value is "l2".
|
|
129
|
+
*
|
|
130
|
+
* This is used when training the index to calculate the IVF partitions
|
|
131
|
+
* (vectors are grouped in partitions with similar vectors according to this
|
|
132
|
+
* distance type) and during quantization.
|
|
133
|
+
*
|
|
134
|
+
* The distance type used to train an index MUST match the distance type used
|
|
135
|
+
* to search the index. Failure to do so will yield inaccurate results.
|
|
136
|
+
*
|
|
137
|
+
* The following distance types are available:
|
|
138
|
+
*
|
|
139
|
+
* "l2" - Euclidean distance.
|
|
140
|
+
* "cosine" - Cosine distance.
|
|
141
|
+
* "dot" - Dot product.
|
|
142
|
+
*/
|
|
143
|
+
distanceType?: "l2" | "cosine" | "dot";
|
|
144
|
+
/**
|
|
145
|
+
* Max iterations to train IVF kmeans.
|
|
146
|
+
*
|
|
147
|
+
* When training an IVF index we use kmeans to calculate the partitions. This parameter
|
|
148
|
+
* controls how many iterations of kmeans to run.
|
|
149
|
+
*
|
|
150
|
+
* The default value is 50.
|
|
151
|
+
*/
|
|
152
|
+
maxIterations?: number;
|
|
153
|
+
/**
|
|
154
|
+
* The number of vectors, per partition, to sample when training IVF kmeans.
|
|
155
|
+
*
|
|
156
|
+
* When an IVF index is trained, we need to calculate partitions. These are groups
|
|
157
|
+
* of vectors that are similar to each other. To do this we use an algorithm called kmeans.
|
|
158
|
+
*
|
|
159
|
+
* Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
|
|
160
|
+
* random sample of the data. This parameter controls the size of the sample. The total
|
|
161
|
+
* number of vectors used to train the index is `sampleRate * numPartitions`.
|
|
162
|
+
*
|
|
163
|
+
* Increasing this value might improve the quality of the index but in most cases the
|
|
164
|
+
* default should be sufficient.
|
|
165
|
+
*
|
|
166
|
+
* The default value is 256.
|
|
167
|
+
*/
|
|
168
|
+
sampleRate?: number;
|
|
169
|
+
}
|
|
104
170
|
/**
|
|
105
171
|
* Options to create an `HNSW_PQ` index
|
|
106
172
|
*/
|
|
@@ -476,6 +542,23 @@ export declare class Index {
|
|
|
476
542
|
* currently is also a memory intensive operation.
|
|
477
543
|
*/
|
|
478
544
|
static ivfPq(options?: Partial<IvfPqOptions>): Index;
|
|
545
|
+
/**
|
|
546
|
+
* Create an IvfRq index
|
|
547
|
+
*
|
|
548
|
+
* IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
|
|
549
|
+
* and organizes them into IVF partitions.
|
|
550
|
+
*
|
|
551
|
+
* The compression scheme is called RabitQ quantization. Each dimension is quantized into a small number of bits.
|
|
552
|
+
* The parameters `numBits` and `numPartitions` control this process, providing a tradeoff
|
|
553
|
+
* between index size (and thus search speed) and index accuracy.
|
|
554
|
+
*
|
|
555
|
+
* The partitioning process is called IVF and the `numPartitions` parameter controls how
|
|
556
|
+
* many groups to create.
|
|
557
|
+
*
|
|
558
|
+
* Note that training an IVF RQ index on a large dataset is a slow operation and
|
|
559
|
+
* currently is also a memory intensive operation.
|
|
560
|
+
*/
|
|
561
|
+
static ivfRq(options?: Partial<IvfRqOptions>): Index;
|
|
479
562
|
/**
|
|
480
563
|
* Create an IvfFlat index
|
|
481
564
|
*
|
package/dist/indices.js
CHANGED
|
@@ -32,7 +32,26 @@ class Index {
|
|
|
32
32
|
* currently is also a memory intensive operation.
|
|
33
33
|
*/
|
|
34
34
|
static ivfPq(options) {
|
|
35
|
-
return new Index(native_1.Index.ivfPq(options?.distanceType, options?.numPartitions, options?.numSubVectors, options?.maxIterations, options?.sampleRate));
|
|
35
|
+
return new Index(native_1.Index.ivfPq(options?.distanceType, options?.numPartitions, options?.numSubVectors, options?.numBits, options?.maxIterations, options?.sampleRate));
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Create an IvfRq index
|
|
39
|
+
*
|
|
40
|
+
* IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
|
|
41
|
+
* and organizes them into IVF partitions.
|
|
42
|
+
*
|
|
43
|
+
* The compression scheme is called RabitQ quantization. Each dimension is quantized into a small number of bits.
|
|
44
|
+
* The parameters `numBits` and `numPartitions` control this process, providing a tradeoff
|
|
45
|
+
* between index size (and thus search speed) and index accuracy.
|
|
46
|
+
*
|
|
47
|
+
* The partitioning process is called IVF and the `numPartitions` parameter controls how
|
|
48
|
+
* many groups to create.
|
|
49
|
+
*
|
|
50
|
+
* Note that training an IVF RQ index on a large dataset is a slow operation and
|
|
51
|
+
* currently is also a memory intensive operation.
|
|
52
|
+
*/
|
|
53
|
+
static ivfRq(options) {
|
|
54
|
+
return new Index(native_1.Index.ivfRq(options?.distanceType, options?.numPartitions, options?.numBits, options?.maxIterations, options?.sampleRate));
|
|
36
55
|
}
|
|
37
56
|
/**
|
|
38
57
|
* Create an IvfFlat index
|
package/dist/native.d.ts
CHANGED
|
@@ -3,6 +3,28 @@
|
|
|
3
3
|
|
|
4
4
|
/* auto-generated by NAPI-RS */
|
|
5
5
|
|
|
6
|
+
export interface SplitRandomOptions {
|
|
7
|
+
ratios?: Array<number>
|
|
8
|
+
counts?: Array<number>
|
|
9
|
+
fixed?: number
|
|
10
|
+
seed?: number
|
|
11
|
+
}
|
|
12
|
+
export interface SplitHashOptions {
|
|
13
|
+
columns: Array<string>
|
|
14
|
+
splitWeights: Array<number>
|
|
15
|
+
discardWeight?: number
|
|
16
|
+
}
|
|
17
|
+
export interface SplitSequentialOptions {
|
|
18
|
+
ratios?: Array<number>
|
|
19
|
+
counts?: Array<number>
|
|
20
|
+
fixed?: number
|
|
21
|
+
}
|
|
22
|
+
export interface ShuffleOptions {
|
|
23
|
+
seed?: number
|
|
24
|
+
clumpSize?: number
|
|
25
|
+
}
|
|
26
|
+
/** Create a permutation builder for the given table */
|
|
27
|
+
export declare function permutationBuilder(table: Table): PermutationBuilder
|
|
6
28
|
/** Timeout configuration for remote HTTP client. */
|
|
7
29
|
export interface TimeoutConfig {
|
|
8
30
|
/**
|
|
@@ -357,6 +379,7 @@ export class JsHeaderProvider {
|
|
|
357
379
|
}
|
|
358
380
|
export class Index {
|
|
359
381
|
static ivfPq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, numBits?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
|
|
382
|
+
static ivfRq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, numBits?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
|
|
360
383
|
static ivfFlat(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
|
|
361
384
|
static btree(): Index
|
|
362
385
|
static bitmap(): Index
|
|
@@ -378,6 +401,22 @@ export class NativeMergeInsertBuilder {
|
|
|
378
401
|
useIndex(useIndex: boolean): NativeMergeInsertBuilder
|
|
379
402
|
execute(buf: Buffer): Promise<MergeResult>
|
|
380
403
|
}
|
|
404
|
+
export class PermutationBuilder {
|
|
405
|
+
/** Configure random splits */
|
|
406
|
+
splitRandom(options: SplitRandomOptions): PermutationBuilder
|
|
407
|
+
/** Configure hash-based splits */
|
|
408
|
+
splitHash(options: SplitHashOptions): PermutationBuilder
|
|
409
|
+
/** Configure sequential splits */
|
|
410
|
+
splitSequential(options: SplitSequentialOptions): PermutationBuilder
|
|
411
|
+
/** Configure calculated splits */
|
|
412
|
+
splitCalculated(calculation: string): PermutationBuilder
|
|
413
|
+
/** Configure shuffling */
|
|
414
|
+
shuffle(options: ShuffleOptions): PermutationBuilder
|
|
415
|
+
/** Configure filtering */
|
|
416
|
+
filter(filter: string): PermutationBuilder
|
|
417
|
+
/** Execute the permutation builder and create the table */
|
|
418
|
+
execute(): Promise<Table>
|
|
419
|
+
}
|
|
381
420
|
export class Query {
|
|
382
421
|
onlyIf(predicate: string): void
|
|
383
422
|
fullTextSearch(query: object): void
|
|
@@ -388,6 +427,7 @@ export class Query {
|
|
|
388
427
|
nearestTo(vector: Float32Array): VectorQuery
|
|
389
428
|
fastSearch(): void
|
|
390
429
|
withRowId(): void
|
|
430
|
+
outputSchema(): Promise<Buffer>
|
|
391
431
|
execute(maxBatchLength?: number | undefined | null, timeoutMs?: number | undefined | null): Promise<RecordBatchIterator>
|
|
392
432
|
explainPlan(verbose: boolean): Promise<string>
|
|
393
433
|
analyzePlan(): Promise<string>
|
|
@@ -413,6 +453,7 @@ export class VectorQuery {
|
|
|
413
453
|
fastSearch(): void
|
|
414
454
|
withRowId(): void
|
|
415
455
|
rerank(callbacks: RerankerCallbacks): void
|
|
456
|
+
outputSchema(): Promise<Buffer>
|
|
416
457
|
execute(maxBatchLength?: number | undefined | null, timeoutMs?: number | undefined | null): Promise<RecordBatchIterator>
|
|
417
458
|
explainPlan(verbose: boolean): Promise<string>
|
|
418
459
|
analyzePlan(): Promise<string>
|
|
@@ -421,6 +462,7 @@ export class TakeQuery {
|
|
|
421
462
|
select(columns: Array<[string, string]>): void
|
|
422
463
|
selectColumns(columns: Array<string>): void
|
|
423
464
|
withRowId(): void
|
|
465
|
+
outputSchema(): Promise<Buffer>
|
|
424
466
|
execute(maxBatchLength?: number | undefined | null, timeoutMs?: number | undefined | null): Promise<RecordBatchIterator>
|
|
425
467
|
explainPlan(verbose: boolean): Promise<string>
|
|
426
468
|
analyzePlan(): Promise<string>
|
package/dist/native.js
CHANGED
|
@@ -319,12 +319,14 @@ if (!nativeBinding) {
|
|
|
319
319
|
}
|
|
320
320
|
throw new Error(`Failed to load native binding`);
|
|
321
321
|
}
|
|
322
|
-
const { Connection, JsHeaderProvider, Index, RecordBatchIterator, NativeMergeInsertBuilder, Query, VectorQuery, TakeQuery, JsFullTextQuery, Reranker, RrfReranker, Session, Table, TagContents, Tags } = nativeBinding;
|
|
322
|
+
const { Connection, JsHeaderProvider, Index, RecordBatchIterator, NativeMergeInsertBuilder, PermutationBuilder, permutationBuilder, Query, VectorQuery, TakeQuery, JsFullTextQuery, Reranker, RrfReranker, Session, Table, TagContents, Tags } = nativeBinding;
|
|
323
323
|
module.exports.Connection = Connection;
|
|
324
324
|
module.exports.JsHeaderProvider = JsHeaderProvider;
|
|
325
325
|
module.exports.Index = Index;
|
|
326
326
|
module.exports.RecordBatchIterator = RecordBatchIterator;
|
|
327
327
|
module.exports.NativeMergeInsertBuilder = NativeMergeInsertBuilder;
|
|
328
|
+
module.exports.PermutationBuilder = PermutationBuilder;
|
|
329
|
+
module.exports.permutationBuilder = permutationBuilder;
|
|
328
330
|
module.exports.Query = Query;
|
|
329
331
|
module.exports.VectorQuery = VectorQuery;
|
|
330
332
|
module.exports.TakeQuery = TakeQuery;
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import { PermutationBuilder as NativePermutationBuilder, ShuffleOptions, SplitHashOptions, SplitRandomOptions, SplitSequentialOptions } from "./native.js";
|
|
2
|
+
import { Table } from "./table";
|
|
3
|
+
/**
|
|
4
|
+
* A PermutationBuilder for creating data permutations with splits, shuffling, and filtering.
|
|
5
|
+
*
|
|
6
|
+
* This class provides a TypeScript wrapper around the native Rust PermutationBuilder,
|
|
7
|
+
* offering methods to configure data splits, shuffling, and filtering before executing
|
|
8
|
+
* the permutation to create a new table.
|
|
9
|
+
*/
|
|
10
|
+
export declare class PermutationBuilder {
|
|
11
|
+
private inner;
|
|
12
|
+
/**
|
|
13
|
+
* @hidden
|
|
14
|
+
*/
|
|
15
|
+
constructor(inner: NativePermutationBuilder);
|
|
16
|
+
/**
|
|
17
|
+
* Configure random splits for the permutation.
|
|
18
|
+
*
|
|
19
|
+
* @param options - Configuration for random splitting
|
|
20
|
+
* @returns A new PermutationBuilder instance
|
|
21
|
+
* @example
|
|
22
|
+
* ```ts
|
|
23
|
+
* // Split by ratios
|
|
24
|
+
* builder.splitRandom({ ratios: [0.7, 0.3], seed: 42 });
|
|
25
|
+
*
|
|
26
|
+
* // Split by counts
|
|
27
|
+
* builder.splitRandom({ counts: [1000, 500], seed: 42 });
|
|
28
|
+
*
|
|
29
|
+
* // Split with fixed size
|
|
30
|
+
* builder.splitRandom({ fixed: 100, seed: 42 });
|
|
31
|
+
* ```
|
|
32
|
+
*/
|
|
33
|
+
splitRandom(options: SplitRandomOptions): PermutationBuilder;
|
|
34
|
+
/**
|
|
35
|
+
* Configure hash-based splits for the permutation.
|
|
36
|
+
*
|
|
37
|
+
* @param options - Configuration for hash-based splitting
|
|
38
|
+
* @returns A new PermutationBuilder instance
|
|
39
|
+
* @example
|
|
40
|
+
* ```ts
|
|
41
|
+
* builder.splitHash({
|
|
42
|
+
* columns: ["user_id"],
|
|
43
|
+
* splitWeights: [70, 30],
|
|
44
|
+
* discardWeight: 0
|
|
45
|
+
* });
|
|
46
|
+
* ```
|
|
47
|
+
*/
|
|
48
|
+
splitHash(options: SplitHashOptions): PermutationBuilder;
|
|
49
|
+
/**
|
|
50
|
+
* Configure sequential splits for the permutation.
|
|
51
|
+
*
|
|
52
|
+
* @param options - Configuration for sequential splitting
|
|
53
|
+
* @returns A new PermutationBuilder instance
|
|
54
|
+
* @example
|
|
55
|
+
* ```ts
|
|
56
|
+
* // Split by ratios
|
|
57
|
+
* builder.splitSequential({ ratios: [0.8, 0.2] });
|
|
58
|
+
*
|
|
59
|
+
* // Split by counts
|
|
60
|
+
* builder.splitSequential({ counts: [800, 200] });
|
|
61
|
+
*
|
|
62
|
+
* // Split with fixed size
|
|
63
|
+
* builder.splitSequential({ fixed: 1000 });
|
|
64
|
+
* ```
|
|
65
|
+
*/
|
|
66
|
+
splitSequential(options: SplitSequentialOptions): PermutationBuilder;
|
|
67
|
+
/**
|
|
68
|
+
* Configure calculated splits for the permutation.
|
|
69
|
+
*
|
|
70
|
+
* @param calculation - SQL expression for calculating splits
|
|
71
|
+
* @returns A new PermutationBuilder instance
|
|
72
|
+
* @example
|
|
73
|
+
* ```ts
|
|
74
|
+
* builder.splitCalculated("user_id % 3");
|
|
75
|
+
* ```
|
|
76
|
+
*/
|
|
77
|
+
splitCalculated(calculation: string): PermutationBuilder;
|
|
78
|
+
/**
|
|
79
|
+
* Configure shuffling for the permutation.
|
|
80
|
+
*
|
|
81
|
+
* @param options - Configuration for shuffling
|
|
82
|
+
* @returns A new PermutationBuilder instance
|
|
83
|
+
* @example
|
|
84
|
+
* ```ts
|
|
85
|
+
* // Basic shuffle
|
|
86
|
+
* builder.shuffle({ seed: 42 });
|
|
87
|
+
*
|
|
88
|
+
* // Shuffle with clump size
|
|
89
|
+
* builder.shuffle({ seed: 42, clumpSize: 10 });
|
|
90
|
+
* ```
|
|
91
|
+
*/
|
|
92
|
+
shuffle(options: ShuffleOptions): PermutationBuilder;
|
|
93
|
+
/**
|
|
94
|
+
* Configure filtering for the permutation.
|
|
95
|
+
*
|
|
96
|
+
* @param filter - SQL filter expression
|
|
97
|
+
* @returns A new PermutationBuilder instance
|
|
98
|
+
* @example
|
|
99
|
+
* ```ts
|
|
100
|
+
* builder.filter("age > 18 AND status = 'active'");
|
|
101
|
+
* ```
|
|
102
|
+
*/
|
|
103
|
+
filter(filter: string): PermutationBuilder;
|
|
104
|
+
/**
|
|
105
|
+
* Execute the permutation and create the destination table.
|
|
106
|
+
*
|
|
107
|
+
* @returns A Promise that resolves to the new Table instance
|
|
108
|
+
* @example
|
|
109
|
+
* ```ts
|
|
110
|
+
* const permutationTable = await builder.execute();
|
|
111
|
+
* console.log(`Created table: ${permutationTable.name}`);
|
|
112
|
+
* ```
|
|
113
|
+
*/
|
|
114
|
+
execute(): Promise<Table>;
|
|
115
|
+
}
|
|
116
|
+
/**
|
|
117
|
+
* Create a permutation builder for the given table.
|
|
118
|
+
*
|
|
119
|
+
* @param table - The source table to create a permutation from
|
|
120
|
+
* @returns A PermutationBuilder instance
|
|
121
|
+
* @example
|
|
122
|
+
* ```ts
|
|
123
|
+
* const builder = permutationBuilder(sourceTable)
|
|
124
|
+
* .splitRandom({ ratios: [0.8, 0.2], seed: 42 })
|
|
125
|
+
* .shuffle({ seed: 123 });
|
|
126
|
+
*
|
|
127
|
+
* const trainingTable = await builder.execute();
|
|
128
|
+
* ```
|
|
129
|
+
*/
|
|
130
|
+
export declare function permutationBuilder(table: Table): PermutationBuilder;
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
|
4
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
|
+
exports.PermutationBuilder = void 0;
|
|
6
|
+
exports.permutationBuilder = permutationBuilder;
|
|
7
|
+
const native_js_1 = require("./native.js");
|
|
8
|
+
const table_1 = require("./table");
|
|
9
|
+
/**
|
|
10
|
+
* A PermutationBuilder for creating data permutations with splits, shuffling, and filtering.
|
|
11
|
+
*
|
|
12
|
+
* This class provides a TypeScript wrapper around the native Rust PermutationBuilder,
|
|
13
|
+
* offering methods to configure data splits, shuffling, and filtering before executing
|
|
14
|
+
* the permutation to create a new table.
|
|
15
|
+
*/
|
|
16
|
+
class PermutationBuilder {
|
|
17
|
+
inner;
|
|
18
|
+
/**
|
|
19
|
+
* @hidden
|
|
20
|
+
*/
|
|
21
|
+
constructor(inner) {
|
|
22
|
+
this.inner = inner;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Configure random splits for the permutation.
|
|
26
|
+
*
|
|
27
|
+
* @param options - Configuration for random splitting
|
|
28
|
+
* @returns A new PermutationBuilder instance
|
|
29
|
+
* @example
|
|
30
|
+
* ```ts
|
|
31
|
+
* // Split by ratios
|
|
32
|
+
* builder.splitRandom({ ratios: [0.7, 0.3], seed: 42 });
|
|
33
|
+
*
|
|
34
|
+
* // Split by counts
|
|
35
|
+
* builder.splitRandom({ counts: [1000, 500], seed: 42 });
|
|
36
|
+
*
|
|
37
|
+
* // Split with fixed size
|
|
38
|
+
* builder.splitRandom({ fixed: 100, seed: 42 });
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
splitRandom(options) {
|
|
42
|
+
const newInner = this.inner.splitRandom(options);
|
|
43
|
+
return new PermutationBuilder(newInner);
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Configure hash-based splits for the permutation.
|
|
47
|
+
*
|
|
48
|
+
* @param options - Configuration for hash-based splitting
|
|
49
|
+
* @returns A new PermutationBuilder instance
|
|
50
|
+
* @example
|
|
51
|
+
* ```ts
|
|
52
|
+
* builder.splitHash({
|
|
53
|
+
* columns: ["user_id"],
|
|
54
|
+
* splitWeights: [70, 30],
|
|
55
|
+
* discardWeight: 0
|
|
56
|
+
* });
|
|
57
|
+
* ```
|
|
58
|
+
*/
|
|
59
|
+
splitHash(options) {
|
|
60
|
+
const newInner = this.inner.splitHash(options);
|
|
61
|
+
return new PermutationBuilder(newInner);
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Configure sequential splits for the permutation.
|
|
65
|
+
*
|
|
66
|
+
* @param options - Configuration for sequential splitting
|
|
67
|
+
* @returns A new PermutationBuilder instance
|
|
68
|
+
* @example
|
|
69
|
+
* ```ts
|
|
70
|
+
* // Split by ratios
|
|
71
|
+
* builder.splitSequential({ ratios: [0.8, 0.2] });
|
|
72
|
+
*
|
|
73
|
+
* // Split by counts
|
|
74
|
+
* builder.splitSequential({ counts: [800, 200] });
|
|
75
|
+
*
|
|
76
|
+
* // Split with fixed size
|
|
77
|
+
* builder.splitSequential({ fixed: 1000 });
|
|
78
|
+
* ```
|
|
79
|
+
*/
|
|
80
|
+
splitSequential(options) {
|
|
81
|
+
const newInner = this.inner.splitSequential(options);
|
|
82
|
+
return new PermutationBuilder(newInner);
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Configure calculated splits for the permutation.
|
|
86
|
+
*
|
|
87
|
+
* @param calculation - SQL expression for calculating splits
|
|
88
|
+
* @returns A new PermutationBuilder instance
|
|
89
|
+
* @example
|
|
90
|
+
* ```ts
|
|
91
|
+
* builder.splitCalculated("user_id % 3");
|
|
92
|
+
* ```
|
|
93
|
+
*/
|
|
94
|
+
splitCalculated(calculation) {
|
|
95
|
+
const newInner = this.inner.splitCalculated(calculation);
|
|
96
|
+
return new PermutationBuilder(newInner);
|
|
97
|
+
}
|
|
98
|
+
/**
|
|
99
|
+
* Configure shuffling for the permutation.
|
|
100
|
+
*
|
|
101
|
+
* @param options - Configuration for shuffling
|
|
102
|
+
* @returns A new PermutationBuilder instance
|
|
103
|
+
* @example
|
|
104
|
+
* ```ts
|
|
105
|
+
* // Basic shuffle
|
|
106
|
+
* builder.shuffle({ seed: 42 });
|
|
107
|
+
*
|
|
108
|
+
* // Shuffle with clump size
|
|
109
|
+
* builder.shuffle({ seed: 42, clumpSize: 10 });
|
|
110
|
+
* ```
|
|
111
|
+
*/
|
|
112
|
+
shuffle(options) {
|
|
113
|
+
const newInner = this.inner.shuffle(options);
|
|
114
|
+
return new PermutationBuilder(newInner);
|
|
115
|
+
}
|
|
116
|
+
/**
|
|
117
|
+
* Configure filtering for the permutation.
|
|
118
|
+
*
|
|
119
|
+
* @param filter - SQL filter expression
|
|
120
|
+
* @returns A new PermutationBuilder instance
|
|
121
|
+
* @example
|
|
122
|
+
* ```ts
|
|
123
|
+
* builder.filter("age > 18 AND status = 'active'");
|
|
124
|
+
* ```
|
|
125
|
+
*/
|
|
126
|
+
filter(filter) {
|
|
127
|
+
const newInner = this.inner.filter(filter);
|
|
128
|
+
return new PermutationBuilder(newInner);
|
|
129
|
+
}
|
|
130
|
+
/**
|
|
131
|
+
* Execute the permutation and create the destination table.
|
|
132
|
+
*
|
|
133
|
+
* @returns A Promise that resolves to the new Table instance
|
|
134
|
+
* @example
|
|
135
|
+
* ```ts
|
|
136
|
+
* const permutationTable = await builder.execute();
|
|
137
|
+
* console.log(`Created table: ${permutationTable.name}`);
|
|
138
|
+
* ```
|
|
139
|
+
*/
|
|
140
|
+
async execute() {
|
|
141
|
+
const nativeTable = await this.inner.execute();
|
|
142
|
+
return new table_1.LocalTable(nativeTable);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
exports.PermutationBuilder = PermutationBuilder;
|
|
146
|
+
/**
|
|
147
|
+
* Create a permutation builder for the given table.
|
|
148
|
+
*
|
|
149
|
+
* @param table - The source table to create a permutation from
|
|
150
|
+
* @returns A PermutationBuilder instance
|
|
151
|
+
* @example
|
|
152
|
+
* ```ts
|
|
153
|
+
* const builder = permutationBuilder(sourceTable)
|
|
154
|
+
* .splitRandom({ ratios: [0.8, 0.2], seed: 42 })
|
|
155
|
+
* .shuffle({ seed: 123 });
|
|
156
|
+
*
|
|
157
|
+
* const trainingTable = await builder.execute();
|
|
158
|
+
* ```
|
|
159
|
+
*/
|
|
160
|
+
function permutationBuilder(table) {
|
|
161
|
+
// Extract the inner native table from the TypeScript wrapper
|
|
162
|
+
const localTable = table;
|
|
163
|
+
// Access inner through type assertion since it's private
|
|
164
|
+
const nativeBuilder = (0, native_js_1.permutationBuilder)(
|
|
165
|
+
// biome-ignore lint/suspicious/noExplicitAny: need access to private variable
|
|
166
|
+
localTable.inner);
|
|
167
|
+
return new PermutationBuilder(nativeBuilder);
|
|
168
|
+
}
|
package/dist/query.d.ts
CHANGED
|
@@ -161,6 +161,15 @@ export declare class QueryBase<NativeQueryType extends NativeQuery | NativeVecto
|
|
|
161
161
|
* @returns A query execution plan with runtime metrics for each step.
|
|
162
162
|
*/
|
|
163
163
|
analyzePlan(): Promise<string>;
|
|
164
|
+
/**
|
|
165
|
+
* Returns the schema of the output that will be returned by this query.
|
|
166
|
+
*
|
|
167
|
+
* This can be used to inspect the types and names of the columns that will be
|
|
168
|
+
* returned by the query before executing it.
|
|
169
|
+
*
|
|
170
|
+
* @returns An Arrow Schema describing the output columns.
|
|
171
|
+
*/
|
|
172
|
+
outputSchema(): Promise<import("./arrow").Schema>;
|
|
164
173
|
}
|
|
165
174
|
export declare class StandardQueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery> extends QueryBase<NativeQueryType> implements ExecutableQuery {
|
|
166
175
|
constructor(inner: NativeQueryType | Promise<NativeQueryType>);
|
package/dist/query.js
CHANGED
|
@@ -256,6 +256,25 @@ class QueryBase {
|
|
|
256
256
|
return this.inner.analyzePlan();
|
|
257
257
|
}
|
|
258
258
|
}
|
|
259
|
+
/**
|
|
260
|
+
* Returns the schema of the output that will be returned by this query.
|
|
261
|
+
*
|
|
262
|
+
* This can be used to inspect the types and names of the columns that will be
|
|
263
|
+
* returned by the query before executing it.
|
|
264
|
+
*
|
|
265
|
+
* @returns An Arrow Schema describing the output columns.
|
|
266
|
+
*/
|
|
267
|
+
async outputSchema() {
|
|
268
|
+
let schemaBuffer;
|
|
269
|
+
if (this.inner instanceof Promise) {
|
|
270
|
+
schemaBuffer = await this.inner.then((inner) => inner.outputSchema());
|
|
271
|
+
}
|
|
272
|
+
else {
|
|
273
|
+
schemaBuffer = await this.inner.outputSchema();
|
|
274
|
+
}
|
|
275
|
+
const schema = (0, arrow_1.tableFromIPC)(schemaBuffer).schema;
|
|
276
|
+
return schema;
|
|
277
|
+
}
|
|
259
278
|
}
|
|
260
279
|
exports.QueryBase = QueryBase;
|
|
261
280
|
class StandardQueryBase extends QueryBase {
|
package/package.json
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
"ann"
|
|
12
12
|
],
|
|
13
13
|
"private": false,
|
|
14
|
-
"version": "0.22.2",
|
|
14
|
+
"version": "0.22.3-beta.1",
|
|
15
15
|
"main": "dist/index.js",
|
|
16
16
|
"exports": {
|
|
17
17
|
".": "./dist/index.js",
|
|
@@ -100,14 +100,14 @@
|
|
|
100
100
|
"reflect-metadata": "^0.2.2"
|
|
101
101
|
},
|
|
102
102
|
"optionalDependencies": {
|
|
103
|
-
"@lancedb/lancedb-darwin-x64": "0.22.2",
|
|
104
|
-
"@lancedb/lancedb-darwin-arm64": "0.22.2",
|
|
105
|
-
"@lancedb/lancedb-linux-x64-gnu": "0.22.2",
|
|
106
|
-
"@lancedb/lancedb-linux-arm64-gnu": "0.22.2",
|
|
107
|
-
"@lancedb/lancedb-linux-x64-musl": "0.22.2",
|
|
108
|
-
"@lancedb/lancedb-linux-arm64-musl": "0.22.2",
|
|
109
|
-
"@lancedb/lancedb-win32-x64-msvc": "0.22.2",
|
|
110
|
-
"@lancedb/lancedb-win32-arm64-msvc": "0.22.2"
|
|
103
|
+
"@lancedb/lancedb-darwin-x64": "0.22.3-beta.1",
|
|
104
|
+
"@lancedb/lancedb-darwin-arm64": "0.22.3-beta.1",
|
|
105
|
+
"@lancedb/lancedb-linux-x64-gnu": "0.22.3-beta.1",
|
|
106
|
+
"@lancedb/lancedb-linux-arm64-gnu": "0.22.3-beta.1",
|
|
107
|
+
"@lancedb/lancedb-linux-x64-musl": "0.22.3-beta.1",
|
|
108
|
+
"@lancedb/lancedb-linux-arm64-musl": "0.22.3-beta.1",
|
|
109
|
+
"@lancedb/lancedb-win32-x64-msvc": "0.22.3-beta.1",
|
|
110
|
+
"@lancedb/lancedb-win32-arm64-msvc": "0.22.3-beta.1"
|
|
111
111
|
},
|
|
112
112
|
"peerDependencies": {
|
|
113
113
|
"apache-arrow": ">=15.0.0 <=18.1.0"
|