@lancedb/lancedb 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/arrow.d.ts +5 -3
- package/dist/arrow.js +1 -1
- package/dist/embedding/embedding_function.d.ts +4 -3
- package/dist/embedding/index.d.ts +1 -0
- package/dist/embedding/index.js +1 -0
- package/dist/embedding/registry.d.ts +9 -7
- package/dist/embedding/registry.js +24 -6
- package/dist/embedding/transformers.d.ts +37 -0
- package/dist/embedding/transformers.js +147 -0
- package/dist/query.js +15 -9
- package/dist/remote/client.d.ts +1 -1
- package/dist/remote/client.js +6 -8
- package/dist/remote/connection.d.ts +2 -3
- package/dist/remote/connection.js +2 -2
- package/dist/table.d.ts +3 -0
- package/dist/table.js +1 -1
- package/package.json +17 -14
- package/Cargo.toml +0 -28
- package/biome.json +0 -158
- package/build.rs +0 -5
- package/dist/native.d.ts +0 -208
- package/examples/ann_indexes.ts +0 -49
- package/examples/basic.ts +0 -149
- package/examples/embedding.ts +0 -83
- package/examples/filtering.ts +0 -34
- package/examples/jsconfig.json +0 -27
- package/examples/package-lock.json +0 -79
- package/examples/package.json +0 -18
- package/examples/search.ts +0 -37
- package/jest.config.js +0 -7
- package/lancedb/arrow.ts +0 -947
- package/lancedb/connection.ts +0 -333
- package/lancedb/embedding/embedding_function.ts +0 -194
- package/lancedb/embedding/index.ts +0 -113
- package/lancedb/embedding/openai.ts +0 -113
- package/lancedb/embedding/registry.ts +0 -188
- package/lancedb/index.ts +0 -142
- package/lancedb/indices.ts +0 -203
- package/lancedb/merge.ts +0 -70
- package/lancedb/query.ts +0 -507
- package/lancedb/remote/client.ts +0 -221
- package/lancedb/remote/connection.ts +0 -201
- package/lancedb/remote/index.ts +0 -3
- package/lancedb/remote/table.ts +0 -226
- package/lancedb/sanitize.ts +0 -588
- package/lancedb/table.ts +0 -669
- package/lancedb/util.ts +0 -69
- package/native.d.ts +0 -208
- package/nodejs-artifacts/arrow.d.ts +0 -250
- package/nodejs-artifacts/arrow.js +0 -768
- package/nodejs-artifacts/connection.d.ts +0 -171
- package/nodejs-artifacts/connection.js +0 -135
- package/nodejs-artifacts/embedding/embedding_function.d.ts +0 -79
- package/nodejs-artifacts/embedding/embedding_function.js +0 -112
- package/nodejs-artifacts/embedding/index.d.ts +0 -28
- package/nodejs-artifacts/embedding/index.js +0 -114
- package/nodejs-artifacts/embedding/openai.d.ts +0 -18
- package/nodejs-artifacts/embedding/openai.js +0 -105
- package/nodejs-artifacts/embedding/registry.d.ts +0 -53
- package/nodejs-artifacts/embedding/registry.js +0 -127
- package/nodejs-artifacts/index.d.ts +0 -55
- package/nodejs-artifacts/index.js +0 -57
- package/nodejs-artifacts/indices.d.ts +0 -165
- package/nodejs-artifacts/indices.js +0 -71
- package/nodejs-artifacts/merge.d.ts +0 -54
- package/nodejs-artifacts/merge.js +0 -64
- package/nodejs-artifacts/native.d.ts +0 -208
- package/nodejs-artifacts/native.js +0 -330
- package/nodejs-artifacts/query.d.ts +0 -283
- package/nodejs-artifacts/query.js +0 -448
- package/nodejs-artifacts/remote/client.d.ts +0 -28
- package/nodejs-artifacts/remote/client.js +0 -172
- package/nodejs-artifacts/remote/connection.d.ts +0 -25
- package/nodejs-artifacts/remote/connection.js +0 -110
- package/nodejs-artifacts/remote/index.d.ts +0 -3
- package/nodejs-artifacts/remote/index.js +0 -9
- package/nodejs-artifacts/remote/table.d.ts +0 -42
- package/nodejs-artifacts/remote/table.js +0 -179
- package/nodejs-artifacts/sanitize.d.ts +0 -31
- package/nodejs-artifacts/sanitize.js +0 -436
- package/nodejs-artifacts/table.d.ts +0 -395
- package/nodejs-artifacts/table.js +0 -230
- package/nodejs-artifacts/util.d.ts +0 -14
- package/nodejs-artifacts/util.js +0 -65
- package/tsconfig.json +0 -25
- package/typedoc.json +0 -10
package/lancedb/util.ts
DELETED
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
export type IntoSql =
|
|
2
|
-
| string
|
|
3
|
-
| number
|
|
4
|
-
| boolean
|
|
5
|
-
| null
|
|
6
|
-
| Date
|
|
7
|
-
| ArrayBufferLike
|
|
8
|
-
| Buffer
|
|
9
|
-
| IntoSql[];
|
|
10
|
-
|
|
11
|
-
export function toSQL(value: IntoSql): string {
|
|
12
|
-
if (typeof value === "string") {
|
|
13
|
-
return `'${value.replace(/'/g, "''")}'`;
|
|
14
|
-
} else if (typeof value === "number") {
|
|
15
|
-
return value.toString();
|
|
16
|
-
} else if (typeof value === "boolean") {
|
|
17
|
-
return value ? "TRUE" : "FALSE";
|
|
18
|
-
} else if (value === null) {
|
|
19
|
-
return "NULL";
|
|
20
|
-
} else if (value instanceof Date) {
|
|
21
|
-
return `'${value.toISOString()}'`;
|
|
22
|
-
} else if (Array.isArray(value)) {
|
|
23
|
-
return `[${value.map(toSQL).join(", ")}]`;
|
|
24
|
-
} else if (Buffer.isBuffer(value)) {
|
|
25
|
-
return `X'${value.toString("hex")}'`;
|
|
26
|
-
} else if (value instanceof ArrayBuffer) {
|
|
27
|
-
return `X'${Buffer.from(value).toString("hex")}'`;
|
|
28
|
-
} else {
|
|
29
|
-
throw new Error(
|
|
30
|
-
`Unsupported value type: ${typeof value} value: (${value})`,
|
|
31
|
-
);
|
|
32
|
-
}
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
export class TTLCache {
|
|
36
|
-
// biome-ignore lint/suspicious/noExplicitAny: the cache stores values of arbitrary type
|
|
37
|
-
private readonly cache: Map<string, { value: any; expires: number }>;
|
|
38
|
-
|
|
39
|
-
/**
|
|
40
|
-
* @param ttl Time to live in milliseconds
|
|
41
|
-
*/
|
|
42
|
-
constructor(private readonly ttl: number) {
|
|
43
|
-
this.cache = new Map();
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
// biome-ignore lint/suspicious/noExplicitAny: cached values are of arbitrary type, so get() returns any
|
|
47
|
-
get(key: string): any | undefined {
|
|
48
|
-
const entry = this.cache.get(key);
|
|
49
|
-
if (entry === undefined) {
|
|
50
|
-
return undefined;
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
if (entry.expires < Date.now()) {
|
|
54
|
-
this.cache.delete(key);
|
|
55
|
-
return undefined;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
return entry.value;
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
// biome-ignore lint/suspicious/noExplicitAny: set() accepts values of arbitrary type
|
|
62
|
-
set(key: string, value: any): void {
|
|
63
|
-
this.cache.set(key, { value, expires: Date.now() + this.ttl });
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
delete(key: string): void {
|
|
67
|
-
this.cache.delete(key);
|
|
68
|
-
}
|
|
69
|
-
}
|
package/native.d.ts
DELETED
|
@@ -1,208 +0,0 @@
|
|
|
1
|
-
/* tslint:disable */
|
|
2
|
-
/* eslint-disable */
|
|
3
|
-
|
|
4
|
-
/* auto-generated by NAPI-RS */
|
|
5
|
-
|
|
6
|
-
/** A description of an index currently configured on a column */
|
|
7
|
-
export interface IndexConfig {
|
|
8
|
-
/** The name of the index */
|
|
9
|
-
name: string
|
|
10
|
-
/** The type of the index */
|
|
11
|
-
indexType: string
|
|
12
|
-
/**
|
|
13
|
-
* The columns in the index
|
|
14
|
-
*
|
|
15
|
-
* Currently this is always an array of size 1. In the future there may
|
|
16
|
-
* be more columns to represent composite indices.
|
|
17
|
-
*/
|
|
18
|
-
columns: Array<string>
|
|
19
|
-
}
|
|
20
|
-
/** Statistics about a compaction operation. */
|
|
21
|
-
export interface CompactionStats {
|
|
22
|
-
/** The number of fragments removed */
|
|
23
|
-
fragmentsRemoved: number
|
|
24
|
-
/** The number of new, compacted fragments added */
|
|
25
|
-
fragmentsAdded: number
|
|
26
|
-
/** The number of data files removed */
|
|
27
|
-
filesRemoved: number
|
|
28
|
-
/** The number of new, compacted data files added */
|
|
29
|
-
filesAdded: number
|
|
30
|
-
}
|
|
31
|
-
/** Statistics about a cleanup operation */
|
|
32
|
-
export interface RemovalStats {
|
|
33
|
-
/** The number of bytes removed */
|
|
34
|
-
bytesRemoved: number
|
|
35
|
-
/** The number of old versions removed */
|
|
36
|
-
oldVersionsRemoved: number
|
|
37
|
-
}
|
|
38
|
-
/** Statistics about an optimize operation */
|
|
39
|
-
export interface OptimizeStats {
|
|
40
|
-
/** Statistics about the compaction operation */
|
|
41
|
-
compaction: CompactionStats
|
|
42
|
-
/** Statistics about the removal operation */
|
|
43
|
-
prune: RemovalStats
|
|
44
|
-
}
|
|
45
|
-
/**
|
|
46
|
-
* A definition of a column alteration. The alteration changes the column at
|
|
47
|
-
* `path` to have the new name `rename`, to be nullable if `nullable` is true,
|
|
48
|
-
* and to have the data type `data_type` (not exposed by this interface). At least one of `rename` or `nullable`
|
|
49
|
-
* must be provided.
|
|
50
|
-
*/
|
|
51
|
-
export interface ColumnAlteration {
|
|
52
|
-
/**
|
|
53
|
-
* The path to the column to alter. This is a dot-separated path to the column.
|
|
54
|
-
* If it is a top-level column then it is just the name of the column. If it is
|
|
55
|
-
* a nested column then it is the path to the column, e.g. "a.b.c" for a column
|
|
56
|
-
* `c` nested inside a column `b` nested inside a column `a`.
|
|
57
|
-
*/
|
|
58
|
-
path: string
|
|
59
|
-
/**
|
|
60
|
-
* The new name of the column. If not provided then the name will not be changed.
|
|
61
|
-
* This must be distinct from the names of all other columns in the table.
|
|
62
|
-
*/
|
|
63
|
-
rename?: string
|
|
64
|
-
/** Set the new nullability. Note that a nullable column cannot be made non-nullable. */
|
|
65
|
-
nullable?: boolean
|
|
66
|
-
}
|
|
67
|
-
/** A definition of a new column to add to a table. */
|
|
68
|
-
export interface AddColumnsSql {
|
|
69
|
-
/** The name of the new column. */
|
|
70
|
-
name: string
|
|
71
|
-
/**
|
|
72
|
-
* The values to populate the new column with, as a SQL expression.
|
|
73
|
-
* The expression can reference other columns in the table.
|
|
74
|
-
*/
|
|
75
|
-
valueSql: string
|
|
76
|
-
}
|
|
77
|
-
export interface IndexStatistics {
|
|
78
|
-
/** The number of rows indexed by the index */
|
|
79
|
-
numIndexedRows: number
|
|
80
|
-
/** The number of rows not indexed */
|
|
81
|
-
numUnindexedRows: number
|
|
82
|
-
/** The type of the index */
|
|
83
|
-
indexType?: string
|
|
84
|
-
/** The metadata for each index */
|
|
85
|
-
indices: Array<IndexMetadata>
|
|
86
|
-
}
|
|
87
|
-
export interface IndexMetadata {
|
|
88
|
-
metricType?: string
|
|
89
|
-
indexType?: string
|
|
90
|
-
}
|
|
91
|
-
export interface ConnectionOptions {
|
|
92
|
-
/**
|
|
93
|
-
* (For LanceDB OSS only): The interval, in seconds, at which to check for
|
|
94
|
-
* updates to the table from other processes. If not set, then consistency is not
|
|
95
|
-
* checked. For performance reasons, this is the default. For strong
|
|
96
|
-
* consistency, set this to zero seconds. Then every read will check for
|
|
97
|
-
* updates from other processes. As a compromise, you can set this to a
|
|
98
|
-
* non-zero value for eventual consistency. If more than that interval
|
|
99
|
-
* has passed since the last check, then the table will be checked for updates.
|
|
100
|
-
* Note: this consistency only applies to read operations. Write operations are
|
|
101
|
-
* always consistent.
|
|
102
|
-
*/
|
|
103
|
-
readConsistencyInterval?: number
|
|
104
|
-
/**
|
|
105
|
-
* (For LanceDB OSS only): configuration for object storage.
|
|
106
|
-
*
|
|
107
|
-
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
|
108
|
-
*/
|
|
109
|
-
storageOptions?: Record<string, string>
|
|
110
|
-
}
|
|
111
|
-
/** Write mode for writing a table. */
|
|
112
|
-
export const enum WriteMode {
|
|
113
|
-
Create = 'Create',
|
|
114
|
-
Append = 'Append',
|
|
115
|
-
Overwrite = 'Overwrite'
|
|
116
|
-
}
|
|
117
|
-
/** Write options when creating a Table. */
|
|
118
|
-
export interface WriteOptions {
|
|
119
|
-
/** Write mode for writing to a table. */
|
|
120
|
-
mode?: WriteMode
|
|
121
|
-
}
|
|
122
|
-
export interface OpenTableOptions {
|
|
123
|
-
storageOptions?: Record<string, string>
|
|
124
|
-
}
|
|
125
|
-
export class Connection {
|
|
126
|
-
/** Create a new Connection instance from the given URI. */
|
|
127
|
-
static new(uri: string, options: ConnectionOptions): Promise<Connection>
|
|
128
|
-
display(): string
|
|
129
|
-
isOpen(): boolean
|
|
130
|
-
close(): void
|
|
131
|
-
/** List all tables in the dataset. */
|
|
132
|
-
tableNames(startAfter?: string | undefined | null, limit?: number | undefined | null): Promise<Array<string>>
|
|
133
|
-
/**
|
|
134
|
-
* Create a table from an Apache Arrow IPC (file) buffer.
|
|
135
|
-
*
|
|
136
|
-
* Parameters:
|
|
137
|
-
* - name: The name of the table.
|
|
138
|
-
* - buf: The buffer containing the IPC file.
|
|
139
|
-
*
|
|
140
|
-
*/
|
|
141
|
-
createTable(name: string, buf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
|
|
142
|
-
createEmptyTable(name: string, schemaBuf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
|
|
143
|
-
openTable(name: string, storageOptions?: Record<string, string> | undefined | null, indexCacheSize?: number | undefined | null): Promise<Table>
|
|
144
|
-
/** Drop the table with the given name, or raise an error if the table does not exist. */
|
|
145
|
-
dropTable(name: string): Promise<void>
|
|
146
|
-
}
|
|
147
|
-
export class Index {
|
|
148
|
-
static ivfPq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
|
|
149
|
-
static btree(): Index
|
|
150
|
-
}
|
|
151
|
-
/** TypeScript-style Async Iterator over RecordBatches */
|
|
152
|
-
export class RecordBatchIterator {
|
|
153
|
-
next(): Promise<Buffer | null>
|
|
154
|
-
}
|
|
155
|
-
/** A builder used to create and run a merge insert operation */
|
|
156
|
-
export class NativeMergeInsertBuilder {
|
|
157
|
-
whenMatchedUpdateAll(condition?: string | undefined | null): NativeMergeInsertBuilder
|
|
158
|
-
whenNotMatchedInsertAll(): NativeMergeInsertBuilder
|
|
159
|
-
whenNotMatchedBySourceDelete(filter?: string | undefined | null): NativeMergeInsertBuilder
|
|
160
|
-
execute(buf: Buffer): Promise<void>
|
|
161
|
-
}
|
|
162
|
-
export class Query {
|
|
163
|
-
onlyIf(predicate: string): void
|
|
164
|
-
select(columns: Array<[string, string]>): void
|
|
165
|
-
limit(limit: number): void
|
|
166
|
-
nearestTo(vector: Float32Array): VectorQuery
|
|
167
|
-
execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
|
|
168
|
-
explainPlan(verbose: boolean): Promise<string>
|
|
169
|
-
}
|
|
170
|
-
export class VectorQuery {
|
|
171
|
-
column(column: string): void
|
|
172
|
-
distanceType(distanceType: string): void
|
|
173
|
-
postfilter(): void
|
|
174
|
-
refineFactor(refineFactor: number): void
|
|
175
|
-
nprobes(nprobe: number): void
|
|
176
|
-
bypassVectorIndex(): void
|
|
177
|
-
onlyIf(predicate: string): void
|
|
178
|
-
select(columns: Array<[string, string]>): void
|
|
179
|
-
limit(limit: number): void
|
|
180
|
-
execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
|
|
181
|
-
explainPlan(verbose: boolean): Promise<string>
|
|
182
|
-
}
|
|
183
|
-
export class Table {
|
|
184
|
-
name: string
|
|
185
|
-
display(): string
|
|
186
|
-
isOpen(): boolean
|
|
187
|
-
close(): void
|
|
188
|
-
/** Return Schema as empty Arrow IPC file. */
|
|
189
|
-
schema(): Promise<Buffer>
|
|
190
|
-
add(buf: Buffer, mode: string): Promise<void>
|
|
191
|
-
countRows(filter?: string | undefined | null): Promise<number>
|
|
192
|
-
delete(predicate: string): Promise<void>
|
|
193
|
-
createIndex(index: Index | undefined | null, column: string, replace?: boolean | undefined | null): Promise<void>
|
|
194
|
-
update(onlyIf: string | undefined | null, columns: Array<[string, string]>): Promise<void>
|
|
195
|
-
query(): Query
|
|
196
|
-
vectorSearch(vector: Float32Array): VectorQuery
|
|
197
|
-
addColumns(transforms: Array<AddColumnsSql>): Promise<void>
|
|
198
|
-
alterColumns(alterations: Array<ColumnAlteration>): Promise<void>
|
|
199
|
-
dropColumns(columns: Array<string>): Promise<void>
|
|
200
|
-
version(): Promise<number>
|
|
201
|
-
checkout(version: number): Promise<void>
|
|
202
|
-
checkoutLatest(): Promise<void>
|
|
203
|
-
restore(): Promise<void>
|
|
204
|
-
optimize(olderThanMs?: number | undefined | null): Promise<OptimizeStats>
|
|
205
|
-
listIndices(): Promise<Array<IndexConfig>>
|
|
206
|
-
indexStats(indexName: string): Promise<IndexStatistics | null>
|
|
207
|
-
mergeInsert(on: Array<string>): NativeMergeInsertBuilder
|
|
208
|
-
}
|
|
@@ -1,250 +0,0 @@
|
|
|
1
|
-
/// <reference types="node" />
|
|
2
|
-
import { Table as ArrowTable, Binary, BufferType, DataType, Field, FixedSizeBinary, FixedSizeList, Float, Int, LargeBinary, List, Null, RecordBatch, Schema, Struct, Utf8 } from "apache-arrow";
|
|
3
|
-
import { Buffers } from "apache-arrow/data";
|
|
4
|
-
import { type EmbeddingFunction } from "./embedding/embedding_function";
|
|
5
|
-
import { EmbeddingFunctionConfig } from "./embedding/registry";
|
|
6
|
-
export * from "apache-arrow";
|
|
7
|
-
export type SchemaLike = Schema | {
|
|
8
|
-
fields: FieldLike[];
|
|
9
|
-
metadata: Map<string, string>;
|
|
10
|
-
get names(): unknown[];
|
|
11
|
-
};
|
|
12
|
-
export type FieldLike = Field | {
|
|
13
|
-
type: string;
|
|
14
|
-
name: string;
|
|
15
|
-
nullable?: boolean;
|
|
16
|
-
metadata?: Map<string, string>;
|
|
17
|
-
};
|
|
18
|
-
export type DataLike = import("apache-arrow").Data<Struct<any>> | {
|
|
19
|
-
type: any;
|
|
20
|
-
length: number;
|
|
21
|
-
offset: number;
|
|
22
|
-
stride: number;
|
|
23
|
-
nullable: boolean;
|
|
24
|
-
children: DataLike[];
|
|
25
|
-
get nullCount(): number;
|
|
26
|
-
values: Buffers<any>[BufferType.DATA];
|
|
27
|
-
typeIds: Buffers<any>[BufferType.TYPE];
|
|
28
|
-
nullBitmap: Buffers<any>[BufferType.VALIDITY];
|
|
29
|
-
valueOffsets: Buffers<any>[BufferType.OFFSET];
|
|
30
|
-
};
|
|
31
|
-
export type RecordBatchLike = RecordBatch | {
|
|
32
|
-
schema: SchemaLike;
|
|
33
|
-
data: DataLike;
|
|
34
|
-
};
|
|
35
|
-
export type TableLike = ArrowTable | {
|
|
36
|
-
schema: SchemaLike;
|
|
37
|
-
batches: RecordBatchLike[];
|
|
38
|
-
};
|
|
39
|
-
export type IntoVector = Float32Array | Float64Array | number[] | Promise<Float32Array | Float64Array | number[]>;
|
|
40
|
-
export declare function isArrowTable(value: object): value is TableLike;
|
|
41
|
-
export declare function isDataType(value: unknown): value is DataType;
|
|
42
|
-
export declare function isNull(value: unknown): value is Null;
|
|
43
|
-
export declare function isInt(value: unknown): value is Int;
|
|
44
|
-
export declare function isFloat(value: unknown): value is Float;
|
|
45
|
-
export declare function isBinary(value: unknown): value is Binary;
|
|
46
|
-
export declare function isLargeBinary(value: unknown): value is LargeBinary;
|
|
47
|
-
export declare function isUtf8(value: unknown): value is Utf8;
|
|
48
|
-
export declare function isLargeUtf8(value: unknown): value is Utf8;
|
|
49
|
-
export declare function isBool(value: unknown): value is Utf8;
|
|
50
|
-
export declare function isDecimal(value: unknown): value is Utf8;
|
|
51
|
-
export declare function isDate(value: unknown): value is Utf8;
|
|
52
|
-
export declare function isTime(value: unknown): value is Utf8;
|
|
53
|
-
export declare function isTimestamp(value: unknown): value is Utf8;
|
|
54
|
-
export declare function isInterval(value: unknown): value is Utf8;
|
|
55
|
-
export declare function isDuration(value: unknown): value is Utf8;
|
|
56
|
-
export declare function isList(value: unknown): value is List;
|
|
57
|
-
export declare function isStruct(value: unknown): value is Struct;
|
|
58
|
-
export declare function isUnion(value: unknown): value is Struct;
|
|
59
|
-
export declare function isFixedSizeBinary(value: unknown): value is FixedSizeBinary;
|
|
60
|
-
export declare function isFixedSizeList(value: unknown): value is FixedSizeList;
|
|
61
|
-
/** Data type accepted by NodeJS SDK */
|
|
62
|
-
export type Data = Record<string, unknown>[] | TableLike;
|
|
63
|
-
export declare class VectorColumnOptions {
|
|
64
|
-
/** Vector column type. */
|
|
65
|
-
type: Float;
|
|
66
|
-
constructor(values?: Partial<VectorColumnOptions>);
|
|
67
|
-
}
|
|
68
|
-
/** Options to control the makeArrowTable call. */
|
|
69
|
-
export declare class MakeArrowTableOptions {
|
|
70
|
-
schema?: SchemaLike;
|
|
71
|
-
vectorColumns: Record<string, VectorColumnOptions>;
|
|
72
|
-
embeddings?: EmbeddingFunction<unknown>;
|
|
73
|
-
embeddingFunction?: EmbeddingFunctionConfig;
|
|
74
|
-
/**
|
|
75
|
-
* If true then string columns will be encoded with dictionary encoding
|
|
76
|
-
*
|
|
77
|
-
* Set this to true if your string columns tend to repeat the same values
|
|
78
|
-
* often. For more precise control use the `schema` property to specify the
|
|
79
|
-
* data type for individual columns.
|
|
80
|
-
*
|
|
81
|
-
* If `schema` is provided then this property is ignored.
|
|
82
|
-
*/
|
|
83
|
-
dictionaryEncodeStrings: boolean;
|
|
84
|
-
constructor(values?: Partial<MakeArrowTableOptions>);
|
|
85
|
-
}
|
|
86
|
-
/**
|
|
87
|
-
* An enhanced version of the {@link makeTable} function from Apache Arrow
|
|
88
|
-
* that supports nested fields and embeddings columns.
|
|
89
|
-
*
|
|
90
|
-
* (typically you do not need to call this function. It will be called automatically
|
|
91
|
-
* when creating a table or adding data to it)
|
|
92
|
-
*
|
|
93
|
-
* This function converts an array of Record<String, any> (row-major JS objects)
|
|
94
|
-
* to an Arrow Table (a columnar structure)
|
|
95
|
-
*
|
|
96
|
-
* Note that it currently does not support nulls.
|
|
97
|
-
*
|
|
98
|
-
* If a schema is provided then it will be used to determine the resulting array
|
|
99
|
-
* types. Fields will also be reordered to fit the order defined by the schema.
|
|
100
|
-
*
|
|
101
|
-
* If a schema is not provided then the types will be inferred and the field order
|
|
102
|
-
* will be controlled by the order of properties in the first record. If a type
|
|
103
|
-
* is inferred it will always be nullable.
|
|
104
|
-
*
|
|
105
|
-
* If the input is empty then a schema must be provided to create an empty table.
|
|
106
|
-
*
|
|
107
|
-
* When a schema is not specified then data types will be inferred. The inference
|
|
108
|
-
* rules are as follows:
|
|
109
|
-
*
|
|
110
|
-
* - boolean => Bool
|
|
111
|
-
* - number => Float64
|
|
112
|
-
* - String => Utf8
|
|
113
|
-
* - Buffer => Binary
|
|
114
|
-
* - Record<String, any> => Struct
|
|
115
|
-
* - Array<any> => List
|
|
116
|
-
* @example
|
|
117
|
-
* import { fromTableToBuffer, makeArrowTable } from "../arrow";
|
|
118
|
-
* import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
|
|
119
|
-
*
|
|
120
|
-
* const schema = new Schema([
|
|
121
|
-
* new Field("a", new Int32()),
|
|
122
|
-
* new Field("b", new Float32()),
|
|
123
|
-
* new Field("c", new FixedSizeList(3, new Field("item", new Float16()))),
|
|
124
|
-
* ]);
|
|
125
|
-
* const table = makeArrowTable([
|
|
126
|
-
* { a: 1, b: 2, c: [1, 2, 3] },
|
|
127
|
-
* { a: 4, b: 5, c: [4, 5, 6] },
|
|
128
|
-
* { a: 7, b: 8, c: [7, 8, 9] },
|
|
129
|
-
* ], { schema });
|
|
130
|
-
* ```
|
|
131
|
-
*
|
|
132
|
-
* By default it assumes that the column named `vector` is a vector column
|
|
133
|
-
* and it will be converted into a fixed size list array of type float32.
|
|
134
|
-
* The `vectorColumns` option can be used to support other vector column
|
|
135
|
-
* names and data types.
|
|
136
|
-
*
|
|
137
|
-
* ```ts
|
|
138
|
-
*
|
|
139
|
-
* const schema = new Schema([
|
|
140
|
-
new Field("a", new Float64()),
|
|
141
|
-
new Field("b", new Float64()),
|
|
142
|
-
new Field(
|
|
143
|
-
"vector",
|
|
144
|
-
new FixedSizeList(3, new Field("item", new Float32()))
|
|
145
|
-
),
|
|
146
|
-
]);
|
|
147
|
-
const table = makeArrowTable([
|
|
148
|
-
{ a: 1, b: 2, vector: [1, 2, 3] },
|
|
149
|
-
{ a: 4, b: 5, vector: [4, 5, 6] },
|
|
150
|
-
{ a: 7, b: 8, vector: [7, 8, 9] },
|
|
151
|
-
]);
|
|
152
|
-
assert.deepEqual(table.schema, schema);
|
|
153
|
-
* ```
|
|
154
|
-
*
|
|
155
|
-
* You can specify the vector column types and names using the options as well
|
|
156
|
-
*
|
|
157
|
-
* ```typescript
|
|
158
|
-
*
|
|
159
|
-
* const schema = new Schema([
|
|
160
|
-
new Field('a', new Float64()),
|
|
161
|
-
new Field('b', new Float64()),
|
|
162
|
-
new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
|
|
163
|
-
new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
|
|
164
|
-
]);
|
|
165
|
-
* const table = makeArrowTable([
|
|
166
|
-
{ a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
|
|
167
|
-
{ a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
|
|
168
|
-
{ a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
|
|
169
|
-
], {
|
|
170
|
-
vectorColumns: {
|
|
171
|
-
vec1: { type: new Float16() },
|
|
172
|
-
vec2: { type: new Float16() }
|
|
173
|
-
}
|
|
174
|
-
});
|
|
175
|
-
* assert.deepEqual(table.schema, schema)
|
|
176
|
-
* ```
|
|
177
|
-
*/
|
|
178
|
-
export declare function makeArrowTable(data: Array<Record<string, unknown>>, options?: Partial<MakeArrowTableOptions>, metadata?: Map<string, string>): ArrowTable;
|
|
179
|
-
/**
|
|
180
|
-
* Create an empty Arrow table with the provided schema
|
|
181
|
-
*/
|
|
182
|
-
export declare function makeEmptyTable(schema: SchemaLike, metadata?: Map<string, string>): ArrowTable;
|
|
183
|
-
/**
|
|
184
|
-
* Convert an Array of records into an Arrow Table, optionally applying an
|
|
185
|
-
* embeddings function to it.
|
|
186
|
-
*
|
|
187
|
-
* This function calls `makeArrowTable` first to create the Arrow Table.
|
|
188
|
-
* Any provided `makeTableOptions` (e.g. a schema) will be passed on to
|
|
189
|
-
* that call.
|
|
190
|
-
*
|
|
191
|
-
* The embedding function will be passed a column of values (based on the
|
|
192
|
-
* `sourceColumn` of the embedding function) and expects to receive back
|
|
193
|
-
* number[][] which will be converted into a fixed size list column. By
|
|
194
|
-
* default this will be a fixed size list of Float32 but that can be
|
|
195
|
-
* customized by the `embeddingDataType` property of the embedding function.
|
|
196
|
-
*
|
|
197
|
-
* If a schema is provided in `makeTableOptions` then it should include the
|
|
198
|
-
* embedding columns. If no schema is provided then embedding columns will
|
|
199
|
-
* be placed at the end of the table, after all of the input columns.
|
|
200
|
-
*/
|
|
201
|
-
export declare function convertToTable(data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunctionConfig, makeTableOptions?: Partial<MakeArrowTableOptions>): Promise<ArrowTable>;
|
|
202
|
-
/** Creates the Arrow Type for a Vector column with dimension `dim` */
|
|
203
|
-
export declare function newVectorType<T extends Float>(dim: number, innerType: T): FixedSizeList<T>;
|
|
204
|
-
/**
|
|
205
|
-
* Serialize an Array of records into a buffer using the Arrow IPC File serialization
|
|
206
|
-
*
|
|
207
|
-
* This function will call `convertToTable` and pass on `embeddings` and `schema`
|
|
208
|
-
*
|
|
209
|
-
* `schema` is required if data is empty
|
|
210
|
-
*/
|
|
211
|
-
export declare function fromRecordsToBuffer(data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunctionConfig, schema?: Schema): Promise<Buffer>;
|
|
212
|
-
/**
|
|
213
|
-
* Serialize an Array of records into a buffer using the Arrow IPC Stream serialization
|
|
214
|
-
*
|
|
215
|
-
* This function will call `convertToTable` and pass on `embeddings` and `schema`
|
|
216
|
-
*
|
|
217
|
-
* `schema` is required if data is empty
|
|
218
|
-
*/
|
|
219
|
-
export declare function fromRecordsToStreamBuffer(data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunctionConfig, schema?: Schema): Promise<Buffer>;
|
|
220
|
-
/**
|
|
221
|
-
* Serialize an Arrow Table into a buffer using the Arrow IPC File serialization
|
|
222
|
-
*
|
|
223
|
-
* This function will apply `embeddings` to the table in a manner similar to
|
|
224
|
-
* `convertToTable`.
|
|
225
|
-
*
|
|
226
|
-
* `schema` is required if the table is empty
|
|
227
|
-
*/
|
|
228
|
-
export declare function fromTableToBuffer(table: ArrowTable, embeddings?: EmbeddingFunctionConfig, schema?: SchemaLike): Promise<Buffer>;
|
|
229
|
-
/**
|
|
230
|
-
* Serialize data (an Array of records or an Arrow Table) into a buffer using the Arrow IPC File serialization
|
|
231
|
-
*
|
|
232
|
-
* This function will apply `embeddings` to the table in a manner similar to
|
|
233
|
-
* `convertToTable`.
|
|
234
|
-
*
|
|
235
|
-
* `schema` is required if the table is empty
|
|
236
|
-
*/
|
|
237
|
-
export declare function fromDataToBuffer(data: Data, embeddings?: EmbeddingFunctionConfig, schema?: Schema): Promise<Buffer>;
|
|
238
|
-
/**
|
|
239
|
-
* Serialize an Arrow Table into a buffer using the Arrow IPC Stream serialization
|
|
240
|
-
*
|
|
241
|
-
* This function will apply `embeddings` to the table in a manner similar to
|
|
242
|
-
* `convertToTable`.
|
|
243
|
-
*
|
|
244
|
-
* `schema` is required if the table is empty
|
|
245
|
-
*/
|
|
246
|
-
export declare function fromTableToStreamBuffer(table: ArrowTable, embeddings?: EmbeddingFunctionConfig, schema?: SchemaLike): Promise<Buffer>;
|
|
247
|
-
/**
|
|
248
|
-
* Create an empty table with the given schema
|
|
249
|
-
*/
|
|
250
|
-
export declare function createEmptyTable(schema: Schema): ArrowTable;
|