@thi.ng/column-store 0.1.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -15
- package/api.d.ts +45 -3
- package/api.js +12 -0
- package/columns/acolumn.d.ts +12 -3
- package/columns/acolumn.js +10 -5
- package/columns/dict-tuple.d.ts +2 -0
- package/columns/dict-tuple.js +8 -2
- package/columns/dict.d.ts +2 -0
- package/columns/dict.js +25 -10
- package/columns/plain.d.ts +4 -3
- package/columns/plain.js +13 -6
- package/columns/tuple.d.ts +2 -1
- package/columns/tuple.js +6 -3
- package/columns/typedarray.d.ts +5 -3
- package/columns/typedarray.js +28 -29
- package/columns/vector.d.ts +25 -0
- package/columns/vector.js +92 -0
- package/internal/serialize.d.ts +8 -0
- package/internal/serialize.js +20 -1
- package/package.json +6 -3
- package/query.js +19 -22
- package/table.d.ts +2 -2
- package/table.js +28 -14
package/README.md
CHANGED
|
@@ -17,6 +17,8 @@
|
|
|
17
17
|
- [About](#about)
|
|
18
18
|
- [Column storage](#column-storage)
|
|
19
19
|
- [Column types](#column-types)
|
|
20
|
+
- [Vector column types](#vector-column-types)
|
|
21
|
+
- [Serialization options](#serialization-options)
|
|
20
22
|
- [Custom column types](#custom-column-types)
|
|
21
23
|
- [Cardinality](#cardinality)
|
|
22
24
|
- [Default values](#default-values)
|
|
@@ -61,7 +63,7 @@ delegates them to the columns.
|
|
|
61
63
|
An example table definition looks like this (explanation of column types in next
|
|
62
64
|
section below):
|
|
63
65
|
|
|
64
|
-
```ts
|
|
66
|
+
```ts tangle:export/readme-types.ts
|
|
65
67
|
import { Table, FLAG_DICT, FLAG_UNIQUE } from "@thi.ng/column-store";
|
|
66
68
|
|
|
67
69
|
// define a table with the given columns
|
|
@@ -79,7 +81,7 @@ const table = new Table({
|
|
|
79
81
|
aliases: { type: "str", cardinality: [0, 3] },
|
|
80
82
|
|
|
81
83
|
// required fixed size tuples (aka vectors) of numbers
|
|
82
|
-
latlon: { type: "
|
|
84
|
+
latlon: { type: "f32vec", cardinality: [2, 2] },
|
|
83
85
|
|
|
84
86
|
// optional tuples of max. 10 strings, with default
|
|
85
87
|
// the given flags (explained further below) are triggering:
|
|
@@ -103,6 +105,11 @@ table.addRow({
|
|
|
103
105
|
});
|
|
104
106
|
```
|
|
105
107
|
|
|
108
|
+
> [!IMPORTANT]
|
|
109
|
+
> Columns can be named freely, with the exception that the `__` name prefix is
|
|
110
|
+
> reserved for internal use. For example, `foo` is allowed, but `__foo` is a
|
|
111
|
+
> reserved name.
|
|
112
|
+
|
|
106
113
|
### Column types
|
|
107
114
|
|
|
108
115
|
The current built-in column types only support numeric or string values, though
|
|
@@ -127,7 +134,57 @@ Note: Booleans and `BigInt`s are still unsupported, but being worked on...
|
|
|
127
134
|
| `f32` | 32bit float | ❌ | ❌ |
|
|
128
135
|
| `f64` | 64bit float | ❌ | ❌ |
|
|
129
136
|
|
|
130
|
-
- <sup>(1)</sup> only if
|
|
137
|
+
- <sup>(1)</sup> only if max. cardinality is 1, [further information](#flag_rle)
|
|
138
|
+
|
|
139
|
+
### Vector column types
|
|
140
|
+
|
|
141
|
+
Columns storing fixed size n-dimensional vectors can be created via the `vec`
|
|
142
|
+
suffix for any of the typedarray based column types, i.e. `u8vec`, `i16vec`,
|
|
143
|
+
`f32vec` etc.
|
|
144
|
+
|
|
145
|
+
The `cardinality` column config (in the form `[min,max]`) is interpreted as follows:
|
|
146
|
+
|
|
147
|
+
- if `min` is zero, the value is optional, but a `default` value MUST be defined
|
|
148
|
+
for the column. Otherwise the `min` value MUST be the same as `max`.
|
|
149
|
+
- `max` defines the actual vector size
|
|
150
|
+
|
|
151
|
+
Therefore, using a 3D vector as example, the only two possible `cardinality`
|
|
152
|
+
configs are: `[0,3]` (with default given) or `[3,3]`.
|
|
153
|
+
|
|
154
|
+
When [querying](#query-engine) vector columns using the standard
|
|
155
|
+
`(n)or`/`(n)and` operators, the entire vector is always matched (by value).
|
|
156
|
+
|
|
157
|
+
> [!IMPORTANT]
|
|
158
|
+
> For performance reasons, rows retrieved from vector columns contain mutable
|
|
159
|
+
> data views of the underlying column storage. That means when manipulating data
|
|
160
|
+
> in these views, the underlying data in the column would be changed too. To
|
|
161
|
+
> avoid index corruption, always edit only copies of this vector data and then
|
|
162
|
+
> use `table.updateRow()` to properly update the column storage (incl. any
|
|
163
|
+
> internal indexes).
|
|
164
|
+
|
|
165
|
+
### Serialization options
|
|
166
|
+
|
|
167
|
+
For `f32`, `f64`, `f32vec` and `f64vec` column types, the optional `prec` column
|
|
168
|
+
option can be provided to specify the number of fractional digits used in
|
|
169
|
+
the JSON serialization:
|
|
170
|
+
|
|
171
|
+
```ts tangle:export/readme-serialize-prec.ts
|
|
172
|
+
import { Table } from "@thi.ng/column-store";
|
|
173
|
+
|
|
174
|
+
const table = new Table({
|
|
175
|
+
vec: { type: "f32vec", cardinality: [3, 3], opts: { prec: 2 } }
|
|
176
|
+
});
|
|
177
|
+
|
|
178
|
+
table.addRow({ vec: [1.11111, 22.22222, 333.33333]});
|
|
179
|
+
|
|
180
|
+
console.log(JSON.stringify(table));
|
|
181
|
+
// {
|
|
182
|
+
// "schema":{"vec":{"cardinality":[3,3],"flags":0,"type":"f32vec","opts":{"prec":2}}},
|
|
183
|
+
// "columns":{"vec":{"values":[1.11,22.22,333.33]}},
|
|
184
|
+
// "length":1
|
|
185
|
+
// }
|
|
186
|
+
|
|
187
|
+
```
|
|
131
188
|
|
|
132
189
|
### Custom column types
|
|
133
190
|
|
|
@@ -213,15 +270,18 @@ Note: Not supported by typedarray-backed column types.
|
|
|
213
270
|
|
|
214
271
|
(Value: 0x08)
|
|
215
272
|
|
|
216
|
-
This flag enables
|
|
273
|
+
This flag enables [Run-length encoding](https://thi.ng/rle-pack) in the
|
|
217
274
|
JSON serialization of a column, potentially leading to dramatic file size
|
|
218
275
|
savings, esp. for dictionary-based data.
|
|
219
276
|
|
|
220
|
-
|
|
277
|
+
Two modes of RLE compression are supported, depending on column type:
|
|
278
|
+
|
|
279
|
+
- Simple RLE merely stores arrays in `[value1, count1, value2, count2...]` form
|
|
280
|
+
- Binary RLE uses more advanced bitwise encoding, but is only available for
|
|
281
|
+
typedarray and vector columns
|
|
221
282
|
|
|
222
|
-
-
|
|
223
|
-
|
|
224
|
-
default value **must** be supplied)
|
|
283
|
+
Tuple-based columns do not support RLE and will throw an error at creation time
|
|
284
|
+
when trying to use this flag.
|
|
225
285
|
|
|
226
286
|
#### Custom flags
|
|
227
287
|
|
|
@@ -256,8 +316,8 @@ TODO see code examples below
|
|
|
256
316
|
|
|
257
317
|
The query engine works by applying a number of [query
|
|
258
318
|
terms](https://docs.thi.ng/umbrella/column-store/interfaces/QueryTerm.html) in
|
|
259
|
-
series, with each step intersecting
|
|
260
|
-
step(s), thereby narrowing down the result set.
|
|
319
|
+
series, with each step intersecting (aka logical AND) its results with the
|
|
320
|
+
results of the previous step(s), thereby narrowing down the result set.
|
|
261
321
|
|
|
262
322
|
By default, individual query terms operate on a single column, but can
|
|
263
323
|
also apply to multiple. Terms are supplied either as array given to the
|
|
@@ -352,7 +412,7 @@ For Node.js REPL:
|
|
|
352
412
|
const cs = await import("@thi.ng/column-store");
|
|
353
413
|
```
|
|
354
414
|
|
|
355
|
-
Package sizes (brotli'd, pre-treeshake): ESM: 4.
|
|
415
|
+
Package sizes (brotli'd, pre-treeshake): ESM: 4.67 KB
|
|
356
416
|
|
|
357
417
|
## Dependencies
|
|
358
418
|
|
|
@@ -411,19 +471,20 @@ table.addRows([
|
|
|
411
471
|
const unsortedImages = table.query().where("type", "img").and("tags", "unsorted");
|
|
412
472
|
|
|
413
473
|
// queries are iterables and only execute when the iterator is consumed
|
|
474
|
+
// each query result includes a `__row` ID
|
|
414
475
|
console.log([...unsortedImages]);
|
|
415
|
-
// [ { id: 102, type: "img", tags: [ "unsorted" ] } ]
|
|
476
|
+
// [ { id: 102, type: "img", tags: [ "unsorted" ], __row: 2 } ]
|
|
416
477
|
|
|
417
478
|
// select items with `a` OR `b` tags, intersect with those which have `c` AND `d` tags
|
|
418
479
|
const complexTagQuery = table.query().or("tags", ["a", "b"]).and("tags", ["c", "d"]);
|
|
419
480
|
console.log([...complexTagQuery]);
|
|
420
|
-
// [ { id: 104, type: "img", tags: [ "b", "c", "d" ] } ]
|
|
481
|
+
// [ { id: 104, type: "img", tags: [ "b", "c", "d" ], __row: 4 } ]
|
|
421
482
|
|
|
422
483
|
// query using custom predicates
|
|
423
484
|
console.log([...table.query().matchColumn("id", (id) => id > 102)]);
|
|
424
485
|
// [
|
|
425
|
-
// { id: 103, type: "img", tags: [ "unsorted" ] },
|
|
426
|
-
// { id: 104, type: "img", tags: [ "b", "c", "d" ] }
|
|
486
|
+
// { id: 103, type: "img", tags: [ "unsorted" ], __row: 3 },
|
|
487
|
+
// { id: 104, type: "img", tags: [ "b", "c", "d" ], __row: 4 }
|
|
427
488
|
// ]
|
|
428
489
|
|
|
429
490
|
// serialize table to JSON
|
package/api.d.ts
CHANGED
|
@@ -1,15 +1,54 @@
|
|
|
1
|
-
import type { FloatType, Fn3, IntType, Maybe,
|
|
1
|
+
import type { FloatType, Fn3, IntType, Maybe, UintType } from "@thi.ng/api";
|
|
2
2
|
import type { BitmapIndex } from "./bitmap.js";
|
|
3
3
|
import type { QueryCtx } from "./query.js";
|
|
4
4
|
import type { Table } from "./table.js";
|
|
5
5
|
export type ColumnSchema = Record<string, ColumnSpec>;
|
|
6
6
|
export type NumericType = IntType | UintType | FloatType;
|
|
7
|
+
export type VectorType = `${NumericType}vec`;
|
|
7
8
|
export type Cardinality = [number, number];
|
|
8
9
|
export interface ColumnSpec {
|
|
9
|
-
|
|
10
|
+
/**
|
|
11
|
+
* Column type ID (see readme for overview)
|
|
12
|
+
*/
|
|
13
|
+
type: NumericType | VectorType | "num" | "str" | string;
|
|
14
|
+
/**
|
|
15
|
+
* `[min,max]` number of allowed values per row. The following cardinality
|
|
16
|
+
* presets are available in general (but not for all column types, see
|
|
17
|
+
* readme for overview):
|
|
18
|
+
*
|
|
19
|
+
* - {@link REQUIRED}: [1,1] (default) — value is required
|
|
20
|
+
* - {@link OPTIONAL}: [0,1] — value is optional
|
|
21
|
+
* - {@link ZERO_PLUS}: [0, (2**32)-1] — zero or more values
|
|
22
|
+
* - {@link ONE_PLUS}: [1, (2**32)-1] — one or more values
|
|
23
|
+
*
|
|
24
|
+
* Note: Some column types always require a value. So when using
|
|
25
|
+
* {@link OPTIONAL}, you might also need to provide a
|
|
26
|
+
* {@link ColumnSpec.default} value.
|
|
27
|
+
*/
|
|
10
28
|
cardinality: Cardinality;
|
|
29
|
+
/**
|
|
30
|
+
* Bit mask to control column behavior/encoding. The lowest 16 bits are
|
|
31
|
+
* reserved for built-in column types and internal use. The upper 16 bits
|
|
32
|
+
* are freely usable for custom purposes.
|
|
33
|
+
*
|
|
34
|
+
* @remarks
|
|
35
|
+
* The following built-in flags are available in general (but not for all
|
|
36
|
+
* column types, see readme for overview):
|
|
37
|
+
*
|
|
38
|
+
* - {@link FLAG_BITMAP}: Enable bitmap indexing
|
|
39
|
+
* - {@link FLAG_DICT}: Enable dictionary encoding of values
|
|
40
|
+
* - {@link FLAG_UNIQUE}: Enable Set-semantics, i.e. unique values per tuple
|
|
41
|
+
* - {@link FLAG_RLE}: Enable run-length encoding in serialization
|
|
42
|
+
*/
|
|
11
43
|
flags: number;
|
|
44
|
+
/**
|
|
45
|
+
* Default value
|
|
46
|
+
*/
|
|
12
47
|
default?: any;
|
|
48
|
+
/**
|
|
49
|
+
* Column-type specific options (e.g. for serialization)
|
|
50
|
+
*/
|
|
51
|
+
opts?: Record<string, any>;
|
|
13
52
|
}
|
|
14
53
|
export interface ColumnTypeSpec {
|
|
15
54
|
/**
|
|
@@ -42,8 +81,9 @@ export declare const FLAG_DICT: number;
|
|
|
42
81
|
export declare const FLAG_BITMAP: number;
|
|
43
82
|
export declare const FLAG_UNIQUE: number;
|
|
44
83
|
export declare const FLAG_RLE: number;
|
|
84
|
+
/** @internal */
|
|
85
|
+
export declare const LIMITS: Record<NumericType, [number, number]>;
|
|
45
86
|
export interface IColumn {
|
|
46
|
-
values: any[] | TypedArray;
|
|
47
87
|
bitmap?: BitmapIndex;
|
|
48
88
|
readonly isArray: boolean;
|
|
49
89
|
load(spec: SerializedColumn): void;
|
|
@@ -65,6 +105,8 @@ export interface IColumn {
|
|
|
65
105
|
encode(value: any): any;
|
|
66
106
|
decode(value: any): any;
|
|
67
107
|
replaceValue(currValue: any, newValue: any): boolean;
|
|
108
|
+
getRowKey(i: number): any;
|
|
109
|
+
valueKey(value: any): any;
|
|
68
110
|
}
|
|
69
111
|
export interface SerializedTable {
|
|
70
112
|
schema: ColumnSchema;
|
package/api.js
CHANGED
|
@@ -6,11 +6,23 @@ const FLAG_DICT = 1 << 0;
|
|
|
6
6
|
const FLAG_BITMAP = 1 << 1;
|
|
7
7
|
const FLAG_UNIQUE = 1 << 2;
|
|
8
8
|
const FLAG_RLE = 1 << 3;
|
|
9
|
+
const LIMITS = {
|
|
10
|
+
u8: [0, 255],
|
|
11
|
+
u8c: [0, 255],
|
|
12
|
+
u16: [0, 65535],
|
|
13
|
+
u32: [0, 4294967295],
|
|
14
|
+
i8: [-128, 127],
|
|
15
|
+
i16: [-32768, 32767],
|
|
16
|
+
i32: [-2147483648, 2147483647],
|
|
17
|
+
f32: [-Infinity, Infinity],
|
|
18
|
+
f64: [-Infinity, Infinity]
|
|
19
|
+
};
|
|
9
20
|
export {
|
|
10
21
|
FLAG_BITMAP,
|
|
11
22
|
FLAG_DICT,
|
|
12
23
|
FLAG_RLE,
|
|
13
24
|
FLAG_UNIQUE,
|
|
25
|
+
LIMITS,
|
|
14
26
|
ONE_PLUS,
|
|
15
27
|
OPTIONAL,
|
|
16
28
|
REQUIRED,
|
package/columns/acolumn.d.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import type { BidirIndex } from "@thi.ng/bidir-index";
|
|
2
|
-
import { type ColumnSpec, type SerializedIndex } from "../api.js";
|
|
2
|
+
import { type ColumnSpec, type IColumn, type SerializedColumn, type SerializedIndex } from "../api.js";
|
|
3
3
|
import { BitmapIndex } from "../bitmap.js";
|
|
4
4
|
import type { Table } from "../table.js";
|
|
5
|
-
export declare abstract class AColumn {
|
|
5
|
+
export declare abstract class AColumn implements IColumn {
|
|
6
6
|
readonly id: string;
|
|
7
7
|
table: Table;
|
|
8
8
|
spec: ColumnSpec;
|
|
@@ -10,10 +10,19 @@ export declare abstract class AColumn {
|
|
|
10
10
|
dict?: BidirIndex<any>;
|
|
11
11
|
abstract isArray: boolean;
|
|
12
12
|
constructor(id: string, table: Table);
|
|
13
|
+
abstract load(spec: SerializedColumn): void;
|
|
14
|
+
reindex(): void;
|
|
15
|
+
abstract validate(value: any): boolean;
|
|
16
|
+
abstract setRow(i: number, value: any): void;
|
|
17
|
+
abstract removeRow(i: number): void;
|
|
18
|
+
abstract replaceValue(currValue: any, newValue: any): boolean;
|
|
19
|
+
abstract valueKey(x: any): any;
|
|
20
|
+
abstract getRow(i: number): any;
|
|
21
|
+
abstract getRowKey(i: number): any;
|
|
13
22
|
encode(value: any): any;
|
|
14
23
|
decode(value: any): any;
|
|
15
24
|
protected loadDict(serialized: SerializedIndex): void;
|
|
16
|
-
protected updateBitmap(
|
|
25
|
+
protected updateBitmap(): void;
|
|
17
26
|
protected ensureValue(val: any): any;
|
|
18
27
|
protected ensureBitmap(): void;
|
|
19
28
|
}
|
package/columns/acolumn.js
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import { illegalArgs } from "@thi.ng/errors/illegal-arguments";
|
|
2
|
-
import {
|
|
2
|
+
import {
|
|
3
|
+
FLAG_BITMAP
|
|
4
|
+
} from "../api.js";
|
|
3
5
|
import { BitmapIndex } from "../bitmap.js";
|
|
4
6
|
class AColumn {
|
|
5
7
|
constructor(id, table) {
|
|
@@ -11,6 +13,9 @@ class AColumn {
|
|
|
11
13
|
spec;
|
|
12
14
|
bitmap;
|
|
13
15
|
dict;
|
|
16
|
+
reindex() {
|
|
17
|
+
this.updateBitmap();
|
|
18
|
+
}
|
|
14
19
|
encode(value) {
|
|
15
20
|
return value;
|
|
16
21
|
}
|
|
@@ -30,17 +35,17 @@ class AColumn {
|
|
|
30
35
|
}
|
|
31
36
|
dict.nextID = serialized.next;
|
|
32
37
|
}
|
|
33
|
-
updateBitmap(
|
|
38
|
+
updateBitmap() {
|
|
34
39
|
this.ensureBitmap();
|
|
35
40
|
const { bitmap, isArray } = this;
|
|
36
41
|
if (!bitmap) return;
|
|
37
42
|
bitmap.clear();
|
|
38
|
-
for (let i = 0; i <
|
|
39
|
-
const value =
|
|
43
|
+
for (let i = 0, n = this.table.length; i < n; i++) {
|
|
44
|
+
const value = this.getRow(i);
|
|
40
45
|
if (value == null) continue;
|
|
41
46
|
if (isArray) {
|
|
42
47
|
for (let x of value) bitmap.setBit(x, i);
|
|
43
|
-
} else bitmap.setBit(
|
|
48
|
+
} else bitmap.setBit(this.getRowKey(i), i);
|
|
44
49
|
}
|
|
45
50
|
}
|
|
46
51
|
ensureValue(val) {
|
package/columns/dict-tuple.d.ts
CHANGED
|
@@ -12,6 +12,8 @@ export declare class DictTupleColumn extends AColumn implements IColumn {
|
|
|
12
12
|
validate(value: any): boolean;
|
|
13
13
|
setRow(i: number, value: any[]): void;
|
|
14
14
|
getRow(i: number): any[] | null;
|
|
15
|
+
getRowKey(i: number): number[] | null;
|
|
16
|
+
valueKey(value: any): (number | null)[];
|
|
15
17
|
removeRow(i: number): void;
|
|
16
18
|
replaceValue(currValue: any, newValue: any): boolean;
|
|
17
19
|
toJSON(): {
|
package/columns/dict-tuple.js
CHANGED
|
@@ -11,7 +11,7 @@ class DictTupleColumn extends AColumn {
|
|
|
11
11
|
load({ dict, values }) {
|
|
12
12
|
this.values = values;
|
|
13
13
|
super.loadDict(dict);
|
|
14
|
-
super.updateBitmap(
|
|
14
|
+
super.updateBitmap();
|
|
15
15
|
}
|
|
16
16
|
reindex() {
|
|
17
17
|
const dict = this.dict;
|
|
@@ -20,7 +20,7 @@ class DictTupleColumn extends AColumn {
|
|
|
20
20
|
(ids) => ids ? newDict.addAll(dict.getAllIDs(ids)) : null
|
|
21
21
|
);
|
|
22
22
|
this.dict = newDict;
|
|
23
|
-
super.updateBitmap(
|
|
23
|
+
super.updateBitmap();
|
|
24
24
|
}
|
|
25
25
|
encode(value) {
|
|
26
26
|
return this.dict.getAll(isArray(value) ? value : [value], false, true);
|
|
@@ -45,6 +45,12 @@ class DictTupleColumn extends AColumn {
|
|
|
45
45
|
const values = this.values[i];
|
|
46
46
|
return values != null ? this.dict.getAllIDs(values) : null;
|
|
47
47
|
}
|
|
48
|
+
getRowKey(i) {
|
|
49
|
+
return this.values[i];
|
|
50
|
+
}
|
|
51
|
+
valueKey(value) {
|
|
52
|
+
return this.encode(value);
|
|
53
|
+
}
|
|
48
54
|
removeRow(i) {
|
|
49
55
|
this.values.splice(i, 1);
|
|
50
56
|
this.bitmap?.removeBit(i);
|
package/columns/dict.d.ts
CHANGED
|
@@ -12,6 +12,8 @@ export declare class DictColumn extends AColumn implements IColumn {
|
|
|
12
12
|
validate(value: any): boolean;
|
|
13
13
|
setRow(i: number, value: any): void;
|
|
14
14
|
getRow(i: number): any;
|
|
15
|
+
getRowKey(i: number): number | null;
|
|
16
|
+
valueKey(value: any): number | number[] | undefined;
|
|
15
17
|
removeRow(i: number): void;
|
|
16
18
|
replaceValue(currValue: any, newValue: any): boolean;
|
|
17
19
|
toJSON(): {
|
package/columns/dict.js
CHANGED
|
@@ -1,17 +1,19 @@
|
|
|
1
1
|
import { BidirIndex } from "@thi.ng/bidir-index";
|
|
2
|
-
import {
|
|
2
|
+
import { decodeBinary, encodeBinary } from "@thi.ng/rle-pack/binary";
|
|
3
|
+
import { decodeSimple, encodeSimple } from "@thi.ng/rle-pack/simple";
|
|
3
4
|
import { FLAG_RLE } from "../api.js";
|
|
4
5
|
import { __validateValue } from "../internal/checks.js";
|
|
5
6
|
import { __serializeDict } from "../internal/serialize.js";
|
|
6
7
|
import { AColumn } from "./acolumn.js";
|
|
8
|
+
import { isArray } from "@thi.ng/checks/is-array";
|
|
7
9
|
class DictColumn extends AColumn {
|
|
8
10
|
values = [];
|
|
9
11
|
dict = new BidirIndex();
|
|
10
12
|
isArray = false;
|
|
11
13
|
load({ dict, values }) {
|
|
12
|
-
this.values = this.spec.flags & FLAG_RLE ? Array.from(
|
|
14
|
+
this.values = this.spec.flags & FLAG_RLE ? this.spec.cardinality[0] === 0 && this.spec.default == null ? decodeSimple(values) : Array.from(decodeBinary(values)) : values;
|
|
13
15
|
super.loadDict(dict);
|
|
14
|
-
super.updateBitmap(
|
|
16
|
+
super.updateBitmap();
|
|
15
17
|
}
|
|
16
18
|
reindex() {
|
|
17
19
|
const dict = this.dict;
|
|
@@ -20,7 +22,7 @@ class DictColumn extends AColumn {
|
|
|
20
22
|
(x) => x != null ? newDict.add(dict.getID(x)) : null
|
|
21
23
|
);
|
|
22
24
|
this.dict = newDict;
|
|
23
|
-
super.updateBitmap(
|
|
25
|
+
super.updateBitmap();
|
|
24
26
|
}
|
|
25
27
|
encode(value) {
|
|
26
28
|
return this.dict.get(value);
|
|
@@ -45,6 +47,12 @@ class DictColumn extends AColumn {
|
|
|
45
47
|
const value = this.values[i];
|
|
46
48
|
return value != null ? this.dict.getID(value) : null;
|
|
47
49
|
}
|
|
50
|
+
getRowKey(i) {
|
|
51
|
+
return this.values[i];
|
|
52
|
+
}
|
|
53
|
+
valueKey(value) {
|
|
54
|
+
return isArray(value) ? this.dict.getAll(value) : this.dict.get(value);
|
|
55
|
+
}
|
|
48
56
|
removeRow(i) {
|
|
49
57
|
this.values.splice(i, 1);
|
|
50
58
|
this.bitmap?.removeBit(i);
|
|
@@ -67,12 +75,19 @@ class DictColumn extends AColumn {
|
|
|
67
75
|
return true;
|
|
68
76
|
}
|
|
69
77
|
toJSON() {
|
|
70
|
-
let values = this
|
|
71
|
-
if (
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
78
|
+
let { values, spec } = this;
|
|
79
|
+
if (spec.flags & FLAG_RLE) {
|
|
80
|
+
if (spec.cardinality[0] == 0 && spec.default == null) {
|
|
81
|
+
values = encodeSimple(values);
|
|
82
|
+
} else {
|
|
83
|
+
const numBits = Math.max(
|
|
84
|
+
1,
|
|
85
|
+
Math.ceil(Math.log2(this.dict.size))
|
|
86
|
+
);
|
|
87
|
+
values = Array.from(
|
|
88
|
+
encodeBinary(values, values.length, numBits)
|
|
89
|
+
);
|
|
90
|
+
}
|
|
76
91
|
}
|
|
77
92
|
return { dict: __serializeDict(this.dict), values };
|
|
78
93
|
}
|
package/columns/plain.d.ts
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { type IColumn, type SerializedColumn } from "../api.js";
|
|
2
2
|
import { AColumn } from "./acolumn.js";
|
|
3
3
|
export declare class PlainColumn extends AColumn implements IColumn {
|
|
4
4
|
values: any[];
|
|
5
5
|
readonly isArray = false;
|
|
6
|
-
load(
|
|
7
|
-
reindex(): void;
|
|
6
|
+
load({ values }: SerializedColumn): void;
|
|
8
7
|
validate(value: any): boolean;
|
|
9
8
|
setRow(i: number, value: any): void;
|
|
10
9
|
getRow(i: number): any;
|
|
10
|
+
getRowKey(i: number): any;
|
|
11
|
+
valueKey(x: any): any;
|
|
11
12
|
removeRow(i: number): void;
|
|
12
13
|
replaceValue(currValue: any, newValue: any): boolean;
|
|
13
14
|
toJSON(): {
|
package/columns/plain.js
CHANGED
|
@@ -1,16 +1,15 @@
|
|
|
1
|
+
import { decodeSimple, encodeSimple } from "@thi.ng/rle-pack/simple";
|
|
2
|
+
import { FLAG_RLE } from "../api.js";
|
|
1
3
|
import { __validateValue } from "../internal/checks.js";
|
|
2
4
|
import { __replaceValue } from "../internal/replace.js";
|
|
3
5
|
import { AColumn } from "./acolumn.js";
|
|
4
6
|
class PlainColumn extends AColumn {
|
|
5
7
|
values = [];
|
|
6
8
|
isArray = false;
|
|
7
|
-
load(
|
|
8
|
-
this.values = spec.values;
|
|
9
|
+
load({ values }) {
|
|
10
|
+
this.values = this.spec.flags & FLAG_RLE ? Array.from(decodeSimple(values)) : values;
|
|
9
11
|
this.reindex();
|
|
10
12
|
}
|
|
11
|
-
reindex() {
|
|
12
|
-
super.updateBitmap(this.values);
|
|
13
|
-
}
|
|
14
13
|
validate(value) {
|
|
15
14
|
return __validateValue(this.spec, value);
|
|
16
15
|
}
|
|
@@ -26,6 +25,12 @@ class PlainColumn extends AColumn {
|
|
|
26
25
|
getRow(i) {
|
|
27
26
|
return this.values[i];
|
|
28
27
|
}
|
|
28
|
+
getRowKey(i) {
|
|
29
|
+
return this.values[i];
|
|
30
|
+
}
|
|
31
|
+
valueKey(x) {
|
|
32
|
+
return x;
|
|
33
|
+
}
|
|
29
34
|
removeRow(i) {
|
|
30
35
|
this.values.splice(i, 1);
|
|
31
36
|
this.bitmap?.removeBit(i);
|
|
@@ -34,7 +39,9 @@ class PlainColumn extends AColumn {
|
|
|
34
39
|
return __replaceValue(this.bitmap, this.values, currValue, newValue);
|
|
35
40
|
}
|
|
36
41
|
toJSON() {
|
|
37
|
-
return {
|
|
42
|
+
return {
|
|
43
|
+
values: this.spec.flags & FLAG_RLE ? encodeSimple(this.values) : this.values
|
|
44
|
+
};
|
|
38
45
|
}
|
|
39
46
|
}
|
|
40
47
|
export {
|
package/columns/tuple.d.ts
CHANGED
|
@@ -5,11 +5,12 @@ export declare class TupleColumn extends AColumn implements IColumn {
|
|
|
5
5
|
values: Nullable<number[]>[];
|
|
6
6
|
readonly isArray = true;
|
|
7
7
|
load(spec: SerializedColumn): void;
|
|
8
|
-
reindex(): void;
|
|
9
8
|
encode(value: any): any[];
|
|
10
9
|
validate(value: any): boolean;
|
|
11
10
|
setRow(i: number, value: any[]): void;
|
|
12
11
|
getRow(i: number): Nullable<number[]>;
|
|
12
|
+
getRowKey(i: number): Nullable<number[]>;
|
|
13
|
+
valueKey(value: any): any[];
|
|
13
14
|
removeRow(i: number): void;
|
|
14
15
|
replaceValue(currValue: any, newValue: any): boolean;
|
|
15
16
|
toJSON(): {
|
package/columns/tuple.js
CHANGED
|
@@ -9,9 +9,6 @@ class TupleColumn extends AColumn {
|
|
|
9
9
|
this.values = spec.values;
|
|
10
10
|
this.reindex();
|
|
11
11
|
}
|
|
12
|
-
reindex() {
|
|
13
|
-
super.updateBitmap(this.values);
|
|
14
|
-
}
|
|
15
12
|
encode(value) {
|
|
16
13
|
return isArray(value) ? value : [value];
|
|
17
14
|
}
|
|
@@ -31,6 +28,12 @@ class TupleColumn extends AColumn {
|
|
|
31
28
|
getRow(i) {
|
|
32
29
|
return this.values[i];
|
|
33
30
|
}
|
|
31
|
+
getRowKey(i) {
|
|
32
|
+
return this.values[i];
|
|
33
|
+
}
|
|
34
|
+
valueKey(value) {
|
|
35
|
+
return this.encode(value);
|
|
36
|
+
}
|
|
34
37
|
removeRow(i) {
|
|
35
38
|
this.values.splice(i, 1);
|
|
36
39
|
this.bitmap?.removeBit(i);
|
package/columns/typedarray.d.ts
CHANGED
|
@@ -6,17 +6,19 @@ export declare class TypedArrayColumn extends AColumn implements IColumn {
|
|
|
6
6
|
values: TypedArray;
|
|
7
7
|
type: NumericType;
|
|
8
8
|
limit: [number, number];
|
|
9
|
+
protected tmp: TypedArray;
|
|
9
10
|
readonly isArray = false;
|
|
10
11
|
constructor(id: string, table: Table);
|
|
11
|
-
load(
|
|
12
|
-
reindex(): void;
|
|
12
|
+
load({ values }: SerializedColumn): void;
|
|
13
13
|
validate(value: any): boolean;
|
|
14
14
|
setRow(i: number, value: any): void;
|
|
15
15
|
getRow(i: number): number;
|
|
16
|
+
getRowKey(i: number): number;
|
|
17
|
+
valueKey(value: any): number | number[];
|
|
16
18
|
removeRow(i: number): void;
|
|
17
19
|
replaceValue(currValue: any, newValue: any): boolean;
|
|
18
20
|
toJSON(): {
|
|
19
|
-
values:
|
|
21
|
+
values: any[];
|
|
20
22
|
};
|
|
21
23
|
}
|
|
22
24
|
//# sourceMappingURL=typedarray.d.ts.map
|
package/columns/typedarray.js
CHANGED
|
@@ -1,45 +1,29 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { typedArray } from "@thi.ng/api/typedarray";
|
|
2
|
+
import { isArray } from "@thi.ng/checks/is-array";
|
|
2
3
|
import { isNumber } from "@thi.ng/checks/is-number";
|
|
3
|
-
import { decode as decodeRLE, encode as encodeRLE } from "@thi.ng/rle-pack";
|
|
4
4
|
import {
|
|
5
|
-
|
|
5
|
+
LIMITS
|
|
6
6
|
} from "../api.js";
|
|
7
7
|
import { __replaceValue } from "../internal/replace.js";
|
|
8
|
+
import { __deserializeTyped, __serializeTyped } from "../internal/serialize.js";
|
|
8
9
|
import { AColumn } from "./acolumn.js";
|
|
9
|
-
const LIMITS = {
|
|
10
|
-
u8: [0, 255],
|
|
11
|
-
u8c: [0, 255],
|
|
12
|
-
u16: [0, 65535],
|
|
13
|
-
u32: [0, 4294967295],
|
|
14
|
-
i8: [-128, 127],
|
|
15
|
-
i16: [-32768, 32767],
|
|
16
|
-
i32: [-2147483648, 2147483647],
|
|
17
|
-
f32: [-Infinity, Infinity],
|
|
18
|
-
f64: [-Infinity, Infinity]
|
|
19
|
-
};
|
|
20
10
|
class TypedArrayColumn extends AColumn {
|
|
21
11
|
values;
|
|
22
12
|
type;
|
|
23
13
|
limit;
|
|
14
|
+
tmp;
|
|
24
15
|
isArray = false;
|
|
25
16
|
constructor(id, table) {
|
|
26
17
|
super(id, table);
|
|
27
18
|
this.type = table.schema[id].type;
|
|
28
19
|
this.limit = LIMITS[this.type];
|
|
29
20
|
this.values = typedArray(this.type, 8);
|
|
21
|
+
this.tmp = typedArray(this.type, 1);
|
|
30
22
|
}
|
|
31
|
-
load(
|
|
32
|
-
|
|
33
|
-
const values = decodeRLE(spec.values);
|
|
34
|
-
this.values = typedArray(this.type, values.buffer);
|
|
35
|
-
} else {
|
|
36
|
-
this.values = typedArray(this.type, spec.values);
|
|
37
|
-
}
|
|
23
|
+
load({ values }) {
|
|
24
|
+
this.values = __deserializeTyped(this.type, this.spec.flags, values);
|
|
38
25
|
this.reindex();
|
|
39
26
|
}
|
|
40
|
-
reindex() {
|
|
41
|
-
super.updateBitmap(this.values.subarray(0, this.table.length));
|
|
42
|
-
}
|
|
43
27
|
validate(value) {
|
|
44
28
|
return isNumber(value) && value >= this.limit[0] && value <= this.limit[1] || value == null && this.spec.default != null;
|
|
45
29
|
}
|
|
@@ -63,6 +47,21 @@ class TypedArrayColumn extends AColumn {
|
|
|
63
47
|
getRow(i) {
|
|
64
48
|
return this.values[i];
|
|
65
49
|
}
|
|
50
|
+
getRowKey(i) {
|
|
51
|
+
return this.values[i];
|
|
52
|
+
}
|
|
53
|
+
valueKey(value) {
|
|
54
|
+
const { tmp } = this;
|
|
55
|
+
if (isArray(value)) {
|
|
56
|
+
return value.map((x) => {
|
|
57
|
+
tmp[0] = x;
|
|
58
|
+
return tmp[0];
|
|
59
|
+
});
|
|
60
|
+
} else {
|
|
61
|
+
tmp[0] = value;
|
|
62
|
+
return tmp[0];
|
|
63
|
+
}
|
|
64
|
+
}
|
|
66
65
|
removeRow(i) {
|
|
67
66
|
this.values.copyWithin(i, i + 1, this.table.length);
|
|
68
67
|
this.values[this.table.length - 1] = 0;
|
|
@@ -72,11 +71,11 @@ class TypedArrayColumn extends AColumn {
|
|
|
72
71
|
return __replaceValue(this.bitmap, this.values, currValue, newValue);
|
|
73
72
|
}
|
|
74
73
|
toJSON() {
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
74
|
+
return __serializeTyped(
|
|
75
|
+
this.values.subarray(0, this.table.length),
|
|
76
|
+
this.spec,
|
|
77
|
+
this.type
|
|
78
|
+
);
|
|
80
79
|
}
|
|
81
80
|
}
|
|
82
81
|
export {
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { type TypedArray } from "@thi.ng/api/typedarray";
|
|
2
|
+
import { type IColumn, type NumericType, type SerializedColumn } from "../api.js";
|
|
3
|
+
import type { Table } from "../table.js";
|
|
4
|
+
import { AColumn } from "./acolumn.js";
|
|
5
|
+
export declare class VectorColumn extends AColumn implements IColumn {
|
|
6
|
+
values: TypedArray;
|
|
7
|
+
type: NumericType;
|
|
8
|
+
size: number;
|
|
9
|
+
limit: [number, number];
|
|
10
|
+
protected tmp: TypedArray;
|
|
11
|
+
readonly isArray = false;
|
|
12
|
+
constructor(id: string, table: Table);
|
|
13
|
+
load({ values }: SerializedColumn): void;
|
|
14
|
+
validate(value: any): boolean;
|
|
15
|
+
setRow(i: number, value: any): void;
|
|
16
|
+
getRow(i: number): Float32Array<ArrayBufferLike> | Float64Array<ArrayBufferLike> | Int8Array<ArrayBufferLike> | Int16Array<ArrayBufferLike> | Int32Array<ArrayBufferLike> | Uint8Array<ArrayBufferLike> | Uint8ClampedArray<ArrayBufferLike> | Uint16Array<ArrayBufferLike> | Uint32Array<ArrayBufferLike>;
|
|
17
|
+
getRowKey(i: number): string;
|
|
18
|
+
valueKey(value: any): string | string[];
|
|
19
|
+
removeRow(i: number): void;
|
|
20
|
+
replaceValue(): boolean;
|
|
21
|
+
toJSON(): {
|
|
22
|
+
values: any[];
|
|
23
|
+
};
|
|
24
|
+
}
|
|
25
|
+
//# sourceMappingURL=vector.d.ts.map
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { typedArray } from "@thi.ng/api/typedarray";
|
|
2
|
+
import { isArray } from "@thi.ng/checks/is-array";
|
|
3
|
+
import { isArrayLike } from "@thi.ng/checks/is-arraylike";
|
|
4
|
+
import { isNumber } from "@thi.ng/checks/is-number";
|
|
5
|
+
import { unsupportedOp } from "@thi.ng/errors/unsupported";
|
|
6
|
+
import {
|
|
7
|
+
LIMITS
|
|
8
|
+
} from "../api.js";
|
|
9
|
+
import { __deserializeTyped, __serializeTyped } from "../internal/serialize.js";
|
|
10
|
+
import { AColumn } from "./acolumn.js";
|
|
11
|
+
/**
 * Column implementation for fixed-size numeric vectors.
 *
 * All rows are packed back-to-back into one typed array: row `i` occupies the
 * elements `[i * size, (i + 1) * size)` of {@link VectorColumn.values}.
 */
class VectorColumn extends AColumn {
	/** Flat typed array holding the components of all rows. */
	values;
	/** Component type ID, i.e. the column spec's type up to its first "v" (e.g. `u8vec` => `u8`). */
	type;
	/** Number of components per row (upper bound of the spec's cardinality). */
	size;
	/** `LIMITS` entry for the component type. */
	limit;
	/** Scratch vector used by `valueKey()` to coerce values to the component type. */
	tmp;
	isArray = false;

	/**
	 * @param id - column ID
	 * @param table - parent table
	 */
	constructor(id, table) {
		super(id, table);
		this.type = this.spec.type.split("v")[0];
		this.size = this.spec.cardinality[1];
		this.limit = LIMITS[this.type];
		// initial capacity: 8 rows
		this.values = typedArray(this.type, 8 * this.size);
		this.tmp = typedArray(this.type, this.size);
	}

	/**
	 * Replaces the backing store with the deserialized `values` and rebuilds
	 * the column index via `reindex()`.
	 *
	 * @param spec - serialized column (only `values` is read)
	 */
	load({ values }) {
		this.values = __deserializeTyped(this.type, this.spec.flags, values);
		this.reindex();
	}

	/**
	 * Returns true if `value` is array-like with exactly `size` items, or if
	 * it is nullish and the column spec defines a default value.
	 *
	 * @param value - candidate row value
	 */
	validate(value) {
		return (
			(isArrayLike(value) && value.length === this.size) ||
			(value == null && this.spec.default != null)
		);
	}

	/**
	 * Writes `value` as row `i`, growing the backing store if needed and
	 * updating the bitmap index (if present).
	 *
	 * @param i - row index
	 * @param value - vector to store (first passed through `ensureValue()`)
	 */
	setRow(i, value) {
		const vec = this.ensureValue(value);
		const { size } = this;
		const start = i * size;
		const end = start + size;
		if (end > this.values.length) {
			// Start from a non-zero capacity: a column loaded from an empty
			// array has length 0, and doubling 0 would never terminate.
			// Multiplication (instead of `<<=`) also avoids 32-bit overflow.
			let capacity = this.values.length || 8 * size;
			while (end > capacity) capacity *= 2;
			const grown = typedArray(this.type, capacity);
			grown.set(this.values);
			this.values = grown;
		}
		const { values, bitmap } = this;
		if (bitmap) {
			// un-index the row's previous content before indexing the new one
			bitmap.clearBit(this.getRowKey(i), i);
			bitmap.setBit(this.valueKey(vec), i);
		}
		values.set(vec, start);
	}

	/**
	 * Returns row `i` as a `subarray()` view of the backing store (no copy),
	 * so writes to the result modify the column data.
	 *
	 * @param i - row index
	 */
	getRow(i) {
		const { size } = this;
		i *= size;
		return this.values.subarray(i, i + size);
	}

	/**
	 * Returns the index key of row `i`: its components joined with `|`.
	 *
	 * @param i - row index
	 */
	getRowKey(i) {
		return this.getRow(i).join("|");
	}

	/**
	 * Computes the index key(s) for a query value. A plain array whose first
	 * item is not a number is treated as a list of vectors and yields an array
	 * of keys; anything else is treated as a single vector.
	 *
	 * Values are first copied into the typed scratch vector so the key uses the
	 * same numeric coercion as the stored data.
	 *
	 * @param value - vector or array of vectors
	 */
	valueKey(value) {
		const { tmp } = this;
		if (isArray(value) && !isNumber(value[0])) {
			return value.map((x) => {
				tmp.set(x);
				return tmp.join("|");
			});
		} else {
			tmp.set(value);
			return tmp.join("|");
		}
	}

	/**
	 * Removes row `i` by shifting all following rows down by one and zeroing
	 * the vacated tail. Uses the table's current length as the row count.
	 *
	 * @param i - row index
	 */
	removeRow(i) {
		const {
			size,
			table: { length },
		} = this;
		// Offsets are in elements, not rows: scale the row index by `size`.
		this.values.copyWithin(i * size, (i + 1) * size, length * size);
		this.values.fill(0, (length - 1) * size);
		this.bitmap?.removeBit(i);
	}

	/** Not implemented yet; always reports an unsupported operation. */
	replaceValue() {
		unsupportedOp("TODO");
	}

	/**
	 * Serializes only the used part of the backing store (table length × size
	 * elements) via `__serializeTyped()`.
	 */
	toJSON() {
		return __serializeTyped(
			this.values.subarray(0, this.table.length * this.size),
			this.spec,
			this.type
		);
	}
}
|
|
90
|
+
export {
|
|
91
|
+
VectorColumn
|
|
92
|
+
};
|
package/internal/serialize.d.ts
CHANGED
|
@@ -1,7 +1,15 @@
|
|
|
1
|
+
import { type NumericArray, type Type } from "@thi.ng/api/typedarray";
|
|
1
2
|
import type { BidirIndex } from "@thi.ng/bidir-index";
|
|
3
|
+
import { type ColumnSpec } from "../api.js";
|
|
2
4
|
/** @internal */
|
|
3
5
|
export declare const __serializeDict: (dict: BidirIndex<any>) => {
|
|
4
6
|
index: any;
|
|
5
7
|
next: number;
|
|
6
8
|
};
|
|
9
|
+
/** @internal */
|
|
10
|
+
export declare const __serializeTyped: ($values: NumericArray, spec: ColumnSpec, type: Type) => {
|
|
11
|
+
values: any[];
|
|
12
|
+
};
|
|
13
|
+
/** @internal */
|
|
14
|
+
export declare const __deserializeTyped: (type: Type, flags: number, values: number[]) => Float32Array<ArrayBufferLike> | Float64Array<ArrayBufferLike> | Int8Array<ArrayBufferLike> | Int16Array<ArrayBufferLike> | Int32Array<ArrayBufferLike> | Uint8Array<ArrayBufferLike> | Uint8ClampedArray<ArrayBufferLike> | Uint16Array<ArrayBufferLike> | Uint32Array<ArrayBufferLike>;
|
|
7
15
|
//# sourceMappingURL=serialize.d.ts.map
|
package/internal/serialize.js
CHANGED
|
@@ -1,8 +1,27 @@
|
|
|
1
|
+
import {
|
|
2
|
+
SIZEOF,
|
|
3
|
+
typedArray
|
|
4
|
+
} from "@thi.ng/api/typedarray";
|
|
5
|
+
import { decodeBinary, encodeBinary } from "@thi.ng/rle-pack/binary";
|
|
6
|
+
import { decodeSimple, encodeSimple } from "@thi.ng/rle-pack/simple";
|
|
7
|
+
import { FLAG_RLE } from "../api.js";
|
|
1
8
|
/**
 * Converts a bidirectional index into its JSON form: an array in which each
 * value sits at the position given by its ID, plus the index's next free ID.
 *
 * @param dict - index exposing `entries()` (yielding `[value, id]` pairs) and `nextID`
 * @returns object with `index` (values by ID) and `next` (the dict's `nextID`)
 *
 * @internal
 */
const __serializeDict = (dict) => {
	const index = [];
	for (const [value, id] of dict.entries()) {
		index[id] = value;
	}
	return { index, next: dict.nextID };
};
|
|
13
|
+
/**
 * Serializes the numeric values of a typed column into a plain array.
 *
 * If the column spec has `FLAG_RLE` set, the values are first run-length
 * encoded: float types (type ID starting with "f") use `encodeSimple()`, all
 * other types use `encodeBinary()` with a word size of `SIZEOF[type] * 8`.
 * If `spec.opts.prec` is given, every item of the resulting array (including
 * RLE output) is rounded to that many fractional digits.
 *
 * @param $values - numeric array to serialize
 * @param spec - column spec (`flags` and optional `opts.prec` are read)
 * @param type - element type ID
 * @returns object with the serialized `values` array
 *
 * @internal
 */
const __serializeTyped = ($values, spec, type) => {
	let source = $values;
	if (spec.flags & FLAG_RLE) {
		if (type[0] === "f") {
			source = encodeSimple($values);
		} else {
			source = encodeBinary($values, $values.length, SIZEOF[type] * 8);
		}
	}
	const plain = Array.from(source);
	const prec = spec.opts?.prec;
	const values =
		prec == null ? plain : plain.map((x) => Number(x.toFixed(prec)));
	return { values };
};
|
|
22
|
+
/**
 * Inverse of `__serializeTyped()`: rebuilds a typed array of the given type
 * from serialized values.
 *
 * Without `FLAG_RLE` the values are wrapped directly. With `FLAG_RLE`, float
 * types (type ID starting with "f") are decoded via `decodeSimple()`, all
 * others via `decodeBinary()`, whose result's `buffer` backs the typed array.
 *
 * @param type - element type ID
 * @param flags - column flags
 * @param values - serialized values
 *
 * @internal
 */
const __deserializeTyped = (type, flags, values) => {
	if (!(flags & FLAG_RLE)) {
		return typedArray(type, values);
	}
	if (type[0] === "f") {
		return typedArray(type, decodeSimple(values));
	}
	return typedArray(type, decodeBinary(values).buffer);
};
|
|
6
23
|
export {
|
|
7
|
-
|
|
24
|
+
__deserializeTyped,
|
|
25
|
+
__serializeDict,
|
|
26
|
+
__serializeTyped
|
|
8
27
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@thi.ng/column-store",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "In-memory column store database with customizable column types, extensible query engine, bitfield indexing for query acceleration, JSON serialization with optional RLE compression",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"module": "./index.js",
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
"@thi.ng/bidir-index": "^1.5.0",
|
|
45
45
|
"@thi.ng/checks": "^3.8.4",
|
|
46
46
|
"@thi.ng/errors": "^2.6.3",
|
|
47
|
-
"@thi.ng/rle-pack": "^3.
|
|
47
|
+
"@thi.ng/rle-pack": "^3.2.0"
|
|
48
48
|
},
|
|
49
49
|
"devDependencies": {
|
|
50
50
|
"esbuild": "^0.27.2",
|
|
@@ -112,6 +112,9 @@
|
|
|
112
112
|
"./columns/typedarray": {
|
|
113
113
|
"default": "./columns/typedarray.js"
|
|
114
114
|
},
|
|
115
|
+
"./columns/vector": {
|
|
116
|
+
"default": "./columns/vector.js"
|
|
117
|
+
},
|
|
115
118
|
"./query": {
|
|
116
119
|
"default": "./query.js"
|
|
117
120
|
},
|
|
@@ -123,5 +126,5 @@
|
|
|
123
126
|
"status": "alpha",
|
|
124
127
|
"year": 2025
|
|
125
128
|
},
|
|
126
|
-
"gitHead": "
|
|
129
|
+
"gitHead": "e3f0a8a9868bb51ae0c0c4882763f68ec4083bc3\n"
|
|
127
130
|
}
|
package/query.js
CHANGED
|
@@ -67,7 +67,7 @@ class Query {
|
|
|
67
67
|
}
|
|
68
68
|
if (ctx.bitmap) {
|
|
69
69
|
for (let i of new Bitfield(ctx.bitmap).ones(table.length))
|
|
70
|
-
yield table.getRow(i);
|
|
70
|
+
yield table.getRow(i, false, true);
|
|
71
71
|
}
|
|
72
72
|
}
|
|
73
73
|
}
|
|
@@ -123,18 +123,18 @@ class QueryCtx {
|
|
|
123
123
|
}
|
|
124
124
|
const execBitOr = (ctx, term, column) => {
|
|
125
125
|
const bitmap = column.bitmap;
|
|
126
|
-
const
|
|
126
|
+
const key = column.valueKey(term.value);
|
|
127
127
|
let mask;
|
|
128
|
-
if (isArray(
|
|
129
|
-
for (let
|
|
130
|
-
const b = bitmap.index.get(
|
|
128
|
+
if (isArray(key)) {
|
|
129
|
+
for (let k of key) {
|
|
130
|
+
const b = bitmap.index.get(k)?.buffer;
|
|
131
131
|
if (!b) continue;
|
|
132
132
|
if (mask) {
|
|
133
133
|
for (let i = 0; i < b.length; i++) mask[i] |= b[i];
|
|
134
134
|
} else mask = ctx.makeMask(b);
|
|
135
135
|
}
|
|
136
136
|
} else {
|
|
137
|
-
const b = bitmap.index.get(
|
|
137
|
+
const b = bitmap.index.get(key)?.buffer;
|
|
138
138
|
if (b) mask = ctx.makeMask(b);
|
|
139
139
|
}
|
|
140
140
|
if (mask) {
|
|
@@ -143,13 +143,12 @@ const execBitOr = (ctx, term, column) => {
|
|
|
143
143
|
};
|
|
144
144
|
const execOr = (ctx, term, column) => {
|
|
145
145
|
const n = ctx.table.length;
|
|
146
|
-
const
|
|
147
|
-
const
|
|
148
|
-
const pred = column.isArray ? (row, v) => row.includes(v) : (row, v) => row === v;
|
|
146
|
+
const key = column.valueKey(term.value);
|
|
147
|
+
const pred = column.isArray ? (row, k) => row.includes(k) : (row, k) => row === k;
|
|
149
148
|
let mask;
|
|
150
|
-
for (let
|
|
149
|
+
for (let k of isArray(key) ? key : [key]) {
|
|
151
150
|
for (let i = 0; i < n; i++) {
|
|
152
|
-
if (pred(
|
|
151
|
+
if (pred(column.getRowKey(i), k)) {
|
|
153
152
|
if (!mask) mask = ctx.makeMask();
|
|
154
153
|
mask[i >>> 5] |= 1 << (i & 31);
|
|
155
154
|
}
|
|
@@ -168,12 +167,12 @@ const delegateOr = (ctx, term, column) => {
|
|
|
168
167
|
};
|
|
169
168
|
const execBitAnd = (ctx, term, column) => {
|
|
170
169
|
const bitmap = column.bitmap;
|
|
171
|
-
const
|
|
170
|
+
const key = column.valueKey(term.value);
|
|
172
171
|
let mask;
|
|
173
|
-
if (isArray(
|
|
172
|
+
if (isArray(key)) {
|
|
174
173
|
const colBitmaps = [];
|
|
175
|
-
for (let
|
|
176
|
-
const b = bitmap.index.get(
|
|
174
|
+
for (let k of key) {
|
|
175
|
+
const b = bitmap.index.get(k)?.buffer;
|
|
177
176
|
if (!b) {
|
|
178
177
|
if (term.type === "and") ctx.bitmap = void 0;
|
|
179
178
|
return;
|
|
@@ -187,7 +186,7 @@ const execBitAnd = (ctx, term, column) => {
|
|
|
187
186
|
} else mask = ctx.makeMask(b);
|
|
188
187
|
}
|
|
189
188
|
} else {
|
|
190
|
-
const b = bitmap.index.get(
|
|
189
|
+
const b = bitmap.index.get(key)?.buffer;
|
|
191
190
|
if (b) mask = ctx.makeMask(b);
|
|
192
191
|
}
|
|
193
192
|
if (mask) {
|
|
@@ -198,14 +197,13 @@ const execBitAnd = (ctx, term, column) => {
|
|
|
198
197
|
};
|
|
199
198
|
const execAnd = (ctx, term, column) => {
|
|
200
199
|
const n = ctx.table.length;
|
|
201
|
-
const
|
|
202
|
-
const values = column.values;
|
|
200
|
+
const key = column.valueKey(term.value) ?? null;
|
|
203
201
|
const pred = column.isArray ? (row, v) => row.includes(v) : (row, v) => row === v;
|
|
204
202
|
let mask;
|
|
205
|
-
for (let
|
|
203
|
+
for (let k of isArray(key) ? key : [key]) {
|
|
206
204
|
let m;
|
|
207
205
|
for (let i = 0; i < n; i++) {
|
|
208
|
-
if (pred(
|
|
206
|
+
if (pred(column.getRowKey(i), k)) {
|
|
209
207
|
if (!m) m = ctx.makeMask();
|
|
210
208
|
m[i >>> 5] |= 1 << (i & 31);
|
|
211
209
|
}
|
|
@@ -237,11 +235,10 @@ const QUERY_OPS = {
|
|
|
237
235
|
nand: { fn: delegateAnd },
|
|
238
236
|
matchCol: {
|
|
239
237
|
fn: (ctx, term, column) => {
|
|
240
|
-
const values = column.values;
|
|
241
238
|
const pred = term.value;
|
|
242
239
|
let mask;
|
|
243
240
|
for (let i = 0, n = ctx.table.length; i < n; i++) {
|
|
244
|
-
if (pred(column.
|
|
241
|
+
if (pred(column.getRow(i))) {
|
|
245
242
|
if (!mask) mask = ctx.makeMask();
|
|
246
243
|
mask[i >>> 5] |= 1 << (i & 31);
|
|
247
244
|
}
|
package/table.d.ts
CHANGED
|
@@ -25,8 +25,8 @@ export declare class Table {
|
|
|
25
25
|
addRows(rows: Iterable<Row>): void;
|
|
26
26
|
updateRow(i: number, row: Row): void;
|
|
27
27
|
removeRow(i: number): void;
|
|
28
|
-
getRow(i: number, safe?: boolean): Row | undefined;
|
|
29
|
-
getPartialRow(i: number, columns: string[], safe?: boolean): Row | undefined;
|
|
28
|
+
getRow(i: number, safe?: boolean, includeID?: boolean): Row | undefined;
|
|
29
|
+
getPartialRow(i: number, columns: string[], safe?: boolean, includeID?: boolean): Row | undefined;
|
|
30
30
|
validateRow(row: Row): void;
|
|
31
31
|
validateColumnSpec(id: string, spec: ColumnSpec): void;
|
|
32
32
|
toJSON(): {
|
package/table.js
CHANGED
|
@@ -11,6 +11,7 @@ import { DictColumn } from "./columns/dict.js";
|
|
|
11
11
|
import { PlainColumn } from "./columns/plain.js";
|
|
12
12
|
import { TupleColumn } from "./columns/tuple.js";
|
|
13
13
|
import { TypedArrayColumn } from "./columns/typedarray.js";
|
|
14
|
+
import { VectorColumn } from "./columns/vector.js";
|
|
14
15
|
import { __columnError } from "./internal/checks.js";
|
|
15
16
|
import { Query } from "./query.js";
|
|
16
17
|
class Table {
|
|
@@ -81,17 +82,17 @@ class Table {
|
|
|
81
82
|
}
|
|
82
83
|
this.length--;
|
|
83
84
|
}
|
|
84
|
-
getRow(i, safe = true) {
|
|
85
|
+
getRow(i, safe = true, includeID = false) {
|
|
85
86
|
if (safe && (i < 0 || i >= this.length)) return;
|
|
86
|
-
const row = {};
|
|
87
|
+
const row = includeID ? { __row: i } : {};
|
|
87
88
|
for (let id in this.columns) {
|
|
88
89
|
row[id] = this.columns[id].getRow(i);
|
|
89
90
|
}
|
|
90
91
|
return row;
|
|
91
92
|
}
|
|
92
|
-
getPartialRow(i, columns, safe = true) {
|
|
93
|
+
getPartialRow(i, columns, safe = true, includeID = false) {
|
|
93
94
|
if (safe && (i < 0 || i >= this.length)) return;
|
|
94
|
-
const row = {};
|
|
95
|
+
const row = includeID ? { __row: i } : {};
|
|
95
96
|
for (let id of columns) {
|
|
96
97
|
row[id] = this.columns[id]?.getRow(i);
|
|
97
98
|
}
|
|
@@ -138,20 +139,25 @@ const $typed = {
|
|
|
138
139
|
cardinality: [0, 1],
|
|
139
140
|
required: true
|
|
140
141
|
};
|
|
141
|
-
const $float = { ...$typed, flags: FLAG_BITMAP };
|
|
142
142
|
const $untyped = {
|
|
143
|
-
impl: (table, id, { flags, cardinality: [
|
|
143
|
+
impl: (table, id, { flags, cardinality: [_, max] }) => {
|
|
144
144
|
const isDict = flags & FLAG_DICT;
|
|
145
|
-
if (flags & FLAG_RLE)
|
|
146
|
-
if (!isDict || max > 1 || min === 0 && d == null) {
|
|
147
|
-
__columnError(id, `RLE encoding not supported`);
|
|
148
|
-
}
|
|
149
|
-
}
|
|
145
|
+
if (flags & FLAG_RLE && max > 1) __columnError(id, `RLE not supported`);
|
|
150
146
|
return max > 1 ? new (isDict ? DictTupleColumn : TupleColumn)(id, table) : new (isDict ? DictColumn : PlainColumn)(id, table);
|
|
151
147
|
},
|
|
152
148
|
flags: FLAG_BITMAP | FLAG_DICT | FLAG_UNIQUE | FLAG_RLE,
|
|
153
149
|
cardinality: [0, -1 >>> 0]
|
|
154
150
|
};
|
|
151
|
+
/**
 * Shared column type definition for all `*vec` column types. Creates a
 * `VectorColumn` and rejects specs whose non-zero lower cardinality bound
 * differs from the upper bound (i.e. variable-size vectors).
 */
const $vec = {
	impl: (table, id, spec) => {
		const [lower, upper] = spec.cardinality;
		const isVariableSize = lower > 0 && lower !== upper;
		if (isVariableSize) {
			__columnError(id, `only fixed size vectors supported`);
		}
		return new VectorColumn(id, table);
	},
	flags: FLAG_BITMAP | FLAG_RLE,
	// same value as `-1 >>> 0` (max. unsigned 32-bit integer)
	cardinality: [0, 0xffffffff],
	required: true
};
|
|
155
161
|
const COLUMN_TYPES = {
|
|
156
162
|
u8: $typed,
|
|
157
163
|
i8: $typed,
|
|
@@ -159,10 +165,18 @@ const COLUMN_TYPES = {
|
|
|
159
165
|
i16: $typed,
|
|
160
166
|
u32: $typed,
|
|
161
167
|
i32: $typed,
|
|
162
|
-
f32: $
|
|
163
|
-
f64: $
|
|
168
|
+
f32: $typed,
|
|
169
|
+
f64: $typed,
|
|
164
170
|
num: $untyped,
|
|
165
|
-
str: $untyped
|
|
171
|
+
str: $untyped,
|
|
172
|
+
u8vec: $vec,
|
|
173
|
+
u16vec: $vec,
|
|
174
|
+
u32vec: $vec,
|
|
175
|
+
i8vec: $vec,
|
|
176
|
+
i16vec: $vec,
|
|
177
|
+
i32vec: $vec,
|
|
178
|
+
f32vec: $vec,
|
|
179
|
+
f64vec: $vec
|
|
166
180
|
};
|
|
167
181
|
const registerColumnType = (type, spec) => {
|
|
168
182
|
if (COLUMN_TYPES[type])
|