@thi.ng/column-store 0.11.3 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/bitmap.js +2 -2
- package/columns/acolumn.js +1 -1
- package/columns/dict-tuple.js +12 -5
- package/columns/dict.js +3 -2
- package/columns/tuple.js +3 -3
- package/internal/frequencies.d.ts +26 -0
- package/internal/frequencies.js +33 -0
- package/package.json +3 -3
- package/query.d.ts +2 -1
- package/query.js +17 -14
- package/table.js +2 -2
package/README.md
CHANGED
package/bitmap.js
CHANGED
|
@@ -35,11 +35,11 @@ class BitmapIndex {
|
|
|
35
35
|
* @param id
|
|
36
36
|
*/
|
|
37
37
|
removeBit(rowID) {
|
|
38
|
-
for (
|
|
38
|
+
for (const bitmap of this.index.values()) bitmap.removeBit(rowID);
|
|
39
39
|
}
|
|
40
40
|
toJSON() {
|
|
41
41
|
const res = {};
|
|
42
|
-
for (
|
|
42
|
+
for (const [k, bits] of this.index) {
|
|
43
43
|
if (bits.buffer) res[k] = Array.from(bits.buffer);
|
|
44
44
|
}
|
|
45
45
|
return res;
|
package/columns/acolumn.js
CHANGED
package/columns/dict-tuple.js
CHANGED
|
@@ -2,6 +2,10 @@ import { BidirIndex } from "@thi.ng/bidir-index";
|
|
|
2
2
|
import { isArray } from "@thi.ng/checks/is-array";
|
|
3
3
|
import { FLAG_UNIQUE } from "../api.js";
|
|
4
4
|
import { __validateArrayValue } from "../internal/checks.js";
|
|
5
|
+
import {
|
|
6
|
+
__frequenciesTuples,
|
|
7
|
+
__frequencyIndex
|
|
8
|
+
} from "../internal/frequencies.js";
|
|
5
9
|
import { __indexOfTuple, __lastIndexOfTuple } from "../internal/indexof.js";
|
|
6
10
|
import { __serializeDict } from "../internal/serialize.js";
|
|
7
11
|
import { AColumn } from "./acolumn.js";
|
|
@@ -21,9 +25,12 @@ class DictTupleColumn extends AColumn {
|
|
|
21
25
|
}
|
|
22
26
|
reindex() {
|
|
23
27
|
const dict = this.dict;
|
|
24
|
-
const newDict =
|
|
28
|
+
const newDict = __frequencyIndex(
|
|
29
|
+
dict,
|
|
30
|
+
__frequenciesTuples(this.values)
|
|
31
|
+
);
|
|
25
32
|
this.values = this.values.map(
|
|
26
|
-
(ids) => ids ? newDict.
|
|
33
|
+
(ids) => ids ? newDict.getAll(dict.getAllIDs(ids)) : null
|
|
27
34
|
);
|
|
28
35
|
this.dict = newDict;
|
|
29
36
|
super.updateBitmap();
|
|
@@ -48,7 +55,7 @@ class DictTupleColumn extends AColumn {
|
|
|
48
55
|
values.length = n;
|
|
49
56
|
values.fill($value, 0, n);
|
|
50
57
|
if (bitmap && $value) {
|
|
51
|
-
for (
|
|
58
|
+
for (const x of $value) bitmap.ensure(x).fill(1, 0, n);
|
|
52
59
|
}
|
|
53
60
|
}
|
|
54
61
|
setRow(i, value) {
|
|
@@ -57,8 +64,8 @@ class DictTupleColumn extends AColumn {
|
|
|
57
64
|
const old = values[i];
|
|
58
65
|
const encoded = values[i] = value != null ? this.table.schema[this.id].flags & FLAG_UNIQUE ? [...dict.addAllUnique(value)] : dict.addAll(value) : null;
|
|
59
66
|
if (bitmap) {
|
|
60
|
-
if (old) for (
|
|
61
|
-
if (encoded) for (
|
|
67
|
+
if (old) for (const x of old) bitmap.clearBit(x, i);
|
|
68
|
+
if (encoded) for (const x of encoded) bitmap.setBit(x, i);
|
|
62
69
|
}
|
|
63
70
|
}
|
|
64
71
|
getRow(i) {
|
package/columns/dict.js
CHANGED
|
@@ -4,6 +4,7 @@ import { decodeBinary, encodeBinary } from "@thi.ng/rle-pack/binary";
|
|
|
4
4
|
import { decodeSimple, encodeSimple } from "@thi.ng/rle-pack/simple";
|
|
5
5
|
import { FLAG_RLE } from "../api.js";
|
|
6
6
|
import { __validateValue } from "../internal/checks.js";
|
|
7
|
+
import { __frequencies, __frequencyIndex } from "../internal/frequencies.js";
|
|
7
8
|
import { __indexOfSingle, __lastIndexOfSingle } from "../internal/indexof.js";
|
|
8
9
|
import { __serializeDict } from "../internal/serialize.js";
|
|
9
10
|
import { AColumn } from "./acolumn.js";
|
|
@@ -23,9 +24,9 @@ class DictColumn extends AColumn {
|
|
|
23
24
|
}
|
|
24
25
|
reindex() {
|
|
25
26
|
const dict = this.dict;
|
|
26
|
-
const newDict =
|
|
27
|
+
const newDict = __frequencyIndex(dict, __frequencies(this.values));
|
|
27
28
|
this.values = this.values.map(
|
|
28
|
-
(x) => x != null ? newDict.
|
|
29
|
+
(x) => x != null ? newDict.get(dict.getID(x)) : null
|
|
29
30
|
);
|
|
30
31
|
this.dict = newDict;
|
|
31
32
|
super.updateBitmap();
|
package/columns/tuple.js
CHANGED
|
@@ -30,7 +30,7 @@ class TupleColumn extends AColumn {
|
|
|
30
30
|
values.length = n;
|
|
31
31
|
values.fill(value ?? null, 0, n);
|
|
32
32
|
if (bitmap && value) {
|
|
33
|
-
for (
|
|
33
|
+
for (const x of value) bitmap.ensure(x).fill(1, 0, n);
|
|
34
34
|
}
|
|
35
35
|
}
|
|
36
36
|
setRow(i, value) {
|
|
@@ -39,8 +39,8 @@ class TupleColumn extends AColumn {
|
|
|
39
39
|
const old = values[i];
|
|
40
40
|
const row = values[i] = value != null ? this.table.schema[this.id].flags & FLAG_UNIQUE ? [...new Set(value)] : value : null;
|
|
41
41
|
if (bitmap) {
|
|
42
|
-
if (old) for (
|
|
43
|
-
if (row) for (
|
|
42
|
+
if (old) for (const x of old) bitmap.clearBit(x, i);
|
|
43
|
+
if (row) for (const x of row) bitmap.setBit(x, i);
|
|
44
44
|
}
|
|
45
45
|
}
|
|
46
46
|
getRow(i) {
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { TypedArray } from "@thi.ng/api";
|
|
2
|
+
import { BidirIndex } from "@thi.ng/bidir-index";
|
|
3
|
+
/**
|
|
4
|
+
* Constructs a new bidir index, sorted by key frequency, based on given index
|
|
5
|
+
* and pre-computed histogram.
|
|
6
|
+
*
|
|
7
|
+
* @param dict - existing index; each key of `bins` is resolved through its `getID()`
|
|
8
|
+
* @param bins - pre-computed histogram (`key => count`); entries are added in order of descending count
|
|
9
|
+
*/
|
|
10
|
+
export declare const __frequencyIndex: (dict: BidirIndex<any>, bins: Map<number, number>) => BidirIndex<unknown>;
|
|
11
|
+
/**
|
|
12
|
+
* Computes a histogram of `rows` and returns it as a Map of `value => count`
|
|
13
|
+
* bins (not sorted). Nullish values in `rows` are skipped.
|
|
14
|
+
*
|
|
15
|
+
* @internal
|
|
16
|
+
*/
|
|
17
|
+
export declare const __frequencies: (rows: TypedArray | (number | null)[]) => Map<number, number>;
|
|
18
|
+
/**
|
|
19
|
+
* Same as {@link __frequencies}, but for tuple-based columns. Computes
|
|
20
|
+
* histogram for unique component values inside tuples, not for the tuples
|
|
21
|
+
* themselves.
|
|
22
|
+
*
|
|
23
|
+
* @internal
|
|
24
|
+
*/
|
|
25
|
+
export declare const __frequenciesTuples: (rows: (number[] | null)[]) => Map<number, number>;
|
|
26
|
+
//# sourceMappingURL=frequencies.d.ts.map
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { BidirIndex } from "@thi.ng/bidir-index";
|
|
2
|
+
const __frequencyIndex = (dict, bins) => {
|
|
3
|
+
const newDict = new BidirIndex();
|
|
4
|
+
for (const bin of [...bins].sort((a, b) => b[1] - a[1])) {
|
|
5
|
+
newDict.add(dict.getID(bin[0]));
|
|
6
|
+
}
|
|
7
|
+
return newDict;
|
|
8
|
+
};
|
|
9
|
+
const __frequencies = (rows) => {
|
|
10
|
+
const bins = /* @__PURE__ */ new Map();
|
|
11
|
+
for (const row of rows) {
|
|
12
|
+
if (row == null) continue;
|
|
13
|
+
const n = bins.get(row);
|
|
14
|
+
bins.set(row, n != null ? n + 1 : 1);
|
|
15
|
+
}
|
|
16
|
+
return bins;
|
|
17
|
+
};
|
|
18
|
+
const __frequenciesTuples = (rows) => {
|
|
19
|
+
const bins = /* @__PURE__ */ new Map();
|
|
20
|
+
for (const row of rows) {
|
|
21
|
+
if (row == null) continue;
|
|
22
|
+
for (const v of row) {
|
|
23
|
+
const n = bins.get(v);
|
|
24
|
+
bins.set(v, n != null ? n + 1 : 1);
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
return bins;
|
|
28
|
+
};
|
|
29
|
+
export {
|
|
30
|
+
__frequencies,
|
|
31
|
+
__frequenciesTuples,
|
|
32
|
+
__frequencyIndex
|
|
33
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@thi.ng/column-store",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.12.0",
|
|
4
4
|
"description": "In-memory column store database with customizable column types, extensible query engine, bitfield indexing for query acceleration, JSON serialization with optional RLE compression",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"module": "./index.js",
|
|
@@ -43,7 +43,7 @@
|
|
|
43
43
|
"@thi.ng/api": "^8.12.16",
|
|
44
44
|
"@thi.ng/bidir-index": "^1.5.2",
|
|
45
45
|
"@thi.ng/checks": "^3.8.6",
|
|
46
|
-
"@thi.ng/compare": "^2.5.
|
|
46
|
+
"@thi.ng/compare": "^2.5.4",
|
|
47
47
|
"@thi.ng/errors": "^2.6.5",
|
|
48
48
|
"@thi.ng/rle-pack": "^3.2.3"
|
|
49
49
|
},
|
|
@@ -130,5 +130,5 @@
|
|
|
130
130
|
"status": "alpha",
|
|
131
131
|
"year": 2025
|
|
132
132
|
},
|
|
133
|
-
"gitHead": "
|
|
133
|
+
"gitHead": "82ec7cc54d0e89dad89cde87fa36c444512d71d7\n"
|
|
134
134
|
}
|
package/query.d.ts
CHANGED
|
@@ -7,8 +7,9 @@ export declare class Query<T extends Row> {
|
|
|
7
7
|
protected _cmp?: Comparator<any>;
|
|
8
8
|
protected _limit: number;
|
|
9
9
|
protected _offset: number;
|
|
10
|
-
constructor(table: Table<T>, terms?: QueryTerm<T
|
|
10
|
+
constructor(table: Table<T>, terms?: Iterable<QueryTerm<T>>);
|
|
11
11
|
addTerm(term: QueryTerm<T>): this;
|
|
12
|
+
addTerms(terms: Iterable<QueryTerm<T>>): void;
|
|
12
13
|
limit(limit: number, offset?: number): this;
|
|
13
14
|
/**
|
|
14
15
|
* Constructs a comparator for query results based on given sort criteria,
|
package/query.js
CHANGED
|
@@ -12,9 +12,9 @@ import { Bitfield } from "./bitmap.js";
|
|
|
12
12
|
import { __columnError } from "./internal/checks.js";
|
|
13
13
|
import { __clampRange } from "./internal/indexof.js";
|
|
14
14
|
class Query {
|
|
15
|
-
constructor(table, terms
|
|
15
|
+
constructor(table, terms) {
|
|
16
16
|
this.table = table;
|
|
17
|
-
|
|
17
|
+
if (terms) this.addTerms(terms);
|
|
18
18
|
}
|
|
19
19
|
terms = [];
|
|
20
20
|
_cmp;
|
|
@@ -25,6 +25,9 @@ class Query {
|
|
|
25
25
|
this.terms.push(term);
|
|
26
26
|
return this;
|
|
27
27
|
}
|
|
28
|
+
addTerms(terms) {
|
|
29
|
+
for (const term of terms) this.addTerm(term);
|
|
30
|
+
}
|
|
28
31
|
limit(limit, offset = 0) {
|
|
29
32
|
this._limit = limit;
|
|
30
33
|
this._offset = offset;
|
|
@@ -103,7 +106,7 @@ class Query {
|
|
|
103
106
|
*[Symbol.iterator]() {
|
|
104
107
|
const { table, _limit, _offset } = this;
|
|
105
108
|
const ctx = new QueryCtx(this);
|
|
106
|
-
for (
|
|
109
|
+
for (const term of this.terms) {
|
|
107
110
|
const op = QUERY_OPS[term.type];
|
|
108
111
|
let column;
|
|
109
112
|
if (term.column) {
|
|
@@ -119,7 +122,7 @@ class Query {
|
|
|
119
122
|
if (!ctx.bitmap) return;
|
|
120
123
|
if (this._cmp) {
|
|
121
124
|
const rows = [];
|
|
122
|
-
for (
|
|
125
|
+
for (const i of ctx) {
|
|
123
126
|
rows.push(table.getRow(i, false, true));
|
|
124
127
|
}
|
|
125
128
|
rows.sort(this._cmp);
|
|
@@ -130,7 +133,7 @@ class Query {
|
|
|
130
133
|
}
|
|
131
134
|
let j = 0;
|
|
132
135
|
const n = _offset + _limit;
|
|
133
|
-
for (
|
|
136
|
+
for (const i of ctx) {
|
|
134
137
|
if (j >= _offset) {
|
|
135
138
|
if (j >= n) return;
|
|
136
139
|
yield table.getRow(i, false, true);
|
|
@@ -211,7 +214,7 @@ const execBitOr = (ctx, term, column) => {
|
|
|
211
214
|
const key = column.valueKey(term.value);
|
|
212
215
|
let mask;
|
|
213
216
|
if (isArray(key)) {
|
|
214
|
-
for (
|
|
217
|
+
for (const k of key) {
|
|
215
218
|
const b = bitmap.index.get(k)?.buffer;
|
|
216
219
|
if (!b) continue;
|
|
217
220
|
if (mask) {
|
|
@@ -228,8 +231,8 @@ const execOr = (ctx, term, column) => {
|
|
|
228
231
|
const key = column.valueKey(term.value);
|
|
229
232
|
const pred = column.isArray ? (row, k) => row.includes(k) : (row, k) => row === k;
|
|
230
233
|
let mask;
|
|
231
|
-
for (
|
|
232
|
-
for (
|
|
234
|
+
for (const k of isArray(key) ? key : [key]) {
|
|
235
|
+
for (const i of ctx) {
|
|
233
236
|
if (pred(column.getRowKey(i), k)) {
|
|
234
237
|
if (!mask) mask = ctx.makeMask();
|
|
235
238
|
mask[i >>> 5] |= 1 << (i & 31);
|
|
@@ -249,7 +252,7 @@ const execBitAnd = (ctx, term, column) => {
|
|
|
249
252
|
const isNeg = term.type === "nand";
|
|
250
253
|
let mask;
|
|
251
254
|
if (isArray(key)) {
|
|
252
|
-
for (
|
|
255
|
+
for (const k of key) {
|
|
253
256
|
const b = bitmap.index.get(k)?.buffer;
|
|
254
257
|
if (!b) {
|
|
255
258
|
if (isNeg) {
|
|
@@ -276,9 +279,9 @@ const execAnd = (ctx, term, column) => {
|
|
|
276
279
|
const pred = column.isArray ? (row, v) => row.includes(v) : (row, v) => row === v;
|
|
277
280
|
const isNeg = term.type === "nand";
|
|
278
281
|
let mask;
|
|
279
|
-
for (
|
|
282
|
+
for (const k of isArray(key) ? key : [key]) {
|
|
280
283
|
let m;
|
|
281
|
-
for (
|
|
284
|
+
for (const i of ctx) {
|
|
282
285
|
if (pred(column.getRowKey(i), k)) {
|
|
283
286
|
if (!m) m = ctx.makeMask();
|
|
284
287
|
m[i >>> 5] |= 1 << (i & 31);
|
|
@@ -321,7 +324,7 @@ const QUERY_OPS = {
|
|
|
321
324
|
fn: (ctx, term, column) => {
|
|
322
325
|
const pred = term.value;
|
|
323
326
|
let mask;
|
|
324
|
-
for (
|
|
327
|
+
for (const i of ctx) {
|
|
325
328
|
if (pred(column.getRow(i))) {
|
|
326
329
|
if (!mask) mask = ctx.makeMask();
|
|
327
330
|
mask[i >>> 5] |= 1 << (i & 31);
|
|
@@ -338,7 +341,7 @@ const QUERY_OPS = {
|
|
|
338
341
|
const columns = term.params;
|
|
339
342
|
const pred = term.value;
|
|
340
343
|
let mask;
|
|
341
|
-
for (
|
|
344
|
+
for (const i of ctx) {
|
|
342
345
|
if (pred(table.getPartialRow(i, columns, false))) {
|
|
343
346
|
if (!mask) mask = ctx.makeMask();
|
|
344
347
|
mask[i >>> 5] |= 1 << (i & 31);
|
|
@@ -354,7 +357,7 @@ const QUERY_OPS = {
|
|
|
354
357
|
const table = ctx.table;
|
|
355
358
|
const pred = term.value;
|
|
356
359
|
let mask;
|
|
357
|
-
for (
|
|
360
|
+
for (const i of ctx) {
|
|
358
361
|
if (pred(table.getRow(i, false))) {
|
|
359
362
|
if (!mask) mask = ctx.makeMask();
|
|
360
363
|
mask[i >>> 5] |= 1 << (i & 31);
|
package/table.js
CHANGED
|
@@ -86,7 +86,7 @@ class Table {
|
|
|
86
86
|
this.length++;
|
|
87
87
|
}
|
|
88
88
|
addRows(rows) {
|
|
89
|
-
for (
|
|
89
|
+
for (const row of rows) this.addRow(row);
|
|
90
90
|
}
|
|
91
91
|
updateRow(i, row) {
|
|
92
92
|
if (i < 0 || i >= this.length) illegalArgs(`row ID: ${i}`);
|
|
@@ -113,7 +113,7 @@ class Table {
|
|
|
113
113
|
getPartialRow(i, columns, safe = true, includeID = false) {
|
|
114
114
|
if (safe && (i < 0 || i >= this.length)) return;
|
|
115
115
|
const row = includeID ? { __row: i } : {};
|
|
116
|
-
for (
|
|
116
|
+
for (const id of columns) {
|
|
117
117
|
row[id] = this.columns[id]?.getRow(i);
|
|
118
118
|
}
|
|
119
119
|
return row;
|