@thi.ng/column-store 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -30,6 +30,8 @@
30
30
  - [FLAG_RLE](#flag_rle)
31
31
  - [Custom flags](#custom-flags)
32
32
  - [Query engine](#query-engine)
33
+ - [Query execution](#query-execution)
34
+ - [Optimized row iteration](#optimized-row-iteration)
33
35
  - [Built-in operators](#built-in-operators)
34
36
  - [OR](#or)
35
37
  - [AND](#and)
@@ -315,7 +317,9 @@ types](#custom-column-types).
315
317
  ## Query engine
316
318
 
317
319
  The query engine is highly extensible and can be used for executing arbitrarily
318
- complex queries.
320
+ complex queries via chaining of query operators.
321
+
322
+ ### Query execution
319
323
 
320
324
  The system allows predefining queries, which are then only evaluated and produce
321
325
  up-to-date results via the standard JS iterable mechanism (i.e. queries
@@ -328,19 +332,46 @@ const query = table.query().or("name", ["alice", "bob"]);
328
332
  // actually (re)execute query
329
333
  for(let result of query) { ... }
330
334
 
331
- // ..or using slice operator
335
+ // ..or collect result into an array using the spread operator
332
336
  const results = [...query];
333
337
  ```
334
338
 
335
- TODO see code examples below
336
-
337
- ### Built-in operators
339
+ #### Optimized row iteration
338
340
 
339
341
  The query engine works by applying a number of [query
340
342
  terms](https://docs.thi.ng/umbrella/column-store/interfaces/QueryTerm.html) in
341
343
  series, with each step intersecting (aka logical AND) its results with the
342
344
  results of the previous step(s), thereby narrowing down the result set.
343
345
 
346
+ For each query term, only the rows already marked (aka pre-selected by
347
+ predecessor query terms) are visited. When a query term does not manage to
348
+ select any rows, the query is terminated. Internally, this selecting and
349
+ intersecting of partial query results is done via bitfields only. There's no
350
+ creation of interim result arrays, nor any full decoding/construction of interim
351
+ row records. The latter only happens for the final result rows and/or when using
352
+ the [`matchRow()` or `matchPartialRow()`](#predicate-based-matchers) query
353
+ operators.
354
+
355
+ When a column has an associated bitfield index (enabled via
356
+ [`FLAG_BITMAP`](#flag_bitmap)), some query operators (see below) are optimized
357
+ even further, entirely avoiding the need to visit any individual rows.
358
+
359
+ The diagram below illustrates the application of the following 3-operator query
360
+ and the resulting stepwise narrowing of the result set:
361
+
362
+ ```ts
363
+ table.query()
364
+ .matchColumn("id", inRange(100, 110))
365
+ .matchColumn("age", inRange(20, 50))
366
+ .matchColumn("name", startsWith("a"))
367
+ ```
368
+
369
+ ![Diagram showing a list of rows with object values and three columns
370
+ illustrating the narrowing effect of query operators with their partial
371
+ results](https://raw.githubusercontent.com/thi-ng/umbrella/develop/assets/column-store/query-narrowing.png)
372
+
373
+ ### Built-in operators
374
+
344
375
  By default, individual query terms operate on a single column, but can
345
376
  also apply to multiple. Terms are supplied either as array given to the
346
377
  [`Query`](https://docs.thi.ng/umbrella/column-store/classes/Query.html)
@@ -382,12 +413,18 @@ can be used, otherwise the behavior is:
382
413
 
383
414
  #### Predicate-based matchers
384
415
 
416
+ > [!NOTE]
417
+ > For best performance and to minimize/avoid potential decoding and construction
418
+ > of interim row objects, prefer `matchColumn` or `matchPartialRow` over
419
+ > `matchRow` if at all possible. Oftentimes, query predicates requiring multiple
420
+ > column values can be easily refactored into separate query terms.
421
+
385
422
  - [`matchColumn`](https://docs.thi.ng/umbrella/column-store/classes/Query.html#matchcolumn):
386
423
  apply predicate to column value
387
- - [`matchRow`](https://docs.thi.ng/umbrella/column-store/classes/Query.html#matchrow):
388
- apply predicate to full row
389
424
  - [`matchPartialRow`](https://docs.thi.ng/umbrella/column-store/classes/Query.html#matchpartialrow):
390
425
  apply predicate to partial row (only selected columns)
426
+ - [`matchRow`](https://docs.thi.ng/umbrella/column-store/classes/Query.html#matchrow):
427
+ apply predicate to full row
391
428
 
392
429
  #### Row ranges
393
430
 
@@ -458,7 +495,7 @@ For Node.js REPL:
458
495
  const cs = await import("@thi.ng/column-store");
459
496
  ```
460
497
 
461
- Package sizes (brotli'd, pre-treeshake): ESM: 5.71 KB
498
+ Package sizes (brotli'd, pre-treeshake): ESM: 5.78 KB
462
499
 
463
500
  ## Dependencies
464
501
 
package/api.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { FloatType, Fn3, IntType, Maybe, Predicate, UintType } from "@thi.ng/api";
1
+ import type { FloatType, Fn3, IClear, IntType, Maybe, Predicate, UintType } from "@thi.ng/api";
2
2
  import type { BitmapIndex } from "./bitmap.js";
3
3
  import type { QueryCtx } from "./query.js";
4
4
  import type { Table } from "./table.js";
@@ -84,7 +84,7 @@ export declare const FLAG_UNIQUE: number;
84
84
  export declare const FLAG_RLE: number;
85
85
  /** @internal */
86
86
  export declare const LIMITS: Record<NumericType, [number, number]>;
87
- export interface IColumn extends Iterable<any> {
87
+ export interface IColumn extends Iterable<any>, IClear {
88
88
  bitmap?: BitmapIndex;
89
89
  readonly isArray: boolean;
90
90
  load(spec: SerializedColumn): void;
@@ -192,4 +192,10 @@ export interface QueryTermOpSpec {
192
192
  */
193
193
  fn: QueryTermOp;
194
194
  }
195
+ /**
196
+ * Initial capacity for typedarray and vector columns
197
+ *
198
+ * @internal
199
+ */
200
+ export declare const INITIAL_CAPACITY = 8;
195
201
  //# sourceMappingURL=api.d.ts.map
package/api.js CHANGED
@@ -17,11 +17,13 @@ const LIMITS = {
17
17
  f32: [-Infinity, Infinity],
18
18
  f64: [-Infinity, Infinity]
19
19
  };
20
+ const INITIAL_CAPACITY = 8;
20
21
  export {
21
22
  FLAG_BITMAP,
22
23
  FLAG_DICT,
23
24
  FLAG_RLE,
24
25
  FLAG_UNIQUE,
26
+ INITIAL_CAPACITY,
25
27
  LIMITS,
26
28
  ONE_PLUS,
27
29
  OPTIONAL,
@@ -13,6 +13,7 @@ export declare abstract class AColumn<T extends Row = Row> implements IColumn {
13
13
  constructor(id: ColumnID<T>, table: Table<T>);
14
14
  [Symbol.iterator](): Generator<any, void, unknown>;
15
15
  reindex(): void;
16
+ abstract clear(): void;
16
17
  abstract load(spec: SerializedColumn): void;
17
18
  abstract validate(value: any): boolean;
18
19
  abstract setRow(i: number, value: any): void;
@@ -5,6 +5,7 @@ export declare class DictTupleColumn<T extends Row = Row> extends AColumn<T> {
5
5
  values: (number[] | null)[];
6
6
  dict: BidirIndex<any>;
7
7
  readonly isArray = true;
8
+ clear(): void;
8
9
  load({ dict, values }: SerializedColumn): void;
9
10
  reindex(): void;
10
11
  encode(value: any): (number | null)[];
@@ -9,6 +9,11 @@ class DictTupleColumn extends AColumn {
9
9
  values = [];
10
10
  dict = new BidirIndex();
11
11
  isArray = true;
12
+ clear() {
13
+ this.values = [];
14
+ this.dict.clear();
15
+ this.bitmap?.clear();
16
+ }
12
17
  load({ dict, values }) {
13
18
  this.values = values;
14
19
  super.loadDict(dict);
package/columns/dict.d.ts CHANGED
@@ -5,6 +5,7 @@ export declare class DictColumn<T extends Row = Row> extends AColumn<T> {
5
5
  values: (number | null)[];
6
6
  dict: BidirIndex<any>;
7
7
  readonly isArray = false;
8
+ clear(): void;
8
9
  load({ dict, values }: SerializedColumn): void;
9
10
  reindex(): void;
10
11
  encode(value: any): number | null;
package/columns/dict.js CHANGED
@@ -11,6 +11,11 @@ class DictColumn extends AColumn {
11
11
  values = [];
12
12
  dict = new BidirIndex();
13
13
  isArray = false;
14
+ clear() {
15
+ this.values = [];
16
+ this.dict.clear();
17
+ this.bitmap?.clear();
18
+ }
14
19
  load({ dict, values }) {
15
20
  this.values = this.spec.flags & FLAG_RLE ? this.spec.cardinality[0] === 0 && this.spec.default == null ? decodeSimple(values) : Array.from(decodeBinary(values)) : values;
16
21
  super.loadDict(dict);
@@ -3,6 +3,7 @@ import { AColumn } from "./acolumn.js";
3
3
  export declare class PlainColumn<T extends Row = Row> extends AColumn<T> {
4
4
  values: any[];
5
5
  readonly isArray = false;
6
+ clear(): void;
6
7
  load({ values }: SerializedColumn): void;
7
8
  ensureRows(): void;
8
9
  validate(value: any): boolean;
package/columns/plain.js CHANGED
@@ -7,6 +7,10 @@ import { AColumn } from "./acolumn.js";
7
7
  class PlainColumn extends AColumn {
8
8
  values = [];
9
9
  isArray = false;
10
+ clear() {
11
+ this.values = [];
12
+ this.bitmap?.clear();
13
+ }
10
14
  load({ values }) {
11
15
  this.values = this.spec.flags & FLAG_RLE ? Array.from(decodeSimple(values)) : values;
12
16
  this.reindex();
@@ -4,6 +4,7 @@ import { AColumn } from "./acolumn.js";
4
4
  export declare class TupleColumn<T extends Row = Row> extends AColumn<T> {
5
5
  values: Nullable<number[]>[];
6
6
  readonly isArray = true;
7
+ clear(): void;
7
8
  load(spec: SerializedColumn): void;
8
9
  encode(value: any): any[];
9
10
  validate(value: any): boolean;
package/columns/tuple.js CHANGED
@@ -6,6 +6,10 @@ import { AColumn } from "./acolumn.js";
6
6
  class TupleColumn extends AColumn {
7
7
  values = [];
8
8
  isArray = true;
9
+ clear() {
10
+ this.values = [];
11
+ this.bitmap?.clear();
12
+ }
9
13
  load(spec) {
10
14
  this.values = spec.values;
11
15
  this.reindex();
@@ -9,6 +9,7 @@ export declare class TypedArrayColumn<T extends Row = Row> extends AColumn<T> {
9
9
  protected tmp: TypedArray;
10
10
  readonly isArray = false;
11
11
  constructor(id: ColumnID<T>, table: Table<T>);
12
+ clear(): void;
12
13
  load({ values }: SerializedColumn): void;
13
14
  validate(value: any): boolean;
14
15
  ensureRows(): void;
@@ -2,6 +2,7 @@ import { typedArray } from "@thi.ng/api/typedarray";
2
2
  import { isArray } from "@thi.ng/checks/is-array";
3
3
  import { isNumber } from "@thi.ng/checks/is-number";
4
4
  import {
5
+ INITIAL_CAPACITY,
5
6
  LIMITS
6
7
  } from "../api.js";
7
8
  import { __indexOfSingle, __lastIndexOfSingle } from "../internal/indexof.js";
@@ -18,9 +19,13 @@ class TypedArrayColumn extends AColumn {
18
19
  super(id, table);
19
20
  this.type = table.schema[id].type;
20
21
  this.limit = LIMITS[this.type];
21
- this.values = typedArray(this.type, 8);
22
+ this.values = typedArray(this.type, INITIAL_CAPACITY);
22
23
  this.tmp = typedArray(this.type, 1);
23
24
  }
25
+ clear() {
26
+ this.values = typedArray(this.type, INITIAL_CAPACITY);
27
+ this.bitmap?.clear();
28
+ }
24
29
  load({ values }) {
25
30
  this.values = __deserializeTyped(this.type, this.spec.flags, values);
26
31
  this.reindex();
@@ -10,6 +10,7 @@ export declare class VectorColumn<T extends Row = Row> extends AColumn<T> {
10
10
  protected tmp: TypedArray;
11
11
  readonly isArray = false;
12
12
  constructor(id: ColumnID<T>, table: Table<T>);
13
+ clear(): void;
13
14
  load({ values }: SerializedColumn): void;
14
15
  validate(value: any): boolean;
15
16
  ensureRows(): void;
package/columns/vector.js CHANGED
@@ -4,6 +4,7 @@ import { isArrayLike } from "@thi.ng/checks/is-arraylike";
4
4
  import { isNumber } from "@thi.ng/checks/is-number";
5
5
  import { unsupportedOp } from "@thi.ng/errors/unsupported";
6
6
  import {
7
+ INITIAL_CAPACITY,
7
8
  LIMITS
8
9
  } from "../api.js";
9
10
  import { __clampRange } from "../internal/indexof.js";
@@ -21,9 +22,13 @@ class VectorColumn extends AColumn {
21
22
  this.type = this.spec.type.split("v")[0];
22
23
  this.size = this.spec.cardinality[1];
23
24
  this.limit = LIMITS[this.type];
24
- this.values = typedArray(this.type, 8 * this.size);
25
+ this.values = typedArray(this.type, INITIAL_CAPACITY * this.size);
25
26
  this.tmp = typedArray(this.type, this.size);
26
27
  }
28
+ clear() {
29
+ this.values = typedArray(this.type, INITIAL_CAPACITY * this.size);
30
+ this.bitmap?.clear();
31
+ }
27
32
  load({ values }) {
28
33
  this.values = __deserializeTyped(this.type, this.spec.flags, values);
29
34
  this.reindex();
package/index.d.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  export * from "./api.js";
2
2
  export * from "./bitmap.js";
3
+ export * from "./predicates.js";
3
4
  export * from "./query.js";
4
5
  export * from "./table.js";
5
6
  //# sourceMappingURL=index.d.ts.map
package/index.js CHANGED
@@ -1,4 +1,5 @@
1
1
  export * from "./api.js";
2
2
  export * from "./bitmap.js";
3
+ export * from "./predicates.js";
3
4
  export * from "./query.js";
4
5
  export * from "./table.js";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@thi.ng/column-store",
3
- "version": "0.8.0",
3
+ "version": "0.10.0",
4
4
  "description": "In-memory column store database with customizable column types, extensible query engine, bitfield indexing for query acceleration, JSON serialization with optional RLE compression",
5
5
  "type": "module",
6
6
  "module": "./index.js",
@@ -115,6 +115,9 @@
115
115
  "./columns/vector": {
116
116
  "default": "./columns/vector.js"
117
117
  },
118
+ "./predicates": {
119
+ "default": "./predicates.js"
120
+ },
118
121
  "./query": {
119
122
  "default": "./query.js"
120
123
  },
@@ -126,5 +129,5 @@
126
129
  "status": "alpha",
127
130
  "year": 2025
128
131
  },
129
- "gitHead": "63ca02b09367c07f6cb785642bc3f2b7df2e5804\n"
132
+ "gitHead": "f15a589f1695f67f2e90b8d221a89fc9960a2e83\n"
130
133
  }
@@ -0,0 +1,25 @@
1
+ import type { NumOrString, Predicate } from "@thi.ng/api";
2
+ /**
3
+ * Higher-order query predicate for {@link Query.matchColumn}. The returned
4
+ * predicate returns true if a row value is in the semi-open interval defined by
5
+ * `[min,max)`.
6
+ *
7
+ * @param min
8
+ * @param max
9
+ */
10
+ export declare const inRange: (min: NumOrString, max: NumOrString) => Predicate<NumOrString | null>;
11
+ /**
12
+ * Higher-order query predicate for {@link Query.matchColumn}. The returned
13
+ * predicate returns true if a row value starts with given `prefix`.
14
+ *
15
+ * @param prefix
16
+ */
17
+ export declare const startsWith: (prefix: string) => Predicate<string | null>;
18
+ /**
19
+ * Higher-order query predicate for {@link Query.matchColumn}. The returned
20
+ * predicate returns true if a row value matches the given `regexp`.
21
+ *
22
+ * @param re
23
+ */
24
+ export declare const matchRegExp: (re: RegExp) => Predicate<string | null>;
25
+ //# sourceMappingURL=predicates.d.ts.map
package/predicates.js ADDED
@@ -0,0 +1,8 @@
1
+ const inRange = (min, max) => (x) => x != null && x >= min && x < max;
2
+ const startsWith = (prefix) => (x) => x?.startsWith(prefix) ?? false;
3
+ const matchRegExp = (re) => (x) => x != null ? re.test(x) : false;
4
+ export {
5
+ inRange,
6
+ matchRegExp,
7
+ startsWith
8
+ };
package/table.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { Maybe } from "@thi.ng/api";
1
+ import type { IClear, ICopy, IEmpty, Maybe } from "@thi.ng/api";
2
2
  import { type ColumnID, type ColumnSchema, type ColumnSpec, type ColumnTypeSpec, type IColumn, type QueryTerm, type Row, type RowWithMeta, type SerializedTable } from "./api.js";
3
3
  import { Query } from "./query.js";
4
4
  /**
@@ -6,7 +6,7 @@ import { Query } from "./query.js";
6
6
  */
7
7
  export interface TableOpts {
8
8
  }
9
- export declare class Table<T extends Row> {
9
+ export declare class Table<T extends Row> implements IClear, ICopy<Table<T>>, IEmpty<Table<T>> {
10
10
  opts: TableOpts;
11
11
  schema: ColumnSchema<T>;
12
12
  columns: Record<ColumnID<T>, IColumn>;
@@ -15,12 +15,15 @@ export declare class Table<T extends Row> {
15
15
  constructor(schema: Record<ColumnID<T>, Partial<ColumnSpec> & {
16
16
  type: ColumnSpec["type"];
17
17
  }>, opts?: Partial<TableOpts>);
18
+ clear(): void;
19
+ copy(): Table<T>;
20
+ empty(): Table<T>;
18
21
  query(terms?: QueryTerm<T>[]): Query<T>;
19
22
  addColumn(id: ColumnID<T>, spec: Partial<ColumnSpec> & {
20
23
  type: ColumnSpec["type"];
21
24
  }): IColumn;
22
25
  removeColumn(id: ColumnID<T>): boolean;
23
- [Symbol.iterator](): Generator<Maybe<T>, void, unknown>;
26
+ [Symbol.iterator](): Generator<T, void, unknown>;
24
27
  reindex(): void;
25
28
  addRow(row: Partial<T>): void;
26
29
  addRows(rows: Iterable<Partial<T>>): void;
@@ -32,6 +35,7 @@ export declare class Table<T extends Row> {
32
35
  getPartialRow<K extends ColumnID<T>>(i: number, columns: K[], safe?: boolean): Maybe<Pick<T, K>>;
33
36
  getPartialRow<K extends ColumnID<T>>(i: number, columns: K[], safe?: boolean, includeID?: false): Maybe<Pick<T, K>>;
34
37
  getPartialRow<K extends ColumnID<T>>(i: number, columns: K[], safe?: boolean, includeID?: true): Maybe<RowWithMeta<Pick<T, K>>>;
38
+ slice(start?: number, end?: number): Table<T>;
35
39
  indexOf(id: ColumnID<T>, value: any, start?: number, end?: number): number;
36
40
  lastIndexOf(id: ColumnID<T>, value: any, start?: number, end?: number): number;
37
41
  validateRow(row: Partial<T>): void;
package/table.js CHANGED
@@ -13,6 +13,7 @@ import { TupleColumn } from "./columns/tuple.js";
13
13
  import { TypedArrayColumn } from "./columns/typedarray.js";
14
14
  import { VectorColumn } from "./columns/vector.js";
15
15
  import { __columnError } from "./internal/checks.js";
16
+ import { __clamp } from "./internal/indexof.js";
16
17
  import { Query } from "./query.js";
17
18
  class Table {
18
19
  opts;
@@ -31,6 +32,19 @@ class Table {
31
32
  this.opts = { ...opts };
32
33
  for (let id in schema) this.addColumn(id, schema[id]);
33
34
  }
35
+ clear() {
36
+ const { columns } = this;
37
+ for (let id in columns) columns[id].clear();
38
+ this.length = 0;
39
+ }
40
+ copy() {
41
+ const copy = new Table(this.schema, this.opts);
42
+ copy.addRows(this);
43
+ return copy;
44
+ }
45
+ empty() {
46
+ return new Table(this.schema, this.opts);
47
+ }
34
48
  query(terms) {
35
49
  return new Query(this, terms);
36
50
  }
@@ -104,6 +118,14 @@ class Table {
104
118
  }
105
119
  return row;
106
120
  }
121
+ slice(start = 0, end) {
122
+ const max = this.length;
123
+ start = __clamp(start, 0, max);
124
+ end = __clamp(end ?? max, start, max);
125
+ const copy = this.empty();
126
+ for (let i = start; i < end; i++) copy.addRow(this.getRow(i, true));
127
+ return copy;
128
+ }
107
129
  indexOf(id, value, start, end) {
108
130
  return this.columns[id]?.indexOf(value, start, end) ?? -1;
109
131
  }