@shrkcrft/compress 0.1.0-alpha.17 → 0.1.0-alpha.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -25,6 +25,7 @@ export type { IFieldSpec } from './table/field-spec.js';
25
25
  export type { ITableCompaction } from './table/table-compaction.js';
26
26
  export { compactObjectArray } from './table/compact-object-array.js';
27
27
  export type { IColumnarTable } from './table/columnar-table.js';
28
+ export type { IDerivedColumn } from './table/derived-columns.js';
28
29
  export { tableToColumnar, compactArrayToColumnar, isColumnarTable, expandColumnar, } from './table/columnar-json.js';
29
30
  export { renderTable } from './table/render-table.js';
30
31
  export { renderCompactJson } from './json/render-compact-json.js';
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,OAAO,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7E,YAAY,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAG/D,OAAO,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AACzD,OAAO,EAAE,iBAAiB,EAAE,MAAM,kCAAkC,CAAC;AACrE,YAAY,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AAGzE,YAAY,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACpD,YAAY,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACtF,YAAY,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,YAAY,EAAE,uBAAuB,EAAE,MAAM,6BAA6B,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAG9D,YAAY,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACxD,YAAY,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AACpE,OAAO,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AACrE,YAAY,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAChE,OAAO,EACL,eAAe,EACf,sBAAsB,EACtB,eAAe,EACf,cAAc,GACf,MAAM,0BAA0B,CAAC;AAClC,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAGvD,OAAO,EACL,aAAa,EACb,YAAY,EACZ,oBAAoB,EACpB,mBAAmB,GACpB,MAAM,0BAA0B,CAAC;AAGlC,YAAY,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAGvF,YAAY,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAC/E,OAAO,EAAE,eAAe,EAAE,OAAO,EAAE,eAAe,EAAE,OAAO,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AAGnH,YAAY,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACxD,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAG5D,YAAY,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAChE,YAAY,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AAGnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,kCAAkC,CAAC;AACxE,YAAY,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACzE,YAAY,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAGrE,OAAO,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAG/D,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAGvD,OAAO,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACzD,YAAY,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAChE,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AACzE,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAC3E,YAAY,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACjF,YAAY,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AACpE,OAAO,EAAE,mBAAmB,EAAE,MAAM,kCAAkC,CAAC;AACvE,OAAO,EAAE,qBAAqB,EAAE,MAAM,oCAAoC,CAAC;AAG3E,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,OAAO,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7E,YAAY,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAG/D,OAAO,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AACzD,OAAO,EAAE,iBAAiB,EAAE,MAAM,kCAAkC,CAAC;AACrE,YAAY,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AAGzE,YAAY,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACpD,YAAY,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACtF,YAAY,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,YAAY,EAAE,uBAAuB,EAAE,MAAM,6BAA6B,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAG9D,YAAY,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACxD,YAAY,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AACpE,OAAO,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AACrE,YAAY,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAChE,YAAY,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AACjE,OAAO,EACL,eAAe,EACf,sBAAsB,EACtB,eAAe,EACf,cAAc,GACf,MAAM,0BAA0B,CAAC;AAClC,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAGvD,OAAO,EACL,aAAa,EACb,YAAY,EACZ,oBAAoB,EACpB,mBAAmB,GACpB,MAAM,0BAA0B,CAAC;AAGlC,YAAY,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAGvF,YAAY,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAC/E,OAAO,EAAE,eAAe,EAAE,OAAO,EAAE,eAAe,EAAE,OAAO,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AAGnH,YAAY,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACxD,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAG5D,YAAY,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAChE,YAAY,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AAGnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,kCAAkC,CAAC;AACxE,YAAY,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACzE,YAAY,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAGrE,OAAO,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAG/D,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAGvD,OAAO,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACzD,YAAY,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAChE,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AACzE,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAC3E,YAAY,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACjF,YAAY,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AACpE,OAAO,EAAE,mBAAmB,EAAE,MAAM,kCAAkC,CAAC;AACvE,OAAO,EAAE,qBAAqB,EAAE,MAAM,oCAAoC,CAAC;AAG3E,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"columnar-json.d.ts","sourceRoot":"","sources":["../../src/table/columnar-json.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAI1D;;;;;;GAMG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,gBAAgB,GAAG,cAAc,CAWvE;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,OAAO,GAAG,cAAc,GAAG,IAAI,CAG5E;AAED,wDAAwD;AACxD,wBAAgB,eAAe,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,cAAc,CAYvE;AAED;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,cAAc,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAiCpF"}
1
+ {"version":3,"file":"columnar-json.d.ts","sourceRoot":"","sources":["../../src/table/columnar-json.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAK1D;;;;;;GAMG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,gBAAgB,GAAG,cAAc,CAgBvE;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,OAAO,GAAG,cAAc,GAAG,IAAI,CAG5E;AAED,wDAAwD;AACxD,wBAAgB,eAAe,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,cAAc,CAYvE;AAED;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,cAAc,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAmCpF"}
@@ -1,5 +1,6 @@
1
1
  import { compactObjectArray } from "./compact-object-array.js";
2
2
  import { applyValueDictionaries } from "./apply-value-dictionaries.js";
3
+ import { dropDerivedColumns, reconstructDerived } from "./derived-columns.js";
3
4
  /**
4
5
  * Encode a compacted table as a valid-JSON columnar object. Only the fields a
5
6
  * decoder needs are emitted — `cols`, `rows`, `absent` (and an optional `dict`).
@@ -8,14 +9,19 @@ import { applyValueDictionaries } from "./apply-value-dictionaries.js";
8
9
  * tokens. Low-cardinality columns are value-dictionary encoded (never inflates).
9
10
  */
10
11
  export function tableToColumnar(table) {
11
- const cols = table.cols.map((c) => c.name);
12
- const { rows, dict } = applyValueDictionaries(cols, table.rows, table.absent);
12
+ const cols0 = table.cols.map((c) => c.name);
13
+ // Drop columns whose every-row value is derivable from another column
14
+ // (e.g. graph file nodes: id="file:"+path, label=basename(path), kind=const).
15
+ // Runs on raw values, before value-dictionary encoding.
16
+ const split = dropDerivedColumns(cols0, table.rows, table.absent);
17
+ const { rows, dict } = applyValueDictionaries(split.cols, split.rows, split.absent);
13
18
  return {
14
19
  _table: {
15
- cols,
20
+ cols: split.cols,
16
21
  rows,
17
- absent: table.absent,
22
+ absent: split.absent,
18
23
  ...(dict ? { dict } : {}),
24
+ ...(split.derived && split.derived.length > 0 ? { derived: split.derived } : {}),
19
25
  },
20
26
  };
21
27
  }
@@ -47,7 +53,7 @@ export function isColumnarTable(value) {
47
53
  * (an absent key stays absent; key order is not significant).
48
54
  */
49
55
  export function expandColumnar(table) {
50
- const { cols, rows, absent, dict } = table._table;
56
+ const { cols, rows, absent, dict, derived } = table._table;
51
57
  const width = cols.length;
52
58
  const absentSet = new Set(absent.map(([r, c]) => r * width + c));
53
59
  const out = [];
@@ -77,6 +83,9 @@ export function expandColumnar(table) {
77
83
  configurable: true,
78
84
  });
79
85
  }
86
+ // Rebuild columns dropped because they were a pure function of a kept one.
87
+ if (derived && derived.length > 0)
88
+ reconstructDerived(obj, derived);
80
89
  out.push(obj);
81
90
  }
82
91
  return out;
@@ -5,6 +5,7 @@
5
5
  * while still deduplicating the schema out of every row. This is what shrk's
6
6
  * MCP tools emit so JSON-parsing agents keep working.
7
7
  */
8
+ import type { IDerivedColumn } from './derived-columns.js';
8
9
  export interface IColumnarTable {
9
10
  _table: {
10
11
  /** Column names, in schema order. */
@@ -19,6 +20,12 @@ export interface IColumnarTable {
19
20
  * integer indices into `dict[name]` — deref to recover the real value.
20
21
  */
21
22
  dict?: Record<string, unknown[]>;
23
+ /**
24
+ * Optional reconstruction list for columns dropped because each row's value
25
+ * is a pure function of another column (`const` | `prefix` | `basename`).
26
+ * `expandColumnar` rebuilds them losslessly; absent here means none dropped.
27
+ */
28
+ derived?: IDerivedColumn[];
22
29
  };
23
30
  }
24
31
  //# sourceMappingURL=columnar-table.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"columnar-table.d.ts","sourceRoot":"","sources":["../../src/table/columnar-table.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE;QACN,qCAAqC;QACrC,IAAI,EAAE,MAAM,EAAE,CAAC;QACf,0DAA0D;QAC1D,IAAI,EAAE,OAAO,EAAE,EAAE,CAAC;QAClB,wEAAwE;QACxE,MAAM,EAAE,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;QAChC;;;;WAIG;QACH,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC;KAClC,CAAC;CACH"}
1
+ {"version":3,"file":"columnar-table.d.ts","sourceRoot":"","sources":["../../src/table/columnar-table.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAE3D,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE;QACN,qCAAqC;QACrC,IAAI,EAAE,MAAM,EAAE,CAAC;QACf,0DAA0D;QAC1D,IAAI,EAAE,OAAO,EAAE,EAAE,CAAC;QAClB,wEAAwE;QACxE,MAAM,EAAE,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;QAChC;;;;WAIG;QACH,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC;QACjC;;;;WAIG;QACH,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;KAC5B,CAAC;CACH"}
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Reversible "derived column" pass for the columnar codec.
3
+ *
4
+ * Many homogeneous object arrays carry columns that are a pure, deterministic
5
+ * function of another column FOR EVERY ROW — e.g. a graph file node where
6
+ * `id === "file:" + path`, `label === basename(path)`, `kind === "file"`.
7
+ * Value-dictionary/columnar encoding can only dedupe the *key* and
8
+ * low-cardinality *values*; it can't recover this per-row derivable content.
9
+ * This pass drops such a column entirely and records how to rebuild it, so
10
+ * {@link reconstructDerived} restores the exact value on decode.
11
+ *
12
+ * Correctness rules:
13
+ * - CONTENT-checked over every row (never a key-name heuristic) — a column
14
+ * is only dropped when the transform holds for all rows. Rule/path nodes
15
+ * where `label` is NOT a function of `id` are left untouched.
16
+ * - A derived column's base must itself be irreducible (a "base" column), so
17
+ * decode never chases a dropped column. No cycles, no chains.
18
+ * - Only fully-present columns (no absent cells) participate, so the `absent`
19
+ * map never references a dropped column.
20
+ */
21
+ export interface IDerivedColumn {
22
+ /** Column to reconstruct on decode. */
23
+ name: string;
24
+ /** Reconstruction op. */
25
+ op: 'const' | 'prefix' | 'basename';
26
+ /** Base column name the value is derived from (op = prefix | basename). */
27
+ from?: string;
28
+ /** Prefix string (op = prefix) or the constant value (op = const). */
29
+ arg?: unknown;
30
+ }
31
+ export interface IDerivedSplit {
32
+ cols: string[];
33
+ rows: unknown[][];
34
+ absent: Array<[number, number]>;
35
+ derived?: IDerivedColumn[];
36
+ }
37
+ /**
38
+ * Detect and drop derivable columns from a compacted table's raw rows. Returns
39
+ * the reduced cols/rows/absent plus the `derived` reconstruction list (omitted
40
+ * when nothing was dropped). Runs BEFORE value-dictionary encoding so it sees
41
+ * the real values.
42
+ */
43
+ export declare function dropDerivedColumns(cols: readonly string[], rows: readonly unknown[][], absent: ReadonlyArray<[number, number]>): IDerivedSplit;
44
+ /** Restore the dropped derived columns onto a decoded object (in place). */
45
+ export declare function reconstructDerived(obj: Record<string, unknown>, derived: readonly IDerivedColumn[]): void;
46
+ //# sourceMappingURL=derived-columns.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"derived-columns.d.ts","sourceRoot":"","sources":["../../src/table/derived-columns.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,MAAM,WAAW,cAAc;IAC7B,uCAAuC;IACvC,IAAI,EAAE,MAAM,CAAC;IACb,yBAAyB;IACzB,EAAE,EAAE,OAAO,GAAG,QAAQ,GAAG,UAAU,CAAC;IACpC,2EAA2E;IAC3E,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,sEAAsE;IACtE,GAAG,CAAC,EAAE,OAAO,CAAC;CACf;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,IAAI,EAAE,OAAO,EAAE,EAAE,CAAC;IAClB,MAAM,EAAE,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IAChC,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;CAC5B;AAoBD;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAChC,IAAI,EAAE,SAAS,MAAM,EAAE,EACvB,IAAI,EAAE,SAAS,OAAO,EAAE,EAAE,EAC1B,MAAM,EAAE,aAAa,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,GACtC,aAAa,CA4Gf;AAED,4EAA4E;AAC5E,wBAAgB,kBAAkB,CAChC,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAC5B,OAAO,EAAE,SAAS,cAAc,EAAE,GACjC,IAAI,CAiBN"}
@@ -0,0 +1,172 @@
1
+ /**
2
+ * Reversible "derived column" pass for the columnar codec.
3
+ *
4
+ * Many homogeneous object arrays carry columns that are a pure, deterministic
5
+ * function of another column FOR EVERY ROW — e.g. a graph file node where
6
+ * `id === "file:" + path`, `label === basename(path)`, `kind === "file"`.
7
+ * Value-dictionary/columnar encoding can only dedupe the *key* and
8
+ * low-cardinality *values*; it can't recover this per-row derivable content.
9
+ * This pass drops such a column entirely and records how to rebuild it, so
10
+ * {@link reconstructDerived} restores the exact value on decode.
11
+ *
12
+ * Correctness rules:
13
+ * - CONTENT-checked over every row (never a key-name heuristic) — a column
14
+ * is only dropped when the transform holds for all rows. Rule/path nodes
15
+ * where `label` is NOT a function of `id` are left untouched.
16
+ * - A derived column's base must itself be irreducible (a "base" column), so
17
+ * decode never chases a dropped column. No cycles, no chains.
18
+ * - Only fully-present columns (no absent cells) participate, so the `absent`
19
+ * map never references a dropped column.
20
+ */
21
+ /** Last path segment (everything after the final `/`), or the whole string. */
22
+ function basename(s) {
23
+ const i = s.lastIndexOf('/');
24
+ return i === -1 ? s : s.slice(i + 1);
25
+ }
26
+ function copy(cols, rows, absent) {
27
+ return {
28
+ cols: [...cols],
29
+ rows: rows.map((r) => [...r]),
30
+ absent: absent.map((a) => [a[0], a[1]]),
31
+ };
32
+ }
33
+ /**
34
+ * Detect and drop derivable columns from a compacted table's raw rows. Returns
35
+ * the reduced cols/rows/absent plus the `derived` reconstruction list (omitted
36
+ * when nothing was dropped). Runs BEFORE value-dictionary encoding so it sees
37
+ * the real values.
38
+ */
39
+ export function dropDerivedColumns(cols, rows, absent) {
40
+ const n = rows.length;
41
+ // Multi-row only — a single row never pays for the reconstruction metadata.
42
+ if (n < 2 || cols.length < 1)
43
+ return copy(cols, rows, absent);
44
+ const absentCols = new Set();
45
+ for (const [, c] of absent)
46
+ absentCols.add(c);
47
+ const present = (c) => !absentCols.has(c);
48
+ const val = (r, c) => rows[r][c];
49
+ const isStringCol = (c) => present(c) && rows.every((row) => typeof row[c] === 'string');
50
+ const dropped = new Set();
51
+ const derived = [];
52
+ // 1) Constant columns (every present row holds the same JSON primitive).
53
+ for (let c = 0; c < cols.length; c += 1) {
54
+ if (!present(c))
55
+ continue;
56
+ const first = val(0, c);
57
+ if (typeof first === 'object' && first !== null)
58
+ continue; // only primitives/null
59
+ let allEqual = true;
60
+ for (let r = 1; r < n; r += 1) {
61
+ if (val(r, c) !== first) {
62
+ allEqual = false;
63
+ break;
64
+ }
65
+ }
66
+ if (allEqual) {
67
+ dropped.add(c);
68
+ derived.push({ name: cols[c], op: 'const', arg: first });
69
+ }
70
+ }
71
+ // 2) Prefix / basename derivations among the remaining string columns.
72
+ const stringCols = [];
73
+ for (let c = 0; c < cols.length; c += 1) {
74
+ if (!dropped.has(c) && isStringCol(c))
75
+ stringCols.push(c);
76
+ }
77
+ // All valid prefix/basename derivations of column `c` (from every other
78
+ // string column), so we can later prefer one whose base is irreducible.
79
+ const allTransformsOf = (c) => {
80
+ const out = [];
81
+ for (const d of stringCols) {
82
+ if (d === c)
83
+ continue;
84
+ const c0 = val(0, c);
85
+ const d0 = val(0, d);
86
+ // prefix: c === arg + d for all rows (c contains d as a suffix).
87
+ if (c0.endsWith(d0) && c0.length > d0.length) {
88
+ const arg = c0.slice(0, c0.length - d0.length);
89
+ let ok = true;
90
+ for (let r = 0; r < n; r += 1) {
91
+ if (val(r, c) !== arg + val(r, d)) {
92
+ ok = false;
93
+ break;
94
+ }
95
+ }
96
+ if (ok)
97
+ out.push({ name: cols[c], op: 'prefix', from: cols[d], arg });
98
+ }
99
+ // basename: c === basename(d) for all rows.
100
+ let okB = true;
101
+ for (let r = 0; r < n; r += 1) {
102
+ if (val(r, c) !== basename(val(r, d))) {
103
+ okB = false;
104
+ break;
105
+ }
106
+ }
107
+ if (okB)
108
+ out.push({ name: cols[c], op: 'basename', from: cols[d] });
109
+ }
110
+ return out;
111
+ };
112
+ const candidates = new Map();
113
+ for (const c of stringCols)
114
+ candidates.set(c, allTransformsOf(c));
115
+ // A column is a BASE iff it cannot be derived from any other column.
116
+ const baseNames = new Set();
117
+ for (const c of stringCols)
118
+ if (candidates.get(c).length === 0)
119
+ baseNames.add(cols[c]);
120
+ // Drop a column only when it can be derived from an irreducible base, so the
121
+ // decoder never chases a dropped column (breaks mutual-derivability cleanly:
122
+ // path stays, id & label derive from it).
123
+ for (const c of stringCols) {
124
+ const pick = candidates.get(c).find((t) => t.from && baseNames.has(t.from));
125
+ if (pick) {
126
+ dropped.add(c);
127
+ derived.push(pick);
128
+ }
129
+ }
130
+ if (dropped.size === 0)
131
+ return copy(cols, rows, absent);
132
+ // Rebuild cols/rows without the dropped columns; remap `absent` col indices
133
+ // (dropped columns carry no absent cells, so none are lost).
134
+ const oldToNew = new Map();
135
+ const keptIdx = [];
136
+ for (let c = 0; c < cols.length; c += 1) {
137
+ if (!dropped.has(c)) {
138
+ oldToNew.set(c, keptIdx.length);
139
+ keptIdx.push(c);
140
+ }
141
+ }
142
+ const newCols = keptIdx.map((c) => cols[c]);
143
+ const newRows = rows.map((row) => keptIdx.map((c) => row[c]));
144
+ const newAbsent = [];
145
+ for (const [r, c] of absent) {
146
+ const nc = oldToNew.get(c);
147
+ if (nc !== undefined)
148
+ newAbsent.push([r, nc]);
149
+ }
150
+ return { cols: newCols, rows: newRows, absent: newAbsent, derived };
151
+ }
152
+ /** Restore the dropped derived columns onto a decoded object (in place). */
153
+ export function reconstructDerived(obj, derived) {
154
+ for (const d of derived) {
155
+ let value;
156
+ if (d.op === 'const') {
157
+ value = d.arg;
158
+ }
159
+ else {
160
+ const base = obj[d.from];
161
+ if (typeof base !== 'string')
162
+ continue; // base missing/non-string — leave absent
163
+ value = d.op === 'prefix' ? String(d.arg) + base : basename(base);
164
+ }
165
+ Object.defineProperty(obj, d.name, {
166
+ value,
167
+ writable: true,
168
+ enumerable: true,
169
+ configurable: true,
170
+ });
171
+ }
172
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@shrkcrft/compress",
3
- "version": "0.1.0-alpha.17",
3
+ "version": "0.1.0-alpha.18",
4
4
  "description": "SharkCraft deterministic context-compression engine: content routing, lossless columnar/table compaction, log/search/diff line reduction, and reversible Compress-Cache-Retrieve (CCR). No model inside — every transform is a pure function of its input.",
5
5
  "license": "MIT",
6
6
  "author": "SharkCraft contributors",
@@ -44,7 +44,7 @@
44
44
  "typecheck": "tsc --noEmit -p tsconfig.json"
45
45
  },
46
46
  "dependencies": {
47
- "@shrkcrft/core": "^0.1.0-alpha.17"
47
+ "@shrkcrft/core": "^0.1.0-alpha.18"
48
48
  },
49
49
  "publishConfig": {
50
50
  "access": "public"