@shrkcrft/compress 0.1.0-alpha.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +21 -0
- package/dist/cache/align-volatile-tokens.d.ts +13 -0
- package/dist/cache/align-volatile-tokens.d.ts.map +1 -0
- package/dist/cache/align-volatile-tokens.js +51 -0
- package/dist/cache/alignment-map.d.ts +23 -0
- package/dist/cache/alignment-map.d.ts.map +1 -0
- package/dist/cache/alignment-map.js +1 -0
- package/dist/cache/alignment-result.d.ts +11 -0
- package/dist/cache/alignment-result.d.ts.map +1 -0
- package/dist/cache/alignment-result.js +1 -0
- package/dist/cache/detect-volatile-tokens.d.ts +10 -0
- package/dist/cache/detect-volatile-tokens.d.ts.map +1 -0
- package/dist/cache/detect-volatile-tokens.js +41 -0
- package/dist/cache/placeholder.d.ts +28 -0
- package/dist/cache/placeholder.d.ts.map +1 -0
- package/dist/cache/placeholder.js +0 -0
- package/dist/cache/restore-volatile-tokens.d.ts +10 -0
- package/dist/cache/restore-volatile-tokens.d.ts.map +1 -0
- package/dist/cache/restore-volatile-tokens.js +21 -0
- package/dist/cache/volatile-classify.d.ts +11 -0
- package/dist/cache/volatile-classify.d.ts.map +1 -0
- package/dist/cache/volatile-classify.js +35 -0
- package/dist/cache/volatile-kind.d.ts +13 -0
- package/dist/cache/volatile-kind.d.ts.map +1 -0
- package/dist/cache/volatile-kind.js +13 -0
- package/dist/cache/volatile-token.d.ts +14 -0
- package/dist/cache/volatile-token.d.ts.map +1 -0
- package/dist/cache/volatile-token.js +1 -0
- package/dist/ccr/ccr-entry.d.ts +13 -0
- package/dist/ccr/ccr-entry.d.ts.map +1 -0
- package/dist/ccr/ccr-entry.js +1 -0
- package/dist/ccr/ccr-key.d.ts +9 -0
- package/dist/ccr/ccr-key.d.ts.map +1 -0
- package/dist/ccr/ccr-key.js +19 -0
- package/dist/ccr/ccr-marker.d.ts +23 -0
- package/dist/ccr/ccr-marker.d.ts.map +1 -0
- package/dist/ccr/ccr-marker.js +30 -0
- package/dist/ccr/ccr-store.d.ts +18 -0
- package/dist/ccr/ccr-store.d.ts.map +1 -0
- package/dist/ccr/ccr-store.js +1 -0
- package/dist/ccr/file-ccr-store.d.ts +19 -0
- package/dist/ccr/file-ccr-store.d.ts.map +1 -0
- package/dist/ccr/file-ccr-store.js +53 -0
- package/dist/ccr/in-memory-ccr-store.d.ts +21 -0
- package/dist/ccr/in-memory-ccr-store.d.ts.map +1 -0
- package/dist/ccr/in-memory-ccr-store.js +45 -0
- package/dist/ccr/ttl-file-ccr-store.d.ts +43 -0
- package/dist/ccr/ttl-file-ccr-store.d.ts.map +1 -0
- package/dist/ccr/ttl-file-ccr-store.js +117 -0
- package/dist/code/compress-code.d.ts +4 -0
- package/dist/code/compress-code.d.ts.map +1 -0
- package/dist/code/compress-code.js +294 -0
- package/dist/compress-content.d.ts +11 -0
- package/dist/compress-content.d.ts.map +1 -0
- package/dist/compress-content.js +79 -0
- package/dist/content/content-type.d.ts +28 -0
- package/dist/content/content-type.d.ts.map +1 -0
- package/dist/content/content-type.js +28 -0
- package/dist/content/detect-content-type.d.ts +9 -0
- package/dist/content/detect-content-type.d.ts.map +1 -0
- package/dist/content/detect-content-type.js +184 -0
- package/dist/content/segment.d.ts +21 -0
- package/dist/content/segment.d.ts.map +1 -0
- package/dist/content/segment.js +117 -0
- package/dist/index.d.ts +61 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +49 -0
- package/dist/json/compress-json.d.ts +18 -0
- package/dist/json/compress-json.d.ts.map +1 -0
- package/dist/json/compress-json.js +139 -0
- package/dist/json/render-compact-json.d.ts +10 -0
- package/dist/json/render-compact-json.d.ts.map +1 -0
- package/dist/json/render-compact-json.js +18 -0
- package/dist/relevance/bm25.d.ts +26 -0
- package/dist/relevance/bm25.d.ts.map +1 -0
- package/dist/relevance/bm25.js +115 -0
- package/dist/result/compress-options.d.ts +26 -0
- package/dist/result/compress-options.d.ts.map +1 -0
- package/dist/result/compress-options.js +1 -0
- package/dist/result/compression-result.d.ts +26 -0
- package/dist/result/compression-result.d.ts.map +1 -0
- package/dist/result/compression-result.js +1 -0
- package/dist/result/compression-strategy.d.ts +30 -0
- package/dist/result/compression-strategy.d.ts.map +1 -0
- package/dist/result/compression-strategy.js +30 -0
- package/dist/table/adaptive-size.d.ts +46 -0
- package/dist/table/adaptive-size.d.ts.map +1 -0
- package/dist/table/adaptive-size.js +170 -0
- package/dist/table/apply-value-dictionaries.d.ts +30 -0
- package/dist/table/apply-value-dictionaries.d.ts.map +1 -0
- package/dist/table/apply-value-dictionaries.js +99 -0
- package/dist/table/column-presence.d.ts +20 -0
- package/dist/table/column-presence.d.ts.map +1 -0
- package/dist/table/column-presence.js +52 -0
- package/dist/table/columnar-json.d.ts +24 -0
- package/dist/table/columnar-json.d.ts.map +1 -0
- package/dist/table/columnar-json.js +83 -0
- package/dist/table/columnar-table.d.ts +24 -0
- package/dist/table/columnar-table.d.ts.map +1 -0
- package/dist/table/columnar-table.js +1 -0
- package/dist/table/compact-object-array.d.ts +12 -0
- package/dist/table/compact-object-array.d.ts.map +1 -0
- package/dist/table/compact-object-array.js +88 -0
- package/dist/table/field-spec.d.ts +13 -0
- package/dist/table/field-spec.d.ts.map +1 -0
- package/dist/table/field-spec.js +1 -0
- package/dist/table/object-map.d.ts +28 -0
- package/dist/table/object-map.d.ts.map +1 -0
- package/dist/table/object-map.js +119 -0
- package/dist/table/render-table.d.ts +11 -0
- package/dist/table/render-table.d.ts.map +1 -0
- package/dist/table/render-table.js +39 -0
- package/dist/table/sample-object-array.d.ts +11 -0
- package/dist/table/sample-object-array.d.ts.map +1 -0
- package/dist/table/sample-object-array.js +171 -0
- package/dist/table/sample-options.d.ts +29 -0
- package/dist/table/sample-options.d.ts.map +1 -0
- package/dist/table/sample-options.js +1 -0
- package/dist/table/sampled-table.d.ts +33 -0
- package/dist/table/sampled-table.d.ts.map +1 -0
- package/dist/table/sampled-table.js +8 -0
- package/dist/table/table-compaction.d.ts +19 -0
- package/dist/table/table-compaction.d.ts.map +1 -0
- package/dist/table/table-compaction.js +1 -0
- package/dist/table/table-formats.d.ts +23 -0
- package/dist/table/table-formats.d.ts.map +1 -0
- package/dist/table/table-formats.js +233 -0
- package/dist/text/compress-diff.d.ts +20 -0
- package/dist/text/compress-diff.d.ts.map +1 -0
- package/dist/text/compress-diff.js +344 -0
- package/dist/text/compress-lines.d.ts +12 -0
- package/dist/text/compress-lines.d.ts.map +1 -0
- package/dist/text/compress-lines.js +44 -0
- package/dist/text/compress-log.d.ts +12 -0
- package/dist/text/compress-log.d.ts.map +1 -0
- package/dist/text/compress-log.js +202 -0
- package/dist/text/compress-markdown.d.ts +15 -0
- package/dist/text/compress-markdown.d.ts.map +1 -0
- package/dist/text/compress-markdown.js +96 -0
- package/dist/text/compress-search.d.ts +11 -0
- package/dist/text/compress-search.d.ts.map +1 -0
- package/dist/text/compress-search.js +78 -0
- package/dist/text/finalize.d.ts +21 -0
- package/dist/text/finalize.d.ts.map +1 -0
- package/dist/text/finalize.js +54 -0
- package/dist/text/line-utils.d.ts +20 -0
- package/dist/text/line-utils.d.ts.map +1 -0
- package/dist/text/line-utils.js +65 -0
- package/dist/text/lockfile-names.d.ts +3 -0
- package/dist/text/lockfile-names.d.ts.map +1 -0
- package/dist/text/lockfile-names.js +33 -0
- package/dist/text/log-template.d.ts +31 -0
- package/dist/text/log-template.d.ts.map +1 -0
- package/dist/text/log-template.js +239 -0
- package/dist/tokens/estimate-tokens.d.ts +17 -0
- package/dist/tokens/estimate-tokens.d.ts.map +1 -0
- package/dist/tokens/estimate-tokens.js +53 -0
- package/dist/tokens/token-savings.d.ts +20 -0
- package/dist/tokens/token-savings.d.ts.map +1 -0
- package/dist/tokens/token-savings.js +1 -0
- package/package.json +52 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-column value-dictionary (enum) encoding for the columnar table — a
|
|
3
|
+
* lossless token reduction for low-cardinality columns. The columnar form
|
|
4
|
+
* already hoists the schema (column names written once), but a column like the
|
|
5
|
+
* knowledge graph's `kind`/`relation`/`source` still writes its value once per
|
|
6
|
+
* row. This pass replaces those repeats with a one-time dictionary plus a small
|
|
7
|
+
* integer index per row.
|
|
8
|
+
*
|
|
9
|
+
* Disambiguation: a cell is a dict INDEX iff its column name is a key of the
|
|
10
|
+
* returned `dict` (decided structurally, never by the cell's value/type), so a
|
|
11
|
+
* literal-number column and a dict-encoded numeric enum never collide. Absent
|
|
12
|
+
* cells get no index (they stay in `absent` and are skipped before any deref).
|
|
13
|
+
*
|
|
14
|
+
* Pure and deterministic (dict values in first-appearance order). Two guards
|
|
15
|
+
* ensure it NEVER inflates: a per-column byte check, and a table-level byte
|
|
16
|
+
* fallback that returns the dict-free rows when the dict didn't actually shrink.
|
|
17
|
+
*/
|
|
18
|
+
/** Minimum present cells in a column before a dictionary can pay for itself. */
const MIN_DICT_CELLS = 4;
/** Cheap cap on distinct values; the size net-check below is the real gate. */
const MAX_DICT_CARDINALITY = 64;
/**
 * Canonical structural identity of a JSON value (absent cells are pre-filtered).
 * `JSON.stringify` returns `undefined` for values it cannot serialize; those
 * are keyed as `'null'`.
 */
function canon(value) {
    return JSON.stringify(value) ?? 'null';
}
/**
 * Dictionary-encode every low-cardinality column that strictly shrinks.
 *
 * @param cols   Column names; `cols[c]` labels `rows[r][c]`.
 * @param rows   Row-major cell values. Never mutated — the rows are cloned
 *               lazily, only once a column is actually encoded.
 * @param absent `[row, col]` pairs marking cells that are absent; they are
 *               skipped when building a dictionary.
 * @returns `{ rows }` with the input rows untouched when nothing was encoded
 *          (or encoding would not shrink the table), otherwise
 *          `{ rows, dict }` where `dict[name]` lists a column's distinct values
 *          in first-appearance order and that column's present cells hold
 *          integer indices into it.
 */
export function applyValueDictionaries(cols, rows, absent) {
    const width = cols.length;
    if (width === 0 || rows.length < MIN_DICT_CELLS)
        return { rows };
    // Flatten `[row, col]` to a single number so membership is one Set lookup.
    const absentSet = new Set(absent.map(([r, c]) => r * width + c));
    const dict = {};
    let newRows = null;
    for (let c = 0; c < width; c += 1) {
        const name = cols[c];
        // Split this column into present cells (ascending row order, with
        // canonical keys) and absent rows, in a single pass.
        const presentRows = [];
        const presentKeys = [];
        const absentRows = [];
        for (let r = 0; r < rows.length; r += 1) {
            if (absentSet.has(r * width + c)) {
                absentRows.push(r);
                continue;
            }
            presentRows.push(r);
            presentKeys.push(canon(rows[r][c]));
        }
        if (presentRows.length < MIN_DICT_CELLS)
            continue;
        // Intern distinct values in first-appearance order; record each cell's index.
        const indexOf = new Map();
        const values = [];
        const indices = [];
        for (let i = 0; i < presentRows.length; i += 1) {
            const key = presentKeys[i];
            let idx = indexOf.get(key);
            if (idx === undefined) {
                idx = values.length;
                indexOf.set(key, idx);
                values.push(rows[presentRows[i]][c]);
            }
            indices.push(idx);
        }
        const k = values.length;
        // A column with no repeated value (k === present cells) cannot shrink.
        if (k > MAX_DICT_CARDINALITY || k >= presentRows.length)
            continue;
        // Per-column net check, measured in serialized-JSON string length
        // (UTF-16 code units). The rows structure (commas/brackets) is
        // identical either way, so only the per-cell value vs index size plus
        // the new `"name":[…]` dict entry matter.
        let inlineBytes = 0;
        let indexBytes = 0;
        for (let i = 0; i < presentRows.length; i += 1) {
            inlineBytes += presentKeys[i].length;
            indexBytes += String(indices[i]).length;
        }
        const dictEntryBytes = JSON.stringify(name).length + JSON.stringify(values).length + 2;
        if (indexBytes + dictEntryBytes >= inlineBytes)
            continue;
        // Commit: rewrite this column's cells to indices (absent cells → unread 0).
        if (!newRows)
            newRows = rows.map((row) => row.slice());
        for (let i = 0; i < presentRows.length; i += 1)
            newRows[presentRows[i]][c] = indices[i];
        for (const r of absentRows)
            newRows[r][c] = 0;
        // `dict[name] = values` would invoke the Object.prototype `__proto__`
        // setter for a column literally named "__proto__": the rows would carry
        // indices while the dictionary silently lost its entry, so a decoder
        // would hand back indices instead of values. defineProperty always
        // creates an own enumerable data property.
        Object.defineProperty(dict, name, {
            value: values,
            writable: true,
            enumerable: true,
            configurable: true,
        });
    }
    if (!newRows)
        return { rows };
    // Table-level fallback: account for the shared `,"dict":{…}` wrapper that
    // the per-column check doesn't. If the dict didn't actually shrink the table,
    // ship the dict-free rows so the encoding can never inflate.
    const withDict = JSON.stringify(newRows).length + JSON.stringify(dict).length + 8;
    if (withDict >= JSON.stringify(rows).length)
        return { rows };
    return { rows: newRows, dict };
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared column-presence logic for the homogeneous-shape compactors
|
|
3
|
+
* ({@link compactObjectArray}, {@link compactObjectMap}). Hoisting a schema out
|
|
4
|
+
* of every record only pays off when most columns are present on most records;
|
|
5
|
+
* these pure helpers compute that gate and order the columns deterministically,
|
|
6
|
+
* keeping the threshold tuning in one place.
|
|
7
|
+
*/
|
|
8
|
+
/** True for a plain (non-array, non-null) object — the only shape these compactors hoist. */
|
|
9
|
+
export declare function isPlainObject(value: unknown): value is Record<string, unknown>;
|
|
10
|
+
/** Count, per key, how many records have it set (undefined counts as absent, per JSON). */
|
|
11
|
+
export declare function buildPresenceMap(records: ReadonlyArray<Record<string, unknown>>): Map<string, number>;
|
|
12
|
+
/**
|
|
13
|
+
* Whether columns are broadly-enough shared that hoisting a schema is worth it:
|
|
14
|
+
* a non-empty presence map where ≥ `CORE_RATIO` of columns are present on ≥
|
|
15
|
+
* `CORE_PRESENCE` of the records.
|
|
16
|
+
*/
|
|
17
|
+
export declare function passesHeterogeneityGate(presence: Map<string, number>, recordCount: number): boolean;
|
|
18
|
+
/** Deterministic column order: most-present first, ties broken by name. */
|
|
19
|
+
export declare function sortColumnsByPresence(presence: Map<string, number>): string[];
|
|
20
|
+
//# sourceMappingURL=column-presence.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"column-presence.d.ts","sourceRoot":"","sources":["../../src/table/column-presence.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAOH,6FAA6F;AAC7F,wBAAgB,aAAa,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAE9E;AAED,2FAA2F;AAC3F,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,aAAa,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,GAC9C,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CASrB;AAED;;;;GAIG;AACH,wBAAgB,uBAAuB,CACrC,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EAC7B,WAAW,EAAE,MAAM,GAClB,OAAO,CAMT;AAED,2EAA2E;AAC3E,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,MAAM,EAAE,CAO7E"}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared column-presence logic for the homogeneous-shape compactors
|
|
3
|
+
* ({@link compactObjectArray}, {@link compactObjectMap}). Hoisting a schema out
|
|
4
|
+
* of every record only pays off when most columns are present on most records;
|
|
5
|
+
* these pure helpers compute that gate and order the columns deterministically,
|
|
6
|
+
* keeping the threshold tuning in one place.
|
|
7
|
+
*/
|
|
8
|
+
/** A column is "core" when present on at least this fraction of records. */
const CORE_PRESENCE = 0.8;
/** Compaction only helps when at least this fraction of columns are core. */
const CORE_RATIO = 0.5;
/**
 * Report whether `value` is an object that is neither `null` nor an array —
 * the only shape the compactors hoist a schema from.
 */
export function isPlainObject(value) {
    if (value === null || Array.isArray(value))
        return false;
    return typeof value === 'object';
}
|
|
16
|
+
/**
 * Tally, for each own enumerable key, the number of records on which it holds
 * a value other than `undefined` (an `undefined` value counts as absent, per JSON).
 * Keys appear in the returned Map in first-seen order.
 */
export function buildPresenceMap(records) {
    const counts = new Map();
    for (const record of records) {
        for (const [field, fieldValue] of Object.entries(record)) {
            if (fieldValue !== undefined)
                counts.set(field, (counts.get(field) ?? 0) + 1);
        }
    }
    return counts;
}
|
|
28
|
+
/**
 * Decide whether the columns are shared widely enough for schema hoisting to
 * be worthwhile. An empty presence map never passes. Otherwise a column is
 * "core" when its count reaches `recordCount * CORE_PRESENCE`, and the gate
 * passes when the core share of all columns is at least `CORE_RATIO`.
 */
export function passesHeterogeneityGate(presence, recordCount) {
    const columnCount = presence.size;
    if (columnCount === 0)
        return false;
    const minRecords = recordCount * CORE_PRESENCE;
    const coreCols = [...presence.values()].filter((count) => count >= minRecords).length;
    return coreCols / columnCount >= CORE_RATIO;
}
|
|
43
|
+
/**
 * Return the column names in a deterministic order: higher presence count
 * first, and for equal counts ascending by name (plain `<` / `>` comparison).
 * A name missing from the map is treated as count 0.
 */
export function sortColumnsByPresence(presence) {
    const byPresenceThenName = (left, right) => {
        const leftCount = presence.get(left) ?? 0;
        const rightCount = presence.get(right) ?? 0;
        if (leftCount === rightCount) {
            if (left < right)
                return -1;
            return left > right ? 1 : 0;
        }
        return rightCount - leftCount;
    };
    return Array.from(presence.keys()).sort(byPresenceThenName);
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { ITableCompaction } from './table-compaction.js';
|
|
2
|
+
import type { IColumnarTable } from './columnar-table.js';
|
|
3
|
+
/**
|
|
4
|
+
* Encode a compacted table as a valid-JSON columnar object. Only the fields a
|
|
5
|
+
* decoder needs are emitted — `cols`, `rows`, `absent` (and an optional `dict`).
|
|
6
|
+
* The per-column type hints and the source count (== `rows.length`) are
|
|
7
|
+
* encoder-side only and never read off the wire, so they're omitted to save
|
|
8
|
+
* tokens. Low-cardinality columns are value-dictionary encoded (never inflates).
|
|
9
|
+
*/
|
|
10
|
+
export declare function tableToColumnar(table: ITableCompaction): IColumnarTable;
|
|
11
|
+
/**
|
|
12
|
+
* Compact an object array straight to columnar JSON, or `null` if it doesn't
|
|
13
|
+
* qualify. Convenience for the common "compact this list for output" path.
|
|
14
|
+
*/
|
|
15
|
+
export declare function compactArrayToColumnar(items: unknown): IColumnarTable | null;
|
|
16
|
+
/** Type guard: is `value` a columnar table envelope? */
|
|
17
|
+
export declare function isColumnarTable(value: unknown): value is IColumnarTable;
|
|
18
|
+
/**
|
|
19
|
+
* Reconstruct the original object array from a columnar envelope. Inverse of
|
|
20
|
+
* {@link tableToColumnar} ∘ {@link compactObjectArray} up to JSON semantics
|
|
21
|
+
* (an absent key stays absent; key order is not significant).
|
|
22
|
+
*/
|
|
23
|
+
export declare function expandColumnar(table: IColumnarTable): Array<Record<string, unknown>>;
|
|
24
|
+
//# sourceMappingURL=columnar-json.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"columnar-json.d.ts","sourceRoot":"","sources":["../../src/table/columnar-json.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAI1D;;;;;;GAMG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,gBAAgB,GAAG,cAAc,CAWvE;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,OAAO,GAAG,cAAc,GAAG,IAAI,CAG5E;AAED,wDAAwD;AACxD,wBAAgB,eAAe,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,cAAc,CAYvE;AAED;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,cAAc,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAiCpF"}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { compactObjectArray } from "./compact-object-array.js";
|
|
2
|
+
import { applyValueDictionaries } from "./apply-value-dictionaries.js";
|
|
3
|
+
/**
 * Wrap a compacted table in the `{ _table: … }` columnar envelope. The
 * envelope carries the column names, the rows returned by
 * `applyValueDictionaries`, the table's `absent` list, and — only when that
 * pass produced one — a `dict`. Other fields of `table` are not copied.
 */
export function tableToColumnar(table) {
    const cols = table.cols.map(({ name }) => name);
    const encoded = applyValueDictionaries(cols, table.rows, table.absent);
    const envelope = { cols, rows: encoded.rows, absent: table.absent };
    if (encoded.dict)
        envelope.dict = encoded.dict;
    return { _table: envelope };
}
|
|
22
|
+
/**
 * One-step convenience: run `compactObjectArray` on `items` and, if it yields
 * a table, encode it with `tableToColumnar`. Returns `null` when the compactor
 * returns nothing.
 */
export function compactArrayToColumnar(items) {
    const table = compactObjectArray(items);
    if (!table)
        return null;
    return tableToColumnar(table);
}
|
|
30
|
+
/**
 * Type guard for the columnar envelope: `value` must be a non-null object whose
 * `_table` is a non-null object with array-valued `cols`, `rows` and `absent`,
 * and whose `dict` is either undefined or a non-null, non-array object.
 */
export function isColumnarTable(value) {
    const isObjectLike = (candidate) => typeof candidate === 'object' && candidate !== null;
    if (!isObjectLike(value))
        return false;
    const envelope = value._table;
    if (!isObjectLike(envelope))
        return false;
    if (!Array.isArray(envelope.cols))
        return false;
    if (!Array.isArray(envelope.rows))
        return false;
    if (!Array.isArray(envelope.absent))
        return false;
    const dictionary = envelope.dict;
    if (dictionary === undefined)
        return true;
    return isObjectLike(dictionary) && !Array.isArray(dictionary);
}
|
|
44
|
+
/**
 * Rebuild one object per row from a columnar envelope. For every column that
 * is not marked absent at that position, the object gets an own enumerable
 * property named after the column; when the column name is an own key of
 * `dict`, the cell is treated as an index into `dict[name]`, otherwise the cell
 * value is used as-is. Absent positions produce no property at all.
 */
export function expandColumnar(table) {
    const { cols, rows, absent, dict } = table._table;
    const width = cols.length;
    // Flatten `[row, col]` to one number so the absence test is a Set lookup.
    const absentCells = new Set();
    for (const [r, c] of absent)
        absentCells.add(r * width + c);
    // Own-key test (not `in`) so a column literally named "toString" only
    // counts as dict-encoded when the dictionary really has that key.
    const isDictColumn = (name) => Boolean(dict) && Object.prototype.hasOwnProperty.call(dict, name);
    return Array.from(rows, (maybeRow, r) => {
        const row = maybeRow ?? [];
        const record = {};
        for (const [c, name] of cols.entries()) {
            if (name === undefined || absentCells.has(r * width + c))
                continue;
            const raw = row[c];
            const value = isDictColumn(name) ? dict[name][raw] : raw;
            // Plain assignment would hit the Object.prototype `__proto__`
            // setter for a column named "__proto__" and drop the value;
            // defineProperty always creates an own enumerable data property.
            Object.defineProperty(record, name, {
                value,
                writable: true,
                enumerable: true,
                configurable: true,
            });
        }
        return record;
    });
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Valid-JSON columnar encoding of a compacted table. Unlike the dense text
|
|
3
|
+
* form ({@link renderTable}), this stays parseable JSON — a programmatic
|
|
4
|
+
* client can reconstruct the original objects with {@link expandColumnar} —
|
|
5
|
+
* while still deduplicating the schema out of every row. This is what shrk's
|
|
6
|
+
* MCP tools emit so JSON-parsing agents keep working.
|
|
7
|
+
*/
|
|
8
|
+
export interface IColumnarTable {
|
|
9
|
+
_table: {
|
|
10
|
+
/** Column names, in schema order. */
|
|
11
|
+
cols: string[];
|
|
12
|
+
/** Row-major values; `rows[r][c]` aligns to `cols[c]`. */
|
|
13
|
+
rows: unknown[][];
|
|
14
|
+
/** `[row, col]` positions whose key was absent on the source object. */
|
|
15
|
+
absent: Array<[number, number]>;
|
|
16
|
+
/**
|
|
17
|
+
* Optional per-column value dictionaries (low-cardinality de-duplication).
|
|
18
|
+
* When a column name is a key here, that column's cells in `rows` are
|
|
19
|
+
* integer indices into `dict[name]` — deref to recover the real value.
|
|
20
|
+
*/
|
|
21
|
+
dict?: Record<string, unknown[]>;
|
|
22
|
+
};
|
|
23
|
+
}
|
|
24
|
+
//# sourceMappingURL=columnar-table.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"columnar-table.d.ts","sourceRoot":"","sources":["../../src/table/columnar-table.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE;QACN,qCAAqC;QACrC,IAAI,EAAE,MAAM,EAAE,CAAC;QACf,0DAA0D;QAC1D,IAAI,EAAE,OAAO,EAAE,EAAE,CAAC;QAClB,wEAAwE;QACxE,MAAM,EAAE,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;QAChC;;;;WAIG;QACH,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC;KAClC,CAAC;CACH"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { ITableCompaction } from './table-compaction.js';
|
|
2
|
+
/**
|
|
3
|
+
* Compact a homogeneous array of objects into a lossless columnar table.
|
|
4
|
+
* Returns `null` when the input doesn't qualify (too few rows, non-objects,
|
|
5
|
+
* or too heterogeneous to benefit). A JSON value (`undefined` treated as an
|
|
6
|
+
* absent key, matching JSON semantics) round-trips exactly through
|
|
7
|
+
* {@link expandColumnar}.
|
|
8
|
+
*
|
|
9
|
+
* Deterministic: columns are ordered by descending presence, then by name.
|
|
10
|
+
*/
|
|
11
|
+
export declare function compactObjectArray(items: unknown): ITableCompaction | null;
|
|
12
|
+
//# sourceMappingURL=compact-object-array.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"compact-object-array.d.ts","sourceRoot":"","sources":["../../src/table/compact-object-array.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAqC9D;;;;;;;;GAQG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,OAAO,GAAG,gBAAgB,GAAG,IAAI,CAwC1E"}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import { isPlainObject, buildPresenceMap, passesHeterogeneityGate, sortColumnsByPresence, } from "./column-presence.js";
|
|
2
|
+
/** Minimum rows before a table is worth its schema header. */
const MIN_ROWS = 2;
/**
 * Coarse type hint for a column's values, ignoring `null`/`undefined`:
 * `'null'` when nothing else remains, `'bool'` / `'int'` / `'str'` when every
 * remaining value is of that one kind, `'float'` when all are numbers and at
 * least one is not an integer, and `'json'` for any other mix.
 */
function inferType(values) {
    const kinds = new Set();
    for (const value of values) {
        if (value === null || value === undefined)
            continue;
        switch (typeof value) {
            case 'boolean':
                kinds.add('bool');
                break;
            case 'number':
                kinds.add(Number.isInteger(value) ? 'int' : 'float');
                break;
            case 'string':
                kinds.add('str');
                break;
            default:
                kinds.add('json');
        }
    }
    if (kinds.size === 0)
        return 'null';
    if (kinds.size === 1)
        return [...kinds][0];
    // Integers mixed with non-integers are still one numeric column.
    if (kinds.size === 2 && kinds.has('int') && kinds.has('float'))
        return 'float';
    return 'json';
}
|
|
38
|
+
/**
 * Compact a homogeneous array of objects into a lossless columnar table.
 *
 * Returns `null` when `items` is not an array, has fewer than `MIN_ROWS`
 * entries, contains anything that is not a plain object, or fails the
 * heterogeneity gate. Otherwise returns `{ cols, rows, absent, originalCount }`:
 * `cols[c]` is `{ name, type, nullable }`, `rows[r][c]` is record `r`'s value
 * for column `c` (a `null` placeholder when the key is missing), and `absent`
 * lists the `[r, c]` positions whose key was missing, so `null` and "missing"
 * stay distinguishable. A property whose value is `undefined` is treated as
 * missing, matching JSON semantics.
 *
 * Deterministic: columns are ordered by descending presence, then by name.
 */
export function compactObjectArray(items) {
    if (!Array.isArray(items) || items.length < MIN_ROWS)
        return null;
    for (const item of items)
        if (!isPlainObject(item))
            return null;
    const rowsIn = items;
    // Column presence + heterogeneity gate (shared with the object-map compactor).
    const presence = buildPresenceMap(rowsIn);
    if (!passesHeterogeneityGate(presence, rowsIn.length))
        return null;
    const colNames = sortColumnsByPresence(presence);
    const rows = [];
    const absent = [];
    for (let r = 0; r < rowsIn.length; r += 1) {
        const item = rowsIn[r];
        const row = [];
        for (let c = 0; c < colNames.length; c += 1) {
            const key = colNames[c];
            // Own-property check, not `in`: `in` also matches inherited names, so
            // for a column called "toString", "constructor" or "__proto__" a
            // record WITHOUT that key would be marked present and receive the
            // inherited Object.prototype member as its cell value.
            const present = Object.prototype.hasOwnProperty.call(item, key) && item[key] !== undefined;
            if (!present) {
                absent.push([r, c]);
                row.push(null);
            }
            else {
                row.push(item[key]);
            }
        }
        rows.push(row);
    }
    const cols = colNames.map((name, c) => {
        const columnValues = rows.map((row) => row[c]);
        const presentCount = presence.get(name) ?? 0;
        const hasNull = columnValues.some((v) => v === null);
        return {
            name,
            type: inferType(columnValues),
            // Nullable when some record lacks the key or carries an explicit null.
            nullable: presentCount < rowsIn.length || hasNull,
        };
    });
    return { cols, rows, absent, originalCount: rowsIn.length };
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* One column of a compacted table — its name plus coarse type/nullability
|
|
3
|
+
* hints lifted out of the rows so they aren't repeated per row.
|
|
4
|
+
*/
|
|
5
|
+
export interface IFieldSpec {
|
|
6
|
+
/** Column (object key) name. */
|
|
7
|
+
name: string;
|
|
8
|
+
/** Coarse type hint: `bool` | `int` | `float` | `str` | `json` | `null`. */
|
|
9
|
+
type: string;
|
|
10
|
+
/** True when at least one source object omits the key or has it null. */
|
|
11
|
+
nullable: boolean;
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=field-spec.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"field-spec.d.ts","sourceRoot":"","sources":["../../src/table/field-spec.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,WAAW,UAAU;IACzB,gCAAgC;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,4EAA4E;IAC5E,IAAI,EAAE,MAAM,CAAC;IACb,yEAAyE;IACzE,QAAQ,EAAE,OAAO,CAAC;CACnB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/** The hoisted form of a homogeneous keyed object. */
|
|
2
|
+
export interface IObjectMap {
|
|
3
|
+
/** The map's own keys, in original insertion order. */
|
|
4
|
+
keys: string[];
|
|
5
|
+
/** Hoisted field names, ordered by descending presence then name. */
|
|
6
|
+
cols: string[];
|
|
7
|
+
/** `rows[i][c]` is entry `keys[i]`'s value for `cols[c]` (null when absent). */
|
|
8
|
+
rows: unknown[][];
|
|
9
|
+
/** `[i, c]` pairs whose key was absent on that entry (distinguishes null vs missing). */
|
|
10
|
+
absent: Array<[number, number]>;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Compact a homogeneous keyed object into a columnar {@link IObjectMap}, or
|
|
14
|
+
* return `null` when it doesn't qualify (not a plain object, too few entries,
|
|
15
|
+
* a non-object value, or too heterogeneous to benefit). Deterministic.
|
|
16
|
+
*/
|
|
17
|
+
export declare function compactObjectMap(value: unknown): IObjectMap | null;
|
|
18
|
+
/** True when `value` is a `{ _omap: IObjectMap }` envelope: `_omap` must be an object whose `keys`, `cols`, `rows` and `absent` are all arrays. */
|
|
19
|
+
export declare function isObjectMap(value: unknown): value is {
|
|
20
|
+
_omap: IObjectMap;
|
|
21
|
+
};
|
|
22
|
+
/**
|
|
23
|
+
* Inverse of {@link compactObjectMap}: rebuild the original keyed object.
|
|
24
|
+
* Accepts either a bare {@link IObjectMap} or a `{ _omap }` envelope. Returns
|
|
25
|
+
* `null` when the input is neither (the check is that `keys`, `cols`, `rows` and `absent` are arrays).
|
|
26
|
+
*/
|
|
27
|
+
export declare function expandObjectMap(value: unknown): Record<string, unknown> | null;
|
|
28
|
+
//# sourceMappingURL=object-map.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"object-map.d.ts","sourceRoot":"","sources":["../../src/table/object-map.ts"],"names":[],"mappings":"AAyBA,sDAAsD;AACtD,MAAM,WAAW,UAAU;IACzB,uDAAuD;IACvD,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,qEAAqE;IACrE,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,gFAAgF;IAChF,IAAI,EAAE,OAAO,EAAE,EAAE,CAAC;IAClB,yFAAyF;IACzF,MAAM,EAAE,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;CACjC;AAED;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,OAAO,GAAG,UAAU,GAAG,IAAI,CAmClE;AAED,+DAA+D;AAC/D,wBAAgB,WAAW,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI;IAAE,KAAK,EAAE,UAAU,CAAA;CAAE,CAU1E;AAED;;;;GAIG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAmB9E"}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lossless columnar compaction for an OBJECT keyed by id whose values share a
|
|
3
|
+
* schema — a very common API/registry shape (`{ n1:{kind,score}, n2:{…}, … }`)
|
|
4
|
+
* that {@link compactObjectArray} (arrays only) leaves untouched. The shared
|
|
5
|
+
* field names are hoisted out of every entry into `cols`, written once; each
|
|
6
|
+
* entry becomes a positional `rows[i]` aligned to its key in `keys[i]`. Absent
|
|
7
|
+
* keys are tracked in `absent` so `null`-vs-absent round-trips exactly.
|
|
8
|
+
*
|
|
9
|
+
* The encoding is still valid JSON and exactly reconstructable via
|
|
10
|
+
* {@link expandObjectMap}: `null`, absent keys, and all JSON values round-trip
|
|
11
|
+
* exactly. A property whose value is `undefined` is treated as absent — matching
|
|
12
|
+
* `JSON.stringify` and {@link compactObjectArray} — since `undefined` is not a
|
|
13
|
+
* JSON value and cannot survive any JSON envelope. Net-loss is guarded by the
|
|
14
|
+
* caller (`compressJson`).
|
|
15
|
+
*/
|
|
16
|
+
import { isPlainObject, buildPresenceMap, passesHeterogeneityGate, sortColumnsByPresence, } from "./column-presence.js";
|
|
17
|
+
/** Minimum keyed entries before the hoisted schema pays for itself. */
const MIN_ENTRIES = 2;
/**
 * Compact a homogeneous keyed object into a columnar {@link IObjectMap}.
 *
 * Every value of `value` must itself be a plain object. The field names found
 * across those values become `cols`; each value becomes one positional row in
 * `rows`, aligned with its key in `keys`. A field an entry does not own (or
 * owns with the value `undefined`) is stored as a `null` placeholder and its
 * `[row, col]` position is recorded in `absent`, so a real `null` and a missing
 * key stay distinguishable.
 *
 * @param value Candidate keyed object (any value is accepted).
 * @returns The columnar form, or `null` when `value` is not a plain object,
 *   has fewer than `MIN_ENTRIES` keys, holds a non-plain-object value, or is
 *   rejected by the heterogeneity gate.
 */
export function compactObjectMap(value) {
    if (!isPlainObject(value))
        return null;
    const keys = Object.keys(value);
    if (keys.length < MIN_ENTRIES)
        return null;
    const entries = [];
    for (const key of keys) {
        const v = value[key];
        if (!isPlainObject(v))
            return null;
        entries.push(v);
    }
    // Column presence + heterogeneity gate (shared with the array compactor).
    const presence = buildPresenceMap(entries);
    if (!passesHeterogeneityGate(presence, entries.length))
        return null;
    const cols = sortColumnsByPresence(presence);
    // Own-property test only. The `in` operator also sees inherited members, so
    // a column named `toString`, `constructor` or `__proto__` would look
    // "present" on every entry and the inherited value would be emitted instead
    // of an absent marker, breaking the lossless round-trip.
    const hasOwn = Object.prototype.hasOwnProperty;
    const rows = [];
    const absent = [];
    for (let r = 0; r < entries.length; r += 1) {
        const entry = entries[r];
        const row = [];
        for (let c = 0; c < cols.length; c += 1) {
            const key = cols[c];
            if (hasOwn.call(entry, key) && entry[key] !== undefined) {
                row.push(entry[key]);
            }
            else {
                // Missing (or `undefined`) field: placeholder plus an absent marker.
                absent.push([r, c]);
                row.push(null);
            }
        }
        rows.push(row);
    }
    return { keys, cols, rows, absent };
}
|
|
61
|
+
/**
 * Type guard for the `{ _omap: IObjectMap }` envelope.
 *
 * Both `value` and `value._omap` must pass the plain-object check, and the
 * inner object's `keys`, `cols`, `rows` and `absent` must all be arrays.
 * Array contents are not inspected.
 */
export function isObjectMap(value) {
    if (!isPlainObject(value)) {
        return false;
    }
    const inner = value._omap;
    if (!isPlainObject(inner)) {
        return false;
    }
    return ['keys', 'cols', 'rows', 'absent'].every((field) => Array.isArray(inner[field]));
}
|
|
72
|
+
/**
 * Inverse of {@link compactObjectMap}: rebuild the original keyed object.
 *
 * Accepts either a bare {@link IObjectMap} or a `{ _omap }` envelope (the
 * envelope is checked first). For every `keys[r]` an object is rebuilt from
 * `cols` and `rows[r]`, skipping each `[r, c]` position listed in `absent` so
 * that missing keys stay missing while stored `null`s are restored as `null`.
 *
 * @param value Candidate object map or envelope (any value is accepted).
 * @returns The rebuilt keyed object, or `null` when the input is neither shape
 *   or when `absent` contains something that is not an array pair.
 */
export function expandObjectMap(value) {
    let map;
    if (isObjectMap(value)) {
        map = value._omap;
    }
    else if (isBareObjectMap(value)) {
        map = value;
    }
    else {
        return null;
    }
    // The shape checks only prove `absent` is an array. Validate each pair here
    // so a malformed element (e.g. `null`) yields `null` instead of a TypeError.
    const absent = new Set();
    for (const pair of map.absent) {
        if (!Array.isArray(pair))
            return null;
        absent.add(`${pair[0]},${pair[1]}`);
    }
    const out = {};
    for (let r = 0; r < map.keys.length; r += 1) {
        const obj = {};
        for (let c = 0; c < map.cols.length; c += 1) {
            if (absent.has(`${r},${c}`))
                continue;
            // setOwn (not plain assignment) so a `__proto__` column becomes a real own key.
            setOwn(obj, map.cols[c], map.rows[r]?.[c]);
        }
        setOwn(out, map.keys[r], obj);
    }
    return out;
}
|
|
98
|
+
/**
 * Define `key` on `target` as an own, enumerable, writable, configurable data
 * property holding `value`.
 *
 * A plain `target[key] = value` is not equivalent when `key` is the literal
 * string `"__proto__"` (a real own key after `JSON.parse`): the assignment
 * would go through the `Object.prototype.__proto__` setter and the value would
 * be lost, so the round-trip would no longer be lossless.
 */
function setOwn(target, key, value) {
    const descriptor = {
        configurable: true,
        enumerable: true,
        writable: true,
        value,
    };
    Object.defineProperty(target, key, descriptor);
}
|
|
113
|
+
/**
 * True when `value` itself (no `_omap` wrapper) has the object-map shape:
 * it passes the plain-object check and its `keys`, `cols`, `rows` and `absent`
 * are all arrays. Array contents are not inspected.
 */
function isBareObjectMap(value) {
    if (!isPlainObject(value)) {
        return false;
    }
    for (const field of ['keys', 'cols', 'rows', 'absent']) {
        if (!Array.isArray(value[field])) {
            return false;
        }
    }
    return true;
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { ITableCompaction } from './table-compaction.js';
|
|
2
|
+
/**
|
|
3
|
+
* Render a compacted table as a dense, model-readable text block. The header
|
|
4
|
+
* lifts the schema once (`name?` marks a nullable column); rows are
|
|
5
|
+
* pipe-delimited values, one row per line, with `|` / newlines / backslashes backslash-escaped.
|
|
6
|
+
* This is the densest representation — used by `shrk compress` and the
|
|
7
|
+
* `compress_context` tool, where the consumer reads text rather than parsing
|
|
8
|
+
* JSON. For a parseable form, use {@link tableToColumnar}.
|
|
9
|
+
*/
|
|
10
|
+
export declare function renderTable(table: ITableCompaction): string;
|
|
11
|
+
//# sourceMappingURL=render-table.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"render-table.d.ts","sourceRoot":"","sources":["../../src/table/render-table.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AA0B9D;;;;;;;GAOG;AACH,wBAAgB,WAAW,CAAC,KAAK,EAAE,gBAAgB,GAAG,MAAM,CAS3D"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/** Separator written between the rendered cells of one table row. */
const CELL_DELIM = '|';
|
|
2
|
+
/**
 * Escape a string so it can sit in one pipe-delimited cell on a single line.
 *
 * Backslashes are doubled first (so the escapes added afterwards stay
 * unambiguous), `|` becomes `\|`, and `\n` / `\r\n` become the two characters
 * `\n`. A carriage return that is not part of `\r\n` becomes the two
 * characters `\r`; left raw it would still break the one-row-per-line layout.
 *
 * @param text Raw cell text.
 * @returns The escaped text, containing no raw `|`, `\n` or `\r`.
 */
function escapeCell(text) {
    return text
        .replace(/\\/g, '\\\\')
        .replace(/\|/g, '\\|')
        .replace(/\r?\n/g, '\\n')
        .replace(/\r/g, '\\r');
}
|
|
8
|
+
/**
 * Escape a column name for the schema line.
 *
 * Column names are arbitrary object keys, so on top of the cell escaping the
 * header also has to protect its own syntax: the `,` that separates names and
 * the `?` that marks a nullable column. Without this a key such as `a,b` or
 * `a?` would split or misrepresent the schema line.
 */
function escapeHeaderName(name) {
    const cellSafe = escapeCell(name);
    return cellSafe.replace(/[,?]/g, (ch) => `\\${ch}`);
}
|
|
15
|
+
/**
 * Turn one cell value into its text form.
 *
 * `null` and `undefined` render as an empty cell, strings are escaped,
 * numbers and booleans are stringified as-is, and anything else is serialized
 * with `JSON.stringify` and then escaped (an empty cell when `JSON.stringify`
 * yields `undefined`).
 */
function renderCell(value) {
    if (value == null) {
        return '';
    }
    switch (typeof value) {
        case 'string':
            return escapeCell(value);
        case 'number':
        case 'boolean':
            return String(value);
        default:
            return escapeCell(JSON.stringify(value) ?? '');
    }
}
|
|
24
|
+
/**
 * Render a compacted table as a compact text block meant to be read, not
 * parsed.
 *
 * Line 1 is a `⟦table n=… c=…⟧` banner carrying `originalCount` and the column
 * count. Line 2 is the comma-separated schema, where a trailing `?` marks a
 * nullable column. Every following line is one row whose cells are joined with
 * the cell delimiter; each cell is rendered through `renderCell`, so
 * delimiters, newlines and backslashes inside values are escaped.
 *
 * @param table Object with `originalCount`, `cols` (entries with `name` and
 *   `nullable`) and `rows` (arrays indexed by column position).
 * @returns The banner, schema and row lines joined with `\n`.
 */
export function renderTable(table) {
    const banner = `⟦table n=${table.originalCount} c=${table.cols.length}⟧`;
    const headerNames = table.cols.map((col) => {
        if (col.nullable) {
            return `${escapeHeaderName(col.name)}?`;
        }
        return escapeHeaderName(col.name);
    });
    const schemaLine = headerNames.join(',');
    // Cells are read by column position, so a short row yields empty trailing cells.
    const renderRow = (row) => {
        const cells = table.cols.map((_col, index) => renderCell(row[index]));
        return cells.join(CELL_DELIM);
    };
    const rowLines = table.rows.map(renderRow);
    return [banner, schemaLine, ...rowLines].join('\n');
}
|