@shrkcrft/compress 0.1.0-alpha.16 → 0.1.0-alpha.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/compress-content.d.ts.map +1 -1
- package/dist/compress-content.js +11 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/result/compress-options.d.ts +7 -0
- package/dist/result/compress-options.d.ts.map +1 -1
- package/dist/table/columnar-json.d.ts.map +1 -1
- package/dist/table/columnar-json.js +14 -5
- package/dist/table/columnar-table.d.ts +7 -0
- package/dist/table/columnar-table.d.ts.map +1 -1
- package/dist/table/compact-object-array.d.ts.map +1 -1
- package/dist/table/compact-object-array.js +4 -1
- package/dist/table/derived-columns.d.ts +46 -0
- package/dist/table/derived-columns.d.ts.map +1 -0
- package/dist/table/derived-columns.js +172 -0
- package/dist/table/object-map.d.ts.map +1 -1
- package/dist/table/object-map.js +3 -1
- package/dist/text/finalize.d.ts.map +1 -1
- package/dist/text/finalize.js +20 -1
- package/package.json +2 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"compress-content.d.ts","sourceRoot":"","sources":["../src/compress-content.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACzE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;
|
|
1
|
+
{"version":3,"file":"compress-content.d.ts","sourceRoot":"","sources":["../src/compress-content.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACzE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAYrE;;;;;;GAMG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,GAAE,gBAAqB,GAAG,kBAAkB,CAS7F"}
|
package/dist/compress-content.js
CHANGED
|
@@ -3,6 +3,7 @@ import { detectContentType } from "./content/detect-content-type.js";
|
|
|
3
3
|
import { segmentContent, isRichSegmentType } from "./content/segment.js";
|
|
4
4
|
import { ECompressionStrategy } from "./result/compression-strategy.js";
|
|
5
5
|
import { measureSavings } from "./tokens/estimate-tokens.js";
|
|
6
|
+
import { passthroughResult } from "./text/finalize.js";
|
|
6
7
|
import { compressJson } from "./json/compress-json.js";
|
|
7
8
|
import { compressLog } from "./text/compress-log.js";
|
|
8
9
|
import { compressSearch } from "./text/compress-search.js";
|
|
@@ -18,6 +19,16 @@ import { compressCode } from "./code/compress-code.js";
|
|
|
18
19
|
* and options always yield the same output.
|
|
19
20
|
*/
|
|
20
21
|
export function compressContent(text, opts = {}) {
    const routed = routeCompressContent(text, opts);
    // The `lossless` guard lives here, at the one public entry point, so it
    // covers every lossy route at once (text elision, JSON row-sampling, mixed
    // content): whenever the routed result reports that it dropped
    // information, the caller gets the verbatim input back instead.
    const mustFallBack = opts.lossless && routed.lossy;
    return mustFallBack
        ? passthroughResult(text, routed.contentType, 'lossless requested — lossy reduction skipped')
        : routed;
}
|
|
31
|
+
function routeCompressContent(text, opts) {
|
|
21
32
|
const type = opts.contentType ?? detectContentType(text);
|
|
22
33
|
switch (type) {
|
|
23
34
|
case EContentType.JsonArray:
|
package/dist/index.d.ts
CHANGED
|
@@ -25,6 +25,7 @@ export type { IFieldSpec } from './table/field-spec.js';
|
|
|
25
25
|
export type { ITableCompaction } from './table/table-compaction.js';
|
|
26
26
|
export { compactObjectArray } from './table/compact-object-array.js';
|
|
27
27
|
export type { IColumnarTable } from './table/columnar-table.js';
|
|
28
|
+
export type { IDerivedColumn } from './table/derived-columns.js';
|
|
28
29
|
export { tableToColumnar, compactArrayToColumnar, isColumnarTable, expandColumnar, } from './table/columnar-json.js';
|
|
29
30
|
export { renderTable } from './table/render-table.js';
|
|
30
31
|
export { renderCompactJson } from './json/render-compact-json.js';
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,OAAO,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7E,YAAY,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAG/D,OAAO,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AACzD,OAAO,EAAE,iBAAiB,EAAE,MAAM,kCAAkC,CAAC;AACrE,YAAY,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AAGzE,YAAY,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACpD,YAAY,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACtF,YAAY,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,YAAY,EAAE,uBAAuB,EAAE,MAAM,6BAA6B,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAG9D,YAAY,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACxD,YAAY,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AACpE,OAAO,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AACrE,YAAY,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAChE,OAAO,EACL,eAAe,EACf,sBAAsB,EACtB,eAAe,EACf,cAAc,GACf,MAAM,0BAA0B,CAAC;AAClC,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAGvD,OAAO,EACL,aAAa,EACb,YAAY,EACZ,oBAAoB,EACpB,mBAAmB,GACpB,MAAM,0BAA0B,CAAC;AAGlC,YAAY,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAGvF,YAAY,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAC/E,OAAO,EAAE,eAAe,EAAE,OAAO,EAAE,eAAe,EAAE,OAAO,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AAGnH,YAAY,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACxD,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAG5D,YAAY,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAChE,YAAY,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AAGnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,kCAAkC,CAAC;AACxE,YAAY,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACzE,YAAY,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAGrE,OAAO,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,2
BAA2B,CAAC;AAC3D,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAG/D,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAGvD,OAAO,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACzD,YAAY,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAChE,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AACzE,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAC3E,YAAY,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACjF,YAAY,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AACpE,OAAO,EAAE,mBAAmB,EAAE,MAAM,kCAAkC,CAAC;AACvE,OAAO,EAAE,qBAAqB,EAAE,MAAM,oCAAoC,CAAC;AAG3E,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,OAAO,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7E,YAAY,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAG/D,OAAO,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AACzD,OAAO,EAAE,iBAAiB,EAAE,MAAM,kCAAkC,CAAC;AACrE,YAAY,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AAGzE,YAAY,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACpD,YAAY,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACtF,YAAY,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,YAAY,EAAE,uBAAuB,EAAE,MAAM,6BAA6B,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAG9D,YAAY,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACxD,YAAY,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AACpE,OAAO,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AACrE,YAAY,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAChE,YAAY,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AACjE,OAAO,EACL,eAAe,EACf,sBAAsB,EACtB,eAAe,EACf,cAAc,GACf,MAAM,0BAA0B,CAAC;AAClC,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAGvD,OAAO,EACL,aAAa,EACb,YAAY,EACZ,oBAAoB,EACpB,mBAAmB,GACpB,MAAM,0BAA0B,CAAC;AAGlC,YAAY,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAGvF,YAAY,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAC/E,OAAO,EAAE,eAAe,EAAE,OAAO,EAAE,eAAe,EAAE,OAAO,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AAGnH,YAAY,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACxD,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAG5D,YAAY,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAChE,YAAY,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AAGnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,kCAAkC,CAAC;AACxE,YAAY,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACzE,YAAY,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAGrE,OAAO,EAAE,WAAW,EAAE,MAAM,w
BAAwB,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAG/D,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAGvD,OAAO,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACzD,YAAY,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAChE,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AACzE,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAC3E,YAAY,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACjF,YAAY,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AACpE,OAAO,EAAE,mBAAmB,EAAE,MAAM,kCAAkC,CAAC;AACvE,OAAO,EAAE,qBAAqB,EAAE,MAAM,oCAAoC,CAAC;AAG3E,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC"}
|
|
@@ -14,6 +14,13 @@ export interface ICompressOptions {
|
|
|
14
14
|
contentType?: EContentType;
|
|
15
15
|
/** Soft cap on retained items/lines/matches/hunks (compressor-specific). */
|
|
16
16
|
maxItems?: number;
|
|
17
|
+
/**
|
|
18
|
+
* Refuse any lossy reduction: a pass that would drop lines/rows/hunks
|
|
19
|
+
* returns the input untouched (passthrough) instead. Provably-lossless
|
|
20
|
+
* transforms (JSON columnar, dedup that round-trips) still apply. Lets a
|
|
21
|
+
* caller demand "shrink only if fully reconstructable from the output alone".
|
|
22
|
+
*/
|
|
23
|
+
lossless?: boolean;
|
|
17
24
|
/** Below this many lines a lossy text pass returns the input untouched. */
|
|
18
25
|
minLines?: number;
|
|
19
26
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"compress-options.d.ts","sourceRoot":"","sources":["../../src/result/compress-options.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAC/D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAErD;;;;GAIG;AACH,MAAM,WAAW,gBAAgB;IAC/B,oEAAoE;IACpE,KAAK,CAAC,EAAE,SAAS,CAAC;IAClB,qEAAqE;IACrE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,uDAAuD;IACvD,WAAW,CAAC,EAAE,YAAY,CAAC;IAC3B,4EAA4E;IAC5E,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,2EAA2E;IAC3E,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB"}
|
|
1
|
+
{"version":3,"file":"compress-options.d.ts","sourceRoot":"","sources":["../../src/result/compress-options.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAC/D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAErD;;;;GAIG;AACH,MAAM,WAAW,gBAAgB;IAC/B,oEAAoE;IACpE,KAAK,CAAC,EAAE,SAAS,CAAC;IAClB,qEAAqE;IACrE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,uDAAuD;IACvD,WAAW,CAAC,EAAE,YAAY,CAAC;IAC3B,4EAA4E;IAC5E,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,2EAA2E;IAC3E,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"columnar-json.d.ts","sourceRoot":"","sources":["../../src/table/columnar-json.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"columnar-json.d.ts","sourceRoot":"","sources":["../../src/table/columnar-json.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAK1D;;;;;;GAMG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,gBAAgB,GAAG,cAAc,CAgBvE;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,OAAO,GAAG,cAAc,GAAG,IAAI,CAG5E;AAED,wDAAwD;AACxD,wBAAgB,eAAe,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,cAAc,CAYvE;AAED;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,cAAc,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAmCpF"}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { compactObjectArray } from "./compact-object-array.js";
|
|
2
2
|
import { applyValueDictionaries } from "./apply-value-dictionaries.js";
|
|
3
|
+
import { dropDerivedColumns, reconstructDerived } from "./derived-columns.js";
|
|
3
4
|
/**
|
|
4
5
|
* Encode a compacted table as a valid-JSON columnar object. Only the fields a
|
|
5
6
|
* decoder needs are emitted — `cols`, `rows`, `absent` (and an optional `dict`).
|
|
@@ -8,14 +9,19 @@ import { applyValueDictionaries } from "./apply-value-dictionaries.js";
|
|
|
8
9
|
* tokens. Low-cardinality columns are value-dictionary encoded (never inflates).
|
|
9
10
|
*/
|
|
10
11
|
export function tableToColumnar(table) {
    const names = table.cols.map((col) => col.name);
    // Derived-column detection has to see the raw cell values, so it runs
    // before value-dictionary encoding. It removes columns whose value in every
    // row can be rebuilt from another column (e.g. graph file nodes:
    // id="file:"+path, label=basename(path), kind=const).
    const { cols, rows: rawRows, absent, derived } = dropDerivedColumns(names, table.rows, table.absent);
    const { rows, dict } = applyValueDictionaries(cols, rawRows, absent);
    // Key order is cols, rows, absent, then the optional dict and derived.
    const _table = { cols, rows, absent };
    if (dict) {
        _table.dict = dict;
    }
    // `derived` is emitted only when at least one column was actually dropped.
    if (derived && derived.length > 0) {
        _table.derived = derived;
    }
    return { _table };
}
|
|
@@ -47,7 +53,7 @@ export function isColumnarTable(value) {
|
|
|
47
53
|
* (an absent key stays absent; key order is not significant).
|
|
48
54
|
*/
|
|
49
55
|
export function expandColumnar(table) {
|
|
50
|
-
const { cols, rows, absent, dict } = table._table;
|
|
56
|
+
const { cols, rows, absent, dict, derived } = table._table;
|
|
51
57
|
const width = cols.length;
|
|
52
58
|
const absentSet = new Set(absent.map(([r, c]) => r * width + c));
|
|
53
59
|
const out = [];
|
|
@@ -77,6 +83,9 @@ export function expandColumnar(table) {
|
|
|
77
83
|
configurable: true,
|
|
78
84
|
});
|
|
79
85
|
}
|
|
86
|
+
// Rebuild columns dropped because they were a pure function of a kept one.
|
|
87
|
+
if (derived && derived.length > 0)
|
|
88
|
+
reconstructDerived(obj, derived);
|
|
80
89
|
out.push(obj);
|
|
81
90
|
}
|
|
82
91
|
return out;
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
* while still deduplicating the schema out of every row. This is what shrk's
|
|
6
6
|
* MCP tools emit so JSON-parsing agents keep working.
|
|
7
7
|
*/
|
|
8
|
+
import type { IDerivedColumn } from './derived-columns.js';
|
|
8
9
|
export interface IColumnarTable {
|
|
9
10
|
_table: {
|
|
10
11
|
/** Column names, in schema order. */
|
|
@@ -19,6 +20,12 @@ export interface IColumnarTable {
|
|
|
19
20
|
* integer indices into `dict[name]` — deref to recover the real value.
|
|
20
21
|
*/
|
|
21
22
|
dict?: Record<string, unknown[]>;
|
|
23
|
+
/**
|
|
24
|
+
* Optional reconstruction list for columns dropped because each row's value
|
|
25
|
+
* is a pure function of another column (`const` | `prefix` | `basename`).
|
|
26
|
+
* `expandColumnar` rebuilds them losslessly; absent here means none dropped.
|
|
27
|
+
*/
|
|
28
|
+
derived?: IDerivedColumn[];
|
|
22
29
|
};
|
|
23
30
|
}
|
|
24
31
|
//# sourceMappingURL=columnar-table.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"columnar-table.d.ts","sourceRoot":"","sources":["../../src/table/columnar-table.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE;QACN,qCAAqC;QACrC,IAAI,EAAE,MAAM,EAAE,CAAC;QACf,0DAA0D;QAC1D,IAAI,EAAE,OAAO,EAAE,EAAE,CAAC;QAClB,wEAAwE;QACxE,MAAM,EAAE,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;QAChC;;;;WAIG;QACH,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"columnar-table.d.ts","sourceRoot":"","sources":["../../src/table/columnar-table.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAE3D,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE;QACN,qCAAqC;QACrC,IAAI,EAAE,MAAM,EAAE,CAAC;QACf,0DAA0D;QAC1D,IAAI,EAAE,OAAO,EAAE,EAAE,CAAC;QAClB,wEAAwE;QACxE,MAAM,EAAE,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;QAChC;;;;WAIG;QACH,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC;QACjC;;;;WAIG;QACH,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;KAC5B,CAAC;CACH"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"compact-object-array.d.ts","sourceRoot":"","sources":["../../src/table/compact-object-array.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAqC9D;;;;;;;;GAQG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,OAAO,GAAG,gBAAgB,GAAG,IAAI,
|
|
1
|
+
{"version":3,"file":"compact-object-array.d.ts","sourceRoot":"","sources":["../../src/table/compact-object-array.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAqC9D;;;;;;;;GAQG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,OAAO,GAAG,gBAAgB,GAAG,IAAI,CA2C1E"}
|
|
@@ -63,7 +63,10 @@ export function compactObjectArray(items) {
|
|
|
63
63
|
const row = [];
|
|
64
64
|
for (let c = 0; c < colNames.length; c += 1) {
|
|
65
65
|
const key = colNames[c];
|
|
66
|
-
|
|
66
|
+
// `key in item` walks the prototype chain, so a column named after an
|
|
67
|
+
// Object.prototype member (`toString`, `hasOwnProperty`, …) would read the
|
|
68
|
+
// inherited member as a cell value. Own-property check keeps it lossless.
|
|
69
|
+
const present = Object.prototype.hasOwnProperty.call(item, key) && item[key] !== undefined;
|
|
67
70
|
if (!present) {
|
|
68
71
|
absent.push([r, c]);
|
|
69
72
|
row.push(null);
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reversible "derived column" pass for the columnar codec.
|
|
3
|
+
*
|
|
4
|
+
* Many homogeneous object arrays carry columns that are a pure, deterministic
|
|
5
|
+
* function of another column FOR EVERY ROW — e.g. a graph file node where
|
|
6
|
+
* `id === "file:" + path`, `label === basename(path)`, `kind === "file"`.
|
|
7
|
+
* Value-dictionary/columnar encoding can only dedupe the *key* and
|
|
8
|
+
* low-cardinality *values*; it can't recover this per-row derivable content.
|
|
9
|
+
* This pass drops such a column entirely and records how to rebuild it, so
|
|
10
|
+
* {@link reconstructDerived} restores the exact value on decode.
|
|
11
|
+
*
|
|
12
|
+
* Correctness rules:
|
|
13
|
+
* - CONTENT-checked over every row (never a key-name heuristic) — a column
|
|
14
|
+
* is only dropped when the transform holds for all rows. Rule/path nodes
|
|
15
|
+
* where `label` is NOT a function of `id` are left untouched.
|
|
16
|
+
* - A derived column's base must itself be irreducible (a "base" column), so
|
|
17
|
+
* decode never chases a dropped column. No cycles, no chains.
|
|
18
|
+
* - Only fully-present columns (no absent cells) participate, so the `absent`
|
|
19
|
+
* map never references a dropped column.
|
|
20
|
+
*/
|
|
21
|
+
/**
 * One reconstruction instruction for a column that was removed from the
 * columnar encoding because its value in every row could be rebuilt.
 */
export interface IDerivedColumn {
    /** Column to reconstruct on decode. */
    name: string;
    /**
     * Reconstruction op: `const` restores `arg` as-is, `prefix` restores
     * `arg + row[from]`, `basename` restores the part of `row[from]` after its
     * last `/`.
     */
    op: 'const' | 'prefix' | 'basename';
    /** Base column name the value is derived from (op = prefix | basename). */
    from?: string;
    /** Prefix string (op = prefix) or the constant value (op = const). */
    arg?: unknown;
}
|
|
31
|
+
/** Result of the derived-column pass: the reduced table plus how to undo it. */
export interface IDerivedSplit {
    /** Names of the columns that were kept, in their original relative order. */
    cols: string[];
    /** Row values restricted to the kept columns (same order as `cols`). */
    rows: unknown[][];
    /** `[row, col]` pairs of absent cells, with `col` indexing into the kept `cols`. */
    absent: Array<[number, number]>;
    /** Reconstruction list for the dropped columns; omitted when nothing was dropped. */
    derived?: IDerivedColumn[];
}
|
|
37
|
+
/**
 * Detect and drop derivable columns from a compacted table's raw rows. Returns
 * the reduced cols/rows/absent plus the `derived` reconstruction list (omitted
 * when nothing was dropped). Runs BEFORE value-dictionary encoding so it sees
 * the real values.
 *
 * @param cols - Column names, in schema order.
 * @param rows - Raw row values, one array per row, aligned with `cols`.
 * @param absent - `[row, col]` pairs marking cells whose key was absent.
 * @returns The table without the dropped columns, and how to rebuild them.
 */
export declare function dropDerivedColumns(cols: readonly string[], rows: readonly unknown[][], absent: ReadonlyArray<[number, number]>): IDerivedSplit;
|
|
44
|
+
/**
 * Restore the dropped derived columns onto a decoded object (in place).
 *
 * @param obj - Decoded row object; the rebuilt columns are defined on it.
 * @param derived - Reconstruction list produced when the table was encoded.
 */
export declare function reconstructDerived(obj: Record<string, unknown>, derived: readonly IDerivedColumn[]): void;
|
|
46
|
+
//# sourceMappingURL=derived-columns.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"derived-columns.d.ts","sourceRoot":"","sources":["../../src/table/derived-columns.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,MAAM,WAAW,cAAc;IAC7B,uCAAuC;IACvC,IAAI,EAAE,MAAM,CAAC;IACb,yBAAyB;IACzB,EAAE,EAAE,OAAO,GAAG,QAAQ,GAAG,UAAU,CAAC;IACpC,2EAA2E;IAC3E,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,sEAAsE;IACtE,GAAG,CAAC,EAAE,OAAO,CAAC;CACf;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,IAAI,EAAE,OAAO,EAAE,EAAE,CAAC;IAClB,MAAM,EAAE,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IAChC,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;CAC5B;AAoBD;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAChC,IAAI,EAAE,SAAS,MAAM,EAAE,EACvB,IAAI,EAAE,SAAS,OAAO,EAAE,EAAE,EAC1B,MAAM,EAAE,aAAa,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,GACtC,aAAa,CA4Gf;AAED,4EAA4E;AAC5E,wBAAgB,kBAAkB,CAChC,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAC5B,OAAO,EAAE,SAAS,cAAc,EAAE,GACjC,IAAI,CAiBN"}
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reversible "derived column" pass for the columnar codec.
|
|
3
|
+
*
|
|
4
|
+
* Many homogeneous object arrays carry columns that are a pure, deterministic
|
|
5
|
+
* function of another column FOR EVERY ROW — e.g. a graph file node where
|
|
6
|
+
* `id === "file:" + path`, `label === basename(path)`, `kind === "file"`.
|
|
7
|
+
* Value-dictionary/columnar encoding can only dedupe the *key* and
|
|
8
|
+
* low-cardinality *values*; it can't recover this per-row derivable content.
|
|
9
|
+
* This pass drops such a column entirely and records how to rebuild it, so
|
|
10
|
+
* {@link reconstructDerived} restores the exact value on decode.
|
|
11
|
+
*
|
|
12
|
+
* Correctness rules:
|
|
13
|
+
* - CONTENT-checked over every row (never a key-name heuristic) — a column
|
|
14
|
+
* is only dropped when the transform holds for all rows. Rule/path nodes
|
|
15
|
+
* where `label` is NOT a function of `id` are left untouched.
|
|
16
|
+
* - A derived column's base must itself be irreducible (a "base" column), so
|
|
17
|
+
* decode never chases a dropped column. No cycles, no chains.
|
|
18
|
+
* - Only fully-present columns (no absent cells) participate, so the `absent`
|
|
19
|
+
* map never references a dropped column.
|
|
20
|
+
*/
|
|
21
|
+
/**
 * Final `/`-separated segment of `s`.
 *
 * When `s` contains no `/`, `lastIndexOf` yields -1, so the slice starts at 0
 * and the whole string comes back unchanged.
 */
function basename(s) {
    return s.slice(s.lastIndexOf('/') + 1);
}
|
|
26
|
+
/**
 * Build an unreduced split result that shares no arrays with the inputs:
 * the column list, every row and every `[row, col]` absent pair are copied
 * one level deep (cell values themselves are not cloned). No `derived` key is
 * set.
 */
function copy(cols, rows, absent) {
    const colsCopy = Array.from(cols);
    const rowsCopy = rows.map((row) => Array.from(row));
    const absentCopy = absent.map((pair) => [pair[0], pair[1]]);
    return { cols: colsCopy, rows: rowsCopy, absent: absentCopy };
}
|
|
33
|
+
/**
 * Find columns of a compacted table that can be rebuilt on decode and remove
 * them, returning the reduced cols/rows/absent together with the `derived`
 * reconstruction list. When nothing can be dropped the result is a plain copy
 * of the input without a `derived` key. Works on raw cell values, i.e. it is
 * meant to run before value-dictionary encoding.
 */
export function dropDerivedColumns(cols, rows, absent) {
    // Multi-row tables only: for a single row the reconstruction metadata
    // would never pay for itself.
    if (rows.length < 2 || cols.length < 1) {
        return copy(cols, rows, absent);
    }
    const allIdx = cols.map((_, col) => col);
    // A column with even one absent cell never participates, which also means
    // the `absent` list can never point at a dropped column.
    const gappyCols = new Set(absent.map(([, col]) => col));
    const isComplete = (col) => !gappyCols.has(col);
    const droppedIdx = new Set();
    const derived = [];

    // Pass 1 - constant columns: every row holds the same primitive (or null).
    for (const col of allIdx) {
        if (!isComplete(col)) {
            continue;
        }
        const first = rows[0][col];
        const isPrimitive = first === null || typeof first !== 'object';
        if (isPrimitive && rows.every((row) => row[col] === first)) {
            droppedIdx.add(col);
            derived.push({ name: cols[col], op: 'const', arg: first });
        }
    }

    // Pass 2 - prefix / basename relations among the surviving all-string columns.
    const stringCols = allIdx.filter((col) => !droppedIdx.has(col)
        && isComplete(col)
        && rows.every((row) => typeof row[col] === 'string'));

    // Every way `target` can be rebuilt from another string column. All of
    // them are collected so that one with an irreducible base can be chosen.
    const derivationsOf = (target) => {
        const found = [];
        const targetHead = rows[0][target];
        for (const source of stringCols) {
            if (source === target) {
                continue;
            }
            const sourceHead = rows[0][source];
            // prefix: target === arg + source in every row. The first row
            // proposes `arg`; the remaining rows must agree with it.
            if (targetHead.endsWith(sourceHead) && targetHead.length > sourceHead.length) {
                const arg = targetHead.slice(0, targetHead.length - sourceHead.length);
                if (rows.every((row) => row[target] === arg + row[source])) {
                    found.push({ name: cols[target], op: 'prefix', from: cols[source], arg });
                }
            }
            // basename: target === basename(source) in every row.
            if (rows.every((row) => row[target] === basename(row[source]))) {
                found.push({ name: cols[target], op: 'basename', from: cols[source] });
            }
        }
        return found;
    };
    const options = new Map(stringCols.map((col) => [col, derivationsOf(col)]));

    // A BASE column is one that cannot be derived from any other column.
    const baseNames = new Set(stringCols
        .filter((col) => options.get(col).length === 0)
        .map((col) => cols[col]));

    // Only drop a column that derives from a base. Bases are never dropped, so
    // decode never has to chase a dropped column (no chains, no cycles).
    for (const col of stringCols) {
        const chosen = options.get(col).find((t) => t.from && baseNames.has(t.from));
        if (chosen) {
            droppedIdx.add(col);
            derived.push(chosen);
        }
    }
    if (droppedIdx.size === 0) {
        return copy(cols, rows, absent);
    }

    // Project the table onto the kept columns and renumber `absent` column
    // indices accordingly (dropped columns have no absent cells to lose).
    const keptIdx = allIdx.filter((col) => !droppedIdx.has(col));
    const renumber = new Map(keptIdx.map((oldIdx, newIdx) => [oldIdx, newIdx]));
    const newAbsent = absent
        .filter(([, col]) => renumber.has(col))
        .map(([row, col]) => [row, renumber.get(col)]);
    return {
        cols: keptIdx.map((col) => cols[col]),
        rows: rows.map((row) => keptIdx.map((col) => row[col])),
        absent: newAbsent,
        derived,
    };
}
|
|
152
|
+
/**
 * Restore the dropped derived columns onto a decoded object (in place).
 *
 * Each entry of `derived` is applied in order:
 *  - `const`    sets the column to `arg`;
 *  - `prefix`   sets it to `String(arg) + obj[from]`;
 *  - `basename` sets it to the part of `obj[from]` after its last `/`.
 *
 * An entry is skipped (the column stays absent) when its base value is missing
 * or not a string, or when its `op` is not one of the three above. An
 * unrecognised op is therefore never decoded as if it were `basename`.
 *
 * @param obj - Decoded row object to extend.
 * @param derived - Reconstruction list recorded when the table was encoded.
 * @returns Nothing; `obj` is mutated.
 */
export function reconstructDerived(obj, derived) {
    for (const d of derived) {
        let value;
        if (d.op === 'const') {
            value = d.arg;
        }
        else if (d.op === 'prefix' || d.op === 'basename') {
            const base = obj[d.from];
            if (typeof base !== 'string')
                continue; // base missing/non-string — leave absent
            value = d.op === 'prefix' ? String(d.arg) + base : basename(base);
        }
        else {
            continue; // unknown op — leave absent rather than guess a value
        }
        // defineProperty (not plain assignment) creates an own data property
        // even for names such as `__proto__`, which assignment would treat
        // specially.
        Object.defineProperty(obj, d.name, {
            value,
            writable: true,
            enumerable: true,
            configurable: true,
        });
    }
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"object-map.d.ts","sourceRoot":"","sources":["../../src/table/object-map.ts"],"names":[],"mappings":"AAyBA,sDAAsD;AACtD,MAAM,WAAW,UAAU;IACzB,uDAAuD;IACvD,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,qEAAqE;IACrE,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,gFAAgF;IAChF,IAAI,EAAE,OAAO,EAAE,EAAE,CAAC;IAClB,yFAAyF;IACzF,MAAM,EAAE,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;CACjC;AAED;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,OAAO,GAAG,UAAU,GAAG,IAAI,
|
|
1
|
+
{"version":3,"file":"object-map.d.ts","sourceRoot":"","sources":["../../src/table/object-map.ts"],"names":[],"mappings":"AAyBA,sDAAsD;AACtD,MAAM,WAAW,UAAU;IACzB,uDAAuD;IACvD,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,qEAAqE;IACrE,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,gFAAgF;IAChF,IAAI,EAAE,OAAO,EAAE,EAAE,CAAC;IAClB,yFAAyF;IACzF,MAAM,EAAE,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;CACjC;AAED;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,OAAO,GAAG,UAAU,GAAG,IAAI,CAqClE;AAED,+DAA+D;AAC/D,wBAAgB,WAAW,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI;IAAE,KAAK,EAAE,UAAU,CAAA;CAAE,CAU1E;AAED;;;;GAIG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAmB9E"}
|
package/dist/table/object-map.js
CHANGED
|
@@ -46,7 +46,9 @@ export function compactObjectMap(value) {
|
|
|
46
46
|
const row = [];
|
|
47
47
|
for (let c = 0; c < cols.length; c += 1) {
|
|
48
48
|
const key = cols[c];
|
|
49
|
-
|
|
49
|
+
// Own-property check (not `key in entry`, which walks the prototype chain)
|
|
50
|
+
// so a column named after an Object.prototype member isn't read as a value.
|
|
51
|
+
if (Object.prototype.hasOwnProperty.call(entry, key) && entry[key] !== undefined) {
|
|
50
52
|
row.push(entry[key]);
|
|
51
53
|
}
|
|
52
54
|
else {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"finalize.d.ts","sourceRoot":"","sources":["../../src/text/finalize.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAC/D,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AACzE,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AAC1E,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AAItE,0DAA0D;AAC1D,wBAAgB,iBAAiB,CAC/B,QAAQ,EAAE,MAAM,EAChB,WAAW,EAAE,YAAY,EACzB,IAAI,SAAmC,GACtC,kBAAkB,CASpB;AAED;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE;IACpC,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,YAAY,CAAC;IAC1B,QAAQ,EAAE,oBAAoB,CAAC;IAC/B,IAAI,EAAE,gBAAgB,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,kBAAkB,
|
|
1
|
+
{"version":3,"file":"finalize.d.ts","sourceRoot":"","sources":["../../src/text/finalize.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAC/D,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AACzE,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AAC1E,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AAItE,0DAA0D;AAC1D,wBAAgB,iBAAiB,CAC/B,QAAQ,EAAE,MAAM,EAChB,WAAW,EAAE,YAAY,EACzB,IAAI,SAAmC,GACtC,kBAAkB,CASpB;AAED;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE;IACpC,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,YAAY,CAAC;IAC1B,QAAQ,EAAE,oBAAoB,CAAC;IAC/B,IAAI,EAAE,gBAAgB,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,kBAAkB,CAsCrB"}
|
package/dist/text/finalize.js
CHANGED
|
@@ -30,13 +30,19 @@ export function finalizeLossy(params) {
|
|
|
30
30
|
let key;
|
|
31
31
|
if (opts.store) {
|
|
32
32
|
key = opts.store.put(original);
|
|
33
|
+
// Make inline `… N lines omitted` placeholders self-describing: a reader
|
|
34
|
+
// who only sees a clipped middle of the output otherwise can't tell the
|
|
35
|
+
// dropped detail is retrievable. Annotate each with the recovery key.
|
|
36
|
+
compressed = annotateElisionMarkers(body, key);
|
|
33
37
|
// Skip the trailing marker when the body already references THIS key inline
|
|
34
38
|
// (e.g. compressLog's per-drop elision hints) — no need to repeat it. A
|
|
35
39
|
// different inline key (e.g. a diff's per-section keys) still gets the
|
|
36
40
|
// whole-blob marker appended. The marker carries only the key: the human
|
|
37
41
|
// `note` is shipped separately in the result, so repeating it on the wire
|
|
38
42
|
// would just cost tokens.
|
|
39
|
-
compressed =
|
|
43
|
+
compressed = compressed.includes(`<<ccr:${key}`)
|
|
44
|
+
? compressed
|
|
45
|
+
: `${compressed}\n${formatCcrMarker(key)}`;
|
|
40
46
|
}
|
|
41
47
|
const savings = measureSavings(original, compressed, contentType);
|
|
42
48
|
if (savings.after >= savings.before) {
|
|
@@ -52,3 +58,16 @@ export function finalizeLossy(params) {
|
|
|
52
58
|
note,
|
|
53
59
|
};
|
|
54
60
|
}
|
|
61
|
+
/**
 * Tag each `… N line(s) omitted` placeholder in `body` with the recovery key,
 * so that even a clipped excerpt of the output shows the dropped detail can be
 * fetched back with `shrk expand`. Deterministic. A body with no such
 * placeholder is returned unchanged.
 */
function annotateElisionMarkers(body, key) {
    // The placeholder must be the last thing on its line: the lookahead only
    // accepts a newline or the end of the string. A marker followed by more
    // text on the same line (e.g. `… N lines omitted → <<ccr:KEY>>`, which
    // already carries a key) is therefore left alone.
    const trailingPlaceholder = /(… \d+ lines? omitted)(?=\n|$)/g;
    const withKey = (placeholder) => `${placeholder} (shrk expand ${key})`;
    return body.replace(trailingPlaceholder, withKey);
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@shrkcrft/compress",
|
|
3
|
-
"version": "0.1.0-alpha.
|
|
3
|
+
"version": "0.1.0-alpha.18",
|
|
4
4
|
"description": "SharkCraft deterministic context-compression engine: content routing, lossless columnar/table compaction, log/search/diff line reduction, and reversible Compress-Cache-Retrieve (CCR). No model inside — every transform is a pure function of its input.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "SharkCraft contributors",
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
"typecheck": "tsc --noEmit -p tsconfig.json"
|
|
45
45
|
},
|
|
46
46
|
"dependencies": {
|
|
47
|
-
"@shrkcrft/core": "^0.1.0-alpha.
|
|
47
|
+
"@shrkcrft/core": "^0.1.0-alpha.18"
|
|
48
48
|
},
|
|
49
49
|
"publishConfig": {
|
|
50
50
|
"access": "public"
|