llm-chat-msg-compressor 1.0.5 → 1.0.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
package/dist/analyzer.js CHANGED
@@ -30,7 +30,7 @@ class Analyzer {
30
30
  let schemaSavings = 0;
31
31
  // Helper to check schema suitability for a single array
32
32
  const calculateArraySchemaSavings = (arr) => {
33
- if (arr.length < 3 || typeof arr[0] !== 'object' || arr[0] === null)
33
+ if (arr.length < 2 || typeof arr[0] !== 'object' || arr[0] === null)
34
34
  return 0;
35
35
  // Check consistency of first few items
36
36
  const firstItem = arr[0];
@@ -67,7 +67,9 @@ class Analyzer {
67
67
  keysLen += key.length;
68
68
  }
69
69
  const perItemOverhead = keysLen + (keyCount * 2); // quotes + colon approx
70
- const schemaArrayOverhead = keysLen + (keyCount * 3) + 10; // keys + quotes/commas + "$s":[]
70
+ // For tokens, schema separation is very efficient.
71
+ // The overhead of the schema array is small compared to repeating keys.
72
+ const schemaArrayOverhead = keysLen + (keyCount * 2) + 5;
71
73
  return Math.max(0, ((arr.length - 1) * perItemOverhead) - schemaArrayOverhead);
72
74
  }
73
75
  return 0;
@@ -131,21 +133,20 @@ class Analyzer {
131
133
  };
132
134
  traverse(data, 0);
133
135
  // Estimate Abbreviation Savings
134
- // We replace AvgKeyLen with ShortKeyLen (approx 2 chars avg for small-med payloads)
135
- // Savings = TotalKeys * (AvgKeyLen - 2)
136
- // Overhead: We need to send the map! Map output is approx TotalUniqueKeys * (AvgKeyLen + 2)
137
- // Since we don't track unique keys efficiently here, let's assume worst case or strict ratio.
138
- // A simple heuristic: Savings is roughly half total key length if repetitive.
139
- // Let's be more precise:
140
- const avgKeyLen = totalKeysCount > 0 ? totalKeyLength / totalKeysCount : 0;
141
- const estimatedShortKeyLen = 2.5; // 'a', 'b', ... 'aa'
136
+ // For LLM tokens, shortening keys is often a net LOSS because:
137
+ // 1. Common keys (metadata, id, role) are already 1 token.
138
+ // 2. Short keys (a, b, c) are also 1 token.
139
+ // 3. The mapping table 'm' adds significant token overhead.
140
+ // We use a much more conservative byte-per-key saving (e.g. 1.5 bytes instead of avgKeyLen - 2)
141
+ const tokenAwareSavingsPerKey = 1.5;
142
142
  // Rough estimate of unique keys: usually much smaller than total keys for compressed data.
143
143
  // Let's assume 20% distinct keys for a "compressible" workload.
144
- const estimatedMapOverhead = (totalKeysCount * 0.2) * (avgKeyLen + 3);
145
- const rawAbbrevSavings = totalKeysCount * (avgKeyLen - estimatedShortKeyLen);
146
- const abbrevMetadataTax = 40; // { m: {}, d: } overhead
144
+ const avgKeyLen = totalKeysCount > 0 ? totalKeyLength / totalKeysCount : 0;
145
+ const estimatedMapOverhead = (totalKeysCount * 0.2) * (avgKeyLen + 5);
146
+ const rawAbbrevSavings = totalKeysCount * tokenAwareSavingsPerKey;
147
+ const abbrevMetadataTax = 60; // Increased tax for { m: {}, d: } wrapper tokens
147
148
  const estimatedAbbrevSavings = Math.max(0, rawAbbrevSavings - estimatedMapOverhead - abbrevMetadataTax);
148
- const schemaMetadataTax = 30; // { $s: [], $d: [] } overhead
149
+ const schemaMetadataTax = 20; // Reduced tax for { $s: [], $d: [] } as it's more token-friendly
149
150
  const finalSchemaSavings = Math.max(0, schemaSavings - schemaMetadataTax);
150
151
  return {
151
152
  totalBytes,
package/dist/optimizer.js CHANGED
@@ -38,9 +38,9 @@ class Optimizer {
38
38
  else {
39
39
  // 2. Smart Strategy Selection
40
40
  // Compare estimated savings to pick the winner.
41
- // Prefer SchemaSeparation if it saves MORE than AbbreviatedKeys (with a slight buffer for safety)
42
- // Schema Separation is "riskier" structure-wise (arrays vs maps), so we want it to be worth it.
43
- if (metrics.estimatedSchemaSavings > metrics.estimatedAbbrevSavings * 1.1) {
41
+ // Prefer SchemaSeparation if it saves anything significant.
42
+ // It is generally much better for LLM tokens than Abbreviated Keys.
43
+ if (metrics.estimatedSchemaSavings > 50 || metrics.estimatedSchemaSavings > metrics.estimatedAbbrevSavings) {
44
44
  result = this.schemaStrat.compress(data);
45
45
  }
46
46
  else if (aggressive) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-chat-msg-compressor",
3
- "version": "1.0.5",
3
+ "version": "1.0.6",
4
4
  "description": "Intelligent JSON compression for LLM API optimization",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",