llm-chat-msg-compressor 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/analyzer.js CHANGED
@@ -1,6 +1,10 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.Analyzer = void 0;
4
+ const isPlainObject = (obj) => {
5
+ return obj !== null && typeof obj === 'object' && !Array.isArray(obj) &&
6
+ (Object.getPrototypeOf(obj) === Object.prototype || Object.getPrototypeOf(obj) === null);
7
+ };
4
8
  class Analyzer {
5
9
  static analyze(data) {
6
10
  // Pre-flight check for primitives or very small objects
@@ -81,7 +85,7 @@ class Analyzer {
81
85
  traverse(obj[i], currentDepth + 1);
82
86
  }
83
87
  }
84
- else if (obj && typeof obj === 'object') {
88
+ else if (isPlainObject(obj)) {
85
89
  objectCount++;
86
90
  totalBytes += 2; // {}
87
91
  let first = true;
@@ -98,16 +102,30 @@ class Analyzer {
98
102
  }
99
103
  }
100
104
  else {
101
- // Primitive
105
+ // Primitive or non-plain object (Date, etc.)
102
106
  if (typeof obj === 'string') {
103
107
  totalBytes += Buffer.byteLength(obj, 'utf8') + 2; // quotes
104
108
  }
105
109
  else if (typeof obj === 'number' || typeof obj === 'boolean') {
106
110
  totalBytes += String(obj).length;
107
111
  }
112
+ else if (obj instanceof Date) {
113
+ totalBytes += obj.toISOString().length + 2; // quotes
114
+ }
108
115
  else if (obj === null) {
109
116
  totalBytes += 4;
110
117
  }
118
+ else {
119
+ // Fallback for other types that might be stringified
120
+ try {
121
+ const s = JSON.stringify(obj);
122
+ if (s)
123
+ totalBytes += Buffer.byteLength(s, 'utf8');
124
+ }
125
+ catch {
126
+ // Ignore if not stringifiable
127
+ }
128
+ }
111
129
  }
112
130
  };
113
131
  traverse(data, 0);
package/dist/optimizer.d.ts CHANGED
@@ -3,6 +3,8 @@ export interface OptimizerOptions {
3
3
  aggressive?: boolean;
4
4
  thresholdBytes?: number;
5
5
  unsafe?: boolean;
6
+ validateTokenSavings?: boolean;
7
+ tokenizer?: string | ((text: string) => number);
6
8
  }
7
9
  export declare class Optimizer {
8
10
  private schemaStrat;
package/dist/optimizer.js CHANGED
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.Optimizer = void 0;
4
4
  const strategies_1 = require("./strategies");
5
5
  const analyzer_1 = require("./analyzer");
6
+ const tokenizer_1 = require("./tokenizer");
6
7
  class Optimizer {
7
8
  constructor() {
8
9
  this.schemaStrat = new strategies_1.SchemaDataSeparationStrategy();
@@ -20,27 +21,48 @@ class Optimizer {
20
21
  */
21
22
  optimize(data, options = {}) {
22
23
  const { aggressive = false, thresholdBytes = 500, // Increased default: small payloads often grow with key-map overhead
23
- unsafe = false } = options;
24
+ unsafe = false, validateTokenSavings = false, tokenizer = 'cl100k_base' } = options;
24
25
  const metrics = analyzer_1.Analyzer.analyze(data);
26
+ // Helper to count tokens
27
+ const countTokens = (val) => {
28
+ if (typeof tokenizer === 'function') {
29
+ return tokenizer(typeof val === 'string' ? val : JSON.stringify(val));
30
+ }
31
+ return tokenizer_1.TokenCounter.count(val, tokenizer);
32
+ };
33
+ let result;
25
34
  // 1. If too small, just minify
26
35
  if (metrics.totalBytes < thresholdBytes) {
27
- return strategies_1.minify.compress(data);
36
+ result = strategies_1.minify.compress(data);
28
37
  }
29
- // 2. Smart Strategy Selection
30
- // Compare estimated savings to pick the winner.
31
- // Prefer SchemaSeparation if it saves MORE than AbbreviatedKeys (with a slight buffer for safety)
32
- // Schema Separation is "riskier" structure-wise (arrays vs maps), so we want it to be worth it.
33
- if (metrics.estimatedSchemaSavings > metrics.estimatedAbbrevSavings * 1.1) {
34
- return this.schemaStrat.compress(data);
38
+ else {
39
+ // 2. Smart Strategy Selection
40
+ // Compare estimated savings to pick the winner.
41
+ // Prefer SchemaSeparation if it saves MORE than AbbreviatedKeys (with a slight buffer for safety)
42
+ // Schema Separation is "riskier" structure-wise (arrays vs maps), so we want it to be worth it.
43
+ if (metrics.estimatedSchemaSavings > metrics.estimatedAbbrevSavings * 1.1) {
44
+ result = this.schemaStrat.compress(data);
45
+ }
46
+ else if (aggressive) {
47
+ // 3. Fallback to UltraCompact if aggressive is set
48
+ result = unsafe ? this.ultraStratUnsafe.compress(data) : this.ultraStratSafe.compress(data);
49
+ }
50
+ else {
51
+ // 4. Default: Abbreviated Keys
52
+ // If Schema Separation isn't significantly better, we default to this.
53
+ // It handles mixed/nested payloads better and is "safer" structure-wise.
54
+ result = this.abbrevStrat.compress(data);
55
+ }
35
56
  }
36
- // 3. Fallback to UltraCompact if aggressive is set
37
- if (aggressive) {
38
- return unsafe ? this.ultraStratUnsafe.compress(data) : this.ultraStratSafe.compress(data);
57
+ // 5. Token Validation
58
+ if (validateTokenSavings) {
59
+ const inputTokens = countTokens(data);
60
+ const outputTokens = countTokens(result);
61
+ if (outputTokens > inputTokens) {
62
+ return data; // Return original data if compression increased token count
63
+ }
39
64
  }
40
- // 4. Default: Abbreviated Keys
41
- // If Schema Separation isn't significantly better, we default to this.
42
- // It handles mixed/nested payloads better and is "safer" structure-wise.
43
- return this.abbrevStrat.compress(data);
65
+ return result;
44
66
  }
45
67
  /**
46
68
  * Helper to get a specific strategy
package/dist/strategies.js CHANGED
@@ -14,6 +14,13 @@ const generateShortKey = (index) => {
14
14
  return shortKey;
15
15
  };
16
16
  exports.generateShortKey = generateShortKey;
17
+ /**
18
+ * Helper to check if value is a plain object
19
+ */
20
+ const isPlainObject = (obj) => {
21
+ return obj !== null && typeof obj === 'object' && !Array.isArray(obj) &&
22
+ (Object.getPrototypeOf(obj) === Object.prototype || Object.getPrototypeOf(obj) === null);
23
+ };
17
24
  /**
18
25
  * Strategy 1: Minify (Baseline)
19
26
  * Just standard JSON serialization (handled by default JSON.stringify)
@@ -53,7 +60,7 @@ class AbbreviatedKeysStrategy {
53
60
  }
54
61
  return newArr;
55
62
  }
56
- if (obj && typeof obj === 'object') {
63
+ if (isPlainObject(obj)) {
57
64
  const newObj = {};
58
65
  for (const k in obj) {
59
66
  if (Object.prototype.hasOwnProperty.call(obj, k)) {
@@ -87,7 +94,7 @@ class AbbreviatedKeysStrategy {
87
94
  }
88
95
  return newArr;
89
96
  }
90
- if (obj && typeof obj === 'object') {
97
+ if (isPlainObject(obj)) {
91
98
  const newObj = {};
92
99
  for (const k in obj) {
93
100
  if (Object.prototype.hasOwnProperty.call(obj, k)) {
@@ -153,7 +160,7 @@ class SchemaDataSeparationStrategy {
153
160
  }
154
161
  return newArr;
155
162
  }
156
- if (obj && typeof obj === 'object') {
163
+ if (isPlainObject(obj)) {
157
164
  const newObj = {};
158
165
  for (const k in obj) {
159
166
  if (Object.prototype.hasOwnProperty.call(obj, k)) {
@@ -190,13 +197,15 @@ class SchemaDataSeparationStrategy {
190
197
  }
191
198
  return newArr;
192
199
  }
193
- const newObj = {};
194
- for (const k in obj) {
195
- if (Object.prototype.hasOwnProperty.call(obj, k)) {
196
- newObj[k] = traverse(obj[k]);
200
+ if (isPlainObject(obj)) {
201
+ const newObj = {};
202
+ for (const k in obj) {
203
+ if (Object.prototype.hasOwnProperty.call(obj, k)) {
204
+ newObj[k] = traverse(obj[k]);
205
+ }
197
206
  }
207
+ return newObj;
198
208
  }
199
- return newObj;
200
209
  }
201
210
  return obj;
202
211
  };
@@ -239,7 +248,7 @@ class UltraCompactStrategy {
239
248
  }
240
249
  return newArr;
241
250
  }
242
- if (obj && typeof obj === 'object') {
251
+ if (isPlainObject(obj)) {
243
252
  const newObj = {};
244
253
  for (const k in obj) {
245
254
  if (Object.prototype.hasOwnProperty.call(obj, k)) {
@@ -273,7 +282,7 @@ class UltraCompactStrategy {
273
282
  }
274
283
  return newArr;
275
284
  }
276
- if (obj && typeof obj === 'object') {
285
+ if (isPlainObject(obj)) {
277
286
  const newObj = {};
278
287
  for (const k in obj) {
279
288
  if (Object.prototype.hasOwnProperty.call(obj, k)) {
package/dist/tokenizer.d.ts ADDED
@@ -0,0 +1,13 @@
1
+ import { TiktokenEncoding } from "js-tiktoken";
2
+ export type SupportedEncoding = TiktokenEncoding | 'cl100k_base' | 'o200k_base' | 'p50k_base' | 'r50k_base';
3
+ export declare class TokenCounter {
4
+ private static cache;
5
+ /**
6
+ * Gets a tokenizer instance for the specified encoding or model.
7
+ */
8
+ private static getTokenizer;
9
+ /**
10
+ * Counts tokens in a string or object (as JSON).
11
+ */
12
+ static count(data: any, encodingOrModel?: string): number;
13
+ }
package/dist/tokenizer.js ADDED
@@ -0,0 +1,41 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.TokenCounter = void 0;
4
+ const js_tiktoken_1 = require("js-tiktoken");
5
+ class TokenCounter {
6
+ /**
7
+ * Gets a tokenizer instance for the specified encoding or model.
8
+ */
9
+ static getTokenizer(encodingOrModel) {
10
+ if (this.cache.has(encodingOrModel)) {
11
+ return this.cache.get(encodingOrModel);
12
+ }
13
+ let tokenizer;
14
+ try {
15
+ // Try as encoding first
16
+ tokenizer = (0, js_tiktoken_1.getEncoding)(encodingOrModel);
17
+ }
18
+ catch {
19
+ try {
20
+ // Try as model name
21
+ tokenizer = (0, js_tiktoken_1.encodingForModel)(encodingOrModel);
22
+ }
23
+ catch {
24
+ // Fallback to cl100k_base (GPT-4)
25
+ tokenizer = (0, js_tiktoken_1.getEncoding)("cl100k_base");
26
+ }
27
+ }
28
+ this.cache.set(encodingOrModel, tokenizer);
29
+ return tokenizer;
30
+ }
31
+ /**
32
+ * Counts tokens in a string or object (as JSON).
33
+ */
34
+ static count(data, encodingOrModel = "cl100k_base") {
35
+ const text = typeof data === 'string' ? data : JSON.stringify(data);
36
+ const tokenizer = this.getTokenizer(encodingOrModel);
37
+ return tokenizer.encode(text).length;
38
+ }
39
+ }
40
+ exports.TokenCounter = TokenCounter;
41
+ TokenCounter.cache = new Map();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-chat-msg-compressor",
3
- "version": "1.0.3",
3
+ "version": "1.0.4",
4
4
  "description": "Intelligent JSON compression for LLM API optimization",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -48,5 +48,8 @@
48
48
  "ts-jest": "^29.4.6",
49
49
  "ts-node": "^10.9.2",
50
50
  "typescript": "^5.0.0"
51
+ },
52
+ "dependencies": {
53
+ "js-tiktoken": "^1.0.21"
51
54
  }
52
- }
55
+ }