llm-chat-msg-compressor 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analyzer.js +20 -2
- package/dist/optimizer.d.ts +2 -0
- package/dist/optimizer.js +37 -15
- package/dist/strategies.js +19 -10
- package/dist/tokenizer.d.ts +13 -0
- package/dist/tokenizer.js +41 -0
- package/package.json +5 -2
package/dist/analyzer.js
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.Analyzer = void 0;
|
|
4
|
+
/**
 * Reports whether a value is a "plain" object: a non-null, non-array object
 * whose prototype is either Object.prototype or null.
 *
 * Class instances and built-ins such as Date or Map have a different
 * prototype and are therefore rejected.
 *
 * @param {*} obj - Value to classify.
 * @returns {boolean} True only for plain objects.
 */
const isPlainObject = (obj) => {
    if (obj === null || typeof obj !== 'object' || Array.isArray(obj)) {
        return false;
    }
    // Read the prototype once and compare it against both accepted values.
    const proto = Object.getPrototypeOf(obj);
    return proto === Object.prototype || proto === null;
};
|
|
4
8
|
class Analyzer {
|
|
5
9
|
static analyze(data) {
|
|
6
10
|
// Pre-flight check for primitives or very small objects
|
|
@@ -81,7 +85,7 @@ class Analyzer {
|
|
|
81
85
|
traverse(obj[i], currentDepth + 1);
|
|
82
86
|
}
|
|
83
87
|
}
|
|
84
|
-
else if (obj
|
|
88
|
+
else if (isPlainObject(obj)) {
|
|
85
89
|
objectCount++;
|
|
86
90
|
totalBytes += 2; // {}
|
|
87
91
|
let first = true;
|
|
@@ -98,16 +102,30 @@ class Analyzer {
|
|
|
98
102
|
}
|
|
99
103
|
}
|
|
100
104
|
else {
|
|
101
|
-
// Primitive
|
|
105
|
+
// Primitive or non-plain object (Date, etc.)
|
|
102
106
|
if (typeof obj === 'string') {
|
|
103
107
|
totalBytes += Buffer.byteLength(obj, 'utf8') + 2; // quotes
|
|
104
108
|
}
|
|
105
109
|
else if (typeof obj === 'number' || typeof obj === 'boolean') {
|
|
106
110
|
totalBytes += String(obj).length;
|
|
107
111
|
}
|
|
112
|
+
else if (obj instanceof Date) {
|
|
113
|
+
totalBytes += obj.toISOString().length + 2; // quotes
|
|
114
|
+
}
|
|
108
115
|
else if (obj === null) {
|
|
109
116
|
totalBytes += 4;
|
|
110
117
|
}
|
|
118
|
+
else {
|
|
119
|
+
// Fallback for other types that might be stringified
|
|
120
|
+
try {
|
|
121
|
+
const s = JSON.stringify(obj);
|
|
122
|
+
if (s)
|
|
123
|
+
totalBytes += Buffer.byteLength(s, 'utf8');
|
|
124
|
+
}
|
|
125
|
+
catch {
|
|
126
|
+
// Ignore if not stringifiable
|
|
127
|
+
}
|
|
128
|
+
}
|
|
111
129
|
}
|
|
112
130
|
};
|
|
113
131
|
traverse(data, 0);
|
package/dist/optimizer.d.ts
CHANGED
package/dist/optimizer.js
CHANGED
|
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.Optimizer = void 0;
|
|
4
4
|
const strategies_1 = require("./strategies");
|
|
5
5
|
const analyzer_1 = require("./analyzer");
|
|
6
|
+
const tokenizer_1 = require("./tokenizer");
|
|
6
7
|
class Optimizer {
|
|
7
8
|
constructor() {
|
|
8
9
|
this.schemaStrat = new strategies_1.SchemaDataSeparationStrategy();
|
|
@@ -20,27 +21,48 @@ class Optimizer {
|
|
|
20
21
|
*/
|
|
21
22
|
optimize(data, options = {}) {
|
|
22
23
|
const { aggressive = false, thresholdBytes = 500, // Increased default: small payloads often grow with key-map overhead
|
|
23
|
-
unsafe = false } = options;
|
|
24
|
+
unsafe = false, validateTokenSavings = false, tokenizer = 'cl100k_base' } = options;
|
|
24
25
|
const metrics = analyzer_1.Analyzer.analyze(data);
|
|
26
|
+
// Helper to count tokens
|
|
27
|
+
const countTokens = (val) => {
|
|
28
|
+
if (typeof tokenizer === 'function') {
|
|
29
|
+
return tokenizer(typeof val === 'string' ? val : JSON.stringify(val));
|
|
30
|
+
}
|
|
31
|
+
return tokenizer_1.TokenCounter.count(val, tokenizer);
|
|
32
|
+
};
|
|
33
|
+
let result;
|
|
25
34
|
// 1. If too small, just minify
|
|
26
35
|
if (metrics.totalBytes < thresholdBytes) {
|
|
27
|
-
|
|
36
|
+
result = strategies_1.minify.compress(data);
|
|
28
37
|
}
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
38
|
+
else {
|
|
39
|
+
// 2. Smart Strategy Selection
|
|
40
|
+
// Compare estimated savings to pick the winner.
|
|
41
|
+
// Prefer SchemaSeparation if it saves MORE than AbbreviatedKeys (with a slight buffer for safety)
|
|
42
|
+
// Schema Separation is "riskier" structure-wise (arrays vs maps), so we want it to be worth it.
|
|
43
|
+
if (metrics.estimatedSchemaSavings > metrics.estimatedAbbrevSavings * 1.1) {
|
|
44
|
+
result = this.schemaStrat.compress(data);
|
|
45
|
+
}
|
|
46
|
+
else if (aggressive) {
|
|
47
|
+
// 3. Fallback to UltraCompact if aggressive is set
|
|
48
|
+
result = unsafe ? this.ultraStratUnsafe.compress(data) : this.ultraStratSafe.compress(data);
|
|
49
|
+
}
|
|
50
|
+
else {
|
|
51
|
+
// 4. Default: Abbreviated Keys
|
|
52
|
+
// If Schema Separation isn't significantly better, we default to this.
|
|
53
|
+
// It handles mixed/nested payloads better and is "safer" structure-wise.
|
|
54
|
+
result = this.abbrevStrat.compress(data);
|
|
55
|
+
}
|
|
35
56
|
}
|
|
36
|
-
//
|
|
37
|
-
if (
|
|
38
|
-
|
|
57
|
+
// 5. Token Validation
|
|
58
|
+
if (validateTokenSavings) {
|
|
59
|
+
const inputTokens = countTokens(data);
|
|
60
|
+
const outputTokens = countTokens(result);
|
|
61
|
+
if (outputTokens > inputTokens) {
|
|
62
|
+
return data; // Return original data if compression increased token count
|
|
63
|
+
}
|
|
39
64
|
}
|
|
40
|
-
|
|
41
|
-
// If Schema Separation isn't significantly better, we default to this.
|
|
42
|
-
// It handles mixed/nested payloads better and is "safer" structure-wise.
|
|
43
|
-
return this.abbrevStrat.compress(data);
|
|
65
|
+
return result;
|
|
44
66
|
}
|
|
45
67
|
/**
|
|
46
68
|
* Helper to get a specific strategy
|
package/dist/strategies.js
CHANGED
|
@@ -14,6 +14,13 @@ const generateShortKey = (index) => {
|
|
|
14
14
|
return shortKey;
|
|
15
15
|
};
|
|
16
16
|
exports.generateShortKey = generateShortKey;
|
|
17
|
+
/**
 * Helper to check if value is a plain object.
 *
 * A value qualifies when it is a non-null object, is not an array, and its
 * prototype is exactly Object.prototype or null. Anything else (primitives,
 * arrays, Date, Map, class instances, ...) does not.
 *
 * @param {*} obj - Candidate value.
 * @returns {boolean} Whether `obj` is a plain object.
 */
const isPlainObject = (obj) => {
    const isObjectLike = obj !== null && typeof obj === 'object';
    if (!isObjectLike || Array.isArray(obj)) {
        return false;
    }
    const prototype = Object.getPrototypeOf(obj);
    return prototype === null || prototype === Object.prototype;
};
|
|
17
24
|
/**
|
|
18
25
|
* Strategy 1: Minify (Baseline)
|
|
19
26
|
* Just standard JSON serialization (handled by default JSON.stringify)
|
|
@@ -53,7 +60,7 @@ class AbbreviatedKeysStrategy {
|
|
|
53
60
|
}
|
|
54
61
|
return newArr;
|
|
55
62
|
}
|
|
56
|
-
if (obj
|
|
63
|
+
if (isPlainObject(obj)) {
|
|
57
64
|
const newObj = {};
|
|
58
65
|
for (const k in obj) {
|
|
59
66
|
if (Object.prototype.hasOwnProperty.call(obj, k)) {
|
|
@@ -87,7 +94,7 @@ class AbbreviatedKeysStrategy {
|
|
|
87
94
|
}
|
|
88
95
|
return newArr;
|
|
89
96
|
}
|
|
90
|
-
if (obj
|
|
97
|
+
if (isPlainObject(obj)) {
|
|
91
98
|
const newObj = {};
|
|
92
99
|
for (const k in obj) {
|
|
93
100
|
if (Object.prototype.hasOwnProperty.call(obj, k)) {
|
|
@@ -153,7 +160,7 @@ class SchemaDataSeparationStrategy {
|
|
|
153
160
|
}
|
|
154
161
|
return newArr;
|
|
155
162
|
}
|
|
156
|
-
if (obj
|
|
163
|
+
if (isPlainObject(obj)) {
|
|
157
164
|
const newObj = {};
|
|
158
165
|
for (const k in obj) {
|
|
159
166
|
if (Object.prototype.hasOwnProperty.call(obj, k)) {
|
|
@@ -190,13 +197,15 @@ class SchemaDataSeparationStrategy {
|
|
|
190
197
|
}
|
|
191
198
|
return newArr;
|
|
192
199
|
}
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
200
|
+
if (isPlainObject(obj)) {
|
|
201
|
+
const newObj = {};
|
|
202
|
+
for (const k in obj) {
|
|
203
|
+
if (Object.prototype.hasOwnProperty.call(obj, k)) {
|
|
204
|
+
newObj[k] = traverse(obj[k]);
|
|
205
|
+
}
|
|
197
206
|
}
|
|
207
|
+
return newObj;
|
|
198
208
|
}
|
|
199
|
-
return newObj;
|
|
200
209
|
}
|
|
201
210
|
return obj;
|
|
202
211
|
};
|
|
@@ -239,7 +248,7 @@ class UltraCompactStrategy {
|
|
|
239
248
|
}
|
|
240
249
|
return newArr;
|
|
241
250
|
}
|
|
242
|
-
if (obj
|
|
251
|
+
if (isPlainObject(obj)) {
|
|
243
252
|
const newObj = {};
|
|
244
253
|
for (const k in obj) {
|
|
245
254
|
if (Object.prototype.hasOwnProperty.call(obj, k)) {
|
|
@@ -273,7 +282,7 @@ class UltraCompactStrategy {
|
|
|
273
282
|
}
|
|
274
283
|
return newArr;
|
|
275
284
|
}
|
|
276
|
-
if (obj
|
|
285
|
+
if (isPlainObject(obj)) {
|
|
277
286
|
const newObj = {};
|
|
278
287
|
for (const k in obj) {
|
|
279
288
|
if (Object.prototype.hasOwnProperty.call(obj, k)) {
|
|
package/dist/tokenizer.d.ts
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { TiktokenEncoding } from "js-tiktoken";
|
|
2
|
+
export type SupportedEncoding = TiktokenEncoding | 'cl100k_base' | 'o200k_base' | 'p50k_base' | 'r50k_base';
|
|
3
|
+
/**
 * Static helper that counts tokens for a string or a JSON-serializable value.
 */
export declare class TokenCounter {
    /** Tokenizer instances kept for reuse between calls. */
    private static cache;
    /**
     * Gets a tokenizer instance for the specified encoding or model.
     */
    private static getTokenizer;
    /**
     * Counts tokens in a string or object (as JSON).
     *
     * @param data Text to count, or a value that is serialized to JSON first.
     * @param encodingOrModel Optional encoding or model name.
     * @returns The number of tokens.
     */
    static count(data: any, encodingOrModel?: string): number;
}
|
|
package/dist/tokenizer.js
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.TokenCounter = void 0;
|
|
4
|
+
const js_tiktoken_1 = require("js-tiktoken");
|
|
5
|
+
/**
 * Counts tokens with js-tiktoken, reusing tokenizer instances across calls.
 */
class TokenCounter {
    /**
     * Gets a tokenizer instance for the specified encoding or model.
     *
     * Resolution order: the name is tried as an encoding, then as a model
     * name, and finally "cl100k_base" is used. The result is cached under the
     * requested name, so a name that needed the fallback is resolved only once.
     *
     * @param {string} encodingOrModel - Encoding name or model name.
     * @returns {object} Tokenizer exposing `encode(text)`.
     */
    static getTokenizer(encodingOrModel) {
        const cached = this.cache.get(encodingOrModel);
        if (cached !== undefined) {
            return cached;
        }
        let tokenizer;
        try {
            // Try as encoding first
            tokenizer = (0, js_tiktoken_1.getEncoding)(encodingOrModel);
        }
        catch {
            try {
                // Try as model name
                tokenizer = (0, js_tiktoken_1.encodingForModel)(encodingOrModel);
            }
            catch {
                // Fallback to cl100k_base (GPT-4)
                tokenizer = (0, js_tiktoken_1.getEncoding)("cl100k_base");
            }
        }
        this.cache.set(encodingOrModel, tokenizer);
        return tokenizer;
    }
    /**
     * Counts tokens in a string or object (as JSON).
     *
     * @param {*} data - Text to count, or a value serialized with JSON.stringify.
     * @param {string} [encodingOrModel="cl100k_base"] - Encoding or model name.
     * @returns {number} Token count; 0 when `data` has no JSON representation.
     */
    static count(data, encodingOrModel = "cl100k_base") {
        const text = typeof data === 'string' ? data : JSON.stringify(data);
        // JSON.stringify returns undefined for undefined, functions and
        // symbols; there is no text to encode in that case.
        if (text === undefined) {
            return 0;
        }
        const tokenizer = this.getTokenizer(encodingOrModel);
        return tokenizer.encode(text).length;
    }
}
exports.TokenCounter = TokenCounter;
TokenCounter.cache = new Map();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "llm-chat-msg-compressor",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.4",
|
|
4
4
|
"description": "Intelligent JSON compression for LLM API optimization",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -48,5 +48,8 @@
|
|
|
48
48
|
"ts-jest": "^29.4.6",
|
|
49
49
|
"ts-node": "^10.9.2",
|
|
50
50
|
"typescript": "^5.0.0"
|
|
51
|
+
},
|
|
52
|
+
"dependencies": {
|
|
53
|
+
"js-tiktoken": "^1.0.21"
|
|
51
54
|
}
|
|
52
|
-
}
|
|
55
|
+
}
|