npm - llm-chat-msg-compressor - Versions diffs - 1.0.3 → 1.0.4 - Mend

llm-chat-msg-compressor 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/analyzer.js CHANGED Viewed

@@ -1,6 +1,10 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.Analyzer = void 0;
+const isPlainObject = (obj) => { // true only for plain objects: prototype is Object.prototype or null
+    return obj !== null && typeof obj === 'object' && !Array.isArray(obj) && // excludes null, primitives and arrays
+        (Object.getPrototypeOf(obj) === Object.prototype || Object.getPrototypeOf(obj) === null); // excludes Date, Map, class instances, etc.
+};
 class Analyzer {
     static analyze(data) {
         // Pre-flight check for primitives or very small objects
@@ -81,7 +85,7 @@ class Analyzer {
                     traverse(obj[i], currentDepth + 1);
                 }
             }
-            else if (obj && typeof obj === 'object') {
+            else if (isPlainObject(obj)) {
                 objectCount++;
                 totalBytes += 2; // {}
                 let first = true;
@@ -98,16 +102,30 @@ class Analyzer {
                 }
             }
             else {
-                // Primitive
+                // Primitive or non-plain object (Date, etc.)
                 if (typeof obj === 'string') {
                     totalBytes += Buffer.byteLength(obj, 'utf8') + 2; // quotes
                 }
                 else if (typeof obj === 'number' || typeof obj === 'boolean') {
                     totalBytes += String(obj).length;
                 }
+                else if (obj instanceof Date) {
+                    totalBytes += obj.toISOString().length + 2; // quotes
+                }
                 else if (obj === null) {
                     totalBytes += 4;
                 }
+                else {
+                    // Fallback for other types that might be stringified
+                    try {
+                        const s = JSON.stringify(obj);
+                        if (s)
+                            totalBytes += Buffer.byteLength(s, 'utf8');
+                    }
+                    catch {
+                        // Ignore if not stringifiable
+                    }
+                }
             }
         };
         traverse(data, 0);

package/dist/optimizer.d.ts CHANGED Viewed

@@ -3,6 +3,8 @@ export interface OptimizerOptions {
     aggressive?: boolean;
     thresholdBytes?: number;
     unsafe?: boolean;
+    validateTokenSavings?: boolean;
+    tokenizer?: string | ((text: string) => number);
 }
 export declare class Optimizer {
     private schemaStrat;

package/dist/optimizer.js CHANGED Viewed

@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.Optimizer = void 0;
 const strategies_1 = require("./strategies");
 const analyzer_1 = require("./analyzer");
+const tokenizer_1 = require("./tokenizer");
 class Optimizer {
     constructor() {
         this.schemaStrat = new strategies_1.SchemaDataSeparationStrategy();
@@ -20,27 +21,48 @@ class Optimizer {
      */
     optimize(data, options = {}) { // chooses a strategy from Analyzer metrics; returns its output, or the original data when token validation rejects it
         const { aggressive = false, thresholdBytes = 500, // Increased default: small payloads often grow with key-map overhead
-        unsafe = false } = options;
+        unsafe = false, validateTokenSavings = false, tokenizer = 'cl100k_base' } = options; // token validation is opt-in; tokenizer is either a name or a (text) => number function
         const metrics = analyzer_1.Analyzer.analyze(data);
+        // Helper to count tokens
+        const countTokens = (val) => {
+            if (typeof tokenizer === 'function') {
+                return tokenizer(typeof val === 'string' ? val : JSON.stringify(val)); // custom counter gets text; NOTE(review): JSON.stringify yields undefined for undefined/function input - confirm callbacks tolerate that
+            }
+            return tokenizer_1.TokenCounter.count(val, tokenizer); // string tokenizer: delegate to the TokenCounter helper
+        };
+        let result; // every branch below assigns here instead of returning, so validation can run last
         // 1. If too small, just minify
         if (metrics.totalBytes < thresholdBytes) {
-            return strategies_1.minify.compress(data);
+            result = strategies_1.minify.compress(data);
         }
-        // 2. Smart Strategy Selection
-        // Compare estimated savings to pick the winner.
-        // Prefer SchemaSeparation if it saves MORE than AbbreviatedKeys (with a slight buffer for safety)
-        // Schema Separation is "riskier" structure-wise (arrays vs maps), so we want it to be worth it.
-        if (metrics.estimatedSchemaSavings > metrics.estimatedAbbrevSavings * 1.1) {
-            return this.schemaStrat.compress(data);
+        else {
+            // 2. Smart Strategy Selection
+            // Compare estimated savings to pick the winner.
+            // Prefer SchemaSeparation if it saves MORE than AbbreviatedKeys (with a slight buffer for safety)
+            // Schema Separation is "riskier" structure-wise (arrays vs maps), so we want it to be worth it.
+            if (metrics.estimatedSchemaSavings > metrics.estimatedAbbrevSavings * 1.1) {
+                result = this.schemaStrat.compress(data);
+            }
+            else if (aggressive) {
+                // 3. Fallback to UltraCompact if aggressive is set
+                result = unsafe ? this.ultraStratUnsafe.compress(data) : this.ultraStratSafe.compress(data);
+            }
+            else {
+                // 4. Default: Abbreviated Keys
+                // If Schema Separation isn't significantly better, we default to this.
+                // It handles mixed/nested payloads better and is "safer" structure-wise.
+                result = this.abbrevStrat.compress(data);
+            }
         }
-        // 3. Fallback to UltraCompact if aggressive is set
-        if (aggressive) {
-            return unsafe ? this.ultraStratUnsafe.compress(data) : this.ultraStratSafe.compress(data);
+        // 5. Token Validation
+        if (validateTokenSavings) {
+            const inputTokens = countTokens(data);
+            const outputTokens = countTokens(result);
+            if (outputTokens > inputTokens) { // strictly greater: a tie still returns the compressed result
+                return data; // Return original data if compression increased token count
+            }
         }
-        // 4. Default: Abbreviated Keys
-        // If Schema Separation isn't significantly better, we default to this.
-        // It handles mixed/nested payloads better and is "safer" structure-wise.
-        return this.abbrevStrat.compress(data);
+        return result;
     }
     /**
      * Helper to get a specific strategy

package/dist/strategies.js CHANGED Viewed

@@ -14,6 +14,13 @@ const generateShortKey = (index) => {
     return shortKey;
 };
 exports.generateShortKey = generateShortKey;
+/**
+ * Helper to check if value is a plain object
+ */
+const isPlainObject = (obj) => { // gate used by the strategies' traversals so only plain objects get their keys rewritten
+    return obj !== null && typeof obj === 'object' && !Array.isArray(obj) && // excludes null, primitives and arrays
+        (Object.getPrototypeOf(obj) === Object.prototype || Object.getPrototypeOf(obj) === null); // accepts {} literals and Object.create(null); excludes Date and other class instances
+};
 /**
  * Strategy 1: Minify (Baseline)
  * Just standard JSON serialization (handled by default JSON.stringify)
@@ -53,7 +60,7 @@ class AbbreviatedKeysStrategy {
                 }
                 return newArr;
             }
-            if (obj && typeof obj === 'object') {
+            if (isPlainObject(obj)) {
                 const newObj = {};
                 for (const k in obj) {
                     if (Object.prototype.hasOwnProperty.call(obj, k)) {
@@ -87,7 +94,7 @@ class AbbreviatedKeysStrategy {
                 }
                 return newArr;
             }
-            if (obj && typeof obj === 'object') {
+            if (isPlainObject(obj)) {
                 const newObj = {};
                 for (const k in obj) {
                     if (Object.prototype.hasOwnProperty.call(obj, k)) {
@@ -153,7 +160,7 @@ class SchemaDataSeparationStrategy {
                 }
                 return newArr;
             }
-            if (obj && typeof obj === 'object') {
+            if (isPlainObject(obj)) {
                 const newObj = {};
                 for (const k in obj) {
                     if (Object.prototype.hasOwnProperty.call(obj, k)) {
@@ -190,13 +197,15 @@ class SchemaDataSeparationStrategy {
                     }
                     return newArr;
                 }
-                const newObj = {};
-                for (const k in obj) {
-                    if (Object.prototype.hasOwnProperty.call(obj, k)) {
-                        newObj[k] = traverse(obj[k]);
+                if (isPlainObject(obj)) {
+                    const newObj = {};
+                    for (const k in obj) {
+                        if (Object.prototype.hasOwnProperty.call(obj, k)) {
+                            newObj[k] = traverse(obj[k]);
+                        }
                     }
+                    return newObj;
                 }
-                return newObj;
             }
             return obj;
         };
@@ -239,7 +248,7 @@ class UltraCompactStrategy {
                 }
                 return newArr;
             }
-            if (obj && typeof obj === 'object') {
+            if (isPlainObject(obj)) {
                 const newObj = {};
                 for (const k in obj) {
                     if (Object.prototype.hasOwnProperty.call(obj, k)) {
@@ -273,7 +282,7 @@ class UltraCompactStrategy {
                 }
                 return newArr;
             }
-            if (obj && typeof obj === 'object') {
+            if (isPlainObject(obj)) {
                 const newObj = {};
                 for (const k in obj) {
                     if (Object.prototype.hasOwnProperty.call(obj, k)) {

package/dist/tokenizer.d.ts ADDED Viewed

@@ -0,0 +1,13 @@
+import { TiktokenEncoding } from "js-tiktoken";
+export type SupportedEncoding = TiktokenEncoding | 'cl100k_base' | 'o200k_base' | 'p50k_base' | 'r50k_base';
+export declare class TokenCounter {
+    private static cache; // initialised as a Map in tokenizer.js; keyed by the requested encoding/model string
+    /**
+     * Gets a tokenizer instance for the specified encoding or model.
+     */
+    private static getTokenizer;
+    /**
+     * Counts tokens in a string or object (as JSON).
+     */
+    static count(data: any, encodingOrModel?: string): number; // encodingOrModel defaults to "cl100k_base" in the implementation; typed as plain string, not SupportedEncoding
+}

package/dist/tokenizer.js ADDED Viewed

@@ -0,0 +1,41 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.TokenCounter = void 0;
+const js_tiktoken_1 = require("js-tiktoken");
+class TokenCounter { // counts tokens with js-tiktoken, keeping one tokenizer instance per requested name
+    /**
+     * Gets a tokenizer instance for the specified encoding or model.
+     */
+    static getTokenizer(encodingOrModel) {
+        if (this.cache.has(encodingOrModel)) { // reuse a tokenizer built by an earlier call
+            return this.cache.get(encodingOrModel);
+        }
+        let tokenizer;
+        try {
+            // Try as encoding first
+            tokenizer = (0, js_tiktoken_1.getEncoding)(encodingOrModel);
+        }
+        catch {
+            try {
+                // Try as model name
+                tokenizer = (0, js_tiktoken_1.encodingForModel)(encodingOrModel);
+            }
+            catch {
+                // Fallback to cl100k_base (GPT-4)
+                tokenizer = (0, js_tiktoken_1.getEncoding)("cl100k_base"); // unknown names degrade silently; no error reaches the caller
+            }
+        }
+        this.cache.set(encodingOrModel, tokenizer); // stored under the requested name, even when the cl100k_base fallback was used
+        return tokenizer;
+    }
+    /**
+     * Counts tokens in a string or object (as JSON).
+     */
+    static count(data, encodingOrModel = "cl100k_base") {
+        const text = typeof data === 'string' ? data : JSON.stringify(data); // strings are counted as-is; NOTE(review): JSON.stringify yields undefined for undefined/function input - confirm encode() handles that
+        const tokenizer = this.getTokenizer(encodingOrModel);
+        return tokenizer.encode(text).length; // token count = number of encoded token ids
+    }
+}
+exports.TokenCounter = TokenCounter;
+TokenCounter.cache = new Map(); // static cache: encodingOrModel string -> tokenizer instance

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "llm-chat-msg-compressor",
-    "version": "1.0.3",
+    "version": "1.0.4",
     "description": "Intelligent JSON compression for LLM API optimization",
     "main": "dist/index.js",
     "types": "dist/index.d.ts",
@@ -48,5 +48,8 @@
         "ts-jest": "^29.4.6",
         "ts-node": "^10.9.2",
         "typescript": "^5.0.0"
+    },
+    "dependencies": {
+        "js-tiktoken": "^1.0.21"
     }
-}
+}