@tyvm/knowhow 0.0.61 → 0.0.63

This diff compares the contents of two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.
Files changed (29)
  1. package/package.json +1 -1
  2. package/src/chat/modules/AgentModule.ts +6 -6
  3. package/src/processors/JsonCompressor.ts +496 -0
  4. package/src/processors/TokenCompressor.ts +194 -125
  5. package/src/processors/ToolResponseCache.ts +64 -11
  6. package/src/processors/index.ts +1 -0
  7. package/tests/compressor/bigstring.test.ts +352 -2
  8. package/tests/compressor/githubjson.txt +1 -0
  9. package/tests/compressor/toolResponseCache.test.ts +303 -0
  10. package/ts_build/package.json +1 -1
  11. package/ts_build/src/chat/modules/AgentModule.js +5 -4
  12. package/ts_build/src/chat/modules/AgentModule.js.map +1 -1
  13. package/ts_build/src/processors/JsonCompressor.d.ts +36 -0
  14. package/ts_build/src/processors/JsonCompressor.js +295 -0
  15. package/ts_build/src/processors/JsonCompressor.js.map +1 -0
  16. package/ts_build/src/processors/TokenCompressor.d.ts +23 -5
  17. package/ts_build/src/processors/TokenCompressor.js +106 -70
  18. package/ts_build/src/processors/TokenCompressor.js.map +1 -1
  19. package/ts_build/src/processors/ToolResponseCache.d.ts +4 -2
  20. package/ts_build/src/processors/ToolResponseCache.js +50 -10
  21. package/ts_build/src/processors/ToolResponseCache.js.map +1 -1
  22. package/ts_build/src/processors/index.d.ts +1 -0
  23. package/ts_build/src/processors/index.js +3 -1
  24. package/ts_build/src/processors/index.js.map +1 -1
  25. package/ts_build/tests/compressor/bigstring.test.js +209 -0
  26. package/ts_build/tests/compressor/bigstring.test.js.map +1 -1
  27. package/ts_build/tests/compressor/toolResponseCache.test.d.ts +1 -0
  28. package/ts_build/tests/compressor/toolResponseCache.test.js +240 -0
  29. package/ts_build/tests/compressor/toolResponseCache.test.js.map +1 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tyvm/knowhow",
3
- "version": "0.0.61",
3
+ "version": "0.0.63",
4
4
  "description": "ai cli with plugins and agents",
5
5
  "main": "ts_build/src/index.js",
6
6
  "bin": {
package/src/chat/modules/AgentModule.ts CHANGED
@@ -25,6 +25,7 @@ import {
25
25
  import { TaskInfo, ChatSession } from "../types";
26
26
  import { agents } from "../../agents";
27
27
  import { ToolCallEvent } from "../../agents/base/base";
28
+ import { $Command } from "@aws-sdk/client-s3";
28
29
 
29
30
  export class AgentModule extends BaseChatModule {
30
31
  name = "agent";
@@ -582,11 +583,14 @@ Please continue from where you left off and complete the original request.
582
583
  10 // Priority level
583
584
  );
584
585
 
585
- agent.messageProcessor.setProcessors("pre_call", [
586
+ const caching = [
586
587
  new ToolResponseCache(agent.tools).createProcessor(),
587
588
  new TokenCompressor(agent.tools).createProcessor((msg) =>
588
589
  Boolean(msg.role === "tool" && msg.tool_call_id)
589
590
  ),
591
+ ];
592
+ agent.messageProcessor.setProcessors("pre_call", [
593
+ ...caching,
590
594
  new CustomVariables(agent.tools).createProcessor(),
591
595
  ]);
592
596
 
@@ -595,11 +599,7 @@ Please continue from where you left off and complete the original request.
595
599
  new HarmonyToolProcessor().createProcessor(),
596
600
  ]);
597
601
 
598
- agent.messageProcessor.setProcessors("post_tools", [
599
- new TokenCompressor(agent.tools).createProcessor((msg) =>
600
- Boolean(msg.role === "tool" && msg.tool_call_id)
601
- ),
602
- ]);
602
+ agent.messageProcessor.setProcessors("post_tools", caching);
603
603
 
604
604
  // Set up event listeners
605
605
  if (!agent.agentEvents.listenerCount(agent.eventTypes.toolCall)) {
package/src/processors/JsonCompressor.ts ADDED
@@ -0,0 +1,496 @@
1
+ /**
2
+ * Schema information for compressed JSON
3
+ */
4
+ export interface JsonSchema {
5
+ type: string;
6
+ properties?: Record<string, JsonSchema>;
7
+ items?: JsonSchema;
8
+ compressed_properties?: string[];
9
+ }
10
+
11
+ /**
12
+ * Metadata about compressed properties
13
+ */
14
+ export interface CompressionMetadata {
15
+ compressed_properties: Record<string, any>;
16
+ compression_reason: string;
17
+ similarity_score?: number;
18
+ }
19
+
20
+ /**
21
+ * Interface for storage operations
22
+ */
23
+ export interface JsonCompressorStorage {
24
+ storeString(key: string, value: string): void;
25
+ generateKey(): string;
26
+ estimateTokens(text: string): number;
27
+ }
28
+
29
+ /**
30
+ * Handles JSON-specific compression logic including schema generation,
31
+ * low-signal property detection, and deduplication.
32
+ */
33
+ export class JsonCompressor {
34
+ // Deduplication tracking
35
+ private deduplicationMap: Map<string, string> = new Map();
36
+ private objectSeenCount: Map<string, number> = new Map();
37
+ private propertyNamesMap: Map<string, string> = new Map();
38
+ private propertyNamesSeenCount: Map<string, number> = new Map();
39
+
40
+ private compressionThreshold: number;
41
+ private maxTokens: number;
42
+ private toolName: string;
43
+ private storage: JsonCompressorStorage;
44
+
45
+ constructor(
46
+ storage: JsonCompressorStorage,
47
+ compressionThreshold: number,
48
+ maxTokens: number,
49
+ toolName: string
50
+ ) {
51
+ this.storage = storage;
52
+ this.compressionThreshold = compressionThreshold;
53
+ this.maxTokens = maxTokens;
54
+ this.toolName = toolName;
55
+ }
56
+
57
+
58
+ /**
59
+ * Clear all deduplication tracking
60
+ */
61
+ clearDeduplication(): void {
62
+ this.deduplicationMap.clear();
63
+ this.objectSeenCount.clear();
64
+ this.propertyNamesMap.clear();
65
+ this.propertyNamesSeenCount.clear();
66
+ }
67
+
68
+ /**
69
+ * Update compression settings
70
+ */
71
+ updateSettings(compressionThreshold: number, maxTokens: number): void {
72
+ this.compressionThreshold = compressionThreshold;
73
+ this.maxTokens = maxTokens;
74
+ }
75
+
76
+ /**
77
+ * Attempts to parse content as JSON and returns parsed object if successful.
78
+ * Also handles MCP tool response format where actual data is in content[0].text
79
+ */
80
+ tryParseJson(content: string): any | null {
81
+ try {
82
+ const parsed = JSON.parse(content);
83
+
84
+ // If the parsed result is a string, try parsing it again (double-encoded JSON)
85
+ if (typeof parsed === 'string') {
86
+ try {
87
+ return this.tryParseJson(parsed); // Recursive call to handle nested stringified JSON
88
+ } catch (e) {
89
+ return parsed; // If second parse fails, return the string
90
+ }
91
+ }
92
+
93
+ // Check if this is an MCP tool response format
94
+ if (parsed &&
95
+ typeof parsed === 'object' &&
96
+ Array.isArray(parsed.content) &&
97
+ parsed.content.length > 0) {
98
+
99
+ const firstContent = parsed.content[0];
100
+
101
+ // Check if it has type: "text" and a text field
102
+ if (firstContent.type === 'text' && typeof firstContent.text === 'string') {
103
+ try {
104
+ // Try to parse the nested text as JSON (recursively to handle double-encoding)
105
+ const nestedData = this.tryParseJson(firstContent.text);
106
+
107
+ // Return a structured object that preserves the MCP format but exposes the data
108
+ return {
109
+ _mcp_format: true,
110
+ _raw_structure: { content: [{ type: 'text' }] },
111
+ _data: nestedData
112
+ };
113
+ } catch (e) {
114
+ // If nested text isn't JSON, return original parsed
115
+ return parsed;
116
+ }
117
+ }
118
+ }
119
+
120
+ return parsed;
121
+ } catch {
122
+ return null;
123
+ }
124
+ }
125
+
126
+
127
+ /**
128
+ * Generate a JSON schema from an object
129
+ */
130
+ public generateSchema(obj: any, maxDepth: number = 3, currentDepth: number = 0): JsonSchema {
131
+ if (currentDepth > maxDepth) {
132
+ return { type: 'any' };
133
+ }
134
+
135
+ // Handle MCP format objects
136
+ if (obj && typeof obj === 'object' && obj._mcp_format === true && obj._data) {
137
+ // Generate schema for the actual data, not the wrapper
138
+ const dataSchema = this.generateSchema(obj._data, maxDepth, currentDepth);
139
+ return {
140
+ type: 'mcp_response',
141
+ properties: {
142
+ _data: dataSchema
143
+ }
144
+ };
145
+ }
146
+
147
+ if (obj === null) {
148
+ return { type: 'null' };
149
+ }
150
+
151
+ if (Array.isArray(obj)) {
152
+ if (obj.length === 0) {
153
+ return { type: 'array', items: { type: 'unknown' } };
154
+ }
155
+ // Sample first few items to infer schema
156
+ const sample = obj.slice(0, 3);
157
+ const itemSchemas = sample.map(item => this.generateSchema(item, maxDepth, currentDepth + 1));
158
+ // Use first item's schema as representative
159
+ return { type: 'array', items: itemSchemas[0] };
160
+ }
161
+
162
+ if (typeof obj === 'object') {
163
+ const properties: Record<string, JsonSchema> = {};
164
+ for (const [key, value] of Object.entries(obj)) {
165
+ properties[key] = this.generateSchema(value, maxDepth, currentDepth + 1);
166
+ }
167
+ return { type: 'object', properties };
168
+ }
169
+
170
+ return { type: typeof obj };
171
+ }
172
+
173
+
174
+ /**
175
+ * Calculate similarity between two strings (simple prefix-based)
176
+ */
177
+ private calculateSimilarity(str1: string, str2: string): number {
178
+ const maxLen = Math.max(str1.length, str2.length);
179
+ if (maxLen === 0) return 1.0;
180
+
181
+ // Simple prefix similarity for URLs and similar strings
182
+ let commonPrefixLen = 0;
183
+ const minLen = Math.min(str1.length, str2.length);
184
+ for (let i = 0; i < minLen; i++) {
185
+ if (str1[i] === str2[i]) {
186
+ commonPrefixLen++;
187
+ } else {
188
+ break;
189
+ }
190
+ }
191
+
192
+ return commonPrefixLen / maxLen;
193
+ }
194
+
195
+ /**
196
+ * Detect low-signal properties in an object (URLs, highly repetitive data)
197
+ */
198
+ private detectLowSignalProperties(obj: any): { lowSignal: string[], metadata: Record<string, any> } {
199
+ if (!obj || typeof obj !== 'object' || Array.isArray(obj)) {
200
+ return { lowSignal: [], metadata: {} };
201
+ }
202
+
203
+ const lowSignal: string[] = [];
204
+ const metadata: Record<string, any> = {};
205
+ const entries = Object.entries(obj);
206
+
207
+ // Detect URL properties
208
+ const urlPattern = /^https?:\/\//;
209
+ const urlProps: string[] = [];
210
+
211
+ for (const [key, value] of entries) {
212
+ if (typeof value === 'string' && urlPattern.test(value)) {
213
+ urlProps.push(key);
214
+ }
215
+ }
216
+
217
+ // If multiple URL properties exist, check their similarity
218
+ if (urlProps.length >= 3) {
219
+ const urlValues = urlProps.map(key => obj[key] as string);
220
+ let totalSimilarity = 0;
221
+ let comparisons = 0;
222
+
223
+ for (let i = 0; i < urlValues.length - 1; i++) {
224
+ for (let j = i + 1; j < urlValues.length; j++) {
225
+ totalSimilarity += this.calculateSimilarity(urlValues[i], urlValues[j]);
226
+ comparisons++;
227
+ }
228
+ }
229
+
230
+ const avgSimilarity = comparisons > 0 ? totalSimilarity / comparisons : 0;
231
+
232
+ // If URLs are highly similar (>60% common prefix), consider them low signal
233
+ if (avgSimilarity > 0.6) {
234
+ lowSignal.push(...urlProps);
235
+ metadata.url_similarity = avgSimilarity;
236
+ metadata.url_count = urlProps.length;
237
+ }
238
+ }
239
+
240
+ // Detect properties ending with _url, _id, node_id, etc.
241
+ const lowSignalPatterns = [/_url$/, /_id$/, /^node_id$/, /^avatar_url$/, /^gravatar_id$/];
242
+ for (const [key, value] of entries) {
243
+ if (lowSignalPatterns.some(pattern => pattern.test(key)) && !lowSignal.includes(key)) {
244
+ lowSignal.push(key);
245
+ }
246
+ }
247
+
248
+ return { lowSignal, metadata };
249
+ }
250
+
251
+
252
+ /**
253
+ * Compress an object by extracting low-signal properties
254
+ */
255
+ compressObjectWithLowSignalDetection(obj: any, path: string = ""): any {
256
+ if (!obj || typeof obj !== 'object' || Array.isArray(obj)) {
257
+ return obj;
258
+ }
259
+
260
+ const { lowSignal, metadata } = this.detectLowSignalProperties(obj);
261
+
262
+ // Only compress if we have significant low-signal properties (at least 5)
263
+ if (lowSignal.length < 5) {
264
+ return obj;
265
+ }
266
+
267
+ const highSignal: any = {};
268
+ const compressed: any = {};
269
+
270
+ for (const [key, value] of Object.entries(obj)) {
271
+ if (lowSignal.includes(key)) {
272
+ compressed[key] = value;
273
+ } else {
274
+ highSignal[key] = value;
275
+ }
276
+ }
277
+
278
+ // Check if we've already compressed identical low-signal properties
279
+ const compressedHash = this.hashObject(compressed);
280
+ let compressedKey = this.deduplicationMap.get(compressedHash);
281
+
282
+ if (!compressedKey) {
283
+ // First time seeing these properties - store them
284
+ compressedKey = this.storage.generateKey();
285
+ this.deduplicationMap.set(compressedHash, compressedKey);
286
+
287
+ const compressionMetadata: CompressionMetadata = {
288
+ compressed_properties: compressed,
289
+ compression_reason: 'low_signal_detection',
290
+ similarity_score: metadata.url_similarity,
291
+ };
292
+ this.storage.storeString(compressedKey, JSON.stringify(compressionMetadata));
293
+ }
294
+
295
+ // If compressedKey already exists, we're reusing it from a duplicate object
296
+ // This significantly reduces storage when objects like "owner" repeat
297
+
298
+ // Deduplicate the property names array
299
+ const propertyNamesHash = this.hashObject(lowSignal);
300
+ const propertyNamesSeenCount = this.propertyNamesSeenCount.get(propertyNamesHash) || 0;
301
+ this.propertyNamesSeenCount.set(propertyNamesHash, propertyNamesSeenCount + 1);
302
+
303
+ let propertyNamesValue: string | any[] = lowSignal;
304
+
305
+ if (propertyNamesSeenCount === 0) {
306
+ // First occurrence - store it and return the full array
307
+ const propertyNamesKey = this.storage.generateKey();
308
+ this.propertyNamesMap.set(propertyNamesHash, propertyNamesKey);
309
+ this.storage.storeString(propertyNamesKey, JSON.stringify(lowSignal));
310
+ propertyNamesValue = lowSignal; // Return full array first time
311
+ } else if (propertyNamesSeenCount >= 1) {
312
+ // Subsequent occurrences - return a reference
313
+ const existingPropertyNamesKey = this.propertyNamesMap.get(propertyNamesHash);
314
+ propertyNamesValue = `[DEDUPLICATED_ARRAY]\nKey: ${existingPropertyNamesKey}`;
315
+ }
316
+
317
+ // Return high-signal properties with reference to compressed data
318
+ return {
319
+ ...highSignal,
320
+ _compressed_properties_key: compressedKey,
321
+ _compressed_property_names: propertyNamesValue,
322
+ _compression_info: `${lowSignal.length} low-signal properties compressed (URLs, IDs). Use expandTokens with key "${compressedKey}" to retrieve.`
323
+ };
324
+ }
325
+
326
+
327
+ /**
328
+ * Creates a stable hash of an object for deduplication
329
+ */
330
+ private hashObject(obj: any): string {
331
+ // Create a stable JSON representation for hashing
332
+ const normalized = JSON.stringify(obj, Object.keys(obj).sort());
333
+ // Simple hash function (for deduplication, not cryptographic security)
334
+ let hash = 0;
335
+ for (let i = 0; i < normalized.length; i++) {
336
+ const char = normalized.charCodeAt(i);
337
+ hash = ((hash << 5) - hash) + char;
338
+ hash = hash & hash; // Convert to 32bit integer
339
+ }
340
+ return hash.toString(36);
341
+ }
342
+
343
+ /**
344
+ * Compresses large properties within a JSON object using depth-first traversal.
345
+ * Implements an efficient backward-iterating chunking strategy for large arrays.
346
+ */
347
+ compressJsonProperties(obj: any, path: string = ""): any {
348
+ if (
349
+ path === "" &&
350
+ this.storage.estimateTokens(JSON.stringify(obj)) <= this.maxTokens
351
+ ) {
352
+ return obj;
353
+ }
354
+
355
+ if (Array.isArray(obj)) {
356
+ // Step 1: Recursively compress all items first (depth-first).
357
+ const processedItems = obj.map((item, index) =>
358
+ this.compressJsonProperties(item, `${path}[${index}]`)
359
+ );
360
+
361
+ // Step 2: Early exit if the whole array is already small enough.
362
+ // maxTokens allows us to fetch objects from the store without recompressing
363
+
364
+ // Step 3: Iterate backwards, building chunks from the end.
365
+ const finalArray: any[] = [];
366
+ let currentChunk: any[] = [];
367
+
368
+ for (let i = processedItems.length - 1; i >= 0; i--) {
369
+ const item = processedItems[i];
370
+ currentChunk.unshift(item); // Add item to the front of the current chunk
371
+
372
+ const chunkString = JSON.stringify(currentChunk);
373
+ const chunkTokens = this.storage.estimateTokens(chunkString);
374
+
375
+ if (chunkTokens > this.compressionThreshold) {
376
+ const key = this.storage.generateKey();
377
+ this.storage.storeString(key, chunkString);
378
+
379
+ const stub = `[COMPRESSED_JSON_ARRAY_CHUNK - ${chunkTokens} tokens, ${
380
+ currentChunk.length
381
+ } items]\nKey: ${key}\nPath: ${path}[${i}...${
382
+ i + currentChunk.length - 1
383
+ }]\nPreview: ${chunkString.substring(0, 100)}...\n[Use ${
384
+ this.toolName
385
+ } tool with key "${key}" to retrieve this chunk]`;
386
+ finalArray.unshift(stub); // Add stub to the start of our final result.
387
+
388
+ currentChunk = [];
389
+ }
390
+ }
391
+
392
+ // Step 4: After the loop, add any remaining items from the start of the
393
+ // array that did not form a full chunk.
394
+ if (currentChunk.length > 0) {
395
+ finalArray.unshift(...currentChunk);
396
+ }
397
+ return finalArray;
398
+ }
399
+
400
+
401
+ // Handle objects - try low-signal detection first, then process properties (depth-first)
402
+ if (obj && typeof obj === "object") {
403
+ // Check if this exact object (by original content) is a duplicate
404
+ const objHash = this.hashObject(obj);
405
+ const existingKey = this.deduplicationMap.get(objHash);
406
+
407
+ if (existingKey) {
408
+ // We've seen this exact object before and stored it
409
+ return `[DEDUPLICATED_OBJECT]\nKey: ${existingKey}\nPath: ${path}\n[Use ${this.toolName} tool with key "${existingKey}" to retrieve content]`;
410
+ }
411
+
412
+ // Track that we've seen this object (increment count)
413
+ const seenCount = this.objectSeenCount.get(objHash) || 0;
414
+ this.objectSeenCount.set(objHash, seenCount + 1);
415
+
416
+ // Store objects on FIRST occurrence so second occurrence can reference it
417
+ // We increment seenCount above, so after increment:
418
+ // seenCount=1: first occurrence (just incremented from 0 to 1), store it
419
+ // seenCount>=2: we already stored it on first occurrence, should be in dedup map
420
+ // Note: This means we store proactively - first occurrence gets stored AND returned in full
421
+ // Second+ occurrences will find it in the dedup map and return a reference
422
+ const isFirstOccurrence = seenCount === 1;
423
+
424
+ // Process the object - apply low-signal detection
425
+ const objWithLowSignalCompressed = this.compressObjectWithLowSignalDetection(obj, path);
426
+ const objToProcess = objWithLowSignalCompressed !== obj ? objWithLowSignalCompressed : obj;
427
+
428
+ const result: any = {};
429
+ for (const [key, value] of Object.entries(objToProcess)) {
430
+ const newPath = path ? `${path}.${key}` : key;
431
+ result[key] = this.compressJsonProperties(value, newPath);
432
+ }
433
+
434
+ // After processing children, check if the entire object should be compressed
435
+ const objectAsString = JSON.stringify(result);
436
+ const tokens = this.storage.estimateTokens(objectAsString);
437
+
438
+ // If this is the first occurrence of a potentially duplicated object, store it
439
+ if (isFirstOccurrence && tokens > 100) {
440
+ const key = this.storage.generateKey();
441
+ this.deduplicationMap.set(objHash, key);
442
+ this.storage.storeString(key, objectAsString);
443
+ // Return the object data this time, next occurrences will get a reference
444
+ return result;
445
+ }
446
+
447
+ // Check if object is large enough to compress as a whole
448
+ if (tokens > this.compressionThreshold) {
449
+ const key = this.storage.generateKey();
450
+ this.storage.storeString(key, objectAsString);
451
+
452
+ return `[COMPRESSED_JSON_OBJECT - ${tokens} tokens]\nKey: ${key}\nPath: ${path}\nKeys: ${Object.keys(
453
+ result
454
+ ).join(", ")}\nPreview: ${objectAsString.substring(0, 200)}...\n[Use ${
455
+ this.toolName
456
+ } tool with key "${key}" to retrieve full content]`;
457
+ }
458
+ return result;
459
+ }
460
+
461
+
462
+ // Handle primitive values (strings, numbers, booleans, null)
463
+ if (typeof obj === "string") {
464
+ // First, check if this string contains JSON that we can parse and compress more granularly
465
+ const parsedJson = this.tryParseJson(obj);
466
+ if (parsedJson) {
467
+ const compressedJson = this.compressJsonProperties(parsedJson, path);
468
+ const compressedJsonString = JSON.stringify(compressedJson, null, 2);
469
+
470
+ const originalTokens = this.storage.estimateTokens(obj);
471
+ const compressedTokens = this.storage.estimateTokens(compressedJsonString);
472
+
473
+ if (compressedTokens < originalTokens * 0.8) {
474
+ return compressedJsonString;
475
+ }
476
+ }
477
+
478
+ // If not JSON or compression wasn't effective, handle as regular string
479
+ const tokens = this.storage.estimateTokens(obj);
480
+ if (tokens > this.compressionThreshold) {
481
+ const key = this.storage.generateKey();
482
+ this.storage.storeString(key, obj);
483
+
484
+ return `[COMPRESSED_JSON_PROPERTY - ${tokens} tokens]\nKey: ${key}\nPath: ${path}\nPreview: ${obj.substring(
485
+ 0,
486
+ 200
487
+ )}...\n[Use ${
488
+ this.toolName
489
+ } tool with key "${key}" to retrieve full content]`;
490
+ }
491
+ return obj;
492
+ }
493
+
494
+ return obj;
495
+ }
496
+ }