@tyvm/knowhow 0.0.61 → 0.0.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/chat/modules/AgentModule.ts +6 -6
- package/src/processors/JsonCompressor.ts +496 -0
- package/src/processors/TokenCompressor.ts +194 -125
- package/src/processors/ToolResponseCache.ts +64 -11
- package/src/processors/index.ts +1 -0
- package/tests/compressor/bigstring.test.ts +352 -2
- package/tests/compressor/githubjson.txt +1 -0
- package/tests/compressor/toolResponseCache.test.ts +303 -0
- package/ts_build/package.json +1 -1
- package/ts_build/src/chat/modules/AgentModule.js +5 -4
- package/ts_build/src/chat/modules/AgentModule.js.map +1 -1
- package/ts_build/src/processors/JsonCompressor.d.ts +36 -0
- package/ts_build/src/processors/JsonCompressor.js +295 -0
- package/ts_build/src/processors/JsonCompressor.js.map +1 -0
- package/ts_build/src/processors/TokenCompressor.d.ts +23 -5
- package/ts_build/src/processors/TokenCompressor.js +106 -70
- package/ts_build/src/processors/TokenCompressor.js.map +1 -1
- package/ts_build/src/processors/ToolResponseCache.d.ts +4 -2
- package/ts_build/src/processors/ToolResponseCache.js +50 -10
- package/ts_build/src/processors/ToolResponseCache.js.map +1 -1
- package/ts_build/src/processors/index.d.ts +1 -0
- package/ts_build/src/processors/index.js +3 -1
- package/ts_build/src/processors/index.js.map +1 -1
- package/ts_build/tests/compressor/bigstring.test.js +209 -0
- package/ts_build/tests/compressor/bigstring.test.js.map +1 -1
- package/ts_build/tests/compressor/toolResponseCache.test.d.ts +1 -0
- package/ts_build/tests/compressor/toolResponseCache.test.js +240 -0
- package/ts_build/tests/compressor/toolResponseCache.test.js.map +1 -0
package/package.json
CHANGED
|
@@ -25,6 +25,7 @@ import {
|
|
|
25
25
|
import { TaskInfo, ChatSession } from "../types";
|
|
26
26
|
import { agents } from "../../agents";
|
|
27
27
|
import { ToolCallEvent } from "../../agents/base/base";
|
|
28
|
+
import { $Command } from "@aws-sdk/client-s3";
|
|
28
29
|
|
|
29
30
|
export class AgentModule extends BaseChatModule {
|
|
30
31
|
name = "agent";
|
|
@@ -582,11 +583,14 @@ Please continue from where you left off and complete the original request.
|
|
|
582
583
|
10 // Priority level
|
|
583
584
|
);
|
|
584
585
|
|
|
585
|
-
|
|
586
|
+
const caching = [
|
|
586
587
|
new ToolResponseCache(agent.tools).createProcessor(),
|
|
587
588
|
new TokenCompressor(agent.tools).createProcessor((msg) =>
|
|
588
589
|
Boolean(msg.role === "tool" && msg.tool_call_id)
|
|
589
590
|
),
|
|
591
|
+
];
|
|
592
|
+
agent.messageProcessor.setProcessors("pre_call", [
|
|
593
|
+
...caching,
|
|
590
594
|
new CustomVariables(agent.tools).createProcessor(),
|
|
591
595
|
]);
|
|
592
596
|
|
|
@@ -595,11 +599,7 @@ Please continue from where you left off and complete the original request.
|
|
|
595
599
|
new HarmonyToolProcessor().createProcessor(),
|
|
596
600
|
]);
|
|
597
601
|
|
|
598
|
-
agent.messageProcessor.setProcessors("post_tools",
|
|
599
|
-
new TokenCompressor(agent.tools).createProcessor((msg) =>
|
|
600
|
-
Boolean(msg.role === "tool" && msg.tool_call_id)
|
|
601
|
-
),
|
|
602
|
-
]);
|
|
602
|
+
agent.messageProcessor.setProcessors("post_tools", caching);
|
|
603
603
|
|
|
604
604
|
// Set up event listeners
|
|
605
605
|
if (!agent.agentEvents.listenerCount(agent.eventTypes.toolCall)) {
|
|
@@ -0,0 +1,496 @@
|
|
|
1
|
+
/**
 * Lightweight, JSON-Schema-like description of a value's shape, as produced by
 * `JsonCompressor.generateSchema`.
 */
export interface JsonSchema {
  /**
   * Kind of the described value: a `typeof` result, or one of the synthetic
   * labels the generator emits ('null', 'array', 'object', 'any', 'unknown',
   * 'mcp_response').
   */
  type: string;
  /** Schema of each named property; set for object-like values. */
  properties?: { [name: string]: JsonSchema };
  /** Schema representing the elements; set for arrays. */
  items?: JsonSchema;
  /** Names of properties that were moved out of the value into storage. */
  compressed_properties?: Array<string>;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Record written to storage when low-signal properties are stripped from an
 * object; it holds everything needed to restore them.
 */
export interface CompressionMetadata {
  /** The removed properties, keyed by their original names. */
  compressed_properties: { [name: string]: any };
  /** Machine-readable reason for the removal (e.g. 'low_signal_detection'). */
  compression_reason: string;
  /** Average URL similarity that triggered the removal, when one was computed. */
  similarity_score?: number;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Storage backend contract the JSON compressor relies on for persisting
 * extracted content and for sizing decisions.
 */
export interface JsonCompressorStorage {
  /** Returns a key under which a new value can be stored. */
  generateKey(): string;
  /** Saves `value` so it can be retrieved later through `key`. */
  storeString(key: string, value: string): void;
  /** Returns the estimated token count of `text`. */
  estimateTokens(text: string): number;
}
|
|
28
|
+
|
|
29
|
+
/**
 * JSON-aware compression for tool responses.
 *
 * Walks parsed JSON depth-first and replaces oversized or repeated pieces with
 * short textual stubs. The full content of every stub is written to the
 * injected storage under a generated key, so it can be fetched back later with
 * the retrieval tool named by `toolName`.
 *
 * Deduplication state lives on the instance and persists across calls until
 * `clearDeduplication()` is invoked.
 */
export class JsonCompressor {
  /** An object needs at least this many low-signal properties to be split. */
  private static readonly MIN_LOW_SIGNAL_PROPERTIES = 5;
  /** Average common-prefix ratio above which a group of URLs is low signal. */
  private static readonly URL_SIMILARITY_THRESHOLD = 0.6;
  /** Objects at or below this token count are never stored for deduplication. */
  private static readonly DEDUP_MIN_TOKENS = 100;
  /** Embedded JSON is replaced only when the rewrite is below this size ratio. */
  private static readonly EMBEDDED_JSON_MAX_RATIO = 0.8;
  /** Matches string values that look like http(s) URLs. */
  private static readonly URL_PATTERN = /^https?:\/\//;
  /** Property-name patterns treated as low signal (URLs, identifiers). */
  private static readonly LOW_SIGNAL_KEY_PATTERNS: readonly RegExp[] = [
    /_url$/,
    /_id$/,
    /^node_id$/,
    /^avatar_url$/,
    /^gravatar_id$/,
  ];

  // Deduplication tracking
  /** Content fingerprint -> storage key of content that was already stored. */
  private deduplicationMap: Map<string, string> = new Map();
  /** Content fingerprint -> number of times that object has been visited. */
  private objectSeenCount: Map<string, number> = new Map();
  /** Fingerprint of a property-name list -> storage key of that list. */
  private propertyNamesMap: Map<string, string> = new Map();
  /** Fingerprint of a property-name list -> number of times it was produced. */
  private propertyNamesSeenCount: Map<string, number> = new Map();

  private compressionThreshold: number;
  private maxTokens: number;
  private toolName: string;
  private storage: JsonCompressorStorage;

  /**
   * @param storage Backend used to persist extracted content and estimate tokens.
   * @param compressionThreshold Token count above which a value is replaced by a stub.
   * @param maxTokens Token budget under which a top-level value is returned untouched.
   * @param toolName Name of the retrieval tool advertised inside generated stubs.
   */
  constructor(
    storage: JsonCompressorStorage,
    compressionThreshold: number,
    maxTokens: number,
    toolName: string
  ) {
    this.storage = storage;
    this.compressionThreshold = compressionThreshold;
    this.maxTokens = maxTokens;
    this.toolName = toolName;
  }

  /**
   * Clear all deduplication tracking. Content that was already written to
   * storage is not touched.
   */
  clearDeduplication(): void {
    this.deduplicationMap.clear();
    this.objectSeenCount.clear();
    this.propertyNamesMap.clear();
    this.propertyNamesSeenCount.clear();
  }

  /**
   * Update compression settings used by subsequent calls.
   */
  updateSettings(compressionThreshold: number, maxTokens: number): void {
    this.compressionThreshold = compressionThreshold;
    this.maxTokens = maxTokens;
  }

  /**
   * Attempts to parse `content` as JSON.
   *
   * - Returns `null` when `content` is not valid JSON.
   * - A JSON-encoded string is unwrapped recursively (double-encoded JSON);
   *   when the inner string is not JSON itself the result is `null`.
   * - An MCP tool response (`{ content: [{ type: "text", text }] }`) whose text
   *   is JSON is returned as
   *   `{ _mcp_format: true, _raw_structure, _data: <parsed text> }`.
   *   When the text is not JSON the parsed envelope is returned unchanged so
   *   the text is never dropped.
   *
   * @param content Raw text to parse.
   * @returns The parsed value, the MCP wrapper described above, or `null`.
   */
  tryParseJson(content: string): any | null {
    let parsed: any;
    try {
      parsed = JSON.parse(content);
    } catch {
      return null;
    }

    if (typeof parsed === "string") {
      // Double-encoded JSON: keep unwrapping. Yields null for plain strings.
      return this.tryParseJson(parsed);
    }

    const looksLikeMcpEnvelope =
      parsed &&
      typeof parsed === "object" &&
      Array.isArray(parsed.content) &&
      parsed.content.length > 0;

    if (looksLikeMcpEnvelope) {
      const firstContent = parsed.content[0];
      // `firstContent` may legitimately be null (e.g. {"content":[null]}).
      if (
        firstContent &&
        firstContent.type === "text" &&
        typeof firstContent.text === "string"
      ) {
        const nestedData = this.tryParseJson(firstContent.text);
        if (nestedData !== null) {
          // Expose the payload while remembering it came in an MCP envelope.
          return {
            _mcp_format: true,
            _raw_structure: { content: [{ type: "text" }] },
            _data: nestedData,
          };
        }
        // Text is not JSON: fall through and keep the envelope (and its text).
      }
    }

    return parsed;
  }

  /**
   * Generate a JSON schema from an object.
   *
   * Arrays are described by their first element only. Values nested deeper
   * than `maxDepth` are reported as `{ type: 'any' }`. MCP wrappers produced
   * by `tryParseJson` are described as `mcp_response` with the schema of
   * their `_data`.
   *
   * @param obj Value to describe.
   * @param maxDepth Deepest nesting level that is still described in detail.
   * @param currentDepth Nesting level of `obj`; callers normally omit it.
   */
  public generateSchema(obj: any, maxDepth: number = 3, currentDepth: number = 0): JsonSchema {
    if (currentDepth > maxDepth) {
      return { type: "any" };
    }

    // Describe the wrapped data rather than the MCP wrapper itself.
    if (obj && typeof obj === "object" && obj._mcp_format === true && obj._data) {
      return {
        type: "mcp_response",
        properties: {
          _data: this.generateSchema(obj._data, maxDepth, currentDepth),
        },
      };
    }

    if (obj === null) {
      return { type: "null" };
    }

    if (Array.isArray(obj)) {
      if (obj.length === 0) {
        return { type: "array", items: { type: "unknown" } };
      }
      // The first element is used as the representative of the whole array.
      return {
        type: "array",
        items: this.generateSchema(obj[0], maxDepth, currentDepth + 1),
      };
    }

    if (typeof obj === "object") {
      const properties: Record<string, JsonSchema> = {};
      for (const [key, value] of Object.entries(obj)) {
        properties[key] = this.generateSchema(value, maxDepth, currentDepth + 1);
      }
      return { type: "object", properties };
    }

    return { type: typeof obj };
  }

  /**
   * Calculate similarity between two strings as the length of their common
   * prefix divided by the length of the longer string (1.0 for two empty
   * strings).
   */
  private calculateSimilarity(str1: string, str2: string): number {
    const maxLen = Math.max(str1.length, str2.length);
    if (maxLen === 0) return 1.0;

    const minLen = Math.min(str1.length, str2.length);
    let commonPrefixLen = 0;
    while (commonPrefixLen < minLen && str1[commonPrefixLen] === str2[commonPrefixLen]) {
      commonPrefixLen++;
    }

    return commonPrefixLen / maxLen;
  }

  /**
   * Detect low-signal properties in an object (URLs, highly repetitive data).
   *
   * Two detectors run in order:
   *  1. If at least three properties hold http(s) URLs and their average
   *     pairwise prefix similarity exceeds the threshold, all of them are
   *     flagged (and the similarity is reported in `metadata`).
   *  2. Any property whose name matches a low-signal pattern is flagged.
   *
   * @returns Flagged property names (detector order, no duplicates) plus
   *          detector metadata.
   */
  private detectLowSignalProperties(obj: any): { lowSignal: string[], metadata: Record<string, any> } {
    if (!obj || typeof obj !== "object" || Array.isArray(obj)) {
      return { lowSignal: [], metadata: {} };
    }

    const lowSignal: string[] = [];
    const flagged = new Set<string>();
    const metadata: Record<string, any> = {};
    const entries = Object.entries(obj);

    const urlProps: string[] = [];
    for (const [key, value] of entries) {
      if (typeof value === "string" && JsonCompressor.URL_PATTERN.test(value)) {
        urlProps.push(key);
      }
    }

    if (urlProps.length >= 3) {
      const urlValues = urlProps.map((key) => obj[key] as string);
      let totalSimilarity = 0;
      let comparisons = 0;

      for (let i = 0; i < urlValues.length - 1; i++) {
        for (let j = i + 1; j < urlValues.length; j++) {
          totalSimilarity += this.calculateSimilarity(urlValues[i], urlValues[j]);
          comparisons++;
        }
      }

      const avgSimilarity = comparisons > 0 ? totalSimilarity / comparisons : 0;

      if (avgSimilarity > JsonCompressor.URL_SIMILARITY_THRESHOLD) {
        for (const key of urlProps) {
          lowSignal.push(key);
          flagged.add(key);
        }
        metadata.url_similarity = avgSimilarity;
        metadata.url_count = urlProps.length;
      }
    }

    for (const [key] of entries) {
      if (
        !flagged.has(key) &&
        JsonCompressor.LOW_SIGNAL_KEY_PATTERNS.some((pattern) => pattern.test(key))
      ) {
        lowSignal.push(key);
        flagged.add(key);
      }
    }

    return { lowSignal, metadata };
  }

  /**
   * Compress an object by extracting low-signal properties.
   *
   * When fewer than five low-signal properties are found, `obj` itself is
   * returned. Otherwise the low-signal properties are written to storage
   * (once per distinct set of values) and a new object is returned holding
   * the remaining properties plus:
   *  - `_compressed_properties_key`: storage key of the extracted properties,
   *  - `_compressed_property_names`: the extracted names, or a
   *    `[DEDUPLICATED_ARRAY]` reference once the same list has been emitted,
   *  - `_compression_info`: a human-readable retrieval hint.
   *
   * @param obj Candidate object; non-objects and arrays are returned as is.
   * @param path Location of `obj` in the document (currently unused).
   */
  compressObjectWithLowSignalDetection(obj: any, path: string = ""): any {
    if (!obj || typeof obj !== "object" || Array.isArray(obj)) {
      return obj;
    }

    const { lowSignal, metadata } = this.detectLowSignalProperties(obj);
    if (lowSignal.length < JsonCompressor.MIN_LOW_SIGNAL_PROPERTIES) {
      return obj;
    }

    const lowSignalKeys = new Set(lowSignal);
    const highSignal: Record<string, any> = {};
    const compressed: Record<string, any> = {};
    for (const [key, value] of Object.entries(obj)) {
      if (lowSignalKeys.has(key)) {
        compressed[key] = value;
      } else {
        highSignal[key] = value;
      }
    }

    // Identical low-signal payloads (e.g. a repeated "owner") share one entry.
    const compressedHash = this.hashObject(compressed);
    let compressedKey = this.deduplicationMap.get(compressedHash);

    if (!compressedKey) {
      compressedKey = this.storage.generateKey();
      this.deduplicationMap.set(compressedHash, compressedKey);

      const compressionMetadata: CompressionMetadata = {
        compressed_properties: compressed,
        compression_reason: "low_signal_detection",
        similarity_score: metadata.url_similarity,
      };
      this.storage.storeString(compressedKey, JSON.stringify(compressionMetadata));
    }

    // The name list is emitted in full once, then replaced by a reference.
    const propertyNamesHash = this.hashObject(lowSignal);
    const timesSeen = this.propertyNamesSeenCount.get(propertyNamesHash) || 0;
    this.propertyNamesSeenCount.set(propertyNamesHash, timesSeen + 1);

    let propertyNamesValue: string | string[] = lowSignal;
    if (timesSeen === 0) {
      const propertyNamesKey = this.storage.generateKey();
      this.propertyNamesMap.set(propertyNamesHash, propertyNamesKey);
      this.storage.storeString(propertyNamesKey, JSON.stringify(lowSignal));
    } else {
      const existingPropertyNamesKey = this.propertyNamesMap.get(propertyNamesHash);
      propertyNamesValue = `[DEDUPLICATED_ARRAY]\nKey: ${existingPropertyNamesKey}`;
    }

    return {
      ...highSignal,
      _compressed_properties_key: compressedKey,
      _compressed_property_names: propertyNamesValue,
      // Advertise the configured retrieval tool, like every other stub does.
      _compression_info: `${lowSignal.length} low-signal properties compressed (URLs, IDs). Use ${this.toolName} with key "${compressedKey}" to retrieve.`,
    };
  }

  /**
   * Serialises a value to JSON with object keys sorted at every nesting
   * level, so structurally equal values always yield the same text.
   *
   * A replacer *function* is used on purpose: passing an array of key names
   * to `JSON.stringify` acts as a whitelist at every depth and silently drops
   * nested properties whose names do not also appear at the top level.
   */
  private canonicalStringify(value: any): string {
    const text = JSON.stringify(value, (_key: string, current: any) => {
      if (current && typeof current === "object" && !Array.isArray(current)) {
        const sorted: Record<string, any> = {};
        for (const name of Object.keys(current).sort()) {
          sorted[name] = current[name];
        }
        return sorted;
      }
      return current;
    });
    // JSON.stringify returns undefined for values it cannot represent.
    return text === undefined ? "undefined" : text;
  }

  /**
   * Creates a stable fingerprint of a value for deduplication.
   *
   * The fingerprint combines two independent 32-bit rolling hashes with the
   * length of the canonical JSON text. It is not cryptographic; collisions
   * are unlikely but still theoretically possible.
   */
  private hashObject(obj: any): string {
    const normalized = this.canonicalStringify(obj);

    let polyHash = 0;
    let fnvHash = 0x811c9dc5; // FNV-1a 32-bit offset basis
    for (let i = 0; i < normalized.length; i++) {
      const code = normalized.charCodeAt(i);
      polyHash = ((polyHash << 5) - polyHash + code) | 0; // hash * 31 + code, 32-bit
      fnvHash = Math.imul(fnvHash ^ code, 0x01000193); // FNV-1a 32-bit prime
    }

    return [
      polyHash.toString(36),
      (fnvHash >>> 0).toString(36),
      normalized.length.toString(36),
    ].join(".");
  }

  /**
   * Compresses large properties within a JSON value using depth-first
   * traversal.
   *
   * - A top-level value (`path === ""`) that already fits in `maxTokens` is
   *   returned untouched.
   * - Arrays are chunked from the end; each chunk that exceeds the
   *   compression threshold is stored and replaced by a stub string.
   * - Objects are deduplicated, stripped of low-signal properties, processed
   *   recursively and, if still too large, stored and replaced by a stub.
   * - Strings holding JSON are compressed structurally when that saves at
   *   least 20% of the tokens; other oversized strings are stored and
   *   replaced by a stub.
   * - All other primitives are returned as is.
   *
   * @param obj Value to compress.
   * @param path Location of `obj` in the document; "" marks the root.
   * @returns The compressed value (same kind as the input, or a stub string).
   */
  compressJsonProperties(obj: any, path: string = ""): any {
    if (path === "") {
      const serialized = JSON.stringify(obj);
      // `serialized` is undefined for values JSON cannot represent.
      if (
        serialized === undefined ||
        this.storage.estimateTokens(serialized) <= this.maxTokens
      ) {
        return obj;
      }
    }

    if (Array.isArray(obj)) {
      return this.compressArray(obj, path);
    }
    if (obj && typeof obj === "object") {
      return this.compressObject(obj, path);
    }
    if (typeof obj === "string") {
      return this.compressString(obj, path);
    }
    return obj;
  }

  /** Compresses every item, then folds trailing runs of items into stored chunks. */
  private compressArray(items: any[], path: string): any[] {
    const processedItems = items.map((item, index) =>
      this.compressJsonProperties(item, `${path}[${index}]`)
    );

    // Stubs are collected back-to-front and reversed once at the end.
    const stubsFromEnd: string[] = [];
    let currentChunk: any[] = [];

    for (let i = processedItems.length - 1; i >= 0; i--) {
      currentChunk.unshift(processedItems[i]);

      const chunkString = JSON.stringify(currentChunk);
      const chunkTokens = this.storage.estimateTokens(chunkString);

      if (chunkTokens > this.compressionThreshold) {
        const key = this.storage.generateKey();
        this.storage.storeString(key, chunkString);

        stubsFromEnd.push(
          `[COMPRESSED_JSON_ARRAY_CHUNK - ${chunkTokens} tokens, ${currentChunk.length} items]\nKey: ${key}\nPath: ${path}[${i}...${
            i + currentChunk.length - 1
          }]\nPreview: ${chunkString.substring(0, 100)}...\n[Use ${this.toolName} tool with key "${key}" to retrieve this chunk]`
        );
        currentChunk = [];
      }
    }

    // Leading items that never filled a chunk stay inline, ahead of the stubs.
    return currentChunk.concat(stubsFromEnd.reverse());
  }

  /** Deduplicates, strips low-signal properties, recurses, then size-checks an object. */
  private compressObject(obj: Record<string, any>, path: string): any {
    const objHash = this.hashObject(obj);
    const existingKey = this.deduplicationMap.get(objHash);
    if (existingKey) {
      return `[DEDUPLICATED_OBJECT]\nKey: ${existingKey}\nPath: ${path}\n[Use ${this.toolName} tool with key "${existingKey}" to retrieve content]`;
    }

    // Count this visit; a count of 1 means it is the first sighting.
    const occurrences = (this.objectSeenCount.get(objHash) || 0) + 1;
    this.objectSeenCount.set(objHash, occurrences);
    const isFirstOccurrence = occurrences === 1;

    const objToProcess = this.compressObjectWithLowSignalDetection(obj, path);

    const result: Record<string, any> = {};
    for (const [key, value] of Object.entries(objToProcess)) {
      const newPath = path ? `${path}.${key}` : key;
      result[key] = this.compressJsonProperties(value, newPath);
    }

    const objectAsString = JSON.stringify(result);
    const tokens = this.storage.estimateTokens(objectAsString);

    // Size check comes first so an oversized object is never returned in full.
    if (tokens > this.compressionThreshold) {
      const key = this.storage.generateKey();
      this.storage.storeString(key, objectAsString);
      // Register it so later identical objects reuse this entry.
      this.deduplicationMap.set(objHash, key);

      return `[COMPRESSED_JSON_OBJECT - ${tokens} tokens]\nKey: ${key}\nPath: ${path}\nKeys: ${Object.keys(
        result
      ).join(", ")}\nPreview: ${objectAsString.substring(0, 200)}...\n[Use ${this.toolName} tool with key "${key}" to retrieve full content]`;
    }

    // Store proactively on the first sighting: this occurrence is returned in
    // full, later identical objects are replaced by a reference.
    if (isFirstOccurrence && tokens > JsonCompressor.DEDUP_MIN_TOKENS) {
      const key = this.storage.generateKey();
      this.deduplicationMap.set(objHash, key);
      this.storage.storeString(key, objectAsString);
    }

    return result;
  }

  /** Compresses a string, structurally when it holds JSON, otherwise as opaque text. */
  private compressString(text: string, path: string): string {
    const tokens = this.storage.estimateTokens(text);

    const parsedJson = this.tryParseJson(text);
    if (parsedJson) {
      const compressedJson = this.compressJsonProperties(parsedJson, path);
      const compressedJsonString = JSON.stringify(compressedJson, null, 2);
      const compressedTokens = this.storage.estimateTokens(compressedJsonString);

      if (compressedTokens < tokens * JsonCompressor.EMBEDDED_JSON_MAX_RATIO) {
        return compressedJsonString;
      }
    }

    // Not JSON, or structural compression was not worth it.
    if (tokens > this.compressionThreshold) {
      const key = this.storage.generateKey();
      this.storage.storeString(key, text);

      return `[COMPRESSED_JSON_PROPERTY - ${tokens} tokens]\nKey: ${key}\nPath: ${path}\nPreview: ${text.substring(
        0,
        200
      )}...\n[Use ${this.toolName} tool with key "${key}" to retrieve full content]`;
    }
    return text;
  }
}
|