@tyvm/knowhow 0.0.60 → 0.0.62

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/package.json +1 -1
  2. package/src/agents/setup/setup.ts +9 -2
  3. package/src/agents/tools/textSearch.ts +4 -1
  4. package/src/chat/CliChatService.ts +5 -6
  5. package/src/chat/modules/SystemModule.ts +1 -1
  6. package/src/clients/anthropic.ts +12 -0
  7. package/src/config.ts +5 -0
  8. package/src/plugins/language.ts +4 -0
  9. package/src/processors/JsonCompressor.ts +496 -0
  10. package/src/processors/TokenCompressor.ts +194 -125
  11. package/src/processors/index.ts +1 -0
  12. package/src/services/Mcp.ts +1 -0
  13. package/src/services/Tools.ts +5 -3
  14. package/src/types.ts +1 -0
  15. package/src/utils/InputQueueManager.ts +119 -95
  16. package/tests/compressor/bigstring.test.ts +352 -2
  17. package/tests/compressor/githubjson.txt +1 -0
  18. package/ts_build/package.json +1 -1
  19. package/ts_build/src/agents/setup/setup.js +9 -2
  20. package/ts_build/src/agents/setup/setup.js.map +1 -1
  21. package/ts_build/src/agents/tools/textSearch.js +2 -1
  22. package/ts_build/src/agents/tools/textSearch.js.map +1 -1
  23. package/ts_build/src/chat/CliChatService.d.ts +1 -1
  24. package/ts_build/src/chat/CliChatService.js +6 -6
  25. package/ts_build/src/chat/CliChatService.js.map +1 -1
  26. package/ts_build/src/chat/modules/SystemModule.js +1 -1
  27. package/ts_build/src/chat/modules/SystemModule.js.map +1 -1
  28. package/ts_build/src/clients/anthropic.js +12 -0
  29. package/ts_build/src/clients/anthropic.js.map +1 -1
  30. package/ts_build/src/config.js +5 -0
  31. package/ts_build/src/config.js.map +1 -1
  32. package/ts_build/src/plugins/language.js +4 -0
  33. package/ts_build/src/plugins/language.js.map +1 -1
  34. package/ts_build/src/processors/JsonCompressor.d.ts +36 -0
  35. package/ts_build/src/processors/JsonCompressor.js +295 -0
  36. package/ts_build/src/processors/JsonCompressor.js.map +1 -0
  37. package/ts_build/src/processors/TokenCompressor.d.ts +23 -5
  38. package/ts_build/src/processors/TokenCompressor.js +106 -70
  39. package/ts_build/src/processors/TokenCompressor.js.map +1 -1
  40. package/ts_build/src/processors/index.d.ts +1 -0
  41. package/ts_build/src/processors/index.js +3 -1
  42. package/ts_build/src/processors/index.js.map +1 -1
  43. package/ts_build/src/services/Mcp.js.map +1 -1
  44. package/ts_build/src/services/Tools.js +1 -1
  45. package/ts_build/src/services/Tools.js.map +1 -1
  46. package/ts_build/src/types.d.ts +1 -0
  47. package/ts_build/src/types.js +1 -0
  48. package/ts_build/src/types.js.map +1 -1
  49. package/ts_build/src/utils/InputQueueManager.d.ts +4 -1
  50. package/ts_build/src/utils/InputQueueManager.js +93 -78
  51. package/ts_build/src/utils/InputQueueManager.js.map +1 -1
  52. package/ts_build/tests/compressor/bigstring.test.js +209 -0
  53. package/ts_build/tests/compressor/bigstring.test.js.map +1 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tyvm/knowhow",
3
- "version": "0.0.60",
3
+ "version": "0.0.62",
4
4
  "description": "ai cli with plugins and agents",
5
5
  "main": "ts_build/src/index.js",
6
6
  "bin": {
@@ -38,15 +38,22 @@ export class SetupAgent extends BaseAgent {
38
38
 
39
39
  Always ask the user to approve what you're going to do to the config, that way you can get feedback via askHuman before modifying the config
40
40
 
41
+ After using askHuman and them providing their feedback of what you'd like to do, only follow what they say. We want to make the minimum set of changes to the config.
42
+
43
+ For codebase embeddings you don't want to use prompt, as that'd embed a transformation of the code, you want to embed the actual source, so don't use prompt.
44
+ For embeddings prompt would only be used for generating an embedding from transformed data, like if you wanted to summarize a transcript and make embeddings from the summary, then you'd use prompt on the embeddings, otherwise you should not need it.
45
+
41
46
  When setting up the language plugin for a user you should come up with phrases they're likely to say, like frontend/backend/schema etc that will signal we should load in guides or rules for that type of task. You should put any of your rules/analses in .knowhow/docs and the language plugin should reference those.
42
47
 
48
+ The language plugin can only read in files, not directories, so do not add entries to language plugin unless you've first written some markdown files to load in as guidance. The files loaded by the language plugin should give quick tips to any unusual things about the project, commands that should be run to rebuild any auto-generated code, quirks about codebase behavior etc.
49
+
43
50
  If a user is vauge about setting up, you should give them some options of what all you could help them setup with a brief explanation of what those setups would enable.
44
51
 
45
52
  Only suggest embeddings that include a folder path with many elements, ie src/**/*.ts, never suggest entries with one element
46
53
 
47
- If a user is requesting help with setting up a coding project, you can look at their package.json to setup the lintCommands so that we get feedback on file edits, and embeddings for the source code as those two features are the highest impact
54
+ If a user is requesting help with setting up a coding project, you can look at their package.json, or language specific config to setup the lintCommands so that we get feedback on file edits, and embeddings for the source code as those two features are the highest impact
48
55
 
49
- If the user just says setup fast, try to get a general idea of the project file structure and setup one source code embedding for the whole codebaseand linter commands if possible. Try not do dig too deep if they want fast, just get the highest impact features setup
56
+ If the user just says setup fast, try to get a general idea of the project file structure and setup one source code embedding for the whole codebase and linter commands if possible. Try not do dig too deep if they want fast, just get the highest impact features setup
50
57
 
51
58
  `,
52
59
  },
@@ -3,7 +3,10 @@ import { execCommand } from "./execCommand";
3
3
 
4
4
  export async function textSearch(searchTerm) {
5
5
  try {
6
- const command = `ag -m 3 -Q "${searchTerm}"`;
6
+ // Escape the search term for safe shell usage
7
+ // Replace single quotes with '\'' which closes quote, adds escaped quote, reopens quote
8
+ const escapedTerm = searchTerm.replace(/'/g, "'\\''");
9
+ const command = `ag -m 3 -Q '${escapedTerm}'`;
7
10
  const output = await execCommand(command);
8
11
  return output;
9
12
  } catch (err) {
@@ -61,8 +61,8 @@ export class CliChatService implements ChatService {
61
61
  try {
62
62
  if (fs.existsSync(this.historyFile)) {
63
63
  const historyData = fs.readFileSync(this.historyFile, "utf8");
64
- const chatHistory: ChatHistory = JSON.parse(historyData);
65
- this.inputHistory = chatHistory.inputs || [];
64
+ const parsedHistory: ChatHistory = JSON.parse(historyData);
65
+ this.inputHistory = parsedHistory.inputs || [];
66
66
  }
67
67
  } catch (error) {
68
68
  console.error("Error loading input history:", error);
@@ -81,11 +81,11 @@ export class CliChatService implements ChatService {
81
81
  fs.mkdirSync(dir, { recursive: true });
82
82
  }
83
83
 
84
- const chatHistory: ChatHistory = {
84
+ const inputHistory: ChatHistory = {
85
85
  inputs: this.inputHistory,
86
86
  };
87
87
 
88
- fs.writeFileSync(this.historyFile, JSON.stringify(chatHistory, null, 2));
88
+ fs.writeFileSync(this.historyFile, JSON.stringify(inputHistory, null, 2));
89
89
  } catch (error) {
90
90
  console.error("Error saving input history:", error);
91
91
  }
@@ -154,6 +154,7 @@ export class CliChatService implements ChatService {
154
154
 
155
155
  async processInput(input: string): Promise<boolean> {
156
156
  // Note: Input is added to history via setOnNewHistoryEntry callback when user presses Enter
157
+ // Note: this actually sends all commands to modules, first to service takes it
157
158
 
158
159
  // Check if input is a command
159
160
  if (input.startsWith("/")) {
@@ -198,7 +199,6 @@ export class CliChatService implements ChatService {
198
199
  async getInput(
199
200
  prompt: string = "> ",
200
201
  options: string[] = [],
201
- chatHistory: any[] = []
202
202
  ): Promise<string> {
203
203
  if (this.context.inputMethod) {
204
204
  return await this.context.inputMethod.getInput(prompt);
@@ -277,7 +277,6 @@ export class CliChatService implements ChatService {
277
277
  const input = await this.getInput(
278
278
  promptText,
279
279
  commandNames,
280
- this.chatHistory
281
280
  );
282
281
 
283
282
  if (input.trim() === "") {
@@ -28,7 +28,7 @@ export class SystemModule extends BaseChatModule {
28
28
  },
29
29
  {
30
30
  name: "clear",
31
- description: "Clear chat history",
31
+ description: "Clear chat history - AI will not remember previous messages",
32
32
  handler: this.handleClearCommand.bind(this),
33
33
  },
34
34
  ];
@@ -325,12 +325,24 @@ export class GenericAnthropicClient implements GenericClient {
325
325
 
326
326
  pricesPerMillion() {
327
327
  return {
328
+ [Models.anthropic.Opus4_6]: {
329
+ input: 5.0,
330
+ cache_write: 6.25,
331
+ cache_hit: 0.5,
332
+ output: 25.0,
333
+ },
328
334
  [Models.anthropic.Opus4_5]: {
329
335
  input: 5.0,
330
336
  cache_write: 6.25,
331
337
  cache_hit: 0.5,
332
338
  output: 25.0,
333
339
  },
340
+ [Models.anthropic.Opus4_1]: {
341
+ input: 15.0,
342
+ cache_write: 18.75,
343
+ cache_hit: 1.5,
344
+ output: 75.0,
345
+ },
334
346
  [Models.anthropic.Opus4]: {
335
347
  input: 15.0,
336
348
  cache_write: 18.75,
package/src/config.ts CHANGED
@@ -52,6 +52,11 @@ const defaultConfig = {
52
52
  prompt: "BasicEmbeddingExplainer",
53
53
  chunkSize: 2000,
54
54
  },
55
+ {
56
+ input: "src/**/*.ts",
57
+ output: ".knowhow/embeddings/code.json",
58
+ chunkSize: 2000,
59
+ },
55
60
  ],
56
61
  embeddingModel: EmbeddingModels.openai.EmbeddingAda2,
57
62
 
@@ -96,6 +96,10 @@ export class LanguagePlugin extends PluginBase implements Plugin {
96
96
  if (!exists) {
97
97
  return { filePath, content: `File ${filePath} does not exist` };
98
98
  }
99
+ const stat = await fileStat(filePath);
100
+ if (stat.isDirectory()) {
101
+ throw new Error(`Cannot read directories: ${filePath}`);
102
+ }
99
103
  const content = (await readFile(filePath, "utf8")).toString();
100
104
  return { filePath, content };
101
105
  })
@@ -0,0 +1,496 @@
1
/**
 * Lightweight structural description of a JSON value.
 *
 * This is the shape returned by `JsonCompressor.generateSchema`; it is a
 * simplified summary, not a standards-compliant JSON Schema document.
 */
export interface JsonSchema {
  /**
   * Kind of value described. `generateSchema` emits "object", "array",
   * "null", "any" (depth limit reached), "unknown" (items of an empty
   * array), "mcp_response", or the `typeof` result for primitives.
   */
  type: string;

  /** Per-key schemas; present when the described value is an object. */
  properties?: Record<string, JsonSchema>;

  /** Schema of the array elements; present when the described value is an array. */
  items?: JsonSchema;

  /**
   * Names of properties that were compressed out of the value.
   * NOTE(review): nothing in this file populates this field — confirm usage
   * against callers.
   */
  compressed_properties?: Array<string>;
}
10
+
11
/**
 * Record persisted to storage when low-signal properties are stripped from
 * an object, so the stripped key/value pairs can be retrieved later.
 */
export interface CompressionMetadata {
  /** The key/value pairs that were removed from the original object. */
  compressed_properties: Record<string, any>;

  /** Why the properties were removed (this file writes "low_signal_detection"). */
  compression_reason: string;

  /**
   * Average pairwise prefix similarity of the object's URL values; left
   * undefined when the URL-similarity heuristic did not fire.
   */
  similarity_score?: number;
}
19
+
20
/**
 * Storage and token-estimation services that `JsonCompressor` depends on.
 * The compressor never stores data itself; it delegates to this interface.
 */
export interface JsonCompressorStorage {
  /** Persist `value` so it can later be looked up by `key`. */
  storeString(key: string, value: string): void;

  /** Produce a new key under which a value can be stored. */
  generateKey(): string;

  /** Estimate how many tokens `text` occupies. */
  estimateTokens(text: string): number;
}
28
+
29
/**
 * Handles JSON-specific compression logic including schema generation,
 * low-signal property detection, and deduplication.
 *
 * Large or repeated fragments are written to the injected storage and
 * replaced in the returned structure by a textual stub that names the
 * storage key and the tool to call to retrieve the content.
 */
export class JsonCompressor {
  /** Objects at or below this many estimated tokens are never stored for deduplication. */
  private static readonly DEDUP_MIN_TOKENS = 100;
  /** Minimum number of low-signal properties before they are extracted. */
  private static readonly MIN_LOW_SIGNAL_PROPERTIES = 5;
  /** Minimum number of URL-valued properties before their similarity is measured. */
  private static readonly MIN_URL_PROPERTIES = 3;
  /** Average common-prefix ratio above which URL properties count as low signal. */
  private static readonly URL_SIMILARITY_THRESHOLD = 0.6;
  /** Keeps low-signal hashes apart from whole-object hashes in the shared map. */
  private static readonly LOW_SIGNAL_HASH_PREFIX = "low_signal:";
  private static readonly URL_PATTERN = /^https?:\/\//;
  private static readonly LOW_SIGNAL_KEY_PATTERNS = [
    /_url$/,
    /_id$/,
    /^node_id$/,
    /^avatar_url$/,
    /^gravatar_id$/,
  ];

  // Deduplication tracking
  private deduplicationMap: Map<string, string> = new Map();
  private objectSeenCount: Map<string, number> = new Map();
  private propertyNamesMap: Map<string, string> = new Map();
  private propertyNamesSeenCount: Map<string, number> = new Map();

  private compressionThreshold: number;
  private maxTokens: number;
  private toolName: string;
  private storage: JsonCompressorStorage;

  /**
   * @param storage Backing store and token estimator.
   * @param compressionThreshold Token count above which a fragment is replaced by a stub.
   * @param maxTokens Token count at or below which a top-level value is returned untouched.
   * @param toolName Tool name quoted in stubs as the way to retrieve stored content.
   */
  constructor(
    storage: JsonCompressorStorage,
    compressionThreshold: number,
    maxTokens: number,
    toolName: string
  ) {
    this.storage = storage;
    this.compressionThreshold = compressionThreshold;
    this.maxTokens = maxTokens;
    this.toolName = toolName;
  }

  /**
   * Clear all deduplication tracking
   */
  clearDeduplication(): void {
    this.deduplicationMap.clear();
    this.objectSeenCount.clear();
    this.propertyNamesMap.clear();
    this.propertyNamesSeenCount.clear();
  }

  /**
   * Update compression settings
   */
  updateSettings(compressionThreshold: number, maxTokens: number): void {
    this.compressionThreshold = compressionThreshold;
    this.maxTokens = maxTokens;
  }

  /**
   * Attempts to parse content as JSON and returns the parsed value, or null
   * when the content is not valid JSON.
   *
   * - A JSON string literal is unwrapped and parsed again (double-encoded
   *   JSON); if the inner text is not JSON the result is null.
   * - An MCP tool response (`content[0]` is `{ type: "text", text }` and
   *   `text` is itself JSON) is returned as
   *   `{ _mcp_format: true, _raw_structure, data }`.
   */
  tryParseJson(content: string): any | null {
    let parsed: any;
    try {
      parsed = JSON.parse(content);
    } catch {
      return null;
    }

    // Double-encoded JSON: unwrap one layer and try again.
    if (typeof parsed === "string") {
      return this.tryParseJson(parsed);
    }

    const isMcpShaped =
      parsed &&
      typeof parsed === "object" &&
      Array.isArray(parsed.content) &&
      parsed.content.length > 0;
    const firstContent = isMcpShaped ? parsed.content[0] : undefined;

    // Guard against null/primitive entries so valid JSON is never reported
    // as unparseable just because content[0] is not an object.
    if (
      firstContent &&
      firstContent.type === "text" &&
      typeof firstContent.text === "string"
    ) {
      try {
        const nestedData = JSON.parse(firstContent.text);
        // Preserve the MCP envelope shape while exposing the decoded payload.
        return {
          _mcp_format: true,
          _raw_structure: { content: [{ type: "text" }] },
          data: nestedData,
        };
      } catch {
        // Nested text is not JSON: hand back the envelope as parsed.
        return parsed;
      }
    }

    return parsed;
  }

  /**
   * Generate a simplified schema describing the shape of a value.
   *
   * @param obj Value to describe.
   * @param maxDepth Nesting depth after which `{ type: "any" }` is returned.
   * @param currentDepth Current recursion depth (callers normally omit this).
   */
  public generateSchema(
    obj: any,
    maxDepth: number = 3,
    currentDepth: number = 0
  ): JsonSchema {
    if (currentDepth > maxDepth) {
      return { type: "any" };
    }

    // MCP wrapper produced by tryParseJson: describe the payload, not the
    // wrapper. `!== undefined` so falsy payloads (0, "", false, null) are
    // still treated as MCP responses.
    if (
      obj &&
      typeof obj === "object" &&
      obj._mcp_format === true &&
      obj.data !== undefined
    ) {
      return {
        type: "mcp_response",
        properties: {
          data: this.generateSchema(obj.data, maxDepth, currentDepth),
        },
      };
    }

    if (obj === null) {
      return { type: "null" };
    }

    if (Array.isArray(obj)) {
      if (obj.length === 0) {
        return { type: "array", items: { type: "unknown" } };
      }
      // The first element is used as the representative item schema.
      return {
        type: "array",
        items: this.generateSchema(obj[0], maxDepth, currentDepth + 1),
      };
    }

    if (typeof obj === "object") {
      const properties: Record<string, JsonSchema> = {};
      for (const [key, value] of Object.entries(obj)) {
        properties[key] = this.generateSchema(value, maxDepth, currentDepth + 1);
      }
      return { type: "object", properties };
    }

    return { type: typeof obj };
  }

  /**
   * Calculate similarity between two strings as the length of their common
   * prefix divided by the length of the longer string (1.0 for two empty
   * strings).
   */
  private calculateSimilarity(str1: string, str2: string): number {
    const maxLen = Math.max(str1.length, str2.length);
    if (maxLen === 0) {
      return 1.0;
    }

    const minLen = Math.min(str1.length, str2.length);
    let commonPrefixLen = 0;
    while (commonPrefixLen < minLen && str1[commonPrefixLen] === str2[commonPrefixLen]) {
      commonPrefixLen++;
    }

    return commonPrefixLen / maxLen;
  }

  /**
   * Detect low-signal properties in an object.
   *
   * A property is low signal when either (a) the object has at least three
   * http(s) URL values whose average pairwise prefix similarity exceeds 0.6,
   * and the property is one of them, or (b) its name matches one of the
   * low-signal name patterns (`*_url`, `*_id`, ...).
   *
   * @returns Names in detection order (similar URLs first, then name
   *   matches) plus URL statistics when heuristic (a) fired.
   */
  private detectLowSignalProperties(obj: any): {
    lowSignal: string[];
    metadata: Record<string, any>;
  } {
    if (!obj || typeof obj !== "object" || Array.isArray(obj)) {
      return { lowSignal: [], metadata: {} };
    }

    const lowSignal: string[] = [];
    const metadata: Record<string, any> = {};
    const keys = Object.keys(obj);

    const urlProps = keys.filter(
      (key) =>
        typeof obj[key] === "string" && JsonCompressor.URL_PATTERN.test(obj[key])
    );

    if (urlProps.length >= JsonCompressor.MIN_URL_PROPERTIES) {
      let totalSimilarity = 0;
      let comparisons = 0;
      for (let i = 0; i < urlProps.length - 1; i++) {
        for (let j = i + 1; j < urlProps.length; j++) {
          totalSimilarity += this.calculateSimilarity(
            obj[urlProps[i]],
            obj[urlProps[j]]
          );
          comparisons++;
        }
      }

      const avgSimilarity = comparisons > 0 ? totalSimilarity / comparisons : 0;
      if (avgSimilarity > JsonCompressor.URL_SIMILARITY_THRESHOLD) {
        lowSignal.push(...urlProps);
        metadata.url_similarity = avgSimilarity;
        metadata.url_count = urlProps.length;
      }
    }

    // Name-based detection; the set avoids re-adding URL properties.
    const alreadyListed = new Set(lowSignal);
    for (const key of keys) {
      const nameMatches = JsonCompressor.LOW_SIGNAL_KEY_PATTERNS.some((pattern) =>
        pattern.test(key)
      );
      if (nameMatches && !alreadyListed.has(key)) {
        alreadyListed.add(key);
        lowSignal.push(key);
      }
    }

    return { lowSignal, metadata };
  }

  /**
   * Compress an object by moving its low-signal properties into storage.
   *
   * Returns `obj` itself (same reference) when it is not a plain object or
   * has fewer than five low-signal properties. Otherwise returns a new
   * object holding the remaining properties plus `_compressed_properties_key`,
   * `_compressed_property_names` and `_compression_info`.
   *
   * @param obj Object to inspect.
   * @param path Accepted for signature compatibility; not used here.
   */
  compressObjectWithLowSignalDetection(obj: any, path: string = ""): any {
    if (!obj || typeof obj !== "object" || Array.isArray(obj)) {
      return obj;
    }

    const { lowSignal, metadata } = this.detectLowSignalProperties(obj);
    if (lowSignal.length < JsonCompressor.MIN_LOW_SIGNAL_PROPERTIES) {
      return obj;
    }

    const lowSignalKeys = new Set(lowSignal);
    const highSignal: any = {};
    const compressed: any = {};
    for (const [key, value] of Object.entries(obj)) {
      if (lowSignalKeys.has(key)) {
        compressed[key] = value;
      } else {
        highSignal[key] = value;
      }
    }

    // Reuse the stored copy when an identical set of low-signal values was
    // already extracted (e.g. a repeated "owner" sub-object). The prefix
    // keeps these entries from colliding with whole-object entries.
    const compressedHash =
      JsonCompressor.LOW_SIGNAL_HASH_PREFIX + this.hashObject(compressed);
    let compressedKey = this.deduplicationMap.get(compressedHash);
    if (!compressedKey) {
      compressedKey = this.storage.generateKey();
      this.deduplicationMap.set(compressedHash, compressedKey);

      const compressionMetadata: CompressionMetadata = {
        compressed_properties: compressed,
        compression_reason: "low_signal_detection",
        similarity_score: metadata.url_similarity,
      };
      this.storage.storeString(compressedKey, JSON.stringify(compressionMetadata));
    }

    // The list of property names is itself deduplicated: the first
    // occurrence is stored and returned in full, later ones get a reference.
    const propertyNamesHash = this.hashObject(lowSignal);
    const timesSeen = this.propertyNamesSeenCount.get(propertyNamesHash) || 0;
    this.propertyNamesSeenCount.set(propertyNamesHash, timesSeen + 1);

    let propertyNamesValue: string | any[] = lowSignal;
    if (timesSeen === 0) {
      const propertyNamesKey = this.storage.generateKey();
      this.propertyNamesMap.set(propertyNamesHash, propertyNamesKey);
      this.storage.storeString(propertyNamesKey, JSON.stringify(lowSignal));
    } else {
      const existingPropertyNamesKey = this.propertyNamesMap.get(propertyNamesHash);
      propertyNamesValue = `[DEDUPLICATED_ARRAY]\nKey: ${existingPropertyNamesKey}`;
    }

    return {
      ...highSignal,
      _compressed_properties_key: compressedKey,
      _compressed_property_names: propertyNamesValue,
      // Uses the configured tool name so the hint matches every other stub.
      _compression_info: `${lowSignal.length} low-signal properties compressed (URLs, IDs). Use ${this.toolName} with key "${compressedKey}" to retrieve.`,
    };
  }

  /**
   * Creates a stable fingerprint of a JSON-compatible value for
   * deduplication (not for security).
   *
   * Keys are sorted at every nesting level, so key order does not affect
   * the result while nested differences do. Two independent 32-bit hashes
   * plus the serialized length are combined to make accidental collisions
   * (which would substitute one object's content for another's) very
   * unlikely.
   */
  private hashObject(obj: any): string {
    const normalized = JSON.stringify(obj, (_key, value) => {
      if (value && typeof value === "object" && !Array.isArray(value)) {
        // Null prototype so a literal "__proto__" key becomes a plain entry.
        const sorted: Record<string, any> = Object.create(null);
        for (const key of Object.keys(value).sort()) {
          sorted[key] = value[key];
        }
        return sorted;
      }
      return value;
    });

    let rolling = 0;
    let fnv = 0x811c9dc5;
    for (let i = 0; i < normalized.length; i++) {
      const code = normalized.charCodeAt(i);
      rolling = ((rolling << 5) - rolling + code) | 0;
      fnv = Math.imul(fnv ^ code, 0x01000193);
    }

    return [
      rolling.toString(36),
      (fnv >>> 0).toString(36),
      normalized.length.toString(36),
    ].join(".");
  }

  /**
   * Compresses large properties within a JSON value using depth-first
   * traversal.
   *
   * - A top-level value (`path === ""`) whose serialized size is at most
   *   `maxTokens` is returned untouched.
   * - Arrays are processed element by element and then chunked from the end.
   * - Objects are deduplicated, stripped of low-signal properties, processed
   *   property by property, and stubbed when still over the threshold.
   * - Strings holding JSON are compressed structurally when that saves at
   *   least 20%; other long strings are stubbed.
   * - All other values are returned unchanged.
   *
   * @param obj Value to compress.
   * @param path Location of `obj` within the root value; used in stubs.
   */
  compressJsonProperties(obj: any, path: string = ""): any {
    if (path === "") {
      const serialized = JSON.stringify(obj);
      // `undefined` has no JSON form; there is nothing to compress.
      if (
        serialized === undefined ||
        this.storage.estimateTokens(serialized) <= this.maxTokens
      ) {
        return obj;
      }
    }

    if (Array.isArray(obj)) {
      return this.compressArray(obj, path);
    }
    if (obj && typeof obj === "object") {
      return this.compressObject(obj, path);
    }
    if (typeof obj === "string") {
      return this.compressString(obj, path);
    }
    return obj;
  }

  /**
   * Compresses each element, then walks the array backwards grouping
   * elements into chunks; a chunk is stored and replaced by one stub as soon
   * as it exceeds the compression threshold. Leading elements that never
   * fill a chunk are kept inline.
   */
  private compressArray(items: any[], path: string): any[] {
    const processedItems = items.map((item, index) =>
      this.compressJsonProperties(item, `${path}[${index}]`)
    );

    const finalArray: any[] = [];
    let currentChunk: any[] = [];

    for (let i = processedItems.length - 1; i >= 0; i--) {
      currentChunk.unshift(processedItems[i]);

      const chunkString = JSON.stringify(currentChunk);
      const chunkTokens = this.storage.estimateTokens(chunkString);
      if (chunkTokens <= this.compressionThreshold) {
        continue;
      }

      const key = this.storage.generateKey();
      this.storage.storeString(key, chunkString);
      const lastIndex = i + currentChunk.length - 1;
      finalArray.unshift(
        `[COMPRESSED_JSON_ARRAY_CHUNK - ${chunkTokens} tokens, ${currentChunk.length} items]\nKey: ${key}\nPath: ${path}[${i}...${lastIndex}]\nPreview: ${chunkString.substring(0, 100)}...\n[Use ${this.toolName} tool with key "${key}" to retrieve this chunk]`
      );
      currentChunk = [];
    }

    // concat instead of unshift(...chunk): no argument-count limit on huge arrays.
    return currentChunk.concat(finalArray);
  }

  /**
   * Deduplicates, strips low-signal properties from, and recursively
   * compresses an object.
   *
   * An object still over the threshold after its children are processed is
   * stored and replaced by a stub. Otherwise, the first occurrence of an
   * object over 100 tokens is stored and returned in full. Either way the
   * key is registered, so later identical objects become
   * `[DEDUPLICATED_OBJECT]` references.
   */
  private compressObject(obj: any, path: string): any {
    const objHash = this.hashObject(obj);
    const existingKey = this.deduplicationMap.get(objHash);
    if (existingKey) {
      return `[DEDUPLICATED_OBJECT]\nKey: ${existingKey}\nPath: ${path}\n[Use ${this.toolName} tool with key "${existingKey}" to retrieve content]`;
    }

    // `timesSeen` is the count BEFORE this visit, so 0 means first occurrence.
    const timesSeen = this.objectSeenCount.get(objHash) || 0;
    this.objectSeenCount.set(objHash, timesSeen + 1);
    const isFirstOccurrence = timesSeen === 0;

    const objToProcess = this.compressObjectWithLowSignalDetection(obj, path);

    const result: any = {};
    for (const [key, value] of Object.entries(objToProcess)) {
      const newPath = path ? `${path}.${key}` : key;
      result[key] = this.compressJsonProperties(value, newPath);
    }

    const objectAsString = JSON.stringify(result);
    const tokens = this.storage.estimateTokens(objectAsString);

    // Size limit first: an oversized object must never be returned inline,
    // duplicate or not. Registering the key lets later copies reference it
    // instead of being stored again.
    if (tokens > this.compressionThreshold) {
      const key = this.storage.generateKey();
      this.storage.storeString(key, objectAsString);
      this.deduplicationMap.set(objHash, key);

      return `[COMPRESSED_JSON_OBJECT - ${tokens} tokens]\nKey: ${key}\nPath: ${path}\nKeys: ${Object.keys(
        result
      ).join(", ")}\nPreview: ${objectAsString.substring(0, 200)}...\n[Use ${
        this.toolName
      } tool with key "${key}" to retrieve full content]`;
    }

    // Store proactively on first sight so the next identical object can be
    // replaced by a reference; this occurrence is still returned in full.
    if (isFirstOccurrence && tokens > JsonCompressor.DEDUP_MIN_TOKENS) {
      const key = this.storage.generateKey();
      this.deduplicationMap.set(objHash, key);
      this.storage.storeString(key, objectAsString);
    }

    return result;
  }

  /**
   * Compresses a string value. Embedded JSON is compressed structurally and
   * returned pretty-printed when that is under 80% of the original size;
   * otherwise strings over the threshold are stored and replaced by a stub.
   */
  private compressString(text: string, path: string): string {
    const parsedJson = this.tryParseJson(text);
    if (parsedJson) {
      const compressedJson = this.compressJsonProperties(parsedJson, path);
      const compressedJsonString = JSON.stringify(compressedJson, null, 2);

      const originalTokens = this.storage.estimateTokens(text);
      const compressedTokens = this.storage.estimateTokens(compressedJsonString);
      if (compressedTokens < originalTokens * 0.8) {
        return compressedJsonString;
      }
    }

    const tokens = this.storage.estimateTokens(text);
    if (tokens <= this.compressionThreshold) {
      return text;
    }

    const key = this.storage.generateKey();
    this.storage.storeString(key, text);
    return `[COMPRESSED_JSON_PROPERTY - ${tokens} tokens]\nKey: ${key}\nPath: ${path}\nPreview: ${text.substring(
      0,
      200
    )}...\n[Use ${this.toolName} tool with key "${key}" to retrieve full content]`;
  }
}