@tyvm/knowhow 0.0.60 → 0.0.62
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agents/setup/setup.ts +9 -2
- package/src/agents/tools/textSearch.ts +4 -1
- package/src/chat/CliChatService.ts +5 -6
- package/src/chat/modules/SystemModule.ts +1 -1
- package/src/clients/anthropic.ts +12 -0
- package/src/config.ts +5 -0
- package/src/plugins/language.ts +4 -0
- package/src/processors/JsonCompressor.ts +496 -0
- package/src/processors/TokenCompressor.ts +194 -125
- package/src/processors/index.ts +1 -0
- package/src/services/Mcp.ts +1 -0
- package/src/services/Tools.ts +5 -3
- package/src/types.ts +1 -0
- package/src/utils/InputQueueManager.ts +119 -95
- package/tests/compressor/bigstring.test.ts +352 -2
- package/tests/compressor/githubjson.txt +1 -0
- package/ts_build/package.json +1 -1
- package/ts_build/src/agents/setup/setup.js +9 -2
- package/ts_build/src/agents/setup/setup.js.map +1 -1
- package/ts_build/src/agents/tools/textSearch.js +2 -1
- package/ts_build/src/agents/tools/textSearch.js.map +1 -1
- package/ts_build/src/chat/CliChatService.d.ts +1 -1
- package/ts_build/src/chat/CliChatService.js +6 -6
- package/ts_build/src/chat/CliChatService.js.map +1 -1
- package/ts_build/src/chat/modules/SystemModule.js +1 -1
- package/ts_build/src/chat/modules/SystemModule.js.map +1 -1
- package/ts_build/src/clients/anthropic.js +12 -0
- package/ts_build/src/clients/anthropic.js.map +1 -1
- package/ts_build/src/config.js +5 -0
- package/ts_build/src/config.js.map +1 -1
- package/ts_build/src/plugins/language.js +4 -0
- package/ts_build/src/plugins/language.js.map +1 -1
- package/ts_build/src/processors/JsonCompressor.d.ts +36 -0
- package/ts_build/src/processors/JsonCompressor.js +295 -0
- package/ts_build/src/processors/JsonCompressor.js.map +1 -0
- package/ts_build/src/processors/TokenCompressor.d.ts +23 -5
- package/ts_build/src/processors/TokenCompressor.js +106 -70
- package/ts_build/src/processors/TokenCompressor.js.map +1 -1
- package/ts_build/src/processors/index.d.ts +1 -0
- package/ts_build/src/processors/index.js +3 -1
- package/ts_build/src/processors/index.js.map +1 -1
- package/ts_build/src/services/Mcp.js.map +1 -1
- package/ts_build/src/services/Tools.js +1 -1
- package/ts_build/src/services/Tools.js.map +1 -1
- package/ts_build/src/types.d.ts +1 -0
- package/ts_build/src/types.js +1 -0
- package/ts_build/src/types.js.map +1 -1
- package/ts_build/src/utils/InputQueueManager.d.ts +4 -1
- package/ts_build/src/utils/InputQueueManager.js +93 -78
- package/ts_build/src/utils/InputQueueManager.js.map +1 -1
- package/ts_build/tests/compressor/bigstring.test.js +209 -0
- package/ts_build/tests/compressor/bigstring.test.js.map +1 -1
package/package.json
CHANGED
|
@@ -38,15 +38,22 @@ export class SetupAgent extends BaseAgent {
|
|
|
38
38
|
|
|
39
39
|
Always ask the user to approve what you're going to do to the config, that way you can get feedback via askHuman before modifying the config
|
|
40
40
|
|
|
41
|
+
After using askHuman and them providing their feedback of what you'd like to do, only follow what they say. We want to make the minimum set of changes to the config.
|
|
42
|
+
|
|
43
|
+
For codebase embeddings you don't want to use prompt, as that'd embed a transformation of the code, you want to embed the actual source, so don't use prompt.
|
|
44
|
+
For embeddings prompt would only be used for generating an embedding from transformed data, like if you wanted to summarize a transcript and make embeddings from the summary, then you'd use prompt on the embeddings, otherwise you should not need it.
|
|
45
|
+
|
|
41
46
|
When setting up the language plugin for a user you should come up with phrases they're likely to say, like frontend/backend/schema etc that will signal we should load in guides or rules for that type of task. You should put any of your rules/analses in .knowhow/docs and the language plugin should reference those.
|
|
42
47
|
|
|
48
|
+
The language plugin can only read in files, not directories, so do not add entries to language plugin unless you've first written some markdown files to load in as guidance. The files loaded by the language plugin should give quick tips to any unusual things about the project, commands that should be run to rebuild any auto-generated code, quirks about codebase behavior etc.
|
|
49
|
+
|
|
43
50
|
If a user is vauge about setting up, you should give them some options of what all you could help them setup with a brief explanation of what those setups would enable.
|
|
44
51
|
|
|
45
52
|
Only suggest embeddings that include a folder path with many elements, ie src/**/*.ts, never suggest entries with one element
|
|
46
53
|
|
|
47
|
-
If a user is requesting help with setting up a coding project, you can look at their package.json to setup the lintCommands so that we get feedback on file edits, and embeddings for the source code as those two features are the highest impact
|
|
54
|
+
If a user is requesting help with setting up a coding project, you can look at their package.json, or language specific config to setup the lintCommands so that we get feedback on file edits, and embeddings for the source code as those two features are the highest impact
|
|
48
55
|
|
|
49
|
-
If the user just says setup fast, try to get a general idea of the project file structure and setup one source code embedding for the whole
|
|
56
|
+
If the user just says setup fast, try to get a general idea of the project file structure and setup one source code embedding for the whole codebase and linter commands if possible. Try not do dig too deep if they want fast, just get the highest impact features setup
|
|
50
57
|
|
|
51
58
|
`,
|
|
52
59
|
},
|
|
@@ -3,7 +3,10 @@ import { execCommand } from "./execCommand";
|
|
|
3
3
|
|
|
4
4
|
export async function textSearch(searchTerm) {
|
|
5
5
|
try {
|
|
6
|
-
|
|
6
|
+
// Escape the search term for safe shell usage
|
|
7
|
+
// Replace single quotes with '\'' which closes quote, adds escaped quote, reopens quote
|
|
8
|
+
const escapedTerm = searchTerm.replace(/'/g, "'\\''");
|
|
9
|
+
const command = `ag -m 3 -Q '${escapedTerm}'`;
|
|
7
10
|
const output = await execCommand(command);
|
|
8
11
|
return output;
|
|
9
12
|
} catch (err) {
|
|
@@ -61,8 +61,8 @@ export class CliChatService implements ChatService {
|
|
|
61
61
|
try {
|
|
62
62
|
if (fs.existsSync(this.historyFile)) {
|
|
63
63
|
const historyData = fs.readFileSync(this.historyFile, "utf8");
|
|
64
|
-
const
|
|
65
|
-
this.inputHistory =
|
|
64
|
+
const parsedHistory: ChatHistory = JSON.parse(historyData);
|
|
65
|
+
this.inputHistory = parsedHistory.inputs || [];
|
|
66
66
|
}
|
|
67
67
|
} catch (error) {
|
|
68
68
|
console.error("Error loading input history:", error);
|
|
@@ -81,11 +81,11 @@ export class CliChatService implements ChatService {
|
|
|
81
81
|
fs.mkdirSync(dir, { recursive: true });
|
|
82
82
|
}
|
|
83
83
|
|
|
84
|
-
const
|
|
84
|
+
const inputHistory: ChatHistory = {
|
|
85
85
|
inputs: this.inputHistory,
|
|
86
86
|
};
|
|
87
87
|
|
|
88
|
-
fs.writeFileSync(this.historyFile, JSON.stringify(
|
|
88
|
+
fs.writeFileSync(this.historyFile, JSON.stringify(inputHistory, null, 2));
|
|
89
89
|
} catch (error) {
|
|
90
90
|
console.error("Error saving input history:", error);
|
|
91
91
|
}
|
|
@@ -154,6 +154,7 @@ export class CliChatService implements ChatService {
|
|
|
154
154
|
|
|
155
155
|
async processInput(input: string): Promise<boolean> {
|
|
156
156
|
// Note: Input is added to history via setOnNewHistoryEntry callback when user presses Enter
|
|
157
|
+
// Note: this actually sends all commands to modules, first to service takes it
|
|
157
158
|
|
|
158
159
|
// Check if input is a command
|
|
159
160
|
if (input.startsWith("/")) {
|
|
@@ -198,7 +199,6 @@ export class CliChatService implements ChatService {
|
|
|
198
199
|
async getInput(
|
|
199
200
|
prompt: string = "> ",
|
|
200
201
|
options: string[] = [],
|
|
201
|
-
chatHistory: any[] = []
|
|
202
202
|
): Promise<string> {
|
|
203
203
|
if (this.context.inputMethod) {
|
|
204
204
|
return await this.context.inputMethod.getInput(prompt);
|
|
@@ -277,7 +277,6 @@ export class CliChatService implements ChatService {
|
|
|
277
277
|
const input = await this.getInput(
|
|
278
278
|
promptText,
|
|
279
279
|
commandNames,
|
|
280
|
-
this.chatHistory
|
|
281
280
|
);
|
|
282
281
|
|
|
283
282
|
if (input.trim() === "") {
|
package/src/clients/anthropic.ts
CHANGED
|
@@ -325,12 +325,24 @@ export class GenericAnthropicClient implements GenericClient {
|
|
|
325
325
|
|
|
326
326
|
pricesPerMillion() {
|
|
327
327
|
return {
|
|
328
|
+
[Models.anthropic.Opus4_6]: {
|
|
329
|
+
input: 5.0,
|
|
330
|
+
cache_write: 6.25,
|
|
331
|
+
cache_hit: 0.5,
|
|
332
|
+
output: 25.0,
|
|
333
|
+
},
|
|
328
334
|
[Models.anthropic.Opus4_5]: {
|
|
329
335
|
input: 5.0,
|
|
330
336
|
cache_write: 6.25,
|
|
331
337
|
cache_hit: 0.5,
|
|
332
338
|
output: 25.0,
|
|
333
339
|
},
|
|
340
|
+
[Models.anthropic.Opus4_1]: {
|
|
341
|
+
input: 15.0,
|
|
342
|
+
cache_write: 18.75,
|
|
343
|
+
cache_hit: 1.5,
|
|
344
|
+
output: 75.0,
|
|
345
|
+
},
|
|
334
346
|
[Models.anthropic.Opus4]: {
|
|
335
347
|
input: 15.0,
|
|
336
348
|
cache_write: 18.75,
|
package/src/config.ts
CHANGED
|
@@ -52,6 +52,11 @@ const defaultConfig = {
|
|
|
52
52
|
prompt: "BasicEmbeddingExplainer",
|
|
53
53
|
chunkSize: 2000,
|
|
54
54
|
},
|
|
55
|
+
{
|
|
56
|
+
input: "src/**/*.ts",
|
|
57
|
+
output: ".knowhow/embeddings/code.json",
|
|
58
|
+
chunkSize: 2000,
|
|
59
|
+
},
|
|
55
60
|
],
|
|
56
61
|
embeddingModel: EmbeddingModels.openai.EmbeddingAda2,
|
|
57
62
|
|
package/src/plugins/language.ts
CHANGED
|
@@ -96,6 +96,10 @@ export class LanguagePlugin extends PluginBase implements Plugin {
|
|
|
96
96
|
if (!exists) {
|
|
97
97
|
return { filePath, content: `File ${filePath} does not exist` };
|
|
98
98
|
}
|
|
99
|
+
const stat = await fileStat(filePath);
|
|
100
|
+
if (stat.isDirectory()) {
|
|
101
|
+
throw new Error(`Cannot read directories: ${filePath}`);
|
|
102
|
+
}
|
|
99
103
|
const content = (await readFile(filePath, "utf8")).toString();
|
|
100
104
|
return { filePath, content };
|
|
101
105
|
})
|
|
@@ -0,0 +1,496 @@
|
|
|
1
|
+
/**
 * Lightweight, JSON-Schema-like description of a value's shape, as produced
 * by JsonCompressor.generateSchema.
 */
export interface JsonSchema {
  /** Type label, e.g. 'object', 'array', 'null', 'any' or a `typeof` result. */
  type: string;
  /** Per-property schemas; populated for object-like values. */
  properties?: { [propertyName: string]: JsonSchema };
  /** Schema of a representative element; populated for arrays. */
  items?: JsonSchema;
  /** Names of properties that were moved out of the object into storage. */
  compressed_properties?: Array<string>;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Record written to storage when a set of properties is extracted from an
 * object during compression.
 */
export interface CompressionMetadata {
  /** The extracted property names mapped to their original values. */
  compressed_properties: { [propertyName: string]: any };
  /** Short tag naming why the properties were extracted. */
  compression_reason: string;
  /** Optional similarity score associated with the extraction. */
  similarity_score?: number;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Storage and token-estimation services that JsonCompressor depends on.
 */
export interface JsonCompressorStorage {
  /** Persist `value` so it can later be looked up by `key`. */
  storeString(key: string, value: string): void;
  /** Produce a new key to store a value under. */
  generateKey(): string;
  /** Estimate how many tokens `text` occupies. */
  estimateTokens(text: string): number;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Handles JSON-specific compression logic: schema generation, low-signal
 * property detection, deduplication of repeated objects, and replacing large
 * values with short stubs whose full content is kept in external storage.
 *
 * Every stub embeds the storage key plus the configured tool name so a reader
 * of the compressed output knows how to fetch the original content.
 */
export class JsonCompressor {
  // Deduplication tracking.
  // deduplicationMap: content hash -> storage key holding that content.
  private deduplicationMap: Map<string, string> = new Map();
  // objectSeenCount: content hash -> number of times the object was visited.
  private objectSeenCount: Map<string, number> = new Map();
  // propertyNamesMap: hash of a low-signal name list -> storage key of the list.
  private propertyNamesMap: Map<string, string> = new Map();
  // propertyNamesSeenCount: hash of a low-signal name list -> times emitted.
  private propertyNamesSeenCount: Map<string, number> = new Map();

  private compressionThreshold: number;
  private maxTokens: number;
  private toolName: string;
  private storage: JsonCompressorStorage;

  /**
   * @param storage Backing store and token estimator.
   * @param compressionThreshold Token count above which a value (string,
   *   object, or array chunk) is replaced by a stub.
   * @param maxTokens Token count at or below which a root value is returned
   *   untouched by compressJsonProperties.
   * @param toolName Tool name written into stubs as the retrieval hint.
   */
  constructor(
    storage: JsonCompressorStorage,
    compressionThreshold: number,
    maxTokens: number,
    toolName: string
  ) {
    this.storage = storage;
    this.compressionThreshold = compressionThreshold;
    this.maxTokens = maxTokens;
    this.toolName = toolName;
  }

  /**
   * Clear all deduplication tracking.
   */
  clearDeduplication(): void {
    this.deduplicationMap.clear();
    this.objectSeenCount.clear();
    this.propertyNamesMap.clear();
    this.propertyNamesSeenCount.clear();
  }

  /**
   * Update compression settings.
   */
  updateSettings(compressionThreshold: number, maxTokens: number): void {
    this.compressionThreshold = compressionThreshold;
    this.maxTokens = maxTokens;
  }

  /**
   * Attempts to parse content as JSON and returns the parsed value.
   *
   * - Returns null when `content` is not valid JSON.
   * - When the parsed value is itself a string (double-encoded JSON) the
   *   string is parsed again, recursively; a string that does not contain
   *   JSON therefore yields null.
   * - When the parsed value looks like an MCP tool response (`content` is a
   *   non-empty array whose first element is `{ type: "text", text }`) and
   *   `text` holds JSON, a wrapper `{ _mcp_format, _raw_structure, data }` is
   *   returned with the nested JSON under `data`.
   */
  tryParseJson(content: string): any | null {
    let parsed: any;
    try {
      parsed = JSON.parse(content);
    } catch {
      return null;
    }

    // Double-encoded JSON: unwrap one level and try again.
    if (typeof parsed === 'string') {
      return this.tryParseJson(parsed);
    }

    const looksLikeMcpResponse =
      parsed &&
      typeof parsed === 'object' &&
      Array.isArray(parsed.content) &&
      parsed.content.length > 0;

    if (looksLikeMcpResponse) {
      const firstContent = parsed.content[0];

      // Guard against null / primitive entries: without it, reading `.type`
      // would throw and valid JSON would be reported as unparseable.
      if (
        firstContent &&
        firstContent.type === 'text' &&
        typeof firstContent.text === 'string'
      ) {
        try {
          const nestedData = JSON.parse(firstContent.text);

          // Preserve a marker of the MCP envelope while exposing the data.
          return {
            _mcp_format: true,
            _raw_structure: { content: [{ type: 'text' }] },
            data: nestedData
          };
        } catch {
          // Nested text is not JSON: fall back to the envelope as parsed.
          return parsed;
        }
      }
    }

    return parsed;
  }

  /**
   * Generate a JSON schema from a value.
   *
   * @param obj Value to describe.
   * @param maxDepth Deepest nesting level described; anything deeper is
   *   reported as `{ type: 'any' }`.
   * @param currentDepth Current recursion depth (callers normally omit it).
   * @returns A JsonSchema; arrays are described by their first element only.
   */
  public generateSchema(obj: any, maxDepth: number = 3, currentDepth: number = 0): JsonSchema {
    if (currentDepth > maxDepth) {
      return { type: 'any' };
    }

    // MCP wrapper objects (see tryParseJson): describe the payload, not the wrapper.
    if (obj && typeof obj === 'object' && obj._mcp_format === true && obj.data) {
      return {
        type: 'mcp_response',
        properties: {
          data: this.generateSchema(obj.data, maxDepth, currentDepth)
        }
      };
    }

    if (obj === null) {
      return { type: 'null' };
    }

    if (Array.isArray(obj)) {
      if (obj.length === 0) {
        return { type: 'array', items: { type: 'unknown' } };
      }
      // The first item is used as the representative; only its schema is built.
      return {
        type: 'array',
        items: this.generateSchema(obj[0], maxDepth, currentDepth + 1)
      };
    }

    if (typeof obj === 'object') {
      const properties: Record<string, JsonSchema> = {};
      for (const key of Object.keys(obj)) {
        properties[key] = this.generateSchema(obj[key], maxDepth, currentDepth + 1);
      }
      return { type: 'object', properties };
    }

    return { type: typeof obj };
  }

  /**
   * Calculate similarity between two strings as the length of their common
   * prefix divided by the length of the longer string (1.0 for two empty
   * strings).
   */
  private calculateSimilarity(str1: string, str2: string): number {
    const longest = Math.max(str1.length, str2.length);
    if (longest === 0) return 1.0;

    const shortest = Math.min(str1.length, str2.length);
    let commonPrefixLen = 0;
    while (commonPrefixLen < shortest && str1[commonPrefixLen] === str2[commonPrefixLen]) {
      commonPrefixLen++;
    }

    return commonPrefixLen / longest;
  }

  /**
   * Detect low-signal properties in an object (URLs, highly repetitive data).
   *
   * Two rules are applied, in this order:
   *  1. If the object has at least three http(s) URL string values whose
   *     average pairwise prefix similarity exceeds 0.6, all of those
   *     properties are low signal.
   *  2. Any property whose name matches one of the name patterns below is
   *     low signal.
   *
   * @returns The low-signal property names (rule 1 matches first, then rule 2,
   *   each in object key order) and metadata about the URL similarity check.
   */
  private detectLowSignalProperties(obj: any): { lowSignal: string[], metadata: Record<string, any> } {
    if (!obj || typeof obj !== 'object' || Array.isArray(obj)) {
      return { lowSignal: [], metadata: {} };
    }

    const lowSignal: string[] = [];
    const metadata: Record<string, any> = {};
    const keys = Object.keys(obj);

    // Rule 1: clusters of similar URLs.
    const urlPattern = /^https?:\/\//;
    const urlProps = keys.filter(
      key => typeof obj[key] === 'string' && urlPattern.test(obj[key])
    );

    if (urlProps.length >= 3) {
      const urlValues = urlProps.map(key => obj[key] as string);
      let totalSimilarity = 0;
      let comparisons = 0;

      for (let i = 0; i < urlValues.length - 1; i++) {
        for (let j = i + 1; j < urlValues.length; j++) {
          totalSimilarity += this.calculateSimilarity(urlValues[i], urlValues[j]);
          comparisons++;
        }
      }

      const avgSimilarity = comparisons > 0 ? totalSimilarity / comparisons : 0;

      // URLs sharing more than 60% of their prefix on average carry little signal.
      if (avgSimilarity > 0.6) {
        lowSignal.push(...urlProps);
        metadata.url_similarity = avgSimilarity;
        metadata.url_count = urlProps.length;
      }
    }

    // Rule 2: property names ending with _url, _id, node_id, etc.
    const lowSignalPatterns = [/_url$/, /_id$/, /^node_id$/, /^avatar_url$/, /^gravatar_id$/];
    for (const key of keys) {
      const matchesPattern = lowSignalPatterns.some(pattern => pattern.test(key));
      if (matchesPattern && !lowSignal.includes(key)) {
        lowSignal.push(key);
      }
    }

    return { lowSignal, metadata };
  }

  /**
   * Compress an object by extracting low-signal properties.
   *
   * Objects with fewer than five low-signal properties (and all non-objects
   * and arrays) are returned unchanged. Otherwise the low-signal properties
   * are written to storage as a CompressionMetadata record and the returned
   * object holds the remaining properties plus `_compressed_properties_key`,
   * `_compressed_property_names` and `_compression_info`.
   *
   * Identical extracted property sets reuse one storage key, and a repeated
   * list of property names is replaced by a reference string after its first
   * appearance.
   *
   * @param obj Value to compress.
   * @param path Location of `obj` in the document; currently unused here.
   */
  compressObjectWithLowSignalDetection(obj: any, path: string = ""): any {
    if (!obj || typeof obj !== 'object' || Array.isArray(obj)) {
      return obj;
    }

    const { lowSignal, metadata } = this.detectLowSignalProperties(obj);

    // Only compress if we have significant low-signal properties (at least 5).
    if (lowSignal.length < 5) {
      return obj;
    }

    const highSignal: any = {};
    const compressed: any = {};
    for (const [key, value] of Object.entries(obj)) {
      const target = lowSignal.includes(key) ? compressed : highSignal;
      target[key] = value;
    }

    // Reuse the stored record when an identical property set was extracted
    // before. The prefix keeps these entries apart from the whole-object
    // entries compressJsonProperties puts in the same map, so an object that
    // equals an extracted subset is never resolved to the metadata record.
    const compressedHash = `low_signal:${this.hashObject(compressed)}`;
    let compressedKey = this.deduplicationMap.get(compressedHash);

    if (!compressedKey) {
      compressedKey = this.storage.generateKey();
      this.deduplicationMap.set(compressedHash, compressedKey);

      const compressionMetadata: CompressionMetadata = {
        compressed_properties: compressed,
        compression_reason: 'low_signal_detection',
        similarity_score: metadata.url_similarity,
      };
      this.storage.storeString(compressedKey, JSON.stringify(compressionMetadata));
    }

    // Deduplicate the property names array: full list the first time, a
    // reference to the stored list afterwards.
    const propertyNamesHash = this.hashObject(lowSignal);
    const timesNamesSeen = this.propertyNamesSeenCount.get(propertyNamesHash) || 0;
    this.propertyNamesSeenCount.set(propertyNamesHash, timesNamesSeen + 1);

    let propertyNamesValue: string | any[] = lowSignal;

    if (timesNamesSeen === 0) {
      const propertyNamesKey = this.storage.generateKey();
      this.propertyNamesMap.set(propertyNamesHash, propertyNamesKey);
      this.storage.storeString(propertyNamesKey, JSON.stringify(lowSignal));
    } else {
      const existingPropertyNamesKey = this.propertyNamesMap.get(propertyNamesHash);
      propertyNamesValue = `[DEDUPLICATED_ARRAY]\nKey: ${existingPropertyNamesKey}`;
    }

    return {
      ...highSignal,
      _compressed_properties_key: compressedKey,
      _compressed_property_names: propertyNamesValue,
      _compression_info: `${lowSignal.length} low-signal properties compressed (URLs, IDs). Use expandTokens with key "${compressedKey}" to retrieve.`
    };
  }

  /**
   * Serialize a value to JSON text with object keys sorted at every nesting
   * level, so two structurally equal values always produce the same text.
   * Properties whose value JSON cannot represent (undefined, functions) are
   * omitted from objects and written as null inside arrays, as JSON.stringify
   * does.
   */
  private stableStringify(value: any): string {
    if (value === null || typeof value !== 'object') {
      const primitive = JSON.stringify(value);
      return primitive === undefined ? 'null' : primitive;
    }

    if (Array.isArray(value)) {
      return `[${value.map(item => this.stableStringify(item)).join(',')}]`;
    }

    const parts: string[] = [];
    for (const key of Object.keys(value).sort()) {
      if (JSON.stringify(value[key]) === undefined) {
        continue;
      }
      parts.push(`${JSON.stringify(key)}:${this.stableStringify(value[key])}`);
    }
    return `{${parts.join(',')}}`;
  }

  /**
   * Creates a stable hash of a value for deduplication (not cryptographic).
   *
   * The value is serialized with keys sorted at every level, so key order
   * does not matter while every nested value still contributes. (Passing a
   * key list as the JSON.stringify replacer would instead act as a property
   * allowlist at all depths and silently drop nested data.) The serialized
   * length is appended to the 32-bit hash to make accidental collisions
   * less likely.
   */
  private hashObject(obj: any): string {
    const normalized = this.stableStringify(obj);
    let hash = 0;
    for (let i = 0; i < normalized.length; i++) {
      // hash * 31 + charCode, truncated to a signed 32-bit integer.
      hash = ((hash << 5) - hash + normalized.charCodeAt(i)) | 0;
    }
    return `${hash.toString(36)}_${normalized.length.toString(36)}`;
  }

  /**
   * Compresses large properties within a JSON value using depth-first
   * traversal.
   *
   * - A root value (`path === ""`) estimated at or below maxTokens is
   *   returned untouched.
   * - Arrays: every item is compressed first, then the items are grouped
   *   from the end of the array backwards; whenever a group exceeds
   *   compressionThreshold tokens it is stored and replaced by one stub.
   * - Objects: an object whose content was stored earlier is replaced by a
   *   `[DEDUPLICATED_OBJECT]` reference. Otherwise low-signal properties are
   *   extracted, children are compressed, and the object is stored on its
   *   first occurrence (when above 100 tokens) so later copies can reference
   *   it; on later visits an object still above compressionThreshold is
   *   replaced by a stub.
   * - Strings: strings containing JSON are compressed structurally when that
   *   saves at least 20% of the tokens; other strings above
   *   compressionThreshold are stored and replaced by a stub.
   *
   * @param obj Value to compress.
   * @param path Location of `obj` in the document, used in stub text.
   * @returns The compressed value; the input is not modified.
   */
  compressJsonProperties(obj: any, path: string = ""): any {
    if (
      path === "" &&
      this.storage.estimateTokens(JSON.stringify(obj)) <= this.maxTokens
    ) {
      return obj;
    }

    if (Array.isArray(obj)) {
      // Step 1: compress every item first (depth-first).
      const processedItems = obj.map((item, index) =>
        this.compressJsonProperties(item, `${path}[${index}]`)
      );

      // Step 2: walk backwards, growing a chunk until it crosses the
      // threshold, then swap the whole chunk for a stub.
      const finalArray: any[] = [];
      let currentChunk: any[] = [];

      for (let i = processedItems.length - 1; i >= 0; i--) {
        currentChunk.unshift(processedItems[i]);

        const chunkString = JSON.stringify(currentChunk);
        const chunkTokens = this.storage.estimateTokens(chunkString);

        if (chunkTokens > this.compressionThreshold) {
          const key = this.storage.generateKey();
          this.storage.storeString(key, chunkString);

          const lastIndex = i + currentChunk.length - 1;
          const stub = `[COMPRESSED_JSON_ARRAY_CHUNK - ${chunkTokens} tokens, ${currentChunk.length} items]\nKey: ${key}\nPath: ${path}[${i}...${lastIndex}]\nPreview: ${chunkString.substring(0, 100)}...\n[Use ${this.toolName} tool with key "${key}" to retrieve this chunk]`;
          finalArray.unshift(stub);

          currentChunk = [];
        }
      }

      // Step 3: leading items that never filled a chunk stay inline.
      return currentChunk.concat(finalArray);
    }

    // Objects: dedup check, low-signal extraction, then children (depth-first).
    if (obj && typeof obj === "object") {
      const objHash = this.hashObject(obj);
      const existingKey = this.deduplicationMap.get(objHash);

      if (existingKey) {
        // Identical content was stored earlier: emit a reference instead.
        return `[DEDUPLICATED_OBJECT]\nKey: ${existingKey}\nPath: ${path}\n[Use ${this.toolName} tool with key "${existingKey}" to retrieve content]`;
      }

      // `timesSeenBefore` is the count BEFORE this visit, so 0 means this is
      // the first occurrence. The first occurrence is stored proactively and
      // returned in full; later occurrences hit the dedup map above.
      const timesSeenBefore = this.objectSeenCount.get(objHash) || 0;
      this.objectSeenCount.set(objHash, timesSeenBefore + 1);
      const isFirstOccurrence = timesSeenBefore === 0;

      const objToProcess = this.compressObjectWithLowSignalDetection(obj, path);

      const result: any = {};
      for (const [key, value] of Object.entries(objToProcess)) {
        const childPath = path ? `${path}.${key}` : key;
        result[key] = this.compressJsonProperties(value, childPath);
      }

      const objectAsString = JSON.stringify(result);
      const tokens = this.storage.estimateTokens(objectAsString);

      // First occurrence of a non-trivial object: store it so duplicates can
      // point at it, but still return the data itself this time.
      if (isFirstOccurrence && tokens > 100) {
        const key = this.storage.generateKey();
        this.deduplicationMap.set(objHash, key);
        this.storage.storeString(key, objectAsString);
        return result;
      }

      // Still too large even after compressing children: replace with a stub.
      if (tokens > this.compressionThreshold) {
        const key = this.storage.generateKey();
        this.storage.storeString(key, objectAsString);

        return `[COMPRESSED_JSON_OBJECT - ${tokens} tokens]\nKey: ${key}\nPath: ${path}\nKeys: ${Object.keys(result).join(", ")}\nPreview: ${objectAsString.substring(0, 200)}...\n[Use ${this.toolName} tool with key "${key}" to retrieve full content]`;
      }
      return result;
    }

    // Strings: try structural compression of embedded JSON first.
    if (typeof obj === "string") {
      const parsedJson = this.tryParseJson(obj);
      if (parsedJson) {
        const compressedJson = this.compressJsonProperties(parsedJson, path);
        const compressedJsonString = JSON.stringify(compressedJson, null, 2);

        const originalTokens = this.storage.estimateTokens(obj);
        const compressedTokens = this.storage.estimateTokens(compressedJsonString);

        // Only keep the structural result when it saves at least 20%.
        if (compressedTokens < originalTokens * 0.8) {
          return compressedJsonString;
        }
      }

      // Not JSON, or compression was not effective: treat as a plain string.
      const tokens = this.storage.estimateTokens(obj);
      if (tokens > this.compressionThreshold) {
        const key = this.storage.generateKey();
        this.storage.storeString(key, obj);

        return `[COMPRESSED_JSON_PROPERTY - ${tokens} tokens]\nKey: ${key}\nPath: ${path}\nPreview: ${obj.substring(0, 200)}...\n[Use ${this.toolName} tool with key "${key}" to retrieve full content]`;
      }
      return obj;
    }

    // Numbers, booleans, null and undefined pass through unchanged.
    return obj;
  }
}
|