@thispointon/kondi-chat 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +556 -0
- package/bin/kondi-chat +56 -0
- package/bin/kondi-chat.js +72 -0
- package/package.json +55 -0
- package/scripts/demo.tape +49 -0
- package/scripts/postinstall.cjs +103 -0
- package/src/audit/analytics.ts +261 -0
- package/src/audit/ledger.ts +253 -0
- package/src/audit/telemetry.ts +165 -0
- package/src/cli/backend.ts +675 -0
- package/src/cli/commands.ts +419 -0
- package/src/cli/help.ts +182 -0
- package/src/cli/submit-helpers.ts +159 -0
- package/src/cli/submit.ts +539 -0
- package/src/cli/wizard.ts +121 -0
- package/src/context/bootstrap.ts +138 -0
- package/src/context/budget.ts +100 -0
- package/src/context/manager.ts +666 -0
- package/src/context/memory.ts +160 -0
- package/src/context/preflight.ts +176 -0
- package/src/context/project-brain.ts +101 -0
- package/src/context/receipts.ts +108 -0
- package/src/context/skills.ts +154 -0
- package/src/context/symbol-index.ts +240 -0
- package/src/council/profiles.ts +137 -0
- package/src/council/tool.ts +138 -0
- package/src/council-engine/cli/council-artifacts.ts +230 -0
- package/src/council-engine/cli/council-config.ts +178 -0
- package/src/council-engine/cli/council-session-export.ts +116 -0
- package/src/council-engine/cli/kondi.ts +98 -0
- package/src/council-engine/cli/llm-caller.ts +229 -0
- package/src/council-engine/cli/localStorage-shim.ts +119 -0
- package/src/council-engine/cli/node-platform.ts +68 -0
- package/src/council-engine/cli/run-council.ts +481 -0
- package/src/council-engine/cli/run-pipeline.ts +772 -0
- package/src/council-engine/cli/session-export.ts +153 -0
- package/src/council-engine/configs/councils/analysis.json +101 -0
- package/src/council-engine/configs/councils/code-planning.json +86 -0
- package/src/council-engine/configs/councils/coding.json +89 -0
- package/src/council-engine/configs/councils/debate.json +97 -0
- package/src/council-engine/configs/councils/solo-claude.json +34 -0
- package/src/council-engine/configs/councils/solo-gpt.json +34 -0
- package/src/council-engine/council/coding-orchestrator.ts +1205 -0
- package/src/council-engine/council/context-bootstrap.ts +147 -0
- package/src/council-engine/council/context-inspection.ts +42 -0
- package/src/council-engine/council/context-store.ts +763 -0
- package/src/council-engine/council/deliberation-orchestrator.ts +2762 -0
- package/src/council-engine/council/factory.ts +164 -0
- package/src/council-engine/council/index.ts +201 -0
- package/src/council-engine/council/ledger-store.ts +438 -0
- package/src/council-engine/council/prompts.ts +1689 -0
- package/src/council-engine/council/storage-cleanup.ts +164 -0
- package/src/council-engine/council/store.ts +1110 -0
- package/src/council-engine/council/synthesis.ts +291 -0
- package/src/council-engine/council/types.ts +845 -0
- package/src/council-engine/council/validation.ts +613 -0
- package/src/council-engine/pipeline/build-detect.ts +73 -0
- package/src/council-engine/pipeline/executor.ts +1048 -0
- package/src/council-engine/pipeline/index.ts +9 -0
- package/src/council-engine/pipeline/install-detect.ts +84 -0
- package/src/council-engine/pipeline/memory-store.ts +182 -0
- package/src/council-engine/pipeline/output-parsers.ts +146 -0
- package/src/council-engine/pipeline/run-output.ts +149 -0
- package/src/council-engine/pipeline/session-import.ts +177 -0
- package/src/council-engine/pipeline/store.ts +753 -0
- package/src/council-engine/pipeline/test-detect.ts +82 -0
- package/src/council-engine/pipeline/types.ts +401 -0
- package/src/council-engine/services/deliberationSummary.ts +114 -0
- package/src/council-engine/tsconfig.json +16 -0
- package/src/council-engine/types/mcp.ts +122 -0
- package/src/council-engine/utils/filterTools.ts +73 -0
- package/src/engine/apply.ts +238 -0
- package/src/engine/checkpoints.ts +237 -0
- package/src/engine/consultants.ts +347 -0
- package/src/engine/diff.ts +171 -0
- package/src/engine/errors.ts +102 -0
- package/src/engine/git-tools.ts +246 -0
- package/src/engine/hooks.ts +181 -0
- package/src/engine/loop-guard.ts +155 -0
- package/src/engine/permissions.ts +293 -0
- package/src/engine/pipeline.ts +376 -0
- package/src/engine/sub-agents.ts +133 -0
- package/src/engine/task-card.ts +185 -0
- package/src/engine/task-router.ts +256 -0
- package/src/engine/task-store.ts +86 -0
- package/src/engine/tools.ts +783 -0
- package/src/engine/verify.ts +111 -0
- package/src/mcp/client.ts +225 -0
- package/src/mcp/config.ts +120 -0
- package/src/mcp/tool-manager.ts +192 -0
- package/src/mcp/types.ts +61 -0
- package/src/providers/llm-caller.ts +943 -0
- package/src/providers/rate-limiter.ts +238 -0
- package/src/router/NOTES.md +28 -0
- package/src/router/collector.ts +474 -0
- package/src/router/embeddings.ts +286 -0
- package/src/router/index.ts +299 -0
- package/src/router/intent-router.ts +225 -0
- package/src/router/nn-router.ts +205 -0
- package/src/router/profiles.ts +309 -0
- package/src/router/registry.ts +565 -0
- package/src/router/rules.ts +274 -0
- package/src/router/train.py +408 -0
- package/src/session/store.ts +211 -0
- package/src/test-utils/mock-llm.ts +39 -0
- package/src/types.ts +322 -0
- package/src/web/manager.ts +311 -0
|
@@ -0,0 +1,666 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context Manager — maintains conversation state across turns.
|
|
3
|
+
*
|
|
4
|
+
* Inspired by Claude Code's context management:
|
|
5
|
+
* - Threshold-based auto-compaction (by token count, not turn count)
|
|
6
|
+
* - Compact boundary markers — only send messages after boundary
|
|
7
|
+
* - Post-compact restoration of relevant files and session state
|
|
8
|
+
* - Message normalization before API calls
|
|
9
|
+
* - Token budget tracking with warnings
|
|
10
|
+
* - Prompt caching optimization
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import type { Message, Session, SessionState, RepoMap, LLMResponse, ProviderId } from '../types.ts';
|
|
14
|
+
import { ContextBudget, estimateTokens } from './budget.ts';
|
|
15
|
+
import { callLLM } from '../providers/llm-caller.ts';
|
|
16
|
+
import type { Ledger } from '../audit/ledger.ts';
|
|
17
|
+
import type { MemoryManager } from './memory.ts';
|
|
18
|
+
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Constants (matching Claude Code's approach)
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
/** Headroom (in tokens) kept below the model context window; crossing into it triggers auto-compaction. */
const AUTOCOMPACT_BUFFER = 13_000;

/** Wider headroom meant for an early warning (not referenced in the visible part of this file — TODO confirm usage). */
const AUTOCOMPACT_WARNING_BUFFER = 20_000;

/** Cap on how many recently referenced files post-compact restoration considers. */
const POST_COMPACT_MAX_FILES = 5;

/** Per-file token cap for post-compact restoration (not referenced in the visible part of this file — TODO confirm usage). */
const POST_COMPACT_MAX_TOKENS_PER_FILE = 5_000;

/** Overall token cap for post-compact restoration (not referenced in the visible part of this file — TODO confirm usage). */
const POST_COMPACT_TOKEN_BUDGET = 25_000;
|
|
33
|
+
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
// Configuration
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
/**
 * Optional settings accepted by the ContextManager constructor.
 * Every field may be omitted; the manager merges these over DEFAULT_CONFIG.
 */
export interface ContextManagerConfig {
  /** Token budget passed to ContextBudget when the per-turn context is assembled. */
  contextBudget?: number;
  /** Model's context window size (for auto-compact threshold) */
  modelContextWindow?: number;
  /** Window size used for the recent conversation; the manager keeps/quotes twice this many messages. */
  recentWindowSize?: number;
  /** Provider used for the summarization and session-state LLM calls. */
  compressionProvider?: ProviderId;
  /** Model used for the summarization and session-state LLM calls. */
  compressionModel?: string;
  /** Base system prompt that the assembled context is appended to. */
  systemPrompt?: string;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Defaults applied for every ContextManagerConfig field the caller omits.
 * The system prompt is written as one array entry per line and joined with
 * newlines; empty entries are the blank lines between paragraphs.
 */
const DEFAULT_CONFIG: Required<ContextManagerConfig> = {
  contextBudget: 30_000,
  modelContextWindow: 128_000,
  recentWindowSize: 4,
  compressionProvider: 'anthropic',
  compressionModel: 'claude-haiku-4-5-20251001',
  systemPrompt: [
    `You are a coding assistant with access to tools. You MUST use your tools to perform work — never describe what you would do without actually doing it. If the user asks you to write a file, call write_file. If they ask you to search, call web_search or search_code. If they ask you to run something, call run_command. Always act, never just narrate.`,
    ``,
    `All file paths are relative to the working directory. When you call write_file, edit_file, or read_file, the path you provide is resolved against the working directory automatically. Use relative paths (e.g. "src/main.ts", not absolute paths).`,
    ``,
    `When the user asks you to implement, fix, refactor, or test something:`,
    `1. Use repo_map and find_symbol to understand the project structure`,
    `2. Use read_file and search_code to inspect the relevant code`,
    `3. Use update_plan to track what you're doing`,
    `4. Use write_file and edit_file to make the changes directly`,
    `5. Verification runs automatically after each edit (typecheck) — read the auto-verify output`,
    `6. Report what you did and what the results were`,
    ``,
    `For questions about the codebase, use repo_map, find_symbol, related_files, read_file, and search_code.`,
    `For web research, use web_search to find information and web_fetch to read pages.`,
    `For domain expertise, use consult to get a specialist opinion.`,
    ``,
    `DELEGATION POLICY: When a task requires reading many files (>5) or heavy investigation, use spawn_agent with type "research" to delegate the file reading. The sub-agent reads and summarizes; you receive the summary without polluting your context with raw file contents. Use the main context for planning and editing, not for bulk reading.`,
    ``,
    `IMPORTANT: Every file you write goes to the working directory. Do not claim you wrote files without actually calling write_file. Do not output code blocks and ask the user to save them — call write_file yourself.`,
  ].join('\n'),
};
|
|
74
|
+
|
|
75
|
+
// ---------------------------------------------------------------------------
|
|
76
|
+
// Compact boundary marker
|
|
77
|
+
// ---------------------------------------------------------------------------
|
|
78
|
+
|
|
79
|
+
/** Role carried by the synthetic message that marks a compaction point. */
const COMPACT_BOUNDARY_ROLE = 'system' as const;
/** Marker that a boundary message's content starts with; the summary text follows it. */
const COMPACT_BOUNDARY_PREFIX = '[COMPACT_BOUNDARY]';

/**
 * Tell whether a message is a compact boundary marker.
 *
 * @param msg Message to inspect.
 * @returns true only when the role is the boundary role and the content
 *   begins with the boundary prefix.
 */
function isCompactBoundary(msg: Message): boolean {
  if (msg.role !== COMPACT_BOUNDARY_ROLE) {
    return false;
  }
  return msg.content.startsWith(COMPACT_BOUNDARY_PREFIX);
}
|
|
85
|
+
|
|
86
|
+
// ---------------------------------------------------------------------------
|
|
87
|
+
// Context Manager
|
|
88
|
+
// ---------------------------------------------------------------------------
|
|
89
|
+
|
|
90
|
+
export class ContextManager {
|
|
91
|
+
private session: Session;
|
|
92
|
+
private config: Required<ContextManagerConfig>;
|
|
93
|
+
private ledger?: Ledger;
|
|
94
|
+
private memoryManager?: MemoryManager;
|
|
95
|
+
/** Last file a tool touched — used as an anchor for subdirectory memory lookup. */
|
|
96
|
+
private activeFile?: string;
|
|
97
|
+
/** Spec 02 — pre-formatted git context injected per-turn. */
|
|
98
|
+
private gitContextText = '';
|
|
99
|
+
|
|
100
|
+
/** Token budget tracking */
|
|
101
|
+
private sessionTokensUsed = 0;
|
|
102
|
+
private sessionTokenBudget: number | null = null;
|
|
103
|
+
private compactionCount = 0;
|
|
104
|
+
|
|
105
|
+
/** Prompt cache tracking */
|
|
106
|
+
private lastSystemPromptHash = '';
|
|
107
|
+
private cacheHits = 0;
|
|
108
|
+
private cacheMisses = 0;
|
|
109
|
+
|
|
110
|
+
/**
 * Create a context manager bound to one session.
 *
 * @param session Session whose messages and state this manager reads and mutates.
 * @param config Optional overrides merged over DEFAULT_CONFIG. Fields that are
 *   missing or explicitly `undefined` keep their default value.
 * @param ledger Optional audit ledger; when present, compaction and
 *   state-update LLM calls are recorded on it.
 * @param memoryManager Optional memory source consulted while assembling the prompt.
 */
constructor(session: Session, config?: ContextManagerConfig, ledger?: Ledger, memoryManager?: MemoryManager) {
  this.session = session;

  // A bare spread lets `{ contextBudget: undefined }` overwrite the default
  // with undefined, which later turns the compaction threshold into NaN.
  // Drop undefined entries first so only real overrides are applied.
  const overrides = Object.fromEntries(
    Object.entries(config ?? {}).filter(([, value]) => value !== undefined),
  ) as ContextManagerConfig;
  this.config = { ...DEFAULT_CONFIG, ...overrides };

  this.ledger = ledger;
  this.memoryManager = memoryManager;
}
|
|
116
|
+
|
|
117
|
+
/** Return the session object this manager operates on (the live object, not a copy). */
getSession(): Session {
  return this.session;
}

/** Return the resolved configuration object held by this manager. */
getConfig(): Required<ContextManagerConfig> {
  return this.config;
}

/** Set the session-wide token budget reported by getBudgetStatus; `null` means no budget. */
setTokenBudget(budget: number | null): void {
  this.sessionTokenBudget = budget;
}

/** Remember the file a tool touched last; it is passed to the memory manager during prompt assembly. */
setActiveFile(path: string): void {
  this.activeFile = path;
}

/** Store the pre-formatted git context that is added to the prompt while it is non-empty. */
setGitContextText(text: string): void {
  this.gitContextText = text;
}
|
|
122
|
+
|
|
123
|
+
// -------------------------------------------------------------------------
|
|
124
|
+
// Turn management
|
|
125
|
+
// -------------------------------------------------------------------------
|
|
126
|
+
|
|
127
|
+
/**
 * Append a user turn to the session history.
 *
 * @param content Text of the user's message; stored together with the current
 *   ISO timestamp and an estimated token count.
 */
addUserMessage(content: string): void {
  const timestamp = new Date().toISOString();
  const tokenCount = estimateTokens(content);
  this.session.messages.push({ role: 'user', content, timestamp, tokenCount });
}
|
|
135
|
+
|
|
136
|
+
/**
 * Append an assistant turn to the session history and update token totals.
 *
 * @param response LLM response to record. Its content, model, provider and
 *   reported token usage are stored on the message; the usage is also added to
 *   the session totals and to this manager's running session usage.
 */
addAssistantMessage(response: LLMResponse): void {
  const { content, model, provider, inputTokens, outputTokens } = response;

  this.session.messages.push({
    role: 'assistant',
    content,
    timestamp: new Date().toISOString(),
    model,
    provider,
    tokenCount: estimateTokens(content),
    inputTokens,
    outputTokens,
  });

  this.session.totalInputTokens += inputTokens;
  this.session.totalOutputTokens += outputTokens;
  this.sessionTokensUsed += inputTokens + outputTokens;
}
|
|
152
|
+
|
|
153
|
+
// -------------------------------------------------------------------------
|
|
154
|
+
// Context assembly
|
|
155
|
+
// -------------------------------------------------------------------------
|
|
156
|
+
|
|
157
|
+
/**
 * Build the prompt pieces for the turn in progress.
 *
 * Only messages after the most recent compact boundary are considered. Context
 * sections are registered with a ContextBudget (priority in parentheses):
 * memory (0), session state (1), git context (2), repo map (2), recent
 * conversation (3), compact summary (4) and grounding context (5). The
 * assembled context is appended to the configured system prompt, separated by
 * a `---` rule.
 *
 * Side effects: writes one line to stderr when the budget truncated or dropped
 * a section, and updates the cache hit/miss counters by comparing a hash of
 * the full system prompt with the hash from the previous call.
 *
 * @returns `systemPrompt` — configured prompt, working-directory note and
 *   assembled context; `userMessage` — content of the newest post-boundary
 *   message (empty string when there is none); `cacheablePrefix` — the
 *   grounding-context section, set only when grounding context exists and the
 *   budget did not drop it.
 */
assemblePrompt(): { systemPrompt: string; userMessage: string; cacheablePrefix?: string } {
  const budget = new ContextBudget(this.config.contextBudget);
  const postBoundary = this.getMessagesAfterBoundary();
  const latest = postBoundary[postBoundary.length - 1];

  // Final flag on budget.add: per the section notes, `false` marks content
  // that must not be compressed — TODO confirm against ContextBudget.add.

  // Priority 0 — memory (KONDI.md files), anchored on the last touched file.
  if (this.memoryManager) {
    const memoryText = this.memoryManager.formatForPrompt(this.memoryManager.load(this.activeFile));
    if (memoryText) {
      budget.add('memory', memoryText, 0, false);
    }
  }

  // Priority 1 — session state (goal, plan, decisions, ...).
  const sessionStateText = this.formatSessionState();
  if (sessionStateText) {
    budget.add('session-state', `## Session State\n${sessionStateText}`, 1, false);
  }

  // Priority 2 — git context (Spec 02), then the repo map.
  if (this.gitContextText) {
    budget.add('git-context', this.gitContextText, 2, false);
  }
  if (this.session.repoMap) {
    budget.add('repo-map', `## Repo Map\n${this.formatRepoMap()}`, 2, false);
  }

  // Priority 3 — recent exchanges taken from the post-boundary messages.
  const recentText = this.getRecentWindow(postBoundary);
  if (recentText) {
    budget.add('recent-exchanges', `## Recent Conversation\n${recentText}`, 3, true);
  }

  // Priority 4 — summary stored in the boundary marker itself.
  const earlierSummary = this.getCompactSummary();
  if (earlierSummary) {
    budget.add('compact-summary', `## Earlier Discussion\n${earlierSummary}`, 4, true);
  }

  // Priority 5 — grounding context.
  if (this.session.groundingContext) {
    budget.add('grounding-context', `## Project Files\n${this.session.groundingContext}`, 5, true);
  }

  const assembledContext = budget.assemble();

  // Report anything the budget had to truncate or drop.
  const dropped = budget.getDropped();
  const compressed = budget.getCompressed();
  const notes: string[] = [];
  if (compressed.length > 0) notes.push(`truncated: ${compressed.join(', ')}`);
  if (dropped.length > 0) notes.push(`dropped: ${dropped.join(', ')}`);
  if (notes.length > 0) {
    process.stderr.write(`[context] Budget ${this.config.contextBudget} tokens — ${notes.join('; ')}\n`);
  }

  // Tell the model where it is, so relative paths in file tools make sense.
  let promptHead = this.config.systemPrompt;
  if (this.session.workingDirectory) {
    promptHead += `\nWorking directory: ${this.session.workingDirectory}\nAll file tool paths are relative to this directory.`;
  }
  const fullSystemPrompt = assembledContext
    ? `${promptHead}\n\n---\n\n${assembledContext}`
    : promptHead;

  // Cache bookkeeping: the very first call has nothing to compare against and
  // counts as neither a hit nor a miss.
  const promptHash = simpleHash(fullSystemPrompt);
  if (this.lastSystemPromptHash) {
    if (promptHash === this.lastSystemPromptHash) {
      this.cacheHits++;
    } else {
      this.cacheMisses++;
    }
  }
  this.lastSystemPromptHash = promptHash;

  // Stable content that does not change between calls.
  const cacheablePrefix =
    this.session.groundingContext && !dropped.includes('grounding-context')
      ? `## Project Files\n${this.session.groundingContext}`
      : undefined;

  return {
    systemPrompt: fullSystemPrompt,
    userMessage: latest?.content || '',
    cacheablePrefix,
  };
}
|
|
252
|
+
|
|
253
|
+
// -------------------------------------------------------------------------
|
|
254
|
+
// Auto-compaction (threshold-based, like Claude Code)
|
|
255
|
+
// -------------------------------------------------------------------------
|
|
256
|
+
|
|
257
|
+
/**
 * Check whether compaction is needed and, if so, attempt it.
 *
 * Compaction triggers once the estimated context size reaches the smaller of:
 *   - contextBudget + 20% headroom (compacts while tokens are still cheap
 *     instead of waiting until the model window is nearly full)
 *   - modelContextWindow - AUTOCOMPACT_BUFFER (hard ceiling fallback)
 *
 * @returns `compacted: true` only when a compaction actually completed.
 *   `compact()` can return early (too few messages, nothing to summarize) or
 *   swallow a summarizer failure; in those cases `compacted` is false and
 *   `reason` explains that the threshold was exceeded but nothing changed.
 *   When the threshold was not reached, the result is `{ compacted: false }`
 *   with no reason.
 */
async maybeCompact(): Promise<{ compacted: boolean; reason?: string }> {
  const contextSize = this.estimateCurrentContextSize();

  const budgetThreshold = Math.floor(this.config.contextBudget * 1.2);
  const windowThreshold = this.config.modelContextWindow - AUTOCOMPACT_BUFFER;
  const threshold = Math.min(budgetThreshold, windowThreshold);

  if (contextSize < threshold) {
    return { compacted: false };
  }

  process.stderr.write(
    `[context] Auto-compact triggered: ${contextSize.toLocaleString()} tokens ` +
    `(threshold: ${threshold.toLocaleString()}, budget: ${this.config.contextBudget.toLocaleString()})\n`
  );

  // compact() never throws and has no return value; the compaction counter is
  // only incremented on success, so use it to detect whether anything happened.
  const compactionsBefore = this.compactionCount;
  await this.compact();

  if (this.compactionCount === compactionsBefore) {
    return {
      compacted: false,
      reason: `${contextSize} tokens exceeded threshold ${threshold}, but compaction did not complete`,
    };
  }

  return { compacted: true, reason: `${contextSize} tokens exceeded threshold ${threshold}` };
}
|
|
285
|
+
|
|
286
|
+
/**
 * Switch the provider/model pair used for the summarization and session-state
 * LLM calls at runtime.
 *
 * Per the original note, the backend calls this once the active profile is
 * known, so that e.g. `zai` mode compacts with glm-4.5-flash (free) rather
 * than claude-haiku.
 *
 * @param provider Provider to use from now on.
 * @param model Model identifier to use from now on.
 */
setCompressionModel(provider: ProviderId, model: string): void {
  Object.assign(this.config, {
    compressionProvider: provider,
    compressionModel: model,
  });
}
|
|
295
|
+
|
|
296
|
+
/**
 * Force compaction: summarize older messages, replace them with a single
 * boundary message that carries the summary, and keep the newest messages.
 *
 * Does nothing when the session has fewer than 4 messages or when no
 * non-boundary message falls outside the kept window. Up to
 * `recentWindowSize * 2` trailing messages are kept verbatim. An existing
 * summary is passed to the summarizer so the new summary builds on it.
 *
 * Failures are non-fatal: any error from the summarizer call (including an
 * empty summary) is logged to stderr and the message history is left
 * untouched. The compaction counter is incremented only on success.
 */
async compact(): Promise<void> {
  const messages = this.session.messages;
  if (messages.length < 4) return;

  // Keep the last N messages intact
  const keepCount = Math.min(this.config.recentWindowSize * 2, messages.length - 1);
  const toCompact = messages
    .slice(0, messages.length - keepCount)
    .filter(m => !isCompactBoundary(m));

  if (toCompact.length === 0) return;

  // Each message is capped at 2000 characters to bound the summarizer input.
  const transcript = toCompact
    .map(m => `[${m.role}${m.model ? ` (${m.model})` : ''}]: ${m.content.slice(0, 2000)}`)
    .join('\n\n');

  // Get existing summary to build on
  const existingSummary = this.getCompactSummary();
  const summaryPrefix = existingSummary
    ? `Previous summary:\n${existingSummary}\n\nNew messages to incorporate:\n`
    : '';

  try {
    const response = await callLLM({
      provider: this.config.compressionProvider,
      model: this.config.compressionModel,
      systemPrompt: `Summarize this conversation concisely. Preserve:
- All technical decisions and their rationale
- File paths and code references mentioned
- Constraints and requirements discussed
- Current plan and progress
- Any errors or failures encountered
Use past tense. No commentary. Max 800 words.`,
      userMessage: `${summaryPrefix}${transcript}\n\nSummarize:`,
      maxOutputTokens: 1500,
      temperature: 0,
    });

    // The call consumed tokens whether or not its output is usable.
    this.session.totalInputTokens += response.inputTokens;
    this.session.totalOutputTokens += response.outputTokens;

    // An empty summary would replace the old messages AND the previous summary
    // with nothing. Abort instead and leave the history as it is.
    if (!response.content || !response.content.trim()) {
      throw new Error('summarizer returned an empty summary');
    }

    // Build the compact boundary message
    const boundaryContent = `${COMPACT_BOUNDARY_PREFIX}\n${response.content}`;

    // Replace old messages with boundary + kept messages
    const keptMessages = messages.slice(messages.length - keepCount);
    this.session.messages = [
      { role: COMPACT_BOUNDARY_ROLE, content: boundaryContent, timestamp: new Date().toISOString() },
      ...keptMessages,
    ];

    this.compactionCount++;
    this.ledger?.record('compress', response, `Compaction #${this.compactionCount}`);

    // Post-compact restoration
    await this.restorePostCompact();

    const newSize = this.estimateCurrentContextSize();
    process.stderr.write(
      `[context] Compacted: ${this.compactionCount} total, ` +
      `${toCompact.length} messages summarized, ` +
      `new size: ${newSize.toLocaleString()} tokens\n`
    );
  } catch (error) {
    // Anything can be thrown in JS; only Error instances have a message.
    const detail = error instanceof Error ? error.message : String(error);
    process.stderr.write(`[context] Compaction failed: ${detail}\n`);
  }
}
|
|
364
|
+
|
|
365
|
+
/**
 * Post-compaction hook modelled on Claude Code's post-compact restoration.
 *
 * As written it only reports to stderr how many recently referenced files
 * (at most POST_COMPACT_MAX_FILES) would be restored; no file content is
 * injected here. The original notes say file contents are re-read on the next
 * tool use, and that session state and repo map are re-assembled each turn.
 */
private async restorePostCompact(): Promise<void> {
  const candidates = this.extractRecentFileReferences().slice(0, POST_COMPACT_MAX_FILES);

  if (candidates.length > 0) {
    process.stderr.write(
      `[context] Post-compact: restoring ${candidates.length} file references\n`
    );
  }
}
|
|
382
|
+
|
|
383
|
+
/**
 * Refresh the session state (goal, decisions, constraints, plan, recent
 * failures) from the latest exchange using the compression model.
 *
 * Skipped when fewer than 2 post-boundary messages exist, or when at most one
 * user turn has passed since the last update. The turn count only covers
 * post-boundary messages, so it drops after a compaction; a count lower than
 * the stored `lastUpdatedAtTurn` is treated as a reset and the update proceeds
 * (otherwise updates could stall until the count climbs past its old value).
 *
 * The model reply is expected to be a JSON object. Text around the outermost
 * `{ ... }` (for example a Markdown code fence) is ignored. Only a non-empty
 * string `goal` and array-valued lists are accepted, and non-string list
 * items are discarded; any other field keeps its previous value.
 *
 * Never throws: failures are logged to stderr and leave the state unchanged.
 * Token usage of the call is recorded even when the reply cannot be used.
 */
async updateSessionState(): Promise<void> {
  const messages = this.getMessagesAfterBoundary();
  if (messages.length < 2) return;

  const turnCount = messages.filter(m => m.role === 'user').length;
  const lastUpdatedAtTurn = this.session.state.lastUpdatedAtTurn;
  const counterWasReset = turnCount < lastUpdatedAtTurn;
  if (!counterWasReset && turnCount <= lastUpdatedAtTurn + 1) return;

  const lastExchange = messages.slice(-4).map(m => `[${m.role}]: ${m.content.slice(0, 1000)}`).join('\n\n');
  const currentState = this.formatSessionState();

  try {
    const response = await callLLM({
      provider: this.config.compressionProvider,
      model: this.config.compressionModel,
      systemPrompt: `Update the session state. Output ONLY valid JSON:
{
"goal": "current goal",
"decisions": ["decision 1"],
"constraints": ["constraint 1"],
"currentPlan": ["step 1", "step 2"],
"recentFailures": ["failure 1"]
}
Keep lists short (max 5 items). Remove resolved items.`,
      userMessage: `Current state:\n${currentState || '(empty)'}\n\nLatest exchange:\n${lastExchange}\n\nUpdated state as JSON:`,
      maxOutputTokens: 800,
      temperature: 0,
    });

    // The tokens are spent regardless of whether the reply parses.
    this.session.totalInputTokens += response.inputTokens;
    this.session.totalOutputTokens += response.outputTokens;
    this.ledger?.record('state_update', response, 'Session state update');

    // Tolerate code fences or stray prose around the JSON object.
    const raw = response.content;
    const objectStart = raw.indexOf('{');
    const objectEnd = raw.lastIndexOf('}');
    const jsonText = objectStart >= 0 && objectEnd > objectStart
      ? raw.slice(objectStart, objectEnd + 1)
      : raw;

    const parsed: unknown = JSON.parse(jsonText);
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      throw new Error('reply was not a JSON object');
    }
    const update = parsed as Record<string, unknown>;
    const previous = this.session.state;

    // Accept a list only if it really is an array; an empty array is a valid
    // way for the model to clear resolved items.
    const stringList = (value: unknown, fallback: string[]): string[] =>
      Array.isArray(value)
        ? value.filter((item): item is string => typeof item === 'string')
        : fallback;

    this.session.state = {
      ...previous,
      goal: typeof update.goal === 'string' && update.goal ? update.goal : previous.goal,
      decisions: stringList(update.decisions, previous.decisions),
      constraints: stringList(update.constraints, previous.constraints),
      currentPlan: stringList(update.currentPlan, previous.currentPlan),
      recentFailures: stringList(update.recentFailures, previous.recentFailures),
      lastUpdatedAtTurn: turnCount,
    };
  } catch (error) {
    // Non-fatal: keep the previous state, but leave a trace for debugging.
    const detail = error instanceof Error ? error.message : String(error);
    process.stderr.write(`[context] Session state update skipped: ${detail}\n`);
  }
}
|
|
432
|
+
|
|
433
|
+
// -------------------------------------------------------------------------
|
|
434
|
+
// Token budget tracking
|
|
435
|
+
// -------------------------------------------------------------------------
|
|
436
|
+
|
|
437
|
+
/**
 * Estimate, in tokens, how large the current context is.
 *
 * Sums the configured system prompt, the formatted session state, every
 * message after the last compact boundary (using a stored non-zero
 * `tokenCount` when available, otherwise an estimate of the content), the
 * compact summary if one exists, and the grounding context if one is set.
 *
 * @returns Estimated token total of those parts.
 */
estimateCurrentContextSize(): number {
  const postBoundary = this.getMessagesAfterBoundary();

  const fixedCost =
    estimateTokens(this.config.systemPrompt) + estimateTokens(this.formatSessionState());

  const withMessages = postBoundary.reduce(
    (sum, m) => sum + (m.tokenCount || estimateTokens(m.content)),
    fixedCost,
  );

  const summary = this.getCompactSummary();
  const withSummary = summary ? withMessages + estimateTokens(summary) : withMessages;

  const grounding = this.session.groundingContext;
  return grounding ? withSummary + estimateTokens(grounding) : withSummary;
}
|
|
463
|
+
|
|
464
|
+
/**
 * Snapshot of token usage and cache statistics.
 *
 * @returns Session tokens used so far, the session budget (or null), the
 *   estimated current context size, the configured model context window, the
 *   ratio of the two (`contextUtilization`), the number of compactions
 *   performed, and the cache hit rate (0 when no hit or miss was recorded yet).
 */
getBudgetStatus(): {
  sessionTokensUsed: number;
  sessionBudget: number | null;
  currentContextSize: number;
  modelContextWindow: number;
  contextUtilization: number;
  compactionCount: number;
  cacheHitRate: number;
} {
  const currentContextSize = this.estimateCurrentContextSize();
  const { modelContextWindow } = this.config;
  const attempts = this.cacheHits + this.cacheMisses;

  let cacheHitRate = 0;
  if (attempts > 0) {
    cacheHitRate = this.cacheHits / attempts;
  }

  return {
    sessionTokensUsed: this.sessionTokensUsed,
    sessionBudget: this.sessionTokenBudget,
    currentContextSize,
    modelContextWindow,
    contextUtilization: currentContextSize / modelContextWindow,
    compactionCount: this.compactionCount,
    cacheHitRate,
  };
}
|
|
488
|
+
|
|
489
|
+
// -------------------------------------------------------------------------
|
|
490
|
+
// Message normalization
|
|
491
|
+
// -------------------------------------------------------------------------
|
|
492
|
+
|
|
493
|
+
/**
 * Produce a cleaned copy of a message list for an API call.
 *
 * - Compact boundary markers are left out (their summary travels in the
 *   system prompt instead).
 * - A user message that directly follows another user message in the output
 *   is merged into it, separated by a blank line, and the merged token count
 *   is re-estimated.
 * - A message whose token count (stored, or estimated when missing/zero)
 *   exceeds 10,000 has its content cut to the first 40,000 characters and a
 *   truncation marker appended; its token count becomes the estimate for the
 *   cut text.
 *
 * Every output element is a shallow copy, so the input messages are not mutated.
 *
 * @param messages Messages to normalize, in conversation order.
 * @returns New array of normalized messages.
 */
normalizeForAPI(messages: Message[]): Message[] {
  const result: Message[] = [];

  for (const source of messages) {
    if (isCompactBoundary(source)) {
      continue;
    }

    // Fold back-to-back user messages into the earlier one.
    const previous = result.length > 0 ? result[result.length - 1] : undefined;
    if (previous && previous.role === 'user' && source.role === 'user') {
      previous.content = previous.content + '\n\n' + source.content;
      previous.tokenCount = estimateTokens(previous.content);
      continue;
    }

    const copy: Message = { ...source };

    // Cut extremely long messages (>10K tokens) down to 40K characters.
    const size = source.tokenCount || estimateTokens(source.content);
    if (size > 10_000) {
      const head = source.content.slice(0, 40_000);
      copy.content = head + '\n\n[... message truncated ...]';
      copy.tokenCount = estimateTokens(head);
    }

    result.push(copy);
  }

  return result;
}
|
|
531
|
+
|
|
532
|
+
// -------------------------------------------------------------------------
|
|
533
|
+
// Internal helpers
|
|
534
|
+
// -------------------------------------------------------------------------
|
|
535
|
+
|
|
536
|
+
/**
 * Return the messages that follow the most recent compact boundary.
 * When the session has no boundary, the session's own message array is
 * returned as-is (not a copy).
 */
private getMessagesAfterBoundary(): Message[] {
  const all = this.session.messages;

  // Walk backwards so the newest boundary wins.
  let index = all.length;
  while (index-- > 0) {
    if (isCompactBoundary(all[index])) {
      return all.slice(index + 1);
    }
  }

  return all;
}
|
|
552
|
+
|
|
553
|
+
/**
 * Return the summary text stored in the most recent compact boundary message,
 * i.e. its content with the boundary prefix and the one character after it
 * removed. Returns an empty string when the session has no boundary.
 */
private getCompactSummary(): string {
  const newestFirst = [...this.session.messages].reverse();
  const boundary = newestFirst.find(m => isCompactBoundary(m));

  if (!boundary) {
    return '';
  }
  return boundary.content.slice(COMPACT_BOUNDARY_PREFIX.length + 1);
}
|
|
563
|
+
|
|
564
|
+
/**
 * Collect file paths mentioned in the last six messages after the boundary.
 *
 * A path is any run of text that starts with one of the directory names in
 * the pattern below, followed by `/`, path characters and an extension.
 *
 * @returns Unique matches in order of first appearance.
 */
private extractRecentFileReferences(): string[] {
  const filePattern = /(?:src|lib|test|app|pages|components)\/[\w/.,-]+\.\w+/g;
  const recent = this.getMessagesAfterBoundary().slice(-6);

  const seen = new Set<string>();
  for (const message of recent) {
    // With the global flag, match() yields every hit or null when there is none.
    const hits = message.content.match(filePattern);
    if (!hits) continue;
    hits.forEach(path => seen.add(path));
  }

  return Array.from(seen);
}
|
|
579
|
+
|
|
580
|
+
/**
 * Render the recent conversation window as plain text.
 *
 * Takes up to `recentWindowSize * 2` messages that precede the final message
 * (the final message itself is excluded) and formats each one as
 * `[role (model)]: content`, with content limited to its first 2000 characters.
 *
 * @param messages Messages to read from; defaults to those after the last boundary.
 * @returns The formatted entries joined by blank lines, or '' when the window is empty.
 */
private getRecentWindow(messages?: Message[]): string {
  const source = messages || this.getMessagesAfterBoundary();
  const span = this.config.recentWindowSize * 2 + 1;

  // The end index of -1 leaves out the final message.
  const recent = source.slice(-span, -1);
  if (recent.length === 0) return '';

  const rendered: string[] = [];
  for (const entry of recent) {
    const modelTag = entry.model ? ` (${entry.model})` : '';
    rendered.push(`[${entry.role}${modelTag}]: ${entry.content.slice(0, 2000)}`);
  }
  return rendered.join('\n\n');
}
|
|
588
|
+
|
|
589
|
+
/**
 * Render the session state as one labelled line per populated field.
 *
 * Fields are emitted in a fixed order: goal, plan, decisions, constraints,
 * active task, recent failures. Empty fields are skipped.
 *
 * @returns The lines joined by newlines, or '' when no field is populated.
 */
private formatSessionState(): string {
  const s = this.session.state;

  // No early exit on goal/decisions/plan alone: that would discard a state
  // that only carries constraints, an active task or recent failures.
  // An all-empty state still yields '' because joining no parts gives ''.
  const parts: string[] = [];
  if (s.goal) parts.push(`Goal: ${s.goal}`);
  if (s.currentPlan.length > 0) parts.push(`Plan: ${s.currentPlan.join(' → ')}`);
  if (s.decisions.length > 0) parts.push(`Decisions: ${s.decisions.join('; ')}`);
  if (s.constraints.length > 0) parts.push(`Constraints: ${s.constraints.join('; ')}`);
  if (s.activeTaskId) parts.push(`Active task: ${s.activeTaskId}`);
  if (s.recentFailures.length > 0) parts.push(`Recent failures: ${s.recentFailures.join('; ')}`);
  return parts.join('\n');
}
|
|
602
|
+
|
|
603
|
+
/**
 * Render the session's repo map as a short multi-line text block.
 *
 * The stack line is always present; entrypoints, subsystems, commands and
 * conventions are added only when they have content.
 *
 * @returns The lines joined by newlines, or '' when the session has no repo map.
 */
private formatRepoMap(): string {
  const repo = this.session.repoMap;
  if (!repo) return '';

  const lines: string[] = [`Stack: ${repo.stack.join(', ')}`];

  if (repo.entrypoints.length > 0) {
    lines.push(`Entrypoints: ${repo.entrypoints.join(', ')}`);
  }

  if (repo.subsystems.length > 0) {
    lines.push('Subsystems:');
    repo.subsystems.forEach(sub => {
      lines.push(` ${sub.name} (${sub.paths.join(', ')}): ${sub.purpose}`);
    });
  }

  // Commands are listed in a fixed order; unset ones are left out.
  const commandLabels = ['build', 'test', 'lint', 'typecheck'] as const;
  const commandList: string[] = [];
  for (const label of commandLabels) {
    if (repo.commands[label]) commandList.push(`${label}: ${repo.commands[label]}`);
  }
  if (commandList.length > 0) {
    lines.push(`Commands: ${commandList.join(', ')}`);
  }

  if (repo.conventions.length > 0) {
    lines.push(`Conventions: ${repo.conventions.join('; ')}`);
  }

  return lines.join('\n');
}
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
// ---------------------------------------------------------------------------
|
|
628
|
+
// Helpers
|
|
629
|
+
// ---------------------------------------------------------------------------
|
|
630
|
+
|
|
631
|
+
/**
 * Compute a small non-cryptographic hash of a string.
 *
 * Each UTF-16 code unit is folded into a signed 32-bit accumulator as
 * `acc * 31 + unit`, wrapping on overflow.
 *
 * @param text String to hash.
 * @returns The accumulator in base 36; negative values carry a leading '-'.
 */
function simpleHash(text: string): string {
  let acc = 0;
  let pos = 0;
  while (pos < text.length) {
    // Math.imul wraps the product to 32 bits; `| 0` wraps the sum again.
    acc = (Math.imul(acc, 31) + text.charCodeAt(pos)) | 0;
    pos += 1;
  }
  return acc.toString(36);
}
|
|
640
|
+
|
|
641
|
+
// ---------------------------------------------------------------------------
|
|
642
|
+
// Factory
|
|
643
|
+
// ---------------------------------------------------------------------------
|
|
644
|
+
|
|
645
|
+
/**
 * Create a new, empty session.
 *
 * The session gets a random UUID and an ISO-8601 creation timestamp, with no
 * messages or tasks, blank state and zeroed token/cost totals.
 *
 * @param provider Provider recorded as the session's active provider.
 * @param model Optional model recorded as the session's active model.
 * @param workingDirectory Optional working directory stored on the session.
 * @returns The newly built session object.
 */
export function createSession(provider: ProviderId, model?: string, workingDirectory?: string): Session {
  const id = crypto.randomUUID();
  const createdAt = new Date().toISOString();

  const session: Session = {
    id,
    createdAt,
    workingDirectory,
    messages: [],
    state: {
      goal: '',
      decisions: [],
      constraints: [],
      currentPlan: [],
      recentFailures: [],
      lastUpdatedAtTurn: 0,
    },
    tasks: [],
    activeProvider: provider,
    activeModel: model,
    totalInputTokens: 0,
    totalOutputTokens: 0,
    totalCostUsd: 0,
  };

  return session;
}
|