@psiclawops/hypermem 0.1.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +4 -3
- package/README.md +457 -174
- package/dist/background-indexer.d.ts +19 -4
- package/dist/background-indexer.d.ts.map +1 -1
- package/dist/background-indexer.js +329 -17
- package/dist/cache.d.ts +110 -0
- package/dist/cache.d.ts.map +1 -0
- package/dist/cache.js +495 -0
- package/dist/compaction-fence.d.ts +1 -1
- package/dist/compaction-fence.js +1 -1
- package/dist/compositor.d.ts +114 -27
- package/dist/compositor.d.ts.map +1 -1
- package/dist/compositor.js +1678 -229
- package/dist/content-type-classifier.d.ts +41 -0
- package/dist/content-type-classifier.d.ts.map +1 -0
- package/dist/content-type-classifier.js +181 -0
- package/dist/cross-agent.d.ts +5 -0
- package/dist/cross-agent.d.ts.map +1 -1
- package/dist/cross-agent.js +5 -0
- package/dist/db.d.ts +1 -1
- package/dist/db.d.ts.map +1 -1
- package/dist/db.js +6 -2
- package/dist/desired-state-store.d.ts +1 -1
- package/dist/desired-state-store.d.ts.map +1 -1
- package/dist/desired-state-store.js +15 -5
- package/dist/doc-chunk-store.d.ts +26 -1
- package/dist/doc-chunk-store.d.ts.map +1 -1
- package/dist/doc-chunk-store.js +114 -1
- package/dist/doc-chunker.d.ts +1 -1
- package/dist/doc-chunker.js +1 -1
- package/dist/dreaming-promoter.d.ts +86 -0
- package/dist/dreaming-promoter.d.ts.map +1 -0
- package/dist/dreaming-promoter.js +381 -0
- package/dist/episode-store.d.ts +2 -1
- package/dist/episode-store.d.ts.map +1 -1
- package/dist/episode-store.js +4 -4
- package/dist/fact-store.d.ts +19 -1
- package/dist/fact-store.d.ts.map +1 -1
- package/dist/fact-store.js +64 -3
- package/dist/fleet-store.d.ts +1 -1
- package/dist/fleet-store.js +1 -1
- package/dist/fos-mod.d.ts +178 -0
- package/dist/fos-mod.d.ts.map +1 -0
- package/dist/fos-mod.js +416 -0
- package/dist/hybrid-retrieval.d.ts +5 -1
- package/dist/hybrid-retrieval.d.ts.map +1 -1
- package/dist/hybrid-retrieval.js +7 -3
- package/dist/image-eviction.d.ts +49 -0
- package/dist/image-eviction.d.ts.map +1 -0
- package/dist/image-eviction.js +251 -0
- package/dist/index.d.ts +50 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +73 -43
- package/dist/keystone-scorer.d.ts +51 -0
- package/dist/keystone-scorer.d.ts.map +1 -0
- package/dist/keystone-scorer.js +52 -0
- package/dist/knowledge-graph.d.ts +1 -1
- package/dist/knowledge-graph.js +1 -1
- package/dist/knowledge-lint.d.ts +29 -0
- package/dist/knowledge-lint.d.ts.map +1 -0
- package/dist/knowledge-lint.js +116 -0
- package/dist/knowledge-store.d.ts +1 -1
- package/dist/knowledge-store.d.ts.map +1 -1
- package/dist/knowledge-store.js +8 -2
- package/dist/library-schema.d.ts +3 -3
- package/dist/library-schema.d.ts.map +1 -1
- package/dist/library-schema.js +324 -3
- package/dist/message-store.d.ts +15 -2
- package/dist/message-store.d.ts.map +1 -1
- package/dist/message-store.js +51 -1
- package/dist/metrics-dashboard.d.ts +114 -0
- package/dist/metrics-dashboard.d.ts.map +1 -0
- package/dist/metrics-dashboard.js +260 -0
- package/dist/obsidian-exporter.d.ts +57 -0
- package/dist/obsidian-exporter.d.ts.map +1 -0
- package/dist/obsidian-exporter.js +274 -0
- package/dist/obsidian-watcher.d.ts +147 -0
- package/dist/obsidian-watcher.d.ts.map +1 -0
- package/dist/obsidian-watcher.js +403 -0
- package/dist/open-domain.d.ts +46 -0
- package/dist/open-domain.d.ts.map +1 -0
- package/dist/open-domain.js +125 -0
- package/dist/preference-store.d.ts +1 -1
- package/dist/preference-store.js +1 -1
- package/dist/preservation-gate.d.ts +1 -1
- package/dist/preservation-gate.js +1 -1
- package/dist/proactive-pass.d.ts +63 -0
- package/dist/proactive-pass.d.ts.map +1 -0
- package/dist/proactive-pass.js +239 -0
- package/dist/profiles.d.ts +44 -0
- package/dist/profiles.d.ts.map +1 -0
- package/dist/profiles.js +227 -0
- package/dist/provider-translator.d.ts +13 -3
- package/dist/provider-translator.d.ts.map +1 -1
- package/dist/provider-translator.js +63 -9
- package/dist/rate-limiter.d.ts +1 -1
- package/dist/rate-limiter.js +1 -1
- package/dist/repair-tool-pairs.d.ts +38 -0
- package/dist/repair-tool-pairs.d.ts.map +1 -0
- package/dist/repair-tool-pairs.js +138 -0
- package/dist/retrieval-policy.d.ts +51 -0
- package/dist/retrieval-policy.d.ts.map +1 -0
- package/dist/retrieval-policy.js +77 -0
- package/dist/schema.d.ts +2 -2
- package/dist/schema.d.ts.map +1 -1
- package/dist/schema.js +28 -2
- package/dist/secret-scanner.d.ts +1 -1
- package/dist/secret-scanner.js +1 -1
- package/dist/seed.d.ts +2 -2
- package/dist/seed.js +2 -2
- package/dist/session-flusher.d.ts +53 -0
- package/dist/session-flusher.d.ts.map +1 -0
- package/dist/session-flusher.js +69 -0
- package/dist/session-topic-map.d.ts +41 -0
- package/dist/session-topic-map.d.ts.map +1 -0
- package/dist/session-topic-map.js +77 -0
- package/dist/spawn-context.d.ts +54 -0
- package/dist/spawn-context.d.ts.map +1 -0
- package/dist/spawn-context.js +159 -0
- package/dist/system-store.d.ts +1 -1
- package/dist/system-store.js +1 -1
- package/dist/temporal-store.d.ts +80 -0
- package/dist/temporal-store.d.ts.map +1 -0
- package/dist/temporal-store.js +149 -0
- package/dist/topic-detector.d.ts +35 -0
- package/dist/topic-detector.d.ts.map +1 -0
- package/dist/topic-detector.js +249 -0
- package/dist/topic-store.d.ts +1 -1
- package/dist/topic-store.js +1 -1
- package/dist/topic-synthesizer.d.ts +51 -0
- package/dist/topic-synthesizer.d.ts.map +1 -0
- package/dist/topic-synthesizer.js +315 -0
- package/dist/trigger-registry.d.ts +63 -0
- package/dist/trigger-registry.d.ts.map +1 -0
- package/dist/trigger-registry.js +163 -0
- package/dist/types.d.ts +214 -10
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +1 -1
- package/dist/vector-store.d.ts +43 -5
- package/dist/vector-store.d.ts.map +1 -1
- package/dist/vector-store.js +189 -10
- package/dist/version.d.ts +34 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +34 -0
- package/dist/wiki-page-emitter.d.ts +65 -0
- package/dist/wiki-page-emitter.d.ts.map +1 -0
- package/dist/wiki-page-emitter.js +258 -0
- package/dist/work-store.d.ts +1 -1
- package/dist/work-store.js +1 -1
- package/package.json +15 -5
- package/dist/redis.d.ts +0 -188
- package/dist/redis.d.ts.map +0 -1
- package/dist/redis.js +0 -534
package/dist/compositor.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* hypermem Compositor
|
|
3
3
|
*
|
|
4
4
|
* Assembles context for LLM calls by orchestrating all four memory layers:
|
|
5
5
|
* L1 Redis — hot session working memory (system, identity, recent msgs)
|
|
@@ -10,101 +10,249 @@
|
|
|
10
10
|
* Token-budgeted: never exceeds the budget, prioritizes by configured order.
|
|
11
11
|
* Provider-neutral internally, translates at the output boundary.
|
|
12
12
|
*/
|
|
13
|
-
import {
|
|
13
|
+
import { filterByScope } from './retrieval-policy.js';
|
|
14
|
+
import { DEFAULT_TRIGGERS, matchTriggers, logRegistryStartup, } from './trigger-registry.js';
|
|
14
15
|
import { MessageStore } from './message-store.js';
|
|
16
|
+
import { SessionTopicMap } from './session-topic-map.js';
|
|
15
17
|
import { toProviderFormat } from './provider-translator.js';
|
|
16
18
|
import { DocChunkStore } from './doc-chunk-store.js';
|
|
17
19
|
import { hybridSearch } from './hybrid-retrieval.js';
|
|
18
20
|
import { ensureCompactionFenceSchema, updateCompactionFence } from './compaction-fence.js';
|
|
21
|
+
import { rankKeystones, scoreKeystone } from './keystone-scorer.js';
|
|
22
|
+
import { buildOrgRegistryFromDb, defaultOrgRegistry } from './cross-agent.js';
|
|
23
|
+
import { getActiveFOS, matchMOD, renderFOS, renderMOD, renderLightFOS, resolveOutputTier, buildActionVerificationSummary } from './fos-mod.js';
|
|
24
|
+
import { KnowledgeStore } from './knowledge-store.js';
|
|
25
|
+
import { TemporalStore, hasTemporalSignals } from './temporal-store.js';
|
|
26
|
+
import { isOpenDomainQuery, searchOpenDomain } from './open-domain.js';
|
|
27
|
+
/**
|
|
28
|
+
* Model context window sizes by provider/model string (or partial match).
|
|
29
|
+
* Used as fallback when tokenBudget is not passed by the runtime.
|
|
30
|
+
* Order matters: first match wins. Partial substring match on the model string.
|
|
31
|
+
*/
|
|
32
|
+
const MODEL_CONTEXT_WINDOWS = [
|
|
33
|
+
// Anthropic
|
|
34
|
+
{ pattern: 'claude-opus-4', tokens: 200_000 },
|
|
35
|
+
{ pattern: 'claude-sonnet-4', tokens: 200_000 },
|
|
36
|
+
{ pattern: 'claude-3-5', tokens: 200_000 },
|
|
37
|
+
{ pattern: 'claude-3-7', tokens: 200_000 },
|
|
38
|
+
{ pattern: 'claude', tokens: 200_000 },
|
|
39
|
+
// OpenAI
|
|
40
|
+
{ pattern: 'gpt-5', tokens: 128_000 },
|
|
41
|
+
{ pattern: 'gpt-4o', tokens: 128_000 },
|
|
42
|
+
{ pattern: 'gpt-4', tokens: 128_000 },
|
|
43
|
+
{ pattern: 'o3', tokens: 128_000 },
|
|
44
|
+
{ pattern: 'o4', tokens: 128_000 },
|
|
45
|
+
// Google
|
|
46
|
+
{ pattern: 'gemini-3.1-pro', tokens: 1_000_000 },
|
|
47
|
+
{ pattern: 'gemini-3.1-flash', tokens: 1_000_000 },
|
|
48
|
+
{ pattern: 'gemini-2.5-pro', tokens: 1_000_000 },
|
|
49
|
+
{ pattern: 'gemini-2', tokens: 1_000_000 },
|
|
50
|
+
{ pattern: 'gemini', tokens: 1_000_000 },
|
|
51
|
+
// Zhipu / GLM
|
|
52
|
+
{ pattern: 'glm-5', tokens: 131_072 },
|
|
53
|
+
{ pattern: 'glm-4', tokens: 131_072 },
|
|
54
|
+
// Alibaba / Qwen
|
|
55
|
+
{ pattern: 'qwen3', tokens: 262_144 },
|
|
56
|
+
{ pattern: 'qwen', tokens: 131_072 },
|
|
57
|
+
// DeepSeek
|
|
58
|
+
{ pattern: 'deepseek-v3', tokens: 131_072 },
|
|
59
|
+
{ pattern: 'deepseek', tokens: 131_072 },
|
|
60
|
+
];
|
|
61
|
+
/**
 * Resolve the effective token budget from a model string.
 *
 * The model is lower-cased and matched by substring against
 * MODEL_CONTEXT_WINDOWS; the first matching entry wins. The result is that
 * entry's context window minus the reserve fraction (held back for output
 * tokens and hypermem operational overhead), floored to an integer.
 *
 * Default reserve: 15% (leaves 85% of the window for input context).
 * Falls back to defaultBudget, unchanged, when model is empty or unmatched.
 *
 * @param model          Provider/model identifier; may be empty or undefined.
 * @param defaultBudget  Budget returned when no table entry matches.
 * @param reserve        Fraction of the window to hold back (default 0.15).
 * @returns Token budget available for input context.
 */
function resolveModelBudget(model, defaultBudget, reserve = 0.15) {
    if (!model)
        return defaultBudget;
    const normalized = model.toLowerCase();
    // Table order matters: specific patterns precede their generic fallbacks.
    const entry = MODEL_CONTEXT_WINDOWS.find(({ pattern }) => normalized.includes(pattern));
    return entry ? Math.floor(entry.tokens * (1 - reserve)) : defaultBudget;
}
|
|
79
|
+
/**
 * Look up the raw context window size for a model, with no reserve applied.
 *
 * Matching is a case-insensitive substring test against
 * MODEL_CONTEXT_WINDOWS, first entry wins. When the model is empty or has no
 * entry, the window is estimated as defaultBudget / 0.85, i.e. the inverse of
 * the 15% default reserve.
 */
function resolveModelWindow(model, defaultBudget) {
    const estimateFromBudget = () => Math.floor(defaultBudget / 0.85);
    if (!model)
        return estimateFromBudget();
    const needle = model.toLowerCase();
    const hit = MODEL_CONTEXT_WINDOWS.find(candidate => needle.includes(candidate.pattern));
    return hit ? hit.tokens : estimateFromBudget();
}
|
|
95
|
+
/**
 * Derive the context-window reserve fraction from recent message cost.
 *
 * reserve = clamp(avgCost × horizon / totalWindow, base, max), where avgCost
 * is the mean estimated token cost of the last 20 user/assistant messages.
 *
 * The returned object carries the reserve plus diagnostics:
 *   - dynamic:      true when the computed fraction exceeded the base reserve
 *   - pressureHigh: true only when the fraction was clamped at the maximum,
 *                   so callers can warn or trigger checkpointing
 */
function computeDynamicReserve(recentMessages, totalWindow, config) {
    const baseReserve = config.contextWindowReserve ?? 0.15;
    const turnHorizon = config.dynamicReserveTurnHorizon ?? 5;
    const reserveCeiling = config.dynamicReserveMax ?? 0.50;
    const isEnabled = config.dynamicReserveEnabled ?? true;
    // Every non-dynamic outcome reports the base reserve.
    const staticResult = (avgTurnCost) => ({
        reserve: baseReserve,
        avgTurnCost,
        dynamic: false,
        pressureHigh: false,
    });
    if (!isEnabled || recentMessages.length === 0 || totalWindow <= 0) {
        return staticResult(0);
    }
    // Only user and assistant messages are sampled; tool messages are left out
    // of the cost estimate.
    const conversational = recentMessages
        .filter(({ role }) => role === 'user' || role === 'assistant')
        .slice(-20);
    if (conversational.length === 0) {
        return staticResult(0);
    }
    let totalCost = 0;
    for (const message of conversational) {
        totalCost += estimateMessageTokens(message);
    }
    const avgTurnCost = Math.floor(totalCost / conversational.length);
    const dynamicFrac = (avgTurnCost * turnHorizon) / totalWindow;
    if (dynamicFrac <= baseReserve) {
        return staticResult(avgTurnCost);
    }
    const hitCeiling = dynamicFrac >= reserveCeiling;
    return {
        reserve: hitCeiling ? reserveCeiling : dynamicFrac,
        avgTurnCost,
        dynamic: true,
        pressureHigh: hitCeiling,
    };
}
|
|
19
133
|
const DEFAULT_CONFIG = {
|
|
20
134
|
defaultTokenBudget: 90000,
|
|
21
135
|
maxHistoryMessages: 250,
|
|
22
|
-
maxFacts:
|
|
23
|
-
maxCrossSessionContext:
|
|
136
|
+
maxFacts: 28,
|
|
137
|
+
maxCrossSessionContext: 6000,
|
|
24
138
|
maxRecentToolPairs: 3,
|
|
25
139
|
maxProseToolPairs: 10,
|
|
26
140
|
warmHistoryBudgetFraction: 0.4,
|
|
141
|
+
keystoneHistoryFraction: 0.2,
|
|
142
|
+
keystoneMaxMessages: 15,
|
|
143
|
+
keystoneMinSignificance: 0.5,
|
|
144
|
+
contextWindowReserve: 0.15,
|
|
145
|
+
dynamicReserveTurnHorizon: 5,
|
|
146
|
+
dynamicReserveMax: 0.50,
|
|
147
|
+
dynamicReserveEnabled: true,
|
|
27
148
|
};
|
|
149
|
+
// Tool gradient thresholds — controls how aggressively tool results are
|
|
150
|
+
// truncated as they age out of the recent window.
|
|
151
|
+
// Recent-turn policy (2026-04-07): protect turn 0 + turn 1, budget against a
|
|
152
|
+
// conservative 120k planning window, and only head+tail trim large (>40k)
|
|
153
|
+
// recent results when projected occupancy crosses the orange zone.
|
|
154
|
+
const TOOL_GRADIENT_T0_TURNS = 2; // current + 2 prior completed turns: full fidelity (matches OpenClaw keepLastAssistants: 3)
|
|
155
|
+
const TOOL_GRADIENT_T1_TURNS = 4; // turns 2-4: moderate truncation (was 3)
|
|
156
|
+
const TOOL_GRADIENT_T2_TURNS = 7; // turns 4-7: aggressive truncation (was 12)
|
|
157
|
+
// T3 = turns 8+: one-liner stub
|
|
158
|
+
const TOOL_GRADIENT_T1_CHAR_CAP = 6_000; // per-message cap (was 8k)
|
|
159
|
+
const TOOL_GRADIENT_T1_TURN_CAP = 12_000; // per-turn-pair cap (was 16k)
|
|
160
|
+
const TOOL_GRADIENT_T2_CHAR_CAP = 800; // per-message cap (was 1k)
|
|
161
|
+
const TOOL_GRADIENT_T2_TURN_CAP = 3_000; // per-turn-pair cap (was 4k)
|
|
162
|
+
const TOOL_GRADIENT_T3_CHAR_CAP = 150; // oldest tier: stub only (was 200)
|
|
163
|
+
const TOOL_GRADIENT_T3_TURN_CAP = 800; // per-turn-pair cap (was 1k)
|
|
164
|
+
const TOOL_GRADIENT_MAX_TAIL_CHARS = 3_000; // tail preserve budget for T1+
|
|
165
|
+
const TOOL_GRADIENT_MIDDLE_MARKER = '\n[... tool output truncated ...]\n';
|
|
166
|
+
const TOOL_PLANNING_BASELINE_WINDOW = 120_000;
|
|
167
|
+
const TOOL_PLANNING_MIN_RESERVE_TOKENS = 24_000;
|
|
168
|
+
const TOOL_PRESSURE_YELLOW = 0.75;
|
|
169
|
+
const TOOL_PRESSURE_ORANGE = 0.80;
|
|
170
|
+
const TOOL_PRESSURE_RED = 0.85;
|
|
171
|
+
const TOOL_RECENT_OVERSIZE_CHAR_THRESHOLD = 40_000;
|
|
172
|
+
const TOOL_RECENT_OVERSIZE_TARGET_CHARS = 40_000;
|
|
173
|
+
const TOOL_RECENT_OVERSIZE_MAX_TAIL_CHARS = 12_000;
|
|
174
|
+
const TOOL_TRIM_NOTE_PREFIX = '[hypermem_tool_result_trim';
|
|
175
|
+
// ─── Trigger Registry ────────────────────────────────────────────
|
|
176
|
+
// Moved to src/trigger-registry.ts (W5).
|
|
177
|
+
// CollectionTrigger, DEFAULT_TRIGGERS, matchTriggers imported above.
|
|
178
|
+
// Re-exported below for backward compatibility with existing consumers.
|
|
179
|
+
export { DEFAULT_TRIGGERS, matchTriggers } from './trigger-registry.js';
|
|
180
|
+
// ─── Test-only exports (not part of public API) ───────────────────────────
|
|
181
|
+
// These are exported solely for unit testing. Do not use in production code.
|
|
182
|
+
export { getTurnAge, applyToolGradient, appendToolSummary, truncateWithHeadTail, applyTierPayloadCap, evictLargeToolResults };
|
|
183
|
+
/**
 * Group a flat message list into clusters so tool traffic stays together.
 *
 * - A message with tool calls absorbs the following messages that carry tool
 *   results, stopping at the first message without results or whose result
 *   ids all belong to other calls (checked only when both sides have ids).
 * - A message with tool results but no calls absorbs the following
 *   result-only messages.
 * - Any other message forms a cluster of one.
 *
 * Each cluster is { messages, tokenCost }, with tokenCost the sum of
 * estimateMessageTokens over its members.
 */
function clusterNeutralMessages(messages) {
    const hasEntries = (list) => Boolean(list) && list.length > 0;
    const clusters = [];
    let index = 0;
    while (index < messages.length) {
        const head = messages[index];
        const members = [head];
        let next = index + 1;
        if (hasEntries(head.toolCalls)) {
            const callIds = new Set(head.toolCalls.map(call => call.id).filter(Boolean));
            for (; next < messages.length; next++) {
                const follower = messages[next];
                if (!hasEntries(follower.toolResults))
                    break;
                const resultIds = follower.toolResults.map(result => result.callId).filter(Boolean);
                const answersOtherCalls = callIds.size > 0
                    && resultIds.length > 0
                    && resultIds.every(id => !callIds.has(id));
                if (answersOtherCalls)
                    break;
                members.push(follower);
            }
        }
        else if (hasEntries(head.toolResults)) {
            for (; next < messages.length; next++) {
                const follower = messages[next];
                if (!hasEntries(follower.toolResults) || hasEntries(follower.toolCalls))
                    break;
                members.push(follower);
            }
        }
        clusters.push({
            messages: members,
            tokenCost: members.reduce((sum, member) => sum + estimateMessageTokens(member), 0),
        });
        // Resume after the last absorbed message.
        index = next;
    }
    return clusters;
}
|
|
28
221
|
/**
|
|
29
|
-
*
|
|
30
|
-
*
|
|
222
|
+
* Public reshape helper: apply tool gradient then trim to fit within a token budget.
|
|
223
|
+
*
|
|
224
|
+
* Used by the plugin's budget-downshift pass to pre-process a Redis history window
|
|
225
|
+
* after a model switch to a smaller context window, before the full compose pipeline
|
|
226
|
+
* runs. Trims from oldest to newest until estimated token cost fits within
|
|
227
|
+
* tokenBudget * 0.65 (using the standard char/4 heuristic).
|
|
228
|
+
*
|
|
229
|
+
* @param messages NeutralMessage array from the Redis hot window
|
|
230
|
+
* @param tokenBudget Effective token budget for this session
|
|
231
|
+
* @returns Trimmed message array ready for setWindow()
|
|
31
232
|
*/
|
|
32
|
-
export
|
|
33
|
-
{
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
keywords: [
|
|
46
|
-
'charter', 'mission', 'director', 'org', 'reporting', 'boundary',
|
|
47
|
-
'delegation', 'authority', 'jurisdiction',
|
|
48
|
-
],
|
|
49
|
-
maxTokens: 1000,
|
|
50
|
-
maxChunks: 2,
|
|
51
|
-
},
|
|
52
|
-
{
|
|
53
|
-
collection: 'governance/comms',
|
|
54
|
-
keywords: [
|
|
55
|
-
'message', 'send', 'tier 1', 'tier 2', 'tier 3', 'async', 'dispatch',
|
|
56
|
-
'sessions_send', 'inter-agent', 'protocol', 'comms', 'ping', 'notify',
|
|
57
|
-
],
|
|
58
|
-
maxTokens: 800,
|
|
59
|
-
maxChunks: 2,
|
|
60
|
-
},
|
|
61
|
-
{
|
|
62
|
-
collection: 'operations/agents',
|
|
63
|
-
keywords: [
|
|
64
|
-
'boot', 'startup', 'bootstrap', 'heartbeat', 'workqueue', 'checkpoint',
|
|
65
|
-
'session start', 'roll call', 'memory recall', 'dispatch inbox',
|
|
66
|
-
],
|
|
67
|
-
maxTokens: 800,
|
|
68
|
-
maxChunks: 2,
|
|
69
|
-
},
|
|
70
|
-
{
|
|
71
|
-
collection: 'identity/job',
|
|
72
|
-
keywords: [
|
|
73
|
-
'deliberat', 'council round', 'vote', 'response contract', 'rating',
|
|
74
|
-
'first response', 'second response', 'handoff', 'floor open',
|
|
75
|
-
'performance', 'output discipline', 'assessment',
|
|
76
|
-
],
|
|
77
|
-
maxTokens: 1200,
|
|
78
|
-
maxChunks: 3,
|
|
79
|
-
},
|
|
80
|
-
{
|
|
81
|
-
collection: 'identity/motivations',
|
|
82
|
-
keywords: [
|
|
83
|
-
'motivation', 'fear', 'tension', 'why do you', 'how do you feel',
|
|
84
|
-
'drives', 'values',
|
|
85
|
-
],
|
|
86
|
-
maxTokens: 600,
|
|
87
|
-
maxChunks: 1,
|
|
88
|
-
},
|
|
89
|
-
{
|
|
90
|
-
collection: 'memory/decisions',
|
|
91
|
-
keywords: [
|
|
92
|
-
'remember', 'decision', 'we decided', 'previously', 'last time',
|
|
93
|
-
'history', 'past', 'earlier', 'recall', 'context',
|
|
94
|
-
],
|
|
95
|
-
maxTokens: 1500,
|
|
96
|
-
maxChunks: 4,
|
|
97
|
-
},
|
|
98
|
-
];
|
|
233
|
+
export function applyToolGradientToWindow(messages, tokenBudget, totalWindowTokens) {
    // The reshaped window is trimmed to 65% of the token budget.
    const budgetCeiling = Math.floor(tokenBudget * 0.65);
    const clusters = clusterNeutralMessages(applyToolGradient(messages, { totalWindowTokens }));
    let remainingTokens = 0;
    for (const { tokenCost } of clusters) {
        remainingTokens += tokenCost;
    }
    // Drop whole clusters from the oldest end until the rest fits; the newest
    // cluster is always kept, even if it alone exceeds the ceiling.
    const newestIndex = clusters.length - 1;
    let firstKept = 0;
    for (; firstKept < newestIndex && remainingTokens > budgetCeiling; firstKept++) {
        remainingTokens -= clusters[firstKept].tokenCost;
    }
    return clusters.slice(firstKept).flatMap(({ messages: grouped }) => grouped);
}
|
|
99
246
|
/**
|
|
100
|
-
*
|
|
101
|
-
*
|
|
247
|
+
* Canonical history must remain lossless for tool turns.
|
|
248
|
+
*
|
|
249
|
+
* If a window contains any structured tool calls or tool results, the caller
|
|
250
|
+
* should treat applyToolGradientToWindow() as a view-only transform for the
|
|
251
|
+
* current compose pass and avoid writing the reshaped messages back into the
|
|
252
|
+
* canonical cache/history store.
|
|
102
253
|
*/
|
|
103
|
-
function
|
|
104
|
-
|
|
105
|
-
return [];
|
|
106
|
-
const lower = userMessage.toLowerCase();
|
|
107
|
-
return triggers.filter(t => t.keywords.some(kw => lower.includes(kw.toLowerCase())));
|
|
254
|
+
export function canPersistReshapedHistory(messages) {
    // Persisting is allowed only when no message has tool content.
    return messages.every(message => !hasToolContent(message));
}
|
|
109
257
|
/**
|
|
110
258
|
* Rough token estimation: ~4 chars per token for English text.
|
|
@@ -135,154 +283,490 @@ function estimateMessageTokens(msg) {
|
|
|
135
283
|
tokens += 4;
|
|
136
284
|
return tokens;
|
|
137
285
|
}
|
|
286
|
+
/**
 * Best-effort parse of a tool call's JSON argument string.
 *
 * Always returns a plain object so callers can read properties without
 * guarding: invalid JSON, a missing value, and JSON that parses to something
 * other than an object (null, a number, a string, an array) all yield {}.
 *
 * @param argumentsJson  Raw JSON text of the tool call arguments.
 * @returns The parsed argument object, or {} when none is available.
 */
function parseToolArgs(argumentsJson) {
    let parsed;
    try {
        parsed = JSON.parse(argumentsJson);
    }
    catch {
        // Deliberately best-effort: malformed arguments must not break summarization.
        return {};
    }
    // JSON.parse('null') returns null, which would throw on property access.
    const isRecord = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isRecord ? parsed : {};
}
|
|
294
|
+
/**
 * Build a short label for a tool call: the tool name plus its most
 * identifying argument (path, command, query, url or target).
 *
 * Free-text arguments are clipped to 80 characters; a missing or empty
 * argument falls back to a generic placeholder. Unknown tools are labelled
 * with their bare name. A null or undefined args is treated as empty, so the
 * function never throws on absent arguments.
 *
 * @param name  Tool name.
 * @param args  Parsed argument object; may be null or undefined.
 * @returns Label text.
 */
function toolLabelFromCall(name, args) {
    const params = args ?? {};
    const clip = (value, placeholder) => String(value ?? '').slice(0, 80) || placeholder;
    switch (name) {
        case 'read':
            return `read ${(params.path ?? params.file_path ?? params.filePath ?? 'file')}`;
        case 'write':
            return `write ${(params.path ?? params.file ?? params.filePath ?? 'file')}`;
        case 'edit':
            return `edit ${(params.path ?? params.file ?? params.filePath ?? 'file')}`;
        case 'exec':
            return `exec ${clip(params.command, 'command')}`;
        case 'web_search':
            return `web_search ${clip(params.query, 'query')}`;
        case 'web_fetch':
            return `web_fetch ${clip(params.url, 'url')}`;
        case 'sessions_send':
            return `sessions_send ${clip(params.sessionKey ?? params.label, 'target')}`;
        case 'memory_search':
            return `memory_search ${clip(params.query, 'query')}`;
        default:
            return name;
    }
}
|
|
316
|
+
/**
 * Remove the external-content security wrapper from a tool result so that
 * truncation spends its budget on the payload rather than on the notice.
 *
 * Everything up to and including the BEGIN_EXTERNAL_UNTRUSTED_CONTENT tag
 * line is dropped, as is everything from the END_EXTERNAL_UNTRUSTED_CONTENT
 * tag onward. If 20 or fewer characters remain after trimming, the original
 * content is returned untouched.
 */
function stripSecurityPreamble(content) {
    const openingThroughTag = /^[\s\S]*?<<<BEGIN_EXTERNAL_UNTRUSTED_CONTENT[^\n]*>>>?\n?/;
    const closingTagToEnd = /\n?<<<END_EXTERNAL_UNTRUSTED_CONTENT[^\n]*>>>?[\s\S]*$/;
    const withoutHeader = content.replace(openingThroughTag, '');
    const payload = withoutHeader.replace(closingTagToEnd, '').trim();
    if (payload.length > 20) {
        return payload;
    }
    // Too little survived; keep the original rather than a near-empty result.
    return content;
}
|
|
330
|
+
// Minimum floor: if trimming would leave less than 30% of original content, return a
|
|
331
|
+
// stripped sentinel instead of a misleading fragment. A partial result that looks
|
|
332
|
+
// complete is worse than a clear signal that the result was dropped.
|
|
333
|
+
// Applied only in applyTierPayloadCap (pressure-driven trimming), not in structural
|
|
334
|
+
// truncation paths where head+tail is always semantically useful.
|
|
335
|
+
const TOOL_GRADIENT_MIN_USEFUL_FRACTION = 0.30;
|
|
336
|
+
/**
 * Shorten content to roughly maxChars by keeping its head and tail and
 * replacing the middle with TOOL_GRADIENT_MIDDLE_MARKER.
 *
 * The tail gets up to 30% of maxChars, capped at maxTailChars; the head gets
 * whatever remains after the tail and the marker. Content already within
 * maxChars is returned unchanged. For very small maxChars the marker alone
 * can exceed the cap, since the marker is always emitted.
 *
 * @param content       Text to truncate.
 * @param maxChars      Target length of the result.
 * @param maxTailChars  Upper bound on the preserved tail.
 * @returns The original content, or head + marker + tail.
 */
function truncateWithHeadTail(content, maxChars, maxTailChars = TOOL_GRADIENT_MAX_TAIL_CHARS) {
    if (content.length <= maxChars)
        return content;
    // Clamp at zero so a negative maxTailChars cannot produce a negative budget.
    const tailBudget = Math.max(0, Math.min(Math.floor(maxChars * 0.30), maxTailChars));
    const headBudget = Math.max(0, maxChars - tailBudget - TOOL_GRADIENT_MIDDLE_MARKER.length);
    // slice(-0) is slice(0) and would return the whole string, so an empty
    // tail budget must be handled explicitly.
    const tail = tailBudget > 0 ? content.slice(-tailBudget) : '';
    return content.slice(0, headBudget) + TOOL_GRADIENT_MIDDLE_MARKER + tail;
}
|
|
343
|
+
/**
 * Keep the start of content and end it with a single "…" when it is longer
 * than maxChars. Content within the limit is returned unchanged.
 */
function truncateHead(content, maxChars) {
    const ELLIPSIS = '…';
    if (content.length > maxChars) {
        // Reserve room for the ellipsis, never going below zero kept characters.
        const room = Math.max(0, maxChars - ELLIPSIS.length);
        return `${content.slice(0, room)}${ELLIPSIS}`;
    }
    return content;
}
|
|
350
|
+
/**
 * Return the first line of content that is not blank, trimmed of surrounding
 * whitespace, or '' when every line is blank.
 */
function firstNonEmptyLine(content) {
    for (const candidate of content.split('\n')) {
        const trimmed = candidate.trim();
        if (trimmed.length > 0) {
            return trimmed;
        }
    }
    return '';
}
|
|
354
|
+
/**
 * Collapse every run of whitespace (including newlines) to a single space
 * and strip leading and trailing whitespace.
 */
function normalizeInline(text) {
    const words = text.trim().split(/\s+/);
    return words.join(' ');
}
|
|
357
|
+
/**
 * Extract the hostname from a URL string. Input that cannot be parsed as a
 * URL is returned unchanged.
 */
function hostFromUrl(raw) {
    let parsed;
    try {
        parsed = new URL(raw);
    }
    catch {
        // Not a parseable URL; fall back to the raw text.
        return raw;
    }
    return parsed.hostname;
}
|
|
365
|
+
/**
 * Return the text of the first markdown heading (levels 1-3) in content,
 * without its leading '#' marks, or '' when there is none.
 *
 * Lines are trimmed before both the match and the prefix removal, so an
 * indented heading is reported without its '#' marks as well.
 *
 * @param content  Text to scan line by line.
 * @returns Heading text, or ''.
 */
function extractTopHeading(content) {
    const headingPrefix = /^#{1,3}\s+/;
    for (const rawLine of content.split('\n')) {
        const line = rawLine.trim();
        if (headingPrefix.test(line)) {
            return line.replace(headingPrefix, '').trim();
        }
    }
    return '';
}
|
|
369
|
+
/**
 * Find a numeric exit code in command output.
 *
 * Recognises "exit code", "exit" or "code" (case-insensitive), optionally
 * followed by ':' or '=', then digits. The keyword must start at a word
 * boundary so that words merely ending in "code" or "exit" ("unicode 5",
 * "barcode=12") are not mistaken for an exit status.
 *
 * @param content  Tool output text.
 * @returns The digits as a string, or null when no exit code is found.
 */
function extractExitCode(content) {
    const match = content.match(/\b(?:exit code|exit|code)\s*[:=]?\s*(\d+)/i);
    return match ? match[1] : null;
}
|
|
373
|
+
/**
 * Estimate how many results a search tool returned.
 *
 * For JSON-looking output (a "results": [ array) the count is the number of
 * "title": keys. Otherwise, or when that finds nothing, it counts the result
 * markers "Source:", "siteName" and "title" (case-insensitive), capped at 20.
 *
 * "Source:" is matched without a trailing word boundary: a boundary after
 * the colon would require a word character immediately after it and so would
 * never match the usual "Source: value" form.
 *
 * @param content  Search tool output.
 * @returns Estimated result count, or null when nothing countable is found.
 */
function estimateSearchResultCount(content) {
    if (/"results"\s*:\s*\[/.test(content)) {
        const titles = content.match(/"title"\s*:/g);
        if (titles?.length)
            return titles.length;
    }
    const markers = content.match(/\bSource:|\bsiteName\b|\btitle\b/gi);
    return markers?.length ? Math.min(markers.length, 20) : null;
}
|
|
383
|
+
/**
 * One-line outcome summary: "<label> — <first non-empty line>", or
 * "<label> — <N> chars" when the content has no non-blank line. The result
 * is head-truncated to maxChars.
 */
function summarizeOutcome(label, content, maxChars) {
    const lead = firstNonEmptyLine(content);
    const detail = lead || `${content.length} chars`;
    return truncateHead(`${label} — ${detail}`, maxChars);
}
|
|
388
|
+
/**
 * Produce a short prose summary of one tool call and its result.
 *
 * Known tools (read, exec, web_search, web_fetch, memory_search) get a
 * tailored sentence built from their key argument and the result's top
 * heading or first non-empty line. Any other tool falls back to its label
 * plus either the first line (compact) or a head+tail excerpt of the content
 * (non-compact).
 *
 * compact shortens command and query excerpts from 80 to 40 characters and
 * drops the "top:" detail from web_search summaries.
 */
function summarizeToolInteraction(name, args, content, maxChars, compact = false) {
    const leadLine = normalizeInline(firstNonEmptyLine(content));
    const sizeNote = `${content.length} chars`;
    const excerptCap = compact ? 40 : 80;
    if (name === 'read') {
        const target = String(args.path ?? args.file_path ?? args.filePath ?? 'file');
        const what = extractTopHeading(content) || leadLine || sizeNote;
        return truncateHead(`Read ${target} — ${what}`, maxChars);
    }
    if (name === 'exec') {
        const command = String(args.command ?? 'command').slice(0, excerptCap);
        const exitCode = extractExitCode(content);
        let status;
        if (exitCode) {
            status = `exit ${exitCode}`;
        }
        else if (/(error|failed|timeout|timed out)/i.test(content)) {
            status = 'failed';
        }
        else {
            status = 'completed';
        }
        // Skip the first line when it only repeats the exit status.
        const repeatsStatus = /^exit\s+\d+$/i.test(leadLine);
        const extra = leadLine && !repeatsStatus ? `, ${leadLine}` : '';
        return truncateHead(`Ran ${command} — ${status}${extra}`, maxChars);
    }
    if (name === 'web_search') {
        const query = String(args.query ?? 'query').slice(0, excerptCap);
        const hits = estimateSearchResultCount(content);
        const topResult = extractTopHeading(content) || leadLine;
        let summary = `Searched '${query}'${hits ? ` — ${hits} results` : ''}`;
        if (!compact && topResult) {
            summary += `, top: ${topResult}`;
        }
        return truncateHead(summary, maxChars);
    }
    if (name === 'web_fetch') {
        const host = hostFromUrl(String(args.url ?? 'url'));
        const what = extractTopHeading(content) || leadLine || sizeNote;
        return truncateHead(`Fetched ${host} — ${what}`, maxChars);
    }
    if (name === 'memory_search') {
        const query = String(args.query ?? 'query').slice(0, excerptCap);
        const hits = estimateSearchResultCount(content);
        const hitText = hits ? ` — ${hits} hits` : '';
        const topText = leadLine ? `, top: ${leadLine}` : '';
        return truncateHead(`Searched memory for '${query}'${hitText}${topText}`, maxChars);
    }
    // Generic fallback for tools without a tailored summary.
    const label = toolLabelFromCall(name, args);
    if (compact) {
        return truncateHead(`${label} — ${leadLine || sizeNote}`, maxChars);
    }
    const tag = `[${label}] `;
    const room = Math.max(40, maxChars - tag.length);
    return tag + truncateWithHeadTail(content, room);
}
|
|
439
|
+
/**
 * Tier 2 replacement text for a tool result.
 *
 * With both a tool name and parsed args, delegates to the non-compact
 * summarizeToolInteraction. Otherwise emits "[label] " followed by a
 * head+tail excerpt of the content, allowing the excerpt at least 40
 * characters.
 */
function buildTier2Envelope(label, content, maxChars, name, args) {
    const hasStructuredCall = Boolean(name && args);
    if (!hasStructuredCall) {
        const tag = `[${label}] `;
        const room = Math.max(40, maxChars - tag.length);
        return tag + truncateWithHeadTail(content, room);
    }
    return summarizeToolInteraction(name, args, content, maxChars, false);
}
|
|
446
|
+
/**
 * Tier 3 replacement text for a tool result: a bracketed one-line stub.
 *
 * Uses the compact summarizeToolInteraction when a tool name and parsed args
 * are available, otherwise summarizeOutcome. Two characters of maxChars are
 * set aside for the surrounding brackets.
 */
function buildTier3Envelope(label, content, maxChars, name, args) {
    const innerBudget = maxChars - 2;
    const inner = name && args
        ? summarizeToolInteraction(name, args, content, innerBudget, true)
        : summarizeOutcome(label, content, innerBudget);
    return `[${inner}]`;
}
|
|
138
451
|
/**
 * Produce a heuristic prose summary for the tool calls/results on a message.
 * Used when tool payloads are removed but continuity should remain.
 *
 * - If the message has tool calls, each call yields one part: an envelope
 *   built from its matching result (matched by callId), or just the call
 *   label when no result content is present.
 * - Otherwise, if the message only has tool results, each result yields one
 *   envelope using the result's name (or 'tool_result') and empty args.
 *
 * @param {object} msg - Message with optional `toolCalls` / `toolResults`.
 * @param {number} perResultCap - Character cap applied to each part.
 * @param {boolean} [compact=false] - Use the Tier 3 (bracketed) envelope
 *   instead of the Tier 2 envelope.
 * @returns {string} Parts joined with '; ', head-truncated to
 *   max(perResultCap, 120) characters.
 */
function extractToolProseSummary(msg, perResultCap, compact = false) {
    const buildEnvelope = compact ? buildTier3Envelope : buildTier2Envelope;
    const hasCalls = Boolean(msg.toolCalls && msg.toolCalls.length > 0);
    const hasResults = Boolean(msg.toolResults && msg.toolResults.length > 0);
    let parts = [];
    if (hasCalls) {
        parts = msg.toolCalls.map((call) => {
            const args = parseToolArgs(call.arguments);
            const label = toolLabelFromCall(call.name, args);
            const resultContent = msg.toolResults?.find(r => r.callId === call.id)?.content ?? '';
            if (resultContent) {
                return buildEnvelope(label, resultContent, perResultCap, call.name, args);
            }
            // No result payload: keep only the label (bracketed in compact mode).
            return compact ? `[${truncateHead(label, perResultCap - 2)}]` : label;
        });
    }
    else if (hasResults) {
        parts = msg.toolResults.map((toolResult) => {
            const name = toolResult.name || 'tool_result';
            return buildEnvelope(name, toolResult.content ?? '', perResultCap, name, {});
        });
    }
    return truncateHead(parts.join('; '), Math.max(perResultCap, 120));
}
|
|
483
|
+
/**
 * Merge a tool summary into a message's text content.
 *
 * @param {string|null|undefined} textContent - Existing text (nullish is treated as '').
 * @param {string} summary - Summary to attach; when falsy the text is returned unchanged.
 * @returns {string} The existing text followed by a `[Tools: ...]` line, or
 *   the bare summary when there is no existing text.
 */
function appendToolSummary(textContent, summary) {
    const base = textContent ?? '';
    if (summary && base) {
        return `${base}\n[Tools: ${summary}]`;
    }
    return summary ? summary : base;
}
|
|
489
|
+
/**
 * Count how many plain user turns come after a given message.
 *
 * A message counts as a plain user turn when its role is 'user' and it
 * carries no tool results (missing or empty `toolResults`).
 *
 * @param {Array<object>} messages - Conversation messages, oldest first.
 * @param {number} index - Position of the message whose age is wanted.
 * @returns {number} Number of plain user turns at positions after `index`.
 */
function getTurnAge(messages, index) {
    const isPlainUserTurn = (candidate) => candidate?.role === 'user'
        && (!candidate.toolResults || candidate.toolResults.length === 0);
    return messages.reduce((age, candidate, position) => (position > index && isPlainUserTurn(candidate) ? age + 1 : age), 0);
}
|
|
499
|
+
/**
 * Report whether a message carries any tool calls or tool results.
 *
 * @param {object} msg - Message with optional `toolCalls` / `toolResults` arrays.
 * @returns {boolean} True when either array is present and non-empty.
 */
function hasToolContent(msg) {
    const isNonEmpty = (list) => Boolean(list && list.length > 0);
    return isNonEmpty(msg.toolCalls) || isNonEmpty(msg.toolResults);
}
|
|
502
|
+
/**
 * Resolve the token window used for tool-pressure planning.
 *
 * A missing or non-positive window falls back to
 * TOOL_PLANNING_BASELINE_WINDOW, and the result never exceeds that baseline.
 *
 * @param {number} [totalWindowTokens] - The model's total context window.
 * @returns {number} The planning window in tokens.
 */
function resolveToolPlanningWindow(totalWindowTokens) {
    let effectiveWindow = TOOL_PLANNING_BASELINE_WINDOW;
    if (totalWindowTokens && totalWindowTokens > 0) {
        effectiveWindow = totalWindowTokens;
    }
    return Math.min(effectiveWindow, TOOL_PLANNING_BASELINE_WINDOW);
}
|
|
508
|
+
/**
 * Estimate how full the planning window is and classify it into a zone.
 *
 * Projected usage is the estimated token cost of all messages plus a reserve
 * of 10% of the planning window (never below
 * TOOL_PLANNING_MIN_RESERVE_TOKENS). Occupancy is projected usage divided by
 * the planning window, or 1 when the window is not positive.
 *
 * @param {Array<object>} messages - Messages to measure.
 * @param {number} [totalWindowTokens] - The model's total context window.
 * @returns {{planningWindowTokens: number, reserveTokens: number,
 *   projectedTokens: number, occupancy: number, zone: string}} Pressure
 *   snapshot; `zone` is 'green', 'yellow', 'orange' or 'red'.
 */
function computeToolPressureState(messages, totalWindowTokens) {
    const planningWindowTokens = resolveToolPlanningWindow(totalWindowTokens);
    const tenPercentOfWindow = Math.floor(planningWindowTokens * 0.10);
    const reserveTokens = Math.max(TOOL_PLANNING_MIN_RESERVE_TOKENS, tenPercentOfWindow);
    let usedTokens = 0;
    messages.forEach((message) => {
        usedTokens = usedTokens + estimateMessageTokens(message);
    });
    const projectedTokens = usedTokens + reserveTokens;
    const occupancy = planningWindowTokens > 0 ? projectedTokens / planningWindowTokens : 1;
    // Checked from most to least severe; the first threshold exceeded wins.
    const bands = [
        [TOOL_PRESSURE_RED, 'red'],
        [TOOL_PRESSURE_ORANGE, 'orange'],
        [TOOL_PRESSURE_YELLOW, 'yellow'],
    ];
    const zone = bands.find(([threshold]) => occupancy > threshold)?.[1] ?? 'green';
    return { planningWindowTokens, reserveTokens, projectedTokens, occupancy, zone };
}
|
|
529
|
+
/**
 * Tell whether tool result content already begins with a structured trim note.
 *
 * @param {string} content - Tool result content.
 * @returns {boolean} True when the content starts with TOOL_TRIM_NOTE_PREFIX.
 */
function isStructuredTrimNote(content) {
    const alreadyAnnotated = content.startsWith(TOOL_TRIM_NOTE_PREFIX);
    return alreadyAnnotated;
}
|
|
532
|
+
/**
 * Render the structured note that precedes a trimmed recent tool result.
 *
 * The note is a single space-separated line starting with
 * TOOL_TRIM_NOTE_PREFIX and ending with ']'. It records the original size,
 * the kept head/tail sizes and the pressure figures that triggered the trim.
 *
 * @param {number} originalChars - Length of the untrimmed content.
 * @param {number} keptHeadChars - Characters kept from the start.
 * @param {number} keptTailChars - Characters kept from the end.
 * @param {{occupancy: number, planningWindowTokens: number, reserveTokens: number}} pressure
 *   Pressure snapshot from computeToolPressureState().
 * @param {string} [resultId] - Optional identifier emitted as `result_id=`.
 * @returns {string} The assembled note.
 */
function buildRecentTrimNote(originalChars, keptHeadChars, keptTailChars, pressure, resultId) {
    const occupancyPct = Math.round(pressure.occupancy * 100);
    const fields = [
        TOOL_TRIM_NOTE_PREFIX,
        'partial_result=true',
        'reason=oversize_turn0_trim',
        `original_chars=${originalChars}`,
        `kept_head_chars=${keptHeadChars}`,
        `kept_tail_chars=${keptTailChars}`,
        `projected_occupancy_pct=${occupancyPct}`,
        `planning_window_tokens=${pressure.planningWindowTokens}`,
        `reserve_tokens=${pressure.reserveTokens}`,
        'retry_recommended=true',
        ...(resultId ? [`result_id=${resultId}`] : []),
        ']',
    ];
    return fields.join(' ');
}
|
|
550
|
+
/**
 * Measure how much of a truncated string sits before and after the middle marker.
 *
 * @param {string} content - Output of a head/tail truncation.
 * @returns {{headChars: number, tailChars: number}} Character counts on each
 *   side of the first TOOL_GRADIENT_MIDDLE_MARKER; when the marker is absent
 *   the whole string counts as head and the tail is 0.
 */
function countHeadTailChars(content) {
    const markerStart = content.indexOf(TOOL_GRADIENT_MIDDLE_MARKER);
    const hasMarker = markerStart !== -1;
    const headChars = hasMarker ? markerStart : content.length;
    const tailChars = hasMarker
        ? content.length - markerStart - TOOL_GRADIENT_MIDDLE_MARKER.length
        : 0;
    return { headChars, tailChars };
}
|
|
560
|
+
/**
 * Trim an oversized recent tool result and prepend a structured trim note.
 *
 * Content that already starts with a trim note is returned untouched. The
 * security preamble is stripped first (unless stripping leaves nothing), the
 * remainder is head/tail truncated, and a note describing what was kept is
 * placed on the line above the truncated payload.
 *
 * @param {string} content - Original tool result content.
 * @param {object} pressure - Pressure snapshot from computeToolPressureState().
 * @param {string} [resultId] - Optional identifier recorded in the note.
 * @returns {string} The note, a newline, then the truncated payload.
 */
function trimRecentToolResult(content, pressure, resultId) {
    if (isStructuredTrimNote(content)) {
        return content;
    }
    const withoutPreamble = stripSecurityPreamble(content);
    const source = withoutPreamble.length > 0 ? withoutPreamble : content;
    const noteFor = (head, tail) => buildRecentTrimNote(source.length, head, tail, pressure, resultId);
    // Size the payload against a note rendered with zero counts; the payload
    // budget never drops below 2,000 chars.
    const skeletonLength = noteFor(0, 0).length;
    const payloadBudget = Math.max(2_000, TOOL_RECENT_OVERSIZE_TARGET_CHARS - skeletonLength - 1);
    const truncated = truncateWithHeadTail(source, payloadBudget, TOOL_RECENT_OVERSIZE_MAX_TAIL_CHARS);
    const { headChars, tailChars } = countHeadTailChars(truncated);
    const note = noteFor(headChars, tailChars);
    return `${note}\n${truncated}`;
}
|
|
573
|
+
/**
 * Keep recent tool results intact unless context pressure forces an emergency trim.
 *
 * A result is trimmed only when the pressure zone is 'orange' or 'red' AND
 * its content is longer than TOOL_RECENT_OVERSIZE_CHAR_THRESHOLD. Everything
 * else is passed through unchanged.
 *
 * @param {object} msg - Message that may carry `toolResults`.
 * @param {{zone: string}} pressure - Pressure snapshot from computeToolPressureState().
 * @returns {object} The same message when it has no tool results, otherwise a
 *   shallow copy with a freshly mapped `toolResults` array.
 */
function protectRecentToolContent(msg, pressure) {
    const results = msg.toolResults;
    if (!results || results.length === 0) {
        return msg;
    }
    const underPressure = ['orange', 'red'].includes(pressure.zone);
    const trimIfOversized = (result) => {
        const content = result.content ?? '';
        const needsTrim = Boolean(content)
            && underPressure
            && content.length > TOOL_RECENT_OVERSIZE_CHAR_THRESHOLD;
        if (!needsTrim) {
            return result;
        }
        const resultId = result.callId || result.name || undefined;
        return { ...result, content: trimRecentToolResult(content, pressure, resultId) };
    };
    return { ...msg, toolResults: results.map((result) => trimIfOversized(result)) };
}
|
|
592
|
+
/**
 * Cap each tool result on a message to a per-result size, then enforce a
 * per-turn aggregate cap.
 *
 * Per-result handling (only for content longer than `perResultCap`):
 *  - The security preamble is stripped first so it does not consume the head
 *    budget of the truncated output.
 *  - Floor check (TUNE-015): if the cap is below both
 *    TOOL_GRADIENT_MIN_USEFUL_FRACTION of the stripped length and 2,000 chars,
 *    the content is replaced by a "result too large" sentinel rather than a
 *    misleading fragment.
 *  - Otherwise the stripped content is head/tail truncated with room reserved
 *    for a trailing "\n[trimmed]" marker.
 *
 * Per-turn handling: if the running total exceeds `perTurnCap`, the whole
 * message is downgraded to a prose summary and its tool payloads are dropped.
 *
 * @param {object} msg - Message that may carry `toolCalls` / `toolResults`.
 * @param {number} perResultCap - Maximum characters per individual result.
 * @param {number|null|undefined} perTurnCap - Aggregate cap for the turn;
 *   nullish disables the aggregate check.
 * @param {number} [usedSoFar=0] - Characters already consumed in this turn.
 * @param {number} [maxTailChars=TOOL_GRADIENT_MAX_TAIL_CHARS] - Tail limit
 *   forwarded to truncateWithHeadTail().
 * @returns {{msg: object, usedChars: number}} The transformed message and the
 *   updated running total for the turn.
 */
function applyTierPayloadCap(msg, perResultCap, perTurnCap, usedSoFar = 0, maxTailChars = TOOL_GRADIENT_MAX_TAIL_CHARS) {
    const TRIMMED_MARKER = '\n[trimmed]';
    const capContent = (content) => {
        if (!(content.length > perResultCap)) {
            return content;
        }
        const stripped = stripSecurityPreamble(content);
        const fragmentWouldMislead = perResultCap < stripped.length * TOOL_GRADIENT_MIN_USEFUL_FRACTION
            && perResultCap < 2_000;
        if (fragmentWouldMislead) {
            return `[result too large for current context budget \u2014 ${stripped.length} chars stripped]`;
        }
        // Keep marker + payload within perResultCap so several truncated
        // results cannot overflow the per-turn aggregate cap.
        const payloadBudget = perResultCap - TRIMMED_MARKER.length;
        return truncateWithHeadTail(stripped, payloadBudget, maxTailChars) + TRIMMED_MARKER;
    };
    const toolResults = msg.toolResults?.map((result) => ({
        ...result,
        content: capContent(result.content ?? ''),
    })) ?? null;
    let resultChars = 0;
    for (const capped of toolResults ?? []) {
        resultChars += capped.content?.length ?? 0;
    }
    const usedChars = usedSoFar + resultChars;
    const overTurnCap = perTurnCap != null && usedChars > perTurnCap;
    if (!overTurnCap) {
        return { msg: { ...msg, toolResults }, usedChars };
    }
    // Turn budget exhausted: replace the payloads with a prose summary.
    const downgradeSummary = extractToolProseSummary(msg, TOOL_GRADIENT_T2_CHAR_CAP, false);
    const downgraded = {
        ...msg,
        textContent: appendToolSummary(msg.textContent, downgradeSummary),
        toolCalls: null,
        toolResults: null,
    };
    return { msg: downgraded, usedChars: usedSoFar + downgradeSummary.length };
}
|
|
208
636
|
/**
 * Evict tool results exceeding 800 tokens (~3200 chars) before the history
 * budget-fit loop. Large stale results waste budget; replace them with a
 * stub so consumers know the result existed and can re-run if needed.
 *
 * Applied to the already-gradient-processed history before window selection.
 * Messages whose turn age is at or below TOOL_GRADIENT_T0_TURNS are protected
 * and returned unchanged.
 *
 * Turn ages are computed in one newest-to-oldest pass rather than rescanning
 * the tail for every message, so the function is linear in the history length.
 *
 * @param {Array<object>} messages - History messages, oldest first.
 * @returns {Array<object>} A new array; messages with evicted results are
 *   shallow copies, all others are the original objects.
 */
const TOOL_RESULT_EVICTION_CHAR_THRESHOLD = 3_200; // ~800 tokens at 4 chars/token
function evictLargeToolResults(messages) {
    // turnAges[i] = number of plain user turns (role 'user', no tool results)
    // located after index i. This must stay in sync with getTurnAge().
    const turnAges = new Array(messages.length);
    let newerUserTurns = 0;
    for (let i = messages.length - 1; i >= 0; i--) {
        turnAges[i] = newerUserTurns;
        const candidate = messages[i];
        if (candidate?.role === 'user' && (!candidate.toolResults || candidate.toolResults.length === 0)) {
            newerUserTurns++;
        }
    }
    return messages.map((msg, idx) => {
        // Never evict from the protected recent-turn window.
        if (turnAges[idx] <= TOOL_GRADIENT_T0_TURNS)
            return msg;
        if (!msg.toolResults || msg.toolResults.length === 0)
            return msg;
        const evicted = msg.toolResults.map(result => {
            const content = result.content ?? '';
            if (content.length <= TOOL_RESULT_EVICTION_CHAR_THRESHOLD)
                return result;
            // Rough size hint for the stub: chars / 4 gives tokens, / 1000 gives k-tokens.
            const approxKTokens = Math.round(content.length / 4 / 1000);
            return {
                ...result,
                content: `[tool result evicted: ~${approxKTokens}k tokens \u2014 use memory_search or re-run if needed]`,
            };
        });
        return { ...msg, toolResults: evicted };
    });
}
|
|
666
|
+
/**
 * Apply gradient tool treatment to a message array.
 *
 * Tiers are based on turn age, where turn age is the number of newer plain
 * user messages (role 'user', no tool results) after the current message:
 *
 *  - age <= TOOL_GRADIENT_T0_TURNS: tool results are preserved, except for an
 *    emergency trim of oversized payloads under orange/red pressure.
 *  - age <= TOOL_GRADIENT_T1_TURNS: results are capped per result and per turn.
 *  - age <= TOOL_GRADIENT_T2_TURNS: payloads are replaced by a prose summary.
 *  - older: payloads are replaced by a compact bracketed summary.
 *
 * Turn ages are derived from the ORIGINAL `messages`, not from the array
 * being transformed. A downgraded user-role tool-result message has
 * `toolResults: null`; counting it as a user turn would inflate the age of
 * every older message and shift the per-turn usage buckets. Tracking the age
 * in the same newest-to-oldest walk also avoids rescanning the tail for each
 * message.
 *
 * @param {Array<object>} messages - History messages, oldest first. Not mutated.
 * @param {{totalWindowTokens?: number}} [opts] - Optional model window size
 *   used to compute tool pressure.
 * @returns {Array<object>} A new array with tool-bearing messages transformed.
 */
function applyToolGradient(messages, opts) {
    const result = [...messages];
    const pressure = computeToolPressureState(messages, opts?.totalWindowTokens);
    const perTurnUsage = new Map();
    // Must stay in sync with the predicate used by getTurnAge().
    const isPlainUserTurn = (candidate) => candidate?.role === 'user'
        && (!candidate.toolResults || candidate.toolResults.length === 0);
    // Plain user turns seen so far while walking newest -> oldest; at index i
    // this equals the turn age of messages[i].
    let newerUserTurns = 0;
    for (let i = result.length - 1; i >= 0; i--) {
        const msg = result[i];
        const turnAge = newerUserTurns;
        if (isPlainUserTurn(messages[i])) {
            newerUserTurns++;
        }
        if (!hasToolContent(msg))
            continue;
        const usage = perTurnUsage.get(turnAge) ?? { t1: 0, t2: 0, t3: 0 };
        if (turnAge <= TOOL_GRADIENT_T0_TURNS) {
            // T0: preserve full recent tool results unless we hit the conservative
            // orange/red pressure zones and the payload itself is oversized.
            result[i] = protectRecentToolContent(msg, pressure);
        }
        else if (turnAge <= TOOL_GRADIENT_T1_TURNS) {
            const capped = applyTierPayloadCap(msg, TOOL_GRADIENT_T1_CHAR_CAP, TOOL_GRADIENT_T1_TURN_CAP, usage.t1);
            usage.t1 = capped.usedChars;
            result[i] = capped.msg;
        }
        else if (turnAge <= TOOL_GRADIENT_T2_TURNS) {
            const summary = extractToolProseSummary(msg, TOOL_GRADIENT_T2_CHAR_CAP, false);
            const allowed = Math.max(0, TOOL_GRADIENT_T2_TURN_CAP - usage.t2);
            // When the turn allowance is exhausted (allowed === 0) the summary
            // still gets the small T3 cap instead of disappearing entirely.
            const boundedSummary = truncateHead(summary, Math.min(TOOL_GRADIENT_T2_CHAR_CAP, allowed || TOOL_GRADIENT_T3_CHAR_CAP));
            usage.t2 += boundedSummary.length;
            result[i] = {
                ...msg,
                textContent: appendToolSummary(msg.textContent, boundedSummary),
                toolCalls: null,
                toolResults: null,
            };
        }
        else {
            const summary = extractToolProseSummary(msg, TOOL_GRADIENT_T3_CHAR_CAP, true);
            const allowed = Math.max(0, TOOL_GRADIENT_T3_TURN_CAP - usage.t3);
            const boundedSummary = truncateHead(summary, Math.min(TOOL_GRADIENT_T3_CHAR_CAP, allowed || TOOL_GRADIENT_T3_CHAR_CAP));
            usage.t3 += boundedSummary.length;
            result[i] = {
                ...msg,
                textContent: appendToolSummary(msg.textContent, boundedSummary),
                toolCalls: null,
                toolResults: null,
            };
        }
        perTurnUsage.set(turnAge, usage);
    }
    return result;
}
|
|
720
|
+
/** Guard: logRegistryStartup() fires only once per process, not per instance. */
|
|
721
|
+
let _registryLogged = false;
|
|
256
722
|
export class Compositor {
|
|
257
723
|
config;
|
|
258
|
-
|
|
724
|
+
cache;
|
|
259
725
|
vectorStore;
|
|
260
726
|
libraryDb;
|
|
261
727
|
triggerRegistry;
|
|
728
|
+
/** Cached org registry loaded from fleet_agents at construction time. */
|
|
729
|
+
_orgRegistry;
|
|
262
730
|
constructor(deps, config) {
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
this.
|
|
270
|
-
|
|
271
|
-
else {
|
|
272
|
-
this.redis = deps.redis;
|
|
273
|
-
this.vectorStore = deps.vectorStore || null;
|
|
274
|
-
this.libraryDb = deps.libraryDb || null;
|
|
275
|
-
this.triggerRegistry = deps.triggerRegistry || DEFAULT_TRIGGERS;
|
|
276
|
-
}
|
|
731
|
+
this.cache = deps.cache;
|
|
732
|
+
this.vectorStore = deps.vectorStore || null;
|
|
733
|
+
this.libraryDb = deps.libraryDb || null;
|
|
734
|
+
this.triggerRegistry = deps.triggerRegistry || DEFAULT_TRIGGERS;
|
|
735
|
+
// Load org registry from DB on init; fall back to hardcoded if DB empty.
|
|
736
|
+
this._orgRegistry = this.libraryDb
|
|
737
|
+
? buildOrgRegistryFromDb(this.libraryDb)
|
|
738
|
+
: defaultOrgRegistry();
|
|
277
739
|
this.config = { ...DEFAULT_CONFIG, ...config };
|
|
740
|
+
if (!_registryLogged) {
|
|
741
|
+
logRegistryStartup();
|
|
742
|
+
_registryLogged = true;
|
|
743
|
+
}
|
|
278
744
|
}
|
|
279
745
|
/**
|
|
280
746
|
* Set or replace the vector store after construction.
|
|
281
|
-
* Called by
|
|
747
|
+
* Called by hypermem.create() once sqlite-vec is confirmed available.
|
|
282
748
|
*/
|
|
283
749
|
setVectorStore(vs) {
|
|
284
750
|
this.vectorStore = vs;
|
|
285
751
|
}
|
|
752
|
+
/**
|
|
753
|
+
* Hot-reload the org registry from the fleet_agents table.
|
|
754
|
+
* Call after fleet membership changes (new agent, org restructure)
|
|
755
|
+
* to pick up the latest without a full restart.
|
|
756
|
+
* Falls back to the current cached registry if the DB is unavailable.
|
|
757
|
+
*/
|
|
758
|
+
refreshOrgRegistry() {
|
|
759
|
+
if (this.libraryDb) {
|
|
760
|
+
this._orgRegistry = buildOrgRegistryFromDb(this.libraryDb);
|
|
761
|
+
}
|
|
762
|
+
return this._orgRegistry;
|
|
763
|
+
}
|
|
764
|
+
/**
|
|
765
|
+
* Return the currently cached org registry.
|
|
766
|
+
*/
|
|
767
|
+
get orgRegistry() {
|
|
768
|
+
return this._orgRegistry;
|
|
769
|
+
}
|
|
286
770
|
/**
|
|
287
771
|
* Compose a complete message array for sending to an LLM.
|
|
288
772
|
*
|
|
@@ -300,7 +784,17 @@ export class Compositor {
|
|
|
300
784
|
async compose(request, db, libraryDb) {
|
|
301
785
|
const store = new MessageStore(db);
|
|
302
786
|
const libDb = libraryDb || this.libraryDb;
|
|
303
|
-
|
|
787
|
+
// Dynamic reserve: use a lightweight SQLite sample to estimate avg turn cost
|
|
788
|
+
// BEFORE assembling the full context. This gives us the reserve fraction we
|
|
789
|
+
// need to compute the effective token budget at the start of compose.
|
|
790
|
+
// Full history assembly happens later in the pipeline.
|
|
791
|
+
const totalWindow = resolveModelWindow(request.model, this.config.defaultTokenBudget);
|
|
792
|
+
const sampleConv = store.getConversation(request.sessionKey);
|
|
793
|
+
const sampleMessages = sampleConv
|
|
794
|
+
? store.getRecentMessages(sampleConv.id, 40)
|
|
795
|
+
: [];
|
|
796
|
+
const { reserve: dynamicReserve, avgTurnCost, dynamic: isDynamic, pressureHigh } = computeDynamicReserve(sampleMessages, totalWindow, this.config);
|
|
797
|
+
const budget = request.tokenBudget || resolveModelBudget(request.model, this.config.defaultTokenBudget, dynamicReserve);
|
|
304
798
|
let remaining = budget;
|
|
305
799
|
const warnings = [];
|
|
306
800
|
const slots = {
|
|
@@ -338,9 +832,88 @@ export class Compositor {
|
|
|
338
832
|
slots.identity = tokens;
|
|
339
833
|
remaining -= tokens;
|
|
340
834
|
}
|
|
835
|
+
// ─── Stable Output Profile Prefix ──────────────────────────
|
|
836
|
+
// Keep deterministic output instructions on the static side of the cache
|
|
837
|
+
// boundary so Anthropic and OpenAI warm-prefix caching can reuse them.
|
|
838
|
+
if (remaining > 100 && request.includeLibrary !== false) {
|
|
839
|
+
const fosEnabled = this.config?.enableFOS !== false;
|
|
840
|
+
const modEnabled = this.config?.enableMOD !== false;
|
|
841
|
+
const outputTier = resolveOutputTier((this.config?.outputProfile ?? this.config?.outputStandard), fosEnabled, modEnabled);
|
|
842
|
+
const stableOutputParts = [];
|
|
843
|
+
if (outputTier.tier === 'light') {
|
|
844
|
+
stableOutputParts.push(renderLightFOS().join('\n'));
|
|
845
|
+
}
|
|
846
|
+
else if (libDb) {
|
|
847
|
+
if (outputTier.fos) {
|
|
848
|
+
const fos = getActiveFOS(libDb);
|
|
849
|
+
if (fos) {
|
|
850
|
+
const fosContent = renderFOS(fos).join('\n');
|
|
851
|
+
if (fosContent.trim())
|
|
852
|
+
stableOutputParts.push(fosContent);
|
|
853
|
+
}
|
|
854
|
+
}
|
|
855
|
+
if (outputTier.mod) {
|
|
856
|
+
const mod = matchMOD(request.model, libDb);
|
|
857
|
+
if (mod) {
|
|
858
|
+
const modContent = renderMOD(mod, null, request.model || '').join('\n');
|
|
859
|
+
if (modContent.trim())
|
|
860
|
+
stableOutputParts.push(modContent);
|
|
861
|
+
}
|
|
862
|
+
}
|
|
863
|
+
}
|
|
864
|
+
if (stableOutputParts.length > 0) {
|
|
865
|
+
const stableOutputContent = stableOutputParts.join('\n\n');
|
|
866
|
+
const stableOutputTokens = estimateTokens(stableOutputContent);
|
|
867
|
+
if (stableOutputTokens <= remaining) {
|
|
868
|
+
messages.push({
|
|
869
|
+
role: 'system',
|
|
870
|
+
textContent: stableOutputContent,
|
|
871
|
+
toolCalls: null,
|
|
872
|
+
toolResults: null,
|
|
873
|
+
});
|
|
874
|
+
slots.system += stableOutputTokens;
|
|
875
|
+
remaining -= stableOutputTokens;
|
|
876
|
+
}
|
|
877
|
+
}
|
|
878
|
+
}
|
|
341
879
|
// ─── Conversation History ──────────────────────────────────
|
|
880
|
+
let diagCrossTopicKeystones = 0;
|
|
881
|
+
// Hoisted: activeTopicId/name resolved inside history block, used for window dual-write (VS-1) and wiki page injection
|
|
882
|
+
let composedActiveTopicId;
|
|
883
|
+
let composedActiveTopicName;
|
|
342
884
|
if (request.includeHistory !== false) {
|
|
343
|
-
|
|
885
|
+
// P3.4: Look up the active topic for this session (non-fatal)
|
|
886
|
+
let activeTopicId;
|
|
887
|
+
let activeTopic;
|
|
888
|
+
if (!request.topicId) {
|
|
889
|
+
try {
|
|
890
|
+
const topicMap = new SessionTopicMap(db);
|
|
891
|
+
activeTopic = topicMap.getActiveTopic(request.sessionKey) || undefined;
|
|
892
|
+
if (activeTopic)
|
|
893
|
+
activeTopicId = activeTopic.id;
|
|
894
|
+
}
|
|
895
|
+
catch {
|
|
896
|
+
// Topic lookup is best-effort — fall back to full history
|
|
897
|
+
}
|
|
898
|
+
}
|
|
899
|
+
else {
|
|
900
|
+
activeTopicId = request.topicId;
|
|
901
|
+
try {
|
|
902
|
+
activeTopic = db.prepare(`
|
|
903
|
+
SELECT id, name
|
|
904
|
+
FROM topics
|
|
905
|
+
WHERE session_key = ? AND id = ?
|
|
906
|
+
LIMIT 1
|
|
907
|
+
`).get(request.sessionKey, request.topicId);
|
|
908
|
+
}
|
|
909
|
+
catch {
|
|
910
|
+
// Topic lookup is best-effort — fall back to ID-only history fetch
|
|
911
|
+
}
|
|
912
|
+
}
|
|
913
|
+
// Hoist resolved topic id+name so the window dual-write and wiki injection sections can access them
|
|
914
|
+
composedActiveTopicId = activeTopicId;
|
|
915
|
+
composedActiveTopicName = activeTopic?.name;
|
|
916
|
+
const rawHistoryMessages = await this.getHistory(request.agentId, request.sessionKey, request.historyDepth || this.config.maxHistoryMessages, store, activeTopicId);
|
|
344
917
|
// Deduplicate history by StoredMessage.id (second line of defense after
|
|
345
918
|
// pushHistory() tail-check dedup). Guards against any duplicates that
|
|
346
919
|
// slipped through the warm path — e.g. bootstrap re-runs on existing sessions.
|
|
@@ -359,24 +932,141 @@ export class Compositor {
|
|
|
359
932
|
// This ensures estimateMessageTokens() measures actual submission cost,
|
|
360
933
|
// not pre-transform cost (which caused overflow: dense tool JSON was
|
|
361
934
|
// undercounted at length/4 when it should be measured post-stub).
|
|
362
|
-
const transformedHistory = applyToolGradient(historyMessages,
|
|
363
|
-
// ──
|
|
364
|
-
//
|
|
935
|
+
const transformedHistory = applyToolGradient(historyMessages, { totalWindowTokens: totalWindow });
|
|
936
|
+
// ── Evict large tool results (>800 tokens) before window selection ─────
|
|
937
|
+
// Replace oversized stale results with stubs so they don't burn budget.
|
|
938
|
+
// Current-turn results (turn age 0) are never evicted.
|
|
939
|
+
const evictedHistory = evictLargeToolResults(transformedHistory);
|
|
940
|
+
// ── Budget-fit: walk newest→oldest, drop whole clusters ─────────────
|
|
941
|
+
// Group tool_use + tool_result messages into clusters so they are kept
|
|
942
|
+
// or dropped as a unit. Breaking mid-cluster creates orphaned tool
|
|
943
|
+
// pairs that repairToolPairs has to strip downstream — wasting budget
|
|
944
|
+
// and leaving gaps in conversation continuity.
|
|
945
|
+
const budgetClusters = clusterNeutralMessages(evictedHistory);
|
|
365
946
|
let historyTokens = 0;
|
|
366
|
-
const
|
|
367
|
-
for (let i =
|
|
368
|
-
const
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
warnings.push(`History truncated at
|
|
947
|
+
const includedClusters = [];
|
|
948
|
+
for (let i = budgetClusters.length - 1; i >= 0; i--) {
|
|
949
|
+
const cluster = budgetClusters[i];
|
|
950
|
+
if (historyTokens + cluster.tokenCost > remaining && includedClusters.length > 0) {
|
|
951
|
+
const droppedMsgCount = budgetClusters.slice(0, i + 1).reduce((s, c) => s + c.messages.length, 0);
|
|
952
|
+
warnings.push(`History truncated at cluster ${i + 1}/${budgetClusters.length} (${droppedMsgCount} messages dropped)`);
|
|
372
953
|
break;
|
|
373
954
|
}
|
|
374
|
-
|
|
375
|
-
historyTokens +=
|
|
955
|
+
includedClusters.unshift(cluster);
|
|
956
|
+
historyTokens += cluster.tokenCost;
|
|
957
|
+
}
|
|
958
|
+
const includedHistory = includedClusters.flatMap(c => c.messages);
|
|
959
|
+
// ── Keystone History Slot (P2.1) ──────────────────────────────────
|
|
960
|
+
// For long conversations (≥30 messages), inject high-signal older messages
|
|
961
|
+
// from before the recent window as recalled context. This lets the model
|
|
962
|
+
// see key decisions and specs that happened earlier in the conversation
|
|
963
|
+
// without them consuming the full recent history budget.
|
|
964
|
+
const keystoneFraction = this.config.keystoneHistoryFraction ?? 0.2;
|
|
965
|
+
const keystoneMaxMsgs = this.config.keystoneMaxMessages ?? 15;
|
|
966
|
+
let keystoneMessages = [];
|
|
967
|
+
let keystoneTokens = 0;
|
|
968
|
+
if (includedHistory.length >= 30 && keystoneFraction > 0) {
|
|
969
|
+
const keystoneResult = await this.buildKeystones(db, request.agentId, includedHistory, historyTokens, keystoneFraction, keystoneMaxMsgs, request.prompt, libDb || undefined);
|
|
970
|
+
if (keystoneResult) {
|
|
971
|
+
keystoneMessages = keystoneResult.keystoneMessages;
|
|
972
|
+
keystoneTokens = keystoneResult.keystoneTokens;
|
|
973
|
+
// Replace includedHistory and historyTokens with the trimmed versions
|
|
974
|
+
// (keystoneResult reflects the trimming done inside buildKeystones)
|
|
975
|
+
includedHistory.splice(0, includedHistory.length, ...keystoneResult.trimmedHistory);
|
|
976
|
+
historyTokens = keystoneResult.trimmedHistoryTokens;
|
|
977
|
+
warnings.push(`Keystone: injected ${keystoneMessages.length} recalled messages`);
|
|
978
|
+
}
|
|
979
|
+
}
|
|
980
|
+
// ── Cross-Topic Keystones (P3.5) ──────────────────────────────────
|
|
981
|
+
// Pull high-signal messages from OTHER topics in this session when their
|
|
982
|
+
// content is semantically relevant to the current topic. Non-fatal.
|
|
983
|
+
let crossTopicMessages = [];
|
|
984
|
+
let crossTopicTokens = 0;
|
|
985
|
+
if (activeTopic && this.vectorStore) {
|
|
986
|
+
try {
|
|
987
|
+
const rawCrossTopicKeystones = await this.getKeystonesByTopic(request.agentId, request.sessionKey, activeTopic, includedHistory, db, 3);
|
|
988
|
+
if (rawCrossTopicKeystones.length > 0) {
|
|
989
|
+
// Token budget: cap the full cross-topic block at 15% of remaining,
|
|
990
|
+
// including the header line.
|
|
991
|
+
const crossTopicHeaderTokens = estimateTokens('## Cross-Topic Context');
|
|
992
|
+
const crossTopicBudget = Math.max(0, Math.floor(remaining * 0.15) - crossTopicHeaderTokens);
|
|
993
|
+
let used = 0;
|
|
994
|
+
for (const candidate of rawCrossTopicKeystones) {
|
|
995
|
+
const msg = {
|
|
996
|
+
role: candidate.role,
|
|
997
|
+
textContent: candidate.content,
|
|
998
|
+
toolCalls: null,
|
|
999
|
+
toolResults: null,
|
|
1000
|
+
};
|
|
1001
|
+
const msgTokens = estimateMessageTokens(msg);
|
|
1002
|
+
if (used + msgTokens > crossTopicBudget)
|
|
1003
|
+
continue;
|
|
1004
|
+
crossTopicMessages.push(msg);
|
|
1005
|
+
used += msgTokens;
|
|
1006
|
+
}
|
|
1007
|
+
crossTopicTokens = used;
|
|
1008
|
+
diagCrossTopicKeystones = crossTopicMessages.length;
|
|
1009
|
+
}
|
|
1010
|
+
}
|
|
1011
|
+
catch {
|
|
1012
|
+
// Cross-topic retrieval is non-fatal — never block compose
|
|
1013
|
+
}
|
|
1014
|
+
}
|
|
1015
|
+
// Push history with keystone separators if we have keystones.
|
|
1016
|
+
if (keystoneMessages.length > 0 || crossTopicMessages.length > 0) {
|
|
1017
|
+
// Cross-topic context (from other topics) — prepended before within-session keystones
|
|
1018
|
+
if (crossTopicMessages.length > 0) {
|
|
1019
|
+
messages.push({
|
|
1020
|
+
role: 'system',
|
|
1021
|
+
textContent: '## Cross-Topic Context',
|
|
1022
|
+
toolCalls: null,
|
|
1023
|
+
toolResults: null,
|
|
1024
|
+
});
|
|
1025
|
+
messages.push(...crossTopicMessages);
|
|
1026
|
+
}
|
|
1027
|
+
// Separator before recalled context (within-session keystones)
|
|
1028
|
+
if (keystoneMessages.length > 0) {
|
|
1029
|
+
messages.push({
|
|
1030
|
+
role: 'system',
|
|
1031
|
+
textContent: '## Recalled Context (high-signal older messages)',
|
|
1032
|
+
toolCalls: null,
|
|
1033
|
+
toolResults: null,
|
|
1034
|
+
});
|
|
1035
|
+
messages.push(...keystoneMessages);
|
|
1036
|
+
}
|
|
1037
|
+
// Separator before recent conversation
|
|
1038
|
+
messages.push({
|
|
1039
|
+
role: 'system',
|
|
1040
|
+
textContent: '## Recent Conversation',
|
|
1041
|
+
toolCalls: null,
|
|
1042
|
+
toolResults: null,
|
|
1043
|
+
});
|
|
1044
|
+
messages.push(...includedHistory);
|
|
1045
|
+
// Account for separator tokens in history slot
|
|
1046
|
+
const crossTopicSepTokens = crossTopicMessages.length > 0
|
|
1047
|
+
? estimateTokens('## Cross-Topic Context')
|
|
1048
|
+
: 0;
|
|
1049
|
+
const keystoneSepTokens = keystoneMessages.length > 0
|
|
1050
|
+
? estimateTokens('## Recalled Context (high-signal older messages)')
|
|
1051
|
+
: 0;
|
|
1052
|
+
const recentSepTokens = estimateTokens('## Recent Conversation');
|
|
1053
|
+
const sepTokens = crossTopicSepTokens + keystoneSepTokens + recentSepTokens;
|
|
1054
|
+
slots.history = historyTokens + keystoneTokens + crossTopicTokens + sepTokens;
|
|
1055
|
+
remaining -= (historyTokens + keystoneTokens + crossTopicTokens + sepTokens);
|
|
1056
|
+
}
|
|
1057
|
+
else {
|
|
1058
|
+
messages.push(...includedHistory);
|
|
1059
|
+
slots.history = historyTokens;
|
|
1060
|
+
remaining -= historyTokens;
|
|
1061
|
+
}
|
|
1062
|
+
// targetBudgetFraction cap: limit total context slots to a fraction of the
|
|
1063
|
+
// effective budget. This gives operators a single knob to make the system
|
|
1064
|
+
// lighter without tuning individual slot fractions.
|
|
1065
|
+
const targetFraction = this.config.targetBudgetFraction ?? 0.65;
|
|
1066
|
+
const contextCap = Math.floor(budget * targetFraction);
|
|
1067
|
+
if (remaining > contextCap) {
|
|
1068
|
+
remaining = contextCap;
|
|
376
1069
|
}
|
|
377
|
-
messages.push(...includedHistory);
|
|
378
|
-
slots.history = historyTokens;
|
|
379
|
-
remaining -= historyTokens;
|
|
380
1070
|
// T1.3: Ghost message suppression.
|
|
381
1071
|
// If the last message in the included history is a warm-seeded user message
|
|
382
1072
|
// AND there's a subsequent message in SQLite that wasn't included (meaning
|
|
@@ -409,30 +1099,162 @@ export class Compositor {
|
|
|
409
1099
|
// conversation history (after system/identity).
|
|
410
1100
|
const contextParts = [];
|
|
411
1101
|
let contextTokens = 0;
|
|
412
|
-
// ──
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
1102
|
+
// ── Compose-level diagnostics tracking vars ──────────────
|
|
1103
|
+
let diagTriggerHits = 0;
|
|
1104
|
+
let diagTriggerFallbackUsed = false;
|
|
1105
|
+
let diagFactsIncluded = 0;
|
|
1106
|
+
let diagSemanticResults = 0;
|
|
1107
|
+
let diagDocChunkCollections = 0;
|
|
1108
|
+
let diagScopeFiltered = 0;
|
|
1109
|
+
let diagRetrievalMode = 'none';
|
|
1110
|
+
// ── Wiki Page (L4: Library — active topic synthesis) ──────
|
|
1111
|
+
// Inject synthesized wiki page for the active topic before general knowledge.
|
|
1112
|
+
// Token budget: capped at 15% of remaining.
|
|
1113
|
+
if (request.includeLibrary !== false && remaining > 300 && libDb && composedActiveTopicName) {
|
|
1114
|
+
const wikiContent = this.buildWikiPageContext(request.agentId, composedActiveTopicName, libDb);
|
|
1115
|
+
if (wikiContent) {
|
|
1116
|
+
const tokens = estimateTokens(wikiContent);
|
|
1117
|
+
const cap = Math.floor(remaining * 0.15);
|
|
1118
|
+
if (tokens <= cap) {
|
|
1119
|
+
contextParts.push(wikiContent);
|
|
419
1120
|
contextTokens += tokens;
|
|
420
1121
|
remaining -= tokens;
|
|
421
|
-
slots.
|
|
1122
|
+
slots.library += tokens;
|
|
422
1123
|
}
|
|
423
1124
|
else {
|
|
424
|
-
|
|
425
|
-
const truncated = this.truncateToTokens(factsContent, Math.floor(remaining * 0.3));
|
|
1125
|
+
const truncated = this.truncateToTokens(wikiContent, cap);
|
|
426
1126
|
const truncTokens = estimateTokens(truncated);
|
|
427
|
-
contextParts.push(
|
|
1127
|
+
contextParts.push(truncated);
|
|
428
1128
|
contextTokens += truncTokens;
|
|
429
1129
|
remaining -= truncTokens;
|
|
430
|
-
slots.
|
|
431
|
-
|
|
1130
|
+
slots.library += truncTokens;
|
|
1131
|
+
}
|
|
1132
|
+
}
|
|
1133
|
+
}
|
|
1134
|
+
// ── Facts (L4: Library) ──────────────────────────────────
|
|
1135
|
+
// scope: agent — filtered by agentId via filterByScope after fetch
|
|
1136
|
+
if (request.includeFacts !== false && remaining > 500) {
|
|
1137
|
+
const factsContent = this.buildFactsFromDb(request.agentId, request.sessionKey, libDb || db);
|
|
1138
|
+
if (factsContent !== null) {
|
|
1139
|
+
const [content, factCount, scopeFiltered] = factsContent;
|
|
1140
|
+
diagFactsIncluded += factCount;
|
|
1141
|
+
diagScopeFiltered += scopeFiltered;
|
|
1142
|
+
if (content) {
|
|
1143
|
+
const tokens = estimateTokens(content);
|
|
1144
|
+
if (tokens <= remaining * 0.25) { // Cap facts at 25% of remaining (W4: was 0.3)
|
|
1145
|
+
contextParts.push(`## Active Facts\n${content}`);
|
|
1146
|
+
contextTokens += tokens;
|
|
1147
|
+
remaining -= tokens;
|
|
1148
|
+
slots.facts = tokens;
|
|
1149
|
+
}
|
|
1150
|
+
else {
|
|
1151
|
+
// Truncate to budget
|
|
1152
|
+
const truncated = this.truncateToTokens(content, Math.floor(remaining * 0.25));
|
|
1153
|
+
const truncTokens = estimateTokens(truncated);
|
|
1154
|
+
contextParts.push(`## Active Facts (truncated)\n${truncated}`);
|
|
1155
|
+
contextTokens += truncTokens;
|
|
1156
|
+
remaining -= truncTokens;
|
|
1157
|
+
slots.facts = truncTokens;
|
|
1158
|
+
warnings.push('Facts truncated to fit budget');
|
|
1159
|
+
}
|
|
1160
|
+
}
|
|
1161
|
+
}
|
|
1162
|
+
// ── Temporal retrieval (L4: Library) ─────────────────────
|
|
1163
|
+
// Fires when the query has temporal signals (before/after/when/last etc).
|
|
1164
|
+
// Returns facts in time order from temporal_index. Deduplicates against
|
|
1165
|
+
// facts already included above. Uses ingest_at as occurred_at proxy (v1).
|
|
1166
|
+
const queryText = request.prompt ?? '';
|
|
1167
|
+
if (queryText && hasTemporalSignals(queryText) && libDb && remaining > 300) {
|
|
1168
|
+
try {
|
|
1169
|
+
const temporalStore = new TemporalStore(libDb);
|
|
1170
|
+
const temporalFacts = temporalStore.timeRangeQuery({
|
|
1171
|
+
agentId: request.agentId,
|
|
1172
|
+
limit: 15,
|
|
1173
|
+
order: 'DESC',
|
|
1174
|
+
});
|
|
1175
|
+
if (temporalFacts.length > 0) {
|
|
1176
|
+
// Deduplicate against facts already in context
|
|
1177
|
+
const existingContent = contextParts.join('\n');
|
|
1178
|
+
const novel = temporalFacts.filter(f => !existingContent.includes(f.content.slice(0, 60)));
|
|
1179
|
+
if (novel.length > 0) {
|
|
1180
|
+
const temporalBlock = novel
|
|
1181
|
+
.map(f => {
|
|
1182
|
+
const ts = new Date(f.occurredAt).toISOString().slice(0, 10);
|
|
1183
|
+
return `[${ts}] ${f.content}`;
|
|
1184
|
+
})
|
|
1185
|
+
.join('\n');
|
|
1186
|
+
const temporalSection = `## Temporal Context\n${temporalBlock}`;
|
|
1187
|
+
const tempTokens = estimateTokens(temporalSection);
|
|
1188
|
+
const tempBudget = Math.floor(remaining * 0.20); // Cap at 20% of remaining
|
|
1189
|
+
if (tempTokens <= tempBudget) {
|
|
1190
|
+
contextParts.push(temporalSection);
|
|
1191
|
+
contextTokens += tempTokens;
|
|
1192
|
+
remaining -= tempTokens;
|
|
1193
|
+
slots.facts = (slots.facts ?? 0) + tempTokens;
|
|
1194
|
+
}
|
|
1195
|
+
else {
|
|
1196
|
+
const truncated = this.truncateToTokens(temporalSection, tempBudget);
|
|
1197
|
+
const truncTokens = estimateTokens(truncated);
|
|
1198
|
+
contextParts.push(truncated);
|
|
1199
|
+
contextTokens += truncTokens;
|
|
1200
|
+
remaining -= truncTokens;
|
|
1201
|
+
slots.facts = (slots.facts ?? 0) + truncTokens;
|
|
1202
|
+
}
|
|
1203
|
+
}
|
|
1204
|
+
}
|
|
1205
|
+
}
|
|
1206
|
+
catch {
|
|
1207
|
+
// Temporal index not yet available (migration pending) — skip silently
|
|
1208
|
+
}
|
|
1209
|
+
}
|
|
1210
|
+
// ── Open-domain FTS retrieval (L4: Library) ──────────────────
|
|
1211
|
+
// Fires when the query looks broad/exploratory with no topical anchor.
|
|
1212
|
+
// Searches raw messages_fts — bypasses isQualityFact() quality gate so
|
|
1213
|
+
// content filtered from library.db is still reachable for open-domain
|
|
1214
|
+
// questions. Primary fix for LoCoMo open-domain F1 gap (0.133 baseline).
|
|
1215
|
+
if (queryText && isOpenDomainQuery(queryText) && db && remaining > 300) {
|
|
1216
|
+
try {
|
|
1217
|
+
const existingContent = contextParts.join('\n');
|
|
1218
|
+
const odResults = searchOpenDomain(db, queryText, existingContent, 10);
|
|
1219
|
+
if (odResults.length > 0) {
|
|
1220
|
+
const odBlock = odResults
|
|
1221
|
+
.map(r => {
|
|
1222
|
+
const ts = r.createdAt
|
|
1223
|
+
? new Date(r.createdAt).toISOString().slice(0, 10)
|
|
1224
|
+
: '';
|
|
1225
|
+
const prefix = ts ? `[${ts}] ` : '';
|
|
1226
|
+
const snippet = r.content.length > 300
|
|
1227
|
+
? r.content.slice(0, 300) + '…'
|
|
1228
|
+
: r.content;
|
|
1229
|
+
return `${prefix}${snippet}`;
|
|
1230
|
+
})
|
|
1231
|
+
.join('\n');
|
|
1232
|
+
const odSection = `## Open Domain Context\n${odBlock}`;
|
|
1233
|
+
const odTokens = estimateTokens(odSection);
|
|
1234
|
+
const odBudget = Math.floor(remaining * 0.20); // Cap at 20% of remaining
|
|
1235
|
+
if (odTokens <= odBudget) {
|
|
1236
|
+
contextParts.push(odSection);
|
|
1237
|
+
contextTokens += odTokens;
|
|
1238
|
+
remaining -= odTokens;
|
|
1239
|
+
slots.facts = (slots.facts ?? 0) + odTokens;
|
|
1240
|
+
}
|
|
1241
|
+
else {
|
|
1242
|
+
const truncated = this.truncateToTokens(odSection, odBudget);
|
|
1243
|
+
const truncTokens = estimateTokens(truncated);
|
|
1244
|
+
contextParts.push(truncated);
|
|
1245
|
+
contextTokens += truncTokens;
|
|
1246
|
+
remaining -= truncTokens;
|
|
1247
|
+
slots.facts = (slots.facts ?? 0) + truncTokens;
|
|
1248
|
+
}
|
|
1249
|
+
}
|
|
1250
|
+
}
|
|
1251
|
+
catch {
|
|
1252
|
+
// Open-domain FTS unavailable — skip silently
|
|
432
1253
|
}
|
|
433
1254
|
}
|
|
434
1255
|
}
|
|
435
1256
|
// ── Knowledge (L4: Library) ──────────────────────────────
|
|
1257
|
+
// scope: agent — filtered by agent_id in the SQL query (existing behavior)
|
|
436
1258
|
if (request.includeLibrary !== false && remaining > 500 && libDb) {
|
|
437
1259
|
const knowledgeContent = this.buildKnowledgeFromDb(request.agentId, libDb);
|
|
438
1260
|
if (knowledgeContent) {
|
|
@@ -455,6 +1277,7 @@ export class Compositor {
|
|
|
455
1277
|
}
|
|
456
1278
|
}
|
|
457
1279
|
// ── Preferences (L4: Library) ────────────────────────────
|
|
1280
|
+
// scope: agent — filtered by agent_id OR NULL in the SQL query (existing behavior)
|
|
458
1281
|
if (request.includeLibrary !== false && remaining > 300 && libDb) {
|
|
459
1282
|
const prefsContent = this.buildPreferencesFromDb(request.agentId, libDb);
|
|
460
1283
|
if (prefsContent) {
|
|
@@ -468,6 +1291,7 @@ export class Compositor {
|
|
|
468
1291
|
}
|
|
469
1292
|
}
|
|
470
1293
|
// ── Semantic Recall (L3: Hybrid FTS5+KNN) ───────────────
|
|
1294
|
+
// scope: agent — buildSemanticRecall filters by agentId internally
|
|
471
1295
|
// Fires when either vector store or library DB is available.
|
|
472
1296
|
// FTS5-only (no embeddings) still returns keyword matches.
|
|
473
1297
|
// KNN-only (no FTS terms) still returns semantic matches.
|
|
@@ -479,8 +1303,18 @@ export class Compositor {
|
|
|
479
1303
|
const lastUserMsg = request.prompt?.trim() || this.getLastUserMessage(messages);
|
|
480
1304
|
if (lastUserMsg) {
|
|
481
1305
|
try {
|
|
482
|
-
|
|
483
|
-
|
|
1306
|
+
// Check Redis for a pre-computed embedding from afterTurn()
|
|
1307
|
+
let precomputedEmbedding;
|
|
1308
|
+
try {
|
|
1309
|
+
const cached = await this.cache.getQueryEmbedding(request.agentId, request.sessionKey);
|
|
1310
|
+
if (cached)
|
|
1311
|
+
precomputedEmbedding = cached;
|
|
1312
|
+
}
|
|
1313
|
+
catch {
|
|
1314
|
+
// Redis lookup is best-effort — fall through to Ollama
|
|
1315
|
+
}
|
|
1316
|
+
const semanticContent = await this.buildSemanticRecall(lastUserMsg, request.agentId, Math.floor(remaining * 0.12), // Cap at 12% of remaining (W4: was 0.15)
|
|
1317
|
+
libDb || undefined, precomputedEmbedding);
|
|
484
1318
|
if (semanticContent) {
|
|
485
1319
|
const tokens = estimateTokens(semanticContent);
|
|
486
1320
|
contextParts.push(`## Related Memory\n${semanticContent}`);
|
|
@@ -488,6 +1322,8 @@ export class Compositor {
|
|
|
488
1322
|
remaining -= tokens;
|
|
489
1323
|
// Semantic recall draws from multiple sources, attribute to context
|
|
490
1324
|
slots.context += tokens;
|
|
1325
|
+
// W3 diagnostics: count non-empty lines as rough results count
|
|
1326
|
+
diagSemanticResults = semanticContent.split('\n').filter(l => l.trim().length > 0).length;
|
|
491
1327
|
}
|
|
492
1328
|
}
|
|
493
1329
|
catch (err) {
|
|
@@ -497,21 +1333,32 @@ export class Compositor {
|
|
|
497
1333
|
}
|
|
498
1334
|
}
|
|
499
1335
|
// ── Doc Chunks (L4: Trigger-based retrieval) ─────────────
|
|
1336
|
+
// scope: per-tier/per-agent — queryChunks filters by agentId and tier
|
|
500
1337
|
// Demand-load governance, identity, and memory chunks based on
|
|
501
1338
|
// conversation context. Replaces full ACA file injection for
|
|
502
1339
|
// the files that have been seeded into the doc chunk index.
|
|
1340
|
+
let triggerFallbackUsed = false;
|
|
503
1341
|
if (request.includeDocChunks !== false && remaining > 400 && libDb) {
|
|
504
1342
|
// Use request.prompt when available (current-turn text, not stale history)
|
|
505
1343
|
const lastMsg = request.prompt?.trim() || this.getLastUserMessage(messages) || '';
|
|
506
1344
|
const triggered = matchTriggers(lastMsg, this.triggerRegistry);
|
|
507
1345
|
if (triggered.length > 0) {
|
|
1346
|
+
diagTriggerHits = triggered.length;
|
|
1347
|
+
diagRetrievalMode = 'triggered';
|
|
508
1348
|
const docChunkStore = new DocChunkStore(libDb);
|
|
509
1349
|
const docParts = [];
|
|
1350
|
+
const maxTotalTriggerTokens = Math.min(remaining, this.config.maxTotalTriggerTokens && this.config.maxTotalTriggerTokens > 0
|
|
1351
|
+
? this.config.maxTotalTriggerTokens
|
|
1352
|
+
: Math.floor(remaining * 0.40));
|
|
1353
|
+
let totalTriggerTokens = 0;
|
|
510
1354
|
for (const trigger of triggered) {
|
|
511
1355
|
if (remaining < 200)
|
|
512
1356
|
break;
|
|
513
|
-
const
|
|
514
|
-
)
|
|
1357
|
+
const triggerBudgetRemaining = maxTotalTriggerTokens - totalTriggerTokens;
|
|
1358
|
+
if (triggerBudgetRemaining < 200)
|
|
1359
|
+
break;
|
|
1360
|
+
const maxTokens = Math.min(trigger.maxTokens || 1000, Math.floor(remaining * 0.12), // No single collection takes > 12% of remaining (W4: was 0.15)
|
|
1361
|
+
triggerBudgetRemaining);
|
|
515
1362
|
try {
|
|
516
1363
|
// Build a relevance-based FTS5 query from the user message.
|
|
517
1364
|
//
|
|
@@ -569,9 +1416,11 @@ export class Compositor {
|
|
|
569
1416
|
if (chunkLines.length > 0) {
|
|
570
1417
|
const collectionLabel = trigger.collection.split('/').pop() || trigger.collection;
|
|
571
1418
|
docParts.push(`## ${collectionLabel} (retrieved)\n${chunkLines.join('\n\n')}`);
|
|
1419
|
+
totalTriggerTokens += chunkTokens;
|
|
572
1420
|
contextTokens += chunkTokens;
|
|
573
1421
|
remaining -= chunkTokens;
|
|
574
1422
|
slots.library += chunkTokens;
|
|
1423
|
+
diagDocChunkCollections++;
|
|
575
1424
|
}
|
|
576
1425
|
}
|
|
577
1426
|
catch {
|
|
@@ -582,6 +1431,61 @@ export class Compositor {
|
|
|
582
1431
|
contextParts.push(docParts.join('\n\n'));
|
|
583
1432
|
}
|
|
584
1433
|
}
|
|
1434
|
+
else if (remaining > 400 && (this.vectorStore || libDb)) {
|
|
1435
|
+
// Trigger-miss fallback: no trigger fired — attempt bounded semantic retrieval
|
|
1436
|
+
// so there is never a silent zero-memory path on doc chunks.
|
|
1437
|
+
// INVARIANT: this block is mutually exclusive with triggered-retrieval above.
|
|
1438
|
+
// If refactored to run both paths, cap combined semantic budget to avoid double-recall.
|
|
1439
|
+
try {
|
|
1440
|
+
const fallbackContent = await Promise.race([
|
|
1441
|
+
this.buildSemanticRecall(lastMsg, request.agentId, Math.floor(remaining * 0.10), libDb || undefined),
|
|
1442
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error('fallback_knn_timeout')), 3000)),
|
|
1443
|
+
]);
|
|
1444
|
+
if (fallbackContent) {
|
|
1445
|
+
contextParts.push(`## Related Memory\n${fallbackContent}`);
|
|
1446
|
+
const fallbackTokens = estimateTokens(fallbackContent);
|
|
1447
|
+
contextTokens += fallbackTokens;
|
|
1448
|
+
remaining -= fallbackTokens;
|
|
1449
|
+
slots.context += fallbackTokens;
|
|
1450
|
+
triggerFallbackUsed = true;
|
|
1451
|
+
diagTriggerFallbackUsed = true;
|
|
1452
|
+
diagRetrievalMode = 'fallback_knn';
|
|
1453
|
+
}
|
|
1454
|
+
}
|
|
1455
|
+
catch {
|
|
1456
|
+
// Fallback is best-effort — never fail composition (includes timeout)
|
|
1457
|
+
}
|
|
1458
|
+
}
|
|
1459
|
+
}
|
|
1460
|
+
// ── Session-Scoped Doc Chunks (spawn context inheritance) ────
|
|
1461
|
+
// When parentSessionKey is set, retrieve ephemeral doc chunks indexed
|
|
1462
|
+
// by buildSpawnContext() for this spawn session.
|
|
1463
|
+
if (request.parentSessionKey && remaining > 200 && libDb) {
|
|
1464
|
+
try {
|
|
1465
|
+
const spawnChunkStore = new DocChunkStore(libDb);
|
|
1466
|
+
const spawnQueryMsg = request.prompt?.trim() || this.getLastUserMessage(messages) || '';
|
|
1467
|
+
const spawnChunks = spawnChunkStore.queryDocChunks(request.agentId, spawnQueryMsg, { sessionKey: request.parentSessionKey, limit: 8 });
|
|
1468
|
+
if (spawnChunks.length > 0) {
|
|
1469
|
+
const spawnLines = [];
|
|
1470
|
+
let spawnTokens = 0;
|
|
1471
|
+
const maxSpawnTokens = Math.floor(remaining * 0.15);
|
|
1472
|
+
for (const chunk of spawnChunks) {
|
|
1473
|
+
if (spawnTokens + chunk.tokenEstimate > maxSpawnTokens)
|
|
1474
|
+
break;
|
|
1475
|
+
spawnLines.push(chunk.content);
|
|
1476
|
+
spawnTokens += chunk.tokenEstimate;
|
|
1477
|
+
}
|
|
1478
|
+
if (spawnLines.length > 0) {
|
|
1479
|
+
contextParts.push(`## Spawn Context Documents\n${spawnLines.join('\n\n')}`);
|
|
1480
|
+
contextTokens += spawnTokens;
|
|
1481
|
+
remaining -= spawnTokens;
|
|
1482
|
+
slots.library += spawnTokens;
|
|
1483
|
+
}
|
|
1484
|
+
}
|
|
1485
|
+
}
|
|
1486
|
+
catch {
|
|
1487
|
+
// Session-scoped chunk retrieval is best-effort
|
|
1488
|
+
}
|
|
585
1489
|
}
|
|
586
1490
|
// ── Cross-Session Context (L2: Messages) ─────────────────
|
|
587
1491
|
if (request.includeContext !== false && remaining > 500) {
|
|
@@ -606,6 +1510,21 @@ export class Compositor {
|
|
|
606
1510
|
}
|
|
607
1511
|
}
|
|
608
1512
|
}
|
|
1513
|
+
// ── Action Verification Summary ─────────────────────────
|
|
1514
|
+
// Keep recent action history on the dynamic side of the cache boundary.
|
|
1515
|
+
if (remaining > 50 && request.includeLibrary !== false) {
|
|
1516
|
+
const pressurePct = budget > 0 ? Math.round(((budget - remaining) / budget) * 100) : 0;
|
|
1517
|
+
const actionSummary = buildActionVerificationSummary(messages, pressurePct);
|
|
1518
|
+
if (actionSummary) {
|
|
1519
|
+
const actionTokens = Math.ceil(actionSummary.length / 4);
|
|
1520
|
+
if (actionTokens <= remaining) {
|
|
1521
|
+
contextParts.push(actionSummary);
|
|
1522
|
+
contextTokens += actionTokens;
|
|
1523
|
+
remaining -= actionTokens;
|
|
1524
|
+
slots.context += actionTokens;
|
|
1525
|
+
}
|
|
1526
|
+
}
|
|
1527
|
+
}
|
|
609
1528
|
// ── Inject assembled context block ──────────────────────
|
|
610
1529
|
const assembledContextBlock = contextParts.length > 0 ? contextParts.join('\n\n') : undefined;
|
|
611
1530
|
if (assembledContextBlock) {
|
|
@@ -680,16 +1599,44 @@ export class Compositor {
|
|
|
680
1599
|
}
|
|
681
1600
|
}
|
|
682
1601
|
const totalTokens = budget - remaining;
|
|
683
|
-
// ───
|
|
1602
|
+
// ─── Slot reconciliation ─────────────────────────────────────────────────
|
|
1603
|
+
// totalTokens = budget - remaining is the authoritative spend figure.
|
|
1604
|
+
// The slot accounting can drift from this due to history trim (which
|
|
1605
|
+
// reduces slots.history but adds back to remaining after the budget
|
|
1606
|
+
// was already committed) and FOS/MOD token rounding.
|
|
1607
|
+
// Reconcile: assign any unaccounted tokens to slots.history so that
|
|
1608
|
+
// sum(slots) === totalTokens always holds.
|
|
1609
|
+
{
|
|
1610
|
+
const slotSum = (slots.system ?? 0) + (slots.identity ?? 0) +
|
|
1611
|
+
(slots.history ?? 0) + (slots.facts ?? 0) +
|
|
1612
|
+
(slots.context ?? 0) + (slots.library ?? 0);
|
|
1613
|
+
const delta = totalTokens - slotSum;
|
|
1614
|
+
if (delta !== 0) {
|
|
1615
|
+
slots.history = (slots.history ?? 0) + delta;
|
|
1616
|
+
}
|
|
1617
|
+
}
|
|
1618
|
+
// ─── Write Window Cache ─────────────────────────────
|
|
684
1619
|
// Cache the composed message array so the plugin can serve it directly
|
|
685
1620
|
// on the next assemble() call without re-running the full compose pipeline.
|
|
686
1621
|
// Short TTL (120s) — invalidated by afterTurn when new messages arrive.
|
|
1622
|
+
//
|
|
1623
|
+
// VS-1: Dual-write — session-scoped key for backwards compat;
|
|
1624
|
+
// topic-scoped key for per-topic window retrieval when activeTopicId is set.
|
|
687
1625
|
try {
|
|
688
|
-
await this.
|
|
1626
|
+
await this.cache.setWindow(request.agentId, request.sessionKey, messages, 120);
|
|
689
1627
|
}
|
|
690
1628
|
catch {
|
|
691
1629
|
// Window cache write is best-effort
|
|
692
1630
|
}
|
|
1631
|
+
// VS-1: Topic-scoped window dual-write
|
|
1632
|
+
if (composedActiveTopicId) {
|
|
1633
|
+
try {
|
|
1634
|
+
await this.cache.setTopicWindow(request.agentId, request.sessionKey, composedActiveTopicId, messages, 120);
|
|
1635
|
+
}
|
|
1636
|
+
catch {
|
|
1637
|
+
// Topic window write is best-effort
|
|
1638
|
+
}
|
|
1639
|
+
}
|
|
693
1640
|
// ─── Write Session Cursor ─────────────────────────────────
|
|
694
1641
|
// Record the newest message included in the submission window.
|
|
695
1642
|
// Background indexer uses this to find unprocessed high-signal content.
|
|
@@ -707,7 +1654,7 @@ export class Compositor {
|
|
|
707
1654
|
windowSize: historyMsgs.length,
|
|
708
1655
|
tokenCount: totalTokens,
|
|
709
1656
|
};
|
|
710
|
-
await this.
|
|
1657
|
+
await this.cache.setCursor(request.agentId, request.sessionKey, cursor);
|
|
711
1658
|
// Dual-write cursor to SQLite for durability across Redis eviction (P1.3)
|
|
712
1659
|
try {
|
|
713
1660
|
db.prepare(`
|
|
@@ -764,6 +1711,49 @@ export class Compositor {
|
|
|
764
1711
|
warnings.push('Compaction fence update failed (non-fatal)');
|
|
765
1712
|
}
|
|
766
1713
|
}
|
|
1714
|
+
// W3: Build compose diagnostics
|
|
1715
|
+
let zeroResultReason;
|
|
1716
|
+
if (contextParts.length === 0) {
|
|
1717
|
+
if (diagScopeFiltered > 0 && diagFactsIncluded === 0 && diagSemanticResults === 0) {
|
|
1718
|
+
zeroResultReason = 'scope_filtered_all';
|
|
1719
|
+
}
|
|
1720
|
+
else if (remaining <= 0) {
|
|
1721
|
+
zeroResultReason = 'budget_exhausted';
|
|
1722
|
+
}
|
|
1723
|
+
else if (diagTriggerHits === 0 && !diagTriggerFallbackUsed) {
|
|
1724
|
+
zeroResultReason = 'no_trigger_no_fallback';
|
|
1725
|
+
}
|
|
1726
|
+
else if ((diagTriggerHits > 0 || diagTriggerFallbackUsed) && diagFactsIncluded === 0 && diagSemanticResults === 0 && diagDocChunkCollections === 0) {
|
|
1727
|
+
// Retrieval was attempted (trigger fired or fallback ran) but returned nothing — likely a retrieval bug
|
|
1728
|
+
// rather than a genuinely empty corpus. Distinguish from 'empty_corpus' for observability.
|
|
1729
|
+
zeroResultReason = 'unknown';
|
|
1730
|
+
}
|
|
1731
|
+
else {
|
|
1732
|
+
zeroResultReason = 'empty_corpus';
|
|
1733
|
+
}
|
|
1734
|
+
}
|
|
1735
|
+
const diagnostics = {
|
|
1736
|
+
triggerHits: diagTriggerHits,
|
|
1737
|
+
triggerFallbackUsed: diagTriggerFallbackUsed,
|
|
1738
|
+
factsIncluded: diagFactsIncluded,
|
|
1739
|
+
semanticResultsIncluded: diagSemanticResults,
|
|
1740
|
+
docChunksCollections: diagDocChunkCollections,
|
|
1741
|
+
scopeFiltered: diagScopeFiltered,
|
|
1742
|
+
zeroResultReason,
|
|
1743
|
+
retrievalMode: diagRetrievalMode,
|
|
1744
|
+
crossTopicKeystones: diagCrossTopicKeystones,
|
|
1745
|
+
reserveFraction: dynamicReserve,
|
|
1746
|
+
avgTurnCostTokens: avgTurnCost,
|
|
1747
|
+
dynamicReserveActive: isDynamic,
|
|
1748
|
+
sessionPressureHigh: pressureHigh,
|
|
1749
|
+
};
|
|
1750
|
+
if (pressureHigh) {
|
|
1751
|
+
warnings.push(`SESSION_PRESSURE_HIGH: avg_turn_cost=${avgTurnCost} tokens, dynamic reserve capped at ${Math.round(dynamicReserve * 100)}%`);
|
|
1752
|
+
}
|
|
1753
|
+
else if (dynamicReserve > 0.40) {
|
|
1754
|
+
console.info(`[hypermem:compositor] dynamic_reserve=${Math.round(dynamicReserve * 100)}% avg_turn_cost=${Math.round(avgTurnCost / 1000)}k horizon=${this.config.dynamicReserveTurnHorizon ?? 5}`);
|
|
1755
|
+
}
|
|
1756
|
+
console.log(`[hypermem:compose] agent=${request.agentId} triggers=${diagTriggerHits} fallback=${diagTriggerFallbackUsed} facts=${diagFactsIncluded} semantic=${diagSemanticResults} chunks=${diagDocChunkCollections} scopeFiltered=${diagScopeFiltered} mode=${diagRetrievalMode} crossTopicKeystones=${diagCrossTopicKeystones}`);
|
|
767
1757
|
return {
|
|
768
1758
|
messages: outputMessages,
|
|
769
1759
|
tokenCount: totalTokens,
|
|
@@ -772,6 +1762,7 @@ export class Compositor {
|
|
|
772
1762
|
hasWarnings: warnings.length > 0,
|
|
773
1763
|
warnings,
|
|
774
1764
|
contextBlock: assembledContextBlock,
|
|
1765
|
+
diagnostics,
|
|
775
1766
|
};
|
|
776
1767
|
}
|
|
777
1768
|
/**
|
|
@@ -787,9 +1778,15 @@ export class Compositor {
|
|
|
787
1778
|
// token-budget-cap the warm set. This replaces the old WARM_BOOTSTRAP_CAP
|
|
788
1779
|
// message-count constant which was a blunt instrument — 100 messages of
|
|
789
1780
|
// large tool results can massively exceed the history budget allocation.
|
|
790
|
-
|
|
1781
|
+
// Warm budget uses the same reserve fraction as compose() so warm history
|
|
1782
|
+
// never pre-fills more than compose() would actually allow.
|
|
1783
|
+
const reserve = this.config.contextWindowReserve ?? 0.15;
|
|
1784
|
+
const effectiveBudget = resolveModelBudget(opts?.model, this.config.defaultTokenBudget, reserve);
|
|
1785
|
+
const warmBudget = Math.floor(effectiveBudget * (this.config.warmHistoryBudgetFraction ?? 0.4));
|
|
791
1786
|
const rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages);
|
|
792
|
-
const transformedForWarm = applyToolGradient(rawHistory,
|
|
1787
|
+
const transformedForWarm = applyToolGradient(rawHistory, {
|
|
1788
|
+
totalWindowTokens: resolveModelWindow(opts?.model, this.config.defaultTokenBudget),
|
|
1789
|
+
});
|
|
793
1790
|
// Walk newest→oldest, accumulate transformed token cost, stop when budget exhausted
|
|
794
1791
|
let warmTokens = 0;
|
|
795
1792
|
const history = [];
|
|
@@ -811,7 +1808,7 @@ export class Compositor {
|
|
|
811
1808
|
// compose() calls buildFactsFromDb() and buildCrossSessionContext() directly
|
|
812
1809
|
// from SQLite on every turn (~0.3ms each) — faster than a Redis GET round-trip.
|
|
813
1810
|
// Caching them here would create stale entries that compose() ignores anyway.
|
|
814
|
-
await this.
|
|
1811
|
+
await this.cache.warmSession(agentId, sessionKey, {
|
|
815
1812
|
system: opts?.systemPrompt,
|
|
816
1813
|
identity: opts?.identity,
|
|
817
1814
|
history,
|
|
@@ -827,17 +1824,58 @@ export class Compositor {
|
|
|
827
1824
|
},
|
|
828
1825
|
});
|
|
829
1826
|
}
|
|
1827
|
+
async refreshRedisGradient(agentId, sessionKey, db, tokenBudget) {
|
|
1828
|
+
const store = new MessageStore(db);
|
|
1829
|
+
const conversation = store.getConversation(sessionKey);
|
|
1830
|
+
if (!conversation)
|
|
1831
|
+
return;
|
|
1832
|
+
const rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages);
|
|
1833
|
+
const transformedHistory = applyToolGradient(rawHistory, {
|
|
1834
|
+
totalWindowTokens: tokenBudget && tokenBudget > 0
|
|
1835
|
+
? Math.max(tokenBudget, Math.floor(tokenBudget / 0.80))
|
|
1836
|
+
: TOOL_PLANNING_BASELINE_WINDOW,
|
|
1837
|
+
});
|
|
1838
|
+
// If a token budget is provided, trim the gradient-compressed window to fit
|
|
1839
|
+
// before writing to Redis. Without this, up to maxHistoryMessages messages
|
|
1840
|
+
// land in Redis regardless of size, and trimHistoryToTokenBudget fires
|
|
1841
|
+
// on every subsequent assemble() causing per-turn churn.
|
|
1842
|
+
let historyToWrite = transformedHistory;
|
|
1843
|
+
if (tokenBudget && tokenBudget > 0) {
|
|
1844
|
+
const budgetCap = Math.floor(tokenBudget * 0.8);
|
|
1845
|
+
let runningTokens = 0;
|
|
1846
|
+
const clusters = clusterNeutralMessages(transformedHistory);
|
|
1847
|
+
const cappedClusters = [];
|
|
1848
|
+
// Walk newest-first, keep whole clusters so tool-call/result pairs survive together.
|
|
1849
|
+
for (let i = clusters.length - 1; i >= 0; i--) {
|
|
1850
|
+
const cluster = clusters[i];
|
|
1851
|
+
if (runningTokens + cluster.tokenCost > budgetCap && cappedClusters.length > 0)
|
|
1852
|
+
break;
|
|
1853
|
+
cappedClusters.unshift(cluster);
|
|
1854
|
+
runningTokens += cluster.tokenCost;
|
|
1855
|
+
if (runningTokens >= budgetCap)
|
|
1856
|
+
break;
|
|
1857
|
+
}
|
|
1858
|
+
historyToWrite = cappedClusters.flatMap(cluster => cluster.messages);
|
|
1859
|
+
if (historyToWrite.length < transformedHistory.length) {
|
|
1860
|
+
console.log(`[hypermem] refreshRedisGradient: cluster-capped ${transformedHistory.length}→${historyToWrite.length} messages ` +
|
|
1861
|
+
`for ${agentId}/${sessionKey} (budgetCap=${budgetCap}, tokenCost=${runningTokens})`);
|
|
1862
|
+
}
|
|
1863
|
+
}
|
|
1864
|
+
await this.cache.replaceHistory(agentId, sessionKey, historyToWrite, this.config.maxHistoryMessages);
|
|
1865
|
+
}
|
|
830
1866
|
// ─── Slot Content Resolution ─────────────────────────────────
|
|
831
1867
|
/**
|
|
832
1868
|
* Get slot content: try Redis first, fall back to SQLite.
|
|
833
1869
|
*/
|
|
834
1870
|
async getSlotContent(agentId, sessionKey, slot, db, libraryDb) {
|
|
835
|
-
const cached = await this.
|
|
1871
|
+
const cached = await this.cache.getSlot(agentId, sessionKey, slot);
|
|
836
1872
|
if (cached)
|
|
837
1873
|
return cached;
|
|
838
1874
|
switch (slot) {
|
|
839
|
-
case 'facts':
|
|
840
|
-
|
|
1875
|
+
case 'facts': {
|
|
1876
|
+
const result = this.buildFactsFromDb(agentId, sessionKey, libraryDb || this.libraryDb || db);
|
|
1877
|
+
return result ? result[0] : null;
|
|
1878
|
+
}
|
|
841
1879
|
case 'context':
|
|
842
1880
|
return this.buildCrossSessionContext(agentId, sessionKey, db, libraryDb || this.libraryDb);
|
|
843
1881
|
default:
|
|
@@ -846,31 +1884,45 @@ export class Compositor {
|
|
|
846
1884
|
}
|
|
847
1885
|
/**
|
|
848
1886
|
* Get conversation history: try Redis first, fall back to SQLite.
|
|
1887
|
+
*
|
|
1888
|
+
* When topicId is provided (P3.4), the SQLite path filters to messages
|
|
1889
|
+
* matching that topic OR with topic_id IS NULL (Option B transition safety).
|
|
1890
|
+
* The Redis path is unaffected — Redis doesn't index by topic, so topic
|
|
1891
|
+
* filtering only applies to the SQLite fallback.
|
|
849
1892
|
*/
|
|
850
|
-
async getHistory(agentId, sessionKey, limit, store) {
|
|
1893
|
+
async getHistory(agentId, sessionKey, limit, store, topicId) {
|
|
851
1894
|
// Pass limit through to Redis — this is the correct enforcement point.
|
|
852
1895
|
// Previously getHistory() ignored the limit on the Redis path (LRANGE 0 -1),
|
|
853
1896
|
// meaning historyDepth in the compose request had no effect on hot sessions.
|
|
854
|
-
const cached = await this.
|
|
1897
|
+
const cached = await this.cache.getHistory(agentId, sessionKey, limit);
|
|
855
1898
|
if (cached.length > 0)
|
|
856
1899
|
return cached;
|
|
857
1900
|
const conversation = store.getConversation(sessionKey);
|
|
858
1901
|
if (!conversation)
|
|
859
1902
|
return [];
|
|
1903
|
+
if (topicId) {
|
|
1904
|
+
// P3.4: Option B — active topic messages + legacy NULL messages
|
|
1905
|
+
return store.getRecentMessagesByTopic(conversation.id, topicId, limit);
|
|
1906
|
+
}
|
|
860
1907
|
return store.getRecentMessages(conversation.id, limit);
|
|
861
1908
|
}
|
|
862
1909
|
// ─── L4 Library Builders ─────────────────────────────────────
|
|
863
1910
|
/**
|
|
864
1911
|
* Build facts content from library DB.
|
|
865
1912
|
*/
|
|
866
|
-
|
|
1913
|
+
/**
|
|
1914
|
+
* Build facts content from library DB.
|
|
1915
|
+
* Applies filterByScope (W1) to enforce retrieval access control.
|
|
1916
|
+
* Returns [content, factCount, scopeFilteredCount] or null if DB unavailable.
|
|
1917
|
+
*/
|
|
1918
|
+
buildFactsFromDb(agentId, sessionKey, db) {
|
|
867
1919
|
if (!db)
|
|
868
1920
|
return null;
|
|
869
1921
|
const tableExists = db.prepare("SELECT count(*) as cnt FROM sqlite_master WHERE type='table' AND name='facts'").get();
|
|
870
1922
|
if (!tableExists || tableExists.cnt === 0)
|
|
871
1923
|
return null;
|
|
872
|
-
const
|
|
873
|
-
SELECT content, domain, confidence FROM facts
|
|
1924
|
+
const rawRows = db.prepare(`
|
|
1925
|
+
SELECT content, domain, confidence, agent_id, source_session_key AS session_key, scope FROM facts
|
|
874
1926
|
WHERE agent_id = ?
|
|
875
1927
|
AND superseded_by IS NULL
|
|
876
1928
|
AND (expires_at IS NULL OR expires_at > datetime('now'))
|
|
@@ -879,11 +1931,30 @@ export class Compositor {
|
|
|
879
1931
|
ORDER BY confidence DESC, decay_score ASC
|
|
880
1932
|
LIMIT ?
|
|
881
1933
|
`).all(agentId, this.config.maxFacts);
|
|
882
|
-
if (
|
|
883
|
-
return null;
|
|
884
|
-
|
|
885
|
-
|
|
1934
|
+
if (rawRows.length === 0)
|
|
1935
|
+
return [null, 0, 0];
|
|
1936
|
+
// W1: Apply scope filter — enforce retrieval access control
|
|
1937
|
+
const ctx = { agentId, sessionKey };
|
|
1938
|
+
const { allowed, filteredCount } = filterByScope(rawRows.map(r => ({
|
|
1939
|
+
...r,
|
|
1940
|
+
agentId: r.agent_id,
|
|
1941
|
+
sessionKey: r.session_key,
|
|
1942
|
+
})), ctx);
|
|
1943
|
+
if (allowed.length === 0)
|
|
1944
|
+
return [null, 0, filteredCount];
|
|
1945
|
+
const content = allowed
|
|
1946
|
+
.map(r => {
|
|
1947
|
+
// Session attribution: label facts from a different session so the model
|
|
1948
|
+
// can distinguish current-session context from cross-session facts.
|
|
1949
|
+
// Shows last 8 chars of session key as a stable short identifier.
|
|
1950
|
+
const fromOtherSession = r.sessionKey && r.sessionKey !== sessionKey;
|
|
1951
|
+
const sessionSuffix = fromOtherSession
|
|
1952
|
+
? `, session:${r.sessionKey.slice(-8)}`
|
|
1953
|
+
: '';
|
|
1954
|
+
return `- [${r.domain || 'general'}${sessionSuffix}] ${r.content}`;
|
|
1955
|
+
})
|
|
886
1956
|
.join('\n');
|
|
1957
|
+
return [content, allowed.length, filteredCount];
|
|
887
1958
|
}
|
|
888
1959
|
/**
|
|
889
1960
|
* Build knowledge content from library DB.
|
|
@@ -919,6 +1990,19 @@ export class Compositor {
|
|
|
919
1990
|
}
|
|
920
1991
|
return lines.join('\n');
|
|
921
1992
|
}
|
|
1993
|
+
/**
|
|
1994
|
+
* Build wiki page context for the active topic.
|
|
1995
|
+
* Queries the knowledge table for a synthesized topic page and returns it
|
|
1996
|
+
* wrapped with a header. Capped at 600 tokens.
|
|
1997
|
+
*/
|
|
1998
|
+
buildWikiPageContext(agentId, topicName, db) {
|
|
1999
|
+
const knowledgeStore = new KnowledgeStore(db);
|
|
2000
|
+
const knowledge = knowledgeStore.get(agentId, 'topic-synthesis', topicName);
|
|
2001
|
+
if (!knowledge)
|
|
2002
|
+
return null;
|
|
2003
|
+
const wrapped = `## Active Topic: ${topicName}\n${knowledge.content}`;
|
|
2004
|
+
return this.truncateToTokens(wrapped, 600);
|
|
2005
|
+
}
|
|
922
2006
|
/**
|
|
923
2007
|
* Build preferences content from library DB.
|
|
924
2008
|
* Shows user/operator preferences relevant to this agent.
|
|
@@ -960,8 +2044,11 @@ export class Compositor {
|
|
|
960
2044
|
* Uses Reciprocal Rank Fusion to merge keyword and vector results.
|
|
961
2045
|
* Gracefully degrades: FTS5-only when no vector store, KNN-only
|
|
962
2046
|
* when FTS query is empty (all stop words), both when available.
|
|
2047
|
+
*
|
|
2048
|
+
* @param precomputedEmbedding — optional pre-computed embedding for the query.
|
|
2049
|
+
* When provided, the Ollama call inside VectorStore.search() is skipped.
|
|
963
2050
|
*/
|
|
964
|
-
async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb) {
|
|
2051
|
+
async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb, precomputedEmbedding) {
|
|
965
2052
|
const libDb = libraryDb || this.libraryDb;
|
|
966
2053
|
if (!libDb && !this.vectorStore)
|
|
967
2054
|
return null;
|
|
@@ -972,15 +2059,55 @@ export class Compositor {
|
|
|
972
2059
|
limit: 10,
|
|
973
2060
|
agentId,
|
|
974
2061
|
maxKnnDistance: 1.2,
|
|
2062
|
+
precomputedEmbedding,
|
|
975
2063
|
});
|
|
976
2064
|
if (results.length === 0)
|
|
977
2065
|
return null;
|
|
978
2066
|
const lines = [];
|
|
979
2067
|
let tokens = 0;
|
|
980
|
-
|
|
2068
|
+
// TUNE-015: apply recency decay to recall scores.
|
|
2069
|
+
// Messages and episodes from distant past score down even if semantically relevant.
|
|
2070
|
+
// A 5-day-old task-request should not compete equally with today's messages.
|
|
2071
|
+
// - Episodes: exponential decay, half-life 7 days
|
|
2072
|
+
// - Facts/knowledge: step-function penalty for items older than 48h
|
|
2073
|
+
// (prevents completed/stale tasks from outranking recent ones)
|
|
2074
|
+
// 48-72h: multiply by 0.7
|
|
2075
|
+
// >72h: multiply by 0.5
|
|
2076
|
+
const now = Date.now();
|
|
2077
|
+
const decayedResults = results.map(result => {
|
|
2078
|
+
if (!result.createdAt)
|
|
2079
|
+
return result;
|
|
2080
|
+
const ageMs = now - new Date(result.createdAt).getTime();
|
|
2081
|
+
const ageDays = ageMs / 86_400_000;
|
|
2082
|
+
if (result.sourceTable === 'episodes') {
|
|
2083
|
+
// Exponential half-life decay for episodes
|
|
2084
|
+
const decayFactor = Math.pow(0.5, ageDays / 7);
|
|
2085
|
+
return { ...result, score: result.score * decayFactor };
|
|
2086
|
+
}
|
|
2087
|
+
// Step-function recency penalty for facts and knowledge
|
|
2088
|
+
const ageHours = ageMs / 3_600_000;
|
|
2089
|
+
if (ageHours > 72) {
|
|
2090
|
+
return { ...result, score: result.score * 0.5 };
|
|
2091
|
+
}
|
|
2092
|
+
if (ageHours > 48) {
|
|
2093
|
+
return { ...result, score: result.score * 0.7 };
|
|
2094
|
+
}
|
|
2095
|
+
return result;
|
|
2096
|
+
});
|
|
2097
|
+
// Re-sort after decay adjustment
|
|
2098
|
+
decayedResults.sort((a, b) => b.score - a.score);
|
|
2099
|
+
for (const result of decayedResults) {
|
|
981
2100
|
// TUNE-001: drop very-low-relevance results (RRF scores below 0.008 are noise)
|
|
982
2101
|
if (result.score < 0.008)
|
|
983
2102
|
continue;
|
|
2103
|
+
// TUNE-016: FTS-only results require higher floor — low-score FTS hits are noise
|
|
2104
|
+
if (result.sources.length === 1 && result.sources[0] === 'fts' && result.score < 0.05)
|
|
2105
|
+
continue;
|
|
2106
|
+
// TUNE-014: episodes require higher confidence — score:2 episodes bleed adjacent
|
|
2107
|
+
// session context and contaminate current session. Require fts+knn agreement
|
|
2108
|
+
// (score >= 0.04) for episodes to make it into assembled context.
|
|
2109
|
+
if (result.sourceTable === 'episodes' && result.score < 0.04)
|
|
2110
|
+
continue;
|
|
984
2111
|
const label = this.formatHybridResult(result);
|
|
985
2112
|
const lineTokens = estimateTokens(label);
|
|
986
2113
|
if (tokens + lineTokens > maxTokens)
|
|
@@ -997,6 +2124,7 @@ export class Compositor {
|
|
|
997
2124
|
tables: ['facts', 'knowledge', 'episodes'],
|
|
998
2125
|
limit: 8,
|
|
999
2126
|
maxDistance: 1.2,
|
|
2127
|
+
precomputedEmbedding,
|
|
1000
2128
|
});
|
|
1001
2129
|
if (results.length === 0)
|
|
1002
2130
|
return null;
|
|
@@ -1105,5 +2233,326 @@ export class Compositor {
|
|
|
1105
2233
|
}
|
|
1106
2234
|
return truncated + '…';
|
|
1107
2235
|
}
|
|
2236
|
+
// ─── Keystone History Builder ─────────────────────────────────────
|
|
2237
|
+
/**
|
|
2238
|
+
* Query and score keystone candidates from before the current history window.
|
|
2239
|
+
*
|
|
2240
|
+
* Trims the oldest messages from includedHistory to free a keystone budget,
|
|
2241
|
+
* then queries the DB for older messages scored by episode significance,
|
|
2242
|
+
* FTS5 relevance, and recency.
|
|
2243
|
+
*
|
|
2244
|
+
* Returns null if keystones cannot be injected (no cutoff ID found,
|
|
2245
|
+
* no candidates, or all errors).
|
|
2246
|
+
*/
|
|
2247
|
+
async buildKeystones(db, agentId, includedHistory, historyTokens, keystoneFraction, keystoneMaxMsgs, prompt, libraryDb) {
|
|
2248
|
+
const keystoneBudget = Math.floor(historyTokens * keystoneFraction);
|
|
2249
|
+
if (keystoneBudget <= 0)
|
|
2250
|
+
return null;
|
|
2251
|
+
// Trim oldest messages from includedHistory to free keystone budget.
|
|
2252
|
+
const trimmedHistory = [...includedHistory];
|
|
2253
|
+
let trimmedHistoryTokens = historyTokens;
|
|
2254
|
+
let freed = 0;
|
|
2255
|
+
while (trimmedHistory.length > 1 && freed < keystoneBudget) {
|
|
2256
|
+
const oldest = trimmedHistory.shift();
|
|
2257
|
+
const oldestTokens = estimateMessageTokens(oldest);
|
|
2258
|
+
freed += oldestTokens;
|
|
2259
|
+
trimmedHistoryTokens -= oldestTokens;
|
|
2260
|
+
}
|
|
2261
|
+
// Find the oldest message ID in the trimmed recent window (cutoff point).
|
|
2262
|
+
const oldestRecentMsg = trimmedHistory[0];
|
|
2263
|
+
const cutoffId = oldestRecentMsg?.id ?? null;
|
|
2264
|
+
if (cutoffId == null)
|
|
2265
|
+
return null;
|
|
2266
|
+
// Find the current user prompt for FTS matching.
|
|
2267
|
+
const promptForFts = prompt?.trim() ||
|
|
2268
|
+
(() => {
|
|
2269
|
+
for (let i = trimmedHistory.length - 1; i >= 0; i--) {
|
|
2270
|
+
if (trimmedHistory[i].role === 'user' && trimmedHistory[i].textContent) {
|
|
2271
|
+
return trimmedHistory[i].textContent;
|
|
2272
|
+
}
|
|
2273
|
+
}
|
|
2274
|
+
return null;
|
|
2275
|
+
})();
|
|
2276
|
+
try {
|
|
2277
|
+
// Get the conversation ID from the oldest recent message.
|
|
2278
|
+
const convRow = db.prepare('SELECT conversation_id FROM messages WHERE id = ?').get(cutoffId);
|
|
2279
|
+
if (!convRow)
|
|
2280
|
+
return null;
|
|
2281
|
+
const conversationId = convRow.conversation_id;
|
|
2282
|
+
const maxAgeHours = 168; // 7 days — tighter window gives recency real scoring weight
|
|
2283
|
+
const nowMs = Date.now();
|
|
2284
|
+
// Build episode significance map from libraryDb (episodes live there, not in messages.db).
|
|
2285
|
+
// Key: source_message_id, Value: max significance for that message.
|
|
2286
|
+
const sigMap = new Map();
|
|
2287
|
+
if (libraryDb) {
|
|
2288
|
+
try {
|
|
2289
|
+
const episodeRows = libraryDb.prepare(`
|
|
2290
|
+
SELECT source_message_id, MAX(significance) AS significance
|
|
2291
|
+
FROM episodes
|
|
2292
|
+
WHERE agent_id = ? AND source_message_id IS NOT NULL
|
|
2293
|
+
GROUP BY source_message_id
|
|
2294
|
+
`).all(agentId);
|
|
2295
|
+
for (const row of episodeRows) {
|
|
2296
|
+
sigMap.set(row.source_message_id, row.significance);
|
|
2297
|
+
}
|
|
2298
|
+
}
|
|
2299
|
+
catch {
|
|
2300
|
+
// Episodes query is best-effort
|
|
2301
|
+
}
|
|
2302
|
+
}
|
|
2303
|
+
const baseQuery = `
|
|
2304
|
+
SELECT
|
|
2305
|
+
m.id,
|
|
2306
|
+
m.message_index,
|
|
2307
|
+
m.role,
|
|
2308
|
+
m.text_content,
|
|
2309
|
+
m.created_at
|
|
2310
|
+
FROM messages m
|
|
2311
|
+
WHERE m.conversation_id = ?
|
|
2312
|
+
AND m.id < ?
|
|
2313
|
+
AND m.text_content IS NOT NULL
|
|
2314
|
+
AND m.is_heartbeat = 0
|
|
2315
|
+
AND m.text_content != ''
|
|
2316
|
+
LIMIT 200
|
|
2317
|
+
`;
|
|
2318
|
+
let candidateRows;
|
|
2319
|
+
if (promptForFts && promptForFts.length >= 3) {
|
|
2320
|
+
// Build a safe FTS5 query: extract words ≥3 chars, up to 8, OR with prefix.
|
|
2321
|
+
const ftsTerms = (promptForFts.match(/\b\w{3,}\b/g) || [])
|
|
2322
|
+
.slice(0, 8)
|
|
2323
|
+
.map(w => `"${w.replace(/"/g, '')}"*`)
|
|
2324
|
+
.join(' OR ');
|
|
2325
|
+
if (ftsTerms) {
|
|
2326
|
+
try {
|
|
2327
|
+
candidateRows = db.prepare(`
|
|
2328
|
+
SELECT
|
|
2329
|
+
m.id,
|
|
2330
|
+
m.message_index,
|
|
2331
|
+
m.role,
|
|
2332
|
+
m.text_content,
|
|
2333
|
+
m.created_at
|
|
2334
|
+
FROM messages m
|
|
2335
|
+
WHERE m.conversation_id = ?
|
|
2336
|
+
AND m.id < ?
|
|
2337
|
+
AND m.text_content IS NOT NULL
|
|
2338
|
+
AND m.is_heartbeat = 0
|
|
2339
|
+
AND m.text_content != ''
|
|
2340
|
+
AND m.id IN (
|
|
2341
|
+
SELECT rowid FROM messages_fts
|
|
2342
|
+
WHERE messages_fts MATCH ?
|
|
2343
|
+
LIMIT 100
|
|
2344
|
+
)
|
|
2345
|
+
LIMIT 200
|
|
2346
|
+
`).all(conversationId, cutoffId, ftsTerms);
|
|
2347
|
+
}
|
|
2348
|
+
catch {
|
|
2349
|
+
// FTS query may fail on special characters — fall back to base query
|
|
2350
|
+
candidateRows = db.prepare(baseQuery).all(conversationId, cutoffId);
|
|
2351
|
+
}
|
|
2352
|
+
}
|
|
2353
|
+
else {
|
|
2354
|
+
candidateRows = db.prepare(baseQuery).all(conversationId, cutoffId);
|
|
2355
|
+
}
|
|
2356
|
+
}
|
|
2357
|
+
else {
|
|
2358
|
+
candidateRows = db.prepare(baseQuery).all(conversationId, cutoffId);
|
|
2359
|
+
}
|
|
2360
|
+
if (candidateRows.length === 0)
|
|
2361
|
+
return null;
|
|
2362
|
+
// Build KeystoneCandidate objects with computed ftsRank and ageHours.
|
|
2363
|
+
const totalCandidates = candidateRows.length;
|
|
2364
|
+
const candidates = candidateRows.map((row, idx) => {
|
|
2365
|
+
const createdMs = new Date(row.created_at).getTime();
|
|
2366
|
+
const ageHours = (nowMs - createdMs) / (1000 * 60 * 60);
|
|
2367
|
+
// Normalize FTS rank by position (best match = 1.0, worst = 0.1)
|
|
2368
|
+
const ftsRank = totalCandidates > 1
|
|
2369
|
+
? 1.0 - (idx / totalCandidates) * 0.9
|
|
2370
|
+
: 1.0;
|
|
2371
|
+
return {
|
|
2372
|
+
messageId: row.id,
|
|
2373
|
+
messageIndex: row.message_index,
|
|
2374
|
+
role: row.role,
|
|
2375
|
+
content: row.text_content || '',
|
|
2376
|
+
timestamp: row.created_at,
|
|
2377
|
+
episodeSignificance: sigMap.get(row.id) ?? null,
|
|
2378
|
+
ftsRank,
|
|
2379
|
+
ageHours,
|
|
2380
|
+
};
|
|
2381
|
+
});
|
|
2382
|
+
// Score and rank candidates.
|
|
2383
|
+
const ranked = rankKeystones(candidates, maxAgeHours);
|
|
2384
|
+
// Budget-fit: take top-scored candidates until keystoneBudget exhausted.
|
|
2385
|
+
let kTokens = 0;
|
|
2386
|
+
const selectedKeystones = [];
|
|
2387
|
+
for (const candidate of ranked) {
|
|
2388
|
+
if (selectedKeystones.length >= keystoneMaxMsgs)
|
|
2389
|
+
break;
|
|
2390
|
+
const msg = {
|
|
2391
|
+
role: candidate.role,
|
|
2392
|
+
textContent: candidate.content,
|
|
2393
|
+
toolCalls: null,
|
|
2394
|
+
toolResults: null,
|
|
2395
|
+
};
|
|
2396
|
+
const msgTokens = estimateMessageTokens(msg);
|
|
2397
|
+
if (kTokens + msgTokens > keystoneBudget)
|
|
2398
|
+
continue; // skip oversized; keep trying
|
|
2399
|
+
selectedKeystones.push(candidate);
|
|
2400
|
+
kTokens += msgTokens;
|
|
2401
|
+
}
|
|
2402
|
+
if (selectedKeystones.length === 0)
|
|
2403
|
+
return null;
|
|
2404
|
+
// Sort selected keystones chronologically for injection.
|
|
2405
|
+
selectedKeystones.sort((a, b) => a.messageIndex - b.messageIndex);
|
|
2406
|
+
const keystoneMessages = selectedKeystones.map(c => ({
|
|
2407
|
+
role: c.role,
|
|
2408
|
+
textContent: c.content,
|
|
2409
|
+
toolCalls: null,
|
|
2410
|
+
toolResults: null,
|
|
2411
|
+
}));
|
|
2412
|
+
return {
|
|
2413
|
+
keystoneMessages,
|
|
2414
|
+
keystoneTokens: kTokens,
|
|
2415
|
+
trimmedHistory,
|
|
2416
|
+
trimmedHistoryTokens,
|
|
2417
|
+
};
|
|
2418
|
+
}
|
|
2419
|
+
catch {
|
|
2420
|
+
// Keystone injection is best-effort — never fail compose
|
|
2421
|
+
return null;
|
|
2422
|
+
}
|
|
2423
|
+
}
|
|
2424
|
+
// ─── Cross-Topic Keystone Retrieval (P3.5) ───────────────────────
|
|
2425
|
+
/**
|
|
2426
|
+
* Pull high-signal messages from OTHER topics in this session when their
|
|
2427
|
+
* content is semantically relevant to the current active topic.
|
|
2428
|
+
*
|
|
2429
|
+
* Heuristic-only: no model calls. Token overlap between the current topic
|
|
2430
|
+
* name + last 3 user messages and candidate message content.
|
|
2431
|
+
*
|
|
2432
|
+
* @param agentId - The agent's ID
|
|
2433
|
+
* @param sessionKey - Current session key
|
|
2434
|
+
* @param activeTopic - The current active topic (id + name)
|
|
2435
|
+
* @param currentMessages - Recently included history messages for query extraction
|
|
2436
|
+
* @param db - The messages database
|
|
2437
|
+
* @param maxKeystones - Max cross-topic keystones to return (default 3)
|
|
2438
|
+
* @returns Scored keystones sorted by score DESC, deduplicated by message id
|
|
2439
|
+
*/
|
|
2440
|
+
async getKeystonesByTopic(agentId, sessionKey, activeTopic, currentMessages, db, maxKeystones = 3) {
|
|
2441
|
+
const otherTopics = db.prepare(`
|
|
2442
|
+
SELECT id, name
|
|
2443
|
+
FROM topics
|
|
2444
|
+
WHERE session_key = ? AND id != ?
|
|
2445
|
+
ORDER BY last_active_at DESC
|
|
2446
|
+
LIMIT 5
|
|
2447
|
+
`).all(sessionKey, activeTopic.id);
|
|
2448
|
+
if (otherTopics.length === 0)
|
|
2449
|
+
return [];
|
|
2450
|
+
// Extract key terms from active topic name + last 3 user messages
|
|
2451
|
+
const queryTerms = this.extractQueryTerms(activeTopic.name, currentMessages);
|
|
2452
|
+
if (queryTerms.size === 0)
|
|
2453
|
+
return [];
|
|
2454
|
+
const nowMs = Date.now();
|
|
2455
|
+
const maxAgeHours = 168; // 7 days, same as within-session keystones
|
|
2456
|
+
const seenIds = new Set();
|
|
2457
|
+
const allCandidates = [];
|
|
2458
|
+
for (const topic of otherTopics) {
|
|
2459
|
+
let topicMessages;
|
|
2460
|
+
try {
|
|
2461
|
+
topicMessages = db.prepare(`
|
|
2462
|
+
SELECT m.id, m.message_index, m.role, m.text_content, m.created_at
|
|
2463
|
+
FROM messages m
|
|
2464
|
+
JOIN conversations c ON m.conversation_id = c.id
|
|
2465
|
+
WHERE c.session_key = ?
|
|
2466
|
+
AND c.agent_id = ?
|
|
2467
|
+
AND m.topic_id = ?
|
|
2468
|
+
AND m.text_content IS NOT NULL
|
|
2469
|
+
AND m.text_content != ''
|
|
2470
|
+
AND m.is_heartbeat = 0
|
|
2471
|
+
ORDER BY m.message_index DESC
|
|
2472
|
+
LIMIT 50
|
|
2473
|
+
`).all(sessionKey, agentId, topic.id);
|
|
2474
|
+
}
|
|
2475
|
+
catch {
|
|
2476
|
+
// Corrupt topic data — skip this topic, never throw
|
|
2477
|
+
continue;
|
|
2478
|
+
}
|
|
2479
|
+
if (topicMessages.length === 0)
|
|
2480
|
+
continue;
|
|
2481
|
+
const topicCandidates = topicMessages.map((msg, idx) => {
|
|
2482
|
+
const createdMs = new Date(msg.created_at).getTime();
|
|
2483
|
+
const ageHours = (nowMs - createdMs) / (1000 * 60 * 60);
|
|
2484
|
+
const ftsRank = topicMessages.length > 1
|
|
2485
|
+
? 1.0 - (idx / topicMessages.length) * 0.9
|
|
2486
|
+
: 1.0;
|
|
2487
|
+
return {
|
|
2488
|
+
messageId: msg.id,
|
|
2489
|
+
messageIndex: msg.message_index,
|
|
2490
|
+
role: msg.role,
|
|
2491
|
+
content: msg.text_content,
|
|
2492
|
+
timestamp: msg.created_at,
|
|
2493
|
+
episodeSignificance: null,
|
|
2494
|
+
ftsRank,
|
|
2495
|
+
ageHours,
|
|
2496
|
+
};
|
|
2497
|
+
});
|
|
2498
|
+
const topTopicKeystones = rankKeystones(topicCandidates, maxAgeHours).slice(0, 10);
|
|
2499
|
+
// Filter to messages with semantic overlap (≥2 matching terms)
|
|
2500
|
+
const relevant = topTopicKeystones.filter(candidate => {
|
|
2501
|
+
const contentLower = candidate.content.toLowerCase();
|
|
2502
|
+
let matches = 0;
|
|
2503
|
+
for (const term of queryTerms) {
|
|
2504
|
+
if (contentLower.includes(term)) {
|
|
2505
|
+
matches++;
|
|
2506
|
+
if (matches >= 2)
|
|
2507
|
+
return true;
|
|
2508
|
+
}
|
|
2509
|
+
}
|
|
2510
|
+
return false;
|
|
2511
|
+
});
|
|
2512
|
+
if (relevant.length === 0)
|
|
2513
|
+
continue;
|
|
2514
|
+
// Re-score filtered candidates so they compete on the same final scale
|
|
2515
|
+
for (const candidate of relevant) {
|
|
2516
|
+
if (seenIds.has(candidate.messageId))
|
|
2517
|
+
continue;
|
|
2518
|
+
seenIds.add(candidate.messageId);
|
|
2519
|
+
const score = scoreKeystone(candidate, maxAgeHours);
|
|
2520
|
+
allCandidates.push({ ...candidate, score });
|
|
2521
|
+
}
|
|
2522
|
+
}
|
|
2523
|
+
if (allCandidates.length === 0)
|
|
2524
|
+
return [];
|
|
2525
|
+
// Sort by score DESC and return top maxKeystones
|
|
2526
|
+
return allCandidates
|
|
2527
|
+
.sort((a, b) => b.score - a.score)
|
|
2528
|
+
.slice(0, maxKeystones);
|
|
2529
|
+
}
|
|
2530
|
+
/**
|
|
2531
|
+
* Extract lowercase key terms from a topic name and the last 3 user messages.
|
|
2532
|
+
* Terms are: tokens with ≥4 characters (skip short stop words).
|
|
2533
|
+
* Returns a Set for O(1) lookup.
|
|
2534
|
+
*/
|
|
2535
|
+
extractQueryTerms(topicName, messages) {
|
|
2536
|
+
const terms = new Set();
|
|
2537
|
+
const MIN_TERM_LEN = 4;
|
|
2538
|
+
// From topic name
|
|
2539
|
+
const topicTokens = topicName.toLowerCase().match(/\b[a-z0-9]{4,}\b/g) ?? [];
|
|
2540
|
+
for (const t of topicTokens)
|
|
2541
|
+
terms.add(t);
|
|
2542
|
+
// From last 3 user messages
|
|
2543
|
+
let userCount = 0;
|
|
2544
|
+
for (let i = messages.length - 1; i >= 0 && userCount < 3; i--) {
|
|
2545
|
+
const msg = messages[i];
|
|
2546
|
+
if (msg.role === 'user' && msg.textContent) {
|
|
2547
|
+
const tokens = msg.textContent.toLowerCase().match(/\b[a-z0-9]{4,}\b/g) ?? [];
|
|
2548
|
+
for (const t of tokens) {
|
|
2549
|
+
if (t.length >= MIN_TERM_LEN)
|
|
2550
|
+
terms.add(t);
|
|
2551
|
+
}
|
|
2552
|
+
userCount++;
|
|
2553
|
+
}
|
|
2554
|
+
}
|
|
2555
|
+
return terms;
|
|
2556
|
+
}
|
|
1108
2557
|
}
|
|
1109
2558
|
//# sourceMappingURL=compositor.js.map
|