@psiclawops/hypermem 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +31 -39
- package/README.md +20 -14
- package/bin/hypermem-status.mjs +1 -1
- package/dist/background-indexer.d.ts +14 -3
- package/dist/background-indexer.d.ts.map +1 -1
- package/dist/background-indexer.js +135 -27
- package/dist/budget-policy.d.ts +22 -0
- package/dist/budget-policy.d.ts.map +1 -0
- package/dist/budget-policy.js +27 -0
- package/dist/cache.d.ts +11 -0
- package/dist/cache.d.ts.map +1 -1
- package/dist/compositor-utils.d.ts +31 -0
- package/dist/compositor-utils.d.ts.map +1 -0
- package/dist/compositor-utils.js +47 -0
- package/dist/compositor.d.ts +163 -1
- package/dist/compositor.d.ts.map +1 -1
- package/dist/compositor.js +862 -130
- package/dist/content-hash.d.ts +43 -0
- package/dist/content-hash.d.ts.map +1 -0
- package/dist/content-hash.js +75 -0
- package/dist/context-store.d.ts +54 -0
- package/dist/context-store.d.ts.map +1 -1
- package/dist/context-store.js +102 -0
- package/dist/contradiction-audit-store.d.ts +54 -0
- package/dist/contradiction-audit-store.d.ts.map +1 -0
- package/dist/contradiction-audit-store.js +88 -0
- package/dist/contradiction-resolution-policy.d.ts +21 -0
- package/dist/contradiction-resolution-policy.d.ts.map +1 -0
- package/dist/contradiction-resolution-policy.js +17 -0
- package/dist/cross-agent.d.ts +1 -1
- package/dist/cross-agent.js +17 -17
- package/dist/degradation.d.ts +102 -0
- package/dist/degradation.d.ts.map +1 -0
- package/dist/degradation.js +141 -0
- package/dist/dreaming-promoter.d.ts +39 -1
- package/dist/dreaming-promoter.d.ts.map +1 -1
- package/dist/dreaming-promoter.js +70 -4
- package/dist/index.d.ts +70 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +405 -29
- package/dist/knowledge-lint.d.ts +2 -0
- package/dist/knowledge-lint.d.ts.map +1 -1
- package/dist/knowledge-lint.js +40 -1
- package/dist/library-schema.d.ts +7 -2
- package/dist/library-schema.d.ts.map +1 -1
- package/dist/library-schema.js +236 -1
- package/dist/message-store.d.ts +64 -1
- package/dist/message-store.d.ts.map +1 -1
- package/dist/message-store.js +137 -1
- package/dist/proactive-pass.d.ts +2 -2
- package/dist/proactive-pass.d.ts.map +1 -1
- package/dist/proactive-pass.js +66 -12
- package/dist/replay-recovery.d.ts +29 -0
- package/dist/replay-recovery.d.ts.map +1 -0
- package/dist/replay-recovery.js +82 -0
- package/dist/reranker.d.ts +95 -0
- package/dist/reranker.d.ts.map +1 -0
- package/dist/reranker.js +308 -0
- package/dist/schema.d.ts +1 -1
- package/dist/schema.d.ts.map +1 -1
- package/dist/schema.js +46 -1
- package/dist/seed.d.ts +1 -1
- package/dist/seed.js +1 -1
- package/dist/session-flusher.d.ts +4 -4
- package/dist/session-flusher.d.ts.map +1 -1
- package/dist/session-flusher.js +3 -3
- package/dist/spawn-context.d.ts +1 -1
- package/dist/spawn-context.js +1 -1
- package/dist/tool-artifact-store.d.ts +98 -0
- package/dist/tool-artifact-store.d.ts.map +1 -0
- package/dist/tool-artifact-store.js +244 -0
- package/dist/topic-detector.js +2 -2
- package/dist/topic-store.d.ts +6 -0
- package/dist/topic-store.d.ts.map +1 -1
- package/dist/topic-store.js +39 -0
- package/dist/topic-synthesizer.js +1 -1
- package/dist/trigger-registry.d.ts +1 -1
- package/dist/trigger-registry.js +4 -4
- package/dist/types.d.ts +235 -3
- package/dist/types.d.ts.map +1 -1
- package/dist/vector-store.d.ts +2 -1
- package/dist/vector-store.d.ts.map +1 -1
- package/dist/vector-store.js +3 -0
- package/dist/version.d.ts +10 -10
- package/dist/version.d.ts.map +1 -1
- package/dist/version.js +10 -10
- package/package.json +6 -4
package/dist/compositor.js
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
* Token-budgeted: never exceeds the budget, prioritizes by configured order.
|
|
11
11
|
* Provider-neutral internally, translates at the output boundary.
|
|
12
12
|
*/
|
|
13
|
+
import { createHash } from 'node:crypto';
|
|
13
14
|
import { filterByScope } from './retrieval-policy.js';
|
|
14
15
|
import { DEFAULT_TRIGGERS, matchTriggers, logRegistryStartup, } from './trigger-registry.js';
|
|
15
16
|
import { MessageStore } from './message-store.js';
|
|
@@ -25,6 +26,9 @@ import { getActiveFOS, matchMOD, renderFOS, renderMOD, renderLightFOS, resolveOu
|
|
|
25
26
|
import { KnowledgeStore } from './knowledge-store.js';
|
|
26
27
|
import { TemporalStore, hasTemporalSignals } from './temporal-store.js';
|
|
27
28
|
import { isOpenDomainQuery, searchOpenDomain } from './open-domain.js';
|
|
29
|
+
import { TRIM_BUDGET_POLICY, resolveTrimBudgets } from './budget-policy.js';
|
|
30
|
+
import { formatToolChainStub, parseToolChainStub, formatArtifactRef, isArtifactRef } from './degradation.js';
|
|
31
|
+
import { ToolArtifactStore } from './tool-artifact-store.js';
|
|
28
32
|
/**
|
|
29
33
|
* Files that OpenClaw's contextInjection injects into the system prompt.
|
|
30
34
|
* HyperMem must not re-inject these via doc chunk retrieval to avoid duplication.
|
|
@@ -34,6 +38,7 @@ export const OPENCLAW_BOOTSTRAP_FILES = new Set([
|
|
|
34
38
|
'SOUL.md', 'IDENTITY.md', 'USER.md', 'TOOLS.md',
|
|
35
39
|
'AGENTS.md', 'HEARTBEAT.md', 'MEMORY.md', 'BOOTSTRAP.md',
|
|
36
40
|
]);
|
|
41
|
+
const CACHE_PREFIX_BOUNDARY_SLOT = 'cache-prefix-boundary';
|
|
37
42
|
/**
|
|
38
43
|
* Model context window sizes by provider/model string (or partial match).
|
|
39
44
|
* Used as fallback when tokenBudget is not passed by the runtime.
|
|
@@ -68,6 +73,67 @@ const MODEL_CONTEXT_WINDOWS = [
|
|
|
68
73
|
{ pattern: 'deepseek-v3', tokens: 131_072 },
|
|
69
74
|
{ pattern: 'deepseek', tokens: 131_072 },
|
|
70
75
|
];
|
|
76
|
+
/**
 * MECW catalog: per-model-family budget range and preferred lane fractions.
 * Entries are matched by substring against the lower-cased model string, in
 * array order, so more specific patterns (e.g. 'qwen3') must precede broader
 * ones (e.g. 'qwen').
 */
const MODEL_MECW = [
    // Claude 200k: effective recall degrades above ~140k; clamp composite budget
    { pattern: 'claude', mecwFloor: 80_000, mecwCeiling: 140_000, preferredHistoryFraction: 0.35, preferredMemoryFraction: 0.45 },
    // Gemini 1M: reliable up to ~180k for grounded retrieval; less for recall
    { pattern: 'gemini', mecwFloor: 100_000, mecwCeiling: 180_000, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.45 },
    // OpenAI 128k: full window is trustable; use standard fractions
    { pattern: 'gpt', mecwFloor: 128_000, mecwCeiling: 128_000, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
    { pattern: 'o3', mecwFloor: 128_000, mecwCeiling: 128_000, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
    { pattern: 'o4', mecwFloor: 128_000, mecwCeiling: 128_000, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
    // Smaller windows: full window is trustable
    { pattern: 'qwen3', mecwFloor: 262_144, mecwCeiling: 262_144, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
    { pattern: 'qwen', mecwFloor: 131_072, mecwCeiling: 131_072, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
    { pattern: 'glm', mecwFloor: 131_072, mecwCeiling: 131_072, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
    { pattern: 'deepseek', mecwFloor: 131_072, mecwCeiling: 131_072, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
];
/**
 * B4: Compute model-aware lane budget fractions.
 *
 * Looks up the first MODEL_MECW entry whose pattern is contained in the
 * lower-cased model string and blends the configured fractions toward that
 * entry's preferred fractions as the budget moves from the entry's floor to
 * its ceiling:
 *   - budget <= floor   → config fractions unchanged (mecwBlend 0)
 *   - budget >= ceiling → preferred fractions (mecwBlend 1)
 *   - in between        → linear blend, rounded to 3 decimals
 *
 * @param model                  Model identifier; falsy means "unknown model".
 * @param effectiveBudget        Effective token budget for this compose pass.
 * @param configHistoryFraction  Configured history fraction (fallback).
 * @param configMemoryFraction   Configured memory fraction (fallback).
 * @returns {{historyFraction, memoryFraction, mecwProfile, mecwApplied, mecwBlend}}
 *   historyFraction / memoryFraction — fractions to use for the lanes
 *   mecwProfile — pattern of the matched entry (undefined when none matched)
 *   mecwApplied — true when the preferred fractions were blended in
 *   mecwBlend   — 0..1 blend factor
 */
export function resolveModelLaneBudgets(model, effectiveBudget, configHistoryFraction, configMemoryFraction) {
    // Result used whenever the configured fractions pass through untouched.
    const unchanged = (mecwProfile) => ({
        historyFraction: configHistoryFraction,
        memoryFraction: configMemoryFraction,
        mecwProfile,
        mecwApplied: false,
        mecwBlend: 0,
    });
    const roundTo3 = (value) => Math.round(value * 1000) / 1000;
    if (!model) {
        return unchanged(undefined);
    }
    const normalized = model.toLowerCase();
    for (const entry of MODEL_MECW) {
        if (!normalized.includes(entry.pattern))
            continue;
        // Budget is at or below the floor — full window is safe, use config fractions
        if (effectiveBudget <= entry.mecwFloor) {
            return unchanged(entry.pattern);
        }
        // Budget is at or above the ceiling — use preferred fractions fully
        if (effectiveBudget >= entry.mecwCeiling) {
            return {
                historyFraction: entry.preferredHistoryFraction,
                memoryFraction: entry.preferredMemoryFraction,
                mecwProfile: entry.pattern,
                mecwApplied: true,
                mecwBlend: 1,
            };
        }
        // Budget is between floor and ceiling — linear blend
        const blend = (effectiveBudget - entry.mecwFloor) / (entry.mecwCeiling - entry.mecwFloor);
        // A NaN/undefined budget fails both comparisons above and would otherwise
        // leak NaN fractions to the caller; fall back to the config fractions.
        if (!Number.isFinite(blend)) {
            return unchanged(entry.pattern);
        }
        return {
            historyFraction: roundTo3(configHistoryFraction + blend * (entry.preferredHistoryFraction - configHistoryFraction)),
            memoryFraction: roundTo3(configMemoryFraction + blend * (entry.preferredMemoryFraction - configMemoryFraction)),
            mecwProfile: entry.pattern,
            mecwApplied: true,
            mecwBlend: roundTo3(blend),
        };
    }
    // No MECW entry matched — use config fractions unchanged
    return unchanged(undefined);
}
|
|
71
137
|
/**
|
|
72
138
|
* Resolve effective token budget from model string.
|
|
73
139
|
* Returns the context window for the model, minus the configured reserve fraction
|
|
@@ -166,6 +232,74 @@ function computeDynamicReserve(recentMessages, totalWindow, config) {
|
|
|
166
232
|
}
|
|
167
233
|
return { reserve: dynamicFrac, avgTurnCost, dynamic: true, pressureHigh: false };
|
|
168
234
|
}
|
|
235
|
+
/**
 * Label a session from the share of tool-bearing messages in a recent sample.
 *
 *   'tool-heavy' — at least 20% of the sampled messages satisfy hasToolContent()
 *   'plain-chat' — below that share, or the sample is empty
 *
 * @param messages Recent message sample.
 * @returns 'tool-heavy' | 'plain-chat'
 */
export function classifySessionType(messages) {
    const TOOL_HEAVY_RATIO = 0.20;
    const sampleSize = messages.length;
    // An empty sample carries no signal; treat it as plain chat.
    if (sampleSize === 0) {
        return 'plain-chat';
    }
    const toolBearing = messages.reduce((count, message) => (hasToolContent(message) ? count + 1 : count), 0);
    if (toolBearing / sampleSize >= TOOL_HEAVY_RATIO) {
        return 'tool-heavy';
    }
    return 'plain-chat';
}
|
|
250
|
+
/**
 * Average estimated token cost per message over a recent sample.
 *
 * Each message is costed with estimateMessageTokens(); the mean is rounded up
 * and never reported below 1. An empty sample yields a fixed 100 so callers
 * that divide a budget by the density get a usable divisor.
 *
 * @param messages Recent message sample.
 * @returns Tokens per message (integer, >= 1).
 */
export function estimateObservedMsgDensity(messages) {
    const sampleSize = messages.length;
    if (sampleSize === 0) {
        return 100;
    }
    let tokenSum = 0;
    messages.forEach((message) => {
        tokenSum = tokenSum + estimateMessageTokens(message);
    });
    const meanCost = Math.ceil(tokenSum / sampleSize);
    return Math.max(1, meanCost);
}
|
|
264
|
+
/**
 * Pick a history depth (message count) sized to the session type.
 *
 * depth = floor((historyBudgetTokens × 0.85) / density), then clamped:
 *   - 'tool-heavy': density is max(50, floor(observedDensity × 0.30)) — the
 *     expected per-message cost after the tool gradient collapses payloads;
 *     minimum depth 15.
 *   - anything else (plain chat): density is observedDensity as-is;
 *     minimum depth 20.
 * The result never exceeds maxHistoryMessages (the upper bound is applied
 * last, so it wins over the minimum).
 *
 * @param sessionType          'tool-heavy' or any other value for plain chat.
 * @param observedDensity      Observed tokens per message (pre-gradient).
 * @param historyBudgetTokens  Token budget available to history.
 * @param maxHistoryMessages   Hard ceiling on the returned depth.
 * @returns Number of history messages to load.
 */
export function computeAdaptiveHistoryDepth(sessionType, observedDensity, historyBudgetTokens, maxHistoryMessages) {
    // Headroom so a slightly-off density estimate does not overflow the budget.
    const SAFETY_MARGIN = 0.85;
    // Expected post-gradient / pre-gradient density ratio for tool-heavy sessions.
    const TOOL_GRADIENT_DENSITY_FACTOR = 0.30;
    const toolHeavy = sessionType === 'tool-heavy';
    const effectiveDensity = toolHeavy
        ? Math.max(50, Math.floor(observedDensity * TOOL_GRADIENT_DENSITY_FACTOR))
        : observedDensity;
    const minimumDepth = toolHeavy ? 15 : 20;
    const fittedDepth = Math.floor((historyBudgetTokens * SAFETY_MARGIN) / effectiveDensity);
    return Math.min(maxHistoryMessages, Math.max(minimumDepth, fittedDepth));
}
|
|
169
303
|
const DEFAULT_CONFIG = {
|
|
170
304
|
// Primary budget controls
|
|
171
305
|
budgetFraction: 0.703,
|
|
@@ -277,7 +411,7 @@ function clusterNeutralMessages(messages) {
|
|
|
277
411
|
*/
|
|
278
412
|
export function applyToolGradientToWindow(messages, tokenBudget, totalWindowTokens) {
|
|
279
413
|
const reshaped = applyToolGradient(messages, { totalWindowTokens });
|
|
280
|
-
const targetTokens =
|
|
414
|
+
const { softBudget: targetTokens } = resolveTrimBudgets(tokenBudget);
|
|
281
415
|
const clusters = clusterNeutralMessages(reshaped);
|
|
282
416
|
let totalTokens = clusters.reduce((sum, cluster) => sum + cluster.tokenCost, 0);
|
|
283
417
|
let start = 0;
|
|
@@ -328,6 +462,30 @@ function estimateMessageTokens(msg) {
|
|
|
328
462
|
tokens += 4;
|
|
329
463
|
return tokens;
|
|
330
464
|
}
|
|
465
|
+
// True when the message's metadata carries a truthy `dynamicBoundary` flag.
function isDynamicBoundaryMessage(msg) {
    return !!msg.metadata?.dynamicBoundary;
}
|
|
468
|
+
// Returns the leading run of system messages, stopping at the first message
// that is either not a system message or is flagged as a dynamic boundary.
// Always returns a new array; the input is not modified.
function getStablePrefixMessages(messages) {
    const cutoff = messages.findIndex((candidate) => candidate.role !== 'system' || isDynamicBoundaryMessage(candidate));
    // -1 means every message qualified, so the whole list is the prefix.
    return cutoff === -1 ? messages.slice() : messages.slice(0, cutoff);
}
|
|
479
|
+
// SHA-256 hex digest over the text of the given messages. Each message
// contributes its `textContent` (empty string when null/undefined) followed by
// a fixed separator. Returns undefined for an empty list.
function computeStablePrefixHash(messages) {
    if (messages.length === 0) {
        return undefined;
    }
    const digest = createHash('sha256');
    for (const { textContent } of messages) {
        digest.update(textContent ?? '').update('\n␞\n');
    }
    return digest.digest('hex');
}
|
|
331
489
|
function parseToolArgs(argumentsJson) {
|
|
332
490
|
try {
|
|
333
491
|
return JSON.parse(argumentsJson);
|
|
@@ -702,12 +860,211 @@ function evictLargeToolResults(messages) {
|
|
|
702
860
|
const approxKTokens = Math.round(content.length / 4 / 1000);
|
|
703
861
|
return {
|
|
704
862
|
...result,
|
|
705
|
-
content:
|
|
863
|
+
content: formatToolChainStub({
|
|
864
|
+
name: result.name || 'tool_result',
|
|
865
|
+
id: result.callId || 'unknown',
|
|
866
|
+
status: 'ejected',
|
|
867
|
+
reason: 'eviction_oversize',
|
|
868
|
+
summary: `~${approxKTokens}k tokens, use memory_search or re-run if needed`,
|
|
869
|
+
}),
|
|
706
870
|
};
|
|
707
871
|
});
|
|
708
872
|
return { ...msg, toolResults: evicted };
|
|
709
873
|
});
|
|
710
874
|
}
|
|
875
|
+
// ─── C2: Oversized artifact handling ────────────────────────────────────────
|
|
876
|
+
/**
 * C2: Resolve the artifact oversize threshold (in tokens) for the current compose pass.
 *
 * The threshold is derived from the soft budget that resolveTrimBudgets()
 * reports for the effective budget:
 *
 *   threshold = floor(softBudget × ARTIFACT_BUDGET_FRACTION)
 *
 * clamped to [ARTIFACT_THRESHOLD_FLOOR, ARTIFACT_THRESHOLD_CEILING], so small
 * budgets get a proportionally tighter threshold and large budgets stay
 * bounded.
 *
 * @param effectiveBudget Effective model budget for this compose pass.
 * @returns Token count above which a single artifact/chunk counts as oversized.
 */
const ARTIFACT_BUDGET_FRACTION = 0.10; // 10% of soft budget is the raw threshold
const ARTIFACT_THRESHOLD_FLOOR = 500; // never below 500 tokens (~2k chars)
const ARTIFACT_THRESHOLD_CEILING = 8_000; // never above 8k tokens (~32k chars)
export function resolveArtifactOversizeThreshold(effectiveBudget) {
    const trimBudgets = resolveTrimBudgets(effectiveBudget);
    const proportional = Math.floor(trimBudgets.softBudget * ARTIFACT_BUDGET_FRACTION);
    if (proportional < ARTIFACT_THRESHOLD_FLOOR) {
        return ARTIFACT_THRESHOLD_FLOOR;
    }
    if (proportional > ARTIFACT_THRESHOLD_CEILING) {
        return ARTIFACT_THRESHOLD_CEILING;
    }
    return proportional;
}
|
|
899
|
+
/**
 * C2: Degrade an oversized doc chunk to a canonical ArtifactRef string.
 *
 * The chunk's size is estimated with estimateTokens(). When it exceeds
 * `thresholdTokens`, a reference built by formatArtifactRef() is returned in
 * place of the raw content; the reference records the estimated size of the
 * original content.
 *
 * @param chunkId          Identifier stored as the reference id.
 * @param sourcePath       Source path stored as the reference path.
 * @param content          Raw chunk content.
 * @param thresholdTokens  Oversize threshold in tokens.
 * @returns `null` when the content is within the threshold (inject as-is),
 *          otherwise the artifact reference string to inject instead.
 */
export function degradeOversizedDocChunk(chunkId, sourcePath, content, thresholdTokens) {
    const sizeTokens = estimateTokens(content);
    const withinThreshold = sizeTokens <= thresholdTokens;
    if (withinThreshold) {
        return null;
    }
    return formatArtifactRef({
        id: chunkId,
        path: sourcePath,
        sizeTokens,
        status: 'degraded',
        reason: 'artifact_oversize',
        fetchHint: 'memory_search or re-read source file',
    });
}
|
|
927
|
+
/**
 * C2: Resolve oversized artifacts in a history message array.
 *
 * Every message whose text exceeds the threshold from
 * resolveArtifactOversizeThreshold(effectiveBudget) has its `textContent`
 * replaced by a canonical ArtifactRef string. Passed through unchanged:
 *   - system messages
 *   - messages with a truthy `toolResults` or `toolCalls`
 *   - messages whose text is already an artifact reference
 *   - messages at or under the threshold
 *
 * The reference id/path come from the message's string `_artifactId` /
 * `_artifactPath` fields when present; otherwise the id is derived from a
 * SHA-1 of "<role>:<text>" and the path defaults to '/unknown/artifact'.
 *
 * @param messages         Neutral message array (not mutated; replaced
 *                         messages are shallow copies).
 * @param effectiveBudget  Effective model budget (drives the threshold).
 * @returns { messages, refCount, tokensSaved }
 */
export function resolveOversizedArtifacts(messages, effectiveBudget) {
    const thresholdTokens = resolveArtifactOversizeThreshold(effectiveBudget);
    let refCount = 0;
    let tokensSaved = 0;
    // Reads an optional string-typed field; anything else counts as absent.
    const stringField = (source, key) => (typeof source[key] === 'string' ? source[key] : null);
    const degradeIfOversized = (msg) => {
        // System messages and tool-bearing messages are never touched here.
        if (msg.role === 'system' || msg.toolResults || msg.toolCalls) {
            return msg;
        }
        const text = msg.textContent ?? '';
        // Already a reference — leave it alone so repeated passes are no-ops.
        if (isArtifactRef(text)) {
            return msg;
        }
        const sizeTokens = estimateTokens(text);
        if (sizeTokens <= thresholdTokens) {
            return msg;
        }
        const refText = formatArtifactRef({
            id: stringField(msg, '_artifactId')
                ?? `msg-${createHash('sha1').update(`${msg.role}:${text}`).digest('hex').slice(0, 12)}`,
            path: stringField(msg, '_artifactPath')
                ?? '/unknown/artifact',
            sizeTokens,
            status: 'degraded',
            reason: 'artifact_oversize',
            fetchHint: 'memory_search',
        });
        tokensSaved += sizeTokens - estimateTokens(refText);
        refCount++;
        return { ...msg, textContent: refText };
    };
    const out = messages.map((msg) => degradeIfOversized(msg));
    return { messages: out, refCount, tokensSaved };
}
|
|
979
|
+
/**
 * C1: Centralized tool-chain dependency ejection.
 *
 * Removes the messages at `ejectIndices` and makes sure no tool-result that
 * depended on an ejected tool-call survives untouched:
 *
 *   - The ids of all toolCalls on the ejected messages are collected.
 *   - For each remaining message whose toolResults reference any of those ids:
 *       a) no (non-blank) text and every result is dependent → the whole
 *          message is dropped as well (co-ejection);
 *       b) otherwise → only the dependent toolResults entries get their
 *          content replaced by a canonical ToolChainStub string.
 *
 * The function performs the removal itself, by index, so it must be given the
 * full, unmodified message array; do not strip the ejected messages first.
 * The input array and its messages are not mutated.
 *
 * @param messages      Full message array (order preserved)
 * @param ejectIndices  Set of indices into `messages` that are being ejected
 * @param reason        DegradationReason to embed in any canonical stubs.
 * @returns { messages, coEjections, stubReplacements }
 */
export function resolveToolChainEjections(messages, ejectIndices, reason = 'eviction_oversize') {
    // Gather the ids of every tool call that disappears with the ejected messages.
    const ejectedCallIds = new Set();
    for (const ejectIdx of ejectIndices) {
        for (const call of messages[ejectIdx]?.toolCalls || []) {
            if (call.id) {
                ejectedCallIds.add(call.id);
            }
        }
    }
    let coEjections = 0;
    let stubReplacements = 0;
    // Nothing depends on the ejected messages: plain index-based removal.
    if (ejectedCallIds.size === 0) {
        const remaining = messages.filter((_, position) => !ejectIndices.has(position));
        return { messages: remaining, coEjections, stubReplacements };
    }
    const kept = [];
    messages.forEach((msg, position) => {
        if (ejectIndices.has(position)) {
            return;
        }
        const results = msg.toolResults;
        if (!results || results.length === 0) {
            kept.push(msg);
            return;
        }
        // Call ids in this message whose parent tool call is being ejected.
        const dependentSet = new Set(results
            .map((entry) => entry.callId)
            .filter((callId) => Boolean(callId) && ejectedCallIds.has(callId)));
        if (dependentSet.size === 0) {
            kept.push(msg);
            return;
        }
        const hasText = Boolean(msg.textContent && msg.textContent.trim().length > 0);
        const allResultsDependent = results.every((entry) => dependentSet.has(entry.callId));
        // Case (a): nothing else worth keeping — drop the message entirely.
        if (!hasText && allResultsDependent) {
            coEjections++;
            return;
        }
        // Case (b): keep the message, stub only the orphaned results.
        const stubbedResults = results.map((entry) => {
            if (!entry.callId || !dependentSet.has(entry.callId)) {
                return entry;
            }
            stubReplacements++;
            return {
                ...entry,
                content: formatToolChainStub({
                    name: entry.name || 'tool_result',
                    id: entry.callId || 'unknown',
                    status: 'ejected',
                    reason,
                    summary: 'parent tool-use ejected from context window',
                }),
            };
        });
        kept.push({ ...msg, toolResults: stubbedResults });
    });
    return { messages: kept, coEjections, stubReplacements };
}
|
|
711
1068
|
/**
|
|
712
1069
|
* Apply gradient tool treatment to a message array.
|
|
713
1070
|
*
|
|
@@ -812,6 +1169,131 @@ export class Compositor {
|
|
|
812
1169
|
get orgRegistry() {
|
|
813
1170
|
return this._orgRegistry;
|
|
814
1171
|
}
|
|
1172
|
+
/**
|
|
1173
|
+
* Sprint 2.1: Hydrate tool-artifact stubs in the active turn.
|
|
1174
|
+
*
|
|
1175
|
+
* The active turn is the contiguous trailing block of tool-bearing messages
|
|
1176
|
+
* at the tail of the assembled window (positional, NOT turn_id-based):
|
|
1177
|
+
* - Walk backward from the last message
|
|
1178
|
+
* - Collect tool-bearing messages (toolCalls != null OR toolResults != null)
|
|
1179
|
+
* - Plus the bounding user message that opened the turn
|
|
1180
|
+
* - Stop at the first plain message once at least one tool message was found
|
|
1181
|
+
*
|
|
1182
|
+
* For every toolResult stub with an `artifact=<id>` pointer, look up the
|
|
1183
|
+
* full payload in ToolArtifactStore and replace the stub content in-place.
|
|
1184
|
+
* Uses a single batched `WHERE id IN (...)` lookup (no N+1 queries).
|
|
1185
|
+
* Touches `last_used_at` on every hydrated artifact in a single batch.
|
|
1186
|
+
*
|
|
1187
|
+
* Failure mode: if a lookup returns null (artifact missing), leave the stub
|
|
1188
|
+
* unchanged and increment hydrationMisses.
|
|
1189
|
+
*
|
|
1190
|
+
* Returns diagnostics counters.
|
|
1191
|
+
*/
|
|
1192
|
+
hydrateActiveTurnArtifacts(messages, db) {
|
|
1193
|
+
if (messages.length === 0) {
|
|
1194
|
+
return { artifactsHydrated: 0, hydrationBytes: 0, hydrationMisses: 0 };
|
|
1195
|
+
}
|
|
1196
|
+
const store = new ToolArtifactStore(db);
|
|
1197
|
+
// ── 1. Detect active turn (positional, backward walk) ─────────────────────
|
|
1198
|
+
// Collect indices belonging to the active turn.
|
|
1199
|
+
const activeTurnIndices = [];
|
|
1200
|
+
let foundToolBearing = false;
|
|
1201
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
1202
|
+
const msg = messages[i];
|
|
1203
|
+
const isToolBearing = msg.toolCalls != null || msg.toolResults != null;
|
|
1204
|
+
if (isToolBearing) {
|
|
1205
|
+
foundToolBearing = true;
|
|
1206
|
+
activeTurnIndices.push(i);
|
|
1207
|
+
}
|
|
1208
|
+
else if (foundToolBearing) {
|
|
1209
|
+
// First plain message after at least one tool-bearing message — this
|
|
1210
|
+
// is the bounding user message that opened the turn. Include it and stop.
|
|
1211
|
+
activeTurnIndices.push(i);
|
|
1212
|
+
break;
|
|
1213
|
+
}
|
|
1214
|
+
else {
|
|
1215
|
+
// Haven't found any tool-bearing messages yet — still in non-tool tail
|
|
1216
|
+
// (e.g., the last message is a plain user message). No active turn.
|
|
1217
|
+
break;
|
|
1218
|
+
}
|
|
1219
|
+
}
|
|
1220
|
+
if (activeTurnIndices.length === 0 || !foundToolBearing) {
|
|
1221
|
+
return { artifactsHydrated: 0, hydrationBytes: 0, hydrationMisses: 0 };
|
|
1222
|
+
}
|
|
1223
|
+
// ── 2. Collect all artifactIds from stub toolResults in the active turn ───
|
|
1224
|
+
// Map: artifactId -> array of [msgIndex, resultIndex] for in-place replacement
|
|
1225
|
+
const artifactTargets = new Map();
|
|
1226
|
+
for (const msgIdx of activeTurnIndices) {
|
|
1227
|
+
const msg = messages[msgIdx];
|
|
1228
|
+
if (!msg.toolResults)
|
|
1229
|
+
continue;
|
|
1230
|
+
for (let resultIdx = 0; resultIdx < msg.toolResults.length; resultIdx++) {
|
|
1231
|
+
const result = msg.toolResults[resultIdx];
|
|
1232
|
+
const stub = parseToolChainStub(result.content);
|
|
1233
|
+
if (stub && stub.artifactId) {
|
|
1234
|
+
const existing = artifactTargets.get(stub.artifactId) ?? [];
|
|
1235
|
+
existing.push({ msgIdx, resultIdx });
|
|
1236
|
+
artifactTargets.set(stub.artifactId, existing);
|
|
1237
|
+
}
|
|
1238
|
+
}
|
|
1239
|
+
}
|
|
1240
|
+
if (artifactTargets.size === 0) {
|
|
1241
|
+
return { artifactsHydrated: 0, hydrationBytes: 0, hydrationMisses: 0 };
|
|
1242
|
+
}
|
|
1243
|
+
// ── 3. Batch lookup ────────────────────────────────────────────────────────
|
|
1244
|
+
const ids = Array.from(artifactTargets.keys());
|
|
1245
|
+
const placeholders = ids.map(() => '?').join(', ');
|
|
1246
|
+
const rows = db
|
|
1247
|
+
.prepare(`SELECT * FROM tool_artifacts WHERE id IN (${placeholders})`)
|
|
1248
|
+
.all(...ids);
|
|
1249
|
+
// Build id -> payload map
|
|
1250
|
+
const payloadMap = new Map();
|
|
1251
|
+
for (const row of rows) {
|
|
1252
|
+
payloadMap.set(row.id, row.payload);
|
|
1253
|
+
}
|
|
1254
|
+
// ── 4. Hydrate in-place ────────────────────────────────────────────────────
|
|
1255
|
+
let artifactsHydrated = 0;
|
|
1256
|
+
let hydrationBytes = 0;
|
|
1257
|
+
let hydrationMisses = 0;
|
|
1258
|
+
const touchIds = [];
|
|
1259
|
+
for (const [artifactId, targets] of artifactTargets) {
|
|
1260
|
+
const payload = payloadMap.get(artifactId);
|
|
1261
|
+
if (payload == null) {
|
|
1262
|
+
// Graceful miss — stub stays as-is
|
|
1263
|
+
hydrationMisses += targets.length;
|
|
1264
|
+
continue;
|
|
1265
|
+
}
|
|
1266
|
+
for (const { msgIdx, resultIdx } of targets) {
|
|
1267
|
+
const msg = messages[msgIdx];
|
|
1268
|
+
// Safety: if content doesn't look like a stub anymore (defensive idempotency check)
|
|
1269
|
+
const existingContent = msg.toolResults[resultIdx].content;
|
|
1270
|
+
if (!parseToolChainStub(existingContent)) {
|
|
1271
|
+
// Already full content — pass through unchanged
|
|
1272
|
+
continue;
|
|
1273
|
+
}
|
|
1274
|
+
// Replace stub with full payload
|
|
1275
|
+
msg.toolResults[resultIdx] = {
|
|
1276
|
+
...msg.toolResults[resultIdx],
|
|
1277
|
+
content: payload,
|
|
1278
|
+
};
|
|
1279
|
+
artifactsHydrated++;
|
|
1280
|
+
hydrationBytes += Buffer.byteLength(payload, 'utf8');
|
|
1281
|
+
}
|
|
1282
|
+
touchIds.push(artifactId);
|
|
1283
|
+
}
|
|
1284
|
+
// ── 5. Batch touch last_used_at ───────────────────────────────────────────
|
|
1285
|
+
if (touchIds.length > 0) {
|
|
1286
|
+
const ts = new Date().toISOString();
|
|
1287
|
+
const touchPlaceholders = touchIds.map(() => '?').join(', ');
|
|
1288
|
+
try {
|
|
1289
|
+
db.prepare(`UPDATE tool_artifacts SET last_used_at = ? WHERE id IN (${touchPlaceholders})`).run(ts, ...touchIds);
|
|
1290
|
+
}
|
|
1291
|
+
catch {
|
|
1292
|
+
// Touch is best-effort — hydration still succeeded
|
|
1293
|
+
}
|
|
1294
|
+
}
|
|
1295
|
+
return { artifactsHydrated, hydrationBytes, hydrationMisses };
|
|
1296
|
+
}
|
|
815
1297
|
/**
|
|
816
1298
|
* Compose a complete message array for sending to an LLM.
|
|
817
1299
|
*
|
|
@@ -842,6 +1324,11 @@ export class Compositor {
|
|
|
842
1324
|
// Particularly effective for low-frequency sessions (heartbeat agents, council
|
|
843
1325
|
// seats between rounds). TTL on the cache write remains 120s — this is a
|
|
844
1326
|
// conservative early-exit before the TTL expires, not a TTL extension.
|
|
1327
|
+
//
|
|
1328
|
+
// B2: prevPrefixHash is set when a cached bundle is found but bypassed due to
|
|
1329
|
+
// prefix-input mutation (or any other request incompatibility). It is surfaced in the full-compose diagnostics so
|
|
1330
|
+
// callers can confirm the bypass fired correctly.
|
|
1331
|
+
let _prevPrefixHashFromBypass;
|
|
845
1332
|
if (request.includeHistory !== false && request.skipWindowCache !== true) {
|
|
846
1333
|
try {
|
|
847
1334
|
const newestRow = db.prepare('SELECT MAX(id) AS maxId FROM messages WHERE agent_id = ?').get(request.agentId);
|
|
@@ -866,7 +1353,28 @@ export class Compositor {
|
|
|
866
1353
|
// historyDepth constrains how many messages the caller wants;
|
|
867
1354
|
// we can't slice a cached bundle safely, so skip cache.
|
|
868
1355
|
const depthOk = !request.historyDepth;
|
|
869
|
-
|
|
1356
|
+
// B2: Stable-prefix hash check.
|
|
1357
|
+
// If the system/identity slots changed since this cache entry was
|
|
1358
|
+
// written, the stable prefix is stale even if cursor freshness
|
|
1359
|
+
// passes. Compute a cheap input hash from slot contents and compare
|
|
1360
|
+
// against the one stored in the cache meta. If no stored hash exists
|
|
1361
|
+
// (pre-B2 cache entries), fall through to prefix check on the
|
|
1362
|
+
// cached message content itself.
|
|
1363
|
+
let prefixInputOk = true;
|
|
1364
|
+
const _cachedPrefixInputHash = cachedBundle.meta.prefixInputHash;
|
|
1365
|
+
if (_cachedPrefixInputHash) {
|
|
1366
|
+
const _sysSlot = await this.cache.getSlot(request.agentId, request.sessionKey, 'system');
|
|
1367
|
+
const _idSlot = await this.cache.getSlot(request.agentId, request.sessionKey, 'identity');
|
|
1368
|
+
const _incomingInputHash = createHash('sha256')
|
|
1369
|
+
.update(_sysSlot ?? '')
|
|
1370
|
+
.update('\n␞\n')
|
|
1371
|
+
.update(_idSlot ?? '')
|
|
1372
|
+
.digest('hex');
|
|
1373
|
+
if (_incomingInputHash !== _cachedPrefixInputHash) {
|
|
1374
|
+
prefixInputOk = false;
|
|
1375
|
+
}
|
|
1376
|
+
}
|
|
1377
|
+
if (budgetOk && factsOk && libraryOk && contextOk && depthOk && prefixInputOk) {
|
|
870
1378
|
const cachedSlots = {
|
|
871
1379
|
system: cachedBundle.meta.slots['system'] ?? 0,
|
|
872
1380
|
identity: cachedBundle.meta.slots['identity'] ?? 0,
|
|
@@ -875,6 +1383,8 @@ export class Compositor {
|
|
|
875
1383
|
context: cachedBundle.meta.slots['context'] ?? 0,
|
|
876
1384
|
library: cachedBundle.meta.slots['library'] ?? 0,
|
|
877
1385
|
};
|
|
1386
|
+
// Sprint 2.1: hydrate active-turn artifact stubs before converting.
|
|
1387
|
+
const cachedHydration = this.hydrateActiveTurnArtifacts(cachedBundle.messages, db);
|
|
878
1388
|
return {
|
|
879
1389
|
messages: toComposeOutputMessages(cachedBundle.messages),
|
|
880
1390
|
tokenCount: cachedBundle.meta.totalTokens,
|
|
@@ -885,10 +1395,17 @@ export class Compositor {
|
|
|
885
1395
|
diagnostics: {
|
|
886
1396
|
...cachedBundle.meta.diagnostics,
|
|
887
1397
|
windowCacheHit: true,
|
|
1398
|
+
// Carry forward the stored prefixHash so callers can observe it.
|
|
1399
|
+
prefixHash: cachedBundle.meta.prefixHash ?? cachedBundle.meta.diagnostics.prefixHash,
|
|
1400
|
+
artifactsHydrated: cachedHydration.artifactsHydrated > 0 ? cachedHydration.artifactsHydrated : undefined,
|
|
1401
|
+
hydrationBytes: cachedHydration.hydrationBytes > 0 ? cachedHydration.hydrationBytes : undefined,
|
|
1402
|
+
hydrationMisses: cachedHydration.hydrationMisses > 0 ? cachedHydration.hydrationMisses : undefined,
|
|
888
1403
|
},
|
|
889
1404
|
};
|
|
890
1405
|
}
|
|
891
|
-
// Incompatible request — fall through to full compose
|
|
1406
|
+
// Incompatible request — fall through to full compose.
|
|
1407
|
+
// Surface prevPrefixHash so the full compose diagnostics can report it.
|
|
1408
|
+
_prevPrefixHashFromBypass = cachedBundle.meta.prefixHash ?? cachedBundle.meta.diagnostics.prefixHash;
|
|
892
1409
|
}
|
|
893
1410
|
}
|
|
894
1411
|
}
|
|
@@ -907,6 +1424,36 @@ export class Compositor {
|
|
|
907
1424
|
: [];
|
|
908
1425
|
const { reserve: dynamicReserve, avgTurnCost, dynamic: isDynamic, pressureHigh } = computeDynamicReserve(sampleMessages, totalWindow, this.config);
|
|
909
1426
|
const budget = request.tokenBudget || resolveModelBudget(request.model, this.config.defaultTokenBudget, dynamicReserve, this.config.budgetFraction);
|
|
1427
|
+
// B4: Model-aware lane budgets.
|
|
1428
|
+
// Resolve historyFraction and memoryFraction by blending config values toward
|
|
1429
|
+
// model-preferred fractions when the effective budget approaches the MECW ceiling.
|
|
1430
|
+
// This ensures the compositor doesn't allocate more history than the model can
|
|
1431
|
+
// reliably reason over, and adjusts the memory pool proportionally.
|
|
1432
|
+
const _b4ConfigHistoryFraction = this.config.historyFraction ?? 0.40;
|
|
1433
|
+
const _b4ConfigMemoryFraction = this.config.memoryFraction ?? 0.40;
|
|
1434
|
+
const { historyFraction: b4HistoryFraction, memoryFraction: b4MemoryFraction, mecwProfile: b4MecwProfile, mecwApplied: b4MecwApplied, mecwBlend: b4MecwBlend, } = resolveModelLaneBudgets(request.model, budget, _b4ConfigHistoryFraction, _b4ConfigMemoryFraction);
|
|
1435
|
+
// C2: Compute the artifact oversize threshold once per compose pass from the
|
|
1436
|
+
// effective model budget (from B4). Chunk injection paths consult this threshold
|
|
1437
|
+
// to degrade retrieved payloads that would fill the lane instead of injecting them.
|
|
1438
|
+
const c2ArtifactThresholdTokens = resolveArtifactOversizeThreshold(budget);
|
|
1439
|
+
let c2ArtifactDegradations = 0;
|
|
1440
|
+
// Sprint 4: Pre-compose history depth tightening.
|
|
1441
|
+
// Classify the session and compute an adaptive depth from observed message
|
|
1442
|
+
// density. This replaces the old fixed maxHistoryMessages ceiling that over-
|
|
1443
|
+
// fed the compositor for tool-heavy sessions.
|
|
1444
|
+
//
|
|
1445
|
+
// If the caller already passed historyDepth (plugin assemble path), honour it
|
|
1446
|
+
// as an explicit cap — the adaptive depth still applies as an upper bound so
|
|
1447
|
+
// we never request more than the budget can absorb.
|
|
1448
|
+
const s4SessionType = classifySessionType(sampleMessages);
|
|
1449
|
+
const s4ObservedDensity = estimateObservedMsgDensity(sampleMessages);
|
|
1450
|
+
const s4HistoryBudget = Math.floor(budget * b4HistoryFraction);
|
|
1451
|
+
const s4AdaptiveDepth = computeAdaptiveHistoryDepth(s4SessionType, s4ObservedDensity, s4HistoryBudget, this.config.maxHistoryMessages);
|
|
1452
|
+
// Effective depth: caller-provided historyDepth overrides adaptive when it is
|
|
1453
|
+
// the tighter constraint; otherwise use the adaptive depth.
|
|
1454
|
+
const s4EffectiveDepth = request.historyDepth
|
|
1455
|
+
? Math.min(request.historyDepth, s4AdaptiveDepth)
|
|
1456
|
+
: s4AdaptiveDepth;
|
|
910
1457
|
let remaining = budget;
|
|
911
1458
|
// Phase 0 fence enforcement: resolve the compaction fence for this conversation.
|
|
912
1459
|
// All downstream message queries use this as a lower bound to exclude zombie
|
|
@@ -1005,6 +1552,11 @@ export class Compositor {
|
|
|
1005
1552
|
}
|
|
1006
1553
|
// ─── Conversation History ──────────────────────────────────
|
|
1007
1554
|
let diagCrossTopicKeystones = 0;
|
|
1555
|
+
// Sprint 4: hoisted so diagnostics block can read it regardless of includeHistory branch.
|
|
1556
|
+
let s4RescueTrimFired = false;
|
|
1557
|
+
// C1: total tool-chain degradation counters across history budget-fit and safety-valve passes.
|
|
1558
|
+
let c1CoEjections = 0;
|
|
1559
|
+
let c1StubReplacements = 0;
|
|
1008
1560
|
// Hoisted: activeTopicId/name resolved inside history block, used for window dual-write (VS-1) and wiki page injection
|
|
1009
1561
|
let composedActiveTopicId;
|
|
1010
1562
|
let composedActiveTopicName;
|
|
@@ -1049,7 +1601,8 @@ export class Compositor {
|
|
|
1049
1601
|
// Hoist resolved topic id+name so the window dual-write and wiki injection sections can access them
|
|
1050
1602
|
composedActiveTopicId = activeTopicId;
|
|
1051
1603
|
composedActiveTopicName = activeTopic?.name;
|
|
1052
|
-
const rawHistoryMessages = await this.getHistory(request.agentId, request.sessionKey,
|
|
1604
|
+
const rawHistoryMessages = await this.getHistory(request.agentId, request.sessionKey, s4EffectiveDepth, // Sprint 4: adaptive depth (replaces fixed maxHistoryMessages)
|
|
1605
|
+
store, activeTopicId, fenceMessageId, activeContext);
|
|
1053
1606
|
// Deduplicate history by StoredMessage.id (second line of defense after
|
|
1054
1607
|
// pushHistory() tail-check dedup). Guards against any duplicates that
|
|
1055
1608
|
// slipped through the warm path — e.g. bootstrap re-runs on existing sessions.
|
|
@@ -1073,26 +1626,38 @@ export class Compositor {
|
|
|
1073
1626
|
// Replace oversized stale results with stubs so they don't burn budget.
|
|
1074
1627
|
// Current-turn results (turn age 0) are never evicted.
|
|
1075
1628
|
const evictedHistory = evictLargeToolResults(transformedHistory);
|
|
1629
|
+
const c2ResolvedHistory = resolveOversizedArtifacts(evictedHistory, budget);
|
|
1630
|
+
c2ArtifactDegradations += c2ResolvedHistory.refCount;
|
|
1076
1631
|
// ── Budget-fit: walk newest→oldest, drop whole clusters ─────────────
|
|
1077
1632
|
// Group tool_use + tool_result messages into clusters so they are kept
|
|
1078
1633
|
// or dropped as a unit. Breaking mid-cluster creates orphaned tool
|
|
1079
1634
|
// pairs that repairToolPairs has to strip downstream — wasting budget
|
|
1080
1635
|
// and leaving gaps in conversation continuity.
|
|
1081
|
-
const budgetClusters = clusterNeutralMessages(
|
|
1636
|
+
const budgetClusters = clusterNeutralMessages(c2ResolvedHistory.messages);
|
|
1082
1637
|
let historyTokens = 0;
|
|
1083
1638
|
const includedClusters = [];
|
|
1084
1639
|
// Pre-allocate history budget. historyFraction is a fraction of the
|
|
1085
1640
|
// effective token budget (post-reserve). Falls back to unbounded fill
|
|
1086
1641
|
// (remaining) when historyFraction is not set.
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1642
|
+
// B4: uses b4HistoryFraction (model-aware, blended from MECW catalog) instead
|
|
1643
|
+
// of raw config.historyFraction so history doesn't overflow MECW ceiling.
|
|
1644
|
+
const historyBudget = Math.floor(budget * b4HistoryFraction);
|
|
1090
1645
|
const historyFillCap = Math.min(historyBudget, remaining);
|
|
1091
1646
|
for (let i = budgetClusters.length - 1; i >= 0; i--) {
|
|
1092
1647
|
const cluster = budgetClusters[i];
|
|
1093
1648
|
if (historyTokens + cluster.tokenCost > historyFillCap && includedClusters.length > 0) {
|
|
1094
|
-
const
|
|
1095
|
-
|
|
1649
|
+
const droppedClusters = budgetClusters.slice(0, i + 1);
|
|
1650
|
+
const droppedMsgCount = droppedClusters.reduce((s, c) => s + c.messages.length, 0);
|
|
1651
|
+
const droppedToolResultCount = droppedClusters.reduce((sum, c) => sum + c.messages.filter(m => (m.toolResults?.length ?? 0) > 0).length, 0);
|
|
1652
|
+
if (droppedToolResultCount > 0) {
|
|
1653
|
+
c1CoEjections += droppedToolResultCount;
|
|
1654
|
+
console.info(`[hypermem:compositor] tool-chain co-eject reason=budget_cluster_drop count=${droppedToolResultCount} messages dropped`);
|
|
1655
|
+
}
|
|
1656
|
+
const c1Note = droppedToolResultCount > 0
|
|
1657
|
+
? ` [C1: ${droppedToolResultCount} co-ejected reason=budget_cluster_drop]`
|
|
1658
|
+
: '';
|
|
1659
|
+
warnings.push(`History truncated at cluster ${i + 1}/${budgetClusters.length} (${droppedMsgCount} messages dropped)${c1Note}`);
|
|
1660
|
+
s4RescueTrimFired = true;
|
|
1096
1661
|
break;
|
|
1097
1662
|
}
|
|
1098
1663
|
includedClusters.unshift(cluster);
|
|
@@ -1204,18 +1769,12 @@ export class Compositor {
|
|
|
1204
1769
|
}
|
|
1205
1770
|
// Memory budget pool: facts, wiki, semantic recall, cross-session, and
|
|
1206
1771
|
// trigger-fired doc chunks all draw from this shared pool via `remaining`.
|
|
1207
|
-
//
|
|
1208
|
-
//
|
|
1772
|
+
// B4: uses b4MemoryFraction (model-aware, blended from MECW catalog) instead
|
|
1773
|
+
// of raw config.memoryFraction so the memory pool scales with what the model
|
|
1774
|
+
// can effectively attend to within its MECW ceiling.
|
|
1209
1775
|
let memoryBudget;
|
|
1210
|
-
|
|
1211
|
-
memoryBudget = Math.floor(budget *
|
|
1212
|
-
if (remaining > memoryBudget) {
|
|
1213
|
-
remaining = memoryBudget;
|
|
1214
|
-
}
|
|
1215
|
-
}
|
|
1216
|
-
else {
|
|
1217
|
-
const targetFraction = this.config.targetBudgetFraction ?? 0.65;
|
|
1218
|
-
memoryBudget = Math.floor(budget * targetFraction);
|
|
1776
|
+
{
|
|
1777
|
+
memoryBudget = Math.floor(budget * b4MemoryFraction);
|
|
1219
1778
|
if (remaining > memoryBudget) {
|
|
1220
1779
|
remaining = memoryBudget;
|
|
1221
1780
|
}
|
|
@@ -1246,11 +1805,12 @@ export class Compositor {
|
|
|
1246
1805
|
}
|
|
1247
1806
|
}
|
|
1248
1807
|
}
|
|
1249
|
-
// ───
|
|
1250
|
-
//
|
|
1251
|
-
//
|
|
1252
|
-
//
|
|
1253
|
-
const
|
|
1808
|
+
// ─── Cache-ordered context assembly ─────────────────────────
|
|
1809
|
+
// Stable, reusable material is lifted above the cache boundary as its
|
|
1810
|
+
// own system messages. Session-volatile material stays in the dynamic
|
|
1811
|
+
// context block below that boundary.
|
|
1812
|
+
const stablePrefixMessages = [];
|
|
1813
|
+
const volatileContextParts = [];
|
|
1254
1814
|
let contextTokens = 0;
|
|
1255
1815
|
// ── C1: Content fingerprint dedup set ────────────────────
|
|
1256
1816
|
// Replaces fragile substring-match dedup across temporal, open-domain,
|
|
@@ -1301,7 +1861,7 @@ export class Compositor {
|
|
|
1301
1861
|
if (wikiContent) {
|
|
1302
1862
|
const tokens = estimateTokens(wikiContent);
|
|
1303
1863
|
if (tokens <= remaining) {
|
|
1304
|
-
|
|
1864
|
+
volatileContextParts.push(wikiContent);
|
|
1305
1865
|
contextTokens += tokens;
|
|
1306
1866
|
remaining -= tokens;
|
|
1307
1867
|
slots.library += tokens;
|
|
@@ -1309,7 +1869,7 @@ export class Compositor {
|
|
|
1309
1869
|
else if (remaining > 200) {
|
|
1310
1870
|
const truncated = this.truncateToTokens(wikiContent, remaining);
|
|
1311
1871
|
const truncTokens = estimateTokens(truncated);
|
|
1312
|
-
|
|
1872
|
+
volatileContextParts.push(truncated);
|
|
1313
1873
|
contextTokens += truncTokens;
|
|
1314
1874
|
remaining -= truncTokens;
|
|
1315
1875
|
slots.library += truncTokens;
|
|
@@ -1320,34 +1880,65 @@ export class Compositor {
|
|
|
1320
1880
|
// scope: agent — filtered by agentId via filterByScope after fetch
|
|
1321
1881
|
// Draws from the shared memory budget pool (remaining is pre-capped by memoryBudget).
|
|
1322
1882
|
if (request.includeFacts !== false && remaining > 500) {
|
|
1323
|
-
const
|
|
1324
|
-
if (
|
|
1325
|
-
const
|
|
1326
|
-
diagFactsIncluded +=
|
|
1327
|
-
diagScopeFiltered +=
|
|
1328
|
-
if (
|
|
1329
|
-
const
|
|
1883
|
+
const factSections = this.buildFactSectionsFromDb(request.agentId, request.sessionKey, libDb || db);
|
|
1884
|
+
if (factSections !== null) {
|
|
1885
|
+
const { stableContent, stableCount, volatileContent, volatileCount, filteredCount } = factSections;
|
|
1886
|
+
diagFactsIncluded += stableCount + volatileCount;
|
|
1887
|
+
diagScopeFiltered += filteredCount;
|
|
1888
|
+
if (stableContent) {
|
|
1889
|
+
const stableFactsBlock = `## Stable Facts\n${stableContent}`;
|
|
1890
|
+
const tokens = estimateTokens(stableFactsBlock);
|
|
1330
1891
|
if (tokens <= remaining) {
|
|
1331
|
-
|
|
1892
|
+
stablePrefixMessages.push({
|
|
1893
|
+
role: 'system',
|
|
1894
|
+
textContent: stableFactsBlock,
|
|
1895
|
+
toolCalls: null,
|
|
1896
|
+
toolResults: null,
|
|
1897
|
+
});
|
|
1332
1898
|
contextTokens += tokens;
|
|
1333
1899
|
remaining -= tokens;
|
|
1334
|
-
slots.facts
|
|
1900
|
+
slots.facts += tokens;
|
|
1335
1901
|
}
|
|
1336
1902
|
else if (remaining > 200) {
|
|
1337
|
-
const truncated = this.truncateToTokens(
|
|
1903
|
+
const truncated = this.truncateToTokens(stableFactsBlock, remaining);
|
|
1338
1904
|
const truncTokens = estimateTokens(truncated);
|
|
1339
|
-
|
|
1905
|
+
stablePrefixMessages.push({
|
|
1906
|
+
role: 'system',
|
|
1907
|
+
textContent: truncated,
|
|
1908
|
+
toolCalls: null,
|
|
1909
|
+
toolResults: null,
|
|
1910
|
+
});
|
|
1340
1911
|
contextTokens += truncTokens;
|
|
1341
1912
|
remaining -= truncTokens;
|
|
1342
|
-
slots.facts
|
|
1343
|
-
warnings.push('
|
|
1913
|
+
slots.facts += truncTokens;
|
|
1914
|
+
warnings.push('Stable facts truncated to fit memory budget');
|
|
1344
1915
|
}
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1916
|
+
for (const line of stableContent.split('\n')) {
|
|
1917
|
+
if (line.startsWith('- ['))
|
|
1918
|
+
addFingerprint(line);
|
|
1919
|
+
}
|
|
1920
|
+
}
|
|
1921
|
+
if (volatileContent) {
|
|
1922
|
+
const volatileFactsBlock = `## Active Facts\n${volatileContent}`;
|
|
1923
|
+
const tokens = estimateTokens(volatileFactsBlock);
|
|
1924
|
+
if (tokens <= remaining) {
|
|
1925
|
+
volatileContextParts.push(volatileFactsBlock);
|
|
1926
|
+
contextTokens += tokens;
|
|
1927
|
+
remaining -= tokens;
|
|
1928
|
+
slots.facts += tokens;
|
|
1929
|
+
}
|
|
1930
|
+
else if (remaining > 200) {
|
|
1931
|
+
const truncated = this.truncateToTokens(volatileFactsBlock, remaining);
|
|
1932
|
+
const truncTokens = estimateTokens(truncated);
|
|
1933
|
+
volatileContextParts.push(truncated);
|
|
1934
|
+
contextTokens += truncTokens;
|
|
1935
|
+
remaining -= truncTokens;
|
|
1936
|
+
slots.facts += truncTokens;
|
|
1937
|
+
warnings.push('Active facts truncated to fit memory budget');
|
|
1938
|
+
}
|
|
1939
|
+
for (const line of volatileContent.split('\n')) {
|
|
1940
|
+
if (line.startsWith('- ['))
|
|
1349
1941
|
addFingerprint(line);
|
|
1350
|
-
}
|
|
1351
1942
|
}
|
|
1352
1943
|
}
|
|
1353
1944
|
}
|
|
@@ -1365,7 +1956,6 @@ export class Compositor {
|
|
|
1365
1956
|
order: 'DESC',
|
|
1366
1957
|
});
|
|
1367
1958
|
if (temporalFacts.length > 0) {
|
|
1368
|
-
// C1: Use fingerprint dedup instead of fragile substring match
|
|
1369
1959
|
const beforeCount = temporalFacts.length;
|
|
1370
1960
|
const novel = temporalFacts.filter(f => !isDuplicate(f.content));
|
|
1371
1961
|
diagFingerprintDedups += beforeCount - novel.length;
|
|
@@ -1380,9 +1970,9 @@ export class Compositor {
|
|
|
1380
1970
|
.join('\n');
|
|
1381
1971
|
const temporalSection = `## Temporal Context\n${temporalBlock}`;
|
|
1382
1972
|
const tempTokens = estimateTokens(temporalSection);
|
|
1383
|
-
const tempBudget = Math.floor(remaining * 0.20);
|
|
1973
|
+
const tempBudget = Math.floor(remaining * 0.20);
|
|
1384
1974
|
if (tempTokens <= tempBudget) {
|
|
1385
|
-
|
|
1975
|
+
volatileContextParts.push(temporalSection);
|
|
1386
1976
|
contextTokens += tempTokens;
|
|
1387
1977
|
remaining -= tempTokens;
|
|
1388
1978
|
slots.facts = (slots.facts ?? 0) + tempTokens;
|
|
@@ -1390,7 +1980,7 @@ export class Compositor {
|
|
|
1390
1980
|
else {
|
|
1391
1981
|
const truncated = this.truncateToTokens(temporalSection, tempBudget);
|
|
1392
1982
|
const truncTokens = estimateTokens(truncated);
|
|
1393
|
-
|
|
1983
|
+
volatileContextParts.push(truncated);
|
|
1394
1984
|
contextTokens += truncTokens;
|
|
1395
1985
|
remaining -= truncTokens;
|
|
1396
1986
|
slots.facts = (slots.facts ?? 0) + truncTokens;
|
|
@@ -1409,8 +1999,6 @@ export class Compositor {
|
|
|
1409
1999
|
// questions. Primary fix for LoCoMo open-domain F1 gap (0.133 baseline).
|
|
1410
2000
|
if (request.includeSemanticRecall !== false && queryText && isOpenDomainQuery(queryText) && db && remaining > 300) {
|
|
1411
2001
|
try {
|
|
1412
|
-
// searchOpenDomain still does intra-result dedup. Existing-context dedup
|
|
1413
|
-
// now happens here via fingerprints so we keep one dedup path.
|
|
1414
2002
|
const rawOdResults = searchOpenDomain(db, queryText, '', 10);
|
|
1415
2003
|
const beforeOd = rawOdResults.length;
|
|
1416
2004
|
const odResults = rawOdResults.filter(r => !isDuplicate(r.content));
|
|
@@ -1431,9 +2019,9 @@ export class Compositor {
|
|
|
1431
2019
|
.join('\n');
|
|
1432
2020
|
const odSection = `## Open Domain Context\n${odBlock}`;
|
|
1433
2021
|
const odTokens = estimateTokens(odSection);
|
|
1434
|
-
const odBudget = Math.floor(remaining * 0.20);
|
|
2022
|
+
const odBudget = Math.floor(remaining * 0.20);
|
|
1435
2023
|
if (odTokens <= odBudget) {
|
|
1436
|
-
|
|
2024
|
+
volatileContextParts.push(odSection);
|
|
1437
2025
|
contextTokens += odTokens;
|
|
1438
2026
|
remaining -= odTokens;
|
|
1439
2027
|
slots.facts = (slots.facts ?? 0) + odTokens;
|
|
@@ -1441,7 +2029,7 @@ export class Compositor {
|
|
|
1441
2029
|
else {
|
|
1442
2030
|
const truncated = this.truncateToTokens(odSection, odBudget);
|
|
1443
2031
|
const truncTokens = estimateTokens(truncated);
|
|
1444
|
-
|
|
2032
|
+
volatileContextParts.push(truncated);
|
|
1445
2033
|
contextTokens += truncTokens;
|
|
1446
2034
|
remaining -= truncTokens;
|
|
1447
2035
|
slots.facts = (slots.facts ?? 0) + truncTokens;
|
|
@@ -1458,17 +2046,28 @@ export class Compositor {
|
|
|
1458
2046
|
if (request.includeLibrary !== false && remaining > 500 && libDb) {
|
|
1459
2047
|
const knowledgeContent = this.buildKnowledgeFromDb(request.agentId, libDb);
|
|
1460
2048
|
if (knowledgeContent) {
|
|
1461
|
-
const
|
|
1462
|
-
|
|
1463
|
-
|
|
2049
|
+
const stableKnowledgeBlock = `## Knowledge\n${knowledgeContent}`;
|
|
2050
|
+
const tokens = estimateTokens(stableKnowledgeBlock);
|
|
2051
|
+
if (tokens <= remaining * 0.2) {
|
|
2052
|
+
stablePrefixMessages.push({
|
|
2053
|
+
role: 'system',
|
|
2054
|
+
textContent: stableKnowledgeBlock,
|
|
2055
|
+
toolCalls: null,
|
|
2056
|
+
toolResults: null,
|
|
2057
|
+
});
|
|
1464
2058
|
contextTokens += tokens;
|
|
1465
2059
|
remaining -= tokens;
|
|
1466
2060
|
slots.library += tokens;
|
|
1467
2061
|
}
|
|
1468
2062
|
else {
|
|
1469
|
-
const truncated = this.truncateToTokens(
|
|
2063
|
+
const truncated = this.truncateToTokens(stableKnowledgeBlock, Math.floor(remaining * 0.2));
|
|
1470
2064
|
const truncTokens = estimateTokens(truncated);
|
|
1471
|
-
|
|
2065
|
+
stablePrefixMessages.push({
|
|
2066
|
+
role: 'system',
|
|
2067
|
+
textContent: truncated,
|
|
2068
|
+
toolCalls: null,
|
|
2069
|
+
toolResults: null,
|
|
2070
|
+
});
|
|
1472
2071
|
contextTokens += truncTokens;
|
|
1473
2072
|
remaining -= truncTokens;
|
|
1474
2073
|
slots.library += truncTokens;
|
|
@@ -1481,9 +2080,15 @@ export class Compositor {
|
|
|
1481
2080
|
if (request.includeLibrary !== false && remaining > 300 && libDb) {
|
|
1482
2081
|
const prefsContent = this.buildPreferencesFromDb(request.agentId, libDb);
|
|
1483
2082
|
if (prefsContent) {
|
|
1484
|
-
const
|
|
1485
|
-
|
|
1486
|
-
|
|
2083
|
+
const stablePrefsBlock = `## User Preferences\n${prefsContent}`;
|
|
2084
|
+
const tokens = estimateTokens(stablePrefsBlock);
|
|
2085
|
+
if (tokens <= remaining * 0.1) {
|
|
2086
|
+
stablePrefixMessages.push({
|
|
2087
|
+
role: 'system',
|
|
2088
|
+
textContent: stablePrefsBlock,
|
|
2089
|
+
toolCalls: null,
|
|
2090
|
+
toolResults: null,
|
|
2091
|
+
});
|
|
1487
2092
|
contextTokens += tokens;
|
|
1488
2093
|
remaining -= tokens;
|
|
1489
2094
|
slots.library += tokens;
|
|
@@ -1518,7 +2123,7 @@ export class Compositor {
|
|
|
1518
2123
|
);
|
|
1519
2124
|
if (semanticContent) {
|
|
1520
2125
|
const tokens = estimateTokens(semanticContent);
|
|
1521
|
-
|
|
2126
|
+
volatileContextParts.push(`## Related Memory\n${semanticContent}`);
|
|
1522
2127
|
contextTokens += tokens;
|
|
1523
2128
|
remaining -= tokens;
|
|
1524
2129
|
// Semantic recall draws from multiple sources, attribute to context
|
|
@@ -1614,14 +2219,24 @@ export class Compositor {
|
|
|
1614
2219
|
const chunkLines = [];
|
|
1615
2220
|
let chunkTokens = 0;
|
|
1616
2221
|
for (const chunk of chunks) {
|
|
1617
|
-
if (chunkTokens + chunk.tokenEstimate > maxTokens)
|
|
1618
|
-
break;
|
|
1619
2222
|
// Skip chunks from files OpenClaw already injects into the system prompt
|
|
1620
2223
|
const chunkBasename = chunk.sourcePath.split('/').pop() || '';
|
|
1621
2224
|
if (OPENCLAW_BOOTSTRAP_FILES.has(chunkBasename))
|
|
1622
2225
|
continue;
|
|
1623
|
-
|
|
1624
|
-
|
|
2226
|
+
// C2: degrade oversized chunks to canonical artifact references before
|
|
2227
|
+
// enforcing the per-collection budget gate. Otherwise an oversized raw
|
|
2228
|
+
// chunk gets dropped before the tiny degraded ref ever has a chance to fit.
|
|
2229
|
+
const c2ChunkRef = degradeOversizedDocChunk(chunk.id, chunk.sourcePath, chunk.content, c2ArtifactThresholdTokens);
|
|
2230
|
+
const renderedChunk = c2ChunkRef !== null
|
|
2231
|
+
? `### ${chunk.sectionPath}\n${c2ChunkRef}`
|
|
2232
|
+
: `### ${chunk.sectionPath}\n${chunk.content}`;
|
|
2233
|
+
const renderedTokens = estimateTokens(renderedChunk);
|
|
2234
|
+
if (chunkTokens + renderedTokens > maxTokens)
|
|
2235
|
+
break;
|
|
2236
|
+
chunkLines.push(renderedChunk);
|
|
2237
|
+
chunkTokens += renderedTokens;
|
|
2238
|
+
if (c2ChunkRef !== null)
|
|
2239
|
+
c2ArtifactDegradations++;
|
|
1625
2240
|
}
|
|
1626
2241
|
if (chunkLines.length > 0) {
|
|
1627
2242
|
const collectionLabel = trigger.collection.split('/').pop() || trigger.collection;
|
|
@@ -1638,7 +2253,7 @@ export class Compositor {
|
|
|
1638
2253
|
}
|
|
1639
2254
|
}
|
|
1640
2255
|
if (docParts.length > 0) {
|
|
1641
|
-
|
|
2256
|
+
volatileContextParts.push(docParts.join('\n\n'));
|
|
1642
2257
|
}
|
|
1643
2258
|
}
|
|
1644
2259
|
else if (remaining > 400 && (this.vectorStore || libDb)) {
|
|
@@ -1653,7 +2268,7 @@ export class Compositor {
|
|
|
1653
2268
|
new Promise((_, reject) => setTimeout(() => reject(new Error('fallback_knn_timeout')), 3000)),
|
|
1654
2269
|
]);
|
|
1655
2270
|
if (fallbackContent) {
|
|
1656
|
-
|
|
2271
|
+
volatileContextParts.push(`## Related Memory\n${fallbackContent}`);
|
|
1657
2272
|
const fallbackTokens = estimateTokens(fallbackContent);
|
|
1658
2273
|
contextTokens += fallbackTokens;
|
|
1659
2274
|
remaining -= fallbackTokens;
|
|
@@ -1681,13 +2296,20 @@ export class Compositor {
|
|
|
1681
2296
|
let spawnTokens = 0;
|
|
1682
2297
|
const maxSpawnTokens = Math.floor(remaining * 0.15);
|
|
1683
2298
|
for (const chunk of spawnChunks) {
|
|
1684
|
-
|
|
2299
|
+
// C2: degrade oversized spawn chunks before enforcing the lane budget,
|
|
2300
|
+
// so a bounded reference can fit even when the raw chunk cannot.
|
|
2301
|
+
const c2SpawnRef = degradeOversizedDocChunk(chunk.id, chunk.sourcePath, chunk.content, c2ArtifactThresholdTokens);
|
|
2302
|
+
const renderedChunk = c2SpawnRef ?? chunk.content;
|
|
2303
|
+
const renderedTokens = estimateTokens(renderedChunk);
|
|
2304
|
+
if (spawnTokens + renderedTokens > maxSpawnTokens)
|
|
1685
2305
|
break;
|
|
1686
|
-
spawnLines.push(
|
|
1687
|
-
spawnTokens +=
|
|
2306
|
+
spawnLines.push(renderedChunk);
|
|
2307
|
+
spawnTokens += renderedTokens;
|
|
2308
|
+
if (c2SpawnRef !== null)
|
|
2309
|
+
c2ArtifactDegradations++;
|
|
1688
2310
|
}
|
|
1689
2311
|
if (spawnLines.length > 0) {
|
|
1690
|
-
|
|
2312
|
+
volatileContextParts.push(`## Spawn Context Documents\n${spawnLines.join('\n\n')}`);
|
|
1691
2313
|
contextTokens += spawnTokens;
|
|
1692
2314
|
remaining -= spawnTokens;
|
|
1693
2315
|
slots.library += spawnTokens;
|
|
@@ -1706,7 +2328,7 @@ export class Compositor {
|
|
|
1706
2328
|
const tokens = estimateTokens(crossSessionContent);
|
|
1707
2329
|
const maxContextTokens = Math.min(this.config.maxCrossSessionContext, Math.floor(remaining * 0.2));
|
|
1708
2330
|
if (tokens <= maxContextTokens) {
|
|
1709
|
-
|
|
2331
|
+
volatileContextParts.push(`## Other Active Sessions\n${crossSessionContent}`);
|
|
1710
2332
|
contextTokens += tokens;
|
|
1711
2333
|
remaining -= tokens;
|
|
1712
2334
|
slots.context += tokens;
|
|
@@ -1714,7 +2336,7 @@ export class Compositor {
|
|
|
1714
2336
|
else {
|
|
1715
2337
|
const truncated = this.truncateToTokens(crossSessionContent, maxContextTokens);
|
|
1716
2338
|
const truncTokens = estimateTokens(truncated);
|
|
1717
|
-
|
|
2339
|
+
volatileContextParts.push(`## Other Active Sessions (truncated)\n${truncated}`);
|
|
1718
2340
|
contextTokens += truncTokens;
|
|
1719
2341
|
remaining -= truncTokens;
|
|
1720
2342
|
slots.context += truncTokens;
|
|
@@ -1730,68 +2352,92 @@ export class Compositor {
|
|
|
1730
2352
|
if (actionSummary) {
|
|
1731
2353
|
const actionTokens = Math.ceil(actionSummary.length / 4);
|
|
1732
2354
|
if (actionTokens <= remaining) {
|
|
1733
|
-
|
|
2355
|
+
volatileContextParts.push(actionSummary);
|
|
1734
2356
|
contextTokens += actionTokens;
|
|
1735
2357
|
remaining -= actionTokens;
|
|
1736
2358
|
slots.context += actionTokens;
|
|
1737
2359
|
}
|
|
1738
2360
|
}
|
|
1739
2361
|
}
|
|
2362
|
+
const firstNonSystem = messages.findIndex(m => m.role !== 'system');
|
|
2363
|
+
const stableInsertIdx = firstNonSystem === -1 ? messages.length : firstNonSystem;
|
|
2364
|
+
if (stablePrefixMessages.length > 0) {
|
|
2365
|
+
messages.splice(stableInsertIdx, 0, ...stablePrefixMessages);
|
|
2366
|
+
}
|
|
1740
2367
|
// ── Inject assembled context block ──────────────────────
|
|
1741
|
-
const assembledContextBlock =
|
|
2368
|
+
const assembledContextBlock = volatileContextParts.length > 0 ? volatileContextParts.join('\n\n') : undefined;
|
|
1742
2369
|
if (assembledContextBlock) {
|
|
1743
2370
|
const contextMsg = {
|
|
1744
2371
|
role: 'system',
|
|
1745
2372
|
textContent: assembledContextBlock,
|
|
1746
2373
|
toolCalls: null,
|
|
1747
2374
|
toolResults: null,
|
|
1748
|
-
//
|
|
1749
|
-
//
|
|
1750
|
-
//
|
|
1751
|
-
|
|
1752
|
-
metadata: { dynamicBoundary: true },
|
|
2375
|
+
// CACHE_PREFIX_BOUNDARY_SLOT: this message starts the volatile side of the
|
|
2376
|
+
// prompt. Everything above it is stable-prefix material eligible for reuse;
|
|
2377
|
+
// everything at or below it is per-session / per-turn context.
|
|
2378
|
+
metadata: { dynamicBoundary: true, cacheBoundarySlot: CACHE_PREFIX_BOUNDARY_SLOT },
|
|
1753
2379
|
};
|
|
1754
|
-
|
|
1755
|
-
|
|
1756
|
-
|
|
1757
|
-
|
|
1758
|
-
|
|
1759
|
-
|
|
1760
|
-
|
|
1761
|
-
|
|
2380
|
+
messages.splice(stableInsertIdx + stablePrefixMessages.length, 0, contextMsg);
|
|
2381
|
+
}
|
|
2382
|
+
const stablePrefix = getStablePrefixMessages(messages);
|
|
2383
|
+
const prefixSegmentCount = stablePrefix.length;
|
|
2384
|
+
const prefixTokens = stablePrefix.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
|
|
2385
|
+
const volatileHistoryTokens = messages.slice(prefixSegmentCount)
|
|
2386
|
+
.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
|
|
2387
|
+
const prefixHash = computeStablePrefixHash(stablePrefix);
|
|
2388
|
+
// ─── Safety Valve: Post-Assembly Budget Check (C1-aware) ──────────────
|
|
1762
2389
|
// Re-estimate total tokens after all slots are assembled. If the
|
|
1763
2390
|
// composition exceeds tokenBudget * 1.05 (5% tolerance for estimation
|
|
1764
2391
|
// drift), trim history messages from the oldest until we're under budget.
|
|
1765
2392
|
// History is the most compressible slot — system/identity are never
|
|
1766
2393
|
// truncated, and context (facts/recall/episodes) is more valuable per-token.
|
|
2394
|
+
//
|
|
2395
|
+
// C1: When an assistant message with toolCalls is ejected, its dependent
|
|
2396
|
+
// tool-result messages are co-ejected or stubbed via resolveToolChainEjections.
|
|
2397
|
+
// This ensures no orphaned tool-results survive above the stable-prefix
|
|
2398
|
+
// boundary and eliminates the downstream repairToolPairs cleanup cost.
|
|
1767
2399
|
const estimatedTotal = messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
|
|
1768
2400
|
const hardCeiling = Math.floor(budget * 1.05);
|
|
1769
2401
|
if (estimatedTotal > hardCeiling) {
|
|
1770
2402
|
const overage = estimatedTotal - budget;
|
|
1771
2403
|
let trimmed = 0;
|
|
1772
2404
|
let trimCount = 0;
|
|
1773
|
-
//
|
|
1774
|
-
// Walk forward from the first non-system message, trimming oldest
|
|
2405
|
+
// Collect indices of messages to eject before mutating the array.
|
|
2406
|
+
// Walk forward from the first non-system message, trimming oldest first.
|
|
1775
2407
|
const firstNonSystemIdx = messages.findIndex(m => m.role !== 'system');
|
|
2408
|
+
const ejectIndices = new Set();
|
|
1776
2409
|
if (firstNonSystemIdx >= 0) {
|
|
1777
2410
|
let i = firstNonSystemIdx;
|
|
1778
2411
|
while (i < messages.length && trimmed < overage) {
|
|
1779
|
-
// Don't trim the last user message (current prompt)
|
|
2412
|
+
// Don't trim the last user message (current prompt).
|
|
1780
2413
|
if (i === messages.length - 1 && messages[i].role === 'user')
|
|
1781
2414
|
break;
|
|
1782
2415
|
const msgTokens = estimateMessageTokens(messages[i]);
|
|
1783
|
-
|
|
2416
|
+
ejectIndices.add(i);
|
|
1784
2417
|
trimmed += msgTokens;
|
|
1785
2418
|
trimCount++;
|
|
1786
|
-
|
|
2419
|
+
i++;
|
|
1787
2420
|
}
|
|
1788
2421
|
}
|
|
1789
|
-
if (
|
|
2422
|
+
if (ejectIndices.size > 0) {
|
|
2423
|
+
// C1: centralized ejection — resolves dependent tool-results atomically.
|
|
2424
|
+
const ejectionResult = resolveToolChainEjections(messages, ejectIndices, 'eviction_oversize');
|
|
2425
|
+
// Replace in-place so the rest of the compose path sees the clean array.
|
|
2426
|
+
messages.length = 0;
|
|
2427
|
+
messages.push(...ejectionResult.messages);
|
|
2428
|
+
c1CoEjections += ejectionResult.coEjections;
|
|
2429
|
+
c1StubReplacements += ejectionResult.stubReplacements;
|
|
1790
2430
|
slots.history = Math.max(0, slots.history - trimmed);
|
|
1791
2431
|
remaining += trimmed;
|
|
1792
|
-
|
|
2432
|
+
const c1Note = (ejectionResult.coEjections + ejectionResult.stubReplacements > 0)
|
|
2433
|
+
? ` [C1: ${ejectionResult.coEjections} co-ejected, ${ejectionResult.stubReplacements} stubbed]`
|
|
2434
|
+
: '';
|
|
2435
|
+
warnings.push(`Safety valve: trimmed ${trimCount} oldest history messages (${trimmed} tokens) to fit budget${c1Note}`);
|
|
1793
2436
|
}
|
|
1794
2437
|
}
|
|
2438
|
+
// ─── Sprint 2.1: Hydrate active-turn artifact stubs ────────────────────
|
|
2439
|
+
// Must run on NeutralMessages[] BEFORE provider translation.
|
|
2440
|
+
const hydrationResult = this.hydrateActiveTurnArtifacts(messages, db);
|
|
1795
2441
|
// ─── Translate to provider format (unless caller wants neutral) ───
|
|
1796
2442
|
// When skipProviderTranslation is set, return NeutralMessages directly.
|
|
1797
2443
|
// The context engine plugin uses this: the OpenClaw runtime handles its
|
|
@@ -1861,7 +2507,7 @@ export class Compositor {
|
|
|
1861
2507
|
}
|
|
1862
2508
|
// W3: Build compose diagnostics
|
|
1863
2509
|
let zeroResultReason;
|
|
1864
|
-
if (
|
|
2510
|
+
if (volatileContextParts.length === 0 && stablePrefixMessages.length === 0) {
|
|
1865
2511
|
if (diagScopeFiltered > 0 && diagFactsIncluded === 0 && diagSemanticResults === 0) {
|
|
1866
2512
|
zeroResultReason = 'scope_filtered_all';
|
|
1867
2513
|
}
|
|
@@ -1897,6 +2543,37 @@ export class Compositor {
|
|
|
1897
2543
|
fingerprintDedups: diagFingerprintDedups,
|
|
1898
2544
|
fingerprintCollisions: diagFingerprintCollisions,
|
|
1899
2545
|
windowCacheHit: false,
|
|
2546
|
+
prefixSegmentCount,
|
|
2547
|
+
prefixTokens,
|
|
2548
|
+
prefixHash,
|
|
2549
|
+
// B2: Surface the previous cached prefixHash when this full compose was
|
|
2550
|
+
// triggered by a cache bypass (stable-prefix mutation detected).
|
|
2551
|
+
prevPrefixHash: _prevPrefixHashFromBypass,
|
|
2552
|
+
volatileHistoryTokens,
|
|
2553
|
+
// Sprint 4 fields
|
|
2554
|
+
sessionType: s4SessionType,
|
|
2555
|
+
historyDepthChosen: s4EffectiveDepth,
|
|
2556
|
+
estimatedMsgDensityTokens: s4ObservedDensity,
|
|
2557
|
+
rescueTrimFired: s4RescueTrimFired,
|
|
2558
|
+
// B4: model-aware lane budget diagnostics
|
|
2559
|
+
mecwProfile: b4MecwProfile,
|
|
2560
|
+
mecwApplied: b4MecwApplied,
|
|
2561
|
+
mecwBlend: b4MecwBlend,
|
|
2562
|
+
effectiveHistoryFraction: b4HistoryFraction,
|
|
2563
|
+
effectiveMemoryFraction: b4MemoryFraction,
|
|
2564
|
+
trimSoftTarget: TRIM_BUDGET_POLICY.trimSoftTarget,
|
|
2565
|
+
trimGrowthThreshold: TRIM_BUDGET_POLICY.trimGrowthThreshold,
|
|
2566
|
+
trimHeadroomFraction: TRIM_BUDGET_POLICY.trimHeadroomFraction,
|
|
2567
|
+
// C1: tool-chain ejection telemetry
|
|
2568
|
+
toolChainCoEjections: c1CoEjections > 0 ? c1CoEjections : undefined,
|
|
2569
|
+
toolChainStubReplacements: c1StubReplacements > 0 ? c1StubReplacements : undefined,
|
|
2570
|
+
// C2: artifact oversize degradation telemetry
|
|
2571
|
+
artifactDegradations: c2ArtifactDegradations > 0 ? c2ArtifactDegradations : undefined,
|
|
2572
|
+
artifactOversizeThresholdTokens: c2ArtifactThresholdTokens,
|
|
2573
|
+
// Sprint 2.1: tool artifact hydration telemetry
|
|
2574
|
+
artifactsHydrated: hydrationResult.artifactsHydrated > 0 ? hydrationResult.artifactsHydrated : undefined,
|
|
2575
|
+
hydrationBytes: hydrationResult.hydrationBytes > 0 ? hydrationResult.hydrationBytes : undefined,
|
|
2576
|
+
hydrationMisses: hydrationResult.hydrationMisses > 0 ? hydrationResult.hydrationMisses : undefined,
|
|
1900
2577
|
};
|
|
1901
2578
|
if (pressureHigh) {
|
|
1902
2579
|
warnings.push(`SESSION_PRESSURE_HIGH: avg_turn_cost=${avgTurnCost} tokens, dynamic reserve capped at ${Math.round(dynamicReserve * 100)}%`);
|
|
@@ -1913,6 +2590,14 @@ export class Compositor {
|
|
|
1913
2590
|
// VS-1: Dual-write, session-scoped key for backwards compat;
|
|
1914
2591
|
// topic-scoped key for per-topic window retrieval when activeTopicId is set.
|
|
1915
2592
|
try {
|
|
2593
|
+
// B2: Compute a cheap prefix input hash from the system + identity slot
|
|
2594
|
+
// contents that fed the stable prefix. Stored in WindowCacheMeta so the
|
|
2595
|
+
// C4 fast-exit can detect prefix mutations without re-running full compose.
|
|
2596
|
+
const _prefixInputHash = createHash('sha256')
|
|
2597
|
+
.update(systemContent ?? '')
|
|
2598
|
+
.update('\n␞\n')
|
|
2599
|
+
.update(identityContent ?? '')
|
|
2600
|
+
.digest('hex');
|
|
1916
2601
|
await this.cache.setWindow(request.agentId, request.sessionKey, messages, 120);
|
|
1917
2602
|
await this.cache.setWindowMeta(request.agentId, request.sessionKey, {
|
|
1918
2603
|
slots: slots,
|
|
@@ -1920,6 +2605,8 @@ export class Compositor {
|
|
|
1920
2605
|
warnings,
|
|
1921
2606
|
diagnostics,
|
|
1922
2607
|
composedAt,
|
|
2608
|
+
prefixHash,
|
|
2609
|
+
prefixInputHash: _prefixInputHash,
|
|
1923
2610
|
}, 120);
|
|
1924
2611
|
}
|
|
1925
2612
|
catch {
|
|
@@ -1972,7 +2659,7 @@ export class Compositor {
|
|
|
1972
2659
|
// Cursor write is best-effort
|
|
1973
2660
|
}
|
|
1974
2661
|
}
|
|
1975
|
-
console.log(`[hypermem:compose] agent=${request.agentId} triggers=${diagTriggerHits} fallback=${diagTriggerFallbackUsed} facts=${diagFactsIncluded} semantic=${diagSemanticResults} chunks=${diagDocChunkCollections} scopeFiltered=${diagScopeFiltered} mode=${diagRetrievalMode} crossTopicKeystones=${diagCrossTopicKeystones}`);
|
|
2662
|
+
console.log(`[hypermem:compose] agent=${request.agentId} triggers=${diagTriggerHits} fallback=${diagTriggerFallbackUsed} facts=${diagFactsIncluded} semantic=${diagSemanticResults} chunks=${diagDocChunkCollections} scopeFiltered=${diagScopeFiltered} mode=${diagRetrievalMode} crossTopicKeystones=${diagCrossTopicKeystones} c2_degradations=${c2ArtifactDegradations} c2_threshold=${c2ArtifactThresholdTokens}`);
|
|
1976
2663
|
return {
|
|
1977
2664
|
messages: outputMessages,
|
|
1978
2665
|
tokenCount: totalTokens,
|
|
@@ -2080,7 +2767,7 @@ export class Compositor {
|
|
|
2080
2767
|
},
|
|
2081
2768
|
});
|
|
2082
2769
|
}
|
|
2083
|
-
async refreshRedisGradient(agentId, sessionKey, db, tokenBudget) {
|
|
2770
|
+
async refreshRedisGradient(agentId, sessionKey, db, tokenBudget, historyDepth) {
|
|
2084
2771
|
const store = new MessageStore(db);
|
|
2085
2772
|
const conversation = store.getConversation(sessionKey);
|
|
2086
2773
|
if (!conversation)
|
|
@@ -2105,28 +2792,38 @@ export class Compositor {
|
|
|
2105
2792
|
// Fence lookup is best-effort
|
|
2106
2793
|
}
|
|
2107
2794
|
// Phase 3: prefer DAG walk from context head
|
|
2795
|
+
const refreshHistoryLimit = Math.min(this.config.maxHistoryMessages, Math.max(1, historyDepth ?? this.config.maxHistoryMessages));
|
|
2108
2796
|
let rawHistory;
|
|
2109
2797
|
if (activeContext?.headMessageId) {
|
|
2110
|
-
rawHistory = store.getHistoryByDAGWalk(activeContext.headMessageId,
|
|
2798
|
+
rawHistory = store.getHistoryByDAGWalk(activeContext.headMessageId, refreshHistoryLimit);
|
|
2111
2799
|
if (rawHistory.length === 0) {
|
|
2112
|
-
rawHistory = store.getRecentMessages(conversation.id,
|
|
2800
|
+
rawHistory = store.getRecentMessages(conversation.id, refreshHistoryLimit, gradientFenceMessageId);
|
|
2113
2801
|
}
|
|
2114
2802
|
}
|
|
2115
2803
|
else {
|
|
2116
|
-
rawHistory = store.getRecentMessages(conversation.id,
|
|
2117
|
-
}
|
|
2804
|
+
rawHistory = store.getRecentMessages(conversation.id, refreshHistoryLimit, gradientFenceMessageId);
|
|
2805
|
+
}
|
|
2806
|
+
// Sprint 3 (AfterTurn Rebuild/Trim Loop Fix): cap gradient total-window tokens
|
|
2807
|
+
// at the same 65% target that assemble.normal trims to. Previously this was
|
|
2808
|
+
// tokenBudget/0.80 (≈1.25×budget), which made applyToolGradient preserve more
|
|
2809
|
+
// content than the trim target allowed — causing assemble.normal to always trim
|
|
2810
|
+
// on the next turn even in the steady-state path. Aligning the gradient cap to
|
|
2811
|
+
// the trim target means the rebuilt window already fits within the assemble
|
|
2812
|
+
// envelope by construction.
|
|
2813
|
+
const { softBudget: gradientAssembleBudget } = resolveTrimBudgets(tokenBudget ?? 0);
|
|
2118
2814
|
const transformedHistory = applyToolGradient(rawHistory, {
|
|
2119
2815
|
totalWindowTokens: tokenBudget && tokenBudget > 0
|
|
2120
|
-
?
|
|
2816
|
+
? gradientAssembleBudget
|
|
2121
2817
|
: TOOL_PLANNING_BASELINE_WINDOW,
|
|
2122
2818
|
});
|
|
2123
2819
|
// If a token budget is provided, trim the gradient-compressed window to fit
|
|
2124
|
-
// before writing to Redis.
|
|
2125
|
-
//
|
|
2126
|
-
//
|
|
2820
|
+
// before writing to Redis. The cap uses the same GRADIENT_ASSEMBLE_TARGET
|
|
2821
|
+
// (0.65) so the window written to Redis sits inside the assemble.normal trim
|
|
2822
|
+
// envelope. The next assemble() will find the window already within budget
|
|
2823
|
+
// and skip the trim entirely in the steady-state path.
|
|
2127
2824
|
let historyToWrite = transformedHistory;
|
|
2128
2825
|
if (tokenBudget && tokenBudget > 0) {
|
|
2129
|
-
const budgetCap =
|
|
2826
|
+
const budgetCap = gradientAssembleBudget;
|
|
2130
2827
|
let runningTokens = 0;
|
|
2131
2828
|
const clusters = clusterNeutralMessages(transformedHistory);
|
|
2132
2829
|
const cappedClusters = [];
|
|
@@ -2146,7 +2843,7 @@ export class Compositor {
|
|
|
2146
2843
|
`for ${agentId}/${sessionKey} (budgetCap=${budgetCap}, tokenCost=${runningTokens})`);
|
|
2147
2844
|
}
|
|
2148
2845
|
}
|
|
2149
|
-
await this.cache.replaceHistory(agentId, sessionKey, historyToWrite,
|
|
2846
|
+
await this.cache.replaceHistory(agentId, sessionKey, historyToWrite, refreshHistoryLimit);
|
|
2150
2847
|
}
|
|
2151
2848
|
// ─── Slot Content Resolution ─────────────────────────────────
|
|
2152
2849
|
/**
|
|
@@ -2210,6 +2907,19 @@ export class Compositor {
|
|
|
2210
2907
|
* Returns [content, factCount, scopeFilteredCount] or null if DB unavailable.
|
|
2211
2908
|
*/
|
|
2212
2909
|
buildFactsFromDb(agentId, sessionKey, db) {
|
|
2910
|
+
const sections = this.buildFactSectionsFromDb(agentId, sessionKey, db);
|
|
2911
|
+
if (!sections)
|
|
2912
|
+
return null;
|
|
2913
|
+
const combined = [sections.stableContent, sections.volatileContent]
|
|
2914
|
+
.filter((value) => Boolean(value))
|
|
2915
|
+
.join('\n');
|
|
2916
|
+
return [
|
|
2917
|
+
combined || null,
|
|
2918
|
+
sections.stableCount + sections.volatileCount,
|
|
2919
|
+
sections.filteredCount,
|
|
2920
|
+
];
|
|
2921
|
+
}
|
|
2922
|
+
buildFactSectionsFromDb(agentId, sessionKey, db) {
|
|
2213
2923
|
if (!db)
|
|
2214
2924
|
return null;
|
|
2215
2925
|
const tableExists = db.prepare("SELECT count(*) as cnt FROM sqlite_master WHERE type='table' AND name='facts'").get();
|
|
@@ -2225,30 +2935,52 @@ export class Compositor {
|
|
|
2225
2935
|
ORDER BY confidence DESC, decay_score ASC
|
|
2226
2936
|
LIMIT ?
|
|
2227
2937
|
`).all(agentId, this.config.maxFacts);
|
|
2228
|
-
if (rawRows.length === 0)
|
|
2229
|
-
return
|
|
2230
|
-
|
|
2938
|
+
if (rawRows.length === 0) {
|
|
2939
|
+
return {
|
|
2940
|
+
stableContent: null,
|
|
2941
|
+
stableCount: 0,
|
|
2942
|
+
volatileContent: null,
|
|
2943
|
+
volatileCount: 0,
|
|
2944
|
+
filteredCount: 0,
|
|
2945
|
+
};
|
|
2946
|
+
}
|
|
2231
2947
|
const ctx = { agentId, sessionKey };
|
|
2232
2948
|
const { allowed, filteredCount } = filterByScope(rawRows.map(r => ({
|
|
2233
2949
|
...r,
|
|
2234
2950
|
agentId: r.agent_id,
|
|
2235
2951
|
sessionKey: r.session_key,
|
|
2236
2952
|
})), ctx);
|
|
2237
|
-
if (allowed.length === 0)
|
|
2238
|
-
return
|
|
2239
|
-
|
|
2240
|
-
|
|
2241
|
-
|
|
2242
|
-
|
|
2243
|
-
|
|
2244
|
-
|
|
2245
|
-
|
|
2246
|
-
|
|
2247
|
-
|
|
2248
|
-
|
|
2249
|
-
|
|
2250
|
-
|
|
2251
|
-
|
|
2953
|
+
if (allowed.length === 0) {
|
|
2954
|
+
return {
|
|
2955
|
+
stableContent: null,
|
|
2956
|
+
stableCount: 0,
|
|
2957
|
+
volatileContent: null,
|
|
2958
|
+
volatileCount: 0,
|
|
2959
|
+
filteredCount,
|
|
2960
|
+
};
|
|
2961
|
+
}
|
|
2962
|
+
const formatRows = (rows) => {
|
|
2963
|
+
if (rows.length === 0)
|
|
2964
|
+
return null;
|
|
2965
|
+
return rows
|
|
2966
|
+
.map(r => {
|
|
2967
|
+
const fromOtherSession = r.sessionKey && r.sessionKey !== sessionKey;
|
|
2968
|
+
const sessionSuffix = fromOtherSession
|
|
2969
|
+
? `, session:${r.sessionKey.slice(-8)}`
|
|
2970
|
+
: '';
|
|
2971
|
+
return `- [${r.domain || 'general'}${sessionSuffix}] ${r.content}`;
|
|
2972
|
+
})
|
|
2973
|
+
.join('\n');
|
|
2974
|
+
};
|
|
2975
|
+
const stableRows = allowed.filter(r => r.scope !== 'session' && (!r.sessionKey || r.sessionKey !== sessionKey));
|
|
2976
|
+
const volatileRows = allowed.filter(r => !stableRows.includes(r));
|
|
2977
|
+
return {
|
|
2978
|
+
stableContent: formatRows(stableRows),
|
|
2979
|
+
stableCount: stableRows.length,
|
|
2980
|
+
volatileContent: formatRows(volatileRows),
|
|
2981
|
+
volatileCount: volatileRows.length,
|
|
2982
|
+
filteredCount,
|
|
2983
|
+
};
|
|
2252
2984
|
}
|
|
2253
2985
|
/**
|
|
2254
2986
|
* Build knowledge content from library DB.
|