@miriad-systems/nuum 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +37 -8
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -31657,8 +31657,9 @@ var Config;
|
|
|
31657
31657
|
tokenBudgets: exports_external.object({
|
|
31658
31658
|
mainAgentContext: exports_external.number().default(180000),
|
|
31659
31659
|
temporalBudget: exports_external.number().default(64000),
|
|
31660
|
-
compactionThreshold: exports_external.number().default(
|
|
31661
|
-
compactionTarget: exports_external.number().default(
|
|
31660
|
+
compactionThreshold: exports_external.number().default(80000),
|
|
31661
|
+
compactionTarget: exports_external.number().default(60000),
|
|
31662
|
+
compactionHardLimit: exports_external.number().default(150000),
|
|
31662
31663
|
recencyBufferMessages: exports_external.number().default(10),
|
|
31663
31664
|
temporalQueryBudget: exports_external.number().default(512000),
|
|
31664
31665
|
ltmReflectBudget: exports_external.number().default(180000),
|
|
@@ -34650,11 +34651,12 @@ async function shouldTriggerCompaction(temporal, workers2, config) {
|
|
|
34650
34651
|
const viewTokens = await getEffectiveViewTokens(temporal);
|
|
34651
34652
|
return viewTokens > config.compactionThreshold;
|
|
34652
34653
|
}
|
|
34654
|
+
var FIXED_OVERHEAD_TOKENS = 40000;
|
|
34653
34655
|
async function getEffectiveViewTokens(temporal) {
|
|
34654
34656
|
const messages = await temporal.getMessages();
|
|
34655
34657
|
const summaries = await temporal.getSummaries();
|
|
34656
34658
|
const view = buildTemporalView({ budget: 0, messages, summaries });
|
|
34657
|
-
return view.totalTokens;
|
|
34659
|
+
return view.totalTokens + FIXED_OVERHEAD_TOKENS;
|
|
34658
34660
|
}
|
|
34659
34661
|
async function getMessagesToCompact(temporal) {
|
|
34660
34662
|
const lastEndId = await temporal.getLastSummaryEndId();
|
|
@@ -34750,16 +34752,17 @@ async function runAgentLoop(options) {
|
|
|
34750
34752
|
totalOutputTokens += response.usage.completionTokens;
|
|
34751
34753
|
const anthropicMeta = response.providerMetadata?.anthropic;
|
|
34752
34754
|
if (anthropicMeta) {
|
|
34753
|
-
const
|
|
34755
|
+
const cacheWrite = anthropicMeta.cacheCreationInputTokens ?? 0;
|
|
34754
34756
|
const cacheRead = anthropicMeta.cacheReadInputTokens ?? 0;
|
|
34755
|
-
const uncached = response.usage.promptTokens
|
|
34757
|
+
const uncached = response.usage.promptTokens;
|
|
34758
|
+
const total = cacheRead + cacheWrite + uncached;
|
|
34756
34759
|
log5.info("token usage", {
|
|
34757
|
-
input:
|
|
34760
|
+
input: total,
|
|
34758
34761
|
output: response.usage.completionTokens,
|
|
34759
|
-
cacheWrite
|
|
34762
|
+
cacheWrite,
|
|
34760
34763
|
cacheRead,
|
|
34761
34764
|
uncached,
|
|
34762
|
-
cacheHitRate:
|
|
34765
|
+
cacheHitRate: total > 0 ? `${Math.round(cacheRead / total * 100)}%` : "0%"
|
|
34763
34766
|
});
|
|
34764
34767
|
}
|
|
34765
34768
|
if (response.text) {
|
|
@@ -45136,6 +45139,32 @@ async function runAgent(prompt, options) {
|
|
|
45136
45139
|
const { storage, onEvent, abortSignal, onBeforeTurn } = options;
|
|
45137
45140
|
const sessionId = Identifier.ascending("session");
|
|
45138
45141
|
await initializeMcp();
|
|
45142
|
+
const config2 = Config.get();
|
|
45143
|
+
const softLimit = config2.tokenBudgets.compactionThreshold;
|
|
45144
|
+
const hardLimit = config2.tokenBudgets.compactionHardLimit;
|
|
45145
|
+
const tokensBefore = await getEffectiveViewTokens(storage.temporal);
|
|
45146
|
+
if (tokensBefore > hardLimit) {
|
|
45147
|
+
log10.error("context overflow - refusing turn", { tokens: tokensBefore, hardLimit });
|
|
45148
|
+
throw new Error(`Context overflow: ${tokensBefore} tokens exceeds hard limit of ${hardLimit}. ` + `Run 'miriad-code --compact' to reduce context size before continuing.`);
|
|
45149
|
+
}
|
|
45150
|
+
if (tokensBefore > softLimit) {
|
|
45151
|
+
log10.warn("approaching token limit, running compaction before turn", {
|
|
45152
|
+
tokens: tokensBefore,
|
|
45153
|
+
softLimit,
|
|
45154
|
+
target: config2.tokenBudgets.compactionTarget
|
|
45155
|
+
});
|
|
45156
|
+
await runMemoryCuration(storage, { force: true });
|
|
45157
|
+
const tokensAfter = await getEffectiveViewTokens(storage.temporal);
|
|
45158
|
+
if (tokensAfter > softLimit) {
|
|
45159
|
+
log10.warn("compaction didn't reduce tokens below soft limit", {
|
|
45160
|
+
before: tokensBefore,
|
|
45161
|
+
after: tokensAfter,
|
|
45162
|
+
softLimit
|
|
45163
|
+
});
|
|
45164
|
+
} else {
|
|
45165
|
+
log10.info("pre-turn compaction successful", { before: tokensBefore, after: tokensAfter });
|
|
45166
|
+
}
|
|
45167
|
+
}
|
|
45139
45168
|
const model = Provider.getModelForTier("reasoning");
|
|
45140
45169
|
const ctx = await buildAgentContext(storage);
|
|
45141
45170
|
const userMessageId = Identifier.ascending("message");
|