npm - @roj-ai/sdk - Versions diffs - 0.1.19 → 0.1.21 - Mend

@roj-ai/sdk 0.1.19 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

package/dist/core/agents/agent.d.ts.map +1 -1
package/dist/core/agents/agent.js +13 -3
package/dist/core/agents/agent.js.map +1 -1
package/dist/core/context/state.d.ts +8 -0
package/dist/core/context/state.d.ts.map +1 -1
package/dist/core/context/state.js +10 -0
package/dist/core/context/state.js.map +1 -1
package/dist/core/events/base-event-store.d.ts.map +1 -1
package/dist/core/events/base-event-store.js +2 -0
package/dist/core/events/base-event-store.js.map +1 -1
package/dist/core/events/metadata-utils.d.ts.map +1 -1
package/dist/core/events/metadata-utils.js +2 -0
package/dist/core/events/metadata-utils.js.map +1 -1
package/dist/core/llm/anthropic.test.js +27 -0
package/dist/core/llm/anthropic.test.js.map +1 -1
package/dist/core/llm/cache-breakpoints.d.ts +19 -5
package/dist/core/llm/cache-breakpoints.d.ts.map +1 -1
package/dist/core/llm/cache-breakpoints.js +40 -23
package/dist/core/llm/cache-breakpoints.js.map +1 -1
package/dist/core/llm/cache-breakpoints.test.d.ts +2 -0
package/dist/core/llm/cache-breakpoints.test.d.ts.map +1 -0
package/dist/core/llm/cache-breakpoints.test.js +45 -0
package/dist/core/llm/cache-breakpoints.test.js.map +1 -0
package/dist/core/llm/state.d.ts +22 -0
package/dist/core/llm/state.d.ts.map +1 -1
package/dist/core/llm/state.js +23 -11
package/dist/core/llm/state.js.map +1 -1
package/dist/index.d.ts +3 -3
package/dist/index.d.ts.map +1 -1
package/dist/index.js +1 -1
package/dist/index.js.map +1 -1
package/dist/lib/mime.d.ts +1 -1
package/dist/lib/mime.d.ts.map +1 -1
package/dist/lib/mime.js +7 -4
package/dist/lib/mime.js.map +1 -1
package/dist/plugins/agents/plugin.d.ts.map +1 -1
package/dist/plugins/agents/plugin.js +7 -1
package/dist/plugins/agents/plugin.js.map +1 -1
package/dist/plugins/context-compact/context-compact.integration.test.js +54 -0
package/dist/plugins/context-compact/context-compact.integration.test.js.map +1 -1
package/dist/plugins/context-compact/context-compactor.d.ts +2 -0
package/dist/plugins/context-compact/context-compactor.d.ts.map +1 -1
package/dist/plugins/context-compact/context-compactor.js +29 -0
package/dist/plugins/context-compact/context-compactor.js.map +1 -1
package/dist/plugins/context-compact/context-compactor.test.js +6 -0
package/dist/plugins/context-compact/context-compactor.test.js.map +1 -1
package/dist/plugins/limits-guard/config.d.ts +30 -0
package/dist/plugins/limits-guard/config.d.ts.map +1 -1
package/dist/plugins/limits-guard/index.d.ts +3 -3
package/dist/plugins/limits-guard/index.d.ts.map +1 -1
package/dist/plugins/limits-guard/index.js +1 -1
package/dist/plugins/limits-guard/index.js.map +1 -1
package/dist/plugins/limits-guard/limit-guard.d.ts +27 -1
package/dist/plugins/limits-guard/limit-guard.d.ts.map +1 -1
package/dist/plugins/limits-guard/limit-guard.js +67 -0
package/dist/plugins/limits-guard/limit-guard.js.map +1 -1
package/dist/plugins/limits-guard/limit-guard.test.js +65 -1
package/dist/plugins/limits-guard/limit-guard.test.js.map +1 -1
package/dist/plugins/limits-guard/limits-guard.integration.test.js +295 -1
package/dist/plugins/limits-guard/limits-guard.integration.test.js.map +1 -1
package/dist/plugins/limits-guard/plugin.d.ts +23 -2
package/dist/plugins/limits-guard/plugin.d.ts.map +1 -1
package/dist/plugins/limits-guard/plugin.js +107 -2
package/dist/plugins/limits-guard/plugin.js.map +1 -1
package/dist/plugins/mailbox/plugin.d.ts.map +1 -1
package/dist/plugins/mailbox/plugin.js +18 -0
package/dist/plugins/mailbox/plugin.js.map +1 -1
package/dist/plugins/session-stats/plugin.d.ts.map +1 -1
package/dist/plugins/session-stats/plugin.js +5 -1
package/dist/plugins/session-stats/plugin.js.map +1 -1
package/package.json +2 -2
package/src/core/agents/agent.ts +18 -2
package/src/core/context/state.ts +10 -0
package/src/core/events/base-event-store.ts +2 -0
package/src/core/events/metadata-utils.ts +2 -0
package/src/core/llm/anthropic.test.ts +34 -0
package/src/core/llm/cache-breakpoints.test.ts +55 -0
package/src/core/llm/cache-breakpoints.ts +39 -21
package/src/core/llm/state.ts +25 -11
package/src/index.ts +3 -3
package/src/lib/mime.ts +7 -4
package/src/plugins/agents/plugin.ts +7 -1
package/src/plugins/context-compact/context-compact.integration.test.ts +62 -0
package/src/plugins/context-compact/context-compactor.test.ts +6 -0
package/src/plugins/context-compact/context-compactor.ts +31 -0
package/src/plugins/limits-guard/config.ts +35 -0
package/src/plugins/limits-guard/index.ts +3 -3
package/src/plugins/limits-guard/limit-guard.test.ts +80 -1
package/src/plugins/limits-guard/limit-guard.ts +98 -1
package/src/plugins/limits-guard/limits-guard.integration.test.ts +331 -1
package/src/plugins/limits-guard/plugin.ts +153 -3
package/src/plugins/mailbox/plugin.ts +18 -0
package/src/plugins/session-stats/plugin.ts +5 -1

package/src/core/llm/state.ts CHANGED Viewed

@@ -48,6 +48,18 @@ export type LLMMetrics = {
 // LLM events
 // ============================================================================
+const llmMetricsSchema = z4.object({ // metrics payload shared by the `inference_completed` and `auxiliary_inference_completed` events
+	promptTokens: z4.number(),
+	completionTokens: z4.number(),
+	totalTokens: z4.number(),
+	latencyMs: z4.number(),
+	model: z4.string(),
+	provider: z4.string().optional(), // optional from here down: a payload without these fields still validates
+	cost: z4.number().optional(),
+	cachedTokens: z4.number().optional(),
+	cacheWriteTokens: z4.number().optional(),
+})
 export const llmEvents = createEventsFactory({
 	events: {
 		inference_started: z4.object({
@@ -66,19 +78,20 @@ export const llmEvents = createEventsFactory({
 					input: z4.unknown(),
 				})),
 			}),
-			metrics: z4.object({
-				promptTokens: z4.number(),
-				completionTokens: z4.number(),
-				totalTokens: z4.number(),
-				latencyMs: z4.number(),
-				model: z4.string(),
-				provider: z4.string().optional(),
-				cost: z4.number().optional(),
-				cachedTokens: z4.number().optional(),
-				cacheWriteTokens: z4.number().optional(),
-			}),
+			metrics: llmMetricsSchema,
 			llmCallId: llmCallIdSchema.optional(),
 		}),
+		/**
+		 * A side-channel ("auxiliary") inference completed — e.g. the context-compact
+		 * plugin asking the model for a summary. Unlike `inference_completed`, this
+		 * does NOT touch conversation state; it exists purely so the call's token
+		 * usage and cost are still accounted in session stats and metadata. Without
+		 * it, compaction (and any other auxiliary call) would be billed but invisible.
+		 */
+		auxiliary_inference_completed: z4.object({
+			agentId: agentIdSchema,
+			metrics: llmMetricsSchema,
+		}),
 		inference_failed: z4.object({
 			agentId: agentIdSchema,
 			error: z4.string(),
@@ -89,4 +102,5 @@ export const llmEvents = createEventsFactory({
 export type InferenceStartedEvent = (typeof llmEvents)['Events']['inference_started']
 export type InferenceCompletedEvent = (typeof llmEvents)['Events']['inference_completed']
+export type AuxiliaryInferenceCompletedEvent = (typeof llmEvents)['Events']['auxiliary_inference_completed']
 export type InferenceFailedEvent = (typeof llmEvents)['Events']['inference_failed']

package/src/index.ts CHANGED Viewed

@@ -87,9 +87,9 @@ export { agentsPlugin } from '~/plugins/agents/plugin.js'
 export type { AgentsPluginConfig } from '~/plugins/agents/plugin.js'
 export { contextCompactPlugin } from '~/plugins/context-compact/plugin.js'
 export type { ContextCompactPluginConfig } from '~/plugins/context-compact/plugin.js'
-export { limitsGuardPlugin, selectAgentCounters } from '~/plugins/limits-guard/plugin.js'
-export type { AgentCounters, LimitsAgentConfig } from '~/plugins/limits-guard/plugin.js'
-export type { AgentLimits } from '~/plugins/limits-guard/config.js'
+export { limitsGuardPlugin, selectAgentCounters, sumSessionSpend } from '~/plugins/limits-guard/plugin.js'
+export type { AgentCounters, BudgetExceededEvent, LimitsAgentConfig } from '~/plugins/limits-guard/plugin.js'
+export type { AgentLimits, LimitsSessionConfig } from '~/plugins/limits-guard/config.js'
 export { mailboxPlugin } from '~/plugins/mailbox/plugin.js'
 export type { MailboxAgentConfig, MailboxPresetConfig } from '~/plugins/mailbox/plugin.js'
 export { resultEvictionPlugin } from '~/plugins/result-eviction/plugin.js'

package/src/lib/mime.ts CHANGED Viewed

@@ -2,20 +2,23 @@
  * MIME type detection utilities.
  */
+// Only formats accepted as image content by the LLM providers (Anthropic
+// allows jpeg/png/gif/webp). Notably excludes svg/bmp/ico: feeding those as
+// image blocks makes the provider reject the whole request with a 400
+// ("media_type: Input should be 'image/jpeg', 'image/png', 'image/gif' or
+// 'image/webp'"). Such files fall through to being read as text instead —
+// which for SVG (XML) is more useful to the model anyway.
 const IMAGE_MIME_TYPES: Record<string, string> = {
 	png: 'image/png',
 	jpg: 'image/jpeg',
 	jpeg: 'image/jpeg',
 	gif: 'image/gif',
 	webp: 'image/webp',
-	svg: 'image/svg+xml',
-	bmp: 'image/bmp',
-	ico: 'image/x-icon',
 }
 /**
  * Get MIME type for an image file based on extension.
- * Returns undefined if not a recognized image format.
+ * Returns undefined if not an LLM-supported image format.
  */
 export function getImageMimeType(filename: string): string | undefined {
 	const ext = filename.split('.').pop()?.toLowerCase()

package/src/plugins/agents/plugin.ts CHANGED Viewed

@@ -127,6 +127,11 @@ function buildChildrenStatus(sessionAgents: Map<AgentId, AgentState>, parentId:
 		const last = previewLastAssistant(c)
 		const parts: string[] = [c.id, c.status]
+		// Surface why a child paused (e.g. budget/limit exhaustion) so the parent can
+		// react — bump the budget and resume, reassign the work, or stop.
+		if (c.status === 'paused' && c.pauseMessage) {
+			parts.push(`reason: ${c.pauseMessage.replaceAll('"', "'")}`)
+		}
 		parts.push(`${tools} tools`)
 		parts.push(`${llm} llm`)
 		if (subs > 0) parts.push(`${subs} sub${subs === 1 ? '' : 's'}`)
@@ -414,7 +419,8 @@ export const agentsPlugin = definePlugin('agents')
 - **New task** → spawn a new agent using \`start_<agent_name>\`. You will receive the agent's ID in the result — use it with \`send_message\` for follow-up communication.
 - **Follow-up on an existing task** → send a message to the existing agent via \`send_message\` with the agent's ID. Do NOT spawn a new agent for feedback, corrections, or additional instructions on a task already assigned.
-- Spawned agents communicate back to you via \`send_message\`. Check your incoming messages for their results and progress updates.`
+- Spawned agents communicate back to you via \`send_message\`. Check your incoming messages for their results and progress updates.
+- If a child pauses early it sends you a \`<child-paused agent="…">reason</child-paused>\` message (e.g. it hit a cost/limit budget). Decide what to do: resume it (after addressing the cause), reassign or drop the work, or stop.`
 		// Only include supervision instructions if supervision is actually enabled
 		// for this session — otherwise the section is misleading bloat.

package/src/plugins/context-compact/context-compact.integration.test.ts CHANGED Viewed

@@ -1,12 +1,14 @@
 import { describe, expect, it } from 'bun:test'
 import z from 'zod/v4'
 import { contextEvents } from '~/core/context/state.js'
+import { llmEvents } from '~/core/llm/state.js'
 import { MockLLMProvider } from '~/core/llm/mock.js'
 import type { InferenceRequest } from '~/core/llm/provider.js'
 import { ModelId } from '~/core/llm/schema.js'
 import type { Preset } from '~/core/preset/index.js'
 import { createTool } from '~/core/tools/definition.js'
 import { ToolCallId } from '~/core/tools/schema.js'
+import { selectSessionStats, sessionStatsPlugin } from '~/plugins/session-stats/index.js'
 import { createTestPreset, TestHarness } from '~/testing/index.js'
 import { contextCompactPlugin } from './index.js'
@@ -336,4 +338,64 @@ describe('context-compact plugin', () => {
 			await harness.shutdown()
 		})
 	})
+	// =========================================================================
+	// Cost accounting — the compaction summarization call is a real, billed LLM
+	// call. Its tokens/cost must land in session stats, not vanish. (Regression:
+	// runAuxiliaryInference used to skip emitting any stats event.)
+	// =========================================================================
+	describe('compaction cost accounting', () => {
+		it('summarization call cost is counted in session stats', async () => {
+			const REGULAR_COST = 0.01
+			const SUMMARY_COST = 0.05
+			const harness = new TestHarness({
+				systemPlugins: [contextCompactPlugin, sessionStatsPlugin],
+				presets: [createCompactPreset(10)],
+				mockHandler: (request) => {
+					if (isSummarizationRequest(request)) {
+						return {
+							content: 'Summary of conversation so far.',
+							toolCalls: [],
+							finishReason: 'stop',
+							metrics: MockLLMProvider.defaultMetricsWithCost(SUMMARY_COST),
+						}
+					}
+					return {
+						content: 'Agent response with some content to increase token count.',
+						toolCalls: [],
+						finishReason: 'stop',
+						metrics: MockLLMProvider.defaultMetricsWithCost(REGULAR_COST),
+					}
+				},
+			})
+			const session = await harness.createSession('test')
+			await session.sendAndWaitForIdle('First message')
+			await session.sendAndWaitForIdle('Second message')
+			await session.sendAndWaitForIdle('Third message to trigger actual compaction')
+			// Compaction actually ran and made a billed summarization call.
+			const auxEvents = await session.getEventsByType(llmEvents, 'auxiliary_inference_completed')
+			expect(auxEvents.length).toBeGreaterThanOrEqual(1)
+			expect(auxEvents.some((e) => e.metrics.cost === SUMMARY_COST)).toBe(true)
+			// Session stats must include both the regular turns AND the summarization
+			// call — in count, tokens, and cost.
+			const inferEvents = await session.getEventsByType(llmEvents, 'inference_completed')
+			const allLlmEvents = [...inferEvents, ...auxEvents]
+			const expectedCost = allLlmEvents.reduce((sum, e) => sum + (e.metrics.cost ?? 0), 0)
+			const expectedTokens = allLlmEvents.reduce((sum, e) => sum + e.metrics.totalTokens, 0)
+			const stats = selectSessionStats(session.state)
+			expect(stats.llmCalls).toBe(allLlmEvents.length)
+			expect(stats.totalCost).toBeCloseTo(expectedCost, 10)
+			expect(stats.totalTokens).toBe(expectedTokens)
+			// And the summarization cost is genuinely part of the total (not zero).
+			expect(stats.totalCost).toBeGreaterThanOrEqual(SUMMARY_COST)
+			await harness.shutdown()
+		})
+	})
 })

package/src/plugins/context-compact/context-compactor.test.ts CHANGED Viewed

@@ -291,6 +291,7 @@ describe('createContextCompactedEvent', () => {
 				{ role: 'system', content: 'summary' },
 				{ role: 'user', content: 'recent' },
 			],
+			originalMessages: [{ role: 'user', content: 'old message' }],
 			summary: 'The summary',
 			originalTokens: 1000,
 			compactedTokens: 200,
@@ -309,6 +310,8 @@ describe('createContextCompactedEvent', () => {
 		expect(event.newConversationHistory.length).toBe(2)
 		expect(event.newConversationHistory[0].role).toBe('system')
 		expect(event.newConversationHistory[0].content).toBe('summary')
+		expect(event.originalMessages?.length).toBe(1)
+		expect(event.originalMessages?.[0].content).toBe('old message')
 		expect(event.timestamp).toBeDefined()
 	})
@@ -318,6 +321,7 @@ describe('createContextCompactedEvent', () => {
 		const toolCallId = generateToolCallId()
 		const result: CompactionResult = {
 			compactedMessages: [{ role: 'tool', content: 'tool result', toolCallId }],
+			originalMessages: [],
 			summary: '',
 			originalTokens: 100,
 			compactedTokens: 50,
@@ -852,6 +856,7 @@ describe('createContextCompactedEvent with historyPath', () => {
 			compactedMessages: [
 				{ role: 'system', content: 'summary' },
 			],
+			originalMessages: [],
 			summary: 'The summary',
 			originalTokens: 1000,
 			compactedTokens: 200,
@@ -871,6 +876,7 @@ describe('createContextCompactedEvent with historyPath', () => {
 			compactedMessages: [
 				{ role: 'system', content: 'summary' },
 			],
+			originalMessages: [],
 			summary: 'The summary',
 			originalTokens: 1000,
 			compactedTokens: 200,

package/src/plugins/context-compact/context-compactor.ts CHANGED Viewed

@@ -131,6 +131,8 @@ export const DEFAULT_SUMMARY_INSTRUCTION =
 export interface CompactionResult {
 	/** New messages to use (summary + kept messages) */
 	compactedMessages: LLMMessage[]
+	/** The older messages that were summarized away (the compaction input) */
+	originalMessages: LLMMessage[]
 	/** Generated summary text */
 	summary: string
 	/** Token count before compaction */
@@ -274,6 +276,7 @@ export class ContextCompactor {
 			this.logger.warn('No messages to compact', { sessionId, agentId })
 			return Ok({
 				compactedMessages: messages,
+				originalMessages: [],
 				summary: '',
 				originalTokens,
 				compactedTokens: originalTokens,
@@ -349,6 +352,7 @@ export class ContextCompactor {
 		return Ok({
 			compactedMessages,
+			originalMessages: toCompact,
 			summary,
 			originalTokens,
 			compactedTokens,
@@ -378,6 +382,7 @@ export function createContextCompactedEvent(
 		contextEvents.create('context_compacted', {
 			agentId,
 			compactedContent: result.summary,
+			originalMessages: result.originalMessages.map(toDisplayMessage),
 			newConversationHistory,
 			originalTokens: result.originalTokens,
 			compactedTokens: result.compactedTokens,
@@ -386,3 +391,29 @@ export function createContextCompactedEvent(
 		}),
 	)
 }
+/**
+ * Convert an LLM message into a display-only conversation message, preserving
+ * tool-call and tool-result detail in the rendered content. Used for the
+ * compaction "input" snapshot shown in the debug UI — not for reconstruction.
+ *
+ * Mapping by role:
+ * - `assistant`: the text content (when non-empty) followed by one
+ *   `[tool call: name(jsonInput)]` line per tool call, all joined with newlines.
+ * - `tool`: emitted under the `system` role, with the content prefixed by a
+ *   `[tool result]` header that includes the tool name when one is set.
+ *   NOTE(review): presumably the display message type has no `tool` role — confirm.
+ * - any other role: the role is kept as-is.
+ *
+ * For `tool` and other roles, non-string content is serialized with `JSON.stringify`.
+ *
+ * @param msg - The LLM message to render.
+ * @returns A display message carrying the rendered text in `content`.
+ */
+function toDisplayMessage(msg: LLMMessage): CompactedConversationMessage {
+	if (msg.role === 'assistant') {
+		const parts: string[] = []
+		if (msg.content) parts.push(msg.content) // skip empty text so the joined output has no leading blank line
+		if (msg.toolCalls?.length) {
+			for (const tc of msg.toolCalls) {
+				parts.push(`[tool call: ${tc.name}(${JSON.stringify(tc.input)})]`)
+			}
+		}
+		return { role: 'assistant', content: parts.join('\n') }
+	}
+	if (msg.role === 'tool') {
+		const content = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content)
+		return { role: 'system', content: `[tool result${msg.toolName ? `: ${msg.toolName}` : ''}]\n${content}` }
+	}
+	return {
+		role: msg.role,
+		content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content),
+	}
+}

package/src/plugins/limits-guard/config.ts CHANGED Viewed

@@ -23,4 +23,39 @@ export interface AgentLimits {
 	maxRepeatedToolCalls?: number
 	/** Maximum consecutive identical text-only responses. Default: 3 */
 	maxRepeatedResponses?: number
+	/**
+	 * Maximum cumulative LLM cost (USD) this agent may spend before it is paused.
+	 * Spend is summed from `inference_completed` metrics and, unlike the counter
+	 * limits, is NOT reset on resume. Default: unlimited.
+	 */
+	maxCost?: number
+	/**
+	 * Maximum cumulative total tokens (prompt + completion) this agent may consume
+	 * before it is paused. Useful as a fallback when providers don't report cost.
+	 * Not reset on resume. Default: unlimited.
+	 */
+	maxTokens?: number
+	/**
+	 * Maximum number of context compaction events for this agent before it is paused.
+	 * Guards against pathological compaction loops. Reset on resume. Default: unlimited.
+	 */
+	maxCompactions?: number
+}
+// ============================================================================
+// Session Limits (budget across all agents)
+// ============================================================================
+/**
+ * Session-wide budget, summed across every agent in the session. Configured via
+ * the plugin's session-level config (`pluginConfig`), independent of per-agent
+ * limits. All fields optional - defaults applied via resolveSessionLimits().
+ */
+export interface LimitsSessionConfig {
+	/** Maximum cumulative LLM cost (USD) across all agents. Default: unlimited */
+	maxSessionCost?: number
+	/** Maximum cumulative total tokens across all agents. Default: unlimited */
+	maxSessionTokens?: number
+	/** Ratio of the session budget at which a soft warning is emitted. Default: 0.8 */
+	softLimitRatio?: number
 }

package/src/plugins/limits-guard/index.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-export type { AgentLimits } from './config.js'
+export type { AgentLimits, LimitsSessionConfig } from './config.js'
 export { limitsGuardPlugin } from './plugin.js'
-export type { AgentCounters, LimitsAgentConfig, LimitWarningEvent } from './plugin.js'
-export { createAgentCounters, limitsEvents } from './plugin.js'
+export type { AgentCounters, BudgetExceededEvent, LimitsAgentConfig, LimitWarningEvent } from './plugin.js'
+export { createAgentCounters, limitsEvents, sumSessionSpend } from './plugin.js'

package/src/plugins/limits-guard/limit-guard.test.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { describe, expect, it } from 'bun:test'
-import { checkLimits, resolveAgentLimits } from './limit-guard.js'
+import { checkBudget, checkLimits, resolveAgentLimits, resolveSessionLimits } from './limit-guard.js'
 import { createAgentCounters } from './plugin.js'
 import type { AgentCounters } from './plugin.js'
@@ -14,6 +14,10 @@ describe('resolveAgentLimits', () => {
 		expect(limits.softLimitRatio).toBe(0.8)
 		expect(limits.maxRepeatedToolCalls).toBe(3)
 		expect(limits.maxRepeatedResponses).toBe(3)
+		// Budgets and compaction cap are opt-in (unlimited by default)
+		expect(limits.maxCost).toBe(Number.POSITIVE_INFINITY)
+		expect(limits.maxTokens).toBe(Number.POSITIVE_INFINITY)
+		expect(limits.maxCompactions).toBe(Number.POSITIVE_INFINITY)
 	})
 	it('returns defaults when empty config', () => {
@@ -158,4 +162,79 @@ describe('checkLimits', () => {
 		)
 		expect(result.status).toBe('hard_limit')
 	})
+	// --- Compaction limit ---
+	it('detects maxCompactions hard limit', () => {
+		const limits = resolveAgentLimits({ maxCompactions: 5 })
+		const result = checkLimits(makeCounters({ compactionCount: 5 }), limits)
+		expect(result.status).toBe('hard_limit')
+		if (result.status === 'hard_limit') {
+			expect(result.limitName).toBe('maxCompactions')
+		}
+	})
+	it('does not cap compactions by default (unlimited)', () => {
+		const result = checkLimits(makeCounters({ compactionCount: 9999 }), defaultLimits)
+		expect(result.status).toBe('ok')
+	})
+})
+describe('checkBudget', () => {
+	const names = { cost: 'maxCost', tokens: 'maxTokens' }
+	it('returns ok when under budget', () => {
+		const result = checkBudget({ costSpent: 1, tokensUsed: 100 }, 5, 1000, 0.8, names)
+		expect(result.status).toBe('ok')
+	})
+	it('returns ok when unlimited (Infinity)', () => {
+		const result = checkBudget(
+			{ costSpent: 1_000_000, tokensUsed: 1_000_000 },
+			Number.POSITIVE_INFINITY,
+			Number.POSITIVE_INFINITY,
+			0.8,
+			names,
+		)
+		expect(result.status).toBe('ok')
+	})
+	it('detects cost hard limit', () => {
+		const result = checkBudget({ costSpent: 5.01, tokensUsed: 0 }, 5, Number.POSITIVE_INFINITY, 0.8, names)
+		expect(result.status).toBe('hard_limit')
+		if (result.status === 'hard_limit') expect(result.limitName).toBe('maxCost')
+	})
+	it('detects token hard limit', () => {
+		const result = checkBudget({ costSpent: 0, tokensUsed: 1000 }, Number.POSITIVE_INFINITY, 1000, 0.8, names)
+		expect(result.status).toBe('hard_limit')
+		if (result.status === 'hard_limit') expect(result.limitName).toBe('maxTokens')
+	})
+	it('emits soft warning approaching cost budget', () => {
+		const result = checkBudget({ costSpent: 4.2, tokensUsed: 0 }, 5, Number.POSITIVE_INFINITY, 0.8, names)
+		expect(result.status).toBe('soft_warning')
+		if (result.status === 'soft_warning') expect(result.limitName).toBe('maxCost')
+	})
+	it('handles sub-dollar budgets without spurious warnings', () => {
+		// floor-based logic would warn at $0 for a $0.50 budget — float-aware must not.
+		const result = checkBudget({ costSpent: 0.1, tokensUsed: 0 }, 0.5, Number.POSITIVE_INFINITY, 0.8, names)
+		expect(result.status).toBe('ok')
+	})
+})
+describe('resolveSessionLimits', () => {
+	it('defaults to unlimited', () => {
+		const limits = resolveSessionLimits()
+		expect(limits.maxSessionCost).toBe(Number.POSITIVE_INFINITY)
+		expect(limits.maxSessionTokens).toBe(Number.POSITIVE_INFINITY)
+		expect(limits.softLimitRatio).toBe(0.8)
+	})
+	it('overrides specific values', () => {
+		const limits = resolveSessionLimits({ maxSessionCost: 10 })
+		expect(limits.maxSessionCost).toBe(10)
+		expect(limits.maxSessionTokens).toBe(Number.POSITIVE_INFINITY)
+	})
 })

package/src/plugins/limits-guard/limit-guard.ts CHANGED Viewed

@@ -4,7 +4,7 @@
  * Returns the worst result: hard_limit > soft_warning > ok.
  */
-import type { AgentLimits } from '~/plugins/limits-guard/config.js'
+import type { AgentLimits, LimitsSessionConfig } from '~/plugins/limits-guard/config.js'
 import type { AgentCounters } from './plugin.js'
 // ============================================================================
@@ -20,6 +20,9 @@ export interface ResolvedAgentLimits {
 	softLimitRatio: number
 	maxRepeatedToolCalls: number
 	maxRepeatedResponses: number
+	maxCost: number
+	maxTokens: number
+	maxCompactions: number
 }
 const DEFAULTS: ResolvedAgentLimits = {
@@ -31,6 +34,10 @@ const DEFAULTS: ResolvedAgentLimits = {
 	softLimitRatio: 0.8,
 	maxRepeatedToolCalls: 3,
 	maxRepeatedResponses: 3,
+	// Budgets and the compaction cap are opt-in: unset means unlimited.
+	maxCost: Number.POSITIVE_INFINITY,
+	maxTokens: Number.POSITIVE_INFINITY,
+	maxCompactions: Number.POSITIVE_INFINITY,
 }
 export function resolveAgentLimits(config?: AgentLimits): ResolvedAgentLimits {
@@ -44,9 +51,98 @@ export function resolveAgentLimits(config?: AgentLimits): ResolvedAgentLimits {
 		softLimitRatio: config.softLimitRatio ?? DEFAULTS.softLimitRatio,
 		maxRepeatedToolCalls: config.maxRepeatedToolCalls ?? DEFAULTS.maxRepeatedToolCalls,
 		maxRepeatedResponses: config.maxRepeatedResponses ?? DEFAULTS.maxRepeatedResponses,
+		maxCost: config.maxCost ?? DEFAULTS.maxCost,
+		maxTokens: config.maxTokens ?? DEFAULTS.maxTokens,
+		maxCompactions: config.maxCompactions ?? DEFAULTS.maxCompactions,
 	}
 }
+// ============================================================================
+// Session budget
+// ============================================================================
+export interface ResolvedSessionLimits {
+	maxSessionCost: number
+	maxSessionTokens: number
+	softLimitRatio: number
+}
+const SESSION_DEFAULTS: ResolvedSessionLimits = {
+	maxSessionCost: Number.POSITIVE_INFINITY,
+	maxSessionTokens: Number.POSITIVE_INFINITY,
+	softLimitRatio: 0.8,
+}
+export function resolveSessionLimits(config?: LimitsSessionConfig): ResolvedSessionLimits {
+	if (!config) return SESSION_DEFAULTS // no config: returns the shared defaults object itself, not a copy
+	return {
+		maxSessionCost: config.maxSessionCost ?? SESSION_DEFAULTS.maxSessionCost, // `??` falls back only on null/undefined, so an explicit 0 is kept
+		maxSessionTokens: config.maxSessionTokens ?? SESSION_DEFAULTS.maxSessionTokens,
+		softLimitRatio: config.softLimitRatio ?? SESSION_DEFAULTS.softLimitRatio,
+	}
+}
+/**
+ * Cumulative spend, either for a single agent or summed across the session.
+ * `checkBudget` compares `costSpent` against its cost limit and `tokensUsed`
+ * against its token limit.
+ */
+export interface BudgetSpend {
+	costSpent: number
+	tokensUsed: number
+}
+/**
+ * Budget check (cost + tokens) shared by per-agent and session-wide budgets.
+ *
+ * Kept separate from {@link checkLimits} so it can run in `beforeInference` —
+ * blocking the *next* call once the budget is exhausted — without also tripping
+ * the counter/pattern limits (those are enforced in `afterInference`). Uses
+ * float-aware comparisons (no flooring) so sub-dollar budgets behave correctly.
+ *
+ * Evaluation order: both hard limits are tested first (cost, then tokens), and
+ * only if neither is hit are the soft thresholds tested (again cost, then
+ * tokens). The first match is returned, so at most one limit is reported per
+ * call. A hard limit triggers when spend is greater than or equal to the limit,
+ * i.e. reaching the limit exactly already counts as exhausted.
+ *
+ * @param spend - Cumulative cost and token usage to evaluate.
+ * @param costLimit - Cost ceiling; `Number.POSITIVE_INFINITY` means unlimited.
+ * @param tokenLimit - Token ceiling; `Number.POSITIVE_INFINITY` means unlimited.
+ * @param softLimitRatio - Fraction of a finite limit at or above which a soft warning is returned.
+ * @param names - Labels reported as `limitName` for the cost and token checks.
+ * @returns A `hard_limit`, `soft_warning`, or `ok` result; the first two carry
+ *   the limit's name, the current value and the ceiling.
+ */
+export function checkBudget(
+	spend: BudgetSpend,
+	costLimit: number,
+	tokenLimit: number,
+	softLimitRatio: number,
+	names: { cost: string; tokens: string },
+): LimitCheckResult {
+	const checks: Array<{ name: string; current: number; max: number }> = [
+		{ name: names.cost, current: spend.costSpent, max: costLimit },
+		{ name: names.tokens, current: spend.tokensUsed, max: tokenLimit },
+	]
+	// Hard limits — tested for every budget before any soft warning, so exhaustion always outranks a warning
+	for (const check of checks) {
+		if (check.current >= check.max) {
+			return {
+				status: 'hard_limit',
+				limitName: check.name,
+				currentValue: check.current,
+				hardLimit: check.max,
+				reason: `${check.name} reached: ${formatBudget(check.current)}/${formatBudget(check.max)}`,
+			}
+		}
+	}
+	// Soft warnings — unlimited (infinite) budgets are skipped and never warn
+	for (const check of checks) {
+		if (check.max !== Number.POSITIVE_INFINITY && check.current >= check.max * softLimitRatio) {
+			return {
+				status: 'soft_warning',
+				limitName: check.name,
+				currentValue: check.current,
+				hardLimit: check.max,
+				message: `Approaching ${check.name} limit: ${formatBudget(check.current)}/${formatBudget(check.max)}`,
+			}
+		}
+	}
+	return { status: 'ok' }
+}
+/**
+ * Format a budget value compactly — 4 decimals for fractional (cost), integer otherwise.
+ * Positive infinity (an unlimited budget) is rendered as '∞'.
+ */
+function formatBudget(value: number): string {
+	if (value === Number.POSITIVE_INFINITY) return '∞'
+	return Number.isInteger(value) ? String(value) : value.toFixed(4)
+}
 // ============================================================================
 // Check result
 // ============================================================================
@@ -68,6 +164,7 @@ export function checkLimits(counters: AgentCounters, limits: ResolvedAgentLimits
 		{ name: 'maxToolCalls', current: counters.toolCallCount, max: limits.maxToolCalls },
 		{ name: 'maxSpawnedAgents', current: counters.spawnedAgentCount, max: limits.maxSpawnedAgents },
 		{ name: 'maxMessagesSent', current: counters.messagesSentCount, max: limits.maxMessagesSent },
+		{ name: 'maxCompactions', current: counters.compactionCount, max: limits.maxCompactions },
 	]
 	for (const check of hardChecks) {