@roj-ai/sdk 0.1.19 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93):
  1. package/dist/core/agents/agent.d.ts.map +1 -1
  2. package/dist/core/agents/agent.js +13 -3
  3. package/dist/core/agents/agent.js.map +1 -1
  4. package/dist/core/context/state.d.ts +8 -0
  5. package/dist/core/context/state.d.ts.map +1 -1
  6. package/dist/core/context/state.js +10 -0
  7. package/dist/core/context/state.js.map +1 -1
  8. package/dist/core/events/base-event-store.d.ts.map +1 -1
  9. package/dist/core/events/base-event-store.js +2 -0
  10. package/dist/core/events/base-event-store.js.map +1 -1
  11. package/dist/core/events/metadata-utils.d.ts.map +1 -1
  12. package/dist/core/events/metadata-utils.js +2 -0
  13. package/dist/core/events/metadata-utils.js.map +1 -1
  14. package/dist/core/llm/anthropic.test.js +27 -0
  15. package/dist/core/llm/anthropic.test.js.map +1 -1
  16. package/dist/core/llm/cache-breakpoints.d.ts +19 -5
  17. package/dist/core/llm/cache-breakpoints.d.ts.map +1 -1
  18. package/dist/core/llm/cache-breakpoints.js +40 -23
  19. package/dist/core/llm/cache-breakpoints.js.map +1 -1
  20. package/dist/core/llm/cache-breakpoints.test.d.ts +2 -0
  21. package/dist/core/llm/cache-breakpoints.test.d.ts.map +1 -0
  22. package/dist/core/llm/cache-breakpoints.test.js +45 -0
  23. package/dist/core/llm/cache-breakpoints.test.js.map +1 -0
  24. package/dist/core/llm/state.d.ts +22 -0
  25. package/dist/core/llm/state.d.ts.map +1 -1
  26. package/dist/core/llm/state.js +23 -11
  27. package/dist/core/llm/state.js.map +1 -1
  28. package/dist/index.d.ts +3 -3
  29. package/dist/index.d.ts.map +1 -1
  30. package/dist/index.js +1 -1
  31. package/dist/index.js.map +1 -1
  32. package/dist/lib/mime.d.ts +1 -1
  33. package/dist/lib/mime.d.ts.map +1 -1
  34. package/dist/lib/mime.js +7 -4
  35. package/dist/lib/mime.js.map +1 -1
  36. package/dist/plugins/agents/plugin.d.ts.map +1 -1
  37. package/dist/plugins/agents/plugin.js +7 -1
  38. package/dist/plugins/agents/plugin.js.map +1 -1
  39. package/dist/plugins/context-compact/context-compact.integration.test.js +54 -0
  40. package/dist/plugins/context-compact/context-compact.integration.test.js.map +1 -1
  41. package/dist/plugins/context-compact/context-compactor.d.ts +2 -0
  42. package/dist/plugins/context-compact/context-compactor.d.ts.map +1 -1
  43. package/dist/plugins/context-compact/context-compactor.js +29 -0
  44. package/dist/plugins/context-compact/context-compactor.js.map +1 -1
  45. package/dist/plugins/context-compact/context-compactor.test.js +6 -0
  46. package/dist/plugins/context-compact/context-compactor.test.js.map +1 -1
  47. package/dist/plugins/limits-guard/config.d.ts +30 -0
  48. package/dist/plugins/limits-guard/config.d.ts.map +1 -1
  49. package/dist/plugins/limits-guard/index.d.ts +3 -3
  50. package/dist/plugins/limits-guard/index.d.ts.map +1 -1
  51. package/dist/plugins/limits-guard/index.js +1 -1
  52. package/dist/plugins/limits-guard/index.js.map +1 -1
  53. package/dist/plugins/limits-guard/limit-guard.d.ts +27 -1
  54. package/dist/plugins/limits-guard/limit-guard.d.ts.map +1 -1
  55. package/dist/plugins/limits-guard/limit-guard.js +67 -0
  56. package/dist/plugins/limits-guard/limit-guard.js.map +1 -1
  57. package/dist/plugins/limits-guard/limit-guard.test.js +65 -1
  58. package/dist/plugins/limits-guard/limit-guard.test.js.map +1 -1
  59. package/dist/plugins/limits-guard/limits-guard.integration.test.js +295 -1
  60. package/dist/plugins/limits-guard/limits-guard.integration.test.js.map +1 -1
  61. package/dist/plugins/limits-guard/plugin.d.ts +23 -2
  62. package/dist/plugins/limits-guard/plugin.d.ts.map +1 -1
  63. package/dist/plugins/limits-guard/plugin.js +107 -2
  64. package/dist/plugins/limits-guard/plugin.js.map +1 -1
  65. package/dist/plugins/mailbox/plugin.d.ts.map +1 -1
  66. package/dist/plugins/mailbox/plugin.js +18 -0
  67. package/dist/plugins/mailbox/plugin.js.map +1 -1
  68. package/dist/plugins/session-stats/plugin.d.ts.map +1 -1
  69. package/dist/plugins/session-stats/plugin.js +5 -1
  70. package/dist/plugins/session-stats/plugin.js.map +1 -1
  71. package/package.json +2 -2
  72. package/src/core/agents/agent.ts +18 -2
  73. package/src/core/context/state.ts +10 -0
  74. package/src/core/events/base-event-store.ts +2 -0
  75. package/src/core/events/metadata-utils.ts +2 -0
  76. package/src/core/llm/anthropic.test.ts +34 -0
  77. package/src/core/llm/cache-breakpoints.test.ts +55 -0
  78. package/src/core/llm/cache-breakpoints.ts +39 -21
  79. package/src/core/llm/state.ts +25 -11
  80. package/src/index.ts +3 -3
  81. package/src/lib/mime.ts +7 -4
  82. package/src/plugins/agents/plugin.ts +7 -1
  83. package/src/plugins/context-compact/context-compact.integration.test.ts +62 -0
  84. package/src/plugins/context-compact/context-compactor.test.ts +6 -0
  85. package/src/plugins/context-compact/context-compactor.ts +31 -0
  86. package/src/plugins/limits-guard/config.ts +35 -0
  87. package/src/plugins/limits-guard/index.ts +3 -3
  88. package/src/plugins/limits-guard/limit-guard.test.ts +80 -1
  89. package/src/plugins/limits-guard/limit-guard.ts +98 -1
  90. package/src/plugins/limits-guard/limits-guard.integration.test.ts +331 -1
  91. package/src/plugins/limits-guard/plugin.ts +153 -3
  92. package/src/plugins/mailbox/plugin.ts +18 -0
  93. package/src/plugins/session-stats/plugin.ts +5 -1
@@ -48,6 +48,18 @@ export type LLMMetrics = {
48
48
  // LLM events
49
49
  // ============================================================================
50
50
 
51
/**
 * Shared schema for the usage metrics attached to LLM events.
 *
 * Both `inference_completed` and `auxiliary_inference_completed` reference this
 * single definition, so the two event payloads always carry the same metric
 * fields.
 */
const llmMetricsSchema = z4.object({
  // Always present.
  promptTokens: z4.number(),
  completionTokens: z4.number(),
  totalTokens: z4.number(),
  latencyMs: z4.number(),
  model: z4.string(),
  // May be omitted from the payload.
  provider: z4.string().optional(),
  cost: z4.number().optional(),
  cachedTokens: z4.number().optional(),
  cacheWriteTokens: z4.number().optional(),
})
62
+
51
63
  export const llmEvents = createEventsFactory({
52
64
  events: {
53
65
  inference_started: z4.object({
@@ -66,19 +78,20 @@ export const llmEvents = createEventsFactory({
66
78
  input: z4.unknown(),
67
79
  })),
68
80
  }),
69
- metrics: z4.object({
70
- promptTokens: z4.number(),
71
- completionTokens: z4.number(),
72
- totalTokens: z4.number(),
73
- latencyMs: z4.number(),
74
- model: z4.string(),
75
- provider: z4.string().optional(),
76
- cost: z4.number().optional(),
77
- cachedTokens: z4.number().optional(),
78
- cacheWriteTokens: z4.number().optional(),
79
- }),
81
+ metrics: llmMetricsSchema,
80
82
  llmCallId: llmCallIdSchema.optional(),
81
83
  }),
84
+ /**
85
+ * A side-channel ("auxiliary") inference completed — e.g. the context-compact
86
+ * plugin asking the model for a summary. Unlike `inference_completed`, this
87
+ * does NOT touch conversation state; it exists purely so the call's token
88
+ * usage and cost are still accounted in session stats and metadata. Without
89
+ * it, compaction (and any other auxiliary call) would be billed but invisible.
90
+ */
91
+ auxiliary_inference_completed: z4.object({
92
+ agentId: agentIdSchema,
93
+ metrics: llmMetricsSchema,
94
+ }),
82
95
  inference_failed: z4.object({
83
96
  agentId: agentIdSchema,
84
97
  error: z4.string(),
@@ -89,4 +102,5 @@ export const llmEvents = createEventsFactory({
89
102
 
90
103
  export type InferenceStartedEvent = (typeof llmEvents)['Events']['inference_started']
91
104
  export type InferenceCompletedEvent = (typeof llmEvents)['Events']['inference_completed']
105
+ export type AuxiliaryInferenceCompletedEvent = (typeof llmEvents)['Events']['auxiliary_inference_completed']
92
106
  export type InferenceFailedEvent = (typeof llmEvents)['Events']['inference_failed']
package/src/index.ts CHANGED
@@ -87,9 +87,9 @@ export { agentsPlugin } from '~/plugins/agents/plugin.js'
87
87
  export type { AgentsPluginConfig } from '~/plugins/agents/plugin.js'
88
88
  export { contextCompactPlugin } from '~/plugins/context-compact/plugin.js'
89
89
  export type { ContextCompactPluginConfig } from '~/plugins/context-compact/plugin.js'
90
- export { limitsGuardPlugin, selectAgentCounters } from '~/plugins/limits-guard/plugin.js'
91
- export type { AgentCounters, LimitsAgentConfig } from '~/plugins/limits-guard/plugin.js'
92
- export type { AgentLimits } from '~/plugins/limits-guard/config.js'
90
+ export { limitsGuardPlugin, selectAgentCounters, sumSessionSpend } from '~/plugins/limits-guard/plugin.js'
91
+ export type { AgentCounters, BudgetExceededEvent, LimitsAgentConfig } from '~/plugins/limits-guard/plugin.js'
92
+ export type { AgentLimits, LimitsSessionConfig } from '~/plugins/limits-guard/config.js'
93
93
  export { mailboxPlugin } from '~/plugins/mailbox/plugin.js'
94
94
  export type { MailboxAgentConfig, MailboxPresetConfig } from '~/plugins/mailbox/plugin.js'
95
95
  export { resultEvictionPlugin } from '~/plugins/result-eviction/plugin.js'
package/src/lib/mime.ts CHANGED
@@ -2,20 +2,23 @@
2
2
  * MIME type detection utilities.
3
3
  */
4
4
 
5
+ // Only formats accepted as image content by the LLM providers (Anthropic
6
+ // allows jpeg/png/gif/webp). Notably excludes svg/bmp/ico: feeding those as
7
+ // image blocks makes the provider reject the whole request with a 400
8
+ // ("media_type: Input should be 'image/jpeg', 'image/png', 'image/gif' or
9
+ // 'image/webp'"). Such files fall through to being read as text instead —
10
+ // which for SVG (XML) is more useful to the model anyway.
5
11
  const IMAGE_MIME_TYPES: Record<string, string> = {
6
12
  png: 'image/png',
7
13
  jpg: 'image/jpeg',
8
14
  jpeg: 'image/jpeg',
9
15
  gif: 'image/gif',
10
16
  webp: 'image/webp',
11
- svg: 'image/svg+xml',
12
- bmp: 'image/bmp',
13
- ico: 'image/x-icon',
14
17
  }
15
18
 
16
19
  /**
17
20
  * Get MIME type for an image file based on extension.
18
- * Returns undefined if not a recognized image format.
21
+ * Returns undefined if not an LLM-supported image format.
19
22
  */
20
23
  export function getImageMimeType(filename: string): string | undefined {
21
24
  const ext = filename.split('.').pop()?.toLowerCase()
@@ -127,6 +127,11 @@ function buildChildrenStatus(sessionAgents: Map<AgentId, AgentState>, parentId:
127
127
  const last = previewLastAssistant(c)
128
128
 
129
129
  const parts: string[] = [c.id, c.status]
130
+ // Surface why a child paused (e.g. budget/limit exhaustion) so the parent can
131
+ // react — bump the budget and resume, reassign the work, or stop.
132
+ if (c.status === 'paused' && c.pauseMessage) {
133
+ parts.push(`reason: ${c.pauseMessage.replaceAll('"', "'")}`)
134
+ }
130
135
  parts.push(`${tools} tools`)
131
136
  parts.push(`${llm} llm`)
132
137
  if (subs > 0) parts.push(`${subs} sub${subs === 1 ? '' : 's'}`)
@@ -414,7 +419,8 @@ export const agentsPlugin = definePlugin('agents')
414
419
 
415
420
  - **New task** → spawn a new agent using \`start_<agent_name>\`. You will receive the agent's ID in the result — use it with \`send_message\` for follow-up communication.
416
421
  - **Follow-up on an existing task** → send a message to the existing agent via \`send_message\` with the agent's ID. Do NOT spawn a new agent for feedback, corrections, or additional instructions on a task already assigned.
417
- - Spawned agents communicate back to you via \`send_message\`. Check your incoming messages for their results and progress updates.`
422
+ - Spawned agents communicate back to you via \`send_message\`. Check your incoming messages for their results and progress updates.
423
+ - If a child pauses early it sends you a \`<child-paused agent="…">reason</child-paused>\` message (e.g. it hit a cost/limit budget). Decide what to do: resume it (after addressing the cause), reassign or drop the work, or stop.`
418
424
 
419
425
  // Only include supervision instructions if supervision is actually enabled
420
426
  // for this session — otherwise the section is misleading bloat.
@@ -1,12 +1,14 @@
1
1
  import { describe, expect, it } from 'bun:test'
2
2
  import z from 'zod/v4'
3
3
  import { contextEvents } from '~/core/context/state.js'
4
+ import { llmEvents } from '~/core/llm/state.js'
4
5
  import { MockLLMProvider } from '~/core/llm/mock.js'
5
6
  import type { InferenceRequest } from '~/core/llm/provider.js'
6
7
  import { ModelId } from '~/core/llm/schema.js'
7
8
  import type { Preset } from '~/core/preset/index.js'
8
9
  import { createTool } from '~/core/tools/definition.js'
9
10
  import { ToolCallId } from '~/core/tools/schema.js'
11
+ import { selectSessionStats, sessionStatsPlugin } from '~/plugins/session-stats/index.js'
10
12
  import { createTestPreset, TestHarness } from '~/testing/index.js'
11
13
  import { contextCompactPlugin } from './index.js'
12
14
 
@@ -336,4 +338,64 @@ describe('context-compact plugin', () => {
336
338
  await harness.shutdown()
337
339
  })
338
340
  })
341
+
342
+ // =========================================================================
343
+ // Cost accounting — the compaction summarization call is a real, billed LLM
344
+ // call. Its tokens/cost must land in session stats, not vanish. (Regression:
345
+ // runAuxiliaryInference used to skip emitting any stats event.)
346
+ // =========================================================================
347
+
348
+ describe('compaction cost accounting', () => {
349
+ it('summarization call cost is counted in session stats', async () => {
350
+ const REGULAR_COST = 0.01
351
+ const SUMMARY_COST = 0.05
352
+
353
+ const harness = new TestHarness({
354
+ systemPlugins: [contextCompactPlugin, sessionStatsPlugin],
355
+ presets: [createCompactPreset(10)],
356
+ mockHandler: (request) => {
357
+ if (isSummarizationRequest(request)) {
358
+ return {
359
+ content: 'Summary of conversation so far.',
360
+ toolCalls: [],
361
+ finishReason: 'stop',
362
+ metrics: MockLLMProvider.defaultMetricsWithCost(SUMMARY_COST),
363
+ }
364
+ }
365
+ return {
366
+ content: 'Agent response with some content to increase token count.',
367
+ toolCalls: [],
368
+ finishReason: 'stop',
369
+ metrics: MockLLMProvider.defaultMetricsWithCost(REGULAR_COST),
370
+ }
371
+ },
372
+ })
373
+
374
+ const session = await harness.createSession('test')
375
+ await session.sendAndWaitForIdle('First message')
376
+ await session.sendAndWaitForIdle('Second message')
377
+ await session.sendAndWaitForIdle('Third message to trigger actual compaction')
378
+
379
+ // Compaction actually ran and made a billed summarization call.
380
+ const auxEvents = await session.getEventsByType(llmEvents, 'auxiliary_inference_completed')
381
+ expect(auxEvents.length).toBeGreaterThanOrEqual(1)
382
+ expect(auxEvents.some((e) => e.metrics.cost === SUMMARY_COST)).toBe(true)
383
+
384
+ // Session stats must include both the regular turns AND the summarization
385
+ // call — in count, tokens, and cost.
386
+ const inferEvents = await session.getEventsByType(llmEvents, 'inference_completed')
387
+ const allLlmEvents = [...inferEvents, ...auxEvents]
388
+ const expectedCost = allLlmEvents.reduce((sum, e) => sum + (e.metrics.cost ?? 0), 0)
389
+ const expectedTokens = allLlmEvents.reduce((sum, e) => sum + e.metrics.totalTokens, 0)
390
+
391
+ const stats = selectSessionStats(session.state)
392
+ expect(stats.llmCalls).toBe(allLlmEvents.length)
393
+ expect(stats.totalCost).toBeCloseTo(expectedCost, 10)
394
+ expect(stats.totalTokens).toBe(expectedTokens)
395
+ // And the summarization cost is genuinely part of the total (not zero).
396
+ expect(stats.totalCost).toBeGreaterThanOrEqual(SUMMARY_COST)
397
+
398
+ await harness.shutdown()
399
+ })
400
+ })
339
401
  })
@@ -291,6 +291,7 @@ describe('createContextCompactedEvent', () => {
291
291
  { role: 'system', content: 'summary' },
292
292
  { role: 'user', content: 'recent' },
293
293
  ],
294
+ originalMessages: [{ role: 'user', content: 'old message' }],
294
295
  summary: 'The summary',
295
296
  originalTokens: 1000,
296
297
  compactedTokens: 200,
@@ -309,6 +310,8 @@ describe('createContextCompactedEvent', () => {
309
310
  expect(event.newConversationHistory.length).toBe(2)
310
311
  expect(event.newConversationHistory[0].role).toBe('system')
311
312
  expect(event.newConversationHistory[0].content).toBe('summary')
313
+ expect(event.originalMessages?.length).toBe(1)
314
+ expect(event.originalMessages?.[0].content).toBe('old message')
312
315
  expect(event.timestamp).toBeDefined()
313
316
  })
314
317
 
@@ -318,6 +321,7 @@ describe('createContextCompactedEvent', () => {
318
321
  const toolCallId = generateToolCallId()
319
322
  const result: CompactionResult = {
320
323
  compactedMessages: [{ role: 'tool', content: 'tool result', toolCallId }],
324
+ originalMessages: [],
321
325
  summary: '',
322
326
  originalTokens: 100,
323
327
  compactedTokens: 50,
@@ -852,6 +856,7 @@ describe('createContextCompactedEvent with historyPath', () => {
852
856
  compactedMessages: [
853
857
  { role: 'system', content: 'summary' },
854
858
  ],
859
+ originalMessages: [],
855
860
  summary: 'The summary',
856
861
  originalTokens: 1000,
857
862
  compactedTokens: 200,
@@ -871,6 +876,7 @@ describe('createContextCompactedEvent with historyPath', () => {
871
876
  compactedMessages: [
872
877
  { role: 'system', content: 'summary' },
873
878
  ],
879
+ originalMessages: [],
874
880
  summary: 'The summary',
875
881
  originalTokens: 1000,
876
882
  compactedTokens: 200,
@@ -131,6 +131,8 @@ export const DEFAULT_SUMMARY_INSTRUCTION =
131
131
  export interface CompactionResult {
132
132
  /** New messages to use (summary + kept messages) */
133
133
  compactedMessages: LLMMessage[]
134
+ /** The older messages that were summarized away (the compaction input) */
135
+ originalMessages: LLMMessage[]
134
136
  /** Generated summary text */
135
137
  summary: string
136
138
  /** Token count before compaction */
@@ -274,6 +276,7 @@ export class ContextCompactor {
274
276
  this.logger.warn('No messages to compact', { sessionId, agentId })
275
277
  return Ok({
276
278
  compactedMessages: messages,
279
+ originalMessages: [],
277
280
  summary: '',
278
281
  originalTokens,
279
282
  compactedTokens: originalTokens,
@@ -349,6 +352,7 @@ export class ContextCompactor {
349
352
 
350
353
  return Ok({
351
354
  compactedMessages,
355
+ originalMessages: toCompact,
352
356
  summary,
353
357
  originalTokens,
354
358
  compactedTokens,
@@ -378,6 +382,7 @@ export function createContextCompactedEvent(
378
382
  contextEvents.create('context_compacted', {
379
383
  agentId,
380
384
  compactedContent: result.summary,
385
+ originalMessages: result.originalMessages.map(toDisplayMessage),
381
386
  newConversationHistory,
382
387
  originalTokens: result.originalTokens,
383
388
  compactedTokens: result.compactedTokens,
@@ -386,3 +391,29 @@ export function createContextCompactedEvent(
386
391
  }),
387
392
  )
388
393
  }
394
+
395
/**
 * Render arbitrary message content as a string.
 *
 * Strings pass through untouched; anything else is JSON-encoded. At runtime
 * `JSON.stringify` yields `undefined` for inputs such as `undefined`, so the
 * result is coalesced to an empty string to guarantee a string is returned.
 */
function stringifyContent(value: unknown): string {
  if (typeof value === 'string') return value
  return JSON.stringify(value) ?? ''
}

/**
 * Convert an LLM message into a display-only conversation message, preserving
 * tool-call and tool-result detail in the rendered content. Used for the
 * compaction "input" snapshot shown in the debug UI — not for reconstruction.
 *
 * - `assistant`: the text content (if any) followed by one
 *   `[tool call: name(input)]` line per tool call, joined with newlines.
 * - `tool`: emitted with role `system`, prefixed by a `[tool result]` header
 *   that includes the tool name when one is set.
 * - any other role: role kept as-is, content rendered as a string.
 *
 * @param msg - The message to render.
 * @returns A message whose `content` is always a string.
 */
function toDisplayMessage(msg: LLMMessage): CompactedConversationMessage {
  if (msg.role === 'assistant') {
    const parts: string[] = []
    if (msg.content) parts.push(msg.content)
    for (const tc of msg.toolCalls ?? []) {
      // Input stays JSON-encoded (strings keep their quotes); a missing input
      // renders as an empty argument list rather than the text "undefined".
      parts.push(`[tool call: ${tc.name}(${JSON.stringify(tc.input) ?? ''})]`)
    }
    return { role: 'assistant', content: parts.join('\n') }
  }

  if (msg.role === 'tool') {
    const header = `[tool result${msg.toolName ? `: ${msg.toolName}` : ''}]`
    return { role: 'system', content: `${header}\n${stringifyContent(msg.content)}` }
  }

  return {
    role: msg.role,
    content: stringifyContent(msg.content),
  }
}
@@ -23,4 +23,39 @@ export interface AgentLimits {
23
23
  maxRepeatedToolCalls?: number
24
24
  /** Maximum consecutive identical text-only responses. Default: 3 */
25
25
  maxRepeatedResponses?: number
26
+ /**
27
+ * Maximum cumulative LLM cost (USD) this agent may spend before it is paused.
28
+ * Spend is summed from `inference_completed` metrics and, unlike the counter
29
+ * limits, is NOT reset on resume. Default: unlimited.
30
+ */
31
+ maxCost?: number
32
+ /**
33
+ * Maximum cumulative total tokens (prompt + completion) this agent may consume
34
+ * before it is paused. Useful as a fallback when providers don't report cost.
35
+ * Not reset on resume. Default: unlimited.
36
+ */
37
+ maxTokens?: number
38
+ /**
39
+ * Maximum number of context compaction events for this agent before it is paused.
40
+ * Guards against pathological compaction loops. Reset on resume. Default: unlimited.
41
+ */
42
+ maxCompactions?: number
43
+ }
44
+
45
+ // ============================================================================
46
+ // Session Limits (budget across all agents)
47
+ // ============================================================================
48
+
49
+ /**
50
+ * Session-wide budget, summed across every agent in the session. Configured via
51
+ * the plugin's session-level config (`pluginConfig`), independent of per-agent
52
+ * limits. All fields optional - defaults applied via resolveSessionLimits().
53
+ */
54
+ export interface LimitsSessionConfig {
55
+ /** Maximum cumulative LLM cost (USD) across all agents. Default: unlimited */
56
+ maxSessionCost?: number
57
+ /** Maximum cumulative total tokens across all agents. Default: unlimited */
58
+ maxSessionTokens?: number
59
+ /** Ratio of the session budget at which a soft warning is emitted. Default: 0.8 */
60
+ softLimitRatio?: number
26
61
  }
@@ -1,4 +1,4 @@
1
- export type { AgentLimits } from './config.js'
1
+ export type { AgentLimits, LimitsSessionConfig } from './config.js'
2
2
  export { limitsGuardPlugin } from './plugin.js'
3
- export type { AgentCounters, LimitsAgentConfig, LimitWarningEvent } from './plugin.js'
4
- export { createAgentCounters, limitsEvents } from './plugin.js'
3
+ export type { AgentCounters, BudgetExceededEvent, LimitsAgentConfig, LimitWarningEvent } from './plugin.js'
4
+ export { createAgentCounters, limitsEvents, sumSessionSpend } from './plugin.js'
@@ -1,5 +1,5 @@
1
1
  import { describe, expect, it } from 'bun:test'
2
- import { checkLimits, resolveAgentLimits } from './limit-guard.js'
2
+ import { checkBudget, checkLimits, resolveAgentLimits, resolveSessionLimits } from './limit-guard.js'
3
3
  import { createAgentCounters } from './plugin.js'
4
4
  import type { AgentCounters } from './plugin.js'
5
5
 
@@ -14,6 +14,10 @@ describe('resolveAgentLimits', () => {
14
14
  expect(limits.softLimitRatio).toBe(0.8)
15
15
  expect(limits.maxRepeatedToolCalls).toBe(3)
16
16
  expect(limits.maxRepeatedResponses).toBe(3)
17
+ // Budgets and compaction cap are opt-in (unlimited by default)
18
+ expect(limits.maxCost).toBe(Number.POSITIVE_INFINITY)
19
+ expect(limits.maxTokens).toBe(Number.POSITIVE_INFINITY)
20
+ expect(limits.maxCompactions).toBe(Number.POSITIVE_INFINITY)
17
21
  })
18
22
 
19
23
  it('returns defaults when empty config', () => {
@@ -158,4 +162,79 @@ describe('checkLimits', () => {
158
162
  )
159
163
  expect(result.status).toBe('hard_limit')
160
164
  })
165
+
166
+ // --- Compaction limit ---
167
+
168
+ it('detects maxCompactions hard limit', () => {
169
+ const limits = resolveAgentLimits({ maxCompactions: 5 })
170
+ const result = checkLimits(makeCounters({ compactionCount: 5 }), limits)
171
+ expect(result.status).toBe('hard_limit')
172
+ if (result.status === 'hard_limit') {
173
+ expect(result.limitName).toBe('maxCompactions')
174
+ }
175
+ })
176
+
177
+ it('does not cap compactions by default (unlimited)', () => {
178
+ const result = checkLimits(makeCounters({ compactionCount: 9999 }), defaultLimits)
179
+ expect(result.status).toBe('ok')
180
+ })
181
+ })
182
+
183
+ describe('checkBudget', () => {
184
+ const names = { cost: 'maxCost', tokens: 'maxTokens' }
185
+
186
+ it('returns ok when under budget', () => {
187
+ const result = checkBudget({ costSpent: 1, tokensUsed: 100 }, 5, 1000, 0.8, names)
188
+ expect(result.status).toBe('ok')
189
+ })
190
+
191
+ it('returns ok when unlimited (Infinity)', () => {
192
+ const result = checkBudget(
193
+ { costSpent: 1_000_000, tokensUsed: 1_000_000 },
194
+ Number.POSITIVE_INFINITY,
195
+ Number.POSITIVE_INFINITY,
196
+ 0.8,
197
+ names,
198
+ )
199
+ expect(result.status).toBe('ok')
200
+ })
201
+
202
+ it('detects cost hard limit', () => {
203
+ const result = checkBudget({ costSpent: 5.01, tokensUsed: 0 }, 5, Number.POSITIVE_INFINITY, 0.8, names)
204
+ expect(result.status).toBe('hard_limit')
205
+ if (result.status === 'hard_limit') expect(result.limitName).toBe('maxCost')
206
+ })
207
+
208
+ it('detects token hard limit', () => {
209
+ const result = checkBudget({ costSpent: 0, tokensUsed: 1000 }, Number.POSITIVE_INFINITY, 1000, 0.8, names)
210
+ expect(result.status).toBe('hard_limit')
211
+ if (result.status === 'hard_limit') expect(result.limitName).toBe('maxTokens')
212
+ })
213
+
214
+ it('emits soft warning approaching cost budget', () => {
215
+ const result = checkBudget({ costSpent: 4.2, tokensUsed: 0 }, 5, Number.POSITIVE_INFINITY, 0.8, names)
216
+ expect(result.status).toBe('soft_warning')
217
+ if (result.status === 'soft_warning') expect(result.limitName).toBe('maxCost')
218
+ })
219
+
220
+ it('handles sub-dollar budgets without spurious warnings', () => {
221
+ // floor-based logic would warn at $0 for a $0.50 budget — float-aware must not.
222
+ const result = checkBudget({ costSpent: 0.1, tokensUsed: 0 }, 0.5, Number.POSITIVE_INFINITY, 0.8, names)
223
+ expect(result.status).toBe('ok')
224
+ })
225
+ })
226
+
227
+ describe('resolveSessionLimits', () => {
228
+ it('defaults to unlimited', () => {
229
+ const limits = resolveSessionLimits()
230
+ expect(limits.maxSessionCost).toBe(Number.POSITIVE_INFINITY)
231
+ expect(limits.maxSessionTokens).toBe(Number.POSITIVE_INFINITY)
232
+ expect(limits.softLimitRatio).toBe(0.8)
233
+ })
234
+
235
+ it('overrides specific values', () => {
236
+ const limits = resolveSessionLimits({ maxSessionCost: 10 })
237
+ expect(limits.maxSessionCost).toBe(10)
238
+ expect(limits.maxSessionTokens).toBe(Number.POSITIVE_INFINITY)
239
+ })
161
240
  })
@@ -4,7 +4,7 @@
4
4
  * Returns the worst result: hard_limit > soft_warning > ok.
5
5
  */
6
6
 
7
- import type { AgentLimits } from '~/plugins/limits-guard/config.js'
7
+ import type { AgentLimits, LimitsSessionConfig } from '~/plugins/limits-guard/config.js'
8
8
  import type { AgentCounters } from './plugin.js'
9
9
 
10
10
  // ============================================================================
@@ -20,6 +20,9 @@ export interface ResolvedAgentLimits {
20
20
  softLimitRatio: number
21
21
  maxRepeatedToolCalls: number
22
22
  maxRepeatedResponses: number
23
+ maxCost: number
24
+ maxTokens: number
25
+ maxCompactions: number
23
26
  }
24
27
 
25
28
  const DEFAULTS: ResolvedAgentLimits = {
@@ -31,6 +34,10 @@ const DEFAULTS: ResolvedAgentLimits = {
31
34
  softLimitRatio: 0.8,
32
35
  maxRepeatedToolCalls: 3,
33
36
  maxRepeatedResponses: 3,
37
+ // Budgets and the compaction cap are opt-in: unset means unlimited.
38
+ maxCost: Number.POSITIVE_INFINITY,
39
+ maxTokens: Number.POSITIVE_INFINITY,
40
+ maxCompactions: Number.POSITIVE_INFINITY,
34
41
  }
35
42
 
36
43
  export function resolveAgentLimits(config?: AgentLimits): ResolvedAgentLimits {
@@ -44,9 +51,98 @@ export function resolveAgentLimits(config?: AgentLimits): ResolvedAgentLimits {
44
51
  softLimitRatio: config.softLimitRatio ?? DEFAULTS.softLimitRatio,
45
52
  maxRepeatedToolCalls: config.maxRepeatedToolCalls ?? DEFAULTS.maxRepeatedToolCalls,
46
53
  maxRepeatedResponses: config.maxRepeatedResponses ?? DEFAULTS.maxRepeatedResponses,
54
+ maxCost: config.maxCost ?? DEFAULTS.maxCost,
55
+ maxTokens: config.maxTokens ?? DEFAULTS.maxTokens,
56
+ maxCompactions: config.maxCompactions ?? DEFAULTS.maxCompactions,
47
57
  }
48
58
  }
49
59
 
60
+ // ============================================================================
61
+ // Session budget
62
+ // ============================================================================
63
+
64
/** Session-wide budget with every field populated (defaults already applied). */
export interface ResolvedSessionLimits {
  maxSessionCost: number
  maxSessionTokens: number
  softLimitRatio: number
}

/**
 * Values used for any field the caller leaves unset: both budgets are
 * unlimited and the soft-warning ratio is 0.8. Typed `Readonly` so the shared
 * object is never handed out for mutation.
 */
const SESSION_DEFAULTS: Readonly<ResolvedSessionLimits> = {
  maxSessionCost: Number.POSITIVE_INFINITY,
  maxSessionTokens: Number.POSITIVE_INFINITY,
  softLimitRatio: 0.8,
}

/**
 * Fill in defaults for a (possibly absent) session limits config.
 *
 * A fresh object is returned on every call — including when `config` is
 * omitted — so callers may mutate the result without affecting the defaults
 * seen by later callers. Fields set to `undefined` fall back to the default;
 * explicit values such as `0` are kept.
 *
 * @param config - Optional session-level limits configuration.
 * @returns The limits with every field resolved to a number.
 */
export function resolveSessionLimits(config?: LimitsSessionConfig): ResolvedSessionLimits {
  return {
    maxSessionCost: config?.maxSessionCost ?? SESSION_DEFAULTS.maxSessionCost,
    maxSessionTokens: config?.maxSessionTokens ?? SESSION_DEFAULTS.maxSessionTokens,
    softLimitRatio: config?.softLimitRatio ?? SESSION_DEFAULTS.softLimitRatio,
  }
}
84
+
85
/** Cumulative spend, either for a single agent or summed across the session. */
export interface BudgetSpend {
  costSpent: number
  tokensUsed: number
}

/**
 * Budget check (cost + tokens) shared by per-agent and session-wide budgets.
 *
 * Kept separate from {@link checkLimits} so it can run in `beforeInference` —
 * blocking the *next* call once the budget is exhausted — without also tripping
 * the counter/pattern limits (those are enforced in `afterInference`). Uses
 * float-aware comparisons (no flooring) so sub-dollar budgets behave correctly.
 *
 * Hard limits are evaluated for both dimensions before any soft warning, so an
 * exhausted budget always outranks a merely "approaching" one. Within each
 * pass, cost is checked before tokens.
 *
 * @param spend - Cost and tokens consumed so far.
 * @param costLimit - Cost ceiling; `Number.POSITIVE_INFINITY` disables it.
 * @param tokenLimit - Token ceiling; `Number.POSITIVE_INFINITY` disables it.
 * @param softLimitRatio - Fraction of a finite ceiling at which a soft warning is returned.
 * @param names - Limit names to report for the cost and token dimensions.
 * @returns `hard_limit` when spend has reached a ceiling, `soft_warning` when
 *   it has reached `ceiling * softLimitRatio`, otherwise `ok`.
 */
export function checkBudget(
  spend: BudgetSpend,
  costLimit: number,
  tokenLimit: number,
  softLimitRatio: number,
  names: { cost: string; tokens: string },
): LimitCheckResult {
  const checks: Array<{ name: string; current: number; max: number }> = [
    { name: names.cost, current: spend.costSpent, max: costLimit },
    { name: names.tokens, current: spend.tokensUsed, max: tokenLimit },
  ]

  // Pass 1: hard limits. An infinite ceiling can never be reached by finite spend.
  const exhausted = checks.find((check) => check.current >= check.max)
  if (exhausted) {
    return {
      status: 'hard_limit',
      limitName: exhausted.name,
      currentValue: exhausted.current,
      hardLimit: exhausted.max,
      reason: `${exhausted.name} reached: ${formatBudget(exhausted.current)}/${formatBudget(exhausted.max)}`,
    }
  }

  // Pass 2: soft warnings, skipped for unlimited (infinite) ceilings.
  const approaching = checks.find(
    (check) => check.max !== Number.POSITIVE_INFINITY && check.current >= check.max * softLimitRatio,
  )
  if (approaching) {
    return {
      status: 'soft_warning',
      limitName: approaching.name,
      currentValue: approaching.current,
      hardLimit: approaching.max,
      message: `Approaching ${approaching.name} limit: ${formatBudget(approaching.current)}/${formatBudget(approaching.max)}`,
    }
  }

  return { status: 'ok' }
}

/** Format a budget value compactly — 4 decimals for fractional (cost), integer otherwise. */
function formatBudget(value: number): string {
  if (value === Number.POSITIVE_INFINITY) return '∞'
  return Number.isInteger(value) ? String(value) : value.toFixed(4)
}
145
+
50
146
  // ============================================================================
51
147
  // Check result
52
148
  // ============================================================================
@@ -68,6 +164,7 @@ export function checkLimits(counters: AgentCounters, limits: ResolvedAgentLimits
68
164
  { name: 'maxToolCalls', current: counters.toolCallCount, max: limits.maxToolCalls },
69
165
  { name: 'maxSpawnedAgents', current: counters.spawnedAgentCount, max: limits.maxSpawnedAgents },
70
166
  { name: 'maxMessagesSent', current: counters.messagesSentCount, max: limits.maxMessagesSent },
167
+ { name: 'maxCompactions', current: counters.compactionCount, max: limits.maxCompactions },
71
168
  ]
72
169
 
73
170
  for (const check of hardChecks) {