@roj-ai/sdk 0.1.19 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/dist/core/agents/agent.d.ts.map +1 -1
  2. package/dist/core/agents/agent.js +13 -3
  3. package/dist/core/agents/agent.js.map +1 -1
  4. package/dist/core/context/state.d.ts +8 -0
  5. package/dist/core/context/state.d.ts.map +1 -1
  6. package/dist/core/context/state.js +10 -0
  7. package/dist/core/context/state.js.map +1 -1
  8. package/dist/core/events/base-event-store.d.ts.map +1 -1
  9. package/dist/core/events/base-event-store.js +2 -0
  10. package/dist/core/events/base-event-store.js.map +1 -1
  11. package/dist/core/events/metadata-utils.d.ts.map +1 -1
  12. package/dist/core/events/metadata-utils.js +2 -0
  13. package/dist/core/events/metadata-utils.js.map +1 -1
  14. package/dist/core/llm/anthropic.test.js +27 -0
  15. package/dist/core/llm/anthropic.test.js.map +1 -1
  16. package/dist/core/llm/cache-breakpoints.d.ts +19 -5
  17. package/dist/core/llm/cache-breakpoints.d.ts.map +1 -1
  18. package/dist/core/llm/cache-breakpoints.js +40 -23
  19. package/dist/core/llm/cache-breakpoints.js.map +1 -1
  20. package/dist/core/llm/cache-breakpoints.test.d.ts +2 -0
  21. package/dist/core/llm/cache-breakpoints.test.d.ts.map +1 -0
  22. package/dist/core/llm/cache-breakpoints.test.js +45 -0
  23. package/dist/core/llm/cache-breakpoints.test.js.map +1 -0
  24. package/dist/core/llm/state.d.ts +22 -0
  25. package/dist/core/llm/state.d.ts.map +1 -1
  26. package/dist/core/llm/state.js +23 -11
  27. package/dist/core/llm/state.js.map +1 -1
  28. package/dist/index.d.ts +3 -3
  29. package/dist/index.d.ts.map +1 -1
  30. package/dist/index.js +1 -1
  31. package/dist/index.js.map +1 -1
  32. package/dist/lib/mime.d.ts +1 -1
  33. package/dist/lib/mime.d.ts.map +1 -1
  34. package/dist/lib/mime.js +7 -4
  35. package/dist/lib/mime.js.map +1 -1
  36. package/dist/plugins/agents/plugin.d.ts.map +1 -1
  37. package/dist/plugins/agents/plugin.js +7 -1
  38. package/dist/plugins/agents/plugin.js.map +1 -1
  39. package/dist/plugins/context-compact/context-compact.integration.test.js +54 -0
  40. package/dist/plugins/context-compact/context-compact.integration.test.js.map +1 -1
  41. package/dist/plugins/context-compact/context-compactor.d.ts +2 -0
  42. package/dist/plugins/context-compact/context-compactor.d.ts.map +1 -1
  43. package/dist/plugins/context-compact/context-compactor.js +29 -0
  44. package/dist/plugins/context-compact/context-compactor.js.map +1 -1
  45. package/dist/plugins/context-compact/context-compactor.test.js +6 -0
  46. package/dist/plugins/context-compact/context-compactor.test.js.map +1 -1
  47. package/dist/plugins/limits-guard/config.d.ts +30 -0
  48. package/dist/plugins/limits-guard/config.d.ts.map +1 -1
  49. package/dist/plugins/limits-guard/index.d.ts +3 -3
  50. package/dist/plugins/limits-guard/index.d.ts.map +1 -1
  51. package/dist/plugins/limits-guard/index.js +1 -1
  52. package/dist/plugins/limits-guard/index.js.map +1 -1
  53. package/dist/plugins/limits-guard/limit-guard.d.ts +27 -1
  54. package/dist/plugins/limits-guard/limit-guard.d.ts.map +1 -1
  55. package/dist/plugins/limits-guard/limit-guard.js +67 -0
  56. package/dist/plugins/limits-guard/limit-guard.js.map +1 -1
  57. package/dist/plugins/limits-guard/limit-guard.test.js +65 -1
  58. package/dist/plugins/limits-guard/limit-guard.test.js.map +1 -1
  59. package/dist/plugins/limits-guard/limits-guard.integration.test.js +295 -1
  60. package/dist/plugins/limits-guard/limits-guard.integration.test.js.map +1 -1
  61. package/dist/plugins/limits-guard/plugin.d.ts +23 -2
  62. package/dist/plugins/limits-guard/plugin.d.ts.map +1 -1
  63. package/dist/plugins/limits-guard/plugin.js +107 -2
  64. package/dist/plugins/limits-guard/plugin.js.map +1 -1
  65. package/dist/plugins/mailbox/plugin.d.ts.map +1 -1
  66. package/dist/plugins/mailbox/plugin.js +18 -0
  67. package/dist/plugins/mailbox/plugin.js.map +1 -1
  68. package/dist/plugins/session-stats/plugin.d.ts.map +1 -1
  69. package/dist/plugins/session-stats/plugin.js +5 -1
  70. package/dist/plugins/session-stats/plugin.js.map +1 -1
  71. package/package.json +2 -2
  72. package/src/core/agents/agent.ts +18 -2
  73. package/src/core/context/state.ts +10 -0
  74. package/src/core/events/base-event-store.ts +2 -0
  75. package/src/core/events/metadata-utils.ts +2 -0
  76. package/src/core/llm/anthropic.test.ts +34 -0
  77. package/src/core/llm/cache-breakpoints.test.ts +55 -0
  78. package/src/core/llm/cache-breakpoints.ts +39 -21
  79. package/src/core/llm/state.ts +25 -11
  80. package/src/index.ts +3 -3
  81. package/src/lib/mime.ts +7 -4
  82. package/src/plugins/agents/plugin.ts +7 -1
  83. package/src/plugins/context-compact/context-compact.integration.test.ts +62 -0
  84. package/src/plugins/context-compact/context-compactor.test.ts +6 -0
  85. package/src/plugins/context-compact/context-compactor.ts +31 -0
  86. package/src/plugins/limits-guard/config.ts +35 -0
  87. package/src/plugins/limits-guard/index.ts +3 -3
  88. package/src/plugins/limits-guard/limit-guard.test.ts +80 -1
  89. package/src/plugins/limits-guard/limit-guard.ts +98 -1
  90. package/src/plugins/limits-guard/limits-guard.integration.test.ts +331 -1
  91. package/src/plugins/limits-guard/plugin.ts +153 -3
  92. package/src/plugins/mailbox/plugin.ts +18 -0
  93. package/src/plugins/session-stats/plugin.ts +5 -1
@@ -2,11 +2,17 @@ import { describe, expect, it } from 'bun:test'
2
2
  import { AgentId } from '~/core/agents/schema.js'
3
3
  import { agentEvents } from '~/core/agents/state.js'
4
4
  import { MockLLMProvider } from '~/core/llm/mock.js'
5
+ import type { InferenceRequest } from '~/core/llm/provider.js'
6
+ import { ModelId } from '~/core/llm/schema.js'
7
+ import { llmEvents } from '~/core/llm/state.js'
5
8
  import { selectPluginState } from '~/core/sessions/reducer.js'
6
9
  import { ToolCallId } from '~/core/tools/schema.js'
10
+ import { contextCompactPlugin } from '~/plugins/context-compact/index.js'
11
+ import { getAgentMailbox, selectMailboxState } from '~/plugins/mailbox/query.js'
12
+ import { mailboxEvents } from '~/plugins/mailbox/state.js'
7
13
  import { createMultiAgentPreset, createTestPreset, TestHarness } from '~/testing/index.js'
8
14
  import type { AgentCounters } from './plugin.js'
9
- import { limitsGuardPlugin } from './plugin.js'
15
+ import { limitsEvents, limitsGuardPlugin } from './plugin.js'
10
16
 
11
17
  function createLimitsHarness(options: Omit<ConstructorParameters<typeof TestHarness>[0], 'systemPlugins'>) {
12
18
  return new TestHarness({ ...options, systemPlugins: [limitsGuardPlugin] })
@@ -434,4 +440,328 @@ describe('limits-guard plugin', () => {
434
440
  await harness.shutdown()
435
441
  })
436
442
  })
443
+
444
+ // =========================================================================
445
+ // budgets (cost / tokens)
446
+ // =========================================================================
447
+
448
+ describe('budgets', () => {
449
+ it('agent exceeding cost budget → paused with budget_exceeded event', async () => {
450
+ let n = 0
451
+ const harness = createLimitsHarness({
452
+ presets: [createTestPreset({
453
+ orchestratorSystem: 'Test agent.',
454
+ // $0.50 per call, $1.00 budget → pauses before the 3rd call.
455
+ orchestratorPlugins: [limitsGuardPlugin.configureAgent({ limits: { maxCost: 1.0, maxTurns: 100 } })],
456
+ })],
457
+ mockHandler: () => {
458
+ n++
459
+ return {
460
+ content: null,
461
+ toolCalls: [{ id: ToolCallId(`tc${n}`), name: 'tell_user', input: { message: `Turn ${n}` } }],
462
+ finishReason: 'stop',
463
+ metrics: MockLLMProvider.defaultMetricsWithCost(0.5),
464
+ }
465
+ },
466
+ })
467
+
468
+ const session = await harness.createSession('test')
469
+ const entryAgentId = session.getEntryAgentId()!
470
+ await session.sendMessage('Start')
471
+ await waitForAgentPaused(session, entryAgentId)
472
+
473
+ expect(session.state.agents.get(entryAgentId)!.status).toBe('paused')
474
+
475
+ const counters = selectPluginState<Map<AgentId, AgentCounters>>(session.state, 'agentLimits')?.get(entryAgentId)
476
+ expect(counters!.costSpent).toBeGreaterThanOrEqual(1.0)
477
+
478
+ const budgetEvents = await session.getEventsByType(limitsEvents, 'budget_exceeded')
479
+ const evt = budgetEvents.find(e => e.agentId === entryAgentId)
480
+ expect(evt).toBeDefined()
481
+ expect(evt!.scope).toBe('agent')
482
+ expect(evt!.limitName).toBe('maxCost')
483
+
484
+ await harness.shutdown()
485
+ })
486
+
487
+ it('costSpent is preserved across resume — budget cannot be bypassed by pausing', async () => {
488
+ let n = 0
489
+ const harness = createLimitsHarness({
490
+ presets: [createTestPreset({
491
+ orchestratorSystem: 'Test agent.',
492
+ orchestratorPlugins: [limitsGuardPlugin.configureAgent({ limits: { maxCost: 1.0, maxTurns: 100 } })],
493
+ })],
494
+ mockHandler: () => {
495
+ n++
496
+ return {
497
+ content: null,
498
+ toolCalls: [{ id: ToolCallId(`tc${n}`), name: 'tell_user', input: { message: `Turn ${n}` } }],
499
+ finishReason: 'stop',
500
+ metrics: MockLLMProvider.defaultMetricsWithCost(0.5),
501
+ }
502
+ },
503
+ })
504
+
505
+ const session = await harness.createSession('test')
506
+ const entryAgentId = session.getEntryAgentId()!
507
+ await session.sendMessage('Start')
508
+ await waitForAgentPaused(session, entryAgentId)
509
+
510
+ const before = selectPluginState<Map<AgentId, AgentCounters>>(session.state, 'agentLimits')?.get(entryAgentId)
511
+ expect(before).toBeDefined()
512
+ expect(before!.costSpent).toBeGreaterThanOrEqual(1.0)
513
+
514
+ await session.callPluginMethod('agents.resume', { agentId: String(entryAgentId) })
515
+ // Budget is still exhausted → agent pauses again immediately without inferring.
516
+ await waitForAgentPaused(session, entryAgentId)
517
+
518
+ const after = selectPluginState<Map<AgentId, AgentCounters>>(session.state, 'agentLimits')?.get(entryAgentId)
519
+ expect(after).toBeDefined()
520
+ // Anti-looping counter reset…
521
+ expect(after!.inferenceCount).toBe(0)
522
+ // …but spend preserved, so the cap is not bypassable.
523
+ expect(after!.costSpent).toBeGreaterThanOrEqual(before!.costSpent)
524
+
525
+ await harness.shutdown()
526
+ })
527
+
528
+ it('child pausing on budget → parent is notified via a child-paused message', async () => {
529
+ let orchestratorCalls = 0
530
+ let workerCalls = 0
531
+ const harness = createLimitsHarness({
532
+ presets: [createTestPreset({
533
+ orchestratorSystem: 'Orchestrator agent.',
534
+ agents: [{
535
+ name: 'worker',
536
+ system: 'Worker agent.',
537
+ tools: [],
538
+ agents: [],
539
+ // $0.50 per call, $0.50 budget → pauses at the 2nd inference's
540
+ // beforeInference (after one completed call spent the budget).
541
+ plugins: [limitsGuardPlugin.configureAgent({ limits: { maxCost: 0.5, maxTurns: 100 } })],
542
+ }],
543
+ })],
544
+ mockHandler: (request) => {
545
+ // Worker: keep spending until the budget pauses it.
546
+ if (request.systemPrompt.includes('Worker agent.')) {
547
+ workerCalls++
548
+ return {
549
+ content: null,
550
+ toolCalls: [{ id: ToolCallId(`w${workerCalls}`), name: 'tell_user', input: { message: `Work ${workerCalls}` } }],
551
+ finishReason: 'stop',
552
+ metrics: MockLLMProvider.defaultMetricsWithCost(0.5),
553
+ }
554
+ }
555
+ // Orchestrator: spawn the worker exactly once, then idle.
556
+ orchestratorCalls++
557
+ if (orchestratorCalls === 1) {
558
+ return {
559
+ content: null,
560
+ toolCalls: [{ id: ToolCallId('spawn'), name: 'start_worker', input: { message: 'Do work' } }],
561
+ finishReason: 'stop',
562
+ metrics: MockLLMProvider.defaultMetrics(),
563
+ }
564
+ }
565
+ return { content: 'Waiting', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
566
+ },
567
+ })
568
+
569
+ const session = await harness.createSession('test')
570
+ await session.sendMessage('Start')
571
+ await waitForAgentPaused(session, AgentId('worker_1'))
572
+
573
+ const orchestratorId = session.getEntryAgentId()!
574
+ // The mailbox plugin's onPause hook reports the pause to the parent.
575
+ // onPause runs *after* the agent_paused event (which flips status to
576
+ // 'paused'), so poll for the notification.
577
+ const findNotice = async () =>
578
+ (await session.getEventsByType(mailboxEvents, 'mailbox_message')).find(m =>
579
+ m.toAgentId === orchestratorId
580
+ && m.message.from === AgentId('worker_1')
581
+ && m.message.content.includes('<child-paused')
582
+ && m.message.content.includes('worker_1'),
583
+ )
584
+ let notice = await findNotice()
585
+ const deadline = Date.now() + 5000
586
+ while (!notice && Date.now() < deadline) {
587
+ await new Promise(r => setTimeout(r, 20))
588
+ notice = await findNotice()
589
+ }
590
+ expect(notice).toBeDefined()
591
+
592
+ await harness.shutdown()
593
+ })
594
+
595
+ it('child-paused notice is actually consumed by a parent that already went idle', async () => {
596
+ // Regression guard for the lifecycle: a parent that finished its work is
597
+ // NOT in a terminal "complete" state — it's persisted as `pending` with an
598
+ // empty mailbox. When the child pauses and delivers <child-paused>, the
599
+ // dequeue check flips the parent's decide() from "complete" back to "infer",
600
+ // so the parent wakes and reads the message rather than leaving it unconsumed.
601
+ let workerCalls = 0
602
+ let orchestratorSawChildPaused = false
603
+
604
+ const requestHasChildPaused = (request: InferenceRequest): boolean =>
605
+ request.messages.some((m) => {
606
+ const c = typeof m.content === 'string' ? m.content : JSON.stringify(m.content)
607
+ return c.includes('<child-paused')
608
+ })
609
+
610
+ const harness = createLimitsHarness({
611
+ presets: [createTestPreset({
612
+ orchestratorSystem: 'Orchestrator agent.',
613
+ agents: [{
614
+ name: 'worker',
615
+ system: 'Worker agent.',
616
+ tools: [],
617
+ agents: [],
618
+ plugins: [limitsGuardPlugin.configureAgent({ limits: { maxCost: 0.5, maxTurns: 100 } })],
619
+ }],
620
+ })],
621
+ mockHandler: (request) => {
622
+ if (request.systemPrompt.includes('Worker agent.')) {
623
+ workerCalls++
624
+ return {
625
+ content: null,
626
+ toolCalls: [{ id: ToolCallId(`w${workerCalls}`), name: 'tell_user', input: { message: `Work ${workerCalls}` } }],
627
+ finishReason: 'stop',
628
+ metrics: MockLLMProvider.defaultMetricsWithCost(0.5),
629
+ }
630
+ }
631
+ // Orchestrator: spawn the worker once, then go idle. Any later wake-up
632
+ // is driven by an incoming message — record if it carried the notice.
633
+ if (requestHasChildPaused(request)) orchestratorSawChildPaused = true
634
+ if (workerCalls === 0) {
635
+ return {
636
+ content: null,
637
+ toolCalls: [{ id: ToolCallId('spawn'), name: 'start_worker', input: { message: 'Do work' } }],
638
+ finishReason: 'stop',
639
+ metrics: MockLLMProvider.defaultMetrics(),
640
+ }
641
+ }
642
+ return { content: 'Acknowledged', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
643
+ },
644
+ })
645
+
646
+ const session = await harness.createSession('test')
647
+ await session.sendMessage('Start')
648
+ await waitForAgentPaused(session, AgentId('worker_1'))
649
+
650
+ // The parent should wake from idle and run an inference that includes the
651
+ // <child-paused> message — proving the notice is consumed, not orphaned.
652
+ const deadline = Date.now() + 5000
653
+ while (!orchestratorSawChildPaused && Date.now() < deadline) {
654
+ await new Promise(r => setTimeout(r, 20))
655
+ }
656
+ expect(orchestratorSawChildPaused).toBe(true)
657
+
658
+ // And the message is marked consumed in the parent's mailbox.
659
+ const orchestratorId = session.getEntryAgentId()!
660
+ const mailbox = getAgentMailbox(selectMailboxState(session.state), orchestratorId)
661
+ const childPausedMsg = mailbox.find((m) => m.content.includes('<child-paused'))
662
+ expect(childPausedMsg).toBeDefined()
663
+ expect(childPausedMsg!.consumed).toBe(true)
664
+
665
+ await harness.shutdown()
666
+ })
667
+
668
+ it('compaction (auxiliary inference) cost counts toward the budget', async () => {
669
+ // The compaction summarization is a real, billed LLM call routed through
670
+ // runAuxiliaryInference → auxiliary_inference_completed. It must be charged
671
+ // against the cost budget, otherwise an agent could spend unboundedly on
672
+ // compaction without ever tripping its cap.
673
+ const REGULAR_COST = 0.1
674
+ const SUMMARY_COST = 5.0
675
+
676
+ // Compaction request detection: inline compaction appends a trailing user
677
+ // message containing the summarization marker.
678
+ const isSummarizationRequest = (request: InferenceRequest): boolean => {
679
+ const last = request.messages[request.messages.length - 1]
680
+ if (!last || last.role !== 'user') return false
681
+ const content = typeof last.content === 'string' ? last.content : JSON.stringify(last.content)
682
+ return content.includes('[CONTEXT COMPACTION REQUEST]')
683
+ }
684
+
685
+ const harness = new TestHarness({
686
+ systemPlugins: [contextCompactPlugin, limitsGuardPlugin],
687
+ presets: [createTestPreset({
688
+ orchestratorSystem: 'Test agent.',
689
+ plugins: [
690
+ contextCompactPlugin.configure({
691
+ compaction: { model: ModelId('mock'), maxTokens: 10, keepRecentMessages: 2 },
692
+ }),
693
+ ],
694
+ // Budget large enough to survive the cheap regular turns but small
695
+ // enough that one expensive summarization call blows past it.
696
+ orchestratorPlugins: [
697
+ limitsGuardPlugin.configureAgent({ limits: { maxCost: 2.0, maxTurns: 100 } }),
698
+ ],
699
+ })],
700
+ mockHandler: (request) => {
701
+ if (isSummarizationRequest(request)) {
702
+ return {
703
+ content: 'Summary of conversation so far.',
704
+ toolCalls: [],
705
+ finishReason: 'stop',
706
+ metrics: MockLLMProvider.defaultMetricsWithCost(SUMMARY_COST),
707
+ }
708
+ }
709
+ return {
710
+ content: 'Agent response with some content to increase token count.',
711
+ toolCalls: [],
712
+ finishReason: 'stop',
713
+ metrics: MockLLMProvider.defaultMetricsWithCost(REGULAR_COST),
714
+ }
715
+ },
716
+ })
717
+
718
+ const session = await harness.createSession('test')
719
+ const entryAgentId = session.getEntryAgentId()!
720
+
721
+ // Returns once the agent is either idle or paused — used because we don't
722
+ // know up front whether the compaction cost trips the budget on the same
723
+ // turn (depends on beforeInference hook ordering) or on the next one.
724
+ const waitForIdleOrPaused = async (timeoutMs = 10000): Promise<'idle' | 'paused'> => {
725
+ const deadline = Date.now() + timeoutMs
726
+ while (Date.now() < deadline) {
727
+ const st = session.state.agents.get(entryAgentId)
728
+ if (st?.status === 'paused') return 'paused'
729
+ if (st?.status === 'pending' && st.pendingToolCalls.length === 0 && st.pendingToolResults.length === 0) {
730
+ return 'idle'
731
+ }
732
+ await new Promise(r => setTimeout(r, 10))
733
+ }
734
+ throw new Error('waitForIdleOrPaused timed out')
735
+ }
736
+
737
+ await session.sendAndWaitForIdle('First message')
738
+ await session.sendAndWaitForIdle('Second message')
739
+ // Third message triggers compaction (the expensive summarization call).
740
+ // It may pause on this turn or settle idle and pause on the next one.
741
+ await session.sendMessage('Third message to trigger compaction')
742
+ if (await waitForIdleOrPaused() === 'idle') {
743
+ await session.sendMessage('Fourth message')
744
+ }
745
+ await waitForAgentPaused(session, entryAgentId)
746
+
747
+ // Compaction genuinely ran and was billed.
748
+ const auxEvents = await session.getEventsByType(llmEvents, 'auxiliary_inference_completed')
749
+ expect(auxEvents.some((e) => e.metrics.cost === SUMMARY_COST)).toBe(true)
750
+
751
+ // The summarization cost is reflected in the agent's tracked spend…
752
+ const counters = selectPluginState<Map<AgentId, AgentCounters>>(session.state, 'agentLimits')?.get(entryAgentId)
753
+ expect(counters).toBeDefined()
754
+ expect(counters!.costSpent).toBeGreaterThanOrEqual(SUMMARY_COST)
755
+
756
+ // …and it tripped the cost budget (the regular turns alone, at 0.1 each,
757
+ // could never reach the 2.0 cap on their own here).
758
+ const budgetEvents = await session.getEventsByType(limitsEvents, 'budget_exceeded')
759
+ const evt = budgetEvents.find((e) => e.agentId === entryAgentId)
760
+ expect(evt).toBeDefined()
761
+ expect(evt!.scope).toBe('agent')
762
+ expect(evt!.limitName).toBe('maxCost')
763
+
764
+ await harness.shutdown()
765
+ })
766
+ })
437
767
  })
@@ -1,5 +1,6 @@
1
1
  import type { AgentId } from '~/core/agents/schema.js'
2
2
  import { agentEvents } from '~/core/agents/state.js'
3
+ import { contextEvents } from '~/core/context/state.js'
3
4
  import { llmEvents } from '~/core/llm/state.js'
4
5
  import { definePlugin } from '~/core/plugins/plugin-builder.js'
5
6
  import { selectPluginState } from '~/core/sessions/reducer.js'
@@ -7,8 +8,15 @@ import type { SessionState } from '~/core/sessions/state.js'
7
8
  import { toolEvents } from '~/core/tools/state.js'
8
9
  import { responseFingerprint, toolCallFingerprint } from '~/lib/utils/hash.js'
9
10
  import { mailboxEvents } from '~/plugins/mailbox/state.js'
10
- import type { AgentLimits } from './config.js'
11
- import { checkLimits, countConsecutiveTailDuplicates, resolveAgentLimits } from './limit-guard.js'
11
+ import type { AgentLimits, LimitsSessionConfig } from './config.js'
12
+ import {
13
+ type BudgetSpend,
14
+ checkBudget,
15
+ checkLimits,
16
+ countConsecutiveTailDuplicates,
17
+ resolveAgentLimits,
18
+ resolveSessionLimits,
19
+ } from './limit-guard.js'
12
20
 
13
21
  // ============================================================================
14
22
  // Agent counters (state)
@@ -19,6 +27,12 @@ export interface AgentCounters {
19
27
  toolCallCount: number
20
28
  spawnedAgentCount: number
21
29
  messagesSentCount: number
30
+ /** Number of context compaction events for this agent. */
31
+ compactionCount: number
32
+ /** Cumulative LLM cost (USD) summed from inference metrics. NOT reset on resume. */
33
+ costSpent: number
34
+ /** Cumulative total tokens (prompt + completion). NOT reset on resume. */
35
+ tokensUsed: number
22
36
  /** Tool name → consecutive failure count + last error message. Reset on success. */
23
37
  consecutiveToolFailures: Record<string, { count: number; lastError: string }>
24
38
  /** Ring buffer of last 20 tool call fingerprints ("toolName:inputHash") */
@@ -32,11 +46,25 @@ export const createAgentCounters = (): AgentCounters => ({
32
46
  toolCallCount: 0,
33
47
  spawnedAgentCount: 0,
34
48
  messagesSentCount: 0,
49
+ compactionCount: 0,
50
+ costSpent: 0,
51
+ tokensUsed: 0,
35
52
  consecutiveToolFailures: {},
36
53
  recentToolCallHashes: [],
37
54
  recentResponseHashes: [],
38
55
  })
39
56
 
57
+ /** Sum cost + tokens across all agents in the session (for the session-wide budget). */
58
+ export function sumSessionSpend(limits: Map<AgentId, AgentCounters>): BudgetSpend {
59
+ let costSpent = 0
60
+ let tokensUsed = 0
61
+ for (const counters of limits.values()) {
62
+ costSpent += counters.costSpent
63
+ tokensUsed += counters.tokensUsed
64
+ }
65
+ return { costSpent, tokensUsed }
66
+ }
67
+
40
68
  /**
41
69
  * Extract agent counters from session state (for external consumers).
42
70
  */
@@ -61,10 +89,21 @@ export const limitsEvents = createEventsFactory({
61
89
  hardLimit: z.number(),
62
90
  message: z.string(),
63
91
  }),
92
+ budget_exceeded: z.object({
93
+ agentId: agentIdSchema,
94
+ /** Whether the per-agent or the session-wide budget was hit. */
95
+ scope: z.enum(['agent', 'session']),
96
+ /** Which limit tripped: maxCost / maxTokens / maxSessionCost / maxSessionTokens. */
97
+ limitName: z.string(),
98
+ spent: z.number(),
99
+ limit: z.number(),
100
+ message: z.string(),
101
+ }),
64
102
  },
65
103
  })
66
104
 
67
105
  export type LimitWarningEvent = (typeof limitsEvents)['Events']['limit_warning']
106
+ export type BudgetExceededEvent = (typeof limitsEvents)['Events']['budget_exceeded']
68
107
 
69
108
  // ============================================================================
70
109
  // Helper
@@ -95,7 +134,8 @@ export interface LimitsAgentConfig {
95
134
  }
96
135
 
97
136
  export const limitsGuardPlugin = definePlugin('limits-guard')
98
- .events([agentEvents, llmEvents, toolEvents, mailboxEvents])
137
+ .events([agentEvents, llmEvents, toolEvents, mailboxEvents, contextEvents, limitsEvents])
138
+ .pluginConfig<LimitsSessionConfig>()
99
139
  .state({
100
140
  key: 'agentLimits',
101
141
  initial: (): Map<AgentId, AgentCounters> => new Map(),
@@ -156,11 +196,41 @@ export const limitsGuardPlugin = definePlugin('limits-guard')
156
196
  newLimits.set(event.agentId, {
157
197
  ...counters,
158
198
  inferenceCount: counters.inferenceCount + 1,
199
+ costSpent: counters.costSpent + (event.metrics.cost ?? 0),
200
+ tokensUsed: counters.tokensUsed + (event.metrics.totalTokens ?? 0),
159
201
  recentResponseHashes: newRecentResponseHashes,
160
202
  })
161
203
  return newLimits
162
204
  }
163
205
 
206
+ case 'auxiliary_inference_completed': {
207
+ // Side-channel calls (e.g. context compaction) are billed but don't
208
+ // touch conversation state, so they count toward cost/token budgets
209
+ // but NOT toward inferenceCount or the anti-looping response hashes.
210
+ const counters = limits.get(event.agentId)
211
+ if (!counters) return limits
212
+
213
+ const newLimits = new Map(limits)
214
+ newLimits.set(event.agentId, {
215
+ ...counters,
216
+ costSpent: counters.costSpent + (event.metrics.cost ?? 0),
217
+ tokensUsed: counters.tokensUsed + (event.metrics.totalTokens ?? 0),
218
+ })
219
+ return newLimits
220
+ }
221
+
222
+ case 'context_compacted': {
223
+ const counters = limits.get(event.agentId)
224
+ if (!counters) return limits
225
+
226
+ const newLimits = new Map(limits)
227
+ newLimits.set(event.agentId, {
228
+ ...counters,
229
+ compactionCount: counters.compactionCount + 1,
230
+ })
231
+ return newLimits
232
+ }
233
+
164
234
  case 'tool_started': {
165
235
  const counters = limits.get(event.agentId)
166
236
  if (!counters) return limits
@@ -214,6 +284,11 @@ export const limitsGuardPlugin = definePlugin('limits-guard')
214
284
  const counters = limits.get(event.agentId)
215
285
  if (!counters) return limits
216
286
 
287
+ // Reset the anti-looping counters so the agent can make progress
288
+ // again. Budget spend (costSpent/tokensUsed) is deliberately NOT
289
+ // reset — otherwise a per-agent or session cost cap could be bypassed
290
+ // by repeatedly pausing and resuming. To grant more budget, raise the
291
+ // configured limit instead.
217
292
  const newLimits = new Map(limits)
218
293
  newLimits.set(event.agentId, {
219
294
  ...counters,
@@ -221,6 +296,7 @@ export const limitsGuardPlugin = definePlugin('limits-guard')
221
296
  toolCallCount: 0,
222
297
  spawnedAgentCount: 0,
223
298
  messagesSentCount: 0,
299
+ compactionCount: 0,
224
300
  consecutiveToolFailures: {},
225
301
  recentToolCallHashes: [],
226
302
  recentResponseHashes: [],
@@ -234,6 +310,56 @@ export const limitsGuardPlugin = definePlugin('limits-guard')
234
310
  },
235
311
  })
236
312
  .agentConfig<LimitsAgentConfig>()
313
+ .hook('beforeInference', async (ctx) => {
314
+ // Budgets are enforced here (before the call) so an exhausted agent is
315
+ // paused before spending more — cost/tokens of a call aren't known until
316
+ // after it returns, so we stop the *next* call once the threshold is hit.
317
+ const counters = ctx.pluginState.get(ctx.agentId)
318
+ if (!counters) return null
319
+
320
+ const agentLimits = resolveAgentLimits(ctx.pluginAgentConfig?.limits)
321
+ const agentCheck = checkBudget(
322
+ counters,
323
+ agentLimits.maxCost,
324
+ agentLimits.maxTokens,
325
+ agentLimits.softLimitRatio,
326
+ { cost: 'maxCost', tokens: 'maxTokens' },
327
+ )
328
+ if (agentCheck.status === 'hard_limit') {
329
+ await ctx.emitEvent(limitsEvents.create('budget_exceeded', {
330
+ agentId: ctx.agentId,
331
+ scope: 'agent',
332
+ limitName: agentCheck.limitName,
333
+ spent: agentCheck.currentValue,
334
+ limit: agentCheck.hardLimit,
335
+ message: agentCheck.reason,
336
+ }))
337
+ return { action: 'pause', reason: `Agent budget exceeded — ${agentCheck.reason}` }
338
+ }
339
+
340
+ const sessionLimits = resolveSessionLimits(ctx.pluginConfig)
341
+ const sessionSpend = sumSessionSpend(ctx.pluginState)
342
+ const sessionCheck = checkBudget(
343
+ sessionSpend,
344
+ sessionLimits.maxSessionCost,
345
+ sessionLimits.maxSessionTokens,
346
+ sessionLimits.softLimitRatio,
347
+ { cost: 'maxSessionCost', tokens: 'maxSessionTokens' },
348
+ )
349
+ if (sessionCheck.status === 'hard_limit') {
350
+ await ctx.emitEvent(limitsEvents.create('budget_exceeded', {
351
+ agentId: ctx.agentId,
352
+ scope: 'session',
353
+ limitName: sessionCheck.limitName,
354
+ spent: sessionCheck.currentValue,
355
+ limit: sessionCheck.hardLimit,
356
+ message: sessionCheck.reason,
357
+ }))
358
+ return { action: 'pause', reason: `Session budget exceeded — ${sessionCheck.reason}` }
359
+ }
360
+
361
+ return null
362
+ })
237
363
  .hook('afterInference', async (ctx) => {
238
364
  const resolvedLimits = resolveAgentLimits(ctx.pluginAgentConfig?.limits)
239
365
  const counters = ctx.pluginState.get(ctx.agentId)
@@ -301,6 +427,30 @@ export const limitsGuardPlugin = definePlugin('limits-guard')
301
427
  )
302
428
  }
303
429
 
430
+ // Budget soft warnings — per-agent spend, then the session-wide budget.
431
+ const agentBudget = checkBudget(
432
+ counters,
433
+ resolvedLimits.maxCost,
434
+ resolvedLimits.maxTokens,
435
+ resolvedLimits.softLimitRatio,
436
+ { cost: 'maxCost', tokens: 'maxTokens' },
437
+ )
438
+ if (agentBudget.status === 'soft_warning') {
439
+ parts.push(`⚠️ ${agentBudget.message}. You will be paused when the budget is reached — wrap up.`)
440
+ }
441
+
442
+ const sessionLimits = resolveSessionLimits(ctx.pluginConfig)
443
+ const sessionBudget = checkBudget(
444
+ sumSessionSpend(ctx.pluginState),
445
+ sessionLimits.maxSessionCost,
446
+ sessionLimits.maxSessionTokens,
447
+ sessionLimits.softLimitRatio,
448
+ { cost: 'maxSessionCost', tokens: 'maxSessionTokens' },
449
+ )
450
+ if (sessionBudget.status === 'soft_warning') {
451
+ parts.push(`⚠️ Session-wide ${sessionBudget.message}.`)
452
+ }
453
+
304
454
  return parts.length > 0 ? parts.join('\n\n') : null
305
455
  })
306
456
  .build()
@@ -228,6 +228,24 @@ export const mailboxPlugin = definePlugin("mailbox")
228
228
  });
229
229
  return null;
230
230
  })
231
+ .hook("onPause", async (ctx) => {
232
+ // Notify the parent immediately when a child pauses (budget/limit exhaustion,
233
+ // manual pause, …) so it can react: resume after addressing the cause,
234
+ // reassign the work, or stop. Lives here (not in the agents plugin) because
235
+ // the agents plugin is disabled on leaf agents that can't spawn — but those
236
+ // are exactly the agents that pause and need to report upward. mailbox is
237
+ // enabled on every agent, and self.send wakes the parent. Root agents
238
+ // (no parent) have no one to notify.
239
+ const parentId = ctx.agentState.parentId;
240
+ if (!parentId || !ctx.sessionState.agents.has(parentId)) return null;
241
+
242
+ await ctx.self.send({
243
+ fromAgentId: ctx.agentId,
244
+ toAgentId: parentId,
245
+ content: `<child-paused agent="${ctx.agentId}">${ctx.reason ?? "no reason given"}</child-paused>`,
246
+ });
247
+ return null;
248
+ })
231
249
  .systemPrompt((ctx) => {
232
250
  const role = getAgentRole(ctx.agentState, ctx.sessionState);
233
251
  switch (role) {
@@ -79,7 +79,11 @@ export const sessionStatsPlugin = definePlugin('session-stats')
79
79
  case 'agent_spawned':
80
80
  return withTimestamp({ agentCount: stats.agentCount + 1 })
81
81
 
82
- case 'inference_completed': {
82
+ // inference_completed = main agent turns; auxiliary_inference_completed =
83
+ // side-channel calls (e.g. context compaction). Both are billed LLM
84
+ // calls, so both feed the same usage/cost accounting.
85
+ case 'inference_completed':
86
+ case 'auxiliary_inference_completed': {
83
87
  const provider = event.metrics.provider
84
88
  const byProvider = provider
85
89
  ? {