aisoc 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aisoc/__init__.py +108 -0
- aisoc/agents/__init__.py +60 -0
- aisoc/agents/base.py +328 -0
- aisoc/agents/campaign_detector.py +226 -0
- aisoc/agents/detection_eng.py +380 -0
- aisoc/agents/ir_lead.py +256 -0
- aisoc/agents/soc_manager.py +354 -0
- aisoc/agents/threat_hunter.py +435 -0
- aisoc/agents/threat_intel.py +249 -0
- aisoc/agents/tier2.py +249 -0
- aisoc/agents/triage.py +175 -0
- aisoc/backtest.py +174 -0
- aisoc/bus.py +216 -0
- aisoc/case_memory.py +1418 -0
- aisoc/config.py +25 -0
- aisoc/extract.py +63 -0
- aisoc/hitl_store.py +260 -0
- aisoc/py.typed +0 -0
- aisoc/redis_bus.py +146 -0
- aisoc/schemas.py +431 -0
- aisoc/seams.py +108 -0
- aisoc/verdict_store.py +126 -0
- aisoc-0.1.0.dist-info/METADATA +306 -0
- aisoc-0.1.0.dist-info/RECORD +26 -0
- aisoc-0.1.0.dist-info/WHEEL +4 -0
- aisoc-0.1.0.dist-info/licenses/LICENSE +21 -0
aisoc/__init__.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""aisoc — an AI SOC kernel.
|
|
2
|
+
|
|
3
|
+
A SOC modeled as a *team*, not a classifier: a set of role-based LLM agents —
|
|
4
|
+
triage, Tier 2, IR Lead, Threat Intel, Threat Hunter, Detection Engineer, and a
|
|
5
|
+
SOC Manager over the top — that work a case together over a shared, event-sourced
|
|
6
|
+
bus, with a human in the loop on every consequential action.
|
|
7
|
+
|
|
8
|
+
aisoc owns the kernel: the event contract, the bus, the case-memory read models,
|
|
9
|
+
and the agent framework. It owns none of your environment — you inject three
|
|
10
|
+
seams (see :mod:`aisoc.seams`): an LLM, an alert source, and a tool registry.
|
|
11
|
+
|
|
12
|
+
Quick start (zero infrastructure)::
|
|
13
|
+
|
|
14
|
+
from aisoc import InMemoryBus, AlertTriaged, STREAM_TRIAGE, parse_event
|
|
15
|
+
|
|
16
|
+
bus = InMemoryBus()
|
|
17
|
+
bus.publish(STREAM_TRIAGE, AlertTriaged(
|
|
18
|
+
correlation_id="TICKET-1", produced_by="triage", ticket_id="TICKET-1",
|
|
19
|
+
verdict="false_positive", confidence=0.91, summary="benign scanner",
|
|
20
|
+
))
|
|
21
|
+
|
|
22
|
+
for raw in bus.replay(): # read the audit log back
|
|
23
|
+
print(parse_event(raw).event_type)
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from aisoc.bus import (
|
|
27
|
+
ALL_STREAMS,
|
|
28
|
+
STREAM_ALERTS,
|
|
29
|
+
STREAM_AUDIT,
|
|
30
|
+
STREAM_CASES,
|
|
31
|
+
STREAM_TRIAGE,
|
|
32
|
+
Bus,
|
|
33
|
+
InMemoryBus,
|
|
34
|
+
)
|
|
35
|
+
from aisoc.schemas import (
|
|
36
|
+
EVENT_TYPES,
|
|
37
|
+
VALID_SEVERITIES,
|
|
38
|
+
VALID_VERDICTS,
|
|
39
|
+
ActionDecision,
|
|
40
|
+
ActionProposed,
|
|
41
|
+
AlertReceived,
|
|
42
|
+
AlertTriaged,
|
|
43
|
+
BusEvent,
|
|
44
|
+
CampaignDetected,
|
|
45
|
+
CaseEscalated,
|
|
46
|
+
DetectionTuningReport,
|
|
47
|
+
HuntingReport,
|
|
48
|
+
IRPlan,
|
|
49
|
+
Severity,
|
|
50
|
+
ShiftSummary,
|
|
51
|
+
ThreatIntelReport,
|
|
52
|
+
Tier2Analysis,
|
|
53
|
+
Verdict,
|
|
54
|
+
parse_event,
|
|
55
|
+
)
|
|
56
|
+
from aisoc.seams import AlertSource, ChatModel, StructuredChatModel, ToolProvider
|
|
57
|
+
|
|
58
|
+
# Case memory — the event-log read models + their sidecar stores. Imported so
|
|
59
|
+
# they're part of the documented surface: `from aisoc import case_memory`.
|
|
60
|
+
from aisoc import case_memory, hitl_store, verdict_store
|
|
61
|
+
from aisoc.config import data_dir
|
|
62
|
+
from aisoc.extract import extract_indicators, parse_event_ts
|
|
63
|
+
|
|
64
|
+
__version__ = "0.1.0"
|
|
65
|
+
|
|
66
|
+
__all__ = [
|
|
67
|
+
# Bus
|
|
68
|
+
"Bus",
|
|
69
|
+
"InMemoryBus",
|
|
70
|
+
"STREAM_ALERTS",
|
|
71
|
+
"STREAM_TRIAGE",
|
|
72
|
+
"STREAM_CASES",
|
|
73
|
+
"STREAM_AUDIT",
|
|
74
|
+
"ALL_STREAMS",
|
|
75
|
+
# Event contract
|
|
76
|
+
"BusEvent",
|
|
77
|
+
"AlertReceived",
|
|
78
|
+
"AlertTriaged",
|
|
79
|
+
"CaseEscalated",
|
|
80
|
+
"Tier2Analysis",
|
|
81
|
+
"IRPlan",
|
|
82
|
+
"ActionProposed",
|
|
83
|
+
"ActionDecision",
|
|
84
|
+
"ThreatIntelReport",
|
|
85
|
+
"DetectionTuningReport",
|
|
86
|
+
"HuntingReport",
|
|
87
|
+
"CampaignDetected",
|
|
88
|
+
"ShiftSummary",
|
|
89
|
+
"Verdict",
|
|
90
|
+
"VALID_VERDICTS",
|
|
91
|
+
"Severity",
|
|
92
|
+
"VALID_SEVERITIES",
|
|
93
|
+
"EVENT_TYPES",
|
|
94
|
+
"parse_event",
|
|
95
|
+
# Injection seams
|
|
96
|
+
"ChatModel",
|
|
97
|
+
"StructuredChatModel",
|
|
98
|
+
"AlertSource",
|
|
99
|
+
"ToolProvider",
|
|
100
|
+
# Case memory
|
|
101
|
+
"case_memory",
|
|
102
|
+
"verdict_store",
|
|
103
|
+
"hitl_store",
|
|
104
|
+
"data_dir",
|
|
105
|
+
"extract_indicators",
|
|
106
|
+
"parse_event_ts",
|
|
107
|
+
"__version__",
|
|
108
|
+
]
|
aisoc/agents/__init__.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""aisoc role agents — the bus-consuming team over injected seams.
|
|
2
|
+
|
|
3
|
+
Part of the ``agent`` extra (``pip install "aisoc[agent]"``). Two shapes:
|
|
4
|
+
|
|
5
|
+
**Per-ticket roles** subclass :class:`~aisoc.agents.base.Agent` and are
|
|
6
|
+
constructed with an injected bus, chat model, and tool provider. They consume
|
|
7
|
+
events and publish follow-ups:
|
|
8
|
+
|
|
9
|
+
from aisoc import InMemoryBus
|
|
10
|
+
from aisoc.agents import TriageAgent, Tier2Agent, IRLeadAgent, ThreatIntelAgent
|
|
11
|
+
|
|
12
|
+
TriageAgent(bus=bus, model=model, tools=tools).run() # alerts -> verdicts
|
|
13
|
+
|
|
14
|
+
The chain is: ``TriageAgent`` (alert → first-pass verdict) → ``Tier2Agent``
|
|
15
|
+
(deeper look, escalate?) → ``IRLeadAgent`` (containment plan + a human-gated
|
|
16
|
+
``ActionProposed``) → ``ThreatIntelAgent`` (actor attribution).
|
|
17
|
+
|
|
18
|
+
**Windowed roles** are scheduled ``run_once(*, bus, model=...)`` functions that
|
|
19
|
+
replay the audit log over a time window and publish a report — call them on a
|
|
20
|
+
timer rather than running a consume loop:
|
|
21
|
+
|
|
22
|
+
from aisoc.agents import detection_eng, soc_manager, threat_hunter, campaign_detector
|
|
23
|
+
|
|
24
|
+
detection_eng.run_once(bus=bus, model=model, window_hours=24)
|
|
25
|
+
soc_manager.run_once(bus=bus, model=model, window_hours=8)
|
|
26
|
+
threat_hunter.run_once(bus=bus, model=model, window_hours=24)
|
|
27
|
+
campaign_detector.run_once(bus=bus, model=model, window_days=14)
|
|
28
|
+
|
|
29
|
+
Everything is injected — no role touches your environment directly. The same
|
|
30
|
+
code runs in a unit test against a stub, in the demo, or in production.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
from aisoc.agents import (
|
|
34
|
+
campaign_detector,
|
|
35
|
+
detection_eng,
|
|
36
|
+
soc_manager,
|
|
37
|
+
threat_hunter,
|
|
38
|
+
)
|
|
39
|
+
from aisoc.agents.base import Agent, BudgetExceeded, parse_json_block
|
|
40
|
+
from aisoc.agents.ir_lead import IRLeadAgent
|
|
41
|
+
from aisoc.agents.threat_intel import ThreatIntelAgent
|
|
42
|
+
from aisoc.agents.tier2 import Tier2Agent
|
|
43
|
+
from aisoc.agents.triage import TriageAgent
|
|
44
|
+
|
|
45
|
+
__all__ = [
|
|
46
|
+
# framework
|
|
47
|
+
"Agent",
|
|
48
|
+
"BudgetExceeded",
|
|
49
|
+
"parse_json_block",
|
|
50
|
+
# per-ticket roles
|
|
51
|
+
"TriageAgent",
|
|
52
|
+
"Tier2Agent",
|
|
53
|
+
"IRLeadAgent",
|
|
54
|
+
"ThreatIntelAgent",
|
|
55
|
+
# windowed roles (call <module>.run_once)
|
|
56
|
+
"detection_eng",
|
|
57
|
+
"soc_manager",
|
|
58
|
+
"threat_hunter",
|
|
59
|
+
"campaign_detector",
|
|
60
|
+
]
|
aisoc/agents/base.py
ADDED
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
"""Base class for aisoc role agents.
|
|
2
|
+
|
|
3
|
+
Each role (triage, Tier 2, IR Lead, Threat Intel, Threat Hunter, Detection
|
|
4
|
+
Engineer, SOC Manager) subclasses :class:`Agent` and implements:
|
|
5
|
+
|
|
6
|
+
- ``role``: short identifier — the consumer group *and* the key the
|
|
7
|
+
:class:`~aisoc.seams.ToolProvider` is asked for tools with
|
|
8
|
+
- ``streams_to_consume()``: which bus streams to subscribe to
|
|
9
|
+
- ``handle(stream, event)``: process one event, publish follow-up events
|
|
10
|
+
|
|
11
|
+
Everything an agent needs is **injected** — there are no environment hooks in
|
|
12
|
+
this layer:
|
|
13
|
+
|
|
14
|
+
- the **bus** (:class:`~aisoc.bus.Bus`) it reads from and publishes to,
|
|
15
|
+
- the **chat model** (:class:`~aisoc.seams.ChatModel`) it reasons with,
|
|
16
|
+
- the **tool provider** (:class:`~aisoc.seams.ToolProvider`) that hands each
|
|
17
|
+
role its allowed tools.
|
|
18
|
+
|
|
19
|
+
The base owns the parts that are the same for every role: consumer-group
|
|
20
|
+
registration, the bind-tools-and-iterate **tool-call loop**, per-event tool-call
|
|
21
|
+
budget enforcement, and graceful SIGTERM shutdown. A subclass is then just "which
|
|
22
|
+
events do I care about, and what do I publish in response."
|
|
23
|
+
|
|
24
|
+
This module is part of the ``agent`` extra (``pip install "aisoc[agent]"``) — it
|
|
25
|
+
imports ``langchain_core`` for its message/tool types. The kernel itself (bus,
|
|
26
|
+
schemas, seams, case memory) never imports it.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
import json
|
|
32
|
+
import logging
|
|
33
|
+
import re
|
|
34
|
+
import signal
|
|
35
|
+
import time
|
|
36
|
+
from abc import ABC, abstractmethod
|
|
37
|
+
from typing import Any, Optional
|
|
38
|
+
|
|
39
|
+
from langchain_core.messages import BaseMessage, HumanMessage, ToolMessage
|
|
40
|
+
from pydantic import BaseModel
|
|
41
|
+
|
|
42
|
+
from aisoc.bus import Bus
|
|
43
|
+
from aisoc.seams import ChatModel, ToolProvider
|
|
44
|
+
|
|
45
|
+
logger = logging.getLogger(__name__)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class BudgetExceeded(Exception):
    """Signal that a role has used up its tool-call allowance for one event.

    ``Agent.check_budget`` raises this once the per-event cap is reached.
    """
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def parse_json_block(text: str) -> Optional[dict[str, Any]]:
    """Pull the first JSON object out of an LLM response, tolerant of fences.

    Role agents ask the model for a strict JSON decision block; models still
    wrap it in prose or ```` ```json ```` fences often enough that every role
    needs this.

    Parsing starts at the first ``{`` and stops at the end of the first
    complete JSON value, so text *after* the object — trailing prose, a stray
    ``}``, or a second object — does not prevent recovery.

    Args:
        text: Raw model output. Empty or ``None`` yields ``None``.

    Returns:
        The decoded object as a ``dict``, or ``None`` when no object can be
        recovered (no braces at all, or the text at the first ``{`` is not
        valid JSON).
    """
    if not text:
        return None
    text = text.strip()
    # Drop a leading ``` / ```json fence and a trailing ``` fence, if present.
    text = re.sub(r"^```(?:json)?\s*", "", text)
    text = re.sub(r"\s*```$", "", text)
    start = text.find("{")
    # No opening brace, or no closing brace after it: nothing to parse.
    if start == -1 or text.find("}", start) == -1:
        return None
    try:
        # raw_decode consumes exactly one JSON value beginning at ``start``
        # and ignores whatever follows it, unlike slicing to the last "}".
        result, _end = json.JSONDecoder().raw_decode(text, start)
    except json.JSONDecodeError as exc:
        logger.warning("parse_json_block: JSON parse failed: %s", exc)
        return None
    return result if isinstance(result, dict) else None
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class Agent(ABC):
    """Abstract bus-consuming agent over the three injection seams.

    Subclasses set ``role`` and optionally override ``budget`` (the per-event
    tool-call cap — enrichment-heavy roles want it higher, advisory roles
    lower). They must implement :meth:`streams_to_consume` and :meth:`handle`;
    everything else (tool-call loop, budget, ack policy, shutdown) lives here.
    """

    # Short role identifier: used as the consumer-group name and as the key
    # the tool provider is asked for tools with. Must be non-empty.
    role: str = ""
    # Maximum number of tool calls allowed while handling a single event.
    budget: int = 20

    def __init__(
        self,
        bus: Bus,
        model: ChatModel,
        tools: Optional[ToolProvider] = None,
    ):
        """Wire the agent to its injected bus, chat model and tool provider.

        Args:
            bus: Bus the agent consumes from and publishes to.
            model: Chat model the agent reasons with.
            tools: Optional provider of per-role tools; ``None`` means the
                role has no tools.

        Raises:
            ValueError: If the subclass did not set ``role``.
        """
        # Function-scope imports: only the constructor needs these.
        import os
        import uuid

        if not self.role:
            raise ValueError(f"{type(self).__name__} must set role")

        self.bus = bus
        self.model = model
        self.tools = tools
        # Unique consumer id per instance so multiple instances of one role can
        # run in parallel and share the consumer group (the bus distributes
        # events across them). A bare second-resolution timestamp collides when
        # two instances start within the same second, so the pid and a random
        # suffix are appended; the "<role>-<epoch>" prefix is kept as before.
        self.consumer = (
            f"{self.role}-{int(time.time())}-{os.getpid()}-{uuid.uuid4().hex[:8]}"
        )
        self._stop = False
        self._tool_calls_used = 0

        # NOTE: signal handlers are process-wide, so when several agents are
        # built in one process the most recently constructed one receives the
        # signal.
        for signum in (signal.SIGTERM, signal.SIGINT):
            try:
                signal.signal(signum, self._on_signal)
            except ValueError:
                # signal.signal() only works in the main thread. An agent
                # built in a worker thread simply runs without signal-driven
                # shutdown instead of failing to construct.
                break

    # ----- subclass surface ------------------------------------------------

    @abstractmethod
    def streams_to_consume(self) -> list[str]:
        """Bus stream names this agent reads from."""

    @abstractmethod
    def handle(self, stream: str, event: dict[str, Any]) -> None:
        """Process one event. Call ``self.publish()`` to emit follow-ups."""

    # ----- shared infrastructure ------------------------------------------

    @property
    def consumer_group(self) -> str:
        """Consumer-group name on the bus; identical to ``role``."""
        return self.role

    def tools_for_role(self) -> list[Any]:
        """The tools this role may call, from the injected provider (or none)."""
        if self.tools is None:
            return []
        return list(self.tools.tools_for(self.role))

    def publish(self, stream: str, event: BaseModel) -> str:
        """Publish an event, then fire the :meth:`notify` side-effect hook.

        Returns the message id reported by the bus. A failing ``notify`` is
        logged and swallowed; a failing ``bus.publish`` propagates.
        """
        msg_id = self.bus.publish(stream, event)
        try:
            self.notify(stream, event)
        except Exception:  # a notifier side-effect must never break the case
            logger.exception("agent.%s notify hook failed on %s", self.role, stream)
        return msg_id

    def notify(self, stream: str, event: BaseModel) -> None:
        """Side-effect hook, fired after each successful publish. No-op by default.

        The kernel notifies no one — every result is just an event on the bus,
        and the intended consumer is another agent (or a dedicated notifier that
        subscribes to the stream). But an integration often wants to *also* push
        an out-of-band notification — a chat card, a page, a webhook — at the
        moment a role publishes. Override this to do that. It runs only after
        ``bus.publish`` has returned; dispatch on ``event.event_type``. Any
        exception it raises is logged and swallowed so a flaky notifier can
        never break the investigation it is reporting on.
        """

    # ----- the tool-call loop ---------------------------------------------

    def run_tool_loop(
        self,
        messages: list[BaseMessage],
        tools: Optional[list[Any]] = None,
        max_iterations: Optional[int] = None,
    ) -> tuple[list[BaseMessage], list[dict[str, Any]]]:
        """Bind ``tools`` to the model and iterate until it stops calling them.

        The generic version of what every reasoning role does: invoke the model,
        run any tool calls it asks for (each charged against the per-event
        budget), feed the results back, and repeat until the model answers
        without a tool call or the budget runs out.

        Models that can't bind tools (a plain stub, a non-LangChain wrapper) are
        invoked as-is — the loop then just does a single pass.

        Args:
            messages: Conversation so far. It is appended to **in place**.
            tools: Tools to offer; defaults to :meth:`tools_for_role`.
            max_iterations: Cap on model round-trips that may carry tool
                calls; defaults to ``budget``.

        Returns:
            ``(messages, trace)``: the grown message list and a list of
            ``{"tool": name, "args": args}`` entries for the calls executed.
            Normally the last message is the model's final answer. If the
            budget runs out mid-turn, the last messages are instead
            ``"error: tool-call budget exhausted"`` tool results, one for
            every tool call of that turn that was not executed.
        """
        tools = tools if tools is not None else self.tools_for_role()

        tool_map: dict[Any, Any] = {}
        for index, tool in enumerate(tools):
            # LangChain tools expose ``name``; plain callables only have
            # ``__name__``. Without this fallback a plain callable would be
            # keyed by its list index and never match the model's call name.
            key = getattr(tool, "name", None)
            if key is None:
                key = getattr(tool, "__name__", None)
            if key is None:
                key = str(index)
            tool_map[key] = tool

        cap = max_iterations if max_iterations is not None else self.budget

        bind = getattr(self.model, "bind_tools", None)
        runnable: Any = bind(tools) if (tools and callable(bind)) else self.model

        trace: list[dict[str, Any]] = []
        for _ in range(cap + 1):  # +1 so the final no-tool answer is captured
            resp = runnable.invoke(messages)
            messages.append(resp)
            tool_calls = list(getattr(resp, "tool_calls", None) or [])
            if not tool_calls:
                break
            for position, call in enumerate(tool_calls):
                name = call.get("name", "")
                args = call.get("args", {}) or {}
                call_id = call.get("id") or name
                try:
                    self.check_budget()
                except BudgetExceeded:
                    # Answer this call and every later call of the same turn:
                    # a tool call left without a result makes the transcript
                    # invalid for any follow-up model invocation.
                    for pending in tool_calls[position:]:
                        messages.append(ToolMessage(
                            content="error: tool-call budget exhausted",
                            tool_call_id=pending.get("id") or pending.get("name", ""),
                        ))
                    return messages, trace
                result = self._invoke_tool(tool_map.get(name), name, args)
                trace.append({"tool": name, "args": args})
                messages.append(ToolMessage(content=str(result), tool_call_id=call_id))
        return messages, trace

    @staticmethod
    def _invoke_tool(tool: Any, name: str, args: dict[str, Any]) -> Any:
        """Run one tool call, duck-typing LangChain tools and plain callables.

        Objects with a callable ``invoke`` are called as ``tool.invoke(args)``;
        anything else as ``tool(**args)``. Failures never propagate: an unknown
        tool or a raised exception is returned as an ``"error: ..."`` string so
        the model can see it.
        """
        if tool is None:
            return f"error: unknown tool {name!r}"
        try:
            invoke = getattr(tool, "invoke", None)
            if callable(invoke):
                return invoke(args)
            return tool(**args)
        except Exception as exc:  # a tool failure must not kill the case
            logger.warning("tool %s raised: %s", name, exc)
            return f"error: {exc}"

    def final_text(self, messages: list[BaseMessage]) -> str:
        """The text content of the last message, or '' if there is none.

        Non-string content is converted with ``str()``.
        """
        if not messages:
            return ""
        content = getattr(messages[-1], "content", "")
        return content if isinstance(content, str) else str(content)

    def force_json(
        self, messages: list[BaseMessage], instruction: str | None = None,
    ) -> dict[str, Any]:
        """Coerce a JSON decision block out of the conversation.

        Tries the last message first; if it carries no JSON, appends a
        ``HumanMessage`` (``instruction`` or a default prompt) to ``messages``
        and asks the model once more for the block alone (no tools bound).
        Returns ``{}`` if even that fails — callers coerce missing fields
        against their own defaults.
        """
        decision = parse_json_block(self.final_text(messages))
        if decision is not None:
            return decision
        messages.append(HumanMessage(
            content=instruction or "Emit ONLY the JSON decision block now. "
                                   "No prose, no fences."
        ))
        resp = self.model.invoke(messages)
        text = resp.content if hasattr(resp, "content") else str(resp)
        return parse_json_block(text if isinstance(text, str) else str(text)) or {}

    # ----- budget ----------------------------------------------------------

    def check_budget(self) -> None:
        """Call before each tool invocation. Raises at the per-event cap.

        Raises:
            BudgetExceeded: If ``budget`` calls were already charged since the
                last :meth:`reset_budget`. Otherwise one call is charged.
        """
        if self._tool_calls_used >= self.budget:
            raise BudgetExceeded(
                f"{self.role} exceeded {self.budget} tool calls per event"
            )
        self._tool_calls_used += 1

    def reset_budget(self) -> None:
        """Zero the tool-call counter; done before each event is handled."""
        self._tool_calls_used = 0

    # ----- run loop --------------------------------------------------------

    def _on_signal(self, signum: int, _frame: Any) -> None:
        """Signal handler: request that the run/drain loop stop."""
        logger.info("agent.%s received signal %d — draining", self.role, signum)
        self._stop = True

    def _process_one(self, stream: str, msg_id: str, event: dict[str, Any]) -> None:
        """Run the handler for one event and ack per the same policy as run().

        Acked on success and on budget exhaustion; any other exception is
        logged and the event is left unacked.
        """
        try:
            self.reset_budget()
            self.handle(stream, event)
            self.bus.ack(stream, self.consumer_group, msg_id)
        except BudgetExceeded as exc:
            logger.warning("agent.%s budget exhausted msg=%s: %s",
                           self.role, msg_id, exc)
            self.bus.ack(stream, self.consumer_group, msg_id)
        except Exception:
            logger.exception("agent.%s handler error msg=%s", self.role, msg_id)

    def drain(self, max_events: Optional[int] = None) -> int:
        """Process every event currently available, then return — no blocking.

        The non-looping counterpart to :meth:`run`: it consumes what's on the
        agent's streams right now and stops once caught up. This is what the
        offline backtest and the zero-infra demo drive each role with — publish
        a batch, drain triage, drain Tier 2, and so on down the chain.

        Args:
            max_events: Optional cap on events to process. Each batch request
                is sized to the remaining allowance so the agent does not
                take events off the bus that it will not process. A cap of
                zero or less processes nothing.

        Returns:
            The number of events processed.
        """
        streams = self.streams_to_consume()
        processed = 0
        while not self._stop:
            if max_events is None:
                wanted = 10
            else:
                remaining = max_events - processed
                if remaining <= 0:
                    break
                wanted = min(10, remaining)
            batch = self.bus.consume_batch(
                streams, self.consumer_group, self.consumer,
                batch_size=wanted, block_ms=0,
            )
            if not batch:
                break
            for stream, msg_id, event in batch:
                self._process_one(stream, msg_id, event)
                processed += 1
                # Still guard here in case the bus hands back more than asked.
                if max_events is not None and processed >= max_events:
                    return processed
        return processed

    def run(self) -> None:
        """Main loop. Returns on SIGTERM/SIGINT.

        Each event is acked on success or on budget exhaustion (a normal-ish
        outcome). A handler that raises leaves the event unacked so the bus
        redelivers it — a crash mid-case never silently drops work.
        """
        streams = self.streams_to_consume()
        logger.info(
            "agent.%s starting consumer=%s group=%s streams=%s budget=%d",
            self.role, self.consumer, self.consumer_group, streams, self.budget,
        )

        while not self._stop:
            batch = self.bus.consume_batch(
                streams, self.consumer_group, self.consumer,
                batch_size=10, block_ms=1000,
            )
            for stream, msg_id, event in batch:
                # Stop mid-batch on shutdown; untouched events stay unacked.
                if self._stop:
                    break
                self._process_one(stream, msg_id, event)

        logger.info("agent.%s shut down clean", self.role)
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
__all__ = ["Agent", "BudgetExceeded", "parse_json_block"]
|