agentarmour-toolkit 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
"""AgentArmour — production reliability suite for LangChain/LangGraph multi-agent systems."""

# Must match the version of the published distribution: this is the value the
# command-line interface prints for `agentarmour --version`.
__version__ = "0.1.1"
@@ -0,0 +1,34 @@
1
+ """
2
+ CascadeBreaker — Circuit breaker and self-healing layer for LangGraph multi-agent systems.
3
+ """
4
+
5
+ from agentarmour.cascadebreaker.breaker import CircuitBreaker
6
+ from agentarmour.cascadebreaker.config import BreakerConfig
7
+ from agentarmour.cascadebreaker.states import BreakerState, FailureRecord, StateTransition
8
+ from agentarmour.cascadebreaker.strategies import (
9
+ FallbackStrategy,
10
+ FallbackResult,
11
+ DegradeStrategy,
12
+ CacheStrategy,
13
+ EscalateStrategy,
14
+ DecomposeStrategy,
15
+ )
16
+ from agentarmour.cascadebreaker.guard import CascadeGuard
17
+ from agentarmour.cascadebreaker.registry import BreakerRegistry, get_registry
18
+
19
+ __all__ = [
20
+ "CircuitBreaker",
21
+ "BreakerConfig",
22
+ "BreakerState",
23
+ "FailureRecord",
24
+ "StateTransition",
25
+ "FallbackStrategy",
26
+ "FallbackResult",
27
+ "DegradeStrategy",
28
+ "CacheStrategy",
29
+ "EscalateStrategy",
30
+ "DecomposeStrategy",
31
+ "CascadeGuard",
32
+ "BreakerRegistry",
33
+ "get_registry",
34
+ ]
@@ -0,0 +1,310 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import functools
5
+ import time
6
+ import traceback
7
+ from typing import Any, Callable, Awaitable, TypeVar, Optional, TYPE_CHECKING
8
+ import structlog
9
+
10
+ if TYPE_CHECKING:
11
+ from agentarmour.cascadebreaker.storage.base import AuditLedger
12
+
13
+ from agentarmour.cascadebreaker.config import BreakerConfig
14
+ from agentarmour.cascadebreaker.states import (
15
+ BreakerState,
16
+ BreakerStateMachine,
17
+ FailureCategory,
18
+ FailureRecord,
19
+ )
20
+ from agentarmour.cascadebreaker.strategies import (
21
+ BaseFallbackStrategy,
22
+ CacheStrategy,
23
+ DegradeStrategy,
24
+ DecomposeStrategy,
25
+ EscalateStrategy,
26
+ FallbackResult,
27
+ FallbackStrategy,
28
+ )
29
+
30
+ logger = structlog.get_logger(__name__)
31
+
32
+ NodeFn = TypeVar("NodeFn", bound=Callable[..., Awaitable[dict[str, Any]]])
33
+
34
+
35
def _classify_exception(exc: Exception) -> FailureCategory:
    """Map an exception onto a ``FailureCategory`` using keyword heuristics.

    The lower-cased exception class name and message are scanned for marker
    substrings. Rules are tried in a fixed order and the first match wins;
    anything that matches no rule is reported as ``FailureCategory.EXCEPTION``.
    """
    type_name = type(exc).__name__.lower()
    text = str(exc).lower()

    def mentions(*needles: str) -> bool:
        # True when the message contains at least one of the markers.
        return any(needle in text for needle in needles)

    if "timeout" in type_name or mentions("timeout"):
        category = FailureCategory.LATENCY_BREACH
    elif "recursion" in type_name or mentions("loop"):
        category = FailureCategory.REASONING_LOOP
    elif mentions("token", "context", "length"):
        category = FailureCategory.CONTEXT_OVERFLOW
    elif mentions("tool", "function"):
        category = FailureCategory.TOOL_CALL_FAILURE
    elif mentions("invalid") and mentions("state"):
        category = FailureCategory.STATE_CORRUPTION
    else:
        category = FailureCategory.EXCEPTION
    return category
50
+
51
+
52
class CircuitBreaker:
    """Guard an async node function with a circuit breaker and a fallback.

    Before each call the breaker asks its ``BreakerStateMachine`` whether the
    call is permitted. Permitted calls run the wrapped function, bounded by
    ``config.call_timeout`` when that is set. Refused calls and calls that
    raise do not propagate an error: the state produced by the configured
    fallback strategy is returned instead.

    Args:
        name: Identifier used in logs, failure records and callbacks.
        config: Breaker settings; a default ``BreakerConfig`` is created when
            omitted.
        fallback_strategy: Either a ``FallbackStrategy`` member, for which a
            default strategy object is built, or a ready-made strategy object
            whose async ``execute(state=..., breaker_name=...,
            failure_context=...)`` returns a ``FallbackResult``.
        on_open: Optional async callback awaited with the breaker name when
            the breaker is observed to have become OPEN.
        on_close: Same, for CLOSED.
        on_half_open: Same, for HALF_OPEN.
        ledger: Optional audit ledger handed to the state machine.
    """

    def __init__(
        self,
        name: str,
        config: Optional[BreakerConfig] = None,
        fallback_strategy: Any = FallbackStrategy.CACHE,
        on_open: Optional[Callable[[str], Awaitable[None]]] = None,
        on_close: Optional[Callable[[str], Awaitable[None]]] = None,
        on_half_open: Optional[Callable[[str], Awaitable[None]]] = None,
        ledger: Optional["AuditLedger"] = None,
    ) -> None:
        self.name = name
        self.config = config or BreakerConfig()

        # An enum member selects one of the built-in strategies; anything else
        # is taken to be a strategy object supplied by the caller.
        if isinstance(fallback_strategy, FallbackStrategy):
            self._strategy_enum = fallback_strategy
            self._strategy = self._build_default_strategy(fallback_strategy)
        else:
            self._strategy_enum = None
            self._strategy = fallback_strategy

        self._state_machine = BreakerStateMachine(
            breaker_name=name,
            failure_threshold=self.config.failure_threshold,
            recovery_timeout=self.config.recovery_timeout,
            window_seconds=self.config.window_seconds,
            half_open_max_calls=self.config.half_open_max_calls,
            audit_ledger=ledger,
        )

        self._on_open = on_open
        self._on_close = on_close
        self._on_half_open = on_half_open
        # Last state for which callbacks were evaluated; used to detect
        # transitions between calls.
        self._prev_state: BreakerState = BreakerState.CLOSED

        logger.info(
            "circuit_breaker.created",
            name=name,
            strategy=str(fallback_strategy),
            failure_threshold=self.config.failure_threshold,
        )

    @property
    def state(self) -> BreakerState:
        """Current state as reported by the state machine."""
        return self._state_machine.state

    @property
    def metrics(self) -> dict[str, Any]:
        """Metrics dictionary exposed by the state machine."""
        return self._state_machine.metrics

    @property
    def is_open(self) -> bool:
        """True while the breaker is OPEN."""
        return self._state_machine.state is BreakerState.OPEN

    @property
    def is_closed(self) -> bool:
        """True while the breaker is CLOSED."""
        return self._state_machine.state is BreakerState.CLOSED

    def protect(self, fn: NodeFn) -> NodeFn:
        """Decorate an async node function so every call goes through ``call``.

        The state is taken from the ``state`` keyword argument when one is
        given, otherwise from the first positional argument. A state that is
        not a ``dict`` is replaced by an empty dict before the call.

        Raises:
            TypeError: If ``fn`` is not a coroutine function.
        """
        if not asyncio.iscoroutinefunction(fn):
            # Callables such as functools.partial objects have no __name__.
            fn_name = getattr(fn, "__name__", repr(fn))
            raise TypeError(
                f"CircuitBreaker.protect requires an async function; "
                f"'{fn_name}' is synchronous."
            )

        @functools.wraps(fn)
        async def _wrapper(*args: Any, **kwargs: Any) -> dict[str, Any]:
            if "state" in kwargs:
                # Remove it from kwargs: forwarding it as well would hand
                # `call` two values for its `state` parameter.
                state = kwargs.pop("state")
                extra_args = args
            else:
                state = args[0] if args else {}
                extra_args = args[1:]
            if not isinstance(state, dict):
                state = {}
            return await self.call(fn, state, *extra_args, **kwargs)

        _wrapper.__cascadebreaker__ = self
        _wrapper.__wrapped__ = fn
        return _wrapper

    async def call(
        self,
        fn: Callable[..., Awaitable[dict[str, Any]]],
        state: dict[str, Any],
        *args: Any,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Run ``fn(state, *args, **kwargs)`` under breaker protection.

        Returns:
            The state returned by ``fn`` on success. When the call is refused,
            times out or raises an ``Exception``, the state produced by the
            fallback strategy is returned instead.
        """
        if not self.config.enabled:
            # Breaker disabled: plain pass-through, nothing is recorded.
            return await fn(state, *args, **kwargs)

        permitted = await self._state_machine.is_call_permitted()
        # NOTE(review): the permission check appears to be where an OPEN
        # breaker moves to HALF_OPEN — confirm against BreakerStateMachine.
        # Evaluating callbacks here lets on_half_open fire before the trial
        # call; it is a no-op whenever the state did not change.
        await self._fire_transition_callbacks(self._prev_state)
        prev_state = self._prev_state = self.state

        if not permitted:
            logger.info(
                "circuit_breaker.short_circuited",
                name=self.name,
                state=self.state.value,
            )
            await self._state_machine.record_fallback()
            result = await self._invoke_fallback(state, {})
            return result.state

        t0 = time.monotonic()
        try:
            if self.config.call_timeout is not None:
                result_state = await asyncio.wait_for(
                    fn(state, *args, **kwargs),
                    timeout=self.config.call_timeout,
                )
            else:
                result_state = await fn(state, *args, **kwargs)

            if isinstance(self._strategy, CacheStrategy):
                # Hand the successful result to the cache strategy.
                self._strategy.store(self.name, result_state)

            await self._state_machine.record_success()
            await self._fire_transition_callbacks(prev_state)
            self._prev_state = self.state
            return result_state

        except asyncio.TimeoutError as exc:
            latency_ms = (time.monotonic() - t0) * 1000
            return await self._handle_failure(
                exc=exc,
                state=state,
                latency_ms=latency_ms,
                category=FailureCategory.LATENCY_BREACH,
                prev_state=prev_state,
            )

        except Exception as exc:
            latency_ms = (time.monotonic() - t0) * 1000
            return await self._handle_failure(
                exc=exc,
                state=state,
                latency_ms=latency_ms,
                category=_classify_exception(exc),
                prev_state=prev_state,
            )

    async def _handle_failure(
        self,
        exc: Exception,
        state: dict[str, Any],
        latency_ms: float,
        category: FailureCategory,
        prev_state: BreakerState,
    ) -> dict[str, Any]:
        """Record a failed call and return the fallback state.

        The returned dict is a shallow copy of the fallback result's state
        with the formatted traceback stored under
        ``__cascadebreaker_traceback__``.
        """
        # Format from the exception object itself so the text does not depend
        # on this coroutine still running inside the caller's except block.
        tb_str = "".join(
            traceback.format_exception(type(exc), exc, exc.__traceback__)
        )
        record = FailureRecord(
            breaker_name=self.name,
            category=category,
            error_type=f"{type(exc).__module__}.{type(exc).__name__}",
            error_message=str(exc),
            traceback_str=tb_str,
            latency_ms=latency_ms,
            metadata=self.config.extra_metadata,
        )
        await self._state_machine.record_failure(record)
        await self._state_machine.record_fallback()
        await self._fire_transition_callbacks(prev_state)
        self._prev_state = self.state

        failure_context = {
            "last_error": str(exc),
            "failure_count": self._state_machine.failure_count,
            "category": category.value,
            "traceback": tb_str,
        }
        result = await self._invoke_fallback(state, failure_context)
        # Copy before annotating: a strategy may hand back the caller's own
        # state dict (the built-in DEGRADE backup returns its input), and that
        # object must not be modified behind the caller's back.
        result_state = dict(result.state)
        result_state["__cascadebreaker_traceback__"] = tb_str
        return result_state

    async def _invoke_fallback(
        self,
        state: dict[str, Any],
        failure_context: dict[str, Any],
    ) -> FallbackResult:
        """Execute the fallback strategy without letting it raise.

        If the strategy raises an ``Exception``, the error is logged and a
        degraded result with zero confidence is returned; its state is a copy
        of ``state`` with the error text stored under
        ``__cascadebreaker_fallback_failed__``.
        """
        try:
            return await self._strategy.execute(
                state=state,
                breaker_name=self.name,
                failure_context=failure_context,
            )
        except Exception as exc:
            logger.error(
                "circuit_breaker.fallback_strategy_error",
                name=self.name,
                error=str(exc),
            )
            safe_state = dict(state)
            safe_state["__cascadebreaker_fallback_failed__"] = str(exc)
            # Report the strategy that was actually configured. A custom
            # strategy object has no enum member, so CACHE stays the default.
            if self._strategy_enum is not None:
                strategy_used = self._strategy_enum
            else:
                strategy_used = FallbackStrategy.CACHE
            return FallbackResult(
                state=safe_state,
                strategy_used=strategy_used,
                degraded=True,
                confidence=0.0,
            )

    async def _fire_transition_callbacks(self, prev_state: BreakerState) -> None:
        """Await the callback for the current state if it differs from ``prev_state``.

        Exceptions raised by a callback are logged as warnings and swallowed.
        """
        current = self.state
        if current == prev_state:
            return
        try:
            if current is BreakerState.OPEN and self._on_open:
                await self._on_open(self.name)
            elif current is BreakerState.CLOSED and self._on_close:
                await self._on_close(self.name)
            elif current is BreakerState.HALF_OPEN and self._on_half_open:
                await self._on_half_open(self.name)
        except Exception as exc:
            logger.warning(
                "circuit_breaker.callback_error",
                name=self.name,
                error=str(exc),
            )

    async def reset(self) -> None:
        """Reset the state machine and treat CLOSED as the last seen state."""
        await self._state_machine.reset()
        self._prev_state = BreakerState.CLOSED

    def _build_default_strategy(
        self, strategy_enum: FallbackStrategy
    ) -> BaseFallbackStrategy:
        """Build the built-in strategy object for a ``FallbackStrategy`` member.

        DEGRADE, ESCALATE and DECOMPOSE are wired with no-op callables.

        Raises:
            ValueError: If ``strategy_enum`` is not one of the handled members.
        """
        if strategy_enum is FallbackStrategy.CACHE:
            return CacheStrategy()

        if strategy_enum is FallbackStrategy.DEGRADE:
            async def _noop_backup(state: dict[str, Any]) -> dict[str, Any]:
                return state
            return DegradeStrategy(backup_fn=_noop_backup, confidence_override=0.1)

        if strategy_enum is FallbackStrategy.ESCALATE:
            async def _noop_escalate(
                breaker_name: str,
                state: dict[str, Any],
                context: dict[str, Any],
            ) -> Optional[dict[str, Any]]:
                return None
            return EscalateStrategy(
                escalation_fn=_noop_escalate,
                notification_only=True,
            )

        if strategy_enum is FallbackStrategy.DECOMPOSE:
            async def _noop_decompose(state: dict[str, Any]) -> list[dict[str, Any]]:
                return [state]

            async def _noop_execute(sub: dict[str, Any]) -> dict[str, Any]:
                return sub
            return DecomposeStrategy(
                decompose_fn=_noop_decompose,
                execute_fn=_noop_execute,
            )

        raise ValueError(f"Unknown FallbackStrategy: {strategy_enum}")

    def __repr__(self) -> str:
        return (
            f"CircuitBreaker(name={self.name!r}, "
            f"state={self.state.value})"
        )
@@ -0,0 +1,183 @@
1
+ """
2
+ Command-line interface for CascadeBreaker.
3
+
4
+ Lets you inspect a SQLite audit ledger from the terminal without writing
5
+ a throwaway script every time. Uses only the standard library.
6
+
7
+ Usage:
8
+ agentarmour --version
9
+ agentarmour ledger summary
10
+ agentarmour ledger failures --limit 10
11
+ agentarmour ledger transitions --breaker research_agent
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import argparse
17
+ import sqlite3
18
+ import sys
19
+
20
+ from agentarmour import __version__
21
+
22
+
23
def _connect(db_path: str, table_prefix: str) -> sqlite3.Connection:
    """Open the SQLite database at *db_path* with rows addressable by column name.

    ``table_prefix`` is part of the signature but is not used when connecting.
    """
    connection = sqlite3.connect(db_path)
    # sqlite3.Row lets callers read columns as row["column_name"].
    connection.row_factory = sqlite3.Row
    return connection
27
+
28
+
29
def _table_exists(conn: sqlite3.Connection, table_name: str) -> bool:
    """Return True when ``sqlite_master`` lists a table named *table_name*."""
    lookup = "SELECT name FROM sqlite_master WHERE type='table' AND name=?"
    match = conn.execute(lookup, (table_name,)).fetchone()
    return match is not None
35
+
36
+
37
def cmd_summary(args: argparse.Namespace) -> int:
    """Print a high-level summary of the audit ledger.

    Reports the breakers seen, the total number of recorded failures and state
    transitions, and a per-breaker failure count (breakers listed
    alphabetically).

    Args:
        args: Parsed CLI arguments; ``db`` and ``table_prefix`` are read.

    Returns:
        0 on success, 1 when the failures table does not exist.
    """
    prefix = args.table_prefix
    failures_table = f"{prefix}failures"
    transitions_table = f"{prefix}transitions"

    conn = _connect(args.db, prefix)
    try:
        if not _table_exists(conn, failures_table):
            print(f"No audit data found in '{args.db}'. Has the breaker run yet?")
            return 1

        # Table names come from a CLI flag and cannot be bound as parameters,
        # so quote them as SQL identifiers (embedded quotes are doubled).
        failures_sql = '"' + failures_table.replace('"', '""') + '"'
        transitions_sql = '"' + transitions_table.replace('"', '""') + '"'

        total_failures = conn.execute(
            f"SELECT COUNT(*) FROM {failures_sql}"
        ).fetchone()[0]

        # The transitions table may be absent even though failures exist;
        # report zero instead of failing on a missing table.
        if _table_exists(conn, transitions_table):
            total_transitions = conn.execute(
                f"SELECT COUNT(*) FROM {transitions_sql}"
            ).fetchone()[0]
        else:
            total_transitions = 0

        # One grouped query replaces a separate COUNT per breaker.
        per_breaker = conn.execute(
            f"SELECT breaker_name, COUNT(*) AS failure_count FROM {failures_sql} "
            "GROUP BY breaker_name ORDER BY breaker_name"
        ).fetchall()
        breaker_names = [row["breaker_name"] for row in per_breaker]

        print(f"Audit ledger: {args.db}")
        print(f"Breakers seen: {', '.join(breaker_names) if breaker_names else '(none)'}")
        print(f"Total failures recorded: {total_failures}")
        print(f"Total state transitions: {total_transitions}")

        for row in per_breaker:
            print(f" {row['breaker_name']}: {row['failure_count']} failure(s)")
    finally:
        # Close on every exit path, including the early return above.
        conn.close()
    return 0
69
+
70
+
71
def cmd_failures(args: argparse.Namespace) -> int:
    """Print the most recent recorded failures, newest first.

    Args:
        args: Parsed CLI arguments; ``db``, ``table_prefix``, ``breaker``
            (optional breaker-name filter) and ``limit`` are read.

    Returns:
        0 when the query ran (even with no matching rows), 1 when the failures
        table does not exist.
    """
    prefix = args.table_prefix
    table = f"{prefix}failures"

    conn = _connect(args.db, prefix)
    try:
        if not _table_exists(conn, table):
            print(f"No audit data found in '{args.db}'.")
            return 1

        # The table name comes from a CLI flag and cannot be bound as a
        # parameter, so quote it as an SQL identifier.
        table_sql = '"' + table.replace('"', '""') + '"'
        query = (
            "SELECT breaker_name, category, error_message, timestamp "
            f"FROM {table_sql}"
        )
        params: list[object] = []
        if args.breaker:
            query += " WHERE breaker_name = ?"
            params.append(args.breaker)
        query += " ORDER BY timestamp DESC LIMIT ?"
        # Bind the limit with its own type instead of converting it to text.
        params.append(args.limit)

        rows = conn.execute(query, params).fetchall()

        if not rows:
            print("No failures found matching that filter.")
            return 0

        for row in rows:
            print(f"[{row['breaker_name']}] {row['category']}: {row['error_message']}")
    finally:
        # Close on every exit path, including the early returns above.
        conn.close()
    return 0
99
+
100
+
101
def cmd_transitions(args: argparse.Namespace) -> int:
    """Print the most recent breaker state transitions, newest first.

    Args:
        args: Parsed CLI arguments; ``db``, ``table_prefix``, ``breaker``
            (optional breaker-name filter) and ``limit`` are read.

    Returns:
        0 when the query ran (even with no matching rows), 1 when the
        transitions table does not exist.
    """
    prefix = args.table_prefix
    table = f"{prefix}transitions"

    conn = _connect(args.db, prefix)
    try:
        if not _table_exists(conn, table):
            print(f"No audit data found in '{args.db}'.")
            return 1

        # The table name comes from a CLI flag and cannot be bound as a
        # parameter, so quote it as an SQL identifier.
        table_sql = '"' + table.replace('"', '""') + '"'
        query = (
            "SELECT breaker_name, from_state, to_state, reason, timestamp "
            f"FROM {table_sql}"
        )
        params: list[object] = []
        if args.breaker:
            query += " WHERE breaker_name = ?"
            params.append(args.breaker)
        query += " ORDER BY timestamp DESC LIMIT ?"
        # Bind the limit with its own type instead of converting it to text.
        params.append(args.limit)

        rows = conn.execute(query, params).fetchall()

        if not rows:
            print("No transitions found matching that filter.")
            return 0

        for row in rows:
            print(
                f"[{row['breaker_name']}] {row['from_state']} -> {row['to_state']}: "
                f"{row['reason']}"
            )
    finally:
        # Close on every exit path, including the early returns above.
        conn.close()
    return 0
132
+
133
+
134
def build_parser() -> argparse.ArgumentParser:
    """Build the ``agentarmour`` argument parser.

    The parser offers ``--version`` and a ``ledger`` command with three
    subcommands: ``summary``, ``failures`` and ``transitions``. Each
    subcommand stores its handler function under ``func``.
    """
    root = argparse.ArgumentParser(
        prog="agentarmour",
        description="CascadeBreaker CLI — inspect your circuit breaker audit ledger.",
    )
    root.add_argument(
        "--version", action="version", version=f"agentarmour {__version__}"
    )

    commands = root.add_subparsers(dest="command")
    ledger = commands.add_parser("ledger", help="Inspect the audit ledger")
    ledger_commands = ledger.add_subparsers(dest="ledger_command")

    # (subcommand, help text, handler, accepts --breaker/--limit)
    specs = (
        ("summary", "Show a high-level summary", cmd_summary, False),
        ("failures", "List recorded failures", cmd_failures, True),
        ("transitions", "List state transitions", cmd_transitions, True),
    )
    for command_name, help_text, handler, filterable in specs:
        sub = ledger_commands.add_parser(command_name, help=help_text)
        sub.add_argument("--db", default="cascadebreaker.db")
        sub.add_argument("--table-prefix", default="cb_")
        if filterable:
            sub.add_argument("--breaker", default=None, help="Filter by breaker name")
            sub.add_argument("--limit", type=int, default=20)
        sub.set_defaults(func=handler)

    return root
169
+
170
+
171
def main() -> None:
    """Parse the command line, run the selected handler and exit with its code.

    When no subcommand handler was selected, the help text is printed and the
    process exits with status 1.
    """
    cli = build_parser()
    parsed = cli.parse_args()

    try:
        handler = parsed.func
    except AttributeError:
        # No subcommand was chosen, so argparse never stored a handler.
        cli.print_help()
        sys.exit(1)

    sys.exit(handler(parsed))


if __name__ == "__main__":
    main()
@@ -0,0 +1,43 @@
1
+ """
2
+ Pydantic configuration models for CascadeBreaker.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any, Literal
8
+ from pydantic import BaseModel, Field, model_validator
9
+
10
+
11
class StorageConfig(BaseModel):
    # Storage settings: which backend to use and how to reach it.

    # Backend selector; only these two values are accepted.
    backend: Literal["sqlite", "postgres"] = "sqlite"
    # Database file path for the sqlite backend.
    sqlite_path: str = "cascadebreaker.db"
    # Connection string for the postgres backend; required when it is selected.
    postgres_dsn: str | None = None
    # Prefix for table names (the CLI defaults to the same "cb_" value).
    table_prefix: str = "cb_"

    @model_validator(mode="after")
    def validate_postgres_dsn(self) -> "StorageConfig":
        """Reject a postgres backend that has no DSN configured."""
        wants_postgres = self.backend == "postgres"
        if wants_postgres and not self.postgres_dsn:
            raise ValueError(
                "postgres_dsn is required when storage backend is 'postgres'"
            )
        return self
26
+
27
+
28
class BreakerConfig(BaseModel):
    # Settings for a single circuit breaker. Instances are frozen, so fields
    # cannot be reassigned after construction.
    model_config = {"frozen": True}

    # The next four values are forwarded unchanged to the breaker's state
    # machine when a CircuitBreaker is constructed.
    failure_threshold: int = Field(3, ge=1, le=100)
    recovery_timeout: float = Field(30.0, gt=0)
    window_seconds: float = Field(60.0, gt=0)
    half_open_max_calls: int = Field(1, ge=1)

    # Timeout applied to each protected call; None disables the timeout.
    call_timeout: float | None = Field(30.0, gt=0)

    # NOTE(review): presumably exception names to ignore / to count — no
    # consumer of these two lists is visible alongside this model; confirm.
    exclude_exceptions: list[str] = Field(default_factory=list)
    include_exceptions: list[str] = Field(default_factory=list)

    # Nested storage settings; defaults to a fresh StorageConfig.
    storage: StorageConfig = Field(default_factory=StorageConfig)

    # When False, the breaker calls the wrapped function directly.
    enabled: bool = True

    # NOTE(review): presumably prepended to breaker names — confirm with callers.
    name_prefix: str = ""

    # Attached as `metadata` to every failure record the breaker creates.
    extra_metadata: dict[str, Any] = Field(default_factory=dict)
File without changes