delimit-cli 4.1.44 → 4.1.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/bin/delimit-cli.js +365 -30
- package/bin/delimit-setup.js +100 -64
- package/gateway/ai/activate_helpers.py +253 -7
- package/gateway/ai/backends/gateway_core.py +236 -13
- package/gateway/ai/backends/repo_bridge.py +80 -16
- package/gateway/ai/backends/tools_infra.py +49 -32
- package/gateway/ai/checksums.sha256 +6 -0
- package/gateway/ai/continuity.py +462 -0
- package/gateway/ai/deliberation.pyi +53 -0
- package/gateway/ai/governance.pyi +32 -0
- package/gateway/ai/governance_hardening.py +569 -0
- package/gateway/ai/inbox_daemon_runner.py +217 -0
- package/gateway/ai/ledger_manager.py +40 -0
- package/gateway/ai/license.py +104 -3
- package/gateway/ai/license_core.py +177 -36
- package/gateway/ai/license_core.pyi +50 -0
- package/gateway/ai/loop_engine.py +786 -22
- package/gateway/ai/reddit_scanner.py +150 -5
- package/gateway/ai/server.py +254 -19
- package/gateway/ai/swarm.py +86 -0
- package/gateway/ai/swarm_infra.py +656 -0
- package/gateway/ai/tweet_corpus_schema.sql +76 -0
- package/gateway/core/diff_engine_v2.py +6 -2
- package/gateway/core/generator_drift.py +242 -0
- package/gateway/core/json_schema_diff.py +375 -0
- package/gateway/core/openapi_version.py +124 -0
- package/gateway/core/spec_detector.py +47 -7
- package/gateway/core/spec_health.py +5 -2
- package/lib/cross-model-hooks.js +4 -12
- package/package.json +8 -1
- package/scripts/sync-gateway.sh +13 -1
|
@@ -0,0 +1,569 @@
|
|
|
1
|
+
"""Governance Hardening for Proactive Auto-Triggers (LED-661).
|
|
2
|
+
|
|
3
|
+
Provides four hardening primitives that wrap MCP tool calls and loop engine
|
|
4
|
+
operations with resilience guarantees:
|
|
5
|
+
|
|
6
|
+
- ResilientToolCaller: retry with exponential backoff, timeout, fallback
|
|
7
|
+
- ApprovalFlow: email-based approve/reject for founder decisions
|
|
8
|
+
- TriggerDebouncer: per-tool cooldowns to prevent notification storms
|
|
9
|
+
- ChainCircuitBreaker: halt chains after consecutive failures
|
|
10
|
+
|
|
11
|
+
All classes are opt-in. When not configured, existing behavior is unchanged.
|
|
12
|
+
Wire into loop_engine.run_governed_iteration() via GovernanceHardeningConfig.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import concurrent.futures
import json
import logging
import time
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional
|
|
22
|
+
|
|
23
|
+
# Shared module logger; all hardening classes below emit through it.
logger = logging.getLogger("delimit.ai.governance_hardening")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# ── ResilientToolCaller ─────────────────────────────────────────────────
|
|
27
|
+
|
|
28
|
+
class ResilientToolCaller:
    """Wrap MCP tool calls with retry, timeout, and fallback.

    Parameters:
        max_retries: Maximum number of retry attempts (default 3).
        base_delay: Initial delay in seconds for exponential backoff (default 1.0).
        max_delay: Cap on backoff delay in seconds (default 30.0).
        timeout: Per-call timeout in seconds (default 60.0). Set to 0 or
            None to disable timeout enforcement.
        fallback: Optional callable returning a fallback result on total failure.
    """

    # Same logger name as the module-level logger singleton, so records are
    # routed identically wherever this class is used.
    _log = logging.getLogger("delimit.ai.governance_hardening")

    def __init__(
        self,
        max_retries: int = 3,
        base_delay: float = 1.0,
        max_delay: float = 30.0,
        timeout: float = 60.0,
        fallback: Optional[Callable[..., Any]] = None,
    ):
        self.max_retries = max_retries
        self.base_delay = base_delay
        self.max_delay = max_delay
        self.timeout = timeout
        self.fallback = fallback
        # Structured record of every attempt, success or failure.
        self._call_log: List[Dict[str, Any]] = []

    def _invoke(self, fn: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
        """Run fn once, enforcing self.timeout.

        Fix: the ``timeout`` parameter was previously accepted and documented
        but never applied. The call now runs in a single-worker executor and
        ``Future.result(timeout=...)`` raises a TimeoutError when it takes too
        long, which the retry loop treats like any other failure. NOTE: Python
        cannot forcibly kill the worker thread, so a timed-out fn keeps
        running in the background until it returns on its own.
        """
        if not self.timeout or self.timeout <= 0:
            return fn(*args, **kwargs)
        pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        try:
            return pool.submit(fn, *args, **kwargs).result(timeout=self.timeout)
        finally:
            # Never block on a (possibly still-running) worker thread here.
            pool.shutdown(wait=False)

    def _record(
        self,
        fn_name: str,
        attempt: int,
        status: str,
        elapsed: float,
        error: Optional[str] = None,
    ) -> None:
        """Append one structured attempt entry to the in-memory call log."""
        entry: Dict[str, Any] = {
            "fn": fn_name,
            "attempt": attempt,
            "status": status,
            "elapsed": round(elapsed, 3),
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }
        if error is not None:
            entry["error"] = error
        self._call_log.append(entry)

    def call(self, fn: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
        """Execute fn with retry, exponential backoff, and per-call timeout.

        Returns the result on success, or the fallback result if all retries
        are exhausted and a fallback is configured. Raises the last exception
        (including TimeoutError) if no fallback is available.
        """
        last_error: Optional[Exception] = None
        fn_name = getattr(fn, "__name__", str(fn))

        for attempt in range(1, self.max_retries + 1):
            start = time.monotonic()
            try:
                result = self._invoke(fn, *args, **kwargs)
            except Exception as exc:
                last_error = exc
                self._record(fn_name, attempt, "error",
                             time.monotonic() - start, error=str(exc))
                self._log.warning(
                    "ResilientToolCaller: %s attempt %d/%d failed: %s",
                    fn_name, attempt, self.max_retries, exc,
                )
                if attempt < self.max_retries:
                    # Exponential backoff: base_delay * 2^(attempt-1), capped.
                    delay = min(self.base_delay * (2 ** (attempt - 1)), self.max_delay)
                    time.sleep(delay)
            else:
                self._record(fn_name, attempt, "success", time.monotonic() - start)
                return result

        # All retries exhausted.
        if self.fallback is not None:
            self._log.info("ResilientToolCaller: falling back for %s", fn_name)
            return self.fallback(*args, **kwargs)

        raise last_error  # type: ignore[misc]

    @property
    def call_log(self) -> List[Dict[str, Any]]:
        """Copy of the attempt log (success and error entries)."""
        return list(self._call_log)

    def reset_log(self) -> None:
        """Clear the attempt log."""
        self._call_log.clear()
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# ── ApprovalFlow ────────────────────────────────────────────────────────
|
|
112
|
+
|
|
113
|
+
class ApprovalFlow:
    """Email-based approval flow for founder decisions.

    Sends an approval request via email, then polls the inbox for a response.
    Times out after a configurable period with a configurable default action.
    Pending requests are persisted to disk so they survive process restarts.

    Parameters:
        send_fn: Callable to send an email. Signature: (subject, body, priority) -> None.
        poll_fn: Callable to poll inbox. Signature: () -> List[Dict] of messages.
        timeout_seconds: Max wait time (default 86400 = 24h).
        poll_interval: Seconds between inbox checks (default 300 = 5min).
        default_action: Action when timeout expires ("reject" or "approve").
        state_dir: Directory to persist pending approval state.
    """

    PENDING_FILE = "pending_approvals.json"

    def __init__(
        self,
        send_fn: Optional[Callable] = None,
        poll_fn: Optional[Callable] = None,
        timeout_seconds: float = 86400,
        poll_interval: float = 300,
        default_action: str = "reject",
        state_dir: Optional[Path] = None,
    ):
        self.send_fn = send_fn
        self.poll_fn = poll_fn
        self.timeout_seconds = timeout_seconds
        # Not used internally; presumably consumed by an external polling
        # loop that calls check_approval() — TODO confirm against callers.
        self.poll_interval = poll_interval
        self.default_action = default_action
        self.state_dir = state_dir or Path.home() / ".delimit" / "loop" / "approvals"
        self._pending: Dict[str, Dict[str, Any]] = {}
        self._load_state()

    def _load_state(self) -> None:
        """Load persisted pending approvals; corrupt/unreadable state starts empty."""
        self.state_dir.mkdir(parents=True, exist_ok=True)
        path = self.state_dir / self.PENDING_FILE
        if path.exists():
            try:
                self._pending = json.loads(path.read_text())
            except (json.JSONDecodeError, OSError):
                self._pending = {}

    def _save_state(self) -> None:
        """Persist all approval records to disk."""
        self.state_dir.mkdir(parents=True, exist_ok=True)
        path = self.state_dir / self.PENDING_FILE
        path.write_text(json.dumps(self._pending, indent=2))

    def request_approval(
        self,
        action_description: str,
        context: str = "",
        priority: str = "P1",
    ) -> str:
        """Send an approval request email and return a request ID.

        The request is persisted so it survives process restarts. Email
        delivery failures are recorded on the request instead of raised.
        """
        request_id = f"approval-{uuid.uuid4().hex[:8]}"
        subject = f"[Delimit Approval] {action_description[:80]}"
        # int() so a float timeout_seconds does not render as e.g. "24.0h".
        hours = int(self.timeout_seconds // 3600)
        body = (
            f"Action: {action_description}\n"
            f"Context: {context}\n"
            f"Request ID: {request_id}\n\n"
            f"Reply APPROVE or REJECT to this email.\n"
            f"Auto-{self.default_action} in {hours}h if no response."
        )

        record = {
            "request_id": request_id,
            "action": action_description,
            "context": context,
            "priority": priority,
            "status": "pending",
            "created_at": datetime.now(timezone.utc).isoformat(),
            "timeout_at": datetime.fromtimestamp(
                time.time() + self.timeout_seconds, tz=timezone.utc
            ).isoformat(),
        }
        self._pending[request_id] = record

        if self.send_fn:
            try:
                self.send_fn(subject, body, priority)
                record["email_sent"] = True
            except Exception as exc:
                logger.error("ApprovalFlow: failed to send email: %s", exc)
                record["email_sent"] = False
                record["email_error"] = str(exc)

        self._save_state()
        return request_id

    def check_approval(self, request_id: str) -> Dict[str, Any]:
        """Check the status of a pending approval request.

        Polls the inbox for responses matching the request ID. Returns a dict
        whose "status" is one of: "approved", "rejected", "pending",
        "not_found", or "timed_out_<default_action>" once the deadline passes.
        """
        record = self._pending.get(request_id)
        if not record:
            return {"status": "not_found", "request_id": request_id}

        if record["status"] != "pending":
            return {"status": record["status"], "request_id": request_id}

        # Timeout: once the deadline passes, resolve with the default action.
        timeout_at = datetime.fromisoformat(record["timeout_at"])
        if datetime.now(timezone.utc) >= timeout_at:
            record["status"] = f"timed_out_{self.default_action}"
            self._save_state()
            return {"status": record["status"], "request_id": request_id}

        if self.poll_fn:
            try:
                for msg in self.poll_fn():
                    # Search subject AND body consistently, both for the
                    # request ID and for the decision keyword.
                    raw = f"{msg.get('subject', '') or ''} {msg.get('body', '') or ''}"
                    if request_id not in raw:
                        continue
                    text = raw.upper()
                    # REJECT is checked first: if a reply mentions both words
                    # (e.g. "DO NOT APPROVE, REJECT"), fail safe toward
                    # rejection rather than approval.
                    if "REJECT" in text:
                        decision = "rejected"
                    elif "APPROVE" in text:
                        decision = "approved"
                    else:
                        continue
                    record["status"] = decision
                    record["resolved_at"] = datetime.now(timezone.utc).isoformat()
                    self._save_state()
                    return {"status": decision, "request_id": request_id}
            except Exception as exc:
                logger.warning("ApprovalFlow: poll failed: %s", exc)

        return {"status": "pending", "request_id": request_id}

    @property
    def pending_requests(self) -> List[Dict[str, Any]]:
        """All records still awaiting a decision."""
        return [r for r in self._pending.values() if r.get("status") == "pending"]
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
# ── TriggerDebouncer ────────────────────────────────────────────────────
|
|
255
|
+
|
|
256
|
+
class TriggerDebouncer:
    """Prevent storms of tool calls by enforcing per-tool cooldowns.

    A tool may fire only when (a) its own cooldown window has elapsed and
    (b) the global hourly budget has not been exhausted.

    Parameters:
        default_cooldown: Default cooldown in seconds (default 300 = 5min).
        tool_cooldowns: Dict mapping tool names to specific cooldowns.
        max_calls_per_hour: Global rate limit across all tools (default 5).
    """

    def __init__(
        self,
        default_cooldown: float = 300.0,
        tool_cooldowns: Optional[Dict[str, float]] = None,
        max_calls_per_hour: int = 5,
    ):
        self.default_cooldown = default_cooldown
        self.tool_cooldowns = tool_cooldowns or {}
        self.max_calls_per_hour = max_calls_per_hour
        # tool name -> monotonic timestamp of its most recent firing
        self._last_fired: Dict[str, float] = {}
        # monotonic timestamps of every firing, pruned to the last hour
        self._recent: List[float] = []

    def _cooldown_for(self, tool_name: str) -> float:
        """Cooldown window for a tool, falling back to the default."""
        return self.tool_cooldowns.get(tool_name, self.default_cooldown)

    def can_fire(self, tool_name: str) -> bool:
        """Check if the tool is allowed to fire (respects cooldown and rate limit)."""
        now = time.monotonic()

        previous = self._last_fired.get(tool_name)
        if previous is not None and now - previous < self._cooldown_for(tool_name):
            return False

        # Prune firings older than an hour, then apply the global budget.
        horizon = now - 3600
        self._recent = [stamp for stamp in self._recent if stamp > horizon]
        return len(self._recent) < self.max_calls_per_hour

    def record_call(self, tool_name: str) -> None:
        """Record that a tool was fired."""
        stamp = time.monotonic()
        self._last_fired[tool_name] = stamp
        self._recent.append(stamp)

    def try_fire(self, tool_name: str, fn: Callable[..., Any], *args: Any, **kwargs: Any) -> Optional[Any]:
        """Fire the tool only if debounce allows it. Returns None if suppressed."""
        if not self.can_fire(tool_name):
            logger.debug("TriggerDebouncer: suppressed %s (cooldown)", tool_name)
            return None
        outcome = fn(*args, **kwargs)
        self.record_call(tool_name)
        return outcome

    def time_until_allowed(self, tool_name: str) -> float:
        """Seconds until this tool can fire again. 0.0 if allowed now."""
        previous = self._last_fired.get(tool_name)
        if previous is None:
            return 0.0
        wait = self._cooldown_for(tool_name) - (time.monotonic() - previous)
        return max(0.0, wait)

    def reset(self, tool_name: Optional[str] = None) -> None:
        """Reset cooldown state for one tool, or everything when omitted."""
        if tool_name:
            self._last_fired.pop(tool_name, None)
        else:
            self._last_fired.clear()
            self._recent.clear()
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
# ── ChainCircuitBreaker ────────────────────────────────────────────────
|
|
330
|
+
|
|
331
|
+
class ChainCircuitBreaker:
    """Halt tool chains after consecutive failures.

    A minimal closed / open / half-open circuit breaker.

    Parameters:
        failure_threshold: Number of consecutive failures to trip the breaker (default 3).
        recovery_timeout: Seconds to wait before allowing a retry (default 300 = 5min).
        notify_fn: Optional callable invoked when the breaker trips.
    """

    STATE_CLOSED = "closed"        # Normal operation
    STATE_OPEN = "open"            # Breaker tripped, rejecting calls
    STATE_HALF_OPEN = "half_open"  # Allowing a single probe call

    def __init__(
        self,
        failure_threshold: int = 3,
        recovery_timeout: float = 300.0,
        notify_fn: Optional[Callable[[str, int], None]] = None,
    ):
        self.failure_threshold = failure_threshold
        self.recovery_timeout = recovery_timeout
        self.notify_fn = notify_fn
        self._state: str = self.STATE_CLOSED
        self._consecutive_failures: int = 0
        self._last_failure_time: float = 0.0
        self._failure_log: List[Dict[str, Any]] = []

    @property
    def state(self) -> str:
        """Current breaker state; flips open -> half_open once the recovery window passes."""
        elapsed = time.monotonic() - self._last_failure_time
        if self._state == self.STATE_OPEN and elapsed >= self.recovery_timeout:
            self._state = self.STATE_HALF_OPEN
        return self._state

    @property
    def consecutive_failures(self) -> int:
        """Failures recorded since the last success or reset."""
        return self._consecutive_failures

    def record_success(self) -> None:
        """Record a successful call. Resets failure count and closes the breaker."""
        self._consecutive_failures = 0
        self._state = self.STATE_CLOSED

    def record_failure(self, error: str = "") -> None:
        """Record a failed call; trips the breaker once the threshold is hit."""
        self._consecutive_failures += 1
        self._last_failure_time = time.monotonic()
        self._failure_log.append({
            "error": error,
            "count": self._consecutive_failures,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        })

        if self._consecutive_failures < self.failure_threshold:
            return
        self._state = self.STATE_OPEN
        logger.warning(
            "ChainCircuitBreaker: tripped after %d consecutive failures",
            self._consecutive_failures,
        )
        if self.notify_fn:
            try:
                self.notify_fn(error, self._consecutive_failures)
            except Exception as exc:
                logger.error("ChainCircuitBreaker: notify_fn failed: %s", exc)

    def allow_call(self) -> bool:
        """True when a call may proceed (closed, or a half-open probe)."""
        # Reading .state applies the recovery-timeout transition first.
        return self.state in (self.STATE_CLOSED, self.STATE_HALF_OPEN)

    def execute(self, fn: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
        """Execute fn through the circuit breaker.

        Raises CircuitBreakerOpen if the breaker is open.
        Records success/failure automatically.
        """
        if not self.allow_call():
            wait = self.recovery_timeout - (time.monotonic() - self._last_failure_time)
            raise CircuitBreakerOpen(
                f"Circuit breaker is open after {self._consecutive_failures} failures. "
                f"Recovery in {wait:.0f}s."
            )

        try:
            outcome = fn(*args, **kwargs)
        except Exception as exc:
            self.record_failure(str(exc))
            raise
        self.record_success()
        return outcome

    def reset(self) -> None:
        """Manually restore the closed state and clear failure history."""
        self._state = self.STATE_CLOSED
        self._consecutive_failures = 0
        self._failure_log.clear()

    @property
    def failure_log(self) -> List[Dict[str, Any]]:
        """Copy of the recorded failures."""
        return list(self._failure_log)


class CircuitBreakerOpen(Exception):
    """Raised when a call is attempted on an open circuit breaker."""
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
# ── GovernanceHardeningConfig ───────────────────────────────────────────
|
|
444
|
+
|
|
445
|
+
class GovernanceHardeningConfig:
    """Central configuration for governance hardening.

    Opt-in: when not configured, all components are None and every layer
    acts as a passthrough. Wire into loop_engine.run_governed_iteration()
    to enable hardening.
    """

    def __init__(
        self,
        resilient_caller: Optional[ResilientToolCaller] = None,
        approval_flow: Optional[ApprovalFlow] = None,
        debouncer: Optional[TriggerDebouncer] = None,
        circuit_breaker: Optional[ChainCircuitBreaker] = None,
    ):
        self.resilient_caller = resilient_caller
        self.approval_flow = approval_flow
        self.debouncer = debouncer
        self.circuit_breaker = circuit_breaker

    @classmethod
    def from_dict(cls, config: Dict[str, Any]) -> "GovernanceHardeningConfig":
        """Create from a config dict (e.g., loaded from settings.json).

        With governance_hardening.enabled falsy, every component is None
        (full passthrough). Individual sections default to enabled, except
        approval_flow which is opt-in.
        """
        hardening = config.get("governance_hardening", {})
        if not hardening.get("enabled", False):
            return cls()  # All None = passthrough

        rc = hardening.get("resilient_caller", {})
        af = hardening.get("approval_flow", {})
        db = hardening.get("debouncer", {})
        cb = hardening.get("circuit_breaker", {})

        caller = None
        if rc.get("enabled", True):
            caller = ResilientToolCaller(
                max_retries=rc.get("max_retries", 3),
                base_delay=rc.get("base_delay", 1.0),
                max_delay=rc.get("max_delay", 30.0),
                timeout=rc.get("timeout", 60.0),
            )

        bouncer = None
        if db.get("enabled", True):
            bouncer = TriggerDebouncer(
                default_cooldown=db.get("default_cooldown", 300),
                tool_cooldowns=db.get("tool_cooldowns", {}),
                max_calls_per_hour=db.get("max_calls_per_hour", 5),
            )

        breaker = None
        if cb.get("enabled", True):
            breaker = ChainCircuitBreaker(
                failure_threshold=cb.get("failure_threshold", 3),
                recovery_timeout=cb.get("recovery_timeout", 300),
            )

        # ApprovalFlow needs send/poll functions — created without them here;
        # caller should inject the actual functions after construction.
        approvals = None
        if af.get("enabled", False):
            approvals = ApprovalFlow(
                timeout_seconds=af.get("timeout_seconds", 86400),
                poll_interval=af.get("poll_interval", 300),
                default_action=af.get("default_action", "reject"),
            )

        return cls(
            resilient_caller=caller,
            approval_flow=approvals,
            debouncer=bouncer,
            circuit_breaker=breaker,
        )

    def is_active(self) -> bool:
        """True if any hardening component is configured."""
        components = (
            self.resilient_caller,
            self.approval_flow,
            self.debouncer,
            self.circuit_breaker,
        )
        return any(components)
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
# ── Integration helper for loop_engine ──────────────────────────────────
|
|
520
|
+
|
|
521
|
+
def hardened_dispatch(
    config: GovernanceHardeningConfig,
    dispatch_fn: Callable[..., Any],
    tool_name: str = "dispatch_task",
    **kwargs: Any,
) -> Any:
    """Run a dispatch function through the full hardening stack.

    Order: debouncer -> circuit breaker -> resilient caller -> dispatch_fn.
    Any layer that is not configured is skipped (passthrough).
    """
    bouncer = config.debouncer
    breaker = config.circuit_breaker

    # Gate 1: per-tool cooldown / hourly budget.
    if bouncer and not bouncer.can_fire(tool_name):
        return {
            "status": "debounced",
            "tool": tool_name,
            "retry_in_seconds": round(bouncer.time_until_allowed(tool_name), 1),
        }

    # Gate 2: refuse while the chain circuit breaker is open.
    if breaker and not breaker.allow_call():
        return {
            "status": "circuit_open",
            "tool": tool_name,
            "consecutive_failures": breaker.consecutive_failures,
        }

    # Execute through the retry/fallback wrapper when one is configured.
    try:
        runner = config.resilient_caller
        outcome = runner.call(dispatch_fn, **kwargs) if runner else dispatch_fn(**kwargs)

        # Success bookkeeping feeds both gates.
        if breaker:
            breaker.record_success()
        if bouncer:
            bouncer.record_call(tool_name)

        return outcome

    except Exception as exc:
        if breaker:
            breaker.record_failure(str(exc))
        raise
|