sliceagent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. sliceagent/__init__.py +3 -0
  2. sliceagent/__main__.py +6 -0
  3. sliceagent/access.py +93 -0
  4. sliceagent/agents.py +173 -0
  5. sliceagent/background_review.py +146 -0
  6. sliceagent/binsniff.py +89 -0
  7. sliceagent/cli.py +890 -0
  8. sliceagent/clock.py +32 -0
  9. sliceagent/code_grep.py +329 -0
  10. sliceagent/code_index.py +417 -0
  11. sliceagent/config.py +240 -0
  12. sliceagent/context_overflow.py +227 -0
  13. sliceagent/envspec.py +129 -0
  14. sliceagent/errors.py +167 -0
  15. sliceagent/events.py +96 -0
  16. sliceagent/finding_types.py +70 -0
  17. sliceagent/flags.py +63 -0
  18. sliceagent/fuzzy.py +135 -0
  19. sliceagent/guardrails.py +438 -0
  20. sliceagent/guidance.py +69 -0
  21. sliceagent/hippocampus.py +581 -0
  22. sliceagent/hooks.py +334 -0
  23. sliceagent/interfaces.py +144 -0
  24. sliceagent/llm.py +695 -0
  25. sliceagent/loop.py +548 -0
  26. sliceagent/mcp_client.py +255 -0
  27. sliceagent/mcp_security.py +77 -0
  28. sliceagent/memory.py +428 -0
  29. sliceagent/metrics.py +103 -0
  30. sliceagent/model_catalog.py +124 -0
  31. sliceagent/monitor.py +615 -0
  32. sliceagent/neocortex.py +436 -0
  33. sliceagent/onboarding.py +323 -0
  34. sliceagent/oracle.py +36 -0
  35. sliceagent/pagetable.py +255 -0
  36. sliceagent/pfc.py +449 -0
  37. sliceagent/plugins.py +127 -0
  38. sliceagent/policy.py +234 -0
  39. sliceagent/procman.py +187 -0
  40. sliceagent/prompt.py +239 -0
  41. sliceagent/records.py +108 -0
  42. sliceagent/recovery.py +119 -0
  43. sliceagent/regions.py +678 -0
  44. sliceagent/registry.py +128 -0
  45. sliceagent/retriever.py +19 -0
  46. sliceagent/safety.py +332 -0
  47. sliceagent/sandbox.py +143 -0
  48. sliceagent/scheduler.py +92 -0
  49. sliceagent/search_index.py +289 -0
  50. sliceagent/seed.py +465 -0
  51. sliceagent/sensory_cortex.py +500 -0
  52. sliceagent/session.py +222 -0
  53. sliceagent/skill_provenance.py +71 -0
  54. sliceagent/skill_usage.py +123 -0
  55. sliceagent/skills.py +209 -0
  56. sliceagent/subagent.py +332 -0
  57. sliceagent/subdir_hints.py +222 -0
  58. sliceagent/swap.py +182 -0
  59. sliceagent/taskstate.py +57 -0
  60. sliceagent/telemetry.py +59 -0
  61. sliceagent/terminal.py +240 -0
  62. sliceagent/text_utils.py +56 -0
  63. sliceagent/tool_summary.py +93 -0
  64. sliceagent/tools.py +1194 -0
  65. sliceagent/tui.py +1377 -0
  66. sliceagent/web.py +354 -0
  67. sliceagent-0.1.0.dist-info/METADATA +262 -0
  68. sliceagent-0.1.0.dist-info/RECORD +71 -0
  69. sliceagent-0.1.0.dist-info/WHEEL +4 -0
  70. sliceagent-0.1.0.dist-info/entry_points.txt +2 -0
  71. sliceagent-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,227 @@
1
+ """Context-overflow classification — pure stdlib, never imports openai.
2
+
3
+ Provides:
4
+ - the `_CONTEXT_OVERFLOW_PATTERNS` message table
5
+ - the cause-walk `_extract_status_code` (max depth 5)
6
+ - the 400 / 413 / `context_length_exceeded` overflow rules
7
+
8
+ The moat: sliceagent always has a slice it can TIGHTEN, so there is NO
9
+ session-size / token-count heuristic here — a generic-400 + large-session
10
+ proxy is deliberately avoided. Overflow is decided purely from the
11
+ error's text and HTTP status.
12
+
13
+ Public surface (pinned in adopt_plan.md sec 1):
14
+ class ContextOverflow(Exception)
15
+ def is_context_overflow(error: Exception) -> bool
16
+ def classify(error: Exception) -> dict # {retryable, is_context_overflow, status}
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from typing import Optional
22
+
23
+ # ── Pattern table ───────────────────────────────────────────────────────────
24
+ # Matched against str(error).lower().
25
+ _CONTEXT_OVERFLOW_PATTERNS = (
26
+ "context length",
27
+ "context size",
28
+ "maximum context",
29
+ "token limit",
30
+ "too many tokens",
31
+ "reduce the length",
32
+ "exceeds the limit",
33
+ "context window",
34
+ "prompt is too long",
35
+ "prompt exceeds max length",
36
+ "maximum number of tokens",
37
+ # vLLM / local inference server patterns
38
+ "exceeds the max_model_len",
39
+ "max_model_len",
40
+ "prompt length", # "engine prompt length X exceeds"
41
+ "input is too long",
42
+ "maximum model length",
43
+ # Ollama patterns
44
+ "context length exceeded",
45
+ "truncating input",
46
+ # llama.cpp / llama-server patterns
47
+ "slot context", # "slot context: N tokens, prompt N tokens"
48
+ "n_ctx_slot",
49
+ # Chinese error messages (some providers return these)
50
+ "超过最大长度",
51
+ "上下文长度",
52
+ # AWS Bedrock Converse API error patterns
53
+ "input is too long",
54
+ "max input token",
55
+ "exceeds the maximum number of input tokens",
56
+ # NOTE: bare "input token" was removed — it matched OpenAI's TPM rate-limit text
57
+ # ("Limit: 30000 input tokens per minute"), misclassifying a 429 as a hard overflow.
58
+ )
59
+
60
+ # Structured error codes that unambiguously mean overflow.
61
+ _CONTEXT_OVERFLOW_CODES = frozenset({
62
+ "context_length_exceeded",
63
+ "max_tokens_exceeded",
64
+ })
65
+
66
+ # Payload-too-large message patterns — a 413 surfaced in the message text
67
+ # when no status_code attr is present.
68
+ _PAYLOAD_TOO_LARGE_PATTERNS = (
69
+ "request entity too large",
70
+ "payload too large",
71
+ "error code: 413",
72
+ )
73
+
74
+
75
+ # A parameter / validation error (e.g. "unsupported parameter 'max_tokens'") is NOT a context
76
+ # overflow even though it may name a token param — reading it as overflow would wrongly trigger the
77
+ # slice-tighten/rebuild loop. Root-cause guard: exclude param errors regardless of which param.
78
+ # (Kept SPECIFIC: a real OpenAI overflow is type invalid_request_error / code context_length_exceeded,
79
+ # so we must NOT exclude on those — only on explicit "unsupported/invalid parameter" wording.)
80
+ _NOT_OVERFLOW_MARKERS = (
81
+ "unsupported parameter",
82
+ "unsupported_parameter",
83
+ "is not supported with this model",
84
+ "unknown parameter",
85
+ "invalid parameter",
86
+ "parameter is invalid", # "the prompt length parameter is invalid" — a validation error, NOT overflow
87
+ "invalid value",
88
+ "invalid input token", # "invalid input token format" — not a context-size overflow
89
+ )
90
+
91
+
92
+ def _error_text(error: Exception) -> str:
93
+ """Lowercased message text for pattern matching."""
94
+ return str(error).lower()
95
+
96
+
97
+ def _extract_status_code(error: Exception) -> Optional[int]:
98
+ """Walk the error and its cause chain to find an HTTP status code.
99
+
100
+ Max depth 5 to bound the walk. Checks `.status_code` (int) then `.status`
101
+ (sane int) on each node, following `__cause__`/`__context__`.
102
+ """
103
+ current: Optional[BaseException] = error
104
+ for _ in range(5): # max depth to prevent infinite loops
105
+ if current is None:
106
+ break
107
+ code = getattr(current, "status_code", None)
108
+ if isinstance(code, int):
109
+ return code
110
+ # Some SDKs use .status instead of .status_code
111
+ code = getattr(current, "status", None)
112
+ if isinstance(code, int) and 100 <= code < 600:
113
+ return code
114
+ # Walk cause chain
115
+ cause = getattr(current, "__cause__", None) or getattr(current, "__context__", None)
116
+ if cause is None or cause is current:
117
+ break
118
+ current = cause
119
+ return None
120
+
121
+
122
+ def _extract_error_code(error: Exception) -> str:
123
+ """Best-effort structured error-code string from a `.code`/`.body` attr.
124
+
125
+ Defensive and pure-stdlib: never imports an SDK, never raises. Returns ''
126
+ when no usable code is found.
127
+ """
128
+ # Direct attribute (many SDK exceptions expose .code)
129
+ code = getattr(error, "code", None)
130
+ if isinstance(code, str) and code.strip():
131
+ return code.strip().lower()
132
+
133
+ body = getattr(error, "body", None)
134
+ if isinstance(body, dict):
135
+ err_obj = body.get("error")
136
+ if isinstance(err_obj, dict):
137
+ nested = err_obj.get("code") or err_obj.get("type") or ""
138
+ if isinstance(nested, str) and nested.strip():
139
+ return nested.strip().lower()
140
+ top = body.get("code") or body.get("error_code") or ""
141
+ if isinstance(top, str) and top.strip():
142
+ return top.strip().lower()
143
+ return ""
144
+
145
+
146
def is_context_overflow(error: Exception) -> bool:
    """Decide whether `error` signals that the request overflowed the context.

    Two vetoes are checked first, on the lowercased message text:
      - rate-limit wording ("rate limit", "too many requests",
        "tokens per min") — a TPM/RPM limit must back off and retry, not
        trigger slice-destroying overflow handling;
      - any `_NOT_OVERFLOW_MARKERS` entry — a parameter/validation error is
        never a context overflow.

    If neither veto fires, the error is an overflow when ANY of these hold:
      - the message contains a `_CONTEXT_OVERFLOW_PATTERNS` entry;
      - the message contains a `_PAYLOAD_TOO_LARGE_PATTERNS` entry;
      - the structured error code is in `_CONTEXT_OVERFLOW_CODES`;
      - the status found by `_extract_status_code` is 413.

    A bare 400/404 is NOT an overflow unless its text matches: many unrelated
    errors share those statuses, and no session-size proxy is used here.
    """
    text = _error_text(error)

    rate_limited = any(
        phrase in text
        for phrase in ("rate limit", "too many requests", "tokens per min")
    )
    if rate_limited or any(marker in text for marker in _NOT_OVERFLOW_MARKERS):
        return False

    text_says_overflow = any(
        pattern in text for pattern in _CONTEXT_OVERFLOW_PATTERNS
    ) or any(pattern in text for pattern in _PAYLOAD_TOO_LARGE_PATTERNS)
    if text_says_overflow:
        return True

    if _extract_error_code(error) in _CONTEXT_OVERFLOW_CODES:
        return True

    return _extract_status_code(error) == 413
178
+
179
+
180
def classify(error: Exception) -> dict:
    """Classify `error` for the retry/rebuild loop.

    Returns a dict with three keys:
      - `is_context_overflow`: the result of `is_context_overflow(error)`;
      - `status`: the HTTP status found by `_extract_status_code`, or None;
      - `retryable`: False for a context overflow (the caller is expected to
        tighten the slice rather than re-send the same oversized request).
        Otherwise, when a status is known, True for status >= 500, 408 or
        429; when no status is known, True if the message mentions
        "timeout", "timed out", "connection" or "temporarily unavailable".
    """
    status = _extract_status_code(error)

    if is_context_overflow(error):
        return {"retryable": False, "is_context_overflow": True, "status": status}

    if status is None:
        # No status code: fall back to transient transport wording.
        text = _error_text(error)
        transient_phrases = ("timeout", "timed out", "connection", "temporarily unavailable")
        retryable = any(phrase in text for phrase in transient_phrases)
    else:
        # Server errors, request-timeout and rate-limit are transient.
        retryable = True if (status >= 500 or status in (408, 429)) else False

    return {"retryable": retryable, "is_context_overflow": False, "status": status}
215
+
216
+
217
class ContextOverflow(Exception):
    """Exception wrapping a provider error that overflowed the context window.

    Attributes:
        original: the provider exception this one wraps.
        status_code: the HTTP status passed by the raiser, or None.

    The exception message is `str(original)`.
    """

    def __init__(self, original: Exception, *, status_code: Optional[int] = None):
        super().__init__(str(original))
        self.status_code = status_code
        self.original = original
sliceagent/envspec.py ADDED
@@ -0,0 +1,129 @@
1
+ """Single source of truth for every sliceagent environment variable.
2
+
3
+ Before this, 28 env vars were scattered across llm.py / cli.py / config.py / hooks.py with no discovery and
4
+ no validation (a typo'd AGENT_POLICY silently used the default). This module centralizes them so:
5
+ * `sliceagent config --list` can show every knob, its group, default, and current value;
6
+ * `validate_env()` warns on a misspelled enum value at startup instead of silently defaulting;
7
+ * a coverage test asserts no AGENT_*/LLM_*/SLICEAGENT_* var is read in the code without being documented here.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ from dataclasses import dataclass
13
+
14
+
15
@dataclass(frozen=True)
class EnvVar:
    """Immutable description of one environment variable known to sliceagent."""

    name: str
    group: str
    desc: str
    default: str = ""
    # If set AND validate=True, an out-of-set value warns at startup.
    choices: tuple = ()
    # The value is masked in `config --list`.
    secret: bool = False
    # Run startup validation against `choices`.
    validate: bool = False
    # Extra accepted values not shown as the canonical choice set.
    aliases: tuple = ()
25
+
26
+
27
# Every documented variable, in display order. Entries that leave `default`
# out rely on EnvVar's own default of "".
REGISTRY: list[EnvVar] = [
    # ── agent behaviour ───────────────────────────────────────────────────────────────────────
    EnvVar("AGENT_MODEL", "agent",
           "LLM model id to drive the agent. REQUIRED — no default; set it here "
           "or pick a provider+model via `sliceagent init`."),
    EnvVar("AGENT_MODEL_FALLBACK", "agent",
           "Larger-context model to switch to ONCE if the context overflows "
           "even after compaction (secondary net; the bounded slice is the primary)."),
    EnvVar("AGENT_PROVIDER", "agent",
           "Default provider id to use from the config's [providers.<id>] tables "
           "(overrides [agent].default_provider)."),
    EnvVar("AGENT_POLICY", "agent",
           "Permission mode: baby-sitter (confirm all) | teenager (auto edits, "
           "confirm commands) | let-it-go (auto, blocks catastrophic). All block catastrophic moves.",
           default="teenager",
           choices=("baby-sitter", "teenager", "let-it-go"),
           aliases=("guard", "allow", "readonly", "ask", "babysitter", "teen", "letgo", "letitgo", "yolo", "baby"),
           validate=True),
    EnvVar("AGENT_ROUTER", "agent",
           "Topic router: lexical (instant, no LLM) or llm (classifier round-trip).",
           default="lexical", choices=("lexical", "llm"), validate=True),
    EnvVar("AGENT_REASONING", "agent",
           "Reasoning effort: full=provider default, fast=minimal, high/max=more.",
           default="full", choices=("full", "fast", "high", "max"), validate=True),
    EnvVar("AGENT_THINKING", "agent",
           "Set to 'off' to disable reasoning (alias for AGENT_REASONING=fast)."),
    EnvVar("AGENT_MINE", "agent",
           "Lesson-mining mode for end-of-session consolidation.",
           default="deterministic"),
    EnvVar("AGENT_SUBAGENT_DEPTH", "agent",
           "Max delegation depth for spawn_subagent/spawn_explore (0=off).",
           default="1"),
    EnvVar("AGENT_EXPLORER_REASONING", "agent",
           "Reasoning effort for read-only explorer children.",
           default="fast"),
    EnvVar("AGENT_AUTO_APPROVE", "agent",
           "Comma-separated globs of pre-approved safe commands (skip prompt)."),
    EnvVar("AGENT_VERIFY_CMD", "agent",
           "Oracle verify command run after a turn (e.g. 'pytest -q')."),
    EnvVar("AGENT_MAX_TOKENS", "agent",
           "Per-session token budget (parks the turn when exhausted)."),
    EnvVar("AGENT_COMPLETION_TOKENS", "agent",
           "Per-REQUEST completion cap (max output tokens); distinct from the AGENT_MAX_TOKENS turn budget.",
           default="8192"),
    EnvVar("AGENT_MAX_STEPS", "agent",
           "Per-turn step ceiling (runaway backstop); raise for deep analysis.",
           default="60"),
    EnvVar("AGENT_SELFCHECK_MAX", "agent",
           "Max grounded done-gate verification rounds before accepting 'done'.",
           default="3"),
    EnvVar("AGENT_TOOL_TIMEOUT", "agent",
           "Per-tool wall-clock deadline in seconds (0/unset = off)."),
    EnvVar("AGENT_ROOT", "agent",
           "Workspace root override (defaults to the current directory)."),
    EnvVar("AGENT_ALLOW_PLUGINS", "agent",
           "Set truthy to load project/user plugins."),
    EnvVar("AGENT_SANDBOX", "agent",
           "Tool sandbox backend.",
           default="local", choices=("local", "docker"), validate=True),
    EnvVar("AGENT_WEB", "agent",
           "Enable the web tools (fetch_url + web_search, DuckDuckGo, no key); set "
           "0/off to disable network egress from the agent.",
           default="1"),
    # ── provider / network ────────────────────────────────────────────────────────────────────
    EnvVar("LLM_API_KEY", "provider",
           "API key for the LLM provider (REQUIRED).",
           secret=True),
    EnvVar("LLM_BASE_URL", "provider",
           "OpenAI-compatible endpoint (e.g. https://api.moonshot.cn/v1)."),
    EnvVar("LLM_TIMEOUT", "provider",
           "Per-request timeout in seconds."),
    EnvVar("LLM_TIMEOUT_SEC", "provider",
           "Alias for LLM_TIMEOUT."),
    EnvVar("AGENT_PROXY", "provider",
           "HTTP proxy URL for LLM calls; 'none'/'off' forces direct. Unset = direct (no proxy)."),
    EnvVar("OPENAI_API_KEY", "provider",
           "Legacy alias for LLM_API_KEY.",
           secret=True),
    EnvVar("MOONSHOT_API_KEY", "provider",
           "Legacy alias for LLM_API_KEY (Moonshot).",
           secret=True),
    EnvVar("OPENAI_BASE_URL", "provider",
           "Legacy alias for LLM_BASE_URL."),
    # ── UI ────────────────────────────────────────────────────────────────────────────────────
    EnvVar("AGENT_TUI", "ui",
           "UI mode: rich (default inline), live (pinned box), off (plain).",
           default="rich", choices=("rich", "live", "off"),
           aliases=("1", "on", "true", "yes", "0", "false", "no")),
    EnvVar("AGENT_SPINNER", "ui",
           "Animated in-place status spinner during a turn (a Rich live region). "
           "Set off to drop just the spinner; all other Rich formatting stays.",
           default="on", choices=("on", "off"),
           aliases=("1", "true", "yes", "0", "false", "no")),
    EnvVar("SHOW_SLICE", "ui",
           "Set truthy to print the rebuilt slice each turn (debug view)."),
    # ── memory ────────────────────────────────────────────────────────────────────────────────
    EnvVar("SLICEAGENT_VAULT", "memory",
           "sliceagent's STATE vault (episodic cache + task-state records)."),
    EnvVar("MEMEM_VAULT", "memory",
           "memem's lesson vault (markdown long-term memories), if memem is installed."),
    EnvVar("SLICEAGENT_SKILLS_DIR", "memory",
           "Extra directory to discover skills from."),
    EnvVar("AGENT_BACKGROUND_REVIEW", "agent",
           "Set truthy to run an off-thread reviewer that consolidates "
           "lessons after each turn."),
    EnvVar("SLICEAGENT_CACHE_DIR", "memory",
           "Directory for the episodic cache / durable log."),
    EnvVar("AGENT_EXPERIMENTAL_ALL", "debug",
           "Master switch: set truthy to enable ALL experimental flags "
           "(per-flag AGENT_EXPERIMENTAL_<ID> overrides)."),
    # ── monitoring / debug ────────────────────────────────────────────────────────────────────
    EnvVar("AGENT_METRICS", "monitor",
           "Set truthy to print per-turn fresh-token (moat) metrics at exit."),
    EnvVar("AGENT_TIMING", "monitor",
           "Set truthy to print a per-turn latency breakdown (slice build vs model)."),
    EnvVar("AGENT_MONITOR", "monitor",
           "Set truthy to enable the live monitor server."),
    EnvVar("AGENT_MONITOR_PORT", "monitor",
           "Port for the monitor server."),
    EnvVar("SLICEAGENT_MONITOR_DIR", "monitor",
           "Directory the monitor writes slice snapshots to."),
    EnvVar("SLICEAGENT_DEBUG_TRACE", "debug",
           "Set truthy to print tracebacks for parked/hook errors."),
    EnvVar("SLICEAGENT_NO_CLOSURE", "debug",
           "Debug flag: disable the turn closeout call."),
    EnvVar("SLICEAGENT_PROMPT_FILE", "debug",
           "A/B experiment seam: path to a full SYSTEM_PROMPT template "
           "(must keep the {{MEMORY_MODEL}} marker) to override the prompt for a measurement run "
           "(evals/prompt_ab). Unset → the production prompt."),
]

# Name → entry lookup built from REGISTRY.
BY_NAME: dict[str, EnvVar] = {spec.name: spec for spec in REGISTRY}
# The group labels used by REGISTRY entries.
GROUPS = ("agent", "provider", "ui", "memory", "monitor", "debug")
101
+
102
+
103
def validate_env(env: dict | None = None) -> list[str]:
    """Collect warnings for enum-style variables set to an unrecognised value.

    Only REGISTRY entries with `validate=True` and a non-empty `choices` are
    checked. An unset or empty value is skipped. Otherwise the value is
    stripped and lowercased and must match one of the entry's choices or
    aliases (compared case-insensitively).

    Args:
        env: mapping to read values from; `os.environ` when None.

    Returns:
        One human-readable message per offending variable. Nothing is raised;
        the caller is expected to print the messages and carry on with defaults.
    """
    source = os.environ if env is None else env
    problems = []
    for spec in (s for s in REGISTRY if s.validate and s.choices):
        raw = source.get(spec.name)
        if raw is None or raw == "":
            continue
        candidate = raw.strip().lower()
        accepted = {c.lower() for c in spec.choices}
        accepted |= {a.lower() for a in spec.aliases}
        if candidate in accepted:
            continue
        problems.append(f"{spec.name}={raw!r} is not one of {{{', '.join(spec.choices)}}} — using default "
                        f"{spec.default!r}")
    return problems
118
+
119
+
120
def current_value(name: str, env: dict | None = None) -> str:
    """Return the value of `name` for display, masking secrets.

    Args:
        name: the variable to look up.
        env: mapping to read from; `os.environ` when None.

    Returns:
        "" when the variable is unset. For a non-empty value of a variable
        registered as secret, a placeholder that shows only the value's
        length. Otherwise the raw value.
    """
    source = os.environ if env is None else env
    raw = source.get(name)
    if raw is None:
        return ""

    spec = BY_NAME.get(name)
    is_secret = spec is not None and spec.secret
    if is_secret and raw:
        # Never reveal any part of a secret (not even the 'sk-' prefix).
        return f"*** ({len(raw)} chars)"
    return raw
sliceagent/errors.py ADDED
@@ -0,0 +1,167 @@
1
+ """Error classification + retry.
2
+
3
+ `classify` maps an exception to structured recovery hints; `with_retry` does
4
+ abort-aware jittered backoff on retryable errors.
5
+
6
+ The backoff is jittered: a
7
+ lock-guarded monotonic counter seeds a per-call RNG so concurrent sessions
8
+ hitting the same rate-limited provider don't all retry at the same instant
9
+ (decorrelated uniform jitter).
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import random
14
+ import threading
15
+ import time
16
+ from enum import Enum
17
+ from typing import Callable
18
+
19
+ from .context_overflow import is_context_overflow
20
+ from .events import ApiRetry, Dispatcher
21
+
22
+
23
class ErrorKind(str, Enum):
    """Typed failure taxonomy.

    Members are also `str` instances, which keeps older call sites working
    (`classify(e)["kind"] == "rate_limit"` is still true) and makes the value
    JSON-serializable for telemetry. Enumerating ALL members lets the metrics
    layer pre-register one counter per kind.
    """

    CONTEXT_OVERFLOW = "context_overflow"
    AUTH = "auth"
    EMPTY_RESPONSE = "empty_response"
    RATE_LIMIT = "rate_limit"
    SERVER = "server"
    TIMEOUT = "timeout"
    CONNECTION = "connection"
    UNKNOWN = "unknown"
35
+
36
+
37
class EmptyResponseError(Exception):
    """Signals a degenerate completion: no content AND no tool calls.

    Some providers/proxies occasionally emit an empty body, and handing that
    back would stall the loop. The error is classified RETRYABLE so that
    `with_retry` re-rolls the request instead.
    """
41
+
42
+
43
# Process-wide call counter mixed into the jitter seed so that two calls made
# within the same clock tick still get different seeds. Guarded by a lock so
# concurrent retry paths never observe the same value.
_jitter_counter = 0
_jitter_lock = threading.Lock()


def jittered_backoff(
    attempt: int,
    *,
    base_delay: float = 0.5,
    max_delay: float = 5.0,
    jitter_ratio: float = 0.5,
) -> float:
    """Return an exponential backoff delay with uniform random jitter added.

    Args:
        attempt: 1-based retry attempt number; values below 1 behave like 1.
        base_delay: delay in seconds for attempt 1.
        max_delay: cap in seconds applied to the exponential part.
        jitter_ratio: the jitter is drawn uniformly from
            [0, jitter_ratio * delay], so 0.5 adds up to half the delay.

    Returns:
        min(base_delay * 2**(attempt - 1), max_delay) + jitter, in seconds.
        When `base_delay` is <= 0, or the exponent would reach 63 or more,
        the exponential part is `max_delay` directly.

    Each call bumps the module-level counter and seeds a private RNG from the
    clock XOR-ed with that counter, so concurrent callers retrying against the
    same provider do not all wake at the same instant.
    """
    global _jitter_counter
    with _jitter_lock:
        _jitter_counter += 1
        tick = _jitter_counter

    doublings = max(0, attempt - 1)
    saturated = doublings >= 63 or base_delay <= 0
    delay = max_delay if saturated else min(base_delay * (2 ** doublings), max_delay)

    # Clock + counter: still decorrelated when the clock resolution is coarse.
    seed = (time.time_ns() ^ (tick * 0x9E3779B9)) & 0xFFFFFFFF
    jitter = random.Random(seed).uniform(0, jitter_ratio * delay)
    return delay + jitter
88
+
89
+
90
def classify(error: Exception) -> dict:
    """Map an exception to recovery hints for the retry loop and telemetry.

    The status is read from `error.status_code`, falling back to
    `error.status`; the message is matched in lowercase.

    Returns a dict with:
      - `retryable`: never True for a context overflow or a 401/403.
        Otherwise True for a 429, a 5xx status, an `EmptyResponseError`, or a
        message mentioning "rate limit", "too many requests", "overloaded",
        "503", "timeout", "timed out", "connection error" or "econn".
      - `is_context_overflow`: the result of `is_context_overflow(error)`.
      - `status`: the status read above (may be None).
      - `kind`: an `ErrorKind` bucket for telemetry, chosen independently of
        `retryable`; the first match wins in the order overflow, auth, empty
        response, rate limit, server, timeout, connection, unknown.
    """
    text = str(error).lower()
    status = getattr(error, "status_code", None) or getattr(error, "status", None)
    overflow = is_context_overflow(error)
    empty = isinstance(error, EmptyResponseError)

    # Signals shared by the retry decision and the telemetry bucket.
    is_auth = status in (401, 403)
    is_5xx = isinstance(status, int) and 500 <= status < 600
    throttled = bool(
        status == 429
        or "rate limit" in text
        or "too many requests" in text
        or "overloaded" in text
    )
    mentions_503 = "503" in text
    timed_out = "timeout" in text or "timed out" in text

    if overflow or is_auth:
        # Overflow: tighten the slice rather than re-send. Auth: never retry.
        retryable = False
    else:
        retryable = (
            throttled
            or mentions_503
            or is_5xx
            or timed_out
            or "connection error" in text
            or "econn" in text
            or empty  # degenerate empty completion — re-roll
        )

    if overflow:
        kind = ErrorKind.CONTEXT_OVERFLOW
    elif is_auth:
        kind = ErrorKind.AUTH
    elif empty:
        kind = ErrorKind.EMPTY_RESPONSE
    elif throttled:
        kind = ErrorKind.RATE_LIMIT
    elif is_5xx or mentions_503:
        kind = ErrorKind.SERVER
    elif timed_out:
        kind = ErrorKind.TIMEOUT
    elif "connection" in text or "econn" in text:
        kind = ErrorKind.CONNECTION
    else:
        kind = ErrorKind.UNKNOWN

    return {"retryable": retryable, "is_context_overflow": overflow, "status": status, "kind": kind}
127
+
128
+
129
+ def _retry_after_seconds(error: Exception) -> "float | None":
130
+ """Best-effort: a 429/503 may carry a Retry-After (SDK `.retry_after` or a response header) telling us
131
+ EXACTLY how long to wait — honor it instead of guessing. Returns seconds, or None to fall back to
132
+ backoff (incl. when Retry-After is an HTTP-date, which we don't parse). Never raises."""
133
+ try:
134
+ val = getattr(error, "retry_after", None)
135
+ if val is None:
136
+ hdrs = getattr(getattr(error, "response", None), "headers", None)
137
+ if hdrs is not None:
138
+ val = hdrs.get("retry-after") or hdrs.get("Retry-After")
139
+ if val is None:
140
+ return None
141
+ secs = float(val)
142
+ return secs if secs >= 0 else None
143
+ except (TypeError, ValueError):
144
+ return None
145
+
146
+
147
def with_retry(
    fn: Callable[[], object],
    *,
    max_attempts: int = 3,
    is_retryable: "Callable[[Exception], bool] | None" = None,
    dispatch: "Dispatcher | None" = None,
):
    """Call `fn`, retrying with jittered backoff while the failure is retryable.

    Args:
        fn: zero-argument callable to run.
        max_attempts: total number of calls allowed. Values below 1 are
            treated as 1 so that `fn` always runs at least once (previously
            such a value skipped the call entirely and returned None).
        is_retryable: optional predicate deciding whether an exception may be
            retried; when omitted, `classify(e)["retryable"]` decides.
        dispatch: optional event sink; it receives an `ApiRetry` carrying the
            attempt number and the first 200 characters of the error before
            each wait.

    Returns:
        Whatever `fn` returns on its first successful call.

    Raises:
        The exception from `fn`, unchanged, when it is not retryable or when
        the final attempt fails.
    """
    attempts = max(1, max_attempts)
    for attempt in range(1, attempts + 1):
        try:
            return fn()
        except Exception as e:
            retry = is_retryable(e) if is_retryable else classify(e)["retryable"]
            if not retry or attempt == attempts:
                raise
            if dispatch:
                dispatch(ApiRetry(attempt=attempt, error=str(e)[:200]))
            delay = jittered_backoff(attempt)
            ra = _retry_after_seconds(e)
            if ra is not None:
                # Honor the server's Retry-After, capped at 60s so a huge
                # value can't stall the turn; never wait less than the backoff.
                delay = max(delay, min(ra, 60.0))
            time.sleep(delay)
sliceagent/events.py ADDED
@@ -0,0 +1,96 @@
1
+ """Event system: the loop's ONLY output path.
2
+
3
+ The core never prints or writes files — it dispatches events. The host composes a
4
+ dispatcher from sinks (slice-updater, durable log, CLI/TUI, SDK). Sink failures are
5
+ contained so a frontend can't break the loop.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass
10
+ from typing import Callable
11
+
12
+
13
+ @dataclass
14
+ class Event:
15
+ pass
16
+
17
+
18
+ @dataclass
19
+ class StepBegin(Event):
20
+ step: int
21
+
22
+
23
+ @dataclass
24
+ class StepEnd(Event):
25
+ step: int
26
+ usage: dict
27
+ stop_reason: str
28
+
29
+
30
+ @dataclass
31
+ class SliceBuilt(Event):
32
+ rendered: str # the volatile user message this turn (for debugging/inspection)
33
+ messages: list | None = None # the FULL model-visible messages [system, user] (monitor/inspection)
34
+
35
+
36
+ @dataclass
37
+ class AssistantText(Event):
38
+ content: str
39
+
40
+
41
+ @dataclass
42
+ class ToolStarted(Event):
43
+ name: str
44
+ args: dict
45
+
46
+
47
+ @dataclass
48
+ class ToolResult(Event):
49
+ name: str
50
+ args: dict
51
+ output: str
52
+ failing: bool
53
+
54
+
55
+ @dataclass
56
+ class ApiRetry(Event):
57
+ attempt: int
58
+ error: str
59
+
60
+
61
+ @dataclass
62
+ class SliceTightened(Event):
63
+ level: int
64
+ reason: str = "context_overflow"
65
+
66
+
67
+ @dataclass
68
+ class TurnEnd(Event):
69
+ stop_reason: str
70
+ steps: int
71
+ usage: dict
72
+
73
+
74
+ @dataclass
75
+ class TurnInterrupted(Event):
76
+ reason: str # "aborted" | "max_steps" | "error"
77
+ message: str | None = None
78
+
79
+
80
+ @dataclass
81
+ class LessonSaved(Event):
82
+ title: str
83
+ content: str # the lesson mined into memem (write side of the memory loop)
84
+
85
+
86
+ Dispatcher = Callable[[Event], None]
87
+
88
+
89
def make_dispatcher(*sinks: "Callable[[Event], None]") -> "Dispatcher":
    """Build a dispatcher that fans each event out to every sink, in order.

    Args:
        *sinks: callables that each accept a single event.

    Returns:
        A function taking one event and returning None. It calls every sink
        with the event, in the order the sinks were given.

    A sink that raises `Exception` is contained: the remaining sinks still
    run and nothing propagates to the caller, so a sink/listener failure
    cannot affect the loop. The failure is recorded at DEBUG level on this
    module's logger instead of being discarded without a trace.
    """
    import logging  # function-scope import: the file's import block is left untouched

    log = logging.getLogger(__name__)

    def dispatch(event: "Event") -> None:
        for sink in sinks:
            try:
                sink(event)
            except Exception:
                # Contain the failure; leave a trace for whoever enables DEBUG.
                log.debug(
                    "event sink %r failed while handling %s",
                    sink,
                    type(event).__name__,
                    exc_info=True,
                )

    return dispatch