sliceagent 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sliceagent/__init__.py +3 -0
- sliceagent/__main__.py +6 -0
- sliceagent/access.py +93 -0
- sliceagent/agents.py +173 -0
- sliceagent/background_review.py +146 -0
- sliceagent/binsniff.py +89 -0
- sliceagent/cli.py +890 -0
- sliceagent/clock.py +32 -0
- sliceagent/code_grep.py +329 -0
- sliceagent/code_index.py +417 -0
- sliceagent/config.py +240 -0
- sliceagent/context_overflow.py +227 -0
- sliceagent/envspec.py +129 -0
- sliceagent/errors.py +167 -0
- sliceagent/events.py +96 -0
- sliceagent/finding_types.py +70 -0
- sliceagent/flags.py +63 -0
- sliceagent/fuzzy.py +135 -0
- sliceagent/guardrails.py +438 -0
- sliceagent/guidance.py +69 -0
- sliceagent/hippocampus.py +581 -0
- sliceagent/hooks.py +334 -0
- sliceagent/interfaces.py +144 -0
- sliceagent/llm.py +695 -0
- sliceagent/loop.py +548 -0
- sliceagent/mcp_client.py +255 -0
- sliceagent/mcp_security.py +77 -0
- sliceagent/memory.py +428 -0
- sliceagent/metrics.py +103 -0
- sliceagent/model_catalog.py +124 -0
- sliceagent/monitor.py +615 -0
- sliceagent/neocortex.py +436 -0
- sliceagent/onboarding.py +323 -0
- sliceagent/oracle.py +36 -0
- sliceagent/pagetable.py +255 -0
- sliceagent/pfc.py +449 -0
- sliceagent/plugins.py +127 -0
- sliceagent/policy.py +234 -0
- sliceagent/procman.py +187 -0
- sliceagent/prompt.py +239 -0
- sliceagent/records.py +108 -0
- sliceagent/recovery.py +119 -0
- sliceagent/regions.py +678 -0
- sliceagent/registry.py +128 -0
- sliceagent/retriever.py +19 -0
- sliceagent/safety.py +332 -0
- sliceagent/sandbox.py +143 -0
- sliceagent/scheduler.py +92 -0
- sliceagent/search_index.py +289 -0
- sliceagent/seed.py +465 -0
- sliceagent/sensory_cortex.py +500 -0
- sliceagent/session.py +222 -0
- sliceagent/skill_provenance.py +71 -0
- sliceagent/skill_usage.py +123 -0
- sliceagent/skills.py +209 -0
- sliceagent/subagent.py +332 -0
- sliceagent/subdir_hints.py +222 -0
- sliceagent/swap.py +182 -0
- sliceagent/taskstate.py +57 -0
- sliceagent/telemetry.py +59 -0
- sliceagent/terminal.py +240 -0
- sliceagent/text_utils.py +56 -0
- sliceagent/tool_summary.py +93 -0
- sliceagent/tools.py +1194 -0
- sliceagent/tui.py +1377 -0
- sliceagent/web.py +354 -0
- sliceagent-0.1.0.dist-info/METADATA +262 -0
- sliceagent-0.1.0.dist-info/RECORD +71 -0
- sliceagent-0.1.0.dist-info/WHEEL +4 -0
- sliceagent-0.1.0.dist-info/entry_points.txt +2 -0
- sliceagent-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
"""Context-overflow classification — pure stdlib, never imports openai.
|
|
2
|
+
|
|
3
|
+
Provides:
|
|
4
|
+
- the `_CONTEXT_OVERFLOW_PATTERNS` message table
|
|
5
|
+
- the cause-walk `_extract_status_code` (max depth 5)
|
|
6
|
+
- the 400 / 413 / `context_length_exceeded` overflow rules
|
|
7
|
+
|
|
8
|
+
The moat: sliceagent always has a slice it can TIGHTEN, so there is NO
|
|
9
|
+
session-size / token-count heuristic here — a generic-400 + large-session
|
|
10
|
+
proxy is deliberately avoided. Overflow is decided purely from the
|
|
11
|
+
error's text and HTTP status.
|
|
12
|
+
|
|
13
|
+
Public surface (pinned in adopt_plan.md sec 1):
|
|
14
|
+
class ContextOverflow(Exception)
|
|
15
|
+
def is_context_overflow(error: Exception) -> bool
|
|
16
|
+
def classify(error: Exception) -> dict # {retryable, is_context_overflow, status}
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from typing import Optional
|
|
22
|
+
|
|
23
|
+
# ── Pattern table ───────────────────────────────────────────────────────────
|
|
24
|
+
# Matched against str(error).lower().
|
|
25
|
+
_CONTEXT_OVERFLOW_PATTERNS = (
|
|
26
|
+
"context length",
|
|
27
|
+
"context size",
|
|
28
|
+
"maximum context",
|
|
29
|
+
"token limit",
|
|
30
|
+
"too many tokens",
|
|
31
|
+
"reduce the length",
|
|
32
|
+
"exceeds the limit",
|
|
33
|
+
"context window",
|
|
34
|
+
"prompt is too long",
|
|
35
|
+
"prompt exceeds max length",
|
|
36
|
+
"maximum number of tokens",
|
|
37
|
+
# vLLM / local inference server patterns
|
|
38
|
+
"exceeds the max_model_len",
|
|
39
|
+
"max_model_len",
|
|
40
|
+
"prompt length", # "engine prompt length X exceeds"
|
|
41
|
+
"input is too long",
|
|
42
|
+
"maximum model length",
|
|
43
|
+
# Ollama patterns
|
|
44
|
+
"context length exceeded",
|
|
45
|
+
"truncating input",
|
|
46
|
+
# llama.cpp / llama-server patterns
|
|
47
|
+
"slot context", # "slot context: N tokens, prompt N tokens"
|
|
48
|
+
"n_ctx_slot",
|
|
49
|
+
# Chinese error messages (some providers return these)
|
|
50
|
+
"超过最大长度",
|
|
51
|
+
"上下文长度",
|
|
52
|
+
# AWS Bedrock Converse API error patterns
|
|
53
|
+
"input is too long",
|
|
54
|
+
"max input token",
|
|
55
|
+
"exceeds the maximum number of input tokens",
|
|
56
|
+
# NOTE: bare "input token" was removed — it matched OpenAI's TPM rate-limit text
|
|
57
|
+
# ("Limit: 30000 input tokens per minute"), misclassifying a 429 as a hard overflow.
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
# Structured error codes that unambiguously mean overflow.
|
|
61
|
+
_CONTEXT_OVERFLOW_CODES = frozenset({
|
|
62
|
+
"context_length_exceeded",
|
|
63
|
+
"max_tokens_exceeded",
|
|
64
|
+
})
|
|
65
|
+
|
|
66
|
+
# Payload-too-large message patterns — a 413 surfaced in the message text
|
|
67
|
+
# when no status_code attr is present.
|
|
68
|
+
_PAYLOAD_TOO_LARGE_PATTERNS = (
|
|
69
|
+
"request entity too large",
|
|
70
|
+
"payload too large",
|
|
71
|
+
"error code: 413",
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# A parameter / validation error (e.g. "unsupported parameter 'max_tokens'") is NOT a context
|
|
76
|
+
# overflow even though it may name a token param — reading it as overflow would wrongly trigger the
|
|
77
|
+
# slice-tighten/rebuild loop. Root-cause guard: exclude param errors regardless of which param.
|
|
78
|
+
# (Kept SPECIFIC: a real OpenAI overflow is type invalid_request_error / code context_length_exceeded,
|
|
79
|
+
# so we must NOT exclude on those — only on explicit "unsupported/invalid parameter" wording.)
|
|
80
|
+
_NOT_OVERFLOW_MARKERS = (
|
|
81
|
+
"unsupported parameter",
|
|
82
|
+
"unsupported_parameter",
|
|
83
|
+
"is not supported with this model",
|
|
84
|
+
"unknown parameter",
|
|
85
|
+
"invalid parameter",
|
|
86
|
+
"parameter is invalid", # "the prompt length parameter is invalid" — a validation error, NOT overflow
|
|
87
|
+
"invalid value",
|
|
88
|
+
"invalid input token", # "invalid input token format" — not a context-size overflow
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _error_text(error: Exception) -> str:
|
|
93
|
+
"""Lowercased message text for pattern matching."""
|
|
94
|
+
return str(error).lower()
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _extract_status_code(error: Exception) -> Optional[int]:
|
|
98
|
+
"""Walk the error and its cause chain to find an HTTP status code.
|
|
99
|
+
|
|
100
|
+
Max depth 5 to bound the walk. Checks `.status_code` (int) then `.status`
|
|
101
|
+
(sane int) on each node, following `__cause__`/`__context__`.
|
|
102
|
+
"""
|
|
103
|
+
current: Optional[BaseException] = error
|
|
104
|
+
for _ in range(5): # max depth to prevent infinite loops
|
|
105
|
+
if current is None:
|
|
106
|
+
break
|
|
107
|
+
code = getattr(current, "status_code", None)
|
|
108
|
+
if isinstance(code, int):
|
|
109
|
+
return code
|
|
110
|
+
# Some SDKs use .status instead of .status_code
|
|
111
|
+
code = getattr(current, "status", None)
|
|
112
|
+
if isinstance(code, int) and 100 <= code < 600:
|
|
113
|
+
return code
|
|
114
|
+
# Walk cause chain
|
|
115
|
+
cause = getattr(current, "__cause__", None) or getattr(current, "__context__", None)
|
|
116
|
+
if cause is None or cause is current:
|
|
117
|
+
break
|
|
118
|
+
current = cause
|
|
119
|
+
return None
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _extract_error_code(error: Exception) -> str:
|
|
123
|
+
"""Best-effort structured error-code string from a `.code`/`.body` attr.
|
|
124
|
+
|
|
125
|
+
Defensive and pure-stdlib: never imports an SDK, never raises. Returns ''
|
|
126
|
+
when no usable code is found.
|
|
127
|
+
"""
|
|
128
|
+
# Direct attribute (many SDK exceptions expose .code)
|
|
129
|
+
code = getattr(error, "code", None)
|
|
130
|
+
if isinstance(code, str) and code.strip():
|
|
131
|
+
return code.strip().lower()
|
|
132
|
+
|
|
133
|
+
body = getattr(error, "body", None)
|
|
134
|
+
if isinstance(body, dict):
|
|
135
|
+
err_obj = body.get("error")
|
|
136
|
+
if isinstance(err_obj, dict):
|
|
137
|
+
nested = err_obj.get("code") or err_obj.get("type") or ""
|
|
138
|
+
if isinstance(nested, str) and nested.strip():
|
|
139
|
+
return nested.strip().lower()
|
|
140
|
+
top = body.get("code") or body.get("error_code") or ""
|
|
141
|
+
if isinstance(top, str) and top.strip():
|
|
142
|
+
return top.strip().lower()
|
|
143
|
+
return ""
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def is_context_overflow(error: Exception) -> bool:
    """True when `error` is (or wraps) a context-overflow signal.

    Checks run in this order and the first decisive one wins:

    1. Rate limits are never overflow: HTTP 429 found by the cause-walk, or
       rate-limit wording in the message, returns False.
    2. Parameter/validation wording (`_NOT_OVERFLOW_MARKERS`) returns False.
    3. The lowercased message matches `_CONTEXT_OVERFLOW_PATTERNS` -> True.
    4. A 413 phrased in the message text (`_PAYLOAD_TOO_LARGE_PATTERNS`) -> True.
    5. A structured error code is `context_length_exceeded`/`max_tokens_exceeded` -> True.
    6. The cause chain carries HTTP 413 (payload-too-large => compress) -> True.

    A bare 400/404 is NOT treated as overflow unless its TEXT matches the
    overflow table — many non-overflow errors also use 400/404, and sliceagent
    uses no session-size proxy (we always have a slice to tighten).
    """
    msg = _error_text(error)
    status = _extract_status_code(error)

    # A TPM/RPM RATE LIMIT (429) must back off + retry, NOT trigger
    # slice-destroying overflow handling. The status check catches 429s whose
    # wording would otherwise hit an overflow pattern such as "token limit".
    if status == 429 or "rate limit" in msg or "too many requests" in msg or "tokens per min" in msg:
        return False
    if any(m in msg for m in _NOT_OVERFLOW_MARKERS):
        return False  # a parameter/validation error is never a context overflow
    if any(p in msg for p in _CONTEXT_OVERFLOW_PATTERNS):
        return True
    if any(p in msg for p in _PAYLOAD_TOO_LARGE_PATTERNS):
        return True

    if _extract_error_code(error) in _CONTEXT_OVERFLOW_CODES:
        return True

    return status == 413
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def classify(error: Exception) -> dict:
    """Summarise `error` for the retry/rebuild loop.

    Returns a dict with three keys:
      - `is_context_overflow`: what `is_context_overflow(error)` reports;
      - `status`: HTTP status from the cause-walk, or None;
      - `retryable`: True for a transient status (5xx, 408, 429) or, when no
        status is available, for timeout/connection wording in the message.

    Overflow always comes back with `retryable=False`: the caller is expected
    to rebuild a smaller slice rather than re-send the identical oversized
    request.
    """
    status = _extract_status_code(error)

    if is_context_overflow(error):
        return {"retryable": False, "is_context_overflow": True, "status": status}

    if status is None:
        # No status code: fall back to transient transport wording.
        text = _error_text(error)
        transient_words = ("timeout", "timed out", "connection", "temporarily unavailable")
        retryable = any(word in text for word in transient_words)
    else:
        # 5xx server errors + 408 request-timeout + 429 rate-limit are transient.
        retryable = status >= 500 or status in (408, 429)

    return {"retryable": retryable, "is_context_overflow": False, "status": status}
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
class ContextOverflow(Exception):
    """Signals that a request did not fit in the model's context window.

    Wraps the provider's own exception so downstream handling can still look
    at it. The exception message is `str(original)`.

    Attributes:
        original: the provider error this exception was built from.
        status_code: the HTTP status supplied by the raiser, or None.
    """

    def __init__(self, original: Exception, *, status_code: Optional[int] = None):
        super().__init__(str(original))
        self.status_code = status_code
        self.original = original
|
sliceagent/envspec.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Single source of truth for every sliceagent environment variable.
|
|
2
|
+
|
|
3
|
+
Before this, 28 env vars were scattered across llm.py / cli.py / config.py / hooks.py with no discovery and
|
|
4
|
+
no validation (a typo'd AGENT_POLICY silently used the default). This module centralizes them so:
|
|
5
|
+
* `sliceagent config --list` can show every knob, its group, default, and current value;
|
|
6
|
+
* `validate_env()` warns on a misspelled enum value at startup instead of silently defaulting;
|
|
7
|
+
* a coverage test asserts no AGENT_*/LLM_*/SLICEAGENT_* var is read in the code without being documented here.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class EnvVar:
    """Immutable description of one environment variable listed in REGISTRY.

    `validate_env` uses `validate`, `choices`, `aliases` and `default`;
    `current_value` uses `secret`.
    """
    name: str            # the environment variable's name; also the key in BY_NAME
    group: str           # grouping label; REGISTRY entries use the values listed in GROUPS
    desc: str            # human-readable description of what the variable controls
    default: str = ""    # default shown to the user; "" when the registry declares none
    choices: tuple = ()  # if set AND validate=True, an out-of-set value warns at startup
    secret: bool = False  # value is masked in `config --list`
    validate: bool = False  # run startup validation against `choices`
    aliases: tuple = ()  # extra accepted values not shown as the canonical choice set
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Every documented environment variable, in display order. Positional fields are
# (name, group, desc, default). The banner comments below are visual sections only;
# the `group` argument on each entry is what actually assigns it to a group.
REGISTRY: list[EnvVar] = [
    # ── agent behaviour ───────────────────────────────────────────────────────────────────────
    EnvVar("AGENT_MODEL", "agent", "LLM model id to drive the agent. REQUIRED — no default; set it here "
           "or pick a provider+model via `sliceagent init`.", ""),
    EnvVar("AGENT_MODEL_FALLBACK", "agent", "Larger-context model to switch to ONCE if the context overflows "
           "even after compaction (secondary net; the bounded slice is the primary).", ""),
    EnvVar("AGENT_PROVIDER", "agent", "Default provider id to use from the config's [providers.<id>] tables "
           "(overrides [agent].default_provider).", ""),
    EnvVar("AGENT_POLICY", "agent", "Permission mode: baby-sitter (confirm all) | teenager (auto edits, "
           "confirm commands) | let-it-go (auto, blocks catastrophic). All block catastrophic moves.",
           "teenager", choices=("baby-sitter", "teenager", "let-it-go"),
           aliases=("guard", "allow", "readonly", "ask", "babysitter", "teen", "letgo", "letitgo", "yolo", "baby"),
           validate=True),
    EnvVar("AGENT_ROUTER", "agent", "Topic router: lexical (instant, no LLM) or llm (classifier round-trip).",
           "lexical", choices=("lexical", "llm"), validate=True),
    EnvVar("AGENT_REASONING", "agent", "Reasoning effort: full=provider default, fast=minimal, high/max=more.",
           "full", choices=("full", "fast", "high", "max"), validate=True),
    EnvVar("AGENT_THINKING", "agent", "Set to 'off' to disable reasoning (alias for AGENT_REASONING=fast).", ""),
    EnvVar("AGENT_MINE", "agent", "Lesson-mining mode for end-of-session consolidation.", "deterministic"),
    EnvVar("AGENT_SUBAGENT_DEPTH", "agent", "Max delegation depth for spawn_subagent/spawn_explore (0=off).", "1"),
    EnvVar("AGENT_EXPLORER_REASONING", "agent", "Reasoning effort for read-only explorer children.", "fast"),
    EnvVar("AGENT_AUTO_APPROVE", "agent", "Comma-separated globs of pre-approved safe commands (skip prompt).", ""),
    EnvVar("AGENT_VERIFY_CMD", "agent", "Oracle verify command run after a turn (e.g. 'pytest -q').", ""),
    EnvVar("AGENT_MAX_TOKENS", "agent", "Per-session token budget (parks the turn when exhausted).", ""),
    EnvVar("AGENT_COMPLETION_TOKENS", "agent", "Per-REQUEST completion cap (max output tokens); distinct from the AGENT_MAX_TOKENS turn budget.", "8192"),
    EnvVar("AGENT_MAX_STEPS", "agent", "Per-turn step ceiling (runaway backstop); raise for deep analysis.", "60"),
    EnvVar("AGENT_SELFCHECK_MAX", "agent", "Max grounded done-gate verification rounds before accepting 'done'.", "3"),
    EnvVar("AGENT_TOOL_TIMEOUT", "agent", "Per-tool wall-clock deadline in seconds (0/unset = off).", ""),
    EnvVar("AGENT_ROOT", "agent", "Workspace root override (defaults to the current directory).", ""),
    EnvVar("AGENT_ALLOW_PLUGINS", "agent", "Set truthy to load project/user plugins.", ""),
    EnvVar("AGENT_SANDBOX", "agent", "Tool sandbox backend.", "local", choices=("local", "docker"), validate=True),
    EnvVar("AGENT_WEB", "agent", "Enable the web tools (fetch_url + web_search, DuckDuckGo, no key); set "
           "0/off to disable network egress from the agent.", "1"),
    # ── provider / network ────────────────────────────────────────────────────────────────────
    EnvVar("LLM_API_KEY", "provider", "API key for the LLM provider (REQUIRED).", "", secret=True),
    EnvVar("LLM_BASE_URL", "provider", "OpenAI-compatible endpoint (e.g. https://api.moonshot.cn/v1).", ""),
    EnvVar("LLM_TIMEOUT", "provider", "Per-request timeout in seconds.", ""),
    EnvVar("LLM_TIMEOUT_SEC", "provider", "Alias for LLM_TIMEOUT.", ""),
    EnvVar("AGENT_PROXY", "provider", "HTTP proxy URL for LLM calls; 'none'/'off' forces direct. Unset = direct (no proxy).", ""),
    EnvVar("OPENAI_API_KEY", "provider", "Legacy alias for LLM_API_KEY.", "", secret=True),
    EnvVar("MOONSHOT_API_KEY", "provider", "Legacy alias for LLM_API_KEY (Moonshot).", "", secret=True),
    EnvVar("OPENAI_BASE_URL", "provider", "Legacy alias for LLM_BASE_URL.", ""),
    # ── UI ────────────────────────────────────────────────────────────────────────────────────
    # NOTE: AGENT_TUI and AGENT_SPINNER declare choices/aliases but leave validate=False,
    # so validate_env() skips them.
    EnvVar("AGENT_TUI", "ui", "UI mode: rich (default inline), live (pinned box), off (plain).",
           "rich", choices=("rich", "live", "off"),
           aliases=("1", "on", "true", "yes", "0", "false", "no")),
    EnvVar("AGENT_SPINNER", "ui", "Animated in-place status spinner during a turn (a Rich live region). "
           "Set off to drop just the spinner; all other Rich formatting stays.",
           "on", choices=("on", "off"), aliases=("1", "true", "yes", "0", "false", "no")),
    EnvVar("SHOW_SLICE", "ui", "Set truthy to print the rebuilt slice each turn (debug view).", ""),
    # ── memory ────────────────────────────────────────────────────────────────────────────────
    # NOTE: two entries in this section carry a different group: AGENT_BACKGROUND_REVIEW
    # is "agent" and AGENT_EXPERIMENTAL_ALL is "debug".
    EnvVar("SLICEAGENT_VAULT", "memory", "sliceagent's STATE vault (episodic cache + task-state records).", ""),
    EnvVar("MEMEM_VAULT", "memory", "memem's lesson vault (markdown long-term memories), if memem is installed.", ""),
    EnvVar("SLICEAGENT_SKILLS_DIR", "memory", "Extra directory to discover skills from.", ""),
    EnvVar("AGENT_BACKGROUND_REVIEW", "agent", "Set truthy to run an off-thread reviewer that consolidates "
           "lessons after each turn.", ""),
    EnvVar("SLICEAGENT_CACHE_DIR", "memory", "Directory for the episodic cache / durable log.", ""),
    EnvVar("AGENT_EXPERIMENTAL_ALL", "debug", "Master switch: set truthy to enable ALL experimental flags "
           "(per-flag AGENT_EXPERIMENTAL_<ID> overrides).", ""),
    # ── monitoring / debug ────────────────────────────────────────────────────────────────────
    EnvVar("AGENT_METRICS", "monitor", "Set truthy to print per-turn fresh-token (moat) metrics at exit.", ""),
    EnvVar("AGENT_TIMING", "monitor", "Set truthy to print a per-turn latency breakdown (slice build vs model).", ""),
    EnvVar("AGENT_MONITOR", "monitor", "Set truthy to enable the live monitor server.", ""),
    EnvVar("AGENT_MONITOR_PORT", "monitor", "Port for the monitor server.", ""),
    EnvVar("SLICEAGENT_MONITOR_DIR", "monitor", "Directory the monitor writes slice snapshots to.", ""),
    EnvVar("SLICEAGENT_DEBUG_TRACE", "debug", "Set truthy to print tracebacks for parked/hook errors.", ""),
    EnvVar("SLICEAGENT_NO_CLOSURE", "debug", "Debug flag: disable the turn closeout call.", ""),
    EnvVar("SLICEAGENT_PROMPT_FILE", "debug", "A/B experiment seam: path to a full SYSTEM_PROMPT template "
           "(must keep the {{MEMORY_MODEL}} marker) to override the prompt for a measurement run "
           "(evals/prompt_ab). Unset → the production prompt.", ""),
]

# Name -> EnvVar lookup built from REGISTRY (a repeated name would keep the later entry).
BY_NAME: dict[str, EnvVar] = {e.name: e for e in REGISTRY}
# The group labels used by REGISTRY entries.
GROUPS = ("agent", "provider", "ui", "memory", "monitor", "debug")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def validate_env(env: dict | None = None) -> list[str]:
    """Collect warnings for enum-style variables set to an unrecognised value.

    Only registry entries with both `validate=True` and a non-empty `choices`
    are checked. Unset or empty values are skipped. A value is accepted when,
    stripped and lowercased, it equals one of the entry's choices or aliases
    (compared case-insensitively).

    Args:
        env: mapping to inspect; `os.environ` when None.

    Returns:
        One human-readable message per offending variable. Nothing is raised;
        the caller decides how to surface the messages.
    """
    source = os.environ if env is None else env
    problems = []
    for spec in REGISTRY:
        if not spec.validate or not spec.choices:
            continue
        raw = source.get(spec.name)
        if raw is None or raw == "":
            continue
        accepted = {choice.lower() for choice in spec.choices}
        accepted.update(alias.lower() for alias in spec.aliases)
        if raw.strip().lower() in accepted:
            continue
        problems.append(f"{spec.name}={raw!r} is not one of {{{', '.join(spec.choices)}}} — using default "
                        f"{spec.default!r}")
    return problems
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def current_value(name: str, env: dict | None = None) -> str:
    """Return the value of `name` as it should be displayed.

    Args:
        name: environment variable name.
        env: mapping to read from; `os.environ` when None.

    Returns:
        '' when the variable is unset; a `*** (N chars)` placeholder when the
        variable is registered as a secret and is non-empty; otherwise the raw
        value.
    """
    source = os.environ if env is None else env
    raw = source.get(name)
    if raw is None:
        return ""
    spec = BY_NAME.get(name)
    is_secret = spec and spec.secret
    if is_secret and raw:
        # never reveal any of a secret (not even the 'sk-' prefix)
        return f"*** ({len(raw)} chars)"
    return raw
|
sliceagent/errors.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""Error classification + retry.
|
|
2
|
+
|
|
3
|
+
`classify` maps an exception to structured recovery hints; `with_retry` does
|
|
4
|
+
abort-aware jittered backoff on retryable errors.
|
|
5
|
+
|
|
6
|
+
The backoff is jittered: a
|
|
7
|
+
lock-guarded monotonic counter seeds a per-call RNG so concurrent sessions
|
|
8
|
+
hitting the same rate-limited provider don't all retry at the same instant
|
|
9
|
+
(decorrelated uniform jitter).
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import random
|
|
14
|
+
import threading
|
|
15
|
+
import time
|
|
16
|
+
from enum import Enum
|
|
17
|
+
from typing import Callable
|
|
18
|
+
|
|
19
|
+
from .context_overflow import is_context_overflow
|
|
20
|
+
from .events import ApiRetry, Dispatcher
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ErrorKind(str, Enum):
    """Typed failure taxonomy returned under the `kind` key of `classify`.

    str-based so it stays backward-compatible: `classify(e)["kind"] == "rate_limit"`
    still works, and it's JSON-serializable for telemetry. ALL members let the
    metrics layer pre-register a counter per kind.
    """
    CONTEXT_OVERFLOW = "context_overflow"  # is_context_overflow(error) was true
    AUTH = "auth"                          # status 401 / 403
    EMPTY_RESPONSE = "empty_response"      # an EmptyResponseError
    RATE_LIMIT = "rate_limit"              # status 429 or rate-limit / overloaded wording
    SERVER = "server"                      # 5xx status, or "503" in the message
    TIMEOUT = "timeout"                    # timeout wording
    CONNECTION = "connection"              # connection wording
    UNKNOWN = "unknown"                    # nothing above matched
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class EmptyResponseError(Exception):
    """The provider returned a degenerate completion — no content AND no tool calls.

    Some providers/proxies occasionally emit an empty body; returning it stalls
    the loop. `classify` marks this error RETRYABLE (kind `empty_response`), so
    `with_retry` re-rolls instead.
    """
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Process-wide call counter mixed into each jitter seed so two calls landing on
# the same clock reading still get different seeds. The lock keeps the
# increment-and-read atomic across threads.
_jitter_counter = 0
_jitter_lock = threading.Lock()


def jittered_backoff(
    attempt: int,
    *,
    base_delay: float = 0.5,
    max_delay: float = 5.0,
    jitter_ratio: float = 0.5,
) -> float:
    """Return an exponential backoff delay with uniform random jitter added.

    Args:
        attempt: 1-based retry attempt number; values below 1 behave like 1.
        base_delay: Delay in seconds for attempt 1. A value <= 0 selects
            `max_delay` as the un-jittered delay.
        max_delay: Cap, in seconds, applied before jitter is added.
        jitter_ratio: Jitter is drawn uniformly from [0, jitter_ratio * delay].

    Returns:
        min(base_delay * 2**(attempt - 1), max_delay) + jitter, in seconds.

    Each call seeds its own RNG from the clock and the shared counter, so
    concurrent callers retrying against the same provider spread out instead
    of waking at the same instant.
    """
    global _jitter_counter
    with _jitter_lock:
        _jitter_counter += 1
        tick = _jitter_counter

    doublings = max(0, attempt - 1)
    # Skip the power computation for huge exponents (and for a non-positive
    # base) and go straight to the cap.
    use_cap = doublings >= 63 or base_delay <= 0
    delay = max_delay if use_cap else min(base_delay * (2 ** doublings), max_delay)

    # Seed from time + counter for decorrelation even with coarse clocks.
    seed = (time.time_ns() ^ (tick * 0x9E3779B9)) & 0xFFFFFFFF
    jitter = random.Random(seed).uniform(0, jitter_ratio * delay)
    return delay + jitter
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def classify(error: Exception) -> dict:
    """Map `error` to recovery hints for the retry loop and telemetry.

    Returns a dict with:
      - `retryable`: True for rate-limit / overloaded / "503" wording, a 429 or
        5xx status, timeout or connection-error wording, or an
        `EmptyResponseError`; forced to False for 401/403 and for context
        overflow.
      - `is_context_overflow`: what `is_context_overflow(error)` reports.
      - `status`: `error.status_code`, else `error.status`, else None (read
        from `error` itself only).
      - `kind`: the `ErrorKind` bucket, chosen independently of `retryable`.
    """
    text = str(error).lower()
    status = getattr(error, "status_code", None) or getattr(error, "status", None)
    overflow = is_context_overflow(error)
    empty = isinstance(error, EmptyResponseError)

    # Signals shared by the retry decision and the telemetry bucket.
    throttled = (
        status == 429
        or "rate limit" in text
        or "too many requests" in text
        or "overloaded" in text
    )
    server_side = (isinstance(status, int) and 500 <= status < 600) or "503" in text
    timed_out = "timeout" in text or "timed out" in text
    auth_failure = status in (401, 403)

    if auth_failure or overflow:
        # auth — never retry; overflow — tighten the slice, don't blindly
        # re-send the oversized request
        retryable = False
    else:
        retryable = bool(
            throttled
            or server_side
            or timed_out
            or "connection error" in text
            or "econn" in text
            or empty  # degenerate empty completion — re-roll
        )

    # Bucket the failure for telemetry (orthogonal to `retryable`; lets the
    # metrics layer count rate-limit vs timeout vs overflow vs empty).
    if overflow:
        kind = ErrorKind.CONTEXT_OVERFLOW
    elif auth_failure:
        kind = ErrorKind.AUTH
    elif empty:
        kind = ErrorKind.EMPTY_RESPONSE
    elif throttled:
        kind = ErrorKind.RATE_LIMIT
    elif server_side:
        kind = ErrorKind.SERVER
    elif timed_out:
        kind = ErrorKind.TIMEOUT
    elif "connection" in text or "econn" in text:
        kind = ErrorKind.CONNECTION
    else:
        kind = ErrorKind.UNKNOWN

    return {"retryable": retryable, "is_context_overflow": overflow, "status": status, "kind": kind}
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _retry_after_seconds(error: Exception) -> "float | None":
|
|
130
|
+
"""Best-effort: a 429/503 may carry a Retry-After (SDK `.retry_after` or a response header) telling us
|
|
131
|
+
EXACTLY how long to wait — honor it instead of guessing. Returns seconds, or None to fall back to
|
|
132
|
+
backoff (incl. when Retry-After is an HTTP-date, which we don't parse). Never raises."""
|
|
133
|
+
try:
|
|
134
|
+
val = getattr(error, "retry_after", None)
|
|
135
|
+
if val is None:
|
|
136
|
+
hdrs = getattr(getattr(error, "response", None), "headers", None)
|
|
137
|
+
if hdrs is not None:
|
|
138
|
+
val = hdrs.get("retry-after") or hdrs.get("Retry-After")
|
|
139
|
+
if val is None:
|
|
140
|
+
return None
|
|
141
|
+
secs = float(val)
|
|
142
|
+
return secs if secs >= 0 else None
|
|
143
|
+
except (TypeError, ValueError):
|
|
144
|
+
return None
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def with_retry(
    fn: Callable[[], object],
    *,
    max_attempts: int = 3,
    is_retryable: Callable[[Exception], bool] | None = None,
    dispatch: Dispatcher | None = None,
):
    """Call `fn`, retrying with jittered backoff while the failure is retryable.

    Args:
        fn: zero-argument callable to run.
        max_attempts: total number of calls allowed. Values below 1 are
            treated as 1, so `fn` is always called at least once.
        is_retryable: optional predicate deciding whether an exception should
            be retried; defaults to `classify(e)["retryable"]`.
        dispatch: optional event sink; receives an `ApiRetry` before each wait.

    Returns:
        Whatever `fn` returns on its first successful call.

    Raises:
        The exception from `fn`, unchanged, when it is not retryable or the
        attempts are exhausted.
    """
    attempts = max(1, max_attempts)  # a non-positive budget must still run fn once
    for attempt in range(1, attempts + 1):
        try:
            return fn()
        except Exception as e:
            retry = is_retryable(e) if is_retryable else classify(e)["retryable"]
            if not retry or attempt == attempts:
                raise
            if dispatch:
                dispatch(ApiRetry(attempt=attempt, error=str(e)[:200]))
            delay = jittered_backoff(attempt)
            ra = _retry_after_seconds(e)
            if ra is not None:
                # honor server Retry-After, capped so a huge value can't stall the turn
                delay = max(delay, min(ra, 60.0))
            time.sleep(delay)
|
sliceagent/events.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Event system: the loop's ONLY output path.
|
|
2
|
+
|
|
3
|
+
The core never prints or writes files — it dispatches events. The host composes a
|
|
4
|
+
dispatcher from sinks (slice-updater, durable log, CLI/TUI, SDK). Sink failures are
|
|
5
|
+
contained so a frontend can't break the loop.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Callable
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class Event:
    """Base class for everything passed to a `Dispatcher`; carries no fields itself."""
    pass


@dataclass
class StepBegin(Event):
    """Marks the start of a step."""
    step: int  # step number (presumably the index within the current turn — confirm against the loop)


@dataclass
class StepEnd(Event):
    """Marks the end of a step."""
    step: int
    usage: dict  # usage figures for the step; key schema is not defined in this module
    stop_reason: str


@dataclass
class SliceBuilt(Event):
    """Carries the slice that was rendered for the model."""
    rendered: str  # the volatile user message this turn (for debugging/inspection)
    messages: list | None = None  # the FULL model-visible messages [system, user] (monitor/inspection)


@dataclass
class AssistantText(Event):
    """Text content attributed to the assistant."""
    content: str


@dataclass
class ToolStarted(Event):
    """A tool invocation is starting."""
    name: str  # tool name
    args: dict  # arguments for the invocation


@dataclass
class ToolResult(Event):
    """Outcome of a tool invocation."""
    name: str
    args: dict
    output: str
    failing: bool  # presumably True when the tool call is considered failed — confirm against the producer


@dataclass
class ApiRetry(Event):
    """Announces that a failed call is about to be retried."""
    attempt: int  # number of the attempt that just failed
    error: str    # error text (the retry helper in errors.py passes at most 200 characters)


@dataclass
class SliceTightened(Event):
    """The slice was tightened; `reason` defaults to "context_overflow"."""
    level: int
    reason: str = "context_overflow"


@dataclass
class TurnEnd(Event):
    """A turn finished."""
    stop_reason: str
    steps: int
    usage: dict  # usage figures for the turn; key schema is not defined in this module


@dataclass
class TurnInterrupted(Event):
    """A turn stopped before completing normally."""
    reason: str  # "aborted" | "max_steps" | "error"
    message: str | None = None


@dataclass
class LessonSaved(Event):
    """A lesson was written to memory."""
    title: str
    content: str  # the lesson mined into memem (write side of the memory loop)


# A dispatcher is any callable that accepts one Event and returns nothing.
Dispatcher = Callable[[Event], None]
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def make_dispatcher(*sinks: Callable[[Event], None]) -> Dispatcher:
    """Build a dispatcher that fans each event out to `sinks`, in the order given.

    Every sink is called with the event. An `Exception` raised by one sink is
    discarded and the remaining sinks still run: a sink/listener failure must
    not affect the loop.
    """
    def dispatch(event: Event) -> None:
        for deliver in sinks:
            try:
                deliver(event)
            except Exception:
                continue  # deliberately contained; move on to the next sink
    return dispatch
|