nexo-brain 7.9.27 → 7.9.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +16 -1
- package/package.json +1 -1
- package/src/agent_runner.py +58 -2
- package/src/call_model_raw.py +259 -7
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.9.27",
|
|
3
|
+
"version": "7.9.28",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -18,7 +18,9 @@
|
|
|
18
18
|
|
|
19
19
|
[Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
|
|
20
20
|
|
|
21
|
-
Version `7.9.
|
|
21
|
+
Version `7.9.28` is the current packaged-runtime line. Patch release over `7.9.27`: optional override files at `~/.nexo/config/llm_endpoint.json` and `~/.nexo/config/auth_provider.json` let third-party orchestrators redirect Brain's Anthropic SDK calls and delegate bearer token resolution to a local command (analogous to git's `credential.helper`). The same redirection is propagated to every CLI child Brain spawns (deep-sleep, evolution, followup-runner, morning-agent, email-monitor, `nexo chat`) by injecting `ANTHROPIC_BASE_URL` and `ANTHROPIC_API_KEY` into the spawned environment, so headless crons reach the proxy too. An `Idempotency-Key` (UUID4 hex) is attached per request for proxy-side dedup of transparent retries within 24h. A standalone Brain install (no override files) still hits `api.anthropic.com` directly with `ANTHROPIC_API_KEY`, exactly as before.
|
|
22
|
+
|
|
23
|
+
Previously in `7.9.27`: server startup no longer hangs the MCP `initialize` handshake when legacy followups/reminders still need owner backfill — the synchronous startup migration now runs `--rules-only` and skips the multi-minute `LocalZeroShotClassifier` load, keeping handshake under a few seconds.
|
|
22
24
|
|
|
23
25
|
Previously in `7.9.26`: headless automation prompts now receive the operator-language contract centrally, so reports, diaries, syntheses, followups, escalations, and Deep Sleep-generated memory text follow calibration even when the underlying template is English.
|
|
24
26
|
|
|
@@ -1077,6 +1079,19 @@ Use a personal plugin only when you need a new MCP tool in the runtime surface.
|
|
|
1077
1079
|
- **Auto-update is resilient.** NEXO checks for updates on startup. If an update fails, it continues with the current version and notifies you. Local migrations (database schema, configuration) always run. Network updates (git pull) can be disabled by setting `auto_update: false` in `NEXO_HOME/config/schedule.json`.
|
|
1078
1080
|
- **Secret redaction.** API keys and tokens are stripped before they ever reach memory storage.
|
|
1079
1081
|
|
|
1082
|
+
## Custom LLM endpoint (advanced)
|
|
1083
|
+
|
|
1084
|
+
NEXO Brain reads two optional override files at `~/.nexo/config/`:
|
|
1085
|
+
|
|
1086
|
+
- `llm_endpoint.json` — set a custom Anthropic-compatible base URL.
|
|
1087
|
+
- `auth_provider.json` — delegate bearer token resolution to a local command (analogous to git's `credential.helper`).
|
|
1088
|
+
|
|
1089
|
+
This lets third-party orchestrators — for example an Anthropic-compatible proxy that adds rate limiting, cost accounting, multi-provider failover, or per-team auth — route Brain's LLM calls without modifying its source.
|
|
1090
|
+
|
|
1091
|
+
**If neither file exists, Brain operates exactly as before:** direct call to `https://api.anthropic.com` using `ANTHROPIC_API_KEY` from environment or filesystem. The override path is opt-in.
|
|
1092
|
+
|
|
1093
|
+
When override mode is active, Brain attaches an opaque `Idempotency-Key` to every request so the proxy can dedup transparent retries (24h window) without double-billing. The same redirection applies to every CLI child Brain spawns (deep-sleep, evolution, followup-runner, morning-agent, email-monitor, `nexo chat`): `agent_runner.py` injects `ANTHROPIC_BASE_URL` and `ANTHROPIC_API_KEY` into the spawned environment when override mode is on, so headless crons hit the proxy too — LaunchAgent crons do not inherit env from a UI process. See `docs/api/override-files.md` for the full schema, fallback rules, and an end-to-end example.
|
|
1094
|
+
|
|
1080
1095
|
## The Psychology Behind NEXO Brain
|
|
1081
1096
|
|
|
1082
1097
|
NEXO Brain isn't just engineering — it's applied cognitive psychology:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.9.27",
|
|
3
|
+
"version": "7.9.28",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
package/src/agent_runner.py
CHANGED
|
@@ -374,6 +374,47 @@ def _codex_config_path() -> Path:
|
|
|
374
374
|
return Path.home() / ".codex" / "config.toml"
|
|
375
375
|
|
|
376
376
|
|
|
377
|
+
def _apply_llm_endpoint_override(env: dict) -> dict:
    """Point a spawned Anthropic-compatible CLI at the configured proxy.

    When ``call_model_raw.is_override_mode()`` reports that override mode is
    active, the resolved base URL and bearer are written into ``env`` as
    ``ANTHROPIC_BASE_URL`` and ``ANTHROPIC_API_KEY``. In every other case
    ``env`` is returned untouched, so standalone runs keep whatever
    configuration the operator already had.

    Both values are resolved *before* ``env`` is modified. If either resolver
    raises, the child therefore keeps a fully standalone environment instead
    of a half-applied one (proxy URL paired with the previous API key).

    Args:
        env: Environment mapping for the child process. Mutated in place.

    Returns:
        The same ``env`` object that was passed in.
    """
    try:
        from call_model_raw import (
            is_override_mode,
            resolve_api_base_url,
            resolve_auth_token,
        )
    except Exception:
        # Deliberately silent: the headless surface must never block on
        # this helper when call_model_raw cannot be imported.
        return env

    try:
        if not is_override_mode():
            return env
        base_url = resolve_api_base_url()
        bearer = resolve_auth_token()
    except Exception:
        # Best-effort by design: a misconfigured override file must not
        # crash an automation run that would have worked in standalone.
        return env

    # Apply only after both lookups succeeded, so the update is all-or-nothing
    # with respect to resolver failures.
    if base_url:
        env["ANTHROPIC_BASE_URL"] = base_url
    if bearer:
        env["ANTHROPIC_API_KEY"] = bearer
    return env
|
|
416
|
+
|
|
417
|
+
|
|
377
418
|
def _headless_env(env: dict | None = None) -> dict:
|
|
378
419
|
merged = os.environ.copy()
|
|
379
420
|
if env:
|
|
@@ -382,7 +423,7 @@ def _headless_env(env: dict | None = None) -> dict:
|
|
|
382
423
|
merged["NEXO_AUTOMATION"] = "1"
|
|
383
424
|
merged.pop("CLAUDECODE", None)
|
|
384
425
|
merged.pop("CLAUDE_CODE", None)
|
|
385
|
-
return merged
|
|
426
|
+
return _apply_llm_endpoint_override(merged)
|
|
386
427
|
|
|
387
428
|
|
|
388
429
|
def _load_client_bootstrap_prompt(client: str) -> str:
|
|
@@ -603,6 +644,7 @@ def run_automation_interactive(
|
|
|
603
644
|
launch_env = os.environ.copy()
|
|
604
645
|
if env:
|
|
605
646
|
launch_env.update(env)
|
|
647
|
+
launch_env = _apply_llm_endpoint_override(launch_env)
|
|
606
648
|
cwd_path = Path(_interactive_target_cwd(target))
|
|
607
649
|
|
|
608
650
|
# Best-effort resonance lookup — interactive sessions do not swap the
|
|
@@ -1043,7 +1085,21 @@ def run_automation_prompt(
|
|
|
1043
1085
|
|
|
1044
1086
|
bare_api_key = ""
|
|
1045
1087
|
if resolved_bare:
|
|
1046
|
-
|
|
1088
|
+
# In override mode the bearer was already injected into
|
|
1089
|
+
# run_env by _apply_llm_endpoint_override (proxy token, not
|
|
1090
|
+
# the operator's raw Anthropic key). Reuse it instead of
|
|
1091
|
+
# asking the keychain helper for a real Anthropic key — the
|
|
1092
|
+
# proxy expects its own bearer and would reject the real one.
|
|
1093
|
+
override_bearer = run_env.get("ANTHROPIC_API_KEY", "").strip() if run_env else ""
|
|
1094
|
+
try:
|
|
1095
|
+
from call_model_raw import is_override_mode as _is_override_mode
|
|
1096
|
+
_override_active = _is_override_mode()
|
|
1097
|
+
except Exception:
|
|
1098
|
+
_override_active = False
|
|
1099
|
+
if _override_active and override_bearer:
|
|
1100
|
+
bare_api_key = override_bearer
|
|
1101
|
+
else:
|
|
1102
|
+
bare_api_key = _resolve_anthropic_api_key()
|
|
1047
1103
|
if not bare_api_key:
|
|
1048
1104
|
# Silent fallback: we would rather take the slower path
|
|
1049
1105
|
# than force the caller to fail-closed on an env quirk.
|
package/src/call_model_raw.py
CHANGED
|
@@ -41,7 +41,11 @@ gap.
|
|
|
41
41
|
from __future__ import annotations
|
|
42
42
|
|
|
43
43
|
import json
|
|
44
|
+
import logging
|
|
44
45
|
import os
|
|
46
|
+
import subprocess
|
|
47
|
+
import sys
|
|
48
|
+
import uuid
|
|
45
49
|
from pathlib import Path
|
|
46
50
|
|
|
47
51
|
|
|
@@ -65,6 +69,203 @@ _OPENAI_KEY_PATHS = (
|
|
|
65
69
|
Path.home() / ".codex" / "auth.json",
|
|
66
70
|
)
|
|
67
71
|
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
# Optional override files (~/.nexo/config/)
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
# Two forward-compatible JSON files let third-party orchestrators (such as an
|
|
76
|
+
# Anthropic-compatible proxy) redirect the LLM endpoint and delegate token
|
|
77
|
+
# resolution to a local helper. Pattern is analogous to git's `core.editor`
|
|
78
|
+
# and `credential.helper`.
|
|
79
|
+
#
|
|
80
|
+
# ~/.nexo/config/llm_endpoint.json
|
|
81
|
+
# {
|
|
82
|
+
# "version": 1,
|
|
83
|
+
# "anthropic_base_url": "https://my-proxy.example.com/api/proxy"
|
|
84
|
+
# }
|
|
85
|
+
#
|
|
86
|
+
# ~/.nexo/config/auth_provider.json
|
|
87
|
+
# {
|
|
88
|
+
# "version": 1,
|
|
89
|
+
# "command": "/path/to/auth-helper",
|
|
90
|
+
# "args": ["--for", "anthropic"],
|
|
91
|
+
# "timeout_sec": 5
|
|
92
|
+
# }
|
|
93
|
+
#
|
|
94
|
+
# If neither file exists the caller falls back to standalone behaviour:
|
|
95
|
+
# direct call to api.anthropic.com using ANTHROPIC_API_KEY from environment
|
|
96
|
+
# or filesystem. NEXO Brain's open-source distribution is unaffected.
|
|
97
|
+
|
|
98
|
+
def _resolve_brain_config_dir() -> Path:
    """Locate the Brain ``config/`` directory.

    A non-blank ``NEXO_HOME`` environment variable wins (with ``~``
    expanded), which lets tests, devcontainers and non-default installs
    point at their own tree. Otherwise the directory is ``~/.nexo/config``.
    """
    custom_root = os.environ.get("NEXO_HOME", "").strip()
    if not custom_root:
        return Path.home() / ".nexo" / "config"
    return Path(custom_root).expanduser() / "config"
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
_BRAIN_CONFIG_DIR = _resolve_brain_config_dir()
|
|
109
|
+
_SUPPORTED_OVERRIDE_VERSION = 1
|
|
110
|
+
_LLM_ENDPOINT_FILENAME = "llm_endpoint.json"
|
|
111
|
+
_AUTH_PROVIDER_FILENAME = "auth_provider.json"
|
|
112
|
+
_DEFAULT_ANTHROPIC_BASE_URL = "https://api.anthropic.com"
|
|
113
|
+
_DEFAULT_AUTH_PROVIDER_TIMEOUT = 5
|
|
114
|
+
|
|
115
|
+
# Internal map: (concrete_model, effort) -> wire alias accepted by an
|
|
116
|
+
# Anthropic-compatible proxy. ONLY consulted when override mode is active.
|
|
117
|
+
# Standalone mode never reads this map and keeps using the concrete model.
|
|
118
|
+
#
|
|
119
|
+
# Add entries here in lockstep with new tiers added to resonance_tiers.json.
|
|
120
|
+
# Failing fast on an unmapped (model, effort) is preferable to letting the
|
|
121
|
+
# proxy reject the request with a 400 — the operator gets a clear local
|
|
122
|
+
# error instead of a remote one.
|
|
123
|
+
_CONCRETE_TO_ALIAS: dict[tuple[str, str], str] = {
|
|
124
|
+
("claude-opus-4-7[1m]", "max"): "nexo-max",
|
|
125
|
+
("claude-opus-4-7[1m]", "xhigh"): "nexo-high",
|
|
126
|
+
("claude-opus-4-7[1m]", "high"): "nexo-medium",
|
|
127
|
+
("claude-opus-4-7[1m]", "medium"): "nexo-low",
|
|
128
|
+
("claude-haiku-4-5-20251001", ""): "nexo-mini",
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _read_versioned_config(filename: str) -> dict | None:
    """Load a versioned override file from the Brain config directory.

    The directory comes from the module global ``_BRAIN_CONFIG_DIR``, which
    is looked up on every call, so tests can monkeypatch it. Note that the
    global itself is computed once at import time: changing ``NEXO_HOME``
    after this module has been imported does not move the directory.

    Args:
        filename: File name relative to the config directory.

    Returns:
        The parsed dict iff the file exists, is readable UTF-8 JSON, is a
        JSON object, and declares the supported integer ``version``. Any
        other case returns ``None``; every case except "file missing" also
        writes a one-line warning to stderr so operators can see why the
        override was ignored. Read and parse errors are never raised.
    """
    path = _BRAIN_CONFIG_DIR / filename
    try:
        if not path.is_file():
            return None
        # Explicit UTF-8: JSON is UTF-8 by spec and the locale default is
        # not reliable under cron/LaunchAgent environments.
        cfg = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError
        # (non-UTF-8 bytes), which previously escaped this function.
        sys.stderr.write(
            f"[brain] failed to read override {filename}: {exc}; ignoring\n"
        )
        return None
    if not isinstance(cfg, dict):
        sys.stderr.write(
            f"[brain] override {filename} is not a JSON object; ignoring\n"
        )
        return None
    version = cfg.get("version", 0)
    # bool is an int subclass and ``True == 1``; reject it explicitly so
    # ``"version": true`` is not mistaken for version 1.
    if isinstance(version, bool) or version != _SUPPORTED_OVERRIDE_VERSION:
        sys.stderr.write(
            f"[brain] override {filename} version {version!r} not supported "
            f"(expected {_SUPPORTED_OVERRIDE_VERSION}); ignoring\n"
        )
        return None
    return cfg
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def resolve_api_base_url() -> str:
    """Pick the Anthropic API base URL.

    Precedence, first non-blank value wins:
      1) ``anthropic_base_url`` from the ``llm_endpoint.json`` override file.
      2) The ``NEXO_LLM_ENDPOINT`` environment variable.
      3) The built-in default ``https://api.anthropic.com`` (standalone).
    """
    endpoint_cfg = _read_versioned_config(_LLM_ENDPOINT_FILENAME)
    from_file = ""
    if endpoint_cfg:
        from_file = str(endpoint_cfg.get("anthropic_base_url", "") or "").strip()
    if from_file:
        return from_file

    from_env = os.environ.get("NEXO_LLM_ENDPOINT", "").strip()
    return from_env if from_env else _DEFAULT_ANTHROPIC_BASE_URL
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _override_force_disabled() -> bool:
    """Report whether ``NEXO_RAW_ANTHROPIC`` is set to a truthy value.

    Internal escape hatch for the test suite and maintainers who need to
    check a regression against the upstream Anthropic API without renaming
    the override files on disk. Kept undocumented outside the source so the
    override-mode contract stays purely file-driven for everyone else.
    """
    truthy = {"1", "true", "yes", "on"}
    flag = os.environ.get("NEXO_RAW_ANTHROPIC", "")
    return flag.strip().lower() in truthy
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def is_override_mode() -> bool:
    """Tell whether LLM calls should be routed through a custom endpoint.

    Returns ``True`` only when the force-disable escape hatch is off and a
    valid ``llm_endpoint.json`` supplies a non-blank ``anthropic_base_url``.
    The gate is the file rather than an environment variable, so env-only
    setups stay transparent to standalone callers.
    """
    if _override_force_disabled():
        return False
    endpoint_cfg = _read_versioned_config(_LLM_ENDPOINT_FILENAME)
    if endpoint_cfg:
        custom_url = str(endpoint_cfg.get("anthropic_base_url", "") or "").strip()
        return bool(custom_url)
    return False
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _run_auth_provider(cfg: dict) -> str:
    """Run the helper described by ``auth_provider.json`` and return its token.

    Returns the trimmed stdout of the helper on exit code 0, or ``""`` on any
    failure (no command, spawn error, timeout, non-zero exit, empty output).
    Every failure except "no command configured" writes a one-line warning
    to stderr. Never raises for helper-side problems.
    """
    cmd = str(cfg.get("command", "") or "").strip()
    if not cmd:
        return ""

    args_raw = cfg.get("args") or []
    if not isinstance(args_raw, (list, tuple)):
        # A bare string would otherwise be iterated character by character.
        sys.stderr.write(
            "[brain] auth_provider args is not a JSON array; ignoring args\n"
        )
        args_raw = []
    args = [str(a) for a in args_raw if isinstance(a, (str, int, float))]

    try:
        timeout_sec = int(cfg.get("timeout_sec", _DEFAULT_AUTH_PROVIDER_TIMEOUT))
    except (TypeError, ValueError, OverflowError):
        timeout_sec = _DEFAULT_AUTH_PROVIDER_TIMEOUT
    if timeout_sec <= 0:
        # A zero/negative timeout would expire before the helper can answer.
        timeout_sec = _DEFAULT_AUTH_PROVIDER_TIMEOUT

    try:
        # List form, no shell: the command string is never shell-interpreted.
        result = subprocess.run(
            [cmd, *args],
            capture_output=True,
            text=True,
            timeout=timeout_sec,
            check=False,
        )
    except subprocess.TimeoutExpired as exc:
        # Reported separately so the operator sees that the helper hung
        # rather than a generic "auth missing" further downstream.
        sys.stderr.write(
            f"[brain] auth_provider command timed out after {timeout_sec}s: "
            f"{exc}; falling back to env\n"
        )
        return ""
    except (OSError, ValueError) as exc:
        # OSError: missing/non-executable helper. ValueError: invalid
        # argument (e.g. embedded NUL) or undecodable helper output.
        sys.stderr.write(
            f"[brain] auth_provider command failed: {exc}; falling back to env\n"
        )
        return ""

    if result.returncode != 0:
        stderr_excerpt = (result.stderr or "").strip()[:200]
        sys.stderr.write(
            f"[brain] auth_provider command exit={result.returncode}: "
            f"{stderr_excerpt}; falling back to env\n"
        )
        return ""

    token = (result.stdout or "").strip()
    if not token:
        sys.stderr.write(
            "[brain] auth_provider command returned empty output; "
            "falling back to env\n"
        )
    return token


def resolve_auth_token() -> str:
    """Return the bearer token to use against the resolved base URL.

    Resolution order:
      1) The ``command`` from the ``auth_provider.json`` override file
         (subprocess stdout, trimmed). Honours a positive ``timeout_sec``
         (default ``_DEFAULT_AUTH_PROVIDER_TIMEOUT``). Falls through to (2)
         on any failure.
      2) Whatever ``_resolve_anthropic_key()`` yields (environment variable,
         then legacy filesystem locations).

    Returns:
        The token, or an empty string if nothing resolves; callers are
        expected to raise so the failure surfaces explicitly.
    """
    cfg = _read_versioned_config(_AUTH_PROVIDER_FILENAME)
    if cfg:
        token = _run_auth_provider(cfg)
        if token:
            return token
    return _resolve_anthropic_key()
|
|
268
|
+
|
|
68
269
|
|
|
69
270
|
def _resolve_anthropic_key() -> str:
|
|
70
271
|
env_key = os.environ.get("ANTHROPIC_API_KEY", "").strip()
|
|
@@ -138,11 +339,28 @@ def _extract_openai_text(response) -> str:
|
|
|
138
339
|
return ""
|
|
139
340
|
|
|
140
341
|
|
|
342
|
+
def _resolve_override_alias(model: str, effort: str) -> str:
    """Map a concrete ``(model, effort)`` pair to the proxy's wire alias.

    In override mode the proxy validates aliases rather than concrete model
    names. The lookup goes through ``_CONCRETE_TO_ALIAS``; a pair with no
    entry fails closed, so the operator gets a local configuration error
    instead of a remote rejection.

    Raises:
        ClassifierUnavailableError: no alias is mapped for the pair.
    """
    wire_alias = _CONCRETE_TO_ALIAS.get((model, effort))
    if wire_alias:
        return wire_alias
    raise ClassifierUnavailableError(
        f"override mode: no alias mapped for (model={model!r}, "
        f"effort={effort!r}); update _CONCRETE_TO_ALIAS in call_model_raw.py"
    )
|
|
356
|
+
|
|
357
|
+
|
|
141
358
|
def _call_anthropic_raw(
|
|
142
359
|
*,
|
|
143
360
|
prompt: str,
|
|
144
361
|
system: str | None,
|
|
145
362
|
model: str,
|
|
363
|
+
effort: str,
|
|
146
364
|
max_tokens: int,
|
|
147
365
|
temperature: float,
|
|
148
366
|
stop_sequences: list[str],
|
|
@@ -153,13 +371,34 @@ def _call_anthropic_raw(
|
|
|
153
371
|
except ImportError as exc:
|
|
154
372
|
raise ClassifierUnavailableError(f"anthropic SDK missing: {exc}") from exc
|
|
155
373
|
|
|
156
|
-
|
|
157
|
-
if
|
|
158
|
-
|
|
374
|
+
override = is_override_mode()
|
|
375
|
+
if override:
|
|
376
|
+
# Proxy mode: resolve bearer via auth_provider + env fallbacks,
|
|
377
|
+
# redirect base_url, translate concrete model to wire alias, and
|
|
378
|
+
# attach an Idempotency-Key so the proxy can dedup retries.
|
|
379
|
+
wire_model = _resolve_override_alias(model, effort)
|
|
380
|
+
base_url = resolve_api_base_url()
|
|
381
|
+
api_key = resolve_auth_token()
|
|
382
|
+
if not api_key:
|
|
383
|
+
raise ClassifierUnavailableError(
|
|
384
|
+
"anthropic override: no bearer resolved (auth_provider and env both empty)"
|
|
385
|
+
)
|
|
386
|
+
client = anthropic.Anthropic(
|
|
387
|
+
api_key=api_key,
|
|
388
|
+
base_url=base_url,
|
|
389
|
+
timeout=timeout,
|
|
390
|
+
)
|
|
391
|
+
else:
|
|
392
|
+
# Standalone: behaviour identical to pre-V11. No override, no alias
|
|
393
|
+
# translation, no extra headers — direct hit to api.anthropic.com.
|
|
394
|
+
wire_model = model
|
|
395
|
+
api_key = _resolve_anthropic_key()
|
|
396
|
+
if not api_key:
|
|
397
|
+
raise ClassifierUnavailableError("anthropic: no ANTHROPIC_API_KEY found")
|
|
398
|
+
client = anthropic.Anthropic(api_key=api_key, timeout=timeout)
|
|
159
399
|
|
|
160
|
-
client = anthropic.Anthropic(api_key=api_key, timeout=timeout)
|
|
161
400
|
kwargs: dict = {
|
|
162
|
-
"model":
|
|
401
|
+
"model": wire_model,
|
|
163
402
|
"max_tokens": max_tokens,
|
|
164
403
|
"temperature": temperature,
|
|
165
404
|
"stop_sequences": stop_sequences,
|
|
@@ -168,6 +407,12 @@ def _call_anthropic_raw(
|
|
|
168
407
|
if system:
|
|
169
408
|
kwargs["system"] = system
|
|
170
409
|
|
|
410
|
+
if override:
|
|
411
|
+
# Idempotency-Key: opaque per-request token reused on transparent
|
|
412
|
+
# retries. Proxy dedups on (token_id + idempotency_key) for 24h, so
|
|
413
|
+
# network-level retries do not double-bill the user.
|
|
414
|
+
kwargs["extra_headers"] = {"Idempotency-Key": uuid.uuid4().hex}
|
|
415
|
+
|
|
171
416
|
try:
|
|
172
417
|
response = client.messages.create(**kwargs)
|
|
173
418
|
except anthropic.APITimeoutError as exc:
|
|
@@ -301,7 +546,7 @@ def call_model_raw(
|
|
|
301
546
|
raise ClassifierUnavailableError("automation_backend=none")
|
|
302
547
|
|
|
303
548
|
try:
|
|
304
|
-
model,
|
|
549
|
+
model, effort = resolve_model_and_effort(
|
|
305
550
|
caller=caller,
|
|
306
551
|
backend=backend,
|
|
307
552
|
explicit_tier=tier,
|
|
@@ -320,6 +565,7 @@ def call_model_raw(
|
|
|
320
565
|
prompt=prompt,
|
|
321
566
|
system=system,
|
|
322
567
|
model=model,
|
|
568
|
+
effort=effort,
|
|
323
569
|
max_tokens=max_tokens,
|
|
324
570
|
temperature=temperature,
|
|
325
571
|
stop_sequences=stop_sequences,
|
|
@@ -339,4 +585,10 @@ def call_model_raw(
|
|
|
339
585
|
raise ClassifierUnavailableError(f"unsupported backend: {backend}")
|
|
340
586
|
|
|
341
587
|
|
|
342
|
-
__all__ = [
|
|
588
|
+
__all__ = [
|
|
589
|
+
"call_model_raw",
|
|
590
|
+
"ClassifierUnavailableError",
|
|
591
|
+
"is_override_mode",
|
|
592
|
+
"resolve_api_base_url",
|
|
593
|
+
"resolve_auth_token",
|
|
594
|
+
]
|