coderouter-cli 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/__init__.py +17 -0
- coderouter/__main__.py +6 -0
- coderouter/adapters/__init__.py +23 -0
- coderouter/adapters/anthropic_native.py +502 -0
- coderouter/adapters/base.py +220 -0
- coderouter/adapters/openai_compat.py +395 -0
- coderouter/adapters/registry.py +17 -0
- coderouter/cli.py +345 -0
- coderouter/cli_stats.py +751 -0
- coderouter/config/__init__.py +10 -0
- coderouter/config/capability_registry.py +339 -0
- coderouter/config/env_file.py +295 -0
- coderouter/config/loader.py +73 -0
- coderouter/config/schemas.py +515 -0
- coderouter/data/__init__.py +7 -0
- coderouter/data/model-capabilities.yaml +86 -0
- coderouter/doctor.py +1596 -0
- coderouter/env_security.py +434 -0
- coderouter/errors.py +29 -0
- coderouter/ingress/__init__.py +5 -0
- coderouter/ingress/anthropic_routes.py +205 -0
- coderouter/ingress/app.py +144 -0
- coderouter/ingress/dashboard_routes.py +493 -0
- coderouter/ingress/metrics_routes.py +92 -0
- coderouter/ingress/openai_routes.py +153 -0
- coderouter/logging.py +315 -0
- coderouter/metrics/__init__.py +39 -0
- coderouter/metrics/collector.py +471 -0
- coderouter/metrics/prometheus.py +221 -0
- coderouter/output_filters.py +407 -0
- coderouter/routing/__init__.py +13 -0
- coderouter/routing/auto_router.py +244 -0
- coderouter/routing/capability.py +285 -0
- coderouter/routing/fallback.py +611 -0
- coderouter/translation/__init__.py +57 -0
- coderouter/translation/anthropic.py +204 -0
- coderouter/translation/convert.py +1291 -0
- coderouter/translation/tool_repair.py +236 -0
- coderouter_cli-1.7.0.dist-info/METADATA +509 -0
- coderouter_cli-1.7.0.dist-info/RECORD +43 -0
- coderouter_cli-1.7.0.dist-info/WHEEL +4 -0
- coderouter_cli-1.7.0.dist-info/entry_points.txt +2 -0
- coderouter_cli-1.7.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Configuration loading and schemas."""
|
|
2
|
+
|
|
3
|
+
from coderouter.config.loader import load_config
|
|
4
|
+
from coderouter.config.schemas import (
|
|
5
|
+
Capabilities,
|
|
6
|
+
CodeRouterConfig,
|
|
7
|
+
ProviderConfig,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
__all__ = ["Capabilities", "CodeRouterConfig", "ProviderConfig", "load_config"]
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
"""Declarative model-capabilities.yaml registry (v0.7-A).
|
|
2
|
+
|
|
3
|
+
Motivation
|
|
4
|
+
v0.5-A hardcoded the "which Anthropic models accept the `thinking`
|
|
5
|
+
body field" heuristic as a regex literal inside
|
|
6
|
+
``coderouter.routing.capability``. Adding a new family (or a new
|
|
7
|
+
capability flag) meant a code change. v0.7-A externalizes that
|
|
8
|
+
heuristic into a YAML registry so the common maintenance action —
|
|
9
|
+
"Anthropic shipped 4-8, which accepts thinking" — becomes a one-line
|
|
10
|
+
YAML edit instead of a Python patch.
|
|
11
|
+
|
|
12
|
+
The registry answers "given (kind, model), what capability flags are
|
|
13
|
+
declared?" and is consulted by the v0.5 capability gate functions
|
|
14
|
+
(``provider_supports_thinking`` et al.) after the per-provider
|
|
15
|
+
explicit flag on ``providers.yaml`` has been checked. Precedence:
|
|
16
|
+
|
|
17
|
+
providers.yaml capabilities.* ─── highest (explicit opt-in)
|
|
18
|
+
user file ~/.coderouter/mcy ─── per-deployment overrides
|
|
19
|
+
bundled coderouter/data/mcy ─── shipped defaults
|
|
20
|
+
unset ─── flag defaults to False
|
|
21
|
+
|
|
22
|
+
Design notes
|
|
23
|
+
- Pure functions + a small Pydantic schema. No I/O outside the
|
|
24
|
+
loader entry points. The registry instance is loaded once at
|
|
25
|
+
startup (via ``CapabilityRegistry.load_default``) and reused.
|
|
26
|
+
- fnmatch globs, not regex. Globs cover the real-world pattern
|
|
27
|
+
("all claude-opus-4-x", "qwen3-coder:*") without making the YAML
|
|
28
|
+
author learn escape rules.
|
|
29
|
+
- First-match-per-flag semantics: rules are walked top-to-bottom
|
|
30
|
+
per flag, first rule whose glob matches AND declares that flag
|
|
31
|
+
determines the value. A rule may declare only a subset of flags;
|
|
32
|
+
undeclared flags keep looking further down the list. This means a
|
|
33
|
+
specific early rule can override one capability while letting a
|
|
34
|
+
broader later rule handle the rest.
|
|
35
|
+
- ``kind`` filter is optional (``"any"`` default). When set, only
|
|
36
|
+
providers of that adapter kind are candidates. This replaces the
|
|
37
|
+
old hardcoded ``if kind != "anthropic": return False`` guard —
|
|
38
|
+
the bundled YAML simply does not declare any openai_compat rules
|
|
39
|
+
for thinking, so openai_compat providers get ``thinking=None``
|
|
40
|
+
from the registry, which the gate function treats as False.
|
|
41
|
+
- No mutation. Registry instances are immutable; ``load_default``
|
|
42
|
+
re-reads disk each call (cheap), but the capability module caches
|
|
43
|
+
a single instance.
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
from __future__ import annotations
|
|
47
|
+
|
|
48
|
+
import fnmatch
|
|
49
|
+
from dataclasses import dataclass
|
|
50
|
+
from importlib import resources
|
|
51
|
+
from pathlib import Path
|
|
52
|
+
from typing import Literal
|
|
53
|
+
|
|
54
|
+
import yaml
|
|
55
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
56
|
+
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
# YAML schema
|
|
59
|
+
# ---------------------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class RegistryCapabilities(BaseModel):
    """Capability flags that a single registry rule may declare.

    Every field is optional and defaults to ``None``. ``None`` is the
    "no opinion" marker: a rule that leaves a flag unset does not decide
    that flag, so the lookup keeps scanning later rules for it. Unknown
    keys are rejected (``extra="forbid"``) so a typo in the YAML fails
    validation instead of being silently ignored.
    """

    model_config = ConfigDict(extra="forbid")

    # Whether the Anthropic ``thinking`` request block may be forwarded.
    thinking: bool | None = Field(
        description=(
            "Anthropic `thinking: {type: enabled}` body field support. "
            "When True, the capability gate treats this (kind, model) as "
            "able to receive the block without translation loss."
        ),
        default=None,
    )

    # Whether the raw ``message.reasoning`` text is left in the response.
    reasoning_passthrough: bool | None = Field(
        description=(
            "Opt-OUT of the openai_compat adapter's passive strip of the "
            "non-standard `message.reasoning` field. True = let the raw "
            "reasoning text flow to the client. Default (None/False) = "
            "strip it (v0.5-C behavior)."
        ),
        default=None,
    )

    # Whether the model is declared to emit structured tool calls.
    tools: bool | None = Field(
        description=(
            "Model reliably emits structured tool_calls. Declared here "
            'as a glob default (e.g. "qwen3-coder:*" tools=true); the '
            "actual adapter-level handling sits in v0.3-A tool repair."
        ),
        default=None,
    )

    # Declared context window; validated to be at least 1 when present.
    max_context_tokens: int | None = Field(
        description=(
            "Declared context window for this model. Used by v0.7-B "
            "doctor --check-model num_ctx probe (not consumed in v0.7-A)."
        ),
        default=None,
        ge=1,
    )
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class CapabilityRule(BaseModel):
    """A single element of the ``rules:`` list in a registry YAML file.

    A rule pairs a model-name glob (``match``) and an optional adapter
    kind filter (``kind``) with the capability flags it declares.
    Unknown keys are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Required, non-empty glob pattern for the provider's model name.
    match: str = Field(
        ...,
        description=(
            "fnmatch-style glob applied case-sensitively against "
            "ProviderConfig.model. Supported wildcards: *, ?, [seq]."
        ),
        min_length=1,
    )

    # Adapter kind filter; the default "any" disables the filter.
    kind: Literal["anthropic", "openai_compat", "any"] = Field(
        description=(
            "Restrict rule to providers of this adapter kind. 'any' "
            "means the rule matches regardless of kind."
        ),
        default="any",
    )

    # Flags declared by this rule; an omitted section declares nothing.
    capabilities: RegistryCapabilities = Field(default_factory=RegistryCapabilities)

    def kind_matches(self, provider_kind: str) -> bool:
        """Return True when the kind filter lets ``provider_kind`` through.

        The wildcard kind ``"any"`` admits every provider kind; otherwise
        the rule's kind must equal ``provider_kind`` exactly.
        """
        return self.kind in ("any", provider_kind)

    def glob_matches(self, model: str) -> bool:
        """Return True when ``model`` fits this rule's glob.

        Uses ``fnmatch.fnmatchcase`` so the comparison is case-sensitive
        and is not affected by platform case normalization.
        """
        pattern = self.match
        return fnmatch.fnmatchcase(model, pattern)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class CapabilityRegistryFile(BaseModel):
    """Schema for the whole of one model-capabilities.yaml document.

    Both keys are optional: an empty mapping validates to version 1 with
    no rules. Unknown top-level keys are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Only format version 1 is accepted.
    version: Literal[1] = Field(
        description="Registry format version — bump on breaking schema changes.",
        default=1,
    )

    # Rules in file order; order matters to the lookup.
    rules: list[CapabilityRule] = Field(
        description="First-match-per-flag rule list.",
        default_factory=list,
    )
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
# ---------------------------------------------------------------------------
|
|
154
|
+
# Lookup result
|
|
155
|
+
# ---------------------------------------------------------------------------
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@dataclass(frozen=True)
class ResolvedCapabilities:
    """Result of a registry lookup — merged per-flag from the rule chain.

    Each field is ``None`` when no matching rule declared that flag; the
    gate function coerces ``None`` to the conservative "not supported"
    answer (False), matching the pre-v0.7-A regex fallback.

    The dataclass is frozen, so a lookup result cannot be modified after
    it has been built. The fields mirror the flags a registry rule can
    declare, one for one.
    """

    # Value of the ``thinking`` flag, or None if no matching rule set it.
    thinking: bool | None = None
    # Value of the ``reasoning_passthrough`` flag, or None if undeclared.
    reasoning_passthrough: bool | None = None
    # Value of the ``tools`` flag, or None if undeclared.
    tools: bool | None = None
    # Declared context window in tokens, or None if undeclared.
    max_context_tokens: int | None = None
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# ---------------------------------------------------------------------------
|
|
174
|
+
# Registry
|
|
175
|
+
# ---------------------------------------------------------------------------
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
# Importable package whose data files include the default registry.
_BUNDLED_PACKAGE = "coderouter.data"
# File name of the default registry inside ``_BUNDLED_PACKAGE``.
_BUNDLED_NAME = "model-capabilities.yaml"
# Optional per-user override file. The home directory is resolved once,
# when this module is imported.
_USER_PATH = Path.home() / ".coderouter" / "model-capabilities.yaml"
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class CapabilityRegistry:
    """Ordered collection of capability rules with per-flag lookup.

    Rules are held in evaluation order: user rules come first so they
    take priority, bundled rules follow. ``lookup`` scans that list from
    the top and, independently for each flag, takes the value from the
    first rule that matches the query and declares the flag. Rules that
    leave a flag unset are skipped for that flag only.

    Nothing in this class modifies the rule list after construction, so
    concurrent ``lookup`` calls only ever read. Loading is not
    synchronized and is expected to happen once at process startup.
    """

    def __init__(self, rules: list[CapabilityRule]) -> None:
        # Private copy: later changes to the caller's list are not seen.
        self._rules: list[CapabilityRule] = [*rules]

    @property
    def rules(self) -> list[CapabilityRule]:
        """Return a new list holding the rules in evaluation order."""
        return [*self._rules]

    def lookup(self, *, kind: str, model: str) -> ResolvedCapabilities:
        """Resolve the capability flags for ``(kind, model)``.

        A rule is a candidate when its kind filter admits ``kind`` and
        its glob matches ``model``. For each of the four flags the value
        comes from the first candidate that sets it to something other
        than ``None``; flags no candidate sets stay ``None`` in the
        result. Scanning stops early once every flag has a value.

        ``model`` may be the empty string, in which case only globs that
        match ``""`` apply.
        """
        # Flags that have not yet been decided by any rule.
        unresolved = [
            "thinking",
            "reasoning_passthrough",
            "tools",
            "max_context_tokens",
        ]
        winners: dict[str, bool | int] = {}

        for rule in self._rules:
            if not (rule.kind_matches(kind) and rule.glob_matches(model)):
                continue
            declared = rule.capabilities
            # Walk a snapshot because decided flags are removed in the loop.
            for flag in tuple(unresolved):
                value = getattr(declared, flag)
                if value is None:
                    continue
                winners[flag] = value
                unresolved.remove(flag)
            if not unresolved:
                break

        return ResolvedCapabilities(
            thinking=winners.get("thinking"),
            reasoning_passthrough=winners.get("reasoning_passthrough"),
            tools=winners.get("tools"),
            max_context_tokens=winners.get("max_context_tokens"),
        )

    # ------------------------------------------------------------------
    # Loaders
    # ------------------------------------------------------------------

    @classmethod
    def from_rule_lists(
        cls,
        *,
        user: list[CapabilityRule] | None = None,
        bundled: list[CapabilityRule] | None = None,
    ) -> CapabilityRegistry:
        """Create a registry from rule lists that are already in memory.

        No file is read. The two lists are concatenated as
        ``user + bundled`` so user rules are evaluated first; a ``None``
        argument counts as an empty list.
        """
        user_rules = user or []
        bundled_rules = bundled or []
        return cls(user_rules + bundled_rules)

    @classmethod
    def load_default(cls) -> CapabilityRegistry:
        """Build the registry from the packaged file plus the user file.

        The packaged file is mandatory: if it cannot be found a
        ``RuntimeError`` is raised. The user file at
        ``~/.coderouter/model-capabilities.yaml`` is optional and
        contributes no rules when it does not exist. Both documents are
        validated with ``CapabilityRegistryFile``; validation errors are
        not caught here and propagate to the caller.
        """
        bundled_rules = cls._read_bundled_file()
        user_rules = cls._read_user_file()
        return cls.from_rule_lists(user=user_rules, bundled=bundled_rules)

    @staticmethod
    def _read_bundled_file() -> list[CapabilityRule]:
        """Read and validate the registry shipped inside the package.

        Raises:
            RuntimeError: the data package or the file within it is
                missing.
        """
        try:
            resource = resources.files(_BUNDLED_PACKAGE).joinpath(_BUNDLED_NAME)
            text = resource.read_text(encoding="utf-8")
        except (FileNotFoundError, ModuleNotFoundError) as exc:
            raise RuntimeError(
                "Bundled model-capabilities.yaml is missing from the "
                f"'{_BUNDLED_PACKAGE}' package — installation is "
                "incomplete or corrupted."
            ) from exc
        # An empty document parses to None; validate it as an empty mapping.
        document = yaml.safe_load(text) or {}
        return CapabilityRegistryFile.model_validate(document).rules

    @staticmethod
    def _read_user_file(path: Path | None = None) -> list[CapabilityRule]:
        """Read and validate the user override file, if there is one.

        ``path`` defaults to the module-level user path. When the target
        is not an existing regular file the user layer is empty.
        """
        target = path if path else _USER_PATH
        if not target.is_file():
            return []
        text = target.read_text(encoding="utf-8")
        document = yaml.safe_load(text) or {}
        return CapabilityRegistryFile.model_validate(document).rules

    @classmethod
    def load_from_paths(
        cls,
        *,
        bundled_path: Path,
        user_path: Path | None = None,
    ) -> CapabilityRegistry:
        """Build a registry from YAML files at explicitly given paths.

        Intended for tests: ``bundled_path`` stands in for the packaged
        file and must be readable, while ``user_path`` is handled like
        the regular user file (``None`` falls back to the default user
        path, and a missing file yields no user rules).
        """
        bundled_text = bundled_path.read_text(encoding="utf-8")
        bundled_document = yaml.safe_load(bundled_text) or {}
        bundled_rules = CapabilityRegistryFile.model_validate(bundled_document).rules
        user_rules = cls._read_user_file(user_path)
        return cls.from_rule_lists(user=user_rules, bundled=bundled_rules)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
__all__ = [
|
|
334
|
+
"CapabilityRegistry",
|
|
335
|
+
"CapabilityRegistryFile",
|
|
336
|
+
"CapabilityRule",
|
|
337
|
+
"RegistryCapabilities",
|
|
338
|
+
"ResolvedCapabilities",
|
|
339
|
+
]
|
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
"""`.env` file loader (v1.6.3).
|
|
2
|
+
|
|
3
|
+
Purpose
|
|
4
|
+
-------
|
|
5
|
+
Provide a standard-library-only parser for `.env`-style files so that
|
|
6
|
+
``coderouter serve --env-file PATH`` can act as a thin gateway between
|
|
7
|
+
CodeRouter and any tool that emits a `.env` (1Password CLI, sops,
|
|
8
|
+
direnv, plain hand-edited files, etc.).
|
|
9
|
+
|
|
10
|
+
We deliberately do NOT pull in ``python-dotenv``: the runtime-deps
|
|
11
|
+
freeze policy (5 packages, see plan.md §5.4) is part of the project's
|
|
12
|
+
audit story. The parser below covers the cases that 1Password / sops /
|
|
13
|
+
manual editing actually emit; the spec is intentionally narrower than
|
|
14
|
+
``python-dotenv`` (no variable expansion, no command substitution, no
|
|
15
|
+
multi-line values).
|
|
16
|
+
|
|
17
|
+
File format
|
|
18
|
+
-----------
|
|
19
|
+
Each non-empty, non-comment line must match::
|
|
20
|
+
|
|
21
|
+
[export ]KEY=value
|
|
22
|
+
|
|
23
|
+
Where:
|
|
24
|
+
|
|
25
|
+
* ``KEY`` is ``[A-Za-z_][A-Za-z0-9_]*`` (POSIX identifier).
|
|
26
|
+
* ``value`` is one of:
|
|
27
|
+
- bare: ``value`` (no whitespace, no quotes)
|
|
28
|
+
- double-quoted: ``"value with spaces"`` — backslash escapes
|
|
29
|
+
``\\n`` ``\\t`` ``\\r`` ``\\"`` ``\\\\`` ``\\$`` ``\\``` are interpreted.
|
|
30
|
+
- single-quoted: ``'literal value'`` — no escape processing,
|
|
31
|
+
contents are taken verbatim.
|
|
32
|
+
* Inline comments (` # comment`) are stripped from BARE values only.
|
|
33
|
+
Quoted values keep ``#`` verbatim.
|
|
34
|
+
* Lines starting with ``#`` (after optional whitespace) are skipped.
|
|
35
|
+
* Blank lines are skipped.
|
|
36
|
+
* The ``export`` prefix is optional and discarded — supports `.env`
|
|
37
|
+
files that double as shell sources (``source .env``).
|
|
38
|
+
|
|
39
|
+
Loading semantics
|
|
40
|
+
-----------------
|
|
41
|
+
:func:`load_env_file` returns a ``dict[str, str]`` and (by default)
|
|
42
|
+
copies entries into ``os.environ`` ONLY for keys that are not already
|
|
43
|
+
set. This is the v1.6.3 default — the file is treated as
|
|
44
|
+
"defaults / setup" rather than authoritative override, so an operator
|
|
45
|
+
who deliberately exports an override at the shell wins. Pass
|
|
46
|
+
``override=True`` to flip the precedence (useful for tests).
|
|
47
|
+
|
|
48
|
+
The parser raises :class:`EnvFileError` on malformed lines (not on
|
|
49
|
+
unknown keys — those just become entries in the returned dict). The
|
|
50
|
+
caller is expected to surface the error to the user; the CLI does so
|
|
51
|
+
with a friendly stderr message and exit 1.
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
from __future__ import annotations
|
|
55
|
+
|
|
56
|
+
import os
|
|
57
|
+
import re
|
|
58
|
+
from collections.abc import Iterable
|
|
59
|
+
from pathlib import Path
|
|
60
|
+
|
|
61
|
+
# Public API of this module. ``load_env_files`` is a documented public
# helper defined below, so it is exported alongside the single-file loader.
__all__ = [
    "EnvFileError",
    "load_env_file",
    "load_env_files",
    "parse_env_file",
]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# POSIX identifier — matches sh / bash / zsh shell variable name rules.
|
|
69
|
+
# This is intentionally strict: keys with hyphens or starting with a
|
|
70
|
+
# digit are rejected so we surface the typo before exporting them
|
|
71
|
+
# into a place that can't actually consume them.
|
|
72
|
+
_KEY_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
|
|
73
|
+
|
|
74
|
+
# Recognized double-quote escape sequences. Anything else after a
|
|
75
|
+
# backslash inside double quotes is left as `\<char>` (matches POSIX
|
|
76
|
+
# "weak quote" behavior of the shell — surprising-free for users who
|
|
77
|
+
# copy paste shell snippets).
|
|
78
|
+
# Maps the character that follows a backslash (inside double quotes) to
# the text it is replaced with.
_DQ_ESCAPES = {
    "n": "\n",  # newline
    "t": "\t",  # horizontal tab
    "r": "\r",  # carriage return
    '"': '"',  # literal double quote that does not end the value
    "\\": "\\",  # literal backslash
    "$": "$",  # so ``"\$VAR"`` survives as ``$VAR`` literally
    "`": "`",  # literal backtick
}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class EnvFileError(ValueError):
    """Raised when a `.env`-style file cannot be parsed.

    The exception message contains the file path and 1-based line
    number so the user can jump straight to the offending row. Caught
    by the CLI to emit a friendly error and exit 1.

    The value-parsing helpers in this module raise this error with a
    bare description only; ``parse_env_file`` catches those and re-raises
    with the ``path:lineno:`` prefix added. Because this subclasses
    ``ValueError``, handlers for ``ValueError`` also catch it.
    """
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def parse_env_file(path: str | os.PathLike[str]) -> dict[str, str]:
    """Parse a `.env` file at ``path`` into a ``dict[str, str]``.

    Does NOT mutate ``os.environ`` — that is :func:`load_env_file`'s
    job. Pure parser, useful for tests and for callers that want to
    diff against the current environment before applying.

    The file is decoded as UTF-8. A byte-order mark at the very start of
    the file (written by some Windows editors) is dropped, so it cannot
    end up glued to the first key. When a key appears more than once the
    last occurrence wins.

    Args:
        path: Location of the `.env`-style file.

    Returns:
        Mapping of key to parsed value, in file order.

    Raises:
        FileNotFoundError: ``path`` does not exist.
        EnvFileError: malformed line (with file path + 1-based line
            number in the message).
    """
    p = Path(path)
    if not p.exists():
        raise FileNotFoundError(f"env file not found: {p}")

    parsed: dict[str, str] = {}
    # ``utf-8-sig`` decodes plain UTF-8 unchanged and strips a leading BOM.
    # With plain ``utf-8`` the BOM survives ``strip()`` (it is not
    # whitespace) and the first key fails the identifier check.
    text = p.read_text(encoding="utf-8-sig")
    for lineno, raw_line in enumerate(text.splitlines(), start=1):
        line = raw_line.strip()
        # Skip blank lines and pure comment lines.
        if not line or line.startswith("#"):
            continue

        # Peel an optional ``export`` prefix, but only when whitespace
        # follows it. A bare ``export`` or a key that merely begins with
        # "export" is left alone and handled by the checks below.
        if line.startswith(("export ", "export\t")):
            line = line[len("export") :].lstrip()

        # Split on the FIRST `=`. Subsequent `=` are part of the value.
        if "=" not in line:
            raise EnvFileError(f"{p}:{lineno}: missing `=` separator: {raw_line!r}")
        key_raw, value_raw = line.split("=", 1)
        key = key_raw.strip()

        if not _KEY_RE.match(key):
            raise EnvFileError(
                f"{p}:{lineno}: invalid key {key!r} "
                f"(must match {_KEY_RE.pattern})"
            )

        try:
            value = _parse_value(value_raw)
        except EnvFileError as exc:
            # The helper knows nothing about the file; add path:lineno
            # here and drop the inner traceback to keep the error short.
            raise EnvFileError(f"{p}:{lineno}: {exc}") from None

        parsed[key] = value

    return parsed
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def load_env_file(
    path: str | os.PathLike[str],
    *,
    override: bool = False,
    environ: dict[str, str] | None = None,
) -> list[str]:
    """Parse ``path`` and write its entries into an environment mapping.

    Args:
        path: Location of the `.env`-style file.
        override: When ``False`` (default) a key that already exists in
            the target mapping is left untouched, so values exported in
            the shell take precedence over the file. When ``True`` the
            file value replaces the existing one.
        environ: Mapping to write into. ``None`` selects ``os.environ``;
            tests can supply a plain ``dict`` to keep the real
            environment unchanged.

    Returns:
        The keys that were written (newly added or overwritten), in file
        order. Keys skipped because they were already set are not
        included.

    Raises:
        FileNotFoundError, EnvFileError: see :func:`parse_env_file`.
    """
    destination = os.environ if environ is None else environ
    written: list[str] = []
    for name, text in parse_env_file(path).items():
        if override or name not in destination:
            destination[name] = text
            written.append(name)
    return written
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def load_env_files(
    paths: Iterable[str | os.PathLike[str]],
    *,
    override: bool = False,
    environ: dict[str, str] | None = None,
) -> list[tuple[str, list[str]]]:
    """Run :func:`load_env_file` over several files, in the given order.

    All files are applied to the same target mapping with the same
    ``override`` setting. With the default ``override=False`` a key is
    set by the first file that defines it and later files only fill in
    keys that are still missing; with ``override=True`` the last file
    that defines a key wins.

    Returns:
        One ``(path, applied_keys)`` tuple per input, in load order, with
        the path converted to ``str``. Suitable for a per-file summary.

    Raises:
        FileNotFoundError, EnvFileError: from the first file that fails;
            files before it have already been applied.
    """
    return [
        (str(entry), load_env_file(entry, override=override, environ=environ))
        for entry in paths
    ]
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
# ---------------------------------------------------------------------------
|
|
212
|
+
# Internal helpers
|
|
213
|
+
# ---------------------------------------------------------------------------
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _parse_value(raw: str) -> str:
|
|
217
|
+
"""Parse the right-hand side of `KEY=value`.
|
|
218
|
+
|
|
219
|
+
Strips leading whitespace (so ``KEY = value`` is tolerated, though
|
|
220
|
+
style-discouraged), detects quoting, applies the appropriate
|
|
221
|
+
escape rules.
|
|
222
|
+
"""
|
|
223
|
+
s = raw.lstrip()
|
|
224
|
+
|
|
225
|
+
if not s:
|
|
226
|
+
return ""
|
|
227
|
+
|
|
228
|
+
if s[0] == '"':
|
|
229
|
+
# Double-quoted: process escapes, terminate at unescaped ".
|
|
230
|
+
return _parse_double_quoted(s)
|
|
231
|
+
if s[0] == "'":
|
|
232
|
+
# Single-quoted: literal until next single quote.
|
|
233
|
+
return _parse_single_quoted(s)
|
|
234
|
+
|
|
235
|
+
# Bare value: strip inline comment and trailing whitespace.
|
|
236
|
+
# We use a tiny state machine rather than a regex so we don't
|
|
237
|
+
# accidentally match `#` inside a hash-bang-looking value
|
|
238
|
+
# (e.g. ``KEY=#1`` — though questionable, this preserves it).
|
|
239
|
+
# The rule: a bare value's inline comment requires whitespace
|
|
240
|
+
# before `#`, so ``KEY=foo#bar`` keeps `foo#bar`.
|
|
241
|
+
out_chars: list[str] = []
|
|
242
|
+
prev_was_space = False
|
|
243
|
+
for ch in s:
|
|
244
|
+
if ch == "#" and prev_was_space:
|
|
245
|
+
break
|
|
246
|
+
out_chars.append(ch)
|
|
247
|
+
prev_was_space = ch in (" ", "\t")
|
|
248
|
+
return "".join(out_chars).rstrip()
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _parse_double_quoted(s: str) -> str:
|
|
252
|
+
"""Parse a string starting at the opening ``"``.
|
|
253
|
+
|
|
254
|
+
Recognized escapes: ``\\n``, ``\\t``, ``\\r``, ``\\"``, ``\\\\``,
|
|
255
|
+
``\\$``, ``\\```. Unknown escapes pass through as-is (POSIX weak
|
|
256
|
+
quoting compatible).
|
|
257
|
+
"""
|
|
258
|
+
assert s[0] == '"'
|
|
259
|
+
out: list[str] = []
|
|
260
|
+
i = 1
|
|
261
|
+
while i < len(s):
|
|
262
|
+
ch = s[i]
|
|
263
|
+
if ch == "\\" and i + 1 < len(s):
|
|
264
|
+
nxt = s[i + 1]
|
|
265
|
+
out.append(_DQ_ESCAPES.get(nxt, "\\" + nxt))
|
|
266
|
+
i += 2
|
|
267
|
+
continue
|
|
268
|
+
if ch == '"':
|
|
269
|
+
# End of quoted value. Anything after (other than whitespace
|
|
270
|
+
# and an inline comment) is a syntax error.
|
|
271
|
+
tail = s[i + 1 :].lstrip()
|
|
272
|
+
if tail and not tail.startswith("#"):
|
|
273
|
+
raise EnvFileError(
|
|
274
|
+
f"unexpected content after closing quote: {tail!r}"
|
|
275
|
+
)
|
|
276
|
+
return "".join(out)
|
|
277
|
+
out.append(ch)
|
|
278
|
+
i += 1
|
|
279
|
+
raise EnvFileError("unterminated double-quoted value")
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _parse_single_quoted(s: str) -> str:
|
|
283
|
+
"""Parse a string starting at the opening ``'``.
|
|
284
|
+
|
|
285
|
+
Single quotes are literal — no escapes, the value is everything
|
|
286
|
+
up to the next ``'``.
|
|
287
|
+
"""
|
|
288
|
+
assert s[0] == "'"
|
|
289
|
+
end = s.find("'", 1)
|
|
290
|
+
if end == -1:
|
|
291
|
+
raise EnvFileError("unterminated single-quoted value")
|
|
292
|
+
tail = s[end + 1 :].lstrip()
|
|
293
|
+
if tail and not tail.startswith("#"):
|
|
294
|
+
raise EnvFileError(f"unexpected content after closing quote: {tail!r}")
|
|
295
|
+
return s[1:end]
|