PyPI - coderouter-cli - Versions diffs - 1.7.0__py3-none-any.whl → 1.8.0__py3-none-any.whl - Mend

coderouter-cli 1.7.0__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

coderouter/cli.py +168 -2
coderouter/config/capability_registry.py +27 -1
coderouter/data/model-capabilities.yaml +149 -0
coderouter/doctor_apply.py +612 -0
coderouter/ingress/app.py +8 -0
coderouter/logging.py +86 -0
coderouter/routing/capability.py +113 -1
{coderouter_cli-1.7.0.dist-info → coderouter_cli-1.8.0.dist-info}/METADATA +44 -19
{coderouter_cli-1.7.0.dist-info → coderouter_cli-1.8.0.dist-info}/RECORD +12 -11
{coderouter_cli-1.7.0.dist-info → coderouter_cli-1.8.0.dist-info}/WHEEL +0 -0
{coderouter_cli-1.7.0.dist-info → coderouter_cli-1.8.0.dist-info}/entry_points.txt +0 -0
{coderouter_cli-1.7.0.dist-info → coderouter_cli-1.8.0.dist-info}/licenses/LICENSE +0 -0

coderouter/cli.py CHANGED Viewed

@@ -4,6 +4,7 @@ from __future__ import annotations
 import argparse
 import sys
+from pathlib import Path
 import uvicorn
@@ -126,6 +127,40 @@ def _build_parser() -> argparse.ArgumentParser:
             "./providers.yaml, or ~/.coderouter/providers.yaml."
         ),
     )
+    # v1.7-B (#3): --apply writes the doctor-emitted YAML patches back
+    # into providers.yaml / model-capabilities.yaml while preserving
+    # comments and key order. --dry-run is the same path minus the file
+    # write — prints a unified diff (``git apply``-compatible) for review.
+    # Bare ``--dry-run`` (without ``--apply``) is the canonical "preview"
+    # form; ``--apply --dry-run`` is also accepted as an explicit synonym
+    # so muscle-memory from ``git apply --dry-run`` works either way.
+    # Both flags are no-ops when --check-model is absent (--check-env
+    # has its own remediation surface and is not in scope for --apply).
+    # Implementation lives in coderouter/doctor_apply.py — round-trip
+    # via the optional ``ruamel.yaml`` dependency, see that module's
+    # docstring for the contract and shape invariants.
+    doctor.add_argument(
+        "--apply",
+        action="store_true",
+        help=(
+            "After --check-model, write the suggested patches back into "
+            "providers.yaml / model-capabilities.yaml. A `.bak` backup is "
+            "created next to each modified file. Idempotent: a re-run "
+            "after a successful apply is a no-op (no write, exit 0). "
+            "Requires the optional `ruamel.yaml` dependency — install "
+            "via `pip install coderouter-cli[doctor]`."
+        ),
+    )
+    doctor.add_argument(
+        "--dry-run",
+        action="store_true",
+        help=(
+            "Preview --apply changes as a unified diff without writing "
+            "to disk. Implies --apply mode for diff generation. The "
+            "output is `git apply`-compatible so it can be saved and "
+            "applied later (or piped to `patch -p0`)."
+        ),
+    )
     # v1.5-C: `coderouter stats` — live TUI over GET /metrics.json.
     # Lazy-imports ``curses`` inside the runner so the CLI boot stays
@@ -283,7 +318,14 @@ def _run_doctor(args: argparse.Namespace) -> int:
 def _run_check_model(args: argparse.Namespace) -> int:
-    """v0.7-B: per-provider HTTP capability probe."""
+    """v0.7-B: per-provider HTTP capability probe.
+    v1.7-B (#3): when ``--apply`` or ``--dry-run`` is also set, we run
+    the same probes and then route the emitted patches through
+    :func:`coderouter.doctor_apply.apply_doctor_patches`. Bare probe
+    (no apply / dry-run flags) keeps the original behavior verbatim
+    so existing CI integrations don't change shape.
+    """
     from coderouter.config.loader import load_config
     from coderouter.doctor import (
         exit_code_for,
@@ -307,7 +349,131 @@ def _run_check_model(args: argparse.Namespace) -> int:
         return 1
     print(format_report(report))
-    return exit_code_for(report)
+    base_exit = exit_code_for(report)
+    apply_mode = bool(getattr(args, "apply", False))
+    dry_run_mode = bool(getattr(args, "dry_run", False))
+    if apply_mode or dry_run_mode:
+        # Resolve the same providers.yaml the loader picked up so the
+        # apply step writes back to the exact file that was probed
+        # (avoids a mismatch when CODEROUTER_CONFIG points elsewhere
+        # than the default path).
+        config_path = _resolve_config_path(args.config)
+        return _run_apply_or_dry_run(
+            report=report,
+            config_path=config_path,
+            write=apply_mode and not dry_run_mode,
+            base_exit=base_exit,
+        )
+    return base_exit
+def _resolve_config_path(explicit: str | None) -> Path:
+    """Mirror loader._candidate_paths and return the file actually used.
+    Used by ``--apply`` to write back to the same path the loader
+    picked up when it parsed providers.yaml. Falls through the same
+    search order so a ``CODEROUTER_CONFIG`` env or default-path lookup
+    matches the live config.
+    """
+    import os
+    candidates: list[Path] = []
+    if explicit:
+        candidates.append(Path(explicit))
+    if env_path := os.environ.get("CODEROUTER_CONFIG"):
+        candidates.append(Path(env_path))
+    candidates.append(Path.cwd() / "providers.yaml")
+    candidates.append(Path.home() / ".coderouter" / "providers.yaml")
+    for p in candidates:
+        if p.is_file():
+            return p
+    # Fall back to the last candidate even if absent — the apply step
+    # will surface a clearer error than this resolver would.
+    return candidates[-1]
+def _run_apply_or_dry_run(
+    *,
+    report: object,
+    config_path: Path,
+    write: bool,
+    base_exit: int,
+) -> int:
+    """v1.7-B (#3): drive ``apply_doctor_patches`` and render the result.
+    Returns 0 when the apply step itself is clean (regardless of
+    whether the underlying probes flagged ``NEEDS_TUNING``). The
+    rationale: once the operator has applied the patches, the next
+    ``doctor`` run is the right place to re-evaluate the chain — a
+    successful apply should not propagate the "exit 2 / needs tuning"
+    signal because the issue is now (presumably) addressed.
+    """
+    from coderouter.doctor_apply import (
+        DoctorApplyError,
+        MissingDependencyError,
+        apply_doctor_patches,
+    )
+    print()  # blank line between probe report and apply section
+    try:
+        result = apply_doctor_patches(
+            report=report,
+            config_path=config_path,
+            write=write,
+        )
+    except MissingDependencyError as exc:
+        print(f"doctor --apply: {exc}", file=sys.stderr)
+        return 1
+    except DoctorApplyError as exc:
+        print(f"doctor --apply: {exc}", file=sys.stderr)
+        return 1
+    label = "Apply" if write else "Dry-run"
+    print(f"{label}: {len(result.target_paths)} target file(s).")
+    if result.skipped_unknown_target:
+        print(
+            f"  warning: {len(result.skipped_unknown_target)} probe(s) "
+            f"emitted an unknown target_file value: "
+            f"{sorted(set(result.skipped_unknown_target))}",
+            file=sys.stderr,
+        )
+    if result.is_no_op:
+        # Distinguish "nothing to do because base_exit was 0" from
+        # "nothing to do because everything already applied":
+        if base_exit == 0:
+            print("  No NEEDS_TUNING patches to apply — chain is healthy.")
+        else:
+            print(
+                f"  All {result.no_op_patches} patch(es) already applied "
+                f"— providers.yaml is up to date."
+            )
+        return 0
+    print(
+        f"  {result.changes_applied} patch(es) applied"
+        + (f", {result.no_op_patches} already up to date" if result.no_op_patches else "")
+        + "."
+    )
+    for path in result.target_paths:
+        diff = result.diffs.get(str(path), "")
+        if not diff:
+            continue
+        print()
+        print(diff, end="" if diff.endswith("\n") else "\n")
+    if write:
+        for orig, bak in result.backups.items():
+            print(f"  Backup: {orig} → {bak}")
+    else:
+        print()
+        print("  (dry-run — no files were modified. Re-run with --apply to write.)")
+    return 0
 def _run_check_env(arg_value: str) -> int:

coderouter/config/capability_registry.py CHANGED Viewed

@@ -102,6 +102,19 @@ class RegistryCapabilities(BaseModel):
             "doctor --check-model num_ctx probe (not consumed in v0.7-A)."
         ),
     )
+    claude_code_suitability: Literal["ok", "degraded"] | None = Field(
+        default=None,
+        description=(
+            "v1.7-B: hint for use behind Claude Code's agentic-coding "
+            "harness. ``degraded`` = the model over-eagerly invokes "
+            "tools/skills when given Claude Code's system prompt — e.g. "
+            "Llama-3.3-70B treating small talk like ``こんにちは`` as "
+            "``Skill(hello)`` invocations (see docs/troubleshooting.md "
+            "§4-1 for the symptom log). ``ok`` = explicitly verified "
+            "clean. ``None`` = no opinion (treated as ``ok`` at the "
+            "startup check)."
+        ),
+    )
 class CapabilityRule(BaseModel):
@@ -168,6 +181,7 @@ class ResolvedCapabilities:
     reasoning_passthrough: bool | None = None
     tools: bool | None = None
     max_context_tokens: int | None = None
+    claude_code_suitability: Literal["ok", "degraded"] | None = None
 # ---------------------------------------------------------------------------
@@ -218,11 +232,13 @@ class CapabilityRegistry:
         resolved_reasoning: bool | None = None
         resolved_tools: bool | None = None
         resolved_max_ctx: int | None = None
+        resolved_suitability: Literal["ok", "degraded"] | None = None
         thinking_locked = False
         reasoning_locked = False
         tools_locked = False
         max_ctx_locked = False
+        suitability_locked = False
         for rule in self._rules:
             if not rule.kind_matches(kind):
@@ -242,7 +258,16 @@ class CapabilityRegistry:
             if not max_ctx_locked and caps.max_context_tokens is not None:
                 resolved_max_ctx = caps.max_context_tokens
                 max_ctx_locked = True
-            if thinking_locked and reasoning_locked and tools_locked and max_ctx_locked:
+            if not suitability_locked and caps.claude_code_suitability is not None:
+                resolved_suitability = caps.claude_code_suitability
+                suitability_locked = True
+            if (
+                thinking_locked
+                and reasoning_locked
+                and tools_locked
+                and max_ctx_locked
+                and suitability_locked
+            ):
                 break
         return ResolvedCapabilities(
@@ -250,6 +275,7 @@ class CapabilityRegistry:
             reasoning_passthrough=resolved_reasoning,
             tools=resolved_tools,
             max_context_tokens=resolved_max_ctx,
+            claude_code_suitability=resolved_suitability,
         )
     # ------------------------------------------------------------------

coderouter/data/model-capabilities.yaml CHANGED Viewed

@@ -31,6 +31,11 @@
 #       reasoning_passthrough: bool   — opt OUT of the adapter's passive `reasoning` strip
 #       tools: bool                   — upstream reliably emits tool_calls
 #       max_context_tokens: int       — declared model context window
+#       claude_code_suitability: str  — "ok" | "degraded". Hint for use behind
+#                                       Claude Code's agentic-coding harness;
+#                                       "degraded" triggers a startup WARN when
+#                                       the provider is on a `claude-code-*`
+#                                       chain. See docs/troubleshooting.md §4-1.
 #
 # First-match semantics: rules within a file are evaluated top-to-bottom
 # per flag; the first rule whose glob matches AND declares that flag
@@ -84,3 +89,147 @@ rules:
     kind: anthropic
     capabilities:
       thinking: true
+  # ------------------------------------------------------------------
+  # Claude Code suitability — agentic harness compatibility hint (v1.7-B).
+  #
+  # "degraded" = the model over-eagerly invokes tools / skills when given
+  # Claude Code's system prompt, even for trivial small talk. Concretely,
+  # Llama-3.3-70B (verified 2026-04-24 against NVIDIA NIM) rewrites
+  # ``こんにちは`` into ``Skill(hello)`` invocations and fabricates
+  # ``AskUserQuestion("What is your name?")`` elicitations — see
+  # docs/articles/note-nvidia-nim.md §6-2 + docs/troubleshooting.md §4-1.
+  #
+  # Glob coverage: NIM uses ``meta/llama-3.3-70b-instruct``, OpenRouter
+  # uses ``meta-llama/llama-3.3-70b-instruct``, some local servers use
+  # ``Llama-3.3-70B-Instruct``. fnmatch is case-sensitive so we declare
+  # both common case-variants explicitly. The leading ``*`` wildcard
+  # absorbs any vendor-prefix slug (``meta/`` / ``meta-llama/`` / etc.).
+  #
+  # An operator who has tuned their Llama-3.3 deployment (custom system
+  # prompt, tool whitelist, etc.) can opt out via
+  # ``~/.coderouter/model-capabilities.yaml`` with the matching glob and
+  # ``claude_code_suitability: ok``.
+  # ------------------------------------------------------------------
+  - match: "*llama-3.3-70b*"
+    kind: openai_compat
+    capabilities:
+      claude_code_suitability: degraded
+  - match: "*Llama-3.3-70B*"
+    kind: openai_compat
+    capabilities:
+      claude_code_suitability: degraded
+  # ------------------------------------------------------------------
+  # Qwen3-Coder family — agentic coding 専用設計 (v1.7-B 追加)
+  #
+  # Alibaba の Qwen3-Coder series は agentic coding と tool use を
+  # 主目的に学習されており、Claude Sonnet の tool-call 行動に最も近い
+  # ローカル/オープン代替として知られています (note 記事 + r/LocalLLaMA
+  # 2026-04 Megathread コミュニティ評)。
+  #
+  # ここで `tools: true` を先回り宣言することで、providers.yaml 側で
+  # 個別に capabilities.tools: true を書かなくても tool-call 経路が
+  # 有効になります。`claude_code_suitability: ok` も併せて宣言、
+  # claude-code-* プロファイル startup check (v1.7-B) で degraded 警告が
+  # 出ないことを保証。
+  #
+  # glob 範囲 (case-sensitive — 大文字版も併記):
+  #   Ollama tag       : qwen3-coder:*       (例: qwen3-coder:30b-a3b)
+  #   NIM slug         : qwen/qwen3-coder-*  (例: qwen/qwen3-coder-480b-a35b-instruct)
+  #   OpenRouter slug  : qwen/qwen3-coder*   (例: qwen/qwen3-coder:free)
+  #   HF GGUF (Ollama) : hf.co/*/Qwen3-Coder-*-GGUF*  (大文字)
+  # ------------------------------------------------------------------
+  - match: "qwen3-coder:*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+      claude_code_suitability: ok
+  - match: "qwen/qwen3-coder-*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+      claude_code_suitability: ok
+  - match: "qwen/qwen3-coder*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+      claude_code_suitability: ok
+  - match: "*Qwen3-Coder-*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+      claude_code_suitability: ok
+  # ------------------------------------------------------------------
+  # Qwen3.6 family (v1.7-B 追加)
+  #
+  # 2026-04 リリースの Qwen3.6 シリーズ。Ollama 公式 tag は
+  # qwen3.6:27b / qwen3.6:35b、全 variant が tools+vision+thinking 対応、
+  # 256K context。note 記事 (r/LocalLLaMA 2026-04 Megathread) で
+  # 「Claude Code 代替として最高」「local champ」と評価。
+  # Qwen3-Coder と並んで Claude Sonnet 互換性が高い。
+  # ------------------------------------------------------------------
+  - match: "qwen3.6:*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+      claude_code_suitability: ok
+  - match: "qwen/qwen3.6-*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+      claude_code_suitability: ok
+  # ------------------------------------------------------------------
+  # Gemma 4 family (v1.7-B 追加)
+  #
+  # Google 公式 Gemma 4。Ollama 公式 tag は gemma4:e2b / e4b / 26b / 31b、
+  # 全 variant が tools+vision+thinking 対応、E2B/E4B は audio もサポート。
+  # MoE (26b は active 3.8B / total 25.2B)。note 記事で「日常・バランスの
+  # 王者」と評価。Claude Haiku 互換性に近い簡潔な応答スタイル。
+  # ------------------------------------------------------------------
+  - match: "gemma4:*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+  - match: "google/gemma-4*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+  # ------------------------------------------------------------------
+  # GLM family (Z.AI / Zhipu AI、v1.7-B 追加)
+  #
+  # Z.AI の OpenAI-compat エンドポイントから利用する GLM-4.x / 5.x 系列。
+  # モデル名 slug は **大文字必須** (Cursor 等のドキュメント明記)。
+  # tools / vision 対応、Coding Plan の API 経由でも General API 経由でも
+  # 同じモデルが利用可能。
+  #
+  # GLM-5.1 / GLM-5-Turbo: Opus 級フラッグシップ
+  # GLM-4.7: Sonnet/Opus 級、Coding Plan のデフォルト
+  # GLM-4.5-Air: Haiku 級、軽量・高速
+  #
+  # note 記事は「intent 理解が Claude Opus 級」と評価。reasoning 用途に
+  # 特に向く。
+  # ------------------------------------------------------------------
+  - match: "GLM-5*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+  - match: "GLM-4.[5-9]*"
+    kind: openai_compat
+    capabilities:
+      tools: true

coderouter-cli 1.7.0__py3-none-any.whl → 1.8.0__py3-none-any.whl

coderouter-cli 1.7.0__py3-none-any.whl → 1.8.0__py3-none-any.whl