coderouter-cli 2.5.5__tar.gz → 2.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/CHANGELOG.md +71 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/PKG-INFO +1 -1
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/config/schemas.py +31 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/cost.py +32 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/ingress/dashboard_routes.py +42 -0
- coderouter_cli-2.6.0/coderouter/language_tax.py +244 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/logging.py +8 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/metrics/collector.py +45 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/routing/auto_router.py +7 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/routing/fallback.py +30 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/token_estimation.py +47 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/pyproject.toml +1 -1
- coderouter_cli-2.6.0/tests/test_auto_router_cjk.py +99 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_dashboard_endpoint.py +6 -0
- coderouter_cli-2.6.0/tests/test_language_tax.py +147 -0
- coderouter_cli-2.6.0/tests/test_language_tax_integration.py +215 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/.gitignore +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/LICENSE +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/README.en.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/README.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/__init__.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/__main__.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/adapters/__init__.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/adapters/anthropic_native.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/adapters/base.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/adapters/openai_compat.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/adapters/registry.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/cli.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/cli_stats.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/config/__init__.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/config/capability_registry.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/config/env_file.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/config/loader.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/data/__init__.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/data/model-capabilities.yaml +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/doctor.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/doctor_apply.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/env_security.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/errors.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/gguf_introspect.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/guards/__init__.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/guards/_fingerprint.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/guards/backend_health.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/guards/context_budget.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/guards/continuous_probe.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/guards/drift_actions.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/guards/drift_detection.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/guards/memory_budget.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/guards/memory_pressure.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/guards/self_healing.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/guards/tool_loop.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/hardware.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/ingress/__init__.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/ingress/anthropic_routes.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/ingress/app.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/ingress/launcher_routes.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/ingress/metrics_routes.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/ingress/openai_routes.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/metrics/__init__.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/metrics/prometheus.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/output_filters.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/plugins/__init__.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/plugins/base.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/plugins/loader.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/plugins/registry.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/routing/__init__.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/routing/adaptive.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/routing/budget.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/routing/capability.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/state/__init__.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/state/audit_log.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/state/replay.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/state/request_log.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/state/store.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/state/suggest_rules.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/token_estimation_accurate.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/translation/__init__.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/translation/anthropic.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/translation/convert.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/coderouter/translation/tool_repair.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/README.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/assets/dashboard-demo.png +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/backends/gguf_dl.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/backends/hf-ollama-models.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/backends/install-backends.en.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/backends/install-backends.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/backends/launcher-quickstart.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/backends/launcher.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/backends/llamacpp-direct.en.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/backends/llamacpp-direct.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/backends/lmstudio-direct.en.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/backends/lmstudio-direct.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/backends/verify-ollama-0.23.1.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/concepts/architecture.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/concepts/context-budget.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/concepts/continuous-probing.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/concepts/drift-detection.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/concepts/partial-stitch.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/designs/v1.5-dashboard-mockup.html +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/designs/v1.6-auto-router-verification.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/designs/v1.6-auto-router.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/guides/free-tier-guide.en.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/guides/free-tier-guide.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/guides/security.en.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/guides/security.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/guides/troubleshooting.en.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/guides/troubleshooting.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/guides/usage-guide.en.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/guides/usage-guide.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/low-memory-integration.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/openrouter-roster/CHANGES.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/openrouter-roster/README.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/openrouter-roster/latest.json +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/retrospectives/v0.4.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/retrospectives/v0.5-verify.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/retrospectives/v0.5.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/retrospectives/v0.6.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/retrospectives/v0.7.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/retrospectives/v1.0-verify.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/retrospectives/v1.0.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/start/quickstart.en.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/start/quickstart.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/start/when-do-i-need-coderouter.en.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/docs/start/when-do-i-need-coderouter.md +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/examples/.env.example +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/examples/providers.auto-custom.yaml +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/examples/providers.auto.yaml +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/examples/providers.llama-cpp-vllm.yaml +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/examples/providers.note-2026.yaml +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/examples/providers.nvidia-nim.yaml +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/examples/providers.raspberrypi.yaml +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/examples/providers.v2-context-budget.yaml +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/examples/providers.yaml +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/scripts/demo_traffic.sh +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/scripts/openrouter_roster_diff.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/scripts/smoke_v2_2.sh +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/scripts/verify-providers.yaml +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/scripts/verify_ollama_0_23.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/scripts/verify_v0_5.sh +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/scripts/verify_v1_0.sh +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/__init__.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/conftest.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_adapter_anthropic.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_audit_log.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_auto_router.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_backend_health.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_budget.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_capability.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_capability_degraded_payload.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_capability_registry.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_capability_registry_cache_control.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_claude_code_suitability.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_cli.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_cli_stats.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_config.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_context_budget.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_continuous_probe.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_doctor.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_doctor_apply.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_doctor_cache_probe.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_drift_actions.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_drift_detection.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_drift_detection_integration.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_env_file.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_env_security.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_errors.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_examples_yaml.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_fallback.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_fallback_anthropic.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_fallback_cache_control.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_fallback_cache_observed.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_fallback_misconfig_warn.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_fallback_paid_gate.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_fallback_thinking.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_gguf_introspect.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_guards_tool_loop.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_hardware.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_ingress_anthropic.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_ingress_profile.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_memory_budget.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_memory_pressure.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_metrics_cache.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_metrics_collector.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_metrics_cost.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_metrics_endpoint.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_metrics_jsonl.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_metrics_prometheus.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_metrics_prometheus_cache.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_openai_compat.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_openrouter_roster_diff.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_output_filters.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_output_filters_adapters.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_partial_stitch.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_plugins_integration.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_plugins_loader.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_plugins_registry.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_reasoning_strip.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_repair_byte_fallback.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_request_log.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_role_normalization.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_routing_adaptive.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_self_healing.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_setup_sh.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_state_store.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_token_estimation.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_token_estimation_accurate.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_tool_repair.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_translation_anthropic.py +0 -0
- {coderouter_cli-2.5.5 → coderouter_cli-2.6.0}/tests/test_translation_reverse.py +0 -0
|
@@ -6,6 +6,77 @@ versioning follows [SemVer](https://semver.org/).
|
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
|
9
|
+
## [v2.6.0] — 2026-06-20 (Language Tax: measure, route, visualize)
|
|
10
|
+
|
|
11
|
+
Minor release: makes the CJK **"language tax"** — cloud tokenizers bill
|
|
12
|
+
Japanese/Chinese/Korean text ~1.2–1.5× more tokens per character than
|
|
13
|
+
English, while local models are unaffected — measurable, routable, and
|
|
14
|
+
visible. Built entirely on existing infrastructure; **no new core
|
|
15
|
+
dependency** (the accurate tokenizer is the existing optional `accuracy`
|
|
16
|
+
extra), **no network** (local `tokenizer.json` only), and **fully
|
|
17
|
+
backward compatible** — the feature is inert until a provider declares
|
|
18
|
+
`tokenizer_path`.
|
|
19
|
+
|
|
20
|
+
### Added
|
|
21
|
+
|
|
22
|
+
- **Language-tax measurement (`coderouter/language_tax.py`).** A leaf
|
|
23
|
+
module exposing `cjk_char_ratio`, `estimate_language_tax`,
|
|
24
|
+
`LanguageTaxBreakdown`, and `language_tax_usd`. CJK detection is
|
|
25
|
+
stdlib-only (Unicode range checks); the accurate token count is
|
|
26
|
+
delegated to the optional `accuracy` (`tokenizers`) backend with a
|
|
27
|
+
char/4 fallback. The tax multiplier is `tokens_accurate /
|
|
28
|
+
tokens_heuristic` — ~1.0 for English/code, ~2.0–4.0 for pure CJK.
|
|
29
|
+
|
|
30
|
+
- **End-to-end cost integration.** `CostBreakdown` gains
|
|
31
|
+
`language_tax_multiplier` / `language_tax_usd`;
|
|
32
|
+
`compute_cost_for_attempt` accepts an optional `language_tax=`. Both
|
|
33
|
+
`cache-observed` emit sites in `routing/fallback.py` (streaming +
|
|
34
|
+
non-streaming) build a `LanguageTaxBreakdown` **only when the provider
|
|
35
|
+
declares `tokenizer_path`**, so the hot path is untouched by default.
|
|
36
|
+
The `cache-observed` log line now carries `language_tax_usd` /
|
|
37
|
+
`language_tax_multiplier`, and `MetricsCollector` aggregates per-provider
|
|
38
|
+
+ total language-tax spend (mirroring the cost-savings aggregation).
|
|
39
|
+
|
|
40
|
+
- **`ProviderConfig.tokenizer_path`** — optional path to a local
|
|
41
|
+
`tokenizer.json` for accurate (language-tax) token counting. Local-file
|
|
42
|
+
only; never contacts the HuggingFace Hub. Inert when unset.
|
|
43
|
+
|
|
44
|
+
- **`cjk_ratio_min` auto-route matcher.** A new `RuleMatcher` variant that
|
|
45
|
+
routes turns whose latest user message CJK ratio ≥ threshold to a
|
|
46
|
+
(typically local, tax-free) profile, while ASCII/code turns fall through
|
|
47
|
+
to the cloud chain. Per-turn property mirroring `code_fence_ratio_min`.
|
|
48
|
+
|
|
49
|
+
```yaml
|
|
50
|
+
auto_router:
|
|
51
|
+
rules:
|
|
52
|
+
- match: { cjk_ratio_min: 0.3 } # JA-heavy turns → local
|
|
53
|
+
profile: local
|
|
54
|
+
- match: { has_tools: true }
|
|
55
|
+
profile: cloud
|
|
56
|
+
default_rule_profile: cloud
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
- **Dashboard "Cost & Language Tax" panel** on `/dashboard`: total spend,
|
|
60
|
+
cache savings, and CJK language-tax spend (aggregate + per-provider).
|
|
61
|
+
Also surfaces the previously-hidden cost aggregates.
|
|
62
|
+
|
|
63
|
+
- **`token_estimation.extract_text_from_anthropic_request()`** — pulls the
|
|
64
|
+
concatenated request text for the accurate tokenizer leg.
|
|
65
|
+
|
|
66
|
+
### Security
|
|
67
|
+
|
|
68
|
+
- **Bump starlette 1.0.1 → 1.3.1**, clearing four advisories
|
|
69
|
+
(CVE-2026-48817 / CVE-2026-48818 / CVE-2026-54282 / CVE-2026-54283) that
|
|
70
|
+
failed the `cve-audit` CI job (`pip-audit --strict`).
|
|
71
|
+
|
|
72
|
+
### Notes
|
|
73
|
+
|
|
74
|
+
- 38 new tests (`test_language_tax`, `test_language_tax_integration`,
|
|
75
|
+
`test_auto_router_cjk`, extended dashboard contract). Full suite:
|
|
76
|
+
**1250 passed, 8 skipped**. ruff clean. The 5-deps invariant is intact.
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
9
80
|
## [v2.5.5] — 2026-06-06 (Claude Code >= 2.1.154 `system` role normalization)
|
|
10
81
|
|
|
11
82
|
Patch release: ingress-side workaround for a Claude Code CLI regression.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coderouter-cli
|
|
3
|
-
Version: 2.5.5
|
|
3
|
+
Version: 2.6.0
|
|
4
4
|
Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
|
|
5
5
|
Project-URL: Homepage, https://github.com/zephel01/CodeRouter
|
|
6
6
|
Project-URL: Repository, https://github.com/zephel01/CodeRouter
|
|
@@ -185,6 +185,19 @@ class ProviderConfig(BaseModel):
|
|
|
185
185
|
)
|
|
186
186
|
timeout_s: float = Field(default=30.0, ge=1.0, le=600.0)
|
|
187
187
|
|
|
188
|
+
# v2.6 language-tax track: path to a LOCAL ``tokenizer.json`` for this
|
|
189
|
+
# provider's model, used to measure the CJK over-count vs the char/4
|
|
190
|
+
# baseline (see ``coderouter.language_tax``). Loaded local-file-only —
|
|
191
|
+
# never contacts the HuggingFace Hub. When unset, language-tax falls
|
|
192
|
+
# back to char/4 (multiplier 1.0) and the feature is silently inert.
|
|
193
|
+
tokenizer_path: str | None = Field(
|
|
194
|
+
default=None,
|
|
195
|
+
description=(
|
|
196
|
+
"Local tokenizer.json for accurate (language-tax) token "
|
|
197
|
+
"counting. No network access. Requires the 'accuracy' extra."
|
|
198
|
+
),
|
|
199
|
+
)
|
|
200
|
+
|
|
188
201
|
# Provider-specific extras merged into the outbound request body.
|
|
189
202
|
# Use for non-standard fields like Ollama's `think: false`, `keep_alive`,
|
|
190
203
|
# `options.num_ctx`, or any vendor-specific toggle. User-supplied request
|
|
@@ -763,6 +776,16 @@ class RuleMatcher(BaseModel):
|
|
|
763
776
|
``request.tools`` set). The ``has_tools`` matcher is the
|
|
764
777
|
profile-level lever for steering tool-laden traffic to the right
|
|
765
778
|
chain entirely.
|
|
779
|
+
|
|
780
|
+
Variants (v2.6 / language-tax routing):
|
|
781
|
+
|
|
782
|
+
- ``cjk_ratio_min: 0.3`` — CJK character ratio of the latest user
|
|
783
|
+
message is ``>=`` this threshold. Routes CJK-heavy turns (which
|
|
784
|
+
pay the cloud "language tax" of ~1.2-1.5x more tokens) to a local
|
|
785
|
+
model that bills nothing per token, while ASCII/code turns fall
|
|
786
|
+
through to the cloud chain. Per-turn property like
|
|
787
|
+
``code_fence_ratio_min``; see
|
|
788
|
+
:func:`coderouter.language_tax.cjk_char_ratio`.
|
|
766
789
|
"""
|
|
767
790
|
|
|
768
791
|
model_config = ConfigDict(extra="forbid")
|
|
@@ -773,6 +796,13 @@ class RuleMatcher(BaseModel):
|
|
|
773
796
|
content_regex: str | None = None
|
|
774
797
|
model_pattern: str | None = None
|
|
775
798
|
content_token_count_min: int | None = Field(default=None, ge=1)
|
|
799
|
+
# v2.6 language-tax routing: CJK character ratio of the latest user
|
|
800
|
+
# message >= this threshold. Lets operators steer CJK-heavy traffic
|
|
801
|
+
# (which carries the cloud language tax) to a local model that bills
|
|
802
|
+
# nothing per token. Operates on the latest user message like
|
|
803
|
+
# ``code_fence_ratio_min`` (a per-turn property), not the whole
|
|
804
|
+
# request. See ``coderouter.language_tax.cjk_char_ratio``.
|
|
805
|
+
cjk_ratio_min: float | None = Field(default=None, ge=0.0, le=1.0)
|
|
776
806
|
# [Unreleased]: tool-aware routing (OpenClaw + Raspberry Pi 由来).
|
|
777
807
|
# See class docstring "Variants ([Unreleased] / tool-aware routing)"
|
|
778
808
|
# above for the full rationale. Boolean shape mirrors ``has_image`` —
|
|
@@ -789,6 +819,7 @@ class RuleMatcher(BaseModel):
|
|
|
789
819
|
"model_pattern",
|
|
790
820
|
"content_token_count_min",
|
|
791
821
|
"has_tools",
|
|
822
|
+
"cjk_ratio_min",
|
|
792
823
|
)
|
|
793
824
|
|
|
794
825
|
@model_validator(mode="after")
|
|
@@ -58,9 +58,13 @@ in the cost calc.
|
|
|
58
58
|
from __future__ import annotations
|
|
59
59
|
|
|
60
60
|
from dataclasses import dataclass
|
|
61
|
+
from typing import TYPE_CHECKING
|
|
61
62
|
|
|
62
63
|
from coderouter.config.schemas import CostConfig
|
|
63
64
|
|
|
65
|
+
if TYPE_CHECKING: # avoid an import cycle at runtime; used only for typing
|
|
66
|
+
from coderouter.language_tax import LanguageTaxBreakdown
|
|
67
|
+
|
|
64
68
|
|
|
65
69
|
@dataclass(frozen=True)
|
|
66
70
|
class CostBreakdown:
|
|
@@ -82,6 +86,12 @@ class CostBreakdown:
|
|
|
82
86
|
chart. ``input_usd`` is "fresh input only" (does not
|
|
83
87
|
include cache buckets); cache_read_usd / cache_creation_usd
|
|
84
88
|
are the post-discount / post-premium values.
|
|
89
|
+
language_tax_multiplier: ``tokens_accurate / tokens_heuristic``
|
|
90
|
+
for the request text (v2.6 language-tax track). 1.0 when no
|
|
91
|
+
tax is measurable (English/code, or no accurate tokenizer).
|
|
92
|
+
language_tax_usd: USD share of ``total_usd`` attributable to the
|
|
93
|
+
CJK over-count vs CodeRouter's char/4 English baseline.
|
|
94
|
+
0.0 for free / local providers. See :mod:`coderouter.language_tax`.
|
|
85
95
|
"""
|
|
86
96
|
|
|
87
97
|
total_usd: float = 0.0
|
|
@@ -90,6 +100,10 @@ class CostBreakdown:
|
|
|
90
100
|
output_usd: float = 0.0
|
|
91
101
|
cache_read_usd: float = 0.0
|
|
92
102
|
cache_creation_usd: float = 0.0
|
|
103
|
+
# v2.6 language-tax track (additive; defaults keep pre-v2.6 behaviour
|
|
104
|
+
# and equality with a bare ``CostBreakdown()``).
|
|
105
|
+
language_tax_multiplier: float = 1.0
|
|
106
|
+
language_tax_usd: float = 0.0
|
|
93
107
|
|
|
94
108
|
|
|
95
109
|
_PER_MILLION: float = 1_000_000.0
|
|
@@ -102,6 +116,7 @@ def compute_cost_for_attempt(
|
|
|
102
116
|
output_tokens: int,
|
|
103
117
|
cache_read_input_tokens: int,
|
|
104
118
|
cache_creation_input_tokens: int,
|
|
119
|
+
language_tax: LanguageTaxBreakdown | None = None,
|
|
105
120
|
) -> CostBreakdown:
|
|
106
121
|
"""Translate per-attempt token counts into a USD :class:`CostBreakdown`.
|
|
107
122
|
|
|
@@ -144,6 +159,21 @@ def compute_cost_for_attempt(
|
|
|
144
159
|
full_rate_for_cache_read = safe_read * input_rate
|
|
145
160
|
savings_usd = full_rate_for_cache_read - cache_read_usd
|
|
146
161
|
|
|
162
|
+
# v2.6 language tax: the share of fresh-input spend attributable to
|
|
163
|
+
# the CJK over-count vs the char/4 English baseline. Defaults to a
|
|
164
|
+
# 1.0 multiplier / $0 when no LanguageTaxBreakdown is supplied, so
|
|
165
|
+
# the pre-v2.6 call shape is unchanged.
|
|
166
|
+
lt_multiplier = 1.0
|
|
167
|
+
lt_usd = 0.0
|
|
168
|
+
if language_tax is not None:
|
|
169
|
+
lt_multiplier = language_tax.tax_multiplier
|
|
170
|
+
from coderouter.language_tax import language_tax_usd
|
|
171
|
+
|
|
172
|
+
lt_usd = language_tax_usd(
|
|
173
|
+
language_tax.extra_tokens,
|
|
174
|
+
input_tokens_per_million=cost_config.input_tokens_per_million,
|
|
175
|
+
)
|
|
176
|
+
|
|
147
177
|
return CostBreakdown(
|
|
148
178
|
total_usd=total_usd,
|
|
149
179
|
savings_usd=max(savings_usd, 0.0),
|
|
@@ -151,4 +181,6 @@ def compute_cost_for_attempt(
|
|
|
151
181
|
output_usd=output_usd,
|
|
152
182
|
cache_read_usd=cache_read_usd,
|
|
153
183
|
cache_creation_usd=cache_creation_usd,
|
|
184
|
+
language_tax_multiplier=lt_multiplier,
|
|
185
|
+
language_tax_usd=lt_usd,
|
|
154
186
|
)
|
|
@@ -165,6 +165,26 @@ _DASHBOARD_HTML = r"""<!doctype html>
|
|
|
165
165
|
</main>
|
|
166
166
|
|
|
167
167
|
<footer class="max-w-7xl mx-auto px-4 md:px-6 pb-8">
|
|
168
|
+
<!-- Panel: Cost & Language Tax (v2.6) -->
|
|
169
|
+
<section class="bg-slate-900/60 border border-slate-800 rounded-lg p-4 mb-4">
|
|
170
|
+
<h2 class="text-sm font-semibold uppercase tracking-wider text-slate-400 mb-3">Cost & Language Tax</h2>
|
|
171
|
+
<div class="grid grid-cols-3 gap-3">
|
|
172
|
+
<div class="rounded-md bg-slate-800/50 p-3">
|
|
173
|
+
<div class="text-xs text-slate-400">Total spend</div>
|
|
174
|
+
<div class="text-2xl font-semibold tabnum" data-bind="cost_total">$0.00</div>
|
|
175
|
+
</div>
|
|
176
|
+
<div class="rounded-md bg-slate-800/50 p-3">
|
|
177
|
+
<div class="text-xs text-slate-400">Cache savings</div>
|
|
178
|
+
<div class="text-2xl font-semibold tabnum text-green-400" data-bind="cost_savings">$0.00</div>
|
|
179
|
+
</div>
|
|
180
|
+
<div class="rounded-md bg-slate-800/50 p-3">
|
|
181
|
+
<div class="text-xs text-slate-400">Language tax (CJK)</div>
|
|
182
|
+
<div class="text-2xl font-semibold tabnum text-amber-400" data-bind="language_tax_total">$0.00</div>
|
|
183
|
+
<div class="text-xs text-slate-500" data-bind="language_tax_hint">no tokenizer configured</div>
|
|
184
|
+
</div>
|
|
185
|
+
</div>
|
|
186
|
+
<div id="language-tax-by-provider" class="text-xs text-slate-400 tabnum mt-3"></div>
|
|
187
|
+
</section>
|
|
168
188
|
<section class="bg-slate-900/60 border border-slate-800 rounded-lg p-4">
|
|
169
189
|
<h2 class="text-sm font-semibold uppercase tracking-wider text-slate-400 mb-3">Usage Mix</h2>
|
|
170
190
|
<div id="usage-bar" class="flex h-3 rounded-full overflow-hidden bg-slate-800" role="img" aria-label="usage mix"></div>
|
|
@@ -435,6 +455,27 @@ _DASHBOARD_HTML = r"""<!doctype html>
|
|
|
435
455
|
{"&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;"}[c]
|
|
436
456
|
));
|
|
437
457
|
|
|
458
|
+
// v2.6: cost + language-tax panel. The collector zero-fills these, so
|
|
459
|
+
// a fresh/local-only deployment shows $0.00 across the board.
|
|
460
|
+
const renderCostTax = (snap) => {
  // Dollar formatter: coerces to a number (missing / non-numeric -> 0)
  // and prints four decimals.
  const usd = (amount) => "$" + (Number(amount) || 0).toFixed(4);
  const counters = snap.counters || {};

  // Headline cost figures.
  setBind("cost_total", usd(counters.cost_total_usd_aggregate));
  setBind("cost_savings", usd(counters.cost_savings_usd_aggregate));

  // Aggregate language tax plus a hint telling the operator whether
  // anything was measured at all.
  const taxTotal = Number(counters.language_tax_usd_aggregate) || 0;
  let hint = "no tax measured (set provider tokenizer_path)";
  if (taxTotal > 0) {
    hint = "extra paid for CJK vs char/4 baseline";
  }
  setBind("language_tax_total", usd(taxTotal));
  setBind("language_tax_hint", hint);

  // Per-provider breakdown: one chip per provider with a positive amount.
  // Provider names are escaped before being placed into markup.
  const chips = [];
  for (const [name, amount] of Object.entries(counters.language_tax_usd || {})) {
    if (Number(amount) > 0) {
      chips.push(
        '<span class="mr-4"><span class="text-slate-500">' + escapeHTML(name) +
        '</span> ' + usd(amount) + '</span>'
      );
    }
  }
  // An empty chip list joins to the empty string, clearing the row.
  document.getElementById("language-tax-by-provider").innerHTML = chips.join("");
};
|
|
478
|
+
|
|
438
479
|
const renderSnapshot = (snap) => {
|
|
439
480
|
const startup = snap.startup || {};
|
|
440
481
|
const cfg = snap.config || {};
|
|
@@ -451,6 +492,7 @@ _DASHBOARD_HTML = r"""<!doctype html>
|
|
|
451
492
|
renderSparkline(snap);
|
|
452
493
|
renderRecent(snap);
|
|
453
494
|
renderUsageMix(snap);
|
|
495
|
+
renderCostTax(snap);
|
|
454
496
|
};
|
|
455
497
|
|
|
456
498
|
const renderError = (msg) => {
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
"""Language-tax measurement (Phase 1 PoC, 5-deps invariant).
|
|
2
|
+
|
|
3
|
+
Why this module exists
|
|
4
|
+
======================
|
|
5
|
+
|
|
6
|
+
Cloud LLM tokenizers charge CJK text far more tokens-per-character
|
|
7
|
+
than English. CodeRouter's core router uses a ``char/4`` heuristic
|
|
8
|
+
(:mod:`coderouter.token_estimation`) which is *conservative for CJK*
|
|
9
|
+
— i.e. it **under-counts** Japanese/Chinese/Korean text. That gap is
|
|
10
|
+
the "language tax": a Japanese prompt that the heuristic prices at N
|
|
11
|
+
tokens is actually billed at ~1.2-1.5x N by the cloud provider.
|
|
12
|
+
|
|
13
|
+
Local models are unaffected (no per-token billing), so the tax only
|
|
14
|
+
matters on the cloud leg. This module quantifies it so the cost
|
|
15
|
+
tracker / dashboard can surface "how much extra am I paying to work
|
|
16
|
+
in Japanese?".
|
|
17
|
+
|
|
18
|
+
Design constraints (mirrors token_estimation_accurate.py)
|
|
19
|
+
=========================================================
|
|
20
|
+
|
|
21
|
+
* **No new core dependency.** CJK detection is pure ``str`` + Unicode
|
|
22
|
+
range checks (stdlib only). The *accurate* token count is delegated
|
|
23
|
+
to :func:`coderouter.token_estimation_accurate.count_tokens`, whose
|
|
24
|
+
precise backend (HuggingFace ``tokenizers``) is the existing
|
|
25
|
+
optional ``accuracy`` extra. When that backend is absent every
|
|
26
|
+
function still returns a sane value — the tax_multiplier simply
|
|
27
|
+
collapses to 1.0 because both legs use char/4.
|
|
28
|
+
* **Local only / no network.** No tokenizer is ever downloaded; we
|
|
29
|
+
only pass through a caller-supplied local ``tokenizer.json`` path.
|
|
30
|
+
* **Leaf module.** Imports only ``token_estimation`` /
|
|
31
|
+
``token_estimation_accurate`` (both leaves), never the engine or
|
|
32
|
+
collector — keeps it trivially testable and circular-import-free.
|
|
33
|
+
|
|
34
|
+
The tax multiplier, defined
|
|
35
|
+
===========================
|
|
36
|
+
|
|
37
|
+
``tax_multiplier = tokens_accurate / tokens_heuristic``
|
|
38
|
+
|
|
39
|
+
where ``tokens_heuristic`` is the char/4 estimate (CodeRouter's
|
|
40
|
+
English-calibrated baseline) and ``tokens_accurate`` is the real
|
|
41
|
+
tokenizer count. Reading it:
|
|
42
|
+
|
|
43
|
+
* English / code text → real tokenizers land near char/4, so the
|
|
44
|
+
multiplier is ~1.0 (no tax).
|
|
45
|
+
* Japanese prose → real tokenizers emit ~0.5-1.0 tokens/char vs the
|
|
46
|
+
0.25 the heuristic assumes, so the multiplier lands ~2.0-4.0 on
|
|
47
|
+
*pure* CJK and ~1.2-1.5 on realistic mixed coding prompts (CJK
|
|
48
|
+
comments/instructions + ASCII code/identifiers).
|
|
49
|
+
|
|
50
|
+
Confidence: **MODERATE.** char/4 is itself an approximation of
|
|
51
|
+
English, so the multiplier is "tax relative to CodeRouter's own
|
|
52
|
+
English baseline", not a lab-grade JA-vs-EN figure. It is, however,
|
|
53
|
+
fully measurable with zero network and no guessing — which is why we
|
|
54
|
+
prefer it to a translate-and-compare counterfactual.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
from __future__ import annotations
|
|
58
|
+
|
|
59
|
+
from dataclasses import dataclass
|
|
60
|
+
from pathlib import Path
|
|
61
|
+
from typing import Any
|
|
62
|
+
|
|
63
|
+
from coderouter.token_estimation import (
|
|
64
|
+
CHARS_PER_TOKEN_HEURISTIC,
|
|
65
|
+
extract_text_from_anthropic_request,
|
|
66
|
+
)
|
|
67
|
+
from coderouter.token_estimation_accurate import count_tokens
|
|
68
|
+
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
# CJK Unicode ranges
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
#
|
|
73
|
+
# We count a character as "CJK" when it falls in one of the blocks that
|
|
74
|
+
# real tokenizers fragment heavily. Latin, digits, punctuation and
|
|
75
|
+
# whitespace are excluded so that an ASCII-only prompt scores 0.0 and a
|
|
76
|
+
# pure-Japanese prompt scores ~1.0. Half-width katakana and full-width
|
|
77
|
+
# forms are included because they tokenize like their full-width kin.
|
|
78
|
+
#
|
|
79
|
+
# Ranges are (low, high) inclusive code points.
|
|
80
|
+
_CJK_RANGES: tuple[tuple[int, int], ...] = (
    (0x3040, 0x309F),  # Hiragana
    (0x30A0, 0x30FF),  # Katakana
    (0x3400, 0x4DBF),  # CJK Unified Ideographs Extension A
    (0x4E00, 0x9FFF),  # CJK Unified Ideographs (common Kanji/Hanzi)
    (0xF900, 0xFAFF),  # CJK Compatibility Ideographs
    (0xFF00, 0xFFEF),  # Half/Full-width forms (full-width punct, half kana)
    (0x3000, 0x303F),  # CJK symbols & punctuation (、。「」etc.)
    (0xAC00, 0xD7A3),  # Hangul syllables (Korean)
    (0x1100, 0x11FF),  # Hangul Jamo
    (0x20000, 0x2A6DF),  # CJK Ext. B (rare ideographs)
    # Blocks that were previously missed and therefore scored as non-CJK:
    (0x3130, 0x318F),  # Hangul Compatibility Jamo (standalone ㄱ / ㅋㅋㅋ)
    (0x31F0, 0x31FF),  # Katakana Phonetic Extensions
    (0x2A700, 0x2EBEF),  # CJK Ext. C-F (contiguous rare-ideograph blocks)
)


def _is_cjk(cp: int) -> bool:
    """Return True when code point ``cp`` lies in any ``_CJK_RANGES`` block.

    Both ends of every range are inclusive.
    """
    return any(low <= cp <= high for low, high in _CJK_RANGES)


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def cjk_char_ratio(text: str) -> float:
    """Fraction of *non-whitespace* characters in ``text`` that are CJK.

    Whitespace (anything for which ``str.isspace`` is true) is left out of
    both the numerator and the denominator, so indentation and blank lines
    in a code block don't dilute the score.

    Args:
        text: the text to score; may be empty.

    Returns:
        A float in ``[0.0, 1.0]``: ``0.0`` for empty, whitespace-only or
        pure-ASCII text, ``1.0`` when every non-whitespace character falls
        in one of the ``_CJK_RANGES`` blocks.
    """
    if not text:
        return 0.0
    cjk = 0
    total = 0
    for ch in text:
        if ch.isspace():
            continue
        total += 1
        if _is_cjk(ord(ch)):
            cjk += 1
    # Whitespace-only input leaves the denominator at zero.
    if total == 0:
        return 0.0
    return cjk / total
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@dataclass(frozen=True)
class LanguageTaxBreakdown:
    """Immutable result of one language-tax measurement.

    Every field has a default, so a bare ``LanguageTaxBreakdown()`` is the
    "nothing measured" value: zero counts and a multiplier of 1.0.

    Fields
        char_count: total length of the measured text (``len(text)``,
            whitespace included).
        cjk_ratio: share of non-whitespace characters that are CJK, as
            computed by :func:`cjk_char_ratio` (0.0-1.0).
        tokens_heuristic: the char/4 estimate, i.e. CodeRouter's
            English-calibrated baseline.
        tokens_accurate: the count returned by the token counter. It is
            the real tokenizer count when a ``tokenizer_path`` was given
            and the optional backend is usable; otherwise it is expected
            to equal ``tokens_heuristic``.
        accurate_available: True when ``tokens_accurate`` is taken to come
            from the precise backend, False when it is the char/4 fallback.
        tax_multiplier: ``tokens_accurate / tokens_heuristic``; 1.0 when
            no tax is measurable. This is a MODERATE-confidence figure —
            see the module docstring.
        extra_tokens: ``tokens_accurate - tokens_heuristic``, the tax
            expressed in tokens. :func:`estimate_language_tax` clamps it
            at zero before storing it here.
    """

    char_count: int = 0
    cjk_ratio: float = 0.0
    tokens_heuristic: int = 0
    tokens_accurate: int = 0
    accurate_available: bool = False
    tax_multiplier: float = 1.0
    extra_tokens: int = 0
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def estimate_language_tax(
    text: str,
    *,
    tokenizer_path: str | Path | None = None,
) -> LanguageTaxBreakdown:
    """Measure the language tax of ``text``.

    Two token counts are compared: the char/4 baseline
    (``len(text) // CHARS_PER_TOKEN_HEURISTIC``) and whatever
    :func:`count_tokens` reports for the same text and ``tokenizer_path``.

    Args:
        text: the text to measure; empty text yields the all-default
            breakdown.
        tokenizer_path: optional path to a local ``tokenizer.json``,
            passed straight through to :func:`count_tokens`.

    Returns:
        A :class:`LanguageTaxBreakdown`. When the two counts are equal
        the multiplier is 1.0 and ``extra_tokens`` is 0.
    """
    if not text:
        return LanguageTaxBreakdown()

    baseline = len(text) // CHARS_PER_TOKEN_HEURISTIC
    measured = count_tokens(text, tokenizer_path=tokenizer_path)

    # The precise backend is only considered "used" when a tokenizer was
    # requested and the result differs from the char/4 baseline.
    used_precise_backend = tokenizer_path is not None and measured != baseline

    if baseline > 0:
        ratio = measured / baseline
        surplus = measured - baseline
    else:
        # Very short text: the baseline rounds down to zero, so a ratio
        # would be undefined. Report a neutral multiplier and count every
        # measured token as surplus.
        ratio = 1.0
        surplus = measured

    return LanguageTaxBreakdown(
        char_count=len(text),
        cjk_ratio=cjk_char_ratio(text),
        tokens_heuristic=baseline,
        tokens_accurate=measured,
        accurate_available=used_precise_backend,
        tax_multiplier=ratio,
        # Never report a negative tax.
        extra_tokens=max(surplus, 0),
    )
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def language_tax_usd(
    extra_tokens: int,
    *,
    input_tokens_per_million: float | None,
) -> float:
    """USD attributable to the language tax for one request leg.

    Args:
        extra_tokens: the :attr:`LanguageTaxBreakdown.extra_tokens` delta,
            i.e. tokens billed beyond the char/4 baseline.
        input_tokens_per_million: the provider's input price in USD per
            one million tokens, or ``None`` when the provider is unpriced.

    Returns:
        ``extra_tokens`` priced at the input rate. ``0.0`` when the rate
        is ``None``, zero or negative, or when ``extra_tokens`` is not
        positive, so the result is never negative.
    """
    # A missing, zero or negative rate means "unpriced". The negative case
    # previously slipped through the truthiness check and produced a
    # negative tax.
    if input_tokens_per_million is None or input_tokens_per_million <= 0:
        return 0.0
    if extra_tokens <= 0:
        return 0.0
    return extra_tokens * (input_tokens_per_million / 1_000_000.0)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def estimate_language_tax_for_request(
    system: Any,
    messages: list[Any],
    *,
    tokenizer_path: str | Path | None = None,
) -> LanguageTaxBreakdown:
    """Measure the language tax of a whole Anthropic-shaped request.

    Convenience wrapper: the request text is obtained from
    :func:`extract_text_from_anthropic_request` (given ``system`` and
    ``messages``) and handed to :func:`estimate_language_tax`.

    Args:
        system: the request's system prompt, in whatever shape
            ``extract_text_from_anthropic_request`` accepts.
        messages: the request's message list.
        tokenizer_path: optional path to a local ``tokenizer.json``,
            forwarded unchanged.

    Returns:
        The :class:`LanguageTaxBreakdown` for the extracted text.
    """
    return estimate_language_tax(
        extract_text_from_anthropic_request(system=system, messages=messages),
        tokenizer_path=tokenizer_path,
    )
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
# Public names of this module: the result dataclass, the CJK ratio helper,
# the two estimators (per-text and per-request) and the USD conversion.
__all__ = [
    "LanguageTaxBreakdown",
    "cjk_char_ratio",
    "estimate_language_tax",
    "estimate_language_tax_for_request",
    "language_tax_usd",
]
|
|
@@ -971,6 +971,10 @@ class CacheObservedPayload(TypedDict):
|
|
|
971
971
|
streaming: bool
|
|
972
972
|
cost_usd: float
|
|
973
973
|
cost_savings_usd: float
|
|
974
|
+
# v2.6 language-tax track (optional; default 0.0 / 1.0 at the emit
|
|
975
|
+
# site keeps pre-v2.6 callers and log consumers working unchanged).
|
|
976
|
+
language_tax_usd: float
|
|
977
|
+
language_tax_multiplier: float
|
|
974
978
|
|
|
975
979
|
|
|
976
980
|
def log_cache_observed(
|
|
@@ -986,6 +990,8 @@ def log_cache_observed(
|
|
|
986
990
|
streaming: bool,
|
|
987
991
|
cost_usd: float = 0.0,
|
|
988
992
|
cost_savings_usd: float = 0.0,
|
|
993
|
+
language_tax_usd: float = 0.0,
|
|
994
|
+
language_tax_multiplier: float = 1.0,
|
|
989
995
|
) -> None:
|
|
990
996
|
"""Emit a ``cache-observed`` info record with the unified shape.
|
|
991
997
|
|
|
@@ -1013,6 +1019,8 @@ def log_cache_observed(
|
|
|
1013
1019
|
"streaming": streaming,
|
|
1014
1020
|
"cost_usd": cost_usd,
|
|
1015
1021
|
"cost_savings_usd": cost_savings_usd,
|
|
1022
|
+
"language_tax_usd": language_tax_usd,
|
|
1023
|
+
"language_tax_multiplier": language_tax_multiplier,
|
|
1016
1024
|
}
|
|
1017
1025
|
logger.info("cache-observed", extra=payload)
|
|
1018
1026
|
|
|
@@ -190,6 +190,13 @@ class MetricsCollector(logging.Handler):
|
|
|
190
190
|
self._cost_total_usd_aggregate: float = 0.0
|
|
191
191
|
self._cost_savings_usd_aggregate: float = 0.0
|
|
192
192
|
|
|
193
|
+
# v2.6: per-provider language-tax spend — the USD share of input
|
|
194
|
+
# cost attributable to the CJK over-count vs the char/4 baseline.
|
|
195
|
+
# Zero for English/code workloads and for providers without a
|
|
196
|
+
# configured tokenizer_path. Surfaced alongside cost_total_usd.
|
|
197
|
+
self._language_tax_usd: dict[str, float] = {}
|
|
198
|
+
self._language_tax_usd_aggregate: float = 0.0
|
|
199
|
+
|
|
193
200
|
# v2.0-F (L1): context budget guard counters. Per-profile counts
|
|
194
201
|
# of warnings (over warn threshold) and trims (messages removed).
|
|
195
202
|
# The ``latest_usage_ratio`` dict records the most recent ratio
|
|
@@ -388,6 +395,22 @@ class MetricsCollector(logging.Handler):
|
|
|
388
395
|
self._cost_savings_usd.get(provider, 0.0) + savings_usd
|
|
389
396
|
)
|
|
390
397
|
self._cost_savings_usd_aggregate += savings_usd
|
|
398
|
+
|
|
399
|
+
# v2.6: language-tax spend. Same defensive coercion as the
|
|
400
|
+
# cost fields; defaults to 0.0 for pre-v2.6 log lines and
|
|
401
|
+
# English/code traffic, so the aggregate only moves on
|
|
402
|
+
# CJK-heavy requests against a tokenizer-configured provider.
|
|
403
|
+
lt_usd_raw = extras.get("language_tax_usd", 0.0)
|
|
404
|
+
lt_usd = (
|
|
405
|
+
float(lt_usd_raw)
|
|
406
|
+
if isinstance(lt_usd_raw, int | float)
|
|
407
|
+
else 0.0
|
|
408
|
+
)
|
|
409
|
+
if lt_usd > 0.0:
|
|
410
|
+
self._language_tax_usd[provider] = (
|
|
411
|
+
self._language_tax_usd.get(provider, 0.0) + lt_usd
|
|
412
|
+
)
|
|
413
|
+
self._language_tax_usd_aggregate += lt_usd
|
|
391
414
|
elif event == "context-budget-warning":
|
|
392
415
|
# v2.0-F (L1): context usage exceeded the warn threshold.
|
|
393
416
|
# Track per-profile and aggregate, plus latest ratio gauge.
|
|
@@ -522,6 +545,10 @@ class MetricsCollector(logging.Handler):
|
|
|
522
545
|
"savings_usd": round(
|
|
523
546
|
self._cost_savings_usd.get(name, 0.0), 6
|
|
524
547
|
),
|
|
548
|
+
# v2.6: per-provider language-tax spend.
|
|
549
|
+
"language_tax_usd": round(
|
|
550
|
+
self._language_tax_usd.get(name, 0.0), 6
|
|
551
|
+
),
|
|
525
552
|
},
|
|
526
553
|
}
|
|
527
554
|
for name in providers
|
|
@@ -589,6 +616,14 @@ class MetricsCollector(logging.Handler):
|
|
|
589
616
|
"cost_savings_usd_aggregate": round(
|
|
590
617
|
self._cost_savings_usd_aggregate, 6
|
|
591
618
|
),
|
|
619
|
+
# v2.6: per-provider + aggregate language-tax spend.
|
|
620
|
+
"language_tax_usd": {
|
|
621
|
+
n: round(v, 6)
|
|
622
|
+
for n, v in self._language_tax_usd.items()
|
|
623
|
+
},
|
|
624
|
+
"language_tax_usd_aggregate": round(
|
|
625
|
+
self._language_tax_usd_aggregate, 6
|
|
626
|
+
),
|
|
592
627
|
# v2.0-F (L1): context budget guard aggregate counters.
|
|
593
628
|
"context_budget_warnings_total": self._context_budget_warnings_total,
|
|
594
629
|
"context_budget_trims_total": self._context_budget_trims_total,
|
|
@@ -682,6 +717,13 @@ class MetricsCollector(logging.Handler):
|
|
|
682
717
|
self._cost_savings_usd_aggregate += float(
|
|
683
718
|
state.get("cost_savings_usd_aggregate", 0.0)
|
|
684
719
|
)
|
|
720
|
+
for k, v in (state.get("language_tax_usd") or {}).items():
|
|
721
|
+
self._language_tax_usd[k] = (
|
|
722
|
+
self._language_tax_usd.get(k, 0.0) + float(v)
|
|
723
|
+
)
|
|
724
|
+
self._language_tax_usd_aggregate += float(
|
|
725
|
+
state.get("language_tax_usd_aggregate", 0.0)
|
|
726
|
+
)
|
|
685
727
|
self._chain_paid_gate_blocked_total += int(
|
|
686
728
|
state.get("chain_paid_gate_blocked_total", 0)
|
|
687
729
|
)
|
|
@@ -737,6 +779,9 @@ class MetricsCollector(logging.Handler):
|
|
|
737
779
|
self._cost_savings_usd.clear()
|
|
738
780
|
self._cost_total_usd_aggregate = 0.0
|
|
739
781
|
self._cost_savings_usd_aggregate = 0.0
|
|
782
|
+
# v2.6
|
|
783
|
+
self._language_tax_usd.clear()
|
|
784
|
+
self._language_tax_usd_aggregate = 0.0
|
|
740
785
|
# v2.0-H (L6)
|
|
741
786
|
self._partial_stitch_surfaced_total = 0
|
|
742
787
|
# v2.0-I
|
|
@@ -39,6 +39,7 @@ import re
|
|
|
39
39
|
from typing import TYPE_CHECKING, Any
|
|
40
40
|
|
|
41
41
|
from coderouter.config.schemas import AutoRouterConfig, AutoRouteRule, RuleMatcher
|
|
42
|
+
from coderouter.language_tax import cjk_char_ratio
|
|
42
43
|
from coderouter.token_estimation import estimate_tokens_from_body as _estimate_total_tokens
|
|
43
44
|
|
|
44
45
|
if TYPE_CHECKING:
|
|
@@ -181,6 +182,12 @@ def _match_rule(
|
|
|
181
182
|
return message is not None and _has_image(message)
|
|
182
183
|
if m.code_fence_ratio_min is not None:
|
|
183
184
|
return _code_fence_ratio(text) >= m.code_fence_ratio_min
|
|
185
|
+
if m.cjk_ratio_min is not None:
|
|
186
|
+
# v2.6: language-tax routing. CJK ratio of the latest user
|
|
187
|
+
# message — a per-turn property like code_fence_ratio_min, so it
|
|
188
|
+
# reuses ``text`` (latest user message) rather than walking the
|
|
189
|
+
# whole request. Steers CJK-heavy turns to a local, tax-free model.
|
|
190
|
+
return cjk_char_ratio(text) >= m.cjk_ratio_min
|
|
184
191
|
if m.content_contains is not None:
|
|
185
192
|
return m.content_contains in text
|
|
186
193
|
if m.content_regex is not None:
|