runtimerouter 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ """
2
+ RuntimeRouter — LLM routing library for the Python AI ecosystem.
3
+
4
+ Optimize the entire AI session, not just the next model call.
5
+ """
6
+
7
+ from runtimerouter.autollm import AutoLLM
8
+ from runtimerouter.router import Router
9
+ from runtimerouter.types import (
10
+ ComplexityLevel,
11
+ ModelCandidate,
12
+ RouteContext,
13
+ RouteDecision,
14
+ RouterConfig,
15
+ )
16
+
17
+ __version__ = "0.1.0"
18
+
19
+ __all__ = [
20
+ "AutoLLM",
21
+ "Router",
22
+ "ComplexityLevel",
23
+ "ModelCandidate",
24
+ "RouteContext",
25
+ "RouteDecision",
26
+ "RouterConfig",
27
+ "__version__",
28
+ ]
@@ -0,0 +1,110 @@
1
+ """
2
+ AutoLLM — the primary user-facing entry point.
3
+
4
+ Responsibility:
5
+ - Accept user prompts/messages
6
+ - Delegate routing to Router
7
+ - Delegate execution to LiteLLM integration
8
+ - Return unified responses
9
+
10
+ Boundary: AutoLLM orchestrates; it does NOT implement routing policies.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from typing import Any
16
+
17
+ from runtimerouter.integrations.litellm import LiteLLMIntegration
18
+ from runtimerouter.router import Router
19
+ from runtimerouter.types import RouteContext, RouterConfig
20
+
21
+
22
class AutoLLM:
    """
    Automatic model selection and invocation.

    Usage::

        from runtimerouter import AutoLLM

        llm = AutoLLM()
        response = llm.invoke("Analyze the entire code repository for me")

    The router selects a model; the caller never names one. AutoLLM only
    orchestrates: routing is delegated to ``Router`` and execution to the
    LiteLLM integration.
    """

    def __init__(
        self,
        config: RouterConfig | None = None,
        router: Router | None = None,
        integration: LiteLLMIntegration | None = None,
    ) -> None:
        """
        Wire up configuration, router and execution layer.

        Args:
            config: Shared configuration; a default ``RouterConfig`` is
                created when omitted.
            router: Custom router; defaults to ``Router(config=self.config)``.
            integration: Custom execution layer; defaults to
                ``LiteLLMIntegration(config=self.config)``.
        """
        self.config = config or RouterConfig()
        self.router = router or Router(config=self.config)
        self.integration = integration or LiteLLMIntegration(config=self.config)

    @staticmethod
    def _build_context(
        prompt: str,
        messages: list[dict[str, Any]] | None = None,
    ) -> RouteContext:
        """
        Build the routing context shared by invoke/ainvoke/route_only.

        Without ``messages`` the conversation is a single user turn holding
        ``prompt``. With ``messages`` the prompt is appended as the final user
        turn unless the history already ends with exactly that turn, so the
        prompt is never silently dropped and never sent twice.
        """
        user_turn = {"role": "user", "content": prompt}
        if not messages:
            conversation = [user_turn]
        else:
            # Copy so the caller's history list is never mutated.
            conversation = list(messages)
            last = conversation[-1]
            already_last = last.get("role") == "user" and last.get("content") == prompt
            if prompt and not already_last:
                conversation.append(user_turn)
        return RouteContext(prompt=prompt, messages=conversation)

    def invoke(
        self,
        prompt: str,
        *,
        messages: list[dict[str, Any]] | None = None,
        **kwargs: Any,
    ) -> Any:
        """
        Route the request to a model and return the LLM response.

        Args:
            prompt: User prompt text.
            messages: Optional chat history in OpenAI message format. The
                prompt is appended as the last user turn when the history
                does not already end with it.
            **kwargs: Passed through to the integration's ``completion``.

        Returns:
            Whatever the integration's ``completion`` returns.

        Raises:
            RoutingError: Raised by the default router when no candidate
                survives the policy chain and no fallback model is set.
            IntegrationError: Raised by the default integration when
                ``litellm`` cannot be imported.
        """
        context = self._build_context(prompt, messages)
        decision = self.router.route(context)
        return self.integration.completion(
            model=decision.selected_model,
            messages=context.messages,
            route_decision=decision,
            **kwargs,
        )

    async def ainvoke(
        self,
        prompt: str,
        *,
        messages: list[dict[str, Any]] | None = None,
        **kwargs: Any,
    ) -> Any:
        """
        Async variant of :meth:`invoke`.

        Routing itself is synchronous; only the integration's
        ``acompletion`` call is awaited.
        """
        context = self._build_context(prompt, messages)
        decision = self.router.route(context)
        return await self.integration.acompletion(
            model=decision.selected_model,
            messages=context.messages,
            route_decision=decision,
            **kwargs,
        )

    def route_only(self, prompt: str) -> Any:
        """
        Return the routing decision without invoking any model.

        Useful for debugging and observability. The value is whatever
        ``self.router.route`` returns (a ``RouteDecision`` for the default
        router).
        """
        return self.router.route(self._build_context(prompt))
@@ -0,0 +1,62 @@
1
+ """
2
+ TaskClassifier — enriches RouteContext before policy evaluation.
3
+
4
+ Responsibility (v0.1):
5
+ - Estimate task complexity (ComplexityLevel)
6
+ - Optionally estimate input token count
7
+
8
+ Boundary: Classification heuristics belong here, NOT in policies or Router.
9
+ Implementation of classification algorithms is deferred to future PRs.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from runtimerouter.types import ComplexityLevel, RouteContext
15
+
16
+
17
class TaskClassifier:
    """
    Enriches routing context with task metadata.

    v0.1 ships the interface only. Concrete classification logic
    (rule-based, embedding-based, or LLM-as-judge) will be added
    in subsequent releases.
    """

    def enrich(self, context: RouteContext) -> RouteContext:
        """
        Return a copy of context with complexity and token estimates filled in.

        Values already present on the input are preserved. "Present" means
        ``is not None``, so a caller-supplied ``estimated_input_tokens`` of 0
        is kept rather than being re-estimated.

        Args:
            context: Routing context to enrich; it is not modified.

        Returns:
            The result of ``context.model_copy`` with both fields set.
        """
        complexity = context.complexity
        if complexity is None:
            complexity = self._classify_complexity(context)

        tokens = context.estimated_input_tokens
        if tokens is None:
            tokens = self._estimate_tokens(context)

        return context.model_copy(
            update={
                "complexity": complexity,
                "estimated_input_tokens": tokens,
            }
        )

    def _classify_complexity(self, context: RouteContext) -> ComplexityLevel:
        """
        Classify task complexity.

        TODO(v0.1): Implement heuristic or model-based classification.
        Default stub returns MODERATE for all requests.
        """
        _ = context  # unused until real classification lands
        return ComplexityLevel.MODERATE

    def _estimate_tokens(self, context: RouteContext) -> int:
        """
        Estimate input token count from prompt/messages.

        TODO(v0.1): Integrate tiktoken or LiteLLM token counter.
        Default stub uses a rough estimate of one token per four characters,
        never less than 1. When messages are present they are the only source
        of text; otherwise the prompt is used.
        """
        if context.messages:
            # A message whose content is None contributes no text; str(None)
            # would otherwise be counted as the four characters "None".
            pieces = (m.get("content") for m in context.messages)
            text = " ".join("" if piece is None else str(piece) for piece in pieces)
        else:
            text = context.prompt or ""
        return max(1, len(text) // 4)
@@ -0,0 +1,38 @@
1
+ """
2
+ Configuration loading utilities.
3
+
4
+ Responsibility: load RouterConfig from environment variables or a plain dict (YAML/JSON file loading is not implemented in this version).
5
+ Does NOT contain routing logic.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ from typing import Any
12
+
13
+ from runtimerouter.types import RouterConfig
14
+
15
+
16
def load_config_from_env(prefix: str = "RUNTIMEROUTER_") -> RouterConfig:
    """
    Build RouterConfig from environment variables.

    Supported variables (v0.1), shown with the default prefix:
    - RUNTIMEROUTER_FALLBACK_MODEL
    - RUNTIMEROUTER_ENABLE_COMPLEXITY_ROUTING
    - RUNTIMEROUTER_ENABLE_COST_ROUTING

    Boolean flags default to enabled when the variable is unset. A set
    variable enables the flag when its value, trimmed and lower-cased, is one
    of ``true``, ``1``, ``yes`` or ``on``; any other value disables it.

    Args:
        prefix: String prepended to each variable name.

    Returns:
        A RouterConfig populated from the environment; all other fields keep
        their defaults.
    """
    truthy = frozenset({"1", "true", "yes", "on"})

    def read_flag(suffix: str) -> bool:
        raw = os.getenv(f"{prefix}{suffix}")
        if raw is None:
            return True
        return raw.strip().lower() in truthy

    fallback = os.getenv(f"{prefix}FALLBACK_MODEL")
    if fallback is not None:
        # A blank value means "no fallback", not a model literally named "".
        fallback = fallback.strip() or None

    return RouterConfig(
        fallback_model=fallback,
        enable_complexity_routing=read_flag("ENABLE_COMPLEXITY_ROUTING"),
        enable_cost_routing=read_flag("ENABLE_COST_ROUTING"),
    )
34
+
35
+
36
def load_config_from_dict(data: dict[str, Any]) -> RouterConfig:
    """
    Validate a plain dictionary into a RouterConfig.

    Args:
        data: Mapping of RouterConfig field names to values.

    Returns:
        The result of ``RouterConfig.model_validate(data)``.
    """
    validated = RouterConfig.model_validate(data)
    return validated
@@ -0,0 +1,25 @@
1
+ """RuntimeRouter exception hierarchy."""
2
+
3
+
4
class RuntimeRouterError(Exception):
    """Root of the package's exception hierarchy; catch this to handle any RuntimeRouter error."""


class RoutingError(RuntimeRouterError):
    """Signals that routing finished without a usable model."""


class PolicyError(RuntimeRouterError):
    """Signals that a routing policy failed or produced invalid output."""


class ClassifierError(RuntimeRouterError):
    """Signals that task complexity classification failed."""


class ProviderError(RuntimeRouterError):
    """Signals that a model provider is unavailable or misconfigured."""


class IntegrationError(RuntimeRouterError):
    """Signals a failure in an external integration such as LiteLLM."""
@@ -0,0 +1,10 @@
1
+ """
2
+ External integrations — adapters for third-party execution layers.
3
+
4
+ v0.1: LiteLLM integration only.
5
+ Future: LangGraph, PydanticAI, CrewAI, AutoGen hooks.
6
+ """
7
+
8
+ from runtimerouter.integrations.litellm import LiteLLMIntegration
9
+
10
+ __all__ = ["LiteLLMIntegration"]
@@ -0,0 +1,74 @@
1
+ """
2
+ LiteLLM integration — unified model execution layer.
3
+
4
+ Responsibility:
5
+ - Execute completion/acompletion via LiteLLM
6
+ - Attach RouteDecision metadata for observability
7
+ - Pass through RouterConfig.litellm_kwargs
8
+
9
+ Boundary: This module calls LiteLLM; it does NOT decide which model to use.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from typing import Any
15
+
16
+ from runtimerouter.exceptions import IntegrationError
17
+ from runtimerouter.types import RouteDecision, RouterConfig
18
+
19
+
20
class LiteLLMIntegration:
    """Thin wrapper around LiteLLM for routed completions."""

    def __init__(self, config: RouterConfig | None = None) -> None:
        """
        Args:
            config: Configuration whose ``litellm_kwargs`` are applied to every
                call; a default ``RouterConfig`` is created when omitted.
        """
        self.config = config or RouterConfig()

    @staticmethod
    def _load_litellm() -> Any:
        """Import litellm lazily, translating a missing install into IntegrationError."""
        try:
            import litellm
        except ImportError as exc:
            raise IntegrationError(
                "litellm is required. Install with: pip install runtimerouter"
            ) from exc
        return litellm

    def _build_call_kwargs(
        self,
        route_decision: RouteDecision | None,
        overrides: dict[str, Any],
    ) -> dict[str, Any]:
        """
        Merge configured and per-call kwargs and attach routing metadata.

        Per-call ``overrides`` win over ``config.litellm_kwargs``. The
        ``metadata`` mapping is copied before the routing tags are written, so
        neither the shared config nor a caller-supplied dict is mutated. A
        missing or ``None`` metadata value is treated as empty.
        """
        merged = {**self.config.litellm_kwargs, **overrides}
        metadata = dict(merged.get("metadata") or {})
        if route_decision is not None:
            metadata["runtimerouter_policy"] = route_decision.policy_name
            metadata["runtimerouter_reason"] = route_decision.reason
        merged["metadata"] = metadata
        return merged

    def completion(
        self,
        model: str,
        messages: list[dict[str, Any]],
        route_decision: RouteDecision | None = None,
        **kwargs: Any,
    ) -> Any:
        """
        Synchronous completion via LiteLLM.

        Args:
            model: Model identifier passed to ``litellm.completion``.
            messages: Messages passed to ``litellm.completion``.
            route_decision: When given, its policy name and reason are added
                to the call's ``metadata``.
            **kwargs: Extra keyword arguments; they override
                ``config.litellm_kwargs`` on key collisions.

        Returns:
            The value returned by ``litellm.completion``.

        Raises:
            IntegrationError: ``litellm`` cannot be imported.
        """
        litellm = self._load_litellm()
        call_kwargs = self._build_call_kwargs(route_decision, kwargs)
        return litellm.completion(model=model, messages=messages, **call_kwargs)

    async def acompletion(
        self,
        model: str,
        messages: list[dict[str, Any]],
        route_decision: RouteDecision | None = None,
        **kwargs: Any,
    ) -> Any:
        """
        Async completion via LiteLLM.

        Same contract as :meth:`completion`, but awaits
        ``litellm.acompletion``.
        """
        litellm = self._load_litellm()
        call_kwargs = self._build_call_kwargs(route_decision, kwargs)
        return await litellm.acompletion(model=model, messages=messages, **call_kwargs)
@@ -0,0 +1,17 @@
1
+ """
2
+ Routing policies — pluggable decision modules.
3
+
4
+ Each policy receives an enriched RouteContext and a list of ModelCandidates,
5
+ and returns a filtered/reordered subset. Policies are composable and ordered
6
+ by Router according to RouterConfig.policy_order.
7
+ """
8
+
9
+ from runtimerouter.policies.base import RoutingPolicy
10
+ from runtimerouter.policies.complexity import ComplexityPolicy
11
+ from runtimerouter.policies.cost import CostPolicy
12
+
13
+ __all__ = [
14
+ "RoutingPolicy",
15
+ "ComplexityPolicy",
16
+ "CostPolicy",
17
+ ]
@@ -0,0 +1,40 @@
1
+ """
2
+ Base routing policy interface.
3
+
4
+ All policies must implement apply(). Policies are stateless by default;
5
+ configuration is injected via RouterConfig at construction time.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from abc import ABC, abstractmethod
11
+
12
+ from runtimerouter.types import ModelCandidate, RouteContext, RouterConfig
13
+
14
+
15
class RoutingPolicy(ABC):
    """Contract every routing policy implements: narrow or reorder model candidates."""

    # Identifier reported for the policy; subclasses override it.
    name: str = "base"

    def __init__(self, config: RouterConfig | None = None) -> None:
        """Store the injected configuration, creating a default one when none is given."""
        self.config = config if config else RouterConfig()

    @abstractmethod
    def apply(
        self,
        context: RouteContext,
        candidates: list[ModelCandidate],
    ) -> list[ModelCandidate]:
        """
        Filter or reorder model candidates based on context.

        Args:
            context: Routing context, already enriched by the classifier.
            candidates: Models still viable at this point in the chain.

        Returns:
            A subset or reordering of ``candidates``. An empty list means no
            model satisfies this policy's constraints.
        """
        ...
@@ -0,0 +1,47 @@
1
+ """
2
+ ComplexityPolicy — routes requests to models matched to task complexity.
3
+
4
+ Maps ComplexityLevel → preferred model tiers, e.g.:
5
+ - TRIVIAL/SIMPLE → fast, cheap models (gpt-4o-mini, gemini-flash)
6
+ - MODERATE → balanced models (gpt-4o, claude-sonnet)
7
+ - COMPLEX/EXPERT → frontier models (claude-opus, o1)
8
+
9
+ Boundary: Complexity mapping tables live here. Classification is in classifier.py.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from runtimerouter.policies.base import RoutingPolicy
15
+ from runtimerouter.types import ComplexityLevel, ModelCandidate, RouteContext
16
+
17
+
18
+ # Complexity → capability tags expected on ModelCandidate.capabilities
19
+ _COMPLEXITY_CAPABILITY_MAP: dict[ComplexityLevel, list[str]] = {
20
+ ComplexityLevel.TRIVIAL: ["fast", "cheap"],
21
+ ComplexityLevel.SIMPLE: ["fast", "cheap"],
22
+ ComplexityLevel.MODERATE: ["balanced"],
23
+ ComplexityLevel.COMPLEX: ["frontier", "reasoning"],
24
+ ComplexityLevel.EXPERT: ["frontier", "reasoning"],
25
+ }
26
+
27
+
28
class ComplexityPolicy(RoutingPolicy):
    """Policy meant to match models to the detected task complexity (pass-through stub in v0.1)."""

    name = "complexity"

    def apply(
        self,
        context: RouteContext,
        candidates: list[ModelCandidate],
    ) -> list[ModelCandidate]:
        """
        Return the candidates unchanged.

        TODO(v0.1): Implement capability matching and tier fallback.
        The expected capability tags are looked up for the effective
        complexity (MODERATE when the context carries none) but are not yet
        used to filter anything.
        """
        level = context.complexity
        if not level:
            level = ComplexityLevel.MODERATE
        # Looked up for the future matcher; the result is intentionally discarded.
        _COMPLEXITY_CAPABILITY_MAP.get(level, ["balanced"])
        return candidates
@@ -0,0 +1,41 @@
1
+ """
2
+ CostPolicy — routes requests based on budget and token cost estimates.
3
+
4
+ Considers:
5
+ - context.budget_usd (per-request budget cap)
6
+ - ModelCandidate.input_cost_per_1k / output_cost_per_1k
7
+ - context.estimated_input_tokens
8
+
9
+ Boundary: Cost calculation and ranking logic belongs here, not in Router.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from runtimerouter.policies.base import RoutingPolicy
15
+ from runtimerouter.types import ModelCandidate, RouteContext
16
+
17
+
18
class CostPolicy(RoutingPolicy):
    """Select the most cost-effective model that meets quality constraints."""

    name = "cost"

    def apply(
        self,
        context: RouteContext,
        candidates: list[ModelCandidate],
    ) -> list[ModelCandidate]:
        """
        Rank candidates by input cost, cheapest first.

        TODO(v0.1): Implement cost estimation and budget filtering.
        Current stub only sorts: candidates with a known
        ``input_cost_per_1k`` come first in ascending order, and candidates
        with no cost information (``None``) go last. A cost of 0.0 is a real
        price and sorts first; it is not treated as "unknown". The sort is
        stable, so ties keep their incoming order.

        Args:
            context: Routing context; ``budget_usd`` is read but not yet
                enforced.
            candidates: Models to rank.

        Returns:
            A new list containing every candidate, reordered.
        """
        if context.budget_usd is not None:
            # TODO: filter candidates exceeding budget
            pass

        def rank(candidate: ModelCandidate) -> tuple[bool, float]:
            cost = candidate.input_cost_per_1k
            # Compare against None explicitly: `cost or inf` would push free
            # (0.0) models behind every priced model.
            return (cost is None, float("inf") if cost is None else cost)

        return sorted(candidates, key=rank)
@@ -0,0 +1,15 @@
1
+ """
2
+ Model providers — registry and provider abstractions.
3
+
4
+ Providers represent upstream LLM backends (OpenAI, Anthropic, etc.).
5
+ The registry holds ModelCandidate metadata used during routing.
6
+ Actual API calls go through integrations/litellm.py.
7
+ """
8
+
9
+ from runtimerouter.providers.base import ModelProvider
10
+ from runtimerouter.providers.registry import ModelRegistry
11
+
12
+ __all__ = [
13
+ "ModelProvider",
14
+ "ModelRegistry",
15
+ ]
@@ -0,0 +1,28 @@
1
+ """
2
+ ModelProvider — abstract interface for provider-specific metadata.
3
+
4
+ v0.1 uses LiteLLM as the unified execution layer; providers here supply
5
+ model catalog metadata rather than direct API calls.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from abc import ABC, abstractmethod
11
+
12
+ from runtimerouter.types import ModelCandidate
13
+
14
+
15
class ModelProvider(ABC):
    """Interface a provider implements to expose its model catalog entries."""

    # Identifier for the provider; subclasses override it.
    name: str = "base"

    @abstractmethod
    def list_models(self) -> list[ModelCandidate]:
        """Return the model candidates this provider offers."""
        ...

    @abstractmethod
    def resolve_model_id(self, alias: str) -> str:
        """Translate a friendly alias into a LiteLLM model identifier."""
        ...
@@ -0,0 +1,74 @@
1
+ """
2
+ ModelRegistry — central catalog of routable models.
3
+
4
+ Default v0.1 catalog includes common models across major providers.
5
+ Users can extend via RouterConfig.default_models or register() at runtime.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from runtimerouter.types import ModelCandidate
11
+
12
+ _DEFAULT_MODELS: list[ModelCandidate] = [
13
+ ModelCandidate(
14
+ model_id="gpt-4o-mini",
15
+ provider="openai",
16
+ input_cost_per_1k=0.00015,
17
+ output_cost_per_1k=0.0006,
18
+ capabilities=["fast", "cheap"],
19
+ ),
20
+ ModelCandidate(
21
+ model_id="gpt-4o",
22
+ provider="openai",
23
+ input_cost_per_1k=0.0025,
24
+ output_cost_per_1k=0.01,
25
+ capabilities=["balanced"],
26
+ ),
27
+ ModelCandidate(
28
+ model_id="claude-sonnet-4-20250514",
29
+ provider="anthropic",
30
+ input_cost_per_1k=0.003,
31
+ output_cost_per_1k=0.015,
32
+ capabilities=["balanced", "reasoning"],
33
+ ),
34
+ ModelCandidate(
35
+ model_id="gemini/gemini-2.0-flash",
36
+ provider="google",
37
+ input_cost_per_1k=0.0001,
38
+ output_cost_per_1k=0.0004,
39
+ capabilities=["fast", "cheap"],
40
+ ),
41
+ ModelCandidate(
42
+ model_id="deepseek/deepseek-chat",
43
+ provider="deepseek",
44
+ input_cost_per_1k=0.00014,
45
+ output_cost_per_1k=0.00028,
46
+ capabilities=["fast", "cheap", "reasoning"],
47
+ ),
48
+ ]
49
+
50
+
51
class ModelRegistry:
    """Keeps routable model candidates in memory, keyed by their ``model_id``."""

    def __init__(self, models: list[ModelCandidate] | None = None) -> None:
        """
        Seed the registry.

        Args:
            models: Candidates to register. ``None`` selects the module's
                default catalog; an empty list yields an empty registry.
        """
        self._models: dict[str, ModelCandidate] = {}
        seed = models if models is not None else _DEFAULT_MODELS
        for entry in seed:
            self.register(entry)

    def register(self, model: ModelCandidate) -> None:
        """Store ``model`` under its ``model_id``, replacing any existing entry."""
        key = model.model_id
        self._models[key] = model

    def get(self, model_id: str) -> ModelCandidate | None:
        """Return the candidate registered under ``model_id``, or ``None`` if absent."""
        found = self._models.get(model_id)
        return found

    def list_candidates(self) -> list[ModelCandidate]:
        """Return a new list of every registered candidate, in registration order."""
        return [*self._models.values()]

    def remove(self, model_id: str) -> None:
        """Drop ``model_id`` from the registry; unknown ids are ignored."""
        if model_id in self._models:
            del self._models[model_id]
@@ -0,0 +1,104 @@
1
+ """
2
+ Router — orchestrates the routing pipeline.
3
+
4
+ Pipeline (v0.1):
5
+ 1. Classifier enriches RouteContext (complexity, token estimates)
6
+ 2. Policies filter/rank ModelCandidates in policy_order
7
+ 3. Router returns RouteDecision
8
+
9
+ Boundary: Router coordinates policies; individual policies live in policies/.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from runtimerouter.classifier import TaskClassifier
15
+ from runtimerouter.exceptions import RoutingError
16
+ from runtimerouter.policies.base import RoutingPolicy
17
+ from runtimerouter.policies.complexity import ComplexityPolicy
18
+ from runtimerouter.policies.cost import CostPolicy
19
+ from runtimerouter.providers.registry import ModelRegistry
20
+ from runtimerouter.types import ModelCandidate, RouteContext, RouteDecision, RouterConfig
21
+
22
+
23
class Router:
    """
    Core routing engine.

    Applies an ordered chain of pluggable policies to select the best model
    for a given request context.
    """

    # Built-in policy classes, keyed by the names used in RouterConfig.policy_order.
    _POLICY_REGISTRY: dict[str, type[RoutingPolicy]] = {
        "complexity": ComplexityPolicy,
        "cost": CostPolicy,
    }

    def __init__(
        self,
        config: RouterConfig | None = None,
        classifier: TaskClassifier | None = None,
        registry: ModelRegistry | None = None,
        policies: list[RoutingPolicy] | None = None,
    ) -> None:
        """
        Assemble the routing pipeline.

        Args:
            config: Router configuration; a default one is created when omitted.
            classifier: Context enricher; defaults to ``TaskClassifier()``.
            registry: Model catalog. When omitted, one is built from
                ``config.default_models``; an empty ``default_models`` selects
                the registry's built-in default catalog instead of an empty
                registry.
            policies: Explicit policy chain. ``None`` builds the chain from
                ``config.policy_order``; an explicit list (even an empty one)
                is used as given.
        """
        self.config = config or RouterConfig()
        self.classifier = classifier or TaskClassifier()
        # Per-instance copy so register_policy() on one router never leaks
        # into other routers through the shared class attribute.
        self._POLICY_REGISTRY = dict(type(self)._POLICY_REGISTRY)
        if registry is None:
            # ModelRegistry treats None as "use defaults" and [] as "empty";
            # RouterConfig.default_models defaults to [], so map it to None.
            registry = ModelRegistry(self.config.default_models or None)
        self.registry = registry
        if policies is None:
            self.policies = self._build_default_policies()
        else:
            # Copy so register_policy() never mutates the caller's list.
            self.policies = list(policies)

    def _build_default_policies(self) -> list[RoutingPolicy]:
        """
        Instantiate policies according to config.policy_order.

        Policies disabled through ``enable_complexity_routing`` or
        ``enable_cost_routing`` are skipped, as are names that are not in the
        policy registry.
        """
        disabled = set()
        if not self.config.enable_complexity_routing:
            disabled.add("complexity")
        if not self.config.enable_cost_routing:
            disabled.add("cost")

        policies: list[RoutingPolicy] = []
        for name in self.config.policy_order:
            if name in disabled:
                continue
            policy_cls = self._POLICY_REGISTRY.get(name)
            if policy_cls is not None:
                policies.append(policy_cls(config=self.config))
        return policies

    def register_policy(self, name: str, policy: RoutingPolicy) -> None:
        """
        Register a custom policy at runtime.

        The policy's class is recorded under ``name`` in this router's own
        registry and the instance is appended to the end of the chain.
        """
        self._POLICY_REGISTRY[name] = type(policy)
        self.policies.append(policy)

    def route(self, context: RouteContext) -> RouteDecision:
        """
        Run the full routing pipeline and return a model selection decision.

        The context is enriched by the classifier, then each policy narrows
        or reorders the registry's candidates in turn; the chain stops early
        once no candidate is left. The first surviving candidate wins.

        Raises:
            RoutingError: No candidate survives the policy chain and no
                fallback model is configured.
        """
        enriched = self.classifier.enrich(context)
        candidates = self.registry.list_candidates()

        for policy in self.policies:
            candidates = policy.apply(enriched, candidates)
            if not candidates:
                break

        if not candidates:
            if self.config.fallback_model:
                return RouteDecision(
                    selected_model=self.config.fallback_model,
                    provider="fallback",
                    reason="No candidates after policy chain; using fallback",
                    policy_name="fallback",
                    complexity=enriched.complexity,
                )
            raise RoutingError("No suitable model found for the given context")

        # Report the policies that actually ran, not the configured order,
        # which may include disabled or unknown names.
        applied = [policy.name for policy in self.policies]
        selected = candidates[0]
        return RouteDecision(
            selected_model=selected.model_id,
            provider=selected.provider,
            reason=f"Selected by policy chain: {applied}",
            policy_name=applied[-1] if applied else "default",
            complexity=enriched.complexity,
            candidates_considered=[c.model_id for c in candidates],
        )

    def list_models(self) -> list[ModelCandidate]:
        """Return all registered model candidates."""
        return self.registry.list_candidates()
runtimerouter/types.py ADDED
@@ -0,0 +1,79 @@
1
+ """
2
+ Core type definitions shared across RuntimeRouter modules.
3
+
4
+ This module defines the data contracts between Router, Classifier, Policies,
5
+ Providers, and Integrations. Keep it free of routing logic.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from enum import Enum
11
+ from typing import Any
12
+
13
+ from pydantic import BaseModel, Field
14
+
15
+
16
class ComplexityLevel(str, Enum):
    """String-valued tiers of task complexity, consumed by complexity-based routing policies."""

    # Members are listed from the least to the most demanding tier.
    TRIVIAL = "trivial"
    SIMPLE = "simple"
    MODERATE = "moderate"
    COMPLEX = "complex"
    EXPERT = "expert"
24
+
25
+
26
class ModelCandidate(BaseModel):
    """Catalog entry describing one model the router may select."""

    # Identity (both required).
    model_id: str = Field(..., description="LiteLLM-compatible model identifier, e.g. gpt-4o")
    provider: str = Field(..., description="Provider name, e.g. openai, anthropic")

    # Pricing; None means the cost is unknown.
    input_cost_per_1k: float | None = Field(
        description="Optional input token cost (USD per 1K tokens)",
        default=None,
    )
    output_cost_per_1k: float | None = Field(
        description="Optional output token cost (USD per 1K tokens)",
        default=None,
    )

    max_context_tokens: int | None = None
    # Free-form capability tags, e.g. "fast", "balanced", "reasoning".
    capabilities: list[str] = Field(default_factory=list)
    metadata: dict[str, Any] = Field(default_factory=dict)
40
+
41
+
42
class RouteContext(BaseModel):
    """Everything the router and its policies know about one request being routed."""

    # The conversation and, optionally, the raw prompt it was built from.
    messages: list[dict[str, Any]] = Field(default_factory=list)
    prompt: str | None = Field(default=None)

    # Filled in by the classifier when left as None.
    complexity: ComplexityLevel | None = Field(default=None)
    estimated_input_tokens: int | None = Field(default=None)

    # Per-request budget cap in USD.
    budget_usd: float | None = Field(default=None)

    # NOTE(review): presumably caller hints for policies; confirm where these are consumed.
    preferred_providers: list[str] = Field(default_factory=list)
    excluded_models: list[str] = Field(default_factory=list)
    metadata: dict[str, Any] = Field(default_factory=dict)
53
+
54
+
55
class RouteDecision(BaseModel):
    """Result of one routing run: the chosen model plus the explanation for the choice."""

    # Required: what was picked, and by which policy and for what reason.
    selected_model: str
    provider: str
    reason: str
    policy_name: str

    # Optional diagnostics.
    complexity: ComplexityLevel | None = Field(default=None)
    estimated_cost_usd: float | None = Field(default=None)
    # Model ids of the candidates that were still viable when the choice was made.
    candidates_considered: list[str] = Field(default_factory=list)
    metadata: dict[str, Any] = Field(default_factory=dict)
66
+
67
+
68
class RouterConfig(BaseModel):
    """Settings shared by Router, AutoLLM and the LiteLLM integration."""

    # Model catalog handed to the registry.
    default_models: list[ModelCandidate] = Field(default_factory=list)

    # Switches for the two built-in policies.
    enable_complexity_routing: bool = Field(default=True)
    enable_cost_routing: bool = Field(default=True)

    # Model used when the policy chain leaves no candidate; None disables the fallback.
    fallback_model: str | None = Field(default=None)

    # Extra keyword arguments merged into every LiteLLM call.
    litellm_kwargs: dict[str, Any] = Field(default_factory=dict)

    policy_order: list[str] = Field(
        description="Ordered list of policy names to apply",
        default_factory=lambda: ["complexity", "cost"],
    )
@@ -0,0 +1,246 @@
1
+ Metadata-Version: 2.4
2
+ Name: runtimerouter
3
+ Version: 0.1.0
4
+ Summary: Optimize the entire AI session, not just the next model call.
5
+ Project-URL: Homepage, https://github.com/chenyu-dev25/RuntimeRouter
6
+ Project-URL: Documentation, https://github.com/chenyu-dev25/RuntimeRouter#readme
7
+ Project-URL: Repository, https://github.com/chenyu-dev25/RuntimeRouter
8
+ Project-URL: Issues, https://github.com/chenyu-dev25/RuntimeRouter/issues
9
+ Project-URL: Changelog, https://github.com/chenyu-dev25/RuntimeRouter/blob/main/ROADMAP.md
10
+ Author: RuntimeRouter Contributors
11
+ License-Expression: MIT
12
+ License-File: LICENSE
13
+ Keywords: agent,langgraph,litellm,llm,model-selection,pydantic-ai,routing
14
+ Classifier: Development Status :: 3 - Alpha
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
+ Classifier: Typing :: Typed
25
+ Requires-Python: >=3.10
26
+ Requires-Dist: litellm>=1.40.0
27
+ Requires-Dist: pydantic>=2.0
28
+ Provides-Extra: dev
29
+ Requires-Dist: mypy>=1.10; extra == 'dev'
30
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
31
+ Requires-Dist: pytest>=8.0; extra == 'dev'
32
+ Requires-Dist: ruff>=0.4; extra == 'dev'
33
+ Provides-Extra: docs
34
+ Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
35
+ Requires-Dist: mkdocs>=1.6; extra == 'docs'
36
+ Description-Content-Type: text/markdown
37
+
38
+ # RuntimeRouter
39
+
40
+ > **Optimize the entire AI session, not just the next model call.**
41
+
42
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
43
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
44
+ [![PyPI version](https://img.shields.io/pypi/v/runtimerouter.svg)](https://pypi.org/project/runtimerouter/)
45
+
46
+ RuntimeRouter is a **Python-first LLM routing library** for the AI ecosystem. It sits **above** agent frameworks — LangGraph, PydanticAI, CrewAI, AutoGen — as a **Runtime Router**, not a replacement for them.
47
+
48
+ You write agents with your favorite framework. RuntimeRouter decides **which model** to call, **when**, and **why** — across the full session lifecycle.
49
+
50
+ ---
51
+
52
+ ## Vision
53
+
54
+ Traditional routers optimize a single inference call: given a prompt, pick the cheapest or fastest model.
55
+
56
+ RuntimeRouter's long-term goal is different: **optimize the entire AI session** — model selection, cost, latency, context, caching, and privacy — as a unified runtime decision layer.
57
+
58
+ ```
59
+ ┌─────────────────────────────────────────────────────────┐
60
+ │ Your Agent Framework (LangGraph / PydanticAI / …) │
61
+ ├─────────────────────────────────────────────────────────┤
62
+ │ RuntimeRouter ← session-aware routing (this library) │
63
+ ├─────────────────────────────────────────────────────────┤
64
+ │ LiteLLM / Provider APIs (OpenAI, Anthropic, …) │
65
+ └─────────────────────────────────────────────────────────┘
66
+ ```
67
+
68
+ ---
69
+
70
+ ## Why RuntimeRouter?
71
+
72
+ | Problem | RuntimeRouter approach |
73
+ |---------|------------------------|
74
+ | Hard-coding `model="gpt-4o"` everywhere | `AutoLLM()` picks the right model per request |
75
+ | Simple tasks burning frontier-model budget | Complexity routing sends easy tasks to cheap models |
76
+ | No visibility into routing decisions | `route_only()` returns a `RouteDecision` with the reason; routed calls carry the policy name and reason in LiteLLM metadata |
77
+ | Framework lock-in | Framework-agnostic; works with any LiteLLM-compatible stack |
78
+ | Proxy-only routers | Designed for **session-level** optimization (roadmap) |
79
+
80
+ ---
81
+
82
+ ## vs OpenRouter
83
+
84
+ | | **OpenRouter** | **RuntimeRouter** |
85
+ |---|----------------|-------------------|
86
+ | **What it is** | Unified API proxy to 100+ models | Python routing **library** embedded in your app |
87
+ | **Scope** | Single HTTP request → model | Entire AI **session** (roadmap) |
88
+ | **Integration** | Replace your API base URL | Drop-in `AutoLLM()` or `Router` in Python code |
89
+ | **Policies** | Provider-side routing rules | Pluggable Python **Policy** classes you own |
90
+ | **Framework** | Language-agnostic HTTP | **Python-first**, native LangGraph/PydanticAI hooks (v0.2+) |
91
+
92
+ OpenRouter is excellent as a model gateway. RuntimeRouter is a **runtime decision engine** you control in-process.
93
+
94
+ ---
95
+
96
+ ## vs Not Diamond
97
+
98
+ | | **Not Diamond** | **RuntimeRouter** |
99
+ |---|-----------------|-------------------|
100
+ | **What it is** | Managed routing SaaS | Open-source Python library |
101
+ | **Deployment** | Cloud API | In-process, self-hosted |
102
+ | **Customization** | Platform-configured | Full **plugin Policy** architecture |
103
+ | **Session scope** | Per-call model selection | Session-aware optimization (roadmap) |
104
+ | **Cost** | SaaS pricing | Free & open (MIT) |
105
+
106
+ Not Diamond provides intelligent routing as a service. RuntimeRouter gives you the same **concept** as extensible, auditable Python code.
107
+
108
+ ---
109
+
110
+ ## Quick Start
111
+
112
+ ### Install
113
+
114
+ ```bash
115
+ pip install runtimerouter
116
+ ```
117
+
118
+ ### Basic usage
119
+
120
+ ```python
121
+ from runtimerouter import AutoLLM
122
+
123
+ llm = AutoLLM()
124
+
125
+ # RuntimeRouter automatically selects Claude, Gemini, DeepSeek, OpenAI, etc.
126
+ response = llm.invoke("帮我分析整个代码仓库")
127
+
128
+ print(response.choices[0].message.content)
129
+ ```
130
+
131
+ ### Inspect routing decisions
132
+
133
+ ```python
134
+ decision = llm.route_only("Summarize this paragraph in one sentence.")
135
+ print(decision.selected_model) # e.g. "gpt-4o-mini"
136
+ print(decision.reason) # why this model was chosen
137
+ print(decision.complexity) # ComplexityLevel.MODERATE in v0.1 (the classifier is still a stub)
138
+ ```
139
+
140
+ ### Configure policies
141
+
142
+ ```python
143
+ from runtimerouter import AutoLLM, RouterConfig
144
+
145
+ config = RouterConfig(
146
+ enable_complexity_routing=True,
147
+ enable_cost_routing=True,
148
+ fallback_model="gpt-4o-mini",
149
+ policy_order=["complexity", "cost"],
150
+ )
151
+
152
+ llm = AutoLLM(config=config)
153
+ ```
154
+
155
+ ### Use Router directly (without invoking)
156
+
157
+ ```python
158
+ from runtimerouter import Router, RouteContext
159
+
160
+ router = Router()
161
+ decision = router.route(RouteContext(prompt="Explain quantum entanglement"))
162
+ print(decision.selected_model)
163
+ ```
164
+
165
+ ---
166
+
167
+ ## Architecture
168
+
169
+ ```
170
+ runtimerouter/
171
+ ├── autollm.py # User-facing entry point
172
+ ├── router.py # Routing pipeline orchestrator
173
+ ├── classifier.py # Task complexity & token estimation
174
+ ├── types.py # Shared data contracts (Pydantic models)
175
+ ├── config.py # Config loading utilities
176
+ ├── policies/ # Pluggable routing policies
177
+ │ ├── base.py # RoutingPolicy ABC
178
+ │ ├── complexity.py # Complexity-based routing
179
+ │ └── cost.py # Cost-based routing
180
+ ├── providers/ # Model catalog & provider metadata
181
+ │ ├── base.py # ModelProvider ABC
182
+ │ └── registry.py # ModelRegistry (default model catalog)
183
+ └── integrations/ # External execution layers
184
+ └── litellm.py # LiteLLM completion wrapper
185
+ ```
186
+
187
+ **Routing pipeline (v0.1):**
188
+
189
+ ```
190
+ User prompt
191
+
192
+
193
+ TaskClassifier.enrich() ← complexity, token estimate
194
+
195
+
196
+ Policy chain (ordered) ← complexity → cost
197
+
198
+
199
+ RouteDecision ← selected model + reason
200
+
201
+
202
+ LiteLLMIntegration ← execute completion
203
+ ```
204
+
205
+ See [docs/architecture.md](docs/architecture.md) for module boundaries and extension points.
206
+
207
+ ---
208
+
209
+ ## Roadmap
210
+
211
+ | Version | Focus |
212
+ |---------|-------|
213
+ | **v0.1** *(current)* | Auto Model Selection, Complexity Routing, Cost Routing, LiteLLM Integration |
214
+ | **v0.2** | LangGraph Integration, PydanticAI Integration |
215
+ | **v0.3** | Cache-aware Routing |
216
+ | **v0.4** | Context-aware Routing |
217
+ | **v0.5** | Session-aware Optimization |
218
+
219
+ Full details: [ROADMAP.md](ROADMAP.md)
220
+
221
+ ---
222
+
223
+ ## Contributing
224
+
225
+ We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md).
226
+
227
+ ```bash
228
+ git clone https://github.com/chenyu-dev25/RuntimeRouter.git
229
+ cd RuntimeRouter
230
+ python -m venv .venv && source .venv/bin/activate
231
+ pip install -e ".[dev]"
232
+ pytest
233
+ ```
234
+
235
+ ---
236
+
237
+ ## License
238
+
239
+ MIT — see [LICENSE](LICENSE).
240
+
241
+ ---
242
+
243
+ <p align="center">
244
+ <strong>RuntimeRouter</strong><br>
245
+ Optimize the entire AI session, not just the next model call.
246
+ </p>
@@ -0,0 +1,20 @@
1
+ runtimerouter/__init__.py,sha256=mGOpANV1MH0-KRG10B0jUdQROyHjx-cIal-JZ7VAPyo,545
2
+ runtimerouter/autollm.py,sha256=Fv-AxAt-GpgvexszBiiltEtX-HkRoDKSGe8ujFFsNLg,3154
3
+ runtimerouter/classifier.py,sha256=LPwdQrPcZP3B3HWk5ARUz4xZxAP9QsbexI_AhsZksq0,2034
4
+ runtimerouter/config.py,sha256=td2l_W_tCOISqvvigIZM0M1S4p9bPKnFR-4Z0I3lr2c,1144
5
+ runtimerouter/exceptions.py,sha256=IwO_mUGI0OvKfxz5wugMLXucxHtDtWnzBH3hx2ILWzI,681
6
+ runtimerouter/router.py,sha256=mtHomaXCAAYKwM5W2dSJmHbFygjkHKGimUFGaA33frM,3886
7
+ runtimerouter/types.py,sha256=mgQElmYYphVqYjD_uiOvbQf3VL03fJ03VpJxCvFy3yQ,2703
8
+ runtimerouter/integrations/__init__.py,sha256=ZU5vk85mWFDL8jd1ARMFvPRftU-sOtHaH4zpFqDhJi8,265
9
+ runtimerouter/integrations/litellm.py,sha256=6bwVEMFfd0CERK9ufAVCB_mHlL_0540SisVngdXIXW4,2450
10
+ runtimerouter/policies/__init__.py,sha256=x9SzZMOJmxteCG30L1ZrKkV1TtNE7SC-EnnPsnoEHlE,509
11
+ runtimerouter/policies/base.py,sha256=mvaYdeH9PgdAXmM3mbe8fh17iYImDWsImPCGkKWfibI,1081
12
+ runtimerouter/policies/complexity.py,sha256=LChQAGjGO8m96RroQ_sS81Ecsv-mGLzbEVcYW9UyfPA,1649
13
+ runtimerouter/policies/cost.py,sha256=UCO4iRy87j4KxJ4dOjtGpYbZiX66usAa8qLDVzHgdro,1206
14
+ runtimerouter/providers/__init__.py,sha256=-3Nq-65J_hED-DXudMUEA17ym37owdmhjy0Rz1Kz7jA,423
15
+ runtimerouter/providers/base.py,sha256=Qi6fBInrImIDWpvoZ_HApLtrGmQr9plm3ZRX0LJpaZ8,723
16
+ runtimerouter/providers/registry.py,sha256=7Hk5rOHMby1FwUuTv4WvCrw2lVPoSpLi45jyakzIUkY,2283
17
+ runtimerouter-0.1.0.dist-info/METADATA,sha256=NPC3uHTnp0fLQJmKLbWMfUsGTkV5xIptSBgNXq3u44k,8774
18
+ runtimerouter-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
19
+ runtimerouter-0.1.0.dist-info/licenses/LICENSE,sha256=lC8UQNq3Ya2RkrlXf3o7xV8FMW4ucNc_HEdekVvkcxs,1083
20
+ runtimerouter-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 RuntimeRouter Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.