runtimerouter 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runtimerouter/__init__.py +28 -0
- runtimerouter/autollm.py +110 -0
- runtimerouter/classifier.py +62 -0
- runtimerouter/config.py +38 -0
- runtimerouter/exceptions.py +25 -0
- runtimerouter/integrations/__init__.py +10 -0
- runtimerouter/integrations/litellm.py +74 -0
- runtimerouter/policies/__init__.py +17 -0
- runtimerouter/policies/base.py +40 -0
- runtimerouter/policies/complexity.py +47 -0
- runtimerouter/policies/cost.py +41 -0
- runtimerouter/providers/__init__.py +15 -0
- runtimerouter/providers/base.py +28 -0
- runtimerouter/providers/registry.py +74 -0
- runtimerouter/router.py +104 -0
- runtimerouter/types.py +79 -0
- runtimerouter-0.1.0.dist-info/METADATA +246 -0
- runtimerouter-0.1.0.dist-info/RECORD +20 -0
- runtimerouter-0.1.0.dist-info/WHEEL +4 -0
- runtimerouter-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""
|
|
2
|
+
RuntimeRouter — LLM routing library for the Python AI ecosystem.
|
|
3
|
+
|
|
4
|
+
Optimize the entire AI session, not just the next model call.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from runtimerouter.autollm import AutoLLM
|
|
8
|
+
from runtimerouter.router import Router
|
|
9
|
+
from runtimerouter.types import (
|
|
10
|
+
ComplexityLevel,
|
|
11
|
+
ModelCandidate,
|
|
12
|
+
RouteContext,
|
|
13
|
+
RouteDecision,
|
|
14
|
+
RouterConfig,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
__version__ = "0.1.0"
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"AutoLLM",
|
|
21
|
+
"Router",
|
|
22
|
+
"ComplexityLevel",
|
|
23
|
+
"ModelCandidate",
|
|
24
|
+
"RouteContext",
|
|
25
|
+
"RouteDecision",
|
|
26
|
+
"RouterConfig",
|
|
27
|
+
"__version__",
|
|
28
|
+
]
|
runtimerouter/autollm.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AutoLLM — the primary user-facing entry point.
|
|
3
|
+
|
|
4
|
+
Responsibility:
|
|
5
|
+
- Accept user prompts/messages
|
|
6
|
+
- Delegate routing to Router
|
|
7
|
+
- Delegate execution to LiteLLM integration
|
|
8
|
+
- Return unified responses
|
|
9
|
+
|
|
10
|
+
Boundary: AutoLLM orchestrates; it does NOT implement routing policies.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from runtimerouter.integrations.litellm import LiteLLMIntegration
|
|
18
|
+
from runtimerouter.router import Router
|
|
19
|
+
from runtimerouter.types import RouteContext, RouterConfig
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class AutoLLM:
    """
    Automatic model selection and invocation.

    Usage::

        from runtimerouter import AutoLLM

        llm = AutoLLM()
        response = llm.invoke("Help me analyze the entire code repository")

    The router selects an appropriate model; the user never specifies one.
    """

    def __init__(
        self,
        config: RouterConfig | None = None,
        router: Router | None = None,
        integration: LiteLLMIntegration | None = None,
    ) -> None:
        """
        Wire together configuration, router and execution layer.

        Args:
            config: Shared configuration; a default RouterConfig is created
                when omitted.
            router: Routing engine; built from ``config`` when omitted.
            integration: Execution adapter; built from ``config`` when omitted.
        """
        self.config = config or RouterConfig()
        self.router = router or Router(config=self.config)
        self.integration = integration or LiteLLMIntegration(config=self.config)

    @staticmethod
    def _build_context(
        prompt: str,
        messages: list[dict[str, Any]] | None,
    ) -> RouteContext:
        """Build the routing context shared by invoke, ainvoke and route_only."""
        return RouteContext(
            prompt=prompt,
            # Without explicit history, the prompt becomes a single user message.
            messages=messages or [{"role": "user", "content": prompt}],
        )

    def invoke(
        self,
        prompt: str,
        *,
        messages: list[dict[str, Any]] | None = None,
        **kwargs: Any,
    ) -> Any:
        """
        Route the request to the best model and return the LLM response.

        Args:
            prompt: User prompt text.
            messages: Optional chat history in OpenAI message format. When
                given (and non-empty) it is sent as-is instead of the prompt.
            **kwargs: Passed through to the LiteLLM integration.

        Returns:
            Whatever ``integration.completion`` returns.

        Raises:
            RoutingError: No suitable model found.
        """
        context = self._build_context(prompt, messages)
        decision = self.router.route(context)
        return self.integration.completion(
            model=decision.selected_model,
            messages=context.messages,
            route_decision=decision,
            **kwargs,
        )

    async def ainvoke(
        self,
        prompt: str,
        *,
        messages: list[dict[str, Any]] | None = None,
        **kwargs: Any,
    ) -> Any:
        """Async variant of :meth:`invoke`; routing itself stays synchronous."""
        context = self._build_context(prompt, messages)
        decision = self.router.route(context)
        return await self.integration.acompletion(
            model=decision.selected_model,
            messages=context.messages,
            route_decision=decision,
            **kwargs,
        )

    def route_only(
        self,
        prompt: str,
        *,
        messages: list[dict[str, Any]] | None = None,
    ) -> Any:
        """
        Return the routing decision without invoking the model.

        Useful for debugging and observability. Accepts the same optional
        ``messages`` as :meth:`invoke` so the reported decision is computed
        from the same context that :meth:`invoke` would route.

        Args:
            prompt: User prompt text.
            messages: Optional chat history in OpenAI message format.

        Returns:
            The decision object produced by ``router.route``.
        """
        return self.router.route(self._build_context(prompt, messages))
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TaskClassifier — enriches RouteContext before policy evaluation.
|
|
3
|
+
|
|
4
|
+
Responsibility (v0.1):
|
|
5
|
+
- Estimate task complexity (ComplexityLevel)
|
|
6
|
+
- Optionally estimate input token count
|
|
7
|
+
|
|
8
|
+
Boundary: Classification heuristics belong here, NOT in policies or Router.
|
|
9
|
+
Implementation of classification algorithms is deferred to future PRs.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from runtimerouter.types import ComplexityLevel, RouteContext
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TaskClassifier:
    """
    Enriches routing context with task metadata.

    v0.1 ships the interface only. Concrete classification logic
    (rule-based, embedding-based, or LLM-as-judge) will be added
    in subsequent releases.
    """

    def enrich(self, context: RouteContext) -> RouteContext:
        """
        Return a copy of context with complexity and token estimates filled in.

        Values already present on the input context are preserved; only
        fields that are ``None`` are computed. An explicit
        ``estimated_input_tokens=0`` is therefore kept rather than replaced.
        """
        complexity = context.complexity
        if complexity is None:
            complexity = self._classify_complexity(context)

        tokens = context.estimated_input_tokens
        if tokens is None:
            tokens = self._estimate_tokens(context)

        return context.model_copy(
            update={
                "complexity": complexity,
                "estimated_input_tokens": tokens,
            }
        )

    def _classify_complexity(self, context: RouteContext) -> ComplexityLevel:
        """
        Classify task complexity.

        TODO(v0.1): Implement heuristic or model-based classification.
        Default stub returns MODERATE for all requests.
        """
        _ = context
        return ComplexityLevel.MODERATE

    def _estimate_tokens(self, context: RouteContext) -> int:
        """
        Estimate input token count from prompt/messages.

        TODO(v0.1): Integrate tiktoken or LiteLLM token counter.
        Default stub uses a rough estimate of four characters per token,
        never returning less than 1. When messages are present they take
        precedence over the prompt.
        """
        if context.messages:
            text = " ".join(
                self._content_text(message.get("content"))
                for message in context.messages
            )
        else:
            text = context.prompt or ""
        return max(1, len(text) // 4)

    @staticmethod
    def _content_text(content: object) -> str:
        """Extract countable text from one message's ``content`` value."""
        if content is None:
            # e.g. assistant tool-call messages; must not count as the text "None".
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            # Multi-part content: count only textual parts so that non-text
            # payloads (such as embedded image data) do not inflate the estimate.
            pieces: list[str] = []
            for part in content:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    pieces.append(part["text"])
            return " ".join(pieces)
        return str(content)
|
runtimerouter/config.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration loading utilities.
|
|
3
|
+
|
|
4
|
+
Responsibility: load RouterConfig from env, dict, or YAML/JSON files.
|
|
5
|
+
Does NOT contain routing logic.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from runtimerouter.types import RouterConfig
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Spellings (compared case-insensitively, after stripping whitespace) that
# switch a boolean environment flag on. Anything else switches it off.
_TRUTHY_ENV_VALUES = frozenset({"1", "true", "yes", "on"})


def _env_flag(name: str, default: bool) -> bool:
    """Read a boolean flag from the environment, using *default* when unset."""
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() in _TRUTHY_ENV_VALUES


def load_config_from_env(prefix: str = "RUNTIMEROUTER_") -> RouterConfig:
    """
    Build RouterConfig from environment variables.

    Supported variables (v0.1):
    - RUNTIMEROUTER_FALLBACK_MODEL
    - RUNTIMEROUTER_ENABLE_COMPLEXITY_ROUTING (true/false)
    - RUNTIMEROUTER_ENABLE_COST_ROUTING (true/false)

    Boolean flags default to enabled when unset. Besides ``true``, the
    spellings ``1``, ``yes`` and ``on`` are accepted (case-insensitive,
    surrounding whitespace ignored); every other value disables the flag.
    A blank fallback model is treated as "not configured".

    Args:
        prefix: Prefix prepended to each variable name.

    Returns:
        A RouterConfig populated from the environment.
    """
    fallback = os.getenv(f"{prefix}FALLBACK_MODEL")
    if fallback is not None:
        # Stray whitespace would otherwise be sent verbatim as the model id.
        fallback = fallback.strip() or None

    return RouterConfig(
        fallback_model=fallback,
        enable_complexity_routing=_env_flag(f"{prefix}ENABLE_COMPLEXITY_ROUTING", True),
        enable_cost_routing=_env_flag(f"{prefix}ENABLE_COST_ROUTING", True),
    )
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def load_config_from_dict(data: dict[str, Any]) -> RouterConfig:
    """
    Build RouterConfig from a plain dictionary.

    Args:
        data: Mapping of RouterConfig field names to values.

    Returns:
        The result of ``RouterConfig.model_validate(data)``.
    """
    validated = RouterConfig.model_validate(data)
    return validated
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""RuntimeRouter exception hierarchy."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class RuntimeRouterError(Exception):
    """Root of the RuntimeRouter exception hierarchy; catch this to handle any library error."""


class RoutingError(RuntimeRouterError):
    """Signals that routing finished without a model that could be selected."""


class PolicyError(RuntimeRouterError):
    """Signals that a routing policy failed or produced invalid output."""


class ClassifierError(RuntimeRouterError):
    """Signals a failure while classifying task complexity."""


class ProviderError(RuntimeRouterError):
    """Signals that a model provider is unavailable or misconfigured."""


class IntegrationError(RuntimeRouterError):
    """Signals a failure in an external integration (e.g. LiteLLM)."""
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""
|
|
2
|
+
External integrations — adapters for third-party execution layers.
|
|
3
|
+
|
|
4
|
+
v0.1: LiteLLM integration only.
|
|
5
|
+
Future: LangGraph, PydanticAI, CrewAI, AutoGen hooks.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from runtimerouter.integrations.litellm import LiteLLMIntegration
|
|
9
|
+
|
|
10
|
+
__all__ = ["LiteLLMIntegration"]
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LiteLLM integration — unified model execution layer.
|
|
3
|
+
|
|
4
|
+
Responsibility:
|
|
5
|
+
- Execute completion/acompletion via LiteLLM
|
|
6
|
+
- Attach RouteDecision metadata for observability
|
|
7
|
+
- Pass through RouterConfig.litellm_kwargs
|
|
8
|
+
|
|
9
|
+
Boundary: This module calls LiteLLM; it does NOT decide which model to use.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from runtimerouter.exceptions import IntegrationError
|
|
17
|
+
from runtimerouter.types import RouteDecision, RouterConfig
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class LiteLLMIntegration:
    """Thin wrapper around LiteLLM for routed completions."""

    def __init__(self, config: RouterConfig | None = None) -> None:
        """Store the configuration whose ``litellm_kwargs`` seed every call."""
        self.config = config or RouterConfig()

    @staticmethod
    def _load_litellm() -> Any:
        """Import litellm lazily, converting a missing install into IntegrationError."""
        try:
            import litellm
        except ImportError as exc:
            raise IntegrationError(
                "litellm is required. Install with: pip install runtimerouter"
            ) from exc
        return litellm

    def _build_call_kwargs(
        self,
        route_decision: RouteDecision | None,
        kwargs: dict[str, Any],
    ) -> dict[str, Any]:
        """Merge config and per-call kwargs and attach routing metadata."""
        # Per-call kwargs win over configured defaults.
        merged: dict[str, Any] = {**self.config.litellm_kwargs, **kwargs}

        # The merge above is shallow, so "metadata" may be the very dict held
        # by the config or by the caller. Copy it before adding keys so that
        # routing tags never leak into shared state; ``or {}`` also tolerates
        # an explicit ``metadata=None``.
        metadata = dict(merged.get("metadata") or {})
        if route_decision is not None:
            metadata["runtimerouter_policy"] = route_decision.policy_name
            metadata["runtimerouter_reason"] = route_decision.reason
        merged["metadata"] = metadata
        return merged

    def completion(
        self,
        model: str,
        messages: list[dict[str, Any]],
        route_decision: RouteDecision | None = None,
        **kwargs: Any,
    ) -> Any:
        """
        Synchronous completion via LiteLLM.

        Args:
            model: Model identifier passed to ``litellm.completion``.
            messages: Chat messages passed to ``litellm.completion``.
            route_decision: When given, its policy name and reason are added
                to the ``metadata`` argument for observability.
            **kwargs: Extra arguments for LiteLLM; they override
                ``config.litellm_kwargs`` on key collisions.

        Returns:
            Whatever ``litellm.completion`` returns.

        Raises:
            IntegrationError: litellm cannot be imported.
        """
        litellm = self._load_litellm()
        call_kwargs = self._build_call_kwargs(route_decision, kwargs)
        return litellm.completion(model=model, messages=messages, **call_kwargs)

    async def acompletion(
        self,
        model: str,
        messages: list[dict[str, Any]],
        route_decision: RouteDecision | None = None,
        **kwargs: Any,
    ) -> Any:
        """Async completion via LiteLLM; same contract as :meth:`completion`."""
        litellm = self._load_litellm()
        call_kwargs = self._build_call_kwargs(route_decision, kwargs)
        return await litellm.acompletion(model=model, messages=messages, **call_kwargs)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Routing policies — pluggable decision modules.
|
|
3
|
+
|
|
4
|
+
Each policy receives an enriched RouteContext and a list of ModelCandidates,
|
|
5
|
+
and returns a filtered/reordered subset. Policies are composable and ordered
|
|
6
|
+
by Router according to RouterConfig.policy_order.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from runtimerouter.policies.base import RoutingPolicy
|
|
10
|
+
from runtimerouter.policies.complexity import ComplexityPolicy
|
|
11
|
+
from runtimerouter.policies.cost import CostPolicy
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"RoutingPolicy",
|
|
15
|
+
"ComplexityPolicy",
|
|
16
|
+
"CostPolicy",
|
|
17
|
+
]
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base routing policy interface.
|
|
3
|
+
|
|
4
|
+
All policies must implement apply(). Policies are stateless by default;
|
|
5
|
+
configuration is injected via RouterConfig at construction time.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from abc import ABC, abstractmethod
|
|
11
|
+
|
|
12
|
+
from runtimerouter.types import ModelCandidate, RouteContext, RouterConfig
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class RoutingPolicy(ABC):
    """Common interface every routing policy implements."""

    # Identifier of the policy; concrete subclasses override it.
    name: str = "base"

    def __init__(self, config: RouterConfig | None = None) -> None:
        """Keep the supplied configuration, or a default RouterConfig if none is given."""
        self.config = config if config else RouterConfig()

    @abstractmethod
    def apply(
        self,
        context: RouteContext,
        candidates: list[ModelCandidate],
    ) -> list[ModelCandidate]:
        """
        Narrow down or reorder the candidate models for one request.

        Args:
            context: Enriched routing context from TaskClassifier.
            candidates: Models still viable at this point of the chain.

        Returns:
            The surviving candidates, possibly reordered. An empty list
            means no model satisfies this policy's constraints.
        """
        ...
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ComplexityPolicy — routes requests to models matched to task complexity.
|
|
3
|
+
|
|
4
|
+
Maps ComplexityLevel → preferred model tiers, e.g.:
|
|
5
|
+
- TRIVIAL/SIMPLE → fast, cheap models (gpt-4o-mini, gemini-flash)
|
|
6
|
+
- MODERATE → balanced models (gpt-4o, claude-sonnet)
|
|
7
|
+
- COMPLEX/EXPERT → frontier models (claude-opus, o1)
|
|
8
|
+
|
|
9
|
+
Boundary: Complexity mapping tables live here. Classification is in classifier.py.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from runtimerouter.policies.base import RoutingPolicy
|
|
15
|
+
from runtimerouter.types import ComplexityLevel, ModelCandidate, RouteContext
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# For each complexity tier, the capability tags a suitable model is expected
# to carry in ModelCandidate.capabilities. Every tier gets its own list object.
_COMPLEXITY_CAPABILITY_MAP: dict[ComplexityLevel, list[str]] = {
    level: list(tags)
    for level, tags in (
        (ComplexityLevel.TRIVIAL, ("fast", "cheap")),
        (ComplexityLevel.SIMPLE, ("fast", "cheap")),
        (ComplexityLevel.MODERATE, ("balanced",)),
        (ComplexityLevel.COMPLEX, ("frontier", "reasoning")),
        (ComplexityLevel.EXPERT, ("frontier", "reasoning")),
    )
}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ComplexityPolicy(RoutingPolicy):
    """Policy intended to match models to the detected task complexity."""

    name = "complexity"

    def apply(
        self,
        context: RouteContext,
        candidates: list[ModelCandidate],
    ) -> list[ModelCandidate]:
        """
        Filter candidates by complexity-matched capabilities.

        TODO(v0.1): Implement capability matching and tier fallback.
        For now this is a pass-through: the expected capability tags are
        looked up but not yet used, and ``candidates`` is returned unchanged.
        """
        level = context.complexity
        if not level:
            # Unclassified requests are treated as MODERATE.
            level = ComplexityLevel.MODERATE

        # Placeholder for the upcoming filter; deliberately unused.
        wanted_tags = _COMPLEXITY_CAPABILITY_MAP.get(level, ["balanced"])
        _ = wanted_tags

        return candidates
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CostPolicy — routes requests based on budget and token cost estimates.
|
|
3
|
+
|
|
4
|
+
Considers:
|
|
5
|
+
- context.budget_usd (per-request budget cap)
|
|
6
|
+
- ModelCandidate.input_cost_per_1k / output_cost_per_1k
|
|
7
|
+
- context.estimated_input_tokens
|
|
8
|
+
|
|
9
|
+
Boundary: Cost calculation and ranking logic belongs here, not in Router.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from runtimerouter.policies.base import RoutingPolicy
|
|
15
|
+
from runtimerouter.types import ModelCandidate, RouteContext
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class CostPolicy(RoutingPolicy):
    """Select the most cost-effective model that meets quality constraints."""

    name = "cost"

    @staticmethod
    def _cost_sort_key(candidate: ModelCandidate) -> tuple[bool, float]:
        """Sort key: known prices ascending, unknown prices last."""
        cost = candidate.input_cost_per_1k
        if cost is None:
            return (True, 0.0)
        # Compare against None explicitly: a truthiness test would treat a
        # free model (cost 0.0) as unpriced and rank it as the most expensive.
        return (False, cost)

    def apply(
        self,
        context: RouteContext,
        candidates: list[ModelCandidate],
    ) -> list[ModelCandidate]:
        """
        Rank candidates by input token cost.

        TODO(v0.1): Implement cost estimation and budget filtering;
        ``context.budget_usd`` is not enforced yet.

        Args:
            context: Routing context (currently unused by the ranking).
            candidates: Models to rank.

        Returns:
            A new list sorted by ascending ``input_cost_per_1k``; candidates
            without a price come last. The sort is stable, so ties keep
            their incoming order.
        """
        return sorted(candidates, key=self._cost_sort_key)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Model providers — registry and provider abstractions.
|
|
3
|
+
|
|
4
|
+
Providers represent upstream LLM backends (OpenAI, Anthropic, etc.).
|
|
5
|
+
The registry holds ModelCandidate metadata used during routing.
|
|
6
|
+
Actual API calls go through integrations/litellm.py.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from runtimerouter.providers.base import ModelProvider
|
|
10
|
+
from runtimerouter.providers.registry import ModelRegistry
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"ModelProvider",
|
|
14
|
+
"ModelRegistry",
|
|
15
|
+
]
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ModelProvider — abstract interface for provider-specific metadata.
|
|
3
|
+
|
|
4
|
+
v0.1 uses LiteLLM as the unified execution layer; providers here supply
|
|
5
|
+
model catalog metadata rather than direct API calls.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from abc import ABC, abstractmethod
|
|
11
|
+
|
|
12
|
+
from runtimerouter.types import ModelCandidate
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ModelProvider(ABC):
    """Interface for a source of model catalog entries."""

    # Identifier of the provider; concrete subclasses override it.
    name: str = "base"

    @abstractmethod
    def list_models(self) -> list[ModelCandidate]:
        """Enumerate the models this provider offers."""
        ...

    @abstractmethod
    def resolve_model_id(self, alias: str) -> str:
        """Map a friendly alias to the corresponding LiteLLM model identifier."""
        ...
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ModelRegistry — central catalog of routable models.
|
|
3
|
+
|
|
4
|
+
Default v0.1 catalog includes common models across major providers.
|
|
5
|
+
Users can extend via RouterConfig.default_models or register() at runtime.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from runtimerouter.types import ModelCandidate
|
|
11
|
+
|
|
12
|
+
# Built-in catalog used by ModelRegistry when it is constructed with
# ``models=None``. Costs are in USD per 1K tokens (see the ModelCandidate
# field descriptions); ``capabilities`` holds free-form tags such as the
# ones listed in policies/complexity.py.
_DEFAULT_MODELS: list[ModelCandidate] = [
    ModelCandidate(
        model_id="gpt-4o-mini",
        provider="openai",
        input_cost_per_1k=0.00015,
        output_cost_per_1k=0.0006,
        capabilities=["fast", "cheap"],
    ),
    ModelCandidate(
        model_id="gpt-4o",
        provider="openai",
        input_cost_per_1k=0.0025,
        output_cost_per_1k=0.01,
        capabilities=["balanced"],
    ),
    ModelCandidate(
        model_id="claude-sonnet-4-20250514",
        provider="anthropic",
        input_cost_per_1k=0.003,
        output_cost_per_1k=0.015,
        capabilities=["balanced", "reasoning"],
    ),
    ModelCandidate(
        model_id="gemini/gemini-2.0-flash",
        provider="google",
        input_cost_per_1k=0.0001,
        output_cost_per_1k=0.0004,
        capabilities=["fast", "cheap"],
    ),
    ModelCandidate(
        model_id="deepseek/deepseek-chat",
        provider="deepseek",
        input_cost_per_1k=0.00014,
        output_cost_per_1k=0.00028,
        capabilities=["fast", "cheap", "reasoning"],
    ),
]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class ModelRegistry:
    """Keeps the routable model candidates in memory, keyed by model id."""

    def __init__(self, models: list[ModelCandidate] | None = None) -> None:
        """
        Populate the registry.

        Args:
            models: Candidates to register. ``None`` loads the built-in
                default catalog; any list (including an empty one) is used
                as given.
        """
        self._models: dict[str, ModelCandidate] = {}
        initial = models if models is not None else _DEFAULT_MODELS
        for candidate in initial:
            self.register(candidate)

    def register(self, model: ModelCandidate) -> None:
        """Store *model* under its ``model_id``, replacing any existing entry."""
        key = model.model_id
        self._models[key] = model

    def get(self, model_id: str) -> ModelCandidate | None:
        """Return the candidate registered as *model_id*, or ``None`` if absent."""
        if model_id in self._models:
            return self._models[model_id]
        return None

    def list_candidates(self) -> list[ModelCandidate]:
        """Return a new list with every registered candidate, in registration order."""
        return [*self._models.values()]

    def remove(self, model_id: str) -> None:
        """Drop *model_id* from the registry; unknown ids are ignored."""
        if model_id in self._models:
            del self._models[model_id]
|
runtimerouter/router.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Router — orchestrates the routing pipeline.
|
|
3
|
+
|
|
4
|
+
Pipeline (v0.1):
|
|
5
|
+
1. Classifier enriches RouteContext (complexity, token estimates)
|
|
6
|
+
2. Policies filter/rank ModelCandidates in policy_order
|
|
7
|
+
3. Router returns RouteDecision
|
|
8
|
+
|
|
9
|
+
Boundary: Router coordinates policies; individual policies live in policies/.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from runtimerouter.classifier import TaskClassifier
|
|
15
|
+
from runtimerouter.exceptions import RoutingError
|
|
16
|
+
from runtimerouter.policies.base import RoutingPolicy
|
|
17
|
+
from runtimerouter.policies.complexity import ComplexityPolicy
|
|
18
|
+
from runtimerouter.policies.cost import CostPolicy
|
|
19
|
+
from runtimerouter.providers.registry import ModelRegistry
|
|
20
|
+
from runtimerouter.types import ModelCandidate, RouteContext, RouteDecision, RouterConfig
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Router:
    """
    Core routing engine.

    Applies an ordered chain of pluggable policies to select the best model
    for a given request context.
    """

    # Built-in policy classes by name. Each Router works on its own copy
    # (made in __init__), so runtime registrations stay local to one router.
    _POLICY_REGISTRY: dict[str, type[RoutingPolicy]] = {
        "complexity": ComplexityPolicy,
        "cost": CostPolicy,
    }

    def __init__(
        self,
        config: RouterConfig | None = None,
        classifier: TaskClassifier | None = None,
        registry: ModelRegistry | None = None,
        policies: list[RoutingPolicy] | None = None,
    ) -> None:
        """
        Assemble the routing pipeline.

        Args:
            config: Router configuration; defaults to ``RouterConfig()``.
            classifier: Context enricher; defaults to ``TaskClassifier()``.
            registry: Model catalog. When omitted, one is built from
                ``config.default_models``; if that list is empty the
                registry's built-in default catalog is used instead.
            policies: Explicit policy chain. ``None`` builds the chain from
                ``config.policy_order``; an explicit list (even an empty
                one) is used as given.
        """
        self.config = config or RouterConfig()
        self.classifier = classifier or TaskClassifier()

        # Shadow the class-level table with a private copy so that
        # register_policy() cannot affect other Router instances.
        self._POLICY_REGISTRY = dict(type(self)._POLICY_REGISTRY)

        # ModelRegistry only loads its default catalog for ``None``. The
        # config default is an empty list, which would otherwise produce an
        # empty registry and make every route() call fail.
        self.registry = registry or ModelRegistry(self.config.default_models or None)

        self.policies = (
            self._build_default_policies() if policies is None else policies
        )

    def _build_default_policies(self) -> list[RoutingPolicy]:
        """
        Instantiate policies according to config.policy_order.

        Policies disabled through the ``enable_*`` flags are skipped, and
        names with no registered class are ignored.
        """
        enabled = {
            "complexity": self.config.enable_complexity_routing,
            "cost": self.config.enable_cost_routing,
        }
        policies: list[RoutingPolicy] = []
        for name in self.config.policy_order:
            if not enabled.get(name, True):
                continue
            policy_cls = self._POLICY_REGISTRY.get(name)
            if policy_cls is not None:
                policies.append(policy_cls(config=self.config))
        return policies

    def register_policy(self, name: str, policy: RoutingPolicy) -> None:
        """
        Register a custom policy at runtime.

        The policy's class is recorded under *name* for this router only,
        and the instance is appended to the end of the active chain.
        """
        self._POLICY_REGISTRY[name] = type(policy)
        self.policies.append(policy)

    def route(self, context: RouteContext) -> RouteDecision:
        """
        Run the full routing pipeline and return a model selection decision.

        The context is enriched by the classifier, then each policy in turn
        filters/reorders the registry's candidates. The first surviving
        candidate is selected. If none survives, ``config.fallback_model``
        is used when set.

        Raises:
            RoutingError: When no candidate survives the policy chain and
                no fallback model is configured.
        """
        enriched = self.classifier.enrich(context)
        candidates = self.registry.list_candidates()

        for policy in self.policies:
            candidates = policy.apply(enriched, candidates)
            if not candidates:
                # Nothing left to filter; later policies cannot add models.
                break

        if not candidates:
            if self.config.fallback_model:
                return RouteDecision(
                    selected_model=self.config.fallback_model,
                    provider="fallback",
                    reason="No candidates after policy chain; using fallback",
                    policy_name="fallback",
                )
            raise RoutingError("No suitable model found for the given context")

        selected = candidates[0]
        return RouteDecision(
            selected_model=selected.model_id,
            provider=selected.provider,
            reason=f"Selected by policy chain: {self.config.policy_order}",
            # The last policy in the chain determined the final ordering.
            policy_name=self.policies[-1].name if self.policies else "default",
            complexity=enriched.complexity,
            candidates_considered=[c.model_id for c in candidates],
        )

    def list_models(self) -> list[ModelCandidate]:
        """Return all registered model candidates."""
        return self.registry.list_candidates()
|
runtimerouter/types.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core type definitions shared across RuntimeRouter modules.
|
|
3
|
+
|
|
4
|
+
This module defines the data contracts between Router, Classifier, Policies,
|
|
5
|
+
Providers, and Integrations. Keep it free of routing logic.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from enum import Enum
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from pydantic import BaseModel, Field
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ComplexityLevel(str, Enum):
    """
    Ordered tiers of task complexity, from TRIVIAL up to EXPERT.

    Members are also ``str`` instances, so they compare equal to their
    lowercase value.
    """

    TRIVIAL = "trivial"
    SIMPLE = "simple"
    MODERATE = "moderate"
    COMPLEX = "complex"
    EXPERT = "expert"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ModelCandidate(BaseModel):
    """Catalog entry describing one model the router may pick."""

    # Required identity fields.
    model_id: str = Field(description="LiteLLM-compatible model identifier, e.g. gpt-4o")
    provider: str = Field(description="Provider name, e.g. openai, anthropic")

    # Optional pricing; None means the price is unknown.
    input_cost_per_1k: float | None = Field(
        description="Optional input token cost (USD per 1K tokens)",
        default=None,
    )
    output_cost_per_1k: float | None = Field(
        description="Optional output token cost (USD per 1K tokens)",
        default=None,
    )

    max_context_tokens: int | None = None

    # Free-form capability tags and arbitrary extra data.
    capabilities: list[str] = Field(default_factory=list)
    metadata: dict[str, Any] = Field(default_factory=dict)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class RouteContext(BaseModel):
    """Everything the router and its policies know about one request."""

    # The request itself: chat messages and/or a plain prompt.
    messages: list[dict[str, Any]] = Field(default_factory=list)
    prompt: str | None = Field(default=None)

    # Filled in by the classifier when left as None.
    complexity: ComplexityLevel | None = Field(default=None)
    estimated_input_tokens: int | None = Field(default=None)

    # Optional caller-supplied constraints and preferences.
    budget_usd: float | None = Field(default=None)
    preferred_providers: list[str] = Field(default_factory=list)
    excluded_models: list[str] = Field(default_factory=list)

    metadata: dict[str, Any] = Field(default_factory=dict)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class RouteDecision(BaseModel):
    """Result of one routing run: the chosen model plus the explanation for it."""

    # Required: what was chosen, and by which policy and for what reason.
    selected_model: str
    provider: str
    reason: str
    policy_name: str

    # Optional diagnostics.
    complexity: ComplexityLevel | None = Field(default=None)
    estimated_cost_usd: float | None = Field(default=None)
    candidates_considered: list[str] = Field(default_factory=list)
    metadata: dict[str, Any] = Field(default_factory=dict)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class RouterConfig(BaseModel):
    """Settings shared by Router, AutoLLM, the policies and the LiteLLM integration."""

    # Model candidates handed to the ModelRegistry by Router.
    default_models: list[ModelCandidate] = Field(default_factory=list)

    # Switches for the built-in policies.
    enable_complexity_routing: bool = Field(default=True)
    enable_cost_routing: bool = Field(default=True)

    # Model used when the policy chain leaves no candidate; None disables it.
    fallback_model: str | None = Field(default=None)

    # Default keyword arguments merged into every LiteLLM call.
    litellm_kwargs: dict[str, Any] = Field(default_factory=dict)

    policy_order: list[str] = Field(
        description="Ordered list of policy names to apply",
        default_factory=lambda: ["complexity", "cost"],
    )
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: runtimerouter
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Optimize the entire AI session, not just the next model call.
|
|
5
|
+
Project-URL: Homepage, https://github.com/chenyu-dev25/RuntimeRouter
|
|
6
|
+
Project-URL: Documentation, https://github.com/chenyu-dev25/RuntimeRouter#readme
|
|
7
|
+
Project-URL: Repository, https://github.com/chenyu-dev25/RuntimeRouter
|
|
8
|
+
Project-URL: Issues, https://github.com/chenyu-dev25/RuntimeRouter/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/chenyu-dev25/RuntimeRouter/blob/main/ROADMAP.md
|
|
10
|
+
Author: RuntimeRouter Contributors
|
|
11
|
+
License-Expression: MIT
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Keywords: agent,langgraph,litellm,llm,model-selection,pydantic-ai,routing
|
|
14
|
+
Classifier: Development Status :: 3 - Alpha
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
|
+
Classifier: Typing :: Typed
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Requires-Dist: litellm>=1.40.0
|
|
27
|
+
Requires-Dist: pydantic>=2.0
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
30
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
32
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
33
|
+
Provides-Extra: docs
|
|
34
|
+
Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
|
|
35
|
+
Requires-Dist: mkdocs>=1.6; extra == 'docs'
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
# RuntimeRouter
|
|
39
|
+
|
|
40
|
+
> **Optimize the entire AI session, not just the next model call.**
|
|
41
|
+
|
|
42
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
43
|
+
[License: MIT](LICENSE)
|
|
44
|
+
[PyPI](https://pypi.org/project/runtimerouter/)
|
|
45
|
+
|
|
46
|
+
RuntimeRouter is a **Python-first LLM routing library** for the AI ecosystem. It sits **between** your agent framework — LangGraph, PydanticAI, CrewAI, AutoGen — and the model-provider layer as a **Runtime Router**, not a replacement for the framework.
|
|
47
|
+
|
|
48
|
+
You write agents with your favorite framework. RuntimeRouter decides **which model** to call, **when**, and **why** — across the full session lifecycle.
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## Vision
|
|
53
|
+
|
|
54
|
+
Traditional routers optimize a single inference call: given a prompt, pick the cheapest or fastest model.
|
|
55
|
+
|
|
56
|
+
RuntimeRouter's long-term goal is different: **optimize the entire AI session** — model selection, cost, latency, context, caching, and privacy — as a unified runtime decision layer.
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
┌─────────────────────────────────────────────────────────┐
|
|
60
|
+
│ Your Agent Framework (LangGraph / PydanticAI / …) │
|
|
61
|
+
├─────────────────────────────────────────────────────────┤
|
|
62
|
+
│ RuntimeRouter ← session-aware routing (this library) │
|
|
63
|
+
├─────────────────────────────────────────────────────────┤
|
|
64
|
+
│ LiteLLM / Provider APIs (OpenAI, Anthropic, …) │
|
|
65
|
+
└─────────────────────────────────────────────────────────┘
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Why RuntimeRouter?
|
|
71
|
+
|
|
72
|
+
| Problem | RuntimeRouter approach |
|
|
73
|
+
|---------|------------------------|
|
|
74
|
+
| Hard-coding `model="gpt-4o"` everywhere | `AutoLLM()` picks the right model per request |
|
|
75
|
+
| Simple tasks burning frontier-model budget | Complexity routing sends easy tasks to cheap models |
|
|
76
|
+
| No visibility into routing decisions | Every call returns a `RouteDecision` with reason |
|
|
77
|
+
| Framework lock-in | Framework-agnostic; works with any LiteLLM-compatible stack |
|
|
78
|
+
| Proxy-only routers | Designed for **session-level** optimization (roadmap) |
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## vs OpenRouter
|
|
83
|
+
|
|
84
|
+
| | **OpenRouter** | **RuntimeRouter** |
|
|
85
|
+
|---|----------------|-------------------|
|
|
86
|
+
| **What it is** | Unified API proxy to 100+ models | Python routing **library** embedded in your app |
|
|
87
|
+
| **Scope** | Single HTTP request → model | Entire AI **session** (roadmap) |
|
|
88
|
+
| **Integration** | Replace your API base URL | Drop-in `AutoLLM()` or `Router` in Python code |
|
|
89
|
+
| **Policies** | Provider-side routing rules | Pluggable Python **Policy** classes you own |
|
|
90
|
+
| **Framework** | Language-agnostic HTTP | **Python-first**, native LangGraph/PydanticAI hooks (v0.2+) |
|
|
91
|
+
|
|
92
|
+
OpenRouter is excellent as a model gateway. RuntimeRouter is a **runtime decision engine** you control in-process.
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## vs Not Diamond
|
|
97
|
+
|
|
98
|
+
| | **Not Diamond** | **RuntimeRouter** |
|
|
99
|
+
|---|-----------------|-------------------|
|
|
100
|
+
| **What it is** | Managed routing SaaS | Open-source Python library |
|
|
101
|
+
| **Deployment** | Cloud API | In-process, self-hosted |
|
|
102
|
+
| **Customization** | Platform-configured | Full **plugin Policy** architecture |
|
|
103
|
+
| **Session scope** | Per-call model selection | Session-aware optimization (roadmap) |
|
|
104
|
+
| **Cost** | SaaS pricing | Free & open (MIT) |
|
|
105
|
+
|
|
106
|
+
Not Diamond provides intelligent routing as a service. RuntimeRouter gives you the same **concept** as extensible, auditable Python code.
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
## Quick Start
|
|
111
|
+
|
|
112
|
+
### Install
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
pip install runtimerouter
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Basic usage
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
from runtimerouter import AutoLLM
|
|
122
|
+
|
|
123
|
+
llm = AutoLLM()
|
|
124
|
+
|
|
125
|
+
# RuntimeRouter automatically selects Claude, Gemini, DeepSeek, OpenAI, etc.
|
|
126
|
+
response = llm.invoke("帮我分析整个代码仓库")
|
|
127
|
+
|
|
128
|
+
print(response.choices[0].message.content)
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Inspect routing decisions
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
decision = llm.route_only("Summarize this paragraph in one sentence.")
|
|
135
|
+
print(decision.selected_model) # e.g. "gpt-4o-mini"
|
|
136
|
+
print(decision.reason) # why this model was chosen
|
|
137
|
+
print(decision.complexity) # e.g. ComplexityLevel.SIMPLE
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### Configure policies
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
from runtimerouter import AutoLLM, RouterConfig
|
|
144
|
+
|
|
145
|
+
config = RouterConfig(
|
|
146
|
+
enable_complexity_routing=True,
|
|
147
|
+
enable_cost_routing=True,
|
|
148
|
+
fallback_model="gpt-4o-mini",
|
|
149
|
+
policy_order=["complexity", "cost"],
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
llm = AutoLLM(config=config)
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### Use Router directly (without invoking)
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from runtimerouter import Router, RouteContext
|
|
159
|
+
|
|
160
|
+
router = Router()
|
|
161
|
+
decision = router.route(RouteContext(prompt="Explain quantum entanglement"))
|
|
162
|
+
print(decision.selected_model)
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## Architecture
|
|
168
|
+
|
|
169
|
+
```
|
|
170
|
+
runtimerouter/
|
|
171
|
+
├── autollm.py # User-facing entry point
|
|
172
|
+
├── router.py # Routing pipeline orchestrator
|
|
173
|
+
├── classifier.py # Task complexity & token estimation
|
|
174
|
+
├── types.py # Shared data contracts (Pydantic models)
|
|
175
|
+
├── config.py # Config loading utilities
|
|
176
|
+
├── policies/ # Pluggable routing policies
|
|
177
|
+
│ ├── base.py # RoutingPolicy ABC
|
|
178
|
+
│ ├── complexity.py # Complexity-based routing
|
|
179
|
+
│ └── cost.py # Cost-based routing
|
|
180
|
+
├── providers/ # Model catalog & provider metadata
|
|
181
|
+
│ ├── base.py # ModelProvider ABC
|
|
182
|
+
│ └── registry.py # ModelRegistry (default model catalog)
|
|
183
|
+
└── integrations/ # External execution layers
|
|
184
|
+
└── litellm.py # LiteLLM completion wrapper
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
**Routing pipeline (v0.1):**
|
|
188
|
+
|
|
189
|
+
```
|
|
190
|
+
User prompt
|
|
191
|
+
│
|
|
192
|
+
▼
|
|
193
|
+
TaskClassifier.enrich() ← complexity, token estimate
|
|
194
|
+
│
|
|
195
|
+
▼
|
|
196
|
+
Policy chain (ordered) ← complexity → cost
|
|
197
|
+
│
|
|
198
|
+
▼
|
|
199
|
+
RouteDecision ← selected model + reason
|
|
200
|
+
│
|
|
201
|
+
▼
|
|
202
|
+
LiteLLMIntegration ← execute completion
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
See [docs/architecture.md](docs/architecture.md) for module boundaries and extension points.
|
|
206
|
+
|
|
207
|
+
---
|
|
208
|
+
|
|
209
|
+
## Roadmap
|
|
210
|
+
|
|
211
|
+
| Version | Focus |
|
|
212
|
+
|---------|-------|
|
|
213
|
+
| **v0.1** *(current)* | Auto Model Selection, Complexity Routing, Cost Routing, LiteLLM Integration |
|
|
214
|
+
| **v0.2** | LangGraph Integration, PydanticAI Integration |
|
|
215
|
+
| **v0.3** | Cache-aware Routing |
|
|
216
|
+
| **v0.4** | Context-aware Routing |
|
|
217
|
+
| **v0.5** | Session-aware Optimization |
|
|
218
|
+
|
|
219
|
+
Full details: [ROADMAP.md](ROADMAP.md)
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## Contributing
|
|
224
|
+
|
|
225
|
+
We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
226
|
+
|
|
227
|
+
```bash
|
|
228
|
+
git clone https://github.com/chenyu-dev25/RuntimeRouter.git
|
|
229
|
+
cd RuntimeRouter
|
|
230
|
+
python -m venv .venv && source .venv/bin/activate
|
|
231
|
+
pip install -e ".[dev]"
|
|
232
|
+
pytest
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
---
|
|
236
|
+
|
|
237
|
+
## License
|
|
238
|
+
|
|
239
|
+
MIT — see [LICENSE](LICENSE).
|
|
240
|
+
|
|
241
|
+
---
|
|
242
|
+
|
|
243
|
+
<p align="center">
|
|
244
|
+
<strong>RuntimeRouter</strong><br>
|
|
245
|
+
Optimize the entire AI session, not just the next model call.
|
|
246
|
+
</p>
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
runtimerouter/__init__.py,sha256=mGOpANV1MH0-KRG10B0jUdQROyHjx-cIal-JZ7VAPyo,545
|
|
2
|
+
runtimerouter/autollm.py,sha256=Fv-AxAt-GpgvexszBiiltEtX-HkRoDKSGe8ujFFsNLg,3154
|
|
3
|
+
runtimerouter/classifier.py,sha256=LPwdQrPcZP3B3HWk5ARUz4xZxAP9QsbexI_AhsZksq0,2034
|
|
4
|
+
runtimerouter/config.py,sha256=td2l_W_tCOISqvvigIZM0M1S4p9bPKnFR-4Z0I3lr2c,1144
|
|
5
|
+
runtimerouter/exceptions.py,sha256=IwO_mUGI0OvKfxz5wugMLXucxHtDtWnzBH3hx2ILWzI,681
|
|
6
|
+
runtimerouter/router.py,sha256=mtHomaXCAAYKwM5W2dSJmHbFygjkHKGimUFGaA33frM,3886
|
|
7
|
+
runtimerouter/types.py,sha256=mgQElmYYphVqYjD_uiOvbQf3VL03fJ03VpJxCvFy3yQ,2703
|
|
8
|
+
runtimerouter/integrations/__init__.py,sha256=ZU5vk85mWFDL8jd1ARMFvPRftU-sOtHaH4zpFqDhJi8,265
|
|
9
|
+
runtimerouter/integrations/litellm.py,sha256=6bwVEMFfd0CERK9ufAVCB_mHlL_0540SisVngdXIXW4,2450
|
|
10
|
+
runtimerouter/policies/__init__.py,sha256=x9SzZMOJmxteCG30L1ZrKkV1TtNE7SC-EnnPsnoEHlE,509
|
|
11
|
+
runtimerouter/policies/base.py,sha256=mvaYdeH9PgdAXmM3mbe8fh17iYImDWsImPCGkKWfibI,1081
|
|
12
|
+
runtimerouter/policies/complexity.py,sha256=LChQAGjGO8m96RroQ_sS81Ecsv-mGLzbEVcYW9UyfPA,1649
|
|
13
|
+
runtimerouter/policies/cost.py,sha256=UCO4iRy87j4KxJ4dOjtGpYbZiX66usAa8qLDVzHgdro,1206
|
|
14
|
+
runtimerouter/providers/__init__.py,sha256=-3Nq-65J_hED-DXudMUEA17ym37owdmhjy0Rz1Kz7jA,423
|
|
15
|
+
runtimerouter/providers/base.py,sha256=Qi6fBInrImIDWpvoZ_HApLtrGmQr9plm3ZRX0LJpaZ8,723
|
|
16
|
+
runtimerouter/providers/registry.py,sha256=7Hk5rOHMby1FwUuTv4WvCrw2lVPoSpLi45jyakzIUkY,2283
|
|
17
|
+
runtimerouter-0.1.0.dist-info/METADATA,sha256=NPC3uHTnp0fLQJmKLbWMfUsGTkV5xIptSBgNXq3u44k,8774
|
|
18
|
+
runtimerouter-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
19
|
+
runtimerouter-0.1.0.dist-info/licenses/LICENSE,sha256=lC8UQNq3Ya2RkrlXf3o7xV8FMW4ucNc_HEdekVvkcxs,1083
|
|
20
|
+
runtimerouter-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 RuntimeRouter Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|