route67 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_router/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ """Public package interface for route67."""
2
+
3
+ from .config import ModelSpec, RouterConfig, RoutingTableEntry
4
+ from .controller import Controller
5
+
6
+ __all__ = ["Controller", "ModelSpec", "RouterConfig", "RoutingTableEntry"]
7
+ __version__ = "0.1.0"
8
+
llm_router/config.py ADDED
@@ -0,0 +1,61 @@
1
+ """Configuration models for the router."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Literal
7
+
8
+ ModelTarget = Literal["weak_model", "strong_model"]
9
+ MODEL_TARGETS = frozenset({"weak_model", "strong_model"})
10
+
11
+
12
+ @dataclass(frozen=True, slots=True)
13
+ class RoutingTableEntry:
14
+ query: str
15
+ target: ModelTarget
16
+ notes: str | None = None
17
+
18
+ def __post_init__(self) -> None:
19
+ if self.target not in MODEL_TARGETS:
20
+ raise ValueError(
21
+ "routing target must be 'weak_model' or 'strong_model'"
22
+ )
23
+
24
+
25
+ @dataclass(frozen=True, slots=True)
26
+ class ModelSpec:
27
+ name: str
28
+ usage_notes: str | None = None
29
+
30
+
31
+ @dataclass(slots=True)
32
+ class RouterConfig:
33
+ routing_table: list[RoutingTableEntry] = field(default_factory=list)
34
+ similarity_threshold: float = 0.75
35
+ weak_model: ModelSpec | None = None
36
+ strong_model: ModelSpec | None = None
37
+ embedding_cache_path: str | None = None
38
+ log_path: str | None = None
39
+ escalation_max_tokens: int = 10
40
+ embedding_model: str = "minishlab/potion-base-8M"
41
+
42
+ def __post_init__(self) -> None:
43
+ if not -1.0 <= self.similarity_threshold <= 1.0:
44
+ raise ValueError("similarity_threshold must be between -1.0 and 1.0")
45
+ if self.weak_model is None:
46
+ raise ValueError("weak_model is required")
47
+ if self.strong_model is None:
48
+ raise ValueError("strong_model is required")
49
+ if self.escalation_max_tokens < 1:
50
+ raise ValueError("escalation_max_tokens must be at least 1")
51
+
52
+ def resolve_target(self, target: ModelTarget) -> ModelSpec:
53
+ if target == "weak_model":
54
+ if self.weak_model is None:
55
+ raise RuntimeError("weak_model is not configured")
56
+ return self.weak_model
57
+ if target == "strong_model":
58
+ if self.strong_model is None:
59
+ raise RuntimeError("strong_model is not configured")
60
+ return self.strong_model
61
+ raise ValueError("routing target must be 'weak_model' or 'strong_model'")
@@ -0,0 +1,101 @@
1
+ """OpenAI-compatible public controller."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from .config import RouterConfig
8
+ from .embedder import Embedder
9
+ from .escalation import run_with_escalation
10
+ from .logging_utils import RoutingDecision, log_decision
11
+ from .routing_table import RoutingTable
12
+
13
+
14
class Controller:
    """Chat-completions front end that chooses a model for every request.

    ``controller.chat.completions.create(...)`` mirrors the OpenAI client call
    shape and delegates to :meth:`chat_completions_create`.
    """

    def __init__(
        self,
        config: RouterConfig,
        openai_client: Any | None = None,
        embedder: Embedder | None = None,
    ) -> None:
        """Wire up the client, the routing table and the ``chat`` proxy.

        A default OpenAI client and a default ``Embedder`` are created only
        when the corresponding argument is not supplied.
        """
        self.config = config
        self.client = openai_client or _default_openai_client()
        active_embedder = embedder or Embedder(config.embedding_model)
        self.table = RoutingTable(
            config.routing_table,
            active_embedder,
            config.embedding_cache_path,
        )
        self.chat = _ChatProxy(self)

    def chat_completions_create(self, **kwargs: Any) -> Any:
        """Route one chat-completion request and return the provider response.

        The caller's ``model`` argument is dropped; the model is chosen either
        from the best routing-table match (when its score reaches the
        configured threshold) or by the weak-model escalation flow.

        Raises:
            NotImplementedError: if ``stream`` is truthy.
            TypeError: if ``messages`` is not a list.
        """
        if kwargs.get("stream"):
            raise NotImplementedError("Public streaming is not supported in route67 v1")
        messages = kwargs.get("messages")
        if not isinstance(messages, list):
            raise TypeError("messages must be provided as a list")

        query = extract_user_query(messages)
        entry, score = self.table.best_match(query)

        # Everything except the caller's model choice is forwarded as given.
        forwarded = dict(kwargs)
        forwarded.pop("model", None)

        matched = entry is not None and score >= self.config.similarity_threshold
        if not matched:
            outcome = run_with_escalation(
                self.client,
                self.config,
                messages,
                request_kwargs=forwarded,
            )
            method = "escalated" if outcome.escalated else "weak_model_direct"
            response = outcome.response
            decision = RoutingDecision(method, outcome.used_model, score)
        else:
            spec = self.config.resolve_target(entry.target)
            response = self.client.chat.completions.create(
                model=spec.name,
                **forwarded,
            )
            decision = RoutingDecision("table_match", spec.name, score)

        log_decision(self.config.log_path, query, decision)
        return response
64
+
65
+
66
def extract_user_query(messages: list[dict[str, Any]]) -> str:
    """Return the text of the most recent ``user`` message.

    Args:
        messages: chat messages in OpenAI format, oldest first.

    Returns:
        The message's string content; for list-style content, the ``text`` of
        every ``type == "text"`` part joined with single spaces; an empty
        string when the content is missing or ``None``; otherwise
        ``str(content)``.

    Raises:
        ValueError: if no message has ``role == "user"``.
    """
    for message in reversed(messages):
        if message.get("role") != "user":
            continue

        content = message.get("content")
        if content is None:
            # An explicit null must not be embedded and logged as "None".
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            texts = []
            for part in content:
                if not isinstance(part, dict) or part.get("type") != "text":
                    continue
                text = part.get("text", "")
                # Skip null/non-string text so the join below cannot fail.
                if isinstance(text, str):
                    texts.append(text)
            return " ".join(texts)
        return str(content)

    raise ValueError("messages must contain at least one user message")
81
+
82
+
83
def _default_openai_client() -> Any:
    """Build an ``OpenAI()`` client with default arguments.

    The import is deferred to call time, so ``openai`` is only needed when
    the caller did not pass a client of their own.

    Raises:
        ImportError: if the ``openai`` package cannot be imported.
    """
    try:
        from openai import OpenAI
    except ImportError as exc:
        raise ImportError("openai is required when openai_client is not supplied") from exc
    else:
        return OpenAI()
89
+
90
+
91
class _ChatProxy:
    """Holder for ``completions`` so ``controller.chat.completions`` resolves."""

    def __init__(self, controller: Controller) -> None:
        """Attach a completions proxy bound to ``controller``."""
        self.completions = _CompletionsProxy(controller)
94
+
95
+
96
+ class _CompletionsProxy:
97
+ def __init__(self, controller: Controller) -> None:
98
+ self._controller = controller
99
+
100
+ def create(self, **kwargs: Any) -> Any:
101
+ return self._controller.chat_completions_create(**kwargs)
llm_router/embedder.py ADDED
@@ -0,0 +1,42 @@
1
+ """Small, lazy-loading Model2Vec embedder."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Sequence
6
+ from typing import Any
7
+
8
+ import numpy as np
9
+
10
+
11
class Embedder:
    """Text embedder whose underlying model is loaded on first access."""

    def __init__(
        self,
        model_name: str = "minishlab/potion-base-8M",
        model: Any | None = None,
    ) -> None:
        """Store the model name and, optionally, an already-built model."""
        self._model = model
        self.model_name = model_name

    @property
    def model(self) -> Any:
        """Return the model, importing and loading it the first time.

        Raises:
            ImportError: if ``model2vec`` cannot be imported.
        """
        if self._model is not None:
            return self._model

        try:
            from model2vec import StaticModel
        except ImportError as exc:
            raise ImportError(
                "model2vec is required to compute embeddings; install route67"
            ) from exc

        self._model = StaticModel.from_pretrained(self.model_name)
        return self._model

    def encode(self, texts: Sequence[str]) -> np.ndarray:
        """Embed ``texts`` and return a two-dimensional float32 array.

        Raises:
            TypeError: if a bare string is passed instead of a sequence.
            ValueError: if the model's output is not two-dimensional.
        """
        if isinstance(texts, str):
            raise TypeError("encode expects a sequence of strings; use encode_one for one string")

        backend = self.model
        raw = backend.encode(list(texts))
        vectors = np.asarray(raw, dtype=np.float32)
        if vectors.ndim == 2:
            return vectors
        raise ValueError("embedder returned an array with an unexpected shape")

    def encode_one(self, text: str) -> np.ndarray:
        """Embed one string and return its single row."""
        batch = self.encode([text])
        return batch[0]
42
+
@@ -0,0 +1,248 @@
1
+ """Sequential weak-to-strong escalation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+ from dataclasses import dataclass
7
+ from types import SimpleNamespace
8
+ from typing import Any
9
+
10
+ from .config import RouterConfig
11
+ from .prompts import build_escalation_prompt
12
+
13
+ SENTINEL = "ESCALATE"
14
+
15
+
16
+ @dataclass(frozen=True, slots=True)
17
+ class EscalationResult:
18
+ used_model: str
19
+ response: Any
20
+ escalated: bool
21
+
22
+
23
def run_with_escalation(
    client: Any,
    config: RouterConfig,
    messages: list[dict[str, Any]],
    request_kwargs: dict[str, Any] | None = None,
) -> EscalationResult:
    """Ask the weak model first and fall back to the strong model on request.

    The weak model is called in streaming mode with an extra system prompt.
    Its output is buffered; once the start of the reply is conclusive, a reply
    beginning with the sentinel triggers a non-streaming call to the strong
    model with the caller's original messages. Otherwise the buffered chunks
    are assembled into a regular completion.

    Args:
        client: OpenAI-compatible client exposing ``chat.completions.create``.
        config: router settings (models, routing table, decision budget).
        messages: the caller's chat messages, passed on unchanged.
        request_kwargs: extra request options; ``model``, ``messages`` and
            ``stream`` are removed from a copy before forwarding.

    Returns:
        An ``EscalationResult`` naming the model that produced the response.
    """
    forwarded = dict(request_kwargs or {})
    for reserved in ("model", "messages", "stream"):
        forwarded.pop(reserved, None)

    def ask_strong_model() -> EscalationResult:
        # The strong model sees the original conversation, without the
        # escalation system prompt.
        answer = client.chat.completions.create(
            model=config.strong_model.name,
            messages=messages,
            stream=False,
            **forwarded,
        )
        return EscalationResult(
            used_model=config.strong_model.name,
            response=answer,
            escalated=True,
        )

    system_prompt = build_escalation_prompt(
        config.weak_model.usage_notes,
        config.strong_model,
        config.routing_table,
    )
    weak_stream = client.chat.completions.create(
        model=config.weak_model.name,
        messages=[{"role": "system", "content": system_prompt}, *messages],
        stream=True,
        **forwarded,
    )

    received: list[Any] = []
    seen_text = ""
    decided = False
    try:
        for chunk in weak_stream:
            received.append(chunk)
            seen_text += _chunk_text(chunk)
            if decided or not _decision_boundary_reached(
                seen_text, config.escalation_max_tokens
            ):
                continue
            # The decision is taken exactly once; afterwards we only buffer.
            decided = True
            if _is_escalation(seen_text):
                # Close the weak stream before issuing the strong request.
                _close_stream(weak_stream)
                return ask_strong_model()
    finally:
        _close_stream(weak_stream)

    # The stream may end before a boundary was reached; check once more.
    if _is_escalation(seen_text):
        return ask_strong_model()

    weak_name = config.weak_model.name
    assembled = _assemble_chat_completion(received, weak_name)
    return EscalationResult(weak_name, assembled, False)
85
+
86
+
87
def _decision_boundary_reached(text: str, max_tokens: int) -> bool:
    """Tell whether enough of the reply has arrived to decide on escalation.

    True as soon as the left-stripped text contains a line break, starts with
    the sentinel, can no longer become the sentinel, or holds at least
    ``max_tokens`` whitespace-separated words.
    """
    body = text.lstrip()
    lowered = body.lower()
    sentinel = SENTINEL.lower()

    has_line_break = "\n" in body or "\r" in body
    if has_line_break:
        return True

    sentinel_complete = lowered.startswith(sentinel) and len(body) >= len(SENTINEL)
    if sentinel_complete:
        return True

    # Non-empty text that is not a prefix of the sentinel has already diverged.
    diverged = bool(body) and not sentinel.startswith(lowered)
    if diverged:
        return True

    word_count = len(body.split())
    return word_count >= max_tokens
96
+
97
+
98
def _is_escalation(text: str) -> bool:
    """Return True when ``text`` begins with the sentinel.

    Leading whitespace and letter case are ignored.
    """
    normalized = text.lstrip().lower()
    return normalized.startswith(SENTINEL.lower())
100
+
101
+
102
def _chunk_text(chunk: Any) -> str:
    """Return the text delta of the chunk's first choice, or "" if none."""
    choices = _get(chunk, "choices", []) or []
    if choices:
        first_delta = _get(choices[0], "delta")
        return _get(first_delta, "content", "") or ""
    return ""
108
+
109
+
110
+ def _close_stream(stream: Any) -> None:
111
+ close = getattr(stream, "close", None)
112
+ if callable(close):
113
+ close()
114
+
115
+
116
def _assemble_chat_completion(chunks: list[Any], model: str) -> Any:
    """Rebuild a non-streaming chat completion from buffered stream chunks.

    Only the first choice of every chunk is used. Deltas are merged into one
    assistant message; top-level and choice-level fields outside the standard
    set are carried over, with the last non-None value winning.

    Args:
        chunks: stream chunks (dicts or attribute-style objects) in arrival order.
        model: model name to report in the assembled completion.

    Returns:
        An ``openai.types.chat.ChatCompletion`` when the SDK is importable and
        accepts the payload; otherwise a nested ``SimpleNamespace`` with the
        same shape.
    """
    standard_response_keys = {
        "id",
        "object",
        "created",
        "model",
        "choices",
        "usage",
        "system_fingerprint",
    }
    standard_choice_keys = {"index", "delta", "finish_reason"}

    message: dict[str, Any] = {"role": "assistant", "content": ""}
    response_extensions: dict[str, Any] = {}
    choice_extensions: dict[str, Any] = {}
    finish_reason = "stop"
    completion_id = "route67-weak"
    created = int(time.time())
    system_fingerprint = None
    usage = None

    for chunk in chunks:
        chunk_payload = _dump(chunk)
        if isinstance(chunk_payload, dict):
            for key, value in chunk_payload.items():
                if key not in standard_response_keys and value is not None:
                    response_extensions[key] = value
        completion_id = _get(chunk, "id", completion_id) or completion_id
        created = _get(chunk, "created", created) or created

        # A chunk may carry these fields with a None value. Keep the last
        # non-None value instead of letting a later None erase it, matching
        # how ``id`` and ``created`` are handled above.
        chunk_fingerprint = _get(chunk, "system_fingerprint")
        if chunk_fingerprint is not None:
            system_fingerprint = chunk_fingerprint
        chunk_usage = _get(chunk, "usage")
        if chunk_usage is not None:
            usage = chunk_usage

        choices = _get(chunk, "choices", []) or []
        if not choices:
            continue
        choice = choices[0]
        delta = _get(choice, "delta")
        delta_payload = _dump(delta)
        if isinstance(delta_payload, dict):
            message = _merge_stream_value(message, delta_payload)
        choice_payload = _dump(choice)
        if isinstance(choice_payload, dict):
            for key, value in choice_payload.items():
                if key not in standard_choice_keys and value is not None:
                    choice_extensions[key] = value
        finish_reason = _get(choice, "finish_reason", finish_reason) or finish_reason

    payload = {
        "id": completion_id,
        "object": "chat.completion",
        "created": created,
        "model": model,
        "choices": [
            {
                "index": 0,
                "message": message,
                "finish_reason": finish_reason,
                "logprobs": None,
                # Extensions come last so a streamed value overrides the default.
                **choice_extensions,
            }
        ],
        "usage": _dump(usage) if usage is not None else None,
        "system_fingerprint": system_fingerprint,
        **response_extensions,
    }
    try:
        from openai.types.chat import ChatCompletion

        return ChatCompletion.model_validate(payload)
    except (ImportError, TypeError, ValueError):
        # Compatible providers can add values before the OpenAI SDK schema knows
        # about them. Preserve the response shape instead of rejecting the data.
        return _namespace(payload)
185
+
186
+
187
+ def _get(value: Any, name: str, default: Any = None) -> Any:
188
+ if isinstance(value, dict):
189
+ return value.get(name, default)
190
+ return getattr(value, name, default)
191
+
192
+
193
+ def _dump(value: Any) -> Any:
194
+ if isinstance(value, dict):
195
+ return value
196
+ model_dump = getattr(value, "model_dump", None)
197
+ if callable(model_dump):
198
+ return model_dump(exclude_none=True)
199
+ attributes = getattr(value, "__dict__", None)
200
+ return dict(attributes) if isinstance(attributes, dict) else value
201
+
202
+
203
+ def _merge_stream_value(current: Any, incoming: Any, key: str | None = None) -> Any:
204
+ """Merge streamed OpenAI-format deltas without dropping provider extensions."""
205
+ if incoming is None:
206
+ return current
207
+ if current is None:
208
+ return incoming
209
+ if isinstance(current, dict) and isinstance(incoming, dict):
210
+ merged = dict(current)
211
+ for child_key, value in incoming.items():
212
+ merged[child_key] = _merge_stream_value(
213
+ merged.get(child_key), value, child_key
214
+ )
215
+ return merged
216
+ if isinstance(current, list) and isinstance(incoming, list):
217
+ return _merge_stream_lists(current, incoming)
218
+ if isinstance(current, str) and isinstance(incoming, str):
219
+ if key in {"role", "name"}:
220
+ return incoming
221
+ return current + incoming
222
+ return incoming
223
+
224
+
225
+ def _merge_stream_lists(current: list[Any], incoming: list[Any]) -> list[Any]:
226
+ merged = list(current)
227
+ positions = {
228
+ item.get("index"): position
229
+ for position, item in enumerate(merged)
230
+ if isinstance(item, dict) and item.get("index") is not None
231
+ }
232
+ for item in incoming:
233
+ if isinstance(item, dict) and item.get("index") in positions:
234
+ position = positions[item["index"]]
235
+ merged[position] = _merge_stream_value(merged[position], item)
236
+ else:
237
+ merged.append(item)
238
+ if isinstance(item, dict) and item.get("index") is not None:
239
+ positions[item["index"]] = len(merged) - 1
240
+ return merged
241
+
242
+
243
+ def _namespace(value: Any) -> Any:
244
+ if isinstance(value, dict):
245
+ return SimpleNamespace(**{key: _namespace(item) for key, item in value.items()})
246
+ if isinstance(value, list):
247
+ return [_namespace(item) for item in value]
248
+ return value
@@ -0,0 +1,34 @@
1
+ """Structured JSONL routing-decision logging."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from dataclasses import asdict, dataclass
7
+ from datetime import datetime, timezone
8
+ from pathlib import Path
9
+ from typing import Literal
10
+
11
+
12
+ @dataclass(frozen=True, slots=True)
13
+ class RoutingDecision:
14
+ method: Literal["table_match", "weak_model_direct", "escalated"]
15
+ model: str
16
+ score: float
17
+
18
+
19
def log_decision(log_path: str | None, query: str, decision: RoutingDecision) -> None:
    """Append one routing decision to a JSONL file.

    Nothing is written when ``log_path`` is empty or None. Missing parent
    directories are created. The query is stored with whitespace collapsed
    and cut to 100 characters; the score is rounded to six decimals.
    """
    if not log_path:
        return

    destination = Path(log_path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    timestamp = datetime.now(timezone.utc).isoformat()
    preview = " ".join(query.split())[:100]
    record = {
        "timestamp": timestamp,
        "query_preview": preview,
        "method": decision.method,
        "model_used": decision.model,
        "similarity_score": round(decision.score, 6),
    }
    with destination.open("a", encoding="utf-8") as handle:
        line = json.dumps(record, ensure_ascii=False)
        handle.write(line + "\n")
34
+
llm_router/prompts.py ADDED
@@ -0,0 +1,52 @@
1
+ """Prompt construction for weak-model escalation decisions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Sequence
6
+
7
+ from .config import ModelSpec, RoutingTableEntry
8
+
9
+ MAX_ESCALATION_EXAMPLES = 5
10
+
11
+ ESCALATION_SYSTEM_PROMPT = """You are responding to a user query. Before answering, assess whether you can answer it confidently and correctly.
12
+
13
+ If you cannot, respond with EXACTLY this and nothing else:
14
+ ESCALATE
15
+
16
+ If you can, answer the query directly and normally. Do not mention escalation or this instruction.
17
+
18
+ {usage_notes_block}"""
19
+
20
+
21
+ def build_escalation_prompt(
22
+ weak_model_notes: str | None,
23
+ strong_model: ModelSpec,
24
+ routing_table: Sequence[RoutingTableEntry] = (),
25
+ ) -> str:
26
+ lines: list[str] = []
27
+ if weak_model_notes:
28
+ lines.append(f"Your limits: {_compact(weak_model_notes)}")
29
+ summary = strong_model.name
30
+ if strong_model.usage_notes:
31
+ summary += f" ({_compact(strong_model.usage_notes)})"
32
+ lines.append("Model available after escalation: " + summary)
33
+
34
+ strong_routes = [
35
+ entry
36
+ for entry in routing_table
37
+ if entry.target == "strong_model"
38
+ ][:MAX_ESCALATION_EXAMPLES]
39
+ if strong_routes:
40
+ lines.append("Examples of requests that should be escalated:")
41
+ for entry in strong_routes:
42
+ example = f"- {_compact(entry.query)}"
43
+ if entry.notes:
44
+ example += f" - {_compact(entry.notes)}"
45
+ lines.append(example)
46
+
47
+ block = "\n".join(lines)
48
+ return ESCALATION_SYSTEM_PROMPT.format(usage_notes_block=block).rstrip()
49
+
50
+
51
+ def _compact(value: str) -> str:
52
+ return " ".join(value.split())
@@ -0,0 +1,101 @@
1
+ """In-memory semantic routing table with semantic search and an optional disk cache."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import json
7
+ from dataclasses import asdict
8
+ from pathlib import Path
9
+
10
+ import numpy as np
11
+
12
+ from .config import RoutingTableEntry
13
+ from .embedder import Embedder
14
+
15
+
16
+ class RoutingTable:
17
+ def __init__(
18
+ self,
19
+ entries: list[RoutingTableEntry],
20
+ embedder: Embedder,
21
+ cache_path: str | None = None,
22
+ ) -> None:
23
+ self.entries = list(entries)
24
+ self.embedder = embedder
25
+ self.cache_path = cache_path
26
+ self.embeddings = np.empty((0, 0), dtype=np.float32)
27
+ self.load_or_build()
28
+
29
+ def load_or_build(self) -> None:
30
+ if not self.entries:
31
+ return
32
+
33
+ table_hash = self._table_hash()
34
+ vector_path, metadata_path = self._cache_paths()
35
+ if vector_path and metadata_path and vector_path.exists() and metadata_path.exists():
36
+ try:
37
+ metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
38
+ if (
39
+ metadata.get("table_hash") == table_hash
40
+ and metadata.get("embedding_model") == self.embedder.model_name
41
+ ):
42
+ cached = np.load(vector_path, allow_pickle=False)
43
+ if cached.ndim == 2 and cached.shape[0] == len(self.entries):
44
+ self.embeddings = cached.astype(np.float32, copy=False)
45
+ return
46
+ except (OSError, ValueError, json.JSONDecodeError):
47
+ pass
48
+
49
+ vectors = self.embedder.encode([entry.query for entry in self.entries])
50
+ self.embeddings = _normalize_rows(vectors)
51
+
52
+ if vector_path and metadata_path:
53
+ vector_path.parent.mkdir(parents=True, exist_ok=True)
54
+ np.save(vector_path, self.embeddings, allow_pickle=False)
55
+ metadata_path.write_text(
56
+ json.dumps(
57
+ {
58
+ "table_hash": table_hash,
59
+ "embedding_model": self.embedder.model_name,
60
+ "entries": [asdict(entry) for entry in self.entries],
61
+ },
62
+ ensure_ascii=False,
63
+ indent=2,
64
+ ),
65
+ encoding="utf-8",
66
+ )
67
+
68
+ def best_match(self, query: str) -> tuple[RoutingTableEntry | None, float]:
69
+ if not self.entries:
70
+ return None, 0.0
71
+
72
+ query_vector = _normalize_rows(self.embedder.encode([query]))[0]
73
+ scores = self.embeddings @ query_vector
74
+ best_index = int(np.argmax(scores))
75
+ return self.entries[best_index], float(scores[best_index])
76
+
77
+ def _table_hash(self) -> str:
78
+ payload = json.dumps(
79
+ [asdict(entry) for entry in self.entries],
80
+ sort_keys=True,
81
+ ensure_ascii=False,
82
+ separators=(",", ":"),
83
+ )
84
+ return hashlib.sha256(payload.encode("utf-8")).hexdigest()
85
+
86
+ def _cache_paths(self) -> tuple[Path | None, Path | None]:
87
+ if not self.cache_path:
88
+ return None, None
89
+ base = Path(self.cache_path)
90
+ if base.suffix in {".npy", ".json"}:
91
+ base = base.with_suffix("")
92
+ return base.with_suffix(".npy"), base.with_suffix(".json")
93
+
94
+
95
+ def _normalize_rows(vectors: np.ndarray) -> np.ndarray:
96
+ vectors = np.asarray(vectors, dtype=np.float32)
97
+ if vectors.ndim != 2:
98
+ raise ValueError("embeddings must be a two-dimensional array")
99
+ norms = np.linalg.norm(vectors, axis=1, keepdims=True)
100
+ return np.divide(vectors, norms, out=np.zeros_like(vectors), where=norms != 0)
101
+
@@ -0,0 +1,220 @@
1
+ Metadata-Version: 2.4
2
+ Name: route67
3
+ Version: 0.1.0
4
+ Summary: A semantic LLM router for OpenAI-compatible chat completions.
5
+ Project-URL: Homepage, https://github.com/SmallChungus1/route67
6
+ Project-URL: Repository, https://github.com/SmallChungus1/route67
7
+ Project-URL: Issues, https://github.com/SmallChungus1/route67/issues
8
+ Author: route67 contributors
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: llm,openai,router,semantic-routing
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: model2vec<1,>=0.6
24
+ Requires-Dist: numpy>=1.24
25
+ Requires-Dist: openai<3,>=1.0
26
+ Provides-Extra: test
27
+ Requires-Dist: pytest>=8; extra == 'test'
28
+ Description-Content-Type: text/markdown
29
+
30
+ # route67
31
+
32
+ `route67` is an LLM router for the OpenAI-compatible chat
33
+ completions format. It compares each request with a user-defined routing table by semantic similarity and sends it to the model configured for the best-matching entry. When no entry matches closely enough, a weak model either answers the request itself or explicitly escalates it to a strong model.
34
+
35
+ ## How it works
36
+
37
+ ```mermaid
38
+ flowchart LR
39
+ Q["User request"] --> R{"Semantic route match?"}
40
+ R -- Yes --> M["Configured weak or strong model"]
41
+ R -- No --> W["Weak model gate<br/>usage notes + strong-route examples"]
42
+ W -- Answers --> O["Response"]
43
+ W -- ESCALATE --> S["Strong model"]
44
+ M --> O
45
+ S --> O
46
+ ```
47
+
48
+ ## Install
49
+
50
+ route67 requires Python 3.10 or newer. Choose either the standard Python workflow
51
+ or the `uv` workflow.
52
+
53
+ ### Using `python -m venv`
54
+
55
+ Create and activate a virtual environment:
56
+
57
+ ```console
58
+ python -m venv .venv
59
+ ```
60
+
61
+ ```powershell
62
+ # Windows PowerShell
63
+ .\.venv\Scripts\Activate.ps1
64
+ ```
65
+
66
+ ```console
67
+ # macOS/Linux
68
+ source .venv/bin/activate
69
+ ```
70
+
71
+ Then install route67 and its dependencies:
72
+
73
+ ```console
74
+ python -m pip install --upgrade pip
75
+ python -m pip install -e .
76
+ ```
77
+
78
+ To also install the test dependencies, use `python -m pip install -e ".[test]"`.
79
+
80
+ ### Using `uv`
81
+
82
+ With [`uv`](https://docs.astral.sh/uv/) installed, create the environment and
83
+ install the project from the lockfile:
84
+
85
+ ```console
86
+ uv sync
87
+ ```
88
+
89
+ Run commands inside the environment with `uv run`, for example
90
+ `uv run python example.py`. To include test dependencies, use
91
+ `uv sync --extra test`.
92
+
93
+ ## Get started
94
+
95
+ Set an OpenAI API key in your environment:
96
+
97
+ ```powershell
98
+ # Windows PowerShell
99
+ $env:OPENAI_API_KEY = "your-api-key"
100
+ ```
101
+
102
+ ```console
103
+ # macOS/Linux
104
+ export OPENAI_API_KEY="your-api-key"
105
+ ```
106
+
107
+ Create `example.py`:
108
+
109
+ ```python
110
+ from llm_router import Controller, ModelSpec, RouterConfig, RoutingTableEntry
111
+
112
+ config = RouterConfig(
113
+ routing_table=[
114
+ RoutingTableEntry(
115
+ "Prove this theorem",
116
+ "strong_model",
117
+ notes="Requires a rigorous multi-step proof.",
118
+ ),
119
+ RoutingTableEntry("Rewrite this paragraph", "weak_model"),
120
+ ],
121
+ weak_model=ModelSpec(
122
+ "gpt-5-mini",
123
+ usage_notes="Avoid difficult multi-step proofs.",
124
+ ),
125
+ strong_model=ModelSpec(
126
+ "gpt-5",
127
+ usage_notes="Use for rigorous proofs and difficult reasoning.",
128
+ ),
129
+ embedding_cache_path=".cache/routes",
130
+ log_path=".cache/routing.jsonl",
131
+ )
132
+
133
+ client = Controller(config)
134
+ response = client.chat.completions.create(
135
+ messages=[{"role": "user", "content": "Prove that sqrt(2) is irrational."}]
136
+ )
137
+ print(response.choices[0].message.content)
138
+ ```
139
+
140
+ Run it with the activated standard virtual environment:
141
+
142
+ ```console
143
+ python example.py
144
+ ```
145
+
146
+ Or with `uv`:
147
+
148
+ ```console
149
+ uv run python example.py
150
+ ```
151
+
152
+ ### OpenAI-compatible providers
153
+
154
+ route67 can use any provider exposed through an OpenAI-compatible client. Create
155
+ the provider's client normally and inject it into the controller. Model names in
156
+ the routing configuration are passed to that provider unchanged.
157
+
158
+ For example, with OpenRouter:
159
+
160
+ ```python
161
+ import os
162
+
163
+ from openai import OpenAI
164
+ from llm_router import Controller, ModelSpec, RouterConfig, RoutingTableEntry
165
+
166
+ openrouter = OpenAI(
167
+ base_url="https://openrouter.ai/api/v1",
168
+ api_key=os.environ["OPENROUTER_API_KEY"],
169
+ )
170
+
171
+ config = RouterConfig(
172
+ routing_table=[
173
+ RoutingTableEntry(
174
+ "Answer questions about a country",
175
+ "weak_model",
176
+ ),
177
+ RoutingTableEntry(
178
+ "Solve a difficult reasoning or math problem",
179
+ "strong_model",
180
+ notes="Requires careful multi-step reasoning.",
181
+ ),
182
+ ],
183
+ weak_model=ModelSpec(
184
+ "openai/gpt-4.1-mini",
185
+ usage_notes="Best for straightforward factual and writing questions.",
186
+ ),
187
+ strong_model=ModelSpec(
188
+ "deepseek/deepseek-v4-flash",
189
+ usage_notes="Use for difficult reasoning, mathematics, and verification.",
190
+ ),
191
+ )
192
+
193
+ client = Controller(config, openai_client=openrouter)
194
+ response = client.chat.completions.create(
195
+ messages=[
196
+ {
197
+ "role": "user",
198
+ "content": "How many r's are in the word 'strawberry'?",
199
+ }
200
+ ],
201
+ extra_body={"reasoning": {"enabled": True}},
202
+ )
203
+ ```
204
+
205
+ Provider-specific request options such as `extra_body` and `extra_headers` are
206
+ forwarded unchanged. Provider-specific response fields, including
207
+ `reasoning_details`, are also preserved. To continue a provider's reasoning,
208
+ pass its assistant message fields back unmodified in the next request.
209
+
210
+ Routing table entries target only `"weak_model"` or `"strong_model"`. Provider
211
+ model names live in `ModelSpec`, so switching models or providers does not
212
+ require rewriting the routing table.
213
+
214
+ `ModelSpec.usage_notes` are added to the weak model's escalation system prompt.
215
+ The prompt also includes up to five routing-table entries targeting
216
+ `"strong_model"` as examples of requests that should be escalated. Add concise
217
+ `notes` to those entries when the reason for escalation is useful context.
218
+
219
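+ The `minishlab/potion-base-8M` embedding model is downloaded from Hugging Face the first time embeddings are computed. With a non-empty routing table and no matching embedding cache, that happens when the controller is constructed. The model is lazy-loaded,
220
+ so constructing a controller with an empty routing table does not download it.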
@@ -0,0 +1,12 @@
1
+ llm_router/__init__.py,sha256=RwHWX6F0Is8P0MaQTNj7wLZDurk4vHtpZ7L3W6kC6dU,242
2
+ llm_router/config.py,sha256=FxjEeO62qVwVcPtxsvfsm82ezo91A2-A9zHIq76oLd4,2086
3
+ llm_router/controller.py,sha256=5rSAP2pTzFWApZp0_oB7y7RWGTpKkuiYCgC7gkWjoCs,3390
4
+ llm_router/embedder.py,sha256=9g1V7toS7c8H7Jvmx5ho0kZsnzlPXuyYV5xo1Ao4qKo,1305
5
+ llm_router/escalation.py,sha256=S22Lt3bUei3QjOKd1470Hpn6CujAiu46UcOI6K_-m6c,8445
6
+ llm_router/logging_utils.py,sha256=G9d-aEFGYlaKDYNyiuklMwAu8MUyOXFvDOCAzNKekYk,984
7
+ llm_router/prompts.py,sha256=tC01-zazA0wcclb8eAS0I7JPMCm0WZs55jIHyKl8QG4,1643
8
+ llm_router/routing_table.py,sha256=x5X0sGMn-oOaPncTXswqCGIJFd0aGiNRvTHXVnjkxeA,3667
9
+ route67-0.1.0.dist-info/METADATA,sha256=27TSDzfH3YjRE8e7lQxZ1uCLZWikph3Op52K2vxM_10,6343
10
+ route67-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
11
+ route67-0.1.0.dist-info/licenses/LICENSE,sha256=QaM-505zGS0RtXBxfg_YtfN-J3YndVxG7ruuzAGs2v4,1077
12
+ route67-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 route67 contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.