shiftgate 0.1.9__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {shiftgate-0.1.9 → shiftgate-0.2.0}/PKG-INFO +5 -1
- {shiftgate-0.1.9 → shiftgate-0.2.0}/README.md +4 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/pyproject.toml +1 -1
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/cli.py +35 -5
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/router/matcher.py +64 -28
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/router/router.py +8 -1
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/serve/app.py +21 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/utils/display.py +28 -2
- {shiftgate-0.1.9 → shiftgate-0.2.0}/tests/test_router.py +88 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/tests/test_serve.py +4 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/.gitignore +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/__init__.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/data/__init__.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/data/default_tasks.json +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/feedback/__init__.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/feedback/loop.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/registry/__init__.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/registry/adapter_registry.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/registry/schemas.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/registry/task_registry.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/router/__init__.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/router/embedder.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/runtime/__init__.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/runtime/backend.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/serve/__init__.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/utils/__init__.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/tests/__init__.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/tests/test_backend.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/tests/test_feedback.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/tests/test_packaging.py +0 -0
- {shiftgate-0.1.9 → shiftgate-0.2.0}/tests/test_registry.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: shiftgate
|
|
3
|
-
Version: 0.1.9
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop.
|
|
5
5
|
Project-URL: Homepage, https://github.com/shiftgate-ai/shiftgate
|
|
6
6
|
Project-URL: Repository, https://github.com/shiftgate-ai/shiftgate
|
|
@@ -276,6 +276,10 @@ User query
|
|
|
276
276
|
└────────────────────────────────┘
|
|
277
277
|
```
|
|
278
278
|
|
|
279
|
+
### How routing works
|
|
280
|
+
|
|
281
|
+
When a backend is active, shiftgate filters candidate adapters to only those actually loaded on that backend. Switch from vLLM to Cerebras and shiftgate automatically picks Cerebras-compatible adapters — no re-registration needed. (When you run `shiftgate route` with no backend running, no filtering is applied, so you still see the full routing preview.)
|
|
282
|
+
|
|
279
283
|
---
|
|
280
284
|
|
|
281
285
|
## Bring Your Own Models
|
|
@@ -240,6 +240,10 @@ User query
|
|
|
240
240
|
└────────────────────────────────┘
|
|
241
241
|
```
|
|
242
242
|
|
|
243
|
+
### How routing works
|
|
244
|
+
|
|
245
|
+
When a backend is active, shiftgate filters candidate adapters to only those actually loaded on that backend. Switch from vLLM to Cerebras and shiftgate automatically picks Cerebras-compatible adapters — no re-registration needed. (When you run `shiftgate route` with no backend running, no filtering is applied, so you still see the full routing preview.)
|
|
246
|
+
|
|
243
247
|
---
|
|
244
248
|
|
|
245
249
|
## Bring Your Own Models
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "shiftgate"
|
|
7
|
-
version = "0.1.9"
|
|
7
|
+
version = "0.2.0"
|
|
8
8
|
description = "Intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -83,6 +83,19 @@ def _get_embedder():
|
|
|
83
83
|
return Embedder()
|
|
84
84
|
|
|
85
85
|
|
|
86
|
+
def _active_runtimes(backend_router) -> set[str] | None:
|
|
87
|
+
"""Return the set of runtime names loaded on the active backend, or None.
|
|
88
|
+
|
|
89
|
+
``None`` means no backend is active → the router should not filter
|
|
90
|
+
(preview behaviour). An empty set means a backend is active but reports no
|
|
91
|
+
loaded models.
|
|
92
|
+
"""
|
|
93
|
+
active = backend_router.active_backend
|
|
94
|
+
if active is None:
|
|
95
|
+
return None
|
|
96
|
+
return set(active.list_loaded_adapters())
|
|
97
|
+
|
|
98
|
+
|
|
86
99
|
def _auto_link_adapter(adapter: AdapterEntry, task_reg) -> list[str]:
|
|
87
100
|
"""Add ``adapter.id`` to the ``preferred_adapters`` of matching task clusters.
|
|
88
101
|
|
|
@@ -464,6 +477,7 @@ def route(
|
|
|
464
477
|
"""
|
|
465
478
|
from shiftgate.feedback import loop as feedback_loop
|
|
466
479
|
from shiftgate.router import router as routing
|
|
480
|
+
from shiftgate.runtime.backend import BackendRouter
|
|
467
481
|
from shiftgate.utils.display import show_explain_decision, show_routing_decision
|
|
468
482
|
|
|
469
483
|
task_reg, adapter_reg = _load_registries()
|
|
@@ -474,8 +488,15 @@ def route(
|
|
|
474
488
|
|
|
475
489
|
embedder = _get_embedder()
|
|
476
490
|
|
|
491
|
+
backend_router = BackendRouter()
|
|
492
|
+
backend_name = backend_router.detect()
|
|
493
|
+
available_runtimes = _active_runtimes(backend_router)
|
|
494
|
+
|
|
477
495
|
try:
|
|
478
|
-
trace, match_result = routing.route(
|
|
496
|
+
trace, match_result = routing.route(
|
|
497
|
+
query, task_reg, adapter_reg, embedder,
|
|
498
|
+
top_k=top_k, available_runtimes=available_runtimes,
|
|
499
|
+
)
|
|
479
500
|
except Exception as exc:
|
|
480
501
|
console.print(f"[red]Routing error:[/red] {exc}")
|
|
481
502
|
raise typer.Exit(1)
|
|
@@ -487,7 +508,9 @@ def route(
|
|
|
487
508
|
trace,
|
|
488
509
|
adapter=adapter,
|
|
489
510
|
task_name=task.name if task else None,
|
|
490
|
-
backend_name=
|
|
511
|
+
backend_name=backend_name,
|
|
512
|
+
loaded_runtimes=available_runtimes,
|
|
513
|
+
selection_method=match_result.selection_method,
|
|
491
514
|
)
|
|
492
515
|
|
|
493
516
|
if explain:
|
|
@@ -524,22 +547,29 @@ def run(
|
|
|
524
547
|
|
|
525
548
|
embedder = _get_embedder()
|
|
526
549
|
|
|
550
|
+
backend_router = BackendRouter()
|
|
551
|
+
backend_name = backend_router.detect()
|
|
552
|
+
available_runtimes = _active_runtimes(backend_router)
|
|
553
|
+
|
|
527
554
|
try:
|
|
528
|
-
trace, match_result = routing.route(
|
|
555
|
+
trace, match_result = routing.route(
|
|
556
|
+
query, task_reg, adapter_reg, embedder,
|
|
557
|
+
top_k=top_k, available_runtimes=available_runtimes,
|
|
558
|
+
)
|
|
529
559
|
except Exception as exc:
|
|
530
560
|
console.print(f"[red]Routing error:[/red] {exc}")
|
|
531
561
|
raise typer.Exit(1)
|
|
532
562
|
|
|
533
563
|
adapter = adapter_reg.get_adapter(trace.selected_adapter_id)
|
|
534
564
|
task = task_reg.get_task(trace.matched_task_id)
|
|
535
|
-
backend_router = BackendRouter()
|
|
536
|
-
backend_name = backend_router.detect()
|
|
537
565
|
|
|
538
566
|
show_routing_decision(
|
|
539
567
|
trace,
|
|
540
568
|
adapter=adapter,
|
|
541
569
|
task_name=task.name if task else None,
|
|
542
570
|
backend_name=backend_name,
|
|
571
|
+
loaded_runtimes=available_runtimes,
|
|
572
|
+
selection_method=match_result.selection_method,
|
|
543
573
|
)
|
|
544
574
|
|
|
545
575
|
if adapter is None:
|
|
@@ -117,6 +117,7 @@ def top_k_tasks(
|
|
|
117
117
|
def select_adapter(
|
|
118
118
|
top_tasks: list[TaskMatch],
|
|
119
119
|
adapter_registry, # AdapterRegistry — avoid circular import with string hint
|
|
120
|
+
available_runtimes: set[str] | None = None,
|
|
120
121
|
) -> MatchResult:
|
|
121
122
|
"""Select the adapter linked to the best-matching task.
|
|
122
123
|
|
|
@@ -125,15 +126,23 @@ def select_adapter(
|
|
|
125
126
|
For each top task (highest score first), walk ``preferred_adapters`` then
|
|
126
127
|
``fallback_adapters`` and collect the adapters that exist in the registry
|
|
127
128
|
(populating ``TaskMatch.candidate_adapters`` for the ``--explain`` view).
|
|
128
|
-
The first
|
|
129
|
+
The first viable adapter found, on the highest-scoring task, is selected.
|
|
130
|
+
|
|
131
|
+
Backend-aware filtering
|
|
132
|
+
-----------------------
|
|
133
|
+
When ``available_runtimes`` is provided (the set of model/adapter names
|
|
134
|
+
actually loaded on the active backend), only adapters whose
|
|
135
|
+
``effective_backend_name()`` is in that set are considered viable. If a
|
|
136
|
+
task's entire candidate list is filtered out, selection falls through to
|
|
137
|
+
the next-best task. When ``available_runtimes`` is ``None`` no filtering
|
|
138
|
+
happens (the preview behaviour used by ``shiftgate route``).
|
|
129
139
|
|
|
130
140
|
No silent fallback
|
|
131
141
|
------------------
|
|
132
142
|
If the matched (top) task has **no** linked adapter in the registry, the
|
|
133
143
|
router must NOT substitute an arbitrary adapter — doing so silently routes,
|
|
134
144
|
e.g., a music query to a SQL adapter and destroys trust. Instead this
|
|
135
|
-
returns a ``MatchResult`` with ``selected_adapter=None`` and
|
|
136
|
-
``selection_method="no_adapter_for_task"``.
|
|
145
|
+
returns a ``MatchResult`` with ``selected_adapter=None``.
|
|
137
146
|
|
|
138
147
|
Parameters
|
|
139
148
|
----------
|
|
@@ -141,39 +150,56 @@ def select_adapter(
|
|
|
141
150
|
Output of ``top_k_tasks`` (sorted by score descending).
|
|
142
151
|
adapter_registry:
|
|
143
152
|
``AdapterRegistry`` instance to look up adapter IDs.
|
|
153
|
+
available_runtimes:
|
|
154
|
+
Optional set of runtime names loaded on the active backend. When set,
|
|
155
|
+
adapters not in the set are skipped during selection.
|
|
144
156
|
|
|
145
157
|
Returns
|
|
146
158
|
-------
|
|
147
|
-
``MatchResult``. ``selected_adapter`` is ``None`` when no adapter is
|
|
148
|
-
|
|
149
|
-
|
|
159
|
+
``MatchResult``. ``selected_adapter`` is ``None`` when no viable adapter is
|
|
160
|
+
found. ``selection_method`` is ``"no_adapter_on_active_backend"`` when
|
|
161
|
+
linked adapters exist but none are loaded on the active backend, otherwise
|
|
162
|
+
``"no_adapter_for_task"``. The ``matched_task`` is always the top-scoring
|
|
163
|
+
task so callers can still report what was matched.
|
|
150
164
|
"""
|
|
151
|
-
|
|
152
|
-
|
|
165
|
+
def _is_viable(adapter) -> bool:
|
|
166
|
+
if available_runtimes is None:
|
|
167
|
+
return True
|
|
168
|
+
return adapter.effective_backend_name() in available_runtimes
|
|
169
|
+
|
|
153
170
|
explicit_result: MatchResult | None = None
|
|
171
|
+
any_linked_adapter = False # any task had at least one registered adapter
|
|
154
172
|
|
|
155
173
|
for tm in top_tasks:
|
|
156
174
|
preferred_ids = list(tm.task.preferred_adapters)
|
|
157
175
|
fallback_ids = list(tm.task.fallback_adapters)
|
|
158
176
|
|
|
177
|
+
# Populate candidate_adapters with every registered adapter (for the
|
|
178
|
+
# --explain view, showing all candidates regardless of runtime).
|
|
159
179
|
for adapter_id in preferred_ids + fallback_ids:
|
|
160
180
|
adapter = adapter_registry.get_adapter(adapter_id)
|
|
161
181
|
if adapter is not None and adapter not in tm.candidate_adapters:
|
|
162
182
|
tm.candidate_adapters.append(adapter)
|
|
163
183
|
|
|
164
|
-
if
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
184
|
+
if tm.candidate_adapters:
|
|
185
|
+
any_linked_adapter = True
|
|
186
|
+
|
|
187
|
+
if explicit_result is None:
|
|
188
|
+
viable = [a for a in tm.candidate_adapters if _is_viable(a)]
|
|
189
|
+
if viable:
|
|
190
|
+
chosen = viable[0]
|
|
191
|
+
method = (
|
|
192
|
+
"preferred"
|
|
193
|
+
if chosen.id in tm.task.preferred_adapters
|
|
194
|
+
else "fallback"
|
|
195
|
+
)
|
|
196
|
+
explicit_result = MatchResult(
|
|
197
|
+
selected_adapter=chosen,
|
|
198
|
+
matched_task=tm.task,
|
|
199
|
+
similarity_score=tm.score,
|
|
200
|
+
all_task_matches=top_tasks,
|
|
201
|
+
selection_method=method,
|
|
202
|
+
)
|
|
177
203
|
|
|
178
204
|
if explicit_result is not None:
|
|
179
205
|
logger.debug(
|
|
@@ -185,19 +211,29 @@ def select_adapter(
|
|
|
185
211
|
)
|
|
186
212
|
return explicit_result
|
|
187
213
|
|
|
188
|
-
# No adapter
|
|
189
|
-
#
|
|
214
|
+
# No viable adapter across any ranked task. Distinguish "nothing linked at
|
|
215
|
+
# all" from "linked but not loaded on the active backend".
|
|
190
216
|
top_task = top_tasks[0]
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
217
|
+
if available_runtimes is not None and any_linked_adapter:
|
|
218
|
+
method = "no_adapter_on_active_backend"
|
|
219
|
+
logger.info(
|
|
220
|
+
"Linked adapter(s) for task '%s' exist but none are loaded on the "
|
|
221
|
+
"active backend — refusing to guess.",
|
|
222
|
+
top_task.task.id,
|
|
223
|
+
)
|
|
224
|
+
else:
|
|
225
|
+
method = "no_adapter_for_task"
|
|
226
|
+
logger.info(
|
|
227
|
+
"No linked adapter for matched task '%s' — refusing to guess.",
|
|
228
|
+
top_task.task.id,
|
|
229
|
+
)
|
|
230
|
+
|
|
195
231
|
return MatchResult(
|
|
196
232
|
selected_adapter=None,
|
|
197
233
|
matched_task=top_task.task,
|
|
198
234
|
similarity_score=top_task.score,
|
|
199
235
|
all_task_matches=top_tasks,
|
|
200
|
-
selection_method="no_adapter_for_task",
|
|
236
|
+
selection_method=method,
|
|
201
237
|
)
|
|
202
238
|
|
|
203
239
|
|
|
@@ -26,6 +26,7 @@ def route(
|
|
|
26
26
|
adapter_registry: AdapterRegistry,
|
|
27
27
|
embedder: Embedder,
|
|
28
28
|
top_k: int = 3,
|
|
29
|
+
available_runtimes: set[str] | None = None,
|
|
29
30
|
) -> tuple[RoutingTrace, MatchResult]:
|
|
30
31
|
"""Route a query string to the best matching adapter.
|
|
31
32
|
|
|
@@ -48,6 +49,12 @@ def route(
|
|
|
48
49
|
``Embedder`` instance (wraps fastembed singleton).
|
|
49
50
|
top_k:
|
|
50
51
|
Number of top task candidates to consider. Defaults to 3.
|
|
52
|
+
available_runtimes:
|
|
53
|
+
Optional set of runtime names loaded on the active backend. When set,
|
|
54
|
+
adapters whose ``effective_backend_name()`` is not in the set are
|
|
55
|
+
skipped, falling through to the next-best task. If no viable adapter
|
|
56
|
+
is found across all top-K tasks, the trace's ``selected_adapter_id`` is
|
|
57
|
+
``None`` and ``selection_method`` is ``"no_adapter_on_active_backend"``.
|
|
51
58
|
|
|
52
59
|
Returns
|
|
53
60
|
-------
|
|
@@ -73,7 +80,7 @@ def route(
|
|
|
73
80
|
query_embedding = embedder.embed(query)
|
|
74
81
|
all_tasks = task_registry.get_all_tasks()
|
|
75
82
|
ranked = top_k_tasks(query_embedding, all_tasks, k=top_k)
|
|
76
|
-
result = select_adapter(ranked, adapter_registry)
|
|
83
|
+
result = select_adapter(ranked, adapter_registry, available_runtimes=available_runtimes)
|
|
77
84
|
|
|
78
85
|
selected_id = result.selected_adapter.id if result.selected_adapter else None
|
|
79
86
|
|
|
@@ -22,6 +22,7 @@ The actual upstream HTTP is delegated to a *forwarder* object stored on
|
|
|
22
22
|
from __future__ import annotations
|
|
23
23
|
|
|
24
24
|
import logging
|
|
25
|
+
import time
|
|
25
26
|
from typing import Any, AsyncIterator, Optional
|
|
26
27
|
|
|
27
28
|
import httpx
|
|
@@ -39,6 +40,7 @@ logger = logging.getLogger(__name__)
|
|
|
39
40
|
|
|
40
41
|
_ROUTE_HEADER = "X-Shiftgate-Route"
|
|
41
42
|
_READ_TIMEOUT = 120.0
|
|
43
|
+
_RUNTIMES_TTL = 60.0 # seconds to cache the active backend's loaded-runtime list
|
|
42
44
|
|
|
43
45
|
|
|
44
46
|
# ---------------------------------------------------------------------------
|
|
@@ -127,6 +129,8 @@ def create_app(
|
|
|
127
129
|
app.state.backend_router = backend_router
|
|
128
130
|
app.state.backend_choice = backend
|
|
129
131
|
app.state.forwarder = forwarder or HttpxForwarder()
|
|
132
|
+
# (timestamp, runtimes set) cache so we don't ping the backend on every request.
|
|
133
|
+
app.state.runtimes_cache = None
|
|
130
134
|
|
|
131
135
|
def _embedder():
|
|
132
136
|
if app.state.embedder is None:
|
|
@@ -142,6 +146,22 @@ def create_app(
|
|
|
142
146
|
router.select(app.state.backend_choice)
|
|
143
147
|
return router.active_backend
|
|
144
148
|
|
|
149
|
+
def _available_runtimes() -> set[str] | None:
|
|
150
|
+
"""Loaded runtimes on the active backend, cached with a 60s TTL.
|
|
151
|
+
|
|
152
|
+
Returns ``None`` when no backend is active (no filtering).
|
|
153
|
+
"""
|
|
154
|
+
active = _active_backend()
|
|
155
|
+
if active is None:
|
|
156
|
+
return None
|
|
157
|
+
cache = app.state.runtimes_cache
|
|
158
|
+
now = time.monotonic()
|
|
159
|
+
if cache is not None and (now - cache[0]) < _RUNTIMES_TTL:
|
|
160
|
+
return cache[1]
|
|
161
|
+
runtimes = set(active.list_loaded_adapters())
|
|
162
|
+
app.state.runtimes_cache = (now, runtimes)
|
|
163
|
+
return runtimes
|
|
164
|
+
|
|
145
165
|
# -- health -------------------------------------------------------------
|
|
146
166
|
@app.get("/health")
|
|
147
167
|
def health() -> dict[str, Any]:
|
|
@@ -198,6 +218,7 @@ def create_app(
|
|
|
198
218
|
app.state.task_reg,
|
|
199
219
|
app.state.adapter_reg,
|
|
200
220
|
_embedder(),
|
|
221
|
+
available_runtimes=_available_runtimes(),
|
|
201
222
|
)
|
|
202
223
|
except ValueError as exc:
|
|
203
224
|
# Embeddings not initialised, etc.
|
|
@@ -69,6 +69,8 @@ def show_routing_decision(
|
|
|
69
69
|
adapter: AdapterEntry | None = None,
|
|
70
70
|
task_name: str | None = None,
|
|
71
71
|
backend_name: str | None = None,
|
|
72
|
+
loaded_runtimes: set[str] | None = None,
|
|
73
|
+
selection_method: str | None = None,
|
|
72
74
|
) -> None:
|
|
73
75
|
"""Print a Rich Panel describing a routing decision.
|
|
74
76
|
|
|
@@ -81,7 +83,13 @@ def show_routing_decision(
|
|
|
81
83
|
task_name:
|
|
82
84
|
Human-readable task cluster name (falls back to trace.matched_task_id).
|
|
83
85
|
backend_name:
|
|
84
|
-
Active backend name ('ollama', 'vllm', or None).
|
|
86
|
+
Active backend name ('ollama', 'vllm', 'cerebras', or None).
|
|
87
|
+
loaded_runtimes:
|
|
88
|
+
Optional set of runtime names loaded on the active backend (used to
|
|
89
|
+
explain a ``no_adapter_on_active_backend`` outcome).
|
|
90
|
+
selection_method:
|
|
91
|
+
The ``MatchResult.selection_method`` (e.g. ``"no_adapter_for_task"`` or
|
|
92
|
+
``"no_adapter_on_active_backend"``) used to tailor the no-adapter help.
|
|
85
93
|
"""
|
|
86
94
|
# When no adapter was selected the decision is unactionable — render red
|
|
87
95
|
# regardless of how confident the task match was.
|
|
@@ -101,7 +109,25 @@ def show_routing_decision(
|
|
|
101
109
|
task_text.append_text(_similarity_bar(trace.similarity_score))
|
|
102
110
|
grid.add_row("Matched Task", task_text)
|
|
103
111
|
|
|
104
|
-
if no_adapter:
|
|
112
|
+
if no_adapter and selection_method == "no_adapter_on_active_backend":
|
|
113
|
+
# Adapters are linked to this task but none are loaded on the active
|
|
114
|
+
# backend — a different, backend-specific message.
|
|
115
|
+
grid.add_row(
|
|
116
|
+
"Adapter",
|
|
117
|
+
Text(
|
|
118
|
+
f"No adapter loaded on backend '{backend_name or 'unknown'}'",
|
|
119
|
+
style="bold red",
|
|
120
|
+
),
|
|
121
|
+
)
|
|
122
|
+
runtimes = sorted(loaded_runtimes) if loaded_runtimes else []
|
|
123
|
+
runtimes_label = ", ".join(runtimes) if runtimes else "(none)"
|
|
124
|
+
grid.add_row("Loaded runtimes", Text(runtimes_label, style="dim"))
|
|
125
|
+
suggestion = Text()
|
|
126
|
+
suggestion.append("Try ", style="dim")
|
|
127
|
+
suggestion.append("shiftgate adapter list", style="cyan")
|
|
128
|
+
suggestion.append(" to see what's registered.", style="dim")
|
|
129
|
+
grid.add_row("Suggestion", suggestion)
|
|
130
|
+
elif no_adapter:
|
|
105
131
|
# Never silently substitute an adapter. Tell the user how to fix it.
|
|
106
132
|
adapter_text = Text("No adapter available", style="bold red")
|
|
107
133
|
grid.add_row("Adapter", adapter_text)
|
|
@@ -307,3 +307,91 @@ class TestRouteFunction:
|
|
|
307
307
|
def test_hf_adapter_effective_name_falls_back_to_id(self):
|
|
308
308
|
adapter = adapter_from_hf("org/sql-lora", adapter_id="sql-lora")
|
|
309
309
|
assert adapter.effective_backend_name() == "sql-lora"
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
# ---------------------------------------------------------------------------
|
|
313
|
+
# Backend-aware adapter filtering
|
|
314
|
+
# ---------------------------------------------------------------------------
|
|
315
|
+
|
|
316
|
+
class TestBackendAwareFiltering:
|
|
317
|
+
def test_filter_excludes_unloaded_runtime(self, synthetic_tasks, adapter_reg):
|
|
318
|
+
"""An adapter whose runtime is not loaded is skipped during selection."""
|
|
319
|
+
query_emb = np.array([1.0, 0.0, 0.0], dtype=np.float32) # → task_x → adapter-x
|
|
320
|
+
ranked = top_k_tasks(query_emb, synthetic_tasks, k=3)
|
|
321
|
+
# adapter-x is NOT in the loaded set; task_x has only adapter-x → skip it.
|
|
322
|
+
# No other top task has a loaded adapter either → None.
|
|
323
|
+
result = select_adapter(ranked, adapter_reg, available_runtimes={"something-else"})
|
|
324
|
+
assert result.selected_adapter is None
|
|
325
|
+
assert result.selection_method == "no_adapter_on_active_backend"
|
|
326
|
+
|
|
327
|
+
def test_fall_through_to_next_best_task(self, synthetic_tasks, adapter_reg):
|
|
328
|
+
"""When top-1 task's adapter isn't loaded, selection falls through."""
|
|
329
|
+
query_emb = np.array([1.0, 0.0, 0.0], dtype=np.float32) # task_x is top
|
|
330
|
+
ranked = top_k_tasks(query_emb, synthetic_tasks, k=3)
|
|
331
|
+
# Only adapter-y (task_y) is loaded → fall through from task_x to task_y.
|
|
332
|
+
result = select_adapter(ranked, adapter_reg, available_runtimes={"adapter-y"})
|
|
333
|
+
assert result.selected_adapter is not None
|
|
334
|
+
assert result.selected_adapter.id == "adapter-y"
|
|
335
|
+
assert result.matched_task.id == "task_y"
|
|
336
|
+
|
|
337
|
+
def test_none_preserves_unfiltered_behavior(self, synthetic_tasks, adapter_reg):
|
|
338
|
+
"""available_runtimes=None means no filtering (preview behaviour)."""
|
|
339
|
+
query_emb = np.array([1.0, 0.0, 0.0], dtype=np.float32)
|
|
340
|
+
ranked = top_k_tasks(query_emb, synthetic_tasks, k=3)
|
|
341
|
+
result = select_adapter(ranked, adapter_reg, available_runtimes=None)
|
|
342
|
+
assert result.selected_adapter.id == "adapter-x"
|
|
343
|
+
assert result.selection_method == "preferred"
|
|
344
|
+
|
|
345
|
+
def test_filter_respects_runtime_name(self, tmp_path):
|
|
346
|
+
"""Filtering matches on effective_backend_name (runtime_name when set)."""
|
|
347
|
+
task = _make_task("task_x", [1, 0, 0], ["sql-lora"])
|
|
348
|
+
adapter = adapter_from_runtime("sql-lora-vllm", adapter_id="sql-lora")
|
|
349
|
+
reg = AdapterRegistry(adapters=[adapter], source_path=tmp_path / "a.json")
|
|
350
|
+
query_emb = np.array([1.0, 0.0, 0.0], dtype=np.float32)
|
|
351
|
+
ranked = top_k_tasks(query_emb, [task], k=1)
|
|
352
|
+
|
|
353
|
+
# The registry id "sql-lora" is loaded, but the runtime name is not →
|
|
354
|
+
# filtering uses the runtime name, so this is NOT viable.
|
|
355
|
+
miss = select_adapter(ranked, reg, available_runtimes={"sql-lora"})
|
|
356
|
+
assert miss.selected_adapter is None
|
|
357
|
+
assert miss.selection_method == "no_adapter_on_active_backend"
|
|
358
|
+
|
|
359
|
+
# The runtime name IS loaded → viable.
|
|
360
|
+
hit = select_adapter(ranked, reg, available_runtimes={"sql-lora-vllm"})
|
|
361
|
+
assert hit.selected_adapter is not None
|
|
362
|
+
assert hit.selected_adapter.id == "sql-lora"
|
|
363
|
+
|
|
364
|
+
def test_no_linked_adapter_still_reports_no_adapter_for_task(self, tmp_path):
|
|
365
|
+
"""Filtering active but task has no linked adapter at all → for_task reason."""
|
|
366
|
+
task = _make_task("task_x", [1, 0, 0], adapter_ids=[])
|
|
367
|
+
reg = AdapterRegistry(adapters=[], source_path=tmp_path / "a.json")
|
|
368
|
+
query_emb = np.array([1.0, 0.0, 0.0], dtype=np.float32)
|
|
369
|
+
ranked = top_k_tasks(query_emb, [task], k=1)
|
|
370
|
+
result = select_adapter(ranked, reg, available_runtimes={"anything"})
|
|
371
|
+
assert result.selected_adapter is None
|
|
372
|
+
assert result.selection_method == "no_adapter_for_task"
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
class TestRouteFiltering:
|
|
376
|
+
def test_route_filters_by_available_runtimes(self, task_reg, adapter_reg):
|
|
377
|
+
"""route() passes the filter through and sets the right method on a miss."""
|
|
378
|
+
from shiftgate.router.router import route
|
|
379
|
+
|
|
380
|
+
# python query → task_x → adapter-x. None of the registry's adapters
|
|
381
|
+
# are loaded on the active backend → no viable adapter anywhere.
|
|
382
|
+
trace, result = route(
|
|
383
|
+
"write python code", task_reg, adapter_reg, MockEmbedder(),
|
|
384
|
+
available_runtimes={"unrelated-runtime"},
|
|
385
|
+
)
|
|
386
|
+
assert result.selected_adapter is None
|
|
387
|
+
assert trace.selected_adapter_id is None
|
|
388
|
+
assert result.selection_method == "no_adapter_on_active_backend"
|
|
389
|
+
|
|
390
|
+
def test_route_none_runtimes_unfiltered(self, task_reg, adapter_reg):
|
|
391
|
+
from shiftgate.router.router import route
|
|
392
|
+
|
|
393
|
+
trace, result = route(
|
|
394
|
+
"write python code", task_reg, adapter_reg, MockEmbedder(),
|
|
395
|
+
available_runtimes=None,
|
|
396
|
+
)
|
|
397
|
+
assert trace.selected_adapter_id == "adapter-x"
|
|
@@ -96,6 +96,9 @@ def client(tmp_path, forwarder):
|
|
|
96
96
|
|
|
97
97
|
router = BackendRouter()
|
|
98
98
|
router.select("vllm") # force-select; no availability ping
|
|
99
|
+
# Report the adapters as "loaded" so backend-aware filtering keeps them.
|
|
100
|
+
# adapter-x's runtime name is "adapter-x-vllm"; adapter-y has no runtime name.
|
|
101
|
+
router._vllm.list_loaded_adapters = lambda: ["adapter-x-vllm", "adapter-y"]
|
|
99
102
|
|
|
100
103
|
app = create_app(
|
|
101
104
|
backend="vllm",
|
|
@@ -179,6 +182,7 @@ def test_no_adapter_returns_400(tmp_path, forwarder):
|
|
|
179
182
|
adapter_reg = AdapterRegistry(adapters=[], source_path=tmp_path / "adapters.json")
|
|
180
183
|
router = BackendRouter()
|
|
181
184
|
router.select("vllm")
|
|
185
|
+
router._vllm.list_loaded_adapters = lambda: []
|
|
182
186
|
app = create_app(
|
|
183
187
|
backend="vllm",
|
|
184
188
|
task_reg=task_reg,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|