shiftgate 0.1.9__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {shiftgate-0.1.9 → shiftgate-0.2.0}/PKG-INFO +5 -1
  2. {shiftgate-0.1.9 → shiftgate-0.2.0}/README.md +4 -0
  3. {shiftgate-0.1.9 → shiftgate-0.2.0}/pyproject.toml +1 -1
  4. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/cli.py +35 -5
  5. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/router/matcher.py +64 -28
  6. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/router/router.py +8 -1
  7. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/serve/app.py +21 -0
  8. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/utils/display.py +28 -2
  9. {shiftgate-0.1.9 → shiftgate-0.2.0}/tests/test_router.py +88 -0
  10. {shiftgate-0.1.9 → shiftgate-0.2.0}/tests/test_serve.py +4 -0
  11. {shiftgate-0.1.9 → shiftgate-0.2.0}/.gitignore +0 -0
  12. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/__init__.py +0 -0
  13. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/data/__init__.py +0 -0
  14. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/data/default_tasks.json +0 -0
  15. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/feedback/__init__.py +0 -0
  16. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/feedback/loop.py +0 -0
  17. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/registry/__init__.py +0 -0
  18. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/registry/adapter_registry.py +0 -0
  19. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/registry/schemas.py +0 -0
  20. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/registry/task_registry.py +0 -0
  21. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/router/__init__.py +0 -0
  22. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/router/embedder.py +0 -0
  23. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/runtime/__init__.py +0 -0
  24. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/runtime/backend.py +0 -0
  25. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/serve/__init__.py +0 -0
  26. {shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/utils/__init__.py +0 -0
  27. {shiftgate-0.1.9 → shiftgate-0.2.0}/tests/__init__.py +0 -0
  28. {shiftgate-0.1.9 → shiftgate-0.2.0}/tests/test_backend.py +0 -0
  29. {shiftgate-0.1.9 → shiftgate-0.2.0}/tests/test_feedback.py +0 -0
  30. {shiftgate-0.1.9 → shiftgate-0.2.0}/tests/test_packaging.py +0 -0
  31. {shiftgate-0.1.9 → shiftgate-0.2.0}/tests/test_registry.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: shiftgate
3
- Version: 0.1.9
3
+ Version: 0.2.0
4
4
  Summary: Intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop.
5
5
  Project-URL: Homepage, https://github.com/shiftgate-ai/shiftgate
6
6
  Project-URL: Repository, https://github.com/shiftgate-ai/shiftgate
@@ -276,6 +276,10 @@ User query
276
276
  └────────────────────────────────┘
277
277
  ```
278
278
 
279
+ ### How routing works
280
+
281
+ When a backend is active, shiftgate filters candidate adapters to only those actually loaded on that backend. Switch from vLLM to Cerebras and shiftgate automatically picks Cerebras-compatible adapters — no re-registration needed. (When you run `shiftgate route` with no backend running, no filtering is applied, so you still see the full routing preview.)
282
+
279
283
  ---
280
284
 
281
285
  ## Bring Your Own Models
@@ -240,6 +240,10 @@ User query
240
240
  └────────────────────────────────┘
241
241
  ```
242
242
 
243
+ ### How routing works
244
+
245
+ When a backend is active, shiftgate filters candidate adapters to only those actually loaded on that backend. Switch from vLLM to Cerebras and shiftgate automatically picks Cerebras-compatible adapters — no re-registration needed. (When you run `shiftgate route` with no backend running, no filtering is applied, so you still see the full routing preview.)
246
+
243
247
  ---
244
248
 
245
249
  ## Bring Your Own Models
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "shiftgate"
7
- version = "0.1.9"
7
+ version = "0.2.0"
8
8
  description = "Intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -83,6 +83,19 @@ def _get_embedder():
83
83
  return Embedder()
84
84
 
85
85
 
86
+ def _active_runtimes(backend_router) -> set[str] | None:
87
+ """Return the set of runtime names loaded on the active backend, or None.
88
+
89
+ ``None`` means no backend is active → the router should not filter
90
+ (preview behaviour). An empty set means a backend is active but reports no
91
+ loaded models.
92
+ """
93
+ active = backend_router.active_backend
94
+ if active is None:
95
+ return None
96
+ return set(active.list_loaded_adapters())
97
+
98
+
86
99
  def _auto_link_adapter(adapter: AdapterEntry, task_reg) -> list[str]:
87
100
  """Add ``adapter.id`` to the ``preferred_adapters`` of matching task clusters.
88
101
 
@@ -464,6 +477,7 @@ def route(
464
477
  """
465
478
  from shiftgate.feedback import loop as feedback_loop
466
479
  from shiftgate.router import router as routing
480
+ from shiftgate.runtime.backend import BackendRouter
467
481
  from shiftgate.utils.display import show_explain_decision, show_routing_decision
468
482
 
469
483
  task_reg, adapter_reg = _load_registries()
@@ -474,8 +488,15 @@ def route(
474
488
 
475
489
  embedder = _get_embedder()
476
490
 
491
+ backend_router = BackendRouter()
492
+ backend_name = backend_router.detect()
493
+ available_runtimes = _active_runtimes(backend_router)
494
+
477
495
  try:
478
- trace, match_result = routing.route(query, task_reg, adapter_reg, embedder, top_k=top_k)
496
+ trace, match_result = routing.route(
497
+ query, task_reg, adapter_reg, embedder,
498
+ top_k=top_k, available_runtimes=available_runtimes,
499
+ )
479
500
  except Exception as exc:
480
501
  console.print(f"[red]Routing error:[/red] {exc}")
481
502
  raise typer.Exit(1)
@@ -487,7 +508,9 @@ def route(
487
508
  trace,
488
509
  adapter=adapter,
489
510
  task_name=task.name if task else None,
490
- backend_name=None,
511
+ backend_name=backend_name,
512
+ loaded_runtimes=available_runtimes,
513
+ selection_method=match_result.selection_method,
491
514
  )
492
515
 
493
516
  if explain:
@@ -524,22 +547,29 @@ def run(
524
547
 
525
548
  embedder = _get_embedder()
526
549
 
550
+ backend_router = BackendRouter()
551
+ backend_name = backend_router.detect()
552
+ available_runtimes = _active_runtimes(backend_router)
553
+
527
554
  try:
528
- trace, match_result = routing.route(query, task_reg, adapter_reg, embedder, top_k=top_k)
555
+ trace, match_result = routing.route(
556
+ query, task_reg, adapter_reg, embedder,
557
+ top_k=top_k, available_runtimes=available_runtimes,
558
+ )
529
559
  except Exception as exc:
530
560
  console.print(f"[red]Routing error:[/red] {exc}")
531
561
  raise typer.Exit(1)
532
562
 
533
563
  adapter = adapter_reg.get_adapter(trace.selected_adapter_id)
534
564
  task = task_reg.get_task(trace.matched_task_id)
535
- backend_router = BackendRouter()
536
- backend_name = backend_router.detect()
537
565
 
538
566
  show_routing_decision(
539
567
  trace,
540
568
  adapter=adapter,
541
569
  task_name=task.name if task else None,
542
570
  backend_name=backend_name,
571
+ loaded_runtimes=available_runtimes,
572
+ selection_method=match_result.selection_method,
543
573
  )
544
574
 
545
575
  if adapter is None:
@@ -117,6 +117,7 @@ def top_k_tasks(
117
117
  def select_adapter(
118
118
  top_tasks: list[TaskMatch],
119
119
  adapter_registry, # AdapterRegistry — avoid circular import with string hint
120
+ available_runtimes: set[str] | None = None,
120
121
  ) -> MatchResult:
121
122
  """Select the adapter linked to the best-matching task.
122
123
 
@@ -125,15 +126,23 @@ def select_adapter(
125
126
  For each top task (highest score first), walk ``preferred_adapters`` then
126
127
  ``fallback_adapters`` and collect the adapters that exist in the registry
127
128
  (populating ``TaskMatch.candidate_adapters`` for the ``--explain`` view).
128
- The first such adapter found, on the highest-scoring task, is selected.
129
+ The first viable adapter found, on the highest-scoring task, is selected.
130
+
131
+ Backend-aware filtering
132
+ -----------------------
133
+ When ``available_runtimes`` is provided (the set of model/adapter names
134
+ actually loaded on the active backend), only adapters whose
135
+ ``effective_backend_name()`` is in that set are considered viable. If a
136
+ task's entire candidate list is filtered out, selection falls through to
137
+ the next-best task. When ``available_runtimes`` is ``None`` no filtering
138
+ happens (the preview behaviour used by ``shiftgate route``).
129
139
 
130
140
  No silent fallback
131
141
  ------------------
132
142
  If the matched (top) task has **no** linked adapter in the registry, the
133
143
  router must NOT substitute an arbitrary adapter — doing so silently routes,
134
144
  e.g., a music query to a SQL adapter and destroys trust. Instead this
135
- returns a ``MatchResult`` with ``selected_adapter=None`` and
136
- ``selection_method="no_adapter_for_task"``.
145
+ returns a ``MatchResult`` with ``selected_adapter=None``.
137
146
 
138
147
  Parameters
139
148
  ----------
@@ -141,39 +150,56 @@ def select_adapter(
141
150
  Output of ``top_k_tasks`` (sorted by score descending).
142
151
  adapter_registry:
143
152
  ``AdapterRegistry`` instance to look up adapter IDs.
153
+ available_runtimes:
154
+ Optional set of runtime names loaded on the active backend. When set,
155
+ adapters not in the set are skipped during selection.
144
156
 
145
157
  Returns
146
158
  -------
147
- ``MatchResult``. ``selected_adapter`` is ``None`` when no adapter is
148
- linked to any of the ranked tasks. The ``matched_task`` is always the
149
- top-scoring task so callers can still report what was matched.
159
+ ``MatchResult``. ``selected_adapter`` is ``None`` when no viable adapter is
160
+ found. ``selection_method`` is ``"no_adapter_on_active_backend"`` when
161
+ linked adapters exist but none are loaded on the active backend, otherwise
162
+ ``"no_adapter_for_task"``. The ``matched_task`` is always the top-scoring
163
+ task so callers can still report what was matched.
150
164
  """
151
- # Populate candidate lists for every task (for the --explain view) and
152
- # find the first explicit match in score order.
165
+ def _is_viable(adapter) -> bool:
166
+ if available_runtimes is None:
167
+ return True
168
+ return adapter.effective_backend_name() in available_runtimes
169
+
153
170
  explicit_result: MatchResult | None = None
171
+ any_linked_adapter = False # any task had at least one registered adapter
154
172
 
155
173
  for tm in top_tasks:
156
174
  preferred_ids = list(tm.task.preferred_adapters)
157
175
  fallback_ids = list(tm.task.fallback_adapters)
158
176
 
177
+ # Populate candidate_adapters with every registered adapter (for the
178
+ # --explain view, showing all candidates regardless of runtime).
159
179
  for adapter_id in preferred_ids + fallback_ids:
160
180
  adapter = adapter_registry.get_adapter(adapter_id)
161
181
  if adapter is not None and adapter not in tm.candidate_adapters:
162
182
  tm.candidate_adapters.append(adapter)
163
183
 
164
- if explicit_result is None and tm.candidate_adapters:
165
- method = (
166
- "preferred"
167
- if tm.candidate_adapters[0].id in tm.task.preferred_adapters
168
- else "fallback"
169
- )
170
- explicit_result = MatchResult(
171
- selected_adapter=tm.candidate_adapters[0],
172
- matched_task=tm.task,
173
- similarity_score=tm.score,
174
- all_task_matches=top_tasks,
175
- selection_method=method,
176
- )
184
+ if tm.candidate_adapters:
185
+ any_linked_adapter = True
186
+
187
+ if explicit_result is None:
188
+ viable = [a for a in tm.candidate_adapters if _is_viable(a)]
189
+ if viable:
190
+ chosen = viable[0]
191
+ method = (
192
+ "preferred"
193
+ if chosen.id in tm.task.preferred_adapters
194
+ else "fallback"
195
+ )
196
+ explicit_result = MatchResult(
197
+ selected_adapter=chosen,
198
+ matched_task=tm.task,
199
+ similarity_score=tm.score,
200
+ all_task_matches=top_tasks,
201
+ selection_method=method,
202
+ )
177
203
 
178
204
  if explicit_result is not None:
179
205
  logger.debug(
@@ -185,19 +211,29 @@ def select_adapter(
185
211
  )
186
212
  return explicit_result
187
213
 
188
- # No adapter linked to any ranked task do NOT guess. Report the matched
189
- # task with no adapter so the caller can prompt the user to add one.
214
+ # No viable adapter across any ranked task. Distinguish "nothing linked at
215
+ # all" from "linked but not loaded on the active backend".
190
216
  top_task = top_tasks[0]
191
- logger.info(
192
- "No linked adapter for matched task '%s' — refusing to guess.",
193
- top_task.task.id,
194
- )
217
+ if available_runtimes is not None and any_linked_adapter:
218
+ method = "no_adapter_on_active_backend"
219
+ logger.info(
220
+ "Linked adapter(s) for task '%s' exist but none are loaded on the "
221
+ "active backend — refusing to guess.",
222
+ top_task.task.id,
223
+ )
224
+ else:
225
+ method = "no_adapter_for_task"
226
+ logger.info(
227
+ "No linked adapter for matched task '%s' — refusing to guess.",
228
+ top_task.task.id,
229
+ )
230
+
195
231
  return MatchResult(
196
232
  selected_adapter=None,
197
233
  matched_task=top_task.task,
198
234
  similarity_score=top_task.score,
199
235
  all_task_matches=top_tasks,
200
- selection_method="no_adapter_for_task",
236
+ selection_method=method,
201
237
  )
202
238
 
203
239
 
@@ -26,6 +26,7 @@ def route(
26
26
  adapter_registry: AdapterRegistry,
27
27
  embedder: Embedder,
28
28
  top_k: int = 3,
29
+ available_runtimes: set[str] | None = None,
29
30
  ) -> tuple[RoutingTrace, MatchResult]:
30
31
  """Route a query string to the best matching adapter.
31
32
 
@@ -48,6 +49,12 @@ def route(
48
49
  ``Embedder`` instance (wraps fastembed singleton).
49
50
  top_k:
50
51
  Number of top task candidates to consider. Defaults to 3.
52
+ available_runtimes:
53
+ Optional set of runtime names loaded on the active backend. When set,
54
+ adapters whose ``effective_backend_name()`` is not in the set are
55
+ skipped, falling through to the next-best task. If no viable adapter
56
+ is found across all top-K tasks, the trace's ``selected_adapter_id`` is
57
+ ``None`` and ``selection_method`` is ``"no_adapter_on_active_backend"``.
51
58
 
52
59
  Returns
53
60
  -------
@@ -73,7 +80,7 @@ def route(
73
80
  query_embedding = embedder.embed(query)
74
81
  all_tasks = task_registry.get_all_tasks()
75
82
  ranked = top_k_tasks(query_embedding, all_tasks, k=top_k)
76
- result = select_adapter(ranked, adapter_registry)
83
+ result = select_adapter(ranked, adapter_registry, available_runtimes=available_runtimes)
77
84
 
78
85
  selected_id = result.selected_adapter.id if result.selected_adapter else None
79
86
 
@@ -22,6 +22,7 @@ The actual upstream HTTP is delegated to a *forwarder* object stored on
22
22
  from __future__ import annotations
23
23
 
24
24
  import logging
25
+ import time
25
26
  from typing import Any, AsyncIterator, Optional
26
27
 
27
28
  import httpx
@@ -39,6 +40,7 @@ logger = logging.getLogger(__name__)
39
40
 
40
41
  _ROUTE_HEADER = "X-Shiftgate-Route"
41
42
  _READ_TIMEOUT = 120.0
43
+ _RUNTIMES_TTL = 60.0 # seconds to cache the active backend's loaded-runtime list
42
44
 
43
45
 
44
46
  # ---------------------------------------------------------------------------
@@ -127,6 +129,8 @@ def create_app(
127
129
  app.state.backend_router = backend_router
128
130
  app.state.backend_choice = backend
129
131
  app.state.forwarder = forwarder or HttpxForwarder()
132
+ # (timestamp, runtimes set) cache so we don't ping the backend on every request.
133
+ app.state.runtimes_cache = None
130
134
 
131
135
  def _embedder():
132
136
  if app.state.embedder is None:
@@ -142,6 +146,22 @@ def create_app(
142
146
  router.select(app.state.backend_choice)
143
147
  return router.active_backend
144
148
 
149
+ def _available_runtimes() -> set[str] | None:
150
+ """Loaded runtimes on the active backend, cached with a 60s TTL.
151
+
152
+ Returns ``None`` when no backend is active (no filtering).
153
+ """
154
+ active = _active_backend()
155
+ if active is None:
156
+ return None
157
+ cache = app.state.runtimes_cache
158
+ now = time.monotonic()
159
+ if cache is not None and (now - cache[0]) < _RUNTIMES_TTL:
160
+ return cache[1]
161
+ runtimes = set(active.list_loaded_adapters())
162
+ app.state.runtimes_cache = (now, runtimes)
163
+ return runtimes
164
+
145
165
  # -- health -------------------------------------------------------------
146
166
  @app.get("/health")
147
167
  def health() -> dict[str, Any]:
@@ -198,6 +218,7 @@ def create_app(
198
218
  app.state.task_reg,
199
219
  app.state.adapter_reg,
200
220
  _embedder(),
221
+ available_runtimes=_available_runtimes(),
201
222
  )
202
223
  except ValueError as exc:
203
224
  # Embeddings not initialised, etc.
@@ -69,6 +69,8 @@ def show_routing_decision(
69
69
  adapter: AdapterEntry | None = None,
70
70
  task_name: str | None = None,
71
71
  backend_name: str | None = None,
72
+ loaded_runtimes: set[str] | None = None,
73
+ selection_method: str | None = None,
72
74
  ) -> None:
73
75
  """Print a Rich Panel describing a routing decision.
74
76
 
@@ -81,7 +83,13 @@ def show_routing_decision(
81
83
  task_name:
82
84
  Human-readable task cluster name (falls back to trace.matched_task_id).
83
85
  backend_name:
84
- Active backend name ('ollama', 'vllm', or None).
86
+ Active backend name ('ollama', 'vllm', 'cerebras', or None).
87
+ loaded_runtimes:
88
+ Optional set of runtime names loaded on the active backend (used to
89
+ explain a ``no_adapter_on_active_backend`` outcome).
90
+ selection_method:
91
+ The ``MatchResult.selection_method`` (e.g. ``"no_adapter_for_task"`` or
92
+ ``"no_adapter_on_active_backend"``) used to tailor the no-adapter help.
85
93
  """
86
94
  # When no adapter was selected the decision is unactionable — render red
87
95
  # regardless of how confident the task match was.
@@ -101,7 +109,25 @@ def show_routing_decision(
101
109
  task_text.append_text(_similarity_bar(trace.similarity_score))
102
110
  grid.add_row("Matched Task", task_text)
103
111
 
104
- if no_adapter:
112
+ if no_adapter and selection_method == "no_adapter_on_active_backend":
113
+ # Adapters are linked to this task but none are loaded on the active
114
+ # backend — a different, backend-specific message.
115
+ grid.add_row(
116
+ "Adapter",
117
+ Text(
118
+ f"No adapter loaded on backend '{backend_name or 'unknown'}'",
119
+ style="bold red",
120
+ ),
121
+ )
122
+ runtimes = sorted(loaded_runtimes) if loaded_runtimes else []
123
+ runtimes_label = ", ".join(runtimes) if runtimes else "(none)"
124
+ grid.add_row("Loaded runtimes", Text(runtimes_label, style="dim"))
125
+ suggestion = Text()
126
+ suggestion.append("Try ", style="dim")
127
+ suggestion.append("shiftgate adapter list", style="cyan")
128
+ suggestion.append(" to see what's registered.", style="dim")
129
+ grid.add_row("Suggestion", suggestion)
130
+ elif no_adapter:
105
131
  # Never silently substitute an adapter. Tell the user how to fix it.
106
132
  adapter_text = Text("No adapter available", style="bold red")
107
133
  grid.add_row("Adapter", adapter_text)
@@ -307,3 +307,91 @@ class TestRouteFunction:
307
307
  def test_hf_adapter_effective_name_falls_back_to_id(self):
308
308
  adapter = adapter_from_hf("org/sql-lora", adapter_id="sql-lora")
309
309
  assert adapter.effective_backend_name() == "sql-lora"
310
+
311
+
312
+ # ---------------------------------------------------------------------------
313
+ # Backend-aware adapter filtering
314
+ # ---------------------------------------------------------------------------
315
+
316
+ class TestBackendAwareFiltering:
317
+ def test_filter_excludes_unloaded_runtime(self, synthetic_tasks, adapter_reg):
318
+ """An adapter whose runtime is not loaded is skipped during selection."""
319
+ query_emb = np.array([1.0, 0.0, 0.0], dtype=np.float32) # → task_x → adapter-x
320
+ ranked = top_k_tasks(query_emb, synthetic_tasks, k=3)
321
+ # adapter-x is NOT in the loaded set; task_x has only adapter-x → skip it.
322
+ # No other top task has a loaded adapter either → None.
323
+ result = select_adapter(ranked, adapter_reg, available_runtimes={"something-else"})
324
+ assert result.selected_adapter is None
325
+ assert result.selection_method == "no_adapter_on_active_backend"
326
+
327
+ def test_fall_through_to_next_best_task(self, synthetic_tasks, adapter_reg):
328
+ """When top-1 task's adapter isn't loaded, selection falls through."""
329
+ query_emb = np.array([1.0, 0.0, 0.0], dtype=np.float32) # task_x is top
330
+ ranked = top_k_tasks(query_emb, synthetic_tasks, k=3)
331
+ # Only adapter-y (task_y) is loaded → fall through from task_x to task_y.
332
+ result = select_adapter(ranked, adapter_reg, available_runtimes={"adapter-y"})
333
+ assert result.selected_adapter is not None
334
+ assert result.selected_adapter.id == "adapter-y"
335
+ assert result.matched_task.id == "task_y"
336
+
337
+ def test_none_preserves_unfiltered_behavior(self, synthetic_tasks, adapter_reg):
338
+ """available_runtimes=None means no filtering (preview behaviour)."""
339
+ query_emb = np.array([1.0, 0.0, 0.0], dtype=np.float32)
340
+ ranked = top_k_tasks(query_emb, synthetic_tasks, k=3)
341
+ result = select_adapter(ranked, adapter_reg, available_runtimes=None)
342
+ assert result.selected_adapter.id == "adapter-x"
343
+ assert result.selection_method == "preferred"
344
+
345
+ def test_filter_respects_runtime_name(self, tmp_path):
346
+ """Filtering matches on effective_backend_name (runtime_name when set)."""
347
+ task = _make_task("task_x", [1, 0, 0], ["sql-lora"])
348
+ adapter = adapter_from_runtime("sql-lora-vllm", adapter_id="sql-lora")
349
+ reg = AdapterRegistry(adapters=[adapter], source_path=tmp_path / "a.json")
350
+ query_emb = np.array([1.0, 0.0, 0.0], dtype=np.float32)
351
+ ranked = top_k_tasks(query_emb, [task], k=1)
352
+
353
+ # The registry id "sql-lora" is loaded, but the runtime name is not →
354
+ # filtering uses the runtime name, so this is NOT viable.
355
+ miss = select_adapter(ranked, reg, available_runtimes={"sql-lora"})
356
+ assert miss.selected_adapter is None
357
+ assert miss.selection_method == "no_adapter_on_active_backend"
358
+
359
+ # The runtime name IS loaded → viable.
360
+ hit = select_adapter(ranked, reg, available_runtimes={"sql-lora-vllm"})
361
+ assert hit.selected_adapter is not None
362
+ assert hit.selected_adapter.id == "sql-lora"
363
+
364
+ def test_no_linked_adapter_still_reports_no_adapter_for_task(self, tmp_path):
365
+ """Filtering active but task has no linked adapter at all → for_task reason."""
366
+ task = _make_task("task_x", [1, 0, 0], adapter_ids=[])
367
+ reg = AdapterRegistry(adapters=[], source_path=tmp_path / "a.json")
368
+ query_emb = np.array([1.0, 0.0, 0.0], dtype=np.float32)
369
+ ranked = top_k_tasks(query_emb, [task], k=1)
370
+ result = select_adapter(ranked, reg, available_runtimes={"anything"})
371
+ assert result.selected_adapter is None
372
+ assert result.selection_method == "no_adapter_for_task"
373
+
374
+
375
+ class TestRouteFiltering:
376
+ def test_route_filters_by_available_runtimes(self, task_reg, adapter_reg):
377
+ """route() passes the filter through and sets the right method on a miss."""
378
+ from shiftgate.router.router import route
379
+
380
+ # python query → task_x → adapter-x. None of the registry's adapters
381
+ # are loaded on the active backend → no viable adapter anywhere.
382
+ trace, result = route(
383
+ "write python code", task_reg, adapter_reg, MockEmbedder(),
384
+ available_runtimes={"unrelated-runtime"},
385
+ )
386
+ assert result.selected_adapter is None
387
+ assert trace.selected_adapter_id is None
388
+ assert result.selection_method == "no_adapter_on_active_backend"
389
+
390
+ def test_route_none_runtimes_unfiltered(self, task_reg, adapter_reg):
391
+ from shiftgate.router.router import route
392
+
393
+ trace, result = route(
394
+ "write python code", task_reg, adapter_reg, MockEmbedder(),
395
+ available_runtimes=None,
396
+ )
397
+ assert trace.selected_adapter_id == "adapter-x"
@@ -96,6 +96,9 @@ def client(tmp_path, forwarder):
96
96
 
97
97
  router = BackendRouter()
98
98
  router.select("vllm") # force-select; no availability ping
99
+ # Report the adapters as "loaded" so backend-aware filtering keeps them.
100
+ # adapter-x's runtime name is "adapter-x-vllm"; adapter-y has no runtime name.
101
+ router._vllm.list_loaded_adapters = lambda: ["adapter-x-vllm", "adapter-y"]
99
102
 
100
103
  app = create_app(
101
104
  backend="vllm",
@@ -179,6 +182,7 @@ def test_no_adapter_returns_400(tmp_path, forwarder):
179
182
  adapter_reg = AdapterRegistry(adapters=[], source_path=tmp_path / "adapters.json")
180
183
  router = BackendRouter()
181
184
  router.select("vllm")
185
+ router._vllm.list_loaded_adapters = lambda: []
182
186
  app = create_app(
183
187
  backend="vllm",
184
188
  task_reg=task_reg,
File without changes
File without changes