shiftgate 0.1.8__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {shiftgate-0.1.8 → shiftgate-0.2.0}/PKG-INFO +77 -2
  2. {shiftgate-0.1.8 → shiftgate-0.2.0}/README.md +73 -1
  3. {shiftgate-0.1.8 → shiftgate-0.2.0}/pyproject.toml +4 -1
  4. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/cli.py +113 -5
  5. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/router/matcher.py +64 -28
  6. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/router/router.py +8 -1
  7. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/runtime/backend.py +168 -8
  8. shiftgate-0.2.0/shiftgate/serve/__init__.py +11 -0
  9. shiftgate-0.2.0/shiftgate/serve/app.py +271 -0
  10. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/utils/display.py +28 -2
  11. {shiftgate-0.1.8 → shiftgate-0.2.0}/tests/test_backend.py +95 -0
  12. {shiftgate-0.1.8 → shiftgate-0.2.0}/tests/test_packaging.py +2 -0
  13. {shiftgate-0.1.8 → shiftgate-0.2.0}/tests/test_router.py +88 -0
  14. shiftgate-0.2.0/tests/test_serve.py +208 -0
  15. {shiftgate-0.1.8 → shiftgate-0.2.0}/.gitignore +0 -0
  16. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/__init__.py +0 -0
  17. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/data/__init__.py +0 -0
  18. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/data/default_tasks.json +0 -0
  19. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/feedback/__init__.py +0 -0
  20. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/feedback/loop.py +0 -0
  21. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/registry/__init__.py +0 -0
  22. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/registry/adapter_registry.py +0 -0
  23. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/registry/schemas.py +0 -0
  24. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/registry/task_registry.py +0 -0
  25. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/router/__init__.py +0 -0
  26. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/router/embedder.py +0 -0
  27. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/runtime/__init__.py +0 -0
  28. {shiftgate-0.1.8 → shiftgate-0.2.0}/shiftgate/utils/__init__.py +0 -0
  29. {shiftgate-0.1.8 → shiftgate-0.2.0}/tests/__init__.py +0 -0
  30. {shiftgate-0.1.8 → shiftgate-0.2.0}/tests/test_feedback.py +0 -0
  31. {shiftgate-0.1.8 → shiftgate-0.2.0}/tests/test_registry.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: shiftgate
3
- Version: 0.1.8
3
+ Version: 0.2.0
4
4
  Summary: Intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop.
5
5
  Project-URL: Homepage, https://github.com/shiftgate-ai/shiftgate
6
6
  Project-URL: Repository, https://github.com/shiftgate-ai/shiftgate
@@ -18,6 +18,7 @@ Classifier: Programming Language :: Python :: 3.11
18
18
  Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
20
  Requires-Python: >=3.10
21
+ Requires-Dist: fastapi>=0.110.0
21
22
  Requires-Dist: fastembed>=0.3.0
22
23
  Requires-Dist: httpx>=0.27.0
23
24
  Requires-Dist: huggingface-hub>=0.22.0
@@ -25,7 +26,9 @@ Requires-Dist: numpy>=1.26.0
25
26
  Requires-Dist: pydantic>=2.6.0
26
27
  Requires-Dist: rich>=13.7.0
27
28
  Requires-Dist: scikit-learn>=1.4.0
29
+ Requires-Dist: sse-starlette>=2.1.0
28
30
  Requires-Dist: typer>=0.12.0
31
+ Requires-Dist: uvicorn[standard]>=0.29.0
29
32
  Provides-Extra: dev
30
33
  Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
31
34
  Requires-Dist: pytest>=8.0.0; extra == 'dev'
@@ -109,7 +112,60 @@ shiftgate route "write a python sorting function"
109
112
  shiftgate run "write a python sorting function"
110
113
  ```
111
114
 
112
- **Essential commands:** `init` · `adapter add` · `route` · `run` · `doctor`
115
+ **Essential commands:** `init` · `adapter add` · `route` · `run` · `doctor` · `serve`
116
+
117
+ ---
118
+
119
+ ## Use as an OpenAI-compatible proxy
120
+
121
+ `shiftgate serve` exposes the router as a drop-in OpenAI endpoint. Any client that speaks OpenAI can point at it and get auto-routing for free — just pass `model="auto"`.
122
+
123
+ ```bash
124
+ # Start the proxy (defaults to http://127.0.0.1:9000)
125
+ shiftgate serve
126
+ ```
127
+
128
+ ```python
129
+ # Use it from any OpenAI client
130
+ from openai import OpenAI
131
+
132
+ client = OpenAI(base_url="http://localhost:9000/v1", api_key="not-needed")
133
+ client.chat.completions.create(
134
+ model="auto", # ← shiftgate picks the right adapter
135
+ messages=[{"role": "user", "content": "write a sql query"}],
136
+ )
137
+ ```
138
+
139
+ When `model="auto"`, shiftgate routes the request to the best adapter and rewrites `model` to that adapter's backend name before forwarding upstream. The response carries an `X-Shiftgate-Route: <adapter_id> (<score>)` header so you can see what was chosen. Passing any other model id bypasses routing and forwards verbatim. Streaming (`stream: true`) is piped straight through via SSE.
140
+
141
+ ```bash
142
+ shiftgate serve --port 9000 --host 127.0.0.1 --backend auto # backend: auto | ollama | vllm | cerebras
143
+ ```
144
+
145
+ > Bind defaults to `127.0.0.1` (localhost only). Pass `--host 0.0.0.0` to expose it on your network.
146
+
147
+ ### Drop-in for Cursor / Aider / LangChain
148
+
149
+ Point each tool's OpenAI base URL at the proxy and use `model="auto"`:
150
+
151
+ ```bash
152
+ # Cursor → Settings → Models → Override OpenAI Base URL
153
+ http://localhost:9000/v1
154
+
155
+ # Aider
156
+ aider --openai-api-base http://localhost:9000/v1 --openai-api-key not-needed --model auto
157
+ ```
158
+
159
+ ```python
160
+ # LangChain
161
+ from langchain_openai import ChatOpenAI
162
+
163
+ llm = ChatOpenAI(
164
+ base_url="http://localhost:9000/v1",
165
+ api_key="not-needed",
166
+ model="auto",
167
+ )
168
+ ```
113
169
 
114
170
  ---
115
171
 
@@ -220,6 +276,10 @@ User query
220
276
  └────────────────────────────────┘
221
277
  ```
222
278
 
279
+ ### How routing works
280
+
281
+ When a backend is active, shiftgate filters candidate adapters to only those actually loaded on that backend. Switch from vLLM to Cerebras and shiftgate automatically picks Cerebras-compatible adapters — no re-registration needed. (When you run `shiftgate route` with no backend running, no filtering is applied, so you still see the full routing preview.)
282
+
223
283
  ---
224
284
 
225
285
  ## Bring Your Own Models
@@ -286,6 +346,20 @@ shiftgate adapter add sql-lora --local /models/sql-lora --tags sql --base llama3
286
346
 
287
347
  Useful for exploring routing decisions before your backend is set up. To run inference, load the adapter in vLLM or Ollama and re-register with `--runtime`.
288
348
 
349
+ ### Option 4 — Cerebras (cloud)
350
+
351
+ shiftgate also supports [Cerebras](https://cerebras.ai/) as a cloud fallback. It uses Cerebras' OpenAI-compatible API and authenticates with a bearer token from the `CEREBRAS_API_KEY` environment variable (or the `--cerebras-key` global flag).
352
+
353
+ ```bash
354
+ export CEREBRAS_API_KEY=csk-...
355
+ shiftgate adapter add llama3.1-8b --runtime llama3.1-8b --tags general --base llama3.1
356
+ shiftgate run "write a python sorting function"
357
+ ```
358
+
359
+ shiftgate auto-detects backends in the order **Ollama → vLLM → Cerebras**, so local backends always win and Cerebras is used only when no local backend is running.
360
+
361
+ > **Honest status:** shiftgate routes to Cerebras' base-model inference today. When Cerebras Multi-LoRA goes public, register your adapter with `--runtime <cerebras-lora-id>` and it just works — no shiftgate update needed.
362
+
289
363
  ---
290
364
 
291
365
  ## How to contribute adapters
@@ -393,6 +467,7 @@ shiftgate/
393
467
  | `shiftgate route "<query>"` | Route a query and show the decision — no inference |
394
468
  | `shiftgate route "<query>" --explain` | Full decision tree: task scores, candidates, selection reason |
395
469
  | `shiftgate run "<query>"` | Route + run via Ollama or vLLM |
470
+ | `shiftgate serve [--port 9000] [--host …] [--backend …]` | Run an OpenAI-compatible auto-routing proxy |
396
471
  | `shiftgate doctor` | Full health check: embedder, backend, adapters, task embeddings |
397
472
  | `shiftgate adapter add <hf_repo> [--tags …] [--base …]` | Register adapter from HuggingFace (metadata only) |
398
473
  | `shiftgate adapter add <id> --local <path> [--tags …]` | Register a local adapter path |
@@ -76,7 +76,60 @@ shiftgate route "write a python sorting function"
76
76
  shiftgate run "write a python sorting function"
77
77
  ```
78
78
 
79
- **Essential commands:** `init` · `adapter add` · `route` · `run` · `doctor`
79
+ **Essential commands:** `init` · `adapter add` · `route` · `run` · `doctor` · `serve`
80
+
81
+ ---
82
+
83
+ ## Use as an OpenAI-compatible proxy
84
+
85
+ `shiftgate serve` exposes the router as a drop-in OpenAI endpoint. Any client that speaks OpenAI can point at it and get auto-routing for free — just pass `model="auto"`.
86
+
87
+ ```bash
88
+ # Start the proxy (defaults to http://127.0.0.1:9000)
89
+ shiftgate serve
90
+ ```
91
+
92
+ ```python
93
+ # Use it from any OpenAI client
94
+ from openai import OpenAI
95
+
96
+ client = OpenAI(base_url="http://localhost:9000/v1", api_key="not-needed")
97
+ client.chat.completions.create(
98
+ model="auto", # ← shiftgate picks the right adapter
99
+ messages=[{"role": "user", "content": "write a sql query"}],
100
+ )
101
+ ```
102
+
103
+ When `model="auto"`, shiftgate routes the request to the best adapter and rewrites `model` to that adapter's backend name before forwarding upstream. The response carries an `X-Shiftgate-Route: <adapter_id> (<score>)` header so you can see what was chosen. Passing any other model id bypasses routing and forwards verbatim. Streaming (`stream: true`) is piped straight through via SSE.
104
+
105
+ ```bash
106
+ shiftgate serve --port 9000 --host 127.0.0.1 --backend auto # backend: auto | ollama | vllm | cerebras
107
+ ```
108
+
109
+ > Bind defaults to `127.0.0.1` (localhost only). Pass `--host 0.0.0.0` to expose it on your network.
110
+
111
+ ### Drop-in for Cursor / Aider / LangChain
112
+
113
+ Point each tool's OpenAI base URL at the proxy and use `model="auto"`:
114
+
115
+ ```bash
116
+ # Cursor → Settings → Models → Override OpenAI Base URL
117
+ http://localhost:9000/v1
118
+
119
+ # Aider
120
+ aider --openai-api-base http://localhost:9000/v1 --openai-api-key not-needed --model auto
121
+ ```
122
+
123
+ ```python
124
+ # LangChain
125
+ from langchain_openai import ChatOpenAI
126
+
127
+ llm = ChatOpenAI(
128
+ base_url="http://localhost:9000/v1",
129
+ api_key="not-needed",
130
+ model="auto",
131
+ )
132
+ ```
80
133
 
81
134
  ---
82
135
 
@@ -187,6 +240,10 @@ User query
187
240
  └────────────────────────────────┘
188
241
  ```
189
242
 
243
+ ### How routing works
244
+
245
+ When a backend is active, shiftgate filters candidate adapters to only those actually loaded on that backend. Switch from vLLM to Cerebras and shiftgate automatically picks Cerebras-compatible adapters — no re-registration needed. (When you run `shiftgate route` with no backend running, no filtering is applied, so you still see the full routing preview.)
246
+
190
247
  ---
191
248
 
192
249
  ## Bring Your Own Models
@@ -253,6 +310,20 @@ shiftgate adapter add sql-lora --local /models/sql-lora --tags sql --base llama3
253
310
 
254
311
  Useful for exploring routing decisions before your backend is set up. To run inference, load the adapter in vLLM or Ollama and re-register with `--runtime`.
255
312
 
313
+ ### Option 4 — Cerebras (cloud)
314
+
315
+ shiftgate also supports [Cerebras](https://cerebras.ai/) as a cloud fallback. It uses Cerebras' OpenAI-compatible API and authenticates with a bearer token from the `CEREBRAS_API_KEY` environment variable (or the `--cerebras-key` global flag).
316
+
317
+ ```bash
318
+ export CEREBRAS_API_KEY=csk-...
319
+ shiftgate adapter add llama3.1-8b --runtime llama3.1-8b --tags general --base llama3.1
320
+ shiftgate run "write a python sorting function"
321
+ ```
322
+
323
+ shiftgate auto-detects backends in the order **Ollama → vLLM → Cerebras**, so local backends always win and Cerebras is used only when no local backend is running.
324
+
325
+ > **Honest status:** shiftgate routes to Cerebras' base-model inference today. When Cerebras Multi-LoRA goes public, register your adapter with `--runtime <cerebras-lora-id>` and it just works — no shiftgate update needed.
326
+
256
327
  ---
257
328
 
258
329
  ## How to contribute adapters
@@ -360,6 +431,7 @@ shiftgate/
360
431
  | `shiftgate route "<query>"` | Route a query and show the decision — no inference |
361
432
  | `shiftgate route "<query>" --explain` | Full decision tree: task scores, candidates, selection reason |
362
433
  | `shiftgate run "<query>"` | Route + run via Ollama or vLLM |
434
+ | `shiftgate serve [--port 9000] [--host …] [--backend …]` | Run an OpenAI-compatible auto-routing proxy |
363
435
  | `shiftgate doctor` | Full health check: embedder, backend, adapters, task embeddings |
364
436
  | `shiftgate adapter add <hf_repo> [--tags …] [--base …]` | Register adapter from HuggingFace (metadata only) |
365
437
  | `shiftgate adapter add <id> --local <path> [--tags …]` | Register a local adapter path |
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "shiftgate"
7
- version = "0.1.8"
7
+ version = "0.2.0"
8
8
  description = "Intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -31,6 +31,9 @@ dependencies = [
31
31
  "scikit-learn>=1.4.0",
32
32
  "httpx>=0.27.0",
33
33
  "huggingface-hub>=0.22.0",
34
+ "fastapi>=0.110.0",
35
+ "uvicorn[standard]>=0.29.0",
36
+ "sse-starlette>=2.1.0",
34
37
  ]
35
38
 
36
39
  [project.optional-dependencies]
@@ -17,6 +17,7 @@ from typing import Annotated, Optional
17
17
 
18
18
  import typer
19
19
  from rich.console import Console
20
+ from rich.panel import Panel
20
21
  from rich.prompt import Confirm, Prompt
21
22
 
22
23
  from shiftgate.registry.schemas import AdapterEntry, TaskCluster
@@ -39,6 +40,26 @@ app.add_typer(task_app, name="task")
39
40
  app.add_typer(feedback_app, name="feedback")
40
41
 
41
42
 
43
+ @app.callback()
44
+ def _main(
45
+ cerebras_key: Annotated[
46
+ Optional[str],
47
+ typer.Option(
48
+ "--cerebras-key",
49
+ help=(
50
+ "Cerebras API key. If passed, sets CEREBRAS_API_KEY for this run "
51
+ "so the Cerebras cloud backend becomes available."
52
+ ),
53
+ ),
54
+ ] = None,
55
+ ) -> None:
56
+ """Global options applied before any command runs."""
57
+ if cerebras_key:
58
+ import os
59
+
60
+ os.environ["CEREBRAS_API_KEY"] = cerebras_key
61
+
62
+
42
63
  # ---------------------------------------------------------------------------
43
64
  # Helpers
44
65
  # ---------------------------------------------------------------------------
@@ -62,6 +83,19 @@ def _get_embedder():
62
83
  return Embedder()
63
84
 
64
85
 
86
+ def _active_runtimes(backend_router) -> set[str] | None:
87
+ """Return the set of runtime names loaded on the active backend, or None.
88
+
89
+ ``None`` means no backend is active → the router should not filter
90
+ (preview behaviour). An empty set means a backend is active but reports no
91
+ loaded models.
92
+ """
93
+ active = backend_router.active_backend
94
+ if active is None:
95
+ return None
96
+ return set(active.list_loaded_adapters())
97
+
98
+
65
99
  def _auto_link_adapter(adapter: AdapterEntry, task_reg) -> list[str]:
66
100
  """Add ``adapter.id`` to the ``preferred_adapters`` of matching task clusters.
67
101
 
@@ -443,6 +477,7 @@ def route(
443
477
  """
444
478
  from shiftgate.feedback import loop as feedback_loop
445
479
  from shiftgate.router import router as routing
480
+ from shiftgate.runtime.backend import BackendRouter
446
481
  from shiftgate.utils.display import show_explain_decision, show_routing_decision
447
482
 
448
483
  task_reg, adapter_reg = _load_registries()
@@ -453,8 +488,15 @@ def route(
453
488
 
454
489
  embedder = _get_embedder()
455
490
 
491
+ backend_router = BackendRouter()
492
+ backend_name = backend_router.detect()
493
+ available_runtimes = _active_runtimes(backend_router)
494
+
456
495
  try:
457
- trace, match_result = routing.route(query, task_reg, adapter_reg, embedder, top_k=top_k)
496
+ trace, match_result = routing.route(
497
+ query, task_reg, adapter_reg, embedder,
498
+ top_k=top_k, available_runtimes=available_runtimes,
499
+ )
458
500
  except Exception as exc:
459
501
  console.print(f"[red]Routing error:[/red] {exc}")
460
502
  raise typer.Exit(1)
@@ -466,7 +508,9 @@ def route(
466
508
  trace,
467
509
  adapter=adapter,
468
510
  task_name=task.name if task else None,
469
- backend_name=None,
511
+ backend_name=backend_name,
512
+ loaded_runtimes=available_runtimes,
513
+ selection_method=match_result.selection_method,
470
514
  )
471
515
 
472
516
  if explain:
@@ -503,22 +547,29 @@ def run(
503
547
 
504
548
  embedder = _get_embedder()
505
549
 
550
+ backend_router = BackendRouter()
551
+ backend_name = backend_router.detect()
552
+ available_runtimes = _active_runtimes(backend_router)
553
+
506
554
  try:
507
- trace, match_result = routing.route(query, task_reg, adapter_reg, embedder, top_k=top_k)
555
+ trace, match_result = routing.route(
556
+ query, task_reg, adapter_reg, embedder,
557
+ top_k=top_k, available_runtimes=available_runtimes,
558
+ )
508
559
  except Exception as exc:
509
560
  console.print(f"[red]Routing error:[/red] {exc}")
510
561
  raise typer.Exit(1)
511
562
 
512
563
  adapter = adapter_reg.get_adapter(trace.selected_adapter_id)
513
564
  task = task_reg.get_task(trace.matched_task_id)
514
- backend_router = BackendRouter()
515
- backend_name = backend_router.detect()
516
565
 
517
566
  show_routing_decision(
518
567
  trace,
519
568
  adapter=adapter,
520
569
  task_name=task.name if task else None,
521
570
  backend_name=backend_name,
571
+ loaded_runtimes=available_runtimes,
572
+ selection_method=match_result.selection_method,
522
573
  )
523
574
 
524
575
  if adapter is None:
@@ -705,6 +756,63 @@ def doctor() -> None:
705
756
  )
706
757
 
707
758
 
759
+ # ---------------------------------------------------------------------------
760
+ # shiftgate serve
761
+ # ---------------------------------------------------------------------------
762
+
763
+ @app.command()
764
+ def serve(
765
+ port: Annotated[int, typer.Option("--port", help="Port to listen on.")] = 9000,
766
+ host: Annotated[
767
+ str,
768
+ typer.Option(
769
+ "--host",
770
+ help="Host to bind. Use 0.0.0.0 to expose on the network (default: localhost only).",
771
+ ),
772
+ ] = "127.0.0.1",
773
+ backend: Annotated[
774
+ str,
775
+ typer.Option(
776
+ "--backend",
777
+ help="Backend to forward to: auto | ollama | vllm | cerebras.",
778
+ ),
779
+ ] = "auto",
780
+ ) -> None:
781
+ """Run an OpenAI-compatible proxy that auto-routes `model="auto"` requests.
782
+
783
+ Point any OpenAI client (Cursor, Aider, LangChain, the OpenAI SDK) at this
784
+ URL and pass ``model="auto"`` to get shiftgate routing for free.
785
+ """
786
+ import uvicorn
787
+
788
+ from shiftgate.serve import create_app
789
+
790
+ try:
791
+ app_instance = create_app(backend=backend)
792
+ except Exception as exc:
793
+ console.print(f"[red]Failed to start serve:[/red] {exc}")
794
+ raise typer.Exit(1)
795
+
796
+ backend_router = app_instance.state.backend_router
797
+ backend_name = backend_router.active_backend_name or "none detected"
798
+
799
+ console.print(
800
+ Panel(
801
+ f"[bold green]shiftgate serve[/bold green] listening on "
802
+ f"[cyan]http://{host}:{port}[/cyan]\n"
803
+ f"backend: [bold]{backend_name}[/bold]\n\n"
804
+ "Point your OpenAI client at this URL with "
805
+ "[bold magenta]model='auto'[/bold magenta]:\n"
806
+ f" [dim]base_url=\"http://{host}:{port}/v1\"[/dim]",
807
+ title="OpenAI-compatible proxy",
808
+ border_style="green",
809
+ expand=False,
810
+ )
811
+ )
812
+
813
+ uvicorn.run(app_instance, host=host, port=port, log_level="info")
814
+
815
+
708
816
  # ---------------------------------------------------------------------------
709
817
  # shiftgate demo
710
818
  # ---------------------------------------------------------------------------
@@ -117,6 +117,7 @@ def top_k_tasks(
117
117
  def select_adapter(
118
118
  top_tasks: list[TaskMatch],
119
119
  adapter_registry, # AdapterRegistry — avoid circular import with string hint
120
+ available_runtimes: set[str] | None = None,
120
121
  ) -> MatchResult:
121
122
  """Select the adapter linked to the best-matching task.
122
123
 
@@ -125,15 +126,23 @@ def select_adapter(
125
126
  For each top task (highest score first), walk ``preferred_adapters`` then
126
127
  ``fallback_adapters`` and collect the adapters that exist in the registry
127
128
  (populating ``TaskMatch.candidate_adapters`` for the ``--explain`` view).
128
- The first such adapter found, on the highest-scoring task, is selected.
129
+ The first viable adapter found, on the highest-scoring task, is selected.
130
+
131
+ Backend-aware filtering
132
+ -----------------------
133
+ When ``available_runtimes`` is provided (the set of model/adapter names
134
+ actually loaded on the active backend), only adapters whose
135
+ ``effective_backend_name()`` is in that set are considered viable. If a
136
+ task's entire candidate list is filtered out, selection falls through to
137
+ the next-best task. When ``available_runtimes`` is ``None`` no filtering
138
+ happens (the preview behaviour of ``shiftgate route`` when no backend is running).
129
139
 
130
140
  No silent fallback
131
141
  ------------------
132
142
  If the matched (top) task has **no** linked adapter in the registry, the
133
143
  router must NOT substitute an arbitrary adapter — doing so silently routes,
134
144
  e.g., a music query to a SQL adapter and destroys trust. Instead this
135
- returns a ``MatchResult`` with ``selected_adapter=None`` and
136
- ``selection_method="no_adapter_for_task"``.
145
+ returns a ``MatchResult`` with ``selected_adapter=None``.
137
146
 
138
147
  Parameters
139
148
  ----------
@@ -141,39 +150,56 @@ def select_adapter(
141
150
  Output of ``top_k_tasks`` (sorted by score descending).
142
151
  adapter_registry:
143
152
  ``AdapterRegistry`` instance to look up adapter IDs.
153
+ available_runtimes:
154
+ Optional set of runtime names loaded on the active backend. When set,
155
+ adapters not in the set are skipped during selection.
144
156
 
145
157
  Returns
146
158
  -------
147
- ``MatchResult``. ``selected_adapter`` is ``None`` when no adapter is
148
- linked to any of the ranked tasks. The ``matched_task`` is always the
149
- top-scoring task so callers can still report what was matched.
159
+ ``MatchResult``. ``selected_adapter`` is ``None`` when no viable adapter is
160
+ found. ``selection_method`` is ``"no_adapter_on_active_backend"`` when
161
+ linked adapters exist but none are loaded on the active backend, otherwise
162
+ ``"no_adapter_for_task"``. The ``matched_task`` is always the top-scoring
163
+ task so callers can still report what was matched.
150
164
  """
151
- # Populate candidate lists for every task (for the --explain view) and
152
- # find the first explicit match in score order.
165
+ def _is_viable(adapter) -> bool:
166
+ if available_runtimes is None:
167
+ return True
168
+ return adapter.effective_backend_name() in available_runtimes
169
+
153
170
  explicit_result: MatchResult | None = None
171
+ any_linked_adapter = False # any task had at least one registered adapter
154
172
 
155
173
  for tm in top_tasks:
156
174
  preferred_ids = list(tm.task.preferred_adapters)
157
175
  fallback_ids = list(tm.task.fallback_adapters)
158
176
 
177
+ # Populate candidate_adapters with every registered adapter (for the
178
+ # --explain view, showing all candidates regardless of runtime).
159
179
  for adapter_id in preferred_ids + fallback_ids:
160
180
  adapter = adapter_registry.get_adapter(adapter_id)
161
181
  if adapter is not None and adapter not in tm.candidate_adapters:
162
182
  tm.candidate_adapters.append(adapter)
163
183
 
164
- if explicit_result is None and tm.candidate_adapters:
165
- method = (
166
- "preferred"
167
- if tm.candidate_adapters[0].id in tm.task.preferred_adapters
168
- else "fallback"
169
- )
170
- explicit_result = MatchResult(
171
- selected_adapter=tm.candidate_adapters[0],
172
- matched_task=tm.task,
173
- similarity_score=tm.score,
174
- all_task_matches=top_tasks,
175
- selection_method=method,
176
- )
184
+ if tm.candidate_adapters:
185
+ any_linked_adapter = True
186
+
187
+ if explicit_result is None:
188
+ viable = [a for a in tm.candidate_adapters if _is_viable(a)]
189
+ if viable:
190
+ chosen = viable[0]
191
+ method = (
192
+ "preferred"
193
+ if chosen.id in tm.task.preferred_adapters
194
+ else "fallback"
195
+ )
196
+ explicit_result = MatchResult(
197
+ selected_adapter=chosen,
198
+ matched_task=tm.task,
199
+ similarity_score=tm.score,
200
+ all_task_matches=top_tasks,
201
+ selection_method=method,
202
+ )
177
203
 
178
204
  if explicit_result is not None:
179
205
  logger.debug(
@@ -185,19 +211,29 @@ def select_adapter(
185
211
  )
186
212
  return explicit_result
187
213
 
188
- # No adapter linked to any ranked task — do NOT guess. Report the matched
189
- # task with no adapter so the caller can prompt the user to add one.
214
+ # No viable adapter across any ranked task. Distinguish "nothing linked at
215
+ # all" from "linked but not loaded on the active backend".
190
216
  top_task = top_tasks[0]
191
- logger.info(
192
- "No linked adapter for matched task '%s' — refusing to guess.",
193
- top_task.task.id,
194
- )
217
+ if available_runtimes is not None and any_linked_adapter:
218
+ method = "no_adapter_on_active_backend"
219
+ logger.info(
220
+ "Linked adapter(s) for task '%s' exist but none are loaded on the "
221
+ "active backend — refusing to guess.",
222
+ top_task.task.id,
223
+ )
224
+ else:
225
+ method = "no_adapter_for_task"
226
+ logger.info(
227
+ "No linked adapter for matched task '%s' — refusing to guess.",
228
+ top_task.task.id,
229
+ )
230
+
195
231
  return MatchResult(
196
232
  selected_adapter=None,
197
233
  matched_task=top_task.task,
198
234
  similarity_score=top_task.score,
199
235
  all_task_matches=top_tasks,
200
- selection_method="no_adapter_for_task",
236
+ selection_method=method,
201
237
  )
202
238
 
203
239
 
@@ -26,6 +26,7 @@ def route(
26
26
  adapter_registry: AdapterRegistry,
27
27
  embedder: Embedder,
28
28
  top_k: int = 3,
29
+ available_runtimes: set[str] | None = None,
29
30
  ) -> tuple[RoutingTrace, MatchResult]:
30
31
  """Route a query string to the best matching adapter.
31
32
 
@@ -48,6 +49,12 @@ def route(
48
49
  ``Embedder`` instance (wraps fastembed singleton).
49
50
  top_k:
50
51
  Number of top task candidates to consider. Defaults to 3.
52
+ available_runtimes:
53
+ Optional set of runtime names loaded on the active backend. When set,
54
+ adapters whose ``effective_backend_name()`` is not in the set are
55
+ skipped, falling through to the next-best task. If no viable adapter
56
+ is found across all top-K tasks, the trace's ``selected_adapter_id`` is
57
+ ``None`` and ``selection_method`` is ``"no_adapter_on_active_backend"`` (or ``"no_adapter_for_task"`` when no adapter is linked to any ranked task).
51
58
 
52
59
  Returns
53
60
  -------
@@ -73,7 +80,7 @@ def route(
73
80
  query_embedding = embedder.embed(query)
74
81
  all_tasks = task_registry.get_all_tasks()
75
82
  ranked = top_k_tasks(query_embedding, all_tasks, k=top_k)
76
- result = select_adapter(ranked, adapter_registry)
83
+ result = select_adapter(ranked, adapter_registry, available_runtimes=available_runtimes)
77
84
 
78
85
  selected_id = result.selected_adapter.id if result.selected_adapter else None
79
86