shiftgate 0.1.4__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {shiftgate-0.1.4 → shiftgate-0.1.6}/PKG-INFO +9 -1
  2. {shiftgate-0.1.4 → shiftgate-0.1.6}/README.md +8 -0
  3. {shiftgate-0.1.4 → shiftgate-0.1.6}/pyproject.toml +1 -1
  4. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/__init__.py +1 -2
  5. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/cli.py +116 -4
  6. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/registry/schemas.py +10 -0
  7. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/runtime/backend.py +89 -2
  8. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/utils/display.py +147 -13
  9. shiftgate-0.1.6/tests/test_backend.py +137 -0
  10. {shiftgate-0.1.4 → shiftgate-0.1.6}/.gitignore +0 -0
  11. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/data/__init__.py +0 -0
  12. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/data/default_tasks.json +0 -0
  13. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/feedback/__init__.py +0 -0
  14. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/feedback/loop.py +0 -0
  15. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/registry/__init__.py +0 -0
  16. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/registry/adapter_registry.py +0 -0
  17. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/registry/task_registry.py +0 -0
  18. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/router/__init__.py +0 -0
  19. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/router/embedder.py +0 -0
  20. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/router/matcher.py +0 -0
  21. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/router/router.py +0 -0
  22. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/runtime/__init__.py +0 -0
  23. {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/utils/__init__.py +0 -0
  24. {shiftgate-0.1.4 → shiftgate-0.1.6}/tests/__init__.py +0 -0
  25. {shiftgate-0.1.4 → shiftgate-0.1.6}/tests/test_feedback.py +0 -0
  26. {shiftgate-0.1.4 → shiftgate-0.1.6}/tests/test_packaging.py +0 -0
  27. {shiftgate-0.1.4 → shiftgate-0.1.6}/tests/test_registry.py +0 -0
  28. {shiftgate-0.1.4 → shiftgate-0.1.6}/tests/test_router.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: shiftgate
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: Intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop.
5
5
  Project-URL: Homepage, https://github.com/shiftgate-ai/shiftgate
6
6
  Project-URL: Repository, https://github.com/shiftgate-ai/shiftgate
@@ -35,6 +35,8 @@ Description-Content-Type: text/markdown
35
35
 
36
36
  > **shiftgate is an intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop.**
37
37
 
38
+ *Inspired by [LORAUTER](https://arxiv.org/abs/2601.21795) — Effective LoRA Adapter Routing using Task Representations (EPFL, 2026).*
39
+
38
40
  **Shiftgate is a routing layer. Users manage models and LoRA weights themselves.**
39
41
  shiftgate stores only adapter *metadata* — it never downloads, caches, or manages weights.
40
42
  Your inference backend (Ollama, vLLM) is responsible for loading the weights; shiftgate just tells it *which* adapter to use for each query.
@@ -306,6 +308,12 @@ shiftgate/
306
308
 
307
309
  ---
308
310
 
311
+ ## References
312
+
313
+ - [LORAUTER](https://arxiv.org/abs/2601.21795) — *Effective LoRA Adapter Routing using Task Representations* (Dhasade et al., EPFL, 2026). shiftgate's task-level semantic routing is inspired by this work; it is not a reimplementation of the paper's full algorithm.
314
+
315
+ ---
316
+
309
317
  ## License
310
318
 
311
319
  MIT. See [LICENSE](LICENSE).
@@ -2,6 +2,8 @@
2
2
 
3
3
  > **shiftgate is an intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop.**
4
4
 
5
+ *Inspired by [LORAUTER](https://arxiv.org/abs/2601.21795) — Effective LoRA Adapter Routing using Task Representations (EPFL, 2026).*
6
+
5
7
  **Shiftgate is a routing layer. Users manage models and LoRA weights themselves.**
6
8
  shiftgate stores only adapter *metadata* — it never downloads, caches, or manages weights.
7
9
  Your inference backend (Ollama, vLLM) is responsible for loading the weights; shiftgate just tells it *which* adapter to use for each query.
@@ -273,6 +275,12 @@ shiftgate/
273
275
 
274
276
  ---
275
277
 
278
+ ## References
279
+
280
+ - [LORAUTER](https://arxiv.org/abs/2601.21795) — *Effective LoRA Adapter Routing using Task Representations* (Dhasade et al., EPFL, 2026). shiftgate's task-level semantic routing is inspired by this work; it is not a reimplementation of the paper's full algorithm.
281
+
282
+ ---
283
+
276
284
  ## License
277
285
 
278
286
  MIT. See [LICENSE](LICENSE).
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "shiftgate"
7
- version = "0.1.4"
7
+ version = "0.1.6"
8
8
  description = "Intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -1,8 +1,7 @@
1
1
  """
2
2
  shiftgate — Intelligent LoRA adapter routing for local LLM inference.
3
3
 
4
- Automatically selects the right adapter for each task using semantic
5
- similarity, inspired by the LORAUTER paper (EPFL, 2026).
4
+ Automatically selects the right adapter for each task using semantic similarity.
6
5
  """
7
6
 
8
7
  __version__ = "0.1.0"
@@ -117,18 +117,52 @@ def _finish_adapter_add(adapter: AdapterEntry, task_reg, adapter_reg) -> None:
117
117
  )
118
118
 
119
119
 
120
+ def _verify_runtime_adapter(adapter: AdapterEntry, adapter_reg) -> None:
121
+ """Best-effort check that a runtime adapter is loaded in a live backend.
122
+
123
+ Updates ``adapter.verified`` (True / False / None) and re-saves the
124
+ registry. Never raises — verification is purely informational and must
125
+ not fail the ``adapter add`` command.
126
+ """
127
+ from shiftgate.runtime.backend import BackendRouter
128
+
129
+ try:
130
+ with console.status("[cyan]Verifying adapter against running backend…[/cyan]"):
131
+ router = BackendRouter()
132
+ is_loaded, backend_name = router.verify_adapter(adapter)
133
+ except Exception as exc: # pragma: no cover - defensive, should not happen
134
+ logger_msg = f"verification error: {exc}"
135
+ console.print(f" [dim]Backend: verification skipped ({logger_msg})[/dim]")
136
+ return
137
+
138
+ runtime = adapter.runtime_name or adapter.id
139
+
140
+ if backend_name is None:
141
+ adapter.verified = None
142
+ console.print(" [dim]Backend: not running (verification skipped)[/dim]")
143
+ elif is_loaded:
144
+ adapter.verified = True
145
+ console.print(f" [green]Backend: {backend_name} ✓ verified[/green]")
146
+ else:
147
+ adapter.verified = False
148
+ console.print(
149
+ f" [yellow]Backend: {backend_name} ⚠ runtime '{runtime}' not loaded "
150
+ "— did you pass --lora-modules?[/yellow]"
151
+ )
152
+
153
+ adapter_reg.save()
154
+
155
+
120
156
  # ---------------------------------------------------------------------------
121
157
  # shiftgate init
122
158
  # ---------------------------------------------------------------------------
123
159
 
124
160
  @app.command()
125
161
  def init() -> None:
126
- """Set up ~/.shiftgate/, compute task embeddings, and show a welcome message."""
162
+ """Set up ~/.shiftgate/ and compute task embeddings."""
127
163
  from shiftgate.registry.adapter_registry import AdapterRegistry
128
164
  from shiftgate.registry.task_registry import TaskRegistry
129
- from shiftgate.utils.display import show_task_table, show_welcome_banner
130
-
131
- show_welcome_banner()
165
+ from shiftgate.utils.display import show_task_table
132
166
 
133
167
  shiftgate_dir = Path.home() / ".shiftgate"
134
168
  shiftgate_dir.mkdir(parents=True, exist_ok=True)
@@ -291,6 +325,11 @@ def adapter_add(
291
325
  adapter_reg.add_adapter(adapter)
292
326
  _finish_adapter_add(adapter, task_reg, adapter_reg)
293
327
 
328
+ # For runtime-registered adapters, try to confirm the backend actually
329
+ # has it loaded. Purely informational — never fails the add command.
330
+ if adapter.runtime_name:
331
+ _verify_runtime_adapter(adapter, adapter_reg)
332
+
294
333
 
295
334
  @adapter_app.command("list")
296
335
  def adapter_list() -> None:
@@ -593,6 +632,79 @@ def status() -> None:
593
632
  )
594
633
 
595
634
 
635
+ # ---------------------------------------------------------------------------
636
+ # shiftgate doctor
637
+ # ---------------------------------------------------------------------------
638
+
639
+ @app.command()
640
+ def doctor() -> None:
641
+ """Run a full health check and print a diagnostic report.
642
+
643
+ Checks the embedder, the inference backend, every registered adapter's
644
+ runtime availability, task-embedding readiness, and flags any task
645
+ clusters that have no linked adapter. Run this whenever something feels off.
646
+ """
647
+ from shiftgate.runtime.backend import BackendRouter, effective_backend_name
648
+ from shiftgate.utils.display import show_doctor_report
649
+
650
+ task_reg, adapter_reg = _load_registries()
651
+
652
+ # --- 1. Embedder ---
653
+ embedder_ok = False
654
+ embedder_detail = ""
655
+ with console.status("[cyan]Checking embedder…[/cyan]"):
656
+ try:
657
+ vec = _get_embedder().embed("test")
658
+ embedder_ok = vec is not None and len(vec) > 0
659
+ embedder_detail = f"dim={len(vec)}" if embedder_ok else "empty embedding"
660
+ except Exception as exc:
661
+ embedder_detail = str(exc)
662
+
663
+ # --- 2. Backend ---
664
+ with console.status("[cyan]Probing backends…[/cyan]"):
665
+ router = BackendRouter()
666
+ backend_name = router.detect()
667
+ backend_url = router.active_backend_url
668
+ loaded_adapters: list[str] = []
669
+ if backend_name is not None and router._active is not None:
670
+ loaded_adapters = router._active.list_loaded_adapters()
671
+
672
+ # --- 3. Per-adapter runtime availability ---
673
+ adapter_rows = []
674
+ for a in adapter_reg.list_adapters():
675
+ effective = effective_backend_name(a)
676
+ if backend_name is None:
677
+ state = "unknown" # no backend to check against
678
+ elif effective in loaded_adapters:
679
+ state = "loaded"
680
+ else:
681
+ state = "missing"
682
+ adapter_rows.append(
683
+ {"id": a.id, "runtime": effective, "status": a.status, "state": state}
684
+ )
685
+
686
+ # --- 4 & 5. Task embedding readiness + unlinked clusters ---
687
+ all_tasks = task_reg.get_all_tasks()
688
+ n_with_embeddings = sum(1 for t in all_tasks if t.embedding_centroid is not None)
689
+ registered_ids = {a.id for a in adapter_reg.list_adapters()}
690
+ unlinked_tasks = [
691
+ t.id
692
+ for t in all_tasks
693
+ if not (set(t.preferred_adapters) | set(t.fallback_adapters)) & registered_ids
694
+ ]
695
+
696
+ show_doctor_report(
697
+ embedder_ok=embedder_ok,
698
+ embedder_detail=embedder_detail,
699
+ backend_name=backend_name,
700
+ backend_url=backend_url,
701
+ adapter_rows=adapter_rows,
702
+ n_tasks=len(all_tasks),
703
+ n_with_embeddings=n_with_embeddings,
704
+ unlinked_tasks=unlinked_tasks,
705
+ )
706
+
707
+
596
708
  # ---------------------------------------------------------------------------
597
709
  # shiftgate demo
598
710
  # ---------------------------------------------------------------------------
@@ -93,6 +93,16 @@ class AdapterEntry(BaseModel):
93
93
  ),
94
94
  )
95
95
 
96
+ # --- backend verification result ---
97
+ verified: bool | None = Field(
98
+ default=None,
99
+ description=(
100
+ "Result of the last backend verification: True = the adapter's "
101
+ "runtime name was found loaded in a running backend, False = it was "
102
+ "not found, None = no backend was reachable when last checked."
103
+ ),
104
+ )
105
+
96
106
  @model_validator(mode="after")
97
107
  def _at_least_one_source(self) -> "AdapterEntry":
98
108
  """Warn (not error) when no source field is set.
@@ -38,6 +38,21 @@ _CONNECT_TIMEOUT = 3.0
38
38
  _READ_TIMEOUT = 120.0
39
39
 
40
40
 
41
+ def effective_backend_name(adapter: AdapterEntry) -> str:
42
+ """Return the name the inference backend knows this adapter by.
43
+
44
+ When the adapter was registered with ``--runtime <name>`` the user has
45
+ explicitly told us the backend loaded it under that name (e.g. a vLLM
46
+ ``--lora-modules`` key or an Ollama Modelfile model name). In that case we
47
+ must send ``runtime_name`` — sending ``adapter.id`` would address a model
48
+ the backend has never heard of.
49
+
50
+ Priority: ``runtime_name`` (if set and non-empty) > ``id``.
51
+ """
52
+ runtime = (adapter.runtime_name or "").strip()
53
+ return runtime if runtime else adapter.id
54
+
55
+
41
56
  class BaseBackend(ABC):
42
57
  """Abstract base for inference backends."""
43
58
 
@@ -49,6 +64,14 @@ class BaseBackend(ABC):
49
64
  def generate(self, prompt: str, adapter: AdapterEntry) -> str:
50
65
  """Send ``prompt`` to the backend and return the generated text."""
51
66
 
67
+ @abstractmethod
68
+ def list_loaded_adapters(self) -> list[str]:
69
+ """Return the model/adapter names currently loaded in the backend.
70
+
71
+ Must use a short timeout and silently return ``[]`` if the backend is
72
+ unreachable — this method is only used for informational verification.
73
+ """
74
+
52
75
 
53
76
  # ---------------------------------------------------------------------------
54
77
  # Ollama
@@ -109,7 +132,9 @@ class OllamaBackend(BaseBackend):
109
132
  If True, Ollama streams response tokens. This client reads the
110
133
  full stream and returns the concatenated text.
111
134
  """
112
- model = model_name or adapter.id
135
+ # Explicit override wins; otherwise use the backend-effective name
136
+ # (runtime_name when set, else adapter.id).
137
+ model = model_name or effective_backend_name(adapter)
113
138
  payload = {"model": model, "prompt": prompt, "stream": stream}
114
139
 
115
140
  logger.debug("Ollama generate: model=%s", model)
@@ -126,6 +151,20 @@ class OllamaBackend(BaseBackend):
126
151
  data = r.json()
127
152
  return data.get("response", "")
128
153
 
154
+ def list_loaded_adapters(self) -> list[str]:
155
+ """Return the names of all models loaded in Ollama (``GET /api/tags``).
156
+
157
+ Silently returns ``[]`` if Ollama is unreachable.
158
+ """
159
+ try:
160
+ r = httpx.get(f"{self.base_url}/api/tags", timeout=_CONNECT_TIMEOUT)
161
+ r.raise_for_status()
162
+ models = r.json().get("models", [])
163
+ return [m["name"] for m in models if "name" in m]
164
+ except Exception as exc:
165
+ logger.debug("Ollama list_loaded_adapters failed: %s", exc)
166
+ return []
167
+
129
168
 
130
169
  # ---------------------------------------------------------------------------
131
170
  # vLLM
@@ -183,7 +222,9 @@ class VLLMBackend(BaseBackend):
183
222
  system_prompt:
184
223
  System message prepended before the user message.
185
224
  """
186
- model = lora_name or adapter.id
225
+ # Explicit override wins; otherwise use the backend-effective name
226
+ # (runtime_name when set, else adapter.id).
227
+ model = lora_name or effective_backend_name(adapter)
187
228
  payload = {
188
229
  "model": model,
189
230
  "messages": [
@@ -209,6 +250,21 @@ class VLLMBackend(BaseBackend):
209
250
  except (KeyError, IndexError) as exc:
210
251
  raise BackendError(f"Unexpected vLLM response format: {data}") from exc
211
252
 
253
+ def list_loaded_adapters(self) -> list[str]:
254
+ """Return all model/LoRA ids served by vLLM (``GET /v1/models``).
255
+
256
+ The ``data`` array lists the base model plus every ``--lora-modules``
257
+ key. Silently returns ``[]`` if vLLM is unreachable.
258
+ """
259
+ try:
260
+ r = httpx.get(f"{self.base_url}/v1/models", timeout=_CONNECT_TIMEOUT)
261
+ r.raise_for_status()
262
+ data = r.json().get("data", [])
263
+ return [m["id"] for m in data if "id" in m]
264
+ except Exception as exc:
265
+ logger.debug("vLLM list_loaded_adapters failed: %s", exc)
266
+ return []
267
+
212
268
 
213
269
  # ---------------------------------------------------------------------------
214
270
  # BackendRouter — auto-detects which backend is live
@@ -276,6 +332,37 @@ class BackendRouter:
276
332
  return "vllm"
277
333
  return None
278
334
 
335
+ @property
336
+ def active_backend_url(self) -> str | None:
337
+ """Return the base URL of the active backend, or None."""
338
+ if self._active is not None:
339
+ return self._active.base_url
340
+ return None
341
+
342
+ def verify_adapter(self, adapter: AdapterEntry) -> tuple[bool, str | None]:
343
+ """Check whether an adapter is actually loaded in the active backend.
344
+
345
+ Auto-detects a backend if one hasn't been probed yet.
346
+
347
+ Returns
348
+ -------
349
+ ``(is_loaded, backend_name)``
350
+ - ``(True, "<name>")`` — backend running and the adapter's
351
+ effective name is present in its loaded model list.
352
+ - ``(False, "<name>")`` — backend running but the name is absent.
353
+ - ``(False, None)`` — no backend reachable (verification skipped).
354
+
355
+ Never raises: HTTP failures degrade to ``(False, None)``.
356
+ """
357
+ if self._active is None:
358
+ self.detect()
359
+ if self._active is None:
360
+ return (False, None)
361
+
362
+ target = effective_backend_name(adapter)
363
+ loaded = self._active.list_loaded_adapters()
364
+ return (target in loaded, self.active_backend_name)
365
+
279
366
 
280
367
  # ---------------------------------------------------------------------------
281
368
  # Exceptions
@@ -333,19 +333,8 @@ def animate_swap(from_adapter: str, to_adapter: str, duration: float = 1.5) -> N
333
333
  # ---------------------------------------------------------------------------
334
334
 
335
335
  def show_welcome_banner() -> None:
336
- """Print the shiftgate welcome banner shown during `shiftgate init`."""
337
- banner = Text(justify="center")
338
- banner.append("\n ⚡ shiftgate ", style="bold cyan")
339
- banner.append("v0.1\n", style="dim")
340
- banner.append(" Intelligent LoRA routing for local LLM inference\n", style="italic white")
341
- banner.append(" Inspired by LORAUTER · EPFL 2026\n\n", style="dim")
342
-
343
- panel = Panel(
344
- Align.center(banner),
345
- border_style="cyan",
346
- expand=False,
347
- )
348
- console.print(Align.center(panel))
336
+ """Print a one-line banner (``shiftgate demo`` only)."""
337
+ console.print("\n[bold cyan]⚡ shiftgate[/bold cyan]\n")
349
338
 
350
339
 
351
340
  # ---------------------------------------------------------------------------
@@ -411,3 +400,148 @@ def show_status(
411
400
  grid.add_row("Embeddings", Text(emb_label, style=emb_style))
412
401
 
413
402
  console.print(Panel(grid, title="shiftgate status", border_style="cyan", expand=False))
403
+
404
+
405
+ # ---------------------------------------------------------------------------
406
+ # Doctor report
407
+ # ---------------------------------------------------------------------------
408
+
409
+ def show_doctor_report(
410
+ *,
411
+ embedder_ok: bool,
412
+ embedder_detail: str,
413
+ backend_name: str | None,
414
+ backend_url: str | None,
415
+ adapter_rows: list[dict],
416
+ n_tasks: int,
417
+ n_with_embeddings: int,
418
+ unlinked_tasks: list[str],
419
+ ) -> None:
420
+ """Render the full ``shiftgate doctor`` health report.
421
+
422
+ Parameters mirror the checks performed in ``cli.doctor``. Each section is
423
+ a Rich panel/table; a final summary line tallies pass / warn / fail.
424
+ """
425
+ ok_mark = "[green]✓[/green]"
426
+ warn_mark = "[yellow]⚠[/yellow]"
427
+ fail_mark = "[red]✗[/red]"
428
+
429
+ warnings = 0
430
+ failures = 0
431
+
432
+ console.print()
433
+ console.rule("[bold cyan]shiftgate doctor[/bold cyan]")
434
+ console.print()
435
+
436
+ # --- Core checks grid ---
437
+ core = Table.grid(padding=(0, 2))
438
+ core.add_column(width=3)
439
+ core.add_column(style="bold", min_width=18)
440
+ core.add_column()
441
+
442
+ # Embedder
443
+ if embedder_ok:
444
+ core.add_row(ok_mark, "Embedder", Text(f"loaded ({embedder_detail})", style="green"))
445
+ else:
446
+ failures += 1
447
+ core.add_row(fail_mark, "Embedder", Text(f"failed: {embedder_detail}", style="red"))
448
+
449
+ # Backend
450
+ if backend_name:
451
+ core.add_row(
452
+ ok_mark,
453
+ "Backend",
454
+ Text(f"{backend_name} ({backend_url})", style="green"),
455
+ )
456
+ else:
457
+ warnings += 1
458
+ core.add_row(
459
+ warn_mark,
460
+ "Backend",
461
+ Text("none detected — start ollama serve or vLLM", style="yellow"),
462
+ )
463
+
464
+ # Task embeddings
465
+ if n_tasks > 0 and n_with_embeddings == n_tasks:
466
+ core.add_row(
467
+ ok_mark,
468
+ "Task embeddings",
469
+ Text(f"{n_with_embeddings}/{n_tasks} clusters ready", style="green"),
470
+ )
471
+ else:
472
+ warnings += 1
473
+ core.add_row(
474
+ warn_mark,
475
+ "Task embeddings",
476
+ Text(
477
+ f"{n_with_embeddings}/{n_tasks} computed — run `shiftgate init`",
478
+ style="yellow",
479
+ ),
480
+ )
481
+
482
+ console.print(Panel(core, title="Core", border_style="cyan", expand=False))
483
+ console.print()
484
+
485
+ # --- Adapter availability table ---
486
+ if adapter_rows:
487
+ table = Table(
488
+ title="Adapter runtime availability",
489
+ box=box.ROUNDED,
490
+ header_style="bold cyan",
491
+ border_style="cyan",
492
+ )
493
+ table.add_column("Adapter ID", style="bold magenta")
494
+ table.add_column("Backend name")
495
+ table.add_column("Linked", justify="center")
496
+ table.add_column("Loaded", justify="center")
497
+
498
+ for row in adapter_rows:
499
+ linked = (
500
+ "[green]linked[/green]" if row["status"] == "linked"
501
+ else "[yellow]unassigned[/yellow]"
502
+ )
503
+ state = row["state"]
504
+ if state == "loaded":
505
+ loaded = f"{ok_mark} loaded"
506
+ elif state == "missing":
507
+ warnings += 1
508
+ loaded = f"{warn_mark} not loaded"
509
+ else: # unknown — no backend
510
+ loaded = "[dim]— (no backend)[/dim]"
511
+ table.add_row(row["id"], row["runtime"], linked, loaded)
512
+
513
+ console.print(table)
514
+ else:
515
+ console.print("[dim]No adapters registered. Add one with `shiftgate adapter add`.[/dim]")
516
+ console.print()
517
+
518
+ # --- Unlinked task clusters warning ---
519
+ if unlinked_tasks:
520
+ warnings += 1
521
+ console.print(
522
+ Panel(
523
+ Text(
524
+ "These task clusters have no linked adapter and will return "
525
+ "'No adapter available' if matched:\n "
526
+ + ", ".join(unlinked_tasks),
527
+ style="yellow",
528
+ ),
529
+ title=f"{warn_mark} Unlinked task clusters ({len(unlinked_tasks)})",
530
+ border_style="yellow",
531
+ expand=False,
532
+ )
533
+ )
534
+ console.print()
535
+
536
+ # --- Summary line ---
537
+ if failures:
538
+ summary = f"[bold red]{failures} failed[/bold red]"
539
+ if warnings:
540
+ summary += f", [yellow]{warnings} warning(s)[/yellow]"
541
+ elif warnings:
542
+ summary = f"[bold yellow]{warnings} warning(s)[/bold yellow] — shiftgate is usable but check above"
543
+ else:
544
+ summary = "[bold green]All checks passed — shiftgate is healthy.[/bold green]"
545
+
546
+ console.print(f" {summary}")
547
+ console.print()
@@ -0,0 +1,137 @@
1
+ """
2
+ Tests for the inference backend layer.
3
+
4
+ Focus areas:
5
+ - effective_backend_name() name resolution (runtime_name vs id)
6
+ - BackendRouter.verify_adapter() against a stubbed loaded-adapter list
7
+ No real HTTP calls are made.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import pytest
13
+
14
+ from shiftgate.registry.schemas import AdapterEntry
15
+ from shiftgate.runtime.backend import (
16
+ BackendRouter,
17
+ VLLMBackend,
18
+ effective_backend_name,
19
+ )
20
+
21
+
22
+ def _adapter(**kwargs) -> AdapterEntry:
23
+ base = dict(id="sql-lora", name="SQL", base_model="llama3")
24
+ base.update(kwargs)
25
+ return AdapterEntry(**base)
26
+
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # effective_backend_name()
30
+ # ---------------------------------------------------------------------------
31
+
32
+ class TestEffectiveBackendName:
33
+ def test_uses_runtime_name_when_set(self):
34
+ adapter = _adapter(runtime_name="sql-lora-vllm")
35
+ assert effective_backend_name(adapter) == "sql-lora-vllm"
36
+
37
+ def test_falls_back_to_id_when_runtime_name_none(self):
38
+ adapter = _adapter(runtime_name=None, hf_repo="org/sql-lora")
39
+ assert effective_backend_name(adapter) == "sql-lora"
40
+
41
+ def test_falls_back_to_id_when_runtime_name_blank(self):
42
+ # An empty / whitespace runtime_name must not be used.
43
+ adapter = _adapter(runtime_name=" ", hf_repo="org/sql-lora")
44
+ assert effective_backend_name(adapter) == "sql-lora"
45
+
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # generate() uses the effective name
49
+ # ---------------------------------------------------------------------------
50
+
51
+ class TestGenerateUsesEffectiveName:
52
+ def test_vllm_generate_sends_runtime_name(self, monkeypatch):
53
+ captured = {}
54
+
55
+ class _Resp:
56
+ def raise_for_status(self):
57
+ pass
58
+
59
+ def json(self):
60
+ return {"choices": [{"message": {"content": "ok"}}]}
61
+
62
+ def fake_post(url, json, timeout):
63
+ captured["model"] = json["model"]
64
+ return _Resp()
65
+
66
+ monkeypatch.setattr("shiftgate.runtime.backend.httpx.post", fake_post)
67
+
68
+ backend = VLLMBackend()
69
+ adapter = _adapter(runtime_name="sql-lora-vllm")
70
+ backend.generate("hello", adapter)
71
+ assert captured["model"] == "sql-lora-vllm"
72
+
73
+ def test_vllm_generate_explicit_override_wins(self, monkeypatch):
74
+ captured = {}
75
+
76
+ class _Resp:
77
+ def raise_for_status(self):
78
+ pass
79
+
80
+ def json(self):
81
+ return {"choices": [{"message": {"content": "ok"}}]}
82
+
83
+ def fake_post(url, json, timeout):
84
+ captured["model"] = json["model"]
85
+ return _Resp()
86
+
87
+ monkeypatch.setattr("shiftgate.runtime.backend.httpx.post", fake_post)
88
+
89
+ backend = VLLMBackend()
90
+ adapter = _adapter(runtime_name="sql-lora-vllm")
91
+ # Explicit lora_name must win over runtime_name.
92
+ backend.generate("hello", adapter, lora_name="override-name")
93
+ assert captured["model"] == "override-name"
94
+
95
+
96
+ # ---------------------------------------------------------------------------
97
+ # BackendRouter.verify_adapter()
98
+ # ---------------------------------------------------------------------------
99
+
100
+ class TestVerifyAdapter:
101
+ def test_no_backend_returns_false_none(self, monkeypatch):
102
+ router = BackendRouter()
103
+ monkeypatch.setattr(router, "detect", lambda: None)
104
+ # Active stays None → verification skipped.
105
+ is_loaded, backend_name = router.verify_adapter(_adapter(runtime_name="x"))
106
+ assert is_loaded is False
107
+ assert backend_name is None
108
+
109
+ def test_loaded_returns_true(self, monkeypatch):
110
+ router = BackendRouter()
111
+ # Force vLLM active and stub its loaded list.
112
+ router._active = router._vllm
113
+ monkeypatch.setattr(router._vllm, "list_loaded_adapters", lambda: ["base", "sql-lora-vllm"])
114
+ is_loaded, backend_name = router.verify_adapter(_adapter(runtime_name="sql-lora-vllm"))
115
+ assert is_loaded is True
116
+ assert backend_name == "vllm"
117
+
118
+ def test_not_loaded_returns_false_with_backend(self, monkeypatch):
119
+ router = BackendRouter()
120
+ router._active = router._vllm
121
+ monkeypatch.setattr(router._vllm, "list_loaded_adapters", lambda: ["base"])
122
+ is_loaded, backend_name = router.verify_adapter(_adapter(runtime_name="sql-lora-vllm"))
123
+ assert is_loaded is False
124
+ assert backend_name == "vllm"
125
+
126
+
127
+ # ---------------------------------------------------------------------------
128
+ # list_loaded_adapters() degrades gracefully when offline
129
+ # ---------------------------------------------------------------------------
130
+
131
+ class TestListLoadedAdaptersOffline:
132
+ def test_vllm_returns_empty_on_connection_error(self, monkeypatch):
133
+ def boom(*a, **k):
134
+ raise OSError("connection refused")
135
+
136
+ monkeypatch.setattr("shiftgate.runtime.backend.httpx.get", boom)
137
+ assert VLLMBackend().list_loaded_adapters() == []
File without changes
File without changes