shiftgate 0.1.4__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {shiftgate-0.1.4 → shiftgate-0.1.6}/PKG-INFO +9 -1
- {shiftgate-0.1.4 → shiftgate-0.1.6}/README.md +8 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/pyproject.toml +1 -1
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/__init__.py +1 -2
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/cli.py +116 -4
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/registry/schemas.py +10 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/runtime/backend.py +89 -2
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/utils/display.py +147 -13
- shiftgate-0.1.6/tests/test_backend.py +137 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/.gitignore +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/data/__init__.py +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/data/default_tasks.json +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/feedback/__init__.py +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/feedback/loop.py +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/registry/__init__.py +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/registry/adapter_registry.py +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/registry/task_registry.py +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/router/__init__.py +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/router/embedder.py +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/router/matcher.py +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/router/router.py +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/runtime/__init__.py +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/shiftgate/utils/__init__.py +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/tests/__init__.py +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/tests/test_feedback.py +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/tests/test_packaging.py +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/tests/test_registry.py +0 -0
- {shiftgate-0.1.4 → shiftgate-0.1.6}/tests/test_router.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: shiftgate
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: Intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop.
|
|
5
5
|
Project-URL: Homepage, https://github.com/shiftgate-ai/shiftgate
|
|
6
6
|
Project-URL: Repository, https://github.com/shiftgate-ai/shiftgate
|
|
@@ -35,6 +35,8 @@ Description-Content-Type: text/markdown
|
|
|
35
35
|
|
|
36
36
|
> **shiftgate is an intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop.**
|
|
37
37
|
|
|
38
|
+
*Inspired by [LORAUTER](https://arxiv.org/abs/2601.21795) — Effective LoRA Adapter Routing using Task Representations (EPFL, 2026).*
|
|
39
|
+
|
|
38
40
|
**Shiftgate is a routing layer. Users manage models and LoRA weights themselves.**
|
|
39
41
|
shiftgate stores only adapter *metadata* — it never downloads, caches, or manages weights.
|
|
40
42
|
Your inference backend (Ollama, vLLM) is responsible for loading the weights; shiftgate just tells it *which* adapter to use for each query.
|
|
@@ -306,6 +308,12 @@ shiftgate/
|
|
|
306
308
|
|
|
307
309
|
---
|
|
308
310
|
|
|
311
|
+
## References
|
|
312
|
+
|
|
313
|
+
- [LORAUTER](https://arxiv.org/abs/2601.21795) — *Effective LoRA Adapter Routing using Task Representations* (Dhasade et al., EPFL, 2026). shiftgate's task-level semantic routing is inspired by this work; it is not a reimplementation of the paper's full algorithm.
|
|
314
|
+
|
|
315
|
+
---
|
|
316
|
+
|
|
309
317
|
## License
|
|
310
318
|
|
|
311
319
|
MIT. See [LICENSE](LICENSE).
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
> **shiftgate is an intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop.**
|
|
4
4
|
|
|
5
|
+
*Inspired by [LORAUTER](https://arxiv.org/abs/2601.21795) — Effective LoRA Adapter Routing using Task Representations (EPFL, 2026).*
|
|
6
|
+
|
|
5
7
|
**Shiftgate is a routing layer. Users manage models and LoRA weights themselves.**
|
|
6
8
|
shiftgate stores only adapter *metadata* — it never downloads, caches, or manages weights.
|
|
7
9
|
Your inference backend (Ollama, vLLM) is responsible for loading the weights; shiftgate just tells it *which* adapter to use for each query.
|
|
@@ -273,6 +275,12 @@ shiftgate/
|
|
|
273
275
|
|
|
274
276
|
---
|
|
275
277
|
|
|
278
|
+
## References
|
|
279
|
+
|
|
280
|
+
- [LORAUTER](https://arxiv.org/abs/2601.21795) — *Effective LoRA Adapter Routing using Task Representations* (Dhasade et al., EPFL, 2026). shiftgate's task-level semantic routing is inspired by this work; it is not a reimplementation of the paper's full algorithm.
|
|
281
|
+
|
|
282
|
+
---
|
|
283
|
+
|
|
276
284
|
## License
|
|
277
285
|
|
|
278
286
|
MIT. See [LICENSE](LICENSE).
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "shiftgate"
|
|
7
|
-
version = "0.1.4"
|
|
7
|
+
version = "0.1.6"
|
|
8
8
|
description = "Intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
shiftgate — Intelligent LoRA adapter routing for local LLM inference.
|
|
3
3
|
|
|
4
|
-
Automatically selects the right adapter for each task using semantic
|
|
5
|
-
similarity, inspired by the LORAUTER paper (EPFL, 2026).
|
|
4
|
+
Automatically selects the right adapter for each task using semantic similarity.
|
|
6
5
|
"""
|
|
7
6
|
|
|
8
7
|
__version__ = "0.1.0"
|
|
@@ -117,18 +117,52 @@ def _finish_adapter_add(adapter: AdapterEntry, task_reg, adapter_reg) -> None:
|
|
|
117
117
|
)
|
|
118
118
|
|
|
119
119
|
|
|
120
|
+
def _verify_runtime_adapter(adapter: AdapterEntry, adapter_reg) -> None:
|
|
121
|
+
"""Best-effort check that a runtime adapter is loaded in a live backend.
|
|
122
|
+
|
|
123
|
+
Updates ``adapter.verified`` (True / False / None) and re-saves the
|
|
124
|
+
registry. Never raises — verification is purely informational and must
|
|
125
|
+
not fail the ``adapter add`` command.
|
|
126
|
+
"""
|
|
127
|
+
from shiftgate.runtime.backend import BackendRouter
|
|
128
|
+
|
|
129
|
+
try:
|
|
130
|
+
with console.status("[cyan]Verifying adapter against running backend…[/cyan]"):
|
|
131
|
+
router = BackendRouter()
|
|
132
|
+
is_loaded, backend_name = router.verify_adapter(adapter)
|
|
133
|
+
except Exception as exc: # pragma: no cover - defensive, should not happen
|
|
134
|
+
logger_msg = f"verification error: {exc}"
|
|
135
|
+
console.print(f" [dim]Backend: verification skipped ({logger_msg})[/dim]")
|
|
136
|
+
return
|
|
137
|
+
|
|
138
|
+
runtime = adapter.runtime_name or adapter.id
|
|
139
|
+
|
|
140
|
+
if backend_name is None:
|
|
141
|
+
adapter.verified = None
|
|
142
|
+
console.print(" [dim]Backend: not running (verification skipped)[/dim]")
|
|
143
|
+
elif is_loaded:
|
|
144
|
+
adapter.verified = True
|
|
145
|
+
console.print(f" [green]Backend: {backend_name} ✓ verified[/green]")
|
|
146
|
+
else:
|
|
147
|
+
adapter.verified = False
|
|
148
|
+
console.print(
|
|
149
|
+
f" [yellow]Backend: {backend_name} ⚠ runtime '{runtime}' not loaded "
|
|
150
|
+
"— did you pass --lora-modules?[/yellow]"
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
adapter_reg.save()
|
|
154
|
+
|
|
155
|
+
|
|
120
156
|
# ---------------------------------------------------------------------------
|
|
121
157
|
# shiftgate init
|
|
122
158
|
# ---------------------------------------------------------------------------
|
|
123
159
|
|
|
124
160
|
@app.command()
|
|
125
161
|
def init() -> None:
|
|
126
|
-
"""Set up ~/.shiftgate
|
|
162
|
+
"""Set up ~/.shiftgate/ and compute task embeddings."""
|
|
127
163
|
from shiftgate.registry.adapter_registry import AdapterRegistry
|
|
128
164
|
from shiftgate.registry.task_registry import TaskRegistry
|
|
129
|
-
from shiftgate.utils.display import show_task_table
|
|
130
|
-
|
|
131
|
-
show_welcome_banner()
|
|
165
|
+
from shiftgate.utils.display import show_task_table
|
|
132
166
|
|
|
133
167
|
shiftgate_dir = Path.home() / ".shiftgate"
|
|
134
168
|
shiftgate_dir.mkdir(parents=True, exist_ok=True)
|
|
@@ -291,6 +325,11 @@ def adapter_add(
|
|
|
291
325
|
adapter_reg.add_adapter(adapter)
|
|
292
326
|
_finish_adapter_add(adapter, task_reg, adapter_reg)
|
|
293
327
|
|
|
328
|
+
# For runtime-registered adapters, try to confirm the backend actually
|
|
329
|
+
# has it loaded. Purely informational — never fails the add command.
|
|
330
|
+
if adapter.runtime_name:
|
|
331
|
+
_verify_runtime_adapter(adapter, adapter_reg)
|
|
332
|
+
|
|
294
333
|
|
|
295
334
|
@adapter_app.command("list")
|
|
296
335
|
def adapter_list() -> None:
|
|
@@ -593,6 +632,79 @@ def status() -> None:
|
|
|
593
632
|
)
|
|
594
633
|
|
|
595
634
|
|
|
635
|
+
# ---------------------------------------------------------------------------
|
|
636
|
+
# shiftgate doctor
|
|
637
|
+
# ---------------------------------------------------------------------------
|
|
638
|
+
|
|
639
|
+
@app.command()
|
|
640
|
+
def doctor() -> None:
|
|
641
|
+
"""Run a full health check and print a diagnostic report.
|
|
642
|
+
|
|
643
|
+
Checks the embedder, the inference backend, every registered adapter's
|
|
644
|
+
runtime availability, task-embedding readiness, and flags any task
|
|
645
|
+
clusters that have no linked adapter. Run this whenever something feels off.
|
|
646
|
+
"""
|
|
647
|
+
from shiftgate.runtime.backend import BackendRouter, effective_backend_name
|
|
648
|
+
from shiftgate.utils.display import show_doctor_report
|
|
649
|
+
|
|
650
|
+
task_reg, adapter_reg = _load_registries()
|
|
651
|
+
|
|
652
|
+
# --- 1. Embedder ---
|
|
653
|
+
embedder_ok = False
|
|
654
|
+
embedder_detail = ""
|
|
655
|
+
with console.status("[cyan]Checking embedder…[/cyan]"):
|
|
656
|
+
try:
|
|
657
|
+
vec = _get_embedder().embed("test")
|
|
658
|
+
embedder_ok = vec is not None and len(vec) > 0
|
|
659
|
+
embedder_detail = f"dim={len(vec)}" if embedder_ok else "empty embedding"
|
|
660
|
+
except Exception as exc:
|
|
661
|
+
embedder_detail = str(exc)
|
|
662
|
+
|
|
663
|
+
# --- 2. Backend ---
|
|
664
|
+
with console.status("[cyan]Probing backends…[/cyan]"):
|
|
665
|
+
router = BackendRouter()
|
|
666
|
+
backend_name = router.detect()
|
|
667
|
+
backend_url = router.active_backend_url
|
|
668
|
+
loaded_adapters: list[str] = []
|
|
669
|
+
if backend_name is not None and router._active is not None:
|
|
670
|
+
loaded_adapters = router._active.list_loaded_adapters()
|
|
671
|
+
|
|
672
|
+
# --- 3. Per-adapter runtime availability ---
|
|
673
|
+
adapter_rows = []
|
|
674
|
+
for a in adapter_reg.list_adapters():
|
|
675
|
+
effective = effective_backend_name(a)
|
|
676
|
+
if backend_name is None:
|
|
677
|
+
state = "unknown" # no backend to check against
|
|
678
|
+
elif effective in loaded_adapters:
|
|
679
|
+
state = "loaded"
|
|
680
|
+
else:
|
|
681
|
+
state = "missing"
|
|
682
|
+
adapter_rows.append(
|
|
683
|
+
{"id": a.id, "runtime": effective, "status": a.status, "state": state}
|
|
684
|
+
)
|
|
685
|
+
|
|
686
|
+
# --- 4 & 5. Task embedding readiness + unlinked clusters ---
|
|
687
|
+
all_tasks = task_reg.get_all_tasks()
|
|
688
|
+
n_with_embeddings = sum(1 for t in all_tasks if t.embedding_centroid is not None)
|
|
689
|
+
registered_ids = {a.id for a in adapter_reg.list_adapters()}
|
|
690
|
+
unlinked_tasks = [
|
|
691
|
+
t.id
|
|
692
|
+
for t in all_tasks
|
|
693
|
+
if not (set(t.preferred_adapters) | set(t.fallback_adapters)) & registered_ids
|
|
694
|
+
]
|
|
695
|
+
|
|
696
|
+
show_doctor_report(
|
|
697
|
+
embedder_ok=embedder_ok,
|
|
698
|
+
embedder_detail=embedder_detail,
|
|
699
|
+
backend_name=backend_name,
|
|
700
|
+
backend_url=backend_url,
|
|
701
|
+
adapter_rows=adapter_rows,
|
|
702
|
+
n_tasks=len(all_tasks),
|
|
703
|
+
n_with_embeddings=n_with_embeddings,
|
|
704
|
+
unlinked_tasks=unlinked_tasks,
|
|
705
|
+
)
|
|
706
|
+
|
|
707
|
+
|
|
596
708
|
# ---------------------------------------------------------------------------
|
|
597
709
|
# shiftgate demo
|
|
598
710
|
# ---------------------------------------------------------------------------
|
|
@@ -93,6 +93,16 @@ class AdapterEntry(BaseModel):
|
|
|
93
93
|
),
|
|
94
94
|
)
|
|
95
95
|
|
|
96
|
+
# --- backend verification result ---
|
|
97
|
+
verified: bool | None = Field(
|
|
98
|
+
default=None,
|
|
99
|
+
description=(
|
|
100
|
+
"Result of the last backend verification: True = the adapter's "
|
|
101
|
+
"runtime name was found loaded in a running backend, False = it was "
|
|
102
|
+
"not found, None = no backend was reachable when last checked."
|
|
103
|
+
),
|
|
104
|
+
)
|
|
105
|
+
|
|
96
106
|
@model_validator(mode="after")
|
|
97
107
|
def _at_least_one_source(self) -> "AdapterEntry":
|
|
98
108
|
"""Warn (not error) when no source field is set.
|
|
@@ -38,6 +38,21 @@ _CONNECT_TIMEOUT = 3.0
|
|
|
38
38
|
_READ_TIMEOUT = 120.0
|
|
39
39
|
|
|
40
40
|
|
|
41
|
+
def effective_backend_name(adapter: AdapterEntry) -> str:
|
|
42
|
+
"""Return the name the inference backend knows this adapter by.
|
|
43
|
+
|
|
44
|
+
When the adapter was registered with ``--runtime <name>`` the user has
|
|
45
|
+
explicitly told us the backend loaded it under that name (e.g. a vLLM
|
|
46
|
+
``--lora-modules`` key or an Ollama Modelfile model name). In that case we
|
|
47
|
+
must send ``runtime_name`` — sending ``adapter.id`` would address a model
|
|
48
|
+
the backend has never heard of.
|
|
49
|
+
|
|
50
|
+
Priority: ``runtime_name`` (if set and non-empty) > ``id``.
|
|
51
|
+
"""
|
|
52
|
+
runtime = (adapter.runtime_name or "").strip()
|
|
53
|
+
return runtime if runtime else adapter.id
|
|
54
|
+
|
|
55
|
+
|
|
41
56
|
class BaseBackend(ABC):
|
|
42
57
|
"""Abstract base for inference backends."""
|
|
43
58
|
|
|
@@ -49,6 +64,14 @@ class BaseBackend(ABC):
|
|
|
49
64
|
def generate(self, prompt: str, adapter: AdapterEntry) -> str:
|
|
50
65
|
"""Send ``prompt`` to the backend and return the generated text."""
|
|
51
66
|
|
|
67
|
+
@abstractmethod
|
|
68
|
+
def list_loaded_adapters(self) -> list[str]:
|
|
69
|
+
"""Return the model/adapter names currently loaded in the backend.
|
|
70
|
+
|
|
71
|
+
Must use a short timeout and silently return ``[]`` if the backend is
|
|
72
|
+
unreachable — this method is only used for informational verification.
|
|
73
|
+
"""
|
|
74
|
+
|
|
52
75
|
|
|
53
76
|
# ---------------------------------------------------------------------------
|
|
54
77
|
# Ollama
|
|
@@ -109,7 +132,9 @@ class OllamaBackend(BaseBackend):
|
|
|
109
132
|
If True, Ollama streams response tokens. This client reads the
|
|
110
133
|
full stream and returns the concatenated text.
|
|
111
134
|
"""
|
|
112
|
-
|
|
135
|
+
# Explicit override wins; otherwise use the backend-effective name
|
|
136
|
+
# (runtime_name when set, else adapter.id).
|
|
137
|
+
model = model_name or effective_backend_name(adapter)
|
|
113
138
|
payload = {"model": model, "prompt": prompt, "stream": stream}
|
|
114
139
|
|
|
115
140
|
logger.debug("Ollama generate: model=%s", model)
|
|
@@ -126,6 +151,20 @@ class OllamaBackend(BaseBackend):
|
|
|
126
151
|
data = r.json()
|
|
127
152
|
return data.get("response", "")
|
|
128
153
|
|
|
154
|
+
def list_loaded_adapters(self) -> list[str]:
|
|
155
|
+
"""Return the names of all models loaded in Ollama (``GET /api/tags``).
|
|
156
|
+
|
|
157
|
+
Silently returns ``[]`` if Ollama is unreachable.
|
|
158
|
+
"""
|
|
159
|
+
try:
|
|
160
|
+
r = httpx.get(f"{self.base_url}/api/tags", timeout=_CONNECT_TIMEOUT)
|
|
161
|
+
r.raise_for_status()
|
|
162
|
+
models = r.json().get("models", [])
|
|
163
|
+
return [m["name"] for m in models if "name" in m]
|
|
164
|
+
except Exception as exc:
|
|
165
|
+
logger.debug("Ollama list_loaded_adapters failed: %s", exc)
|
|
166
|
+
return []
|
|
167
|
+
|
|
129
168
|
|
|
130
169
|
# ---------------------------------------------------------------------------
|
|
131
170
|
# vLLM
|
|
@@ -183,7 +222,9 @@ class VLLMBackend(BaseBackend):
|
|
|
183
222
|
system_prompt:
|
|
184
223
|
System message prepended before the user message.
|
|
185
224
|
"""
|
|
186
|
-
|
|
225
|
+
# Explicit override wins; otherwise use the backend-effective name
|
|
226
|
+
# (runtime_name when set, else adapter.id).
|
|
227
|
+
model = lora_name or effective_backend_name(adapter)
|
|
187
228
|
payload = {
|
|
188
229
|
"model": model,
|
|
189
230
|
"messages": [
|
|
@@ -209,6 +250,21 @@ class VLLMBackend(BaseBackend):
|
|
|
209
250
|
except (KeyError, IndexError) as exc:
|
|
210
251
|
raise BackendError(f"Unexpected vLLM response format: {data}") from exc
|
|
211
252
|
|
|
253
|
+
def list_loaded_adapters(self) -> list[str]:
|
|
254
|
+
"""Return all model/LoRA ids served by vLLM (``GET /v1/models``).
|
|
255
|
+
|
|
256
|
+
The ``data`` array lists the base model plus every ``--lora-modules``
|
|
257
|
+
key. Silently returns ``[]`` if vLLM is unreachable.
|
|
258
|
+
"""
|
|
259
|
+
try:
|
|
260
|
+
r = httpx.get(f"{self.base_url}/v1/models", timeout=_CONNECT_TIMEOUT)
|
|
261
|
+
r.raise_for_status()
|
|
262
|
+
data = r.json().get("data", [])
|
|
263
|
+
return [m["id"] for m in data if "id" in m]
|
|
264
|
+
except Exception as exc:
|
|
265
|
+
logger.debug("vLLM list_loaded_adapters failed: %s", exc)
|
|
266
|
+
return []
|
|
267
|
+
|
|
212
268
|
|
|
213
269
|
# ---------------------------------------------------------------------------
|
|
214
270
|
# BackendRouter — auto-detects which backend is live
|
|
@@ -276,6 +332,37 @@ class BackendRouter:
|
|
|
276
332
|
return "vllm"
|
|
277
333
|
return None
|
|
278
334
|
|
|
335
|
+
@property
|
|
336
|
+
def active_backend_url(self) -> str | None:
|
|
337
|
+
"""Return the base URL of the active backend, or None."""
|
|
338
|
+
if self._active is not None:
|
|
339
|
+
return self._active.base_url
|
|
340
|
+
return None
|
|
341
|
+
|
|
342
|
+
def verify_adapter(self, adapter: AdapterEntry) -> tuple[bool, str | None]:
|
|
343
|
+
"""Check whether an adapter is actually loaded in the active backend.
|
|
344
|
+
|
|
345
|
+
Auto-detects a backend if one hasn't been probed yet.
|
|
346
|
+
|
|
347
|
+
Returns
|
|
348
|
+
-------
|
|
349
|
+
``(is_loaded, backend_name)``
|
|
350
|
+
- ``(True, "<name>")`` — backend running and the adapter's
|
|
351
|
+
effective name is present in its loaded model list.
|
|
352
|
+
- ``(False, "<name>")`` — backend running but the name is absent.
|
|
353
|
+
- ``(False, None)`` — no backend reachable (verification skipped).
|
|
354
|
+
|
|
355
|
+
Never raises: HTTP failures degrade to ``(False, None)``.
|
|
356
|
+
"""
|
|
357
|
+
if self._active is None:
|
|
358
|
+
self.detect()
|
|
359
|
+
if self._active is None:
|
|
360
|
+
return (False, None)
|
|
361
|
+
|
|
362
|
+
target = effective_backend_name(adapter)
|
|
363
|
+
loaded = self._active.list_loaded_adapters()
|
|
364
|
+
return (target in loaded, self.active_backend_name)
|
|
365
|
+
|
|
279
366
|
|
|
280
367
|
# ---------------------------------------------------------------------------
|
|
281
368
|
# Exceptions
|
|
@@ -333,19 +333,8 @@ def animate_swap(from_adapter: str, to_adapter: str, duration: float = 1.5) -> N
|
|
|
333
333
|
# ---------------------------------------------------------------------------
|
|
334
334
|
|
|
335
335
|
def show_welcome_banner() -> None:
|
|
336
|
-
"""Print
|
|
337
|
-
|
|
338
|
-
banner.append("\n ⚡ shiftgate ", style="bold cyan")
|
|
339
|
-
banner.append("v0.1\n", style="dim")
|
|
340
|
-
banner.append(" Intelligent LoRA routing for local LLM inference\n", style="italic white")
|
|
341
|
-
banner.append(" Inspired by LORAUTER · EPFL 2026\n\n", style="dim")
|
|
342
|
-
|
|
343
|
-
panel = Panel(
|
|
344
|
-
Align.center(banner),
|
|
345
|
-
border_style="cyan",
|
|
346
|
-
expand=False,
|
|
347
|
-
)
|
|
348
|
-
console.print(Align.center(panel))
|
|
336
|
+
"""Print a one-line banner (``shiftgate demo`` only)."""
|
|
337
|
+
console.print("\n[bold cyan]⚡ shiftgate[/bold cyan]\n")
|
|
349
338
|
|
|
350
339
|
|
|
351
340
|
# ---------------------------------------------------------------------------
|
|
@@ -411,3 +400,148 @@ def show_status(
|
|
|
411
400
|
grid.add_row("Embeddings", Text(emb_label, style=emb_style))
|
|
412
401
|
|
|
413
402
|
console.print(Panel(grid, title="shiftgate status", border_style="cyan", expand=False))
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
# ---------------------------------------------------------------------------
|
|
406
|
+
# Doctor report
|
|
407
|
+
# ---------------------------------------------------------------------------
|
|
408
|
+
|
|
409
|
+
def show_doctor_report(
|
|
410
|
+
*,
|
|
411
|
+
embedder_ok: bool,
|
|
412
|
+
embedder_detail: str,
|
|
413
|
+
backend_name: str | None,
|
|
414
|
+
backend_url: str | None,
|
|
415
|
+
adapter_rows: list[dict],
|
|
416
|
+
n_tasks: int,
|
|
417
|
+
n_with_embeddings: int,
|
|
418
|
+
unlinked_tasks: list[str],
|
|
419
|
+
) -> None:
|
|
420
|
+
"""Render the full ``shiftgate doctor`` health report.
|
|
421
|
+
|
|
422
|
+
Parameters mirror the checks performed in ``cli.doctor``. Each section is
|
|
423
|
+
a Rich panel/table; a final summary line tallies pass / warn / fail.
|
|
424
|
+
"""
|
|
425
|
+
ok_mark = "[green]✓[/green]"
|
|
426
|
+
warn_mark = "[yellow]⚠[/yellow]"
|
|
427
|
+
fail_mark = "[red]✗[/red]"
|
|
428
|
+
|
|
429
|
+
warnings = 0
|
|
430
|
+
failures = 0
|
|
431
|
+
|
|
432
|
+
console.print()
|
|
433
|
+
console.rule("[bold cyan]shiftgate doctor[/bold cyan]")
|
|
434
|
+
console.print()
|
|
435
|
+
|
|
436
|
+
# --- Core checks grid ---
|
|
437
|
+
core = Table.grid(padding=(0, 2))
|
|
438
|
+
core.add_column(width=3)
|
|
439
|
+
core.add_column(style="bold", min_width=18)
|
|
440
|
+
core.add_column()
|
|
441
|
+
|
|
442
|
+
# Embedder
|
|
443
|
+
if embedder_ok:
|
|
444
|
+
core.add_row(ok_mark, "Embedder", Text(f"loaded ({embedder_detail})", style="green"))
|
|
445
|
+
else:
|
|
446
|
+
failures += 1
|
|
447
|
+
core.add_row(fail_mark, "Embedder", Text(f"failed: {embedder_detail}", style="red"))
|
|
448
|
+
|
|
449
|
+
# Backend
|
|
450
|
+
if backend_name:
|
|
451
|
+
core.add_row(
|
|
452
|
+
ok_mark,
|
|
453
|
+
"Backend",
|
|
454
|
+
Text(f"{backend_name} ({backend_url})", style="green"),
|
|
455
|
+
)
|
|
456
|
+
else:
|
|
457
|
+
warnings += 1
|
|
458
|
+
core.add_row(
|
|
459
|
+
warn_mark,
|
|
460
|
+
"Backend",
|
|
461
|
+
Text("none detected — start ollama serve or vLLM", style="yellow"),
|
|
462
|
+
)
|
|
463
|
+
|
|
464
|
+
# Task embeddings
|
|
465
|
+
if n_tasks > 0 and n_with_embeddings == n_tasks:
|
|
466
|
+
core.add_row(
|
|
467
|
+
ok_mark,
|
|
468
|
+
"Task embeddings",
|
|
469
|
+
Text(f"{n_with_embeddings}/{n_tasks} clusters ready", style="green"),
|
|
470
|
+
)
|
|
471
|
+
else:
|
|
472
|
+
warnings += 1
|
|
473
|
+
core.add_row(
|
|
474
|
+
warn_mark,
|
|
475
|
+
"Task embeddings",
|
|
476
|
+
Text(
|
|
477
|
+
f"{n_with_embeddings}/{n_tasks} computed — run `shiftgate init`",
|
|
478
|
+
style="yellow",
|
|
479
|
+
),
|
|
480
|
+
)
|
|
481
|
+
|
|
482
|
+
console.print(Panel(core, title="Core", border_style="cyan", expand=False))
|
|
483
|
+
console.print()
|
|
484
|
+
|
|
485
|
+
# --- Adapter availability table ---
|
|
486
|
+
if adapter_rows:
|
|
487
|
+
table = Table(
|
|
488
|
+
title="Adapter runtime availability",
|
|
489
|
+
box=box.ROUNDED,
|
|
490
|
+
header_style="bold cyan",
|
|
491
|
+
border_style="cyan",
|
|
492
|
+
)
|
|
493
|
+
table.add_column("Adapter ID", style="bold magenta")
|
|
494
|
+
table.add_column("Backend name")
|
|
495
|
+
table.add_column("Linked", justify="center")
|
|
496
|
+
table.add_column("Loaded", justify="center")
|
|
497
|
+
|
|
498
|
+
for row in adapter_rows:
|
|
499
|
+
linked = (
|
|
500
|
+
"[green]linked[/green]" if row["status"] == "linked"
|
|
501
|
+
else "[yellow]unassigned[/yellow]"
|
|
502
|
+
)
|
|
503
|
+
state = row["state"]
|
|
504
|
+
if state == "loaded":
|
|
505
|
+
loaded = f"{ok_mark} loaded"
|
|
506
|
+
elif state == "missing":
|
|
507
|
+
warnings += 1
|
|
508
|
+
loaded = f"{warn_mark} not loaded"
|
|
509
|
+
else: # unknown — no backend
|
|
510
|
+
loaded = "[dim]— (no backend)[/dim]"
|
|
511
|
+
table.add_row(row["id"], row["runtime"], linked, loaded)
|
|
512
|
+
|
|
513
|
+
console.print(table)
|
|
514
|
+
else:
|
|
515
|
+
console.print("[dim]No adapters registered. Add one with `shiftgate adapter add`.[/dim]")
|
|
516
|
+
console.print()
|
|
517
|
+
|
|
518
|
+
# --- Unlinked task clusters warning ---
|
|
519
|
+
if unlinked_tasks:
|
|
520
|
+
warnings += 1
|
|
521
|
+
console.print(
|
|
522
|
+
Panel(
|
|
523
|
+
Text(
|
|
524
|
+
"These task clusters have no linked adapter and will return "
|
|
525
|
+
"'No adapter available' if matched:\n "
|
|
526
|
+
+ ", ".join(unlinked_tasks),
|
|
527
|
+
style="yellow",
|
|
528
|
+
),
|
|
529
|
+
title=f"{warn_mark} Unlinked task clusters ({len(unlinked_tasks)})",
|
|
530
|
+
border_style="yellow",
|
|
531
|
+
expand=False,
|
|
532
|
+
)
|
|
533
|
+
)
|
|
534
|
+
console.print()
|
|
535
|
+
|
|
536
|
+
# --- Summary line ---
|
|
537
|
+
if failures:
|
|
538
|
+
summary = f"[bold red]{failures} failed[/bold red]"
|
|
539
|
+
if warnings:
|
|
540
|
+
summary += f", [yellow]{warnings} warning(s)[/yellow]"
|
|
541
|
+
elif warnings:
|
|
542
|
+
summary = f"[bold yellow]{warnings} warning(s)[/bold yellow] — shiftgate is usable but check above"
|
|
543
|
+
else:
|
|
544
|
+
summary = "[bold green]All checks passed — shiftgate is healthy.[/bold green]"
|
|
545
|
+
|
|
546
|
+
console.print(f" {summary}")
|
|
547
|
+
console.print()
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for the inference backend layer.
|
|
3
|
+
|
|
4
|
+
Focus areas:
|
|
5
|
+
- effective_backend_name() name resolution (runtime_name vs id)
|
|
6
|
+
- BackendRouter.verify_adapter() against a stubbed loaded-adapter list
|
|
7
|
+
No real HTTP calls are made.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import pytest
|
|
13
|
+
|
|
14
|
+
from shiftgate.registry.schemas import AdapterEntry
|
|
15
|
+
from shiftgate.runtime.backend import (
|
|
16
|
+
BackendRouter,
|
|
17
|
+
VLLMBackend,
|
|
18
|
+
effective_backend_name,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _adapter(**kwargs) -> AdapterEntry:
|
|
23
|
+
base = dict(id="sql-lora", name="SQL", base_model="llama3")
|
|
24
|
+
base.update(kwargs)
|
|
25
|
+
return AdapterEntry(**base)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
# effective_backend_name()
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
class TestEffectiveBackendName:
|
|
33
|
+
def test_uses_runtime_name_when_set(self):
|
|
34
|
+
adapter = _adapter(runtime_name="sql-lora-vllm")
|
|
35
|
+
assert effective_backend_name(adapter) == "sql-lora-vllm"
|
|
36
|
+
|
|
37
|
+
def test_falls_back_to_id_when_runtime_name_none(self):
|
|
38
|
+
adapter = _adapter(runtime_name=None, hf_repo="org/sql-lora")
|
|
39
|
+
assert effective_backend_name(adapter) == "sql-lora"
|
|
40
|
+
|
|
41
|
+
def test_falls_back_to_id_when_runtime_name_blank(self):
|
|
42
|
+
# An empty / whitespace runtime_name must not be used.
|
|
43
|
+
adapter = _adapter(runtime_name=" ", hf_repo="org/sql-lora")
|
|
44
|
+
assert effective_backend_name(adapter) == "sql-lora"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# ---------------------------------------------------------------------------
|
|
48
|
+
# generate() uses the effective name
|
|
49
|
+
# ---------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
class TestGenerateUsesEffectiveName:
|
|
52
|
+
def test_vllm_generate_sends_runtime_name(self, monkeypatch):
|
|
53
|
+
captured = {}
|
|
54
|
+
|
|
55
|
+
class _Resp:
|
|
56
|
+
def raise_for_status(self):
|
|
57
|
+
pass
|
|
58
|
+
|
|
59
|
+
def json(self):
|
|
60
|
+
return {"choices": [{"message": {"content": "ok"}}]}
|
|
61
|
+
|
|
62
|
+
def fake_post(url, json, timeout):
|
|
63
|
+
captured["model"] = json["model"]
|
|
64
|
+
return _Resp()
|
|
65
|
+
|
|
66
|
+
monkeypatch.setattr("shiftgate.runtime.backend.httpx.post", fake_post)
|
|
67
|
+
|
|
68
|
+
backend = VLLMBackend()
|
|
69
|
+
adapter = _adapter(runtime_name="sql-lora-vllm")
|
|
70
|
+
backend.generate("hello", adapter)
|
|
71
|
+
assert captured["model"] == "sql-lora-vllm"
|
|
72
|
+
|
|
73
|
+
def test_vllm_generate_explicit_override_wins(self, monkeypatch):
|
|
74
|
+
captured = {}
|
|
75
|
+
|
|
76
|
+
class _Resp:
|
|
77
|
+
def raise_for_status(self):
|
|
78
|
+
pass
|
|
79
|
+
|
|
80
|
+
def json(self):
|
|
81
|
+
return {"choices": [{"message": {"content": "ok"}}]}
|
|
82
|
+
|
|
83
|
+
def fake_post(url, json, timeout):
|
|
84
|
+
captured["model"] = json["model"]
|
|
85
|
+
return _Resp()
|
|
86
|
+
|
|
87
|
+
monkeypatch.setattr("shiftgate.runtime.backend.httpx.post", fake_post)
|
|
88
|
+
|
|
89
|
+
backend = VLLMBackend()
|
|
90
|
+
adapter = _adapter(runtime_name="sql-lora-vllm")
|
|
91
|
+
# Explicit lora_name must win over runtime_name.
|
|
92
|
+
backend.generate("hello", adapter, lora_name="override-name")
|
|
93
|
+
assert captured["model"] == "override-name"
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# ---------------------------------------------------------------------------
|
|
97
|
+
# BackendRouter.verify_adapter()
|
|
98
|
+
# ---------------------------------------------------------------------------
|
|
99
|
+
|
|
100
|
+
class TestVerifyAdapter:
|
|
101
|
+
def test_no_backend_returns_false_none(self, monkeypatch):
|
|
102
|
+
router = BackendRouter()
|
|
103
|
+
monkeypatch.setattr(router, "detect", lambda: None)
|
|
104
|
+
# Active stays None → verification skipped.
|
|
105
|
+
is_loaded, backend_name = router.verify_adapter(_adapter(runtime_name="x"))
|
|
106
|
+
assert is_loaded is False
|
|
107
|
+
assert backend_name is None
|
|
108
|
+
|
|
109
|
+
def test_loaded_returns_true(self, monkeypatch):
|
|
110
|
+
router = BackendRouter()
|
|
111
|
+
# Force vLLM active and stub its loaded list.
|
|
112
|
+
router._active = router._vllm
|
|
113
|
+
monkeypatch.setattr(router._vllm, "list_loaded_adapters", lambda: ["base", "sql-lora-vllm"])
|
|
114
|
+
is_loaded, backend_name = router.verify_adapter(_adapter(runtime_name="sql-lora-vllm"))
|
|
115
|
+
assert is_loaded is True
|
|
116
|
+
assert backend_name == "vllm"
|
|
117
|
+
|
|
118
|
+
def test_not_loaded_returns_false_with_backend(self, monkeypatch):
|
|
119
|
+
router = BackendRouter()
|
|
120
|
+
router._active = router._vllm
|
|
121
|
+
monkeypatch.setattr(router._vllm, "list_loaded_adapters", lambda: ["base"])
|
|
122
|
+
is_loaded, backend_name = router.verify_adapter(_adapter(runtime_name="sql-lora-vllm"))
|
|
123
|
+
assert is_loaded is False
|
|
124
|
+
assert backend_name == "vllm"
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# ---------------------------------------------------------------------------
|
|
128
|
+
# list_loaded_adapters() degrades gracefully when offline
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
|
|
131
|
+
class TestListLoadedAdaptersOffline:
|
|
132
|
+
def test_vllm_returns_empty_on_connection_error(self, monkeypatch):
|
|
133
|
+
def boom(*a, **k):
|
|
134
|
+
raise OSError("connection refused")
|
|
135
|
+
|
|
136
|
+
monkeypatch.setattr("shiftgate.runtime.backend.httpx.get", boom)
|
|
137
|
+
assert VLLMBackend().list_loaded_adapters() == []
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|