PyPI - shiftgate - Versions diffs - 0.1.9__tar.gz → 0.2.0__tar.gz - Mend

shiftgate 0.1.9.tar.gz → 0.2.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

{shiftgate-0.1.9 → shiftgate-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: shiftgate
-Version: 0.1.9
+Version: 0.2.0
 Summary: Intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop.
 Project-URL: Homepage, https://github.com/shiftgate-ai/shiftgate
 Project-URL: Repository, https://github.com/shiftgate-ai/shiftgate
@@ -276,6 +276,10 @@ User query
               └────────────────────────────────┘
 ```
+### How routing works
+When a backend is active, shiftgate filters candidate adapters to only those actually loaded on that backend. Switch from vLLM to Cerebras and shiftgate automatically picks Cerebras-compatible adapters — no re-registration needed. (When you run `shiftgate route` with no backend running, no filtering is applied, so you still see the full routing preview.)
 ---
 ## Bring Your Own Models

{shiftgate-0.1.9 → shiftgate-0.2.0}/README.md RENAMED Viewed

@@ -240,6 +240,10 @@ User query
               └────────────────────────────────┘
 ```
+### How routing works
+When a backend is active, shiftgate filters candidate adapters to only those actually loaded on that backend. Switch from vLLM to Cerebras and shiftgate automatically picks Cerebras-compatible adapters — no re-registration needed. (When you run `shiftgate route` with no backend running, no filtering is applied, so you still see the full routing preview.)
 ---
 ## Bring Your Own Models

{shiftgate-0.1.9 → shiftgate-0.2.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "shiftgate"
-version = "0.1.9"
+version = "0.2.0"
 description = "Intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop."
 readme = "README.md"
 requires-python = ">=3.10"

{shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/cli.py RENAMED Viewed

@@ -83,6 +83,19 @@ def _get_embedder():
     return Embedder()
+def _active_runtimes(backend_router) -> set[str] | None:
+    """Return the set of runtime names loaded on the active backend, or None.
+    ``None`` means no backend is active → the router should not filter
+    (preview behaviour).  An empty set means a backend is active but reports no
+    loaded models.
+    """
+    active = backend_router.active_backend
+    if active is None:
+        return None
+    return set(active.list_loaded_adapters())
 def _auto_link_adapter(adapter: AdapterEntry, task_reg) -> list[str]:
     """Add ``adapter.id`` to the ``preferred_adapters`` of matching task clusters.
@@ -464,6 +477,7 @@ def route(
     """
     from shiftgate.feedback import loop as feedback_loop
     from shiftgate.router import router as routing
+    from shiftgate.runtime.backend import BackendRouter
     from shiftgate.utils.display import show_explain_decision, show_routing_decision
     task_reg, adapter_reg = _load_registries()
@@ -474,8 +488,15 @@ def route(
     embedder = _get_embedder()
+    backend_router = BackendRouter()
+    backend_name = backend_router.detect()
+    available_runtimes = _active_runtimes(backend_router)
     try:
-        trace, match_result = routing.route(query, task_reg, adapter_reg, embedder, top_k=top_k)
+        trace, match_result = routing.route(
+            query, task_reg, adapter_reg, embedder,
+            top_k=top_k, available_runtimes=available_runtimes,
+        )
     except Exception as exc:
         console.print(f"[red]Routing error:[/red] {exc}")
         raise typer.Exit(1)
@@ -487,7 +508,9 @@ def route(
         trace,
         adapter=adapter,
         task_name=task.name if task else None,
-        backend_name=None,
+        backend_name=backend_name,
+        loaded_runtimes=available_runtimes,
+        selection_method=match_result.selection_method,
     )
     if explain:
@@ -524,22 +547,29 @@ def run(
     embedder = _get_embedder()
+    backend_router = BackendRouter()
+    backend_name = backend_router.detect()
+    available_runtimes = _active_runtimes(backend_router)
     try:
-        trace, match_result = routing.route(query, task_reg, adapter_reg, embedder, top_k=top_k)
+        trace, match_result = routing.route(
+            query, task_reg, adapter_reg, embedder,
+            top_k=top_k, available_runtimes=available_runtimes,
+        )
     except Exception as exc:
         console.print(f"[red]Routing error:[/red] {exc}")
         raise typer.Exit(1)
     adapter = adapter_reg.get_adapter(trace.selected_adapter_id)
     task = task_reg.get_task(trace.matched_task_id)
-    backend_router = BackendRouter()
-    backend_name = backend_router.detect()
     show_routing_decision(
         trace,
         adapter=adapter,
         task_name=task.name if task else None,
         backend_name=backend_name,
+        loaded_runtimes=available_runtimes,
+        selection_method=match_result.selection_method,
     )
     if adapter is None:

{shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/router/matcher.py RENAMED Viewed

@@ -117,6 +117,7 @@ def top_k_tasks(
 def select_adapter(
     top_tasks: list[TaskMatch],
     adapter_registry,  # AdapterRegistry — avoid circular import with string hint
+    available_runtimes: set[str] | None = None,
 ) -> MatchResult:
     """Select the adapter linked to the best-matching task.
@@ -125,15 +126,23 @@ def select_adapter(
     For each top task (highest score first), walk ``preferred_adapters`` then
     ``fallback_adapters`` and collect the adapters that exist in the registry
     (populating ``TaskMatch.candidate_adapters`` for the ``--explain`` view).
-    The first such adapter found, on the highest-scoring task, is selected.
+    The first viable adapter found, on the highest-scoring task, is selected.
+    Backend-aware filtering
+    -----------------------
+    When ``available_runtimes`` is provided (the set of model/adapter names
+    actually loaded on the active backend), only adapters whose
+    ``effective_backend_name()`` is in that set are considered viable.  If a
+    task's entire candidate list is filtered out, selection falls through to
+    the next-best task.  When ``available_runtimes`` is ``None`` no filtering
+    happens (the preview behaviour used by ``shiftgate route``).
     No silent fallback
     ------------------
     If the matched (top) task has **no** linked adapter in the registry, the
     router must NOT substitute an arbitrary adapter — doing so silently routes,
     e.g., a music query to a SQL adapter and destroys trust.  Instead this
-    returns a ``MatchResult`` with ``selected_adapter=None`` and
-    ``selection_method="no_adapter_for_task"``.
+    returns a ``MatchResult`` with ``selected_adapter=None``.
     Parameters
     ----------
@@ -141,39 +150,56 @@ def select_adapter(
         Output of ``top_k_tasks`` (sorted by score descending).
     adapter_registry:
         ``AdapterRegistry`` instance to look up adapter IDs.
+    available_runtimes:
+        Optional set of runtime names loaded on the active backend.  When set,
+        adapters not in the set are skipped during selection.
     Returns
     -------
-    ``MatchResult``.  ``selected_adapter`` is ``None`` when no adapter is
-    linked to any of the ranked tasks.  The ``matched_task`` is always the
-    top-scoring task so callers can still report what was matched.
+    ``MatchResult``.  ``selected_adapter`` is ``None`` when no viable adapter is
+    found.  ``selection_method`` is ``"no_adapter_on_active_backend"`` when
+    linked adapters exist but none are loaded on the active backend, otherwise
+    ``"no_adapter_for_task"``.  The ``matched_task`` is always the top-scoring
+    task so callers can still report what was matched.
     """
-    # Populate candidate lists for every task (for the --explain view) and
-    # find the first explicit match in score order.
+    def _is_viable(adapter) -> bool:
+        if available_runtimes is None:
+            return True
+        return adapter.effective_backend_name() in available_runtimes
     explicit_result: MatchResult | None = None
+    any_linked_adapter = False  # any task had at least one registered adapter
     for tm in top_tasks:
         preferred_ids = list(tm.task.preferred_adapters)
         fallback_ids = list(tm.task.fallback_adapters)
+        # Populate candidate_adapters with every registered adapter (for the
+        # --explain view, showing all candidates regardless of runtime).
         for adapter_id in preferred_ids + fallback_ids:
             adapter = adapter_registry.get_adapter(adapter_id)
             if adapter is not None and adapter not in tm.candidate_adapters:
                 tm.candidate_adapters.append(adapter)
-        if explicit_result is None and tm.candidate_adapters:
-            method = (
-                "preferred"
-                if tm.candidate_adapters[0].id in tm.task.preferred_adapters
-                else "fallback"
-            )
-            explicit_result = MatchResult(
-                selected_adapter=tm.candidate_adapters[0],
-                matched_task=tm.task,
-                similarity_score=tm.score,
-                all_task_matches=top_tasks,
-                selection_method=method,
-            )
+        if tm.candidate_adapters:
+            any_linked_adapter = True
+        if explicit_result is None:
+            viable = [a for a in tm.candidate_adapters if _is_viable(a)]
+            if viable:
+                chosen = viable[0]
+                method = (
+                    "preferred"
+                    if chosen.id in tm.task.preferred_adapters
+                    else "fallback"
+                )
+                explicit_result = MatchResult(
+                    selected_adapter=chosen,
+                    matched_task=tm.task,
+                    similarity_score=tm.score,
+                    all_task_matches=top_tasks,
+                    selection_method=method,
+                )
     if explicit_result is not None:
         logger.debug(
@@ -185,19 +211,29 @@ def select_adapter(
         )
         return explicit_result
-    # No adapter linked to any ranked task — do NOT guess. Report the matched
-    # task with no adapter so the caller can prompt the user to add one.
+    # No viable adapter across any ranked task. Distinguish "nothing linked at
+    # all" from "linked but not loaded on the active backend".
     top_task = top_tasks[0]
-    logger.info(
-        "No linked adapter for matched task '%s' — refusing to guess.",
-        top_task.task.id,
-    )
+    if available_runtimes is not None and any_linked_adapter:
+        method = "no_adapter_on_active_backend"
+        logger.info(
+            "Linked adapter(s) for task '%s' exist but none are loaded on the "
+            "active backend — refusing to guess.",
+            top_task.task.id,
+        )
+    else:
+        method = "no_adapter_for_task"
+        logger.info(
+            "No linked adapter for matched task '%s' — refusing to guess.",
+            top_task.task.id,
+        )
     return MatchResult(
         selected_adapter=None,
         matched_task=top_task.task,
         similarity_score=top_task.score,
         all_task_matches=top_tasks,
-        selection_method="no_adapter_for_task",
+        selection_method=method,
     )

{shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/router/router.py RENAMED Viewed

@@ -26,6 +26,7 @@ def route(
     adapter_registry: AdapterRegistry,
     embedder: Embedder,
     top_k: int = 3,
+    available_runtimes: set[str] | None = None,
 ) -> tuple[RoutingTrace, MatchResult]:
     """Route a query string to the best matching adapter.
@@ -48,6 +49,12 @@ def route(
         ``Embedder`` instance (wraps fastembed singleton).
     top_k:
         Number of top task candidates to consider.  Defaults to 3.
+    available_runtimes:
+        Optional set of runtime names loaded on the active backend.  When set,
+        adapters whose ``effective_backend_name()`` is not in the set are
+        skipped, falling through to the next-best task.  If no viable adapter
+        is found across all top-K tasks, the trace's ``selected_adapter_id`` is
+        ``None`` and ``selection_method`` is ``"no_adapter_on_active_backend"``.
     Returns
     -------
@@ -73,7 +80,7 @@ def route(
     query_embedding = embedder.embed(query)
     all_tasks = task_registry.get_all_tasks()
     ranked = top_k_tasks(query_embedding, all_tasks, k=top_k)
-    result = select_adapter(ranked, adapter_registry)
+    result = select_adapter(ranked, adapter_registry, available_runtimes=available_runtimes)
     selected_id = result.selected_adapter.id if result.selected_adapter else None

{shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/serve/app.py RENAMED Viewed

@@ -22,6 +22,7 @@ The actual upstream HTTP is delegated to a *forwarder* object stored on
 from __future__ import annotations
 import logging
+import time
 from typing import Any, AsyncIterator, Optional
 import httpx
@@ -39,6 +40,7 @@ logger = logging.getLogger(__name__)
 _ROUTE_HEADER = "X-Shiftgate-Route"
 _READ_TIMEOUT = 120.0
+_RUNTIMES_TTL = 60.0  # seconds to cache the active backend's loaded-runtime list
 # ---------------------------------------------------------------------------
@@ -127,6 +129,8 @@ def create_app(
     app.state.backend_router = backend_router
     app.state.backend_choice = backend
     app.state.forwarder = forwarder or HttpxForwarder()
+    # (timestamp, runtimes set) cache so we don't ping the backend on every request.
+    app.state.runtimes_cache = None
     def _embedder():
         if app.state.embedder is None:
@@ -142,6 +146,22 @@ def create_app(
             router.select(app.state.backend_choice)
         return router.active_backend
+    def _available_runtimes() -> set[str] | None:
+        """Loaded runtimes on the active backend, cached with a 60s TTL.
+        Returns ``None`` when no backend is active (no filtering).
+        """
+        active = _active_backend()
+        if active is None:
+            return None
+        cache = app.state.runtimes_cache
+        now = time.monotonic()
+        if cache is not None and (now - cache[0]) < _RUNTIMES_TTL:
+            return cache[1]
+        runtimes = set(active.list_loaded_adapters())
+        app.state.runtimes_cache = (now, runtimes)
+        return runtimes
     # -- health -------------------------------------------------------------
     @app.get("/health")
     def health() -> dict[str, Any]:
@@ -198,6 +218,7 @@ def create_app(
                     app.state.task_reg,
                     app.state.adapter_reg,
                     _embedder(),
+                    available_runtimes=_available_runtimes(),
                 )
             except ValueError as exc:
                 # Embeddings not initialised, etc.

{shiftgate-0.1.9 → shiftgate-0.2.0}/shiftgate/utils/display.py RENAMED Viewed

@@ -69,6 +69,8 @@ def show_routing_decision(
     adapter: AdapterEntry | None = None,
     task_name: str | None = None,
     backend_name: str | None = None,
+    loaded_runtimes: set[str] | None = None,
+    selection_method: str | None = None,
 ) -> None:
     """Print a Rich Panel describing a routing decision.
@@ -81,7 +83,13 @@ def show_routing_decision(
     task_name:
         Human-readable task cluster name (falls back to trace.matched_task_id).
     backend_name:
-        Active backend name ('ollama', 'vllm', or None).
+        Active backend name ('ollama', 'vllm', 'cerebras', or None).
+    loaded_runtimes:
+        Optional set of runtime names loaded on the active backend (used to
+        explain a ``no_adapter_on_active_backend`` outcome).
+    selection_method:
+        The ``MatchResult.selection_method`` (e.g. ``"no_adapter_for_task"`` or
+        ``"no_adapter_on_active_backend"``) used to tailor the no-adapter help.
     """
     # When no adapter was selected the decision is unactionable — render red
     # regardless of how confident the task match was.
@@ -101,7 +109,25 @@ def show_routing_decision(
     task_text.append_text(_similarity_bar(trace.similarity_score))
     grid.add_row("Matched Task", task_text)
-    if no_adapter:
+    if no_adapter and selection_method == "no_adapter_on_active_backend":
+        # Adapters are linked to this task but none are loaded on the active
+        # backend — a different, backend-specific message.
+        grid.add_row(
+            "Adapter",
+            Text(
+                f"No adapter loaded on backend '{backend_name or 'unknown'}'",
+                style="bold red",
+            ),
+        )
+        runtimes = sorted(loaded_runtimes) if loaded_runtimes else []
+        runtimes_label = ", ".join(runtimes) if runtimes else "(none)"
+        grid.add_row("Loaded runtimes", Text(runtimes_label, style="dim"))
+        suggestion = Text()
+        suggestion.append("Try ", style="dim")
+        suggestion.append("shiftgate adapter list", style="cyan")
+        suggestion.append(" to see what's registered.", style="dim")
+        grid.add_row("Suggestion", suggestion)
+    elif no_adapter:
         # Never silently substitute an adapter. Tell the user how to fix it.
         adapter_text = Text("No adapter available", style="bold red")
         grid.add_row("Adapter", adapter_text)

{shiftgate-0.1.9 → shiftgate-0.2.0}/tests/test_router.py RENAMED Viewed

@@ -307,3 +307,91 @@ class TestRouteFunction:
     def test_hf_adapter_effective_name_falls_back_to_id(self):
         adapter = adapter_from_hf("org/sql-lora", adapter_id="sql-lora")
         assert adapter.effective_backend_name() == "sql-lora"
+# ---------------------------------------------------------------------------
+# Backend-aware adapter filtering
+# ---------------------------------------------------------------------------
+class TestBackendAwareFiltering:
+    def test_filter_excludes_unloaded_runtime(self, synthetic_tasks, adapter_reg):
+        """An adapter whose runtime is not loaded is skipped during selection."""
+        query_emb = np.array([1.0, 0.0, 0.0], dtype=np.float32)  # → task_x → adapter-x
+        ranked = top_k_tasks(query_emb, synthetic_tasks, k=3)
+        # adapter-x is NOT in the loaded set; task_x has only adapter-x → skip it.
+        # No other top task has a loaded adapter either → None.
+        result = select_adapter(ranked, adapter_reg, available_runtimes={"something-else"})
+        assert result.selected_adapter is None
+        assert result.selection_method == "no_adapter_on_active_backend"
+    def test_fall_through_to_next_best_task(self, synthetic_tasks, adapter_reg):
+        """When top-1 task's adapter isn't loaded, selection falls through."""
+        query_emb = np.array([1.0, 0.0, 0.0], dtype=np.float32)  # task_x is top
+        ranked = top_k_tasks(query_emb, synthetic_tasks, k=3)
+        # Only adapter-y (task_y) is loaded → fall through from task_x to task_y.
+        result = select_adapter(ranked, adapter_reg, available_runtimes={"adapter-y"})
+        assert result.selected_adapter is not None
+        assert result.selected_adapter.id == "adapter-y"
+        assert result.matched_task.id == "task_y"
+    def test_none_preserves_unfiltered_behavior(self, synthetic_tasks, adapter_reg):
+        """available_runtimes=None means no filtering (preview behaviour)."""
+        query_emb = np.array([1.0, 0.0, 0.0], dtype=np.float32)
+        ranked = top_k_tasks(query_emb, synthetic_tasks, k=3)
+        result = select_adapter(ranked, adapter_reg, available_runtimes=None)
+        assert result.selected_adapter.id == "adapter-x"
+        assert result.selection_method == "preferred"
+    def test_filter_respects_runtime_name(self, tmp_path):
+        """Filtering matches on effective_backend_name (runtime_name when set)."""
+        task = _make_task("task_x", [1, 0, 0], ["sql-lora"])
+        adapter = adapter_from_runtime("sql-lora-vllm", adapter_id="sql-lora")
+        reg = AdapterRegistry(adapters=[adapter], source_path=tmp_path / "a.json")
+        query_emb = np.array([1.0, 0.0, 0.0], dtype=np.float32)
+        ranked = top_k_tasks(query_emb, [task], k=1)
+        # The registry id "sql-lora" is loaded, but the runtime name is not →
+        # filtering uses the runtime name, so this is NOT viable.
+        miss = select_adapter(ranked, reg, available_runtimes={"sql-lora"})
+        assert miss.selected_adapter is None
+        assert miss.selection_method == "no_adapter_on_active_backend"
+        # The runtime name IS loaded → viable.
+        hit = select_adapter(ranked, reg, available_runtimes={"sql-lora-vllm"})
+        assert hit.selected_adapter is not None
+        assert hit.selected_adapter.id == "sql-lora"
+    def test_no_linked_adapter_still_reports_no_adapter_for_task(self, tmp_path):
+        """Filtering active but task has no linked adapter at all → for_task reason."""
+        task = _make_task("task_x", [1, 0, 0], adapter_ids=[])
+        reg = AdapterRegistry(adapters=[], source_path=tmp_path / "a.json")
+        query_emb = np.array([1.0, 0.0, 0.0], dtype=np.float32)
+        ranked = top_k_tasks(query_emb, [task], k=1)
+        result = select_adapter(ranked, reg, available_runtimes={"anything"})
+        assert result.selected_adapter is None
+        assert result.selection_method == "no_adapter_for_task"
+class TestRouteFiltering:
+    def test_route_filters_by_available_runtimes(self, task_reg, adapter_reg):
+        """route() passes the filter through and sets the right method on a miss."""
+        from shiftgate.router.router import route
+        # python query → task_x → adapter-x. None of the registry's adapters
+        # are loaded on the active backend → no viable adapter anywhere.
+        trace, result = route(
+            "write python code", task_reg, adapter_reg, MockEmbedder(),
+            available_runtimes={"unrelated-runtime"},
+        )
+        assert result.selected_adapter is None
+        assert trace.selected_adapter_id is None
+        assert result.selection_method == "no_adapter_on_active_backend"
+    def test_route_none_runtimes_unfiltered(self, task_reg, adapter_reg):
+        from shiftgate.router.router import route
+        trace, result = route(
+            "write python code", task_reg, adapter_reg, MockEmbedder(),
+            available_runtimes=None,
+        )
+        assert trace.selected_adapter_id == "adapter-x"

{shiftgate-0.1.9 → shiftgate-0.2.0}/tests/test_serve.py RENAMED Viewed

@@ -96,6 +96,9 @@ def client(tmp_path, forwarder):
     router = BackendRouter()
     router.select("vllm")  # force-select; no availability ping
+    # Report the adapters as "loaded" so backend-aware filtering keeps them.
+    # adapter-x's runtime name is "adapter-x-vllm"; adapter-y has no runtime name.
+    router._vllm.list_loaded_adapters = lambda: ["adapter-x-vllm", "adapter-y"]
     app = create_app(
         backend="vllm",
@@ -179,6 +182,7 @@ def test_no_adapter_returns_400(tmp_path, forwarder):
     adapter_reg = AdapterRegistry(adapters=[], source_path=tmp_path / "adapters.json")
     router = BackendRouter()
     router.select("vllm")
+    router._vllm.list_loaded_adapters = lambda: []
     app = create_app(
         backend="vllm",
         task_reg=task_reg,