shiftgate 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shiftgate/__init__.py +9 -0
- shiftgate/cli.py +513 -0
- shiftgate/feedback/__init__.py +1 -0
- shiftgate/feedback/loop.py +182 -0
- shiftgate/registry/__init__.py +1 -0
- shiftgate/registry/adapter_registry.py +162 -0
- shiftgate/registry/schemas.py +115 -0
- shiftgate/registry/task_registry.py +186 -0
- shiftgate/router/__init__.py +1 -0
- shiftgate/router/embedder.py +95 -0
- shiftgate/router/matcher.py +115 -0
- shiftgate/router/router.py +97 -0
- shiftgate/runtime/__init__.py +1 -0
- shiftgate/runtime/backend.py +289 -0
- shiftgate/utils/__init__.py +1 -0
- shiftgate/utils/display.py +297 -0
- shiftgate-0.1.0.dist-info/METADATA +273 -0
- shiftgate-0.1.0.dist-info/RECORD +20 -0
- shiftgate-0.1.0.dist-info/WHEEL +4 -0
- shiftgate-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Rich terminal UI helpers for shiftgate.
|
|
3
|
+
|
|
4
|
+
All console output in shiftgate goes through this module so that visual style
|
|
5
|
+
is centralised and easy to update.  Functions take domain objects or plain
values rather than pre-formatted strings, so callers stay decoupled from
formatting decisions.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import time
|
|
13
|
+
|
|
14
|
+
from rich import box
|
|
15
|
+
from rich.align import Align
|
|
16
|
+
from rich.console import Console
|
|
17
|
+
from rich.live import Live
|
|
18
|
+
from rich.panel import Panel
|
|
19
|
+
from rich.spinner import Spinner
|
|
20
|
+
from rich.table import Table
|
|
21
|
+
from rich.text import Text
|
|
22
|
+
|
|
23
|
+
from shiftgate.registry.schemas import AdapterEntry, RoutingTrace, TaskCluster
|
|
24
|
+
|
|
25
|
+
console = Console()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
# Colour helpers
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
def _similarity_colour(score: float) -> str:
|
|
33
|
+
"""Return a colour name based on similarity score thresholds."""
|
|
34
|
+
if score >= 0.80:
|
|
35
|
+
return "green"
|
|
36
|
+
if score >= 0.60:
|
|
37
|
+
return "yellow"
|
|
38
|
+
return "red"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _similarity_bar(score: float, width: int = 20) -> Text:
    """Build a coloured progress bar for the similarity score.

    Parameters
    ----------
    score:
        Fraction to visualise, nominally in ``[0, 1]``.  Values outside that
        range are drawn as an empty or a full bar; the percentage label still
        shows the real value.
    width:
        Total number of bar cells.

    Returns
    -------
    Text
        The bar cells followed by the score as a percentage, both styled with
        the colour chosen by ``_similarity_colour``.
    """
    # Clamp the filled-cell count to [0, width].  Without this a negative
    # score (or one above 1.0) makes one of the two repeat counts negative
    # and the other larger than ``width``, so the bar overflows its column.
    filled = min(max(round(score * width), 0), width)
    bar = "█" * filled + "░" * (width - filled)
    colour = _similarity_colour(score)
    text = Text()
    text.append(bar, style=colour)
    text.append(f" {score * 100:.1f}%", style=f"bold {colour}")
    return text
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# ---------------------------------------------------------------------------
|
|
53
|
+
# Routing decision panel
|
|
54
|
+
# ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
def show_routing_decision(
    trace: RoutingTrace,
    adapter: AdapterEntry | None = None,
    task_name: str | None = None,
    backend_name: str | None = None,
) -> None:
    """Render one routing decision as a bordered panel on the console.

    Parameters
    ----------
    trace:
        The ``RoutingTrace`` to display.  Its query, matched task id,
        selected adapter id, similarity score and latency are read.
    adapter:
        Optional ``AdapterEntry``; when given, its name, base model and
        (if set) HF repo are shown instead of the bare adapter id.
    task_name:
        Human-readable task cluster name (falls back to
        ``trace.matched_task_id``).
    backend_name:
        Active backend name; a dim dash is shown when it is falsy.
    """
    # The similarity band drives both the panel border and the title colour.
    accent = _similarity_colour(trace.similarity_score)

    query_cell = Text(f'"{trace.query}"', style="italic cyan")

    # Task name (or id fallback), a spacer, then the similarity bar.
    task_cell = Text()
    task_cell.append(task_name or trace.matched_task_id, style="bold white")
    task_cell.append(" ")
    task_cell.append_text(_similarity_bar(trace.similarity_score))

    if adapter:
        adapter_cell = Text()
        adapter_cell.append(adapter.name, style="bold magenta")
        adapter_cell.append(f" [{adapter.base_model}]", style="dim")
        if adapter.hf_repo:
            adapter_cell.append(f"\n hf: {adapter.hf_repo}", style="dim blue")
    else:
        adapter_cell = Text(trace.selected_adapter_id, style="bold magenta")

    if backend_name:
        backend_cell = Text(backend_name, style="green")
    else:
        backend_cell = Text("—", style="dim")

    rows = [
        ("Query", query_cell),
        ("Matched Task", task_cell),
        ("Adapter", adapter_cell),
        ("Backend", backend_cell),
    ]
    # Latency is optional on the trace; omit the row entirely when unset.
    if trace.latency_ms is not None:
        rows.append(("Latency", Text(f"{trace.latency_ms:.0f} ms", style="dim")))

    layout = Table.grid(padding=(0, 2))
    layout.add_column(style="dim", min_width=16)
    layout.add_column()
    for label, cell in rows:
        layout.add_row(label, cell)

    # Blank lines before and after keep the panel visually separated.
    console.print()
    console.print(
        Panel(
            layout,
            title=f"[bold {accent}] shiftgate routing decision [/bold {accent}]",
            border_style=accent,
            expand=False,
        )
    )
    console.print()
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# ---------------------------------------------------------------------------
|
|
118
|
+
# Adapter table
|
|
119
|
+
# ---------------------------------------------------------------------------
|
|
120
|
+
|
|
121
|
+
def show_adapter_table(adapters: list[AdapterEntry]) -> None:
    """Print a Rich table listing all registered adapters.

    Parameters
    ----------
    adapters:
        Adapter entries to list.  When empty, a hint on how to add an
        adapter is printed instead of a table.

    Each row shows the adapter's id, name, base model, tags, HF repo (or
    local path when no repo is set) and benchmark score; missing values are
    rendered as an em dash.
    """
    if not adapters:
        console.print("[dim]No adapters registered. Add one with `shiftgate adapter add <hf_repo>`.[/dim]")
        return

    def literal(value: object) -> Text:
        """Wrap *value* in ``Text`` so Rich prints it verbatim."""
        # Plain ``str`` cells are parsed as console markup, so a registry
        # value such as "[/x]" would raise MarkupError and "[bold]" would be
        # swallowed.  ``None`` stays an empty cell, as it was before.
        return Text("" if value is None else str(value))

    table = Table(
        title="Registered Adapters",
        box=box.ROUNDED,
        show_header=True,
        header_style="bold cyan",
        border_style="cyan",
    )
    table.add_column("ID", style="bold magenta", no_wrap=True)
    table.add_column("Name")
    table.add_column("Base Model", style="dim")
    table.add_column("Tags", style="green")
    table.add_column("HF Repo / Local Path", style="blue")
    table.add_column("Score", justify="right")

    for a in adapters:
        location = a.hf_repo or a.local_path or "—"
        score = f"{a.benchmark_score:.2f}" if a.benchmark_score is not None else "—"
        tags = ", ".join(a.task_tags) if a.task_tags else "—"
        # ``score`` is a formatted number or a dash, so it is safe as a str.
        table.add_row(
            literal(a.id),
            literal(a.name),
            literal(a.base_model),
            literal(tags),
            literal(location),
            score,
        )

    console.print(table)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
# ---------------------------------------------------------------------------
|
|
151
|
+
# Task cluster table
|
|
152
|
+
# ---------------------------------------------------------------------------
|
|
153
|
+
|
|
154
|
+
def show_task_table(tasks: list[TaskCluster]) -> None:
    """Print a Rich table listing all task clusters and their preferred adapters.

    Parameters
    ----------
    tasks:
        Task clusters to list.  When empty, a short notice is printed
        instead of a table.

    Each row shows the cluster's id, name, description, preferred adapters
    and a tick/cross indicating whether ``embedding_centroid`` is truthy.
    """
    if not tasks:
        console.print("[dim]No task clusters found.[/dim]")
        return

    def literal(value: object) -> Text:
        """Wrap *value* in ``Text`` so Rich prints it verbatim."""
        # Plain ``str`` cells are parsed as console markup, so a
        # user-authored description containing "[...]" could lose text or
        # raise MarkupError.  ``None`` stays an empty cell, as before.
        return Text("" if value is None else str(value))

    table = Table(
        title="Task Clusters",
        box=box.ROUNDED,
        show_header=True,
        header_style="bold cyan",
        border_style="cyan",
    )
    table.add_column("ID", style="bold yellow", no_wrap=True)
    table.add_column("Name")
    table.add_column("Description", max_width=40)
    table.add_column("Preferred Adapters", style="magenta")
    table.add_column("Centroid", justify="center")

    for t in tasks:
        preferred = ", ".join(t.preferred_adapters) if t.preferred_adapters else "—"
        # This cell is intentionally markup: it is a fixed literal, not data.
        has_centroid = "[green]✓[/green]" if t.embedding_centroid else "[red]✗[/red]"
        table.add_row(
            literal(t.id),
            literal(t.name),
            literal(t.description),
            literal(preferred),
            has_centroid,
        )

    console.print(table)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# ---------------------------------------------------------------------------
|
|
182
|
+
# Adapter swap animation
|
|
183
|
+
# ---------------------------------------------------------------------------
|
|
184
|
+
|
|
185
|
+
def animate_swap(from_adapter: str, to_adapter: str, duration: float = 1.5) -> None:
    """Show a short spinner animation while "swapping" from one adapter to another.

    Parameters
    ----------
    from_adapter:
        Name shown as the adapter being swapped out.
    to_adapter:
        Name shown as the adapter being swapped in.
    duration:
        How long, in seconds, the spinner is shown before the summary line.
    """
    spinner = Spinner("dots", style="cyan")
    label = Text()
    label.append("Swapping ", style="dim")
    label.append(from_adapter, style="bold yellow")
    label.append(" → ", style="dim")
    label.append(to_adapter, style="bold green")

    with Live(Align.center(label), refresh_per_second=12, console=console) as live:
        end = time.monotonic() + duration
        while time.monotonic() < end:
            # Render the spinner frame for the current time and redraw it
            # next to the fixed label.
            frame = spinner.render(time.monotonic())
            display = Text.assemble(frame, " ", label)
            live.update(Align.center(display))
            time.sleep(0.08)

    # Build the summary as Text, like the label above, instead of
    # interpolating the names into a markup string: an adapter name
    # containing "[...]" is then printed literally rather than being parsed
    # as console markup.
    summary = Text(" ")
    summary.append("✓", style="green")
    summary.append(" Swapped ")
    summary.append(from_adapter, style="yellow")
    summary.append(" → ")
    summary.append(to_adapter, style="green")
    console.print(summary)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
# ---------------------------------------------------------------------------
|
|
208
|
+
# Welcome / init banner
|
|
209
|
+
# ---------------------------------------------------------------------------
|
|
210
|
+
|
|
211
|
+
def show_welcome_banner() -> None:
    """Print the centred shiftgate welcome banner used by `shiftgate init`."""
    # (text, style) pairs, appended in order to a centre-justified Text.
    segments = (
        ("\n ⚡ shiftgate ", "bold cyan"),
        ("v0.1\n", "dim"),
        (" Intelligent LoRA routing for local LLM inference\n", "italic white"),
        (" Inspired by LORAUTER · EPFL 2026\n\n", "dim"),
    )
    banner = Text(justify="center")
    for content, style in segments:
        banner.append(content, style=style)

    # The panel hugs its content (expand=False) and is itself centred.
    framed = Panel(Align.center(banner), border_style="cyan", expand=False)
    console.print(Align.center(framed))
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
# ---------------------------------------------------------------------------
|
|
228
|
+
# Adapter acceptance / feedback stats table
|
|
229
|
+
# ---------------------------------------------------------------------------
|
|
230
|
+
|
|
231
|
+
def show_feedback_stats(scores: dict[str, float], stats: dict[str, int]) -> None:
    """Print a summary of adapter acceptance rates and overall trace stats.

    Parameters
    ----------
    scores:
        Output of ``feedback.loop.compute_adapter_scores()``; used here as a
        mapping of adapter id to acceptance rate (a fraction).
    stats:
        Output of ``feedback.loop.get_trace_stats()``.  The keys ``total``,
        ``accepted``, ``rejected`` and ``unrated`` are read, each defaulting
        to 0 when absent.

    A one-line trace summary is always printed.  The per-adapter table,
    sorted by descending rate, follows only when ``scores`` is non-empty.
    """
    console.print()
    console.print(
        f"[bold]Traces:[/bold] "
        f"total=[cyan]{stats.get('total', 0)}[/cyan] "
        f"accepted=[green]{stats.get('accepted', 0)}[/green] "
        f"rejected=[red]{stats.get('rejected', 0)}[/red] "
        f"unrated=[dim]{stats.get('unrated', 0)}[/dim]"
    )
    console.print()

    if not scores:
        console.print("[dim]No rated traces yet. Run `shiftgate feedback accept/reject` after routing.[/dim]")
        return

    table = Table(
        title="Adapter Acceptance Rates",
        box=box.ROUNDED,
        header_style="bold cyan",
        border_style="cyan",
    )
    table.add_column("Adapter ID", style="bold magenta")
    table.add_column("Acceptance Rate", justify="right")
    table.add_column("Bar")

    # Highest acceptance rate first.
    for adapter_id, rate in sorted(scores.items(), key=lambda x: -x[1]):
        rate_text = f"{rate * 100:.1f}%"
        bar = _similarity_bar(rate, width=15)
        # Wrap the id in Text so Rich prints it verbatim; a plain str cell
        # would be parsed as console markup.
        table.add_row(Text(str(adapter_id)), rate_text, bar)

    console.print(table)
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
# ---------------------------------------------------------------------------
|
|
274
|
+
# Status display
|
|
275
|
+
# ---------------------------------------------------------------------------
|
|
276
|
+
|
|
277
|
+
def show_status(
    backend_name: str | None,
    n_adapters: int,
    n_tasks: int,
    embeddings_ready: bool,
) -> None:
    """Print the compact status panel shown by `shiftgate status`.

    Parameters
    ----------
    backend_name:
        Name of the detected backend; a red "none detected" is shown when
        it is falsy.
    n_adapters:
        Number of registered adapters.
    n_tasks:
        Number of task clusters.
    embeddings_ready:
        Whether embeddings are available; when false, a hint to run
        `shiftgate init` is shown.
    """
    if backend_name:
        backend_cell = Text(backend_name, style="bold green")
    else:
        backend_cell = Text("none detected", style="bold red")

    if embeddings_ready:
        embeddings_cell = Text("ready", style="green")
    else:
        embeddings_cell = Text("not initialised — run `shiftgate init`", style="yellow")

    summary = Table.grid(padding=(0, 2))
    summary.add_column(style="dim", min_width=20)
    summary.add_column()

    # Row order is the display order.
    for label, cell in (
        ("Backend", backend_cell),
        ("Adapters registered", Text(str(n_adapters), style="bold cyan")),
        ("Task clusters", Text(str(n_tasks), style="bold cyan")),
        ("Embeddings", embeddings_cell),
    ):
        summary.add_row(label, cell)

    console.print(Panel(summary, title="shiftgate status", border_style="cyan", expand=False))
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: shiftgate
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop.
|
|
5
|
+
Project-URL: Homepage, https://github.com/shiftgate-ai/shiftgate
|
|
6
|
+
Project-URL: Repository, https://github.com/shiftgate-ai/shiftgate
|
|
7
|
+
Project-URL: Issues, https://github.com/shiftgate-ai/shiftgate/issues
|
|
8
|
+
Author: shiftgate contributors
|
|
9
|
+
License: MIT
|
|
10
|
+
Keywords: adapters,inference,llm,lora,ollama,routing,vllm
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Requires-Dist: fastembed>=0.3.0
|
|
22
|
+
Requires-Dist: httpx>=0.27.0
|
|
23
|
+
Requires-Dist: huggingface-hub>=0.22.0
|
|
24
|
+
Requires-Dist: numpy>=1.26.0
|
|
25
|
+
Requires-Dist: pydantic>=2.6.0
|
|
26
|
+
Requires-Dist: rich>=13.7.0
|
|
27
|
+
Requires-Dist: scikit-learn>=1.4.0
|
|
28
|
+
Requires-Dist: typer>=0.12.0
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# shiftgate ⚡
|
|
35
|
+
|
|
36
|
+
> **shiftgate is an intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop.**
|
|
37
|
+
|
|
38
|
+
Instead of hardcoding which adapter to use, shiftgate embeds your query and matches it against a catalog of task clusters using cosine similarity — then routes inference to the best-fit LoRA adapter on your running Ollama or vLLM instance. Think of it as "npm for LoRA adapters + an automatic brain that picks the right one per task."
|
|
39
|
+
|
|
40
|
+
Inspired by the [LORAUTER paper](https://arxiv.org/abs/2406.08213) (EPFL, 2026).
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Quickstart
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
# 1. Install (requires Python 3.10+, uv recommended)
|
|
48
|
+
uv add shiftgate
|
|
49
|
+
|
|
50
|
+
# 2. Initialise: sets up ~/.shiftgate/, downloads the embedding model,
|
|
51
|
+
# and computes task centroids
|
|
52
|
+
shiftgate init
|
|
53
|
+
|
|
54
|
+
# 3. Register a LoRA adapter from HuggingFace
|
|
55
|
+
shiftgate adapter add monology/pmc-llama-13b-lora --base meta-llama/Meta-Llama-3-8B --tags medical qa
|
|
56
|
+
|
|
57
|
+
# 4. Route a query (shows decision, no inference needed)
|
|
58
|
+
shiftgate route "explain the mechanism of action of ibuprofen"
|
|
59
|
+
|
|
60
|
+
# 5. Route + run (requires Ollama or vLLM running locally)
|
|
61
|
+
shiftgate run "explain the mechanism of action of ibuprofen"
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## Architecture
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
User query
|
|
70
|
+
│
|
|
71
|
+
▼
|
|
72
|
+
┌──────────────────────────────────────────────────┐
|
|
73
|
+
│ shiftgate CLI │
|
|
74
|
+
│ shiftgate route / shiftgate run │
|
|
75
|
+
└────────────────────┬─────────────────────────────┘
|
|
76
|
+
│
|
|
77
|
+
▼
|
|
78
|
+
┌──────────────────────────────────────────────────┐
|
|
79
|
+
│ Router │
|
|
80
|
+
│ │
|
|
81
|
+
│ 1. Embed query (fastembed BAAI/bge-small-en) │
|
|
82
|
+
│ 2. Cosine similarity vs task centroids │
|
|
83
|
+
│ 3. top-K tasks → walk preferred_adapters list │
|
|
84
|
+
│ 4. Return RoutingTrace │
|
|
85
|
+
└──────────┬───────────────────────┬───────────────┘
|
|
86
|
+
│ │
|
|
87
|
+
▼ ▼
|
|
88
|
+
┌─────────────────┐ ┌────────────────────────────┐
|
|
89
|
+
│ Task Registry │ │ Adapter Registry │
|
|
90
|
+
│ ~/.shiftgate/ │ │ ~/.shiftgate/adapters.json │
|
|
91
|
+
│ tasks.json │ │ │
|
|
92
|
+
│ (10 defaults) │ │ Add via: │
|
|
93
|
+
└─────────────────┘ │ shiftgate adapter add │
|
|
94
|
+
└────────────┬───────────────┘
|
|
95
|
+
│
|
|
96
|
+
▼
|
|
97
|
+
┌────────────────────────────────┐
|
|
98
|
+
│ BackendRouter │
|
|
99
|
+
│ │
|
|
100
|
+
│ Ollama (localhost:11434) │
|
|
101
|
+
│ vLLM (localhost:8000) │
|
|
102
|
+
│ Auto-detected at runtime │
|
|
103
|
+
└────────────────────────────────┘
|
|
104
|
+
│
|
|
105
|
+
▼
|
|
106
|
+
┌────────────────────────────────┐
|
|
107
|
+
│ Feedback Loop │
|
|
108
|
+
│ ~/.shiftgate/traces.jsonl │
|
|
109
|
+
│ shiftgate feedback accept │
|
|
110
|
+
│ shiftgate feedback stats │
|
|
111
|
+
└────────────────────────────────┘
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## Commands
|
|
117
|
+
|
|
118
|
+
| Command | Description |
|
|
119
|
+
|---|---|
|
|
120
|
+
| `shiftgate init` | First-time setup: copy defaults to `~/.shiftgate/`, compute embeddings |
|
|
121
|
+
| `shiftgate route "<query>"` | Route a query and show the decision — no inference |
|
|
122
|
+
| `shiftgate run "<query>"` | Route + run via Ollama or vLLM |
|
|
123
|
+
| `shiftgate adapter add <hf_repo>` | Register a new LoRA adapter |
|
|
124
|
+
| `shiftgate adapter list` | Table of all registered adapters |
|
|
125
|
+
| `shiftgate adapter remove <id>` | Remove an adapter |
|
|
126
|
+
| `shiftgate task list` | Table of all task clusters |
|
|
127
|
+
| `shiftgate task add` | Interactively add a new task cluster |
|
|
128
|
+
| `shiftgate feedback accept` | Mark last routing as good |
|
|
129
|
+
| `shiftgate feedback reject` | Mark last routing as bad |
|
|
130
|
+
| `shiftgate feedback stats` | Adapter acceptance rate table |
|
|
131
|
+
| `shiftgate status` | Backend connectivity + registry summary |
|
|
132
|
+
| `shiftgate demo` | Animated demo with fake routing traces |
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## Using with Ollama
|
|
137
|
+
|
|
138
|
+
Ollama supports LoRA adapters via custom Modelfiles. Create one per adapter:
|
|
139
|
+
|
|
140
|
+
```dockerfile
|
|
141
|
+
# my-lora.Modelfile
|
|
142
|
+
FROM llama3
|
|
143
|
+
ADAPTER /path/to/my-adapter.safetensors
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
ollama create my-lora -f my-lora.Modelfile
|
|
148
|
+
ollama serve
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
Register the adapter in shiftgate using the same ID:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
shiftgate adapter add my-org/my-lora --base meta-llama/Meta-Llama-3-8B
|
|
155
|
+
# The adapter id defaults to the repo slug: "my-lora"
|
|
156
|
+
# shiftgate will pass model="my-lora" to Ollama → activates the Modelfile
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Using with vLLM
|
|
160
|
+
|
|
161
|
+
vLLM loads LoRA adapters at startup via `--lora-modules`:
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
python -m vllm.entrypoints.openai.api_server \
|
|
165
|
+
--model meta-llama/Meta-Llama-3-8B \
|
|
166
|
+
--enable-lora \
|
|
167
|
+
--lora-modules my-lora=/path/to/adapter
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
shiftgate sends `"model": "<adapter_id>"` in each `/v1/chat/completions` request, which vLLM maps to the named LoRA module.
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## How to contribute adapters
|
|
175
|
+
|
|
176
|
+
1. Fork this repo.
|
|
177
|
+
2. Add an entry to `data/default_adapters.json` (optional — the registry ships empty by design; adapters are user-managed).
|
|
178
|
+
3. Or, better: publish your adapter to HuggingFace and open a PR that documents it in the README's "Community Adapters" section.
|
|
179
|
+
|
|
180
|
+
To add a task cluster that better matches your domain, edit `data/default_tasks.json` and add `validation_examples` that represent real queries your users ask. Run `shiftgate init` to recompute centroids.
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## `~/.shiftgate/` layout
|
|
185
|
+
|
|
186
|
+
```
|
|
187
|
+
~/.shiftgate/
|
|
188
|
+
├── adapters.json # your registered adapters
|
|
189
|
+
├── tasks.json # task clusters (copied from defaults on first init)
|
|
190
|
+
├── traces.jsonl # append-only routing trace log
|
|
191
|
+
└── embeddings_cache.npy # cached centroids — delete to force re-embedding
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
---
|
|
195
|
+
|
|
196
|
+
## Roadmap
|
|
197
|
+
|
|
198
|
+
| Version | Focus |
|
|
199
|
+
|---|---|
|
|
200
|
+
| **v0.1** | Single base model, multi-adapter routing ← _current_ |
|
|
201
|
+
| v0.2 | Feedback loop + adapter scoring (auto-demote bad adapters) |
|
|
202
|
+
| v0.3 | Multi-model routing (route to different base models per task) |
|
|
203
|
+
| v1.0 | Community registry + web UI |
|
|
204
|
+
|
|
205
|
+
---
|
|
206
|
+
|
|
207
|
+
## Development
|
|
208
|
+
|
|
209
|
+
```bash
|
|
210
|
+
# Clone and install in editable mode with all dev dependencies
|
|
211
|
+
git clone https://github.com/shiftgate-ai/shiftgate
|
|
212
|
+
cd shiftgate
|
|
213
|
+
uv sync --extra dev # creates .venv, installs shiftgate + dev deps
|
|
214
|
+
|
|
215
|
+
# Run tests (no GPU needed — tests use synthetic embeddings)
|
|
216
|
+
uv run pytest
|
|
217
|
+
|
|
218
|
+
# Run the demo inside the venv
|
|
219
|
+
uv run shiftgate demo
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
> **Note:** `uv sync` reads `pyproject.toml` and resolves a locked environment.
|
|
223
|
+
> There is no need to run `pip install` manually. Activate the venv with
|
|
224
|
+
> `.venv/Scripts/activate` (Windows) or `source .venv/bin/activate` (macOS/Linux)
|
|
225
|
+
> if you want the `shiftgate` command on your `PATH` without the `uv run` prefix.
|
|
226
|
+
|
|
227
|
+
## Releases and Publishing
|
|
228
|
+
|
|
229
|
+
Releases are managed through a CI release workflow (e.g. GitHub Actions).
|
|
230
|
+
**No manual PyPI API token management is required for normal releases.**
|
|
231
|
+
|
|
232
|
+
The recommended flow:
|
|
233
|
+
|
|
234
|
+
1. Bump the version in `pyproject.toml` (`version = "x.y.z"`).
|
|
235
|
+
2. Open a PR, get it reviewed and merged.
|
|
236
|
+
3. Tag the commit: `git tag vx.y.z && git push origin vx.y.z`.
|
|
237
|
+
4. The CI workflow builds the wheel with `uv build` and publishes to PyPI
|
|
238
|
+
using [Trusted Publishing (OIDC)](https://docs.pypi.org/trusted-publishers/)
|
|
239
|
+
— no stored API token needed.
|
|
240
|
+
|
|
241
|
+
For a one-off manual publish (maintainers only):
|
|
242
|
+
|
|
243
|
+
```bash
|
|
244
|
+
uv build # produces dist/shiftgate-x.y.z-py3-none-any.whl
|
|
245
|
+
uv publish # authenticates via OIDC or a scoped PyPI token
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
### Project layout
|
|
249
|
+
|
|
250
|
+
```
|
|
251
|
+
shiftgate/
|
|
252
|
+
├── cli.py # Typer CLI — all user commands
|
|
253
|
+
├── registry/
|
|
254
|
+
│ ├── schemas.py # Pydantic models: AdapterEntry, TaskCluster, RoutingTrace
|
|
255
|
+
│ ├── adapter_registry.py
|
|
256
|
+
│ └── task_registry.py
|
|
257
|
+
├── router/
|
|
258
|
+
│ ├── embedder.py # fastembed wrapper (CPU, singleton)
|
|
259
|
+
│ ├── matcher.py # cosine similarity, top-K, adapter selection
|
|
260
|
+
│ └── router.py # orchestrates embed → match → trace
|
|
261
|
+
├── runtime/
|
|
262
|
+
│ └── backend.py # OllamaBackend, VLLMBackend, BackendRouter
|
|
263
|
+
├── feedback/
|
|
264
|
+
│ └── loop.py # trace persistence, accept/reject, scoring
|
|
265
|
+
└── utils/
|
|
266
|
+
└── display.py # Rich panels, tables, animations
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
---
|
|
270
|
+
|
|
271
|
+
## License
|
|
272
|
+
|
|
273
|
+
MIT. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
shiftgate/__init__.py,sha256=5xnE4JLAwev8YWPIggAntTpxOtGCS5f8qDk0kFjTSXw,268
|
|
2
|
+
shiftgate/cli.py,sha256=T_l2Rjw1dQVhzKvTIMK_CMjkGDMiJw53RgLKpCOBBXA,18233
|
|
3
|
+
shiftgate/feedback/__init__.py,sha256=D4rQVHGwfVq_2v6dt0NyNeKhx9zr1izEI_4oW5wnh2c,68
|
|
4
|
+
shiftgate/feedback/loop.py,sha256=HjXztY_pFkb6QH_APseScLKS1srr9QcyE5d-WyiBEPA,5709
|
|
5
|
+
shiftgate/registry/__init__.py,sha256=liGxMtJl6S85eBzfNeE0h7BbC2Eg8iwkzIw2xZEaPkM,82
|
|
6
|
+
shiftgate/registry/adapter_registry.py,sha256=ek9wxp8X5cA4a3fPC_tlm4cco3C9of-oDpEoMowwqOU,6292
|
|
7
|
+
shiftgate/registry/schemas.py,sha256=xZvvwwopC4d3FhuWdJ8pu3lAcwDdZI6ogIWQHNm8L8o,4663
|
|
8
|
+
shiftgate/registry/task_registry.py,sha256=DmwDO5oxjlxnc6w7caZSRCb6Ol1NZn6JHsHOdfitsxs,7407
|
|
9
|
+
shiftgate/router/__init__.py,sha256=Sx6lX51jO8wWGbquYtwdI6Z8N5XCRZyOndr6lqPXVjk,73
|
|
10
|
+
shiftgate/router/embedder.py,sha256=8DsII_pv9I1_apWw2Ev_gU2C0t46Vu2RbuGcwgom8Og,3166
|
|
11
|
+
shiftgate/router/matcher.py,sha256=8JPhC2fT_hL5a750jk4qso0JpNoTlmhh3EtuqBInwyU,3834
|
|
12
|
+
shiftgate/router/router.py,sha256=17ibBHHVznxx_GSDI0n5c-G9tseLfEo5mvWF4T4QEFs,2902
|
|
13
|
+
shiftgate/runtime/__init__.py,sha256=i4knQ_ErspGiQyv87wFsOF3t86YZyqk1Aszaq046o78,70
|
|
14
|
+
shiftgate/runtime/backend.py,sha256=w1M7QM3FpzFlk6OVD2wJfaVHu97pM7_z6XjncSV9mi8,9668
|
|
15
|
+
shiftgate/utils/__init__.py,sha256=jtUdmpchbbJrfjFmAmLGSoJeNBecSUWGy6_LtlJQ97U,51
|
|
16
|
+
shiftgate/utils/display.py,sha256=VCo0mxf5dp3LnsiohOJy7mtclbm0A8fG-C2jhDlDVl8,10540
|
|
17
|
+
shiftgate-0.1.0.dist-info/METADATA,sha256=o-R5FXpFmZ8VUW-av1uXVthB1VG7tw0UJsCQOLw61dc,10843
|
|
18
|
+
shiftgate-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
19
|
+
shiftgate-0.1.0.dist-info/entry_points.txt,sha256=TVcngfC2MKsbEBkTJIaJoCe5nk4bdgaAsBi23o8VVZs,48
|
|
20
|
+
shiftgate-0.1.0.dist-info/RECORD,,
|