shiftgate 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shiftgate-0.1.0/.github/workflows/release.yml +59 -0
- shiftgate-0.1.0/.gitignore +45 -0
- shiftgate-0.1.0/PKG-INFO +273 -0
- shiftgate-0.1.0/README.md +240 -0
- shiftgate-0.1.0/data/default_adapters.json +1 -0
- shiftgate-0.1.0/data/default_tasks.json +187 -0
- shiftgate-0.1.0/pyproject.toml +59 -0
- shiftgate-0.1.0/shiftgate/__init__.py +9 -0
- shiftgate-0.1.0/shiftgate/cli.py +513 -0
- shiftgate-0.1.0/shiftgate/feedback/__init__.py +1 -0
- shiftgate-0.1.0/shiftgate/feedback/loop.py +182 -0
- shiftgate-0.1.0/shiftgate/registry/__init__.py +1 -0
- shiftgate-0.1.0/shiftgate/registry/adapter_registry.py +162 -0
- shiftgate-0.1.0/shiftgate/registry/schemas.py +115 -0
- shiftgate-0.1.0/shiftgate/registry/task_registry.py +186 -0
- shiftgate-0.1.0/shiftgate/router/__init__.py +1 -0
- shiftgate-0.1.0/shiftgate/router/embedder.py +95 -0
- shiftgate-0.1.0/shiftgate/router/matcher.py +115 -0
- shiftgate-0.1.0/shiftgate/router/router.py +97 -0
- shiftgate-0.1.0/shiftgate/runtime/__init__.py +1 -0
- shiftgate-0.1.0/shiftgate/runtime/backend.py +289 -0
- shiftgate-0.1.0/shiftgate/utils/__init__.py +1 -0
- shiftgate-0.1.0/shiftgate/utils/display.py +297 -0
- shiftgate-0.1.0/tests/__init__.py +1 -0
- shiftgate-0.1.0/tests/test_feedback.py +261 -0
- shiftgate-0.1.0/tests/test_registry.py +218 -0
- shiftgate-0.1.0/tests/test_router.py +254 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
name: Release and Publish
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*' # Triggers the pipeline whenever a version tag is pushed, e.g., v0.1.0
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
validate:
|
|
10
|
+
name: Validate and Test Code
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- name: Check out repository
|
|
14
|
+
uses: actions/checkout@v4
|
|
15
|
+
|
|
16
|
+
- name: Install uv
|
|
17
|
+
uses: astral-sh/setup-uv@v5
|
|
18
|
+
with:
|
|
19
|
+
enable-cache: true
|
|
20
|
+
|
|
21
|
+
- name: Set up Python
|
|
22
|
+
uses: actions/setup-python@v5
|
|
23
|
+
with:
|
|
24
|
+
python-version-file: "pyproject.toml"
|
|
25
|
+
|
|
26
|
+
- name: Install dependencies & run tests
|
|
27
|
+
run: |
|
|
28
|
+
uv sync --all-extras --dev
|
|
29
|
+
uv run pytest
|
|
30
|
+
|
|
31
|
+
publish:
|
|
32
|
+
name: Build and Publish to PyPI
|
|
33
|
+
needs: validate
|
|
34
|
+
runs-on: ubuntu-latest
|
|
35
|
+
|
|
36
|
+
# Define the target environment configured on PyPI
|
|
37
|
+
environment:
|
|
38
|
+
name: pypi
|
|
39
|
+
url: https://pypi.org/p/shiftgate
|
|
40
|
+
|
|
41
|
+
# CRITICAL: This gives the job permission to request an OIDC id-token from GitHub
|
|
42
|
+
permissions:
|
|
43
|
+
id-token: write
|
|
44
|
+
contents: read
|
|
45
|
+
|
|
46
|
+
steps:
|
|
47
|
+
- name: Check out repository
|
|
48
|
+
uses: actions/checkout@v4
|
|
49
|
+
|
|
50
|
+
- name: Install uv
|
|
51
|
+
uses: astral-sh/setup-uv@v5
|
|
52
|
+
with:
|
|
53
|
+
enable-cache: true
|
|
54
|
+
|
|
55
|
+
- name: Build distribution packages
|
|
56
|
+
run: uv build
|
|
57
|
+
|
|
58
|
+
- name: Publish to PyPI via Trusted Publishing
|
|
59
|
+
run: uv publish --trusted-publishing always
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
*.egg
|
|
7
|
+
*.egg-info/
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
.eggs/
|
|
11
|
+
|
|
12
|
+
# Virtual environments
|
|
13
|
+
.venv/
|
|
14
|
+
venv/
|
|
15
|
+
env/
|
|
16
|
+
.env
|
|
17
|
+
|
|
18
|
+
# uv
|
|
19
|
+
.uv/
|
|
20
|
+
uv.lock
|
|
21
|
+
|
|
22
|
+
# Testing
|
|
23
|
+
.pytest_cache/
|
|
24
|
+
.coverage
|
|
25
|
+
htmlcov/
|
|
26
|
+
.tox/
|
|
27
|
+
|
|
28
|
+
# IDE
|
|
29
|
+
.idea/
|
|
30
|
+
.vscode/
|
|
31
|
+
*.swp
|
|
32
|
+
*.swo
|
|
33
|
+
.DS_Store
|
|
34
|
+
|
|
35
|
+
# shiftgate runtime data
|
|
36
|
+
.shiftgate/
|
|
37
|
+
*.jsonl
|
|
38
|
+
|
|
39
|
+
# fastembed model cache
|
|
40
|
+
.fastembed_cache/
|
|
41
|
+
models/
|
|
42
|
+
|
|
43
|
+
# Distribution
|
|
44
|
+
*.whl
|
|
45
|
+
*.tar.gz
|
shiftgate-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: shiftgate
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop.
|
|
5
|
+
Project-URL: Homepage, https://github.com/shiftgate-ai/shiftgate
|
|
6
|
+
Project-URL: Repository, https://github.com/shiftgate-ai/shiftgate
|
|
7
|
+
Project-URL: Issues, https://github.com/shiftgate-ai/shiftgate/issues
|
|
8
|
+
Author: shiftgate contributors
|
|
9
|
+
License: MIT
|
|
10
|
+
Keywords: adapters,inference,llm,lora,ollama,routing,vllm
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Requires-Dist: fastembed>=0.3.0
|
|
22
|
+
Requires-Dist: httpx>=0.27.0
|
|
23
|
+
Requires-Dist: huggingface-hub>=0.22.0
|
|
24
|
+
Requires-Dist: numpy>=1.26.0
|
|
25
|
+
Requires-Dist: pydantic>=2.6.0
|
|
26
|
+
Requires-Dist: rich>=13.7.0
|
|
27
|
+
Requires-Dist: scikit-learn>=1.4.0
|
|
28
|
+
Requires-Dist: typer>=0.12.0
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# shiftgate ⚡
|
|
35
|
+
|
|
36
|
+
> **shiftgate is an intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop.**
|
|
37
|
+
|
|
38
|
+
Instead of hardcoding which adapter to use, shiftgate embeds your query and matches it against a catalog of task clusters using cosine similarity — then routes inference to the best-fit LoRA adapter on your running Ollama or vLLM instance. Think of it as "npm for LoRA adapters + an automatic brain that picks the right one per task."
|
|
39
|
+
|
|
40
|
+
Inspired by the [LORAUTER paper](https://arxiv.org/abs/2406.08213) (EPFL, 2026).
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Quickstart
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
# 1. Install (requires Python 3.10+, uv recommended)
|
|
48
|
+
uv add shiftgate
|
|
49
|
+
|
|
50
|
+
# 2. Initialise: sets up ~/.shiftgate/, downloads the embedding model,
|
|
51
|
+
# and computes task centroids
|
|
52
|
+
shiftgate init
|
|
53
|
+
|
|
54
|
+
# 3. Register a LoRA adapter from HuggingFace
|
|
55
|
+
shiftgate adapter add monology/pmc-llama-13b-lora --base meta-llama/Meta-Llama-3-8B --tags medical qa
|
|
56
|
+
|
|
57
|
+
# 4. Route a query (shows decision, no inference needed)
|
|
58
|
+
shiftgate route "explain the mechanism of action of ibuprofen"
|
|
59
|
+
|
|
60
|
+
# 5. Route + run (requires Ollama or vLLM running locally)
|
|
61
|
+
shiftgate run "explain the mechanism of action of ibuprofen"
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## Architecture
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
User query
|
|
70
|
+
│
|
|
71
|
+
▼
|
|
72
|
+
┌──────────────────────────────────────────────────┐
|
|
73
|
+
│ shiftgate CLI │
|
|
74
|
+
│ shiftgate route / shiftgate run │
|
|
75
|
+
└────────────────────┬─────────────────────────────┘
|
|
76
|
+
│
|
|
77
|
+
▼
|
|
78
|
+
┌──────────────────────────────────────────────────┐
|
|
79
|
+
│ Router │
|
|
80
|
+
│ │
|
|
81
|
+
│ 1. Embed query (fastembed BAAI/bge-small-en) │
|
|
82
|
+
│ 2. Cosine similarity vs task centroids │
|
|
83
|
+
│ 3. top-K tasks → walk preferred_adapters list │
|
|
84
|
+
│ 4. Return RoutingTrace │
|
|
85
|
+
└──────────┬───────────────────────┬───────────────┘
|
|
86
|
+
│ │
|
|
87
|
+
▼ ▼
|
|
88
|
+
┌─────────────────┐ ┌────────────────────────────┐
|
|
89
|
+
│ Task Registry │ │ Adapter Registry │
|
|
90
|
+
│ ~/.shiftgate/ │ │ ~/.shiftgate/adapters.json │
|
|
91
|
+
│ tasks.json │ │ │
|
|
92
|
+
│ (10 defaults) │ │ Add via: │
|
|
93
|
+
└─────────────────┘ │ shiftgate adapter add │
|
|
94
|
+
└────────────┬───────────────┘
|
|
95
|
+
│
|
|
96
|
+
▼
|
|
97
|
+
┌────────────────────────────────┐
|
|
98
|
+
│ BackendRouter │
|
|
99
|
+
│ │
|
|
100
|
+
│ Ollama (localhost:11434) │
|
|
101
|
+
│ vLLM (localhost:8000) │
|
|
102
|
+
│ Auto-detected at runtime │
|
|
103
|
+
└────────────────────────────────┘
|
|
104
|
+
│
|
|
105
|
+
▼
|
|
106
|
+
┌────────────────────────────────┐
|
|
107
|
+
│ Feedback Loop │
|
|
108
|
+
│ ~/.shiftgate/traces.jsonl │
|
|
109
|
+
│ shiftgate feedback accept │
|
|
110
|
+
│ shiftgate feedback stats │
|
|
111
|
+
└────────────────────────────────┘
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## Commands
|
|
117
|
+
|
|
118
|
+
| Command | Description |
|
|
119
|
+
|---|---|
|
|
120
|
+
| `shiftgate init` | First-time setup: copy defaults to `~/.shiftgate/`, compute embeddings |
|
|
121
|
+
| `shiftgate route "<query>"` | Route a query and show the decision — no inference |
|
|
122
|
+
| `shiftgate run "<query>"` | Route + run via Ollama or vLLM |
|
|
123
|
+
| `shiftgate adapter add <hf_repo>` | Register a new LoRA adapter |
|
|
124
|
+
| `shiftgate adapter list` | Table of all registered adapters |
|
|
125
|
+
| `shiftgate adapter remove <id>` | Remove an adapter |
|
|
126
|
+
| `shiftgate task list` | Table of all task clusters |
|
|
127
|
+
| `shiftgate task add` | Interactively add a new task cluster |
|
|
128
|
+
| `shiftgate feedback accept` | Mark last routing as good |
|
|
129
|
+
| `shiftgate feedback reject` | Mark last routing as bad |
|
|
130
|
+
| `shiftgate feedback stats` | Adapter acceptance rate table |
|
|
131
|
+
| `shiftgate status` | Backend connectivity + registry summary |
|
|
132
|
+
| `shiftgate demo` | Animated demo with fake routing traces |
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## Using with Ollama
|
|
137
|
+
|
|
138
|
+
Ollama supports LoRA adapters via custom Modelfiles. Create one per adapter:
|
|
139
|
+
|
|
140
|
+
```dockerfile
|
|
141
|
+
# my-lora.Modelfile
|
|
142
|
+
FROM llama3
|
|
143
|
+
ADAPTER /path/to/my-adapter.safetensors
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
ollama create my-lora -f my-lora.Modelfile
|
|
148
|
+
ollama serve
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
Register the adapter in shiftgate using the same ID:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
shiftgate adapter add my-org/my-lora --base meta-llama/Meta-Llama-3-8B
|
|
155
|
+
# The adapter id defaults to the repo slug: "my-lora"
|
|
156
|
+
# shiftgate will pass model="my-lora" to Ollama → activates the Modelfile
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Using with vLLM
|
|
160
|
+
|
|
161
|
+
vLLM loads LoRA adapters at startup via `--lora-modules`:
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
python -m vllm.entrypoints.openai.api_server \
|
|
165
|
+
--model meta-llama/Meta-Llama-3-8B \
|
|
166
|
+
--enable-lora \
|
|
167
|
+
--lora-modules my-lora=/path/to/adapter
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
shiftgate sends `"model": "<adapter_id>"` in each `/v1/chat/completions` request, which vLLM maps to the named LoRA module.
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## How to contribute adapters
|
|
175
|
+
|
|
176
|
+
1. Fork this repo.
|
|
177
|
+
2. Add an entry to `data/default_adapters.json` (optional — the registry ships empty by design; adapters are user-managed).
|
|
178
|
+
3. Or, better: publish your adapter to HuggingFace and open a PR that documents it in the README's "Community Adapters" section.
|
|
179
|
+
|
|
180
|
+
To add a task cluster that better matches your domain, edit `data/default_tasks.json` and add `validation_examples` that represent real queries your users ask. Run `shiftgate init` to recompute centroids.
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## `~/.shiftgate/` layout
|
|
185
|
+
|
|
186
|
+
```
|
|
187
|
+
~/.shiftgate/
|
|
188
|
+
├── adapters.json # your registered adapters
|
|
189
|
+
├── tasks.json # task clusters (copied from defaults on first init)
|
|
190
|
+
├── traces.jsonl # append-only routing trace log
|
|
191
|
+
└── embeddings_cache.npy # cached centroids — delete to force re-embedding
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
---
|
|
195
|
+
|
|
196
|
+
## Roadmap
|
|
197
|
+
|
|
198
|
+
| Version | Focus |
|
|
199
|
+
|---|---|
|
|
200
|
+
| **v0.1** | Single base model, multi-adapter routing ← _current_ |
|
|
201
|
+
| v0.2 | Feedback loop + adapter scoring (auto-demote bad adapters) |
|
|
202
|
+
| v0.3 | Multi-model routing (route to different base models per task) |
|
|
203
|
+
| v1.0 | Community registry + web UI |
|
|
204
|
+
|
|
205
|
+
---
|
|
206
|
+
|
|
207
|
+
## Development
|
|
208
|
+
|
|
209
|
+
```bash
|
|
210
|
+
# Clone and install in editable mode with all dev dependencies
|
|
211
|
+
git clone https://github.com/shiftgate-ai/shiftgate
|
|
212
|
+
cd shiftgate
|
|
213
|
+
uv sync --extra dev # creates .venv, installs shiftgate + dev deps
|
|
214
|
+
|
|
215
|
+
# Run tests (no GPU needed — tests use synthetic embeddings)
|
|
216
|
+
uv run pytest
|
|
217
|
+
|
|
218
|
+
# Run the demo inside the venv
|
|
219
|
+
uv run shiftgate demo
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
> **Note:** `uv sync` reads `pyproject.toml` and resolves a locked environment.
|
|
223
|
+
> There is no need to run `pip install` manually. Activate the venv with
|
|
224
|
+
> `.venv/Scripts/activate` (Windows) or `source .venv/bin/activate` (macOS/Linux)
|
|
225
|
+
> if you want the `shiftgate` command on your `PATH` without the `uv run` prefix.
|
|
226
|
+
|
|
227
|
+
## Releases and Publishing
|
|
228
|
+
|
|
229
|
+
Releases are managed through a CI release workflow (e.g. GitHub Actions).
|
|
230
|
+
**No manual PyPI API token management is required for normal releases.**
|
|
231
|
+
|
|
232
|
+
The recommended flow:
|
|
233
|
+
|
|
234
|
+
1. Bump the version in `pyproject.toml` (`version = "x.y.z"`).
|
|
235
|
+
2. Open a PR, get it reviewed and merged.
|
|
236
|
+
3. Tag the commit: `git tag vx.y.z && git push origin vx.y.z`.
|
|
237
|
+
4. The CI workflow builds the wheel with `uv build` and publishes to PyPI
|
|
238
|
+
using [Trusted Publishing (OIDC)](https://docs.pypi.org/trusted-publishers/)
|
|
239
|
+
— no stored API token needed.
|
|
240
|
+
|
|
241
|
+
For a one-off manual publish (maintainers only):
|
|
242
|
+
|
|
243
|
+
```bash
|
|
244
|
+
uv build # produces dist/shiftgate-x.y.z.tar.gz and dist/shiftgate-x.y.z-py3-none-any.whl
|
|
245
|
+
uv publish # outside CI, authenticate with a scoped PyPI token (--token or UV_PUBLISH_TOKEN); OIDC only works in the release workflow
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
### Project layout
|
|
249
|
+
|
|
250
|
+
```
|
|
251
|
+
shiftgate/
|
|
252
|
+
├── cli.py # Typer CLI — all user commands
|
|
253
|
+
├── registry/
|
|
254
|
+
│ ├── schemas.py # Pydantic models: AdapterEntry, TaskCluster, RoutingTrace
|
|
255
|
+
│ ├── adapter_registry.py
|
|
256
|
+
│ └── task_registry.py
|
|
257
|
+
├── router/
|
|
258
|
+
│ ├── embedder.py # fastembed wrapper (CPU, singleton)
|
|
259
|
+
│ ├── matcher.py # cosine similarity, top-K, adapter selection
|
|
260
|
+
│ └── router.py # orchestrates embed → match → trace
|
|
261
|
+
├── runtime/
|
|
262
|
+
│ └── backend.py # OllamaBackend, VLLMBackend, BackendRouter
|
|
263
|
+
├── feedback/
|
|
264
|
+
│ └── loop.py # trace persistence, accept/reject, scoring
|
|
265
|
+
└── utils/
|
|
266
|
+
└── display.py # Rich panels, tables, animations
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
---
|
|
270
|
+
|
|
271
|
+
## License
|
|
272
|
+
|
|
273
|
+
MIT. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
# shiftgate ⚡
|
|
2
|
+
|
|
3
|
+
> **shiftgate is an intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop.**
|
|
4
|
+
|
|
5
|
+
Instead of hardcoding which adapter to use, shiftgate embeds your query and matches it against a catalog of task clusters using cosine similarity — then routes inference to the best-fit LoRA adapter on your running Ollama or vLLM instance. Think of it as "npm for LoRA adapters + an automatic brain that picks the right one per task."
|
|
6
|
+
|
|
7
|
+
Inspired by the [LORAUTER paper](https://arxiv.org/abs/2406.08213) (EPFL, 2026).
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Quickstart
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# 1. Install (requires Python 3.10+, uv recommended)
|
|
15
|
+
uv add shiftgate
|
|
16
|
+
|
|
17
|
+
# 2. Initialise: sets up ~/.shiftgate/, downloads the embedding model,
|
|
18
|
+
# and computes task centroids
|
|
19
|
+
shiftgate init
|
|
20
|
+
|
|
21
|
+
# 3. Register a LoRA adapter from HuggingFace
|
|
22
|
+
shiftgate adapter add monology/pmc-llama-13b-lora --base meta-llama/Meta-Llama-3-8B --tags medical qa
|
|
23
|
+
|
|
24
|
+
# 4. Route a query (shows decision, no inference needed)
|
|
25
|
+
shiftgate route "explain the mechanism of action of ibuprofen"
|
|
26
|
+
|
|
27
|
+
# 5. Route + run (requires Ollama or vLLM running locally)
|
|
28
|
+
shiftgate run "explain the mechanism of action of ibuprofen"
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Architecture
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
User query
|
|
37
|
+
│
|
|
38
|
+
▼
|
|
39
|
+
┌──────────────────────────────────────────────────┐
|
|
40
|
+
│ shiftgate CLI │
|
|
41
|
+
│ shiftgate route / shiftgate run │
|
|
42
|
+
└────────────────────┬─────────────────────────────┘
|
|
43
|
+
│
|
|
44
|
+
▼
|
|
45
|
+
┌──────────────────────────────────────────────────┐
|
|
46
|
+
│ Router │
|
|
47
|
+
│ │
|
|
48
|
+
│ 1. Embed query (fastembed BAAI/bge-small-en) │
|
|
49
|
+
│ 2. Cosine similarity vs task centroids │
|
|
50
|
+
│ 3. top-K tasks → walk preferred_adapters list │
|
|
51
|
+
│ 4. Return RoutingTrace │
|
|
52
|
+
└──────────┬───────────────────────┬───────────────┘
|
|
53
|
+
│ │
|
|
54
|
+
▼ ▼
|
|
55
|
+
┌─────────────────┐ ┌────────────────────────────┐
|
|
56
|
+
│ Task Registry │ │ Adapter Registry │
|
|
57
|
+
│ ~/.shiftgate/ │ │ ~/.shiftgate/adapters.json │
|
|
58
|
+
│ tasks.json │ │ │
|
|
59
|
+
│ (10 defaults) │ │ Add via: │
|
|
60
|
+
└─────────────────┘ │ shiftgate adapter add │
|
|
61
|
+
└────────────┬───────────────┘
|
|
62
|
+
│
|
|
63
|
+
▼
|
|
64
|
+
┌────────────────────────────────┐
|
|
65
|
+
│ BackendRouter │
|
|
66
|
+
│ │
|
|
67
|
+
│ Ollama (localhost:11434) │
|
|
68
|
+
│ vLLM (localhost:8000) │
|
|
69
|
+
│ Auto-detected at runtime │
|
|
70
|
+
└────────────────────────────────┘
|
|
71
|
+
│
|
|
72
|
+
▼
|
|
73
|
+
┌────────────────────────────────┐
|
|
74
|
+
│ Feedback Loop │
|
|
75
|
+
│ ~/.shiftgate/traces.jsonl │
|
|
76
|
+
│ shiftgate feedback accept │
|
|
77
|
+
│ shiftgate feedback stats │
|
|
78
|
+
└────────────────────────────────┘
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## Commands
|
|
84
|
+
|
|
85
|
+
| Command | Description |
|
|
86
|
+
|---|---|
|
|
87
|
+
| `shiftgate init` | First-time setup: copy defaults to `~/.shiftgate/`, compute embeddings |
|
|
88
|
+
| `shiftgate route "<query>"` | Route a query and show the decision — no inference |
|
|
89
|
+
| `shiftgate run "<query>"` | Route + run via Ollama or vLLM |
|
|
90
|
+
| `shiftgate adapter add <hf_repo>` | Register a new LoRA adapter |
|
|
91
|
+
| `shiftgate adapter list` | Table of all registered adapters |
|
|
92
|
+
| `shiftgate adapter remove <id>` | Remove an adapter |
|
|
93
|
+
| `shiftgate task list` | Table of all task clusters |
|
|
94
|
+
| `shiftgate task add` | Interactively add a new task cluster |
|
|
95
|
+
| `shiftgate feedback accept` | Mark last routing as good |
|
|
96
|
+
| `shiftgate feedback reject` | Mark last routing as bad |
|
|
97
|
+
| `shiftgate feedback stats` | Adapter acceptance rate table |
|
|
98
|
+
| `shiftgate status` | Backend connectivity + registry summary |
|
|
99
|
+
| `shiftgate demo` | Animated demo with fake routing traces |
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
## Using with Ollama
|
|
104
|
+
|
|
105
|
+
Ollama supports LoRA adapters via custom Modelfiles. Create one per adapter:
|
|
106
|
+
|
|
107
|
+
```dockerfile
|
|
108
|
+
# my-lora.Modelfile
|
|
109
|
+
FROM llama3
|
|
110
|
+
ADAPTER /path/to/my-adapter.safetensors
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
ollama create my-lora -f my-lora.Modelfile
|
|
115
|
+
ollama serve
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Register the adapter in shiftgate using the same ID:
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
shiftgate adapter add my-org/my-lora --base meta-llama/Meta-Llama-3-8B
|
|
122
|
+
# The adapter id defaults to the repo slug: "my-lora"
|
|
123
|
+
# shiftgate will pass model="my-lora" to Ollama → activates the Modelfile
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Using with vLLM
|
|
127
|
+
|
|
128
|
+
vLLM loads LoRA adapters at startup via `--lora-modules`:
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
python -m vllm.entrypoints.openai.api_server \
|
|
132
|
+
--model meta-llama/Meta-Llama-3-8B \
|
|
133
|
+
--enable-lora \
|
|
134
|
+
--lora-modules my-lora=/path/to/adapter
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
shiftgate sends `"model": "<adapter_id>"` in each `/v1/chat/completions` request, which vLLM maps to the named LoRA module.
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
## How to contribute adapters
|
|
142
|
+
|
|
143
|
+
1. Fork this repo.
|
|
144
|
+
2. Add an entry to `data/default_adapters.json` (optional — the registry ships empty by design; adapters are user-managed).
|
|
145
|
+
3. Or, better: publish your adapter to HuggingFace and open a PR that documents it in the README's "Community Adapters" section.
|
|
146
|
+
|
|
147
|
+
To add a task cluster that better matches your domain, edit `data/default_tasks.json` and add `validation_examples` that represent real queries your users ask. Run `shiftgate init` to recompute centroids.
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## `~/.shiftgate/` layout
|
|
152
|
+
|
|
153
|
+
```
|
|
154
|
+
~/.shiftgate/
|
|
155
|
+
├── adapters.json # your registered adapters
|
|
156
|
+
├── tasks.json # task clusters (copied from defaults on first init)
|
|
157
|
+
├── traces.jsonl # append-only routing trace log
|
|
158
|
+
└── embeddings_cache.npy # cached centroids — delete to force re-embedding
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## Roadmap
|
|
164
|
+
|
|
165
|
+
| Version | Focus |
|
|
166
|
+
|---|---|
|
|
167
|
+
| **v0.1** | Single base model, multi-adapter routing ← _current_ |
|
|
168
|
+
| v0.2 | Feedback loop + adapter scoring (auto-demote bad adapters) |
|
|
169
|
+
| v0.3 | Multi-model routing (route to different base models per task) |
|
|
170
|
+
| v1.0 | Community registry + web UI |
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## Development
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
# Clone and install in editable mode with all dev dependencies
|
|
178
|
+
git clone https://github.com/shiftgate-ai/shiftgate
|
|
179
|
+
cd shiftgate
|
|
180
|
+
uv sync --extra dev # creates .venv, installs shiftgate + dev deps
|
|
181
|
+
|
|
182
|
+
# Run tests (no GPU needed — tests use synthetic embeddings)
|
|
183
|
+
uv run pytest
|
|
184
|
+
|
|
185
|
+
# Run the demo inside the venv
|
|
186
|
+
uv run shiftgate demo
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
> **Note:** `uv sync` reads `pyproject.toml` and resolves a locked environment.
|
|
190
|
+
> There is no need to run `pip install` manually. Activate the venv with
|
|
191
|
+
> `.venv/Scripts/activate` (Windows) or `source .venv/bin/activate` (macOS/Linux)
|
|
192
|
+
> if you want the `shiftgate` command on your `PATH` without the `uv run` prefix.
|
|
193
|
+
|
|
194
|
+
## Releases and Publishing
|
|
195
|
+
|
|
196
|
+
Releases are managed through a CI release workflow (e.g. GitHub Actions).
|
|
197
|
+
**No manual PyPI API token management is required for normal releases.**
|
|
198
|
+
|
|
199
|
+
The recommended flow:
|
|
200
|
+
|
|
201
|
+
1. Bump the version in `pyproject.toml` (`version = "x.y.z"`).
|
|
202
|
+
2. Open a PR, get it reviewed and merged.
|
|
203
|
+
3. Tag the commit: `git tag vx.y.z && git push origin vx.y.z`.
|
|
204
|
+
4. The CI workflow builds the wheel with `uv build` and publishes to PyPI
|
|
205
|
+
using [Trusted Publishing (OIDC)](https://docs.pypi.org/trusted-publishers/)
|
|
206
|
+
— no stored API token needed.
|
|
207
|
+
|
|
208
|
+
For a one-off manual publish (maintainers only):
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
uv build # produces dist/shiftgate-x.y.z.tar.gz and dist/shiftgate-x.y.z-py3-none-any.whl
|
|
212
|
+
uv publish # outside CI, authenticate with a scoped PyPI token (--token or UV_PUBLISH_TOKEN); OIDC only works in the release workflow
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### Project layout
|
|
216
|
+
|
|
217
|
+
```
|
|
218
|
+
shiftgate/
|
|
219
|
+
├── cli.py # Typer CLI — all user commands
|
|
220
|
+
├── registry/
|
|
221
|
+
│ ├── schemas.py # Pydantic models: AdapterEntry, TaskCluster, RoutingTrace
|
|
222
|
+
│ ├── adapter_registry.py
|
|
223
|
+
│ └── task_registry.py
|
|
224
|
+
├── router/
|
|
225
|
+
│ ├── embedder.py # fastembed wrapper (CPU, singleton)
|
|
226
|
+
│ ├── matcher.py # cosine similarity, top-K, adapter selection
|
|
227
|
+
│ └── router.py # orchestrates embed → match → trace
|
|
228
|
+
├── runtime/
|
|
229
|
+
│ └── backend.py # OllamaBackend, VLLMBackend, BackendRouter
|
|
230
|
+
├── feedback/
|
|
231
|
+
│ └── loop.py # trace persistence, accept/reject, scoring
|
|
232
|
+
└── utils/
|
|
233
|
+
└── display.py # Rich panels, tables, animations
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
## License
|
|
239
|
+
|
|
240
|
+
MIT. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
[]
|