shiftgate 0.1.7__tar.gz → 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {shiftgate-0.1.7 → shiftgate-0.1.8}/PKG-INFO +96 -52
- {shiftgate-0.1.7 → shiftgate-0.1.8}/README.md +95 -51
- {shiftgate-0.1.7 → shiftgate-0.1.8}/pyproject.toml +1 -1
- {shiftgate-0.1.7 → shiftgate-0.1.8}/.gitignore +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/__init__.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/cli.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/data/__init__.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/data/default_tasks.json +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/feedback/__init__.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/feedback/loop.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/registry/__init__.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/registry/adapter_registry.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/registry/schemas.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/registry/task_registry.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/router/__init__.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/router/embedder.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/router/matcher.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/router/router.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/runtime/__init__.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/runtime/backend.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/utils/__init__.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/shiftgate/utils/display.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/tests/__init__.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/tests/test_backend.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/tests/test_feedback.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/tests/test_packaging.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/tests/test_registry.py +0 -0
- {shiftgate-0.1.7 → shiftgate-0.1.8}/tests/test_router.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: shiftgate
|
|
3
|
-
Version: 0.1.7
|
|
3
|
+
Version: 0.1.8
|
|
4
4
|
Summary: Intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop.
|
|
5
5
|
Project-URL: Homepage, https://github.com/shiftgate-ai/shiftgate
|
|
6
6
|
Project-URL: Repository, https://github.com/shiftgate-ai/shiftgate
|
|
@@ -39,36 +39,74 @@ Description-Content-Type: text/markdown
|
|
|
39
39
|
<img src="assets/demo.gif" alt="shiftgate routing a query to the right LoRA adapter" width="720">
|
|
40
40
|
</p>
|
|
41
41
|
|
|
42
|
-
**
|
|
43
|
-
shiftgate stores only adapter *metadata* — it never downloads, caches, or manages weights.
|
|
44
|
-
Your inference backend (Ollama, vLLM) is responsible for loading the weights; shiftgate tells it *which* adapter to use for each query.
|
|
42
|
+
> **shiftgate does not manage weights.** It stores adapter *metadata* only — no downloading, caching, or loading LoRA files. You start **Ollama** or **vLLM** with your models and adapters loaded; shiftgate embeds each query, picks the best task cluster, and tells the backend which adapter to use.
|
|
45
43
|
|
|
46
|
-
|
|
44
|
+
> **`shiftgate run` requires a running inference backend.** Routing-only commands (`shiftgate route`, `shiftgate init`) work without one. To generate text, Ollama (`localhost:11434`) or vLLM (`localhost:8000`) must already be running with your adapters loaded.
|
|
45
|
+
|
|
46
|
+
Instead of hardcoding which adapter to use, shiftgate matches your query against a catalog of task clusters using cosine similarity — then routes to the best-fit LoRA adapter on that backend.
|
|
47
47
|
|
|
48
48
|
---
|
|
49
49
|
|
|
50
50
|
## Quickstart
|
|
51
51
|
|
|
52
|
-
Requires **Python 3.10+**
|
|
52
|
+
Requires **Python 3.10+** and a running **Ollama** or **vLLM** instance for inference.
|
|
53
|
+
|
|
54
|
+
### 1. Install
|
|
53
55
|
|
|
54
56
|
```bash
|
|
55
|
-
# Install
|
|
56
57
|
uv tool install shiftgate
|
|
57
58
|
# or: pip install shiftgate
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### 2. Start your backend
|
|
62
|
+
|
|
63
|
+
**vLLM** (example — load adapters with `--lora-modules`):
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
python -m vllm.entrypoints.openai.api_server \
|
|
67
|
+
--model meta-llama/Meta-Llama-3-8B \
|
|
68
|
+
--enable-lora \
|
|
69
|
+
--lora-modules python-lora=/path/to/python-lora
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
**Ollama** (example — create a model that bundles base + adapter, then serve):
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
ollama create python-lora-ollama -f my-python-lora.Modelfile
|
|
76
|
+
ollama serve
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### 3. Initialise shiftgate
|
|
58
80
|
|
|
59
|
-
|
|
81
|
+
Creates `~/.shiftgate/` and computes task embeddings (one-time model download for routing):
|
|
82
|
+
|
|
83
|
+
```bash
|
|
60
84
|
shiftgate init
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### 4. Register adapters
|
|
61
88
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
89
|
+
Pick the option that matches your setup (see [Bring Your Own Models](#bring-your-own-models) for details):
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
# Option 1 — adapter already loaded in vLLM
|
|
93
|
+
shiftgate adapter add python-lora --runtime python-lora --tags python --base meta-llama/Meta-Llama-3-8B
|
|
66
94
|
|
|
67
|
-
#
|
|
68
|
-
shiftgate
|
|
95
|
+
# Option 2 — adapter already loaded in Ollama
|
|
96
|
+
shiftgate adapter add python-lora --runtime python-lora-ollama --tags python --base llama3
|
|
69
97
|
|
|
70
|
-
#
|
|
71
|
-
shiftgate
|
|
98
|
+
# Option 3 — metadata-only (catalogue a HuggingFace repo; no weights downloaded)
|
|
99
|
+
shiftgate adapter add teknium/python-lora --tags python --base llama3
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### 5. Run a query
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
# Route only — shows the decision, no inference
|
|
106
|
+
shiftgate route "write a python sorting function"
|
|
107
|
+
|
|
108
|
+
# Route + run through your backend
|
|
109
|
+
shiftgate run "write a python sorting function"
|
|
72
110
|
```
|
|
73
111
|
|
|
74
112
|
**Essential commands:** `init` · `adapter add` · `route` · `run` · `doctor`
|
|
@@ -84,12 +122,12 @@ shiftgate run "write a python sorting function"
|
|
|
84
122
|
```
|
|
85
123
|
╭────────────────────────── Routing Decision ──────────────────────────╮
|
|
86
124
|
│ Query "write a python sorting function" │
|
|
87
|
-
│ Matched Task Python Code Generation ████████████████░░ 91.2%
|
|
88
|
-
│ Adapter python-lora
|
|
89
|
-
│ Backend
|
|
125
|
+
│ Matched Task Python Code Generation ████████████████░░ 91.2% │
|
|
126
|
+
│ Adapter python-lora [meta-llama/Meta-Llama-3-8B] │
|
|
127
|
+
│ Backend vllm │
|
|
90
128
|
╰──────────────────────────────────────────────────────────────────────╯
|
|
91
129
|
|
|
92
|
-
Running via
|
|
130
|
+
Running via vllm…
|
|
93
131
|
|
|
94
132
|
────────────────────────────────── Response ──────────────────────────────────
|
|
95
133
|
def sort_array(arr):
|
|
@@ -121,16 +159,16 @@ shiftgate doctor
|
|
|
121
159
|
| **Adapter runtime availability** | For each registered adapter: linked status and whether it is **loaded** in the backend |
|
|
122
160
|
| **Unlinked task clusters** | Task clusters with no adapter wired — routing will match the task but cannot run inference |
|
|
123
161
|
|
|
124
|
-
**Runtime adapter verification**
|
|
162
|
+
**Runtime adapter verification** runs automatically when you register a backend-loaded adapter:
|
|
125
163
|
|
|
126
164
|
```bash
|
|
127
|
-
shiftgate adapter add
|
|
165
|
+
shiftgate adapter add python-lora --runtime python-lora --tags python --base llama3
|
|
128
166
|
# Backend: vllm ✓ verified ← adapter found in the running backend
|
|
129
|
-
# Backend: vllm ⚠ runtime '
|
|
167
|
+
# Backend: vllm ⚠ runtime 'python-lora' not loaded — did you pass --lora-modules?
|
|
130
168
|
# Backend: not running (verification skipped)
|
|
131
169
|
```
|
|
132
170
|
|
|
133
|
-
**Backend detection** is automatic
|
|
171
|
+
**Backend detection** is automatic. `shiftgate run`, `shiftgate status`, and `shiftgate doctor` probe Ollama first, then vLLM. No config file required.
|
|
134
172
|
|
|
135
173
|
---
|
|
136
174
|
|
|
@@ -157,8 +195,8 @@ User query
|
|
|
157
195
|
│ │
|
|
158
196
|
▼ ▼
|
|
159
197
|
┌─────────────────┐ ┌────────────────────────────┐
|
|
160
|
-
│ Task Registry │ │ Adapter Registry
|
|
161
|
-
│ ~/.shiftgate/ │ │ ~/.shiftgate/adapters.json
|
|
198
|
+
│ Task Registry │ │ Adapter Registry │
|
|
199
|
+
│ ~/.shiftgate/ │ │ ~/.shiftgate/adapters.json│
|
|
162
200
|
│ tasks.json │ │ │
|
|
163
201
|
│ (10 defaults) │ │ Add via: │
|
|
164
202
|
└─────────────────┘ │ shiftgate adapter add │
|
|
@@ -168,8 +206,8 @@ User query
|
|
|
168
206
|
┌────────────────────────────────┐
|
|
169
207
|
│ BackendRouter │
|
|
170
208
|
│ │
|
|
171
|
-
│ Ollama (localhost:11434)
|
|
172
|
-
│ vLLM (localhost:8000)
|
|
209
|
+
│ Ollama (localhost:11434) │
|
|
210
|
+
│ vLLM (localhost:8000) │
|
|
173
211
|
│ Auto-detected at runtime │
|
|
174
212
|
└────────────────────────────────┘
|
|
175
213
|
│
|
|
@@ -186,10 +224,30 @@ User query
|
|
|
186
224
|
|
|
187
225
|
## Bring Your Own Models
|
|
188
226
|
|
|
189
|
-
|
|
190
|
-
|
|
227
|
+
shiftgate is a routing layer. **You load weights into Ollama or vLLM first**, then register what you loaded so shiftgate can route to it.
|
|
228
|
+
|
|
229
|
+
You can also catalogue adapters you have not loaded yet (Option 3) — useful for `shiftgate route`, but `shiftgate run` will not produce output until the adapter is available in a running backend.
|
|
191
230
|
|
|
192
|
-
###
|
|
231
|
+
### Option 1 — Adapter already loaded in vLLM
|
|
232
|
+
|
|
233
|
+
Start vLLM with your adapters:
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
python -m vllm.entrypoints.openai.api_server \
|
|
237
|
+
--model meta-llama/Meta-Llama-3-8B \
|
|
238
|
+
--enable-lora \
|
|
239
|
+
--lora-modules sql-lora=/path/to/sql-lora
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
Register using the `--lora-modules` key as `--runtime`:
|
|
243
|
+
|
|
244
|
+
```bash
|
|
245
|
+
shiftgate adapter add sql-lora --runtime sql-lora --tags sql --base meta-llama/Meta-Llama-3-8B
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
shiftgate sends `"model": "<runtime_name>"` in each `/v1/chat/completions` request.
|
|
249
|
+
|
|
250
|
+
### Option 2 — Adapter already loaded in Ollama
|
|
193
251
|
|
|
194
252
|
Create a Modelfile that bundles your base model and adapter:
|
|
195
253
|
|
|
@@ -204,43 +262,29 @@ ollama create sql-lora-ollama -f my-sql-lora.Modelfile
|
|
|
204
262
|
ollama serve
|
|
205
263
|
```
|
|
206
264
|
|
|
207
|
-
Register
|
|
265
|
+
Register using the Ollama model name as `--runtime`:
|
|
208
266
|
|
|
209
267
|
```bash
|
|
210
|
-
# Mode C — backend already has the adapter loaded
|
|
211
268
|
shiftgate adapter add sql-lora --runtime sql-lora-ollama --tags sql --base llama3
|
|
212
269
|
```
|
|
213
270
|
|
|
214
271
|
shiftgate passes `runtime_name` (or falls back to `id`) as the Ollama model name.
|
|
215
272
|
|
|
216
|
-
###
|
|
217
|
-
|
|
218
|
-
Load adapters at server start with `--lora-modules`:
|
|
219
|
-
|
|
220
|
-
```bash
|
|
221
|
-
python -m vllm.entrypoints.openai.api_server \
|
|
222
|
-
--model meta-llama/Meta-Llama-3-8B \
|
|
223
|
-
--enable-lora \
|
|
224
|
-
--lora-modules sql-lora=/path/to/sql-lora
|
|
225
|
-
```
|
|
273
|
+
### Option 3 — Metadata-only registration
|
|
226
274
|
|
|
227
|
-
|
|
275
|
+
Catalogue an adapter without downloading weights — metadata only:
|
|
228
276
|
|
|
229
277
|
```bash
|
|
230
|
-
|
|
231
|
-
shiftgate adapter add sql-lora --runtime sql-lora --tags sql --base meta-llama/Meta-Llama-3-8B
|
|
278
|
+
shiftgate adapter add teknium/sql-lora --tags sql --base llama3
|
|
232
279
|
```
|
|
233
280
|
|
|
234
|
-
shiftgate
|
|
235
|
-
|
|
236
|
-
### Registering a HuggingFace adapter (Mode A)
|
|
281
|
+
You can also record a local path for your own reference (shiftgate still does not load the file):
|
|
237
282
|
|
|
238
283
|
```bash
|
|
239
|
-
|
|
240
|
-
shiftgate adapter add teknium/sql-lora --tags sql --base llama3
|
|
284
|
+
shiftgate adapter add sql-lora --local /models/sql-lora --tags sql --base llama3
|
|
241
285
|
```
|
|
242
286
|
|
|
243
|
-
|
|
287
|
+
Useful for exploring routing decisions before your backend is set up. To run inference, load the adapter in vLLM or Ollama and re-register with `--runtime`.
|
|
244
288
|
|
|
245
289
|
---
|
|
246
290
|
|
|
@@ -6,36 +6,74 @@
|
|
|
6
6
|
<img src="assets/demo.gif" alt="shiftgate routing a query to the right LoRA adapter" width="720">
|
|
7
7
|
</p>
|
|
8
8
|
|
|
9
|
-
**
|
|
10
|
-
shiftgate stores only adapter *metadata* — it never downloads, caches, or manages weights.
|
|
11
|
-
Your inference backend (Ollama, vLLM) is responsible for loading the weights; shiftgate tells it *which* adapter to use for each query.
|
|
9
|
+
> **shiftgate does not manage weights.** It stores adapter *metadata* only — no downloading, caching, or loading LoRA files. You start **Ollama** or **vLLM** with your models and adapters loaded; shiftgate embeds each query, picks the best task cluster, and tells the backend which adapter to use.
|
|
12
10
|
|
|
13
|
-
|
|
11
|
+
> **`shiftgate run` requires a running inference backend.** Routing-only commands (`shiftgate route`, `shiftgate init`) work without one. To generate text, Ollama (`localhost:11434`) or vLLM (`localhost:8000`) must already be running with your adapters loaded.
|
|
12
|
+
|
|
13
|
+
Instead of hardcoding which adapter to use, shiftgate matches your query against a catalog of task clusters using cosine similarity — then routes to the best-fit LoRA adapter on that backend.
|
|
14
14
|
|
|
15
15
|
---
|
|
16
16
|
|
|
17
17
|
## Quickstart
|
|
18
18
|
|
|
19
|
-
Requires **Python 3.10+**
|
|
19
|
+
Requires **Python 3.10+** and a running **Ollama** or **vLLM** instance for inference.
|
|
20
|
+
|
|
21
|
+
### 1. Install
|
|
20
22
|
|
|
21
23
|
```bash
|
|
22
|
-
# Install
|
|
23
24
|
uv tool install shiftgate
|
|
24
25
|
# or: pip install shiftgate
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### 2. Start your backend
|
|
29
|
+
|
|
30
|
+
**vLLM** (example — load adapters with `--lora-modules`):
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
python -m vllm.entrypoints.openai.api_server \
|
|
34
|
+
--model meta-llama/Meta-Llama-3-8B \
|
|
35
|
+
--enable-lora \
|
|
36
|
+
--lora-modules python-lora=/path/to/python-lora
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
**Ollama** (example — create a model that bundles base + adapter, then serve):
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
ollama create python-lora-ollama -f my-python-lora.Modelfile
|
|
43
|
+
ollama serve
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### 3. Initialise shiftgate
|
|
25
47
|
|
|
26
|
-
|
|
48
|
+
Creates `~/.shiftgate/` and computes task embeddings (one-time model download for routing):
|
|
49
|
+
|
|
50
|
+
```bash
|
|
27
51
|
shiftgate init
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### 4. Register adapters
|
|
28
55
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
56
|
+
Pick the option that matches your setup (see [Bring Your Own Models](#bring-your-own-models) for details):
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
# Option 1 — adapter already loaded in vLLM
|
|
60
|
+
shiftgate adapter add python-lora --runtime python-lora --tags python --base meta-llama/Meta-Llama-3-8B
|
|
33
61
|
|
|
34
|
-
#
|
|
35
|
-
shiftgate
|
|
62
|
+
# Option 2 — adapter already loaded in Ollama
|
|
63
|
+
shiftgate adapter add python-lora --runtime python-lora-ollama --tags python --base llama3
|
|
36
64
|
|
|
37
|
-
#
|
|
38
|
-
shiftgate
|
|
65
|
+
# Option 3 — metadata-only (catalogue a HuggingFace repo; no weights downloaded)
|
|
66
|
+
shiftgate adapter add teknium/python-lora --tags python --base llama3
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### 5. Run a query
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
# Route only — shows the decision, no inference
|
|
73
|
+
shiftgate route "write a python sorting function"
|
|
74
|
+
|
|
75
|
+
# Route + run through your backend
|
|
76
|
+
shiftgate run "write a python sorting function"
|
|
39
77
|
```
|
|
40
78
|
|
|
41
79
|
**Essential commands:** `init` · `adapter add` · `route` · `run` · `doctor`
|
|
@@ -51,12 +89,12 @@ shiftgate run "write a python sorting function"
|
|
|
51
89
|
```
|
|
52
90
|
╭────────────────────────── Routing Decision ──────────────────────────╮
|
|
53
91
|
│ Query "write a python sorting function" │
|
|
54
|
-
│ Matched Task Python Code Generation ████████████████░░ 91.2%
|
|
55
|
-
│ Adapter python-lora
|
|
56
|
-
│ Backend
|
|
92
|
+
│ Matched Task Python Code Generation ████████████████░░ 91.2% │
|
|
93
|
+
│ Adapter python-lora [meta-llama/Meta-Llama-3-8B] │
|
|
94
|
+
│ Backend vllm │
|
|
57
95
|
╰──────────────────────────────────────────────────────────────────────╯
|
|
58
96
|
|
|
59
|
-
Running via
|
|
97
|
+
Running via vllm…
|
|
60
98
|
|
|
61
99
|
────────────────────────────────── Response ──────────────────────────────────
|
|
62
100
|
def sort_array(arr):
|
|
@@ -88,16 +126,16 @@ shiftgate doctor
|
|
|
88
126
|
| **Adapter runtime availability** | For each registered adapter: linked status and whether it is **loaded** in the backend |
|
|
89
127
|
| **Unlinked task clusters** | Task clusters with no adapter wired — routing will match the task but cannot run inference |
|
|
90
128
|
|
|
91
|
-
**Runtime adapter verification**
|
|
129
|
+
**Runtime adapter verification** runs automatically when you register a backend-loaded adapter:
|
|
92
130
|
|
|
93
131
|
```bash
|
|
94
|
-
shiftgate adapter add
|
|
132
|
+
shiftgate adapter add python-lora --runtime python-lora --tags python --base llama3
|
|
95
133
|
# Backend: vllm ✓ verified ← adapter found in the running backend
|
|
96
|
-
# Backend: vllm ⚠ runtime '
|
|
134
|
+
# Backend: vllm ⚠ runtime 'python-lora' not loaded — did you pass --lora-modules?
|
|
97
135
|
# Backend: not running (verification skipped)
|
|
98
136
|
```
|
|
99
137
|
|
|
100
|
-
**Backend detection** is automatic
|
|
138
|
+
**Backend detection** is automatic. `shiftgate run`, `shiftgate status`, and `shiftgate doctor` probe Ollama first, then vLLM. No config file required.
|
|
101
139
|
|
|
102
140
|
---
|
|
103
141
|
|
|
@@ -124,8 +162,8 @@ User query
|
|
|
124
162
|
│ │
|
|
125
163
|
▼ ▼
|
|
126
164
|
┌─────────────────┐ ┌────────────────────────────┐
|
|
127
|
-
│ Task Registry │ │ Adapter Registry
|
|
128
|
-
│ ~/.shiftgate/ │ │ ~/.shiftgate/adapters.json
|
|
165
|
+
│ Task Registry │ │ Adapter Registry │
|
|
166
|
+
│ ~/.shiftgate/ │ │ ~/.shiftgate/adapters.json│
|
|
129
167
|
│ tasks.json │ │ │
|
|
130
168
|
│ (10 defaults) │ │ Add via: │
|
|
131
169
|
└─────────────────┘ │ shiftgate adapter add │
|
|
@@ -135,8 +173,8 @@ User query
|
|
|
135
173
|
┌────────────────────────────────┐
|
|
136
174
|
│ BackendRouter │
|
|
137
175
|
│ │
|
|
138
|
-
│ Ollama (localhost:11434)
|
|
139
|
-
│ vLLM (localhost:8000)
|
|
176
|
+
│ Ollama (localhost:11434) │
|
|
177
|
+
│ vLLM (localhost:8000) │
|
|
140
178
|
│ Auto-detected at runtime │
|
|
141
179
|
└────────────────────────────────┘
|
|
142
180
|
│
|
|
@@ -153,10 +191,30 @@ User query
|
|
|
153
191
|
|
|
154
192
|
## Bring Your Own Models
|
|
155
193
|
|
|
156
|
-
|
|
157
|
-
|
|
194
|
+
shiftgate is a routing layer. **You load weights into Ollama or vLLM first**, then register what you loaded so shiftgate can route to it.
|
|
195
|
+
|
|
196
|
+
You can also catalogue adapters you have not loaded yet (Option 3) — useful for `shiftgate route`, but `shiftgate run` will not produce output until the adapter is available in a running backend.
|
|
158
197
|
|
|
159
|
-
###
|
|
198
|
+
### Option 1 — Adapter already loaded in vLLM
|
|
199
|
+
|
|
200
|
+
Start vLLM with your adapters:
|
|
201
|
+
|
|
202
|
+
```bash
|
|
203
|
+
python -m vllm.entrypoints.openai.api_server \
|
|
204
|
+
--model meta-llama/Meta-Llama-3-8B \
|
|
205
|
+
--enable-lora \
|
|
206
|
+
--lora-modules sql-lora=/path/to/sql-lora
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
Register using the `--lora-modules` key as `--runtime`:
|
|
210
|
+
|
|
211
|
+
```bash
|
|
212
|
+
shiftgate adapter add sql-lora --runtime sql-lora --tags sql --base meta-llama/Meta-Llama-3-8B
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
shiftgate sends `"model": "<runtime_name>"` in each `/v1/chat/completions` request.
|
|
216
|
+
|
|
217
|
+
### Option 2 — Adapter already loaded in Ollama
|
|
160
218
|
|
|
161
219
|
Create a Modelfile that bundles your base model and adapter:
|
|
162
220
|
|
|
@@ -171,43 +229,29 @@ ollama create sql-lora-ollama -f my-sql-lora.Modelfile
|
|
|
171
229
|
ollama serve
|
|
172
230
|
```
|
|
173
231
|
|
|
174
|
-
Register
|
|
232
|
+
Register using the Ollama model name as `--runtime`:
|
|
175
233
|
|
|
176
234
|
```bash
|
|
177
|
-
# Mode C — backend already has the adapter loaded
|
|
178
235
|
shiftgate adapter add sql-lora --runtime sql-lora-ollama --tags sql --base llama3
|
|
179
236
|
```
|
|
180
237
|
|
|
181
238
|
shiftgate passes `runtime_name` (or falls back to `id`) as the Ollama model name.
|
|
182
239
|
|
|
183
|
-
###
|
|
184
|
-
|
|
185
|
-
Load adapters at server start with `--lora-modules`:
|
|
186
|
-
|
|
187
|
-
```bash
|
|
188
|
-
python -m vllm.entrypoints.openai.api_server \
|
|
189
|
-
--model meta-llama/Meta-Llama-3-8B \
|
|
190
|
-
--enable-lora \
|
|
191
|
-
--lora-modules sql-lora=/path/to/sql-lora
|
|
192
|
-
```
|
|
240
|
+
### Option 3 — Metadata-only registration
|
|
193
241
|
|
|
194
|
-
|
|
242
|
+
Catalogue an adapter without downloading weights — metadata only:
|
|
195
243
|
|
|
196
244
|
```bash
|
|
197
|
-
|
|
198
|
-
shiftgate adapter add sql-lora --runtime sql-lora --tags sql --base meta-llama/Meta-Llama-3-8B
|
|
245
|
+
shiftgate adapter add teknium/sql-lora --tags sql --base llama3
|
|
199
246
|
```
|
|
200
247
|
|
|
201
|
-
shiftgate
|
|
202
|
-
|
|
203
|
-
### Registering a HuggingFace adapter (Mode A)
|
|
248
|
+
You can also record a local path for your own reference (shiftgate still does not load the file):
|
|
204
249
|
|
|
205
250
|
```bash
|
|
206
|
-
|
|
207
|
-
shiftgate adapter add teknium/sql-lora --tags sql --base llama3
|
|
251
|
+
shiftgate adapter add sql-lora --local /models/sql-lora --tags sql --base llama3
|
|
208
252
|
```
|
|
209
253
|
|
|
210
|
-
|
|
254
|
+
Useful for exploring routing decisions before your backend is set up. To run inference, load the adapter in vLLM or Ollama and re-register with `--runtime`.
|
|
211
255
|
|
|
212
256
|
---
|
|
213
257
|
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "shiftgate"
|
|
7
|
-
version = "0.1.7"
|
|
7
|
+
version = "0.1.8"
|
|
8
8
|
description = "Intelligent routing layer that automatically selects the right LoRA adapter for each task in your local agent loop."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|