shift-sdk 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shift_sdk-0.3.2/PKG-INFO +376 -0
- shift_sdk-0.3.2/README.md +344 -0
- shift_sdk-0.3.2/pyproject.toml +60 -0
- shift_sdk-0.3.2/setup.cfg +4 -0
- shift_sdk-0.3.2/src/shift_sdk.egg-info/PKG-INFO +376 -0
- shift_sdk-0.3.2/src/shift_sdk.egg-info/SOURCES.txt +25 -0
- shift_sdk-0.3.2/src/shift_sdk.egg-info/dependency_links.txt +1 -0
- shift_sdk-0.3.2/src/shift_sdk.egg-info/requires.txt +17 -0
- shift_sdk-0.3.2/src/shift_sdk.egg-info/top_level.txt +1 -0
- shift_sdk-0.3.2/src/switch_sdk/__init__.py +96 -0
- shift_sdk-0.3.2/src/switch_sdk/auto_model.py +187 -0
- shift_sdk-0.3.2/src/switch_sdk/byok.py +209 -0
- shift_sdk-0.3.2/src/switch_sdk/client.py +872 -0
- shift_sdk-0.3.2/src/switch_sdk/context.py +51 -0
- shift_sdk-0.3.2/src/switch_sdk/errors.py +41 -0
- shift_sdk-0.3.2/src/switch_sdk/executorch_runtime.py +376 -0
- shift_sdk-0.3.2/src/switch_sdk/impact.py +134 -0
- shift_sdk-0.3.2/src/switch_sdk/local_models.py +359 -0
- shift_sdk-0.3.2/src/switch_sdk/local_runtime.py +34 -0
- shift_sdk-0.3.2/src/switch_sdk/models.py +346 -0
- shift_sdk-0.3.2/src/switch_sdk/py.typed +1 -0
- shift_sdk-0.3.2/src/switch_sdk/telemetry.py +98 -0
- shift_sdk-0.3.2/tests/test_client.py +430 -0
- shift_sdk-0.3.2/tests/test_executorch_runtime.py +69 -0
- shift_sdk-0.3.2/tests/test_local_models.py +77 -0
- shift_sdk-0.3.2/tests/test_region_switching_live.py +110 -0
- shift_sdk-0.3.2/tests/test_telemetry.py +56 -0
shift_sdk-0.3.2/PKG-INFO
ADDED
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: shift-sdk
|
|
3
|
+
Version: 0.3.2
|
|
4
|
+
Summary: Python SDK for Shift managed AI routing, telemetry, and local-first execution
|
|
5
|
+
Author: Shift
|
|
6
|
+
License-Expression: LicenseRef-Proprietary
|
|
7
|
+
Keywords: ai,llm,routing,telemetry,sustainability,executorch
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Typing :: Typed
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
Requires-Dist: httpx>=0.27.0
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
23
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
|
|
24
|
+
Requires-Dist: respx>=0.21.0; extra == "dev"
|
|
25
|
+
Provides-Extra: local
|
|
26
|
+
Requires-Dist: torch>=2.2.0; python_version < "3.14" and extra == "local"
|
|
27
|
+
Requires-Dist: transformers>=4.41.0; python_version < "3.14" and extra == "local"
|
|
28
|
+
Requires-Dist: optimum-executorch>=1.1.0; python_version < "3.14" and extra == "local"
|
|
29
|
+
Provides-Extra: publish
|
|
30
|
+
Requires-Dist: build>=1.2.2; extra == "publish"
|
|
31
|
+
Requires-Dist: twine>=5.1.1; extra == "publish"
|
|
32
|
+
|
|
33
|
+
# shift-sdk
|
|
34
|
+
|
|
35
|
+
Python SDK for the Shift (Switch gateway) managed API.
|
|
36
|
+
|
|
37
|
+
## Install
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install shift-sdk
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Import path remains:
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
from switch_sdk import SwitchClient
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
For local development:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
pip install -e .[dev]
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
For local ExecuTorch runtime work:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install -e .[dev,local]
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
For packaging and publishing:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pip install -e .[publish]
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Note: ExecuTorch wheels are not available for Python 3.14 yet. Use Python 3.10-3.13 (3.11 works well).
|
|
68
|
+
|
|
69
|
+
## Required values
|
|
70
|
+
|
|
71
|
+
- `base_url`: your gateway URL, for example `http://localhost:8000`
|
|
72
|
+
- `api_key`: your plain project key (for example `aura_...`), not the SHA256 hash
|
|
73
|
+
|
|
74
|
+
Environment shortcuts are supported:
|
|
75
|
+
|
|
76
|
+
- `SHIFT_BASE_URL` (fallback: `SWITCH_BASE_URL`)
|
|
77
|
+
- `SHIFT_API_KEY` (fallbacks: `SWITCH_API_KEY`, `API_KEY`)
|
|
78
|
+
|
|
79
|
+
## Quick start
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
import asyncio
|
|
83
|
+
from switch_sdk import SwitchClient, ChatMessage
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
async def main() -> None:
|
|
87
|
+
async with SwitchClient.from_env() as client:
|
|
88
|
+
completion = await client.chat(
|
|
89
|
+
model="gpt-5",
|
|
90
|
+
messages=[ChatMessage(role="user", content="Reply with: SDK_OK")],
|
|
91
|
+
residency="US",
|
|
92
|
+
sla="realtime",
|
|
93
|
+
capability_flags={"force_cloud": True, "preferred_region": "eastus"},
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
print(completion.choices[0].message.content)
|
|
97
|
+
print(completion.switch_meta["route"]["target"]["region"])
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
asyncio.run(main())
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Set env vars first:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
export SHIFT_BASE_URL=http://localhost:8000
|
|
107
|
+
export SHIFT_API_KEY=aura_your_plain_project_key
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## BYOK mode (prompt privacy)
|
|
111
|
+
|
|
112
|
+
Use `chat_byok()` when you want Shift to do routing/observability while your
|
|
113
|
+
prompt is sent directly to Azure OpenAI with your own API key.
|
|
114
|
+
|
|
115
|
+
- Shift sees: route request + telemetry metadata.
|
|
116
|
+
- Shift does **not** see: prompt/messages payload.
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
import asyncio
|
|
120
|
+
from switch_sdk import AzureBYOKConfig, AzureRegionCredential, ChatMessage, SwitchClient
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
async def main() -> None:
|
|
124
|
+
byok = AzureBYOKConfig(
|
|
125
|
+
api_version="2025-01-01-preview",
|
|
126
|
+
regions={
|
|
127
|
+
"eastus": AzureRegionCredential(
|
|
128
|
+
endpoint="https://shift-eastus.openai.azure.com",
|
|
129
|
+
api_key="AZURE_EASTUS_KEY",
|
|
130
|
+
),
|
|
131
|
+
"westus": AzureRegionCredential(
|
|
132
|
+
endpoint="https://shift-westus.openai.azure.com",
|
|
133
|
+
api_key="AZURE_WESTUS_KEY",
|
|
134
|
+
),
|
|
135
|
+
"centralus": AzureRegionCredential(
|
|
136
|
+
endpoint="https://shift-centralus.openai.azure.com",
|
|
137
|
+
api_key="AZURE_CENTRALUS_KEY",
|
|
138
|
+
),
|
|
139
|
+
},
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
async with SwitchClient.from_env(byok_azure=byok) as client:
|
|
143
|
+
completion = await client.chat_byok(
|
|
144
|
+
model="auto",
|
|
145
|
+
messages=[ChatMessage(role="user", content="Reply exactly: BYOK_OK")],
|
|
146
|
+
residency="US",
|
|
147
|
+
capability_flags={"auto_model": True},
|
|
148
|
+
)
|
|
149
|
+
print(completion.choices[0].message.content)
|
|
150
|
+
print(completion.switch_meta["route"]["target"]["region"])
|
|
151
|
+
print(completion.switch_meta["resolved_model"])
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
asyncio.run(main())
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
Environment-based BYOK config is also supported:
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
export SHIFT_BYOK_AZURE_EASTUS_ENDPOINT=https://shift-eastus.openai.azure.com
|
|
161
|
+
export SHIFT_BYOK_AZURE_EASTUS_API_KEY=...
|
|
162
|
+
export SHIFT_BYOK_AZURE_WESTUS_ENDPOINT=https://shift-westus.openai.azure.com
|
|
163
|
+
export SHIFT_BYOK_AZURE_WESTUS_API_KEY=...
|
|
164
|
+
export SHIFT_BYOK_AZURE_CENTRALUS_ENDPOINT=https://shift-centralus.openai.azure.com
|
|
165
|
+
export SHIFT_BYOK_AZURE_CENTRALUS_API_KEY=...
|
|
166
|
+
export SHIFT_BYOK_AZURE_API_VERSION=2025-01-01-preview
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
async with SwitchClient.from_env(load_byok_azure_from_env=True) as client:
|
|
171
|
+
completion = await client.chat_byok(
|
|
172
|
+
model="auto",
|
|
173
|
+
messages=[ChatMessage(role="user", content="Reply exactly: PRIVACY_OK")],
|
|
174
|
+
capability_flags={"auto_model": True},
|
|
175
|
+
)
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## Hybrid local-first mode (ExecuTorch-ready)
|
|
179
|
+
|
|
180
|
+
`chat_hybrid()` tries local execution first, then falls back to cloud when needed.
|
|
181
|
+
Local models are cached on disk and downloaded only once per model version.
|
|
182
|
+
|
|
183
|
+
```python
|
|
184
|
+
import asyncio
|
|
185
|
+
from switch_sdk import ChatMessage, LocalModelManager, SwitchClient
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
manifest = [
|
|
189
|
+
{
|
|
190
|
+
"model_id": "smollm2-135m",
|
|
191
|
+
"task": "chat",
|
|
192
|
+
"download_url": "https://your-model-host/smollm2-135m.pte",
|
|
193
|
+
"sha256": "replace_with_sha256",
|
|
194
|
+
"size_mb": 550,
|
|
195
|
+
"min_ram_gb": 4,
|
|
196
|
+
"max_prompt_chars": 280,
|
|
197
|
+
"rank": 10,
|
|
198
|
+
},
|
|
199
|
+
]
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
async def main() -> None:
|
|
203
|
+
manager = LocalModelManager(cache_dir="~/.shift/models", manifest=manifest)
|
|
204
|
+
# Optional: real ExecuTorch adapter (requires deps below)
|
|
205
|
+
from switch_sdk import build_executorch_text_runtime
|
|
206
|
+
local_runtime = build_executorch_text_runtime(
|
|
207
|
+
tokenizer_source="HuggingFaceTB/SmolLM2-135M-Instruct",
|
|
208
|
+
max_new_tokens=96,
|
|
209
|
+
prefer_optimum=True,
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
async with SwitchClient(
|
|
213
|
+
base_url="http://localhost:8000",
|
|
214
|
+
api_key="aura_your_plain_project_key",
|
|
215
|
+
local_model_manager=manager,
|
|
216
|
+
local_runtime=local_runtime,
|
|
217
|
+
) as client:
|
|
218
|
+
completion = await client.chat_hybrid(
|
|
219
|
+
model="auto",
|
|
220
|
+
messages=[ChatMessage(role="user", content="Reply exactly: LOCAL_OK")],
|
|
221
|
+
capability_flags={"auto_model": True},
|
|
222
|
+
)
|
|
223
|
+
print(completion.model)
|
|
224
|
+
print(completion.choices[0].message.content)
|
|
225
|
+
print(completion.switch_meta)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
asyncio.run(main())
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
Notes:
|
|
232
|
+
- Default local runtime is a stub (for wiring/tests).
|
|
233
|
+
- `build_executorch_text_runtime(...)` provides a real adapter that prefers Optimum ExecuTorch and falls back to raw `executorch.runtime`.
|
|
234
|
+
- Cache path format: `~/.shift/models/<model_id>/<version>/model.pte`
|
|
235
|
+
- LRU eviction is applied when cache exceeds `max_cache_gb`.
|
|
236
|
+
|
|
237
|
+
Install local runtime dependencies:
|
|
238
|
+
|
|
239
|
+
```bash
|
|
240
|
+
pip install -e .[local]
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
Ready-made demo manifest:
|
|
244
|
+
|
|
245
|
+
- `examples/local_manifest_smollm2_135m.json`
|
|
246
|
+
|
|
247
|
+
Runtime callable contract:
|
|
248
|
+
|
|
249
|
+
```python
|
|
250
|
+
from switch_sdk import ChatMessage, LocalModelHandle
|
|
251
|
+
|
|
252
|
+
async def my_executorch_runtime(messages: list[ChatMessage], handle: LocalModelHandle) -> str:
|
|
253
|
+
# Load/use handle.path (.pte) with your ExecuTorch integration.
|
|
254
|
+
# Return assistant text.
|
|
255
|
+
return "LOCAL_EXECUTORCH_OK"
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
## Routing-only call
|
|
259
|
+
|
|
260
|
+
```python
|
|
261
|
+
decision = await client.route(
|
|
262
|
+
model="gpt-5",
|
|
263
|
+
residency="US",
|
|
264
|
+
sla="realtime",
|
|
265
|
+
capability_flags={"force_cloud": True},
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
print(decision.target.region)
|
|
269
|
+
print(decision.scores)
|
|
270
|
+
print(decision.candidate_breakdown)
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
## Dashboard + carbon endpoints
|
|
274
|
+
|
|
275
|
+
```python
|
|
276
|
+
summary = await client.get_dashboard_summary()
|
|
277
|
+
feed = await client.get_dashboard_feed(limit=20)
|
|
278
|
+
carbon = await client.get_live_carbon()
|
|
279
|
+
|
|
280
|
+
print(summary.summary.total_requests)
|
|
281
|
+
print(len(feed.items))
|
|
282
|
+
print(carbon.provider, carbon.regions.get("eastus"))
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
## Custom telemetry event
|
|
286
|
+
|
|
287
|
+
```python
|
|
288
|
+
from switch_sdk import TelemetryEvent
|
|
289
|
+
|
|
290
|
+
await client.track_event(
|
|
291
|
+
TelemetryEvent(
|
|
292
|
+
event_type="sdk_custom",
|
|
293
|
+
request_id="custom-123",
|
|
294
|
+
model="gpt-5",
|
|
295
|
+
metadata={"feature": "my_feature"},
|
|
296
|
+
)
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
await client.flush_telemetry()
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
## Error handling
|
|
303
|
+
|
|
304
|
+
```python
|
|
305
|
+
from switch_sdk import SwitchAPIError, SwitchNetworkError, SwitchTimeoutError
|
|
306
|
+
|
|
307
|
+
try:
|
|
308
|
+
await client.route(model="gpt-5")
|
|
309
|
+
except SwitchAPIError as exc:
|
|
310
|
+
print(exc.status_code, exc.detail)
|
|
311
|
+
except SwitchTimeoutError:
|
|
312
|
+
print("Request timed out")
|
|
313
|
+
except SwitchNetworkError as exc:
|
|
314
|
+
print(f"Network issue: {exc}")
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
## Notes
|
|
318
|
+
|
|
319
|
+
- The SDK is async-first.
|
|
320
|
+
- Use `async with SwitchClient(...)` so telemetry flushes cleanly on exit.
|
|
321
|
+
- Retries/backoff are built in for transient failures.
|
|
322
|
+
- Telemetry is best-effort and never blocks successful chat/route calls.
|
|
323
|
+
|
|
324
|
+
## Live switching checks
|
|
325
|
+
|
|
326
|
+
Automatic east/west region-switch verification script:
|
|
327
|
+
|
|
328
|
+
```bash
|
|
329
|
+
cd switch-sdk
|
|
330
|
+
.venv/bin/python examples/test_region_switching.py \
|
|
331
|
+
--base-url http://localhost:8000 \
|
|
332
|
+
--api-key aura_your_plain_project_key \
|
|
333
|
+
--east-region eastus \
|
|
334
|
+
--west-region westus \
|
|
335
|
+
--central-region centralus \
|
|
336
|
+
--check-chat
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
## From-env example script
|
|
340
|
+
|
|
341
|
+
```bash
|
|
342
|
+
export SHIFT_BASE_URL=http://localhost:8000
|
|
343
|
+
export SHIFT_API_KEY=aura_your_plain_project_key
|
|
344
|
+
python examples/test_from_env.py
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
## Full user-journey script
|
|
348
|
+
|
|
349
|
+
```bash
|
|
350
|
+
python examples/test_user_journey.py \
|
|
351
|
+
--base-url http://localhost:8000 \
|
|
352
|
+
--api-key aura_your_plain_project_key
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
## Local ExecuTorch sanity check
|
|
356
|
+
|
|
357
|
+
Force local execution and fail if local runtime does not work:
|
|
358
|
+
|
|
359
|
+
```bash
|
|
360
|
+
cd switch-sdk
|
|
361
|
+
.venv311/bin/python examples/test_hybrid_local.py \
|
|
362
|
+
--base-url http://localhost:8000 \
|
|
363
|
+
--api-key dummy_local_only \
|
|
364
|
+
--manifest-path examples/local_manifest_smollm2_135m.json \
|
|
365
|
+
--executorch \
|
|
366
|
+
--prefer-runtime \
|
|
367
|
+
--tokenizer-source HuggingFaceTB/SmolLM2-135M-Instruct \
|
|
368
|
+
--no-download \
|
|
369
|
+
--no-cloud-fallback
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
Expected: output JSON includes `"source": "sdk-local"` in `switch_meta`.
|
|
373
|
+
|
|
374
|
+
## Release
|
|
375
|
+
|
|
376
|
+
See `RELEASING.md` for TestPyPI and PyPI release steps.
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
# shift-sdk
|
|
2
|
+
|
|
3
|
+
Python SDK for the Shift (Switch gateway) managed API.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install shift-sdk
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Import path remains:
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from switch_sdk import SwitchClient
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
For local development:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install -e .[dev]
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
For local ExecuTorch runtime work:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install -e .[dev,local]
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
For packaging and publishing:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install -e .[publish]
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Note: ExecuTorch wheels are not available for Python 3.14 yet. Use Python 3.10-3.13 (3.11 works well).
|
|
36
|
+
|
|
37
|
+
## Required values
|
|
38
|
+
|
|
39
|
+
- `base_url`: your gateway URL, for example `http://localhost:8000`
|
|
40
|
+
- `api_key`: your plain project key (for example `aura_...`), not the SHA256 hash
|
|
41
|
+
|
|
42
|
+
Environment shortcuts are supported:
|
|
43
|
+
|
|
44
|
+
- `SHIFT_BASE_URL` (fallback: `SWITCH_BASE_URL`)
|
|
45
|
+
- `SHIFT_API_KEY` (fallbacks: `SWITCH_API_KEY`, `API_KEY`)
|
|
46
|
+
|
|
47
|
+
## Quick start
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
import asyncio
|
|
51
|
+
from switch_sdk import SwitchClient, ChatMessage
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
async def main() -> None:
|
|
55
|
+
async with SwitchClient.from_env() as client:
|
|
56
|
+
completion = await client.chat(
|
|
57
|
+
model="gpt-5",
|
|
58
|
+
messages=[ChatMessage(role="user", content="Reply with: SDK_OK")],
|
|
59
|
+
residency="US",
|
|
60
|
+
sla="realtime",
|
|
61
|
+
capability_flags={"force_cloud": True, "preferred_region": "eastus"},
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
print(completion.choices[0].message.content)
|
|
65
|
+
print(completion.switch_meta["route"]["target"]["region"])
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
asyncio.run(main())
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Set env vars first:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
export SHIFT_BASE_URL=http://localhost:8000
|
|
75
|
+
export SHIFT_API_KEY=aura_your_plain_project_key
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## BYOK mode (prompt privacy)
|
|
79
|
+
|
|
80
|
+
Use `chat_byok()` when you want Shift to do routing/observability while your
|
|
81
|
+
prompt is sent directly to Azure OpenAI with your own API key.
|
|
82
|
+
|
|
83
|
+
- Shift sees: route request + telemetry metadata.
|
|
84
|
+
- Shift does **not** see: prompt/messages payload.
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
import asyncio
|
|
88
|
+
from switch_sdk import AzureBYOKConfig, AzureRegionCredential, ChatMessage, SwitchClient
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
async def main() -> None:
|
|
92
|
+
byok = AzureBYOKConfig(
|
|
93
|
+
api_version="2025-01-01-preview",
|
|
94
|
+
regions={
|
|
95
|
+
"eastus": AzureRegionCredential(
|
|
96
|
+
endpoint="https://shift-eastus.openai.azure.com",
|
|
97
|
+
api_key="AZURE_EASTUS_KEY",
|
|
98
|
+
),
|
|
99
|
+
"westus": AzureRegionCredential(
|
|
100
|
+
endpoint="https://shift-westus.openai.azure.com",
|
|
101
|
+
api_key="AZURE_WESTUS_KEY",
|
|
102
|
+
),
|
|
103
|
+
"centralus": AzureRegionCredential(
|
|
104
|
+
endpoint="https://shift-centralus.openai.azure.com",
|
|
105
|
+
api_key="AZURE_CENTRALUS_KEY",
|
|
106
|
+
),
|
|
107
|
+
},
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
async with SwitchClient.from_env(byok_azure=byok) as client:
|
|
111
|
+
completion = await client.chat_byok(
|
|
112
|
+
model="auto",
|
|
113
|
+
messages=[ChatMessage(role="user", content="Reply exactly: BYOK_OK")],
|
|
114
|
+
residency="US",
|
|
115
|
+
capability_flags={"auto_model": True},
|
|
116
|
+
)
|
|
117
|
+
print(completion.choices[0].message.content)
|
|
118
|
+
print(completion.switch_meta["route"]["target"]["region"])
|
|
119
|
+
print(completion.switch_meta["resolved_model"])
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
asyncio.run(main())
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
Environment-based BYOK config is also supported:
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
export SHIFT_BYOK_AZURE_EASTUS_ENDPOINT=https://shift-eastus.openai.azure.com
|
|
129
|
+
export SHIFT_BYOK_AZURE_EASTUS_API_KEY=...
|
|
130
|
+
export SHIFT_BYOK_AZURE_WESTUS_ENDPOINT=https://shift-westus.openai.azure.com
|
|
131
|
+
export SHIFT_BYOK_AZURE_WESTUS_API_KEY=...
|
|
132
|
+
export SHIFT_BYOK_AZURE_CENTRALUS_ENDPOINT=https://shift-centralus.openai.azure.com
|
|
133
|
+
export SHIFT_BYOK_AZURE_CENTRALUS_API_KEY=...
|
|
134
|
+
export SHIFT_BYOK_AZURE_API_VERSION=2025-01-01-preview
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
async with SwitchClient.from_env(load_byok_azure_from_env=True) as client:
|
|
139
|
+
completion = await client.chat_byok(
|
|
140
|
+
model="auto",
|
|
141
|
+
messages=[ChatMessage(role="user", content="Reply exactly: PRIVACY_OK")],
|
|
142
|
+
capability_flags={"auto_model": True},
|
|
143
|
+
)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Hybrid local-first mode (ExecuTorch-ready)
|
|
147
|
+
|
|
148
|
+
`chat_hybrid()` tries local execution first, then falls back to cloud when needed.
|
|
149
|
+
Local models are cached on disk and downloaded only once per model version.
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
import asyncio
|
|
153
|
+
from switch_sdk import ChatMessage, LocalModelManager, SwitchClient
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
manifest = [
|
|
157
|
+
{
|
|
158
|
+
"model_id": "smollm2-135m",
|
|
159
|
+
"task": "chat",
|
|
160
|
+
"download_url": "https://your-model-host/smollm2-135m.pte",
|
|
161
|
+
"sha256": "replace_with_sha256",
|
|
162
|
+
"size_mb": 550,
|
|
163
|
+
"min_ram_gb": 4,
|
|
164
|
+
"max_prompt_chars": 280,
|
|
165
|
+
"rank": 10,
|
|
166
|
+
},
|
|
167
|
+
]
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
async def main() -> None:
|
|
171
|
+
manager = LocalModelManager(cache_dir="~/.shift/models", manifest=manifest)
|
|
172
|
+
# Optional: real ExecuTorch adapter (requires deps below)
|
|
173
|
+
from switch_sdk import build_executorch_text_runtime
|
|
174
|
+
local_runtime = build_executorch_text_runtime(
|
|
175
|
+
tokenizer_source="HuggingFaceTB/SmolLM2-135M-Instruct",
|
|
176
|
+
max_new_tokens=96,
|
|
177
|
+
prefer_optimum=True,
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
async with SwitchClient(
|
|
181
|
+
base_url="http://localhost:8000",
|
|
182
|
+
api_key="aura_your_plain_project_key",
|
|
183
|
+
local_model_manager=manager,
|
|
184
|
+
local_runtime=local_runtime,
|
|
185
|
+
) as client:
|
|
186
|
+
completion = await client.chat_hybrid(
|
|
187
|
+
model="auto",
|
|
188
|
+
messages=[ChatMessage(role="user", content="Reply exactly: LOCAL_OK")],
|
|
189
|
+
capability_flags={"auto_model": True},
|
|
190
|
+
)
|
|
191
|
+
print(completion.model)
|
|
192
|
+
print(completion.choices[0].message.content)
|
|
193
|
+
print(completion.switch_meta)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
asyncio.run(main())
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
Notes:
|
|
200
|
+
- Default local runtime is a stub (for wiring/tests).
|
|
201
|
+
- `build_executorch_text_runtime(...)` provides a real adapter that prefers Optimum ExecuTorch and falls back to raw `executorch.runtime`.
|
|
202
|
+
- Cache path format: `~/.shift/models/<model_id>/<version>/model.pte`
|
|
203
|
+
- LRU eviction is applied when cache exceeds `max_cache_gb`.
|
|
204
|
+
|
|
205
|
+
Install local runtime dependencies:
|
|
206
|
+
|
|
207
|
+
```bash
|
|
208
|
+
pip install -e .[local]
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
Ready-made demo manifest:
|
|
212
|
+
|
|
213
|
+
- `examples/local_manifest_smollm2_135m.json`
|
|
214
|
+
|
|
215
|
+
Runtime callable contract:
|
|
216
|
+
|
|
217
|
+
```python
|
|
218
|
+
from switch_sdk import ChatMessage, LocalModelHandle
|
|
219
|
+
|
|
220
|
+
async def my_executorch_runtime(messages: list[ChatMessage], handle: LocalModelHandle) -> str:
|
|
221
|
+
# Load/use handle.path (.pte) with your ExecuTorch integration.
|
|
222
|
+
# Return assistant text.
|
|
223
|
+
return "LOCAL_EXECUTORCH_OK"
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
## Routing-only call
|
|
227
|
+
|
|
228
|
+
```python
|
|
229
|
+
decision = await client.route(
|
|
230
|
+
model="gpt-5",
|
|
231
|
+
residency="US",
|
|
232
|
+
sla="realtime",
|
|
233
|
+
capability_flags={"force_cloud": True},
|
|
234
|
+
)
|
|
235
|
+
|
|
236
|
+
print(decision.target.region)
|
|
237
|
+
print(decision.scores)
|
|
238
|
+
print(decision.candidate_breakdown)
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## Dashboard + carbon endpoints
|
|
242
|
+
|
|
243
|
+
```python
|
|
244
|
+
summary = await client.get_dashboard_summary()
|
|
245
|
+
feed = await client.get_dashboard_feed(limit=20)
|
|
246
|
+
carbon = await client.get_live_carbon()
|
|
247
|
+
|
|
248
|
+
print(summary.summary.total_requests)
|
|
249
|
+
print(len(feed.items))
|
|
250
|
+
print(carbon.provider, carbon.regions.get("eastus"))
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
## Custom telemetry event
|
|
254
|
+
|
|
255
|
+
```python
|
|
256
|
+
from switch_sdk import TelemetryEvent
|
|
257
|
+
|
|
258
|
+
await client.track_event(
|
|
259
|
+
TelemetryEvent(
|
|
260
|
+
event_type="sdk_custom",
|
|
261
|
+
request_id="custom-123",
|
|
262
|
+
model="gpt-5",
|
|
263
|
+
metadata={"feature": "my_feature"},
|
|
264
|
+
)
|
|
265
|
+
)
|
|
266
|
+
|
|
267
|
+
await client.flush_telemetry()
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
## Error handling
|
|
271
|
+
|
|
272
|
+
```python
|
|
273
|
+
from switch_sdk import SwitchAPIError, SwitchNetworkError, SwitchTimeoutError
|
|
274
|
+
|
|
275
|
+
try:
|
|
276
|
+
await client.route(model="gpt-5")
|
|
277
|
+
except SwitchAPIError as exc:
|
|
278
|
+
print(exc.status_code, exc.detail)
|
|
279
|
+
except SwitchTimeoutError:
|
|
280
|
+
print("Request timed out")
|
|
281
|
+
except SwitchNetworkError as exc:
|
|
282
|
+
print(f"Network issue: {exc}")
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
## Notes
|
|
286
|
+
|
|
287
|
+
- The SDK is async-first.
|
|
288
|
+
- Use `async with SwitchClient(...)` so telemetry flushes cleanly on exit.
|
|
289
|
+
- Retries/backoff are built in for transient failures.
|
|
290
|
+
- Telemetry is best-effort and never blocks successful chat/route calls.
|
|
291
|
+
|
|
292
|
+
## Live switching checks
|
|
293
|
+
|
|
294
|
+
Automatic east/west region-switch verification script:
|
|
295
|
+
|
|
296
|
+
```bash
|
|
297
|
+
cd switch-sdk
|
|
298
|
+
.venv/bin/python examples/test_region_switching.py \
|
|
299
|
+
--base-url http://localhost:8000 \
|
|
300
|
+
--api-key aura_your_plain_project_key \
|
|
301
|
+
--east-region eastus \
|
|
302
|
+
--west-region westus \
|
|
303
|
+
--central-region centralus \
|
|
304
|
+
--check-chat
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
## From-env example script
|
|
308
|
+
|
|
309
|
+
```bash
|
|
310
|
+
export SHIFT_BASE_URL=http://localhost:8000
|
|
311
|
+
export SHIFT_API_KEY=aura_your_plain_project_key
|
|
312
|
+
python examples/test_from_env.py
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
## Full user-journey script
|
|
316
|
+
|
|
317
|
+
```bash
|
|
318
|
+
python examples/test_user_journey.py \
|
|
319
|
+
--base-url http://localhost:8000 \
|
|
320
|
+
--api-key aura_your_plain_project_key
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
## Local ExecuTorch sanity check
|
|
324
|
+
|
|
325
|
+
Force local execution and fail if local runtime does not work:
|
|
326
|
+
|
|
327
|
+
```bash
|
|
328
|
+
cd switch-sdk
|
|
329
|
+
.venv311/bin/python examples/test_hybrid_local.py \
|
|
330
|
+
--base-url http://localhost:8000 \
|
|
331
|
+
--api-key dummy_local_only \
|
|
332
|
+
--manifest-path examples/local_manifest_smollm2_135m.json \
|
|
333
|
+
--executorch \
|
|
334
|
+
--prefer-runtime \
|
|
335
|
+
--tokenizer-source HuggingFaceTB/SmolLM2-135M-Instruct \
|
|
336
|
+
--no-download \
|
|
337
|
+
--no-cloud-fallback
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
Expected: output JSON includes `"source": "sdk-local"` in `switch_meta`.
|
|
341
|
+
|
|
342
|
+
## Release
|
|
343
|
+
|
|
344
|
+
See `RELEASING.md` for TestPyPI and PyPI release steps.
|