shift-sdk 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,376 @@
1
+ Metadata-Version: 2.4
2
+ Name: shift-sdk
3
+ Version: 0.3.2
4
+ Summary: Python SDK for Shift managed AI routing, telemetry, and local-first execution
5
+ Author: Shift
6
+ License-Expression: LicenseRef-Proprietary
7
+ Keywords: ai,llm,routing,telemetry,sustainability,executorch
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Programming Language :: Python :: 3.14
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Typing :: Typed
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ Requires-Dist: httpx>=0.27.0
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
23
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
24
+ Requires-Dist: respx>=0.21.0; extra == "dev"
25
+ Provides-Extra: local
26
+ Requires-Dist: torch>=2.2.0; python_version < "3.14" and extra == "local"
27
+ Requires-Dist: transformers>=4.41.0; python_version < "3.14" and extra == "local"
28
+ Requires-Dist: optimum-executorch>=1.1.0; python_version < "3.14" and extra == "local"
29
+ Provides-Extra: publish
30
+ Requires-Dist: build>=1.2.2; extra == "publish"
31
+ Requires-Dist: twine>=5.1.1; extra == "publish"
32
+
33
+ # shift-sdk
34
+
35
+ Python SDK for the Shift (Switch gateway) managed API.
36
+
37
+ ## Install
38
+
39
+ ```bash
40
+ pip install shift-sdk
41
+ ```
42
+
43
+ Import path remains:
44
+
45
+ ```python
46
+ from switch_sdk import SwitchClient
47
+ ```
48
+
49
+ For local development:
50
+
51
+ ```bash
52
+ pip install -e .[dev]
53
+ ```
54
+
55
+ For local ExecuTorch runtime work:
56
+
57
+ ```bash
58
+ pip install -e .[dev,local]
59
+ ```
60
+
61
+ For packaging and publishing:
62
+
63
+ ```bash
64
+ pip install -e .[publish]
65
+ ```
66
+
67
+ Note: ExecuTorch wheels are not available for Python 3.14 yet. Use Python 3.10-3.13 (3.11 works well).
68
+
69
+ ## Required values
70
+
71
+ - `base_url`: your gateway URL, for example `http://localhost:8000`
72
+ - `api_key`: your plain project key (for example `aura_...`), not the SHA256 hash
73
+
74
+ Environment shortcuts are supported:
75
+
76
+ - `SHIFT_BASE_URL` (fallback: `SWITCH_BASE_URL`)
77
+ - `SHIFT_API_KEY` (fallbacks: `SWITCH_API_KEY`, `API_KEY`)
78
+
79
+ ## Quick start
80
+
81
+ ```python
82
+ import asyncio
83
+ from switch_sdk import SwitchClient, ChatMessage
84
+
85
+
86
+ async def main() -> None:
87
+ async with SwitchClient.from_env() as client:
88
+ completion = await client.chat(
89
+ model="gpt-5",
90
+ messages=[ChatMessage(role="user", content="Reply with: SDK_OK")],
91
+ residency="US",
92
+ sla="realtime",
93
+ capability_flags={"force_cloud": True, "preferred_region": "eastus"},
94
+ )
95
+
96
+ print(completion.choices[0].message.content)
97
+ print(completion.switch_meta["route"]["target"]["region"])
98
+
99
+
100
+ asyncio.run(main())
101
+ ```
102
+
103
+ Set env vars first:
104
+
105
+ ```bash
106
+ export SHIFT_BASE_URL=http://localhost:8000
107
+ export SHIFT_API_KEY=aura_your_plain_project_key
108
+ ```
109
+
110
+ ## BYOK mode (prompt privacy)
111
+
112
+ Use `chat_byok()` when you want Shift to do routing/observability while your
113
+ prompt is sent directly to Azure OpenAI with your own API key.
114
+
115
+ - Shift sees: route request + telemetry metadata.
116
+ - Shift does **not** see: prompt/messages payload.
117
+
118
+ ```python
119
+ import asyncio
120
+ from switch_sdk import AzureBYOKConfig, AzureRegionCredential, ChatMessage, SwitchClient
121
+
122
+
123
+ async def main() -> None:
124
+ byok = AzureBYOKConfig(
125
+ api_version="2025-01-01-preview",
126
+ regions={
127
+ "eastus": AzureRegionCredential(
128
+ endpoint="https://shift-eastus.openai.azure.com",
129
+ api_key="AZURE_EASTUS_KEY",
130
+ ),
131
+ "westus": AzureRegionCredential(
132
+ endpoint="https://shift-westus.openai.azure.com",
133
+ api_key="AZURE_WESTUS_KEY",
134
+ ),
135
+ "centralus": AzureRegionCredential(
136
+ endpoint="https://shift-centralus.openai.azure.com",
137
+ api_key="AZURE_CENTRALUS_KEY",
138
+ ),
139
+ },
140
+ )
141
+
142
+ async with SwitchClient.from_env(byok_azure=byok) as client:
143
+ completion = await client.chat_byok(
144
+ model="auto",
145
+ messages=[ChatMessage(role="user", content="Reply exactly: BYOK_OK")],
146
+ residency="US",
147
+ capability_flags={"auto_model": True},
148
+ )
149
+ print(completion.choices[0].message.content)
150
+ print(completion.switch_meta["route"]["target"]["region"])
151
+ print(completion.switch_meta["resolved_model"])
152
+
153
+
154
+ asyncio.run(main())
155
+ ```
156
+
157
+ Environment-based BYOK config is also supported:
158
+
159
+ ```bash
160
+ export SHIFT_BYOK_AZURE_EASTUS_ENDPOINT=https://shift-eastus.openai.azure.com
161
+ export SHIFT_BYOK_AZURE_EASTUS_API_KEY=...
162
+ export SHIFT_BYOK_AZURE_WESTUS_ENDPOINT=https://shift-westus.openai.azure.com
163
+ export SHIFT_BYOK_AZURE_WESTUS_API_KEY=...
164
+ export SHIFT_BYOK_AZURE_CENTRALUS_ENDPOINT=https://shift-centralus.openai.azure.com
165
+ export SHIFT_BYOK_AZURE_CENTRALUS_API_KEY=...
166
+ export SHIFT_BYOK_AZURE_API_VERSION=2025-01-01-preview
167
+ ```
168
+
169
+ ```python
170
+ async with SwitchClient.from_env(load_byok_azure_from_env=True) as client:
171
+ completion = await client.chat_byok(
172
+ model="auto",
173
+ messages=[ChatMessage(role="user", content="Reply exactly: PRIVACY_OK")],
174
+ capability_flags={"auto_model": True},
175
+ )
176
+ ```
177
+
178
+ ## Hybrid local-first mode (ExecuTorch-ready)
179
+
180
+ `chat_hybrid()` tries local execution first, then falls back to cloud when needed.
181
+ Local models are cached on disk and downloaded only once per model version.
182
+
183
+ ```python
184
+ import asyncio
185
+ from switch_sdk import ChatMessage, LocalModelManager, SwitchClient
186
+
187
+
188
+ manifest = [
189
+ {
190
+ "model_id": "smollm2-135m",
191
+ "task": "chat",
192
+ "download_url": "https://your-model-host/smollm2-135m.pte",
193
+ "sha256": "replace_with_sha256",
194
+ "size_mb": 550,
195
+ "min_ram_gb": 4,
196
+ "max_prompt_chars": 280,
197
+ "rank": 10,
198
+ },
199
+ ]
200
+
201
+
202
+ async def main() -> None:
203
+ manager = LocalModelManager(cache_dir="~/.shift/models", manifest=manifest)
204
+ # Optional: real ExecuTorch adapter (requires deps below)
205
+ from switch_sdk import build_executorch_text_runtime
206
+ local_runtime = build_executorch_text_runtime(
207
+ tokenizer_source="HuggingFaceTB/SmolLM2-135M-Instruct",
208
+ max_new_tokens=96,
209
+ prefer_optimum=True,
210
+ )
211
+
212
+ async with SwitchClient(
213
+ base_url="http://localhost:8000",
214
+ api_key="aura_your_plain_project_key",
215
+ local_model_manager=manager,
216
+ local_runtime=local_runtime,
217
+ ) as client:
218
+ completion = await client.chat_hybrid(
219
+ model="auto",
220
+ messages=[ChatMessage(role="user", content="Reply exactly: LOCAL_OK")],
221
+ capability_flags={"auto_model": True},
222
+ )
223
+ print(completion.model)
224
+ print(completion.choices[0].message.content)
225
+ print(completion.switch_meta)
226
+
227
+
228
+ asyncio.run(main())
229
+ ```
230
+
231
+ Notes:
232
+ - Default local runtime is a stub (for wiring/tests).
233
+ - `build_executorch_text_runtime(...)` provides a real adapter that prefers Optimum ExecuTorch and falls back to raw `executorch.runtime`.
234
+ - Cache path format: `~/.shift/models/<model_id>/<version>/model.pte`
235
+ - LRU eviction is applied when cache exceeds `max_cache_gb`.
236
+
237
+ Install local runtime dependencies:
238
+
239
+ ```bash
240
+ pip install -e .[local]
241
+ ```
242
+
243
+ Ready-made demo manifest:
244
+
245
+ - `examples/local_manifest_smollm2_135m.json`
246
+
247
+ Runtime callable contract:
248
+
249
+ ```python
250
+ from switch_sdk import ChatMessage, LocalModelHandle
251
+
252
+ async def my_executorch_runtime(messages: list[ChatMessage], handle: LocalModelHandle) -> str:
253
+ # Load/use handle.path (.pte) with your ExecuTorch integration.
254
+ # Return assistant text.
255
+ return "LOCAL_EXECUTORCH_OK"
256
+ ```
257
+
258
+ ## Routing-only call
259
+
260
+ ```python
261
+ decision = await client.route(
262
+ model="gpt-5",
263
+ residency="US",
264
+ sla="realtime",
265
+ capability_flags={"force_cloud": True},
266
+ )
267
+
268
+ print(decision.target.region)
269
+ print(decision.scores)
270
+ print(decision.candidate_breakdown)
271
+ ```
272
+
273
+ ## Dashboard + carbon endpoints
274
+
275
+ ```python
276
+ summary = await client.get_dashboard_summary()
277
+ feed = await client.get_dashboard_feed(limit=20)
278
+ carbon = await client.get_live_carbon()
279
+
280
+ print(summary.summary.total_requests)
281
+ print(len(feed.items))
282
+ print(carbon.provider, carbon.regions.get("eastus"))
283
+ ```
284
+
285
+ ## Custom telemetry event
286
+
287
+ ```python
288
+ from switch_sdk import TelemetryEvent
289
+
290
+ await client.track_event(
291
+ TelemetryEvent(
292
+ event_type="sdk_custom",
293
+ request_id="custom-123",
294
+ model="gpt-5",
295
+ metadata={"feature": "my_feature"},
296
+ )
297
+ )
298
+
299
+ await client.flush_telemetry()
300
+ ```
301
+
302
+ ## Error handling
303
+
304
+ ```python
305
+ from switch_sdk import SwitchAPIError, SwitchNetworkError, SwitchTimeoutError
306
+
307
+ try:
308
+ await client.route(model="gpt-5")
309
+ except SwitchAPIError as exc:
310
+ print(exc.status_code, exc.detail)
311
+ except SwitchTimeoutError:
312
+ print("Request timed out")
313
+ except SwitchNetworkError as exc:
314
+ print(f"Network issue: {exc}")
315
+ ```
316
+
317
+ ## Notes
318
+
319
+ - The SDK is async-first.
320
+ - Use `async with SwitchClient(...)` so telemetry flushes cleanly on exit.
321
+ - Retries/backoff are built in for transient failures.
322
+ - Telemetry is best-effort and never blocks successful chat/route calls.
323
+
324
+ ## Live switching checks
325
+
326
+ Automatic east/west region-switch verification script:
327
+
328
+ ```bash
329
+ cd switch-sdk
330
+ .venv/bin/python examples/test_region_switching.py \
331
+ --base-url http://localhost:8000 \
332
+ --api-key aura_your_plain_project_key \
333
+ --east-region eastus \
334
+ --west-region westus \
335
+ --central-region centralus \
336
+ --check-chat
337
+ ```
338
+
339
+ ## From-env example script
340
+
341
+ ```bash
342
+ export SHIFT_BASE_URL=http://localhost:8000
343
+ export SHIFT_API_KEY=aura_your_plain_project_key
344
+ python examples/test_from_env.py
345
+ ```
346
+
347
+ ## Full user-journey script
348
+
349
+ ```bash
350
+ python examples/test_user_journey.py \
351
+ --base-url http://localhost:8000 \
352
+ --api-key aura_your_plain_project_key
353
+ ```
354
+
355
+ ## Local ExecuTorch sanity check
356
+
357
+ Force local execution and fail if local runtime does not work:
358
+
359
+ ```bash
360
+ cd switch-sdk
361
+ .venv311/bin/python examples/test_hybrid_local.py \
362
+ --base-url http://localhost:8000 \
363
+ --api-key dummy_local_only \
364
+ --manifest-path examples/local_manifest_smollm2_135m.json \
365
+ --executorch \
366
+ --prefer-runtime \
367
+ --tokenizer-source HuggingFaceTB/SmolLM2-135M-Instruct \
368
+ --no-download \
369
+ --no-cloud-fallback
370
+ ```
371
+
372
+ Expected: output JSON includes `"source": "sdk-local"` in `switch_meta`.
373
+
374
+ ## Release
375
+
376
+ See `RELEASING.md` in the repository root for TestPyPI and PyPI release steps.
@@ -0,0 +1,17 @@
1
+ switch_sdk/__init__.py,sha256=43sqzn3EtDc8rScV4tjs9zTvwVlrwlZyrmR3tUqDMuo,2579
2
+ switch_sdk/auto_model.py,sha256=lvrHNkADCzN3uKDGm6z-wL8P9eLzwrj2GvsNjJfigiI,5361
3
+ switch_sdk/byok.py,sha256=Uynq-q28kg_bRtvDSSRMAG3aG8AQ2pkspfsmdZPawpU,8074
4
+ switch_sdk/client.py,sha256=NXROzL-sa3Q-QZD63CQ7IdllJmb-pty7Fs5gvbyh9nI,33531
5
+ switch_sdk/context.py,sha256=y3t1GgFWtTB3vTJSN4DJIaX5NNnZS9uIxla40LTVNWM,1617
6
+ switch_sdk/errors.py,sha256=fK1vVEXUrutPZ9It-eYmz0rHjosRDPUnXHJ-SXzuvoI,1080
7
+ switch_sdk/executorch_runtime.py,sha256=6nlvzEAUZco7BTSy8LcKsgXWsya9R2kp0cOmj2NbrkM,16114
8
+ switch_sdk/impact.py,sha256=zPIccclLbOmIcE9LbGljOwPn5aLTbteJHdEd4x9oPbg,5264
9
+ switch_sdk/local_models.py,sha256=PLAmPqXzX2kwq1jjOqulQJFz6ajmgTaqkGHBKgz6Yu8,12694
10
+ switch_sdk/local_runtime.py,sha256=-e7FGevl0YmQrP1XSSI8tSIOed6nidYkr7Z-BI_1oVU,1162
11
+ switch_sdk/models.py,sha256=34Fe5ZXYYi8qXS2pPlVybWu7INDj09kiy7QAXaxudT8,11354
12
+ switch_sdk/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
13
+ switch_sdk/telemetry.py,sha256=uts1GsodO4mwzOAu1VyeTDjrnwbAZaM6ADY1LxMDzKI,3240
14
+ shift_sdk-0.3.2.dist-info/METADATA,sha256=tUhsVkNsaRUGsCt-engpCsTozKxZrg8mw4QBPfy2DT0,10582
15
+ shift_sdk-0.3.2.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
16
+ shift_sdk-0.3.2.dist-info/top_level.txt,sha256=_lHYhnLFZh6ixzItr6dMfSS2QXWBbvpSCmluPo10ywk,11
17
+ shift_sdk-0.3.2.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ switch_sdk
switch_sdk/__init__.py ADDED
@@ -0,0 +1,96 @@
1
+ from importlib.metadata import PackageNotFoundError, version
2
+
3
+ from switch_sdk.auto_model import AutoModelSelection
4
+ from switch_sdk.byok import AzureBYOKConfig, AzureBYOKResolvedRequest, AzureRegionCredential
5
+ from switch_sdk.client import SwitchClient
6
+ from switch_sdk.context import switch_trace, trace_execution
7
+ from switch_sdk.errors import (
8
+ SwitchAPIError,
9
+ SwitchClientNotStartedError,
10
+ SwitchLocalModelError,
11
+ SwitchNetworkError,
12
+ SwitchSDKError,
13
+ SwitchTimeoutError,
14
+ )
15
+ from switch_sdk.executorch_runtime import (
16
+ ExecuTorchRuntimeConfig,
17
+ ExecuTorchTextRuntime,
18
+ build_executorch_text_runtime,
19
+ )
20
+ from switch_sdk.impact import ImpactEstimate, ModelImpactEstimator
21
+ from switch_sdk.local_models import HardwareProfile, LocalModelHandle, LocalModelManager, LocalModelSpec
22
+ from switch_sdk.local_runtime import LocalChatRuntime, default_stub_local_runtime
23
+ from switch_sdk.models import (
24
+ CarbonLiveResponse,
25
+ CandidateScoreBreakdown,
26
+ ChatChoice,
27
+ ChatChoiceMessage,
28
+ ChatCompletion,
29
+ ChatMessage,
30
+ ChatRequest,
31
+ ChatUsage,
32
+ DashboardFeedItem,
33
+ DashboardFeedResponse,
34
+ DashboardSummary,
35
+ DashboardSummaryResponse,
36
+ RouteDecision,
37
+ RouteRequest,
38
+ RouteTarget,
39
+ RoutingWeights,
40
+ TelemetryEvent,
41
+ TelemetryIngestResult,
42
+ )
43
+
44
+ try:
45
+ __version__ = version('shift-sdk')
46
+ except PackageNotFoundError:
47
+ try:
48
+ __version__ = version('switch-sdk')
49
+ except PackageNotFoundError:
50
+ __version__ = '0.0.0'
51
+
52
+ __all__ = [
53
+ '__version__',
54
+ 'AutoModelSelection',
55
+ 'AzureBYOKConfig',
56
+ 'AzureBYOKResolvedRequest',
57
+ 'AzureRegionCredential',
58
+ 'CarbonLiveResponse',
59
+ 'CandidateScoreBreakdown',
60
+ 'ChatChoice',
61
+ 'ChatChoiceMessage',
62
+ 'ChatCompletion',
63
+ 'ChatMessage',
64
+ 'ChatRequest',
65
+ 'ChatUsage',
66
+ 'DashboardFeedItem',
67
+ 'DashboardFeedResponse',
68
+ 'DashboardSummary',
69
+ 'DashboardSummaryResponse',
70
+ 'RouteDecision',
71
+ 'RouteRequest',
72
+ 'RouteTarget',
73
+ 'RoutingWeights',
74
+ 'SwitchAPIError',
75
+ 'SwitchClient',
76
+ 'SwitchClientNotStartedError',
77
+ 'SwitchLocalModelError',
78
+ 'SwitchNetworkError',
79
+ 'SwitchSDKError',
80
+ 'SwitchTimeoutError',
81
+ 'TelemetryEvent',
82
+ 'TelemetryIngestResult',
83
+ 'ExecuTorchRuntimeConfig',
84
+ 'ExecuTorchTextRuntime',
85
+ 'build_executorch_text_runtime',
86
+ 'ImpactEstimate',
87
+ 'ModelImpactEstimator',
88
+ 'HardwareProfile',
89
+ 'LocalModelSpec',
90
+ 'LocalModelHandle',
91
+ 'LocalModelManager',
92
+ 'LocalChatRuntime',
93
+ 'default_stub_local_runtime',
94
+ 'switch_trace',
95
+ 'trace_execution',
96
+ ]
@@ -0,0 +1,187 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from typing import Any
6
+
7
+ from switch_sdk.models import ChatMessage
8
+
9
+ _URL_RE = re.compile(r'https?://\S+', re.IGNORECASE)
10
+ _CODE_HINT_RE = re.compile(
11
+ r'```|`[^`]+`|\b(def |class |function|import |select |insert |update |delete |create table|docker|kubernetes|terraform)\b',
12
+ re.IGNORECASE,
13
+ )
14
+ _SIMPLE_MATH_RE = re.compile(r'^[\d\s\+\-\*/\(\)\.=]+$')
15
+
16
+ _AUTO_MODEL_ALIASES = {'auto', 'shift-auto', 'router'}
17
+
18
+ _SIMPLE_PREFIXES = (
19
+ 'reply with exactly',
20
+ 'reply only with',
21
+ 'classify',
22
+ 'label this',
23
+ 'sentiment',
24
+ 'is this',
25
+ 'extract',
26
+ 'fix grammar',
27
+ 'correct grammar',
28
+ 'rewrite this sentence',
29
+ 'translate to',
30
+ )
31
+
32
+ _COMPLEX_HINTS = (
33
+ 'step by step',
34
+ 'detailed',
35
+ 'in depth',
36
+ 'architecture',
37
+ 'design a',
38
+ 'implement',
39
+ 'debug',
40
+ 'traceback',
41
+ 'root cause',
42
+ 'compare and contrast',
43
+ 'pros and cons',
44
+ 'research',
45
+ 'latest',
46
+ 'news',
47
+ 'financial advice',
48
+ 'medical advice',
49
+ 'legal advice',
50
+ 'long form',
51
+ 'write a blog',
52
+ 'essay',
53
+ )
54
+
55
+
56
+ @dataclass(slots=True)
57
+ class AutoModelSelection:
58
+ selected_model: str
59
+ reason: str
60
+ confidence: float
61
+ inspected_text: str
62
+ features: dict[str, Any]
63
+
64
+ def to_metadata(self) -> dict[str, Any]:
65
+ return {
66
+ 'selected_model': self.selected_model,
67
+ 'reason': self.reason,
68
+ 'confidence': self.confidence,
69
+ 'features': self.features,
70
+ 'inspected_text': self.inspected_text[:160],
71
+ }
72
+
73
+ def to_safe_metadata(self) -> dict[str, Any]:
74
+ payload = self.to_metadata()
75
+ payload.pop('inspected_text', None)
76
+ return payload
77
+
78
+
79
+ def is_auto_model_requested(model: str, capability_flags: dict[str, Any] | None = None) -> bool:
80
+ flags = capability_flags or {}
81
+ normalized = str(model or '').strip().lower()
82
+ return normalized in _AUTO_MODEL_ALIASES or bool(flags.get('auto_model'))
83
+
84
+
85
+ def select_model(
86
+ messages: list[ChatMessage],
87
+ capability_flags: dict[str, Any] | None = None,
88
+ *,
89
+ frontier_model: str = 'gpt-5',
90
+ mid_model: str = 'gpt-5-mini',
91
+ small_model: str = 'gpt-5-nano',
92
+ fast_model: str = 'gpt-4o-mini',
93
+ ) -> AutoModelSelection:
94
+ flags = capability_flags or {}
95
+
96
+ preferred = str(flags.get('preferred_model', '')).strip()
97
+ if preferred:
98
+ return AutoModelSelection(
99
+ selected_model=preferred,
100
+ reason='preferred_model capability flag',
101
+ confidence=1.0,
102
+ inspected_text='',
103
+ features={'preferred_model': preferred},
104
+ )
105
+
106
+ optimize_for = str(flags.get('optimize_for', '')).strip().lower()
107
+ text = _extract_primary_text(messages).strip()
108
+ normalized = text.lower()
109
+ words = len(normalized.split())
110
+ chars = len(text)
111
+ has_url = bool(_URL_RE.search(text))
112
+ has_code = bool(_CODE_HINT_RE.search(text))
113
+ has_complex_hint = any(hint in normalized for hint in _COMPLEX_HINTS)
114
+ is_simple_math = bool(_SIMPLE_MATH_RE.match(normalized))
115
+ simple_greeting = normalized in {'hi', 'hello', 'hey', 'thanks', 'thank you'}
116
+ simple_prefix = normalized.startswith(_SIMPLE_PREFIXES)
117
+
118
+ features = {
119
+ 'optimize_for': optimize_for,
120
+ 'word_count': words,
121
+ 'char_count': chars,
122
+ 'contains_url': has_url,
123
+ 'contains_code_hint': has_code,
124
+ 'contains_complex_hint': has_complex_hint,
125
+ }
126
+
127
+ if flags.get('vision') or flags.get('tool_use'):
128
+ return AutoModelSelection(
129
+ selected_model=frontier_model,
130
+ reason='advanced capabilities requested',
131
+ confidence=0.95,
132
+ inspected_text=text,
133
+ features=features,
134
+ )
135
+
136
+ if optimize_for == 'latency':
137
+ return AutoModelSelection(
138
+ selected_model=fast_model,
139
+ reason='latency optimization requested',
140
+ confidence=0.85,
141
+ inspected_text=text,
142
+ features=features,
143
+ )
144
+
145
+ if optimize_for == 'cost':
146
+ return AutoModelSelection(
147
+ selected_model=small_model,
148
+ reason='cost optimization requested',
149
+ confidence=0.85,
150
+ inspected_text=text,
151
+ features=features,
152
+ )
153
+
154
+ if has_code or has_url or has_complex_hint or words > 90 or chars > 500:
155
+ return AutoModelSelection(
156
+ selected_model=frontier_model,
157
+ reason='complex prompt characteristics',
158
+ confidence=0.85,
159
+ inspected_text=text,
160
+ features=features,
161
+ )
162
+
163
+ if is_simple_math or simple_greeting or simple_prefix or (words <= 20 and chars <= 120):
164
+ return AutoModelSelection(
165
+ selected_model=small_model,
166
+ reason='lightweight prompt characteristics',
167
+ confidence=0.8,
168
+ inspected_text=text,
169
+ features=features,
170
+ )
171
+
172
+ return AutoModelSelection(
173
+ selected_model=mid_model,
174
+ reason='default balanced prompt profile',
175
+ confidence=0.7,
176
+ inspected_text=text,
177
+ features=features,
178
+ )
179
+
180
+
181
+ def _extract_primary_text(messages: list[ChatMessage]) -> str:
182
+ for message in reversed(messages):
183
+ if message.role == 'user':
184
+ return message.content
185
+ if messages:
186
+ return messages[-1].content
187
+ return ''