shift-sdk 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,376 @@
1
+ Metadata-Version: 2.4
2
+ Name: shift-sdk
3
+ Version: 0.3.2
4
+ Summary: Python SDK for Shift managed AI routing, telemetry, and local-first execution
5
+ Author: Shift
6
+ License-Expression: LicenseRef-Proprietary
7
+ Keywords: ai,llm,routing,telemetry,sustainability,executorch
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Programming Language :: Python :: 3.14
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Typing :: Typed
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ Requires-Dist: httpx>=0.27.0
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
23
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
24
+ Requires-Dist: respx>=0.21.0; extra == "dev"
25
+ Provides-Extra: local
26
+ Requires-Dist: torch>=2.2.0; python_version < "3.14" and extra == "local"
27
+ Requires-Dist: transformers>=4.41.0; python_version < "3.14" and extra == "local"
28
+ Requires-Dist: optimum-executorch>=1.1.0; python_version < "3.14" and extra == "local"
29
+ Provides-Extra: publish
30
+ Requires-Dist: build>=1.2.2; extra == "publish"
31
+ Requires-Dist: twine>=5.1.1; extra == "publish"
32
+
33
+ # shift-sdk
34
+
35
+ Python SDK for the Shift (Switch gateway) managed API.
36
+
37
+ ## Install
38
+
39
+ ```bash
40
+ pip install shift-sdk
41
+ ```
42
+
43
+ Import path remains:
44
+
45
+ ```python
46
+ from switch_sdk import SwitchClient
47
+ ```
48
+
49
+ For local development:
50
+
51
+ ```bash
52
+ pip install -e .[dev]
53
+ ```
54
+
55
+ For local ExecuTorch runtime work:
56
+
57
+ ```bash
58
+ pip install -e .[dev,local]
59
+ ```
60
+
61
+ For packaging and publishing:
62
+
63
+ ```bash
64
+ pip install -e .[publish]
65
+ ```
66
+
67
+ Note: ExecuTorch wheels are not available for Python 3.14 yet. Use Python 3.10-3.13 (3.11 works well).
68
+
69
+ ## Required values
70
+
71
+ - `base_url`: your gateway URL, for example `http://localhost:8000`
72
+ - `api_key`: your plain project key (for example `aura_...`), not the SHA256 hash
73
+
74
+ Environment shortcuts are supported:
75
+
76
+ - `SHIFT_BASE_URL` (fallback: `SWITCH_BASE_URL`)
77
+ - `SHIFT_API_KEY` (fallbacks: `SWITCH_API_KEY`, `API_KEY`)
78
+
79
+ ## Quick start
80
+
81
+ ```python
82
+ import asyncio
83
+ from switch_sdk import SwitchClient, ChatMessage
84
+
85
+
86
+ async def main() -> None:
87
+ async with SwitchClient.from_env() as client:
88
+ completion = await client.chat(
89
+ model="gpt-5",
90
+ messages=[ChatMessage(role="user", content="Reply with: SDK_OK")],
91
+ residency="US",
92
+ sla="realtime",
93
+ capability_flags={"force_cloud": True, "preferred_region": "eastus"},
94
+ )
95
+
96
+ print(completion.choices[0].message.content)
97
+ print(completion.switch_meta["route"]["target"]["region"])
98
+
99
+
100
+ asyncio.run(main())
101
+ ```
102
+
103
+ Set env vars first:
104
+
105
+ ```bash
106
+ export SHIFT_BASE_URL=http://localhost:8000
107
+ export SHIFT_API_KEY=aura_your_plain_project_key
108
+ ```
109
+
110
+ ## BYOK mode (prompt privacy)
111
+
112
+ Use `chat_byok()` when you want Shift to do routing/observability while your
113
+ prompt is sent directly to Azure OpenAI with your own API key.
114
+
115
+ - Shift sees: route request + telemetry metadata.
116
+ - Shift does **not** see: prompt/messages payload.
117
+
118
+ ```python
119
+ import asyncio
120
+ from switch_sdk import AzureBYOKConfig, AzureRegionCredential, ChatMessage, SwitchClient
121
+
122
+
123
+ async def main() -> None:
124
+ byok = AzureBYOKConfig(
125
+ api_version="2025-01-01-preview",
126
+ regions={
127
+ "eastus": AzureRegionCredential(
128
+ endpoint="https://shift-eastus.openai.azure.com",
129
+ api_key="AZURE_EASTUS_KEY",
130
+ ),
131
+ "westus": AzureRegionCredential(
132
+ endpoint="https://shift-westus.openai.azure.com",
133
+ api_key="AZURE_WESTUS_KEY",
134
+ ),
135
+ "centralus": AzureRegionCredential(
136
+ endpoint="https://shift-centralus.openai.azure.com",
137
+ api_key="AZURE_CENTRALUS_KEY",
138
+ ),
139
+ },
140
+ )
141
+
142
+ async with SwitchClient.from_env(byok_azure=byok) as client:
143
+ completion = await client.chat_byok(
144
+ model="auto",
145
+ messages=[ChatMessage(role="user", content="Reply exactly: BYOK_OK")],
146
+ residency="US",
147
+ capability_flags={"auto_model": True},
148
+ )
149
+ print(completion.choices[0].message.content)
150
+ print(completion.switch_meta["route"]["target"]["region"])
151
+ print(completion.switch_meta["resolved_model"])
152
+
153
+
154
+ asyncio.run(main())
155
+ ```
156
+
157
+ Environment-based BYOK config is also supported:
158
+
159
+ ```bash
160
+ export SHIFT_BYOK_AZURE_EASTUS_ENDPOINT=https://shift-eastus.openai.azure.com
161
+ export SHIFT_BYOK_AZURE_EASTUS_API_KEY=...
162
+ export SHIFT_BYOK_AZURE_WESTUS_ENDPOINT=https://shift-westus.openai.azure.com
163
+ export SHIFT_BYOK_AZURE_WESTUS_API_KEY=...
164
+ export SHIFT_BYOK_AZURE_CENTRALUS_ENDPOINT=https://shift-centralus.openai.azure.com
165
+ export SHIFT_BYOK_AZURE_CENTRALUS_API_KEY=...
166
+ export SHIFT_BYOK_AZURE_API_VERSION=2025-01-01-preview
167
+ ```
168
+
169
+ ```python
170
+ async with SwitchClient.from_env(load_byok_azure_from_env=True) as client:
171
+ completion = await client.chat_byok(
172
+ model="auto",
173
+ messages=[ChatMessage(role="user", content="Reply exactly: PRIVACY_OK")],
174
+ capability_flags={"auto_model": True},
175
+ )
176
+ ```
177
+
178
+ ## Hybrid local-first mode (ExecuTorch-ready)
179
+
180
+ `chat_hybrid()` tries local execution first, then falls back to cloud when needed.
181
+ Local models are cached on disk and downloaded only once per model version.
182
+
183
+ ```python
184
+ import asyncio
185
+ from switch_sdk import ChatMessage, LocalModelManager, SwitchClient
186
+
187
+
188
+ manifest = [
189
+ {
190
+ "model_id": "smollm2-135m",
191
+ "task": "chat",
192
+ "download_url": "https://your-model-host/smollm2-135m.pte",
193
+ "sha256": "replace_with_sha256",
194
+ "size_mb": 550,
195
+ "min_ram_gb": 4,
196
+ "max_prompt_chars": 280,
197
+ "rank": 10,
198
+ },
199
+ ]
200
+
201
+
202
+ async def main() -> None:
203
+ manager = LocalModelManager(cache_dir="~/.shift/models", manifest=manifest)
204
+ # Optional: real ExecuTorch adapter (requires deps below)
205
+ from switch_sdk import build_executorch_text_runtime
206
+ local_runtime = build_executorch_text_runtime(
207
+ tokenizer_source="HuggingFaceTB/SmolLM2-135M-Instruct",
208
+ max_new_tokens=96,
209
+ prefer_optimum=True,
210
+ )
211
+
212
+ async with SwitchClient(
213
+ base_url="http://localhost:8000",
214
+ api_key="aura_your_plain_project_key",
215
+ local_model_manager=manager,
216
+ local_runtime=local_runtime,
217
+ ) as client:
218
+ completion = await client.chat_hybrid(
219
+ model="auto",
220
+ messages=[ChatMessage(role="user", content="Reply exactly: LOCAL_OK")],
221
+ capability_flags={"auto_model": True},
222
+ )
223
+ print(completion.model)
224
+ print(completion.choices[0].message.content)
225
+ print(completion.switch_meta)
226
+
227
+
228
+ asyncio.run(main())
229
+ ```
230
+
231
+ Notes:
232
+ - Default local runtime is a stub (for wiring/tests).
233
+ - `build_executorch_text_runtime(...)` provides a real adapter that prefers Optimum ExecuTorch and falls back to raw `executorch.runtime`.
234
+ - Cache path format: `~/.shift/models/<model_id>/<version>/model.pte`
235
+ - LRU eviction is applied when cache exceeds `max_cache_gb`.
236
+
237
+ Install local runtime dependencies:
238
+
239
+ ```bash
240
+ pip install -e .[local]
241
+ ```
242
+
243
+ Ready-made demo manifest:
244
+
245
+ - `examples/local_manifest_smollm2_135m.json`
246
+
247
+ Runtime callable contract:
248
+
249
+ ```python
250
+ from switch_sdk import ChatMessage, LocalModelHandle
251
+
252
+ async def my_executorch_runtime(messages: list[ChatMessage], handle: LocalModelHandle) -> str:
253
+ # Load/use handle.path (.pte) with your ExecuTorch integration.
254
+ # Return assistant text.
255
+ return "LOCAL_EXECUTORCH_OK"
256
+ ```
257
+
258
+ ## Routing-only call
259
+
260
+ ```python
261
+ decision = await client.route(
262
+ model="gpt-5",
263
+ residency="US",
264
+ sla="realtime",
265
+ capability_flags={"force_cloud": True},
266
+ )
267
+
268
+ print(decision.target.region)
269
+ print(decision.scores)
270
+ print(decision.candidate_breakdown)
271
+ ```
272
+
273
+ ## Dashboard + carbon endpoints
274
+
275
+ ```python
276
+ summary = await client.get_dashboard_summary()
277
+ feed = await client.get_dashboard_feed(limit=20)
278
+ carbon = await client.get_live_carbon()
279
+
280
+ print(summary.summary.total_requests)
281
+ print(len(feed.items))
282
+ print(carbon.provider, carbon.regions.get("eastus"))
283
+ ```
284
+
285
+ ## Custom telemetry event
286
+
287
+ ```python
288
+ from switch_sdk import TelemetryEvent
289
+
290
+ await client.track_event(
291
+ TelemetryEvent(
292
+ event_type="sdk_custom",
293
+ request_id="custom-123",
294
+ model="gpt-5",
295
+ metadata={"feature": "my_feature"},
296
+ )
297
+ )
298
+
299
+ await client.flush_telemetry()
300
+ ```
301
+
302
+ ## Error handling
303
+
304
+ ```python
305
+ from switch_sdk import SwitchAPIError, SwitchNetworkError, SwitchTimeoutError
306
+
307
+ try:
308
+ await client.route(model="gpt-5")
309
+ except SwitchAPIError as exc:
310
+ print(exc.status_code, exc.detail)
311
+ except SwitchTimeoutError:
312
+ print("Request timed out")
313
+ except SwitchNetworkError as exc:
314
+ print(f"Network issue: {exc}")
315
+ ```
316
+
317
+ ## Notes
318
+
319
+ - The SDK is async-first.
320
+ - Use `async with SwitchClient(...)` so telemetry flushes cleanly on exit.
321
+ - Retries/backoff are built in for transient failures.
322
+ - Telemetry is best-effort and never blocks successful chat/route calls.
323
+
324
+ ## Live switching checks
325
+
326
+ Automatic east/west region-switch verification script:
327
+
328
+ ```bash
329
+ cd switch-sdk
330
+ .venv/bin/python examples/test_region_switching.py \
331
+ --base-url http://localhost:8000 \
332
+ --api-key aura_your_plain_project_key \
333
+ --east-region eastus \
334
+ --west-region westus \
335
+ --central-region centralus \
336
+ --check-chat
337
+ ```
338
+
339
+ ## From-env example script
340
+
341
+ ```bash
342
+ export SHIFT_BASE_URL=http://localhost:8000
343
+ export SHIFT_API_KEY=aura_your_plain_project_key
344
+ python examples/test_from_env.py
345
+ ```
346
+
347
+ ## Full user-journey script
348
+
349
+ ```bash
350
+ python examples/test_user_journey.py \
351
+ --base-url http://localhost:8000 \
352
+ --api-key aura_your_plain_project_key
353
+ ```
354
+
355
+ ## Local ExecuTorch sanity check
356
+
357
+ Force local execution and fail if local runtime does not work:
358
+
359
+ ```bash
360
+ cd switch-sdk
361
+ .venv311/bin/python examples/test_hybrid_local.py \
362
+ --base-url http://localhost:8000 \
363
+ --api-key dummy_local_only \
364
+ --manifest-path examples/local_manifest_smollm2_135m.json \
365
+ --executorch \
366
+ --prefer-runtime \
367
+ --tokenizer-source HuggingFaceTB/SmolLM2-135M-Instruct \
368
+ --no-download \
369
+ --no-cloud-fallback
370
+ ```
371
+
372
+ Expected: output JSON includes `"source": "sdk-local"` in `switch_meta`.
373
+
374
+ ## Release
375
+
376
+ See `RELEASING.md` in the repository root for TestPyPI and PyPI release steps.
@@ -0,0 +1,17 @@
1
+ switch_sdk/__init__.py,sha256=43sqzn3EtDc8rScV4tjs9zTvwVlrwlZyrmR3tUqDMuo,2579
2
+ switch_sdk/auto_model.py,sha256=lvrHNkADCzN3uKDGm6z-wL8P9eLzwrj2GvsNjJfigiI,5361
3
+ switch_sdk/byok.py,sha256=Uynq-q28kg_bRtvDSSRMAG3aG8AQ2pkspfsmdZPawpU,8074
4
+ switch_sdk/client.py,sha256=NXROzL-sa3Q-QZD63CQ7IdllJmb-pty7Fs5gvbyh9nI,33531
5
+ switch_sdk/context.py,sha256=y3t1GgFWtTB3vTJSN4DJIaX5NNnZS9uIxla40LTVNWM,1617
6
+ switch_sdk/errors.py,sha256=fK1vVEXUrutPZ9It-eYmz0rHjosRDPUnXHJ-SXzuvoI,1080
7
+ switch_sdk/executorch_runtime.py,sha256=6nlvzEAUZco7BTSy8LcKsgXWsya9R2kp0cOmj2NbrkM,16114
8
+ switch_sdk/impact.py,sha256=zPIccclLbOmIcE9LbGljOwPn5aLTbteJHdEd4x9oPbg,5264
9
+ switch_sdk/local_models.py,sha256=PLAmPqXzX2kwq1jjOqulQJFz6ajmgTaqkGHBKgz6Yu8,12694
10
+ switch_sdk/local_runtime.py,sha256=-e7FGevl0YmQrP1XSSI8tSIOed6nidYkr7Z-BI_1oVU,1162
11
+ switch_sdk/models.py,sha256=34Fe5ZXYYi8qXS2pPlVybWu7INDj09kiy7QAXaxudT8,11354
12
+ switch_sdk/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
13
+ switch_sdk/telemetry.py,sha256=uts1GsodO4mwzOAu1VyeTDjrnwbAZaM6ADY1LxMDzKI,3240
14
+ shift_sdk-0.3.2.dist-info/METADATA,sha256=tUhsVkNsaRUGsCt-engpCsTozKxZrg8mw4QBPfy2DT0,10582
15
+ shift_sdk-0.3.2.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
16
+ shift_sdk-0.3.2.dist-info/top_level.txt,sha256=_lHYhnLFZh6ixzItr6dMfSS2QXWBbvpSCmluPo10ywk,11
17
+ shift_sdk-0.3.2.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ switch_sdk
switch_sdk/__init__.py ADDED
@@ -0,0 +1,96 @@
1
+ from importlib.metadata import PackageNotFoundError, version
2
+
3
+ from switch_sdk.auto_model import AutoModelSelection
4
+ from switch_sdk.byok import AzureBYOKConfig, AzureBYOKResolvedRequest, AzureRegionCredential
5
+ from switch_sdk.client import SwitchClient
6
+ from switch_sdk.context import switch_trace, trace_execution
7
+ from switch_sdk.errors import (
8
+ SwitchAPIError,
9
+ SwitchClientNotStartedError,
10
+ SwitchLocalModelError,
11
+ SwitchNetworkError,
12
+ SwitchSDKError,
13
+ SwitchTimeoutError,
14
+ )
15
+ from switch_sdk.executorch_runtime import (
16
+ ExecuTorchRuntimeConfig,
17
+ ExecuTorchTextRuntime,
18
+ build_executorch_text_runtime,
19
+ )
20
+ from switch_sdk.impact import ImpactEstimate, ModelImpactEstimator
21
+ from switch_sdk.local_models import HardwareProfile, LocalModelHandle, LocalModelManager, LocalModelSpec
22
+ from switch_sdk.local_runtime import LocalChatRuntime, default_stub_local_runtime
23
+ from switch_sdk.models import (
24
+ CarbonLiveResponse,
25
+ CandidateScoreBreakdown,
26
+ ChatChoice,
27
+ ChatChoiceMessage,
28
+ ChatCompletion,
29
+ ChatMessage,
30
+ ChatRequest,
31
+ ChatUsage,
32
+ DashboardFeedItem,
33
+ DashboardFeedResponse,
34
+ DashboardSummary,
35
+ DashboardSummaryResponse,
36
+ RouteDecision,
37
+ RouteRequest,
38
+ RouteTarget,
39
+ RoutingWeights,
40
+ TelemetryEvent,
41
+ TelemetryIngestResult,
42
+ )
43
+
44
+ try:
45
+ __version__ = version('shift-sdk')
46
+ except PackageNotFoundError:
47
+ try:
48
+ __version__ = version('switch-sdk')
49
+ except PackageNotFoundError:
50
+ __version__ = '0.0.0'
51
+
52
+ __all__ = [
53
+ '__version__',
54
+ 'AutoModelSelection',
55
+ 'AzureBYOKConfig',
56
+ 'AzureBYOKResolvedRequest',
57
+ 'AzureRegionCredential',
58
+ 'CarbonLiveResponse',
59
+ 'CandidateScoreBreakdown',
60
+ 'ChatChoice',
61
+ 'ChatChoiceMessage',
62
+ 'ChatCompletion',
63
+ 'ChatMessage',
64
+ 'ChatRequest',
65
+ 'ChatUsage',
66
+ 'DashboardFeedItem',
67
+ 'DashboardFeedResponse',
68
+ 'DashboardSummary',
69
+ 'DashboardSummaryResponse',
70
+ 'RouteDecision',
71
+ 'RouteRequest',
72
+ 'RouteTarget',
73
+ 'RoutingWeights',
74
+ 'SwitchAPIError',
75
+ 'SwitchClient',
76
+ 'SwitchClientNotStartedError',
77
+ 'SwitchLocalModelError',
78
+ 'SwitchNetworkError',
79
+ 'SwitchSDKError',
80
+ 'SwitchTimeoutError',
81
+ 'TelemetryEvent',
82
+ 'TelemetryIngestResult',
83
+ 'ExecuTorchRuntimeConfig',
84
+ 'ExecuTorchTextRuntime',
85
+ 'build_executorch_text_runtime',
86
+ 'ImpactEstimate',
87
+ 'ModelImpactEstimator',
88
+ 'HardwareProfile',
89
+ 'LocalModelSpec',
90
+ 'LocalModelHandle',
91
+ 'LocalModelManager',
92
+ 'LocalChatRuntime',
93
+ 'default_stub_local_runtime',
94
+ 'switch_trace',
95
+ 'trace_execution',
96
+ ]
@@ -0,0 +1,187 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from typing import Any
6
+
7
+ from switch_sdk.models import ChatMessage
8
+
9
+ _URL_RE = re.compile(r'https?://\S+', re.IGNORECASE)
10
+ _CODE_HINT_RE = re.compile(
11
+ r'```|`[^`]+`|\b(def |class |function|import |select |insert |update |delete |create table|docker|kubernetes|terraform)\b',
12
+ re.IGNORECASE,
13
+ )
14
+ _SIMPLE_MATH_RE = re.compile(r'^[\d\s\+\-\*/\(\)\.=]+$')
15
+
16
+ _AUTO_MODEL_ALIASES = {'auto', 'shift-auto', 'router'}
17
+
18
+ _SIMPLE_PREFIXES = (
19
+ 'reply with exactly',
20
+ 'reply only with',
21
+ 'classify',
22
+ 'label this',
23
+ 'sentiment',
24
+ 'is this',
25
+ 'extract',
26
+ 'fix grammar',
27
+ 'correct grammar',
28
+ 'rewrite this sentence',
29
+ 'translate to',
30
+ )
31
+
32
+ _COMPLEX_HINTS = (
33
+ 'step by step',
34
+ 'detailed',
35
+ 'in depth',
36
+ 'architecture',
37
+ 'design a',
38
+ 'implement',
39
+ 'debug',
40
+ 'traceback',
41
+ 'root cause',
42
+ 'compare and contrast',
43
+ 'pros and cons',
44
+ 'research',
45
+ 'latest',
46
+ 'news',
47
+ 'financial advice',
48
+ 'medical advice',
49
+ 'legal advice',
50
+ 'long form',
51
+ 'write a blog',
52
+ 'essay',
53
+ )
54
+
55
+
56
+ @dataclass(slots=True)
57
+ class AutoModelSelection:
58
+ selected_model: str
59
+ reason: str
60
+ confidence: float
61
+ inspected_text: str
62
+ features: dict[str, Any]
63
+
64
+ def to_metadata(self) -> dict[str, Any]:
65
+ return {
66
+ 'selected_model': self.selected_model,
67
+ 'reason': self.reason,
68
+ 'confidence': self.confidence,
69
+ 'features': self.features,
70
+ 'inspected_text': self.inspected_text[:160],
71
+ }
72
+
73
+ def to_safe_metadata(self) -> dict[str, Any]:
74
+ payload = self.to_metadata()
75
+ payload.pop('inspected_text', None)
76
+ return payload
77
+
78
+
79
+ def is_auto_model_requested(model: str, capability_flags: dict[str, Any] | None = None) -> bool:
80
+ flags = capability_flags or {}
81
+ normalized = str(model or '').strip().lower()
82
+ return normalized in _AUTO_MODEL_ALIASES or bool(flags.get('auto_model'))
83
+
84
+
85
+ def select_model(
86
+ messages: list[ChatMessage],
87
+ capability_flags: dict[str, Any] | None = None,
88
+ *,
89
+ frontier_model: str = 'gpt-5',
90
+ mid_model: str = 'gpt-5-mini',
91
+ small_model: str = 'gpt-5-nano',
92
+ fast_model: str = 'gpt-4o-mini',
93
+ ) -> AutoModelSelection:
94
+ flags = capability_flags or {}
95
+
96
+ preferred = str(flags.get('preferred_model', '')).strip()
97
+ if preferred:
98
+ return AutoModelSelection(
99
+ selected_model=preferred,
100
+ reason='preferred_model capability flag',
101
+ confidence=1.0,
102
+ inspected_text='',
103
+ features={'preferred_model': preferred},
104
+ )
105
+
106
+ optimize_for = str(flags.get('optimize_for', '')).strip().lower()
107
+ text = _extract_primary_text(messages).strip()
108
+ normalized = text.lower()
109
+ words = len(normalized.split())
110
+ chars = len(text)
111
+ has_url = bool(_URL_RE.search(text))
112
+ has_code = bool(_CODE_HINT_RE.search(text))
113
+ has_complex_hint = any(hint in normalized for hint in _COMPLEX_HINTS)
114
+ is_simple_math = bool(_SIMPLE_MATH_RE.match(normalized))
115
+ simple_greeting = normalized in {'hi', 'hello', 'hey', 'thanks', 'thank you'}
116
+ simple_prefix = normalized.startswith(_SIMPLE_PREFIXES)
117
+
118
+ features = {
119
+ 'optimize_for': optimize_for,
120
+ 'word_count': words,
121
+ 'char_count': chars,
122
+ 'contains_url': has_url,
123
+ 'contains_code_hint': has_code,
124
+ 'contains_complex_hint': has_complex_hint,
125
+ }
126
+
127
+ if flags.get('vision') or flags.get('tool_use'):
128
+ return AutoModelSelection(
129
+ selected_model=frontier_model,
130
+ reason='advanced capabilities requested',
131
+ confidence=0.95,
132
+ inspected_text=text,
133
+ features=features,
134
+ )
135
+
136
+ if optimize_for == 'latency':
137
+ return AutoModelSelection(
138
+ selected_model=fast_model,
139
+ reason='latency optimization requested',
140
+ confidence=0.85,
141
+ inspected_text=text,
142
+ features=features,
143
+ )
144
+
145
+ if optimize_for == 'cost':
146
+ return AutoModelSelection(
147
+ selected_model=small_model,
148
+ reason='cost optimization requested',
149
+ confidence=0.85,
150
+ inspected_text=text,
151
+ features=features,
152
+ )
153
+
154
+ if has_code or has_url or has_complex_hint or words > 90 or chars > 500:
155
+ return AutoModelSelection(
156
+ selected_model=frontier_model,
157
+ reason='complex prompt characteristics',
158
+ confidence=0.85,
159
+ inspected_text=text,
160
+ features=features,
161
+ )
162
+
163
+ if is_simple_math or simple_greeting or simple_prefix or (words <= 20 and chars <= 120):
164
+ return AutoModelSelection(
165
+ selected_model=small_model,
166
+ reason='lightweight prompt characteristics',
167
+ confidence=0.8,
168
+ inspected_text=text,
169
+ features=features,
170
+ )
171
+
172
+ return AutoModelSelection(
173
+ selected_model=mid_model,
174
+ reason='default balanced prompt profile',
175
+ confidence=0.7,
176
+ inspected_text=text,
177
+ features=features,
178
+ )
179
+
180
+
181
+ def _extract_primary_text(messages: list[ChatMessage]) -> str:
182
+ for message in reversed(messages):
183
+ if message.role == 'user':
184
+ return message.content
185
+ if messages:
186
+ return messages[-1].content
187
+ return ''