iac-code 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. iac_code/__init__.py +2 -0
  2. iac_code/acp/__init__.py +97 -0
  3. iac_code/acp/convert.py +423 -0
  4. iac_code/acp/http_sse.py +448 -0
  5. iac_code/acp/mcp.py +54 -0
  6. iac_code/acp/metrics.py +71 -0
  7. iac_code/acp/server.py +662 -0
  8. iac_code/acp/session.py +446 -0
  9. iac_code/acp/slash_registry.py +125 -0
  10. iac_code/acp/state.py +99 -0
  11. iac_code/acp/tools.py +112 -0
  12. iac_code/acp/types.py +13 -0
  13. iac_code/acp/version.py +26 -0
  14. iac_code/agent/__init__.py +19 -0
  15. iac_code/agent/agent_loop.py +640 -0
  16. iac_code/agent/agent_tool.py +269 -0
  17. iac_code/agent/agent_types.py +87 -0
  18. iac_code/agent/message.py +153 -0
  19. iac_code/agent/system_prompt.py +313 -0
  20. iac_code/cli/__init__.py +3 -0
  21. iac_code/cli/headless.py +114 -0
  22. iac_code/cli/main.py +246 -0
  23. iac_code/cli/output_formats.py +125 -0
  24. iac_code/commands/__init__.py +93 -0
  25. iac_code/commands/auth.py +1055 -0
  26. iac_code/commands/clear.py +34 -0
  27. iac_code/commands/compact.py +43 -0
  28. iac_code/commands/debug.py +45 -0
  29. iac_code/commands/effort.py +116 -0
  30. iac_code/commands/exit.py +10 -0
  31. iac_code/commands/help.py +49 -0
  32. iac_code/commands/model.py +130 -0
  33. iac_code/commands/registry.py +245 -0
  34. iac_code/commands/resume.py +49 -0
  35. iac_code/commands/tasks.py +41 -0
  36. iac_code/config.py +304 -0
  37. iac_code/i18n/__init__.py +141 -0
  38. iac_code/i18n/locales/zh/LC_MESSAGES/messages.po +1355 -0
  39. iac_code/memory/__init__.py +1 -0
  40. iac_code/memory/memory_manager.py +92 -0
  41. iac_code/memory/memory_tools.py +88 -0
  42. iac_code/providers/__init__.py +1 -0
  43. iac_code/providers/anthropic_provider.py +284 -0
  44. iac_code/providers/base.py +128 -0
  45. iac_code/providers/dashscope_provider.py +47 -0
  46. iac_code/providers/deepseek_provider.py +36 -0
  47. iac_code/providers/manager.py +399 -0
  48. iac_code/providers/openai_provider.py +344 -0
  49. iac_code/providers/retry.py +58 -0
  50. iac_code/providers/stream_watchdog.py +47 -0
  51. iac_code/providers/thinking.py +164 -0
  52. iac_code/services/__init__.py +1 -0
  53. iac_code/services/agent_factory.py +127 -0
  54. iac_code/services/cloud_credentials.py +22 -0
  55. iac_code/services/context_manager.py +221 -0
  56. iac_code/services/providers/__init__.py +1 -0
  57. iac_code/services/providers/aliyun.py +232 -0
  58. iac_code/services/session_index.py +281 -0
  59. iac_code/services/session_storage.py +245 -0
  60. iac_code/services/telemetry/__init__.py +66 -0
  61. iac_code/services/telemetry/attributes.py +84 -0
  62. iac_code/services/telemetry/client.py +330 -0
  63. iac_code/services/telemetry/config.py +76 -0
  64. iac_code/services/telemetry/constants.py +75 -0
  65. iac_code/services/telemetry/content_serializer.py +124 -0
  66. iac_code/services/telemetry/events.py +42 -0
  67. iac_code/services/telemetry/fallback.py +59 -0
  68. iac_code/services/telemetry/identity.py +73 -0
  69. iac_code/services/telemetry/metrics.py +62 -0
  70. iac_code/services/telemetry/names.py +199 -0
  71. iac_code/services/telemetry/sanitize.py +88 -0
  72. iac_code/services/telemetry/sink.py +67 -0
  73. iac_code/services/telemetry/tracing.py +38 -0
  74. iac_code/services/telemetry/types.py +13 -0
  75. iac_code/services/token_budget.py +54 -0
  76. iac_code/services/token_counter.py +76 -0
  77. iac_code/skills/__init__.py +1 -0
  78. iac_code/skills/bundled/__init__.py +94 -0
  79. iac_code/skills/bundled/iac_aliyun/SKILL.md +192 -0
  80. iac_code/skills/bundled/iac_aliyun/__init__.py +16 -0
  81. iac_code/skills/bundled/iac_aliyun/references/cloud-products/ecs.md +167 -0
  82. iac_code/skills/bundled/iac_aliyun/references/cloud-products/oss.md +69 -0
  83. iac_code/skills/bundled/iac_aliyun/references/cloud-products/rds.md +95 -0
  84. iac_code/skills/bundled/iac_aliyun/references/cloud-products/redis.md +100 -0
  85. iac_code/skills/bundled/iac_aliyun/references/cloud-products/slb.md +60 -0
  86. iac_code/skills/bundled/iac_aliyun/references/cloud-products/vpc.md +54 -0
  87. iac_code/skills/bundled/iac_aliyun/references/ros-template.md +155 -0
  88. iac_code/skills/bundled/iac_aliyun/references/template-parameters.md +206 -0
  89. iac_code/skills/bundled/iac_aliyun/references/terraform-template.md +101 -0
  90. iac_code/skills/bundled/iac_aliyun/scripts/tf2ros.py +77 -0
  91. iac_code/skills/bundled/simplify.py +28 -0
  92. iac_code/skills/discovery.py +136 -0
  93. iac_code/skills/frontmatter.py +119 -0
  94. iac_code/skills/listing.py +92 -0
  95. iac_code/skills/loader.py +42 -0
  96. iac_code/skills/processor.py +81 -0
  97. iac_code/skills/renderer.py +157 -0
  98. iac_code/skills/skill_definition.py +82 -0
  99. iac_code/skills/skill_tool.py +261 -0
  100. iac_code/state/__init__.py +5 -0
  101. iac_code/state/app_state.py +122 -0
  102. iac_code/tasks/__init__.py +1 -0
  103. iac_code/tasks/notification_queue.py +28 -0
  104. iac_code/tasks/task_state.py +66 -0
  105. iac_code/tasks/task_tools.py +114 -0
  106. iac_code/tools/__init__.py +8 -0
  107. iac_code/tools/base.py +226 -0
  108. iac_code/tools/bash.py +133 -0
  109. iac_code/tools/cloud/__init__.py +0 -0
  110. iac_code/tools/cloud/aliyun/__init__.py +0 -0
  111. iac_code/tools/cloud/aliyun/aliyun_api.py +510 -0
  112. iac_code/tools/cloud/aliyun/aliyun_doc_search.py +145 -0
  113. iac_code/tools/cloud/aliyun/endpoints.yml +343 -0
  114. iac_code/tools/cloud/aliyun/ros_client.py +56 -0
  115. iac_code/tools/cloud/aliyun/ros_stack.py +633 -0
  116. iac_code/tools/cloud/aliyun/ros_stack_instances.py +247 -0
  117. iac_code/tools/cloud/base_api.py +162 -0
  118. iac_code/tools/cloud/base_stack.py +242 -0
  119. iac_code/tools/cloud/registry.py +20 -0
  120. iac_code/tools/cloud/types.py +105 -0
  121. iac_code/tools/edit_file.py +121 -0
  122. iac_code/tools/glob.py +103 -0
  123. iac_code/tools/grep.py +254 -0
  124. iac_code/tools/list_files.py +104 -0
  125. iac_code/tools/read_file.py +127 -0
  126. iac_code/tools/result_storage.py +39 -0
  127. iac_code/tools/tool_executor.py +165 -0
  128. iac_code/tools/web_fetch.py +177 -0
  129. iac_code/tools/write_file.py +88 -0
  130. iac_code/types/__init__.py +40 -0
  131. iac_code/types/permissions.py +26 -0
  132. iac_code/types/skill_source.py +11 -0
  133. iac_code/types/stream_events.py +227 -0
  134. iac_code/ui/__init__.py +5 -0
  135. iac_code/ui/banner.py +110 -0
  136. iac_code/ui/components/__init__.py +0 -0
  137. iac_code/ui/components/dialog.py +142 -0
  138. iac_code/ui/components/divider.py +20 -0
  139. iac_code/ui/components/fuzzy_picker.py +308 -0
  140. iac_code/ui/components/progress_bar.py +54 -0
  141. iac_code/ui/components/search_box.py +165 -0
  142. iac_code/ui/components/select.py +319 -0
  143. iac_code/ui/components/status_icon.py +42 -0
  144. iac_code/ui/components/tabs.py +128 -0
  145. iac_code/ui/core/__init__.py +0 -0
  146. iac_code/ui/core/in_place_render.py +129 -0
  147. iac_code/ui/core/input_history.py +118 -0
  148. iac_code/ui/core/key_event.py +41 -0
  149. iac_code/ui/core/prompt_input.py +507 -0
  150. iac_code/ui/core/raw_input.py +302 -0
  151. iac_code/ui/core/screen.py +80 -0
  152. iac_code/ui/dialogs/__init__.py +0 -0
  153. iac_code/ui/dialogs/global_search.py +178 -0
  154. iac_code/ui/dialogs/history_search.py +100 -0
  155. iac_code/ui/dialogs/model_picker.py +280 -0
  156. iac_code/ui/dialogs/quick_open.py +108 -0
  157. iac_code/ui/dialogs/resume_picker.py +749 -0
  158. iac_code/ui/keybindings/__init__.py +0 -0
  159. iac_code/ui/keybindings/manager.py +124 -0
  160. iac_code/ui/renderer.py +1535 -0
  161. iac_code/ui/repl.py +772 -0
  162. iac_code/ui/spinner.py +112 -0
  163. iac_code/ui/suggestions/__init__.py +0 -0
  164. iac_code/ui/suggestions/aggregator.py +171 -0
  165. iac_code/ui/suggestions/command_provider.py +43 -0
  166. iac_code/ui/suggestions/directory_provider.py +95 -0
  167. iac_code/ui/suggestions/file_provider.py +121 -0
  168. iac_code/ui/suggestions/shell_history_provider.py +108 -0
  169. iac_code/ui/suggestions/token_extractor.py +77 -0
  170. iac_code/ui/suggestions/types.py +45 -0
  171. iac_code/ui/transcript_view.py +199 -0
  172. iac_code/utils/__init__.py +0 -0
  173. iac_code/utils/background_housekeeping.py +53 -0
  174. iac_code/utils/cleanup.py +68 -0
  175. iac_code/utils/json_utils.py +60 -0
  176. iac_code/utils/log.py +150 -0
  177. iac_code/utils/project_paths.py +74 -0
  178. iac_code/utils/tool_input_parser.py +62 -0
  179. iac_code-0.1.0.dist-info/LICENSE +201 -0
  180. iac_code-0.1.0.dist-info/METADATA +64 -0
  181. iac_code-0.1.0.dist-info/RECORD +184 -0
  182. iac_code-0.1.0.dist-info/WHEEL +5 -0
  183. iac_code-0.1.0.dist-info/entry_points.txt +2 -0
  184. iac_code-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,399 @@
1
+ """Provider selection, streaming fallback with tombstone, and model degradation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+ from collections.abc import AsyncGenerator
7
+
8
+ from loguru import logger
9
+
10
+ from iac_code.config import _KEY_NAME_TO_CRED_SLOT as _KEY_TO_PROVIDER
11
+ from iac_code.providers.base import Message, NonStreamingResponse, Provider, ToolDefinition
12
+ from iac_code.providers.retry import RetryableError, RetryConfig, with_retry
13
+ from iac_code.providers.stream_watchdog import StreamWatchdog
14
+ from iac_code.services.telemetry import add_metric, get_session_id, log_event, start_span
15
+ from iac_code.services.telemetry.config import should_capture_content_on_span
16
+ from iac_code.services.telemetry.content_serializer import (
17
+ serialize_input_messages,
18
+ serialize_system_instructions,
19
+ serialize_tool_definitions,
20
+ )
21
+ from iac_code.services.telemetry.names import (
22
+ Events,
23
+ GenAiAttr,
24
+ GenAiOperationName,
25
+ GenAiSpanKind,
26
+ Metrics,
27
+ Spans,
28
+ )
29
+ from iac_code.services.telemetry.sanitize import sanitize_error_message, sanitize_model_name
30
+ from iac_code.types.stream_events import (
31
+ ErrorEvent,
32
+ MessageEndEvent,
33
+ MessageStartEvent,
34
+ StreamEvent,
35
+ TextDeltaEvent,
36
+ ThinkingDeltaEvent,
37
+ TombstoneEvent,
38
+ ToolUseEndEvent,
39
+ ToolUseStartEvent,
40
+ )
41
+
42
# Degradation table: primary model name -> model to try when requests to the
# primary model fail. Looked up with ``.get`` by
# ``ProviderManager._get_fallback_model``, so a model that is absent from the
# map simply has no fallback.
# Degradation is a single hop: the fallback attempt runs with
# ``is_fallback=True``, so chained entries (e.g. "gpt-5.5" -> "gpt-5.4" ->
# "gpt-5.4-mini") are never followed transitively within one request.
MODEL_FALLBACK_MAP = {
    "claude-opus-4-7": "claude-haiku-4-5-20251001",
    "claude-opus-4-6": "claude-haiku-4-5-20251001",
    "claude-sonnet-4-6": "claude-haiku-4-5-20251001",
    "claude-sonnet-4-6-1m": "claude-haiku-4-5-20251001",
    "gpt-5.5": "gpt-5.4",
    "gpt-5.4": "gpt-5.4-mini",
    "qwen3.6-plus": "qwen3.5-plus",
    "deepseek-v4-pro": "deepseek-v4-flash",
}
52
+
53
+
54
def _detect_provider_name(model: str) -> str:
    """Resolve the active provider from the saved configuration.

    The lookup is driven purely by the active provider key returned by
    ``get_active_provider_key()``; the model name is never matched against
    anything, because different providers can share model names. ``model`` is
    only used to build the error message.

    Args:
        model: Model name the caller wants to use (reported on failure only).

    Returns:
        The entry of ``_KEY_TO_PROVIDER`` for the active provider key.

    Raises:
        ValueError: If no active key is saved or the key is not a known one.
    """
    from iac_code.config import get_active_provider_key

    # A missing key is normalised to "" so it falls through to the error path.
    active_key = get_active_provider_key() or ""
    if active_key not in _KEY_TO_PROVIDER:
        raise ValueError(f"Cannot determine provider for model: {model}. Run /auth to configure.")
    return _KEY_TO_PROVIDER[active_key]
66
+
67
+
68
def create_provider(model: str, credentials: dict[str, str]) -> Provider:
    """Instantiate the provider client for the currently active provider.

    The provider is resolved with ``_detect_provider_name``. Each provider
    implementation module is imported only inside its own branch, so selecting
    one provider does not import the others.

    Args:
        model: Model name handed to the provider constructor.
        credentials: Mapping from provider name to API key; the key for the
            resolved provider is read with ``.get`` and may therefore be None.

    Returns:
        A ready-to-use provider instance.

    Raises:
        ValueError: If the resolved provider name has no branch here, or if
            ``_detect_provider_name`` cannot resolve a provider at all.
    """
    provider_name = _detect_provider_name(model)

    def _settings():
        # Saved per-provider settings, fetched lazily so an unknown provider
        # name never touches the configuration.
        from iac_code.config import get_provider_config

        return get_provider_config(provider_name)

    def _effort() -> str | None:
        # Anything other than a string "effort" setting is treated as unset.
        raw = _settings().get("effort")
        return raw if isinstance(raw, str) else None

    if provider_name == "anthropic":
        from iac_code.providers.anthropic_provider import AnthropicProvider

        effort = _effort()
        return AnthropicProvider(model=model, api_key=credentials.get("anthropic"), effort=effort)

    if provider_name == "openai":
        from iac_code.providers.openai_provider import OpenAIProvider

        effort = _effort()
        return OpenAIProvider(model=model, api_key=credentials.get("openai"), effort=effort)

    if provider_name == "dashscope":
        from iac_code.providers.dashscope_provider import DashScopeProvider

        effort = _effort()
        return DashScopeProvider(model=model, api_key=credentials.get("dashscope"), effort=effort)

    if provider_name == "dashscope_token_plan":
        from iac_code.providers.dashscope_provider import (
            DASHSCOPE_TOKEN_PLAN_BASE_URL,
            DashScopeProvider,
        )

        effort = _effort()
        return DashScopeProvider(
            model=model,
            api_key=credentials.get("dashscope_token_plan"),
            effort=effort,
            base_url=DASHSCOPE_TOKEN_PLAN_BASE_URL,
            provider_key="dashscope_token_plan",
        )

    if provider_name == "deepseek":
        from iac_code.providers.deepseek_provider import DeepSeekProvider

        effort = _effort()
        return DeepSeekProvider(model=model, api_key=credentials.get("deepseek"), effort=effort)

    if provider_name == "openapi_compatible":
        from iac_code.providers.openai_provider import OpenAIProvider

        # This branch reads the custom endpoint ("apiBase") and, unlike the
        # other branches, passes no effort setting.
        api_base = _settings().get("apiBase")
        return OpenAIProvider(model=model, api_key=credentials.get("openapi_compatible"), base_url=api_base)

    raise ValueError(f"Unknown provider: {provider_name}")
132
+
133
+
134
class ProviderManager:
    """Manages provider lifecycle, streaming fallback, and model degradation.

    When streaming fails mid-way:
    1. Yield TombstoneEvents for orphaned partial messages
    2. Fall back to non-streaming complete() call
    3. Yield the complete response as events
    """

    def __init__(
        self,
        model: str,
        credentials: dict[str, str],
        retry_config: RetryConfig | None = None,
        stream_idle_timeout: float = 90.0,
    ):
        """Store the configuration and try to create the provider eagerly.

        Args:
            model: Model name to use for requests.
            credentials: Mapping from provider name to API key.
            retry_config: Retry policy for non-streaming calls; a default
                ``RetryConfig()`` is used when omitted.
            stream_idle_timeout: Idle timeout handed to ``StreamWatchdog``.
        """
        self._model = model
        self._credentials = credentials
        self._retry_config = retry_config or RetryConfig()
        self._stream_idle_timeout = stream_idle_timeout
        # Lazy: first startup may have no active provider yet. Defer errors
        # until the user actually tries to send a message, so /auth is reachable.
        self._provider: Provider | None = None
        try:
            self._provider = create_provider(model, credentials)
        except ValueError as e:
            logger.warning(f"Provider not configured yet: {e}")

    def _ensure_provider(self) -> Provider:
        """Return the current provider, creating it on first use.

        Raises:
            ValueError: Propagated from ``create_provider`` when no provider
                can be created for the current configuration.
        """
        if self._provider is None:
            self._provider = create_provider(self._model, self._credentials)
        return self._provider

    def reconfigure(self, model: str, credentials: dict[str, str]) -> None:
        """Switch model and credentials in place.

        Used by `/auth` and `/model` so every consumer holding this manager
        (REPL, AgentTool, SkillTool) picks up the change without re-wiring.
        The previous provider is dropped and a new one is created right away;
        if that fails with ``ValueError`` the failure is only logged and
        creation is retried lazily on next use.
        """
        self._model = model
        self._credentials = credentials
        self._provider = None
        try:
            self._provider = create_provider(model, credentials)
        except ValueError as e:
            logger.warning(f"Provider not configured after reconfigure: {e}")

    def get_model_name(self) -> str:
        """Return the model name currently in use."""
        return self._model

    def _get_fallback_model(self) -> str | None:
        """Return the degradation target for the current model, if any."""
        return MODEL_FALLBACK_MAP.get(self._model)

    @staticmethod
    def _provider_label(provider) -> str:
        """Derive the telemetry provider label from the provider's class name.

        The class name is lower-cased after removing the substring "Provider".
        """
        return type(provider).__name__.replace("Provider", "").lower()

    async def stream(
        self, messages: list[Message], system: str, tools: list[ToolDefinition] | None = None, max_tokens: int = 8192
    ) -> AsyncGenerator[StreamEvent, None]:
        """Stream a chat response, falling back to a non-streaming call on error.

        Provider events are forwarded as they arrive. If anything raises while
        streaming, a ``TombstoneEvent`` is yielded for every message that had
        already started, the request is re-issued through
        ``_complete_with_retry`` and the complete response is replayed as
        events. If that call fails as well, a single non-retryable
        ``ErrorEvent`` is yielded instead.

        If the provider stream ends without a ``MessageEndEvent`` and without
        raising, the generator simply finishes; no success or failure
        telemetry is emitted in that case.

        Args:
            messages: Conversation history sent to the provider.
            system: System prompt.
            tools: Optional tool definitions offered to the model.
            max_tokens: Maximum number of tokens requested from the provider.

        Raises:
            ValueError: If no provider can be created (from ``_ensure_provider``).
        """
        provider = self._ensure_provider()
        provider_name = self._provider_label(provider)
        sanitized_model = sanitize_model_name(self._model)

        log_event(
            Events.API_REQUEST_STARTED,
            {
                "provider": provider_name,
                "model": sanitized_model,
                "message_count": len(messages),
            },
        )
        started = time.monotonic()

        span_name = f"{Spans.LLM_CHAT} {self._model}"
        span_attrs = {
            GenAiAttr.SPAN_KIND: GenAiSpanKind.LLM,
            GenAiAttr.OPERATION_NAME: GenAiOperationName.CHAT,
            GenAiAttr.PROVIDER_NAME: provider_name,
            GenAiAttr.REQUEST_MODEL: self._model,
            GenAiAttr.REQUEST_MAX_TOKENS: max_tokens,
            GenAiAttr.CONVERSATION_ID: get_session_id(),
            GenAiAttr.OUTPUT_TYPE: "text",
        }
        # Prompt content is attached to the span only when capture is enabled.
        if should_capture_content_on_span():
            span_attrs[GenAiAttr.INPUT_MESSAGES] = serialize_input_messages(messages)
            span_attrs[GenAiAttr.SYSTEM_INSTRUCTIONS] = serialize_system_instructions(system)
            if tools:
                span_attrs[GenAiAttr.TOOL_DEFINITIONS] = serialize_tool_definitions(tools)

        with start_span(span_name, span_attrs) as span:
            # IDs of messages whose start was forwarded to the consumer; they
            # must be tombstoned if the stream dies before completing.
            orphaned_message_ids: list[str] = []
            streaming_failed = False
            first_token_received = False
            watchdog: StreamWatchdog | None = None
            try:
                watchdog = StreamWatchdog(idle_timeout=self._stream_idle_timeout)
                watchdog.start()
                async for event in provider.stream(messages, system, tools, max_tokens):
                    watchdog.ping()
                    if isinstance(event, MessageStartEvent):
                        orphaned_message_ids.append(event.message_id)
                        span.set_attribute(GenAiAttr.RESPONSE_ID, event.message_id)
                    elif isinstance(event, TextDeltaEvent) and not first_token_received:
                        # Time to first token is measured on the first text delta only.
                        first_token_received = True
                        ttft_ns = int((time.monotonic() - started) * 1_000_000_000)
                        span.set_attribute(GenAiAttr.RESPONSE_TIME_TO_FIRST_TOKEN, ttft_ns)
                    yield event
                    if isinstance(event, MessageEndEvent):
                        self._set_llm_response_span_attrs(span, event, self._model)
                        self._emit_success_telemetry(provider_name, sanitized_model, started, event.usage)
                        return
            except Exception as e:
                streaming_failed = True
                logger.warning(f"Streaming failed, falling back to non-streaming: {e}")
            finally:
                # Stop the watchdog on every exit path (normal end, failure,
                # stream ending without MessageEndEvent, consumer closing the
                # generator) so it cannot outlive the stream and keep running
                # during the non-streaming fallback.
                # NOTE(review): assumes StreamWatchdog.stop() is harmless when
                # start() itself failed - confirm against stream_watchdog.py.
                if watchdog is not None:
                    watchdog.stop()

            if not streaming_failed:
                return

            for msg_id in orphaned_message_ids:
                yield TombstoneEvent(message_id=msg_id)
            try:
                response = await self._complete_with_retry(messages, system, tools, max_tokens)
            except Exception as e:
                self._emit_failure_telemetry(provider_name, sanitized_model, started, e)
                yield ErrorEvent(error=str(e), is_retryable=False)
                return
            span.set_attribute(GenAiAttr.RESPONSE_ID, response.message_id)
            self._set_llm_response_span_attrs_from_response(span, response, self._model)
            self._emit_success_telemetry(provider_name, sanitized_model, started, response.usage)
            for replayed in self._response_to_events(response):
                yield replayed

    @staticmethod
    def _response_to_events(response: NonStreamingResponse):
        """Replay a complete response as the equivalent sequence of stream events.

        Order: message start, optional thinking, optional text, a start/end
        pair per tool use, then message end.
        """
        yield MessageStartEvent(message_id=response.message_id)
        if response.thinking:
            yield ThinkingDeltaEvent(text=response.thinking)
        if response.text:
            yield TextDeltaEvent(text=response.text)
        for tu in response.tool_uses:
            yield ToolUseStartEvent(tool_use_id=tu["id"], name=tu["name"])
            yield ToolUseEndEvent(tool_use_id=tu["id"], input=tu["input"])
        yield MessageEndEvent(stop_reason=response.stop_reason, usage=response.usage)

    @staticmethod
    def _set_response_span_attrs(span, stop_reason, usage, model: str) -> None:
        """Record response model, finish reason and token usage on ``span``.

        Cache token attributes are only written when their counts are truthy.
        """
        span.set_attribute(GenAiAttr.RESPONSE_MODEL, model)
        span.set_attribute(GenAiAttr.RESPONSE_FINISH_REASONS, [stop_reason])
        span.set_attribute(GenAiAttr.USAGE_INPUT_TOKENS, usage.input_tokens)
        span.set_attribute(GenAiAttr.USAGE_OUTPUT_TOKENS, usage.output_tokens)
        total = usage.input_tokens + usage.output_tokens
        span.set_attribute(GenAiAttr.USAGE_TOTAL_TOKENS, total)
        if usage.cache_creation_input_tokens:
            span.set_attribute(GenAiAttr.USAGE_CACHE_CREATION_INPUT_TOKENS, usage.cache_creation_input_tokens)
        if usage.cache_read_input_tokens:
            span.set_attribute(GenAiAttr.USAGE_CACHE_READ_INPUT_TOKENS, usage.cache_read_input_tokens)

    @staticmethod
    def _set_llm_response_span_attrs(span, end_event: MessageEndEvent, model: str) -> None:
        """Record the outcome of a streamed response (taken from its end event)."""
        ProviderManager._set_response_span_attrs(span, end_event.stop_reason, end_event.usage, model)

    @staticmethod
    def _set_llm_response_span_attrs_from_response(span, response: NonStreamingResponse, model: str) -> None:
        """Record the outcome of a non-streaming response."""
        ProviderManager._set_response_span_attrs(span, response.stop_reason, response.usage, model)

    @staticmethod
    def _emit_success_telemetry(provider_name: str, model: str, started: float, usage) -> None:
        """Log the success event and emit request-count, duration and token metrics.

        Args:
            provider_name: Provider label used as a telemetry dimension.
            model: Model name used as a telemetry dimension.
            started: ``time.monotonic()`` value captured when the request began.
            usage: Object exposing the token counters read below.
        """
        duration_ms = int((time.monotonic() - started) * 1000)
        log_event(
            Events.API_REQUEST_SUCCEEDED,
            {
                "provider": provider_name,
                "model": model,
                "duration_ms": duration_ms,
                "input_tokens": usage.input_tokens,
                "output_tokens": usage.output_tokens,
                "cache_read_tokens": usage.cache_read_input_tokens,
                "cache_create_tokens": usage.cache_creation_input_tokens,
            },
        )
        add_metric(Metrics.API_REQUEST_COUNT, 1, {"provider": provider_name, "model": model, "status": "ok"})
        add_metric(Metrics.API_REQUEST_DURATION, duration_ms, {"provider": provider_name, "model": model})
        # Token metrics are skipped for zero / missing counts.
        for token_type, count in (
            ("input", usage.input_tokens),
            ("output", usage.output_tokens),
            ("cache_read", usage.cache_read_input_tokens or 0),
            ("cache_create", usage.cache_creation_input_tokens or 0),
        ):
            if count:
                add_metric(Metrics.TOKEN_USAGE, count, {"type": token_type, "provider": provider_name, "model": model})

    @staticmethod
    def _emit_failure_telemetry(provider_name: str, model: str, started: float, exc: Exception) -> None:
        """Log the failure event (with a sanitized message) and count the error."""
        duration_ms = int((time.monotonic() - started) * 1000)
        log_event(
            Events.API_REQUEST_FAILED,
            {
                "provider": provider_name,
                "model": model,
                "error_type": type(exc).__name__,
                "duration_ms": duration_ms,
                "error_message": sanitize_error_message(str(exc)),
            },
        )
        add_metric(
            Metrics.API_REQUEST_COUNT,
            1,
            {"provider": provider_name, "model": model, "status": "error", "error_type": type(exc).__name__},
        )

    async def complete(
        self, messages: list[Message], system: str, tools: list[ToolDefinition] | None = None, max_tokens: int = 8192
    ) -> NonStreamingResponse:
        """Run a non-streaming completion with retry and model degradation."""
        return await self._complete_with_retry(messages, system, tools, max_tokens, is_fallback=False)

    async def _complete_with_retry(
        self, messages, system, tools, max_tokens, is_fallback=False
    ) -> NonStreamingResponse:
        """Call ``provider.complete`` with retries, degrading the model once on failure.

        Errors carrying one of the listed HTTP status codes, and
        connection/timeout/OS errors, are wrapped in ``RetryableError`` for
        ``with_retry``. When the attempt still fails and a fallback model is
        mapped, the manager switches to it and tries once more with
        ``is_fallback=True`` (so there is no second hop). On success the
        manager stays on the fallback model; on failure the previous model and
        provider are restored and the original error is raised.

        Args:
            messages, system, tools, max_tokens: Passed through to the provider.
            is_fallback: True when this call already is the degraded attempt.

        Raises:
            Exception: The error of the primary attempt when no fallback
                applies or the fallback attempt fails too.
        """
        provider = self._ensure_provider()
        provider_name = self._provider_label(provider)
        sanitized_model = sanitize_model_name(self._model)

        async def _on_retry(attempt, exc, delay):
            log_event(
                Events.API_REQUEST_RETRIED,
                {
                    "provider": provider_name,
                    "model": sanitized_model,
                    "attempt": attempt,
                    "error_type": type(exc).__name__,
                },
            )

        async def operation():
            try:
                return await provider.complete(messages, system, tools, max_tokens)
            except Exception as e:
                # SDK errors expose the HTTP status under either attribute name.
                status = getattr(e, "status_code", None) or getattr(e, "status", None)
                if status and status in {408, 409, 429, 500, 502, 503, 529}:
                    raise RetryableError(str(e), status_code=status) from e
                if isinstance(e, (ConnectionError, TimeoutError, OSError)):
                    raise RetryableError(str(e)) from e
                raise

        try:
            return await with_retry(operation, self._retry_config, on_retry=_on_retry)
        except Exception as original_exc:
            fallback = None if is_fallback else self._get_fallback_model()
            if fallback is None:
                raise

            original_model = self._model
            original_provider = self._provider
            log_event(
                Events.MODEL_FALLBACK_TRIGGERED,
                {
                    "from_model": sanitized_model,
                    "to_model": sanitize_model_name(fallback),
                    "reason": "model_degradation",
                },
            )
            try:
                # Creating the fallback provider happens inside the guarded
                # region: if it raises, model and provider are rolled back
                # together instead of leaving the manager half-switched.
                self._provider = create_provider(fallback, self._credentials)
                self._model = fallback
                return await self._complete_with_retry(messages, system, tools, max_tokens, is_fallback=True)
            except Exception:
                self._model = original_model
                self._provider = original_provider
                # Surface the primary failure, not the fallback's.
                raise original_exc from None