PyPI - iac-code - Versions diffs - 0.1.0__py3-none-any.whl - Mend

iac-code 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (184) hide show

iac_code/__init__.py +2 -0
iac_code/acp/__init__.py +97 -0
iac_code/acp/convert.py +423 -0
iac_code/acp/http_sse.py +448 -0
iac_code/acp/mcp.py +54 -0
iac_code/acp/metrics.py +71 -0
iac_code/acp/server.py +662 -0
iac_code/acp/session.py +446 -0
iac_code/acp/slash_registry.py +125 -0
iac_code/acp/state.py +99 -0
iac_code/acp/tools.py +112 -0
iac_code/acp/types.py +13 -0
iac_code/acp/version.py +26 -0
iac_code/agent/__init__.py +19 -0
iac_code/agent/agent_loop.py +640 -0
iac_code/agent/agent_tool.py +269 -0
iac_code/agent/agent_types.py +87 -0
iac_code/agent/message.py +153 -0
iac_code/agent/system_prompt.py +313 -0
iac_code/cli/__init__.py +3 -0
iac_code/cli/headless.py +114 -0
iac_code/cli/main.py +246 -0
iac_code/cli/output_formats.py +125 -0
iac_code/commands/__init__.py +93 -0
iac_code/commands/auth.py +1055 -0
iac_code/commands/clear.py +34 -0
iac_code/commands/compact.py +43 -0
iac_code/commands/debug.py +45 -0
iac_code/commands/effort.py +116 -0
iac_code/commands/exit.py +10 -0
iac_code/commands/help.py +49 -0
iac_code/commands/model.py +130 -0
iac_code/commands/registry.py +245 -0
iac_code/commands/resume.py +49 -0
iac_code/commands/tasks.py +41 -0
iac_code/config.py +304 -0
iac_code/i18n/__init__.py +141 -0
iac_code/i18n/locales/zh/LC_MESSAGES/messages.po +1355 -0
iac_code/memory/__init__.py +1 -0
iac_code/memory/memory_manager.py +92 -0
iac_code/memory/memory_tools.py +88 -0
iac_code/providers/__init__.py +1 -0
iac_code/providers/anthropic_provider.py +284 -0
iac_code/providers/base.py +128 -0
iac_code/providers/dashscope_provider.py +47 -0
iac_code/providers/deepseek_provider.py +36 -0
iac_code/providers/manager.py +399 -0
iac_code/providers/openai_provider.py +344 -0
iac_code/providers/retry.py +58 -0
iac_code/providers/stream_watchdog.py +47 -0
iac_code/providers/thinking.py +164 -0
iac_code/services/__init__.py +1 -0
iac_code/services/agent_factory.py +127 -0
iac_code/services/cloud_credentials.py +22 -0
iac_code/services/context_manager.py +221 -0
iac_code/services/providers/__init__.py +1 -0
iac_code/services/providers/aliyun.py +232 -0
iac_code/services/session_index.py +281 -0
iac_code/services/session_storage.py +245 -0
iac_code/services/telemetry/__init__.py +66 -0
iac_code/services/telemetry/attributes.py +84 -0
iac_code/services/telemetry/client.py +330 -0
iac_code/services/telemetry/config.py +76 -0
iac_code/services/telemetry/constants.py +75 -0
iac_code/services/telemetry/content_serializer.py +124 -0
iac_code/services/telemetry/events.py +42 -0
iac_code/services/telemetry/fallback.py +59 -0
iac_code/services/telemetry/identity.py +73 -0
iac_code/services/telemetry/metrics.py +62 -0
iac_code/services/telemetry/names.py +199 -0
iac_code/services/telemetry/sanitize.py +88 -0
iac_code/services/telemetry/sink.py +67 -0
iac_code/services/telemetry/tracing.py +38 -0
iac_code/services/telemetry/types.py +13 -0
iac_code/services/token_budget.py +54 -0
iac_code/services/token_counter.py +76 -0
iac_code/skills/__init__.py +1 -0
iac_code/skills/bundled/__init__.py +94 -0
iac_code/skills/bundled/iac_aliyun/SKILL.md +192 -0
iac_code/skills/bundled/iac_aliyun/__init__.py +16 -0
iac_code/skills/bundled/iac_aliyun/references/cloud-products/ecs.md +167 -0
iac_code/skills/bundled/iac_aliyun/references/cloud-products/oss.md +69 -0
iac_code/skills/bundled/iac_aliyun/references/cloud-products/rds.md +95 -0
iac_code/skills/bundled/iac_aliyun/references/cloud-products/redis.md +100 -0
iac_code/skills/bundled/iac_aliyun/references/cloud-products/slb.md +60 -0
iac_code/skills/bundled/iac_aliyun/references/cloud-products/vpc.md +54 -0
iac_code/skills/bundled/iac_aliyun/references/ros-template.md +155 -0
iac_code/skills/bundled/iac_aliyun/references/template-parameters.md +206 -0
iac_code/skills/bundled/iac_aliyun/references/terraform-template.md +101 -0
iac_code/skills/bundled/iac_aliyun/scripts/tf2ros.py +77 -0
iac_code/skills/bundled/simplify.py +28 -0
iac_code/skills/discovery.py +136 -0
iac_code/skills/frontmatter.py +119 -0
iac_code/skills/listing.py +92 -0
iac_code/skills/loader.py +42 -0
iac_code/skills/processor.py +81 -0
iac_code/skills/renderer.py +157 -0
iac_code/skills/skill_definition.py +82 -0
iac_code/skills/skill_tool.py +261 -0
iac_code/state/__init__.py +5 -0
iac_code/state/app_state.py +122 -0
iac_code/tasks/__init__.py +1 -0
iac_code/tasks/notification_queue.py +28 -0
iac_code/tasks/task_state.py +66 -0
iac_code/tasks/task_tools.py +114 -0
iac_code/tools/__init__.py +8 -0
iac_code/tools/base.py +226 -0
iac_code/tools/bash.py +133 -0
iac_code/tools/cloud/__init__.py +0 -0
iac_code/tools/cloud/aliyun/__init__.py +0 -0
iac_code/tools/cloud/aliyun/aliyun_api.py +510 -0
iac_code/tools/cloud/aliyun/aliyun_doc_search.py +145 -0
iac_code/tools/cloud/aliyun/endpoints.yml +343 -0
iac_code/tools/cloud/aliyun/ros_client.py +56 -0
iac_code/tools/cloud/aliyun/ros_stack.py +633 -0
iac_code/tools/cloud/aliyun/ros_stack_instances.py +247 -0
iac_code/tools/cloud/base_api.py +162 -0
iac_code/tools/cloud/base_stack.py +242 -0
iac_code/tools/cloud/registry.py +20 -0
iac_code/tools/cloud/types.py +105 -0
iac_code/tools/edit_file.py +121 -0
iac_code/tools/glob.py +103 -0
iac_code/tools/grep.py +254 -0
iac_code/tools/list_files.py +104 -0
iac_code/tools/read_file.py +127 -0
iac_code/tools/result_storage.py +39 -0
iac_code/tools/tool_executor.py +165 -0
iac_code/tools/web_fetch.py +177 -0
iac_code/tools/write_file.py +88 -0
iac_code/types/__init__.py +40 -0
iac_code/types/permissions.py +26 -0
iac_code/types/skill_source.py +11 -0
iac_code/types/stream_events.py +227 -0
iac_code/ui/__init__.py +5 -0
iac_code/ui/banner.py +110 -0
iac_code/ui/components/__init__.py +0 -0
iac_code/ui/components/dialog.py +142 -0
iac_code/ui/components/divider.py +20 -0
iac_code/ui/components/fuzzy_picker.py +308 -0
iac_code/ui/components/progress_bar.py +54 -0
iac_code/ui/components/search_box.py +165 -0
iac_code/ui/components/select.py +319 -0
iac_code/ui/components/status_icon.py +42 -0
iac_code/ui/components/tabs.py +128 -0
iac_code/ui/core/__init__.py +0 -0
iac_code/ui/core/in_place_render.py +129 -0
iac_code/ui/core/input_history.py +118 -0
iac_code/ui/core/key_event.py +41 -0
iac_code/ui/core/prompt_input.py +507 -0
iac_code/ui/core/raw_input.py +302 -0
iac_code/ui/core/screen.py +80 -0
iac_code/ui/dialogs/__init__.py +0 -0
iac_code/ui/dialogs/global_search.py +178 -0
iac_code/ui/dialogs/history_search.py +100 -0
iac_code/ui/dialogs/model_picker.py +280 -0
iac_code/ui/dialogs/quick_open.py +108 -0
iac_code/ui/dialogs/resume_picker.py +749 -0
iac_code/ui/keybindings/__init__.py +0 -0
iac_code/ui/keybindings/manager.py +124 -0
iac_code/ui/renderer.py +1535 -0
iac_code/ui/repl.py +772 -0
iac_code/ui/spinner.py +112 -0
iac_code/ui/suggestions/__init__.py +0 -0
iac_code/ui/suggestions/aggregator.py +171 -0
iac_code/ui/suggestions/command_provider.py +43 -0
iac_code/ui/suggestions/directory_provider.py +95 -0
iac_code/ui/suggestions/file_provider.py +121 -0
iac_code/ui/suggestions/shell_history_provider.py +108 -0
iac_code/ui/suggestions/token_extractor.py +77 -0
iac_code/ui/suggestions/types.py +45 -0
iac_code/ui/transcript_view.py +199 -0
iac_code/utils/__init__.py +0 -0
iac_code/utils/background_housekeeping.py +53 -0
iac_code/utils/cleanup.py +68 -0
iac_code/utils/json_utils.py +60 -0
iac_code/utils/log.py +150 -0
iac_code/utils/project_paths.py +74 -0
iac_code/utils/tool_input_parser.py +62 -0
iac_code-0.1.0.dist-info/LICENSE +201 -0
iac_code-0.1.0.dist-info/METADATA +64 -0
iac_code-0.1.0.dist-info/RECORD +184 -0
iac_code-0.1.0.dist-info/WHEEL +5 -0
iac_code-0.1.0.dist-info/entry_points.txt +2 -0
iac_code-0.1.0.dist-info/top_level.txt +1 -0

iac_code/providers/manager.py ADDED Viewed

@@ -0,0 +1,399 @@
+"""Provider selection, streaming fallback with tombstone, and model degradation."""
+from __future__ import annotations
+import time
+from collections.abc import AsyncGenerator
+from loguru import logger
+from iac_code.config import _KEY_NAME_TO_CRED_SLOT as _KEY_TO_PROVIDER
+from iac_code.providers.base import Message, NonStreamingResponse, Provider, ToolDefinition
+from iac_code.providers.retry import RetryableError, RetryConfig, with_retry
+from iac_code.providers.stream_watchdog import StreamWatchdog
+from iac_code.services.telemetry import add_metric, get_session_id, log_event, start_span
+from iac_code.services.telemetry.config import should_capture_content_on_span
+from iac_code.services.telemetry.content_serializer import (
+    serialize_input_messages,
+    serialize_system_instructions,
+    serialize_tool_definitions,
+)
+from iac_code.services.telemetry.names import (
+    Events,
+    GenAiAttr,
+    GenAiOperationName,
+    GenAiSpanKind,
+    Metrics,
+    Spans,
+)
+from iac_code.services.telemetry.sanitize import sanitize_error_message, sanitize_model_name
+from iac_code.types.stream_events import (
+    ErrorEvent,
+    MessageEndEvent,
+    MessageStartEvent,
+    StreamEvent,
+    TextDeltaEvent,
+    ThinkingDeltaEvent,
+    TombstoneEvent,
+    ToolUseEndEvent,
+    ToolUseStartEvent,
+)
+# Model degradation table: maps a model name to the model that
+# ProviderManager._complete_with_retry switches to after the non-streaming call
+# for the key model has failed. Models without an entry have no fallback
+# (ProviderManager._get_fallback_model returns None for them).
+MODEL_FALLBACK_MAP = {
+    "claude-opus-4-7": "claude-haiku-4-5-20251001",
+    "claude-opus-4-6": "claude-haiku-4-5-20251001",
+    "claude-sonnet-4-6": "claude-haiku-4-5-20251001",
+    "claude-sonnet-4-6-1m": "claude-haiku-4-5-20251001",
+    "gpt-5.5": "gpt-5.4",
+    "gpt-5.4": "gpt-5.4-mini",
+    "qwen3.6-plus": "qwen3.5-plus",
+    "deepseek-v4-pro": "deepseek-v4-flash",
+}
+def _detect_provider_name(model: str) -> str:
+    """Detect provider from saved settings.yml (set by /auth or /model).
+    The active provider is always determined by the saved config,
+    never by matching model names — different providers can share model names.
+    """
+    from iac_code.config import get_active_provider_key
+    key_name = get_active_provider_key() or ""
+    if key_name in _KEY_TO_PROVIDER:
+        return _KEY_TO_PROVIDER[key_name]
+    raise ValueError(f"Cannot determine provider for model: {model}. Run /auth to configure.")
+def create_provider(model: str, credentials: dict[str, str]) -> Provider:
+    provider_name = _detect_provider_name(model)
+    if provider_name == "anthropic":
+        from iac_code.config import get_provider_config
+        from iac_code.providers.anthropic_provider import AnthropicProvider
+        effort = get_provider_config("anthropic").get("effort")
+        return AnthropicProvider(
+            model=model,
+            api_key=credentials.get("anthropic"),
+            effort=effort if isinstance(effort, str) else None,
+        )
+    elif provider_name == "openai":
+        from iac_code.config import get_provider_config
+        from iac_code.providers.openai_provider import OpenAIProvider
+        effort = get_provider_config("openai").get("effort")
+        return OpenAIProvider(
+            model=model,
+            api_key=credentials.get("openai"),
+            effort=effort if isinstance(effort, str) else None,
+        )
+    elif provider_name == "dashscope":
+        from iac_code.config import get_provider_config
+        from iac_code.providers.dashscope_provider import DashScopeProvider
+        effort = get_provider_config("dashscope").get("effort")
+        return DashScopeProvider(
+            model=model,
+            api_key=credentials.get("dashscope"),
+            effort=effort if isinstance(effort, str) else None,
+        )
+    elif provider_name == "dashscope_token_plan":
+        from iac_code.config import get_provider_config
+        from iac_code.providers.dashscope_provider import (
+            DASHSCOPE_TOKEN_PLAN_BASE_URL,
+            DashScopeProvider,
+        )
+        effort = get_provider_config("dashscope_token_plan").get("effort")
+        return DashScopeProvider(
+            model=model,
+            api_key=credentials.get("dashscope_token_plan"),
+            effort=effort if isinstance(effort, str) else None,
+            base_url=DASHSCOPE_TOKEN_PLAN_BASE_URL,
+            provider_key="dashscope_token_plan",
+        )
+    elif provider_name == "deepseek":
+        from iac_code.config import get_provider_config
+        from iac_code.providers.deepseek_provider import DeepSeekProvider
+        effort = get_provider_config("deepseek").get("effort")
+        return DeepSeekProvider(
+            model=model,
+            api_key=credentials.get("deepseek"),
+            effort=effort if isinstance(effort, str) else None,
+        )
+    elif provider_name == "openapi_compatible":
+        from iac_code.config import get_provider_config
+        from iac_code.providers.openai_provider import OpenAIProvider
+        api_base = get_provider_config("openapi_compatible").get("apiBase")
+        return OpenAIProvider(model=model, api_key=credentials.get("openapi_compatible"), base_url=api_base)
+    raise ValueError(f"Unknown provider: {provider_name}")
+class ProviderManager:
+    """Manages provider lifecycle, streaming fallback, and model degradation.
+    When streaming fails mid-way:
+    1. Yield TombstoneEvents for orphaned partial messages
+    2. Fall back to non-streaming complete() call
+    3. Yield the complete response as events
+    """
+    def __init__(
+        self,
+        model: str,
+        credentials: dict[str, str],
+        retry_config: RetryConfig | None = None,
+        stream_idle_timeout: float = 90.0,
+    ):
+        self._model = model
+        self._credentials = credentials
+        self._retry_config = retry_config or RetryConfig()
+        self._stream_idle_timeout = stream_idle_timeout
+        # Lazy: first startup may have no active provider yet. Defer errors
+        # until the user actually tries to send a message, so /auth is reachable.
+        self._provider: Provider | None = None
+        try:
+            self._provider = create_provider(model, credentials)
+        except ValueError as e:
+            logger.warning(f"Provider not configured yet: {e}")
+    def _ensure_provider(self) -> Provider:
+        if self._provider is None:
+            self._provider = create_provider(self._model, self._credentials)
+        return self._provider
+    def reconfigure(self, model: str, credentials: dict[str, str]) -> None:
+        """Switch model and credentials in place.
+        Used by `/auth` and `/model` so every consumer holding this manager
+        (REPL, AgentTool, SkillTool) picks up the change without re-wiring.
+        The underlying provider is reset and lazily recreated on next use,
+        so reconfiguring while no provider is active stays cheap.
+        """
+        self._model = model
+        self._credentials = credentials
+        self._provider = None
+        try:
+            self._provider = create_provider(model, credentials)
+        except ValueError as e:
+            logger.warning(f"Provider not configured after reconfigure: {e}")
+    def get_model_name(self) -> str:
+        return self._model
+    def _get_fallback_model(self) -> str | None:
+        return MODEL_FALLBACK_MAP.get(self._model)
+    async def stream(
+        self, messages: list[Message], system: str, tools: list[ToolDefinition] | None = None, max_tokens: int = 8192
+    ) -> AsyncGenerator[StreamEvent, None]:
+        provider = self._ensure_provider()
+        provider_name = type(provider).__name__.replace("Provider", "").lower()
+        sanitized_model = sanitize_model_name(self._model)
+        log_event(
+            Events.API_REQUEST_STARTED,
+            {
+                "provider": provider_name,
+                "model": sanitized_model,
+                "message_count": len(messages),
+            },
+        )
+        started = time.monotonic()
+        span_name = f"{Spans.LLM_CHAT} {self._model}"
+        span_attrs = {
+            GenAiAttr.SPAN_KIND: GenAiSpanKind.LLM,
+            GenAiAttr.OPERATION_NAME: GenAiOperationName.CHAT,
+            GenAiAttr.PROVIDER_NAME: provider_name,
+            GenAiAttr.REQUEST_MODEL: self._model,
+            GenAiAttr.REQUEST_MAX_TOKENS: max_tokens,
+            GenAiAttr.CONVERSATION_ID: get_session_id(),
+            GenAiAttr.OUTPUT_TYPE: "text",
+        }
+        if should_capture_content_on_span():
+            span_attrs[GenAiAttr.INPUT_MESSAGES] = serialize_input_messages(messages)
+            span_attrs[GenAiAttr.SYSTEM_INSTRUCTIONS] = serialize_system_instructions(system)
+            if tools:
+                span_attrs[GenAiAttr.TOOL_DEFINITIONS] = serialize_tool_definitions(tools)
+        with start_span(span_name, span_attrs) as span:
+            orphaned_message_ids: list[str] = []
+            streaming_failed = False
+            first_token_received = False
+            try:
+                watchdog = StreamWatchdog(idle_timeout=self._stream_idle_timeout)
+                watchdog.start()
+                async for event in provider.stream(messages, system, tools, max_tokens):
+                    watchdog.ping()
+                    if isinstance(event, MessageStartEvent):
+                        orphaned_message_ids.append(event.message_id)
+                        span.set_attribute(GenAiAttr.RESPONSE_ID, event.message_id)
+                    elif isinstance(event, TextDeltaEvent) and not first_token_received:
+                        first_token_received = True
+                        ttft_ns = int((time.monotonic() - started) * 1_000_000_000)
+                        span.set_attribute(GenAiAttr.RESPONSE_TIME_TO_FIRST_TOKEN, ttft_ns)
+                    yield event
+                    if isinstance(event, MessageEndEvent):
+                        watchdog.stop()
+                        self._set_llm_response_span_attrs(span, event, self._model)
+                        self._emit_success_telemetry(provider_name, sanitized_model, started, event.usage)
+                        return
+            except Exception as e:
+                streaming_failed = True
+                logger.warning(f"Streaming failed, falling back to non-streaming: {e}")
+            if streaming_failed:
+                for msg_id in orphaned_message_ids:
+                    yield TombstoneEvent(message_id=msg_id)
+                try:
+                    response = await self._complete_with_retry(messages, system, tools, max_tokens)
+                except Exception as e:
+                    self._emit_failure_telemetry(provider_name, sanitized_model, started, e)
+                    yield ErrorEvent(error=str(e), is_retryable=False)
+                    return
+                span.set_attribute(GenAiAttr.RESPONSE_ID, response.message_id)
+                self._set_llm_response_span_attrs_from_response(span, response, self._model)
+                self._emit_success_telemetry(provider_name, sanitized_model, started, response.usage)
+                yield MessageStartEvent(message_id=response.message_id)
+                if response.thinking:
+                    yield ThinkingDeltaEvent(text=response.thinking)
+                if response.text:
+                    yield TextDeltaEvent(text=response.text)
+                for tu in response.tool_uses:
+                    yield ToolUseStartEvent(tool_use_id=tu["id"], name=tu["name"])
+                    yield ToolUseEndEvent(tool_use_id=tu["id"], input=tu["input"])
+                yield MessageEndEvent(stop_reason=response.stop_reason, usage=response.usage)
+    @staticmethod
+    def _set_llm_response_span_attrs(span, end_event: MessageEndEvent, model: str) -> None:
+        usage = end_event.usage
+        span.set_attribute(GenAiAttr.RESPONSE_MODEL, model)
+        span.set_attribute(GenAiAttr.RESPONSE_FINISH_REASONS, [end_event.stop_reason])
+        span.set_attribute(GenAiAttr.USAGE_INPUT_TOKENS, usage.input_tokens)
+        span.set_attribute(GenAiAttr.USAGE_OUTPUT_TOKENS, usage.output_tokens)
+        total = usage.input_tokens + usage.output_tokens
+        span.set_attribute(GenAiAttr.USAGE_TOTAL_TOKENS, total)
+        if usage.cache_creation_input_tokens:
+            span.set_attribute(GenAiAttr.USAGE_CACHE_CREATION_INPUT_TOKENS, usage.cache_creation_input_tokens)
+        if usage.cache_read_input_tokens:
+            span.set_attribute(GenAiAttr.USAGE_CACHE_READ_INPUT_TOKENS, usage.cache_read_input_tokens)
+    @staticmethod
+    def _set_llm_response_span_attrs_from_response(span, response: NonStreamingResponse, model: str) -> None:
+        usage = response.usage
+        span.set_attribute(GenAiAttr.RESPONSE_MODEL, model)
+        span.set_attribute(GenAiAttr.RESPONSE_FINISH_REASONS, [response.stop_reason])
+        span.set_attribute(GenAiAttr.USAGE_INPUT_TOKENS, usage.input_tokens)
+        span.set_attribute(GenAiAttr.USAGE_OUTPUT_TOKENS, usage.output_tokens)
+        total = usage.input_tokens + usage.output_tokens
+        span.set_attribute(GenAiAttr.USAGE_TOTAL_TOKENS, total)
+        if usage.cache_creation_input_tokens:
+            span.set_attribute(GenAiAttr.USAGE_CACHE_CREATION_INPUT_TOKENS, usage.cache_creation_input_tokens)
+        if usage.cache_read_input_tokens:
+            span.set_attribute(GenAiAttr.USAGE_CACHE_READ_INPUT_TOKENS, usage.cache_read_input_tokens)
+    @staticmethod
+    def _emit_success_telemetry(provider_name: str, model: str, started: float, usage) -> None:
+        duration_ms = int((time.monotonic() - started) * 1000)
+        log_event(
+            Events.API_REQUEST_SUCCEEDED,
+            {
+                "provider": provider_name,
+                "model": model,
+                "duration_ms": duration_ms,
+                "input_tokens": usage.input_tokens,
+                "output_tokens": usage.output_tokens,
+                "cache_read_tokens": usage.cache_read_input_tokens,
+                "cache_create_tokens": usage.cache_creation_input_tokens,
+            },
+        )
+        add_metric(Metrics.API_REQUEST_COUNT, 1, {"provider": provider_name, "model": model, "status": "ok"})
+        add_metric(Metrics.API_REQUEST_DURATION, duration_ms, {"provider": provider_name, "model": model})
+        for token_type, count in (
+            ("input", usage.input_tokens),
+            ("output", usage.output_tokens),
+            ("cache_read", usage.cache_read_input_tokens or 0),
+            ("cache_create", usage.cache_creation_input_tokens or 0),
+        ):
+            if count:
+                add_metric(Metrics.TOKEN_USAGE, count, {"type": token_type, "provider": provider_name, "model": model})
+    @staticmethod
+    def _emit_failure_telemetry(provider_name: str, model: str, started: float, exc: Exception) -> None:
+        duration_ms = int((time.monotonic() - started) * 1000)
+        log_event(
+            Events.API_REQUEST_FAILED,
+            {
+                "provider": provider_name,
+                "model": model,
+                "error_type": type(exc).__name__,
+                "duration_ms": duration_ms,
+                "error_message": sanitize_error_message(str(exc)),
+            },
+        )
+        add_metric(
+            Metrics.API_REQUEST_COUNT,
+            1,
+            {"provider": provider_name, "model": model, "status": "error", "error_type": type(exc).__name__},
+        )
+    async def complete(
+        self, messages: list[Message], system: str, tools: list[ToolDefinition] | None = None, max_tokens: int = 8192
+    ) -> NonStreamingResponse:
+        return await self._complete_with_retry(messages, system, tools, max_tokens, is_fallback=False)
+    async def _complete_with_retry(
+        self, messages, system, tools, max_tokens, is_fallback=False
+    ) -> NonStreamingResponse:
+        provider = self._ensure_provider()
+        provider_name = type(provider).__name__.replace("Provider", "").lower()
+        sanitized_model = sanitize_model_name(self._model)
+        async def _on_retry(attempt, exc, delay):
+            log_event(
+                Events.API_REQUEST_RETRIED,
+                {
+                    "provider": provider_name,
+                    "model": sanitized_model,
+                    "attempt": attempt,
+                    "error_type": type(exc).__name__,
+                },
+            )
+        async def operation():
+            try:
+                return await provider.complete(messages, system, tools, max_tokens)
+            except Exception as e:
+                status = getattr(e, "status_code", None) or getattr(e, "status", None)
+                if status and status in {408, 409, 429, 500, 502, 503, 529}:
+                    raise RetryableError(str(e), status_code=status) from e
+                if isinstance(e, (ConnectionError, TimeoutError, OSError)):
+                    raise RetryableError(str(e)) from e
+                raise
+        try:
+            return await with_retry(operation, self._retry_config, on_retry=_on_retry)
+        except Exception as original_exc:
+            if not is_fallback:
+                fallback = self._get_fallback_model()
+                if fallback is not None:
+                    original_model = self._model
+                    original_provider = self._provider
+                    log_event(
+                        Events.MODEL_FALLBACK_TRIGGERED,
+                        {
+                            "from_model": sanitized_model,
+                            "to_model": sanitize_model_name(fallback),
+                            "reason": "model_degradation",
+                        },
+                    )
+                    self._model = fallback
+                    self._provider = create_provider(fallback, self._credentials)
+                    try:
+                        return await self._complete_with_retry(messages, system, tools, max_tokens, is_fallback=True)
+                    except Exception:
+                        self._model = original_model
+                        self._provider = original_provider
+                        raise original_exc from None
+            raise