navaia-code 1.0.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. navaia/__init__.py +3 -0
  2. navaia/api/__init__.py +0 -0
  3. navaia/api/client.py +72 -0
  4. navaia/api/normalise.py +148 -0
  5. navaia/api/retry.py +114 -0
  6. navaia/api/streaming.py +341 -0
  7. navaia/api/types.py +213 -0
  8. navaia/commands/__init__.py +0 -0
  9. navaia/commands/builtin/__init__.py +0 -0
  10. navaia/commands/builtin/commands.py +206 -0
  11. navaia/commands/dispatcher.py +38 -0
  12. navaia/commands/parser.py +25 -0
  13. navaia/commands/registry.py +48 -0
  14. navaia/commands/skills.py +150 -0
  15. navaia/commands/types.py +26 -0
  16. navaia/compact/__init__.py +0 -0
  17. navaia/compact/compact.py +241 -0
  18. navaia/compact/prompt.py +22 -0
  19. navaia/compact/restore.py +91 -0
  20. navaia/config/__init__.py +0 -0
  21. navaia/config/env.py +53 -0
  22. navaia/config/global_config.py +43 -0
  23. navaia/config/project_config.py +53 -0
  24. navaia/config/providers.py +234 -0
  25. navaia/config/settings.py +113 -0
  26. navaia/context/__init__.py +0 -0
  27. navaia/context/cache.py +29 -0
  28. navaia/context/claudemd.py +252 -0
  29. navaia/context/system_prompt.py +172 -0
  30. navaia/effort/__init__.py +0 -0
  31. navaia/effort/effort.py +47 -0
  32. navaia/hooks/__init__.py +0 -0
  33. navaia/hooks/executor.py +153 -0
  34. navaia/hooks/settings.py +82 -0
  35. navaia/hooks/types.py +53 -0
  36. navaia/main.py +462 -0
  37. navaia/mcp/__init__.py +0 -0
  38. navaia/mcp/bootstrap.py +88 -0
  39. navaia/mcp/client.py +157 -0
  40. navaia/mcp/settings.py +80 -0
  41. navaia/mcp/tools.py +118 -0
  42. navaia/mcp/types.py +29 -0
  43. navaia/memory/__init__.py +0 -0
  44. navaia/memory/memdir.py +70 -0
  45. navaia/memory/paths.py +17 -0
  46. navaia/memory/scanner.py +85 -0
  47. navaia/memory/types.py +27 -0
  48. navaia/permissions/__init__.py +0 -0
  49. navaia/permissions/checker.py +147 -0
  50. navaia/permissions/rules.py +88 -0
  51. navaia/permissions/types.py +39 -0
  52. navaia/query/__init__.py +0 -0
  53. navaia/query/engine.py +477 -0
  54. navaia/query/types.py +43 -0
  55. navaia/session/__init__.py +0 -0
  56. navaia/session/history.py +64 -0
  57. navaia/session/serialise.py +184 -0
  58. navaia/session/state.py +20 -0
  59. navaia/session/storage.py +102 -0
  60. navaia/session/store.py +202 -0
  61. navaia/state/__init__.py +0 -0
  62. navaia/tasks/__init__.py +0 -0
  63. navaia/tasks/cron.py +113 -0
  64. navaia/tasks/manager.py +112 -0
  65. navaia/tasks/persistence.py +128 -0
  66. navaia/tasks/task.py +34 -0
  67. navaia/thinking/__init__.py +0 -0
  68. navaia/thinking/budget.py +42 -0
  69. navaia/thinking/config.py +55 -0
  70. navaia/tools/__init__.py +0 -0
  71. navaia/tools/agent_tool/__init__.py +0 -0
  72. navaia/tools/agent_tool/tool.py +148 -0
  73. navaia/tools/ask_user/__init__.py +0 -0
  74. navaia/tools/ask_user/bus.py +51 -0
  75. navaia/tools/ask_user/tool.py +64 -0
  76. navaia/tools/base.py +51 -0
  77. navaia/tools/bash/__init__.py +0 -0
  78. navaia/tools/bash/background.py +123 -0
  79. navaia/tools/bash/tool.py +234 -0
  80. navaia/tools/executor.py +111 -0
  81. navaia/tools/file_edit/__init__.py +0 -0
  82. navaia/tools/file_edit/tool.py +206 -0
  83. navaia/tools/file_read/__init__.py +0 -0
  84. navaia/tools/file_read/tool.py +209 -0
  85. navaia/tools/file_write/__init__.py +0 -0
  86. navaia/tools/file_write/tool.py +112 -0
  87. navaia/tools/glob_tool/__init__.py +0 -0
  88. navaia/tools/glob_tool/tool.py +97 -0
  89. navaia/tools/grep_tool/__init__.py +0 -0
  90. navaia/tools/grep_tool/tool.py +292 -0
  91. navaia/tools/monitor/__init__.py +0 -0
  92. navaia/tools/monitor/tool.py +101 -0
  93. navaia/tools/plan_mode/__init__.py +0 -0
  94. navaia/tools/plan_mode/enter.py +38 -0
  95. navaia/tools/plan_mode/exit.py +36 -0
  96. navaia/tools/registry.py +71 -0
  97. navaia/tools/result_storage.py +60 -0
  98. navaia/tools/skill_tool/__init__.py +0 -0
  99. navaia/tools/skill_tool/loader.py +147 -0
  100. navaia/tools/skill_tool/tool.py +88 -0
  101. navaia/tools/task_tools/__init__.py +0 -0
  102. navaia/tools/task_tools/create.py +60 -0
  103. navaia/tools/task_tools/get.py +52 -0
  104. navaia/tools/task_tools/list.py +39 -0
  105. navaia/tools/task_tools/manager.py +66 -0
  106. navaia/tools/task_tools/update.py +88 -0
  107. navaia/tools/todo_write/__init__.py +0 -0
  108. navaia/tools/todo_write/tool.py +121 -0
  109. navaia/tools/tool_search/__init__.py +0 -0
  110. navaia/tools/tool_search/tool.py +106 -0
  111. navaia/tools/web_fetch/__init__.py +0 -0
  112. navaia/tools/web_fetch/tool.py +88 -0
  113. navaia/tools/worktree/__init__.py +0 -0
  114. navaia/tools/worktree/enter.py +66 -0
  115. navaia/tools/worktree/exit.py +51 -0
  116. navaia/tools/worktree/manager.py +130 -0
  117. navaia/ui/__init__.py +0 -0
  118. navaia/ui/app.py +605 -0
  119. navaia/ui/bidi.py +70 -0
  120. navaia/ui/input/__init__.py +0 -0
  121. navaia/ui/input/history.py +84 -0
  122. navaia/ui/input/suggestions.py +72 -0
  123. navaia/ui/messages/__init__.py +0 -0
  124. navaia/ui/messages/assistant_text.py +46 -0
  125. navaia/ui/messages/bash_output.py +68 -0
  126. navaia/ui/messages/system_msg.py +25 -0
  127. navaia/ui/messages/tool_result.py +38 -0
  128. navaia/ui/messages/tool_use.py +70 -0
  129. navaia/ui/messages/user_prompt.py +27 -0
  130. navaia/ui/screens/__init__.py +0 -0
  131. navaia/ui/screens/repl.py +136 -0
  132. navaia/ui/styles/app.tcss +48 -0
  133. navaia/ui/widgets/__init__.py +0 -0
  134. navaia/ui/widgets/logo.py +48 -0
  135. navaia/ui/widgets/markdown_view.py +87 -0
  136. navaia/ui/widgets/message_list.py +387 -0
  137. navaia/ui/widgets/permission_prompt.py +137 -0
  138. navaia/ui/widgets/prompt_footer.py +67 -0
  139. navaia/ui/widgets/prompt_input.py +203 -0
  140. navaia/ui/widgets/question_prompt.py +58 -0
  141. navaia/ui/widgets/spinner.py +110 -0
  142. navaia/ui/widgets/thinking_view.py +124 -0
  143. navaia_code-1.0.50.dist-info/METADATA +17 -0
  144. navaia_code-1.0.50.dist-info/RECORD +146 -0
  145. navaia_code-1.0.50.dist-info/WHEEL +4 -0
  146. navaia_code-1.0.50.dist-info/entry_points.txt +2 -0
navaia/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """Navaia Code — AI coding agent, built in Python."""
2
+
3
+ __version__ = "1.0.50"
navaia/api/__init__.py ADDED
File without changes
navaia/api/client.py ADDED
@@ -0,0 +1,72 @@
1
+ """Provider-agnostic API client factory using the OpenAI SDK.
2
+
3
+ Supports: OpenRouter, internal vLLM (ngx.tawakkalna.nic.gov.sa), Ollama,
4
+ Groq, or any OpenAI-compatible endpoint — controlled via provider profiles
5
+ or environment variables.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+
12
+ import httpx
13
+ from openai import AsyncOpenAI
14
+
15
+ from navaia.config.providers import (
16
+ ProviderProfile,
17
+ resolve_api_key,
18
+ resolve_base_url,
19
+ resolve_model,
20
+ resolve_provider,
21
+ )
22
+
23
+
24
def create_client(provider_name: str = "") -> AsyncOpenAI:
    """Build an ``AsyncOpenAI`` client for the requested provider.

    Args:
        provider_name: Provider to use (e.g. "openrouter", "internal",
            "ollama"). An empty string is handed to ``resolve_provider``
            unchanged, which then selects the provider via the
            NAVAIA_PROVIDER env var or config files.

    Returns:
        The client that ``_build_client`` constructs for the resolved profile.
    """
    return _build_client(resolve_provider(provider_name))
34
+
35
+
36
def get_model(provider_name: str = "") -> str:
    """Resolve the provider profile, then return its model identifier.

    Args:
        provider_name: Provider to look up; passed straight to
            ``resolve_provider``.

    Returns:
        Whatever ``resolve_model`` yields for the resolved profile.
    """
    return resolve_model(resolve_provider(provider_name))
40
+
41
+
42
def get_provider_profile(provider_name: str = "") -> ProviderProfile:
    """Expose the resolved provider profile (used for retry/streaming config).

    Args:
        provider_name: Provider to look up; passed straight to
            ``resolve_provider``.
    """
    resolved = resolve_provider(provider_name)
    return resolved
45
+
46
+
47
def _build_client(profile: ProviderProfile) -> AsyncOpenAI:
    """Construct the ``AsyncOpenAI`` client described by a resolved profile.

    Args:
        profile: Resolved provider profile; ``timeout`` and ``verify_ssl``
            are read from it directly, the API key and base URL through the
            ``resolve_*`` helpers.

    Returns:
        A client with SDK-level retries disabled.
    """
    key = resolve_api_key(profile)
    endpoint = resolve_base_url(profile)

    # Profile defaults are normalised to end in /v1. When the user has set a
    # base-URL env var the URL is left untouched, since it may carry a custom
    # path.
    env = os.environ
    user_supplied_url = bool(
        env.get("OPENAI_BASE_URL") or env.get("ANTHROPIC_BASE_URL")
    )
    if endpoint and not user_supplied_url:
        trimmed = endpoint.rstrip("/")
        if not trimmed.endswith("/v1"):
            endpoint = trimmed + "/v1"

    client_options: dict = {
        "api_key": key,
        "base_url": endpoint,
        "max_retries": 0,  # we handle retries ourselves in retry.py
        "timeout": profile.timeout,
    }

    # A custom httpx client is only supplied when SSL verification is turned
    # off (internal/local endpoints); otherwise AsyncOpenAI manages its own.
    if not profile.verify_ssl:
        client_options["http_client"] = httpx.AsyncClient(verify=False)

    return AsyncOpenAI(**client_options)
navaia/api/normalise.py ADDED
@@ -0,0 +1,148 @@
1
+ """Translate internal message types to OpenAI API format.
2
+
3
+ Mirrors normalizeMessagesForAPI() from Claude Code:
4
+ - Strips progress, non-local system messages, synthetic API errors
5
+ - Merges consecutive user messages (some providers require this)
6
+ - Converts internal content blocks to OpenAI format
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ from typing import Any
13
+
14
+ from navaia.api.types import (
15
+ AssistantMessage,
16
+ CompactBoundaryMessage,
17
+ Message,
18
+ SystemMessage,
19
+ TextBlock,
20
+ ToolResultBlock,
21
+ ToolUseBlock,
22
+ UserMessage,
23
+ )
24
+
25
+
26
def _convert_message(msg: Message) -> dict[str, Any] | list[dict[str, Any]] | None:
    """Map one internal message onto its OpenAI representation.

    Returns:
        A single message dict, a list of dicts (a user message can yield
        several), or ``None`` when the message must not be sent to the API.
    """
    if isinstance(msg, SystemMessage):
        # Of the system messages, only local_command ones that are not
        # API errors are forwarded, and they travel as user-role text.
        forward = msg.subtype == "local_command" and not msg.is_api_error
        if not forward:
            return None
        return {"role": "user", "content": f"[System: {msg.text}]"}

    if isinstance(msg, CompactBoundaryMessage):
        banner = "[Context compacted. Previous conversation summarised below.]\n\n"
        return {"role": "user", "content": banner + msg.summary}

    if isinstance(msg, UserMessage):
        return _convert_user_message(msg)

    if isinstance(msg, AssistantMessage):
        return _convert_assistant_message(msg)

    # Any other message type is dropped.
    return None
50
+
51
+
52
+ def _convert_user_message(msg: UserMessage) -> dict[str, Any] | list[dict[str, Any]]:
53
+ """Convert UserMessage to OpenAI format."""
54
+ # Simple string content
55
+ if isinstance(msg.content, str):
56
+ return {"role": "user", "content": msg.content}
57
+
58
+ # Content block list
59
+ parts: list[str] = []
60
+ tool_results: list[dict[str, Any]] = []
61
+
62
+ for block in msg.content:
63
+ if isinstance(block, TextBlock):
64
+ parts.append(block.text)
65
+ elif isinstance(block, ToolResultBlock):
66
+ content_str = (
67
+ block.content
68
+ if isinstance(block.content, str)
69
+ else json.dumps(block.content)
70
+ )
71
+ tool_results.append({
72
+ "role": "tool",
73
+ "tool_call_id": block.tool_use_id,
74
+ "content": content_str,
75
+ })
76
+
77
+ # If there are tool results, return them as separate messages
78
+ if tool_results:
79
+ return tool_results # type: ignore[return-value] # handled in normalise
80
+
81
+ text = "\n".join(parts).strip()
82
+ return {"role": "user", "content": text} if text else {"role": "user", "content": ""}
83
+
84
+
85
+ def _convert_assistant_message(msg: AssistantMessage) -> dict[str, Any]:
86
+ """Convert AssistantMessage to OpenAI format."""
87
+ text_parts: list[str] = []
88
+ tool_calls: list[dict[str, Any]] = []
89
+
90
+ for block in msg.content:
91
+ if isinstance(block, TextBlock):
92
+ text_parts.append(block.text)
93
+ elif isinstance(block, ToolUseBlock):
94
+ tool_calls.append({
95
+ "id": block.id,
96
+ "type": "function",
97
+ "function": {
98
+ "name": block.name,
99
+ "arguments": json.dumps(block.input),
100
+ },
101
+ })
102
+
103
+ result: dict[str, Any] = {"role": "assistant"}
104
+ content = "\n".join(text_parts).strip() or None
105
+ result["content"] = content
106
+
107
+ if tool_calls:
108
+ result["tool_calls"] = tool_calls
109
+
110
+ return result
111
+
112
+
113
def normalise_messages_for_api(messages: list[Message]) -> list[dict[str, Any]]:
    """Flatten internal messages into an OpenAI chat-completions message list.

    Each message goes through ``_convert_message``; skipped messages
    (``None``) are dropped, list results are spliced in, and consecutive
    user messages are merged at the end.
    """
    flat: list[dict[str, Any]] = []

    for message in messages:
        api_form = _convert_message(message)
        if isinstance(api_form, list):
            # A user message carrying tool results expands to several dicts.
            flat += api_form
        elif api_form is not None:
            flat.append(api_form)

    return _merge_consecutive_user_messages(flat)
128
+
129
+
130
+ def _merge_consecutive_user_messages(
131
+ messages: list[dict[str, Any]],
132
+ ) -> list[dict[str, Any]]:
133
+ """Merge consecutive messages with the same role (required by some providers)."""
134
+ if not messages:
135
+ return messages
136
+
137
+ merged: list[dict[str, Any]] = [messages[0]]
138
+
139
+ for msg in messages[1:]:
140
+ prev = merged[-1]
141
+ if msg.get("role") == "user" and prev.get("role") == "user":
142
+ prev_content = prev.get("content", "")
143
+ new_content = msg.get("content", "")
144
+ prev["content"] = f"{prev_content}\n\n{new_content}".strip()
145
+ else:
146
+ merged.append(msg)
147
+
148
+ return merged
navaia/api/retry.py ADDED
@@ -0,0 +1,114 @@
1
+ """Retry logic for API calls — mirrors withRetry.ts exactly.
2
+
3
+ Retry conditions:
4
+ - 429 (rate limit): retry with exponential backoff
5
+ - 500, 502, 503 (server error): retry
6
+ - 529 (overloaded): max 3 consecutive retries
7
+ - Connection errors (ECONNRESET, timeout): retry
8
+ - 401/403/404: do NOT retry (auth/not found)
9
+
10
+ Numbers from Claude Code source:
11
+ - MAX_RETRIES = 10
12
+ - MAX_529_CONSECUTIVE = 3
13
+ - BASE_DELAY_MS = 500
14
+ - MAX_BACKOFF_MS = 5 minutes
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import asyncio
20
+ import logging
21
+ from collections.abc import Awaitable, Callable
22
+ from typing import TypeVar
23
+
24
+ from openai import (
25
+ APIConnectionError,
26
+ APIStatusError,
27
+ APITimeoutError,
28
+ RateLimitError,
29
+ )
30
+
31
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Result type of the coroutine wrapped by with_retry().
T = TypeVar("T")

# Default retry budget for with_retry().
MAX_RETRIES = 10
# with_retry() gives up once more than this many 529 responses arrive in a row.
MAX_529_CONSECUTIVE = 3
# First backoff delay in milliseconds; doubled on every further attempt.
BASE_DELAY_MS = 500
MAX_BACKOFF_MS = 5 * 60 * 1000  # 5 minutes

# HTTP statuses that trigger a retry. Note that 501 is not in the set.
RETRIABLE_STATUS_CODES = {429, 500, 502, 503, 529}
# Auth / not-found statuses that must never be retried.
NON_RETRIABLE_STATUS_CODES = {401, 403, 404}
42
+
43
+
44
def _is_retriable(error: Exception) -> bool:
    """Report whether ``error`` is worth retrying.

    Connection errors, timeouts and rate-limit errors always are; any other
    ``APIStatusError`` is retriable only if its status code is listed in
    ``RETRIABLE_STATUS_CODES``.
    """
    always_transient = (APIConnectionError, APITimeoutError, RateLimitError)
    if isinstance(error, always_transient):
        return True
    return (
        isinstance(error, APIStatusError)
        and error.status_code in RETRIABLE_STATUS_CODES
    )
53
+
54
+
55
def _get_status_code(error: Exception) -> int | None:
    """Return the HTTP status of an ``APIStatusError``, or ``None`` otherwise."""
    return error.status_code if isinstance(error, APIStatusError) else None
59
+
60
+
61
async def with_retry(
    fn: Callable[[], Awaitable[T]],
    max_retries: int = MAX_RETRIES,
    on_retry: Callable[[int, Exception, float], None] | None = None,
) -> T:
    """Await ``fn`` and retry it with exponential backoff on retriable errors.

    Args:
        fn: Zero-argument async callable to execute.
        max_retries: Maximum number of retries after the first attempt.
        on_retry: Optional callback(attempt, error, delay_seconds), invoked
            before each sleep (e.g. for UI updates).

    Returns:
        The value produced by the first successful call to ``fn``.

    Raises:
        Exception: The error from ``fn`` is re-raised when it is not
            retriable, when more than ``MAX_529_CONSECUTIVE`` 529 responses
            occur in a row, or when the retry budget is spent.
        RuntimeError: If the loop finishes without returning or raising.
    """
    overloaded_streak = 0
    attempt = 0

    while attempt <= max_retries:
        try:
            return await fn()
        except Exception as exc:
            if not _is_retriable(exc):
                raise

            # 529s are counted as a streak; any other retriable error resets it.
            if _get_status_code(exc) == 529:
                overloaded_streak += 1
                if overloaded_streak > MAX_529_CONSECUTIVE:
                    raise
            else:
                overloaded_streak = 0

            if attempt >= max_retries:
                raise

            # Exponential backoff: 500ms, 1s, 2s, 4s, ... capped at 5min
            backoff_ms = min(BASE_DELAY_MS * (2 ** attempt), MAX_BACKOFF_MS)
            wait_s = backoff_ms / 1000.0

            logger.info(
                "Retry %d/%d after %s (%.1fs delay)",
                attempt + 1, max_retries, type(exc).__name__, wait_s,
            )

            if on_retry:
                on_retry(attempt + 1, exc, wait_s)

            await asyncio.sleep(wait_s)

        attempt += 1

    # Should not reach here, but type safety
    raise RuntimeError("Exhausted retries")
navaia/api/streaming.py ADDED
@@ -0,0 +1,341 @@
1
+ """Streaming API calls — message assembly and delta-by-delta yielding.
2
+
3
+ Mirrors src/services/api/claude.ts: stream_message() is the core function
4
+ that yields StreamEvents as they arrive from the provider.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ import logging
11
+ from collections.abc import AsyncIterator
12
+ from typing import Any
13
+
14
+ from openai import (
15
+ APIConnectionError,
16
+ APIStatusError,
17
+ APITimeoutError,
18
+ AsyncOpenAI,
19
+ RateLimitError,
20
+ )
21
+
22
+ from navaia.api.normalise import normalise_messages_for_api
23
+ from navaia.api.retry import (
24
+ MAX_529_CONSECUTIVE,
25
+ NON_RETRIABLE_STATUS_CODES,
26
+ RETRIABLE_STATUS_CODES,
27
+ )
28
+ from navaia.api.types import (
29
+ FinishEvent,
30
+ Message,
31
+ RetryEvent,
32
+ StreamEvent,
33
+ TextDeltaEvent,
34
+ ThinkingDeltaEvent,
35
+ ToolCallDeltaEvent,
36
+ ToolCallAccumulator,
37
+ ToolUseBlock,
38
+ Usage,
39
+ UsageEvent,
40
+ )
41
+
42
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Number of retries stream_message() allows when creating the stream.
STREAM_MAX_RETRIES = 5
# Base retry delay in seconds; doubled on each attempt (1s, 2s, 4s, ...).
STREAM_BASE_DELAY_S = 1.0
46
+
47
+
48
+ def _thinking_extra_body(model: str, thinking_config) -> dict:
49
+ """Return provider-specific extra_body for extended thinking.
50
+
51
+ Different providers expose the thinking/reasoning budget under
52
+ different parameter names — we detect based on the model id.
53
+
54
+ - Anthropic models (claude-*): ``thinking: {type, budget_tokens}``
55
+ - OpenAI reasoning models (o1, o3, o4-mini, gpt-5): ``reasoning_effort``
56
+ - OpenRouter passthrough: ``reasoning: {max_tokens}``
57
+ - Others: inline ``<think>`` handling in the stream parser (no extra body needed)
58
+ """
59
+ if thinking_config is None:
60
+ return {}
61
+
62
+ # Import locally to avoid circular imports and optional dependency cost
63
+ from navaia.thinking.config import ThinkingMode
64
+
65
+ if thinking_config.mode == ThinkingMode.DISABLED:
66
+ return {}
67
+
68
+ budget = max(1024, int(thinking_config.budget_tokens))
69
+ model_lc = model.lower()
70
+
71
+ if "claude" in model_lc and ("opus" in model_lc or "sonnet" in model_lc or "haiku" in model_lc):
72
+ return {"thinking": {"type": "enabled", "budget_tokens": budget}}
73
+
74
+ if model_lc.startswith(("o1", "o3", "o4", "gpt-5")):
75
+ # Translate budget to an effort bucket the reasoning API understands
76
+ if budget >= 20000:
77
+ effort = "high"
78
+ elif budget >= 5000:
79
+ effort = "medium"
80
+ else:
81
+ effort = "low"
82
+ return {"reasoning_effort": effort}
83
+
84
+ if "/" in model_lc: # OpenRouter-style slug (e.g. "anthropic/claude-opus-4")
85
+ return {"reasoning": {"max_tokens": budget}}
86
+
87
+ # deepseek-r1, qwq, and similar already stream <think> tags inline
88
+ return {}
89
+
90
+
91
+ def _tools_to_openai_schema(tools: list) -> list[dict[str, Any]] | None:
92
+ """Convert internal tool definitions to OpenAI function-calling schema."""
93
+ if not tools:
94
+ return None
95
+ schemas = []
96
+ for tool in tools:
97
+ schema = tool.to_openai_schema()
98
+ if schema:
99
+ schemas.append(schema)
100
+ return schemas or None
101
+
102
+
103
def _is_retriable_error(error: Exception) -> bool:
    """Decide whether an API error should trigger a retry.

    Connection, timeout and rate-limit errors always qualify; any other
    ``APIStatusError`` qualifies only if its status code is in
    ``RETRIABLE_STATUS_CODES``.
    """
    if isinstance(error, (APIConnectionError, APITimeoutError, RateLimitError)):
        verdict = True
    elif isinstance(error, APIStatusError):
        verdict = error.status_code in RETRIABLE_STATUS_CODES
    else:
        verdict = False
    return verdict
110
+
111
+
112
def _is_non_retriable_error(error: Exception) -> bool:
    """Report whether an API error must be raised immediately.

    True only for an ``APIStatusError`` whose status code is in
    ``NON_RETRIABLE_STATUS_CODES``.
    """
    return (
        isinstance(error, APIStatusError)
        and error.status_code in NON_RETRIABLE_STATUS_CODES
    )
117
+
118
+
119
def _error_status(error: Exception) -> int | None:
    """Return the HTTP status code of an ``APIStatusError``, else ``None``."""
    if not isinstance(error, APIStatusError):
        return None
    return error.status_code
124
+
125
+
126
async def stream_message(
    client: AsyncOpenAI,
    model: str,
    messages: list[Message],
    system_prompt: str,
    tools: list | None = None,
    temperature: float = 0,
    max_tokens: int = 16384,
    thinking_config: Any = None,
) -> AsyncIterator[StreamEvent]:
    """Stream a chat completion, yielding events as they arrive.

    Handles:
    - Text deltas (one event per received chunk of visible text)
    - Tool call deltas (incremental id/name/arguments)
    - <think> tag extraction for models that emit thinking inline
      (deepseek-r1, etc.); thinking text is buffered and emitted as one
      ThinkingDeltaEvent when the closing tag or the finish chunk arrives
    - Finish events with stop reason
    - Retry with exponential backoff on retriable errors (429, 500, 502, 503, 529)
      while creating the stream; a RetryEvent is yielded before each wait
    - Mid-stream disconnection recovery (flushes buffered thinking text and
      yields FinishEvent(reason="error"))

    Args:
        client: OpenAI-compatible async client.
        model: Model identifier sent to the provider.
        messages: Internal conversation messages; normalised before sending.
        system_prompt: Prepended as the leading system message.
        tools: Internal tool definitions exposing ``to_openai_schema()``.
        temperature: Sampling temperature.
        max_tokens: Completion token limit.
        thinking_config: Optional thinking configuration; see
            ``_thinking_extra_body``.

    Raises:
        Exception: Errors from stream creation are re-raised when they are
            not retriable, when more than ``MAX_529_CONSECUTIVE`` 529s occur
            in a row, or when ``STREAM_MAX_RETRIES`` is exhausted.
        RuntimeError: If no stream was created after the retry loop.
    """
    # System prompt first, then the normalised conversation.
    api_messages = [
        {"role": "system", "content": system_prompt},
        *normalise_messages_for_api(messages),
    ]

    api_tools = _tools_to_openai_schema(tools or [])

    kwargs: dict[str, Any] = {
        "model": model,
        "messages": api_messages,
        "stream": True,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    if api_tools:
        kwargs["tools"] = api_tools

    # Extended thinking / reasoning — only sent when the model supports it
    extra = _thinking_extra_body(model, thinking_config)
    if extra:
        kwargs["extra_body"] = extra

    # --- Retry loop around stream creation ---
    stream = None
    consecutive_529 = 0

    for attempt in range(STREAM_MAX_RETRIES + 1):
        try:
            stream = await client.chat.completions.create(**kwargs)
            break
        except Exception as e:
            # Non-retriable (401, 403, 404) and unrecognised errors are
            # raised immediately.
            if _is_non_retriable_error(e) or not _is_retriable_error(e):
                raise

            # Track consecutive 529s
            if _error_status(e) == 529:
                consecutive_529 += 1
                if consecutive_529 > MAX_529_CONSECUTIVE:
                    raise
            else:
                consecutive_529 = 0

            if attempt >= STREAM_MAX_RETRIES:
                raise

            delay_s = STREAM_BASE_DELAY_S * (2 ** attempt)

            logger.warning(
                "Stream creation failed (attempt %d/%d, %s). "
                "Retrying in %.1fs...",
                attempt + 1,
                STREAM_MAX_RETRIES,
                type(e).__name__,
                delay_s,
            )

            yield RetryEvent(
                attempt=attempt + 1,
                max_retries=STREAM_MAX_RETRIES,
                delay_seconds=delay_s,
                error_message=str(e),
            )

            await asyncio.sleep(delay_s)

    if stream is None:
        raise RuntimeError("Failed to create stream after retries")

    # --- Consume the stream, handling mid-stream disconnections ---
    usage = Usage()
    in_thinking = False
    thinking_buffer = ""

    try:
        async for chunk in stream:
            # Extract usage if present
            if hasattr(chunk, "usage") and chunk.usage:
                usage = Usage(
                    input_tokens=getattr(chunk.usage, "prompt_tokens", 0) or 0,
                    output_tokens=getattr(chunk.usage, "completion_tokens", 0) or 0,
                    total_tokens=getattr(chunk.usage, "total_tokens", 0) or 0,
                )

            if not chunk.choices:
                continue

            choice = chunk.choices[0]
            delta = choice.delta

            # --- Text deltas ---
            if delta and delta.content:
                # Walk the chunk tag by tag, so that "<think>" and
                # "</think>" arriving in the same chunk are both honoured.
                # No `continue` here: tool-call deltas and finish_reason on
                # this same chunk must still be processed below.
                # NOTE: a tag split across two chunks is not detected.
                remaining = delta.content
                while remaining:
                    if in_thinking:
                        thought, closed, remaining = remaining.partition("</think>")
                        thinking_buffer += thought
                        if closed:
                            yield ThinkingDeltaEvent(text=thinking_buffer)
                            thinking_buffer = ""
                            in_thinking = False
                    else:
                        visible, opened, remaining = remaining.partition("<think>")
                        if visible:
                            yield TextDeltaEvent(text=visible)
                        if opened:
                            in_thinking = True

            # --- Tool call deltas ---
            if delta and delta.tool_calls:
                for tc_delta in delta.tool_calls:
                    yield ToolCallDeltaEvent(
                        index=tc_delta.index,
                        id=tc_delta.id or None,
                        name=getattr(tc_delta.function, "name", None) if tc_delta.function else None,
                        arguments_delta=(
                            tc_delta.function.arguments
                            if tc_delta.function and tc_delta.function.arguments
                            else ""
                        ),
                    )

            # --- Finish ---
            if choice.finish_reason:
                # Flush any remaining thinking buffer
                if thinking_buffer:
                    yield ThinkingDeltaEvent(text=thinking_buffer)
                    thinking_buffer = ""

                yield FinishEvent(reason=choice.finish_reason)

        # Usage is reported once the stream has been fully consumed.
        if usage.input_tokens or usage.output_tokens:
            yield UsageEvent(usage=usage)

    except (APIConnectionError, APITimeoutError) as e:
        # Mid-stream disconnection: flush buffered content so the UI
        # keeps whatever text was already received
        logger.warning(
            "Stream disconnected mid-response (%s). "
            "Flushing accumulated text.",
            type(e).__name__,
        )
        if thinking_buffer:
            yield ThinkingDeltaEvent(text=thinking_buffer)

        yield FinishEvent(reason="error")
308
+
309
+
310
def accumulate_tool_calls(
    accumulators: dict[int, ToolCallAccumulator],
    event: ToolCallDeltaEvent,
) -> dict[int, ToolCallAccumulator]:
    """Fold one tool-call delta into the accumulator for its index.

    Tool calls arrive piecewise while streaming: a delta may carry an `id`
    and/or a `name`, and every delta's `arguments_delta` is appended to the
    accumulated argument string.

    Args:
        accumulators: Per-index accumulators; updated in place, with a new
            accumulator created for an index not seen before.
        event: The delta to apply.

    Returns:
        The same ``accumulators`` dict that was passed in.
    """
    slot = event.index
    try:
        entry = accumulators[slot]
    except KeyError:
        entry = accumulators[slot] = ToolCallAccumulator()

    # A falsy id/name on a delta leaves the stored value untouched.
    if event.id:
        entry.id = event.id
    if event.name:
        entry.name = event.name
    entry.args_str += event.arguments_delta

    return accumulators
332
+
333
+
334
def finalise_tool_calls(
    accumulators: dict[int, ToolCallAccumulator],
) -> list[ToolUseBlock]:
    """Turn the accumulated fragments into ToolUseBlocks, ordered by index."""
    blocks: list[ToolUseBlock] = []
    for slot in sorted(accumulators):
        blocks.append(accumulators[slot].to_tool_use_block())
    return blocks