navaia-code 1.0.50__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- navaia/__init__.py +3 -0
- navaia/api/__init__.py +0 -0
- navaia/api/client.py +72 -0
- navaia/api/normalise.py +148 -0
- navaia/api/retry.py +114 -0
- navaia/api/streaming.py +341 -0
- navaia/api/types.py +213 -0
- navaia/commands/__init__.py +0 -0
- navaia/commands/builtin/__init__.py +0 -0
- navaia/commands/builtin/commands.py +206 -0
- navaia/commands/dispatcher.py +38 -0
- navaia/commands/parser.py +25 -0
- navaia/commands/registry.py +48 -0
- navaia/commands/skills.py +150 -0
- navaia/commands/types.py +26 -0
- navaia/compact/__init__.py +0 -0
- navaia/compact/compact.py +241 -0
- navaia/compact/prompt.py +22 -0
- navaia/compact/restore.py +91 -0
- navaia/config/__init__.py +0 -0
- navaia/config/env.py +53 -0
- navaia/config/global_config.py +43 -0
- navaia/config/project_config.py +53 -0
- navaia/config/providers.py +234 -0
- navaia/config/settings.py +113 -0
- navaia/context/__init__.py +0 -0
- navaia/context/cache.py +29 -0
- navaia/context/claudemd.py +252 -0
- navaia/context/system_prompt.py +172 -0
- navaia/effort/__init__.py +0 -0
- navaia/effort/effort.py +47 -0
- navaia/hooks/__init__.py +0 -0
- navaia/hooks/executor.py +153 -0
- navaia/hooks/settings.py +82 -0
- navaia/hooks/types.py +53 -0
- navaia/main.py +462 -0
- navaia/mcp/__init__.py +0 -0
- navaia/mcp/bootstrap.py +88 -0
- navaia/mcp/client.py +157 -0
- navaia/mcp/settings.py +80 -0
- navaia/mcp/tools.py +118 -0
- navaia/mcp/types.py +29 -0
- navaia/memory/__init__.py +0 -0
- navaia/memory/memdir.py +70 -0
- navaia/memory/paths.py +17 -0
- navaia/memory/scanner.py +85 -0
- navaia/memory/types.py +27 -0
- navaia/permissions/__init__.py +0 -0
- navaia/permissions/checker.py +147 -0
- navaia/permissions/rules.py +88 -0
- navaia/permissions/types.py +39 -0
- navaia/query/__init__.py +0 -0
- navaia/query/engine.py +477 -0
- navaia/query/types.py +43 -0
- navaia/session/__init__.py +0 -0
- navaia/session/history.py +64 -0
- navaia/session/serialise.py +184 -0
- navaia/session/state.py +20 -0
- navaia/session/storage.py +102 -0
- navaia/session/store.py +202 -0
- navaia/state/__init__.py +0 -0
- navaia/tasks/__init__.py +0 -0
- navaia/tasks/cron.py +113 -0
- navaia/tasks/manager.py +112 -0
- navaia/tasks/persistence.py +128 -0
- navaia/tasks/task.py +34 -0
- navaia/thinking/__init__.py +0 -0
- navaia/thinking/budget.py +42 -0
- navaia/thinking/config.py +55 -0
- navaia/tools/__init__.py +0 -0
- navaia/tools/agent_tool/__init__.py +0 -0
- navaia/tools/agent_tool/tool.py +148 -0
- navaia/tools/ask_user/__init__.py +0 -0
- navaia/tools/ask_user/bus.py +51 -0
- navaia/tools/ask_user/tool.py +64 -0
- navaia/tools/base.py +51 -0
- navaia/tools/bash/__init__.py +0 -0
- navaia/tools/bash/background.py +123 -0
- navaia/tools/bash/tool.py +234 -0
- navaia/tools/executor.py +111 -0
- navaia/tools/file_edit/__init__.py +0 -0
- navaia/tools/file_edit/tool.py +206 -0
- navaia/tools/file_read/__init__.py +0 -0
- navaia/tools/file_read/tool.py +209 -0
- navaia/tools/file_write/__init__.py +0 -0
- navaia/tools/file_write/tool.py +112 -0
- navaia/tools/glob_tool/__init__.py +0 -0
- navaia/tools/glob_tool/tool.py +97 -0
- navaia/tools/grep_tool/__init__.py +0 -0
- navaia/tools/grep_tool/tool.py +292 -0
- navaia/tools/monitor/__init__.py +0 -0
- navaia/tools/monitor/tool.py +101 -0
- navaia/tools/plan_mode/__init__.py +0 -0
- navaia/tools/plan_mode/enter.py +38 -0
- navaia/tools/plan_mode/exit.py +36 -0
- navaia/tools/registry.py +71 -0
- navaia/tools/result_storage.py +60 -0
- navaia/tools/skill_tool/__init__.py +0 -0
- navaia/tools/skill_tool/loader.py +147 -0
- navaia/tools/skill_tool/tool.py +88 -0
- navaia/tools/task_tools/__init__.py +0 -0
- navaia/tools/task_tools/create.py +60 -0
- navaia/tools/task_tools/get.py +52 -0
- navaia/tools/task_tools/list.py +39 -0
- navaia/tools/task_tools/manager.py +66 -0
- navaia/tools/task_tools/update.py +88 -0
- navaia/tools/todo_write/__init__.py +0 -0
- navaia/tools/todo_write/tool.py +121 -0
- navaia/tools/tool_search/__init__.py +0 -0
- navaia/tools/tool_search/tool.py +106 -0
- navaia/tools/web_fetch/__init__.py +0 -0
- navaia/tools/web_fetch/tool.py +88 -0
- navaia/tools/worktree/__init__.py +0 -0
- navaia/tools/worktree/enter.py +66 -0
- navaia/tools/worktree/exit.py +51 -0
- navaia/tools/worktree/manager.py +130 -0
- navaia/ui/__init__.py +0 -0
- navaia/ui/app.py +605 -0
- navaia/ui/bidi.py +70 -0
- navaia/ui/input/__init__.py +0 -0
- navaia/ui/input/history.py +84 -0
- navaia/ui/input/suggestions.py +72 -0
- navaia/ui/messages/__init__.py +0 -0
- navaia/ui/messages/assistant_text.py +46 -0
- navaia/ui/messages/bash_output.py +68 -0
- navaia/ui/messages/system_msg.py +25 -0
- navaia/ui/messages/tool_result.py +38 -0
- navaia/ui/messages/tool_use.py +70 -0
- navaia/ui/messages/user_prompt.py +27 -0
- navaia/ui/screens/__init__.py +0 -0
- navaia/ui/screens/repl.py +136 -0
- navaia/ui/styles/app.tcss +48 -0
- navaia/ui/widgets/__init__.py +0 -0
- navaia/ui/widgets/logo.py +48 -0
- navaia/ui/widgets/markdown_view.py +87 -0
- navaia/ui/widgets/message_list.py +387 -0
- navaia/ui/widgets/permission_prompt.py +137 -0
- navaia/ui/widgets/prompt_footer.py +67 -0
- navaia/ui/widgets/prompt_input.py +203 -0
- navaia/ui/widgets/question_prompt.py +58 -0
- navaia/ui/widgets/spinner.py +110 -0
- navaia/ui/widgets/thinking_view.py +124 -0
- navaia_code-1.0.50.dist-info/METADATA +17 -0
- navaia_code-1.0.50.dist-info/RECORD +146 -0
- navaia_code-1.0.50.dist-info/WHEEL +4 -0
- navaia_code-1.0.50.dist-info/entry_points.txt +2 -0
navaia/__init__.py
ADDED
navaia/api/__init__.py
ADDED
|
File without changes
|
navaia/api/client.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Provider-agnostic API client factory using the OpenAI SDK.
|
|
2
|
+
|
|
3
|
+
Supports: OpenRouter, internal vLLM (ngx.tawakkalna.nic.gov.sa), Ollama,
|
|
4
|
+
Groq, or any OpenAI-compatible endpoint — controlled via provider profiles
|
|
5
|
+
or environment variables.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
|
|
12
|
+
import httpx
|
|
13
|
+
from openai import AsyncOpenAI
|
|
14
|
+
|
|
15
|
+
from navaia.config.providers import (
|
|
16
|
+
ProviderProfile,
|
|
17
|
+
resolve_api_key,
|
|
18
|
+
resolve_base_url,
|
|
19
|
+
resolve_model,
|
|
20
|
+
resolve_provider,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def create_client(provider_name: str = "") -> AsyncOpenAI:
    """Build an AsyncOpenAI client for the named provider.

    Args:
        provider_name: Provider to use (e.g. "openrouter", "internal",
            "ollama"). An empty string lets ``resolve_provider`` pick one
            (via the NAVAIA_PROVIDER env variable or config files).

    Returns:
        The client that ``_build_client`` creates for the resolved profile.
    """
    return _build_client(resolve_provider(provider_name))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_model(provider_name: str = "") -> str:
    """Look up the model identifier for the named provider.

    The provider profile is resolved first and then handed to
    ``resolve_model``, which decides the final model id.
    """
    return resolve_model(resolve_provider(provider_name))
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_provider_profile(provider_name: str = "") -> ProviderProfile:
    """Resolve and return the provider profile for *provider_name*.

    Exposed so callers can read profile settings (e.g. for retry or
    streaming configuration) without building a client.
    """
    resolved = resolve_provider(provider_name)
    return resolved
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _build_client(profile: ProviderProfile) -> AsyncOpenAI:
    """Construct the AsyncOpenAI client described by a resolved profile.

    The base URL is normalised to end in ``/v1`` unless the user supplied an
    explicit override through ``OPENAI_BASE_URL`` or ``ANTHROPIC_BASE_URL``
    (such overrides may carry custom paths and are left untouched).
    """
    api_key = resolve_api_key(profile)
    base_url = resolve_base_url(profile)

    env = os.environ
    user_supplied_url = bool(
        env.get("OPENAI_BASE_URL") or env.get("ANTHROPIC_BASE_URL")
    )
    if base_url and not user_supplied_url:
        trimmed = base_url.rstrip("/")
        if not trimmed.endswith("/v1"):
            base_url = trimmed + "/v1"

    client_options: dict = {
        "api_key": api_key,
        "base_url": base_url,
        # SDK-level retries are disabled; retrying is handled by this package.
        "max_retries": 0,
        "timeout": profile.timeout,
    }

    # A dedicated httpx client is only needed to switch off certificate
    # verification; otherwise AsyncOpenAI manages its own transport.
    if not profile.verify_ssl:
        client_options["http_client"] = httpx.AsyncClient(verify=False)

    return AsyncOpenAI(**client_options)
|
navaia/api/normalise.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Translate internal message types to OpenAI API format.
|
|
2
|
+
|
|
3
|
+
Mirrors normalizeMessagesForAPI() from Claude Code:
|
|
4
|
+
- Strips progress, non-local system messages, synthetic API errors
|
|
5
|
+
- Merges consecutive user messages (some providers require this)
|
|
6
|
+
- Converts internal content blocks to OpenAI format
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from navaia.api.types import (
|
|
15
|
+
AssistantMessage,
|
|
16
|
+
CompactBoundaryMessage,
|
|
17
|
+
Message,
|
|
18
|
+
SystemMessage,
|
|
19
|
+
TextBlock,
|
|
20
|
+
ToolResultBlock,
|
|
21
|
+
ToolUseBlock,
|
|
22
|
+
UserMessage,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _convert_message(msg: Message) -> dict[str, Any] | list[dict[str, Any]] | None:
    """Translate one internal message into OpenAI chat format.

    Returns a single message dict, a list of dicts (user messages may expand
    into several), or ``None`` when the message must not be sent to the API.
    """
    if isinstance(msg, SystemMessage):
        # Only local-command system messages that are not API errors are
        # forwarded; they are presented to the model as user text.
        forwardable = msg.subtype == "local_command" and not msg.is_api_error
        if not forwardable:
            return None
        return {"role": "user", "content": f"[System: {msg.text}]"}

    if isinstance(msg, CompactBoundaryMessage):
        header = "[Context compacted. Previous conversation summarised below.]\n\n"
        return {"role": "user", "content": header + msg.summary}

    if isinstance(msg, UserMessage):
        return _convert_user_message(msg)

    if isinstance(msg, AssistantMessage):
        return _convert_assistant_message(msg)

    # Any other message type is dropped.
    return None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _convert_user_message(msg: UserMessage) -> dict[str, Any] | list[dict[str, Any]]:
    """Convert a UserMessage to OpenAI chat format.

    Returns:
        - a single ``{"role": "user", ...}`` dict when the content is a plain
          string or contains no tool results;
        - otherwise a list with one ``{"role": "tool", ...}`` dict per
          ToolResultBlock, followed by a user message carrying any text
          blocks from the same message (text is no longer discarded when
          tool results are present).
    """
    # Simple string content passes through unchanged.
    if isinstance(msg.content, str):
        return {"role": "user", "content": msg.content}

    parts: list[str] = []
    tool_results: list[dict[str, Any]] = []

    for block in msg.content:
        if isinstance(block, TextBlock):
            parts.append(block.text)
        elif isinstance(block, ToolResultBlock):
            # Tool messages need string content; structured results are
            # serialised as JSON.
            content_str = (
                block.content
                if isinstance(block.content, str)
                else json.dumps(block.content)
            )
            tool_results.append({
                "role": "tool",
                "tool_call_id": block.tool_use_id,
                "content": content_str,
            })

    text = "\n".join(parts).strip()

    if not tool_results:
        return {"role": "user", "content": text}

    # Tool results go first so they directly follow the assistant message
    # that issued the calls; accompanying text trails as a user message.
    if text:
        return [*tool_results, {"role": "user", "content": text}]
    return tool_results
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _convert_assistant_message(msg: AssistantMessage) -> dict[str, Any]:
    """Render an AssistantMessage as an OpenAI ``assistant`` message dict.

    Text blocks are joined with newlines into ``content`` (``None`` when no
    text remains after stripping); tool-use blocks become ``tool_calls``
    entries with JSON-encoded arguments. Other block types are ignored.
    """
    text_chunks: list[str] = []
    calls: list[dict[str, Any]] = []

    for block in msg.content:
        if isinstance(block, TextBlock):
            text_chunks.append(block.text)
            continue
        if not isinstance(block, ToolUseBlock):
            continue
        function_spec = {
            "name": block.name,
            "arguments": json.dumps(block.input),
        }
        calls.append({"id": block.id, "type": "function", "function": function_spec})

    joined = "\n".join(text_chunks).strip()
    payload: dict[str, Any] = {
        "role": "assistant",
        "content": joined if joined else None,
    }
    if calls:
        payload["tool_calls"] = calls
    return payload
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def normalise_messages_for_api(messages: list[Message]) -> list[dict[str, Any]]:
    """Flatten internal messages into an OpenAI chat-completions message list.

    Each message is converted individually; skipped messages (``None``) are
    dropped, list results (tool-result expansions) are spliced in, and
    consecutive user messages are merged at the end.
    """
    flat: list[dict[str, Any]] = []

    for converted in map(_convert_message, messages):
        if converted is None:
            continue
        if isinstance(converted, list):
            # A user message may expand into several tool-result dicts.
            flat += converted
        else:
            flat.append(converted)

    return _merge_consecutive_user_messages(flat)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _merge_consecutive_user_messages(
|
|
131
|
+
messages: list[dict[str, Any]],
|
|
132
|
+
) -> list[dict[str, Any]]:
|
|
133
|
+
"""Merge consecutive messages with the same role (required by some providers)."""
|
|
134
|
+
if not messages:
|
|
135
|
+
return messages
|
|
136
|
+
|
|
137
|
+
merged: list[dict[str, Any]] = [messages[0]]
|
|
138
|
+
|
|
139
|
+
for msg in messages[1:]:
|
|
140
|
+
prev = merged[-1]
|
|
141
|
+
if msg.get("role") == "user" and prev.get("role") == "user":
|
|
142
|
+
prev_content = prev.get("content", "")
|
|
143
|
+
new_content = msg.get("content", "")
|
|
144
|
+
prev["content"] = f"{prev_content}\n\n{new_content}".strip()
|
|
145
|
+
else:
|
|
146
|
+
merged.append(msg)
|
|
147
|
+
|
|
148
|
+
return merged
|
navaia/api/retry.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""Retry logic for API calls — mirrors withRetry.ts exactly.
|
|
2
|
+
|
|
3
|
+
Retry conditions:
|
|
4
|
+
- 429 (rate limit): retry with exponential backoff
|
|
5
|
+
- 500, 502, 503 (server error): retry
|
|
6
|
+
- 529 (overloaded): max 3 consecutive retries
|
|
7
|
+
- Connection errors (ECONNRESET, timeout): retry
|
|
8
|
+
- 401/403/404: do NOT retry (auth/not found)
|
|
9
|
+
|
|
10
|
+
Numbers from Claude Code source:
|
|
11
|
+
- MAX_RETRIES = 10
|
|
12
|
+
- MAX_529_CONSECUTIVE = 3
|
|
13
|
+
- BASE_DELAY_MS = 500
|
|
14
|
+
- MAX_BACKOFF_MS = 5 minutes
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import asyncio
|
|
20
|
+
import logging
|
|
21
|
+
from collections.abc import Awaitable, Callable
|
|
22
|
+
from typing import TypeVar
|
|
23
|
+
|
|
24
|
+
from openai import (
|
|
25
|
+
APIConnectionError,
|
|
26
|
+
APIStatusError,
|
|
27
|
+
APITimeoutError,
|
|
28
|
+
RateLimitError,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
T = TypeVar("T")
|
|
34
|
+
|
|
35
|
+
MAX_RETRIES = 10
|
|
36
|
+
MAX_529_CONSECUTIVE = 3
|
|
37
|
+
BASE_DELAY_MS = 500
|
|
38
|
+
MAX_BACKOFF_MS = 5 * 60 * 1000 # 5 minutes
|
|
39
|
+
|
|
40
|
+
RETRIABLE_STATUS_CODES = {429, 500, 502, 503, 529}
|
|
41
|
+
NON_RETRIABLE_STATUS_CODES = {401, 403, 404}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _is_retriable(error: Exception) -> bool:
    """Tell whether *error* is worth retrying.

    Connection failures, timeouts and rate-limit errors always are; other
    API status errors only when their status code is listed in
    ``RETRIABLE_STATUS_CODES``. Anything else is not.
    """
    always_retry = (APIConnectionError, APITimeoutError, RateLimitError)
    if isinstance(error, always_retry):
        return True
    return (
        isinstance(error, APIStatusError)
        and error.status_code in RETRIABLE_STATUS_CODES
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _get_status_code(error: Exception) -> int | None:
    """Return the HTTP status of an APIStatusError, or None for other errors."""
    return error.status_code if isinstance(error, APIStatusError) else None
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
async def with_retry(
    fn: Callable[[], Awaitable[T]],
    max_retries: int = MAX_RETRIES,
    on_retry: Callable[[int, Exception, float], None] | None = None,
) -> T:
    """Await ``fn()`` and retry it with exponential backoff on retriable errors.

    Args:
        fn: Zero-argument async callable to run.
        max_retries: Number of retries allowed after the first attempt.
        on_retry: Optional callback ``(attempt, error, delay_seconds)``
            invoked before each backoff sleep (e.g. for UI updates).

    Returns:
        Whatever ``fn()`` returns on the first successful attempt.

    Raises:
        The original exception when it is not retriable, when more than
        ``MAX_529_CONSECUTIVE`` consecutive 529 responses were seen, or when
        the retry budget is used up.
        RuntimeError: if the loop ends without running (``max_retries`` < 0).
    """
    overloaded_streak = 0
    attempt = 0

    while attempt <= max_retries:
        try:
            return await fn()
        except Exception as exc:
            if not _is_retriable(exc):
                raise

            # 529 (overloaded) has its own cap on consecutive occurrences;
            # any other retriable error resets the streak.
            if _get_status_code(exc) == 529:
                overloaded_streak += 1
                if overloaded_streak > MAX_529_CONSECUTIVE:
                    raise
            else:
                overloaded_streak = 0

            if attempt >= max_retries:
                raise

            # Backoff doubles per attempt: 500ms, 1s, 2s, ... capped at
            # MAX_BACKOFF_MS.
            backoff_ms = min(BASE_DELAY_MS * (2 ** attempt), MAX_BACKOFF_MS)
            delay_s = backoff_ms / 1000.0
            retry_number = attempt + 1

            logger.info(
                "Retry %d/%d after %s (%.1fs delay)",
                retry_number, max_retries, type(exc).__name__, delay_s,
            )

            if on_retry:
                on_retry(retry_number, exc, delay_s)

            await asyncio.sleep(delay_s)

        attempt += 1

    # Only reachable when the loop body never ran.
    raise RuntimeError("Exhausted retries")
|
navaia/api/streaming.py
ADDED
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
"""Streaming API calls — message assembly and delta-by-delta yielding.
|
|
2
|
+
|
|
3
|
+
Mirrors src/services/api/claude.ts: stream_message() is the core function
|
|
4
|
+
that yields StreamEvents as they arrive from the provider.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import logging
|
|
11
|
+
from collections.abc import AsyncIterator
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from openai import (
|
|
15
|
+
APIConnectionError,
|
|
16
|
+
APIStatusError,
|
|
17
|
+
APITimeoutError,
|
|
18
|
+
AsyncOpenAI,
|
|
19
|
+
RateLimitError,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
from navaia.api.normalise import normalise_messages_for_api
|
|
23
|
+
from navaia.api.retry import (
|
|
24
|
+
MAX_529_CONSECUTIVE,
|
|
25
|
+
NON_RETRIABLE_STATUS_CODES,
|
|
26
|
+
RETRIABLE_STATUS_CODES,
|
|
27
|
+
)
|
|
28
|
+
from navaia.api.types import (
|
|
29
|
+
FinishEvent,
|
|
30
|
+
Message,
|
|
31
|
+
RetryEvent,
|
|
32
|
+
StreamEvent,
|
|
33
|
+
TextDeltaEvent,
|
|
34
|
+
ThinkingDeltaEvent,
|
|
35
|
+
ToolCallDeltaEvent,
|
|
36
|
+
ToolCallAccumulator,
|
|
37
|
+
ToolUseBlock,
|
|
38
|
+
Usage,
|
|
39
|
+
UsageEvent,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
logger = logging.getLogger(__name__)
|
|
43
|
+
|
|
44
|
+
STREAM_MAX_RETRIES = 5
|
|
45
|
+
STREAM_BASE_DELAY_S = 1.0
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _thinking_extra_body(model: str, thinking_config) -> dict:
|
|
49
|
+
"""Return provider-specific extra_body for extended thinking.
|
|
50
|
+
|
|
51
|
+
Different providers expose the thinking/reasoning budget under
|
|
52
|
+
different parameter names — we detect based on the model id.
|
|
53
|
+
|
|
54
|
+
- Anthropic models (claude-*): ``thinking: {type, budget_tokens}``
|
|
55
|
+
- OpenAI reasoning models (o1, o3, o4-mini, gpt-5): ``reasoning_effort``
|
|
56
|
+
- OpenRouter passthrough: ``reasoning: {max_tokens}``
|
|
57
|
+
- Others: inline ``<think>`` handling in the stream parser (no extra body needed)
|
|
58
|
+
"""
|
|
59
|
+
if thinking_config is None:
|
|
60
|
+
return {}
|
|
61
|
+
|
|
62
|
+
# Import locally to avoid circular imports and optional dependency cost
|
|
63
|
+
from navaia.thinking.config import ThinkingMode
|
|
64
|
+
|
|
65
|
+
if thinking_config.mode == ThinkingMode.DISABLED:
|
|
66
|
+
return {}
|
|
67
|
+
|
|
68
|
+
budget = max(1024, int(thinking_config.budget_tokens))
|
|
69
|
+
model_lc = model.lower()
|
|
70
|
+
|
|
71
|
+
if "claude" in model_lc and ("opus" in model_lc or "sonnet" in model_lc or "haiku" in model_lc):
|
|
72
|
+
return {"thinking": {"type": "enabled", "budget_tokens": budget}}
|
|
73
|
+
|
|
74
|
+
if model_lc.startswith(("o1", "o3", "o4", "gpt-5")):
|
|
75
|
+
# Translate budget to an effort bucket the reasoning API understands
|
|
76
|
+
if budget >= 20000:
|
|
77
|
+
effort = "high"
|
|
78
|
+
elif budget >= 5000:
|
|
79
|
+
effort = "medium"
|
|
80
|
+
else:
|
|
81
|
+
effort = "low"
|
|
82
|
+
return {"reasoning_effort": effort}
|
|
83
|
+
|
|
84
|
+
if "/" in model_lc: # OpenRouter-style slug (e.g. "anthropic/claude-opus-4")
|
|
85
|
+
return {"reasoning": {"max_tokens": budget}}
|
|
86
|
+
|
|
87
|
+
# deepseek-r1, qwq, and similar already stream <think> tags inline
|
|
88
|
+
return {}
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _tools_to_openai_schema(tools: list) -> list[dict[str, Any]] | None:
|
|
92
|
+
"""Convert internal tool definitions to OpenAI function-calling schema."""
|
|
93
|
+
if not tools:
|
|
94
|
+
return None
|
|
95
|
+
schemas = []
|
|
96
|
+
for tool in tools:
|
|
97
|
+
schema = tool.to_openai_schema()
|
|
98
|
+
if schema:
|
|
99
|
+
schemas.append(schema)
|
|
100
|
+
return schemas or None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _is_retriable_error(error: Exception) -> bool:
    """Decide whether an API error should trigger another attempt.

    Connection, timeout and rate-limit errors always qualify; other status
    errors qualify when their code is in ``RETRIABLE_STATUS_CODES``.
    """
    transient = (APIConnectionError, APITimeoutError, RateLimitError)
    if isinstance(error, transient):
        return True
    if not isinstance(error, APIStatusError):
        return False
    return error.status_code in RETRIABLE_STATUS_CODES
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _is_non_retriable_error(error: Exception) -> bool:
    """Decide whether an API error must be raised without retrying.

    True only for status errors whose code is in
    ``NON_RETRIABLE_STATUS_CODES``.
    """
    return (
        isinstance(error, APIStatusError)
        and error.status_code in NON_RETRIABLE_STATUS_CODES
    )
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _error_status(error: Exception) -> int | None:
    """Return the HTTP status code carried by an API status error, else None."""
    return error.status_code if isinstance(error, APIStatusError) else None
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
async def stream_message(
    client: AsyncOpenAI,
    model: str,
    messages: list[Message],
    system_prompt: str,
    tools: list | None = None,
    temperature: float = 0,
    max_tokens: int = 16384,
    thinking_config: Any = None,
) -> AsyncIterator[StreamEvent]:
    """Stream a chat completion, yielding events as they arrive.

    Args:
        client: Client used to create the streaming completion.
        model: Model identifier sent to the provider.
        messages: Internal conversation messages; normalised to OpenAI
            format before sending.
        system_prompt: Text sent as the leading ``system`` message.
        tools: Optional tool definitions exposing ``to_openai_schema()``.
        temperature: Sampling temperature.
        max_tokens: Completion token limit.
        thinking_config: Optional thinking configuration used to build the
            provider-specific ``extra_body``.

    Yields:
        - RetryEvent before each backoff sleep while creating the stream
        - TextDeltaEvent for each piece of visible text in a chunk
        - ThinkingDeltaEvent with the buffered content of an inline
          ``<think>...</think>`` section (emitted when the section closes,
          at finish, or on disconnect)
        - ToolCallDeltaEvent for incremental tool-call id/name/arguments
        - FinishEvent with the provider's finish reason, or ``"error"`` after
          a mid-stream disconnection
        - UsageEvent at the end when token counts were reported

    Raises:
        The provider error when stream creation fails with a non-retriable
        error, too many consecutive 529s, or after ``STREAM_MAX_RETRIES``
        retries.
    """
    # System prompt first, followed by the normalised conversation.
    api_messages = [
        {"role": "system", "content": system_prompt},
        *normalise_messages_for_api(messages),
    ]

    api_tools = _tools_to_openai_schema(tools or [])

    kwargs: dict[str, Any] = {
        "model": model,
        "messages": api_messages,
        "stream": True,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    if api_tools:
        kwargs["tools"] = api_tools

    # Extended thinking / reasoning — only sent when the helper returns
    # something for this model.
    extra = _thinking_extra_body(model, thinking_config)
    if extra:
        kwargs["extra_body"] = extra

    # --- Retry loop around stream creation ---
    stream = None
    consecutive_529 = 0

    for attempt in range(STREAM_MAX_RETRIES + 1):
        try:
            stream = await client.chat.completions.create(**kwargs)
            break
        except Exception as e:
            # Explicitly non-retriable statuses and anything not known to be
            # retriable are raised immediately.
            if _is_non_retriable_error(e) or not _is_retriable_error(e):
                raise

            # 529 has its own cap on consecutive occurrences.
            if _error_status(e) == 529:
                consecutive_529 += 1
                if consecutive_529 > MAX_529_CONSECUTIVE:
                    raise
            else:
                consecutive_529 = 0

            if attempt >= STREAM_MAX_RETRIES:
                raise

            delay_s = STREAM_BASE_DELAY_S * (2 ** attempt)

            logger.warning(
                "Stream creation failed (attempt %d/%d, %s). "
                "Retrying in %.1fs...",
                attempt + 1,
                STREAM_MAX_RETRIES,
                type(e).__name__,
                delay_s,
            )

            yield RetryEvent(
                attempt=attempt + 1,
                max_retries=STREAM_MAX_RETRIES,
                delay_seconds=delay_s,
                error_message=str(e),
            )

            await asyncio.sleep(delay_s)

    if stream is None:
        # Defensive: the loop above either breaks with a stream or raises.
        raise RuntimeError("Failed to create stream after retries")

    # --- Consume the stream, handling mid-stream disconnections ---
    usage = Usage()
    in_thinking = False
    thinking_buffer = ""

    try:
        async for chunk in stream:
            # Keep the most recent usage report, if any.
            if hasattr(chunk, "usage") and chunk.usage:
                usage = Usage(
                    input_tokens=getattr(chunk.usage, "prompt_tokens", 0) or 0,
                    output_tokens=getattr(chunk.usage, "completion_tokens", 0) or 0,
                    total_tokens=getattr(chunk.usage, "total_tokens", 0) or 0,
                )

            if not chunk.choices:
                continue

            choice = chunk.choices[0]
            delta = choice.delta

            # --- Text deltas ---
            # Models such as deepseek-r1 or QwQ emit reasoning inline between
            # <think> tags. The chunk is scanned piece by piece so that an
            # opening and a closing tag in the same chunk are both honoured,
            # and so tool calls / finish_reason on this chunk are still
            # processed below.
            if delta and delta.content:
                pending = delta.content
                while pending:
                    if in_thinking:
                        head, closed, pending = pending.partition("</think>")
                        thinking_buffer += head
                        if closed:
                            yield ThinkingDeltaEvent(text=thinking_buffer)
                            thinking_buffer = ""
                            in_thinking = False
                    else:
                        head, opened, pending = pending.partition("<think>")
                        if head:
                            yield TextDeltaEvent(text=head)
                        if opened:
                            in_thinking = True

            # --- Tool call deltas ---
            if delta and delta.tool_calls:
                for tc_delta in delta.tool_calls:
                    function = tc_delta.function
                    yield ToolCallDeltaEvent(
                        index=tc_delta.index,
                        id=tc_delta.id or None,
                        name=getattr(function, "name", None) if function else None,
                        arguments_delta=(
                            function.arguments
                            if function and function.arguments
                            else ""
                        ),
                    )

            # --- Finish ---
            if choice.finish_reason:
                # Flush thinking text whose closing tag never arrived.
                if thinking_buffer:
                    yield ThinkingDeltaEvent(text=thinking_buffer)
                    thinking_buffer = ""

                yield FinishEvent(reason=choice.finish_reason)

        if usage.input_tokens or usage.output_tokens:
            yield UsageEvent(usage=usage)

    except (APIConnectionError, APITimeoutError) as e:
        # Mid-stream disconnection: flush buffered content so the consumer
        # keeps whatever was already received, then signal an error finish.
        logger.warning(
            "Stream disconnected mid-response (%s). "
            "Flushing accumulated text.",
            type(e).__name__,
        )
        if thinking_buffer:
            yield ThinkingDeltaEvent(text=thinking_buffer)

        yield FinishEvent(reason="error")
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def accumulate_tool_calls(
    accumulators: dict[int, ToolCallAccumulator],
    event: ToolCallDeltaEvent,
) -> dict[int, ToolCallAccumulator]:
    """Fold one tool-call delta into the accumulator for its index.

    A new accumulator is created the first time an index is seen. A truthy
    ``id`` or ``name`` on the event overwrites the stored value, and the
    event's ``arguments_delta`` is appended to the stored argument string.

    Returns:
        The same ``accumulators`` mapping, updated in place.
    """
    slot = event.index
    try:
        acc = accumulators[slot]
    except KeyError:
        acc = accumulators[slot] = ToolCallAccumulator()

    if event.id:
        acc.id = event.id
    if event.name:
        acc.name = event.name
    acc.args_str += event.arguments_delta

    return accumulators
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def finalise_tool_calls(
    accumulators: dict[int, ToolCallAccumulator],
) -> list[ToolUseBlock]:
    """Turn the accumulated fragments into ToolUseBlocks, ordered by index."""
    blocks: list[ToolUseBlock] = []
    for index in sorted(accumulators):
        blocks.append(accumulators[index].to_tool_use_block())
    return blocks
|