llmcode-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_code/__init__.py +2 -0
- llm_code/analysis/__init__.py +6 -0
- llm_code/analysis/cache.py +33 -0
- llm_code/analysis/engine.py +256 -0
- llm_code/analysis/go_rules.py +114 -0
- llm_code/analysis/js_rules.py +84 -0
- llm_code/analysis/python_rules.py +311 -0
- llm_code/analysis/rules.py +140 -0
- llm_code/analysis/rust_rules.py +108 -0
- llm_code/analysis/universal_rules.py +111 -0
- llm_code/api/__init__.py +0 -0
- llm_code/api/client.py +90 -0
- llm_code/api/errors.py +73 -0
- llm_code/api/openai_compat.py +390 -0
- llm_code/api/provider.py +35 -0
- llm_code/api/sse.py +52 -0
- llm_code/api/types.py +140 -0
- llm_code/cli/__init__.py +0 -0
- llm_code/cli/commands.py +70 -0
- llm_code/cli/image.py +122 -0
- llm_code/cli/render.py +214 -0
- llm_code/cli/status_line.py +79 -0
- llm_code/cli/streaming.py +92 -0
- llm_code/cli/tui_main.py +220 -0
- llm_code/computer_use/__init__.py +11 -0
- llm_code/computer_use/app_detect.py +49 -0
- llm_code/computer_use/app_tier.py +57 -0
- llm_code/computer_use/coordinator.py +99 -0
- llm_code/computer_use/input_control.py +71 -0
- llm_code/computer_use/screenshot.py +93 -0
- llm_code/cron/__init__.py +13 -0
- llm_code/cron/parser.py +145 -0
- llm_code/cron/scheduler.py +135 -0
- llm_code/cron/storage.py +126 -0
- llm_code/enterprise/__init__.py +1 -0
- llm_code/enterprise/audit.py +59 -0
- llm_code/enterprise/auth.py +26 -0
- llm_code/enterprise/oidc.py +95 -0
- llm_code/enterprise/rbac.py +65 -0
- llm_code/harness/__init__.py +5 -0
- llm_code/harness/config.py +33 -0
- llm_code/harness/engine.py +129 -0
- llm_code/harness/guides.py +41 -0
- llm_code/harness/sensors.py +68 -0
- llm_code/harness/templates.py +84 -0
- llm_code/hida/__init__.py +1 -0
- llm_code/hida/classifier.py +187 -0
- llm_code/hida/engine.py +49 -0
- llm_code/hida/profiles.py +95 -0
- llm_code/hida/types.py +28 -0
- llm_code/ide/__init__.py +1 -0
- llm_code/ide/bridge.py +80 -0
- llm_code/ide/detector.py +76 -0
- llm_code/ide/server.py +169 -0
- llm_code/logging.py +29 -0
- llm_code/lsp/__init__.py +0 -0
- llm_code/lsp/client.py +298 -0
- llm_code/lsp/detector.py +42 -0
- llm_code/lsp/manager.py +56 -0
- llm_code/lsp/tools.py +288 -0
- llm_code/marketplace/__init__.py +0 -0
- llm_code/marketplace/builtin_registry.py +102 -0
- llm_code/marketplace/installer.py +162 -0
- llm_code/marketplace/plugin.py +78 -0
- llm_code/marketplace/registry.py +360 -0
- llm_code/mcp/__init__.py +0 -0
- llm_code/mcp/bridge.py +87 -0
- llm_code/mcp/client.py +117 -0
- llm_code/mcp/health.py +120 -0
- llm_code/mcp/manager.py +214 -0
- llm_code/mcp/oauth.py +219 -0
- llm_code/mcp/transport.py +254 -0
- llm_code/mcp/types.py +53 -0
- llm_code/remote/__init__.py +0 -0
- llm_code/remote/client.py +136 -0
- llm_code/remote/protocol.py +22 -0
- llm_code/remote/server.py +275 -0
- llm_code/remote/ssh_proxy.py +56 -0
- llm_code/runtime/__init__.py +0 -0
- llm_code/runtime/auto_commit.py +56 -0
- llm_code/runtime/auto_diagnose.py +62 -0
- llm_code/runtime/checkpoint.py +70 -0
- llm_code/runtime/checkpoint_recovery.py +142 -0
- llm_code/runtime/compaction.py +35 -0
- llm_code/runtime/compressor.py +415 -0
- llm_code/runtime/config.py +533 -0
- llm_code/runtime/context.py +49 -0
- llm_code/runtime/conversation.py +921 -0
- llm_code/runtime/cost_tracker.py +126 -0
- llm_code/runtime/dream.py +127 -0
- llm_code/runtime/file_protection.py +150 -0
- llm_code/runtime/hardware.py +85 -0
- llm_code/runtime/hooks.py +223 -0
- llm_code/runtime/indexer.py +230 -0
- llm_code/runtime/knowledge_compiler.py +232 -0
- llm_code/runtime/memory.py +132 -0
- llm_code/runtime/memory_layers.py +467 -0
- llm_code/runtime/memory_lint.py +252 -0
- llm_code/runtime/model_aliases.py +37 -0
- llm_code/runtime/ollama.py +93 -0
- llm_code/runtime/overlay.py +124 -0
- llm_code/runtime/permissions.py +200 -0
- llm_code/runtime/plan.py +45 -0
- llm_code/runtime/prompt.py +238 -0
- llm_code/runtime/repo_map.py +174 -0
- llm_code/runtime/sandbox.py +116 -0
- llm_code/runtime/session.py +268 -0
- llm_code/runtime/skill_resolver.py +61 -0
- llm_code/runtime/skills.py +133 -0
- llm_code/runtime/speculative.py +75 -0
- llm_code/runtime/streaming_executor.py +216 -0
- llm_code/runtime/telemetry.py +196 -0
- llm_code/runtime/token_budget.py +26 -0
- llm_code/runtime/vcr.py +142 -0
- llm_code/runtime/vision.py +102 -0
- llm_code/swarm/__init__.py +1 -0
- llm_code/swarm/backend_subprocess.py +108 -0
- llm_code/swarm/backend_tmux.py +103 -0
- llm_code/swarm/backend_worktree.py +306 -0
- llm_code/swarm/checkpoint.py +74 -0
- llm_code/swarm/coordinator.py +236 -0
- llm_code/swarm/mailbox.py +88 -0
- llm_code/swarm/manager.py +202 -0
- llm_code/swarm/memory_sync.py +80 -0
- llm_code/swarm/recovery.py +21 -0
- llm_code/swarm/team.py +67 -0
- llm_code/swarm/types.py +31 -0
- llm_code/task/__init__.py +16 -0
- llm_code/task/diagnostics.py +93 -0
- llm_code/task/manager.py +162 -0
- llm_code/task/types.py +112 -0
- llm_code/task/verifier.py +104 -0
- llm_code/tools/__init__.py +0 -0
- llm_code/tools/agent.py +145 -0
- llm_code/tools/agent_roles.py +82 -0
- llm_code/tools/base.py +94 -0
- llm_code/tools/bash.py +565 -0
- llm_code/tools/computer_use_tools.py +278 -0
- llm_code/tools/coordinator_tool.py +75 -0
- llm_code/tools/cron_create.py +90 -0
- llm_code/tools/cron_delete.py +49 -0
- llm_code/tools/cron_list.py +51 -0
- llm_code/tools/deferred.py +92 -0
- llm_code/tools/dump.py +116 -0
- llm_code/tools/edit_file.py +282 -0
- llm_code/tools/git_tools.py +531 -0
- llm_code/tools/glob_search.py +112 -0
- llm_code/tools/grep_search.py +144 -0
- llm_code/tools/ide_diagnostics.py +59 -0
- llm_code/tools/ide_open.py +58 -0
- llm_code/tools/ide_selection.py +52 -0
- llm_code/tools/memory_tools.py +138 -0
- llm_code/tools/multi_edit.py +143 -0
- llm_code/tools/notebook_edit.py +107 -0
- llm_code/tools/notebook_read.py +81 -0
- llm_code/tools/parsing.py +63 -0
- llm_code/tools/read_file.py +154 -0
- llm_code/tools/registry.py +58 -0
- llm_code/tools/search_backends/__init__.py +56 -0
- llm_code/tools/search_backends/brave.py +56 -0
- llm_code/tools/search_backends/duckduckgo.py +129 -0
- llm_code/tools/search_backends/searxng.py +71 -0
- llm_code/tools/search_backends/tavily.py +73 -0
- llm_code/tools/swarm_create.py +109 -0
- llm_code/tools/swarm_delete.py +95 -0
- llm_code/tools/swarm_list.py +44 -0
- llm_code/tools/swarm_message.py +109 -0
- llm_code/tools/task_close.py +79 -0
- llm_code/tools/task_plan.py +79 -0
- llm_code/tools/task_verify.py +90 -0
- llm_code/tools/tool_search.py +65 -0
- llm_code/tools/web_common.py +258 -0
- llm_code/tools/web_fetch.py +223 -0
- llm_code/tools/web_search.py +280 -0
- llm_code/tools/write_file.py +118 -0
- llm_code/tui/__init__.py +1 -0
- llm_code/tui/app.py +2432 -0
- llm_code/tui/chat_view.py +82 -0
- llm_code/tui/chat_widgets.py +309 -0
- llm_code/tui/header_bar.py +46 -0
- llm_code/tui/input_bar.py +349 -0
- llm_code/tui/keybindings.py +142 -0
- llm_code/tui/marketplace.py +210 -0
- llm_code/tui/status_bar.py +72 -0
- llm_code/tui/theme.py +96 -0
- llm_code/utils/__init__.py +0 -0
- llm_code/utils/diff.py +111 -0
- llm_code/utils/errors.py +70 -0
- llm_code/utils/hyperlink.py +73 -0
- llm_code/utils/notebook.py +179 -0
- llm_code/utils/search.py +69 -0
- llm_code/utils/text_normalize.py +28 -0
- llm_code/utils/version_check.py +62 -0
- llm_code/vim/__init__.py +4 -0
- llm_code/vim/engine.py +51 -0
- llm_code/vim/motions.py +172 -0
- llm_code/vim/operators.py +183 -0
- llm_code/vim/text_objects.py +139 -0
- llm_code/vim/transitions.py +279 -0
- llm_code/vim/types.py +68 -0
- llm_code/voice/__init__.py +1 -0
- llm_code/voice/languages.py +43 -0
- llm_code/voice/recorder.py +136 -0
- llm_code/voice/stt.py +36 -0
- llm_code/voice/stt_anthropic.py +66 -0
- llm_code/voice/stt_google.py +32 -0
- llm_code/voice/stt_whisper.py +52 -0
- llmcode_cli-1.0.0.dist-info/METADATA +524 -0
- llmcode_cli-1.0.0.dist-info/RECORD +212 -0
- llmcode_cli-1.0.0.dist-info/WHEEL +4 -0
- llmcode_cli-1.0.0.dist-info/entry_points.txt +2 -0
- llmcode_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
llm_code/api/client.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Provider client factory — routes model names to the correct provider."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from llm_code.api.provider import LLMProvider
|
|
5
|
+
from llm_code.runtime.model_aliases import resolve_model
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ProviderClient:
    """Factory for creating LLMProvider instances based on model name.

    Stateless namespace: every method is a ``@staticmethod`` that returns a
    freshly constructed provider adapter.
    """

    @staticmethod
    def from_model(
        model: str,
        base_url: str = "",
        api_key: str = "",
        timeout: float = 120.0,
        max_retries: int = 2,
        native_tools: bool = True,
        custom_aliases: dict[str, str] | None = None,
    ) -> LLMProvider:
        """Return the appropriate LLMProvider for the given model name.

        The name is first expanded through ``resolve_model`` so user-defined
        aliases are applied before routing.

        Routing rules:
        - Models starting with "claude-" → AnthropicProvider (requires
          the ``anthropic`` SDK to be installed).
        - Everything else → OpenAICompatProvider.

        Args:
            model: Model name or alias to route on.
            base_url: API root for OpenAI-compatible endpoints.
            api_key: API key; forwarded to the chosen provider.
            timeout: Per-request timeout in seconds.
            max_retries: Retry budget for transient failures.
            native_tools: Whether the endpoint supports native tool calls
                (OpenAI-compatible providers only).
            custom_aliases: Extra alias → model-name mappings consulted
                during resolution.

        Raises:
            ImportError: A Claude model was requested but the ``anthropic``
                SDK is not installed.
        """
        model = resolve_model(model, custom_aliases)
        if model.startswith("claude-"):
            return ProviderClient._make_anthropic(
                model=model,
                api_key=api_key,
                timeout=timeout,
                max_retries=max_retries,
            )

        return ProviderClient._make_openai_compat(
            model=model,
            base_url=base_url,
            api_key=api_key,
            timeout=timeout,
            max_retries=max_retries,
            native_tools=native_tools,
        )

    # ------------------------------------------------------------------
    # Private factory helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _make_openai_compat(
        model: str,
        base_url: str,
        api_key: str,
        timeout: float,
        max_retries: int,
        native_tools: bool,
    ) -> LLMProvider:
        """Build an OpenAICompatProvider (imported lazily to keep startup cheap)."""
        from llm_code.api.openai_compat import OpenAICompatProvider

        return OpenAICompatProvider(
            base_url=base_url,
            api_key=api_key,
            model_name=model,
            timeout=timeout,
            max_retries=max_retries,
            native_tools=native_tools,
        )

    @staticmethod
    def _make_anthropic(
        model: str,
        api_key: str,
        timeout: float,
        max_retries: int,
    ) -> LLMProvider:
        """Build an AnthropicProvider, failing clearly when the SDK is missing."""
        try:
            from llm_code.api.anthropic_provider import AnthropicProvider  # type: ignore[import]
        except ImportError as exc:
            # Chain the original failure (``from exc``) so the real cause —
            # e.g. a broken install rather than a missing package — stays
            # visible in the traceback.
            raise ImportError(
                "The 'anthropic' SDK is required to use Claude models. "
                "Install it with: pip install anthropic"
            ) from exc

        return AnthropicProvider(
            api_key=api_key,
            model_name=model,
            timeout=timeout,
            max_retries=max_retries,
        )
|
llm_code/api/errors.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Exception hierarchy for the llm-code API layer."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class LLMCodeError(Exception):
    """Base exception for all llm-code errors.

    Catching this type traps every error raised by the package; all other
    exception classes in this module derive from it.
    """
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ProviderError(LLMCodeError):
    """Error returned by or related to an LLM provider.

    Args:
        message: Human-readable error description.
        is_retryable: True when the failure is transient and the request
            may reasonably be attempted again. Exposed as an attribute of
            the same name for retry loops to inspect.
    """

    def __init__(self, message: str, *, is_retryable: bool = False) -> None:
        super().__init__(message)
        # Retry loops read this flag to decide whether to re-attempt.
        self.is_retryable = is_retryable
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ProviderConnectionError(ProviderError):
    """Network-level failure connecting to the provider (retryable)."""

    def __init__(self, message: str) -> None:
        # Transport failures are assumed transient — always retryable.
        super().__init__(message, is_retryable=True)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ProviderAuthError(ProviderError):
    """Authentication / authorisation failure (not retryable).

    Retrying with the same credentials cannot succeed, so the flag is
    forced to False.
    """

    def __init__(self, message: str) -> None:
        super().__init__(message, is_retryable=False)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ProviderRateLimitError(ProviderError):
    """Provider rate-limit exceeded (retryable).

    The limit window eventually expires, so callers may retry with backoff.
    """

    def __init__(self, message: str) -> None:
        super().__init__(message, is_retryable=True)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ProviderModelNotFoundError(ProviderError):
    """Requested model does not exist on the provider (not retryable).

    A retry with the same model name cannot succeed.
    """

    def __init__(self, message: str) -> None:
        super().__init__(message, is_retryable=False)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class ProviderOverloadError(ProviderError):
    """Provider is overloaded (HTTP 529); retryable with long backoff.

    Distinct from :class:`ProviderRateLimitError` so callers can apply a
    longer backoff schedule for capacity problems.
    """

    def __init__(self, message: str) -> None:
        super().__init__(message, is_retryable=True)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class ToolError(LLMCodeError):
    """Base exception for tool-related errors (lookup, permission, execution)."""
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ToolNotFoundError(ToolError):
    """A tool referenced by name does not exist in the registry."""
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class ToolPermissionDenied(ToolError):
    """The tool is not permitted under the current permission policy."""
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class ToolExecutionError(ToolError):
    """A tool raised an error while executing (as opposed to being missing or denied)."""
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class ConfigError(LLMCodeError):
    """Invalid or missing configuration."""
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class SessionError(LLMCodeError):
    """Error related to conversation session state."""
|
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
"""OpenAI-compatible provider implementation."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import asyncio
|
|
5
|
+
import json
|
|
6
|
+
from typing import AsyncIterator
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
from llm_code.api.errors import (
|
|
11
|
+
ProviderAuthError,
|
|
12
|
+
ProviderConnectionError,
|
|
13
|
+
ProviderModelNotFoundError,
|
|
14
|
+
ProviderOverloadError,
|
|
15
|
+
ProviderRateLimitError,
|
|
16
|
+
)
|
|
17
|
+
from llm_code.api.provider import LLMProvider
|
|
18
|
+
from llm_code.api.sse import parse_sse_events
|
|
19
|
+
from llm_code.api.types import (
|
|
20
|
+
ContentBlock,
|
|
21
|
+
ImageBlock,
|
|
22
|
+
Message,
|
|
23
|
+
MessageRequest,
|
|
24
|
+
MessageResponse,
|
|
25
|
+
StreamEvent,
|
|
26
|
+
StreamMessageStop,
|
|
27
|
+
StreamTextDelta,
|
|
28
|
+
StreamToolUseInputDelta,
|
|
29
|
+
StreamToolUseStart,
|
|
30
|
+
TextBlock,
|
|
31
|
+
TokenUsage,
|
|
32
|
+
ToolDefinition,
|
|
33
|
+
ToolResultBlock,
|
|
34
|
+
ToolUseBlock,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class OpenAICompatProvider(LLMProvider):
    """Provider adapter for OpenAI-compatible APIs (Ollama, vLLM, LM Studio, etc.).

    Speaks the ``/chat/completions`` dialect: converts this package's message
    and tool types into OpenAI-style payloads and parses full or SSE-streamed
    responses back into typed objects.

    NOTE(review): ``stream_message`` reads the entire response body before
    parsing, so events are replayed after completion rather than delivered
    incrementally — confirm this is intentional.
    """

    def __init__(
        self,
        base_url: str,
        api_key: str = "",
        model_name: str = "",
        max_retries: int = 2,
        timeout: float = 120.0,
        native_tools: bool = True,
    ) -> None:
        """Create the adapter and its underlying async HTTP client.

        Args:
            base_url: API root; any trailing slash is stripped.
            api_key: Sent as a Bearer token when non-empty.
            model_name: Default model used when a request names none.
            max_retries: Retry budget for transient failures.
            timeout: Per-request timeout in seconds.
            native_tools: Whether the endpoint supports native tool calls.
        """
        self._base_url = base_url.rstrip("/")
        self._api_key = api_key
        self._model_name = model_name
        self._max_retries = max_retries
        self._timeout = timeout
        self._native_tools = native_tools

        headers: dict[str, str] = {"Content-Type": "application/json"}
        if api_key:
            # Local servers often need no key; only send the header when set.
            headers["Authorization"] = f"Bearer {api_key}"

        self._client = httpx.AsyncClient(
            headers=headers,
            timeout=httpx.Timeout(timeout),
        )

    # ------------------------------------------------------------------
    # Public interface
    # ------------------------------------------------------------------

    async def send_message(self, request: MessageRequest) -> MessageResponse:
        """Send a non-streaming request and return the parsed response."""
        payload = self._build_payload(request, stream=False)
        response = await self._post_with_retry(payload)
        return self._parse_response(response)

    async def stream_message(self, request: MessageRequest) -> AsyncIterator[StreamEvent]:
        """Send a streaming request and return an async iterator of events.

        The whole SSE body is fetched first (``response.text``), then replayed
        as events; see the class-level NOTE about non-incremental delivery.
        """
        payload = self._build_payload(request, stream=True)
        response = await self._post_with_retry(payload)
        return self._iter_stream_events(response.text)

    def supports_native_tools(self) -> bool:
        """Report the ``native_tools`` flag given at construction."""
        return self._native_tools

    def supports_images(self) -> bool:
        # Image inputs are formatted in _convert_message, but the adapter
        # advertises no image support; callers gate on this flag.
        return False

    async def close(self) -> None:
        """Dispose of the underlying HTTP client and its connections."""
        await self._client.aclose()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _build_messages(
        self,
        messages: tuple[Message, ...],
        system: str | None = None,
    ) -> list[dict]:
        """Convert messages to OpenAI wire format, prepending a system entry."""
        result: list[dict] = []

        if system:
            result.append({"role": "system", "content": system})

        for msg in messages:
            result.append(self._convert_message(msg))

        return result

    def _convert_message(self, msg: Message) -> dict:
        """Convert one Message into an OpenAI-format dict.

        NOTE(review): assumes a tool-result message carries exactly one
        ToolResultBlock (enforced by the assert below) — confirm upstream
        always constructs messages that way.
        """
        # Tool result messages use the "tool" role in OpenAI format
        if msg.role == "tool" or (
            len(msg.content) == 1 and isinstance(msg.content[0], ToolResultBlock)
        ):
            block = msg.content[0]
            assert isinstance(block, ToolResultBlock)
            return {
                "role": "tool",
                "tool_call_id": block.tool_use_id,
                "content": block.content,
            }

        # Check if content is mixed (has images or multiple block types)
        has_image = any(isinstance(b, ImageBlock) for b in msg.content)
        has_multiple = len(msg.content) > 1

        if has_image or has_multiple:
            # Multi-part content uses the list-of-parts form; blocks that are
            # neither text nor image are silently dropped here.
            parts: list[dict] = []
            for block in msg.content:
                if isinstance(block, TextBlock):
                    parts.append({"type": "text", "text": block.text})
                elif isinstance(block, ImageBlock):
                    parts.append({
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{block.media_type};base64,{block.data}"
                        },
                    })
            return {"role": msg.role, "content": parts}

        # Single text block — use string content for simplicity
        if len(msg.content) == 1 and isinstance(msg.content[0], TextBlock):
            return {"role": msg.role, "content": msg.content[0].text}

        # Fallback: concatenate text blocks
        text = "".join(
            b.text for b in msg.content if isinstance(b, TextBlock)
        )
        return {"role": msg.role, "content": text}

    def _build_payload(self, request: MessageRequest, *, stream: bool) -> dict:
        """Assemble the JSON body for a /chat/completions call."""
        payload: dict = {
            "model": request.model or self._model_name,
            "messages": self._build_messages(request.messages, system=request.system),
            "max_tokens": request.max_tokens,
            "temperature": request.temperature,
            "stream": stream,
        }

        if request.tools and self._native_tools:
            payload["tools"] = [
                self._convert_tool(t) for t in request.tools
            ]

        if stream:
            # Ask compliant servers to append usage in the final chunk.
            payload["stream_options"] = {"include_usage": True}

        if request.extra_body:
            # Applied last, so extra_body can override any default key above.
            payload.update(request.extra_body)

        return payload

    def _convert_tool(self, tool: ToolDefinition) -> dict:
        """Convert a ToolDefinition into OpenAI function-calling format."""
        return {
            "type": "function",
            "function": {
                "name": tool.name,
                "description": tool.description,
                "parameters": tool.input_schema,
            },
        }

    async def _post_with_retry(self, payload: dict) -> httpx.Response:
        """POST the payload, retrying transient failures with backoff.

        Two independent retry tracks:
        - 529 overloads: fixed 30/60/120s backoffs, not counted against
          ``max_retries``.
        - Connection / rate-limit errors: exponential backoff (2**attempt),
          bounded by ``max_retries``.
        Auth and model-not-found errors are never retried.

        NOTE(review): only ``httpx.ConnectError`` is converted to a
        ProviderConnectionError here; other transport errors (read timeouts,
        etc.) propagate as raw httpx exceptions — confirm that is intended.
        """
        url = f"{self._base_url}/chat/completions"
        last_exc: Exception | None = None

        # 529 Overload: separate long-backoff retry track (30s -> 60s -> 120s, max 3 attempts)
        _OVERLOAD_BACKOFFS = [30, 60, 120]
        _overload_attempt = 0
        attempt = 0

        while attempt <= self._max_retries:
            try:
                response = await self._client.post(url, json=payload)
                self._raise_for_status(response)
                return response
            except ProviderOverloadError as exc:
                last_exc = exc
                if _overload_attempt < len(_OVERLOAD_BACKOFFS):
                    backoff = _OVERLOAD_BACKOFFS[_overload_attempt]
                    _overload_attempt += 1
                    await asyncio.sleep(backoff)
                    # Overload retries don't count against normal retry budget
                    continue
                raise
            except (ProviderConnectionError, ProviderRateLimitError) as exc:
                last_exc = exc
                if attempt < self._max_retries:
                    await asyncio.sleep(2 ** attempt)
                    attempt += 1
                    continue
                raise
            except (ProviderAuthError, ProviderModelNotFoundError):
                # Non-retryable: propagate immediately.
                raise
            except httpx.ConnectError as exc:
                last_exc = ProviderConnectionError(str(exc))
                if attempt < self._max_retries:
                    await asyncio.sleep(2 ** attempt)
                    attempt += 1
                    continue
                raise last_exc from exc
            # NOTE(review): appears unreachable — every path above returns,
            # continues, or raises — kept as-is.
            attempt += 1

        # NOTE(review): also appears unreachable for the same reason.
        raise last_exc  # type: ignore[misc]

    def _raise_for_status(self, response: httpx.Response) -> None:
        """Map non-200 HTTP responses onto the provider exception hierarchy."""
        if response.status_code == 200:
            return
        try:
            # Prefer the structured OpenAI-style error message when present.
            body = response.json()
            msg = body.get("error", {}).get("message", response.text)
        except Exception:
            # Non-JSON (or oddly shaped) body — fall back to the raw text.
            msg = response.text

        if response.status_code == 401:
            raise ProviderAuthError(msg)
        if response.status_code == 404:
            raise ProviderModelNotFoundError(msg)
        if response.status_code == 429:
            raise ProviderRateLimitError(msg)
        if response.status_code == 529:
            raise ProviderOverloadError(msg)
        if response.status_code >= 500:
            raise ProviderConnectionError(msg)
        # Other 4xx — treat as connection error
        raise ProviderConnectionError(f"HTTP {response.status_code}: {msg}")

    def _parse_response(self, response: httpx.Response) -> MessageResponse:
        """Parse a non-streaming completion body into a MessageResponse.

        NOTE(review): when ``tool_calls`` are present, any accompanying text
        content is dropped — confirm that is acceptable for this dialect.
        """
        data = response.json()
        choices = data.get("choices")
        if not choices:
            raise ProviderConnectionError(f"No choices in API response: {str(data)[:200]}")
        choice = choices[0]
        message = choice.get("message")
        if not message:
            raise ProviderConnectionError(f"No message in API choice: {str(choice)[:200]}")
        finish_reason = choice.get("finish_reason") or "stop"

        content_blocks: list[ContentBlock] = []

        tool_calls = message.get("tool_calls")
        if tool_calls:
            for tc in tool_calls:
                fn = tc["function"]
                try:
                    args = json.loads(fn["arguments"])
                except (json.JSONDecodeError, KeyError):
                    # Malformed/absent arguments — treat as empty input.
                    args = {}
                content_blocks.append(
                    ToolUseBlock(id=tc["id"], name=fn["name"], input=args)
                )
        else:
            text = message.get("content") or ""
            content_blocks.append(TextBlock(text=text))

        usage_data = data.get("usage", {})
        usage = TokenUsage(
            input_tokens=usage_data.get("prompt_tokens", 0),
            output_tokens=usage_data.get("completion_tokens", 0),
        )

        return MessageResponse(
            content=tuple(content_blocks),
            usage=usage,
            stop_reason=finish_reason,
        )

    def _iter_stream_events(self, raw: str) -> _StreamIterator:
        """Return async iterator over parsed SSE stream events."""
        return _StreamIterator(raw)
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
class _StreamIterator:
    """Async iterator that wraps synchronous SSE parsing.

    The raw SSE body is decoded eagerly in ``__init__`` into a flat list of
    typed stream events; ``__anext__`` then replays them one at a time.
    """

    def __init__(self, raw: str) -> None:
        # Decode every SSE chunk up front; _build_events converts them into
        # typed events stored in self._processed. (Removed the unused
        # _pending_tool_calls / _done attributes — they were never read.)
        self._events = list(parse_sse_events(raw))
        self._index = 0
        self._processed: list[StreamEvent] = []
        self._build_events()

    def _build_events(self) -> None:
        """Convert raw OpenAI-style chunks into typed StreamEvent objects."""
        events: list[StreamEvent] = []
        # Tool-call data arrives as indexed fragments; accumulate per index.
        pending_tools: dict[int, dict] = {}
        stop_emitted = False
        last_usage: dict = {}

        for chunk in self._events:
            # Some providers (vLLM, Ollama) send usage in a standalone
            # final chunk with no choices. Capture it regardless.
            chunk_usage = chunk.get("usage")
            if chunk_usage:
                last_usage = chunk_usage

            for choice in chunk.get("choices", []):
                delta = choice.get("delta", {})
                finish_reason = choice.get("finish_reason")

                # Text content delta
                text = delta.get("content")
                if text:
                    events.append(StreamTextDelta(text=text))

                # Tool call deltas
                for tc in delta.get("tool_calls", []):
                    idx = tc.get("index", 0)
                    if idx not in pending_tools:
                        pending_tools[idx] = {
                            "id": tc.get("id", ""),
                            "name": tc.get("function", {}).get("name", ""),
                            "args": "",
                        }
                        # Emit a start event only when the tool name is known
                        # in the first fragment for this index.
                        if pending_tools[idx]["name"]:
                            events.append(
                                StreamToolUseStart(
                                    id=pending_tools[idx]["id"],
                                    name=pending_tools[idx]["name"],
                                )
                            )
                    # Accumulate argument fragments
                    args_fragment = tc.get("function", {}).get("arguments", "")
                    if args_fragment:
                        pending_tools[idx]["args"] += args_fragment
                        events.append(
                            StreamToolUseInputDelta(
                                id=pending_tools[idx]["id"],
                                partial_json=args_fragment,
                            )
                        )

                # Stop event — emitted exactly once at the end
                if finish_reason and not stop_emitted:
                    stop_emitted = True
                    usage_data = chunk_usage or last_usage or {}
                    usage = TokenUsage(
                        input_tokens=usage_data.get("prompt_tokens", 0),
                        output_tokens=usage_data.get("completion_tokens", 0),
                    )
                    events.append(
                        StreamMessageStop(usage=usage, stop_reason=finish_reason)
                    )

        # If usage arrived in a trailing chunk after finish_reason, patch the
        # already-emitted stop event so token counts are not lost.
        if stop_emitted and last_usage:
            for i in range(len(events) - 1, -1, -1):
                if isinstance(events[i], StreamMessageStop):
                    existing = events[i]
                    if existing.usage.input_tokens == 0 and existing.usage.output_tokens == 0:
                        events[i] = StreamMessageStop(
                            usage=TokenUsage(
                                input_tokens=last_usage.get("prompt_tokens", 0),
                                output_tokens=last_usage.get("completion_tokens", 0),
                            ),
                            stop_reason=existing.stop_reason,
                        )
                    break

        self._processed = events

    def __aiter__(self):
        return self

    async def __anext__(self) -> StreamEvent:
        """Return the next pre-computed event, or stop when exhausted."""
        if self._index >= len(self._processed):
            raise StopAsyncIteration
        event = self._processed[self._index]
        self._index += 1
        return event
|
llm_code/api/provider.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Abstract base class for LLM provider implementations."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from typing import AsyncIterator
|
|
6
|
+
|
|
7
|
+
from llm_code.api.types import MessageRequest, MessageResponse, StreamEvent
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class LLMProvider(ABC):
    """Contract implemented by every LLM provider adapter.

    Concrete backends (e.g. an OpenAI-compatible server, Anthropic) subclass
    this and provide the four abstract methods below.
    """

    @abstractmethod
    async def send_message(self, request: MessageRequest) -> MessageResponse:
        """Send a complete (non-streaming) message and return the full response."""

    @abstractmethod
    async def stream_message(self, request: MessageRequest) -> AsyncIterator[StreamEvent]:
        """Stream a message and return an async iterator of stream events."""

    @abstractmethod
    def supports_native_tools(self) -> bool:
        """Report whether native/function-calling tools are supported."""

    @abstractmethod
    def supports_images(self) -> bool:
        """Report whether image inputs are supported."""
|
llm_code/api/sse.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Server-Sent Events (SSE) parser for streaming LLM responses."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
from typing import Iterator
|
|
7
|
+
|
|
8
|
+
# Blank-line event boundary — matches both \n\n and \r\n\r\n.
_BLOCK_SEPARATOR = re.compile(r'\r?\n\r?\n')


def parse_sse_events(raw: str) -> Iterator[dict]:
    """Parse a raw SSE string and yield each event as a parsed dict.

    Behaviour:
    - Events are delimited by blank lines (\\n\\n or \\r\\n\\r\\n).
    - Comment lines (leading ':') are skipped.
    - Only 'data:' fields contribute; event:/id:/retry: are ignored.
    - Several data lines in one event are joined with '\\n'.
    - The sentinel 'data: [DONE]' ends iteration immediately.
    - Each joined payload is decoded as JSON; undecodable payloads are
      dropped silently.
    """
    for event_block in _BLOCK_SEPARATOR.split(raw):
        event_block = event_block.strip()
        if not event_block:
            continue

        payload_lines: list[str] = []
        for field_line in re.split(r'\r?\n', event_block):
            if field_line.startswith(':'):
                continue  # SSE comment line
            if not field_line.startswith('data:'):
                continue  # event:, id:, retry: — ignored
            # Drop the field name plus at most one leading space.
            value = field_line[5:].removeprefix(' ')
            if value == '[DONE]':
                return  # Stream-complete sentinel.
            payload_lines.append(value)

        if not payload_lines:
            continue

        try:
            parsed = json.loads('\n'.join(payload_lines))
        except json.JSONDecodeError:
            # Malformed JSON — skip silently (could log in production).
            continue
        yield parsed
|