codexa 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codexa-0.4.0.dist-info/METADATA +650 -0
- codexa-0.4.0.dist-info/RECORD +189 -0
- codexa-0.4.0.dist-info/WHEEL +5 -0
- codexa-0.4.0.dist-info/entry_points.txt +2 -0
- codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
- codexa-0.4.0.dist-info/top_level.txt +1 -0
- semantic_code_intelligence/__init__.py +5 -0
- semantic_code_intelligence/analysis/__init__.py +21 -0
- semantic_code_intelligence/analysis/ai_features.py +351 -0
- semantic_code_intelligence/bridge/__init__.py +28 -0
- semantic_code_intelligence/bridge/context_provider.py +245 -0
- semantic_code_intelligence/bridge/protocol.py +167 -0
- semantic_code_intelligence/bridge/server.py +348 -0
- semantic_code_intelligence/bridge/vscode.py +271 -0
- semantic_code_intelligence/ci/__init__.py +13 -0
- semantic_code_intelligence/ci/hooks.py +98 -0
- semantic_code_intelligence/ci/hotspots.py +272 -0
- semantic_code_intelligence/ci/impact.py +246 -0
- semantic_code_intelligence/ci/metrics.py +591 -0
- semantic_code_intelligence/ci/pr.py +412 -0
- semantic_code_intelligence/ci/quality.py +557 -0
- semantic_code_intelligence/ci/templates.py +164 -0
- semantic_code_intelligence/ci/trace.py +224 -0
- semantic_code_intelligence/cli/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
- semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
- semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
- semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
- semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
- semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
- semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
- semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
- semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
- semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
- semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
- semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
- semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
- semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
- semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
- semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
- semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
- semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
- semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
- semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
- semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
- semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
- semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
- semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
- semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
- semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
- semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
- semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
- semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
- semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
- semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
- semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
- semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
- semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
- semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
- semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
- semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
- semantic_code_intelligence/cli/main.py +65 -0
- semantic_code_intelligence/cli/router.py +92 -0
- semantic_code_intelligence/config/__init__.py +0 -0
- semantic_code_intelligence/config/settings.py +260 -0
- semantic_code_intelligence/context/__init__.py +19 -0
- semantic_code_intelligence/context/engine.py +429 -0
- semantic_code_intelligence/context/memory.py +253 -0
- semantic_code_intelligence/daemon/__init__.py +1 -0
- semantic_code_intelligence/daemon/watcher.py +515 -0
- semantic_code_intelligence/docs/__init__.py +1080 -0
- semantic_code_intelligence/embeddings/__init__.py +0 -0
- semantic_code_intelligence/embeddings/enhanced.py +131 -0
- semantic_code_intelligence/embeddings/generator.py +149 -0
- semantic_code_intelligence/embeddings/model_registry.py +100 -0
- semantic_code_intelligence/evolution/__init__.py +1 -0
- semantic_code_intelligence/evolution/budget_guard.py +111 -0
- semantic_code_intelligence/evolution/commit_manager.py +88 -0
- semantic_code_intelligence/evolution/context_builder.py +131 -0
- semantic_code_intelligence/evolution/engine.py +249 -0
- semantic_code_intelligence/evolution/patch_generator.py +229 -0
- semantic_code_intelligence/evolution/task_selector.py +214 -0
- semantic_code_intelligence/evolution/test_runner.py +111 -0
- semantic_code_intelligence/indexing/__init__.py +0 -0
- semantic_code_intelligence/indexing/chunker.py +174 -0
- semantic_code_intelligence/indexing/parallel.py +86 -0
- semantic_code_intelligence/indexing/scanner.py +146 -0
- semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
- semantic_code_intelligence/llm/__init__.py +62 -0
- semantic_code_intelligence/llm/cache.py +219 -0
- semantic_code_intelligence/llm/cached_provider.py +145 -0
- semantic_code_intelligence/llm/conversation.py +190 -0
- semantic_code_intelligence/llm/cross_refactor.py +272 -0
- semantic_code_intelligence/llm/investigation.py +274 -0
- semantic_code_intelligence/llm/mock_provider.py +77 -0
- semantic_code_intelligence/llm/ollama_provider.py +122 -0
- semantic_code_intelligence/llm/openai_provider.py +100 -0
- semantic_code_intelligence/llm/provider.py +92 -0
- semantic_code_intelligence/llm/rate_limiter.py +164 -0
- semantic_code_intelligence/llm/reasoning.py +438 -0
- semantic_code_intelligence/llm/safety.py +110 -0
- semantic_code_intelligence/llm/streaming.py +251 -0
- semantic_code_intelligence/lsp/__init__.py +609 -0
- semantic_code_intelligence/mcp/__init__.py +393 -0
- semantic_code_intelligence/parsing/__init__.py +19 -0
- semantic_code_intelligence/parsing/parser.py +375 -0
- semantic_code_intelligence/plugins/__init__.py +255 -0
- semantic_code_intelligence/plugins/examples/__init__.py +1 -0
- semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
- semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
- semantic_code_intelligence/scalability/__init__.py +205 -0
- semantic_code_intelligence/search/__init__.py +0 -0
- semantic_code_intelligence/search/formatter.py +123 -0
- semantic_code_intelligence/search/grep.py +361 -0
- semantic_code_intelligence/search/hybrid_search.py +170 -0
- semantic_code_intelligence/search/keyword_search.py +311 -0
- semantic_code_intelligence/search/section_expander.py +103 -0
- semantic_code_intelligence/services/__init__.py +0 -0
- semantic_code_intelligence/services/indexing_service.py +630 -0
- semantic_code_intelligence/services/search_service.py +269 -0
- semantic_code_intelligence/storage/__init__.py +0 -0
- semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
- semantic_code_intelligence/storage/hash_store.py +66 -0
- semantic_code_intelligence/storage/index_manifest.py +85 -0
- semantic_code_intelligence/storage/index_stats.py +138 -0
- semantic_code_intelligence/storage/query_history.py +160 -0
- semantic_code_intelligence/storage/symbol_registry.py +209 -0
- semantic_code_intelligence/storage/vector_store.py +297 -0
- semantic_code_intelligence/tests/__init__.py +0 -0
- semantic_code_intelligence/tests/test_ai_features.py +351 -0
- semantic_code_intelligence/tests/test_chunker.py +119 -0
- semantic_code_intelligence/tests/test_cli.py +188 -0
- semantic_code_intelligence/tests/test_config.py +154 -0
- semantic_code_intelligence/tests/test_context.py +381 -0
- semantic_code_intelligence/tests/test_embeddings.py +73 -0
- semantic_code_intelligence/tests/test_endtoend.py +1142 -0
- semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
- semantic_code_intelligence/tests/test_hash_store.py +79 -0
- semantic_code_intelligence/tests/test_logging.py +55 -0
- semantic_code_intelligence/tests/test_new_cli.py +138 -0
- semantic_code_intelligence/tests/test_parser.py +495 -0
- semantic_code_intelligence/tests/test_phase10.py +355 -0
- semantic_code_intelligence/tests/test_phase11.py +593 -0
- semantic_code_intelligence/tests/test_phase12.py +375 -0
- semantic_code_intelligence/tests/test_phase13.py +663 -0
- semantic_code_intelligence/tests/test_phase14.py +568 -0
- semantic_code_intelligence/tests/test_phase15.py +814 -0
- semantic_code_intelligence/tests/test_phase16.py +792 -0
- semantic_code_intelligence/tests/test_phase17.py +815 -0
- semantic_code_intelligence/tests/test_phase18.py +934 -0
- semantic_code_intelligence/tests/test_phase19.py +986 -0
- semantic_code_intelligence/tests/test_phase20.py +2753 -0
- semantic_code_intelligence/tests/test_phase20b.py +2058 -0
- semantic_code_intelligence/tests/test_phase20c.py +962 -0
- semantic_code_intelligence/tests/test_phase21.py +428 -0
- semantic_code_intelligence/tests/test_phase22.py +799 -0
- semantic_code_intelligence/tests/test_phase23.py +783 -0
- semantic_code_intelligence/tests/test_phase24.py +715 -0
- semantic_code_intelligence/tests/test_phase25.py +496 -0
- semantic_code_intelligence/tests/test_phase26.py +251 -0
- semantic_code_intelligence/tests/test_phase27.py +531 -0
- semantic_code_intelligence/tests/test_phase8.py +592 -0
- semantic_code_intelligence/tests/test_phase9.py +643 -0
- semantic_code_intelligence/tests/test_plugins.py +293 -0
- semantic_code_intelligence/tests/test_priority_features.py +727 -0
- semantic_code_intelligence/tests/test_router.py +41 -0
- semantic_code_intelligence/tests/test_scalability.py +138 -0
- semantic_code_intelligence/tests/test_scanner.py +125 -0
- semantic_code_intelligence/tests/test_search.py +160 -0
- semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
- semantic_code_intelligence/tests/test_tools.py +182 -0
- semantic_code_intelligence/tests/test_vector_store.py +151 -0
- semantic_code_intelligence/tests/test_watcher.py +211 -0
- semantic_code_intelligence/tools/__init__.py +442 -0
- semantic_code_intelligence/tools/executor.py +232 -0
- semantic_code_intelligence/tools/protocol.py +200 -0
- semantic_code_intelligence/tui/__init__.py +454 -0
- semantic_code_intelligence/utils/__init__.py +0 -0
- semantic_code_intelligence/utils/logging.py +112 -0
- semantic_code_intelligence/version.py +3 -0
- semantic_code_intelligence/web/__init__.py +11 -0
- semantic_code_intelligence/web/api.py +289 -0
- semantic_code_intelligence/web/server.py +397 -0
- semantic_code_intelligence/web/ui.py +659 -0
- semantic_code_intelligence/web/visualize.py +226 -0
- semantic_code_intelligence/workspace/__init__.py +427 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""Ollama LLM provider — integration with the Ollama local model server."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
from urllib.error import URLError
|
|
8
|
+
from urllib.request import Request, urlopen
|
|
9
|
+
|
|
10
|
+
from semantic_code_intelligence.llm.provider import (
|
|
11
|
+
LLMMessage,
|
|
12
|
+
LLMProvider,
|
|
13
|
+
LLMResponse,
|
|
14
|
+
MessageRole,
|
|
15
|
+
)
|
|
16
|
+
from semantic_code_intelligence.utils.logging import get_logger
|
|
17
|
+
|
|
18
|
+
logger = get_logger("llm.ollama")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class OllamaProvider(LLMProvider):
    """LLM provider for the Ollama local model server.

    Communicates via HTTP with the Ollama REST API.
    No external packages required beyond the standard library.

    Args:
        model: Model name sent in every request payload.
        base_url: Root URL of the server; trailing slashes are stripped.
        temperature: Default sampling temperature (overridable per call).
        max_tokens: Default ``num_predict`` value (overridable per call).
    """

    def __init__(
        self,
        model: str = "llama3",
        base_url: str = "http://localhost:11434",
        temperature: float = 0.2,
        max_tokens: int = 2048,
    ) -> None:
        self._model = model
        self._base_url = base_url.rstrip("/")
        self._temperature = temperature
        self._max_tokens = max_tokens

    @property
    def name(self) -> str:
        """Provider identifier reported in every ``LLMResponse``."""
        return "ollama"

    def _api_call(self, endpoint: str, payload: dict[str, Any]) -> dict[str, Any]:
        """POST ``payload`` as JSON to ``endpoint`` and return the decoded reply.

        Network, HTTP and JSON decoding errors are not caught here; they
        propagate to the caller.
        """
        url = f"{self._base_url}{endpoint}"
        data = json.dumps(payload).encode("utf-8")
        req = Request(url, data=data, headers={"Content-Type": "application/json"})

        # The target is whatever base_url was configured with (localhost by default).
        with urlopen(req, timeout=120) as resp:  # noqa: S310
            result: dict[str, Any] = json.loads(resp.read().decode("utf-8"))
        return result

    def _options(self, overrides: dict[str, Any]) -> dict[str, Any]:
        """Build the ``options`` payload, letting per-call kwargs win over defaults."""
        return {
            "temperature": overrides.get("temperature", self._temperature),
            "num_predict": overrides.get("max_tokens", self._max_tokens),
        }

    @staticmethod
    def _usage(result: dict[str, Any]) -> dict[str, int]:
        """Translate the reply's token counters into the common usage dict.

        Counters missing from the reply are reported as 0.
        """
        prompt_tokens = result.get("prompt_eval_count", 0)
        completion_tokens = result.get("eval_count", 0)
        return {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        }

    def complete(self, prompt: str, **kwargs: Any) -> LLMResponse:
        """Generate a completion for ``prompt`` via ``/api/generate``.

        Args:
            prompt: The text prompt to complete.
            **kwargs: Optional ``temperature`` and ``max_tokens`` overrides.

        Returns:
            An LLMResponse whose content is the reply's ``response`` field
            (empty string when absent).
        """
        payload: dict[str, Any] = {
            "model": self._model,
            "prompt": prompt,
            "stream": False,
            "options": self._options(kwargs),
        }

        logger.debug("Ollama generate request: model=%s", self._model)
        result = self._api_call("/api/generate", payload)

        return LLMResponse(
            content=result.get("response", ""),
            model=self._model,
            provider=self.name,
            usage=self._usage(result),
        )

    def chat(self, messages: list[LLMMessage], **kwargs: Any) -> LLMResponse:
        """Generate a reply for a conversation via ``/api/chat``.

        Args:
            messages: Conversation history; each item is serialised with
                its ``to_dict()`` method.
            **kwargs: Optional ``temperature`` and ``max_tokens`` overrides.

        Returns:
            An LLMResponse whose content is ``message.content`` from the
            reply (empty string when the message is missing or null).
        """
        payload: dict[str, Any] = {
            "model": self._model,
            "messages": [m.to_dict() for m in messages],
            "stream": False,
            "options": self._options(kwargs),
        }

        logger.debug("Ollama chat request: model=%s, messages=%d", self._model, len(messages))
        result = self._api_call("/api/chat", payload)

        # ``or {}`` also covers an explicit ``"message": null`` in the reply.
        msg = result.get("message") or {}
        return LLMResponse(
            content=msg.get("content", ""),
            model=self._model,
            provider=self.name,
            usage=self._usage(result),
        )

    def is_available(self) -> bool:
        """Check whether the Ollama server is reachable.

        Issues a GET to ``/api/tags`` with a 5 second timeout and returns
        True only for an HTTP 200 reply. Connection failures and a
        malformed ``base_url`` both yield False instead of raising.
        """
        try:
            url = f"{self._base_url}/api/tags"
            req = Request(url)
            with urlopen(req, timeout=5) as resp:  # noqa: S310
                ok: bool = resp.status == 200
            return ok
        except (URLError, OSError, ValueError):
            # ValueError: Request() rejects a URL without a scheme.
            return False
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""OpenAI LLM provider — integration with the OpenAI Chat Completions API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from semantic_code_intelligence.llm.provider import (
|
|
8
|
+
LLMMessage,
|
|
9
|
+
LLMProvider,
|
|
10
|
+
LLMResponse,
|
|
11
|
+
MessageRole,
|
|
12
|
+
)
|
|
13
|
+
from semantic_code_intelligence.utils.logging import get_logger
|
|
14
|
+
|
|
15
|
+
logger = get_logger("llm.openai")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class OpenAIProvider(LLMProvider):
    """Provider backed by the OpenAI Chat Completions API.

    The ``openai`` package is imported lazily on first use, so this class
    can be constructed without it; a valid API key is needed to make calls.
    """

    def __init__(
        self,
        api_key: str,
        model: str = "gpt-3.5-turbo",
        base_url: str | None = None,
        temperature: float = 0.2,
        max_tokens: int = 2048,
    ) -> None:
        # Connection settings.
        self._api_key = api_key
        self._base_url = base_url
        # Generation defaults, overridable per call.
        self._model = model
        self._temperature = temperature
        self._max_tokens = max_tokens
        # Created on demand by _get_client().
        self._client: Any = None

    @property
    def name(self) -> str:
        """Provider identifier reported in every ``LLMResponse``."""
        return "openai"

    def _get_client(self) -> Any:
        """Return the cached OpenAI client, building it on the first call.

        Raises:
            ImportError: If the ``openai`` package is not installed.
        """
        if self._client is not None:
            return self._client

        try:
            import openai
        except ImportError as exc:
            raise ImportError(
                "The 'openai' package is required for OpenAIProvider. "
                "Install it with: pip install openai"
            ) from exc

        client_options: dict[str, Any] = {"api_key": self._api_key}
        if self._base_url:
            # Only forwarded when set, so the library default applies otherwise.
            client_options["base_url"] = self._base_url
        self._client = openai.OpenAI(**client_options)
        return self._client

    def complete(self, prompt: str, **kwargs: Any) -> LLMResponse:
        """Complete ``prompt`` by sending it as a single user chat message."""
        single_turn = [LLMMessage(role=MessageRole.USER, content=prompt)]
        return self.chat(single_turn, **kwargs)

    def chat(self, messages: list[LLMMessage], **kwargs: Any) -> LLMResponse:
        """Send the conversation to the chat completions endpoint.

        Args:
            messages: Conversation history; each item is serialised with
                its ``to_dict()`` method.
            **kwargs: Optional ``temperature`` and ``max_tokens`` overrides.

        Returns:
            An LLMResponse built from the first choice of the reply.
        """
        client = self._get_client()
        temperature = kwargs.get("temperature", self._temperature)
        max_tokens = kwargs.get("max_tokens", self._max_tokens)
        api_messages = [item.to_dict() for item in messages]

        logger.debug(
            "OpenAI chat request: model=%s, messages=%d", self._model, len(api_messages)
        )

        response = client.chat.completions.create(
            model=self._model,
            messages=api_messages,
            temperature=temperature,
            max_tokens=max_tokens,
        )

        first_choice = response.choices[0]
        reported = response.usage
        usage = (
            {
                "prompt_tokens": reported.prompt_tokens,
                "completion_tokens": reported.completion_tokens,
                "total_tokens": reported.total_tokens,
            }
            if reported
            else {}
        )
        raw_payload = response.model_dump() if hasattr(response, "model_dump") else {}

        return LLMResponse(
            content=first_choice.message.content or "",
            model=response.model,
            provider=self.name,
            usage=usage,
            raw=raw_payload,
        )

    def is_available(self) -> bool:
        """Return True when a non-empty API key was supplied (no network check)."""
        return bool(self._api_key)
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""LLM provider abstraction — base class and data types for LLM integration."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MessageRole(str, Enum):
    """Speaker of a conversation message.

    Mixing in ``str`` makes every member a string instance as well, so a
    member compares equal to its plain-text value.
    """

    SYSTEM = "system"
    USER = "user"
    ASSISTANT = "assistant"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class LLMMessage:
    """One turn of an LLM conversation: who spoke and what was said."""

    role: MessageRole
    content: str

    def to_dict(self) -> dict[str, str]:
        """Return the message as a ``{"role": ..., "content": ...}`` mapping.

        The role is emitted as its enum value rather than the enum member.
        """
        return dict(role=self.role.value, content=self.content)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class LLMResponse:
    """What a provider returns for one completion or chat request."""

    content: str
    model: str = ""
    provider: str = ""
    usage: dict[str, int] = field(default_factory=dict)
    raw: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a summary dict of the response.

        Only ``content``, ``model``, ``provider`` and ``usage`` are
        included; the ``raw`` payload is left out.
        """
        exported = ("content", "model", "provider", "usage")
        return {key: getattr(self, key) for key in exported}
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class LLMProvider(ABC):
    """Common interface every LLM backend implements.

    Concrete providers must supply `name`, `complete()` and `chat()`;
    `is_available()` has a default and is optional to override.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Short provider identifier (e.g. 'openai', 'ollama')."""

    @abstractmethod
    def complete(self, prompt: str, **kwargs: Any) -> LLMResponse:
        """Produce a completion for one standalone prompt.

        Args:
            prompt: The text prompt to complete.
            **kwargs: Provider-specific options (temperature, max_tokens, etc.).

        Returns:
            An LLMResponse holding the generated text.
        """

    @abstractmethod
    def chat(self, messages: list[LLMMessage], **kwargs: Any) -> LLMResponse:
        """Produce the next reply in a multi-turn conversation.

        Args:
            messages: Conversation history as a list of LLMMessage.
            **kwargs: Provider-specific options.

        Returns:
            An LLMResponse holding the assistant's reply.
        """

    def is_available(self) -> bool:
        """Report whether the provider is configured and reachable.

        The base implementation always answers True; subclasses may
        override it with a real connectivity or configuration check.
        """
        return True
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""LLM rate limiter — sliding-window rate limiting for API calls.
|
|
2
|
+
|
|
3
|
+
Enforces requests-per-minute (RPM) and tokens-per-minute (TPM) limits
|
|
4
|
+
using a sliding window of recent events. Callers can either block
|
|
5
|
+
until capacity is available or receive a ``RateLimitExceeded`` error.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import threading
|
|
11
|
+
import time
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class RateLimitExceeded(Exception):
    """Signals that a call was refused because a rate limit is exhausted.

    Attributes:
        retry_after: Value supplied by the raiser, stored unchanged
            (defaults to 0.0).
    """

    def __init__(self, message: str = "Rate limit exceeded", retry_after: float = 0.0) -> None:
        self.retry_after = retry_after
        super().__init__(message)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class RateLimiterStats:
    """Counters describing a rate limiter's activity; all start at zero."""

    total_requests: int = 0
    total_tokens: int = 0
    rejected_requests: int = 0
    current_rpm: int = 0
    current_tpm: int = 0

    def to_dict(self) -> dict[str, Any]:
        """Return every counter in a plain dict keyed by field name."""
        counters = (
            "total_requests",
            "total_tokens",
            "rejected_requests",
            "current_rpm",
            "current_tpm",
        )
        return {key: getattr(self, key) for key in counters}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class _Event:
    """Bookkeeping entry for one API call.

    ``timestamp`` is the time the call was recorded and ``tokens`` is the
    token count attributed to it (0 unless given).
    """

    timestamp: float
    tokens: int = 0
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class RateLimiter:
    """Sliding-window rate limiter for LLM API calls.

    Events from the last 60 seconds are kept in a list; a request is
    admitted only while both the request count and the token total of
    that window stay within the configured limits.

    Parameters
    ----------
    rpm : int
        Maximum requests per minute. 0 = unlimited.
    tpm : int
        Maximum tokens per minute. 0 = unlimited.
    blocking : bool
        If ``True``, :meth:`acquire` will sleep until capacity is
        available instead of raising :class:`RateLimitExceeded`.
    """

    def __init__(
        self,
        rpm: int = 0,
        tpm: int = 0,
        blocking: bool = True,
    ) -> None:
        self._rpm = rpm
        self._tpm = tpm
        self._blocking = blocking
        self._events: list[_Event] = []
        # Guards _events and _stats.
        self._lock = threading.Lock()
        self._stats = RateLimiterStats()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def acquire(self, estimated_tokens: int = 0) -> None:
        """Acquire permission to make an API call.

        If the rate limit would be exceeded and ``blocking`` is True,
        this method sleeps (in slices of at most one second) until
        capacity is available. Otherwise it raises
        :class:`RateLimitExceeded` with ``retry_after`` set to the
        estimated wait.

        A request whose ``estimated_tokens`` alone is larger than the TPM
        limit can never be admitted, so it is rejected immediately with
        :class:`RateLimitExceeded` (``retry_after`` 0.0) in both modes
        rather than blocking forever.

        Raises
        ------
        RateLimitExceeded
            In non-blocking mode when no capacity is available, or in
            any mode when the request exceeds the whole TPM budget.
        """
        if self._tpm > 0 and estimated_tokens > self._tpm:
            # Waiting cannot help: even an empty window is too small.
            with self._lock:
                self._stats.rejected_requests += 1
            raise RateLimitExceeded(
                f"Estimated tokens ({estimated_tokens}) exceed the TPM limit ({self._tpm})",
                retry_after=0.0,
            )

        while True:
            with self._lock:
                self._prune()
                if self._check_rpm() and self._check_tpm(estimated_tokens):
                    # Record the request event (token count updated later via record_usage)
                    self._events.append(
                        _Event(timestamp=time.monotonic(), tokens=estimated_tokens)
                    )
                    self._stats.total_requests += 1
                    return

                # Read shared state while the lock is still held so a
                # concurrent prune cannot empty the list underneath us.
                wait = self._wait_time()

                if not self._blocking:
                    self._stats.rejected_requests += 1
                    raise RateLimitExceeded(
                        f"Rate limit exceeded (RPM={self._rpm}, TPM={self._tpm})",
                        retry_after=wait,
                    )

            # Sleep outside the lock
            time.sleep(min(wait, 1.0))

    def record_usage(self, tokens: int) -> None:
        """Record the actual token usage after a response is received.

        Adds ``tokens`` to the lifetime total and overwrites the token
        count of the most recently recorded event, if any.
        """
        with self._lock:
            self._stats.total_tokens += tokens
            # The newest event is assumed to be the caller's own request.
            if self._events:
                self._events[-1].tokens = tokens

    @property
    def stats(self) -> RateLimiterStats:
        """Return current rate limiter statistics.

        Refreshes ``current_rpm`` and ``current_tpm`` from the live
        window and returns the limiter's own stats object (not a copy).
        """
        with self._lock:
            self._prune()
            self._stats.current_rpm = len(self._events)
            self._stats.current_tpm = sum(e.tokens for e in self._events)
            return self._stats

    @property
    def is_enabled(self) -> bool:
        """Return True if any rate limit is configured."""
        return self._rpm > 0 or self._tpm > 0

    # ------------------------------------------------------------------
    # Internal helpers (call with self._lock held)
    # ------------------------------------------------------------------

    def _prune(self) -> None:
        """Remove events older than 60 seconds."""
        cutoff = time.monotonic() - 60.0
        self._events = [e for e in self._events if e.timestamp > cutoff]

    def _check_rpm(self) -> bool:
        """Check whether adding one more request is within the RPM limit."""
        if self._rpm <= 0:
            return True
        return len(self._events) < self._rpm

    def _check_tpm(self, estimated_tokens: int) -> bool:
        """Check whether adding tokens is within the TPM limit."""
        if self._tpm <= 0:
            return True
        current = sum(e.tokens for e in self._events)
        return (current + estimated_tokens) <= self._tpm

    def _wait_time(self) -> float:
        """Estimate how long to wait before capacity is available.

        Returns the time until the oldest event leaves the 60 second
        window, never less than 0.1 seconds.
        """
        if not self._events:
            return 0.1
        oldest = self._events[0].timestamp
        elapsed = time.monotonic() - oldest
        return max(60.0 - elapsed, 0.1)
|