llmcode-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_code/__init__.py +2 -0
- llm_code/analysis/__init__.py +6 -0
- llm_code/analysis/cache.py +33 -0
- llm_code/analysis/engine.py +256 -0
- llm_code/analysis/go_rules.py +114 -0
- llm_code/analysis/js_rules.py +84 -0
- llm_code/analysis/python_rules.py +311 -0
- llm_code/analysis/rules.py +140 -0
- llm_code/analysis/rust_rules.py +108 -0
- llm_code/analysis/universal_rules.py +111 -0
- llm_code/api/__init__.py +0 -0
- llm_code/api/client.py +90 -0
- llm_code/api/errors.py +73 -0
- llm_code/api/openai_compat.py +390 -0
- llm_code/api/provider.py +35 -0
- llm_code/api/sse.py +52 -0
- llm_code/api/types.py +140 -0
- llm_code/cli/__init__.py +0 -0
- llm_code/cli/commands.py +70 -0
- llm_code/cli/image.py +122 -0
- llm_code/cli/render.py +214 -0
- llm_code/cli/status_line.py +79 -0
- llm_code/cli/streaming.py +92 -0
- llm_code/cli/tui_main.py +220 -0
- llm_code/computer_use/__init__.py +11 -0
- llm_code/computer_use/app_detect.py +49 -0
- llm_code/computer_use/app_tier.py +57 -0
- llm_code/computer_use/coordinator.py +99 -0
- llm_code/computer_use/input_control.py +71 -0
- llm_code/computer_use/screenshot.py +93 -0
- llm_code/cron/__init__.py +13 -0
- llm_code/cron/parser.py +145 -0
- llm_code/cron/scheduler.py +135 -0
- llm_code/cron/storage.py +126 -0
- llm_code/enterprise/__init__.py +1 -0
- llm_code/enterprise/audit.py +59 -0
- llm_code/enterprise/auth.py +26 -0
- llm_code/enterprise/oidc.py +95 -0
- llm_code/enterprise/rbac.py +65 -0
- llm_code/harness/__init__.py +5 -0
- llm_code/harness/config.py +33 -0
- llm_code/harness/engine.py +129 -0
- llm_code/harness/guides.py +41 -0
- llm_code/harness/sensors.py +68 -0
- llm_code/harness/templates.py +84 -0
- llm_code/hida/__init__.py +1 -0
- llm_code/hida/classifier.py +187 -0
- llm_code/hida/engine.py +49 -0
- llm_code/hida/profiles.py +95 -0
- llm_code/hida/types.py +28 -0
- llm_code/ide/__init__.py +1 -0
- llm_code/ide/bridge.py +80 -0
- llm_code/ide/detector.py +76 -0
- llm_code/ide/server.py +169 -0
- llm_code/logging.py +29 -0
- llm_code/lsp/__init__.py +0 -0
- llm_code/lsp/client.py +298 -0
- llm_code/lsp/detector.py +42 -0
- llm_code/lsp/manager.py +56 -0
- llm_code/lsp/tools.py +288 -0
- llm_code/marketplace/__init__.py +0 -0
- llm_code/marketplace/builtin_registry.py +102 -0
- llm_code/marketplace/installer.py +162 -0
- llm_code/marketplace/plugin.py +78 -0
- llm_code/marketplace/registry.py +360 -0
- llm_code/mcp/__init__.py +0 -0
- llm_code/mcp/bridge.py +87 -0
- llm_code/mcp/client.py +117 -0
- llm_code/mcp/health.py +120 -0
- llm_code/mcp/manager.py +214 -0
- llm_code/mcp/oauth.py +219 -0
- llm_code/mcp/transport.py +254 -0
- llm_code/mcp/types.py +53 -0
- llm_code/remote/__init__.py +0 -0
- llm_code/remote/client.py +136 -0
- llm_code/remote/protocol.py +22 -0
- llm_code/remote/server.py +275 -0
- llm_code/remote/ssh_proxy.py +56 -0
- llm_code/runtime/__init__.py +0 -0
- llm_code/runtime/auto_commit.py +56 -0
- llm_code/runtime/auto_diagnose.py +62 -0
- llm_code/runtime/checkpoint.py +70 -0
- llm_code/runtime/checkpoint_recovery.py +142 -0
- llm_code/runtime/compaction.py +35 -0
- llm_code/runtime/compressor.py +415 -0
- llm_code/runtime/config.py +533 -0
- llm_code/runtime/context.py +49 -0
- llm_code/runtime/conversation.py +921 -0
- llm_code/runtime/cost_tracker.py +126 -0
- llm_code/runtime/dream.py +127 -0
- llm_code/runtime/file_protection.py +150 -0
- llm_code/runtime/hardware.py +85 -0
- llm_code/runtime/hooks.py +223 -0
- llm_code/runtime/indexer.py +230 -0
- llm_code/runtime/knowledge_compiler.py +232 -0
- llm_code/runtime/memory.py +132 -0
- llm_code/runtime/memory_layers.py +467 -0
- llm_code/runtime/memory_lint.py +252 -0
- llm_code/runtime/model_aliases.py +37 -0
- llm_code/runtime/ollama.py +93 -0
- llm_code/runtime/overlay.py +124 -0
- llm_code/runtime/permissions.py +200 -0
- llm_code/runtime/plan.py +45 -0
- llm_code/runtime/prompt.py +238 -0
- llm_code/runtime/repo_map.py +174 -0
- llm_code/runtime/sandbox.py +116 -0
- llm_code/runtime/session.py +268 -0
- llm_code/runtime/skill_resolver.py +61 -0
- llm_code/runtime/skills.py +133 -0
- llm_code/runtime/speculative.py +75 -0
- llm_code/runtime/streaming_executor.py +216 -0
- llm_code/runtime/telemetry.py +196 -0
- llm_code/runtime/token_budget.py +26 -0
- llm_code/runtime/vcr.py +142 -0
- llm_code/runtime/vision.py +102 -0
- llm_code/swarm/__init__.py +1 -0
- llm_code/swarm/backend_subprocess.py +108 -0
- llm_code/swarm/backend_tmux.py +103 -0
- llm_code/swarm/backend_worktree.py +306 -0
- llm_code/swarm/checkpoint.py +74 -0
- llm_code/swarm/coordinator.py +236 -0
- llm_code/swarm/mailbox.py +88 -0
- llm_code/swarm/manager.py +202 -0
- llm_code/swarm/memory_sync.py +80 -0
- llm_code/swarm/recovery.py +21 -0
- llm_code/swarm/team.py +67 -0
- llm_code/swarm/types.py +31 -0
- llm_code/task/__init__.py +16 -0
- llm_code/task/diagnostics.py +93 -0
- llm_code/task/manager.py +162 -0
- llm_code/task/types.py +112 -0
- llm_code/task/verifier.py +104 -0
- llm_code/tools/__init__.py +0 -0
- llm_code/tools/agent.py +145 -0
- llm_code/tools/agent_roles.py +82 -0
- llm_code/tools/base.py +94 -0
- llm_code/tools/bash.py +565 -0
- llm_code/tools/computer_use_tools.py +278 -0
- llm_code/tools/coordinator_tool.py +75 -0
- llm_code/tools/cron_create.py +90 -0
- llm_code/tools/cron_delete.py +49 -0
- llm_code/tools/cron_list.py +51 -0
- llm_code/tools/deferred.py +92 -0
- llm_code/tools/dump.py +116 -0
- llm_code/tools/edit_file.py +282 -0
- llm_code/tools/git_tools.py +531 -0
- llm_code/tools/glob_search.py +112 -0
- llm_code/tools/grep_search.py +144 -0
- llm_code/tools/ide_diagnostics.py +59 -0
- llm_code/tools/ide_open.py +58 -0
- llm_code/tools/ide_selection.py +52 -0
- llm_code/tools/memory_tools.py +138 -0
- llm_code/tools/multi_edit.py +143 -0
- llm_code/tools/notebook_edit.py +107 -0
- llm_code/tools/notebook_read.py +81 -0
- llm_code/tools/parsing.py +63 -0
- llm_code/tools/read_file.py +154 -0
- llm_code/tools/registry.py +58 -0
- llm_code/tools/search_backends/__init__.py +56 -0
- llm_code/tools/search_backends/brave.py +56 -0
- llm_code/tools/search_backends/duckduckgo.py +129 -0
- llm_code/tools/search_backends/searxng.py +71 -0
- llm_code/tools/search_backends/tavily.py +73 -0
- llm_code/tools/swarm_create.py +109 -0
- llm_code/tools/swarm_delete.py +95 -0
- llm_code/tools/swarm_list.py +44 -0
- llm_code/tools/swarm_message.py +109 -0
- llm_code/tools/task_close.py +79 -0
- llm_code/tools/task_plan.py +79 -0
- llm_code/tools/task_verify.py +90 -0
- llm_code/tools/tool_search.py +65 -0
- llm_code/tools/web_common.py +258 -0
- llm_code/tools/web_fetch.py +223 -0
- llm_code/tools/web_search.py +280 -0
- llm_code/tools/write_file.py +118 -0
- llm_code/tui/__init__.py +1 -0
- llm_code/tui/app.py +2432 -0
- llm_code/tui/chat_view.py +82 -0
- llm_code/tui/chat_widgets.py +309 -0
- llm_code/tui/header_bar.py +46 -0
- llm_code/tui/input_bar.py +349 -0
- llm_code/tui/keybindings.py +142 -0
- llm_code/tui/marketplace.py +210 -0
- llm_code/tui/status_bar.py +72 -0
- llm_code/tui/theme.py +96 -0
- llm_code/utils/__init__.py +0 -0
- llm_code/utils/diff.py +111 -0
- llm_code/utils/errors.py +70 -0
- llm_code/utils/hyperlink.py +73 -0
- llm_code/utils/notebook.py +179 -0
- llm_code/utils/search.py +69 -0
- llm_code/utils/text_normalize.py +28 -0
- llm_code/utils/version_check.py +62 -0
- llm_code/vim/__init__.py +4 -0
- llm_code/vim/engine.py +51 -0
- llm_code/vim/motions.py +172 -0
- llm_code/vim/operators.py +183 -0
- llm_code/vim/text_objects.py +139 -0
- llm_code/vim/transitions.py +279 -0
- llm_code/vim/types.py +68 -0
- llm_code/voice/__init__.py +1 -0
- llm_code/voice/languages.py +43 -0
- llm_code/voice/recorder.py +136 -0
- llm_code/voice/stt.py +36 -0
- llm_code/voice/stt_anthropic.py +66 -0
- llm_code/voice/stt_google.py +32 -0
- llm_code/voice/stt_whisper.py +52 -0
- llmcode_cli-1.0.0.dist-info/METADATA +524 -0
- llmcode_cli-1.0.0.dist-info/RECORD +212 -0
- llmcode_cli-1.0.0.dist-info/WHEEL +4 -0
- llmcode_cli-1.0.0.dist-info/entry_points.txt +2 -0
- llmcode_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
"""StreamingToolCollector and StreamingToolExecutor: route and execute tools during streaming."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import asyncio
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
|
+
|
|
10
|
+
from llm_code.api.types import ToolResultBlock
|
|
11
|
+
from llm_code.tools.parsing import ParsedToolCall
|
|
12
|
+
from llm_code.tools.registry import ToolRegistry
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
pass
|
|
16
|
+
|
|
17
|
+
# Module-level logger for streaming tool execution diagnostics.
logger = logging.getLogger(__name__)

# Thread pool shared for background read-only tool execution during streaming
# (bounded at 8 workers so a burst of tool calls cannot exhaust threads).
_STREAMING_EXECUTOR = ThreadPoolExecutor(max_workers=8, thread_name_prefix="streaming-tool")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _attempt_partial_json_recovery(partial: str) -> dict:
|
|
24
|
+
"""Try to recover a valid dict from partial/malformed JSON.
|
|
25
|
+
|
|
26
|
+
Attempts several repair strategies in order:
|
|
27
|
+
1. Direct parse (already complete)
|
|
28
|
+
2. Append ``}``
|
|
29
|
+
3. Append ``"}``
|
|
30
|
+
4. Append ``"}`` (for unclosed string + object)
|
|
31
|
+
5. Return empty dict as fallback
|
|
32
|
+
"""
|
|
33
|
+
candidates = [
|
|
34
|
+
partial,
|
|
35
|
+
partial + "}",
|
|
36
|
+
partial + '"}',
|
|
37
|
+
partial + '"}}',
|
|
38
|
+
partial + "}}",
|
|
39
|
+
]
|
|
40
|
+
for candidate in candidates:
|
|
41
|
+
try:
|
|
42
|
+
result = json.loads(candidate)
|
|
43
|
+
if isinstance(result, dict):
|
|
44
|
+
return result
|
|
45
|
+
except (json.JSONDecodeError, ValueError):
|
|
46
|
+
continue
|
|
47
|
+
return {}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class StreamingToolExecutor:
    """Execute read-only tools concurrently while the model is still streaming.

    Usage pattern (mirrors the streaming loop in conversation.py):

    .. code-block:: python

        executor = StreamingToolExecutor(registry, permission_policy)

        # Inside streaming loop:
        if isinstance(event, StreamToolUseStart):
            executor.start_tool(event.id, event.name)
        elif isinstance(event, StreamToolUseInputDelta):
            executor.submit(event.id, event.partial_json)

        # After each tool input is complete (StreamToolUseStop or next tool start):
        executor.finalize(tool_use_id)  # triggers background execution for reads

        # After stream ends, collect all results:
        results = await executor.collect_results()

    The executor decides at :meth:`finalize` time whether a tool is read-only and
    concurrency-safe. If yes, it starts a background ``asyncio.Task``. Write
    tools are queued and returned unfired so conversation.py can execute them via
    the normal ``_execute_tool_with_streaming`` path.
    """

    def __init__(self, tool_registry: ToolRegistry, permission_policy: Any = None) -> None:
        self._registry = tool_registry
        # NOTE(review): the permission policy is stored but never consulted in
        # this class — presumably enforced elsewhere before finalize(); confirm.
        self._permissions = permission_policy

        # json_parts accumulation: tool_use_id -> list[str]
        self._json_parts: dict[str, list[str]] = {}
        # tool names: tool_use_id -> name
        self._tool_names: dict[str, str] = {}

        # background tasks for read-only tools: tool_use_id -> Task
        self._read_tasks: dict[str, asyncio.Task] = {}
        # pending write calls (not yet executed)
        self._write_calls: list[ParsedToolCall] = []

    def start_tool(self, tool_use_id: str, name: str) -> None:
        """Register a new tool use beginning (StreamToolUseStart event)."""
        self._tool_names[tool_use_id] = name
        self._json_parts[tool_use_id] = []

    def submit(self, tool_use_id: str, partial_json: str) -> None:
        """Accumulate a partial JSON chunk (StreamToolUseInputDelta event).

        Chunks for ids that were never registered via :meth:`start_tool` are
        silently ignored.
        """
        if tool_use_id in self._json_parts:
            self._json_parts[tool_use_id].append(partial_json)

    def finalize(self, tool_use_id: str) -> None:
        """Mark tool input as complete; launch background execution if read-only.

        For read-only + concurrency-safe tools: starts an asyncio.Task immediately.
        For write tools: queues a ParsedToolCall for later sequential execution.
        """
        name = self._tool_names.get(tool_use_id)
        if name is None:
            logger.debug("finalize called for unknown tool_use_id %s", tool_use_id)
            return

        raw_json = "".join(self._json_parts.get(tool_use_id, []))
        try:
            args = json.loads(raw_json) if raw_json.strip() else {}
        except (json.JSONDecodeError, ValueError):
            # Stream may have truncated the arguments; try to repair them.
            args = _attempt_partial_json_recovery(raw_json)

        tool = self._registry.get(name)
        if tool is not None and tool.is_read_only(args) and tool.is_concurrency_safe(args):
            # Start background execution immediately.
            call = ParsedToolCall(id=tool_use_id, name=name, args=args, source="native")
            # finalize() runs inside the async streaming loop, so a running loop
            # is guaranteed. get_running_loop() replaces the previous
            # get_event_loop() call: the latter is deprecated outside a running
            # loop since Python 3.10 and could silently create a second loop.
            task = asyncio.get_running_loop().create_task(
                self._execute_read_tool(tool_use_id, call, tool, args),
                name=f"streaming-read-{name}-{tool_use_id[:8]}",
            )
            self._read_tasks[tool_use_id] = task
        else:
            # Queue for sequential execution after stream completes
            call = ParsedToolCall(id=tool_use_id, name=name, args=args, source="native")
            self._write_calls.append(call)

    async def _execute_read_tool(
        self,
        tool_use_id: str,
        call: ParsedToolCall,
        tool: Any,
        args: dict,
    ) -> ToolResultBlock:
        """Run the tool in a thread pool and return a ToolResultBlock.

        Exceptions are converted into an error ToolResultBlock so a single
        failing read never aborts the surrounding stream.
        """
        loop = asyncio.get_running_loop()
        try:
            result = await loop.run_in_executor(
                _STREAMING_EXECUTOR,
                lambda: tool.execute(args),
            )
            return ToolResultBlock(
                tool_use_id=tool_use_id,
                content=result.output,
                is_error=result.is_error,
            )
        except Exception as exc:
            logger.warning("Background read tool %s failed: %s", call.name, exc)
            return ToolResultBlock(
                tool_use_id=tool_use_id,
                content=f"Tool execution failed: {exc}",
                is_error=True,
            )

    async def collect_results(self) -> tuple[list[ToolResultBlock], list[ParsedToolCall]]:
        """Wait for all background read tasks; return (read_results, write_calls).

        - ``read_results``: ToolResultBlocks for all read-only tools that ran concurrently
        - ``write_calls``: ParsedToolCalls for write tools that still need execution
        """
        read_results: list[ToolResultBlock] = []
        if self._read_tasks:
            done = await asyncio.gather(*self._read_tasks.values(), return_exceptions=True)
            for item in done:
                if isinstance(item, ToolResultBlock):
                    read_results.append(item)
                elif isinstance(item, BaseException):
                    # _execute_read_tool already catches Exception, so this
                    # only fires for cancellations or truly unexpected errors.
                    logger.error("Unexpected error in background read task: %s", item)

        return read_results, list(self._write_calls)

    def pending_write_count(self) -> int:
        """Return number of write calls waiting for sequential execution."""
        return len(self._write_calls)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class StreamingToolCollector:
    """Collects completed tool calls and decides whether they can run immediately.

    A tool call is eligible for immediate (concurrent) execution when *both*:
    - ``tool.is_read_only(args)`` returns True
    - ``tool.is_concurrency_safe(args)`` returns True

    All other calls (write operations, unknown tools, or tools that are not
    concurrency-safe) are buffered and returned together via :meth:`flush_pending`.
    """

    def __init__(self, tool_registry: ToolRegistry) -> None:
        self._tools = tool_registry
        self._buffered: list[ParsedToolCall] = []

    def on_tool_complete(self, call: ParsedToolCall) -> ParsedToolCall | None:
        """A tool call finished parsing.

        If the tool is read-only and concurrency-safe, return it immediately
        for parallel execution. Otherwise buffer it and return None.
        """
        tool = self._tools.get(call.name)
        if tool is None:
            # Unknown tool: must not run concurrently.
            self._buffered.append(call)
            return None
        if tool.is_read_only(call.args) and tool.is_concurrency_safe(call.args):
            return call
        self._buffered.append(call)
        return None

    def flush_pending(self) -> list[ParsedToolCall]:
        """Return all buffered calls and clear the internal buffer."""
        drained, self._buffered = self._buffered, []
        return drained

    def has_pending(self) -> bool:
        """Return True if there are buffered (write/unsafe) calls waiting."""
        return bool(self._buffered)
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""OpenTelemetry observability for llm-code.
|
|
2
|
+
|
|
3
|
+
All OpenTelemetry imports are lazy — the module works as a no-op when the
|
|
4
|
+
``opentelemetry-*`` packages are not installed.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# ---------------------------------------------------------------------------
|
|
13
|
+
# Config
|
|
14
|
+
# ---------------------------------------------------------------------------
|
|
15
|
+
|
|
16
|
+
@dataclass(frozen=True)
class TelemetryConfig:
    """Immutable configuration for OpenTelemetry export."""

    # Master switch: when False, Telemetry is constructed as a no-op.
    enabled: bool = False
    endpoint: str = "http://localhost:4318"  # OTLP HTTP default
    # Reported as the OTel resource "service.name" and used as tracer/meter name.
    service_name: str = "llm-code"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
# Telemetry class
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
class Telemetry:
    """Thin wrapper around OpenTelemetry tracing and metrics.

    When ``enabled=False`` or the ``opentelemetry-*`` packages are not
    installed every method is a no-op so callers need no guard clauses.
    """

    def __init__(self, config: TelemetryConfig) -> None:
        # Active flag; flipped off permanently if setup fails for any reason.
        self._enabled = config.enabled
        self._tracer: Any = None
        self._cost_counter: Any = None
        self._error_counter: Any = None
        # Status/kind enums stored at setup so methods never re-import
        self._SpanKind: Any = None
        self._StatusCode: Any = None
        self._otel_available = False

        if not self._enabled:
            return

        try:
            self._setup(config)
            self._otel_available = True
        except Exception:
            # If setup fails for any reason (e.g., ImportError), degrade gracefully
            self._enabled = False

    # ------------------------------------------------------------------
    # Setup (only called when enabled and packages are present)
    # ------------------------------------------------------------------

    def _setup(self, config: TelemetryConfig) -> None:
        # Lazy imports: this is the only place the opentelemetry packages are
        # touched, so their absence surfaces as ImportError here and is
        # swallowed by __init__.
        from opentelemetry import trace, metrics
        from opentelemetry.trace import SpanKind, StatusCode
        from opentelemetry.sdk.trace import TracerProvider
        from opentelemetry.sdk.trace.export import BatchSpanProcessor
        from opentelemetry.sdk.metrics import MeterProvider
        from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
        from opentelemetry.sdk.resources import Resource
        from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
        from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter

        # Store enums so methods don't need to re-import
        self._SpanKind = SpanKind
        self._StatusCode = StatusCode

        resource = Resource.create({"service.name": config.service_name})

        # Tracer
        span_exporter = OTLPSpanExporter(endpoint=f"{config.endpoint}/v1/traces")
        tracer_provider = TracerProvider(resource=resource)
        tracer_provider.add_span_processor(BatchSpanProcessor(span_exporter))
        # NOTE(review): set_tracer_provider / set_meter_provider mutate global
        # OTel state — constructing a second enabled Telemetry would replace
        # the first one's providers; confirm only one instance is ever enabled.
        trace.set_tracer_provider(tracer_provider)
        self._tracer = trace.get_tracer(config.service_name)

        # Meter
        metric_exporter = OTLPMetricExporter(endpoint=f"{config.endpoint}/v1/metrics")
        # Export metrics once a minute.
        reader = PeriodicExportingMetricReader(metric_exporter, export_interval_millis=60_000)
        meter_provider = MeterProvider(resource=resource, metric_readers=[reader])
        metrics.set_meter_provider(meter_provider)
        meter = metrics.get_meter(config.service_name)

        self._cost_counter = meter.create_counter(
            name="llm.cost.usd",
            unit="USD",
            description="Cumulative LLM cost in US dollars",
        )
        self._error_counter = meter.create_counter(
            name="llm.errors",
            description="Count of errors by type",
        )

    # ------------------------------------------------------------------
    # Public API — all methods are safe to call unconditionally
    # ------------------------------------------------------------------

    def trace_turn(
        self,
        session_id: str,
        model: str,
        input_tokens: int,
        output_tokens: int,
        duration_ms: float,
    ) -> None:
        """Record a completed LLM conversation turn as an OTel span."""
        if not self._enabled or self._tracer is None:
            return
        try:
            with self._tracer.start_as_current_span(
                "llm.turn",
                kind=self._SpanKind.CLIENT,
            ) as span:
                span.set_attribute("session.id", session_id)
                span.set_attribute("llm.model", model)
                span.set_attribute("llm.tokens.input", input_tokens)
                span.set_attribute("llm.tokens.output", output_tokens)
                span.set_attribute("llm.duration_ms", duration_ms)
                span.set_status(self._StatusCode.OK)
        except Exception:
            # Best-effort: telemetry failures must never surface to the caller.
            pass

    def trace_tool(
        self,
        tool_name: str,
        duration_ms: float,
        is_error: bool = False,
    ) -> None:
        """Record a tool execution as an OTel span."""
        if not self._enabled or self._tracer is None:
            return
        try:
            with self._tracer.start_as_current_span(
                f"tool.{tool_name}",
                kind=self._SpanKind.INTERNAL,
            ) as span:
                span.set_attribute("tool.name", tool_name)
                span.set_attribute("tool.duration_ms", duration_ms)
                span.set_attribute("tool.is_error", is_error)
                if is_error:
                    span.set_status(self._StatusCode.ERROR)
                else:
                    span.set_status(self._StatusCode.OK)
        except Exception:
            # Best-effort: telemetry failures must never surface to the caller.
            pass

    def record_cost(
        self,
        model: str,
        input_tokens: int,
        output_tokens: int,
        cost_usd: float,
    ) -> None:
        """Record LLM cost as an OTel metric counter."""
        if not self._enabled or self._cost_counter is None:
            return
        try:
            self._cost_counter.add(
                cost_usd,
                attributes={
                    "llm.model": model,
                    "llm.tokens.input": input_tokens,
                    "llm.tokens.output": output_tokens,
                },
            )
        except Exception:
            # Best-effort: telemetry failures must never surface to the caller.
            pass

    def record_error(self, error_type: str, message: str) -> None:
        """Record an error event as an OTel counter increment."""
        if not self._enabled or self._error_counter is None:
            return
        try:
            self._error_counter.add(
                1,
                # Message truncated to 256 chars to bound attribute cardinality/size.
                attributes={"error.type": error_type, "error.message": message[:256]},
            )
        except Exception:
            # Best-effort: telemetry failures must never surface to the caller.
            pass
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
# ---------------------------------------------------------------------------
|
|
188
|
+
# Module-level singleton helpers
|
|
189
|
+
# ---------------------------------------------------------------------------
|
|
190
|
+
|
|
191
|
+
# Shared module-level no-op instance. Safe to share: a disabled Telemetry
# never initializes OTel state and all its methods return immediately.
_NOOP = Telemetry(TelemetryConfig(enabled=False))


def get_noop_telemetry() -> Telemetry:
    """Return the shared no-op Telemetry instance."""
    return _NOOP
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Token budget tracking for agentic conversation turns."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class TokenBudget:
    """Tracks output-token consumption against a per-turn target."""

    # Total tokens the turn is allowed to produce.
    target: int
    # Tokens produced so far.
    consumed: int = 0

    def add(self, output_tokens: int) -> None:
        """Record *output_tokens* as consumed."""
        self.consumed = self.consumed + output_tokens

    def remaining(self) -> int:
        """Tokens left before the target is reached (never negative)."""
        left = self.target - self.consumed
        return left if left > 0 else 0

    def should_nudge(self) -> bool:
        """True while the budget has not yet been reached."""
        return not self.is_exhausted()

    def is_exhausted(self) -> bool:
        """True once consumption has met or passed the target."""
        return self.consumed >= self.target

    def nudge_message(self) -> str:
        """Build the reminder string injected into the conversation."""
        left = self.remaining()
        return (
            f"[Token budget: {left:,} tokens remaining out of "
            f"{self.target:,}. Continue working toward the goal.]"
        )
|
llm_code/runtime/vcr.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""VCR session recording and playback for llm-code."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import time
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Iterator
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Known event type tags that appear in a recording's "type" field.
# NOTE(review): nothing in this module enforces membership — VCRRecorder.record()
# accepts any string; presumably validated (if at all) by callers. Confirm.
EVENT_TYPES = (
    "user_input",
    "llm_request",
    "llm_response",
    "tool_call",
    "tool_result",
    "stream_event",
    "error",
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
class VCREvent:
    """A single recorded event with timestamp, type, and payload."""

    # Unix timestamp in seconds (written by VCRRecorder via time.time()).
    ts: float
    # Event category tag; see EVENT_TYPES for the known values.
    type: str
    # JSON-decoded payload; defaults to {} when absent in the recording line.
    data: dict
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class VCRRecorder:
    """Records session events as JSONL lines to a file."""

    def __init__(self, path: Path) -> None:
        self._path = path
        self._closed = False
        self._file = None

    def _ensure_open(self) -> None:
        # Lazily create parent directories and open the file the first time
        # an event is recorded, so constructing a recorder has no side effects.
        if self._file is not None:
            return
        self._path.parent.mkdir(parents=True, exist_ok=True)
        self._file = self._path.open("w", encoding="utf-8")

    def record(self, event_type: str, data: dict) -> None:
        """Write a single JSONL event line with the current timestamp."""
        if self._closed:
            raise RuntimeError("VCRRecorder is closed")
        self._ensure_open()
        line = json.dumps(
            {"ts": time.time(), "type": event_type, "data": data},
            ensure_ascii=False,
        )
        # Flush per event so a crash mid-session loses at most the in-flight line.
        self._file.write(line + "\n")
        self._file.flush()

    def close(self) -> None:
        """Close the recording file."""
        handle, self._file = self._file, None
        if handle is not None:
            handle.close()
        self._closed = True

    # Context manager support
    def __enter__(self) -> "VCRRecorder":
        return self

    def __exit__(self, *args: object) -> None:
        self.close()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class VCRPlayer:
    """Replays a JSONL recording file and provides summary statistics."""

    def __init__(self, path: Path) -> None:
        self._path = path

    def _read_events(self) -> list[VCREvent]:
        """Parse all valid JSONL events from the file.

        Malformed or incomplete lines are skipped silently: a recording may
        be truncated mid-write and playback should still succeed. A missing
        or unreadable file yields an empty list.
        """
        events: list[VCREvent] = []
        try:
            text = self._path.read_text(encoding="utf-8")
        except OSError:
            # OSError covers FileNotFoundError (its subclass); the previous
            # (FileNotFoundError, OSError) tuple was redundant.
            return events

        for line in text.splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
                events.append(VCREvent(
                    ts=float(obj["ts"]),
                    type=str(obj["type"]),
                    data=obj.get("data", {}),
                ))
            except (json.JSONDecodeError, KeyError, TypeError, ValueError):
                continue
        return events

    def replay(self, speed: float = 1.0) -> Iterator[VCREvent]:
        """Yield events from the recording, optionally with timing.

        Args:
            speed: Playback speed multiplier. 0.0 means instant (no sleep).
                1.0 means real-time. 2.0 means double speed.
        """
        events = self._read_events()
        if not events:
            return

        prev_ts: float | None = None
        for event in events:
            if speed > 0.0 and prev_ts is not None:
                # Sleep the recorded gap, scaled by speed; negative gaps
                # (out-of-order timestamps) are skipped.
                delay = (event.ts - prev_ts) / speed
                if delay > 0:
                    time.sleep(delay)
            prev_ts = event.ts
            yield event

    def summary(self) -> dict:
        """Return summary statistics for the recording.

        Returns:
            dict with keys:
                - event_count: total number of events
                - duration: wall-clock seconds from first to last event
                - tool_calls: dict mapping tool name to call count
        """
        # Local import keeps module-level dependencies unchanged.
        from collections import Counter

        events = self._read_events()
        if not events:
            return {"event_count": 0, "duration": 0.0, "tool_calls": {}}

        duration = events[-1].ts - events[0].ts if len(events) > 1 else 0.0

        # Counter replaces the previous manual get()/increment tally.
        tool_calls = Counter(
            event.data.get("name", "unknown")
            for event in events
            if event.type == "tool_call"
        )

        return {
            "event_count": len(events),
            "duration": duration,
            "tool_calls": dict(tool_calls),
        }
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Vision fallback: replace ImageBlocks when the primary model has no vision support."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import dataclasses
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
from llm_code.api.types import ImageBlock, Message, TextBlock
|
|
8
|
+
from llm_code.runtime.config import VisionConfig
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class VisionFallback:
    """Holds a VisionConfig and, when configured, can query a vision API for image descriptions."""

    def __init__(self, config: VisionConfig) -> None:
        self._config = config

    def is_configured(self) -> bool:
        """Return True if both vision_model and vision_api are non-empty."""
        cfg = self._config
        return bool(cfg.vision_model and cfg.vision_api)

    async def describe_image(self, image: ImageBlock) -> str:
        """Send the image to the configured vision API and return its textual description."""
        # Imported at call time to avoid a module-level import cycle.
        from llm_code.api.openai_compat import OpenAICompatProvider
        from llm_code.api.types import Message, MessageRequest, TextBlock

        cfg = self._config
        # Resolve the API key from the environment only when an env-var
        # name is configured; otherwise use an empty key.
        key = os.environ.get(cfg.vision_api_key_env, "") if cfg.vision_api_key_env else ""

        provider = OpenAICompatProvider(
            base_url=cfg.vision_api,
            api_key=key,
            model_name=cfg.vision_model,
        )
        try:
            prompt = Message(
                role="user",
                content=(
                    image,
                    TextBlock(text="Describe this image in detail."),
                ),
            )
            request = MessageRequest(
                model=cfg.vision_model,
                messages=(prompt,),
                stream=False,
            )
            response = await provider.send_message(request)
            # First text block wins; no text blocks yields an empty string.
            texts = [b.text for b in response.content if isinstance(b, TextBlock)]
            return texts[0] if texts else ""
        finally:
            # Always release the provider, even if the request fails.
            await provider.close()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def preprocess_images(
    msg: Message,
    supports_images: bool,
    vision_fallback: "VisionFallback | None",
    return_warnings: bool = False,
) -> "Message | tuple[Message, list[str]]":
    """Adjust a message's image blocks according to the model's vision support.

    - Model supports images, or the message contains no images → returned
      unchanged.
    - A configured vision_fallback exists → every ImageBlock becomes a
      placeholder TextBlock (this sync path never calls describe_image).
    - Neither → ImageBlocks are removed and a warning is recorded.

    Args:
        msg: The message whose content blocks are inspected.
        supports_images: Whether the primary model accepts image input.
        vision_fallback: Optional fallback; decides placeholder vs strip.
        return_warnings: When True, return (message, warnings) instead of
            just the message.
    """
    if supports_images or not any(isinstance(b, ImageBlock) for b in msg.content):
        return (msg, []) if return_warnings else msg

    notes: list[str] = []

    if vision_fallback is not None and vision_fallback.is_configured():
        # Swap each image for a placeholder text block; the async
        # describe_image call is deliberately not made on this sync path.
        replaced = tuple(
            TextBlock(text="[image: vision description not yet available]")
            if isinstance(b, ImageBlock)
            else b
            for b in msg.content
        )
        result = dataclasses.replace(msg, content=replaced)
    else:
        # No usable fallback: drop the images, keep every other block.
        kept = tuple(b for b in msg.content if not isinstance(b, ImageBlock))
        result = dataclasses.replace(msg, content=kept)
        notes.append(
            "One or more images were stripped because the model does not support "
            "vision and no vision fallback is configured."
        )

    return (result, notes) if return_warnings else result
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Swarm: multi-agent collaboration module."""
|