devcopilot 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. api/__init__.py +17 -0
  2. api/admin_config.py +1303 -0
  3. api/admin_routes.py +287 -0
  4. api/admin_static/admin.css +459 -0
  5. api/admin_static/admin.js +497 -0
  6. api/admin_static/index.html +77 -0
  7. api/admin_urls.py +34 -0
  8. api/app.py +194 -0
  9. api/command_utils.py +164 -0
  10. api/dependencies.py +144 -0
  11. api/detection.py +152 -0
  12. api/gateway_model_ids.py +54 -0
  13. api/model_catalog.py +133 -0
  14. api/model_router.py +125 -0
  15. api/models/__init__.py +45 -0
  16. api/models/anthropic.py +234 -0
  17. api/models/openai_responses.py +28 -0
  18. api/models/responses.py +60 -0
  19. api/optimization_handlers.py +154 -0
  20. api/request_pipeline.py +424 -0
  21. api/routes.py +156 -0
  22. api/runtime.py +334 -0
  23. api/validation_log.py +48 -0
  24. api/web_server_tools.py +22 -0
  25. api/web_tools/__init__.py +17 -0
  26. api/web_tools/constants.py +15 -0
  27. api/web_tools/egress.py +99 -0
  28. api/web_tools/outbound.py +278 -0
  29. api/web_tools/parsers.py +104 -0
  30. api/web_tools/request.py +87 -0
  31. api/web_tools/streaming.py +206 -0
  32. cli/__init__.py +5 -0
  33. cli/claude_env.py +12 -0
  34. cli/entrypoints.py +166 -0
  35. cli/env.example +209 -0
  36. cli/launchers/__init__.py +1 -0
  37. cli/launchers/claude.py +84 -0
  38. cli/launchers/codex.py +204 -0
  39. cli/launchers/codex_model_catalog.py +186 -0
  40. cli/launchers/common.py +93 -0
  41. cli/managed/__init__.py +6 -0
  42. cli/managed/claude.py +215 -0
  43. cli/managed/manager.py +157 -0
  44. cli/managed/session.py +260 -0
  45. cli/process_registry.py +78 -0
  46. config/__init__.py +5 -0
  47. config/constants.py +13 -0
  48. config/logging_config.py +159 -0
  49. config/nim.py +118 -0
  50. config/paths.py +91 -0
  51. config/provider_catalog.py +259 -0
  52. config/provider_ids.py +7 -0
  53. config/settings.py +538 -0
  54. core/__init__.py +1 -0
  55. core/anthropic/__init__.py +46 -0
  56. core/anthropic/content.py +31 -0
  57. core/anthropic/conversion.py +587 -0
  58. core/anthropic/emitted_sse_tracker.py +346 -0
  59. core/anthropic/errors.py +70 -0
  60. core/anthropic/native_messages_request.py +280 -0
  61. core/anthropic/native_sse_block_policy.py +313 -0
  62. core/anthropic/provider_stream_error.py +34 -0
  63. core/anthropic/server_tool_sse.py +14 -0
  64. core/anthropic/sse.py +440 -0
  65. core/anthropic/stream_contracts.py +205 -0
  66. core/anthropic/stream_recovery.py +346 -0
  67. core/anthropic/stream_recovery_session.py +133 -0
  68. core/anthropic/thinking.py +140 -0
  69. core/anthropic/tokens.py +117 -0
  70. core/anthropic/tools.py +212 -0
  71. core/anthropic/utils.py +9 -0
  72. core/openai_responses/__init__.py +5 -0
  73. core/openai_responses/adapter.py +31 -0
  74. core/openai_responses/anthropic_sse.py +59 -0
  75. core/openai_responses/errors.py +22 -0
  76. core/openai_responses/events.py +19 -0
  77. core/openai_responses/ids.py +21 -0
  78. core/openai_responses/input.py +258 -0
  79. core/openai_responses/items.py +37 -0
  80. core/openai_responses/reasoning.py +52 -0
  81. core/openai_responses/stream.py +25 -0
  82. core/openai_responses/stream_state.py +654 -0
  83. core/openai_responses/tools.py +374 -0
  84. core/openai_responses/usage.py +37 -0
  85. core/rate_limit.py +60 -0
  86. core/trace.py +216 -0
  87. devcopilot-0.2.0.dist-info/METADATA +687 -0
  88. devcopilot-0.2.0.dist-info/RECORD +189 -0
  89. devcopilot-0.2.0.dist-info/WHEEL +4 -0
  90. devcopilot-0.2.0.dist-info/entry_points.txt +6 -0
  91. devcopilot-0.2.0.dist-info/licenses/LICENSE +21 -0
  92. messaging/__init__.py +26 -0
  93. messaging/cli_event_constants.py +67 -0
  94. messaging/command_context.py +66 -0
  95. messaging/command_dispatcher.py +37 -0
  96. messaging/commands.py +275 -0
  97. messaging/event_parser.py +181 -0
  98. messaging/limiter.py +300 -0
  99. messaging/models.py +36 -0
  100. messaging/node_event_pipeline.py +127 -0
  101. messaging/node_runner.py +342 -0
  102. messaging/platforms/__init__.py +15 -0
  103. messaging/platforms/base.py +228 -0
  104. messaging/platforms/discord.py +567 -0
  105. messaging/platforms/factory.py +103 -0
  106. messaging/platforms/outbox.py +144 -0
  107. messaging/platforms/telegram.py +688 -0
  108. messaging/platforms/voice_flow.py +295 -0
  109. messaging/rendering/__init__.py +3 -0
  110. messaging/rendering/discord_markdown.py +318 -0
  111. messaging/rendering/markdown_tables.py +49 -0
  112. messaging/rendering/profiles.py +55 -0
  113. messaging/rendering/telegram_markdown.py +327 -0
  114. messaging/safe_diagnostics.py +17 -0
  115. messaging/session.py +334 -0
  116. messaging/transcript.py +581 -0
  117. messaging/transcription.py +164 -0
  118. messaging/trees/__init__.py +15 -0
  119. messaging/trees/data.py +482 -0
  120. messaging/trees/manager.py +433 -0
  121. messaging/trees/processor.py +179 -0
  122. messaging/trees/repository.py +177 -0
  123. messaging/turn_intake.py +235 -0
  124. messaging/ui_updates.py +101 -0
  125. messaging/voice.py +76 -0
  126. messaging/workflow.py +200 -0
  127. providers/__init__.py +31 -0
  128. providers/base.py +152 -0
  129. providers/cerebras/__init__.py +7 -0
  130. providers/cerebras/client.py +31 -0
  131. providers/cerebras/request.py +55 -0
  132. providers/codestral/__init__.py +7 -0
  133. providers/codestral/client.py +34 -0
  134. providers/deepseek/__init__.py +11 -0
  135. providers/deepseek/client.py +51 -0
  136. providers/deepseek/request.py +475 -0
  137. providers/defaults.py +41 -0
  138. providers/error_mapping.py +309 -0
  139. providers/exceptions.py +113 -0
  140. providers/fireworks/__init__.py +5 -0
  141. providers/fireworks/client.py +45 -0
  142. providers/fireworks/request.py +48 -0
  143. providers/gemini/__init__.py +7 -0
  144. providers/gemini/client.py +49 -0
  145. providers/gemini/request.py +199 -0
  146. providers/groq/__init__.py +7 -0
  147. providers/groq/client.py +31 -0
  148. providers/groq/request.py +83 -0
  149. providers/kimi/__init__.py +10 -0
  150. providers/kimi/client.py +53 -0
  151. providers/kimi/request.py +42 -0
  152. providers/llamacpp/__init__.py +3 -0
  153. providers/llamacpp/client.py +16 -0
  154. providers/lmstudio/__init__.py +5 -0
  155. providers/lmstudio/client.py +16 -0
  156. providers/mistral/__init__.py +7 -0
  157. providers/mistral/client.py +31 -0
  158. providers/mistral/request.py +37 -0
  159. providers/model_listing.py +133 -0
  160. providers/nvidia_nim/__init__.py +7 -0
  161. providers/nvidia_nim/client.py +91 -0
  162. providers/nvidia_nim/request.py +430 -0
  163. providers/nvidia_nim/voice.py +95 -0
  164. providers/ollama/__init__.py +7 -0
  165. providers/ollama/client.py +39 -0
  166. providers/open_router/__init__.py +7 -0
  167. providers/open_router/client.py +124 -0
  168. providers/open_router/request.py +42 -0
  169. providers/opencode/__init__.py +11 -0
  170. providers/opencode/client.py +31 -0
  171. providers/opencode/request.py +35 -0
  172. providers/rate_limit.py +300 -0
  173. providers/registry.py +527 -0
  174. providers/transports/__init__.py +1 -0
  175. providers/transports/anthropic_messages/__init__.py +5 -0
  176. providers/transports/anthropic_messages/http.py +118 -0
  177. providers/transports/anthropic_messages/recovery.py +206 -0
  178. providers/transports/anthropic_messages/stream.py +295 -0
  179. providers/transports/anthropic_messages/transport.py +236 -0
  180. providers/transports/openai_chat/__init__.py +5 -0
  181. providers/transports/openai_chat/recovery.py +217 -0
  182. providers/transports/openai_chat/stream.py +384 -0
  183. providers/transports/openai_chat/tool_calls.py +293 -0
  184. providers/transports/openai_chat/transport.py +156 -0
  185. providers/wafer/__init__.py +10 -0
  186. providers/wafer/client.py +50 -0
  187. providers/zai/__init__.py +10 -0
  188. providers/zai/client.py +46 -0
  189. providers/zai/request.py +42 -0
cli/managed/session.py ADDED
@@ -0,0 +1,260 @@
1
+ """Managed Claude Code subprocess session."""
2
+
3
+ import asyncio
4
+ import os
5
+ from collections.abc import AsyncGenerator
6
+
7
+ from loguru import logger
8
+
9
+ from cli.process_registry import kill_pid_tree_best_effort, register_pid, unregister_pid
10
+ from core.trace import trace_event
11
+
12
+ from .claude import (
13
+ ManagedClaudeConfig,
14
+ ManagedClaudeParseState,
15
+ ManagedClaudeTaskRequest,
16
+ build_managed_claude_invocation,
17
+ parse_managed_claude_stdout_line,
18
+ )
19
+
# Cap stderr capture so a runaway child cannot exhaust memory; pipe is still drained.
_MAX_STDERR_CAPTURE_BYTES = 256 * 1024


class ManagedClaudeSession:
    """Manages a single persistent Claude Code subprocess.

    One session runs at most one CLI task at a time (serialized by an
    ``asyncio.Lock``) and streams the events parsed from the child's stdout.
    """

    def __init__(
        self,
        workspace_path: str,
        api_url: str,
        allowed_dirs: list[str] | None = None,
        plans_directory: str | None = None,
        claude_bin: str = "claude",
        auth_token: str = "",
        *,
        log_raw_cli_diagnostics: bool = False,
    ):
        """Normalize the paths and store the launch configuration.

        Args:
            workspace_path: Working directory; made absolute and normalized.
            api_url: Stored on the config as ``api_url``.
            allowed_dirs: Extra directories; each one is normalized.
            plans_directory: Stored on the config as ``plans_directory``.
            claude_bin: Stored on the config as ``claude_bin``.
            auth_token: Stored on the config as ``auth_token``.
            log_raw_cli_diagnostics: When true, raw stderr text and exception
                messages are logged; otherwise only sizes and type names.
        """
        self.config = ManagedClaudeConfig(
            workspace_path=os.path.normpath(os.path.abspath(workspace_path)),
            api_url=api_url,
            allowed_dirs=[os.path.normpath(d) for d in (allowed_dirs or [])],
            plans_directory=plans_directory,
            claude_bin=claude_bin,
            auth_token=auth_token,
        )
        # Mirror the config values as plain attributes.
        self.workspace = self.config.workspace_path
        self.api_url = self.config.api_url
        self.allowed_dirs = self.config.allowed_dirs
        self.plans_directory = self.config.plans_directory
        self.claude_bin = self.config.claude_bin
        self.auth_token = self.config.auth_token
        self._log_raw_cli_diagnostics = log_raw_cli_diagnostics
        self.process: asyncio.subprocess.Process | None = None
        self.current_session_id: str | None = None
        self._is_busy = False
        self._cli_lock = asyncio.Lock()

    @staticmethod
    async def _drain_stderr_bounded(
        process: asyncio.subprocess.Process,
        *,
        max_bytes: int = _MAX_STDERR_CAPTURE_BYTES,
    ) -> bytes:
        """Read stderr concurrently with stdout to avoid subprocess pipe deadlocks.

        Retains at most ``max_bytes`` for logging; any excess is discarded, but
        the pipe is read until EOF so a noisy child cannot fill the buffer and
        block forever.

        Returns:
            The retained stderr bytes, or ``b""`` when the process has no
            stderr pipe.
        """
        if not process.stderr:
            return b""
        parts: list[bytes] = []
        received = 0
        while True:
            chunk = await process.stderr.read(65_536)
            if not chunk:
                break
            # Once the cap is reached ``take`` is <= 0: keep reading and
            # discarding until EOF.
            take = min(len(chunk), max_bytes - received)
            if take > 0:
                parts.append(chunk[:take])
                received += take
        return b"".join(parts)

    @property
    def is_busy(self) -> bool:
        """Check if a task is currently running."""
        return self._is_busy

    async def start_task(
        self, prompt: str, session_id: str | None = None, fork_session: bool = False
    ) -> AsyncGenerator[dict]:
        """
        Start a new task or continue an existing session.

        Args:
            prompt: The user's message/prompt
            session_id: Optional session ID to resume
            fork_session: Forwarded unchanged to the invocation builder

        Yields:
            Event dictionaries from the CLI, then an ``error`` event when the
            child wrote anything to stderr, then a final ``exit`` event
            carrying the return code.
        """
        async with self._cli_lock:
            self._is_busy = True
            # Everything after the flag is set lives inside the try so that a
            # failure while building or tracing the invocation cannot leave
            # the session permanently marked busy.
            try:
                invocation = build_managed_claude_invocation(
                    config=self.config,
                    request=ManagedClaudeTaskRequest(
                        prompt=prompt,
                        session_id=session_id,
                        fork_session=fork_session,
                    ),
                    base_env=os.environ,
                )

                trace_event(
                    stage="claude_cli",
                    event="claude_cli.process.launch",
                    source="claude_cli",
                    **invocation.trace_metadata,
                )

                self.process = await asyncio.create_subprocess_exec(
                    *invocation.argv,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE,
                    cwd=invocation.cwd,
                    env=invocation.env,
                )
                if self.process and self.process.pid:
                    register_pid(self.process.pid)

                if not self.process or not self.process.stdout:
                    yield {"type": "exit", "code": 1}
                    return

                parse_state = ManagedClaudeParseState(
                    log_raw_cli_diagnostics=self._log_raw_cli_diagnostics
                )
                buffer = bytearray()
                stderr_task: asyncio.Task[bytes] | None = None
                if self.process.stderr:
                    stderr_task = asyncio.create_task(
                        self._drain_stderr_bounded(self.process)
                    )

                try:
                    while True:
                        chunk = await self.process.stdout.read(65536)
                        if not chunk:
                            # EOF: flush a trailing line that had no newline.
                            if buffer:
                                line_str = buffer.decode(
                                    "utf-8", errors="replace"
                                ).strip()
                                if line_str:
                                    async for event in self._handle_line_gen(
                                        line_str, parse_state
                                    ):
                                        yield event
                            break

                        buffer.extend(chunk)

                        while (newline_pos := buffer.find(b"\n")) != -1:
                            line = buffer[:newline_pos]
                            # Drop the consumed line in place instead of
                            # copying the remainder into a new bytearray.
                            del buffer[: newline_pos + 1]

                            line_str = line.decode("utf-8", errors="replace").strip()
                            if line_str:
                                async for event in self._handle_line_gen(
                                    line_str, parse_state
                                ):
                                    yield event
                except BaseException:
                    # Cancellation, a parser error, or the consumer closing the
                    # generator all abandon the stdout loop. The child must be
                    # stopped before the ``finally`` below waits for stderr
                    # EOF; otherwise a child blocked on a full stdout pipe
                    # would never close stderr and this coroutine would hang
                    # while holding the CLI lock.
                    await asyncio.shield(self.stop())
                    raise
                finally:
                    stderr_bytes = b""
                    if stderr_task is not None:
                        stderr_bytes = await stderr_task

                stderr_text = None
                if stderr_bytes:
                    stderr_text = stderr_bytes.decode("utf-8", errors="replace").strip()
                    if stderr_text:
                        if self._log_raw_cli_diagnostics:
                            logger.error("Claude CLI stderr: {}", stderr_text)
                        else:
                            # Without raw diagnostics only sizes are logged.
                            logger.error(
                                "Claude CLI stderr: bytes={} text_chars={}",
                                len(stderr_bytes),
                                len(stderr_text),
                            )
                        logger.info("CLI_SESSION: Yielding error event from stderr")
                        yield {"type": "error", "error": {"message": stderr_text}}

                return_code = await self.process.wait()
                logger.info(
                    f"Claude CLI exited with code {return_code}, stderr_present={bool(stderr_text)}"
                )
                if return_code != 0 and not stderr_text:
                    logger.warning(
                        f"CLI_SESSION: Process exited with code {return_code} but no stderr captured"
                    )
                yield {
                    "type": "exit",
                    "code": return_code,
                    "stderr": stderr_text,
                }
            finally:
                self._is_busy = False
                if self.process and self.process.pid:
                    unregister_pid(self.process.pid)

    async def _handle_line_gen(
        self, line_str: str, parse_state: ManagedClaudeParseState
    ) -> AsyncGenerator[dict]:
        """Process a single line and yield events.

        A ``session_info`` event carrying a string ``session_id`` also updates
        ``current_session_id`` before the event is passed on.
        """
        for event in parse_managed_claude_stdout_line(line_str, parse_state):
            if isinstance(event, dict) and event.get("type") == "session_info":
                session_id = event.get("session_id")
                if isinstance(session_id, str):
                    self.current_session_id = session_id
            yield event

    async def stop(self) -> bool:
        """Stop the CLI process.

        Returns:
            ``True`` when a running process was terminated and reaped;
            ``False`` when there was nothing to stop or stopping failed.
        """
        if not self.process or self.process.returncode is not None:
            return False
        try:
            logger.info(f"Stopping Claude CLI process {self.process.pid}")
            kill_pid_tree_best_effort(self.process.pid)
            try:
                await asyncio.wait_for(self.process.wait(), timeout=5.0)
            except TimeoutError:
                # The tree kill did not end the process in time: force it.
                self.process.kill()
                await self.process.wait()
            if self.process and self.process.pid:
                unregister_pid(self.process.pid)
            return True
        except Exception as e:
            if self._log_raw_cli_diagnostics:
                logger.error(
                    "Error stopping process: {}: {}",
                    type(e).__name__,
                    e,
                )
            else:
                logger.error(
                    "Error stopping process: exc_type={}",
                    type(e).__name__,
                )
            return False
cli/process_registry.py ADDED
@@ -0,0 +1,78 @@
1
+ """Track and clean up spawned CLI subprocesses.
2
+
3
+ This is a safety net for cases where the server is interrupted (Ctrl+C) and the
4
+ FastAPI lifespan cleanup doesn't run to completion. We only track processes we
5
+ spawn so we don't accidentally kill unrelated system processes.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import atexit
11
+ import os
12
+ import signal
13
+ import subprocess
14
+ import threading
15
+
16
+ from loguru import logger
17
+
# Registry state: the lock guards both the pid set and the hook flag.
_lock = threading.Lock()
_pids: set[int] = set()
_atexit_registered = False


def ensure_atexit_registered() -> None:
    """Register ``kill_all_best_effort`` with ``atexit``, at most once."""
    global _atexit_registered
    with _lock:
        if not _atexit_registered:
            atexit.register(kill_all_best_effort)
            _atexit_registered = True
30
+
31
+
def register_pid(pid: int) -> None:
    """Add *pid* to the tracked set; falsy pids are ignored.

    Also makes sure the exit-time cleanup hook is installed.
    """
    if pid:
        ensure_atexit_registered()
        with _lock:
            _pids.add(int(pid))
38
+
39
+
def unregister_pid(pid: int) -> None:
    """Remove *pid* from the tracked set; falsy or unknown pids are ignored."""
    if pid:
        with _lock:
            _pids.discard(int(pid))
45
+
46
+
def kill_pid_tree_best_effort(pid: int) -> None:
    """Kill a tracked process and its children where the platform supports it.

    On Windows this runs ``taskkill /T /F``; elsewhere it sends ``SIGTERM``
    to *pid* only. Failures are logged at debug level and never raised.

    Args:
        pid: Process id to terminate; a falsy value is a no-op.
    """
    if not pid:
        return
    if os.name == "nt":
        try:
            # /T kills child processes, /F forces termination.
            subprocess.run(
                ["taskkill", "/PID", str(pid), "/T", "/F"],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=False,
            )
        except Exception as e:
            # loguru formats with str.format, so placeholders must be "{}";
            # "%s" would be logged literally and drop the pid and error.
            logger.debug("process_registry: taskkill failed pid={}: {}", pid, e)
        return

    # Best-effort fallback for non-Windows.
    try:
        os.kill(pid, signal.SIGTERM)
    except Exception as e:
        logger.debug("process_registry: terminate failed pid={}: {}", pid, e)
69
+
70
+
def kill_all_best_effort() -> None:
    """Kill any still-running registered pids (best-effort).

    The tracked set is copied and emptied under the lock; the kills happen
    after the lock has been released.
    """
    with _lock:
        snapshot = [*_pids]
        _pids.clear()

    for tracked_pid in snapshot:
        kill_pid_tree_best_effort(tracked_pid)
config/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Configuration management."""
2
+
3
+ from .settings import Settings, get_settings
4
+
5
+ __all__ = ["Settings", "get_settings"]
config/constants.py ADDED
@@ -0,0 +1,13 @@
1
+ """Shared defaults used by config models and provider adapters."""
2
+
3
+ # HTTP client connect timeout (seconds). Keep aligned with README.md and .env.example.
4
+ HTTP_CONNECT_TIMEOUT_DEFAULT = 10.0
5
+
6
+ # Anthropic Messages API default when the client omits max_tokens.
7
+ ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS = 81920
8
+
9
+ # Max bytes read from a non-200 native messages response when verbose error logging is on.
10
+ NATIVE_MESSAGES_ERROR_BODY_LOG_CAP_BYTES = 4096
11
+
12
+ # Max upstream error bytes shown to users for copy/paste diagnostics.
13
+ PROVIDER_ERROR_BODY_DISPLAY_CAP_BYTES = 16384
config/logging_config.py ADDED
@@ -0,0 +1,159 @@
1
+ """Loguru-based structured logging configuration.
2
+
3
+ Structured logs are written as JSON lines to a configurable path (default
4
+ ``logs/server.log``). Stdlib logging is intercepted and funneled to loguru.
5
+ Context vars (request_id, node_id, chat_id) from contextualize() are
6
+ included at top level for easy grep/filter.
7
+ """
8
+
9
+ import json
10
+ import logging
11
+ import re
12
+ import threading
13
+ from pathlib import Path
14
+
15
+ from loguru import logger
16
+
17
+ _configured = False
18
+
19
+ # Loguru ``logger.bind()`` key used by structured TRACE payloads; ``core/trace.py``
20
+ # uses the identical string constant ``TRACE_PAYLOAD_BINDING``.
21
+ _TRACE_PAYLOAD_BINDING = "trace_payload"
22
+
23
+ # Context keys we promote to top-level JSON for traceability / grep
24
+ _CONTEXT_KEYS = (
25
+ "request_id",
26
+ "node_id",
27
+ "chat_id",
28
+ "claude_session_id",
29
+ "http_method",
30
+ "http_path",
31
+ )
32
+
33
+ _TELEGRAM_BOT_RE = re.compile(
34
+ r"(https?://api\.telegram\.org/)bot([0-9]+:[A-Za-z0-9_-]+)(/?)",
35
+ re.IGNORECASE,
36
+ )
37
+ # Authorization: Bearer <token> (HTTP client / proxy debug lines)
38
+ _AUTH_BEARER_RE = re.compile(
39
+ r"(\bAuthorization\s*:\s*Bearer\s+)([^\s'\"]+)",
40
+ re.IGNORECASE,
41
+ )
42
+
43
+
44
+ def _redact_sensitive_substrings(message: str) -> str:
45
+ """Remove obvious API tokens and secrets before JSON log line emission."""
46
+ text = _TELEGRAM_BOT_RE.sub(r"\1bot<redacted>\3", message)
47
+ return _AUTH_BEARER_RE.sub(r"\1<redacted>", text)
48
+
49
+
def _serialize_with_context(record) -> str:
    """Build the JSON log line for *record* and return the format template.

    The serialized JSON is stored on the record under ``_json``; the returned
    template ``"{_json}\\n"`` refers to that key. Context values come first,
    then trace payload entries, which never override a key already present.
    """
    bound = record.get("extra", {})
    payload = {
        "time": str(record["time"]),
        "level": record["level"].name,
        "message": _redact_sensitive_substrings(str(record["message"])),
        "module": record["name"],
        "function": record["function"],
        "line": record["line"],
    }

    # Promote the known context values that are bound and not None.
    payload.update(
        (key, bound[key]) for key in _CONTEXT_KEYS if bound.get(key) is not None
    )

    trace_fields = bound.get(_TRACE_PAYLOAD_BINDING)
    if isinstance(trace_fields, dict):
        for name, value in trace_fields.items():
            # Existing keys (base fields, context values) win over the payload.
            payload.setdefault(name, value)
        payload["trace"] = True

    record["_json"] = json.dumps(payload, default=str)
    return "{_json}\n"
75
+
76
+
class InterceptHandler(logging.Handler):
    """Redirect stdlib logging to loguru."""

    def __init__(self) -> None:
        super().__init__()
        # Per-thread flag marking that an emit is already in progress.
        self._local = threading.local()

    def emit(self, record: logging.LogRecord) -> None:
        """Forward *record* to loguru, dropping records emitted re-entrantly."""
        guard = self._local
        if getattr(guard, "active", False):
            # Avoid deadlock when nested stdlib records fire during a loguru emit.
            return

        guard.active = True
        try:
            # Use the loguru level of the same name when one exists, else
            # fall back to the numeric stdlib level.
            try:
                loguru_level = logger.level(record.levelname).name
            except ValueError:
                loguru_level = record.levelno

            # Skip frames that belong to the logging module itself.
            caller = logging.currentframe()
            hops = 2
            while caller is not None and caller.f_code.co_filename == logging.__file__:
                caller = caller.f_back
                hops += 1

            forwarder = logger.opt(depth=hops, exception=record.exc_info)
            forwarder.log(loguru_level, record.getMessage())
        finally:
            guard.active = False
105
+
106
+
def configure_logging(
    log_file: str | Path, *, force: bool = False, verbose_third_party: bool = False
) -> None:
    """Configure loguru with JSON output to log_file and intercept stdlib logging.

    Idempotent: skips if already configured (e.g. hot reload).
    Use force=True to reconfigure (e.g. in tests with a different log path).

    When ``verbose_third_party`` is false, noisy HTTP and Telegram loggers are capped
    at WARNING unless explicitly configured otherwise.

    Args:
        log_file: Path of the JSON-lines log file; parent directories are
            created and the file is truncated.
        force: Reconfigure even when logging was already configured.
        verbose_third_party: Leave the listed third-party loggers at NOTSET
            instead of capping them at WARNING.
    """
    global _configured
    if _configured and not force:
        return

    # Remove default loguru handler (writes to stderr)
    logger.remove()

    log_path = Path(log_file)
    log_path.parent.mkdir(parents=True, exist_ok=True)

    # Truncate log file on fresh start for clean debugging
    log_path.write_text("")

    # Add file sink: JSON lines, DEBUG level, context vars at top level
    logger.add(
        log_file,
        level="DEBUG",
        format=_serialize_with_context,
        encoding="utf-8",
        mode="a",
        rotation="50 MB",
        enqueue=True,
    )

    # Intercept stdlib logging: route all root logger output to loguru
    intercept = InterceptHandler()
    logging.root.handlers = [intercept]
    logging.root.setLevel(logging.DEBUG)

    third_party = (
        "httpx",
        "httpcore",
        "httpcore.http11",
        "httpcore.connection",
        "telegram",
        "telegram.ext",
    )
    for name in third_party:
        logging.getLogger(name).setLevel(
            logging.WARNING if not verbose_third_party else logging.NOTSET
        )

    # Mark as configured only after every step succeeded. If setup raised
    # (e.g. the log directory is not writable), a later non-forced call must
    # retry instead of silently returning with no sink installed.
    _configured = True
config/nim.py ADDED
@@ -0,0 +1,118 @@
1
+ """NVIDIA NIM settings (fixed values, no env config)."""
2
+
3
+ from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator
4
+
5
+ from config.constants import ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS
6
+
7
+
class NimSettings(BaseModel):
    """Fixed NVIDIA NIM settings (not configurable via env).

    Every "before" validator treats ``None`` and the empty string as "not
    provided" and substitutes the field default (or ``None`` for the optional
    fields). Unknown fields are rejected.
    """

    temperature: float = Field(
        1.0, ge=0.0, le=2.0, description="Sampling temperature, must be >=0 and <=2."
    )
    top_p: float = Field(
        1.0, ge=0.0, le=1.0, description="Nucleus sampling probability. [0,1]"
    )
    top_k: int = -1
    max_tokens: int = Field(
        ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS,
        ge=1,
        description="Maximum number of tokens in output.",
    )
    presence_penalty: float = Field(0.0, ge=-2.0, le=2.0)
    frequency_penalty: float = Field(0.0, ge=-2.0, le=2.0)
    min_p: float = Field(
        0.0, ge=0.0, le=1.0, description="Minimum probability threshold [0,1]."
    )
    repetition_penalty: float = Field(
        1.0, ge=0.0, description="Penalty for repeated tokens. Must be >=0."
    )
    seed: int | None = None
    stop: str | None = None
    parallel_tool_calls: bool = True
    ignore_eos: bool = False
    min_tokens: int = Field(0, ge=0, description="Minimum tokens in the response.")
    chat_template: str | None = None
    request_id: str | None = None

    model_config = ConfigDict(extra="forbid")

    @field_validator("top_k", mode="before")
    @classmethod
    def validate_top_k(cls, v, info: ValidationInfo):
        """Coerce ``top_k`` to an int; empty input becomes -1.

        Raises:
            ValueError: If the value cannot be converted to an int or is
                below -1.
        """
        if v is None or v == "":
            return -1
        try:
            int_v = int(v)
        except (TypeError, ValueError) as err:
            # Same guard as the other numeric validators: a non-numeric type
            # must surface as a ValueError rather than a raw TypeError.
            raise ValueError(
                f"{info.field_name} must be an int. Got {type(v).__name__}."
            ) from err
        if int_v < -1:
            raise ValueError(f"{info.field_name} must be -1 or >= 0")
        return int_v

    @field_validator(
        "temperature",
        "top_p",
        "min_p",
        "presence_penalty",
        "frequency_penalty",
        "repetition_penalty",
        mode="before",
    )
    @classmethod
    def validate_float_fields(cls, v, info: ValidationInfo):
        """Coerce a sampling parameter to float; empty input yields its default.

        Raises:
            ValueError: If the value cannot be converted to a float.
        """
        field_defaults = {
            "temperature": 1.0,
            "top_p": 1.0,
            "min_p": 0.0,
            "presence_penalty": 0.0,
            "frequency_penalty": 0.0,
            "repetition_penalty": 1.0,
        }
        if v is None or v == "":
            key = info.field_name or "temperature"
            return field_defaults.get(key, 1.0)
        try:
            val = float(v)
        except (TypeError, ValueError) as err:
            raise ValueError(
                f"{info.field_name} must be a float. Got {type(v).__name__}."
            ) from err
        return val

    @field_validator("max_tokens", "min_tokens", mode="before")
    @classmethod
    def validate_int_fields(cls, v, info: ValidationInfo):
        """Coerce a token-count field to int; empty input yields its default.

        Raises:
            ValueError: If the value cannot be converted to an int.
        """
        field_defaults = {
            "max_tokens": ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS,
            "min_tokens": 0,
        }
        if v is None or v == "":
            key = info.field_name or "max_tokens"
            return field_defaults.get(key, ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS)
        try:
            val = int(v)
        except (TypeError, ValueError) as err:
            raise ValueError(
                f"{info.field_name} must be an int. Got {type(v).__name__}."
            ) from err
        return val

    @field_validator("seed", mode="before")
    @classmethod
    def parse_optional_int(cls, v, info: ValidationInfo):
        """Coerce ``seed`` to int; empty string and ``None`` both mean unset.

        Raises:
            ValueError: If a non-empty value cannot be converted to an int.
        """
        if v == "" or v is None:
            return None
        try:
            return int(v)
        except (TypeError, ValueError) as err:
            raise ValueError(
                f"{info.field_name} must be an int or empty/None."
            ) from err

    @field_validator("stop", "chat_template", "request_id", mode="before")
    @classmethod
    def parse_optional_str(cls, v, info: ValidationInfo):
        """Map the empty string to ``None`` and stringify non-string values."""
        if v == "":
            return None
        if v is not None and not isinstance(v, str):
            return str(v)
        return v