deeptrade-quant 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. deeptrade/__init__.py +8 -0
  2. deeptrade/channels_builtin/__init__.py +0 -0
  3. deeptrade/channels_builtin/stdout/__init__.py +0 -0
  4. deeptrade/channels_builtin/stdout/deeptrade_plugin.yaml +25 -0
  5. deeptrade/channels_builtin/stdout/migrations/20260429_001_init.sql +13 -0
  6. deeptrade/channels_builtin/stdout/stdout_channel/__init__.py +0 -0
  7. deeptrade/channels_builtin/stdout/stdout_channel/channel.py +180 -0
  8. deeptrade/cli.py +214 -0
  9. deeptrade/cli_config.py +396 -0
  10. deeptrade/cli_data.py +33 -0
  11. deeptrade/cli_plugin.py +176 -0
  12. deeptrade/core/__init__.py +8 -0
  13. deeptrade/core/config.py +344 -0
  14. deeptrade/core/config_migrations.py +138 -0
  15. deeptrade/core/db.py +176 -0
  16. deeptrade/core/llm_client.py +591 -0
  17. deeptrade/core/llm_manager.py +174 -0
  18. deeptrade/core/logging_config.py +61 -0
  19. deeptrade/core/migrations/__init__.py +0 -0
  20. deeptrade/core/migrations/core/20260427_001_init.sql +121 -0
  21. deeptrade/core/migrations/core/20260501_002_drop_llm_calls_stage.sql +10 -0
  22. deeptrade/core/migrations/core/__init__.py +0 -0
  23. deeptrade/core/notifier.py +302 -0
  24. deeptrade/core/paths.py +49 -0
  25. deeptrade/core/plugin_manager.py +616 -0
  26. deeptrade/core/run_status.py +29 -0
  27. deeptrade/core/secrets.py +152 -0
  28. deeptrade/core/tushare_client.py +824 -0
  29. deeptrade/plugins_api/__init__.py +44 -0
  30. deeptrade/plugins_api/base.py +66 -0
  31. deeptrade/plugins_api/channel.py +42 -0
  32. deeptrade/plugins_api/events.py +61 -0
  33. deeptrade/plugins_api/llm.py +46 -0
  34. deeptrade/plugins_api/metadata.py +84 -0
  35. deeptrade/plugins_api/notify.py +67 -0
  36. deeptrade/strategies_builtin/__init__.py +0 -0
  37. deeptrade/strategies_builtin/limit_up_board/__init__.py +0 -0
  38. deeptrade/strategies_builtin/limit_up_board/deeptrade_plugin.yaml +101 -0
  39. deeptrade/strategies_builtin/limit_up_board/limit_up_board/__init__.py +0 -0
  40. deeptrade/strategies_builtin/limit_up_board/limit_up_board/calendar.py +65 -0
  41. deeptrade/strategies_builtin/limit_up_board/limit_up_board/cli.py +269 -0
  42. deeptrade/strategies_builtin/limit_up_board/limit_up_board/config.py +76 -0
  43. deeptrade/strategies_builtin/limit_up_board/limit_up_board/data.py +1191 -0
  44. deeptrade/strategies_builtin/limit_up_board/limit_up_board/pipeline.py +869 -0
  45. deeptrade/strategies_builtin/limit_up_board/limit_up_board/plugin.py +30 -0
  46. deeptrade/strategies_builtin/limit_up_board/limit_up_board/profiles.py +85 -0
  47. deeptrade/strategies_builtin/limit_up_board/limit_up_board/prompts.py +485 -0
  48. deeptrade/strategies_builtin/limit_up_board/limit_up_board/render.py +890 -0
  49. deeptrade/strategies_builtin/limit_up_board/limit_up_board/runner.py +1087 -0
  50. deeptrade/strategies_builtin/limit_up_board/limit_up_board/runtime.py +172 -0
  51. deeptrade/strategies_builtin/limit_up_board/limit_up_board/schemas.py +178 -0
  52. deeptrade/strategies_builtin/limit_up_board/migrations/20260430_001_init.sql +150 -0
  53. deeptrade/strategies_builtin/limit_up_board/migrations/20260501_002_lub_stage_results_llm_provider.sql +8 -0
  54. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_001_lub_lhb_tables.sql +36 -0
  55. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_002_lub_cyq_perf.sql +18 -0
  56. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_003_lub_lhb_pk_fix.sql +46 -0
  57. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_004_lub_lhb_drop_pk.sql +53 -0
  58. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_005_lub_config.sql +17 -0
  59. deeptrade/strategies_builtin/volume_anomaly/__init__.py +0 -0
  60. deeptrade/strategies_builtin/volume_anomaly/deeptrade_plugin.yaml +59 -0
  61. deeptrade/strategies_builtin/volume_anomaly/migrations/20260430_001_init.sql +94 -0
  62. deeptrade/strategies_builtin/volume_anomaly/migrations/20260601_001_realized_returns.sql +44 -0
  63. deeptrade/strategies_builtin/volume_anomaly/migrations/20260601_002_dimension_scores.sql +13 -0
  64. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/__init__.py +0 -0
  65. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/calendar.py +52 -0
  66. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/cli.py +247 -0
  67. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/data.py +2154 -0
  68. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/pipeline.py +327 -0
  69. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/plugin.py +22 -0
  70. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/profiles.py +49 -0
  71. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/prompts.py +187 -0
  72. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/prompts_examples.py +84 -0
  73. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/render.py +906 -0
  74. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/runner.py +772 -0
  75. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/runtime.py +90 -0
  76. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/schemas.py +97 -0
  77. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/stats.py +174 -0
  78. deeptrade/theme.py +48 -0
  79. deeptrade_quant-0.0.2.dist-info/METADATA +166 -0
  80. deeptrade_quant-0.0.2.dist-info/RECORD +83 -0
  81. deeptrade_quant-0.0.2.dist-info/WHEEL +4 -0
  82. deeptrade_quant-0.0.2.dist-info/entry_points.txt +2 -0
  83. deeptrade_quant-0.0.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,591 @@
+ """LLM client (OpenAI-compatible protocol, multi-provider).
+
+ v0.7 — the stage concept has been removed from the framework entirely.
+ ``complete_json`` no longer recognizes stage names; the caller passes in an
+ already-resolved ``StageProfile``. The presets (``fast/balanced/quality``)
+ remain framework-level user configuration (``app.profile``), but **the
+ preset → per-stage tuning mapping is owned by each plugin** (see
+ ``deeptrade.plugins_api.llm``).
+
+ v0.6 — renamed from ``deepseek_client.py`` to reflect its framework-level
+ role. The same client now backs every provider configured in
+ ``llm.providers`` (DeepSeek / Qwen / Kimi / Doubao / GLM / Yi / SiliconFlow /
+ OpenRouter, ...) — they all speak the OpenAI Chat Completions wire format.
+ Truly heterogeneous protocols (Anthropic native, Gemini native) will land
+ later as a separate transport plugin type; this module assumes
+ OpenAI-compatible.
+
+ Construction is not normally done by plugins directly — use
+ ``deeptrade.core.llm_manager.LLMManager.get_client(name, ...)``.
+
+ DESIGN §10.2 + the M3/F3/F5 hard constraints from the v0.3 review:
+
+ * **No tool calls EVER** — ``chat.completions.create()`` is invoked
+   without any ``tools`` / ``tool_choice`` / ``functions`` parameter, and
+   plugins are not handed a ``chat_with_tools`` surface.
+ * **Caller-supplied profile**: each ``complete_json`` call carries a
+   ``StageProfile`` (thinking / reasoning_effort / temperature /
+   max_output_tokens). The framework does not look up profiles by stage name.
+ * **JSON-only**: ``response_format={"type": "json_object"}`` + Pydantic
+   double-validation; one retry on JSON / Pydantic failure, then re-raise.
+ * **Audit**: each call is persisted to ``llm_calls`` (request_json,
+   response_json, prompt_hash, validation_status, plugin_id).
+
+ Transports:
+     OpenAICompatTransport — base class; OpenAI-compatible chat completion
+         API. Subclasses override ``_provider_extra_body()`` to translate
+         ``StageProfile.thinking`` into each provider's wire shape
+         (DashScope's ``enable_thinking`` boolean, Claude-on-OAI-compat's
+         ``thinking={"type": "enabled"}``, etc.).
+     GenericOpenAITransport — providers without a thinking dial
+         (Kimi/DeepSeek/OpenRouter/...); the ``thinking`` flag is silently
+         dropped per the plugins_api/llm.py contract.
+     DashScopeTransport — Alibaba Qwen on DashScope. qwen3.x defaults to
+         thinking=ON, so we MUST pass ``enable_thinking`` explicitly for both
+         True and False — otherwise the plugin's "thinking off" preset is
+         ineffective and runs hit the per-call timeout.
+     RecordedTransport — tests; FIFO-replays canned responses.
+
+ Routing: ``_select_transport_class(base_url)`` picks the right subclass via
+ a small framework-internal substring table. Unknown base_urls fall back to
+ GenericOpenAITransport. Adding a new provider type = one entry in
+ ``_TRANSPORT_BY_BASE_URL`` + one subclass; configuration-layer schemas are
+ untouched on purpose (the dialect is not user-tunable).
+ """
+
+ from __future__ import annotations
+
+ import hashlib
+ import json
+ import logging
+ import time
+ import uuid
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any
+
+ from pydantic import BaseModel, ValidationError
+ from tenacity import (
+     retry,
+     retry_if_exception_type,
+     stop_after_attempt,
+     wait_exponential,
+ )
+
+ from deeptrade.core.db import Database
+ from deeptrade.plugins_api.llm import StageProfile
+
+ logger = logging.getLogger(__name__)
+
+
+ # ---------------------------------------------------------------------------
+ # Errors
+ # ---------------------------------------------------------------------------
+
+
+ class LLMError(Exception):
+     """Base for LLM client errors."""
+
+
+ class LLMTransportError(LLMError):
+     """Network / SDK error — transient, retried by tenacity."""
+
+
+ class LLMValidationError(LLMError):
+     """JSON parse or Pydantic validation failure."""
+
+
+ class LLMEmptyResponseError(LLMValidationError):
+     """The model returned no visible content (``message.content`` was empty
+     or None). Distinct from a JSON parse error so callers can show a clearer
+     message and the retry path can append a "skip extended reasoning" hint
+     instead of resending the same prompt that already produced nothing."""
+
+
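
Because ``LLMEmptyResponseError`` subclasses ``LLMValidationError``, callers that want to distinguish the two must catch the narrower type first. A minimal caller-side sketch (``handle`` is a placeholder, not a framework API; ``client``, ``Verdict``, and ``profile`` as in the earlier sketch):

    try:
        verdict, meta = client.complete_json(
            system=system, user=user, schema=Verdict, profile=profile,
        )
    except LLMEmptyResponseError as e:
        handle("empty output twice; check thinking / max_output_tokens", e)
    except LLMValidationError as e:
        handle("JSON still malformed or schema-invalid after one retry", e)
    except LLMTransportError as e:
        handle("provider unreachable after tenacity retries", e)
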
+ # ---------------------------------------------------------------------------
+ # Transport abstraction
+ # ---------------------------------------------------------------------------
+
+
+ @dataclass
+ class LLMResponse:
+     """Normalized response shape regardless of transport."""
+
+     text: str
+     input_tokens: int
+     output_tokens: int
+
+
+ class LLMTransport(ABC):
+     """Carrier for one chat completion call."""
+
+     @abstractmethod
+     def chat(
+         self,
+         *,
+         model: str,
+         system: str,
+         user: str,
+         temperature: float,
+         max_tokens: int,
+         thinking: bool,
+         reasoning_effort: str,
+     ) -> LLMResponse:
+         """Send one chat. MUST NOT pass tools/tool_choice/functions."""
+
+
+ class OpenAICompatTransport(LLMTransport):
+     """Production transport base — OpenAI-compatible chat completion.
+
+     Subclasses override ``_provider_extra_body()`` to inject provider-specific
+     knobs (notably the various flavors of the "thinking" dial). The default
+     implementation returns ``{}``: appropriate for OpenAI-compatible providers
+     that don't recognize a thinking concept, where ``StageProfile.thinking``
+     is silently dropped per the plugins_api/llm.py contract.
+     """
+
+     def __init__(self, api_key: str, base_url: str, timeout: int) -> None:
+         from openai import OpenAI  # noqa: PLC0415
+
+         self._client = OpenAI(api_key=api_key, base_url=base_url, timeout=timeout)
+
+     def _provider_extra_body(self, *, thinking: bool) -> dict[str, Any]:
+         """Provider-specific keys merged into ``extra_body``. Default: none."""
+         del thinking  # base class has no provider knobs
+         return {}
+
+     def chat(
+         self,
+         *,
+         model: str,
+         system: str,
+         user: str,
+         temperature: float,
+         max_tokens: int,
+         thinking: bool,
+         reasoning_effort: str,
+     ) -> LLMResponse:
+         from openai import APIError, APITimeoutError  # noqa: PLC0415
+
+         kwargs: dict[str, Any] = {
+             "model": model,
+             "messages": [
+                 {"role": "system", "content": system},
+                 {"role": "user", "content": user},
+             ],
+             "response_format": {"type": "json_object"},
+             "reasoning_effort": reasoning_effort,
+             "temperature": temperature,
+             "max_tokens": max_tokens,
+             "stream": False,
+         }
+         extra_body = self._provider_extra_body(thinking=thinking)
+         if extra_body:
+             kwargs["extra_body"] = extra_body
+
+         # ⚠ HARD CONSTRAINT (M3): we MUST NOT pass tools/tool_choice/functions.
+         # If a future maintainer adds them, the no-tools test in V0.5 fails.
+
+         try:
+             resp = self._client.chat.completions.create(**kwargs)
+         except (APITimeoutError, APIError) as e:
+             raise LLMTransportError(str(e)) from e
+
+         text = resp.choices[0].message.content or ""
+         usage = resp.usage
+         return LLMResponse(
+             text=text,
+             input_tokens=int(getattr(usage, "prompt_tokens", 0) or 0),
+             output_tokens=int(getattr(usage, "completion_tokens", 0) or 0),
+         )
+
+
+ class GenericOpenAITransport(OpenAICompatTransport):
+     """OpenAI-compatible providers without a thinking dial.
+
+     Catch-all for providers like DeepSeek, Kimi, Doubao, GLM, Yi, OpenRouter, …
+     The ``thinking`` flag from ``StageProfile`` is silently dropped here per
+     the contract documented in ``plugins_api/llm.py``.
+     """
+
+
+ class DashScopeTransport(OpenAICompatTransport):
+     """Alibaba DashScope (Qwen family).
+
+     qwen3.x defaults to thinking=ON; the only way to actually disable it is to
+     send ``enable_thinking=False`` explicitly. Sending nothing leaves thinking
+     on, which can blow past the per-call timeout for high-output prompts (the
+     model burns its budget on internal reasoning before any visible content).
+     Therefore we always emit ``enable_thinking`` — both for True and False.
+     """
+
+     def _provider_extra_body(self, *, thinking: bool) -> dict[str, Any]:
+         return {"enable_thinking": thinking}
+
+
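
The OpenAI Python SDK merges ``extra_body`` keys into the request payload, so a DashScope call with thinking disabled goes out roughly like this (illustrative JSON shape, not captured wire traffic; the model name is invented):

    {
        "model": "qwen-plus",
        "messages": [{"role": "system", "content": "..."},
                     {"role": "user", "content": "..."}],
        "response_format": {"type": "json_object"},
        "reasoning_effort": "low",
        "temperature": 0.2,
        "max_tokens": 1024,
        "stream": false,
        "enable_thinking": false
    }
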
+ # ---------------------------------------------------------------------------
+ # Transport routing (framework-internal)
+ # ---------------------------------------------------------------------------
+
+ # base_url substring → transport class. Substring (not strict host) is fine
+ # because each entry's pattern is anchored to the provider's well-known domain
+ # and tolerates port / path / version variations. New entries land here and
+ # nowhere else; user-facing config has no "dialect" knob on purpose.
+ _TRANSPORT_BY_BASE_URL: tuple[tuple[str, type[OpenAICompatTransport]], ...] = (
+     ("dashscope.aliyuncs.com", DashScopeTransport),
+ )
+
+
+ def _select_transport_class(base_url: str) -> type[OpenAICompatTransport]:
+     """Pick the OpenAI-compat transport subclass for a provider's base_url.
+
+     Substring match against ``_TRANSPORT_BY_BASE_URL``; unknown base_urls fall
+     back to ``GenericOpenAITransport`` (which silently drops thinking flags).
+     """
+     for pattern, cls in _TRANSPORT_BY_BASE_URL:
+         if pattern in base_url:
+             return cls
+     return GenericOpenAITransport
+
+
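
Wiring in a provider with its own thinking dial therefore stays a two-part change. A hypothetical example (provider, host, and wire field all invented for illustration):

    class AcmeTransport(OpenAICompatTransport):
        """Hypothetical provider that nests its thinking flag."""

        def _provider_extra_body(self, *, thinking: bool) -> dict[str, Any]:
            return {"thinking": {"type": "enabled" if thinking else "disabled"}}

    _TRANSPORT_BY_BASE_URL = (
        ("dashscope.aliyuncs.com", DashScopeTransport),
        ("api.acme.example", AcmeTransport),  # hypothetical new entry
    )
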
+ class RecordedTransport(LLMTransport):
+     """Test transport — FIFO-replays pre-registered responses.
+
+     Use ``register(response)`` to seed; each ``chat()`` call pops one entry.
+     """
+
+     def __init__(self) -> None:
+         self._queue: list[LLMResponse | Exception] = []
+         # last-seen kwargs the test can introspect (the M3 no-tools test
+         # relies on this)
+         self.last_call_kwargs: dict[str, Any] = {}
+
+     def register(self, response: LLMResponse | Exception) -> None:
+         self._queue.append(response)
+
+     def chat(
+         self,
+         *,
+         model: str,
+         system: str,
+         user: str,
+         temperature: float,
+         max_tokens: int,
+         thinking: bool,
+         reasoning_effort: str,
+     ) -> LLMResponse:
+         self.last_call_kwargs = {
+             "model": model,
+             "system": system,
+             "user": user,
+             "temperature": temperature,
+             "max_tokens": max_tokens,
+             "thinking": thinking,
+             "reasoning_effort": reasoning_effort,
+         }
+         if not self._queue:
+             raise LLMTransportError("RecordedTransport: no more queued responses")
+         entry = self._queue.pop(0)
+         if isinstance(entry, Exception):
+             raise entry
+         return entry
+
+
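
A test sketch showing the FIFO replay and the kwargs capture (reusing ``Verdict`` and ``profile`` from the first sketch; ``db`` is assumed to be a migrated ``Database``):

    transport = RecordedTransport()
    transport.register(
        LLMResponse(text='{"ts_code": "600519.SH", "score": 1.0}',
                    input_tokens=10, output_tokens=5)
    )
    client = LLMClient(db, transport, model="test-model")
    verdict, _ = client.complete_json(
        system="s", user="u", schema=Verdict, profile=profile,
    )
    assert verdict.score == 1.0
    # The recorded kwargs expose exactly what the client sent; the per-call
    # tuning comes straight from the StageProfile.
    assert transport.last_call_kwargs["max_tokens"] == profile.max_output_tokens
    assert transport.last_call_kwargs["thinking"] is profile.thinking
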
+ # ---------------------------------------------------------------------------
+ # Client
+ # ---------------------------------------------------------------------------
+
+
+ class LLMClient:
+     def __init__(
+         self,
+         db: Database,
+         transport: LLMTransport,
+         *,
+         model: str,
+         plugin_id: str | None = None,
+         run_id: str | None = None,
+         audit_full_payload: bool = False,
+         reports_dir: Path | None = None,
+     ) -> None:
+         self._db = db
+         self._transport = transport
+         self._model = model
+         self._plugin_id = plugin_id
+         self._run_id = run_id
+         # F-M2 — when False (default), DB rows keep just hash + response
+         # excerpt; full payloads ALWAYS go to reports_dir/llm_calls.jsonl
+         # (set by caller).
+         self._audit_full = audit_full_payload
+         self._reports_dir = reports_dir
+
+     # --- main entry ----------------------------------------------------
+
+     def complete_json(
+         self,
+         *,
+         system: str,
+         user: str,
+         schema: type[BaseModel],
+         profile: StageProfile,
+         envelope_defaults: dict[str, Any] | None = None,
+     ) -> tuple[BaseModel, dict[str, Any]]:
+         """Send a JSON-mode chat and validate against ``schema``.
+
+         ``profile`` (required) — caller-resolved per-call tuning. Plugins
+         own the preset → stage profile mapping; the framework just consumes
+         the four fields (thinking, reasoning_effort, temperature,
+         max_output_tokens).
+
+         ``envelope_defaults`` (optional) — top-level keys to inject when
+         the LLM omits them. Useful for caller-controlled metadata like
+         ``stage`` / ``trade_date`` / ``batch_no``. Only fills keys missing
+         from the parsed payload, never overwrites.
+
+         Returns ``(validated_model, meta)`` where meta includes input_tokens,
+         output_tokens, latency_ms, prompt_hash. Raises:
+             LLMValidationError — JSON or Pydantic still failing after 1 retry
+             LLMEmptyResponseError — model returned empty content twice
+             LLMTransportError — transport-level error after retries
+         """
+         return self._with_retry(system, user, schema, profile, envelope_defaults)
+
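
The fill semantics are plain ``dict.setdefault``: defaults only land where the model left a hole and never overwrite. A worked example with illustrative values:

    payload = {"ts_code": "600519.SH", "score": 92}  # parsed LLM output
    for k, v in {"trade_date": "20260508", "score": 0}.items():
        payload.setdefault(k, v)
    # payload == {"ts_code": "600519.SH", "score": 92, "trade_date": "20260508"}
    # the model's score survives; only the missing trade_date was filled in
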
+     @staticmethod
+     def _retry_hint_for(error: Exception) -> str:
+         """Pick a corrective hint for the second attempt based on what failed.
+
+         Re-sending the identical prompt after a known-bad response is wasted
+         budget; the hint nudges the model toward the specific failure mode.
+         Hints are written in Chinese to match the plugins' prompt language.
+         """
+         if isinstance(error, LLMEmptyResponseError):
+             # "The previous response was empty. Output schema-conformant JSON
+             # directly, without extended reasoning or markdown wrapping;
+             # return only the final JSON object."
+             return (
+                 "\n\n⚠ 上一次响应为空。请直接输出符合 schema 的 JSON,"
+                 "不要进行扩展推理或 markdown 包裹;只返回最终的 JSON 对象。"
+             )
+         if isinstance(error, json.JSONDecodeError):
+             # "The previous response was not valid JSON. Return only the JSON
+             # object, with no ``` code-block markers or prefix/suffix."
+             return (
+                 "\n\n⚠ 上一次响应不是合法 JSON。请只返回 JSON 对象,"
+                 "不要使用代码块标记 ``` 或前后缀。"
+             )
+         if isinstance(error, ValidationError):
+             # "The previous response was missing required fields or carried
+             # invalid values. Fill in every field exactly per the output
+             # format in the system message; do not omit enum fields such as
+             # ts_code / score / strength_level, and none of the 4 fields in
+             # each evidence item may be dropped."
+             return (
+                 "\n\n⚠ 上一次响应缺少必填字段或字段值非法。请严格按照系统消息中的"
+                 "【输出格式】填写每一个字段,不要省略 ts_code / score / strength_level "
+                 "等枚举值字段,evidence 内每条 4 个字段不可省。"
+             )
+         return ""
+
+     @retry(
+         retry=retry_if_exception_type(LLMTransportError),
+         stop=stop_after_attempt(3),
+         wait=wait_exponential(multiplier=1, min=1, max=10),
+         reraise=True,
+     )
+     def _transport_call(self, system: str, user: str, profile: StageProfile) -> LLMResponse:
+         return self._transport.chat(
+             model=self._model,
+             system=system,
+             user=user,
+             temperature=profile.temperature,
+             max_tokens=profile.max_output_tokens,
+             thinking=profile.thinking,
+             reasoning_effort=profile.reasoning_effort,
+         )
+
+     def _with_retry(
+         self,
+         system: str,
+         user: str,
+         schema: type[BaseModel],
+         profile: StageProfile,
+         envelope_defaults: dict[str, Any] | None = None,
+     ) -> tuple[BaseModel, dict[str, Any]]:
+         """Two attempts: one retry on JSON / Pydantic / empty-response failure.
+
+         Empty responses (``response.text`` blank) are detected separately:
+         the second-attempt user prompt gets an appended hint asking the
+         model to emit JSON directly without extended reasoning, since
+         re-sending the identical prompt has zero new information.
+         """
+         prompt_hash = hashlib.sha256((system + user).encode("utf-8")).hexdigest()
+
+         last_err: Exception | None = None
+         last_response: LLMResponse | None = None
+         current_user = user
+         for attempt in (1, 2):
+             t0 = time.monotonic()
+             response = self._transport_call(system, current_user, profile)
+             latency_ms = int((time.monotonic() - t0) * 1000)
+             last_response = response
+
+             # Detect empty content BEFORE trying to parse — the JSON error
+             # otherwise surfaces as a misleading "Expecting value: line 1
+             # column 1 (char 0)" with no signal that the model returned
+             # nothing visible at all.
+             empty = (response.text or "").strip() == ""
+             try:
+                 if empty:
+                     raise LLMEmptyResponseError(
+                         f"model returned empty content "
+                         f"(input_tokens={response.input_tokens}, "
+                         f"output_tokens={response.output_tokens}, latency_ms={latency_ms}); "
+                         "common causes: extended reasoning consumed the output budget, "
+                         "max_output_tokens too low, or a model-side content filter."
+                     )
+                 payload = json.loads(response.text)
+                 if envelope_defaults and isinstance(payload, dict):
+                     for k, v in envelope_defaults.items():
+                         payload.setdefault(k, v)
+                 obj = schema.model_validate(payload)
+             except (json.JSONDecodeError, ValidationError, LLMEmptyResponseError) as e:
+                 last_err = e
+                 self._record_call(
+                     prompt_hash=prompt_hash,
+                     request_system=system,
+                     request_user=current_user,
+                     response_text=response.text,
+                     input_tokens=response.input_tokens,
+                     output_tokens=response.output_tokens,
+                     latency_ms=latency_ms,
+                     validation_status="retry" if attempt == 1 else "failed",
+                     error=f"{type(e).__name__}: {e}",
+                 )
+                 # On retry, attach a corrective hint tailored to the failure
+                 # mode. The same prompt twice is wasted budget.
+                 if attempt == 1:
+                     current_user = user + self._retry_hint_for(e)
+                 continue
+             else:
+                 self._record_call(
+                     prompt_hash=prompt_hash,
+                     request_system=system,
+                     request_user=current_user,
+                     response_text=response.text,
+                     input_tokens=response.input_tokens,
+                     output_tokens=response.output_tokens,
+                     latency_ms=latency_ms,
+                     validation_status="ok",
+                     error=None,
+                 )
+                 return obj, {
+                     "input_tokens": response.input_tokens,
+                     "output_tokens": response.output_tokens,
+                     "latency_ms": latency_ms,
+                     "prompt_hash": prompt_hash,
+                 }
+
+         # Both attempts failed — preserve the specific error subclass so callers
+         # (and tests) can branch on whether the model returned empty vs. invalid.
+         assert last_err is not None
+         tail = (last_response.text if last_response else "")[:200]
+         msg = (
+             f"validation failed after retry; last error: {last_err}; "
+             f"last response (truncated): {tail}"
+         )
+         if isinstance(last_err, LLMEmptyResponseError):
+             raise LLMEmptyResponseError(msg) from last_err
+         raise LLMValidationError(msg) from last_err
+
+     # --- audit log ----------------------------------------------------
+
+     def _record_call(
+         self,
+         *,
+         prompt_hash: str,
+         request_system: str,
+         request_user: str,
+         response_text: str,
+         input_tokens: int,
+         output_tokens: int,
+         latency_ms: int,
+         validation_status: str,
+         error: str | None,
+     ) -> None:
+         call_id = str(uuid.uuid4())
+         # F-M2 — write the FULL payload to reports/<run_id>/llm_calls.jsonl
+         # regardless of audit_full_payload (a lean DB is just a storage
+         # optimization, not an audit gap). The DB row may be lean or full
+         # per audit_full_payload.
+         self._append_jsonl(
+             call_id=call_id,
+             prompt_hash=prompt_hash,
+             request_system=request_system,
+             request_user=request_user,
+             response_text=response_text,
+             input_tokens=input_tokens,
+             output_tokens=output_tokens,
+             latency_ms=latency_ms,
+             validation_status=validation_status,
+             error=error,
+         )
+         if self._audit_full:
+             request_json = json.dumps(
+                 {"system": request_system, "user": request_user},
+                 ensure_ascii=False,
+             )
+             response_payload = response_text
+         else:
+             # Lean mode: store prompt sizes + the first 200 chars of the response.
+             request_json = json.dumps(
+                 {
+                     "system_len": len(request_system),
+                     "user_len": len(request_user),
+                     "audit": "lean",
+                 },
+                 ensure_ascii=False,
+             )
+             response_payload = (response_text or "")[:200]
+         self._db.execute(
+             "INSERT INTO llm_calls(call_id, run_id, plugin_id, model, prompt_hash, "
+             "input_tokens, output_tokens, latency_ms, request_json, response_json, "
+             "validation_status, error) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+             (
+                 call_id,
+                 self._run_id,
+                 self._plugin_id,
+                 self._model,
+                 prompt_hash,
+                 input_tokens,
+                 output_tokens,
+                 latency_ms,
+                 request_json,
+                 response_payload,
+                 validation_status,
+                 error,
+             ),
+         )
+
+     def _append_jsonl(
+         self,
+         *,
+         call_id: str,
+         prompt_hash: str,
+         request_system: str,
+         request_user: str,
+         response_text: str,
+         input_tokens: int,
+         output_tokens: int,
+         latency_ms: int,
+         validation_status: str,
+         error: str | None,
+     ) -> None:
+         """F-M2 — write the FULL prompt/response to llm_calls.jsonl always.
+
+         DB lean-mode is purely a storage optimization; audit must be
+         reproducible from the jsonl file in the report directory."""
+         if self._reports_dir is None:
+             return
+         try:
+             self._reports_dir.mkdir(parents=True, exist_ok=True)
+             with (self._reports_dir / "llm_calls.jsonl").open("a", encoding="utf-8") as fh:
+                 fh.write(
+                     json.dumps(
+                         {
+                             "call_id": call_id,
+                             "run_id": self._run_id,
+                             "plugin_id": self._plugin_id,
+                             "model": self._model,
+                             "prompt_hash": prompt_hash,
+                             "input_tokens": input_tokens,
+                             "output_tokens": output_tokens,
+                             "latency_ms": latency_ms,
+                             "system": request_system,
+                             "user": request_user,
+                             "response": response_text,
+                             "validation_status": validation_status,
+                             "error": error,
+                         },
+                         ensure_ascii=False,
+                     )
+                     + "\n"
+                 )
+         except Exception as e:  # noqa: BLE001 — never let audit IO crash the run
+             logger.warning("failed to write llm_calls.jsonl: %s", e)
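
Each line of ``llm_calls.jsonl`` is therefore one self-contained JSON object. An illustrative record (all values invented, long fields truncated):

    {"call_id": "5f0c…", "run_id": "run-20260508", "plugin_id": "limit_up_board",
     "model": "deepseek-chat", "prompt_hash": "9b3a…", "input_tokens": 1843,
     "output_tokens": 412, "latency_ms": 5210, "system": "…", "user": "…",
     "response": "{\"ts_code\": \"600519.SH\", …}", "validation_status": "ok",
     "error": null}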