deeptrade-quant 0.8.0__tar.gz → 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/CHANGELOG.md +48 -0
  2. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/PKG-INFO +1 -1
  3. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/__init__.py +1 -1
  4. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/llm_client.py +100 -6
  5. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/pyproject.toml +1 -1
  6. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/test_llm_client.py +143 -25
  7. deeptrade_quant-0.9.0/tests/core/test_llm_client_streaming.py +187 -0
  8. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/.gitignore +0 -0
  9. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/LICENSE +0 -0
  10. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/README.md +0 -0
  11. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/cli.py +0 -0
  12. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/cli_config.py +0 -0
  13. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/cli_data.py +0 -0
  14. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/cli_plugin.py +0 -0
  15. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/__init__.py +0 -0
  16. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/config.py +0 -0
  17. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/config_migrations.py +0 -0
  18. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/db.py +0 -0
  19. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/dep_installer.py +0 -0
  20. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/github_fetch.py +0 -0
  21. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/llm_manager.py +0 -0
  22. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/logging_config.py +0 -0
  23. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/migrations/__init__.py +0 -0
  24. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/migrations/core/20260509_001_init.sql +0 -0
  25. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/migrations/core/20260512_001_drop_legacy_tushare_cache.sql +0 -0
  26. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/migrations/core/20260515_002_affected_tables.sql +0 -0
  27. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/migrations/core/__init__.py +0 -0
  28. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/paths.py +0 -0
  29. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/plugin_manager.py +0 -0
  30. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/plugin_source.py +0 -0
  31. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/registry.py +0 -0
  32. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/run_status.py +0 -0
  33. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/secrets.py +0 -0
  34. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/core/tushare_client.py +0 -0
  35. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/plugins_api/__init__.py +0 -0
  36. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/plugins_api/base.py +0 -0
  37. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/plugins_api/errors.py +0 -0
  38. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/plugins_api/events.py +0 -0
  39. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/plugins_api/llm.py +0 -0
  40. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/plugins_api/metadata.py +0 -0
  41. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/deeptrade/theme.py +0 -0
  42. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/__init__.py +0 -0
  43. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/cli/__init__.py +0 -0
  44. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/cli/test_config_cmd.py +0 -0
  45. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/cli/test_plugin_cmd.py +0 -0
  46. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/cli/test_routing.py +0 -0
  47. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/cli/test_user_facing_strings_are_chinese.py +0 -0
  48. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/conftest.py +0 -0
  49. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/__init__.py +0 -0
  50. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/test_config.py +0 -0
  51. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/test_config_migrations.py +0 -0
  52. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/test_db.py +0 -0
  53. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/test_github_fetch.py +0 -0
  54. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/test_llm_manager.py +0 -0
  55. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/test_paths.py +0 -0
  56. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/test_plugin_dependencies.py +0 -0
  57. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/test_plugin_install.py +0 -0
  58. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/test_plugin_security.py +0 -0
  59. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/test_plugin_source.py +0 -0
  60. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/test_plugin_upgrade.py +0 -0
  61. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/test_registry.py +0 -0
  62. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/test_secrets.py +0 -0
  63. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/test_tushare_classifier.py +0 -0
  64. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/test_tushare_client.py +0 -0
  65. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/core/test_tushare_retry_r1.py +0 -0
  66. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/plugins_api/__init__.py +0 -0
  67. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/plugins_api/test_api_version_2.py +0 -0
  68. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/plugins_api/test_errors.py +0 -0
  69. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/plugins_api/test_protocol.py +0 -0
  70. {deeptrade_quant-0.8.0 → deeptrade_quant-0.9.0}/tests/test_smoke.py +0 -0
@@ -2,6 +2,54 @@
2
2
 
3
3
  All notable changes to DeepTrade. Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and SemVer.
4
4
 
5
+ ## [v0.9.0] — 2026-05-16 — LLM transport 改流式,规避网关 idle-timeout
6
+
7
+ 打板等长生成场景下,`kimi-k2.6` 等 thinking 模型的非流式调用 100% 触发 `LLMTransportError: Request timed out.`,单次失败耗时 ~27 分钟(外层 tenacity 3 次 × openai SDK 3 次 × 180 s)。Moonshot 官方文档明确:思考模型在 server 端先思考再生成,**任何中间网关(包括 Moonshot 自家网关)只要看到长时间无 header 返回就会把 TCP 当僵尸连接切掉**,这是非流式的设计性缺陷,与 base_url / DNS / TLS 均无关。
8
+
9
+ 实证:同一 prompt + `stream=True` 下 TTFC 1.3 s、总耗时 42 s、`content_len=240`、JSON 合法、usage 完整。
10
+
11
+ ### Changed
12
+
13
+ - `deeptrade/core/llm_client.py::OpenAICompatTransport.chat()` 改为流式:`stream=True` + `stream_options={"include_usage": True}`,逐 chunk 累积 `delta.content` 拼成完整文本,从末 chunk 取 usage。`create()` 与迭代两阶段的 `APITimeoutError` / `APIError` 都包成 `LLMTransportError`,原 tenacity 重试路径不变。
14
+ - 保留两段既有逻辑:v0.8.1 的 `_adjust_temperature()` 钩子(Moonshot 强制 1.0)与 v0.6 的 `supports_reasoning_effort` 门控(仅 `OpenAIOfficialTransport` 翻 True)继续生效。
15
+ - 子类(`GenericOpenAITransport` / `DashScopeTransport` / `MoonshotTransport` / `OpenAIOfficialTransport`)零修改,继承新基类实现。`RecordedTransport`、`LLMClient`、`LLMResponse`、`llm_calls` 表结构、`reports/<run>/llm_calls.jsonl` 格式全部不变。
16
+
17
+ ### Why streaming-only, no fallback
18
+
19
+ 内测期约定:**不留 `stream=False` 开关、不加配置项、不做降级**。理由:
20
+
21
+ - timeout 语义自然从「整次调用墙钟」变为「单 chunk 间最大不活跃间隔」,更宽容、更符合 LLM 长生成的实际形态;`provider.timeout=180` 字面值无需调整。
22
+ - 流式中途断分两种情形:SDK 抛出 `APITimeoutError` / `APIError` 时包成 `LLMTransportError`,走原 tenacity 重试;流静默结束导致 JSON 截断时,走 `LLMClient._with_retry` 已有的 `LLMValidationError` 1 次 repair-retry。两种情形都不必单独走非流式 fallback。
23
+ - include_usage 是 OpenAI 协议 2024-07 起的官方约定,in-scope 的全部 provider(OpenAI / Moonshot / DeepSeek / DashScope / Doubao / GLM / Yi / OpenRouter / SiliconFlow)均已支持;万一某末 chunk 缺 usage,审计记 0 不抛。
24
+
25
+ ### Migration notes
26
+
27
+ - 插件零改动。`LLMClient.complete_json()` 接口、异常类型、返回类型、重试语义、审计格式全部不变。
28
+ - `app_config.llm_providers[*].timeout` 字段保留,语义如上;用户原本配的数值无需调整。
29
+ - 行为差异详见 `docs/fix/2026-05-16-llm-streaming-transport.md` §4.3。
30
+
31
+ ## [v0.8.1] — 2026-05-16 — Moonshot reasoning 模型 temperature 兼容性
32
+
33
+ `limit-up-board` 等插件接入 Kimi K2.6(``base_url = https://api.moonshot.cn/v1``)后,**所有** LLM 调用 100% 命中 ``HTTP 400 invalid temperature: only 1 is allowed for this model``。根因:Kimi K2 系列的 thinking / reasoning 变体(与 OpenAI o1/o3、Anthropic Sonnet thinking 同侧设计)在服务端硬约束 ``temperature``——仅接受模型专属的固定值,而插件 ``StageProfile`` 出于复现性给的是 ``0.0 ~ 0.2``。
34
+
35
+ 修复职责完全在框架:插件不应感知具体 provider/model 的服务端约束,框架的契约是「插件给一个温度意图,框架在真正发出请求前 sanitize 到目标 provider/model 能接受的取值」。
36
+
37
+ ### Changed
38
+
39
+ - ``deeptrade/core/llm_client.py::OpenAICompatTransport``:新增 ``_adjust_temperature(model, temperature) -> float`` 钩子,默认 identity;``chat()`` 在写 kwargs 前调用钩子,并在改写时打一行 ``logger.info`` 便于排查。非 Moonshot 路径完全无感。
40
+ - 新增 ``MoonshotTransport(OpenAICompatTransport)``:``_FORCED_TEMPERATURE`` prefix 表强制 ``kimi-k2-thinking`` / ``kimi-k2.5`` / ``kimi-k2.6`` 到 ``1.0``、``kimi-for-coding`` 到 ``0.6``;fallthrough 走 ``min(temperature, 1.0)`` 兼顾非 reasoning 模型(``moonshot-v1-*`` / ``kimi-k2-instruct-*``)的 ``[0, 1]`` 上限——Pydantic 字段允许到 2.0,超界一样 400。
41
+ - ``_TRANSPORT_BY_BASE_URL`` 新增 ``("api.moonshot.cn", MoonshotTransport)``。substring 匹配自动覆盖 ``api.moonshot.cn`` / ``api.moonshot.cn/v1`` 所有形式;国际站 ``api.moonshot.ai`` 暂未支持,若后续需要追加一行即可。
42
+
43
+ ### Why prefix match, not exact / regex
44
+
45
+ Moonshot 命名空间 ``<major>.<minor>[-<dated-revision>]`` 的天然分界就在 prefix。exact 匹配会让 ``kimi-k2.6-1106`` / ``kimi-k2-thinking-128k`` 这类 dated revision 漏网,触发 0day 失败;regex 的转义复杂度会抬高 review 成本,收益不抵成本。
46
+
47
+ ### Migration notes
48
+
49
+ - 插件零改动。``limit_up_board`` / 其他第三方插件的 ``profiles.py`` 不需要感知该约束。
50
+ - 用户原本在 Kimi reasoning 模型上设的 ``temperature=0.0`` 在改写后会被强制为 ``1.0``——这本来就是服务端唯一允许的取值,不改写就是 100% 失败。
51
+ - ``app.profile`` / ``llm.providers`` 配置无变动。
52
+
5
53
  ## [v0.8.0] — 2026-05-16 — 插件 install / upgrade 走 CDN,零 GitHub API 调用
6
54
 
7
55
  `deeptrade plugin install` 与 `deeptrade plugin upgrade` 此前在解析"最新版本"与下载 tarball 时各打一次 ``api.github.com``,未认证用户共享 60/h 的 IP 级配额。一旦插件用户数上来,或者用户与浏览器 / `gh` CLI / `git clone` 公共仓库共用同一公网 IP,``HTTP 403: rate limit exceeded`` 就会把 install / upgrade 直接打死。共享 token 会违反 GitHub ToS,且配额仍会在那个 token 上聚合——不是解。
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deeptrade-quant
3
- Version: 0.8.0
3
+ Version: 0.9.0
4
4
  Summary: LLM-driven A-share (Shanghai/Shenzhen main board) stock screening CLI
5
5
  Project-URL: Homepage, https://github.com/ty19880929/deeptrade
6
6
  Project-URL: Repository, https://github.com/ty19880929/deeptrade
@@ -2,5 +2,5 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "0.8.0"
5
+ __version__ = "0.9.0"
6
6
  __all__ = ["__version__"]
@@ -162,6 +162,16 @@ class OpenAICompatTransport(LLMTransport):
162
162
  del thinking # base class has no provider knobs
163
163
  return {}
164
164
 
165
+ def _adjust_temperature(self, *, model: str, temperature: float) -> float:
166
+ """Provider/model-specific temperature sanitization hook.
167
+
168
+ Default: identity. Subclasses override to clamp / force temperature
169
+ for models with server-side hard constraints (e.g. Moonshot reasoning
170
+ variants that only accept ``temperature == 1``).
171
+ """
172
+ del model # base class has no per-model constraints
173
+ return temperature
174
+
165
175
  def chat(
166
176
  self,
167
177
  *,
@@ -173,8 +183,35 @@ class OpenAICompatTransport(LLMTransport):
173
183
  thinking: bool,
174
184
  reasoning_effort: str,
175
185
  ) -> LLMResponse:
186
+ """Send one chat as a server-sent-event stream and accumulate the
187
+ deltas into a single :class:`LLMResponse`.
188
+
189
+ Streaming is the only supported wire mode (v0.9+). Moonshot's
190
+ official guidance explicitly warns that long non-streaming
191
+ generations are killed by intermediate gateways that interpret a
192
+ long "no headers yet" pause as a dead connection. Streaming makes
193
+ the server emit ``200 OK`` + SSE headers within ~1 s, so no
194
+ gateway treats the request as a zombie regardless of how long
195
+ generation takes. The framework still returns a single
196
+ ``LLMResponse``; audit log, retry, and plugin code are unaffected.
197
+
198
+ ``stream_options={"include_usage": True}`` — every OpenAI-compatible
199
+ provider currently in scope (OpenAI, Moonshot, DeepSeek, DashScope,
200
+ Doubao, GLM, Yi, OpenRouter, SiliconFlow) returns ``usage`` on the
201
+ final chunk when this is set. The final chunk in usage mode has
202
+ ``choices=[]`` and ``usage`` populated.
203
+ """
176
204
  from openai import APIError, APITimeoutError # noqa: PLC0415
177
205
 
206
+ adjusted_temperature = self._adjust_temperature(model=model, temperature=temperature)
207
+ if adjusted_temperature != temperature:
208
+ logger.info(
209
+ "transport adjusted temperature for model=%s: %.3f -> %.3f",
210
+ model,
211
+ temperature,
212
+ adjusted_temperature,
213
+ )
214
+
178
215
  kwargs: dict[str, Any] = {
179
216
  "model": model,
180
217
  "messages": [
@@ -182,9 +219,10 @@ class OpenAICompatTransport(LLMTransport):
182
219
  {"role": "user", "content": user},
183
220
  ],
184
221
  "response_format": {"type": "json_object"},
185
- "temperature": temperature,
222
+ "temperature": adjusted_temperature,
186
223
  "max_tokens": max_tokens,
187
- "stream": False,
224
+ "stream": True,
225
+ "stream_options": {"include_usage": True},
188
226
  }
189
227
  # v0.6 H5 — only send ``reasoning_effort`` when the transport
190
228
  # declares support AND the caller actually supplied a non-empty
@@ -199,15 +237,29 @@ class OpenAICompatTransport(LLMTransport):
199
237
  # ⚠ HARD CONSTRAINT (M3): we MUST NOT pass tools/tool_choice/functions.
200
238
  # If a future maintainer adds them, the no-tools test in V0.5 fails.
201
239
 
240
+ parts: list[str] = []
241
+ usage: Any = None
202
242
  try:
203
- resp = self._client.chat.completions.create(**kwargs)
243
+ stream = self._client.chat.completions.create(**kwargs)
244
+ for chunk in stream:
245
+ # In include_usage mode the final chunk carries usage and
246
+ # an empty choices list. Earlier chunks carry one choice
247
+ # whose delta.content may be None (role-only opener) or a
248
+ # text fragment.
249
+ if chunk.choices:
250
+ delta = chunk.choices[0].delta
251
+ if delta is not None and delta.content:
252
+ parts.append(delta.content)
253
+ if getattr(chunk, "usage", None) is not None:
254
+ usage = chunk.usage
204
255
  except (APITimeoutError, APIError) as e:
256
+ # Errors raised during create() (header phase) or while
257
+ # iterating the stream (body phase) both surface as
258
+ # LLMTransportError so tenacity in _transport_call retries.
205
259
  raise LLMTransportError(str(e)) from e
206
260
 
207
- text = resp.choices[0].message.content or ""
208
- usage = resp.usage
209
261
  return LLMResponse(
210
- text=text,
262
+ text="".join(parts),
211
263
  input_tokens=int(getattr(usage, "prompt_tokens", 0) or 0),
212
264
  output_tokens=int(getattr(usage, "completion_tokens", 0) or 0),
213
265
  )
@@ -236,6 +288,47 @@ class DashScopeTransport(OpenAICompatTransport):
236
288
  return {"enable_thinking": thinking}
237
289
 
238
290
 
291
+ class MoonshotTransport(OpenAICompatTransport):
292
+ """Moonshot Kimi (``api.moonshot.cn``).
293
+
294
+ Reasoning-variant models (``kimi-k2-thinking`` / ``kimi-k2.5`` /
295
+ ``kimi-k2.6``) have a server-side hard constraint: ``temperature`` MUST
296
+ equal a model-specific fixed value (1.0 for thinking variants, 0.6 for
297
+ ``kimi-for-coding``). Any other value returns HTTP 400 ``invalid
298
+ temperature``.
299
+
300
+ Non-reasoning Moonshot models accept the full ``[0, 1]`` range; values
301
+ above 1 also 400. We handle both: forced equality on known reasoning
302
+ variants, then fall through to range clamp for everyone else.
303
+
304
+ ``_FORCED_TEMPERATURE`` uses **prefix** match so that dated revisions
305
+ (``kimi-k2.6-1106``, ``kimi-k2-thinking-128k``, …) inherit the same
306
+ constraint without a code change. Only include models with confirmed
307
+ server-side enforcement, not just "recommended" values.
308
+
309
+ Note: the international site (``api.moonshot.ai``) shares the same
310
+ constraints — add a routing-table entry there if/when the framework
311
+ supports it.
312
+ """
313
+
314
+ # model-name prefix → forced temperature value
315
+ _FORCED_TEMPERATURE: tuple[tuple[str, float], ...] = (
316
+ ("kimi-k2-thinking", 1.0),
317
+ ("kimi-k2.5", 1.0),
318
+ ("kimi-k2.6", 1.0),
319
+ ("kimi-for-coding", 0.6),
320
+ )
321
+
322
+ def _adjust_temperature(self, *, model: str, temperature: float) -> float:
323
+ for prefix, forced in self._FORCED_TEMPERATURE:
324
+ if model.startswith(prefix):
325
+ return forced
326
+ # Moonshot accepts only [0, 1] across the whole API; upper-clamp guards
327
+ # non-reasoning models (moonshot-v1-*, kimi-k2-instruct-*) against a
328
+ # StageProfile that goes above 1.0 (the Pydantic field allows up to 2).
329
+ return min(temperature, 1.0)
330
+
331
+
239
332
  class OpenAIOfficialTransport(OpenAICompatTransport):
240
333
  """OpenAI's own ``api.openai.com`` endpoint.
241
334
 
@@ -259,6 +352,7 @@ class OpenAIOfficialTransport(OpenAICompatTransport):
259
352
  # nowhere else; user-facing config has no "dialect" knob on purpose.
260
353
  _TRANSPORT_BY_BASE_URL: tuple[tuple[str, type[OpenAICompatTransport]], ...] = (
261
354
  ("dashscope.aliyuncs.com", DashScopeTransport),
355
+ ("api.moonshot.cn", MoonshotTransport),
262
356
  ("api.openai.com", OpenAIOfficialTransport),
263
357
  )
264
358
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "deeptrade-quant"
7
- version = "0.8.0"
7
+ version = "0.9.0"
8
8
  description = "LLM-driven A-share (Shanghai/Shenzhen main board) stock screening CLI"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -22,6 +22,7 @@ from deeptrade.core.llm_client import (
22
22
  LLMTransport,
23
23
  LLMTransportError,
24
24
  LLMValidationError,
25
+ MoonshotTransport,
25
26
  OpenAICompatTransport,
26
27
  OpenAIOfficialTransport,
27
28
  RecordedTransport,
@@ -79,6 +80,28 @@ def client(db: Database, transport: RecordedTransport) -> LLMClient:
79
80
  )
80
81
 
81
82
 
83
+ def _text_chunk(content: str | None) -> Any:
84
+ """Fabricate a streaming ChatCompletionChunk with a single delta content
85
+ fragment. Mirrors what openai SDK yields per delta during ``stream=True``.
86
+ """
87
+ from types import SimpleNamespace
88
+
89
+ delta = SimpleNamespace(content=content, role=None)
90
+ choice = SimpleNamespace(delta=delta, index=0, finish_reason=None)
91
+ return SimpleNamespace(choices=[choice], usage=None)
92
+
93
+
94
+ def _final_usage_chunk(*, prompt_tokens: int, completion_tokens: int) -> Any:
95
+ """Final chunk in ``stream_options={'include_usage': True}`` mode: empty
96
+ choices list, populated usage."""
97
+ from types import SimpleNamespace
98
+
99
+ return SimpleNamespace(
100
+ choices=[],
101
+ usage=SimpleNamespace(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens),
102
+ )
103
+
104
+
82
105
  def _ok_response(stage_label: str = "test", n: int = 2) -> LLMResponse:
83
106
  payload = {
84
107
  "stage": stage_label,
@@ -369,16 +392,15 @@ def test_dashscope_transport_sends_enable_thinking_through_chat(
369
392
  """End-to-end wire-shape regression — the kwargs handed to OpenAI's
370
393
  chat.completions.create() must carry `extra_body={"enable_thinking": ...}`
371
394
  for DashScope, even when thinking=False."""
372
- from types import SimpleNamespace
373
-
374
395
  captured: dict[str, Any] = {}
375
396
 
376
397
  def fake_create(**kwargs: Any) -> Any:
377
398
  captured.update(kwargs)
378
- choice = SimpleNamespace(message=SimpleNamespace(content='{"k": 1}'))
379
- return SimpleNamespace(
380
- choices=[choice],
381
- usage=SimpleNamespace(prompt_tokens=10, completion_tokens=5),
399
+ return iter(
400
+ [
401
+ _text_chunk('{"k": 1}'),
402
+ _final_usage_chunk(prompt_tokens=10, completion_tokens=5),
403
+ ]
382
404
  )
383
405
 
384
406
  t = DashScopeTransport(
@@ -398,6 +420,8 @@ def test_dashscope_transport_sends_enable_thinking_through_chat(
398
420
  reasoning_effort="medium",
399
421
  )
400
422
  assert captured["extra_body"] == {"enable_thinking": False}
423
+ assert captured["stream"] is True
424
+ assert captured["stream_options"] == {"include_usage": True}
401
425
 
402
426
 
403
427
  def test_select_transport_class_routes_dashscope_by_base_url() -> None:
@@ -407,6 +431,97 @@ def test_select_transport_class_routes_dashscope_by_base_url() -> None:
407
431
  )
408
432
 
409
433
 
434
+ # ---------------------------------------------------------------------------
435
+ # Moonshot — server-side temperature constraint sanitization
436
+ # ---------------------------------------------------------------------------
437
+
438
+
439
+ def test_base_transport_adjust_temperature_is_identity() -> None:
440
+ """Default hook MUST NOT alter temperature — every non-Moonshot transport
441
+ relies on this. If this regresses, DashScope / DeepSeek / OpenAI / … will
442
+ silently start sending different temperatures than the caller requested.
443
+ """
444
+ t = GenericOpenAITransport(api_key="dummy", base_url="https://api.deepseek.com", timeout=10)
445
+ assert t._adjust_temperature(model="deepseek-chat", temperature=0.0) == 0.0
446
+ assert t._adjust_temperature(model="deepseek-chat", temperature=0.7) == 0.7
447
+ assert t._adjust_temperature(model="anything", temperature=1.5) == 1.5
448
+
449
+
450
+ def test_moonshot_transport_forces_temperature_for_reasoning_variants() -> None:
451
+ """Kimi K2 reasoning variants only accept ``temperature == <forced>`` on
452
+ the wire; any other value returns HTTP 400. The transport must clamp to
453
+ the forced value regardless of what the StageProfile asks for.
454
+ """
455
+ t = MoonshotTransport(api_key="dummy", base_url="https://api.moonshot.cn/v1", timeout=10)
456
+ # forced to 1.0
457
+ assert t._adjust_temperature(model="kimi-k2.6", temperature=0.2) == 1.0
458
+ assert t._adjust_temperature(model="kimi-k2.6-1106", temperature=0.1) == 1.0
459
+ assert t._adjust_temperature(model="kimi-k2-thinking", temperature=0.0) == 1.0
460
+ assert t._adjust_temperature(model="kimi-k2-thinking-128k", temperature=0.5) == 1.0
461
+ assert t._adjust_temperature(model="kimi-k2.5", temperature=0.2) == 1.0
462
+ # forced to 0.6
463
+ assert t._adjust_temperature(model="kimi-for-coding", temperature=0.0) == 0.6
464
+ # no-op when caller already supplied the forced value
465
+ assert t._adjust_temperature(model="kimi-k2.6", temperature=1.0) == 1.0
466
+
467
+
468
+ def test_moonshot_transport_clamps_non_reasoning_to_one() -> None:
469
+ """Non-reasoning Moonshot models accept [0, 1]; values above 1 also 400.
470
+ Pass through inside the range; clamp above."""
471
+ t = MoonshotTransport(api_key="dummy", base_url="https://api.moonshot.cn/v1", timeout=10)
472
+ assert t._adjust_temperature(model="moonshot-v1-32k", temperature=0.1) == 0.1
473
+ assert t._adjust_temperature(model="kimi-k2-instruct-0905", temperature=0.2) == 0.2
474
+ assert t._adjust_temperature(model="moonshot-v1-32k", temperature=1.0) == 1.0
475
+ assert t._adjust_temperature(model="moonshot-v1-32k", temperature=1.5) == 1.0
476
+
477
+
478
+ def test_moonshot_transport_sends_forced_temperature_on_wire(
479
+ monkeypatch: pytest.MonkeyPatch,
480
+ ) -> None:
481
+ """End-to-end wire-shape regression: chat() composes kwargs with the
482
+ *adjusted* temperature, not the caller's original value."""
483
+ captured: dict[str, Any] = {}
484
+
485
+ def fake_create(**kwargs: Any) -> Any:
486
+ captured.update(kwargs)
487
+ return iter(
488
+ [
489
+ _text_chunk('{"k": 1}'),
490
+ _final_usage_chunk(prompt_tokens=1, completion_tokens=1),
491
+ ]
492
+ )
493
+
494
+ t = MoonshotTransport(api_key="dummy", base_url="https://api.moonshot.cn/v1", timeout=10)
495
+ monkeypatch.setattr(t._client.chat.completions, "create", fake_create)
496
+ t.chat(
497
+ model="kimi-k2.6",
498
+ system="s",
499
+ user="u",
500
+ temperature=0.2,
501
+ max_tokens=64,
502
+ thinking=False,
503
+ reasoning_effort="",
504
+ )
505
+ assert captured["temperature"] == 1.0
506
+ assert captured["stream"] is True
507
+ assert captured["stream_options"] == {"include_usage": True}
508
+
509
+
510
+ def test_select_transport_class_routes_moonshot() -> None:
511
+ """``api.moonshot.cn`` (with or without ``/v1``) routes to MoonshotTransport
512
+ via substring match, same pattern as the other entries in the routing table.
513
+ """
514
+ assert _select_transport_class("https://api.moonshot.cn/v1") is MoonshotTransport
515
+ assert _select_transport_class("https://api.moonshot.cn") is MoonshotTransport
516
+
517
+
518
+ def test_moonshot_transport_inherits_reasoning_effort_default() -> None:
519
+ """Moonshot does not document support for the ``reasoning_effort`` field;
520
+ it inherits the base-class default (False) — confirm we didn't accidentally
521
+ flip it on along with adding the transport."""
522
+ assert MoonshotTransport.supports_reasoning_effort is False
523
+
524
+
410
525
  # ---------------------------------------------------------------------------
411
526
  # v0.6 H5 — reasoning_effort gating
412
527
  # ---------------------------------------------------------------------------
@@ -432,16 +547,15 @@ def test_generic_transport_drops_reasoning_effort(monkeypatch: pytest.MonkeyPatc
432
547
  """Even when the caller's StageProfile sets ``reasoning_effort='high'``,
433
548
  a Generic (non-OpenAI) transport must NOT send the field — most Chinese
434
549
  OpenAI-compat providers either ignore or 400 on it."""
435
- from types import SimpleNamespace
436
-
437
550
  captured: dict[str, Any] = {}
438
551
 
439
552
  def fake_create(**kwargs: Any) -> Any:
440
553
  captured.update(kwargs)
441
- choice = SimpleNamespace(message=SimpleNamespace(content='{"k": 1}'))
442
- return SimpleNamespace(
443
- choices=[choice],
444
- usage=SimpleNamespace(prompt_tokens=1, completion_tokens=1),
554
+ return iter(
555
+ [
556
+ _text_chunk('{"k": 1}'),
557
+ _final_usage_chunk(prompt_tokens=1, completion_tokens=1),
558
+ ]
445
559
  )
446
560
 
447
561
  t = GenericOpenAITransport(api_key="dummy", base_url="https://api.deepseek.com", timeout=10)
@@ -458,6 +572,8 @@ def test_generic_transport_drops_reasoning_effort(monkeypatch: pytest.MonkeyPatc
458
572
  assert "reasoning_effort" not in captured, (
459
573
  "GenericOpenAITransport must not forward reasoning_effort even when the caller sets it"
460
574
  )
575
+ assert captured["stream"] is True
576
+ assert captured["stream_options"] == {"include_usage": True}
461
577
 
462
578
 
463
579
  def test_openai_official_transport_sends_reasoning_effort(
@@ -465,16 +581,15 @@ def test_openai_official_transport_sends_reasoning_effort(
465
581
  ) -> None:
466
582
  """The official OpenAI transport forwards ``reasoning_effort`` when the
467
583
  caller's StageProfile supplies a non-empty value."""
468
- from types import SimpleNamespace
469
-
470
584
  captured: dict[str, Any] = {}
471
585
 
472
586
  def fake_create(**kwargs: Any) -> Any:
473
587
  captured.update(kwargs)
474
- choice = SimpleNamespace(message=SimpleNamespace(content='{"k": 1}'))
475
- return SimpleNamespace(
476
- choices=[choice],
477
- usage=SimpleNamespace(prompt_tokens=1, completion_tokens=1),
588
+ return iter(
589
+ [
590
+ _text_chunk('{"k": 1}'),
591
+ _final_usage_chunk(prompt_tokens=1, completion_tokens=1),
592
+ ]
478
593
  )
479
594
 
480
595
  t = OpenAIOfficialTransport(api_key="dummy", base_url="https://api.openai.com/v1", timeout=10)
@@ -489,6 +604,8 @@ def test_openai_official_transport_sends_reasoning_effort(
489
604
  reasoning_effort="medium",
490
605
  )
491
606
  assert captured.get("reasoning_effort") == "medium"
607
+ assert captured["stream"] is True
608
+ assert captured["stream_options"] == {"include_usage": True}
492
609
 
493
610
 
494
611
  def test_openai_official_transport_drops_empty_reasoning_effort(
@@ -496,16 +613,15 @@ def test_openai_official_transport_drops_empty_reasoning_effort(
496
613
  ) -> None:
497
614
  """An empty ``reasoning_effort`` (caller declined to set one) is dropped
498
615
  even on the official transport — sending an empty string would 400."""
499
- from types import SimpleNamespace
500
-
501
616
  captured: dict[str, Any] = {}
502
617
 
503
618
  def fake_create(**kwargs: Any) -> Any:
504
619
  captured.update(kwargs)
505
- choice = SimpleNamespace(message=SimpleNamespace(content='{"k": 1}'))
506
- return SimpleNamespace(
507
- choices=[choice],
508
- usage=SimpleNamespace(prompt_tokens=1, completion_tokens=1),
620
+ return iter(
621
+ [
622
+ _text_chunk('{"k": 1}'),
623
+ _final_usage_chunk(prompt_tokens=1, completion_tokens=1),
624
+ ]
509
625
  )
510
626
 
511
627
  t = OpenAIOfficialTransport(api_key="dummy", base_url="https://api.openai.com/v1", timeout=10)
@@ -520,6 +636,8 @@ def test_openai_official_transport_drops_empty_reasoning_effort(
520
636
  reasoning_effort="",
521
637
  )
522
638
  assert "reasoning_effort" not in captured
639
+ assert captured["stream"] is True
640
+ assert captured["stream_options"] == {"include_usage": True}
523
641
 
524
642
 
525
643
  def test_select_transport_class_defaults_to_generic() -> None:
@@ -532,4 +650,4 @@ def test_select_transport_class_defaults_to_generic() -> None:
532
650
  actually reaches the wire; that case is covered separately below.
533
651
  """
534
652
  assert _select_transport_class("https://api.deepseek.com") is GenericOpenAITransport
535
- assert _select_transport_class("https://api.moonshot.cn/v1") is GenericOpenAITransport
653
+ assert _select_transport_class("https://openrouter.ai/api/v1") is GenericOpenAITransport
@@ -0,0 +1,187 @@
1
+ """v0.9 — OpenAICompatTransport streaming wire-shape regression.
2
+
3
+ The transport switched from ``stream=False`` (single blocking response) to
4
+ ``stream=True`` + ``stream_options={"include_usage": True}`` to dodge the
5
+ intermediate-gateway idle-timeout that killed long Moonshot-thinking calls.
6
+ These tests pin down:
7
+
8
+ * chunk concatenation + final-chunk usage pickup
9
+ * empty content (thinking model burned the budget) returns ``text=""``
10
+ so the upper layer can raise ``LLMEmptyResponseError`` itself
11
+ * transport errors during create() *and* mid-iteration both surface
12
+ as ``LLMTransportError`` (tenacity retries them)
13
+ * missing usage on the final chunk records 0/0 rather than crashing
14
+
15
+ Plugin / audit / retry layers are unaffected and tested elsewhere.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from types import SimpleNamespace
21
+ from typing import Any
22
+ from unittest.mock import MagicMock
23
+
24
+ import pytest
25
+ from openai import APITimeoutError
26
+
27
+ from deeptrade.core.llm_client import (
28
+ GenericOpenAITransport,
29
+ LLMTransportError,
30
+ )
31
+
32
+
33
+ def _text_chunk(content: str | None) -> Any:
34
+ delta = SimpleNamespace(content=content, role=None)
35
+ choice = SimpleNamespace(delta=delta, index=0, finish_reason=None)
36
+ return SimpleNamespace(choices=[choice], usage=None)
37
+
38
+
39
+ def _final_usage_chunk(*, prompt_tokens: int, completion_tokens: int) -> Any:
40
+ return SimpleNamespace(
41
+ choices=[],
42
+ usage=SimpleNamespace(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens),
43
+ )
44
+
45
+
46
+ def _make_transport(stream_chunks: list[Any]) -> GenericOpenAITransport:
47
+ """Build a GenericOpenAITransport whose openai client returns the given
48
+ chunk sequence on chat.completions.create(). Bypasses __init__ so we
49
+ don't construct a real OpenAI client (no API key required)."""
50
+ t = GenericOpenAITransport.__new__(GenericOpenAITransport)
51
+ t._client = MagicMock()
52
+ t._client.chat.completions.create.return_value = iter(stream_chunks)
53
+ return t
54
+
55
+
56
+ class TestStreamingHappyPath:
57
+ def test_concatenates_chunks_and_picks_up_final_usage(self) -> None:
58
+ t = _make_transport(
59
+ [
60
+ _text_chunk(None), # role-only opener
61
+ _text_chunk('{"items":'),
62
+ _text_chunk('[{"code":"000001","score":7}]'),
63
+ _text_chunk("}"),
64
+ _final_usage_chunk(prompt_tokens=50, completion_tokens=20),
65
+ ]
66
+ )
67
+ resp = t.chat(
68
+ model="m",
69
+ system="s",
70
+ user="u",
71
+ temperature=1.0,
72
+ max_tokens=512,
73
+ thinking=False,
74
+ reasoning_effort="medium",
75
+ )
76
+ assert resp.text == '{"items":[{"code":"000001","score":7}]}'
77
+ assert resp.input_tokens == 50
78
+ assert resp.output_tokens == 20
79
+
80
+ def test_passes_stream_true_and_include_usage(self) -> None:
81
+ t = _make_transport([_final_usage_chunk(prompt_tokens=1, completion_tokens=1)])
82
+ t.chat(
83
+ model="m",
84
+ system="s",
85
+ user="u",
86
+ temperature=1.0,
87
+ max_tokens=64,
88
+ thinking=False,
89
+ reasoning_effort="medium",
90
+ )
91
+ kwargs = t._client.chat.completions.create.call_args.kwargs
92
+ assert kwargs["stream"] is True
93
+ assert kwargs["stream_options"] == {"include_usage": True}
94
+ # M3 hard constraint — no tools, ever.
95
+ assert "tools" not in kwargs
96
+ assert "tool_choice" not in kwargs
97
+ assert "functions" not in kwargs
98
+
99
+
100
+ class TestStreamingEmptyContent:
101
+ def test_no_content_chunks_yields_empty_text(self) -> None:
102
+ """Thinking model burns its budget on internal reasoning before
103
+ emitting any visible content. The transport itself does NOT raise —
104
+ it honestly returns ``text=""``; ``LLMClient._with_retry`` is what
105
+ promotes that into ``LLMEmptyResponseError``."""
106
+ t = _make_transport(
107
+ [
108
+ _text_chunk(None), # role-only
109
+ _final_usage_chunk(prompt_tokens=50, completion_tokens=2048),
110
+ ]
111
+ )
112
+ resp = t.chat(
113
+ model="m",
114
+ system="s",
115
+ user="u",
116
+ temperature=1.0,
117
+ max_tokens=2048,
118
+ thinking=False,
119
+ reasoning_effort="medium",
120
+ )
121
+ assert resp.text == ""
122
+ assert resp.output_tokens == 2048
123
+
124
+
125
+ class TestStreamingErrors:
126
+ def test_timeout_during_create_wraps_to_LLMTransportError(self) -> None:
127
+ t = GenericOpenAITransport.__new__(GenericOpenAITransport)
128
+ t._client = MagicMock()
129
+ t._client.chat.completions.create.side_effect = APITimeoutError(request=MagicMock())
130
+ with pytest.raises(LLMTransportError):
131
+ t.chat(
132
+ model="m",
133
+ system="s",
134
+ user="u",
135
+ temperature=1.0,
136
+ max_tokens=64,
137
+ thinking=False,
138
+ reasoning_effort="medium",
139
+ )
140
+
141
+ def test_timeout_during_iteration_wraps_to_LLMTransportError(self) -> None:
142
+ """Errors mid-stream (connection reset after headers, gateway drop
143
+ between chunks) must also surface as LLMTransportError so tenacity
144
+ retries — otherwise a half-read stream surfaces as a raw, unwrapped SDK exception."""
145
+
146
+ def raising_iter() -> Any:
147
+ yield _text_chunk('{"items":[')
148
+ raise APITimeoutError(request=MagicMock())
149
+
150
+ t = GenericOpenAITransport.__new__(GenericOpenAITransport)
151
+ t._client = MagicMock()
152
+ t._client.chat.completions.create.return_value = raising_iter()
153
+ with pytest.raises(LLMTransportError):
154
+ t.chat(
155
+ model="m",
156
+ system="s",
157
+ user="u",
158
+ temperature=1.0,
159
+ max_tokens=64,
160
+ thinking=False,
161
+ reasoning_effort="medium",
162
+ )
163
+
164
+
165
+ class TestStreamingUsageMissing:
166
+ def test_missing_usage_records_zero_not_raise(self) -> None:
167
+ """In-scope providers all populate usage on the final chunk when
168
+ ``include_usage`` is set, but the transport must not crash if a
169
+ provider omits it — it just records 0/0 and lets the call return."""
170
+ t = _make_transport(
171
+ [
172
+ _text_chunk("ok"),
173
+ SimpleNamespace(choices=[], usage=None), # final chunk, no usage
174
+ ]
175
+ )
176
+ resp = t.chat(
177
+ model="m",
178
+ system="s",
179
+ user="u",
180
+ temperature=1.0,
181
+ max_tokens=64,
182
+ thinking=False,
183
+ reasoning_effort="medium",
184
+ )
185
+ assert resp.text == "ok"
186
+ assert resp.input_tokens == 0
187
+ assert resp.output_tokens == 0
File without changes