power-loop 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. llm_client/__init__.py +0 -0
  2. llm_client/capabilities.py +162 -0
  3. llm_client/interface.py +470 -0
  4. llm_client/llm_factory.py +981 -0
  5. llm_client/llm_tooling.py +645 -0
  6. llm_client/llm_utils.py +205 -0
  7. llm_client/multimodal.py +237 -0
  8. llm_client/qwen_image.py +576 -0
  9. llm_client/web_search.py +149 -0
  10. power_loop/__init__.py +326 -0
  11. power_loop/agent/__init__.py +6 -0
  12. power_loop/agent/sink.py +247 -0
  13. power_loop/agent/stateful_loop.py +363 -0
  14. power_loop/agent/system_prompt.py +396 -0
  15. power_loop/agent/types.py +41 -0
  16. power_loop/contracts/__init__.py +132 -0
  17. power_loop/contracts/errors.py +140 -0
  18. power_loop/contracts/event_payloads.py +278 -0
  19. power_loop/contracts/events.py +86 -0
  20. power_loop/contracts/handlers.py +45 -0
  21. power_loop/contracts/hook_contexts.py +265 -0
  22. power_loop/contracts/hooks.py +64 -0
  23. power_loop/contracts/messages.py +90 -0
  24. power_loop/contracts/protocols.py +48 -0
  25. power_loop/contracts/tools.py +56 -0
  26. power_loop/core/agent_context.py +94 -0
  27. power_loop/core/events.py +124 -0
  28. power_loop/core/hooks.py +122 -0
  29. power_loop/core/phase.py +217 -0
  30. power_loop/core/pipeline.py +880 -0
  31. power_loop/core/runner.py +60 -0
  32. power_loop/core/state.py +208 -0
  33. power_loop/runtime/budget.py +179 -0
  34. power_loop/runtime/cancellation.py +127 -0
  35. power_loop/runtime/compact.py +300 -0
  36. power_loop/runtime/env.py +103 -0
  37. power_loop/runtime/memory.py +107 -0
  38. power_loop/runtime/provider.py +176 -0
  39. power_loop/runtime/retry.py +182 -0
  40. power_loop/runtime/session_store.py +636 -0
  41. power_loop/runtime/skills.py +201 -0
  42. power_loop/runtime/spec.py +233 -0
  43. power_loop/runtime/structured.py +225 -0
  44. power_loop/tools/__init__.py +51 -0
  45. power_loop/tools/default_manifest.py +244 -0
  46. power_loop/tools/default_tools.py +766 -0
  47. power_loop/tools/registry.py +162 -0
  48. power_loop/tools/spawn_agent.py +173 -0
  49. power_loop-0.2.0.dist-info/METADATA +632 -0
  50. power_loop-0.2.0.dist-info/RECORD +53 -0
  51. power_loop-0.2.0.dist-info/WHEEL +5 -0
  52. power_loop-0.2.0.dist-info/licenses/LICENSE +21 -0
  53. power_loop-0.2.0.dist-info/top_level.txt +2 -0
llm_client/__init__.py ADDED
File without changes
@@ -0,0 +1,162 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from collections.abc import Mapping
5
+ from dataclasses import dataclass
6
+ from typing import Any
7
+
8
+
9
@dataclass(frozen=True)
class ModelCapabilities:
    """Immutable record of the feature flags resolved for one model.

    Only ``provider`` and ``model`` are required; every other field has a
    default. Instances are frozen, so a changed set of flags means building
    a new instance.
    """

    # Identity of the resolved model.
    provider: str
    model: str

    # API family label; "chat" unless a caller supplies something else.
    api_family: str = "chat"

    # Input-related flags.
    supports_image_input: bool = False
    supports_pdf_input_chat: bool = False
    supports_pdf_input_responses: bool = False
    supports_data_url: bool = True

    # Request-feature flags.
    supports_tools: bool = True
    supports_stream: bool = True
20
+
21
+
22
def _caps(provider: str, model: str, **kwargs: Any) -> ModelCapabilities:
    """Construct a ``ModelCapabilities`` for ``provider`` / ``model``.

    Extra keyword arguments are passed through as field values; a name that
    is not a ``ModelCapabilities`` field raises ``TypeError`` from the
    dataclass constructor.
    """
    field_values: dict[str, Any] = {"provider": provider, "model": model, **kwargs}
    return ModelCapabilities(**field_values)
24
+
25
+
26
# Environment variable name -> ``ModelCapabilities`` field it overrides.
# Each variable is the upper-cased field name with an ``OPENAI_COMPAT_`` prefix;
# the names are spelled out in full so they stay greppable.
CAPABILITY_OVERRIDE_ENV_MAP: dict[str, str] = {
    "OPENAI_COMPAT_SUPPORTS_IMAGE_INPUT": "supports_image_input",
    "OPENAI_COMPAT_SUPPORTS_PDF_INPUT_CHAT": "supports_pdf_input_chat",
    "OPENAI_COMPAT_SUPPORTS_PDF_INPUT_RESPONSES": "supports_pdf_input_responses",
    "OPENAI_COMPAT_SUPPORTS_DATA_URL": "supports_data_url",
    "OPENAI_COMPAT_SUPPORTS_TOOLS": "supports_tools",
    "OPENAI_COMPAT_SUPPORTS_STREAM": "supports_stream",
}
34
+
35
+
36
+ def _parse_optional_bool(value: Any) -> bool | None:
37
+ if value is None:
38
+ return None
39
+ if isinstance(value, bool):
40
+ return value
41
+ if isinstance(value, (int, float)):
42
+ return bool(value)
43
+ if isinstance(value, str):
44
+ normalized = value.strip().lower()
45
+ if not normalized:
46
+ return None
47
+ if normalized in {"1", "true", "yes", "on"}:
48
+ return True
49
+ if normalized in {"0", "false", "no", "off"}:
50
+ return False
51
+ return None
52
+
53
+
54
def capability_overrides_from_env(env: Mapping[str, Any]) -> dict[str, Any]:
    """Collect capability overrides from an environment-like mapping.

    Each variable listed in ``CAPABILITY_OVERRIDE_ENV_MAP`` is read from
    ``env`` and parsed with ``_parse_optional_bool``. Variables that are
    missing or do not parse to a boolean are left out of the result.

    Returns:
        A dict keyed by ``ModelCapabilities`` field name.
    """
    parsed_flags = (
        (field_name, _parse_optional_bool(env.get(env_name)))
        for env_name, field_name in CAPABILITY_OVERRIDE_ENV_MAP.items()
    )
    return {field_name: flag for field_name, flag in parsed_flags if flag is not None}
61
+
62
+
63
# Baseline capability fields per provider. Every provider currently shares the
# same baseline; the comprehension still builds a separate dict per provider so
# that mutating one entry cannot affect another.
PROVIDER_DEFAULTS: dict[str, dict[str, Any]] = {
    provider_name: {
        "api_family": "chat",
        "supports_data_url": True,
        "supports_tools": True,
        "supports_stream": True,
    }
    for provider_name in (
        "openai",
        "openai-compatible",
        "deepseek",
        "zhipu",
        "moonshot",
        "minimax",
        "dashscope",
        "volcengine",
        "google",
        "anthropic",
    )
}
75
+
76
+
77
# Ordered (compiled pattern, capability payload) table. Order matters: the
# resolver stops at the first pattern that matches, so the vision variants of
# a family are listed before the broader text pattern of the same family.
# All patterns are anchored at the start of the name and case-insensitive.
MODEL_PATTERNS: list[tuple[re.Pattern[str], dict[str, Any]]] = [
    (re.compile(source, re.IGNORECASE), payload)
    for source, payload in (
        (
            r"^(gpt-4o|gpt-4\.1|gpt-4\.5|o1|o3)",
            {
                "provider": "openai",
                "supports_image_input": True,
                "supports_pdf_input_responses": True,
            },
        ),
        (
            r"^(qwen3-vl|qwen-vl|qwen2(\.5)?-vl|qwen-vl-max|qwen-vl-plus|qvq|max-vl|internvl|qwen-omni)",
            {
                "provider": "dashscope",
                "supports_image_input": True,
            },
        ),
        (
            r"^(glm-4v|glm-4\.1v|glm-4\.5v|glm-4v-plus|glm-4v-thinking|cogvlm)",
            {
                "provider": "zhipu",
                "supports_image_input": True,
            },
        ),
        (
            r"^(glm-4|glm-4-plus|glm-4-air|glm-zero-preview)",
            {
                "provider": "zhipu",
            },
        ),
        (
            r"^(deepseek-vl|deepseek-vl2)",
            {
                "provider": "deepseek",
                "supports_image_input": True,
            },
        ),
        (
            r"^(deepseek-chat|deepseek-reasoner|deepseek-coder|deepseek-r1)",
            {
                "provider": "deepseek",
            },
        ),
        (
            r"^(kimi-vl|moonshot-v1-vision|moonshot-vision)",
            {
                "provider": "moonshot",
                "supports_image_input": True,
            },
        ),
        (
            r"^(kimi-k2|kimi-latest|moonshot-v1-(8k|32k|128k)|moonshot-kimi)",
            {
                "provider": "moonshot",
            },
        ),
        (
            r"^(minimax-vl|minimax-vl-01|abab[\-_]vision|minimax-vision)",
            {
                "provider": "minimax",
                "supports_image_input": True,
            },
        ),
        (
            r"^(abab6(\.5)?-chat|minimax-text|minimax-m1)",
            {
                "provider": "minimax",
            },
        ),
        (
            r"^(doubao-vision|doubao-1\.5-vision|doubao-seed-vision)",
            {
                "provider": "volcengine",
                "supports_image_input": True,
            },
        ),
        (
            r"^(doubao|doubao-1\.5|seed)",
            {
                "provider": "volcengine",
            },
        ),
        (
            r"^(gemini|gemini-1\.5|gemini-2\.0)",
            {
                "provider": "google",
                "supports_image_input": True,
                "supports_pdf_input_responses": True,
            },
        ),
        (
            r"^(claude-3|claude-3\.5|claude-3\.7|claude-sonnet|claude-opus|claude-4)",
            {
                "provider": "anthropic",
                "supports_image_input": True,
                "supports_pdf_input_responses": True,
            },
        ),
    )
]
133
+
134
+
135
def resolve_model_capabilities(model: str, base_url: str, overrides: dict[str, Any] | None = None) -> ModelCapabilities:
    """Resolve the capability flags for ``model``.

    Resolution happens in three layers, later layers winning:

    1. the ``"openai-compatible"`` entry of ``PROVIDER_DEFAULTS``;
    2. the payload of the first ``MODEL_PATTERNS`` entry whose pattern matches
       the stripped model name (payload keys that are not capability fields
       are ignored, and the model name is never taken from the payload);
    3. ``overrides``, applied as-is on top of the result.

    Args:
        model: Model name; ``None`` or empty is treated as ``""``.
        base_url: Accepted for the caller's convenience; this function does
            not read it.
        overrides: Optional field-name -> value mapping. A key that is not a
            ``ModelCapabilities`` field raises ``TypeError``.

    Returns:
        A new ``ModelCapabilities`` instance.
    """
    model_name = (model or "").strip()
    fallback_provider = "openai-compatible"
    resolved = _caps(
        provider=fallback_provider,
        model=model_name,
        **PROVIDER_DEFAULTS.get(fallback_provider, {}),
    )

    # Only the first matching pattern contributes.
    matched_payload = next(
        (payload for pattern, payload in MODEL_PATTERNS if pattern.search(model_name)),
        None,
    )
    if matched_payload is not None:
        layered = {
            field_name: matched_payload.get(field_name, current)
            for field_name, current in resolved.__dict__.items()
        }
        layered["model"] = model_name
        resolved = ModelCapabilities(**layered)

    if overrides:
        resolved = ModelCapabilities(**{**resolved.__dict__, **dict(overrides)})

    return resolved
@@ -0,0 +1,470 @@
1
+ from collections.abc import AsyncIterator, Callable
2
+ from dataclasses import dataclass, field
3
+ from typing import Any, Literal, Protocol, TypedDict
4
+
5
+ LLMRole = Literal["system", "user", "assistant", "tool"]
6
+ LLMContent = str | list[dict[str, Any]]
7
+
8
+
9
class LLMMessage(TypedDict, total=False):
    """Dict shape of a single chat message; every key is optional."""

    role: LLMRole
    content: LLMContent
    name: str
    tool_call_id: str
    # OpenAI-compatible: an assistant message may carry tool_calls or
    # function_call when it is requesting tools.
    tool_calls: Any
    function_call: Any
17
+
18
+
19
class LLMToolFunction(TypedDict, total=False):
    """The ``function`` part of a tool definition; every key is optional."""

    name: str
    description: str
    parameters: dict[str, Any]
23
+
24
+
25
class LLMTool(TypedDict, total=False):
    """OpenAI-compatible tool schema: ``{"type": "function", "function": {...}}``."""

    type: Literal["function"]
    function: LLMToolFunction
29
+
30
+
31
+ @dataclass
32
+ class LLMRequest:
33
+ """
34
+ Canonical request object for LLM calls.
35
+
36
+ - Put conversation turns in `messages` (no system message here by default)
37
+ - Put system instruction in `system_prompt` (we will prepend it during execution)
38
+ """
39
+
40
+ # Message history. Rule: do NOT include a system message here; use `system_prompt`.
41
+ # Accepts any dict-shaped messages; the loop produces concrete dict[str, Any] sequences.
42
+ messages: list[dict[str, Any]]
43
+ system_prompt: str | None = None
44
+
45
+ # Common generation params (optional; implementation may ignore some)
46
+ model: str | None = None
47
+ temperature: float | None = None
48
+ max_tokens: int | None = None
49
+
50
+ # Structured output helpers
51
+ parse_json: bool = False
52
+
53
+ # Reasoning switch for providers supporting `reason` flag.
54
+ # Default enabled as requested; set to False to disable.
55
+ reason: bool | None = True
56
+
57
+ # Tool calling (OpenAI-compatible). Keep optional to avoid forcing every impl to support it.
58
+ tools: list[dict[str, Any]] | None = None
59
+ tool_choice: Any | None = None
60
+
61
+ # Structured output (M1.3). OpenAI-compatible shape, e.g.::
62
+ #
63
+ # {"type": "json_schema", "json_schema": {"name": ..., "schema": {...}, "strict": true}}
64
+ #
65
+ # Forwarded verbatim to the provider as the ``response_format`` parameter.
66
+ # Producing this dict from a pydantic-style spec is the caller's job
67
+ # (see ``power_loop.runtime.structured.StructuredOutputSpec``).
68
+ response_format: dict[str, Any] | None = None
69
+
70
+ # Provider-specific passthrough
71
+ extra: dict[str, Any] = field(default_factory=dict)
72
+
73
+ def to_messages(self, capabilities: Any = None) -> list[dict[str, Any]]:
74
+ """
75
+ Normalize messages for OpenAI-compatible APIs.
76
+ """
77
+ msgs: list[dict[str, Any]] = []
78
+ if self.system_prompt:
79
+ msgs.append({"role": "system", "content": self.system_prompt})
80
+
81
+ for raw_msg in self.messages or []:
82
+ msg = dict(raw_msg)
83
+ if capabilities is not None and "content" in msg:
84
+ from .multimodal import render_message_content
85
+
86
+ msg["content"] = render_message_content(
87
+ msg.get("content"),
88
+ role=str(msg.get("role") or "user"),
89
+ capabilities=capabilities,
90
+ )
91
+ msgs.append(msg)
92
+ return msgs
93
+
94
+ @classmethod
95
+ def from_prompt(
96
+ cls,
97
+ *,
98
+ prompt: str,
99
+ system_prompt: str | None = None,
100
+ model: str | None = None,
101
+ temperature: float | None = None,
102
+ max_tokens: int | None = None,
103
+ parse_json: bool = False,
104
+ reason: bool | None = True,
105
+ **kwargs: Any,
106
+ ) -> "LLMRequest":
107
+ return cls(
108
+ messages=[{"role": "user", "content": prompt}],
109
+ system_prompt=system_prompt,
110
+ model=model,
111
+ temperature=temperature,
112
+ max_tokens=max_tokens,
113
+ parse_json=parse_json,
114
+ reason=reason,
115
+ extra=dict(kwargs or {}),
116
+ )
117
+
118
+
119
+ @dataclass
120
+ class LLMTokenUsage:
121
+ prompt_tokens: int | None = None
122
+ completion_tokens: int | None = None
123
+ total_tokens: int | None = None
124
+
125
+ prompt_audio_tokens: int | None = None
126
+ prompt_cached_tokens: int | None = None
127
+ prompt_cache_miss_tokens: int | None = None
128
+ prompt_text_tokens: int | None = None
129
+ prompt_image_tokens: int | None = None
130
+
131
+ completion_reasoning_tokens: int | None = None
132
+ completion_audio_tokens: int | None = None
133
+ completion_text_tokens: int | None = None
134
+ completion_image_tokens: int | None = None
135
+
136
+ accepted_prediction_tokens: int | None = None
137
+ rejected_prediction_tokens: int | None = None
138
+
139
+ cached_tokens: int | None = None
140
+ cache_hit_tokens: int | None = None
141
+ cache_miss_tokens: int | None = None
142
+ reasoning_tokens: int | None = None
143
+ accepted_tokens: int | None = None
144
+ rejected_tokens: int | None = None
145
+
146
+ def as_dict(self) -> dict[str, int | None]:
147
+ return dict(self.__dict__)
148
+
149
+ def to_log_str(self) -> str:
150
+ parts = [f"{k}={v}" for k, v in self.__dict__.items() if v is not None]
151
+ return f"LLMTokenUsage({', '.join(parts)})"
152
+
153
+ def __str__(self) -> str:
154
+ return self.to_log_str()
155
+
156
+ def __repr__(self) -> str:
157
+ return self.to_log_str()
158
+
159
+
160
@dataclass
class LLMStreamChunk:
    """
    One event of a streamed LLM response.
    """

    # Text delta carried by this chunk.
    delta_text: str = ""
    # Best-effort streamed thinking/reasoning text.
    think: str = ""
    # Optional tool/function-call delta payload (provider-specific shape).
    # For OpenAI-compatible streaming this is typically `choices[0].delta.tool_calls`.
    delta_tool_calls: Any = None
    # Aggregated tool calls (best-effort normalized), built by the LLMService
    # streaming implementation. Shape (OpenAI-compatible):
    #   [{"id": "...", "type": "function", "function": {"name": "...", "arguments": "..."}}]
    tool_calls: list[dict[str, Any]] = field(default_factory=list)
    # Best-effort usage. Some providers only send usage in the final event.
    token_usage: LLMTokenUsage | None = None
    # Raw event object this chunk was created with, if any.
    raw_event: Any = None
    # Flag for the final chunk; False unless the creator sets it.
    is_final: bool = False
180
+
181
+
182
@dataclass
class LLMResponse:
    """
    Unified response wrapper for LLM calls, with optional JSON parsing.

    Conceptually we keep 4 layers (some may be empty depending on output):
    - raw_text: extracted message content text
    - raw_json_data: if raw_text can be parsed as a JSON object directly
    - content_text: extracted "business payload" text (typically a JSON substring)
    - json_data: parsed JSON object from content_text (after fallback/repairs)
    """

    raw_text: str
    # Raw message/completion object references (provider-specific).
    raw_message: Any = None
    raw_completion: Any = None

    raw_json_data: dict[str, Any] = field(default_factory=dict)
    raw_json_error: str | None = None

    content_text: str = ""
    think: str = ""
    json_data: dict[str, Any] = field(default_factory=dict)
    token_usage: LLMTokenUsage = field(default_factory=LLMTokenUsage)
    parse_error: str | None = None
    debug: dict[str, Any] = field(default_factory=dict)
    # Best-effort extracted tool calls (if any). Providers may expose this on
    # raw_message/raw_completion instead; see get_tool_calls().
    tool_calls: list[dict[str, Any]] = field(default_factory=list)
    # Raw streamed chunks captured when complete() is implemented via stream aggregation.
    stream_chunks: list[LLMStreamChunk] = field(default_factory=list)

    def get_tool_calls(self) -> list[dict[str, Any]]:
        """
        Best-effort accessor for tool calls.

        Preference order:
        - explicit `self.tool_calls` (if the caller filled it)
        - `raw_message.tool_calls` / `raw_message.function_call`
        - `raw_completion.choices[0].message.tool_calls` / `.function_call`
        - `model_dump()` dict fallbacks (raw_message, then raw_completion)

        Returns a list of dicts; an empty list when nothing is found. A legacy
        `function_call` is wrapped as
        `{"type": "function", "function": {"name": ..., "arguments": ...}}`.
        """

        def _as_dict(obj: Any) -> Any:
            # Convert an SDK object to a dict when possible; otherwise hand the
            # object back unchanged so the caller's isinstance check rejects it.
            if obj is None:
                return None
            if isinstance(obj, dict):
                return obj
            if hasattr(obj, "model_dump"):
                try:
                    return obj.model_dump()
                except Exception:
                    return obj
            try:
                return dict(obj.__dict__)
            except Exception:
                return obj

        def _normalize_tool_calls(obj: Any) -> list[dict[str, Any]]:
            if not obj:
                return []
            if isinstance(obj, list):
                converted = (_as_dict(item) for item in obj)
                return [item for item in converted if isinstance(item, dict)]
            single = _as_dict(obj)
            return [single] if isinstance(single, dict) else []

        def _normalize_function_call(obj: Any) -> list[dict[str, Any]]:
            if not obj:
                return []
            legacy = _as_dict(obj)
            if not isinstance(legacy, dict):
                return []
            # legacy shape: {"name": "...", "arguments": "..."}
            return [
                {
                    "type": "function",
                    "function": {"name": legacy.get("name"), "arguments": legacy.get("arguments")},
                }
            ]

        def _from_attributes(message: Any) -> list[dict[str, Any]] | None:
            # Attribute-based lookup. None means "nothing here, keep looking";
            # a list (possibly empty) means a truthy field was found and ends the search.
            if message is None:
                return None
            calls = getattr(message, "tool_calls", None)
            if calls:
                return _normalize_tool_calls(calls)
            legacy = getattr(message, "function_call", None)
            if legacy:
                return _normalize_function_call(legacy)
            return None

        def _from_mapping(mapping: Any) -> list[dict[str, Any]] | None:
            # Same contract as _from_attributes, for a dumped message dict.
            if not isinstance(mapping, dict):
                return None
            if mapping.get("tool_calls"):
                return _normalize_tool_calls(mapping.get("tool_calls"))
            if mapping.get("function_call"):
                return _normalize_function_call(mapping.get("function_call"))
            return None

        if self.tool_calls:
            return list(self.tool_calls)

        # raw_message direct
        message = self.raw_message
        found = _from_attributes(message)
        if found is not None:
            return found

        # raw_completion -> first message. Provider objects vary, so any
        # failure while walking them just moves on to the next strategy.
        completion = self.raw_completion
        try:
            first_message = completion.choices[0].message if completion is not None else None
            found = _from_attributes(first_message)
        except Exception:
            found = None
        if found is not None:
            return found

        # model_dump fallback (raw_message)
        if message is not None and hasattr(message, "model_dump"):
            try:
                found = _from_mapping(message.model_dump())
            except Exception:
                found = None
            if found is not None:
                return found

        # model_dump fallback (raw_completion)
        if completion is not None and hasattr(completion, "model_dump"):
            try:
                found = None
                dumped = completion.model_dump()
                if isinstance(dumped, dict):
                    choices = dumped.get("choices") or []
                    if choices and isinstance(choices, list) and isinstance(choices[0], dict):
                        found = _from_mapping(choices[0].get("message") or {})
            except Exception:
                found = None
            if found is not None:
                return found

        return []

    @property
    def ok(self) -> bool:
        """True when `json_data` is non-empty and no parse error was recorded."""
        return bool(self.json_data) and self.parse_error is None

    def to_log_str(
        self,
        *,
        max_raw_chars: int = 600,
        max_content_chars: int = 600,
        include_debug: bool = False,
    ) -> str:
        """
        Render a single-line summary suitable for logs.

        Args:
            max_raw_chars: Upper bound on the `raw_text` preview length.
            max_content_chars: Upper bound on the `content_text` and
                `json_data` preview lengths.
            include_debug: When true and `debug` is non-empty, append it
                (as JSON, falling back to `repr`).

        A limit of zero or less produces an empty preview. Previews never
        exceed their limit.
        """
        import json as _json

        def _trunc(s: str, n: int) -> str:
            s = s or ""
            if n <= 0:
                return ""
            if len(s) <= n:
                return s
            if n < 3:
                # Too short for an ellipsis. Slicing with n - 3 would be a
                # negative bound and return nearly the whole string, so hard-cut.
                return s[:n]
            return s[: n - 3] + "..."

        parts = [
            f"ok={self.ok}",
            f"token_usage={self.token_usage.as_dict()}",
            f"has_raw_message={self.raw_message is not None}",
            f"has_raw_completion={self.raw_completion is not None}",
            f"tool_calls={len(self.tool_calls) or len(self.get_tool_calls())}",
            f"raw_json_ok={bool(self.raw_json_data) and self.raw_json_error is None}",
            f"parse_error={self.parse_error!r}",
            f"strategy={self.debug.get('strategy')!r}",
        ]
        raw_preview = _trunc(self.raw_text, max_raw_chars)
        parts.append(f"raw_text={raw_preview!r}")
        if self.content_text:
            content_preview = _trunc(self.content_text, max_content_chars)
            parts.append(f"content_text={content_preview!r}")
        if self.json_data:
            try:
                serialized = _json.dumps(self.json_data, ensure_ascii=False, sort_keys=True)
                # Clamp at zero: a negative limit must not turn into a
                # "drop the last k characters" slice.
                parts.append("json_data=" + serialized[: max(max_content_chars, 0)])
            except Exception:
                parts.append("json_data=<unserializable>")
        if include_debug and self.debug:
            try:
                parts.append("debug=" + _json.dumps(self.debug, ensure_ascii=False, sort_keys=True))
            except Exception:
                parts.append(f"debug={self.debug!r}")
        return "LLMResponse(" + ", ".join(parts) + ")"

    def __str__(self) -> str:
        return self.to_log_str()

    def __repr__(self) -> str:
        return self.to_log_str()
372
+
373
+
374
class LLMService(Protocol):
    """
    Unified LLM interface (no LangChain dependency).

    Why this design:
    - There is ONE canonical request shape: `LLMRequest` (messages + optional system_prompt)
    - There is ONE canonical non-streaming call: `complete(request) -> LLMResponse`
    - Streaming uses the same request: `stream(request) -> AsyncIterator[LLMStreamChunk]`
    - Convenience wrappers (`predict`, `chat`, `predict_stream`) are thin sugar over the above.
    """

    async def complete(
        self,
        request: "LLMRequest",
        *,
        on_chunk_delta_text: Callable[[str], Any] | None = None,
        on_chunk_think: Callable[[str], Any] | None = None,
        on_stream_end: Callable[["LLMResponse"], Any] | None = None,
    ) -> LLMResponse:
        """Preferred API (non-streaming). Can optionally receive chunk hooks."""
        ...

    def stream(self, request: "LLMRequest") -> AsyncIterator["LLMStreamChunk"]:
        """Preferred API (streaming). Returns an async iterator directly (no await needed)."""
        ...

    async def close(self) -> None:
        """Close underlying HTTP resources, if any."""
        ...

    # -----------------------
    # Convenience wrappers
    # -----------------------

    async def predict(
        self,
        prompt: str,
        system_prompt: str | None = None,
        **kwargs: Any,
    ) -> str:
        """
        Convenience: single-turn text completion.

        Builds an `LLMRequest` with one user message via
        `LLMRequest.from_prompt`, awaits `complete`, and returns the stripped
        `raw_text` ("" when it is empty).
        """
        request = LLMRequest.from_prompt(prompt=prompt, system_prompt=system_prompt, **kwargs)
        response = await self.complete(request)
        text = response.raw_text or ""
        return text.strip()

    async def chat(
        self,
        messages: list[dict[str, Any]],
        system_prompt: str | None = None,
        **kwargs: Any,
    ) -> str:
        """
        Convenience: multi-turn chat returning plain text.

        Rule: do NOT include a system message in `messages`. If you need one, pass `system_prompt`.
        Extra keyword arguments are passed to the `LLMRequest` constructor.
        """
        request = LLMRequest(messages=messages, system_prompt=system_prompt, **kwargs)
        response = await self.complete(request)
        text = response.raw_text or ""
        return text.strip()

    async def predict_stream(
        self,
        prompt: str,
        system_prompt: str | None = None,
        **kwargs: Any,
    ) -> AsyncIterator[str]:
        """
        Convenience: single-turn streaming text.

        Yields the `delta_text` of each streamed chunk, skipping chunks whose
        text is empty.
        """
        request = LLMRequest.from_prompt(prompt=prompt, system_prompt=system_prompt, **kwargs)
        async for piece in self.stream(request):
            if not piece.delta_text:
                continue
            yield piece.delta_text
451
+
452
+
453
@dataclass
class OpenAICompatibleChatConfig:
    """Settings bundle for an OpenAI-compatible chat client.

    ``base_url``, ``api_key`` and ``model`` are required; ``is_ready`` reports
    whether all three are non-empty.
    """

    # Connection and identity.
    base_url: str
    api_key: str
    model: str

    # Request defaults.
    timeout_s: float = 180.0
    max_tokens: int = 8000
    temperature: float = 0.0

    # Retry settings.
    max_retries: int = 3
    retry_base_delay_s: float = 0.5

    # Stream-resume settings; resuming is off by default.
    stream_resume_on_error: bool = False
    stream_max_restarts: int = 0
    # Default instruction text (Chinese): "Continue from where you were
    # interrupted. Do not repeat content that was already output."
    stream_resume_instruction: str = "继续,从你上次中断的位置继续输出。不要重复已经输出的内容。"

    # Capability flag overrides, keyed by field name.
    capability_overrides: dict[str, Any] = field(default_factory=dict)

    @property
    def is_ready(self) -> bool:
        """True when ``base_url``, ``api_key`` and ``model`` are all non-empty."""
        return all((self.base_url, self.api_key, self.model))