power-loop 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_client/__init__.py +0 -0
- llm_client/capabilities.py +162 -0
- llm_client/interface.py +470 -0
- llm_client/llm_factory.py +981 -0
- llm_client/llm_tooling.py +645 -0
- llm_client/llm_utils.py +205 -0
- llm_client/multimodal.py +237 -0
- llm_client/qwen_image.py +576 -0
- llm_client/web_search.py +149 -0
- power_loop/__init__.py +326 -0
- power_loop/agent/__init__.py +6 -0
- power_loop/agent/sink.py +247 -0
- power_loop/agent/stateful_loop.py +363 -0
- power_loop/agent/system_prompt.py +396 -0
- power_loop/agent/types.py +41 -0
- power_loop/contracts/__init__.py +132 -0
- power_loop/contracts/errors.py +140 -0
- power_loop/contracts/event_payloads.py +278 -0
- power_loop/contracts/events.py +86 -0
- power_loop/contracts/handlers.py +45 -0
- power_loop/contracts/hook_contexts.py +265 -0
- power_loop/contracts/hooks.py +64 -0
- power_loop/contracts/messages.py +90 -0
- power_loop/contracts/protocols.py +48 -0
- power_loop/contracts/tools.py +56 -0
- power_loop/core/agent_context.py +94 -0
- power_loop/core/events.py +124 -0
- power_loop/core/hooks.py +122 -0
- power_loop/core/phase.py +217 -0
- power_loop/core/pipeline.py +880 -0
- power_loop/core/runner.py +60 -0
- power_loop/core/state.py +208 -0
- power_loop/runtime/budget.py +179 -0
- power_loop/runtime/cancellation.py +127 -0
- power_loop/runtime/compact.py +300 -0
- power_loop/runtime/env.py +103 -0
- power_loop/runtime/memory.py +107 -0
- power_loop/runtime/provider.py +176 -0
- power_loop/runtime/retry.py +182 -0
- power_loop/runtime/session_store.py +636 -0
- power_loop/runtime/skills.py +201 -0
- power_loop/runtime/spec.py +233 -0
- power_loop/runtime/structured.py +225 -0
- power_loop/tools/__init__.py +51 -0
- power_loop/tools/default_manifest.py +244 -0
- power_loop/tools/default_tools.py +766 -0
- power_loop/tools/registry.py +162 -0
- power_loop/tools/spawn_agent.py +173 -0
- power_loop-0.2.0.dist-info/METADATA +632 -0
- power_loop-0.2.0.dist-info/RECORD +53 -0
- power_loop-0.2.0.dist-info/WHEEL +5 -0
- power_loop-0.2.0.dist-info/licenses/LICENSE +21 -0
- power_loop-0.2.0.dist-info/top_level.txt +2 -0
llm_client/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from collections.abc import Mapping
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True)
class ModelCapabilities:
    """Immutable record of the feature flags resolved for one provider/model pair.

    Instances are frozen, so a changed record is produced by constructing a
    new instance rather than by assigning to a field.
    """

    # Identity of the record.
    provider: str
    model: str
    # API style; "chat" is the only value assigned anywhere in this module.
    api_family: str = "chat"
    # Input-modality flags: off unless a model pattern or override enables them.
    supports_image_input: bool = False
    supports_pdf_input_chat: bool = False
    supports_pdf_input_responses: bool = False
    # Transport/feature flags: on unless explicitly disabled.
    supports_data_url: bool = True
    supports_tools: bool = True
    supports_stream: bool = True
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _caps(provider: str, model: str, **kwargs: Any) -> ModelCapabilities:
    """Construct a ``ModelCapabilities`` for ``provider``/``model``.

    Any extra keyword arguments are forwarded as field values, so an unknown
    field name raises ``TypeError`` from the dataclass constructor.
    """
    capability_fields: dict[str, Any] = {"provider": provider, "model": model}
    capability_fields.update(kwargs)
    return ModelCapabilities(**capability_fields)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Environment-variable name -> name of the ModelCapabilities field it overrides.
# Consumed by capability_overrides_from_env(), which parses each value as an
# optional boolean.
CAPABILITY_OVERRIDE_ENV_MAP: dict[str, str] = {
    "OPENAI_COMPAT_SUPPORTS_IMAGE_INPUT": "supports_image_input",
    "OPENAI_COMPAT_SUPPORTS_PDF_INPUT_CHAT": "supports_pdf_input_chat",
    "OPENAI_COMPAT_SUPPORTS_PDF_INPUT_RESPONSES": "supports_pdf_input_responses",
    "OPENAI_COMPAT_SUPPORTS_DATA_URL": "supports_data_url",
    "OPENAI_COMPAT_SUPPORTS_TOOLS": "supports_tools",
    "OPENAI_COMPAT_SUPPORTS_STREAM": "supports_stream",
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _parse_optional_bool(value: Any) -> bool | None:
|
|
37
|
+
if value is None:
|
|
38
|
+
return None
|
|
39
|
+
if isinstance(value, bool):
|
|
40
|
+
return value
|
|
41
|
+
if isinstance(value, (int, float)):
|
|
42
|
+
return bool(value)
|
|
43
|
+
if isinstance(value, str):
|
|
44
|
+
normalized = value.strip().lower()
|
|
45
|
+
if not normalized:
|
|
46
|
+
return None
|
|
47
|
+
if normalized in {"1", "true", "yes", "on"}:
|
|
48
|
+
return True
|
|
49
|
+
if normalized in {"0", "false", "no", "off"}:
|
|
50
|
+
return False
|
|
51
|
+
return None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def capability_overrides_from_env(env: Mapping[str, Any]) -> dict[str, Any]:
    """Collect capability overrides from an environment-like mapping.

    Every variable listed in ``CAPABILITY_OVERRIDE_ENV_MAP`` is looked up in
    ``env`` and parsed with ``_parse_optional_bool``. Variables that are
    missing or do not parse to a boolean are left out of the result.

    Returns:
        A dict keyed by ``ModelCapabilities`` field name.
    """
    candidates = (
        (field_name, _parse_optional_bool(env.get(env_name)))
        for env_name, field_name in CAPABILITY_OVERRIDE_ENV_MAP.items()
    )
    return {field_name: flag for field_name, flag in candidates if flag is not None}
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Baseline capability fields per provider. Every provider currently shares the
# same values; each one still gets its own dict object.
PROVIDER_DEFAULTS: dict[str, dict[str, Any]] = {
    provider_name: {
        "api_family": "chat",
        "supports_data_url": True,
        "supports_tools": True,
        "supports_stream": True,
    }
    for provider_name in (
        "openai",
        "openai-compatible",
        "deepseek",
        "zhipu",
        "moonshot",
        "minimax",
        "dashscope",
        "volcengine",
        "google",
        "anthropic",
    )
}
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# Ordered (regex, capability payload) table keyed on model-name prefixes.
# resolve_model_capabilities() stops at the first match, so each vision-specific
# entry is listed before the generic entry of the same model family.
MODEL_PATTERNS: list[tuple[re.Pattern[str], dict[str, Any]]] = [
    # OpenAI
    (
        re.compile(r"^(gpt-4o|gpt-4\.1|gpt-4\.5|o1|o3)", re.I),
        {"provider": "openai", "supports_image_input": True, "supports_pdf_input_responses": True},
    ),
    # DashScope (Qwen vision family)
    (
        re.compile(r"^(qwen3-vl|qwen-vl|qwen2(\.5)?-vl|qwen-vl-max|qwen-vl-plus|qvq|max-vl|internvl|qwen-omni)", re.I),
        {"provider": "dashscope", "supports_image_input": True},
    ),
    # Zhipu: vision first, then text
    (
        re.compile(r"^(glm-4v|glm-4\.1v|glm-4\.5v|glm-4v-plus|glm-4v-thinking|cogvlm)", re.I),
        {"provider": "zhipu", "supports_image_input": True},
    ),
    (
        re.compile(r"^(glm-4|glm-4-plus|glm-4-air|glm-zero-preview)", re.I),
        {"provider": "zhipu"},
    ),
    # DeepSeek: vision first, then text
    (
        re.compile(r"^(deepseek-vl|deepseek-vl2)", re.I),
        {"provider": "deepseek", "supports_image_input": True},
    ),
    (
        re.compile(r"^(deepseek-chat|deepseek-reasoner|deepseek-coder|deepseek-r1)", re.I),
        {"provider": "deepseek"},
    ),
    # Moonshot: vision first, then text
    (
        re.compile(r"^(kimi-vl|moonshot-v1-vision|moonshot-vision)", re.I),
        {"provider": "moonshot", "supports_image_input": True},
    ),
    (
        re.compile(r"^(kimi-k2|kimi-latest|moonshot-v1-(8k|32k|128k)|moonshot-kimi)", re.I),
        {"provider": "moonshot"},
    ),
    # MiniMax: vision first, then text
    (
        re.compile(r"^(minimax-vl|minimax-vl-01|abab[\-_]vision|minimax-vision)", re.I),
        {"provider": "minimax", "supports_image_input": True},
    ),
    (
        re.compile(r"^(abab6(\.5)?-chat|minimax-text|minimax-m1)", re.I),
        {"provider": "minimax"},
    ),
    # Volcengine: vision first, then text
    (
        re.compile(r"^(doubao-vision|doubao-1\.5-vision|doubao-seed-vision)", re.I),
        {"provider": "volcengine", "supports_image_input": True},
    ),
    (
        re.compile(r"^(doubao|doubao-1\.5|seed)", re.I),
        {"provider": "volcengine"},
    ),
    # Google
    (
        re.compile(r"^(gemini|gemini-1\.5|gemini-2\.0)", re.I),
        {"provider": "google", "supports_image_input": True, "supports_pdf_input_responses": True},
    ),
    # Anthropic
    (
        re.compile(r"^(claude-3|claude-3\.5|claude-3\.7|claude-sonnet|claude-opus|claude-4)", re.I),
        {"provider": "anthropic", "supports_image_input": True, "supports_pdf_input_responses": True},
    ),
]
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def resolve_model_capabilities(model: str, base_url: str, overrides: dict[str, Any] | None = None) -> ModelCapabilities:
    """Resolve the capability record for ``model``.

    Resolution happens in three layers, later layers winning:

    1. the ``"openai-compatible"`` entry of ``PROVIDER_DEFAULTS``;
    2. the payload of the first ``MODEL_PATTERNS`` entry whose regex matches
       the stripped model name;
    3. the caller-supplied ``overrides``.

    Args:
        model: Model name; a falsy value is treated as an empty string.
        base_url: Accepted as part of the signature; not consulted by this function.
        overrides: Optional field values applied last. Keys that are not
            ``ModelCapabilities`` fields raise ``TypeError`` from the constructor.

    Returns:
        The resolved ``ModelCapabilities``.
    """
    fallback_provider = "openai-compatible"
    model_name = (model or "").strip()
    resolved = _caps(
        provider=fallback_provider,
        model=model_name,
        **PROVIDER_DEFAULTS.get(fallback_provider, {}),
    )

    # Only the first matching pattern contributes.
    hit = next(
        (payload for pattern, payload in MODEL_PATTERNS if pattern.search(model_name)),
        None,
    )
    if hit is not None:
        tunable_fields = (
            "api_family",
            "supports_image_input",
            "supports_pdf_input_chat",
            "supports_pdf_input_responses",
            "supports_data_url",
            "supports_tools",
            "supports_stream",
        )
        resolved = ModelCapabilities(
            provider=hit.get("provider", fallback_provider),
            model=model_name,
            **{name: hit.get(name, getattr(resolved, name)) for name in tunable_fields},
        )

    if overrides:
        # The current record already carries provider and model, so the
        # overrides only need to be layered on top of its fields.
        combined = {**resolved.__dict__, **dict(overrides)}
        resolved = ModelCapabilities(**combined)

    return resolved
|
llm_client/interface.py
ADDED
|
@@ -0,0 +1,470 @@
|
|
|
1
|
+
from collections.abc import AsyncIterator, Callable
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import Any, Literal, Protocol, TypedDict
|
|
4
|
+
|
|
5
|
+
# Roles a chat message may carry.
LLMRole = Literal["system", "user", "assistant", "tool"]
# Message content: either plain text or a list of dict-shaped content parts.
LLMContent = str | list[dict[str, Any]]
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LLMMessage(TypedDict, total=False):
    """Dict shape of a single chat message; every key is optional (``total=False``)."""

    role: LLMRole
    content: LLMContent
    name: str
    tool_call_id: str
    # OpenAI-compatible: an assistant message may include tool_calls / function_call
    # when it is requesting tools.
    tool_calls: Any
    function_call: Any
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class LLMToolFunction(TypedDict, total=False):
    """Dict shape of the ``function`` entry of a tool definition; every key is optional."""

    name: str
    description: str
    # presumably a JSON-Schema object describing the arguments — confirm against callers
    parameters: dict[str, Any]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class LLMTool(TypedDict, total=False):
    """Dict shape of one tool definition; every key is optional (``total=False``)."""

    # OpenAI-compatible tool schema: {"type": "function", "function": {...}}
    type: Literal["function"]
    function: LLMToolFunction
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
|
|
32
|
+
class LLMRequest:
|
|
33
|
+
"""
|
|
34
|
+
Canonical request object for LLM calls.
|
|
35
|
+
|
|
36
|
+
- Put conversation turns in `messages` (no system message here by default)
|
|
37
|
+
- Put system instruction in `system_prompt` (we will prepend it during execution)
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
# Message history. Rule: do NOT include a system message here; use `system_prompt`.
|
|
41
|
+
# Accepts any dict-shaped messages; the loop produces concrete dict[str, Any] sequences.
|
|
42
|
+
messages: list[dict[str, Any]]
|
|
43
|
+
system_prompt: str | None = None
|
|
44
|
+
|
|
45
|
+
# Common generation params (optional; implementation may ignore some)
|
|
46
|
+
model: str | None = None
|
|
47
|
+
temperature: float | None = None
|
|
48
|
+
max_tokens: int | None = None
|
|
49
|
+
|
|
50
|
+
# Structured output helpers
|
|
51
|
+
parse_json: bool = False
|
|
52
|
+
|
|
53
|
+
# Reasoning switch for providers supporting `reason` flag.
|
|
54
|
+
# Default enabled as requested; set to False to disable.
|
|
55
|
+
reason: bool | None = True
|
|
56
|
+
|
|
57
|
+
# Tool calling (OpenAI-compatible). Keep optional to avoid forcing every impl to support it.
|
|
58
|
+
tools: list[dict[str, Any]] | None = None
|
|
59
|
+
tool_choice: Any | None = None
|
|
60
|
+
|
|
61
|
+
# Structured output (M1.3). OpenAI-compatible shape, e.g.::
|
|
62
|
+
#
|
|
63
|
+
# {"type": "json_schema", "json_schema": {"name": ..., "schema": {...}, "strict": true}}
|
|
64
|
+
#
|
|
65
|
+
# Forwarded verbatim to the provider as the ``response_format`` parameter.
|
|
66
|
+
# Producing this dict from a pydantic-style spec is the caller's job
|
|
67
|
+
# (see ``power_loop.runtime.structured.StructuredOutputSpec``).
|
|
68
|
+
response_format: dict[str, Any] | None = None
|
|
69
|
+
|
|
70
|
+
# Provider-specific passthrough
|
|
71
|
+
extra: dict[str, Any] = field(default_factory=dict)
|
|
72
|
+
|
|
73
|
+
def to_messages(self, capabilities: Any = None) -> list[dict[str, Any]]:
|
|
74
|
+
"""
|
|
75
|
+
Normalize messages for OpenAI-compatible APIs.
|
|
76
|
+
"""
|
|
77
|
+
msgs: list[dict[str, Any]] = []
|
|
78
|
+
if self.system_prompt:
|
|
79
|
+
msgs.append({"role": "system", "content": self.system_prompt})
|
|
80
|
+
|
|
81
|
+
for raw_msg in self.messages or []:
|
|
82
|
+
msg = dict(raw_msg)
|
|
83
|
+
if capabilities is not None and "content" in msg:
|
|
84
|
+
from .multimodal import render_message_content
|
|
85
|
+
|
|
86
|
+
msg["content"] = render_message_content(
|
|
87
|
+
msg.get("content"),
|
|
88
|
+
role=str(msg.get("role") or "user"),
|
|
89
|
+
capabilities=capabilities,
|
|
90
|
+
)
|
|
91
|
+
msgs.append(msg)
|
|
92
|
+
return msgs
|
|
93
|
+
|
|
94
|
+
@classmethod
|
|
95
|
+
def from_prompt(
|
|
96
|
+
cls,
|
|
97
|
+
*,
|
|
98
|
+
prompt: str,
|
|
99
|
+
system_prompt: str | None = None,
|
|
100
|
+
model: str | None = None,
|
|
101
|
+
temperature: float | None = None,
|
|
102
|
+
max_tokens: int | None = None,
|
|
103
|
+
parse_json: bool = False,
|
|
104
|
+
reason: bool | None = True,
|
|
105
|
+
**kwargs: Any,
|
|
106
|
+
) -> "LLMRequest":
|
|
107
|
+
return cls(
|
|
108
|
+
messages=[{"role": "user", "content": prompt}],
|
|
109
|
+
system_prompt=system_prompt,
|
|
110
|
+
model=model,
|
|
111
|
+
temperature=temperature,
|
|
112
|
+
max_tokens=max_tokens,
|
|
113
|
+
parse_json=parse_json,
|
|
114
|
+
reason=reason,
|
|
115
|
+
extra=dict(kwargs or {}),
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@dataclass
|
|
120
|
+
class LLMTokenUsage:
|
|
121
|
+
prompt_tokens: int | None = None
|
|
122
|
+
completion_tokens: int | None = None
|
|
123
|
+
total_tokens: int | None = None
|
|
124
|
+
|
|
125
|
+
prompt_audio_tokens: int | None = None
|
|
126
|
+
prompt_cached_tokens: int | None = None
|
|
127
|
+
prompt_cache_miss_tokens: int | None = None
|
|
128
|
+
prompt_text_tokens: int | None = None
|
|
129
|
+
prompt_image_tokens: int | None = None
|
|
130
|
+
|
|
131
|
+
completion_reasoning_tokens: int | None = None
|
|
132
|
+
completion_audio_tokens: int | None = None
|
|
133
|
+
completion_text_tokens: int | None = None
|
|
134
|
+
completion_image_tokens: int | None = None
|
|
135
|
+
|
|
136
|
+
accepted_prediction_tokens: int | None = None
|
|
137
|
+
rejected_prediction_tokens: int | None = None
|
|
138
|
+
|
|
139
|
+
cached_tokens: int | None = None
|
|
140
|
+
cache_hit_tokens: int | None = None
|
|
141
|
+
cache_miss_tokens: int | None = None
|
|
142
|
+
reasoning_tokens: int | None = None
|
|
143
|
+
accepted_tokens: int | None = None
|
|
144
|
+
rejected_tokens: int | None = None
|
|
145
|
+
|
|
146
|
+
def as_dict(self) -> dict[str, int | None]:
|
|
147
|
+
return dict(self.__dict__)
|
|
148
|
+
|
|
149
|
+
def to_log_str(self) -> str:
|
|
150
|
+
parts = [f"{k}={v}" for k, v in self.__dict__.items() if v is not None]
|
|
151
|
+
return f"LLMTokenUsage({', '.join(parts)})"
|
|
152
|
+
|
|
153
|
+
def __str__(self) -> str:
|
|
154
|
+
return self.to_log_str()
|
|
155
|
+
|
|
156
|
+
def __repr__(self) -> str:
|
|
157
|
+
return self.to_log_str()
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
@dataclass
class LLMStreamChunk:
    """
    One event of a streamed LLM response.

    Every field has a default, so a chunk may carry any subset of text,
    reasoning text, tool-call data and usage.
    """

    # Text delta carried by this chunk.
    delta_text: str = ""
    # Best-effort streamed thinking/reasoning text.
    think: str = ""
    # Optional tool/function-call delta payload (provider-specific shape).
    # For OpenAI-compatible streaming this is typically `choices[0].delta.tool_calls`.
    delta_tool_calls: Any = None
    # Aggregated tool calls (best-effort normalized), built by the LLMService streaming implementation.
    # Shape (OpenAI-compatible):
    # [{"id": "...", "type": "function", "function": {"name": "...", "arguments": "..."}}]
    tool_calls: list[dict[str, Any]] = field(default_factory=list)
    # Best-effort usage. Some providers only send usage in the final event.
    token_usage: LLMTokenUsage | None = None
    # Provider event this chunk was built from, if kept (provider-specific).
    raw_event: Any = None
    # presumably set on the last chunk of a stream — confirm against the producer
    is_final: bool = False
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
@dataclass
class LLMResponse:
    """Provider-agnostic result of an LLM call, with optional JSON parsing layers.

    Up to four views of the output are kept (any may be empty):

    - ``raw_text``: the extracted message content text
    - ``raw_json_data``: ``raw_text`` parsed directly as a JSON object, when that works
    - ``content_text``: the extracted "business payload" text (typically a JSON substring)
    - ``json_data``: the JSON object parsed from ``content_text`` (after fallback/repairs)
    """

    raw_text: str
    # Provider-specific message/completion objects (SDK objects or plain dicts).
    raw_message: Any = None
    raw_completion: Any = None

    raw_json_data: dict[str, Any] = field(default_factory=dict)
    raw_json_error: str | None = None

    content_text: str = ""
    think: str = ""
    json_data: dict[str, Any] = field(default_factory=dict)
    token_usage: LLMTokenUsage = field(default_factory=LLMTokenUsage)
    parse_error: str | None = None
    debug: dict[str, Any] = field(default_factory=dict)
    # Best-effort extracted tool calls (if any). Providers may instead expose
    # them on raw_message / raw_completion; see get_tool_calls().
    tool_calls: list[dict[str, Any]] = field(default_factory=list)
    # Raw streamed chunks captured when complete() is implemented via stream aggregation.
    stream_chunks: list[LLMStreamChunk] = field(default_factory=list)

    def get_tool_calls(self) -> list[dict[str, Any]]:
        """Return the tool calls of this response, best effort.

        Sources are tried in this order and the first one that carries calls wins:

        1. ``self.tool_calls`` (returned as a copy)
        2. ``raw_message`` -> ``tool_calls`` / ``function_call``
        3. ``raw_completion`` -> ``choices[0].message`` -> ``tool_calls`` / ``function_call``
        4. ``raw_message.model_dump()``
        5. ``raw_completion.model_dump()``

        ``raw_message`` and ``raw_completion`` may be attribute-style objects
        or plain dicts; both are read the same way. A legacy ``function_call``
        is converted to the ``{"type": "function", "function": {...}}`` shape.

        Returns:
            A list of dicts; empty when no source carries tool calls.
        """

        def _as_dict(obj: Any) -> Any:
            # Convert an SDK object to a dict when possible; otherwise hand it back as-is.
            if obj is None:
                return None
            if isinstance(obj, dict):
                return obj
            if hasattr(obj, "model_dump"):
                try:
                    return obj.model_dump()
                except Exception:
                    return obj
            try:
                return dict(obj.__dict__)
            except Exception:
                return obj

        def _normalize_tool_calls(obj: Any) -> list[dict[str, Any]]:
            # Accept one call or a list of calls; drop items that cannot become dicts.
            if not obj:
                return []
            items = obj if isinstance(obj, list) else [obj]
            converted = (_as_dict(item) for item in items)
            return [entry for entry in converted if isinstance(entry, dict)]

        def _normalize_function_call(obj: Any) -> list[dict[str, Any]]:
            if not obj:
                return []
            call = _as_dict(obj)
            if not isinstance(call, dict):
                return []
            # legacy shape: {"name": "...", "arguments": "..."}
            return [{"type": "function", "function": {"name": call.get("name"), "arguments": call.get("arguments")}}]

        def _read(obj: Any, key: str) -> Any:
            # Uniform access for dict-shaped and attribute-style payloads.
            if isinstance(obj, dict):
                return obj.get(key)
            return getattr(obj, key, None)

        def _calls_from_message(message: Any) -> list[dict[str, Any]] | None:
            # None means "this message carries no calls"; a list is a final answer.
            if message is None:
                return None
            tool_calls = _read(message, "tool_calls")
            if tool_calls:
                return _normalize_tool_calls(tool_calls)
            function_call = _read(message, "function_call")
            if function_call:
                return _normalize_function_call(function_call)
            return None

        if self.tool_calls:
            return list(self.tool_calls)

        # raw_message direct
        message = self.raw_message
        found = _calls_from_message(message)
        if found is not None:
            return found

        # raw_completion -> first choice's message
        completion = self.raw_completion
        try:
            choices = _read(completion, "choices") if completion is not None else None
            first_message = _read(choices[0], "message") if choices else None
            found = _calls_from_message(first_message)
            if found is not None:
                return found
        except Exception:
            # Deliberately best-effort: provider objects may have any shape,
            # so a failed probe just falls through to the next source.
            pass

        # model_dump fallback (raw_message)
        if message is not None and hasattr(message, "model_dump"):
            try:
                dumped = message.model_dump()
                if isinstance(dumped, dict):
                    found = _calls_from_message(dumped)
                    if found is not None:
                        return found
            except Exception:
                # Best-effort, as above.
                pass

        # model_dump fallback (raw_completion)
        if completion is not None and hasattr(completion, "model_dump"):
            try:
                dumped = completion.model_dump()
                if isinstance(dumped, dict):
                    choices = dumped.get("choices") or []
                    if choices and isinstance(choices, list) and isinstance(choices[0], dict):
                        dumped_message = choices[0].get("message") or {}
                        if isinstance(dumped_message, dict):
                            found = _calls_from_message(dumped_message)
                            if found is not None:
                                return found
            except Exception:
                # Best-effort, as above.
                pass

        return []

    @property
    def ok(self) -> bool:
        """True when ``json_data`` is non-empty and no parse error was recorded."""
        return bool(self.json_data) and self.parse_error is None

    def to_log_str(
        self,
        *,
        max_raw_chars: int = 600,
        max_content_chars: int = 600,
        include_debug: bool = False,
    ) -> str:
        """Render a compact, single-line summary suitable for logs.

        Args:
            max_raw_chars: Maximum length of the ``raw_text`` preview.
            max_content_chars: Maximum length of the ``content_text`` preview
                and of the serialized ``json_data``.
            include_debug: Also append the ``debug`` dict when it is non-empty.
        """
        import json as _json

        def _trunc(s: str, n: int) -> str:
            s = s or ""
            if n <= 0:
                return ""
            if len(s) <= n:
                return s
            if n < 3:
                # No room for an ellipsis; a negative slice bound here would
                # return more than n characters, so cut hard instead.
                return s[:n]
            return s[: n - 3] + "..."

        parts = [
            f"ok={self.ok}",
            f"token_usage={self.token_usage.as_dict()}",
            f"has_raw_message={self.raw_message is not None}",
            f"has_raw_completion={self.raw_completion is not None}",
            f"tool_calls={len(self.tool_calls) or len(self.get_tool_calls())}",
            f"raw_json_ok={bool(self.raw_json_data) and self.raw_json_error is None}",
            f"parse_error={self.parse_error!r}",
            f"strategy={self.debug.get('strategy')!r}",
        ]
        raw_preview = _trunc(self.raw_text, max_raw_chars)
        content_preview = _trunc(self.content_text, max_content_chars)
        parts.append(f"raw_text={raw_preview!r}")
        if self.content_text:
            parts.append(f"content_text={content_preview!r}")
        if self.json_data:
            try:
                parts.append(
                    "json_data="
                    + _json.dumps(self.json_data, ensure_ascii=False, sort_keys=True)[:max_content_chars]
                )
            except Exception:
                # json_data may hold values json cannot serialize; logging must not raise.
                parts.append("json_data=<unserializable>")
        if include_debug and self.debug:
            try:
                parts.append("debug=" + _json.dumps(self.debug, ensure_ascii=False, sort_keys=True))
            except Exception:
                # Fall back to repr() for debug payloads json cannot serialize.
                parts.append(f"debug={self.debug!r}")
        return "LLMResponse(" + ", ".join(parts) + ")"

    def __str__(self) -> str:
        return self.to_log_str()

    def __repr__(self) -> str:
        return self.to_log_str()
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
class LLMService(Protocol):
    """Unified LLM interface (no LangChain dependency).

    Design:
    - ONE canonical request shape: `LLMRequest` (messages + optional system_prompt)
    - ONE canonical non-streaming call: `complete(request) -> LLMResponse`
    - Streaming takes the same request: `stream(request) -> AsyncIterator[LLMStreamChunk]`
    - `predict`, `chat` and `predict_stream` are thin convenience wrappers over those.
    """

    async def complete(
        self,
        request: "LLMRequest",
        *,
        on_chunk_delta_text: Callable[[str], Any] | None = None,
        on_chunk_think: Callable[[str], Any] | None = None,
        on_stream_end: Callable[["LLMResponse"], Any] | None = None,
    ) -> LLMResponse:
        """Preferred non-streaming API; can optionally receive chunk hooks."""
        ...

    def stream(self, request: "LLMRequest") -> AsyncIterator["LLMStreamChunk"]:
        """Preferred streaming API; returns an async iterator directly (no await needed)."""
        ...

    async def close(self) -> None:
        """Close underlying HTTP resources, if any."""
        ...

    # -----------------------
    # Convenience wrappers
    # -----------------------

    async def predict(
        self,
        prompt: str,
        system_prompt: str | None = None,
        **kwargs: Any,
    ) -> str:
        """Single-turn text completion.

        Builds an `LLMRequest` holding one user message, runs `complete()` and
        returns the stripped `raw_text` of the response.
        """
        request = LLMRequest.from_prompt(prompt=prompt, system_prompt=system_prompt, **kwargs)
        response = await self.complete(request)
        text = response.raw_text or ""
        return text.strip()

    async def chat(
        self,
        messages: list[dict[str, Any]],
        system_prompt: str | None = None,
        **kwargs: Any,
    ) -> str:
        """Multi-turn chat returning plain text.

        Rule: do NOT put a system message in `messages`; pass `system_prompt` instead.
        Extra keyword arguments are forwarded to the `LLMRequest` constructor.
        """
        request = LLMRequest(messages=messages, system_prompt=system_prompt, **kwargs)
        response = await self.complete(request)
        text = response.raw_text or ""
        return text.strip()

    async def predict_stream(
        self,
        prompt: str,
        system_prompt: str | None = None,
        **kwargs: Any,
    ) -> AsyncIterator[str]:
        """Single-turn streaming text; yields each non-empty text delta."""
        request = LLMRequest.from_prompt(prompt=prompt, system_prompt=system_prompt, **kwargs)
        async for piece in self.stream(request):
            if not piece.delta_text:
                continue
            yield piece.delta_text
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
@dataclass
class OpenAICompatibleChatConfig:
    """Connection and behaviour settings for an OpenAI-compatible chat endpoint."""

    # Required connection settings.
    base_url: str
    api_key: str
    model: str
    # Request defaults.
    timeout_s: float = 180.0
    max_tokens: int = 8000
    temperature: float = 0.0
    # Retry settings.
    max_retries: int = 3
    retry_base_delay_s: float = 0.5
    # Stream-resume settings; resuming is off by default.
    stream_resume_on_error: bool = False
    stream_max_restarts: int = 0
    stream_resume_instruction: str = "继续,从你上次中断的位置继续输出。不要重复已经输出的内容。"
    # Values layered over the resolved model capabilities.
    capability_overrides: dict[str, Any] = field(default_factory=dict)

    @property
    def is_ready(self) -> bool:
        """True when ``base_url``, ``api_key`` and ``model`` are all non-empty."""
        if not self.base_url:
            return False
        if not self.api_key:
            return False
        return bool(self.model)
|