AbstractRuntime 0.2.0-py3-none-any.whl → 0.4.1-py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as published.
- abstractruntime/__init__.py +83 -3
- abstractruntime/core/config.py +82 -2
- abstractruntime/core/event_keys.py +62 -0
- abstractruntime/core/models.py +17 -1
- abstractruntime/core/policy.py +74 -3
- abstractruntime/core/runtime.py +3334 -28
- abstractruntime/core/vars.py +103 -2
- abstractruntime/evidence/__init__.py +10 -0
- abstractruntime/evidence/recorder.py +325 -0
- abstractruntime/history_bundle.py +772 -0
- abstractruntime/integrations/abstractcore/__init__.py +6 -0
- abstractruntime/integrations/abstractcore/constants.py +19 -0
- abstractruntime/integrations/abstractcore/default_tools.py +258 -0
- abstractruntime/integrations/abstractcore/effect_handlers.py +2622 -32
- abstractruntime/integrations/abstractcore/embeddings_client.py +69 -0
- abstractruntime/integrations/abstractcore/factory.py +149 -16
- abstractruntime/integrations/abstractcore/llm_client.py +891 -55
- abstractruntime/integrations/abstractcore/mcp_worker.py +587 -0
- abstractruntime/integrations/abstractcore/observability.py +80 -0
- abstractruntime/integrations/abstractcore/session_attachments.py +946 -0
- abstractruntime/integrations/abstractcore/summarizer.py +154 -0
- abstractruntime/integrations/abstractcore/tool_executor.py +509 -31
- abstractruntime/integrations/abstractcore/workspace_scoped_tools.py +561 -0
- abstractruntime/integrations/abstractmemory/__init__.py +3 -0
- abstractruntime/integrations/abstractmemory/effect_handlers.py +946 -0
- abstractruntime/memory/__init__.py +21 -0
- abstractruntime/memory/active_context.py +751 -0
- abstractruntime/memory/active_memory.py +452 -0
- abstractruntime/memory/compaction.py +105 -0
- abstractruntime/memory/kg_packets.py +164 -0
- abstractruntime/memory/memact_composer.py +175 -0
- abstractruntime/memory/recall_levels.py +163 -0
- abstractruntime/memory/token_budget.py +86 -0
- abstractruntime/rendering/__init__.py +17 -0
- abstractruntime/rendering/agent_trace_report.py +256 -0
- abstractruntime/rendering/json_stringify.py +136 -0
- abstractruntime/scheduler/scheduler.py +93 -2
- abstractruntime/storage/__init__.py +7 -2
- abstractruntime/storage/artifacts.py +175 -32
- abstractruntime/storage/base.py +17 -1
- abstractruntime/storage/commands.py +339 -0
- abstractruntime/storage/in_memory.py +41 -1
- abstractruntime/storage/json_files.py +210 -14
- abstractruntime/storage/observable.py +136 -0
- abstractruntime/storage/offloading.py +433 -0
- abstractruntime/storage/sqlite.py +836 -0
- abstractruntime/visualflow_compiler/__init__.py +29 -0
- abstractruntime/visualflow_compiler/adapters/__init__.py +11 -0
- abstractruntime/visualflow_compiler/adapters/agent_adapter.py +126 -0
- abstractruntime/visualflow_compiler/adapters/context_adapter.py +109 -0
- abstractruntime/visualflow_compiler/adapters/control_adapter.py +615 -0
- abstractruntime/visualflow_compiler/adapters/effect_adapter.py +1051 -0
- abstractruntime/visualflow_compiler/adapters/event_adapter.py +307 -0
- abstractruntime/visualflow_compiler/adapters/function_adapter.py +97 -0
- abstractruntime/visualflow_compiler/adapters/memact_adapter.py +114 -0
- abstractruntime/visualflow_compiler/adapters/subflow_adapter.py +74 -0
- abstractruntime/visualflow_compiler/adapters/variable_adapter.py +316 -0
- abstractruntime/visualflow_compiler/compiler.py +3832 -0
- abstractruntime/visualflow_compiler/flow.py +247 -0
- abstractruntime/visualflow_compiler/visual/__init__.py +13 -0
- abstractruntime/visualflow_compiler/visual/agent_ids.py +29 -0
- abstractruntime/visualflow_compiler/visual/builtins.py +1376 -0
- abstractruntime/visualflow_compiler/visual/code_executor.py +214 -0
- abstractruntime/visualflow_compiler/visual/executor.py +2804 -0
- abstractruntime/visualflow_compiler/visual/models.py +211 -0
- abstractruntime/workflow_bundle/__init__.py +52 -0
- abstractruntime/workflow_bundle/models.py +236 -0
- abstractruntime/workflow_bundle/packer.py +317 -0
- abstractruntime/workflow_bundle/reader.py +87 -0
- abstractruntime/workflow_bundle/registry.py +587 -0
- abstractruntime-0.4.1.dist-info/METADATA +177 -0
- abstractruntime-0.4.1.dist-info/RECORD +86 -0
- abstractruntime-0.4.1.dist-info/entry_points.txt +2 -0
- abstractruntime-0.2.0.dist-info/METADATA +0 -163
- abstractruntime-0.2.0.dist-info/RECORD +0 -32
- {abstractruntime-0.2.0.dist-info → abstractruntime-0.4.1.dist-info}/WHEEL +0 -0
- {abstractruntime-0.2.0.dist-info → abstractruntime-0.4.1.dist-info}/licenses/LICENSE +0 -0

abstractruntime/integrations/abstractcore/llm_client.py

@@ -13,14 +13,369 @@ Remote mode is the preferred way to support per-request dynamic routing (e.g. `b
 
 from __future__ import annotations
 
+import ast
 import json
+import locale
+import os
+import re
+import threading
 from dataclasses import asdict, dataclass, is_dataclass
+from datetime import datetime
 from typing import Any, Dict, List, Optional, Protocol, Tuple
 
 from .logging import get_logger
 
 logger = get_logger(__name__)
 
+_LOCAL_GENERATE_LOCKS: Dict[Tuple[str, str], threading.Lock] = {}
+_LOCAL_GENERATE_LOCKS_LOCK = threading.Lock()
+_LOCAL_GENERATE_LOCKS_WARNED: set[Tuple[str, str]] = set()
+_LOCAL_GENERATE_LOCKS_WARNED_LOCK = threading.Lock()
+
+
+def _local_generate_lock(*, provider: str, model: str) -> Optional[threading.Lock]:
+    """Return a process-wide generation lock for providers that are not thread-safe.
+
+    MLX/Metal can crash the process when concurrent generations occur from multiple threads
+    (e.g. gateway ticking multiple runs concurrently). We serialize MLX generation per model
+    as a safety contract.
+    """
+
+    prov = str(provider or "").strip().lower()
+    if prov != "mlx":
+        return None
+    key = (prov, str(model or "").strip())
+    with _LOCAL_GENERATE_LOCKS_LOCK:
+        lock = _LOCAL_GENERATE_LOCKS.get(key)
+        if lock is None:
+            lock = threading.Lock()
+            _LOCAL_GENERATE_LOCKS[key] = lock
+        return lock
+
+
+def _warn_local_generate_lock_once(*, provider: str, model: str) -> None:
+    prov = str(provider or "").strip().lower()
+    key = (prov, str(model or "").strip())
+    with _LOCAL_GENERATE_LOCKS_WARNED_LOCK:
+        if key in _LOCAL_GENERATE_LOCKS_WARNED:
+            return
+        _LOCAL_GENERATE_LOCKS_WARNED.add(key)
+    logger.warning(
+        "Local provider generation is serialized for safety (prevents MLX/Metal crashes under concurrency).",
+        provider=prov,
+        model=key[1],
+    )
+
+_SYSTEM_CONTEXT_HEADER_RE = re.compile(
+    # ChatML-style user-turn grounding prefix, matching `chat-mlx.py` / `chat-hf.py`:
+    # "[YYYY-MM-DD HH:MM:SS CC]" (optionally followed by whitespace + user text).
+    # Backward compatible with the historical "[YYYY/MM/DD HH:MM CC]" form.
+    r"^\[\d{4}[-/]\d{2}[-/]\d{2}\s+\d{2}:\d{2}(?::\d{2})?\s+[A-Z]{2}\](?:\s|$)",
+    re.IGNORECASE,
+)
+
+_LEGACY_SYSTEM_CONTEXT_HEADER_RE = re.compile(
+    r"^Grounding:\s*\d{4}/\d{2}/\d{2}\|\d{2}:\d{2}\|[A-Z]{2}$",
+    re.IGNORECASE,
+)
+
+_LEGACY_SYSTEM_CONTEXT_HEADER_PARSE_RE = re.compile(
+    r"^Grounding:\s*(\d{4}/\d{2}/\d{2})\|(\d{2}:\d{2})\|([A-Z]{2})$",
+    re.IGNORECASE,
+)
+
+_ZONEINFO_TAB_CANDIDATES = [
+    "/usr/share/zoneinfo/zone.tab",
+    "/usr/share/zoneinfo/zone1970.tab",
+    "/var/db/timezone/zoneinfo/zone.tab",
+    "/var/db/timezone/zoneinfo/zone1970.tab",
+]
+
+
+def _detect_timezone_name() -> Optional[str]:
+    """Best-effort IANA timezone name (e.g. 'Europe/Paris')."""
+
+    tz_env = os.environ.get("TZ")
+    if isinstance(tz_env, str):
+        tz = tz_env.strip().lstrip(":")
+        if tz and "/" in tz:
+            return tz
+
+    # Common on Debian/Ubuntu.
+    try:
+        with open("/etc/timezone", "r", encoding="utf-8", errors="ignore") as f:
+            line = f.readline().strip()
+        if line and "/" in line:
+            return line
+    except Exception:
+        pass
+
+    # Common on macOS + many Linux distros (symlink or copied file).
+    try:
+        real = os.path.realpath("/etc/localtime")
+    except Exception:
+        real = ""
+    if real:
+        match = re.search(r"/zoneinfo/(.+)$", real)
+        if match:
+            tz = match.group(1).strip()
+            if tz and "/" in tz:
+                return tz
+
+    return None
+
+
+def _country_from_zone_tab(*, zone_name: str, tab_paths: Optional[List[str]] = None) -> Optional[str]:
+    """Resolve ISO2 country code from zone.tab / zone1970.tab."""
+    zone = str(zone_name or "").strip()
+    if not zone:
+        return None
+
+    paths = list(tab_paths) if isinstance(tab_paths, list) and tab_paths else list(_ZONEINFO_TAB_CANDIDATES)
+    for tab_path in paths:
+        try:
+            with open(tab_path, "r", encoding="utf-8", errors="ignore") as f:
+                for raw_line in f:
+                    line = raw_line.strip()
+                    if not line or line.startswith("#"):
+                        continue
+                    parts = line.split("\t")
+                    if len(parts) < 3:
+                        continue
+                    cc_field = parts[0].strip()
+                    tz_field = parts[2].strip()
+                    if tz_field != zone:
+                        continue
+                    cc = cc_field.split(",", 1)[0].strip()
+                    if len(cc) == 2 and cc.isalpha():
+                        return cc.upper()
+        except Exception:
+            continue
+    return None
+
+
+def _detect_country() -> str:
+    """Best-effort 2-letter country code detection.
+
+    Order:
+    1) Explicit env override: ABSTRACT_COUNTRY / ABSTRACTFRAMEWORK_COUNTRY
+    2) Locale region from `locale.getlocale()` or locale env vars (LANG/LC_ALL/LC_CTYPE)
+    3) Timezone (IANA name) via zone.tab mapping
+
+    Notes:
+    - Avoid parsing encoding-only strings like `UTF-8` as a country (a common locale env pitfall).
+    - If no reliable region is found, return `XX` (unknown).
+    """
+
+    def _normalize_country_code(value: Optional[str]) -> Optional[str]:
+        if not isinstance(value, str):
+            return None
+        raw = value.strip()
+        if not raw:
+            return None
+
+        base = raw.split(".", 1)[0].split("@", 1)[0].strip()
+        if len(base) == 2 and base.isalpha():
+            return base.upper()
+
+        parts = [p.strip() for p in re.split(r"[_-]", base) if p.strip()]
+        for part in parts[1:]:
+            if len(part) == 2 and part.isalpha():
+                return part.upper()
+        return None
+
+    # Explicit override (preferred).
+    for key in ("ABSTRACT_COUNTRY", "ABSTRACTFRAMEWORK_COUNTRY"):
+        cc = _normalize_country_code(os.environ.get(key))
+        if cc is not None:
+            return cc
+
+    candidates: List[str] = []
+    try:
+        loc = locale.getlocale()[0]
+        if isinstance(loc, str) and loc.strip():
+            candidates.append(loc)
+    except Exception:
+        pass
+
+    for key in ("LC_ALL", "LANG", "LC_CTYPE"):
+        v = os.environ.get(key)
+        if isinstance(v, str) and v.strip():
+            candidates.append(v)
+
+    for cand in candidates:
+        cc = _normalize_country_code(cand)
+        if cc is not None:
+            return cc
+
+    tz_name = _detect_timezone_name()
+    if tz_name:
+        cc = _country_from_zone_tab(zone_name=tz_name)
+        if cc is not None:
+            return cc
+
+    return "XX"
+
+
+def _system_context_header() -> str:
+    # Use local datetime (timezone-aware) to match the user's environment.
+    # Format: "[YYYY-MM-DD HH:MM:SS CC]"
+    stamp = datetime.now().astimezone().strftime("%Y-%m-%d %H:%M:%S")
+    return f"[{stamp} {_detect_country()}]"
+
+def _strip_system_context_header(system_prompt: Optional[str]) -> Optional[str]:
+    """Remove a runtime-injected system-context header from the system prompt (best-effort).
+
+    Why:
+    - Historically AbstractRuntime injected a "Grounding: ..." line into the *system prompt*.
+    - Prompt/KV caching works best when stable prefixes (system/tools/history) do not contain per-turn entropy.
+    - We still want date/time/country per turn, but we inject it into the *current user turn* instead.
+    """
+    if not isinstance(system_prompt, str):
+        return system_prompt
+    raw = system_prompt
+    lines = raw.splitlines()
+    if not lines:
+        return None
+    first = lines[0].strip()
+    if not (_LEGACY_SYSTEM_CONTEXT_HEADER_RE.match(first) or _SYSTEM_CONTEXT_HEADER_RE.match(first)):
+        return raw
+    rest = "\n".join(lines[1:]).lstrip()
+    return rest if rest else None
+
+
+def _strip_internal_system_messages(messages: Optional[List[Dict[str, Any]]]) -> Optional[List[Dict[str, Any]]]:
+    """Remove internal system messages that should never leak into model outputs.
+
+    Today this is intentionally narrow and only strips the synthetic tool-activity
+    summaries that can be injected by some agent hosts:
+        "Recent tool activity (auto): ..."
+
+    Why:
+    - Some local/open models will echo system-message content verbatim.
+    - These tool-trace summaries are *operator/debug* context, not user-facing content.
+    """
+    if not isinstance(messages, list) or not messages:
+        return messages
+
+    out: List[Dict[str, Any]] = []
+    for m in messages:
+        if not isinstance(m, dict):
+            continue
+        role = str(m.get("role") or "").strip().lower()
+        if role == "system":
+            c = m.get("content")
+            if isinstance(c, str) and c.lstrip().startswith("Recent tool activity"):
+                continue
+        out.append(dict(m))
+
+    return out or None
+
+
+def _inject_turn_grounding(
+    *,
+    prompt: str,
+    messages: Optional[List[Dict[str, Any]]],
+) -> tuple[str, Optional[List[Dict[str, Any]]]]:
+    """Inject date/time/country into the *current user turn* (not the system prompt)."""
+    header = _system_context_header()
+
+    def _prefix_with_header(text: str) -> str:
+        """Prefix with the current header, or rewrite a legacy `Grounding:` prefix into bracket form."""
+        if not isinstance(text, str) or not text.strip():
+            return header
+        raw = str(text)
+        first = raw.lstrip().splitlines()[0].strip()
+        if _SYSTEM_CONTEXT_HEADER_RE.match(first):
+            return raw
+        legacy = _LEGACY_SYSTEM_CONTEXT_HEADER_PARSE_RE.match(first)
+        if legacy:
+            date_part, time_part, cc = legacy.group(1), legacy.group(2), legacy.group(3).upper()
+            date_part = date_part.replace("/", "-")
+            time_part = f"{time_part}:00" if len(time_part) == 5 else time_part
+            bracket = f"[{date_part} {time_part} {cc}]"
+            rest = "\n".join(raw.lstrip().splitlines()[1:]).lstrip()
+            return f"{bracket} {rest}" if rest else bracket
+        return f"{header} {raw}"
+
+    prompt_str = str(prompt or "")
+    if prompt_str.strip():
+        return _prefix_with_header(prompt_str), messages
+
+    if isinstance(messages, list) and messages:
+        out: List[Dict[str, Any]] = []
+        for m in messages:
+            out.append(dict(m) if isinstance(m, dict) else {"role": "user", "content": str(m)})
+
+        for i in range(len(out) - 1, -1, -1):
+            role = str(out[i].get("role") or "").strip().lower()
+            if role != "user":
+                continue
+            content = out[i].get("content")
+            content_str = content if isinstance(content, str) else str(content or "")
+            out[i]["content"] = _prefix_with_header(content_str)
+            return prompt_str, out
+
+        # No user message found; append a synthetic user turn.
+        out.append({"role": "user", "content": header})
+        return prompt_str, out
+
+    # No place to inject; best-effort no-op.
+    return prompt_str, messages
+
+
+def _maybe_parse_tool_calls_from_text(
+    *,
+    content: Optional[str],
+    allowed_tool_names: Optional[set[str]] = None,
+    model_name: Optional[str] = None,
+    tool_handler: Any = None,
+) -> tuple[Optional[List[Dict[str, Any]]], Optional[str]]:
+    """Deprecated: tool-call parsing belongs to AbstractCore.
+
+    AbstractCore now normalizes non-streaming responses by populating structured `tool_calls`
+    and returning cleaned `content`. This helper remains only for backward compatibility with
+    older AbstractCore versions and will be removed in the next major release.
+    """
+    # Keep behavior for external callers/tests that still import it.
+    if not isinstance(content, str) or not content.strip():
+        return None, None
+    if tool_handler is None:
+        from abstractcore.tools.handler import UniversalToolHandler
+
+        tool_handler = UniversalToolHandler(str(model_name or ""))
+
+    try:
+        parsed = tool_handler.parse_response(content, mode="prompted")
+    except Exception:
+        return None, None
+
+    calls = getattr(parsed, "tool_calls", None)
+    cleaned = getattr(parsed, "content", None)
+    if not isinstance(calls, list) or not calls:
+        return None, None
+
+    out_calls: List[Dict[str, Any]] = []
+    for tc in calls:
+        name = getattr(tc, "name", None)
+        arguments = getattr(tc, "arguments", None)
+        call_id = getattr(tc, "call_id", None)
+        if not isinstance(name, str) or not name.strip():
+            continue
+        if isinstance(allowed_tool_names, set) and allowed_tool_names and name not in allowed_tool_names:
+            continue
+        out_calls.append(
+            {
+                "name": name.strip(),
+                "arguments": _jsonable(arguments) if arguments is not None else {},
+                "call_id": str(call_id) if call_id is not None else None,
+            }
+        )
+
+    if not out_calls:
+        return None, None
+    return out_calls, (str(cleaned) if isinstance(cleaned, str) else "")
+
 
 @dataclass(frozen=True)
 class HttpResponse:
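Note: the grounding header injected above has the shape `[YYYY-MM-DD HH:MM:SS CC]`. A minimal standalone sketch of the same prefix check (illustrative names, not imports from the wheel):

import re
from datetime import datetime

# Same bracket pattern as _SYSTEM_CONTEXT_HEADER_RE in the hunk above.
HEADER_RE = re.compile(r"^\[\d{4}[-/]\d{2}[-/]\d{2}\s+\d{2}:\d{2}(?::\d{2})?\s+[A-Z]{2}\](?:\s|$)")

def prefix_turn(text: str, country: str = "XX") -> str:
    """Prefix a user turn with a grounding header unless one is already present."""
    if HEADER_RE.match(text.lstrip()):
        return text
    stamp = datetime.now().astimezone().strftime("%Y-%m-%d %H:%M:%S")
    return f"[{stamp} {country}] {text}"

print(prefix_turn("What day is it?"))  # e.g. "[2025-06-01 09:30:00 XX] What day is it?"

Keeping the header in the user turn (rather than the system prompt) is what preserves stable prompt prefixes for KV caching.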
@@ -47,6 +402,7 @@ class AbstractCoreLLMClient(Protocol):
         messages: Optional[List[Dict[str, str]]] = None,
         system_prompt: Optional[str] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
+        media: Optional[List[Any]] = None,
         params: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Any]:
         """Return a JSON-safe dict with at least: content/tool_calls/usage/model."""
@@ -83,9 +439,136 @@ def _jsonable(value: Any) -> Any:
     return str(value)
 
 
+def _loads_dict_like(raw: Any) -> Optional[Dict[str, Any]]:
+    """Parse a JSON-ish or Python-literal dict safely."""
+    if raw is None:
+        return None
+    text = str(raw).strip()
+    if not text:
+        return None
+    try:
+        parsed = json.loads(text)
+        if isinstance(parsed, dict):
+            return parsed
+    except Exception:
+        pass
+
+    candidate = re.sub(r"\btrue\b", "True", text, flags=re.IGNORECASE)
+    candidate = re.sub(r"\bfalse\b", "False", candidate, flags=re.IGNORECASE)
+    candidate = re.sub(r"\bnull\b", "None", candidate, flags=re.IGNORECASE)
+    try:
+        parsed = ast.literal_eval(candidate)
+    except Exception:
+        return None
+    if not isinstance(parsed, dict):
+        return None
+    return {str(k): v for k, v in parsed.items()}
+
+
+def _normalize_tool_calls(tool_calls: Any) -> Optional[List[Dict[str, Any]]]:
+    """Normalize tool call shapes into AbstractRuntime's standard dict form.
+
+    Standard shape:
+        {"name": str, "arguments": dict, "call_id": Optional[str]}
+    """
+    if tool_calls is None:
+        return None
+    if not isinstance(tool_calls, list):
+        return None
+
+    normalized: List[Dict[str, Any]] = []
+    for tc in tool_calls:
+        name: Optional[str] = None
+        arguments: Any = None
+        call_id: Any = None
+
+        if isinstance(tc, dict):
+            call_id = tc.get("call_id", None)
+            if call_id is None:
+                call_id = tc.get("id", None)
+
+            raw_name = tc.get("name")
+            raw_args = tc.get("arguments")
+
+            func = tc.get("function") if isinstance(tc.get("function"), dict) else None
+            if func and (not isinstance(raw_name, str) or not raw_name.strip()):
+                raw_name = func.get("name")
+            if func and raw_args is None:
+                raw_args = func.get("arguments")
+
+            if isinstance(raw_name, str):
+                name = raw_name.strip()
+                arguments = raw_args if raw_args is not None else {}
+        else:
+            raw_name = getattr(tc, "name", None)
+            raw_args = getattr(tc, "arguments", None)
+            call_id = getattr(tc, "call_id", None)
+            if isinstance(raw_name, str):
+                name = raw_name.strip()
+                arguments = raw_args if raw_args is not None else {}
+
+        if not isinstance(name, str) or not name:
+            continue
+
+        if isinstance(arguments, str):
+            parsed = _loads_dict_like(arguments)
+            arguments = parsed if isinstance(parsed, dict) else {}
+
+        if not isinstance(arguments, dict):
+            arguments = {}
+
+        normalized.append(
+            {
+                "name": name,
+                "arguments": _jsonable(arguments),
+                "call_id": str(call_id) if call_id is not None else None,
+            }
+        )
+
+    return normalized or None
+
+
 def _normalize_local_response(resp: Any) -> Dict[str, Any]:
     """Normalize an AbstractCore local `generate()` result into JSON."""
 
+    def _extract_reasoning_from_openai_like(raw: Any) -> Optional[str]:
+        """Best-effort extraction of model reasoning from OpenAI-style payloads.
+
+        LM Studio and some providers store reasoning in `choices[].message.reasoning_content`
+        while leaving `content` empty during tool-call turns.
+        """
+
+        def _from_message(msg: Any) -> Optional[str]:
+            if not isinstance(msg, dict):
+                return None
+            for key in ("reasoning", "reasoning_content", "thinking", "thinking_content"):
+                val = msg.get(key)
+                if isinstance(val, str) and val.strip():
+                    return val.strip()
+            return None
+
+        if isinstance(raw, dict):
+            # OpenAI chat completion: choices[].message
+            choices = raw.get("choices")
+            if isinstance(choices, list):
+                for c in choices:
+                    if not isinstance(c, dict):
+                        continue
+                    r = _from_message(c.get("message"))
+                    if r:
+                        return r
+                    # Streaming-style payloads may use `delta`.
+                    r = _from_message(c.get("delta"))
+                    if r:
+                        return r
+
+            # Some variants store a single message at the top level.
+            r = _from_message(raw.get("message"))
+            if r:
+                return r
+
+        return None
+
     # Dict-like already
     if isinstance(resp, dict):
         out = _jsonable(resp)
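Note: `_normalize_tool_calls` accepts both OpenAI-style `{"function": {...}}` wrappers and flat dicts. A simplified standalone sketch of the mapping it performs (not the wheel's code):

import json
from typing import Any, Dict, Optional

def normalize_call(tc: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Map one tool call into {"name", "arguments", "call_id"} (sketch)."""
    func = tc.get("function") if isinstance(tc.get("function"), dict) else None
    name = tc.get("name") or (func or {}).get("name")
    args = tc.get("arguments", (func or {}).get("arguments"))
    if isinstance(args, str):  # providers often JSON-encode arguments
        try:
            args = json.loads(args)
        except Exception:
            args = {}
    if not isinstance(name, str) or not name.strip():
        return None
    call_id = tc.get("call_id") or tc.get("id")
    return {
        "name": name.strip(),
        "arguments": args if isinstance(args, dict) else {},
        "call_id": str(call_id) if call_id is not None else None,
    }

# Both shapes normalize to the same dict:
print(normalize_call({"id": "c1", "function": {"name": "ls", "arguments": '{"path": "."}'}}))
print(normalize_call({"name": "ls", "arguments": {"path": "."}, "call_id": "c1"}))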
@@ -93,6 +576,24 @@ def _normalize_local_response(resp: Any) -> Dict[str, Any]:
         meta = out.get("metadata")
         if isinstance(meta, dict) and "trace_id" in meta and "trace_id" not in out:
             out["trace_id"] = meta["trace_id"]
+        # Some providers place reasoning under metadata (e.g. LM Studio gpt-oss).
+        if "reasoning" not in out and isinstance(meta, dict) and isinstance(meta.get("reasoning"), str):
+            out["reasoning"] = meta.get("reasoning")
+        if (
+            (not isinstance(out.get("reasoning"), str) or not str(out.get("reasoning") or "").strip())
+            and isinstance(out.get("raw_response"), dict)
+        ):
+            extracted = _extract_reasoning_from_openai_like(out.get("raw_response"))
+            if extracted:
+                out["reasoning"] = extracted
+        if (not isinstance(out.get("reasoning"), str) or not str(out.get("reasoning") or "").strip()) and isinstance(out.get("raw"), dict):
+            extracted = _extract_reasoning_from_openai_like(out.get("raw"))
+            if extracted:
+                out["reasoning"] = extracted
+        if (not isinstance(out.get("reasoning"), str) or not str(out.get("reasoning") or "").strip()) and isinstance(out.get("choices"), list):
+            extracted = _extract_reasoning_from_openai_like(out)
+            if extracted:
+                out["reasoning"] = extracted
         return out
 
     # Pydantic structured output
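Note: the reasoning fallbacks above probe a fixed set of locations in order. A compact sketch of that search (simplified; key names taken from the diff):

from typing import Any, Optional

REASONING_KEYS = ("reasoning", "reasoning_content", "thinking", "thinking_content")

def find_reasoning(payload: Any) -> Optional[str]:
    """Probe metadata, then choices[].message/.delta, then a top-level message."""
    if not isinstance(payload, dict):
        return None
    candidates = []
    if isinstance(payload.get("metadata"), dict):
        candidates.append(payload["metadata"])
    for c in payload.get("choices") or []:
        if isinstance(c, dict):
            candidates += [m for m in (c.get("message"), c.get("delta")) if isinstance(m, dict)]
    if isinstance(payload.get("message"), dict):
        candidates.append(payload["message"])
    for msg in candidates:
        for key in REASONING_KEYS:
            val = msg.get(key)
            if isinstance(val, str) and val.strip():
                return val.strip()
    return None

print(find_reasoning({"choices": [{"message": {"reasoning_content": "step 1 ..."}}]}))  # "step 1 ..."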
@@ -110,26 +611,166 @@ def _normalize_local_response(resp: Any) -> Dict[str, Any]:
 
     # AbstractCore GenerateResponse
     content = getattr(resp, "content", None)
+    raw_response = getattr(resp, "raw_response", None)
     tool_calls = getattr(resp, "tool_calls", None)
     usage = getattr(resp, "usage", None)
     model = getattr(resp, "model", None)
     finish_reason = getattr(resp, "finish_reason", None)
     metadata = getattr(resp, "metadata", None)
+    gen_time = getattr(resp, "gen_time", None)
     trace_id: Optional[str] = None
+    reasoning: Optional[str] = None
     if isinstance(metadata, dict):
         raw = metadata.get("trace_id")
         if raw is not None:
             trace_id = str(raw)
+        r = metadata.get("reasoning")
+        if isinstance(r, str) and r.strip():
+            reasoning = r.strip()
+    if reasoning is None and raw_response is not None:
+        extracted = _extract_reasoning_from_openai_like(_jsonable(raw_response))
+        if extracted:
+            reasoning = extracted
 
     return {
         "content": content,
+        "reasoning": reasoning,
         "data": None,
+        "raw_response": _jsonable(raw_response) if raw_response is not None else None,
         "tool_calls": _jsonable(tool_calls) if tool_calls is not None else None,
         "usage": _jsonable(usage) if usage is not None else None,
         "model": model,
         "finish_reason": finish_reason,
         "metadata": _jsonable(metadata) if metadata is not None else None,
         "trace_id": trace_id,
+        "gen_time": float(gen_time) if isinstance(gen_time, (int, float)) else None,
+    }
+
+
+def _normalize_local_streaming_response(stream: Any) -> Dict[str, Any]:
+    """Consume an AbstractCore streaming `generate(..., stream=True)` iterator into a single JSON result.
+
+    AbstractRuntime currently persists a single effect outcome object per LLM call, so even when
+    the underlying provider streams we aggregate into one final dict and surface timing fields.
+    """
+    import time
+
+    start_perf = time.perf_counter()
+
+    chunks: list[str] = []
+    tool_calls: Any = None
+    usage: Any = None
+    model: Optional[str] = None
+    finish_reason: Optional[str] = None
+    metadata: Dict[str, Any] = {}
+    trace_id: Optional[str] = None
+    reasoning: Optional[str] = None
+    ttft_ms: Optional[float] = None
+
+    def _maybe_capture_ttft(*, content: Any, tool_calls_value: Any, meta: Any) -> None:
+        nonlocal ttft_ms
+        if ttft_ms is not None:
+            return
+
+        if isinstance(meta, dict):
+            timing = meta.get("_timing") if isinstance(meta.get("_timing"), dict) else None
+            if isinstance(timing, dict) and isinstance(timing.get("ttft_ms"), (int, float)):
+                ttft_ms = float(timing["ttft_ms"])
+                return
+
+        has_content = isinstance(content, str) and bool(content)
+        has_tools = isinstance(tool_calls_value, list) and bool(tool_calls_value)
+        if has_content or has_tools:
+            ttft_ms = round((time.perf_counter() - start_perf) * 1000, 1)
+
+    for chunk in stream:
+        if chunk is None:
+            continue
+
+        if isinstance(chunk, dict):
+            content = chunk.get("content")
+            if isinstance(content, str) and content:
+                chunks.append(content)
+
+            tc = chunk.get("tool_calls")
+            if tc is not None:
+                tool_calls = tc
+
+            u = chunk.get("usage")
+            if u is not None:
+                usage = u
+
+            m = chunk.get("model")
+            if model is None and isinstance(m, str) and m.strip():
+                model = m.strip()
+
+            fr = chunk.get("finish_reason")
+            if fr is not None:
+                finish_reason = str(fr)
+
+            meta = chunk.get("metadata")
+            _maybe_capture_ttft(content=content, tool_calls_value=tc, meta=meta)
+
+            if isinstance(meta, dict):
+                meta_json = _jsonable(meta)
+                if isinstance(meta_json, dict):
+                    metadata.update(meta_json)
+                    raw_trace = meta_json.get("trace_id")
+                    if trace_id is None and raw_trace is not None:
+                        trace_id = str(raw_trace)
+                    r = meta_json.get("reasoning")
+                    if reasoning is None and isinstance(r, str) and r.strip():
+                        reasoning = r.strip()
+            continue
+
+        content = getattr(chunk, "content", None)
+        if isinstance(content, str) and content:
+            chunks.append(content)
+
+        tc = getattr(chunk, "tool_calls", None)
+        if tc is not None:
+            tool_calls = tc
+
+        u = getattr(chunk, "usage", None)
+        if u is not None:
+            usage = u
+
+        m = getattr(chunk, "model", None)
+        if model is None and isinstance(m, str) and m.strip():
+            model = m.strip()
+
+        fr = getattr(chunk, "finish_reason", None)
+        if fr is not None:
+            finish_reason = str(fr)
+
+        meta = getattr(chunk, "metadata", None)
+        _maybe_capture_ttft(content=content, tool_calls_value=tc, meta=meta)
+
+        if isinstance(meta, dict):
+            meta_json = _jsonable(meta)
+            if isinstance(meta_json, dict):
+                metadata.update(meta_json)
+                raw_trace = meta_json.get("trace_id")
+                if trace_id is None and raw_trace is not None:
+                    trace_id = str(raw_trace)
+                r = meta_json.get("reasoning")
+                if reasoning is None and isinstance(r, str) and r.strip():
+                    reasoning = r.strip()
+
+    gen_time = round((time.perf_counter() - start_perf) * 1000, 1)
+
+    return {
+        "content": "".join(chunks),
+        "reasoning": reasoning,
+        "data": None,
+        "tool_calls": _jsonable(tool_calls) if tool_calls is not None else None,
+        "usage": _jsonable(usage) if usage is not None else None,
+        "model": model,
+        "finish_reason": finish_reason,
+        "metadata": metadata or None,
+        "trace_id": trace_id,
+        "gen_time": gen_time,
+        "ttft_ms": ttft_ms,
     }
 
 
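Note: because the runtime persists one effect outcome per LLM call, streamed chunks collapse into a single dict carrying `gen_time` and `ttft_ms`. A toy sketch of that aggregation (chunk shape assumed from the hunk above):

import time

def aggregate(stream):
    """Collapse dict chunks into one result, timing the first visible token (sketch)."""
    start = time.perf_counter()
    parts, ttft_ms = [], None
    for chunk in stream:
        text = chunk.get("content")
        if text:
            if ttft_ms is None:
                ttft_ms = round((time.perf_counter() - start) * 1000, 1)
            parts.append(text)
    return {
        "content": "".join(parts),
        "ttft_ms": ttft_ms,
        "gen_time": round((time.perf_counter() - start) * 1000, 1),
    }

print(aggregate(iter([{"content": "Hel"}, {"content": "lo"}])))
# {'content': 'Hello', 'ttft_ms': ..., 'gen_time': ...}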
@@ -143,15 +784,30 @@ class LocalAbstractCoreLLMClient:
         model: str,
         llm_kwargs: Optional[Dict[str, Any]] = None,
     ):
-
+        # In this monorepo layout, `import abstractcore` can resolve to a namespace package
+        # (the outer project directory) when running from the repo root. In that case, the
+        # top-level re-export `from abstractcore import create_llm` is unavailable even though
+        # the actual module tree (e.g. `abstractcore.core.factory`) is importable.
+        #
+        # Prefer the canonical public import, but fall back to the concrete module path so
+        # in-repo tooling/tests don't depend on editable-install import ordering.
+        try:
+            from abstractcore import create_llm  # type: ignore
+        except Exception:  # pragma: no cover
+            from abstractcore.core.factory import create_llm  # type: ignore
         from abstractcore.tools.handler import UniversalToolHandler
 
         self._provider = provider
         self._model = model
+        self._generate_lock = _local_generate_lock(provider=self._provider, model=self._model)
+        if self._generate_lock is not None:
+            _warn_local_generate_lock_once(provider=self._provider, model=self._model)
         kwargs = dict(llm_kwargs or {})
         kwargs.setdefault("enable_tracing", True)
         if kwargs.get("enable_tracing"):
-
+            # Keep a small in-memory ring buffer for exact request/response observability.
+            # This enables hosts (AbstractCode/AbstractFlow) to inspect trace payloads by trace_id.
+            kwargs.setdefault("max_traces", 50)
         self._llm = create_llm(provider, model=model, **kwargs)
         self._tool_handler = UniversalToolHandler(model)
 
@@ -162,66 +818,112 @@ class LocalAbstractCoreLLMClient:
         messages: Optional[List[Dict[str, str]]] = None,
         system_prompt: Optional[str] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
+        media: Optional[List[Any]] = None,
         params: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Any]:
         params = dict(params or {})
 
+        system_prompt = _strip_system_context_header(system_prompt)
+        prompt, messages = _inject_turn_grounding(prompt=str(prompt or ""), messages=messages)
+        messages = _strip_internal_system_messages(messages)
+
+        stream_raw = params.pop("stream", None)
+        if stream_raw is None:
+            stream_raw = params.pop("streaming", None)
+        if isinstance(stream_raw, str):
+            stream = stream_raw.strip().lower() in {"1", "true", "yes", "y", "on"}
+        else:
+            stream = bool(stream_raw) if stream_raw is not None else False
+
         # `base_url` is a provider construction concern in local mode. We intentionally
         # do not create new providers per call unless the host explicitly chooses to.
         params.pop("base_url", None)
+        # Reserved routing keys (used by MultiLocalAbstractCoreLLMClient).
+        params.pop("_provider", None)
+        params.pop("_model", None)
 
-
-
-        if callable(get_capabilities):
-            try:
-                capabilities = list(get_capabilities())
-            except Exception:
-                capabilities = []
-        supports_tools = "tools" in set(c.lower() for c in capabilities)
-
-        if tools and not supports_tools:
-            # Fallback tool calling via prompting for providers/models without native tool support.
-            from abstractcore.tools import ToolDefinition
-
-            tool_defs = [
-                ToolDefinition(
-                    name=t.get("name", ""),
-                    description=t.get("description", ""),
-                    parameters=t.get("parameters", {}),
-                )
-                for t in tools
-            ]
-            tools_prompt = self._tool_handler.format_tools_prompt(tool_defs)
-            effective_prompt = f"{tools_prompt}\n\nUser request: {prompt}"
-
+        lock = getattr(self, "_generate_lock", None)
+        if lock is None:
             resp = self._llm.generate(
-                prompt=
+                prompt=str(prompt or ""),
                 messages=messages,
                 system_prompt=system_prompt,
-
+                tools=tools,
+                media=media,
+                stream=stream,
                 **params,
             )
-
-
-
-
-
-
-
-
-
-
-
+            if stream and hasattr(resp, "__next__"):
+                result = _normalize_local_streaming_response(resp)
+            else:
+                result = _normalize_local_response(resp)
+            result["tool_calls"] = _normalize_tool_calls(result.get("tool_calls"))
+        else:
+            # Serialize generation for non-thread-safe providers (e.g. MLX).
+            with lock:
+                resp = self._llm.generate(
+                    prompt=str(prompt or ""),
+                    messages=messages,
+                    system_prompt=system_prompt,
+                    tools=tools,
+                    media=media,
+                    stream=stream,
+                    **params,
+                )
+            if stream and hasattr(resp, "__next__"):
+                result = _normalize_local_streaming_response(resp)
+            else:
+                result = _normalize_local_response(resp)
+            result["tool_calls"] = _normalize_tool_calls(result.get("tool_calls"))
 
-
-
-
-
-
-
-
-
-
+        # Durable observability: ensure a provider request payload exists even when the
+        # underlying provider does not attach `_provider_request` metadata.
+        #
+        # AbstractCode's `/llm --verbatim` expects `metadata._provider_request.payload.messages`
+        # to be present to display the exact system/user content that was sent.
+        try:
+            meta = result.get("metadata")
+            if not isinstance(meta, dict):
+                meta = {}
+            result["metadata"] = meta
+
+            if "_provider_request" not in meta:
+                out_messages: List[Dict[str, str]] = []
+                if isinstance(system_prompt, str) and system_prompt:
+                    out_messages.append({"role": "system", "content": system_prompt})
+                if isinstance(messages, list) and messages:
+                    # Copy dict entries defensively (caller-owned objects).
+                    out_messages.extend([dict(m) for m in messages if isinstance(m, dict)])
+
+                # Append the current prompt as the final user message unless it's already present.
+                prompt_str = str(prompt or "")
+                if prompt_str:
+                    last = out_messages[-1] if out_messages else None
+                    if not (isinstance(last, dict) and last.get("role") == "user" and last.get("content") == prompt_str):
+                        out_messages.append({"role": "user", "content": prompt_str})
+
+                payload: Dict[str, Any] = {
+                    "model": str(self._model),
+                    "messages": out_messages,
+                    "stream": bool(stream),
+                }
+                if tools is not None:
+                    payload["tools"] = tools
+
+                # Include generation params for debugging; keep JSON-safe (e.g. response_model).
+                payload["params"] = _jsonable(params) if params else {}
+
+                meta["_provider_request"] = {
+                    "transport": "local",
+                    "provider": str(self._provider),
+                    "model": str(self._model),
+                    "payload": payload,
+                }
+        except Exception:
+            # Never fail an LLM call due to observability.
+            pass
+
+        return result
 
     def get_model_capabilities(self) -> Dict[str, Any]:
         """Get model capabilities including max_tokens, vision_support, etc.
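Note: after a local call, hosts can rely on `result["metadata"]["_provider_request"]` existing. Its shape, reconstructed from the hunk above (field values illustrative):

provider_request = {
    "transport": "local",
    "provider": "mlx",            # str(self._provider)
    "model": "some-model",        # str(self._model)
    "payload": {
        "model": "some-model",
        "messages": [
            {"role": "system", "content": "..."},
            {"role": "user", "content": "[2025-06-01 09:30:00 XX] ..."},
        ],
        "stream": False,
        # "tools": [...]          # present only when tools were passed
        "params": {},             # JSON-safe generation params
    },
}

This is the structure AbstractCode's `/llm --verbatim` reads to display the exact request.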
@@ -231,14 +933,92 @@ class LocalAbstractCoreLLMClient:
         for resource tracking and warnings.
 
         Returns:
-            Dict with model capabilities. Always includes 'max_tokens' (default
+            Dict with model capabilities. Always includes 'max_tokens' (default: DEFAULT_MAX_TOKENS).
         """
         try:
            from abstractcore.architectures.detection import get_model_capabilities
            return get_model_capabilities(self._model)
         except Exception:
             # Safe fallback if detection fails
-
+            from abstractruntime.core.vars import DEFAULT_MAX_TOKENS
+
+            return {"max_tokens": DEFAULT_MAX_TOKENS}
+
+
+class MultiLocalAbstractCoreLLMClient:
+    """Local AbstractCore client with per-request provider/model routing.
+
+    This keeps the same `generate(...)` signature as AbstractCoreLLMClient by
+    using reserved keys in `params`:
+    - `_provider`: override provider for this request
+    - `_model`: override model for this request
+    """
+
+    def __init__(
+        self,
+        *,
+        provider: str,
+        model: str,
+        llm_kwargs: Optional[Dict[str, Any]] = None,
+    ):
+        self._llm_kwargs = dict(llm_kwargs or {})
+        self._default_provider = provider.strip().lower()
+        self._default_model = model.strip()
+        self._clients: Dict[Tuple[str, str], LocalAbstractCoreLLMClient] = {}
+        self._default_client = self._get_client(self._default_provider, self._default_model)
+
+        # Provide a stable underlying LLM for components that need one (e.g. summarizer).
+        self._llm = getattr(self._default_client, "_llm", None)
+
+    def _get_client(self, provider: str, model: str) -> LocalAbstractCoreLLMClient:
+        key = (provider.strip().lower(), model.strip())
+        client = self._clients.get(key)
+        if client is None:
+            client = LocalAbstractCoreLLMClient(provider=key[0], model=key[1], llm_kwargs=self._llm_kwargs)
+            self._clients[key] = client
+        return client
+
+    def get_provider_instance(self, *, provider: str, model: str) -> Any:
+        """Return the underlying AbstractCore provider instance for (provider, model)."""
+        client = self._get_client(str(provider or ""), str(model or ""))
+        return getattr(client, "_llm", None)
+
+    def list_loaded_clients(self) -> List[Tuple[str, str]]:
+        """Return (provider, model) pairs loaded in this process (best-effort)."""
+        return list(self._clients.keys())
+
+    def generate(
+        self,
+        *,
+        prompt: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        system_prompt: Optional[str] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        media: Optional[List[Any]] = None,
+        params: Optional[Dict[str, Any]] = None,
+    ) -> Dict[str, Any]:
+        params = dict(params or {})
+        provider = params.pop("_provider", None)
+        model = params.pop("_model", None)
+
+        provider_str = (
+            str(provider).strip().lower() if isinstance(provider, str) and provider.strip() else self._default_provider
+        )
+        model_str = str(model).strip() if isinstance(model, str) and model.strip() else self._default_model
+
+        client = self._get_client(provider_str, model_str)
+        return client.generate(
+            prompt=prompt,
+            messages=messages,
+            system_prompt=system_prompt,
+            tools=tools,
+            media=media,
+            params=params,
+        )
+
+    def get_model_capabilities(self) -> Dict[str, Any]:
+        # Best-effort: use default model capabilities. Per-model limits can be added later.
+        return self._default_client.get_model_capabilities()
 
 
 class HttpxRequestSender:
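Note: a hedged usage sketch of the reserved routing keys handled by `MultiLocalAbstractCoreLLMClient.generate` (provider/model values are placeholders; the import path follows this diff's module layout):

from abstractruntime.integrations.abstractcore.llm_client import MultiLocalAbstractCoreLLMClient

client = MultiLocalAbstractCoreLLMClient(provider="ollama", model="qwen3:4b")

# "_provider"/"_model" route this one request to a different cached client and
# are popped before the remaining params reach the provider.
result = client.generate(
    prompt="hello",
    params={"_provider": "mlx", "_model": "some-mlx-model", "temperature": 0.2},
)
print(result["model"], result.get("gen_time"))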
@@ -288,13 +1068,17 @@ class RemoteAbstractCoreLLMClient:
         *,
         server_base_url: str,
         model: str,
-
+        # Runtime authority default: long-running workflow steps may legitimately take a long time.
+        # Keep this aligned with AbstractRuntime's orchestration defaults.
+        timeout_s: Optional[float] = None,
         headers: Optional[Dict[str, str]] = None,
         request_sender: Optional[RequestSender] = None,
     ):
+        from .constants import DEFAULT_LLM_TIMEOUT_S
+
         self._server_base_url = server_base_url.rstrip("/")
         self._model = model
-        self._timeout_s = timeout_s
+        self._timeout_s = float(timeout_s) if timeout_s is not None else DEFAULT_LLM_TIMEOUT_S
         self._headers = dict(headers or {})
         self._sender = request_sender or HttpxRequestSender()
 
@@ -305,12 +1089,20 @@ class RemoteAbstractCoreLLMClient:
         messages: Optional[List[Dict[str, str]]] = None,
         system_prompt: Optional[str] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
+        media: Optional[List[Any]] = None,
         params: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Any]:
         params = dict(params or {})
+        if media:
+            raise ValueError(
+                "RemoteAbstractCoreLLMClient does not support media yet (artifact-backed attachments require local/hybrid execution)."
+            )
         req_headers = dict(self._headers)
 
         trace_metadata = params.pop("trace_metadata", None)
+        system_prompt = _strip_system_context_header(system_prompt)
+        prompt, messages = _inject_turn_grounding(prompt=str(prompt or ""), messages=messages)
+
         if isinstance(trace_metadata, dict) and trace_metadata:
             req_headers["X-AbstractCore-Trace-Metadata"] = json.dumps(
                 trace_metadata, ensure_ascii=False, separators=(",", ":")
@@ -340,6 +1132,9 @@
             "model": self._model,
             "messages": out_messages,
             "stream": False,
+            # Orchestrator policy: ask AbstractCore server to use the same timeout it expects.
+            # This keeps runtime authority even when the actual provider call happens server-side.
+            "timeout_s": self._timeout_s,
         }
 
         # Dynamic routing support (AbstractCore server feature).
@@ -347,6 +1142,10 @@
         if base_url:
             body["base_url"] = base_url
 
+        prompt_cache_key = params.get("prompt_cache_key")
+        if isinstance(prompt_cache_key, str) and prompt_cache_key.strip():
+            body["prompt_cache_key"] = prompt_cache_key.strip()
+
         # Pass through common OpenAI-compatible parameters.
         for key in (
             "temperature",
@@ -357,6 +1156,20 @@
             "presence_penalty",
         ):
             if key in params and params[key] is not None:
+                if key == "seed":
+                    try:
+                        seed_i = int(params[key])
+                    except Exception:
+                        continue
+                    if seed_i >= 0:
+                        body[key] = seed_i
+                    continue
+                if key == "temperature":
+                    try:
+                        body[key] = float(params[key])
+                    except Exception:
+                        continue
+                    continue
                 body[key] = params[key]
 
         if tools is not None:
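Note: the per-key coercion above shields the server from badly typed params; equivalent standalone logic (sketch):

def coerce_param(key, value):
    """seed -> non-negative int, temperature -> float; None means drop the key (sketch)."""
    if key == "seed":
        try:
            seed = int(value)
        except Exception:
            return None
        return seed if seed >= 0 else None
    if key == "temperature":
        try:
            return float(value)
        except Exception:
            return None
    return value

assert coerce_param("seed", "42") == 42
assert coerce_param("seed", -1) is None
assert coerce_param("temperature", "0.7") == 0.7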
@@ -372,16 +1185,33 @@ class RemoteAbstractCoreLLMClient:
         try:
             choice0 = (resp.get("choices") or [])[0]
             msg = choice0.get("message") or {}
-
+            meta: Dict[str, Any] = {
+                "_provider_request": {"url": url, "payload": body}
+            }
+            if trace_id:
+                meta["trace_id"] = trace_id
+            reasoning = msg.get("reasoning")
+            if not isinstance(reasoning, str) or not reasoning.strip():
+                reasoning = msg.get("reasoning_content")
+            if not isinstance(reasoning, str) or not reasoning.strip():
+                reasoning = msg.get("thinking")
+            if not isinstance(reasoning, str) or not reasoning.strip():
+                reasoning = msg.get("thinking_content")
+            result = {
                 "content": msg.get("content"),
+                "reasoning": reasoning,
                 "data": None,
+                "raw_response": _jsonable(resp) if resp is not None else None,
                 "tool_calls": _jsonable(msg.get("tool_calls")) if msg.get("tool_calls") is not None else None,
                 "usage": _jsonable(resp.get("usage")) if resp.get("usage") is not None else None,
                 "model": resp.get("model"),
                 "finish_reason": choice0.get("finish_reason"),
-                "metadata":
+                "metadata": meta,
                 "trace_id": trace_id,
             }
+            result["tool_calls"] = _normalize_tool_calls(result.get("tool_calls"))
+
+            return result
         except Exception:
             # Fallback: return the raw response in JSON-safe form.
             logger.warning("Remote LLM response normalization failed; returning raw JSON")
@@ -392,6 +1222,12 @@ class RemoteAbstractCoreLLMClient:
             "usage": None,
             "model": resp.get("model") if isinstance(resp, dict) else None,
             "finish_reason": None,
-            "metadata": {
+            "metadata": {
+                "_provider_request": {"url": url, "payload": body},
+                "trace_id": trace_id,
+            }
+            if trace_id
+            else {"_provider_request": {"url": url, "payload": body}},
             "trace_id": trace_id,
+            "raw_response": _jsonable(resp) if resp is not None else None,
         }