AbstractRuntime 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractruntime/__init__.py +76 -1
- abstractruntime/core/config.py +68 -1
- abstractruntime/core/models.py +5 -0
- abstractruntime/core/policy.py +74 -3
- abstractruntime/core/runtime.py +1002 -126
- abstractruntime/core/vars.py +8 -2
- abstractruntime/evidence/recorder.py +1 -1
- abstractruntime/history_bundle.py +772 -0
- abstractruntime/integrations/abstractcore/__init__.py +3 -0
- abstractruntime/integrations/abstractcore/default_tools.py +127 -3
- abstractruntime/integrations/abstractcore/effect_handlers.py +2440 -99
- abstractruntime/integrations/abstractcore/embeddings_client.py +69 -0
- abstractruntime/integrations/abstractcore/factory.py +68 -20
- abstractruntime/integrations/abstractcore/llm_client.py +447 -15
- abstractruntime/integrations/abstractcore/mcp_worker.py +1 -0
- abstractruntime/integrations/abstractcore/session_attachments.py +946 -0
- abstractruntime/integrations/abstractcore/tool_executor.py +31 -10
- abstractruntime/integrations/abstractcore/workspace_scoped_tools.py +561 -0
- abstractruntime/integrations/abstractmemory/__init__.py +3 -0
- abstractruntime/integrations/abstractmemory/effect_handlers.py +946 -0
- abstractruntime/memory/active_context.py +6 -1
- abstractruntime/memory/kg_packets.py +164 -0
- abstractruntime/memory/memact_composer.py +175 -0
- abstractruntime/memory/recall_levels.py +163 -0
- abstractruntime/memory/token_budget.py +86 -0
- abstractruntime/storage/__init__.py +4 -1
- abstractruntime/storage/artifacts.py +158 -30
- abstractruntime/storage/base.py +17 -1
- abstractruntime/storage/commands.py +339 -0
- abstractruntime/storage/in_memory.py +41 -1
- abstractruntime/storage/json_files.py +195 -12
- abstractruntime/storage/observable.py +38 -1
- abstractruntime/storage/offloading.py +433 -0
- abstractruntime/storage/sqlite.py +836 -0
- abstractruntime/visualflow_compiler/__init__.py +29 -0
- abstractruntime/visualflow_compiler/adapters/__init__.py +11 -0
- abstractruntime/visualflow_compiler/adapters/agent_adapter.py +126 -0
- abstractruntime/visualflow_compiler/adapters/context_adapter.py +109 -0
- abstractruntime/visualflow_compiler/adapters/control_adapter.py +615 -0
- abstractruntime/visualflow_compiler/adapters/effect_adapter.py +1051 -0
- abstractruntime/visualflow_compiler/adapters/event_adapter.py +307 -0
- abstractruntime/visualflow_compiler/adapters/function_adapter.py +97 -0
- abstractruntime/visualflow_compiler/adapters/memact_adapter.py +114 -0
- abstractruntime/visualflow_compiler/adapters/subflow_adapter.py +74 -0
- abstractruntime/visualflow_compiler/adapters/variable_adapter.py +316 -0
- abstractruntime/visualflow_compiler/compiler.py +3832 -0
- abstractruntime/visualflow_compiler/flow.py +247 -0
- abstractruntime/visualflow_compiler/visual/__init__.py +13 -0
- abstractruntime/visualflow_compiler/visual/agent_ids.py +29 -0
- abstractruntime/visualflow_compiler/visual/builtins.py +1376 -0
- abstractruntime/visualflow_compiler/visual/code_executor.py +214 -0
- abstractruntime/visualflow_compiler/visual/executor.py +2804 -0
- abstractruntime/visualflow_compiler/visual/models.py +211 -0
- abstractruntime/workflow_bundle/__init__.py +52 -0
- abstractruntime/workflow_bundle/models.py +236 -0
- abstractruntime/workflow_bundle/packer.py +317 -0
- abstractruntime/workflow_bundle/reader.py +87 -0
- abstractruntime/workflow_bundle/registry.py +587 -0
- abstractruntime-0.4.1.dist-info/METADATA +177 -0
- abstractruntime-0.4.1.dist-info/RECORD +86 -0
- abstractruntime-0.4.0.dist-info/METADATA +0 -167
- abstractruntime-0.4.0.dist-info/RECORD +0 -49
- {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/WHEEL +0 -0
- {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/entry_points.txt +0 -0
- {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -15,14 +15,314 @@ from __future__ import annotations
|
|
|
15
15
|
|
|
16
16
|
import ast
|
|
17
17
|
import json
|
|
18
|
+
import locale
|
|
19
|
+
import os
|
|
18
20
|
import re
|
|
21
|
+
import threading
|
|
19
22
|
from dataclasses import asdict, dataclass, is_dataclass
|
|
23
|
+
from datetime import datetime
|
|
20
24
|
from typing import Any, Dict, List, Optional, Protocol, Tuple
|
|
21
25
|
|
|
22
26
|
from .logging import get_logger
|
|
23
27
|
|
|
24
28
|
logger = get_logger(__name__)
|
|
25
29
|
|
|
30
|
+
# Process-wide registry of generation locks, keyed by (provider, model).
# Entries are created lazily by `_local_generate_lock` and never removed.
_LOCAL_GENERATE_LOCKS: Dict[Tuple[str, str], threading.Lock] = {}
# Guards lookups/insertions in `_LOCAL_GENERATE_LOCKS`.
_LOCAL_GENERATE_LOCKS_LOCK = threading.Lock()
# (provider, model) keys for which the "generation is serialized" warning was already logged.
_LOCAL_GENERATE_LOCKS_WARNED: set[Tuple[str, str]] = set()
# Guards membership checks/insertions in `_LOCAL_GENERATE_LOCKS_WARNED`.
_LOCAL_GENERATE_LOCKS_WARNED_LOCK = threading.Lock()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _local_generate_lock(*, provider: str, model: str) -> Optional[threading.Lock]:
|
|
37
|
+
"""Return a process-wide generation lock for providers that are not thread-safe.
|
|
38
|
+
|
|
39
|
+
MLX/Metal can crash the process when concurrent generations occur from multiple threads
|
|
40
|
+
(e.g. gateway ticking multiple runs concurrently). We serialize MLX generation per model
|
|
41
|
+
as a safety contract.
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
prov = str(provider or "").strip().lower()
|
|
45
|
+
if prov != "mlx":
|
|
46
|
+
return None
|
|
47
|
+
key = (prov, str(model or "").strip())
|
|
48
|
+
with _LOCAL_GENERATE_LOCKS_LOCK:
|
|
49
|
+
lock = _LOCAL_GENERATE_LOCKS.get(key)
|
|
50
|
+
if lock is None:
|
|
51
|
+
lock = threading.Lock()
|
|
52
|
+
_LOCAL_GENERATE_LOCKS[key] = lock
|
|
53
|
+
return lock
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _warn_local_generate_lock_once(*, provider: str, model: str) -> None:
    """Log the "generation is serialized" warning at most once per (provider, model).

    Args:
        provider: Provider name; trimmed and lower-cased before use.
        model: Model identifier; trimmed before use.
    """
    normalized_provider = str(provider or "").strip().lower()
    model_name = str(model or "").strip()
    warn_key = (normalized_provider, model_name)

    # Record the key under the guard so concurrent callers cannot both emit the warning.
    with _LOCAL_GENERATE_LOCKS_WARNED_LOCK:
        already_warned = warn_key in _LOCAL_GENERATE_LOCKS_WARNED
        if not already_warned:
            _LOCAL_GENERATE_LOCKS_WARNED.add(warn_key)

    if already_warned:
        return

    logger.warning(
        "Local provider generation is serialized for safety (prevents MLX/Metal crashes under concurrency).",
        provider=normalized_provider,
        model=model_name,
    )
|
|
68
|
+
|
|
69
|
+
_SYSTEM_CONTEXT_HEADER_RE = re.compile(
|
|
70
|
+
# ChatML-style user-turn grounding prefix, matching `chat-mlx.py` / `chat-hf.py`:
|
|
71
|
+
# "[YYYY-MM-DD HH:MM:SS CC]" (optionally followed by whitespace + user text).
|
|
72
|
+
# Backward compatible with the historical "[YYYY/MM/DD HH:MM CC]" form.
|
|
73
|
+
r"^\[\d{4}[-/]\d{2}[-/]\d{2}\s+\d{2}:\d{2}(?::\d{2})?\s+[A-Z]{2}\](?:\s|$)",
|
|
74
|
+
re.IGNORECASE,
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
_LEGACY_SYSTEM_CONTEXT_HEADER_RE = re.compile(
|
|
78
|
+
r"^Grounding:\s*\d{4}/\d{2}/\d{2}\|\d{2}:\d{2}\|[A-Z]{2}$",
|
|
79
|
+
re.IGNORECASE,
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
_LEGACY_SYSTEM_CONTEXT_HEADER_PARSE_RE = re.compile(
|
|
83
|
+
r"^Grounding:\s*(\d{4}/\d{2}/\d{2})\|(\d{2}:\d{2})\|([A-Z]{2})$",
|
|
84
|
+
re.IGNORECASE,
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
_ZONEINFO_TAB_CANDIDATES = [
|
|
88
|
+
"/usr/share/zoneinfo/zone.tab",
|
|
89
|
+
"/usr/share/zoneinfo/zone1970.tab",
|
|
90
|
+
"/var/db/timezone/zoneinfo/zone.tab",
|
|
91
|
+
"/var/db/timezone/zoneinfo/zone1970.tab",
|
|
92
|
+
]
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _detect_timezone_name() -> Optional[str]:
|
|
96
|
+
"""Best-effort IANA timezone name (e.g. 'Europe/Paris')."""
|
|
97
|
+
|
|
98
|
+
tz_env = os.environ.get("TZ")
|
|
99
|
+
if isinstance(tz_env, str):
|
|
100
|
+
tz = tz_env.strip().lstrip(":")
|
|
101
|
+
if tz and "/" in tz:
|
|
102
|
+
return tz
|
|
103
|
+
|
|
104
|
+
# Common on Debian/Ubuntu.
|
|
105
|
+
try:
|
|
106
|
+
with open("/etc/timezone", "r", encoding="utf-8", errors="ignore") as f:
|
|
107
|
+
line = f.readline().strip()
|
|
108
|
+
if line and "/" in line:
|
|
109
|
+
return line
|
|
110
|
+
except Exception:
|
|
111
|
+
pass
|
|
112
|
+
|
|
113
|
+
# Common on macOS + many Linux distros (symlink or copied file).
|
|
114
|
+
try:
|
|
115
|
+
real = os.path.realpath("/etc/localtime")
|
|
116
|
+
except Exception:
|
|
117
|
+
real = ""
|
|
118
|
+
if real:
|
|
119
|
+
match = re.search(r"/zoneinfo/(.+)$", real)
|
|
120
|
+
if match:
|
|
121
|
+
tz = match.group(1).strip()
|
|
122
|
+
if tz and "/" in tz:
|
|
123
|
+
return tz
|
|
124
|
+
|
|
125
|
+
return None
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _country_from_zone_tab(*, zone_name: str, tab_paths: Optional[List[str]] = None) -> Optional[str]:
|
|
129
|
+
"""Resolve ISO2 country code from zone.tab / zone1970.tab."""
|
|
130
|
+
zone = str(zone_name or "").strip()
|
|
131
|
+
if not zone:
|
|
132
|
+
return None
|
|
133
|
+
|
|
134
|
+
paths = list(tab_paths) if isinstance(tab_paths, list) and tab_paths else list(_ZONEINFO_TAB_CANDIDATES)
|
|
135
|
+
for tab_path in paths:
|
|
136
|
+
try:
|
|
137
|
+
with open(tab_path, "r", encoding="utf-8", errors="ignore") as f:
|
|
138
|
+
for raw_line in f:
|
|
139
|
+
line = raw_line.strip()
|
|
140
|
+
if not line or line.startswith("#"):
|
|
141
|
+
continue
|
|
142
|
+
parts = line.split("\t")
|
|
143
|
+
if len(parts) < 3:
|
|
144
|
+
continue
|
|
145
|
+
cc_field = parts[0].strip()
|
|
146
|
+
tz_field = parts[2].strip()
|
|
147
|
+
if tz_field != zone:
|
|
148
|
+
continue
|
|
149
|
+
cc = cc_field.split(",", 1)[0].strip()
|
|
150
|
+
if len(cc) == 2 and cc.isalpha():
|
|
151
|
+
return cc.upper()
|
|
152
|
+
except Exception:
|
|
153
|
+
continue
|
|
154
|
+
return None
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _detect_country() -> str:
|
|
158
|
+
"""Best-effort 2-letter country code detection.
|
|
159
|
+
|
|
160
|
+
Order:
|
|
161
|
+
1) Explicit env override: ABSTRACT_COUNTRY / ABSTRACTFRAMEWORK_COUNTRY
|
|
162
|
+
2) Locale region from `locale.getlocale()` or locale env vars (LANG/LC_ALL/LC_CTYPE)
|
|
163
|
+
3) Timezone (IANA name) via zone.tab mapping
|
|
164
|
+
|
|
165
|
+
Notes:
|
|
166
|
+
- Avoid parsing encoding-only strings like `UTF-8` as a country (a common locale env pitfall).
|
|
167
|
+
- If no reliable region is found, return `XX` (unknown).
|
|
168
|
+
"""
|
|
169
|
+
|
|
170
|
+
def _normalize_country_code(value: Optional[str]) -> Optional[str]:
|
|
171
|
+
if not isinstance(value, str):
|
|
172
|
+
return None
|
|
173
|
+
raw = value.strip()
|
|
174
|
+
if not raw:
|
|
175
|
+
return None
|
|
176
|
+
|
|
177
|
+
base = raw.split(".", 1)[0].split("@", 1)[0].strip()
|
|
178
|
+
if len(base) == 2 and base.isalpha():
|
|
179
|
+
return base.upper()
|
|
180
|
+
|
|
181
|
+
parts = [p.strip() for p in re.split(r"[_-]", base) if p.strip()]
|
|
182
|
+
for part in parts[1:]:
|
|
183
|
+
if len(part) == 2 and part.isalpha():
|
|
184
|
+
return part.upper()
|
|
185
|
+
return None
|
|
186
|
+
|
|
187
|
+
# Explicit override (preferred).
|
|
188
|
+
for key in ("ABSTRACT_COUNTRY", "ABSTRACTFRAMEWORK_COUNTRY"):
|
|
189
|
+
cc = _normalize_country_code(os.environ.get(key))
|
|
190
|
+
if cc is not None:
|
|
191
|
+
return cc
|
|
192
|
+
|
|
193
|
+
candidates: List[str] = []
|
|
194
|
+
try:
|
|
195
|
+
loc = locale.getlocale()[0]
|
|
196
|
+
if isinstance(loc, str) and loc.strip():
|
|
197
|
+
candidates.append(loc)
|
|
198
|
+
except Exception:
|
|
199
|
+
pass
|
|
200
|
+
|
|
201
|
+
for key in ("LC_ALL", "LANG", "LC_CTYPE"):
|
|
202
|
+
v = os.environ.get(key)
|
|
203
|
+
if isinstance(v, str) and v.strip():
|
|
204
|
+
candidates.append(v)
|
|
205
|
+
|
|
206
|
+
for cand in candidates:
|
|
207
|
+
cc = _normalize_country_code(cand)
|
|
208
|
+
if cc is not None:
|
|
209
|
+
return cc
|
|
210
|
+
|
|
211
|
+
tz_name = _detect_timezone_name()
|
|
212
|
+
if tz_name:
|
|
213
|
+
cc = _country_from_zone_tab(zone_name=tz_name)
|
|
214
|
+
if cc is not None:
|
|
215
|
+
return cc
|
|
216
|
+
|
|
217
|
+
return "XX"
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _system_context_header() -> str:
    """Build the per-turn grounding header: "[YYYY-MM-DD HH:MM:SS CC]".

    The timestamp is the local wall-clock time (taken from a timezone-aware datetime so it
    matches the user's environment); `CC` is whatever `_detect_country()` reports.
    """
    local_now = datetime.now().astimezone()
    timestamp = local_now.strftime("%Y-%m-%d %H:%M:%S")
    country = _detect_country()
    return f"[{timestamp} {country}]"
|
|
225
|
+
|
|
226
|
+
def _strip_system_context_header(system_prompt: Optional[str]) -> Optional[str]:
|
|
227
|
+
"""Remove a runtime-injected system-context header from the system prompt (best-effort).
|
|
228
|
+
|
|
229
|
+
Why:
|
|
230
|
+
- Historically AbstractRuntime injected a "Grounding: ..." line into the *system prompt*.
|
|
231
|
+
- Prompt/KV caching works best when stable prefixes (system/tools/history) do not contain per-turn entropy.
|
|
232
|
+
- We still want date/time/country per turn, but we inject it into the *current user turn* instead.
|
|
233
|
+
"""
|
|
234
|
+
if not isinstance(system_prompt, str):
|
|
235
|
+
return system_prompt
|
|
236
|
+
raw = system_prompt
|
|
237
|
+
lines = raw.splitlines()
|
|
238
|
+
if not lines:
|
|
239
|
+
return None
|
|
240
|
+
first = lines[0].strip()
|
|
241
|
+
if not (_LEGACY_SYSTEM_CONTEXT_HEADER_RE.match(first) or _SYSTEM_CONTEXT_HEADER_RE.match(first)):
|
|
242
|
+
return raw
|
|
243
|
+
rest = "\n".join(lines[1:]).lstrip()
|
|
244
|
+
return rest if rest else None
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _strip_internal_system_messages(messages: Optional[List[Dict[str, Any]]]) -> Optional[List[Dict[str, Any]]]:
|
|
248
|
+
"""Remove internal system messages that should never leak into model outputs.
|
|
249
|
+
|
|
250
|
+
Today this is intentionally narrow and only strips the synthetic tool-activity
|
|
251
|
+
summaries that can be injected by some agent hosts:
|
|
252
|
+
"Recent tool activity (auto): ..."
|
|
253
|
+
|
|
254
|
+
Why:
|
|
255
|
+
- Some local/open models will echo system-message content verbatim.
|
|
256
|
+
- These tool-trace summaries are *operator/debug* context, not user-facing content.
|
|
257
|
+
"""
|
|
258
|
+
if not isinstance(messages, list) or not messages:
|
|
259
|
+
return messages
|
|
260
|
+
|
|
261
|
+
out: List[Dict[str, Any]] = []
|
|
262
|
+
for m in messages:
|
|
263
|
+
if not isinstance(m, dict):
|
|
264
|
+
continue
|
|
265
|
+
role = str(m.get("role") or "").strip().lower()
|
|
266
|
+
if role == "system":
|
|
267
|
+
c = m.get("content")
|
|
268
|
+
if isinstance(c, str) and c.lstrip().startswith("Recent tool activity"):
|
|
269
|
+
continue
|
|
270
|
+
out.append(dict(m))
|
|
271
|
+
|
|
272
|
+
return out or None
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _inject_turn_grounding(
    *,
    prompt: str,
    messages: Optional[List[Dict[str, Any]]],
) -> tuple[str, Optional[List[Dict[str, Any]]]]:
    """Inject date/time/country into the *current user turn* (not the system prompt).

    Args:
        prompt: Current user prompt. When non-blank it receives the header and
            `messages` is passed through untouched.
        messages: Chat history. Used only when `prompt` is blank: the most recent
            `user` message is prefixed, or a synthetic user turn holding just the header
            is appended when there is none.

    Returns:
        `(prompt, messages)` after injection. The message list, when modified, is a new
        list of shallow-copied dicts; with nothing to inject into, inputs come back as-is.
    """
    header = _system_context_header()

    def _prefix_with_header(text: str) -> str:
        """Prefix with the current header, or rewrite a legacy `Grounding:` prefix into bracket form."""
        if not isinstance(text, str) or not text.strip():
            return header

        original = str(text)
        body_lines = original.lstrip().splitlines()
        first_line = body_lines[0].strip()

        # Already grounded with a bracket header: leave the text alone.
        if _SYSTEM_CONTEXT_HEADER_RE.match(first_line):
            return original

        legacy = _LEGACY_SYSTEM_CONTEXT_HEADER_PARSE_RE.match(first_line)
        if legacy is None:
            return f"{header} {original}"

        # Convert "Grounding: YYYY/MM/DD|HH:MM|CC" into "[YYYY-MM-DD HH:MM:00 CC]".
        date_part, time_part, country = legacy.groups()
        date_part = date_part.replace("/", "-")
        if len(time_part) == 5:
            time_part = f"{time_part}:00"
        bracket = f"[{date_part} {time_part} {country.upper()}]"
        remainder = "\n".join(body_lines[1:]).lstrip()
        if remainder:
            return f"{bracket} {remainder}"
        return bracket

    prompt_text = str(prompt or "")
    if prompt_text.strip():
        return _prefix_with_header(prompt_text), messages

    if not (isinstance(messages, list) and messages):
        # No place to inject; best-effort no-op.
        return prompt_text, messages

    # Work on copies so the caller's message dicts are never mutated.
    copied: List[Dict[str, Any]] = [
        dict(item) if isinstance(item, dict) else {"role": "user", "content": str(item)}
        for item in messages
    ]

    # Newest-first scan: only the latest user turn gets the header.
    for entry in reversed(copied):
        if str(entry.get("role") or "").strip().lower() != "user":
            continue
        current = entry.get("content")
        # NOTE(review): non-string content is stringified here — confirm callers only pass text.
        entry["content"] = _prefix_with_header(current if isinstance(current, str) else str(current or ""))
        return prompt_text, copied

    # No user message found; append a synthetic user turn.
    copied.append({"role": "user", "content": header})
    return prompt_text, copied
|
|
325
|
+
|
|
26
326
|
|
|
27
327
|
def _maybe_parse_tool_calls_from_text(
|
|
28
328
|
*,
|
|
@@ -102,6 +402,7 @@ class AbstractCoreLLMClient(Protocol):
|
|
|
102
402
|
messages: Optional[List[Dict[str, str]]] = None,
|
|
103
403
|
system_prompt: Optional[str] = None,
|
|
104
404
|
tools: Optional[List[Dict[str, Any]]] = None,
|
|
405
|
+
media: Optional[List[Any]] = None,
|
|
105
406
|
params: Optional[Dict[str, Any]] = None,
|
|
106
407
|
) -> Dict[str, Any]:
|
|
107
408
|
"""Return a JSON-safe dict with at least: content/tool_calls/usage/model."""
|
|
@@ -230,6 +531,44 @@ def _normalize_tool_calls(tool_calls: Any) -> Optional[List[Dict[str, Any]]]:
|
|
|
230
531
|
def _normalize_local_response(resp: Any) -> Dict[str, Any]:
|
|
231
532
|
"""Normalize an AbstractCore local `generate()` result into JSON."""
|
|
232
533
|
|
|
534
|
+
def _extract_reasoning_from_openai_like(raw: Any) -> Optional[str]:
|
|
535
|
+
"""Best-effort extraction of model reasoning from OpenAI-style payloads.
|
|
536
|
+
|
|
537
|
+
LM Studio and some providers store reasoning in `choices[].message.reasoning_content`
|
|
538
|
+
while leaving `content` empty during tool-call turns.
|
|
539
|
+
"""
|
|
540
|
+
|
|
541
|
+
def _from_message(msg: Any) -> Optional[str]:
|
|
542
|
+
if not isinstance(msg, dict):
|
|
543
|
+
return None
|
|
544
|
+
for key in ("reasoning", "reasoning_content", "thinking", "thinking_content"):
|
|
545
|
+
val = msg.get(key)
|
|
546
|
+
if isinstance(val, str) and val.strip():
|
|
547
|
+
return val.strip()
|
|
548
|
+
return None
|
|
549
|
+
|
|
550
|
+
if isinstance(raw, dict):
|
|
551
|
+
# OpenAI chat completion: choices[].message
|
|
552
|
+
choices = raw.get("choices")
|
|
553
|
+
if isinstance(choices, list):
|
|
554
|
+
for c in choices:
|
|
555
|
+
if not isinstance(c, dict):
|
|
556
|
+
continue
|
|
557
|
+
r = _from_message(c.get("message"))
|
|
558
|
+
if r:
|
|
559
|
+
return r
|
|
560
|
+
# Streaming-style payloads may use `delta`.
|
|
561
|
+
r = _from_message(c.get("delta"))
|
|
562
|
+
if r:
|
|
563
|
+
return r
|
|
564
|
+
|
|
565
|
+
# Some variants store a single message at the top level.
|
|
566
|
+
r = _from_message(raw.get("message"))
|
|
567
|
+
if r:
|
|
568
|
+
return r
|
|
569
|
+
|
|
570
|
+
return None
|
|
571
|
+
|
|
233
572
|
# Dict-like already
|
|
234
573
|
if isinstance(resp, dict):
|
|
235
574
|
out = _jsonable(resp)
|
|
@@ -240,6 +579,21 @@ def _normalize_local_response(resp: Any) -> Dict[str, Any]:
|
|
|
240
579
|
# Some providers place reasoning under metadata (e.g. LM Studio gpt-oss).
|
|
241
580
|
if "reasoning" not in out and isinstance(meta, dict) and isinstance(meta.get("reasoning"), str):
|
|
242
581
|
out["reasoning"] = meta.get("reasoning")
|
|
582
|
+
if (
|
|
583
|
+
(not isinstance(out.get("reasoning"), str) or not str(out.get("reasoning") or "").strip())
|
|
584
|
+
and isinstance(out.get("raw_response"), dict)
|
|
585
|
+
):
|
|
586
|
+
extracted = _extract_reasoning_from_openai_like(out.get("raw_response"))
|
|
587
|
+
if extracted:
|
|
588
|
+
out["reasoning"] = extracted
|
|
589
|
+
if (not isinstance(out.get("reasoning"), str) or not str(out.get("reasoning") or "").strip()) and isinstance(out.get("raw"), dict):
|
|
590
|
+
extracted = _extract_reasoning_from_openai_like(out.get("raw"))
|
|
591
|
+
if extracted:
|
|
592
|
+
out["reasoning"] = extracted
|
|
593
|
+
if (not isinstance(out.get("reasoning"), str) or not str(out.get("reasoning") or "").strip()) and isinstance(out.get("choices"), list):
|
|
594
|
+
extracted = _extract_reasoning_from_openai_like(out)
|
|
595
|
+
if extracted:
|
|
596
|
+
out["reasoning"] = extracted
|
|
243
597
|
return out
|
|
244
598
|
|
|
245
599
|
# Pydantic structured output
|
|
@@ -273,6 +627,10 @@ def _normalize_local_response(resp: Any) -> Dict[str, Any]:
|
|
|
273
627
|
r = metadata.get("reasoning")
|
|
274
628
|
if isinstance(r, str) and r.strip():
|
|
275
629
|
reasoning = r.strip()
|
|
630
|
+
if reasoning is None and raw_response is not None:
|
|
631
|
+
extracted = _extract_reasoning_from_openai_like(_jsonable(raw_response))
|
|
632
|
+
if extracted:
|
|
633
|
+
reasoning = extracted
|
|
276
634
|
|
|
277
635
|
return {
|
|
278
636
|
"content": content,
|
|
@@ -441,6 +799,9 @@ class LocalAbstractCoreLLMClient:
|
|
|
441
799
|
|
|
442
800
|
self._provider = provider
|
|
443
801
|
self._model = model
|
|
802
|
+
self._generate_lock = _local_generate_lock(provider=self._provider, model=self._model)
|
|
803
|
+
if self._generate_lock is not None:
|
|
804
|
+
_warn_local_generate_lock_once(provider=self._provider, model=self._model)
|
|
444
805
|
kwargs = dict(llm_kwargs or {})
|
|
445
806
|
kwargs.setdefault("enable_tracing", True)
|
|
446
807
|
if kwargs.get("enable_tracing"):
|
|
@@ -457,10 +818,15 @@ class LocalAbstractCoreLLMClient:
|
|
|
457
818
|
messages: Optional[List[Dict[str, str]]] = None,
|
|
458
819
|
system_prompt: Optional[str] = None,
|
|
459
820
|
tools: Optional[List[Dict[str, Any]]] = None,
|
|
821
|
+
media: Optional[List[Any]] = None,
|
|
460
822
|
params: Optional[Dict[str, Any]] = None,
|
|
461
823
|
) -> Dict[str, Any]:
|
|
462
824
|
params = dict(params or {})
|
|
463
825
|
|
|
826
|
+
system_prompt = _strip_system_context_header(system_prompt)
|
|
827
|
+
prompt, messages = _inject_turn_grounding(prompt=str(prompt or ""), messages=messages)
|
|
828
|
+
messages = _strip_internal_system_messages(messages)
|
|
829
|
+
|
|
464
830
|
stream_raw = params.pop("stream", None)
|
|
465
831
|
if stream_raw is None:
|
|
466
832
|
stream_raw = params.pop("streaming", None)
|
|
@@ -476,19 +842,39 @@ class LocalAbstractCoreLLMClient:
|
|
|
476
842
|
params.pop("_provider", None)
|
|
477
843
|
params.pop("_model", None)
|
|
478
844
|
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
845
|
+
lock = getattr(self, "_generate_lock", None)
|
|
846
|
+
if lock is None:
|
|
847
|
+
resp = self._llm.generate(
|
|
848
|
+
prompt=str(prompt or ""),
|
|
849
|
+
messages=messages,
|
|
850
|
+
system_prompt=system_prompt,
|
|
851
|
+
tools=tools,
|
|
852
|
+
media=media,
|
|
853
|
+
stream=stream,
|
|
854
|
+
**params,
|
|
855
|
+
)
|
|
856
|
+
if stream and hasattr(resp, "__next__"):
|
|
857
|
+
result = _normalize_local_streaming_response(resp)
|
|
858
|
+
else:
|
|
859
|
+
result = _normalize_local_response(resp)
|
|
860
|
+
result["tool_calls"] = _normalize_tool_calls(result.get("tool_calls"))
|
|
489
861
|
else:
|
|
490
|
-
|
|
491
|
-
|
|
862
|
+
# Serialize generation for non-thread-safe providers (e.g. MLX).
|
|
863
|
+
with lock:
|
|
864
|
+
resp = self._llm.generate(
|
|
865
|
+
prompt=str(prompt or ""),
|
|
866
|
+
messages=messages,
|
|
867
|
+
system_prompt=system_prompt,
|
|
868
|
+
tools=tools,
|
|
869
|
+
media=media,
|
|
870
|
+
stream=stream,
|
|
871
|
+
**params,
|
|
872
|
+
)
|
|
873
|
+
if stream and hasattr(resp, "__next__"):
|
|
874
|
+
result = _normalize_local_streaming_response(resp)
|
|
875
|
+
else:
|
|
876
|
+
result = _normalize_local_response(resp)
|
|
877
|
+
result["tool_calls"] = _normalize_tool_calls(result.get("tool_calls"))
|
|
492
878
|
|
|
493
879
|
# Durable observability: ensure a provider request payload exists even when the
|
|
494
880
|
# underlying provider does not attach `_provider_request` metadata.
|
|
@@ -547,14 +933,16 @@ class LocalAbstractCoreLLMClient:
|
|
|
547
933
|
for resource tracking and warnings.
|
|
548
934
|
|
|
549
935
|
Returns:
|
|
550
|
-
Dict with model capabilities. Always includes 'max_tokens' (default
|
|
936
|
+
Dict with model capabilities. Always includes 'max_tokens' (default: DEFAULT_MAX_TOKENS).
|
|
551
937
|
"""
|
|
552
938
|
try:
|
|
553
939
|
from abstractcore.architectures.detection import get_model_capabilities
|
|
554
940
|
return get_model_capabilities(self._model)
|
|
555
941
|
except Exception:
|
|
556
942
|
# Safe fallback if detection fails
|
|
557
|
-
|
|
943
|
+
from abstractruntime.core.vars import DEFAULT_MAX_TOKENS
|
|
944
|
+
|
|
945
|
+
return {"max_tokens": DEFAULT_MAX_TOKENS}
|
|
558
946
|
|
|
559
947
|
|
|
560
948
|
class MultiLocalAbstractCoreLLMClient:
|
|
@@ -590,6 +978,15 @@ class MultiLocalAbstractCoreLLMClient:
|
|
|
590
978
|
self._clients[key] = client
|
|
591
979
|
return client
|
|
592
980
|
|
|
981
|
+
def get_provider_instance(self, *, provider: str, model: str) -> Any:
    """Return the underlying AbstractCore provider instance for (provider, model).

    The client is obtained through `self._get_client`; the provider instance is
    that client's `_llm` attribute, or `None` when the client has no such attribute.
    """
    provider_name = str(provider or "")
    model_name = str(model or "")
    resolved_client = self._get_client(provider_name, model_name)
    return getattr(resolved_client, "_llm", None)
|
|
985
|
+
|
|
986
|
+
def list_loaded_clients(self) -> List[Tuple[str, str]]:
    """Return the keys of the client cache as a new list (best-effort snapshot).

    Keys are the (provider, model) pairs under which clients were stored in `self._clients`.
    """
    loaded_keys = self._clients.keys()
    return list(loaded_keys)
|
|
989
|
+
|
|
593
990
|
def generate(
|
|
594
991
|
self,
|
|
595
992
|
*,
|
|
@@ -597,6 +994,7 @@ class MultiLocalAbstractCoreLLMClient:
|
|
|
597
994
|
messages: Optional[List[Dict[str, str]]] = None,
|
|
598
995
|
system_prompt: Optional[str] = None,
|
|
599
996
|
tools: Optional[List[Dict[str, Any]]] = None,
|
|
997
|
+
media: Optional[List[Any]] = None,
|
|
600
998
|
params: Optional[Dict[str, Any]] = None,
|
|
601
999
|
) -> Dict[str, Any]:
|
|
602
1000
|
params = dict(params or {})
|
|
@@ -614,6 +1012,7 @@ class MultiLocalAbstractCoreLLMClient:
|
|
|
614
1012
|
messages=messages,
|
|
615
1013
|
system_prompt=system_prompt,
|
|
616
1014
|
tools=tools,
|
|
1015
|
+
media=media,
|
|
617
1016
|
params=params,
|
|
618
1017
|
)
|
|
619
1018
|
|
|
@@ -690,12 +1089,20 @@ class RemoteAbstractCoreLLMClient:
|
|
|
690
1089
|
messages: Optional[List[Dict[str, str]]] = None,
|
|
691
1090
|
system_prompt: Optional[str] = None,
|
|
692
1091
|
tools: Optional[List[Dict[str, Any]]] = None,
|
|
1092
|
+
media: Optional[List[Any]] = None,
|
|
693
1093
|
params: Optional[Dict[str, Any]] = None,
|
|
694
1094
|
) -> Dict[str, Any]:
|
|
695
1095
|
params = dict(params or {})
|
|
1096
|
+
if media:
|
|
1097
|
+
raise ValueError(
|
|
1098
|
+
"RemoteAbstractCoreLLMClient does not support media yet (artifact-backed attachments require local/hybrid execution)."
|
|
1099
|
+
)
|
|
696
1100
|
req_headers = dict(self._headers)
|
|
697
1101
|
|
|
698
1102
|
trace_metadata = params.pop("trace_metadata", None)
|
|
1103
|
+
system_prompt = _strip_system_context_header(system_prompt)
|
|
1104
|
+
prompt, messages = _inject_turn_grounding(prompt=str(prompt or ""), messages=messages)
|
|
1105
|
+
|
|
699
1106
|
if isinstance(trace_metadata, dict) and trace_metadata:
|
|
700
1107
|
req_headers["X-AbstractCore-Trace-Metadata"] = json.dumps(
|
|
701
1108
|
trace_metadata, ensure_ascii=False, separators=(",", ":")
|
|
@@ -735,6 +1142,10 @@ class RemoteAbstractCoreLLMClient:
|
|
|
735
1142
|
if base_url:
|
|
736
1143
|
body["base_url"] = base_url
|
|
737
1144
|
|
|
1145
|
+
prompt_cache_key = params.get("prompt_cache_key")
|
|
1146
|
+
if isinstance(prompt_cache_key, str) and prompt_cache_key.strip():
|
|
1147
|
+
body["prompt_cache_key"] = prompt_cache_key.strip()
|
|
1148
|
+
|
|
738
1149
|
# Pass through common OpenAI-compatible parameters.
|
|
739
1150
|
for key in (
|
|
740
1151
|
"temperature",
|
|
@@ -745,6 +1156,20 @@ class RemoteAbstractCoreLLMClient:
|
|
|
745
1156
|
"presence_penalty",
|
|
746
1157
|
):
|
|
747
1158
|
if key in params and params[key] is not None:
|
|
1159
|
+
if key == "seed":
|
|
1160
|
+
try:
|
|
1161
|
+
seed_i = int(params[key])
|
|
1162
|
+
except Exception:
|
|
1163
|
+
continue
|
|
1164
|
+
if seed_i >= 0:
|
|
1165
|
+
body[key] = seed_i
|
|
1166
|
+
continue
|
|
1167
|
+
if key == "temperature":
|
|
1168
|
+
try:
|
|
1169
|
+
body[key] = float(params[key])
|
|
1170
|
+
except Exception:
|
|
1171
|
+
continue
|
|
1172
|
+
continue
|
|
748
1173
|
body[key] = params[key]
|
|
749
1174
|
|
|
750
1175
|
if tools is not None:
|
|
@@ -765,9 +1190,16 @@ class RemoteAbstractCoreLLMClient:
|
|
|
765
1190
|
}
|
|
766
1191
|
if trace_id:
|
|
767
1192
|
meta["trace_id"] = trace_id
|
|
1193
|
+
reasoning = msg.get("reasoning")
|
|
1194
|
+
if not isinstance(reasoning, str) or not reasoning.strip():
|
|
1195
|
+
reasoning = msg.get("reasoning_content")
|
|
1196
|
+
if not isinstance(reasoning, str) or not reasoning.strip():
|
|
1197
|
+
reasoning = msg.get("thinking")
|
|
1198
|
+
if not isinstance(reasoning, str) or not reasoning.strip():
|
|
1199
|
+
reasoning = msg.get("thinking_content")
|
|
768
1200
|
result = {
|
|
769
1201
|
"content": msg.get("content"),
|
|
770
|
-
"reasoning":
|
|
1202
|
+
"reasoning": reasoning,
|
|
771
1203
|
"data": None,
|
|
772
1204
|
"raw_response": _jsonable(resp) if resp is not None else None,
|
|
773
1205
|
"tool_calls": _jsonable(msg.get("tool_calls")) if msg.get("tool_calls") is not None else None,
|