abstractruntime-0.4.0-py3-none-any.whl → abstractruntime-0.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. abstractruntime/__init__.py +76 -1
  2. abstractruntime/core/config.py +68 -1
  3. abstractruntime/core/models.py +5 -0
  4. abstractruntime/core/policy.py +74 -3
  5. abstractruntime/core/runtime.py +1002 -126
  6. abstractruntime/core/vars.py +8 -2
  7. abstractruntime/evidence/recorder.py +1 -1
  8. abstractruntime/history_bundle.py +772 -0
  9. abstractruntime/integrations/abstractcore/__init__.py +3 -0
  10. abstractruntime/integrations/abstractcore/default_tools.py +127 -3
  11. abstractruntime/integrations/abstractcore/effect_handlers.py +2440 -99
  12. abstractruntime/integrations/abstractcore/embeddings_client.py +69 -0
  13. abstractruntime/integrations/abstractcore/factory.py +68 -20
  14. abstractruntime/integrations/abstractcore/llm_client.py +447 -15
  15. abstractruntime/integrations/abstractcore/mcp_worker.py +1 -0
  16. abstractruntime/integrations/abstractcore/session_attachments.py +946 -0
  17. abstractruntime/integrations/abstractcore/tool_executor.py +31 -10
  18. abstractruntime/integrations/abstractcore/workspace_scoped_tools.py +561 -0
  19. abstractruntime/integrations/abstractmemory/__init__.py +3 -0
  20. abstractruntime/integrations/abstractmemory/effect_handlers.py +946 -0
  21. abstractruntime/memory/active_context.py +6 -1
  22. abstractruntime/memory/kg_packets.py +164 -0
  23. abstractruntime/memory/memact_composer.py +175 -0
  24. abstractruntime/memory/recall_levels.py +163 -0
  25. abstractruntime/memory/token_budget.py +86 -0
  26. abstractruntime/storage/__init__.py +4 -1
  27. abstractruntime/storage/artifacts.py +158 -30
  28. abstractruntime/storage/base.py +17 -1
  29. abstractruntime/storage/commands.py +339 -0
  30. abstractruntime/storage/in_memory.py +41 -1
  31. abstractruntime/storage/json_files.py +195 -12
  32. abstractruntime/storage/observable.py +38 -1
  33. abstractruntime/storage/offloading.py +433 -0
  34. abstractruntime/storage/sqlite.py +836 -0
  35. abstractruntime/visualflow_compiler/__init__.py +29 -0
  36. abstractruntime/visualflow_compiler/adapters/__init__.py +11 -0
  37. abstractruntime/visualflow_compiler/adapters/agent_adapter.py +126 -0
  38. abstractruntime/visualflow_compiler/adapters/context_adapter.py +109 -0
  39. abstractruntime/visualflow_compiler/adapters/control_adapter.py +615 -0
  40. abstractruntime/visualflow_compiler/adapters/effect_adapter.py +1051 -0
  41. abstractruntime/visualflow_compiler/adapters/event_adapter.py +307 -0
  42. abstractruntime/visualflow_compiler/adapters/function_adapter.py +97 -0
  43. abstractruntime/visualflow_compiler/adapters/memact_adapter.py +114 -0
  44. abstractruntime/visualflow_compiler/adapters/subflow_adapter.py +74 -0
  45. abstractruntime/visualflow_compiler/adapters/variable_adapter.py +316 -0
  46. abstractruntime/visualflow_compiler/compiler.py +3832 -0
  47. abstractruntime/visualflow_compiler/flow.py +247 -0
  48. abstractruntime/visualflow_compiler/visual/__init__.py +13 -0
  49. abstractruntime/visualflow_compiler/visual/agent_ids.py +29 -0
  50. abstractruntime/visualflow_compiler/visual/builtins.py +1376 -0
  51. abstractruntime/visualflow_compiler/visual/code_executor.py +214 -0
  52. abstractruntime/visualflow_compiler/visual/executor.py +2804 -0
  53. abstractruntime/visualflow_compiler/visual/models.py +211 -0
  54. abstractruntime/workflow_bundle/__init__.py +52 -0
  55. abstractruntime/workflow_bundle/models.py +236 -0
  56. abstractruntime/workflow_bundle/packer.py +317 -0
  57. abstractruntime/workflow_bundle/reader.py +87 -0
  58. abstractruntime/workflow_bundle/registry.py +587 -0
  59. abstractruntime-0.4.1.dist-info/METADATA +177 -0
  60. abstractruntime-0.4.1.dist-info/RECORD +86 -0
  61. abstractruntime-0.4.0.dist-info/METADATA +0 -167
  62. abstractruntime-0.4.0.dist-info/RECORD +0 -49
  63. {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/WHEEL +0 -0
  64. {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/entry_points.txt +0 -0
  65. {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/licenses/LICENSE +0 -0
abstractruntime/integrations/abstractcore/llm_client.py

@@ -15,14 +15,314 @@ from __future__ import annotations
 
 import ast
 import json
+import locale
+import os
 import re
+import threading
 from dataclasses import asdict, dataclass, is_dataclass
+from datetime import datetime
 from typing import Any, Dict, List, Optional, Protocol, Tuple
 
 from .logging import get_logger
 
 logger = get_logger(__name__)
 
+_LOCAL_GENERATE_LOCKS: Dict[Tuple[str, str], threading.Lock] = {}
+_LOCAL_GENERATE_LOCKS_LOCK = threading.Lock()
+_LOCAL_GENERATE_LOCKS_WARNED: set[Tuple[str, str]] = set()
+_LOCAL_GENERATE_LOCKS_WARNED_LOCK = threading.Lock()
+
+
+def _local_generate_lock(*, provider: str, model: str) -> Optional[threading.Lock]:
+    """Return a process-wide generation lock for providers that are not thread-safe.
+
+    MLX/Metal can crash the process when concurrent generations occur from multiple threads
+    (e.g. gateway ticking multiple runs concurrently). We serialize MLX generation per model
+    as a safety contract.
+    """
+
+    prov = str(provider or "").strip().lower()
+    if prov != "mlx":
+        return None
+    key = (prov, str(model or "").strip())
+    with _LOCAL_GENERATE_LOCKS_LOCK:
+        lock = _LOCAL_GENERATE_LOCKS.get(key)
+        if lock is None:
+            lock = threading.Lock()
+            _LOCAL_GENERATE_LOCKS[key] = lock
+    return lock
+
+
+def _warn_local_generate_lock_once(*, provider: str, model: str) -> None:
+    prov = str(provider or "").strip().lower()
+    key = (prov, str(model or "").strip())
+    with _LOCAL_GENERATE_LOCKS_WARNED_LOCK:
+        if key in _LOCAL_GENERATE_LOCKS_WARNED:
+            return
+        _LOCAL_GENERATE_LOCKS_WARNED.add(key)
+    logger.warning(
+        "Local provider generation is serialized for safety (prevents MLX/Metal crashes under concurrency).",
+        provider=prov,
+        model=key[1],
+    )
+
+_SYSTEM_CONTEXT_HEADER_RE = re.compile(
+    # ChatML-style user-turn grounding prefix, matching `chat-mlx.py` / `chat-hf.py`:
+    # "[YYYY-MM-DD HH:MM:SS CC]" (optionally followed by whitespace + user text).
+    # Backward compatible with the historical "[YYYY/MM/DD HH:MM CC]" form.
+    r"^\[\d{4}[-/]\d{2}[-/]\d{2}\s+\d{2}:\d{2}(?::\d{2})?\s+[A-Z]{2}\](?:\s|$)",
+    re.IGNORECASE,
+)
+
+_LEGACY_SYSTEM_CONTEXT_HEADER_RE = re.compile(
+    r"^Grounding:\s*\d{4}/\d{2}/\d{2}\|\d{2}:\d{2}\|[A-Z]{2}$",
+    re.IGNORECASE,
+)
+
+_LEGACY_SYSTEM_CONTEXT_HEADER_PARSE_RE = re.compile(
+    r"^Grounding:\s*(\d{4}/\d{2}/\d{2})\|(\d{2}:\d{2})\|([A-Z]{2})$",
+    re.IGNORECASE,
+)
+
+_ZONEINFO_TAB_CANDIDATES = [
+    "/usr/share/zoneinfo/zone.tab",
+    "/usr/share/zoneinfo/zone1970.tab",
+    "/var/db/timezone/zoneinfo/zone.tab",
+    "/var/db/timezone/zoneinfo/zone1970.tab",
+]
+
+
+def _detect_timezone_name() -> Optional[str]:
+    """Best-effort IANA timezone name (e.g. 'Europe/Paris')."""
+
+    tz_env = os.environ.get("TZ")
+    if isinstance(tz_env, str):
+        tz = tz_env.strip().lstrip(":")
+        if tz and "/" in tz:
+            return tz
+
+    # Common on Debian/Ubuntu.
+    try:
+        with open("/etc/timezone", "r", encoding="utf-8", errors="ignore") as f:
+            line = f.readline().strip()
+            if line and "/" in line:
+                return line
+    except Exception:
+        pass
+
+    # Common on macOS + many Linux distros (symlink or copied file).
+    try:
+        real = os.path.realpath("/etc/localtime")
+    except Exception:
+        real = ""
+    if real:
+        match = re.search(r"/zoneinfo/(.+)$", real)
+        if match:
+            tz = match.group(1).strip()
+            if tz and "/" in tz:
+                return tz
+
+    return None
+
+
+def _country_from_zone_tab(*, zone_name: str, tab_paths: Optional[List[str]] = None) -> Optional[str]:
+    """Resolve ISO2 country code from zone.tab / zone1970.tab."""
+    zone = str(zone_name or "").strip()
+    if not zone:
+        return None
+
+    paths = list(tab_paths) if isinstance(tab_paths, list) and tab_paths else list(_ZONEINFO_TAB_CANDIDATES)
+    for tab_path in paths:
+        try:
+            with open(tab_path, "r", encoding="utf-8", errors="ignore") as f:
+                for raw_line in f:
+                    line = raw_line.strip()
+                    if not line or line.startswith("#"):
+                        continue
+                    parts = line.split("\t")
+                    if len(parts) < 3:
+                        continue
+                    cc_field = parts[0].strip()
+                    tz_field = parts[2].strip()
+                    if tz_field != zone:
+                        continue
+                    cc = cc_field.split(",", 1)[0].strip()
+                    if len(cc) == 2 and cc.isalpha():
+                        return cc.upper()
+        except Exception:
+            continue
+    return None
+
+
+def _detect_country() -> str:
+    """Best-effort 2-letter country code detection.
+
+    Order:
+      1) Explicit env override: ABSTRACT_COUNTRY / ABSTRACTFRAMEWORK_COUNTRY
+      2) Locale region from `locale.getlocale()` or locale env vars (LANG/LC_ALL/LC_CTYPE)
+      3) Timezone (IANA name) via zone.tab mapping
+
+    Notes:
+      - Avoid parsing encoding-only strings like `UTF-8` as a country (a common locale env pitfall).
+      - If no reliable region is found, return `XX` (unknown).
+    """
+
+    def _normalize_country_code(value: Optional[str]) -> Optional[str]:
+        if not isinstance(value, str):
+            return None
+        raw = value.strip()
+        if not raw:
+            return None
+
+        base = raw.split(".", 1)[0].split("@", 1)[0].strip()
+        if len(base) == 2 and base.isalpha():
+            return base.upper()
+
+        parts = [p.strip() for p in re.split(r"[_-]", base) if p.strip()]
+        for part in parts[1:]:
+            if len(part) == 2 and part.isalpha():
+                return part.upper()
+        return None
+
+    # Explicit override (preferred).
+    for key in ("ABSTRACT_COUNTRY", "ABSTRACTFRAMEWORK_COUNTRY"):
+        cc = _normalize_country_code(os.environ.get(key))
+        if cc is not None:
+            return cc
+
+    candidates: List[str] = []
+    try:
+        loc = locale.getlocale()[0]
+        if isinstance(loc, str) and loc.strip():
+            candidates.append(loc)
+    except Exception:
+        pass
+
+    for key in ("LC_ALL", "LANG", "LC_CTYPE"):
+        v = os.environ.get(key)
+        if isinstance(v, str) and v.strip():
+            candidates.append(v)
+
+    for cand in candidates:
+        cc = _normalize_country_code(cand)
+        if cc is not None:
+            return cc
+
+    tz_name = _detect_timezone_name()
+    if tz_name:
+        cc = _country_from_zone_tab(zone_name=tz_name)
+        if cc is not None:
+            return cc
+
+    return "XX"
+
+
+def _system_context_header() -> str:
+    # Use local datetime (timezone-aware) to match the user's environment.
+    # Format: "[YYYY-MM-DD HH:MM:SS CC]"
+    stamp = datetime.now().astimezone().strftime("%Y-%m-%d %H:%M:%S")
+    return f"[{stamp} {_detect_country()}]"
+
+def _strip_system_context_header(system_prompt: Optional[str]) -> Optional[str]:
+    """Remove a runtime-injected system-context header from the system prompt (best-effort).
+
+    Why:
+    - Historically AbstractRuntime injected a "Grounding: ..." line into the *system prompt*.
+    - Prompt/KV caching works best when stable prefixes (system/tools/history) do not contain per-turn entropy.
+    - We still want date/time/country per turn, but we inject it into the *current user turn* instead.
+    """
+    if not isinstance(system_prompt, str):
+        return system_prompt
+    raw = system_prompt
+    lines = raw.splitlines()
+    if not lines:
+        return None
+    first = lines[0].strip()
+    if not (_LEGACY_SYSTEM_CONTEXT_HEADER_RE.match(first) or _SYSTEM_CONTEXT_HEADER_RE.match(first)):
+        return raw
+    rest = "\n".join(lines[1:]).lstrip()
+    return rest if rest else None
+
+
+def _strip_internal_system_messages(messages: Optional[List[Dict[str, Any]]]) -> Optional[List[Dict[str, Any]]]:
+    """Remove internal system messages that should never leak into model outputs.
+
+    Today this is intentionally narrow and only strips the synthetic tool-activity
+    summaries that can be injected by some agent hosts:
+        "Recent tool activity (auto): ..."
+
+    Why:
+    - Some local/open models will echo system-message content verbatim.
+    - These tool-trace summaries are *operator/debug* context, not user-facing content.
+    """
+    if not isinstance(messages, list) or not messages:
+        return messages
+
+    out: List[Dict[str, Any]] = []
+    for m in messages:
+        if not isinstance(m, dict):
+            continue
+        role = str(m.get("role") or "").strip().lower()
+        if role == "system":
+            c = m.get("content")
+            if isinstance(c, str) and c.lstrip().startswith("Recent tool activity"):
+                continue
+        out.append(dict(m))
+
+    return out or None
+
+
+def _inject_turn_grounding(
+    *,
+    prompt: str,
+    messages: Optional[List[Dict[str, Any]]],
+) -> tuple[str, Optional[List[Dict[str, Any]]]]:
+    """Inject date/time/country into the *current user turn* (not the system prompt)."""
+    header = _system_context_header()
+
+    def _prefix_with_header(text: str) -> str:
+        """Prefix with the current header, or rewrite a legacy `Grounding:` prefix into bracket form."""
+        if not isinstance(text, str) or not text.strip():
+            return header
+        raw = str(text)
+        first = raw.lstrip().splitlines()[0].strip()
+        if _SYSTEM_CONTEXT_HEADER_RE.match(first):
+            return raw
+        legacy = _LEGACY_SYSTEM_CONTEXT_HEADER_PARSE_RE.match(first)
+        if legacy:
+            date_part, time_part, cc = legacy.group(1), legacy.group(2), legacy.group(3).upper()
+            date_part = date_part.replace("/", "-")
+            time_part = f"{time_part}:00" if len(time_part) == 5 else time_part
+            bracket = f"[{date_part} {time_part} {cc}]"
+            rest = "\n".join(raw.lstrip().splitlines()[1:]).lstrip()
+            return f"{bracket} {rest}" if rest else bracket
+        return f"{header} {raw}"
+
+    prompt_str = str(prompt or "")
+    if prompt_str.strip():
+        return _prefix_with_header(prompt_str), messages
+
+    if isinstance(messages, list) and messages:
+        out: List[Dict[str, Any]] = []
+        for m in messages:
+            out.append(dict(m) if isinstance(m, dict) else {"role": "user", "content": str(m)})
+
+        for i in range(len(out) - 1, -1, -1):
+            role = str(out[i].get("role") or "").strip().lower()
+            if role != "user":
+                continue
+            content = out[i].get("content")
+            content_str = content if isinstance(content, str) else str(content or "")
+            out[i]["content"] = _prefix_with_header(content_str)
+            return prompt_str, out
+
+        # No user message found; append a synthetic user turn.
+        out.append({"role": "user", "content": header})
+        return prompt_str, out
+
+    # No place to inject; best-effort no-op.
+    return prompt_str, messages
+
 
 def _maybe_parse_tool_calls_from_text(
     *,
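
Note: the net effect of the grounding helpers above is that the per-turn `[YYYY-MM-DD HH:MM:SS CC]` header moves out of the system prompt (where it would add per-turn entropy and defeat prompt/KV caching) and onto the latest user turn. A minimal sketch of the intended behavior, assuming the private helpers are imported from `abstractruntime/integrations/abstractcore/llm_client.py`:

    from abstractruntime.integrations.abstractcore.llm_client import (
        _inject_turn_grounding,
        _strip_system_context_header,
    )

    # A legacy "Grounding:" line is stripped from the system prompt,
    # keeping the stable prefix cacheable across turns.
    system = _strip_system_context_header("Grounding: 2025/01/31|14:05|FR\nYou are helpful.")
    assert system == "You are helpful."

    # The current user turn gains the bracketed header instead, e.g.
    # "[2025-01-31 14:05:09 FR] What day is it?" (timestamp/country vary by host).
    prompt, messages = _inject_turn_grounding(prompt="What day is it?", messages=None)
    assert prompt.startswith("[") and prompt.endswith("What day is it?")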
@@ -102,6 +402,7 @@ class AbstractCoreLLMClient(Protocol):
         messages: Optional[List[Dict[str, str]]] = None,
         system_prompt: Optional[str] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
+        media: Optional[List[Any]] = None,
         params: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Any]:
         """Return a JSON-safe dict with at least: content/tool_calls/usage/model."""
@@ -230,6 +531,44 @@ def _normalize_tool_calls(tool_calls: Any) -> Optional[List[Dict[str, Any]]]:
 def _normalize_local_response(resp: Any) -> Dict[str, Any]:
     """Normalize an AbstractCore local `generate()` result into JSON."""
 
+    def _extract_reasoning_from_openai_like(raw: Any) -> Optional[str]:
+        """Best-effort extraction of model reasoning from OpenAI-style payloads.
+
+        LM Studio and some providers store reasoning in `choices[].message.reasoning_content`
+        while leaving `content` empty during tool-call turns.
+        """
+
+        def _from_message(msg: Any) -> Optional[str]:
+            if not isinstance(msg, dict):
+                return None
+            for key in ("reasoning", "reasoning_content", "thinking", "thinking_content"):
+                val = msg.get(key)
+                if isinstance(val, str) and val.strip():
+                    return val.strip()
+            return None
+
+        if isinstance(raw, dict):
+            # OpenAI chat completion: choices[].message
+            choices = raw.get("choices")
+            if isinstance(choices, list):
+                for c in choices:
+                    if not isinstance(c, dict):
+                        continue
+                    r = _from_message(c.get("message"))
+                    if r:
+                        return r
+                    # Streaming-style payloads may use `delta`.
+                    r = _from_message(c.get("delta"))
+                    if r:
+                        return r
+
+            # Some variants store a single message at the top level.
+            r = _from_message(raw.get("message"))
+            if r:
+                return r
+
+        return None
+
     # Dict-like already
     if isinstance(resp, dict):
         out = _jsonable(resp)
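
Note: as an illustration of what `_extract_reasoning_from_openai_like` recovers, an LM Studio-style tool-call turn (payload invented for this example) might carry its reasoning only under `reasoning_content`:

    payload = {
        "choices": [
            {
                "message": {
                    "content": "",  # empty during the tool-call turn
                    "reasoning_content": "User asked for weather; call get_weather first.",
                    "tool_calls": [{"function": {"name": "get_weather"}}],
                }
            }
        ]
    }
    # _extract_reasoning_from_openai_like(payload)
    # -> "User asked for weather; call get_weather first."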
@@ -240,6 +579,21 @@ def _normalize_local_response(resp: Any) -> Dict[str, Any]:
         # Some providers place reasoning under metadata (e.g. LM Studio gpt-oss).
         if "reasoning" not in out and isinstance(meta, dict) and isinstance(meta.get("reasoning"), str):
             out["reasoning"] = meta.get("reasoning")
+        if (
+            (not isinstance(out.get("reasoning"), str) or not str(out.get("reasoning") or "").strip())
+            and isinstance(out.get("raw_response"), dict)
+        ):
+            extracted = _extract_reasoning_from_openai_like(out.get("raw_response"))
+            if extracted:
+                out["reasoning"] = extracted
+        if (not isinstance(out.get("reasoning"), str) or not str(out.get("reasoning") or "").strip()) and isinstance(out.get("raw"), dict):
+            extracted = _extract_reasoning_from_openai_like(out.get("raw"))
+            if extracted:
+                out["reasoning"] = extracted
+        if (not isinstance(out.get("reasoning"), str) or not str(out.get("reasoning") or "").strip()) and isinstance(out.get("choices"), list):
+            extracted = _extract_reasoning_from_openai_like(out)
+            if extracted:
+                out["reasoning"] = extracted
         return out
 
     # Pydantic structured output
@@ -273,6 +627,10 @@ def _normalize_local_response(resp: Any) -> Dict[str, Any]:
         r = metadata.get("reasoning")
         if isinstance(r, str) and r.strip():
             reasoning = r.strip()
+    if reasoning is None and raw_response is not None:
+        extracted = _extract_reasoning_from_openai_like(_jsonable(raw_response))
+        if extracted:
+            reasoning = extracted
 
     return {
         "content": content,
@@ -441,6 +799,9 @@ class LocalAbstractCoreLLMClient:
 
         self._provider = provider
         self._model = model
+        self._generate_lock = _local_generate_lock(provider=self._provider, model=self._model)
+        if self._generate_lock is not None:
+            _warn_local_generate_lock_once(provider=self._provider, model=self._model)
         kwargs = dict(llm_kwargs or {})
         kwargs.setdefault("enable_tracing", True)
         if kwargs.get("enable_tracing"):
@@ -457,10 +818,15 @@ class LocalAbstractCoreLLMClient:
         messages: Optional[List[Dict[str, str]]] = None,
         system_prompt: Optional[str] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
+        media: Optional[List[Any]] = None,
         params: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Any]:
         params = dict(params or {})
 
+        system_prompt = _strip_system_context_header(system_prompt)
+        prompt, messages = _inject_turn_grounding(prompt=str(prompt or ""), messages=messages)
+        messages = _strip_internal_system_messages(messages)
+
         stream_raw = params.pop("stream", None)
         if stream_raw is None:
             stream_raw = params.pop("streaming", None)
@@ -476,19 +842,39 @@ class LocalAbstractCoreLLMClient:
         params.pop("_provider", None)
         params.pop("_model", None)
 
-        resp = self._llm.generate(
-            prompt=str(prompt or ""),
-            messages=messages,
-            system_prompt=system_prompt,
-            tools=tools,
-            stream=stream,
-            **params,
-        )
-        if stream and hasattr(resp, "__next__"):
-            result = _normalize_local_streaming_response(resp)
+        lock = getattr(self, "_generate_lock", None)
+        if lock is None:
+            resp = self._llm.generate(
+                prompt=str(prompt or ""),
+                messages=messages,
+                system_prompt=system_prompt,
+                tools=tools,
+                media=media,
+                stream=stream,
+                **params,
+            )
+            if stream and hasattr(resp, "__next__"):
+                result = _normalize_local_streaming_response(resp)
+            else:
+                result = _normalize_local_response(resp)
+            result["tool_calls"] = _normalize_tool_calls(result.get("tool_calls"))
         else:
-            result = _normalize_local_response(resp)
-        result["tool_calls"] = _normalize_tool_calls(result.get("tool_calls"))
+            # Serialize generation for non-thread-safe providers (e.g. MLX).
+            with lock:
+                resp = self._llm.generate(
+                    prompt=str(prompt or ""),
+                    messages=messages,
+                    system_prompt=system_prompt,
+                    tools=tools,
+                    media=media,
+                    stream=stream,
+                    **params,
+                )
+                if stream and hasattr(resp, "__next__"):
+                    result = _normalize_local_streaming_response(resp)
+                else:
+                    result = _normalize_local_response(resp)
+                result["tool_calls"] = _normalize_tool_calls(result.get("tool_calls"))
 
         # Durable observability: ensure a provider request payload exists even when the
         # underlying provider does not attach `_provider_request` metadata.
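
Note: the lock only changes behavior for `provider="mlx"`: two threads generating against the same MLX model now take turns instead of racing the Metal backend. A self-contained sketch of the serialization pattern used above (stub bodies stand in for the real provider call):

    import threading
    from typing import Dict, Optional, Tuple

    _locks: Dict[Tuple[str, str], threading.Lock] = {}
    _locks_guard = threading.Lock()

    def lock_for(provider: str, model: str) -> Optional[threading.Lock]:
        if provider != "mlx":
            return None  # other providers run unserialized
        with _locks_guard:
            return _locks.setdefault((provider, model), threading.Lock())

    def generate(provider: str, model: str) -> None:
        lock = lock_for(provider, model)
        if lock is None:
            pass  # call the provider directly
        else:
            with lock:  # at most one in-flight generation per MLX model
                pass  # call the provider under the lock

    threads = [threading.Thread(target=generate, args=("mlx", "some-model")) for _ in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()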
@@ -547,14 +933,16 @@ class LocalAbstractCoreLLMClient:
         for resource tracking and warnings.
 
         Returns:
-            Dict with model capabilities. Always includes 'max_tokens' (default 32768).
+            Dict with model capabilities. Always includes 'max_tokens' (default: DEFAULT_MAX_TOKENS).
         """
         try:
             from abstractcore.architectures.detection import get_model_capabilities
             return get_model_capabilities(self._model)
         except Exception:
             # Safe fallback if detection fails
-            return {"max_tokens": 32768}
+            from abstractruntime.core.vars import DEFAULT_MAX_TOKENS
+
+            return {"max_tokens": DEFAULT_MAX_TOKENS}
 
 
 class MultiLocalAbstractCoreLLMClient:
@@ -590,6 +978,15 @@ class MultiLocalAbstractCoreLLMClient:
             self._clients[key] = client
         return client
 
+    def get_provider_instance(self, *, provider: str, model: str) -> Any:
+        """Return the underlying AbstractCore provider instance for (provider, model)."""
+        client = self._get_client(str(provider or ""), str(model or ""))
+        return getattr(client, "_llm", None)
+
+    def list_loaded_clients(self) -> List[Tuple[str, str]]:
+        """Return (provider, model) pairs loaded in this process (best-effort)."""
+        return list(self._clients.keys())
+
     def generate(
         self,
         *,
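
Note: the two new methods give hosts a read-only view of what the multi-client has instantiated, e.g. for diagnostics. A hypothetical usage sketch (`multi` is a MultiLocalAbstractCoreLLMClient that has already served requests):

    for provider, model in multi.list_loaded_clients():
        llm = multi.get_provider_instance(provider=provider, model=model)
        # `llm` is the underlying AbstractCore provider instance, or None
        print(provider, model, type(llm).__name__)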
@@ -597,6 +994,7 @@ class MultiLocalAbstractCoreLLMClient:
         messages: Optional[List[Dict[str, str]]] = None,
         system_prompt: Optional[str] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
+        media: Optional[List[Any]] = None,
         params: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Any]:
         params = dict(params or {})
@@ -614,6 +1012,7 @@ class MultiLocalAbstractCoreLLMClient:
             messages=messages,
             system_prompt=system_prompt,
             tools=tools,
+            media=media,
             params=params,
         )
 
@@ -690,12 +1089,20 @@ class RemoteAbstractCoreLLMClient:
         messages: Optional[List[Dict[str, str]]] = None,
         system_prompt: Optional[str] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
+        media: Optional[List[Any]] = None,
         params: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Any]:
         params = dict(params or {})
+        if media:
+            raise ValueError(
+                "RemoteAbstractCoreLLMClient does not support media yet (artifact-backed attachments require local/hybrid execution)."
+            )
         req_headers = dict(self._headers)
 
         trace_metadata = params.pop("trace_metadata", None)
+        system_prompt = _strip_system_context_header(system_prompt)
+        prompt, messages = _inject_turn_grounding(prompt=str(prompt or ""), messages=messages)
+
         if isinstance(trace_metadata, dict) and trace_metadata:
             req_headers["X-AbstractCore-Trace-Metadata"] = json.dumps(
                 trace_metadata, ensure_ascii=False, separators=(",", ":")
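
Note: attachments now fail fast on the remote path instead of being silently dropped:

    try:
        remote_client.generate(prompt="Describe this.", media=["diagram.png"])
    except ValueError as exc:
        print(f"media requires local/hybrid execution: {exc}")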
@@ -735,6 +1142,10 @@ class RemoteAbstractCoreLLMClient:
         if base_url:
             body["base_url"] = base_url
 
+        prompt_cache_key = params.get("prompt_cache_key")
+        if isinstance(prompt_cache_key, str) and prompt_cache_key.strip():
+            body["prompt_cache_key"] = prompt_cache_key.strip()
+
         # Pass through common OpenAI-compatible parameters.
         for key in (
             "temperature",
@@ -745,6 +1156,20 @@ class RemoteAbstractCoreLLMClient:
             "presence_penalty",
         ):
             if key in params and params[key] is not None:
+                if key == "seed":
+                    try:
+                        seed_i = int(params[key])
+                    except Exception:
+                        continue
+                    if seed_i >= 0:
+                        body[key] = seed_i
+                    continue
+                if key == "temperature":
+                    try:
+                        body[key] = float(params[key])
+                    except Exception:
+                        continue
+                    continue
                 body[key] = params[key]
 
         if tools is not None:
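
Note: the coercion above keeps string-typed or sentinel values out of the request body. An illustrative re-implementation of just the seed/temperature rules, with the resulting mapping:

    def coerce(params: dict) -> dict:
        body: dict = {}
        if params.get("seed") is not None:
            try:
                seed = int(params["seed"])
                if seed >= 0:
                    body["seed"] = seed  # negative seed acts as "unset"
            except Exception:
                pass  # unparseable seed is dropped
        if params.get("temperature") is not None:
            try:
                body["temperature"] = float(params["temperature"])
            except Exception:
                pass  # unparseable temperature is dropped
        return body

    assert coerce({"seed": "42"}) == {"seed": 42}
    assert coerce({"seed": -1}) == {}
    assert coerce({"temperature": "0.7"}) == {"temperature": 0.7}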
@@ -765,9 +1190,16 @@ class RemoteAbstractCoreLLMClient:
         }
         if trace_id:
             meta["trace_id"] = trace_id
+        reasoning = msg.get("reasoning")
+        if not isinstance(reasoning, str) or not reasoning.strip():
+            reasoning = msg.get("reasoning_content")
+        if not isinstance(reasoning, str) or not reasoning.strip():
+            reasoning = msg.get("thinking")
+        if not isinstance(reasoning, str) or not reasoning.strip():
+            reasoning = msg.get("thinking_content")
         result = {
             "content": msg.get("content"),
-            "reasoning": msg.get("reasoning"),
+            "reasoning": reasoning,
             "data": None,
             "raw_response": _jsonable(resp) if resp is not None else None,
             "tool_calls": _jsonable(msg.get("tool_calls")) if msg.get("tool_calls") is not None else None,
abstractruntime/integrations/abstractcore/mcp_worker.py

@@ -23,6 +23,7 @@ def _truncate(text: str, *, limit: int) -> str:
     s = "" if text is None else str(text)
     if limit <= 0 or len(s) <= limit:
         return s
+    #[WARNING:TRUNCATION] bounded preview for stderr log lines (never used for durable data)
     return s[:limit] + "…"
 
 
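
Note: `_truncate` behavior is unchanged; the new comment only documents that the bounded preview is for stderr logging, never for durable data. For reference:

    def _truncate(text, *, limit: int) -> str:  # copy of the function above
        s = "" if text is None else str(text)
        if limit <= 0 or len(s) <= limit:
            return s
        return s[:limit] + "…"

    assert _truncate("a" * 100, limit=8) == "a" * 8 + "…"
    assert _truncate("short", limit=8) == "short"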