abstractruntime-0.2.0-py3-none-any.whl → abstractruntime-0.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. abstractruntime/__init__.py +83 -3
  2. abstractruntime/core/config.py +82 -2
  3. abstractruntime/core/event_keys.py +62 -0
  4. abstractruntime/core/models.py +17 -1
  5. abstractruntime/core/policy.py +74 -3
  6. abstractruntime/core/runtime.py +3334 -28
  7. abstractruntime/core/vars.py +103 -2
  8. abstractruntime/evidence/__init__.py +10 -0
  9. abstractruntime/evidence/recorder.py +325 -0
  10. abstractruntime/history_bundle.py +772 -0
  11. abstractruntime/integrations/abstractcore/__init__.py +6 -0
  12. abstractruntime/integrations/abstractcore/constants.py +19 -0
  13. abstractruntime/integrations/abstractcore/default_tools.py +258 -0
  14. abstractruntime/integrations/abstractcore/effect_handlers.py +2622 -32
  15. abstractruntime/integrations/abstractcore/embeddings_client.py +69 -0
  16. abstractruntime/integrations/abstractcore/factory.py +149 -16
  17. abstractruntime/integrations/abstractcore/llm_client.py +891 -55
  18. abstractruntime/integrations/abstractcore/mcp_worker.py +587 -0
  19. abstractruntime/integrations/abstractcore/observability.py +80 -0
  20. abstractruntime/integrations/abstractcore/session_attachments.py +946 -0
  21. abstractruntime/integrations/abstractcore/summarizer.py +154 -0
  22. abstractruntime/integrations/abstractcore/tool_executor.py +509 -31
  23. abstractruntime/integrations/abstractcore/workspace_scoped_tools.py +561 -0
  24. abstractruntime/integrations/abstractmemory/__init__.py +3 -0
  25. abstractruntime/integrations/abstractmemory/effect_handlers.py +946 -0
  26. abstractruntime/memory/__init__.py +21 -0
  27. abstractruntime/memory/active_context.py +751 -0
  28. abstractruntime/memory/active_memory.py +452 -0
  29. abstractruntime/memory/compaction.py +105 -0
  30. abstractruntime/memory/kg_packets.py +164 -0
  31. abstractruntime/memory/memact_composer.py +175 -0
  32. abstractruntime/memory/recall_levels.py +163 -0
  33. abstractruntime/memory/token_budget.py +86 -0
  34. abstractruntime/rendering/__init__.py +17 -0
  35. abstractruntime/rendering/agent_trace_report.py +256 -0
  36. abstractruntime/rendering/json_stringify.py +136 -0
  37. abstractruntime/scheduler/scheduler.py +93 -2
  38. abstractruntime/storage/__init__.py +7 -2
  39. abstractruntime/storage/artifacts.py +175 -32
  40. abstractruntime/storage/base.py +17 -1
  41. abstractruntime/storage/commands.py +339 -0
  42. abstractruntime/storage/in_memory.py +41 -1
  43. abstractruntime/storage/json_files.py +210 -14
  44. abstractruntime/storage/observable.py +136 -0
  45. abstractruntime/storage/offloading.py +433 -0
  46. abstractruntime/storage/sqlite.py +836 -0
  47. abstractruntime/visualflow_compiler/__init__.py +29 -0
  48. abstractruntime/visualflow_compiler/adapters/__init__.py +11 -0
  49. abstractruntime/visualflow_compiler/adapters/agent_adapter.py +126 -0
  50. abstractruntime/visualflow_compiler/adapters/context_adapter.py +109 -0
  51. abstractruntime/visualflow_compiler/adapters/control_adapter.py +615 -0
  52. abstractruntime/visualflow_compiler/adapters/effect_adapter.py +1051 -0
  53. abstractruntime/visualflow_compiler/adapters/event_adapter.py +307 -0
  54. abstractruntime/visualflow_compiler/adapters/function_adapter.py +97 -0
  55. abstractruntime/visualflow_compiler/adapters/memact_adapter.py +114 -0
  56. abstractruntime/visualflow_compiler/adapters/subflow_adapter.py +74 -0
  57. abstractruntime/visualflow_compiler/adapters/variable_adapter.py +316 -0
  58. abstractruntime/visualflow_compiler/compiler.py +3832 -0
  59. abstractruntime/visualflow_compiler/flow.py +247 -0
  60. abstractruntime/visualflow_compiler/visual/__init__.py +13 -0
  61. abstractruntime/visualflow_compiler/visual/agent_ids.py +29 -0
  62. abstractruntime/visualflow_compiler/visual/builtins.py +1376 -0
  63. abstractruntime/visualflow_compiler/visual/code_executor.py +214 -0
  64. abstractruntime/visualflow_compiler/visual/executor.py +2804 -0
  65. abstractruntime/visualflow_compiler/visual/models.py +211 -0
  66. abstractruntime/workflow_bundle/__init__.py +52 -0
  67. abstractruntime/workflow_bundle/models.py +236 -0
  68. abstractruntime/workflow_bundle/packer.py +317 -0
  69. abstractruntime/workflow_bundle/reader.py +87 -0
  70. abstractruntime/workflow_bundle/registry.py +587 -0
  71. abstractruntime-0.4.1.dist-info/METADATA +177 -0
  72. abstractruntime-0.4.1.dist-info/RECORD +86 -0
  73. abstractruntime-0.4.1.dist-info/entry_points.txt +2 -0
  74. abstractruntime-0.2.0.dist-info/METADATA +0 -163
  75. abstractruntime-0.2.0.dist-info/RECORD +0 -32
  76. {abstractruntime-0.2.0.dist-info → abstractruntime-0.4.1.dist-info}/WHEEL +0 -0
  77. {abstractruntime-0.2.0.dist-info → abstractruntime-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -13,14 +13,369 @@ Remote mode is the preferred way to support per-request dynamic routing (e.g. `b
 
 from __future__ import annotations
 
+import ast
 import json
+import locale
+import os
+import re
+import threading
 from dataclasses import asdict, dataclass, is_dataclass
+from datetime import datetime
 from typing import Any, Dict, List, Optional, Protocol, Tuple
 
 from .logging import get_logger
 
 logger = get_logger(__name__)
 
+_LOCAL_GENERATE_LOCKS: Dict[Tuple[str, str], threading.Lock] = {}
+_LOCAL_GENERATE_LOCKS_LOCK = threading.Lock()
+_LOCAL_GENERATE_LOCKS_WARNED: set[Tuple[str, str]] = set()
+_LOCAL_GENERATE_LOCKS_WARNED_LOCK = threading.Lock()
+
+
+def _local_generate_lock(*, provider: str, model: str) -> Optional[threading.Lock]:
+    """Return a process-wide generation lock for providers that are not thread-safe.
+
+    MLX/Metal can crash the process when concurrent generations occur from multiple threads
+    (e.g. gateway ticking multiple runs concurrently). We serialize MLX generation per model
+    as a safety contract.
+    """
+
+    prov = str(provider or "").strip().lower()
+    if prov != "mlx":
+        return None
+    key = (prov, str(model or "").strip())
+    with _LOCAL_GENERATE_LOCKS_LOCK:
+        lock = _LOCAL_GENERATE_LOCKS.get(key)
+        if lock is None:
+            lock = threading.Lock()
+            _LOCAL_GENERATE_LOCKS[key] = lock
+        return lock
+
+
+def _warn_local_generate_lock_once(*, provider: str, model: str) -> None:
+    prov = str(provider or "").strip().lower()
+    key = (prov, str(model or "").strip())
+    with _LOCAL_GENERATE_LOCKS_WARNED_LOCK:
+        if key in _LOCAL_GENERATE_LOCKS_WARNED:
+            return
+        _LOCAL_GENERATE_LOCKS_WARNED.add(key)
+        logger.warning(
+            "Local provider generation is serialized for safety (prevents MLX/Metal crashes under concurrency).",
+            provider=prov,
+            model=key[1],
+        )
+
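
A minimal sketch of the serialization contract above (the model names are illustrative): only the `mlx` provider gets a lock, and concurrent callers on the same model take turns.

    import threading

    lock = _local_generate_lock(provider="mlx", model="mlx-community/Qwen3-4B")
    assert _local_generate_lock(provider="ollama", model="qwen3:4b") is None  # only MLX is serialized

    def worker() -> None:
        # Each call site guards the provider call with the shared per-model lock.
        if lock is not None:
            with lock:
                pass  # the provider's generate() call would run here, one thread at a time

    threads = [threading.Thread(target=worker) for _ in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
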
+_SYSTEM_CONTEXT_HEADER_RE = re.compile(
+    # ChatML-style user-turn grounding prefix, matching `chat-mlx.py` / `chat-hf.py`:
+    # "[YYYY-MM-DD HH:MM:SS CC]" (optionally followed by whitespace + user text).
+    # Backward compatible with the historical "[YYYY/MM/DD HH:MM CC]" form.
+    r"^\[\d{4}[-/]\d{2}[-/]\d{2}\s+\d{2}:\d{2}(?::\d{2})?\s+[A-Z]{2}\](?:\s|$)",
+    re.IGNORECASE,
+)
+
+_LEGACY_SYSTEM_CONTEXT_HEADER_RE = re.compile(
+    r"^Grounding:\s*\d{4}/\d{2}/\d{2}\|\d{2}:\d{2}\|[A-Z]{2}$",
+    re.IGNORECASE,
+)
+
+_LEGACY_SYSTEM_CONTEXT_HEADER_PARSE_RE = re.compile(
+    r"^Grounding:\s*(\d{4}/\d{2}/\d{2})\|(\d{2}:\d{2})\|([A-Z]{2})$",
+    re.IGNORECASE,
+)
+
+_ZONEINFO_TAB_CANDIDATES = [
+    "/usr/share/zoneinfo/zone.tab",
+    "/usr/share/zoneinfo/zone1970.tab",
+    "/var/db/timezone/zoneinfo/zone.tab",
+    "/var/db/timezone/zoneinfo/zone1970.tab",
+]
+
+
+def _detect_timezone_name() -> Optional[str]:
+    """Best-effort IANA timezone name (e.g. 'Europe/Paris')."""
+
+    tz_env = os.environ.get("TZ")
+    if isinstance(tz_env, str):
+        tz = tz_env.strip().lstrip(":")
+        if tz and "/" in tz:
+            return tz
+
+    # Common on Debian/Ubuntu.
+    try:
+        with open("/etc/timezone", "r", encoding="utf-8", errors="ignore") as f:
+            line = f.readline().strip()
+            if line and "/" in line:
+                return line
+    except Exception:
+        pass
+
+    # Common on macOS + many Linux distros (symlink or copied file).
+    try:
+        real = os.path.realpath("/etc/localtime")
+    except Exception:
+        real = ""
+    if real:
+        match = re.search(r"/zoneinfo/(.+)$", real)
+        if match:
+            tz = match.group(1).strip()
+            if tz and "/" in tz:
+                return tz
+
+    return None
+
+
+def _country_from_zone_tab(*, zone_name: str, tab_paths: Optional[List[str]] = None) -> Optional[str]:
+    """Resolve ISO2 country code from zone.tab / zone1970.tab."""
+    zone = str(zone_name or "").strip()
+    if not zone:
+        return None
+
+    paths = list(tab_paths) if isinstance(tab_paths, list) and tab_paths else list(_ZONEINFO_TAB_CANDIDATES)
+    for tab_path in paths:
+        try:
+            with open(tab_path, "r", encoding="utf-8", errors="ignore") as f:
+                for raw_line in f:
+                    line = raw_line.strip()
+                    if not line or line.startswith("#"):
+                        continue
+                    parts = line.split("\t")
+                    if len(parts) < 3:
+                        continue
+                    cc_field = parts[0].strip()
+                    tz_field = parts[2].strip()
+                    if tz_field != zone:
+                        continue
+                    cc = cc_field.split(",", 1)[0].strip()
+                    if len(cc) == 2 and cc.isalpha():
+                        return cc.upper()
+        except Exception:
+            continue
+    return None
+
+
+def _detect_country() -> str:
+    """Best-effort 2-letter country code detection.
+
+    Order:
+    1) Explicit env override: ABSTRACT_COUNTRY / ABSTRACTFRAMEWORK_COUNTRY
+    2) Locale region from `locale.getlocale()` or locale env vars (LANG/LC_ALL/LC_CTYPE)
+    3) Timezone (IANA name) via zone.tab mapping
+
+    Notes:
+    - Avoid parsing encoding-only strings like `UTF-8` as a country (a common locale env pitfall).
+    - If no reliable region is found, return `XX` (unknown).
+    """
+
+    def _normalize_country_code(value: Optional[str]) -> Optional[str]:
+        if not isinstance(value, str):
+            return None
+        raw = value.strip()
+        if not raw:
+            return None
+
+        base = raw.split(".", 1)[0].split("@", 1)[0].strip()
+        if len(base) == 2 and base.isalpha():
+            return base.upper()
+
+        parts = [p.strip() for p in re.split(r"[_-]", base) if p.strip()]
+        for part in parts[1:]:
+            if len(part) == 2 and part.isalpha():
+                return part.upper()
+        return None
+
+    # Explicit override (preferred).
+    for key in ("ABSTRACT_COUNTRY", "ABSTRACTFRAMEWORK_COUNTRY"):
+        cc = _normalize_country_code(os.environ.get(key))
+        if cc is not None:
+            return cc
+
+    candidates: List[str] = []
+    try:
+        loc = locale.getlocale()[0]
+        if isinstance(loc, str) and loc.strip():
+            candidates.append(loc)
+    except Exception:
+        pass
+
+    for key in ("LC_ALL", "LANG", "LC_CTYPE"):
+        v = os.environ.get(key)
+        if isinstance(v, str) and v.strip():
+            candidates.append(v)
+
+    for cand in candidates:
+        cc = _normalize_country_code(cand)
+        if cc is not None:
+            return cc
+
+    tz_name = _detect_timezone_name()
+    if tz_name:
+        cc = _country_from_zone_tab(zone_name=tz_name)
+        if cc is not None:
+            return cc
+
+    return "XX"
+
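
Illustrative usage of the detection order (results depend on the host environment; the values below are examples, not guarantees):

    os.environ["ABSTRACT_COUNTRY"] = "fr"
    print(_detect_country())  # -> "FR": explicit override wins

    del os.environ["ABSTRACT_COUNTRY"]
    os.environ["LC_ALL"] = "en_US.UTF-8"
    print(_detect_country())  # -> "US": region comes from the locale, never from "UTF-8"
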
+
+def _system_context_header() -> str:
+    # Use local datetime (timezone-aware) to match the user's environment.
+    # Format: "[YYYY-MM-DD HH:MM:SS CC]"
+    stamp = datetime.now().astimezone().strftime("%Y-%m-%d %H:%M:%S")
+    return f"[{stamp} {_detect_country()}]"
+
+def _strip_system_context_header(system_prompt: Optional[str]) -> Optional[str]:
+    """Remove a runtime-injected system-context header from the system prompt (best-effort).
+
+    Why:
+    - Historically AbstractRuntime injected a "Grounding: ..." line into the *system prompt*.
+    - Prompt/KV caching works best when stable prefixes (system/tools/history) do not contain per-turn entropy.
+    - We still want date/time/country per turn, but we inject it into the *current user turn* instead.
+    """
+    if not isinstance(system_prompt, str):
+        return system_prompt
+    raw = system_prompt
+    lines = raw.splitlines()
+    if not lines:
+        return None
+    first = lines[0].strip()
+    if not (_LEGACY_SYSTEM_CONTEXT_HEADER_RE.match(first) or _SYSTEM_CONTEXT_HEADER_RE.match(first)):
+        return raw
+    rest = "\n".join(lines[1:]).lstrip()
+    return rest if rest else None
+
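
For example (the date is illustrative), a header on its own first line is stripped, and prompts without a header pass through untouched:

    print(_strip_system_context_header("[2025-01-15 09:30:00 FR]\nYou are a helpful assistant."))
    # -> "You are a helpful assistant."
    print(_strip_system_context_header("You are a helpful assistant."))
    # -> "You are a helpful assistant." (unchanged)
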
+
+def _strip_internal_system_messages(messages: Optional[List[Dict[str, Any]]]) -> Optional[List[Dict[str, Any]]]:
+    """Remove internal system messages that should never leak into model outputs.
+
+    Today this is intentionally narrow and only strips the synthetic tool-activity
+    summaries that can be injected by some agent hosts:
+        "Recent tool activity (auto): ..."
+
+    Why:
+    - Some local/open models will echo system-message content verbatim.
+    - These tool-trace summaries are *operator/debug* context, not user-facing content.
+    """
+    if not isinstance(messages, list) or not messages:
+        return messages
+
+    out: List[Dict[str, Any]] = []
+    for m in messages:
+        if not isinstance(m, dict):
+            continue
+        role = str(m.get("role") or "").strip().lower()
+        if role == "system":
+            c = m.get("content")
+            if isinstance(c, str) and c.lstrip().startswith("Recent tool activity"):
+                continue
+        out.append(dict(m))
+
+    return out or None
+
+
+def _inject_turn_grounding(
+    *,
+    prompt: str,
+    messages: Optional[List[Dict[str, Any]]],
+) -> tuple[str, Optional[List[Dict[str, Any]]]]:
+    """Inject date/time/country into the *current user turn* (not the system prompt)."""
+    header = _system_context_header()
+
+    def _prefix_with_header(text: str) -> str:
+        """Prefix with the current header, or rewrite a legacy `Grounding:` prefix into bracket form."""
+        if not isinstance(text, str) or not text.strip():
+            return header
+        raw = str(text)
+        first = raw.lstrip().splitlines()[0].strip()
+        if _SYSTEM_CONTEXT_HEADER_RE.match(first):
+            return raw
+        legacy = _LEGACY_SYSTEM_CONTEXT_HEADER_PARSE_RE.match(first)
+        if legacy:
+            date_part, time_part, cc = legacy.group(1), legacy.group(2), legacy.group(3).upper()
+            date_part = date_part.replace("/", "-")
+            time_part = f"{time_part}:00" if len(time_part) == 5 else time_part
+            bracket = f"[{date_part} {time_part} {cc}]"
+            rest = "\n".join(raw.lstrip().splitlines()[1:]).lstrip()
+            return f"{bracket} {rest}" if rest else bracket
+        return f"{header} {raw}"
+
+    prompt_str = str(prompt or "")
+    if prompt_str.strip():
+        return _prefix_with_header(prompt_str), messages
+
+    if isinstance(messages, list) and messages:
+        out: List[Dict[str, Any]] = []
+        for m in messages:
+            out.append(dict(m) if isinstance(m, dict) else {"role": "user", "content": str(m)})
+
+        for i in range(len(out) - 1, -1, -1):
+            role = str(out[i].get("role") or "").strip().lower()
+            if role != "user":
+                continue
+            content = out[i].get("content")
+            content_str = content if isinstance(content, str) else str(content or "")
+            out[i]["content"] = _prefix_with_header(content_str)
+            return prompt_str, out
+
+        # No user message found; append a synthetic user turn.
+        out.append({"role": "user", "content": header})
+        return prompt_str, out
+
+    # No place to inject; best-effort no-op.
+    return prompt_str, messages
+
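
Behavior sketch (the timestamp and country in the header are environment-dependent; shown here as example values):

    new_prompt, _ = _inject_turn_grounding(prompt="What day is it?", messages=None)
    print(new_prompt)  # e.g. "[2025-01-15 09:30:00 FR] What day is it?"

    msgs = [{"role": "system", "content": "Be brief."}, {"role": "user", "content": "Hi"}]
    _, new_msgs = _inject_turn_grounding(prompt="", messages=msgs)
    print(new_msgs[-1]["content"])  # e.g. "[2025-01-15 09:30:00 FR] Hi"
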
+
+def _maybe_parse_tool_calls_from_text(
+    *,
+    content: Optional[str],
+    allowed_tool_names: Optional[set[str]] = None,
+    model_name: Optional[str] = None,
+    tool_handler: Any = None,
+) -> tuple[Optional[List[Dict[str, Any]]], Optional[str]]:
+    """Deprecated: tool-call parsing belongs to AbstractCore.
+
+    AbstractCore now normalizes non-streaming responses by populating structured `tool_calls`
+    and returning cleaned `content`. This helper remains only for backward compatibility with
+    older AbstractCore versions and will be removed in the next major release.
+    """
+    # Keep behavior for external callers/tests that still import it.
+    if not isinstance(content, str) or not content.strip():
+        return None, None
+    if tool_handler is None:
+        from abstractcore.tools.handler import UniversalToolHandler
+
+        tool_handler = UniversalToolHandler(str(model_name or ""))
+
+    try:
+        parsed = tool_handler.parse_response(content, mode="prompted")
+    except Exception:
+        return None, None
+
+    calls = getattr(parsed, "tool_calls", None)
+    cleaned = getattr(parsed, "content", None)
+    if not isinstance(calls, list) or not calls:
+        return None, None
+
+    out_calls: List[Dict[str, Any]] = []
+    for tc in calls:
+        name = getattr(tc, "name", None)
+        arguments = getattr(tc, "arguments", None)
+        call_id = getattr(tc, "call_id", None)
+        if not isinstance(name, str) or not name.strip():
+            continue
+        if isinstance(allowed_tool_names, set) and allowed_tool_names and name not in allowed_tool_names:
+            continue
+        out_calls.append(
+            {
+                "name": name.strip(),
+                "arguments": _jsonable(arguments) if arguments is not None else {},
+                "call_id": str(call_id) if call_id is not None else None,
+            }
+        )
+
+    if not out_calls:
+        return None, None
+    return out_calls, (str(cleaned) if isinstance(cleaned, str) else "")
+
 
 @dataclass(frozen=True)
 class HttpResponse:
@@ -47,6 +402,7 @@ class AbstractCoreLLMClient(Protocol):
         messages: Optional[List[Dict[str, str]]] = None,
         system_prompt: Optional[str] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
+        media: Optional[List[Any]] = None,
         params: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Any]:
         """Return a JSON-safe dict with at least: content/tool_calls/usage/model."""
@@ -83,9 +439,136 @@ def _jsonable(value: Any) -> Any:
     return str(value)
 
 
+def _loads_dict_like(raw: Any) -> Optional[Dict[str, Any]]:
+    """Parse a JSON-ish or Python-literal dict safely."""
+    if raw is None:
+        return None
+    text = str(raw).strip()
+    if not text:
+        return None
+    try:
+        parsed = json.loads(text)
+        if isinstance(parsed, dict):
+            return parsed
+    except Exception:
+        pass
+
+    candidate = re.sub(r"\btrue\b", "True", text, flags=re.IGNORECASE)
+    candidate = re.sub(r"\bfalse\b", "False", candidate, flags=re.IGNORECASE)
+    candidate = re.sub(r"\bnull\b", "None", candidate, flags=re.IGNORECASE)
+    try:
+        parsed = ast.literal_eval(candidate)
+    except Exception:
+        return None
+    if not isinstance(parsed, dict):
+        return None
+    return {str(k): v for k, v in parsed.items()}
+
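
A few concrete inputs and outputs for the permissive parser above:

    print(_loads_dict_like('{"a": 1, "b": true}'))   # JSON -> {'a': 1, 'b': True}
    print(_loads_dict_like("{'a': 1, 'b': True}"))   # Python literal -> {'a': 1, 'b': True}
    print(_loads_dict_like("{'flag': null}"))        # "null" coerced to None -> {'flag': None}
    print(_loads_dict_like("not a dict"))            # -> None
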
+
+def _normalize_tool_calls(tool_calls: Any) -> Optional[List[Dict[str, Any]]]:
+    """Normalize tool call shapes into AbstractRuntime's standard dict form.
+
+    Standard shape:
+        {"name": str, "arguments": dict, "call_id": Optional[str]}
+    """
+    if tool_calls is None:
+        return None
+    if not isinstance(tool_calls, list):
+        return None
+
+    normalized: List[Dict[str, Any]] = []
+    for tc in tool_calls:
+        name: Optional[str] = None
+        arguments: Any = None
+        call_id: Any = None
+
+        if isinstance(tc, dict):
+            call_id = tc.get("call_id", None)
+            if call_id is None:
+                call_id = tc.get("id", None)
+
+            raw_name = tc.get("name")
+            raw_args = tc.get("arguments")
+
+            func = tc.get("function") if isinstance(tc.get("function"), dict) else None
+            if func and (not isinstance(raw_name, str) or not raw_name.strip()):
+                raw_name = func.get("name")
+            if func and raw_args is None:
+                raw_args = func.get("arguments")
+
+            if isinstance(raw_name, str):
+                name = raw_name.strip()
+                arguments = raw_args if raw_args is not None else {}
+        else:
+            raw_name = getattr(tc, "name", None)
+            raw_args = getattr(tc, "arguments", None)
+            call_id = getattr(tc, "call_id", None)
+            if isinstance(raw_name, str):
+                name = raw_name.strip()
+                arguments = raw_args if raw_args is not None else {}
+
+        if not isinstance(name, str) or not name:
+            continue
+
+        if isinstance(arguments, str):
+            parsed = _loads_dict_like(arguments)
+            arguments = parsed if isinstance(parsed, dict) else {}
+
+        if not isinstance(arguments, dict):
+            arguments = {}
+
+        normalized.append(
+            {
+                "name": name,
+                "arguments": _jsonable(arguments),
+                "call_id": str(call_id) if call_id is not None else None,
+            }
+        )
+
+    return normalized or None
+
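
For instance, an OpenAI-style entry with a nested "function" object and string-encoded arguments normalizes to the standard shape:

    calls = _normalize_tool_calls([
        {"id": "call_1", "type": "function",
         "function": {"name": "read_file", "arguments": '{"path": "a.txt"}'}},
    ])
    print(calls)
    # -> [{'name': 'read_file', 'arguments': {'path': 'a.txt'}, 'call_id': 'call_1'}]
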
+
 def _normalize_local_response(resp: Any) -> Dict[str, Any]:
     """Normalize an AbstractCore local `generate()` result into JSON."""
 
+    def _extract_reasoning_from_openai_like(raw: Any) -> Optional[str]:
+        """Best-effort extraction of model reasoning from OpenAI-style payloads.
+
+        LM Studio and some providers store reasoning in `choices[].message.reasoning_content`
+        while leaving `content` empty during tool-call turns.
+        """
+
+        def _from_message(msg: Any) -> Optional[str]:
+            if not isinstance(msg, dict):
+                return None
+            for key in ("reasoning", "reasoning_content", "thinking", "thinking_content"):
+                val = msg.get(key)
+                if isinstance(val, str) and val.strip():
+                    return val.strip()
+            return None
+
+        if isinstance(raw, dict):
+            # OpenAI chat completion: choices[].message
+            choices = raw.get("choices")
+            if isinstance(choices, list):
+                for c in choices:
+                    if not isinstance(c, dict):
+                        continue
+                    r = _from_message(c.get("message"))
+                    if r:
+                        return r
+                    # Streaming-style payloads may use `delta`.
+                    r = _from_message(c.get("delta"))
+                    if r:
+                        return r
+
+            # Some variants store a single message at the top level.
+            r = _from_message(raw.get("message"))
+            if r:
+                return r
+
+        return None
+
     # Dict-like already
     if isinstance(resp, dict):
         out = _jsonable(resp)
@@ -93,6 +576,24 @@ def _normalize_local_response(resp: Any) -> Dict[str, Any]:
         meta = out.get("metadata")
         if isinstance(meta, dict) and "trace_id" in meta and "trace_id" not in out:
             out["trace_id"] = meta["trace_id"]
+        # Some providers place reasoning under metadata (e.g. LM Studio gpt-oss).
+        if "reasoning" not in out and isinstance(meta, dict) and isinstance(meta.get("reasoning"), str):
+            out["reasoning"] = meta.get("reasoning")
+        if (
+            (not isinstance(out.get("reasoning"), str) or not str(out.get("reasoning") or "").strip())
+            and isinstance(out.get("raw_response"), dict)
+        ):
+            extracted = _extract_reasoning_from_openai_like(out.get("raw_response"))
+            if extracted:
+                out["reasoning"] = extracted
+        if (not isinstance(out.get("reasoning"), str) or not str(out.get("reasoning") or "").strip()) and isinstance(out.get("raw"), dict):
+            extracted = _extract_reasoning_from_openai_like(out.get("raw"))
+            if extracted:
+                out["reasoning"] = extracted
+        if (not isinstance(out.get("reasoning"), str) or not str(out.get("reasoning") or "").strip()) and isinstance(out.get("choices"), list):
+            extracted = _extract_reasoning_from_openai_like(out)
+            if extracted:
+                out["reasoning"] = extracted
         return out
 
     # Pydantic structured output
@@ -110,26 +611,166 @@ def _normalize_local_response(resp: Any) -> Dict[str, Any]:
 
     # AbstractCore GenerateResponse
     content = getattr(resp, "content", None)
+    raw_response = getattr(resp, "raw_response", None)
    tool_calls = getattr(resp, "tool_calls", None)
     usage = getattr(resp, "usage", None)
     model = getattr(resp, "model", None)
     finish_reason = getattr(resp, "finish_reason", None)
     metadata = getattr(resp, "metadata", None)
+    gen_time = getattr(resp, "gen_time", None)
     trace_id: Optional[str] = None
+    reasoning: Optional[str] = None
     if isinstance(metadata, dict):
         raw = metadata.get("trace_id")
         if raw is not None:
             trace_id = str(raw)
+        r = metadata.get("reasoning")
+        if isinstance(r, str) and r.strip():
+            reasoning = r.strip()
+    if reasoning is None and raw_response is not None:
+        extracted = _extract_reasoning_from_openai_like(_jsonable(raw_response))
+        if extracted:
+            reasoning = extracted
 
     return {
         "content": content,
+        "reasoning": reasoning,
         "data": None,
+        "raw_response": _jsonable(raw_response) if raw_response is not None else None,
         "tool_calls": _jsonable(tool_calls) if tool_calls is not None else None,
         "usage": _jsonable(usage) if usage is not None else None,
         "model": model,
         "finish_reason": finish_reason,
         "metadata": _jsonable(metadata) if metadata is not None else None,
         "trace_id": trace_id,
+        "gen_time": float(gen_time) if isinstance(gen_time, (int, float)) else None,
+    }
+
+
+def _normalize_local_streaming_response(stream: Any) -> Dict[str, Any]:
+    """Consume an AbstractCore streaming `generate(..., stream=True)` iterator into a single JSON result.
+
+    AbstractRuntime currently persists a single effect outcome object per LLM call, so even when
+    the underlying provider streams we aggregate into one final dict and surface timing fields.
+    """
+    import time
+
+    start_perf = time.perf_counter()
+
+    chunks: list[str] = []
+    tool_calls: Any = None
+    usage: Any = None
+    model: Optional[str] = None
+    finish_reason: Optional[str] = None
+    metadata: Dict[str, Any] = {}
+    trace_id: Optional[str] = None
+    reasoning: Optional[str] = None
+    ttft_ms: Optional[float] = None
+
+    def _maybe_capture_ttft(*, content: Any, tool_calls_value: Any, meta: Any) -> None:
+        nonlocal ttft_ms
+        if ttft_ms is not None:
+            return
+
+        if isinstance(meta, dict):
+            timing = meta.get("_timing") if isinstance(meta.get("_timing"), dict) else None
+            if isinstance(timing, dict) and isinstance(timing.get("ttft_ms"), (int, float)):
+                ttft_ms = float(timing["ttft_ms"])
+                return
+
+        has_content = isinstance(content, str) and bool(content)
+        has_tools = isinstance(tool_calls_value, list) and bool(tool_calls_value)
+        if has_content or has_tools:
+            ttft_ms = round((time.perf_counter() - start_perf) * 1000, 1)
+
+    for chunk in stream:
+        if chunk is None:
+            continue
+
+        if isinstance(chunk, dict):
+            content = chunk.get("content")
+            if isinstance(content, str) and content:
+                chunks.append(content)
+
+            tc = chunk.get("tool_calls")
+            if tc is not None:
+                tool_calls = tc
+
+            u = chunk.get("usage")
+            if u is not None:
+                usage = u
+
+            m = chunk.get("model")
+            if model is None and isinstance(m, str) and m.strip():
+                model = m.strip()
+
+            fr = chunk.get("finish_reason")
+            if fr is not None:
+                finish_reason = str(fr)
+
+            meta = chunk.get("metadata")
+            _maybe_capture_ttft(content=content, tool_calls_value=tc, meta=meta)
+
+            if isinstance(meta, dict):
+                meta_json = _jsonable(meta)
+                if isinstance(meta_json, dict):
+                    metadata.update(meta_json)
+                    raw_trace = meta_json.get("trace_id")
+                    if trace_id is None and raw_trace is not None:
+                        trace_id = str(raw_trace)
+                    r = meta_json.get("reasoning")
+                    if reasoning is None and isinstance(r, str) and r.strip():
+                        reasoning = r.strip()
+            continue
+
+        content = getattr(chunk, "content", None)
+        if isinstance(content, str) and content:
+            chunks.append(content)
+
+        tc = getattr(chunk, "tool_calls", None)
+        if tc is not None:
+            tool_calls = tc
+
+        u = getattr(chunk, "usage", None)
+        if u is not None:
+            usage = u
+
+        m = getattr(chunk, "model", None)
+        if model is None and isinstance(m, str) and m.strip():
+            model = m.strip()
+
+        fr = getattr(chunk, "finish_reason", None)
+        if fr is not None:
+            finish_reason = str(fr)
+
+        meta = getattr(chunk, "metadata", None)
+        _maybe_capture_ttft(content=content, tool_calls_value=tc, meta=meta)
+
+        if isinstance(meta, dict):
+            meta_json = _jsonable(meta)
+            if isinstance(meta_json, dict):
+                metadata.update(meta_json)
+                raw_trace = meta_json.get("trace_id")
+                if trace_id is None and raw_trace is not None:
+                    trace_id = str(raw_trace)
+                r = meta_json.get("reasoning")
+                if reasoning is None and isinstance(r, str) and r.strip():
+                    reasoning = r.strip()
+
+    gen_time = round((time.perf_counter() - start_perf) * 1000, 1)
+
+    return {
+        "content": "".join(chunks),
+        "reasoning": reasoning,
+        "data": None,
+        "tool_calls": _jsonable(tool_calls) if tool_calls is not None else None,
+        "usage": _jsonable(usage) if usage is not None else None,
+        "model": model,
+        "finish_reason": finish_reason,
+        "metadata": metadata or None,
+        "trace_id": trace_id,
+        "gen_time": gen_time,
+        "ttft_ms": ttft_ms,
     }
 
 
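
A sketch of how a chunked stream is folded into one outcome (the dict-shaped chunks below are hypothetical provider output):

    fake_stream = iter([
        {"content": "Hel"},
        {"content": "lo", "model": "demo-model"},
        {"content": "", "finish_reason": "stop", "usage": {"total_tokens": 5}},
    ])
    result = _normalize_local_streaming_response(fake_stream)
    print(result["content"])              # -> "Hello"
    print(result["finish_reason"])        # -> "stop"
    print(result["ttft_ms"] is not None)  # -> True (set on the first non-empty chunk)
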
@@ -143,15 +784,30 @@ class LocalAbstractCoreLLMClient:
         model: str,
         llm_kwargs: Optional[Dict[str, Any]] = None,
     ):
-        from abstractcore import create_llm
+        # In this monorepo layout, `import abstractcore` can resolve to a namespace package
+        # (the outer project directory) when running from the repo root. In that case, the
+        # top-level re-export `from abstractcore import create_llm` is unavailable even though
+        # the actual module tree (e.g. `abstractcore.core.factory`) is importable.
+        #
+        # Prefer the canonical public import, but fall back to the concrete module path so
+        # in-repo tooling/tests don't depend on editable-install import ordering.
+        try:
+            from abstractcore import create_llm  # type: ignore
+        except Exception:  # pragma: no cover
+            from abstractcore.core.factory import create_llm  # type: ignore
         from abstractcore.tools.handler import UniversalToolHandler
 
         self._provider = provider
         self._model = model
+        self._generate_lock = _local_generate_lock(provider=self._provider, model=self._model)
+        if self._generate_lock is not None:
+            _warn_local_generate_lock_once(provider=self._provider, model=self._model)
         kwargs = dict(llm_kwargs or {})
         kwargs.setdefault("enable_tracing", True)
         if kwargs.get("enable_tracing"):
-            kwargs.setdefault("max_traces", 0)
+            # Keep a small in-memory ring buffer for exact request/response observability.
+            # This enables hosts (AbstractCode/AbstractFlow) to inspect trace payloads by trace_id.
+            kwargs.setdefault("max_traces", 50)
         self._llm = create_llm(provider, model=model, **kwargs)
         self._tool_handler = UniversalToolHandler(model)
 
@@ -162,66 +818,112 @@ class LocalAbstractCoreLLMClient:
         messages: Optional[List[Dict[str, str]]] = None,
         system_prompt: Optional[str] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
+        media: Optional[List[Any]] = None,
         params: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Any]:
         params = dict(params or {})
 
+        system_prompt = _strip_system_context_header(system_prompt)
+        prompt, messages = _inject_turn_grounding(prompt=str(prompt or ""), messages=messages)
+        messages = _strip_internal_system_messages(messages)
+
+        stream_raw = params.pop("stream", None)
+        if stream_raw is None:
+            stream_raw = params.pop("streaming", None)
+        if isinstance(stream_raw, str):
+            stream = stream_raw.strip().lower() in {"1", "true", "yes", "y", "on"}
+        else:
+            stream = bool(stream_raw) if stream_raw is not None else False
+
         # `base_url` is a provider construction concern in local mode. We intentionally
         # do not create new providers per call unless the host explicitly chooses to.
         params.pop("base_url", None)
+        # Reserved routing keys (used by MultiLocalAbstractCoreLLMClient).
+        params.pop("_provider", None)
+        params.pop("_model", None)
 
-        capabilities: List[str] = []
-        get_capabilities = getattr(self._llm, "get_capabilities", None)
-        if callable(get_capabilities):
-            try:
-                capabilities = list(get_capabilities())
-            except Exception:
-                capabilities = []
-        supports_tools = "tools" in set(c.lower() for c in capabilities)
-
-        if tools and not supports_tools:
-            # Fallback tool calling via prompting for providers/models without native tool support.
-            from abstractcore.tools import ToolDefinition
-
-            tool_defs = [
-                ToolDefinition(
-                    name=t.get("name", ""),
-                    description=t.get("description", ""),
-                    parameters=t.get("parameters", {}),
-                )
-                for t in tools
-            ]
-            tools_prompt = self._tool_handler.format_tools_prompt(tool_defs)
-            effective_prompt = f"{tools_prompt}\n\nUser request: {prompt}"
-
+        lock = getattr(self, "_generate_lock", None)
+        if lock is None:
             resp = self._llm.generate(
-                prompt=effective_prompt,
+                prompt=str(prompt or ""),
                 messages=messages,
                 system_prompt=system_prompt,
-                stream=False,
+                tools=tools,
+                media=media,
+                stream=stream,
                 **params,
             )
-            result = _normalize_local_response(resp)
-
-            # Parse tool calls from response content.
-            if result.get("content"):
-                parsed = self._tool_handler.parse_response(result["content"], mode="prompted")
-                if parsed.tool_calls:
-                    result["tool_calls"] = [
-                        {"name": tc.name, "arguments": tc.arguments, "call_id": tc.call_id}
-                        for tc in parsed.tool_calls
-                    ]
-            return result
+            if stream and hasattr(resp, "__next__"):
+                result = _normalize_local_streaming_response(resp)
+            else:
+                result = _normalize_local_response(resp)
+            result["tool_calls"] = _normalize_tool_calls(result.get("tool_calls"))
+        else:
+            # Serialize generation for non-thread-safe providers (e.g. MLX).
+            with lock:
+                resp = self._llm.generate(
+                    prompt=str(prompt or ""),
+                    messages=messages,
+                    system_prompt=system_prompt,
+                    tools=tools,
+                    media=media,
+                    stream=stream,
+                    **params,
+                )
+                if stream and hasattr(resp, "__next__"):
+                    result = _normalize_local_streaming_response(resp)
+                else:
+                    result = _normalize_local_response(resp)
+                result["tool_calls"] = _normalize_tool_calls(result.get("tool_calls"))
 
-        resp = self._llm.generate(
-            prompt=str(prompt or ""),
-            messages=messages,
-            system_prompt=system_prompt,
-            tools=tools,
-            stream=False,
-            **params,
-        )
-        return _normalize_local_response(resp)
+        # Durable observability: ensure a provider request payload exists even when the
+        # underlying provider does not attach `_provider_request` metadata.
+        #
+        # AbstractCode's `/llm --verbatim` expects `metadata._provider_request.payload.messages`
+        # to be present to display the exact system/user content that was sent.
+        try:
+            meta = result.get("metadata")
+            if not isinstance(meta, dict):
+                meta = {}
+                result["metadata"] = meta
+
+            if "_provider_request" not in meta:
+                out_messages: List[Dict[str, str]] = []
+                if isinstance(system_prompt, str) and system_prompt:
+                    out_messages.append({"role": "system", "content": system_prompt})
+                if isinstance(messages, list) and messages:
+                    # Copy dict entries defensively (caller-owned objects).
+                    out_messages.extend([dict(m) for m in messages if isinstance(m, dict)])
+
+                # Append the current prompt as the final user message unless it's already present.
+                prompt_str = str(prompt or "")
+                if prompt_str:
+                    last = out_messages[-1] if out_messages else None
+                    if not (isinstance(last, dict) and last.get("role") == "user" and last.get("content") == prompt_str):
+                        out_messages.append({"role": "user", "content": prompt_str})
+
+                payload: Dict[str, Any] = {
+                    "model": str(self._model),
+                    "messages": out_messages,
+                    "stream": bool(stream),
+                }
+                if tools is not None:
+                    payload["tools"] = tools
+
+                # Include generation params for debugging; keep JSON-safe (e.g. response_model).
+                payload["params"] = _jsonable(params) if params else {}
+
+                meta["_provider_request"] = {
+                    "transport": "local",
+                    "provider": str(self._provider),
+                    "model": str(self._model),
+                    "payload": payload,
+                }
        except Exception:
+            # Never fail an LLM call due to observability.
+            pass
+
+        return result
 
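
When the fallback fires, the normalized result carries a reconstructed request under metadata. A hypothetical inspection (client construction, provider, and model are illustrative; the exact shape applies only when the provider did not attach its own `_provider_request`):

    client = LocalAbstractCoreLLMClient(provider="ollama", model="qwen3:4b")
    result = client.generate(prompt="Hello", params={})
    req = result["metadata"]["_provider_request"]
    print(req["transport"], req["provider"], req["model"])  # -> local ollama qwen3:4b
    print(req["payload"]["messages"][-1]["role"])           # -> "user"
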
     def get_model_capabilities(self) -> Dict[str, Any]:
         """Get model capabilities including max_tokens, vision_support, etc.
@@ -231,14 +933,92 @@ class LocalAbstractCoreLLMClient:
         for resource tracking and warnings.
 
         Returns:
-            Dict with model capabilities. Always includes 'max_tokens' (default 32768).
+            Dict with model capabilities. Always includes 'max_tokens' (default: DEFAULT_MAX_TOKENS).
         """
         try:
             from abstractcore.architectures.detection import get_model_capabilities
             return get_model_capabilities(self._model)
         except Exception:
             # Safe fallback if detection fails
-            return {"max_tokens": 32768}
+            from abstractruntime.core.vars import DEFAULT_MAX_TOKENS
+
+            return {"max_tokens": DEFAULT_MAX_TOKENS}
+
+
+class MultiLocalAbstractCoreLLMClient:
+    """Local AbstractCore client with per-request provider/model routing.
+
+    This keeps the same `generate(...)` signature as AbstractCoreLLMClient by
+    using reserved keys in `params`:
+    - `_provider`: override provider for this request
+    - `_model`: override model for this request
+    """
+
+    def __init__(
+        self,
+        *,
+        provider: str,
+        model: str,
+        llm_kwargs: Optional[Dict[str, Any]] = None,
+    ):
+        self._llm_kwargs = dict(llm_kwargs or {})
+        self._default_provider = provider.strip().lower()
+        self._default_model = model.strip()
+        self._clients: Dict[Tuple[str, str], LocalAbstractCoreLLMClient] = {}
+        self._default_client = self._get_client(self._default_provider, self._default_model)
+
+        # Provide a stable underlying LLM for components that need one (e.g. summarizer).
+        self._llm = getattr(self._default_client, "_llm", None)
+
+    def _get_client(self, provider: str, model: str) -> LocalAbstractCoreLLMClient:
+        key = (provider.strip().lower(), model.strip())
+        client = self._clients.get(key)
+        if client is None:
+            client = LocalAbstractCoreLLMClient(provider=key[0], model=key[1], llm_kwargs=self._llm_kwargs)
+            self._clients[key] = client
+        return client
+
+    def get_provider_instance(self, *, provider: str, model: str) -> Any:
+        """Return the underlying AbstractCore provider instance for (provider, model)."""
+        client = self._get_client(str(provider or ""), str(model or ""))
+        return getattr(client, "_llm", None)
+
+    def list_loaded_clients(self) -> List[Tuple[str, str]]:
+        """Return (provider, model) pairs loaded in this process (best-effort)."""
+        return list(self._clients.keys())
+
+    def generate(
+        self,
+        *,
+        prompt: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        system_prompt: Optional[str] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        media: Optional[List[Any]] = None,
+        params: Optional[Dict[str, Any]] = None,
+    ) -> Dict[str, Any]:
+        params = dict(params or {})
+        provider = params.pop("_provider", None)
+        model = params.pop("_model", None)
+
+        provider_str = (
+            str(provider).strip().lower() if isinstance(provider, str) and provider.strip() else self._default_provider
+        )
+        model_str = str(model).strip() if isinstance(model, str) and model.strip() else self._default_model
+
+        client = self._get_client(provider_str, model_str)
+        return client.generate(
+            prompt=prompt,
+            messages=messages,
+            system_prompt=system_prompt,
+            tools=tools,
+            media=media,
+            params=params,
+        )
+
+    def get_model_capabilities(self) -> Dict[str, Any]:
+        # Best-effort: use default model capabilities. Per-model limits can be added later.
+        return self._default_client.get_model_capabilities()
 
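
Routing sketch (provider and model names are placeholders): the default client is built eagerly, and per-request overrides build and cache additional clients lazily.

    client = MultiLocalAbstractCoreLLMClient(provider="ollama", model="qwen3:4b")
    client.generate(prompt="hi", params={"_provider": "lmstudio", "_model": "qwen/qwen3-4b"})
    print(client.list_loaded_clients())
    # -> [('ollama', 'qwen3:4b'), ('lmstudio', 'qwen/qwen3-4b')]
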
 
 class HttpxRequestSender:
@@ -288,13 +1068,17 @@ class RemoteAbstractCoreLLMClient:
         *,
         server_base_url: str,
         model: str,
-        timeout_s: float = 60.0,
+        # Runtime authority default: long-running workflow steps may legitimately take a long time.
+        # Keep this aligned with AbstractRuntime's orchestration defaults.
+        timeout_s: Optional[float] = None,
         headers: Optional[Dict[str, str]] = None,
         request_sender: Optional[RequestSender] = None,
     ):
+        from .constants import DEFAULT_LLM_TIMEOUT_S
+
         self._server_base_url = server_base_url.rstrip("/")
         self._model = model
-        self._timeout_s = timeout_s
+        self._timeout_s = float(timeout_s) if timeout_s is not None else DEFAULT_LLM_TIMEOUT_S
         self._headers = dict(headers or {})
         self._sender = request_sender or HttpxRequestSender()
 
@@ -305,12 +1089,20 @@ class RemoteAbstractCoreLLMClient:
         messages: Optional[List[Dict[str, str]]] = None,
         system_prompt: Optional[str] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
+        media: Optional[List[Any]] = None,
         params: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Any]:
         params = dict(params or {})
+        if media:
+            raise ValueError(
+                "RemoteAbstractCoreLLMClient does not support media yet (artifact-backed attachments require local/hybrid execution)."
+            )
         req_headers = dict(self._headers)
 
         trace_metadata = params.pop("trace_metadata", None)
+        system_prompt = _strip_system_context_header(system_prompt)
+        prompt, messages = _inject_turn_grounding(prompt=str(prompt or ""), messages=messages)
+
         if isinstance(trace_metadata, dict) and trace_metadata:
             req_headers["X-AbstractCore-Trace-Metadata"] = json.dumps(
                 trace_metadata, ensure_ascii=False, separators=(",", ":")
@@ -340,6 +1132,9 @@
             "model": self._model,
             "messages": out_messages,
             "stream": False,
+            # Orchestrator policy: ask the AbstractCore server to use the same timeout it expects.
+            # This keeps runtime authority even when the actual provider call happens server-side.
+            "timeout_s": self._timeout_s,
         }
 
         # Dynamic routing support (AbstractCore server feature).
@@ -347,6 +1142,10 @@
         if base_url:
             body["base_url"] = base_url
 
+        prompt_cache_key = params.get("prompt_cache_key")
+        if isinstance(prompt_cache_key, str) and prompt_cache_key.strip():
+            body["prompt_cache_key"] = prompt_cache_key.strip()
+
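
For example (server URL, model, and key are illustrative; assumes a reachable AbstractCore server), a host can pin a cache identity per session:

    client = RemoteAbstractCoreLLMClient(server_base_url="http://localhost:8000", model="gpt-4o-mini")
    client.generate(prompt="hi", params={"prompt_cache_key": "session-42"})
    # The request body now carries "timeout_s" (the runtime default) and "prompt_cache_key": "session-42".
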
         # Pass through common OpenAI-compatible parameters.
         for key in (
             "temperature",
@@ -357,6 +1156,20 @@
             "presence_penalty",
         ):
             if key in params and params[key] is not None:
+                if key == "seed":
+                    try:
+                        seed_i = int(params[key])
+                    except Exception:
+                        continue
+                    if seed_i >= 0:
+                        body[key] = seed_i
+                    continue
+                if key == "temperature":
+                    try:
+                        body[key] = float(params[key])
+                    except Exception:
+                        continue
+                    continue
                 body[key] = params[key]
 
         if tools is not None:
@@ -372,16 +1185,33 @@
         try:
             choice0 = (resp.get("choices") or [])[0]
             msg = choice0.get("message") or {}
-            return {
+            meta: Dict[str, Any] = {
+                "_provider_request": {"url": url, "payload": body}
+            }
+            if trace_id:
+                meta["trace_id"] = trace_id
+            reasoning = msg.get("reasoning")
+            if not isinstance(reasoning, str) or not reasoning.strip():
+                reasoning = msg.get("reasoning_content")
+            if not isinstance(reasoning, str) or not reasoning.strip():
+                reasoning = msg.get("thinking")
+            if not isinstance(reasoning, str) or not reasoning.strip():
+                reasoning = msg.get("thinking_content")
+            result = {
                 "content": msg.get("content"),
+                "reasoning": reasoning,
                 "data": None,
+                "raw_response": _jsonable(resp) if resp is not None else None,
                 "tool_calls": _jsonable(msg.get("tool_calls")) if msg.get("tool_calls") is not None else None,
                 "usage": _jsonable(resp.get("usage")) if resp.get("usage") is not None else None,
                 "model": resp.get("model"),
                 "finish_reason": choice0.get("finish_reason"),
-                "metadata": {"trace_id": trace_id} if trace_id else None,
+                "metadata": meta,
                 "trace_id": trace_id,
             }
+            result["tool_calls"] = _normalize_tool_calls(result.get("tool_calls"))
+
+            return result
         except Exception:
             # Fallback: return the raw response in JSON-safe form.
             logger.warning("Remote LLM response normalization failed; returning raw JSON")
@@ -392,6 +1222,12 @@
                 "usage": None,
                 "model": resp.get("model") if isinstance(resp, dict) else None,
                 "finish_reason": None,
-                "metadata": {"trace_id": trace_id} if trace_id else None,
+                "metadata": {
+                    "_provider_request": {"url": url, "payload": body},
+                    "trace_id": trace_id,
+                }
+                if trace_id
+                else {"_provider_request": {"url": url, "payload": body}},
                 "trace_id": trace_id,
+                "raw_response": _jsonable(resp) if resp is not None else None,
             }