AbstractRuntime 0.0.1-py3-none-any.whl → 0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. abstractruntime/__init__.py +7 -2
  2. abstractruntime/core/__init__.py +9 -2
  3. abstractruntime/core/config.py +114 -0
  4. abstractruntime/core/event_keys.py +62 -0
  5. abstractruntime/core/models.py +55 -1
  6. abstractruntime/core/runtime.py +2609 -24
  7. abstractruntime/core/vars.py +189 -0
  8. abstractruntime/evidence/__init__.py +10 -0
  9. abstractruntime/evidence/recorder.py +325 -0
  10. abstractruntime/integrations/abstractcore/__init__.py +9 -2
  11. abstractruntime/integrations/abstractcore/constants.py +19 -0
  12. abstractruntime/integrations/abstractcore/default_tools.py +134 -0
  13. abstractruntime/integrations/abstractcore/effect_handlers.py +288 -9
  14. abstractruntime/integrations/abstractcore/factory.py +133 -11
  15. abstractruntime/integrations/abstractcore/llm_client.py +547 -42
  16. abstractruntime/integrations/abstractcore/mcp_worker.py +586 -0
  17. abstractruntime/integrations/abstractcore/observability.py +80 -0
  18. abstractruntime/integrations/abstractcore/summarizer.py +154 -0
  19. abstractruntime/integrations/abstractcore/tool_executor.py +544 -8
  20. abstractruntime/memory/__init__.py +21 -0
  21. abstractruntime/memory/active_context.py +746 -0
  22. abstractruntime/memory/active_memory.py +452 -0
  23. abstractruntime/memory/compaction.py +105 -0
  24. abstractruntime/rendering/__init__.py +17 -0
  25. abstractruntime/rendering/agent_trace_report.py +256 -0
  26. abstractruntime/rendering/json_stringify.py +136 -0
  27. abstractruntime/scheduler/scheduler.py +93 -2
  28. abstractruntime/storage/__init__.py +3 -1
  29. abstractruntime/storage/artifacts.py +51 -5
  30. abstractruntime/storage/json_files.py +16 -3
  31. abstractruntime/storage/observable.py +99 -0
  32. {abstractruntime-0.0.1.dist-info → abstractruntime-0.4.0.dist-info}/METADATA +5 -1
  33. abstractruntime-0.4.0.dist-info/RECORD +49 -0
  34. abstractruntime-0.4.0.dist-info/entry_points.txt +2 -0
  35. abstractruntime-0.0.1.dist-info/RECORD +0 -30
  36. {abstractruntime-0.0.1.dist-info → abstractruntime-0.4.0.dist-info}/WHEEL +0 -0
  37. {abstractruntime-0.0.1.dist-info → abstractruntime-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -13,14 +13,76 @@ Remote mode is the preferred way to support per-request dynamic routing (e.g. `b
 
 from __future__ import annotations
 
-from dataclasses import asdict, is_dataclass
-from typing import Any, Dict, List, Optional, Protocol
+import ast
+import json
+import re
+from dataclasses import asdict, dataclass, is_dataclass
+from typing import Any, Dict, List, Optional, Protocol, Tuple
 
 from .logging import get_logger
 
 logger = get_logger(__name__)
 
 
+def _maybe_parse_tool_calls_from_text(
+    *,
+    content: Optional[str],
+    allowed_tool_names: Optional[set[str]] = None,
+    model_name: Optional[str] = None,
+    tool_handler: Any = None,
+) -> tuple[Optional[List[Dict[str, Any]]], Optional[str]]:
+    """Deprecated: tool-call parsing belongs to AbstractCore.
+
+    AbstractCore now normalizes non-streaming responses by populating structured `tool_calls`
+    and returning cleaned `content`. This helper remains only for backward compatibility with
+    older AbstractCore versions and will be removed in the next major release.
+    """
+    # Keep behavior for external callers/tests that still import it.
+    if not isinstance(content, str) or not content.strip():
+        return None, None
+    if tool_handler is None:
+        from abstractcore.tools.handler import UniversalToolHandler
+
+        tool_handler = UniversalToolHandler(str(model_name or ""))
+
+    try:
+        parsed = tool_handler.parse_response(content, mode="prompted")
+    except Exception:
+        return None, None
+
+    calls = getattr(parsed, "tool_calls", None)
+    cleaned = getattr(parsed, "content", None)
+    if not isinstance(calls, list) or not calls:
+        return None, None
+
+    out_calls: List[Dict[str, Any]] = []
+    for tc in calls:
+        name = getattr(tc, "name", None)
+        arguments = getattr(tc, "arguments", None)
+        call_id = getattr(tc, "call_id", None)
+        if not isinstance(name, str) or not name.strip():
+            continue
+        if isinstance(allowed_tool_names, set) and allowed_tool_names and name not in allowed_tool_names:
+            continue
+        out_calls.append(
+            {
+                "name": name.strip(),
+                "arguments": _jsonable(arguments) if arguments is not None else {},
+                "call_id": str(call_id) if call_id is not None else None,
+            }
+        )
+
+    if not out_calls:
+        return None, None
+    return out_calls, (str(cleaned) if isinstance(cleaned, str) else "")
+
+
+@dataclass(frozen=True)
+class HttpResponse:
+    body: Dict[str, Any]
+    headers: Dict[str, str]
+
+
 class RequestSender(Protocol):
     def post(
         self,
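Illustrative sketch (not part of the wheel contents): the deprecated helper above keeps the old return convention of (tool_calls, cleaned_content). The stub handler and sample text below are invented purely to show that contract; real callers let the helper build a UniversalToolHandler itself.

    from types import SimpleNamespace

    class _StubHandler:
        def parse_response(self, content, mode="prompted"):
            # Pretend the model emitted one prompted-style tool call.
            return SimpleNamespace(
                tool_calls=[SimpleNamespace(name="read_file", arguments={"path": "a.txt"}, call_id="1")],
                content="cleaned text",
            )

    calls, cleaned = _maybe_parse_tool_calls_from_text(
        content="<tool_call>read_file(path='a.txt')</tool_call>",
        allowed_tool_names={"read_file"},
        tool_handler=_StubHandler(),
    )
    # calls   -> [{"name": "read_file", "arguments": {"path": "a.txt"}, "call_id": "1"}]
    # cleaned -> "cleaned text"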
@@ -29,7 +91,7 @@ class RequestSender(Protocol):
         headers: Dict[str, str],
         json: Dict[str, Any],
         timeout: float,
-    ) -> Dict[str, Any]: ...
+    ) -> Any: ...
 
 
 class AbstractCoreLLMClient(Protocol):
@@ -76,12 +138,109 @@ def _jsonable(value: Any) -> Any:
     return str(value)
 
 
+def _loads_dict_like(raw: Any) -> Optional[Dict[str, Any]]:
+    """Parse a JSON-ish or Python-literal dict safely."""
+    if raw is None:
+        return None
+    text = str(raw).strip()
+    if not text:
+        return None
+    try:
+        parsed = json.loads(text)
+        if isinstance(parsed, dict):
+            return parsed
+    except Exception:
+        pass
+
+    candidate = re.sub(r"\btrue\b", "True", text, flags=re.IGNORECASE)
+    candidate = re.sub(r"\bfalse\b", "False", candidate, flags=re.IGNORECASE)
+    candidate = re.sub(r"\bnull\b", "None", candidate, flags=re.IGNORECASE)
+    try:
+        parsed = ast.literal_eval(candidate)
+    except Exception:
+        return None
+    if not isinstance(parsed, dict):
+        return None
+    return {str(k): v for k, v in parsed.items()}
+
+
+def _normalize_tool_calls(tool_calls: Any) -> Optional[List[Dict[str, Any]]]:
+    """Normalize tool call shapes into AbstractRuntime's standard dict form.
+
+    Standard shape:
+        {"name": str, "arguments": dict, "call_id": Optional[str]}
+    """
+    if tool_calls is None:
+        return None
+    if not isinstance(tool_calls, list):
+        return None
+
+    normalized: List[Dict[str, Any]] = []
+    for tc in tool_calls:
+        name: Optional[str] = None
+        arguments: Any = None
+        call_id: Any = None
+
+        if isinstance(tc, dict):
+            call_id = tc.get("call_id", None)
+            if call_id is None:
+                call_id = tc.get("id", None)
+
+            raw_name = tc.get("name")
+            raw_args = tc.get("arguments")
+
+            func = tc.get("function") if isinstance(tc.get("function"), dict) else None
+            if func and (not isinstance(raw_name, str) or not raw_name.strip()):
+                raw_name = func.get("name")
+            if func and raw_args is None:
+                raw_args = func.get("arguments")
+
+            if isinstance(raw_name, str):
+                name = raw_name.strip()
+                arguments = raw_args if raw_args is not None else {}
+        else:
+            raw_name = getattr(tc, "name", None)
+            raw_args = getattr(tc, "arguments", None)
+            call_id = getattr(tc, "call_id", None)
+            if isinstance(raw_name, str):
+                name = raw_name.strip()
+                arguments = raw_args if raw_args is not None else {}
+
+        if not isinstance(name, str) or not name:
+            continue
+
+        if isinstance(arguments, str):
+            parsed = _loads_dict_like(arguments)
+            arguments = parsed if isinstance(parsed, dict) else {}
+
+        if not isinstance(arguments, dict):
+            arguments = {}
+
+        normalized.append(
+            {
+                "name": name,
+                "arguments": _jsonable(arguments),
+                "call_id": str(call_id) if call_id is not None else None,
+            }
+        )
+
+    return normalized or None
+
+
 def _normalize_local_response(resp: Any) -> Dict[str, Any]:
     """Normalize an AbstractCore local `generate()` result into JSON."""
 
     # Dict-like already
     if isinstance(resp, dict):
-        return _jsonable(resp)
+        out = _jsonable(resp)
+        if isinstance(out, dict):
+            meta = out.get("metadata")
+            if isinstance(meta, dict) and "trace_id" in meta and "trace_id" not in out:
+                out["trace_id"] = meta["trace_id"]
+            # Some providers place reasoning under metadata (e.g. LM Studio gpt-oss).
+            if "reasoning" not in out and isinstance(meta, dict) and isinstance(meta.get("reasoning"), str):
+                out["reasoning"] = meta.get("reasoning")
+        return out
 
     # Pydantic structured output
     if hasattr(resp, "model_dump") or hasattr(resp, "dict"):
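Illustrative sketch (inputs invented) of the normalization contract implemented by the two helpers above: OpenAI-style nested "function" entries and stringified or Python-literal arguments all collapse into the runtime's standard dict form.

    raw = [{"id": "call_1", "function": {"name": "search", "arguments": '{"q": "weather", "limit": 3}'}}]
    _normalize_tool_calls(raw)
    # -> [{"name": "search", "arguments": {"q": "weather", "limit": 3}, "call_id": "call_1"}]

    _loads_dict_like("{'q': 'weather', 'cached': true}")
    # -> {"q": "weather", "cached": True}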
@@ -92,22 +251,168 @@ def _normalize_local_response(resp: Any) -> Dict[str, Any]:
         "usage": None,
         "model": None,
         "finish_reason": None,
+        "metadata": None,
+        "trace_id": None,
     }
 
     # AbstractCore GenerateResponse
     content = getattr(resp, "content", None)
+    raw_response = getattr(resp, "raw_response", None)
     tool_calls = getattr(resp, "tool_calls", None)
     usage = getattr(resp, "usage", None)
     model = getattr(resp, "model", None)
     finish_reason = getattr(resp, "finish_reason", None)
+    metadata = getattr(resp, "metadata", None)
+    gen_time = getattr(resp, "gen_time", None)
+    trace_id: Optional[str] = None
+    reasoning: Optional[str] = None
+    if isinstance(metadata, dict):
+        raw = metadata.get("trace_id")
+        if raw is not None:
+            trace_id = str(raw)
+        r = metadata.get("reasoning")
+        if isinstance(r, str) and r.strip():
+            reasoning = r.strip()
 
     return {
         "content": content,
+        "reasoning": reasoning,
+        "data": None,
+        "raw_response": _jsonable(raw_response) if raw_response is not None else None,
+        "tool_calls": _jsonable(tool_calls) if tool_calls is not None else None,
+        "usage": _jsonable(usage) if usage is not None else None,
+        "model": model,
+        "finish_reason": finish_reason,
+        "metadata": _jsonable(metadata) if metadata is not None else None,
+        "trace_id": trace_id,
+        "gen_time": float(gen_time) if isinstance(gen_time, (int, float)) else None,
+    }
+
+
+def _normalize_local_streaming_response(stream: Any) -> Dict[str, Any]:
+    """Consume an AbstractCore streaming `generate(..., stream=True)` iterator into a single JSON result.
+
+    AbstractRuntime currently persists a single effect outcome object per LLM call, so even when
+    the underlying provider streams we aggregate into one final dict and surface timing fields.
+    """
+    import time
+
+    start_perf = time.perf_counter()
+
+    chunks: list[str] = []
+    tool_calls: Any = None
+    usage: Any = None
+    model: Optional[str] = None
+    finish_reason: Optional[str] = None
+    metadata: Dict[str, Any] = {}
+    trace_id: Optional[str] = None
+    reasoning: Optional[str] = None
+    ttft_ms: Optional[float] = None
+
+    def _maybe_capture_ttft(*, content: Any, tool_calls_value: Any, meta: Any) -> None:
+        nonlocal ttft_ms
+        if ttft_ms is not None:
+            return
+
+        if isinstance(meta, dict):
+            timing = meta.get("_timing") if isinstance(meta.get("_timing"), dict) else None
+            if isinstance(timing, dict) and isinstance(timing.get("ttft_ms"), (int, float)):
+                ttft_ms = float(timing["ttft_ms"])
+                return
+
+        has_content = isinstance(content, str) and bool(content)
+        has_tools = isinstance(tool_calls_value, list) and bool(tool_calls_value)
+        if has_content or has_tools:
+            ttft_ms = round((time.perf_counter() - start_perf) * 1000, 1)
+
+    for chunk in stream:
+        if chunk is None:
+            continue
+
+        if isinstance(chunk, dict):
+            content = chunk.get("content")
+            if isinstance(content, str) and content:
+                chunks.append(content)
+
+            tc = chunk.get("tool_calls")
+            if tc is not None:
+                tool_calls = tc
+
+            u = chunk.get("usage")
+            if u is not None:
+                usage = u
+
+            m = chunk.get("model")
+            if model is None and isinstance(m, str) and m.strip():
+                model = m.strip()
+
+            fr = chunk.get("finish_reason")
+            if fr is not None:
+                finish_reason = str(fr)
+
+            meta = chunk.get("metadata")
+            _maybe_capture_ttft(content=content, tool_calls_value=tc, meta=meta)
+
+            if isinstance(meta, dict):
+                meta_json = _jsonable(meta)
+                if isinstance(meta_json, dict):
+                    metadata.update(meta_json)
+                    raw_trace = meta_json.get("trace_id")
+                    if trace_id is None and raw_trace is not None:
+                        trace_id = str(raw_trace)
+                    r = meta_json.get("reasoning")
+                    if reasoning is None and isinstance(r, str) and r.strip():
+                        reasoning = r.strip()
+            continue
+
+        content = getattr(chunk, "content", None)
+        if isinstance(content, str) and content:
+            chunks.append(content)
+
+        tc = getattr(chunk, "tool_calls", None)
+        if tc is not None:
+            tool_calls = tc
+
+        u = getattr(chunk, "usage", None)
+        if u is not None:
+            usage = u
+
+        m = getattr(chunk, "model", None)
+        if model is None and isinstance(m, str) and m.strip():
+            model = m.strip()
+
+        fr = getattr(chunk, "finish_reason", None)
+        if fr is not None:
+            finish_reason = str(fr)
+
+        meta = getattr(chunk, "metadata", None)
+        _maybe_capture_ttft(content=content, tool_calls_value=tc, meta=meta)
+
+        if isinstance(meta, dict):
+            meta_json = _jsonable(meta)
+            if isinstance(meta_json, dict):
+                metadata.update(meta_json)
+                raw_trace = meta_json.get("trace_id")
+                if trace_id is None and raw_trace is not None:
+                    trace_id = str(raw_trace)
+                r = meta_json.get("reasoning")
+                if reasoning is None and isinstance(r, str) and r.strip():
+                    reasoning = r.strip()
+
+    gen_time = round((time.perf_counter() - start_perf) * 1000, 1)
+
+    return {
+        "content": "".join(chunks),
+        "reasoning": reasoning,
         "data": None,
         "tool_calls": _jsonable(tool_calls) if tool_calls is not None else None,
         "usage": _jsonable(usage) if usage is not None else None,
         "model": model,
         "finish_reason": finish_reason,
+        "metadata": metadata or None,
+        "trace_id": trace_id,
+        "gen_time": gen_time,
+        "ttft_ms": ttft_ms,
     }
 
 
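Illustrative sketch (values invented) of the aggregation above: a streamed response still surfaces as one dict, with trace and timing fields lifted out of chunk metadata.

    result = _normalize_local_streaming_response(iter([
        {"content": "Hel", "model": "qwen3:4b",
         "metadata": {"trace_id": "t-123", "_timing": {"ttft_ms": 41.0}}},
        {"content": "lo", "finish_reason": "stop", "usage": {"total_tokens": 12}},
    ]))
    # result["content"]  == "Hello"
    # result["trace_id"] == "t-123"
    # result["ttft_ms"]  == 41.0   (taken from the provider's _timing metadata)
    # result["gen_time"] is the wall-clock milliseconds spent consuming the iterator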
@@ -121,12 +426,28 @@ class LocalAbstractCoreLLMClient:
         model: str,
         llm_kwargs: Optional[Dict[str, Any]] = None,
     ):
-        from abstractcore import create_llm
+        # In this monorepo layout, `import abstractcore` can resolve to a namespace package
+        # (the outer project directory) when running from the repo root. In that case, the
+        # top-level re-export `from abstractcore import create_llm` is unavailable even though
+        # the actual module tree (e.g. `abstractcore.core.factory`) is importable.
+        #
+        # Prefer the canonical public import, but fall back to the concrete module path so
+        # in-repo tooling/tests don't depend on editable-install import ordering.
+        try:
+            from abstractcore import create_llm  # type: ignore
+        except Exception:  # pragma: no cover
+            from abstractcore.core.factory import create_llm  # type: ignore
         from abstractcore.tools.handler import UniversalToolHandler
 
         self._provider = provider
         self._model = model
-        self._llm = create_llm(provider, model=model, **(llm_kwargs or {}))
+        kwargs = dict(llm_kwargs or {})
+        kwargs.setdefault("enable_tracing", True)
+        if kwargs.get("enable_tracing"):
+            # Keep a small in-memory ring buffer for exact request/response observability.
+            # This enables hosts (AbstractCode/AbstractFlow) to inspect trace payloads by trace_id.
+            kwargs.setdefault("max_traces", 50)
+        self._llm = create_llm(provider, model=model, **kwargs)
         self._tool_handler = UniversalToolHandler(model)
 
     def generate(
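Illustrative sketch (provider and model names are placeholders): tracing is now on by default, so hosts that want a different ring-buffer size, or no tracing at all, pass it explicitly through llm_kwargs.

    client = LocalAbstractCoreLLMClient(
        provider="ollama",
        model="qwen3:4b",
        llm_kwargs={"max_traces": 10},   # or {"enable_tracing": False} to opt out
    )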
@@ -140,46 +461,166 @@ class LocalAbstractCoreLLMClient:
     ) -> Dict[str, Any]:
         params = dict(params or {})
 
+        stream_raw = params.pop("stream", None)
+        if stream_raw is None:
+            stream_raw = params.pop("streaming", None)
+        if isinstance(stream_raw, str):
+            stream = stream_raw.strip().lower() in {"1", "true", "yes", "y", "on"}
+        else:
+            stream = bool(stream_raw) if stream_raw is not None else False
+
         # `base_url` is a provider construction concern in local mode. We intentionally
         # do not create new providers per call unless the host explicitly chooses to.
         params.pop("base_url", None)
-
-        # If tools provided, use UniversalToolHandler to format them into prompt
-        # This works for models without native tool support
-        effective_prompt = prompt
-        if tools:
-            from abstractcore.tools import ToolDefinition
-            tool_defs = []
-            for t in tools:
-                tool_defs.append(ToolDefinition(
-                    name=t.get("name", ""),
-                    description=t.get("description", ""),
-                    parameters=t.get("parameters", {}),
-                ))
-            tools_prompt = self._tool_handler.format_tools_prompt(tool_defs)
-            effective_prompt = f"{tools_prompt}\n\nUser request: {prompt}"
+        # Reserved routing keys (used by MultiLocalAbstractCoreLLMClient).
+        params.pop("_provider", None)
+        params.pop("_model", None)
 
         resp = self._llm.generate(
-            prompt=effective_prompt,
+            prompt=str(prompt or ""),
             messages=messages,
             system_prompt=system_prompt,
-            stream=False,
+            tools=tools,
+            stream=stream,
             **params,
         )
-
-        result = _normalize_local_response(resp)
-
-        # Parse tool calls from response if tools were provided
-        if tools and result.get("content"):
-            parsed = self._tool_handler.parse_response(result["content"], mode="prompted")
-            if parsed.tool_calls:
-                result["tool_calls"] = [
-                    {"name": tc.name, "arguments": tc.arguments, "call_id": tc.call_id}
-                    for tc in parsed.tool_calls
-                ]
-
+        if stream and hasattr(resp, "__next__"):
+            result = _normalize_local_streaming_response(resp)
+        else:
+            result = _normalize_local_response(resp)
+        result["tool_calls"] = _normalize_tool_calls(result.get("tool_calls"))
+
+        # Durable observability: ensure a provider request payload exists even when the
+        # underlying provider does not attach `_provider_request` metadata.
+        #
+        # AbstractCode's `/llm --verbatim` expects `metadata._provider_request.payload.messages`
+        # to be present to display the exact system/user content that was sent.
+        try:
+            meta = result.get("metadata")
+            if not isinstance(meta, dict):
+                meta = {}
+            result["metadata"] = meta
+
+            if "_provider_request" not in meta:
+                out_messages: List[Dict[str, str]] = []
+                if isinstance(system_prompt, str) and system_prompt:
+                    out_messages.append({"role": "system", "content": system_prompt})
+                if isinstance(messages, list) and messages:
+                    # Copy dict entries defensively (caller-owned objects).
+                    out_messages.extend([dict(m) for m in messages if isinstance(m, dict)])
+
+                # Append the current prompt as the final user message unless it's already present.
+                prompt_str = str(prompt or "")
+                if prompt_str:
+                    last = out_messages[-1] if out_messages else None
+                    if not (isinstance(last, dict) and last.get("role") == "user" and last.get("content") == prompt_str):
+                        out_messages.append({"role": "user", "content": prompt_str})
+
+                payload: Dict[str, Any] = {
+                    "model": str(self._model),
+                    "messages": out_messages,
+                    "stream": bool(stream),
+                }
+                if tools is not None:
+                    payload["tools"] = tools
+
+                # Include generation params for debugging; keep JSON-safe (e.g. response_model).
+                payload["params"] = _jsonable(params) if params else {}
+
+                meta["_provider_request"] = {
+                    "transport": "local",
+                    "provider": str(self._provider),
+                    "model": str(self._model),
+                    "payload": payload,
+                }
+        except Exception:
+            # Never fail an LLM call due to observability.
+            pass
+
         return result
 
+    def get_model_capabilities(self) -> Dict[str, Any]:
+        """Get model capabilities including max_tokens, vision_support, etc.
+
+        Uses AbstractCore's architecture detection system to query model limits
+        and features. This allows the runtime to be aware of model constraints
+        for resource tracking and warnings.
+
+        Returns:
+            Dict with model capabilities. Always includes 'max_tokens' (default 32768).
+        """
+        try:
+            from abstractcore.architectures.detection import get_model_capabilities
+            return get_model_capabilities(self._model)
+        except Exception:
+            # Safe fallback if detection fails
+            return {"max_tokens": 32768}
+
+
+class MultiLocalAbstractCoreLLMClient:
+    """Local AbstractCore client with per-request provider/model routing.
+
+    This keeps the same `generate(...)` signature as AbstractCoreLLMClient by
+    using reserved keys in `params`:
+    - `_provider`: override provider for this request
+    - `_model`: override model for this request
+    """
+
+    def __init__(
+        self,
+        *,
+        provider: str,
+        model: str,
+        llm_kwargs: Optional[Dict[str, Any]] = None,
+    ):
+        self._llm_kwargs = dict(llm_kwargs or {})
+        self._default_provider = provider.strip().lower()
+        self._default_model = model.strip()
+        self._clients: Dict[Tuple[str, str], LocalAbstractCoreLLMClient] = {}
+        self._default_client = self._get_client(self._default_provider, self._default_model)
+
+        # Provide a stable underlying LLM for components that need one (e.g. summarizer).
+        self._llm = getattr(self._default_client, "_llm", None)
+
+    def _get_client(self, provider: str, model: str) -> LocalAbstractCoreLLMClient:
+        key = (provider.strip().lower(), model.strip())
+        client = self._clients.get(key)
+        if client is None:
+            client = LocalAbstractCoreLLMClient(provider=key[0], model=key[1], llm_kwargs=self._llm_kwargs)
+            self._clients[key] = client
+        return client
+
+    def generate(
+        self,
+        *,
+        prompt: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        system_prompt: Optional[str] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        params: Optional[Dict[str, Any]] = None,
+    ) -> Dict[str, Any]:
+        params = dict(params or {})
+        provider = params.pop("_provider", None)
+        model = params.pop("_model", None)
+
+        provider_str = (
+            str(provider).strip().lower() if isinstance(provider, str) and provider.strip() else self._default_provider
+        )
+        model_str = str(model).strip() if isinstance(model, str) and model.strip() else self._default_model
+
+        client = self._get_client(provider_str, model_str)
+        return client.generate(
+            prompt=prompt,
+            messages=messages,
+            system_prompt=system_prompt,
+            tools=tools,
+            params=params,
+        )
+
+    def get_model_capabilities(self) -> Dict[str, Any]:
+        # Best-effort: use default model capabilities. Per-model limits can be added later.
+        return self._default_client.get_model_capabilities()
+
 
 
 class HttpxRequestSender:
@@ -196,10 +637,28 @@ class HttpxRequestSender:
         headers: Dict[str, str],
         json: Dict[str, Any],
         timeout: float,
-    ) -> Dict[str, Any]:
+    ) -> HttpResponse:
         resp = self._httpx.post(url, headers=headers, json=json, timeout=timeout)
         resp.raise_for_status()
-        return resp.json()
+        return HttpResponse(body=resp.json(), headers=dict(resp.headers))
+
+
+def _unwrap_http_response(value: Any) -> Tuple[Dict[str, Any], Dict[str, str]]:
+    if isinstance(value, dict):
+        return value, {}
+    body = getattr(value, "body", None)
+    headers = getattr(value, "headers", None)
+    if isinstance(body, dict) and isinstance(headers, dict):
+        return body, headers
+    json_fn = getattr(value, "json", None)
+    hdrs = getattr(value, "headers", None)
+    if callable(json_fn) and hdrs is not None:
+        try:
+            payload = json_fn()
+        except Exception:
+            payload = {}
+        return payload if isinstance(payload, dict) else {"data": _jsonable(payload)}, dict(hdrs)
+    return {"data": _jsonable(value)}, {}
 
 
 class RemoteAbstractCoreLLMClient:
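Because post() may now return an HttpResponse (body plus headers) rather than a bare dict, a custom sender only has to preserve both so that response headers such as trace ids survive. A minimal sketch using the requests library (an assumption; any HTTP client works as long as _unwrap_http_response can read the result):

    import requests

    class RequestsSender:
        def post(self, url, *, headers, json, timeout):
            resp = requests.post(url, headers=headers, json=json, timeout=timeout)
            resp.raise_for_status()
            # A plain dict would also be accepted by _unwrap_http_response, but it would drop the headers.
            return HttpResponse(body=resp.json(), headers=dict(resp.headers))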
@@ -210,13 +669,17 @@ class RemoteAbstractCoreLLMClient:
         *,
         server_base_url: str,
         model: str,
-        timeout_s: float = 60.0,
+        # Runtime authority default: long-running workflow steps may legitimately take a long time.
+        # Keep this aligned with AbstractRuntime's orchestration defaults.
+        timeout_s: Optional[float] = None,
         headers: Optional[Dict[str, str]] = None,
         request_sender: Optional[RequestSender] = None,
     ):
+        from .constants import DEFAULT_LLM_TIMEOUT_S
+
         self._server_base_url = server_base_url.rstrip("/")
         self._model = model
-        self._timeout_s = timeout_s
+        self._timeout_s = float(timeout_s) if timeout_s is not None else DEFAULT_LLM_TIMEOUT_S
         self._headers = dict(headers or {})
         self._sender = request_sender or HttpxRequestSender()
 
@@ -230,6 +693,23 @@ class RemoteAbstractCoreLLMClient:
         params: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Any]:
         params = dict(params or {})
+        req_headers = dict(self._headers)
+
+        trace_metadata = params.pop("trace_metadata", None)
+        if isinstance(trace_metadata, dict) and trace_metadata:
+            req_headers["X-AbstractCore-Trace-Metadata"] = json.dumps(
+                trace_metadata, ensure_ascii=False, separators=(",", ":")
+            )
+            header_map = {
+                "actor_id": "X-AbstractCore-Actor-Id",
+                "session_id": "X-AbstractCore-Session-Id",
+                "run_id": "X-AbstractCore-Run-Id",
+                "parent_run_id": "X-AbstractCore-Parent-Run-Id",
+            }
+            for key, header in header_map.items():
+                val = trace_metadata.get(key)
+                if val is not None and header not in req_headers:
+                    req_headers[header] = str(val)
 
         # Build OpenAI-like messages for AbstractCore server.
         out_messages: List[Dict[str, str]] = []
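Illustrative sketch (values invented) of the header mapping above: trace metadata passed through params is forwarded both as a compact JSON header and as individual id headers.

    params = {"trace_metadata": {"actor_id": "agent-7", "run_id": "run-42"}}
    # The outgoing request then carries, in addition to any configured headers:
    #   X-AbstractCore-Trace-Metadata: {"actor_id":"agent-7","run_id":"run-42"}
    #   X-AbstractCore-Actor-Id: agent-7
    #   X-AbstractCore-Run-Id: run-42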
@@ -245,6 +725,9 @@ class RemoteAbstractCoreLLMClient:
             "model": self._model,
             "messages": out_messages,
             "stream": False,
+            # Orchestrator policy: ask AbstractCore server to use the same timeout it expects.
+            # This keeps runtime authority even when the actual provider call happens server-side.
+            "timeout_s": self._timeout_s,
         }
 
         # Dynamic routing support (AbstractCore server feature).
@@ -268,20 +751,35 @@ class RemoteAbstractCoreLLMClient:
             body["tools"] = tools
 
         url = f"{self._server_base_url}/v1/chat/completions"
-        resp = self._sender.post(url, headers=self._headers, json=body, timeout=self._timeout_s)
+        raw = self._sender.post(url, headers=req_headers, json=body, timeout=self._timeout_s)
+        resp, resp_headers = _unwrap_http_response(raw)
+        lower_headers = {str(k).lower(): str(v) for k, v in resp_headers.items()}
+        trace_id = lower_headers.get("x-abstractcore-trace-id") or lower_headers.get("x-trace-id")
 
         # Normalize OpenAI-like response.
         try:
             choice0 = (resp.get("choices") or [])[0]
             msg = choice0.get("message") or {}
-            return {
+            meta: Dict[str, Any] = {
+                "_provider_request": {"url": url, "payload": body}
+            }
+            if trace_id:
+                meta["trace_id"] = trace_id
+            result = {
                 "content": msg.get("content"),
+                "reasoning": msg.get("reasoning"),
                 "data": None,
+                "raw_response": _jsonable(resp) if resp is not None else None,
                 "tool_calls": _jsonable(msg.get("tool_calls")) if msg.get("tool_calls") is not None else None,
                 "usage": _jsonable(resp.get("usage")) if resp.get("usage") is not None else None,
                 "model": resp.get("model"),
                 "finish_reason": choice0.get("finish_reason"),
+                "metadata": meta,
+                "trace_id": trace_id,
             }
+            result["tool_calls"] = _normalize_tool_calls(result.get("tool_calls"))
+
+            return result
         except Exception:
             # Fallback: return the raw response in JSON-safe form.
             logger.warning("Remote LLM response normalization failed; returning raw JSON")
@@ -292,5 +790,12 @@ class RemoteAbstractCoreLLMClient:
                 "usage": None,
                 "model": resp.get("model") if isinstance(resp, dict) else None,
                 "finish_reason": None,
+                "metadata": {
+                    "_provider_request": {"url": url, "payload": body},
+                    "trace_id": trace_id,
+                }
+                if trace_id
+                else {"_provider_request": {"url": url, "payload": body}},
+                "trace_id": trace_id,
+                "raw_response": _jsonable(resp) if resp is not None else None,
             }
-