AbstractRuntime 0.0.1-py3-none-any.whl → 0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractruntime/__init__.py +7 -2
- abstractruntime/core/__init__.py +9 -2
- abstractruntime/core/config.py +114 -0
- abstractruntime/core/event_keys.py +62 -0
- abstractruntime/core/models.py +55 -1
- abstractruntime/core/runtime.py +2609 -24
- abstractruntime/core/vars.py +189 -0
- abstractruntime/evidence/__init__.py +10 -0
- abstractruntime/evidence/recorder.py +325 -0
- abstractruntime/integrations/abstractcore/__init__.py +9 -2
- abstractruntime/integrations/abstractcore/constants.py +19 -0
- abstractruntime/integrations/abstractcore/default_tools.py +134 -0
- abstractruntime/integrations/abstractcore/effect_handlers.py +288 -9
- abstractruntime/integrations/abstractcore/factory.py +133 -11
- abstractruntime/integrations/abstractcore/llm_client.py +547 -42
- abstractruntime/integrations/abstractcore/mcp_worker.py +586 -0
- abstractruntime/integrations/abstractcore/observability.py +80 -0
- abstractruntime/integrations/abstractcore/summarizer.py +154 -0
- abstractruntime/integrations/abstractcore/tool_executor.py +544 -8
- abstractruntime/memory/__init__.py +21 -0
- abstractruntime/memory/active_context.py +746 -0
- abstractruntime/memory/active_memory.py +452 -0
- abstractruntime/memory/compaction.py +105 -0
- abstractruntime/rendering/__init__.py +17 -0
- abstractruntime/rendering/agent_trace_report.py +256 -0
- abstractruntime/rendering/json_stringify.py +136 -0
- abstractruntime/scheduler/scheduler.py +93 -2
- abstractruntime/storage/__init__.py +3 -1
- abstractruntime/storage/artifacts.py +51 -5
- abstractruntime/storage/json_files.py +16 -3
- abstractruntime/storage/observable.py +99 -0
- {abstractruntime-0.0.1.dist-info → abstractruntime-0.4.0.dist-info}/METADATA +5 -1
- abstractruntime-0.4.0.dist-info/RECORD +49 -0
- abstractruntime-0.4.0.dist-info/entry_points.txt +2 -0
- abstractruntime-0.0.1.dist-info/RECORD +0 -30
- {abstractruntime-0.0.1.dist-info → abstractruntime-0.4.0.dist-info}/WHEEL +0 -0
- {abstractruntime-0.0.1.dist-info → abstractruntime-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -13,14 +13,76 @@ Remote mode is the preferred way to support per-request dynamic routing (e.g. `b
 
 from __future__ import annotations
 
-
-
+import ast
+import json
+import re
+from dataclasses import asdict, dataclass, is_dataclass
+from typing import Any, Dict, List, Optional, Protocol, Tuple
 
 from .logging import get_logger
 
 logger = get_logger(__name__)
 
 
+def _maybe_parse_tool_calls_from_text(
+    *,
+    content: Optional[str],
+    allowed_tool_names: Optional[set[str]] = None,
+    model_name: Optional[str] = None,
+    tool_handler: Any = None,
+) -> tuple[Optional[List[Dict[str, Any]]], Optional[str]]:
+    """Deprecated: tool-call parsing belongs to AbstractCore.
+
+    AbstractCore now normalizes non-streaming responses by populating structured `tool_calls`
+    and returning cleaned `content`. This helper remains only for backward compatibility with
+    older AbstractCore versions and will be removed in the next major release.
+    """
+    # Keep behavior for external callers/tests that still import it.
+    if not isinstance(content, str) or not content.strip():
+        return None, None
+    if tool_handler is None:
+        from abstractcore.tools.handler import UniversalToolHandler
+
+        tool_handler = UniversalToolHandler(str(model_name or ""))
+
+    try:
+        parsed = tool_handler.parse_response(content, mode="prompted")
+    except Exception:
+        return None, None
+
+    calls = getattr(parsed, "tool_calls", None)
+    cleaned = getattr(parsed, "content", None)
+    if not isinstance(calls, list) or not calls:
+        return None, None
+
+    out_calls: List[Dict[str, Any]] = []
+    for tc in calls:
+        name = getattr(tc, "name", None)
+        arguments = getattr(tc, "arguments", None)
+        call_id = getattr(tc, "call_id", None)
+        if not isinstance(name, str) or not name.strip():
+            continue
+        if isinstance(allowed_tool_names, set) and allowed_tool_names and name not in allowed_tool_names:
+            continue
+        out_calls.append(
+            {
+                "name": name.strip(),
+                "arguments": _jsonable(arguments) if arguments is not None else {},
+                "call_id": str(call_id) if call_id is not None else None,
+            }
+        )
+
+    if not out_calls:
+        return None, None
+    return out_calls, (str(cleaned) if isinstance(cleaned, str) else "")
+
+
+@dataclass(frozen=True)
+class HttpResponse:
+    body: Dict[str, Any]
+    headers: Dict[str, str]
+
+
 class RequestSender(Protocol):
     def post(
         self,
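The deprecated `_maybe_parse_tool_calls_from_text` shim above still accepts an injected `tool_handler`, which makes its contract easy to see in isolation. A minimal sketch (the `FakeHandler` below is a hypothetical stand-in for AbstractCore's `UniversalToolHandler`; only the attribute names come from the diff):

from types import SimpleNamespace

class FakeHandler:
    def parse_response(self, content, mode="prompted"):
        # Mimic a parsed response: one structured call plus cleaned text.
        call = SimpleNamespace(name="read_file", arguments={"path": "README.md"}, call_id="c1")
        return SimpleNamespace(tool_calls=[call], content="Reading the file now.")

calls, cleaned = _maybe_parse_tool_calls_from_text(
    content='<tool_call>{"name": "read_file", "arguments": {"path": "README.md"}}</tool_call>',
    allowed_tool_names={"read_file"},
    tool_handler=FakeHandler(),
)
# calls   -> [{"name": "read_file", "arguments": {"path": "README.md"}, "call_id": "c1"}]
# cleaned -> "Reading the file now."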
@@ -29,7 +91,7 @@ class RequestSender(Protocol):
         headers: Dict[str, str],
         json: Dict[str, Any],
         timeout: float,
-    ) ->
+    ) -> Any: ...
 
 
 class AbstractCoreLLMClient(Protocol):
@@ -76,12 +138,109 @@ def _jsonable(value: Any) -> Any:
     return str(value)
 
 
+def _loads_dict_like(raw: Any) -> Optional[Dict[str, Any]]:
+    """Parse a JSON-ish or Python-literal dict safely."""
+    if raw is None:
+        return None
+    text = str(raw).strip()
+    if not text:
+        return None
+    try:
+        parsed = json.loads(text)
+        if isinstance(parsed, dict):
+            return parsed
+    except Exception:
+        pass
+
+    candidate = re.sub(r"\btrue\b", "True", text, flags=re.IGNORECASE)
+    candidate = re.sub(r"\bfalse\b", "False", candidate, flags=re.IGNORECASE)
+    candidate = re.sub(r"\bnull\b", "None", candidate, flags=re.IGNORECASE)
+    try:
+        parsed = ast.literal_eval(candidate)
+    except Exception:
+        return None
+    if not isinstance(parsed, dict):
+        return None
+    return {str(k): v for k, v in parsed.items()}
+
+
+def _normalize_tool_calls(tool_calls: Any) -> Optional[List[Dict[str, Any]]]:
+    """Normalize tool call shapes into AbstractRuntime's standard dict form.
+
+    Standard shape:
+        {"name": str, "arguments": dict, "call_id": Optional[str]}
+    """
+    if tool_calls is None:
+        return None
+    if not isinstance(tool_calls, list):
+        return None
+
+    normalized: List[Dict[str, Any]] = []
+    for tc in tool_calls:
+        name: Optional[str] = None
+        arguments: Any = None
+        call_id: Any = None
+
+        if isinstance(tc, dict):
+            call_id = tc.get("call_id", None)
+            if call_id is None:
+                call_id = tc.get("id", None)
+
+            raw_name = tc.get("name")
+            raw_args = tc.get("arguments")
+
+            func = tc.get("function") if isinstance(tc.get("function"), dict) else None
+            if func and (not isinstance(raw_name, str) or not raw_name.strip()):
+                raw_name = func.get("name")
+            if func and raw_args is None:
+                raw_args = func.get("arguments")
+
+            if isinstance(raw_name, str):
+                name = raw_name.strip()
+            arguments = raw_args if raw_args is not None else {}
+        else:
+            raw_name = getattr(tc, "name", None)
+            raw_args = getattr(tc, "arguments", None)
+            call_id = getattr(tc, "call_id", None)
+            if isinstance(raw_name, str):
+                name = raw_name.strip()
+            arguments = raw_args if raw_args is not None else {}
+
+        if not isinstance(name, str) or not name:
+            continue
+
+        if isinstance(arguments, str):
+            parsed = _loads_dict_like(arguments)
+            arguments = parsed if isinstance(parsed, dict) else {}
+
+        if not isinstance(arguments, dict):
+            arguments = {}
+
+        normalized.append(
+            {
+                "name": name,
+                "arguments": _jsonable(arguments),
+                "call_id": str(call_id) if call_id is not None else None,
+            }
+        )
+
+    return normalized or None
+
+
 def _normalize_local_response(resp: Any) -> Dict[str, Any]:
     """Normalize an AbstractCore local `generate()` result into JSON."""
 
     # Dict-like already
     if isinstance(resp, dict):
-
+        out = _jsonable(resp)
+        if isinstance(out, dict):
+            meta = out.get("metadata")
+            if isinstance(meta, dict) and "trace_id" in meta and "trace_id" not in out:
+                out["trace_id"] = meta["trace_id"]
+            # Some providers place reasoning under metadata (e.g. LM Studio gpt-oss).
+            if "reasoning" not in out and isinstance(meta, dict) and isinstance(meta.get("reasoning"), str):
+                out["reasoning"] = meta.get("reasoning")
+        return out
 
     # Pydantic structured output
     if hasattr(resp, "model_dump") or hasattr(resp, "dict"):
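Taken together, `_loads_dict_like` and `_normalize_tool_calls` funnel several wire formats into one shape. A short sketch of inputs they are written to accept (values are illustrative):

# JSON and Python-literal argument strings parse to the same dict.
_loads_dict_like('{"path": "a.txt", "force": true}')   # {"path": "a.txt", "force": True}
_loads_dict_like("{'path': 'a.txt', 'force': True}")   # {"path": "a.txt", "force": True}

# OpenAI-style nested `function` entries and flat entries normalize identically.
_normalize_tool_calls([
    {"id": "call_1", "function": {"name": "read_file", "arguments": '{"path": "a.txt"}'}},
    {"name": "list_dir", "arguments": {"path": "."}, "call_id": "call_2"},
])
# [{"name": "read_file", "arguments": {"path": "a.txt"}, "call_id": "call_1"},
#  {"name": "list_dir", "arguments": {"path": "."}, "call_id": "call_2"}]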
@@ -92,22 +251,168 @@ def _normalize_local_response(resp: Any) -> Dict[str, Any]:
         "usage": None,
         "model": None,
         "finish_reason": None,
+        "metadata": None,
+        "trace_id": None,
     }
 
     # AbstractCore GenerateResponse
     content = getattr(resp, "content", None)
+    raw_response = getattr(resp, "raw_response", None)
     tool_calls = getattr(resp, "tool_calls", None)
     usage = getattr(resp, "usage", None)
     model = getattr(resp, "model", None)
     finish_reason = getattr(resp, "finish_reason", None)
+    metadata = getattr(resp, "metadata", None)
+    gen_time = getattr(resp, "gen_time", None)
+    trace_id: Optional[str] = None
+    reasoning: Optional[str] = None
+    if isinstance(metadata, dict):
+        raw = metadata.get("trace_id")
+        if raw is not None:
+            trace_id = str(raw)
+        r = metadata.get("reasoning")
+        if isinstance(r, str) and r.strip():
+            reasoning = r.strip()
 
     return {
         "content": content,
+        "reasoning": reasoning,
+        "data": None,
+        "raw_response": _jsonable(raw_response) if raw_response is not None else None,
+        "tool_calls": _jsonable(tool_calls) if tool_calls is not None else None,
+        "usage": _jsonable(usage) if usage is not None else None,
+        "model": model,
+        "finish_reason": finish_reason,
+        "metadata": _jsonable(metadata) if metadata is not None else None,
+        "trace_id": trace_id,
+        "gen_time": float(gen_time) if isinstance(gen_time, (int, float)) else None,
+    }
+
+
+def _normalize_local_streaming_response(stream: Any) -> Dict[str, Any]:
+    """Consume an AbstractCore streaming `generate(..., stream=True)` iterator into a single JSON result.
+
+    AbstractRuntime currently persists a single effect outcome object per LLM call, so even when
+    the underlying provider streams we aggregate into one final dict and surface timing fields.
+    """
+    import time
+
+    start_perf = time.perf_counter()
+
+    chunks: list[str] = []
+    tool_calls: Any = None
+    usage: Any = None
+    model: Optional[str] = None
+    finish_reason: Optional[str] = None
+    metadata: Dict[str, Any] = {}
+    trace_id: Optional[str] = None
+    reasoning: Optional[str] = None
+    ttft_ms: Optional[float] = None
+
+    def _maybe_capture_ttft(*, content: Any, tool_calls_value: Any, meta: Any) -> None:
+        nonlocal ttft_ms
+        if ttft_ms is not None:
+            return
+
+        if isinstance(meta, dict):
+            timing = meta.get("_timing") if isinstance(meta.get("_timing"), dict) else None
+            if isinstance(timing, dict) and isinstance(timing.get("ttft_ms"), (int, float)):
+                ttft_ms = float(timing["ttft_ms"])
+                return
+
+        has_content = isinstance(content, str) and bool(content)
+        has_tools = isinstance(tool_calls_value, list) and bool(tool_calls_value)
+        if has_content or has_tools:
+            ttft_ms = round((time.perf_counter() - start_perf) * 1000, 1)
+
+    for chunk in stream:
+        if chunk is None:
+            continue
+
+        if isinstance(chunk, dict):
+            content = chunk.get("content")
+            if isinstance(content, str) and content:
+                chunks.append(content)
+
+            tc = chunk.get("tool_calls")
+            if tc is not None:
+                tool_calls = tc
+
+            u = chunk.get("usage")
+            if u is not None:
+                usage = u
+
+            m = chunk.get("model")
+            if model is None and isinstance(m, str) and m.strip():
+                model = m.strip()
+
+            fr = chunk.get("finish_reason")
+            if fr is not None:
+                finish_reason = str(fr)
+
+            meta = chunk.get("metadata")
+            _maybe_capture_ttft(content=content, tool_calls_value=tc, meta=meta)
+
+            if isinstance(meta, dict):
+                meta_json = _jsonable(meta)
+                if isinstance(meta_json, dict):
+                    metadata.update(meta_json)
+                    raw_trace = meta_json.get("trace_id")
+                    if trace_id is None and raw_trace is not None:
+                        trace_id = str(raw_trace)
+                    r = meta_json.get("reasoning")
+                    if reasoning is None and isinstance(r, str) and r.strip():
+                        reasoning = r.strip()
+            continue
+
+        content = getattr(chunk, "content", None)
+        if isinstance(content, str) and content:
+            chunks.append(content)
+
+        tc = getattr(chunk, "tool_calls", None)
+        if tc is not None:
+            tool_calls = tc
+
+        u = getattr(chunk, "usage", None)
+        if u is not None:
+            usage = u
+
+        m = getattr(chunk, "model", None)
+        if model is None and isinstance(m, str) and m.strip():
+            model = m.strip()
+
+        fr = getattr(chunk, "finish_reason", None)
+        if fr is not None:
+            finish_reason = str(fr)
+
+        meta = getattr(chunk, "metadata", None)
+        _maybe_capture_ttft(content=content, tool_calls_value=tc, meta=meta)
+
+        if isinstance(meta, dict):
+            meta_json = _jsonable(meta)
+            if isinstance(meta_json, dict):
+                metadata.update(meta_json)
+                raw_trace = meta_json.get("trace_id")
+                if trace_id is None and raw_trace is not None:
+                    trace_id = str(raw_trace)
+                r = meta_json.get("reasoning")
+                if reasoning is None and isinstance(r, str) and r.strip():
+                    reasoning = r.strip()
+
+    gen_time = round((time.perf_counter() - start_perf) * 1000, 1)
+
+    return {
+        "content": "".join(chunks),
+        "reasoning": reasoning,
         "data": None,
         "tool_calls": _jsonable(tool_calls) if tool_calls is not None else None,
         "usage": _jsonable(usage) if usage is not None else None,
         "model": model,
         "finish_reason": finish_reason,
+        "metadata": metadata or None,
+        "trace_id": trace_id,
+        "gen_time": gen_time,
+        "ttft_ms": ttft_ms,
     }
 
 
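The streaming aggregator exists because the runtime persists exactly one outcome per LLM effect; timing survives via `ttft_ms` (provider `_timing` metadata when present, otherwise the first visible output) and `gen_time`. A sketch with made-up dict-shaped chunks:

chunks = iter([
    {"content": "Hel", "metadata": {"_timing": {"ttft_ms": 42.0}}},
    {"content": "lo!", "usage": {"total_tokens": 12}},
    {"finish_reason": "stop", "model": "qwen3:4b",
     "metadata": {"trace_id": "t-123", "reasoning": "greeting"}},
])
result = _normalize_local_streaming_response(chunks)
# result["content"] == "Hello!"
# result["ttft_ms"] == 42.0        (taken from provider _timing metadata)
# result["trace_id"] == "t-123"    (first trace_id seen wins)
# result["gen_time"] is wall-clock milliseconds for the whole stream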
@@ -121,12 +426,28 @@ class LocalAbstractCoreLLMClient:
         model: str,
         llm_kwargs: Optional[Dict[str, Any]] = None,
     ):
-
+        # In this monorepo layout, `import abstractcore` can resolve to a namespace package
+        # (the outer project directory) when running from the repo root. In that case, the
+        # top-level re-export `from abstractcore import create_llm` is unavailable even though
+        # the actual module tree (e.g. `abstractcore.core.factory`) is importable.
+        #
+        # Prefer the canonical public import, but fall back to the concrete module path so
+        # in-repo tooling/tests don't depend on editable-install import ordering.
+        try:
+            from abstractcore import create_llm  # type: ignore
+        except Exception:  # pragma: no cover
+            from abstractcore.core.factory import create_llm  # type: ignore
         from abstractcore.tools.handler import UniversalToolHandler
 
         self._provider = provider
         self._model = model
-
+        kwargs = dict(llm_kwargs or {})
+        kwargs.setdefault("enable_tracing", True)
+        if kwargs.get("enable_tracing"):
+            # Keep a small in-memory ring buffer for exact request/response observability.
+            # This enables hosts (AbstractCode/AbstractFlow) to inspect trace payloads by trace_id.
+            kwargs.setdefault("max_traces", 50)
+        self._llm = create_llm(provider, model=model, **kwargs)
         self._tool_handler = UniversalToolHandler(model)
 
     def generate(
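Tracing is now on by default, with a bounded ring buffer so memory stays flat. A hedged construction sketch (provider and model strings are examples; explicit `llm_kwargs` win over the `setdefault` calls above, and passing `enable_tracing=False` suppresses the `max_traces` default entirely):

client = LocalAbstractCoreLLMClient(
    provider="ollama",
    model="qwen3:4b",
    llm_kwargs={"enable_tracing": True, "max_traces": 100},  # overrides the default of 50
)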
@@ -140,46 +461,166 @@ class LocalAbstractCoreLLMClient:
     ) -> Dict[str, Any]:
         params = dict(params or {})
 
+        stream_raw = params.pop("stream", None)
+        if stream_raw is None:
+            stream_raw = params.pop("streaming", None)
+        if isinstance(stream_raw, str):
+            stream = stream_raw.strip().lower() in {"1", "true", "yes", "y", "on"}
+        else:
+            stream = bool(stream_raw) if stream_raw is not None else False
+
         # `base_url` is a provider construction concern in local mode. We intentionally
         # do not create new providers per call unless the host explicitly chooses to.
         params.pop("base_url", None)
-
-
-
-        effective_prompt = prompt
-        if tools:
-            from abstractcore.tools import ToolDefinition
-            tool_defs = []
-            for t in tools:
-                tool_defs.append(ToolDefinition(
-                    name=t.get("name", ""),
-                    description=t.get("description", ""),
-                    parameters=t.get("parameters", {}),
-                ))
-            tools_prompt = self._tool_handler.format_tools_prompt(tool_defs)
-            effective_prompt = f"{tools_prompt}\n\nUser request: {prompt}"
+        # Reserved routing keys (used by MultiLocalAbstractCoreLLMClient).
+        params.pop("_provider", None)
+        params.pop("_model", None)
 
         resp = self._llm.generate(
-            prompt=
+            prompt=str(prompt or ""),
             messages=messages,
             system_prompt=system_prompt,
-
+            tools=tools,
+            stream=stream,
             **params,
         )
-
-
-
-
-
-
-
-
-
-
-
-
+        if stream and hasattr(resp, "__next__"):
+            result = _normalize_local_streaming_response(resp)
+        else:
+            result = _normalize_local_response(resp)
+        result["tool_calls"] = _normalize_tool_calls(result.get("tool_calls"))
+
+        # Durable observability: ensure a provider request payload exists even when the
+        # underlying provider does not attach `_provider_request` metadata.
+        #
+        # AbstractCode's `/llm --verbatim` expects `metadata._provider_request.payload.messages`
+        # to be present to display the exact system/user content that was sent.
+        try:
+            meta = result.get("metadata")
+            if not isinstance(meta, dict):
+                meta = {}
+                result["metadata"] = meta
+
+            if "_provider_request" not in meta:
+                out_messages: List[Dict[str, str]] = []
+                if isinstance(system_prompt, str) and system_prompt:
+                    out_messages.append({"role": "system", "content": system_prompt})
+                if isinstance(messages, list) and messages:
+                    # Copy dict entries defensively (caller-owned objects).
+                    out_messages.extend([dict(m) for m in messages if isinstance(m, dict)])
+
+                # Append the current prompt as the final user message unless it's already present.
+                prompt_str = str(prompt or "")
+                if prompt_str:
+                    last = out_messages[-1] if out_messages else None
+                    if not (isinstance(last, dict) and last.get("role") == "user" and last.get("content") == prompt_str):
+                        out_messages.append({"role": "user", "content": prompt_str})
+
+                payload: Dict[str, Any] = {
+                    "model": str(self._model),
+                    "messages": out_messages,
+                    "stream": bool(stream),
+                }
+                if tools is not None:
+                    payload["tools"] = tools
+
+                # Include generation params for debugging; keep JSON-safe (e.g. response_model).
+                payload["params"] = _jsonable(params) if params else {}
+
+                meta["_provider_request"] = {
+                    "transport": "local",
+                    "provider": str(self._provider),
+                    "model": str(self._model),
+                    "payload": payload,
+                }
+        except Exception:
+            # Never fail an LLM call due to observability.
+            pass
+
         return result
 
+    def get_model_capabilities(self) -> Dict[str, Any]:
+        """Get model capabilities including max_tokens, vision_support, etc.
+
+        Uses AbstractCore's architecture detection system to query model limits
+        and features. This allows the runtime to be aware of model constraints
+        for resource tracking and warnings.
+
+        Returns:
+            Dict with model capabilities. Always includes 'max_tokens' (default 32768).
+        """
+        try:
+            from abstractcore.architectures.detection import get_model_capabilities
+            return get_model_capabilities(self._model)
+        except Exception:
+            # Safe fallback if detection fails
+            return {"max_tokens": 32768}
+
+
+class MultiLocalAbstractCoreLLMClient:
+    """Local AbstractCore client with per-request provider/model routing.
+
+    This keeps the same `generate(...)` signature as AbstractCoreLLMClient by
+    using reserved keys in `params`:
+    - `_provider`: override provider for this request
+    - `_model`: override model for this request
+    """
+
+    def __init__(
+        self,
+        *,
+        provider: str,
+        model: str,
+        llm_kwargs: Optional[Dict[str, Any]] = None,
+    ):
+        self._llm_kwargs = dict(llm_kwargs or {})
+        self._default_provider = provider.strip().lower()
+        self._default_model = model.strip()
+        self._clients: Dict[Tuple[str, str], LocalAbstractCoreLLMClient] = {}
+        self._default_client = self._get_client(self._default_provider, self._default_model)
+
+        # Provide a stable underlying LLM for components that need one (e.g. summarizer).
+        self._llm = getattr(self._default_client, "_llm", None)
+
+    def _get_client(self, provider: str, model: str) -> LocalAbstractCoreLLMClient:
+        key = (provider.strip().lower(), model.strip())
+        client = self._clients.get(key)
+        if client is None:
+            client = LocalAbstractCoreLLMClient(provider=key[0], model=key[1], llm_kwargs=self._llm_kwargs)
+            self._clients[key] = client
+        return client
+
+    def generate(
+        self,
+        *,
+        prompt: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        system_prompt: Optional[str] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        params: Optional[Dict[str, Any]] = None,
+    ) -> Dict[str, Any]:
+        params = dict(params or {})
+        provider = params.pop("_provider", None)
+        model = params.pop("_model", None)
+
+        provider_str = (
+            str(provider).strip().lower() if isinstance(provider, str) and provider.strip() else self._default_provider
+        )
+        model_str = str(model).strip() if isinstance(model, str) and model.strip() else self._default_model
+
+        client = self._get_client(provider_str, model_str)
+        return client.generate(
+            prompt=prompt,
+            messages=messages,
+            system_prompt=system_prompt,
+            tools=tools,
+            params=params,
+        )
+
+    def get_model_capabilities(self) -> Dict[str, Any]:
+        # Best-effort: use default model capabilities. Per-model limits can be added later.
+        return self._default_client.get_model_capabilities()
+
 
 class HttpxRequestSender:
     """Default request sender based on httpx (sync)."""
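Routing and streaming both ride along in `params`: `_provider`/`_model` select a cached per-pair client, and `stream` accepts booleans or common string spellings. A usage sketch (provider/model names are illustrative):

router = MultiLocalAbstractCoreLLMClient(provider="ollama", model="qwen3:4b")

# Served by the default ollama/qwen3:4b client; "true" coerces to stream=True.
r1 = router.generate(prompt="hello", params={"stream": "true"})

# Routed to a lazily created, cached lmstudio client for this call only.
r2 = router.generate(
    prompt="hello again",
    params={"_provider": "lmstudio", "_model": "gpt-oss-20b", "temperature": 0.2},
)
# Both results carry metadata["_provider_request"]["payload"]["messages"] for verbatim inspection.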
@@ -196,10 +637,28 @@ class HttpxRequestSender:
         headers: Dict[str, str],
         json: Dict[str, Any],
         timeout: float,
-    ) ->
+    ) -> HttpResponse:
         resp = self._httpx.post(url, headers=headers, json=json, timeout=timeout)
         resp.raise_for_status()
-        return resp.json()
+        return HttpResponse(body=resp.json(), headers=dict(resp.headers))
+
+
+def _unwrap_http_response(value: Any) -> Tuple[Dict[str, Any], Dict[str, str]]:
+    if isinstance(value, dict):
+        return value, {}
+    body = getattr(value, "body", None)
+    headers = getattr(value, "headers", None)
+    if isinstance(body, dict) and isinstance(headers, dict):
+        return body, headers
+    json_fn = getattr(value, "json", None)
+    hdrs = getattr(value, "headers", None)
+    if callable(json_fn) and hdrs is not None:
+        try:
+            payload = json_fn()
+        except Exception:
+            payload = {}
+        return payload if isinstance(payload, dict) else {"data": _jsonable(payload)}, dict(hdrs)
+    return {"data": _jsonable(value)}, {}
 
 
 class RemoteAbstractCoreLLMClient:
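`_unwrap_http_response` keeps the sender protocol change backward compatible: legacy senders that returned `resp.json()` directly, the new `HttpResponse`, and raw httpx-like response objects all reduce to a `(body, headers)` pair. For instance:

_unwrap_http_response({"choices": []})
# ({"choices": []}, {})    legacy dict-returning sender

_unwrap_http_response(HttpResponse(body={"choices": []},
                                   headers={"X-AbstractCore-Trace-Id": "t-1"}))
# ({"choices": []}, {"X-AbstractCore-Trace-Id": "t-1"})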
@@ -210,13 +669,17 @@ class RemoteAbstractCoreLLMClient:
         *,
         server_base_url: str,
         model: str,
-
+        # Runtime authority default: long-running workflow steps may legitimately take a long time.
+        # Keep this aligned with AbstractRuntime's orchestration defaults.
+        timeout_s: Optional[float] = None,
         headers: Optional[Dict[str, str]] = None,
         request_sender: Optional[RequestSender] = None,
     ):
+        from .constants import DEFAULT_LLM_TIMEOUT_S
+
         self._server_base_url = server_base_url.rstrip("/")
         self._model = model
-        self._timeout_s = timeout_s
+        self._timeout_s = float(timeout_s) if timeout_s is not None else DEFAULT_LLM_TIMEOUT_S
         self._headers = dict(headers or {})
         self._sender = request_sender or HttpxRequestSender()
 
@@ -230,6 +693,23 @@ class RemoteAbstractCoreLLMClient:
         params: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Any]:
         params = dict(params or {})
+        req_headers = dict(self._headers)
+
+        trace_metadata = params.pop("trace_metadata", None)
+        if isinstance(trace_metadata, dict) and trace_metadata:
+            req_headers["X-AbstractCore-Trace-Metadata"] = json.dumps(
+                trace_metadata, ensure_ascii=False, separators=(",", ":")
+            )
+            header_map = {
+                "actor_id": "X-AbstractCore-Actor-Id",
+                "session_id": "X-AbstractCore-Session-Id",
+                "run_id": "X-AbstractCore-Run-Id",
+                "parent_run_id": "X-AbstractCore-Parent-Run-Id",
+            }
+            for key, header in header_map.items():
+                val = trace_metadata.get(key)
+                if val is not None and header not in req_headers:
+                    req_headers[header] = str(val)
 
         # Build OpenAI-like messages for AbstractCore server.
         out_messages: List[Dict[str, str]] = []
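Trace context flows to the server as headers: the full `trace_metadata` dict travels in one compact JSON header, and well-known IDs are mirrored into discrete headers unless the caller already set them. A sketch of the resulting request headers (values are illustrative):

params = {"trace_metadata": {"actor_id": "agent-7", "run_id": "run-42"}}
# generate() then sends, roughly:
# X-AbstractCore-Trace-Metadata: {"actor_id":"agent-7","run_id":"run-42"}
# X-AbstractCore-Actor-Id: agent-7
# X-AbstractCore-Run-Id: run-42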
@@ -245,6 +725,9 @@ class RemoteAbstractCoreLLMClient:
             "model": self._model,
             "messages": out_messages,
             "stream": False,
+            # Orchestrator policy: ask AbstractCore server to use the same timeout it expects.
+            # This keeps runtime authority even when the actual provider call happens server-side.
+            "timeout_s": self._timeout_s,
         }
 
         # Dynamic routing support (AbstractCore server feature).
@@ -268,20 +751,35 @@ class RemoteAbstractCoreLLMClient:
             body["tools"] = tools
 
         url = f"{self._server_base_url}/v1/chat/completions"
-
+        raw = self._sender.post(url, headers=req_headers, json=body, timeout=self._timeout_s)
+        resp, resp_headers = _unwrap_http_response(raw)
+        lower_headers = {str(k).lower(): str(v) for k, v in resp_headers.items()}
+        trace_id = lower_headers.get("x-abstractcore-trace-id") or lower_headers.get("x-trace-id")
 
         # Normalize OpenAI-like response.
         try:
             choice0 = (resp.get("choices") or [])[0]
             msg = choice0.get("message") or {}
-
+            meta: Dict[str, Any] = {
+                "_provider_request": {"url": url, "payload": body}
+            }
+            if trace_id:
+                meta["trace_id"] = trace_id
+            result = {
                 "content": msg.get("content"),
+                "reasoning": msg.get("reasoning"),
                 "data": None,
+                "raw_response": _jsonable(resp) if resp is not None else None,
                 "tool_calls": _jsonable(msg.get("tool_calls")) if msg.get("tool_calls") is not None else None,
                 "usage": _jsonable(resp.get("usage")) if resp.get("usage") is not None else None,
                 "model": resp.get("model"),
                 "finish_reason": choice0.get("finish_reason"),
+                "metadata": meta,
+                "trace_id": trace_id,
             }
+            result["tool_calls"] = _normalize_tool_calls(result.get("tool_calls"))
+
+            return result
         except Exception:
             # Fallback: return the raw response in JSON-safe form.
             logger.warning("Remote LLM response normalization failed; returning raw JSON")
@@ -292,5 +790,12 @@ class RemoteAbstractCoreLLMClient:
                 "usage": None,
                 "model": resp.get("model") if isinstance(resp, dict) else None,
                 "finish_reason": None,
+                "metadata": {
+                    "_provider_request": {"url": url, "payload": body},
+                    "trace_id": trace_id,
+                }
+                if trace_id
+                else {"_provider_request": {"url": url, "payload": body}},
+                "trace_id": trace_id,
+                "raw_response": _jsonable(resp) if resp is not None else None,
             }
-
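Across the local, streaming, and remote paths, 0.4.0 converges on one result contract: `content`, `reasoning`, `data`, `tool_calls` (normalized), `usage`, `model`, `finish_reason`, `metadata` (with `_provider_request`), and `trace_id`, plus `gen_time`/`ttft_ms` on local paths. A consumer sketch (`dispatch` is hypothetical):

result = client.generate(prompt="ping", params={})
for call in result.get("tool_calls") or []:
    # Every call carries the normalized shape from _normalize_tool_calls.
    dispatch(call["name"], call["arguments"], call["call_id"])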