AbstractRuntime 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. abstractruntime/__init__.py +76 -1
  2. abstractruntime/core/config.py +68 -1
  3. abstractruntime/core/models.py +5 -0
  4. abstractruntime/core/policy.py +74 -3
  5. abstractruntime/core/runtime.py +1002 -126
  6. abstractruntime/core/vars.py +8 -2
  7. abstractruntime/evidence/recorder.py +1 -1
  8. abstractruntime/history_bundle.py +772 -0
  9. abstractruntime/integrations/abstractcore/__init__.py +3 -0
  10. abstractruntime/integrations/abstractcore/default_tools.py +127 -3
  11. abstractruntime/integrations/abstractcore/effect_handlers.py +2440 -99
  12. abstractruntime/integrations/abstractcore/embeddings_client.py +69 -0
  13. abstractruntime/integrations/abstractcore/factory.py +68 -20
  14. abstractruntime/integrations/abstractcore/llm_client.py +447 -15
  15. abstractruntime/integrations/abstractcore/mcp_worker.py +1 -0
  16. abstractruntime/integrations/abstractcore/session_attachments.py +946 -0
  17. abstractruntime/integrations/abstractcore/tool_executor.py +31 -10
  18. abstractruntime/integrations/abstractcore/workspace_scoped_tools.py +561 -0
  19. abstractruntime/integrations/abstractmemory/__init__.py +3 -0
  20. abstractruntime/integrations/abstractmemory/effect_handlers.py +946 -0
  21. abstractruntime/memory/active_context.py +6 -1
  22. abstractruntime/memory/kg_packets.py +164 -0
  23. abstractruntime/memory/memact_composer.py +175 -0
  24. abstractruntime/memory/recall_levels.py +163 -0
  25. abstractruntime/memory/token_budget.py +86 -0
  26. abstractruntime/storage/__init__.py +4 -1
  27. abstractruntime/storage/artifacts.py +158 -30
  28. abstractruntime/storage/base.py +17 -1
  29. abstractruntime/storage/commands.py +339 -0
  30. abstractruntime/storage/in_memory.py +41 -1
  31. abstractruntime/storage/json_files.py +195 -12
  32. abstractruntime/storage/observable.py +38 -1
  33. abstractruntime/storage/offloading.py +433 -0
  34. abstractruntime/storage/sqlite.py +836 -0
  35. abstractruntime/visualflow_compiler/__init__.py +29 -0
  36. abstractruntime/visualflow_compiler/adapters/__init__.py +11 -0
  37. abstractruntime/visualflow_compiler/adapters/agent_adapter.py +126 -0
  38. abstractruntime/visualflow_compiler/adapters/context_adapter.py +109 -0
  39. abstractruntime/visualflow_compiler/adapters/control_adapter.py +615 -0
  40. abstractruntime/visualflow_compiler/adapters/effect_adapter.py +1051 -0
  41. abstractruntime/visualflow_compiler/adapters/event_adapter.py +307 -0
  42. abstractruntime/visualflow_compiler/adapters/function_adapter.py +97 -0
  43. abstractruntime/visualflow_compiler/adapters/memact_adapter.py +114 -0
  44. abstractruntime/visualflow_compiler/adapters/subflow_adapter.py +74 -0
  45. abstractruntime/visualflow_compiler/adapters/variable_adapter.py +316 -0
  46. abstractruntime/visualflow_compiler/compiler.py +3832 -0
  47. abstractruntime/visualflow_compiler/flow.py +247 -0
  48. abstractruntime/visualflow_compiler/visual/__init__.py +13 -0
  49. abstractruntime/visualflow_compiler/visual/agent_ids.py +29 -0
  50. abstractruntime/visualflow_compiler/visual/builtins.py +1376 -0
  51. abstractruntime/visualflow_compiler/visual/code_executor.py +214 -0
  52. abstractruntime/visualflow_compiler/visual/executor.py +2804 -0
  53. abstractruntime/visualflow_compiler/visual/models.py +211 -0
  54. abstractruntime/workflow_bundle/__init__.py +52 -0
  55. abstractruntime/workflow_bundle/models.py +236 -0
  56. abstractruntime/workflow_bundle/packer.py +317 -0
  57. abstractruntime/workflow_bundle/reader.py +87 -0
  58. abstractruntime/workflow_bundle/registry.py +587 -0
  59. abstractruntime-0.4.1.dist-info/METADATA +177 -0
  60. abstractruntime-0.4.1.dist-info/RECORD +86 -0
  61. abstractruntime-0.4.0.dist-info/METADATA +0 -167
  62. abstractruntime-0.4.0.dist-info/RECORD +0 -49
  63. {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/WHEEL +0 -0
  64. {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/entry_points.txt +0 -0
  65. {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,772 @@
1
+ """abstractruntime.history_bundle
2
+
3
+ Runtime-owned, versioned run history export (RunHistoryBundle).
4
+
5
+ Design goals:
6
+ - Client-agnostic: any host UI can render from the same durable contract.
7
+ - Reproducible: include a workflow snapshot reference (ArtifactStore-backed).
8
+ - JSON-safe: keep payloads serializable; offload oversized leaves to ArtifactStore when possible.
9
+
10
+ This module is intentionally dependency-light (stdlib + abstractruntime stores/models).
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import hashlib
16
+ import json
17
+ from datetime import datetime, timezone
18
+ from typing import Any, Dict, Iterable, List, Optional, Tuple
19
+
20
+ from .core.models import RunState
21
+ from .storage.artifacts import ArtifactStore
22
+ from .storage.offloading import DEFAULT_MAX_INLINE_BYTES, offload_large_values
23
+
24
+ RUN_HISTORY_BUNDLE_VERSION_V1 = 1
25
+
26
+
27
+ def _utc_now_iso() -> str:
28
+ return datetime.now(timezone.utc).isoformat()
29
+
30
+
31
+ def _enum_str(raw: Any) -> str:
32
+ if raw is None:
33
+ return ""
34
+ # Enum subclasses of `str` (e.g. StepStatus) should be treated as their underlying value.
35
+ if isinstance(raw, str):
36
+ return raw
37
+ v = getattr(raw, "value", None)
38
+ if isinstance(v, str):
39
+ return v
40
+ return str(raw)
41
+
42
+
43
+ def _json_dumps_canonical(value: Any) -> bytes:
44
+ return json.dumps(value, ensure_ascii=False, separators=(",", ":"), sort_keys=True).encode("utf-8")
45
+
46
+
47
+ def _sha256_hex(content: bytes) -> str:
48
+ return hashlib.sha256(content).hexdigest()
49
+
50
+
51
+ def _extract_user_prompt_from_input(raw: Any) -> str:
52
+ if not isinstance(raw, dict):
53
+ return ""
54
+ input_data = raw.get("input_data") if isinstance(raw.get("input_data"), dict) else raw
55
+
56
+ candidates = [
57
+ input_data.get("prompt"),
58
+ input_data.get("message"),
59
+ input_data.get("task"),
60
+ ]
61
+ ctx = input_data.get("context") if isinstance(input_data.get("context"), dict) else None
62
+ if isinstance(ctx, dict):
63
+ candidates.extend([ctx.get("task"), ctx.get("message")])
64
+
65
+ for c in candidates:
66
+ if isinstance(c, str) and c.strip():
67
+ return c.strip()
68
+
69
+ msgs = ctx.get("messages") if isinstance(ctx, dict) else None
70
+ if isinstance(msgs, list):
71
+ for m in msgs:
72
+ if not isinstance(m, dict):
73
+ continue
74
+ role = str(m.get("role") or "").strip()
75
+ if role != "user":
76
+ continue
77
+ content = m.get("content")
78
+ if isinstance(content, str) and content.strip():
79
+ return content.strip()
80
+ return ""
81
+
82
+
83
+ def _extract_context_attachments_from_input(raw: Any) -> List[Dict[str, Any]]:
84
+ if not isinstance(raw, dict):
85
+ return []
86
+ input_data = raw.get("input_data") if isinstance(raw.get("input_data"), dict) else raw
87
+ ctx = input_data.get("context") if isinstance(input_data.get("context"), dict) else None
88
+ atts = ctx.get("attachments") if isinstance(ctx, dict) else None
89
+ if not isinstance(atts, list):
90
+ return []
91
+ out: List[Dict[str, Any]] = []
92
+ for a in atts:
93
+ if isinstance(a, dict):
94
+ out.append(dict(a))
95
+ return out
96
+
97
+
98
+ def _parse_iso_ms(raw: Any) -> Optional[int]:
99
+ s = str(raw or "").strip()
100
+ if not s:
101
+ return None
102
+ if s.endswith("Z"):
103
+ s = f"{s[:-1]}+00:00"
104
+ try:
105
+ return int(datetime.fromisoformat(s).timestamp() * 1000)
106
+ except Exception:
107
+ return None
108
+
109
+
110
+ def _parse_usage_summary(value: Any) -> Optional[Dict[str, int]]:
111
+ if not isinstance(value, dict):
112
+ return None
113
+ v = value
114
+ in_tok = v.get("input_tokens")
115
+ if in_tok is None:
116
+ in_tok = v.get("prompt_tokens")
117
+ if in_tok is None:
118
+ in_tok = v.get("prompt")
119
+ if in_tok is None:
120
+ in_tok = v.get("input")
121
+ if in_tok is None:
122
+ in_tok = v.get("in")
123
+
124
+ out_tok = v.get("output_tokens")
125
+ if out_tok is None:
126
+ out_tok = v.get("completion_tokens")
127
+ if out_tok is None:
128
+ out_tok = v.get("completion")
129
+ if out_tok is None:
130
+ out_tok = v.get("output")
131
+ if out_tok is None:
132
+ out_tok = v.get("out")
133
+
134
+ total_tok = v.get("total_tokens")
135
+ if total_tok is None:
136
+ total_tok = v.get("total")
137
+
138
+ try:
139
+ in_i = int(in_tok) if in_tok is not None and not isinstance(in_tok, bool) else 0
140
+ except Exception:
141
+ in_i = 0
142
+ try:
143
+ out_i = int(out_tok) if out_tok is not None and not isinstance(out_tok, bool) else 0
144
+ except Exception:
145
+ out_i = 0
146
+ try:
147
+ total_i = int(total_tok) if total_tok is not None and not isinstance(total_tok, bool) else in_i + out_i
148
+ except Exception:
149
+ total_i = in_i + out_i
150
+
151
+ if in_i <= 0 and out_i <= 0 and total_i <= 0:
152
+ return None
153
+ return {
154
+ "input_tokens": max(0, int(in_i)),
155
+ "output_tokens": max(0, int(out_i)),
156
+ "total_tokens": max(0, int(total_i)),
157
+ }
158
+
159
+
160
def _extract_usage_from_ledger_record(rec: Dict[str, Any]) -> Optional[Dict[str, int]]:
    """Dig a token-usage dict out of a ledger record's result.

    Tries result.{usage,token_usage,tokens} first, then the same keys
    under result.output; normalizes via _parse_usage_summary. Returns
    None when no usage dict is found.
    """
    result = rec.get("result")
    if not isinstance(result, dict):
        return None

    def _usage_of(container: Dict[str, Any]) -> Any:
        return container.get("usage") or container.get("token_usage") or container.get("tokens")

    usage = _usage_of(result)
    if not isinstance(usage, dict):
        nested = result.get("output")
        if isinstance(nested, dict):
            usage = _usage_of(nested)
        if not isinstance(usage, dict):
            return None
    return _parse_usage_summary(usage)
172
+
173
+
174
def _extract_repl_stats_from_ledger(records: Iterable[Dict[str, Any]]) -> Dict[str, Any]:
    """Aggregate REPL-style stats from ledger records.

    Computes the observed wall-clock window, counts completed llm_call and
    tool_calls effects, sums token usage, and derives tokens/second when
    both a positive duration and a positive token total exist.
    """
    llm_count = 0
    tool_count = 0
    totals = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
    first_ms: Optional[int] = None
    last_ms: Optional[int] = None

    for rec in records or []:
        if not isinstance(rec, dict):
            continue
        status = _enum_str(rec.get("status")).strip()

        # Widen the observed time window using ended_at, else started_at.
        started = _parse_iso_ms(rec.get("started_at"))
        ended = _parse_iso_ms(rec.get("ended_at"))
        stamp = started if ended is None else ended
        if stamp is not None:
            first_ms = stamp if first_ms is None else min(first_ms, stamp)
            last_ms = stamp if last_ms is None else max(last_ms, stamp)

        effect = rec.get("effect")
        if not isinstance(effect, dict):
            effect = None
        effect_type = str((effect or {}).get("type") or "").strip()

        if status == "completed" and effect_type == "llm_call":
            llm_count += 1
            usage = _extract_usage_from_ledger_record(rec)
            if usage:
                totals["input_tokens"] += int(usage.get("input_tokens") or 0)
                totals["output_tokens"] += int(usage.get("output_tokens") or 0)
                totals["total_tokens"] += int(usage.get("total_tokens") or 0)

        if status == "completed" and effect_type == "tool_calls":
            payload = None
            if effect is not None and isinstance(effect.get("payload"), dict):
                payload = effect.get("payload")
            calls = payload.get("tool_calls") if isinstance(payload, dict) else None
            if isinstance(calls, list):
                # Count every non-None entry as one tool invocation.
                tool_count += sum(1 for call in calls if call is not None)

    span_ms = 0
    if first_ms is not None and last_ms is not None:
        span_ms = int(max(0, last_ms - first_ms))
    rate: Optional[float] = None
    if span_ms > 0 and totals.get("total_tokens", 0) > 0:
        rate = float(totals["total_tokens"]) / (float(span_ms) / 1000.0)
    return {
        "duration_ms": span_ms,
        "llm_calls": int(llm_count),
        "tool_calls": int(tool_count),
        "usage": totals,
        "tok_s": rate,
    }
220
+
221
+
222
def _extract_flow_end_output_from_ledger(records: List[Dict[str, Any]]) -> Tuple[str, Optional[Dict[str, Any]]]:
    """Best-effort extract the final assistant response from ledger records.

    Mirrors AbstractCode Web's `extract_flow_end_output` heuristics.
    Returns (response_text, meta_obj_or_none).
    """

    def _pick_textish(v: Any) -> str:
        # Only plain scalars count as renderable text; containers yield "".
        if isinstance(v, str):
            return v.strip()
        if v is None:
            return ""
        if isinstance(v, (int, float, bool)):
            return str(v)
        return ""

    # Scan newest-first so the latest matching record wins.
    for rec in reversed(records or []):
        if not isinstance(rec, dict):
            continue

        status = _enum_str(rec.get("status")).strip()
        eff = rec.get("effect") if isinstance(rec.get("effect"), dict) else None
        eff_type = str((eff or {}).get("type") or "").strip()

        # answer_user: common in chat-like flows.
        if status == "completed" and eff_type == "answer_user":
            res = rec.get("result") if isinstance(rec.get("result"), dict) else {}
            msg = res.get("message")
            if msg is None and isinstance(eff, dict):
                # Fall back to the effect payload when the result carries no message.
                payload = eff.get("payload") if isinstance(eff.get("payload"), dict) else {}
                msg = payload.get("message") or payload.get("text") or payload.get("content")
            text = _pick_textish(msg)
            if text:
                return (text, None)

        # output node: record.result.output.{answer/response/message/...}
        result = rec.get("result") if isinstance(rec.get("result"), dict) else None
        out0 = result.get("output") if isinstance(result, dict) else None
        if isinstance(out0, str):
            s = out0.strip()
            if s:
                return (s, None)
        if isinstance(out0, dict):
            msg = (
                _pick_textish(out0.get("answer"))
                or _pick_textish(out0.get("response"))
                or _pick_textish(out0.get("message"))
                or _pick_textish(out0.get("text"))
                or _pick_textish(out0.get("content"))
            )
            if msg:
                # This is the only branch that also surfaces a meta dict.
                meta = out0.get("meta") if isinstance(out0.get("meta"), dict) else None
                return (msg, dict(meta) if isinstance(meta, dict) else None)

        # Terminal resume completion record (runtime may append an output envelope).
        if status == "completed" and isinstance(result, dict):
            out_res = result.get("output") if isinstance(result.get("output"), dict) else None
            if isinstance(out_res, dict):
                msg = (
                    _pick_textish(out_res.get("answer"))
                    or _pick_textish(out_res.get("response"))
                    or _pick_textish(out_res.get("message"))
                    or _pick_textish(out_res.get("text"))
                    or _pick_textish(out_res.get("content"))
                )
                if msg:
                    return (msg, None)

    # No record produced usable text.
    return ("", None)
291
+
292
+
293
def persist_workflow_snapshot(
    *,
    run_store: Any,
    artifact_store: ArtifactStore,
    run_id: str,
    workflow_id: str,
    snapshot: Dict[str, Any],
    format: str,
) -> Dict[str, Any]:
    """Persist a workflow snapshot for a run and store a small ref in run.vars.

    Returns the stored ref dict (JSON-safe), which is also written to:
    run.vars["_runtime"]["workflow_snapshot"].

    Raises:
        ValueError: when run_id/workflow_id are blank or snapshot is not a dict.
        KeyError: when the run does not exist.
        RuntimeError: when the run store or artifact store fails.
    """

    rid = str(run_id or "").strip()
    wid = str(workflow_id or "").strip()
    fmt = str(format or "").strip() or "unknown"
    if not rid:
        raise ValueError("run_id is required")
    if not wid:
        raise ValueError("workflow_id is required")
    if not isinstance(snapshot, dict):
        raise ValueError("snapshot must be a dict")

    run: Optional[RunState]
    try:
        run = run_store.load(rid)
    except Exception as e:
        raise RuntimeError(f"Failed to load run '{rid}': {e}") from e
    if run is None:
        raise KeyError(f"Run '{rid}' not found")

    # Idempotency: if a snapshot ref already exists, keep it.
    vars_obj = getattr(run, "vars", None)
    if not isinstance(vars_obj, dict):
        vars_obj = {}
    run.vars = vars_obj  # type: ignore[assignment]

    # Ensure the private "_runtime" namespace exists inside run.vars.
    runtime_ns = vars_obj.get("_runtime")
    if not isinstance(runtime_ns, dict):
        runtime_ns = {}
    vars_obj["_runtime"] = runtime_ns

    existing = runtime_ns.get("workflow_snapshot")
    if isinstance(existing, dict) and str(existing.get("artifact_id") or "").strip():
        return dict(existing)

    # Hash the canonical JSON form so equal snapshots get equal digests.
    content = _json_dumps_canonical(snapshot)
    sha = _sha256_hex(content)

    tags = {
        "kind": "workflow_snapshot",
        "workflow_id": wid,
        "format": fmt,
        "sha256": sha,
    }
    meta = artifact_store.store_json(snapshot, run_id=rid, tags=tags)
    artifact_id = str(getattr(meta, "artifact_id", "") or "").strip()
    if not artifact_id:
        raise RuntimeError("ArtifactStore returned empty artifact_id for workflow snapshot")

    # Small JSON-safe ref; the snapshot body itself lives in the ArtifactStore.
    ref: Dict[str, Any] = {
        "workflow_id": wid,
        "format": fmt,
        "sha256": sha,
        "artifact_id": artifact_id,
        "created_at": _utc_now_iso(),
    }
    runtime_ns["workflow_snapshot"] = ref
    try:
        run_store.save(run)
    except Exception as e:
        raise RuntimeError(f"Failed to persist workflow snapshot ref to run '{rid}': {e}") from e
    # Return a copy so callers cannot mutate the stored ref in place.
    return dict(ref)
368
+
369
+
370
+ def _list_descendant_run_ids(*, run_store: Any, root_run_id: str, limit: int = 5000) -> List[str]:
371
+ """Return descendant run ids (BFS) when the RunStore supports list_children()."""
372
+ out: List[str] = []
373
+ list_children = getattr(run_store, "list_children", None)
374
+ if not callable(list_children):
375
+ return out
376
+ queue: List[str] = [root_run_id]
377
+ seen: set[str] = set()
378
+ while queue and len(out) < limit:
379
+ cur = str(queue.pop(0) or "").strip()
380
+ if not cur or cur in seen:
381
+ continue
382
+ seen.add(cur)
383
+ try:
384
+ kids = list_children(parent_run_id=cur) or []
385
+ except Exception:
386
+ kids = []
387
+ for c in kids:
388
+ cid = getattr(c, "run_id", None)
389
+ cid2 = str(cid or "").strip()
390
+ if not cid2 or cid2 in seen:
391
+ continue
392
+ out.append(cid2)
393
+ queue.append(cid2)
394
+ return out
395
+
396
+
397
def _best_effort_session_turns(
    *,
    run_store: Any,
    ledger_store: Any,
    session_id: str,
    limit: int,
) -> List[Dict[str, Any]]:
    """Best-effort session turn list (root runs only).

    This is a pragmatic bridge for thin clients (AbstractCode Web/mobile) until a more
    explicit session history contract exists.

    Returns a chronologically-sorted list of turn dicts with prompt/answer/
    attachments/stats fields; every lookup is wrapped so a failing store or
    malformed run degrades to an empty/None field rather than an exception.
    """

    def _classify_turn(*, workflow_id: str, vars_obj: Any) -> str:
        # Buckets a root run as internal / scheduled / chat / generic run.
        wid = str(workflow_id or "")
        if wid.startswith("__"):
            return "internal"
        if wid.startswith("scheduled:"):
            return "scheduled"
        if isinstance(vars_obj, dict):
            meta = vars_obj.get("_meta")
            if isinstance(meta, dict) and isinstance(meta.get("schedule"), dict):
                return "scheduled"
            ctx0 = vars_obj.get("context")
            if isinstance(ctx0, dict) and isinstance(ctx0.get("messages"), list):
                return "chat"
        return "run"

    def _extract_answer_from_run_output(run: Any) -> Tuple[str, Optional[Dict[str, Any]]]:
        # Pulls (answer_text, meta) straight from run.output when possible.
        out = getattr(run, "output", None)
        if not isinstance(out, dict):
            return ("", None)

        # Common envelopes used by AbstractCode agent flows.
        candidates = [
            out.get("response"),
            out.get("answer"),
            out.get("message"),
            out.get("text"),
            out.get("content"),
        ]
        # Nested results are sometimes stored under `result` or `output`.
        nested = out.get("result") if isinstance(out.get("result"), dict) else None
        if nested is None:
            nested = out.get("output") if isinstance(out.get("output"), dict) else None
        if isinstance(nested, dict):
            candidates.extend(
                [
                    nested.get("response"),
                    nested.get("answer"),
                    nested.get("message"),
                    nested.get("text"),
                    nested.get("content"),
                ]
            )

        answer = ""
        for c in candidates:
            if isinstance(c, str) and c.strip():
                answer = c.strip()
                break

        # Meta prefers the top-level envelope, then the nested one.
        meta0 = out.get("meta") if isinstance(out.get("meta"), dict) else None
        if meta0 is None and isinstance(nested, dict):
            meta0 = nested.get("meta") if isinstance(nested.get("meta"), dict) else None
        meta = dict(meta0) if isinstance(meta0, dict) else None
        return (answer, meta)

    sid = str(session_id or "").strip()
    if not sid:
        return []
    list_runs = getattr(run_store, "list_runs", None)
    if not callable(list_runs):
        return []

    # RunStore has no session_id index today; we scan a bounded window and filter.
    try:
        candidates = list_runs(limit=max(1000, int(limit) * 5))
    except Exception:
        candidates = []

    # Keep only root (parent-less) runs of this session, dropping internal ones.
    roots: List[RunState] = []
    for r in candidates or []:
        try:
            rid = str(getattr(r, "run_id", "") or "").strip()
            if not rid:
                continue
            if str(getattr(r, "session_id", "") or "").strip() != sid:
                continue
            if str(getattr(r, "parent_run_id", "") or "").strip():
                continue
            vars_obj = getattr(r, "vars", None)
            wid = str(getattr(r, "workflow_id", "") or "")
            kind = _classify_turn(workflow_id=wid, vars_obj=vars_obj)
            if kind == "internal":
                continue
            roots.append(r)
        except Exception:
            continue

    # Prefer chat-like turns when present (avoid scheduled wrapper runs polluting chat replay).
    if roots:
        chat_roots: List[RunState] = []
        for r in roots:
            wid = str(getattr(r, "workflow_id", "") or "")
            vars_obj = getattr(r, "vars", None)
            if _classify_turn(workflow_id=wid, vars_obj=vars_obj) == "chat":
                chat_roots.append(r)
        if chat_roots:
            roots = chat_roots

    def _ts_key(r: Any) -> float:
        # Sort key: created_at, then updated_at; unparseable timestamps sort first.
        # NOTE(review): fromisoformat rejects a trailing "Z" before Python 3.11,
        # in which case such runs fall back to 0.0 — confirm timestamp format.
        for k in ("created_at", "updated_at"):
            try:
                v = getattr(r, k, None)
                if isinstance(v, str) and v.strip():
                    return float(datetime.fromisoformat(v).timestamp())
            except Exception:
                continue
        return 0.0

    roots.sort(key=_ts_key)
    # Keep only the most recent `limit` turns.
    roots = roots[-int(limit) :] if limit > 0 else roots

    # Per-run ledger cache: stats aggregation below re-reads the same ledgers.
    ledger_cache: Dict[str, List[Dict[str, Any]]] = {}

    def _ledger_for(run_id: str) -> List[Dict[str, Any]]:
        # Memoized, failure-tolerant fetch of a run's ledger records.
        rid2 = str(run_id or "").strip()
        if not rid2:
            return []
        cached = ledger_cache.get(rid2)
        if isinstance(cached, list):
            return cached
        try:
            raw = ledger_store.list(rid2) if hasattr(ledger_store, "list") else []
        except Exception:
            raw = []
        records = [x for x in raw if isinstance(x, dict)] if isinstance(raw, list) else []
        ledger_cache[rid2] = records
        return records

    out: List[Dict[str, Any]] = []
    for r in roots:
        rid = str(getattr(r, "run_id", "") or "").strip()
        vars_obj = getattr(r, "vars", None)
        input_data = dict(vars_obj) if isinstance(vars_obj, dict) else {}
        wid = str(getattr(r, "workflow_id", "") or "").strip()
        kind = _classify_turn(workflow_id=wid, vars_obj=vars_obj)
        prompt = _extract_user_prompt_from_input(input_data)
        attachments = _extract_context_attachments_from_input(input_data)
        # status may be an enum (use .value) or already a plain string.
        status = getattr(getattr(r, "status", None), "value", None) or str(getattr(r, "status", "") or "")
        created_at = str(getattr(r, "created_at", "") or "").strip() or None
        updated_at = str(getattr(r, "updated_at", "") or "").strip() or None

        # Answer: run.output first, then fall back to the ledger heuristics.
        answer, answer_meta = _extract_answer_from_run_output(r)
        if not answer:
            try:
                ledger = _ledger_for(rid)
                if ledger:
                    answer, answer_meta = _extract_flow_end_output_from_ledger(ledger)
            except Exception:
                answer = ""
                answer_meta = None

        # Stats: aggregate over this root run plus all of its descendants.
        stats: Optional[Dict[str, Any]] = None
        try:
            run_ids = [rid]
            run_ids.extend(_list_descendant_run_ids(run_store=run_store, root_run_id=rid))
            all_records: List[Dict[str, Any]] = []
            for rid2 in run_ids:
                all_records.extend(_ledger_for(rid2))
            stats = _extract_repl_stats_from_ledger(all_records)
        except Exception:
            stats = None

        out.append(
            {
                "run_id": rid,
                "workflow_id": wid or None,
                "kind": kind,
                "status": str(status),
                "created_at": created_at,
                "updated_at": updated_at,
                "prompt": prompt or None,
                "attachments": attachments,
                "answer": answer or None,
                "answer_meta": answer_meta,
                "stats": stats,
            }
        )
    return out
588
+
589
+
590
def export_run_history_bundle(
    *,
    run_id: str,
    run_store: Any,
    ledger_store: Any,
    artifact_store: Optional[ArtifactStore] = None,
    include_subruns: bool = True,
    include_session: bool = False,
    session_turn_limit: int = 200,
    ledger_mode: str = "tail",  # "tail" | "full"
    ledger_max_items: int = 2000,
    # ledger_max_items <= 0 with "tail" mode is treated as "full" (no cap).
) -> Dict[str, Any]:
    """Export a versioned RunHistoryBundle dict (v1).

    Notes:
    - This function is pure export (no network); gateway hosts should expose it as an endpoint.
    - Payload is JSON-safe; when ArtifactStore is available, very large leaves are offloaded.

    Raises:
        ValueError: when run_id is blank.
        KeyError: when the run does not exist.
        RuntimeError: when the run store fails to load the run.
    """

    rid = str(run_id or "").strip()
    if not rid:
        raise ValueError("run_id is required")

    run: Optional[RunState]
    try:
        run = run_store.load(rid)
    except Exception as e:
        raise RuntimeError(f"Failed to load run '{rid}': {e}") from e
    if run is None:
        raise KeyError(f"Run '{rid}' not found")

    # Collect run tree ids (root + descendants).
    run_ids: List[str] = [rid]
    if include_subruns:
        run_ids.extend(_list_descendant_run_ids(run_store=run_store, root_run_id=rid))

    # Snapshot ref (best-effort, stored under run.vars._runtime.workflow_snapshot).
    snapshot_ref = None
    try:
        vars_obj = getattr(run, "vars", None)
        runtime_ns = vars_obj.get("_runtime") if isinstance(vars_obj, dict) else None
        ws = runtime_ns.get("workflow_snapshot") if isinstance(runtime_ns, dict) else None
        if isinstance(ws, dict) and str(ws.get("artifact_id") or "").strip():
            snapshot_ref = dict(ws)
    except Exception:
        snapshot_ref = None

    ledgers: Dict[str, Any] = {}
    timeline: List[Dict[str, Any]] = []

    def _append_timeline_items(*, run_id2: str, items_with_cursor: List[Dict[str, Any]]) -> None:
        # Flattens one run's cursored ledger window into compact timeline rows.
        for it in items_with_cursor:
            cursor = it.get("cursor")
            rec = it.get("record")
            if not isinstance(rec, dict):
                continue
            node_id = str(rec.get("node_id") or "").strip() or None
            status = _enum_str(rec.get("status")).strip() or None
            eff = rec.get("effect") if isinstance(rec.get("effect"), dict) else None
            eff_type = str((eff or {}).get("type") or "").strip() or None
            started_at = rec.get("started_at")
            # A record with no ended_at is treated as zero-duration.
            ended_at = rec.get("ended_at") or rec.get("started_at")
            duration_ms = None
            try:
                if started_at and ended_at:
                    s = datetime.fromisoformat(str(started_at))
                    e = datetime.fromisoformat(str(ended_at))
                    duration_ms = max(0.0, (e - s).total_seconds() * 1000.0)
            except Exception:
                duration_ms = None
            timeline.append(
                {
                    "run_id": run_id2,
                    "cursor": cursor,
                    "node_id": node_id,
                    "status": status,
                    "effect_type": eff_type,
                    "started_at": started_at,
                    "ended_at": ended_at,
                    "duration_ms": round(float(duration_ms), 2) if isinstance(duration_ms, (int, float)) else None,
                }
            )

    for rid2 in run_ids:
        try:
            raw = ledger_store.list(rid2) if hasattr(ledger_store, "list") else []
        except Exception:
            raw = []
        records = [r for r in raw if isinstance(r, dict)] if isinstance(raw, list) else []
        total = len(records)

        mode = str(ledger_mode or "tail").strip().lower()
        max_items_raw = int(ledger_max_items)
        if max_items_raw <= 0 and mode == "tail":
            mode = "full"
        max_items = max_items_raw if max_items_raw > 0 else 2000

        if mode != "full":
            # Tail mode by default (bounded, good for UI). Keep absolute cursor indices.
            if total > max_items:
                start_idx = total - max_items
                window = records[start_idx:]
                cursor_start = start_idx + 1
            else:
                window = records
                cursor_start = 1
        else:
            window = records
            cursor_start = 1

        # Cursors are 1-based absolute positions within the full ledger.
        items_with_cursor: List[Dict[str, Any]] = []
        for i, rec in enumerate(window):
            items_with_cursor.append({"cursor": cursor_start + i, "record": rec})

        ledgers[rid2] = {
            "run_id": rid2,
            "total": int(total),
            "cursor_start": int(cursor_start),
            "cursor_end": int(cursor_start + len(window) - 1) if window else int(cursor_start - 1),
            "items": items_with_cursor,
        }
        _append_timeline_items(run_id2=rid2, items_with_cursor=items_with_cursor)

    # Session section (best-effort, bounded).
    session_section = None
    if include_session:
        sid = str(getattr(run, "session_id", "") or "").strip()
        if sid:
            session_section = {
                "session_id": sid,
                "turns": _best_effort_session_turns(
                    run_store=run_store,
                    ledger_store=ledger_store,
                    session_id=sid,
                    limit=max(1, int(session_turn_limit) if int(session_turn_limit) > 0 else 200),
                ),
            }

    # Filtered input_data (exclude private namespaces). This mirrors gateway's behavior but is runtime-owned.
    vars_obj = getattr(run, "vars", None)
    input_data = dict(vars_obj) if isinstance(vars_obj, dict) else {}
    filtered_input_data = {k: v for k, v in input_data.items() if isinstance(k, str) and not k.startswith("_")}

    # Offload oversized leaves when possible (keeps HTTP payload bounded).
    if artifact_store is not None:
        try:
            filtered_input_data = offload_large_values(
                filtered_input_data,
                artifact_store=artifact_store,
                run_id=rid,
                max_inline_bytes=DEFAULT_MAX_INLINE_BYTES,
                base_tags={"source": "history_bundle", "kind": "input_data"},
                root_path="input_data",
                allow_root_replace=False,
            )
        except Exception:
            # Best-effort: fall back to the inline (unfiltered-size) payload.
            pass

    # Final bundle.
    bundle: Dict[str, Any] = {
        "version": RUN_HISTORY_BUNDLE_VERSION_V1,
        "generated_at": _utc_now_iso(),
        "root_run_id": rid,
        "run": {
            "run_id": str(getattr(run, "run_id", "") or ""),
            "workflow_id": str(getattr(run, "workflow_id", "") or ""),
            # status may be an enum (use .value) or already a plain string.
            "status": getattr(getattr(run, "status", None), "value", None) or str(getattr(run, "status", "") or ""),
            "current_node": str(getattr(run, "current_node", "") or ""),
            "created_at": getattr(run, "created_at", None),
            "updated_at": getattr(run, "updated_at", None),
            "actor_id": getattr(run, "actor_id", None),
            "session_id": getattr(run, "session_id", None),
            "parent_run_id": getattr(run, "parent_run_id", None),
            "error": getattr(run, "error", None),
            # waiting is serialized via __dict__ — assumes a plain dataclass-like
            # object with JSON-safe attributes; TODO confirm against WaitState.
            "waiting": getattr(run, "waiting", None).__dict__ if getattr(run, "waiting", None) is not None else None,
        },
        "workflow_snapshot": snapshot_ref,
        "input_data": filtered_input_data,
        "ledgers": ledgers,
        "timeline": timeline,
        "session": session_section,
    }
    return bundle