AbstractRuntime 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. abstractruntime/__init__.py +76 -1
  2. abstractruntime/core/config.py +68 -1
  3. abstractruntime/core/models.py +5 -0
  4. abstractruntime/core/policy.py +74 -3
  5. abstractruntime/core/runtime.py +1002 -126
  6. abstractruntime/core/vars.py +8 -2
  7. abstractruntime/evidence/recorder.py +1 -1
  8. abstractruntime/history_bundle.py +772 -0
  9. abstractruntime/integrations/abstractcore/__init__.py +3 -0
  10. abstractruntime/integrations/abstractcore/default_tools.py +127 -3
  11. abstractruntime/integrations/abstractcore/effect_handlers.py +2440 -99
  12. abstractruntime/integrations/abstractcore/embeddings_client.py +69 -0
  13. abstractruntime/integrations/abstractcore/factory.py +68 -20
  14. abstractruntime/integrations/abstractcore/llm_client.py +447 -15
  15. abstractruntime/integrations/abstractcore/mcp_worker.py +1 -0
  16. abstractruntime/integrations/abstractcore/session_attachments.py +946 -0
  17. abstractruntime/integrations/abstractcore/tool_executor.py +31 -10
  18. abstractruntime/integrations/abstractcore/workspace_scoped_tools.py +561 -0
  19. abstractruntime/integrations/abstractmemory/__init__.py +3 -0
  20. abstractruntime/integrations/abstractmemory/effect_handlers.py +946 -0
  21. abstractruntime/memory/active_context.py +6 -1
  22. abstractruntime/memory/kg_packets.py +164 -0
  23. abstractruntime/memory/memact_composer.py +175 -0
  24. abstractruntime/memory/recall_levels.py +163 -0
  25. abstractruntime/memory/token_budget.py +86 -0
  26. abstractruntime/storage/__init__.py +4 -1
  27. abstractruntime/storage/artifacts.py +158 -30
  28. abstractruntime/storage/base.py +17 -1
  29. abstractruntime/storage/commands.py +339 -0
  30. abstractruntime/storage/in_memory.py +41 -1
  31. abstractruntime/storage/json_files.py +195 -12
  32. abstractruntime/storage/observable.py +38 -1
  33. abstractruntime/storage/offloading.py +433 -0
  34. abstractruntime/storage/sqlite.py +836 -0
  35. abstractruntime/visualflow_compiler/__init__.py +29 -0
  36. abstractruntime/visualflow_compiler/adapters/__init__.py +11 -0
  37. abstractruntime/visualflow_compiler/adapters/agent_adapter.py +126 -0
  38. abstractruntime/visualflow_compiler/adapters/context_adapter.py +109 -0
  39. abstractruntime/visualflow_compiler/adapters/control_adapter.py +615 -0
  40. abstractruntime/visualflow_compiler/adapters/effect_adapter.py +1051 -0
  41. abstractruntime/visualflow_compiler/adapters/event_adapter.py +307 -0
  42. abstractruntime/visualflow_compiler/adapters/function_adapter.py +97 -0
  43. abstractruntime/visualflow_compiler/adapters/memact_adapter.py +114 -0
  44. abstractruntime/visualflow_compiler/adapters/subflow_adapter.py +74 -0
  45. abstractruntime/visualflow_compiler/adapters/variable_adapter.py +316 -0
  46. abstractruntime/visualflow_compiler/compiler.py +3832 -0
  47. abstractruntime/visualflow_compiler/flow.py +247 -0
  48. abstractruntime/visualflow_compiler/visual/__init__.py +13 -0
  49. abstractruntime/visualflow_compiler/visual/agent_ids.py +29 -0
  50. abstractruntime/visualflow_compiler/visual/builtins.py +1376 -0
  51. abstractruntime/visualflow_compiler/visual/code_executor.py +214 -0
  52. abstractruntime/visualflow_compiler/visual/executor.py +2804 -0
  53. abstractruntime/visualflow_compiler/visual/models.py +211 -0
  54. abstractruntime/workflow_bundle/__init__.py +52 -0
  55. abstractruntime/workflow_bundle/models.py +236 -0
  56. abstractruntime/workflow_bundle/packer.py +317 -0
  57. abstractruntime/workflow_bundle/reader.py +87 -0
  58. abstractruntime/workflow_bundle/registry.py +587 -0
  59. abstractruntime-0.4.1.dist-info/METADATA +177 -0
  60. abstractruntime-0.4.1.dist-info/RECORD +86 -0
  61. abstractruntime-0.4.0.dist-info/METADATA +0 -167
  62. abstractruntime-0.4.0.dist-info/RECORD +0 -49
  63. {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/WHEEL +0 -0
  64. {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/entry_points.txt +0 -0
  65. {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,946 @@
1
+ """Session attachment registry + on-demand open tool helpers.
2
+
3
+ This module implements two framework primitives:
4
+ - a session-scoped attachment index (metadata-only, LLM-visible via injection)
5
+ - a runtime-owned `open_attachment` tool (bounded artifact reads)
6
+
7
+ These are intentionally integration-scoped (AbstractCore) and are executed inside
8
+ the runtime's effect handlers (not via a host ToolExecutor).
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import hashlib
14
+ import re
15
+ from dataclasses import dataclass
16
+ from pathlib import Path
17
+ from typing import Any, Dict, Iterable, List, Optional, Tuple
18
+
19
+ from ...storage.artifacts import ArtifactStore
20
+
21
+ _DEFAULT_SESSION_MEMORY_RUN_PREFIX = "session_memory_"
22
+ _SAFE_RUN_ID_PATTERN = re.compile(r"^[a-zA-Z0-9_-]+$")
23
+
24
+ _TOOL_PREFIX_RE = re.compile(r"^\[(?P<name>[^\]]+)\]:\s*(?P<body>.*)$", re.DOTALL)
25
+ _READ_FILE_HEADER_RE = re.compile(r"^File:\s*(?P<path>.+?)\s*\((?P<count>\d+)\s+lines\)\s*$")
26
+ _OPEN_ATTACHMENT_HEADER_RE = re.compile(
27
+ r"^Attachment:\s*(?P<handle>.+?)\s*\(id=(?P<artifact_id>[a-zA-Z0-9_-]+)"
28
+ r"(?:,\s*sha=(?P<sha256>[0-9a-fA-F]{8,64}))?"
29
+ r"(?:,\s*lines\s+(?P<start_line>\d+)-(?P<end_line>\d+))?"
30
+ r".*\)\s*$"
31
+ )
32
+ _LINE_NUMBER_RE = re.compile(r"^\s*(?P<line>\d+):\s")
33
+
34
+
35
def session_memory_owner_run_id(session_id: str) -> str:
    """Derive the stable run id that owns session memory for ``session_id``.

    The id is ``session_memory_<sid>`` when the trimmed session id (and the
    resulting run id) contain only ``[a-zA-Z0-9_-]``. Otherwise it falls back
    to ``session_memory_sha_<first 32 hex chars of sha256(sid)>``.

    Raises:
        ValueError: if ``session_id`` is empty or whitespace-only.
    """
    normalized = str(session_id or "").strip()
    if normalized == "":
        raise ValueError("session_id is required")

    prefix = _DEFAULT_SESSION_MEMORY_RUN_PREFIX
    candidate = f"{prefix}{normalized}"
    # Both the raw id and the prefixed id must be safe to be used verbatim.
    if _SAFE_RUN_ID_PATTERN.match(normalized) and _SAFE_RUN_ID_PATTERN.match(candidate):
        return candidate

    # Unsafe characters: use a deterministic hash so the id stays stable.
    hashed = hashlib.sha256(normalized.encode("utf-8")).hexdigest()
    return f"{prefix}sha_{hashed[:32]}"
50
+
51
+
52
+ def _normalize_handle(raw: Any) -> str:
53
+ s = str(raw or "").strip()
54
+ if not s:
55
+ return ""
56
+ if s.startswith("@"):
57
+ s = s[1:].strip()
58
+ if s.startswith("./"):
59
+ s = s[2:]
60
+ return s
61
+
62
+
63
+ def _safe_tag_subset(tags: Dict[str, str], *, limit: int = 8) -> Dict[str, str]:
64
+ out: Dict[str, str] = {}
65
+ for k in sorted(tags.keys()):
66
+ if len(out) >= limit:
67
+ break
68
+ v = tags.get(k)
69
+ if not isinstance(k, str) or not k.strip():
70
+ continue
71
+ if not isinstance(v, str) or not v.strip():
72
+ continue
73
+ if k in {"session_id"}:
74
+ continue
75
+ out[k] = v
76
+ return out
77
+
78
+
79
def list_session_attachments(
    *,
    artifact_store: ArtifactStore,
    session_id: str,
    limit: int = 20,
) -> List[Dict[str, Any]]:
    """Build the metadata-only attachment index for a session.

    Artifacts are listed from the session's memory owner run, filtered to
    those tagged ``kind == "attachment"``, ordered newest first by the string
    form of ``created_at``, and capped at ``limit`` entries.

    Each entry carries ``handle``, ``artifact_id``, ``filename``, ``sha256``
    (``None`` unless it is 8-64 hex chars), ``content_type``, ``size_bytes``,
    ``created_at`` and a bounded ``tags`` subset.
    """

    def _is_attachment(meta: Any) -> bool:
        meta_tags = getattr(meta, "tags", None)
        return isinstance(meta_tags, dict) and (meta_tags or {}).get("kind") == "attachment"

    def _created(meta: Any) -> str:
        return str(getattr(meta, "created_at", "") or "")

    def _describe(meta: Any) -> Dict[str, Any]:
        tags = dict(getattr(meta, "tags", {}) or {})
        handle = _normalize_handle(tags.get("path") or tags.get("source_path") or tags.get("filename") or "")
        filename = str(tags.get("filename") or "").strip()
        if not filename:
            # Fall back to the last path segment of the handle.
            filename = handle.rsplit("/", 1)[-1] if handle else ""
        digest = str(tags.get("sha256") or "").strip().lower() or None
        if digest and not re.fullmatch(r"[0-9a-f]{8,64}", digest):
            digest = None
        return {
            "handle": handle,
            "artifact_id": str(getattr(meta, "artifact_id", "") or ""),
            "filename": filename,
            "sha256": digest,
            "content_type": str(getattr(meta, "content_type", "") or ""),
            "size_bytes": int(getattr(meta, "size_bytes", 0) or 0),
            "created_at": _created(meta),
            "tags": _safe_tag_subset(tags),
        }

    owner_run_id = session_memory_owner_run_id(session_id)
    attachments = [meta for meta in artifact_store.list_by_run(owner_run_id) if _is_attachment(meta)]
    newest_first = sorted(attachments, key=_created, reverse=True)
    cap = max(0, int(limit))
    return [_describe(meta) for meta in newest_first[:cap]]
113
+
114
+
115
def render_session_attachments_system_message(
    entries: Iterable[Dict[str, Any]],
    *,
    max_entries: int = 20,
    max_chars: int = 4000,
    include_open_attachment_hint: bool = True,
) -> str:
    """Render the stored-attachment index as a bounded system message.

    The message is a header line, an optional ``open_attachment`` usage hint,
    and one ``- <name> (<metadata>)`` row per entry. At most ``max_entries``
    entries are considered and the result never exceeds ``max_chars``
    characters; a ``- … (truncated)`` row marks dropped content when it fits.
    Returns ``""`` when either bound is not positive. Non-dict entries are
    skipped.
    """
    entry_cap = max(0, int(max_entries))
    char_cap = max(0, int(max_chars))
    if entry_cap <= 0 or char_cap <= 0:
        return ""

    def _describe(entry: Dict[str, Any]) -> str:
        """Format one index row from an entry dict."""
        filename = str(entry.get("filename") or "").strip()
        handle = _normalize_handle(entry.get("handle") or entry.get("source_path") or entry.get("filename") or "")
        if not handle:
            handle = filename or "attachment"
        label = filename or handle
        # Avoid leaking absolute paths into the model-visible index; prefer filename.
        if label and (label.startswith("/") or re.match(r"^[a-zA-Z]:[\\\\/]", label)):
            label = label.replace("\\", "/").rsplit("/", 1)[-1]

        artifact_id = str(entry.get("artifact_id") or "").strip()
        digest = str(entry.get("sha256") or "").strip()
        content_type = str(entry.get("content_type") or "").strip()
        size = entry.get("size_bytes")
        created_at = str(entry.get("created_at") or "").strip()

        details = [
            f"id={artifact_id}" if artifact_id else "",
            f"sha={digest[:8]}…" if digest else "",
            content_type,
            f"{size:,} bytes" if isinstance(size, int) and size > 0 else "",
            f"added {created_at}" if created_at else "",
        ]
        meta = ", ".join(part for part in details if part)
        return f"- {label} ({meta})" if meta else f"- {label}"

    header = "Stored session attachments (most recent first; not necessarily active in this call). Do not mention this list:"
    rows: list[str] = [header]
    # Budget accounting counts one separator character per row.
    spent = len(header) + 1

    if include_open_attachment_hint:
        hint = (
            "Open text via: open_attachment(artifact_id='…', start_line=..., end_line=...). "
            "Open media via: open_attachment(artifact_id='…')."
        )
        hint_cost = len(hint) + 1
        if spent + hint_cost <= char_cap:
            rows.append(hint)
            spent += hint_cost

    for position, entry in enumerate(list(entries)[:entry_cap]):
        if not isinstance(entry, dict):
            continue
        row = _describe(entry)
        row_cost = len(row) + 1
        if spent + row_cost > char_cap:
            # Only mark truncation when this is not the first entry and the marker fits.
            if position > 0 and spent + 18 <= char_cap:
                rows.append("- … (truncated)")
            break
        rows.append(row)
        spent += row_cost

    rendered = "\n".join(rows)
    if len(rendered) <= char_cap:
        return rendered

    #[WARNING:TRUNCATION] bounded attachment index rendering (model-visible)
    marker = "\n- … (truncated)"
    keep = max(0, char_cap - len(marker))
    if keep <= 0:
        return rendered[:char_cap]
    trimmed = rendered[:keep].rstrip()
    return trimmed if trimmed.endswith("- … (truncated)") else trimmed + marker
195
+
196
+
197
def render_active_attachments_system_message(
    media: Any,
    *,
    max_entries: int = 12,
    max_chars: int = 2000,
) -> str:
    """Render a bounded system message listing the media attached to this call.

    Only metadata is rendered; attachment contents are never inlined. ``media``
    must be a list or tuple of strings (paths/handles) or dicts; anything else
    yields ``""``. At most ``max_entries`` items are considered and the result
    never exceeds ``max_chars`` characters. Returns ``""`` when either bound
    is not positive or there are no items.
    """
    entry_cap = max(0, int(max_entries))
    char_cap = max(0, int(max_chars))
    if entry_cap <= 0 or char_cap <= 0:
        return ""

    if media is None:
        return ""
    if not isinstance(media, (list, tuple)):
        return ""
    items = list(media)
    if not items:
        return ""

    drive_prefix = r"^[a-zA-Z]:[\\\\/]"

    def _row_for_text(item: str) -> Optional[str]:
        """Format a row for a plain string item (path or handle)."""
        raw = item.strip()
        if not raw:
            return None
        shown = _normalize_handle(raw) or raw
        # Display filename for absolute paths to avoid encouraging filesystem tool calls.
        slashed = shown.replace("\\", "/")
        if slashed.startswith("/") or re.match(drive_prefix, shown):
            return f"- {slashed.rsplit('/', 1)[-1]}"
        return f"- {shown}"

    def _row_for_mapping(item: Dict[str, Any]) -> str:
        """Format a row for a dict item (artifact ref plus optional metadata)."""
        artifact_ref = item.get("$artifact") or item.get("artifact_id") or item.get("id")
        artifact_id = str(artifact_ref or "").strip()
        handle = _normalize_handle(item.get("source_path") or item.get("path") or item.get("filename"))
        filename = str(item.get("filename") or "").strip()

        label = filename or handle
        if label and (label.startswith("/") or re.match(drive_prefix, label)):
            label = label.replace("\\", "/").rsplit("/", 1)[-1]
        if not label:
            label = f"id={artifact_id}" if artifact_id else "attachment"

        digest = str(item.get("sha256") or "").strip().lower()
        content_type = str(item.get("content_type") or "").strip()
        size = item.get("size_bytes")

        details = [
            f"id={artifact_id}" if artifact_id else "",
            f"sha={digest[:8]}…" if digest and re.fullmatch(r"[0-9a-f]{8,64}", digest) else "",
            content_type,
            f"{size:,} bytes" if isinstance(size, int) and size > 0 else "",
        ]
        meta = ", ".join(part for part in details if part)
        return f"- {label} ({meta})" if meta else f"- {label}"

    def _row(item: Any) -> Optional[str]:
        if isinstance(item, str):
            return _row_for_text(item)
        if isinstance(item, dict):
            return _row_for_mapping(item)
        return None

    header = "Active attachments are already available in this call. Use their content directly; do not call tools to re-open them. Do not mention this list."
    rows: list[str] = [header]
    # Budget accounting counts one separator character per row.
    spent = len(header) + 1

    for position, item in enumerate(items[:entry_cap]):
        row = _row(item)
        if not row:
            continue
        row_cost = len(row) + 1
        if spent + row_cost > char_cap:
            if position > 0 and spent + 18 <= char_cap:
                rows.append("- … (truncated)")
            break
        rows.append(row)
        spent += row_cost

    rendered = "\n".join(rows)
    if len(rendered) <= char_cap:
        return rendered

    #[WARNING:TRUNCATION] bounded active-attachment list rendering (model-visible)
    marker = "\n- … (truncated)"
    keep = max(0, char_cap - len(marker))
    if keep <= 0:
        return rendered[:char_cap]
    trimmed = rendered[:keep].rstrip()
    return trimmed if trimmed.endswith("- … (truncated)") else trimmed + marker
298
+
299
+
300
@dataclass(frozen=True)
class ParsedToolMessage:
    """Immutable result of splitting a ``[tool_name]: body`` tool message."""

    # Tool name taken from between the square brackets.
    tool_name: str
    # Everything after the "[tool_name]:" prefix.
    body: str
304
+
305
+
306
def parse_tool_message(text: str) -> Optional[ParsedToolMessage]:
    """Split a ``[tool_name]: body`` message into its tool name and body.

    Returns ``None`` when ``text`` does not start with a bracketed tool prefix
    or when the bracketed name is blank after trimming.
    """
    match = _TOOL_PREFIX_RE.match(str(text or ""))
    if match is None:
        return None
    tool_name = str(match.group("name") or "").strip()
    if not tool_name:
        return None
    return ParsedToolMessage(tool_name=tool_name, body=str(match.group("body") or ""))
315
+
316
+
317
def _parse_read_file_identity(body: str) -> Optional[Tuple[str, str, int, int]]:
    """Extract ``(path, sha256_of_body, start_line, end_line)`` from a read_file body.

    The first line must be a ``File: <path> (<n> lines)`` header. The line
    range comes from the first and last ``<number>: `` prefixed lines that
    follow it, defaulting to ``1-1`` when none are numbered. The digest covers
    the whole body text. Returns ``None`` when the body is blank, the header
    does not match, or the path normalizes to an empty string.
    """
    raw = str(body or "")
    if not raw.strip():
        return None
    rows = raw.splitlines()
    if not rows:
        return None

    header = _READ_FILE_HEADER_RE.match(rows[0].strip())
    if header is None:
        return None
    path = _normalize_handle(header.group("path"))
    if not path:
        return None

    numbers: List[int] = []
    for row in rows[1:]:
        numbered = _LINE_NUMBER_RE.match(row)
        if numbered is None:
            continue
        try:
            numbers.append(int(numbered.group("line")))
        except Exception:
            # Unparseable line number: ignore this row.
            continue

    if numbers:
        first, last = numbers[0], numbers[-1]
    else:
        first, last = 1, 1

    digest = hashlib.sha256(raw.encode("utf-8")).hexdigest()
    return (path, digest, first, last)
352
+
353
+
354
def _parse_open_attachment_identity(body: str) -> Optional[Tuple[str, str, Optional[str], int, int]]:
    """Extract ``(handle, artifact_id, sha256, start_line, end_line)`` from an open_attachment body.

    Only the first line (the ``Attachment: ...`` header) is inspected. The
    digest is lowercased and is ``None`` when absent or not 8-64 hex chars.
    The line range defaults to ``1-1`` when the header carries none. Returns
    ``None`` when the body is blank or the header does not match.
    """
    raw = str(body or "")
    if not raw.strip():
        return None
    rows = raw.splitlines()
    if not rows:
        return None

    header = _OPEN_ATTACHMENT_HEADER_RE.match(rows[0].strip())
    if header is None:
        return None

    handle = _normalize_handle(header.group("handle"))
    artifact_id = str(header.group("artifact_id") or "").strip()
    if not artifact_id:
        return None

    digest = str(header.group("sha256") or "").strip().lower() or None
    if digest and not re.fullmatch(r"[0-9a-f]{8,64}", digest):
        digest = None

    first, last = 1, 1
    start_text = header.group("start_line")
    end_text = header.group("end_line")
    if start_text and end_text:
        try:
            # Assign both together so a failure leaves the 1-1 default intact.
            first, last = int(start_text), int(end_text)
        except Exception:
            first, last = 1, 1

    return (handle, artifact_id, digest, first, last)
385
+
386
+
387
def dedup_messages_view(
    messages: List[Dict[str, Any]],
    *,
    session_attachments: Optional[List[Dict[str, Any]]] = None,
) -> List[Dict[str, Any]]:
    """Return a copy of ``messages`` in which repeated document reads are stubbed.

    Only ``role == "tool"`` messages produced by ``read_file`` or
    ``open_attachment`` are candidates. The first occurrence of a given read
    is kept; later occurrences with the same identity have their ``content``
    replaced by a short "(duplicate)" stub that says how to re-open the
    content. Identity is (tool, path/artifact id, digest, start line, end
    line). ``session_attachments`` (entries with a ``handle``) is used to map
    ``read_file`` paths to stored attachments for the re-open hint.

    Every returned message is a shallow copy; non-dict items in ``messages``
    are dropped. A non-list or empty ``messages`` is returned as
    ``list(messages)`` (``[]`` for ``None``).
    """
    if not isinstance(messages, list) or not messages:
        return [] if messages is None else list(messages)

    attachments_by_handle: Dict[str, List[Dict[str, Any]]] = {}
    for entry in session_attachments or []:
        if not isinstance(entry, dict):
            continue
        key = _normalize_handle(entry.get("handle"))
        if key:
            attachments_by_handle.setdefault(key, []).append(entry)

    def _short_name(value: str) -> str:
        """Reduce an absolute (POSIX or drive-letter) path to its last segment."""
        if value and (value.startswith("/") or re.match(r"^[a-zA-Z]:[\\\\/]", value)):
            return value.replace("\\", "/").rsplit("/", 1)[-1]
        return value

    def _id_and_sha(entry: Dict[str, Any]) -> Tuple[str, str]:
        return str(entry.get("artifact_id") or "").strip(), str(entry.get("sha256") or "").strip()

    def _read_file_duplicate(body: str) -> Tuple[Optional[Tuple[str, str, str, int, int]], Optional[str]]:
        """Return (identity, stub) for a read_file body, or (None, None)."""
        parsed_identity = _parse_read_file_identity(body)
        if parsed_identity is None:
            return None, None
        path, body_sha, first, last = parsed_identity

        lookup = path
        if lookup not in attachments_by_handle and lookup.startswith("/"):
            # An absolute path may end with exactly one known relative handle.
            tails = [h for h in attachments_by_handle if lookup.endswith("/" + h)]
            if len(tails) == 1:
                lookup = tails[0]

        known = attachments_by_handle.get(lookup) or []
        hint = ""
        sole_artifact_id = ""
        if len(known) == 1:
            aid, sha = _id_and_sha(known[0])
            if aid:
                sole_artifact_id = aid
                hint = f" Attached artifact: id={aid}" + (f", sha={sha[:8]}…" if sha else "")
        elif len(known) > 1:
            labels: List[str] = []
            for candidate in known[:3]:
                aid, sha = _id_and_sha(candidate)
                if aid:
                    labels.append(f"{aid}:{sha[:8]}…" if sha else aid)
            if labels:
                hint = " Attached candidates: " + ", ".join(labels) + " (specify expected_sha256)"

        if sole_artifact_id:
            reopen = (
                f"Re-open with open_attachment(artifact_id='{sole_artifact_id}', "
                f"start_line={first}, end_line={last})."
            )
        elif known:
            reopen = f"Re-open with open_attachment(artifact_id='…', start_line={first}, end_line={last})."
        else:
            reopen = ""

        text = f"[read_file]: (duplicate) File already shown above: {_short_name(path)} lines {first}-{last}."
        if reopen:
            text += "\n" + reopen
        text += hint
        return ("read_file", lookup, body_sha, first, last), text

    def _open_attachment_duplicate(body: str) -> Tuple[Optional[Tuple[str, str, str, int, int]], Optional[str]]:
        """Return (identity, stub) for an open_attachment body, or (None, None)."""
        parsed_identity = _parse_open_attachment_identity(body)
        if parsed_identity is None:
            return None, None
        handle, artifact_id, sha, first, last = parsed_identity
        shown = _short_name(handle)
        head = shown if shown else f"id={artifact_id}"
        text = (
            f"[open_attachment]: (duplicate) Attachment already shown above: {head} lines {first}-{last} (id={artifact_id}).\n"
            f"Re-open with open_attachment(artifact_id='{artifact_id}', start_line={first}, end_line={last})."
        )
        return ("open_attachment", artifact_id, sha or "unknown", first, last), text

    result: List[Dict[str, Any]] = []
    already_shown = set()

    for message in messages:
        if not isinstance(message, dict):
            continue
        role = str(message.get("role") or "").strip()
        content = message.get("content")
        content_text = "" if content is None else str(content)

        if role != "tool" or not content_text.strip():
            result.append(dict(message))
            continue

        parsed = parse_tool_message(content_text)
        if parsed is None:
            result.append(dict(message))
            continue

        identity: Optional[Tuple[str, str, str, int, int]] = None
        stub: Optional[str] = None
        if parsed.tool_name == "read_file":
            identity, stub = _read_file_duplicate(parsed.body)
        elif parsed.tool_name == "open_attachment":
            identity, stub = _open_attachment_duplicate(parsed.body)

        if identity is None:
            result.append(dict(message))
        elif identity in already_shown:
            result.append({**message, "content": stub or content_text})
        else:
            already_shown.add(identity)
            result.append(dict(message))

    return result
509
+
510
+
511
+ def execute_open_attachment(
512
+ *,
513
+ artifact_store: ArtifactStore,
514
+ session_id: str,
515
+ artifact_id: Optional[str],
516
+ handle: Optional[str],
517
+ expected_sha256: Optional[str],
518
+ start_line: int,
519
+ end_line: Optional[int],
520
+ max_chars: int,
521
+ ) -> Tuple[bool, Optional[Dict[str, Any]], Optional[str]]:
522
+ """Runtime-owned tool execution for `open_attachment`."""
523
+ sid = str(session_id or "").strip()
524
+ if not sid:
525
+ return False, {"rendered": "Error: session_id is required to open attachments."}, "session_id is required"
526
+
527
+ rid = session_memory_owner_run_id(sid)
528
+ handle_norm = _normalize_handle(handle)
529
+ artifact_id_norm = str(artifact_id or "").strip() or None
530
+
531
+ expected = str(expected_sha256 or "").strip().lower() or None
532
+ if expected and expected.startswith("sha256:"):
533
+ expected = expected.split(":", 1)[-1].strip() or None
534
+ if expected and not re.fullmatch(r"[0-9a-f]{8,64}", expected):
535
+ expected = None
536
+
537
+ # Clamp numeric args defensively.
538
+ try:
539
+ start = int(start_line)
540
+ except Exception:
541
+ start = 1
542
+ if start < 1:
543
+ start = 1
544
+ try:
545
+ end = int(end_line) if end_line is not None else None
546
+ except Exception:
547
+ end = None
548
+ if end is not None and end < start:
549
+ end = start
550
+ try:
551
+ mc = int(max_chars)
552
+ except Exception:
553
+ mc = 8000
554
+ # Contract:
555
+ # - `max_chars > 0` means "return up to this many chars" (bounded excerpt).
556
+ # - `max_chars <= 0` means "no artificial cap" (return full selection).
557
+ mc_limit: Optional[int]
558
+ if mc <= 0:
559
+ mc_limit = None
560
+ else:
561
+ mc_limit = mc
562
+
563
+ # Resolve artifact metadata.
564
+ metas = artifact_store.list_by_run(rid)
565
+ candidates = [m for m in metas if isinstance(getattr(m, "tags", None), dict) and (m.tags or {}).get("kind") == "attachment"]
566
+
567
+ selected_meta = None
568
+ if artifact_id_norm:
569
+ for m in candidates:
570
+ if str(getattr(m, "artifact_id", "") or "") == artifact_id_norm:
571
+ selected_meta = m
572
+ break
573
+ if selected_meta is None:
574
+ # Model robustness: many models confuse `artifact_id` (opaque) with `handle` (path-like).
575
+ # If a handle is available, fall back to it. Otherwise treat the provided artifact_id as
576
+ # a best-effort handle candidate (so `artifact_id=\"notes.txt\"` still works).
577
+ if not handle_norm:
578
+ handle_norm = _normalize_handle(artifact_id_norm)
579
+ artifact_id_norm = None
580
+
581
+ if selected_meta is None:
582
+ if not handle_norm:
583
+ return False, {"rendered": "Error: provide artifact_id or handle."}, "missing artifact_id/handle"
584
+
585
+ matches: list[Any] = []
586
+ for m in candidates:
587
+ tags = getattr(m, "tags", {}) or {}
588
+ p = _normalize_handle(tags.get("path") or tags.get("source_path"))
589
+ fn = _normalize_handle(tags.get("filename"))
590
+ if p == handle_norm or fn == handle_norm:
591
+ matches.append(m)
592
+ continue
593
+
594
+ # Robustness: `read_file` tool outputs typically show absolute paths, while
595
+ # attachment tags often store workspace-relative virtual paths. Treat a
596
+ # single unambiguous suffix match as equivalent (disambiguate via sha256
597
+ # when multiple candidates exist).
598
+ if handle_norm.startswith("/"):
599
+ if p and not p.startswith("/") and handle_norm.endswith("/" + p):
600
+ matches.append(m)
601
+ continue
602
+ if fn and not fn.startswith("/") and handle_norm.endswith("/" + fn):
603
+ matches.append(m)
604
+ continue
605
+ if p and p.startswith("/") and not handle_norm.startswith("/") and p.endswith("/" + handle_norm):
606
+ matches.append(m)
607
+ continue
608
+
609
+ if expected:
610
+ matches2: list[Any] = []
611
+ for m in matches:
612
+ tags = getattr(m, "tags", {}) or {}
613
+ sha = str(tags.get("sha256") or "").strip().lower()
614
+ if sha and sha == expected:
615
+ matches2.append(m)
616
+ matches = matches2
617
+
618
+ if not matches:
619
+ # Best-effort suggestions: help models recover when they misremember a handle/path.
620
+ suggestions: list[dict[str, Any]] = []
621
+ try:
622
+ query = _normalize_handle(handle_norm)
623
+ q_base = query.replace("\\", "/").strip().strip("/").rsplit("/", 1)[-1]
624
+ q_stem = q_base.rsplit(".", 1)[0].lower() if q_base else ""
625
+ scored: list[tuple[int, int, str, Any]] = []
626
+ for m in candidates:
627
+ tags = getattr(m, "tags", {}) or {}
628
+ p = _normalize_handle(tags.get("path") or tags.get("source_path") or "")
629
+ fn = _normalize_handle(tags.get("filename") or "")
630
+ cand_handle = p or fn
631
+ if not cand_handle:
632
+ continue
633
+ cand_base = cand_handle.replace("\\", "/").strip().strip("/").rsplit("/", 1)[-1]
634
+ cand_stem = cand_base.rsplit(".", 1)[0].lower() if cand_base else ""
635
+ score: Optional[int] = None
636
+ if q_base and cand_base and cand_base.lower() == q_base.lower():
637
+ score = 0
638
+ elif q_stem and cand_stem and q_stem in cand_stem:
639
+ score = 1
640
+ elif q_stem and cand_stem and cand_stem in q_stem:
641
+ score = 2
642
+ elif q_stem and q_stem in cand_handle.lower():
643
+ score = 3
644
+ if score is None:
645
+ continue
646
+ scored.append((score, len(cand_handle), cand_handle, m))
647
+ scored.sort(key=lambda x: (x[0], x[1], x[2]))
648
+ for _score, _len, h, m in scored[:5]:
649
+ tags = getattr(m, "tags", {}) or {}
650
+ suggestions.append(
651
+ {
652
+ "handle": h,
653
+ "artifact_id": str(getattr(m, "artifact_id", "") or ""),
654
+ "sha256": str(tags.get("sha256") or "").strip() or None,
655
+ }
656
+ )
657
+ except Exception:
658
+ suggestions = []
659
+
660
+ rendered = f"Error: no attachment matches handle '{handle_norm}' in this session."
661
+ if suggestions:
662
+ parts = []
663
+ for s in suggestions:
664
+ h = _normalize_handle(s.get("handle"))
665
+ aid = str(s.get("artifact_id") or "").strip()
666
+ sha = str(s.get("sha256") or "").strip()
667
+ bits: list[str] = []
668
+ if aid:
669
+ bits.append(f"id={aid}")
670
+ if sha:
671
+ bits.append(f"sha={sha[:8]}…")
672
+ meta = f" ({', '.join(bits)})" if bits else ""
673
+ parts.append(f"- {h}{meta}")
674
+ rendered += "\nDid you mean:\n" + "\n".join(parts)
675
+
676
+ return (
677
+ False,
678
+ {"rendered": rendered, "suggestions": suggestions},
679
+ "attachment not found",
680
+ )
681
+
682
+ if len(matches) > 1:
683
+ # List a few candidates to help the model disambiguate.
684
+ cand: list[dict[str, Any]] = []
685
+ for m in matches[:5]:
686
+ tags = getattr(m, "tags", {}) or {}
687
+ sha = str(tags.get("sha256") or "").strip()
688
+ cand.append({"artifact_id": str(getattr(m, "artifact_id", "") or ""), "sha256": sha or None})
689
+ return (
690
+ False,
691
+ {
692
+ "rendered": f"Error: multiple attachments match '{handle_norm}'. Provide expected_sha256 or artifact_id.",
693
+ "candidates": cand,
694
+ },
695
+ "multiple matches",
696
+ )
697
+
698
+ selected_meta = matches[0]
699
+
700
+ aid = str(getattr(selected_meta, "artifact_id", "") or "")
701
+ tags = dict(getattr(selected_meta, "tags", {}) or {})
702
+ ct = str(getattr(selected_meta, "content_type", "") or "")
703
+ size_bytes = int(getattr(selected_meta, "size_bytes", 0) or 0)
704
+ sha_tag = str(tags.get("sha256") or "").strip().lower() or None
705
+ handle_final = _normalize_handle(tags.get("path") or tags.get("source_path") or tags.get("filename") or handle_norm or "")
706
+ if not handle_final:
707
+ handle_final = aid
708
+ display_handle = handle_final
709
+ try:
710
+ disp_norm = display_handle.replace("\\", "/")
711
+ if disp_norm.startswith("/") or re.match(r"^[a-zA-Z]:[\\\\/]", disp_norm):
712
+ display_handle = disp_norm.rsplit("/", 1)[-1] or display_handle
713
+ except Exception:
714
+ display_handle = handle_final
715
+
716
+ # v0: text-only, bounded excerpts.
717
+ # v1: media attachments return a media ref (or derived text when possible) and are intended to be
718
+ # attached as `payload.media` for the next LLM call (runtime-owned behavior).
719
+ ct_low = ct.lower().strip()
720
+ text_like = ct_low.startswith("text/") or ct_low in {
721
+ "application/json",
722
+ "application/yaml",
723
+ "application/x-yaml",
724
+ "application/xml",
725
+ "application/javascript",
726
+ "application/typescript",
727
+ }
728
+ source_path = str(tags.get("source_path") or tags.get("path") or tags.get("filename") or handle_final or "").strip()
729
+ filename = str(tags.get("filename") or "").strip() or (source_path.rsplit("/", 1)[-1] if source_path else "")
730
+
731
+ text: Optional[str] = None
732
+ derived_from_content_type: Optional[str] = None
733
+ derived_text_content_type: Optional[str] = None
734
+ derived_error: Optional[str] = None
735
+
736
+ if not text_like:
737
+ # Best-effort: derive text from common document types (e.g. PDF) using AbstractCore's media stack.
738
+ #
739
+ # This keeps the tool usable for document attachments and enables KG ingestion to ground
740
+ # evidence quotes in a durable, readable text representation.
741
+ should_try_text_extract = ct_low in {"application/pdf"} or str(filename or "").lower().endswith(".pdf")
742
+
743
+ if should_try_text_extract:
744
+ artifact = artifact_store.load(aid)
745
+ if artifact is None:
746
+ return False, {"rendered": f"Error: failed to load artifact '{aid}'."}, "artifact not found"
747
+
748
+ content = getattr(artifact, "content", None)
749
+ if not isinstance(content, (bytes, bytearray)):
750
+ return False, {"rendered": "Error: failed to load attachment bytes."}, "artifact content missing"
751
+
752
+ try:
753
+ import tempfile
754
+
755
+ # Import lazily to keep this tool usable when AbstractCore media extras are not installed.
756
+ from abstractcore.media.auto_handler import AutoMediaHandler # type: ignore
757
+
758
+ handler = AutoMediaHandler(enable_events=False)
759
+ with tempfile.TemporaryDirectory(prefix="open_attachment_") as td:
760
+ ext = Path(filename).suffix if filename else ""
761
+ if not ext:
762
+ ext = ".pdf" if ct_low == "application/pdf" else ""
763
+ p = Path(td) / f"attachment{ext or ''}"
764
+ p.write_bytes(bytes(content))
765
+ res = handler.process_file(p, format_output="structured")
766
+ if getattr(res, "success", False) and getattr(res, "media_content", None) is not None:
767
+ extracted = str(getattr(res.media_content, "content", "") or "")
768
+ if extracted.strip():
769
+ text_like = True
770
+ derived_from_content_type = ct
771
+ derived_text_content_type = str(getattr(res.media_content, "mime_type", "") or "").strip() or "text/markdown"
772
+ text = extracted
773
+ else:
774
+ derived_error = "empty extracted text"
775
+ else:
776
+ derived_error = str(getattr(res, "error_message", None) or "document text extraction failed")
777
+ except Exception as e:
778
+ derived_error = str(e)
779
+
780
+ if not text_like:
781
+ media_item: Dict[str, Any] = {"$artifact": aid}
782
+ if filename:
783
+ media_item["filename"] = filename
784
+ if source_path:
785
+ media_item["source_path"] = source_path
786
+ if ct:
787
+ media_item["content_type"] = ct
788
+
789
+ header_bits: list[str] = []
790
+ header_bits.append(f"id={aid}")
791
+ if sha_tag:
792
+ header_bits.append(f"sha={sha_tag[:8]}…")
793
+ if ct:
794
+ header_bits.append(ct)
795
+ if size_bytes > 0:
796
+ header_bits.append(f"{size_bytes:,} bytes")
797
+
798
+ header = f"Attachment: {display_handle} ({', '.join(header_bits)})"
799
+ rendered = header + "\n\n(binary/media attachment; it will be attached as media for the next LLM call)"
800
+ out_media: Dict[str, Any] = {
801
+ "rendered": rendered,
802
+ "artifact_id": aid,
803
+ "handle": handle_final,
804
+ "sha256": sha_tag,
805
+ "content_type": ct,
806
+ "size_bytes": size_bytes,
807
+ "media": [media_item],
808
+ "derived_error": derived_error,
809
+ }
810
+ return True, out_media, None
811
+
812
+ if text_like and text is None:
813
+ artifact = artifact_store.load(aid)
814
+ if artifact is None:
815
+ return False, {"rendered": f"Error: failed to load artifact '{aid}'."}, "artifact not found"
816
+
817
+ try:
818
+ text = artifact.content.decode("utf-8")
819
+ except Exception:
820
+ return False, {"rendered": "Error: attachment is not valid UTF-8 text (binary?)"}, "binary content"
821
+
822
+ lines = (text or "").splitlines()
823
+ if not lines:
824
+ header = f"Attachment: {display_handle} (id={aid}" + (f", sha={sha_tag}" if sha_tag else "") + ", lines 0-0)"
825
+ return (
826
+ True,
827
+ {
828
+ "rendered": header,
829
+ "artifact_id": aid,
830
+ "handle": handle_final,
831
+ "sha256": sha_tag,
832
+ "content_type": ct,
833
+ "derived_from_content_type": derived_from_content_type,
834
+ "derived_text_content_type": derived_text_content_type,
835
+ "derived_error": derived_error,
836
+ },
837
+ None,
838
+ )
839
+
840
+ # UX: some models "preview" attachments by opening only the first ~20 lines even when the file is small.
841
+ # If the attachment is small enough to fit under the tool's hard max_chars cap and the call looks like a
842
+ # default preview, expand to the full file (still bounded).
843
+ default_budget = mc_limit == 8000
844
+ small_text = (size_bytes > 0 and size_bytes <= 50_000) or len(text) <= 30_000
845
+ preview_window = 20
846
+ preview_request = bool(
847
+ default_budget
848
+ and small_text
849
+ and start == 1
850
+ and end is not None
851
+ and end <= preview_window
852
+ and len(lines) > int(end)
853
+ )
854
+ if preview_request:
855
+ end = None
856
+ mc_limit = 50_000
857
+ elif default_budget and small_text and end is None:
858
+ mc_limit = 50_000
859
+
860
+ start_idx = min(max(start - 1, 0), len(lines) - 1)
861
+ end_idx = len(lines) - 1 if end is None else min(max(end - 1, start_idx), len(lines) - 1)
862
+ selected = lines[start_idx : end_idx + 1]
863
+
864
+ shown_start = start_idx + 1
865
+ shown_end = end_idx + 1
866
+ num_width = max(1, len(str(shown_end)))
867
+
868
+ # Build bounded, line-numbered excerpt.
869
+ header = (
870
+ f"Attachment: {display_handle} (id={aid}"
871
+ + (f", sha={sha_tag}" if sha_tag else "")
872
+ + f", lines {shown_start}-{shown_end})"
873
+ )
874
+
875
+ # Allocate budget for excerpt lines.
876
+ remaining: Optional[int]
877
+ if mc_limit is None:
878
+ remaining = None
879
+ else:
880
+ remaining = max(0, int(mc_limit) - len(header) - 2)
881
+ rendered_lines: list[str] = []
882
+ content_lines: list[str] = []
883
+ used = 0
884
+ truncated = False
885
+ for i, ln in enumerate(selected):
886
+ line_no = shown_start + i
887
+ prefix = f"{line_no:>{num_width}}: "
888
+ row = f"{prefix}{ln}"
889
+ add_len = len(row) + (1 if rendered_lines else 0)
890
+ if remaining is not None and used + add_len > remaining and rendered_lines:
891
+ truncated = True
892
+ break
893
+ if remaining is not None and used + add_len > remaining and not rendered_lines:
894
+ # Always show at least one line, even if it truncates.
895
+ if int(remaining) <= 1:
896
+ row = "…"
897
+ content_line = "…"
898
+ else:
899
+ keep = max(0, int(remaining) - 1)
900
+ if keep <= len(prefix):
901
+ row = prefix.rstrip()[:keep] + "…"
902
+ content_line = "…"
903
+ else:
904
+ body_keep = max(0, keep - len(prefix))
905
+ body = str(ln)[:body_keep].rstrip()
906
+ row = prefix + body + "…"
907
+ content_line = body + "…"
908
+ rendered_lines.append(row)
909
+ content_lines.append(content_line)
910
+ truncated = True
911
+ break
912
+ rendered_lines.append(row)
913
+ content_lines.append(str(ln))
914
+ used += add_len
915
+
916
+ rendered = header + "\n\n" + "\n".join(rendered_lines)
917
+ if truncated and mc_limit is not None and len(rendered) + 18 <= int(mc_limit):
918
+ #[WARNING:TRUNCATION] open_attachment returned a bounded excerpt
919
+ rendered += "\n\n… (truncated)"
920
+
921
+ out: Dict[str, Any] = {
922
+ "rendered": rendered,
923
+ "content_text": "\n".join(content_lines),
924
+ "artifact_id": aid,
925
+ "handle": handle_final,
926
+ "sha256": sha_tag,
927
+ "content_type": ct,
928
+ "derived_from_content_type": derived_from_content_type,
929
+ "derived_text_content_type": derived_text_content_type,
930
+ "derived_error": derived_error,
931
+ "size_bytes": size_bytes,
932
+ "start_line": shown_start,
933
+ "end_line": shown_end,
934
+ "truncated": bool(truncated),
935
+ }
936
+ return True, out, None
937
+
938
+
939
+ __all__ = [
940
+ "session_memory_owner_run_id",
941
+ "list_session_attachments",
942
+ "render_active_attachments_system_message",
943
+ "render_session_attachments_system_message",
944
+ "dedup_messages_view",
945
+ "execute_open_attachment",
946
+ ]