AbstractRuntime 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractruntime/__init__.py +76 -1
- abstractruntime/core/config.py +68 -1
- abstractruntime/core/models.py +5 -0
- abstractruntime/core/policy.py +74 -3
- abstractruntime/core/runtime.py +1002 -126
- abstractruntime/core/vars.py +8 -2
- abstractruntime/evidence/recorder.py +1 -1
- abstractruntime/history_bundle.py +772 -0
- abstractruntime/integrations/abstractcore/__init__.py +3 -0
- abstractruntime/integrations/abstractcore/default_tools.py +127 -3
- abstractruntime/integrations/abstractcore/effect_handlers.py +2440 -99
- abstractruntime/integrations/abstractcore/embeddings_client.py +69 -0
- abstractruntime/integrations/abstractcore/factory.py +68 -20
- abstractruntime/integrations/abstractcore/llm_client.py +447 -15
- abstractruntime/integrations/abstractcore/mcp_worker.py +1 -0
- abstractruntime/integrations/abstractcore/session_attachments.py +946 -0
- abstractruntime/integrations/abstractcore/tool_executor.py +31 -10
- abstractruntime/integrations/abstractcore/workspace_scoped_tools.py +561 -0
- abstractruntime/integrations/abstractmemory/__init__.py +3 -0
- abstractruntime/integrations/abstractmemory/effect_handlers.py +946 -0
- abstractruntime/memory/active_context.py +6 -1
- abstractruntime/memory/kg_packets.py +164 -0
- abstractruntime/memory/memact_composer.py +175 -0
- abstractruntime/memory/recall_levels.py +163 -0
- abstractruntime/memory/token_budget.py +86 -0
- abstractruntime/storage/__init__.py +4 -1
- abstractruntime/storage/artifacts.py +158 -30
- abstractruntime/storage/base.py +17 -1
- abstractruntime/storage/commands.py +339 -0
- abstractruntime/storage/in_memory.py +41 -1
- abstractruntime/storage/json_files.py +195 -12
- abstractruntime/storage/observable.py +38 -1
- abstractruntime/storage/offloading.py +433 -0
- abstractruntime/storage/sqlite.py +836 -0
- abstractruntime/visualflow_compiler/__init__.py +29 -0
- abstractruntime/visualflow_compiler/adapters/__init__.py +11 -0
- abstractruntime/visualflow_compiler/adapters/agent_adapter.py +126 -0
- abstractruntime/visualflow_compiler/adapters/context_adapter.py +109 -0
- abstractruntime/visualflow_compiler/adapters/control_adapter.py +615 -0
- abstractruntime/visualflow_compiler/adapters/effect_adapter.py +1051 -0
- abstractruntime/visualflow_compiler/adapters/event_adapter.py +307 -0
- abstractruntime/visualflow_compiler/adapters/function_adapter.py +97 -0
- abstractruntime/visualflow_compiler/adapters/memact_adapter.py +114 -0
- abstractruntime/visualflow_compiler/adapters/subflow_adapter.py +74 -0
- abstractruntime/visualflow_compiler/adapters/variable_adapter.py +316 -0
- abstractruntime/visualflow_compiler/compiler.py +3832 -0
- abstractruntime/visualflow_compiler/flow.py +247 -0
- abstractruntime/visualflow_compiler/visual/__init__.py +13 -0
- abstractruntime/visualflow_compiler/visual/agent_ids.py +29 -0
- abstractruntime/visualflow_compiler/visual/builtins.py +1376 -0
- abstractruntime/visualflow_compiler/visual/code_executor.py +214 -0
- abstractruntime/visualflow_compiler/visual/executor.py +2804 -0
- abstractruntime/visualflow_compiler/visual/models.py +211 -0
- abstractruntime/workflow_bundle/__init__.py +52 -0
- abstractruntime/workflow_bundle/models.py +236 -0
- abstractruntime/workflow_bundle/packer.py +317 -0
- abstractruntime/workflow_bundle/reader.py +87 -0
- abstractruntime/workflow_bundle/registry.py +587 -0
- abstractruntime-0.4.1.dist-info/METADATA +177 -0
- abstractruntime-0.4.1.dist-info/RECORD +86 -0
- abstractruntime-0.4.0.dist-info/METADATA +0 -167
- abstractruntime-0.4.0.dist-info/RECORD +0 -49
- {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/WHEEL +0 -0
- {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/entry_points.txt +0 -0
- {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,946 @@
|
|
|
1
|
+
"""Session attachment registry + on-demand open tool helpers.
|
|
2
|
+
|
|
3
|
+
This module implements two framework primitives:
|
|
4
|
+
- a session-scoped attachment index (metadata-only, LLM-visible via injection)
|
|
5
|
+
- a runtime-owned `open_attachment` tool (bounded artifact reads)
|
|
6
|
+
|
|
7
|
+
These are intentionally integration-scoped (AbstractCore) and are executed inside
|
|
8
|
+
the runtime's effect handlers (not via a host ToolExecutor).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import hashlib
|
|
14
|
+
import re
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
|
18
|
+
|
|
19
|
+
from ...storage.artifacts import ArtifactStore
|
|
20
|
+
|
|
21
|
+
# Prefix of the run id under which a session's attachments are looked up.
_DEFAULT_SESSION_MEMORY_RUN_PREFIX = "session_memory_"
# A run id is "safe" when it consists solely of ASCII letters, digits, "_" and "-".
_SAFE_RUN_ID_PATTERN = re.compile(r"^[a-zA-Z0-9_-]+$")

# Tool message layout: "[<tool name>]: <body>"; DOTALL lets the body span several lines.
_TOOL_PREFIX_RE = re.compile(r"^\[(?P<name>[^\]]+)\]:\s*(?P<body>.*)$", re.DOTALL)
# First line of a read_file result: "File: <path> (<count> lines)".
_READ_FILE_HEADER_RE = re.compile(r"^File:\s*(?P<path>.+?)\s*\((?P<count>\d+)\s+lines\)\s*$")
# First line of an open_attachment result:
# "Attachment: <handle> (id=<artifact_id>[, sha=<hex>][, lines <start>-<end>]...)".
# The sha and line-range groups are optional; anything else before the closing ")" is ignored.
_OPEN_ATTACHMENT_HEADER_RE = re.compile(
    r"^Attachment:\s*(?P<handle>.+?)\s*\(id=(?P<artifact_id>[a-zA-Z0-9_-]+)"
    r"(?:,\s*sha=(?P<sha256>[0-9a-fA-F]{8,64}))?"
    r"(?:,\s*lines\s+(?P<start_line>\d+)-(?P<end_line>\d+))?"
    r".*\)\s*$"
)
# Numbered content line: optional indentation, "<n>:", then one whitespace character.
_LINE_NUMBER_RE = re.compile(r"^\s*(?P<line>\d+):\s")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def session_memory_owner_run_id(session_id: str) -> str:
    """Derive the run id that owns the session memory of ``session_id``.

    A session id built only from run-id-safe characters is appended verbatim to
    the ``session_memory_`` prefix. Any other id is replaced by
    ``session_memory_sha_<first 32 hex chars of its SHA-256>``. The result is a
    pure function of the (stripped) session id, so the same session always maps
    to the same owner run id.

    Raises:
        ValueError: if ``session_id`` is empty or whitespace-only.
    """
    cleaned = str(session_id or "").strip()
    if cleaned == "":
        raise ValueError("session_id is required")

    prefix = _DEFAULT_SESSION_MEMORY_RUN_PREFIX
    if _SAFE_RUN_ID_PATTERN.match(cleaned):
        direct = prefix + cleaned
        # Re-validate the combined id before handing it out.
        if _SAFE_RUN_ID_PATTERN.match(direct):
            return direct

    # Fallback: hash ids that contain characters outside the safe alphabet.
    hashed = hashlib.sha256(cleaned.encode("utf-8")).hexdigest()
    return f"{prefix}sha_{hashed[:32]}"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _normalize_handle(raw: Any) -> str:
|
|
53
|
+
s = str(raw or "").strip()
|
|
54
|
+
if not s:
|
|
55
|
+
return ""
|
|
56
|
+
if s.startswith("@"):
|
|
57
|
+
s = s[1:].strip()
|
|
58
|
+
if s.startswith("./"):
|
|
59
|
+
s = s[2:]
|
|
60
|
+
return s
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _safe_tag_subset(tags: Dict[str, str], *, limit: int = 8) -> Dict[str, str]:
|
|
64
|
+
out: Dict[str, str] = {}
|
|
65
|
+
for k in sorted(tags.keys()):
|
|
66
|
+
if len(out) >= limit:
|
|
67
|
+
break
|
|
68
|
+
v = tags.get(k)
|
|
69
|
+
if not isinstance(k, str) or not k.strip():
|
|
70
|
+
continue
|
|
71
|
+
if not isinstance(v, str) or not v.strip():
|
|
72
|
+
continue
|
|
73
|
+
if k in {"session_id"}:
|
|
74
|
+
continue
|
|
75
|
+
out[k] = v
|
|
76
|
+
return out
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def list_session_attachments(
    *,
    artifact_store: ArtifactStore,
    session_id: str,
    limit: int = 20,
) -> List[Dict[str, Any]]:
    """Build the metadata-only attachment index of a session.

    Artifacts are listed for the session's memory owner run id; only those whose
    ``tags`` is a dict with ``kind == "attachment"`` are kept. They are ordered by
    the string form of ``created_at``, newest first, and at most ``limit`` entries
    are returned (a negative ``limit`` is treated as 0).

    Each entry carries ``handle``, ``artifact_id``, ``filename``, ``sha256``
    (``None`` unless it is 8-64 lowercase hex characters), ``content_type``,
    ``size_bytes``, ``created_at`` and a reduced ``tags`` mapping.
    """
    owner_run_id = session_memory_owner_run_id(session_id)

    def _is_attachment(meta: Any) -> bool:
        return isinstance(getattr(meta, "tags", None), dict) and (meta.tags or {}).get("kind") == "attachment"

    newest_first = sorted(
        filter(_is_attachment, artifact_store.list_by_run(owner_run_id)),
        key=lambda meta: str(getattr(meta, "created_at", "") or ""),
        reverse=True,
    )

    index: list[Dict[str, Any]] = []
    for meta in newest_first[: max(0, int(limit))]:
        tags = dict(getattr(meta, "tags", {}) or {})
        handle = _normalize_handle(tags.get("path") or tags.get("source_path") or tags.get("filename") or "")

        # Prefer the explicit filename tag; otherwise use the handle's last path segment.
        filename = str(tags.get("filename") or "").strip()
        if not filename and handle:
            filename = handle.rsplit("/", 1)[-1]

        digest: Optional[str] = str(tags.get("sha256") or "").strip().lower() or None
        if digest is not None and re.fullmatch(r"[0-9a-f]{8,64}", digest) is None:
            digest = None

        entry: Dict[str, Any] = {
            "handle": handle,
            "artifact_id": str(getattr(meta, "artifact_id", "") or ""),
            "filename": filename,
            "sha256": digest,
            "content_type": str(getattr(meta, "content_type", "") or ""),
            "size_bytes": int(getattr(meta, "size_bytes", 0) or 0),
            "created_at": str(getattr(meta, "created_at", "") or ""),
            "tags": _safe_tag_subset(tags),
        }
        index.append(entry)
    return index
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def render_session_attachments_system_message(
    entries: Iterable[Dict[str, Any]],
    *,
    max_entries: int = 20,
    max_chars: int = 4000,
    include_open_attachment_hint: bool = True,
) -> str:
    """Render the stored-attachment index as a size-bounded system message.

    The message starts with a fixed header, optionally followed by a usage hint
    for ``open_attachment`` (only when it fits), then one ``- name (meta)`` line
    per dict entry taken from the first ``max_entries`` items of ``entries``.
    Rendering stops at the first line that would push the text past
    ``max_chars``; a ``- … (truncated)`` line is added when that is not the first
    item and there is still room for it.

    Returns ``""`` when ``max_entries`` or ``max_chars`` is not positive.
    """
    entry_cap = max(0, int(max_entries))
    char_cap = max(0, int(max_chars))
    if entry_cap <= 0 or char_cap <= 0:
        return ""

    header = "Stored session attachments (most recent first; not necessarily active in this call). Do not mention this list:"
    out_lines: list[str] = [header]
    # Budget accounting charges one extra character per line for its newline.
    budget_used = len(header) + 1

    if include_open_attachment_hint:
        hint = (
            "Open text via: open_attachment(artifact_id='…', start_line=..., end_line=...). "
            "Open media via: open_attachment(artifact_id='…')."
        )
        hint_cost = len(hint) + 1
        if budget_used + hint_cost <= char_cap:
            out_lines.append(hint)
            budget_used += hint_cost

    for position, entry in enumerate(list(entries)[:entry_cap]):
        if not isinstance(entry, dict):
            continue

        handle = _normalize_handle(entry.get("handle") or entry.get("source_path") or entry.get("filename") or "")
        if not handle:
            handle = str(entry.get("filename") or "").strip() or "attachment"
        filename = str(entry.get("filename") or "").strip()
        label = filename if filename else handle
        # Avoid leaking absolute paths into the model-visible index; prefer filename.
        if label and (label.startswith("/") or re.match(r"^[a-zA-Z]:[\\\\/]", label)):
            label = label.replace("\\", "/").rsplit("/", 1)[-1]

        artifact_id = str(entry.get("artifact_id") or "").strip()
        digest = str(entry.get("sha256") or "").strip()
        content_type = str(entry.get("content_type") or "").strip()
        size = entry.get("size_bytes")
        created_at = str(entry.get("created_at") or "").strip()

        details = [
            f"id={artifact_id}" if artifact_id else "",
            f"sha={digest[:8]}…" if digest else "",
            content_type,
            f"{size:,} bytes" if isinstance(size, int) and size > 0 else "",
            f"added {created_at}" if created_at else "",
        ]
        meta = ", ".join(part for part in details if part)
        row = f"- {label} ({meta})" if meta else f"- {label}"

        row_cost = len(row) + 1
        if budget_used + row_cost > char_cap:
            # Always include an explicit truncation marker if we had at least one entry.
            if position > 0 and budget_used + 18 <= char_cap:
                out_lines.append("- … (truncated)")
            break

        out_lines.append(row)
        budget_used += row_cost

    rendered = "\n".join(out_lines)
    if len(rendered) <= char_cap:
        return rendered

    #[WARNING:TRUNCATION] bounded attachment index rendering (model-visible)
    # Safety net, reached e.g. when the header alone is longer than the cap.
    marker = "\n- … (truncated)"
    keep = char_cap - len(marker)
    if keep <= 0:
        return rendered[:char_cap]
    clipped = rendered[:keep].rstrip()
    if clipped.endswith("- … (truncated)"):
        return clipped
    return clipped + marker
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def render_active_attachments_system_message(
    media: Any,
    *,
    max_entries: int = 12,
    max_chars: int = 2000,
) -> str:
    """Render a size-bounded system message listing this call's active media.

    Only metadata is emitted (display name, artifact id, short sha, content type,
    size); attachment contents are never inlined. ``media`` is honoured only when
    it is a non-empty list or tuple; its items may be strings (paths/handles) or
    dicts, and anything else is skipped. Absolute paths are shown by their last
    path segment only.

    Returns ``""`` when a limit is not positive or there is nothing to list.
    """
    entry_cap = max(0, int(max_entries))
    char_cap = max(0, int(max_chars))
    if entry_cap <= 0 or char_cap <= 0:
        return ""

    candidates = list(media) if isinstance(media, (list, tuple)) else []
    if not candidates:
        return ""

    header = "Active attachments are already available in this call. Use their content directly; do not call tools to re-open them. Do not mention this list."
    out_lines: list[str] = [header]
    # Budget accounting charges one extra character per line for its newline.
    budget_used = len(header) + 1

    def _describe(item: Any) -> Optional[str]:
        """Return the bullet line for one media item, or None when it is unusable."""
        if isinstance(item, str):
            text = item.strip()
            if not text:
                return None
            shown = _normalize_handle(text) or text
            # Display filename for absolute paths to avoid encouraging filesystem tool calls.
            slashed = shown.replace("\\", "/")
            if slashed.startswith("/") or re.match(r"^[a-zA-Z]:[\\\\/]", shown):
                return "- " + slashed.rsplit("/", 1)[-1]
            return f"- {shown}"

        if not isinstance(item, dict):
            return None

        artifact_id = str((item.get("$artifact") or item.get("artifact_id") or item.get("id")) or "").strip()
        handle = _normalize_handle(item.get("source_path") or item.get("path") or item.get("filename"))
        filename = str(item.get("filename") or "").strip()

        label = filename if filename else handle
        if label and (label.startswith("/") or re.match(r"^[a-zA-Z]:[\\\\/]", label)):
            label = label.replace("\\", "/").rsplit("/", 1)[-1]

        # Fall back to the artifact id, then to a generic word, when no name is usable.
        if label:
            head = label
        elif artifact_id:
            head = f"id={artifact_id}"
        else:
            head = "attachment"

        digest = str(item.get("sha256") or "").strip().lower()
        content_type = str(item.get("content_type") or "").strip()
        size = item.get("size_bytes")
        details = [
            f"id={artifact_id}" if artifact_id else "",
            f"sha={digest[:8]}…" if digest and re.fullmatch(r"[0-9a-f]{8,64}", digest) else "",
            content_type,
            f"{size:,} bytes" if isinstance(size, int) and size > 0 else "",
        ]
        meta = ", ".join(part for part in details if part)
        return f"- {head} ({meta})" if meta else f"- {head}"

    for position, item in enumerate(candidates[:entry_cap]):
        row = _describe(item)
        if not row:
            continue
        row_cost = len(row) + 1
        if budget_used + row_cost > char_cap:
            if position > 0 and budget_used + 18 <= char_cap:
                out_lines.append("- … (truncated)")
            break
        out_lines.append(row)
        budget_used += row_cost

    rendered = "\n".join(out_lines)
    if len(rendered) <= char_cap:
        return rendered

    #[WARNING:TRUNCATION] bounded active-attachment list rendering (model-visible)
    # Safety net, reached e.g. when the header alone is longer than the cap.
    marker = "\n- … (truncated)"
    keep = char_cap - len(marker)
    if keep <= 0:
        return rendered[:char_cap]
    clipped = rendered[:keep].rstrip()
    if clipped.endswith("- … (truncated)"):
        return clipped
    return clipped + marker
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
@dataclass(frozen=True)
class ParsedToolMessage:
    """Immutable (frozen) pair holding the two parts of a parsed tool message."""

    # Tool name; `parse_tool_message` fills it with the stripped text between the leading brackets.
    tool_name: str
    # Message text that follows the "[name]:" prefix.
    body: str
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def parse_tool_message(text: str) -> Optional[ParsedToolMessage]:
    """Split a ``[tool]: body`` message into its tool name and body.

    Returns ``None`` when ``text`` does not start with a bracketed prefix or
    when the bracketed name is blank after stripping.
    """
    match = _TOOL_PREFIX_RE.match(str(text or ""))
    if match is None:
        return None

    tool_name = str(match.group("name") or "").strip()
    if not tool_name:
        return None

    return ParsedToolMessage(tool_name=tool_name, body=str(match.group("body") or ""))
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def _parse_read_file_identity(body: str) -> Optional[Tuple[str, str, int, int]]:
|
|
318
|
+
"""Return (path, sha256_of_body, start_line, end_line) when parseable."""
|
|
319
|
+
raw = str(body or "")
|
|
320
|
+
if not raw.strip():
|
|
321
|
+
return None
|
|
322
|
+
lines = raw.splitlines()
|
|
323
|
+
if not lines:
|
|
324
|
+
return None
|
|
325
|
+
m = _READ_FILE_HEADER_RE.match(lines[0].strip())
|
|
326
|
+
if not m:
|
|
327
|
+
return None
|
|
328
|
+
path = _normalize_handle(m.group("path"))
|
|
329
|
+
if not path:
|
|
330
|
+
return None
|
|
331
|
+
|
|
332
|
+
start_line = -1
|
|
333
|
+
end_line = -1
|
|
334
|
+
for ln in lines[1:]:
|
|
335
|
+
mm = _LINE_NUMBER_RE.match(ln)
|
|
336
|
+
if not mm:
|
|
337
|
+
continue
|
|
338
|
+
try:
|
|
339
|
+
num = int(mm.group("line"))
|
|
340
|
+
except Exception:
|
|
341
|
+
continue
|
|
342
|
+
if start_line < 0:
|
|
343
|
+
start_line = num
|
|
344
|
+
end_line = num
|
|
345
|
+
|
|
346
|
+
if start_line < 0 or end_line < 0:
|
|
347
|
+
start_line = 1
|
|
348
|
+
end_line = 1
|
|
349
|
+
|
|
350
|
+
sha = hashlib.sha256(raw.encode("utf-8")).hexdigest()
|
|
351
|
+
return (path, sha, start_line, end_line)
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def _parse_open_attachment_identity(body: str) -> Optional[Tuple[str, str, Optional[str], int, int]]:
|
|
355
|
+
"""Return (handle, artifact_id, sha256, start_line, end_line) when parseable."""
|
|
356
|
+
raw = str(body or "")
|
|
357
|
+
if not raw.strip():
|
|
358
|
+
return None
|
|
359
|
+
lines = raw.splitlines()
|
|
360
|
+
if not lines:
|
|
361
|
+
return None
|
|
362
|
+
m = _OPEN_ATTACHMENT_HEADER_RE.match(lines[0].strip())
|
|
363
|
+
if not m:
|
|
364
|
+
return None
|
|
365
|
+
handle = _normalize_handle(m.group("handle"))
|
|
366
|
+
artifact_id = str(m.group("artifact_id") or "").strip()
|
|
367
|
+
if not artifact_id:
|
|
368
|
+
return None
|
|
369
|
+
sha256 = m.group("sha256")
|
|
370
|
+
sha = str(sha256 or "").strip().lower() or None
|
|
371
|
+
if sha and not re.fullmatch(r"[0-9a-f]{8,64}", sha):
|
|
372
|
+
sha = None
|
|
373
|
+
|
|
374
|
+
start_line = 1
|
|
375
|
+
end_line = 1
|
|
376
|
+
try:
|
|
377
|
+
if m.group("start_line") and m.group("end_line"):
|
|
378
|
+
start_line = int(m.group("start_line"))
|
|
379
|
+
end_line = int(m.group("end_line"))
|
|
380
|
+
except Exception:
|
|
381
|
+
start_line = 1
|
|
382
|
+
end_line = 1
|
|
383
|
+
|
|
384
|
+
return (handle, artifact_id, sha, start_line, end_line)
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def dedup_messages_view(
    messages: List[Dict[str, Any]],
    *,
    session_attachments: Optional[List[Dict[str, Any]]] = None,
) -> List[Dict[str, Any]]:
    """Return a derived LLM-visible messages list with duplicate doc reads stubbed.

    Every kept message is a shallow copy, so the input dicts are not mutated.
    Messages that are not dicts are left out of the result. Only messages with
    role ``"tool"`` whose content parses as a ``read_file`` or ``open_attachment``
    result are candidates for deduplication: the first occurrence of an identity
    is kept verbatim, later occurrences get their ``content`` replaced by a short
    stub that points back to the earlier output.

    Args:
        messages: Chat messages (dicts with ``role`` / ``content``).
        session_attachments: Optional attachment index entries (dicts with at
            least ``handle``; ``artifact_id`` / ``sha256`` are used for hints).

    Returns:
        A new list. For empty or non-list input: ``[]`` when ``messages`` is
        ``None``, otherwise ``list(messages)``.
    """
    if not isinstance(messages, list) or not messages:
        return [] if messages is None else list(messages)

    # Index known attachments by normalized handle (several may share one handle).
    by_handle: Dict[str, list[Dict[str, Any]]] = {}
    for e in session_attachments or []:
        if not isinstance(e, dict):
            continue
        h = _normalize_handle(e.get("handle"))
        if not h:
            continue
        by_handle.setdefault(h, []).append(e)

    out: list[Dict[str, Any]] = []
    # identity -> index in `out` of its first occurrence (only membership is consulted below).
    seen: Dict[Tuple[str, str, str, int, int], int] = {}

    for msg in messages:
        if not isinstance(msg, dict):
            continue
        role = str(msg.get("role") or "").strip()
        content = msg.get("content")
        content_str = "" if content is None else str(content)

        # Non-tool messages and blank tool messages pass through untouched.
        if role != "tool" or not content_str.strip():
            out.append(dict(msg))
            continue

        parsed = parse_tool_message(content_str)
        if parsed is None:
            out.append(dict(msg))
            continue

        tool = parsed.tool_name
        body = parsed.body

        identity: Optional[Tuple[str, str, str, int, int]] = None
        stub: Optional[str] = None

        if tool == "read_file":
            ident = _parse_read_file_identity(body)
            if ident is not None:
                path, sha, start_line, end_line = ident
                path_key = path
                # An absolute path may correspond to a relative attachment handle:
                # accept it only when exactly one handle is a "/"-separated suffix.
                if path_key not in by_handle and path_key.startswith("/"):
                    suffix_matches = [h for h in by_handle.keys() if path_key.endswith("/" + h)]
                    if len(suffix_matches) == 1:
                        path_key = suffix_matches[0]

                # `sha` is the digest of the full tool body, so only identical outputs collide.
                identity = ("read_file", path_key, sha, start_line, end_line)

                candidates = by_handle.get(path_key) or []
                attachment_hint = ""
                artifact_id_hint = ""
                if len(candidates) == 1:
                    # Unambiguous attachment: the stub can name the exact artifact id.
                    a = candidates[0]
                    aid = str(a.get("artifact_id") or "").strip()
                    sha_a = str(a.get("sha256") or "").strip()
                    if aid:
                        artifact_id_hint = aid
                        attachment_hint = f" Attached artifact: id={aid}" + (f", sha={sha_a[:8]}…" if sha_a else "")
                elif len(candidates) > 1:
                    # Ambiguous handle: list up to three candidates for disambiguation.
                    bits: list[str] = []
                    for a in candidates[:3]:
                        aid = str(a.get("artifact_id") or "").strip()
                        sha_a = str(a.get("sha256") or "").strip()
                        if aid:
                            bits.append(f"{aid}:{sha_a[:8]}…" if sha_a else aid)
                    if bits:
                        attachment_hint = " Attached candidates: " + ", ".join(bits) + " (specify expected_sha256)"

                # Show only the last path segment of absolute paths in the stub.
                display_path = path
                if display_path and (display_path.startswith("/") or re.match(r"^[a-zA-Z]:[\\\\/]", display_path)):
                    display_path = display_path.replace("\\", "/").rsplit("/", 1)[-1]

                reopen = ""
                if artifact_id_hint:
                    reopen = (
                        f"Re-open with open_attachment(artifact_id='{artifact_id_hint}', "
                        f"start_line={start_line}, end_line={end_line})."
                    )
                elif candidates:
                    reopen = (
                        f"Re-open with open_attachment(artifact_id='…', start_line={start_line}, end_line={end_line})."
                    )

                stub = f"[read_file]: (duplicate) File already shown above: {display_path} lines {start_line}-{end_line}."
                if reopen:
                    stub += "\n" + reopen
                stub += attachment_hint

        elif tool == "open_attachment":
            ident2 = _parse_open_attachment_identity(body)
            if ident2 is not None:
                handle, artifact_id, sha, start_line, end_line = ident2
                # A header without sha still yields a usable identity key.
                key_sha = sha or "unknown"
                identity = ("open_attachment", artifact_id, key_sha, start_line, end_line)
                display_handle = handle
                if display_handle and (display_handle.startswith("/") or re.match(r"^[a-zA-Z]:[\\\\/]", display_handle)):
                    display_handle = display_handle.replace("\\", "/").rsplit("/", 1)[-1]
                head = display_handle if display_handle else f"id={artifact_id}"
                stub = (
                    f"[open_attachment]: (duplicate) Attachment already shown above: {head} lines {start_line}-{end_line} (id={artifact_id}).\n"
                    f"Re-open with open_attachment(artifact_id='{artifact_id}', start_line={start_line}, end_line={end_line})."
                )

        # Other tools, or outputs whose header could not be parsed, are kept as-is.
        if identity is None:
            out.append(dict(msg))
            continue

        # Repeat of an earlier output: keep the message but swap in the stub text.
        if identity in seen:
            out.append(dict(msg, content=stub or content_str))
            continue

        seen[identity] = len(out)
        out.append(dict(msg))

    return out
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def execute_open_attachment(
|
|
512
|
+
*,
|
|
513
|
+
artifact_store: ArtifactStore,
|
|
514
|
+
session_id: str,
|
|
515
|
+
artifact_id: Optional[str],
|
|
516
|
+
handle: Optional[str],
|
|
517
|
+
expected_sha256: Optional[str],
|
|
518
|
+
start_line: int,
|
|
519
|
+
end_line: Optional[int],
|
|
520
|
+
max_chars: int,
|
|
521
|
+
) -> Tuple[bool, Optional[Dict[str, Any]], Optional[str]]:
|
|
522
|
+
"""Runtime-owned tool execution for `open_attachment`."""
|
|
523
|
+
sid = str(session_id or "").strip()
|
|
524
|
+
if not sid:
|
|
525
|
+
return False, {"rendered": "Error: session_id is required to open attachments."}, "session_id is required"
|
|
526
|
+
|
|
527
|
+
rid = session_memory_owner_run_id(sid)
|
|
528
|
+
handle_norm = _normalize_handle(handle)
|
|
529
|
+
artifact_id_norm = str(artifact_id or "").strip() or None
|
|
530
|
+
|
|
531
|
+
expected = str(expected_sha256 or "").strip().lower() or None
|
|
532
|
+
if expected and expected.startswith("sha256:"):
|
|
533
|
+
expected = expected.split(":", 1)[-1].strip() or None
|
|
534
|
+
if expected and not re.fullmatch(r"[0-9a-f]{8,64}", expected):
|
|
535
|
+
expected = None
|
|
536
|
+
|
|
537
|
+
# Clamp numeric args defensively.
|
|
538
|
+
try:
|
|
539
|
+
start = int(start_line)
|
|
540
|
+
except Exception:
|
|
541
|
+
start = 1
|
|
542
|
+
if start < 1:
|
|
543
|
+
start = 1
|
|
544
|
+
try:
|
|
545
|
+
end = int(end_line) if end_line is not None else None
|
|
546
|
+
except Exception:
|
|
547
|
+
end = None
|
|
548
|
+
if end is not None and end < start:
|
|
549
|
+
end = start
|
|
550
|
+
try:
|
|
551
|
+
mc = int(max_chars)
|
|
552
|
+
except Exception:
|
|
553
|
+
mc = 8000
|
|
554
|
+
# Contract:
|
|
555
|
+
# - `max_chars > 0` means "return up to this many chars" (bounded excerpt).
|
|
556
|
+
# - `max_chars <= 0` means "no artificial cap" (return full selection).
|
|
557
|
+
mc_limit: Optional[int]
|
|
558
|
+
if mc <= 0:
|
|
559
|
+
mc_limit = None
|
|
560
|
+
else:
|
|
561
|
+
mc_limit = mc
|
|
562
|
+
|
|
563
|
+
# Resolve artifact metadata.
|
|
564
|
+
metas = artifact_store.list_by_run(rid)
|
|
565
|
+
candidates = [m for m in metas if isinstance(getattr(m, "tags", None), dict) and (m.tags or {}).get("kind") == "attachment"]
|
|
566
|
+
|
|
567
|
+
selected_meta = None
|
|
568
|
+
if artifact_id_norm:
|
|
569
|
+
for m in candidates:
|
|
570
|
+
if str(getattr(m, "artifact_id", "") or "") == artifact_id_norm:
|
|
571
|
+
selected_meta = m
|
|
572
|
+
break
|
|
573
|
+
if selected_meta is None:
|
|
574
|
+
# Model robustness: many models confuse `artifact_id` (opaque) with `handle` (path-like).
|
|
575
|
+
# If a handle is available, fall back to it. Otherwise treat the provided artifact_id as
|
|
576
|
+
# a best-effort handle candidate (so `artifact_id=\"notes.txt\"` still works).
|
|
577
|
+
if not handle_norm:
|
|
578
|
+
handle_norm = _normalize_handle(artifact_id_norm)
|
|
579
|
+
artifact_id_norm = None
|
|
580
|
+
|
|
581
|
+
if selected_meta is None:
|
|
582
|
+
if not handle_norm:
|
|
583
|
+
return False, {"rendered": "Error: provide artifact_id or handle."}, "missing artifact_id/handle"
|
|
584
|
+
|
|
585
|
+
matches: list[Any] = []
|
|
586
|
+
for m in candidates:
|
|
587
|
+
tags = getattr(m, "tags", {}) or {}
|
|
588
|
+
p = _normalize_handle(tags.get("path") or tags.get("source_path"))
|
|
589
|
+
fn = _normalize_handle(tags.get("filename"))
|
|
590
|
+
if p == handle_norm or fn == handle_norm:
|
|
591
|
+
matches.append(m)
|
|
592
|
+
continue
|
|
593
|
+
|
|
594
|
+
# Robustness: `read_file` tool outputs typically show absolute paths, while
|
|
595
|
+
# attachment tags often store workspace-relative virtual paths. Treat a
|
|
596
|
+
# single unambiguous suffix match as equivalent (disambiguate via sha256
|
|
597
|
+
# when multiple candidates exist).
|
|
598
|
+
if handle_norm.startswith("/"):
|
|
599
|
+
if p and not p.startswith("/") and handle_norm.endswith("/" + p):
|
|
600
|
+
matches.append(m)
|
|
601
|
+
continue
|
|
602
|
+
if fn and not fn.startswith("/") and handle_norm.endswith("/" + fn):
|
|
603
|
+
matches.append(m)
|
|
604
|
+
continue
|
|
605
|
+
if p and p.startswith("/") and not handle_norm.startswith("/") and p.endswith("/" + handle_norm):
|
|
606
|
+
matches.append(m)
|
|
607
|
+
continue
|
|
608
|
+
|
|
609
|
+
if expected:
|
|
610
|
+
matches2: list[Any] = []
|
|
611
|
+
for m in matches:
|
|
612
|
+
tags = getattr(m, "tags", {}) or {}
|
|
613
|
+
sha = str(tags.get("sha256") or "").strip().lower()
|
|
614
|
+
if sha and sha == expected:
|
|
615
|
+
matches2.append(m)
|
|
616
|
+
matches = matches2
|
|
617
|
+
|
|
618
|
+
if not matches:
|
|
619
|
+
# Best-effort suggestions: help models recover when they misremember a handle/path.
|
|
620
|
+
suggestions: list[dict[str, Any]] = []
|
|
621
|
+
try:
|
|
622
|
+
query = _normalize_handle(handle_norm)
|
|
623
|
+
q_base = query.replace("\\", "/").strip().strip("/").rsplit("/", 1)[-1]
|
|
624
|
+
q_stem = q_base.rsplit(".", 1)[0].lower() if q_base else ""
|
|
625
|
+
scored: list[tuple[int, int, str, Any]] = []
|
|
626
|
+
for m in candidates:
|
|
627
|
+
tags = getattr(m, "tags", {}) or {}
|
|
628
|
+
p = _normalize_handle(tags.get("path") or tags.get("source_path") or "")
|
|
629
|
+
fn = _normalize_handle(tags.get("filename") or "")
|
|
630
|
+
cand_handle = p or fn
|
|
631
|
+
if not cand_handle:
|
|
632
|
+
continue
|
|
633
|
+
cand_base = cand_handle.replace("\\", "/").strip().strip("/").rsplit("/", 1)[-1]
|
|
634
|
+
cand_stem = cand_base.rsplit(".", 1)[0].lower() if cand_base else ""
|
|
635
|
+
score: Optional[int] = None
|
|
636
|
+
if q_base and cand_base and cand_base.lower() == q_base.lower():
|
|
637
|
+
score = 0
|
|
638
|
+
elif q_stem and cand_stem and q_stem in cand_stem:
|
|
639
|
+
score = 1
|
|
640
|
+
elif q_stem and cand_stem and cand_stem in q_stem:
|
|
641
|
+
score = 2
|
|
642
|
+
elif q_stem and q_stem in cand_handle.lower():
|
|
643
|
+
score = 3
|
|
644
|
+
if score is None:
|
|
645
|
+
continue
|
|
646
|
+
scored.append((score, len(cand_handle), cand_handle, m))
|
|
647
|
+
scored.sort(key=lambda x: (x[0], x[1], x[2]))
|
|
648
|
+
for _score, _len, h, m in scored[:5]:
|
|
649
|
+
tags = getattr(m, "tags", {}) or {}
|
|
650
|
+
suggestions.append(
|
|
651
|
+
{
|
|
652
|
+
"handle": h,
|
|
653
|
+
"artifact_id": str(getattr(m, "artifact_id", "") or ""),
|
|
654
|
+
"sha256": str(tags.get("sha256") or "").strip() or None,
|
|
655
|
+
}
|
|
656
|
+
)
|
|
657
|
+
except Exception:
|
|
658
|
+
suggestions = []
|
|
659
|
+
|
|
660
|
+
rendered = f"Error: no attachment matches handle '{handle_norm}' in this session."
|
|
661
|
+
if suggestions:
|
|
662
|
+
parts = []
|
|
663
|
+
for s in suggestions:
|
|
664
|
+
h = _normalize_handle(s.get("handle"))
|
|
665
|
+
aid = str(s.get("artifact_id") or "").strip()
|
|
666
|
+
sha = str(s.get("sha256") or "").strip()
|
|
667
|
+
bits: list[str] = []
|
|
668
|
+
if aid:
|
|
669
|
+
bits.append(f"id={aid}")
|
|
670
|
+
if sha:
|
|
671
|
+
bits.append(f"sha={sha[:8]}…")
|
|
672
|
+
meta = f" ({', '.join(bits)})" if bits else ""
|
|
673
|
+
parts.append(f"- {h}{meta}")
|
|
674
|
+
rendered += "\nDid you mean:\n" + "\n".join(parts)
|
|
675
|
+
|
|
676
|
+
return (
|
|
677
|
+
False,
|
|
678
|
+
{"rendered": rendered, "suggestions": suggestions},
|
|
679
|
+
"attachment not found",
|
|
680
|
+
)
|
|
681
|
+
|
|
682
|
+
if len(matches) > 1:
|
|
683
|
+
# List a few candidates to help the model disambiguate.
|
|
684
|
+
cand: list[dict[str, Any]] = []
|
|
685
|
+
for m in matches[:5]:
|
|
686
|
+
tags = getattr(m, "tags", {}) or {}
|
|
687
|
+
sha = str(tags.get("sha256") or "").strip()
|
|
688
|
+
cand.append({"artifact_id": str(getattr(m, "artifact_id", "") or ""), "sha256": sha or None})
|
|
689
|
+
return (
|
|
690
|
+
False,
|
|
691
|
+
{
|
|
692
|
+
"rendered": f"Error: multiple attachments match '{handle_norm}'. Provide expected_sha256 or artifact_id.",
|
|
693
|
+
"candidates": cand,
|
|
694
|
+
},
|
|
695
|
+
"multiple matches",
|
|
696
|
+
)
|
|
697
|
+
|
|
698
|
+
selected_meta = matches[0]
|
|
699
|
+
|
|
700
|
+
aid = str(getattr(selected_meta, "artifact_id", "") or "")
|
|
701
|
+
tags = dict(getattr(selected_meta, "tags", {}) or {})
|
|
702
|
+
ct = str(getattr(selected_meta, "content_type", "") or "")
|
|
703
|
+
size_bytes = int(getattr(selected_meta, "size_bytes", 0) or 0)
|
|
704
|
+
sha_tag = str(tags.get("sha256") or "").strip().lower() or None
|
|
705
|
+
handle_final = _normalize_handle(tags.get("path") or tags.get("source_path") or tags.get("filename") or handle_norm or "")
|
|
706
|
+
if not handle_final:
|
|
707
|
+
handle_final = aid
|
|
708
|
+
display_handle = handle_final
|
|
709
|
+
try:
|
|
710
|
+
disp_norm = display_handle.replace("\\", "/")
|
|
711
|
+
if disp_norm.startswith("/") or re.match(r"^[a-zA-Z]:[\\\\/]", disp_norm):
|
|
712
|
+
display_handle = disp_norm.rsplit("/", 1)[-1] or display_handle
|
|
713
|
+
except Exception:
|
|
714
|
+
display_handle = handle_final
|
|
715
|
+
|
|
716
|
+
# v0: text-only, bounded excerpts.
|
|
717
|
+
# v1: media attachments return a media ref (or derived text when possible) and are intended to be
|
|
718
|
+
# attached as `payload.media` for the next LLM call (runtime-owned behavior).
|
|
719
|
+
ct_low = ct.lower().strip()
|
|
720
|
+
text_like = ct_low.startswith("text/") or ct_low in {
|
|
721
|
+
"application/json",
|
|
722
|
+
"application/yaml",
|
|
723
|
+
"application/x-yaml",
|
|
724
|
+
"application/xml",
|
|
725
|
+
"application/javascript",
|
|
726
|
+
"application/typescript",
|
|
727
|
+
}
|
|
728
|
+
source_path = str(tags.get("source_path") or tags.get("path") or tags.get("filename") or handle_final or "").strip()
|
|
729
|
+
filename = str(tags.get("filename") or "").strip() or (source_path.rsplit("/", 1)[-1] if source_path else "")
|
|
730
|
+
|
|
731
|
+
text: Optional[str] = None
|
|
732
|
+
derived_from_content_type: Optional[str] = None
|
|
733
|
+
derived_text_content_type: Optional[str] = None
|
|
734
|
+
derived_error: Optional[str] = None
|
|
735
|
+
|
|
736
|
+
if not text_like:
|
|
737
|
+
# Best-effort: derive text from common document types (e.g. PDF) using AbstractCore's media stack.
|
|
738
|
+
#
|
|
739
|
+
# This keeps the tool usable for document attachments and enables KG ingestion to ground
|
|
740
|
+
# evidence quotes in a durable, readable text representation.
|
|
741
|
+
should_try_text_extract = ct_low in {"application/pdf"} or str(filename or "").lower().endswith(".pdf")
|
|
742
|
+
|
|
743
|
+
if should_try_text_extract:
|
|
744
|
+
artifact = artifact_store.load(aid)
|
|
745
|
+
if artifact is None:
|
|
746
|
+
return False, {"rendered": f"Error: failed to load artifact '{aid}'."}, "artifact not found"
|
|
747
|
+
|
|
748
|
+
content = getattr(artifact, "content", None)
|
|
749
|
+
if not isinstance(content, (bytes, bytearray)):
|
|
750
|
+
return False, {"rendered": "Error: failed to load attachment bytes."}, "artifact content missing"
|
|
751
|
+
|
|
752
|
+
try:
|
|
753
|
+
import tempfile
|
|
754
|
+
|
|
755
|
+
# Import lazily to keep this tool usable when AbstractCore media extras are not installed.
|
|
756
|
+
from abstractcore.media.auto_handler import AutoMediaHandler # type: ignore
|
|
757
|
+
|
|
758
|
+
handler = AutoMediaHandler(enable_events=False)
|
|
759
|
+
with tempfile.TemporaryDirectory(prefix="open_attachment_") as td:
|
|
760
|
+
ext = Path(filename).suffix if filename else ""
|
|
761
|
+
if not ext:
|
|
762
|
+
ext = ".pdf" if ct_low == "application/pdf" else ""
|
|
763
|
+
p = Path(td) / f"attachment{ext or ''}"
|
|
764
|
+
p.write_bytes(bytes(content))
|
|
765
|
+
res = handler.process_file(p, format_output="structured")
|
|
766
|
+
if getattr(res, "success", False) and getattr(res, "media_content", None) is not None:
|
|
767
|
+
extracted = str(getattr(res.media_content, "content", "") or "")
|
|
768
|
+
if extracted.strip():
|
|
769
|
+
text_like = True
|
|
770
|
+
derived_from_content_type = ct
|
|
771
|
+
derived_text_content_type = str(getattr(res.media_content, "mime_type", "") or "").strip() or "text/markdown"
|
|
772
|
+
text = extracted
|
|
773
|
+
else:
|
|
774
|
+
derived_error = "empty extracted text"
|
|
775
|
+
else:
|
|
776
|
+
derived_error = str(getattr(res, "error_message", None) or "document text extraction failed")
|
|
777
|
+
except Exception as e:
|
|
778
|
+
derived_error = str(e)
|
|
779
|
+
|
|
780
|
+
if not text_like:
|
|
781
|
+
media_item: Dict[str, Any] = {"$artifact": aid}
|
|
782
|
+
if filename:
|
|
783
|
+
media_item["filename"] = filename
|
|
784
|
+
if source_path:
|
|
785
|
+
media_item["source_path"] = source_path
|
|
786
|
+
if ct:
|
|
787
|
+
media_item["content_type"] = ct
|
|
788
|
+
|
|
789
|
+
header_bits: list[str] = []
|
|
790
|
+
header_bits.append(f"id={aid}")
|
|
791
|
+
if sha_tag:
|
|
792
|
+
header_bits.append(f"sha={sha_tag[:8]}…")
|
|
793
|
+
if ct:
|
|
794
|
+
header_bits.append(ct)
|
|
795
|
+
if size_bytes > 0:
|
|
796
|
+
header_bits.append(f"{size_bytes:,} bytes")
|
|
797
|
+
|
|
798
|
+
header = f"Attachment: {display_handle} ({', '.join(header_bits)})"
|
|
799
|
+
rendered = header + "\n\n(binary/media attachment; it will be attached as media for the next LLM call)"
|
|
800
|
+
out_media: Dict[str, Any] = {
|
|
801
|
+
"rendered": rendered,
|
|
802
|
+
"artifact_id": aid,
|
|
803
|
+
"handle": handle_final,
|
|
804
|
+
"sha256": sha_tag,
|
|
805
|
+
"content_type": ct,
|
|
806
|
+
"size_bytes": size_bytes,
|
|
807
|
+
"media": [media_item],
|
|
808
|
+
"derived_error": derived_error,
|
|
809
|
+
}
|
|
810
|
+
return True, out_media, None
|
|
811
|
+
|
|
812
|
+
if text_like and text is None:
|
|
813
|
+
artifact = artifact_store.load(aid)
|
|
814
|
+
if artifact is None:
|
|
815
|
+
return False, {"rendered": f"Error: failed to load artifact '{aid}'."}, "artifact not found"
|
|
816
|
+
|
|
817
|
+
try:
|
|
818
|
+
text = artifact.content.decode("utf-8")
|
|
819
|
+
except Exception:
|
|
820
|
+
return False, {"rendered": "Error: attachment is not valid UTF-8 text (binary?)"}, "binary content"
|
|
821
|
+
|
|
822
|
+
lines = (text or "").splitlines()
|
|
823
|
+
if not lines:
|
|
824
|
+
header = f"Attachment: {display_handle} (id={aid}" + (f", sha={sha_tag}" if sha_tag else "") + ", lines 0-0)"
|
|
825
|
+
return (
|
|
826
|
+
True,
|
|
827
|
+
{
|
|
828
|
+
"rendered": header,
|
|
829
|
+
"artifact_id": aid,
|
|
830
|
+
"handle": handle_final,
|
|
831
|
+
"sha256": sha_tag,
|
|
832
|
+
"content_type": ct,
|
|
833
|
+
"derived_from_content_type": derived_from_content_type,
|
|
834
|
+
"derived_text_content_type": derived_text_content_type,
|
|
835
|
+
"derived_error": derived_error,
|
|
836
|
+
},
|
|
837
|
+
None,
|
|
838
|
+
)
|
|
839
|
+
|
|
840
|
+
# UX: some models "preview" attachments by opening only the first ~20 lines even when the file is small.
|
|
841
|
+
# If the attachment is small enough to fit under the tool's hard max_chars cap and the call looks like a
|
|
842
|
+
# default preview, expand to the full file (still bounded).
|
|
843
|
+
default_budget = mc_limit == 8000
|
|
844
|
+
small_text = (size_bytes > 0 and size_bytes <= 50_000) or len(text) <= 30_000
|
|
845
|
+
preview_window = 20
|
|
846
|
+
preview_request = bool(
|
|
847
|
+
default_budget
|
|
848
|
+
and small_text
|
|
849
|
+
and start == 1
|
|
850
|
+
and end is not None
|
|
851
|
+
and end <= preview_window
|
|
852
|
+
and len(lines) > int(end)
|
|
853
|
+
)
|
|
854
|
+
if preview_request:
|
|
855
|
+
end = None
|
|
856
|
+
mc_limit = 50_000
|
|
857
|
+
elif default_budget and small_text and end is None:
|
|
858
|
+
mc_limit = 50_000
|
|
859
|
+
|
|
860
|
+
start_idx = min(max(start - 1, 0), len(lines) - 1)
|
|
861
|
+
end_idx = len(lines) - 1 if end is None else min(max(end - 1, start_idx), len(lines) - 1)
|
|
862
|
+
selected = lines[start_idx : end_idx + 1]
|
|
863
|
+
|
|
864
|
+
shown_start = start_idx + 1
|
|
865
|
+
shown_end = end_idx + 1
|
|
866
|
+
num_width = max(1, len(str(shown_end)))
|
|
867
|
+
|
|
868
|
+
# Build bounded, line-numbered excerpt.
|
|
869
|
+
header = (
|
|
870
|
+
f"Attachment: {display_handle} (id={aid}"
|
|
871
|
+
+ (f", sha={sha_tag}" if sha_tag else "")
|
|
872
|
+
+ f", lines {shown_start}-{shown_end})"
|
|
873
|
+
)
|
|
874
|
+
|
|
875
|
+
# Allocate budget for excerpt lines.
|
|
876
|
+
remaining: Optional[int]
|
|
877
|
+
if mc_limit is None:
|
|
878
|
+
remaining = None
|
|
879
|
+
else:
|
|
880
|
+
remaining = max(0, int(mc_limit) - len(header) - 2)
|
|
881
|
+
rendered_lines: list[str] = []
|
|
882
|
+
content_lines: list[str] = []
|
|
883
|
+
used = 0
|
|
884
|
+
truncated = False
|
|
885
|
+
for i, ln in enumerate(selected):
|
|
886
|
+
line_no = shown_start + i
|
|
887
|
+
prefix = f"{line_no:>{num_width}}: "
|
|
888
|
+
row = f"{prefix}{ln}"
|
|
889
|
+
add_len = len(row) + (1 if rendered_lines else 0)
|
|
890
|
+
if remaining is not None and used + add_len > remaining and rendered_lines:
|
|
891
|
+
truncated = True
|
|
892
|
+
break
|
|
893
|
+
if remaining is not None and used + add_len > remaining and not rendered_lines:
|
|
894
|
+
# Always show at least one line, even if it truncates.
|
|
895
|
+
if int(remaining) <= 1:
|
|
896
|
+
row = "…"
|
|
897
|
+
content_line = "…"
|
|
898
|
+
else:
|
|
899
|
+
keep = max(0, int(remaining) - 1)
|
|
900
|
+
if keep <= len(prefix):
|
|
901
|
+
row = prefix.rstrip()[:keep] + "…"
|
|
902
|
+
content_line = "…"
|
|
903
|
+
else:
|
|
904
|
+
body_keep = max(0, keep - len(prefix))
|
|
905
|
+
body = str(ln)[:body_keep].rstrip()
|
|
906
|
+
row = prefix + body + "…"
|
|
907
|
+
content_line = body + "…"
|
|
908
|
+
rendered_lines.append(row)
|
|
909
|
+
content_lines.append(content_line)
|
|
910
|
+
truncated = True
|
|
911
|
+
break
|
|
912
|
+
rendered_lines.append(row)
|
|
913
|
+
content_lines.append(str(ln))
|
|
914
|
+
used += add_len
|
|
915
|
+
|
|
916
|
+
rendered = header + "\n\n" + "\n".join(rendered_lines)
|
|
917
|
+
if truncated and mc_limit is not None and len(rendered) + 18 <= int(mc_limit):
|
|
918
|
+
#[WARNING:TRUNCATION] open_attachment returned a bounded excerpt
|
|
919
|
+
rendered += "\n\n… (truncated)"
|
|
920
|
+
|
|
921
|
+
out: Dict[str, Any] = {
|
|
922
|
+
"rendered": rendered,
|
|
923
|
+
"content_text": "\n".join(content_lines),
|
|
924
|
+
"artifact_id": aid,
|
|
925
|
+
"handle": handle_final,
|
|
926
|
+
"sha256": sha_tag,
|
|
927
|
+
"content_type": ct,
|
|
928
|
+
"derived_from_content_type": derived_from_content_type,
|
|
929
|
+
"derived_text_content_type": derived_text_content_type,
|
|
930
|
+
"derived_error": derived_error,
|
|
931
|
+
"size_bytes": size_bytes,
|
|
932
|
+
"start_line": shown_start,
|
|
933
|
+
"end_line": shown_end,
|
|
934
|
+
"truncated": bool(truncated),
|
|
935
|
+
}
|
|
936
|
+
return True, out, None
|
|
937
|
+
|
|
938
|
+
|
|
939
|
+
# Public API: the names exported by a star-import of this module.
__all__ = [
    "session_memory_owner_run_id",
    "list_session_attachments",
    "render_active_attachments_system_message",
    "render_session_attachments_system_message",
    "dedup_messages_view",
    "execute_open_attachment",
]
|