abstractassistant 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,434 @@
1
+ """Transcript-to-UI helpers for AbstractAssistant.
2
+
3
+ AbstractAgent/ReAct persists tool observations as role="tool" messages so the model can
4
+ continue the loop. Those observations are useful for debugging but are too noisy for
5
+ end-user chat history rendering.
6
+
7
+ This module provides a small, UI-agnostic transformation:
8
+ - hide tool messages from the user-visible transcript
9
+ - attach a compact tool summary + clickable resources to the next user-visible assistant message
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import re
15
+ from pathlib import Path
16
+ from typing import Any, Dict, List, Sequence, Tuple
17
+ from urllib.parse import unquote, urlparse
18
+
19
+
20
+ def _dedupe_preserve_order(items: Sequence[str]) -> List[str]:
21
+ seen: set[str] = set()
22
+ out: List[str] = []
23
+ for item in items:
24
+ s = str(item or "").strip()
25
+ if not s or s in seen:
26
+ continue
27
+ seen.add(s)
28
+ out.append(s)
29
+ return out
30
+
31
+
32
# Bare http(s) URLs; stops at whitespace and common delimiter characters.
_URL_RE = re.compile(r"https?://[^\s)\]\"'<>]+")
# Windows drive-letter paths, e.g. C:\Users\name\file.txt
_WIN_PATH_RE = re.compile(r"[A-Za-z]:\\[^\s\"'<>]+")
# "File: <path>" headers emitted by file tools (captures up to " (" or end of line).
_FILE_HEADER_RE = re.compile(r"(?im)^File:\s+(.+?)(?:\s\(|\n|$)")
# "URL: <url>" headers emitted by URL tools.
_URL_HEADER_RE = re.compile(r"(?im)\bURL:\s*(https?://\S+)")
# <img src="..."> tags (sometimes produced by models).
_HTML_IMG_RE = re.compile(r"(?is)<img[^>]*\bsrc\s*=\s*[\"']([^\"']+)[\"'][^>]*>")
# Markdown image syntax: ![alt](target optional-title).
_MD_IMAGE_RE = re.compile(r"!\[([^\]]*)\]\(([^)]+)\)")

# File extensions treated as renderable image thumbnails.
_IMAGE_EXTS: set[str] = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".tif"}
40
+
41
+
42
def _extract_urls(text: str, *, limit: int = 10) -> List[str]:
    """Extract up to *limit* unique http(s) URLs from *text*, in order of appearance.

    Trailing punctuation that commonly clings to URLs in prose is stripped.
    Duplicates are skipped *before* counting against *limit*, so a repeated
    URL cannot crowd distinct URLs out of the result (the previous version
    applied the limit first and deduped afterwards).
    """
    out: List[str] = []
    seen: set[str] = set()
    for raw in _URL_RE.findall(str(text or "")):
        cleaned = str(raw).rstrip(").,;]\"'")
        if not cleaned or cleaned in seen:
            continue
        seen.add(cleaned)
        out.append(cleaned)
        if len(out) >= int(limit):
            break
    return out
53
+
54
+
55
def _extract_primary_url(text: str) -> List[str]:
    """Return at most one URL: an explicit "URL:" header wins, else the first bare URL."""
    source = str(text or "")
    header = _URL_HEADER_RE.search(source)
    if header is not None:
        url = str(header.group(1) or "").rstrip(").,;]\"'")
        if url:
            return [url]
    return _extract_urls(source, limit=1)
63
+
64
+
65
def _extract_primary_file_path(text: str) -> List[str]:
    """Return at most one file path from a tool observation.

    Preference order: an explicit "File:" header, then the first single-quoted
    token (write/edit tool phrasing), then generic path scanning. "~" prefixes
    are expanded best-effort so the path can be opened directly.
    """
    source = str(text or "")

    def _expand(path: str) -> str:
        # Best-effort "~" expansion; leave the path untouched on failure.
        if path.startswith("~"):
            try:
                return str(Path(path).expanduser())
            except Exception:
                pass
        return path

    header = _FILE_HEADER_RE.search(source)
    if header:
        candidate = str(header.group(1) or "").strip().strip("'\"").rstrip(").,;]\"'")
        if candidate:
            return [_expand(candidate)]

    # Common pattern for write/edit tools: "... 'absolute/path' ..."
    quoted = re.search(r"'([^']+)'", source)
    if quoted:
        candidate = str(quoted.group(1) or "").strip().rstrip(").,;]\"'")
        if candidate:
            return [_expand(candidate)]

    return _extract_file_paths(source, limit=1)
91
+
92
+
93
def _extract_resources_for_tool(tool_name: str, content: str) -> Tuple[List[str], List[str]]:
    """Map a tool observation to (urls, file_paths) worth surfacing as UI chips."""
    name = str(tool_name or "").strip()
    if name == "fetch_url":
        # URL tools: surface only the primary URL.
        return _extract_primary_url(content), []
    if name in ("read_file", "write_file", "edit_file", "analyze_code"):
        # File tools: surface only the primary file path.
        return [], _extract_primary_file_path(content)
    # Default: keep resource extraction bounded to avoid noisy chips (e.g. file contents).
    return _extract_urls(content, limit=3), _extract_file_paths(content, limit=3)
103
+
104
+
105
def _extract_file_paths(text: str, *, limit: int = 10) -> List[str]:
    """Extract up to *limit* unique absolute-looking file paths from *text*.

    URLs are blanked out first so their path segments are not mis-detected.
    Windows drive-letter paths and Unix-ish absolute / "~" paths are
    considered; "~" is expanded so open() works reliably. Duplicates are
    skipped *before* counting against *limit* (the previous version applied
    the limit first, letting repeated paths crowd out distinct ones).
    """
    raw_text = str(text or "")
    # Avoid capturing URL path segments.
    for url in _URL_RE.findall(raw_text):
        raw_text = raw_text.replace(url, " ")

    candidates: List[str] = []
    candidates.extend(_WIN_PATH_RE.findall(raw_text))
    # Unix-ish absolute paths (macOS/Linux). Prefer absolute to avoid CWD ambiguity.
    candidates.extend(re.findall(r"(?:~|/)[^\s\"'<>]+", raw_text))

    out: List[str] = []
    seen: set[str] = set()
    for raw in candidates:
        cleaned = str(raw).strip().rstrip(").,;]\"'")
        if not cleaned:
            continue
        # Expand "~" when present so open() works reliably.
        if cleaned.startswith("~"):
            try:
                cleaned = str(Path(cleaned).expanduser())
            except Exception:
                pass
        if cleaned in seen:
            continue
        seen.add(cleaned)
        out.append(cleaned)
        if len(out) >= int(limit):
            break
    return out
131
+
132
+
133
+ def _tool_name_from_message(message: Dict[str, Any]) -> str:
134
+ metadata = message.get("metadata")
135
+ if isinstance(metadata, dict):
136
+ name = metadata.get("name")
137
+ if isinstance(name, str) and name.strip():
138
+ return name.strip()
139
+
140
+ content = str(message.get("content") or "")
141
+ match = re.match(r"\s*\[([^\]]+)\]:", content)
142
+ if match:
143
+ name = match.group(1)
144
+ if isinstance(name, str) and name.strip():
145
+ return name.strip()
146
+ return "tool"
147
+
148
+
149
+ def _short_label_for_url(url: str) -> str:
150
+ try:
151
+ parsed = urlparse(url)
152
+ host = str(parsed.netloc or "").strip()
153
+ if host:
154
+ return host
155
+ except Exception:
156
+ pass
157
+ return url
158
+
159
+
160
+ def _short_label_for_path(path: str) -> str:
161
+ p = str(path or "").strip()
162
+ if not p:
163
+ return p
164
+ try:
165
+ parts = [part for part in Path(p).parts if part]
166
+ tail = parts[-3:] if len(parts) > 3 else parts
167
+ if tail:
168
+ return "…/" + "/".join(tail) if len(parts) > len(tail) else "/".join(tail)
169
+ except Exception:
170
+ return p
171
+ return p
172
+
173
+
174
def _is_image_target(target: str) -> bool:
    """True when *target* looks like an image file/URL by extension; data: URIs excluded."""
    candidate = str(target or "").strip()
    if not candidate or candidate.startswith("data:"):
        return False
    try:
        if candidate.startswith(("http://", "https://", "file://")):
            # Judge URLs by their path component only (ignore query/fragment).
            candidate_path = urlparse(candidate).path or ""
        else:
            candidate_path = candidate
        return Path(candidate_path).suffix.lower() in _IMAGE_EXTS
    except Exception:
        return False
189
+
190
+
191
def _normalize_image_target(target: str) -> Tuple[str, str]:
    """Return (kind, normalized_target) where kind is "file" or "url".

    file:// URIs are converted to local paths; "~" is expanded for local
    files. Anything that is not clearly a local path is reported as a URL.
    """
    t = str(target or "").strip()
    if t.startswith("file://"):
        try:
            local = unquote(urlparse(t).path)
            if local:
                return "file", str(Path(local).expanduser())
            return "file", local
        except Exception:
            return "file", t
    if t.startswith(("http://", "https://")):
        return "url", t
    # Only treat absolute-ish paths as files.
    if t.startswith(("~", "/", "\\")) or _WIN_PATH_RE.match(t):
        if t.startswith("~"):
            try:
                return "file", str(Path(t).expanduser())
            except Exception:
                return "file", t
        return "file", t
    return "url", t
210
+
211
+
212
def _extract_images_from_text(text: str, *, limit: int = 6) -> Tuple[str, List[Dict[str, str]]]:
    """Extract image references and return cleaned text + thumbnail descriptors.

    Markdown and HTML image syntax is removed from the returned text (the UI
    renders thumbnails separately); bare image URLs/paths are detected but
    left in place. Each descriptor is {"kind": "url"|"file", "target": ...,
    "label": ...}. At most *limit* unique targets are returned, first
    occurrence wins.
    """
    raw = str(text or "")
    thumbs: List[Dict[str, str]] = []

    def _add(target: str, label: str) -> None:
        # Collect only image-looking targets; normalization decides url vs file.
        if not _is_image_target(target):
            return
        kind, norm = _normalize_image_target(target)
        if not norm:
            return
        thumbs.append({"kind": kind, "target": norm, "label": str(label or "").strip()})

    # Markdown images: ![alt](url "title")
    def _md_repl(match: re.Match) -> str:
        alt = str(match.group(1) or "").strip()
        inner = str(match.group(2) or "").strip()
        # Strip optional title: take first token that looks like a URL/path.
        inner = inner.strip().strip("<>")
        target = inner.split()[0] if inner else ""
        _add(target, alt)
        return ""  # remove from visible transcript; thumbnails will render below.

    cleaned = _MD_IMAGE_RE.sub(_md_repl, raw)

    # HTML image tags (sometimes returned by models).
    def _html_repl(match: re.Match) -> str:
        target = str(match.group(1) or "").strip()
        _add(target, "")
        return ""

    cleaned = _HTML_IMG_RE.sub(_html_repl, cleaned)

    # Also detect bare image URLs / paths (do not remove from text).
    for url in _extract_urls(cleaned, limit=20):
        if _is_image_target(url):
            _add(url, _short_label_for_url(url))
    for path in _extract_file_paths(cleaned, limit=20):
        if _is_image_target(path):
            _add(path, _short_label_for_path(path))

    # Dedupe + cap (by normalized target, first occurrence wins).
    seen: set[str] = set()
    uniq: List[Dict[str, str]] = []
    for th in thumbs:
        target = str(th.get("target") or "")
        if not target or target in seen:
            continue
        seen.add(target)
        uniq.append(th)
        if len(uniq) >= int(limit):
            break

    # Light whitespace cleanup after removing markdown/html images.
    cleaned = re.sub(r"\n{3,}", "\n\n", cleaned).strip()
    return cleaned, uniq
268
+
269
+
270
def _images_from_links(links: Sequence[Dict[str, str]], *, limit: int = 6) -> List[Dict[str, str]]:
    """Return up to *limit* unique image thumbnail descriptors from link dicts.

    Non-dict entries and non-image targets are skipped. Deduplication (by
    normalized target, first occurrence wins) happens *before* the cap, so
    duplicate links cannot consume limit slots and shrink the result (the
    previous version capped first and deduped afterwards).
    """
    out: List[Dict[str, str]] = []
    seen: set[str] = set()
    for link in links or []:
        if not isinstance(link, dict):
            continue
        target = str(link.get("target") or "").strip()
        if not target or not _is_image_target(target):
            continue
        kind, norm = _normalize_image_target(target)
        if not norm or norm in seen:
            continue
        seen.add(norm)
        label = str(link.get("label") or "").strip()
        out.append({"kind": kind, "target": norm, "label": label})
        if len(out) >= int(limit):
            break
    return out
293
+
294
+
295
+ def _build_tool_summary(tool_events: Sequence[Dict[str, Any]]) -> str:
296
+ order: List[str] = []
297
+ counts: Dict[str, int] = {}
298
+ for event in tool_events:
299
+ name = str(event.get("name") or "tool").strip() or "tool"
300
+ if name not in counts:
301
+ counts[name] = 0
302
+ order.append(name)
303
+ counts[name] += 1
304
+
305
+ parts: List[str] = []
306
+ for name in order:
307
+ count = counts.get(name, 0)
308
+ if count > 1:
309
+ parts.append(f"{name}×{count}")
310
+ else:
311
+ parts.append(name)
312
+ joined = " • ".join(parts).strip()
313
+ return f"🛠 {joined}" if joined else "🛠 tools"
314
+
315
+
316
def _build_tool_links(tool_events: Sequence[Dict[str, Any]], *, limit: int = 30) -> List[Dict[str, str]]:
    """Flatten tool events into at most *limit* clickable link descriptors.

    URLs come first (file:// URLs are converted to local file links), then
    plain file paths. The limit is now enforced uniformly: the previous
    version skipped the limit check after appending a file:// entry, so the
    result could grow past *limit*.
    """
    urls: List[str] = []
    paths: List[str] = []
    for event in tool_events:
        urls.extend([str(u) for u in (event.get("urls") or []) if isinstance(u, str)])
        paths.extend([str(p) for p in (event.get("paths") or []) if isinstance(p, str)])

    links: List[Dict[str, str]] = []
    for url in _dedupe_preserve_order(urls):
        # Treat file:// links as files.
        if url.startswith("file://"):
            try:
                file_path = unquote(urlparse(url).path)
                if file_path:
                    links.append({"kind": "file", "target": file_path, "label": _short_label_for_path(file_path)})
                if len(links) >= int(limit):
                    return links
                continue
            except Exception:
                pass
        links.append({"kind": "url", "target": url, "label": _short_label_for_url(url)})
        if len(links) >= int(limit):
            return links

    for path in _dedupe_preserve_order(paths):
        links.append({"kind": "file", "target": path, "label": _short_label_for_path(path)})
        if len(links) >= int(limit):
            break
    return links
344
+
345
+
346
def build_display_messages(raw_messages: Sequence[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Return user-visible transcript messages with attached tool summaries.

    Rules:
    - Drop role="system".
    - Drop assistant internal tool-call placeholders (`metadata.kind=="tool_calls"` with empty content).
    - Drop empty assistant messages.
    - Drop role="tool" bubbles, but attach a compact summary + resource links to the
      next user-visible assistant message.

    Rendered assistant messages may gain extra keys: "tool_summary" (str),
    "tool_links" (list of link dicts) and "image_thumbnails" (list of image
    descriptors). Input dicts are copied, never mutated.
    """
    # Tool observations buffered since the last rendered assistant message.
    pending_tools: List[Dict[str, Any]] = []
    out: List[Dict[str, Any]] = []

    for msg in raw_messages:
        if not isinstance(msg, dict):
            continue

        role = str(msg.get("role") or "")
        content = str(msg.get("content") or "")
        metadata = msg.get("metadata")
        meta = dict(metadata) if isinstance(metadata, dict) else {}
        kind = str(meta.get("kind") or "").strip().lower()

        if role == "system":
            continue

        if role == "tool":
            # Buffer the observation; it is attached to the next assistant bubble.
            name = _tool_name_from_message(msg)
            urls, paths = _extract_resources_for_tool(name, content)
            pending_tools.append(
                {
                    "name": name,
                    "urls": urls,
                    "paths": paths,
                }
            )
            continue

        if role == "assistant":
            if kind == "tool_calls" and not content.strip():
                # Internal placeholder used to preserve tool-call metadata for providers.
                continue
            # Pull inline image references out of the text so the UI renders thumbnails.
            cleaned_content, content_images = _extract_images_from_text(content)
            rendered = dict(msg)
            rendered["content"] = cleaned_content
            images: List[Dict[str, str]] = list(content_images)
            if pending_tools:
                rendered["tool_summary"] = _build_tool_summary(pending_tools)
                links = _build_tool_links(pending_tools)
                if links:
                    rendered["tool_links"] = links
                    images.extend(_images_from_links(links))
                pending_tools = []
            if images:
                # Dedupe by target.
                seen_targets: set[str] = set()
                deduped: List[Dict[str, str]] = []
                for img in images:
                    if not isinstance(img, dict):
                        continue
                    target = str(img.get("target") or "").strip()
                    if not target or target in seen_targets:
                        continue
                    seen_targets.add(target)
                    deduped.append({"kind": str(img.get("kind") or "url"), "target": target, "label": str(img.get("label") or "")})
                if deduped:
                    rendered["image_thumbnails"] = deduped

            if not cleaned_content.strip() and not rendered.get("tool_summary") and not rendered.get("image_thumbnails"):
                # Avoid blank bubbles in the user-visible transcript.
                continue
            out.append(rendered)
            continue

        # Default: user / other roles.
        out.append(dict(msg))

    # Best-effort: attach any leftover tool events to the last assistant message.
    if pending_tools and out:
        for rendered in reversed(out):
            if str(rendered.get("role") or "") != "assistant":
                continue
            # setdefault: do not clobber a summary the message already carries.
            rendered.setdefault("tool_summary", _build_tool_summary(pending_tools))
            links = _build_tool_links(pending_tools)
            if links:
                rendered.setdefault("tool_links", links)
            break

    return out