sentienceapi-0.95.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of sentienceapi might be problematic.

Files changed (82)
  1. sentience/__init__.py +253 -0
  2. sentience/_extension_loader.py +195 -0
  3. sentience/action_executor.py +215 -0
  4. sentience/actions.py +1020 -0
  5. sentience/agent.py +1181 -0
  6. sentience/agent_config.py +46 -0
  7. sentience/agent_runtime.py +424 -0
  8. sentience/asserts/__init__.py +70 -0
  9. sentience/asserts/expect.py +621 -0
  10. sentience/asserts/query.py +383 -0
  11. sentience/async_api.py +108 -0
  12. sentience/backends/__init__.py +137 -0
  13. sentience/backends/actions.py +343 -0
  14. sentience/backends/browser_use_adapter.py +241 -0
  15. sentience/backends/cdp_backend.py +393 -0
  16. sentience/backends/exceptions.py +211 -0
  17. sentience/backends/playwright_backend.py +194 -0
  18. sentience/backends/protocol.py +216 -0
  19. sentience/backends/sentience_context.py +469 -0
  20. sentience/backends/snapshot.py +427 -0
  21. sentience/base_agent.py +196 -0
  22. sentience/browser.py +1215 -0
  23. sentience/browser_evaluator.py +299 -0
  24. sentience/canonicalization.py +207 -0
  25. sentience/cli.py +130 -0
  26. sentience/cloud_tracing.py +807 -0
  27. sentience/constants.py +6 -0
  28. sentience/conversational_agent.py +543 -0
  29. sentience/element_filter.py +136 -0
  30. sentience/expect.py +188 -0
  31. sentience/extension/background.js +104 -0
  32. sentience/extension/content.js +161 -0
  33. sentience/extension/injected_api.js +914 -0
  34. sentience/extension/manifest.json +36 -0
  35. sentience/extension/pkg/sentience_core.d.ts +51 -0
  36. sentience/extension/pkg/sentience_core.js +323 -0
  37. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  38. sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
  39. sentience/extension/release.json +115 -0
  40. sentience/formatting.py +15 -0
  41. sentience/generator.py +202 -0
  42. sentience/inspector.py +367 -0
  43. sentience/llm_interaction_handler.py +191 -0
  44. sentience/llm_provider.py +875 -0
  45. sentience/llm_provider_utils.py +120 -0
  46. sentience/llm_response_builder.py +153 -0
  47. sentience/models.py +846 -0
  48. sentience/ordinal.py +280 -0
  49. sentience/overlay.py +222 -0
  50. sentience/protocols.py +228 -0
  51. sentience/query.py +303 -0
  52. sentience/read.py +188 -0
  53. sentience/recorder.py +589 -0
  54. sentience/schemas/trace_v1.json +335 -0
  55. sentience/screenshot.py +100 -0
  56. sentience/sentience_methods.py +86 -0
  57. sentience/snapshot.py +706 -0
  58. sentience/snapshot_diff.py +126 -0
  59. sentience/text_search.py +262 -0
  60. sentience/trace_event_builder.py +148 -0
  61. sentience/trace_file_manager.py +197 -0
  62. sentience/trace_indexing/__init__.py +27 -0
  63. sentience/trace_indexing/index_schema.py +199 -0
  64. sentience/trace_indexing/indexer.py +414 -0
  65. sentience/tracer_factory.py +322 -0
  66. sentience/tracing.py +449 -0
  67. sentience/utils/__init__.py +40 -0
  68. sentience/utils/browser.py +46 -0
  69. sentience/utils/element.py +257 -0
  70. sentience/utils/formatting.py +59 -0
  71. sentience/utils.py +296 -0
  72. sentience/verification.py +380 -0
  73. sentience/visual_agent.py +2058 -0
  74. sentience/wait.py +139 -0
  75. sentienceapi-0.95.0.dist-info/METADATA +984 -0
  76. sentienceapi-0.95.0.dist-info/RECORD +82 -0
  77. sentienceapi-0.95.0.dist-info/WHEEL +5 -0
  78. sentienceapi-0.95.0.dist-info/entry_points.txt +2 -0
  79. sentienceapi-0.95.0.dist-info/licenses/LICENSE +24 -0
  80. sentienceapi-0.95.0.dist-info/licenses/LICENSE-APACHE +201 -0
  81. sentienceapi-0.95.0.dist-info/licenses/LICENSE-MIT +21 -0
  82. sentienceapi-0.95.0.dist-info/top_level.txt +1 -0
sentience/cloud_tracing.py
@@ -0,0 +1,807 @@
"""
Cloud trace sink with pre-signed URL upload.

Implements the "Local Write, Batch Upload" pattern for enterprise cloud tracing.
"""

import base64
import gzip
import json
import os
import threading
from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Any, Protocol

import requests

from sentience.constants import SENTIENCE_API_URL
from sentience.models import TraceStats
from sentience.trace_file_manager import TraceFileManager
from sentience.tracing import TraceSink


class SentienceLogger(Protocol):
    """Protocol for optional logger interface."""

    def info(self, message: str) -> None:
        """Log info message."""
        ...

    def warning(self, message: str) -> None:
        """Log warning message."""
        ...

    def error(self, message: str) -> None:
        """Log error message."""
        ...

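# Any object exposing info/warning/error methods satisfies this protocol
# structurally; no subclassing is required. For example (an illustrative
# sketch, not part of this file), an adapter over the stdlib logging module:
#
#     import logging
#
#     class StdLoggingAdapter:
#         def __init__(self, name: str = "sentience") -> None:
#             self._log = logging.getLogger(name)
#
#         def info(self, message: str) -> None:
#             self._log.info(message)
#
#         def warning(self, message: str) -> None:
#             self._log.warning(message)
#
#         def error(self, message: str) -> None:
#             self._log.error(message)
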
class CloudTraceSink(TraceSink):
    """
    Enterprise Cloud Sink: "Local Write, Batch Upload" pattern.

    Architecture:
    1. **Local Buffer**: Writes to a persistent cache directory (zero latency, non-blocking)
    2. **Pre-signed URL**: Uses a secure pre-signed PUT URL from the backend API
    3. **Batch Upload**: Uploads the complete file on close() or at intervals
    4. **Zero Credential Exposure**: Never embeds DigitalOcean credentials in the SDK
    5. **Crash Recovery**: Traces survive process crashes (stored in ~/.sentience/traces/pending/)

    This design ensures:
    - Fast agent performance (microseconds per emit, not milliseconds)
    - Security (credentials stay on the backend)
    - Reliability (network issues don't crash the agent)
    - Data durability (traces survive crashes and can be recovered)

    Tiered Access:
    - Free Tier: Falls back to JsonlTraceSink (local-only)
    - Pro/Enterprise: Uploads to cloud via pre-signed URLs

    Example:
        >>> from sentience.cloud_tracing import CloudTraceSink
        >>> from sentience.tracing import Tracer
        >>> # Get upload URL from API
        >>> upload_url = "https://sentience.nyc3.digitaloceanspaces.com/..."
        >>> sink = CloudTraceSink(upload_url, run_id="demo")
        >>> tracer = Tracer(run_id="demo", sink=sink)
        >>> tracer.emit_run_start("SentienceAgent")
        >>> tracer.close()  # Uploads to cloud
        >>> # Or non-blocking:
        >>> tracer.close(blocking=False)  # Returns immediately
    """

    def __init__(
        self,
        upload_url: str,
        run_id: str,
        api_key: str | None = None,
        api_url: str | None = None,
        logger: SentienceLogger | None = None,
    ):
        """
        Initialize cloud trace sink.

        Args:
            upload_url: Pre-signed PUT URL from Sentience API
                (e.g., "https://sentience.nyc3.digitaloceanspaces.com/...")
            run_id: Unique identifier for this agent run (used for persistent cache)
            api_key: Sentience API key for calling /v1/traces/complete
            api_url: Sentience API base URL (default: https://api.sentienceapi.com)
            logger: Optional logger instance for logging file sizes and errors
        """
        self.upload_url = upload_url
        self.run_id = run_id
        self.api_key = api_key
        self.api_url = api_url or SENTIENCE_API_URL
        self.logger = logger

        # Use a persistent cache directory instead of a temp file so traces
        # survive process crashes.
        cache_dir = Path.home() / ".sentience" / "traces" / "pending"
        # Create the directory if it doesn't exist (ensure_directory is for file paths, not dirs)
        cache_dir.mkdir(parents=True, exist_ok=True)

        # Persistent file (survives process crash)
        self._path = cache_dir / f"{run_id}.jsonl"
        self._trace_file = open(self._path, "w", encoding="utf-8")
        self._closed = False
        self._upload_successful = False

        # File size tracking
        self.trace_file_size_bytes = 0
        self.screenshot_total_size_bytes = 0
        self.screenshot_count = 0  # Number of screenshots extracted
        self.index_file_size_bytes = 0  # Index file size
        # Guards screenshot_total_size_bytes, which is updated from parallel
        # screenshot-upload worker threads (see _upload_screenshots).
        self._size_lock = threading.Lock()

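    # Because the buffer lives under ~/.sentience/traces/pending/, traces from
    # a crashed process remain on disk. A recovery scan might look like the
    # sketch below (illustrative only; this file defines no recovery routine):
    #
    #     from pathlib import Path
    #
    #     pending = Path.home() / ".sentience" / "traces" / "pending"
    #     for leftover in sorted(pending.glob("*.jsonl")):
    #         run_id = leftover.stem  # file is named {run_id}.jsonl
    #         print(f"unuploaded trace from run {run_id}: {leftover}")
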
    def emit(self, event: dict[str, Any]) -> None:
        """
        Write event to the local persistent file (fast, non-blocking).

        Performance: ~10 microseconds per write vs ~50 ms for an HTTP request.

        Args:
            event: Event dictionary from TraceEvent.to_dict()
        """
        if self._closed:
            raise RuntimeError("CloudTraceSink is closed")

        TraceFileManager.write_event(self._trace_file, event)

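    # The sink is format-agnostic: it writes whatever dict it is given as one
    # JSONL line. Judging from the event fields this file reads back later, a
    # snapshot event would look roughly like this (illustrative values):
    #
    #     sink.emit({
    #         "type": "snapshot",
    #         "step_id": "step-1",
    #         "data": {
    #             "screenshot_base64": "<base64-encoded image>",
    #             "screenshot_format": "jpeg",
    #         },
    #     })
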
    def close(
        self,
        blocking: bool = True,
        on_progress: Callable[[int, int], None] | None = None,
    ) -> None:
        """
        Upload the buffered trace to the cloud via the pre-signed URL.

        Args:
            blocking: If False, returns immediately and uploads in a background thread
            on_progress: Optional callback(uploaded_bytes, total_bytes) for progress updates

        This is the only network call - it happens once, at the end.
        """
        if self._closed:
            return

        self._closed = True

        # Flush and sync the file to disk before closing to ensure all data is
        # written. This is critical on CI systems where file system operations
        # may be slower.
        self._trace_file.flush()
        try:
            # Force the OS to write buffered data to disk
            os.fsync(self._trace_file.fileno())
        except (OSError, AttributeError):
            # Some file handles don't support fsync (e.g., StringIO in tests).
            # This is fine - flush() is usually sufficient.
            pass
        self._trace_file.close()

        # Ensure the file exists and has content before proceeding
        if not self._path.exists() or self._path.stat().st_size == 0:
            # No events were emitted, nothing to upload
            if self.logger:
                self.logger.warning("No trace events to upload (file is empty or missing)")
            return

        # Generate the index after closing the file
        self._generate_index()

        if not blocking:
            # Fire-and-forget background upload
            thread = threading.Thread(
                target=self._do_upload,
                args=(on_progress,),
                daemon=True,
            )
            thread.start()
            return  # Return immediately

        # Blocking mode
        self._do_upload(on_progress)

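    # A progress callback receives byte counts for the trace upload. A minimal
    # sketch, assuming a sink constructed as in the class docstring:
    #
    #     def on_progress(uploaded_bytes: int, total_bytes: int) -> None:
    #         pct = 100.0 * uploaded_bytes / max(total_bytes, 1)
    #         print(f"trace upload: {pct:.0f}%")
    #
    #     sink.close(blocking=True, on_progress=on_progress)
    #
    #     # Or hand the upload to a daemon thread and return immediately:
    #     sink.close(blocking=False)
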
    def _do_upload(self, on_progress: Callable[[int, int], None] | None = None) -> None:
        """
        Internal upload method with progress tracking.

        Extracts screenshots from trace events, uploads them separately,
        then removes screenshot_base64 from events before uploading the trace.

        Args:
            on_progress: Optional callback(uploaded_bytes, total_bytes) for progress updates
        """
        try:
            # Step 1: Extract screenshots from trace events
            screenshots = self._extract_screenshots_from_trace()
            self.screenshot_count = len(screenshots)

            # Step 2: Upload screenshots separately
            if screenshots:
                self._upload_screenshots(screenshots, on_progress)

            # Step 3: Create a cleaned trace file (without screenshot_base64)
            cleaned_trace_path = self._path.with_suffix(".cleaned.jsonl")
            self._create_cleaned_trace(cleaned_trace_path)

            # Step 4: Read and compress the cleaned trace
            with open(cleaned_trace_path, "rb") as f:
                trace_data = f.read()

            compressed_data = gzip.compress(trace_data)
            compressed_size = len(compressed_data)

            # Measure the trace file size
            self.trace_file_size_bytes = compressed_size

            # Log file sizes if a logger is provided
            if self.logger:
                self.logger.info(
                    f"Trace file size: {self.trace_file_size_bytes / 1024 / 1024:.2f} MB"
                )
                self.logger.info(
                    f"Screenshot total: {self.screenshot_total_size_bytes / 1024 / 1024:.2f} MB"
                )

            # Report progress: start
            if on_progress:
                on_progress(0, compressed_size)

            # Step 5: Upload the cleaned trace to the cloud
            if self.logger:
                self.logger.info(f"Uploading trace to cloud ({compressed_size} bytes)")

            response = requests.put(
                self.upload_url,
                data=compressed_data,
                headers={
                    "Content-Type": "application/x-gzip",
                    "Content-Encoding": "gzip",
                },
                timeout=60,  # 1 minute timeout for large files
            )

            if response.status_code == 200:
                self._upload_successful = True
                print("✅ [Sentience] Trace uploaded successfully")
                if self.logger:
                    self.logger.info("Trace uploaded successfully")

                # Report progress: complete
                if on_progress:
                    on_progress(compressed_size, compressed_size)

                # Upload the trace index file
                self._upload_index()

                # Call /v1/traces/complete to report file sizes
                self._complete_trace()

                # Delete files only on successful upload
                self._cleanup_files()

                # Clean up the temporary cleaned trace file
                if cleaned_trace_path.exists():
                    cleaned_trace_path.unlink()
            else:
                self._upload_successful = False
                print(f"❌ [Sentience] Upload failed: HTTP {response.status_code}")
                print(f"   Response: {response.text[:200]}")
                print(f"   Local trace preserved at: {self._path}")
                if self.logger:
                    self.logger.error(
                        f"Upload failed: HTTP {response.status_code}, Response: {response.text[:200]}"
                    )

        except Exception as e:
            self._upload_successful = False
            print(f"❌ [Sentience] Error uploading trace: {e}")
            print(f"   Local trace preserved at: {self._path}")
            if self.logger:
                self.logger.error(f"Error uploading trace: {e}")
            # Don't raise - preserve the trace locally even if the upload fails

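    # The uploaded object is simply the gzipped JSONL trace. A consumer that
    # has fetched the object could recover the events like this (illustrative
    # sketch; `blob` stands for the downloaded object's bytes):
    #
    #     import gzip, json
    #
    #     events = [
    #         json.loads(line)
    #         for line in gzip.decompress(blob).decode("utf-8").splitlines()
    #         if line.strip()
    #     ]
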
    def _generate_index(self) -> None:
        """Generate the trace index file (automatic on close)."""
        try:
            from .trace_indexing import write_trace_index

            # Use the frontend format to ensure the 'step' field is present
            # (1-based). The frontend derives sequence from step.step - 1, so
            # step must be valid.
            index_path = Path(str(self._path).replace(".jsonl", ".index.json"))
            write_trace_index(str(self._path), str(index_path), frontend_format=True)
        except Exception as e:
            # Non-fatal: log but don't crash
            print(f"⚠️ Failed to generate trace index: {e}")
            if self.logger:
                self.logger.warning(f"Failed to generate trace index: {e}")

    def _upload_index(self) -> None:
        """
        Upload the trace index file to cloud storage.

        Called after a successful trace upload to provide fast timeline rendering.
        The index file enables O(1) step lookups without parsing the entire trace.
        """
        # Construct the index file path (same as the trace file, with a
        # .index.json extension)
        index_path = Path(str(self._path).replace(".jsonl", ".index.json"))

        if not index_path.exists():
            if self.logger:
                self.logger.warning("Index file not found, skipping index upload")
            return

        try:
            # Request an index upload URL from the API
            if not self.api_key:
                # No API key - skip index upload
                if self.logger:
                    self.logger.info("No API key provided, skipping index upload")
                return

            response = requests.post(
                f"{self.api_url}/v1/traces/index_upload",
                headers={"Authorization": f"Bearer {self.api_key}"},
                json={"run_id": self.run_id},
                timeout=10,
            )

            if response.status_code != 200:
                if self.logger:
                    self.logger.warning(
                        f"Failed to get index upload URL: HTTP {response.status_code}"
                    )
                return

            upload_data = response.json()
            index_upload_url = upload_data.get("upload_url")

            if not index_upload_url:
                if self.logger:
                    self.logger.warning("No upload URL in index upload response")
                return

            # Read the index file and update trace_file.path to the cloud storage path
            with open(index_path, encoding="utf-8") as f:
                index_json = json.load(f)

            # Extract the cloud storage path from the trace upload URL.
            # upload_url format: https://...digitaloceanspaces.com/traces/{run_id}.jsonl.gz
            # Extracted path: traces/{run_id}.jsonl.gz
            try:
                from urllib.parse import urlparse

                parsed_url = urlparse(self.upload_url)
                # Path after the domain (e.g., /traces/run-123.jsonl.gz -> traces/run-123.jsonl.gz)
                cloud_trace_path = parsed_url.path.lstrip("/")
                # Update trace_file.path in the index
                if "trace_file" in index_json and isinstance(index_json["trace_file"], dict):
                    index_json["trace_file"]["path"] = cloud_trace_path
            except Exception as e:
                if self.logger:
                    self.logger.warning(f"Failed to extract cloud path from upload URL: {e}")

            # Serialize the updated index to JSON
            index_data = json.dumps(index_json, indent=2).encode("utf-8")
            compressed_index = gzip.compress(index_data)
            index_size = len(compressed_index)
            self.index_file_size_bytes = index_size  # Track index file size

            if self.logger:
                self.logger.info(f"Index file size: {index_size / 1024:.2f} KB")
                self.logger.info(f"Uploading trace index ({index_size} bytes)")

            # Upload the index to cloud storage
            index_response = requests.put(
                index_upload_url,
                data=compressed_index,
                headers={
                    "Content-Type": "application/json",
                    "Content-Encoding": "gzip",
                },
                timeout=30,
            )

            if index_response.status_code == 200:
                if self.logger:
                    self.logger.info("Trace index uploaded successfully")

                # Delete the local index file after a successful upload
                try:
                    os.remove(index_path)
                except Exception:
                    pass  # Ignore cleanup errors
            else:
                if self.logger:
                    self.logger.warning(f"Index upload failed: HTTP {index_response.status_code}")

        except Exception as e:
            # Non-fatal: log but don't crash
            if self.logger:
                self.logger.warning(f"Error uploading trace index: {e}")

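    # The path rewrite above relies on urlparse stripping the scheme, host, and
    # query string (the signature parameters) from the pre-signed URL:
    #
    #     >>> from urllib.parse import urlparse
    #     >>> url = "https://sentience.nyc3.digitaloceanspaces.com/traces/run-123.jsonl.gz?X-Amz-Signature=..."
    #     >>> urlparse(url).path.lstrip("/")
    #     'traces/run-123.jsonl.gz'
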
    def _infer_final_status_from_trace(
        self, events: list[dict[str, Any]], run_end: dict[str, Any] | None
    ) -> str:
        """
        Infer the final status by re-reading the trace file.

        Note: the ``events`` and ``run_end`` arguments are accepted to match
        the ``infer_status_func`` callback signature but are not used; the
        trace file on disk is treated as the source of truth.

        Returns:
            Final status: "success", "failure", "partial", or "unknown"
        """
        try:
            # Read the trace file to analyze events
            with open(self._path, encoding="utf-8") as f:
                events = []
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        event = json.loads(line)
                        events.append(event)
                    except json.JSONDecodeError:
                        continue

            if not events:
                return "unknown"

            # Check for a run_end event with a status
            for event in reversed(events):
                if event.get("type") == "run_end":
                    status = event.get("data", {}).get("status")
                    if status in ("success", "failure", "partial", "unknown"):
                        return status

            # Infer from error events
            has_errors = any(e.get("type") == "error" for e in events)
            if has_errors:
                # If there are successful steps too, treat it as partial success
                step_ends = [e for e in events if e.get("type") == "step_end"]
                if step_ends:
                    return "partial"
                return "failure"

            # step_end events and no errors: likely success
            step_ends = [e for e in events if e.get("type") == "step_end"]
            if step_ends:
                return "success"

            return "unknown"

        except Exception:
            # If we can't read the trace, default to unknown
            return "unknown"

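    # When no run_end event carries an explicit status, the rules above reduce
    # to this decision table:
    #
    #     error events | step_end events | inferred status
    #     -------------+-----------------+----------------
    #     yes          | yes             | "partial"
    #     yes          | no              | "failure"
    #     no           | yes             | "success"
    #     no           | no              | "unknown"
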
    def _extract_stats_from_trace(self) -> TraceStats:
        """
        Extract execution statistics from the trace file.

        Returns:
            TraceStats with stats fields for /v1/traces/complete
        """
        try:
            # Check that the file exists before reading
            if not self._path.exists():
                if self.logger:
                    self.logger.warning(f"Trace file not found: {self._path}")
                return TraceStats(
                    total_steps=0,
                    total_events=0,
                    duration_ms=None,
                    final_status="unknown",
                    started_at=None,
                    ended_at=None,
                )

            # Read the trace file to extract stats
            events = TraceFileManager.read_events(self._path)
            # Use TraceFileManager to extract stats (with custom status inference)
            return TraceFileManager.extract_stats(
                events, infer_status_func=self._infer_final_status_from_trace
            )
        except Exception as e:
            if self.logger:
                self.logger.warning(f"Error extracting stats from trace: {e}")
            return TraceStats(
                total_steps=0,
                total_events=0,
                duration_ms=None,
                final_status="unknown",
                started_at=None,
                ended_at=None,
            )

    def _complete_trace(self) -> None:
        """
        Call /v1/traces/complete to report file sizes and stats to the gateway.

        This is a best-effort call - failures are logged but don't affect upload success.
        """
        if not self.api_key:
            # No API key - skip the complete call
            return

        try:
            # Extract stats from the trace file
            stats = self._extract_stats_from_trace()

            # Build the completion payload with stats and file size fields
            completion_payload = {
                **stats.model_dump(),  # Convert TraceStats to a dict
                "trace_file_size_bytes": self.trace_file_size_bytes,
                "screenshot_total_size_bytes": self.screenshot_total_size_bytes,
                "screenshot_count": self.screenshot_count,
                "index_file_size_bytes": self.index_file_size_bytes,
            }

            response = requests.post(
                f"{self.api_url}/v1/traces/complete",
                headers={"Authorization": f"Bearer {self.api_key}"},
                json={
                    "run_id": self.run_id,
                    "stats": completion_payload,
                },
                timeout=10,
            )

            if response.status_code == 200:
                if self.logger:
                    self.logger.info("Trace completion reported to gateway")
            else:
                if self.logger:
                    self.logger.warning(
                        f"Failed to report trace completion: HTTP {response.status_code}"
                    )

        except Exception as e:
            # Best-effort - log but don't fail
            if self.logger:
                self.logger.warning(f"Error reporting trace completion: {e}")

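    # Given the TraceStats fields constructed elsewhere in this file, the
    # request body sent above would look roughly like this (illustrative
    # values):
    #
    #     {
    #       "run_id": "demo",
    #       "stats": {
    #         "total_steps": 4,
    #         "total_events": 42,
    #         "duration_ms": 1830,
    #         "final_status": "success",
    #         "started_at": "...",
    #         "ended_at": "...",
    #         "trace_file_size_bytes": 18231,
    #         "screenshot_total_size_bytes": 912384,
    #         "screenshot_count": 4,
    #         "index_file_size_bytes": 2048
    #       }
    #     }
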
    def _extract_screenshots_from_trace(self) -> dict[int, dict[str, Any]]:
        """
        Extract screenshots from trace events.

        Returns:
            dict mapping sequence number to screenshot data:
            {seq: {"base64": str, "format": str, "step_id": str}}
        """
        screenshots: dict[int, dict[str, Any]] = {}
        sequence = 0

        try:
            # Check that the file exists before reading
            if not self._path.exists():
                if self.logger:
                    self.logger.warning(f"Trace file not found: {self._path}")
                return screenshots

            events = TraceFileManager.read_events(self._path)
            for event in events:
                # Check if this is a snapshot event with a screenshot
                if event.get("type") == "snapshot":
                    data = event.get("data", {})
                    screenshot_base64 = data.get("screenshot_base64")

                    if screenshot_base64:
                        sequence += 1
                        screenshots[sequence] = {
                            "base64": screenshot_base64,
                            "format": data.get("screenshot_format", "jpeg"),
                            "step_id": event.get("step_id"),
                        }
        except Exception as e:
            if self.logger:
                self.logger.error(f"Error extracting screenshots: {e}")

        return screenshots

    def _create_cleaned_trace(self, output_path: Path) -> None:
        """
        Create a trace file without screenshot_base64 fields.

        Args:
            output_path: Path to write the cleaned trace file
        """
        try:
            # Check that the file exists before reading
            if not self._path.exists():
                if self.logger:
                    self.logger.warning(f"Trace file not found: {self._path}")
                # Create an empty cleaned trace file
                output_path.parent.mkdir(parents=True, exist_ok=True)
                output_path.touch()
                return

            events = TraceFileManager.read_events(self._path)
            with open(output_path, "w", encoding="utf-8") as outfile:
                for event in events:
                    # Remove screenshot_base64 from snapshot events
                    if event.get("type") == "snapshot":
                        data = event.get("data", {})
                        if "screenshot_base64" in data:
                            # Create a copy without the screenshot fields
                            cleaned_data = {
                                k: v
                                for k, v in data.items()
                                if k not in ("screenshot_base64", "screenshot_format")
                            }
                            event["data"] = cleaned_data

                    # Write the cleaned event
                    TraceFileManager.write_event(outfile, event)
        except Exception as e:
            if self.logger:
                self.logger.error(f"Error creating cleaned trace: {e}")
            raise

    def _request_screenshot_urls(self, sequences: list[int]) -> dict[int, str]:
        """
        Request pre-signed upload URLs for screenshots from the gateway.

        Args:
            sequences: List of screenshot sequence numbers

        Returns:
            dict mapping sequence number to upload URL
        """
        if not self.api_key or not sequences:
            return {}

        try:
            response = requests.post(
                f"{self.api_url}/v1/screenshots/init",
                headers={"Authorization": f"Bearer {self.api_key}"},
                json={
                    "run_id": self.run_id,
                    "sequences": sequences,
                },
                timeout=10,
            )

            if response.status_code == 200:
                data = response.json()
                # The gateway returns sequences as strings in JSON; convert to int keys
                upload_urls = data.get("upload_urls", {})
                result = {int(k): v for k, v in upload_urls.items()}
                if self.logger:
                    self.logger.info(f"Received {len(result)} screenshot upload URLs")
                return result
            else:
                error_msg = f"Failed to get screenshot URLs: HTTP {response.status_code}"
                if self.logger:
                    # Try to get error details
                    try:
                        error_data = response.json()
                        error_detail = error_data.get("error") or error_data.get("message", "")
                        if error_detail:
                            self.logger.warning(f"{error_msg}: {error_detail}")
                        else:
                            self.logger.warning(f"{error_msg}: {response.text[:200]}")
                    except Exception:
                        self.logger.warning(f"{error_msg}: {response.text[:200]}")
                return {}
        except Exception as e:
            error_msg = f"Error requesting screenshot URLs: {e}"
            if self.logger:
                self.logger.warning(error_msg)
            return {}

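    # Based on the int-key conversion above, a successful /v1/screenshots/init
    # response is expected to look like this (illustrative URLs):
    #
    #     {
    #       "upload_urls": {
    #         "1": "https://sentience.nyc3.digitaloceanspaces.com/...",
    #         "2": "https://sentience.nyc3.digitaloceanspaces.com/..."
    #       }
    #     }
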
    def _upload_screenshots(
        self,
        screenshots: dict[int, dict[str, Any]],
        on_progress: Callable[[int, int], None] | None = None,
    ) -> None:
        """
        Upload screenshots extracted from trace events.

        Steps:
        1. Request pre-signed URLs from the gateway (/v1/screenshots/init)
        2. Decode base64 to image bytes
        3. Upload screenshots in parallel (10 concurrent workers)
        4. Track upload progress

        Args:
            screenshots: dict mapping sequence to screenshot data
            on_progress: Optional callback(uploaded_count, total_count)
        """
        if not screenshots:
            return

        # 1. Request pre-signed URLs from the gateway
        sequences = sorted(screenshots.keys())
        if self.logger:
            self.logger.info(f"Requesting upload URLs for {len(sequences)} screenshot(s)")
        upload_urls = self._request_screenshot_urls(sequences)

        if not upload_urls:
            if self.logger:
                self.logger.warning(
                    "No screenshot upload URLs received, skipping upload. "
                    "This may indicate an API key permission issue, a gateway error, or a network problem."
                )
            return

        # 2. Upload screenshots in parallel
        uploaded_count = 0
        total_count = len(upload_urls)
        failed_sequences: list[int] = []

        def upload_one(seq: int, url: str) -> bool:
            """Upload a single screenshot. Returns True if successful."""
            try:
                screenshot_data = screenshots[seq]
                base64_str = screenshot_data["base64"]
                format_str = screenshot_data.get("format", "jpeg")

                # Decode base64 to image bytes
                image_bytes = base64.b64decode(base64_str)
                image_size = len(image_bytes)

                # Update the running total under a lock: this runs in worker
                # threads, and += on an attribute is not atomic.
                with self._size_lock:
                    self.screenshot_total_size_bytes += image_size

                # Upload to the pre-signed URL
                response = requests.put(
                    url,
                    data=image_bytes,  # Binary image data
                    headers={
                        "Content-Type": f"image/{format_str}",
                    },
                    timeout=30,  # 30 second timeout per screenshot
                )

                if response.status_code == 200:
                    if self.logger:
                        self.logger.info(
                            f"Screenshot {seq} uploaded successfully ({image_size / 1024:.1f} KB)"
                        )
                    return True
                else:
                    error_msg = f"Screenshot {seq} upload failed: HTTP {response.status_code}"
                    if self.logger:
                        try:
                            error_detail = response.text[:200]
                            if error_detail:
                                self.logger.warning(f"{error_msg}: {error_detail}")
                            else:
                                self.logger.warning(error_msg)
                        except Exception:
                            self.logger.warning(error_msg)
                    return False
            except Exception as e:
                error_msg = f"Screenshot {seq} upload error: {e}"
                if self.logger:
                    self.logger.warning(error_msg)
                return False

        # Upload in parallel (max 10 concurrent)
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = {
                executor.submit(upload_one, seq, url): seq for seq, url in upload_urls.items()
            }

            for future in as_completed(futures):
                seq = futures[future]
                if future.result():
                    uploaded_count += 1
                    if on_progress:
                        on_progress(uploaded_count, total_count)
                else:
                    failed_sequences.append(seq)

        # 3. Report results
        if uploaded_count == total_count:
            total_size_mb = self.screenshot_total_size_bytes / 1024 / 1024
            if self.logger:
                self.logger.info(
                    f"All {total_count} screenshots uploaded successfully "
                    f"(total size: {total_size_mb:.2f} MB)"
                )
        else:
            if self.logger:
                self.logger.warning(
                    f"Uploaded {uploaded_count}/{total_count} screenshots. "
                    f"Failed sequences: {failed_sequences if failed_sequences else 'none'}"
                )

    def _cleanup_files(self) -> None:
        """Delete the local trace file after a successful upload."""
        if os.path.exists(self._path):
            try:
                os.remove(self._path)
            except Exception:
                pass  # Ignore cleanup errors

    def __enter__(self):
        """Context manager support."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager cleanup."""
        self.close()
        return False
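
Putting the pieces together, a Pro/Enterprise run follows the class docstring's example end to end. A minimal sketch, assuming the pre-signed upload URL and API key have already been obtained from the Sentience API (that retrieval call is not part of this file), with the context manager defined above handling close():

    from sentience.cloud_tracing import CloudTraceSink
    from sentience.tracing import Tracer

    upload_url = "https://sentience.nyc3.digitaloceanspaces.com/..."  # pre-signed PUT URL
    with CloudTraceSink(upload_url, run_id="demo", api_key="<api key>") as sink:
        tracer = Tracer(run_id="demo", sink=sink)
        tracer.emit_run_start("SentienceAgent")
        # ... agent steps emit events through the tracer ...
    # __exit__ calls sink.close(), uploading the buffered trace, screenshots, and index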