sentienceapi-0.90.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. sentience/__init__.py +153 -0
  2. sentience/actions.py +439 -0
  3. sentience/agent.py +687 -0
  4. sentience/agent_config.py +43 -0
  5. sentience/base_agent.py +101 -0
  6. sentience/browser.py +409 -0
  7. sentience/cli.py +130 -0
  8. sentience/cloud_tracing.py +292 -0
  9. sentience/conversational_agent.py +509 -0
  10. sentience/expect.py +92 -0
  11. sentience/extension/background.js +233 -0
  12. sentience/extension/content.js +298 -0
  13. sentience/extension/injected_api.js +1473 -0
  14. sentience/extension/manifest.json +36 -0
  15. sentience/extension/pkg/sentience_core.d.ts +51 -0
  16. sentience/extension/pkg/sentience_core.js +529 -0
  17. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  18. sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
  19. sentience/extension/release.json +115 -0
  20. sentience/extension/test-content.js +4 -0
  21. sentience/formatting.py +59 -0
  22. sentience/generator.py +202 -0
  23. sentience/inspector.py +185 -0
  24. sentience/llm_provider.py +431 -0
  25. sentience/models.py +406 -0
  26. sentience/overlay.py +115 -0
  27. sentience/query.py +303 -0
  28. sentience/read.py +96 -0
  29. sentience/recorder.py +369 -0
  30. sentience/schemas/trace_v1.json +216 -0
  31. sentience/screenshot.py +54 -0
  32. sentience/snapshot.py +282 -0
  33. sentience/text_search.py +150 -0
  34. sentience/trace_indexing/__init__.py +27 -0
  35. sentience/trace_indexing/index_schema.py +111 -0
  36. sentience/trace_indexing/indexer.py +363 -0
  37. sentience/tracer_factory.py +211 -0
  38. sentience/tracing.py +285 -0
  39. sentience/utils.py +296 -0
  40. sentience/wait.py +73 -0
  41. sentienceapi-0.90.11.dist-info/METADATA +878 -0
  42. sentienceapi-0.90.11.dist-info/RECORD +46 -0
  43. sentienceapi-0.90.11.dist-info/WHEEL +5 -0
  44. sentienceapi-0.90.11.dist-info/entry_points.txt +2 -0
  45. sentienceapi-0.90.11.dist-info/licenses/LICENSE.md +43 -0
  46. sentienceapi-0.90.11.dist-info/top_level.txt +1 -0
sentience/trace_indexing/indexer.py
@@ -0,0 +1,363 @@
+ """
+ Trace indexing for fast timeline rendering and step drill-down.
+ """
+
+ import hashlib
+ import json
+ import os
+ from datetime import datetime, timezone
+ from pathlib import Path
+ from typing import Any, Dict, List
+
+ from .index_schema import (
+     TraceIndex,
+     StepIndex,
+     TraceSummary,
+     TraceFileInfo,
+     SnapshotInfo,
+     ActionInfo,
+     StepCounters,
+ )
+
+
+ def _normalize_text(text: str | None, max_len: int = 80) -> str:
+     """Normalize text for digest: trim, collapse whitespace, lowercase, cap length."""
+     if not text:
+         return ""
+     # Trim and collapse whitespace
+     normalized = " ".join(text.split())
+     # Lowercase
+     normalized = normalized.lower()
+     # Cap length
+     if len(normalized) > max_len:
+         normalized = normalized[:max_len]
+     return normalized
+
+
+ def _round_bbox(bbox: Dict[str, float], precision: int = 2) -> Dict[str, int]:
+     """Round bbox coordinates to reduce noise (default: 2px precision)."""
+     return {
+         "x": round(bbox.get("x", 0) / precision) * precision,
+         "y": round(bbox.get("y", 0) / precision) * precision,
+         "width": round(bbox.get("width", 0) / precision) * precision,
+         "height": round(bbox.get("height", 0) / precision) * precision,
+     }
+
+
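Before hashing, snapshots pass through the two canonicalization helpers above. A minimal sanity sketch of their behavior, assuming the installed `sentienceapi` package is importable (the underscore-prefixed helpers are private and used here purely for illustration):

```python
from sentience.trace_indexing.indexer import _normalize_text, _round_bbox

# Whitespace, case, and padding collapse to one normalized form.
assert _normalize_text("  Submit\n  Order ") == _normalize_text("submit order")

# Sub-precision bbox jitter snaps to the same 2px grid.
assert _round_bbox({"x": 10.4, "y": 20.0, "width": 99.9, "height": 30.0}) == _round_bbox(
    {"x": 10.0, "y": 20.6, "width": 100.1, "height": 30.2}
)
```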
+ def _compute_snapshot_digest(snapshot_data: Dict[str, Any]) -> str:
+     """
+     Compute stable digest of snapshot for diffing.
+
+     Includes: url, viewport, canonicalized elements (id, role, text_norm, bbox_rounded).
+     Excludes: importance, style fields, transient attributes.
+     """
+     url = snapshot_data.get("url", "")
+     viewport = snapshot_data.get("viewport", {})
+     elements = snapshot_data.get("elements", [])
+
+     # Canonicalize elements
+     canonical_elements = []
+     for elem in elements:
+         canonical_elem = {
+             "id": elem.get("id"),
+             "role": elem.get("role", ""),
+             "text_norm": _normalize_text(elem.get("text")),
+             "bbox": _round_bbox(
+                 elem.get("bbox", {"x": 0, "y": 0, "width": 0, "height": 0})
+             ),
+             "is_primary": elem.get("is_primary", False),
+             "is_clickable": elem.get("is_clickable", False),
+         }
+         canonical_elements.append(canonical_elem)
+
+     # Sort by element id for determinism (treat a missing id as -1 so None
+     # never compares against int)
+     canonical_elements.sort(key=lambda e: e["id"] if e["id"] is not None else -1)
+
+     # Build canonical object
+     canonical = {
+         "url": url,
+         "viewport": {
+             "width": viewport.get("width", 0),
+             "height": viewport.get("height", 0),
+         },
+         "elements": canonical_elements,
+     }
+
+     # Hash
+     canonical_json = json.dumps(canonical, sort_keys=True, separators=(",", ":"))
+     digest = hashlib.sha256(canonical_json.encode("utf-8")).hexdigest()
+     return f"sha256:{digest}"
+
+
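The practical effect: two snapshots that differ only cosmetically hash to the same digest, so a timeline viewer can skip "nothing changed" steps. A hedged sketch with hypothetical snapshot payloads:

```python
from sentience.trace_indexing.indexer import _compute_snapshot_digest

base = {
    "url": "https://example.com",
    "viewport": {"width": 1280, "height": 800},
    "elements": [
        {
            "id": 1,
            "role": "button",
            "text": "Search",
            "bbox": {"x": 10.0, "y": 20.0, "width": 100.0, "height": 30.0},
        }
    ],
}
# Same element, but with text casing changed and <1px bbox jitter.
jittered = {
    **base,
    "elements": [
        {
            "id": 1,
            "role": "button",
            "text": "  SEARCH ",
            "bbox": {"x": 10.4, "y": 20.0, "width": 100.1, "height": 30.0},
        }
    ],
}
assert _compute_snapshot_digest(base) == _compute_snapshot_digest(jittered)
```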
+ def _compute_action_digest(action_data: Dict[str, Any]) -> str:
+     """
+     Compute digest of action args for privacy + determinism.
+
+     For TYPE: includes text_len + text_sha256 (not raw text).
+     For CLICK/PRESS: includes only non-sensitive fields.
+     """
+     action_type = action_data.get("type", "")
+     target_id = action_data.get("target_element_id")
+
+     canonical = {
+         "type": action_type,
+         "target_element_id": target_id,
+     }
+
+     # Type-specific canonicalization
+     if action_type == "TYPE":
+         text = action_data.get("text", "")
+         canonical["text_len"] = len(text)
+         canonical["text_sha256"] = hashlib.sha256(text.encode("utf-8")).hexdigest()
+     elif action_type == "PRESS":
+         canonical["key"] = action_data.get("key", "")
+     # CLICK has no extra args
+
+     # Hash
+     canonical_json = json.dumps(canonical, sort_keys=True, separators=(",", ":"))
+     digest = hashlib.sha256(canonical_json.encode("utf-8")).hexdigest()
+     return f"sha256:{digest}"
+
+
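For typed text, the digest commits to the input without storing it: equal text yields an equal digest, different text a different one, and the raw string never enters the canonical object. A short sketch with hypothetical action payloads:

```python
from sentience.trace_indexing.indexer import _compute_action_digest

a = {"type": "TYPE", "target_element_id": 7, "text": "hunter2"}
b = {"type": "TYPE", "target_element_id": 7, "text": "hunter2"}
c = {"type": "TYPE", "target_element_id": 7, "text": "hunter3"}

assert _compute_action_digest(a) == _compute_action_digest(b)  # deterministic
assert _compute_action_digest(a) != _compute_action_digest(c)  # text-sensitive
```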
+ def _compute_file_sha256(file_path: str) -> str:
+     """Compute SHA256 hash of entire file."""
+     sha256 = hashlib.sha256()
+     with open(file_path, "rb") as f:
+         while chunk := f.read(8192):
+             sha256.update(chunk)
+     return sha256.hexdigest()
+
+
+ def build_trace_index(trace_path: str) -> TraceIndex:
+     """
+     Build trace index from JSONL file in a single streaming pass.
+
+     Args:
+         trace_path: Path to trace JSONL file
+
+     Returns:
+         Complete TraceIndex object
+     """
+     trace_path_obj = Path(trace_path)
+     if not trace_path_obj.exists():
+         raise FileNotFoundError(f"Trace file not found: {trace_path}")
+
+     # Extract run_id from filename
+     run_id = trace_path_obj.stem
+
+     # Initialize summary
+     first_ts = ""
+     last_ts = ""
+     event_count = 0
+     error_count = 0
+     final_url = None
+
+     steps_by_id: Dict[str, StepIndex] = {}
+     step_order: List[str] = []  # Track order of first appearance
+
+     # Stream through file, tracking byte offsets
+     with open(trace_path, "rb") as f:
+         byte_offset = 0
+
+         for line_bytes in f:
+             line_len = len(line_bytes)
+
+             try:
+                 event = json.loads(line_bytes.decode("utf-8"))
+             except json.JSONDecodeError:
+                 # Skip malformed lines
+                 byte_offset += line_len
+                 continue
+
+             # Extract event metadata
+             event_type = event.get("type", "")
+             ts = event.get("ts") or event.get("timestamp", "")
+             step_id = event.get("step_id", "step-0")  # Default synthetic step
+             data = event.get("data", {})
+
+             # Update summary
+             event_count += 1
+             if not first_ts:
+                 first_ts = ts
+             last_ts = ts
+
+             if event_type == "error":
+                 error_count += 1
+
+             # Initialize step if first time seeing this step_id
+             if step_id not in steps_by_id:
+                 step_order.append(step_id)
+                 steps_by_id[step_id] = StepIndex(
+                     step_index=len(step_order),
+                     step_id=step_id,
+                     goal=None,
+                     status="partial",
+                     ts_start=ts,
+                     ts_end=ts,
+                     offset_start=byte_offset,
+                     offset_end=byte_offset + line_len,
+                     url_before=None,
+                     url_after=None,
+                     snapshot_before=SnapshotInfo(),
+                     snapshot_after=SnapshotInfo(),
+                     action=ActionInfo(),
+                     counters=StepCounters(),
+                 )
+
+             step = steps_by_id[step_id]
+
+             # Update step metadata
+             step.ts_end = ts
+             step.offset_end = byte_offset + line_len
+             step.counters.events += 1
+
+             # Handle specific event types
+             if event_type == "step_start":
+                 step.goal = data.get("goal")
+                 step.url_before = data.get("pre_url")
+
+             elif event_type == "snapshot":
+                 snapshot_id = data.get("snapshot_id")
+                 url = data.get("url")
+                 digest = _compute_snapshot_digest(data)
+
+                 # First snapshot = before, last snapshot = after
+                 if step.snapshot_before.snapshot_id is None:
+                     step.snapshot_before = SnapshotInfo(
+                         snapshot_id=snapshot_id, digest=digest, url=url
+                     )
+                     step.url_before = step.url_before or url
+
+                 step.snapshot_after = SnapshotInfo(
+                     snapshot_id=snapshot_id, digest=digest, url=url
+                 )
+                 step.url_after = url
+                 step.counters.snapshots += 1
+                 final_url = url
+
+             elif event_type == "action":
+                 step.action = ActionInfo(
+                     type=data.get("type"),
+                     target_element_id=data.get("target_element_id"),
+                     args_digest=_compute_action_digest(data),
+                     success=data.get("success", True),
+                 )
+                 step.counters.actions += 1
+
+             elif event_type == "llm_response":
+                 step.counters.llm_calls += 1
+
+             elif event_type == "error":
+                 step.status = "error"
+
+             elif event_type == "step_end":
+                 if step.status != "error":
+                     step.status = "ok"
+
+             byte_offset += line_len
+
+     # Build summary
+     summary = TraceSummary(
+         first_ts=first_ts,
+         last_ts=last_ts,
+         event_count=event_count,
+         step_count=len(steps_by_id),
+         error_count=error_count,
+         final_url=final_url,
+     )
+
+     # Build steps list in order
+     steps_list = [steps_by_id[sid] for sid in step_order]
+
+     # Build trace file info
+     trace_file = TraceFileInfo(
+         path=str(trace_path),
+         size_bytes=os.path.getsize(trace_path),
+         sha256=_compute_file_sha256(str(trace_path)),
+     )
+
+     # Build final index
+     index = TraceIndex(
+         version=1,
+         run_id=run_id,
+         created_at=datetime.now(timezone.utc).isoformat(),
+         trace_file=trace_file,
+         summary=summary,
+         steps=steps_list,
+     )
+
+     return index
+
+
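End to end, the indexer can be exercised against a tiny synthetic trace. The event payloads below are hypothetical but follow the fields the loop above reads (`type`, `ts`, `step_id`, `data`):

```python
import json
import os
import tempfile

from sentience.trace_indexing.indexer import build_trace_index

events = [
    {"type": "step_start", "ts": "2024-01-01T00:00:00Z", "step_id": "step-1",
     "data": {"goal": "Click search", "pre_url": "https://example.com"}},
    {"type": "action", "ts": "2024-01-01T00:00:01Z", "step_id": "step-1",
     "data": {"type": "CLICK", "target_element_id": 12, "success": True}},
    {"type": "step_end", "ts": "2024-01-01T00:00:02Z", "step_id": "step-1", "data": {}},
]
with tempfile.NamedTemporaryFile("w", suffix=".jsonl", delete=False) as f:
    f.writelines(json.dumps(e) + "\n" for e in events)

index = build_trace_index(f.name)
assert index.summary.event_count == 3 and index.summary.step_count == 1
assert index.steps[0].status == "ok" and index.steps[0].counters.actions == 1
os.unlink(f.name)
```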
+ def write_trace_index(trace_path: str, index_path: str | None = None) -> str:
+     """
+     Build index and write to file.
+
+     Args:
+         trace_path: Path to trace JSONL file
+         index_path: Optional custom path for index file (default: trace_path with .index.json)
+
+     Returns:
+         Path to written index file
+     """
+     if index_path is None:
+         index_path = str(Path(trace_path).with_suffix("")) + ".index.json"
+
+     index = build_trace_index(trace_path)
+
+     with open(index_path, "w") as f:
+         json.dump(index.to_dict(), f, indent=2)
+
+     return index_path
+
+
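By default the index lands next to its trace, with `.index.json` substituted for `.jsonl`. A usage sketch (the path is hypothetical):

```python
from sentience.trace_indexing.indexer import write_trace_index

index_path = write_trace_index("traces/run-42.jsonl")
print(index_path)  # -> traces/run-42.index.json
```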
+ def read_step_events(
+     trace_path: str, offset_start: int, offset_end: int
+ ) -> List[Dict[str, Any]]:
+     """
+     Read events for a specific step using byte offsets from index.
+
+     Args:
+         trace_path: Path to trace JSONL file
+         offset_start: Byte offset where step starts
+         offset_end: Byte offset where step ends
+
+     Returns:
+         List of event dictionaries for the step
+     """
+     events = []
+
+     with open(trace_path, "rb") as f:
+         f.seek(offset_start)
+         bytes_to_read = offset_end - offset_start
+         chunk = f.read(bytes_to_read)
+
+     # Parse lines
+     for line_bytes in chunk.split(b"\n"):
+         if not line_bytes:
+             continue
+         try:
+             event = json.loads(line_bytes.decode("utf-8"))
+             events.append(event)
+         except json.JSONDecodeError:
+             continue
+
+     return events
+
+
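Together with the per-step offsets stored in the index, this gives cheap drill-down: seek once, read one byte range, parse only those lines. A sketch, assuming a non-interleaved trace where no other step's events fall inside the byte range (path hypothetical):

```python
from sentience.trace_indexing.indexer import build_trace_index, read_step_events

index = build_trace_index("traces/run-42.jsonl")
step = index.steps[0]
events = read_step_events("traces/run-42.jsonl", step.offset_start, step.offset_end)
assert len(events) == step.counters.events
```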
+ # CLI entrypoint
+ def main():
+     """CLI tool for building trace index."""
+     import sys
+
+     if len(sys.argv) < 2:
+         print("Usage: python -m sentience.trace_indexing.indexer <trace.jsonl>")
+         sys.exit(1)
+
+     trace_path = sys.argv[1]
+     index_path = write_trace_index(trace_path)
+     print(f"✅ Index written to: {index_path}")
+
+
+ if __name__ == "__main__":
+     main()
sentience/tracer_factory.py
@@ -0,0 +1,211 @@
+ """
+ Tracer factory with automatic tier detection.
+
+ Provides a convenient factory function for creating tracers with cloud upload support.
+ """
+
+ import gzip
+ import os
+ import uuid
+ from pathlib import Path
+
+ import requests
+
+ from sentience.cloud_tracing import CloudTraceSink, SentienceLogger
+ from sentience.tracing import JsonlTraceSink, Tracer
+
+ # Sentience API base URL (constant)
+ SENTIENCE_API_URL = "https://api.sentienceapi.com"
+
+
+ def create_tracer(
+     api_key: str | None = None,
+     run_id: str | None = None,
+     api_url: str | None = None,
+     logger: SentienceLogger | None = None,
+     upload_trace: bool = False,
+ ) -> Tracer:
+     """
+     Create tracer with automatic tier detection.
+
+     Tier Detection:
+     - If api_key is provided: Try to initialize CloudTraceSink (Pro/Enterprise)
+     - If cloud init fails or no api_key: Fall back to JsonlTraceSink (Free tier)
+
+     Args:
+         api_key: Sentience API key (e.g., "sk_pro_xxxxx")
+             - Free tier: None or empty
+             - Pro/Enterprise: Valid API key
+         run_id: Unique identifier for this agent run. If not provided, generates a UUID.
+         api_url: Sentience API base URL (default: https://api.sentienceapi.com)
+         logger: Optional logger instance for logging file sizes and errors
+         upload_trace: Enable cloud trace upload (default: False). When True and api_key
+             is provided, traces will be uploaded to the cloud. When False, traces
+             are saved locally only.
+
+     Returns:
+         Tracer configured with the appropriate sink
+
+     Example:
+         >>> # Pro tier user
+         >>> tracer = create_tracer(api_key="sk_pro_xyz", run_id="demo")
+         >>> # Returns: Tracer with CloudTraceSink
+         >>>
+         >>> # Free tier user
+         >>> tracer = create_tracer(run_id="demo")
+         >>> # Returns: Tracer with JsonlTraceSink (local-only)
+         >>>
+         >>> # Use with agent
+         >>> agent = SentienceAgent(browser, llm, tracer=tracer)
+         >>> agent.act("Click search")
+         >>> tracer.close()  # Uploads to cloud if Pro tier
+     """
+     if run_id is None:
+         run_id = str(uuid.uuid4())
+
+     if api_url is None:
+         api_url = SENTIENCE_API_URL
+
+     # 0. Check for orphaned traces from previous crashes (if api_key provided and upload enabled)
+     if api_key and upload_trace:
+         _recover_orphaned_traces(api_key, api_url)
+
+     # 1. Try to initialize Cloud Sink (Pro/Enterprise tier) if upload enabled
+     if api_key and upload_trace:
+         try:
+             # Request pre-signed upload URL from backend
+             response = requests.post(
+                 f"{api_url}/v1/traces/init",
+                 headers={"Authorization": f"Bearer {api_key}"},
+                 json={"run_id": run_id},
+                 timeout=10,
+             )
+
+             if response.status_code == 200:
+                 data = response.json()
+                 upload_url = data.get("upload_url")
+
+                 if upload_url:
+                     print("☁️ [Sentience] Cloud tracing enabled (Pro tier)")
+                     return Tracer(
+                         run_id=run_id,
+                         sink=CloudTraceSink(
+                             upload_url=upload_url,
+                             run_id=run_id,
+                             api_key=api_key,
+                             api_url=api_url,
+                             logger=logger,
+                         ),
+                     )
+                 else:
+                     print("⚠️ [Sentience] Cloud init response missing upload_url")
+                     print("   Falling back to local-only tracing")
+
+             elif response.status_code == 403:
+                 print("⚠️ [Sentience] Cloud tracing requires Pro tier")
+                 print("   Falling back to local-only tracing")
+             else:
+                 print(f"⚠️ [Sentience] Cloud init failed: HTTP {response.status_code}")
+                 print("   Falling back to local-only tracing")
+
+         except requests.exceptions.Timeout:
+             print("⚠️ [Sentience] Cloud init timeout")
+             print("   Falling back to local-only tracing")
+         except requests.exceptions.ConnectionError:
+             print("⚠️ [Sentience] Cloud init connection error")
+             print("   Falling back to local-only tracing")
+         except Exception as e:
+             print(f"⚠️ [Sentience] Cloud init error: {e}")
+             print("   Falling back to local-only tracing")
+
+     # 2. Fall back to Local Sink (Free tier / Offline mode)
+     traces_dir = Path("traces")
+     traces_dir.mkdir(exist_ok=True)
+
+     local_path = traces_dir / f"{run_id}.jsonl"
+     print(f"💾 [Sentience] Local tracing: {local_path}")
+
+     return Tracer(run_id=run_id, sink=JsonlTraceSink(str(local_path)))
+
+
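A minimal usage sketch of the fallback path, mirroring the docstring example above: with no API key (or `upload_trace=False`), the factory returns a local-only tracer writing under `./traces/`:

```python
from sentience.tracer_factory import create_tracer

tracer = create_tracer(run_id="demo-run")  # free tier: JsonlTraceSink
# ... run the agent, emitting trace events ...
tracer.close()  # flushes traces/demo-run.jsonl; would upload if Pro tier
```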
+ def _recover_orphaned_traces(api_key: str, api_url: str = SENTIENCE_API_URL) -> None:
+     """
+     Attempt to upload orphaned traces from previous crashed runs.
+
+     Scans ~/.sentience/traces/pending/ for un-uploaded trace files and
+     attempts to upload them using the provided API key.
+
+     Args:
+         api_key: Sentience API key for authentication
+         api_url: Sentience API base URL (defaults to SENTIENCE_API_URL)
+     """
+     pending_dir = Path.home() / ".sentience" / "traces" / "pending"
+
+     if not pending_dir.exists():
+         return
+
+     orphaned = list(pending_dir.glob("*.jsonl"))
+
+     if not orphaned:
+         return
+
+     print(f"⚠️ [Sentience] Found {len(orphaned)} un-uploaded trace(s) from previous runs")
+     print("   Attempting to upload now...")
+
+     for trace_file in orphaned:
+         try:
+             # Extract run_id from filename (format: {run_id}.jsonl)
+             run_id = trace_file.stem
+
+             # Request new upload URL for this run_id
+             response = requests.post(
+                 f"{api_url}/v1/traces/init",
+                 headers={"Authorization": f"Bearer {api_key}"},
+                 json={"run_id": run_id},
+                 timeout=10,
+             )
+
+             if response.status_code != 200:
+                 print(f"❌ Failed to get upload URL for {run_id}: HTTP {response.status_code}")
+                 continue
+
+             data = response.json()
+             upload_url = data.get("upload_url")
+
+             if not upload_url:
+                 print(f"❌ Upload URL missing for {run_id}")
+                 continue
+
+             # Read and compress trace file
+             with open(trace_file, "rb") as f:
+                 trace_data = f.read()
+
+             compressed_data = gzip.compress(trace_data)
+
+             # Upload to cloud
+             upload_response = requests.put(
+                 upload_url,
+                 data=compressed_data,
+                 headers={
+                     "Content-Type": "application/x-gzip",
+                     "Content-Encoding": "gzip",
+                 },
+                 timeout=60,
+             )
+
+             if upload_response.status_code == 200:
+                 print(f"✅ Uploaded orphaned trace: {run_id}")
+                 # Delete file on successful upload
+                 try:
+                     os.remove(trace_file)
+                 except Exception:
+                     pass  # Ignore cleanup errors
+             else:
+                 print(f"❌ Failed to upload {run_id}: HTTP {upload_response.status_code}")
+
+         except requests.exceptions.Timeout:
+             print(f"❌ Timeout uploading {trace_file.name}")
+         except requests.exceptions.ConnectionError:
+             print(f"❌ Connection error uploading {trace_file.name}")
+         except Exception as e:
+             print(f"❌ Error uploading {trace_file.name}: {e}")