sentienceapi-0.90.17-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of sentienceapi might be problematic.

Files changed (50)
  1. sentience/__init__.py +153 -0
  2. sentience/_extension_loader.py +40 -0
  3. sentience/actions.py +837 -0
  4. sentience/agent.py +1246 -0
  5. sentience/agent_config.py +43 -0
  6. sentience/async_api.py +101 -0
  7. sentience/base_agent.py +194 -0
  8. sentience/browser.py +1037 -0
  9. sentience/cli.py +130 -0
  10. sentience/cloud_tracing.py +382 -0
  11. sentience/conversational_agent.py +509 -0
  12. sentience/expect.py +188 -0
  13. sentience/extension/background.js +233 -0
  14. sentience/extension/content.js +298 -0
  15. sentience/extension/injected_api.js +1473 -0
  16. sentience/extension/manifest.json +36 -0
  17. sentience/extension/pkg/sentience_core.d.ts +51 -0
  18. sentience/extension/pkg/sentience_core.js +529 -0
  19. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  20. sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
  21. sentience/extension/release.json +115 -0
  22. sentience/extension/test-content.js +4 -0
  23. sentience/formatting.py +59 -0
  24. sentience/generator.py +202 -0
  25. sentience/inspector.py +365 -0
  26. sentience/llm_provider.py +637 -0
  27. sentience/models.py +412 -0
  28. sentience/overlay.py +222 -0
  29. sentience/query.py +303 -0
  30. sentience/read.py +185 -0
  31. sentience/recorder.py +589 -0
  32. sentience/schemas/trace_v1.json +216 -0
  33. sentience/screenshot.py +100 -0
  34. sentience/snapshot.py +516 -0
  35. sentience/text_search.py +290 -0
  36. sentience/trace_indexing/__init__.py +27 -0
  37. sentience/trace_indexing/index_schema.py +111 -0
  38. sentience/trace_indexing/indexer.py +357 -0
  39. sentience/tracer_factory.py +211 -0
  40. sentience/tracing.py +285 -0
  41. sentience/utils.py +296 -0
  42. sentience/wait.py +137 -0
  43. sentienceapi-0.90.17.dist-info/METADATA +917 -0
  44. sentienceapi-0.90.17.dist-info/RECORD +50 -0
  45. sentienceapi-0.90.17.dist-info/WHEEL +5 -0
  46. sentienceapi-0.90.17.dist-info/entry_points.txt +2 -0
  47. sentienceapi-0.90.17.dist-info/licenses/LICENSE +24 -0
  48. sentienceapi-0.90.17.dist-info/licenses/LICENSE-APACHE +201 -0
  49. sentienceapi-0.90.17.dist-info/licenses/LICENSE-MIT +21 -0
  50. sentienceapi-0.90.17.dist-info/top_level.txt +1 -0
sentience/trace_indexing/indexer.py
@@ -0,0 +1,357 @@
+"""
+Trace indexing for fast timeline rendering and step drill-down.
+"""
+
+import hashlib
+import json
+import os
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from .index_schema import (
+    ActionInfo,
+    SnapshotInfo,
+    StepCounters,
+    StepIndex,
+    TraceFileInfo,
+    TraceIndex,
+    TraceSummary,
+)
+
+
+def _normalize_text(text: str | None, max_len: int = 80) -> str:
+    """Normalize text for digest: trim, collapse whitespace, lowercase, cap length."""
+    if not text:
+        return ""
+    # Trim and collapse whitespace
+    normalized = " ".join(text.split())
+    # Lowercase
+    normalized = normalized.lower()
+    # Cap length
+    if len(normalized) > max_len:
+        normalized = normalized[:max_len]
+    return normalized
+
+
+def _round_bbox(bbox: dict[str, float], precision: int = 2) -> dict[str, int]:
+    """Round bbox coordinates to reduce noise (default: 2px precision)."""
+    return {
+        "x": round(bbox.get("x", 0) / precision) * precision,
+        "y": round(bbox.get("y", 0) / precision) * precision,
+        "width": round(bbox.get("width", 0) / precision) * precision,
+        "height": round(bbox.get("height", 0) / precision) * precision,
+    }
+
+
+def _compute_snapshot_digest(snapshot_data: dict[str, Any]) -> str:
+    """
+    Compute stable digest of snapshot for diffing.
+
+    Includes: url, viewport, canonicalized elements (id, role, text_norm, bbox_rounded).
+    Excludes: importance, style fields, transient attributes.
+    """
+    url = snapshot_data.get("url", "")
+    viewport = snapshot_data.get("viewport", {})
+    elements = snapshot_data.get("elements", [])
+
+    # Canonicalize elements
+    canonical_elements = []
+    for elem in elements:
+        canonical_elem = {
+            "id": elem.get("id"),
+            "role": elem.get("role", ""),
+            "text_norm": _normalize_text(elem.get("text")),
+            "bbox": _round_bbox(elem.get("bbox", {"x": 0, "y": 0, "width": 0, "height": 0})),
+            "is_primary": elem.get("is_primary", False),
+            "is_clickable": elem.get("is_clickable", False),
+        }
+        canonical_elements.append(canonical_elem)
+
+    # Sort by element id for determinism
+    canonical_elements.sort(key=lambda e: e.get("id", 0))
+
+    # Build canonical object
+    canonical = {
+        "url": url,
+        "viewport": {
+            "width": viewport.get("width", 0),
+            "height": viewport.get("height", 0),
+        },
+        "elements": canonical_elements,
+    }
+
+    # Hash
+    canonical_json = json.dumps(canonical, sort_keys=True, separators=(",", ":"))
+    digest = hashlib.sha256(canonical_json.encode("utf-8")).hexdigest()
+    return f"sha256:{digest}"
+
+
+def _compute_action_digest(action_data: dict[str, Any]) -> str:
+    """
+    Compute digest of action args for privacy + determinism.
+
+    For TYPE: includes text_len + text_sha256 (not raw text)
+    For CLICK/PRESS: includes only non-sensitive fields
+    """
+    action_type = action_data.get("type", "")
+    target_id = action_data.get("target_element_id")
+
+    canonical = {
+        "type": action_type,
+        "target_element_id": target_id,
+    }
+
+    # Type-specific canonicalization
+    if action_type == "TYPE":
+        text = action_data.get("text", "")
+        canonical["text_len"] = len(text)
+        canonical["text_sha256"] = hashlib.sha256(text.encode("utf-8")).hexdigest()
+    elif action_type == "PRESS":
+        canonical["key"] = action_data.get("key", "")
+    # CLICK has no extra args
+
+    # Hash
+    canonical_json = json.dumps(canonical, sort_keys=True, separators=(",", ":"))
+    digest = hashlib.sha256(canonical_json.encode("utf-8")).hexdigest()
+    return f"sha256:{digest}"
+
+
+def _compute_file_sha256(file_path: str) -> str:
+    """Compute SHA256 hash of entire file."""
+    sha256 = hashlib.sha256()
+    with open(file_path, "rb") as f:
+        while chunk := f.read(8192):
+            sha256.update(chunk)
+    return sha256.hexdigest()
+
+
+def build_trace_index(trace_path: str) -> TraceIndex:
+    """
+    Build trace index from JSONL file in a single streaming pass.
+
+    Args:
+        trace_path: Path to trace JSONL file
+
+    Returns:
+        Complete TraceIndex object
+    """
+    trace_path_obj = Path(trace_path)
+    if not trace_path_obj.exists():
+        raise FileNotFoundError(f"Trace file not found: {trace_path}")
+
+    # Extract run_id from filename
+    run_id = trace_path_obj.stem
+
+    # Initialize summary
+    first_ts = ""
+    last_ts = ""
+    event_count = 0
+    error_count = 0
+    final_url = None
+
+    steps_by_id: dict[str, StepIndex] = {}
+    step_order: list[str] = []  # Track order of first appearance
+
+    # Stream through file, tracking byte offsets
+    with open(trace_path, "rb") as f:
+        byte_offset = 0
+
+        for line_bytes in f:
+            line_len = len(line_bytes)
+
+            try:
+                event = json.loads(line_bytes.decode("utf-8"))
+            except json.JSONDecodeError:
+                # Skip malformed lines
+                byte_offset += line_len
+                continue
+
+            # Extract event metadata
+            event_type = event.get("type", "")
+            ts = event.get("ts") or event.get("timestamp", "")
+            step_id = event.get("step_id", "step-0")  # Default synthetic step
+            data = event.get("data", {})
+
+            # Update summary
+            event_count += 1
+            if not first_ts:
+                first_ts = ts
+            last_ts = ts
+
+            if event_type == "error":
+                error_count += 1
+
+            # Initialize step if first time seeing this step_id
+            if step_id not in steps_by_id:
+                step_order.append(step_id)
+                steps_by_id[step_id] = StepIndex(
+                    step_index=len(step_order),
+                    step_id=step_id,
+                    goal=None,
+                    status="partial",
+                    ts_start=ts,
+                    ts_end=ts,
+                    offset_start=byte_offset,
+                    offset_end=byte_offset + line_len,
+                    url_before=None,
+                    url_after=None,
+                    snapshot_before=SnapshotInfo(),
+                    snapshot_after=SnapshotInfo(),
+                    action=ActionInfo(),
+                    counters=StepCounters(),
+                )
+
+            step = steps_by_id[step_id]
+
+            # Update step metadata
+            step.ts_end = ts
+            step.offset_end = byte_offset + line_len
+            step.counters.events += 1
+
+            # Handle specific event types
+            if event_type == "step_start":
+                step.goal = data.get("goal")
+                step.url_before = data.get("pre_url")
+
+            elif event_type == "snapshot":
+                snapshot_id = data.get("snapshot_id")
+                url = data.get("url")
+                digest = _compute_snapshot_digest(data)
+
+                # First snapshot = before, last snapshot = after
+                if step.snapshot_before.snapshot_id is None:
+                    step.snapshot_before = SnapshotInfo(
+                        snapshot_id=snapshot_id, digest=digest, url=url
+                    )
+                    step.url_before = step.url_before or url
+
+                step.snapshot_after = SnapshotInfo(snapshot_id=snapshot_id, digest=digest, url=url)
+                step.url_after = url
+                step.counters.snapshots += 1
+                final_url = url
+
+            elif event_type == "action":
+                step.action = ActionInfo(
+                    type=data.get("type"),
+                    target_element_id=data.get("target_element_id"),
+                    args_digest=_compute_action_digest(data),
+                    success=data.get("success", True),
+                )
+                step.counters.actions += 1
+
+            elif event_type == "llm_response":
+                step.counters.llm_calls += 1
+
+            elif event_type == "error":
+                step.status = "error"
+
+            elif event_type == "step_end":
+                if step.status != "error":
+                    step.status = "ok"
+
+            byte_offset += line_len
+
+    # Build summary
+    summary = TraceSummary(
+        first_ts=first_ts,
+        last_ts=last_ts,
+        event_count=event_count,
+        step_count=len(steps_by_id),
+        error_count=error_count,
+        final_url=final_url,
+    )
+
+    # Build steps list in order
+    steps_list = [steps_by_id[sid] for sid in step_order]
+
+    # Build trace file info
+    trace_file = TraceFileInfo(
+        path=str(trace_path),
+        size_bytes=os.path.getsize(trace_path),
+        sha256=_compute_file_sha256(str(trace_path)),
+    )
+
+    # Build final index
+    index = TraceIndex(
+        version=1,
+        run_id=run_id,
+        created_at=datetime.now(timezone.utc).isoformat(),
+        trace_file=trace_file,
+        summary=summary,
+        steps=steps_list,
+    )
+
+    return index
+
+
+def write_trace_index(trace_path: str, index_path: str | None = None) -> str:
+    """
+    Build index and write it to a file.
+
+    Args:
+        trace_path: Path to trace JSONL file
+        index_path: Optional custom path for index file (default: trace_path with .index.json)
+
+    Returns:
+        Path to written index file
+    """
+    if index_path is None:
+        index_path = str(Path(trace_path).with_suffix("")) + ".index.json"
+
+    index = build_trace_index(trace_path)
+
+    with open(index_path, "w") as f:
+        json.dump(index.to_dict(), f, indent=2)
+
+    return index_path
+
+
+def read_step_events(trace_path: str, offset_start: int, offset_end: int) -> list[dict[str, Any]]:
+    """
+    Read events for a specific step using byte offsets from the index.
+
+    Args:
+        trace_path: Path to trace JSONL file
+        offset_start: Byte offset where step starts
+        offset_end: Byte offset where step ends
+
+    Returns:
+        List of event dictionaries for the step
+    """
+    events = []
+
+    with open(trace_path, "rb") as f:
+        f.seek(offset_start)
+        bytes_to_read = offset_end - offset_start
+        chunk = f.read(bytes_to_read)
+
+    # Parse lines
+    for line_bytes in chunk.split(b"\n"):
+        if not line_bytes:
+            continue
+        try:
+            event = json.loads(line_bytes.decode("utf-8"))
+            events.append(event)
+        except json.JSONDecodeError:
+            continue
+
+    return events
+
+
+# CLI entrypoint
+def main():
+    """CLI tool for building a trace index."""
+    import sys
+
+    if len(sys.argv) < 2:
+        print("Usage: python -m sentience.trace_indexing.indexer <trace.jsonl>")
+        sys.exit(1)
+
+    trace_path = sys.argv[1]
+    index_path = write_trace_index(trace_path)
+    print(f"✅ Index written to: {index_path}")
+
+
+if __name__ == "__main__":
+    main()
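Taken together, the indexer lets a trace viewer render a timeline from the small .index.json alone, then seek straight to one step's bytes in the JSONL. A minimal usage sketch (illustrative only, not part of the released files; it assumes the JSON written by write_trace_index mirrors the TraceIndex field names, and traces/demo-run.jsonl is a hypothetical path):

import json

from sentience.trace_indexing.indexer import read_step_events, write_trace_index

# Build the sidecar index once; it records byte offsets per step.
index_path = write_trace_index("traces/demo-run.jsonl")
with open(index_path) as f:
    index = json.load(f)

# Drill into the first step by seeking with its recorded offsets
# instead of re-parsing the whole trace file.
step = index["steps"][0]
events = read_step_events("traces/demo-run.jsonl", step["offset_start"], step["offset_end"])
print(f"step {step['step_id']} ({step['status']}): {len(events)} event(s)")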
sentience/tracer_factory.py
@@ -0,0 +1,211 @@
+"""
+Tracer factory with automatic tier detection.
+
+Provides a convenient factory function for creating tracers with cloud upload support.
+"""
+
+import gzip
+import os
+import uuid
+from pathlib import Path
+
+import requests
+
+from sentience.cloud_tracing import CloudTraceSink, SentienceLogger
+from sentience.tracing import JsonlTraceSink, Tracer
+
+# Sentience API base URL (constant)
+SENTIENCE_API_URL = "https://api.sentienceapi.com"
+
+
+def create_tracer(
+    api_key: str | None = None,
+    run_id: str | None = None,
+    api_url: str | None = None,
+    logger: SentienceLogger | None = None,
+    upload_trace: bool = False,
+) -> Tracer:
+    """
+    Create tracer with automatic tier detection.
+
+    Tier Detection:
+    - If api_key is provided: Try to initialize CloudTraceSink (Pro/Enterprise)
+    - If cloud init fails or no api_key: Fall back to JsonlTraceSink (Free tier)
+
+    Args:
+        api_key: Sentience API key (e.g., "sk_pro_xxxxx")
+            - Free tier: None or empty
+            - Pro/Enterprise: Valid API key
+        run_id: Unique identifier for this agent run. If not provided, generates a UUID.
+        api_url: Sentience API base URL (default: https://api.sentienceapi.com)
+        logger: Optional logger instance for logging file sizes and errors
+        upload_trace: Enable cloud trace upload (default: False). When True and api_key
+            is provided, traces will be uploaded to the cloud. When False, traces
+            are saved locally only.
+
+    Returns:
+        Tracer configured with the appropriate sink
+
+    Example:
+        >>> # Pro tier user
+        >>> tracer = create_tracer(api_key="sk_pro_xyz", run_id="demo")
+        >>> # Returns: Tracer with CloudTraceSink
+        >>>
+        >>> # Free tier user
+        >>> tracer = create_tracer(run_id="demo")
+        >>> # Returns: Tracer with JsonlTraceSink (local-only)
+        >>>
+        >>> # Use with agent
+        >>> agent = SentienceAgent(browser, llm, tracer=tracer)
+        >>> agent.act("Click search")
+        >>> tracer.close()  # Uploads to cloud if Pro tier
+    """
+    if run_id is None:
+        run_id = str(uuid.uuid4())
+
+    if api_url is None:
+        api_url = SENTIENCE_API_URL
+
+    # 0. Check for orphaned traces from previous crashes (if api_key provided and upload enabled)
+    if api_key and upload_trace:
+        _recover_orphaned_traces(api_key, api_url)
+
+    # 1. Try to initialize Cloud Sink (Pro/Enterprise tier) if upload enabled
+    if api_key and upload_trace:
+        try:
+            # Request pre-signed upload URL from backend
+            response = requests.post(
+                f"{api_url}/v1/traces/init",
+                headers={"Authorization": f"Bearer {api_key}"},
+                json={"run_id": run_id},
+                timeout=10,
+            )
+
+            if response.status_code == 200:
+                data = response.json()
+                upload_url = data.get("upload_url")
+
+                if upload_url:
+                    print("☁️ [Sentience] Cloud tracing enabled (Pro tier)")
+                    return Tracer(
+                        run_id=run_id,
+                        sink=CloudTraceSink(
+                            upload_url=upload_url,
+                            run_id=run_id,
+                            api_key=api_key,
+                            api_url=api_url,
+                            logger=logger,
+                        ),
+                    )
+                else:
+                    print("⚠️ [Sentience] Cloud init response missing upload_url")
+                    print("   Falling back to local-only tracing")
+
+            elif response.status_code == 403:
+                print("⚠️ [Sentience] Cloud tracing requires Pro tier")
+                print("   Falling back to local-only tracing")
+            else:
+                print(f"⚠️ [Sentience] Cloud init failed: HTTP {response.status_code}")
+                print("   Falling back to local-only tracing")
+
+        except requests.exceptions.Timeout:
+            print("⚠️ [Sentience] Cloud init timeout")
+            print("   Falling back to local-only tracing")
+        except requests.exceptions.ConnectionError:
+            print("⚠️ [Sentience] Cloud init connection error")
+            print("   Falling back to local-only tracing")
+        except Exception as e:
+            print(f"⚠️ [Sentience] Cloud init error: {e}")
+            print("   Falling back to local-only tracing")
+
+    # 2. Fallback to Local Sink (Free tier / Offline mode)
+    traces_dir = Path("traces")
+    traces_dir.mkdir(exist_ok=True)
+
+    local_path = traces_dir / f"{run_id}.jsonl"
+    print(f"💾 [Sentience] Local tracing: {local_path}")
+
+    return Tracer(run_id=run_id, sink=JsonlTraceSink(str(local_path)))
+
+
+def _recover_orphaned_traces(api_key: str, api_url: str = SENTIENCE_API_URL) -> None:
+    """
+    Attempt to upload orphaned traces from previous crashed runs.
+
+    Scans ~/.sentience/traces/pending/ for un-uploaded trace files and
+    attempts to upload them using the provided API key.
+
+    Args:
+        api_key: Sentience API key for authentication
+        api_url: Sentience API base URL (defaults to SENTIENCE_API_URL)
+    """
+    pending_dir = Path.home() / ".sentience" / "traces" / "pending"
+
+    if not pending_dir.exists():
+        return
+
+    orphaned = list(pending_dir.glob("*.jsonl"))
+
+    if not orphaned:
+        return
+
+    print(f"⚠️ [Sentience] Found {len(orphaned)} un-uploaded trace(s) from previous runs")
+    print("   Attempting to upload now...")
+
+    for trace_file in orphaned:
+        try:
+            # Extract run_id from filename (format: {run_id}.jsonl)
+            run_id = trace_file.stem
+
+            # Request new upload URL for this run_id
+            response = requests.post(
+                f"{api_url}/v1/traces/init",
+                headers={"Authorization": f"Bearer {api_key}"},
+                json={"run_id": run_id},
+                timeout=10,
+            )
+
+            if response.status_code != 200:
+                print(f"❌ Failed to get upload URL for {run_id}: HTTP {response.status_code}")
+                continue
+
+            data = response.json()
+            upload_url = data.get("upload_url")
+
+            if not upload_url:
+                print(f"❌ Upload URL missing for {run_id}")
+                continue
+
+            # Read and compress trace file
+            with open(trace_file, "rb") as f:
+                trace_data = f.read()
+
+            compressed_data = gzip.compress(trace_data)
+
+            # Upload to cloud
+            upload_response = requests.put(
+                upload_url,
+                data=compressed_data,
+                headers={
+                    "Content-Type": "application/x-gzip",
+                    "Content-Encoding": "gzip",
+                },
+                timeout=60,
+            )
+
+            if upload_response.status_code == 200:
+                print(f"✅ Uploaded orphaned trace: {run_id}")
+                # Delete file on successful upload
+                try:
+                    os.remove(trace_file)
+                except Exception:
+                    pass  # Ignore cleanup errors
+            else:
+                print(f"❌ Failed to upload {run_id}: HTTP {upload_response.status_code}")
+
+        except requests.exceptions.Timeout:
+            print(f"❌ Timeout uploading {trace_file.name}")
+        except requests.exceptions.ConnectionError:
+            print(f"❌ Connection error uploading {trace_file.name}")
+        except Exception as e:
+            print(f"❌ Error uploading {trace_file.name}: {e}")