sentienceapi 0.90.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

Files changed (50) hide show
  1. sentience/__init__.py +153 -0
  2. sentience/_extension_loader.py +40 -0
  3. sentience/actions.py +837 -0
  4. sentience/agent.py +1246 -0
  5. sentience/agent_config.py +43 -0
  6. sentience/async_api.py +101 -0
  7. sentience/base_agent.py +194 -0
  8. sentience/browser.py +1037 -0
  9. sentience/cli.py +130 -0
  10. sentience/cloud_tracing.py +382 -0
  11. sentience/conversational_agent.py +509 -0
  12. sentience/expect.py +188 -0
  13. sentience/extension/background.js +233 -0
  14. sentience/extension/content.js +298 -0
  15. sentience/extension/injected_api.js +1473 -0
  16. sentience/extension/manifest.json +36 -0
  17. sentience/extension/pkg/sentience_core.d.ts +51 -0
  18. sentience/extension/pkg/sentience_core.js +529 -0
  19. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  20. sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
  21. sentience/extension/release.json +115 -0
  22. sentience/extension/test-content.js +4 -0
  23. sentience/formatting.py +59 -0
  24. sentience/generator.py +202 -0
  25. sentience/inspector.py +365 -0
  26. sentience/llm_provider.py +637 -0
  27. sentience/models.py +412 -0
  28. sentience/overlay.py +222 -0
  29. sentience/query.py +303 -0
  30. sentience/read.py +185 -0
  31. sentience/recorder.py +589 -0
  32. sentience/schemas/trace_v1.json +216 -0
  33. sentience/screenshot.py +100 -0
  34. sentience/snapshot.py +516 -0
  35. sentience/text_search.py +290 -0
  36. sentience/trace_indexing/__init__.py +27 -0
  37. sentience/trace_indexing/index_schema.py +111 -0
  38. sentience/trace_indexing/indexer.py +357 -0
  39. sentience/tracer_factory.py +211 -0
  40. sentience/tracing.py +285 -0
  41. sentience/utils.py +296 -0
  42. sentience/wait.py +137 -0
  43. sentienceapi-0.90.17.dist-info/METADATA +917 -0
  44. sentienceapi-0.90.17.dist-info/RECORD +50 -0
  45. sentienceapi-0.90.17.dist-info/WHEEL +5 -0
  46. sentienceapi-0.90.17.dist-info/entry_points.txt +2 -0
  47. sentienceapi-0.90.17.dist-info/licenses/LICENSE +24 -0
  48. sentienceapi-0.90.17.dist-info/licenses/LICENSE-APACHE +201 -0
  49. sentienceapi-0.90.17.dist-info/licenses/LICENSE-MIT +21 -0
  50. sentienceapi-0.90.17.dist-info/top_level.txt +1 -0
sentience/query.py ADDED
@@ -0,0 +1,303 @@
1
+ """
2
+ Query engine v1 - semantic selector matching
3
+ """
4
+
5
+ import re
6
+ from typing import Any
7
+
8
+ from .models import Element, Snapshot
9
+
10
+
11
def parse_selector(selector: str) -> dict[str, Any]:  # noqa: C901
    """
    Parse a string DSL selector into a structured query dict.

    A selector is a whitespace-separated list of ``key<op>value`` terms. A value
    may be bare or wrapped in single or double quotes; quotes are needed when the
    value contains whitespace. Terms whose key/operator combination is not
    recognised are silently ignored.

    Supported operators:
        =             exact match: role, tag, text/name, importance (numeric),
                      clickable, visible, in_viewport, is_occluded, attr.*, css.*
        !=            role exclusion; negated boolean for clickable, visible,
                      in_viewport and is_occluded
        ~  ^=  $=     substring / prefix / suffix on text (``name`` is an alias)
        >  >=  <  <=  numeric bounds on importance, z_index and bbox.*; for
                      attr.* and css.* the raw value is stored under
                      ``<key>_gt`` / ``_gte`` / ``_lt`` / ``_lte``

    Examples:
        "role=button text~'Sign in'"
        "role=textbox name~'email'"
        "clickable=true role=link"
        "role!=link"
        "importance>500"
        "text^='Sign'"
        "text$='in'"

    Args:
        selector: Selector string written in the DSL described above.

    Returns:
        Dict of query criteria. Strict bounds (``>`` / ``<``) are stored as
        inclusive ``*_min`` / ``*_max`` values shifted by 0.0001.
    """
    query: dict[str, Any] = {}

    # Multi-character operators are listed before the single-character class so
    # that ">=" is never read as ">" followed by a value starting with "=".
    # The key allows dots to support attr.id / css.color / bbox.x notation.
    pattern = r"([\w.]+)(\^=|\$=|>=|<=|!=|[=~<>])((?:\'[^\']+\'|\"[^\"]+\"|[^\s]+))"

    boolean_keys = ("clickable", "visible", "in_viewport", "is_occluded")
    text_ops = {"~": "text_contains", "^=": "text_prefix", "$=": "text_suffix"}
    # operator -> (bound suffix, offset that makes the bound exclusive, attr/css suffix)
    range_ops = {
        ">": ("min", 0.0001, "gt"),
        ">=": ("min", 0.0, "gte"),
        "<": ("max", -0.0001, "lt"),
        "<=": ("max", 0.0, "lte"),
    }

    for key, op, value in re.findall(pattern, selector):
        value = value.strip()
        # Drop exactly one pair of matching surrounding quotes so that quote
        # characters belonging to the value itself (e.g. 'say "hi"') survive.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]

        # Numeric interpretation is only needed for bounds and importance=...
        try:
            numeric_value = float(value)
        except ValueError:
            numeric_value = None

        if op in text_ops:
            if key in ("text", "name"):
                query[text_ops[op]] = value
        elif op in range_ops:
            bound, offset, raw_suffix = range_ops[op]
            if key.startswith(("attr.", "css.")):
                # Attribute/CSS comparisons keep the raw string value.
                query[f"{key}_{raw_suffix}"] = value
            elif numeric_value is not None and (
                key in ("importance", "z_index") or key.startswith("bbox.")
            ):
                query[f"{key}_{bound}"] = numeric_value + offset if offset else numeric_value
        elif op == "!=":
            if key == "role":
                query["role_exclude"] = value
            elif key in boolean_keys:
                # "flag!=true" means False and "flag!=false" means True.
                query[key] = value.lower() != "true"
        elif op == "=":
            if key in ("role", "tag"):
                query[key] = value
            elif key in boolean_keys:
                query[key] = value.lower() == "true"
            elif key in ("name", "text"):
                query["text"] = value
            elif key == "importance":
                if numeric_value is not None:
                    query["importance"] = numeric_value
            elif key.startswith("attr."):
                # attr.id="submit-btn" -> {"attr": {"id": "submit-btn"}}
                query.setdefault("attr", {})[key[len("attr."):]] = value
            elif key.startswith("css."):
                # css.color="red" -> {"css": {"color": "red"}}
                query.setdefault("css", {})[key[len("css."):]] = value

    return query
137
+
138
+
139
def match_element(element: Element, query: dict[str, Any]) -> bool:  # noqa: C901
    """
    Return True when ``element`` satisfies every criterion present in ``query``.

    Only keys that appear in ``query`` are evaluated, and the first failing
    criterion short-circuits to False. The "tag", "attr" and "css" keys are
    accepted but not evaluated here, so they never reject an element.
    """
    # --- role: required value / excluded value ---
    if "role" in query and element.role != query["role"]:
        return False
    if "role_exclude" in query and element.role == query["role_exclude"]:
        return False

    # --- clickable flag lives on the element's visual cues ---
    if "clickable" in query and element.visual_cues.is_clickable != query["clickable"]:
        return False

    # --- visible means: inside the viewport and not occluded ---
    if "visible" in query:
        shown = element.in_viewport and not element.is_occluded
        if shown != query["visible"]:
            return False

    # --- text: exact comparison is case-sensitive; an empty/missing text never matches ---
    if "text" in query:
        if not (element.text and element.text == query["text"]):
            return False

    # --- text: substring / prefix / suffix comparisons are case-insensitive ---
    if "text_contains" in query:
        if not element.text or query["text_contains"].lower() not in element.text.lower():
            return False
    if "text_prefix" in query:
        if not element.text or not element.text.lower().startswith(query["text_prefix"].lower()):
            return False
    if "text_suffix" in query:
        if not element.text or not element.text.lower().endswith(query["text_suffix"].lower()):
            return False

    # --- importance: exact value, then inclusive lower/upper bounds ---
    if "importance" in query and element.importance != query["importance"]:
        return False
    if "importance_min" in query and element.importance < query["importance_min"]:
        return False
    if "importance_max" in query and element.importance > query["importance_max"]:
        return False

    # --- bounding box: inclusive lower/upper bound per field ---
    for dimension in ("x", "y", "width", "height"):
        lower_key = f"bbox.{dimension}_min"
        upper_key = f"bbox.{dimension}_max"
        if lower_key in query and getattr(element.bbox, dimension) < query[lower_key]:
            return False
        if upper_key in query and getattr(element.bbox, dimension) > query[upper_key]:
            return False

    # --- z-index: inclusive bounds ---
    if "z_index_min" in query and element.z_index < query["z_index_min"]:
        return False
    if "z_index_max" in query and element.z_index > query["z_index_max"]:
        return False

    # --- raw viewport / occlusion flags ---
    if "in_viewport" in query and element.in_viewport != query["in_viewport"]:
        return False
    if "is_occluded" in query and element.is_occluded != query["is_occluded"]:
        return False

    # "tag", "attr" and "css" criteria are placeholders and intentionally
    # not checked here.
    return True
263
+
264
+
265
def query(snapshot: Snapshot, selector: str | dict[str, Any]) -> list[Element]:
    """
    Return the snapshot elements that satisfy a semantic selector.

    Args:
        snapshot: Snapshot whose ``elements`` are searched.
        selector: Either a DSL string (e.g., "role=button text~'Sign in'"),
            which is parsed first, or an already-structured query dict.

    Returns:
        Matching elements ordered from highest to lowest importance.
    """
    # A string selector is parsed; a dict is used as the criteria directly.
    criteria = parse_selector(selector) if isinstance(selector, str) else selector

    return sorted(
        (candidate for candidate in snapshot.elements if match_element(candidate, criteria)),
        key=lambda candidate: candidate.importance,
        reverse=True,
    )
289
+
290
+
291
def find(snapshot: Snapshot, selector: str | dict[str, Any]) -> Element | None:
    """
    Return the single best match for a selector.

    Args:
        snapshot: Snapshot object to search.
        selector: String DSL or dict query, forwarded unchanged to ``query``.

    Returns:
        The first element of the ``query`` result (highest importance),
        or None when nothing matches.
    """
    return next(iter(query(snapshot, selector)), None)
sentience/read.py ADDED
@@ -0,0 +1,185 @@
1
+ """
2
+ Read page content - supports raw HTML, text, and markdown formats
3
+ """
4
+
5
+ from typing import Literal
6
+
7
+ from .browser import AsyncSentienceBrowser, SentienceBrowser
8
+
9
+
10
def read(
    browser: SentienceBrowser,
    output_format: Literal["raw", "text", "markdown"] = "raw",
    enhance_markdown: bool = True,
) -> dict:
    """
    Read page content as raw HTML, text, or markdown

    Args:
        browser: SentienceBrowser instance
        output_format: Output format - "raw" (default, returns HTML for external processing),
            "text" (plain text), or "markdown" (lightweight or enhanced markdown).
        enhance_markdown: If True and output_format is "markdown", uses markdownify for better
            conversion. If False, uses the extension's lightweight markdown converter.
            The extension's converter is also used as a fallback when markdownify is
            not installed or fails.

    Returns:
        dict with:
        - status: "success" or "error"
        - url: Current page URL
        - format: "raw", "text", or "markdown"
        - content: Page content as string
        - length: Content length in characters
        - error: Error message if status is "error"

    Raises:
        RuntimeError: If the browser has no page (not started).

    Examples:
        # Get raw HTML (default) - can be used with markdownify for better conversion
        result = read(browser)
        html_content = result["content"]

        # Get high-quality markdown (uses markdownify internally)
        result = read(browser, output_format="markdown")
        markdown = result["content"]

        # Get plain text
        result = read(browser, output_format="text")
        text = result["content"]
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call browser.start() first.")

    # Same in-page call for every format; only the options argument differs.
    read_script = """
        (options) => {
            return window.sentience.read(options);
        }
        """

    if output_format == "markdown" and enhance_markdown:
        # Get raw HTML from the extension first
        raw_html_result = browser.page.evaluate(read_script, {"format": "raw"})

        if isinstance(raw_html_result, dict) and raw_html_result.get("status") == "success":
            try:
                # Optional dependency, imported lazily. Only `markdownify` is
                # imported: requesting a name the package does not export would
                # raise ImportError even when the package is installed.
                from markdownify import markdownify
            except ImportError:
                print(
                    "Warning: 'markdownify' not installed. Install with 'pip install markdownify' for enhanced markdown. Falling back to extension's markdown."
                )
            else:
                try:
                    markdown_content = markdownify(
                        raw_html_result["content"], heading_style="ATX", wrap=True
                    )
                    return {
                        "status": "success",
                        "url": raw_html_result["url"],
                        "format": "markdown",
                        "content": markdown_content,
                        "length": len(markdown_content),
                    }
                except Exception as e:
                    # Best effort: any conversion problem falls through to the
                    # extension's own markdown converter below.
                    print(f"Warning: markdownify failed ({e}), falling back to extension's markdown.")

    # If not enhanced markdown, or fallback, call extension with requested format
    return browser.page.evaluate(read_script, {"format": output_format})
97
+
98
+
99
async def read_async(
    browser: AsyncSentienceBrowser,
    output_format: Literal["raw", "text", "markdown"] = "raw",
    enhance_markdown: bool = True,
) -> dict:
    """
    Read page content as raw HTML, text, or markdown (async)

    Args:
        browser: AsyncSentienceBrowser instance
        output_format: Output format - "raw" (default, returns HTML for external processing),
            "text" (plain text), or "markdown" (lightweight or enhanced markdown).
        enhance_markdown: If True and output_format is "markdown", uses markdownify for better
            conversion. If False, uses the extension's lightweight markdown converter.
            The extension's converter is also used as a fallback when markdownify is
            not installed or fails.

    Returns:
        dict with:
        - status: "success" or "error"
        - url: Current page URL
        - format: "raw", "text", or "markdown"
        - content: Page content as string
        - length: Content length in characters
        - error: Error message if status is "error"

    Raises:
        RuntimeError: If the browser has no page (not started).

    Examples:
        # Get raw HTML (default) - can be used with markdownify for better conversion
        result = await read_async(browser)
        html_content = result["content"]

        # Get high-quality markdown (uses markdownify internally)
        result = await read_async(browser, output_format="markdown")
        markdown = result["content"]

        # Get plain text
        result = await read_async(browser, output_format="text")
        text = result["content"]
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call await browser.start() first.")

    # Same in-page call for every format; only the options argument differs.
    read_script = """
        (options) => {
            return window.sentience.read(options);
        }
        """

    if output_format == "markdown" and enhance_markdown:
        # Get raw HTML from the extension first
        raw_html_result = await browser.page.evaluate(read_script, {"format": "raw"})

        if isinstance(raw_html_result, dict) and raw_html_result.get("status") == "success":
            try:
                # Optional dependency, imported lazily. Only `markdownify` is
                # imported: requesting a name the package does not export would
                # raise ImportError even when the package is installed.
                from markdownify import markdownify
            except ImportError:
                print(
                    "Warning: 'markdownify' not installed. Install with 'pip install markdownify' for enhanced markdown. Falling back to extension's markdown."
                )
            else:
                try:
                    markdown_content = markdownify(
                        raw_html_result["content"], heading_style="ATX", wrap=True
                    )
                    return {
                        "status": "success",
                        "url": raw_html_result["url"],
                        "format": "markdown",
                        "content": markdown_content,
                        "length": len(markdown_content),
                    }
                except Exception as e:
                    # Best effort: any conversion problem falls through to the
                    # extension's own markdown converter below.
                    print(f"Warning: markdownify failed ({e}), falling back to extension's markdown.")

    # If not enhanced markdown, or fallback, call extension with requested format
    return await browser.page.evaluate(read_script, {"format": output_format})