sentienceapi 0.90.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sentienceapi might be problematic. Click here for more details.
- sentience/__init__.py +153 -0
- sentience/actions.py +439 -0
- sentience/agent.py +687 -0
- sentience/agent_config.py +43 -0
- sentience/base_agent.py +101 -0
- sentience/browser.py +409 -0
- sentience/cli.py +130 -0
- sentience/cloud_tracing.py +292 -0
- sentience/conversational_agent.py +509 -0
- sentience/expect.py +92 -0
- sentience/extension/background.js +233 -0
- sentience/extension/content.js +298 -0
- sentience/extension/injected_api.js +1473 -0
- sentience/extension/manifest.json +36 -0
- sentience/extension/pkg/sentience_core.d.ts +51 -0
- sentience/extension/pkg/sentience_core.js +529 -0
- sentience/extension/pkg/sentience_core_bg.wasm +0 -0
- sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
- sentience/extension/release.json +115 -0
- sentience/extension/test-content.js +4 -0
- sentience/formatting.py +59 -0
- sentience/generator.py +202 -0
- sentience/inspector.py +185 -0
- sentience/llm_provider.py +431 -0
- sentience/models.py +406 -0
- sentience/overlay.py +115 -0
- sentience/query.py +303 -0
- sentience/read.py +96 -0
- sentience/recorder.py +369 -0
- sentience/schemas/trace_v1.json +216 -0
- sentience/screenshot.py +54 -0
- sentience/snapshot.py +282 -0
- sentience/text_search.py +107 -0
- sentience/trace_indexing/__init__.py +27 -0
- sentience/trace_indexing/index_schema.py +111 -0
- sentience/trace_indexing/indexer.py +363 -0
- sentience/tracer_factory.py +211 -0
- sentience/tracing.py +285 -0
- sentience/utils.py +296 -0
- sentience/wait.py +73 -0
- sentienceapi-0.90.9.dist-info/METADATA +878 -0
- sentienceapi-0.90.9.dist-info/RECORD +46 -0
- sentienceapi-0.90.9.dist-info/WHEEL +5 -0
- sentienceapi-0.90.9.dist-info/entry_points.txt +2 -0
- sentienceapi-0.90.9.dist-info/licenses/LICENSE.md +43 -0
- sentienceapi-0.90.9.dist-info/top_level.txt +1 -0
sentience/query.py
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Query engine v1 - semantic selector matching
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from .models import Element, Snapshot
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def parse_selector(selector: str) -> dict[str, Any]:  # noqa: C901
    """
    Parse a string DSL selector into a structured query dict.

    The selector is a whitespace-separated list of ``key<op>value`` terms.
    Values may be bare, single-quoted or double-quoted (quotes are required
    when the value contains whitespace). Keys may use dot notation
    (``attr.id``, ``css.color``, ``bbox.x``).

    Supported operators and the query keys they produce:
        =    role, tag, text/name -> "text", importance (numeric only),
             clickable/visible/in_viewport/is_occluded (True only for the
             value "true", case-insensitive), attr.X -> query["attr"][X],
             css.X -> query["css"][X]
        !=   role -> "role_exclude"; boolean keys -> negation of the "=" result
        ~    text/name -> "text_contains"
        ^=   text/name -> "text_prefix"
        $=   text/name -> "text_suffix"
        > >= < <=   numeric values only: importance, z_index and bbox.* ->
             "<key>_min" / "<key>_max"; attr.* and css.* ->
             "<key>_gt" / "_gte" / "_lt" / "_lte" (value kept as a string)

    Terms whose key/operator combination is not listed above are ignored.
    When the same query key is produced twice, the later term wins.

    Examples:
        "role=button text~'Sign in'"
        "role=textbox name~'email'"
        "clickable=true role=link"
        "role!=link"
        "importance>500"
        "text^='Sign'"
        "text$='in'"

    Args:
        selector: Selector string in the DSL described above.

    Returns:
        Dict of query criteria; empty when nothing in the selector matched.
    """
    query: dict[str, Any] = {}

    # key, operator, value. Two-character operators are listed before the
    # single-character class so that e.g. ">=" is not split into ">" and "=5".
    pattern = r"([\w.]+)(\^=|\$=|>=|<=|!=|[=~<>])((?:\'[^\']+\'|\"[^\"]+\"|[^\s]+))"

    # operator -> (suffix for bound keys, offset applied to the bound,
    #              suffix for attr./css. comparison keys).
    # Strict comparisons nudge the bound by 0.0001 to make it exclusive.
    bound_ops = {
        ">": ("_min", 0.0001, "_gt"),
        ">=": ("_min", 0.0, "_gte"),
        "<": ("_max", -0.0001, "_lt"),
        "<=": ("_max", 0.0, "_lte"),
    }
    text_ops = {"~": "text_contains", "^=": "text_prefix", "$=": "text_suffix"}
    bool_keys = ("clickable", "visible", "in_viewport", "is_occluded")

    for key, op, raw_value in re.findall(pattern, selector):
        value = raw_value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            # Properly quoted: drop exactly one pair so quote characters that
            # belong to the value itself (e.g. "Rock 'n'") survive.
            value = value[1:-1]
        else:
            # Bare or unbalanced value: keep the historical lenient stripping.
            value = value.strip("\"'")

        try:
            numeric_value = float(value)
        except ValueError:
            numeric_value = None

        if op == "!=":
            if key == "role":
                query["role_exclude"] = value
            elif key in bool_keys:
                # Negation of the "=" semantics, so "clickable!=false" is True.
                query[key] = value.lower() != "true"
        elif op in text_ops:
            # "name" is treated as an alias of "text".
            if key in ("text", "name"):
                query[text_ops[op]] = value
        elif op in bound_ops:
            if numeric_value is None:
                continue  # comparisons are only defined for numeric values
            bound_suffix, offset, cmp_suffix = bound_ops[op]
            if key in ("importance", "z_index") or key.startswith("bbox."):
                query[f"{key}{bound_suffix}"] = numeric_value + offset
            elif key.startswith(("attr.", "css.")):
                query[f"{key}{cmp_suffix}"] = value
        elif op == "=":
            if key == "role":
                query["role"] = value
            elif key in bool_keys:
                query[key] = value.lower() == "true"
            elif key == "tag":
                query["tag"] = value
            elif key in ("name", "text"):
                query["text"] = value
            elif key == "importance":
                if numeric_value is not None:
                    query["importance"] = numeric_value
            elif key.startswith("attr."):
                # Dot notation for attributes: attr.id="submit-btn"
                query.setdefault("attr", {})[key[5:]] = value
            elif key.startswith("css."):
                # Dot notation for CSS: css.color="red"
                query.setdefault("css", {})[key[4:]] = value

    return query
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def match_element(element: Element, query: dict[str, Any]) -> bool:  # noqa: C901
    """
    Decide whether ``element`` satisfies every criterion present in ``query``.

    Only keys that appear in ``query`` are evaluated; an empty query matches
    any element. The "tag", "attr" and "css" keys are accepted but not
    evaluated here, so they never reject an element.

    Args:
        element: Element to test.
        query: Criteria dict (for example the output of the selector parser).

    Returns:
        True when no evaluated criterion rejects the element, else False.
    """
    # Role: required value first, then excluded value.
    if "role" in query and element.role != query["role"]:
        return False
    if "role_exclude" in query and element.role == query["role_exclude"]:
        return False

    # Clickability comes from the element's visual cues.
    if "clickable" in query and element.visual_cues.is_clickable != query["clickable"]:
        return False

    # "visible" means: inside the viewport and not occluded.
    if "visible" in query:
        shown = element.in_viewport and not element.is_occluded
        if shown != query["visible"]:
            return False

    # "tag" is intentionally not checked: nothing here reads a tag from the element.

    # Exact text (case-sensitive); elements without text never match.
    if "text" in query and (not element.text or element.text != query["text"]):
        return False

    # Substring / prefix / suffix text tests are case-insensitive.
    if "text_contains" in query and (
        not element.text or query["text_contains"].lower() not in element.text.lower()
    ):
        return False
    if "text_prefix" in query and (
        not element.text or not element.text.lower().startswith(query["text_prefix"].lower())
    ):
        return False
    if "text_suffix" in query and (
        not element.text or not element.text.lower().endswith(query["text_suffix"].lower())
    ):
        return False

    # Importance: exact value, then inclusive lower / upper bounds.
    if "importance" in query and element.importance != query["importance"]:
        return False
    if "importance_min" in query and element.importance < query["importance_min"]:
        return False
    if "importance_max" in query and element.importance > query["importance_max"]:
        return False

    # Bounding-box bounds, checked per field as (min, max) in x, y, width, height order.
    for dim in ("x", "y", "width", "height"):
        lower_key = f"bbox.{dim}_min"
        upper_key = f"bbox.{dim}_max"
        if lower_key in query and getattr(element.bbox, dim) < query[lower_key]:
            return False
        if upper_key in query and getattr(element.bbox, dim) > query[upper_key]:
            return False

    # Z-index bounds (inclusive).
    if "z_index_min" in query and element.z_index < query["z_index_min"]:
        return False
    if "z_index_max" in query and element.z_index > query["z_index_max"]:
        return False

    # Direct boolean flags on the element.
    for flag in ("in_viewport", "is_occluded"):
        if flag in query and getattr(element, flag) != query[flag]:
            return False

    # "attr" and "css" criteria are placeholders: they are not evaluated.
    return True
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def query(snapshot: Snapshot, selector: str | dict[str, Any]) -> list[Element]:
    """
    Return every element of ``snapshot`` that satisfies ``selector``.

    Args:
        snapshot: Snapshot object whose ``elements`` are searched
        selector: String DSL (e.g., "role=button text~'Sign in'"), which is
            parsed first, or an already-structured dict query used as-is

    Returns:
        New list of matching elements ordered by ``importance``, highest first
    """
    # Strings go through the DSL parser; anything else is taken as the criteria dict.
    criteria = parse_selector(selector) if isinstance(selector, str) else selector

    # Filter, then rank by importance (descending) in one pass over the hits.
    hits = (candidate for candidate in snapshot.elements if match_element(candidate, criteria))
    return sorted(hits, key=lambda candidate: candidate.importance, reverse=True)
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def find(snapshot: Snapshot, selector: str | dict[str, Any]) -> Element | None:
    """
    Return the single best match for ``selector`` in ``snapshot``.

    Args:
        snapshot: Snapshot object
        selector: String DSL or dict query

    Returns:
        The first element returned by ``query`` for this selector, or None
        when nothing matches
    """
    ranked = query(snapshot, selector)
    if not ranked:
        return None
    return ranked[0]
|
sentience/read.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Read page content - supports raw HTML, text, and markdown formats
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Literal
|
|
6
|
+
|
|
7
|
+
from .browser import SentienceBrowser
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def read(
    browser: SentienceBrowser,
    output_format: Literal["raw", "text", "markdown"] = "raw",
    enhance_markdown: bool = True,
) -> dict:
    """
    Read page content as raw HTML, text, or markdown

    Content is obtained by calling ``window.sentience.read(options)`` in the
    page. For enhanced markdown the raw HTML is fetched first and converted
    locally with the optional ``markdownify`` package; if that package is
    missing, the raw read does not report success, or conversion fails, the
    function falls back to asking the extension for markdown directly
    (a warning is printed when markdownify is missing or fails).

    Args:
        browser: SentienceBrowser instance
        output_format: Output format - "raw" (default, returns HTML for external processing),
            "text" (plain text), or "markdown" (lightweight or enhanced markdown).
        enhance_markdown: If True and output_format is "markdown", uses markdownify for better conversion.
            If False, uses the extension's lightweight markdown converter.

    Returns:
        On the enhanced-markdown path, a dict with:
        - status: "success"
        - url: URL reported by the raw read
        - format: "markdown"
        - content: Converted markdown string
        - length: Content length in characters
        Otherwise, whatever the in-page ``window.sentience.read`` call
        returned (presumably the same shape plus an "error" field when
        status is "error" - confirm against the extension).

    Raises:
        RuntimeError: If ``browser.page`` is not set (browser not started).

    Examples:
        # Get raw HTML (default) - can be used with markdownify for better conversion
        result = read(browser)
        html_content = result["content"]

        # Get high-quality markdown (uses markdownify internally)
        result = read(browser, output_format="markdown")
        markdown = result["content"]

        # Get plain text
        result = read(browser, output_format="text")
        text = result["content"]
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call browser.start() first.")

    # Same in-page entry point for every call; only the options object differs.
    read_script = """
        (options) => {
            return window.sentience.read(options);
        }
        """

    if output_format == "markdown" and enhance_markdown:
        # Get raw HTML from the extension first
        raw_html_result = browser.page.evaluate(read_script, {"format": "raw"})

        if raw_html_result.get("status") == "success":
            html_content = raw_html_result["content"]
            try:
                # Import only the converter: requesting a name the package does
                # not export raises ImportError even when markdownify is
                # installed, which would disable this path entirely.
                from markdownify import markdownify

                markdown_content = markdownify(html_content, heading_style="ATX", wrap=True)
                return {
                    "status": "success",
                    "url": raw_html_result["url"],
                    "format": "markdown",
                    "content": markdown_content,
                    "length": len(markdown_content),
                }
            except ImportError:
                print(
                    "Warning: 'markdownify' not installed. Install with 'pip install markdownify' for enhanced markdown. Falling back to extension's markdown."
                )
            except Exception as e:
                # Deliberately broad: enhanced conversion is best-effort and
                # any failure should degrade to the extension's converter.
                print(
                    f"Warning: An unexpected error occurred with markdownify ({e}), falling back to extension's markdown."
                )

    # If not enhanced markdown, or fallback, call extension with requested format
    return browser.page.evaluate(read_script, {"format": output_format})
|