sentienceapi 0.95.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

Files changed (82) hide show
  1. sentience/__init__.py +253 -0
  2. sentience/_extension_loader.py +195 -0
  3. sentience/action_executor.py +215 -0
  4. sentience/actions.py +1020 -0
  5. sentience/agent.py +1181 -0
  6. sentience/agent_config.py +46 -0
  7. sentience/agent_runtime.py +424 -0
  8. sentience/asserts/__init__.py +70 -0
  9. sentience/asserts/expect.py +621 -0
  10. sentience/asserts/query.py +383 -0
  11. sentience/async_api.py +108 -0
  12. sentience/backends/__init__.py +137 -0
  13. sentience/backends/actions.py +343 -0
  14. sentience/backends/browser_use_adapter.py +241 -0
  15. sentience/backends/cdp_backend.py +393 -0
  16. sentience/backends/exceptions.py +211 -0
  17. sentience/backends/playwright_backend.py +194 -0
  18. sentience/backends/protocol.py +216 -0
  19. sentience/backends/sentience_context.py +469 -0
  20. sentience/backends/snapshot.py +427 -0
  21. sentience/base_agent.py +196 -0
  22. sentience/browser.py +1215 -0
  23. sentience/browser_evaluator.py +299 -0
  24. sentience/canonicalization.py +207 -0
  25. sentience/cli.py +130 -0
  26. sentience/cloud_tracing.py +807 -0
  27. sentience/constants.py +6 -0
  28. sentience/conversational_agent.py +543 -0
  29. sentience/element_filter.py +136 -0
  30. sentience/expect.py +188 -0
  31. sentience/extension/background.js +104 -0
  32. sentience/extension/content.js +161 -0
  33. sentience/extension/injected_api.js +914 -0
  34. sentience/extension/manifest.json +36 -0
  35. sentience/extension/pkg/sentience_core.d.ts +51 -0
  36. sentience/extension/pkg/sentience_core.js +323 -0
  37. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  38. sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
  39. sentience/extension/release.json +115 -0
  40. sentience/formatting.py +15 -0
  41. sentience/generator.py +202 -0
  42. sentience/inspector.py +367 -0
  43. sentience/llm_interaction_handler.py +191 -0
  44. sentience/llm_provider.py +875 -0
  45. sentience/llm_provider_utils.py +120 -0
  46. sentience/llm_response_builder.py +153 -0
  47. sentience/models.py +846 -0
  48. sentience/ordinal.py +280 -0
  49. sentience/overlay.py +222 -0
  50. sentience/protocols.py +228 -0
  51. sentience/query.py +303 -0
  52. sentience/read.py +188 -0
  53. sentience/recorder.py +589 -0
  54. sentience/schemas/trace_v1.json +335 -0
  55. sentience/screenshot.py +100 -0
  56. sentience/sentience_methods.py +86 -0
  57. sentience/snapshot.py +706 -0
  58. sentience/snapshot_diff.py +126 -0
  59. sentience/text_search.py +262 -0
  60. sentience/trace_event_builder.py +148 -0
  61. sentience/trace_file_manager.py +197 -0
  62. sentience/trace_indexing/__init__.py +27 -0
  63. sentience/trace_indexing/index_schema.py +199 -0
  64. sentience/trace_indexing/indexer.py +414 -0
  65. sentience/tracer_factory.py +322 -0
  66. sentience/tracing.py +449 -0
  67. sentience/utils/__init__.py +40 -0
  68. sentience/utils/browser.py +46 -0
  69. sentience/utils/element.py +257 -0
  70. sentience/utils/formatting.py +59 -0
  71. sentience/utils.py +296 -0
  72. sentience/verification.py +380 -0
  73. sentience/visual_agent.py +2058 -0
  74. sentience/wait.py +139 -0
  75. sentienceapi-0.95.0.dist-info/METADATA +984 -0
  76. sentienceapi-0.95.0.dist-info/RECORD +82 -0
  77. sentienceapi-0.95.0.dist-info/WHEEL +5 -0
  78. sentienceapi-0.95.0.dist-info/entry_points.txt +2 -0
  79. sentienceapi-0.95.0.dist-info/licenses/LICENSE +24 -0
  80. sentienceapi-0.95.0.dist-info/licenses/LICENSE-APACHE +201 -0
  81. sentienceapi-0.95.0.dist-info/licenses/LICENSE-MIT +21 -0
  82. sentienceapi-0.95.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,191 @@
1
+ """
2
+ LLM Interaction Handler for Sentience Agent.
3
+
4
+ Handles all LLM-related operations: context building, querying, and response parsing.
5
+ This separates LLM interaction concerns from action execution.
6
+ """
7
+
8
+ import re
9
+
10
+ from .llm_provider import LLMProvider, LLMResponse
11
+ from .models import Snapshot
12
+
13
+
14
class LLMInteractionHandler:
    """
    Handles LLM queries and response parsing for Sentience Agent.

    Groups the three LLM-facing steps in one place:
    - build_context(): render snapshot elements as compact prompt lines
    - query_llm(): send the standardized prompt to the configured provider
    - extract_action(): pull the action command out of the raw reply
    """

    # Matches a Markdown code-fence marker. A language tag is consumed only
    # when it is followed by a newline, so an inline fence such as
    # ```CLICK(42)``` keeps its "CLICK" keyword instead of having it removed
    # as if it were a language name.
    _CODE_FENCE_RE = re.compile(r"```(?:\w*\r?\n)?")

    # Matches: CLICK(123), TYPE(123, "text"), PRESS("key"), FINISH()
    _ACTION_RE = re.compile(
        r'(CLICK\s*\(\s*\d+\s*\)|TYPE\s*\(\s*\d+\s*,\s*["\'].*?["\']\s*\)|PRESS\s*\(\s*["\'].*?["\']\s*\)|FINISH\s*\(\s*\))',
        re.IGNORECASE,
    )

    def __init__(self, llm: LLMProvider):
        """
        Initialize LLM interaction handler.

        Args:
            llm: LLM provider instance; query_llm() calls its generate() method
        """
        self.llm = llm

    def build_context(self, snap: Snapshot, goal: str | None = None) -> str:
        """
        Convert snapshot elements to token-efficient prompt string.

        Format: [ID] <role> "text" {cues} @ (x,y) size:WxH importance:score [status]

        Elements whose diff_status is "REMOVED" are left out: they are not
        actionable and should not appear in the LLM context. Element order
        follows snap.elements.

        Args:
            snap: Snapshot object whose elements are rendered
            goal: Optional user goal (unused here, kept for API consistency)

        Returns:
            One formatted line per remaining element, joined with newlines
            (empty string when there are no elements to show)
        """
        return "\n".join(
            self._format_element(el)
            for el in snap.elements
            if el.diff_status != "REMOVED"
        )

    @staticmethod
    def _format_element(el) -> str:
        """Render a single snapshot element as one context line."""
        # Visual cues, emitted as {CUE,CUE,...}; omitted when there are none
        cues: list[str] = []
        if el.visual_cues.is_primary:
            cues.append("PRIMARY")
        if el.visual_cues.is_clickable:
            cues.append("CLICKABLE")
        if el.visual_cues.background_color_name:
            cues.append(f"color:{el.visual_cues.background_color_name}")
        cues_str = f" {{{','.join(cues)}}}" if cues else ""

        # Quoted text, cut to 50 characters with a "..." truncation marker
        text_preview = ""
        if el.text:
            if len(el.text) > 50:
                text_preview = f'"{el.text[:50]}..."'
            else:
                text_preview = f'"{el.text}"'

        # Position and size, truncated to whole pixels
        x, y = int(el.bbox.x), int(el.bbox.y)
        width, height = int(el.bbox.width), int(el.bbox.height)
        position_str = f"@ ({x},{y})"
        size_str = f"size:{width}x{height}"

        # Status flags, only included when relevant
        status_parts = []
        if not el.in_viewport:
            status_parts.append("not_in_viewport")
        if el.is_occluded:
            status_parts.append("occluded")
        if el.diff_status:
            status_parts.append(f"diff:{el.diff_status}")
        status_str = f" [{','.join(status_parts)}]" if status_parts else ""

        # The separators around the text slot are emitted even when the text
        # is empty, so the line layout is the same for every element.
        return (
            f"[{el.id}] <{el.role}> {text_preview}{cues_str} "
            f"{position_str} {size_str} importance:{el.importance}{status_str}"
        )

    def query_llm(self, dom_context: str, goal: str) -> LLMResponse:
        """
        Query LLM with standardized prompt template.

        The goal and element context are embedded in the system prompt; the
        user prompt is a fixed one-line request. The provider is called with
        temperature=0.0.

        Args:
            dom_context: Formatted element context from build_context()
            goal: User goal

        Returns:
            LLMResponse from LLM provider
        """
        # NOTE(review): the prompt tells the model the elements are "sorted by
        # importance", but build_context() keeps the snapshot's own order --
        # presumably the snapshot arrives pre-sorted; confirm with the caller.
        system_prompt = f"""You are an AI web automation agent.

GOAL: {goal}

VISIBLE ELEMENTS (sorted by importance):
{dom_context}

VISUAL CUES EXPLAINED:
After the text, you may see visual cues in curly braces like {{CLICKABLE}} or {{PRIMARY,CLICKABLE,color:white}}:
- PRIMARY: Main call-to-action element on the page
- CLICKABLE: Element is clickable/interactive
- color:X: Background color name (e.g., color:white, color:blue)
Multiple cues are comma-separated inside the braces: {{CLICKABLE,color:white}}

ELEMENT FORMAT EXPLAINED:
Each element line follows this format:
[ID] <role> "text" {{cues}} @ (x,y) size:WxH importance:score [status]

Example: [346] <button> "Computer Accessories" {{CLICKABLE,color:white}} @ (664,100) size:150x40 importance:811

Breaking down each part:
- [ID]: The number in brackets is the element ID - use this EXACT number in CLICK/TYPE commands
Example: If you see [346], use CLICK(346) or TYPE(346, "text")
- <role>: Element type (button, link, textbox, etc.)
- "text": Visible text content (truncated with "..." if long)
- {{cues}}: Optional visual cues in curly braces (e.g., {{CLICKABLE}}, {{PRIMARY,CLICKABLE}}, {{CLICKABLE,color:white}})
If no cues, this part is omitted entirely
- @ (x,y): Element position in pixels from top-left corner
- size:WxH: Element dimensions (width x height in pixels)
- importance: Score indicating element relevance (higher = more important)
- [status]: Optional status flags in brackets (not_in_viewport, occluded, diff:ADDED/MODIFIED/etc)

CRITICAL RESPONSE FORMAT:
You MUST respond with ONLY ONE of these exact action formats:
- CLICK(id) - Click element by ID (use the number from [ID] brackets)
- TYPE(id, "text") - Type text into element (use the number from [ID] brackets)
- PRESS("key") - Press keyboard key (Enter, Escape, Tab, ArrowDown, etc)
- FINISH() - Task complete

DO NOT include any explanation, reasoning, or natural language.
DO NOT use markdown formatting or code blocks.
DO NOT say "The next step is..." or anything similar.

CORRECT Examples (matching element IDs from the list above):
If element is [346] <button> "Click me" → respond: CLICK(346)
If element is [15] <textbox> "Search" → respond: TYPE(15, "magic mouse")
PRESS("Enter")
FINISH()

INCORRECT Examples (DO NOT DO THIS):
"The next step is to click..."
"I will type..."
```CLICK(42)```
"""

        user_prompt = "Return the single action command:"

        return self.llm.generate(system_prompt, user_prompt, temperature=0.0)

    def extract_action(self, response: str) -> str:
        """
        Extract action command from LLM response.

        Handles replies where the LLM wraps the command in Markdown code
        fences or adds explanation despite the instructions. Fence markers
        are stripped first (including inline fences such as ```CLICK(42)```),
        then the first CLICK/TYPE/PRESS/FINISH command found is returned.
        Matching is case-insensitive and the command is returned as written
        by the LLM.

        Args:
            response: Raw LLM response text

        Returns:
            Action command string (e.g., "CLICK(42)", 'TYPE(15, "text")').
            When no command is recognized, the fence-stripped, whitespace-
            trimmed response is returned unchanged (it will likely fail
            downstream parsing).
        """
        cleaned = self._CODE_FENCE_RE.sub("", response).strip()

        match = self._ACTION_RE.search(cleaned)
        if match:
            return match.group(1)

        # No recognizable command: hand back the cleaned text as-is
        return cleaned