robotframework-aitester 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- robotframework_aitester-1.0.0/AITester/__init__.py +26 -0
- robotframework_aitester-1.0.0/AITester/_version.py +36 -0
- robotframework_aitester-1.0.0/AITester/executor.py +531 -0
- robotframework_aitester-1.0.0/AITester/genai.py +262 -0
- robotframework_aitester-1.0.0/AITester/library.py +1950 -0
- robotframework_aitester-1.0.0/AITester/orchestrator.py +626 -0
- robotframework_aitester-1.0.0/AITester/platforms.py +94 -0
- robotframework_aitester-1.0.0/AITester/prompts/__init__.py +47 -0
- robotframework_aitester-1.0.0/AITester/prompts/api_executor.py +43 -0
- robotframework_aitester-1.0.0/AITester/prompts/mobile_executor.py +80 -0
- robotframework_aitester-1.0.0/AITester/prompts/planner.py +52 -0
- robotframework_aitester-1.0.0/AITester/prompts/reporter.py +40 -0
- robotframework_aitester-1.0.0/AITester/prompts/supervisor.py +52 -0
- robotframework_aitester-1.0.0/AITester/prompts/web_executor.py +96 -0
- robotframework_aitester-1.0.0/AITester/tools/__init__.py +26 -0
- robotframework_aitester-1.0.0/AITester/tools/api_tools.py +320 -0
- robotframework_aitester-1.0.0/AITester/tools/browser_analysis_tools.py +1293 -0
- robotframework_aitester-1.0.0/AITester/tools/common_tools.py +966 -0
- robotframework_aitester-1.0.0/AITester/tools/mobile_analysis_tools.py +143 -0
- robotframework_aitester-1.0.0/AITester/tools/mobile_tools.py +1500 -0
- robotframework_aitester-1.0.0/AITester/tools/web_tools.py +1535 -0
- robotframework_aitester-1.0.0/CHANGES +11 -0
- robotframework_aitester-1.0.0/LICENSE +189 -0
- robotframework_aitester-1.0.0/MANIFEST.in +2 -0
- robotframework_aitester-1.0.0/PKG-INFO +506 -0
- robotframework_aitester-1.0.0/README.md +421 -0
- robotframework_aitester-1.0.0/pyproject.toml +3 -0
- robotframework_aitester-1.0.0/robotframework_aitester.egg-info/PKG-INFO +506 -0
- robotframework_aitester-1.0.0/robotframework_aitester.egg-info/SOURCES.txt +32 -0
- robotframework_aitester-1.0.0/robotframework_aitester.egg-info/dependency_links.txt +1 -0
- robotframework_aitester-1.0.0/robotframework_aitester.egg-info/requires.txt +36 -0
- robotframework_aitester-1.0.0/robotframework_aitester.egg-info/top_level.txt +1 -0
- robotframework_aitester-1.0.0/setup.cfg +4 -0
- robotframework_aitester-1.0.0/setup.py +109 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Apache License 2.0
|
|
2
|
+
#
|
|
3
|
+
# Copyright (c) 2026 Róbert Malovec
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
"""
|
|
18
|
+
robotframework-aitester — Fully Autonomous AI Testing for Robot Framework
|
|
19
|
+
|
|
20
|
+
Main Robot Framework library entrypoint.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from ._version import __version__
|
|
24
|
+
from .library import AITester
|
|
25
|
+
|
|
26
|
+
__all__ = ["AITester", "__version__"]
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Apache License 2.0
|
|
2
|
+
#
|
|
3
|
+
# Copyright (c) 2026 Róbert Malovec
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
"""Project version metadata."""
|
|
18
|
+
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
# Version reported whenever no usable CHANGES file can be read.
_FALLBACK_VERSION = "0.0.1-dev"


def _read_latest_changes_version(changes_path=None):
    """Read the newest release version from CHANGES when available.

    The version is taken to be the text before the first comma on the first
    line of the file, with surrounding whitespace removed.

    Args:
        changes_path: Optional path to the CHANGES file. When omitted, the
            file named ``CHANGES`` in the directory one level above this
            module's directory is used.

    Returns:
        The parsed version string, or ``_FALLBACK_VERSION`` when the file is
        missing, unreadable, not valid UTF-8, empty, or starts with a line
        that yields no version text.
    """
    if changes_path is None:
        changes_path = Path(__file__).resolve().parents[1] / "CHANGES"

    try:
        lines = Path(changes_path).read_text(encoding="utf-8").splitlines()
    except (OSError, UnicodeDecodeError):
        # This runs at import time, so a missing, unreadable, or mis-encoded
        # file must never prevent the package from importing.
        # (FileNotFoundError is a subclass of OSError.)
        return _FALLBACK_VERSION

    if not lines:
        return _FALLBACK_VERSION

    version = lines[0].split(",", 1)[0].strip()
    return version or _FALLBACK_VERSION


__version__ = _read_latest_changes_version()
|
|
@@ -0,0 +1,531 @@
|
|
|
1
|
+
# Apache License 2.0
|
|
2
|
+
#
|
|
3
|
+
# Copyright (c) 2026 Róbert Malovec
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
"""
|
|
18
|
+
Test execution engine and state management for robotframework-aitester.
|
|
19
|
+
|
|
20
|
+
Manages the lifecycle of an agentic test session, including state tracking,
|
|
21
|
+
evidence collection, error handling, and iteration management.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
import logging
|
|
25
|
+
import time
|
|
26
|
+
import uuid
|
|
27
|
+
from dataclasses import dataclass, field
|
|
28
|
+
from enum import Enum
|
|
29
|
+
from typing import List, Optional, Dict, Any
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
# Active session context (shared across tools)
|
|
35
|
+
# ---------------------------------------------------------------------------
|
|
36
|
+
|
|
37
|
+
# Module-wide slot holding the session that tools record their steps against.
# It starts empty and is only ever rebound through set_active_session().
_ACTIVE_SESSION: Optional["TestSession"] = None


def set_active_session(session: Optional["TestSession"]) -> None:
    """Install ``session`` as the active session used for tool-level step recording.

    Args:
        session: The session to make active, or ``None`` to clear the slot.
    """
    global _ACTIVE_SESSION
    _ACTIVE_SESSION = session


def get_active_session() -> Optional["TestSession"]:
    """Return the session currently registered for tool-level step recording.

    Returns:
        The value most recently passed to ``set_active_session``, or ``None``
        when nothing has been registered (or the slot was cleared).
    """
    return _ACTIVE_SESSION
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class StepStatus(Enum):
    """Outcome recorded for one executed test step.

    The member values are the lowercase strings that end up in serialized
    session reports (``TestSession.to_dict`` emits ``status.value``).
    """

    PASSED = "passed"
    FAILED = "failed"
    SKIPPED = "skipped"
    ERROR = "error"
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class SessionStatus(Enum):
    """Lifecycle state of a whole test session.

    The member values are the lowercase strings that end up in serialized
    session reports (``TestSession.to_dict`` emits ``status.value``).
    """

    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    ABORTED = "aborted"
    TIMEOUT = "timeout"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclass
class TestStep:
    """One action carried out by an agent, together with its outcome.

    Attributes:
        step_number: Position of the step within the session's step sequence.
        action: Name of the tool/action that was invoked
            (e.g., 'selenium_click_element').
        description: Human-readable explanation of what the step does.
        status: Outcome of the step (passed, failed, skipped, error).
        duration_ms: How long the step took, in milliseconds.
        screenshot_path: Location of an evidence screenshot, if one was taken.
        assertion_message: Assertion details explaining the pass/fail result.
        error_message: Error details when the step failed with an exception.
        high_level_step_number: Number of the high-level step this step was
            recorded under, if any.
        high_level_step_description: Text of that high-level step, if any.
        metadata: Free-form key-value data attached to the step.
    """

    # Plain class attribute, not a dataclass field (it has no annotation).
    # NOTE(review): presumably here so test collectors such as pytest do not
    # treat this "Test*"-named class as a test case - confirm.
    __test__ = False

    # Required fields.
    step_number: int
    action: str
    description: str
    status: StepStatus
    duration_ms: float

    # Optional evidence and diagnostics.
    screenshot_path: Optional[str] = None
    assertion_message: Optional[str] = None
    error_message: Optional[str] = None

    # Optional link back to a high-level step.
    high_level_step_number: Optional[int] = None
    high_level_step_description: Optional[str] = None

    # A fresh dict per instance, so steps never share one metadata mapping.
    metadata: Dict[str, Any] = field(default_factory=dict)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@dataclass
class TestScenario:
    """A planned test scenario and the steps executed for it.

    Attributes:
        scenario_id: Unique identifier of the scenario.
        name: Name of the scenario.
        description: What the scenario is meant to test.
        priority: Priority level (critical, high, medium, low).
        preconditions: Preconditions required before the scenario runs.
        steps: Test steps executed for this scenario.
        status: Overall status of the scenario; a plain string that starts
            out as "pending".
    """

    # Plain class attribute, not a dataclass field (it has no annotation).
    # NOTE(review): presumably here so test collectors such as pytest do not
    # treat this "Test*"-named class as a test case - confirm.
    __test__ = False

    # Required fields.
    scenario_id: str
    name: str
    description: str

    # Optional planning details.
    priority: str = "medium"
    preconditions: str = ""

    # A fresh list per instance, so scenarios never share one step list.
    steps: List[TestStep] = field(default_factory=list)
    status: str = "pending"
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@dataclass
class TestSession:
    """Complete, mutable state of one agentic test session.

    Attributes:
        session_id: Unique identifier of the session.
        objective: Test objective supplied by the user.
        app_context: Description of the application under test.
        test_mode: Testing mode (web, api, mobile).
        start_time: Timestamp taken when the session object is created.
        end_time: Timestamp set when the session is finalized.
        scenarios: Planned test scenarios.
        steps: Every executed step, across all scenarios.
        iterations_used: Number of agent iterations consumed so far.
        max_iterations: Maximum number of iterations allowed.
        status: Current session status.
        cost_usd: Approximate accumulated cost in USD.
        screenshots: Paths of all captured screenshots.
        errors: Error messages encountered so far.
        agent_log: Raw agent conversation/action log entries.
        high_level_steps: Optional user-defined high-level test steps.
        current_high_level_step: Number of the current high-level step, if any.
        current_high_level_step_description: Text of the current high-level
            step, if any.
        reuse_existing_session: Whether an existing browser/app session is
            reused.
        start_state_summary: Summary of the start state captured when the
            session began.
        scroll_into_view: Whether UI elements are scrolled into view before
            interacting with them.
        direct_url_navigations_used: Number of direct browser URL navigations
            used to enter the application.
        allowed_direct_urls: Concrete URLs the user explicitly requested.
        allow_browser_termination: Whether the user explicitly allowed closing,
            resetting, or restarting the current browser/app session.
        ui_interactions_total: Number of UI interaction tool calls.
        ui_state_checks_total: Number of UI state validation tool calls.
        ui_interactions_by_step: UI interaction counts per high-level step.
        ui_state_checks_by_step: UI state validation counts per high-level
            step.
    """

    # Plain class attribute, not a dataclass field (it has no annotation).
    # NOTE(review): presumably here so test collectors such as pytest do not
    # treat this "Test*"-named class as a test case - confirm.
    __test__ = False

    # Identity and user-supplied inputs.
    session_id: str
    objective: str
    app_context: str
    test_mode: str = "web"

    # Timing.
    start_time: float = field(default_factory=time.time)
    end_time: Optional[float] = None

    # Plan and execution record.
    scenarios: List[TestScenario] = field(default_factory=list)
    steps: List[TestStep] = field(default_factory=list)

    # Budget tracking and overall state.
    iterations_used: int = 0
    max_iterations: int = 50
    status: SessionStatus = SessionStatus.RUNNING
    cost_usd: float = 0.0

    # Evidence and diagnostics collected along the way.
    screenshots: List[str] = field(default_factory=list)
    errors: List[str] = field(default_factory=list)
    agent_log: List[Dict[str, Any]] = field(default_factory=list)

    # High-level step tracking.
    high_level_steps: List[str] = field(default_factory=list)
    current_high_level_step: Optional[int] = None
    current_high_level_step_description: Optional[str] = None

    # Session behaviour switches and navigation bookkeeping.
    reuse_existing_session: bool = False
    start_state_summary: Optional[str] = None
    scroll_into_view: bool = True
    direct_url_navigations_used: int = 0
    allowed_direct_urls: List[str] = field(default_factory=list)
    allow_browser_termination: bool = False

    # UI activity counters, overall and keyed by high-level step number.
    ui_interactions_total: int = 0
    ui_state_checks_total: int = 0
    ui_interactions_by_step: Dict[int, int] = field(default_factory=dict)
    ui_state_checks_by_step: Dict[int, int] = field(default_factory=dict)

    def _count_steps_with(self, wanted: StepStatus) -> int:
        """Return how many recorded steps carry exactly the status ``wanted``."""
        return len([step for step in self.steps if step.status == wanted])

    @property
    def duration_seconds(self) -> float:
        """Seconds elapsed since ``start_time``.

        Measured up to ``end_time`` once that is set, otherwise up to now.
        """
        finished_at = self.end_time
        if not finished_at:
            finished_at = time.time()
        return finished_at - self.start_time

    @property
    def total_steps(self) -> int:
        """Number of steps recorded so far, whatever their status."""
        return len(self.steps)

    @property
    def passed_steps(self) -> int:
        """Number of recorded steps whose status is ``StepStatus.PASSED``."""
        return self._count_steps_with(StepStatus.PASSED)

    @property
    def failed_steps(self) -> int:
        """Number of recorded steps whose status is ``StepStatus.FAILED``."""
        return self._count_steps_with(StepStatus.FAILED)

    @property
    def pass_rate(self) -> float:
        """Passed steps as a percentage of all recorded steps.

        Returns 0.0 when no steps have been recorded. Steps of every status
        count towards the denominator.
        """
        recorded = self.total_steps
        if recorded == 0:
            return 0.0
        return (self.passed_steps / recorded) * 100.0

    def add_step(self, step: TestStep):
        """Record ``step`` and collect its evidence into the session lists.

        Args:
            step: TestStep to record. Its screenshot path and error message,
                when present, are also appended to ``screenshots`` and
                ``errors`` respectively.
        """
        self.steps.append(step)
        shot = step.screenshot_path
        if shot:
            self.screenshots.append(shot)
        failure_text = step.error_message
        if failure_text:
            self.errors.append(failure_text)

    def finalize(self, status: Optional[SessionStatus] = None):
        """Stamp the end time and settle the session's final status.

        Args:
            status: Status to force. When omitted, the status becomes FAILED
                if at least one step has status FAILED and COMPLETED
                otherwise; steps with status ERROR or SKIPPED do not affect
                this choice.
        """
        self.end_time = time.time()
        if status:
            self.status = status
            return
        if self.failed_steps > 0:
            self.status = SessionStatus.FAILED
        else:
            self.status = SessionStatus.COMPLETED

    def to_dict(self) -> Dict[str, Any]:
        """Build a dictionary view of the session for reporting.

        Returns:
            A dictionary with summary figures, one entry per scenario and one
            entry per step. The ``screenshots``, ``errors``,
            ``high_level_steps`` and ``allowed_direct_urls`` values are the
            session's own list objects, not copies.
        """
        report: Dict[str, Any] = {
            "session_id": self.session_id,
            "objective": self.objective,
            "app_context": self.app_context,
            "test_mode": self.test_mode,
            "status": self.status.value,
            "duration_seconds": round(self.duration_seconds, 2),
            "iterations_used": self.iterations_used,
            "max_iterations": self.max_iterations,
            "total_steps": self.total_steps,
            "passed_steps": self.passed_steps,
            "failed_steps": self.failed_steps,
            "pass_rate": round(self.pass_rate, 1),
            "cost_usd": round(self.cost_usd, 4),
            "screenshots": self.screenshots,
            "errors": self.errors,
            "high_level_steps": self.high_level_steps,
            "direct_url_navigations_used": self.direct_url_navigations_used,
            "allowed_direct_urls": self.allowed_direct_urls,
            "allow_browser_termination": self.allow_browser_termination,
        }

        # Scenarios are summarised; only the number of their steps is kept.
        scenario_rows = []
        for scenario in self.scenarios:
            scenario_rows.append(
                {
                    "scenario_id": scenario.scenario_id,
                    "name": scenario.name,
                    "description": scenario.description,
                    "priority": scenario.priority,
                    "status": scenario.status,
                    "steps_count": len(scenario.steps),
                }
            )
        report["scenarios"] = scenario_rows

        # Steps are listed in recording order; step metadata is not exported.
        step_rows = []
        for step in self.steps:
            step_rows.append(
                {
                    "step_number": step.step_number,
                    "action": step.action,
                    "description": step.description,
                    "status": step.status.value,
                    "duration_ms": round(step.duration_ms, 2),
                    "screenshot_path": step.screenshot_path,
                    "assertion_message": step.assertion_message,
                    "error_message": step.error_message,
                    "high_level_step_number": step.high_level_step_number,
                    "high_level_step_description": step.high_level_step_description,
                }
            )
        report["steps"] = step_rows

        return report
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
class SafetyGuard:
    """Guard rails for autonomous test execution.

    The guard bundles several independent checks:
    - Iteration limit: stop once the session has used up its iterations.
    - Timeout: stop once the session has run for too long.
    - Cost tracking: stop once the accumulated cost reaches a ceiling.
    - Error recovery: a retry budget counted per action name.
    - Action whitelist/blacklist: configurable tool restrictions.
    """

    DEFAULT_MAX_RETRIES = 3
    DEFAULT_TIMEOUT_SECONDS = 600  # 10 minutes per session

    def __init__(
        self,
        max_iterations: int = 50,
        timeout_seconds: Optional[float] = None,
        max_retries_per_action: Optional[int] = None,
        action_whitelist: Optional[List[str]] = None,
        action_blacklist: Optional[List[str]] = None,
        max_cost_usd: Optional[float] = None,
    ):
        """Configure the guard.

        Args:
            max_iterations: Maximum allowed agent iterations.
            timeout_seconds: Session timeout in seconds; any falsy value
                (``None`` or 0) selects ``DEFAULT_TIMEOUT_SECONDS``.
            max_retries_per_action: Max retries per failed action; any falsy
                value (``None`` or 0) selects ``DEFAULT_MAX_RETRIES``.
            action_whitelist: If non-empty, only these tools are allowed.
            action_blacklist: If non-empty, these tools are blocked.
            max_cost_usd: Maximum session cost in USD; a falsy value disables
                the cost check.
        """
        self.max_iterations = max_iterations

        # NOTE(review): truthiness is used on purpose to match existing
        # behaviour, which means an explicit 0 is replaced by the default -
        # confirm that is intended.
        self.timeout_seconds = (
            timeout_seconds if timeout_seconds else self.DEFAULT_TIMEOUT_SECONDS
        )
        self.max_retries_per_action = (
            max_retries_per_action
            if max_retries_per_action
            else self.DEFAULT_MAX_RETRIES
        )

        # An empty or missing whitelist means "no whitelist restriction".
        self.action_whitelist = set(action_whitelist) if action_whitelist else None
        self.action_blacklist = set(action_blacklist) if action_blacklist else set()
        self.max_cost_usd = max_cost_usd

        # Retry attempts recorded so far, keyed by action name.
        self._retry_counts: Dict[str, int] = {}

    def check_iteration_limit(self, session: "TestSession") -> bool:
        """Tell whether the session still has iterations left.

        Args:
            session: Current test session.

        Returns:
            True if within limits, False if exceeded.
        """
        # NOTE(review): the limit is read from the session; the guard's own
        # ``max_iterations`` attribute is not consulted here - confirm.
        exhausted = session.iterations_used >= session.max_iterations
        if not exhausted:
            return True
        logger.warning(
            "Iteration limit reached: %d/%d",
            session.iterations_used,
            session.max_iterations,
        )
        return False

    def check_timeout(self, session: "TestSession") -> bool:
        """Tell whether the session is still inside its time budget.

        Args:
            session: Current test session.

        Returns:
            True if within timeout, False if exceeded.
        """
        elapsed = time.time() - session.start_time
        timed_out = elapsed >= self.timeout_seconds
        if not timed_out:
            return True
        logger.warning(
            "Session timeout: %.1f seconds elapsed (limit: %.1f)",
            elapsed,
            self.timeout_seconds,
        )
        return False

    def check_cost_limit(self, session: "TestSession") -> bool:
        """Tell whether the session is still below the cost ceiling.

        Args:
            session: Current test session.

        Returns:
            True if within cost limit (or no limit is configured), False if
            exceeded.
        """
        ceiling = self.max_cost_usd
        # A falsy ceiling (None or 0) means the cost check is switched off.
        if not ceiling or not (session.cost_usd >= ceiling):
            return True
        logger.warning(
            "Cost limit reached: $%.4f (limit: $%.4f)",
            session.cost_usd,
            ceiling,
        )
        return False

    def is_action_allowed(self, action_name: str) -> bool:
        """Tell whether a tool may be used under the configured lists.

        The blacklist is consulted first, so a name present in both lists is
        blocked.

        Args:
            action_name: Name of the tool to check.

        Returns:
            True if allowed, False if blocked.
        """
        if action_name in self.action_blacklist:
            logger.warning("Action blocked by blacklist: %s", action_name)
            return False

        permitted = self.action_whitelist
        if permitted and action_name not in permitted:
            logger.warning("Action not in whitelist: %s", action_name)
            return False

        return True

    def can_retry(self, action_name: str) -> bool:
        """Tell whether the action still has retry budget left.

        Args:
            action_name: Name of the tool to check.

        Returns:
            True if retries remaining, False if exhausted.
        """
        return self._retry_counts.get(action_name, 0) < self.max_retries_per_action

    def record_retry(self, action_name: str):
        """Count one more retry attempt against the action's budget.

        Args:
            action_name: Name of the tool being retried.
        """
        attempts_so_far = self._retry_counts.get(action_name, 0)
        self._retry_counts[action_name] = attempts_so_far + 1

    def validate_session(self, session: "TestSession") -> tuple:
        """Run every session-level safety check, stopping at the first failure.

        Args:
            session: Current test session.

        Returns:
            Tuple of (is_safe, reason_if_not_safe); the reason is ``None``
            when every check passes.
        """
        # Order matters: it decides which reason is reported when several
        # limits are exceeded at once.
        ordered_checks = (
            (self.check_iteration_limit, "iteration_limit_exceeded"),
            (self.check_timeout, "session_timeout"),
            (self.check_cost_limit, "cost_limit_exceeded"),
        )
        for check, reason in ordered_checks:
            if not check(session):
                return False, reason
        return True, None
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def create_session(
    objective: str,
    app_context: str,
    test_mode: str = "web",
    max_iterations: int = 50,
    high_level_steps: Optional[List[str]] = None,
    reuse_existing_session: bool = False,
    start_state_summary: Optional[str] = None,
    scroll_into_view: bool = True,
    allowed_direct_urls: Optional[List[str]] = None,
    allow_browser_termination: bool = False,
) -> TestSession:
    """Build a new ``TestSession`` with a freshly generated short identifier.

    Args:
        objective: The test objective.
        app_context: Application context description.
        test_mode: Testing mode (web, api, mobile).
        max_iterations: Maximum agent iterations.
        high_level_steps: Optional user-defined high-level steps; a missing
            or empty value becomes a new empty list.
        reuse_existing_session: Stored on the session unchanged.
        start_state_summary: Stored on the session unchanged.
        scroll_into_view: Stored on the session unchanged.
        allowed_direct_urls: Optional list of URLs; a missing or empty value
            becomes a new empty list.
        allow_browser_termination: Stored on the session unchanged.

    Returns:
        Initialized TestSession instance.
    """
    # The identifier is the first eight hex digits of a random UUID.
    new_id = uuid.uuid4().hex[:8]
    logger.info(
        "Creating test session %s: mode=%s, max_iterations=%d",
        new_id,
        test_mode,
        max_iterations,
    )

    step_outline = high_level_steps if high_level_steps else []
    direct_urls = allowed_direct_urls if allowed_direct_urls else []

    return TestSession(
        session_id=new_id,
        objective=objective,
        app_context=app_context,
        test_mode=test_mode,
        max_iterations=max_iterations,
        high_level_steps=step_outline,
        reuse_existing_session=reuse_existing_session,
        start_state_summary=start_state_summary,
        scroll_into_view=scroll_into_view,
        allowed_direct_urls=direct_urls,
        allow_browser_termination=allow_browser_termination,
    )
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
def record_step(
    session: TestSession,
    action: str,
    description: str,
    status: StepStatus,
    duration_ms: float,
    screenshot_path: Optional[str] = None,
    assertion_message: Optional[str] = None,
    error_message: Optional[str] = None,
) -> TestStep:
    """Create a ``TestStep`` from the given details and add it to the session.

    The step is numbered one past the count of steps already recorded, and
    it is tagged with the session's current high-level step number and
    description.

    Args:
        session: Current test session.
        action: Tool/action name.
        description: What the step does.
        status: Step result.
        duration_ms: Execution time.
        screenshot_path: Optional evidence screenshot.
        assertion_message: Assertion detail.
        error_message: Error detail.

    Returns:
        The recorded TestStep.
    """
    next_number = len(session.steps) + 1
    recorded = TestStep(
        step_number=next_number,
        action=action,
        description=description,
        status=status,
        duration_ms=duration_ms,
        screenshot_path=screenshot_path,
        assertion_message=assertion_message,
        error_message=error_message,
        high_level_step_number=session.current_high_level_step,
        high_level_step_description=session.current_high_level_step_description,
    )
    # add_step also files the screenshot path and error message, if present.
    session.add_step(recorded)
    return recorded
|