aitester-bdd 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aitester_bdd-0.2.0/.github/workflows/ci.yml +68 -0
- aitester_bdd-0.2.0/.github/workflows/publish-pypi.yml +44 -0
- aitester_bdd-0.2.0/.gitignore +66 -0
- aitester_bdd-0.2.0/CHANGELOG.md +49 -0
- aitester_bdd-0.2.0/LICENSE +21 -0
- aitester_bdd-0.2.0/PKG-INFO +124 -0
- aitester_bdd-0.2.0/README.md +82 -0
- aitester_bdd-0.2.0/dev/architecture.md +99 -0
- aitester_bdd-0.2.0/dev/ideas.md +39 -0
- aitester_bdd-0.2.0/docs/changelog.md +3 -0
- aitester_bdd-0.2.0/docs/getting-started/installation.md +84 -0
- aitester_bdd-0.2.0/docs/getting-started/quickstart.md +74 -0
- aitester_bdd-0.2.0/docs/guide/actions.md +145 -0
- aitester_bdd-0.2.0/docs/guide/configuration.md +93 -0
- aitester_bdd-0.2.0/docs/guide/rule-composition.md +155 -0
- aitester_bdd-0.2.0/docs/guide/running-tests.md +147 -0
- aitester_bdd-0.2.0/docs/guide/state-checks.md +138 -0
- aitester_bdd-0.2.0/docs/guide/writing-suites.md +138 -0
- aitester_bdd-0.2.0/docs/index.md +34 -0
- aitester_bdd-0.2.0/docs/internals/architecture.md +81 -0
- aitester_bdd-0.2.0/docs/internals/aspects.md +131 -0
- aitester_bdd-0.2.0/docs/internals/authoring.md +111 -0
- aitester_bdd-0.2.0/docs/internals/backends.md +132 -0
- aitester_bdd-0.2.0/docs/internals/plan-then-execute.md +122 -0
- aitester_bdd-0.2.0/docs/internals/walker.md +145 -0
- aitester_bdd-0.2.0/docs/reference/cli.md +94 -0
- aitester_bdd-0.2.0/docs/reference/env-vars.md +43 -0
- aitester_bdd-0.2.0/docs/reference/keywords.md +181 -0
- aitester_bdd-0.2.0/examples/explore_fluid/explore_settings.robot +25 -0
- aitester_bdd-0.2.0/examples/login_flow/README.md +54 -0
- aitester_bdd-0.2.0/examples/login_flow/login_flow.robot +47 -0
- aitester_bdd-0.2.0/examples/quickstart/README.md +40 -0
- aitester_bdd-0.2.0/examples/quickstart/story.txt +1 -0
- aitester_bdd-0.2.0/examples/quickstart/wiki_smoke.robot +35 -0
- aitester_bdd-0.2.0/examples/shell_assertions/api_health.robot +26 -0
- aitester_bdd-0.2.0/mkdocs.yml +66 -0
- aitester_bdd-0.2.0/pyproject.toml +106 -0
- aitester_bdd-0.2.0/src/aitester_bdd/AITester.py +2012 -0
- aitester_bdd-0.2.0/src/aitester_bdd/__init__.py +15 -0
- aitester_bdd-0.2.0/src/aitester_bdd/authoring/__init__.py +8 -0
- aitester_bdd-0.2.0/src/aitester_bdd/authoring/agent_loop.py +638 -0
- aitester_bdd-0.2.0/src/aitester_bdd/authoring/tools.py +203 -0
- aitester_bdd-0.2.0/src/aitester_bdd/cli.py +319 -0
- aitester_bdd-0.2.0/src/aitester_bdd/engine/README.md +85 -0
- aitester_bdd-0.2.0/src/aitester_bdd/engine/__init__.py +11 -0
- aitester_bdd-0.2.0/src/aitester_bdd/engine/agent_browser_backend.py +508 -0
- aitester_bdd-0.2.0/src/aitester_bdd/engine/aspects.py +239 -0
- aitester_bdd-0.2.0/src/aitester_bdd/engine/browser.py +643 -0
- aitester_bdd-0.2.0/src/aitester_bdd/engine/context.py +46 -0
- aitester_bdd-0.2.0/src/aitester_bdd/engine/emit.py +282 -0
- aitester_bdd-0.2.0/src/aitester_bdd/engine/nodriver_backend.py +605 -0
- aitester_bdd-0.2.0/src/aitester_bdd/engine/verdict.py +75 -0
- aitester_bdd-0.2.0/src/aitester_bdd/engine/walk.py +1808 -0
- aitester_bdd-0.2.0/src/aitester_bdd/engine/walk_log.py +335 -0
- aitester_bdd-0.2.0/src/aitester_bdd/llm/__init__.py +7 -0
- aitester_bdd-0.2.0/src/aitester_bdd/llm/aiagent_adapter.py +279 -0
- aitester_bdd-0.2.0/src/aitester_bdd/llm/base.py +64 -0
- aitester_bdd-0.2.0/src/aitester_bdd/py.typed +0 -0
- aitester_bdd-0.2.0/src/aitester_bdd/skill/SKILL.md +1069 -0
- aitester_bdd-0.2.0/tests/__init__.py +0 -0
- aitester_bdd-0.2.0/tests/unit/__init__.py +0 -0
- aitester_bdd-0.2.0/tests/unit/test_artifact_pipeline.py +270 -0
- aitester_bdd-0.2.0/tests/unit/test_expansion.py +296 -0
- aitester_bdd-0.2.0/tests/unit/test_keyword_library.py +64 -0
- aitester_bdd-0.2.0/tests/unit/test_rule_dag.py +39 -0
- aitester_bdd-0.2.0/tests/unit/test_semantic_state_check.py +187 -0
- aitester_bdd-0.2.0/tests/unit/test_walker_with_fake_browser.py +241 -0
- aitester_bdd-0.2.0/tests/unit/test_wise_ported_gotchas.py +391 -0
- aitester_bdd-0.2.0/uv.lock +3562 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.11", "3.12", "3.13"]
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
- uses: astral-sh/setup-uv@v4
|
|
18
|
+
with:
|
|
19
|
+
version: "latest"
|
|
20
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
21
|
+
run: uv python install ${{ matrix.python-version }}
|
|
22
|
+
- name: Install dependencies
|
|
23
|
+
run: uv sync --dev
|
|
24
|
+
- name: Lint
|
|
25
|
+
run: uv run ruff check src/
|
|
26
|
+
- name: Test
|
|
27
|
+
run: uv run pytest tests/ -q
|
|
28
|
+
|
|
29
|
+
build:
|
|
30
|
+
runs-on: ubuntu-latest
|
|
31
|
+
steps:
|
|
32
|
+
- uses: actions/checkout@v4
|
|
33
|
+
- uses: astral-sh/setup-uv@v4
|
|
34
|
+
with:
|
|
35
|
+
version: "latest"
|
|
36
|
+
- name: Build package
|
|
37
|
+
run: uv build
|
|
38
|
+
- name: Verify wheel contents
|
|
39
|
+
run: |
|
|
40
|
+
pip install dist/*.whl --target /tmp/verify
|
|
41
|
+
python -c "import sys; sys.path.insert(0,'/tmp/verify'); from aitester_bdd.engine.context import WalkContext; print(WalkContext.from_env())"
|
|
42
|
+
|
|
43
|
+
docs:
|
|
44
|
+
runs-on: ubuntu-latest
|
|
45
|
+
permissions:
|
|
46
|
+
pages: write
|
|
47
|
+
id-token: write
|
|
48
|
+
environment:
|
|
49
|
+
name: github-pages
|
|
50
|
+
url: ${{ steps.deployment.outputs.page_url }}
|
|
51
|
+
steps:
|
|
52
|
+
- uses: actions/checkout@v4
|
|
53
|
+
- uses: astral-sh/setup-uv@v4
|
|
54
|
+
with:
|
|
55
|
+
version: "latest"
|
|
56
|
+
- name: Install docs dependencies
|
|
57
|
+
run: uv sync --extra docs
|
|
58
|
+
- name: Build docs
|
|
59
|
+
run: uv run mkdocs build --strict
|
|
60
|
+
- name: Upload Pages artifact
|
|
61
|
+
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
|
62
|
+
uses: actions/upload-pages-artifact@v3
|
|
63
|
+
with:
|
|
64
|
+
path: ./site
|
|
65
|
+
- name: Deploy to GitHub Pages
|
|
66
|
+
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
|
67
|
+
id: deployment
|
|
68
|
+
uses: actions/deploy-pages@v4
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
build:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
|
|
15
|
+
- uses: actions/setup-python@v5
|
|
16
|
+
with:
|
|
17
|
+
python-version: "3.13"
|
|
18
|
+
|
|
19
|
+
- name: Install build tooling
|
|
20
|
+
run: python -m pip install --upgrade build
|
|
21
|
+
|
|
22
|
+
- name: Build sdist + wheel
|
|
23
|
+
run: python -m build
|
|
24
|
+
|
|
25
|
+
- uses: actions/upload-artifact@v4
|
|
26
|
+
with:
|
|
27
|
+
name: dist
|
|
28
|
+
path: dist/
|
|
29
|
+
|
|
30
|
+
publish:
|
|
31
|
+
needs: build
|
|
32
|
+
runs-on: ubuntu-latest
|
|
33
|
+
environment:
|
|
34
|
+
name: pypi
|
|
35
|
+
url: https://pypi.org/p/aitester-bdd
|
|
36
|
+
permissions:
|
|
37
|
+
id-token: write # required for trusted publishing
|
|
38
|
+
steps:
|
|
39
|
+
- uses: actions/download-artifact@v4
|
|
40
|
+
with:
|
|
41
|
+
name: dist
|
|
42
|
+
path: dist/
|
|
43
|
+
|
|
44
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
develop-eggs/
|
|
9
|
+
dist/
|
|
10
|
+
downloads/
|
|
11
|
+
eggs/
|
|
12
|
+
.eggs/
|
|
13
|
+
lib/
|
|
14
|
+
lib64/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
share/python-wheels/
|
|
20
|
+
*.egg-info/
|
|
21
|
+
.installed.cfg
|
|
22
|
+
*.egg
|
|
23
|
+
MANIFEST
|
|
24
|
+
|
|
25
|
+
# Virtual envs
|
|
26
|
+
.venv/
|
|
27
|
+
venv/
|
|
28
|
+
env/
|
|
29
|
+
ENV/
|
|
30
|
+
|
|
31
|
+
# uv
|
|
32
|
+
.uv-cache/
|
|
33
|
+
|
|
34
|
+
# Tools
|
|
35
|
+
.ruff_cache/
|
|
36
|
+
.mypy_cache/
|
|
37
|
+
.pytest_cache/
|
|
38
|
+
.coverage
|
|
39
|
+
htmlcov/
|
|
40
|
+
.tox/
|
|
41
|
+
.nox/
|
|
42
|
+
|
|
43
|
+
# Robot Framework artifacts
|
|
44
|
+
log.html
|
|
45
|
+
report.html
|
|
46
|
+
output.xml
|
|
47
|
+
selenium-screenshot-*.png
|
|
48
|
+
|
|
49
|
+
# IDE
|
|
50
|
+
.idea/
|
|
51
|
+
.vscode/
|
|
52
|
+
*.swp
|
|
53
|
+
|
|
54
|
+
# OS
|
|
55
|
+
.DS_Store
|
|
56
|
+
Thumbs.db
|
|
57
|
+
|
|
58
|
+
# Project-specific
|
|
59
|
+
output/
|
|
60
|
+
debug/
|
|
61
|
+
*.local.yaml
|
|
62
|
+
.env
|
|
63
|
+
failures.jsonl
|
|
64
|
+
walk_log.jsonl
|
|
65
|
+
emit.jsonl
|
|
66
|
+
site/
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.2.0] - 2026-05-16
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- **`I explore` keyword** — fluid LLM-driven test execution within the rule DAG. The walker hands its browser session to the agent loop at topo-sort time; no separate browser lifecycle.
|
|
13
|
+
- **`I explore and author` keyword** — same as explore, but also writes a pinned `.robot` suite from the journey.
|
|
14
|
+
- **`I ask LLM` action + `llm_response_contains` / `llm_response_semantic` state checks** — interact with LLMs as a plan-phase action (deferred execution, not immediate).
|
|
15
|
+
- **WalkContext dataclass** — centralizes runtime configuration (headed, step_delay_ms, run_timeout_s, disabled_aspects). Replaces scattered `os.environ.get()` calls.
|
|
16
|
+
- **`step_delay` aspect** — step delay is now a proper AOP aspect (fires via `after_action` hook), not an inline `time.sleep`.
|
|
17
|
+
- **`--headed` and `--step-delay` CLI flags** on `aitester run` for visual observation.
|
|
18
|
+
- **Three runtime backends** — `agent-browser` (default, zero-install), `playwright` (in-process speed), `nodriver` (bot-detection-resistant). Declared via `${ENGINE}` variable in the suite.
|
|
19
|
+
- **Session isolation** — each adapter instance gets its own session UUID; cookies never leak between test runs or authoring sessions.
|
|
20
|
+
- **Scenario isolation** — `clear_state()` between scenarios so each Robot test case starts clean.
|
|
21
|
+
- **`state_setup` configuration** — suite-level auth/consent actions that run once before any scenario.
|
|
22
|
+
- **Quality gates** — `min_records`, `filled_pct`, `max_failed_pct` assertions on captured artifacts.
|
|
23
|
+
- **Expansion (TIER 2)** — parametric capture over elements or Cartesian combinations.
|
|
24
|
+
- **`visual_semantic` state check** — multimodal screenshot-to-LLM judge.
|
|
25
|
+
- **Shell action + assertions** — `When I run shell`, `Then last shell exit`, stdout/stderr checks.
|
|
26
|
+
- **GitHub Actions CI** — lint + test on Python 3.11/3.12/3.13, build verification, docs deployment.
|
|
27
|
+
- **`py.typed` marker** — enables downstream type checking.
|
|
28
|
+
|
|
29
|
+
### Changed
|
|
30
|
+
|
|
31
|
+
- **Headed mode now works for all backends** — previously the walker always passed `headless=True` regardless of env var. Now `WalkContext.headed` flows correctly to `BrowserAdapter.new_session()`.
|
|
32
|
+
- **Aspect disabling centralized** — `AITESTER_DISABLE_ASPECTS` is read once at `WalkContext` construction, not per-registry-build.
|
|
33
|
+
|
|
34
|
+
### Fixed
|
|
35
|
+
|
|
36
|
+
- Duplicate `SKILL.md` in wheel (force-include removed; `packages` directive already includes it).
|
|
37
|
+
- Test suite: `declare_parents` → `and_declare_parents`, `set_rule_timeout` → `and_set_rule_timeout` renames propagated to all tests.
|
|
38
|
+
- 138 lint errors resolved (unused imports, `Optional` → `X | None`, import ordering, undefined forward refs).
|
|
39
|
+
|
|
40
|
+
## [0.1.0] - 2026-04-01
|
|
41
|
+
|
|
42
|
+
### Added
|
|
43
|
+
|
|
44
|
+
- Initial release: keyword library, walker, BrowserAdapter, AspectRegistry, trajectory/instrument/diagnose aspects.
|
|
45
|
+
- Authoring agent loop (DeepAgents + LangGraph) with `aitester author` CLI.
|
|
46
|
+
- `aitester run` CLI for executing authored suites.
|
|
47
|
+
- Rule DAG with parent-child composition, guards, retry-redo, interrupt dismissal.
|
|
48
|
+
- SKILL.md grammar reference for the authoring agent.
|
|
49
|
+
- Wikipedia quickstart example.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 kundeng
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: aitester-bdd
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: LLM-driven BDD test authoring for Robot Framework — turn an intention + live app into a .robot suite
|
|
5
|
+
Project-URL: Homepage, https://github.com/kundeng/aitester-bdd
|
|
6
|
+
Project-URL: Repository, https://github.com/kundeng/aitester-bdd
|
|
7
|
+
Project-URL: Issues, https://github.com/kundeng/aitester-bdd/issues
|
|
8
|
+
Author: kundeng
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: ai,bdd,llm,robotframework,test-generation,testing
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Framework :: Robot Framework
|
|
14
|
+
Classifier: Framework :: Robot Framework :: Library
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Software Development :: Testing :: Acceptance
|
|
22
|
+
Classifier: Topic :: Software Development :: Testing :: BDD
|
|
23
|
+
Requires-Python: >=3.11
|
|
24
|
+
Requires-Dist: deepagents>=0.0.15
|
|
25
|
+
Requires-Dist: langchain-openai>=0.2
|
|
26
|
+
Requires-Dist: langchain>=0.3
|
|
27
|
+
Requires-Dist: langgraph>=0.2.50
|
|
28
|
+
Requires-Dist: litellm>=1.50
|
|
29
|
+
Requires-Dist: pydantic>=2.0
|
|
30
|
+
Requires-Dist: pyyaml>=6.0
|
|
31
|
+
Requires-Dist: robotframework>=7.0
|
|
32
|
+
Requires-Dist: typer>=0.12
|
|
33
|
+
Provides-Extra: docs
|
|
34
|
+
Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
|
|
35
|
+
Requires-Dist: mkdocs>=1.6; extra == 'docs'
|
|
36
|
+
Provides-Extra: playwright
|
|
37
|
+
Requires-Dist: playwright>=1.45; extra == 'playwright'
|
|
38
|
+
Requires-Dist: robotframework-browser>=18.0; extra == 'playwright'
|
|
39
|
+
Provides-Extra: stealth
|
|
40
|
+
Requires-Dist: nodriver>=0.40; extra == 'stealth'
|
|
41
|
+
Description-Content-Type: text/markdown
|
|
42
|
+
|
|
43
|
+
# aitester-bdd
|
|
44
|
+
|
|
45
|
+
**LLM-driven BDD test authoring for Robot Framework.** Give it a story and a live web app; an agent explores the target via the `agent-browser` CLI, then writes a deterministic `.robot` suite with selectors grounded in the actual DOM it observed — or files a bug report when the system is broken in a way that prevents authoring.
|
|
46
|
+
|
|
47
|
+
## What it is
|
|
48
|
+
|
|
49
|
+
A Robot Framework library that turns a plain-English intention into a deterministic, executable `.robot` test suite. **Run-time has no LLM in the loop** — the authored suite is plain RF code that runs reproducibly, no tokens consumed on PR gates.
|
|
50
|
+
|
|
51
|
+
## What's novel
|
|
52
|
+
|
|
53
|
+
| | aitester-bdd |
|
|
54
|
+
|---|---|
|
|
55
|
+
| **Intention → `.robot` suite** | An agent loop drives the live target via shell-out to `agent-browser` (Playwright under the hood), writes a Robot Framework suite with selectors grounded in the snapshots it actually took. |
|
|
56
|
+
| **Bug-report exit channel** | When the SUT is broken in a way that prevents authoring (missing UI, broken auth flow, untestable terminal state), the agent writes `triage/<story>.md` rather than inventing selectors. |
|
|
57
|
+
| **Three pluggable runtime backends** | `agent-browser` (default, zero-install) / `playwright` (in-process speed) / `nodriver` (bot-detection-resistant). Same `.robot` runs on any. |
|
|
58
|
+
| **AOP failure aspect** | Each failed rule ships with an AI-written natural-language diagnosis (SUT-vs-test classification) plus a full MDP trajectory in `walk_log.jsonl`. |
|
|
59
|
+
| **Rule DAG with parent-child composition** | Ported from WISE RPA BDD. Position-determined state checks (guard vs observation), retry-with-redo, scope inheritance — all expressed via Given/When/Then. |
|
|
60
|
+
|
|
61
|
+
## Status
|
|
62
|
+
|
|
63
|
+
**Alpha.** Authoring verified end-to-end on public sites (example.com, en.wikipedia.org, the-internet.herokuapp.com) and on a real internal SPA (login + chat + tool-rendering verification).
|
|
64
|
+
|
|
65
|
+
## How fast is it?
|
|
66
|
+
|
|
67
|
+
Authoring is **headless DeepAgents on Claude Opus 4.7** shelling out to the `agent-browser` CLI. Typical wall-time for a single suite:
|
|
68
|
+
|
|
69
|
+
| Site / scope | Steps | Wall time |
|
|
70
|
+
|---|---:|---:|
|
|
71
|
+
| example.com smoke (heading + link) | 9 | ~27s |
|
|
72
|
+
| en.wikipedia.org search + article check (5 assertions) | 27 | ~70s |
|
|
73
|
+
| Real SPA login + chat + multi-rule verification | 50-80 | 2-3 min |
|
|
74
|
+
|
|
75
|
+
The agent batches multiple `agent-browser` subcommands per shell call (`open && snapshot && get count ...`) so ~1 LLM round-trip handles 2-4 browser ops. Most remaining wall-time is **SUT-bound** (waiting for the app's own LLM to stream a response) — not authoring overhead.
|
|
76
|
+
|
|
77
|
+
## Quick start
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
# 1. Install
|
|
81
|
+
pip install aitester-bdd
|
|
82
|
+
npm i -g agent-browser
|
|
83
|
+
|
|
84
|
+
# 2. Point at an LLM endpoint. Defaults assume claude-code-proxy:
|
|
85
|
+
export AITESTER_LLM_MODEL=cc/claude-opus-4-7
|
|
86
|
+
export OPENAI_BASE_URL=http://localhost:20128/v1
|
|
87
|
+
export OPENAI_API_KEY=placeholder
|
|
88
|
+
|
|
89
|
+
# 3. Author a suite from a story
|
|
90
|
+
aitester author \
|
|
91
|
+
--story "Open the homepage, search for 'BDD', verify the article heading and a paragraph containing 'BDD'." \
|
|
92
|
+
--base-url https://en.wikipedia.org \
|
|
93
|
+
--out wiki_smoke.robot
|
|
94
|
+
|
|
95
|
+
# 4. Run it (no LLM at run time)
|
|
96
|
+
aitester run wiki_smoke.robot
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
For visibility into the agent's exploration, add `--debug` to `aitester author`. Every LLM turn and shell call streams to stderr with timestamps.
|
|
100
|
+
|
|
101
|
+
Output sidecar files at `<output_dir>/`:
|
|
102
|
+
- `walk_log.jsonl` — every MDP transition (rule_enter / before_action / after_action / state_check / dismiss / emit / rule_exit)
|
|
103
|
+
- `failures.jsonl` — failure context + AI diagnosis for every failed rule
|
|
104
|
+
- `emit.jsonl` — explicit `And I emit "..."` captures (intention-driven; only when the story is a diagnostic probe)
|
|
105
|
+
|
|
106
|
+
## Three runtime backends, one authored suite
|
|
107
|
+
|
|
108
|
+
`AITESTER_BROWSER=` picks the driver at run-time:
|
|
109
|
+
|
|
110
|
+
| Backend | Default? | Setup | Best for |
|
|
111
|
+
|---------|----------|-------|----------|
|
|
112
|
+
| `agent-browser` | ✓ | none — CLI ships its own browser | most cases; same driver author + run, zero install friction |
|
|
113
|
+
| `playwright` | | `aitester init-browser` once | action-heavy tests where subprocess latency matters |
|
|
114
|
+
| `nodriver` | | `pip install aitester-bdd[stealth]` + Edge/Chrome | bot-detected sites (DataDome / Cloudflare BM / etc.) |
|
|
115
|
+
|
|
116
|
+
Same `.robot` runs on any of the three.
|
|
117
|
+
|
|
118
|
+
## Architecture (one paragraph)
|
|
119
|
+
|
|
120
|
+
The LLM is the author, not the runtime. At authoring time, a DeepAgents/LangGraph agent reads `SKILL.md` as its system prompt, drives the live target by shelling out to the `agent-browser` CLI (via DeepAgents' `LocalShellBackend.execute` tool), and emits a `.robot` file with selectors grounded in real snapshots — or writes a bug report when the system is broken in a way that prevents authoring. The agent batches multiple `agent-browser` subcommands per shell call (`open && snapshot && get count …`) so each LLM round-trip drives multiple browser ops. At run time, plain Robot Framework executes the suite via one of three pluggable browser backends; no LLM in the loop. Failures fire an AOP `diagnose` aspect that hands the LLM the MDP trajectory plus snapshot and asks "why?" — short natural-language diagnoses land on `RuleResult.ai_diagnosis` and `failures.jsonl`. The walker, gotcha-fixes, and AspectRegistry are ported from the WISE RPA BDD skill.
|
|
121
|
+
|
|
122
|
+
## License
|
|
123
|
+
|
|
124
|
+
MIT
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# aitester-bdd
|
|
2
|
+
|
|
3
|
+
**LLM-driven BDD test authoring for Robot Framework.** Give it a story and a live web app; an agent explores the target via the `agent-browser` CLI, then writes a deterministic `.robot` suite with selectors grounded in the actual DOM it observed — or files a bug report when the system is broken in a way that prevents authoring.
|
|
4
|
+
|
|
5
|
+
## What it is
|
|
6
|
+
|
|
7
|
+
A Robot Framework library that turns a plain-English intention into a deterministic, executable `.robot` test suite. **Run-time has no LLM in the loop** — the authored suite is plain RF code that runs reproducibly, no tokens consumed on PR gates.
|
|
8
|
+
|
|
9
|
+
## What's novel
|
|
10
|
+
|
|
11
|
+
| | aitester-bdd |
|
|
12
|
+
|---|---|
|
|
13
|
+
| **Intention → `.robot` suite** | An agent loop drives the live target via shell-out to `agent-browser` (Playwright under the hood), writes a Robot Framework suite with selectors grounded in the snapshots it actually took. |
|
|
14
|
+
| **Bug-report exit channel** | When the SUT is broken in a way that prevents authoring (missing UI, broken auth flow, untestable terminal state), the agent writes `triage/<story>.md` rather than inventing selectors. |
|
|
15
|
+
| **Three pluggable runtime backends** | `agent-browser` (default, zero-install) / `playwright` (in-process speed) / `nodriver` (bot-detection-resistant). Same `.robot` runs on any. |
|
|
16
|
+
| **AOP failure aspect** | Each failed rule ships with an AI-written natural-language diagnosis (SUT-vs-test classification) plus a full MDP trajectory in `walk_log.jsonl`. |
|
|
17
|
+
| **Rule DAG with parent-child composition** | Ported from WISE RPA BDD. Position-determined state checks (guard vs observation), retry-with-redo, scope inheritance — all expressed via Given/When/Then. |
|
|
18
|
+
|
|
19
|
+
## Status
|
|
20
|
+
|
|
21
|
+
**Alpha.** Authoring verified end-to-end on public sites (example.com, en.wikipedia.org, the-internet.herokuapp.com) and on a real internal SPA (login + chat + tool-rendering verification).
|
|
22
|
+
|
|
23
|
+
## How fast is it?
|
|
24
|
+
|
|
25
|
+
Authoring is **headless DeepAgents on Claude Opus 4.7** shelling out to the `agent-browser` CLI. Typical wall-time for a single suite:
|
|
26
|
+
|
|
27
|
+
| Site / scope | Steps | Wall time |
|
|
28
|
+
|---|---:|---:|
|
|
29
|
+
| example.com smoke (heading + link) | 9 | ~27s |
|
|
30
|
+
| en.wikipedia.org search + article check (5 assertions) | 27 | ~70s |
|
|
31
|
+
| Real SPA login + chat + multi-rule verification | 50-80 | 2-3 min |
|
|
32
|
+
|
|
33
|
+
The agent batches multiple `agent-browser` subcommands per shell call (`open && snapshot && get count ...`) so ~1 LLM round-trip handles 2-4 browser ops. Most remaining wall-time is **SUT-bound** (waiting for the app's own LLM to stream a response) — not authoring overhead.
|
|
34
|
+
|
|
35
|
+
## Quick start
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
# 1. Install
|
|
39
|
+
pip install aitester-bdd
|
|
40
|
+
npm i -g agent-browser
|
|
41
|
+
|
|
42
|
+
# 2. Point at an LLM endpoint. Defaults assume claude-code-proxy:
|
|
43
|
+
export AITESTER_LLM_MODEL=cc/claude-opus-4-7
|
|
44
|
+
export OPENAI_BASE_URL=http://localhost:20128/v1
|
|
45
|
+
export OPENAI_API_KEY=placeholder
|
|
46
|
+
|
|
47
|
+
# 3. Author a suite from a story
|
|
48
|
+
aitester author \
|
|
49
|
+
--story "Open the homepage, search for 'BDD', verify the article heading and a paragraph containing 'BDD'." \
|
|
50
|
+
--base-url https://en.wikipedia.org \
|
|
51
|
+
--out wiki_smoke.robot
|
|
52
|
+
|
|
53
|
+
# 4. Run it (no LLM at run time)
|
|
54
|
+
aitester run wiki_smoke.robot
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
For visibility into the agent's exploration, add `--debug` to `aitester author`. Every LLM turn and shell call streams to stderr with timestamps.
|
|
58
|
+
|
|
59
|
+
Output sidecar files at `<output_dir>/`:
|
|
60
|
+
- `walk_log.jsonl` — every MDP transition (rule_enter / before_action / after_action / state_check / dismiss / emit / rule_exit)
|
|
61
|
+
- `failures.jsonl` — failure context + AI diagnosis for every failed rule
|
|
62
|
+
- `emit.jsonl` — explicit `And I emit "..."` captures (intention-driven; only when the story is a diagnostic probe)
|
|
63
|
+
|
|
64
|
+
## Three runtime backends, one authored suite
|
|
65
|
+
|
|
66
|
+
`AITESTER_BROWSER=` picks the driver at run-time:
|
|
67
|
+
|
|
68
|
+
| Backend | Default? | Setup | Best for |
|
|
69
|
+
|---------|----------|-------|----------|
|
|
70
|
+
| `agent-browser` | ✓ | none — CLI ships its own browser | most cases; same driver author + run, zero install friction |
|
|
71
|
+
| `playwright` | | `aitester init-browser` once | action-heavy tests where subprocess latency matters |
|
|
72
|
+
| `nodriver` | | `pip install aitester-bdd[stealth]` + Edge/Chrome | bot-detected sites (DataDome / Cloudflare BM / etc.) |
|
|
73
|
+
|
|
74
|
+
Same `.robot` runs on any of the three.
|
|
75
|
+
|
|
76
|
+
## Architecture (one paragraph)
|
|
77
|
+
|
|
78
|
+
The LLM is the author, not the runtime. At authoring time, a DeepAgents/LangGraph agent reads `SKILL.md` as its system prompt, drives the live target by shelling out to the `agent-browser` CLI (via DeepAgents' `LocalShellBackend.execute` tool), and emits a `.robot` file with selectors grounded in real snapshots — or writes a bug report when the system is broken in a way that prevents authoring. The agent batches multiple `agent-browser` subcommands per shell call (`open && snapshot && get count …`) so each LLM round-trip drives multiple browser ops. At run time, plain Robot Framework executes the suite via one of three pluggable browser backends; no LLM in the loop. Failures fire an AOP `diagnose` aspect that hands the LLM the MDP trajectory plus snapshot and asks "why?" — short natural-language diagnoses land on `RuleResult.ai_diagnosis` and `failures.jsonl`. The walker, gotcha-fixes, and AspectRegistry are ported from the WISE RPA BDD skill.
|
|
79
|
+
|
|
80
|
+
## License
|
|
81
|
+
|
|
82
|
+
MIT
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# Architecture
|
|
2
|
+
|
|
3
|
+
## Three layers
|
|
4
|
+
|
|
5
|
+
```
|
|
6
|
+
┌──────────────────────────────────────────────────────────────────┐
|
|
7
|
+
│ Authoring (LLM-in-the-loop, one-shot per suite) │
|
|
8
|
+
│ ───────────────────────────────────────────────────────────── │
|
|
9
|
+
│ DeepAgents loop on top of LangGraph: │
|
|
10
|
+
│ - SKILL.md as system prompt │
|
|
11
|
+
│ - TodoListMiddleware (planning) + LocalShellBackend (execute) │
|
|
12
|
+
│ - Tool surface: `execute` (bash) + write_robot_suite + │
|
|
13
|
+
│ report_bug. No per-operation Python wrappers — the agent │
|
|
14
|
+
│ shells out to `agent-browser <subcommand> --json`, │
|
|
15
|
+
│ batching with `&&`. │
|
|
16
|
+
│ Retry harness wraps the inner loop (max_attempts, default 2) │
|
|
17
|
+
│ so a crash / recursion-limit retries with feedback. │
|
|
18
|
+
└──────────────────────────────────────────────────────────────────┘
|
|
19
|
+
│
|
|
20
|
+
│ produces
|
|
21
|
+
▼
|
|
22
|
+
┌──────────────────────────────────────────────────────────────────┐
|
|
23
|
+
│ Runtime (deterministic, no LLM) │
|
|
24
|
+
│ ───────────────────────────────────────────────────────────── │
|
|
25
|
+
│ • Robot Framework parses + walks the .robot file │
|
|
26
|
+
│ • aitester-bdd keyword library: rule DAG, observation gates, │
|
|
27
|
+
│ position-determined state checks, retry-with-redo guards │
|
|
28
|
+
│ • One of three browser backends (declared via ${ENGINE}): │
|
|
29
|
+
│ - agent-browser (default) — CLI subprocess, zero install │
|
|
30
|
+
│ - playwright (in-process) — install rfbrowser + browsers │
|
|
31
|
+
│ - nodriver (raw CDP) — bot-detection-resistant │
|
|
32
|
+
└──────────────────────────────────────────────────────────────────┘
|
|
33
|
+
│
|
|
34
|
+
│ emits
|
|
35
|
+
▼
|
|
36
|
+
┌──────────────────────────────────────────────────────────────────┐
|
|
37
|
+
│ Diagnostics │
|
|
38
|
+
│ ───────────────────────────────────────────────────────────── │
|
|
39
|
+
│ • RF log.html / report.html / output.xml │
|
|
40
|
+
│ • Verdict report (pass/fail per rule, evidence on failure) │
|
|
41
|
+
│ • Optional LLM post-mortem on failure (read RF log, suggest fix)│
|
|
42
|
+
└──────────────────────────────────────────────────────────────────┘
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Heritage from WISE RPA BDD
|
|
46
|
+
|
|
47
|
+
The engine borrows the Plan-then-Execute rule DAG model from the WISE RPA BDD skill — Robot Framework keywords build an in-memory plan during test case definition, then a single Suite Teardown step (`Then I finalize`) walks the plan against a live browser. This separates the **spec** (the rule tree) from the **execution** (the browser walk), which is what makes:
|
|
48
|
+
|
|
49
|
+
- LLM-authored output deterministic (the LLM commits to a plan up front)
|
|
50
|
+
- Observation gates clean (position-determined: before-action = guard, after-action = sync gate)
|
|
51
|
+
- Parent-child rule composition meaningful (rules can declare prerequisite rules; the walker handles ordering)
|
|
52
|
+
- Aspects (timing, screenshots, slow-mo, checkpoint) cross-cutting (modify behavior without touching rules)
|
|
53
|
+
|
|
54
|
+
## What's different from WISE
|
|
55
|
+
|
|
56
|
+
WISE's vocabulary is for **extraction**: artifacts, emits, merges, quality gates on extracted records. aitester-bdd's vocabulary is for **verification**: assertions on state, no data collection. The keyword library is a from-scratch design; only the underlying engine primitives carry over.
|
|
57
|
+
|
|
58
|
+
## Two discovery modes
|
|
59
|
+
|
|
60
|
+
### Black-box (Mode A)
|
|
61
|
+
|
|
62
|
+
Input: a story + base_url. No source access. Agent crawls via accessibility snapshots, identifies entry points by looking.
|
|
63
|
+
|
|
64
|
+
Use: validating customer-facing behavior of a deployed app you don't own; smoke-testing in foreign environments.
|
|
65
|
+
|
|
66
|
+
Limitation: invisible code paths (admin-only routes, conditional features) stay invisible.
|
|
67
|
+
|
|
68
|
+
### White-box (Mode B)
|
|
69
|
+
|
|
70
|
+
Input: a story + base_url + source_root. Reads framework-specific source (FastAPI routes, React TSX components, Zustand stores). Cross-references against live snapshots to ground selectors.
|
|
71
|
+
|
|
72
|
+
Use: testing apps you own with full source access. More complete coverage.
|
|
73
|
+
|
|
74
|
+
Mode B internally falls back to Mode A for selector grounding — TSX source doesn't always tell you the rendered class (shadcn wraps, Tailwind transforms), so a live snapshot is still required for the actual selector.
|
|
75
|
+
|
|
76
|
+
## Skill as grammar
|
|
77
|
+
|
|
78
|
+
The `SKILL.md` shipped inside the wheel is the LLM's grammar at authoring time. It documents:
|
|
79
|
+
|
|
80
|
+
- The shipped keyword vocabulary (Given/When/Then with concrete primitives)
|
|
81
|
+
- The rule DAG shape (parents, guards, observations, actions, assertions)
|
|
82
|
+
- The non-negotiables (no `When I wait`, no invented site-specific verbs, all selectors must come from a live snapshot)
|
|
83
|
+
- The patterns library (auth flow, observation gates, dismiss scoping)
|
|
84
|
+
- The agent contract (what the LLM is and isn't authorized to do)
|
|
85
|
+
|
|
86
|
+
Without the skill loaded, the LLM emits prose. With it, the LLM emits valid `.robot` files that the engine can execute.
|
|
87
|
+
|
|
88
|
+
## Where the LLM is and isn't in the loop
|
|
89
|
+
|
|
90
|
+
| Phase | LLM? | Why |
|
|
91
|
+
|---|---|---|
|
|
92
|
+
| Discovery | Yes | Reads source, snapshots app, proposes structure |
|
|
93
|
+
| Authoring | Yes | Composes the .robot file from story + snapshot |
|
|
94
|
+
| Dryrun | No | Plain `robot --dryrun` — fast, no tokens |
|
|
95
|
+
| Execution | No | Plain Robot Framework — deterministic, no tokens |
|
|
96
|
+
| Diagnostics on failure | Optional | LLM can read RF log.html and suggest a refinement, but the failure detection itself is non-LLM |
|
|
97
|
+
| Refinement (loop) | Yes | Failed dryrun or execution feeds back into authoring |
|
|
98
|
+
|
|
99
|
+
Token cost is bounded: one author call + N refine calls per scenario. Production CI runs `.robot` files plain — no LLM cost on PR gates.
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Ideas (parking lot)
|
|
2
|
+
|
|
3
|
+
Items here are not committed work — just things we've thought about.
|
|
4
|
+
File a real GitHub issue if/when one of these graduates to being scheduled.
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Inception: self-hosted explore + execute keywords
|
|
9
|
+
|
|
10
|
+
Make the framework self-testable. Add two **private** Robot keywords next to the
|
|
11
|
+
existing `aitester author` / `aitester run` CLI:
|
|
12
|
+
|
|
13
|
+
```robot
|
|
14
|
+
When I explore "${story}" against "${url}" into "${suite_path}"
|
|
15
|
+
When I execute "${suite_path}"
|
|
16
|
+
Then verdict last passed
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
- Thin wrappers over `authoring.agent_loop.author_with_agent` and
|
|
20
|
+
`engine.walk.run_suite` (the same code paths the CLI uses today).
|
|
21
|
+
- The skill (SKILL.md) is already embeddable via `aitester_bdd.skill.load_skill()`;
|
|
22
|
+
meta-tests can reference the loader without duplicating content.
|
|
23
|
+
- Ship a small suite under `aitester_bdd/meta/` (e.g. `meta/explore.robot`,
|
|
24
|
+
`meta/execute.robot`) that exercises both keywords against a fixed simple
|
|
25
|
+
target (example.com or a local httpbin).
|
|
26
|
+
- CLI commands collapse to convenience wrappers over the keywords.
|
|
27
|
+
|
|
28
|
+
**Cost / friction:**
|
|
29
|
+
|
|
30
|
+
- `I explore` invokes a real LLM — ~30-60s + LLM cost per run. Best as a
|
|
31
|
+
nightly soak gate, not every-commit CI.
|
|
32
|
+
- Meta-tests must assert on *structure* (suite parses, executes, passes /
|
|
33
|
+
fails as expected), not exact selectors — the LLM will produce slightly
|
|
34
|
+
different suites across runs.
|
|
35
|
+
- ~250 LOC + 2-3 fixture stories + 1 small CLI subcommand for `aitester meta`.
|
|
36
|
+
|
|
37
|
+
**Why park it:** worth doing once aitester-bdd is stable enough to be
|
|
38
|
+
its own customer. Not blocking the migration of the prismi3 tests off the
|
|
39
|
+
vendored `src/aitester/`.
|