screenforge 0.4.1__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. {screenforge-0.4.1 → screenforge-0.6.0}/PKG-INFO +3 -1
  2. {screenforge-0.4.1 → screenforge-0.6.0}/README.md +2 -0
  3. screenforge-0.6.0/cli/_version.py +1 -0
  4. {screenforge-0.4.1 → screenforge-0.6.0}/cli/modes/action.py +13 -0
  5. {screenforge-0.4.1 → screenforge-0.6.0}/cli/modes/workflow.py +15 -0
  6. {screenforge-0.4.1 → screenforge-0.6.0}/cli/parser.py +15 -1
  7. screenforge-0.6.0/cli/playground_sink.py +255 -0
  8. {screenforge-0.4.1 → screenforge-0.6.0}/pyproject.toml +1 -1
  9. {screenforge-0.4.1 → screenforge-0.6.0}/screenforge.egg-info/PKG-INFO +3 -1
  10. {screenforge-0.4.1 → screenforge-0.6.0}/screenforge.egg-info/SOURCES.txt +5 -0
  11. screenforge-0.6.0/tests/test_dom_capture.py +136 -0
  12. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_parser.py +12 -0
  13. screenforge-0.6.0/tests/test_playground_app.py +385 -0
  14. screenforge-0.6.0/tests/test_playground_sink.py +586 -0
  15. screenforge-0.6.0/tests/test_playground_sink_integration.py +202 -0
  16. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_web_dom_complex_live.py +84 -0
  17. screenforge-0.4.1/cli/_version.py +0 -1
  18. {screenforge-0.4.1 → screenforge-0.6.0}/LICENSE +0 -0
  19. {screenforge-0.4.1 → screenforge-0.6.0}/cli/__init__.py +0 -0
  20. {screenforge-0.4.1 → screenforge-0.6.0}/cli/dispatch.py +0 -0
  21. {screenforge-0.4.1 → screenforge-0.6.0}/cli/doctor.py +0 -0
  22. {screenforge-0.4.1 → screenforge-0.6.0}/cli/modes/__init__.py +0 -0
  23. {screenforge-0.4.1 → screenforge-0.6.0}/cli/modes/default.py +0 -0
  24. {screenforge-0.4.1 → screenforge-0.6.0}/cli/modes/demo.py +0 -0
  25. {screenforge-0.4.1 → screenforge-0.6.0}/cli/modes/dry_run.py +0 -0
  26. {screenforge-0.4.1 → screenforge-0.6.0}/cli/modes/init.py +0 -0
  27. {screenforge-0.4.1 → screenforge-0.6.0}/cli/modes/plan.py +0 -0
  28. {screenforge-0.4.1 → screenforge-0.6.0}/cli/reporter.py +0 -0
  29. {screenforge-0.4.1 → screenforge-0.6.0}/cli/session.py +0 -0
  30. {screenforge-0.4.1 → screenforge-0.6.0}/cli/shared.py +0 -0
  31. {screenforge-0.4.1 → screenforge-0.6.0}/cli/shorthand.py +0 -0
  32. {screenforge-0.4.1 → screenforge-0.6.0}/cli/tool_protocol_handlers.py +0 -0
  33. {screenforge-0.4.1 → screenforge-0.6.0}/common/__init__.py +0 -0
  34. {screenforge-0.4.1 → screenforge-0.6.0}/common/adapters/__init__.py +0 -0
  35. {screenforge-0.4.1 → screenforge-0.6.0}/common/adapters/android_adapter.py +0 -0
  36. {screenforge-0.4.1 → screenforge-0.6.0}/common/adapters/base_adapter.py +0 -0
  37. {screenforge-0.4.1 → screenforge-0.6.0}/common/adapters/ios_adapter.py +0 -0
  38. {screenforge-0.4.1 → screenforge-0.6.0}/common/adapters/web_adapter.py +0 -0
  39. {screenforge-0.4.1 → screenforge-0.6.0}/common/ai.py +0 -0
  40. {screenforge-0.4.1 → screenforge-0.6.0}/common/ai_autonomous.py +0 -0
  41. {screenforge-0.4.1 → screenforge-0.6.0}/common/ai_heal.py +0 -0
  42. {screenforge-0.4.1 → screenforge-0.6.0}/common/cache/__init__.py +0 -0
  43. {screenforge-0.4.1 → screenforge-0.6.0}/common/cache/cache_hash.py +0 -0
  44. {screenforge-0.4.1 → screenforge-0.6.0}/common/cache/cache_manager.py +0 -0
  45. {screenforge-0.4.1 → screenforge-0.6.0}/common/cache/cache_stats.py +0 -0
  46. {screenforge-0.4.1 → screenforge-0.6.0}/common/cache/cache_storage.py +0 -0
  47. {screenforge-0.4.1 → screenforge-0.6.0}/common/cache/embedding_loader.py +0 -0
  48. {screenforge-0.4.1 → screenforge-0.6.0}/common/capabilities.py +0 -0
  49. {screenforge-0.4.1 → screenforge-0.6.0}/common/case_memory.py +0 -0
  50. {screenforge-0.4.1 → screenforge-0.6.0}/common/error_codes.py +0 -0
  51. {screenforge-0.4.1 → screenforge-0.6.0}/common/exceptions.py +0 -0
  52. {screenforge-0.4.1 → screenforge-0.6.0}/common/executor.py +0 -0
  53. {screenforge-0.4.1 → screenforge-0.6.0}/common/failure_diagnosis.py +0 -0
  54. {screenforge-0.4.1 → screenforge-0.6.0}/common/history_manager.py +0 -0
  55. {screenforge-0.4.1 → screenforge-0.6.0}/common/logs.py +0 -0
  56. {screenforge-0.4.1 → screenforge-0.6.0}/common/mcp_server.py +0 -0
  57. {screenforge-0.4.1 → screenforge-0.6.0}/common/preflight.py +0 -0
  58. {screenforge-0.4.1 → screenforge-0.6.0}/common/progress.py +0 -0
  59. {screenforge-0.4.1 → screenforge-0.6.0}/common/run_reporter.py +0 -0
  60. {screenforge-0.4.1 → screenforge-0.6.0}/common/run_resume.py +0 -0
  61. {screenforge-0.4.1 → screenforge-0.6.0}/common/runtime_modes.py +0 -0
  62. {screenforge-0.4.1 → screenforge-0.6.0}/common/tool_protocol.py +0 -0
  63. {screenforge-0.4.1 → screenforge-0.6.0}/common/visual_fallback.py +0 -0
  64. {screenforge-0.4.1 → screenforge-0.6.0}/common/workflow_schema.py +0 -0
  65. {screenforge-0.4.1 → screenforge-0.6.0}/config/__init__.py +0 -0
  66. {screenforge-0.4.1 → screenforge-0.6.0}/config/config.py +0 -0
  67. {screenforge-0.4.1 → screenforge-0.6.0}/config/env_loader.py +0 -0
  68. {screenforge-0.4.1 → screenforge-0.6.0}/screenforge.egg-info/dependency_links.txt +0 -0
  69. {screenforge-0.4.1 → screenforge-0.6.0}/screenforge.egg-info/entry_points.txt +0 -0
  70. {screenforge-0.4.1 → screenforge-0.6.0}/screenforge.egg-info/requires.txt +0 -0
  71. {screenforge-0.4.1 → screenforge-0.6.0}/screenforge.egg-info/top_level.txt +0 -0
  72. {screenforge-0.4.1 → screenforge-0.6.0}/setup.cfg +0 -0
  73. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_ai_autonomous.py +0 -0
  74. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_ai_brain.py +0 -0
  75. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_ai_heal.py +0 -0
  76. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_android_smoke_live.py +0 -0
  77. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_cache_manager.py +0 -0
  78. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_capabilities.py +0 -0
  79. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_cli_action_json.py +0 -0
  80. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_codegen_quality.py +0 -0
  81. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_dispatch.py +0 -0
  82. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_doctor_orphan_browser.py +0 -0
  83. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_error_codes.py +0 -0
  84. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_executor.py +0 -0
  85. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_failure_diagnosis.py +0 -0
  86. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_interaction_actions.py +0 -0
  87. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_ios_smoke_live.py +0 -0
  88. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_mcp_ref_cache.py +0 -0
  89. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_ml_optional.py +0 -0
  90. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_run_reporter.py +0 -0
  91. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_run_resume.py +0 -0
  92. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_runtime_modes.py +0 -0
  93. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_screenshot_annotator.py +0 -0
  94. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_shorthand.py +0 -0
  95. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_tool_protocol_diagnosis.py +0 -0
  96. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_utils_ios.py +0 -0
  97. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_utils_web.py +0 -0
  98. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_utils_xml.py +0 -0
  99. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_visual_fallback.py +0 -0
  100. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_web_adapter.py +0 -0
  101. {screenforge-0.4.1 → screenforge-0.6.0}/tests/test_web_smoke_live.py +0 -0
  102. {screenforge-0.4.1 → screenforge-0.6.0}/utils/__init__.py +0 -0
  103. {screenforge-0.4.1 → screenforge-0.6.0}/utils/screenshot_annotator.py +0 -0
  104. {screenforge-0.4.1 → screenforge-0.6.0}/utils/utils_ios.py +0 -0
  105. {screenforge-0.4.1 → screenforge-0.6.0}/utils/utils_web.py +0 -0
  106. {screenforge-0.4.1 → screenforge-0.6.0}/utils/utils_xml.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: screenforge
3
- Version: 0.4.1
3
+ Version: 0.6.0
4
4
  Summary: AI-driven cross-platform UI automation engine with test script generation
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://github.com/jhinzzz/ScreenForge
@@ -113,6 +113,7 @@ Each step: **inspect → decide → act → verify**. The AI decides, ScreenForg
113
113
  - **Visual fallback**: When DOM can't locate elements (Canvas, games), VLM parses screenshots
114
114
  - **MCP server**: Any MCP-compatible Agent can drive ScreenForge natively
115
115
  - **Structured output**: JSON Lines events + `report/runs/<id>/` artifacts for CI integration
116
+ - **Live Mirror playground**: Watch the generated pytest code grow line-by-line beside a live screenshot as the test runs — `screenforge --playground`. See the [Playground Guide](https://github.com/jhinzzz/ScreenForge/blob/main/docs/playground-guide.md)
116
117
 
117
118
  ## Agent Integration (Claude Code / Cursor / Codex)
118
119
 
@@ -205,6 +206,7 @@ If ScreenForge generates tests for your project, add this badge to your README:
205
206
  | [MCP Setup (3 min)](https://github.com/jhinzzz/ScreenForge/blob/main/docs/mcp-setup.md) | Connect to Claude Desktop / Cursor / Cline / Claude Code |
206
207
  | [Agent Guide](https://github.com/jhinzzz/ScreenForge/blob/main/docs/agent_guide.md) | Integration protocol for AI Agents |
207
208
  | [Capability Matrix](https://github.com/jhinzzz/ScreenForge/blob/main/docs/capability-matrix.md) | Supported platforms, actions, and locators |
209
+ | [Playground Guide](https://github.com/jhinzzz/ScreenForge/blob/main/docs/playground-guide.md) | Live Mirror — watch code + screenshots grow as the test runs |
208
210
  | [Workflow Examples](https://github.com/jhinzzz/ScreenForge/tree/main/docs/workflows) | YAML workflow templates |
209
211
  | [CHANGELOG](https://github.com/jhinzzz/ScreenForge/blob/main/CHANGELOG.md) | Version history |
210
212
 
@@ -68,6 +68,7 @@ Each step: **inspect → decide → act → verify**. The AI decides, ScreenForg
68
68
  - **Visual fallback**: When DOM can't locate elements (Canvas, games), VLM parses screenshots
69
69
  - **MCP server**: Any MCP-compatible Agent can drive ScreenForge natively
70
70
  - **Structured output**: JSON Lines events + `report/runs/<id>/` artifacts for CI integration
71
+ - **Live Mirror playground**: Watch the generated pytest code grow line-by-line beside a live screenshot as the test runs — `screenforge --playground`. See the [Playground Guide](https://github.com/jhinzzz/ScreenForge/blob/main/docs/playground-guide.md)
71
72
 
72
73
  ## Agent Integration (Claude Code / Cursor / Codex)
73
74
 
@@ -160,6 +161,7 @@ If ScreenForge generates tests for your project, add this badge to your README:
160
161
  | [MCP Setup (3 min)](https://github.com/jhinzzz/ScreenForge/blob/main/docs/mcp-setup.md) | Connect to Claude Desktop / Cursor / Cline / Claude Code |
161
162
  | [Agent Guide](https://github.com/jhinzzz/ScreenForge/blob/main/docs/agent_guide.md) | Integration protocol for AI Agents |
162
163
  | [Capability Matrix](https://github.com/jhinzzz/ScreenForge/blob/main/docs/capability-matrix.md) | Supported platforms, actions, and locators |
164
+ | [Playground Guide](https://github.com/jhinzzz/ScreenForge/blob/main/docs/playground-guide.md) | Live Mirror — watch code + screenshots grow as the test runs |
163
165
  | [Workflow Examples](https://github.com/jhinzzz/ScreenForge/tree/main/docs/workflows) | YAML workflow templates |
164
166
  | [CHANGELOG](https://github.com/jhinzzz/ScreenForge/blob/main/CHANGELOG.md) | Version history |
165
167
 
@@ -0,0 +1 @@
1
+ __version__ = "0.6.0"
@@ -5,6 +5,7 @@ import os
5
5
  import sys
6
6
 
7
7
  import cli.shared as _shared
8
+ from cli.playground_sink import build_sink_from_args, maybe_push_step
8
9
  from cli.reporter import (
9
10
  _apply_resume_summary,
10
11
  _build_action_summary,
@@ -206,6 +207,18 @@ def run_action_default_mode(
206
207
 
207
208
  history_manager.add_step(result["code_lines"], result["action_description"])
208
209
  save_to_disk(output_script_path, history_manager.get_current_file_content())
210
+ # ★ Live-mirror bypass (opt-in --playground-sink). join_on_exit=True: a bare
211
+ # --action exits right after, so wait briefly for the last frame to land.
212
+ maybe_push_step(
213
+ build_sink_from_args(args, join_on_exit=True),
214
+ args=args,
215
+ reporter=reporter,
216
+ adapter=adapter,
217
+ action_data=action_data,
218
+ result=result,
219
+ step_index=None, # resolver picks: session counter, or 1 for a bare action
220
+ file_path=output_script_path, # normalized to abs path inside build_step_event
221
+ )
209
222
  reporter.emit_event(
210
223
  "action_executed",
211
224
  step=1,
@@ -4,6 +4,7 @@ from pathlib import Path
4
4
 
5
5
  import cli.shared as _shared
6
6
  from cli.modes.dry_run import _build_resolution_hint, _preview_action_resolution
7
+ from cli.playground_sink import build_sink_from_args, maybe_push_step
7
8
  from cli.reporter import (
8
9
  _apply_resume_summary,
9
10
  _build_reporter,
@@ -285,6 +286,9 @@ def run_workflow_default_mode(
285
286
  history_manager = _shared.StepHistoryManager(initial_content=header)
286
287
  save_to_disk(output_script_path, header)
287
288
  executor = _shared.UIExecutor(device, platform=args.platform)
289
+ # Workflow is one process with many steps (the main use case for live mirror).
290
+ # Build the sink once; join_on_exit=False — the process lives across all steps.
291
+ sink = build_sink_from_args(args, join_on_exit=False)
288
292
 
289
293
  executed_steps = 0
290
294
  for index, step in enumerate(workflow.steps, start=1):
@@ -315,6 +319,17 @@ def run_workflow_default_mode(
315
319
  result["code_lines"], result["action_description"]
316
320
  )
317
321
  save_to_disk(output_script_path, history_manager.get_current_file_content())
322
+ # ★ Live-mirror bypass: push this step with its loop counter as step_index.
323
+ maybe_push_step(
324
+ sink,
325
+ args=args,
326
+ reporter=reporter,
327
+ adapter=adapter,
328
+ action_data=action_data,
329
+ result=result,
330
+ step_index=index,
331
+ file_path=output_script_path, # normalized to abs path inside build_step_event
332
+ )
318
333
  reporter.emit_event(
319
334
  "action_executed",
320
335
  step=index,
@@ -151,6 +151,17 @@ def build_parser() -> argparse.ArgumentParser:
151
151
  default=7860,
152
152
  help="Playground server port (default: 7860)",
153
153
  )
154
+ parser.add_argument(
155
+ "--playground-sink",
156
+ action="store_true",
157
+ help="Push each step's code + screenshot to a running playground (opt-in; off = zero cost)",
158
+ )
159
+ parser.add_argument(
160
+ "--playground-url",
161
+ type=str,
162
+ default="http://127.0.0.1:7860",
163
+ help="Playground base URL for --playground-sink (default: http://127.0.0.1:7860)",
164
+ )
154
165
  parser.add_argument(
155
166
  "--device-url",
156
167
  type=str,
@@ -270,7 +281,10 @@ def validate_cli_args(args: argparse.Namespace) -> None:
270
281
  raise ValueError("--action cannot be combined with --goal or --workflow")
271
282
  has_demo = bool(getattr(args, "demo", False))
272
283
  has_init = bool(getattr(args, "init", False))
273
- if has_demo or has_init:
284
+ # --playground starts a standalone server (dispatch.py handles it after this
285
+ # validation, exactly like --init/--demo) — it needs no goal/workflow/action.
286
+ has_playground = bool(getattr(args, "playground", False))
287
+ if has_demo or has_init or has_playground:
274
288
  return
275
289
  has_session_end = bool(str(getattr(args, "session_end", "")).strip())
276
290
  if has_session_end:
@@ -0,0 +1,255 @@
1
+ """Fire-and-forget visualization sink: short-lived action process → resident playground.
2
+
3
+ Red line (G5): any network error is swallowed silently. A sink push MUST NEVER
4
+ slow down the action or change its exit code — the 0/1 exit code is a contract to
5
+ the agent (see CLAUDE.md). The sink is a bypass observer hung after save_to_disk;
6
+ it does not touch execute_and_record, codegen, or disk persistence.
7
+ """
8
+
9
+ import base64
10
+ import os
11
+ import threading
12
+
13
+ import requests # already in requirements.txt (requests==2.32.5) — zero new deps
14
+ from loguru import logger as log
15
+ from pydantic import BaseModel, Field
16
+
17
+ from cli.session import load_session
18
+
19
+ DEFAULT_PLAYGROUND_URL = "http://127.0.0.1:7860"
20
+
21
+ # Latency ceiling on the contract-protected single-step --action path (red line #1:
22
+ # "never slow the action down"). A reachable-but-slow playground must not tax the
23
+ # action beyond this documented budget. (connect, read) is split so a hung peer
24
+ # can't stall on connect; _JOIN_TIMEOUT is the hard cap the single-step process
25
+ # waits for the last frame to land before sys.exit — kept ≤ read+ε and well under
26
+ # human-perceptible. Worst added latency on --action ≈ _JOIN_TIMEOUT.
27
+ _POST_TIMEOUT = (0.2, 0.25) # (connect, read) seconds
28
+ _DOM_POST_TIMEOUT = (0.2, 0.4) # tree body is larger; read budget a touch higher
29
+ _JOIN_TIMEOUT = 0.3 # seconds; single-step last-frame grace
30
+
31
+
32
+ class PlaygroundStepEvent(BaseModel):
33
+ """One step pushed to the playground. Shape == the frontend SSE `step` contract."""
34
+
35
+ run_id: str
36
+ step_index: int # ⭐ time-travel seed: data accumulates/replays by this index
37
+ code_lines: list[str] = Field(default_factory=list)
38
+ action_description: str = ""
39
+ action: str = ""
40
+ locator_type: str = ""
41
+ locator_value: str = ""
42
+ extra_value: str = ""
43
+ success: bool = True
44
+ screenshot_b64: str = "" # empty = no screenshot this step (degrade, never crash)
45
+ file_path: str = "" # abs path of the generated test file (for "open in IDE")
46
+ has_dom_tree: bool = False # a sidecar DOM tree was captured for this step
47
+
48
+
49
+ class PlaygroundSink:
50
+ """Pushes each step to a running playground, best-effort.
51
+
52
+ Disabled by default: enabled=False means zero cost — no HTTP, no thread, and
53
+ (at the call sites) take_screenshot is never even invoked.
54
+ """
55
+
56
+ def __init__(
57
+ self,
58
+ base_url: str = DEFAULT_PLAYGROUND_URL,
59
+ enabled: bool = False,
60
+ join_on_exit: bool = False,
61
+ ):
62
+ self.base_url = base_url.rstrip("/")
63
+ self.enabled = enabled
64
+ # Single-step --action exits the process right after push_step returns;
65
+ # join a short window so the daemon thread's last frame can land (§6 单步收尾).
66
+ self._join_on_exit = join_on_exit
67
+
68
+ def push_step(self, event: PlaygroundStepEvent) -> None:
69
+ """Best-effort push. Hands off to a daemon thread; the caller returns at
70
+ once (arch#3: never block the action hot path)."""
71
+ if not self.enabled:
72
+ return
73
+ t = threading.Thread(target=self._post, args=(event,), daemon=True)
74
+ t.start()
75
+ if self._join_on_exit:
76
+ # Single-step --action exits right after this returns; wait a bounded
77
+ # grace (≤ _JOIN_TIMEOUT) for the last frame to land. This is the hard
78
+ # ceiling on added latency to the contract path — never grow it past a
79
+ # human-imperceptible budget (HIGH-1 from review: 0.6s was too generous).
80
+ t.join(timeout=_JOIN_TIMEOUT)
81
+
82
+ def _post(self, event: PlaygroundStepEvent) -> None:
83
+ try:
84
+ requests.post(
85
+ f"{self.base_url}/api/step",
86
+ json=event.model_dump(),
87
+ timeout=_POST_TIMEOUT, # (connect, read) split: a hung playground can't stall us
88
+ )
89
+ except Exception as e: # ConnectionError / Timeout / anything — swallow (G5)
90
+ log.debug(f"[playground-sink] skip (playground unreachable): {e}")
91
+
92
+ @staticmethod
93
+ def encode_screenshot(adapter) -> str:
94
+ """Cross-platform: take_screenshot() -> bytes → base64. Can't grab → '' (degrade).
95
+
96
+ Platform-agnostic on purpose: all three adapters expose take_screenshot()
97
+ (base_adapter.py:17 abstract), so no per-platform branching is needed here.
98
+ """
99
+ try:
100
+ png = adapter.take_screenshot()
101
+ return base64.b64encode(png).decode() if png else ""
102
+ except Exception as e:
103
+ log.debug(f"[playground-sink] screenshot skip: {e}")
104
+ return ""
105
+
106
+ @staticmethod
107
+ def capture_dom_tree(adapter, platform: str) -> dict | None:
108
+ """Sidecar hierarchical tree from the SAME raw source the compressors use,
109
+ without touching them. Any failure → None (degrade, never crash the action).
110
+
111
+ web: build_web_tree(adapter.driver) (Playwright page.evaluate)
112
+ android: build_mobile_tree(driver.dump_hierarchy(), 'android')
113
+ ios: build_mobile_tree(driver.source(), 'ios')
114
+ """
115
+ try:
116
+ from playground.dom_capture import build_mobile_tree, build_web_tree
117
+
118
+ driver = adapter.driver
119
+ if platform == "web":
120
+ return build_web_tree(driver)
121
+ if platform == "android":
122
+ return build_mobile_tree(driver.dump_hierarchy(), "android")
123
+ if platform == "ios":
124
+ return build_mobile_tree(driver.source(), "ios")
125
+ return None
126
+ except Exception as e:
127
+ log.debug(f"[playground-sink] dom capture skip: {e}")
128
+ return None
129
+
130
+ def push_dom_tree(self, run_id: str, step_index: int, tree: dict) -> None:
131
+ """Fire-and-forget POST of the captured tree, DECOUPLED from push_step and
132
+ NEVER join-waited — a big tree body must never delay the lean step push or
133
+ the action's exit. Disabled sink → no-op."""
134
+ if not self.enabled:
135
+ return
136
+ threading.Thread(
137
+ target=self._post_dom, args=(run_id, step_index, tree), daemon=True
138
+ ).start()
139
+
140
+ def _post_dom(self, run_id: str, step_index: int, tree: dict) -> None:
141
+ try:
142
+ requests.post(
143
+ f"{self.base_url}/api/dom",
144
+ json={"run_id": run_id, "step_index": step_index, "tree": tree},
145
+ timeout=_DOM_POST_TIMEOUT,
146
+ )
147
+ except Exception as e: # swallow (G5)
148
+ log.debug(f"[playground-sink] dom skip (unreachable): {e}")
149
+
150
+
151
+ def build_step_event(
152
+ *,
153
+ run_key: str,
154
+ step_index: int,
155
+ action_data: dict,
156
+ result: dict,
157
+ screenshot_b64: str,
158
+ file_path: str = "",
159
+ has_dom_tree: bool = False,
160
+ ) -> PlaygroundStepEvent:
161
+ """MANDATORY single construction point for every step event (code#4).
162
+
163
+ All three entry points (action / workflow / main) build events ONLY through
164
+ here. Adding a field later (e.g. a seed timestamp) is then one edit, not three
165
+ — preventing the P9-style schema split where one call site silently drifts.
166
+
167
+ file_path is normalized to an absolute path HERE (one idiom, one place) rather
168
+ than at each call site, so it happens inside maybe_push_step's G5 try/except.
169
+ Empty stays empty — abspath('') would wrongly yield the cwd, and the frontend
170
+ treats '' as "no openable file" (disables the IDE button), so guard it.
171
+ """
172
+ return PlaygroundStepEvent(
173
+ run_id=run_key,
174
+ step_index=step_index,
175
+ code_lines=result.get("code_lines", []) or [],
176
+ action_description=result.get("action_description", ""),
177
+ action=action_data.get("action", ""),
178
+ locator_type=action_data.get("locator_type", ""),
179
+ locator_value=action_data.get("locator_value", ""),
180
+ extra_value=action_data.get("extra_value", ""),
181
+ success=bool(result.get("success", True)),
182
+ screenshot_b64=screenshot_b64,
183
+ file_path=os.path.abspath(file_path) if file_path else "",
184
+ has_dom_tree=has_dom_tree,
185
+ )
186
+
187
+
188
+ def build_sink_from_args(args, *, join_on_exit: bool = False) -> "PlaygroundSink":
189
+ """Construct a sink from parsed CLI args. Absent flags → disabled (zero cost)."""
190
+ return PlaygroundSink(
191
+ base_url=getattr(args, "playground_url", "") or DEFAULT_PLAYGROUND_URL,
192
+ enabled=bool(getattr(args, "playground_sink", False)),
193
+ join_on_exit=join_on_exit,
194
+ )
195
+
196
+
197
+ def maybe_push_step(
198
+ sink: "PlaygroundSink",
199
+ *,
200
+ args,
201
+ reporter,
202
+ adapter,
203
+ action_data: dict,
204
+ result: dict,
205
+ step_index: int | None = None,
206
+ file_path: str = "",
207
+ ) -> None:
208
+ """The ONE guarded entry point every call site uses (action / workflow / main).
209
+
210
+ Disabled-fast: returns before touching the adapter, so take_screenshot is
211
+ never called and there is zero device I/O or network on the hot path when the
212
+ sink is off. Wrapped in a blanket try/except as a belt-and-suspenders G5
213
+ guard — the bypass observer must never break the action it observes.
214
+ """
215
+ if not sink.enabled:
216
+ return
217
+ try:
218
+ run_key, resolved_index = resolve_playground_run_key(args, reporter)
219
+ idx = step_index if step_index is not None else resolved_index
220
+ tree = sink.capture_dom_tree(adapter, getattr(args, "platform", ""))
221
+ event = build_step_event(
222
+ run_key=run_key,
223
+ step_index=idx,
224
+ action_data=action_data,
225
+ result=result,
226
+ screenshot_b64=PlaygroundSink.encode_screenshot(adapter),
227
+ file_path=file_path,
228
+ has_dom_tree=tree is not None,
229
+ )
230
+ sink.push_step(event)
231
+ if tree is not None:
232
+ sink.push_dom_tree(run_key, idx, tree) # decoupled, never joined
233
+ except Exception as e: # never let visualization break the observed action
234
+ log.debug(f"[playground-sink] push skipped: {e}")
235
+
236
+
237
+ def resolve_playground_run_key(args, reporter) -> tuple[str, int]:
238
+ """Return (run_key, step_index) — the cross-process-stable playground timeline key.
239
+
240
+ Root cause (arch#1): run_reporter.py mints run_id as `timestamp_uuid`, unique
241
+ per short-lived process. In agent mode each --action is its own process, so
242
+ using reporter.run_id directly would shatter a 5-step flow into 5 single-step
243
+ buckets and the seed's timeline would be born broken.
244
+
245
+ --session-id present → use session_id as the key (one session = one timeline);
246
+ step_index comes from the session's persisted 'steps' counter (cli/session.py),
247
+ which dispatch.py increments AFTER each successful step, so steps+1 is the
248
+ 1-based index of the step about to be pushed.
249
+ No session → a bare --action is inherently single-step: reporter.run_id, index 1.
250
+ """
251
+ session_id = getattr(args, "session_id", "") or getattr(args, "session_end", "")
252
+ if session_id:
253
+ session = load_session(session_id)
254
+ return session_id, (session.get("steps", 0) + 1 if session else 1)
255
+ return reporter.run_id, 1
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "screenforge"
3
- version = "0.4.1"
3
+ version = "0.6.0"
4
4
  description = "AI-driven cross-platform UI automation engine with test script generation"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: screenforge
3
- Version: 0.4.1
3
+ Version: 0.6.0
4
4
  Summary: AI-driven cross-platform UI automation engine with test script generation
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://github.com/jhinzzz/ScreenForge
@@ -113,6 +113,7 @@ Each step: **inspect → decide → act → verify**. The AI decides, ScreenForg
113
113
  - **Visual fallback**: When DOM can't locate elements (Canvas, games), VLM parses screenshots
114
114
  - **MCP server**: Any MCP-compatible Agent can drive ScreenForge natively
115
115
  - **Structured output**: JSON Lines events + `report/runs/<id>/` artifacts for CI integration
116
+ - **Live Mirror playground**: Watch the generated pytest code grow line-by-line beside a live screenshot as the test runs — `screenforge --playground`. See the [Playground Guide](https://github.com/jhinzzz/ScreenForge/blob/main/docs/playground-guide.md)
116
117
 
117
118
  ## Agent Integration (Claude Code / Cursor / Codex)
118
119
 
@@ -205,6 +206,7 @@ If ScreenForge generates tests for your project, add this badge to your README:
205
206
  | [MCP Setup (3 min)](https://github.com/jhinzzz/ScreenForge/blob/main/docs/mcp-setup.md) | Connect to Claude Desktop / Cursor / Cline / Claude Code |
206
207
  | [Agent Guide](https://github.com/jhinzzz/ScreenForge/blob/main/docs/agent_guide.md) | Integration protocol for AI Agents |
207
208
  | [Capability Matrix](https://github.com/jhinzzz/ScreenForge/blob/main/docs/capability-matrix.md) | Supported platforms, actions, and locators |
209
+ | [Playground Guide](https://github.com/jhinzzz/ScreenForge/blob/main/docs/playground-guide.md) | Live Mirror — watch code + screenshots grow as the test runs |
208
210
  | [Workflow Examples](https://github.com/jhinzzz/ScreenForge/tree/main/docs/workflows) | YAML workflow templates |
209
211
  | [CHANGELOG](https://github.com/jhinzzz/ScreenForge/blob/main/CHANGELOG.md) | Version history |
210
212
 
@@ -6,6 +6,7 @@ cli/_version.py
6
6
  cli/dispatch.py
7
7
  cli/doctor.py
8
8
  cli/parser.py
9
+ cli/playground_sink.py
9
10
  cli/reporter.py
10
11
  cli/session.py
11
12
  cli/shared.py
@@ -70,6 +71,7 @@ tests/test_cli_action_json.py
70
71
  tests/test_codegen_quality.py
71
72
  tests/test_dispatch.py
72
73
  tests/test_doctor_orphan_browser.py
74
+ tests/test_dom_capture.py
73
75
  tests/test_error_codes.py
74
76
  tests/test_executor.py
75
77
  tests/test_failure_diagnosis.py
@@ -78,6 +80,9 @@ tests/test_ios_smoke_live.py
78
80
  tests/test_mcp_ref_cache.py
79
81
  tests/test_ml_optional.py
80
82
  tests/test_parser.py
83
+ tests/test_playground_app.py
84
+ tests/test_playground_sink.py
85
+ tests/test_playground_sink_integration.py
81
86
  tests/test_run_reporter.py
82
87
  tests/test_run_resume.py
83
88
  tests/test_runtime_modes.py
@@ -0,0 +1,136 @@
1
+ """Tests for playground/dom_capture.py — the sidecar HIERARCHICAL tree builders.
2
+
3
+ Unlike utils/utils_xml.py (which flattens for the LLM), these preserve parent/
4
+ child so the playground can render a real tree. They REUSE utils_xml predicates
5
+ (never modify them) and degrade to None on any failure (never crash the sink).
6
+ """
7
+
8
+ from playground.dom_capture import build_mobile_tree, build_web_tree # noqa: F401
9
+
10
+
11
+ class TestBuildMobileTree:
12
+ def test_returns_none_on_parse_error(self):
13
+ assert build_mobile_tree("<<not xml", "android") is None
14
+
15
+ def test_empty_hierarchy_yields_none(self):
16
+ # An empty hierarchy has no surviving elements ⇒ no tree (keeps has_dom_tree
17
+ # truthful: the pip must not light when there is nothing to show).
18
+ xml = '<hierarchy rotation="0"></hierarchy>'
19
+ assert build_mobile_tree(xml, "android") is None
20
+
21
+ def test_ios_xcuitest_xml_yields_none_today(self):
22
+ # iOS WDA returns XCUITest XML (XCUIElementType* tags + name/label/value/type),
23
+ # which the Android predicates don't recognize → every node filtered → None.
24
+ # This pins the HONEST v1 boundary: iOS DOM-tree capture is not yet supported.
25
+ ios_xml = (
26
+ '<XCUIElementTypeApplication name="MyApp" label="MyApp">'
27
+ '<XCUIElementTypeButton name="login" label="登录" value="" type="XCUIElementTypeButton"/>'
28
+ '</XCUIElementTypeApplication>'
29
+ )
30
+ assert build_mobile_tree(ios_xml, "ios") is None
31
+
32
+ def test_single_clickable_node_emitted(self):
33
+ xml = (
34
+ '<hierarchy rotation="0">'
35
+ '<node class="android.widget.Button" text="Login" clickable="true"/>'
36
+ '</hierarchy>'
37
+ )
38
+ tree = build_mobile_tree(xml, "android")
39
+ assert tree["platform"] == "android"
40
+ assert len(tree["nodes"]) == 1
41
+ n = tree["nodes"][0]
42
+ assert n["class"] == "Button"
43
+ assert n["text"] == "Login"
44
+ assert n["clickable"] is True
45
+ assert n["children"] == []
46
+
47
+ def test_hierarchy_is_preserved_not_flattened(self):
48
+ # A clickable container with a labeled child: the tree keeps the nesting
49
+ # (the FLAT compressor would emit them as siblings; we must not).
50
+ xml = (
51
+ '<hierarchy rotation="0">'
52
+ '<node class="android.widget.LinearLayout" text="Settings" clickable="true">'
53
+ ' <node class="android.widget.TextView" text="Wi-Fi"/>'
54
+ '</node>'
55
+ '</hierarchy>'
56
+ )
57
+ tree = build_mobile_tree(xml, "android")
58
+ assert len(tree["nodes"]) == 1
59
+ parent = tree["nodes"][0]
60
+ assert parent["text"] == "Settings"
61
+ assert len(parent["children"]) == 1
62
+ assert parent["children"][0]["text"] == "Wi-Fi"
63
+
64
+ def test_dead_wrapper_collapses_lifting_children(self):
65
+ # A non-surviving wrapper (no text/desc/clickable/disabled) must NOT appear;
66
+ # its surviving child lifts to the wrapper's parent level.
67
+ xml = (
68
+ '<hierarchy rotation="0">'
69
+ '<node class="android.widget.FrameLayout">'
70
+ ' <node class="android.widget.Button" text="OK" clickable="true"/>'
71
+ '</node>'
72
+ '</hierarchy>'
73
+ )
74
+ tree = build_mobile_tree(xml, "android")
75
+ assert len(tree["nodes"]) == 1
76
+ assert tree["nodes"][0]["text"] == "OK" # lifted, wrapper gone
77
+
78
+ def test_disabled_emitted_without_clickable(self):
79
+ xml = (
80
+ '<hierarchy rotation="0">'
81
+ '<node class="android.widget.Button" text="Send" enabled="false"/>'
82
+ '</hierarchy>'
83
+ )
84
+ tree = build_mobile_tree(xml, "android")
85
+ n = tree["nodes"][0]
86
+ assert n["disabled"] is True
87
+ assert "clickable" not in n
88
+
89
+ def test_full_resource_id_emitted(self):
90
+ xml = (
91
+ '<hierarchy rotation="0">'
92
+ '<node class="android.widget.Button" text="Go" clickable="true" '
93
+ 'resource-id="com.app:id/go_btn"/>'
94
+ '</hierarchy>'
95
+ )
96
+ tree = build_mobile_tree(xml, "android")
97
+ assert tree["nodes"][0]["id"] == "com.app:id/go_btn"
98
+
99
+ def test_no_ref_and_no_bbox_on_mobile(self):
100
+ xml = (
101
+ '<hierarchy rotation="0">'
102
+ '<node class="android.widget.Button" text="X" clickable="true" '
103
+ 'bounds="[0,0][100,50]"/>'
104
+ '</hierarchy>'
105
+ )
106
+ n = build_mobile_tree(xml, "android")["nodes"][0]
107
+ assert "ref" not in n
108
+ assert "x" not in n and "w" not in n # honest: mobile has no bbox in this shape
109
+
110
+
111
+ class _FakePage:
112
+ def __init__(self, result=None, raises=None):
113
+ self._result = result
114
+ self._raises = raises
115
+
116
+ def evaluate(self, _js):
117
+ if self._raises:
118
+ raise self._raises
119
+ return self._result
120
+
121
+
122
+ class TestBuildWebTree:
123
+ def test_passthrough_wellformed_result(self):
124
+ page = _FakePage(result={"nodes": [{"ref": "@1", "class": "button", "children": []}]})
125
+ tree = build_web_tree(page)
126
+ assert tree["platform"] == "web"
127
+ assert tree["nodes"][0]["ref"] == "@1"
128
+
129
+ def test_none_when_result_missing_nodes(self):
130
+ assert build_web_tree(_FakePage(result={"oops": 1})) is None
131
+
132
+ def test_none_when_result_not_a_dict(self):
133
+ assert build_web_tree(_FakePage(result="not a dict")) is None
134
+
135
+ def test_none_when_evaluate_raises(self):
136
+ assert build_web_tree(_FakePage(raises=RuntimeError("page closed"))) is None
@@ -144,3 +144,15 @@ class TestValidateCliArgs:
144
144
  def test_demo_valid(self):
145
145
  args = self._make_args(goal="", demo=True)
146
146
  validate_cli_args(args)
147
+
148
+ def test_playground_alone_valid(self):
149
+ # Regression: --playground starts a standalone server (dispatch handles it
150
+ # AFTER this validation, like --init/--demo) and needs no goal/action.
151
+ # Before the fix it raised "Must provide --goal/--workflow/--action",
152
+ # making the live-mirror entry point impossible to launch from the CLI.
153
+ args = self._make_args(goal="", playground=True)
154
+ validate_cli_args(args)
155
+
156
+ def test_playground_with_port_valid(self):
157
+ args = self._make_args(goal="", playground=True, playground_port=8000)
158
+ validate_cli_args(args)