evizi-kit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. package/README.md +506 -0
  2. package/kits/agent/.agent/skills/claude-code-subagent-creator/SKILL.md +292 -0
  3. package/kits/agent/.agent/skills/claude-code-subagent-creator/references/claude-code-subagent-configuration.md +158 -0
  4. package/kits/agent/.agent/skills/claude-code-subagent-creator/templates/subagent-profile.template.md +26 -0
  5. package/kits/agent/.agent/skills/skill-creator/LICENSE.txt +202 -0
  6. package/kits/agent/.agent/skills/skill-creator/SKILL.md +485 -0
  7. package/kits/agent/.agent/skills/skill-creator/agents/analyzer.md +274 -0
  8. package/kits/agent/.agent/skills/skill-creator/agents/comparator.md +202 -0
  9. package/kits/agent/.agent/skills/skill-creator/agents/grader.md +223 -0
  10. package/kits/agent/.agent/skills/skill-creator/assets/eval_review.html +146 -0
  11. package/kits/agent/.agent/skills/skill-creator/eval-viewer/generate_review.py +471 -0
  12. package/kits/agent/.agent/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  13. package/kits/agent/.agent/skills/skill-creator/references/schemas.md +430 -0
  14. package/kits/agent/.agent/skills/skill-creator/scripts/__init__.py +0 -0
  15. package/kits/agent/.agent/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  16. package/kits/agent/.agent/skills/skill-creator/scripts/generate_report.py +326 -0
  17. package/kits/agent/.agent/skills/skill-creator/scripts/improve_description.py +247 -0
  18. package/kits/agent/.agent/skills/skill-creator/scripts/package_skill.py +136 -0
  19. package/kits/agent/.agent/skills/skill-creator/scripts/quick_validate.py +103 -0
  20. package/kits/agent/.agent/skills/skill-creator/scripts/run_eval.py +310 -0
  21. package/kits/agent/.agent/skills/skill-creator/scripts/run_loop.py +328 -0
  22. package/kits/agent/.agent/skills/skill-creator/scripts/utils.py +47 -0
  23. package/kits/agent/manifest.json +10 -0
  24. package/kits/claude/.claude/agents/code-pusher.md +46 -0
  25. package/kits/claude/.claude/agents/feature-document-updater.md +37 -0
  26. package/kits/claude/.claude/agents/self-reviewer.md +32 -0
  27. package/kits/claude/.claude/agents/web-auto-agentic-workflow-initializer.md +42 -0
  28. package/kits/claude/.claude/agents/web-auto-assisted-fix-and-runner.md +36 -0
  29. package/kits/claude/.claude/agents/web-auto-chrome-devtools-selector-extractor.md +36 -0
  30. package/kits/claude/.claude/agents/web-auto-coder.md +33 -0
  31. package/kits/claude/.claude/agents/web-auto-fe-selector-extractor.md +31 -0
  32. package/kits/claude/.claude/agents/web-auto-fix-and-runner.md +35 -0
  33. package/kits/claude/.claude/agents/web-auto-lessons-learned-extractor.md +34 -0
  34. package/kits/claude/.claude/agents/web-auto-playwright-mcp-selector-extractor.md +37 -0
  35. package/kits/claude/.claude/agents/web-auto-source-instructions-updater.md +43 -0
  36. package/kits/claude/.claude/agents/web-auto-test-cases-generator.md +29 -0
  37. package/kits/claude/.claude/agents/web-auto-ticket-designer.md +35 -0
  38. package/kits/claude/.claude/agents/web-auto-ticket-playbook-planner.md +36 -0
  39. package/kits/claude/.claude/agents/web-auto.md +382 -0
  40. package/kits/claude/.claude/skills/claude-code-subagent-creator/SKILL.md +292 -0
  41. package/kits/claude/.claude/skills/claude-code-subagent-creator/references/claude-code-subagent-configuration.md +158 -0
  42. package/kits/claude/.claude/skills/claude-code-subagent-creator/templates/subagent-profile.template.md +26 -0
  43. package/kits/claude/.claude/skills/skill-creator/LICENSE.txt +202 -0
  44. package/kits/claude/.claude/skills/skill-creator/SKILL.md +485 -0
  45. package/kits/claude/.claude/skills/skill-creator/agents/analyzer.md +274 -0
  46. package/kits/claude/.claude/skills/skill-creator/agents/comparator.md +202 -0
  47. package/kits/claude/.claude/skills/skill-creator/agents/grader.md +223 -0
  48. package/kits/claude/.claude/skills/skill-creator/assets/eval_review.html +146 -0
  49. package/kits/claude/.claude/skills/skill-creator/eval-viewer/generate_review.py +471 -0
  50. package/kits/claude/.claude/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  51. package/kits/claude/.claude/skills/skill-creator/references/schemas.md +430 -0
  52. package/kits/claude/.claude/skills/skill-creator/scripts/__init__.py +0 -0
  53. package/kits/claude/.claude/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  54. package/kits/claude/.claude/skills/skill-creator/scripts/generate_report.py +326 -0
  55. package/kits/claude/.claude/skills/skill-creator/scripts/improve_description.py +247 -0
  56. package/kits/claude/.claude/skills/skill-creator/scripts/package_skill.py +136 -0
  57. package/kits/claude/.claude/skills/skill-creator/scripts/quick_validate.py +103 -0
  58. package/kits/claude/.claude/skills/skill-creator/scripts/run_eval.py +310 -0
  59. package/kits/claude/.claude/skills/skill-creator/scripts/run_loop.py +328 -0
  60. package/kits/claude/.claude/skills/skill-creator/scripts/utils.py +47 -0
  61. package/kits/claude/manifest.json +10 -0
  62. package/kits/cursor/.cursor/agents/code-pusher.agent.md +43 -0
  63. package/kits/cursor/.cursor/agents/feature-document-updater.agent.md +34 -0
  64. package/kits/cursor/.cursor/agents/self-reviewer.agent.md +29 -0
  65. package/kits/cursor/.cursor/agents/web-auto-agentic-workflow-initializer.agent.md +37 -0
  66. package/kits/cursor/.cursor/agents/web-auto-assisted-fix-and-runner.agent.md +33 -0
  67. package/kits/cursor/.cursor/agents/web-auto-chrome-devtools-selector-extractor.agent.md +31 -0
  68. package/kits/cursor/.cursor/agents/web-auto-coder.agent.md +30 -0
  69. package/kits/cursor/.cursor/agents/web-auto-fe-selector-extractor.agent.md +28 -0
  70. package/kits/cursor/.cursor/agents/web-auto-fix-and-runner.agent.md +32 -0
  71. package/kits/cursor/.cursor/agents/web-auto-lessons-learned-extractor.agent.md +31 -0
  72. package/kits/cursor/.cursor/agents/web-auto-playwright-mcp-selector-extractor.agent.md +32 -0
  73. package/kits/cursor/.cursor/agents/web-auto-source-instructions-updater.agent.md +40 -0
  74. package/kits/cursor/.cursor/agents/web-auto-test-cases-generator.agent.md +26 -0
  75. package/kits/cursor/.cursor/agents/web-auto-ticket-designer.agent.md +32 -0
  76. package/kits/cursor/.cursor/agents/web-auto-ticket-playbook-planner.agent.md +33 -0
  77. package/kits/cursor/.cursor/agents/web-auto.agent.md +379 -0
  78. package/kits/cursor/.cursor/skills/claude-code-subagent-creator/SKILL.md +292 -0
  79. package/kits/cursor/.cursor/skills/claude-code-subagent-creator/references/claude-code-subagent-configuration.md +158 -0
  80. package/kits/cursor/.cursor/skills/claude-code-subagent-creator/templates/subagent-profile.template.md +26 -0
  81. package/kits/cursor/.cursor/skills/skill-creator/LICENSE.txt +202 -0
  82. package/kits/cursor/.cursor/skills/skill-creator/SKILL.md +485 -0
  83. package/kits/cursor/.cursor/skills/skill-creator/agents/analyzer.md +274 -0
  84. package/kits/cursor/.cursor/skills/skill-creator/agents/comparator.md +202 -0
  85. package/kits/cursor/.cursor/skills/skill-creator/agents/grader.md +223 -0
  86. package/kits/cursor/.cursor/skills/skill-creator/assets/eval_review.html +146 -0
  87. package/kits/cursor/.cursor/skills/skill-creator/eval-viewer/generate_review.py +471 -0
  88. package/kits/cursor/.cursor/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  89. package/kits/cursor/.cursor/skills/skill-creator/references/schemas.md +430 -0
  90. package/kits/cursor/.cursor/skills/skill-creator/scripts/__init__.py +0 -0
  91. package/kits/cursor/.cursor/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  92. package/kits/cursor/.cursor/skills/skill-creator/scripts/generate_report.py +326 -0
  93. package/kits/cursor/.cursor/skills/skill-creator/scripts/improve_description.py +247 -0
  94. package/kits/cursor/.cursor/skills/skill-creator/scripts/package_skill.py +136 -0
  95. package/kits/cursor/.cursor/skills/skill-creator/scripts/quick_validate.py +103 -0
  96. package/kits/cursor/.cursor/skills/skill-creator/scripts/run_eval.py +310 -0
  97. package/kits/cursor/.cursor/skills/skill-creator/scripts/run_loop.py +328 -0
  98. package/kits/cursor/.cursor/skills/skill-creator/scripts/utils.py +47 -0
  99. package/kits/cursor/manifest.json +10 -0
  100. package/kits/github/.github/agents/code-pusher.agent.md +45 -0
  101. package/kits/github/.github/agents/feature-document-updater.agent.md +36 -0
  102. package/kits/github/.github/agents/self-reviewer.agent.md +31 -0
  103. package/kits/github/.github/agents/web-auto-agentic-workflow-initializer.agent.md +39 -0
  104. package/kits/github/.github/agents/web-auto-assisted-fix-and-runner.agent.md +35 -0
  105. package/kits/github/.github/agents/web-auto-chrome-devtools-selector-extractor.agent.md +33 -0
  106. package/kits/github/.github/agents/web-auto-coder.agent.md +32 -0
  107. package/kits/github/.github/agents/web-auto-fe-selector-extractor.agent.md +30 -0
  108. package/kits/github/.github/agents/web-auto-fix-and-runner.agent.md +34 -0
  109. package/kits/github/.github/agents/web-auto-lessons-learned-extractor.agent.md +33 -0
  110. package/kits/github/.github/agents/web-auto-playwright-mcp-selector-extractor.agent.md +34 -0
  111. package/kits/github/.github/agents/web-auto-source-instructions-updater.agent.md +42 -0
  112. package/kits/github/.github/agents/web-auto-test-cases-generator.agent.md +28 -0
  113. package/kits/github/.github/agents/web-auto-ticket-designer.agent.md +34 -0
  114. package/kits/github/.github/agents/web-auto-ticket-playbook-creator.agent.md +35 -0
  115. package/kits/github/.github/agents/web-auto.agent.md +382 -0
  116. package/kits/github/.github/skills/claude-code-subagent-creator/SKILL.md +310 -0
  117. package/kits/github/.github/skills/claude-code-subagent-creator/references/claude-code-subagent-configuration.md +158 -0
  118. package/kits/github/.github/skills/claude-code-subagent-creator/templates/subagent-profile.template.md +37 -0
  119. package/kits/github/.github/skills/skill-creator/LICENSE.txt +202 -0
  120. package/kits/github/.github/skills/skill-creator/SKILL.md +485 -0
  121. package/kits/github/.github/skills/skill-creator/agents/analyzer.md +274 -0
  122. package/kits/github/.github/skills/skill-creator/agents/comparator.md +202 -0
  123. package/kits/github/.github/skills/skill-creator/agents/grader.md +223 -0
  124. package/kits/github/.github/skills/skill-creator/assets/eval_review.html +146 -0
  125. package/kits/github/.github/skills/skill-creator/eval-viewer/generate_review.py +471 -0
  126. package/kits/github/.github/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  127. package/kits/github/.github/skills/skill-creator/references/schemas.md +430 -0
  128. package/kits/github/.github/skills/skill-creator/scripts/__init__.py +0 -0
  129. package/kits/github/.github/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  130. package/kits/github/.github/skills/skill-creator/scripts/generate_report.py +326 -0
  131. package/kits/github/.github/skills/skill-creator/scripts/improve_description.py +247 -0
  132. package/kits/github/.github/skills/skill-creator/scripts/package_skill.py +136 -0
  133. package/kits/github/.github/skills/skill-creator/scripts/quick_validate.py +103 -0
  134. package/kits/github/.github/skills/skill-creator/scripts/run_eval.py +310 -0
  135. package/kits/github/.github/skills/skill-creator/scripts/run_loop.py +328 -0
  136. package/kits/github/.github/skills/skill-creator/scripts/utils.py +47 -0
  137. package/kits/github/manifest.json +10 -0
  138. package/kits/shared/docs/ai-code-review.md +440 -0
  139. package/kits/shared/docs/increase-unit-test-coverage.md +77 -0
  140. package/kits/shared/docs/pr-review-agent.md +501 -0
  141. package/kits/shared/docs/self-review-agent.md +246 -0
  142. package/kits/shared/docs/web-auto-agentic-workflow.md +506 -0
  143. package/kits/shared/manifest.json +11 -0
  144. package/kits/shared/skills/fix-automation-tests/SKILL.md +280 -0
  145. package/kits/shared/skills/fix-automation-tests/scripts/fetch_pr_changes.py +300 -0
  146. package/kits/shared/skills/fix-automation-tests/templates/impact-report.template.md +42 -0
  147. package/kits/shared/skills/increase-unit-test-coverage/SKILL.md +117 -0
  148. package/kits/shared/skills/increase-unit-test-coverage/scripts/filter_low_coverage.py +447 -0
  149. package/kits/shared/skills/pr-review/SKILL.md +200 -0
  150. package/kits/shared/skills/pr-review/references/automation.md +62 -0
  151. package/kits/shared/skills/pr-review/references/backend.md +95 -0
  152. package/kits/shared/skills/pr-review/references/frontend.md +103 -0
  153. package/kits/shared/skills/pr-review/references/mobile.md +108 -0
  154. package/kits/shared/skills/pr-review/references/output-schema.md +130 -0
  155. package/kits/shared/skills/pr-review/scripts/post-review.py +1395 -0
  156. package/kits/shared/skills/push-code/SKILL.md +176 -0
  157. package/kits/shared/skills/self-review/SKILL.md +234 -0
  158. package/kits/shared/skills/self-review/evals/evals.json +23 -0
  159. package/kits/shared/skills/self-review/references/automation.md +62 -0
  160. package/kits/shared/skills/self-review/references/backend.md +95 -0
  161. package/kits/shared/skills/self-review/references/frontend.md +103 -0
  162. package/kits/shared/skills/self-review/references/mobile.md +108 -0
  163. package/kits/shared/skills/self-review/templates/issues.template.md +72 -0
  164. package/kits/shared/skills/update-feature-document/SKILL.md +156 -0
  165. package/kits/shared/skills/update-feature-document/templates/delta.template.yaml +58 -0
  166. package/kits/shared/skills/update-feature-document/templates/feature.template.md +25 -0
  167. package/kits/shared/skills/web-auto-assisted-fix-and-run/SKILL.md +130 -0
  168. package/kits/shared/skills/web-auto-assisted-fix-and-run/references/resolve-api-error.md +108 -0
  169. package/kits/shared/skills/web-auto-assisted-fix-and-run/references/resolve-selector.md +60 -0
  170. package/kits/shared/skills/web-auto-assisted-fix-and-run/templates/issues-resolution-report-append.template.md +54 -0
  171. package/kits/shared/skills/web-auto-chrome-devtools-mcp-extract-selectors/SKILL.md +284 -0
  172. package/kits/shared/skills/web-auto-coding/SKILL.md +152 -0
  173. package/kits/shared/skills/web-auto-extract-lessons-learned/SKILL.md +168 -0
  174. package/kits/shared/skills/web-auto-extract-lessons-learned/templates/lessons-learned.template.md +115 -0
  175. package/kits/shared/skills/web-auto-fe-extract-selectors/SKILL.md +282 -0
  176. package/kits/shared/skills/web-auto-fe-extract-selectors/evals/evals.json +23 -0
  177. package/kits/shared/skills/web-auto-fix-and-run-test/SKILL.md +183 -0
  178. package/kits/shared/skills/web-auto-fix-and-run-test/templates/issues-resolution-report.template.md +77 -0
  179. package/kits/shared/skills/web-auto-generate-best-practices/SKILL.md +123 -0
  180. package/kits/shared/skills/web-auto-generate-instructions/SKILL.md +200 -0
  181. package/kits/shared/skills/web-auto-generate-instructions/evals/evals.json +23 -0
  182. package/kits/shared/skills/web-auto-generate-instructions/references/analysis-guide.md +145 -0
  183. package/kits/shared/skills/web-auto-generate-instructions/templates/web-auto-instructions.template.md +184 -0
  184. package/kits/shared/skills/web-auto-generate-project-blueprint/SKILL.md +181 -0
  185. package/kits/shared/skills/web-auto-generate-project-blueprint/evals/evals.json +57 -0
  186. package/kits/shared/skills/web-auto-generate-project-blueprint/templates/web-auto-project-blueprint.template.md +161 -0
  187. package/kits/shared/skills/web-auto-playwright-mcp-extract-selectors/SKILL.md +293 -0
  188. package/kits/shared/skills/web-auto-test-cases/SKILL.md +138 -0
  189. package/kits/shared/skills/web-auto-test-cases/evals/evals.json +129 -0
  190. package/kits/shared/skills/web-auto-test-cases/templates/test-cases.template.md +53 -0
  191. package/kits/shared/skills/web-auto-ticket-design/SKILL.md +199 -0
  192. package/kits/shared/skills/web-auto-ticket-design/templates/ticket-design.template.md +138 -0
  193. package/kits/shared/skills/web-auto-ticket-playbook/SKILL.md +218 -0
  194. package/kits/shared/skills/web-auto-ticket-playbook/evals/evals.json +23 -0
  195. package/kits/shared/skills/web-auto-ticket-playbook/templates/ticket-playbook.template.md +148 -0
  196. package/kits/shared/skills/web-auto-update-source-instructions/SKILL.md +156 -0
  197. package/kits/shared/skills/web-auto-update-source-instructions/evals/evals.json +22 -0
  198. package/kits/shared/skills/workspace-ai-nav-creator/SKILL.md +168 -0
  199. package/kits/shared/skills/workspace-ai-nav-creator/templates/agents-md.template.md +112 -0
  200. package/kits/shared/skills/workspace-ai-nav-creator/templates/claude-md.template.md +86 -0
  201. package/package.json +16 -0
@@ -0,0 +1,328 @@
1
+ #!/usr/bin/env python3
2
+ """Run the eval + improve loop until all pass or max iterations reached.
3
+
4
+ Combines run_eval.py and improve_description.py in a loop, tracking history
5
+ and returning the best description found. Supports train/test split to prevent
6
+ overfitting.
7
+ """
8
+
9
+ import argparse
10
+ import json
11
+ import random
12
+ import sys
13
+ import tempfile
14
+ import time
15
+ import webbrowser
16
+ from pathlib import Path
17
+
18
+ from scripts.generate_report import generate_html
19
+ from scripts.improve_description import improve_description
20
+ from scripts.run_eval import find_project_root, run_eval
21
+ from scripts.utils import parse_skill_md
22
+
23
+
24
def split_eval_set(eval_set: list[dict], holdout: float, seed: int = 42) -> tuple[list[dict], list[dict]]:
    """Split eval set into train and test sets, stratified by should_trigger.

    Args:
        eval_set: Eval queries; each dict must have a "should_trigger" key.
        holdout: Fraction of each stratum to reserve for the test set.
        seed: RNG seed so the split is reproducible across runs.

    Returns:
        (train_set, test_set) — together they partition eval_set.
    """
    # Use a dedicated Random instance rather than random.seed(): seeding the
    # module-global RNG clobbers its state for every other caller in the
    # process. Random(seed) produces the same shuffle sequence, so the
    # resulting split is unchanged.
    rng = random.Random(seed)

    # Stratify by expected outcome so both splits contain positive and
    # negative examples.
    trigger = [e for e in eval_set if e["should_trigger"]]
    no_trigger = [e for e in eval_set if not e["should_trigger"]]

    # Shuffle each group
    rng.shuffle(trigger)
    rng.shuffle(no_trigger)

    # Reserve at least one example per stratum for the test set. When a
    # stratum is empty the max(1, ...) is harmless: slicing [] is a no-op.
    n_trigger_test = max(1, int(len(trigger) * holdout))
    n_no_trigger_test = max(1, int(len(no_trigger) * holdout))

    # Split
    test_set = trigger[:n_trigger_test] + no_trigger[:n_no_trigger_test]
    train_set = trigger[n_trigger_test:] + no_trigger[n_no_trigger_test:]

    return train_set, test_set
45
+
46
+
47
def run_loop(
    eval_set: list[dict],
    skill_path: Path,
    description_override: str | None,
    num_workers: int,
    timeout: int,
    max_iterations: int,
    runs_per_query: int,
    trigger_threshold: float,
    holdout: float,
    model: str,
    verbose: bool,
    live_report_path: Path | None = None,
    log_dir: Path | None = None,
) -> dict:
    """Run the eval + improvement loop.

    Each iteration evaluates the current description against the full eval
    set (train + holdout test in one batch), records results in a history,
    and — unless all train queries passed or max_iterations is reached —
    asks the improvement model for a new description based on the train
    results only (test scores are stripped so the model cannot overfit to
    the holdout).

    Returns a summary dict: exit reason, best/original/final descriptions,
    best score (selected by test-set score when a holdout is used, train
    score otherwise), split sizes, and full per-iteration history.
    """
    project_root = find_project_root()
    name, original_description, content = parse_skill_md(skill_path)
    current_description = description_override or original_description

    # Split into train/test if holdout > 0; otherwise train on everything.
    if holdout > 0:
        train_set, test_set = split_eval_set(eval_set, holdout)
        if verbose:
            print(f"Split: {len(train_set)} train, {len(test_set)} test (holdout={holdout})", file=sys.stderr)
    else:
        train_set = eval_set
        test_set = []

    history = []
    exit_reason = "unknown"

    for iteration in range(1, max_iterations + 1):
        if verbose:
            print(f"\n{'='*60}", file=sys.stderr)
            print(f"Iteration {iteration}/{max_iterations}", file=sys.stderr)
            print(f"Description: {current_description}", file=sys.stderr)
            print(f"{'='*60}", file=sys.stderr)

        # Evaluate train + test together in one batch for parallelism.
        all_queries = train_set + test_set
        t0 = time.time()
        all_results = run_eval(
            eval_set=all_queries,
            skill_name=name,
            description=current_description,
            num_workers=num_workers,
            timeout=timeout,
            project_root=project_root,
            runs_per_query=runs_per_query,
            trigger_threshold=trigger_threshold,
            model=model,
        )
        eval_elapsed = time.time() - t0

        # Split results back into train/test by matching query text.
        # NOTE(review): this assumes query strings are unique across the
        # two splits — a test query with text identical to a train query
        # would be misclassified as train. Confirm eval sets dedupe queries.
        train_queries_set = {q["query"] for q in train_set}
        train_result_list = [r for r in all_results["results"] if r["query"] in train_queries_set]
        test_result_list = [r for r in all_results["results"] if r["query"] not in train_queries_set]

        # Per-split pass/fail summaries (each result carries a "pass" bool).
        train_passed = sum(1 for r in train_result_list if r["pass"])
        train_total = len(train_result_list)
        train_summary = {"passed": train_passed, "failed": train_total - train_passed, "total": train_total}
        train_results = {"results": train_result_list, "summary": train_summary}

        if test_set:
            test_passed = sum(1 for r in test_result_list if r["pass"])
            test_total = len(test_result_list)
            test_summary = {"passed": test_passed, "failed": test_total - test_passed, "total": test_total}
            test_results = {"results": test_result_list, "summary": test_summary}
        else:
            test_results = None
            test_summary = None

        history.append({
            "iteration": iteration,
            "description": current_description,
            "train_passed": train_summary["passed"],
            "train_failed": train_summary["failed"],
            "train_total": train_summary["total"],
            "train_results": train_results["results"],
            "test_passed": test_summary["passed"] if test_summary else None,
            "test_failed": test_summary["failed"] if test_summary else None,
            "test_total": test_summary["total"] if test_summary else None,
            "test_results": test_results["results"] if test_results else None,
            # For backward compat with report generator
            "passed": train_summary["passed"],
            "failed": train_summary["failed"],
            "total": train_summary["total"],
            "results": train_results["results"],
        })

        # Write live report if path provided, so a browser pointed at the
        # file (see main()) shows progress while the loop runs.
        if live_report_path:
            partial_output = {
                "original_description": original_description,
                "best_description": current_description,
                "best_score": "in progress",
                "iterations_run": len(history),
                "holdout": holdout,
                "train_size": len(train_set),
                "test_size": len(test_set),
                "history": history,
            }
            live_report_path.write_text(generate_html(partial_output, auto_refresh=True, skill_name=name))

        if verbose:
            # Print precision/recall/accuracy over individual runs (each
            # result carries "triggers" out of "runs" counts), then a
            # per-query PASS/FAIL line.
            def print_eval_stats(label, results, elapsed):
                pos = [r for r in results if r["should_trigger"]]
                neg = [r for r in results if not r["should_trigger"]]
                tp = sum(r["triggers"] for r in pos)
                pos_runs = sum(r["runs"] for r in pos)
                fn = pos_runs - tp
                fp = sum(r["triggers"] for r in neg)
                neg_runs = sum(r["runs"] for r in neg)
                tn = neg_runs - fp
                total = tp + tn + fp + fn
                precision = tp / (tp + fp) if (tp + fp) > 0 else 1.0
                recall = tp / (tp + fn) if (tp + fn) > 0 else 1.0
                accuracy = (tp + tn) / total if total > 0 else 0.0
                print(f"{label}: {tp+tn}/{total} correct, precision={precision:.0%} recall={recall:.0%} accuracy={accuracy:.0%} ({elapsed:.1f}s)", file=sys.stderr)
                for r in results:
                    status = "PASS" if r["pass"] else "FAIL"
                    rate_str = f"{r['triggers']}/{r['runs']}"
                    print(f"  [{status}] rate={rate_str} expected={r['should_trigger']}: {r['query'][:60]}", file=sys.stderr)

            print_eval_stats("Train", train_results["results"], eval_elapsed)
            if test_summary:
                # Test ran in the same batch, so no separate timing.
                print_eval_stats("Test ", test_results["results"], 0)

        # Stop as soon as the train set is fully solved.
        if train_summary["failed"] == 0:
            exit_reason = f"all_passed (iteration {iteration})"
            if verbose:
                print(f"\nAll train queries passed on iteration {iteration}!", file=sys.stderr)
            break

        # Last iteration: record the reason and skip the improvement step.
        if iteration == max_iterations:
            exit_reason = f"max_iterations ({max_iterations})"
            if verbose:
                print(f"\nMax iterations reached ({max_iterations}).", file=sys.stderr)
            break

        # Improve the description based on train results
        if verbose:
            print(f"\nImproving description...", file=sys.stderr)

        t0 = time.time()
        # Strip test scores from history so improvement model can't see them
        blinded_history = [
            {k: v for k, v in h.items() if not k.startswith("test_")}
            for h in history
        ]
        new_description = improve_description(
            skill_name=name,
            skill_content=content,
            current_description=current_description,
            eval_results=train_results,
            history=blinded_history,
            model=model,
            log_dir=log_dir,
            iteration=iteration,
        )
        improve_elapsed = time.time() - t0

        if verbose:
            print(f"Proposed ({improve_elapsed:.1f}s): {new_description}", file=sys.stderr)

        current_description = new_description

    # Find the best iteration by TEST score (or train if no test set).
    # test_passed may be None for iterations without test results, hence
    # the "or 0" fallback in the key.
    if test_set:
        best = max(history, key=lambda h: h["test_passed"] or 0)
        best_score = f"{best['test_passed']}/{best['test_total']}"
    else:
        best = max(history, key=lambda h: h["train_passed"])
        best_score = f"{best['train_passed']}/{best['train_total']}"

    if verbose:
        print(f"\nExit reason: {exit_reason}", file=sys.stderr)
        print(f"Best score: {best_score} (iteration {best['iteration']})", file=sys.stderr)

    return {
        "exit_reason": exit_reason,
        "original_description": original_description,
        "best_description": best["description"],
        "best_score": best_score,
        "best_train_score": f"{best['train_passed']}/{best['train_total']}",
        "best_test_score": f"{best['test_passed']}/{best['test_total']}" if test_set else None,
        "final_description": current_description,
        "iterations_run": len(history),
        "holdout": holdout,
        "train_size": len(train_set),
        "test_size": len(test_set),
        "history": history,
    }
242
+
243
+
244
def main():
    """CLI entry point: parse args, run the loop, write JSON and HTML output.

    Side effects: may open a browser tab on a live-updating HTML report,
    writes results.json / report.html under --results-dir (timestamped
    subdirectory), and prints the final JSON to stdout.
    """
    parser = argparse.ArgumentParser(description="Run eval + improve loop")
    parser.add_argument("--eval-set", required=True, help="Path to eval set JSON file")
    parser.add_argument("--skill-path", required=True, help="Path to skill directory")
    parser.add_argument("--description", default=None, help="Override starting description")
    parser.add_argument("--num-workers", type=int, default=10, help="Number of parallel workers")
    parser.add_argument("--timeout", type=int, default=30, help="Timeout per query in seconds")
    parser.add_argument("--max-iterations", type=int, default=5, help="Max improvement iterations")
    parser.add_argument("--runs-per-query", type=int, default=3, help="Number of runs per query")
    parser.add_argument("--trigger-threshold", type=float, default=0.5, help="Trigger rate threshold")
    parser.add_argument("--holdout", type=float, default=0.4, help="Fraction of eval set to hold out for testing (0 to disable)")
    parser.add_argument("--model", required=True, help="Model for improvement")
    parser.add_argument("--verbose", action="store_true", help="Print progress to stderr")
    parser.add_argument("--report", default="auto", help="Generate HTML report at this path (default: 'auto' for temp file, 'none' to disable)")
    parser.add_argument("--results-dir", default=None, help="Save all outputs (results.json, report.html, log.txt) to a timestamped subdirectory here")
    args = parser.parse_args()

    eval_set = json.loads(Path(args.eval_set).read_text())
    skill_path = Path(args.skill_path)

    # Fail fast with a clear message before doing any work.
    if not (skill_path / "SKILL.md").exists():
        print(f"Error: No SKILL.md found at {skill_path}", file=sys.stderr)
        sys.exit(1)

    name, _, _ = parse_skill_md(skill_path)

    # Set up live report path
    if args.report != "none":
        if args.report == "auto":
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            live_report_path = Path(tempfile.gettempdir()) / f"skill_description_report_{skill_path.name}_{timestamp}.html"
        else:
            live_report_path = Path(args.report)
        # Open the report immediately so the user can watch; the placeholder
        # page auto-refreshes every 5s until run_loop writes real content.
        live_report_path.write_text("<html><body><h1>Starting optimization loop...</h1><meta http-equiv='refresh' content='5'></body></html>")
        webbrowser.open(str(live_report_path))
    else:
        live_report_path = None

    # Determine output directory (create before run_loop so logs can be written)
    if args.results_dir:
        timestamp = time.strftime("%Y-%m-%d_%H%M%S")
        results_dir = Path(args.results_dir) / timestamp
        results_dir.mkdir(parents=True, exist_ok=True)
    else:
        results_dir = None

    # NOTE(review): log_dir itself is never mkdir'd here — presumably
    # improve_description creates it on first write; confirm.
    log_dir = results_dir / "logs" if results_dir else None

    output = run_loop(
        eval_set=eval_set,
        skill_path=skill_path,
        description_override=args.description,
        num_workers=args.num_workers,
        timeout=args.timeout,
        max_iterations=args.max_iterations,
        runs_per_query=args.runs_per_query,
        trigger_threshold=args.trigger_threshold,
        holdout=args.holdout,
        model=args.model,
        verbose=args.verbose,
        live_report_path=live_report_path,
        log_dir=log_dir,
    )

    # Save JSON output (stdout always; results_dir copy when requested).
    json_output = json.dumps(output, indent=2)
    print(json_output)
    if results_dir:
        (results_dir / "results.json").write_text(json_output)

    # Write final HTML report (without auto-refresh)
    if live_report_path:
        live_report_path.write_text(generate_html(output, auto_refresh=False, skill_name=name))
        print(f"\nReport: {live_report_path}", file=sys.stderr)

    if results_dir and live_report_path:
        (results_dir / "report.html").write_text(generate_html(output, auto_refresh=False, skill_name=name))

    if results_dir:
        print(f"Results saved to: {results_dir}", file=sys.stderr)


if __name__ == "__main__":
    main()
@@ -0,0 +1,47 @@
1
+ """Shared utilities for skill-creator scripts."""
2
+
3
+ from pathlib import Path
4
+
5
+
6
+
7
def parse_skill_md(skill_path: Path) -> tuple[str, str, str]:
    """Extract metadata from a skill directory's SKILL.md file.

    Reads the YAML frontmatter fenced by ``---`` lines and pulls out the
    ``name`` and ``description`` fields, including support for YAML block
    scalars (``>``, ``|``, ``>-``, ``|-``) whose indented continuation
    lines are joined with single spaces.

    Returns:
        (name, description, full_content) — missing fields come back as "".

    Raises:
        ValueError: if the opening or closing frontmatter fence is absent.
    """
    content = (skill_path / "SKILL.md").read_text()
    all_lines = content.split("\n")

    if all_lines[0].strip() != "---":
        raise ValueError("SKILL.md missing frontmatter (no opening ---)")

    # Locate the closing fence; everything between is frontmatter.
    closing = None
    for idx in range(1, len(all_lines)):
        if all_lines[idx].strip() == "---":
            closing = idx
            break

    if closing is None:
        raise ValueError("SKILL.md missing frontmatter (no closing ---)")

    name = ""
    description = ""
    frontmatter = all_lines[1:closing]
    pos = 0
    while pos < len(frontmatter):
        entry = frontmatter[pos]
        if entry.startswith("name:"):
            name = entry[len("name:"):].strip().strip('"').strip("'")
            pos += 1
        elif entry.startswith("description:"):
            rest = entry[len("description:"):].strip()
            if rest in (">", "|", ">-", "|-"):
                # YAML block scalar: the value is on the following
                # indented lines; collect until indentation stops.
                pieces: list[str] = []
                pos += 1
                while pos < len(frontmatter) and frontmatter[pos][:1] in (" ", "\t"):
                    pieces.append(frontmatter[pos].strip())
                    pos += 1
                description = " ".join(pieces)
            else:
                # Plain inline value, possibly quoted.
                description = rest.strip('"').strip("'")
                pos += 1
        else:
            pos += 1

    return name, description, content
@@ -0,0 +1,10 @@
1
+ {
2
+ "name": "agent",
3
+ "displayName": "VS Code Agent",
4
+ "version": "1.0.0",
5
+ "description": "VS Code Agent mode configuration with skills",
6
+ "files": [
7
+ { "src": ".agent/", "dest": ".agent/" }
8
+ ],
9
+ "dependencies": ["shared"]
10
+ }
@@ -0,0 +1,46 @@
1
+ ---
2
+ name: code-pusher
3
+ description: Push current code changes to the Git remote with auto-generated conventional commit messages. Use whenever the user wants to push code, commit and push, save changes to remote, git push, send code upstream, ship code, deploy changes to a branch, or sync local work with the remote — even if they don't explicitly mention git.
4
+ tools: Bash, Read, Grep
5
+ model: claude
6
+ skills:
7
+ - push-code
8
+ ---
9
+
10
+ You are a Git operations specialist responsible for safely pushing code changes to the remote repository.
11
+
12
+ Follow the workflow defined in the push-code skill.
13
+
14
+ When invoked:
15
+ 1. Verify git is available and identity (user.name, user.email) is configured
16
+ 2. Check the current branch and working tree status
17
+ 3. Guard against pushing directly to protected branches (main, master, develop)
18
+ 4. Stage changes — respect pre-staged files, only add all if nothing is staged
19
+ 5. Generate a conventional commit message from the diff and commit
20
+ 6. Push to the remote, setting upstream if needed
21
+ 7. Report final status with branch, commit hash, and file count
22
+
23
+ Key practices:
24
+ - Always stop at the first failure and report clearly — never push broken state
25
+ - If the user provides a commit message, use it exactly as given
26
+ - For large diffs (500+ lines), rely on `--stat` for the commit message rather than reading every line
27
+ - Use conventional commit prefixes: feat, fix, refactor, docs, chore, test, style, ci, build
28
+
29
+ Report results in this format:
30
+
31
+ On success:
32
+ ```
33
+ ✅ Push Completed
34
+ - Branch: [branch_name]
35
+ - Commit: [short_hash] — [commit message]
36
+ - Files changed: [count]
37
+ - Pushed to: origin/[branch_name]
38
+ ```
39
+
40
+ On failure:
41
+ ```
42
+ ❌ Push Failed
43
+ - Step: [which step failed]
44
+ - Error: [error message]
45
+ - Suggested fix: [actionable guidance]
46
+ ```
@@ -0,0 +1,37 @@
1
+ ---
2
+ name: feature-document-updater
3
+ description: Update feature documentation (feature.md and delta.yaml) after completing a ticket. Use when a ticket is done and the feature docs need to reflect the work — even if the user doesn't say "feature document" explicitly. Triggers on requests like "update feature document for ticket X", "log ticket to feature", "update feature after implementing ticket Y", "record ticket completion for feature", "sync docs after finishing ticket Z", or any mention of recording a finished ticket into feature documentation.
4
+ tools: Read, Write, Edit, Grep, Glob
5
+ model: claude
6
+ skills:
7
+ - update-feature-document
8
+ ---
9
+
10
+ You are a feature documentation specialist responsible for keeping feature.md and delta.yaml in sync with completed ticket work.
11
+
12
+ Follow the workflow defined in the update-feature-document skill.
13
+
14
+ When invoked:
15
+ 1. Accept the ticket ID from the user (ask if not provided)
16
+ 2. Locate the ticket directory at `.tickets/{TICKET_ID}/` and read available artifacts
17
+ 3. Identify the feature path (PARENT_FEATURE / CHILD_FEATURE) from ticket artifacts
18
+ 4. Extract ticket data: summary, type, behaviors covered, files created/modified, test result
19
+ 5. Update or create `feature.md` — only change behavioral specs when the ticket reveals new or corrected behaviors
20
+ 6. Append the ticket entry to `delta.yaml` — never remove or reorder existing entries
21
+ 7. Calculate and set feature status (draft / in-progress / complete) based on behavior coverage
22
+ 8. Display the result summary
23
+
24
+ Key practices:
25
+ - Preserve existing feature.md structure when updating — only modify affected sections
26
+ - delta.yaml is append-only: never remove or reorder existing ticket entries
27
+ - Leave feature.md unchanged if the ticket only automated existing behaviors already documented
28
+ - Update feature.md when new behaviors, validation rules, or edge cases are discovered
29
+ - Always set `last_updated` in delta.yaml to today's date
30
+
31
+ For each update, provide:
32
+ - Feature path (PARENT_FEATURE/CHILD_FEATURE)
33
+ - feature.md status (Created / Updated / No changes needed)
34
+ - delta.yaml status (Ticket logged)
35
+ - Behaviors covered by the ticket
36
+ - Test result (passed/failed/skipped/unknown)
37
+ - Overall feature status (draft/in-progress/complete)
@@ -0,0 +1,32 @@
1
+ ---
2
+ name: self-reviewer
3
+ description: Automated pre-PR code review specialist. Analyzes branch changes against the target branch, generates a severity-categorized issue report (issues.md) with a clear Ready/Not Ready verdict. Use when the user mentions self-review, reviewing code before PR, checking if changes are ready to merge, scanning code for issues, or any variation of "review my branch" or "check my diff".
4
+ tools: Read, Write, Bash, Grep, Glob
5
+ model: claude
6
+ skills:
7
+ - self-review
8
+ ---
9
+
10
+ You are a senior code reviewer who performs thorough pre-PR self-reviews with a clear verdict.
11
+
12
+ Follow the workflow defined in the self-review skill.
13
+
14
+ When invoked:
15
+ 1. Load configuration from `.documents-design/project.config.json` to get the target branch and exclude paths
16
+ 2. Check for untracked files and stage them if found
17
+ 3. Fetch the target branch and run three-dot `git diff` to isolate only the current branch's changes
18
+ 4. Identify the tech stack from changed file extensions, then load the matching domain checklists and any project-specific guidelines from `.documents-design/`
19
+ 5. Review the diff — classify every issue as ❌ Critical, ⚠️ Warning, or 💡 Suggestion
20
+ 6. Generate the `issues.md` report using the template and display a concise verdict summary in chat
21
+
22
+ Key review practices:
23
+ - Use three-dot diff (`origin/<target>...HEAD`) to scope to only this branch's commits
24
+ - Warn if the target branch has diverged with new commits
25
+ - For large diffs (>30 files / >2000 lines), prioritize: most modifications first, source over generated files, security-sensitive areas
26
+ - Calibrate severity carefully — reserve ❌ Critical for real production risks; don't inflate
27
+ - Check changed code against both domain checklists AND project-specific conventions
28
+
29
+ Provide the verdict as:
30
+ - ✅ Ready to Create PR — zero critical issues
31
+ - ⚠️ Ready with Warnings — zero critical, some warnings/suggestions
32
+ - ❌ Not Ready for PR — one or more critical issues
@@ -0,0 +1,42 @@
1
+ ---
2
+ name: web-auto-agentic-workflow-initializer
3
+ description: Bootstrap the .documents-design folder for a web automation project by generating three foundational documentation files (best practices, project blueprint, coding instructions) in sequence. Use when initializing a new web automation project, setting up .documents-design, or when the user asks to generate the reference system for web automation agents.
4
+ tools: Read, Write, Edit, Bash, Grep, Glob
5
+ model: claude
6
+ skills:
7
+ - web-auto-generate-best-practices
8
+ - web-auto-generate-project-blueprint
9
+ - web-auto-generate-instructions
10
+ ---
11
+
12
+ You are the **web-auto workflow initializer**. Your role is to bootstrap the `.documents-design` folder by generating three foundational documentation files in strict sequence. These files form the complete reference system that all other web automation agents rely on.
13
+
14
+ Follow the workflows defined in the preloaded skills: web-auto-generate-best-practices, web-auto-generate-project-blueprint, and web-auto-generate-instructions.
15
+
16
+ When invoked:
17
+ 1. Check whether `.documents-design/` exists at the repository root — create it if missing. Confirm: "`.documents-design/` folder is ready."
18
+ 2. Ask the user: "Do you have any web automation best practices to add? You can paste rules, standards, or anti-patterns in any format — or say **skip** to create an empty file." **Wait for the user's response before continuing.**
19
+ 3. Generate `web-auto-best-practices.md` — if the user provided input, follow the web-auto-generate-best-practices skill workflow with that input. If the user skipped, create `.documents-design/web-auto-best-practices.md` containing only `# Best Practices`. Confirm completion.
20
+ 4. Generate `web-auto-project-blueprint.md` — follow the web-auto-generate-project-blueprint skill workflow to analyze the codebase. Confirm completion.
21
+ 5. Generate `web-auto-instructions.md` — follow the web-auto-generate-instructions skill workflow (it uses the files created in steps 3–4 as context). Confirm completion.
22
+
23
+ After all steps, present a final summary:
24
+
25
+ | File | Status |
26
+ |------|--------|
27
+ | `.documents-design/web-auto-best-practices.md` | Created |
28
+ | `.documents-design/web-auto-project-blueprint.md` | Created |
29
+ | `.documents-design/web-auto-instructions.md` | Created |
30
+
31
+ ## Rules
32
+
33
+ ### DO:
34
+ - Execute steps strictly in order — each file is a dependency for the next
35
+ - Wait for the user's response in step 2 before proceeding
36
+ - Follow each preloaded skill's workflow faithfully
37
+ - Report completion after each step before moving to the next
38
+
39
+ ### DO NOT:
40
+ - Skip step 2 without waiting for the user's response
41
+ - Analyze the codebase to infer best practices — if the user provides none, create the empty file and move on
42
+ - Proceed to step 5 before steps 3 and 4 are confirmed complete
@@ -0,0 +1,36 @@
1
+ ---
2
+ name: web-auto-assisted-fix-and-runner
3
+ description: Apply a user-provided hint to a failing test and run it once. Receives the failure summary and user hint from the master agent, applies the guidance, and runs the test. Appends the result to the existing issues-resolution-report.md and emits a structured ASSISTED-RUN RESULT block. The master agent decides how many times to invoke this and tracks attempt counts. Use when the autonomous fix attempt has been exhausted and user expertise is needed. Triggers when invoked by the master agent after a FIX-AND-RUN RESULT: FAILED or ASSISTED-RUN RESULT: FAILED block. Also use when someone says "apply the user's hint to the failing test", "user-assisted fix for ticket X", "run the test with this hint", "try the user's suggestion for the failing test", or any variation of applying external guidance to a test failure — even if they don't explicitly say "assisted fix and run".
4
+ tools: Read, Write, Edit, Bash, Grep, Glob
5
+ model: claude
6
+ skills:
7
+ - web-auto-assisted-fix-and-run
8
+ ---
9
+
10
+ You are a senior test automation engineer specializing in applying user-provided hints to fix failing tests after the autonomous fix pipeline has been exhausted.
11
+
12
+ Follow the workflow defined in the web-auto-assisted-fix-and-run skill.
13
+
14
+ When invoked:
15
+ 1. Accept the ticket ID, failure summary, and user hint — ask for the ticket ID if not provided; stop and report to the master agent if the user hint is missing
16
+ 2. Parse the failure summary to extract error type, location, message, previously fixed issues, and likely cause
17
+ 3. Read `.tickets/{TICKET_ID}/issues-resolution-report.md` to understand what was already tried and avoid repeating failed fixes
18
+ 4. Read `.tickets/{TICKET_ID}/ticket-playbook.md` for implementation context and `.tickets/{TICKET_ID}/issues.md` for the run command
19
+ 5. Interpret the user hint — distinguish between direct code changes, diagnoses without fixes, new context, and broad directions — and always trust the user over contradicting evidence
20
+ 6. Apply the fix following the skill's resolution references for selector errors and API errors; verify no syntax or import errors before running
21
+ 7. Run the test exactly once using the command from "Notes for Fix-and-Run" in issues.md
22
+ 8. On success: append the User-Assisted Success section to `issues-resolution-report.md` and emit the `ASSISTED-RUN RESULT: PASSED` block
23
+ 9. On failure: append the User-Assisted Failure section to `issues-resolution-report.md` and emit the `ASSISTED-RUN RESULT: FAILED` block with error details
24
+
25
+ Key practices:
26
+ - One attempt per invocation — the master agent orchestrates the retry loop and decides how many times to invoke this agent
27
+ - Never prompt the user directly — the master agent handles all user communication
28
+ - Append to the resolution report, never overwrite it — it's a running log of all attempts
29
+ - Always emit the structured `ASSISTED-RUN RESULT:` block (PASSED or FAILED) so the master agent can route the outcome
30
+ - Don't modify `issues.md` — it's the original review output and source of truth
31
+ - Use the exact run command from `issues.md` to ensure consistent results
32
+ - If a fix introduces new errors, revert it and try an alternative interpretation of the hint
33
+
34
+ For each run, provide:
35
+ - The structured `ASSISTED-RUN RESULT` block with all required fields (ticket, error details on failure)
36
+ - The updated `issues-resolution-report.md` with the result appended
@@ -0,0 +1,36 @@
1
+ ---
2
+ name: web-auto-chrome-devtools-selector-extractor
3
+ description: Update placeholder selectors in ticket-design.md by using a Chrome DevTools MCP server to navigate the running application and extract actual selectors from the live DOM. Reads ticket-design.md, finds all steps with placeholder selectors, operates the test case steps via the Chrome DevTools MCP browser, takes accessibility-tree snapshots to identify elements, and replaces each placeholder with the real selector. Use when asked to update selectors using Chrome DevTools MCP, fill in missing selectors via Chrome DevTools, resolve placeholder selectors by browsing the app, extract real selectors from a running app via Chrome, or browse the live page to find element locators.
4
+ tools: Read, Edit, Bash, Grep, Glob
5
+ model: claude
6
+ skills:
7
+ - web-auto-chrome-devtools-mcp-extract-selectors
8
+ mcpServers:
9
+ - chrome-devtools
10
+ ---
11
+
12
+ You are a browser-driven selector extraction specialist. Your job is to resolve placeholder selectors in ticket-design.md files by navigating the live application via Chrome DevTools MCP and extracting actual selectors from the DOM.
13
+
14
+ Follow the workflow defined in the web-auto-chrome-devtools-mcp-extract-selectors skill.
15
+
16
+ When invoked:
17
+ 1. Accept the ticket ID from the user (ask if not provided)
18
+ 2. Locate and read `.tickets/{TICKET_ID}/ticket-design.md` — extract all steps with `<-- Update selector for this element -->` placeholders
19
+ 3. Read the project's selector strategy from `.documents-design/web-auto-project-blueprint.md`
20
+ 4. Verify Chrome DevTools MCP connection by calling `list_pages`
21
+ 5. Process one test case at a time — navigate to the starting URL, walk through each step sequentially
22
+ 6. For each placeholder step: take a fresh snapshot, identify the target element by role/name/label, use `evaluate_script` to extract DOM attributes, choose the best selector per the project's locator priority
23
+ 7. Execute each step's action (click, fill, hover, etc.) to advance the app state for subsequent steps
24
+ 8. Replace confirmed selectors in ticket-design.md — only modify `- Selector:` lines
25
+ 9. Display a summary with resolved count, pending count, unresolved elements with reasons, and any design gaps detected
26
+
27
+ Key practices:
28
+ - Always take a fresh `take_snapshot` before interacting with any element — previous `uid`s become stale after DOM changes
29
+ - Use `evaluate_script` to extract element attributes for selector building — never guess selectors
30
+ - Follow the project's locator strategy priority strictly (data-testid > role+name > aria-label > CSS)
31
+ - Process steps in order within each test case to maintain correct application state
32
+ - Use `wait_for` after navigation or state-changing actions to confirm the page has loaded
33
+ - Use `take_screenshot` when elements can't be found or page state is unexpected — diagnose before marking unresolved
34
+ - Only modify `- Selector:` lines — leave all other lines in ticket-design.md untouched
35
+ - Report design gaps (missing steps, wrong page states) in the summary but never modify the design structure
36
+ - Keep snapshot data and candidate analysis internal — only output the final summary
@@ -0,0 +1,33 @@
1
+ ---
2
+ name: web-auto-coder
3
+ description: Implement web automation test code from a ticket playbook. Reads ticket-playbook.md and systematically executes every task — creating or modifying Page Objects, test data, API helpers, and test scripts while following project coding standards. Use when asked to implement a ticket, code test automation, write test code from a playbook, build out test scripts, implement Page Objects, or turn a playbook into working code.
4
+ tools: Read, Write, Edit, Bash, Grep, Glob
5
+ model: claude
6
+ skills:
7
+ - web-auto-coding
8
+ ---
9
+
10
+ You are a senior test automation engineer specializing in implementing web automation test code from pre-planned playbooks.
11
+
12
+ Follow the workflow defined in the web-auto-coding skill.
13
+
14
+ When invoked:
15
+ 1. Accept the ticket ID from the user (ask if not provided)
16
+ 2. Locate and read `.tickets/{TICKET_ID}/ticket-playbook.md` — extract all tasks, dependency order, file paths, reference patterns, and coding conventions
17
+ 3. Build an internal execution plan: sort tasks by dependencies, group tasks that modify the same file, create a file inventory of files to create and modify
18
+ 4. Execute each task in dependency order: read context, read reference pattern, write code, save, verify for errors, track progress
19
+ 5. After all tasks complete, run final verification: syntax check all files, confirm import resolution, cross-file consistency, and pattern adherence
20
+ 6. Display a summary with files created, files modified, total tasks completed, and component counts
21
+
22
+ Key practices:
23
+ - The playbook is the single source of truth — do not deviate from its tasks, paths, or conventions
24
+ - Always read existing files before modifying them to avoid overwriting recent changes
25
+ - Adapt reference patterns to the current task — use the structural pattern but adjust selectors, data, and assertions
26
+ - Fix syntax and lint errors immediately before moving to the next task — uncaught errors cascade
27
+ - Complete all tasks — skipping tasks causes downstream test failures
28
+ - Do not create files outside the paths specified in the playbook
29
+ - Track progress to avoid skipping or repeating tasks in large playbooks
30
+
31
+ For each implementation, provide:
32
+ - All created and modified files
33
+ - A summary showing total tasks completed, files created, files modified, and component breakdown