myaidev-method 0.3.3 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. package/.claude-plugin/plugin.json +0 -1
  2. package/.env.example +5 -4
  3. package/CHANGELOG.md +2 -2
  4. package/CONTENT_CREATION_GUIDE.md +489 -3211
  5. package/DEVELOPER_USE_CASES.md +1 -1
  6. package/MODULAR_INSTALLATION.md +2 -2
  7. package/README.md +39 -33
  8. package/TECHNICAL_ARCHITECTURE.md +1 -1
  9. package/USER_GUIDE.md +242 -190
  10. package/agents/content-editor-agent.md +90 -0
  11. package/agents/content-planner-agent.md +97 -0
  12. package/agents/content-research-agent.md +62 -0
  13. package/agents/content-seo-agent.md +101 -0
  14. package/agents/content-writer-agent.md +69 -0
  15. package/agents/infographic-analyzer-agent.md +63 -0
  16. package/agents/infographic-designer-agent.md +72 -0
  17. package/bin/cli.js +777 -535
  18. package/{content-rules.example.md → content-rules-example.md} +2 -2
  19. package/dist/mcp/health-check.js +82 -68
  20. package/dist/mcp/mcp-config.json +8 -0
  21. package/dist/mcp/openstack-server.js +1746 -1262
  22. package/dist/server/.tsbuildinfo +1 -1
  23. package/extension.json +21 -4
  24. package/package.json +181 -184
  25. package/skills/company-config/SKILL.md +133 -0
  26. package/skills/configure/SKILL.md +1 -1
  27. package/skills/myai-configurator/SKILL.md +77 -0
  28. package/skills/myai-configurator/content-creation-configurator/SKILL.md +516 -0
  29. package/skills/myai-configurator/content-maintenance-configurator/SKILL.md +397 -0
  30. package/skills/myai-content-enrichment/SKILL.md +114 -0
  31. package/skills/myai-content-ideation/SKILL.md +288 -0
  32. package/skills/myai-content-ideation/evals/evals.json +182 -0
  33. package/skills/myai-content-production-coordinator/SKILL.md +946 -0
  34. package/skills/{content-rules-setup → myai-content-rules-setup}/SKILL.md +1 -1
  35. package/skills/{content-verifier → myai-content-verifier}/SKILL.md +1 -1
  36. package/skills/myai-content-writer/SKILL.md +333 -0
  37. package/skills/myai-content-writer/agents/editor-agent.md +138 -0
  38. package/skills/myai-content-writer/agents/planner-agent.md +121 -0
  39. package/skills/myai-content-writer/agents/research-agent.md +83 -0
  40. package/skills/myai-content-writer/agents/seo-agent.md +139 -0
  41. package/skills/myai-content-writer/agents/visual-planner-agent.md +110 -0
  42. package/skills/myai-content-writer/agents/writer-agent.md +85 -0
  43. package/skills/{infographic → myai-infographic}/SKILL.md +1 -1
  44. package/skills/myai-proprietary-content-verifier/SKILL.md +175 -0
  45. package/skills/myai-proprietary-content-verifier/evals/evals.json +36 -0
  46. package/skills/myai-skill-builder/SKILL.md +699 -0
  47. package/skills/myai-skill-builder/agents/analyzer-agent.md +137 -0
  48. package/skills/myai-skill-builder/agents/comparator-agent.md +77 -0
  49. package/skills/myai-skill-builder/agents/grader-agent.md +103 -0
  50. package/skills/myai-skill-builder/assets/eval_review.html +131 -0
  51. package/skills/myai-skill-builder/references/schemas.md +211 -0
  52. package/skills/myai-skill-builder/scripts/aggregate_benchmark.py +190 -0
  53. package/skills/myai-skill-builder/scripts/generate_review.py +381 -0
  54. package/skills/myai-skill-builder/scripts/package_skill.py +91 -0
  55. package/skills/myai-skill-builder/scripts/run_eval.py +105 -0
  56. package/skills/myai-skill-builder/scripts/run_loop.py +211 -0
  57. package/skills/myai-skill-builder/scripts/utils.py +123 -0
  58. package/skills/myai-visual-generator/SKILL.md +125 -0
  59. package/skills/myai-visual-generator/evals/evals.json +155 -0
  60. package/skills/myai-visual-generator/references/infographic-pipeline.md +73 -0
  61. package/skills/myai-visual-generator/references/research-visuals.md +57 -0
  62. package/skills/myai-visual-generator/references/services.md +89 -0
  63. package/skills/myai-visual-generator/scripts/visual-generation-utils.js +1272 -0
  64. package/skills/myaidev-analyze/agents/dependency-mapper-agent.md +236 -0
  65. package/skills/myaidev-analyze/agents/pattern-detector-agent.md +240 -0
  66. package/skills/myaidev-analyze/agents/structure-scanner-agent.md +171 -0
  67. package/skills/myaidev-analyze/agents/tech-profiler-agent.md +291 -0
  68. package/skills/myaidev-architect/agents/compliance-checker-agent.md +287 -0
  69. package/skills/myaidev-architect/agents/requirements-analyst-agent.md +194 -0
  70. package/skills/myaidev-architect/agents/system-designer-agent.md +315 -0
  71. package/skills/myaidev-coder/agents/implementer-agent.md +185 -0
  72. package/skills/myaidev-coder/agents/integration-agent.md +168 -0
  73. package/skills/myaidev-coder/agents/pattern-scanner-agent.md +161 -0
  74. package/skills/myaidev-coder/agents/self-reviewer-agent.md +168 -0
  75. package/skills/myaidev-debug/agents/fix-agent-debug.md +317 -0
  76. package/skills/myaidev-debug/agents/hypothesis-agent.md +226 -0
  77. package/skills/myaidev-debug/agents/investigator-agent.md +250 -0
  78. package/skills/myaidev-debug/agents/symptom-collector-agent.md +231 -0
  79. package/skills/myaidev-documenter/agents/code-reader-agent.md +172 -0
  80. package/skills/myaidev-documenter/agents/doc-validator-agent.md +174 -0
  81. package/skills/myaidev-documenter/agents/doc-writer-agent.md +379 -0
  82. package/skills/myaidev-figma/SKILL.md +212 -0
  83. package/skills/myaidev-figma/capture.js +133 -0
  84. package/skills/myaidev-figma/crawl.js +130 -0
  85. package/skills/myaidev-figma-configure/SKILL.md +130 -0
  86. package/skills/myaidev-migrate/agents/migration-planner-agent.md +237 -0
  87. package/skills/myaidev-migrate/agents/migration-writer-agent.md +248 -0
  88. package/skills/myaidev-migrate/agents/schema-analyzer-agent.md +190 -0
  89. package/skills/myaidev-performance/agents/benchmark-agent.md +281 -0
  90. package/skills/myaidev-performance/agents/optimizer-agent.md +277 -0
  91. package/skills/myaidev-performance/agents/profiler-agent.md +252 -0
  92. package/skills/myaidev-refactor/agents/refactor-executor-agent.md +221 -0
  93. package/skills/myaidev-refactor/agents/refactor-planner-agent.md +213 -0
  94. package/skills/myaidev-refactor/agents/regression-guard-agent.md +242 -0
  95. package/skills/myaidev-refactor/agents/smell-detector-agent.md +233 -0
  96. package/skills/myaidev-reviewer/agents/auto-fixer-agent.md +238 -0
  97. package/skills/myaidev-reviewer/agents/code-analyst-agent.md +220 -0
  98. package/skills/myaidev-reviewer/agents/security-scanner-agent.md +262 -0
  99. package/skills/myaidev-tester/agents/coverage-analyst-agent.md +163 -0
  100. package/skills/myaidev-tester/agents/tdd-driver-agent.md +242 -0
  101. package/skills/myaidev-tester/agents/test-runner-agent.md +176 -0
  102. package/skills/myaidev-tester/agents/test-strategist-agent.md +154 -0
  103. package/skills/myaidev-tester/agents/test-writer-agent.md +242 -0
  104. package/skills/myaidev-workflow/agents/analyzer-agent.md +317 -0
  105. package/skills/myaidev-workflow/agents/coordinator-agent.md +253 -0
  106. package/skills/openstack-manager/SKILL.md +1 -1
  107. package/skills/payloadcms-publisher/SKILL.md +141 -77
  108. package/skills/payloadcms-publisher/references/field-mapping.md +142 -0
  109. package/skills/payloadcms-publisher/references/lexical-format.md +97 -0
  110. package/skills/security-auditor/SKILL.md +1 -1
  111. package/src/cli/commands/addon.js +184 -123
  112. package/src/config/workflows.js +172 -228
  113. package/src/lib/ascii-banner.js +197 -182
  114. package/src/lib/{content-coordinator.js → content-production-coordinator.js} +649 -459
  115. package/src/lib/installation-detector.js +93 -59
  116. package/src/lib/payloadcms-utils.js +285 -510
  117. package/src/lib/update-manager.js +120 -61
  118. package/src/lib/workflow-installer.js +55 -0
  119. package/src/mcp/health-check.js +82 -68
  120. package/src/mcp/openstack-server.js +1746 -1262
  121. package/src/scripts/configure-visual-apis.js +224 -173
  122. package/src/scripts/configure-wordpress-mcp.js +96 -66
  123. package/src/scripts/init/install.js +109 -85
  124. package/src/scripts/init-project.js +138 -67
  125. package/src/scripts/utils/write-content.js +67 -52
  126. package/src/scripts/wordpress/publish-to-wordpress.js +128 -128
  127. package/src/templates/claude/CLAUDE.md +131 -0
  128. package/hooks/hooks.json +0 -26
  129. package/skills/content-coordinator/SKILL.md +0 -130
  130. package/skills/content-enrichment/SKILL.md +0 -80
  131. package/skills/content-writer/SKILL.md +0 -285
  132. package/skills/visual-generator/SKILL.md +0 -140
@@ -0,0 +1,190 @@
1
+ #!/usr/bin/env python3
2
+ """Aggregate grading.json and timing.json files into a benchmark.json summary.
3
+
4
+ Usage:
5
+ python3 aggregate_benchmark.py <iteration-dir>
6
+ python3 aggregate_benchmark.py <workspace-dir> --multi-run
7
+
8
+ Single iteration mode:
9
+ Reads all grading.json and timing.json from an iteration directory,
10
+ produces benchmark.json with pass rates, tokens, and timing per config.
11
+
12
+ Multi-run mode (--multi-run):
13
+ Reads benchmark.json from multiple iteration directories,
14
+ computes mean ± stddev across runs.
15
+ """
16
+
17
+ import argparse
18
+ import sys
19
+ from pathlib import Path
20
+
21
+ # Add script directory to path for utils import
22
+ sys.path.insert(0, str(Path(__file__).parent))
23
+ from utils import (
24
+ load_json, save_json, collect_grading_files, collect_timing_files,
25
+ mean, stddev, now_iso
26
+ )
27
+
28
+
29
def _parse_iteration_number(dir_name):
    """Best-effort parse of <n> from an "iteration-<n>" directory name.

    Returns 1 when the suffix is missing or non-numeric instead of crashing
    (the previous ``int(name.split("-")[1])`` raised ValueError for any
    hyphenated name without a numeric second segment, e.g. "my-workspace").
    """
    if "-" in dir_name:
        suffix = dir_name.rsplit("-", 1)[1]
        if suffix.isdigit():
            return int(suffix)
    return 1


def aggregate_iteration(iteration_dir):
    """Aggregate results from a single iteration into benchmark.json.

    Reads every grading.json / timing.json found under *iteration_dir*,
    groups them by config name (e.g. "with_skill" / "without_skill"),
    computes per-config pass-rate/token/timing statistics, and — when both
    standard configs are present — a with-vs-without comparison.

    Args:
        iteration_dir: Path (or str) to an "iteration-<n>" directory.

    Returns:
        The benchmark dict, which is also written to
        ``<iteration_dir>/benchmark.json``.

    Exits with status 1 if no grading.json files are found.
    """
    iteration_dir = Path(iteration_dir)
    gradings = collect_grading_files(iteration_dir)
    timings = collect_timing_files(iteration_dir)

    if not gradings:
        print(f"No grading.json files found in {iteration_dir}", file=sys.stderr)
        sys.exit(1)

    # Group by config. Timings whose config has no grading are dropped on
    # purpose: a timing with no grading contributes nothing to pass rates.
    configs = {}
    for g in gradings:
        config = g.get("config", "unknown")
        if config not in configs:
            configs[config] = {"gradings": [], "timings": []}
        configs[config]["gradings"].append(g)

    for t in timings:
        config = t.get("config", "unknown")
        if config in configs:
            configs[config]["timings"].append(t)

    # Build benchmark skeleton; "comparison" stays empty unless both
    # standard configs are present (checked below).
    benchmark = {
        "iteration": _parse_iteration_number(iteration_dir.name),
        "timestamp": now_iso(),
        "configs": {},
        "comparison": {}
    }

    for config_name, data in configs.items():
        pass_rates = [g["pass_rate"] for g in data["gradings"]]
        tokens = [t["total_tokens"] for t in data["timings"]]
        durations = [t["duration_ms"] for t in data["timings"]]

        evals = []
        for g in data["gradings"]:
            eval_entry = {
                "eval_id": g["eval_id"],
                "pass_rate": g["pass_rate"],
                "pass_count": g["pass_count"],
                "fail_count": g["fail_count"],
            }
            # Attach the first timing record for the same eval, if any.
            matching_timing = next(
                (t for t in data["timings"] if t["eval_id"] == g["eval_id"]),
                None,
            )
            if matching_timing:
                eval_entry["tokens"] = matching_timing["total_tokens"]
                eval_entry["duration_ms"] = matching_timing["duration_ms"]
            evals.append(eval_entry)

        benchmark["configs"][config_name] = {
            "overall_pass_rate": mean(pass_rates),
            "total_tokens_mean": mean(tokens) if tokens else 0,
            "total_tokens_stddev": stddev(tokens) if tokens else 0,
            "duration_ms_mean": mean(durations) if durations else 0,
            "duration_ms_stddev": stddev(durations) if durations else 0,
            "evals": evals
        }

    # Compute comparison if both configs exist.
    if "with_skill" in benchmark["configs"] and "without_skill" in benchmark["configs"]:
        ws = benchmark["configs"]["with_skill"]
        wo = benchmark["configs"]["without_skill"]
        benchmark["comparison"] = {
            "pass_rate_delta": ws["overall_pass_rate"] - wo["overall_pass_rate"],
            # Overhead percentages guard against division by zero when the
            # baseline config recorded no tokens / no time.
            "token_overhead_percent": (
                ((ws["total_tokens_mean"] - wo["total_tokens_mean"]) / wo["total_tokens_mean"] * 100)
                if wo["total_tokens_mean"] > 0 else 0
            ),
            "time_overhead_percent": (
                ((ws["duration_ms_mean"] - wo["duration_ms_mean"]) / wo["duration_ms_mean"] * 100)
                if wo["duration_ms_mean"] > 0 else 0
            ),
        }

        # Non-discriminating evals: both configs pass everything, so the
        # eval cannot tell the skill and the baseline apart.
        non_disc = []
        ws_evals = {e["eval_id"]: e for e in ws["evals"]}
        wo_evals = {e["eval_id"]: e for e in wo["evals"]}
        for eval_id in ws_evals:
            if eval_id in wo_evals:
                if ws_evals[eval_id]["pass_rate"] == 1.0 and wo_evals[eval_id]["pass_rate"] == 1.0:
                    non_disc.append(eval_id)
        benchmark["comparison"]["non_discriminating_evals"] = non_disc

    output_path = iteration_dir / "benchmark.json"
    save_json(output_path, benchmark)
    return benchmark
118
+
119
+
120
def aggregate_multi_run(workspace_dir):
    """Aggregate benchmark.json files from multiple iterations for statistical analysis."""
    workspace_dir = Path(workspace_dir)

    # Gather per-iteration benchmarks in directory-name order.
    benchmarks = []
    for entry in sorted(workspace_dir.iterdir()):
        if not (entry.is_dir() and entry.name.startswith("iteration-")):
            continue
        candidate = entry / "benchmark.json"
        if candidate.exists():
            benchmarks.append(load_json(candidate))

    if len(benchmarks) < 2:
        print(f"Need at least 2 iteration benchmarks for multi-run analysis, found {len(benchmarks)}", file=sys.stderr)
        sys.exit(1)

    summary = {
        "timestamp": now_iso(),
        "num_runs": len(benchmarks),
        "configs": {}
    }

    # Compute mean/stddev per config across every run that recorded it.
    for config_name in ("with_skill", "without_skill"):
        present = [b["configs"][config_name]
                   for b in benchmarks if config_name in b.get("configs", {})]
        if not present:
            continue
        pass_rates = [c["overall_pass_rate"] for c in present]
        tokens = [c["total_tokens_mean"] for c in present]
        durations = [c["duration_ms_mean"] for c in present]
        summary["configs"][config_name] = {
            "pass_rate": {"mean": mean(pass_rates), "stddev": stddev(pass_rates)},
            "tokens": {"mean": mean(tokens), "stddev": stddev(tokens)},
            "duration_ms": {"mean": mean(durations), "stddev": stddev(durations)},
        }

    save_json(workspace_dir / "benchmark.json", summary)
    return summary
160
+
161
+
162
def main():
    """CLI entry point: summarize one iteration, or many with --multi-run."""
    parser = argparse.ArgumentParser(description="Aggregate eval results into benchmark summary")
    parser.add_argument("path", help="Iteration directory or workspace directory (with --multi-run)")
    parser.add_argument("--multi-run", action="store_true", help="Aggregate across multiple iterations")
    args = parser.parse_args()

    if args.multi_run:
        summary = aggregate_multi_run(args.path)
        print(f"\nMulti-run benchmark ({summary['num_runs']} runs):")
        for config, stats in summary["configs"].items():
            rate = stats["pass_rate"]
            print(f" {config}: pass_rate={rate['mean']:.2f} ± {rate['stddev']:.2f}")
        return

    summary = aggregate_iteration(args.path)
    print(f"\nIteration {summary['iteration']} benchmark:")
    for config, stats in summary["configs"].items():
        print(f" {config}: pass_rate={stats['overall_pass_rate']:.2f}, "
              f"tokens={stats['total_tokens_mean']:.0f}, "
              f"time={stats['duration_ms_mean']:.0f}ms")

    comparison = summary.get("comparison")
    if comparison:
        print("\n Comparison:")
        print(f" Pass rate delta: +{comparison['pass_rate_delta']:.2f}")
        print(f" Token overhead: {comparison['token_overhead_percent']:.1f}%")
        print(f" Time overhead: {comparison['time_overhead_percent']:.1f}%")


if __name__ == "__main__":
    main()
@@ -0,0 +1,381 @@
1
+ #!/usr/bin/env python3
2
+ """Generate an HTML review page for eval results.
3
+
4
+ Creates an interactive HTML page showing eval outputs, grading results,
5
+ and benchmark data for human review.
6
+
7
+ Usage:
8
+ python3 generate_review.py <workspace-dir>
9
+ python3 generate_review.py <workspace-dir> --static output.html
10
+ python3 generate_review.py <workspace-dir> --open
11
+
12
+ Options:
13
+ --static <path> Generate a standalone HTML file instead of serving
14
+ --open Open the generated HTML in the default browser
15
+ --iteration <n> Show specific iteration (default: latest)
16
+ """
17
+
18
+ import argparse
19
+ import json
20
+ import os
21
+ import sys
22
+ import webbrowser
23
+ from pathlib import Path
24
+
25
+ sys.path.insert(0, str(Path(__file__).parent))
26
+ from utils import load_json, find_latest_iteration
27
+
28
+
29
def _read_config_dir(config_dir):
    """Read the outputs/, grading.json and timing.json for one eval config."""
    data = {"outputs": {}, "grading": None, "timing": None}

    outputs_dir = config_dir / "outputs"
    if outputs_dir.exists():
        for entry in sorted(outputs_dir.iterdir()):
            if not entry.is_file():
                continue
            try:
                # Cap each output at 5000 chars to keep the review page light.
                data["outputs"][entry.name] = entry.read_text()[:5000]
            except (UnicodeDecodeError, PermissionError):
                data["outputs"][entry.name] = "(binary file)"

    grading_path = config_dir / "grading.json"
    if grading_path.exists():
        data["grading"] = load_json(grading_path)

    timing_path = config_dir / "timing.json"
    if timing_path.exists():
        data["timing"] = load_json(timing_path)

    return data


def collect_eval_data(iteration_dir):
    """Collect all eval data from an iteration directory.

    Returns a ``(evals, benchmark)`` tuple where *evals* is a list of
    per-eval dicts (id plus per-config outputs/grading/timing) and
    *benchmark* is the parsed benchmark.json, or None if absent.
    """
    iteration_dir = Path(iteration_dir)

    evals = []
    for entry in sorted(iteration_dir.iterdir()):
        if not (entry.is_dir() and entry.name.startswith("eval-")):
            continue
        record = {"id": entry.name.replace("eval-", ""), "configs": {}}
        for config in ("with_skill", "without_skill"):
            config_dir = entry / config
            if config_dir.exists():
                record["configs"][config] = _read_config_dir(config_dir)
        evals.append(record)

    benchmark_path = iteration_dir / "benchmark.json"
    benchmark = load_json(benchmark_path) if benchmark_path.exists() else None

    return evals, benchmark
79
+
80
+
81
def generate_html(evals, benchmark, iteration_num, workspace_dir):
    """Generate the HTML review page.

    Args:
        evals: List of eval dicts as produced by collect_eval_data().
        benchmark: Parsed benchmark.json dict, or None if absent.
        iteration_num: Iteration number shown in the page header/title.
        workspace_dir: Workspace path string shown in the page header.

    Returns:
        The complete HTML document as a string.
    """
    # Serialize the data for embedding inside an inline <script> block.
    # "</" must be escaped to "<\/" (a no-op inside a JS/JSON string) so
    # that eval output containing "</script>" cannot terminate the script
    # element early and inject arbitrary markup into the review page.
    evals_json = json.dumps(evals, indent=2).replace("</", "<\\/")
    benchmark_json = (
        json.dumps(benchmark, indent=2).replace("</", "<\\/")
        if benchmark else "null"
    )

    html = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Skill Eval Review - Iteration {iteration_num}</title>
<style>
* {{ margin: 0; padding: 0; box-sizing: border-box; }}
body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, monospace; background: #0d1117; color: #c9d1d9; }}
.header {{ background: #161b22; border-bottom: 1px solid #30363d; padding: 16px 24px; display: flex; justify-content: space-between; align-items: center; }}
.header h1 {{ font-size: 20px; color: #f0f6fc; }}
.tabs {{ display: flex; gap: 0; border-bottom: 1px solid #30363d; background: #161b22; padding: 0 24px; }}
.tab {{ padding: 12px 20px; cursor: pointer; border-bottom: 2px solid transparent; color: #8b949e; font-size: 14px; }}
.tab:hover {{ color: #c9d1d9; }}
.tab.active {{ color: #f0f6fc; border-bottom-color: #f78166; }}
.content {{ padding: 24px; max-width: 1400px; margin: 0 auto; }}
.eval-card {{ background: #161b22; border: 1px solid #30363d; border-radius: 6px; margin-bottom: 16px; overflow: hidden; }}
.eval-header {{ padding: 12px 16px; background: #1c2128; border-bottom: 1px solid #30363d; display: flex; justify-content: space-between; align-items: center; }}
.eval-header h3 {{ font-size: 16px; color: #f0f6fc; }}
.badge {{ padding: 2px 8px; border-radius: 12px; font-size: 12px; font-weight: 600; }}
.badge.pass {{ background: #1a3a2a; color: #3fb950; }}
.badge.fail {{ background: #3a1a1a; color: #f85149; }}
.badge.mixed {{ background: #3a2a1a; color: #d29922; }}
.eval-body {{ padding: 16px; }}
.config-columns {{ display: grid; grid-template-columns: 1fr 1fr; gap: 16px; }}
.config-col {{ background: #0d1117; border: 1px solid #30363d; border-radius: 6px; padding: 12px; }}
.config-col h4 {{ font-size: 14px; margin-bottom: 8px; color: #8b949e; }}
.assertion {{ padding: 8px; margin: 4px 0; border-radius: 4px; font-size: 13px; }}
.assertion.pass {{ background: #1a3a2a; border-left: 3px solid #3fb950; }}
.assertion.fail {{ background: #3a1a1a; border-left: 3px solid #f85149; }}
.evidence {{ color: #8b949e; font-size: 12px; margin-top: 4px; }}
.output-block {{ background: #0d1117; border: 1px solid #30363d; border-radius: 4px; padding: 12px; margin: 8px 0; max-height: 300px; overflow-y: auto; font-size: 13px; white-space: pre-wrap; }}
.benchmark-grid {{ display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 16px; }}
.metric-card {{ background: #161b22; border: 1px solid #30363d; border-radius: 6px; padding: 16px; text-align: center; }}
.metric-value {{ font-size: 32px; font-weight: 700; color: #f0f6fc; margin: 8px 0; }}
.metric-label {{ font-size: 13px; color: #8b949e; }}
.metric-sub {{ font-size: 12px; color: #8b949e; margin-top: 4px; }}
.feedback-section {{ margin-top: 16px; }}
.feedback-section textarea {{ width: 100%; height: 80px; background: #0d1117; border: 1px solid #30363d; border-radius: 6px; padding: 8px; color: #c9d1d9; font-family: inherit; font-size: 13px; resize: vertical; }}
.feedback-section select {{ background: #0d1117; border: 1px solid #30363d; border-radius: 6px; padding: 6px 12px; color: #c9d1d9; font-size: 13px; margin-right: 8px; }}
.btn {{ padding: 8px 16px; border-radius: 6px; border: none; cursor: pointer; font-size: 14px; font-weight: 600; }}
.btn-primary {{ background: #238636; color: #fff; }}
.btn-primary:hover {{ background: #2ea043; }}
.submit-bar {{ position: fixed; bottom: 0; left: 0; right: 0; background: #161b22; border-top: 1px solid #30363d; padding: 12px 24px; display: flex; justify-content: flex-end; gap: 12px; }}
.hidden {{ display: none; }}
.bar-chart {{ display: flex; align-items: flex-end; gap: 8px; height: 120px; margin: 16px 0; }}
.bar {{ flex: 1; border-radius: 4px 4px 0 0; min-width: 30px; position: relative; }}
.bar.with {{ background: #238636; }}
.bar.without {{ background: #6e7681; }}
.bar-label {{ position: absolute; bottom: -20px; left: 50%; transform: translateX(-50%); font-size: 10px; white-space: nowrap; }}
</style>
</head>
<body>
<div class="header">
<h1>Skill Eval Review &mdash; Iteration {iteration_num}</h1>
<span style="color: #8b949e; font-size: 13px;">{workspace_dir}</span>
</div>
<div class="tabs">
<div class="tab active" onclick="showTab('outputs')">Outputs</div>
<div class="tab" onclick="showTab('benchmark')">Benchmark</div>
</div>
<div id="outputs-tab" class="content"></div>
<div id="benchmark-tab" class="content hidden"></div>
<div class="submit-bar">
<select id="action-select">
<option value="iterate">Iterate (improve &amp; rerun)</option>
<option value="publish">Publish (submit to marketplace)</option>
<option value="stop">Stop (done for now)</option>
</select>
<button class="btn btn-primary" onclick="submitFeedback()">Submit All Reviews</button>
</div>

<script>
const evals = {evals_json};
const benchmark = {benchmark_json};

function showTab(name) {{
document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
document.querySelectorAll('.content').forEach(c => c.classList.add('hidden'));
event.target.classList.add('active');
document.getElementById(name + '-tab').classList.remove('hidden');
}}

function renderOutputs() {{
const container = document.getElementById('outputs-tab');
let html = '';

evals.forEach(ev => {{
const wsGrading = ev.configs.with_skill?.grading;
const woGrading = ev.configs.without_skill?.grading;
const wsRate = wsGrading ? wsGrading.pass_rate : null;
const woRate = woGrading ? woGrading.pass_rate : null;

let badgeClass = 'mixed';
let badgeText = 'N/A';
if (wsRate !== null) {{
if (wsRate === 1.0) {{ badgeClass = 'pass'; badgeText = 'ALL PASS'; }}
else if (wsRate === 0) {{ badgeClass = 'fail'; badgeText = 'ALL FAIL'; }}
else {{ badgeText = Math.round(wsRate * 100) + '% pass'; }}
}}

html += '<div class="eval-card">';
html += '<div class="eval-header">';
html += '<h3>eval-' + ev.id + '</h3>';
html += '<span class="badge ' + badgeClass + '">' + badgeText + '</span>';
html += '</div>';
html += '<div class="eval-body">';
html += '<div class="config-columns">';

['with_skill', 'without_skill'].forEach(config => {{
const data = ev.configs[config];
html += '<div class="config-col">';
html += '<h4>' + config.replace('_', ' ') + '</h4>';

if (data && data.grading) {{
data.grading.expectations.forEach(exp => {{
html += '<div class="assertion ' + exp.verdict.toLowerCase() + '">';
html += '<strong>' + exp.verdict + '</strong>: ' + exp.assertion;
html += '<div class="evidence">' + (exp.evidence || '') + '</div>';
html += '</div>';
}});
}}

if (data && Object.keys(data.outputs).length > 0) {{
html += '<details><summary style="cursor:pointer;margin-top:8px;color:#8b949e">Show outputs</summary>';
Object.entries(data.outputs).forEach(([name, content]) => {{
html += '<div style="margin-top:4px;font-size:12px;color:#8b949e">' + name + '</div>';
html += '<div class="output-block">' + escapeHtml(content) + '</div>';
}});
html += '</details>';
}}

if (data && data.timing) {{
html += '<div style="margin-top:8px;font-size:12px;color:#8b949e">';
html += 'Tokens: ' + data.timing.total_tokens + ' | Time: ' + data.timing.duration_ms + 'ms';
html += '</div>';
}}

html += '</div>';
}});

html += '</div>';
html += '<div class="feedback-section">';
html += '<select data-eval="' + ev.id + '" class="eval-rating">';
html += '<option value="good">Good</option>';
html += '<option value="needs-work">Needs Work</option>';
html += '<option value="bad">Bad</option>';
html += '</select>';
html += '<textarea data-eval="' + ev.id + '" class="eval-comment" placeholder="Feedback for this eval..."></textarea>';
html += '</div>';
html += '</div></div>';
}});

container.innerHTML = html;
}}

function renderBenchmark() {{
const container = document.getElementById('benchmark-tab');
if (!benchmark) {{
container.innerHTML = '<p style="color:#8b949e">No benchmark data available. Run evals first.</p>';
return;
}}

let html = '<div class="benchmark-grid">';

const configs = benchmark.configs || {{}};
['with_skill', 'without_skill'].forEach(config => {{
const data = configs[config];
if (!data) return;

html += '<div class="metric-card">';
html += '<div class="metric-label">' + config.replace('_', ' ') + ' — Pass Rate</div>';
html += '<div class="metric-value">' + Math.round(data.overall_pass_rate * 100) + '%</div>';
html += '</div>';

html += '<div class="metric-card">';
html += '<div class="metric-label">' + config.replace('_', ' ') + ' — Avg Tokens</div>';
html += '<div class="metric-value">' + Math.round(data.total_tokens_mean) + '</div>';
if (data.total_tokens_stddev) html += '<div class="metric-sub">&plusmn; ' + Math.round(data.total_tokens_stddev) + '</div>';
html += '</div>';

html += '<div class="metric-card">';
html += '<div class="metric-label">' + config.replace('_', ' ') + ' — Avg Time</div>';
html += '<div class="metric-value">' + Math.round(data.duration_ms_mean / 1000 * 10) / 10 + 's</div>';
if (data.duration_ms_stddev) html += '<div class="metric-sub">&plusmn; ' + Math.round(data.duration_ms_stddev) + 'ms</div>';
html += '</div>';
}});

if (benchmark.comparison) {{
const c = benchmark.comparison;
html += '<div class="metric-card">';
html += '<div class="metric-label">Pass Rate Delta</div>';
html += '<div class="metric-value" style="color:' + (c.pass_rate_delta >= 0 ? '#3fb950' : '#f85149') + '">';
html += (c.pass_rate_delta >= 0 ? '+' : '') + Math.round(c.pass_rate_delta * 100) + '%';
html += '</div></div>';

html += '<div class="metric-card">';
html += '<div class="metric-label">Token Overhead</div>';
html += '<div class="metric-value">' + Math.round(c.token_overhead_percent) + '%</div>';
html += '</div>';
}}

html += '</div>';
container.innerHTML = html;
}}

function escapeHtml(text) {{
const div = document.createElement('div');
div.textContent = text;
return div.innerHTML;
}}

function submitFeedback() {{
const feedback = {{
timestamp: new Date().toISOString(),
iteration: {iteration_num},
eval_feedback: [],
action: document.getElementById('action-select').value
}};

document.querySelectorAll('.eval-rating').forEach(select => {{
const evalId = select.dataset.eval;
const comment = document.querySelector('.eval-comment[data-eval="' + evalId + '"]').value;
feedback.eval_feedback.push({{
eval_id: evalId,
rating: select.value,
comment: comment
}});
}});

// Save as downloadable file
const blob = new Blob([JSON.stringify(feedback, null, 2)], {{ type: 'application/json' }});
const a = document.createElement('a');
a.href = URL.createObjectURL(blob);
a.download = 'feedback.json';
a.click();
alert('Feedback saved! Place feedback.json in the workspace directory.');
}}

renderOutputs();
renderBenchmark();
</script>
</body>
</html>"""
    return html
332
+
333
+
334
def main():
    """CLI entry point: build review.html for one iteration of a workspace.

    Locates the requested (or latest) iteration, collects its eval data,
    renders the HTML review page, writes it to --static (or
    ``<workspace>/review.html``), and optionally opens it in a browser.

    Exits with status 1 when the workspace, iteration directory, or eval
    data cannot be found.
    """
    parser = argparse.ArgumentParser(description="Generate HTML eval review page")
    parser.add_argument("workspace", help="Path to skill workspace directory")
    parser.add_argument("--static", help="Output path for standalone HTML file")
    parser.add_argument("--open", action="store_true", help="Open in browser")
    parser.add_argument("--iteration", type=int, help="Specific iteration to show")
    args = parser.parse_args()

    workspace = Path(args.workspace)
    if not workspace.exists():
        print(f"Error: {workspace} not found", file=sys.stderr)
        sys.exit(1)

    # Find iteration. Compare against None (not truthiness) so an explicit
    # "--iteration 0" is not silently replaced by the latest iteration.
    if args.iteration is not None:
        iteration_num = args.iteration
    else:
        iteration_num = find_latest_iteration(workspace)

    if iteration_num == 0:
        print("Error: No iterations found in workspace", file=sys.stderr)
        sys.exit(1)

    iteration_dir = workspace / f"iteration-{iteration_num}"
    if not iteration_dir.exists():
        print(f"Error: {iteration_dir} not found", file=sys.stderr)
        sys.exit(1)

    # Collect data
    evals, benchmark = collect_eval_data(iteration_dir)
    if not evals:
        print("Error: No eval data found", file=sys.stderr)
        sys.exit(1)

    # Generate HTML
    html = generate_html(evals, benchmark, iteration_num, str(workspace))

    # Output
    output_path = args.static or str(workspace / "review.html")
    Path(output_path).write_text(html)
    print(f"Generated: {output_path}")

    if args.open:
        webbrowser.open(f"file://{os.path.abspath(output_path)}")


if __name__ == "__main__":
    main()
@@ -0,0 +1,91 @@
1
+ #!/usr/bin/env python3
2
+ """Package a skill directory into a distributable archive.
3
+
4
+ Creates a .tar.gz archive of a skill directory, excluding workspace
5
+ and temporary files.
6
+
7
+ Usage:
8
+ python3 package_skill.py .claude/skills/my-skill
9
+ python3 package_skill.py .claude/skills/my-skill --output my-skill-v1.tar.gz
10
+ """
11
+
12
+ import argparse
13
+ import os
14
+ import sys
15
+ import tarfile
16
+ from pathlib import Path
17
+
18
# Directories and patterns to exclude from the archive.
# Entries are shell-style globs: "*" wildcards or exact file/dir names.
EXCLUDE_PATTERNS = {
    "*-workspace",
    "__pycache__",
    "*.pyc",
    ".DS_Store",
    "description_eval.json",
    "feedback.json",
    "review.html",
}


def should_exclude(path):
    """Return True if *path*'s basename matches any exclude pattern.

    Uses stdlib fnmatch (case-sensitive via fnmatchcase, so behavior is
    identical across platforms) instead of the previous hand-rolled
    prefix/suffix matcher, which could not handle a "*" anywhere but the
    first or last character of a pattern. Literal patterns such as
    "feedback.json" still match exactly.
    """
    # Function-scope import keeps this helper self-contained; fnmatch is
    # cached by the import system, so the per-call cost is negligible.
    from fnmatch import fnmatchcase

    name = Path(path).name
    return any(fnmatchcase(name, pattern) for pattern in EXCLUDE_PATTERNS)
41
+
42
+
43
def package_skill(skill_dir, output_path=None):
    """Create a .tar.gz archive of the skill directory.

    Walks *skill_dir*, skipping anything matched by EXCLUDE_PATTERNS, and
    writes members as ``<slug>/...`` (relative to the skill's parent).

    Args:
        skill_dir: Path to the skill directory; must contain SKILL.md.
        output_path: Destination archive path; defaults to
            ``<parent>/<slug>.tar.gz``.

    Returns:
        The output archive path as a string.

    Exits with status 1 if the directory or its SKILL.md is missing.
    """
    skill_dir = Path(skill_dir).resolve()
    if not skill_dir.exists():
        print(f"Error: {skill_dir} not found", file=sys.stderr)
        sys.exit(1)

    skill_md = skill_dir / "SKILL.md"
    if not skill_md.exists():
        print(f"Error: No SKILL.md found in {skill_dir}", file=sys.stderr)
        sys.exit(1)

    slug = skill_dir.name
    if output_path is None:
        output_path = skill_dir.parent / f"{slug}.tar.gz"
    else:
        output_path = Path(output_path)
    # Resolved once so the walk below can cheaply recognize the archive.
    resolved_output = output_path.resolve()

    file_count = 0
    with tarfile.open(output_path, "w:gz") as tar:
        for root, dirs, files in os.walk(skill_dir):
            # Prune excluded directories in place; sort both lists so the
            # archive has a deterministic member order across runs/OSes.
            dirs[:] = sorted(d for d in dirs if not should_exclude(d))

            for f in sorted(files):
                filepath = Path(root) / f
                if should_exclude(filepath):
                    continue
                # Never add the archive to itself: when --output points
                # inside the skill tree, the partially written .tar.gz
                # would otherwise be picked up by the walk.
                if filepath.resolve() == resolved_output:
                    continue

                arcname = str(filepath.relative_to(skill_dir.parent))
                tar.add(filepath, arcname=arcname)
                file_count += 1

    size_kb = output_path.stat().st_size / 1024
    print(f"Packaged {file_count} files into {output_path} ({size_kb:.1f} KB)")
    return str(output_path)
79
+
80
+
81
def main():
    """Parse CLI arguments and package the given skill directory."""
    arg_parser = argparse.ArgumentParser(description="Package a skill for distribution")
    arg_parser.add_argument("skill_dir", help="Path to skill directory")
    arg_parser.add_argument("--output", "-o", help="Output archive path")
    options = arg_parser.parse_args()

    package_skill(options.skill_dir, options.output)


if __name__ == "__main__":
    main()