@intentsolutionsio/skill-creator 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +17 -0
- package/README.md +55 -0
- package/package.json +38 -0
- package/scripts/validate-skill.py +1132 -0
- package/skills/agent-creator/SKILL.md +305 -0
- package/skills/agent-creator/references/anthropic-agent-spec.md +89 -0
- package/skills/skill-creator/SKILL.md +267 -0
- package/skills/skill-creator/agents/analyzer.md +279 -0
- package/skills/skill-creator/agents/comparator.md +207 -0
- package/skills/skill-creator/agents/grader.md +228 -0
- package/skills/skill-creator/assets/eval_review.html +146 -0
- package/skills/skill-creator/eval-viewer/generate_review.py +471 -0
- package/skills/skill-creator/eval-viewer/viewer.html +1325 -0
- package/skills/skill-creator/references/advanced-eval-workflow.md +320 -0
- package/skills/skill-creator/references/anthropic-comparison.md +93 -0
- package/skills/skill-creator/references/ard-template.md +47 -0
- package/skills/skill-creator/references/creation-guide.md +305 -0
- package/skills/skill-creator/references/errors-template.md +27 -0
- package/skills/skill-creator/references/examples-template.md +40 -0
- package/skills/skill-creator/references/frontmatter-spec.md +531 -0
- package/skills/skill-creator/references/implementation-template.md +42 -0
- package/skills/skill-creator/references/output-patterns.md +193 -0
- package/skills/skill-creator/references/prd-template.md +55 -0
- package/skills/skill-creator/references/schemas.md +430 -0
- package/skills/skill-creator/references/source-of-truth.md +658 -0
- package/skills/skill-creator/references/validation-rules.md +528 -0
- package/skills/skill-creator/references/workflows.md +233 -0
- package/skills/skill-creator/scripts/__init__.py +0 -0
- package/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
- package/skills/skill-creator/scripts/generate_report.py +326 -0
- package/skills/skill-creator/scripts/improve_description.py +247 -0
- package/skills/skill-creator/scripts/package_skill.py +136 -0
- package/skills/skill-creator/scripts/quick_validate.py +103 -0
- package/skills/skill-creator/scripts/run_eval.py +344 -0
- package/skills/skill-creator/scripts/run_loop.py +329 -0
- package/skills/skill-creator/scripts/utils.py +47 -0
- package/skills/skill-creator/scripts/validate-skill.py +87 -0
- package/skills/skill-creator/templates/agent-template.md +99 -0
- package/skills/skill-creator/templates/skill-template.md +122 -0
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Run the eval + improve loop until all pass or max iterations reached.
|
|
3
|
+
|
|
4
|
+
Combines run_eval.py and improve_description.py in a loop, tracking history
|
|
5
|
+
and returning the best description found. Supports train/test split to prevent
|
|
6
|
+
overfitting.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import json
|
|
11
|
+
import random
|
|
12
|
+
import sys
|
|
13
|
+
import tempfile
|
|
14
|
+
import time
|
|
15
|
+
import webbrowser
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
from scripts.generate_report import generate_html
|
|
19
|
+
from scripts.improve_description import improve_description
|
|
20
|
+
from scripts.run_eval import find_project_root, run_eval # noqa: F401 (find_project_root re-exported)
|
|
21
|
+
from scripts.utils import parse_skill_md
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def split_eval_set(eval_set: list[dict], holdout: float, seed: int = 42) -> tuple[list[dict], list[dict]]:
    """Split eval set into train and test sets, stratified by should_trigger.

    Entries are grouped by the truthiness of their ``"should_trigger"`` key,
    each group is shuffled deterministically, and the first ``holdout``
    fraction of each group (at least one entry per group) becomes the test set.

    Args:
        eval_set: Eval entries; each must contain a ``"should_trigger"`` key.
        holdout: Fraction of each group to hold out for testing.
        seed: Seed for the shuffle, so the split is reproducible.

    Returns:
        A ``(train_set, test_set)`` tuple. The input list is not mutated.
    """
    # A private generator yields the same shuffle order as seeding the
    # module-level RNG, but leaves the caller's global random state untouched.
    rng = random.Random(seed)

    # Separate by should_trigger
    trigger = [e for e in eval_set if e["should_trigger"]]
    no_trigger = [e for e in eval_set if not e["should_trigger"]]

    # Shuffle each group (trigger first, to keep the draw order stable)
    rng.shuffle(trigger)
    rng.shuffle(no_trigger)

    # Calculate split points; max(1, ...) holds out at least one entry from
    # every non-empty group even when the fraction rounds down to zero.
    n_trigger_test = max(1, int(len(trigger) * holdout))
    n_no_trigger_test = max(1, int(len(no_trigger) * holdout))

    # Split
    test_set = trigger[:n_trigger_test] + no_trigger[:n_no_trigger_test]
    train_set = trigger[n_trigger_test:] + no_trigger[n_no_trigger_test:]

    return train_set, test_set
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def run_loop(
|
|
48
|
+
eval_set: list[dict],
|
|
49
|
+
skill_path: Path,
|
|
50
|
+
description_override: str | None,
|
|
51
|
+
num_workers: int,
|
|
52
|
+
timeout: int,
|
|
53
|
+
max_iterations: int,
|
|
54
|
+
runs_per_query: int,
|
|
55
|
+
trigger_threshold: float,
|
|
56
|
+
holdout: float,
|
|
57
|
+
model: str,
|
|
58
|
+
verbose: bool,
|
|
59
|
+
live_report_path: Path | None = None,
|
|
60
|
+
log_dir: Path | None = None,
|
|
61
|
+
) -> dict:
|
|
62
|
+
"""Run the eval + improvement loop."""
|
|
63
|
+
project_root = find_project_root()
|
|
64
|
+
name, original_description, content = parse_skill_md(skill_path)
|
|
65
|
+
current_description = description_override or original_description
|
|
66
|
+
|
|
67
|
+
# Split into train/test if holdout > 0
|
|
68
|
+
if holdout > 0:
|
|
69
|
+
train_set, test_set = split_eval_set(eval_set, holdout)
|
|
70
|
+
if verbose:
|
|
71
|
+
print(f"Split: {len(train_set)} train, {len(test_set)} test (holdout={holdout})", file=sys.stderr)
|
|
72
|
+
else:
|
|
73
|
+
train_set = eval_set
|
|
74
|
+
test_set = []
|
|
75
|
+
|
|
76
|
+
history = []
|
|
77
|
+
exit_reason = "unknown"
|
|
78
|
+
|
|
79
|
+
for iteration in range(1, max_iterations + 1):
|
|
80
|
+
if verbose:
|
|
81
|
+
print(f"\n{'='*60}", file=sys.stderr)
|
|
82
|
+
print(f"Iteration {iteration}/{max_iterations}", file=sys.stderr)
|
|
83
|
+
print(f"Description: {current_description}", file=sys.stderr)
|
|
84
|
+
print(f"{'='*60}", file=sys.stderr)
|
|
85
|
+
|
|
86
|
+
# Evaluate train + test together in one batch for parallelism
|
|
87
|
+
all_queries = train_set + test_set
|
|
88
|
+
t0 = time.time()
|
|
89
|
+
all_results = run_eval(
|
|
90
|
+
eval_set=all_queries,
|
|
91
|
+
skill_name=name,
|
|
92
|
+
description=current_description,
|
|
93
|
+
num_workers=num_workers,
|
|
94
|
+
timeout=timeout,
|
|
95
|
+
project_root=project_root,
|
|
96
|
+
runs_per_query=runs_per_query,
|
|
97
|
+
trigger_threshold=trigger_threshold,
|
|
98
|
+
model=model,
|
|
99
|
+
skill_path=skill_path,
|
|
100
|
+
)
|
|
101
|
+
eval_elapsed = time.time() - t0
|
|
102
|
+
|
|
103
|
+
# Split results back into train/test by matching queries
|
|
104
|
+
train_queries_set = {q["query"] for q in train_set}
|
|
105
|
+
train_result_list = [r for r in all_results["results"] if r["query"] in train_queries_set]
|
|
106
|
+
test_result_list = [r for r in all_results["results"] if r["query"] not in train_queries_set]
|
|
107
|
+
|
|
108
|
+
train_passed = sum(1 for r in train_result_list if r["pass"])
|
|
109
|
+
train_total = len(train_result_list)
|
|
110
|
+
train_summary = {"passed": train_passed, "failed": train_total - train_passed, "total": train_total}
|
|
111
|
+
train_results = {"results": train_result_list, "summary": train_summary}
|
|
112
|
+
|
|
113
|
+
if test_set:
|
|
114
|
+
test_passed = sum(1 for r in test_result_list if r["pass"])
|
|
115
|
+
test_total = len(test_result_list)
|
|
116
|
+
test_summary = {"passed": test_passed, "failed": test_total - test_passed, "total": test_total}
|
|
117
|
+
test_results = {"results": test_result_list, "summary": test_summary}
|
|
118
|
+
else:
|
|
119
|
+
test_results = None
|
|
120
|
+
test_summary = None
|
|
121
|
+
|
|
122
|
+
history.append({
|
|
123
|
+
"iteration": iteration,
|
|
124
|
+
"description": current_description,
|
|
125
|
+
"train_passed": train_summary["passed"],
|
|
126
|
+
"train_failed": train_summary["failed"],
|
|
127
|
+
"train_total": train_summary["total"],
|
|
128
|
+
"train_results": train_results["results"],
|
|
129
|
+
"test_passed": test_summary["passed"] if test_summary else None,
|
|
130
|
+
"test_failed": test_summary["failed"] if test_summary else None,
|
|
131
|
+
"test_total": test_summary["total"] if test_summary else None,
|
|
132
|
+
"test_results": test_results["results"] if test_results else None,
|
|
133
|
+
# For backward compat with report generator
|
|
134
|
+
"passed": train_summary["passed"],
|
|
135
|
+
"failed": train_summary["failed"],
|
|
136
|
+
"total": train_summary["total"],
|
|
137
|
+
"results": train_results["results"],
|
|
138
|
+
})
|
|
139
|
+
|
|
140
|
+
# Write live report if path provided
|
|
141
|
+
if live_report_path:
|
|
142
|
+
partial_output = {
|
|
143
|
+
"original_description": original_description,
|
|
144
|
+
"best_description": current_description,
|
|
145
|
+
"best_score": "in progress",
|
|
146
|
+
"iterations_run": len(history),
|
|
147
|
+
"holdout": holdout,
|
|
148
|
+
"train_size": len(train_set),
|
|
149
|
+
"test_size": len(test_set),
|
|
150
|
+
"history": history,
|
|
151
|
+
}
|
|
152
|
+
live_report_path.write_text(generate_html(partial_output, auto_refresh=True, skill_name=name))
|
|
153
|
+
|
|
154
|
+
if verbose:
|
|
155
|
+
def print_eval_stats(label, results, elapsed):
|
|
156
|
+
pos = [r for r in results if r["should_trigger"]]
|
|
157
|
+
neg = [r for r in results if not r["should_trigger"]]
|
|
158
|
+
tp = sum(r["triggers"] for r in pos)
|
|
159
|
+
pos_runs = sum(r["runs"] for r in pos)
|
|
160
|
+
fn = pos_runs - tp
|
|
161
|
+
fp = sum(r["triggers"] for r in neg)
|
|
162
|
+
neg_runs = sum(r["runs"] for r in neg)
|
|
163
|
+
tn = neg_runs - fp
|
|
164
|
+
total = tp + tn + fp + fn
|
|
165
|
+
precision = tp / (tp + fp) if (tp + fp) > 0 else 1.0
|
|
166
|
+
recall = tp / (tp + fn) if (tp + fn) > 0 else 1.0
|
|
167
|
+
accuracy = (tp + tn) / total if total > 0 else 0.0
|
|
168
|
+
print(f"{label}: {tp+tn}/{total} correct, precision={precision:.0%} recall={recall:.0%} accuracy={accuracy:.0%} ({elapsed:.1f}s)", file=sys.stderr)
|
|
169
|
+
for r in results:
|
|
170
|
+
status = "PASS" if r["pass"] else "FAIL"
|
|
171
|
+
rate_str = f"{r['triggers']}/{r['runs']}"
|
|
172
|
+
print(f" [{status}] rate={rate_str} expected={r['should_trigger']}: {r['query'][:60]}", file=sys.stderr)
|
|
173
|
+
|
|
174
|
+
print_eval_stats("Train", train_results["results"], eval_elapsed)
|
|
175
|
+
if test_summary:
|
|
176
|
+
print_eval_stats("Test ", test_results["results"], 0)
|
|
177
|
+
|
|
178
|
+
if train_summary["failed"] == 0:
|
|
179
|
+
exit_reason = f"all_passed (iteration {iteration})"
|
|
180
|
+
if verbose:
|
|
181
|
+
print(f"\nAll train queries passed on iteration {iteration}!", file=sys.stderr)
|
|
182
|
+
break
|
|
183
|
+
|
|
184
|
+
if iteration == max_iterations:
|
|
185
|
+
exit_reason = f"max_iterations ({max_iterations})"
|
|
186
|
+
if verbose:
|
|
187
|
+
print(f"\nMax iterations reached ({max_iterations}).", file=sys.stderr)
|
|
188
|
+
break
|
|
189
|
+
|
|
190
|
+
# Improve the description based on train results
|
|
191
|
+
if verbose:
|
|
192
|
+
print(f"\nImproving description...", file=sys.stderr)
|
|
193
|
+
|
|
194
|
+
t0 = time.time()
|
|
195
|
+
# Strip test scores from history so improvement model can't see them
|
|
196
|
+
blinded_history = [
|
|
197
|
+
{k: v for k, v in h.items() if not k.startswith("test_")}
|
|
198
|
+
for h in history
|
|
199
|
+
]
|
|
200
|
+
new_description = improve_description(
|
|
201
|
+
skill_name=name,
|
|
202
|
+
skill_content=content,
|
|
203
|
+
current_description=current_description,
|
|
204
|
+
eval_results=train_results,
|
|
205
|
+
history=blinded_history,
|
|
206
|
+
model=model,
|
|
207
|
+
log_dir=log_dir,
|
|
208
|
+
iteration=iteration,
|
|
209
|
+
)
|
|
210
|
+
improve_elapsed = time.time() - t0
|
|
211
|
+
|
|
212
|
+
if verbose:
|
|
213
|
+
print(f"Proposed ({improve_elapsed:.1f}s): {new_description}", file=sys.stderr)
|
|
214
|
+
|
|
215
|
+
current_description = new_description
|
|
216
|
+
|
|
217
|
+
# Find the best iteration by TEST score (or train if no test set)
|
|
218
|
+
if test_set:
|
|
219
|
+
best = max(history, key=lambda h: h["test_passed"] or 0)
|
|
220
|
+
best_score = f"{best['test_passed']}/{best['test_total']}"
|
|
221
|
+
else:
|
|
222
|
+
best = max(history, key=lambda h: h["train_passed"])
|
|
223
|
+
best_score = f"{best['train_passed']}/{best['train_total']}"
|
|
224
|
+
|
|
225
|
+
if verbose:
|
|
226
|
+
print(f"\nExit reason: {exit_reason}", file=sys.stderr)
|
|
227
|
+
print(f"Best score: {best_score} (iteration {best['iteration']})", file=sys.stderr)
|
|
228
|
+
|
|
229
|
+
return {
|
|
230
|
+
"exit_reason": exit_reason,
|
|
231
|
+
"original_description": original_description,
|
|
232
|
+
"best_description": best["description"],
|
|
233
|
+
"best_score": best_score,
|
|
234
|
+
"best_train_score": f"{best['train_passed']}/{best['train_total']}",
|
|
235
|
+
"best_test_score": f"{best['test_passed']}/{best['test_total']}" if test_set else None,
|
|
236
|
+
"final_description": current_description,
|
|
237
|
+
"iterations_run": len(history),
|
|
238
|
+
"holdout": holdout,
|
|
239
|
+
"train_size": len(train_set),
|
|
240
|
+
"test_size": len(test_set),
|
|
241
|
+
"history": history,
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def main():
    """Command-line entry point: parse arguments, run the loop, save outputs.

    Prints the JSON result to stdout. Depending on the flags it also writes a
    live/final HTML report and a timestamped results directory containing
    ``results.json`` and ``report.html``. Exits with status 1 when the skill
    directory has no ``SKILL.md``.
    """
    parser = argparse.ArgumentParser(description="Run eval + improve loop")
    parser.add_argument("--eval-set", required=True, help="Path to eval set JSON file")
    parser.add_argument("--skill-path", required=True, help="Path to skill directory")
    parser.add_argument("--description", default=None, help="Override starting description")
    parser.add_argument("--num-workers", type=int, default=10, help="Number of parallel workers")
    parser.add_argument("--timeout", type=int, default=30, help="Timeout per query in seconds")
    parser.add_argument("--max-iterations", type=int, default=5, help="Max improvement iterations")
    parser.add_argument("--runs-per-query", type=int, default=3, help="Number of runs per query")
    parser.add_argument("--trigger-threshold", type=float, default=0.5, help="Trigger rate threshold")
    parser.add_argument("--holdout", type=float, default=0.4, help="Fraction of eval set to hold out for testing (0 to disable)")
    parser.add_argument("--model", required=True, help="Model for improvement")
    parser.add_argument("--verbose", action="store_true", help="Print progress to stderr")
    parser.add_argument("--report", default="auto", help="Generate HTML report at this path (default: 'auto' for temp file, 'none' to disable)")
    parser.add_argument("--results-dir", default=None, help="Save all outputs (results.json, report.html, log.txt) to a timestamped subdirectory here")
    args = parser.parse_args()

    eval_set = json.loads(Path(args.eval_set).read_text())
    skill_path = Path(args.skill_path)

    if not (skill_path / "SKILL.md").exists():
        print(f"Error: No SKILL.md found at {skill_path}", file=sys.stderr)
        sys.exit(1)

    name, _, _ = parse_skill_md(skill_path)

    # Set up live report path
    if args.report != "none":
        if args.report == "auto":
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            live_report_path = Path(tempfile.gettempdir()) / f"skill_description_report_{skill_path.name}_{timestamp}.html"
        else:
            live_report_path = Path(args.report)
        # Open the report immediately so the user can watch; the placeholder
        # page refreshes itself until the loop writes the first real report.
        live_report_path.write_text("<html><body><h1>Starting optimization loop...</h1><meta http-equiv='refresh' content='5'></body></html>")
        webbrowser.open(str(live_report_path))
    else:
        live_report_path = None

    # Determine output directory (create before run_loop so logs can be written)
    if args.results_dir:
        timestamp = time.strftime("%Y-%m-%d_%H%M%S")
        results_dir = Path(args.results_dir) / timestamp
        results_dir.mkdir(parents=True, exist_ok=True)
    else:
        results_dir = None

    log_dir = results_dir / "logs" if results_dir else None

    output = run_loop(
        eval_set=eval_set,
        skill_path=skill_path,
        description_override=args.description,
        num_workers=args.num_workers,
        timeout=args.timeout,
        max_iterations=args.max_iterations,
        runs_per_query=args.runs_per_query,
        trigger_threshold=args.trigger_threshold,
        holdout=args.holdout,
        model=args.model,
        verbose=args.verbose,
        live_report_path=live_report_path,
        log_dir=log_dir,
    )

    # Save JSON output
    json_output = json.dumps(output, indent=2)
    print(json_output)
    if results_dir:
        (results_dir / "results.json").write_text(json_output)

    # Write final HTML report (without auto-refresh). The page is rendered
    # once and reused for the copy stored in the results directory.
    if live_report_path:
        final_html = generate_html(output, auto_refresh=False, skill_name=name)
        live_report_path.write_text(final_html)
        print(f"\nReport: {live_report_path}", file=sys.stderr)

        if results_dir:
            (results_dir / "report.html").write_text(final_html)

    if results_dir:
        print(f"Results saved to: {results_dir}", file=sys.stderr)
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
if __name__ == "__main__":
|
|
329
|
+
main()
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Shared utilities for skill-creator scripts."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def parse_skill_md(skill_path: Path) -> tuple[str, str, str]:
    """Parse a SKILL.md file, returning (name, description, full_content).

    Only the ``name:`` and ``description:`` keys of the leading ``---``
    delimited frontmatter are read, using a small line-based scan rather than
    a YAML parser. A description introduced by a block-scalar indicator
    (``>``, ``|``, optionally followed by ``-`` or ``+``) is collected from
    the indented lines that follow and folded into one space-joined string.

    Args:
        skill_path: Directory containing ``SKILL.md``.

    Returns:
        ``(name, description, content)`` where ``content`` is the whole file
        text; ``name`` / ``description`` are ``""`` when the key is absent.

    Raises:
        ValueError: If the opening or closing ``---`` line is missing.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default locale encoding.
    content = (skill_path / "SKILL.md").read_text(encoding="utf-8")
    lines = content.split("\n")

    if lines[0].strip() != "---":
        raise ValueError("SKILL.md missing frontmatter (no opening ---)")

    end_idx = next(
        (i for i, line in enumerate(lines[1:], start=1) if line.strip() == "---"),
        None,
    )
    if end_idx is None:
        raise ValueError("SKILL.md missing frontmatter (no closing ---)")

    name = ""
    description = ""
    frontmatter_lines = lines[1:end_idx]
    i = 0
    while i < len(frontmatter_lines):
        line = frontmatter_lines[i]
        if line.startswith("name:"):
            name = line[len("name:"):].strip().strip('"').strip("'")
        elif line.startswith("description:"):
            value = line[len("description:"):].strip()
            # Handle YAML multiline indicators (>, |, with optional chomping)
            if value in (">", "|", ">-", "|-", ">+", "|+"):
                continuation_lines: list[str] = []
                i += 1
                while i < len(frontmatter_lines):
                    candidate = frontmatter_lines[i]
                    # The block ends at the first non-blank, unindented line.
                    # Blank lines are legal inside a block scalar and must
                    # not cut the description short.
                    if candidate.strip() and not candidate.startswith((" ", "\t")):
                        break
                    continuation_lines.append(candidate.strip())
                    i += 1
                description = " ".join(part for part in continuation_lines if part)
                # i already points at the next key; skip the increment below.
                continue
            description = value.strip('"').strip("'")
        i += 1

    return name, description, content
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Skill Validator — Wrapper around validate-skills-schema.py for skill-creator.
|
|
4
|
+
|
|
5
|
+
Provides --grade flag for single-file validation with full grade report.
|
|
6
|
+
Delegates to the main validator in the repository root.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python3 ${CLAUDE_SKILL_DIR}/scripts/validate-skill.py <SKILL.md>
|
|
10
|
+
python3 ${CLAUDE_SKILL_DIR}/scripts/validate-skill.py --grade <SKILL.md>
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
import subprocess
|
|
15
|
+
import sys
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def find_repo_validator():
    """Locate validate-skills-schema.py, returning its path or None.

    Starting at this script's directory, up to ten directory levels are
    probed for ``scripts/validate-skills-schema.py``; the search stops early
    at the filesystem root. If nothing is found, one fixed location under the
    user's home directory is checked as a fallback.
    """
    relative_target = os.path.join("scripts", "validate-skills-schema.py")

    # Climb from the script's own directory towards the root.
    directory = os.path.dirname(os.path.abspath(__file__))
    levels_left = 10
    while levels_left > 0:
        levels_left -= 1
        located = os.path.join(directory, relative_target)
        if os.path.exists(located):
            return located
        above = os.path.dirname(directory)
        if above == directory:
            # dirname() of the root is the root itself: nowhere left to go.
            break
        directory = above

    # Fall back to the known checkout locations.
    known_locations = [
        os.path.join(os.path.expanduser("~"), "000-projects", "claude-code-plugins", relative_target),
    ]
    for location in known_locations:
        if os.path.exists(location):
            return location

    return None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def main():
    """Validate one SKILL.md by delegating to validate-skills-schema.py.

    Reads ``sys.argv`` directly. With no arguments, or with only ``--help`` /
    ``-h``, the module docstring is printed and the process exits with 0.
    Otherwise the first argument that does not start with ``-`` is taken as
    the SKILL.md path and handed to the repository validator in a subprocess
    (with ``--verbose`` when ``--grade``, ``--verbose`` or ``-v`` was given);
    the process exits with the validator's return code.

    Exits with status 1, reporting on stderr, when no path is given, the path
    does not exist, or the validator script cannot be found.
    """
    args = sys.argv[1:]

    if not args or args == ["--help"] or args == ["-h"]:
        print(__doc__)
        sys.exit(0)

    # Parse our flags
    grade = "--grade" in args
    verbose = "--verbose" in args or "-v" in args
    # First positional (non-flag) argument is the file to validate.
    skill_path = next((arg for arg in args if not arg.startswith("-")), None)

    if not skill_path:
        print("ERROR: No SKILL.md path provided", file=sys.stderr)
        # Keep the usage hint on stderr with the error so stdout stays clean.
        print("Usage: validate-skill.py [--grade] <path/to/SKILL.md>", file=sys.stderr)
        sys.exit(1)

    if not os.path.exists(skill_path):
        print(f"ERROR: File not found: {skill_path}", file=sys.stderr)
        sys.exit(1)

    validator = find_repo_validator()
    if not validator:
        print("ERROR: Cannot find validate-skills-schema.py", file=sys.stderr)
        print("Make sure you're running from within the claude-code-plugins repo", file=sys.stderr)
        sys.exit(1)

    # Build command (argument list, no shell); --grade implies --verbose
    cmd = [sys.executable, validator]
    if verbose or grade:
        cmd.append("--verbose")
    cmd.append(skill_path)

    result = subprocess.run(cmd)
    sys.exit(result.returncode)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
if __name__ == "__main__":
|
|
87
|
+
main()
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: {{AGENT_NAME}}
|
|
3
|
+
description: "{{AGENT_SPECIALTY_20_200_CHARS}}"
|
|
4
|
+
# Optional fields (include as needed):
|
|
5
|
+
# model: sonnet # sonnet|haiku|opus|inherit
|
|
6
|
+
# effort: medium # low|medium|high
|
|
7
|
+
# maxTurns: 15 # Max agentic loop iterations
|
|
8
|
+
# disallowedTools: "Write,Edit" # Denylist (opposite of skills' allowed-tools)
|
|
9
|
+
# skills: [{{SKILL_1}}, {{SKILL_2}}] # Skills to preload
|
|
10
|
+
# memory: project # user|project|local
|
|
11
|
+
# background: false # Run in background
|
|
12
|
+
# isolation: worktree # Isolated git worktree
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
# {{AGENT_TITLE}}
|
|
16
|
+
|
|
17
|
+
{{ONE_LINE_ROLE_STATEMENT}}
|
|
18
|
+
|
|
19
|
+
## Role
|
|
20
|
+
|
|
21
|
+
{{DETAILED_ROLE_DESCRIPTION_2_3_SENTENCES. What domain does this agent specialize in?
|
|
22
|
+
What unique perspective or methodology does it bring? What is it NOT responsible for?}}
|
|
23
|
+
|
|
24
|
+
## Inputs
|
|
25
|
+
|
|
26
|
+
You receive these parameters in your prompt:
|
|
27
|
+
|
|
28
|
+
- **{{INPUT_1}}**: {{DESCRIPTION}}
|
|
29
|
+
- **{{INPUT_2}}**: {{DESCRIPTION}}
|
|
30
|
+
- **{{INPUT_3}}**: {{DESCRIPTION}}
|
|
31
|
+
|
|
32
|
+
## Process
|
|
33
|
+
|
|
34
|
+
### Step 1: {{STEP_TITLE}}
|
|
35
|
+
|
|
36
|
+
{{DETAILED_INSTRUCTIONS_FOR_STEP}}
|
|
37
|
+
|
|
38
|
+
### Step 2: {{STEP_TITLE}}
|
|
39
|
+
|
|
40
|
+
{{DETAILED_INSTRUCTIONS_FOR_STEP}}
|
|
41
|
+
|
|
42
|
+
### Step 3: {{STEP_TITLE}}
|
|
43
|
+
|
|
44
|
+
{{DETAILED_INSTRUCTIONS_FOR_STEP}}
|
|
45
|
+
|
|
46
|
+
### Step 4: {{STEP_TITLE}}
|
|
47
|
+
|
|
48
|
+
{{DETAILED_INSTRUCTIONS_FOR_STEP}}
|
|
49
|
+
|
|
50
|
+
## Output Format
|
|
51
|
+
|
|
52
|
+
{{DESCRIBE_STRUCTURED_OUTPUT_FORMAT}}
|
|
53
|
+
|
|
54
|
+
```json
|
|
55
|
+
{
|
|
56
|
+
"{{FIELD_1}}": "{{DESCRIPTION}}",
|
|
57
|
+
"{{FIELD_2}}": [
|
|
58
|
+
{
|
|
59
|
+
"{{SUBFIELD}}": "{{DESCRIPTION}}"
|
|
60
|
+
}
|
|
61
|
+
],
|
|
62
|
+
"summary": {
|
|
63
|
+
"{{METRIC_1}}": 0,
|
|
64
|
+
"{{METRIC_2}}": 0
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Guidelines
|
|
70
|
+
|
|
71
|
+
- **{{GUIDELINE_1}}**: {{EXPLANATION}}
|
|
72
|
+
- **{{GUIDELINE_2}}**: {{EXPLANATION}}
|
|
73
|
+
- **{{GUIDELINE_3}}**: {{EXPLANATION}}
|
|
74
|
+
- **{{GUIDELINE_4}}**: {{EXPLANATION}}
|
|
75
|
+
|
|
76
|
+
## When Activated
|
|
77
|
+
|
|
78
|
+
You activate when:
|
|
79
|
+
- {{ACTIVATION_CONDITION_1}}
|
|
80
|
+
- {{ACTIVATION_CONDITION_2}}
|
|
81
|
+
- {{ACTIVATION_CONDITION_3}}
|
|
82
|
+
|
|
83
|
+
## Communication Style
|
|
84
|
+
|
|
85
|
+
- {{STYLE_TRAIT_1}}
|
|
86
|
+
- {{STYLE_TRAIT_2}}
|
|
87
|
+
- {{STYLE_TRAIT_3}}
|
|
88
|
+
|
|
89
|
+
## Success Criteria
|
|
90
|
+
|
|
91
|
+
Good output includes:
|
|
92
|
+
- {{QUALITY_MARKER_1}}
|
|
93
|
+
- {{QUALITY_MARKER_2}}
|
|
94
|
+
- {{QUALITY_MARKER_3}}
|
|
95
|
+
|
|
96
|
+
Poor output is:
|
|
97
|
+
- {{ANTI_PATTERN_1}}
|
|
98
|
+
- {{ANTI_PATTERN_2}}
|
|
99
|
+
- {{ANTI_PATTERN_3}}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
---
|
|
2
|
+
# Required (AgentSkills.io)
|
|
3
|
+
# NOTE: name and description must NOT contain XML tags (< or >)
|
|
4
|
+
name: {{SKILL_NAME}}
|
|
5
|
+
description: |
|
|
6
|
+
{{PURPOSE_STATEMENT}}. Use when {{WHEN_TO_USE}}.
|
|
7
|
+
Trigger with "/{{SKILL_NAME}}" or "{{NATURAL_TRIGGER}}".
|
|
8
|
+
|
|
9
|
+
# Tools (recommended)
|
|
10
|
+
allowed-tools: "{{TOOLS_CSV}}"
|
|
11
|
+
|
|
12
|
+
# Identity (top-level, NOT inside metadata)
|
|
13
|
+
version: 1.0.0
|
|
14
|
+
author: {{AUTHOR_NAME}} <{{AUTHOR_EMAIL}}>
|
|
15
|
+
license: MIT
|
|
16
|
+
|
|
17
|
+
# Claude Code extensions (include as needed)
|
|
18
|
+
model: inherit
|
|
19
|
+
# argument-hint: "[arg-description]"
|
|
20
|
+
# disable-model-invocation: false
|
|
21
|
+
# user-invocable: true
|
|
22
|
+
# context: fork
|
|
23
|
+
# agent: general-purpose
|
|
24
|
+
|
|
25
|
+
# Discovery (optional)
|
|
26
|
+
# compatible-with: claude-code, codex, openclaw
|
|
27
|
+
# tags: [{{TAG_1}}, {{TAG_2}}]
|
|
28
|
+
|
|
29
|
+
# Optional spec fields
|
|
30
|
+
# compatibility: "{{ENVIRONMENT_REQUIREMENTS}}"
|
|
31
|
+
# metadata:
|
|
32
|
+
# category: {{CATEGORY}}
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
# {{SKILL_TITLE}}
|
|
36
|
+
|
|
37
|
+
{{PURPOSE_STATEMENT_1_2_SENTENCES}}
|
|
38
|
+
|
|
39
|
+
## Overview
|
|
40
|
+
|
|
41
|
+
{{WHAT_THIS_SKILL_SOLVES_AND_WHY_IT_EXISTS}}
|
|
42
|
+
|
|
43
|
+
## Prerequisites
|
|
44
|
+
|
|
45
|
+
- {{PREREQUISITE_1}}
|
|
46
|
+
- {{PREREQUISITE_2}}
|
|
47
|
+
|
|
48
|
+
## Instructions
|
|
49
|
+
|
|
50
|
+
### Step 1: {{STEP_1_TITLE}}
|
|
51
|
+
|
|
52
|
+
{{STEP_1_DETAILED_INSTRUCTIONS}}
|
|
53
|
+
|
|
54
|
+
### Step 2: {{STEP_2_TITLE}}
|
|
55
|
+
|
|
56
|
+
{{STEP_2_DETAILED_INSTRUCTIONS}}
|
|
57
|
+
|
|
58
|
+
### Step 3: {{STEP_3_TITLE}}
|
|
59
|
+
|
|
60
|
+
{{STEP_3_DETAILED_INSTRUCTIONS}}
|
|
61
|
+
|
|
62
|
+
## Output
|
|
63
|
+
|
|
64
|
+
{{DESCRIPTION_OF_EXPECTED_OUTPUT_FORMAT}}
|
|
65
|
+
|
|
66
|
+
## Examples
|
|
67
|
+
|
|
68
|
+
### {{EXAMPLE_1_TITLE}}
|
|
69
|
+
|
|
70
|
+
**Input:**
|
|
71
|
+
```
|
|
72
|
+
{{EXAMPLE_1_INPUT}}
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
**Output:**
|
|
76
|
+
```
|
|
77
|
+
{{EXAMPLE_1_OUTPUT}}
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### {{EXAMPLE_2_TITLE}}
|
|
81
|
+
|
|
82
|
+
**Input:**
|
|
83
|
+
```
|
|
84
|
+
{{EXAMPLE_2_INPUT}}
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
**Output:**
|
|
88
|
+
```
|
|
89
|
+
{{EXAMPLE_2_OUTPUT}}
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Edge Cases
|
|
93
|
+
|
|
94
|
+
- {{EDGE_CASE_1}}
|
|
95
|
+
- {{EDGE_CASE_2}}
|
|
96
|
+
|
|
97
|
+
<!-- Optional: Include for quality-critical workflows -->
|
|
98
|
+
<!-- ## Feedback Loop
|
|
99
|
+
Run validation after each major step. If issues found, fix and re-validate:
|
|
100
|
+
1. Execute step
|
|
101
|
+
2. Validate output
|
|
102
|
+
3. If validation fails → fix → return to step 2
|
|
103
|
+
4. Maximum 3 iterations before reporting -->
|
|
104
|
+
|
|
105
|
+
<!-- Optional: Include if skill documents deprecated approaches -->
|
|
106
|
+
<!-- ## Old Patterns
|
|
107
|
+
These patterns are deprecated but users may encounter them:
|
|
108
|
+
| Old Pattern | Replacement | Why Changed |
|
|
109
|
+
|-------------|-------------|-------------|
|
|
110
|
+
| {{OLD_1}} | {{NEW_1}} | {{REASON_1}} | -->
|
|
111
|
+
|
|
112
|
+
## Error Handling
|
|
113
|
+
|
|
114
|
+
| Error | Cause | Solution |
|
|
115
|
+
|-------|-------|----------|
|
|
116
|
+
| {{ERROR_1}} | {{CAUSE_1}} | {{SOLUTION_1}} |
|
|
117
|
+
| {{ERROR_2}} | {{CAUSE_2}} | {{SOLUTION_2}} |
|
|
118
|
+
|
|
119
|
+
## Resources
|
|
120
|
+
|
|
121
|
+
- ${CLAUDE_SKILL_DIR}/references/{{REFERENCE_1}}.md - {{REFERENCE_1_PURPOSE}}
|
|
122
|
+
- ${CLAUDE_SKILL_DIR}/scripts/{{SCRIPT_1}}.py - {{SCRIPT_1_PURPOSE}}
|