@cleocode/skills 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. package/dispatch-config.json +404 -0
  2. package/index.d.ts +178 -0
  3. package/index.js +405 -0
  4. package/package.json +14 -0
  5. package/profiles/core.json +7 -0
  6. package/profiles/full.json +10 -0
  7. package/profiles/minimal.json +7 -0
  8. package/profiles/recommended.json +7 -0
  9. package/provider-skills-map.json +97 -0
  10. package/skills/_shared/cleo-style-guide.md +84 -0
  11. package/skills/_shared/manifest-operations.md +810 -0
  12. package/skills/_shared/placeholders.json +433 -0
  13. package/skills/_shared/skill-chaining-patterns.md +237 -0
  14. package/skills/_shared/subagent-protocol-base.md +223 -0
  15. package/skills/_shared/task-system-integration.md +232 -0
  16. package/skills/_shared/testing-framework-config.md +110 -0
  17. package/skills/ct-cleo/SKILL.md +490 -0
  18. package/skills/ct-cleo/references/anti-patterns.md +19 -0
  19. package/skills/ct-cleo/references/loom-lifecycle.md +136 -0
  20. package/skills/ct-cleo/references/orchestrator-constraints.md +55 -0
  21. package/skills/ct-cleo/references/session-protocol.md +162 -0
  22. package/skills/ct-codebase-mapper/SKILL.md +82 -0
  23. package/skills/ct-contribution/SKILL.md +521 -0
  24. package/skills/ct-contribution/templates/contribution-init.json +21 -0
  25. package/skills/ct-dev-workflow/SKILL.md +423 -0
  26. package/skills/ct-docs-lookup/SKILL.md +66 -0
  27. package/skills/ct-docs-review/SKILL.md +175 -0
  28. package/skills/ct-docs-write/SKILL.md +108 -0
  29. package/skills/ct-documentor/SKILL.md +231 -0
  30. package/skills/ct-epic-architect/SKILL.md +305 -0
  31. package/skills/ct-epic-architect/references/bug-epic-example.md +172 -0
  32. package/skills/ct-epic-architect/references/commands.md +201 -0
  33. package/skills/ct-epic-architect/references/feature-epic-example.md +210 -0
  34. package/skills/ct-epic-architect/references/migration-epic-example.md +244 -0
  35. package/skills/ct-epic-architect/references/output-format.md +92 -0
  36. package/skills/ct-epic-architect/references/patterns.md +284 -0
  37. package/skills/ct-epic-architect/references/refactor-epic-example.md +412 -0
  38. package/skills/ct-epic-architect/references/research-epic-example.md +226 -0
  39. package/skills/ct-epic-architect/references/shell-escaping.md +86 -0
  40. package/skills/ct-epic-architect/references/skill-aware-execution.md +195 -0
  41. package/skills/ct-grade/SKILL.md +230 -0
  42. package/skills/ct-grade/agents/analysis-reporter.md +203 -0
  43. package/skills/ct-grade/agents/blind-comparator.md +157 -0
  44. package/skills/ct-grade/agents/scenario-runner.md +134 -0
  45. package/skills/ct-grade/eval-viewer/__pycache__/generate_grade_review.cpython-314.pyc +0 -0
  46. package/skills/ct-grade/eval-viewer/generate_grade_review.py +1138 -0
  47. package/skills/ct-grade/eval-viewer/generate_grade_viewer.py +544 -0
  48. package/skills/ct-grade/eval-viewer/generate_review.py +283 -0
  49. package/skills/ct-grade/eval-viewer/grade-review.html +1574 -0
  50. package/skills/ct-grade/eval-viewer/viewer.html +219 -0
  51. package/skills/ct-grade/evals/evals.json +94 -0
  52. package/skills/ct-grade/references/ab-test-methodology.md +150 -0
  53. package/skills/ct-grade/references/domains.md +137 -0
  54. package/skills/ct-grade/references/grade-spec.md +236 -0
  55. package/skills/ct-grade/references/scenario-playbook.md +234 -0
  56. package/skills/ct-grade/references/token-tracking.md +120 -0
  57. package/skills/ct-grade/scripts/__pycache__/audit_analyzer.cpython-314.pyc +0 -0
  58. package/skills/ct-grade/scripts/__pycache__/run_ab_test.cpython-314.pyc +0 -0
  59. package/skills/ct-grade/scripts/__pycache__/run_all.cpython-314.pyc +0 -0
  60. package/skills/ct-grade/scripts/__pycache__/token_tracker.cpython-314.pyc +0 -0
  61. package/skills/ct-grade/scripts/audit_analyzer.py +279 -0
  62. package/skills/ct-grade/scripts/generate_report.py +283 -0
  63. package/skills/ct-grade/scripts/run_ab_test.py +504 -0
  64. package/skills/ct-grade/scripts/run_all.py +287 -0
  65. package/skills/ct-grade/scripts/setup_run.py +183 -0
  66. package/skills/ct-grade/scripts/token_tracker.py +630 -0
  67. package/skills/ct-grade-v2-1/SKILL.md +237 -0
  68. package/skills/ct-grade-v2-1/agents/analysis-reporter.md +203 -0
  69. package/skills/ct-grade-v2-1/agents/blind-comparator.md +157 -0
  70. package/skills/ct-grade-v2-1/agents/scenario-runner.md +179 -0
  71. package/skills/ct-grade-v2-1/evals/evals.json +74 -0
  72. package/skills/ct-grade-v2-1/grade-viewer/__pycache__/build_op_stats.cpython-314.pyc +0 -0
  73. package/skills/ct-grade-v2-1/grade-viewer/__pycache__/generate_grade_review.cpython-314.pyc +0 -0
  74. package/skills/ct-grade-v2-1/grade-viewer/build_op_stats.py +174 -0
  75. package/skills/ct-grade-v2-1/grade-viewer/eval-analysis.json +41 -0
  76. package/skills/ct-grade-v2-1/grade-viewer/eval-report.md +34 -0
  77. package/skills/ct-grade-v2-1/grade-viewer/generate_grade_review.py +1023 -0
  78. package/skills/ct-grade-v2-1/grade-viewer/generate_grade_viewer.py +548 -0
  79. package/skills/ct-grade-v2-1/grade-viewer/grade-review-eval.html +613 -0
  80. package/skills/ct-grade-v2-1/grade-viewer/grade-review.html +1532 -0
  81. package/skills/ct-grade-v2-1/grade-viewer/viewer.html +620 -0
  82. package/skills/ct-grade-v2-1/manifest-entry.json +31 -0
  83. package/skills/ct-grade-v2-1/references/ab-testing.md +233 -0
  84. package/skills/ct-grade-v2-1/references/domains-ssot.md +156 -0
  85. package/skills/ct-grade-v2-1/references/grade-spec-v2.md +167 -0
  86. package/skills/ct-grade-v2-1/references/playbook-v2.md +393 -0
  87. package/skills/ct-grade-v2-1/references/token-tracking.md +202 -0
  88. package/skills/ct-grade-v2-1/scripts/generate_report.py +419 -0
  89. package/skills/ct-grade-v2-1/scripts/run_ab_test.py +493 -0
  90. package/skills/ct-grade-v2-1/scripts/run_scenario.py +396 -0
  91. package/skills/ct-grade-v2-1/scripts/setup_run.py +207 -0
  92. package/skills/ct-grade-v2-1/scripts/token_tracker.py +175 -0
  93. package/skills/ct-memory/SKILL.md +84 -0
  94. package/skills/ct-orchestrator/INSTALL.md +61 -0
  95. package/skills/ct-orchestrator/README.md +69 -0
  96. package/skills/ct-orchestrator/SKILL.md +380 -0
  97. package/skills/ct-orchestrator/manifest-entry.json +19 -0
  98. package/skills/ct-orchestrator/orchestrator-prompt.txt +17 -0
  99. package/skills/ct-orchestrator/references/SUBAGENT-PROTOCOL-BLOCK.md +66 -0
  100. package/skills/ct-orchestrator/references/autonomous-operation.md +167 -0
  101. package/skills/ct-orchestrator/references/lifecycle-gates.md +98 -0
  102. package/skills/ct-orchestrator/references/orchestrator-compliance.md +271 -0
  103. package/skills/ct-orchestrator/references/orchestrator-handoffs.md +85 -0
  104. package/skills/ct-orchestrator/references/orchestrator-patterns.md +164 -0
  105. package/skills/ct-orchestrator/references/orchestrator-recovery.md +113 -0
  106. package/skills/ct-orchestrator/references/orchestrator-spawning.md +271 -0
  107. package/skills/ct-orchestrator/references/orchestrator-tokens.md +180 -0
  108. package/skills/ct-research-agent/SKILL.md +226 -0
  109. package/skills/ct-skill-creator/.cleo/.context-state.json +13 -0
  110. package/skills/ct-skill-creator/.cleo/logs/cleo.2026-03-07.1.log +24 -0
  111. package/skills/ct-skill-creator/.cleo/tasks.db +0 -0
  112. package/skills/ct-skill-creator/SKILL.md +356 -0
  113. package/skills/ct-skill-creator/agents/analyzer.md +276 -0
  114. package/skills/ct-skill-creator/agents/comparator.md +204 -0
  115. package/skills/ct-skill-creator/agents/grader.md +225 -0
  116. package/skills/ct-skill-creator/assets/eval_review.html +146 -0
  117. package/skills/ct-skill-creator/eval-viewer/__pycache__/generate_review.cpython-314.pyc +0 -0
  118. package/skills/ct-skill-creator/eval-viewer/generate_review.py +471 -0
  119. package/skills/ct-skill-creator/eval-viewer/viewer.html +1325 -0
  120. package/skills/ct-skill-creator/manifest-entry.json +17 -0
  121. package/skills/ct-skill-creator/references/dynamic-context.md +228 -0
  122. package/skills/ct-skill-creator/references/frontmatter.md +83 -0
  123. package/skills/ct-skill-creator/references/invocation-control.md +165 -0
  124. package/skills/ct-skill-creator/references/output-patterns.md +86 -0
  125. package/skills/ct-skill-creator/references/provider-deployment.md +175 -0
  126. package/skills/ct-skill-creator/references/schemas.md +430 -0
  127. package/skills/ct-skill-creator/references/workflows.md +28 -0
  128. package/skills/ct-skill-creator/scripts/__init__.py +1 -0
  129. package/skills/ct-skill-creator/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
  130. package/skills/ct-skill-creator/scripts/__pycache__/aggregate_benchmark.cpython-314.pyc +0 -0
  131. package/skills/ct-skill-creator/scripts/__pycache__/generate_report.cpython-314.pyc +0 -0
  132. package/skills/ct-skill-creator/scripts/__pycache__/improve_description.cpython-314.pyc +0 -0
  133. package/skills/ct-skill-creator/scripts/__pycache__/init_skill.cpython-314.pyc +0 -0
  134. package/skills/ct-skill-creator/scripts/__pycache__/quick_validate.cpython-314.pyc +0 -0
  135. package/skills/ct-skill-creator/scripts/__pycache__/run_eval.cpython-314.pyc +0 -0
  136. package/skills/ct-skill-creator/scripts/__pycache__/run_loop.cpython-314.pyc +0 -0
  137. package/skills/ct-skill-creator/scripts/__pycache__/utils.cpython-314.pyc +0 -0
  138. package/skills/ct-skill-creator/scripts/aggregate_benchmark.py +401 -0
  139. package/skills/ct-skill-creator/scripts/generate_report.py +326 -0
  140. package/skills/ct-skill-creator/scripts/improve_description.py +247 -0
  141. package/skills/ct-skill-creator/scripts/init_skill.py +306 -0
  142. package/skills/ct-skill-creator/scripts/package_skill.py +110 -0
  143. package/skills/ct-skill-creator/scripts/quick_validate.py +97 -0
  144. package/skills/ct-skill-creator/scripts/run_eval.py +310 -0
  145. package/skills/ct-skill-creator/scripts/run_loop.py +328 -0
  146. package/skills/ct-skill-creator/scripts/utils.py +47 -0
  147. package/skills/ct-skill-validator/SKILL.md +178 -0
  148. package/skills/ct-skill-validator/agents/ecosystem-checker.md +151 -0
  149. package/skills/ct-skill-validator/assets/valid-skill-example.md +13 -0
  150. package/skills/ct-skill-validator/evals/eval_set.json +14 -0
  151. package/skills/ct-skill-validator/evals/evals.json +52 -0
  152. package/skills/ct-skill-validator/manifest-entry.json +20 -0
  153. package/skills/ct-skill-validator/references/cleo-ecosystem-rules.md +163 -0
  154. package/skills/ct-skill-validator/references/validation-rules.md +168 -0
  155. package/skills/ct-skill-validator/scripts/__init__.py +0 -0
  156. package/skills/ct-skill-validator/scripts/__pycache__/audit_body.cpython-314.pyc +0 -0
  157. package/skills/ct-skill-validator/scripts/__pycache__/check_ecosystem.cpython-314.pyc +0 -0
  158. package/skills/ct-skill-validator/scripts/__pycache__/generate_validation_report.cpython-314.pyc +0 -0
  159. package/skills/ct-skill-validator/scripts/__pycache__/validate.cpython-314.pyc +0 -0
  160. package/skills/ct-skill-validator/scripts/audit_body.py +242 -0
  161. package/skills/ct-skill-validator/scripts/check_ecosystem.py +169 -0
  162. package/skills/ct-skill-validator/scripts/check_manifest.py +172 -0
  163. package/skills/ct-skill-validator/scripts/generate_validation_report.py +442 -0
  164. package/skills/ct-skill-validator/scripts/validate.py +422 -0
  165. package/skills/ct-spec-writer/SKILL.md +189 -0
  166. package/skills/ct-stickynote/README.md +14 -0
  167. package/skills/ct-stickynote/SKILL.md +46 -0
  168. package/skills/ct-task-executor/SKILL.md +296 -0
  169. package/skills/ct-validator/SKILL.md +216 -0
  170. package/skills/manifest.json +469 -0
  171. package/skills.json +281 -0
@@ -0,0 +1,175 @@
1
+ # Provider Deployment
2
+
3
+ This reference covers multi-provider skill deployment: where each provider reads skills from, how symlinks enable a single source of truth, and how to add new skills to the CLEO ct-skills package.
4
+
5
+ ## Architecture
6
+
7
+ Skills follow a single-source-of-truth model:
8
+
9
+ ```
10
+ packages/ct-skills/skills/ <-- canonical location
11
+ ct-cleo/SKILL.md
12
+ ct-orchestrator/SKILL.md
13
+ ct-skill-creator/SKILL.md
14
+ ...
15
+ manifest.json <-- CLEO-only metadata
16
+ ```
17
+
18
+ Providers read skills from their own vendor-specific directories. Symlinks point from each provider's skill directory back to the canonical location, so a single SKILL.md serves all providers.
19
+
20
+ **SKILL.md** is the open standard -- every provider reads it. It contains only the 11 standard v2 fields (name, description, etc.) that all providers understand.
21
+
22
+ **manifest.json** is CLEO-only metadata -- tier, tags, capabilities, dispatch routing. Non-CLEO providers ignore it entirely. This is why CLEO-specific fields must never appear in SKILL.md: providers that do not understand them may reject the skill or behave unpredictably.
23
+
24
+ ## Provider Path Table
25
+
26
+ | Provider | Global Skills Path | Project Skills Path | Symlinks |
27
+ |---|---|---|---|
28
+ | claude-code | `$HOME/.claude/skills` | `.claude/skills` | Yes |
29
+ | codex-cli | `$HOME/.agents/skills` | `.agents/skills` | Yes |
30
+ | gemini-cli | `$HOME/.gemini/skills` | `.gemini/skills` | Yes |
31
+ | cursor | `$HOME/.cursor/skills` | `.cursor/skills` | Yes |
32
+ | github-copilot | `$HOME/.copilot/skills` | `.github/skills` | Yes |
33
+ | windsurf | `$HOME/.codeium/windsurf/skills` | `.windsurf/skills` | No |
34
+ | opencode | `$HOME/.config/opencode/skills` | `.opencode/skills` | Yes |
35
+ | kimi-coding | `$HOME/.kimi/skills` | `.kimi/skills` | No |
36
+ | antigravity | `$HOME/.antigravity/skills` | `.agent/skills` | Yes |
37
+
38
+ **Global skills** are available across all projects for that provider. **Project skills** are scoped to the repository they live in.
39
+
40
+ **Symlink support** indicates whether the provider follows symbolic links when reading skill directories. Providers without symlink support (windsurf, kimi-coding) require the skill directory to be copied rather than symlinked.
41
+
42
+ ## What Providers Read
43
+
44
+ ### Standard Providers (non-CLEO)
45
+
46
+ Standard providers read SKILL.md only. They parse the YAML frontmatter for standard v2 fields and load the markdown body when the skill triggers. They have no awareness of manifest.json, dispatch-config.json, or any CLEO infrastructure.
47
+
48
+ Fields they understand: `name`, `description`, `argument-hint`, `disable-model-invocation`, `user-invocable`, `allowed-tools`, `model`, `context`, `agent`, `hooks`, `license`.
49
+
50
+ Fields they do not understand: `version`, `tier`, `core`, `category`, `protocol`, `dependencies`, `sharedResources`, `compatibility`, `tags`, `triggers`, `token_budget`, `capabilities`, `constraints`, `metadata`.
51
+
52
+ ### CLEO-Aware Providers
53
+
54
+ CLEO-aware providers read both SKILL.md and manifest.json. The manifest provides dispatch routing, tier-based loading priorities, token budgets, and capability declarations that enable advanced features like skill chaining and orchestration.
55
+
56
+ ## Deployment Strategies
57
+
58
+ ### CLEO Package Skills
59
+
60
+ Skills in `packages/ct-skills/skills/` are managed by CLEO infrastructure. They are deployed via the CLEO skill system and do not need manual symlink setup. The manifest.json, dispatch-config.json, and provider-skills-map.json coordinate deployment automatically.
61
+
62
+ ### User Global Skills
63
+
64
+ For standalone skills installed directly into a provider's global skill directory:
65
+
66
+ ```bash
67
+ # Claude Code
68
+ cp -r my-skill/ ~/.claude/skills/my-skill/
69
+
70
+ # Gemini CLI
71
+ cp -r my-skill/ ~/.gemini/skills/my-skill/
72
+
73
+ # Codex CLI
74
+ cp -r my-skill/ ~/.agents/skills/my-skill/
75
+ ```
76
+
77
+ ### Multi-Provider via Symlinks
78
+
79
+ For skills that should be available across multiple providers, create the skill in one location and symlink to each provider's directory:
80
+
81
+ ```bash
82
+ # Canonical location
83
+ mkdir -p ~/shared-skills/my-skill
84
+ # ... create SKILL.md and resources in ~/shared-skills/my-skill/
85
+
86
+ # Symlink to each provider
87
+ ln -s ~/shared-skills/my-skill ~/.claude/skills/my-skill
88
+ ln -s ~/shared-skills/my-skill ~/.gemini/skills/my-skill
89
+ ln -s ~/shared-skills/my-skill ~/.agents/skills/my-skill
90
+ ```
91
+
92
+ For providers that do not support symlinks (windsurf, kimi-coding), copy the directory instead:
93
+
94
+ ```bash
95
+ cp -r ~/shared-skills/my-skill ~/.codeium/windsurf/skills/my-skill
96
+ cp -r ~/shared-skills/my-skill ~/.kimi/skills/my-skill
97
+ ```
98
+
99
+ ### Project-Level Skills
100
+
101
+ Place the skill directory inside the project's provider-specific skill path:
102
+
103
+ ```bash
104
+ # Claude Code project skill
105
+ cp -r my-skill/ .claude/skills/my-skill/
106
+
107
+ # Cursor project skill
108
+ cp -r my-skill/ .cursor/skills/my-skill/
109
+ ```
110
+
111
+ Project skills are committed to the repository and shared with all contributors.
112
+
113
+ ## Adding a New Skill to CLEO ct-skills
114
+
115
+ To add a new skill to the CLEO package (`packages/ct-skills/`):
116
+
117
+ 1. **Create the skill directory**:
118
+ ```bash
119
+ mkdir -p packages/ct-skills/skills/my-new-skill
120
+ ```
121
+
122
+ 2. **Write SKILL.md with standard fields only**:
123
+ ```yaml
124
+ ---
125
+ name: my-new-skill
126
+ description: "Clear description of what the skill does and when to use it."
127
+ license: MIT
128
+ ---
129
+ # My New Skill
130
+
131
+ Instructions for using the skill...
132
+ ```
133
+ Include only v2 standard fields. No `version`, `tier`, `category`, or other CLEO-only fields.
134
+
135
+ 3. **Add entry to manifest.json** (`packages/ct-skills/skills/manifest.json`):
136
+ ```json
137
+ {
138
+ "name": "my-new-skill",
139
+ "version": "1.0.0",
140
+ "description": "Same description as SKILL.md",
141
+ "path": "skills/my-new-skill",
142
+ "tags": ["relevant", "tags"],
143
+ "status": "active",
144
+ "tier": 2,
145
+ "token_budget": 6000,
146
+ "references": [],
147
+ "capabilities": {
148
+ "inputs": [],
149
+ "outputs": [],
150
+ "dependencies": [],
151
+ "dispatch_triggers": ["trigger phrase"],
152
+ "compatible_subagent_types": ["general-purpose"],
153
+ "chains_to": [],
154
+ "dispatch_keywords": {
155
+ "primary": ["keyword1", "keyword2"],
156
+ "secondary": ["keyword3", "keyword4"]
157
+ }
158
+ },
159
+ "constraints": {
160
+ "max_context_tokens": 60000,
161
+ "requires_session": false,
162
+ "requires_epic": false
163
+ }
164
+ }
165
+ ```
166
+
167
+ 4. **Add entry to dispatch-config.json** (`packages/ct-skills/dispatch-config.json`):
168
+ Add the skill to relevant `by_task_type`, `by_keyword`, and/or `by_protocol` mappings if it should participate in dispatch routing.
169
+
170
+ 5. **Update totalSkills** in manifest.json `_meta.totalSkills` to reflect the new count.
171
+
172
+ 6. **Validate**: Run the skill validator to confirm the new skill passes all checks:
173
+ ```bash
174
+ python3 packages/ct-skills/skills/ct-skill-creator/scripts/quick_validate.py packages/ct-skills/skills/my-new-skill
175
+ ```
@@ -0,0 +1,430 @@
1
+ # JSON Schemas
2
+
3
+ This document defines the JSON schemas used by skill-creator.
4
+
5
+ ---
6
+
7
+ ## evals.json
8
+
9
+ Defines the evals for a skill. Located at `evals/evals.json` within the skill directory.
10
+
11
+ ```json
12
+ {
13
+ "skill_name": "example-skill",
14
+ "evals": [
15
+ {
16
+ "id": 1,
17
+ "prompt": "User's example prompt",
18
+ "expected_output": "Description of expected result",
19
+ "files": ["evals/files/sample1.pdf"],
20
+ "expectations": [
21
+ "The output includes X",
22
+ "The skill used script Y"
23
+ ]
24
+ }
25
+ ]
26
+ }
27
+ ```
28
+
29
+ **Fields:**
30
+ - `skill_name`: Name matching the skill's frontmatter
31
+ - `evals[].id`: Unique integer identifier
32
+ - `evals[].prompt`: The task to execute
33
+ - `evals[].expected_output`: Human-readable description of success
34
+ - `evals[].files`: Optional list of input file paths (relative to skill root)
35
+ - `evals[].expectations`: List of verifiable statements
36
+
37
+ ---
38
+
39
+ ## history.json
40
+
41
+ Tracks version progression in Improve mode. Located at workspace root.
42
+
43
+ ```json
44
+ {
45
+ "started_at": "2026-01-15T10:30:00Z",
46
+ "skill_name": "pdf",
47
+ "current_best": "v2",
48
+ "iterations": [
49
+ {
50
+ "version": "v0",
51
+ "parent": null,
52
+ "expectation_pass_rate": 0.65,
53
+ "grading_result": "baseline",
54
+ "is_current_best": false
55
+ },
56
+ {
57
+ "version": "v1",
58
+ "parent": "v0",
59
+ "expectation_pass_rate": 0.75,
60
+ "grading_result": "won",
61
+ "is_current_best": false
62
+ },
63
+ {
64
+ "version": "v2",
65
+ "parent": "v1",
66
+ "expectation_pass_rate": 0.85,
67
+ "grading_result": "won",
68
+ "is_current_best": true
69
+ }
70
+ ]
71
+ }
72
+ ```
73
+
74
+ **Fields:**
75
+ - `started_at`: ISO timestamp of when improvement started
76
+ - `skill_name`: Name of the skill being improved
77
+ - `current_best`: Version identifier of the best performer
78
+ - `iterations[].version`: Version identifier (v0, v1, ...)
79
+ - `iterations[].parent`: Parent version this was derived from
80
+ - `iterations[].expectation_pass_rate`: Pass rate from grading
81
+ - `iterations[].grading_result`: "baseline", "won", "lost", or "tie"
82
+ - `iterations[].is_current_best`: Whether this is the current best version
83
+
84
+ ---
85
+
86
+ ## grading.json
87
+
88
+ Output from the grader agent. Located at `<run-dir>/grading.json`.
89
+
90
+ ```json
91
+ {
92
+ "expectations": [
93
+ {
94
+ "text": "The output includes the name 'John Smith'",
95
+ "passed": true,
96
+ "evidence": "Found in transcript Step 3: 'Extracted names: John Smith, Sarah Johnson'"
97
+ },
98
+ {
99
+ "text": "The spreadsheet has a SUM formula in cell B10",
100
+ "passed": false,
101
+ "evidence": "No spreadsheet was created. The output was a text file."
102
+ }
103
+ ],
104
+ "summary": {
105
+ "passed": 2,
106
+ "failed": 1,
107
+ "total": 3,
108
+ "pass_rate": 0.67
109
+ },
110
+ "execution_metrics": {
111
+ "tool_calls": {
112
+ "Read": 5,
113
+ "Write": 2,
114
+ "Bash": 8
115
+ },
116
+ "total_tool_calls": 15,
117
+ "total_steps": 6,
118
+ "errors_encountered": 0,
119
+ "output_chars": 12450,
120
+ "transcript_chars": 3200
121
+ },
122
+ "timing": {
123
+ "executor_duration_seconds": 165.0,
124
+ "grader_duration_seconds": 26.0,
125
+ "total_duration_seconds": 191.0
126
+ },
127
+ "claims": [
128
+ {
129
+ "claim": "The form has 12 fillable fields",
130
+ "type": "factual",
131
+ "verified": true,
132
+ "evidence": "Counted 12 fields in field_info.json"
133
+ }
134
+ ],
135
+ "user_notes_summary": {
136
+ "uncertainties": ["Used 2023 data, may be stale"],
137
+ "needs_review": [],
138
+ "workarounds": ["Fell back to text overlay for non-fillable fields"]
139
+ },
140
+ "eval_feedback": {
141
+ "suggestions": [
142
+ {
143
+ "assertion": "The output includes the name 'John Smith'",
144
+ "reason": "A hallucinated document that mentions the name would also pass"
145
+ }
146
+ ],
147
+ "overall": "Assertions check presence but not correctness."
148
+ }
149
+ }
150
+ ```
151
+
152
+ **Fields:**
153
+ - `expectations[]`: Graded expectations with evidence
154
+ - `summary`: Aggregate pass/fail counts
155
+ - `execution_metrics`: Tool usage and output size (from executor's metrics.json)
156
+ - `timing`: Wall clock timing (from timing.json)
157
+ - `claims`: Extracted and verified claims from the output
158
+ - `user_notes_summary`: Issues flagged by the executor
159
+ - `eval_feedback`: (optional) Improvement suggestions for the evals, only present when the grader identifies issues worth raising
160
+
161
+ ---
162
+
163
+ ## metrics.json
164
+
165
+ Output from the executor agent. Located at `<run-dir>/outputs/metrics.json`.
166
+
167
+ ```json
168
+ {
169
+ "tool_calls": {
170
+ "Read": 5,
171
+ "Write": 2,
172
+ "Bash": 8,
173
+ "Edit": 1,
174
+ "Glob": 2,
175
+ "Grep": 0
176
+ },
177
+ "total_tool_calls": 18,
178
+ "total_steps": 6,
179
+ "files_created": ["filled_form.pdf", "field_values.json"],
180
+ "errors_encountered": 0,
181
+ "output_chars": 12450,
182
+ "transcript_chars": 3200
183
+ }
184
+ ```
185
+
186
+ **Fields:**
187
+ - `tool_calls`: Count per tool type
188
+ - `total_tool_calls`: Sum of all tool calls
189
+ - `total_steps`: Number of major execution steps
190
+ - `files_created`: List of output files created
191
+ - `errors_encountered`: Number of errors during execution
192
+ - `output_chars`: Total character count of output files
193
+ - `transcript_chars`: Character count of transcript
194
+
195
+ ---
196
+
197
+ ## timing.json
198
+
199
+ Wall clock timing for a run. Located at `<run-dir>/timing.json`.
200
+
201
+ **How to capture:** When a subagent task completes, the task notification includes `total_tokens` and `duration_ms`. Save these immediately — they are not persisted anywhere else and cannot be recovered after the fact.
202
+
203
+ ```json
204
+ {
205
+ "total_tokens": 84852,
206
+ "duration_ms": 23332,
207
+ "total_duration_seconds": 23.3,
208
+ "executor_start": "2026-01-15T10:30:00Z",
209
+ "executor_end": "2026-01-15T10:32:45Z",
210
+ "executor_duration_seconds": 165.0,
211
+ "grader_start": "2026-01-15T10:32:46Z",
212
+ "grader_end": "2026-01-15T10:33:12Z",
213
+ "grader_duration_seconds": 26.0
214
+ }
215
+ ```
216
+
217
+ ---
218
+
219
+ ## benchmark.json
220
+
221
+ Output from Benchmark mode. Located at `benchmarks/<timestamp>/benchmark.json`.
222
+
223
+ ```json
224
+ {
225
+ "metadata": {
226
+ "skill_name": "pdf",
227
+ "skill_path": "/path/to/pdf",
228
+ "executor_model": "claude-sonnet-4-20250514",
229
+ "analyzer_model": "most-capable-model",
230
+ "timestamp": "2026-01-15T10:30:00Z",
231
+ "evals_run": [1, 2, 3],
232
+ "runs_per_configuration": 3
233
+ },
234
+
235
+ "runs": [
236
+ {
237
+ "eval_id": 1,
238
+ "eval_name": "Ocean",
239
+ "configuration": "with_skill",
240
+ "run_number": 1,
241
+ "result": {
242
+ "pass_rate": 0.85,
243
+ "passed": 6,
244
+ "failed": 1,
245
+ "total": 7,
246
+ "time_seconds": 42.5,
247
+ "tokens": 3800,
248
+ "tool_calls": 18,
249
+ "errors": 0
250
+ },
251
+ "expectations": [
252
+ {"text": "...", "passed": true, "evidence": "..."}
253
+ ],
254
+ "notes": [
255
+ "Used 2023 data, may be stale",
256
+ "Fell back to text overlay for non-fillable fields"
257
+ ]
258
+ }
259
+ ],
260
+
261
+ "run_summary": {
262
+ "with_skill": {
263
+ "pass_rate": {"mean": 0.85, "stddev": 0.05, "min": 0.80, "max": 0.90},
264
+ "time_seconds": {"mean": 45.0, "stddev": 12.0, "min": 32.0, "max": 58.0},
265
+ "tokens": {"mean": 3800, "stddev": 400, "min": 3200, "max": 4100}
266
+ },
267
+ "without_skill": {
268
+ "pass_rate": {"mean": 0.35, "stddev": 0.08, "min": 0.28, "max": 0.45},
269
+ "time_seconds": {"mean": 32.0, "stddev": 8.0, "min": 24.0, "max": 42.0},
270
+ "tokens": {"mean": 2100, "stddev": 300, "min": 1800, "max": 2500}
271
+ },
272
+ "delta": {
273
+ "pass_rate": "+0.50",
274
+ "time_seconds": "+13.0",
275
+ "tokens": "+1700"
276
+ }
277
+ },
278
+
279
+ "notes": [
280
+ "Assertion 'Output is a PDF file' passes 100% in both configurations - may not differentiate skill value",
281
+ "Eval 3 shows high variance (50% ± 40%) - may be flaky or model-dependent",
282
+ "Without-skill runs consistently fail on table extraction expectations",
283
+ "Skill adds 13s average execution time but improves pass rate by 50%"
284
+ ]
285
+ }
286
+ ```
287
+
288
+ **Fields:**
289
+ - `metadata`: Information about the benchmark run
290
+ - `skill_name`: Name of the skill
291
+ - `timestamp`: When the benchmark was run
292
+ - `evals_run`: List of eval IDs that were run (matching `evals[].id` in evals.json)
293
+ - `runs_per_configuration`: Number of runs per config (e.g. 3)
294
+ - `runs[]`: Individual run results
295
+ - `eval_id`: Numeric eval identifier
296
+ - `eval_name`: Human-readable eval name (used as section header in the viewer)
297
+ - `configuration`: Must be `"with_skill"` or `"without_skill"` (the viewer uses this exact string for grouping and color coding)
298
+ - `run_number`: Integer run number (1, 2, 3...)
299
+ - `result`: Nested object with `pass_rate`, `passed`, `failed`, `total`, `time_seconds`, `tokens`, `tool_calls`, `errors`
300
+ - `run_summary`: Statistical aggregates per configuration
301
+ - `with_skill` / `without_skill`: Each contains `pass_rate`, `time_seconds`, `tokens` objects with `mean`, `stddev`, `min`, and `max` fields
302
+ - `delta`: Difference strings like `"+0.50"`, `"+13.0"`, `"+1700"`
303
+ - `notes`: Freeform observations from the analyzer
304
+
305
+ **Important:** The viewer reads these field names exactly. Using `config` instead of `configuration`, or putting `pass_rate` at the top level of a run instead of nested under `result`, will cause the viewer to show empty/zero values. Always reference this schema when generating benchmark.json manually.
306
+
307
+ ---
308
+
309
+ ## comparison.json
310
+
311
+ Output from blind comparator. Located at `<grading-dir>/comparison-N.json`.
312
+
313
+ ```json
314
+ {
315
+ "winner": "A",
316
+ "reasoning": "Output A provides a complete solution with proper formatting and all required fields. Output B is missing the date field and has formatting inconsistencies.",
317
+ "rubric": {
318
+ "A": {
319
+ "content": {
320
+ "correctness": 5,
321
+ "completeness": 5,
322
+ "accuracy": 4
323
+ },
324
+ "structure": {
325
+ "organization": 4,
326
+ "formatting": 5,
327
+ "usability": 4
328
+ },
329
+ "content_score": 4.7,
330
+ "structure_score": 4.3,
331
+ "overall_score": 9.0
332
+ },
333
+ "B": {
334
+ "content": {
335
+ "correctness": 3,
336
+ "completeness": 2,
337
+ "accuracy": 3
338
+ },
339
+ "structure": {
340
+ "organization": 3,
341
+ "formatting": 2,
342
+ "usability": 3
343
+ },
344
+ "content_score": 2.7,
345
+ "structure_score": 2.7,
346
+ "overall_score": 5.4
347
+ }
348
+ },
349
+ "output_quality": {
350
+ "A": {
351
+ "score": 9,
352
+ "strengths": ["Complete solution", "Well-formatted", "All fields present"],
353
+ "weaknesses": ["Minor style inconsistency in header"]
354
+ },
355
+ "B": {
356
+ "score": 5,
357
+ "strengths": ["Readable output", "Correct basic structure"],
358
+ "weaknesses": ["Missing date field", "Formatting inconsistencies", "Partial data extraction"]
359
+ }
360
+ },
361
+ "expectation_results": {
362
+ "A": {
363
+ "passed": 4,
364
+ "total": 5,
365
+ "pass_rate": 0.80,
366
+ "details": [
367
+ {"text": "Output includes name", "passed": true}
368
+ ]
369
+ },
370
+ "B": {
371
+ "passed": 3,
372
+ "total": 5,
373
+ "pass_rate": 0.60,
374
+ "details": [
375
+ {"text": "Output includes name", "passed": true}
376
+ ]
377
+ }
378
+ }
379
+ }
380
+ ```
381
+
382
+ ---
383
+
384
+ ## analysis.json
385
+
386
+ Output from post-hoc analyzer. Located at `<grading-dir>/analysis.json`.
387
+
388
+ ```json
389
+ {
390
+ "comparison_summary": {
391
+ "winner": "A",
392
+ "winner_skill": "path/to/winner/skill",
393
+ "loser_skill": "path/to/loser/skill",
394
+ "comparator_reasoning": "Brief summary of why comparator chose winner"
395
+ },
396
+ "winner_strengths": [
397
+ "Clear step-by-step instructions for handling multi-page documents",
398
+ "Included validation script that caught formatting errors"
399
+ ],
400
+ "loser_weaknesses": [
401
+ "Vague instruction 'process the document appropriately' led to inconsistent behavior",
402
+ "No script for validation, agent had to improvise"
403
+ ],
404
+ "instruction_following": {
405
+ "winner": {
406
+ "score": 9,
407
+ "issues": ["Minor: skipped optional logging step"]
408
+ },
409
+ "loser": {
410
+ "score": 6,
411
+ "issues": [
412
+ "Did not use the skill's formatting template",
413
+ "Invented own approach instead of following step 3"
414
+ ]
415
+ }
416
+ },
417
+ "improvement_suggestions": [
418
+ {
419
+ "priority": "high",
420
+ "category": "instructions",
421
+ "suggestion": "Replace 'process the document appropriately' with explicit steps",
422
+ "expected_impact": "Would eliminate ambiguity that caused inconsistent behavior"
423
+ }
424
+ ],
425
+ "transcript_insights": {
426
+ "winner_execution_pattern": "Read skill -> Followed 5-step process -> Used validation script",
427
+ "loser_execution_pattern": "Read skill -> Unclear on approach -> Tried 3 different methods"
428
+ }
429
+ }
430
+ ```
@@ -0,0 +1,28 @@
1
+ # Workflow Patterns
2
+
3
+ ## Sequential Workflows
4
+
5
+ For complex tasks, break operations into clear, sequential steps. It is often helpful to give Claude an overview of the process towards the beginning of SKILL.md:
6
+
7
+ ```markdown
8
+ Filling a PDF form involves these steps:
9
+
10
+ 1. Analyze the form (run analyze_form.py)
11
+ 2. Create field mapping (edit fields.json)
12
+ 3. Validate mapping (run validate_fields.py)
13
+ 4. Fill the form (run fill_form.py)
14
+ 5. Verify output (run verify_output.py)
15
+ ```
16
+
17
+ ## Conditional Workflows
18
+
19
+ For tasks with branching logic, guide Claude through decision points:
20
+
21
+ ```markdown
22
+ 1. Determine the modification type:
23
+ **Creating new content?** → Follow "Creation workflow" below
24
+ **Editing existing content?** → Follow "Editing workflow" below
25
+
26
+ 2. Creation workflow: [steps]
27
+ 3. Editing workflow: [steps]
28
+ ```