jumpstart-mode 1.1.12 → 1.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/.github/agents/jumpstart-adversary.agent.md +2 -1
  2. package/.github/agents/jumpstart-architect.agent.md +5 -6
  3. package/.github/agents/jumpstart-challenger.agent.md +2 -1
  4. package/.github/agents/jumpstart-devops.agent.md +2 -2
  5. package/.github/agents/jumpstart-diagram-verifier.agent.md +2 -1
  6. package/.github/agents/jumpstart-maintenance.agent.md +1 -0
  7. package/.github/agents/jumpstart-performance.agent.md +1 -0
  8. package/.github/agents/jumpstart-pm.agent.md +1 -1
  9. package/.github/agents/jumpstart-refactor.agent.md +1 -0
  10. package/.github/agents/jumpstart-requirements-extractor.agent.md +1 -0
  11. package/.github/agents/jumpstart-researcher.agent.md +1 -0
  12. package/.github/agents/jumpstart-retrospective.agent.md +1 -0
  13. package/.github/agents/jumpstart-reviewer.agent.md +2 -0
  14. package/.github/agents/jumpstart-scout.agent.md +1 -1
  15. package/.github/agents/jumpstart-scrum-master.agent.md +1 -0
  16. package/.github/agents/jumpstart-security.agent.md +2 -1
  17. package/.github/agents/jumpstart-tech-writer.agent.md +1 -0
  18. package/.github/workflows/quality.yml +19 -2
  19. package/.jumpstart/agents/analyst.md +38 -0
  20. package/.jumpstart/agents/architect.md +38 -0
  21. package/.jumpstart/agents/challenger.md +38 -0
  22. package/.jumpstart/agents/developer.md +41 -0
  23. package/.jumpstart/agents/pm.md +38 -0
  24. package/.jumpstart/agents/scout.md +33 -0
  25. package/.jumpstart/agents/ux-designer.md +4 -0
  26. package/.jumpstart/config.yaml +24 -0
  27. package/.jumpstart/schemas/timeline.schema.json +1 -0
  28. package/.jumpstart/skills/skill-creator/SKILL.md +485 -357
  29. package/.jumpstart/skills/skill-creator/agents/analyzer.md +274 -0
  30. package/.jumpstart/skills/skill-creator/agents/comparator.md +202 -0
  31. package/.jumpstart/skills/skill-creator/agents/grader.md +223 -0
  32. package/.jumpstart/skills/skill-creator/assets/eval_review.html +146 -0
  33. package/.jumpstart/skills/skill-creator/eval-viewer/generate_review.py +471 -0
  34. package/.jumpstart/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  35. package/.jumpstart/skills/skill-creator/references/schemas.md +430 -0
  36. package/.jumpstart/skills/skill-creator/scripts/__init__.py +0 -0
  37. package/.jumpstart/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  38. package/.jumpstart/skills/skill-creator/scripts/generate_report.py +326 -0
  39. package/.jumpstart/skills/skill-creator/scripts/improve_description.py +247 -0
  40. package/.jumpstart/skills/skill-creator/scripts/package_skill.py +136 -110
  41. package/.jumpstart/skills/skill-creator/scripts/run_eval.py +310 -0
  42. package/.jumpstart/skills/skill-creator/scripts/run_loop.py +328 -0
  43. package/.jumpstart/skills/skill-creator/scripts/utils.py +47 -0
  44. package/.jumpstart/state/timeline.json +659 -0
  45. package/.jumpstart/usage-log.json +74 -3
  46. package/README.md +62 -1
  47. package/bin/cli.js +3217 -1
  48. package/bin/headless-runner.js +62 -2
  49. package/bin/lib/agent-checkpoint.js +168 -0
  50. package/bin/lib/ai-evaluation.js +104 -0
  51. package/bin/lib/ai-intake.js +152 -0
  52. package/bin/lib/ambiguity-heatmap.js +152 -0
  53. package/bin/lib/artifact-comparison.js +104 -0
  54. package/bin/lib/ast-edit-engine.js +157 -0
  55. package/bin/lib/backlog-sync.js +338 -0
  56. package/bin/lib/bcdr-planning.js +158 -0
  57. package/bin/lib/bidirectional-trace.js +199 -0
  58. package/bin/lib/branch-workflow.js +266 -0
  59. package/bin/lib/cab-output.js +119 -0
  60. package/bin/lib/chat-integration.js +122 -0
  61. package/bin/lib/ci-cd-integration.js +208 -0
  62. package/bin/lib/codebase-retrieval.js +125 -0
  63. package/bin/lib/collaboration.js +168 -0
  64. package/bin/lib/compliance-packs.js +213 -0
  65. package/bin/lib/context-chunker.js +128 -0
  66. package/bin/lib/context-onboarding.js +122 -0
  67. package/bin/lib/contract-first.js +124 -0
  68. package/bin/lib/cost-router.js +148 -0
  69. package/bin/lib/credential-boundary.js +155 -0
  70. package/bin/lib/data-classification.js +180 -0
  71. package/bin/lib/data-contracts.js +129 -0
  72. package/bin/lib/db-evolution.js +158 -0
  73. package/bin/lib/decision-conflicts.js +299 -0
  74. package/bin/lib/delivery-confidence.js +361 -0
  75. package/bin/lib/dependency-upgrade.js +153 -0
  76. package/bin/lib/design-system.js +133 -0
  77. package/bin/lib/deterministic-artifacts.js +151 -0
  78. package/bin/lib/diagram-studio.js +115 -0
  79. package/bin/lib/domain-ontology.js +140 -0
  80. package/bin/lib/ea-review-packet.js +151 -0
  81. package/bin/lib/enterprise-search.js +123 -0
  82. package/bin/lib/enterprise-templates.js +140 -0
  83. package/bin/lib/environment-promotion.js +220 -0
  84. package/bin/lib/estimation-studio.js +130 -0
  85. package/bin/lib/event-modeling.js +133 -0
  86. package/bin/lib/evidence-collector.js +179 -0
  87. package/bin/lib/finops-planner.js +182 -0
  88. package/bin/lib/fitness-functions.js +279 -0
  89. package/bin/lib/focus.js +448 -0
  90. package/bin/lib/governance-dashboard.js +165 -0
  91. package/bin/lib/guided-handoff.js +120 -0
  92. package/bin/lib/impact-analysis.js +190 -0
  93. package/bin/lib/incident-feedback.js +157 -0
  94. package/bin/lib/integrate.js +1 -1
  95. package/bin/lib/knowledge-graph.js +122 -0
  96. package/bin/lib/legacy-modernizer.js +160 -0
  97. package/bin/lib/migration-planner.js +144 -0
  98. package/bin/lib/model-governance.js +185 -0
  99. package/bin/lib/model-router.js +144 -0
  100. package/bin/lib/multi-repo.js +272 -0
  101. package/bin/lib/next-phase.js +53 -8
  102. package/bin/lib/ops-ownership.js +152 -0
  103. package/bin/lib/parallel-agents.js +257 -0
  104. package/bin/lib/pattern-library.js +115 -0
  105. package/bin/lib/persona-packs.js +99 -0
  106. package/bin/lib/plan-executor.js +366 -0
  107. package/bin/lib/platform-engineering.js +119 -0
  108. package/bin/lib/playback-summaries.js +126 -0
  109. package/bin/lib/policy-engine.js +240 -0
  110. package/bin/lib/portfolio-reporting.js +357 -0
  111. package/bin/lib/pr-package.js +197 -0
  112. package/bin/lib/project-memory.js +235 -0
  113. package/bin/lib/prompt-governance.js +130 -0
  114. package/bin/lib/promptless-mode.js +128 -0
  115. package/bin/lib/quality-graph.js +193 -0
  116. package/bin/lib/raci-matrix.js +188 -0
  117. package/bin/lib/refactor-planner.js +167 -0
  118. package/bin/lib/reference-architectures.js +304 -0
  119. package/bin/lib/release-readiness.js +171 -0
  120. package/bin/lib/repo-graph.js +262 -0
  121. package/bin/lib/requirements-baseline.js +358 -0
  122. package/bin/lib/risk-register.js +211 -0
  123. package/bin/lib/role-approval.js +249 -0
  124. package/bin/lib/role-views.js +142 -0
  125. package/bin/lib/root-cause-analysis.js +132 -0
  126. package/bin/lib/runtime-debugger.js +154 -0
  127. package/bin/lib/safe-rename.js +135 -0
  128. package/bin/lib/semantic-diff.js +335 -0
  129. package/bin/lib/sla-slo.js +210 -0
  130. package/bin/lib/spec-comments.js +147 -0
  131. package/bin/lib/spec-maturity.js +287 -0
  132. package/bin/lib/sre-integration.js +154 -0
  133. package/bin/lib/structured-elicitation.js +174 -0
  134. package/bin/lib/telemetry-feedback.js +118 -0
  135. package/bin/lib/test-generator.js +146 -0
  136. package/bin/lib/timeline.js +2 -1
  137. package/bin/lib/tool-bridge.js +107 -0
  138. package/bin/lib/tool-guardrails.js +139 -0
  139. package/bin/lib/tool-schemas.js +172 -3
  140. package/bin/lib/transcript-ingestion.js +150 -0
  141. package/bin/lib/vendor-risk.js +173 -0
  142. package/bin/lib/waiver-workflow.js +174 -0
  143. package/bin/lib/web-dashboard.js +126 -0
  144. package/bin/lib/workshop-mode.js +165 -0
  145. package/bin/lib/workstream-ownership.js +104 -0
  146. package/package.json +1 -1
@@ -1,110 +1,136 @@
1
- #!/usr/bin/env python3
2
- """
3
- Skill Packager - Creates a distributable .skill file of a skill folder
4
-
5
- Usage:
6
- python utils/package_skill.py <path/to/skill-folder> [output-directory]
7
-
8
- Example:
9
- python utils/package_skill.py skills/public/my-skill
10
- python utils/package_skill.py skills/public/my-skill ./dist
11
- """
12
-
13
- import sys
14
- import zipfile
15
- from pathlib import Path
16
- from quick_validate import validate_skill
17
-
18
-
19
- def package_skill(skill_path, output_dir=None):
20
- """
21
- Package a skill folder into a .skill file.
22
-
23
- Args:
24
- skill_path: Path to the skill folder
25
- output_dir: Optional output directory for the .skill file (defaults to current directory)
26
-
27
- Returns:
28
- Path to the created .skill file, or None if error
29
- """
30
- skill_path = Path(skill_path).resolve()
31
-
32
- # Validate skill folder exists
33
- if not skill_path.exists():
34
- print(f"❌ Error: Skill folder not found: {skill_path}")
35
- return None
36
-
37
- if not skill_path.is_dir():
38
- print(f"❌ Error: Path is not a directory: {skill_path}")
39
- return None
40
-
41
- # Validate SKILL.md exists
42
- skill_md = skill_path / "SKILL.md"
43
- if not skill_md.exists():
44
- print(f"❌ Error: SKILL.md not found in {skill_path}")
45
- return None
46
-
47
- # Run validation before packaging
48
- print("🔍 Validating skill...")
49
- valid, message = validate_skill(skill_path)
50
- if not valid:
51
- print(f"❌ Validation failed: {message}")
52
- print(" Please fix the validation errors before packaging.")
53
- return None
54
- print(f"✅ {message}\n")
55
-
56
- # Determine output location
57
- skill_name = skill_path.name
58
- if output_dir:
59
- output_path = Path(output_dir).resolve()
60
- output_path.mkdir(parents=True, exist_ok=True)
61
- else:
62
- output_path = Path.cwd()
63
-
64
- skill_filename = output_path / f"{skill_name}.skill"
65
-
66
- # Create the .skill file (zip format)
67
- try:
68
- with zipfile.ZipFile(skill_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
69
- # Walk through the skill directory
70
- for file_path in skill_path.rglob('*'):
71
- if file_path.is_file():
72
- # Calculate the relative path within the zip
73
- arcname = file_path.relative_to(skill_path.parent)
74
- zipf.write(file_path, arcname)
75
- print(f" Added: {arcname}")
76
-
77
- print(f"\nSuccessfully packaged skill to: {skill_filename}")
78
- return skill_filename
79
-
80
- except Exception as e:
81
- print(f"❌ Error creating .skill file: {e}")
82
- return None
83
-
84
-
85
- def main():
86
- if len(sys.argv) < 2:
87
- print("Usage: python utils/package_skill.py <path/to/skill-folder> [output-directory]")
88
- print("\nExample:")
89
- print(" python utils/package_skill.py skills/public/my-skill")
90
- print(" python utils/package_skill.py skills/public/my-skill ./dist")
91
- sys.exit(1)
92
-
93
- skill_path = sys.argv[1]
94
- output_dir = sys.argv[2] if len(sys.argv) > 2 else None
95
-
96
- print(f"📦 Packaging skill: {skill_path}")
97
- if output_dir:
98
- print(f" Output directory: {output_dir}")
99
- print()
100
-
101
- result = package_skill(skill_path, output_dir)
102
-
103
- if result:
104
- sys.exit(0)
105
- else:
106
- sys.exit(1)
107
-
108
-
109
- if __name__ == "__main__":
110
- main()
1
+ #!/usr/bin/env python3
2
+ """
3
+ Skill Packager - Creates a distributable .skill file of a skill folder
4
+
5
+ Usage:
6
+ python -m scripts.package_skill <path/to/skill-folder> [output-directory]
7
+
8
+ Example:
9
+ python -m scripts.package_skill skills/public/my-skill
10
+ python -m scripts.package_skill skills/public/my-skill ./dist
11
+ """
12
+
13
+ import fnmatch
14
+ import sys
15
+ import zipfile
16
+ from pathlib import Path
17
+ from scripts.quick_validate import validate_skill
18
+
19
# Patterns to exclude when packaging skills.
# Path components that exclude a path wherever they appear in it.
EXCLUDE_DIRS = {"__pycache__", "node_modules"}
# Glob patterns matched against the final path component only.
EXCLUDE_GLOBS = {"*.pyc"}
# Exact file names matched against the final path component only.
EXCLUDE_FILES = {".DS_Store"}
# Directories excluded only at the skill root (not when nested deeper).
ROOT_EXCLUDE_DIRS = {"evals"}


def should_exclude(rel_path: Path) -> bool:
    """Check if a path should be excluded from packaging.

    Args:
        rel_path: Path relative to the skill folder's parent, so
            ``parts[0]`` is the skill folder name and ``parts[1]`` (if
            present) is the first entry inside the skill.

    Returns:
        True when the path matches one of the exclusion rules above.
    """
    parts = rel_path.parts
    if any(part in EXCLUDE_DIRS for part in parts):
        return True
    # A root-level directory rule only applies when something lives beneath
    # parts[1]; requiring a third component keeps a plain file that merely
    # shares the name (e.g. "<skill>/evals") in the package.
    if len(parts) > 2 and parts[1] in ROOT_EXCLUDE_DIRS:
        return True
    name = rel_path.name
    if name in EXCLUDE_FILES:
        return True
    return any(fnmatch.fnmatch(name, pat) for pat in EXCLUDE_GLOBS)
40
+
41
+
42
def package_skill(skill_path, output_dir=None):
    """
    Package a skill folder into a .skill file (a zip archive).

    The folder must exist, be a directory, contain SKILL.md and pass
    validate_skill(). Files rejected by should_exclude() are left out of
    the archive.

    Args:
        skill_path: Path to the skill folder
        output_dir: Optional output directory for the .skill file (defaults to current directory)

    Returns:
        Path to the created .skill file, or None if error
    """
    skill_path = Path(skill_path).resolve()

    # Validate skill folder exists
    if not skill_path.exists():
        print(f"❌ Error: Skill folder not found: {skill_path}")
        return None

    if not skill_path.is_dir():
        print(f"❌ Error: Path is not a directory: {skill_path}")
        return None

    # Validate SKILL.md exists
    skill_md = skill_path / "SKILL.md"
    if not skill_md.exists():
        print(f"❌ Error: SKILL.md not found in {skill_path}")
        return None

    # Run validation before packaging
    print("🔍 Validating skill...")
    valid, message = validate_skill(skill_path)
    if not valid:
        print(f"❌ Validation failed: {message}")
        print(" Please fix the validation errors before packaging.")
        return None
    print(f"✅ {message}\n")

    # Determine output location
    skill_name = skill_path.name
    if output_dir:
        output_path = Path(output_dir).resolve()
        output_path.mkdir(parents=True, exist_ok=True)
    else:
        output_path = Path.cwd()

    skill_filename = output_path / f"{skill_name}.skill"
    # Resolved once so the archive can be recognised if the walk reaches it.
    archive_path = skill_filename.resolve()

    # Create the .skill file (zip format)
    try:
        with zipfile.ZipFile(skill_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
            # Walk the skill directory in sorted order so the archive layout
            # does not depend on filesystem enumeration order.
            for file_path in sorted(skill_path.rglob('*')):
                if not file_path.is_file():
                    continue
                # When the output directory is inside the skill folder the
                # archive being written shows up in the walk; never add it
                # to itself.
                if file_path.resolve() == archive_path:
                    continue
                arcname = file_path.relative_to(skill_path.parent)
                if should_exclude(arcname):
                    print(f" Skipped: {arcname}")
                    continue
                zipf.write(file_path, arcname)
                print(f" Added: {arcname}")

        print(f"\n✅ Successfully packaged skill to: {skill_filename}")
        return skill_filename

    except Exception as e:
        # Best-effort CLI helper: report the failure and signal it with None.
        print(f"❌ Error creating .skill file: {e}")
        return None
109
+
110
+
111
def main():
    """Command-line entry point for the skill packager.

    Reads the skill folder (required) and output directory (optional) from
    sys.argv, calls package_skill(), and exits with status 0 on success or
    1 on failure / missing arguments.

    The usage text shows module-style invocation because this file imports
    validate_skill through the ``scripts`` package.
    """
    if len(sys.argv) < 2:
        print("Usage: python -m scripts.package_skill <path/to/skill-folder> [output-directory]")
        print("\nExample:")
        print(" python -m scripts.package_skill skills/public/my-skill")
        print(" python -m scripts.package_skill skills/public/my-skill ./dist")
        sys.exit(1)

    skill_path = sys.argv[1]
    output_dir = sys.argv[2] if len(sys.argv) > 2 else None

    print(f"📦 Packaging skill: {skill_path}")
    if output_dir:
        print(f" Output directory: {output_dir}")
    print()

    result = package_skill(skill_path, output_dir)

    # package_skill returns None on any failure.
    sys.exit(0 if result else 1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,310 @@
1
+ #!/usr/bin/env python3
2
+ """Run trigger evaluation for a skill description.
3
+
4
+ Tests whether a skill's description causes Claude to trigger (read the skill)
5
+ for a set of queries. Outputs results as JSON.
6
+ """
7
+
8
+ import argparse
9
+ import json
10
+ import os
11
+ import select
12
+ import subprocess
13
+ import sys
14
+ import time
15
+ import uuid
16
+ from concurrent.futures import ProcessPoolExecutor, as_completed
17
+ from pathlib import Path
18
+
19
+ from scripts.utils import parse_skill_md
20
+
21
+
22
def find_project_root() -> Path:
    """Locate the project root: the nearest directory holding ``.claude/``.

    The search starts at the current working directory and moves up through
    its ancestors, mirroring how Claude Code discovers its project root so
    that the command file we create is placed where ``claude -p`` looks.

    Returns:
        The first directory (cwd or an ancestor) that contains a ``.claude``
        directory, or the current working directory when none does.
    """
    start = Path.cwd()
    candidates = (start, *start.parents)
    return next(
        (folder for folder in candidates if (folder / ".claude").is_dir()),
        start,
    )
33
+
34
+
35
+ def run_single_query(
36
+ query: str,
37
+ skill_name: str,
38
+ skill_description: str,
39
+ timeout: int,
40
+ project_root: str,
41
+ model: str | None = None,
42
+ ) -> bool:
43
+ """Run a single query and return whether the skill was triggered.
44
+
45
+ Creates a command file in .claude/commands/ so it appears in Claude's
46
+ available_skills list, then runs `claude -p` with the raw query.
47
+ Uses --include-partial-messages to detect triggering early from
48
+ stream events (content_block_start) rather than waiting for the
49
+ full assistant message, which only arrives after tool execution.
50
+ """
51
+ unique_id = uuid.uuid4().hex[:8]
52
+ clean_name = f"{skill_name}-skill-{unique_id}"
53
+ project_commands_dir = Path(project_root) / ".claude" / "commands"
54
+ command_file = project_commands_dir / f"{clean_name}.md"
55
+
56
+ try:
57
+ project_commands_dir.mkdir(parents=True, exist_ok=True)
58
+ # Use YAML block scalar to avoid breaking on quotes in description
59
+ indented_desc = "\n ".join(skill_description.split("\n"))
60
+ command_content = (
61
+ f"---\n"
62
+ f"description: |\n"
63
+ f" {indented_desc}\n"
64
+ f"---\n\n"
65
+ f"# {skill_name}\n\n"
66
+ f"This skill handles: {skill_description}\n"
67
+ )
68
+ command_file.write_text(command_content)
69
+
70
+ cmd = [
71
+ "claude",
72
+ "-p", query,
73
+ "--output-format", "stream-json",
74
+ "--verbose",
75
+ "--include-partial-messages",
76
+ ]
77
+ if model:
78
+ cmd.extend(["--model", model])
79
+
80
+ # Remove CLAUDECODE env var to allow nesting claude -p inside a
81
+ # Claude Code session. The guard is for interactive terminal conflicts;
82
+ # programmatic subprocess usage is safe.
83
+ env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}
84
+
85
+ process = subprocess.Popen(
86
+ cmd,
87
+ stdout=subprocess.PIPE,
88
+ stderr=subprocess.DEVNULL,
89
+ cwd=project_root,
90
+ env=env,
91
+ )
92
+
93
+ triggered = False
94
+ start_time = time.time()
95
+ buffer = ""
96
+ # Track state for stream event detection
97
+ pending_tool_name = None
98
+ accumulated_json = ""
99
+
100
+ try:
101
+ while time.time() - start_time < timeout:
102
+ if process.poll() is not None:
103
+ remaining = process.stdout.read()
104
+ if remaining:
105
+ buffer += remaining.decode("utf-8", errors="replace")
106
+ break
107
+
108
+ ready, _, _ = select.select([process.stdout], [], [], 1.0)
109
+ if not ready:
110
+ continue
111
+
112
+ chunk = os.read(process.stdout.fileno(), 8192)
113
+ if not chunk:
114
+ break
115
+ buffer += chunk.decode("utf-8", errors="replace")
116
+
117
+ while "\n" in buffer:
118
+ line, buffer = buffer.split("\n", 1)
119
+ line = line.strip()
120
+ if not line:
121
+ continue
122
+
123
+ try:
124
+ event = json.loads(line)
125
+ except json.JSONDecodeError:
126
+ continue
127
+
128
+ # Early detection via stream events
129
+ if event.get("type") == "stream_event":
130
+ se = event.get("event", {})
131
+ se_type = se.get("type", "")
132
+
133
+ if se_type == "content_block_start":
134
+ cb = se.get("content_block", {})
135
+ if cb.get("type") == "tool_use":
136
+ tool_name = cb.get("name", "")
137
+ if tool_name in ("Skill", "Read"):
138
+ pending_tool_name = tool_name
139
+ accumulated_json = ""
140
+ else:
141
+ return False
142
+
143
+ elif se_type == "content_block_delta" and pending_tool_name:
144
+ delta = se.get("delta", {})
145
+ if delta.get("type") == "input_json_delta":
146
+ accumulated_json += delta.get("partial_json", "")
147
+ if clean_name in accumulated_json:
148
+ return True
149
+
150
+ elif se_type in ("content_block_stop", "message_stop"):
151
+ if pending_tool_name:
152
+ return clean_name in accumulated_json
153
+ if se_type == "message_stop":
154
+ return False
155
+
156
+ # Fallback: full assistant message
157
+ elif event.get("type") == "assistant":
158
+ message = event.get("message", {})
159
+ for content_item in message.get("content", []):
160
+ if content_item.get("type") != "tool_use":
161
+ continue
162
+ tool_name = content_item.get("name", "")
163
+ tool_input = content_item.get("input", {})
164
+ if tool_name == "Skill" and clean_name in tool_input.get("skill", ""):
165
+ triggered = True
166
+ elif tool_name == "Read" and clean_name in tool_input.get("file_path", ""):
167
+ triggered = True
168
+ return triggered
169
+
170
+ elif event.get("type") == "result":
171
+ return triggered
172
+ finally:
173
+ # Clean up process on any exit path (return, exception, timeout)
174
+ if process.poll() is None:
175
+ process.kill()
176
+ process.wait()
177
+
178
+ return triggered
179
+ finally:
180
+ if command_file.exists():
181
+ command_file.unlink()
182
+
183
+
184
+ def run_eval(
185
+ eval_set: list[dict],
186
+ skill_name: str,
187
+ description: str,
188
+ num_workers: int,
189
+ timeout: int,
190
+ project_root: Path,
191
+ runs_per_query: int = 1,
192
+ trigger_threshold: float = 0.5,
193
+ model: str | None = None,
194
+ ) -> dict:
195
+ """Run the full eval set and return results."""
196
+ results = []
197
+
198
+ with ProcessPoolExecutor(max_workers=num_workers) as executor:
199
+ future_to_info = {}
200
+ for item in eval_set:
201
+ for run_idx in range(runs_per_query):
202
+ future = executor.submit(
203
+ run_single_query,
204
+ item["query"],
205
+ skill_name,
206
+ description,
207
+ timeout,
208
+ str(project_root),
209
+ model,
210
+ )
211
+ future_to_info[future] = (item, run_idx)
212
+
213
+ query_triggers: dict[str, list[bool]] = {}
214
+ query_items: dict[str, dict] = {}
215
+ for future in as_completed(future_to_info):
216
+ item, _ = future_to_info[future]
217
+ query = item["query"]
218
+ query_items[query] = item
219
+ if query not in query_triggers:
220
+ query_triggers[query] = []
221
+ try:
222
+ query_triggers[query].append(future.result())
223
+ except Exception as e:
224
+ print(f"Warning: query failed: {e}", file=sys.stderr)
225
+ query_triggers[query].append(False)
226
+
227
+ for query, triggers in query_triggers.items():
228
+ item = query_items[query]
229
+ trigger_rate = sum(triggers) / len(triggers)
230
+ should_trigger = item["should_trigger"]
231
+ if should_trigger:
232
+ did_pass = trigger_rate >= trigger_threshold
233
+ else:
234
+ did_pass = trigger_rate < trigger_threshold
235
+ results.append({
236
+ "query": query,
237
+ "should_trigger": should_trigger,
238
+ "trigger_rate": trigger_rate,
239
+ "triggers": sum(triggers),
240
+ "runs": len(triggers),
241
+ "pass": did_pass,
242
+ })
243
+
244
+ passed = sum(1 for r in results if r["pass"])
245
+ total = len(results)
246
+
247
+ return {
248
+ "skill_name": skill_name,
249
+ "description": description,
250
+ "results": results,
251
+ "summary": {
252
+ "total": total,
253
+ "passed": passed,
254
+ "failed": total - passed,
255
+ },
256
+ }
257
+
258
+
259
def main():
    """Command-line entry point for the trigger evaluation.

    Parses arguments, loads the eval set and the skill's SKILL.md, runs
    run_eval() and prints the result as JSON on stdout. Exits with status 1
    when the skill directory has no SKILL.md. With --verbose, a summary and
    one line per query are written to stderr.
    """
    parser = argparse.ArgumentParser(description="Run trigger evaluation for a skill description")
    parser.add_argument("--eval-set", required=True, help="Path to eval set JSON file")
    parser.add_argument("--skill-path", required=True, help="Path to skill directory")
    parser.add_argument("--description", default=None, help="Override description to test")
    parser.add_argument("--num-workers", type=int, default=10, help="Number of parallel workers")
    parser.add_argument("--timeout", type=int, default=30, help="Timeout per query in seconds")
    parser.add_argument("--runs-per-query", type=int, default=3, help="Number of runs per query")
    parser.add_argument("--trigger-threshold", type=float, default=0.5, help="Trigger rate threshold")
    parser.add_argument("--model", default=None, help="Model to use for claude -p (default: user's configured model)")
    parser.add_argument("--verbose", action="store_true", help="Print progress to stderr")
    args = parser.parse_args()

    # Read as UTF-8 explicitly so non-ASCII queries do not depend on the
    # platform's default encoding.
    eval_set = json.loads(Path(args.eval_set).read_text(encoding="utf-8"))
    skill_path = Path(args.skill_path)

    if not (skill_path / "SKILL.md").exists():
        print(f"Error: No SKILL.md found at {skill_path}", file=sys.stderr)
        sys.exit(1)

    # The third value returned by parse_skill_md is not needed here.
    name, original_description, _ = parse_skill_md(skill_path)
    description = args.description or original_description
    project_root = find_project_root()

    if args.verbose:
        print(f"Evaluating: {description}", file=sys.stderr)

    output = run_eval(
        eval_set=eval_set,
        skill_name=name,
        description=description,
        num_workers=args.num_workers,
        timeout=args.timeout,
        project_root=project_root,
        runs_per_query=args.runs_per_query,
        trigger_threshold=args.trigger_threshold,
        model=args.model,
    )

    if args.verbose:
        summary = output["summary"]
        print(f"Results: {summary['passed']}/{summary['total']} passed", file=sys.stderr)
        for r in output["results"]:
            status = "PASS" if r["pass"] else "FAIL"
            rate_str = f"{r['triggers']}/{r['runs']}"
            print(f" [{status}] rate={rate_str} expected={r['should_trigger']}: {r['query'][:70]}", file=sys.stderr)

    print(json.dumps(output, indent=2))


if __name__ == "__main__":
    main()