@heyai-rules/pilo-masterkit 1.2.2 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (594) hide show
  1. package/.agent/agents/architect.md +211 -211
  2. package/.agent/agents/build-error-resolver.md +114 -114
  3. package/.agent/agents/chief-of-staff.md +151 -151
  4. package/.agent/agents/code-reviewer.md +237 -237
  5. package/.agent/agents/cpp-build-resolver.md +90 -90
  6. package/.agent/agents/cpp-reviewer.md +72 -72
  7. package/.agent/agents/csharp-reviewer.md +101 -0
  8. package/.agent/agents/dart-build-resolver.md +201 -0
  9. package/.agent/agents/database-reviewer.md +91 -91
  10. package/.agent/agents/doc-updater.md +107 -107
  11. package/.agent/agents/docs-lookup.md +68 -68
  12. package/.agent/agents/e2e-runner.md +107 -107
  13. package/.agent/agents/flutter-reviewer.md +243 -243
  14. package/.agent/agents/gan-evaluator.md +209 -0
  15. package/.agent/agents/gan-generator.md +131 -0
  16. package/.agent/agents/gan-planner.md +99 -0
  17. package/.agent/agents/go-build-resolver.md +94 -94
  18. package/.agent/agents/go-reviewer.md +76 -76
  19. package/.agent/agents/harness-optimizer.md +35 -35
  20. package/.agent/agents/healthcare-reviewer.md +83 -0
  21. package/.agent/agents/java-build-resolver.md +153 -153
  22. package/.agent/agents/java-reviewer.md +92 -92
  23. package/.agent/agents/kotlin-build-resolver.md +118 -118
  24. package/.agent/agents/kotlin-reviewer.md +159 -159
  25. package/.agent/agents/loop-operator.md +36 -36
  26. package/.agent/agents/opensource-forker.md +198 -0
  27. package/.agent/agents/opensource-packager.md +249 -0
  28. package/.agent/agents/opensource-sanitizer.md +188 -0
  29. package/.agent/agents/performance-optimizer.md +392 -133
  30. package/.agent/agents/personas/athena-agent/agent.json +10 -0
  31. package/.agent/agents/personas/athena-agent/athena-backend-logic-architecture-profile.md +189 -0
  32. package/.agent/agents/personas/athena-agent/context-files/agents.md +55 -0
  33. package/.agent/agents/personas/athena-agent/context-files/identity.md +23 -0
  34. package/.agent/agents/personas/athena-agent/context-files/soul.md +51 -0
  35. package/.agent/agents/personas/athena-agent/context-files/user-predefined.md +15 -0
  36. package/.agent/agents/personas/athena-agent/user-context-files/system/bootstrap.md +37 -0
  37. package/.agent/agents/personas/athena-agent/user-context-files/system/user.md +45 -0
  38. package/.agent/agents/personas/da-vinci-agent/agent.json +10 -0
  39. package/.agent/agents/personas/da-vinci-agent/context-files/agents.md +55 -0
  40. package/.agent/agents/personas/da-vinci-agent/context-files/identity.md +23 -0
  41. package/.agent/agents/personas/da-vinci-agent/context-files/soul.md +51 -0
  42. package/.agent/agents/personas/da-vinci-agent/context-files/user-predefined.md +15 -0
  43. package/.agent/agents/personas/da-vinci-agent/da-vinci-frontend-ui-ux-design-profile.md +189 -0
  44. package/.agent/agents/personas/da-vinci-agent/user-context-files/system/bootstrap.md +37 -0
  45. package/.agent/agents/personas/da-vinci-agent/user-context-files/system/user.md +45 -0
  46. package/.agent/agents/personas/duong-tang-agent/agent.json +10 -0
  47. package/.agent/agents/personas/duong-tang-agent/context-files/agents.md +55 -0
  48. package/.agent/agents/personas/duong-tang-agent/context-files/identity.md +23 -0
  49. package/.agent/agents/personas/duong-tang-agent/context-files/soul.md +51 -0
  50. package/.agent/agents/personas/duong-tang-agent/context-files/user-predefined.md +15 -0
  51. package/.agent/agents/personas/duong-tang-agent/tang-monk-quality-testing-documentation-profile.md +189 -0
  52. package/.agent/agents/personas/duong-tang-agent/user-context-files/system/bootstrap.md +37 -0
  53. package/.agent/agents/personas/duong-tang-agent/user-context-files/system/user.md +45 -0
  54. package/.agent/agents/personas/gia-cat-luong-agent/agent.json +10 -0
  55. package/.agent/agents/personas/gia-cat-luong-agent/context-files/agents.md +55 -0
  56. package/.agent/agents/personas/gia-cat-luong-agent/context-files/identity.md +23 -0
  57. package/.agent/agents/personas/gia-cat-luong-agent/context-files/soul.md +51 -0
  58. package/.agent/agents/personas/gia-cat-luong-agent/context-files/user-predefined.md +15 -0
  59. package/.agent/agents/personas/gia-cat-luong-agent/kongming-research-strategy-analysis-profile.md +189 -0
  60. package/.agent/agents/personas/gia-cat-luong-agent/user-context-files/system/bootstrap.md +37 -0
  61. package/.agent/agents/personas/gia-cat-luong-agent/user-context-files/system/user.md +45 -0
  62. package/.agent/agents/personas/mihata-agent/agent.json +10 -0
  63. package/.agent/agents/personas/mihata-agent/context-files/agents.md +55 -0
  64. package/.agent/agents/personas/mihata-agent/context-files/identity.md +23 -0
  65. package/.agent/agents/personas/mihata-agent/context-files/soul.md +51 -0
  66. package/.agent/agents/personas/mihata-agent/context-files/user-predefined.md +15 -0
  67. package/.agent/agents/personas/mihata-agent/mihata-multi-agent-orchestration-profile.md +189 -0
  68. package/.agent/agents/personas/mihata-agent/user-context-files/system/bootstrap.md +37 -0
  69. package/.agent/agents/personas/mihata-agent/user-context-files/system/user.md +45 -0
  70. package/.agent/agents/personas/tesla-agent/agent.json +10 -0
  71. package/.agent/agents/personas/tesla-agent/context-files/agents.md +55 -0
  72. package/.agent/agents/personas/tesla-agent/context-files/identity.md +23 -0
  73. package/.agent/agents/personas/tesla-agent/context-files/soul.md +51 -0
  74. package/.agent/agents/personas/tesla-agent/context-files/user-predefined.md +15 -0
  75. package/.agent/agents/personas/tesla-agent/tesla-fullstack-system-optimization-profile.md +189 -0
  76. package/.agent/agents/personas/tesla-agent/user-context-files/system/bootstrap.md +37 -0
  77. package/.agent/agents/personas/tesla-agent/user-context-files/system/user.md +45 -0
  78. package/.agent/agents/personas/tu-ma-y-agent/agent.json +10 -0
  79. package/.agent/agents/personas/tu-ma-y-agent/context-files/agents.md +55 -0
  80. package/.agent/agents/personas/tu-ma-y-agent/context-files/identity.md +23 -0
  81. package/.agent/agents/personas/tu-ma-y-agent/context-files/soul.md +51 -0
  82. package/.agent/agents/personas/tu-ma-y-agent/context-files/user-predefined.md +15 -0
  83. package/.agent/agents/personas/tu-ma-y-agent/simayi-feasibility-risk-control-profile.md +189 -0
  84. package/.agent/agents/personas/tu-ma-y-agent/user-context-files/system/bootstrap.md +37 -0
  85. package/.agent/agents/personas/tu-ma-y-agent/user-context-files/system/user.md +45 -0
  86. package/.agent/agents/personas/venti-agent/agent.json +10 -0
  87. package/.agent/agents/personas/venti-agent/context-files/agents.md +55 -0
  88. package/.agent/agents/personas/venti-agent/context-files/identity.md +23 -0
  89. package/.agent/agents/personas/venti-agent/context-files/soul.md +51 -0
  90. package/.agent/agents/personas/venti-agent/context-files/user-predefined.md +15 -0
  91. package/.agent/agents/personas/venti-agent/user-context-files/system/bootstrap.md +37 -0
  92. package/.agent/agents/personas/venti-agent/user-context-files/system/user.md +45 -0
  93. package/.agent/agents/personas/venti-agent/venti-learning-communication-mentoring-profile.md +189 -0
  94. package/.agent/agents/planner.md +212 -212
  95. package/.agent/agents/python-reviewer.md +98 -98
  96. package/.agent/agents/pytorch-build-resolver.md +120 -120
  97. package/.agent/agents/refactor-cleaner.md +85 -85
  98. package/.agent/agents/rust-build-resolver.md +148 -148
  99. package/.agent/agents/rust-reviewer.md +94 -94
  100. package/.agent/agents/security-reviewer.md +108 -108
  101. package/.agent/agents/tdd-guide.md +91 -91
  102. package/.agent/agents/typescript-reviewer.md +112 -112
  103. package/.agent/contexts/dev.md +20 -0
  104. package/.agent/contexts/research.md +26 -0
  105. package/.agent/contexts/review.md +22 -0
  106. package/.agent/hooks/hooks.json +395 -0
  107. package/.agent/hooks/readme.md +222 -0
  108. package/.agent/mcp-configs/mcp-servers.json +181 -0
  109. package/.agent/rules/common/agents.md +50 -0
  110. package/.agent/rules/common/code-review.md +124 -0
  111. package/.agent/rules/common/coding-style.md +48 -0
  112. package/.agent/rules/common/development-workflow.md +44 -0
  113. package/.agent/rules/common/git-workflow.md +24 -0
  114. package/.agent/rules/common/hooks.md +30 -0
  115. package/.agent/rules/common/patterns.md +31 -0
  116. package/.agent/rules/common/performance.md +55 -0
  117. package/.agent/rules/common/security.md +29 -0
  118. package/.agent/rules/common/testing.md +29 -0
  119. package/.agent/rules/cpp/coding-style.md +44 -0
  120. package/.agent/rules/cpp/hooks.md +39 -0
  121. package/.agent/rules/cpp/patterns.md +51 -0
  122. package/.agent/rules/cpp/security.md +51 -0
  123. package/.agent/rules/cpp/testing.md +44 -0
  124. package/.agent/rules/csharp/coding-style.md +72 -0
  125. package/.agent/rules/csharp/hooks.md +25 -0
  126. package/.agent/rules/csharp/patterns.md +50 -0
  127. package/.agent/rules/csharp/security.md +58 -0
  128. package/.agent/rules/csharp/testing.md +46 -0
  129. package/.agent/rules/dart/coding-style.md +159 -0
  130. package/.agent/rules/dart/hooks.md +66 -0
  131. package/.agent/rules/dart/patterns.md +261 -0
  132. package/.agent/rules/dart/security.md +135 -0
  133. package/.agent/rules/dart/testing.md +215 -0
  134. package/.agent/rules/golang/coding-style.md +32 -0
  135. package/.agent/rules/golang/hooks.md +17 -0
  136. package/.agent/rules/golang/patterns.md +45 -0
  137. package/.agent/rules/golang/security.md +34 -0
  138. package/.agent/rules/golang/testing.md +31 -0
  139. package/.agent/rules/java/coding-style.md +114 -0
  140. package/.agent/rules/java/hooks.md +18 -0
  141. package/.agent/rules/java/patterns.md +146 -0
  142. package/.agent/rules/java/security.md +100 -0
  143. package/.agent/rules/java/testing.md +131 -0
  144. package/.agent/rules/kotlin/coding-style.md +86 -0
  145. package/.agent/rules/kotlin/hooks.md +17 -0
  146. package/.agent/rules/kotlin/patterns.md +146 -0
  147. package/.agent/rules/kotlin/security.md +82 -0
  148. package/.agent/rules/kotlin/testing.md +128 -0
  149. package/.agent/rules/perl/coding-style.md +46 -0
  150. package/.agent/rules/perl/hooks.md +22 -0
  151. package/.agent/rules/perl/patterns.md +76 -0
  152. package/.agent/rules/perl/security.md +69 -0
  153. package/.agent/rules/perl/testing.md +54 -0
  154. package/.agent/rules/php/coding-style.md +40 -0
  155. package/.agent/rules/php/hooks.md +24 -0
  156. package/.agent/rules/php/patterns.md +33 -0
  157. package/.agent/rules/php/security.md +37 -0
  158. package/.agent/rules/php/testing.md +39 -0
  159. package/.agent/rules/python/coding-style.md +42 -0
  160. package/.agent/rules/python/hooks.md +19 -0
  161. package/.agent/rules/python/patterns.md +39 -0
  162. package/.agent/rules/python/security.md +30 -0
  163. package/.agent/rules/python/testing.md +38 -0
  164. package/.agent/rules/readme.md +111 -0
  165. package/.agent/rules/rust/coding-style.md +151 -0
  166. package/.agent/rules/rust/hooks.md +16 -0
  167. package/.agent/rules/rust/patterns.md +168 -0
  168. package/.agent/rules/rust/security.md +141 -0
  169. package/.agent/rules/rust/testing.md +154 -0
  170. package/.agent/rules/swift/coding-style.md +47 -0
  171. package/.agent/rules/swift/hooks.md +20 -0
  172. package/.agent/rules/swift/patterns.md +66 -0
  173. package/.agent/rules/swift/security.md +33 -0
  174. package/.agent/rules/swift/testing.md +45 -0
  175. package/.agent/rules/typescript/coding-style.md +199 -0
  176. package/.agent/rules/typescript/hooks.md +22 -0
  177. package/.agent/rules/typescript/patterns.md +52 -0
  178. package/.agent/rules/typescript/security.md +28 -0
  179. package/.agent/rules/typescript/testing.md +18 -0
  180. package/.agent/rules/web/coding-style.md +96 -0
  181. package/.agent/rules/web/design-quality.md +63 -0
  182. package/.agent/rules/web/hooks.md +120 -0
  183. package/.agent/rules/web/patterns.md +79 -0
  184. package/.agent/rules/web/performance.md +64 -0
  185. package/.agent/rules/web/security.md +57 -0
  186. package/.agent/rules/web/testing.md +55 -0
  187. package/.agent/rules/zh/agents.md +50 -0
  188. package/.agent/rules/zh/code-review.md +124 -0
  189. package/.agent/rules/zh/coding-style.md +48 -0
  190. package/.agent/rules/zh/development-workflow.md +44 -0
  191. package/.agent/rules/zh/git-workflow.md +24 -0
  192. package/.agent/rules/zh/hooks.md +30 -0
  193. package/.agent/rules/zh/patterns.md +31 -0
  194. package/.agent/rules/zh/performance.md +55 -0
  195. package/.agent/rules/zh/readme.md +108 -0
  196. package/.agent/rules/zh/security.md +29 -0
  197. package/.agent/rules/zh/testing.md +29 -0
  198. package/.agent/skills/agent-eval/SKILL.md +145 -0
  199. package/.agent/skills/agent-harness-construction/SKILL.md +73 -0
  200. package/.agent/skills/agent-payment-x402/SKILL.md +178 -0
  201. package/.agent/skills/agentic-engineering/SKILL.md +63 -0
  202. package/.agent/skills/ai-first-engineering/SKILL.md +51 -0
  203. package/.agent/skills/ai-regression-testing/SKILL.md +385 -0
  204. package/.agent/skills/android-clean-architecture/SKILL.md +339 -0
  205. package/.agent/skills/api-design/SKILL.md +523 -0
  206. package/.agent/skills/architecture-decision-records/SKILL.md +179 -0
  207. package/.agent/skills/article-writing/SKILL.md +79 -0
  208. package/.agent/skills/autonomous-agent-harness/SKILL.md +267 -0
  209. package/.agent/skills/autonomous-loops/SKILL.md +610 -0
  210. package/.agent/skills/backend-patterns/SKILL.md +598 -0
  211. package/.agent/skills/benchmark/SKILL.md +93 -0
  212. package/.agent/skills/blueprint/SKILL.md +105 -0
  213. package/.agent/skills/brand-voice/SKILL.md +97 -0
  214. package/.agent/skills/brand-voice/references/voice-profile-schema.md +55 -0
  215. package/.agent/skills/browser-qa/SKILL.md +87 -0
  216. package/.agent/skills/bun-runtime/SKILL.md +84 -0
  217. package/.agent/skills/canary-watch/SKILL.md +99 -0
  218. package/.agent/skills/carrier-relationship-management/SKILL.md +212 -0
  219. package/.agent/skills/ck/SKILL.md +147 -0
  220. package/.agent/skills/ck/commands/forget.mjs +44 -0
  221. package/.agent/skills/ck/commands/info.mjs +24 -0
  222. package/.agent/skills/ck/commands/init.mjs +143 -0
  223. package/.agent/skills/ck/commands/list.mjs +40 -0
  224. package/.agent/skills/ck/commands/migrate.mjs +202 -0
  225. package/.agent/skills/ck/commands/resume.mjs +36 -0
  226. package/.agent/skills/ck/commands/save.mjs +210 -0
  227. package/.agent/skills/ck/commands/shared.mjs +387 -0
  228. package/.agent/skills/ck/hooks/session-start.mjs +224 -0
  229. package/.agent/skills/claude-api/SKILL.md +337 -0
  230. package/.agent/skills/claude-devfleet/SKILL.md +103 -0
  231. package/.agent/skills/click-path-audit/SKILL.md +244 -0
  232. package/.agent/skills/clickhouse-io/SKILL.md +439 -0
  233. package/.agent/skills/codebase-onboarding/SKILL.md +233 -0
  234. package/.agent/skills/coding-standards/SKILL.md +530 -0
  235. package/.agent/skills/compose-multiplatform-patterns/SKILL.md +299 -0
  236. package/.agent/skills/configure-ecc/SKILL.md +367 -0
  237. package/.agent/skills/connections-optimizer/SKILL.md +189 -0
  238. package/.agent/skills/content-engine/SKILL.md +131 -0
  239. package/.agent/skills/content-hash-cache-pattern/SKILL.md +161 -0
  240. package/.agent/skills/context-budget/SKILL.md +135 -0
  241. package/.agent/skills/continuous-agent-loop/SKILL.md +45 -0
  242. package/.agent/skills/continuous-learning/SKILL.md +119 -0
  243. package/.agent/skills/continuous-learning/config.json +18 -0
  244. package/.agent/skills/continuous-learning/evaluate-session.sh +69 -0
  245. package/.agent/skills/continuous-learning-v2/SKILL.md +365 -0
  246. package/.agent/skills/continuous-learning-v2/agents/observer-loop.sh +271 -0
  247. package/.agent/skills/continuous-learning-v2/agents/observer.md +198 -0
  248. package/.agent/skills/continuous-learning-v2/agents/session-guardian.sh +150 -0
  249. package/.agent/skills/continuous-learning-v2/agents/start-observer.sh +244 -0
  250. package/.agent/skills/continuous-learning-v2/config.json +8 -0
  251. package/.agent/skills/continuous-learning-v2/hooks/observe.sh +428 -0
  252. package/.agent/skills/continuous-learning-v2/scripts/detect-project.sh +228 -0
  253. package/.agent/skills/continuous-learning-v2/scripts/instinct-cli.py +1426 -0
  254. package/.agent/skills/continuous-learning-v2/scripts/test-parse-instinct.py +984 -0
  255. package/.agent/skills/cost-aware-llm-pipeline/SKILL.md +183 -0
  256. package/.agent/skills/cpp-coding-standards/SKILL.md +723 -0
  257. package/.agent/skills/cpp-testing/SKILL.md +324 -0
  258. package/.agent/skills/crosspost/SKILL.md +111 -0
  259. package/.agent/skills/csharp-testing/SKILL.md +321 -0
  260. package/.agent/skills/customer-billing-ops/SKILL.md +140 -0
  261. package/.agent/skills/customs-trade-compliance/SKILL.md +263 -0
  262. package/.agent/skills/dart-flutter-patterns/SKILL.md +563 -0
  263. package/.agent/skills/data-scraper-agent/SKILL.md +764 -0
  264. package/.agent/skills/database-migrations/SKILL.md +429 -0
  265. package/.agent/skills/deep-research/SKILL.md +155 -0
  266. package/.agent/skills/deployment-patterns/SKILL.md +427 -0
  267. package/.agent/skills/design-system/SKILL.md +82 -0
  268. package/.agent/skills/django-patterns/SKILL.md +734 -0
  269. package/.agent/skills/django-security/SKILL.md +593 -0
  270. package/.agent/skills/django-tdd/SKILL.md +729 -0
  271. package/.agent/skills/django-verification/SKILL.md +469 -0
  272. package/.agent/skills/dmux-workflows/SKILL.md +191 -0
  273. package/.agent/skills/docker-patterns/SKILL.md +364 -0
  274. package/.agent/skills/documentation-lookup/SKILL.md +90 -0
  275. package/.agent/skills/dotnet-patterns/SKILL.md +321 -0
  276. package/.agent/skills/e2e-testing/SKILL.md +326 -0
  277. package/.agent/skills/energy-procurement/SKILL.md +228 -0
  278. package/.agent/skills/enterprise-agent-ops/SKILL.md +50 -0
  279. package/.agent/skills/eval-harness/SKILL.md +270 -0
  280. package/.agent/skills/exa-search/SKILL.md +103 -0
  281. package/.agent/skills/fal-ai-media/SKILL.md +284 -0
  282. package/.agent/skills/flutter-dart-code-review/SKILL.md +435 -0
  283. package/.agent/skills/foundation-models-on-device/SKILL.md +243 -0
  284. package/.agent/skills/frontend-patterns/SKILL.md +642 -0
  285. package/.agent/skills/frontend-slides/SKILL.md +184 -0
  286. package/.agent/skills/frontend-slides/style-presets.md +330 -0
  287. package/.agent/skills/gan-style-harness/SKILL.md +278 -0
  288. package/.agent/skills/git-workflow/SKILL.md +715 -0
  289. package/.agent/skills/golang-patterns/SKILL.md +674 -0
  290. package/.agent/skills/golang-testing/SKILL.md +720 -0
  291. package/.agent/skills/google-workspace-ops/SKILL.md +95 -0
  292. package/.agent/skills/healthcare-cdss-patterns/SKILL.md +245 -0
  293. package/.agent/skills/healthcare-emr-patterns/SKILL.md +159 -0
  294. package/.agent/skills/healthcare-eval-harness/SKILL.md +207 -0
  295. package/.agent/skills/healthcare-phi-compliance/SKILL.md +145 -0
  296. package/.agent/skills/hexagonal-architecture/SKILL.md +276 -0
  297. package/.agent/skills/inventory-demand-planning/SKILL.md +247 -0
  298. package/.agent/skills/investor-materials/SKILL.md +96 -0
  299. package/.agent/skills/investor-outreach/SKILL.md +91 -0
  300. package/.agent/skills/iterative-retrieval/SKILL.md +211 -0
  301. package/.agent/skills/java-coding-standards/SKILL.md +147 -0
  302. package/.agent/skills/jira-integration/SKILL.md +293 -0
  303. package/.agent/skills/jpa-patterns/SKILL.md +151 -0
  304. package/.agent/skills/kotlin-coroutines-flows/SKILL.md +284 -0
  305. package/.agent/skills/kotlin-exposed-patterns/SKILL.md +719 -0
  306. package/.agent/skills/kotlin-ktor-patterns/SKILL.md +689 -0
  307. package/.agent/skills/kotlin-patterns/SKILL.md +711 -0
  308. package/.agent/skills/kotlin-testing/SKILL.md +824 -0
  309. package/.agent/skills/laravel-patterns/SKILL.md +415 -0
  310. package/.agent/skills/laravel-plugin-discovery/SKILL.md +229 -0
  311. package/.agent/skills/laravel-security/SKILL.md +285 -0
  312. package/.agent/skills/laravel-tdd/SKILL.md +283 -0
  313. package/.agent/skills/laravel-verification/SKILL.md +179 -0
  314. package/.agent/skills/lead-intelligence/SKILL.md +321 -0
  315. package/.agent/skills/lead-intelligence/agents/enrichment-agent.md +85 -0
  316. package/.agent/skills/lead-intelligence/agents/mutual-mapper.md +75 -0
  317. package/.agent/skills/lead-intelligence/agents/outreach-drafter.md +98 -0
  318. package/.agent/skills/lead-intelligence/agents/signal-scorer.md +60 -0
  319. package/.agent/skills/liquid-glass-design/SKILL.md +279 -0
  320. package/.agent/skills/logistics-exception-management/SKILL.md +222 -0
  321. package/.agent/skills/manim-video/SKILL.md +89 -0
  322. package/.agent/skills/manim-video/assets/network-graph-scene.py +52 -0
  323. package/.agent/skills/market-research/SKILL.md +75 -0
  324. package/.agent/skills/mcp-builder/SKILL.md +173 -113
  325. package/.agent/skills/mcp-builder/license.txt +202 -0
  326. package/.agent/skills/mcp-builder/reference/evaluation.md +602 -0
  327. package/.agent/skills/mcp-builder/reference/mcp-best-practices.md +249 -0
  328. package/.agent/skills/mcp-builder/reference/node-mcp-server.md +970 -0
  329. package/.agent/skills/mcp-builder/reference/python-mcp-server.md +719 -0
  330. package/.agent/skills/mcp-builder/scripts/connections.py +151 -0
  331. package/.agent/skills/mcp-builder/scripts/evaluation.py +373 -0
  332. package/.agent/skills/mcp-builder/scripts/example-evaluation.xml +22 -0
  333. package/.agent/skills/mcp-builder/scripts/requirements.txt +2 -0
  334. package/.agent/skills/mcp-server-patterns/SKILL.md +67 -0
  335. package/.agent/skills/nanoclaw-repl/SKILL.md +33 -0
  336. package/.agent/skills/nestjs-patterns/SKILL.md +230 -0
  337. package/.agent/skills/nextjs-turbopack/SKILL.md +44 -0
  338. package/.agent/skills/nutrient-document-processing/SKILL.md +167 -0
  339. package/.agent/skills/nuxt4-patterns/SKILL.md +100 -0
  340. package/.agent/skills/openclaw-persona-forge/SKILL.md +296 -0
  341. package/.agent/skills/openclaw-persona-forge/gacha.py +224 -0
  342. package/.agent/skills/openclaw-persona-forge/gacha.sh +5 -0
  343. package/.agent/skills/openclaw-persona-forge/references/avatar-style.md +124 -0
  344. package/.agent/skills/openclaw-persona-forge/references/boundary-rules.md +53 -0
  345. package/.agent/skills/openclaw-persona-forge/references/error-handling.md +53 -0
  346. package/.agent/skills/openclaw-persona-forge/references/identity-tension.md +48 -0
  347. package/.agent/skills/openclaw-persona-forge/references/naming-system.md +39 -0
  348. package/.agent/skills/openclaw-persona-forge/references/output-template.md +166 -0
  349. package/.agent/skills/opensource-pipeline/SKILL.md +255 -0
  350. package/.agent/skills/perl-patterns/SKILL.md +504 -0
  351. package/.agent/skills/perl-security/SKILL.md +503 -0
  352. package/.agent/skills/perl-testing/SKILL.md +475 -0
  353. package/.agent/skills/plankton-code-quality/SKILL.md +236 -0
  354. package/.agent/skills/postgres-patterns/SKILL.md +147 -0
  355. package/.agent/skills/product-lens/SKILL.md +85 -0
  356. package/.agent/skills/production-scheduling/SKILL.md +238 -0
  357. package/.agent/skills/project-flow-ops/SKILL.md +111 -0
  358. package/.agent/skills/project-guidelines-example/SKILL.md +349 -0
  359. package/.agent/skills/prompt-optimizer/SKILL.md +397 -0
  360. package/.agent/skills/python-patterns/SKILL.md +622 -313
  361. package/.agent/skills/python-testing/SKILL.md +816 -0
  362. package/.agent/skills/pytorch-patterns/SKILL.md +396 -0
  363. package/.agent/skills/quality-nonconformance/SKILL.md +260 -0
  364. package/.agent/skills/ralphinho-rfc-pipeline/SKILL.md +67 -0
  365. package/.agent/skills/regex-vs-llm-structured-text/SKILL.md +220 -0
  366. package/.agent/skills/remotion-video-creation/SKILL.md +43 -0
  367. package/.agent/skills/remotion-video-creation/rules/3d.md +86 -0
  368. package/.agent/skills/remotion-video-creation/rules/animations.md +29 -0
  369. package/.agent/skills/remotion-video-creation/rules/assets/charts-bar-chart.tsx +173 -0
  370. package/.agent/skills/remotion-video-creation/rules/assets/text-animations-typewriter.tsx +100 -0
  371. package/.agent/skills/remotion-video-creation/rules/assets/text-animations-word-highlight.tsx +108 -0
  372. package/.agent/skills/remotion-video-creation/rules/assets.md +78 -0
  373. package/.agent/skills/remotion-video-creation/rules/audio.md +172 -0
  374. package/.agent/skills/remotion-video-creation/rules/calculate-metadata.md +104 -0
  375. package/.agent/skills/remotion-video-creation/rules/can-decode.md +75 -0
  376. package/.agent/skills/remotion-video-creation/rules/charts.md +58 -0
  377. package/.agent/skills/remotion-video-creation/rules/compositions.md +146 -0
  378. package/.agent/skills/remotion-video-creation/rules/display-captions.md +126 -0
  379. package/.agent/skills/remotion-video-creation/rules/extract-frames.md +229 -0
  380. package/.agent/skills/remotion-video-creation/rules/fonts.md +152 -0
  381. package/.agent/skills/remotion-video-creation/rules/get-audio-duration.md +58 -0
  382. package/.agent/skills/remotion-video-creation/rules/get-video-dimensions.md +68 -0
  383. package/.agent/skills/remotion-video-creation/rules/get-video-duration.md +58 -0
  384. package/.agent/skills/remotion-video-creation/rules/gifs.md +138 -0
  385. package/.agent/skills/remotion-video-creation/rules/images.md +130 -0
  386. package/.agent/skills/remotion-video-creation/rules/import-srt-captions.md +67 -0
  387. package/.agent/skills/remotion-video-creation/rules/lottie.md +67 -0
  388. package/.agent/skills/remotion-video-creation/rules/measuring-dom-nodes.md +34 -0
  389. package/.agent/skills/remotion-video-creation/rules/measuring-text.md +143 -0
  390. package/.agent/skills/remotion-video-creation/rules/sequencing.md +106 -0
  391. package/.agent/skills/remotion-video-creation/rules/tailwind.md +11 -0
  392. package/.agent/skills/remotion-video-creation/rules/text-animations.md +20 -0
  393. package/.agent/skills/remotion-video-creation/rules/timing.md +179 -0
  394. package/.agent/skills/remotion-video-creation/rules/transcribe-captions.md +19 -0
  395. package/.agent/skills/remotion-video-creation/rules/transitions.md +122 -0
  396. package/.agent/skills/remotion-video-creation/rules/trimming.md +52 -0
  397. package/.agent/skills/remotion-video-creation/rules/videos.md +171 -0
  398. package/.agent/skills/repo-scan/SKILL.md +78 -0
  399. package/.agent/skills/returns-reverse-logistics/SKILL.md +240 -0
  400. package/.agent/skills/rules-distill/SKILL.md +264 -0
  401. package/.agent/skills/rules-distill/scripts/scan-rules.sh +58 -0
  402. package/.agent/skills/rules-distill/scripts/scan-skills.sh +129 -0
  403. package/.agent/skills/rust-patterns/SKILL.md +499 -0
  404. package/.agent/skills/rust-testing/SKILL.md +500 -0
  405. package/.agent/skills/safety-guard/SKILL.md +75 -0
  406. package/.agent/skills/santa-method/SKILL.md +306 -0
  407. package/.agent/skills/search-first/SKILL.md +161 -0
  408. package/.agent/skills/security-review/SKILL.md +495 -0
  409. package/.agent/skills/security-review/cloud-infrastructure-security.md +361 -0
  410. package/.agent/skills/security-scan/SKILL.md +165 -0
  411. package/.agent/skills/skill-comply/SKILL.md +58 -0
  412. package/.agent/skills/skill-comply/fixtures/compliant-trace.jsonl +5 -0
  413. package/.agent/skills/skill-comply/fixtures/noncompliant-trace.jsonl +3 -0
  414. package/.agent/skills/skill-comply/fixtures/tdd-spec.yaml +44 -0
  415. package/.agent/skills/skill-comply/prompts/classifier.md +24 -0
  416. package/.agent/skills/skill-comply/prompts/scenario-generator.md +62 -0
  417. package/.agent/skills/skill-comply/prompts/spec-generator.md +42 -0
  418. package/.agent/skills/skill-comply/pyproject.toml +15 -0
  419. package/.agent/skills/skill-comply/scripts/classifier.py +85 -0
  420. package/.agent/skills/skill-comply/scripts/grader.py +122 -0
  421. package/.agent/skills/skill-comply/scripts/init.py +0 -0
  422. package/.agent/skills/skill-comply/scripts/parser.py +107 -0
  423. package/.agent/skills/skill-comply/scripts/report.py +170 -0
  424. package/.agent/skills/skill-comply/scripts/run.py +127 -0
  425. package/.agent/skills/skill-comply/scripts/runner.py +161 -0
  426. package/.agent/skills/skill-comply/scripts/scenario-generator.py +70 -0
  427. package/.agent/skills/skill-comply/scripts/spec-generator.py +72 -0
  428. package/.agent/skills/skill-comply/scripts/utils.py +13 -0
  429. package/.agent/skills/skill-comply/tests/test-grader.py +137 -0
  430. package/.agent/skills/skill-comply/tests/test-parser.py +90 -0
  431. package/.agent/skills/skill-creator/SKILL.md +485 -0
  432. package/.agent/skills/skill-creator/agents/analyzer.md +274 -0
  433. package/.agent/skills/skill-creator/agents/comparator.md +202 -0
  434. package/.agent/skills/skill-creator/agents/grader.md +223 -0
  435. package/.agent/skills/skill-creator/assets/eval-review.html +146 -0
  436. package/.agent/skills/skill-creator/eval-viewer/generate-review.py +471 -0
  437. package/.agent/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  438. package/.agent/skills/skill-creator/license.txt +202 -0
  439. package/.agent/skills/skill-creator/references/schemas.md +430 -0
  440. package/.agent/skills/skill-creator/scripts/aggregate-benchmark.py +401 -0
  441. package/.agent/skills/skill-creator/scripts/generate-report.py +326 -0
  442. package/.agent/skills/skill-creator/scripts/improve-description.py +247 -0
  443. package/.agent/skills/skill-creator/scripts/init.py +0 -0
  444. package/.agent/skills/skill-creator/scripts/package-skill.py +136 -0
  445. package/.agent/skills/skill-creator/scripts/quick-validate.py +103 -0
  446. package/.agent/skills/skill-creator/scripts/run-eval.py +310 -0
  447. package/.agent/skills/skill-creator/scripts/run-loop.py +328 -0
  448. package/.agent/skills/skill-creator/scripts/utils.py +47 -0
  449. package/.agent/skills/skill-stocktake/SKILL.md +193 -0
  450. package/.agent/skills/skill-stocktake/scripts/quick-diff.sh +87 -0
  451. package/.agent/skills/skill-stocktake/scripts/save-results.sh +56 -0
  452. package/.agent/skills/skill-stocktake/scripts/scan.sh +170 -0
  453. package/.agent/skills/social-graph-ranker/SKILL.md +154 -0
  454. package/.agent/skills/springboot-patterns/SKILL.md +314 -0
  455. package/.agent/skills/springboot-security/SKILL.md +272 -0
  456. package/.agent/skills/springboot-tdd/SKILL.md +158 -0
  457. package/.agent/skills/springboot-verification/SKILL.md +231 -0
  458. package/.agent/skills/strategic-compact/SKILL.md +131 -0
  459. package/.agent/skills/strategic-compact/suggest-compact.sh +54 -0
  460. package/.agent/skills/swift-actor-persistence/SKILL.md +143 -0
  461. package/.agent/skills/swift-concurrency-6-2/SKILL.md +216 -0
  462. package/.agent/skills/swift-protocol-di-testing/SKILL.md +190 -0
  463. package/.agent/skills/swiftui-patterns/SKILL.md +259 -0
  464. package/.agent/skills/tdd-workflow/SKILL.md +412 -98
  465. package/.agent/skills/team-builder/SKILL.md +168 -0
  466. package/.agent/skills/token-budget-advisor/SKILL.md +133 -0
  467. package/.agent/skills/ui-demo/SKILL.md +465 -0
  468. package/.agent/skills/ui-ux-pro-max/data/charts.csv +26 -26
  469. package/.agent/skills/ui-ux-pro-max/data/colors.csv +97 -97
  470. package/.agent/skills/ui-ux-pro-max/data/landing.csv +28 -28
  471. package/.agent/skills/ui-ux-pro-max/data/products.csv +96 -96
  472. package/.agent/skills/ui-ux-pro-max/data/stacks/flutter.csv +53 -53
  473. package/.agent/skills/ui-ux-pro-max/data/stacks/html-tailwind.csv +56 -56
  474. package/.agent/skills/ui-ux-pro-max/data/stacks/nextjs.csv +53 -53
  475. package/.agent/skills/ui-ux-pro-max/data/stacks/react-native.csv +52 -52
  476. package/.agent/skills/ui-ux-pro-max/data/stacks/react.csv +54 -54
  477. package/.agent/skills/ui-ux-pro-max/data/stacks/svelte.csv +54 -54
  478. package/.agent/skills/ui-ux-pro-max/data/stacks/swiftui.csv +51 -51
  479. package/.agent/skills/ui-ux-pro-max/data/stacks/vue.csv +50 -50
  480. package/.agent/skills/ui-ux-pro-max/data/styles.csv +68 -68
  481. package/.agent/skills/ui-ux-pro-max/data/ux-guidelines.csv +99 -99
  482. package/.agent/skills/ui-ux-pro-max/scripts/search.py +114 -114
  483. package/.agent/skills/verification-loop/SKILL.md +126 -0
  484. package/.agent/skills/video-editing/SKILL.md +310 -0
  485. package/.agent/skills/videodb/SKILL.md +374 -0
  486. package/.agent/skills/videodb/reference/api-reference.md +550 -0
  487. package/.agent/skills/videodb/reference/capture-reference.md +407 -0
  488. package/.agent/skills/videodb/reference/capture.md +101 -0
  489. package/.agent/skills/videodb/reference/editor.md +443 -0
  490. package/.agent/skills/videodb/reference/generative.md +331 -0
  491. package/.agent/skills/videodb/reference/rtstream-reference.md +564 -0
  492. package/.agent/skills/videodb/reference/rtstream.md +65 -0
  493. package/.agent/skills/videodb/reference/search.md +230 -0
  494. package/.agent/skills/videodb/reference/streaming.md +406 -0
  495. package/.agent/skills/videodb/reference/use-cases.md +118 -0
  496. package/.agent/skills/videodb/scripts/ws-listener.py +282 -0
  497. package/.agent/skills/visa-doc-translate/SKILL.md +117 -0
  498. package/.agent/skills/visa-doc-translate/readme.md +86 -0
  499. package/.agent/skills/workspace-surface-audit/SKILL.md +125 -0
  500. package/.agent/skills/x-api/SKILL.md +230 -0
  501. package/.agent/tasks/two-track-merge-contract.md +29 -0
  502. package/.agent/workflows/aside.md +164 -164
  503. package/.agent/workflows/build-fix.md +62 -62
  504. package/.agent/workflows/checkpoint.md +74 -74
  505. package/.agent/workflows/claw.md +23 -51
  506. package/.agent/workflows/clean-memory.md +34 -0
  507. package/.agent/workflows/code-review.md +289 -40
  508. package/.agent/workflows/context-budget.md +23 -29
  509. package/.agent/workflows/cpp-build.md +173 -173
  510. package/.agent/workflows/cpp-review.md +132 -132
  511. package/.agent/workflows/cpp-test.md +251 -251
  512. package/.agent/workflows/devfleet.md +23 -92
  513. package/.agent/workflows/docs.md +23 -31
  514. package/.agent/workflows/e2e.md +268 -365
  515. package/.agent/workflows/eval.md +23 -120
  516. package/.agent/workflows/evolve.md +178 -178
  517. package/.agent/workflows/flutter-build.md +164 -0
  518. package/.agent/workflows/flutter-review.md +116 -0
  519. package/.agent/workflows/flutter-test.md +144 -0
  520. package/.agent/workflows/gan-build.md +99 -0
  521. package/.agent/workflows/gan-design.md +35 -0
  522. package/.agent/workflows/go-build.md +183 -183
  523. package/.agent/workflows/go-review.md +148 -148
  524. package/.agent/workflows/go-test.md +268 -268
  525. package/.agent/workflows/gradle-build.md +70 -70
  526. package/.agent/workflows/harness-audit.md +73 -71
  527. package/.agent/workflows/instinct-export.md +66 -66
  528. package/.agent/workflows/instinct-import.md +114 -114
  529. package/.agent/workflows/instinct-status.md +59 -59
  530. package/.agent/workflows/jira.md +106 -0
  531. package/.agent/workflows/kotlin-build.md +174 -174
  532. package/.agent/workflows/kotlin-review.md +140 -140
  533. package/.agent/workflows/kotlin-test.md +312 -312
  534. package/.agent/workflows/learn-eval.md +116 -116
  535. package/.agent/workflows/learn.md +70 -70
  536. package/.agent/workflows/loop-start.md +32 -32
  537. package/.agent/workflows/loop-status.md +24 -24
  538. package/.agent/workflows/model-route.md +26 -26
  539. package/.agent/workflows/multi-backend.md +158 -158
  540. package/.agent/workflows/multi-execute.md +315 -315
  541. package/.agent/workflows/multi-frontend.md +158 -158
  542. package/.agent/workflows/multi-plan.md +268 -268
  543. package/.agent/workflows/multi-workflow.md +191 -191
  544. package/.agent/workflows/orchestrate.md +135 -231
  545. package/.agent/workflows/plan.md +117 -115
  546. package/.agent/workflows/pm2.md +272 -272
  547. package/.agent/workflows/projects.md +39 -39
  548. package/.agent/workflows/promote.md +41 -41
  549. package/.agent/workflows/prompt-optimize.md +23 -38
  550. package/.agent/workflows/prp-commit.md +112 -0
  551. package/.agent/workflows/prp-implement.md +385 -0
  552. package/.agent/workflows/prp-plan.md +502 -0
  553. package/.agent/workflows/prp-pr.md +184 -0
  554. package/.agent/workflows/prp-prd.md +447 -0
  555. package/.agent/workflows/prune.md +31 -31
  556. package/.agent/workflows/python-review.md +297 -297
  557. package/.agent/workflows/quality-gate.md +29 -29
  558. package/.agent/workflows/refactor-clean.md +80 -80
  559. package/.agent/workflows/resume-session.md +156 -156
  560. package/.agent/workflows/rules-distill.md +20 -11
  561. package/.agent/workflows/rust-build.md +187 -187
  562. package/.agent/workflows/rust-review.md +142 -142
  563. package/.agent/workflows/rust-test.md +308 -308
  564. package/.agent/workflows/santa-loop.md +175 -0
  565. package/.agent/workflows/save-session.md +275 -275
  566. package/.agent/workflows/sessions.md +333 -333
  567. package/.agent/workflows/setup-pm.md +80 -80
  568. package/.agent/workflows/skill-create.md +174 -174
  569. package/.agent/workflows/skill-health.md +54 -54
  570. package/.agent/workflows/tdd.md +231 -328
  571. package/.agent/workflows/test-coverage.md +69 -69
  572. package/.agent/workflows/update-codemaps.md +72 -72
  573. package/.agent/workflows/update-docs.md +84 -84
  574. package/.agent/workflows/verify.md +23 -59
  575. package/LICENSE +176 -176
  576. package/README.md +28 -20
  577. package/RELEASE.md +32 -36
  578. package/package.json +87 -79
  579. package/scripts/release-check.js +55 -55
  580. package/src/bin/cli.js +399 -53
  581. package/src/lib/installer.js +360 -114
  582. package/src/lib/manifests/stacks.js +122 -0
  583. package/src/lib/slash-commands.js +28 -0
  584. package/src/templates/claude/CLAUDE.en.md +42 -0
  585. package/src/templates/claude/CLAUDE.md +42 -0
  586. package/src/templates/claude/CLAUDE.vi.md +42 -0
  587. package/src/templates/codex/AGENTS.en.md +40 -0
  588. package/src/templates/codex/AGENTS.md +40 -0
  589. package/src/templates/codex/AGENTS.vi.md +40 -0
  590. package/src/templates/cursor/pilo-masterkit.mdc +20 -0
  591. package/src/templates/gemini/GEMINI.en.md +56 -0
  592. package/src/templates/gemini/GEMINI.md +56 -0
  593. package/src/templates/gemini/GEMINI.vi.md +56 -0
  594. package/src/templates/github/copilot-instructions.md +16 -0
@@ -0,0 +1,401 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Aggregate individual run results into benchmark summary statistics.
4
+
5
+ Reads grading.json files from run directories and produces:
6
+ - run_summary with mean, stddev, min, max for each metric
7
+ - delta between with_skill and without_skill configurations
8
+
9
+ Usage:
10
+ python aggregate_benchmark.py <benchmark_dir>
11
+
12
+ Example:
13
+ python aggregate_benchmark.py benchmarks/2026-01-15T10-30-00/
14
+
15
+ The script supports two directory layouts:
16
+
17
+ Workspace layout (from skill-creator iterations):
18
+ <benchmark_dir>/
19
+ └── eval-N/
20
+ ├── with_skill/
21
+ │ ├── run-1/grading.json
22
+ │ └── run-2/grading.json
23
+ └── without_skill/
24
+ ├── run-1/grading.json
25
+ └── run-2/grading.json
26
+
27
+ Legacy layout (with runs/ subdirectory):
28
+ <benchmark_dir>/
29
+ └── runs/
30
+ └── eval-N/
31
+ ├── with_skill/
32
+ │ └── run-1/grading.json
33
+ └── without_skill/
34
+ └── run-1/grading.json
35
+ """
36
+
37
+ import argparse
38
+ import json
39
+ import math
40
+ import sys
41
+ from datetime import datetime, timezone
42
+ from pathlib import Path
43
+
44
+
45
+ def calculate_stats(values: list[float]) -> dict:
46
+ """Calculate mean, stddev, min, max for a list of values."""
47
+ if not values:
48
+ return {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0}
49
+
50
+ n = len(values)
51
+ mean = sum(values) / n
52
+
53
+ if n > 1:
54
+ variance = sum((x - mean) ** 2 for x in values) / (n - 1)
55
+ stddev = math.sqrt(variance)
56
+ else:
57
+ stddev = 0.0
58
+
59
+ return {
60
+ "mean": round(mean, 4),
61
+ "stddev": round(stddev, 4),
62
+ "min": round(min(values), 4),
63
+ "max": round(max(values), 4)
64
+ }
65
+
66
+
67
+ def load_run_results(benchmark_dir: Path) -> dict:
68
+ """
69
+ Load all run results from a benchmark directory.
70
+
71
+ Returns dict keyed by config name (e.g. "with_skill"/"without_skill",
72
+ or "new_skill"/"old_skill"), each containing a list of run results.
73
+ """
74
+ # Support both layouts: eval dirs directly under benchmark_dir, or under runs/
75
+ runs_dir = benchmark_dir / "runs"
76
+ if runs_dir.exists():
77
+ search_dir = runs_dir
78
+ elif list(benchmark_dir.glob("eval-*")):
79
+ search_dir = benchmark_dir
80
+ else:
81
+ print(f"No eval directories found in {benchmark_dir} or {benchmark_dir / 'runs'}")
82
+ return {}
83
+
84
+ results: dict[str, list] = {}
85
+
86
+ for eval_idx, eval_dir in enumerate(sorted(search_dir.glob("eval-*"))):
87
+ metadata_path = eval_dir / "eval_metadata.json"
88
+ if metadata_path.exists():
89
+ try:
90
+ with open(metadata_path) as mf:
91
+ eval_id = json.load(mf).get("eval_id", eval_idx)
92
+ except (json.JSONDecodeError, OSError):
93
+ eval_id = eval_idx
94
+ else:
95
+ try:
96
+ eval_id = int(eval_dir.name.split("-")[1])
97
+ except ValueError:
98
+ eval_id = eval_idx
99
+
100
+ # Discover config directories dynamically rather than hardcoding names
101
+ for config_dir in sorted(eval_dir.iterdir()):
102
+ if not config_dir.is_dir():
103
+ continue
104
+ # Skip non-config directories (inputs, outputs, etc.)
105
+ if not list(config_dir.glob("run-*")):
106
+ continue
107
+ config = config_dir.name
108
+ if config not in results:
109
+ results[config] = []
110
+
111
+ for run_dir in sorted(config_dir.glob("run-*")):
112
+ run_number = int(run_dir.name.split("-")[1])
113
+ grading_file = run_dir / "grading.json"
114
+
115
+ if not grading_file.exists():
116
+ print(f"Warning: grading.json not found in {run_dir}")
117
+ continue
118
+
119
+ try:
120
+ with open(grading_file) as f:
121
+ grading = json.load(f)
122
+ except json.JSONDecodeError as e:
123
+ print(f"Warning: Invalid JSON in {grading_file}: {e}")
124
+ continue
125
+
126
+ # Extract metrics
127
+ result = {
128
+ "eval_id": eval_id,
129
+ "run_number": run_number,
130
+ "pass_rate": grading.get("summary", {}).get("pass_rate", 0.0),
131
+ "passed": grading.get("summary", {}).get("passed", 0),
132
+ "failed": grading.get("summary", {}).get("failed", 0),
133
+ "total": grading.get("summary", {}).get("total", 0),
134
+ }
135
+
136
+ # Extract timing — check grading.json first, then sibling timing.json
137
+ timing = grading.get("timing", {})
138
+ result["time_seconds"] = timing.get("total_duration_seconds", 0.0)
139
+ timing_file = run_dir / "timing.json"
140
+ if result["time_seconds"] == 0.0 and timing_file.exists():
141
+ try:
142
+ with open(timing_file) as tf:
143
+ timing_data = json.load(tf)
144
+ result["time_seconds"] = timing_data.get("total_duration_seconds", 0.0)
145
+ result["tokens"] = timing_data.get("total_tokens", 0)
146
+ except json.JSONDecodeError:
147
+ pass
148
+
149
+ # Extract metrics if available
150
+ metrics = grading.get("execution_metrics", {})
151
+ result["tool_calls"] = metrics.get("total_tool_calls", 0)
152
+ if not result.get("tokens"):
153
+ result["tokens"] = metrics.get("output_chars", 0)
154
+ result["errors"] = metrics.get("errors_encountered", 0)
155
+
156
+ # Extract expectations — viewer requires fields: text, passed, evidence
157
+ raw_expectations = grading.get("expectations", [])
158
+ for exp in raw_expectations:
159
+ if "text" not in exp or "passed" not in exp:
160
+ print(f"Warning: expectation in {grading_file} missing required fields (text, passed, evidence): {exp}")
161
+ result["expectations"] = raw_expectations
162
+
163
+ # Extract notes from user_notes_summary
164
+ notes_summary = grading.get("user_notes_summary", {})
165
+ notes = []
166
+ notes.extend(notes_summary.get("uncertainties", []))
167
+ notes.extend(notes_summary.get("needs_review", []))
168
+ notes.extend(notes_summary.get("workarounds", []))
169
+ result["notes"] = notes
170
+
171
+ results[config].append(result)
172
+
173
+ return results
174
+
175
+
176
+ def aggregate_results(results: dict) -> dict:
177
+ """
178
+ Aggregate run results into summary statistics.
179
+
180
+ Returns run_summary with stats for each configuration and delta.
181
+ """
182
+ run_summary = {}
183
+ configs = list(results.keys())
184
+
185
+ for config in configs:
186
+ runs = results.get(config, [])
187
+
188
+ if not runs:
189
+ run_summary[config] = {
190
+ "pass_rate": {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0},
191
+ "time_seconds": {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0},
192
+ "tokens": {"mean": 0, "stddev": 0, "min": 0, "max": 0}
193
+ }
194
+ continue
195
+
196
+ pass_rates = [r["pass_rate"] for r in runs]
197
+ times = [r["time_seconds"] for r in runs]
198
+ tokens = [r.get("tokens", 0) for r in runs]
199
+
200
+ run_summary[config] = {
201
+ "pass_rate": calculate_stats(pass_rates),
202
+ "time_seconds": calculate_stats(times),
203
+ "tokens": calculate_stats(tokens)
204
+ }
205
+
206
+ # Calculate delta between the first two configs (if two exist)
207
+ if len(configs) >= 2:
208
+ primary = run_summary.get(configs[0], {})
209
+ baseline = run_summary.get(configs[1], {})
210
+ else:
211
+ primary = run_summary.get(configs[0], {}) if configs else {}
212
+ baseline = {}
213
+
214
+ delta_pass_rate = primary.get("pass_rate", {}).get("mean", 0) - baseline.get("pass_rate", {}).get("mean", 0)
215
+ delta_time = primary.get("time_seconds", {}).get("mean", 0) - baseline.get("time_seconds", {}).get("mean", 0)
216
+ delta_tokens = primary.get("tokens", {}).get("mean", 0) - baseline.get("tokens", {}).get("mean", 0)
217
+
218
+ run_summary["delta"] = {
219
+ "pass_rate": f"{delta_pass_rate:+.2f}",
220
+ "time_seconds": f"{delta_time:+.1f}",
221
+ "tokens": f"{delta_tokens:+.0f}"
222
+ }
223
+
224
+ return run_summary
225
+
226
+
227
+ def generate_benchmark(benchmark_dir: Path, skill_name: str = "", skill_path: str = "") -> dict:
228
+ """
229
+ Generate complete benchmark.json from run results.
230
+ """
231
+ results = load_run_results(benchmark_dir)
232
+ run_summary = aggregate_results(results)
233
+
234
+ # Build runs array for benchmark.json
235
+ runs = []
236
+ for config in results:
237
+ for result in results[config]:
238
+ runs.append({
239
+ "eval_id": result["eval_id"],
240
+ "configuration": config,
241
+ "run_number": result["run_number"],
242
+ "result": {
243
+ "pass_rate": result["pass_rate"],
244
+ "passed": result["passed"],
245
+ "failed": result["failed"],
246
+ "total": result["total"],
247
+ "time_seconds": result["time_seconds"],
248
+ "tokens": result.get("tokens", 0),
249
+ "tool_calls": result.get("tool_calls", 0),
250
+ "errors": result.get("errors", 0)
251
+ },
252
+ "expectations": result["expectations"],
253
+ "notes": result["notes"]
254
+ })
255
+
256
+ # Determine eval IDs from results
257
+ eval_ids = sorted(set(
258
+ r["eval_id"]
259
+ for config in results.values()
260
+ for r in config
261
+ ))
262
+
263
+ benchmark = {
264
+ "metadata": {
265
+ "skill_name": skill_name or "<skill-name>",
266
+ "skill_path": skill_path or "<path/to/skill>",
267
+ "executor_model": "<model-name>",
268
+ "analyzer_model": "<model-name>",
269
+ "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
270
+ "evals_run": eval_ids,
271
+ "runs_per_configuration": 3
272
+ },
273
+ "runs": runs,
274
+ "run_summary": run_summary,
275
+ "notes": [] # To be filled by analyzer
276
+ }
277
+
278
+ return benchmark
279
+
280
+
281
+ def generate_markdown(benchmark: dict) -> str:
282
+ """Generate human-readable benchmark.md from benchmark data."""
283
+ metadata = benchmark["metadata"]
284
+ run_summary = benchmark["run_summary"]
285
+
286
+ # Determine config names (excluding "delta")
287
+ configs = [k for k in run_summary if k != "delta"]
288
+ config_a = configs[0] if len(configs) >= 1 else "config_a"
289
+ config_b = configs[1] if len(configs) >= 2 else "config_b"
290
+ label_a = config_a.replace("_", " ").title()
291
+ label_b = config_b.replace("_", " ").title()
292
+
293
+ lines = [
294
+ f"# Skill Benchmark: {metadata['skill_name']}",
295
+ "",
296
+ f"**Model**: {metadata['executor_model']}",
297
+ f"**Date**: {metadata['timestamp']}",
298
+ f"**Evals**: {', '.join(map(str, metadata['evals_run']))} ({metadata['runs_per_configuration']} runs each per configuration)",
299
+ "",
300
+ "## Summary",
301
+ "",
302
+ f"| Metric | {label_a} | {label_b} | Delta |",
303
+ "|--------|------------|---------------|-------|",
304
+ ]
305
+
306
+ a_summary = run_summary.get(config_a, {})
307
+ b_summary = run_summary.get(config_b, {})
308
+ delta = run_summary.get("delta", {})
309
+
310
+ # Format pass rate
311
+ a_pr = a_summary.get("pass_rate", {})
312
+ b_pr = b_summary.get("pass_rate", {})
313
+ lines.append(f"| Pass Rate | {a_pr.get('mean', 0)*100:.0f}% ± {a_pr.get('stddev', 0)*100:.0f}% | {b_pr.get('mean', 0)*100:.0f}% ± {b_pr.get('stddev', 0)*100:.0f}% | {delta.get('pass_rate', '—')} |")
314
+
315
+ # Format time
316
+ a_time = a_summary.get("time_seconds", {})
317
+ b_time = b_summary.get("time_seconds", {})
318
+ lines.append(f"| Time | {a_time.get('mean', 0):.1f}s ± {a_time.get('stddev', 0):.1f}s | {b_time.get('mean', 0):.1f}s ± {b_time.get('stddev', 0):.1f}s | {delta.get('time_seconds', '—')}s |")
319
+
320
+ # Format tokens
321
+ a_tokens = a_summary.get("tokens", {})
322
+ b_tokens = b_summary.get("tokens", {})
323
+ lines.append(f"| Tokens | {a_tokens.get('mean', 0):.0f} ± {a_tokens.get('stddev', 0):.0f} | {b_tokens.get('mean', 0):.0f} ± {b_tokens.get('stddev', 0):.0f} | {delta.get('tokens', '—')} |")
324
+
325
+ # Notes section
326
+ if benchmark.get("notes"):
327
+ lines.extend([
328
+ "",
329
+ "## Notes",
330
+ ""
331
+ ])
332
+ for note in benchmark["notes"]:
333
+ lines.append(f"- {note}")
334
+
335
+ return "\n".join(lines)
336
+
337
+
338
+ def main():
339
+ parser = argparse.ArgumentParser(
340
+ description="Aggregate benchmark run results into summary statistics"
341
+ )
342
+ parser.add_argument(
343
+ "benchmark_dir",
344
+ type=Path,
345
+ help="Path to the benchmark directory"
346
+ )
347
+ parser.add_argument(
348
+ "--skill-name",
349
+ default="",
350
+ help="Name of the skill being benchmarked"
351
+ )
352
+ parser.add_argument(
353
+ "--skill-path",
354
+ default="",
355
+ help="Path to the skill being benchmarked"
356
+ )
357
+ parser.add_argument(
358
+ "--output", "-o",
359
+ type=Path,
360
+ help="Output path for benchmark.json (default: <benchmark_dir>/benchmark.json)"
361
+ )
362
+
363
+ args = parser.parse_args()
364
+
365
+ if not args.benchmark_dir.exists():
366
+ print(f"Directory not found: {args.benchmark_dir}")
367
+ sys.exit(1)
368
+
369
+ # Generate benchmark
370
+ benchmark = generate_benchmark(args.benchmark_dir, args.skill_name, args.skill_path)
371
+
372
+ # Determine output paths
373
+ output_json = args.output or (args.benchmark_dir / "benchmark.json")
374
+ output_md = output_json.with_suffix(".md")
375
+
376
+ # Write benchmark.json
377
+ with open(output_json, "w") as f:
378
+ json.dump(benchmark, f, indent=2)
379
+ print(f"Generated: {output_json}")
380
+
381
+ # Write benchmark.md
382
+ markdown = generate_markdown(benchmark)
383
+ with open(output_md, "w") as f:
384
+ f.write(markdown)
385
+ print(f"Generated: {output_md}")
386
+
387
+ # Print summary
388
+ run_summary = benchmark["run_summary"]
389
+ configs = [k for k in run_summary if k != "delta"]
390
+ delta = run_summary.get("delta", {})
391
+
392
+ print(f"\nSummary:")
393
+ for config in configs:
394
+ pr = run_summary[config]["pass_rate"]["mean"]
395
+ label = config.replace("_", " ").title()
396
+ print(f" {label}: {pr*100:.1f}% pass rate")
397
+ print(f" Delta: {delta.get('pass_rate', '—')}")
398
+
399
+
400
+ if __name__ == "__main__":
401
+ main()