@raishin/vanguard-frontier-agentic 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (342) hide show
  1. package/.claude-plugin/plugin.json +25 -1
  2. package/.cursor-plugin/plugin.json +25 -1
  3. package/.github/plugin/marketplace.json +1 -1
  4. package/README.md +26 -7
  5. package/agents/marketing/README.md +44 -0
  6. package/agents/marketing/ai-advertising-targeting-fairness-review-agent/AGENT.md +53 -0
  7. package/agents/marketing/ai-advertising-targeting-fairness-review-agent/harnesses/claude-code.agent.md +36 -0
  8. package/agents/marketing/ai-advertising-targeting-fairness-review-agent/harnesses/codex.toml +33 -0
  9. package/agents/marketing/ai-advertising-targeting-fairness-review-agent/harnesses/copilot.agent.md +36 -0
  10. package/agents/marketing/ai-advertising-targeting-fairness-review-agent/harnesses/cursor.agent.md +36 -0
  11. package/agents/marketing/ai-advertising-targeting-fairness-review-agent/harnesses/gemini.agent.md +36 -0
  12. package/agents/marketing/ai-advertising-targeting-fairness-review-agent/harnesses/kiro-cli.agent.json +5 -0
  13. package/agents/marketing/ai-advertising-targeting-fairness-review-agent/harnesses/kiro-ide.agent.md +36 -0
  14. package/agents/marketing/ai-advertising-targeting-fairness-review-agent/metadata.json +31 -0
  15. package/agents/marketing/analytics-data-minimization-review-agent/AGENT.md +51 -0
  16. package/agents/marketing/analytics-data-minimization-review-agent/harnesses/claude-code.agent.md +34 -0
  17. package/agents/marketing/analytics-data-minimization-review-agent/harnesses/codex.toml +33 -0
  18. package/agents/marketing/analytics-data-minimization-review-agent/harnesses/copilot.agent.md +34 -0
  19. package/agents/marketing/analytics-data-minimization-review-agent/harnesses/cursor.agent.md +34 -0
  20. package/agents/marketing/analytics-data-minimization-review-agent/harnesses/gemini.agent.md +34 -0
  21. package/agents/marketing/analytics-data-minimization-review-agent/harnesses/kiro-cli.agent.json +5 -0
  22. package/agents/marketing/analytics-data-minimization-review-agent/harnesses/kiro-ide.agent.md +34 -0
  23. package/agents/marketing/analytics-data-minimization-review-agent/metadata.json +31 -0
  24. package/agents/marketing/email-sender-authentication-review-agent/AGENT.md +50 -0
  25. package/agents/marketing/email-sender-authentication-review-agent/harnesses/claude-code.agent.md +33 -0
  26. package/agents/marketing/email-sender-authentication-review-agent/harnesses/codex.toml +32 -0
  27. package/agents/marketing/email-sender-authentication-review-agent/harnesses/copilot.agent.md +33 -0
  28. package/agents/marketing/email-sender-authentication-review-agent/harnesses/cursor.agent.md +33 -0
  29. package/agents/marketing/email-sender-authentication-review-agent/harnesses/gemini.agent.md +33 -0
  30. package/agents/marketing/email-sender-authentication-review-agent/harnesses/kiro-cli.agent.json +5 -0
  31. package/agents/marketing/email-sender-authentication-review-agent/harnesses/kiro-ide.agent.md +33 -0
  32. package/agents/marketing/email-sender-authentication-review-agent/metadata.json +31 -0
  33. package/agents/marketing/eu-ai-act-marketing-system-review-agent/AGENT.md +54 -0
  34. package/agents/marketing/eu-ai-act-marketing-system-review-agent/harnesses/claude-code.agent.md +37 -0
  35. package/agents/marketing/eu-ai-act-marketing-system-review-agent/harnesses/codex.toml +33 -0
  36. package/agents/marketing/eu-ai-act-marketing-system-review-agent/harnesses/copilot.agent.md +37 -0
  37. package/agents/marketing/eu-ai-act-marketing-system-review-agent/harnesses/cursor.agent.md +37 -0
  38. package/agents/marketing/eu-ai-act-marketing-system-review-agent/harnesses/gemini.agent.md +37 -0
  39. package/agents/marketing/eu-ai-act-marketing-system-review-agent/harnesses/kiro-cli.agent.json +5 -0
  40. package/agents/marketing/eu-ai-act-marketing-system-review-agent/harnesses/kiro-ide.agent.md +37 -0
  41. package/agents/marketing/eu-ai-act-marketing-system-review-agent/metadata.json +31 -0
  42. package/agents/marketing/influencer-disclosure-compliance-review-agent/AGENT.md +52 -0
  43. package/agents/marketing/influencer-disclosure-compliance-review-agent/harnesses/claude-code.agent.md +35 -0
  44. package/agents/marketing/influencer-disclosure-compliance-review-agent/harnesses/codex.toml +33 -0
  45. package/agents/marketing/influencer-disclosure-compliance-review-agent/harnesses/copilot.agent.md +35 -0
  46. package/agents/marketing/influencer-disclosure-compliance-review-agent/harnesses/cursor.agent.md +35 -0
  47. package/agents/marketing/influencer-disclosure-compliance-review-agent/harnesses/gemini.agent.md +35 -0
  48. package/agents/marketing/influencer-disclosure-compliance-review-agent/harnesses/kiro-cli.agent.json +5 -0
  49. package/agents/marketing/influencer-disclosure-compliance-review-agent/harnesses/kiro-ide.agent.md +35 -0
  50. package/agents/marketing/influencer-disclosure-compliance-review-agent/metadata.json +31 -0
  51. package/agents/marketing/lookalike-audience-upload-compliance-review-agent/AGENT.md +54 -0
  52. package/agents/marketing/lookalike-audience-upload-compliance-review-agent/harnesses/claude-code.agent.md +37 -0
  53. package/agents/marketing/lookalike-audience-upload-compliance-review-agent/harnesses/codex.toml +34 -0
  54. package/agents/marketing/lookalike-audience-upload-compliance-review-agent/harnesses/copilot.agent.md +37 -0
  55. package/agents/marketing/lookalike-audience-upload-compliance-review-agent/harnesses/cursor.agent.md +37 -0
  56. package/agents/marketing/lookalike-audience-upload-compliance-review-agent/harnesses/gemini.agent.md +37 -0
  57. package/agents/marketing/lookalike-audience-upload-compliance-review-agent/harnesses/kiro-cli.agent.json +5 -0
  58. package/agents/marketing/lookalike-audience-upload-compliance-review-agent/harnesses/kiro-ide.agent.md +37 -0
  59. package/agents/marketing/lookalike-audience-upload-compliance-review-agent/metadata.json +31 -0
  60. package/agents/marketing/marketing-consent-data-collection-review-agent/AGENT.md +51 -0
  61. package/agents/marketing/marketing-consent-data-collection-review-agent/harnesses/claude-code.agent.md +34 -0
  62. package/agents/marketing/marketing-consent-data-collection-review-agent/harnesses/codex.toml +33 -0
  63. package/agents/marketing/marketing-consent-data-collection-review-agent/harnesses/copilot.agent.md +34 -0
  64. package/agents/marketing/marketing-consent-data-collection-review-agent/harnesses/cursor.agent.md +34 -0
  65. package/agents/marketing/marketing-consent-data-collection-review-agent/harnesses/gemini.agent.md +34 -0
  66. package/agents/marketing/marketing-consent-data-collection-review-agent/harnesses/kiro-cli.agent.json +5 -0
  67. package/agents/marketing/marketing-consent-data-collection-review-agent/harnesses/kiro-ide.agent.md +34 -0
  68. package/agents/marketing/marketing-consent-data-collection-review-agent/metadata.json +31 -0
  69. package/agents/marketing/marketing-conversion-flow-dark-pattern-review-agent/AGENT.md +51 -0
  70. package/agents/marketing/marketing-conversion-flow-dark-pattern-review-agent/harnesses/claude-code.agent.md +34 -0
  71. package/agents/marketing/marketing-conversion-flow-dark-pattern-review-agent/harnesses/codex.toml +33 -0
  72. package/agents/marketing/marketing-conversion-flow-dark-pattern-review-agent/harnesses/copilot.agent.md +34 -0
  73. package/agents/marketing/marketing-conversion-flow-dark-pattern-review-agent/harnesses/cursor.agent.md +34 -0
  74. package/agents/marketing/marketing-conversion-flow-dark-pattern-review-agent/harnesses/gemini.agent.md +34 -0
  75. package/agents/marketing/marketing-conversion-flow-dark-pattern-review-agent/harnesses/kiro-cli.agent.json +5 -0
  76. package/agents/marketing/marketing-conversion-flow-dark-pattern-review-agent/harnesses/kiro-ide.agent.md +34 -0
  77. package/agents/marketing/marketing-conversion-flow-dark-pattern-review-agent/metadata.json +31 -0
  78. package/agents/marketing/marketing-email-list-retention-review-agent/AGENT.md +50 -0
  79. package/agents/marketing/marketing-email-list-retention-review-agent/harnesses/claude-code.agent.md +33 -0
  80. package/agents/marketing/marketing-email-list-retention-review-agent/harnesses/codex.toml +32 -0
  81. package/agents/marketing/marketing-email-list-retention-review-agent/harnesses/copilot.agent.md +33 -0
  82. package/agents/marketing/marketing-email-list-retention-review-agent/harnesses/cursor.agent.md +33 -0
  83. package/agents/marketing/marketing-email-list-retention-review-agent/harnesses/gemini.agent.md +33 -0
  84. package/agents/marketing/marketing-email-list-retention-review-agent/harnesses/kiro-cli.agent.json +5 -0
  85. package/agents/marketing/marketing-email-list-retention-review-agent/harnesses/kiro-ide.agent.md +33 -0
  86. package/agents/marketing/marketing-email-list-retention-review-agent/metadata.json +31 -0
  87. package/agents/marketing/marketing-gpc-signal-honoring-review-agent/AGENT.md +50 -0
  88. package/agents/marketing/marketing-gpc-signal-honoring-review-agent/harnesses/claude-code.agent.md +33 -0
  89. package/agents/marketing/marketing-gpc-signal-honoring-review-agent/harnesses/codex.toml +32 -0
  90. package/agents/marketing/marketing-gpc-signal-honoring-review-agent/harnesses/copilot.agent.md +33 -0
  91. package/agents/marketing/marketing-gpc-signal-honoring-review-agent/harnesses/cursor.agent.md +33 -0
  92. package/agents/marketing/marketing-gpc-signal-honoring-review-agent/harnesses/gemini.agent.md +33 -0
  93. package/agents/marketing/marketing-gpc-signal-honoring-review-agent/harnesses/kiro-cli.agent.json +5 -0
  94. package/agents/marketing/marketing-gpc-signal-honoring-review-agent/harnesses/kiro-ide.agent.md +33 -0
  95. package/agents/marketing/marketing-gpc-signal-honoring-review-agent/metadata.json +31 -0
  96. package/agents/marketing/marketing-maestro-agent/AGENT.md +62 -0
  97. package/agents/marketing/marketing-maestro-agent/PERMISSIONS.md +75 -0
  98. package/agents/marketing/marketing-maestro-agent/README.md +62 -0
  99. package/agents/marketing/marketing-maestro-agent/harnesses/claude-code.agent.md +43 -0
  100. package/agents/marketing/marketing-maestro-agent/harnesses/codex.toml +35 -0
  101. package/agents/marketing/marketing-maestro-agent/harnesses/copilot.agent.md +43 -0
  102. package/agents/marketing/marketing-maestro-agent/harnesses/cursor.agent.md +43 -0
  103. package/agents/marketing/marketing-maestro-agent/harnesses/gemini.agent.md +43 -0
  104. package/agents/marketing/marketing-maestro-agent/harnesses/kiro-cli.agent.json +5 -0
  105. package/agents/marketing/marketing-maestro-agent/harnesses/kiro-ide.agent.md +43 -0
  106. package/agents/marketing/marketing-maestro-agent/metadata.json +38 -0
  107. package/agents/marketing/marketing-pixel-data-leakage-review-agent/AGENT.md +50 -0
  108. package/agents/marketing/marketing-pixel-data-leakage-review-agent/harnesses/claude-code.agent.md +33 -0
  109. package/agents/marketing/marketing-pixel-data-leakage-review-agent/harnesses/codex.toml +32 -0
  110. package/agents/marketing/marketing-pixel-data-leakage-review-agent/harnesses/copilot.agent.md +33 -0
  111. package/agents/marketing/marketing-pixel-data-leakage-review-agent/harnesses/cursor.agent.md +33 -0
  112. package/agents/marketing/marketing-pixel-data-leakage-review-agent/harnesses/gemini.agent.md +33 -0
  113. package/agents/marketing/marketing-pixel-data-leakage-review-agent/harnesses/kiro-cli.agent.json +5 -0
  114. package/agents/marketing/marketing-pixel-data-leakage-review-agent/harnesses/kiro-ide.agent.md +33 -0
  115. package/agents/marketing/marketing-pixel-data-leakage-review-agent/metadata.json +31 -0
  116. package/agents/marketing/martech-access-governance-review-agent/AGENT.md +51 -0
  117. package/agents/marketing/martech-access-governance-review-agent/harnesses/claude-code.agent.md +34 -0
  118. package/agents/marketing/martech-access-governance-review-agent/harnesses/codex.toml +33 -0
  119. package/agents/marketing/martech-access-governance-review-agent/harnesses/copilot.agent.md +34 -0
  120. package/agents/marketing/martech-access-governance-review-agent/harnesses/cursor.agent.md +34 -0
  121. package/agents/marketing/martech-access-governance-review-agent/harnesses/gemini.agent.md +34 -0
  122. package/agents/marketing/martech-access-governance-review-agent/harnesses/kiro-cli.agent.json +5 -0
  123. package/agents/marketing/martech-access-governance-review-agent/harnesses/kiro-ide.agent.md +34 -0
  124. package/agents/marketing/martech-access-governance-review-agent/metadata.json +31 -0
  125. package/agents/marketing/programmatic-supply-chain-integrity-review-agent/AGENT.md +50 -0
  126. package/agents/marketing/programmatic-supply-chain-integrity-review-agent/harnesses/claude-code.agent.md +33 -0
  127. package/agents/marketing/programmatic-supply-chain-integrity-review-agent/harnesses/codex.toml +32 -0
  128. package/agents/marketing/programmatic-supply-chain-integrity-review-agent/harnesses/copilot.agent.md +33 -0
  129. package/agents/marketing/programmatic-supply-chain-integrity-review-agent/harnesses/cursor.agent.md +33 -0
  130. package/agents/marketing/programmatic-supply-chain-integrity-review-agent/harnesses/gemini.agent.md +33 -0
  131. package/agents/marketing/programmatic-supply-chain-integrity-review-agent/harnesses/kiro-cli.agent.json +5 -0
  132. package/agents/marketing/programmatic-supply-chain-integrity-review-agent/harnesses/kiro-ide.agent.md +33 -0
  133. package/agents/marketing/programmatic-supply-chain-integrity-review-agent/metadata.json +31 -0
  134. package/agents/qa/README.md +51 -0
  135. package/agents/qa/ci-test-pipeline-review-agent/AGENT.md +51 -0
  136. package/agents/qa/ci-test-pipeline-review-agent/harnesses/claude-code.agent.md +35 -0
  137. package/agents/qa/ci-test-pipeline-review-agent/harnesses/codex.toml +34 -0
  138. package/agents/qa/ci-test-pipeline-review-agent/harnesses/copilot.agent.md +35 -0
  139. package/agents/qa/ci-test-pipeline-review-agent/harnesses/cursor.agent.md +35 -0
  140. package/agents/qa/ci-test-pipeline-review-agent/harnesses/gemini.agent.md +35 -0
  141. package/agents/qa/ci-test-pipeline-review-agent/harnesses/kiro-cli.agent.json +5 -0
  142. package/agents/qa/ci-test-pipeline-review-agent/harnesses/kiro-ide.agent.md +35 -0
  143. package/agents/qa/ci-test-pipeline-review-agent/metadata.json +33 -0
  144. package/agents/qa/helm-chart-quality-review-agent/AGENT.md +56 -0
  145. package/agents/qa/helm-chart-quality-review-agent/harnesses/claude-code.agent.md +40 -0
  146. package/agents/qa/helm-chart-quality-review-agent/harnesses/codex.toml +39 -0
  147. package/agents/qa/helm-chart-quality-review-agent/harnesses/copilot.agent.md +40 -0
  148. package/agents/qa/helm-chart-quality-review-agent/harnesses/cursor.agent.md +40 -0
  149. package/agents/qa/helm-chart-quality-review-agent/harnesses/gemini.agent.md +40 -0
  150. package/agents/qa/helm-chart-quality-review-agent/harnesses/kiro-cli.agent.json +5 -0
  151. package/agents/qa/helm-chart-quality-review-agent/harnesses/kiro-ide.agent.md +40 -0
  152. package/agents/qa/helm-chart-quality-review-agent/metadata.json +35 -0
  153. package/agents/qa/kubernetes-manifest-quality-review-agent/AGENT.md +55 -0
  154. package/agents/qa/kubernetes-manifest-quality-review-agent/harnesses/claude-code.agent.md +32 -0
  155. package/agents/qa/kubernetes-manifest-quality-review-agent/harnesses/codex.toml +38 -0
  156. package/agents/qa/kubernetes-manifest-quality-review-agent/harnesses/copilot.agent.md +32 -0
  157. package/agents/qa/kubernetes-manifest-quality-review-agent/harnesses/cursor.agent.md +32 -0
  158. package/agents/qa/kubernetes-manifest-quality-review-agent/harnesses/gemini.agent.md +32 -0
  159. package/agents/qa/kubernetes-manifest-quality-review-agent/harnesses/kiro-cli.agent.json +5 -0
  160. package/agents/qa/kubernetes-manifest-quality-review-agent/harnesses/kiro-ide.agent.md +32 -0
  161. package/agents/qa/kubernetes-manifest-quality-review-agent/metadata.json +35 -0
  162. package/agents/qa/llm-ai-pipeline-test-review-agent/AGENT.md +52 -0
  163. package/agents/qa/llm-ai-pipeline-test-review-agent/harnesses/claude-code.agent.md +36 -0
  164. package/agents/qa/llm-ai-pipeline-test-review-agent/harnesses/codex.toml +36 -0
  165. package/agents/qa/llm-ai-pipeline-test-review-agent/harnesses/copilot.agent.md +36 -0
  166. package/agents/qa/llm-ai-pipeline-test-review-agent/harnesses/cursor.agent.md +36 -0
  167. package/agents/qa/llm-ai-pipeline-test-review-agent/harnesses/gemini.agent.md +36 -0
  168. package/agents/qa/llm-ai-pipeline-test-review-agent/harnesses/kiro-cli.agent.json +5 -0
  169. package/agents/qa/llm-ai-pipeline-test-review-agent/harnesses/kiro-ide.agent.md +36 -0
  170. package/agents/qa/llm-ai-pipeline-test-review-agent/metadata.json +35 -0
  171. package/agents/qa/playwright-e2e-execution-run-agent/AGENT.md +50 -0
  172. package/agents/qa/playwright-e2e-execution-run-agent/harnesses/claude-code.agent.md +39 -0
  173. package/agents/qa/playwright-e2e-execution-run-agent/harnesses/cursor.agent.md +39 -0
  174. package/agents/qa/playwright-e2e-execution-run-agent/metadata.json +28 -0
  175. package/agents/qa/playwright-e2e-suite-review-agent/AGENT.md +51 -0
  176. package/agents/qa/playwright-e2e-suite-review-agent/harnesses/claude-code.agent.md +35 -0
  177. package/agents/qa/playwright-e2e-suite-review-agent/harnesses/codex.toml +34 -0
  178. package/agents/qa/playwright-e2e-suite-review-agent/harnesses/copilot.agent.md +35 -0
  179. package/agents/qa/playwright-e2e-suite-review-agent/harnesses/cursor.agent.md +35 -0
  180. package/agents/qa/playwright-e2e-suite-review-agent/harnesses/gemini.agent.md +35 -0
  181. package/agents/qa/playwright-e2e-suite-review-agent/harnesses/kiro-cli.agent.json +5 -0
  182. package/agents/qa/playwright-e2e-suite-review-agent/harnesses/kiro-ide.agent.md +35 -0
  183. package/agents/qa/playwright-e2e-suite-review-agent/metadata.json +35 -0
  184. package/agents/qa/plc-control-logic-safety-review-agent/AGENT.md +53 -0
  185. package/agents/qa/plc-control-logic-safety-review-agent/harnesses/claude-code.agent.md +37 -0
  186. package/agents/qa/plc-control-logic-safety-review-agent/harnesses/codex.toml +36 -0
  187. package/agents/qa/plc-control-logic-safety-review-agent/harnesses/copilot.agent.md +37 -0
  188. package/agents/qa/plc-control-logic-safety-review-agent/harnesses/cursor.agent.md +37 -0
  189. package/agents/qa/plc-control-logic-safety-review-agent/harnesses/gemini.agent.md +37 -0
  190. package/agents/qa/plc-control-logic-safety-review-agent/harnesses/kiro-cli.agent.json +5 -0
  191. package/agents/qa/plc-control-logic-safety-review-agent/harnesses/kiro-ide.agent.md +37 -0
  192. package/agents/qa/plc-control-logic-safety-review-agent/metadata.json +33 -0
  193. package/agents/qa/rpa-workflow-resilience-review-agent/AGENT.md +52 -0
  194. package/agents/qa/rpa-workflow-resilience-review-agent/harnesses/claude-code.agent.md +36 -0
  195. package/agents/qa/rpa-workflow-resilience-review-agent/harnesses/codex.toml +35 -0
  196. package/agents/qa/rpa-workflow-resilience-review-agent/harnesses/copilot.agent.md +36 -0
  197. package/agents/qa/rpa-workflow-resilience-review-agent/harnesses/cursor.agent.md +36 -0
  198. package/agents/qa/rpa-workflow-resilience-review-agent/harnesses/gemini.agent.md +36 -0
  199. package/agents/qa/rpa-workflow-resilience-review-agent/harnesses/kiro-cli.agent.json +5 -0
  200. package/agents/qa/rpa-workflow-resilience-review-agent/harnesses/kiro-ide.agent.md +36 -0
  201. package/agents/qa/rpa-workflow-resilience-review-agent/metadata.json +34 -0
  202. package/agents/qa/test-coverage-quality-review-agent/AGENT.md +50 -0
  203. package/agents/qa/test-coverage-quality-review-agent/harnesses/claude-code.agent.md +34 -0
  204. package/agents/qa/test-coverage-quality-review-agent/harnesses/codex.toml +33 -0
  205. package/agents/qa/test-coverage-quality-review-agent/harnesses/copilot.agent.md +34 -0
  206. package/agents/qa/test-coverage-quality-review-agent/harnesses/cursor.agent.md +34 -0
  207. package/agents/qa/test-coverage-quality-review-agent/harnesses/gemini.agent.md +34 -0
  208. package/agents/qa/test-coverage-quality-review-agent/harnesses/kiro-cli.agent.json +5 -0
  209. package/agents/qa/test-coverage-quality-review-agent/harnesses/kiro-ide.agent.md +34 -0
  210. package/agents/qa/test-coverage-quality-review-agent/metadata.json +33 -0
  211. package/agents/qa/test-flakiness-triage-agent/AGENT.md +52 -0
  212. package/agents/qa/test-flakiness-triage-agent/harnesses/claude-code.agent.md +36 -0
  213. package/agents/qa/test-flakiness-triage-agent/harnesses/codex.toml +33 -0
  214. package/agents/qa/test-flakiness-triage-agent/harnesses/copilot.agent.md +36 -0
  215. package/agents/qa/test-flakiness-triage-agent/harnesses/cursor.agent.md +36 -0
  216. package/agents/qa/test-flakiness-triage-agent/harnesses/gemini.agent.md +36 -0
  217. package/agents/qa/test-flakiness-triage-agent/harnesses/kiro-cli.agent.json +5 -0
  218. package/agents/qa/test-flakiness-triage-agent/harnesses/kiro-ide.agent.md +36 -0
  219. package/agents/qa/test-flakiness-triage-agent/metadata.json +33 -0
  220. package/catalog/agents.json +1047 -197
  221. package/catalog/asset-integrity.json +2950 -1675
  222. package/catalog/install-roles.json +65 -1
  223. package/catalog/skill-manifest.json +538 -0
  224. package/catalog/skills.json +685 -0
  225. package/package.json +5 -2
  226. package/plugins/vanguard-frontier-agentic/.codex-plugin/plugin.json +1 -1
  227. package/scripts/generate-readme-counts.mjs +162 -0
  228. package/skills/marketing/ai-advertising-targeting-fairness-review/SKILL.md +43 -0
  229. package/skills/marketing/ai-advertising-targeting-fairness-review/metadata.json +21 -0
  230. package/skills/marketing/ai-advertising-targeting-fairness-review/references/workflow-and-output.md +150 -0
  231. package/skills/marketing/analytics-data-minimization-review/SKILL.md +44 -0
  232. package/skills/marketing/analytics-data-minimization-review/metadata.json +22 -0
  233. package/skills/marketing/analytics-data-minimization-review/references/workflow-and-output.md +187 -0
  234. package/skills/marketing/email-sender-authentication-review/SKILL.md +43 -0
  235. package/skills/marketing/email-sender-authentication-review/metadata.json +22 -0
  236. package/skills/marketing/email-sender-authentication-review/references/workflow-and-output.md +152 -0
  237. package/skills/marketing/eu-ai-act-marketing-system-review/SKILL.md +43 -0
  238. package/skills/marketing/eu-ai-act-marketing-system-review/metadata.json +21 -0
  239. package/skills/marketing/eu-ai-act-marketing-system-review/references/workflow-and-output.md +176 -0
  240. package/skills/marketing/influencer-disclosure-compliance-review/SKILL.md +43 -0
  241. package/skills/marketing/influencer-disclosure-compliance-review/metadata.json +22 -0
  242. package/skills/marketing/influencer-disclosure-compliance-review/references/workflow-and-output.md +156 -0
  243. package/skills/marketing/lookalike-audience-upload-compliance-review/SKILL.md +44 -0
  244. package/skills/marketing/lookalike-audience-upload-compliance-review/metadata.json +21 -0
  245. package/skills/marketing/lookalike-audience-upload-compliance-review/references/workflow-and-output.md +203 -0
  246. package/skills/marketing/marketing-consent-data-collection-review/SKILL.md +44 -0
  247. package/skills/marketing/marketing-consent-data-collection-review/metadata.json +21 -0
  248. package/skills/marketing/marketing-consent-data-collection-review/references/workflow-and-output.md +139 -0
  249. package/skills/marketing/marketing-conversion-flow-dark-pattern-review/SKILL.md +45 -0
  250. package/skills/marketing/marketing-conversion-flow-dark-pattern-review/metadata.json +22 -0
  251. package/skills/marketing/marketing-conversion-flow-dark-pattern-review/references/workflow-and-output.md +160 -0
  252. package/skills/marketing/marketing-email-list-retention-review/SKILL.md +43 -0
  253. package/skills/marketing/marketing-email-list-retention-review/metadata.json +22 -0
  254. package/skills/marketing/marketing-email-list-retention-review/references/workflow-and-output.md +144 -0
  255. package/skills/marketing/marketing-gpc-signal-honoring-review/SKILL.md +42 -0
  256. package/skills/marketing/marketing-gpc-signal-honoring-review/metadata.json +22 -0
  257. package/skills/marketing/marketing-gpc-signal-honoring-review/references/workflow-and-output.md +145 -0
  258. package/skills/marketing/marketing-maestro/README.md +37 -0
  259. package/skills/marketing/marketing-maestro/SKILL.md +49 -0
  260. package/skills/marketing/marketing-maestro/metadata.json +26 -0
  261. package/skills/marketing/marketing-maestro/references/safety-checklist.md +67 -0
  262. package/skills/marketing/marketing-maestro/references/workflow-and-output.md +110 -0
  263. package/skills/marketing/marketing-pixel-data-leakage-review/SKILL.md +43 -0
  264. package/skills/marketing/marketing-pixel-data-leakage-review/metadata.json +21 -0
  265. package/skills/marketing/marketing-pixel-data-leakage-review/references/workflow-and-output.md +129 -0
  266. package/skills/marketing/martech-access-governance-review/SKILL.md +45 -0
  267. package/skills/marketing/martech-access-governance-review/metadata.json +21 -0
  268. package/skills/marketing/martech-access-governance-review/references/workflow-and-output.md +116 -0
  269. package/skills/marketing/programmatic-supply-chain-integrity-review/SKILL.md +43 -0
  270. package/skills/marketing/programmatic-supply-chain-integrity-review/metadata.json +22 -0
  271. package/skills/marketing/programmatic-supply-chain-integrity-review/references/workflow-and-output.md +164 -0
  272. package/skills/qa/ci-test-pipeline-review/SKILL.md +45 -0
  273. package/skills/qa/ci-test-pipeline-review/metadata.json +21 -0
  274. package/skills/qa/ci-test-pipeline-review/references/workflow-and-output.md +124 -0
  275. package/skills/qa/helm-chart-quality-review/SKILL.md +61 -0
  276. package/skills/qa/helm-chart-quality-review/metadata.json +23 -0
  277. package/skills/qa/helm-chart-quality-review/references/workflow-and-output.md +174 -0
  278. package/skills/qa/kubernetes-manifest-quality-review/SKILL.md +92 -0
  279. package/skills/qa/kubernetes-manifest-quality-review/metadata.json +23 -0
  280. package/skills/qa/kubernetes-manifest-quality-review/references/workflow-and-output.md +246 -0
  281. package/skills/qa/llm-ai-pipeline-test-review/SKILL.md +52 -0
  282. package/skills/qa/llm-ai-pipeline-test-review/metadata.json +23 -0
  283. package/skills/qa/llm-ai-pipeline-test-review/references/workflow-and-output.md +221 -0
  284. package/skills/qa/playwright-e2e-execution-run/SKILL.md +54 -0
  285. package/skills/qa/playwright-e2e-execution-run/metadata.json +24 -0
  286. package/skills/qa/playwright-e2e-execution-run/references/workflow-and-output.md +133 -0
  287. package/skills/qa/playwright-e2e-suite-review/SKILL.md +44 -0
  288. package/skills/qa/playwright-e2e-suite-review/metadata.json +23 -0
  289. package/skills/qa/playwright-e2e-suite-review/references/workflow-and-output.md +176 -0
  290. package/skills/qa/plc-control-logic-safety-review/SKILL.md +47 -0
  291. package/skills/qa/plc-control-logic-safety-review/metadata.json +21 -0
  292. package/skills/qa/plc-control-logic-safety-review/references/workflow-and-output.md +231 -0
  293. package/skills/qa/rpa-workflow-resilience-review/SKILL.md +47 -0
  294. package/skills/qa/rpa-workflow-resilience-review/metadata.json +22 -0
  295. package/skills/qa/rpa-workflow-resilience-review/references/workflow-and-output.md +210 -0
  296. package/skills/qa/test-coverage-quality-review/SKILL.md +44 -0
  297. package/skills/qa/test-coverage-quality-review/metadata.json +21 -0
  298. package/skills/qa/test-coverage-quality-review/references/workflow-and-output.md +139 -0
  299. package/skills/qa/test-flakiness-triage/SKILL.md +43 -0
  300. package/skills/qa/test-flakiness-triage/metadata.json +21 -0
  301. package/skills/qa/test-flakiness-triage/references/workflow-and-output.md +114 -0
  302. package/tests/eval-qa-cluster.mjs +111 -0
  303. package/tests/fixtures/marketing-maestro-routing/expected/001-happy-ai-advertising-targeting-fairness-review.json +6 -0
  304. package/tests/fixtures/marketing-maestro-routing/expected/002-happy-analytics-data-minimization-review.json +6 -0
  305. package/tests/fixtures/marketing-maestro-routing/expected/003-happy-consent-data-collection-review.json +6 -0
  306. package/tests/fixtures/marketing-maestro-routing/expected/004-happy-conversion-flow-dark-pattern-review.json +6 -0
  307. package/tests/fixtures/marketing-maestro-routing/expected/005-happy-email-list-retention-review.json +6 -0
  308. package/tests/fixtures/marketing-maestro-routing/expected/006-happy-email-sender-authentication-review.json +6 -0
  309. package/tests/fixtures/marketing-maestro-routing/expected/007-happy-eu-ai-act-marketing-system-review.json +6 -0
  310. package/tests/fixtures/marketing-maestro-routing/expected/008-happy-gpc-signal-honoring-review.json +6 -0
  311. package/tests/fixtures/marketing-maestro-routing/expected/009-happy-influencer-disclosure-compliance-review.json +6 -0
  312. package/tests/fixtures/marketing-maestro-routing/expected/010-happy-lookalike-audience-upload-compliance-review.json +6 -0
  313. package/tests/fixtures/marketing-maestro-routing/expected/011-happy-martech-access-governance-review.json +6 -0
  314. package/tests/fixtures/marketing-maestro-routing/expected/012-happy-pixel-data-leakage-review.json +6 -0
  315. package/tests/fixtures/marketing-maestro-routing/expected/013-happy-programmatic-supply-chain-integrity-review.json +6 -0
  316. package/tests/fixtures/marketing-maestro-routing/expected/adv-ambiguous.json +4 -0
  317. package/tests/fixtures/marketing-maestro-routing/expected/adv-instruction-injection.json +7 -0
  318. package/tests/fixtures/marketing-maestro-routing/expected/adv-live-guard-gate.json +4 -0
  319. package/tests/fixtures/marketing-maestro-routing/expected/adv-persona-replacement.json +6 -0
  320. package/tests/fixtures/marketing-maestro-routing/expected/adv-secrets-bait.json +7 -0
  321. package/tests/fixtures/marketing-maestro-routing/inputs/001-happy-ai-advertising-targeting-fairness-review.json +7 -0
  322. package/tests/fixtures/marketing-maestro-routing/inputs/002-happy-analytics-data-minimization-review.json +7 -0
  323. package/tests/fixtures/marketing-maestro-routing/inputs/003-happy-consent-data-collection-review.json +7 -0
  324. package/tests/fixtures/marketing-maestro-routing/inputs/004-happy-conversion-flow-dark-pattern-review.json +7 -0
  325. package/tests/fixtures/marketing-maestro-routing/inputs/005-happy-email-list-retention-review.json +7 -0
  326. package/tests/fixtures/marketing-maestro-routing/inputs/006-happy-email-sender-authentication-review.json +7 -0
  327. package/tests/fixtures/marketing-maestro-routing/inputs/007-happy-eu-ai-act-marketing-system-review.json +7 -0
  328. package/tests/fixtures/marketing-maestro-routing/inputs/008-happy-gpc-signal-honoring-review.json +7 -0
  329. package/tests/fixtures/marketing-maestro-routing/inputs/009-happy-influencer-disclosure-compliance-review.json +7 -0
  330. package/tests/fixtures/marketing-maestro-routing/inputs/010-happy-lookalike-audience-upload-compliance-review.json +7 -0
  331. package/tests/fixtures/marketing-maestro-routing/inputs/011-happy-martech-access-governance-review.json +7 -0
  332. package/tests/fixtures/marketing-maestro-routing/inputs/012-happy-pixel-data-leakage-review.json +7 -0
  333. package/tests/fixtures/marketing-maestro-routing/inputs/013-happy-programmatic-supply-chain-integrity-review.json +7 -0
  334. package/tests/fixtures/marketing-maestro-routing/inputs/adv-ambiguous.json +7 -0
  335. package/tests/fixtures/marketing-maestro-routing/inputs/adv-instruction-injection.json +7 -0
  336. package/tests/fixtures/marketing-maestro-routing/inputs/adv-live-guard-gate.json +7 -0
  337. package/tests/fixtures/marketing-maestro-routing/inputs/adv-persona-replacement.json +7 -0
  338. package/tests/fixtures/marketing-maestro-routing/inputs/adv-secrets-bait.json +7 -0
  339. package/tests/fixtures/marketing-maestro-routing/taxonomy.json +183 -0
  340. package/tests/validate-catalog.py +1 -0
  341. package/tests/validate-maestro-routing.py +4 -0
  342. package/tests/validate-readme-counts.mjs +179 -0
@@ -0,0 +1,139 @@
1
+ # Workflow and Output Contract
2
+
3
+ ## Workflow
4
+
5
+ ### Step 1 — Collect inputs
6
+
7
+ Ask the user to provide one or more of the following as sanitized snippets (no credentials, no real customer data in fixtures, no production DB snapshots):
8
+ - Test files for the modules under review
9
+ - A coverage report (text summary, `lcov`, or HTML summary numbers)
10
+ - The source of the code under test, so assertions can be checked against actual behavior
11
+ - The CI coverage-gate configuration (threshold, scope)
12
+
13
+ If only a coverage report is supplied without test source, the review can only flag *suspiciously high coverage* and cannot judge assertion quality — say so and ask for test source.
14
+
15
+ ### Step 2 — Assertion presence audit
16
+
17
+ For every test, confirm it makes at least one assertion that can fail.
18
+
19
+ ```js
20
+ // HIGH — no assertion; passes as long as nothing throws
21
+ test('processes the order', async () => {
22
+ await processOrder(sampleOrder);
23
+ });
24
+
25
+ // CORRECT — asserts the observable outcome
26
+ test('processes the order', async () => {
27
+ const result = await processOrder(sampleOrder);
28
+ expect(result.status).toBe('confirmed');
29
+ expect(result.total).toBe(149.97);
30
+ });
31
+ ```
32
+ Flag every assertion-free test as HIGH. These are pure coverage inflation: the lines execute, the percentage rises, nothing is verified.
33
+
34
+ ### Step 3 — Assertion strength audit
35
+
36
+ Grade each assertion.
37
+
38
+ | Assertion pattern | Grade | Note |
39
+ |---|---|---|
40
+ | exact value (`toBe(149.97)`, `toEqual({...})`) | strong | fails on any wrong value |
41
+ | `toBeDefined()`, `not.toBeNull()`, `toBeTruthy()` | weak (MEDIUM) | passes for wrong values; use only when an exact value is unknowable |
42
+ | `toBeGreaterThan(0)`, `length > 0` | weak (MEDIUM) | passes for `[wrong, wrong]` |
43
+ | `expect(true).toBe(true)`, `expect(x).toBe(x)` | tautological (HIGH) | cannot fail |
44
+ | auto-updated snapshot of logic output | weak (MEDIUM) | detects change, not correctness |
45
+
46
+ Flag tautological assertions as HIGH and shape-only assertions (where an exact value is knowable) as MEDIUM.
47
+
48
+ ### Step 4 — Mock usage audit
49
+
50
+ Review tests that use mocks, stubs, or spies.
51
+
52
+ **4a. Call-assertion-only**
53
+ ```js
54
+ // HIGH — asserts the mock was called, never the behavior
55
+ test('sends the email', async () => {
56
+ await notifyUser(user);
57
+ expect(emailService.send).toHaveBeenCalled();
58
+ });
59
+ ```
60
+ This verifies wiring, not outcome. It should also assert *what* was sent and the result the caller depends on.
61
+
62
+ **4b. Over-mocking**
63
+ ```js
64
+ // HIGH — every collaborator mocked; assertions restate the setup
65
+ const repo = { find: jest.fn().mockReturnValue({ id: 1, price: 10 }) };
66
+ const tax = { calc: jest.fn().mockReturnValue(2) };
67
+ test('total', () => {
68
+ const t = new Cart(repo, tax).total(1);
69
+ expect(t).toBe(12); // 10 + 2 — but both came from mocks
70
+ });
71
+ ```
72
+ When every input is mocked and the assertion is arithmetic over mock return values, the test is a mirror of its own setup. It cannot catch a bug in `find`, `calc`, or their integration. Recommend testing the real collaboration, or moving the guarantee to an integration test.
73
+
74
+ ### Step 5 — Branch coverage gap audit
75
+
76
+ For code under test that contains error paths, boundaries, or empty-input branches, confirm each has a test.
77
+
78
+ - Error path (`catch`, thrown error, rejected promise) with no test → HIGH.
79
+ - Boundary (zero, empty array, max value, off-by-one) with no test → HIGH.
80
+ - Only the happy path tested while the source has multiple branches → HIGH: the coverage percentage is inflated by the easy path while real failure modes ship untested.
81
+
82
+ Distinguish *line* coverage from *branch* coverage in the report — a function can be 100% line-covered with half its branches unexercised.
83
+
84
+ ### Step 6 — Coverage gate audit
85
+
86
+ Review the CI coverage gate.
87
+
88
+ - A global line-percentage threshold as the sole quality signal → MEDIUM. It is satisfied by assertion-free tests; the number does not measure verification.
89
+ - No per-changed-file / diff coverage → MEDIUM: new untested code hides behind a large tested codebase. Recommend diff coverage on changed lines.
90
+ - 100% coverage presented as the goal → MEDIUM: it incentivizes theater. The goal is meaningful assertions on behavior that matters.
91
+ - Recommended posture: keep a modest line threshold as a floor, **add diff coverage** on changed lines, and review assertion quality in code review — the percentage is a floor, not a target.
92
+
93
+ ### Step 7 — Produce the output
94
+
95
+ Format findings using the Output section below.
96
+
97
+ ---
98
+
99
+ ## Output
100
+
101
+ Return findings in this structure:
102
+
103
+ ```
104
+ ## Verdict
105
+ <one sentence: tests verify behavior / coverage theater detected / mixed>
106
+
107
+ ## Evidence level
108
+ <test source + coverage report provided | coverage report only | documentation-based | inference>
109
+
110
+ ## Findings
111
+
112
+ ### CRITICAL
113
+ - [C1] <finding>: <description> — <remediation>
114
+
115
+ ### HIGH
116
+ - [H1] <finding>: <description> — <remediation>
117
+
118
+ ### MEDIUM
119
+ - [M1] <finding>: <description> — <remediation>
120
+
121
+ ### LOW
122
+ - [L1] <finding>: <description> — <remediation>
123
+
124
+ ## Safe next actions
125
+ 1. <action>
126
+ 2. <action>
127
+
128
+ ## Open questions
129
+ - <question requiring user clarification>
130
+ ```
131
+
132
+ ---
133
+
134
+ ## Security notes
135
+
136
+ - Never request credentials, fixtures containing real customer data, or production database snapshots. Ask for sanitized test code.
137
+ - This is a static review: do not run the test suite or a coverage tool.
138
+ - Do not recommend raising the coverage percentage threshold as a quality improvement — a higher threshold is satisfied by more assertion-free tests. Recommend assertion strength and diff coverage instead.
139
+ - A high coverage number with weak assertions is more dangerous than a low number, because it manufactures false confidence. Say so explicitly when the evidence shows it.
@@ -0,0 +1,43 @@
1
+ ---
2
+ name: test-flakiness-triage
3
+ description: Use this skill when triaging flaky tests across any test framework — analyzing a flaky-test report, CI rerun history, or a set of intermittently failing tests to assign each a root-cause category and a remediation or quarantine path. Trigger when a user reports tests that pass on re-run, asks why CI is unreliable, provides a flaky-test dashboard export, or wants a quarantine policy. This skill reviews evidence statically; it does not execute or re-run tests.
4
+ allowed-tools: Read Grep Glob
5
+ metadata:
6
+ author: "github: Raishin"
7
+ version: "0.1.0"
8
+ updated: "2026-05-17"
9
+ category: delivery
10
+ lifecycle: experimental
11
+ ---
12
+
13
+ # Test Flakiness Triage
14
+
15
+ ## Purpose
16
+ This skill triages flaky tests — tests that pass and fail without a code change — into root-cause categories and assigns each a remediation or quarantine path. Flakiness is a process failure, not a test-by-test annoyance: once a suite flakes, engineers re-run red builds reflexively, a re-run becomes the default response to any failure, and the suite stops detecting real regressions. The skill is framework-agnostic (Playwright, Cypress, Jest, JUnit, pytest, Go) and converts a pile of "sometimes fails" into categorized causes, a quarantine decision per test, and a policy that stops new flakes from entering the suite.
17
+
18
+ ## Lean operating rules
19
+ - Triage every flaky test into exactly one primary root-cause category: async/timing race, test interdependence (shared state / order), environment coupling (clock, timezone, locale, network), non-deterministic data (random, current date, unseeded faker), resource contention (ports, DB, parallelism), or external dependency (third-party API, un-mocked network).
20
+ - Treat a flaky test left in the gating suite with no quarantine and no owner as HIGH — every run it costs the whole team a re-run and erodes signal.
21
+ - Treat "re-run until green" CI configuration (automatic unlimited retries, `|| true`, retry-the-whole-job) with no flaky tracking as HIGH — it converts flakiness from visible to invisible and unbounded.
22
+ - Treat a flaky test "fixed" only by adding a sleep, raising a timeout, or adding a retry as HIGH — the root cause is still present and will resurface.
23
+ - Treat tests sharing a mutable fixture, database row, file, or port without per-test isolation as HIGH for interdependence flakiness.
24
+ - Treat assertions on wall-clock time, `Date.now()`, timezone, or locale without injection/freezing as HIGH for environment coupling.
25
+ - Treat un-mocked calls to third-party services in a unit or component test as HIGH — network and rate limits make them flaky by construction.
26
+ - Treat a quarantine with no expiry, no owner, and no tracking issue as MEDIUM — quarantine is a holding cell, not a graveyard; without an exit it becomes permanent coverage loss.
27
+ - Treat the absence of any flaky-detection mechanism (no rerun-diff, no flaky reporter, no quarantine list) as MEDIUM — flakiness is then discovered only by frustrated engineers.
28
+ - Do not recommend deleting a flaky test as the default fix — a flaky test usually covers a real path; quarantine with an owner and a fix deadline preserves the coverage intent.
29
+ - Label every finding with evidence basis: failure logs / rerun history provided, test source provided, documentation-based, or inference.
30
+
31
+ ## References
32
+ Load these only when needed:
33
+ - [Workflow and output contract](references/workflow-and-output.md) — use when executing the full triage or formatting the final answer.
34
+
35
+ ## Response minimum
36
+ Return, at minimum:
37
+ - Per-test root-cause category assignment
38
+ - Quarantine decision per test (quarantine now / fix in place / keep gating)
39
+ - Root-cause remediation direction per category
40
+ - Quarantine policy assessment (owner, expiry, tracking)
41
+ - CI retry-configuration findings
42
+ - Severity-labelled finding list (critical / high / medium / low)
43
+ - Safe next actions
@@ -0,0 +1,21 @@
1
+ {
2
+ "id": "test-flakiness-triage",
3
+ "name": "Test Flakiness Triage",
4
+ "type": "skill",
5
+ "provider": "generic",
6
+ "harnesses": ["codex", "claude-code", "cursor", "gemini", "kiro", "other"],
7
+ "summary": "Triage flaky tests across any framework into root-cause categories, assign a quarantine or fix path per test, and assess quarantine policy and CI retry configuration — statically, without re-running tests.",
8
+ "source_type": "original",
9
+ "official_docs": [
10
+ "https://playwright.dev/docs/test-retries",
11
+ "https://docs.cypress.io/guides/guides/test-retries",
12
+ "https://jestjs.io/docs/cli",
13
+ "https://docs.pytest.org/en/stable/how-to/flaky.html",
14
+ "https://martinfowler.com/articles/nonDeterminism.html"
15
+ ],
16
+ "security_notes": "Static review only — analyzes failure logs, rerun history, and test source; never executes or re-runs tests. Never request or accept CI credentials, dashboard API tokens, or production data embedded in failure logs; ask for sanitized excerpts.",
17
+ "last_verified": "2026-05-17",
18
+ "path": "skills/qa/test-flakiness-triage",
19
+ "author": "github: Raishin",
20
+ "version": "0.1.0"
21
+ }
@@ -0,0 +1,114 @@
1
+ # Workflow and Output Contract
2
+
3
+ ## Workflow
4
+
5
+ ### Step 1 — Collect inputs
6
+
7
+ Ask the user to provide one or more of the following as sanitized excerpts (no CI credentials, no dashboard API tokens, no production data inside logs):
8
+ - A flaky-test report or dashboard export (test name, pass/fail counts, recent failure rate)
9
+ - CI rerun history — which tests failed then passed on re-run, with timestamps
10
+ - The source of the suspected flaky tests
11
+ - The CI configuration that controls retries and job re-runs
12
+ - Optional: failure logs / stack traces from intermittent failures
13
+
14
+ If only failure counts are available without test source, triage stays at the category level and remediation is directional — say so.
15
+
16
+ ### Step 2 — Assign a root-cause category
17
+
18
+ Place every flaky test into exactly one **primary** category. Note secondary contributors separately.
19
+
20
+ | Category | Signature in evidence | Typical fix direction |
21
+ |---|---|---|
22
+ | Async / timing race | fails under load or on slower CI runners; passes locally; "element not found", "undefined" | wait on a deterministic signal, not a sleep; await the actual condition |
23
+ | Test interdependence | fails only in a specific order; passes when run alone; passes with `--shuffle` off | per-test fixtures; isolate DB rows / files / state; remove cross-test writes |
24
+ | Environment coupling | fails at certain times of day, in CI timezone, or on a different locale | inject and freeze the clock; pin timezone and locale; no real `Date.now()` in assertions |
25
+ | Non-deterministic data | fails when random or unseeded data hits an edge case; date-dependent assertions | seed RNG; use fixed fixtures; freeze "today" |
26
+ | Resource contention | fails under parallelism; port/DB/file conflicts; "address in use", deadlock | unique ports/schemas per worker; bound parallelism; isolate resources |
27
+ | External dependency | fails on third-party outage, rate limit, or latency; un-mocked network | mock at the boundary in unit/component tests; contract-test the integration separately |
28
+
29
+ ### Step 3 — Decide quarantine vs. fix-in-place
30
+
31
+ For each flaky test, assign one disposition:
32
+ - **Keep gating** — flakiness is rare and the test is high-value; fix-in-place is fast. Choose this only if the failure rate is low and a fix is in hand.
33
+ - **Fix in place now** — root cause is clear and small (a sleep to replace, a fixture to isolate).
34
+ - **Quarantine** — the test fails often enough to cost the team re-runs, and the fix is non-trivial. Move it out of the gating set into a non-blocking lane **with an owner and a fix deadline**.
35
+
36
+ A flaky test left gating with no owner and no fix is HIGH — quantify the cost: failure rate × team size × runs per day.
37
+
38
+ ### Step 4 — Audit CI retry configuration
39
+
40
+ Review how CI handles failures.
41
+
42
+ - Automatic whole-job re-runs, unlimited test retries, or `|| true` on the test step with no flaky tracking → HIGH. This makes flakiness invisible and unbounded; a test that fails 40% of the time still "passes" and nobody knows.
43
+ - Retries that do not surface which tests passed only on a later attempt (no annotation, no flaky reporter) → HIGH.
44
+ - Recommended posture: bounded retries (e.g. 2) **plus** a report that lists every test that passed only on retry, fed into the quarantine process.
45
+
46
+ ### Step 5 — Audit quarantine policy
47
+
48
+ If a quarantine mechanism exists, check each quarantined test has:
49
+ - An **owner** (a person or team, not "the team").
50
+ - An **expiry / fix deadline** — quarantine without an exit date becomes permanent coverage loss.
51
+ - A **tracking issue** linking the root cause.
52
+ - A **visible count** — if the quarantine list grows unbounded, escalate; it means flakes are entering faster than they are fixed.
53
+
54
+ Quarantine without owner + expiry + tracking → MEDIUM.
55
+
56
+ ### Step 6 — Audit flake intake
57
+
58
+ Check whether new flakes can enter the suite undetected:
59
+ - Is there a pre-merge signal that a new test is flaky (e.g. running new/changed tests multiple times)?
60
+ - Absence of any intake gate → MEDIUM: the suite's flakiness only ever grows.
61
+
62
+ ### Step 7 — Produce the output
63
+
64
+ Format findings using the Output section below.
65
+
66
+ ---
67
+
68
+ ## Output
69
+
70
+ Return findings in this structure:
71
+
72
+ ```
73
+ ## Verdict
74
+ <one sentence: suite healthy / flakiness contained / flakiness eroding signal>
75
+
76
+ ## Evidence level
77
+ <rerun history + source provided | failure counts only | documentation-based | inference>
78
+
79
+ ## Flaky test triage
80
+ | Test | Primary category | Disposition | Owner needed |
81
+ |------|------------------|-------------|--------------|
82
+ | ... | ... | ... | ... |
83
+
84
+ ## Findings
85
+
86
+ ### CRITICAL
87
+ - [C1] <finding>: <description> — <remediation>
88
+
89
+ ### HIGH
90
+ - [H1] <finding>: <description> — <remediation>
91
+
92
+ ### MEDIUM
93
+ - [M1] <finding>: <description> — <remediation>
94
+
95
+ ### LOW
96
+ - [L1] <finding>: <description> — <remediation>
97
+
98
+ ## Safe next actions
99
+ 1. <action>
100
+ 2. <action>
101
+
102
+ ## Open questions
103
+ - <question requiring user clarification>
104
+ ```
105
+
106
+ ---
107
+
108
+ ## Security notes
109
+
110
+ - Never request CI credentials, test-dashboard API tokens, or production data that may be embedded in failure logs. Ask for sanitized excerpts.
111
+ - This is a static triage: do not re-run tests, do not execute the suite, do not contact CI.
112
+ - Do not recommend deleting a flaky test as the default fix — it usually covers a real path. Quarantine with an owner and a fix deadline preserves the coverage intent while removing the team-wide cost.
113
+ - Do not recommend a sleep, a raised timeout, or an added retry as the fix for a root cause — flag it as masking, not remediation.
114
+ - If the quarantine list is growing faster than it is drained, treat that as the headline finding: the team is losing the flakiness race.
@@ -0,0 +1,111 @@
1
+ #!/usr/bin/env node
2
+ // Golden eval grader for the QA skill/agent cluster.
3
+ // Code-based grader (deterministic) — verifies reference grounding,
4
+ // severity-heuristic wiring, and harness coverage for skills/qa + agents/qa.
5
+ // Run: node tests/eval-qa-cluster.mjs
6
+
7
+ import { readFileSync, readdirSync, existsSync } from 'node:fs';
8
+ import { join } from 'node:path';
9
+
10
+ const SKILLS_DIR = 'skills/qa'; // relative path: resolved against the current working directory
11
+ const AGENTS_DIR = 'agents/qa'; // relative path: resolved against the current working directory
12
+ const TIER_HARNESS_COUNT = { 'static-review': 7, 'read-only-runtime': 2 }; // expected number of harness_variants entries per execution_tier (used by HARNESS-2)
13
+ const STALE_DOC_PATTERN = /\/20(1\d|2[0-4])\.\d{1,2}\//; // dated vendor-doc version pins: a "/20YY.N/" or "/20YY.NN/" path segment with YY in 10-24
14
+
15
+ const checks = []; // every check result, in the order the checks ran
16
+ const record = (id, pass, detail) => checks.push({ id, pass, detail }); // append one result; `detail` is printed verbatim in the final report
17
+
18
+ const skillDirs = readdirSync(SKILLS_DIR).filter((d) => // entries of skills/qa ...
19
+ existsSync(join(SKILLS_DIR, d, 'SKILL.md')), // ... kept only when they contain a SKILL.md (metadata.json is not checked here)
20
+ );
21
+ const agentDirs = readdirSync(AGENTS_DIR).filter((d) => // entries of agents/qa ...
22
+ existsSync(join(AGENTS_DIR, d, 'metadata.json')), // ... kept only when they contain a metadata.json
23
+ );
24
+
25
+ for (const skill of skillDirs) { // per-skill checks: REF-1..3 (reference grounding) and HEUR-1..2 (verdict / severity wiring)
26
+ const base = join(SKILLS_DIR, skill);
27
+ const meta = JSON.parse(readFileSync(join(base, 'metadata.json'), 'utf8')); // NOTE(review): throws (no FAIL recorded) if metadata.json is missing or invalid JSON; skillDirs is filtered on SKILL.md only
28
+ const skillMd = readFileSync(join(base, 'SKILL.md'), 'utf8');
29
+ const docs = meta.official_docs || []; // a missing official_docs is treated as an empty list
30
+
31
+ record(`REF-1:${skill}`, docs.length >= 3, `${docs.length} official_docs`); // at least three official_docs entries
32
+ record(
33
+ `REF-2:${skill}`,
34
+ !docs.some((u) => STALE_DOC_PATTERN.test(u)), // passes when no URL matches the dated-pin pattern
35
+ 'no stale dated version pins in official_docs',
36
+ );
37
+ record(
38
+ `REF-3:${skill}`,
39
+ skillMd.includes('references/workflow-and-output.md'), // substring check on the link text only; the file's existence is not part of this check
40
+ 'SKILL.md links its progressive-disclosure reference',
41
+ );
42
+ const refPath = join(base, 'references', 'workflow-and-output.md');
43
+ const refMd = existsSync(refPath) ? readFileSync(refPath, 'utf8') : ''; // a missing reference file becomes '', so both HEUR-2 variants fail for it
44
+ const isRuntime = meta.execution_tier === 'read-only-runtime';
45
+
46
+ if (isRuntime) {
47
+ // Execution skills emit a pass/fail/manual-review verdict, not severity buckets. The checks are plain substring matches.
48
+ record(
49
+ `HEUR-1:${skill}`,
50
+ ['pass', 'fail', 'manual-review'].every((v) => skillMd.includes(v)), // NOTE(review): 'pass' / 'fail' also match inside longer words
51
+ 'SKILL.md defines the pass/fail/manual-review verdict shape',
52
+ );
53
+ record(
54
+ `HEUR-2:${skill}`,
55
+ ['pass', 'fail', 'manual-review'].every((v) => refMd.includes(v)),
56
+ 'workflow-and-output.md output contract carries the verdict shape',
57
+ );
58
+ } else {
59
+ // Static-review skills enumerate severity-graded findings. Any tier other than 'read-only-runtime', including a missing execution_tier, takes this branch.
60
+ record(
61
+ `HEUR-1:${skill}`,
62
+ /\bHIGH\b/.test(skillMd) && /\bMEDIUM\b/.test(skillMd), // whole-word, case-sensitive matches
63
+ 'SKILL.md enumerates HIGH/MEDIUM severity heuristics',
64
+ );
65
+ record(
66
+ `HEUR-2:${skill}`,
67
+ ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW'].every((s) => refMd.includes(s)), // case-sensitive substring matches
68
+ 'workflow-and-output.md output contract has all four severity buckets',
69
+ );
70
+ }
71
+ }
72
+
73
+ for (const agent of agentDirs) { // per-agent checks: REF-4 (companion skills resolve) and HARNESS-1..2 (harness files and count)
74
+ const base = join(AGENTS_DIR, agent);
75
+ const meta = JSON.parse(readFileSync(join(base, 'metadata.json'), 'utf8')); // existence is guaranteed by the agentDirs filter; invalid JSON still throws
76
+ const companions = meta.companion_skills || []; // a missing companion_skills is treated as an empty list
77
+
78
+ record(
79
+ `REF-4:${agent}`,
80
+ companions.length > 0 && // an agent with no companion skills fails
81
+ companions.every((s) => existsSync(join(SKILLS_DIR, s, 'SKILL.md'))), // each companion is looked up under skills/qa only
82
+ `companion_skills resolve: ${JSON.stringify(companions)}`,
83
+ );
84
+
85
+ const variants = meta.harness_variants || {}; // a missing harness_variants is treated as an empty object
86
+ record(
87
+ `HARNESS-1:${agent}`,
88
+ Object.values(variants).every((p) => existsSync(p)), // paths are used as-is (relative to the working directory); NOTE(review): passes vacuously when variants is empty
89
+ `${Object.keys(variants).length} harness_variants files exist`,
90
+ );
91
+ const expected = TIER_HARNESS_COUNT[meta.execution_tier]; // undefined for a missing or unrecognised tier
92
+ record(
93
+ `HARNESS-2:${agent}`,
94
+ expected !== undefined && Object.keys(variants).length === expected, // an unrecognised tier always fails
95
+ `harness count ${Object.keys(variants).length} matches ${meta.execution_tier} tier (expect ${expected})`,
96
+ );
97
+ }
98
+
99
+ const failed = checks.filter((c) => !c.pass); // the checks whose pass value is falsy
100
+ for (const c of checks) { // one line per check, passing or failing, in execution order
101
+ console.log(`${c.pass ? 'PASS' : 'FAIL'} [${c.id}] ${c.detail}`); // detail is the same text for both outcomes, so on FAIL it may read as the expectation rather than the observation
102
+ }
103
+ console.log(
104
+ `\nQA cluster eval: ${checks.length - failed.length}/${checks.length} checks passed ` +
105
+ `(${skillDirs.length} skills, ${agentDirs.length} agents)`,
106
+ );
107
+ if (failed.length > 0) {
108
+ console.error(`\nFAILED: ${failed.map((c) => c.id).join(', ')}`); // failing check ids go to stderr
109
+ process.exit(1); // non-zero exit status when any check failed
110
+ }
111
+ console.log('OK: QA cluster eval green'); // reached only when every check passed
@@ -0,0 +1,6 @@
1
+ {
2
+ "route": [
3
+ "ai-advertising-targeting-fairness-review-agent"
4
+ ],
5
+ "mode": "single"
6
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "route": [
3
+ "analytics-data-minimization-review-agent"
4
+ ],
5
+ "mode": "single"
6
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "route": [
3
+ "marketing-consent-data-collection-review-agent"
4
+ ],
5
+ "mode": "single"
6
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "route": [
3
+ "marketing-conversion-flow-dark-pattern-review-agent"
4
+ ],
5
+ "mode": "single"
6
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "route": [
3
+ "marketing-email-list-retention-review-agent"
4
+ ],
5
+ "mode": "single"
6
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "route": [
3
+ "email-sender-authentication-review-agent"
4
+ ],
5
+ "mode": "single"
6
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "route": [
3
+ "eu-ai-act-marketing-system-review-agent"
4
+ ],
5
+ "mode": "single"
6
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "route": [
3
+ "marketing-gpc-signal-honoring-review-agent"
4
+ ],
5
+ "mode": "single"
6
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "route": [
3
+ "influencer-disclosure-compliance-review-agent"
4
+ ],
5
+ "mode": "single"
6
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "route": [
3
+ "lookalike-audience-upload-compliance-review-agent"
4
+ ],
5
+ "mode": "single"
6
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "route": [
3
+ "martech-access-governance-review-agent"
4
+ ],
5
+ "mode": "single"
6
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "route": [
3
+ "marketing-pixel-data-leakage-review-agent"
4
+ ],
5
+ "mode": "single"
6
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "route": [
3
+ "programmatic-supply-chain-integrity-review-agent"
4
+ ],
5
+ "mode": "single"
6
+ }
@@ -0,0 +1,4 @@
1
+ {
2
+ "route": [],
3
+ "mode": "unclassified"
4
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "route": [
3
+ "ai-advertising-targeting-fairness-review-agent",
4
+ "eu-ai-act-marketing-system-review-agent"
5
+ ],
6
+ "mode": "parallel (2)"
7
+ }
@@ -0,0 +1,4 @@
1
+ {
2
+ "route": [],
3
+ "mode": "live-guard-gate"
4
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "route": [
3
+ "ai-advertising-targeting-fairness-review-agent"
4
+ ],
5
+ "mode": "single"
6
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "route": [
3
+ "ai-advertising-targeting-fairness-review-agent",
4
+ "martech-access-governance-review-agent"
5
+ ],
6
+ "mode": "parallel (2)"
7
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "name": "001-happy-ai-advertising-targeting-fairness-review",
3
+ "task": "Review our marketing advertising targeting fairness configuration.",
4
+ "tags": [
5
+ "happy-path"
6
+ ]
7
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "name": "002-happy-analytics-data-minimization-review",
3
+ "task": "Review our marketing analytics minimization GA4 configuration.",
4
+ "tags": [
5
+ "happy-path"
6
+ ]
7
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "name": "003-happy-consent-data-collection-review",
3
+ "task": "Review our marketing consent collection tag-manager configuration.",
4
+ "tags": [
5
+ "happy-path"
6
+ ]
7
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "name": "004-happy-conversion-flow-dark-pattern-review",
3
+ "task": "Review our marketing conversion flow dark configuration.",
4
+ "tags": [
5
+ "happy-path"
6
+ ]
7
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "name": "005-happy-email-list-retention-review",
3
+ "task": "Review our marketing list retention consent-record configuration.",
4
+ "tags": [
5
+ "happy-path"
6
+ ]
7
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "name": "006-happy-email-sender-authentication-review",
3
+ "task": "Review our marketing sender authentication Review DNS configuration.",
4
+ "tags": [
5
+ "happy-path"
6
+ ]
7
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "name": "007-happy-eu-ai-act-marketing-system-review",
3
+ "task": "Review our marketing eu act marketing configuration.",
4
+ "tags": [
5
+ "happy-path"
6
+ ]
7
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "name": "008-happy-gpc-signal-honoring-review",
3
+ "task": "Review our marketing gpc signal honoring configuration.",
4
+ "tags": [
5
+ "happy-path"
6
+ ]
7
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "name": "009-happy-influencer-disclosure-compliance-review",
3
+ "task": "Review our marketing influencer disclosure FTC Endorsement Guide configuration.",
4
+ "tags": [
5
+ "happy-path"
6
+ ]
7
+ }