@harness-engineering/cli 1.13.0 → 1.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367)
  1. package/dist/agents/skills/claude-code/add-harness-component/skill.yaml +1 -0
  2. package/dist/agents/skills/claude-code/align-documentation/skill.yaml +1 -0
  3. package/dist/agents/skills/claude-code/check-mechanical-constraints/skill.yaml +1 -0
  4. package/dist/agents/skills/claude-code/cleanup-dead-code/skill.yaml +1 -0
  5. package/dist/agents/skills/claude-code/detect-doc-drift/skill.yaml +1 -0
  6. package/dist/agents/skills/claude-code/enforce-architecture/skill.yaml +1 -0
  7. package/dist/agents/skills/claude-code/harness-accessibility/skill.yaml +1 -0
  8. package/dist/agents/skills/claude-code/harness-api-design/SKILL.md +304 -0
  9. package/dist/agents/skills/claude-code/harness-api-design/skill.yaml +74 -0
  10. package/dist/agents/skills/claude-code/harness-architecture-advisor/skill.yaml +1 -0
  11. package/dist/agents/skills/claude-code/harness-auth/SKILL.md +279 -0
  12. package/dist/agents/skills/claude-code/harness-auth/skill.yaml +81 -0
  13. package/dist/agents/skills/claude-code/harness-autopilot/skill.yaml +1 -0
  14. package/dist/agents/skills/claude-code/harness-brainstorming/SKILL.md +39 -0
  15. package/dist/agents/skills/claude-code/harness-brainstorming/skill.yaml +1 -0
  16. package/dist/agents/skills/claude-code/harness-caching/SKILL.md +309 -0
  17. package/dist/agents/skills/claude-code/harness-caching/skill.yaml +73 -0
  18. package/dist/agents/skills/claude-code/harness-chaos/SKILL.md +295 -0
  19. package/dist/agents/skills/claude-code/harness-chaos/skill.yaml +72 -0
  20. package/dist/agents/skills/claude-code/harness-code-review/SKILL.md +44 -0
  21. package/dist/agents/skills/claude-code/harness-code-review/skill.yaml +1 -0
  22. package/dist/agents/skills/claude-code/harness-codebase-cleanup/skill.yaml +1 -0
  23. package/dist/agents/skills/claude-code/harness-compliance/SKILL.md +303 -0
  24. package/dist/agents/skills/claude-code/harness-compliance/skill.yaml +78 -0
  25. package/dist/agents/skills/claude-code/harness-containerization/SKILL.md +284 -0
  26. package/dist/agents/skills/claude-code/harness-containerization/skill.yaml +80 -0
  27. package/dist/agents/skills/claude-code/harness-data-pipeline/SKILL.md +274 -0
  28. package/dist/agents/skills/claude-code/harness-data-pipeline/skill.yaml +81 -0
  29. package/dist/agents/skills/claude-code/harness-data-validation/SKILL.md +343 -0
  30. package/dist/agents/skills/claude-code/harness-data-validation/skill.yaml +75 -0
  31. package/dist/agents/skills/claude-code/harness-database/SKILL.md +258 -0
  32. package/dist/agents/skills/claude-code/harness-database/skill.yaml +80 -0
  33. package/dist/agents/skills/claude-code/harness-debugging/skill.yaml +1 -0
  34. package/dist/agents/skills/claude-code/harness-dependency-health/skill.yaml +1 -0
  35. package/dist/agents/skills/claude-code/harness-deployment/SKILL.md +255 -0
  36. package/dist/agents/skills/claude-code/harness-deployment/skill.yaml +77 -0
  37. package/dist/agents/skills/claude-code/harness-design/skill.yaml +1 -0
  38. package/dist/agents/skills/claude-code/harness-design-mobile/skill.yaml +1 -0
  39. package/dist/agents/skills/claude-code/harness-design-system/skill.yaml +1 -0
  40. package/dist/agents/skills/claude-code/harness-design-web/skill.yaml +1 -0
  41. package/dist/agents/skills/claude-code/harness-diagnostics/skill.yaml +1 -0
  42. package/dist/agents/skills/claude-code/harness-docs-pipeline/skill.yaml +1 -0
  43. package/dist/agents/skills/claude-code/harness-dx/SKILL.md +276 -0
  44. package/dist/agents/skills/claude-code/harness-dx/skill.yaml +76 -0
  45. package/dist/agents/skills/claude-code/harness-e2e/SKILL.md +245 -0
  46. package/dist/agents/skills/claude-code/harness-e2e/skill.yaml +78 -0
  47. package/dist/agents/skills/claude-code/harness-event-driven/SKILL.md +280 -0
  48. package/dist/agents/skills/claude-code/harness-event-driven/skill.yaml +77 -0
  49. package/dist/agents/skills/claude-code/harness-execution/SKILL.md +44 -0
  50. package/dist/agents/skills/claude-code/harness-execution/skill.yaml +1 -0
  51. package/dist/agents/skills/claude-code/harness-feature-flags/SKILL.md +287 -0
  52. package/dist/agents/skills/claude-code/harness-feature-flags/skill.yaml +74 -0
  53. package/dist/agents/skills/claude-code/harness-git-workflow/skill.yaml +1 -0
  54. package/dist/agents/skills/claude-code/harness-hotspot-detector/skill.yaml +1 -0
  55. package/dist/agents/skills/claude-code/harness-i18n/skill.yaml +1 -0
  56. package/dist/agents/skills/claude-code/harness-i18n-process/skill.yaml +1 -0
  57. package/dist/agents/skills/claude-code/harness-i18n-workflow/skill.yaml +1 -0
  58. package/dist/agents/skills/claude-code/harness-impact-analysis/skill.yaml +1 -0
  59. package/dist/agents/skills/claude-code/harness-incident-response/SKILL.md +223 -0
  60. package/dist/agents/skills/claude-code/harness-incident-response/skill.yaml +78 -0
  61. package/dist/agents/skills/claude-code/harness-infrastructure-as-code/SKILL.md +279 -0
  62. package/dist/agents/skills/claude-code/harness-infrastructure-as-code/skill.yaml +80 -0
  63. package/dist/agents/skills/claude-code/harness-integration-test/SKILL.md +271 -0
  64. package/dist/agents/skills/claude-code/harness-integration-test/skill.yaml +73 -0
  65. package/dist/agents/skills/claude-code/harness-integrity/skill.yaml +1 -0
  66. package/dist/agents/skills/claude-code/harness-knowledge-mapper/skill.yaml +1 -0
  67. package/dist/agents/skills/claude-code/harness-load-testing/SKILL.md +274 -0
  68. package/dist/agents/skills/claude-code/harness-load-testing/skill.yaml +79 -0
  69. package/dist/agents/skills/claude-code/harness-ml-ops/SKILL.md +341 -0
  70. package/dist/agents/skills/claude-code/harness-ml-ops/skill.yaml +79 -0
  71. package/dist/agents/skills/claude-code/harness-mobile-patterns/SKILL.md +326 -0
  72. package/dist/agents/skills/claude-code/harness-mobile-patterns/skill.yaml +82 -0
  73. package/dist/agents/skills/claude-code/harness-mutation-test/SKILL.md +251 -0
  74. package/dist/agents/skills/claude-code/harness-mutation-test/skill.yaml +70 -0
  75. package/dist/agents/skills/claude-code/harness-observability/SKILL.md +283 -0
  76. package/dist/agents/skills/claude-code/harness-observability/skill.yaml +78 -0
  77. package/dist/agents/skills/claude-code/harness-onboarding/skill.yaml +1 -0
  78. package/dist/agents/skills/claude-code/harness-parallel-agents/skill.yaml +1 -0
  79. package/dist/agents/skills/claude-code/harness-perf/skill.yaml +1 -0
  80. package/dist/agents/skills/claude-code/harness-perf-tdd/skill.yaml +1 -0
  81. package/dist/agents/skills/claude-code/harness-planning/SKILL.md +39 -0
  82. package/dist/agents/skills/claude-code/harness-planning/skill.yaml +1 -0
  83. package/dist/agents/skills/claude-code/harness-pre-commit-review/skill.yaml +1 -0
  84. package/dist/agents/skills/claude-code/harness-product-spec/SKILL.md +285 -0
  85. package/dist/agents/skills/claude-code/harness-product-spec/skill.yaml +72 -0
  86. package/dist/agents/skills/claude-code/harness-property-test/SKILL.md +281 -0
  87. package/dist/agents/skills/claude-code/harness-property-test/skill.yaml +71 -0
  88. package/dist/agents/skills/claude-code/harness-refactoring/skill.yaml +1 -0
  89. package/dist/agents/skills/claude-code/harness-release-readiness/SKILL.md +3 -3
  90. package/dist/agents/skills/claude-code/harness-release-readiness/skill.yaml +1 -0
  91. package/dist/agents/skills/claude-code/harness-resilience/SKILL.md +255 -0
  92. package/dist/agents/skills/claude-code/harness-resilience/skill.yaml +76 -0
  93. package/dist/agents/skills/claude-code/harness-roadmap/skill.yaml +1 -0
  94. package/dist/agents/skills/claude-code/harness-secrets/SKILL.md +293 -0
  95. package/dist/agents/skills/claude-code/harness-secrets/skill.yaml +76 -0
  96. package/dist/agents/skills/claude-code/harness-security-review/skill.yaml +1 -0
  97. package/dist/agents/skills/claude-code/harness-security-scan/skill.yaml +1 -0
  98. package/dist/agents/skills/claude-code/harness-skill-authoring/skill.yaml +1 -0
  99. package/dist/agents/skills/claude-code/harness-soundness-review/skill.yaml +1 -0
  100. package/dist/agents/skills/claude-code/harness-sql-review/SKILL.md +315 -0
  101. package/dist/agents/skills/claude-code/harness-sql-review/skill.yaml +74 -0
  102. package/dist/agents/skills/claude-code/harness-state-management/skill.yaml +1 -0
  103. package/dist/agents/skills/claude-code/harness-tdd/skill.yaml +1 -0
  104. package/dist/agents/skills/claude-code/harness-test-advisor/skill.yaml +1 -0
  105. package/dist/agents/skills/claude-code/harness-test-data/SKILL.md +268 -0
  106. package/dist/agents/skills/claude-code/harness-test-data/skill.yaml +74 -0
  107. package/dist/agents/skills/claude-code/harness-ux-copy/SKILL.md +271 -0
  108. package/dist/agents/skills/claude-code/harness-ux-copy/skill.yaml +77 -0
  109. package/dist/agents/skills/claude-code/harness-verification/SKILL.md +35 -0
  110. package/dist/agents/skills/claude-code/harness-verification/skill.yaml +1 -0
  111. package/dist/agents/skills/claude-code/harness-verify/skill.yaml +1 -0
  112. package/dist/agents/skills/claude-code/harness-visual-regression/SKILL.md +257 -0
  113. package/dist/agents/skills/claude-code/harness-visual-regression/skill.yaml +74 -0
  114. package/dist/agents/skills/claude-code/initialize-harness-project/SKILL.md +11 -3
  115. package/dist/agents/skills/claude-code/initialize-harness-project/skill.yaml +1 -0
  116. package/dist/agents/skills/claude-code/validate-context-engineering/skill.yaml +1 -0
  117. package/dist/agents/skills/gemini-cli/add-harness-component/skill.yaml +1 -0
  118. package/dist/agents/skills/gemini-cli/align-documentation/skill.yaml +1 -0
  119. package/dist/agents/skills/gemini-cli/check-mechanical-constraints/skill.yaml +1 -0
  120. package/dist/agents/skills/gemini-cli/cleanup-dead-code/skill.yaml +1 -0
  121. package/dist/agents/skills/gemini-cli/detect-doc-drift/skill.yaml +1 -0
  122. package/dist/agents/skills/gemini-cli/enforce-architecture/skill.yaml +1 -0
  123. package/dist/agents/skills/gemini-cli/harness-accessibility/skill.yaml +1 -0
  124. package/dist/agents/skills/gemini-cli/harness-api-design/SKILL.md +304 -0
  125. package/dist/agents/skills/gemini-cli/harness-api-design/skill.yaml +74 -0
  126. package/dist/agents/skills/gemini-cli/harness-architecture-advisor/skill.yaml +1 -0
  127. package/dist/agents/skills/gemini-cli/harness-auth/SKILL.md +279 -0
  128. package/dist/agents/skills/gemini-cli/harness-auth/skill.yaml +81 -0
  129. package/dist/agents/skills/gemini-cli/harness-autopilot/skill.yaml +1 -0
  130. package/dist/agents/skills/gemini-cli/harness-brainstorming/SKILL.md +39 -0
  131. package/dist/agents/skills/gemini-cli/harness-brainstorming/skill.yaml +1 -0
  132. package/dist/agents/skills/gemini-cli/harness-caching/SKILL.md +309 -0
  133. package/dist/agents/skills/gemini-cli/harness-caching/skill.yaml +73 -0
  134. package/dist/agents/skills/gemini-cli/harness-chaos/SKILL.md +295 -0
  135. package/dist/agents/skills/gemini-cli/harness-chaos/skill.yaml +72 -0
  136. package/dist/agents/skills/gemini-cli/harness-code-review/SKILL.md +44 -0
  137. package/dist/agents/skills/gemini-cli/harness-code-review/skill.yaml +1 -0
  138. package/dist/agents/skills/gemini-cli/harness-codebase-cleanup/skill.yaml +1 -0
  139. package/dist/agents/skills/gemini-cli/harness-compliance/SKILL.md +303 -0
  140. package/dist/agents/skills/gemini-cli/harness-compliance/skill.yaml +78 -0
  141. package/dist/agents/skills/gemini-cli/harness-containerization/SKILL.md +284 -0
  142. package/dist/agents/skills/gemini-cli/harness-containerization/skill.yaml +80 -0
  143. package/dist/agents/skills/gemini-cli/harness-data-pipeline/SKILL.md +274 -0
  144. package/dist/agents/skills/gemini-cli/harness-data-pipeline/skill.yaml +81 -0
  145. package/dist/agents/skills/gemini-cli/harness-data-validation/SKILL.md +343 -0
  146. package/dist/agents/skills/gemini-cli/harness-data-validation/skill.yaml +75 -0
  147. package/dist/agents/skills/gemini-cli/harness-database/SKILL.md +258 -0
  148. package/dist/agents/skills/gemini-cli/harness-database/skill.yaml +80 -0
  149. package/dist/agents/skills/gemini-cli/harness-debugging/skill.yaml +1 -0
  150. package/dist/agents/skills/gemini-cli/harness-dependency-health/skill.yaml +1 -0
  151. package/dist/agents/skills/gemini-cli/harness-deployment/SKILL.md +255 -0
  152. package/dist/agents/skills/gemini-cli/harness-deployment/skill.yaml +77 -0
  153. package/dist/agents/skills/gemini-cli/harness-design/skill.yaml +1 -0
  154. package/dist/agents/skills/gemini-cli/harness-design-mobile/skill.yaml +1 -0
  155. package/dist/agents/skills/gemini-cli/harness-design-system/skill.yaml +1 -0
  156. package/dist/agents/skills/gemini-cli/harness-design-web/skill.yaml +1 -0
  157. package/dist/agents/skills/gemini-cli/harness-diagnostics/skill.yaml +1 -0
  158. package/dist/agents/skills/gemini-cli/harness-docs-pipeline/skill.yaml +1 -0
  159. package/dist/agents/skills/gemini-cli/harness-dx/SKILL.md +276 -0
  160. package/dist/agents/skills/gemini-cli/harness-dx/skill.yaml +76 -0
  161. package/dist/agents/skills/gemini-cli/harness-e2e/SKILL.md +245 -0
  162. package/dist/agents/skills/gemini-cli/harness-e2e/skill.yaml +78 -0
  163. package/dist/agents/skills/gemini-cli/harness-event-driven/SKILL.md +280 -0
  164. package/dist/agents/skills/gemini-cli/harness-event-driven/skill.yaml +77 -0
  165. package/dist/agents/skills/gemini-cli/harness-execution/SKILL.md +44 -0
  166. package/dist/agents/skills/gemini-cli/harness-execution/skill.yaml +1 -0
  167. package/dist/agents/skills/gemini-cli/harness-feature-flags/SKILL.md +287 -0
  168. package/dist/agents/skills/gemini-cli/harness-feature-flags/skill.yaml +74 -0
  169. package/dist/agents/skills/gemini-cli/harness-git-workflow/skill.yaml +1 -0
  170. package/dist/agents/skills/gemini-cli/harness-hotspot-detector/skill.yaml +1 -0
  171. package/dist/agents/skills/gemini-cli/harness-i18n/skill.yaml +1 -0
  172. package/dist/agents/skills/gemini-cli/harness-i18n-process/skill.yaml +1 -0
  173. package/dist/agents/skills/gemini-cli/harness-i18n-workflow/skill.yaml +1 -0
  174. package/dist/agents/skills/gemini-cli/harness-impact-analysis/skill.yaml +1 -0
  175. package/dist/agents/skills/gemini-cli/harness-incident-response/SKILL.md +223 -0
  176. package/dist/agents/skills/gemini-cli/harness-incident-response/skill.yaml +78 -0
  177. package/dist/agents/skills/gemini-cli/harness-infrastructure-as-code/SKILL.md +279 -0
  178. package/dist/agents/skills/gemini-cli/harness-infrastructure-as-code/skill.yaml +80 -0
  179. package/dist/agents/skills/gemini-cli/harness-integration-test/SKILL.md +271 -0
  180. package/dist/agents/skills/gemini-cli/harness-integration-test/skill.yaml +73 -0
  181. package/dist/agents/skills/gemini-cli/harness-integrity/skill.yaml +1 -0
  182. package/dist/agents/skills/gemini-cli/harness-knowledge-mapper/skill.yaml +1 -0
  183. package/dist/agents/skills/gemini-cli/harness-load-testing/SKILL.md +274 -0
  184. package/dist/agents/skills/gemini-cli/harness-load-testing/skill.yaml +79 -0
  185. package/dist/agents/skills/gemini-cli/harness-ml-ops/SKILL.md +341 -0
  186. package/dist/agents/skills/gemini-cli/harness-ml-ops/skill.yaml +79 -0
  187. package/dist/agents/skills/gemini-cli/harness-mobile-patterns/SKILL.md +326 -0
  188. package/dist/agents/skills/gemini-cli/harness-mobile-patterns/skill.yaml +82 -0
  189. package/dist/agents/skills/gemini-cli/harness-mutation-test/SKILL.md +251 -0
  190. package/dist/agents/skills/gemini-cli/harness-mutation-test/skill.yaml +70 -0
  191. package/dist/agents/skills/gemini-cli/harness-observability/SKILL.md +283 -0
  192. package/dist/agents/skills/gemini-cli/harness-observability/skill.yaml +78 -0
  193. package/dist/agents/skills/gemini-cli/harness-onboarding/skill.yaml +1 -0
  194. package/dist/agents/skills/gemini-cli/harness-parallel-agents/skill.yaml +1 -0
  195. package/dist/agents/skills/gemini-cli/harness-perf/skill.yaml +1 -0
  196. package/dist/agents/skills/gemini-cli/harness-perf-tdd/skill.yaml +1 -0
  197. package/dist/agents/skills/gemini-cli/harness-planning/SKILL.md +39 -0
  198. package/dist/agents/skills/gemini-cli/harness-planning/skill.yaml +1 -0
  199. package/dist/agents/skills/gemini-cli/harness-pre-commit-review/skill.yaml +1 -0
  200. package/dist/agents/skills/gemini-cli/harness-product-spec/SKILL.md +285 -0
  201. package/dist/agents/skills/gemini-cli/harness-product-spec/skill.yaml +72 -0
  202. package/dist/agents/skills/gemini-cli/harness-property-test/SKILL.md +281 -0
  203. package/dist/agents/skills/gemini-cli/harness-property-test/skill.yaml +71 -0
  204. package/dist/agents/skills/gemini-cli/harness-refactoring/skill.yaml +1 -0
  205. package/dist/agents/skills/gemini-cli/harness-release-readiness/SKILL.md +3 -3
  206. package/dist/agents/skills/gemini-cli/harness-release-readiness/skill.yaml +1 -0
  207. package/dist/agents/skills/gemini-cli/harness-resilience/SKILL.md +255 -0
  208. package/dist/agents/skills/gemini-cli/harness-resilience/skill.yaml +76 -0
  209. package/dist/agents/skills/gemini-cli/harness-roadmap/skill.yaml +1 -0
  210. package/dist/agents/skills/gemini-cli/harness-secrets/SKILL.md +293 -0
  211. package/dist/agents/skills/gemini-cli/harness-secrets/skill.yaml +76 -0
  212. package/dist/agents/skills/gemini-cli/harness-security-review/SKILL.md +240 -0
  213. package/dist/agents/skills/gemini-cli/harness-security-review/skill.yaml +1 -0
  214. package/dist/agents/skills/gemini-cli/harness-security-scan/skill.yaml +1 -0
  215. package/dist/agents/skills/gemini-cli/harness-skill-authoring/skill.yaml +1 -0
  216. package/dist/agents/skills/gemini-cli/harness-soundness-review/skill.yaml +1 -0
  217. package/dist/agents/skills/gemini-cli/harness-sql-review/SKILL.md +315 -0
  218. package/dist/agents/skills/gemini-cli/harness-sql-review/skill.yaml +74 -0
  219. package/dist/agents/skills/gemini-cli/harness-state-management/skill.yaml +1 -0
  220. package/dist/agents/skills/gemini-cli/harness-tdd/skill.yaml +1 -0
  221. package/dist/agents/skills/gemini-cli/harness-test-advisor/skill.yaml +1 -0
  222. package/dist/agents/skills/gemini-cli/harness-test-data/SKILL.md +268 -0
  223. package/dist/agents/skills/gemini-cli/harness-test-data/skill.yaml +74 -0
  224. package/dist/agents/skills/gemini-cli/harness-ux-copy/SKILL.md +271 -0
  225. package/dist/agents/skills/gemini-cli/harness-ux-copy/skill.yaml +77 -0
  226. package/dist/agents/skills/gemini-cli/harness-verification/SKILL.md +35 -0
  227. package/dist/agents/skills/gemini-cli/harness-verification/skill.yaml +1 -0
  228. package/dist/agents/skills/gemini-cli/harness-verify/skill.yaml +1 -0
  229. package/dist/agents/skills/gemini-cli/harness-visual-regression/SKILL.md +257 -0
  230. package/dist/agents/skills/gemini-cli/harness-visual-regression/skill.yaml +74 -0
  231. package/dist/agents/skills/gemini-cli/initialize-harness-project/SKILL.md +11 -3
  232. package/dist/agents/skills/gemini-cli/initialize-harness-project/skill.yaml +1 -0
  233. package/dist/agents/skills/gemini-cli/validate-context-engineering/skill.yaml +1 -0
  234. package/dist/agents-md-YTYQDA3P.js +8 -0
  235. package/dist/{architecture-ESOOE26S.js → architecture-JQZYM4US.js} +4 -4
  236. package/dist/bin/harness-mcp.js +16 -15
  237. package/dist/bin/harness.js +31 -30
  238. package/dist/{check-phase-gate-S2MZKLFQ.js → check-phase-gate-L3RADYWO.js} +4 -3
  239. package/dist/{chunk-WPPDRIJL.js → chunk-3C2MLBPJ.js} +4 -4
  240. package/dist/chunk-6KTUUFRN.js +217 -0
  241. package/dist/{chunk-MI5XJQDY.js → chunk-7IP4JIFL.js} +24 -10
  242. package/dist/{chunk-C2ERUR3L.js → chunk-7MJAPE3Z.js} +165 -49
  243. package/dist/{chunk-KELT6K6M.js → chunk-ABQHQ6I5.js} +1861 -1418
  244. package/dist/{chunk-L2KLU56K.js → chunk-AOZRDOIP.js} +2 -2
  245. package/dist/{chunk-QPEH2QPG.js → chunk-DBSOCI3G.js} +53 -54
  246. package/dist/{chunk-MHBMTPW7.js → chunk-ERS5EVUZ.js} +9 -0
  247. package/dist/{chunk-JSTQ3AWB.js → chunk-FIAPHX37.js} +1 -1
  248. package/dist/{chunk-2YPZKGAG.js → chunk-FTMXDOR6.js} +1 -1
  249. package/dist/{chunk-72GHBOL2.js → chunk-GZKSBLQL.js} +1 -1
  250. package/dist/{chunk-K6XAPGML.js → chunk-H7Y5CKTM.js} +1 -1
  251. package/dist/{chunk-HD4IBGLA.js → chunk-N5G5QMS3.js} +24 -1
  252. package/dist/{chunk-LD3DKUK5.js → chunk-NLVUVUGD.js} +1 -1
  253. package/dist/{chunk-3KOLLWWE.js → chunk-O5OJVPL6.js} +26 -211
  254. package/dist/{chunk-NKDM3FMH.js → chunk-OD3S2NHN.js} +1 -1
  255. package/dist/{chunk-5VY23YK3.js → chunk-OSXBPAMK.js} +2 -2
  256. package/dist/{chunk-MACVXDZK.js → chunk-OXLLOSSR.js} +45 -47
  257. package/dist/{chunk-GNGELAXY.js → chunk-RCWZBSK5.js} +2 -2
  258. package/dist/{chunk-PSNN4LWX.js → chunk-S2FXOWOR.js} +3 -3
  259. package/dist/{chunk-VUCPTQ6G.js → chunk-SD3SQOZ2.js} +1 -1
  260. package/dist/{chunk-7PZWR4LI.js → chunk-TPOTOBR7.js} +9 -9
  261. package/dist/{chunk-RZSUJBZZ.js → chunk-XKECDXJS.js} +452 -353
  262. package/dist/{chunk-VRFZWGMS.js → chunk-XYLGHKG6.js} +5 -1
  263. package/dist/{chunk-6N4R6FVX.js → chunk-YBJ262QL.js} +1 -1
  264. package/dist/{chunk-2VU4MFM3.js → chunk-YPYGXRDR.js} +7 -7
  265. package/dist/{chunk-Q6AB7W5Z.js → chunk-YQ6KC6TE.js} +1 -1
  266. package/dist/{chunk-7KQSUZVG.js → chunk-YZD2MRNQ.js} +1528 -1010
  267. package/dist/ci-workflow-EQZFVX3P.js +8 -0
  268. package/dist/{create-skill-WPXHSLX2.js → create-skill-XSWHMSM5.js} +2 -2
  269. package/dist/{dist-M6BQODWC.js → dist-B26DFXMP.js} +573 -480
  270. package/dist/{dist-L7LAAQAS.js → dist-DZ63LLUD.js} +1 -1
  271. package/dist/{dist-WF4C7A4A.js → dist-HWXF2C3R.js} +18 -2
  272. package/dist/{dist-D4RYGUZE.js → dist-USY2C5JL.js} +3 -1
  273. package/dist/{docs-BPYCN2DR.js → docs-7ECGYMAV.js} +5 -3
  274. package/dist/engine-EG4EH4IX.js +8 -0
  275. package/dist/{entropy-4VDVV5CR.js → entropy-5USWKLVS.js} +3 -3
  276. package/dist/{feedback-63QB5RCA.js → feedback-UTBXZZHF.js} +1 -1
  277. package/dist/{generate-agent-definitions-QABOJG56.js → generate-agent-definitions-3PM5EU7V.js} +5 -5
  278. package/dist/{glob-helper-5OHBUQAI.js → glob-helper-R5FXNUPS.js} +1 -1
  279. package/dist/{graph-loader-KO4GJ5N2.js → graph-loader-2M2HXDQI.js} +1 -1
  280. package/dist/index.d.ts +183 -17
  281. package/dist/index.js +32 -30
  282. package/dist/loader-ZPALXIVR.js +10 -0
  283. package/dist/mcp-362EZHF4.js +35 -0
  284. package/dist/{performance-26BH47O4.js → performance-OQAFMJUD.js} +3 -3
  285. package/dist/{review-pipeline-GHR3WFBI.js → review-pipeline-C4GCFVGP.js} +1 -1
  286. package/dist/runtime-7YLVK453.js +9 -0
  287. package/dist/{security-UQFUZXEN.js → security-PZOX7AQS.js} +1 -1
  288. package/dist/skill-executor-XZLYZYAK.js +8 -0
  289. package/dist/templates/axum/Cargo.toml.hbs +8 -0
  290. package/dist/templates/axum/src/main.rs +12 -0
  291. package/dist/templates/axum/template.json +16 -0
  292. package/dist/templates/django/manage.py.hbs +19 -0
  293. package/dist/templates/django/requirements.txt.hbs +1 -0
  294. package/dist/templates/django/src/settings.py.hbs +44 -0
  295. package/dist/templates/django/src/urls.py +6 -0
  296. package/dist/templates/django/src/wsgi.py.hbs +9 -0
  297. package/dist/templates/django/template.json +21 -0
  298. package/dist/templates/express/package.json.hbs +15 -0
  299. package/dist/templates/express/src/app.ts +12 -0
  300. package/dist/templates/express/src/lib/.gitkeep +0 -0
  301. package/dist/templates/express/template.json +16 -0
  302. package/dist/templates/fastapi/requirements.txt.hbs +2 -0
  303. package/dist/templates/fastapi/src/main.py +8 -0
  304. package/dist/templates/fastapi/template.json +20 -0
  305. package/dist/templates/gin/go.mod.hbs +5 -0
  306. package/dist/templates/gin/main.go +15 -0
  307. package/dist/templates/gin/template.json +19 -0
  308. package/dist/templates/go-base/.golangci.yml +16 -0
  309. package/dist/templates/go-base/AGENTS.md.hbs +35 -0
  310. package/dist/templates/go-base/go.mod.hbs +3 -0
  311. package/dist/templates/go-base/harness.config.json.hbs +17 -0
  312. package/dist/templates/go-base/main.go +7 -0
  313. package/dist/templates/go-base/template.json +14 -0
  314. package/dist/templates/java-base/AGENTS.md.hbs +35 -0
  315. package/dist/templates/java-base/checkstyle.xml +20 -0
  316. package/dist/templates/java-base/harness.config.json.hbs +16 -0
  317. package/dist/templates/java-base/pom.xml.hbs +39 -0
  318. package/dist/templates/java-base/src/main/java/App.java.hbs +5 -0
  319. package/dist/templates/java-base/template.json +13 -0
  320. package/dist/templates/nestjs/nest-cli.json +5 -0
  321. package/dist/templates/nestjs/package.json.hbs +18 -0
  322. package/dist/templates/nestjs/src/app.module.ts +8 -0
  323. package/dist/templates/nestjs/src/lib/.gitkeep +0 -0
  324. package/dist/templates/nestjs/src/main.ts +11 -0
  325. package/dist/templates/nestjs/template.json +16 -0
  326. package/dist/templates/nextjs/template.json +15 -1
  327. package/dist/templates/python-base/.python-version +1 -0
  328. package/dist/templates/python-base/AGENTS.md.hbs +32 -0
  329. package/dist/templates/python-base/harness.config.json.hbs +16 -0
  330. package/dist/templates/python-base/pyproject.toml.hbs +18 -0
  331. package/dist/templates/python-base/ruff.toml +5 -0
  332. package/dist/templates/python-base/src/__init__.py +0 -0
  333. package/dist/templates/python-base/template.json +13 -0
  334. package/dist/templates/react-vite/index.html +12 -0
  335. package/dist/templates/react-vite/package.json.hbs +18 -0
  336. package/dist/templates/react-vite/src/App.tsx +7 -0
  337. package/dist/templates/react-vite/src/lib/.gitkeep +0 -0
  338. package/dist/templates/react-vite/src/main.tsx +9 -0
  339. package/dist/templates/react-vite/template.json +19 -0
  340. package/dist/templates/react-vite/vite.config.ts +6 -0
  341. package/dist/templates/rust-base/AGENTS.md.hbs +35 -0
  342. package/dist/templates/rust-base/Cargo.toml.hbs +6 -0
  343. package/dist/templates/rust-base/clippy.toml +2 -0
  344. package/dist/templates/rust-base/harness.config.json.hbs +17 -0
  345. package/dist/templates/rust-base/src/main.rs +3 -0
  346. package/dist/templates/rust-base/template.json +14 -0
  347. package/dist/templates/spring-boot/pom.xml.hbs +50 -0
  348. package/dist/templates/spring-boot/src/main/java/Application.java.hbs +19 -0
  349. package/dist/templates/spring-boot/template.json +15 -0
  350. package/dist/templates/vue/index.html +12 -0
  351. package/dist/templates/vue/package.json.hbs +16 -0
  352. package/dist/templates/vue/src/App.vue +7 -0
  353. package/dist/templates/vue/src/lib/.gitkeep +0 -0
  354. package/dist/templates/vue/src/main.ts +4 -0
  355. package/dist/templates/vue/template.json +19 -0
  356. package/dist/templates/vue/vite.config.ts +6 -0
  357. package/dist/{validate-N7QJOKFZ.js → validate-FD3Z6VJD.js} +4 -4
  358. package/dist/validate-cross-check-WNJM6H2D.js +8 -0
  359. package/package.json +6 -6
  360. package/dist/agents-md-P2RHSUV7.js +0 -8
  361. package/dist/ci-workflow-4NYBUG6R.js +0 -8
  362. package/dist/engine-LXLIWQQ3.js +0 -8
  363. package/dist/loader-Z2IT7QX3.js +0 -10
  364. package/dist/mcp-KQHEL5IF.js +0 -34
  365. package/dist/runtime-PDWD7UIK.js +0 -9
  366. package/dist/skill-executor-RG45LUO5.js +0 -8
  367. package/dist/validate-cross-check-EDQ5QGTM.js +0 -8
@@ -0,0 +1,79 @@
1
+ name: harness-load-testing
2
+ version: "1.0.0"
3
+ description: Stress testing, capacity planning, and performance benchmarking with k6/Artillery/Gatling
4
+ cognitive_mode: meticulous-verifier
5
+ triggers:
6
+ - manual
7
+ - on_milestone
8
+ platforms:
9
+ - claude-code
10
+ - gemini-cli
11
+ tools:
12
+ - Bash
13
+ - Read
14
+ - Write
15
+ - Edit
16
+ - Glob
17
+ - Grep
18
+ - emit_interaction
19
+ cli:
20
+ command: harness skill run harness-load-testing
21
+ args:
22
+ - name: path
23
+ description: Project root path
24
+ required: false
25
+ - name: tool
26
+ description: "Load testing tool: k6, artillery, or gatling. Auto-detected when omitted."
27
+ required: false
28
+ - name: profile
29
+ description: "Test profile: smoke, load, stress, spike, or soak. Defaults to load."
30
+ required: false
31
+ mcp:
32
+ tool: run_skill
33
+ input:
34
+ skill: harness-load-testing
35
+ path: string
36
+ type: rigid
37
+ tier: 3
38
+ internal: false
39
+ keywords:
40
+ - load test
41
+ - stress test
42
+ - performance test
43
+ - k6
44
+ - Artillery
45
+ - Gatling
46
+ - JMeter
47
+ - throughput
48
+ - latency
49
+ - p99
50
+ - capacity planning
51
+ - benchmark
52
+ - soak test
53
+ - spike test
54
+ stack_signals:
55
+ - "load-tests/"
56
+ - "k6/"
57
+ - "artillery/"
58
+ - "*.k6.js"
59
+ - "artillery.yml"
60
+ - "gatling/"
61
+ - "src/**/load-test*"
62
+ - "perf/"
63
+ phases:
64
+ - name: detect
65
+ description: Identify existing load tests, tooling, endpoints, and performance baselines
66
+ required: true
67
+ - name: design
68
+ description: Define test scenarios, virtual user profiles, thresholds, and ramp-up stages
69
+ required: true
70
+ - name: execute
71
+ description: Run load tests and collect metrics for throughput, latency, and error rates
72
+ required: true
73
+ - name: analyze
74
+ description: Compare results against thresholds, identify bottlenecks, and produce capacity report
75
+ required: true
76
+ state:
77
+ persistent: false
78
+ files: []
79
+ depends_on: []
@@ -0,0 +1,341 @@
1
+ # Harness ML Ops
2
+
3
+ > Advise on ML pipeline management, experiment tracking hygiene, model serving patterns, and prompt evaluation frameworks. Audits reproducibility, model versioning, and deployment readiness across MLflow, Weights and Biases, SageMaker, and Vertex AI.
4
+
5
+ ## When to Use
6
+
7
+ - When setting up or auditing ML infrastructure (experiment tracking, model registry, serving)
8
+ - When adding a new model or prompt pipeline and you need deployment pattern guidance
9
+ - When experiment tracking is inconsistent and reproducibility is at risk
10
+ - NOT for data pipeline ETL patterns (use harness-data-pipeline)
11
+ - NOT for SQL query optimization in feature engineering (use harness-sql-review)
12
+ - NOT for infrastructure provisioning of GPU instances (use harness-infrastructure-as-code)
13
+
14
+ ## Process
15
+
16
+ ### Phase 1: DETECT -- Identify ML Stack and Artifacts
17
+
18
+ 1. **Resolve project root.** Use provided path or cwd.
19
+
20
+ 2. **Detect ML frameworks and tools.** Scan for:
21
+ - **Experiment tracking:** `mlflow/`, `mlruns/`, `wandb/`, `mlflow.log_param`, `wandb.init`, `wandb.log`
22
+ - **Model frameworks:** `torch`, `tensorflow`, `sklearn`, `xgboost`, `transformers`, `langchain`, `openai`
23
+ - **Serving:** `Dockerfile` with model references, `serve.py`, `predict.py`, `app.py` with `/predict` routes, BentoML, TorchServe, TensorFlow Serving configs
24
+ - **Evaluation:** `evals/`, `prompts/`, `eval_config.yaml`, files with `evaluate`, `benchmark`, `metrics`
25
+ - **Notebooks:** `*.ipynb` files, `notebooks/` directory
26
+ - **Feature stores:** Feast config, Tecton, Hopsworks references
27
+
28
+ 3. **Inventory model artifacts.** Locate and catalog:
29
+ - Trained model files: `*.pt`, `*.pth`, `*.h5`, `*.pkl`, `*.onnx`, `*.safetensors`
30
+ - Model configuration: `config.json`, `model_config.yaml`, hyperparameter files
31
+ - Prompt templates: `prompts/`, `*.prompt`, template strings with `{variable}` interpolation
32
+ - Evaluation datasets: `evals/`, `test_data/`, golden sets, benchmark datasets
33
+
34
+ 4. **Detect model registry usage.** Check for:
35
+ - MLflow Model Registry: `mlflow.register_model`, model stages (Staging, Production, Archived)
36
+ - Weights and Biases Artifacts: `wandb.Artifact`, model versioning
37
+ - SageMaker Model Registry: `sagemaker.register_model_step`
38
+ - Hugging Face Hub: `push_to_hub`, `from_pretrained` with custom models
39
+ - Custom registry: version-tagged model directories, model metadata files
40
+
41
+ 5. **Map the ML lifecycle.** Identify which stages are present:
42
+ - Data preparation and feature engineering
43
+ - Training / fine-tuning
44
+ - Experiment tracking and comparison
45
+ - Model evaluation and validation
46
+ - Model registration and versioning
47
+ - Deployment and serving
48
+ - Monitoring and retraining triggers
49
+
50
+ 6. **Report detection summary:**
51
+ ```
52
+ ML Stack Detection:
53
+ Frameworks: PyTorch 2.1, Hugging Face Transformers 4.36
54
+ Tracking: MLflow 2.10 (local tracking server)
55
+ Serving: FastAPI + TorchServe
56
+ Models: 3 fine-tuned transformers, 1 XGBoost classifier
57
+ Prompts: 12 templates in prompts/ (LangChain format)
58
+ Evaluation: 2 eval configs, 1 golden dataset
59
+ Registry: MLflow (2 models registered, 1 in Production stage)
60
+ Missing stages: monitoring, automated retraining
61
+ ```
62
+
63
+ ---
64
+
65
+ ### Phase 2: ANALYZE -- Evaluate ML Practices
66
+
67
+ 1. **Check experiment tracking hygiene.** Evaluate:
68
+ - Are all training runs logged? (check for `mlflow.autolog()` or manual `log_param`/`log_metric`)
69
+ - Are hyperparameters fully captured? (learning rate, batch size, epochs, model architecture)
70
+ - Are data versions tracked? (dataset hash, split ratios, preprocessing version)
71
+ - Are environment details logged? (Python version, package versions, GPU type)
72
+ - Are artifacts (model weights, configs) stored with experiments?
73
+ - Flag: training scripts that produce models without experiment logging
74
+
75
+ 2. **Check reproducibility.** Verify:
76
+ - Are random seeds set and logged? (`torch.manual_seed`, `np.random.seed`, `random.seed`)
77
+ - Are data loading pipelines deterministic? (shuffling with seed, consistent splits)
78
+ - Are package versions pinned? (`requirements.txt` with versions, `poetry.lock`, `pip freeze`)
79
+ - Can an experiment be re-run from its logged parameters to reproduce the result?
80
+ - Are there notebooks with unexecuted cells or out-of-order execution?
81
+
82
+ 3. **Check model serving patterns.** Evaluate:
83
+ - Is the model loaded once at startup or per-request? (per-request loading is an error)
84
+ - Are inference inputs validated? (schema, type checking, bounds checking)
85
+ - Is batching implemented for throughput? (batch inference for non-real-time use cases)
86
+ - Is there a health check endpoint? (`/health`, `/ready`)
87
+ - Is model versioning reflected in the serving API? (A/B testing, canary deployment)
88
+ - Are timeout and resource limits configured?
89
+
90
+ 4. **Check prompt management (for LLM applications).** Evaluate:
91
+ - Are prompts version-controlled? (not hardcoded in application code)
92
+ - Are prompt templates parameterized? (using `{variable}` not string concatenation)
93
+ - Is there prompt-response logging for debugging?
94
+ - Are there guardrails for prompt injection? (input sanitization, output validation)
95
+ - Are token costs estimated and budgeted?
96
+
97
+ 5. **Check evaluation coverage.** Evaluate:
98
+ - Are there evaluation datasets with known-good outputs (golden sets)?
99
+ - Are multiple metrics tracked? (not just accuracy -- precision, recall, F1, latency)
100
+ - For LLM applications: are there automated eval suites? (factual accuracy, hallucination detection, safety)
101
+ - Is there regression testing? (new model vs production model comparison)
102
+ - Are evaluation results versioned alongside model versions?
103
+
104
+ 6. **Classify findings by severity:**
105
+ - **Error:** Model loaded per-request, no experiment tracking on training, no evaluation before deployment
106
+ - **Warning:** Missing reproducibility controls, incomplete metric logging, no golden set
107
+ - **Info:** Missing monitoring, suboptimal batching, notebook ordering issues
108
+
109
+ ---
110
+
111
+ ### Phase 3: DESIGN -- Recommend Improvements
112
+
113
+ 1. **Recommend experiment tracking setup.** Based on the detected framework:
114
+ - **MLflow not configured:** Provide `mlflow.set_tracking_uri()` and `mlflow.autolog()` setup
115
+ - **W&B not configured:** Provide `wandb.init(project=...)` and `wandb.config` setup
116
+ - **Tracking present but incomplete:** List specific parameters and metrics to add
117
+
118
+ 2. **Recommend model registry workflow.** Design a versioning and promotion flow:
119
+
120
+ ```
121
+ Training -> Candidate (auto-registered)
122
+ -> Evaluation gate (metrics threshold)
123
+ -> Staging (shadow deployment)
124
+ -> Production (canary rollout)
125
+ -> Archived (previous version)
126
+ ```
127
+
128
+ Adapt to the project's scale: small projects may skip staging/canary.
129
+
130
+ 3. **Recommend evaluation framework.** Based on model type:
131
+ - **Classification:** confusion matrix, precision/recall by class, calibration curve
132
+ - **Regression:** MAE, RMSE, residual distribution
133
+ - **LLM/generative:** factual accuracy, relevance scoring, safety checks, latency per token
134
+ - **Recommendation:** hit rate, NDCG, coverage, diversity
135
+ Provide example evaluation config for the detected framework.
136
+
137
+ 4. **Recommend prompt management patterns.** For LLM applications:
138
+ - Separate prompt templates from application code into `prompts/` directory
139
+ - Version prompts alongside evaluation results
140
+ - Implement A/B testing for prompt variants
141
+ - Add guardrails: input length limits, output validation, PII filtering
142
+
143
+ 5. **Recommend monitoring and retraining triggers.** Design:
144
+ - Data drift detection (input distribution monitoring)
145
+ - Model performance degradation alerts (metric thresholds)
146
+ - Automated retraining pipeline triggers
147
+ - Cost monitoring for API-based models (token usage, request volume)
148
+
149
+ 6. **Provide implementation templates.** Generate starter code for:
150
+ - Experiment tracking wrapper (standardized logging across the project)
151
+ - Model serving boilerplate (FastAPI with health check, input validation, batching)
152
+ - Evaluation harness (test runner with golden set comparison)
153
+
154
+ ---
155
+
156
+ ### Phase 4: VALIDATE -- Verify Deployment Readiness
157
+
158
+ 1. **Check deployment checklist.** Verify each item:
159
+ - [ ] Model is registered in the model registry with a version tag
160
+ - [ ] Evaluation metrics meet the defined threshold (or threshold is documented)
161
+ - [ ] Serving endpoint has health check and input validation
162
+ - [ ] Model is not loaded per-request
163
+ - [ ] Resource requirements are documented (memory, GPU, latency budget)
164
+ - [ ] Rollback procedure exists (previous model version can be restored)
165
+ - [ ] Monitoring is configured (or explicitly deferred with rationale)
166
+
167
+ 2. **Check for common deployment pitfalls:**
168
+ - Model file is committed to git (should be in artifact store)
169
+ - API keys or credentials in model config (should use secrets manager)
170
+ - Hardcoded file paths that differ between dev and production
171
+ - Missing CORS or authentication on inference endpoints
172
+ - No rate limiting on public-facing inference APIs
173
+
174
+ 3. **Validate prompt safety (for LLM applications).** Check:
175
+ - Are system prompts protected from user override?
176
+ - Is user input sanitized before it is interpolated into the prompt (to prevent prompt injection)?
177
+ - Are output filters in place for PII, toxicity, and hallucination?
178
+ - Are token limits enforced per request?
179
+
180
+ 4. **Output ML readiness report:**
181
+
182
+ ```
183
+ ML Ops Report: [READY/NEEDS_ATTENTION/NOT_READY]
184
+ Stack: PyTorch + MLflow + FastAPI
185
+ Models: 3 detected, 1 registered in Production
186
+ Experiment tracking: 85% coverage (2 training scripts missing logging)
187
+ Reproducibility: PARTIAL (seeds set, packages not pinned)
188
+ Evaluation: 1/3 models have golden set evaluation
189
+ Serving: health check present, input validation missing
190
+
191
+ ERRORS:
192
+ [ML-ERR-001] src/serve.py:12
193
+ Model loaded inside request handler -- move to startup event
194
+ [ML-ERR-002] training/train_classifier.py
195
+ No experiment tracking -- results are not reproducible
196
+
197
+ WARNINGS:
198
+ [ML-WARN-001] requirements.txt
199
+ Package versions not pinned (torch, transformers)
200
+ [ML-WARN-002] evals/
201
+ Only accuracy metric tracked -- add precision, recall, F1
202
+
203
+ RECOMMENDATIONS:
204
+ 1. Add mlflow.autolog() to train_classifier.py
205
+ 2. Pin package versions in requirements.txt
206
+ 3. Move model loading to FastAPI lifespan event
207
+ 4. Add input validation schema to /predict endpoint
208
+ ```
209
+
210
+ 5. **Verify report accuracy.** Cross-check:
211
+ - Do referenced files exist at the stated paths?
212
+ - Do error findings match actual code patterns?
213
+ - Are recommendations actionable with the detected framework?
214
+
215
+ ---
216
+
217
+ ## Harness Integration
218
+
219
+ - **`harness skill run harness-ml-ops`** -- Primary command for ML operations auditing.
220
+ - **`harness validate`** -- Run after applying recommendations to verify project health.
221
+ - **`Glob`** -- Used to locate model artifacts, experiment configs, notebooks, prompt templates, and evaluation datasets.
222
+ - **`Grep`** -- Used to find experiment logging calls, model loading patterns, and serving endpoint definitions.
223
+ - **`Read`** -- Used to read training scripts, serving code, evaluation configs, and model metadata.
224
+ - **`Write`** -- Used to generate experiment tracking wrappers, serving boilerplate, and evaluation harness templates.
225
+ - **`Bash`** -- Used to check MLflow tracking server status, validate model registry entries, and run lightweight eval checks.
226
+ - **`emit_interaction`** -- Used to present the readiness report and confirm recommendations before generating implementation templates.
227
+
228
+ ## Success Criteria
229
+
230
+ - ML stack is fully detected with framework versions and artifact locations
231
+ - Experiment tracking coverage is measured across all training scripts
232
+ - Reproducibility gaps are identified with specific remediation steps
233
+ - Model serving patterns are evaluated for correctness and performance
234
+ - Evaluation coverage is assessed with metric completeness
235
+ - Prompt management is audited for safety and version control (for LLM applications)
236
+ - Deployment readiness checklist produces a clear go/no-go assessment
237
+
238
+ ## Examples
239
+
240
+ ### Example: PyTorch Fine-Tuning Pipeline with MLflow
241
+
242
+ ```
243
+ Phase 1: DETECT
244
+ Frameworks: PyTorch 2.1, Hugging Face Transformers 4.36
245
+ Tracking: MLflow 2.10 (local, 47 runs logged)
246
+ Models: 2 fine-tuned BERT models in mlruns/
247
+ Notebooks: 3 in notebooks/ (exploration, training, evaluation)
248
+
249
+ Phase 2: ANALYZE
250
+ [ML-WARN-001] notebooks/training.ipynb
251
+ Cells executed out of order (cell 7 before cell 5) -- not reproducible
252
+ [ML-WARN-002] training/finetune.py
253
+ Random seed set for torch but not for numpy or python random
254
+ [ML-INFO-001] MLflow runs missing GPU type metadata
255
+
256
+ Phase 3: DESIGN
257
+ Recommended: Add np.random.seed() and random.seed() alongside torch.manual_seed()
258
+ Recommended: Add mlflow.log_param("gpu_type", torch.cuda.get_device_name())
259
+ Generated: training/experiment_wrapper.py (standardized logging)
260
+
261
+ Phase 4: VALIDATE
262
+ Deployment readiness: NEEDS_ATTENTION
263
+ Model registered: YES (bert-sentiment-v2 in Production)
264
+ Evaluation: golden set present with 500 examples
265
+ Missing: automated regression test comparing v2 vs v1
266
+ ```
267
+
268
+ ### Example: LangChain Application with OpenAI
269
+
270
+ ```
271
+ Phase 1: DETECT
272
+ Frameworks: LangChain 0.1, OpenAI API (GPT-4)
273
+ Prompts: 8 templates in src/prompts/ (hardcoded as Python strings)
274
+ Evaluation: none detected
275
+ Serving: FastAPI with /chat and /summarize endpoints
276
+
277
+ Phase 2: ANALYZE
278
+ [ML-ERR-001] src/prompts/summarize.py
279
+ Prompt template uses string concatenation with user input -- injection risk
280
+ [ML-ERR-002] src/api/chat.py
281
+ No token limit enforcement -- single request could consume entire budget
282
+ [ML-WARN-001] No evaluation framework -- model changes deployed without quality check
283
+ [ML-WARN-002] No prompt versioning -- changes to prompts are not tracked
284
+
285
+ Phase 3: DESIGN
286
+ Recommended: Move prompts to YAML files with version tags
287
+ Recommended: Implement promptfoo or custom eval harness with golden QA pairs
288
+ Recommended: Add token budget middleware (max 4096 tokens per request)
289
+ Recommended: Use LangChain PromptTemplate with input validation
290
+ Generated: evals/eval_config.yaml (promptfoo configuration)
291
+ Generated: src/middleware/token_budget.py (request token limiter)
292
+
293
+ Phase 4: VALIDATE
294
+ Deployment readiness: NOT_READY (2 errors, 2 warnings)
295
+ Critical: prompt injection risk and unbounded token usage must be fixed
296
+ ```
297
+
298
+ ### Example: Scikit-learn Classifier with W&B Tracking
299
+
300
+ ```
301
+ Phase 1: DETECT
302
+ Frameworks: scikit-learn 1.4, XGBoost 2.0
303
+ Tracking: Weights and Biases (23 runs, 3 sweeps)
304
+ Models: 1 XGBoost classifier (model.pkl in models/)
305
+ Serving: Flask app with /predict endpoint
306
+
307
+ Phase 2: ANALYZE
308
+ [ML-ERR-001] models/model.pkl committed to git (12MB)
309
+ Should be in W&B Artifacts or external storage
310
+ [ML-ERR-002] app.py:15
311
+ pickle.load(open("models/model.pkl")) on every request
312
+ [ML-WARN-001] training/train.py
313
+ Only accuracy logged -- imbalanced dataset needs precision/recall
314
+ [ML-INFO-001] W&B sweeps well-configured, good hyperparameter search
315
+
316
+ Phase 3: DESIGN
317
+ Recommended: Store model in W&B Artifacts, download at startup
318
+ Recommended: Load model once in Flask app factory, not per-request
319
+ Recommended: Add classification_report metrics to training
320
+ Generated: .gitignore addition for *.pkl
321
+ Generated: app.py refactor with model singleton
322
+
323
+ Phase 4: VALIDATE
324
+ Deployment readiness: NOT_READY (2 errors)
325
+ Critical: model in git and per-request loading must be fixed
326
+ After fixes: projected NEEDS_ATTENTION (missing precision/recall metrics)
327
+ ```
328
+
329
+ ## Gates
330
+
331
+ - **No deploying models without evaluation.** A model that has not been evaluated against a golden set or baseline cannot be promoted to production. This is always an error.
332
+ - **No per-request model loading.** Loading model weights on every inference request is a performance and reliability error. Models must be loaded at application startup.
333
+ - **No committing model files to git.** Binary model files (`.pkl`, `.pt`, `.h5`, `.onnx`) belong in artifact stores, not in version control. If detected, flag as error with migration path.
334
+ - **No hardcoded prompts with user input concatenation.** String concatenation for prompt construction is a prompt injection vulnerability. Must use parameterized templates.
335
+
336
+ ## Escalation
337
+
338
+ - **When experiment tracking requires infrastructure changes:** If MLflow tracking server or W&B workspace needs to be provisioned, flag it: "Experiment tracking requires an MLflow server. This is an infrastructure task outside this skill's scope -- coordinate with the platform team."
339
+ - **When model performance is below threshold but no alternative exists:** Do not approve deployment of an underperforming model. Report: "Model accuracy is 72% against an 80% threshold. Options: (A) collect more training data, (B) try a different architecture, (C) revise the threshold with stakeholder approval."
340
+ - **When GPU/memory requirements exceed available infrastructure:** Flag the resource gap: "This model requires 16GB GPU memory for serving. Current infrastructure provides 8GB. Either quantize the model or provision larger instances."
341
+ - **When prompt evaluation reveals safety concerns:** If prompts can generate harmful, biased, or factually incorrect content in evaluation, escalate immediately: "Evaluation found the model generates fabricated citations in 12% of test cases. This must be addressed with output validation before deployment."
@@ -0,0 +1,79 @@
1
+ name: harness-ml-ops
2
+ version: "1.0.0"
3
+ description: Model serving patterns, experiment tracking, prompt evaluation, and ML pipeline management
4
+ cognitive_mode: advisory-guide
5
+ triggers:
6
+ - manual
7
+ - on_new_feature
8
+ platforms:
9
+ - claude-code
10
+ - gemini-cli
11
+ tools:
12
+ - Bash
13
+ - Read
14
+ - Write
15
+ - Edit
16
+ - Glob
17
+ - Grep
18
+ - emit_interaction
19
+ cli:
20
+ command: harness skill run harness-ml-ops
21
+ args:
22
+ - name: path
23
+ description: Project root path
24
+ required: false
25
+ - name: focus
26
+ description: "Audit focus: serving, tracking, evaluation, pipeline, all. Defaults to all."
27
+ required: false
28
+ - name: framework
29
+ description: "ML framework: mlflow, wandb, sagemaker, vertex-ai. Auto-detected when omitted."
30
+ required: false
31
+ mcp:
32
+ tool: run_skill
33
+ input:
34
+ skill: harness-ml-ops
35
+ path: string
36
+ type: rigid
37
+ tier: 3
38
+ internal: false
39
+ keywords:
40
+ - MLOps
41
+ - model serving
42
+ - experiment tracking
43
+ - MLflow
44
+ - Weights and Biases
45
+ - model registry
46
+ - feature store
47
+ - prompt evaluation
48
+ - LLM
49
+ - model deployment
50
+ - inference
51
+ - training pipeline
52
+ stack_signals:
53
+ - "models/"
54
+ - "experiments/"
55
+ - "mlflow/"
56
+ - "wandb/"
57
+ - "src/**/ml/**"
58
+ - "src/**/models/**"
59
+ - "notebooks/"
60
+ - "*.ipynb"
61
+ - "prompts/"
62
+ - "evals/"
63
+ phases:
64
+ - name: detect
65
+ description: Identify ML frameworks, model artifacts, experiment configs, and serving infrastructure
66
+ required: true
67
+ - name: analyze
68
+ description: Evaluate experiment tracking, model versioning, reproducibility, and serving patterns
69
+ required: true
70
+ - name: design
71
+ description: Recommend pipeline improvements, evaluation frameworks, and deployment strategies
72
+ required: true
73
+ - name: validate
74
+ description: Verify reproducibility, model registry hygiene, and evaluation coverage
75
+ required: true
76
+ state:
77
+ persistent: false
78
+ files: []
79
+ depends_on: []