aigroup-workflow 2.1.1 → 2.2.0

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (914)
  1. package/.codex/AGENTS.md +1 -1
  2. package/CLAUDE.md +1 -4
  3. package/README.md +333 -333
  4. package/cli/commands/init.mjs +20 -6
  5. package/cli/utils/scaffold.mjs +39 -9
  6. package/docs/red-flags.md +1 -1
  7. package/docs/rules/coding-style.md +21 -1
  8. package/docs/rules/entropy.md +1 -1
  9. package/docs/rules/performance.md +1 -1
  10. package/docs/workflow-pipeline.md +1 -0
  11. package/manifests/install-modules.json +223 -133
  12. package/package.json +39 -39
  13. package/scripts/orchestration/lib/orchestrator.cjs +34 -0
  14. package/scripts/orchestration/session.cjs +24 -1
  15. package/skills/ai-ml/fine-tuning-expert/SKILL.md +162 -0
  16. package/skills/ai-ml/fine-tuning-expert/references/dataset-preparation.md +540 -0
  17. package/skills/ai-ml/fine-tuning-expert/references/deployment-optimization.md +673 -0
  18. package/skills/ai-ml/fine-tuning-expert/references/evaluation-metrics.md +597 -0
  19. package/skills/ai-ml/fine-tuning-expert/references/hyperparameter-tuning.md +565 -0
  20. package/skills/ai-ml/fine-tuning-expert/references/lora-peft.md +347 -0
  21. package/skills/ai-ml/ml-pipeline/SKILL.md +159 -0
  22. package/skills/ai-ml/ml-pipeline/references/experiment-tracking.md +833 -0
  23. package/skills/ai-ml/ml-pipeline/references/feature-engineering.md +631 -0
  24. package/skills/ai-ml/ml-pipeline/references/model-validation.md +978 -0
  25. package/skills/ai-ml/ml-pipeline/references/pipeline-orchestration.md +907 -0
  26. package/skills/ai-ml/ml-pipeline/references/training-pipelines.md +782 -0
  27. package/skills/ai-ml/rag-architect/SKILL.md +194 -0
  28. package/skills/ai-ml/rag-architect/references/chunking-strategies.md +878 -0
  29. package/skills/ai-ml/rag-architect/references/embedding-models.md +561 -0
  30. package/skills/ai-ml/rag-architect/references/rag-evaluation.md +833 -0
  31. package/skills/ai-ml/rag-architect/references/retrieval-optimization.md +795 -0
  32. package/skills/ai-ml/rag-architect/references/vector-databases.md +589 -0
  33. package/skills/ai-ml/spark-engineer/SKILL.md +148 -0
  34. package/skills/ai-ml/spark-engineer/references/partitioning-caching.md +543 -0
  35. package/skills/ai-ml/spark-engineer/references/performance-tuning.md +544 -0
  36. package/skills/ai-ml/spark-engineer/references/rdd-operations.md +599 -0
  37. package/skills/ai-ml/spark-engineer/references/spark-sql-dataframes.md +474 -0
  38. package/skills/ai-ml/spark-engineer/references/streaming-patterns.md +786 -0
  39. package/skills/backend/api-designer/SKILL.md +217 -0
  40. package/skills/backend/api-designer/references/error-handling.md +541 -0
  41. package/skills/backend/api-designer/references/openapi.md +824 -0
  42. package/skills/backend/api-designer/references/pagination.md +494 -0
  43. package/skills/backend/api-designer/references/rest-patterns.md +335 -0
  44. package/skills/backend/api-designer/references/versioning.md +391 -0
  45. package/skills/backend/architecture-designer/SKILL.md +117 -0
  46. package/skills/backend/architecture-designer/references/adr-template.md +116 -0
  47. package/skills/backend/architecture-designer/references/architecture-patterns.md +111 -0
  48. package/skills/backend/architecture-designer/references/database-selection.md +102 -0
  49. package/skills/backend/architecture-designer/references/nfr-checklist.md +112 -0
  50. package/skills/backend/architecture-designer/references/system-design.md +100 -0
  51. package/skills/backend/code-documenter/SKILL.md +147 -0
  52. package/skills/backend/code-documenter/references/api-docs-fastapi-django.md +166 -0
  53. package/skills/backend/code-documenter/references/api-docs-nestjs-express.md +220 -0
  54. package/skills/backend/code-documenter/references/coverage-reports.md +125 -0
  55. package/skills/backend/code-documenter/references/documentation-systems.md +333 -0
  56. package/skills/backend/code-documenter/references/interactive-api-docs.md +531 -0
  57. package/skills/backend/code-documenter/references/python-docstrings.md +121 -0
  58. package/skills/backend/code-documenter/references/typescript-jsdoc.md +145 -0
  59. package/skills/backend/code-documenter/references/user-guides-tutorials.md +530 -0
  60. package/skills/backend/debugging-wizard/SKILL.md +105 -0
  61. package/skills/backend/debugging-wizard/references/common-patterns.md +132 -0
  62. package/skills/backend/debugging-wizard/references/debugging-tools.md +140 -0
  63. package/skills/backend/debugging-wizard/references/quick-fixes.md +177 -0
  64. package/skills/backend/debugging-wizard/references/strategies.md +142 -0
  65. package/skills/backend/debugging-wizard/references/systematic-debugging.md +367 -0
  66. package/skills/backend/feature-forge/SKILL.md +98 -0
  67. package/skills/backend/feature-forge/references/acceptance-criteria.md +104 -0
  68. package/skills/backend/feature-forge/references/ears-syntax.md +99 -0
  69. package/skills/backend/feature-forge/references/interview-questions.md +150 -0
  70. package/skills/backend/feature-forge/references/pre-discovery-subagents.md +54 -0
  71. package/skills/backend/feature-forge/references/specification-template.md +103 -0
  72. package/skills/backend/fullstack-guardian/SKILL.md +105 -0
  73. package/skills/backend/fullstack-guardian/references/api-design-standards.md +307 -0
  74. package/skills/backend/fullstack-guardian/references/architecture-decisions.md +350 -0
  75. package/skills/backend/fullstack-guardian/references/backend-patterns.md +237 -0
  76. package/skills/backend/fullstack-guardian/references/common-patterns.md +134 -0
  77. package/skills/backend/fullstack-guardian/references/deliverables-checklist.md +354 -0
  78. package/skills/backend/fullstack-guardian/references/design-template.md +91 -0
  79. package/skills/backend/fullstack-guardian/references/error-handling.md +135 -0
  80. package/skills/backend/fullstack-guardian/references/frontend-patterns.md +340 -0
  81. package/skills/backend/fullstack-guardian/references/integration-patterns.md +333 -0
  82. package/skills/backend/fullstack-guardian/references/security-checklist.md +106 -0
  83. package/skills/backend/graphql-architect/SKILL.md +146 -0
  84. package/skills/backend/graphql-architect/references/federation.md +418 -0
  85. package/skills/backend/graphql-architect/references/migration-from-rest.md +1141 -0
  86. package/skills/backend/graphql-architect/references/resolvers.md +425 -0
  87. package/skills/backend/graphql-architect/references/schema-design.md +393 -0
  88. package/skills/backend/graphql-architect/references/security.md +569 -0
  89. package/skills/backend/graphql-architect/references/subscriptions.md +510 -0
  90. package/skills/backend/legacy-modernizer/SKILL.md +137 -0
  91. package/skills/backend/legacy-modernizer/references/legacy-testing.md +381 -0
  92. package/skills/backend/legacy-modernizer/references/migration-strategies.md +423 -0
  93. package/skills/backend/legacy-modernizer/references/refactoring-patterns.md +395 -0
  94. package/skills/backend/legacy-modernizer/references/strangler-fig-pattern.md +281 -0
  95. package/skills/backend/legacy-modernizer/references/system-assessment.md +487 -0
  96. package/skills/backend/microservices-architect/SKILL.md +164 -0
  97. package/skills/backend/microservices-architect/references/communication.md +499 -0
  98. package/skills/backend/microservices-architect/references/data.md +721 -0
  99. package/skills/backend/microservices-architect/references/decomposition.md +344 -0
  100. package/skills/backend/microservices-architect/references/observability.md +805 -0
  101. package/skills/backend/microservices-architect/references/patterns.md +603 -0
  102. package/skills/database/database-optimizer/SKILL.md +147 -0
  103. package/skills/database/database-optimizer/references/index-strategies.md +331 -0
  104. package/skills/database/database-optimizer/references/monitoring-analysis.md +501 -0
  105. package/skills/database/database-optimizer/references/mysql-tuning.md +452 -0
  106. package/skills/database/database-optimizer/references/postgresql-tuning.md +413 -0
  107. package/skills/database/database-optimizer/references/query-optimization.md +251 -0
  108. package/skills/database/postgres-pro/SKILL.md +152 -0
  109. package/skills/database/postgres-pro/references/extensions.md +404 -0
  110. package/skills/database/postgres-pro/references/jsonb.md +321 -0
  111. package/skills/database/postgres-pro/references/maintenance.md +481 -0
  112. package/skills/database/postgres-pro/references/performance.md +265 -0
  113. package/skills/database/postgres-pro/references/replication.md +446 -0
  114. package/skills/database/sql-pro/SKILL.md +129 -0
  115. package/skills/database/sql-pro/references/database-design.md +402 -0
  116. package/skills/database/sql-pro/references/dialect-differences.md +419 -0
  117. package/skills/database/sql-pro/references/optimization.md +384 -0
  118. package/skills/database/sql-pro/references/query-patterns.md +285 -0
  119. package/skills/database/sql-pro/references/window-functions.md +328 -0
  120. package/skills/dotnet/csharp-developer/SKILL.md +125 -0
  121. package/skills/dotnet/csharp-developer/references/aspnet-core.md +394 -0
  122. package/skills/dotnet/csharp-developer/references/blazor.md +553 -0
  123. package/skills/dotnet/csharp-developer/references/entity-framework.md +409 -0
  124. package/skills/dotnet/csharp-developer/references/modern-csharp.md +248 -0
  125. package/skills/dotnet/csharp-developer/references/performance.md +498 -0
  126. package/skills/dotnet/dotnet-core-expert/SKILL.md +138 -0
  127. package/skills/dotnet/dotnet-core-expert/references/authentication.md +546 -0
  128. package/skills/dotnet/dotnet-core-expert/references/clean-architecture.md +455 -0
  129. package/skills/dotnet/dotnet-core-expert/references/cloud-native.md +548 -0
  130. package/skills/dotnet/dotnet-core-expert/references/entity-framework.md +440 -0
  131. package/skills/dotnet/dotnet-core-expert/references/minimal-apis.md +319 -0
  132. package/skills/frontend/angular-architect/SKILL.md +152 -0
  133. package/skills/frontend/angular-architect/references/components.md +297 -0
  134. package/skills/frontend/angular-architect/references/ngrx.md +401 -0
  135. package/skills/frontend/angular-architect/references/routing.md +361 -0
  136. package/skills/frontend/angular-architect/references/rxjs.md +319 -0
  137. package/skills/frontend/angular-architect/references/testing.md +405 -0
  138. package/skills/frontend/flutter-expert/SKILL.md +138 -0
  139. package/skills/frontend/flutter-expert/references/bloc-state.md +259 -0
  140. package/skills/frontend/flutter-expert/references/gorouter-navigation.md +119 -0
  141. package/skills/frontend/flutter-expert/references/performance.md +99 -0
  142. package/skills/frontend/flutter-expert/references/project-structure.md +118 -0
  143. package/skills/frontend/flutter-expert/references/riverpod-state.md +130 -0
  144. package/skills/frontend/flutter-expert/references/widget-patterns.md +123 -0
  145. package/skills/frontend/nextjs-developer/SKILL.md +143 -0
  146. package/skills/frontend/nextjs-developer/references/app-router.md +311 -0
  147. package/skills/frontend/nextjs-developer/references/data-fetching.md +482 -0
  148. package/skills/frontend/nextjs-developer/references/deployment.md +545 -0
  149. package/skills/frontend/nextjs-developer/references/server-actions.md +462 -0
  150. package/skills/frontend/nextjs-developer/references/server-components.md +384 -0
  151. package/skills/frontend/react-expert/SKILL.md +149 -0
  152. package/skills/frontend/react-expert/references/hooks-patterns.md +162 -0
  153. package/skills/frontend/react-expert/references/migration-class-to-modern.md +1119 -0
  154. package/skills/frontend/react-expert/references/performance.md +168 -0
  155. package/skills/frontend/react-expert/references/react-19-features.md +174 -0
  156. package/skills/frontend/react-expert/references/server-components.md +143 -0
  157. package/skills/frontend/react-expert/references/state-management.md +171 -0
  158. package/skills/frontend/react-expert/references/testing-react.md +174 -0
  159. package/skills/frontend/react-native-expert/SKILL.md +185 -0
  160. package/skills/frontend/react-native-expert/references/expo-router.md +187 -0
  161. package/skills/frontend/react-native-expert/references/list-optimization.md +204 -0
  162. package/skills/frontend/react-native-expert/references/platform-handling.md +188 -0
  163. package/skills/frontend/react-native-expert/references/project-structure.md +171 -0
  164. package/skills/frontend/react-native-expert/references/storage-hooks.md +173 -0
  165. package/skills/frontend/vue-expert/SKILL.md +98 -0
  166. package/skills/frontend/vue-expert/references/build-tooling.md +480 -0
  167. package/skills/frontend/vue-expert/references/components.md +448 -0
  168. package/skills/frontend/vue-expert/references/composition-api.md +299 -0
  169. package/skills/frontend/vue-expert/references/mobile-hybrid.md +636 -0
  170. package/skills/frontend/vue-expert/references/nuxt.md +669 -0
  171. package/skills/frontend/vue-expert/references/state-management.md +449 -0
  172. package/skills/frontend/vue-expert/references/typescript.md +584 -0
  173. package/skills/frontend/vue-expert-js/SKILL.md +167 -0
  174. package/skills/frontend/vue-expert-js/references/component-architecture.md +219 -0
  175. package/skills/frontend/vue-expert-js/references/composables-patterns.md +183 -0
  176. package/skills/frontend/vue-expert-js/references/jsdoc-typing.md +535 -0
  177. package/skills/frontend/vue-expert-js/references/state-management.md +249 -0
  178. package/skills/frontend/vue-expert-js/references/testing-patterns.md +237 -0
  179. package/skills/go-rust-cpp/cpp-pro/SKILL.md +115 -0
  180. package/skills/go-rust-cpp/cpp-pro/references/build-tooling.md +440 -0
  181. package/skills/go-rust-cpp/cpp-pro/references/concurrency.md +437 -0
  182. package/skills/go-rust-cpp/cpp-pro/references/memory-performance.md +397 -0
  183. package/skills/go-rust-cpp/cpp-pro/references/modern-cpp.md +304 -0
  184. package/skills/go-rust-cpp/cpp-pro/references/templates.md +357 -0
  185. package/skills/go-rust-cpp/golang-pro/SKILL.md +122 -0
  186. package/skills/go-rust-cpp/golang-pro/references/concurrency.md +329 -0
  187. package/skills/go-rust-cpp/golang-pro/references/generics.md +442 -0
  188. package/skills/go-rust-cpp/golang-pro/references/interfaces.md +432 -0
  189. package/skills/go-rust-cpp/golang-pro/references/project-structure.md +477 -0
  190. package/skills/go-rust-cpp/golang-pro/references/testing.md +451 -0
  191. package/skills/go-rust-cpp/rust-engineer/SKILL.md +167 -0
  192. package/skills/go-rust-cpp/rust-engineer/references/async.md +458 -0
  193. package/skills/go-rust-cpp/rust-engineer/references/error-handling.md +334 -0
  194. package/skills/go-rust-cpp/rust-engineer/references/ownership.md +278 -0
  195. package/skills/go-rust-cpp/rust-engineer/references/testing.md +470 -0
  196. package/skills/go-rust-cpp/rust-engineer/references/traits.md +413 -0
  197. package/skills/infra/cli-developer/SKILL.md +113 -0
  198. package/skills/infra/cli-developer/references/design-patterns.md +221 -0
  199. package/skills/infra/cli-developer/references/go-cli.md +540 -0
  200. package/skills/infra/cli-developer/references/node-cli.md +383 -0
  201. package/skills/infra/cli-developer/references/python-cli.md +422 -0
  202. package/skills/infra/cli-developer/references/ux-patterns.md +448 -0
  203. package/skills/infra/cloud-architect/SKILL.md +216 -0
  204. package/skills/infra/cloud-architect/references/aws.md +394 -0
  205. package/skills/infra/cloud-architect/references/azure.md +562 -0
  206. package/skills/infra/cloud-architect/references/cost.md +582 -0
  207. package/skills/infra/cloud-architect/references/gcp.md +633 -0
  208. package/skills/infra/cloud-architect/references/multi-cloud.md +483 -0
  209. package/skills/infra/devops-engineer/SKILL.md +144 -0
  210. package/skills/infra/devops-engineer/references/deployment-strategies.md +241 -0
  211. package/skills/infra/devops-engineer/references/docker-patterns.md +113 -0
  212. package/skills/infra/devops-engineer/references/github-actions.md +139 -0
  213. package/skills/infra/devops-engineer/references/incident-response.md +331 -0
  214. package/skills/infra/devops-engineer/references/kubernetes.md +154 -0
  215. package/skills/infra/devops-engineer/references/platform-engineering.md +417 -0
  216. package/skills/infra/devops-engineer/references/release-automation.md +527 -0
  217. package/skills/infra/devops-engineer/references/terraform-iac.md +141 -0
  218. package/skills/infra/kubernetes-specialist/SKILL.md +241 -0
  219. package/skills/infra/kubernetes-specialist/references/configuration.md +452 -0
  220. package/skills/infra/kubernetes-specialist/references/cost-optimization.md +458 -0
  221. package/skills/infra/kubernetes-specialist/references/custom-operators.md +563 -0
  222. package/skills/infra/kubernetes-specialist/references/gitops.md +530 -0
  223. package/skills/infra/kubernetes-specialist/references/helm-charts.md +912 -0
  224. package/skills/infra/kubernetes-specialist/references/multi-cluster.md +507 -0
  225. package/skills/infra/kubernetes-specialist/references/networking.md +447 -0
  226. package/skills/infra/kubernetes-specialist/references/service-mesh.md +459 -0
  227. package/skills/infra/kubernetes-specialist/references/storage.md +535 -0
  228. package/skills/infra/kubernetes-specialist/references/troubleshooting.md +414 -0
  229. package/skills/infra/kubernetes-specialist/references/workloads.md +377 -0
  230. package/skills/infra/mcp-developer/SKILL.md +143 -0
  231. package/skills/infra/mcp-developer/references/protocol.md +244 -0
  232. package/skills/infra/mcp-developer/references/python-sdk.md +367 -0
  233. package/skills/infra/mcp-developer/references/resources.md +554 -0
  234. package/skills/infra/mcp-developer/references/tools.md +480 -0
  235. package/skills/infra/mcp-developer/references/typescript-sdk.md +350 -0
  236. package/skills/infra/monitoring-expert/SKILL.md +176 -0
  237. package/skills/infra/monitoring-expert/references/alerting-rules.md +141 -0
  238. package/skills/infra/monitoring-expert/references/application-profiling.md +331 -0
  239. package/skills/infra/monitoring-expert/references/capacity-planning.md +344 -0
  240. package/skills/infra/monitoring-expert/references/dashboards.md +126 -0
  241. package/skills/infra/monitoring-expert/references/opentelemetry.md +123 -0
  242. package/skills/infra/monitoring-expert/references/performance-testing.md +269 -0
  243. package/skills/infra/monitoring-expert/references/prometheus-metrics.md +136 -0
  244. package/skills/infra/monitoring-expert/references/structured-logging.md +142 -0
  245. package/skills/infra/sre-engineer/SKILL.md +181 -0
  246. package/skills/infra/sre-engineer/references/automation-toil.md +492 -0
  247. package/skills/infra/sre-engineer/references/error-budget-policy.md +334 -0
  248. package/skills/infra/sre-engineer/references/incident-chaos.md +576 -0
  249. package/skills/infra/sre-engineer/references/monitoring-alerting.md +424 -0
  250. package/skills/infra/sre-engineer/references/slo-sli-management.md +238 -0
  251. package/skills/infra/terraform-engineer/SKILL.md +143 -0
  252. package/skills/infra/terraform-engineer/references/best-practices.md +583 -0
  253. package/skills/infra/terraform-engineer/references/module-patterns.md +297 -0
  254. package/skills/infra/terraform-engineer/references/providers.md +452 -0
  255. package/skills/infra/terraform-engineer/references/state-management.md +371 -0
  256. package/skills/infra/terraform-engineer/references/testing.md +486 -0
  257. package/skills/infra/websocket-engineer/SKILL.md +168 -0
  258. package/skills/infra/websocket-engineer/references/alternatives.md +391 -0
  259. package/skills/infra/websocket-engineer/references/patterns.md +400 -0
  260. package/skills/infra/websocket-engineer/references/protocol.md +195 -0
  261. package/skills/infra/websocket-engineer/references/scaling.md +333 -0
  262. package/skills/infra/websocket-engineer/references/security.md +474 -0
  263. package/skills/java/java-architect/SKILL.md +132 -0
  264. package/skills/java/java-architect/references/jpa-optimization.md +393 -0
  265. package/skills/java/java-architect/references/reactive-webflux.md +356 -0
  266. package/skills/java/java-architect/references/spring-boot-setup.md +269 -0
  267. package/skills/java/java-architect/references/spring-security.md +445 -0
  268. package/skills/java/java-architect/references/testing-patterns.md +500 -0
  269. package/skills/java/kotlin-specialist/SKILL.md +147 -0
  270. package/skills/java/kotlin-specialist/references/android-compose.md +419 -0
  271. package/skills/java/kotlin-specialist/references/coroutines-flow.md +276 -0
  272. package/skills/java/kotlin-specialist/references/dsl-idioms.md +421 -0
  273. package/skills/java/kotlin-specialist/references/ktor-server.md +426 -0
  274. package/skills/java/kotlin-specialist/references/multiplatform-kmp.md +380 -0
  275. package/skills/java/spring-boot-engineer/SKILL.md +195 -0
  276. package/skills/java/spring-boot-engineer/references/cloud.md +498 -0
  277. package/skills/java/spring-boot-engineer/references/data.md +381 -0
  278. package/skills/java/spring-boot-engineer/references/security.md +459 -0
  279. package/skills/java/spring-boot-engineer/references/testing.md +545 -0
  280. package/skills/java/spring-boot-engineer/references/web.md +295 -0
  281. package/skills/javascript/javascript-pro/SKILL.md +132 -0
  282. package/skills/javascript/javascript-pro/references/async-patterns.md +334 -0
  283. package/skills/javascript/javascript-pro/references/browser-apis.md +398 -0
  284. package/skills/javascript/javascript-pro/references/modern-syntax.md +272 -0
  285. package/skills/javascript/javascript-pro/references/modules.md +357 -0
  286. package/skills/javascript/javascript-pro/references/node-essentials.md +471 -0
  287. package/skills/javascript/nestjs-expert/SKILL.md +206 -0
  288. package/skills/javascript/nestjs-expert/references/authentication.md +166 -0
  289. package/skills/javascript/nestjs-expert/references/controllers-routing.md +111 -0
  290. package/skills/javascript/nestjs-expert/references/dtos-validation.md +153 -0
  291. package/skills/javascript/nestjs-expert/references/migration-from-express.md +1237 -0
  292. package/skills/javascript/nestjs-expert/references/services-di.md +140 -0
  293. package/skills/javascript/nestjs-expert/references/testing-patterns.md +186 -0
  294. package/skills/javascript/typescript-pro/SKILL.md +145 -0
  295. package/skills/javascript/typescript-pro/references/advanced-types.md +259 -0
  296. package/skills/javascript/typescript-pro/references/configuration.md +445 -0
  297. package/skills/javascript/typescript-pro/references/patterns.md +484 -0
  298. package/skills/javascript/typescript-pro/references/type-guards.md +352 -0
  299. package/skills/javascript/typescript-pro/references/utility-types.md +329 -0
  300. package/skills/php/laravel-specialist/SKILL.md +262 -0
  301. package/skills/php/laravel-specialist/references/eloquent.md +351 -0
  302. package/skills/php/laravel-specialist/references/livewire.md +512 -0
  303. package/skills/php/laravel-specialist/references/queues.md +423 -0
  304. package/skills/php/laravel-specialist/references/routing.md +362 -0
  305. package/skills/php/laravel-specialist/references/testing.md +522 -0
  306. package/skills/php/php-pro/SKILL.md +206 -0
  307. package/skills/php/php-pro/references/async-patterns.md +412 -0
  308. package/skills/php/php-pro/references/laravel-patterns.md +377 -0
  309. package/skills/php/php-pro/references/modern-php-features.md +323 -0
  310. package/skills/php/php-pro/references/symfony-patterns.md +466 -0
  311. package/skills/php/php-pro/references/testing-quality.md +466 -0
  312. package/skills/python/django-expert/SKILL.md +162 -0
  313. package/skills/python/django-expert/references/authentication.md +145 -0
  314. package/skills/python/django-expert/references/drf-serializers.md +148 -0
  315. package/skills/python/django-expert/references/models-orm.md +151 -0
  316. package/skills/python/django-expert/references/testing-django.md +204 -0
  317. package/skills/python/django-expert/references/viewsets-views.md +153 -0
  318. package/skills/python/fastapi-expert/SKILL.md +185 -0
  319. package/skills/python/fastapi-expert/references/async-sqlalchemy.md +146 -0
  320. package/skills/python/fastapi-expert/references/authentication.md +159 -0
  321. package/skills/python/fastapi-expert/references/endpoints-routing.md +142 -0
  322. package/skills/python/fastapi-expert/references/migration-from-django.md +997 -0
  323. package/skills/python/fastapi-expert/references/pydantic-v2.md +135 -0
  324. package/skills/python/fastapi-expert/references/testing-async.md +159 -0
  325. package/skills/python/pandas-pro/SKILL.md +178 -0
  326. package/skills/python/pandas-pro/references/aggregation-groupby.md +545 -0
  327. package/skills/python/pandas-pro/references/data-cleaning.md +500 -0
  328. package/skills/python/pandas-pro/references/dataframe-operations.md +420 -0
  329. package/skills/python/pandas-pro/references/merging-joining.md +596 -0
  330. package/skills/python/pandas-pro/references/performance-optimization.md +597 -0
  331. package/skills/python/python-pro/SKILL.md +177 -0
  332. package/skills/python/python-pro/references/async-patterns.md +356 -0
  333. package/skills/python/python-pro/references/packaging.md +460 -0
  334. package/skills/python/python-pro/references/standard-library.md +378 -0
  335. package/skills/python/python-pro/references/testing.md +404 -0
  336. package/skills/python/python-pro/references/type-system.md +290 -0
  337. package/skills/quality/chaos-engineer/SKILL.md +182 -0
  338. package/skills/quality/chaos-engineer/references/chaos-tools.md +511 -0
  339. package/skills/quality/chaos-engineer/references/experiment-design.md +229 -0
  340. package/skills/quality/chaos-engineer/references/game-days.md +434 -0
  341. package/skills/quality/chaos-engineer/references/infrastructure-chaos.md +348 -0
  342. package/skills/quality/chaos-engineer/references/kubernetes-chaos.md +432 -0
  343. package/skills/quality/code-reviewer/SKILL.md +119 -0
  344. package/skills/quality/code-reviewer/references/common-issues.md +142 -0
  345. package/skills/quality/code-reviewer/references/feedback-examples.md +144 -0
  346. package/skills/quality/code-reviewer/references/receiving-feedback.md +238 -0
  347. package/skills/quality/code-reviewer/references/report-template.md +109 -0
  348. package/skills/quality/code-reviewer/references/review-checklist.md +88 -0
  349. package/skills/quality/code-reviewer/references/spec-compliance-review.md +258 -0
  350. package/skills/quality/playwright-expert/SKILL.md +169 -0
  351. package/skills/quality/playwright-expert/references/api-mocking.md +140 -0
  352. package/skills/quality/playwright-expert/references/configuration.md +155 -0
  353. package/skills/quality/playwright-expert/references/debugging-flaky.md +150 -0
  354. package/skills/quality/playwright-expert/references/page-object-model.md +152 -0
  355. package/skills/quality/playwright-expert/references/selectors-locators.md +119 -0
  356. package/skills/quality/secure-code-guardian/SKILL.md +191 -0
  357. package/skills/quality/secure-code-guardian/references/authentication.md +136 -0
  358. package/skills/quality/secure-code-guardian/references/input-validation.md +146 -0
  359. package/skills/quality/secure-code-guardian/references/owasp-prevention.md +135 -0
  360. package/skills/quality/secure-code-guardian/references/security-headers.md +133 -0
  361. package/skills/quality/secure-code-guardian/references/xss-csrf.md +157 -0
  362. package/skills/quality/security-reviewer/SKILL.md +103 -0
  363. package/skills/quality/security-reviewer/references/infrastructure-security.md +268 -0
  364. package/skills/quality/security-reviewer/references/penetration-testing.md +268 -0
  365. package/skills/quality/security-reviewer/references/report-template.md +170 -0
  366. package/skills/quality/security-reviewer/references/sast-tools.md +117 -0
  367. package/skills/quality/security-reviewer/references/secret-scanning.md +125 -0
  368. package/skills/quality/security-reviewer/references/vulnerability-patterns.md +152 -0
  369. package/skills/quality/tdd-guide/assets/sample_coverage_report.lcov +0 -0
  370. package/skills/quality/test-master/SKILL.md +94 -0
  371. package/skills/quality/test-master/references/automation-frameworks.md +294 -0
  372. package/skills/quality/test-master/references/e2e-testing.md +128 -0
  373. package/skills/quality/test-master/references/integration-testing.md +120 -0
  374. package/skills/quality/test-master/references/performance-testing.md +118 -0
  375. package/skills/quality/test-master/references/qa-methodology.md +247 -0
  376. package/skills/quality/test-master/references/security-testing.md +127 -0
  377. package/skills/quality/test-master/references/tdd-iron-laws.md +174 -0
  378. package/skills/quality/test-master/references/test-reports.md +104 -0
  379. package/skills/quality/test-master/references/testing-anti-patterns.md +231 -0
  380. package/skills/quality/test-master/references/unit-testing.md +113 -0
  381. package/skills/ruby/rails-expert/SKILL.md +154 -0
  382. package/skills/ruby/rails-expert/references/active-record.md +244 -0
  383. package/skills/ruby/rails-expert/references/api-development.md +401 -0
  384. package/skills/ruby/rails-expert/references/background-jobs.md +272 -0
  385. package/skills/ruby/rails-expert/references/hotwire-turbo.md +228 -0
  386. package/skills/ruby/rails-expert/references/rspec-testing.md +367 -0
  387. package/skills/swift/swift-expert/SKILL.md +163 -0
  388. package/skills/swift/swift-expert/references/async-concurrency.md +360 -0
  389. package/skills/swift/swift-expert/references/memory-performance.md +377 -0
  390. package/skills/swift/swift-expert/references/protocol-oriented.md +354 -0
  391. package/skills/swift/swift-expert/references/swiftui-patterns.md +291 -0
  392. package/skills/swift/swift-expert/references/testing-patterns.md +399 -0
  393. package/skills/workflow/brainstorming/SKILL.md +164 -0
  394. package/skills/workflow/brainstorming/scripts/helper.js +88 -0
  395. package/skills/workflow/brainstorming/scripts/start-server.sh +148 -0
  396. package/skills/workflow/brainstorming/scripts/stop-server.sh +56 -0
  397. package/skills/workflow/brainstorming/spec-document-reviewer-prompt.md +49 -0
  398. package/skills/workflow/brainstorming/visual-companion.md +287 -0
  399. package/skills/workflow/documentation/SKILL.md +45 -0
  400. package/skills/workflow/entropy-management/SKILL.md +115 -0
  401. package/skills/workflow/executing-plans/SKILL.md +70 -0
  402. package/skills/workflow/finishing-a-development-branch/SKILL.md +200 -0
  403. package/skills/workflow/receiving-code-review/SKILL.md +213 -0
  404. package/skills/workflow/requesting-code-review/SKILL.md +105 -0
  405. package/skills/workflow/requesting-code-review/code-reviewer.md +146 -0
  406. package/skills/workflow/requirement-engineering/SKILL.md +111 -0
  407. package/skills/workflow/systematic-debugging/CREATION-LOG.md +119 -0
  408. package/skills/workflow/systematic-debugging/SKILL.md +296 -0
  409. package/skills/workflow/systematic-debugging/condition-based-waiting-example.ts +158 -0
  410. package/skills/workflow/systematic-debugging/condition-based-waiting.md +115 -0
  411. package/skills/workflow/systematic-debugging/defense-in-depth.md +122 -0
  412. package/skills/workflow/systematic-debugging/find-polluter.sh +63 -0
  413. package/skills/workflow/systematic-debugging/root-cause-tracing.md +169 -0
  414. package/skills/workflow/systematic-debugging/test-academic.md +14 -0
  415. package/skills/workflow/systematic-debugging/test-pressure-1.md +58 -0
  416. package/skills/workflow/systematic-debugging/test-pressure-2.md +68 -0
  417. package/skills/workflow/systematic-debugging/test-pressure-3.md +69 -0
  418. package/skills/workflow/using-git-worktrees/SKILL.md +218 -0
  419. package/skills/workflow/verification-before-completion/SKILL.md +139 -0
  420. package/skills/workflow/writing-plans/SKILL.md +151 -0
  421. package/skills/workflow/writing-plans/plan-document-reviewer-prompt.md +49 -0
  422. package/skills/workflow/writing-skills/SKILL.md +655 -0
  423. package/skills/workflow/writing-skills/anthropic-best-practices.md +1150 -0
  424. package/skills/workflow/writing-skills/examples/CLAUDE_MD_TESTING.md +189 -0
  425. package/skills/workflow/writing-skills/graphviz-conventions.dot +0 -0
  426. package/skills/workflow/writing-skills/persuasion-principles.md +187 -0
  427. package/skills/workflow/writing-skills/render-graphs.js +168 -0
  428. package/skills/workflow/writing-skills/testing-skills-with-subagents.md +384 -0
  429. package/skills/angular-architect/SKILL.md +0 -152
  430. package/skills/angular-architect/references/components.md +0 -297
  431. package/skills/angular-architect/references/ngrx.md +0 -401
  432. package/skills/angular-architect/references/routing.md +0 -361
  433. package/skills/angular-architect/references/rxjs.md +0 -319
  434. package/skills/angular-architect/references/testing.md +0 -405
  435. package/skills/api-designer/SKILL.md +0 -217
  436. package/skills/api-designer/references/error-handling.md +0 -541
  437. package/skills/api-designer/references/openapi.md +0 -824
  438. package/skills/api-designer/references/pagination.md +0 -494
  439. package/skills/api-designer/references/rest-patterns.md +0 -335
  440. package/skills/api-designer/references/versioning.md +0 -391
  441. package/skills/architecture-designer/SKILL.md +0 -117
  442. package/skills/architecture-designer/references/adr-template.md +0 -116
  443. package/skills/architecture-designer/references/architecture-patterns.md +0 -111
  444. package/skills/architecture-designer/references/database-selection.md +0 -102
  445. package/skills/architecture-designer/references/nfr-checklist.md +0 -112
  446. package/skills/architecture-designer/references/system-design.md +0 -100
  447. package/skills/brainstorming/SKILL.md +0 -164
  448. package/skills/brainstorming/scripts/helper.js +0 -88
  449. package/skills/brainstorming/scripts/start-server.sh +0 -148
  450. package/skills/brainstorming/scripts/stop-server.sh +0 -56
  451. package/skills/brainstorming/spec-document-reviewer-prompt.md +0 -49
  452. package/skills/brainstorming/visual-companion.md +0 -287
  453. package/skills/chaos-engineer/SKILL.md +0 -182
  454. package/skills/chaos-engineer/references/chaos-tools.md +0 -511
  455. package/skills/chaos-engineer/references/experiment-design.md +0 -229
  456. package/skills/chaos-engineer/references/game-days.md +0 -434
  457. package/skills/chaos-engineer/references/infrastructure-chaos.md +0 -348
  458. package/skills/chaos-engineer/references/kubernetes-chaos.md +0 -432
  459. package/skills/cli-developer/SKILL.md +0 -113
  460. package/skills/cli-developer/references/design-patterns.md +0 -221
  461. package/skills/cli-developer/references/go-cli.md +0 -540
  462. package/skills/cli-developer/references/node-cli.md +0 -383
  463. package/skills/cli-developer/references/python-cli.md +0 -422
  464. package/skills/cli-developer/references/ux-patterns.md +0 -448
  465. package/skills/cloud-architect/SKILL.md +0 -216
  466. package/skills/cloud-architect/references/aws.md +0 -394
  467. package/skills/cloud-architect/references/azure.md +0 -562
  468. package/skills/cloud-architect/references/cost.md +0 -582
  469. package/skills/cloud-architect/references/gcp.md +0 -633
  470. package/skills/cloud-architect/references/multi-cloud.md +0 -483
  471. package/skills/code-documenter/SKILL.md +0 -147
  472. package/skills/code-documenter/references/api-docs-fastapi-django.md +0 -166
  473. package/skills/code-documenter/references/api-docs-nestjs-express.md +0 -220
  474. package/skills/code-documenter/references/coverage-reports.md +0 -125
  475. package/skills/code-documenter/references/documentation-systems.md +0 -333
  476. package/skills/code-documenter/references/interactive-api-docs.md +0 -531
  477. package/skills/code-documenter/references/python-docstrings.md +0 -121
  478. package/skills/code-documenter/references/typescript-jsdoc.md +0 -145
  479. package/skills/code-documenter/references/user-guides-tutorials.md +0 -530
  480. package/skills/code-reviewer/SKILL.md +0 -119
  481. package/skills/code-reviewer/references/common-issues.md +0 -142
  482. package/skills/code-reviewer/references/feedback-examples.md +0 -144
  483. package/skills/code-reviewer/references/receiving-feedback.md +0 -238
  484. package/skills/code-reviewer/references/report-template.md +0 -109
  485. package/skills/code-reviewer/references/review-checklist.md +0 -88
  486. package/skills/code-reviewer/references/spec-compliance-review.md +0 -258
  487. package/skills/cpp-pro/SKILL.md +0 -115
  488. package/skills/cpp-pro/references/build-tooling.md +0 -440
  489. package/skills/cpp-pro/references/concurrency.md +0 -437
  490. package/skills/cpp-pro/references/memory-performance.md +0 -397
  491. package/skills/cpp-pro/references/modern-cpp.md +0 -304
  492. package/skills/cpp-pro/references/templates.md +0 -357
  493. package/skills/csharp-developer/SKILL.md +0 -125
  494. package/skills/csharp-developer/references/aspnet-core.md +0 -394
  495. package/skills/csharp-developer/references/blazor.md +0 -553
  496. package/skills/csharp-developer/references/entity-framework.md +0 -409
  497. package/skills/csharp-developer/references/modern-csharp.md +0 -248
  498. package/skills/csharp-developer/references/performance.md +0 -498
  499. package/skills/database-optimizer/SKILL.md +0 -147
  500. package/skills/database-optimizer/references/index-strategies.md +0 -331
  501. package/skills/database-optimizer/references/monitoring-analysis.md +0 -501
  502. package/skills/database-optimizer/references/mysql-tuning.md +0 -452
  503. package/skills/database-optimizer/references/postgresql-tuning.md +0 -413
  504. package/skills/database-optimizer/references/query-optimization.md +0 -251
  505. package/skills/debugging-wizard/SKILL.md +0 -105
  506. package/skills/debugging-wizard/references/common-patterns.md +0 -132
  507. package/skills/debugging-wizard/references/debugging-tools.md +0 -140
  508. package/skills/debugging-wizard/references/quick-fixes.md +0 -177
  509. package/skills/debugging-wizard/references/strategies.md +0 -142
  510. package/skills/debugging-wizard/references/systematic-debugging.md +0 -367
  511. package/skills/devops-engineer/SKILL.md +0 -144
  512. package/skills/devops-engineer/references/deployment-strategies.md +0 -241
  513. package/skills/devops-engineer/references/docker-patterns.md +0 -113
  514. package/skills/devops-engineer/references/github-actions.md +0 -139
  515. package/skills/devops-engineer/references/incident-response.md +0 -331
  516. package/skills/devops-engineer/references/kubernetes.md +0 -154
  517. package/skills/devops-engineer/references/platform-engineering.md +0 -417
  518. package/skills/devops-engineer/references/release-automation.md +0 -527
  519. package/skills/devops-engineer/references/terraform-iac.md +0 -141
  520. package/skills/django-expert/SKILL.md +0 -162
  521. package/skills/django-expert/references/authentication.md +0 -145
  522. package/skills/django-expert/references/drf-serializers.md +0 -148
  523. package/skills/django-expert/references/models-orm.md +0 -151
  524. package/skills/django-expert/references/testing-django.md +0 -204
  525. package/skills/django-expert/references/viewsets-views.md +0 -153
  526. package/skills/documentation/SKILL.md +0 -45
  527. package/skills/dotnet-core-expert/SKILL.md +0 -138
  528. package/skills/dotnet-core-expert/references/authentication.md +0 -546
  529. package/skills/dotnet-core-expert/references/clean-architecture.md +0 -455
  530. package/skills/dotnet-core-expert/references/cloud-native.md +0 -548
  531. package/skills/dotnet-core-expert/references/entity-framework.md +0 -440
  532. package/skills/dotnet-core-expert/references/minimal-apis.md +0 -319
  533. package/skills/entropy-management/SKILL.md +0 -115
  534. package/skills/executing-plans/SKILL.md +0 -70
  535. package/skills/fastapi-expert/SKILL.md +0 -185
  536. package/skills/fastapi-expert/references/async-sqlalchemy.md +0 -146
  537. package/skills/fastapi-expert/references/authentication.md +0 -159
  538. package/skills/fastapi-expert/references/endpoints-routing.md +0 -142
  539. package/skills/fastapi-expert/references/migration-from-django.md +0 -997
  540. package/skills/fastapi-expert/references/pydantic-v2.md +0 -135
  541. package/skills/fastapi-expert/references/testing-async.md +0 -159
  542. package/skills/feature-forge/SKILL.md +0 -98
  543. package/skills/feature-forge/references/acceptance-criteria.md +0 -104
  544. package/skills/feature-forge/references/ears-syntax.md +0 -99
  545. package/skills/feature-forge/references/interview-questions.md +0 -150
  546. package/skills/feature-forge/references/pre-discovery-subagents.md +0 -54
  547. package/skills/feature-forge/references/specification-template.md +0 -103
  548. package/skills/fine-tuning-expert/SKILL.md +0 -162
  549. package/skills/fine-tuning-expert/references/dataset-preparation.md +0 -540
  550. package/skills/fine-tuning-expert/references/deployment-optimization.md +0 -673
  551. package/skills/fine-tuning-expert/references/evaluation-metrics.md +0 -597
  552. package/skills/fine-tuning-expert/references/hyperparameter-tuning.md +0 -565
  553. package/skills/fine-tuning-expert/references/lora-peft.md +0 -347
  554. package/skills/finishing-a-development-branch/SKILL.md +0 -200
  555. package/skills/flutter-expert/SKILL.md +0 -138
  556. package/skills/flutter-expert/references/bloc-state.md +0 -259
  557. package/skills/flutter-expert/references/gorouter-navigation.md +0 -119
  558. package/skills/flutter-expert/references/performance.md +0 -99
  559. package/skills/flutter-expert/references/project-structure.md +0 -118
  560. package/skills/flutter-expert/references/riverpod-state.md +0 -130
  561. package/skills/flutter-expert/references/widget-patterns.md +0 -123
  562. package/skills/fullstack-guardian/SKILL.md +0 -105
  563. package/skills/fullstack-guardian/references/api-design-standards.md +0 -307
  564. package/skills/fullstack-guardian/references/architecture-decisions.md +0 -350
  565. package/skills/fullstack-guardian/references/backend-patterns.md +0 -237
  566. package/skills/fullstack-guardian/references/common-patterns.md +0 -134
  567. package/skills/fullstack-guardian/references/deliverables-checklist.md +0 -354
  568. package/skills/fullstack-guardian/references/design-template.md +0 -91
  569. package/skills/fullstack-guardian/references/error-handling.md +0 -135
  570. package/skills/fullstack-guardian/references/frontend-patterns.md +0 -340
  571. package/skills/fullstack-guardian/references/integration-patterns.md +0 -333
  572. package/skills/fullstack-guardian/references/security-checklist.md +0 -106
  573. package/skills/golang-pro/SKILL.md +0 -122
  574. package/skills/golang-pro/references/concurrency.md +0 -329
  575. package/skills/golang-pro/references/generics.md +0 -442
  576. package/skills/golang-pro/references/interfaces.md +0 -432
  577. package/skills/golang-pro/references/project-structure.md +0 -477
  578. package/skills/golang-pro/references/testing.md +0 -451
  579. package/skills/graphql-architect/SKILL.md +0 -146
  580. package/skills/graphql-architect/references/federation.md +0 -418
  581. package/skills/graphql-architect/references/migration-from-rest.md +0 -1141
  582. package/skills/graphql-architect/references/resolvers.md +0 -425
  583. package/skills/graphql-architect/references/schema-design.md +0 -393
  584. package/skills/graphql-architect/references/security.md +0 -569
  585. package/skills/graphql-architect/references/subscriptions.md +0 -510
  586. package/skills/java-architect/SKILL.md +0 -132
  587. package/skills/java-architect/references/jpa-optimization.md +0 -393
  588. package/skills/java-architect/references/reactive-webflux.md +0 -356
  589. package/skills/java-architect/references/spring-boot-setup.md +0 -269
  590. package/skills/java-architect/references/spring-security.md +0 -445
  591. package/skills/java-architect/references/testing-patterns.md +0 -500
  592. package/skills/javascript-pro/SKILL.md +0 -132
  593. package/skills/javascript-pro/references/async-patterns.md +0 -334
  594. package/skills/javascript-pro/references/browser-apis.md +0 -398
  595. package/skills/javascript-pro/references/modern-syntax.md +0 -272
  596. package/skills/javascript-pro/references/modules.md +0 -357
  597. package/skills/javascript-pro/references/node-essentials.md +0 -471
  598. package/skills/kotlin-specialist/SKILL.md +0 -147
  599. package/skills/kotlin-specialist/references/android-compose.md +0 -419
  600. package/skills/kotlin-specialist/references/coroutines-flow.md +0 -276
  601. package/skills/kotlin-specialist/references/dsl-idioms.md +0 -421
  602. package/skills/kotlin-specialist/references/ktor-server.md +0 -426
  603. package/skills/kotlin-specialist/references/multiplatform-kmp.md +0 -380
  604. package/skills/kubernetes-specialist/SKILL.md +0 -241
  605. package/skills/kubernetes-specialist/references/configuration.md +0 -452
  606. package/skills/kubernetes-specialist/references/cost-optimization.md +0 -458
  607. package/skills/kubernetes-specialist/references/custom-operators.md +0 -563
  608. package/skills/kubernetes-specialist/references/gitops.md +0 -530
  609. package/skills/kubernetes-specialist/references/helm-charts.md +0 -912
  610. package/skills/kubernetes-specialist/references/multi-cluster.md +0 -507
  611. package/skills/kubernetes-specialist/references/networking.md +0 -447
  612. package/skills/kubernetes-specialist/references/service-mesh.md +0 -459
  613. package/skills/kubernetes-specialist/references/storage.md +0 -535
  614. package/skills/kubernetes-specialist/references/troubleshooting.md +0 -414
  615. package/skills/kubernetes-specialist/references/workloads.md +0 -377
  616. package/skills/laravel-specialist/SKILL.md +0 -262
  617. package/skills/laravel-specialist/references/eloquent.md +0 -351
  618. package/skills/laravel-specialist/references/livewire.md +0 -512
  619. package/skills/laravel-specialist/references/queues.md +0 -423
  620. package/skills/laravel-specialist/references/routing.md +0 -362
  621. package/skills/laravel-specialist/references/testing.md +0 -522
  622. package/skills/legacy-modernizer/SKILL.md +0 -137
  623. package/skills/legacy-modernizer/references/legacy-testing.md +0 -381
  624. package/skills/legacy-modernizer/references/migration-strategies.md +0 -423
  625. package/skills/legacy-modernizer/references/refactoring-patterns.md +0 -395
  626. package/skills/legacy-modernizer/references/strangler-fig-pattern.md +0 -281
  627. package/skills/legacy-modernizer/references/system-assessment.md +0 -487
  628. package/skills/mcp-developer/SKILL.md +0 -143
  629. package/skills/mcp-developer/references/protocol.md +0 -244
  630. package/skills/mcp-developer/references/python-sdk.md +0 -367
  631. package/skills/mcp-developer/references/resources.md +0 -554
  632. package/skills/mcp-developer/references/tools.md +0 -480
  633. package/skills/mcp-developer/references/typescript-sdk.md +0 -350
  634. package/skills/microservices-architect/SKILL.md +0 -164
  635. package/skills/microservices-architect/references/communication.md +0 -499
  636. package/skills/microservices-architect/references/data.md +0 -721
  637. package/skills/microservices-architect/references/decomposition.md +0 -344
  638. package/skills/microservices-architect/references/observability.md +0 -805
  639. package/skills/microservices-architect/references/patterns.md +0 -603
  640. package/skills/ml-pipeline/SKILL.md +0 -159
  641. package/skills/ml-pipeline/references/experiment-tracking.md +0 -833
  642. package/skills/ml-pipeline/references/feature-engineering.md +0 -631
  643. package/skills/ml-pipeline/references/model-validation.md +0 -978
  644. package/skills/ml-pipeline/references/pipeline-orchestration.md +0 -907
  645. package/skills/ml-pipeline/references/training-pipelines.md +0 -782
  646. package/skills/monitoring-expert/SKILL.md +0 -176
  647. package/skills/monitoring-expert/references/alerting-rules.md +0 -141
  648. package/skills/monitoring-expert/references/application-profiling.md +0 -331
  649. package/skills/monitoring-expert/references/capacity-planning.md +0 -344
  650. package/skills/monitoring-expert/references/dashboards.md +0 -126
  651. package/skills/monitoring-expert/references/opentelemetry.md +0 -123
  652. package/skills/monitoring-expert/references/performance-testing.md +0 -269
  653. package/skills/monitoring-expert/references/prometheus-metrics.md +0 -136
  654. package/skills/monitoring-expert/references/structured-logging.md +0 -142
  655. package/skills/nestjs-expert/SKILL.md +0 -206
  656. package/skills/nestjs-expert/references/authentication.md +0 -166
  657. package/skills/nestjs-expert/references/controllers-routing.md +0 -111
  658. package/skills/nestjs-expert/references/dtos-validation.md +0 -153
  659. package/skills/nestjs-expert/references/migration-from-express.md +0 -1237
  660. package/skills/nestjs-expert/references/services-di.md +0 -140
  661. package/skills/nestjs-expert/references/testing-patterns.md +0 -186
  662. package/skills/nextjs-developer/SKILL.md +0 -143
  663. package/skills/nextjs-developer/references/app-router.md +0 -311
  664. package/skills/nextjs-developer/references/data-fetching.md +0 -482
  665. package/skills/nextjs-developer/references/deployment.md +0 -545
  666. package/skills/nextjs-developer/references/server-actions.md +0 -462
  667. package/skills/nextjs-developer/references/server-components.md +0 -384
  668. package/skills/pandas-pro/SKILL.md +0 -178
  669. package/skills/pandas-pro/references/aggregation-groupby.md +0 -545
  670. package/skills/pandas-pro/references/data-cleaning.md +0 -500
  671. package/skills/pandas-pro/references/dataframe-operations.md +0 -420
  672. package/skills/pandas-pro/references/merging-joining.md +0 -596
  673. package/skills/pandas-pro/references/performance-optimization.md +0 -597
  674. package/skills/php-pro/SKILL.md +0 -206
  675. package/skills/php-pro/references/async-patterns.md +0 -412
  676. package/skills/php-pro/references/laravel-patterns.md +0 -377
  677. package/skills/php-pro/references/modern-php-features.md +0 -323
  678. package/skills/php-pro/references/symfony-patterns.md +0 -466
  679. package/skills/php-pro/references/testing-quality.md +0 -466
  680. package/skills/playwright-expert/SKILL.md +0 -169
  681. package/skills/playwright-expert/references/api-mocking.md +0 -140
  682. package/skills/playwright-expert/references/configuration.md +0 -155
  683. package/skills/playwright-expert/references/debugging-flaky.md +0 -150
  684. package/skills/playwright-expert/references/page-object-model.md +0 -152
  685. package/skills/playwright-expert/references/selectors-locators.md +0 -119
  686. package/skills/postgres-pro/SKILL.md +0 -152
  687. package/skills/postgres-pro/references/extensions.md +0 -404
  688. package/skills/postgres-pro/references/jsonb.md +0 -321
  689. package/skills/postgres-pro/references/maintenance.md +0 -481
  690. package/skills/postgres-pro/references/performance.md +0 -265
  691. package/skills/postgres-pro/references/replication.md +0 -446
  692. package/skills/python-pro/SKILL.md +0 -177
  693. package/skills/python-pro/references/async-patterns.md +0 -356
  694. package/skills/python-pro/references/packaging.md +0 -460
  695. package/skills/python-pro/references/standard-library.md +0 -378
  696. package/skills/python-pro/references/testing.md +0 -404
  697. package/skills/python-pro/references/type-system.md +0 -290
  698. package/skills/rag-architect/SKILL.md +0 -194
  699. package/skills/rag-architect/references/chunking-strategies.md +0 -878
  700. package/skills/rag-architect/references/embedding-models.md +0 -561
  701. package/skills/rag-architect/references/rag-evaluation.md +0 -833
  702. package/skills/rag-architect/references/retrieval-optimization.md +0 -795
  703. package/skills/rag-architect/references/vector-databases.md +0 -589
  704. package/skills/rails-expert/SKILL.md +0 -154
  705. package/skills/rails-expert/references/active-record.md +0 -244
  706. package/skills/rails-expert/references/api-development.md +0 -401
  707. package/skills/rails-expert/references/background-jobs.md +0 -272
  708. package/skills/rails-expert/references/hotwire-turbo.md +0 -228
  709. package/skills/rails-expert/references/rspec-testing.md +0 -367
  710. package/skills/react-expert/SKILL.md +0 -149
  711. package/skills/react-expert/references/hooks-patterns.md +0 -162
  712. package/skills/react-expert/references/migration-class-to-modern.md +0 -1119
  713. package/skills/react-expert/references/performance.md +0 -168
  714. package/skills/react-expert/references/react-19-features.md +0 -174
  715. package/skills/react-expert/references/server-components.md +0 -143
  716. package/skills/react-expert/references/state-management.md +0 -171
  717. package/skills/react-expert/references/testing-react.md +0 -174
  718. package/skills/react-native-expert/SKILL.md +0 -185
  719. package/skills/react-native-expert/references/expo-router.md +0 -187
  720. package/skills/react-native-expert/references/list-optimization.md +0 -204
  721. package/skills/react-native-expert/references/platform-handling.md +0 -188
  722. package/skills/react-native-expert/references/project-structure.md +0 -171
  723. package/skills/react-native-expert/references/storage-hooks.md +0 -173
  724. package/skills/receiving-code-review/SKILL.md +0 -213
  725. package/skills/requesting-code-review/SKILL.md +0 -105
  726. package/skills/requesting-code-review/code-reviewer.md +0 -146
  727. package/skills/requirement-engineering/SKILL.md +0 -111
  728. package/skills/rust-engineer/SKILL.md +0 -167
  729. package/skills/rust-engineer/references/async.md +0 -458
  730. package/skills/rust-engineer/references/error-handling.md +0 -334
  731. package/skills/rust-engineer/references/ownership.md +0 -278
  732. package/skills/rust-engineer/references/testing.md +0 -470
  733. package/skills/rust-engineer/references/traits.md +0 -413
  734. package/skills/secure-code-guardian/SKILL.md +0 -191
  735. package/skills/secure-code-guardian/references/authentication.md +0 -136
  736. package/skills/secure-code-guardian/references/input-validation.md +0 -146
  737. package/skills/secure-code-guardian/references/owasp-prevention.md +0 -135
  738. package/skills/secure-code-guardian/references/security-headers.md +0 -133
  739. package/skills/secure-code-guardian/references/xss-csrf.md +0 -157
  740. package/skills/security-reviewer/SKILL.md +0 -103
  741. package/skills/security-reviewer/references/infrastructure-security.md +0 -268
  742. package/skills/security-reviewer/references/penetration-testing.md +0 -268
  743. package/skills/security-reviewer/references/report-template.md +0 -170
  744. package/skills/security-reviewer/references/sast-tools.md +0 -117
  745. package/skills/security-reviewer/references/secret-scanning.md +0 -125
  746. package/skills/security-reviewer/references/vulnerability-patterns.md +0 -152
  747. package/skills/spark-engineer/SKILL.md +0 -148
  748. package/skills/spark-engineer/references/partitioning-caching.md +0 -543
  749. package/skills/spark-engineer/references/performance-tuning.md +0 -544
  750. package/skills/spark-engineer/references/rdd-operations.md +0 -599
  751. package/skills/spark-engineer/references/spark-sql-dataframes.md +0 -474
  752. package/skills/spark-engineer/references/streaming-patterns.md +0 -786
  753. package/skills/spring-boot-engineer/SKILL.md +0 -195
  754. package/skills/spring-boot-engineer/references/cloud.md +0 -498
  755. package/skills/spring-boot-engineer/references/data.md +0 -381
  756. package/skills/spring-boot-engineer/references/security.md +0 -459
  757. package/skills/spring-boot-engineer/references/testing.md +0 -545
  758. package/skills/spring-boot-engineer/references/web.md +0 -295
  759. package/skills/sql-pro/SKILL.md +0 -129
  760. package/skills/sql-pro/references/database-design.md +0 -402
  761. package/skills/sql-pro/references/dialect-differences.md +0 -419
  762. package/skills/sql-pro/references/optimization.md +0 -384
  763. package/skills/sql-pro/references/query-patterns.md +0 -285
  764. package/skills/sql-pro/references/window-functions.md +0 -328
  765. package/skills/sre-engineer/SKILL.md +0 -181
  766. package/skills/sre-engineer/references/automation-toil.md +0 -492
  767. package/skills/sre-engineer/references/error-budget-policy.md +0 -334
  768. package/skills/sre-engineer/references/incident-chaos.md +0 -576
  769. package/skills/sre-engineer/references/monitoring-alerting.md +0 -424
  770. package/skills/sre-engineer/references/slo-sli-management.md +0 -238
  771. package/skills/swift-expert/SKILL.md +0 -163
  772. package/skills/swift-expert/references/async-concurrency.md +0 -360
  773. package/skills/swift-expert/references/memory-performance.md +0 -377
  774. package/skills/swift-expert/references/protocol-oriented.md +0 -354
  775. package/skills/swift-expert/references/swiftui-patterns.md +0 -291
  776. package/skills/swift-expert/references/testing-patterns.md +0 -399
  777. package/skills/systematic-debugging/CREATION-LOG.md +0 -119
  778. package/skills/systematic-debugging/SKILL.md +0 -296
  779. package/skills/systematic-debugging/condition-based-waiting-example.ts +0 -158
  780. package/skills/systematic-debugging/condition-based-waiting.md +0 -115
  781. package/skills/systematic-debugging/defense-in-depth.md +0 -122
  782. package/skills/systematic-debugging/find-polluter.sh +0 -63
  783. package/skills/systematic-debugging/root-cause-tracing.md +0 -169
  784. package/skills/systematic-debugging/test-academic.md +0 -14
  785. package/skills/systematic-debugging/test-pressure-1.md +0 -58
  786. package/skills/systematic-debugging/test-pressure-2.md +0 -68
  787. package/skills/systematic-debugging/test-pressure-3.md +0 -69
  788. package/skills/tdd-guide/assets/sample_coverage_report.lcov +0 -56
  789. package/skills/terraform-engineer/SKILL.md +0 -143
  790. package/skills/terraform-engineer/references/best-practices.md +0 -583
  791. package/skills/terraform-engineer/references/module-patterns.md +0 -297
  792. package/skills/terraform-engineer/references/providers.md +0 -452
  793. package/skills/terraform-engineer/references/state-management.md +0 -371
  794. package/skills/terraform-engineer/references/testing.md +0 -486
  795. package/skills/test-master/SKILL.md +0 -94
  796. package/skills/test-master/references/automation-frameworks.md +0 -294
  797. package/skills/test-master/references/e2e-testing.md +0 -128
  798. package/skills/test-master/references/integration-testing.md +0 -120
  799. package/skills/test-master/references/performance-testing.md +0 -118
  800. package/skills/test-master/references/qa-methodology.md +0 -247
  801. package/skills/test-master/references/security-testing.md +0 -127
  802. package/skills/test-master/references/tdd-iron-laws.md +0 -174
  803. package/skills/test-master/references/test-reports.md +0 -104
  804. package/skills/test-master/references/testing-anti-patterns.md +0 -231
  805. package/skills/test-master/references/unit-testing.md +0 -113
  806. package/skills/typescript-pro/SKILL.md +0 -145
  807. package/skills/typescript-pro/references/advanced-types.md +0 -259
  808. package/skills/typescript-pro/references/configuration.md +0 -445
  809. package/skills/typescript-pro/references/patterns.md +0 -484
  810. package/skills/typescript-pro/references/type-guards.md +0 -352
  811. package/skills/typescript-pro/references/utility-types.md +0 -329
  812. package/skills/using-git-worktrees/SKILL.md +0 -218
  813. package/skills/verification-before-completion/SKILL.md +0 -139
  814. package/skills/vue-expert/SKILL.md +0 -98
  815. package/skills/vue-expert/references/build-tooling.md +0 -480
  816. package/skills/vue-expert/references/components.md +0 -448
  817. package/skills/vue-expert/references/composition-api.md +0 -299
  818. package/skills/vue-expert/references/mobile-hybrid.md +0 -636
  819. package/skills/vue-expert/references/nuxt.md +0 -669
  820. package/skills/vue-expert/references/state-management.md +0 -449
  821. package/skills/vue-expert/references/typescript.md +0 -584
  822. package/skills/vue-expert-js/SKILL.md +0 -167
  823. package/skills/vue-expert-js/references/component-architecture.md +0 -219
  824. package/skills/vue-expert-js/references/composables-patterns.md +0 -183
  825. package/skills/vue-expert-js/references/jsdoc-typing.md +0 -535
  826. package/skills/vue-expert-js/references/state-management.md +0 -249
  827. package/skills/vue-expert-js/references/testing-patterns.md +0 -237
  828. package/skills/websocket-engineer/SKILL.md +0 -168
  829. package/skills/websocket-engineer/references/alternatives.md +0 -391
  830. package/skills/websocket-engineer/references/patterns.md +0 -400
  831. package/skills/websocket-engineer/references/protocol.md +0 -195
  832. package/skills/websocket-engineer/references/scaling.md +0 -333
  833. package/skills/websocket-engineer/references/security.md +0 -474
  834. package/skills/writing-plans/SKILL.md +0 -151
  835. package/skills/writing-plans/plan-document-reviewer-prompt.md +0 -49
  836. package/skills/writing-skills/SKILL.md +0 -655
  837. package/skills/writing-skills/anthropic-best-practices.md +0 -1150
  838. package/skills/writing-skills/examples/CLAUDE_MD_TESTING.md +0 -189
  839. package/skills/writing-skills/graphviz-conventions.dot +0 -172
  840. package/skills/writing-skills/persuasion-principles.md +0 -187
  841. package/skills/writing-skills/render-graphs.js +0 -168
  842. package/skills/writing-skills/testing-skills-with-subagents.md +0 -384
  843. /package/skills/{design-commands → frontend/design-commands}/design.md +0 -0
  844. /package/skills/{design-commands → frontend/design-commands}/handoff.md +0 -0
  845. /package/skills/{design-commands → frontend/design-commands}/prototype.md +0 -0
  846. /package/skills/{design-commands → frontend/design-commands}/spec.md +0 -0
  847. /package/skills/{design-commands → frontend/design-commands}/style.md +0 -0
  848. /package/skills/{senior-frontend → frontend/senior-frontend}/SKILL.md +0 -0
  849. /package/skills/{senior-frontend → frontend/senior-frontend}/references/frontend_best_practices.md +0 -0
  850. /package/skills/{senior-frontend → frontend/senior-frontend}/references/nextjs_optimization_guide.md +0 -0
  851. /package/skills/{senior-frontend → frontend/senior-frontend}/references/react_patterns.md +0 -0
  852. /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/bundle_analyzer.py +0 -0
  853. /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/component_generator.py +0 -0
  854. /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/frontend_scaffolder.py +0 -0
  855. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/SKILL.md +0 -0
  856. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/charts.csv +0 -0
  857. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/colors.csv +0 -0
  858. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/icons.csv +0 -0
  859. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/landing.csv +0 -0
  860. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/products.csv +0 -0
  861. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/react-performance.csv +0 -0
  862. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/astro.csv +0 -0
  863. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/flutter.csv +0 -0
  864. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/html-tailwind.csv +0 -0
  865. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/jetpack-compose.csv +0 -0
  866. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nextjs.csv +0 -0
  867. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nuxt-ui.csv +0 -0
  868. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nuxtjs.csv +0 -0
  869. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/react-native.csv +0 -0
  870. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/react.csv +0 -0
  871. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/shadcn.csv +0 -0
  872. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/svelte.csv +0 -0
  873. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/swiftui.csv +0 -0
  874. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/vue.csv +0 -0
  875. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/styles.csv +0 -0
  876. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/typography.csv +0 -0
  877. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/ui-reasoning.csv +0 -0
  878. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/ux-guidelines.csv +0 -0
  879. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/web-interface.csv +0 -0
  880. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/core.py +0 -0
  881. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/design_system.py +0 -0
  882. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/search.py +0 -0
  883. /package/skills/{competitive-analysis → product/competitive-analysis}/SKILL.md +0 -0
  884. /package/skills/{meeting-notes → product/meeting-notes}/SKILL.md +0 -0
  885. /package/skills/{prd-template → product/prd-template}/SKILL.md +0 -0
  886. /package/skills/{stakeholder-update → product/stakeholder-update}/SKILL.md +0 -0
  887. /package/skills/{user-research-synthesis → product/user-research-synthesis}/SKILL.md +0 -0
  888. /package/skills/{senior-qa → quality/senior-qa}/README.md +0 -0
  889. /package/skills/{senior-qa → quality/senior-qa}/SKILL.md +0 -0
  890. /package/skills/{senior-qa → quality/senior-qa}/references/qa_best_practices.md +0 -0
  891. /package/skills/{senior-qa → quality/senior-qa}/references/test_automation_patterns.md +0 -0
  892. /package/skills/{senior-qa → quality/senior-qa}/references/testing_strategies.md +0 -0
  893. /package/skills/{senior-qa → quality/senior-qa}/scripts/coverage_analyzer.py +0 -0
  894. /package/skills/{senior-qa → quality/senior-qa}/scripts/e2e_test_scaffolder.py +0 -0
  895. /package/skills/{senior-qa → quality/senior-qa}/scripts/test_suite_generator.py +0 -0
  896. /package/skills/{tdd-guide → quality/tdd-guide}/HOW_TO_USE.md +0 -0
  897. /package/skills/{tdd-guide → quality/tdd-guide}/README.md +0 -0
  898. /package/skills/{tdd-guide → quality/tdd-guide}/SKILL.md +0 -0
  899. /package/skills/{tdd-guide → quality/tdd-guide}/assets/expected_output.json +0 -0
  900. /package/skills/{tdd-guide → quality/tdd-guide}/assets/sample_input_python.json +0 -0
  901. /package/skills/{tdd-guide → quality/tdd-guide}/assets/sample_input_typescript.json +0 -0
  902. /package/skills/{tdd-guide → quality/tdd-guide}/references/ci-integration.md +0 -0
  903. /package/skills/{tdd-guide → quality/tdd-guide}/references/framework-guide.md +0 -0
  904. /package/skills/{tdd-guide → quality/tdd-guide}/references/tdd-best-practices.md +0 -0
  905. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/coverage_analyzer.py +0 -0
  906. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/fixture_generator.py +0 -0
  907. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/format_detector.py +0 -0
  908. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/framework_adapter.py +0 -0
  909. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/metrics_calculator.py +0 -0
  910. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/output_formatter.py +0 -0
  911. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/tdd_workflow.py +0 -0
  912. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/test_generator.py +0 -0
  913. /package/skills/{brainstorming → workflow/brainstorming}/scripts/frame-template.html +0 -0
  914. /package/skills/{brainstorming → workflow/brainstorming}/scripts/server.cjs +0 -0
@@ -0,0 +1,978 @@
1
+ # Model Validation
2
+
3
+ ---
4
+
5
+ ## Overview
6
+
7
+ Model validation ensures models meet quality standards before production deployment. It encompasses offline evaluation, online testing, and continuous monitoring to catch performance degradation, data drift, and model failures.
8
+
9
+ ## When to Use This Reference
10
+
11
+ - Implementing offline model evaluation strategies
12
+ - Setting up A/B testing frameworks
13
+ - Building shadow deployment pipelines
14
+ - Creating model comparison workflows
15
+ - Implementing continuous model monitoring
16
+
17
+ ## When NOT to Use
18
+
19
+ - Quick model prototyping
20
+ - One-off analysis without deployment
21
+ - Models with no production requirements
22
+
23
+ ---
24
+
25
+ ## Offline Evaluation
26
+
27
+ ### Comprehensive Evaluation Suite
28
+
29
+ ```python
30
+ from dataclasses import dataclass
31
+ from typing import Optional
32
+ import numpy as np
33
+ import pandas as pd
34
+ from sklearn.metrics import (
35
+ accuracy_score, precision_score, recall_score, f1_score,
36
+ roc_auc_score, average_precision_score, confusion_matrix,
37
+ mean_squared_error, mean_absolute_error, r2_score,
38
+ )
39
+
40
@dataclass
class ClassificationMetrics:
    """Scores produced by one classification evaluation run.

    ``roc_auc`` and ``pr_auc`` hold ``None`` when they were not computed.
    """

    accuracy: float
    precision: float
    recall: float
    f1: float
    roc_auc: Optional[float]
    pr_auc: Optional[float]
    confusion_matrix: np.ndarray

    def to_dict(self) -> dict:
        """Return the scalar scores as a dict (the confusion matrix is omitted)."""
        scalar_fields = ("accuracy", "precision", "recall", "f1", "roc_auc", "pr_auc")
        return {name: getattr(self, name) for name in scalar_fields}
60
+
61
@dataclass
class RegressionMetrics:
    """Scores produced by one regression evaluation run.

    ``mape`` holds ``None`` when it could not be computed.
    """

    mse: float
    rmse: float
    mae: float
    r2: float
    mape: Optional[float]

    def to_dict(self) -> dict:
        """Return every score as a plain dict keyed by metric name."""
        metric_names = ("mse", "rmse", "mae", "r2", "mape")
        return {name: getattr(self, name) for name in metric_names}
78
+
79
class ModelEvaluator:
    """Compute offline metrics for classification or regression predictions.

    Args:
        task_type: ``"classification"`` makes :meth:`evaluate_by_segment` use
            classification metrics; any other value makes it use regression
            metrics.
    """

    def __init__(self, task_type: str = "classification"):
        self.task_type = task_type

    def evaluate_classification(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
        y_prob: Optional[np.ndarray] = None,
        average: str = "weighted",
    ) -> ClassificationMetrics:
        """Evaluate classification predictions.

        Args:
            y_true: Ground-truth labels.
            y_pred: Predicted labels.
            y_prob: Optional predicted probabilities. For binary problems either
                a 1-D array of positive-class scores or a 2-D array whose
                column 1 is the positive class.
            average: Averaging mode forwarded to precision/recall/F1 and to the
                multiclass ROC AUC.

        Returns:
            A ``ClassificationMetrics``. ``roc_auc`` / ``pr_auc`` stay ``None``
            when ``y_prob`` is missing or when ``y_true`` contains fewer than
            two distinct classes (the AUC is undefined in that case).
            ``pr_auc`` is only computed for binary problems.
        """
        roc_auc = None
        pr_auc = None

        if y_prob is not None:
            y_prob = np.asarray(y_prob)
            n_classes = len(np.unique(y_true))

            if n_classes == 2:
                # Binary: reduce a (n, 2) probability matrix to positive-class scores.
                y_prob_pos = y_prob[:, 1] if y_prob.ndim == 2 else y_prob
                roc_auc = roc_auc_score(y_true, y_prob_pos)
                pr_auc = average_precision_score(y_true, y_prob_pos)
            elif n_classes > 2:
                roc_auc = roc_auc_score(
                    y_true, y_prob, multi_class="ovr", average=average
                )
            # n_classes < 2 (e.g. a small segment with one label): ranking
            # metrics are undefined, so they are reported as None instead of
            # letting the scorer fail.

        return ClassificationMetrics(
            accuracy=accuracy_score(y_true, y_pred),
            precision=precision_score(y_true, y_pred, average=average, zero_division=0),
            recall=recall_score(y_true, y_pred, average=average, zero_division=0),
            f1=f1_score(y_true, y_pred, average=average, zero_division=0),
            roc_auc=roc_auc,
            pr_auc=pr_auc,
            confusion_matrix=confusion_matrix(y_true, y_pred),
        )

    def evaluate_regression(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
    ) -> RegressionMetrics:
        """Evaluate regression predictions.

        MAPE is computed only over samples whose true value is non-zero and is
        ``None`` when every true value is zero.
        """
        # Accept lists as well as arrays; boolean masking below needs arrays.
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)

        mse = mean_squared_error(y_true, y_pred)

        nonzero = y_true != 0
        if nonzero.any():
            relative_error = (y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero]
            mape = np.mean(np.abs(relative_error)) * 100
        else:
            mape = None

        return RegressionMetrics(
            mse=mse,
            rmse=np.sqrt(mse),
            mae=mean_absolute_error(y_true, y_pred),
            r2=r2_score(y_true, y_pred),
            mape=mape,
        )

    def evaluate_by_segment(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
        segments: np.ndarray,
        y_prob: Optional[np.ndarray] = None,
    ) -> dict:
        """Evaluate each distinct value of ``segments`` separately.

        Returns:
            Dict mapping each segment value to the ``to_dict()`` output of the
            metrics computed on that segment's rows.
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        segments = np.asarray(segments)
        if y_prob is not None:
            y_prob = np.asarray(y_prob)

        results = {}

        for segment in np.unique(segments):
            mask = segments == segment

            if self.task_type == "classification":
                segment_prob = y_prob[mask] if y_prob is not None else None
                metrics = self.evaluate_classification(
                    y_true[mask], y_pred[mask], segment_prob
                )
            else:
                metrics = self.evaluate_regression(y_true[mask], y_pred[mask])

            results[segment] = metrics.to_dict()

        return results
168
+ ```
169
+
170
+ ### Cross-Validation Framework
171
+
172
+ ```python
173
+ from sklearn.model_selection import (
174
+ KFold, StratifiedKFold, TimeSeriesSplit, cross_val_score
175
+ )
176
+ import numpy as np
177
+ from typing import Callable
178
+
179
class CrossValidator:
    """Cross-validation helper that reports per-fold and aggregated metrics.

    Args:
        n_splits: Number of folds.
        shuffle: Whether K-fold splitters shuffle before splitting.
        random_state: Seed for shuffling; only used when ``shuffle`` is true.
    """

    def __init__(
        self,
        n_splits: int = 5,
        shuffle: bool = True,
        random_state: int = 42,
    ):
        self.n_splits = n_splits
        self.shuffle = shuffle
        self.random_state = random_state

    def _kfold_kwargs(self) -> dict:
        """Build splitter kwargs, passing ``random_state`` only when shuffling."""
        kwargs = {"n_splits": self.n_splits, "shuffle": self.shuffle}
        if self.shuffle:
            # scikit-learn rejects a random_state when shuffle=False.
            kwargs["random_state"] = self.random_state
        return kwargs

    def validate_classification(
        self,
        model,
        X: np.ndarray,
        y: np.ndarray,
        stratified: bool = True,
    ) -> dict:
        """Run (optionally stratified) k-fold cross-validation for a classifier.

        A fresh clone of ``model`` is fitted on every fold. Probabilities are
        passed to the evaluator when the model exposes ``predict_proba``.

        Returns:
            Aggregated metrics as produced by ``_aggregate_cv_results``.
        """
        from sklearn.base import clone

        splitter_cls = StratifiedKFold if stratified else KFold
        cv = splitter_cls(**self._kfold_kwargs())

        evaluator = ModelEvaluator("classification")
        fold_metrics = []

        for train_idx, val_idx in cv.split(X, y):
            X_train, X_val = X[train_idx], X[val_idx]
            y_train, y_val = y[train_idx], y[val_idx]

            fold_model = clone(model)
            fold_model.fit(X_train, y_train)

            y_pred = fold_model.predict(X_val)
            y_prob = None
            if hasattr(fold_model, "predict_proba"):
                y_prob = fold_model.predict_proba(X_val)

            metrics = evaluator.evaluate_classification(y_val, y_pred, y_prob)
            fold_metrics.append(metrics.to_dict())

        return self._aggregate_cv_results(fold_metrics)

    def validate_time_series(
        self,
        model,
        X: np.ndarray,
        y: np.ndarray,
        gap: int = 0,
    ) -> dict:
        """Run time-series cross-validation with regression metrics.

        Args:
            gap: Forwarded to ``TimeSeriesSplit`` as its ``gap`` argument.

        Returns:
            Aggregated metrics as produced by ``_aggregate_cv_results``.
        """
        from sklearn.base import clone

        cv = TimeSeriesSplit(n_splits=self.n_splits, gap=gap)
        evaluator = ModelEvaluator("regression")
        fold_metrics = []

        for train_idx, val_idx in cv.split(X):
            X_train, X_val = X[train_idx], X[val_idx]
            y_train, y_val = y[train_idx], y[val_idx]

            fold_model = clone(model)
            fold_model.fit(X_train, y_train)

            y_pred = fold_model.predict(X_val)
            metrics = evaluator.evaluate_regression(y_val, y_pred)
            fold_metrics.append(metrics.to_dict())

        return self._aggregate_cv_results(fold_metrics)

    def _aggregate_cv_results(self, fold_metrics: list[dict]) -> dict:
        """Aggregate per-fold metric dicts into mean/std/min/max summaries.

        ``None`` values are ignored; a metric that is ``None`` in every fold is
        left out of the result. An empty ``fold_metrics`` yields ``{}``.
        """
        if not fold_metrics:
            return {}

        aggregated = {}

        # The first fold defines which metrics are reported.
        for key in fold_metrics[0].keys():
            values = [m[key] for m in fold_metrics if m.get(key) is not None]
            if values:
                aggregated[key] = {
                    "mean": np.mean(values),
                    "std": np.std(values),
                    "min": np.min(values),
                    "max": np.max(values),
                    "values": values,
                }

        return aggregated
278
+ ```
279
+
280
+ ---
281
+
282
+ ## Model Comparison
283
+
284
+ ### Statistical Comparison
285
+
286
+ ```python
287
+ from scipy import stats
288
+ import numpy as np
289
+ from dataclasses import dataclass
290
+
291
@dataclass
class ComparisonResult:
    """Outcome of a statistical comparison between two models (A and B)."""

    # Summary score of each model and their difference.
    model_a_mean: float
    model_b_mean: float
    difference: float

    # Test outcome.
    p_value: float
    significant: bool
    confidence_interval: tuple[float, float]

    # Identifier of the statistical test that produced this result.
    test_used: str
301
+
302
class ModelComparator:
    """Statistical comparison of two models' performance.

    Args:
        significance_level: Alpha used both for the ``significant`` flag and
            for the width of the reported confidence intervals.
    """

    def __init__(self, significance_level: float = 0.05):
        self.significance_level = significance_level

    def paired_t_test(
        self,
        scores_a: np.ndarray,
        scores_b: np.ndarray,
    ) -> ComparisonResult:
        """Paired t-test on matched scores (e.g. per-fold CV scores)."""
        scores_a = np.asarray(scores_a, dtype=float)
        scores_b = np.asarray(scores_b, dtype=float)

        _, p_value = stats.ttest_rel(scores_a, scores_b)

        differences = scores_a - scores_b
        mean_diff = np.mean(differences)
        std_diff = np.std(differences, ddof=1)
        n = len(differences)

        # (1 - significance_level) confidence interval for the mean difference.
        t_critical = stats.t.ppf(1 - self.significance_level / 2, n - 1)
        margin = t_critical * std_diff / np.sqrt(n)
        ci = (mean_diff - margin, mean_diff + margin)

        return ComparisonResult(
            model_a_mean=np.mean(scores_a),
            model_b_mean=np.mean(scores_b),
            difference=mean_diff,
            p_value=p_value,
            significant=p_value < self.significance_level,
            confidence_interval=ci,
            test_used="paired_t_test",
        )

    def wilcoxon_test(
        self,
        scores_a: np.ndarray,
        scores_b: np.ndarray,
    ) -> ComparisonResult:
        """Wilcoxon signed-rank test (non-parametric) on matched scores."""
        scores_a = np.asarray(scores_a, dtype=float)
        scores_b = np.asarray(scores_b, dtype=float)

        _, p_value = stats.wilcoxon(scores_a, scores_b)

        differences = scores_a - scores_b
        mean_diff = np.mean(differences)

        # Bootstrap interval at the same alpha that drives `significant`.
        ci = self._bootstrap_ci(differences, alpha=self.significance_level)

        return ComparisonResult(
            model_a_mean=np.mean(scores_a),
            model_b_mean=np.mean(scores_b),
            difference=mean_diff,
            p_value=p_value,
            significant=p_value < self.significance_level,
            confidence_interval=ci,
            test_used="wilcoxon",
        )

    def mcnemar_test(
        self,
        y_true: np.ndarray,
        pred_a: np.ndarray,
        pred_b: np.ndarray,
    ) -> ComparisonResult:
        """McNemar's test comparing two classifiers on the same samples.

        Uses an exact binomial test when there are fewer than 25 discordant
        pairs and the continuity-corrected chi-square approximation otherwise.
        When the classifiers never disagree on correctness the p-value is 1.0.
        No confidence interval is computed; it is reported as ``(None, None)``.
        """
        y_true = np.asarray(y_true)
        correct_a = np.asarray(pred_a) == y_true
        correct_b = np.asarray(pred_b) == y_true

        # b: A correct, B wrong; c: A wrong, B correct
        b = int(np.sum(correct_a & ~correct_b))
        c = int(np.sum(~correct_a & correct_b))
        n_discordant = b + c

        if n_discordant == 0:
            # No discordant pairs: no evidence of a difference, and the
            # binomial test is undefined for zero trials.
            p_value = 1.0
        elif n_discordant < 25:
            # `binom_test` was removed from recent SciPy; prefer `binomtest`
            # and fall back only on old installations that lack it.
            binomtest = getattr(stats, "binomtest", None)
            if binomtest is not None:
                p_value = binomtest(b, n_discordant, 0.5).pvalue
            else:
                p_value = stats.binom_test(b, n_discordant, 0.5)
        else:
            statistic = (abs(b - c) - 1) ** 2 / n_discordant
            # Survival function avoids cancellation in 1 - cdf for tiny p-values.
            p_value = stats.chi2.sf(statistic, 1)

        acc_a = np.mean(correct_a)
        acc_b = np.mean(correct_b)

        return ComparisonResult(
            model_a_mean=acc_a,
            model_b_mean=acc_b,
            difference=acc_a - acc_b,
            p_value=p_value,
            significant=p_value < self.significance_level,
            confidence_interval=(None, None),
            test_used="mcnemar",
        )

    def _bootstrap_ci(
        self,
        data: np.ndarray,
        n_bootstrap: int = 10000,
        alpha: float = 0.05,
    ) -> tuple[float, float]:
        """Percentile-bootstrap confidence interval for the mean of ``data``.

        Resamples ``data`` with replacement ``n_bootstrap`` times using NumPy's
        global random state and returns the ``alpha/2`` and ``1 - alpha/2``
        percentiles of the resampled means.
        """
        data = np.asarray(data)
        bootstrapped_means = [
            np.mean(np.random.choice(data, size=len(data), replace=True))
            for _ in range(n_bootstrap)
        ]

        lower = np.percentile(bootstrapped_means, alpha / 2 * 100)
        upper = np.percentile(bootstrapped_means, (1 - alpha / 2) * 100)

        return (lower, upper)
413
+ ```
414
+
415
+ ---
416
+
417
+ ## A/B Testing
418
+
419
+ ### Online Experiment Framework
420
+
421
+ ```python
422
+ from dataclasses import dataclass
423
+ from datetime import datetime
424
+ from typing import Optional
425
+ import numpy as np
426
+ import hashlib
427
+ import json
428
+
429
@dataclass
class Experiment:
    """Configuration record describing one A/B test."""

    # Identity
    experiment_id: str
    name: str

    # Models under comparison
    control_model: str
    treatment_model: str

    # Fraction of traffic routed to the treatment model
    traffic_split: float

    # Time window; end_time may be None
    start_time: datetime
    end_time: Optional[datetime]

    # Analysis settings
    metrics: list[str]
    minimum_sample_size: int

    status: str = "active"
442
+
443
class ABTestRouter:
    """Assign users to the control or treatment arm of an experiment."""

    def __init__(self, experiment: Experiment):
        self.experiment = experiment

    def get_variant(self, user_id: str) -> str:
        """Return ``"treatment"`` or ``"control"`` for ``user_id``.

        The assignment is a pure function of the experiment id and the user
        id, so the same user always lands in the same arm of an experiment.
        """
        key = f"{self.experiment.experiment_id}:{user_id}".encode()
        # Map the 128-bit MD5 digest onto [0, 1).
        bucket = int.from_bytes(hashlib.md5(key).digest(), "big") / (2**128)

        return "treatment" if bucket < self.experiment.traffic_split else "control"

    def get_model(self, user_id: str) -> str:
        """Return the model name that should serve ``user_id``."""
        if self.get_variant(user_id) == "treatment":
            return self.experiment.treatment_model
        return self.experiment.control_model
467
+
468
class ABTestAnalyzer:
    """Analyze A/B test results.

    Args:
        significance_level: Alpha used for the ``significant`` flag, the
            confidence intervals and the sample-size calculation.
    """

    def __init__(self, significance_level: float = 0.05):
        self.significance_level = significance_level

    def analyze_conversion(
        self,
        control_conversions: int,
        control_total: int,
        treatment_conversions: int,
        treatment_total: int,
    ) -> dict:
        """Analyze a conversion-rate experiment with a two-proportion z-test.

        Returns:
            Dict with both rates, absolute difference, relative lift (0 when the
            control rate is 0), two-sided p-value, significance flag, an
            unpooled confidence interval for the difference and sample sizes.
        """
        control_rate = control_conversions / control_total
        treatment_rate = treatment_conversions / treatment_total
        difference = treatment_rate - control_rate

        # Two-proportion z-test with a pooled standard error.
        pooled_rate = (control_conversions + treatment_conversions) / (
            control_total + treatment_total
        )
        se = np.sqrt(
            pooled_rate * (1 - pooled_rate) * (1 / control_total + 1 / treatment_total)
        )

        if se > 0:
            z_stat = difference / se
            p_value = 2 * stats.norm.sf(abs(z_stat))
        else:
            # Pooled rate is exactly 0 or 1: both arms are identical, so there
            # is no evidence of a difference (avoids a 0/0 -> NaN p-value).
            p_value = 1.0

        # Relative lift
        lift = difference / control_rate if control_rate > 0 else 0

        # Confidence interval for the difference (unpooled standard error).
        se_diff = np.sqrt(
            control_rate * (1 - control_rate) / control_total
            + treatment_rate * (1 - treatment_rate) / treatment_total
        )
        z_critical = stats.norm.ppf(1 - self.significance_level / 2)
        ci = (
            difference - z_critical * se_diff,
            difference + z_critical * se_diff,
        )

        return {
            "control_rate": control_rate,
            "treatment_rate": treatment_rate,
            "absolute_difference": difference,
            "relative_lift": lift,
            "p_value": p_value,
            "significant": p_value < self.significance_level,
            "confidence_interval": ci,
            "control_sample_size": control_total,
            "treatment_sample_size": treatment_total,
        }

    def analyze_continuous_metric(
        self,
        control_values: np.ndarray,
        treatment_values: np.ndarray,
    ) -> dict:
        """Analyze a continuous metric (e.g., revenue, time) with Welch's t-test.

        The confidence interval uses the same sample variances and
        Welch-Satterthwaite degrees of freedom as the test itself, so the
        interval and the p-value agree with each other.

        NOTE(review): ``relative_lift`` is reported as 0 whenever the control
        mean is not positive; confirm that is the intended convention for
        metrics that can be negative.
        """
        n_control = len(control_values)
        n_treatment = len(treatment_values)
        control_mean = np.mean(control_values)
        treatment_mean = np.mean(treatment_values)
        difference = treatment_mean - control_mean

        # Welch's t-test (unequal variances)
        _, p_value = stats.ttest_ind(
            treatment_values, control_values, equal_var=False
        )

        lift = difference / control_mean if control_mean > 0 else 0

        # Per-arm variance of the mean, using the unbiased sample variance.
        var_control = np.var(control_values, ddof=1) / n_control
        var_treatment = np.var(treatment_values, ddof=1) / n_treatment
        se_diff = np.sqrt(var_control + var_treatment)

        # Welch-Satterthwaite degrees of freedom; fall back to the conservative
        # min(n) - 1 when both variances are zero (or undefined).
        dof_denominator = (
            var_control**2 / (n_control - 1) + var_treatment**2 / (n_treatment - 1)
            if min(n_control, n_treatment) > 1
            else 0.0
        )
        if dof_denominator > 0:
            dof = (var_control + var_treatment) ** 2 / dof_denominator
        else:
            dof = min(n_control, n_treatment) - 1

        t_critical = stats.t.ppf(1 - self.significance_level / 2, dof)
        ci = (
            difference - t_critical * se_diff,
            difference + t_critical * se_diff,
        )

        return {
            "control_mean": control_mean,
            "treatment_mean": treatment_mean,
            "absolute_difference": difference,
            "relative_lift": lift,
            "p_value": p_value,
            "significant": p_value < self.significance_level,
            "confidence_interval": ci,
            "control_sample_size": n_control,
            "treatment_sample_size": n_treatment,
        }

    def calculate_sample_size(
        self,
        baseline_rate: float,
        minimum_detectable_effect: float,
        power: float = 0.8,
    ) -> int:
        """Calculate the required sample size per variant for a conversion test.

        Args:
            baseline_rate: Control conversion rate, strictly between 0 and 1.
            minimum_detectable_effect: Relative change to detect (0.2 means the
                treatment rate is ``baseline_rate * 1.2``). Must be non-zero and
                keep the treatment rate strictly between 0 and 1.
            power: Desired statistical power, strictly between 0 and 1.

        Raises:
            ValueError: If any argument is outside the ranges above.
        """
        if not 0 < baseline_rate < 1:
            raise ValueError("baseline_rate must be strictly between 0 and 1")
        if minimum_detectable_effect == 0:
            raise ValueError("minimum_detectable_effect must be non-zero")
        if not 0 < power < 1:
            raise ValueError("power must be strictly between 0 and 1")

        p1 = baseline_rate
        p2 = baseline_rate * (1 + minimum_detectable_effect)
        if not 0 < p2 < 1:
            raise ValueError(
                "baseline_rate * (1 + minimum_detectable_effect) must be "
                "strictly between 0 and 1"
            )

        z_alpha = stats.norm.ppf(1 - self.significance_level / 2)
        z_beta = stats.norm.ppf(power)

        p_bar = (p1 + p2) / 2

        n = (
            z_alpha * np.sqrt(2 * p_bar * (1 - p_bar))
            + z_beta * np.sqrt(p1 * (1 - p1) + p2 * (1 - p2))
        ) ** 2 / (p2 - p1) ** 2

        return int(np.ceil(n))
587
+ ```
588
+
589
+ ---
590
+
591
+ ## Shadow Deployment
592
+
593
+ ### Shadow Mode Pipeline
594
+
595
+ ```python
596
+ from dataclasses import dataclass
597
+ from datetime import datetime
598
+ from typing import Any, Optional
599
+ import logging
600
+ import json
601
+
602
+ logger = logging.getLogger(__name__)
603
+
604
@dataclass
class PredictionComparison:
    """One request's production prediction next to its shadow prediction."""

    request_id: str
    timestamp: datetime

    # Model outputs
    production_prediction: Any
    shadow_prediction: Any

    # Inference time of each model, in milliseconds
    production_latency_ms: float
    shadow_latency_ms: float

    # Whether the two predictions were judged equal
    agreement: bool

    # Input features of the request, when recorded
    features: Optional[dict] = None
615
+
616
class ShadowDeployment:
    """Serve production predictions while recording a shadow model's output.

    Every successful shadow call is kept in ``self.comparisons`` and appended
    as one JSON line to ``log_path``. Failures of the shadow path (model
    errors, time-outs, log I/O errors) are logged and never reach the caller.

    Args:
        production_model: Object whose ``predict(features)`` result is returned.
        shadow_model: Object whose ``predict(features)`` result is only logged.
        log_path: JSON-lines file the comparisons are appended to.
        shadow_timeout_s: Maximum time to wait for the shadow prediction.

    NOTE(review): ``self.comparisons`` grows without bound; long-running
    services should drain or cap it.
    """

    def __init__(
        self,
        production_model,
        shadow_model,
        log_path: str = "/var/log/shadow_predictions.jsonl",
        shadow_timeout_s: float = 5.0,
    ):
        self.production_model = production_model
        self.shadow_model = shadow_model
        self.log_path = log_path
        self.shadow_timeout_s = shadow_timeout_s
        self.comparisons: list[PredictionComparison] = []

    def predict(
        self,
        features: dict,
        request_id: Optional[str] = None,
    ) -> Any:
        """Return the production prediction and record the shadow comparison.

        The production model runs first on the calling thread. The shadow
        model then runs on a worker thread so the wait can be bounded by
        ``shadow_timeout_s``. A random UUID is used when ``request_id`` is
        not supplied.
        """
        import time
        import uuid
        import concurrent.futures

        request_id = request_id or str(uuid.uuid4())

        # Production prediction (synchronous, used for response)
        prod_start = time.perf_counter()
        production_pred = self.production_model.predict(features)
        prod_latency = (time.perf_counter() - prod_start) * 1000

        # Shadow prediction (logged but not returned)
        def run_shadow():
            shadow_start = time.perf_counter()
            shadow_pred = self.shadow_model.predict(features)
            shadow_latency = (time.perf_counter() - shadow_start) * 1000
            return shadow_pred, shadow_latency

        executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        try:
            future = executor.submit(run_shadow)
            shadow_pred, shadow_latency = future.result(timeout=self.shadow_timeout_s)

            comparison = PredictionComparison(
                request_id=request_id,
                timestamp=datetime.utcnow(),
                production_prediction=production_pred,
                shadow_prediction=shadow_pred,
                production_latency_ms=prod_latency,
                shadow_latency_ms=shadow_latency,
                agreement=self._check_agreement(production_pred, shadow_pred),
                features=features,
            )

            self._log_comparison(comparison)

        except concurrent.futures.TimeoutError:
            logger.warning(f"Shadow prediction timed out for {request_id}")
        except Exception:
            # The shadow path is best-effort: it must not break the response.
            logger.exception(f"Shadow prediction failed for {request_id}")
        finally:
            # Do not wait for a slow shadow call; a `with` block would block
            # here until the worker finished and defeat the timeout.
            executor.shutdown(wait=False)

        return production_pred

    def _check_agreement(self, prod_pred: Any, shadow_pred: Any) -> bool:
        """Return whether the two predictions agree.

        Lists/arrays are compared with a relative tolerance of 1e-3; when that
        is not possible (non-numeric values, incompatible shapes) exact
        element-wise equality is used. Everything else is compared with ``==``.
        """
        if isinstance(prod_pred, (list, np.ndarray)):
            try:
                return bool(np.allclose(prod_pred, shadow_pred, rtol=1e-3))
            except (TypeError, ValueError):
                return bool(np.array_equal(prod_pred, shadow_pred))
        # np.all collapses an array-valued `==` and yields a JSON-serializable bool.
        return bool(np.all(prod_pred == shadow_pred))

    def _log_comparison(self, comparison: PredictionComparison) -> None:
        """Record ``comparison`` in memory and append it to the log file.

        The in-memory record is stored first so that analysis still works when
        the file cannot be written (the I/O error is raised to the caller).
        """
        self.comparisons.append(comparison)

        log_entry = {
            "request_id": comparison.request_id,
            "timestamp": comparison.timestamp.isoformat(),
            "production_prediction": str(comparison.production_prediction),
            "shadow_prediction": str(comparison.shadow_prediction),
            "production_latency_ms": comparison.production_latency_ms,
            "shadow_latency_ms": comparison.shadow_latency_ms,
            "agreement": comparison.agreement,
        }

        with open(self.log_path, "a") as f:
            f.write(json.dumps(log_entry) + "\n")

    def analyze_shadow_performance(self) -> dict:
        """Summarize agreement rate and latency percentiles of recorded comparisons.

        Returns an empty dict when nothing has been recorded yet.
        """
        if not self.comparisons:
            return {}

        agreements = [c.agreement for c in self.comparisons]
        prod_latencies = [c.production_latency_ms for c in self.comparisons]
        shadow_latencies = [c.shadow_latency_ms for c in self.comparisons]

        return {
            "total_comparisons": len(self.comparisons),
            "agreement_rate": np.mean(agreements),
            "production_latency_p50": np.percentile(prod_latencies, 50),
            "production_latency_p99": np.percentile(prod_latencies, 99),
            "shadow_latency_p50": np.percentile(shadow_latencies, 50),
            "shadow_latency_p99": np.percentile(shadow_latencies, 99),
            "latency_difference_mean": np.mean(
                [s - p for s, p in zip(shadow_latencies, prod_latencies)]
            ),
        }
721
+ ```
722
+
723
+ ---
724
+
725
+ ## Validation Pipeline Integration
726
+
727
+ ### Complete Validation Workflow
728
+
729
+ ```python
730
+ from enum import Enum
731
+ from dataclasses import dataclass
732
+ from typing import Optional
733
+
734
class ValidationStatus(Enum):
    """Possible outcomes of a single validation check."""

    PASSED = "passed"    # check met its threshold
    FAILED = "failed"    # check did not meet its threshold
    WARNING = "warning"  # check passed only partially
738
+
739
@dataclass
class ValidationResult:
    """Outcome of one named validation check."""

    check_name: str           # identifier of the check that produced this result
    status: ValidationStatus  # passed / failed / warning
    message: str              # human-readable explanation of the status

    # Optional supporting numbers for the check
    details: Optional[dict] = None
746
+
747
+ class ModelValidator:
748
+ """Complete model validation workflow."""
749
+
750
+ def __init__(
751
+ self,
752
+ accuracy_threshold: float = 0.8,
753
+ latency_threshold_ms: float = 100,
754
+ drift_threshold: float = 0.2,
755
+ ):
756
+ self.accuracy_threshold = accuracy_threshold
757
+ self.latency_threshold_ms = latency_threshold_ms
758
+ self.drift_threshold = drift_threshold
759
+ self.results: list[ValidationResult] = []
760
+
761
+ def validate_performance(
762
+ self,
763
+ y_true: np.ndarray,
764
+ y_pred: np.ndarray,
765
+ ) -> ValidationResult:
766
+ """Validate model performance metrics."""
767
+ evaluator = ModelEvaluator("classification")
768
+ metrics = evaluator.evaluate_classification(y_true, y_pred)
769
+
770
+ if metrics.accuracy >= self.accuracy_threshold:
771
+ status = ValidationStatus.PASSED
772
+ message = f"Accuracy {metrics.accuracy:.4f} meets threshold"
773
+ else:
774
+ status = ValidationStatus.FAILED
775
+ message = f"Accuracy {metrics.accuracy:.4f} below threshold {self.accuracy_threshold}"
776
+
777
+ result = ValidationResult(
778
+ check_name="performance",
779
+ status=status,
780
+ message=message,
781
+ details=metrics.to_dict(),
782
+ )
783
+ self.results.append(result)
784
+ return result
785
+
786
+ def validate_latency(
787
+ self,
788
+ model,
789
+ sample_input: np.ndarray,
790
+ n_iterations: int = 100,
791
+ ) -> ValidationResult:
792
+ """Validate inference latency."""
793
+ import time
794
+
795
+ latencies = []
796
+ for _ in range(n_iterations):
797
+ start = time.time()
798
+ model.predict(sample_input)
799
+ latencies.append((time.time() - start) * 1000)
800
+
801
+ p50 = np.percentile(latencies, 50)
802
+ p99 = np.percentile(latencies, 99)
803
+
804
+ if p99 <= self.latency_threshold_ms:
805
+ status = ValidationStatus.PASSED
806
+ message = f"P99 latency {p99:.2f}ms meets threshold"
807
+ elif p50 <= self.latency_threshold_ms:
808
+ status = ValidationStatus.WARNING
809
+ message = f"P50 OK but P99 {p99:.2f}ms exceeds threshold"
810
+ else:
811
+ status = ValidationStatus.FAILED
812
+ message = f"P99 latency {p99:.2f}ms exceeds threshold"
813
+
814
+ result = ValidationResult(
815
+ check_name="latency",
816
+ status=status,
817
+ message=message,
818
+ details={"p50_ms": p50, "p99_ms": p99, "mean_ms": np.mean(latencies)},
819
+ )
820
+ self.results.append(result)
821
+ return result
822
+
823
+ def validate_data_compatibility(
824
+ self,
825
+ model,
826
+ expected_features: list[str],
827
+ sample_data: pd.DataFrame,
828
+ ) -> ValidationResult:
829
+ """Validate model accepts expected input format."""
830
+ missing_features = set(expected_features) - set(sample_data.columns)
831
+ extra_features = set(sample_data.columns) - set(expected_features)
832
+
833
+ if missing_features:
834
+ status = ValidationStatus.FAILED
835
+ message = f"Missing features: {missing_features}"
836
+ elif extra_features:
837
+ status = ValidationStatus.WARNING
838
+ message = f"Extra features will be ignored: {extra_features}"
839
+ else:
840
+ status = ValidationStatus.PASSED
841
+ message = "All expected features present"
842
+
843
+ # Try inference
844
+ try:
845
+ model.predict(sample_data[expected_features].head(1))
846
+ except Exception as e:
847
+ status = ValidationStatus.FAILED
848
+ message = f"Inference failed: {str(e)}"
849
+
850
+ result = ValidationResult(
851
+ check_name="data_compatibility",
852
+ status=status,
853
+ message=message,
854
+ details={
855
+ "missing_features": list(missing_features),
856
+ "extra_features": list(extra_features),
857
+ },
858
+ )
859
+ self.results.append(result)
860
+ return result
861
+
862
def validate_vs_baseline(
    self,
    y_true: np.ndarray,
    new_pred: np.ndarray,
    baseline_pred: np.ndarray,
) -> ValidationResult:
    """Compare the new model's predictions against the baseline's.

    Runs ``ModelComparator.mcnemar_test`` on the two prediction sets and
    computes the accuracy of each against ``y_true``.

    Outcome:
        - new accuracy >= baseline and test significant -> PASSED
        - new accuracy >= baseline, not significant     -> WARNING
        - new accuracy <  baseline and test significant -> FAILED
        - new accuracy <  baseline, not significant     -> WARNING

    Returns:
        The ``ValidationResult`` (also appended to ``self.results``) whose
        details hold both accuracies and the test's p-value.
    """
    mcnemar = ModelComparator().mcnemar_test(y_true, new_pred, baseline_pred)

    acc_new = accuracy_score(y_true, new_pred)
    acc_base = accuracy_score(y_true, baseline_pred)

    not_worse = acc_new >= acc_base
    is_significant = mcnemar.significant

    if not_worse and is_significant:
        status = ValidationStatus.PASSED
        message = f"Significant improvement: {acc_new:.4f} vs {acc_base:.4f}"
    elif not_worse:
        status = ValidationStatus.WARNING
        message = f"Improvement not significant: {acc_new:.4f} vs {acc_base:.4f}"
    elif is_significant:
        status = ValidationStatus.FAILED
        message = f"Significant regression: {acc_new:.4f} vs {acc_base:.4f}"
    else:
        status = ValidationStatus.WARNING
        message = f"Minor regression: {acc_new:.4f} vs {acc_base:.4f}"

    details = {
        "new_accuracy": acc_new,
        "baseline_accuracy": acc_base,
        "p_value": mcnemar.p_value,
    }
    outcome = ValidationResult(
        check_name="baseline_comparison",
        status=status,
        message=message,
        details=details,
    )
    self.results.append(outcome)
    return outcome
902
+
903
def get_summary(self) -> dict:
    """Summarize every check recorded in ``self.results``.

    Returns:
        A dict with the overall status value (FAILED if any check failed,
        otherwise WARNING if any check warned, otherwise PASSED), the count
        of results per status, and one ``check``/``status``/``message``
        entry per recorded result, in recording order.
    """

    def tally(wanted) -> int:
        # Number of recorded results whose status equals ``wanted``.
        return len([entry for entry in self.results if entry.status == wanted])

    n_passed = tally(ValidationStatus.PASSED)
    n_warnings = tally(ValidationStatus.WARNING)
    n_failed = tally(ValidationStatus.FAILED)

    # The worst status seen determines the overall verdict.
    if n_failed > 0:
        verdict = ValidationStatus.FAILED
    elif n_warnings > 0:
        verdict = ValidationStatus.WARNING
    else:
        verdict = ValidationStatus.PASSED

    rows = []
    for entry in self.results:
        rows.append(
            {
                "check": entry.check_name,
                "status": entry.status.value,
                "message": entry.message,
            }
        )

    return {
        "overall_status": verdict.value,
        "passed": n_passed,
        "warnings": n_warnings,
        "failed": n_failed,
        "results": rows,
    }
929
+ ```
930
+
931
+ ---
932
+
933
+ ## Best Practices
934
+
935
+ ### Validation Checklist
936
+
937
+ ```python
938
# Items to confirm at each validation stage, keyed by stage name.
VALIDATION_CHECKLIST = dict(
    # Checks run on held-out data before any deployment work.
    offline=[
        "Accuracy/performance metrics meet threshold",
        "Cross-validation shows consistent performance",
        "Model outperforms or matches baseline",
        "Metrics stable across data segments",
    ],
    # Operational checks on the packaged model.
    pre_deployment=[
        "Inference latency within SLA",
        "Memory usage acceptable",
        "Input/output schema validated",
        "Model serialization/loading works",
    ],
    # Checks while the model runs in shadow alongside production.
    shadow=[
        "Shadow predictions logged successfully",
        "Agreement rate with production acceptable",
        "No latency regression",
        "Error rate within bounds",
    ],
    # Checks on the live A/B experiment.
    ab_test=[
        "Sufficient sample size reached",
        "Statistical significance achieved",
        "No negative impact on guardrail metrics",
        "Business metrics improved",
    ],
)
964
+ ```
965
+
966
+ ---
967
+
968
+ ## Related References
969
+
970
+ - `training-pipelines.md` - Model training before validation
971
+ - `experiment-tracking.md` - Logging validation results
972
+ - `pipeline-orchestration.md` - Automated validation workflows
973
+ - `feature-engineering.md` - Feature validation
974
+
975
+ ## Cross-Reference Skills
976
+
977
+ - **Data Engineer** - Data quality validation
978
+ - **DevOps Engineer** - Deployment pipeline integration