aigroup-workflow 2.1.2 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (916)
  1. package/.codex/AGENTS.md +1 -1
  2. package/CLAUDE.md +1 -4
  3. package/README.md +333 -333
  4. package/cli/commands/init.mjs +20 -6
  5. package/cli/utils/scaffold.mjs +39 -9
  6. package/docs/red-flags.md +1 -1
  7. package/docs/rules/entropy.md +1 -1
  8. package/docs/rules/performance.md +1 -1
  9. package/docs/workflow-pipeline.md +8 -6
  10. package/manifests/install-modules.json +223 -133
  11. package/package.json +39 -39
  12. package/scripts/hooks/checks/orchestration-artifacts.cjs +28 -23
  13. package/scripts/hooks/checks/workflow-state.cjs +4 -5
  14. package/scripts/orchestration/lib/orchestrator.cjs +353 -92
  15. package/scripts/orchestration/lib/validate.cjs +145 -0
  16. package/scripts/orchestration/session.cjs +100 -33
  17. package/skills/ai-ml/fine-tuning-expert/SKILL.md +162 -0
  18. package/skills/ai-ml/fine-tuning-expert/references/dataset-preparation.md +540 -0
  19. package/skills/ai-ml/fine-tuning-expert/references/deployment-optimization.md +673 -0
  20. package/skills/ai-ml/fine-tuning-expert/references/evaluation-metrics.md +597 -0
  21. package/skills/ai-ml/fine-tuning-expert/references/hyperparameter-tuning.md +565 -0
  22. package/skills/ai-ml/fine-tuning-expert/references/lora-peft.md +347 -0
  23. package/skills/ai-ml/ml-pipeline/SKILL.md +159 -0
  24. package/skills/ai-ml/ml-pipeline/references/experiment-tracking.md +833 -0
  25. package/skills/ai-ml/ml-pipeline/references/feature-engineering.md +631 -0
  26. package/skills/ai-ml/ml-pipeline/references/model-validation.md +978 -0
  27. package/skills/ai-ml/ml-pipeline/references/pipeline-orchestration.md +907 -0
  28. package/skills/ai-ml/ml-pipeline/references/training-pipelines.md +782 -0
  29. package/skills/ai-ml/rag-architect/SKILL.md +194 -0
  30. package/skills/ai-ml/rag-architect/references/chunking-strategies.md +878 -0
  31. package/skills/ai-ml/rag-architect/references/embedding-models.md +561 -0
  32. package/skills/ai-ml/rag-architect/references/rag-evaluation.md +833 -0
  33. package/skills/ai-ml/rag-architect/references/retrieval-optimization.md +795 -0
  34. package/skills/ai-ml/rag-architect/references/vector-databases.md +589 -0
  35. package/skills/ai-ml/spark-engineer/SKILL.md +148 -0
  36. package/skills/ai-ml/spark-engineer/references/partitioning-caching.md +543 -0
  37. package/skills/ai-ml/spark-engineer/references/performance-tuning.md +544 -0
  38. package/skills/ai-ml/spark-engineer/references/rdd-operations.md +599 -0
  39. package/skills/ai-ml/spark-engineer/references/spark-sql-dataframes.md +474 -0
  40. package/skills/ai-ml/spark-engineer/references/streaming-patterns.md +786 -0
  41. package/skills/backend/api-designer/SKILL.md +217 -0
  42. package/skills/backend/api-designer/references/error-handling.md +541 -0
  43. package/skills/backend/api-designer/references/openapi.md +824 -0
  44. package/skills/backend/api-designer/references/pagination.md +494 -0
  45. package/skills/backend/api-designer/references/rest-patterns.md +335 -0
  46. package/skills/backend/api-designer/references/versioning.md +391 -0
  47. package/skills/backend/architecture-designer/SKILL.md +117 -0
  48. package/skills/backend/architecture-designer/references/adr-template.md +116 -0
  49. package/skills/backend/architecture-designer/references/architecture-patterns.md +111 -0
  50. package/skills/backend/architecture-designer/references/database-selection.md +102 -0
  51. package/skills/backend/architecture-designer/references/nfr-checklist.md +112 -0
  52. package/skills/backend/architecture-designer/references/system-design.md +100 -0
  53. package/skills/backend/code-documenter/SKILL.md +147 -0
  54. package/skills/backend/code-documenter/references/api-docs-fastapi-django.md +166 -0
  55. package/skills/backend/code-documenter/references/api-docs-nestjs-express.md +220 -0
  56. package/skills/backend/code-documenter/references/coverage-reports.md +125 -0
  57. package/skills/backend/code-documenter/references/documentation-systems.md +333 -0
  58. package/skills/backend/code-documenter/references/interactive-api-docs.md +531 -0
  59. package/skills/backend/code-documenter/references/python-docstrings.md +121 -0
  60. package/skills/backend/code-documenter/references/typescript-jsdoc.md +145 -0
  61. package/skills/backend/code-documenter/references/user-guides-tutorials.md +530 -0
  62. package/skills/backend/debugging-wizard/SKILL.md +105 -0
  63. package/skills/backend/debugging-wizard/references/common-patterns.md +132 -0
  64. package/skills/backend/debugging-wizard/references/debugging-tools.md +140 -0
  65. package/skills/backend/debugging-wizard/references/quick-fixes.md +177 -0
  66. package/skills/backend/debugging-wizard/references/strategies.md +142 -0
  67. package/skills/backend/debugging-wizard/references/systematic-debugging.md +367 -0
  68. package/skills/backend/feature-forge/SKILL.md +98 -0
  69. package/skills/backend/feature-forge/references/acceptance-criteria.md +104 -0
  70. package/skills/backend/feature-forge/references/ears-syntax.md +99 -0
  71. package/skills/backend/feature-forge/references/interview-questions.md +150 -0
  72. package/skills/backend/feature-forge/references/pre-discovery-subagents.md +54 -0
  73. package/skills/backend/feature-forge/references/specification-template.md +103 -0
  74. package/skills/backend/fullstack-guardian/SKILL.md +105 -0
  75. package/skills/backend/fullstack-guardian/references/api-design-standards.md +307 -0
  76. package/skills/backend/fullstack-guardian/references/architecture-decisions.md +350 -0
  77. package/skills/backend/fullstack-guardian/references/backend-patterns.md +237 -0
  78. package/skills/backend/fullstack-guardian/references/common-patterns.md +134 -0
  79. package/skills/backend/fullstack-guardian/references/deliverables-checklist.md +354 -0
  80. package/skills/backend/fullstack-guardian/references/design-template.md +91 -0
  81. package/skills/backend/fullstack-guardian/references/error-handling.md +135 -0
  82. package/skills/backend/fullstack-guardian/references/frontend-patterns.md +340 -0
  83. package/skills/backend/fullstack-guardian/references/integration-patterns.md +333 -0
  84. package/skills/backend/fullstack-guardian/references/security-checklist.md +106 -0
  85. package/skills/backend/graphql-architect/SKILL.md +146 -0
  86. package/skills/backend/graphql-architect/references/federation.md +418 -0
  87. package/skills/backend/graphql-architect/references/migration-from-rest.md +1141 -0
  88. package/skills/backend/graphql-architect/references/resolvers.md +425 -0
  89. package/skills/backend/graphql-architect/references/schema-design.md +393 -0
  90. package/skills/backend/graphql-architect/references/security.md +569 -0
  91. package/skills/backend/graphql-architect/references/subscriptions.md +510 -0
  92. package/skills/backend/legacy-modernizer/SKILL.md +137 -0
  93. package/skills/backend/legacy-modernizer/references/legacy-testing.md +381 -0
  94. package/skills/backend/legacy-modernizer/references/migration-strategies.md +423 -0
  95. package/skills/backend/legacy-modernizer/references/refactoring-patterns.md +395 -0
  96. package/skills/backend/legacy-modernizer/references/strangler-fig-pattern.md +281 -0
  97. package/skills/backend/legacy-modernizer/references/system-assessment.md +487 -0
  98. package/skills/backend/microservices-architect/SKILL.md +164 -0
  99. package/skills/backend/microservices-architect/references/communication.md +499 -0
  100. package/skills/backend/microservices-architect/references/data.md +721 -0
  101. package/skills/backend/microservices-architect/references/decomposition.md +344 -0
  102. package/skills/backend/microservices-architect/references/observability.md +805 -0
  103. package/skills/backend/microservices-architect/references/patterns.md +603 -0
  104. package/skills/database/database-optimizer/SKILL.md +147 -0
  105. package/skills/database/database-optimizer/references/index-strategies.md +331 -0
  106. package/skills/database/database-optimizer/references/monitoring-analysis.md +501 -0
  107. package/skills/database/database-optimizer/references/mysql-tuning.md +452 -0
  108. package/skills/database/database-optimizer/references/postgresql-tuning.md +413 -0
  109. package/skills/database/database-optimizer/references/query-optimization.md +251 -0
  110. package/skills/database/postgres-pro/SKILL.md +152 -0
  111. package/skills/database/postgres-pro/references/extensions.md +404 -0
  112. package/skills/database/postgres-pro/references/jsonb.md +321 -0
  113. package/skills/database/postgres-pro/references/maintenance.md +481 -0
  114. package/skills/database/postgres-pro/references/performance.md +265 -0
  115. package/skills/database/postgres-pro/references/replication.md +446 -0
  116. package/skills/database/sql-pro/SKILL.md +129 -0
  117. package/skills/database/sql-pro/references/database-design.md +402 -0
  118. package/skills/database/sql-pro/references/dialect-differences.md +419 -0
  119. package/skills/database/sql-pro/references/optimization.md +384 -0
  120. package/skills/database/sql-pro/references/query-patterns.md +285 -0
  121. package/skills/database/sql-pro/references/window-functions.md +328 -0
  122. package/skills/dotnet/csharp-developer/SKILL.md +125 -0
  123. package/skills/dotnet/csharp-developer/references/aspnet-core.md +394 -0
  124. package/skills/dotnet/csharp-developer/references/blazor.md +553 -0
  125. package/skills/dotnet/csharp-developer/references/entity-framework.md +409 -0
  126. package/skills/dotnet/csharp-developer/references/modern-csharp.md +248 -0
  127. package/skills/dotnet/csharp-developer/references/performance.md +498 -0
  128. package/skills/dotnet/dotnet-core-expert/SKILL.md +138 -0
  129. package/skills/dotnet/dotnet-core-expert/references/authentication.md +546 -0
  130. package/skills/dotnet/dotnet-core-expert/references/clean-architecture.md +455 -0
  131. package/skills/dotnet/dotnet-core-expert/references/cloud-native.md +548 -0
  132. package/skills/dotnet/dotnet-core-expert/references/entity-framework.md +440 -0
  133. package/skills/dotnet/dotnet-core-expert/references/minimal-apis.md +319 -0
  134. package/skills/frontend/angular-architect/SKILL.md +152 -0
  135. package/skills/frontend/angular-architect/references/components.md +297 -0
  136. package/skills/frontend/angular-architect/references/ngrx.md +401 -0
  137. package/skills/frontend/angular-architect/references/routing.md +361 -0
  138. package/skills/frontend/angular-architect/references/rxjs.md +319 -0
  139. package/skills/frontend/angular-architect/references/testing.md +405 -0
  140. package/skills/frontend/flutter-expert/SKILL.md +138 -0
  141. package/skills/frontend/flutter-expert/references/bloc-state.md +259 -0
  142. package/skills/frontend/flutter-expert/references/gorouter-navigation.md +119 -0
  143. package/skills/frontend/flutter-expert/references/performance.md +99 -0
  144. package/skills/frontend/flutter-expert/references/project-structure.md +118 -0
  145. package/skills/frontend/flutter-expert/references/riverpod-state.md +130 -0
  146. package/skills/frontend/flutter-expert/references/widget-patterns.md +123 -0
  147. package/skills/frontend/nextjs-developer/SKILL.md +143 -0
  148. package/skills/frontend/nextjs-developer/references/app-router.md +311 -0
  149. package/skills/frontend/nextjs-developer/references/data-fetching.md +482 -0
  150. package/skills/frontend/nextjs-developer/references/deployment.md +545 -0
  151. package/skills/frontend/nextjs-developer/references/server-actions.md +462 -0
  152. package/skills/frontend/nextjs-developer/references/server-components.md +384 -0
  153. package/skills/frontend/react-expert/SKILL.md +149 -0
  154. package/skills/frontend/react-expert/references/hooks-patterns.md +162 -0
  155. package/skills/frontend/react-expert/references/migration-class-to-modern.md +1119 -0
  156. package/skills/frontend/react-expert/references/performance.md +168 -0
  157. package/skills/frontend/react-expert/references/react-19-features.md +174 -0
  158. package/skills/frontend/react-expert/references/server-components.md +143 -0
  159. package/skills/frontend/react-expert/references/state-management.md +171 -0
  160. package/skills/frontend/react-expert/references/testing-react.md +174 -0
  161. package/skills/frontend/react-native-expert/SKILL.md +185 -0
  162. package/skills/frontend/react-native-expert/references/expo-router.md +187 -0
  163. package/skills/frontend/react-native-expert/references/list-optimization.md +204 -0
  164. package/skills/frontend/react-native-expert/references/platform-handling.md +188 -0
  165. package/skills/frontend/react-native-expert/references/project-structure.md +171 -0
  166. package/skills/frontend/react-native-expert/references/storage-hooks.md +173 -0
  167. package/skills/frontend/vue-expert/SKILL.md +98 -0
  168. package/skills/frontend/vue-expert/references/build-tooling.md +480 -0
  169. package/skills/frontend/vue-expert/references/components.md +448 -0
  170. package/skills/frontend/vue-expert/references/composition-api.md +299 -0
  171. package/skills/frontend/vue-expert/references/mobile-hybrid.md +636 -0
  172. package/skills/frontend/vue-expert/references/nuxt.md +669 -0
  173. package/skills/frontend/vue-expert/references/state-management.md +449 -0
  174. package/skills/frontend/vue-expert/references/typescript.md +584 -0
  175. package/skills/frontend/vue-expert-js/SKILL.md +167 -0
  176. package/skills/frontend/vue-expert-js/references/component-architecture.md +219 -0
  177. package/skills/frontend/vue-expert-js/references/composables-patterns.md +183 -0
  178. package/skills/frontend/vue-expert-js/references/jsdoc-typing.md +535 -0
  179. package/skills/frontend/vue-expert-js/references/state-management.md +249 -0
  180. package/skills/frontend/vue-expert-js/references/testing-patterns.md +237 -0
  181. package/skills/go-rust-cpp/cpp-pro/SKILL.md +115 -0
  182. package/skills/go-rust-cpp/cpp-pro/references/build-tooling.md +440 -0
  183. package/skills/go-rust-cpp/cpp-pro/references/concurrency.md +437 -0
  184. package/skills/go-rust-cpp/cpp-pro/references/memory-performance.md +397 -0
  185. package/skills/go-rust-cpp/cpp-pro/references/modern-cpp.md +304 -0
  186. package/skills/go-rust-cpp/cpp-pro/references/templates.md +357 -0
  187. package/skills/go-rust-cpp/golang-pro/SKILL.md +122 -0
  188. package/skills/go-rust-cpp/golang-pro/references/concurrency.md +329 -0
  189. package/skills/go-rust-cpp/golang-pro/references/generics.md +442 -0
  190. package/skills/go-rust-cpp/golang-pro/references/interfaces.md +432 -0
  191. package/skills/go-rust-cpp/golang-pro/references/project-structure.md +477 -0
  192. package/skills/go-rust-cpp/golang-pro/references/testing.md +451 -0
  193. package/skills/go-rust-cpp/rust-engineer/SKILL.md +167 -0
  194. package/skills/go-rust-cpp/rust-engineer/references/async.md +458 -0
  195. package/skills/go-rust-cpp/rust-engineer/references/error-handling.md +334 -0
  196. package/skills/go-rust-cpp/rust-engineer/references/ownership.md +278 -0
  197. package/skills/go-rust-cpp/rust-engineer/references/testing.md +470 -0
  198. package/skills/go-rust-cpp/rust-engineer/references/traits.md +413 -0
  199. package/skills/infra/cli-developer/SKILL.md +113 -0
  200. package/skills/infra/cli-developer/references/design-patterns.md +221 -0
  201. package/skills/infra/cli-developer/references/go-cli.md +540 -0
  202. package/skills/infra/cli-developer/references/node-cli.md +383 -0
  203. package/skills/infra/cli-developer/references/python-cli.md +422 -0
  204. package/skills/infra/cli-developer/references/ux-patterns.md +448 -0
  205. package/skills/infra/cloud-architect/SKILL.md +216 -0
  206. package/skills/infra/cloud-architect/references/aws.md +394 -0
  207. package/skills/infra/cloud-architect/references/azure.md +562 -0
  208. package/skills/infra/cloud-architect/references/cost.md +582 -0
  209. package/skills/infra/cloud-architect/references/gcp.md +633 -0
  210. package/skills/infra/cloud-architect/references/multi-cloud.md +483 -0
  211. package/skills/infra/devops-engineer/SKILL.md +144 -0
  212. package/skills/infra/devops-engineer/references/deployment-strategies.md +241 -0
  213. package/skills/infra/devops-engineer/references/docker-patterns.md +113 -0
  214. package/skills/infra/devops-engineer/references/github-actions.md +139 -0
  215. package/skills/infra/devops-engineer/references/incident-response.md +331 -0
  216. package/skills/infra/devops-engineer/references/kubernetes.md +154 -0
  217. package/skills/infra/devops-engineer/references/platform-engineering.md +417 -0
  218. package/skills/infra/devops-engineer/references/release-automation.md +527 -0
  219. package/skills/infra/devops-engineer/references/terraform-iac.md +141 -0
  220. package/skills/infra/kubernetes-specialist/SKILL.md +241 -0
  221. package/skills/infra/kubernetes-specialist/references/configuration.md +452 -0
  222. package/skills/infra/kubernetes-specialist/references/cost-optimization.md +458 -0
  223. package/skills/infra/kubernetes-specialist/references/custom-operators.md +563 -0
  224. package/skills/infra/kubernetes-specialist/references/gitops.md +530 -0
  225. package/skills/infra/kubernetes-specialist/references/helm-charts.md +912 -0
  226. package/skills/infra/kubernetes-specialist/references/multi-cluster.md +507 -0
  227. package/skills/infra/kubernetes-specialist/references/networking.md +447 -0
  228. package/skills/infra/kubernetes-specialist/references/service-mesh.md +459 -0
  229. package/skills/infra/kubernetes-specialist/references/storage.md +535 -0
  230. package/skills/infra/kubernetes-specialist/references/troubleshooting.md +414 -0
  231. package/skills/infra/kubernetes-specialist/references/workloads.md +377 -0
  232. package/skills/infra/mcp-developer/SKILL.md +143 -0
  233. package/skills/infra/mcp-developer/references/protocol.md +244 -0
  234. package/skills/infra/mcp-developer/references/python-sdk.md +367 -0
  235. package/skills/infra/mcp-developer/references/resources.md +554 -0
  236. package/skills/infra/mcp-developer/references/tools.md +480 -0
  237. package/skills/infra/mcp-developer/references/typescript-sdk.md +350 -0
  238. package/skills/infra/monitoring-expert/SKILL.md +176 -0
  239. package/skills/infra/monitoring-expert/references/alerting-rules.md +141 -0
  240. package/skills/infra/monitoring-expert/references/application-profiling.md +331 -0
  241. package/skills/infra/monitoring-expert/references/capacity-planning.md +344 -0
  242. package/skills/infra/monitoring-expert/references/dashboards.md +126 -0
  243. package/skills/infra/monitoring-expert/references/opentelemetry.md +123 -0
  244. package/skills/infra/monitoring-expert/references/performance-testing.md +269 -0
  245. package/skills/infra/monitoring-expert/references/prometheus-metrics.md +136 -0
  246. package/skills/infra/monitoring-expert/references/structured-logging.md +142 -0
  247. package/skills/infra/sre-engineer/SKILL.md +181 -0
  248. package/skills/infra/sre-engineer/references/automation-toil.md +492 -0
  249. package/skills/infra/sre-engineer/references/error-budget-policy.md +334 -0
  250. package/skills/infra/sre-engineer/references/incident-chaos.md +576 -0
  251. package/skills/infra/sre-engineer/references/monitoring-alerting.md +424 -0
  252. package/skills/infra/sre-engineer/references/slo-sli-management.md +238 -0
  253. package/skills/infra/terraform-engineer/SKILL.md +143 -0
  254. package/skills/infra/terraform-engineer/references/best-practices.md +583 -0
  255. package/skills/infra/terraform-engineer/references/module-patterns.md +297 -0
  256. package/skills/infra/terraform-engineer/references/providers.md +452 -0
  257. package/skills/infra/terraform-engineer/references/state-management.md +371 -0
  258. package/skills/infra/terraform-engineer/references/testing.md +486 -0
  259. package/skills/infra/websocket-engineer/SKILL.md +168 -0
  260. package/skills/infra/websocket-engineer/references/alternatives.md +391 -0
  261. package/skills/infra/websocket-engineer/references/patterns.md +400 -0
  262. package/skills/infra/websocket-engineer/references/protocol.md +195 -0
  263. package/skills/infra/websocket-engineer/references/scaling.md +333 -0
  264. package/skills/infra/websocket-engineer/references/security.md +474 -0
  265. package/skills/java/java-architect/SKILL.md +132 -0
  266. package/skills/java/java-architect/references/jpa-optimization.md +393 -0
  267. package/skills/java/java-architect/references/reactive-webflux.md +356 -0
  268. package/skills/java/java-architect/references/spring-boot-setup.md +269 -0
  269. package/skills/java/java-architect/references/spring-security.md +445 -0
  270. package/skills/java/java-architect/references/testing-patterns.md +500 -0
  271. package/skills/java/kotlin-specialist/SKILL.md +147 -0
  272. package/skills/java/kotlin-specialist/references/android-compose.md +419 -0
  273. package/skills/java/kotlin-specialist/references/coroutines-flow.md +276 -0
  274. package/skills/java/kotlin-specialist/references/dsl-idioms.md +421 -0
  275. package/skills/java/kotlin-specialist/references/ktor-server.md +426 -0
  276. package/skills/java/kotlin-specialist/references/multiplatform-kmp.md +380 -0
  277. package/skills/java/spring-boot-engineer/SKILL.md +195 -0
  278. package/skills/java/spring-boot-engineer/references/cloud.md +498 -0
  279. package/skills/java/spring-boot-engineer/references/data.md +381 -0
  280. package/skills/java/spring-boot-engineer/references/security.md +459 -0
  281. package/skills/java/spring-boot-engineer/references/testing.md +545 -0
  282. package/skills/java/spring-boot-engineer/references/web.md +295 -0
  283. package/skills/javascript/javascript-pro/SKILL.md +132 -0
  284. package/skills/javascript/javascript-pro/references/async-patterns.md +334 -0
  285. package/skills/javascript/javascript-pro/references/browser-apis.md +398 -0
  286. package/skills/javascript/javascript-pro/references/modern-syntax.md +272 -0
  287. package/skills/javascript/javascript-pro/references/modules.md +357 -0
  288. package/skills/javascript/javascript-pro/references/node-essentials.md +471 -0
  289. package/skills/javascript/nestjs-expert/SKILL.md +206 -0
  290. package/skills/javascript/nestjs-expert/references/authentication.md +166 -0
  291. package/skills/javascript/nestjs-expert/references/controllers-routing.md +111 -0
  292. package/skills/javascript/nestjs-expert/references/dtos-validation.md +153 -0
  293. package/skills/javascript/nestjs-expert/references/migration-from-express.md +1237 -0
  294. package/skills/javascript/nestjs-expert/references/services-di.md +140 -0
  295. package/skills/javascript/nestjs-expert/references/testing-patterns.md +186 -0
  296. package/skills/javascript/typescript-pro/SKILL.md +145 -0
  297. package/skills/javascript/typescript-pro/references/advanced-types.md +259 -0
  298. package/skills/javascript/typescript-pro/references/configuration.md +445 -0
  299. package/skills/javascript/typescript-pro/references/patterns.md +484 -0
  300. package/skills/javascript/typescript-pro/references/type-guards.md +352 -0
  301. package/skills/javascript/typescript-pro/references/utility-types.md +329 -0
  302. package/skills/php/laravel-specialist/SKILL.md +262 -0
  303. package/skills/php/laravel-specialist/references/eloquent.md +351 -0
  304. package/skills/php/laravel-specialist/references/livewire.md +512 -0
  305. package/skills/php/laravel-specialist/references/queues.md +423 -0
  306. package/skills/php/laravel-specialist/references/routing.md +362 -0
  307. package/skills/php/laravel-specialist/references/testing.md +522 -0
  308. package/skills/php/php-pro/SKILL.md +206 -0
  309. package/skills/php/php-pro/references/async-patterns.md +412 -0
  310. package/skills/php/php-pro/references/laravel-patterns.md +377 -0
  311. package/skills/php/php-pro/references/modern-php-features.md +323 -0
  312. package/skills/php/php-pro/references/symfony-patterns.md +466 -0
  313. package/skills/php/php-pro/references/testing-quality.md +466 -0
  314. package/skills/python/django-expert/SKILL.md +162 -0
  315. package/skills/python/django-expert/references/authentication.md +145 -0
  316. package/skills/python/django-expert/references/drf-serializers.md +148 -0
  317. package/skills/python/django-expert/references/models-orm.md +151 -0
  318. package/skills/python/django-expert/references/testing-django.md +204 -0
  319. package/skills/python/django-expert/references/viewsets-views.md +153 -0
  320. package/skills/python/fastapi-expert/SKILL.md +185 -0
  321. package/skills/python/fastapi-expert/references/async-sqlalchemy.md +146 -0
  322. package/skills/python/fastapi-expert/references/authentication.md +159 -0
  323. package/skills/python/fastapi-expert/references/endpoints-routing.md +142 -0
  324. package/skills/python/fastapi-expert/references/migration-from-django.md +997 -0
  325. package/skills/python/fastapi-expert/references/pydantic-v2.md +135 -0
  326. package/skills/python/fastapi-expert/references/testing-async.md +159 -0
  327. package/skills/python/pandas-pro/SKILL.md +178 -0
  328. package/skills/python/pandas-pro/references/aggregation-groupby.md +545 -0
  329. package/skills/python/pandas-pro/references/data-cleaning.md +500 -0
  330. package/skills/python/pandas-pro/references/dataframe-operations.md +420 -0
  331. package/skills/python/pandas-pro/references/merging-joining.md +596 -0
  332. package/skills/python/pandas-pro/references/performance-optimization.md +597 -0
  333. package/skills/python/python-pro/SKILL.md +177 -0
  334. package/skills/python/python-pro/references/async-patterns.md +356 -0
  335. package/skills/python/python-pro/references/packaging.md +460 -0
  336. package/skills/python/python-pro/references/standard-library.md +378 -0
  337. package/skills/python/python-pro/references/testing.md +404 -0
  338. package/skills/python/python-pro/references/type-system.md +290 -0
  339. package/skills/quality/chaos-engineer/SKILL.md +182 -0
  340. package/skills/quality/chaos-engineer/references/chaos-tools.md +511 -0
  341. package/skills/quality/chaos-engineer/references/experiment-design.md +229 -0
  342. package/skills/quality/chaos-engineer/references/game-days.md +434 -0
  343. package/skills/quality/chaos-engineer/references/infrastructure-chaos.md +348 -0
  344. package/skills/quality/chaos-engineer/references/kubernetes-chaos.md +432 -0
  345. package/skills/quality/code-reviewer/SKILL.md +119 -0
  346. package/skills/quality/code-reviewer/references/common-issues.md +142 -0
  347. package/skills/quality/code-reviewer/references/feedback-examples.md +144 -0
  348. package/skills/quality/code-reviewer/references/receiving-feedback.md +238 -0
  349. package/skills/quality/code-reviewer/references/report-template.md +109 -0
  350. package/skills/quality/code-reviewer/references/review-checklist.md +88 -0
  351. package/skills/quality/code-reviewer/references/spec-compliance-review.md +258 -0
  352. package/skills/quality/playwright-expert/SKILL.md +169 -0
  353. package/skills/quality/playwright-expert/references/api-mocking.md +140 -0
  354. package/skills/quality/playwright-expert/references/configuration.md +155 -0
  355. package/skills/quality/playwright-expert/references/debugging-flaky.md +150 -0
  356. package/skills/quality/playwright-expert/references/page-object-model.md +152 -0
  357. package/skills/quality/playwright-expert/references/selectors-locators.md +119 -0
  358. package/skills/quality/secure-code-guardian/SKILL.md +191 -0
  359. package/skills/quality/secure-code-guardian/references/authentication.md +136 -0
  360. package/skills/quality/secure-code-guardian/references/input-validation.md +146 -0
  361. package/skills/quality/secure-code-guardian/references/owasp-prevention.md +135 -0
  362. package/skills/quality/secure-code-guardian/references/security-headers.md +133 -0
  363. package/skills/quality/secure-code-guardian/references/xss-csrf.md +157 -0
  364. package/skills/quality/security-reviewer/SKILL.md +103 -0
  365. package/skills/quality/security-reviewer/references/infrastructure-security.md +268 -0
  366. package/skills/quality/security-reviewer/references/penetration-testing.md +268 -0
  367. package/skills/quality/security-reviewer/references/report-template.md +170 -0
  368. package/skills/quality/security-reviewer/references/sast-tools.md +117 -0
  369. package/skills/quality/security-reviewer/references/secret-scanning.md +125 -0
  370. package/skills/quality/security-reviewer/references/vulnerability-patterns.md +152 -0
  371. package/skills/quality/tdd-guide/assets/sample_coverage_report.lcov +0 -0
  372. package/skills/quality/test-master/SKILL.md +94 -0
  373. package/skills/quality/test-master/references/automation-frameworks.md +294 -0
  374. package/skills/quality/test-master/references/e2e-testing.md +128 -0
  375. package/skills/quality/test-master/references/integration-testing.md +120 -0
  376. package/skills/quality/test-master/references/performance-testing.md +118 -0
  377. package/skills/quality/test-master/references/qa-methodology.md +247 -0
  378. package/skills/quality/test-master/references/security-testing.md +127 -0
  379. package/skills/quality/test-master/references/tdd-iron-laws.md +174 -0
  380. package/skills/quality/test-master/references/test-reports.md +104 -0
  381. package/skills/quality/test-master/references/testing-anti-patterns.md +231 -0
  382. package/skills/quality/test-master/references/unit-testing.md +113 -0
  383. package/skills/ruby/rails-expert/SKILL.md +154 -0
  384. package/skills/ruby/rails-expert/references/active-record.md +244 -0
  385. package/skills/ruby/rails-expert/references/api-development.md +401 -0
  386. package/skills/ruby/rails-expert/references/background-jobs.md +272 -0
  387. package/skills/ruby/rails-expert/references/hotwire-turbo.md +228 -0
  388. package/skills/ruby/rails-expert/references/rspec-testing.md +367 -0
  389. package/skills/swift/swift-expert/SKILL.md +163 -0
  390. package/skills/swift/swift-expert/references/async-concurrency.md +360 -0
  391. package/skills/swift/swift-expert/references/memory-performance.md +377 -0
  392. package/skills/swift/swift-expert/references/protocol-oriented.md +354 -0
  393. package/skills/swift/swift-expert/references/swiftui-patterns.md +291 -0
  394. package/skills/swift/swift-expert/references/testing-patterns.md +399 -0
  395. package/skills/workflow/brainstorming/SKILL.md +164 -0
  396. package/skills/workflow/brainstorming/scripts/helper.js +88 -0
  397. package/skills/workflow/brainstorming/scripts/start-server.sh +148 -0
  398. package/skills/workflow/brainstorming/scripts/stop-server.sh +56 -0
  399. package/skills/workflow/brainstorming/spec-document-reviewer-prompt.md +49 -0
  400. package/skills/workflow/brainstorming/visual-companion.md +287 -0
  401. package/skills/workflow/documentation/SKILL.md +45 -0
  402. package/skills/workflow/entropy-management/SKILL.md +115 -0
  403. package/skills/workflow/executing-plans/SKILL.md +70 -0
  404. package/skills/workflow/finishing-a-development-branch/SKILL.md +200 -0
  405. package/skills/workflow/receiving-code-review/SKILL.md +213 -0
  406. package/skills/workflow/requesting-code-review/SKILL.md +105 -0
  407. package/skills/workflow/requesting-code-review/code-reviewer.md +146 -0
  408. package/skills/workflow/requirement-engineering/SKILL.md +111 -0
  409. package/skills/workflow/systematic-debugging/CREATION-LOG.md +119 -0
  410. package/skills/workflow/systematic-debugging/SKILL.md +296 -0
  411. package/skills/workflow/systematic-debugging/condition-based-waiting-example.ts +158 -0
  412. package/skills/workflow/systematic-debugging/condition-based-waiting.md +115 -0
  413. package/skills/workflow/systematic-debugging/defense-in-depth.md +122 -0
  414. package/skills/workflow/systematic-debugging/find-polluter.sh +63 -0
  415. package/skills/workflow/systematic-debugging/root-cause-tracing.md +169 -0
  416. package/skills/workflow/systematic-debugging/test-academic.md +14 -0
  417. package/skills/workflow/systematic-debugging/test-pressure-1.md +58 -0
  418. package/skills/workflow/systematic-debugging/test-pressure-2.md +68 -0
  419. package/skills/workflow/systematic-debugging/test-pressure-3.md +69 -0
  420. package/skills/workflow/using-git-worktrees/SKILL.md +218 -0
  421. package/skills/workflow/verification-before-completion/SKILL.md +139 -0
  422. package/skills/workflow/writing-plans/SKILL.md +151 -0
  423. package/skills/workflow/writing-plans/plan-document-reviewer-prompt.md +49 -0
  424. package/skills/workflow/writing-skills/SKILL.md +655 -0
  425. package/skills/workflow/writing-skills/anthropic-best-practices.md +1150 -0
  426. package/skills/workflow/writing-skills/examples/CLAUDE_MD_TESTING.md +189 -0
  427. package/skills/workflow/writing-skills/graphviz-conventions.dot +0 -0
  428. package/skills/workflow/writing-skills/persuasion-principles.md +187 -0
  429. package/skills/workflow/writing-skills/render-graphs.js +168 -0
  430. package/skills/workflow/writing-skills/testing-skills-with-subagents.md +384 -0
  431. package/skills/angular-architect/SKILL.md +0 -152
  432. package/skills/angular-architect/references/components.md +0 -297
  433. package/skills/angular-architect/references/ngrx.md +0 -401
  434. package/skills/angular-architect/references/routing.md +0 -361
  435. package/skills/angular-architect/references/rxjs.md +0 -319
  436. package/skills/angular-architect/references/testing.md +0 -405
  437. package/skills/api-designer/SKILL.md +0 -217
  438. package/skills/api-designer/references/error-handling.md +0 -541
  439. package/skills/api-designer/references/openapi.md +0 -824
  440. package/skills/api-designer/references/pagination.md +0 -494
  441. package/skills/api-designer/references/rest-patterns.md +0 -335
  442. package/skills/api-designer/references/versioning.md +0 -391
  443. package/skills/architecture-designer/SKILL.md +0 -117
  444. package/skills/architecture-designer/references/adr-template.md +0 -116
  445. package/skills/architecture-designer/references/architecture-patterns.md +0 -111
  446. package/skills/architecture-designer/references/database-selection.md +0 -102
  447. package/skills/architecture-designer/references/nfr-checklist.md +0 -112
  448. package/skills/architecture-designer/references/system-design.md +0 -100
  449. package/skills/brainstorming/SKILL.md +0 -164
  450. package/skills/brainstorming/scripts/helper.js +0 -88
  451. package/skills/brainstorming/scripts/start-server.sh +0 -148
  452. package/skills/brainstorming/scripts/stop-server.sh +0 -56
  453. package/skills/brainstorming/spec-document-reviewer-prompt.md +0 -49
  454. package/skills/brainstorming/visual-companion.md +0 -287
  455. package/skills/chaos-engineer/SKILL.md +0 -182
  456. package/skills/chaos-engineer/references/chaos-tools.md +0 -511
  457. package/skills/chaos-engineer/references/experiment-design.md +0 -229
  458. package/skills/chaos-engineer/references/game-days.md +0 -434
  459. package/skills/chaos-engineer/references/infrastructure-chaos.md +0 -348
  460. package/skills/chaos-engineer/references/kubernetes-chaos.md +0 -432
  461. package/skills/cli-developer/SKILL.md +0 -113
  462. package/skills/cli-developer/references/design-patterns.md +0 -221
  463. package/skills/cli-developer/references/go-cli.md +0 -540
  464. package/skills/cli-developer/references/node-cli.md +0 -383
  465. package/skills/cli-developer/references/python-cli.md +0 -422
  466. package/skills/cli-developer/references/ux-patterns.md +0 -448
  467. package/skills/cloud-architect/SKILL.md +0 -216
  468. package/skills/cloud-architect/references/aws.md +0 -394
  469. package/skills/cloud-architect/references/azure.md +0 -562
  470. package/skills/cloud-architect/references/cost.md +0 -582
  471. package/skills/cloud-architect/references/gcp.md +0 -633
  472. package/skills/cloud-architect/references/multi-cloud.md +0 -483
  473. package/skills/code-documenter/SKILL.md +0 -147
  474. package/skills/code-documenter/references/api-docs-fastapi-django.md +0 -166
  475. package/skills/code-documenter/references/api-docs-nestjs-express.md +0 -220
  476. package/skills/code-documenter/references/coverage-reports.md +0 -125
  477. package/skills/code-documenter/references/documentation-systems.md +0 -333
  478. package/skills/code-documenter/references/interactive-api-docs.md +0 -531
  479. package/skills/code-documenter/references/python-docstrings.md +0 -121
  480. package/skills/code-documenter/references/typescript-jsdoc.md +0 -145
  481. package/skills/code-documenter/references/user-guides-tutorials.md +0 -530
  482. package/skills/code-reviewer/SKILL.md +0 -119
  483. package/skills/code-reviewer/references/common-issues.md +0 -142
  484. package/skills/code-reviewer/references/feedback-examples.md +0 -144
  485. package/skills/code-reviewer/references/receiving-feedback.md +0 -238
  486. package/skills/code-reviewer/references/report-template.md +0 -109
  487. package/skills/code-reviewer/references/review-checklist.md +0 -88
  488. package/skills/code-reviewer/references/spec-compliance-review.md +0 -258
  489. package/skills/cpp-pro/SKILL.md +0 -115
  490. package/skills/cpp-pro/references/build-tooling.md +0 -440
  491. package/skills/cpp-pro/references/concurrency.md +0 -437
  492. package/skills/cpp-pro/references/memory-performance.md +0 -397
  493. package/skills/cpp-pro/references/modern-cpp.md +0 -304
  494. package/skills/cpp-pro/references/templates.md +0 -357
  495. package/skills/csharp-developer/SKILL.md +0 -125
  496. package/skills/csharp-developer/references/aspnet-core.md +0 -394
  497. package/skills/csharp-developer/references/blazor.md +0 -553
  498. package/skills/csharp-developer/references/entity-framework.md +0 -409
  499. package/skills/csharp-developer/references/modern-csharp.md +0 -248
  500. package/skills/csharp-developer/references/performance.md +0 -498
  501. package/skills/database-optimizer/SKILL.md +0 -147
  502. package/skills/database-optimizer/references/index-strategies.md +0 -331
  503. package/skills/database-optimizer/references/monitoring-analysis.md +0 -501
  504. package/skills/database-optimizer/references/mysql-tuning.md +0 -452
  505. package/skills/database-optimizer/references/postgresql-tuning.md +0 -413
  506. package/skills/database-optimizer/references/query-optimization.md +0 -251
  507. package/skills/debugging-wizard/SKILL.md +0 -105
  508. package/skills/debugging-wizard/references/common-patterns.md +0 -132
  509. package/skills/debugging-wizard/references/debugging-tools.md +0 -140
  510. package/skills/debugging-wizard/references/quick-fixes.md +0 -177
  511. package/skills/debugging-wizard/references/strategies.md +0 -142
  512. package/skills/debugging-wizard/references/systematic-debugging.md +0 -367
  513. package/skills/devops-engineer/SKILL.md +0 -144
  514. package/skills/devops-engineer/references/deployment-strategies.md +0 -241
  515. package/skills/devops-engineer/references/docker-patterns.md +0 -113
  516. package/skills/devops-engineer/references/github-actions.md +0 -139
  517. package/skills/devops-engineer/references/incident-response.md +0 -331
  518. package/skills/devops-engineer/references/kubernetes.md +0 -154
  519. package/skills/devops-engineer/references/platform-engineering.md +0 -417
  520. package/skills/devops-engineer/references/release-automation.md +0 -527
  521. package/skills/devops-engineer/references/terraform-iac.md +0 -141
  522. package/skills/django-expert/SKILL.md +0 -162
  523. package/skills/django-expert/references/authentication.md +0 -145
  524. package/skills/django-expert/references/drf-serializers.md +0 -148
  525. package/skills/django-expert/references/models-orm.md +0 -151
  526. package/skills/django-expert/references/testing-django.md +0 -204
  527. package/skills/django-expert/references/viewsets-views.md +0 -153
  528. package/skills/documentation/SKILL.md +0 -45
  529. package/skills/dotnet-core-expert/SKILL.md +0 -138
  530. package/skills/dotnet-core-expert/references/authentication.md +0 -546
  531. package/skills/dotnet-core-expert/references/clean-architecture.md +0 -455
  532. package/skills/dotnet-core-expert/references/cloud-native.md +0 -548
  533. package/skills/dotnet-core-expert/references/entity-framework.md +0 -440
  534. package/skills/dotnet-core-expert/references/minimal-apis.md +0 -319
  535. package/skills/entropy-management/SKILL.md +0 -115
  536. package/skills/executing-plans/SKILL.md +0 -70
  537. package/skills/fastapi-expert/SKILL.md +0 -185
  538. package/skills/fastapi-expert/references/async-sqlalchemy.md +0 -146
  539. package/skills/fastapi-expert/references/authentication.md +0 -159
  540. package/skills/fastapi-expert/references/endpoints-routing.md +0 -142
  541. package/skills/fastapi-expert/references/migration-from-django.md +0 -997
  542. package/skills/fastapi-expert/references/pydantic-v2.md +0 -135
  543. package/skills/fastapi-expert/references/testing-async.md +0 -159
  544. package/skills/feature-forge/SKILL.md +0 -98
  545. package/skills/feature-forge/references/acceptance-criteria.md +0 -104
  546. package/skills/feature-forge/references/ears-syntax.md +0 -99
  547. package/skills/feature-forge/references/interview-questions.md +0 -150
  548. package/skills/feature-forge/references/pre-discovery-subagents.md +0 -54
  549. package/skills/feature-forge/references/specification-template.md +0 -103
  550. package/skills/fine-tuning-expert/SKILL.md +0 -162
  551. package/skills/fine-tuning-expert/references/dataset-preparation.md +0 -540
  552. package/skills/fine-tuning-expert/references/deployment-optimization.md +0 -673
  553. package/skills/fine-tuning-expert/references/evaluation-metrics.md +0 -597
  554. package/skills/fine-tuning-expert/references/hyperparameter-tuning.md +0 -565
  555. package/skills/fine-tuning-expert/references/lora-peft.md +0 -347
  556. package/skills/finishing-a-development-branch/SKILL.md +0 -200
  557. package/skills/flutter-expert/SKILL.md +0 -138
  558. package/skills/flutter-expert/references/bloc-state.md +0 -259
  559. package/skills/flutter-expert/references/gorouter-navigation.md +0 -119
  560. package/skills/flutter-expert/references/performance.md +0 -99
  561. package/skills/flutter-expert/references/project-structure.md +0 -118
  562. package/skills/flutter-expert/references/riverpod-state.md +0 -130
  563. package/skills/flutter-expert/references/widget-patterns.md +0 -123
  564. package/skills/fullstack-guardian/SKILL.md +0 -105
  565. package/skills/fullstack-guardian/references/api-design-standards.md +0 -307
  566. package/skills/fullstack-guardian/references/architecture-decisions.md +0 -350
  567. package/skills/fullstack-guardian/references/backend-patterns.md +0 -237
  568. package/skills/fullstack-guardian/references/common-patterns.md +0 -134
  569. package/skills/fullstack-guardian/references/deliverables-checklist.md +0 -354
  570. package/skills/fullstack-guardian/references/design-template.md +0 -91
  571. package/skills/fullstack-guardian/references/error-handling.md +0 -135
  572. package/skills/fullstack-guardian/references/frontend-patterns.md +0 -340
  573. package/skills/fullstack-guardian/references/integration-patterns.md +0 -333
  574. package/skills/fullstack-guardian/references/security-checklist.md +0 -106
  575. package/skills/golang-pro/SKILL.md +0 -122
  576. package/skills/golang-pro/references/concurrency.md +0 -329
  577. package/skills/golang-pro/references/generics.md +0 -442
  578. package/skills/golang-pro/references/interfaces.md +0 -432
  579. package/skills/golang-pro/references/project-structure.md +0 -477
  580. package/skills/golang-pro/references/testing.md +0 -451
  581. package/skills/graphql-architect/SKILL.md +0 -146
  582. package/skills/graphql-architect/references/federation.md +0 -418
  583. package/skills/graphql-architect/references/migration-from-rest.md +0 -1141
  584. package/skills/graphql-architect/references/resolvers.md +0 -425
  585. package/skills/graphql-architect/references/schema-design.md +0 -393
  586. package/skills/graphql-architect/references/security.md +0 -569
  587. package/skills/graphql-architect/references/subscriptions.md +0 -510
  588. package/skills/java-architect/SKILL.md +0 -132
  589. package/skills/java-architect/references/jpa-optimization.md +0 -393
  590. package/skills/java-architect/references/reactive-webflux.md +0 -356
  591. package/skills/java-architect/references/spring-boot-setup.md +0 -269
  592. package/skills/java-architect/references/spring-security.md +0 -445
  593. package/skills/java-architect/references/testing-patterns.md +0 -500
  594. package/skills/javascript-pro/SKILL.md +0 -132
  595. package/skills/javascript-pro/references/async-patterns.md +0 -334
  596. package/skills/javascript-pro/references/browser-apis.md +0 -398
  597. package/skills/javascript-pro/references/modern-syntax.md +0 -272
  598. package/skills/javascript-pro/references/modules.md +0 -357
  599. package/skills/javascript-pro/references/node-essentials.md +0 -471
  600. package/skills/kotlin-specialist/SKILL.md +0 -147
  601. package/skills/kotlin-specialist/references/android-compose.md +0 -419
  602. package/skills/kotlin-specialist/references/coroutines-flow.md +0 -276
  603. package/skills/kotlin-specialist/references/dsl-idioms.md +0 -421
  604. package/skills/kotlin-specialist/references/ktor-server.md +0 -426
  605. package/skills/kotlin-specialist/references/multiplatform-kmp.md +0 -380
  606. package/skills/kubernetes-specialist/SKILL.md +0 -241
  607. package/skills/kubernetes-specialist/references/configuration.md +0 -452
  608. package/skills/kubernetes-specialist/references/cost-optimization.md +0 -458
  609. package/skills/kubernetes-specialist/references/custom-operators.md +0 -563
  610. package/skills/kubernetes-specialist/references/gitops.md +0 -530
  611. package/skills/kubernetes-specialist/references/helm-charts.md +0 -912
  612. package/skills/kubernetes-specialist/references/multi-cluster.md +0 -507
  613. package/skills/kubernetes-specialist/references/networking.md +0 -447
  614. package/skills/kubernetes-specialist/references/service-mesh.md +0 -459
  615. package/skills/kubernetes-specialist/references/storage.md +0 -535
  616. package/skills/kubernetes-specialist/references/troubleshooting.md +0 -414
  617. package/skills/kubernetes-specialist/references/workloads.md +0 -377
  618. package/skills/laravel-specialist/SKILL.md +0 -262
  619. package/skills/laravel-specialist/references/eloquent.md +0 -351
  620. package/skills/laravel-specialist/references/livewire.md +0 -512
  621. package/skills/laravel-specialist/references/queues.md +0 -423
  622. package/skills/laravel-specialist/references/routing.md +0 -362
  623. package/skills/laravel-specialist/references/testing.md +0 -522
  624. package/skills/legacy-modernizer/SKILL.md +0 -137
  625. package/skills/legacy-modernizer/references/legacy-testing.md +0 -381
  626. package/skills/legacy-modernizer/references/migration-strategies.md +0 -423
  627. package/skills/legacy-modernizer/references/refactoring-patterns.md +0 -395
  628. package/skills/legacy-modernizer/references/strangler-fig-pattern.md +0 -281
  629. package/skills/legacy-modernizer/references/system-assessment.md +0 -487
  630. package/skills/mcp-developer/SKILL.md +0 -143
  631. package/skills/mcp-developer/references/protocol.md +0 -244
  632. package/skills/mcp-developer/references/python-sdk.md +0 -367
  633. package/skills/mcp-developer/references/resources.md +0 -554
  634. package/skills/mcp-developer/references/tools.md +0 -480
  635. package/skills/mcp-developer/references/typescript-sdk.md +0 -350
  636. package/skills/microservices-architect/SKILL.md +0 -164
  637. package/skills/microservices-architect/references/communication.md +0 -499
  638. package/skills/microservices-architect/references/data.md +0 -721
  639. package/skills/microservices-architect/references/decomposition.md +0 -344
  640. package/skills/microservices-architect/references/observability.md +0 -805
  641. package/skills/microservices-architect/references/patterns.md +0 -603
  642. package/skills/ml-pipeline/SKILL.md +0 -159
  643. package/skills/ml-pipeline/references/experiment-tracking.md +0 -833
  644. package/skills/ml-pipeline/references/feature-engineering.md +0 -631
  645. package/skills/ml-pipeline/references/model-validation.md +0 -978
  646. package/skills/ml-pipeline/references/pipeline-orchestration.md +0 -907
  647. package/skills/ml-pipeline/references/training-pipelines.md +0 -782
  648. package/skills/monitoring-expert/SKILL.md +0 -176
  649. package/skills/monitoring-expert/references/alerting-rules.md +0 -141
  650. package/skills/monitoring-expert/references/application-profiling.md +0 -331
  651. package/skills/monitoring-expert/references/capacity-planning.md +0 -344
  652. package/skills/monitoring-expert/references/dashboards.md +0 -126
  653. package/skills/monitoring-expert/references/opentelemetry.md +0 -123
  654. package/skills/monitoring-expert/references/performance-testing.md +0 -269
  655. package/skills/monitoring-expert/references/prometheus-metrics.md +0 -136
  656. package/skills/monitoring-expert/references/structured-logging.md +0 -142
  657. package/skills/nestjs-expert/SKILL.md +0 -206
  658. package/skills/nestjs-expert/references/authentication.md +0 -166
  659. package/skills/nestjs-expert/references/controllers-routing.md +0 -111
  660. package/skills/nestjs-expert/references/dtos-validation.md +0 -153
  661. package/skills/nestjs-expert/references/migration-from-express.md +0 -1237
  662. package/skills/nestjs-expert/references/services-di.md +0 -140
  663. package/skills/nestjs-expert/references/testing-patterns.md +0 -186
  664. package/skills/nextjs-developer/SKILL.md +0 -143
  665. package/skills/nextjs-developer/references/app-router.md +0 -311
  666. package/skills/nextjs-developer/references/data-fetching.md +0 -482
  667. package/skills/nextjs-developer/references/deployment.md +0 -545
  668. package/skills/nextjs-developer/references/server-actions.md +0 -462
  669. package/skills/nextjs-developer/references/server-components.md +0 -384
  670. package/skills/pandas-pro/SKILL.md +0 -178
  671. package/skills/pandas-pro/references/aggregation-groupby.md +0 -545
  672. package/skills/pandas-pro/references/data-cleaning.md +0 -500
  673. package/skills/pandas-pro/references/dataframe-operations.md +0 -420
  674. package/skills/pandas-pro/references/merging-joining.md +0 -596
  675. package/skills/pandas-pro/references/performance-optimization.md +0 -597
  676. package/skills/php-pro/SKILL.md +0 -206
  677. package/skills/php-pro/references/async-patterns.md +0 -412
  678. package/skills/php-pro/references/laravel-patterns.md +0 -377
  679. package/skills/php-pro/references/modern-php-features.md +0 -323
  680. package/skills/php-pro/references/symfony-patterns.md +0 -466
  681. package/skills/php-pro/references/testing-quality.md +0 -466
  682. package/skills/playwright-expert/SKILL.md +0 -169
  683. package/skills/playwright-expert/references/api-mocking.md +0 -140
  684. package/skills/playwright-expert/references/configuration.md +0 -155
  685. package/skills/playwright-expert/references/debugging-flaky.md +0 -150
  686. package/skills/playwright-expert/references/page-object-model.md +0 -152
  687. package/skills/playwright-expert/references/selectors-locators.md +0 -119
  688. package/skills/postgres-pro/SKILL.md +0 -152
  689. package/skills/postgres-pro/references/extensions.md +0 -404
  690. package/skills/postgres-pro/references/jsonb.md +0 -321
  691. package/skills/postgres-pro/references/maintenance.md +0 -481
  692. package/skills/postgres-pro/references/performance.md +0 -265
  693. package/skills/postgres-pro/references/replication.md +0 -446
  694. package/skills/python-pro/SKILL.md +0 -177
  695. package/skills/python-pro/references/async-patterns.md +0 -356
  696. package/skills/python-pro/references/packaging.md +0 -460
  697. package/skills/python-pro/references/standard-library.md +0 -378
  698. package/skills/python-pro/references/testing.md +0 -404
  699. package/skills/python-pro/references/type-system.md +0 -290
  700. package/skills/rag-architect/SKILL.md +0 -194
  701. package/skills/rag-architect/references/chunking-strategies.md +0 -878
  702. package/skills/rag-architect/references/embedding-models.md +0 -561
  703. package/skills/rag-architect/references/rag-evaluation.md +0 -833
  704. package/skills/rag-architect/references/retrieval-optimization.md +0 -795
  705. package/skills/rag-architect/references/vector-databases.md +0 -589
  706. package/skills/rails-expert/SKILL.md +0 -154
  707. package/skills/rails-expert/references/active-record.md +0 -244
  708. package/skills/rails-expert/references/api-development.md +0 -401
  709. package/skills/rails-expert/references/background-jobs.md +0 -272
  710. package/skills/rails-expert/references/hotwire-turbo.md +0 -228
  711. package/skills/rails-expert/references/rspec-testing.md +0 -367
  712. package/skills/react-expert/SKILL.md +0 -149
  713. package/skills/react-expert/references/hooks-patterns.md +0 -162
  714. package/skills/react-expert/references/migration-class-to-modern.md +0 -1119
  715. package/skills/react-expert/references/performance.md +0 -168
  716. package/skills/react-expert/references/react-19-features.md +0 -174
  717. package/skills/react-expert/references/server-components.md +0 -143
  718. package/skills/react-expert/references/state-management.md +0 -171
  719. package/skills/react-expert/references/testing-react.md +0 -174
  720. package/skills/react-native-expert/SKILL.md +0 -185
  721. package/skills/react-native-expert/references/expo-router.md +0 -187
  722. package/skills/react-native-expert/references/list-optimization.md +0 -204
  723. package/skills/react-native-expert/references/platform-handling.md +0 -188
  724. package/skills/react-native-expert/references/project-structure.md +0 -171
  725. package/skills/react-native-expert/references/storage-hooks.md +0 -173
  726. package/skills/receiving-code-review/SKILL.md +0 -213
  727. package/skills/requesting-code-review/SKILL.md +0 -105
  728. package/skills/requesting-code-review/code-reviewer.md +0 -146
  729. package/skills/requirement-engineering/SKILL.md +0 -111
  730. package/skills/rust-engineer/SKILL.md +0 -167
  731. package/skills/rust-engineer/references/async.md +0 -458
  732. package/skills/rust-engineer/references/error-handling.md +0 -334
  733. package/skills/rust-engineer/references/ownership.md +0 -278
  734. package/skills/rust-engineer/references/testing.md +0 -470
  735. package/skills/rust-engineer/references/traits.md +0 -413
  736. package/skills/secure-code-guardian/SKILL.md +0 -191
  737. package/skills/secure-code-guardian/references/authentication.md +0 -136
  738. package/skills/secure-code-guardian/references/input-validation.md +0 -146
  739. package/skills/secure-code-guardian/references/owasp-prevention.md +0 -135
  740. package/skills/secure-code-guardian/references/security-headers.md +0 -133
  741. package/skills/secure-code-guardian/references/xss-csrf.md +0 -157
  742. package/skills/security-reviewer/SKILL.md +0 -103
  743. package/skills/security-reviewer/references/infrastructure-security.md +0 -268
  744. package/skills/security-reviewer/references/penetration-testing.md +0 -268
  745. package/skills/security-reviewer/references/report-template.md +0 -170
  746. package/skills/security-reviewer/references/sast-tools.md +0 -117
  747. package/skills/security-reviewer/references/secret-scanning.md +0 -125
  748. package/skills/security-reviewer/references/vulnerability-patterns.md +0 -152
  749. package/skills/spark-engineer/SKILL.md +0 -148
  750. package/skills/spark-engineer/references/partitioning-caching.md +0 -543
  751. package/skills/spark-engineer/references/performance-tuning.md +0 -544
  752. package/skills/spark-engineer/references/rdd-operations.md +0 -599
  753. package/skills/spark-engineer/references/spark-sql-dataframes.md +0 -474
  754. package/skills/spark-engineer/references/streaming-patterns.md +0 -786
  755. package/skills/spring-boot-engineer/SKILL.md +0 -195
  756. package/skills/spring-boot-engineer/references/cloud.md +0 -498
  757. package/skills/spring-boot-engineer/references/data.md +0 -381
  758. package/skills/spring-boot-engineer/references/security.md +0 -459
  759. package/skills/spring-boot-engineer/references/testing.md +0 -545
  760. package/skills/spring-boot-engineer/references/web.md +0 -295
  761. package/skills/sql-pro/SKILL.md +0 -129
  762. package/skills/sql-pro/references/database-design.md +0 -402
  763. package/skills/sql-pro/references/dialect-differences.md +0 -419
  764. package/skills/sql-pro/references/optimization.md +0 -384
  765. package/skills/sql-pro/references/query-patterns.md +0 -285
  766. package/skills/sql-pro/references/window-functions.md +0 -328
  767. package/skills/sre-engineer/SKILL.md +0 -181
  768. package/skills/sre-engineer/references/automation-toil.md +0 -492
  769. package/skills/sre-engineer/references/error-budget-policy.md +0 -334
  770. package/skills/sre-engineer/references/incident-chaos.md +0 -576
  771. package/skills/sre-engineer/references/monitoring-alerting.md +0 -424
  772. package/skills/sre-engineer/references/slo-sli-management.md +0 -238
  773. package/skills/swift-expert/SKILL.md +0 -163
  774. package/skills/swift-expert/references/async-concurrency.md +0 -360
  775. package/skills/swift-expert/references/memory-performance.md +0 -377
  776. package/skills/swift-expert/references/protocol-oriented.md +0 -354
  777. package/skills/swift-expert/references/swiftui-patterns.md +0 -291
  778. package/skills/swift-expert/references/testing-patterns.md +0 -399
  779. package/skills/systematic-debugging/CREATION-LOG.md +0 -119
  780. package/skills/systematic-debugging/SKILL.md +0 -296
  781. package/skills/systematic-debugging/condition-based-waiting-example.ts +0 -158
  782. package/skills/systematic-debugging/condition-based-waiting.md +0 -115
  783. package/skills/systematic-debugging/defense-in-depth.md +0 -122
  784. package/skills/systematic-debugging/find-polluter.sh +0 -63
  785. package/skills/systematic-debugging/root-cause-tracing.md +0 -169
  786. package/skills/systematic-debugging/test-academic.md +0 -14
  787. package/skills/systematic-debugging/test-pressure-1.md +0 -58
  788. package/skills/systematic-debugging/test-pressure-2.md +0 -68
  789. package/skills/systematic-debugging/test-pressure-3.md +0 -69
  790. package/skills/tdd-guide/assets/sample_coverage_report.lcov +0 -56
  791. package/skills/terraform-engineer/SKILL.md +0 -143
  792. package/skills/terraform-engineer/references/best-practices.md +0 -583
  793. package/skills/terraform-engineer/references/module-patterns.md +0 -297
  794. package/skills/terraform-engineer/references/providers.md +0 -452
  795. package/skills/terraform-engineer/references/state-management.md +0 -371
  796. package/skills/terraform-engineer/references/testing.md +0 -486
  797. package/skills/test-master/SKILL.md +0 -94
  798. package/skills/test-master/references/automation-frameworks.md +0 -294
  799. package/skills/test-master/references/e2e-testing.md +0 -128
  800. package/skills/test-master/references/integration-testing.md +0 -120
  801. package/skills/test-master/references/performance-testing.md +0 -118
  802. package/skills/test-master/references/qa-methodology.md +0 -247
  803. package/skills/test-master/references/security-testing.md +0 -127
  804. package/skills/test-master/references/tdd-iron-laws.md +0 -174
  805. package/skills/test-master/references/test-reports.md +0 -104
  806. package/skills/test-master/references/testing-anti-patterns.md +0 -231
  807. package/skills/test-master/references/unit-testing.md +0 -113
  808. package/skills/typescript-pro/SKILL.md +0 -145
  809. package/skills/typescript-pro/references/advanced-types.md +0 -259
  810. package/skills/typescript-pro/references/configuration.md +0 -445
  811. package/skills/typescript-pro/references/patterns.md +0 -484
  812. package/skills/typescript-pro/references/type-guards.md +0 -352
  813. package/skills/typescript-pro/references/utility-types.md +0 -329
  814. package/skills/using-git-worktrees/SKILL.md +0 -218
  815. package/skills/verification-before-completion/SKILL.md +0 -139
  816. package/skills/vue-expert/SKILL.md +0 -98
  817. package/skills/vue-expert/references/build-tooling.md +0 -480
  818. package/skills/vue-expert/references/components.md +0 -448
  819. package/skills/vue-expert/references/composition-api.md +0 -299
  820. package/skills/vue-expert/references/mobile-hybrid.md +0 -636
  821. package/skills/vue-expert/references/nuxt.md +0 -669
  822. package/skills/vue-expert/references/state-management.md +0 -449
  823. package/skills/vue-expert/references/typescript.md +0 -584
  824. package/skills/vue-expert-js/SKILL.md +0 -167
  825. package/skills/vue-expert-js/references/component-architecture.md +0 -219
  826. package/skills/vue-expert-js/references/composables-patterns.md +0 -183
  827. package/skills/vue-expert-js/references/jsdoc-typing.md +0 -535
  828. package/skills/vue-expert-js/references/state-management.md +0 -249
  829. package/skills/vue-expert-js/references/testing-patterns.md +0 -237
  830. package/skills/websocket-engineer/SKILL.md +0 -168
  831. package/skills/websocket-engineer/references/alternatives.md +0 -391
  832. package/skills/websocket-engineer/references/patterns.md +0 -400
  833. package/skills/websocket-engineer/references/protocol.md +0 -195
  834. package/skills/websocket-engineer/references/scaling.md +0 -333
  835. package/skills/websocket-engineer/references/security.md +0 -474
  836. package/skills/writing-plans/SKILL.md +0 -151
  837. package/skills/writing-plans/plan-document-reviewer-prompt.md +0 -49
  838. package/skills/writing-skills/SKILL.md +0 -655
  839. package/skills/writing-skills/anthropic-best-practices.md +0 -1150
  840. package/skills/writing-skills/examples/CLAUDE_MD_TESTING.md +0 -189
  841. package/skills/writing-skills/graphviz-conventions.dot +0 -172
  842. package/skills/writing-skills/persuasion-principles.md +0 -187
  843. package/skills/writing-skills/render-graphs.js +0 -168
  844. package/skills/writing-skills/testing-skills-with-subagents.md +0 -384
  845. /package/skills/{design-commands → frontend/design-commands}/design.md +0 -0
  846. /package/skills/{design-commands → frontend/design-commands}/handoff.md +0 -0
  847. /package/skills/{design-commands → frontend/design-commands}/prototype.md +0 -0
  848. /package/skills/{design-commands → frontend/design-commands}/spec.md +0 -0
  849. /package/skills/{design-commands → frontend/design-commands}/style.md +0 -0
  850. /package/skills/{senior-frontend → frontend/senior-frontend}/SKILL.md +0 -0
  851. /package/skills/{senior-frontend → frontend/senior-frontend}/references/frontend_best_practices.md +0 -0
  852. /package/skills/{senior-frontend → frontend/senior-frontend}/references/nextjs_optimization_guide.md +0 -0
  853. /package/skills/{senior-frontend → frontend/senior-frontend}/references/react_patterns.md +0 -0
  854. /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/bundle_analyzer.py +0 -0
  855. /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/component_generator.py +0 -0
  856. /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/frontend_scaffolder.py +0 -0
  857. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/SKILL.md +0 -0
  858. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/charts.csv +0 -0
  859. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/colors.csv +0 -0
  860. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/icons.csv +0 -0
  861. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/landing.csv +0 -0
  862. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/products.csv +0 -0
  863. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/react-performance.csv +0 -0
  864. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/astro.csv +0 -0
  865. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/flutter.csv +0 -0
  866. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/html-tailwind.csv +0 -0
  867. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/jetpack-compose.csv +0 -0
  868. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nextjs.csv +0 -0
  869. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nuxt-ui.csv +0 -0
  870. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nuxtjs.csv +0 -0
  871. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/react-native.csv +0 -0
  872. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/react.csv +0 -0
  873. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/shadcn.csv +0 -0
  874. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/svelte.csv +0 -0
  875. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/swiftui.csv +0 -0
  876. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/vue.csv +0 -0
  877. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/styles.csv +0 -0
  878. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/typography.csv +0 -0
  879. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/ui-reasoning.csv +0 -0
  880. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/ux-guidelines.csv +0 -0
  881. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/web-interface.csv +0 -0
  882. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/core.py +0 -0
  883. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/design_system.py +0 -0
  884. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/search.py +0 -0
  885. /package/skills/{competitive-analysis → product/competitive-analysis}/SKILL.md +0 -0
  886. /package/skills/{meeting-notes → product/meeting-notes}/SKILL.md +0 -0
  887. /package/skills/{prd-template → product/prd-template}/SKILL.md +0 -0
  888. /package/skills/{stakeholder-update → product/stakeholder-update}/SKILL.md +0 -0
  889. /package/skills/{user-research-synthesis → product/user-research-synthesis}/SKILL.md +0 -0
  890. /package/skills/{senior-qa → quality/senior-qa}/README.md +0 -0
  891. /package/skills/{senior-qa → quality/senior-qa}/SKILL.md +0 -0
  892. /package/skills/{senior-qa → quality/senior-qa}/references/qa_best_practices.md +0 -0
  893. /package/skills/{senior-qa → quality/senior-qa}/references/test_automation_patterns.md +0 -0
  894. /package/skills/{senior-qa → quality/senior-qa}/references/testing_strategies.md +0 -0
  895. /package/skills/{senior-qa → quality/senior-qa}/scripts/coverage_analyzer.py +0 -0
  896. /package/skills/{senior-qa → quality/senior-qa}/scripts/e2e_test_scaffolder.py +0 -0
  897. /package/skills/{senior-qa → quality/senior-qa}/scripts/test_suite_generator.py +0 -0
  898. /package/skills/{tdd-guide → quality/tdd-guide}/HOW_TO_USE.md +0 -0
  899. /package/skills/{tdd-guide → quality/tdd-guide}/README.md +0 -0
  900. /package/skills/{tdd-guide → quality/tdd-guide}/SKILL.md +0 -0
  901. /package/skills/{tdd-guide → quality/tdd-guide}/assets/expected_output.json +0 -0
  902. /package/skills/{tdd-guide → quality/tdd-guide}/assets/sample_input_python.json +0 -0
  903. /package/skills/{tdd-guide → quality/tdd-guide}/assets/sample_input_typescript.json +0 -0
  904. /package/skills/{tdd-guide → quality/tdd-guide}/references/ci-integration.md +0 -0
  905. /package/skills/{tdd-guide → quality/tdd-guide}/references/framework-guide.md +0 -0
  906. /package/skills/{tdd-guide → quality/tdd-guide}/references/tdd-best-practices.md +0 -0
  907. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/coverage_analyzer.py +0 -0
  908. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/fixture_generator.py +0 -0
  909. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/format_detector.py +0 -0
  910. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/framework_adapter.py +0 -0
  911. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/metrics_calculator.py +0 -0
  912. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/output_formatter.py +0 -0
  913. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/tdd_workflow.py +0 -0
  914. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/test_generator.py +0 -0
  915. /package/skills/{brainstorming → workflow/brainstorming}/scripts/frame-template.html +0 -0
  916. /package/skills/{brainstorming → workflow/brainstorming}/scripts/server.cjs +0 -0
@@ -1,978 +0,0 @@
1
- # Model Validation
2
-
3
- ---
4
-
5
- ## Overview
6
-
7
- Model validation ensures models meet quality standards before production deployment. It encompasses offline evaluation, online testing, and continuous monitoring to catch performance degradation, data drift, and model failures.
8
-
9
- ## When to Use This Reference
10
-
11
- - Implementing offline model evaluation strategies
12
- - Setting up A/B testing frameworks
13
- - Building shadow deployment pipelines
14
- - Creating model comparison workflows
15
- - Implementing continuous model monitoring
16
-
17
- ## When NOT to Use
18
-
19
- - Quick model prototyping
20
- - One-off analysis without deployment
21
- - Models with no production requirements
22
-
23
- ---
24
-
25
- ## Offline Evaluation
26
-
27
- ### Comprehensive Evaluation Suite
28
-
29
- ```python
30
- from dataclasses import dataclass
31
- from typing import Optional
32
- import numpy as np
33
- import pandas as pd
34
- from sklearn.metrics import (
35
- accuracy_score, precision_score, recall_score, f1_score,
36
- roc_auc_score, average_precision_score, confusion_matrix,
37
- mean_squared_error, mean_absolute_error, r2_score,
38
- )
39
-
40
@dataclass
class ClassificationMetrics:
    """Container for the metrics produced by a classification evaluation.

    ``roc_auc`` and ``pr_auc`` are optional because they can only be filled in
    when probability scores were supplied to the evaluator.
    """
    accuracy: float
    precision: float
    recall: float
    f1: float
    roc_auc: Optional[float]
    pr_auc: Optional[float]
    confusion_matrix: np.ndarray

    def to_dict(self) -> dict:
        """Return the scalar metrics as a plain dict.

        The confusion matrix is intentionally left out; only the six scalar
        fields are exported, in declaration order.
        """
        scalar_fields = ("accuracy", "precision", "recall", "f1", "roc_auc", "pr_auc")
        return {field_name: getattr(self, field_name) for field_name in scalar_fields}
60
-
61
@dataclass
class RegressionMetrics:
    """Container for the metrics produced by a regression evaluation.

    ``mape`` is optional: the evaluator sets it to ``None`` when every target
    value is zero and the percentage error cannot be computed.
    """
    mse: float
    rmse: float
    mae: float
    r2: float
    mape: Optional[float]

    def to_dict(self) -> dict:
        """Return every metric as a plain dict, in declaration order."""
        exported = ("mse", "rmse", "mae", "r2", "mape")
        return {field_name: getattr(self, field_name) for field_name in exported}
78
-
79
class ModelEvaluator:
    """Compute offline evaluation metrics for a trained model.

    ``task_type`` is only consulted by :meth:`evaluate_by_segment`, which uses
    it to choose between the classification and regression evaluators.
    """

    def __init__(self, task_type: str = "classification"):
        self.task_type = task_type

    def evaluate_classification(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
        y_prob: Optional[np.ndarray] = None,
        average: str = "weighted",
    ) -> ClassificationMetrics:
        """Evaluate a classifier.

        Args:
            y_true: Ground-truth labels.
            y_pred: Predicted labels.
            y_prob: Optional predicted probabilities. Either a 1-D vector of
                positive-class scores or a 2-D ``(n_samples, n_classes)`` matrix.
            average: Averaging strategy forwarded to the sklearn metric functions.

        Returns:
            A ``ClassificationMetrics`` instance. ``roc_auc`` / ``pr_auc`` stay
            ``None`` when no probabilities are given or when the ranking metric
            is undefined for the data (fewer than two classes present, or a
            probability matrix whose width does not match the classes present).
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        roc_auc = None
        pr_auc = None

        if y_prob is not None:
            y_prob = np.asarray(y_prob)
            n_classes = len(np.unique(y_true))

            if n_classes == 2:
                # Binary classification: reduce to the positive-class score.
                y_prob_pos = None
                if y_prob.ndim == 1:
                    y_prob_pos = y_prob
                elif y_prob.ndim == 2 and y_prob.shape[1] == 2:
                    y_prob_pos = y_prob[:, 1]
                # Any other shape (e.g. a K>2 column matrix for a segment that
                # happens to contain two classes) has no well-defined positive
                # column, so the AUC metrics are left as None.
                if y_prob_pos is not None:
                    roc_auc = roc_auc_score(y_true, y_prob_pos)
                    pr_auc = average_precision_score(y_true, y_prob_pos)
            elif n_classes > 2 and y_prob.ndim == 2 and y_prob.shape[1] == n_classes:
                # Multiclass one-vs-rest AUC requires one column per class present.
                roc_auc = roc_auc_score(
                    y_true, y_prob, multi_class="ovr", average=average
                )
            # n_classes < 2: AUC is undefined for a single class (typical for a
            # small segment or CV fold); skip instead of raising.

        return ClassificationMetrics(
            accuracy=accuracy_score(y_true, y_pred),
            precision=precision_score(y_true, y_pred, average=average, zero_division=0),
            recall=recall_score(y_true, y_pred, average=average, zero_division=0),
            f1=f1_score(y_true, y_pred, average=average, zero_division=0),
            roc_auc=roc_auc,
            pr_auc=pr_auc,
            confusion_matrix=confusion_matrix(y_true, y_pred),
        )

    def evaluate_regression(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
    ) -> RegressionMetrics:
        """Evaluate a regression model.

        MAPE is computed only over samples whose true value is non-zero and is
        ``None`` when every true value is zero.
        """
        # Coerce so that boolean-mask indexing below also works for lists.
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        mse = mean_squared_error(y_true, y_pred)

        nonzero = y_true != 0
        if nonzero.any():
            mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
        else:
            mape = None

        return RegressionMetrics(
            mse=mse,
            rmse=np.sqrt(mse),
            mae=mean_absolute_error(y_true, y_pred),
            r2=r2_score(y_true, y_pred),
            mape=mape,
        )

    def evaluate_by_segment(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
        segments: np.ndarray,
        y_prob: Optional[np.ndarray] = None,
    ) -> dict:
        """Evaluate the model separately for every distinct value in ``segments``.

        Returns:
            Mapping ``segment value -> metrics dict`` (the ``to_dict()`` output of
            the per-segment metrics object).
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        segments = np.asarray(segments)
        if y_prob is not None:
            y_prob = np.asarray(y_prob)

        results = {}
        for segment in np.unique(segments):
            mask = segments == segment

            if self.task_type == "classification":
                segment_prob = y_prob[mask] if y_prob is not None else None
                metrics = self.evaluate_classification(
                    y_true[mask], y_pred[mask], segment_prob
                )
            else:
                metrics = self.evaluate_regression(y_true[mask], y_pred[mask])

            # Use native Python keys; they hash/compare equal to the numpy
            # scalars but are friendlier to serialise.
            key = segment.item() if hasattr(segment, "item") else segment
            results[key] = metrics.to_dict()

        return results
168
- ```
169
-
170
- ### Cross-Validation Framework
171
-
172
- ```python
173
- from sklearn.model_selection import (
174
- KFold, StratifiedKFold, TimeSeriesSplit, cross_val_score
175
- )
176
- import numpy as np
177
- from typing import Callable
178
-
179
class CrossValidator:
    """Cross-validation framework for model evaluation.

    Args:
        n_splits: Number of folds.
        shuffle: Whether (stratified) k-fold shuffles before splitting.
        random_state: Seed used for shuffling. Ignored when ``shuffle`` is False.
    """

    def __init__(
        self,
        n_splits: int = 5,
        shuffle: bool = True,
        random_state: int = 42,
    ):
        self.n_splits = n_splits
        self.shuffle = shuffle
        self.random_state = random_state

    def _splitter_kwargs(self) -> dict:
        """Keyword arguments shared by KFold and StratifiedKFold."""
        return {
            "n_splits": self.n_splits,
            "shuffle": self.shuffle,
            # sklearn raises ValueError when a random_state is supplied together
            # with shuffle=False, so only forward the seed when shuffling.
            "random_state": self.random_state if self.shuffle else None,
        }

    def validate_classification(
        self,
        model,
        X: np.ndarray,
        y: np.ndarray,
        stratified: bool = True,
    ) -> dict:
        """Run (stratified) k-fold cross-validation for a classifier.

        A fresh clone of ``model`` is fitted on every fold; the passed-in
        estimator itself is never fitted.

        Returns:
            Aggregated per-metric statistics, see ``_aggregate_cv_results``.
        """
        from sklearn.base import clone  # hoisted out of the fold loop

        splitter_cls = StratifiedKFold if stratified else KFold
        cv = splitter_cls(**self._splitter_kwargs())

        evaluator = ModelEvaluator("classification")
        fold_metrics = []

        for train_idx, val_idx in cv.split(X, y):
            X_train, X_val = X[train_idx], X[val_idx]
            y_train, y_val = y[train_idx], y[val_idx]

            fold_model = clone(model)
            fold_model.fit(X_train, y_train)

            y_pred = fold_model.predict(X_val)
            y_prob = None
            if hasattr(fold_model, "predict_proba"):
                y_prob = fold_model.predict_proba(X_val)

            metrics = evaluator.evaluate_classification(y_val, y_pred, y_prob)
            fold_metrics.append(metrics.to_dict())

        return self._aggregate_cv_results(fold_metrics)

    def validate_time_series(
        self,
        model,
        X: np.ndarray,
        y: np.ndarray,
        gap: int = 0,
    ) -> dict:
        """Run forward-chaining time-series cross-validation (regression metrics).

        Args:
            gap: Number of samples excluded between each train and validation
                window, forwarded to ``TimeSeriesSplit``.
        """
        from sklearn.base import clone  # hoisted out of the fold loop

        cv = TimeSeriesSplit(n_splits=self.n_splits, gap=gap)
        evaluator = ModelEvaluator("regression")
        fold_metrics = []

        for train_idx, val_idx in cv.split(X):
            X_train, X_val = X[train_idx], X[val_idx]
            y_train, y_val = y[train_idx], y[val_idx]

            fold_model = clone(model)
            fold_model.fit(X_train, y_train)

            y_pred = fold_model.predict(X_val)
            metrics = evaluator.evaluate_regression(y_val, y_pred)
            fold_metrics.append(metrics.to_dict())

        return self._aggregate_cv_results(fold_metrics)

    def _aggregate_cv_results(self, fold_metrics: list[dict]) -> dict:
        """Aggregate per-fold metric dicts into mean/std/min/max per metric.

        ``None`` entries are ignored; a metric that is ``None`` in every fold is
        omitted from the result. An empty ``fold_metrics`` yields ``{}``.
        """
        if not fold_metrics:
            return {}

        aggregated = {}
        for key in fold_metrics[0].keys():
            values = [m[key] for m in fold_metrics if m[key] is not None]
            if values:
                aggregated[key] = {
                    "mean": np.mean(values),
                    "std": np.std(values),
                    "min": np.min(values),
                    "max": np.max(values),
                    "values": values,
                }

        return aggregated
278
- ```
279
-
280
- ---
281
-
282
- ## Model Comparison
283
-
284
- ### Statistical Comparison
285
-
286
- ```python
287
- from scipy import stats
288
- import numpy as np
289
- from dataclasses import dataclass
290
-
291
@dataclass
class ComparisonResult:
    """Outcome of a statistical comparison between two models (A and B)."""
    # Mean score (or accuracy) of each model.
    model_a_mean: float
    model_b_mean: float
    # A minus B.
    difference: float
    # p-value of the test and whether it fell below the significance level.
    p_value: float
    significant: bool
    # (lower, upper) bounds for the difference.
    confidence_interval: tuple[float, float]
    # Identifier of the statistical test that produced this result.
    test_used: str
301
-
302
class ModelComparator:
    """Statistical comparison of model performance.

    Args:
        significance_level: Alpha used both for the ``significant`` flag and
            for the width of the reported confidence intervals.
    """

    def __init__(self, significance_level: float = 0.05):
        self.significance_level = significance_level

    def paired_t_test(
        self,
        scores_a: np.ndarray,
        scores_b: np.ndarray,
    ) -> ComparisonResult:
        """Paired t-test on matched scores (e.g. per-fold CV scores)."""
        scores_a = np.asarray(scores_a, dtype=float)
        scores_b = np.asarray(scores_b, dtype=float)
        _, p_value = stats.ttest_rel(scores_a, scores_b)

        differences = scores_a - scores_b
        mean_diff = np.mean(differences)
        std_diff = np.std(differences, ddof=1)
        n = len(differences)

        # (1 - significance_level) confidence interval for the mean difference.
        t_critical = stats.t.ppf(1 - self.significance_level / 2, n - 1)
        margin = t_critical * std_diff / np.sqrt(n)
        ci = (mean_diff - margin, mean_diff + margin)

        return ComparisonResult(
            model_a_mean=np.mean(scores_a),
            model_b_mean=np.mean(scores_b),
            difference=mean_diff,
            p_value=p_value,
            significant=p_value < self.significance_level,
            confidence_interval=ci,
            test_used="paired_t_test",
        )

    def wilcoxon_test(
        self,
        scores_a: np.ndarray,
        scores_b: np.ndarray,
    ) -> ComparisonResult:
        """Wilcoxon signed-rank test (non-parametric) on matched scores."""
        scores_a = np.asarray(scores_a, dtype=float)
        scores_b = np.asarray(scores_b, dtype=float)
        _, p_value = stats.wilcoxon(scores_a, scores_b)

        differences = scores_a - scores_b
        mean_diff = np.mean(differences)

        # Bootstrap CI at the comparator's own alpha so it agrees with the
        # `significant` flag instead of always being a 95% interval.
        ci = self._bootstrap_ci(differences, alpha=self.significance_level)

        return ComparisonResult(
            model_a_mean=np.mean(scores_a),
            model_b_mean=np.mean(scores_b),
            difference=mean_diff,
            p_value=p_value,
            significant=p_value < self.significance_level,
            confidence_interval=ci,
            test_used="wilcoxon",
        )

    def mcnemar_test(
        self,
        y_true: np.ndarray,
        pred_a: np.ndarray,
        pred_b: np.ndarray,
    ) -> ComparisonResult:
        """McNemar's test for two classifiers evaluated on the same samples.

        Uses the exact binomial test when there are fewer than 25 discordant
        pairs and the continuity-corrected chi-square approximation otherwise.
        No confidence interval is computed; it is reported as ``(None, None)``.
        """
        y_true = np.asarray(y_true)
        correct_a = np.asarray(pred_a) == y_true
        correct_b = np.asarray(pred_b) == y_true

        # b: A correct, B wrong; c: A wrong, B correct
        b = int(np.sum(correct_a & ~correct_b))
        c = int(np.sum(~correct_a & correct_b))
        n_discordant = b + c

        if n_discordant == 0:
            # The classifiers never disagree on correctness: no evidence of a
            # difference, and both test statistics are undefined.
            p_value = 1.0
        elif n_discordant < 25:
            # Exact binomial test. `stats.binom_test` has been removed from
            # SciPy; `binomtest` is its replacement.
            p_value = stats.binomtest(b, n_discordant, 0.5).pvalue
        else:
            statistic = (abs(b - c) - 1) ** 2 / n_discordant
            # Survival function is numerically safer than 1 - cdf.
            p_value = stats.chi2.sf(statistic, 1)

        acc_a = np.mean(correct_a)
        acc_b = np.mean(correct_b)

        return ComparisonResult(
            model_a_mean=acc_a,
            model_b_mean=acc_b,
            difference=acc_a - acc_b,
            p_value=p_value,
            significant=p_value < self.significance_level,
            confidence_interval=(None, None),
            test_used="mcnemar",
        )

    def _bootstrap_ci(
        self,
        data: np.ndarray,
        n_bootstrap: int = 10000,
        alpha: float = 0.05,
    ) -> tuple[float, float]:
        """Percentile-bootstrap confidence interval for the mean of ``data``.

        Resampling uses numpy's global random state, so results vary between
        calls unless the caller seeds it.
        """
        data = np.asarray(data)
        n = len(data)
        bootstrapped_means = [
            np.mean(np.random.choice(data, size=n, replace=True))
            for _ in range(n_bootstrap)
        ]

        lower = np.percentile(bootstrapped_means, alpha / 2 * 100)
        upper = np.percentile(bootstrapped_means, (1 - alpha / 2) * 100)

        return (lower, upper)
413
- ```
414
-
415
- ---
416
-
417
- ## A/B Testing
418
-
419
- ### Online Experiment Framework
420
-
421
- ```python
422
- from dataclasses import dataclass
423
- from datetime import datetime
424
- from typing import Optional
425
- import numpy as np
426
- import hashlib
427
- import json
428
-
429
@dataclass
class Experiment:
    """Configuration record describing one A/B test."""
    experiment_id: str
    name: str
    # Identifiers of the two models being compared.
    control_model: str
    treatment_model: str
    # Fraction of traffic routed to the treatment model.
    traffic_split: float
    start_time: datetime
    # None when no end time has been set.
    end_time: Optional[datetime]
    # Names of the metrics tracked for this experiment.
    metrics: list[str]
    minimum_sample_size: int
    status: str = "active"
442
-
443
class ABTestRouter:
    """Decides, per user, whether the control or the treatment model is served."""

    def __init__(self, experiment: Experiment):
        self.experiment = experiment

    def get_variant(self, user_id: str) -> str:
        """Return "treatment" or "control" for ``user_id``.

        The assignment is a pure function of the experiment id and the user id:
        their MD5 digest is scaled into [0, 1) and compared with the
        experiment's traffic split, so a user always lands in the same variant.
        """
        key = f"{self.experiment.experiment_id}:{user_id}".encode()
        digest_hex = hashlib.md5(key).hexdigest()
        bucket = int(digest_hex, 16) / (2**128)

        return "treatment" if bucket < self.experiment.traffic_split else "control"

    def get_model(self, user_id: str) -> str:
        """Return the model identifier that should serve ``user_id``."""
        serves_treatment = self.get_variant(user_id) == "treatment"
        return (
            self.experiment.treatment_model
            if serves_treatment
            else self.experiment.control_model
        )
467
-
468
class ABTestAnalyzer:
    """Analyze A/B test results.

    Args:
        significance_level: Alpha used for the ``significant`` flag, for the
            width of confidence intervals, and for sample-size planning.
    """

    def __init__(self, significance_level: float = 0.05):
        self.significance_level = significance_level

    def analyze_conversion(
        self,
        control_conversions: int,
        control_total: int,
        treatment_conversions: int,
        treatment_total: int,
    ) -> dict:
        """Analyze a conversion-rate experiment with a two-proportion z-test.

        Returns:
            Dict with both rates, absolute difference, relative lift, two-sided
            p-value, significance flag, confidence interval for the difference
            (unpooled standard error) and both sample sizes.
        """
        # Local import: this snippet's import block does not pull in scipy.
        from scipy import stats

        control_rate = control_conversions / control_total
        treatment_rate = treatment_conversions / treatment_total
        difference = treatment_rate - control_rate

        # Two-proportion z-test with pooled standard error.
        pooled_rate = (control_conversions + treatment_conversions) / (
            control_total + treatment_total
        )
        se = np.sqrt(
            pooled_rate * (1 - pooled_rate) * (1 / control_total + 1 / treatment_total)
        )

        if se > 0:
            z_stat = difference / se
            # 2 * sf is the two-sided p-value; more accurate than 2 * (1 - cdf).
            p_value = float(2 * stats.norm.sf(abs(z_stat)))
        else:
            # Pooled rate is exactly 0 or 1: both arms are identical, so there
            # is no evidence of a difference (avoids a 0/0 NaN p-value).
            p_value = 1.0

        # Relative lift
        lift = difference / control_rate if control_rate > 0 else 0

        # Confidence interval for the difference (unpooled standard error).
        se_diff = np.sqrt(
            control_rate * (1 - control_rate) / control_total
            + treatment_rate * (1 - treatment_rate) / treatment_total
        )
        z_critical = stats.norm.ppf(1 - self.significance_level / 2)
        ci = (
            difference - z_critical * se_diff,
            difference + z_critical * se_diff,
        )

        return {
            "control_rate": control_rate,
            "treatment_rate": treatment_rate,
            "absolute_difference": difference,
            "relative_lift": lift,
            "p_value": p_value,
            "significant": p_value < self.significance_level,
            "confidence_interval": ci,
            "control_sample_size": control_total,
            "treatment_sample_size": treatment_total,
        }

    def analyze_continuous_metric(
        self,
        control_values: np.ndarray,
        treatment_values: np.ndarray,
    ) -> dict:
        """Analyze a continuous metric (e.g., revenue, time) with Welch's t-test.

        The confidence interval uses the same ingredients as the test itself:
        sample variances (``ddof=1``) and Welch–Satterthwaite degrees of freedom.
        """
        from scipy import stats

        control_values = np.asarray(control_values, dtype=float)
        treatment_values = np.asarray(treatment_values, dtype=float)
        n_control = len(control_values)
        n_treatment = len(treatment_values)

        control_mean = np.mean(control_values)
        treatment_mean = np.mean(treatment_values)
        difference = treatment_mean - control_mean

        # Welch's t-test (unequal variances)
        _, p_value = stats.ttest_ind(
            treatment_values, control_values, equal_var=False
        )

        lift = difference / control_mean if control_mean > 0 else 0

        # Per-arm variance of the mean, using the unbiased sample variance.
        var_c = np.var(control_values, ddof=1) / n_control
        var_t = np.var(treatment_values, ddof=1) / n_treatment
        se_diff = np.sqrt(var_c + var_t)

        if se_diff > 0:
            # Welch–Satterthwaite approximation of the degrees of freedom.
            dof = (var_c + var_t) ** 2 / (
                var_c ** 2 / (n_control - 1) + var_t ** 2 / (n_treatment - 1)
            )
            t_critical = stats.t.ppf(1 - self.significance_level / 2, dof)
            margin = t_critical * se_diff
        else:
            # No variance in either arm: the interval collapses to a point.
            margin = 0.0
        ci = (difference - margin, difference + margin)

        return {
            "control_mean": control_mean,
            "treatment_mean": treatment_mean,
            "absolute_difference": difference,
            "relative_lift": lift,
            "p_value": p_value,
            "significant": p_value < self.significance_level,
            "confidence_interval": ci,
            "control_sample_size": n_control,
            "treatment_sample_size": n_treatment,
        }

    def calculate_sample_size(
        self,
        baseline_rate: float,
        minimum_detectable_effect: float,
        power: float = 0.8,
    ) -> int:
        """Calculate the required sample size per variant for a two-sided test.

        Args:
            baseline_rate: Control conversion rate, strictly between 0 and 1.
            minimum_detectable_effect: Relative lift to detect (0.1 means +10%).
            power: Desired statistical power.

        Raises:
            ValueError: If the inputs describe no detectable difference or
                rates outside (0, 1).
        """
        from scipy import stats

        if minimum_detectable_effect == 0:
            raise ValueError("minimum_detectable_effect must be non-zero")

        p1 = baseline_rate
        p2 = baseline_rate * (1 + minimum_detectable_effect)
        if not (0 < p1 < 1 and 0 < p2 < 1):
            raise ValueError(
                "baseline_rate and the resulting treatment rate must lie in (0, 1)"
            )

        alpha = self.significance_level
        z_alpha = stats.norm.ppf(1 - alpha / 2)
        z_beta = stats.norm.ppf(power)

        p_bar = (p1 + p2) / 2

        n = (
            (z_alpha * np.sqrt(2 * p_bar * (1 - p_bar))
             + z_beta * np.sqrt(p1 * (1 - p1) + p2 * (1 - p2))) ** 2
            / (p2 - p1) ** 2
        )

        return int(np.ceil(n))
587
- ```
588
-
589
- ---
590
-
591
- ## Shadow Deployment
592
-
593
- ### Shadow Mode Pipeline
594
-
595
- ```python
596
- from dataclasses import dataclass
597
- from datetime import datetime
598
- from typing import Any, Optional
599
- import logging
600
- import json
601
-
602
- logger = logging.getLogger(__name__)
603
-
604
@dataclass
class PredictionComparison:
    """One request's production prediction side by side with the shadow one."""
    request_id: str
    timestamp: datetime
    # Raw outputs of the two models for the same request.
    production_prediction: Any
    shadow_prediction: Any
    # Latency of each model's predict call, in milliseconds.
    production_latency_ms: float
    shadow_latency_ms: float
    # Whether the two predictions were judged to agree.
    agreement: bool
    # Input features of the request, when recorded.
    features: Optional[dict] = None
615
-
616
class ShadowDeployment:
    """Shadow deployment for model validation.

    The production model's prediction is always what callers receive. The
    shadow model is run on the same features purely for comparison; nothing it
    does (slow, raising, unwritable log) may affect the production response.

    Args:
        production_model: Object exposing ``predict(features)``; its result is returned.
        shadow_model: Object exposing ``predict(features)``; its result is only logged.
        log_path: JSON-lines file that comparisons are appended to.
        shadow_timeout_s: How long ``predict`` waits for the shadow result
            before giving up on the comparison.

    Note:
        Every comparison is also kept in ``self.comparisons`` for
        ``analyze_shadow_performance``; that list grows without bound.
    """

    def __init__(
        self,
        production_model,
        shadow_model,
        log_path: str = "/var/log/shadow_predictions.jsonl",
        shadow_timeout_s: float = 5.0,
    ):
        self.production_model = production_model
        self.shadow_model = shadow_model
        self.log_path = log_path
        self.shadow_timeout_s = shadow_timeout_s
        self.comparisons: list[PredictionComparison] = []

    def predict(
        self,
        features: dict,
        request_id: Optional[str] = None,
    ) -> Any:
        """Return the production prediction and record a shadow comparison.

        A request id is generated when none is given. Shadow timeouts and
        shadow/logging errors are logged and swallowed.
        """
        import time
        import uuid
        import concurrent.futures

        request_id = request_id or str(uuid.uuid4())

        # Production prediction (synchronous, used for response)
        prod_start = time.perf_counter()
        production_pred = self.production_model.predict(features)
        prod_latency = (time.perf_counter() - prod_start) * 1000

        # Shadow prediction (run in a worker thread, logged but not returned)
        def run_shadow():
            shadow_start = time.perf_counter()
            shadow_pred = self.shadow_model.predict(features)
            shadow_latency = (time.perf_counter() - shadow_start) * 1000
            return shadow_pred, shadow_latency

        # Not used as a context manager: leaving a `with` block waits for the
        # worker, which would make the timeout below ineffective.
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        try:
            future = executor.submit(run_shadow)
            shadow_pred, shadow_latency = future.result(timeout=self.shadow_timeout_s)

            comparison = PredictionComparison(
                request_id=request_id,
                timestamp=datetime.utcnow(),
                production_prediction=production_pred,
                shadow_prediction=shadow_pred,
                production_latency_ms=prod_latency,
                shadow_latency_ms=shadow_latency,
                agreement=self._check_agreement(production_pred, shadow_pred),
                features=features,
            )

            self._log_comparison(comparison)

        except concurrent.futures.TimeoutError:
            logger.warning(f"Shadow prediction timed out for {request_id}")
        except Exception:
            # A failing shadow model or log write must never break production.
            logger.exception("Shadow comparison failed for %s", request_id)
        finally:
            executor.shutdown(wait=False)

        return production_pred

    def _check_agreement(self, prod_pred: Any, shadow_pred: Any) -> bool:
        """Check if predictions agree.

        Lists/arrays are compared with a relative tolerance of 1e-3; when that
        numeric comparison is not possible (non-numeric or incompatible
        shapes) they are treated as disagreeing. Everything else uses ``==``.
        Always returns a plain ``bool`` so the value is JSON-serialisable.
        """
        import numpy as np

        if isinstance(prod_pred, (list, np.ndarray)):
            try:
                return bool(np.allclose(prod_pred, shadow_pred, rtol=1e-3))
            except (TypeError, ValueError):
                return False
        return bool(np.all(prod_pred == shadow_pred))

    def _log_comparison(self, comparison: PredictionComparison) -> None:
        """Append the comparison to the JSON-lines log and keep it in memory."""
        log_entry = {
            "request_id": comparison.request_id,
            "timestamp": comparison.timestamp.isoformat(),
            "production_prediction": str(comparison.production_prediction),
            "shadow_prediction": str(comparison.shadow_prediction),
            "production_latency_ms": comparison.production_latency_ms,
            "shadow_latency_ms": comparison.shadow_latency_ms,
            "agreement": comparison.agreement,
        }

        with open(self.log_path, "a") as f:
            f.write(json.dumps(log_entry) + "\n")

        self.comparisons.append(comparison)

    def analyze_shadow_performance(self) -> dict:
        """Summarise agreement rate and latency percentiles of logged comparisons.

        Returns an empty dict when nothing has been recorded yet.
        """
        import numpy as np

        if not self.comparisons:
            return {}

        agreements = [c.agreement for c in self.comparisons]
        prod_latencies = [c.production_latency_ms for c in self.comparisons]
        shadow_latencies = [c.shadow_latency_ms for c in self.comparisons]

        return {
            "total_comparisons": len(self.comparisons),
            "agreement_rate": np.mean(agreements),
            "production_latency_p50": np.percentile(prod_latencies, 50),
            "production_latency_p99": np.percentile(prod_latencies, 99),
            "shadow_latency_p50": np.percentile(shadow_latencies, 50),
            "shadow_latency_p99": np.percentile(shadow_latencies, 99),
            "latency_difference_mean": np.mean(
                [s - p for s, p in zip(shadow_latencies, prod_latencies)]
            ),
        }
721
- ```
722
-
723
- ---
724
-
725
- ## Validation Pipeline Integration
726
-
727
- ### Complete Validation Workflow
728
-
729
- ```python
730
- from enum import Enum
731
- from dataclasses import dataclass
732
- from typing import Optional
733
-
734
class ValidationStatus(Enum):
    """Outcome of a single validation check (and of the overall summary)."""

    PASSED = "passed"    # check met its threshold
    FAILED = "failed"    # check did not meet its threshold
    WARNING = "warning"  # check is borderline; needs a human look
738
-
739
@dataclass
class ValidationResult:
    """Record produced by one validation check."""
    # Short identifier of the check, e.g. "performance" or "latency".
    check_name: str
    status: ValidationStatus
    # Human-readable explanation of the status.
    message: str
    # Optional supporting numbers for the check.
    details: Optional[dict] = None
746
-
747
class ModelValidator:
    """Complete model validation workflow.

    Each ``validate_*`` method runs one check, appends its ``ValidationResult``
    to ``self.results`` and returns it; ``get_summary`` rolls them up.

    Args:
        accuracy_threshold: Minimum accuracy for the performance check to pass.
        latency_threshold_ms: Maximum acceptable latency, in milliseconds.
        drift_threshold: Stored on the instance; not used by the checks below.
    """

    def __init__(
        self,
        accuracy_threshold: float = 0.8,
        latency_threshold_ms: float = 100,
        drift_threshold: float = 0.2,
    ):
        self.accuracy_threshold = accuracy_threshold
        self.latency_threshold_ms = latency_threshold_ms
        self.drift_threshold = drift_threshold
        self.results: list[ValidationResult] = []

    def validate_performance(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
    ) -> ValidationResult:
        """Validate that classification accuracy meets ``accuracy_threshold``."""
        evaluator = ModelEvaluator("classification")
        metrics = evaluator.evaluate_classification(y_true, y_pred)

        if metrics.accuracy >= self.accuracy_threshold:
            status = ValidationStatus.PASSED
            message = f"Accuracy {metrics.accuracy:.4f} meets threshold"
        else:
            status = ValidationStatus.FAILED
            message = f"Accuracy {metrics.accuracy:.4f} below threshold {self.accuracy_threshold}"

        result = ValidationResult(
            check_name="performance",
            status=status,
            message=message,
            details=metrics.to_dict(),
        )
        self.results.append(result)
        return result

    def validate_latency(
        self,
        model,
        sample_input: np.ndarray,
        n_iterations: int = 100,
    ) -> ValidationResult:
        """Validate inference latency by timing repeated ``model.predict`` calls.

        PASSED when P99 is within the threshold, WARNING when only P50 is,
        FAILED otherwise.

        Raises:
            ValueError: If ``n_iterations`` is less than 1.
        """
        import time

        if n_iterations < 1:
            raise ValueError("n_iterations must be at least 1")

        latencies = []
        for _ in range(n_iterations):
            # perf_counter is monotonic and high-resolution, unlike time.time().
            start = time.perf_counter()
            model.predict(sample_input)
            latencies.append((time.perf_counter() - start) * 1000)

        p50 = np.percentile(latencies, 50)
        p99 = np.percentile(latencies, 99)

        if p99 <= self.latency_threshold_ms:
            status = ValidationStatus.PASSED
            message = f"P99 latency {p99:.2f}ms meets threshold"
        elif p50 <= self.latency_threshold_ms:
            status = ValidationStatus.WARNING
            message = f"P50 OK but P99 {p99:.2f}ms exceeds threshold"
        else:
            status = ValidationStatus.FAILED
            message = f"P99 latency {p99:.2f}ms exceeds threshold"

        result = ValidationResult(
            check_name="latency",
            status=status,
            message=message,
            details={"p50_ms": p50, "p99_ms": p99, "mean_ms": np.mean(latencies)},
        )
        self.results.append(result)
        return result

    def validate_data_compatibility(
        self,
        model,
        expected_features: list[str],
        sample_data: pd.DataFrame,
    ) -> ValidationResult:
        """Validate that ``sample_data`` has the expected columns and the model can score it.

        FAILED when expected columns are missing or a one-row inference raises;
        WARNING when only extra columns are present; PASSED otherwise.
        """
        missing_features = set(expected_features) - set(sample_data.columns)
        extra_features = set(sample_data.columns) - set(expected_features)

        if missing_features:
            # Selecting the expected columns would raise KeyError and replace
            # this precise diagnosis with a generic "Inference failed", so the
            # inference probe is skipped.
            status = ValidationStatus.FAILED
            message = f"Missing features: {missing_features}"
        else:
            if extra_features:
                status = ValidationStatus.WARNING
                message = f"Extra features will be ignored: {extra_features}"
            else:
                status = ValidationStatus.PASSED
                message = "All expected features present"

            # Try inference on a single row; any failure downgrades to FAILED.
            try:
                model.predict(sample_data[expected_features].head(1))
            except Exception as e:
                status = ValidationStatus.FAILED
                message = f"Inference failed: {str(e)}"

        result = ValidationResult(
            check_name="data_compatibility",
            status=status,
            message=message,
            details={
                "missing_features": list(missing_features),
                "extra_features": list(extra_features),
            },
        )
        self.results.append(result)
        return result

    def validate_vs_baseline(
        self,
        y_true: np.ndarray,
        new_pred: np.ndarray,
        baseline_pred: np.ndarray,
    ) -> ValidationResult:
        """Validate the new model against a baseline using McNemar's test.

        PASSED only for a statistically significant improvement, FAILED only
        for a significant regression; everything else is a WARNING.
        """
        comparator = ModelComparator()
        comparison = comparator.mcnemar_test(y_true, new_pred, baseline_pred)

        # mcnemar_test already reports each model's accuracy (A = new model,
        # B = baseline), so no separate accuracy_score call is needed.
        new_acc = float(comparison.model_a_mean)
        baseline_acc = float(comparison.model_b_mean)

        if new_acc >= baseline_acc:
            if comparison.significant:
                status = ValidationStatus.PASSED
                message = f"Significant improvement: {new_acc:.4f} vs {baseline_acc:.4f}"
            else:
                status = ValidationStatus.WARNING
                message = f"Improvement not significant: {new_acc:.4f} vs {baseline_acc:.4f}"
        else:
            if comparison.significant:
                status = ValidationStatus.FAILED
                message = f"Significant regression: {new_acc:.4f} vs {baseline_acc:.4f}"
            else:
                status = ValidationStatus.WARNING
                message = f"Minor regression: {new_acc:.4f} vs {baseline_acc:.4f}"

        result = ValidationResult(
            check_name="baseline_comparison",
            status=status,
            message=message,
            details={
                "new_accuracy": new_acc,
                "baseline_accuracy": baseline_acc,
                "p_value": comparison.p_value,
            },
        )
        self.results.append(result)
        return result

    def get_summary(self) -> dict:
        """Get validation summary.

        The overall status is FAILED if any check failed, otherwise WARNING if
        any check warned, otherwise PASSED.
        """
        passed = sum(1 for r in self.results if r.status == ValidationStatus.PASSED)
        warnings = sum(1 for r in self.results if r.status == ValidationStatus.WARNING)
        failed = sum(1 for r in self.results if r.status == ValidationStatus.FAILED)

        if failed > 0:
            overall_status = ValidationStatus.FAILED
        elif warnings > 0:
            overall_status = ValidationStatus.WARNING
        else:
            overall_status = ValidationStatus.PASSED

        return {
            "overall_status": overall_status.value,
            "passed": passed,
            "warnings": warnings,
            "failed": failed,
            "results": [
                {
                    "check": r.check_name,
                    "status": r.status.value,
                    "message": r.message,
                }
                for r in self.results
            ],
        }
929
- ```
930
-
931
- ---
932
-
933
- ## Best Practices
934
-
935
- ### Validation Checklist
936
-
937
- ```python
938
- VALIDATION_CHECKLIST = {
939
- "offline": [
940
- "Accuracy/performance metrics meet threshold",
941
- "Cross-validation shows consistent performance",
942
- "Model outperforms or matches baseline",
943
- "Metrics stable across data segments",
944
- ],
945
- "pre_deployment": [
946
- "Inference latency within SLA",
947
- "Memory usage acceptable",
948
- "Input/output schema validated",
949
- "Model serialization/loading works",
950
- ],
951
- "shadow": [
952
- "Shadow predictions logged successfully",
953
- "Agreement rate with production acceptable",
954
- "No latency regression",
955
- "Error rate within bounds",
956
- ],
957
- "ab_test": [
958
- "Sufficient sample size reached",
959
- "Statistical significance achieved",
960
- "No negative impact on guardrail metrics",
961
- "Business metrics improved",
962
- ],
963
- }
964
- ```
965
-
966
- ---
967
-
968
- ## Related References
969
-
970
- - `training-pipelines.md` - Model training before validation
971
- - `experiment-tracking.md` - Logging validation results
972
- - `pipeline-orchestration.md` - Automated validation workflows
973
- - `feature-engineering.md` - Feature validation
974
-
975
- ## Cross-Reference Skills
976
-
977
- - **Data Engineer** - Data quality validation
978
- - **DevOps Engineer** - Deployment pipeline integration