aigroup-workflow 2.1.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (913)
  1. package/.codex/AGENTS.md +1 -1
  2. package/CLAUDE.md +1 -4
  3. package/README.md +333 -333
  4. package/cli/commands/init.mjs +20 -6
  5. package/cli/utils/scaffold.mjs +39 -9
  6. package/docs/red-flags.md +1 -1
  7. package/docs/rules/entropy.md +1 -1
  8. package/docs/rules/performance.md +1 -1
  9. package/docs/workflow-pipeline.md +1 -0
  10. package/manifests/install-modules.json +223 -133
  11. package/package.json +39 -39
  12. package/scripts/orchestration/lib/orchestrator.cjs +34 -0
  13. package/scripts/orchestration/session.cjs +24 -1
  14. package/skills/ai-ml/fine-tuning-expert/SKILL.md +162 -0
  15. package/skills/ai-ml/fine-tuning-expert/references/dataset-preparation.md +540 -0
  16. package/skills/ai-ml/fine-tuning-expert/references/deployment-optimization.md +673 -0
  17. package/skills/ai-ml/fine-tuning-expert/references/evaluation-metrics.md +597 -0
  18. package/skills/ai-ml/fine-tuning-expert/references/hyperparameter-tuning.md +565 -0
  19. package/skills/ai-ml/fine-tuning-expert/references/lora-peft.md +347 -0
  20. package/skills/ai-ml/ml-pipeline/SKILL.md +159 -0
  21. package/skills/ai-ml/ml-pipeline/references/experiment-tracking.md +833 -0
  22. package/skills/ai-ml/ml-pipeline/references/feature-engineering.md +631 -0
  23. package/skills/ai-ml/ml-pipeline/references/model-validation.md +978 -0
  24. package/skills/ai-ml/ml-pipeline/references/pipeline-orchestration.md +907 -0
  25. package/skills/ai-ml/ml-pipeline/references/training-pipelines.md +782 -0
  26. package/skills/ai-ml/rag-architect/SKILL.md +194 -0
  27. package/skills/ai-ml/rag-architect/references/chunking-strategies.md +878 -0
  28. package/skills/ai-ml/rag-architect/references/embedding-models.md +561 -0
  29. package/skills/ai-ml/rag-architect/references/rag-evaluation.md +833 -0
  30. package/skills/ai-ml/rag-architect/references/retrieval-optimization.md +795 -0
  31. package/skills/ai-ml/rag-architect/references/vector-databases.md +589 -0
  32. package/skills/ai-ml/spark-engineer/SKILL.md +148 -0
  33. package/skills/ai-ml/spark-engineer/references/partitioning-caching.md +543 -0
  34. package/skills/ai-ml/spark-engineer/references/performance-tuning.md +544 -0
  35. package/skills/ai-ml/spark-engineer/references/rdd-operations.md +599 -0
  36. package/skills/ai-ml/spark-engineer/references/spark-sql-dataframes.md +474 -0
  37. package/skills/ai-ml/spark-engineer/references/streaming-patterns.md +786 -0
  38. package/skills/backend/api-designer/SKILL.md +217 -0
  39. package/skills/backend/api-designer/references/error-handling.md +541 -0
  40. package/skills/backend/api-designer/references/openapi.md +824 -0
  41. package/skills/backend/api-designer/references/pagination.md +494 -0
  42. package/skills/backend/api-designer/references/rest-patterns.md +335 -0
  43. package/skills/backend/api-designer/references/versioning.md +391 -0
  44. package/skills/backend/architecture-designer/SKILL.md +117 -0
  45. package/skills/backend/architecture-designer/references/adr-template.md +116 -0
  46. package/skills/backend/architecture-designer/references/architecture-patterns.md +111 -0
  47. package/skills/backend/architecture-designer/references/database-selection.md +102 -0
  48. package/skills/backend/architecture-designer/references/nfr-checklist.md +112 -0
  49. package/skills/backend/architecture-designer/references/system-design.md +100 -0
  50. package/skills/backend/code-documenter/SKILL.md +147 -0
  51. package/skills/backend/code-documenter/references/api-docs-fastapi-django.md +166 -0
  52. package/skills/backend/code-documenter/references/api-docs-nestjs-express.md +220 -0
  53. package/skills/backend/code-documenter/references/coverage-reports.md +125 -0
  54. package/skills/backend/code-documenter/references/documentation-systems.md +333 -0
  55. package/skills/backend/code-documenter/references/interactive-api-docs.md +531 -0
  56. package/skills/backend/code-documenter/references/python-docstrings.md +121 -0
  57. package/skills/backend/code-documenter/references/typescript-jsdoc.md +145 -0
  58. package/skills/backend/code-documenter/references/user-guides-tutorials.md +530 -0
  59. package/skills/backend/debugging-wizard/SKILL.md +105 -0
  60. package/skills/backend/debugging-wizard/references/common-patterns.md +132 -0
  61. package/skills/backend/debugging-wizard/references/debugging-tools.md +140 -0
  62. package/skills/backend/debugging-wizard/references/quick-fixes.md +177 -0
  63. package/skills/backend/debugging-wizard/references/strategies.md +142 -0
  64. package/skills/backend/debugging-wizard/references/systematic-debugging.md +367 -0
  65. package/skills/backend/feature-forge/SKILL.md +98 -0
  66. package/skills/backend/feature-forge/references/acceptance-criteria.md +104 -0
  67. package/skills/backend/feature-forge/references/ears-syntax.md +99 -0
  68. package/skills/backend/feature-forge/references/interview-questions.md +150 -0
  69. package/skills/backend/feature-forge/references/pre-discovery-subagents.md +54 -0
  70. package/skills/backend/feature-forge/references/specification-template.md +103 -0
  71. package/skills/backend/fullstack-guardian/SKILL.md +105 -0
  72. package/skills/backend/fullstack-guardian/references/api-design-standards.md +307 -0
  73. package/skills/backend/fullstack-guardian/references/architecture-decisions.md +350 -0
  74. package/skills/backend/fullstack-guardian/references/backend-patterns.md +237 -0
  75. package/skills/backend/fullstack-guardian/references/common-patterns.md +134 -0
  76. package/skills/backend/fullstack-guardian/references/deliverables-checklist.md +354 -0
  77. package/skills/backend/fullstack-guardian/references/design-template.md +91 -0
  78. package/skills/backend/fullstack-guardian/references/error-handling.md +135 -0
  79. package/skills/backend/fullstack-guardian/references/frontend-patterns.md +340 -0
  80. package/skills/backend/fullstack-guardian/references/integration-patterns.md +333 -0
  81. package/skills/backend/fullstack-guardian/references/security-checklist.md +106 -0
  82. package/skills/backend/graphql-architect/SKILL.md +146 -0
  83. package/skills/backend/graphql-architect/references/federation.md +418 -0
  84. package/skills/backend/graphql-architect/references/migration-from-rest.md +1141 -0
  85. package/skills/backend/graphql-architect/references/resolvers.md +425 -0
  86. package/skills/backend/graphql-architect/references/schema-design.md +393 -0
  87. package/skills/backend/graphql-architect/references/security.md +569 -0
  88. package/skills/backend/graphql-architect/references/subscriptions.md +510 -0
  89. package/skills/backend/legacy-modernizer/SKILL.md +137 -0
  90. package/skills/backend/legacy-modernizer/references/legacy-testing.md +381 -0
  91. package/skills/backend/legacy-modernizer/references/migration-strategies.md +423 -0
  92. package/skills/backend/legacy-modernizer/references/refactoring-patterns.md +395 -0
  93. package/skills/backend/legacy-modernizer/references/strangler-fig-pattern.md +281 -0
  94. package/skills/backend/legacy-modernizer/references/system-assessment.md +487 -0
  95. package/skills/backend/microservices-architect/SKILL.md +164 -0
  96. package/skills/backend/microservices-architect/references/communication.md +499 -0
  97. package/skills/backend/microservices-architect/references/data.md +721 -0
  98. package/skills/backend/microservices-architect/references/decomposition.md +344 -0
  99. package/skills/backend/microservices-architect/references/observability.md +805 -0
  100. package/skills/backend/microservices-architect/references/patterns.md +603 -0
  101. package/skills/database/database-optimizer/SKILL.md +147 -0
  102. package/skills/database/database-optimizer/references/index-strategies.md +331 -0
  103. package/skills/database/database-optimizer/references/monitoring-analysis.md +501 -0
  104. package/skills/database/database-optimizer/references/mysql-tuning.md +452 -0
  105. package/skills/database/database-optimizer/references/postgresql-tuning.md +413 -0
  106. package/skills/database/database-optimizer/references/query-optimization.md +251 -0
  107. package/skills/database/postgres-pro/SKILL.md +152 -0
  108. package/skills/database/postgres-pro/references/extensions.md +404 -0
  109. package/skills/database/postgres-pro/references/jsonb.md +321 -0
  110. package/skills/database/postgres-pro/references/maintenance.md +481 -0
  111. package/skills/database/postgres-pro/references/performance.md +265 -0
  112. package/skills/database/postgres-pro/references/replication.md +446 -0
  113. package/skills/database/sql-pro/SKILL.md +129 -0
  114. package/skills/database/sql-pro/references/database-design.md +402 -0
  115. package/skills/database/sql-pro/references/dialect-differences.md +419 -0
  116. package/skills/database/sql-pro/references/optimization.md +384 -0
  117. package/skills/database/sql-pro/references/query-patterns.md +285 -0
  118. package/skills/database/sql-pro/references/window-functions.md +328 -0
  119. package/skills/dotnet/csharp-developer/SKILL.md +125 -0
  120. package/skills/dotnet/csharp-developer/references/aspnet-core.md +394 -0
  121. package/skills/dotnet/csharp-developer/references/blazor.md +553 -0
  122. package/skills/dotnet/csharp-developer/references/entity-framework.md +409 -0
  123. package/skills/dotnet/csharp-developer/references/modern-csharp.md +248 -0
  124. package/skills/dotnet/csharp-developer/references/performance.md +498 -0
  125. package/skills/dotnet/dotnet-core-expert/SKILL.md +138 -0
  126. package/skills/dotnet/dotnet-core-expert/references/authentication.md +546 -0
  127. package/skills/dotnet/dotnet-core-expert/references/clean-architecture.md +455 -0
  128. package/skills/dotnet/dotnet-core-expert/references/cloud-native.md +548 -0
  129. package/skills/dotnet/dotnet-core-expert/references/entity-framework.md +440 -0
  130. package/skills/dotnet/dotnet-core-expert/references/minimal-apis.md +319 -0
  131. package/skills/frontend/angular-architect/SKILL.md +152 -0
  132. package/skills/frontend/angular-architect/references/components.md +297 -0
  133. package/skills/frontend/angular-architect/references/ngrx.md +401 -0
  134. package/skills/frontend/angular-architect/references/routing.md +361 -0
  135. package/skills/frontend/angular-architect/references/rxjs.md +319 -0
  136. package/skills/frontend/angular-architect/references/testing.md +405 -0
  137. package/skills/frontend/flutter-expert/SKILL.md +138 -0
  138. package/skills/frontend/flutter-expert/references/bloc-state.md +259 -0
  139. package/skills/frontend/flutter-expert/references/gorouter-navigation.md +119 -0
  140. package/skills/frontend/flutter-expert/references/performance.md +99 -0
  141. package/skills/frontend/flutter-expert/references/project-structure.md +118 -0
  142. package/skills/frontend/flutter-expert/references/riverpod-state.md +130 -0
  143. package/skills/frontend/flutter-expert/references/widget-patterns.md +123 -0
  144. package/skills/frontend/nextjs-developer/SKILL.md +143 -0
  145. package/skills/frontend/nextjs-developer/references/app-router.md +311 -0
  146. package/skills/frontend/nextjs-developer/references/data-fetching.md +482 -0
  147. package/skills/frontend/nextjs-developer/references/deployment.md +545 -0
  148. package/skills/frontend/nextjs-developer/references/server-actions.md +462 -0
  149. package/skills/frontend/nextjs-developer/references/server-components.md +384 -0
  150. package/skills/frontend/react-expert/SKILL.md +149 -0
  151. package/skills/frontend/react-expert/references/hooks-patterns.md +162 -0
  152. package/skills/frontend/react-expert/references/migration-class-to-modern.md +1119 -0
  153. package/skills/frontend/react-expert/references/performance.md +168 -0
  154. package/skills/frontend/react-expert/references/react-19-features.md +174 -0
  155. package/skills/frontend/react-expert/references/server-components.md +143 -0
  156. package/skills/frontend/react-expert/references/state-management.md +171 -0
  157. package/skills/frontend/react-expert/references/testing-react.md +174 -0
  158. package/skills/frontend/react-native-expert/SKILL.md +185 -0
  159. package/skills/frontend/react-native-expert/references/expo-router.md +187 -0
  160. package/skills/frontend/react-native-expert/references/list-optimization.md +204 -0
  161. package/skills/frontend/react-native-expert/references/platform-handling.md +188 -0
  162. package/skills/frontend/react-native-expert/references/project-structure.md +171 -0
  163. package/skills/frontend/react-native-expert/references/storage-hooks.md +173 -0
  164. package/skills/frontend/vue-expert/SKILL.md +98 -0
  165. package/skills/frontend/vue-expert/references/build-tooling.md +480 -0
  166. package/skills/frontend/vue-expert/references/components.md +448 -0
  167. package/skills/frontend/vue-expert/references/composition-api.md +299 -0
  168. package/skills/frontend/vue-expert/references/mobile-hybrid.md +636 -0
  169. package/skills/frontend/vue-expert/references/nuxt.md +669 -0
  170. package/skills/frontend/vue-expert/references/state-management.md +449 -0
  171. package/skills/frontend/vue-expert/references/typescript.md +584 -0
  172. package/skills/frontend/vue-expert-js/SKILL.md +167 -0
  173. package/skills/frontend/vue-expert-js/references/component-architecture.md +219 -0
  174. package/skills/frontend/vue-expert-js/references/composables-patterns.md +183 -0
  175. package/skills/frontend/vue-expert-js/references/jsdoc-typing.md +535 -0
  176. package/skills/frontend/vue-expert-js/references/state-management.md +249 -0
  177. package/skills/frontend/vue-expert-js/references/testing-patterns.md +237 -0
  178. package/skills/go-rust-cpp/cpp-pro/SKILL.md +115 -0
  179. package/skills/go-rust-cpp/cpp-pro/references/build-tooling.md +440 -0
  180. package/skills/go-rust-cpp/cpp-pro/references/concurrency.md +437 -0
  181. package/skills/go-rust-cpp/cpp-pro/references/memory-performance.md +397 -0
  182. package/skills/go-rust-cpp/cpp-pro/references/modern-cpp.md +304 -0
  183. package/skills/go-rust-cpp/cpp-pro/references/templates.md +357 -0
  184. package/skills/go-rust-cpp/golang-pro/SKILL.md +122 -0
  185. package/skills/go-rust-cpp/golang-pro/references/concurrency.md +329 -0
  186. package/skills/go-rust-cpp/golang-pro/references/generics.md +442 -0
  187. package/skills/go-rust-cpp/golang-pro/references/interfaces.md +432 -0
  188. package/skills/go-rust-cpp/golang-pro/references/project-structure.md +477 -0
  189. package/skills/go-rust-cpp/golang-pro/references/testing.md +451 -0
  190. package/skills/go-rust-cpp/rust-engineer/SKILL.md +167 -0
  191. package/skills/go-rust-cpp/rust-engineer/references/async.md +458 -0
  192. package/skills/go-rust-cpp/rust-engineer/references/error-handling.md +334 -0
  193. package/skills/go-rust-cpp/rust-engineer/references/ownership.md +278 -0
  194. package/skills/go-rust-cpp/rust-engineer/references/testing.md +470 -0
  195. package/skills/go-rust-cpp/rust-engineer/references/traits.md +413 -0
  196. package/skills/infra/cli-developer/SKILL.md +113 -0
  197. package/skills/infra/cli-developer/references/design-patterns.md +221 -0
  198. package/skills/infra/cli-developer/references/go-cli.md +540 -0
  199. package/skills/infra/cli-developer/references/node-cli.md +383 -0
  200. package/skills/infra/cli-developer/references/python-cli.md +422 -0
  201. package/skills/infra/cli-developer/references/ux-patterns.md +448 -0
  202. package/skills/infra/cloud-architect/SKILL.md +216 -0
  203. package/skills/infra/cloud-architect/references/aws.md +394 -0
  204. package/skills/infra/cloud-architect/references/azure.md +562 -0
  205. package/skills/infra/cloud-architect/references/cost.md +582 -0
  206. package/skills/infra/cloud-architect/references/gcp.md +633 -0
  207. package/skills/infra/cloud-architect/references/multi-cloud.md +483 -0
  208. package/skills/infra/devops-engineer/SKILL.md +144 -0
  209. package/skills/infra/devops-engineer/references/deployment-strategies.md +241 -0
  210. package/skills/infra/devops-engineer/references/docker-patterns.md +113 -0
  211. package/skills/infra/devops-engineer/references/github-actions.md +139 -0
  212. package/skills/infra/devops-engineer/references/incident-response.md +331 -0
  213. package/skills/infra/devops-engineer/references/kubernetes.md +154 -0
  214. package/skills/infra/devops-engineer/references/platform-engineering.md +417 -0
  215. package/skills/infra/devops-engineer/references/release-automation.md +527 -0
  216. package/skills/infra/devops-engineer/references/terraform-iac.md +141 -0
  217. package/skills/infra/kubernetes-specialist/SKILL.md +241 -0
  218. package/skills/infra/kubernetes-specialist/references/configuration.md +452 -0
  219. package/skills/infra/kubernetes-specialist/references/cost-optimization.md +458 -0
  220. package/skills/infra/kubernetes-specialist/references/custom-operators.md +563 -0
  221. package/skills/infra/kubernetes-specialist/references/gitops.md +530 -0
  222. package/skills/infra/kubernetes-specialist/references/helm-charts.md +912 -0
  223. package/skills/infra/kubernetes-specialist/references/multi-cluster.md +507 -0
  224. package/skills/infra/kubernetes-specialist/references/networking.md +447 -0
  225. package/skills/infra/kubernetes-specialist/references/service-mesh.md +459 -0
  226. package/skills/infra/kubernetes-specialist/references/storage.md +535 -0
  227. package/skills/infra/kubernetes-specialist/references/troubleshooting.md +414 -0
  228. package/skills/infra/kubernetes-specialist/references/workloads.md +377 -0
  229. package/skills/infra/mcp-developer/SKILL.md +143 -0
  230. package/skills/infra/mcp-developer/references/protocol.md +244 -0
  231. package/skills/infra/mcp-developer/references/python-sdk.md +367 -0
  232. package/skills/infra/mcp-developer/references/resources.md +554 -0
  233. package/skills/infra/mcp-developer/references/tools.md +480 -0
  234. package/skills/infra/mcp-developer/references/typescript-sdk.md +350 -0
  235. package/skills/infra/monitoring-expert/SKILL.md +176 -0
  236. package/skills/infra/monitoring-expert/references/alerting-rules.md +141 -0
  237. package/skills/infra/monitoring-expert/references/application-profiling.md +331 -0
  238. package/skills/infra/monitoring-expert/references/capacity-planning.md +344 -0
  239. package/skills/infra/monitoring-expert/references/dashboards.md +126 -0
  240. package/skills/infra/monitoring-expert/references/opentelemetry.md +123 -0
  241. package/skills/infra/monitoring-expert/references/performance-testing.md +269 -0
  242. package/skills/infra/monitoring-expert/references/prometheus-metrics.md +136 -0
  243. package/skills/infra/monitoring-expert/references/structured-logging.md +142 -0
  244. package/skills/infra/sre-engineer/SKILL.md +181 -0
  245. package/skills/infra/sre-engineer/references/automation-toil.md +492 -0
  246. package/skills/infra/sre-engineer/references/error-budget-policy.md +334 -0
  247. package/skills/infra/sre-engineer/references/incident-chaos.md +576 -0
  248. package/skills/infra/sre-engineer/references/monitoring-alerting.md +424 -0
  249. package/skills/infra/sre-engineer/references/slo-sli-management.md +238 -0
  250. package/skills/infra/terraform-engineer/SKILL.md +143 -0
  251. package/skills/infra/terraform-engineer/references/best-practices.md +583 -0
  252. package/skills/infra/terraform-engineer/references/module-patterns.md +297 -0
  253. package/skills/infra/terraform-engineer/references/providers.md +452 -0
  254. package/skills/infra/terraform-engineer/references/state-management.md +371 -0
  255. package/skills/infra/terraform-engineer/references/testing.md +486 -0
  256. package/skills/infra/websocket-engineer/SKILL.md +168 -0
  257. package/skills/infra/websocket-engineer/references/alternatives.md +391 -0
  258. package/skills/infra/websocket-engineer/references/patterns.md +400 -0
  259. package/skills/infra/websocket-engineer/references/protocol.md +195 -0
  260. package/skills/infra/websocket-engineer/references/scaling.md +333 -0
  261. package/skills/infra/websocket-engineer/references/security.md +474 -0
  262. package/skills/java/java-architect/SKILL.md +132 -0
  263. package/skills/java/java-architect/references/jpa-optimization.md +393 -0
  264. package/skills/java/java-architect/references/reactive-webflux.md +356 -0
  265. package/skills/java/java-architect/references/spring-boot-setup.md +269 -0
  266. package/skills/java/java-architect/references/spring-security.md +445 -0
  267. package/skills/java/java-architect/references/testing-patterns.md +500 -0
  268. package/skills/java/kotlin-specialist/SKILL.md +147 -0
  269. package/skills/java/kotlin-specialist/references/android-compose.md +419 -0
  270. package/skills/java/kotlin-specialist/references/coroutines-flow.md +276 -0
  271. package/skills/java/kotlin-specialist/references/dsl-idioms.md +421 -0
  272. package/skills/java/kotlin-specialist/references/ktor-server.md +426 -0
  273. package/skills/java/kotlin-specialist/references/multiplatform-kmp.md +380 -0
  274. package/skills/java/spring-boot-engineer/SKILL.md +195 -0
  275. package/skills/java/spring-boot-engineer/references/cloud.md +498 -0
  276. package/skills/java/spring-boot-engineer/references/data.md +381 -0
  277. package/skills/java/spring-boot-engineer/references/security.md +459 -0
  278. package/skills/java/spring-boot-engineer/references/testing.md +545 -0
  279. package/skills/java/spring-boot-engineer/references/web.md +295 -0
  280. package/skills/javascript/javascript-pro/SKILL.md +132 -0
  281. package/skills/javascript/javascript-pro/references/async-patterns.md +334 -0
  282. package/skills/javascript/javascript-pro/references/browser-apis.md +398 -0
  283. package/skills/javascript/javascript-pro/references/modern-syntax.md +272 -0
  284. package/skills/javascript/javascript-pro/references/modules.md +357 -0
  285. package/skills/javascript/javascript-pro/references/node-essentials.md +471 -0
  286. package/skills/javascript/nestjs-expert/SKILL.md +206 -0
  287. package/skills/javascript/nestjs-expert/references/authentication.md +166 -0
  288. package/skills/javascript/nestjs-expert/references/controllers-routing.md +111 -0
  289. package/skills/javascript/nestjs-expert/references/dtos-validation.md +153 -0
  290. package/skills/javascript/nestjs-expert/references/migration-from-express.md +1237 -0
  291. package/skills/javascript/nestjs-expert/references/services-di.md +140 -0
  292. package/skills/javascript/nestjs-expert/references/testing-patterns.md +186 -0
  293. package/skills/javascript/typescript-pro/SKILL.md +145 -0
  294. package/skills/javascript/typescript-pro/references/advanced-types.md +259 -0
  295. package/skills/javascript/typescript-pro/references/configuration.md +445 -0
  296. package/skills/javascript/typescript-pro/references/patterns.md +484 -0
  297. package/skills/javascript/typescript-pro/references/type-guards.md +352 -0
  298. package/skills/javascript/typescript-pro/references/utility-types.md +329 -0
  299. package/skills/php/laravel-specialist/SKILL.md +262 -0
  300. package/skills/php/laravel-specialist/references/eloquent.md +351 -0
  301. package/skills/php/laravel-specialist/references/livewire.md +512 -0
  302. package/skills/php/laravel-specialist/references/queues.md +423 -0
  303. package/skills/php/laravel-specialist/references/routing.md +362 -0
  304. package/skills/php/laravel-specialist/references/testing.md +522 -0
  305. package/skills/php/php-pro/SKILL.md +206 -0
  306. package/skills/php/php-pro/references/async-patterns.md +412 -0
  307. package/skills/php/php-pro/references/laravel-patterns.md +377 -0
  308. package/skills/php/php-pro/references/modern-php-features.md +323 -0
  309. package/skills/php/php-pro/references/symfony-patterns.md +466 -0
  310. package/skills/php/php-pro/references/testing-quality.md +466 -0
  311. package/skills/python/django-expert/SKILL.md +162 -0
  312. package/skills/python/django-expert/references/authentication.md +145 -0
  313. package/skills/python/django-expert/references/drf-serializers.md +148 -0
  314. package/skills/python/django-expert/references/models-orm.md +151 -0
  315. package/skills/python/django-expert/references/testing-django.md +204 -0
  316. package/skills/python/django-expert/references/viewsets-views.md +153 -0
  317. package/skills/python/fastapi-expert/SKILL.md +185 -0
  318. package/skills/python/fastapi-expert/references/async-sqlalchemy.md +146 -0
  319. package/skills/python/fastapi-expert/references/authentication.md +159 -0
  320. package/skills/python/fastapi-expert/references/endpoints-routing.md +142 -0
  321. package/skills/python/fastapi-expert/references/migration-from-django.md +997 -0
  322. package/skills/python/fastapi-expert/references/pydantic-v2.md +135 -0
  323. package/skills/python/fastapi-expert/references/testing-async.md +159 -0
  324. package/skills/python/pandas-pro/SKILL.md +178 -0
  325. package/skills/python/pandas-pro/references/aggregation-groupby.md +545 -0
  326. package/skills/python/pandas-pro/references/data-cleaning.md +500 -0
  327. package/skills/python/pandas-pro/references/dataframe-operations.md +420 -0
  328. package/skills/python/pandas-pro/references/merging-joining.md +596 -0
  329. package/skills/python/pandas-pro/references/performance-optimization.md +597 -0
  330. package/skills/python/python-pro/SKILL.md +177 -0
  331. package/skills/python/python-pro/references/async-patterns.md +356 -0
  332. package/skills/python/python-pro/references/packaging.md +460 -0
  333. package/skills/python/python-pro/references/standard-library.md +378 -0
  334. package/skills/python/python-pro/references/testing.md +404 -0
  335. package/skills/python/python-pro/references/type-system.md +290 -0
  336. package/skills/quality/chaos-engineer/SKILL.md +182 -0
  337. package/skills/quality/chaos-engineer/references/chaos-tools.md +511 -0
  338. package/skills/quality/chaos-engineer/references/experiment-design.md +229 -0
  339. package/skills/quality/chaos-engineer/references/game-days.md +434 -0
  340. package/skills/quality/chaos-engineer/references/infrastructure-chaos.md +348 -0
  341. package/skills/quality/chaos-engineer/references/kubernetes-chaos.md +432 -0
  342. package/skills/quality/code-reviewer/SKILL.md +119 -0
  343. package/skills/quality/code-reviewer/references/common-issues.md +142 -0
  344. package/skills/quality/code-reviewer/references/feedback-examples.md +144 -0
  345. package/skills/quality/code-reviewer/references/receiving-feedback.md +238 -0
  346. package/skills/quality/code-reviewer/references/report-template.md +109 -0
  347. package/skills/quality/code-reviewer/references/review-checklist.md +88 -0
  348. package/skills/quality/code-reviewer/references/spec-compliance-review.md +258 -0
  349. package/skills/quality/playwright-expert/SKILL.md +169 -0
  350. package/skills/quality/playwright-expert/references/api-mocking.md +140 -0
  351. package/skills/quality/playwright-expert/references/configuration.md +155 -0
  352. package/skills/quality/playwright-expert/references/debugging-flaky.md +150 -0
  353. package/skills/quality/playwright-expert/references/page-object-model.md +152 -0
  354. package/skills/quality/playwright-expert/references/selectors-locators.md +119 -0
  355. package/skills/quality/secure-code-guardian/SKILL.md +191 -0
  356. package/skills/quality/secure-code-guardian/references/authentication.md +136 -0
  357. package/skills/quality/secure-code-guardian/references/input-validation.md +146 -0
  358. package/skills/quality/secure-code-guardian/references/owasp-prevention.md +135 -0
  359. package/skills/quality/secure-code-guardian/references/security-headers.md +133 -0
  360. package/skills/quality/secure-code-guardian/references/xss-csrf.md +157 -0
  361. package/skills/quality/security-reviewer/SKILL.md +103 -0
  362. package/skills/quality/security-reviewer/references/infrastructure-security.md +268 -0
  363. package/skills/quality/security-reviewer/references/penetration-testing.md +268 -0
  364. package/skills/quality/security-reviewer/references/report-template.md +170 -0
  365. package/skills/quality/security-reviewer/references/sast-tools.md +117 -0
  366. package/skills/quality/security-reviewer/references/secret-scanning.md +125 -0
  367. package/skills/quality/security-reviewer/references/vulnerability-patterns.md +152 -0
  368. package/skills/quality/tdd-guide/assets/sample_coverage_report.lcov +0 -0
  369. package/skills/quality/test-master/SKILL.md +94 -0
  370. package/skills/quality/test-master/references/automation-frameworks.md +294 -0
  371. package/skills/quality/test-master/references/e2e-testing.md +128 -0
  372. package/skills/quality/test-master/references/integration-testing.md +120 -0
  373. package/skills/quality/test-master/references/performance-testing.md +118 -0
  374. package/skills/quality/test-master/references/qa-methodology.md +247 -0
  375. package/skills/quality/test-master/references/security-testing.md +127 -0
  376. package/skills/quality/test-master/references/tdd-iron-laws.md +174 -0
  377. package/skills/quality/test-master/references/test-reports.md +104 -0
  378. package/skills/quality/test-master/references/testing-anti-patterns.md +231 -0
  379. package/skills/quality/test-master/references/unit-testing.md +113 -0
  380. package/skills/ruby/rails-expert/SKILL.md +154 -0
  381. package/skills/ruby/rails-expert/references/active-record.md +244 -0
  382. package/skills/ruby/rails-expert/references/api-development.md +401 -0
  383. package/skills/ruby/rails-expert/references/background-jobs.md +272 -0
  384. package/skills/ruby/rails-expert/references/hotwire-turbo.md +228 -0
  385. package/skills/ruby/rails-expert/references/rspec-testing.md +367 -0
  386. package/skills/swift/swift-expert/SKILL.md +163 -0
  387. package/skills/swift/swift-expert/references/async-concurrency.md +360 -0
  388. package/skills/swift/swift-expert/references/memory-performance.md +377 -0
  389. package/skills/swift/swift-expert/references/protocol-oriented.md +354 -0
  390. package/skills/swift/swift-expert/references/swiftui-patterns.md +291 -0
  391. package/skills/swift/swift-expert/references/testing-patterns.md +399 -0
  392. package/skills/workflow/brainstorming/SKILL.md +164 -0
  393. package/skills/workflow/brainstorming/scripts/helper.js +88 -0
  394. package/skills/workflow/brainstorming/scripts/start-server.sh +148 -0
  395. package/skills/workflow/brainstorming/scripts/stop-server.sh +56 -0
  396. package/skills/workflow/brainstorming/spec-document-reviewer-prompt.md +49 -0
  397. package/skills/workflow/brainstorming/visual-companion.md +287 -0
  398. package/skills/workflow/documentation/SKILL.md +45 -0
  399. package/skills/workflow/entropy-management/SKILL.md +115 -0
  400. package/skills/workflow/executing-plans/SKILL.md +70 -0
  401. package/skills/workflow/finishing-a-development-branch/SKILL.md +200 -0
  402. package/skills/workflow/receiving-code-review/SKILL.md +213 -0
  403. package/skills/workflow/requesting-code-review/SKILL.md +105 -0
  404. package/skills/workflow/requesting-code-review/code-reviewer.md +146 -0
  405. package/skills/workflow/requirement-engineering/SKILL.md +111 -0
  406. package/skills/workflow/systematic-debugging/CREATION-LOG.md +119 -0
  407. package/skills/workflow/systematic-debugging/SKILL.md +296 -0
  408. package/skills/workflow/systematic-debugging/condition-based-waiting-example.ts +158 -0
  409. package/skills/workflow/systematic-debugging/condition-based-waiting.md +115 -0
  410. package/skills/workflow/systematic-debugging/defense-in-depth.md +122 -0
  411. package/skills/workflow/systematic-debugging/find-polluter.sh +63 -0
  412. package/skills/workflow/systematic-debugging/root-cause-tracing.md +169 -0
  413. package/skills/workflow/systematic-debugging/test-academic.md +14 -0
  414. package/skills/workflow/systematic-debugging/test-pressure-1.md +58 -0
  415. package/skills/workflow/systematic-debugging/test-pressure-2.md +68 -0
  416. package/skills/workflow/systematic-debugging/test-pressure-3.md +69 -0
  417. package/skills/workflow/using-git-worktrees/SKILL.md +218 -0
  418. package/skills/workflow/verification-before-completion/SKILL.md +139 -0
  419. package/skills/workflow/writing-plans/SKILL.md +151 -0
  420. package/skills/workflow/writing-plans/plan-document-reviewer-prompt.md +49 -0
  421. package/skills/workflow/writing-skills/SKILL.md +655 -0
  422. package/skills/workflow/writing-skills/anthropic-best-practices.md +1150 -0
  423. package/skills/workflow/writing-skills/examples/CLAUDE_MD_TESTING.md +189 -0
  424. package/skills/workflow/writing-skills/graphviz-conventions.dot +0 -0
  425. package/skills/workflow/writing-skills/persuasion-principles.md +187 -0
  426. package/skills/workflow/writing-skills/render-graphs.js +168 -0
  427. package/skills/workflow/writing-skills/testing-skills-with-subagents.md +384 -0
  428. package/skills/angular-architect/SKILL.md +0 -152
  429. package/skills/angular-architect/references/components.md +0 -297
  430. package/skills/angular-architect/references/ngrx.md +0 -401
  431. package/skills/angular-architect/references/routing.md +0 -361
  432. package/skills/angular-architect/references/rxjs.md +0 -319
  433. package/skills/angular-architect/references/testing.md +0 -405
  434. package/skills/api-designer/SKILL.md +0 -217
  435. package/skills/api-designer/references/error-handling.md +0 -541
  436. package/skills/api-designer/references/openapi.md +0 -824
  437. package/skills/api-designer/references/pagination.md +0 -494
  438. package/skills/api-designer/references/rest-patterns.md +0 -335
  439. package/skills/api-designer/references/versioning.md +0 -391
  440. package/skills/architecture-designer/SKILL.md +0 -117
  441. package/skills/architecture-designer/references/adr-template.md +0 -116
  442. package/skills/architecture-designer/references/architecture-patterns.md +0 -111
  443. package/skills/architecture-designer/references/database-selection.md +0 -102
  444. package/skills/architecture-designer/references/nfr-checklist.md +0 -112
  445. package/skills/architecture-designer/references/system-design.md +0 -100
  446. package/skills/brainstorming/SKILL.md +0 -164
  447. package/skills/brainstorming/scripts/helper.js +0 -88
  448. package/skills/brainstorming/scripts/start-server.sh +0 -148
  449. package/skills/brainstorming/scripts/stop-server.sh +0 -56
  450. package/skills/brainstorming/spec-document-reviewer-prompt.md +0 -49
  451. package/skills/brainstorming/visual-companion.md +0 -287
  452. package/skills/chaos-engineer/SKILL.md +0 -182
  453. package/skills/chaos-engineer/references/chaos-tools.md +0 -511
  454. package/skills/chaos-engineer/references/experiment-design.md +0 -229
  455. package/skills/chaos-engineer/references/game-days.md +0 -434
  456. package/skills/chaos-engineer/references/infrastructure-chaos.md +0 -348
  457. package/skills/chaos-engineer/references/kubernetes-chaos.md +0 -432
  458. package/skills/cli-developer/SKILL.md +0 -113
  459. package/skills/cli-developer/references/design-patterns.md +0 -221
  460. package/skills/cli-developer/references/go-cli.md +0 -540
  461. package/skills/cli-developer/references/node-cli.md +0 -383
  462. package/skills/cli-developer/references/python-cli.md +0 -422
  463. package/skills/cli-developer/references/ux-patterns.md +0 -448
  464. package/skills/cloud-architect/SKILL.md +0 -216
  465. package/skills/cloud-architect/references/aws.md +0 -394
  466. package/skills/cloud-architect/references/azure.md +0 -562
  467. package/skills/cloud-architect/references/cost.md +0 -582
  468. package/skills/cloud-architect/references/gcp.md +0 -633
  469. package/skills/cloud-architect/references/multi-cloud.md +0 -483
  470. package/skills/code-documenter/SKILL.md +0 -147
  471. package/skills/code-documenter/references/api-docs-fastapi-django.md +0 -166
  472. package/skills/code-documenter/references/api-docs-nestjs-express.md +0 -220
  473. package/skills/code-documenter/references/coverage-reports.md +0 -125
  474. package/skills/code-documenter/references/documentation-systems.md +0 -333
  475. package/skills/code-documenter/references/interactive-api-docs.md +0 -531
  476. package/skills/code-documenter/references/python-docstrings.md +0 -121
  477. package/skills/code-documenter/references/typescript-jsdoc.md +0 -145
  478. package/skills/code-documenter/references/user-guides-tutorials.md +0 -530
  479. package/skills/code-reviewer/SKILL.md +0 -119
  480. package/skills/code-reviewer/references/common-issues.md +0 -142
  481. package/skills/code-reviewer/references/feedback-examples.md +0 -144
  482. package/skills/code-reviewer/references/receiving-feedback.md +0 -238
  483. package/skills/code-reviewer/references/report-template.md +0 -109
  484. package/skills/code-reviewer/references/review-checklist.md +0 -88
  485. package/skills/code-reviewer/references/spec-compliance-review.md +0 -258
  486. package/skills/cpp-pro/SKILL.md +0 -115
  487. package/skills/cpp-pro/references/build-tooling.md +0 -440
  488. package/skills/cpp-pro/references/concurrency.md +0 -437
  489. package/skills/cpp-pro/references/memory-performance.md +0 -397
  490. package/skills/cpp-pro/references/modern-cpp.md +0 -304
  491. package/skills/cpp-pro/references/templates.md +0 -357
  492. package/skills/csharp-developer/SKILL.md +0 -125
  493. package/skills/csharp-developer/references/aspnet-core.md +0 -394
  494. package/skills/csharp-developer/references/blazor.md +0 -553
  495. package/skills/csharp-developer/references/entity-framework.md +0 -409
  496. package/skills/csharp-developer/references/modern-csharp.md +0 -248
  497. package/skills/csharp-developer/references/performance.md +0 -498
  498. package/skills/database-optimizer/SKILL.md +0 -147
  499. package/skills/database-optimizer/references/index-strategies.md +0 -331
  500. package/skills/database-optimizer/references/monitoring-analysis.md +0 -501
  501. package/skills/database-optimizer/references/mysql-tuning.md +0 -452
  502. package/skills/database-optimizer/references/postgresql-tuning.md +0 -413
  503. package/skills/database-optimizer/references/query-optimization.md +0 -251
  504. package/skills/debugging-wizard/SKILL.md +0 -105
  505. package/skills/debugging-wizard/references/common-patterns.md +0 -132
  506. package/skills/debugging-wizard/references/debugging-tools.md +0 -140
  507. package/skills/debugging-wizard/references/quick-fixes.md +0 -177
  508. package/skills/debugging-wizard/references/strategies.md +0 -142
  509. package/skills/debugging-wizard/references/systematic-debugging.md +0 -367
  510. package/skills/devops-engineer/SKILL.md +0 -144
  511. package/skills/devops-engineer/references/deployment-strategies.md +0 -241
  512. package/skills/devops-engineer/references/docker-patterns.md +0 -113
  513. package/skills/devops-engineer/references/github-actions.md +0 -139
  514. package/skills/devops-engineer/references/incident-response.md +0 -331
  515. package/skills/devops-engineer/references/kubernetes.md +0 -154
  516. package/skills/devops-engineer/references/platform-engineering.md +0 -417
  517. package/skills/devops-engineer/references/release-automation.md +0 -527
  518. package/skills/devops-engineer/references/terraform-iac.md +0 -141
  519. package/skills/django-expert/SKILL.md +0 -162
  520. package/skills/django-expert/references/authentication.md +0 -145
  521. package/skills/django-expert/references/drf-serializers.md +0 -148
  522. package/skills/django-expert/references/models-orm.md +0 -151
  523. package/skills/django-expert/references/testing-django.md +0 -204
  524. package/skills/django-expert/references/viewsets-views.md +0 -153
  525. package/skills/documentation/SKILL.md +0 -45
  526. package/skills/dotnet-core-expert/SKILL.md +0 -138
  527. package/skills/dotnet-core-expert/references/authentication.md +0 -546
  528. package/skills/dotnet-core-expert/references/clean-architecture.md +0 -455
  529. package/skills/dotnet-core-expert/references/cloud-native.md +0 -548
  530. package/skills/dotnet-core-expert/references/entity-framework.md +0 -440
  531. package/skills/dotnet-core-expert/references/minimal-apis.md +0 -319
  532. package/skills/entropy-management/SKILL.md +0 -115
  533. package/skills/executing-plans/SKILL.md +0 -70
  534. package/skills/fastapi-expert/SKILL.md +0 -185
  535. package/skills/fastapi-expert/references/async-sqlalchemy.md +0 -146
  536. package/skills/fastapi-expert/references/authentication.md +0 -159
  537. package/skills/fastapi-expert/references/endpoints-routing.md +0 -142
  538. package/skills/fastapi-expert/references/migration-from-django.md +0 -997
  539. package/skills/fastapi-expert/references/pydantic-v2.md +0 -135
  540. package/skills/fastapi-expert/references/testing-async.md +0 -159
  541. package/skills/feature-forge/SKILL.md +0 -98
  542. package/skills/feature-forge/references/acceptance-criteria.md +0 -104
  543. package/skills/feature-forge/references/ears-syntax.md +0 -99
  544. package/skills/feature-forge/references/interview-questions.md +0 -150
  545. package/skills/feature-forge/references/pre-discovery-subagents.md +0 -54
  546. package/skills/feature-forge/references/specification-template.md +0 -103
  547. package/skills/fine-tuning-expert/SKILL.md +0 -162
  548. package/skills/fine-tuning-expert/references/dataset-preparation.md +0 -540
  549. package/skills/fine-tuning-expert/references/deployment-optimization.md +0 -673
  550. package/skills/fine-tuning-expert/references/evaluation-metrics.md +0 -597
  551. package/skills/fine-tuning-expert/references/hyperparameter-tuning.md +0 -565
  552. package/skills/fine-tuning-expert/references/lora-peft.md +0 -347
  553. package/skills/finishing-a-development-branch/SKILL.md +0 -200
  554. package/skills/flutter-expert/SKILL.md +0 -138
  555. package/skills/flutter-expert/references/bloc-state.md +0 -259
  556. package/skills/flutter-expert/references/gorouter-navigation.md +0 -119
  557. package/skills/flutter-expert/references/performance.md +0 -99
  558. package/skills/flutter-expert/references/project-structure.md +0 -118
  559. package/skills/flutter-expert/references/riverpod-state.md +0 -130
  560. package/skills/flutter-expert/references/widget-patterns.md +0 -123
  561. package/skills/fullstack-guardian/SKILL.md +0 -105
  562. package/skills/fullstack-guardian/references/api-design-standards.md +0 -307
  563. package/skills/fullstack-guardian/references/architecture-decisions.md +0 -350
  564. package/skills/fullstack-guardian/references/backend-patterns.md +0 -237
  565. package/skills/fullstack-guardian/references/common-patterns.md +0 -134
  566. package/skills/fullstack-guardian/references/deliverables-checklist.md +0 -354
  567. package/skills/fullstack-guardian/references/design-template.md +0 -91
  568. package/skills/fullstack-guardian/references/error-handling.md +0 -135
  569. package/skills/fullstack-guardian/references/frontend-patterns.md +0 -340
  570. package/skills/fullstack-guardian/references/integration-patterns.md +0 -333
  571. package/skills/fullstack-guardian/references/security-checklist.md +0 -106
  572. package/skills/golang-pro/SKILL.md +0 -122
  573. package/skills/golang-pro/references/concurrency.md +0 -329
  574. package/skills/golang-pro/references/generics.md +0 -442
  575. package/skills/golang-pro/references/interfaces.md +0 -432
  576. package/skills/golang-pro/references/project-structure.md +0 -477
  577. package/skills/golang-pro/references/testing.md +0 -451
  578. package/skills/graphql-architect/SKILL.md +0 -146
  579. package/skills/graphql-architect/references/federation.md +0 -418
  580. package/skills/graphql-architect/references/migration-from-rest.md +0 -1141
  581. package/skills/graphql-architect/references/resolvers.md +0 -425
  582. package/skills/graphql-architect/references/schema-design.md +0 -393
  583. package/skills/graphql-architect/references/security.md +0 -569
  584. package/skills/graphql-architect/references/subscriptions.md +0 -510
  585. package/skills/java-architect/SKILL.md +0 -132
  586. package/skills/java-architect/references/jpa-optimization.md +0 -393
  587. package/skills/java-architect/references/reactive-webflux.md +0 -356
  588. package/skills/java-architect/references/spring-boot-setup.md +0 -269
  589. package/skills/java-architect/references/spring-security.md +0 -445
  590. package/skills/java-architect/references/testing-patterns.md +0 -500
  591. package/skills/javascript-pro/SKILL.md +0 -132
  592. package/skills/javascript-pro/references/async-patterns.md +0 -334
  593. package/skills/javascript-pro/references/browser-apis.md +0 -398
  594. package/skills/javascript-pro/references/modern-syntax.md +0 -272
  595. package/skills/javascript-pro/references/modules.md +0 -357
  596. package/skills/javascript-pro/references/node-essentials.md +0 -471
  597. package/skills/kotlin-specialist/SKILL.md +0 -147
  598. package/skills/kotlin-specialist/references/android-compose.md +0 -419
  599. package/skills/kotlin-specialist/references/coroutines-flow.md +0 -276
  600. package/skills/kotlin-specialist/references/dsl-idioms.md +0 -421
  601. package/skills/kotlin-specialist/references/ktor-server.md +0 -426
  602. package/skills/kotlin-specialist/references/multiplatform-kmp.md +0 -380
  603. package/skills/kubernetes-specialist/SKILL.md +0 -241
  604. package/skills/kubernetes-specialist/references/configuration.md +0 -452
  605. package/skills/kubernetes-specialist/references/cost-optimization.md +0 -458
  606. package/skills/kubernetes-specialist/references/custom-operators.md +0 -563
  607. package/skills/kubernetes-specialist/references/gitops.md +0 -530
  608. package/skills/kubernetes-specialist/references/helm-charts.md +0 -912
  609. package/skills/kubernetes-specialist/references/multi-cluster.md +0 -507
  610. package/skills/kubernetes-specialist/references/networking.md +0 -447
  611. package/skills/kubernetes-specialist/references/service-mesh.md +0 -459
  612. package/skills/kubernetes-specialist/references/storage.md +0 -535
  613. package/skills/kubernetes-specialist/references/troubleshooting.md +0 -414
  614. package/skills/kubernetes-specialist/references/workloads.md +0 -377
  615. package/skills/laravel-specialist/SKILL.md +0 -262
  616. package/skills/laravel-specialist/references/eloquent.md +0 -351
  617. package/skills/laravel-specialist/references/livewire.md +0 -512
  618. package/skills/laravel-specialist/references/queues.md +0 -423
  619. package/skills/laravel-specialist/references/routing.md +0 -362
  620. package/skills/laravel-specialist/references/testing.md +0 -522
  621. package/skills/legacy-modernizer/SKILL.md +0 -137
  622. package/skills/legacy-modernizer/references/legacy-testing.md +0 -381
  623. package/skills/legacy-modernizer/references/migration-strategies.md +0 -423
  624. package/skills/legacy-modernizer/references/refactoring-patterns.md +0 -395
  625. package/skills/legacy-modernizer/references/strangler-fig-pattern.md +0 -281
  626. package/skills/legacy-modernizer/references/system-assessment.md +0 -487
  627. package/skills/mcp-developer/SKILL.md +0 -143
  628. package/skills/mcp-developer/references/protocol.md +0 -244
  629. package/skills/mcp-developer/references/python-sdk.md +0 -367
  630. package/skills/mcp-developer/references/resources.md +0 -554
  631. package/skills/mcp-developer/references/tools.md +0 -480
  632. package/skills/mcp-developer/references/typescript-sdk.md +0 -350
  633. package/skills/microservices-architect/SKILL.md +0 -164
  634. package/skills/microservices-architect/references/communication.md +0 -499
  635. package/skills/microservices-architect/references/data.md +0 -721
  636. package/skills/microservices-architect/references/decomposition.md +0 -344
  637. package/skills/microservices-architect/references/observability.md +0 -805
  638. package/skills/microservices-architect/references/patterns.md +0 -603
  639. package/skills/ml-pipeline/SKILL.md +0 -159
  640. package/skills/ml-pipeline/references/experiment-tracking.md +0 -833
  641. package/skills/ml-pipeline/references/feature-engineering.md +0 -631
  642. package/skills/ml-pipeline/references/model-validation.md +0 -978
  643. package/skills/ml-pipeline/references/pipeline-orchestration.md +0 -907
  644. package/skills/ml-pipeline/references/training-pipelines.md +0 -782
  645. package/skills/monitoring-expert/SKILL.md +0 -176
  646. package/skills/monitoring-expert/references/alerting-rules.md +0 -141
  647. package/skills/monitoring-expert/references/application-profiling.md +0 -331
  648. package/skills/monitoring-expert/references/capacity-planning.md +0 -344
  649. package/skills/monitoring-expert/references/dashboards.md +0 -126
  650. package/skills/monitoring-expert/references/opentelemetry.md +0 -123
  651. package/skills/monitoring-expert/references/performance-testing.md +0 -269
  652. package/skills/monitoring-expert/references/prometheus-metrics.md +0 -136
  653. package/skills/monitoring-expert/references/structured-logging.md +0 -142
  654. package/skills/nestjs-expert/SKILL.md +0 -206
  655. package/skills/nestjs-expert/references/authentication.md +0 -166
  656. package/skills/nestjs-expert/references/controllers-routing.md +0 -111
  657. package/skills/nestjs-expert/references/dtos-validation.md +0 -153
  658. package/skills/nestjs-expert/references/migration-from-express.md +0 -1237
  659. package/skills/nestjs-expert/references/services-di.md +0 -140
  660. package/skills/nestjs-expert/references/testing-patterns.md +0 -186
  661. package/skills/nextjs-developer/SKILL.md +0 -143
  662. package/skills/nextjs-developer/references/app-router.md +0 -311
  663. package/skills/nextjs-developer/references/data-fetching.md +0 -482
  664. package/skills/nextjs-developer/references/deployment.md +0 -545
  665. package/skills/nextjs-developer/references/server-actions.md +0 -462
  666. package/skills/nextjs-developer/references/server-components.md +0 -384
  667. package/skills/pandas-pro/SKILL.md +0 -178
  668. package/skills/pandas-pro/references/aggregation-groupby.md +0 -545
  669. package/skills/pandas-pro/references/data-cleaning.md +0 -500
  670. package/skills/pandas-pro/references/dataframe-operations.md +0 -420
  671. package/skills/pandas-pro/references/merging-joining.md +0 -596
  672. package/skills/pandas-pro/references/performance-optimization.md +0 -597
  673. package/skills/php-pro/SKILL.md +0 -206
  674. package/skills/php-pro/references/async-patterns.md +0 -412
  675. package/skills/php-pro/references/laravel-patterns.md +0 -377
  676. package/skills/php-pro/references/modern-php-features.md +0 -323
  677. package/skills/php-pro/references/symfony-patterns.md +0 -466
  678. package/skills/php-pro/references/testing-quality.md +0 -466
  679. package/skills/playwright-expert/SKILL.md +0 -169
  680. package/skills/playwright-expert/references/api-mocking.md +0 -140
  681. package/skills/playwright-expert/references/configuration.md +0 -155
  682. package/skills/playwright-expert/references/debugging-flaky.md +0 -150
  683. package/skills/playwright-expert/references/page-object-model.md +0 -152
  684. package/skills/playwright-expert/references/selectors-locators.md +0 -119
  685. package/skills/postgres-pro/SKILL.md +0 -152
  686. package/skills/postgres-pro/references/extensions.md +0 -404
  687. package/skills/postgres-pro/references/jsonb.md +0 -321
  688. package/skills/postgres-pro/references/maintenance.md +0 -481
  689. package/skills/postgres-pro/references/performance.md +0 -265
  690. package/skills/postgres-pro/references/replication.md +0 -446
  691. package/skills/python-pro/SKILL.md +0 -177
  692. package/skills/python-pro/references/async-patterns.md +0 -356
  693. package/skills/python-pro/references/packaging.md +0 -460
  694. package/skills/python-pro/references/standard-library.md +0 -378
  695. package/skills/python-pro/references/testing.md +0 -404
  696. package/skills/python-pro/references/type-system.md +0 -290
  697. package/skills/rag-architect/SKILL.md +0 -194
  698. package/skills/rag-architect/references/chunking-strategies.md +0 -878
  699. package/skills/rag-architect/references/embedding-models.md +0 -561
  700. package/skills/rag-architect/references/rag-evaluation.md +0 -833
  701. package/skills/rag-architect/references/retrieval-optimization.md +0 -795
  702. package/skills/rag-architect/references/vector-databases.md +0 -589
  703. package/skills/rails-expert/SKILL.md +0 -154
  704. package/skills/rails-expert/references/active-record.md +0 -244
  705. package/skills/rails-expert/references/api-development.md +0 -401
  706. package/skills/rails-expert/references/background-jobs.md +0 -272
  707. package/skills/rails-expert/references/hotwire-turbo.md +0 -228
  708. package/skills/rails-expert/references/rspec-testing.md +0 -367
  709. package/skills/react-expert/SKILL.md +0 -149
  710. package/skills/react-expert/references/hooks-patterns.md +0 -162
  711. package/skills/react-expert/references/migration-class-to-modern.md +0 -1119
  712. package/skills/react-expert/references/performance.md +0 -168
  713. package/skills/react-expert/references/react-19-features.md +0 -174
  714. package/skills/react-expert/references/server-components.md +0 -143
  715. package/skills/react-expert/references/state-management.md +0 -171
  716. package/skills/react-expert/references/testing-react.md +0 -174
  717. package/skills/react-native-expert/SKILL.md +0 -185
  718. package/skills/react-native-expert/references/expo-router.md +0 -187
  719. package/skills/react-native-expert/references/list-optimization.md +0 -204
  720. package/skills/react-native-expert/references/platform-handling.md +0 -188
  721. package/skills/react-native-expert/references/project-structure.md +0 -171
  722. package/skills/react-native-expert/references/storage-hooks.md +0 -173
  723. package/skills/receiving-code-review/SKILL.md +0 -213
  724. package/skills/requesting-code-review/SKILL.md +0 -105
  725. package/skills/requesting-code-review/code-reviewer.md +0 -146
  726. package/skills/requirement-engineering/SKILL.md +0 -111
  727. package/skills/rust-engineer/SKILL.md +0 -167
  728. package/skills/rust-engineer/references/async.md +0 -458
  729. package/skills/rust-engineer/references/error-handling.md +0 -334
  730. package/skills/rust-engineer/references/ownership.md +0 -278
  731. package/skills/rust-engineer/references/testing.md +0 -470
  732. package/skills/rust-engineer/references/traits.md +0 -413
  733. package/skills/secure-code-guardian/SKILL.md +0 -191
  734. package/skills/secure-code-guardian/references/authentication.md +0 -136
  735. package/skills/secure-code-guardian/references/input-validation.md +0 -146
  736. package/skills/secure-code-guardian/references/owasp-prevention.md +0 -135
  737. package/skills/secure-code-guardian/references/security-headers.md +0 -133
  738. package/skills/secure-code-guardian/references/xss-csrf.md +0 -157
  739. package/skills/security-reviewer/SKILL.md +0 -103
  740. package/skills/security-reviewer/references/infrastructure-security.md +0 -268
  741. package/skills/security-reviewer/references/penetration-testing.md +0 -268
  742. package/skills/security-reviewer/references/report-template.md +0 -170
  743. package/skills/security-reviewer/references/sast-tools.md +0 -117
  744. package/skills/security-reviewer/references/secret-scanning.md +0 -125
  745. package/skills/security-reviewer/references/vulnerability-patterns.md +0 -152
  746. package/skills/spark-engineer/SKILL.md +0 -148
  747. package/skills/spark-engineer/references/partitioning-caching.md +0 -543
  748. package/skills/spark-engineer/references/performance-tuning.md +0 -544
  749. package/skills/spark-engineer/references/rdd-operations.md +0 -599
  750. package/skills/spark-engineer/references/spark-sql-dataframes.md +0 -474
  751. package/skills/spark-engineer/references/streaming-patterns.md +0 -786
  752. package/skills/spring-boot-engineer/SKILL.md +0 -195
  753. package/skills/spring-boot-engineer/references/cloud.md +0 -498
  754. package/skills/spring-boot-engineer/references/data.md +0 -381
  755. package/skills/spring-boot-engineer/references/security.md +0 -459
  756. package/skills/spring-boot-engineer/references/testing.md +0 -545
  757. package/skills/spring-boot-engineer/references/web.md +0 -295
  758. package/skills/sql-pro/SKILL.md +0 -129
  759. package/skills/sql-pro/references/database-design.md +0 -402
  760. package/skills/sql-pro/references/dialect-differences.md +0 -419
  761. package/skills/sql-pro/references/optimization.md +0 -384
  762. package/skills/sql-pro/references/query-patterns.md +0 -285
  763. package/skills/sql-pro/references/window-functions.md +0 -328
  764. package/skills/sre-engineer/SKILL.md +0 -181
  765. package/skills/sre-engineer/references/automation-toil.md +0 -492
  766. package/skills/sre-engineer/references/error-budget-policy.md +0 -334
  767. package/skills/sre-engineer/references/incident-chaos.md +0 -576
  768. package/skills/sre-engineer/references/monitoring-alerting.md +0 -424
  769. package/skills/sre-engineer/references/slo-sli-management.md +0 -238
  770. package/skills/swift-expert/SKILL.md +0 -163
  771. package/skills/swift-expert/references/async-concurrency.md +0 -360
  772. package/skills/swift-expert/references/memory-performance.md +0 -377
  773. package/skills/swift-expert/references/protocol-oriented.md +0 -354
  774. package/skills/swift-expert/references/swiftui-patterns.md +0 -291
  775. package/skills/swift-expert/references/testing-patterns.md +0 -399
  776. package/skills/systematic-debugging/CREATION-LOG.md +0 -119
  777. package/skills/systematic-debugging/SKILL.md +0 -296
  778. package/skills/systematic-debugging/condition-based-waiting-example.ts +0 -158
  779. package/skills/systematic-debugging/condition-based-waiting.md +0 -115
  780. package/skills/systematic-debugging/defense-in-depth.md +0 -122
  781. package/skills/systematic-debugging/find-polluter.sh +0 -63
  782. package/skills/systematic-debugging/root-cause-tracing.md +0 -169
  783. package/skills/systematic-debugging/test-academic.md +0 -14
  784. package/skills/systematic-debugging/test-pressure-1.md +0 -58
  785. package/skills/systematic-debugging/test-pressure-2.md +0 -68
  786. package/skills/systematic-debugging/test-pressure-3.md +0 -69
  787. package/skills/tdd-guide/assets/sample_coverage_report.lcov +0 -56
  788. package/skills/terraform-engineer/SKILL.md +0 -143
  789. package/skills/terraform-engineer/references/best-practices.md +0 -583
  790. package/skills/terraform-engineer/references/module-patterns.md +0 -297
  791. package/skills/terraform-engineer/references/providers.md +0 -452
  792. package/skills/terraform-engineer/references/state-management.md +0 -371
  793. package/skills/terraform-engineer/references/testing.md +0 -486
  794. package/skills/test-master/SKILL.md +0 -94
  795. package/skills/test-master/references/automation-frameworks.md +0 -294
  796. package/skills/test-master/references/e2e-testing.md +0 -128
  797. package/skills/test-master/references/integration-testing.md +0 -120
  798. package/skills/test-master/references/performance-testing.md +0 -118
  799. package/skills/test-master/references/qa-methodology.md +0 -247
  800. package/skills/test-master/references/security-testing.md +0 -127
  801. package/skills/test-master/references/tdd-iron-laws.md +0 -174
  802. package/skills/test-master/references/test-reports.md +0 -104
  803. package/skills/test-master/references/testing-anti-patterns.md +0 -231
  804. package/skills/test-master/references/unit-testing.md +0 -113
  805. package/skills/typescript-pro/SKILL.md +0 -145
  806. package/skills/typescript-pro/references/advanced-types.md +0 -259
  807. package/skills/typescript-pro/references/configuration.md +0 -445
  808. package/skills/typescript-pro/references/patterns.md +0 -484
  809. package/skills/typescript-pro/references/type-guards.md +0 -352
  810. package/skills/typescript-pro/references/utility-types.md +0 -329
  811. package/skills/using-git-worktrees/SKILL.md +0 -218
  812. package/skills/verification-before-completion/SKILL.md +0 -139
  813. package/skills/vue-expert/SKILL.md +0 -98
  814. package/skills/vue-expert/references/build-tooling.md +0 -480
  815. package/skills/vue-expert/references/components.md +0 -448
  816. package/skills/vue-expert/references/composition-api.md +0 -299
  817. package/skills/vue-expert/references/mobile-hybrid.md +0 -636
  818. package/skills/vue-expert/references/nuxt.md +0 -669
  819. package/skills/vue-expert/references/state-management.md +0 -449
  820. package/skills/vue-expert/references/typescript.md +0 -584
  821. package/skills/vue-expert-js/SKILL.md +0 -167
  822. package/skills/vue-expert-js/references/component-architecture.md +0 -219
  823. package/skills/vue-expert-js/references/composables-patterns.md +0 -183
  824. package/skills/vue-expert-js/references/jsdoc-typing.md +0 -535
  825. package/skills/vue-expert-js/references/state-management.md +0 -249
  826. package/skills/vue-expert-js/references/testing-patterns.md +0 -237
  827. package/skills/websocket-engineer/SKILL.md +0 -168
  828. package/skills/websocket-engineer/references/alternatives.md +0 -391
  829. package/skills/websocket-engineer/references/patterns.md +0 -400
  830. package/skills/websocket-engineer/references/protocol.md +0 -195
  831. package/skills/websocket-engineer/references/scaling.md +0 -333
  832. package/skills/websocket-engineer/references/security.md +0 -474
  833. package/skills/writing-plans/SKILL.md +0 -151
  834. package/skills/writing-plans/plan-document-reviewer-prompt.md +0 -49
  835. package/skills/writing-skills/SKILL.md +0 -655
  836. package/skills/writing-skills/anthropic-best-practices.md +0 -1150
  837. package/skills/writing-skills/examples/CLAUDE_MD_TESTING.md +0 -189
  838. package/skills/writing-skills/graphviz-conventions.dot +0 -172
  839. package/skills/writing-skills/persuasion-principles.md +0 -187
  840. package/skills/writing-skills/render-graphs.js +0 -168
  841. package/skills/writing-skills/testing-skills-with-subagents.md +0 -384
  842. /package/skills/{design-commands → frontend/design-commands}/design.md +0 -0
  843. /package/skills/{design-commands → frontend/design-commands}/handoff.md +0 -0
  844. /package/skills/{design-commands → frontend/design-commands}/prototype.md +0 -0
  845. /package/skills/{design-commands → frontend/design-commands}/spec.md +0 -0
  846. /package/skills/{design-commands → frontend/design-commands}/style.md +0 -0
  847. /package/skills/{senior-frontend → frontend/senior-frontend}/SKILL.md +0 -0
  848. /package/skills/{senior-frontend → frontend/senior-frontend}/references/frontend_best_practices.md +0 -0
  849. /package/skills/{senior-frontend → frontend/senior-frontend}/references/nextjs_optimization_guide.md +0 -0
  850. /package/skills/{senior-frontend → frontend/senior-frontend}/references/react_patterns.md +0 -0
  851. /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/bundle_analyzer.py +0 -0
  852. /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/component_generator.py +0 -0
  853. /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/frontend_scaffolder.py +0 -0
  854. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/SKILL.md +0 -0
  855. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/charts.csv +0 -0
  856. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/colors.csv +0 -0
  857. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/icons.csv +0 -0
  858. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/landing.csv +0 -0
  859. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/products.csv +0 -0
  860. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/react-performance.csv +0 -0
  861. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/astro.csv +0 -0
  862. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/flutter.csv +0 -0
  863. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/html-tailwind.csv +0 -0
  864. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/jetpack-compose.csv +0 -0
  865. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nextjs.csv +0 -0
  866. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nuxt-ui.csv +0 -0
  867. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nuxtjs.csv +0 -0
  868. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/react-native.csv +0 -0
  869. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/react.csv +0 -0
  870. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/shadcn.csv +0 -0
  871. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/svelte.csv +0 -0
  872. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/swiftui.csv +0 -0
  873. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/vue.csv +0 -0
  874. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/styles.csv +0 -0
  875. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/typography.csv +0 -0
  876. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/ui-reasoning.csv +0 -0
  877. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/ux-guidelines.csv +0 -0
  878. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/web-interface.csv +0 -0
  879. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/core.py +0 -0
  880. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/design_system.py +0 -0
  881. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/search.py +0 -0
  882. /package/skills/{competitive-analysis → product/competitive-analysis}/SKILL.md +0 -0
  883. /package/skills/{meeting-notes → product/meeting-notes}/SKILL.md +0 -0
  884. /package/skills/{prd-template → product/prd-template}/SKILL.md +0 -0
  885. /package/skills/{stakeholder-update → product/stakeholder-update}/SKILL.md +0 -0
  886. /package/skills/{user-research-synthesis → product/user-research-synthesis}/SKILL.md +0 -0
  887. /package/skills/{senior-qa → quality/senior-qa}/README.md +0 -0
  888. /package/skills/{senior-qa → quality/senior-qa}/SKILL.md +0 -0
  889. /package/skills/{senior-qa → quality/senior-qa}/references/qa_best_practices.md +0 -0
  890. /package/skills/{senior-qa → quality/senior-qa}/references/test_automation_patterns.md +0 -0
  891. /package/skills/{senior-qa → quality/senior-qa}/references/testing_strategies.md +0 -0
  892. /package/skills/{senior-qa → quality/senior-qa}/scripts/coverage_analyzer.py +0 -0
  893. /package/skills/{senior-qa → quality/senior-qa}/scripts/e2e_test_scaffolder.py +0 -0
  894. /package/skills/{senior-qa → quality/senior-qa}/scripts/test_suite_generator.py +0 -0
  895. /package/skills/{tdd-guide → quality/tdd-guide}/HOW_TO_USE.md +0 -0
  896. /package/skills/{tdd-guide → quality/tdd-guide}/README.md +0 -0
  897. /package/skills/{tdd-guide → quality/tdd-guide}/SKILL.md +0 -0
  898. /package/skills/{tdd-guide → quality/tdd-guide}/assets/expected_output.json +0 -0
  899. /package/skills/{tdd-guide → quality/tdd-guide}/assets/sample_input_python.json +0 -0
  900. /package/skills/{tdd-guide → quality/tdd-guide}/assets/sample_input_typescript.json +0 -0
  901. /package/skills/{tdd-guide → quality/tdd-guide}/references/ci-integration.md +0 -0
  902. /package/skills/{tdd-guide → quality/tdd-guide}/references/framework-guide.md +0 -0
  903. /package/skills/{tdd-guide → quality/tdd-guide}/references/tdd-best-practices.md +0 -0
  904. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/coverage_analyzer.py +0 -0
  905. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/fixture_generator.py +0 -0
  906. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/format_detector.py +0 -0
  907. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/framework_adapter.py +0 -0
  908. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/metrics_calculator.py +0 -0
  909. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/output_formatter.py +0 -0
  910. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/tdd_workflow.py +0 -0
  911. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/test_generator.py +0 -0
  912. /package/skills/{brainstorming → workflow/brainstorming}/scripts/frame-template.html +0 -0
  913. /package/skills/{brainstorming → workflow/brainstorming}/scripts/server.cjs +0 -0
@@ -1,978 +0,0 @@
1
- # Model Validation
2
-
3
- ---
4
-
5
- ## Overview
6
-
7
- Model validation ensures models meet quality standards before production deployment. It encompasses offline evaluation, online testing, and continuous monitoring to catch performance degradation, data drift, and model failures.
8
-
9
- ## When to Use This Reference
10
-
11
- - Implementing offline model evaluation strategies
12
- - Setting up A/B testing frameworks
13
- - Building shadow deployment pipelines
14
- - Creating model comparison workflows
15
- - Implementing continuous model monitoring
16
-
17
- ## When NOT to Use
18
-
19
- - Quick model prototyping
20
- - One-off analysis without deployment
21
- - Models with no production requirements
22
-
23
- ---
24
-
25
- ## Offline Evaluation
26
-
27
- ### Comprehensive Evaluation Suite
28
-
29
- ```python
30
- from dataclasses import dataclass
31
- from typing import Optional
32
- import numpy as np
33
- import pandas as pd
34
- from sklearn.metrics import (
35
- accuracy_score, precision_score, recall_score, f1_score,
36
- roc_auc_score, average_precision_score, confusion_matrix,
37
- mean_squared_error, mean_absolute_error, r2_score,
38
- )
39
-
40
- @dataclass
41
- class ClassificationMetrics:
42
- """Classification model metrics."""
43
- accuracy: float
44
- precision: float
45
- recall: float
46
- f1: float
47
- roc_auc: Optional[float]
48
- pr_auc: Optional[float]
49
- confusion_matrix: np.ndarray
50
-
51
- def to_dict(self) -> dict:
52
- return {
53
- "accuracy": self.accuracy,
54
- "precision": self.precision,
55
- "recall": self.recall,
56
- "f1": self.f1,
57
- "roc_auc": self.roc_auc,
58
- "pr_auc": self.pr_auc,
59
- }
60
-
61
- @dataclass
62
- class RegressionMetrics:
63
- """Regression model metrics."""
64
- mse: float
65
- rmse: float
66
- mae: float
67
- r2: float
68
- mape: Optional[float]
69
-
70
- def to_dict(self) -> dict:
71
- return {
72
- "mse": self.mse,
73
- "rmse": self.rmse,
74
- "mae": self.mae,
75
- "r2": self.r2,
76
- "mape": self.mape,
77
- }
78
-
79
- class ModelEvaluator:
80
- """Comprehensive model evaluation."""
81
-
82
- def __init__(self, task_type: str = "classification"):
83
- self.task_type = task_type
84
-
85
- def evaluate_classification(
86
- self,
87
- y_true: np.ndarray,
88
- y_pred: np.ndarray,
89
- y_prob: Optional[np.ndarray] = None,
90
- average: str = "weighted",
91
- ) -> ClassificationMetrics:
92
- """Evaluate classification model."""
93
- roc_auc = None
94
- pr_auc = None
95
-
96
- if y_prob is not None:
97
- if len(np.unique(y_true)) == 2:
98
- # Binary classification
99
- if y_prob.ndim == 2:
100
- y_prob_pos = y_prob[:, 1]
101
- else:
102
- y_prob_pos = y_prob
103
- roc_auc = roc_auc_score(y_true, y_prob_pos)
104
- pr_auc = average_precision_score(y_true, y_prob_pos)
105
- else:
106
- # Multiclass
107
- roc_auc = roc_auc_score(
108
- y_true, y_prob, multi_class="ovr", average=average
109
- )
110
-
111
- return ClassificationMetrics(
112
- accuracy=accuracy_score(y_true, y_pred),
113
- precision=precision_score(y_true, y_pred, average=average, zero_division=0),
114
- recall=recall_score(y_true, y_pred, average=average, zero_division=0),
115
- f1=f1_score(y_true, y_pred, average=average, zero_division=0),
116
- roc_auc=roc_auc,
117
- pr_auc=pr_auc,
118
- confusion_matrix=confusion_matrix(y_true, y_pred),
119
- )
120
-
121
- def evaluate_regression(
122
- self,
123
- y_true: np.ndarray,
124
- y_pred: np.ndarray,
125
- ) -> RegressionMetrics:
126
- """Evaluate regression model."""
127
- mse = mean_squared_error(y_true, y_pred)
128
-
129
- # MAPE (handle zero values)
130
- mask = y_true != 0
131
- if mask.any():
132
- mape = np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100
133
- else:
134
- mape = None
135
-
136
- return RegressionMetrics(
137
- mse=mse,
138
- rmse=np.sqrt(mse),
139
- mae=mean_absolute_error(y_true, y_pred),
140
- r2=r2_score(y_true, y_pred),
141
- mape=mape,
142
- )
143
-
144
- def evaluate_by_segment(
145
- self,
146
- y_true: np.ndarray,
147
- y_pred: np.ndarray,
148
- segments: np.ndarray,
149
- y_prob: Optional[np.ndarray] = None,
150
- ) -> dict:
151
- """Evaluate model performance by segment."""
152
- results = {}
153
-
154
- for segment in np.unique(segments):
155
- mask = segments == segment
156
-
157
- if self.task_type == "classification":
158
- segment_prob = y_prob[mask] if y_prob is not None else None
159
- metrics = self.evaluate_classification(
160
- y_true[mask], y_pred[mask], segment_prob
161
- )
162
- else:
163
- metrics = self.evaluate_regression(y_true[mask], y_pred[mask])
164
-
165
- results[segment] = metrics.to_dict()
166
-
167
- return results
168
- ```
169
-
170
- ### Cross-Validation Framework
171
-
172
- ```python
173
- from sklearn.model_selection import (
174
- KFold, StratifiedKFold, TimeSeriesSplit, cross_val_score
175
- )
176
- import numpy as np
177
- from typing import Callable
178
-
179
- class CrossValidator:
180
- """Cross-validation framework for model evaluation."""
181
-
182
- def __init__(
183
- self,
184
- n_splits: int = 5,
185
- shuffle: bool = True,
186
- random_state: int = 42,
187
- ):
188
- self.n_splits = n_splits
189
- self.shuffle = shuffle
190
- self.random_state = random_state
191
-
192
- def validate_classification(
193
- self,
194
- model,
195
- X: np.ndarray,
196
- y: np.ndarray,
197
- stratified: bool = True,
198
- ) -> dict:
199
- """Run stratified k-fold cross-validation for classification."""
200
- if stratified:
201
- cv = StratifiedKFold(
202
- n_splits=self.n_splits,
203
- shuffle=self.shuffle,
204
- random_state=self.random_state,
205
- )
206
- else:
207
- cv = KFold(
208
- n_splits=self.n_splits,
209
- shuffle=self.shuffle,
210
- random_state=self.random_state,
211
- )
212
-
213
- evaluator = ModelEvaluator("classification")
214
- fold_metrics = []
215
-
216
- for fold, (train_idx, val_idx) in enumerate(cv.split(X, y)):
217
- X_train, X_val = X[train_idx], X[val_idx]
218
- y_train, y_val = y[train_idx], y[val_idx]
219
-
220
- # Clone and train model
221
- from sklearn.base import clone
222
- fold_model = clone(model)
223
- fold_model.fit(X_train, y_train)
224
-
225
- y_pred = fold_model.predict(X_val)
226
- y_prob = None
227
- if hasattr(fold_model, "predict_proba"):
228
- y_prob = fold_model.predict_proba(X_val)
229
-
230
- metrics = evaluator.evaluate_classification(y_val, y_pred, y_prob)
231
- fold_metrics.append(metrics.to_dict())
232
-
233
- return self._aggregate_cv_results(fold_metrics)
234
-
235
- def validate_time_series(
236
- self,
237
- model,
238
- X: np.ndarray,
239
- y: np.ndarray,
240
- gap: int = 0,
241
- ) -> dict:
242
- """Run time series cross-validation."""
243
- cv = TimeSeriesSplit(n_splits=self.n_splits, gap=gap)
244
- evaluator = ModelEvaluator("regression")
245
- fold_metrics = []
246
-
247
- for train_idx, val_idx in cv.split(X):
248
- X_train, X_val = X[train_idx], X[val_idx]
249
- y_train, y_val = y[train_idx], y[val_idx]
250
-
251
- from sklearn.base import clone
252
- fold_model = clone(model)
253
- fold_model.fit(X_train, y_train)
254
-
255
- y_pred = fold_model.predict(X_val)
256
- metrics = evaluator.evaluate_regression(y_val, y_pred)
257
- fold_metrics.append(metrics.to_dict())
258
-
259
- return self._aggregate_cv_results(fold_metrics)
260
-
261
- def _aggregate_cv_results(self, fold_metrics: list[dict]) -> dict:
262
- """Aggregate metrics across folds."""
263
- keys = fold_metrics[0].keys()
264
- aggregated = {}
265
-
266
- for key in keys:
267
- values = [m[key] for m in fold_metrics if m[key] is not None]
268
- if values:
269
- aggregated[key] = {
270
- "mean": np.mean(values),
271
- "std": np.std(values),
272
- "min": np.min(values),
273
- "max": np.max(values),
274
- "values": values,
275
- }
276
-
277
- return aggregated
278
- ```
279
-
280
- ---
281
-
282
- ## Model Comparison
283
-
284
- ### Statistical Comparison
285
-
286
- ```python
287
- from scipy import stats
288
- import numpy as np
289
- from dataclasses import dataclass
290
-
291
- @dataclass
292
- class ComparisonResult:
293
- """Model comparison statistical result."""
294
- model_a_mean: float
295
- model_b_mean: float
296
- difference: float
297
- p_value: float
298
- significant: bool
299
- confidence_interval: tuple[float, float]
300
- test_used: str
301
-
302
- class ModelComparator:
303
- """Statistical comparison of model performance."""
304
-
305
- def __init__(self, significance_level: float = 0.05):
306
- self.significance_level = significance_level
307
-
308
- def paired_t_test(
309
- self,
310
- scores_a: np.ndarray,
311
- scores_b: np.ndarray,
312
- ) -> ComparisonResult:
313
- """Paired t-test for CV score comparison."""
314
- statistic, p_value = stats.ttest_rel(scores_a, scores_b)
315
-
316
- differences = scores_a - scores_b
317
- mean_diff = np.mean(differences)
318
- std_diff = np.std(differences, ddof=1)
319
- n = len(differences)
320
-
321
- # 95% confidence interval
322
- t_critical = stats.t.ppf(1 - self.significance_level / 2, n - 1)
323
- margin = t_critical * std_diff / np.sqrt(n)
324
- ci = (mean_diff - margin, mean_diff + margin)
325
-
326
- return ComparisonResult(
327
- model_a_mean=np.mean(scores_a),
328
- model_b_mean=np.mean(scores_b),
329
- difference=mean_diff,
330
- p_value=p_value,
331
- significant=p_value < self.significance_level,
332
- confidence_interval=ci,
333
- test_used="paired_t_test",
334
- )
335
-
336
- def wilcoxon_test(
337
- self,
338
- scores_a: np.ndarray,
339
- scores_b: np.ndarray,
340
- ) -> ComparisonResult:
341
- """Wilcoxon signed-rank test (non-parametric)."""
342
- statistic, p_value = stats.wilcoxon(scores_a, scores_b)
343
-
344
- differences = scores_a - scores_b
345
- mean_diff = np.mean(differences)
346
-
347
- # Bootstrap confidence interval
348
- ci = self._bootstrap_ci(differences)
349
-
350
- return ComparisonResult(
351
- model_a_mean=np.mean(scores_a),
352
- model_b_mean=np.mean(scores_b),
353
- difference=mean_diff,
354
- p_value=p_value,
355
- significant=p_value < self.significance_level,
356
- confidence_interval=ci,
357
- test_used="wilcoxon",
358
- )
359
-
360
- def mcnemar_test(
361
- self,
362
- y_true: np.ndarray,
363
- pred_a: np.ndarray,
364
- pred_b: np.ndarray,
365
- ) -> ComparisonResult:
366
- """McNemar's test for classifier comparison."""
367
- # Build contingency table
368
- correct_a = (pred_a == y_true)
369
- correct_b = (pred_b == y_true)
370
-
371
- # b: A correct, B wrong; c: A wrong, B correct
372
- b = np.sum(correct_a & ~correct_b)
373
- c = np.sum(~correct_a & correct_b)
374
-
375
- if b + c < 25:
376
- # Use exact binomial test for small samples
377
- p_value = stats.binom_test(b, b + c, 0.5)
378
- else:
379
- # Use chi-square approximation
380
- statistic = (abs(b - c) - 1) ** 2 / (b + c)
381
- p_value = 1 - stats.chi2.cdf(statistic, 1)
382
-
383
- acc_a = np.mean(correct_a)
384
- acc_b = np.mean(correct_b)
385
-
386
- return ComparisonResult(
387
- model_a_mean=acc_a,
388
- model_b_mean=acc_b,
389
- difference=acc_a - acc_b,
390
- p_value=p_value,
391
- significant=p_value < self.significance_level,
392
- confidence_interval=(None, None),
393
- test_used="mcnemar",
394
- )
395
-
396
- def _bootstrap_ci(
397
- self,
398
- data: np.ndarray,
399
- n_bootstrap: int = 10000,
400
- alpha: float = 0.05,
401
- ) -> tuple[float, float]:
402
- """Calculate bootstrap confidence interval."""
403
- bootstrapped_means = []
404
-
405
- for _ in range(n_bootstrap):
406
- sample = np.random.choice(data, size=len(data), replace=True)
407
- bootstrapped_means.append(np.mean(sample))
408
-
409
- lower = np.percentile(bootstrapped_means, alpha / 2 * 100)
410
- upper = np.percentile(bootstrapped_means, (1 - alpha / 2) * 100)
411
-
412
- return (lower, upper)
413
- ```
414
-
415
- ---
416
-
417
- ## A/B Testing
418
-
419
- ### Online Experiment Framework
420
-
421
- ```python
422
- from dataclasses import dataclass
423
- from datetime import datetime
424
- from typing import Optional
425
- import numpy as np
426
- import hashlib
427
- import json
428
-
429
- @dataclass
430
- class Experiment:
431
- """A/B test experiment configuration."""
432
- experiment_id: str
433
- name: str
434
- control_model: str
435
- treatment_model: str
436
- traffic_split: float # Fraction to treatment
437
- start_time: datetime
438
- end_time: Optional[datetime]
439
- metrics: list[str]
440
- minimum_sample_size: int
441
- status: str = "active"
442
-
443
- class ABTestRouter:
444
- """Route traffic between control and treatment."""
445
-
446
- def __init__(self, experiment: Experiment):
447
- self.experiment = experiment
448
-
449
- def get_variant(self, user_id: str) -> str:
450
- """Deterministically assign user to variant."""
451
- # Hash user_id for consistent assignment
452
- hash_input = f"{self.experiment.experiment_id}:{user_id}"
453
- hash_value = int(hashlib.md5(hash_input.encode()).hexdigest(), 16)
454
- normalized = hash_value / (2**128)
455
-
456
- if normalized < self.experiment.traffic_split:
457
- return "treatment"
458
- return "control"
459
-
460
- def get_model(self, user_id: str) -> str:
461
- """Get model to use for user."""
462
- variant = self.get_variant(user_id)
463
-
464
- if variant == "treatment":
465
- return self.experiment.treatment_model
466
- return self.experiment.control_model
467
-
468
- class ABTestAnalyzer:
469
- """Analyze A/B test results."""
470
-
471
- def __init__(self, significance_level: float = 0.05):
472
- self.significance_level = significance_level
473
-
474
- def analyze_conversion(
475
- self,
476
- control_conversions: int,
477
- control_total: int,
478
- treatment_conversions: int,
479
- treatment_total: int,
480
- ) -> dict:
481
- """Analyze conversion rate experiment."""
482
- control_rate = control_conversions / control_total
483
- treatment_rate = treatment_conversions / treatment_total
484
-
485
- # Two-proportion z-test
486
- pooled_rate = (control_conversions + treatment_conversions) / (
487
- control_total + treatment_total
488
- )
489
- se = np.sqrt(
490
- pooled_rate * (1 - pooled_rate) * (1/control_total + 1/treatment_total)
491
- )
492
-
493
- z_stat = (treatment_rate - control_rate) / se
494
- p_value = 2 * (1 - stats.norm.cdf(abs(z_stat)))
495
-
496
- # Relative lift
497
- lift = (treatment_rate - control_rate) / control_rate if control_rate > 0 else 0
498
-
499
- # Confidence interval for difference
500
- se_diff = np.sqrt(
501
- control_rate * (1 - control_rate) / control_total +
502
- treatment_rate * (1 - treatment_rate) / treatment_total
503
- )
504
- z_critical = stats.norm.ppf(1 - self.significance_level / 2)
505
- ci = (
506
- (treatment_rate - control_rate) - z_critical * se_diff,
507
- (treatment_rate - control_rate) + z_critical * se_diff,
508
- )
509
-
510
- return {
511
- "control_rate": control_rate,
512
- "treatment_rate": treatment_rate,
513
- "absolute_difference": treatment_rate - control_rate,
514
- "relative_lift": lift,
515
- "p_value": p_value,
516
- "significant": p_value < self.significance_level,
517
- "confidence_interval": ci,
518
- "control_sample_size": control_total,
519
- "treatment_sample_size": treatment_total,
520
- }
521
-
522
- def analyze_continuous_metric(
523
- self,
524
- control_values: np.ndarray,
525
- treatment_values: np.ndarray,
526
- ) -> dict:
527
- """Analyze continuous metric (e.g., revenue, time)."""
528
- control_mean = np.mean(control_values)
529
- treatment_mean = np.mean(treatment_values)
530
-
531
- # Welch's t-test (unequal variances)
532
- statistic, p_value = stats.ttest_ind(
533
- treatment_values, control_values, equal_var=False
534
- )
535
-
536
- lift = (treatment_mean - control_mean) / control_mean if control_mean > 0 else 0
537
-
538
- # Confidence interval
539
- se_diff = np.sqrt(
540
- np.var(control_values) / len(control_values) +
541
- np.var(treatment_values) / len(treatment_values)
542
- )
543
- t_critical = stats.t.ppf(
544
- 1 - self.significance_level / 2,
545
- min(len(control_values), len(treatment_values)) - 1
546
- )
547
- ci = (
548
- (treatment_mean - control_mean) - t_critical * se_diff,
549
- (treatment_mean - control_mean) + t_critical * se_diff,
550
- )
551
-
552
- return {
553
- "control_mean": control_mean,
554
- "treatment_mean": treatment_mean,
555
- "absolute_difference": treatment_mean - control_mean,
556
- "relative_lift": lift,
557
- "p_value": p_value,
558
- "significant": p_value < self.significance_level,
559
- "confidence_interval": ci,
560
- "control_sample_size": len(control_values),
561
- "treatment_sample_size": len(treatment_values),
562
- }
563
-
564
- def calculate_sample_size(
565
- self,
566
- baseline_rate: float,
567
- minimum_detectable_effect: float,
568
- power: float = 0.8,
569
- ) -> int:
570
- """Calculate required sample size per variant."""
571
- alpha = self.significance_level
572
- z_alpha = stats.norm.ppf(1 - alpha / 2)
573
- z_beta = stats.norm.ppf(power)
574
-
575
- p1 = baseline_rate
576
- p2 = baseline_rate * (1 + minimum_detectable_effect)
577
-
578
- p_bar = (p1 + p2) / 2
579
-
580
- n = (
581
- (z_alpha * np.sqrt(2 * p_bar * (1 - p_bar)) +
582
- z_beta * np.sqrt(p1 * (1 - p1) + p2 * (1 - p2))) ** 2 /
583
- (p2 - p1) ** 2
584
- )
585
-
586
- return int(np.ceil(n))
587
- ```
588
-
589
- ---
590
-
591
- ## Shadow Deployment
592
-
593
- ### Shadow Mode Pipeline
594
-
595
- ```python
596
- from dataclasses import dataclass
597
- from datetime import datetime
598
- from typing import Any, Optional
599
- import logging
600
- import json
601
-
602
- logger = logging.getLogger(__name__)
603
-
604
- @dataclass
605
- class PredictionComparison:
606
- """Comparison of production and shadow predictions."""
607
- request_id: str
608
- timestamp: datetime
609
- production_prediction: Any
610
- shadow_prediction: Any
611
- production_latency_ms: float
612
- shadow_latency_ms: float
613
- agreement: bool
614
- features: Optional[dict] = None
615
-
616
- class ShadowDeployment:
617
- """Shadow deployment for model validation."""
618
-
619
- def __init__(
620
- self,
621
- production_model,
622
- shadow_model,
623
- log_path: str = "/var/log/shadow_predictions.jsonl",
624
- ):
625
- self.production_model = production_model
626
- self.shadow_model = shadow_model
627
- self.log_path = log_path
628
- self.comparisons: list[PredictionComparison] = []
629
-
630
- def predict(
631
- self,
632
- features: dict,
633
- request_id: str = None,
634
- ) -> Any:
635
- """Get production prediction, run shadow in parallel."""
636
- import time
637
- import uuid
638
- import concurrent.futures
639
-
640
- request_id = request_id or str(uuid.uuid4())
641
-
642
- # Production prediction (synchronous, used for response)
643
- prod_start = time.time()
644
- production_pred = self.production_model.predict(features)
645
- prod_latency = (time.time() - prod_start) * 1000
646
-
647
- # Shadow prediction (async, logged but not returned)
648
- def run_shadow():
649
- shadow_start = time.time()
650
- shadow_pred = self.shadow_model.predict(features)
651
- shadow_latency = (time.time() - shadow_start) * 1000
652
- return shadow_pred, shadow_latency
653
-
654
- with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
655
- future = executor.submit(run_shadow)
656
-
657
- try:
658
- shadow_pred, shadow_latency = future.result(timeout=5.0)
659
-
660
- comparison = PredictionComparison(
661
- request_id=request_id,
662
- timestamp=datetime.utcnow(),
663
- production_prediction=production_pred,
664
- shadow_prediction=shadow_pred,
665
- production_latency_ms=prod_latency,
666
- shadow_latency_ms=shadow_latency,
667
- agreement=self._check_agreement(production_pred, shadow_pred),
668
- features=features,
669
- )
670
-
671
- self._log_comparison(comparison)
672
-
673
- except concurrent.futures.TimeoutError:
674
- logger.warning(f"Shadow prediction timed out for {request_id}")
675
-
676
- return production_pred
677
-
678
- def _check_agreement(self, prod_pred: Any, shadow_pred: Any) -> bool:
679
- """Check if predictions agree."""
680
- if isinstance(prod_pred, (list, np.ndarray)):
681
- return np.allclose(prod_pred, shadow_pred, rtol=1e-3)
682
- return prod_pred == shadow_pred
683
-
684
- def _log_comparison(self, comparison: PredictionComparison) -> None:
685
- """Log comparison to file."""
686
- log_entry = {
687
- "request_id": comparison.request_id,
688
- "timestamp": comparison.timestamp.isoformat(),
689
- "production_prediction": str(comparison.production_prediction),
690
- "shadow_prediction": str(comparison.shadow_prediction),
691
- "production_latency_ms": comparison.production_latency_ms,
692
- "shadow_latency_ms": comparison.shadow_latency_ms,
693
- "agreement": comparison.agreement,
694
- }
695
-
696
- with open(self.log_path, "a") as f:
697
- f.write(json.dumps(log_entry) + "\n")
698
-
699
- self.comparisons.append(comparison)
700
-
701
- def analyze_shadow_performance(self) -> dict:
702
- """Analyze shadow model performance."""
703
- if not self.comparisons:
704
- return {}
705
-
706
- agreements = [c.agreement for c in self.comparisons]
707
- prod_latencies = [c.production_latency_ms for c in self.comparisons]
708
- shadow_latencies = [c.shadow_latency_ms for c in self.comparisons]
709
-
710
- return {
711
- "total_comparisons": len(self.comparisons),
712
- "agreement_rate": np.mean(agreements),
713
- "production_latency_p50": np.percentile(prod_latencies, 50),
714
- "production_latency_p99": np.percentile(prod_latencies, 99),
715
- "shadow_latency_p50": np.percentile(shadow_latencies, 50),
716
- "shadow_latency_p99": np.percentile(shadow_latencies, 99),
717
- "latency_difference_mean": np.mean(
718
- [s - p for s, p in zip(shadow_latencies, prod_latencies)]
719
- ),
720
- }
721
- ```
722
-
723
- ---
724
-
725
- ## Validation Pipeline Integration
726
-
727
- ### Complete Validation Workflow
728
-
729
- ```python
730
- from enum import Enum
731
- from dataclasses import dataclass
732
- from typing import Optional
733
-
734
- class ValidationStatus(Enum):
735
- PASSED = "passed"
736
- FAILED = "failed"
737
- WARNING = "warning"
738
-
739
- @dataclass
740
- class ValidationResult:
741
- """Result of a validation check."""
742
- check_name: str
743
- status: ValidationStatus
744
- message: str
745
- details: Optional[dict] = None
746
-
747
- class ModelValidator:
748
- """Complete model validation workflow."""
749
-
750
- def __init__(
751
- self,
752
- accuracy_threshold: float = 0.8,
753
- latency_threshold_ms: float = 100,
754
- drift_threshold: float = 0.2,
755
- ):
756
- self.accuracy_threshold = accuracy_threshold
757
- self.latency_threshold_ms = latency_threshold_ms
758
- self.drift_threshold = drift_threshold
759
- self.results: list[ValidationResult] = []
760
-
761
- def validate_performance(
762
- self,
763
- y_true: np.ndarray,
764
- y_pred: np.ndarray,
765
- ) -> ValidationResult:
766
- """Validate model performance metrics."""
767
- evaluator = ModelEvaluator("classification")
768
- metrics = evaluator.evaluate_classification(y_true, y_pred)
769
-
770
- if metrics.accuracy >= self.accuracy_threshold:
771
- status = ValidationStatus.PASSED
772
- message = f"Accuracy {metrics.accuracy:.4f} meets threshold"
773
- else:
774
- status = ValidationStatus.FAILED
775
- message = f"Accuracy {metrics.accuracy:.4f} below threshold {self.accuracy_threshold}"
776
-
777
- result = ValidationResult(
778
- check_name="performance",
779
- status=status,
780
- message=message,
781
- details=metrics.to_dict(),
782
- )
783
- self.results.append(result)
784
- return result
785
-
786
- def validate_latency(
787
- self,
788
- model,
789
- sample_input: np.ndarray,
790
- n_iterations: int = 100,
791
- ) -> ValidationResult:
792
- """Validate inference latency."""
793
- import time
794
-
795
- latencies = []
796
- for _ in range(n_iterations):
797
- start = time.time()
798
- model.predict(sample_input)
799
- latencies.append((time.time() - start) * 1000)
800
-
801
- p50 = np.percentile(latencies, 50)
802
- p99 = np.percentile(latencies, 99)
803
-
804
- if p99 <= self.latency_threshold_ms:
805
- status = ValidationStatus.PASSED
806
- message = f"P99 latency {p99:.2f}ms meets threshold"
807
- elif p50 <= self.latency_threshold_ms:
808
- status = ValidationStatus.WARNING
809
- message = f"P50 OK but P99 {p99:.2f}ms exceeds threshold"
810
- else:
811
- status = ValidationStatus.FAILED
812
- message = f"P99 latency {p99:.2f}ms exceeds threshold"
813
-
814
- result = ValidationResult(
815
- check_name="latency",
816
- status=status,
817
- message=message,
818
- details={"p50_ms": p50, "p99_ms": p99, "mean_ms": np.mean(latencies)},
819
- )
820
- self.results.append(result)
821
- return result
822
-
823
- def validate_data_compatibility(
824
- self,
825
- model,
826
- expected_features: list[str],
827
- sample_data: pd.DataFrame,
828
- ) -> ValidationResult:
829
- """Validate model accepts expected input format."""
830
- missing_features = set(expected_features) - set(sample_data.columns)
831
- extra_features = set(sample_data.columns) - set(expected_features)
832
-
833
- if missing_features:
834
- status = ValidationStatus.FAILED
835
- message = f"Missing features: {missing_features}"
836
- elif extra_features:
837
- status = ValidationStatus.WARNING
838
- message = f"Extra features will be ignored: {extra_features}"
839
- else:
840
- status = ValidationStatus.PASSED
841
- message = "All expected features present"
842
-
843
- # Try inference
844
- try:
845
- model.predict(sample_data[expected_features].head(1))
846
- except Exception as e:
847
- status = ValidationStatus.FAILED
848
- message = f"Inference failed: {str(e)}"
849
-
850
- result = ValidationResult(
851
- check_name="data_compatibility",
852
- status=status,
853
- message=message,
854
- details={
855
- "missing_features": list(missing_features),
856
- "extra_features": list(extra_features),
857
- },
858
- )
859
- self.results.append(result)
860
- return result
861
-
862
def validate_vs_baseline(
    self,
    y_true: np.ndarray,
    new_pred: np.ndarray,
    baseline_pred: np.ndarray,
) -> ValidationResult:
    """Compare the new model's predictions with the baseline's on the same labels.

    Runs ``ModelComparator.mcnemar_test`` and computes both accuracies, then
    grades the outcome:

    * new accuracy >= baseline and significant -> PASSED
    * new accuracy >= baseline, not significant -> WARNING
    * new accuracy < baseline and significant -> FAILED
    * new accuracy < baseline, not significant -> WARNING

    The result is appended to ``self.results`` and returned.
    """
    mcnemar = ModelComparator().mcnemar_test(y_true, new_pred, baseline_pred)

    new_acc = accuracy_score(y_true, new_pred)
    baseline_acc = accuracy_score(y_true, baseline_pred)

    # Keyed by (new model is at least as accurate, difference is significant).
    outcomes = {
        (True, True): (ValidationStatus.PASSED, "Significant improvement"),
        (True, False): (ValidationStatus.WARNING, "Improvement not significant"),
        (False, True): (ValidationStatus.FAILED, "Significant regression"),
        (False, False): (ValidationStatus.WARNING, "Minor regression"),
    }
    not_worse = bool(new_acc >= baseline_acc)
    is_significant = bool(mcnemar.significant)
    status, headline = outcomes[(not_worse, is_significant)]
    message = f"{headline}: {new_acc:.4f} vs {baseline_acc:.4f}"

    metrics = {
        "new_accuracy": new_acc,
        "baseline_accuracy": baseline_acc,
        "p_value": mcnemar.p_value,
    }
    result = ValidationResult(
        check_name="baseline_comparison",
        status=status,
        message=message,
        details=metrics,
    )
    self.results.append(result)
    return result
902
-
903
def get_summary(self) -> dict:
    """Summarise every result recorded in ``self.results``.

    Returns a dict with the overall status value (FAILED if any check failed,
    otherwise WARNING if any check warned, otherwise PASSED), the count of
    results per status, and a list of per-check entries holding the check
    name, status value and message.
    """

    def count_with(wanted) -> int:
        # Number of recorded results whose status equals ``wanted``.
        return len([entry for entry in self.results if entry.status == wanted])

    passed = count_with(ValidationStatus.PASSED)
    warnings = count_with(ValidationStatus.WARNING)
    failed = count_with(ValidationStatus.FAILED)

    # The worst outcome wins: a failure outranks a warning, which outranks a pass.
    if failed > 0:
        overall_status = ValidationStatus.FAILED
    elif warnings > 0:
        overall_status = ValidationStatus.WARNING
    else:
        overall_status = ValidationStatus.PASSED

    per_check = []
    for entry in self.results:
        per_check.append(
            {
                "check": entry.check_name,
                "status": entry.status.value,
                "message": entry.message,
            }
        )

    return {
        "overall_status": overall_status.value,
        "passed": passed,
        "warnings": warnings,
        "failed": failed,
        "results": per_check,
    }
929
- ```
930
-
931
- ---
932
-
933
- ## Best Practices
934
-
935
- ### Validation Checklist
936
-
937
- ```python
938
# Checklist items grouped by validation stage, in the order the stages are listed.
VALIDATION_CHECKLIST = dict(
    # Checks run on held-out data before any deployment step.
    offline=[
        "Accuracy/performance metrics meet threshold",
        "Cross-validation shows consistent performance",
        "Model outperforms or matches baseline",
        "Metrics stable across data segments",
    ],
    # Operational checks on the packaged model.
    pre_deployment=[
        "Inference latency within SLA",
        "Memory usage acceptable",
        "Input/output schema validated",
        "Model serialization/loading works",
    ],
    # Checks while the model runs in shadow alongside production.
    shadow=[
        "Shadow predictions logged successfully",
        "Agreement rate with production acceptable",
        "No latency regression",
        "Error rate within bounds",
    ],
    # Checks for the A/B test.
    ab_test=[
        "Sufficient sample size reached",
        "Statistical significance achieved",
        "No negative impact on guardrail metrics",
        "Business metrics improved",
    ],
)
964
- ```
965
-
966
- ---
967
-
968
- ## Related References
969
-
970
- - `training-pipelines.md` - Model training before validation
971
- - `experiment-tracking.md` - Logging validation results
972
- - `pipeline-orchestration.md` - Automated validation workflows
973
- - `feature-engineering.md` - Feature validation
974
-
975
- ## Cross-Reference Skills
976
-
977
- - **Data Engineer** - Data quality validation
978
- - **DevOps Engineer** - Deployment pipeline integration