aigroup-workflow 2.1.2 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.codex/AGENTS.md +1 -1
- package/CLAUDE.md +1 -4
- package/README.md +333 -333
- package/cli/commands/init.mjs +20 -6
- package/cli/utils/scaffold.mjs +39 -9
- package/docs/red-flags.md +1 -1
- package/docs/rules/entropy.md +1 -1
- package/docs/rules/performance.md +1 -1
- package/docs/workflow-pipeline.md +8 -6
- package/manifests/install-modules.json +223 -133
- package/package.json +39 -39
- package/scripts/hooks/checks/orchestration-artifacts.cjs +28 -23
- package/scripts/hooks/checks/workflow-state.cjs +4 -5
- package/scripts/orchestration/lib/orchestrator.cjs +353 -92
- package/scripts/orchestration/lib/validate.cjs +145 -0
- package/scripts/orchestration/session.cjs +100 -33
- package/skills/ai-ml/fine-tuning-expert/SKILL.md +162 -0
- package/skills/ai-ml/fine-tuning-expert/references/dataset-preparation.md +540 -0
- package/skills/ai-ml/fine-tuning-expert/references/deployment-optimization.md +673 -0
- package/skills/ai-ml/fine-tuning-expert/references/evaluation-metrics.md +597 -0
- package/skills/ai-ml/fine-tuning-expert/references/hyperparameter-tuning.md +565 -0
- package/skills/ai-ml/fine-tuning-expert/references/lora-peft.md +347 -0
- package/skills/ai-ml/ml-pipeline/SKILL.md +159 -0
- package/skills/ai-ml/ml-pipeline/references/experiment-tracking.md +833 -0
- package/skills/ai-ml/ml-pipeline/references/feature-engineering.md +631 -0
- package/skills/ai-ml/ml-pipeline/references/model-validation.md +978 -0
- package/skills/ai-ml/ml-pipeline/references/pipeline-orchestration.md +907 -0
- package/skills/ai-ml/ml-pipeline/references/training-pipelines.md +782 -0
- package/skills/ai-ml/rag-architect/SKILL.md +194 -0
- package/skills/ai-ml/rag-architect/references/chunking-strategies.md +878 -0
- package/skills/ai-ml/rag-architect/references/embedding-models.md +561 -0
- package/skills/ai-ml/rag-architect/references/rag-evaluation.md +833 -0
- package/skills/ai-ml/rag-architect/references/retrieval-optimization.md +795 -0
- package/skills/ai-ml/rag-architect/references/vector-databases.md +589 -0
- package/skills/ai-ml/spark-engineer/SKILL.md +148 -0
- package/skills/ai-ml/spark-engineer/references/partitioning-caching.md +543 -0
- package/skills/ai-ml/spark-engineer/references/performance-tuning.md +544 -0
- package/skills/ai-ml/spark-engineer/references/rdd-operations.md +599 -0
- package/skills/ai-ml/spark-engineer/references/spark-sql-dataframes.md +474 -0
- package/skills/ai-ml/spark-engineer/references/streaming-patterns.md +786 -0
- package/skills/backend/api-designer/SKILL.md +217 -0
- package/skills/backend/api-designer/references/error-handling.md +541 -0
- package/skills/backend/api-designer/references/openapi.md +824 -0
- package/skills/backend/api-designer/references/pagination.md +494 -0
- package/skills/backend/api-designer/references/rest-patterns.md +335 -0
- package/skills/backend/api-designer/references/versioning.md +391 -0
- package/skills/backend/architecture-designer/SKILL.md +117 -0
- package/skills/backend/architecture-designer/references/adr-template.md +116 -0
- package/skills/backend/architecture-designer/references/architecture-patterns.md +111 -0
- package/skills/backend/architecture-designer/references/database-selection.md +102 -0
- package/skills/backend/architecture-designer/references/nfr-checklist.md +112 -0
- package/skills/backend/architecture-designer/references/system-design.md +100 -0
- package/skills/backend/code-documenter/SKILL.md +147 -0
- package/skills/backend/code-documenter/references/api-docs-fastapi-django.md +166 -0
- package/skills/backend/code-documenter/references/api-docs-nestjs-express.md +220 -0
- package/skills/backend/code-documenter/references/coverage-reports.md +125 -0
- package/skills/backend/code-documenter/references/documentation-systems.md +333 -0
- package/skills/backend/code-documenter/references/interactive-api-docs.md +531 -0
- package/skills/backend/code-documenter/references/python-docstrings.md +121 -0
- package/skills/backend/code-documenter/references/typescript-jsdoc.md +145 -0
- package/skills/backend/code-documenter/references/user-guides-tutorials.md +530 -0
- package/skills/backend/debugging-wizard/SKILL.md +105 -0
- package/skills/backend/debugging-wizard/references/common-patterns.md +132 -0
- package/skills/backend/debugging-wizard/references/debugging-tools.md +140 -0
- package/skills/backend/debugging-wizard/references/quick-fixes.md +177 -0
- package/skills/backend/debugging-wizard/references/strategies.md +142 -0
- package/skills/backend/debugging-wizard/references/systematic-debugging.md +367 -0
- package/skills/backend/feature-forge/SKILL.md +98 -0
- package/skills/backend/feature-forge/references/acceptance-criteria.md +104 -0
- package/skills/backend/feature-forge/references/ears-syntax.md +99 -0
- package/skills/backend/feature-forge/references/interview-questions.md +150 -0
- package/skills/backend/feature-forge/references/pre-discovery-subagents.md +54 -0
- package/skills/backend/feature-forge/references/specification-template.md +103 -0
- package/skills/backend/fullstack-guardian/SKILL.md +105 -0
- package/skills/backend/fullstack-guardian/references/api-design-standards.md +307 -0
- package/skills/backend/fullstack-guardian/references/architecture-decisions.md +350 -0
- package/skills/backend/fullstack-guardian/references/backend-patterns.md +237 -0
- package/skills/backend/fullstack-guardian/references/common-patterns.md +134 -0
- package/skills/backend/fullstack-guardian/references/deliverables-checklist.md +354 -0
- package/skills/backend/fullstack-guardian/references/design-template.md +91 -0
- package/skills/backend/fullstack-guardian/references/error-handling.md +135 -0
- package/skills/backend/fullstack-guardian/references/frontend-patterns.md +340 -0
- package/skills/backend/fullstack-guardian/references/integration-patterns.md +333 -0
- package/skills/backend/fullstack-guardian/references/security-checklist.md +106 -0
- package/skills/backend/graphql-architect/SKILL.md +146 -0
- package/skills/backend/graphql-architect/references/federation.md +418 -0
- package/skills/backend/graphql-architect/references/migration-from-rest.md +1141 -0
- package/skills/backend/graphql-architect/references/resolvers.md +425 -0
- package/skills/backend/graphql-architect/references/schema-design.md +393 -0
- package/skills/backend/graphql-architect/references/security.md +569 -0
- package/skills/backend/graphql-architect/references/subscriptions.md +510 -0
- package/skills/backend/legacy-modernizer/SKILL.md +137 -0
- package/skills/backend/legacy-modernizer/references/legacy-testing.md +381 -0
- package/skills/backend/legacy-modernizer/references/migration-strategies.md +423 -0
- package/skills/backend/legacy-modernizer/references/refactoring-patterns.md +395 -0
- package/skills/backend/legacy-modernizer/references/strangler-fig-pattern.md +281 -0
- package/skills/backend/legacy-modernizer/references/system-assessment.md +487 -0
- package/skills/backend/microservices-architect/SKILL.md +164 -0
- package/skills/backend/microservices-architect/references/communication.md +499 -0
- package/skills/backend/microservices-architect/references/data.md +721 -0
- package/skills/backend/microservices-architect/references/decomposition.md +344 -0
- package/skills/backend/microservices-architect/references/observability.md +805 -0
- package/skills/backend/microservices-architect/references/patterns.md +603 -0
- package/skills/database/database-optimizer/SKILL.md +147 -0
- package/skills/database/database-optimizer/references/index-strategies.md +331 -0
- package/skills/database/database-optimizer/references/monitoring-analysis.md +501 -0
- package/skills/database/database-optimizer/references/mysql-tuning.md +452 -0
- package/skills/database/database-optimizer/references/postgresql-tuning.md +413 -0
- package/skills/database/database-optimizer/references/query-optimization.md +251 -0
- package/skills/database/postgres-pro/SKILL.md +152 -0
- package/skills/database/postgres-pro/references/extensions.md +404 -0
- package/skills/database/postgres-pro/references/jsonb.md +321 -0
- package/skills/database/postgres-pro/references/maintenance.md +481 -0
- package/skills/database/postgres-pro/references/performance.md +265 -0
- package/skills/database/postgres-pro/references/replication.md +446 -0
- package/skills/database/sql-pro/SKILL.md +129 -0
- package/skills/database/sql-pro/references/database-design.md +402 -0
- package/skills/database/sql-pro/references/dialect-differences.md +419 -0
- package/skills/database/sql-pro/references/optimization.md +384 -0
- package/skills/database/sql-pro/references/query-patterns.md +285 -0
- package/skills/database/sql-pro/references/window-functions.md +328 -0
- package/skills/dotnet/csharp-developer/SKILL.md +125 -0
- package/skills/dotnet/csharp-developer/references/aspnet-core.md +394 -0
- package/skills/dotnet/csharp-developer/references/blazor.md +553 -0
- package/skills/dotnet/csharp-developer/references/entity-framework.md +409 -0
- package/skills/dotnet/csharp-developer/references/modern-csharp.md +248 -0
- package/skills/dotnet/csharp-developer/references/performance.md +498 -0
- package/skills/dotnet/dotnet-core-expert/SKILL.md +138 -0
- package/skills/dotnet/dotnet-core-expert/references/authentication.md +546 -0
- package/skills/dotnet/dotnet-core-expert/references/clean-architecture.md +455 -0
- package/skills/dotnet/dotnet-core-expert/references/cloud-native.md +548 -0
- package/skills/dotnet/dotnet-core-expert/references/entity-framework.md +440 -0
- package/skills/dotnet/dotnet-core-expert/references/minimal-apis.md +319 -0
- package/skills/frontend/angular-architect/SKILL.md +152 -0
- package/skills/frontend/angular-architect/references/components.md +297 -0
- package/skills/frontend/angular-architect/references/ngrx.md +401 -0
- package/skills/frontend/angular-architect/references/routing.md +361 -0
- package/skills/frontend/angular-architect/references/rxjs.md +319 -0
- package/skills/frontend/angular-architect/references/testing.md +405 -0
- package/skills/frontend/flutter-expert/SKILL.md +138 -0
- package/skills/frontend/flutter-expert/references/bloc-state.md +259 -0
- package/skills/frontend/flutter-expert/references/gorouter-navigation.md +119 -0
- package/skills/frontend/flutter-expert/references/performance.md +99 -0
- package/skills/frontend/flutter-expert/references/project-structure.md +118 -0
- package/skills/frontend/flutter-expert/references/riverpod-state.md +130 -0
- package/skills/frontend/flutter-expert/references/widget-patterns.md +123 -0
- package/skills/frontend/nextjs-developer/SKILL.md +143 -0
- package/skills/frontend/nextjs-developer/references/app-router.md +311 -0
- package/skills/frontend/nextjs-developer/references/data-fetching.md +482 -0
- package/skills/frontend/nextjs-developer/references/deployment.md +545 -0
- package/skills/frontend/nextjs-developer/references/server-actions.md +462 -0
- package/skills/frontend/nextjs-developer/references/server-components.md +384 -0
- package/skills/frontend/react-expert/SKILL.md +149 -0
- package/skills/frontend/react-expert/references/hooks-patterns.md +162 -0
- package/skills/frontend/react-expert/references/migration-class-to-modern.md +1119 -0
- package/skills/frontend/react-expert/references/performance.md +168 -0
- package/skills/frontend/react-expert/references/react-19-features.md +174 -0
- package/skills/frontend/react-expert/references/server-components.md +143 -0
- package/skills/frontend/react-expert/references/state-management.md +171 -0
- package/skills/frontend/react-expert/references/testing-react.md +174 -0
- package/skills/frontend/react-native-expert/SKILL.md +185 -0
- package/skills/frontend/react-native-expert/references/expo-router.md +187 -0
- package/skills/frontend/react-native-expert/references/list-optimization.md +204 -0
- package/skills/frontend/react-native-expert/references/platform-handling.md +188 -0
- package/skills/frontend/react-native-expert/references/project-structure.md +171 -0
- package/skills/frontend/react-native-expert/references/storage-hooks.md +173 -0
- package/skills/frontend/vue-expert/SKILL.md +98 -0
- package/skills/frontend/vue-expert/references/build-tooling.md +480 -0
- package/skills/frontend/vue-expert/references/components.md +448 -0
- package/skills/frontend/vue-expert/references/composition-api.md +299 -0
- package/skills/frontend/vue-expert/references/mobile-hybrid.md +636 -0
- package/skills/frontend/vue-expert/references/nuxt.md +669 -0
- package/skills/frontend/vue-expert/references/state-management.md +449 -0
- package/skills/frontend/vue-expert/references/typescript.md +584 -0
- package/skills/frontend/vue-expert-js/SKILL.md +167 -0
- package/skills/frontend/vue-expert-js/references/component-architecture.md +219 -0
- package/skills/frontend/vue-expert-js/references/composables-patterns.md +183 -0
- package/skills/frontend/vue-expert-js/references/jsdoc-typing.md +535 -0
- package/skills/frontend/vue-expert-js/references/state-management.md +249 -0
- package/skills/frontend/vue-expert-js/references/testing-patterns.md +237 -0
- package/skills/go-rust-cpp/cpp-pro/SKILL.md +115 -0
- package/skills/go-rust-cpp/cpp-pro/references/build-tooling.md +440 -0
- package/skills/go-rust-cpp/cpp-pro/references/concurrency.md +437 -0
- package/skills/go-rust-cpp/cpp-pro/references/memory-performance.md +397 -0
- package/skills/go-rust-cpp/cpp-pro/references/modern-cpp.md +304 -0
- package/skills/go-rust-cpp/cpp-pro/references/templates.md +357 -0
- package/skills/go-rust-cpp/golang-pro/SKILL.md +122 -0
- package/skills/go-rust-cpp/golang-pro/references/concurrency.md +329 -0
- package/skills/go-rust-cpp/golang-pro/references/generics.md +442 -0
- package/skills/go-rust-cpp/golang-pro/references/interfaces.md +432 -0
- package/skills/go-rust-cpp/golang-pro/references/project-structure.md +477 -0
- package/skills/go-rust-cpp/golang-pro/references/testing.md +451 -0
- package/skills/go-rust-cpp/rust-engineer/SKILL.md +167 -0
- package/skills/go-rust-cpp/rust-engineer/references/async.md +458 -0
- package/skills/go-rust-cpp/rust-engineer/references/error-handling.md +334 -0
- package/skills/go-rust-cpp/rust-engineer/references/ownership.md +278 -0
- package/skills/go-rust-cpp/rust-engineer/references/testing.md +470 -0
- package/skills/go-rust-cpp/rust-engineer/references/traits.md +413 -0
- package/skills/infra/cli-developer/SKILL.md +113 -0
- package/skills/infra/cli-developer/references/design-patterns.md +221 -0
- package/skills/infra/cli-developer/references/go-cli.md +540 -0
- package/skills/infra/cli-developer/references/node-cli.md +383 -0
- package/skills/infra/cli-developer/references/python-cli.md +422 -0
- package/skills/infra/cli-developer/references/ux-patterns.md +448 -0
- package/skills/infra/cloud-architect/SKILL.md +216 -0
- package/skills/infra/cloud-architect/references/aws.md +394 -0
- package/skills/infra/cloud-architect/references/azure.md +562 -0
- package/skills/infra/cloud-architect/references/cost.md +582 -0
- package/skills/infra/cloud-architect/references/gcp.md +633 -0
- package/skills/infra/cloud-architect/references/multi-cloud.md +483 -0
- package/skills/infra/devops-engineer/SKILL.md +144 -0
- package/skills/infra/devops-engineer/references/deployment-strategies.md +241 -0
- package/skills/infra/devops-engineer/references/docker-patterns.md +113 -0
- package/skills/infra/devops-engineer/references/github-actions.md +139 -0
- package/skills/infra/devops-engineer/references/incident-response.md +331 -0
- package/skills/infra/devops-engineer/references/kubernetes.md +154 -0
- package/skills/infra/devops-engineer/references/platform-engineering.md +417 -0
- package/skills/infra/devops-engineer/references/release-automation.md +527 -0
- package/skills/infra/devops-engineer/references/terraform-iac.md +141 -0
- package/skills/infra/kubernetes-specialist/SKILL.md +241 -0
- package/skills/infra/kubernetes-specialist/references/configuration.md +452 -0
- package/skills/infra/kubernetes-specialist/references/cost-optimization.md +458 -0
- package/skills/infra/kubernetes-specialist/references/custom-operators.md +563 -0
- package/skills/infra/kubernetes-specialist/references/gitops.md +530 -0
- package/skills/infra/kubernetes-specialist/references/helm-charts.md +912 -0
- package/skills/infra/kubernetes-specialist/references/multi-cluster.md +507 -0
- package/skills/infra/kubernetes-specialist/references/networking.md +447 -0
- package/skills/infra/kubernetes-specialist/references/service-mesh.md +459 -0
- package/skills/infra/kubernetes-specialist/references/storage.md +535 -0
- package/skills/infra/kubernetes-specialist/references/troubleshooting.md +414 -0
- package/skills/infra/kubernetes-specialist/references/workloads.md +377 -0
- package/skills/infra/mcp-developer/SKILL.md +143 -0
- package/skills/infra/mcp-developer/references/protocol.md +244 -0
- package/skills/infra/mcp-developer/references/python-sdk.md +367 -0
- package/skills/infra/mcp-developer/references/resources.md +554 -0
- package/skills/infra/mcp-developer/references/tools.md +480 -0
- package/skills/infra/mcp-developer/references/typescript-sdk.md +350 -0
- package/skills/infra/monitoring-expert/SKILL.md +176 -0
- package/skills/infra/monitoring-expert/references/alerting-rules.md +141 -0
- package/skills/infra/monitoring-expert/references/application-profiling.md +331 -0
- package/skills/infra/monitoring-expert/references/capacity-planning.md +344 -0
- package/skills/infra/monitoring-expert/references/dashboards.md +126 -0
- package/skills/infra/monitoring-expert/references/opentelemetry.md +123 -0
- package/skills/infra/monitoring-expert/references/performance-testing.md +269 -0
- package/skills/infra/monitoring-expert/references/prometheus-metrics.md +136 -0
- package/skills/infra/monitoring-expert/references/structured-logging.md +142 -0
- package/skills/infra/sre-engineer/SKILL.md +181 -0
- package/skills/infra/sre-engineer/references/automation-toil.md +492 -0
- package/skills/infra/sre-engineer/references/error-budget-policy.md +334 -0
- package/skills/infra/sre-engineer/references/incident-chaos.md +576 -0
- package/skills/infra/sre-engineer/references/monitoring-alerting.md +424 -0
- package/skills/infra/sre-engineer/references/slo-sli-management.md +238 -0
- package/skills/infra/terraform-engineer/SKILL.md +143 -0
- package/skills/infra/terraform-engineer/references/best-practices.md +583 -0
- package/skills/infra/terraform-engineer/references/module-patterns.md +297 -0
- package/skills/infra/terraform-engineer/references/providers.md +452 -0
- package/skills/infra/terraform-engineer/references/state-management.md +371 -0
- package/skills/infra/terraform-engineer/references/testing.md +486 -0
- package/skills/infra/websocket-engineer/SKILL.md +168 -0
- package/skills/infra/websocket-engineer/references/alternatives.md +391 -0
- package/skills/infra/websocket-engineer/references/patterns.md +400 -0
- package/skills/infra/websocket-engineer/references/protocol.md +195 -0
- package/skills/infra/websocket-engineer/references/scaling.md +333 -0
- package/skills/infra/websocket-engineer/references/security.md +474 -0
- package/skills/java/java-architect/SKILL.md +132 -0
- package/skills/java/java-architect/references/jpa-optimization.md +393 -0
- package/skills/java/java-architect/references/reactive-webflux.md +356 -0
- package/skills/java/java-architect/references/spring-boot-setup.md +269 -0
- package/skills/java/java-architect/references/spring-security.md +445 -0
- package/skills/java/java-architect/references/testing-patterns.md +500 -0
- package/skills/java/kotlin-specialist/SKILL.md +147 -0
- package/skills/java/kotlin-specialist/references/android-compose.md +419 -0
- package/skills/java/kotlin-specialist/references/coroutines-flow.md +276 -0
- package/skills/java/kotlin-specialist/references/dsl-idioms.md +421 -0
- package/skills/java/kotlin-specialist/references/ktor-server.md +426 -0
- package/skills/java/kotlin-specialist/references/multiplatform-kmp.md +380 -0
- package/skills/java/spring-boot-engineer/SKILL.md +195 -0
- package/skills/java/spring-boot-engineer/references/cloud.md +498 -0
- package/skills/java/spring-boot-engineer/references/data.md +381 -0
- package/skills/java/spring-boot-engineer/references/security.md +459 -0
- package/skills/java/spring-boot-engineer/references/testing.md +545 -0
- package/skills/java/spring-boot-engineer/references/web.md +295 -0
- package/skills/javascript/javascript-pro/SKILL.md +132 -0
- package/skills/javascript/javascript-pro/references/async-patterns.md +334 -0
- package/skills/javascript/javascript-pro/references/browser-apis.md +398 -0
- package/skills/javascript/javascript-pro/references/modern-syntax.md +272 -0
- package/skills/javascript/javascript-pro/references/modules.md +357 -0
- package/skills/javascript/javascript-pro/references/node-essentials.md +471 -0
- package/skills/javascript/nestjs-expert/SKILL.md +206 -0
- package/skills/javascript/nestjs-expert/references/authentication.md +166 -0
- package/skills/javascript/nestjs-expert/references/controllers-routing.md +111 -0
- package/skills/javascript/nestjs-expert/references/dtos-validation.md +153 -0
- package/skills/javascript/nestjs-expert/references/migration-from-express.md +1237 -0
- package/skills/javascript/nestjs-expert/references/services-di.md +140 -0
- package/skills/javascript/nestjs-expert/references/testing-patterns.md +186 -0
- package/skills/javascript/typescript-pro/SKILL.md +145 -0
- package/skills/javascript/typescript-pro/references/advanced-types.md +259 -0
- package/skills/javascript/typescript-pro/references/configuration.md +445 -0
- package/skills/javascript/typescript-pro/references/patterns.md +484 -0
- package/skills/javascript/typescript-pro/references/type-guards.md +352 -0
- package/skills/javascript/typescript-pro/references/utility-types.md +329 -0
- package/skills/php/laravel-specialist/SKILL.md +262 -0
- package/skills/php/laravel-specialist/references/eloquent.md +351 -0
- package/skills/php/laravel-specialist/references/livewire.md +512 -0
- package/skills/php/laravel-specialist/references/queues.md +423 -0
- package/skills/php/laravel-specialist/references/routing.md +362 -0
- package/skills/php/laravel-specialist/references/testing.md +522 -0
- package/skills/php/php-pro/SKILL.md +206 -0
- package/skills/php/php-pro/references/async-patterns.md +412 -0
- package/skills/php/php-pro/references/laravel-patterns.md +377 -0
- package/skills/php/php-pro/references/modern-php-features.md +323 -0
- package/skills/php/php-pro/references/symfony-patterns.md +466 -0
- package/skills/php/php-pro/references/testing-quality.md +466 -0
- package/skills/python/django-expert/SKILL.md +162 -0
- package/skills/python/django-expert/references/authentication.md +145 -0
- package/skills/python/django-expert/references/drf-serializers.md +148 -0
- package/skills/python/django-expert/references/models-orm.md +151 -0
- package/skills/python/django-expert/references/testing-django.md +204 -0
- package/skills/python/django-expert/references/viewsets-views.md +153 -0
- package/skills/python/fastapi-expert/SKILL.md +185 -0
- package/skills/python/fastapi-expert/references/async-sqlalchemy.md +146 -0
- package/skills/python/fastapi-expert/references/authentication.md +159 -0
- package/skills/python/fastapi-expert/references/endpoints-routing.md +142 -0
- package/skills/python/fastapi-expert/references/migration-from-django.md +997 -0
- package/skills/python/fastapi-expert/references/pydantic-v2.md +135 -0
- package/skills/python/fastapi-expert/references/testing-async.md +159 -0
- package/skills/python/pandas-pro/SKILL.md +178 -0
- package/skills/python/pandas-pro/references/aggregation-groupby.md +545 -0
- package/skills/python/pandas-pro/references/data-cleaning.md +500 -0
- package/skills/python/pandas-pro/references/dataframe-operations.md +420 -0
- package/skills/python/pandas-pro/references/merging-joining.md +596 -0
- package/skills/python/pandas-pro/references/performance-optimization.md +597 -0
- package/skills/python/python-pro/SKILL.md +177 -0
- package/skills/python/python-pro/references/async-patterns.md +356 -0
- package/skills/python/python-pro/references/packaging.md +460 -0
- package/skills/python/python-pro/references/standard-library.md +378 -0
- package/skills/python/python-pro/references/testing.md +404 -0
- package/skills/python/python-pro/references/type-system.md +290 -0
- package/skills/quality/chaos-engineer/SKILL.md +182 -0
- package/skills/quality/chaos-engineer/references/chaos-tools.md +511 -0
- package/skills/quality/chaos-engineer/references/experiment-design.md +229 -0
- package/skills/quality/chaos-engineer/references/game-days.md +434 -0
- package/skills/quality/chaos-engineer/references/infrastructure-chaos.md +348 -0
- package/skills/quality/chaos-engineer/references/kubernetes-chaos.md +432 -0
- package/skills/quality/code-reviewer/SKILL.md +119 -0
- package/skills/quality/code-reviewer/references/common-issues.md +142 -0
- package/skills/quality/code-reviewer/references/feedback-examples.md +144 -0
- package/skills/quality/code-reviewer/references/receiving-feedback.md +238 -0
- package/skills/quality/code-reviewer/references/report-template.md +109 -0
- package/skills/quality/code-reviewer/references/review-checklist.md +88 -0
- package/skills/quality/code-reviewer/references/spec-compliance-review.md +258 -0
- package/skills/quality/playwright-expert/SKILL.md +169 -0
- package/skills/quality/playwright-expert/references/api-mocking.md +140 -0
- package/skills/quality/playwright-expert/references/configuration.md +155 -0
- package/skills/quality/playwright-expert/references/debugging-flaky.md +150 -0
- package/skills/quality/playwright-expert/references/page-object-model.md +152 -0
- package/skills/quality/playwright-expert/references/selectors-locators.md +119 -0
- package/skills/quality/secure-code-guardian/SKILL.md +191 -0
- package/skills/quality/secure-code-guardian/references/authentication.md +136 -0
- package/skills/quality/secure-code-guardian/references/input-validation.md +146 -0
- package/skills/quality/secure-code-guardian/references/owasp-prevention.md +135 -0
- package/skills/quality/secure-code-guardian/references/security-headers.md +133 -0
- package/skills/quality/secure-code-guardian/references/xss-csrf.md +157 -0
- package/skills/quality/security-reviewer/SKILL.md +103 -0
- package/skills/quality/security-reviewer/references/infrastructure-security.md +268 -0
- package/skills/quality/security-reviewer/references/penetration-testing.md +268 -0
- package/skills/quality/security-reviewer/references/report-template.md +170 -0
- package/skills/quality/security-reviewer/references/sast-tools.md +117 -0
- package/skills/quality/security-reviewer/references/secret-scanning.md +125 -0
- package/skills/quality/security-reviewer/references/vulnerability-patterns.md +152 -0
- package/skills/quality/tdd-guide/assets/sample_coverage_report.lcov +0 -0
- package/skills/quality/test-master/SKILL.md +94 -0
- package/skills/quality/test-master/references/automation-frameworks.md +294 -0
- package/skills/quality/test-master/references/e2e-testing.md +128 -0
- package/skills/quality/test-master/references/integration-testing.md +120 -0
- package/skills/quality/test-master/references/performance-testing.md +118 -0
- package/skills/quality/test-master/references/qa-methodology.md +247 -0
- package/skills/quality/test-master/references/security-testing.md +127 -0
- package/skills/quality/test-master/references/tdd-iron-laws.md +174 -0
- package/skills/quality/test-master/references/test-reports.md +104 -0
- package/skills/quality/test-master/references/testing-anti-patterns.md +231 -0
- package/skills/quality/test-master/references/unit-testing.md +113 -0
- package/skills/ruby/rails-expert/SKILL.md +154 -0
- package/skills/ruby/rails-expert/references/active-record.md +244 -0
- package/skills/ruby/rails-expert/references/api-development.md +401 -0
- package/skills/ruby/rails-expert/references/background-jobs.md +272 -0
- package/skills/ruby/rails-expert/references/hotwire-turbo.md +228 -0
- package/skills/ruby/rails-expert/references/rspec-testing.md +367 -0
- package/skills/swift/swift-expert/SKILL.md +163 -0
- package/skills/swift/swift-expert/references/async-concurrency.md +360 -0
- package/skills/swift/swift-expert/references/memory-performance.md +377 -0
- package/skills/swift/swift-expert/references/protocol-oriented.md +354 -0
- package/skills/swift/swift-expert/references/swiftui-patterns.md +291 -0
- package/skills/swift/swift-expert/references/testing-patterns.md +399 -0
- package/skills/workflow/brainstorming/SKILL.md +164 -0
- package/skills/workflow/brainstorming/scripts/helper.js +88 -0
- package/skills/workflow/brainstorming/scripts/start-server.sh +148 -0
- package/skills/workflow/brainstorming/scripts/stop-server.sh +56 -0
- package/skills/workflow/brainstorming/spec-document-reviewer-prompt.md +49 -0
- package/skills/workflow/brainstorming/visual-companion.md +287 -0
- package/skills/workflow/documentation/SKILL.md +45 -0
- package/skills/workflow/entropy-management/SKILL.md +115 -0
- package/skills/workflow/executing-plans/SKILL.md +70 -0
- package/skills/workflow/finishing-a-development-branch/SKILL.md +200 -0
- package/skills/workflow/receiving-code-review/SKILL.md +213 -0
- package/skills/workflow/requesting-code-review/SKILL.md +105 -0
- package/skills/workflow/requesting-code-review/code-reviewer.md +146 -0
- package/skills/workflow/requirement-engineering/SKILL.md +111 -0
- package/skills/workflow/systematic-debugging/CREATION-LOG.md +119 -0
- package/skills/workflow/systematic-debugging/SKILL.md +296 -0
- package/skills/workflow/systematic-debugging/condition-based-waiting-example.ts +158 -0
- package/skills/workflow/systematic-debugging/condition-based-waiting.md +115 -0
- package/skills/workflow/systematic-debugging/defense-in-depth.md +122 -0
- package/skills/workflow/systematic-debugging/find-polluter.sh +63 -0
- package/skills/workflow/systematic-debugging/root-cause-tracing.md +169 -0
- package/skills/workflow/systematic-debugging/test-academic.md +14 -0
- package/skills/workflow/systematic-debugging/test-pressure-1.md +58 -0
- package/skills/workflow/systematic-debugging/test-pressure-2.md +68 -0
- package/skills/workflow/systematic-debugging/test-pressure-3.md +69 -0
- package/skills/workflow/using-git-worktrees/SKILL.md +218 -0
- package/skills/workflow/verification-before-completion/SKILL.md +139 -0
- package/skills/workflow/writing-plans/SKILL.md +151 -0
- package/skills/workflow/writing-plans/plan-document-reviewer-prompt.md +49 -0
- package/skills/workflow/writing-skills/SKILL.md +655 -0
- package/skills/workflow/writing-skills/anthropic-best-practices.md +1150 -0
- package/skills/workflow/writing-skills/examples/CLAUDE_MD_TESTING.md +189 -0
- package/skills/workflow/writing-skills/graphviz-conventions.dot +0 -0
- package/skills/workflow/writing-skills/persuasion-principles.md +187 -0
- package/skills/workflow/writing-skills/render-graphs.js +168 -0
- package/skills/workflow/writing-skills/testing-skills-with-subagents.md +384 -0
- package/skills/angular-architect/SKILL.md +0 -152
- package/skills/angular-architect/references/components.md +0 -297
- package/skills/angular-architect/references/ngrx.md +0 -401
- package/skills/angular-architect/references/routing.md +0 -361
- package/skills/angular-architect/references/rxjs.md +0 -319
- package/skills/angular-architect/references/testing.md +0 -405
- package/skills/api-designer/SKILL.md +0 -217
- package/skills/api-designer/references/error-handling.md +0 -541
- package/skills/api-designer/references/openapi.md +0 -824
- package/skills/api-designer/references/pagination.md +0 -494
- package/skills/api-designer/references/rest-patterns.md +0 -335
- package/skills/api-designer/references/versioning.md +0 -391
- package/skills/architecture-designer/SKILL.md +0 -117
- package/skills/architecture-designer/references/adr-template.md +0 -116
- package/skills/architecture-designer/references/architecture-patterns.md +0 -111
- package/skills/architecture-designer/references/database-selection.md +0 -102
- package/skills/architecture-designer/references/nfr-checklist.md +0 -112
- package/skills/architecture-designer/references/system-design.md +0 -100
- package/skills/brainstorming/SKILL.md +0 -164
- package/skills/brainstorming/scripts/helper.js +0 -88
- package/skills/brainstorming/scripts/start-server.sh +0 -148
- package/skills/brainstorming/scripts/stop-server.sh +0 -56
- package/skills/brainstorming/spec-document-reviewer-prompt.md +0 -49
- package/skills/brainstorming/visual-companion.md +0 -287
- package/skills/chaos-engineer/SKILL.md +0 -182
- package/skills/chaos-engineer/references/chaos-tools.md +0 -511
- package/skills/chaos-engineer/references/experiment-design.md +0 -229
- package/skills/chaos-engineer/references/game-days.md +0 -434
- package/skills/chaos-engineer/references/infrastructure-chaos.md +0 -348
- package/skills/chaos-engineer/references/kubernetes-chaos.md +0 -432
- package/skills/cli-developer/SKILL.md +0 -113
- package/skills/cli-developer/references/design-patterns.md +0 -221
- package/skills/cli-developer/references/go-cli.md +0 -540
- package/skills/cli-developer/references/node-cli.md +0 -383
- package/skills/cli-developer/references/python-cli.md +0 -422
- package/skills/cli-developer/references/ux-patterns.md +0 -448
- package/skills/cloud-architect/SKILL.md +0 -216
- package/skills/cloud-architect/references/aws.md +0 -394
- package/skills/cloud-architect/references/azure.md +0 -562
- package/skills/cloud-architect/references/cost.md +0 -582
- package/skills/cloud-architect/references/gcp.md +0 -633
- package/skills/cloud-architect/references/multi-cloud.md +0 -483
- package/skills/code-documenter/SKILL.md +0 -147
- package/skills/code-documenter/references/api-docs-fastapi-django.md +0 -166
- package/skills/code-documenter/references/api-docs-nestjs-express.md +0 -220
- package/skills/code-documenter/references/coverage-reports.md +0 -125
- package/skills/code-documenter/references/documentation-systems.md +0 -333
- package/skills/code-documenter/references/interactive-api-docs.md +0 -531
- package/skills/code-documenter/references/python-docstrings.md +0 -121
- package/skills/code-documenter/references/typescript-jsdoc.md +0 -145
- package/skills/code-documenter/references/user-guides-tutorials.md +0 -530
- package/skills/code-reviewer/SKILL.md +0 -119
- package/skills/code-reviewer/references/common-issues.md +0 -142
- package/skills/code-reviewer/references/feedback-examples.md +0 -144
- package/skills/code-reviewer/references/receiving-feedback.md +0 -238
- package/skills/code-reviewer/references/report-template.md +0 -109
- package/skills/code-reviewer/references/review-checklist.md +0 -88
- package/skills/code-reviewer/references/spec-compliance-review.md +0 -258
- package/skills/cpp-pro/SKILL.md +0 -115
- package/skills/cpp-pro/references/build-tooling.md +0 -440
- package/skills/cpp-pro/references/concurrency.md +0 -437
- package/skills/cpp-pro/references/memory-performance.md +0 -397
- package/skills/cpp-pro/references/modern-cpp.md +0 -304
- package/skills/cpp-pro/references/templates.md +0 -357
- package/skills/csharp-developer/SKILL.md +0 -125
- package/skills/csharp-developer/references/aspnet-core.md +0 -394
- package/skills/csharp-developer/references/blazor.md +0 -553
- package/skills/csharp-developer/references/entity-framework.md +0 -409
- package/skills/csharp-developer/references/modern-csharp.md +0 -248
- package/skills/csharp-developer/references/performance.md +0 -498
- package/skills/database-optimizer/SKILL.md +0 -147
- package/skills/database-optimizer/references/index-strategies.md +0 -331
- package/skills/database-optimizer/references/monitoring-analysis.md +0 -501
- package/skills/database-optimizer/references/mysql-tuning.md +0 -452
- package/skills/database-optimizer/references/postgresql-tuning.md +0 -413
- package/skills/database-optimizer/references/query-optimization.md +0 -251
- package/skills/debugging-wizard/SKILL.md +0 -105
- package/skills/debugging-wizard/references/common-patterns.md +0 -132
- package/skills/debugging-wizard/references/debugging-tools.md +0 -140
- package/skills/debugging-wizard/references/quick-fixes.md +0 -177
- package/skills/debugging-wizard/references/strategies.md +0 -142
- package/skills/debugging-wizard/references/systematic-debugging.md +0 -367
- package/skills/devops-engineer/SKILL.md +0 -144
- package/skills/devops-engineer/references/deployment-strategies.md +0 -241
- package/skills/devops-engineer/references/docker-patterns.md +0 -113
- package/skills/devops-engineer/references/github-actions.md +0 -139
- package/skills/devops-engineer/references/incident-response.md +0 -331
- package/skills/devops-engineer/references/kubernetes.md +0 -154
- package/skills/devops-engineer/references/platform-engineering.md +0 -417
- package/skills/devops-engineer/references/release-automation.md +0 -527
- package/skills/devops-engineer/references/terraform-iac.md +0 -141
- package/skills/django-expert/SKILL.md +0 -162
- package/skills/django-expert/references/authentication.md +0 -145
- package/skills/django-expert/references/drf-serializers.md +0 -148
- package/skills/django-expert/references/models-orm.md +0 -151
- package/skills/django-expert/references/testing-django.md +0 -204
- package/skills/django-expert/references/viewsets-views.md +0 -153
- package/skills/documentation/SKILL.md +0 -45
- package/skills/dotnet-core-expert/SKILL.md +0 -138
- package/skills/dotnet-core-expert/references/authentication.md +0 -546
- package/skills/dotnet-core-expert/references/clean-architecture.md +0 -455
- package/skills/dotnet-core-expert/references/cloud-native.md +0 -548
- package/skills/dotnet-core-expert/references/entity-framework.md +0 -440
- package/skills/dotnet-core-expert/references/minimal-apis.md +0 -319
- package/skills/entropy-management/SKILL.md +0 -115
- package/skills/executing-plans/SKILL.md +0 -70
- package/skills/fastapi-expert/SKILL.md +0 -185
- package/skills/fastapi-expert/references/async-sqlalchemy.md +0 -146
- package/skills/fastapi-expert/references/authentication.md +0 -159
- package/skills/fastapi-expert/references/endpoints-routing.md +0 -142
- package/skills/fastapi-expert/references/migration-from-django.md +0 -997
- package/skills/fastapi-expert/references/pydantic-v2.md +0 -135
- package/skills/fastapi-expert/references/testing-async.md +0 -159
- package/skills/feature-forge/SKILL.md +0 -98
- package/skills/feature-forge/references/acceptance-criteria.md +0 -104
- package/skills/feature-forge/references/ears-syntax.md +0 -99
- package/skills/feature-forge/references/interview-questions.md +0 -150
- package/skills/feature-forge/references/pre-discovery-subagents.md +0 -54
- package/skills/feature-forge/references/specification-template.md +0 -103
- package/skills/fine-tuning-expert/SKILL.md +0 -162
- package/skills/fine-tuning-expert/references/dataset-preparation.md +0 -540
- package/skills/fine-tuning-expert/references/deployment-optimization.md +0 -673
- package/skills/fine-tuning-expert/references/evaluation-metrics.md +0 -597
- package/skills/fine-tuning-expert/references/hyperparameter-tuning.md +0 -565
- package/skills/fine-tuning-expert/references/lora-peft.md +0 -347
- package/skills/finishing-a-development-branch/SKILL.md +0 -200
- package/skills/flutter-expert/SKILL.md +0 -138
- package/skills/flutter-expert/references/bloc-state.md +0 -259
- package/skills/flutter-expert/references/gorouter-navigation.md +0 -119
- package/skills/flutter-expert/references/performance.md +0 -99
- package/skills/flutter-expert/references/project-structure.md +0 -118
- package/skills/flutter-expert/references/riverpod-state.md +0 -130
- package/skills/flutter-expert/references/widget-patterns.md +0 -123
- package/skills/fullstack-guardian/SKILL.md +0 -105
- package/skills/fullstack-guardian/references/api-design-standards.md +0 -307
- package/skills/fullstack-guardian/references/architecture-decisions.md +0 -350
- package/skills/fullstack-guardian/references/backend-patterns.md +0 -237
- package/skills/fullstack-guardian/references/common-patterns.md +0 -134
- package/skills/fullstack-guardian/references/deliverables-checklist.md +0 -354
- package/skills/fullstack-guardian/references/design-template.md +0 -91
- package/skills/fullstack-guardian/references/error-handling.md +0 -135
- package/skills/fullstack-guardian/references/frontend-patterns.md +0 -340
- package/skills/fullstack-guardian/references/integration-patterns.md +0 -333
- package/skills/fullstack-guardian/references/security-checklist.md +0 -106
- package/skills/golang-pro/SKILL.md +0 -122
- package/skills/golang-pro/references/concurrency.md +0 -329
- package/skills/golang-pro/references/generics.md +0 -442
- package/skills/golang-pro/references/interfaces.md +0 -432
- package/skills/golang-pro/references/project-structure.md +0 -477
- package/skills/golang-pro/references/testing.md +0 -451
- package/skills/graphql-architect/SKILL.md +0 -146
- package/skills/graphql-architect/references/federation.md +0 -418
- package/skills/graphql-architect/references/migration-from-rest.md +0 -1141
- package/skills/graphql-architect/references/resolvers.md +0 -425
- package/skills/graphql-architect/references/schema-design.md +0 -393
- package/skills/graphql-architect/references/security.md +0 -569
- package/skills/graphql-architect/references/subscriptions.md +0 -510
- package/skills/java-architect/SKILL.md +0 -132
- package/skills/java-architect/references/jpa-optimization.md +0 -393
- package/skills/java-architect/references/reactive-webflux.md +0 -356
- package/skills/java-architect/references/spring-boot-setup.md +0 -269
- package/skills/java-architect/references/spring-security.md +0 -445
- package/skills/java-architect/references/testing-patterns.md +0 -500
- package/skills/javascript-pro/SKILL.md +0 -132
- package/skills/javascript-pro/references/async-patterns.md +0 -334
- package/skills/javascript-pro/references/browser-apis.md +0 -398
- package/skills/javascript-pro/references/modern-syntax.md +0 -272
- package/skills/javascript-pro/references/modules.md +0 -357
- package/skills/javascript-pro/references/node-essentials.md +0 -471
- package/skills/kotlin-specialist/SKILL.md +0 -147
- package/skills/kotlin-specialist/references/android-compose.md +0 -419
- package/skills/kotlin-specialist/references/coroutines-flow.md +0 -276
- package/skills/kotlin-specialist/references/dsl-idioms.md +0 -421
- package/skills/kotlin-specialist/references/ktor-server.md +0 -426
- package/skills/kotlin-specialist/references/multiplatform-kmp.md +0 -380
- package/skills/kubernetes-specialist/SKILL.md +0 -241
- package/skills/kubernetes-specialist/references/configuration.md +0 -452
- package/skills/kubernetes-specialist/references/cost-optimization.md +0 -458
- package/skills/kubernetes-specialist/references/custom-operators.md +0 -563
- package/skills/kubernetes-specialist/references/gitops.md +0 -530
- package/skills/kubernetes-specialist/references/helm-charts.md +0 -912
- package/skills/kubernetes-specialist/references/multi-cluster.md +0 -507
- package/skills/kubernetes-specialist/references/networking.md +0 -447
- package/skills/kubernetes-specialist/references/service-mesh.md +0 -459
- package/skills/kubernetes-specialist/references/storage.md +0 -535
- package/skills/kubernetes-specialist/references/troubleshooting.md +0 -414
- package/skills/kubernetes-specialist/references/workloads.md +0 -377
- package/skills/laravel-specialist/SKILL.md +0 -262
- package/skills/laravel-specialist/references/eloquent.md +0 -351
- package/skills/laravel-specialist/references/livewire.md +0 -512
- package/skills/laravel-specialist/references/queues.md +0 -423
- package/skills/laravel-specialist/references/routing.md +0 -362
- package/skills/laravel-specialist/references/testing.md +0 -522
- package/skills/legacy-modernizer/SKILL.md +0 -137
- package/skills/legacy-modernizer/references/legacy-testing.md +0 -381
- package/skills/legacy-modernizer/references/migration-strategies.md +0 -423
- package/skills/legacy-modernizer/references/refactoring-patterns.md +0 -395
- package/skills/legacy-modernizer/references/strangler-fig-pattern.md +0 -281
- package/skills/legacy-modernizer/references/system-assessment.md +0 -487
- package/skills/mcp-developer/SKILL.md +0 -143
- package/skills/mcp-developer/references/protocol.md +0 -244
- package/skills/mcp-developer/references/python-sdk.md +0 -367
- package/skills/mcp-developer/references/resources.md +0 -554
- package/skills/mcp-developer/references/tools.md +0 -480
- package/skills/mcp-developer/references/typescript-sdk.md +0 -350
- package/skills/microservices-architect/SKILL.md +0 -164
- package/skills/microservices-architect/references/communication.md +0 -499
- package/skills/microservices-architect/references/data.md +0 -721
- package/skills/microservices-architect/references/decomposition.md +0 -344
- package/skills/microservices-architect/references/observability.md +0 -805
- package/skills/microservices-architect/references/patterns.md +0 -603
- package/skills/ml-pipeline/SKILL.md +0 -159
- package/skills/ml-pipeline/references/experiment-tracking.md +0 -833
- package/skills/ml-pipeline/references/feature-engineering.md +0 -631
- package/skills/ml-pipeline/references/model-validation.md +0 -978
- package/skills/ml-pipeline/references/pipeline-orchestration.md +0 -907
- package/skills/ml-pipeline/references/training-pipelines.md +0 -782
- package/skills/monitoring-expert/SKILL.md +0 -176
- package/skills/monitoring-expert/references/alerting-rules.md +0 -141
- package/skills/monitoring-expert/references/application-profiling.md +0 -331
- package/skills/monitoring-expert/references/capacity-planning.md +0 -344
- package/skills/monitoring-expert/references/dashboards.md +0 -126
- package/skills/monitoring-expert/references/opentelemetry.md +0 -123
- package/skills/monitoring-expert/references/performance-testing.md +0 -269
- package/skills/monitoring-expert/references/prometheus-metrics.md +0 -136
- package/skills/monitoring-expert/references/structured-logging.md +0 -142
- package/skills/nestjs-expert/SKILL.md +0 -206
- package/skills/nestjs-expert/references/authentication.md +0 -166
- package/skills/nestjs-expert/references/controllers-routing.md +0 -111
- package/skills/nestjs-expert/references/dtos-validation.md +0 -153
- package/skills/nestjs-expert/references/migration-from-express.md +0 -1237
- package/skills/nestjs-expert/references/services-di.md +0 -140
- package/skills/nestjs-expert/references/testing-patterns.md +0 -186
- package/skills/nextjs-developer/SKILL.md +0 -143
- package/skills/nextjs-developer/references/app-router.md +0 -311
- package/skills/nextjs-developer/references/data-fetching.md +0 -482
- package/skills/nextjs-developer/references/deployment.md +0 -545
- package/skills/nextjs-developer/references/server-actions.md +0 -462
- package/skills/nextjs-developer/references/server-components.md +0 -384
- package/skills/pandas-pro/SKILL.md +0 -178
- package/skills/pandas-pro/references/aggregation-groupby.md +0 -545
- package/skills/pandas-pro/references/data-cleaning.md +0 -500
- package/skills/pandas-pro/references/dataframe-operations.md +0 -420
- package/skills/pandas-pro/references/merging-joining.md +0 -596
- package/skills/pandas-pro/references/performance-optimization.md +0 -597
- package/skills/php-pro/SKILL.md +0 -206
- package/skills/php-pro/references/async-patterns.md +0 -412
- package/skills/php-pro/references/laravel-patterns.md +0 -377
- package/skills/php-pro/references/modern-php-features.md +0 -323
- package/skills/php-pro/references/symfony-patterns.md +0 -466
- package/skills/php-pro/references/testing-quality.md +0 -466
- package/skills/playwright-expert/SKILL.md +0 -169
- package/skills/playwright-expert/references/api-mocking.md +0 -140
- package/skills/playwright-expert/references/configuration.md +0 -155
- package/skills/playwright-expert/references/debugging-flaky.md +0 -150
- package/skills/playwright-expert/references/page-object-model.md +0 -152
- package/skills/playwright-expert/references/selectors-locators.md +0 -119
- package/skills/postgres-pro/SKILL.md +0 -152
- package/skills/postgres-pro/references/extensions.md +0 -404
- package/skills/postgres-pro/references/jsonb.md +0 -321
- package/skills/postgres-pro/references/maintenance.md +0 -481
- package/skills/postgres-pro/references/performance.md +0 -265
- package/skills/postgres-pro/references/replication.md +0 -446
- package/skills/python-pro/SKILL.md +0 -177
- package/skills/python-pro/references/async-patterns.md +0 -356
- package/skills/python-pro/references/packaging.md +0 -460
- package/skills/python-pro/references/standard-library.md +0 -378
- package/skills/python-pro/references/testing.md +0 -404
- package/skills/python-pro/references/type-system.md +0 -290
- package/skills/rag-architect/SKILL.md +0 -194
- package/skills/rag-architect/references/chunking-strategies.md +0 -878
- package/skills/rag-architect/references/embedding-models.md +0 -561
- package/skills/rag-architect/references/rag-evaluation.md +0 -833
- package/skills/rag-architect/references/retrieval-optimization.md +0 -795
- package/skills/rag-architect/references/vector-databases.md +0 -589
- package/skills/rails-expert/SKILL.md +0 -154
- package/skills/rails-expert/references/active-record.md +0 -244
- package/skills/rails-expert/references/api-development.md +0 -401
- package/skills/rails-expert/references/background-jobs.md +0 -272
- package/skills/rails-expert/references/hotwire-turbo.md +0 -228
- package/skills/rails-expert/references/rspec-testing.md +0 -367
- package/skills/react-expert/SKILL.md +0 -149
- package/skills/react-expert/references/hooks-patterns.md +0 -162
- package/skills/react-expert/references/migration-class-to-modern.md +0 -1119
- package/skills/react-expert/references/performance.md +0 -168
- package/skills/react-expert/references/react-19-features.md +0 -174
- package/skills/react-expert/references/server-components.md +0 -143
- package/skills/react-expert/references/state-management.md +0 -171
- package/skills/react-expert/references/testing-react.md +0 -174
- package/skills/react-native-expert/SKILL.md +0 -185
- package/skills/react-native-expert/references/expo-router.md +0 -187
- package/skills/react-native-expert/references/list-optimization.md +0 -204
- package/skills/react-native-expert/references/platform-handling.md +0 -188
- package/skills/react-native-expert/references/project-structure.md +0 -171
- package/skills/react-native-expert/references/storage-hooks.md +0 -173
- package/skills/receiving-code-review/SKILL.md +0 -213
- package/skills/requesting-code-review/SKILL.md +0 -105
- package/skills/requesting-code-review/code-reviewer.md +0 -146
- package/skills/requirement-engineering/SKILL.md +0 -111
- package/skills/rust-engineer/SKILL.md +0 -167
- package/skills/rust-engineer/references/async.md +0 -458
- package/skills/rust-engineer/references/error-handling.md +0 -334
- package/skills/rust-engineer/references/ownership.md +0 -278
- package/skills/rust-engineer/references/testing.md +0 -470
- package/skills/rust-engineer/references/traits.md +0 -413
- package/skills/secure-code-guardian/SKILL.md +0 -191
- package/skills/secure-code-guardian/references/authentication.md +0 -136
- package/skills/secure-code-guardian/references/input-validation.md +0 -146
- package/skills/secure-code-guardian/references/owasp-prevention.md +0 -135
- package/skills/secure-code-guardian/references/security-headers.md +0 -133
- package/skills/secure-code-guardian/references/xss-csrf.md +0 -157
- package/skills/security-reviewer/SKILL.md +0 -103
- package/skills/security-reviewer/references/infrastructure-security.md +0 -268
- package/skills/security-reviewer/references/penetration-testing.md +0 -268
- package/skills/security-reviewer/references/report-template.md +0 -170
- package/skills/security-reviewer/references/sast-tools.md +0 -117
- package/skills/security-reviewer/references/secret-scanning.md +0 -125
- package/skills/security-reviewer/references/vulnerability-patterns.md +0 -152
- package/skills/spark-engineer/SKILL.md +0 -148
- package/skills/spark-engineer/references/partitioning-caching.md +0 -543
- package/skills/spark-engineer/references/performance-tuning.md +0 -544
- package/skills/spark-engineer/references/rdd-operations.md +0 -599
- package/skills/spark-engineer/references/spark-sql-dataframes.md +0 -474
- package/skills/spark-engineer/references/streaming-patterns.md +0 -786
- package/skills/spring-boot-engineer/SKILL.md +0 -195
- package/skills/spring-boot-engineer/references/cloud.md +0 -498
- package/skills/spring-boot-engineer/references/data.md +0 -381
- package/skills/spring-boot-engineer/references/security.md +0 -459
- package/skills/spring-boot-engineer/references/testing.md +0 -545
- package/skills/spring-boot-engineer/references/web.md +0 -295
- package/skills/sql-pro/SKILL.md +0 -129
- package/skills/sql-pro/references/database-design.md +0 -402
- package/skills/sql-pro/references/dialect-differences.md +0 -419
- package/skills/sql-pro/references/optimization.md +0 -384
- package/skills/sql-pro/references/query-patterns.md +0 -285
- package/skills/sql-pro/references/window-functions.md +0 -328
- package/skills/sre-engineer/SKILL.md +0 -181
- package/skills/sre-engineer/references/automation-toil.md +0 -492
- package/skills/sre-engineer/references/error-budget-policy.md +0 -334
- package/skills/sre-engineer/references/incident-chaos.md +0 -576
- package/skills/sre-engineer/references/monitoring-alerting.md +0 -424
- package/skills/sre-engineer/references/slo-sli-management.md +0 -238
- package/skills/swift-expert/SKILL.md +0 -163
- package/skills/swift-expert/references/async-concurrency.md +0 -360
- package/skills/swift-expert/references/memory-performance.md +0 -377
- package/skills/swift-expert/references/protocol-oriented.md +0 -354
- package/skills/swift-expert/references/swiftui-patterns.md +0 -291
- package/skills/swift-expert/references/testing-patterns.md +0 -399
- package/skills/systematic-debugging/CREATION-LOG.md +0 -119
- package/skills/systematic-debugging/SKILL.md +0 -296
- package/skills/systematic-debugging/condition-based-waiting-example.ts +0 -158
- package/skills/systematic-debugging/condition-based-waiting.md +0 -115
- package/skills/systematic-debugging/defense-in-depth.md +0 -122
- package/skills/systematic-debugging/find-polluter.sh +0 -63
- package/skills/systematic-debugging/root-cause-tracing.md +0 -169
- package/skills/systematic-debugging/test-academic.md +0 -14
- package/skills/systematic-debugging/test-pressure-1.md +0 -58
- package/skills/systematic-debugging/test-pressure-2.md +0 -68
- package/skills/systematic-debugging/test-pressure-3.md +0 -69
- package/skills/tdd-guide/assets/sample_coverage_report.lcov +0 -56
- package/skills/terraform-engineer/SKILL.md +0 -143
- package/skills/terraform-engineer/references/best-practices.md +0 -583
- package/skills/terraform-engineer/references/module-patterns.md +0 -297
- package/skills/terraform-engineer/references/providers.md +0 -452
- package/skills/terraform-engineer/references/state-management.md +0 -371
- package/skills/terraform-engineer/references/testing.md +0 -486
- package/skills/test-master/SKILL.md +0 -94
- package/skills/test-master/references/automation-frameworks.md +0 -294
- package/skills/test-master/references/e2e-testing.md +0 -128
- package/skills/test-master/references/integration-testing.md +0 -120
- package/skills/test-master/references/performance-testing.md +0 -118
- package/skills/test-master/references/qa-methodology.md +0 -247
- package/skills/test-master/references/security-testing.md +0 -127
- package/skills/test-master/references/tdd-iron-laws.md +0 -174
- package/skills/test-master/references/test-reports.md +0 -104
- package/skills/test-master/references/testing-anti-patterns.md +0 -231
- package/skills/test-master/references/unit-testing.md +0 -113
- package/skills/typescript-pro/SKILL.md +0 -145
- package/skills/typescript-pro/references/advanced-types.md +0 -259
- package/skills/typescript-pro/references/configuration.md +0 -445
- package/skills/typescript-pro/references/patterns.md +0 -484
- package/skills/typescript-pro/references/type-guards.md +0 -352
- package/skills/typescript-pro/references/utility-types.md +0 -329
- package/skills/using-git-worktrees/SKILL.md +0 -218
- package/skills/verification-before-completion/SKILL.md +0 -139
- package/skills/vue-expert/SKILL.md +0 -98
- package/skills/vue-expert/references/build-tooling.md +0 -480
- package/skills/vue-expert/references/components.md +0 -448
- package/skills/vue-expert/references/composition-api.md +0 -299
- package/skills/vue-expert/references/mobile-hybrid.md +0 -636
- package/skills/vue-expert/references/nuxt.md +0 -669
- package/skills/vue-expert/references/state-management.md +0 -449
- package/skills/vue-expert/references/typescript.md +0 -584
- package/skills/vue-expert-js/SKILL.md +0 -167
- package/skills/vue-expert-js/references/component-architecture.md +0 -219
- package/skills/vue-expert-js/references/composables-patterns.md +0 -183
- package/skills/vue-expert-js/references/jsdoc-typing.md +0 -535
- package/skills/vue-expert-js/references/state-management.md +0 -249
- package/skills/vue-expert-js/references/testing-patterns.md +0 -237
- package/skills/websocket-engineer/SKILL.md +0 -168
- package/skills/websocket-engineer/references/alternatives.md +0 -391
- package/skills/websocket-engineer/references/patterns.md +0 -400
- package/skills/websocket-engineer/references/protocol.md +0 -195
- package/skills/websocket-engineer/references/scaling.md +0 -333
- package/skills/websocket-engineer/references/security.md +0 -474
- package/skills/writing-plans/SKILL.md +0 -151
- package/skills/writing-plans/plan-document-reviewer-prompt.md +0 -49
- package/skills/writing-skills/SKILL.md +0 -655
- package/skills/writing-skills/anthropic-best-practices.md +0 -1150
- package/skills/writing-skills/examples/CLAUDE_MD_TESTING.md +0 -189
- package/skills/writing-skills/graphviz-conventions.dot +0 -172
- package/skills/writing-skills/persuasion-principles.md +0 -187
- package/skills/writing-skills/render-graphs.js +0 -168
- package/skills/writing-skills/testing-skills-with-subagents.md +0 -384
- /package/skills/{design-commands → frontend/design-commands}/design.md +0 -0
- /package/skills/{design-commands → frontend/design-commands}/handoff.md +0 -0
- /package/skills/{design-commands → frontend/design-commands}/prototype.md +0 -0
- /package/skills/{design-commands → frontend/design-commands}/spec.md +0 -0
- /package/skills/{design-commands → frontend/design-commands}/style.md +0 -0
- /package/skills/{senior-frontend → frontend/senior-frontend}/SKILL.md +0 -0
- /package/skills/{senior-frontend → frontend/senior-frontend}/references/frontend_best_practices.md +0 -0
- /package/skills/{senior-frontend → frontend/senior-frontend}/references/nextjs_optimization_guide.md +0 -0
- /package/skills/{senior-frontend → frontend/senior-frontend}/references/react_patterns.md +0 -0
- /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/bundle_analyzer.py +0 -0
- /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/component_generator.py +0 -0
- /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/frontend_scaffolder.py +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/SKILL.md +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/charts.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/colors.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/icons.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/landing.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/products.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/react-performance.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/astro.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/flutter.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/html-tailwind.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/jetpack-compose.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nextjs.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nuxt-ui.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nuxtjs.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/react-native.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/react.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/shadcn.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/svelte.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/swiftui.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/vue.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/styles.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/typography.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/ui-reasoning.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/ux-guidelines.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/web-interface.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/core.py +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/design_system.py +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/search.py +0 -0
- /package/skills/{competitive-analysis → product/competitive-analysis}/SKILL.md +0 -0
- /package/skills/{meeting-notes → product/meeting-notes}/SKILL.md +0 -0
- /package/skills/{prd-template → product/prd-template}/SKILL.md +0 -0
- /package/skills/{stakeholder-update → product/stakeholder-update}/SKILL.md +0 -0
- /package/skills/{user-research-synthesis → product/user-research-synthesis}/SKILL.md +0 -0
- /package/skills/{senior-qa → quality/senior-qa}/README.md +0 -0
- /package/skills/{senior-qa → quality/senior-qa}/SKILL.md +0 -0
- /package/skills/{senior-qa → quality/senior-qa}/references/qa_best_practices.md +0 -0
- /package/skills/{senior-qa → quality/senior-qa}/references/test_automation_patterns.md +0 -0
- /package/skills/{senior-qa → quality/senior-qa}/references/testing_strategies.md +0 -0
- /package/skills/{senior-qa → quality/senior-qa}/scripts/coverage_analyzer.py +0 -0
- /package/skills/{senior-qa → quality/senior-qa}/scripts/e2e_test_scaffolder.py +0 -0
- /package/skills/{senior-qa → quality/senior-qa}/scripts/test_suite_generator.py +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/HOW_TO_USE.md +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/README.md +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/SKILL.md +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/assets/expected_output.json +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/assets/sample_input_python.json +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/assets/sample_input_typescript.json +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/references/ci-integration.md +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/references/framework-guide.md +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/references/tdd-best-practices.md +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/scripts/coverage_analyzer.py +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/scripts/fixture_generator.py +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/scripts/format_detector.py +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/scripts/framework_adapter.py +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/scripts/metrics_calculator.py +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/scripts/output_formatter.py +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/scripts/tdd_workflow.py +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/scripts/test_generator.py +0 -0
- /package/skills/{brainstorming → workflow/brainstorming}/scripts/frame-template.html +0 -0
- /package/skills/{brainstorming → workflow/brainstorming}/scripts/server.cjs +0 -0
|
@@ -0,0 +1,978 @@
|
|
|
1
|
+
# Model Validation
|
|
2
|
+
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
Model validation ensures models meet quality standards before production deployment. It encompasses offline evaluation, online testing, and continuous monitoring to catch performance degradation, data drift, and model failures.
|
|
8
|
+
|
|
9
|
+
## When to Use This Reference
|
|
10
|
+
|
|
11
|
+
- Implementing offline model evaluation strategies
|
|
12
|
+
- Setting up A/B testing frameworks
|
|
13
|
+
- Building shadow deployment pipelines
|
|
14
|
+
- Creating model comparison workflows
|
|
15
|
+
- Implementing continuous model monitoring
|
|
16
|
+
|
|
17
|
+
## When NOT to Use
|
|
18
|
+
|
|
19
|
+
- Quick model prototyping
|
|
20
|
+
- One-off analysis without deployment
|
|
21
|
+
- Models with no production requirements
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Offline Evaluation
|
|
26
|
+
|
|
27
|
+
### Comprehensive Evaluation Suite
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from dataclasses import dataclass
|
|
31
|
+
from typing import Optional
|
|
32
|
+
import numpy as np
|
|
33
|
+
import pandas as pd
|
|
34
|
+
from sklearn.metrics import (
|
|
35
|
+
accuracy_score, precision_score, recall_score, f1_score,
|
|
36
|
+
roc_auc_score, average_precision_score, confusion_matrix,
|
|
37
|
+
mean_squared_error, mean_absolute_error, r2_score,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
@dataclass
class ClassificationMetrics:
    """Scores produced by one classification evaluation run.

    ``roc_auc`` and ``pr_auc`` are optional and may be ``None``.
    """
    accuracy: float
    precision: float
    recall: float
    f1: float
    roc_auc: Optional[float]
    pr_auc: Optional[float]
    confusion_matrix: np.ndarray

    def to_dict(self) -> dict:
        """Return the scalar scores as a plain dict.

        The confusion matrix is not included in the returned mapping.
        """
        scalar_fields = ("accuracy", "precision", "recall", "f1", "roc_auc", "pr_auc")
        return {field_name: getattr(self, field_name) for field_name in scalar_fields}
|
|
60
|
+
|
|
61
|
+
@dataclass
class RegressionMetrics:
    """Scores produced by one regression evaluation run.

    ``mape`` is optional and may be ``None``.
    """
    mse: float
    rmse: float
    mae: float
    r2: float
    mape: Optional[float]

    def to_dict(self) -> dict:
        """Return every score as a plain dict keyed by metric name."""
        metric_names = ("mse", "rmse", "mae", "r2", "mape")
        return {metric: getattr(self, metric) for metric in metric_names}
|
|
78
|
+
|
|
79
|
+
class ModelEvaluator:
    """Offline metric computation for classification and regression models.

    Args:
        task_type: Selects the evaluation used by ``evaluate_by_segment``.
            ``"classification"`` routes to ``evaluate_classification``; any
            other value routes to ``evaluate_regression``.
    """

    def __init__(self, task_type: str = "classification"):
        self.task_type = task_type

    def evaluate_classification(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
        y_prob: Optional[np.ndarray] = None,
        average: str = "weighted",
    ) -> ClassificationMetrics:
        """Evaluate classification predictions.

        Args:
            y_true: Ground-truth labels.
            y_pred: Predicted labels.
            y_prob: Optional predicted probabilities. For two classes either a
                1-D array of positive-class scores or a 2-D array whose
                column 1 is the positive class; otherwise passed unchanged to
                ``roc_auc_score`` with ``multi_class="ovr"``.
            average: Averaging mode forwarded to precision/recall/F1 and to
                the multiclass ROC AUC.

        Returns:
            ClassificationMetrics. ``roc_auc``/``pr_auc`` stay ``None`` when
            ``y_prob`` is omitted or ``y_true`` holds fewer than two distinct
            classes; ``pr_auc`` is only computed for the two-class case.
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)

        roc_auc = None
        pr_auc = None

        if y_prob is not None:
            y_prob = np.asarray(y_prob)
            n_classes = len(np.unique(y_true))

            if n_classes == 2:
                # Binary classification: reduce to positive-class scores.
                y_prob_pos = y_prob[:, 1] if y_prob.ndim == 2 else y_prob
                roc_auc = roc_auc_score(y_true, y_prob_pos)
                pr_auc = average_precision_score(y_true, y_prob_pos)
            elif n_classes > 2:
                # Multiclass: one-vs-rest AUC.
                roc_auc = roc_auc_score(
                    y_true, y_prob, multi_class="ovr", average=average
                )
            # Fewer than two classes present (typical for small segments):
            # AUC is undefined, so the scores are left as None instead of
            # letting roc_auc_score raise.

        return ClassificationMetrics(
            accuracy=accuracy_score(y_true, y_pred),
            precision=precision_score(y_true, y_pred, average=average, zero_division=0),
            recall=recall_score(y_true, y_pred, average=average, zero_division=0),
            f1=f1_score(y_true, y_pred, average=average, zero_division=0),
            roc_auc=roc_auc,
            pr_auc=pr_auc,
            confusion_matrix=confusion_matrix(y_true, y_pred),
        )

    def evaluate_regression(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
    ) -> RegressionMetrics:
        """Evaluate regression predictions.

        Returns:
            RegressionMetrics. ``mape`` is a percentage computed only over
            rows whose true value is non-zero, and is ``None`` when every
            true value is zero.
        """
        # Coerce so that plain lists support the element-wise mask below.
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)

        mse = mean_squared_error(y_true, y_pred)

        # MAPE is undefined where the true value is zero; exclude those rows.
        nonzero = y_true != 0
        mape = None
        if nonzero.any():
            mape = np.mean(
                np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])
            ) * 100

        return RegressionMetrics(
            mse=mse,
            rmse=np.sqrt(mse),
            mae=mean_absolute_error(y_true, y_pred),
            r2=r2_score(y_true, y_pred),
            mape=mape,
        )

    def evaluate_by_segment(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
        segments: np.ndarray,
        y_prob: Optional[np.ndarray] = None,
    ) -> dict:
        """Evaluate model performance separately for each segment.

        Args:
            y_true: Ground-truth targets.
            y_pred: Predictions aligned with ``y_true``.
            segments: Segment label for every row, aligned with ``y_true``.
            y_prob: Optional probabilities; only used for classification.

        Returns:
            Dict mapping each distinct segment value to that segment's
            ``to_dict()`` metrics.
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        segments = np.asarray(segments)
        if y_prob is not None:
            y_prob = np.asarray(y_prob)

        results = {}

        for segment in np.unique(segments):
            mask = segments == segment

            if self.task_type == "classification":
                segment_prob = y_prob[mask] if y_prob is not None else None
                metrics = self.evaluate_classification(
                    y_true[mask], y_pred[mask], segment_prob
                )
            else:
                metrics = self.evaluate_regression(y_true[mask], y_pred[mask])

            results[segment] = metrics.to_dict()

        return results
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Cross-Validation Framework
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
from sklearn.model_selection import (
|
|
174
|
+
KFold, StratifiedKFold, TimeSeriesSplit, cross_val_score
|
|
175
|
+
)
|
|
176
|
+
import numpy as np
|
|
177
|
+
from typing import Callable
|
|
178
|
+
|
|
179
|
+
class CrossValidator:
    """Cross-validation framework for model evaluation.

    Args:
        n_splits: Number of folds.
        shuffle: Whether the k-fold splitters shuffle rows before splitting.
        random_state: Seed for the shuffle; only forwarded to the splitter
            when ``shuffle`` is true.
    """

    def __init__(
        self,
        n_splits: int = 5,
        shuffle: bool = True,
        random_state: int = 42,
    ):
        self.n_splits = n_splits
        self.shuffle = shuffle
        self.random_state = random_state

    def _build_kfold(self, splitter_cls):
        """Instantiate a k-fold splitter class with this validator's settings."""
        return splitter_cls(
            n_splits=self.n_splits,
            shuffle=self.shuffle,
            # scikit-learn raises ValueError when a random_state is supplied
            # together with shuffle=False, so only pass the seed when shuffling.
            random_state=self.random_state if self.shuffle else None,
        )

    def validate_classification(
        self,
        model,
        X: np.ndarray,
        y: np.ndarray,
        stratified: bool = True,
    ) -> dict:
        """Run k-fold cross-validation for a classifier.

        Args:
            model: Estimator that ``sklearn.base.clone`` can copy; a fresh
                clone is fitted on every fold.
            X: Feature array, indexed by integer row positions.
            y: Label array aligned with ``X``.
            stratified: Use ``StratifiedKFold`` when true, ``KFold`` otherwise.

        Returns:
            Per-metric aggregate produced by ``_aggregate_cv_results``.
        """
        from sklearn.base import clone

        cv = self._build_kfold(StratifiedKFold if stratified else KFold)

        evaluator = ModelEvaluator("classification")
        fold_metrics = []

        for train_idx, val_idx in cv.split(X, y):
            X_train, X_val = X[train_idx], X[val_idx]
            y_train, y_val = y[train_idx], y[val_idx]

            # Clone so that no fitted state leaks between folds.
            fold_model = clone(model)
            fold_model.fit(X_train, y_train)

            y_pred = fold_model.predict(X_val)
            y_prob = None
            if hasattr(fold_model, "predict_proba"):
                y_prob = fold_model.predict_proba(X_val)

            metrics = evaluator.evaluate_classification(y_val, y_pred, y_prob)
            fold_metrics.append(metrics.to_dict())

        return self._aggregate_cv_results(fold_metrics)

    def validate_time_series(
        self,
        model,
        X: np.ndarray,
        y: np.ndarray,
        gap: int = 0,
    ) -> dict:
        """Run time series cross-validation with regression metrics.

        Args:
            model: Estimator that ``sklearn.base.clone`` can copy.
            X: Feature array, indexed by integer row positions.
            y: Target array aligned with ``X``.
            gap: Forwarded to ``TimeSeriesSplit(gap=...)``.

        Returns:
            Per-metric aggregate produced by ``_aggregate_cv_results``.
        """
        from sklearn.base import clone

        cv = TimeSeriesSplit(n_splits=self.n_splits, gap=gap)
        evaluator = ModelEvaluator("regression")
        fold_metrics = []

        for train_idx, val_idx in cv.split(X):
            X_train, X_val = X[train_idx], X[val_idx]
            y_train, y_val = y[train_idx], y[val_idx]

            fold_model = clone(model)
            fold_model.fit(X_train, y_train)

            y_pred = fold_model.predict(X_val)
            metrics = evaluator.evaluate_regression(y_val, y_pred)
            fold_metrics.append(metrics.to_dict())

        return self._aggregate_cv_results(fold_metrics)

    def _aggregate_cv_results(self, fold_metrics: list[dict]) -> dict:
        """Aggregate metrics across folds.

        Args:
            fold_metrics: One metrics dict per fold. The metric names are
                taken from the first fold.

        Returns:
            Dict mapping each metric name to its ``mean``, ``std``, ``min``,
            ``max`` and raw ``values``. ``None`` entries are skipped, a metric
            that is ``None`` in every fold is omitted, and an empty input
            yields an empty dict.
        """
        if not fold_metrics:
            return {}

        aggregated = {}

        for key in fold_metrics[0].keys():
            values = [m[key] for m in fold_metrics if m[key] is not None]
            if values:
                aggregated[key] = {
                    "mean": np.mean(values),
                    "std": np.std(values),
                    "min": np.min(values),
                    "max": np.max(values),
                    "values": values,
                }

        return aggregated
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
---
|
|
281
|
+
|
|
282
|
+
## Model Comparison
|
|
283
|
+
|
|
284
|
+
### Statistical Comparison
|
|
285
|
+
|
|
286
|
+
```python
|
|
287
|
+
from scipy import stats
|
|
288
|
+
import numpy as np
|
|
289
|
+
from dataclasses import dataclass
|
|
290
|
+
|
|
291
|
+
@dataclass
class ComparisonResult:
    """Model comparison statistical result."""
    model_a_mean: float
    model_b_mean: float
    difference: float  # model A minus model B
    p_value: float
    significant: bool
    # (lower, upper) for the difference; mcnemar_test reports (None, None).
    confidence_interval: tuple[float, float]
    test_used: str

class ModelComparator:
    """Statistical comparison of model performance.

    Args:
        significance_level: Alpha used for the ``significant`` flag and for
            the confidence intervals (``1 - alpha`` coverage).
    """

    def __init__(self, significance_level: float = 0.05):
        self.significance_level = significance_level

    def paired_t_test(
        self,
        scores_a: np.ndarray,
        scores_b: np.ndarray,
    ) -> ComparisonResult:
        """Paired t-test for CV score comparison.

        Args:
            scores_a: Per-fold scores of model A (array or list).
            scores_b: Per-fold scores of model B, paired with ``scores_a``.

        Returns:
            ComparisonResult whose confidence interval is the t-based
            interval for the mean paired difference (A minus B).
        """
        # Accept plain lists, e.g. the "values" lists of a CV aggregate.
        scores_a = np.asarray(scores_a, dtype=float)
        scores_b = np.asarray(scores_b, dtype=float)

        _, p_value = stats.ttest_rel(scores_a, scores_b)

        differences = scores_a - scores_b
        mean_diff = np.mean(differences)
        std_diff = np.std(differences, ddof=1)
        n = len(differences)

        # (1 - significance_level) confidence interval for the mean difference
        t_critical = stats.t.ppf(1 - self.significance_level / 2, n - 1)
        margin = t_critical * std_diff / np.sqrt(n)
        ci = (float(mean_diff - margin), float(mean_diff + margin))

        return ComparisonResult(
            model_a_mean=float(np.mean(scores_a)),
            model_b_mean=float(np.mean(scores_b)),
            difference=float(mean_diff),
            p_value=float(p_value),
            significant=bool(p_value < self.significance_level),
            confidence_interval=ci,
            test_used="paired_t_test",
        )

    def wilcoxon_test(
        self,
        scores_a: np.ndarray,
        scores_b: np.ndarray,
    ) -> ComparisonResult:
        """Wilcoxon signed-rank test (non-parametric).

        Args:
            scores_a: Per-fold scores of model A (array or list).
            scores_b: Per-fold scores of model B, paired with ``scores_a``.

        Returns:
            ComparisonResult with a bootstrap confidence interval for the
            mean paired difference. When every paired difference is zero the
            p-value is reported as 1.0 without calling ``scipy.stats.wilcoxon``.
        """
        scores_a = np.asarray(scores_a, dtype=float)
        scores_b = np.asarray(scores_b, dtype=float)
        differences = scores_a - scores_b

        if np.any(differences != 0):
            _, p_value = stats.wilcoxon(scores_a, scores_b)
        else:
            # Identical scores carry no evidence of a difference; skip the
            # SciPy call, which cannot handle the all-zero case cleanly.
            p_value = 1.0

        mean_diff = np.mean(differences)

        # Bootstrap confidence interval at the comparator's alpha
        ci = self._bootstrap_ci(differences, alpha=self.significance_level)

        return ComparisonResult(
            model_a_mean=float(np.mean(scores_a)),
            model_b_mean=float(np.mean(scores_b)),
            difference=float(mean_diff),
            p_value=float(p_value),
            significant=bool(p_value < self.significance_level),
            confidence_interval=ci,
            test_used="wilcoxon",
        )

    def mcnemar_test(
        self,
        y_true: np.ndarray,
        pred_a: np.ndarray,
        pred_b: np.ndarray,
    ) -> ComparisonResult:
        """McNemar's test for classifier comparison.

        Args:
            y_true: Ground-truth labels.
            pred_a: Predictions of model A on the same rows.
            pred_b: Predictions of model B on the same rows.

        Returns:
            ComparisonResult where the means are the two accuracies and the
            confidence interval is ``(None, None)``.
        """
        y_true = np.asarray(y_true)
        correct_a = np.asarray(pred_a) == y_true
        correct_b = np.asarray(pred_b) == y_true

        # b: A correct, B wrong; c: A wrong, B correct
        b = int(np.sum(correct_a & ~correct_b))
        c = int(np.sum(~correct_a & correct_b))
        discordant = b + c

        if discordant == 0:
            # The models never disagree on correctness: nothing to test.
            p_value = 1.0
        elif discordant < 25:
            # Use exact binomial test for small samples. binom_test was
            # removed in SciPy 1.12; binomtest is its replacement.
            if hasattr(stats, "binomtest"):
                p_value = stats.binomtest(b, discordant, 0.5).pvalue
            else:
                p_value = stats.binom_test(b, discordant, 0.5)
        else:
            # Use chi-square approximation with continuity correction
            statistic = (abs(b - c) - 1) ** 2 / discordant
            p_value = stats.chi2.sf(statistic, 1)

        acc_a = float(np.mean(correct_a))
        acc_b = float(np.mean(correct_b))

        return ComparisonResult(
            model_a_mean=acc_a,
            model_b_mean=acc_b,
            difference=acc_a - acc_b,
            p_value=float(p_value),
            significant=bool(p_value < self.significance_level),
            confidence_interval=(None, None),
            test_used="mcnemar",
        )

    def _bootstrap_ci(
        self,
        data: np.ndarray,
        n_bootstrap: int = 10000,
        alpha: float = 0.05,
    ) -> tuple[float, float]:
        """Calculate a percentile bootstrap confidence interval of the mean.

        Args:
            data: 1-D sample to resample with replacement.
            n_bootstrap: Number of bootstrap resamples.
            alpha: The interval spans the ``alpha / 2`` and ``1 - alpha / 2``
                percentiles of the resampled means.

        Returns:
            ``(lower, upper)`` bounds. Resampling uses NumPy's global random
            state, so results vary unless the caller seeds it.
        """
        data = np.asarray(data)
        bootstrapped_means = [
            np.mean(np.random.choice(data, size=len(data), replace=True))
            for _ in range(n_bootstrap)
        ]

        lower = np.percentile(bootstrapped_means, alpha / 2 * 100)
        upper = np.percentile(bootstrapped_means, (1 - alpha / 2) * 100)

        return (float(lower), float(upper))
|
|
413
|
+
```
|
|
414
|
+
|
|
415
|
+
---
|
|
416
|
+
|
|
417
|
+
## A/B Testing
|
|
418
|
+
|
|
419
|
+
### Online Experiment Framework
|
|
420
|
+
|
|
421
|
+
```python
|
|
422
|
+
from dataclasses import dataclass
|
|
423
|
+
from datetime import datetime
|
|
424
|
+
from typing import Optional
|
|
425
|
+
import numpy as np
|
|
426
|
+
import hashlib
|
|
427
|
+
import json
|
|
428
|
+
|
|
429
|
+
@dataclass
class Experiment:
    """Configuration record describing a single A/B test."""
    experiment_id: str
    name: str
    control_model: str
    treatment_model: str
    # Share of traffic routed to the treatment variant.
    traffic_split: float
    start_time: datetime
    end_time: Optional[datetime]
    metrics: list[str]
    minimum_sample_size: int
    status: str = "active"
|
|
442
|
+
|
|
443
|
+
class ABTestRouter:
    """Splits traffic between an experiment's control and treatment models."""

    def __init__(self, experiment: Experiment):
        self.experiment = experiment

    def get_variant(self, user_id: str) -> str:
        """Return ``"treatment"`` or ``"control"`` for a user.

        The experiment id and user id are hashed together, so the same pair
        always lands in the same variant.
        """
        bucket_key = f"{self.experiment.experiment_id}:{user_id}"
        digest_hex = hashlib.md5(bucket_key.encode()).hexdigest()
        # Scale the 128-bit digest into the half-open range [0, 1).
        bucket_position = int(digest_hex, 16) / (2**128)

        in_treatment = bucket_position < self.experiment.traffic_split
        return "treatment" if in_treatment else "control"

    def get_model(self, user_id: str) -> str:
        """Return the model name assigned to the user's variant."""
        if self.get_variant(user_id) != "treatment":
            return self.experiment.control_model
        return self.experiment.treatment_model
|
|
467
|
+
|
|
468
|
+
class ABTestAnalyzer:
    """Analyze A/B test results.

    Args:
        significance_level: Alpha used for the ``significant`` flag, the
            confidence intervals and the sample-size calculation.
    """

    def __init__(self, significance_level: float = 0.05):
        self.significance_level = significance_level

    def analyze_conversion(
        self,
        control_conversions: int,
        control_total: int,
        treatment_conversions: int,
        treatment_total: int,
    ) -> dict:
        """Analyze a conversion rate experiment with a two-proportion z-test.

        Args:
            control_conversions: Converted users in control.
            control_total: Users exposed to control; must be non-zero.
            treatment_conversions: Converted users in treatment.
            treatment_total: Users exposed to treatment; must be non-zero.

        Returns:
            Dict with both rates, absolute difference (treatment minus
            control), relative lift (0 when the control rate is 0), two-sided
            p-value, significance flag, an unpooled confidence interval for
            the difference, and both sample sizes.

        Raises:
            ZeroDivisionError: If either total is 0.
        """
        control_rate = control_conversions / control_total
        treatment_rate = treatment_conversions / treatment_total
        rate_diff = treatment_rate - control_rate

        # Two-proportion z-test (pooled standard error under H0)
        pooled_rate = (control_conversions + treatment_conversions) / (
            control_total + treatment_total
        )
        se = np.sqrt(
            pooled_rate * (1 - pooled_rate) * (1/control_total + 1/treatment_total)
        )

        if se > 0:
            z_stat = rate_diff / se
            # sf() keeps precision for very small tail probabilities.
            p_value = float(2 * stats.norm.sf(abs(z_stat)))
        else:
            # Pooled rate is exactly 0 or 1: both arms are identical, so the
            # z statistic is 0/0. Report "no evidence" rather than NaN.
            p_value = 1.0

        # Relative lift
        lift = rate_diff / control_rate if control_rate > 0 else 0

        # Confidence interval for difference (unpooled standard error)
        se_diff = np.sqrt(
            control_rate * (1 - control_rate) / control_total +
            treatment_rate * (1 - treatment_rate) / treatment_total
        )
        z_critical = stats.norm.ppf(1 - self.significance_level / 2)
        ci = (
            float(rate_diff - z_critical * se_diff),
            float(rate_diff + z_critical * se_diff),
        )

        return {
            "control_rate": control_rate,
            "treatment_rate": treatment_rate,
            "absolute_difference": rate_diff,
            "relative_lift": lift,
            "p_value": p_value,
            "significant": bool(p_value < self.significance_level),
            "confidence_interval": ci,
            "control_sample_size": control_total,
            "treatment_sample_size": treatment_total,
        }

    def analyze_continuous_metric(
        self,
        control_values: np.ndarray,
        treatment_values: np.ndarray,
    ) -> dict:
        """Analyze a continuous metric (e.g., revenue, time) with Welch's t-test.

        Args:
            control_values: Observations from control (array or list).
            treatment_values: Observations from treatment (array or list).

        Returns:
            Dict with both means, absolute difference (treatment minus
            control), relative lift (0 when the control mean is not
            positive), p-value, significance flag, confidence interval for
            the difference, and both sample sizes.
        """
        control_values = np.asarray(control_values, dtype=float)
        treatment_values = np.asarray(treatment_values, dtype=float)
        n_control = len(control_values)
        n_treatment = len(treatment_values)

        control_mean = np.mean(control_values)
        treatment_mean = np.mean(treatment_values)
        mean_diff = treatment_mean - control_mean

        # Welch's t-test (unequal variances)
        _, p_value = stats.ttest_ind(
            treatment_values, control_values, equal_var=False
        )

        lift = mean_diff / control_mean if control_mean > 0 else 0

        # Confidence interval built from the same ingredients as the Welch
        # test: sample variances (ddof=1) and Welch-Satterthwaite degrees of
        # freedom, so the interval and the p-value agree.
        control_term = np.var(control_values, ddof=1) / n_control
        treatment_term = np.var(treatment_values, ddof=1) / n_treatment
        se_diff = np.sqrt(control_term + treatment_term)

        df_denominator = (
            control_term ** 2 / (n_control - 1) +
            treatment_term ** 2 / (n_treatment - 1)
        )
        if df_denominator > 0:
            dof = (control_term + treatment_term) ** 2 / df_denominator
        else:
            # Both samples are constant (or too small); use the conservative
            # degrees of freedom instead of dividing by zero.
            dof = min(n_control, n_treatment) - 1

        t_critical = stats.t.ppf(1 - self.significance_level / 2, dof)
        ci = (
            float(mean_diff - t_critical * se_diff),
            float(mean_diff + t_critical * se_diff),
        )

        return {
            "control_mean": control_mean,
            "treatment_mean": treatment_mean,
            "absolute_difference": mean_diff,
            "relative_lift": lift,
            "p_value": p_value,
            "significant": bool(p_value < self.significance_level),
            "confidence_interval": ci,
            "control_sample_size": n_control,
            "treatment_sample_size": n_treatment,
        }

    def calculate_sample_size(
        self,
        baseline_rate: float,
        minimum_detectable_effect: float,
        power: float = 0.8,
    ) -> int:
        """Calculate required sample size per variant for a two-sided test.

        Args:
            baseline_rate: Control conversion rate, strictly between 0 and 1.
            minimum_detectable_effect: Relative change to detect; the
                treatment rate is ``baseline_rate * (1 + effect)``. Must be
                non-zero and keep the treatment rate strictly between 0 and 1.
            power: Desired statistical power, strictly between 0 and 1.

        Returns:
            Required number of users in each variant, rounded up.

        Raises:
            ValueError: If any argument is outside the ranges above.
        """
        if not 0 < baseline_rate < 1:
            raise ValueError("baseline_rate must be strictly between 0 and 1")
        if not 0 < power < 1:
            raise ValueError("power must be strictly between 0 and 1")

        p1 = baseline_rate
        p2 = baseline_rate * (1 + minimum_detectable_effect)
        if p2 == p1:
            raise ValueError("minimum_detectable_effect must be non-zero")
        if not 0 < p2 < 1:
            raise ValueError(
                "baseline_rate * (1 + minimum_detectable_effect) must be "
                "strictly between 0 and 1"
            )

        alpha = self.significance_level
        z_alpha = stats.norm.ppf(1 - alpha / 2)
        z_beta = stats.norm.ppf(power)

        p_bar = (p1 + p2) / 2

        n = (
            (z_alpha * np.sqrt(2 * p_bar * (1 - p_bar)) +
             z_beta * np.sqrt(p1 * (1 - p1) + p2 * (1 - p2))) ** 2 /
            (p2 - p1) ** 2
        )

        return int(np.ceil(n))
|
|
587
|
+
```
|
|
588
|
+
|
|
589
|
+
---
|
|
590
|
+
|
|
591
|
+
## Shadow Deployment
|
|
592
|
+
|
|
593
|
+
### Shadow Mode Pipeline
|
|
594
|
+
|
|
595
|
+
```python
|
|
596
|
+
from dataclasses import dataclass
|
|
597
|
+
from datetime import datetime
|
|
598
|
+
from typing import Any, Optional
|
|
599
|
+
import logging
|
|
600
|
+
import json
|
|
601
|
+
|
|
602
|
+
logger = logging.getLogger(__name__)

@dataclass
class PredictionComparison:
    """Comparison of production and shadow predictions."""
    request_id: str
    timestamp: datetime
    production_prediction: Any
    shadow_prediction: Any
    production_latency_ms: float
    shadow_latency_ms: float
    agreement: bool
    features: Optional[dict] = None

class ShadowDeployment:
    """Shadow deployment for model validation.

    Serves the production model's prediction while also running a shadow
    model on the same features and recording how the two compare.

    Args:
        production_model: Object exposing ``predict(features)``; its result
            is what ``predict`` returns.
        shadow_model: Object exposing ``predict(features)``; its result is
            only logged.
        log_path: JSON-lines file that comparisons are appended to.
    """

    def __init__(
        self,
        production_model,
        shadow_model,
        log_path: str = "/var/log/shadow_predictions.jsonl",
    ):
        self.production_model = production_model
        self.shadow_model = shadow_model
        self.log_path = log_path
        # NOTE(review): grows by one entry per compared request and is never
        # trimmed here; long-running services may want to bound it.
        self.comparisons: list[PredictionComparison] = []

    def predict(
        self,
        features: dict,
        request_id: str = None,
    ) -> Any:
        """Return the production prediction and record a shadow comparison.

        The production model runs first on the calling thread. The shadow
        model then runs on a worker thread and is given up to 5 seconds.
        A shadow timeout, a shadow exception, or a failure while recording
        the comparison is logged and never affects the returned value.

        Args:
            features: Input passed unchanged to both models.
            request_id: Identifier stored with the comparison; a random UUID
                string is generated when omitted.

        Returns:
            The production model's prediction.
        """
        import time
        import uuid
        import concurrent.futures

        request_id = request_id or str(uuid.uuid4())

        # Production prediction (synchronous, used for response)
        prod_start = time.perf_counter()
        production_pred = self.production_model.predict(features)
        prod_latency = (time.perf_counter() - prod_start) * 1000

        # Shadow prediction (worker thread, logged but not returned)
        def run_shadow():
            shadow_start = time.perf_counter()
            shadow_pred = self.shadow_model.predict(features)
            shadow_latency = (time.perf_counter() - shadow_start) * 1000
            return shadow_pred, shadow_latency

        executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        try:
            future = executor.submit(run_shadow)

            try:
                shadow_pred, shadow_latency = future.result(timeout=5.0)
            except concurrent.futures.TimeoutError:
                logger.warning(f"Shadow prediction timed out for {request_id}")
            except Exception:
                # A broken shadow model must not break the production path.
                logger.exception(f"Shadow prediction failed for {request_id}")
            else:
                try:
                    comparison = PredictionComparison(
                        request_id=request_id,
                        timestamp=datetime.utcnow(),
                        production_prediction=production_pred,
                        shadow_prediction=shadow_pred,
                        production_latency_ms=prod_latency,
                        shadow_latency_ms=shadow_latency,
                        agreement=self._check_agreement(production_pred, shadow_pred),
                        features=features,
                    )
                    self._log_comparison(comparison)
                except Exception:
                    # e.g. unwritable log file; keep serving production.
                    logger.exception(
                        f"Failed to record shadow comparison for {request_id}"
                    )
        finally:
            # Do not wait for a timed-out shadow call: a `with` block would
            # join the worker on exit and defeat the timeout above.
            executor.shutdown(wait=False)

        return production_pred

    def _check_agreement(self, prod_pred: Any, shadow_pred: Any) -> bool:
        """Check if predictions agree.

        Lists/arrays are compared with ``np.allclose(rtol=1e-3)``; when that
        is not applicable (incompatible shapes or non-numeric values) they
        are compared for exact equality. Anything else uses ``==``.
        """
        sequence_types = (list, np.ndarray)
        if isinstance(prod_pred, sequence_types) or isinstance(shadow_pred, sequence_types):
            try:
                return bool(np.allclose(prod_pred, shadow_pred, rtol=1e-3))
            except (TypeError, ValueError):
                return bool(np.array_equal(prod_pred, shadow_pred))
        # bool() turns NumPy booleans into a JSON-serializable Python bool.
        return bool(prod_pred == shadow_pred)

    def _log_comparison(self, comparison: PredictionComparison) -> None:
        """Keep the comparison in memory and append it to the log file.

        Predictions are stored via ``str()``; the features are not written
        to the file.

        Raises:
            OSError: If the log file cannot be opened or written.
        """
        # Record in memory first so analysis still works if the file write fails.
        self.comparisons.append(comparison)

        log_entry = {
            "request_id": comparison.request_id,
            "timestamp": comparison.timestamp.isoformat(),
            "production_prediction": str(comparison.production_prediction),
            "shadow_prediction": str(comparison.shadow_prediction),
            "production_latency_ms": comparison.production_latency_ms,
            "shadow_latency_ms": comparison.shadow_latency_ms,
            "agreement": bool(comparison.agreement),
        }

        with open(self.log_path, "a") as f:
            f.write(json.dumps(log_entry) + "\n")

    def analyze_shadow_performance(self) -> dict:
        """Summarize the comparisons collected so far.

        Returns:
            Empty dict when nothing has been compared yet; otherwise the
            comparison count, agreement rate, p50/p99 latency of each model
            and the mean latency difference (shadow minus production).
        """
        if not self.comparisons:
            return {}

        agreements = [c.agreement for c in self.comparisons]
        prod_latencies = [c.production_latency_ms for c in self.comparisons]
        shadow_latencies = [c.shadow_latency_ms for c in self.comparisons]

        return {
            "total_comparisons": len(self.comparisons),
            "agreement_rate": np.mean(agreements),
            "production_latency_p50": np.percentile(prod_latencies, 50),
            "production_latency_p99": np.percentile(prod_latencies, 99),
            "shadow_latency_p50": np.percentile(shadow_latencies, 50),
            "shadow_latency_p99": np.percentile(shadow_latencies, 99),
            "latency_difference_mean": np.mean(
                [s - p for s, p in zip(shadow_latencies, prod_latencies)]
            ),
        }
|
|
721
|
+
```
|
|
722
|
+
|
|
723
|
+
---
|
|
724
|
+
|
|
725
|
+
## Validation Pipeline Integration
|
|
726
|
+
|
|
727
|
+
### Complete Validation Workflow
|
|
728
|
+
|
|
729
|
+
```python
|
|
730
|
+
from enum import Enum
|
|
731
|
+
from dataclasses import dataclass
|
|
732
|
+
from typing import Optional
|
|
733
|
+
|
|
734
|
+
class ValidationStatus(Enum):
|
|
735
|
+
PASSED = "passed"
|
|
736
|
+
FAILED = "failed"
|
|
737
|
+
WARNING = "warning"
|
|
738
|
+
|
|
739
|
+
@dataclass
|
|
740
|
+
class ValidationResult:
|
|
741
|
+
"""Result of a validation check."""
|
|
742
|
+
check_name: str
|
|
743
|
+
status: ValidationStatus
|
|
744
|
+
message: str
|
|
745
|
+
details: Optional[dict] = None
|
|
746
|
+
|
|
747
|
+
class ModelValidator:
|
|
748
|
+
"""Complete model validation workflow."""
|
|
749
|
+
|
|
750
|
+
def __init__(
|
|
751
|
+
self,
|
|
752
|
+
accuracy_threshold: float = 0.8,
|
|
753
|
+
latency_threshold_ms: float = 100,
|
|
754
|
+
drift_threshold: float = 0.2,
|
|
755
|
+
):
|
|
756
|
+
self.accuracy_threshold = accuracy_threshold
|
|
757
|
+
self.latency_threshold_ms = latency_threshold_ms
|
|
758
|
+
self.drift_threshold = drift_threshold
|
|
759
|
+
self.results: list[ValidationResult] = []
|
|
760
|
+
|
|
761
|
+
def validate_performance(
|
|
762
|
+
self,
|
|
763
|
+
y_true: np.ndarray,
|
|
764
|
+
y_pred: np.ndarray,
|
|
765
|
+
) -> ValidationResult:
|
|
766
|
+
"""Validate model performance metrics."""
|
|
767
|
+
evaluator = ModelEvaluator("classification")
|
|
768
|
+
metrics = evaluator.evaluate_classification(y_true, y_pred)
|
|
769
|
+
|
|
770
|
+
if metrics.accuracy >= self.accuracy_threshold:
|
|
771
|
+
status = ValidationStatus.PASSED
|
|
772
|
+
message = f"Accuracy {metrics.accuracy:.4f} meets threshold"
|
|
773
|
+
else:
|
|
774
|
+
status = ValidationStatus.FAILED
|
|
775
|
+
message = f"Accuracy {metrics.accuracy:.4f} below threshold {self.accuracy_threshold}"
|
|
776
|
+
|
|
777
|
+
result = ValidationResult(
|
|
778
|
+
check_name="performance",
|
|
779
|
+
status=status,
|
|
780
|
+
message=message,
|
|
781
|
+
details=metrics.to_dict(),
|
|
782
|
+
)
|
|
783
|
+
self.results.append(result)
|
|
784
|
+
return result
|
|
785
|
+
|
|
786
|
+
def validate_latency(
|
|
787
|
+
self,
|
|
788
|
+
model,
|
|
789
|
+
sample_input: np.ndarray,
|
|
790
|
+
n_iterations: int = 100,
|
|
791
|
+
) -> ValidationResult:
|
|
792
|
+
"""Validate inference latency."""
|
|
793
|
+
import time
|
|
794
|
+
|
|
795
|
+
latencies = []
|
|
796
|
+
for _ in range(n_iterations):
|
|
797
|
+
start = time.time()
|
|
798
|
+
model.predict(sample_input)
|
|
799
|
+
latencies.append((time.time() - start) * 1000)
|
|
800
|
+
|
|
801
|
+
p50 = np.percentile(latencies, 50)
|
|
802
|
+
p99 = np.percentile(latencies, 99)
|
|
803
|
+
|
|
804
|
+
if p99 <= self.latency_threshold_ms:
|
|
805
|
+
status = ValidationStatus.PASSED
|
|
806
|
+
message = f"P99 latency {p99:.2f}ms meets threshold"
|
|
807
|
+
elif p50 <= self.latency_threshold_ms:
|
|
808
|
+
status = ValidationStatus.WARNING
|
|
809
|
+
message = f"P50 OK but P99 {p99:.2f}ms exceeds threshold"
|
|
810
|
+
else:
|
|
811
|
+
status = ValidationStatus.FAILED
|
|
812
|
+
message = f"P99 latency {p99:.2f}ms exceeds threshold"
|
|
813
|
+
|
|
814
|
+
result = ValidationResult(
|
|
815
|
+
check_name="latency",
|
|
816
|
+
status=status,
|
|
817
|
+
message=message,
|
|
818
|
+
details={"p50_ms": p50, "p99_ms": p99, "mean_ms": np.mean(latencies)},
|
|
819
|
+
)
|
|
820
|
+
self.results.append(result)
|
|
821
|
+
return result
|
|
822
|
+
|
|
823
|
+
def validate_data_compatibility(
|
|
824
|
+
self,
|
|
825
|
+
model,
|
|
826
|
+
expected_features: list[str],
|
|
827
|
+
sample_data: pd.DataFrame,
|
|
828
|
+
) -> ValidationResult:
|
|
829
|
+
"""Validate model accepts expected input format."""
|
|
830
|
+
missing_features = set(expected_features) - set(sample_data.columns)
|
|
831
|
+
extra_features = set(sample_data.columns) - set(expected_features)
|
|
832
|
+
|
|
833
|
+
if missing_features:
|
|
834
|
+
status = ValidationStatus.FAILED
|
|
835
|
+
message = f"Missing features: {missing_features}"
|
|
836
|
+
elif extra_features:
|
|
837
|
+
status = ValidationStatus.WARNING
|
|
838
|
+
message = f"Extra features will be ignored: {extra_features}"
|
|
839
|
+
else:
|
|
840
|
+
status = ValidationStatus.PASSED
|
|
841
|
+
message = "All expected features present"
|
|
842
|
+
|
|
843
|
+
# Try inference
|
|
844
|
+
try:
|
|
845
|
+
model.predict(sample_data[expected_features].head(1))
|
|
846
|
+
except Exception as e:
|
|
847
|
+
status = ValidationStatus.FAILED
|
|
848
|
+
message = f"Inference failed: {str(e)}"
|
|
849
|
+
|
|
850
|
+
result = ValidationResult(
|
|
851
|
+
check_name="data_compatibility",
|
|
852
|
+
status=status,
|
|
853
|
+
message=message,
|
|
854
|
+
details={
|
|
855
|
+
"missing_features": list(missing_features),
|
|
856
|
+
"extra_features": list(extra_features),
|
|
857
|
+
},
|
|
858
|
+
)
|
|
859
|
+
self.results.append(result)
|
|
860
|
+
return result
|
|
861
|
+
|
|
862
|
+
def validate_vs_baseline(
|
|
863
|
+
self,
|
|
864
|
+
y_true: np.ndarray,
|
|
865
|
+
new_pred: np.ndarray,
|
|
866
|
+
baseline_pred: np.ndarray,
|
|
867
|
+
) -> ValidationResult:
|
|
868
|
+
"""Validate new model vs baseline."""
|
|
869
|
+
comparator = ModelComparator()
|
|
870
|
+
comparison = comparator.mcnemar_test(y_true, new_pred, baseline_pred)
|
|
871
|
+
|
|
872
|
+
new_acc = accuracy_score(y_true, new_pred)
|
|
873
|
+
baseline_acc = accuracy_score(y_true, baseline_pred)
|
|
874
|
+
|
|
875
|
+
if new_acc >= baseline_acc:
|
|
876
|
+
if comparison.significant:
|
|
877
|
+
status = ValidationStatus.PASSED
|
|
878
|
+
message = f"Significant improvement: {new_acc:.4f} vs {baseline_acc:.4f}"
|
|
879
|
+
else:
|
|
880
|
+
status = ValidationStatus.WARNING
|
|
881
|
+
message = f"Improvement not significant: {new_acc:.4f} vs {baseline_acc:.4f}"
|
|
882
|
+
else:
|
|
883
|
+
if comparison.significant:
|
|
884
|
+
status = ValidationStatus.FAILED
|
|
885
|
+
message = f"Significant regression: {new_acc:.4f} vs {baseline_acc:.4f}"
|
|
886
|
+
else:
|
|
887
|
+
status = ValidationStatus.WARNING
|
|
888
|
+
message = f"Minor regression: {new_acc:.4f} vs {baseline_acc:.4f}"
|
|
889
|
+
|
|
890
|
+
result = ValidationResult(
|
|
891
|
+
check_name="baseline_comparison",
|
|
892
|
+
status=status,
|
|
893
|
+
message=message,
|
|
894
|
+
details={
|
|
895
|
+
"new_accuracy": new_acc,
|
|
896
|
+
"baseline_accuracy": baseline_acc,
|
|
897
|
+
"p_value": comparison.p_value,
|
|
898
|
+
},
|
|
899
|
+
)
|
|
900
|
+
self.results.append(result)
|
|
901
|
+
return result
|
|
902
|
+
|
|
903
|
+
def get_summary(self) -> dict:
    """Roll every recorded check up into one report.

    Returns:
        A dict with:
          * "overall_status": value of the worst status seen, where FAILED
            outranks WARNING, which outranks PASSED;
          * "passed" / "warnings" / "failed": how many results carry each status;
          * "results": one {"check", "status", "message"} entry per item in
            ``self.results``, in recorded order.
    """
    def count_with(wanted):
        # Number of recorded results whose status equals `wanted`.
        return len([entry for entry in self.results if entry.status == wanted])

    passed = count_with(ValidationStatus.PASSED)
    warnings = count_with(ValidationStatus.WARNING)
    failed = count_with(ValidationStatus.FAILED)

    # Any failure dominates, then any warning; with neither (including an
    # empty result list) the overall status is PASSED.
    if failed > 0:
        overall_status = ValidationStatus.FAILED
    elif warnings > 0:
        overall_status = ValidationStatus.WARNING
    else:
        overall_status = ValidationStatus.PASSED

    summary = {
        "overall_status": overall_status.value,
        "passed": passed,
        "warnings": warnings,
        "failed": failed,
        "results": [],
    }
    for entry in self.results:
        summary["results"].append(
            {
                "check": entry.check_name,
                "status": entry.status.value,
                "message": entry.message,
            }
        )
    return summary
|
|
929
|
+
```
|
|
930
|
+
|
|
931
|
+
---
|
|
932
|
+
|
|
933
|
+
## Best Practices
|
|
934
|
+
|
|
935
|
+
### Validation Checklist
|
|
936
|
+
|
|
937
|
+
```python
|
|
938
|
+
# Checks to complete at each validation stage, keyed by stage name.
VALIDATION_CHECKLIST = dict(
    # Offline evaluation on held-out data.
    offline=[
        "Accuracy/performance metrics meet threshold",
        "Cross-validation shows consistent performance",
        "Model outperforms or matches baseline",
        "Metrics stable across data segments",
    ],
    # Operational checks before the model is deployed.
    pre_deployment=[
        "Inference latency within SLA",
        "Memory usage acceptable",
        "Input/output schema validated",
        "Model serialization/loading works",
    ],
    # Shadow-mode checks against production.
    shadow=[
        "Shadow predictions logged successfully",
        "Agreement rate with production acceptable",
        "No latency regression",
        "Error rate within bounds",
    ],
    # A/B test checks.
    ab_test=[
        "Sufficient sample size reached",
        "Statistical significance achieved",
        "No negative impact on guardrail metrics",
        "Business metrics improved",
    ],
)
|
|
964
|
+
```
|
|
965
|
+
|
|
966
|
+
---
|
|
967
|
+
|
|
968
|
+
## Related References
|
|
969
|
+
|
|
970
|
+
- `training-pipelines.md` - Model training before validation
|
|
971
|
+
- `experiment-tracking.md` - Logging validation results
|
|
972
|
+
- `pipeline-orchestration.md` - Automated validation workflows
|
|
973
|
+
- `feature-engineering.md` - Feature validation
|
|
974
|
+
|
|
975
|
+
## Cross-Reference Skills
|
|
976
|
+
|
|
977
|
+
- **Data Engineer** - Data quality validation
|
|
978
|
+
- **DevOps Engineer** - Deployment pipeline integration
|