aigroup-workflow 2.1.2 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.codex/AGENTS.md +1 -1
- package/CLAUDE.md +1 -4
- package/README.md +333 -333
- package/cli/commands/init.mjs +20 -6
- package/cli/utils/scaffold.mjs +39 -9
- package/docs/red-flags.md +1 -1
- package/docs/rules/entropy.md +1 -1
- package/docs/rules/performance.md +1 -1
- package/docs/workflow-pipeline.md +1 -0
- package/manifests/install-modules.json +223 -133
- package/package.json +39 -39
- package/scripts/orchestration/lib/orchestrator.cjs +34 -0
- package/scripts/orchestration/session.cjs +24 -1
- package/skills/ai-ml/fine-tuning-expert/SKILL.md +162 -0
- package/skills/ai-ml/fine-tuning-expert/references/dataset-preparation.md +540 -0
- package/skills/ai-ml/fine-tuning-expert/references/deployment-optimization.md +673 -0
- package/skills/ai-ml/fine-tuning-expert/references/evaluation-metrics.md +597 -0
- package/skills/ai-ml/fine-tuning-expert/references/hyperparameter-tuning.md +565 -0
- package/skills/ai-ml/fine-tuning-expert/references/lora-peft.md +347 -0
- package/skills/ai-ml/ml-pipeline/SKILL.md +159 -0
- package/skills/ai-ml/ml-pipeline/references/experiment-tracking.md +833 -0
- package/skills/ai-ml/ml-pipeline/references/feature-engineering.md +631 -0
- package/skills/ai-ml/ml-pipeline/references/model-validation.md +978 -0
- package/skills/ai-ml/ml-pipeline/references/pipeline-orchestration.md +907 -0
- package/skills/ai-ml/ml-pipeline/references/training-pipelines.md +782 -0
- package/skills/ai-ml/rag-architect/SKILL.md +194 -0
- package/skills/ai-ml/rag-architect/references/chunking-strategies.md +878 -0
- package/skills/ai-ml/rag-architect/references/embedding-models.md +561 -0
- package/skills/ai-ml/rag-architect/references/rag-evaluation.md +833 -0
- package/skills/ai-ml/rag-architect/references/retrieval-optimization.md +795 -0
- package/skills/ai-ml/rag-architect/references/vector-databases.md +589 -0
- package/skills/ai-ml/spark-engineer/SKILL.md +148 -0
- package/skills/ai-ml/spark-engineer/references/partitioning-caching.md +543 -0
- package/skills/ai-ml/spark-engineer/references/performance-tuning.md +544 -0
- package/skills/ai-ml/spark-engineer/references/rdd-operations.md +599 -0
- package/skills/ai-ml/spark-engineer/references/spark-sql-dataframes.md +474 -0
- package/skills/ai-ml/spark-engineer/references/streaming-patterns.md +786 -0
- package/skills/backend/api-designer/SKILL.md +217 -0
- package/skills/backend/api-designer/references/error-handling.md +541 -0
- package/skills/backend/api-designer/references/openapi.md +824 -0
- package/skills/backend/api-designer/references/pagination.md +494 -0
- package/skills/backend/api-designer/references/rest-patterns.md +335 -0
- package/skills/backend/api-designer/references/versioning.md +391 -0
- package/skills/backend/architecture-designer/SKILL.md +117 -0
- package/skills/backend/architecture-designer/references/adr-template.md +116 -0
- package/skills/backend/architecture-designer/references/architecture-patterns.md +111 -0
- package/skills/backend/architecture-designer/references/database-selection.md +102 -0
- package/skills/backend/architecture-designer/references/nfr-checklist.md +112 -0
- package/skills/backend/architecture-designer/references/system-design.md +100 -0
- package/skills/backend/code-documenter/SKILL.md +147 -0
- package/skills/backend/code-documenter/references/api-docs-fastapi-django.md +166 -0
- package/skills/backend/code-documenter/references/api-docs-nestjs-express.md +220 -0
- package/skills/backend/code-documenter/references/coverage-reports.md +125 -0
- package/skills/backend/code-documenter/references/documentation-systems.md +333 -0
- package/skills/backend/code-documenter/references/interactive-api-docs.md +531 -0
- package/skills/backend/code-documenter/references/python-docstrings.md +121 -0
- package/skills/backend/code-documenter/references/typescript-jsdoc.md +145 -0
- package/skills/backend/code-documenter/references/user-guides-tutorials.md +530 -0
- package/skills/backend/debugging-wizard/SKILL.md +105 -0
- package/skills/backend/debugging-wizard/references/common-patterns.md +132 -0
- package/skills/backend/debugging-wizard/references/debugging-tools.md +140 -0
- package/skills/backend/debugging-wizard/references/quick-fixes.md +177 -0
- package/skills/backend/debugging-wizard/references/strategies.md +142 -0
- package/skills/backend/debugging-wizard/references/systematic-debugging.md +367 -0
- package/skills/backend/feature-forge/SKILL.md +98 -0
- package/skills/backend/feature-forge/references/acceptance-criteria.md +104 -0
- package/skills/backend/feature-forge/references/ears-syntax.md +99 -0
- package/skills/backend/feature-forge/references/interview-questions.md +150 -0
- package/skills/backend/feature-forge/references/pre-discovery-subagents.md +54 -0
- package/skills/backend/feature-forge/references/specification-template.md +103 -0
- package/skills/backend/fullstack-guardian/SKILL.md +105 -0
- package/skills/backend/fullstack-guardian/references/api-design-standards.md +307 -0
- package/skills/backend/fullstack-guardian/references/architecture-decisions.md +350 -0
- package/skills/backend/fullstack-guardian/references/backend-patterns.md +237 -0
- package/skills/backend/fullstack-guardian/references/common-patterns.md +134 -0
- package/skills/backend/fullstack-guardian/references/deliverables-checklist.md +354 -0
- package/skills/backend/fullstack-guardian/references/design-template.md +91 -0
- package/skills/backend/fullstack-guardian/references/error-handling.md +135 -0
- package/skills/backend/fullstack-guardian/references/frontend-patterns.md +340 -0
- package/skills/backend/fullstack-guardian/references/integration-patterns.md +333 -0
- package/skills/backend/fullstack-guardian/references/security-checklist.md +106 -0
- package/skills/backend/graphql-architect/SKILL.md +146 -0
- package/skills/backend/graphql-architect/references/federation.md +418 -0
- package/skills/backend/graphql-architect/references/migration-from-rest.md +1141 -0
- package/skills/backend/graphql-architect/references/resolvers.md +425 -0
- package/skills/backend/graphql-architect/references/schema-design.md +393 -0
- package/skills/backend/graphql-architect/references/security.md +569 -0
- package/skills/backend/graphql-architect/references/subscriptions.md +510 -0
- package/skills/backend/legacy-modernizer/SKILL.md +137 -0
- package/skills/backend/legacy-modernizer/references/legacy-testing.md +381 -0
- package/skills/backend/legacy-modernizer/references/migration-strategies.md +423 -0
- package/skills/backend/legacy-modernizer/references/refactoring-patterns.md +395 -0
- package/skills/backend/legacy-modernizer/references/strangler-fig-pattern.md +281 -0
- package/skills/backend/legacy-modernizer/references/system-assessment.md +487 -0
- package/skills/backend/microservices-architect/SKILL.md +164 -0
- package/skills/backend/microservices-architect/references/communication.md +499 -0
- package/skills/backend/microservices-architect/references/data.md +721 -0
- package/skills/backend/microservices-architect/references/decomposition.md +344 -0
- package/skills/backend/microservices-architect/references/observability.md +805 -0
- package/skills/backend/microservices-architect/references/patterns.md +603 -0
- package/skills/database/database-optimizer/SKILL.md +147 -0
- package/skills/database/database-optimizer/references/index-strategies.md +331 -0
- package/skills/database/database-optimizer/references/monitoring-analysis.md +501 -0
- package/skills/database/database-optimizer/references/mysql-tuning.md +452 -0
- package/skills/database/database-optimizer/references/postgresql-tuning.md +413 -0
- package/skills/database/database-optimizer/references/query-optimization.md +251 -0
- package/skills/database/postgres-pro/SKILL.md +152 -0
- package/skills/database/postgres-pro/references/extensions.md +404 -0
- package/skills/database/postgres-pro/references/jsonb.md +321 -0
- package/skills/database/postgres-pro/references/maintenance.md +481 -0
- package/skills/database/postgres-pro/references/performance.md +265 -0
- package/skills/database/postgres-pro/references/replication.md +446 -0
- package/skills/database/sql-pro/SKILL.md +129 -0
- package/skills/database/sql-pro/references/database-design.md +402 -0
- package/skills/database/sql-pro/references/dialect-differences.md +419 -0
- package/skills/database/sql-pro/references/optimization.md +384 -0
- package/skills/database/sql-pro/references/query-patterns.md +285 -0
- package/skills/database/sql-pro/references/window-functions.md +328 -0
- package/skills/dotnet/csharp-developer/SKILL.md +125 -0
- package/skills/dotnet/csharp-developer/references/aspnet-core.md +394 -0
- package/skills/dotnet/csharp-developer/references/blazor.md +553 -0
- package/skills/dotnet/csharp-developer/references/entity-framework.md +409 -0
- package/skills/dotnet/csharp-developer/references/modern-csharp.md +248 -0
- package/skills/dotnet/csharp-developer/references/performance.md +498 -0
- package/skills/dotnet/dotnet-core-expert/SKILL.md +138 -0
- package/skills/dotnet/dotnet-core-expert/references/authentication.md +546 -0
- package/skills/dotnet/dotnet-core-expert/references/clean-architecture.md +455 -0
- package/skills/dotnet/dotnet-core-expert/references/cloud-native.md +548 -0
- package/skills/dotnet/dotnet-core-expert/references/entity-framework.md +440 -0
- package/skills/dotnet/dotnet-core-expert/references/minimal-apis.md +319 -0
- package/skills/frontend/angular-architect/SKILL.md +152 -0
- package/skills/frontend/angular-architect/references/components.md +297 -0
- package/skills/frontend/angular-architect/references/ngrx.md +401 -0
- package/skills/frontend/angular-architect/references/routing.md +361 -0
- package/skills/frontend/angular-architect/references/rxjs.md +319 -0
- package/skills/frontend/angular-architect/references/testing.md +405 -0
- package/skills/frontend/flutter-expert/SKILL.md +138 -0
- package/skills/frontend/flutter-expert/references/bloc-state.md +259 -0
- package/skills/frontend/flutter-expert/references/gorouter-navigation.md +119 -0
- package/skills/frontend/flutter-expert/references/performance.md +99 -0
- package/skills/frontend/flutter-expert/references/project-structure.md +118 -0
- package/skills/frontend/flutter-expert/references/riverpod-state.md +130 -0
- package/skills/frontend/flutter-expert/references/widget-patterns.md +123 -0
- package/skills/frontend/nextjs-developer/SKILL.md +143 -0
- package/skills/frontend/nextjs-developer/references/app-router.md +311 -0
- package/skills/frontend/nextjs-developer/references/data-fetching.md +482 -0
- package/skills/frontend/nextjs-developer/references/deployment.md +545 -0
- package/skills/frontend/nextjs-developer/references/server-actions.md +462 -0
- package/skills/frontend/nextjs-developer/references/server-components.md +384 -0
- package/skills/frontend/react-expert/SKILL.md +149 -0
- package/skills/frontend/react-expert/references/hooks-patterns.md +162 -0
- package/skills/frontend/react-expert/references/migration-class-to-modern.md +1119 -0
- package/skills/frontend/react-expert/references/performance.md +168 -0
- package/skills/frontend/react-expert/references/react-19-features.md +174 -0
- package/skills/frontend/react-expert/references/server-components.md +143 -0
- package/skills/frontend/react-expert/references/state-management.md +171 -0
- package/skills/frontend/react-expert/references/testing-react.md +174 -0
- package/skills/frontend/react-native-expert/SKILL.md +185 -0
- package/skills/frontend/react-native-expert/references/expo-router.md +187 -0
- package/skills/frontend/react-native-expert/references/list-optimization.md +204 -0
- package/skills/frontend/react-native-expert/references/platform-handling.md +188 -0
- package/skills/frontend/react-native-expert/references/project-structure.md +171 -0
- package/skills/frontend/react-native-expert/references/storage-hooks.md +173 -0
- package/skills/frontend/vue-expert/SKILL.md +98 -0
- package/skills/frontend/vue-expert/references/build-tooling.md +480 -0
- package/skills/frontend/vue-expert/references/components.md +448 -0
- package/skills/frontend/vue-expert/references/composition-api.md +299 -0
- package/skills/frontend/vue-expert/references/mobile-hybrid.md +636 -0
- package/skills/frontend/vue-expert/references/nuxt.md +669 -0
- package/skills/frontend/vue-expert/references/state-management.md +449 -0
- package/skills/frontend/vue-expert/references/typescript.md +584 -0
- package/skills/frontend/vue-expert-js/SKILL.md +167 -0
- package/skills/frontend/vue-expert-js/references/component-architecture.md +219 -0
- package/skills/frontend/vue-expert-js/references/composables-patterns.md +183 -0
- package/skills/frontend/vue-expert-js/references/jsdoc-typing.md +535 -0
- package/skills/frontend/vue-expert-js/references/state-management.md +249 -0
- package/skills/frontend/vue-expert-js/references/testing-patterns.md +237 -0
- package/skills/go-rust-cpp/cpp-pro/SKILL.md +115 -0
- package/skills/go-rust-cpp/cpp-pro/references/build-tooling.md +440 -0
- package/skills/go-rust-cpp/cpp-pro/references/concurrency.md +437 -0
- package/skills/go-rust-cpp/cpp-pro/references/memory-performance.md +397 -0
- package/skills/go-rust-cpp/cpp-pro/references/modern-cpp.md +304 -0
- package/skills/go-rust-cpp/cpp-pro/references/templates.md +357 -0
- package/skills/go-rust-cpp/golang-pro/SKILL.md +122 -0
- package/skills/go-rust-cpp/golang-pro/references/concurrency.md +329 -0
- package/skills/go-rust-cpp/golang-pro/references/generics.md +442 -0
- package/skills/go-rust-cpp/golang-pro/references/interfaces.md +432 -0
- package/skills/go-rust-cpp/golang-pro/references/project-structure.md +477 -0
- package/skills/go-rust-cpp/golang-pro/references/testing.md +451 -0
- package/skills/go-rust-cpp/rust-engineer/SKILL.md +167 -0
- package/skills/go-rust-cpp/rust-engineer/references/async.md +458 -0
- package/skills/go-rust-cpp/rust-engineer/references/error-handling.md +334 -0
- package/skills/go-rust-cpp/rust-engineer/references/ownership.md +278 -0
- package/skills/go-rust-cpp/rust-engineer/references/testing.md +470 -0
- package/skills/go-rust-cpp/rust-engineer/references/traits.md +413 -0
- package/skills/infra/cli-developer/SKILL.md +113 -0
- package/skills/infra/cli-developer/references/design-patterns.md +221 -0
- package/skills/infra/cli-developer/references/go-cli.md +540 -0
- package/skills/infra/cli-developer/references/node-cli.md +383 -0
- package/skills/infra/cli-developer/references/python-cli.md +422 -0
- package/skills/infra/cli-developer/references/ux-patterns.md +448 -0
- package/skills/infra/cloud-architect/SKILL.md +216 -0
- package/skills/infra/cloud-architect/references/aws.md +394 -0
- package/skills/infra/cloud-architect/references/azure.md +562 -0
- package/skills/infra/cloud-architect/references/cost.md +582 -0
- package/skills/infra/cloud-architect/references/gcp.md +633 -0
- package/skills/infra/cloud-architect/references/multi-cloud.md +483 -0
- package/skills/infra/devops-engineer/SKILL.md +144 -0
- package/skills/infra/devops-engineer/references/deployment-strategies.md +241 -0
- package/skills/infra/devops-engineer/references/docker-patterns.md +113 -0
- package/skills/infra/devops-engineer/references/github-actions.md +139 -0
- package/skills/infra/devops-engineer/references/incident-response.md +331 -0
- package/skills/infra/devops-engineer/references/kubernetes.md +154 -0
- package/skills/infra/devops-engineer/references/platform-engineering.md +417 -0
- package/skills/infra/devops-engineer/references/release-automation.md +527 -0
- package/skills/infra/devops-engineer/references/terraform-iac.md +141 -0
- package/skills/infra/kubernetes-specialist/SKILL.md +241 -0
- package/skills/infra/kubernetes-specialist/references/configuration.md +452 -0
- package/skills/infra/kubernetes-specialist/references/cost-optimization.md +458 -0
- package/skills/infra/kubernetes-specialist/references/custom-operators.md +563 -0
- package/skills/infra/kubernetes-specialist/references/gitops.md +530 -0
- package/skills/infra/kubernetes-specialist/references/helm-charts.md +912 -0
- package/skills/infra/kubernetes-specialist/references/multi-cluster.md +507 -0
- package/skills/infra/kubernetes-specialist/references/networking.md +447 -0
- package/skills/infra/kubernetes-specialist/references/service-mesh.md +459 -0
- package/skills/infra/kubernetes-specialist/references/storage.md +535 -0
- package/skills/infra/kubernetes-specialist/references/troubleshooting.md +414 -0
- package/skills/infra/kubernetes-specialist/references/workloads.md +377 -0
- package/skills/infra/mcp-developer/SKILL.md +143 -0
- package/skills/infra/mcp-developer/references/protocol.md +244 -0
- package/skills/infra/mcp-developer/references/python-sdk.md +367 -0
- package/skills/infra/mcp-developer/references/resources.md +554 -0
- package/skills/infra/mcp-developer/references/tools.md +480 -0
- package/skills/infra/mcp-developer/references/typescript-sdk.md +350 -0
- package/skills/infra/monitoring-expert/SKILL.md +176 -0
- package/skills/infra/monitoring-expert/references/alerting-rules.md +141 -0
- package/skills/infra/monitoring-expert/references/application-profiling.md +331 -0
- package/skills/infra/monitoring-expert/references/capacity-planning.md +344 -0
- package/skills/infra/monitoring-expert/references/dashboards.md +126 -0
- package/skills/infra/monitoring-expert/references/opentelemetry.md +123 -0
- package/skills/infra/monitoring-expert/references/performance-testing.md +269 -0
- package/skills/infra/monitoring-expert/references/prometheus-metrics.md +136 -0
- package/skills/infra/monitoring-expert/references/structured-logging.md +142 -0
- package/skills/infra/sre-engineer/SKILL.md +181 -0
- package/skills/infra/sre-engineer/references/automation-toil.md +492 -0
- package/skills/infra/sre-engineer/references/error-budget-policy.md +334 -0
- package/skills/infra/sre-engineer/references/incident-chaos.md +576 -0
- package/skills/infra/sre-engineer/references/monitoring-alerting.md +424 -0
- package/skills/infra/sre-engineer/references/slo-sli-management.md +238 -0
- package/skills/infra/terraform-engineer/SKILL.md +143 -0
- package/skills/infra/terraform-engineer/references/best-practices.md +583 -0
- package/skills/infra/terraform-engineer/references/module-patterns.md +297 -0
- package/skills/infra/terraform-engineer/references/providers.md +452 -0
- package/skills/infra/terraform-engineer/references/state-management.md +371 -0
- package/skills/infra/terraform-engineer/references/testing.md +486 -0
- package/skills/infra/websocket-engineer/SKILL.md +168 -0
- package/skills/infra/websocket-engineer/references/alternatives.md +391 -0
- package/skills/infra/websocket-engineer/references/patterns.md +400 -0
- package/skills/infra/websocket-engineer/references/protocol.md +195 -0
- package/skills/infra/websocket-engineer/references/scaling.md +333 -0
- package/skills/infra/websocket-engineer/references/security.md +474 -0
- package/skills/java/java-architect/SKILL.md +132 -0
- package/skills/java/java-architect/references/jpa-optimization.md +393 -0
- package/skills/java/java-architect/references/reactive-webflux.md +356 -0
- package/skills/java/java-architect/references/spring-boot-setup.md +269 -0
- package/skills/java/java-architect/references/spring-security.md +445 -0
- package/skills/java/java-architect/references/testing-patterns.md +500 -0
- package/skills/java/kotlin-specialist/SKILL.md +147 -0
- package/skills/java/kotlin-specialist/references/android-compose.md +419 -0
- package/skills/java/kotlin-specialist/references/coroutines-flow.md +276 -0
- package/skills/java/kotlin-specialist/references/dsl-idioms.md +421 -0
- package/skills/java/kotlin-specialist/references/ktor-server.md +426 -0
- package/skills/java/kotlin-specialist/references/multiplatform-kmp.md +380 -0
- package/skills/java/spring-boot-engineer/SKILL.md +195 -0
- package/skills/java/spring-boot-engineer/references/cloud.md +498 -0
- package/skills/java/spring-boot-engineer/references/data.md +381 -0
- package/skills/java/spring-boot-engineer/references/security.md +459 -0
- package/skills/java/spring-boot-engineer/references/testing.md +545 -0
- package/skills/java/spring-boot-engineer/references/web.md +295 -0
- package/skills/javascript/javascript-pro/SKILL.md +132 -0
- package/skills/javascript/javascript-pro/references/async-patterns.md +334 -0
- package/skills/javascript/javascript-pro/references/browser-apis.md +398 -0
- package/skills/javascript/javascript-pro/references/modern-syntax.md +272 -0
- package/skills/javascript/javascript-pro/references/modules.md +357 -0
- package/skills/javascript/javascript-pro/references/node-essentials.md +471 -0
- package/skills/javascript/nestjs-expert/SKILL.md +206 -0
- package/skills/javascript/nestjs-expert/references/authentication.md +166 -0
- package/skills/javascript/nestjs-expert/references/controllers-routing.md +111 -0
- package/skills/javascript/nestjs-expert/references/dtos-validation.md +153 -0
- package/skills/javascript/nestjs-expert/references/migration-from-express.md +1237 -0
- package/skills/javascript/nestjs-expert/references/services-di.md +140 -0
- package/skills/javascript/nestjs-expert/references/testing-patterns.md +186 -0
- package/skills/javascript/typescript-pro/SKILL.md +145 -0
- package/skills/javascript/typescript-pro/references/advanced-types.md +259 -0
- package/skills/javascript/typescript-pro/references/configuration.md +445 -0
- package/skills/javascript/typescript-pro/references/patterns.md +484 -0
- package/skills/javascript/typescript-pro/references/type-guards.md +352 -0
- package/skills/javascript/typescript-pro/references/utility-types.md +329 -0
- package/skills/php/laravel-specialist/SKILL.md +262 -0
- package/skills/php/laravel-specialist/references/eloquent.md +351 -0
- package/skills/php/laravel-specialist/references/livewire.md +512 -0
- package/skills/php/laravel-specialist/references/queues.md +423 -0
- package/skills/php/laravel-specialist/references/routing.md +362 -0
- package/skills/php/laravel-specialist/references/testing.md +522 -0
- package/skills/php/php-pro/SKILL.md +206 -0
- package/skills/php/php-pro/references/async-patterns.md +412 -0
- package/skills/php/php-pro/references/laravel-patterns.md +377 -0
- package/skills/php/php-pro/references/modern-php-features.md +323 -0
- package/skills/php/php-pro/references/symfony-patterns.md +466 -0
- package/skills/php/php-pro/references/testing-quality.md +466 -0
- package/skills/python/django-expert/SKILL.md +162 -0
- package/skills/python/django-expert/references/authentication.md +145 -0
- package/skills/python/django-expert/references/drf-serializers.md +148 -0
- package/skills/python/django-expert/references/models-orm.md +151 -0
- package/skills/python/django-expert/references/testing-django.md +204 -0
- package/skills/python/django-expert/references/viewsets-views.md +153 -0
- package/skills/python/fastapi-expert/SKILL.md +185 -0
- package/skills/python/fastapi-expert/references/async-sqlalchemy.md +146 -0
- package/skills/python/fastapi-expert/references/authentication.md +159 -0
- package/skills/python/fastapi-expert/references/endpoints-routing.md +142 -0
- package/skills/python/fastapi-expert/references/migration-from-django.md +997 -0
- package/skills/python/fastapi-expert/references/pydantic-v2.md +135 -0
- package/skills/python/fastapi-expert/references/testing-async.md +159 -0
- package/skills/python/pandas-pro/SKILL.md +178 -0
- package/skills/python/pandas-pro/references/aggregation-groupby.md +545 -0
- package/skills/python/pandas-pro/references/data-cleaning.md +500 -0
- package/skills/python/pandas-pro/references/dataframe-operations.md +420 -0
- package/skills/python/pandas-pro/references/merging-joining.md +596 -0
- package/skills/python/pandas-pro/references/performance-optimization.md +597 -0
- package/skills/python/python-pro/SKILL.md +177 -0
- package/skills/python/python-pro/references/async-patterns.md +356 -0
- package/skills/python/python-pro/references/packaging.md +460 -0
- package/skills/python/python-pro/references/standard-library.md +378 -0
- package/skills/python/python-pro/references/testing.md +404 -0
- package/skills/python/python-pro/references/type-system.md +290 -0
- package/skills/quality/chaos-engineer/SKILL.md +182 -0
- package/skills/quality/chaos-engineer/references/chaos-tools.md +511 -0
- package/skills/quality/chaos-engineer/references/experiment-design.md +229 -0
- package/skills/quality/chaos-engineer/references/game-days.md +434 -0
- package/skills/quality/chaos-engineer/references/infrastructure-chaos.md +348 -0
- package/skills/quality/chaos-engineer/references/kubernetes-chaos.md +432 -0
- package/skills/quality/code-reviewer/SKILL.md +119 -0
- package/skills/quality/code-reviewer/references/common-issues.md +142 -0
- package/skills/quality/code-reviewer/references/feedback-examples.md +144 -0
- package/skills/quality/code-reviewer/references/receiving-feedback.md +238 -0
- package/skills/quality/code-reviewer/references/report-template.md +109 -0
- package/skills/quality/code-reviewer/references/review-checklist.md +88 -0
- package/skills/quality/code-reviewer/references/spec-compliance-review.md +258 -0
- package/skills/quality/playwright-expert/SKILL.md +169 -0
- package/skills/quality/playwright-expert/references/api-mocking.md +140 -0
- package/skills/quality/playwright-expert/references/configuration.md +155 -0
- package/skills/quality/playwright-expert/references/debugging-flaky.md +150 -0
- package/skills/quality/playwright-expert/references/page-object-model.md +152 -0
- package/skills/quality/playwright-expert/references/selectors-locators.md +119 -0
- package/skills/quality/secure-code-guardian/SKILL.md +191 -0
- package/skills/quality/secure-code-guardian/references/authentication.md +136 -0
- package/skills/quality/secure-code-guardian/references/input-validation.md +146 -0
- package/skills/quality/secure-code-guardian/references/owasp-prevention.md +135 -0
- package/skills/quality/secure-code-guardian/references/security-headers.md +133 -0
- package/skills/quality/secure-code-guardian/references/xss-csrf.md +157 -0
- package/skills/quality/security-reviewer/SKILL.md +103 -0
- package/skills/quality/security-reviewer/references/infrastructure-security.md +268 -0
- package/skills/quality/security-reviewer/references/penetration-testing.md +268 -0
- package/skills/quality/security-reviewer/references/report-template.md +170 -0
- package/skills/quality/security-reviewer/references/sast-tools.md +117 -0
- package/skills/quality/security-reviewer/references/secret-scanning.md +125 -0
- package/skills/quality/security-reviewer/references/vulnerability-patterns.md +152 -0
- package/skills/quality/tdd-guide/assets/sample_coverage_report.lcov +0 -0
- package/skills/quality/test-master/SKILL.md +94 -0
- package/skills/quality/test-master/references/automation-frameworks.md +294 -0
- package/skills/quality/test-master/references/e2e-testing.md +128 -0
- package/skills/quality/test-master/references/integration-testing.md +120 -0
- package/skills/quality/test-master/references/performance-testing.md +118 -0
- package/skills/quality/test-master/references/qa-methodology.md +247 -0
- package/skills/quality/test-master/references/security-testing.md +127 -0
- package/skills/quality/test-master/references/tdd-iron-laws.md +174 -0
- package/skills/quality/test-master/references/test-reports.md +104 -0
- package/skills/quality/test-master/references/testing-anti-patterns.md +231 -0
- package/skills/quality/test-master/references/unit-testing.md +113 -0
- package/skills/ruby/rails-expert/SKILL.md +154 -0
- package/skills/ruby/rails-expert/references/active-record.md +244 -0
- package/skills/ruby/rails-expert/references/api-development.md +401 -0
- package/skills/ruby/rails-expert/references/background-jobs.md +272 -0
- package/skills/ruby/rails-expert/references/hotwire-turbo.md +228 -0
- package/skills/ruby/rails-expert/references/rspec-testing.md +367 -0
- package/skills/swift/swift-expert/SKILL.md +163 -0
- package/skills/swift/swift-expert/references/async-concurrency.md +360 -0
- package/skills/swift/swift-expert/references/memory-performance.md +377 -0
- package/skills/swift/swift-expert/references/protocol-oriented.md +354 -0
- package/skills/swift/swift-expert/references/swiftui-patterns.md +291 -0
- package/skills/swift/swift-expert/references/testing-patterns.md +399 -0
- package/skills/workflow/brainstorming/SKILL.md +164 -0
- package/skills/workflow/brainstorming/scripts/helper.js +88 -0
- package/skills/workflow/brainstorming/scripts/start-server.sh +148 -0
- package/skills/workflow/brainstorming/scripts/stop-server.sh +56 -0
- package/skills/workflow/brainstorming/spec-document-reviewer-prompt.md +49 -0
- package/skills/workflow/brainstorming/visual-companion.md +287 -0
- package/skills/workflow/documentation/SKILL.md +45 -0
- package/skills/workflow/entropy-management/SKILL.md +115 -0
- package/skills/workflow/executing-plans/SKILL.md +70 -0
- package/skills/workflow/finishing-a-development-branch/SKILL.md +200 -0
- package/skills/workflow/receiving-code-review/SKILL.md +213 -0
- package/skills/workflow/requesting-code-review/SKILL.md +105 -0
- package/skills/workflow/requesting-code-review/code-reviewer.md +146 -0
- package/skills/workflow/requirement-engineering/SKILL.md +111 -0
- package/skills/workflow/systematic-debugging/CREATION-LOG.md +119 -0
- package/skills/workflow/systematic-debugging/SKILL.md +296 -0
- package/skills/workflow/systematic-debugging/condition-based-waiting-example.ts +158 -0
- package/skills/workflow/systematic-debugging/condition-based-waiting.md +115 -0
- package/skills/workflow/systematic-debugging/defense-in-depth.md +122 -0
- package/skills/workflow/systematic-debugging/find-polluter.sh +63 -0
- package/skills/workflow/systematic-debugging/root-cause-tracing.md +169 -0
- package/skills/workflow/systematic-debugging/test-academic.md +14 -0
- package/skills/workflow/systematic-debugging/test-pressure-1.md +58 -0
- package/skills/workflow/systematic-debugging/test-pressure-2.md +68 -0
- package/skills/workflow/systematic-debugging/test-pressure-3.md +69 -0
- package/skills/workflow/using-git-worktrees/SKILL.md +218 -0
- package/skills/workflow/verification-before-completion/SKILL.md +139 -0
- package/skills/workflow/writing-plans/SKILL.md +151 -0
- package/skills/workflow/writing-plans/plan-document-reviewer-prompt.md +49 -0
- package/skills/workflow/writing-skills/SKILL.md +655 -0
- package/skills/workflow/writing-skills/anthropic-best-practices.md +1150 -0
- package/skills/workflow/writing-skills/examples/CLAUDE_MD_TESTING.md +189 -0
- package/skills/workflow/writing-skills/graphviz-conventions.dot +0 -0
- package/skills/workflow/writing-skills/persuasion-principles.md +187 -0
- package/skills/workflow/writing-skills/render-graphs.js +168 -0
- package/skills/workflow/writing-skills/testing-skills-with-subagents.md +384 -0
- package/skills/angular-architect/SKILL.md +0 -152
- package/skills/angular-architect/references/components.md +0 -297
- package/skills/angular-architect/references/ngrx.md +0 -401
- package/skills/angular-architect/references/routing.md +0 -361
- package/skills/angular-architect/references/rxjs.md +0 -319
- package/skills/angular-architect/references/testing.md +0 -405
- package/skills/api-designer/SKILL.md +0 -217
- package/skills/api-designer/references/error-handling.md +0 -541
- package/skills/api-designer/references/openapi.md +0 -824
- package/skills/api-designer/references/pagination.md +0 -494
- package/skills/api-designer/references/rest-patterns.md +0 -335
- package/skills/api-designer/references/versioning.md +0 -391
- package/skills/architecture-designer/SKILL.md +0 -117
- package/skills/architecture-designer/references/adr-template.md +0 -116
- package/skills/architecture-designer/references/architecture-patterns.md +0 -111
- package/skills/architecture-designer/references/database-selection.md +0 -102
- package/skills/architecture-designer/references/nfr-checklist.md +0 -112
- package/skills/architecture-designer/references/system-design.md +0 -100
- package/skills/brainstorming/SKILL.md +0 -164
- package/skills/brainstorming/scripts/helper.js +0 -88
- package/skills/brainstorming/scripts/start-server.sh +0 -148
- package/skills/brainstorming/scripts/stop-server.sh +0 -56
- package/skills/brainstorming/spec-document-reviewer-prompt.md +0 -49
- package/skills/brainstorming/visual-companion.md +0 -287
- package/skills/chaos-engineer/SKILL.md +0 -182
- package/skills/chaos-engineer/references/chaos-tools.md +0 -511
- package/skills/chaos-engineer/references/experiment-design.md +0 -229
- package/skills/chaos-engineer/references/game-days.md +0 -434
- package/skills/chaos-engineer/references/infrastructure-chaos.md +0 -348
- package/skills/chaos-engineer/references/kubernetes-chaos.md +0 -432
- package/skills/cli-developer/SKILL.md +0 -113
- package/skills/cli-developer/references/design-patterns.md +0 -221
- package/skills/cli-developer/references/go-cli.md +0 -540
- package/skills/cli-developer/references/node-cli.md +0 -383
- package/skills/cli-developer/references/python-cli.md +0 -422
- package/skills/cli-developer/references/ux-patterns.md +0 -448
- package/skills/cloud-architect/SKILL.md +0 -216
- package/skills/cloud-architect/references/aws.md +0 -394
- package/skills/cloud-architect/references/azure.md +0 -562
- package/skills/cloud-architect/references/cost.md +0 -582
- package/skills/cloud-architect/references/gcp.md +0 -633
- package/skills/cloud-architect/references/multi-cloud.md +0 -483
- package/skills/code-documenter/SKILL.md +0 -147
- package/skills/code-documenter/references/api-docs-fastapi-django.md +0 -166
- package/skills/code-documenter/references/api-docs-nestjs-express.md +0 -220
- package/skills/code-documenter/references/coverage-reports.md +0 -125
- package/skills/code-documenter/references/documentation-systems.md +0 -333
- package/skills/code-documenter/references/interactive-api-docs.md +0 -531
- package/skills/code-documenter/references/python-docstrings.md +0 -121
- package/skills/code-documenter/references/typescript-jsdoc.md +0 -145
- package/skills/code-documenter/references/user-guides-tutorials.md +0 -530
- package/skills/code-reviewer/SKILL.md +0 -119
- package/skills/code-reviewer/references/common-issues.md +0 -142
- package/skills/code-reviewer/references/feedback-examples.md +0 -144
- package/skills/code-reviewer/references/receiving-feedback.md +0 -238
- package/skills/code-reviewer/references/report-template.md +0 -109
- package/skills/code-reviewer/references/review-checklist.md +0 -88
- package/skills/code-reviewer/references/spec-compliance-review.md +0 -258
- package/skills/cpp-pro/SKILL.md +0 -115
- package/skills/cpp-pro/references/build-tooling.md +0 -440
- package/skills/cpp-pro/references/concurrency.md +0 -437
- package/skills/cpp-pro/references/memory-performance.md +0 -397
- package/skills/cpp-pro/references/modern-cpp.md +0 -304
- package/skills/cpp-pro/references/templates.md +0 -357
- package/skills/csharp-developer/SKILL.md +0 -125
- package/skills/csharp-developer/references/aspnet-core.md +0 -394
- package/skills/csharp-developer/references/blazor.md +0 -553
- package/skills/csharp-developer/references/entity-framework.md +0 -409
- package/skills/csharp-developer/references/modern-csharp.md +0 -248
- package/skills/csharp-developer/references/performance.md +0 -498
- package/skills/database-optimizer/SKILL.md +0 -147
- package/skills/database-optimizer/references/index-strategies.md +0 -331
- package/skills/database-optimizer/references/monitoring-analysis.md +0 -501
- package/skills/database-optimizer/references/mysql-tuning.md +0 -452
- package/skills/database-optimizer/references/postgresql-tuning.md +0 -413
- package/skills/database-optimizer/references/query-optimization.md +0 -251
- package/skills/debugging-wizard/SKILL.md +0 -105
- package/skills/debugging-wizard/references/common-patterns.md +0 -132
- package/skills/debugging-wizard/references/debugging-tools.md +0 -140
- package/skills/debugging-wizard/references/quick-fixes.md +0 -177
- package/skills/debugging-wizard/references/strategies.md +0 -142
- package/skills/debugging-wizard/references/systematic-debugging.md +0 -367
- package/skills/devops-engineer/SKILL.md +0 -144
- package/skills/devops-engineer/references/deployment-strategies.md +0 -241
- package/skills/devops-engineer/references/docker-patterns.md +0 -113
- package/skills/devops-engineer/references/github-actions.md +0 -139
- package/skills/devops-engineer/references/incident-response.md +0 -331
- package/skills/devops-engineer/references/kubernetes.md +0 -154
- package/skills/devops-engineer/references/platform-engineering.md +0 -417
- package/skills/devops-engineer/references/release-automation.md +0 -527
- package/skills/devops-engineer/references/terraform-iac.md +0 -141
- package/skills/django-expert/SKILL.md +0 -162
- package/skills/django-expert/references/authentication.md +0 -145
- package/skills/django-expert/references/drf-serializers.md +0 -148
- package/skills/django-expert/references/models-orm.md +0 -151
- package/skills/django-expert/references/testing-django.md +0 -204
- package/skills/django-expert/references/viewsets-views.md +0 -153
- package/skills/documentation/SKILL.md +0 -45
- package/skills/dotnet-core-expert/SKILL.md +0 -138
- package/skills/dotnet-core-expert/references/authentication.md +0 -546
- package/skills/dotnet-core-expert/references/clean-architecture.md +0 -455
- package/skills/dotnet-core-expert/references/cloud-native.md +0 -548
- package/skills/dotnet-core-expert/references/entity-framework.md +0 -440
- package/skills/dotnet-core-expert/references/minimal-apis.md +0 -319
- package/skills/entropy-management/SKILL.md +0 -115
- package/skills/executing-plans/SKILL.md +0 -70
- package/skills/fastapi-expert/SKILL.md +0 -185
- package/skills/fastapi-expert/references/async-sqlalchemy.md +0 -146
- package/skills/fastapi-expert/references/authentication.md +0 -159
- package/skills/fastapi-expert/references/endpoints-routing.md +0 -142
- package/skills/fastapi-expert/references/migration-from-django.md +0 -997
- package/skills/fastapi-expert/references/pydantic-v2.md +0 -135
- package/skills/fastapi-expert/references/testing-async.md +0 -159
- package/skills/feature-forge/SKILL.md +0 -98
- package/skills/feature-forge/references/acceptance-criteria.md +0 -104
- package/skills/feature-forge/references/ears-syntax.md +0 -99
- package/skills/feature-forge/references/interview-questions.md +0 -150
- package/skills/feature-forge/references/pre-discovery-subagents.md +0 -54
- package/skills/feature-forge/references/specification-template.md +0 -103
- package/skills/fine-tuning-expert/SKILL.md +0 -162
- package/skills/fine-tuning-expert/references/dataset-preparation.md +0 -540
- package/skills/fine-tuning-expert/references/deployment-optimization.md +0 -673
- package/skills/fine-tuning-expert/references/evaluation-metrics.md +0 -597
- package/skills/fine-tuning-expert/references/hyperparameter-tuning.md +0 -565
- package/skills/fine-tuning-expert/references/lora-peft.md +0 -347
- package/skills/finishing-a-development-branch/SKILL.md +0 -200
- package/skills/flutter-expert/SKILL.md +0 -138
- package/skills/flutter-expert/references/bloc-state.md +0 -259
- package/skills/flutter-expert/references/gorouter-navigation.md +0 -119
- package/skills/flutter-expert/references/performance.md +0 -99
- package/skills/flutter-expert/references/project-structure.md +0 -118
- package/skills/flutter-expert/references/riverpod-state.md +0 -130
- package/skills/flutter-expert/references/widget-patterns.md +0 -123
- package/skills/fullstack-guardian/SKILL.md +0 -105
- package/skills/fullstack-guardian/references/api-design-standards.md +0 -307
- package/skills/fullstack-guardian/references/architecture-decisions.md +0 -350
- package/skills/fullstack-guardian/references/backend-patterns.md +0 -237
- package/skills/fullstack-guardian/references/common-patterns.md +0 -134
- package/skills/fullstack-guardian/references/deliverables-checklist.md +0 -354
- package/skills/fullstack-guardian/references/design-template.md +0 -91
- package/skills/fullstack-guardian/references/error-handling.md +0 -135
- package/skills/fullstack-guardian/references/frontend-patterns.md +0 -340
- package/skills/fullstack-guardian/references/integration-patterns.md +0 -333
- package/skills/fullstack-guardian/references/security-checklist.md +0 -106
- package/skills/golang-pro/SKILL.md +0 -122
- package/skills/golang-pro/references/concurrency.md +0 -329
- package/skills/golang-pro/references/generics.md +0 -442
- package/skills/golang-pro/references/interfaces.md +0 -432
- package/skills/golang-pro/references/project-structure.md +0 -477
- package/skills/golang-pro/references/testing.md +0 -451
- package/skills/graphql-architect/SKILL.md +0 -146
- package/skills/graphql-architect/references/federation.md +0 -418
- package/skills/graphql-architect/references/migration-from-rest.md +0 -1141
- package/skills/graphql-architect/references/resolvers.md +0 -425
- package/skills/graphql-architect/references/schema-design.md +0 -393
- package/skills/graphql-architect/references/security.md +0 -569
- package/skills/graphql-architect/references/subscriptions.md +0 -510
- package/skills/java-architect/SKILL.md +0 -132
- package/skills/java-architect/references/jpa-optimization.md +0 -393
- package/skills/java-architect/references/reactive-webflux.md +0 -356
- package/skills/java-architect/references/spring-boot-setup.md +0 -269
- package/skills/java-architect/references/spring-security.md +0 -445
- package/skills/java-architect/references/testing-patterns.md +0 -500
- package/skills/javascript-pro/SKILL.md +0 -132
- package/skills/javascript-pro/references/async-patterns.md +0 -334
- package/skills/javascript-pro/references/browser-apis.md +0 -398
- package/skills/javascript-pro/references/modern-syntax.md +0 -272
- package/skills/javascript-pro/references/modules.md +0 -357
- package/skills/javascript-pro/references/node-essentials.md +0 -471
- package/skills/kotlin-specialist/SKILL.md +0 -147
- package/skills/kotlin-specialist/references/android-compose.md +0 -419
- package/skills/kotlin-specialist/references/coroutines-flow.md +0 -276
- package/skills/kotlin-specialist/references/dsl-idioms.md +0 -421
- package/skills/kotlin-specialist/references/ktor-server.md +0 -426
- package/skills/kotlin-specialist/references/multiplatform-kmp.md +0 -380
- package/skills/kubernetes-specialist/SKILL.md +0 -241
- package/skills/kubernetes-specialist/references/configuration.md +0 -452
- package/skills/kubernetes-specialist/references/cost-optimization.md +0 -458
- package/skills/kubernetes-specialist/references/custom-operators.md +0 -563
- package/skills/kubernetes-specialist/references/gitops.md +0 -530
- package/skills/kubernetes-specialist/references/helm-charts.md +0 -912
- package/skills/kubernetes-specialist/references/multi-cluster.md +0 -507
- package/skills/kubernetes-specialist/references/networking.md +0 -447
- package/skills/kubernetes-specialist/references/service-mesh.md +0 -459
- package/skills/kubernetes-specialist/references/storage.md +0 -535
- package/skills/kubernetes-specialist/references/troubleshooting.md +0 -414
- package/skills/kubernetes-specialist/references/workloads.md +0 -377
- package/skills/laravel-specialist/SKILL.md +0 -262
- package/skills/laravel-specialist/references/eloquent.md +0 -351
- package/skills/laravel-specialist/references/livewire.md +0 -512
- package/skills/laravel-specialist/references/queues.md +0 -423
- package/skills/laravel-specialist/references/routing.md +0 -362
- package/skills/laravel-specialist/references/testing.md +0 -522
- package/skills/legacy-modernizer/SKILL.md +0 -137
- package/skills/legacy-modernizer/references/legacy-testing.md +0 -381
- package/skills/legacy-modernizer/references/migration-strategies.md +0 -423
- package/skills/legacy-modernizer/references/refactoring-patterns.md +0 -395
- package/skills/legacy-modernizer/references/strangler-fig-pattern.md +0 -281
- package/skills/legacy-modernizer/references/system-assessment.md +0 -487
- package/skills/mcp-developer/SKILL.md +0 -143
- package/skills/mcp-developer/references/protocol.md +0 -244
- package/skills/mcp-developer/references/python-sdk.md +0 -367
- package/skills/mcp-developer/references/resources.md +0 -554
- package/skills/mcp-developer/references/tools.md +0 -480
- package/skills/mcp-developer/references/typescript-sdk.md +0 -350
- package/skills/microservices-architect/SKILL.md +0 -164
- package/skills/microservices-architect/references/communication.md +0 -499
- package/skills/microservices-architect/references/data.md +0 -721
- package/skills/microservices-architect/references/decomposition.md +0 -344
- package/skills/microservices-architect/references/observability.md +0 -805
- package/skills/microservices-architect/references/patterns.md +0 -603
- package/skills/ml-pipeline/SKILL.md +0 -159
- package/skills/ml-pipeline/references/experiment-tracking.md +0 -833
- package/skills/ml-pipeline/references/feature-engineering.md +0 -631
- package/skills/ml-pipeline/references/model-validation.md +0 -978
- package/skills/ml-pipeline/references/pipeline-orchestration.md +0 -907
- package/skills/ml-pipeline/references/training-pipelines.md +0 -782
- package/skills/monitoring-expert/SKILL.md +0 -176
- package/skills/monitoring-expert/references/alerting-rules.md +0 -141
- package/skills/monitoring-expert/references/application-profiling.md +0 -331
- package/skills/monitoring-expert/references/capacity-planning.md +0 -344
- package/skills/monitoring-expert/references/dashboards.md +0 -126
- package/skills/monitoring-expert/references/opentelemetry.md +0 -123
- package/skills/monitoring-expert/references/performance-testing.md +0 -269
- package/skills/monitoring-expert/references/prometheus-metrics.md +0 -136
- package/skills/monitoring-expert/references/structured-logging.md +0 -142
- package/skills/nestjs-expert/SKILL.md +0 -206
- package/skills/nestjs-expert/references/authentication.md +0 -166
- package/skills/nestjs-expert/references/controllers-routing.md +0 -111
- package/skills/nestjs-expert/references/dtos-validation.md +0 -153
- package/skills/nestjs-expert/references/migration-from-express.md +0 -1237
- package/skills/nestjs-expert/references/services-di.md +0 -140
- package/skills/nestjs-expert/references/testing-patterns.md +0 -186
- package/skills/nextjs-developer/SKILL.md +0 -143
- package/skills/nextjs-developer/references/app-router.md +0 -311
- package/skills/nextjs-developer/references/data-fetching.md +0 -482
- package/skills/nextjs-developer/references/deployment.md +0 -545
- package/skills/nextjs-developer/references/server-actions.md +0 -462
- package/skills/nextjs-developer/references/server-components.md +0 -384
- package/skills/pandas-pro/SKILL.md +0 -178
- package/skills/pandas-pro/references/aggregation-groupby.md +0 -545
- package/skills/pandas-pro/references/data-cleaning.md +0 -500
- package/skills/pandas-pro/references/dataframe-operations.md +0 -420
- package/skills/pandas-pro/references/merging-joining.md +0 -596
- package/skills/pandas-pro/references/performance-optimization.md +0 -597
- package/skills/php-pro/SKILL.md +0 -206
- package/skills/php-pro/references/async-patterns.md +0 -412
- package/skills/php-pro/references/laravel-patterns.md +0 -377
- package/skills/php-pro/references/modern-php-features.md +0 -323
- package/skills/php-pro/references/symfony-patterns.md +0 -466
- package/skills/php-pro/references/testing-quality.md +0 -466
- package/skills/playwright-expert/SKILL.md +0 -169
- package/skills/playwright-expert/references/api-mocking.md +0 -140
- package/skills/playwright-expert/references/configuration.md +0 -155
- package/skills/playwright-expert/references/debugging-flaky.md +0 -150
- package/skills/playwright-expert/references/page-object-model.md +0 -152
- package/skills/playwright-expert/references/selectors-locators.md +0 -119
- package/skills/postgres-pro/SKILL.md +0 -152
- package/skills/postgres-pro/references/extensions.md +0 -404
- package/skills/postgres-pro/references/jsonb.md +0 -321
- package/skills/postgres-pro/references/maintenance.md +0 -481
- package/skills/postgres-pro/references/performance.md +0 -265
- package/skills/postgres-pro/references/replication.md +0 -446
- package/skills/python-pro/SKILL.md +0 -177
- package/skills/python-pro/references/async-patterns.md +0 -356
- package/skills/python-pro/references/packaging.md +0 -460
- package/skills/python-pro/references/standard-library.md +0 -378
- package/skills/python-pro/references/testing.md +0 -404
- package/skills/python-pro/references/type-system.md +0 -290
- package/skills/rag-architect/SKILL.md +0 -194
- package/skills/rag-architect/references/chunking-strategies.md +0 -878
- package/skills/rag-architect/references/embedding-models.md +0 -561
- package/skills/rag-architect/references/rag-evaluation.md +0 -833
- package/skills/rag-architect/references/retrieval-optimization.md +0 -795
- package/skills/rag-architect/references/vector-databases.md +0 -589
- package/skills/rails-expert/SKILL.md +0 -154
- package/skills/rails-expert/references/active-record.md +0 -244
- package/skills/rails-expert/references/api-development.md +0 -401
- package/skills/rails-expert/references/background-jobs.md +0 -272
- package/skills/rails-expert/references/hotwire-turbo.md +0 -228
- package/skills/rails-expert/references/rspec-testing.md +0 -367
- package/skills/react-expert/SKILL.md +0 -149
- package/skills/react-expert/references/hooks-patterns.md +0 -162
- package/skills/react-expert/references/migration-class-to-modern.md +0 -1119
- package/skills/react-expert/references/performance.md +0 -168
- package/skills/react-expert/references/react-19-features.md +0 -174
- package/skills/react-expert/references/server-components.md +0 -143
- package/skills/react-expert/references/state-management.md +0 -171
- package/skills/react-expert/references/testing-react.md +0 -174
- package/skills/react-native-expert/SKILL.md +0 -185
- package/skills/react-native-expert/references/expo-router.md +0 -187
- package/skills/react-native-expert/references/list-optimization.md +0 -204
- package/skills/react-native-expert/references/platform-handling.md +0 -188
- package/skills/react-native-expert/references/project-structure.md +0 -171
- package/skills/react-native-expert/references/storage-hooks.md +0 -173
- package/skills/receiving-code-review/SKILL.md +0 -213
- package/skills/requesting-code-review/SKILL.md +0 -105
- package/skills/requesting-code-review/code-reviewer.md +0 -146
- package/skills/requirement-engineering/SKILL.md +0 -111
- package/skills/rust-engineer/SKILL.md +0 -167
- package/skills/rust-engineer/references/async.md +0 -458
- package/skills/rust-engineer/references/error-handling.md +0 -334
- package/skills/rust-engineer/references/ownership.md +0 -278
- package/skills/rust-engineer/references/testing.md +0 -470
- package/skills/rust-engineer/references/traits.md +0 -413
- package/skills/secure-code-guardian/SKILL.md +0 -191
- package/skills/secure-code-guardian/references/authentication.md +0 -136
- package/skills/secure-code-guardian/references/input-validation.md +0 -146
- package/skills/secure-code-guardian/references/owasp-prevention.md +0 -135
- package/skills/secure-code-guardian/references/security-headers.md +0 -133
- package/skills/secure-code-guardian/references/xss-csrf.md +0 -157
- package/skills/security-reviewer/SKILL.md +0 -103
- package/skills/security-reviewer/references/infrastructure-security.md +0 -268
- package/skills/security-reviewer/references/penetration-testing.md +0 -268
- package/skills/security-reviewer/references/report-template.md +0 -170
- package/skills/security-reviewer/references/sast-tools.md +0 -117
- package/skills/security-reviewer/references/secret-scanning.md +0 -125
- package/skills/security-reviewer/references/vulnerability-patterns.md +0 -152
- package/skills/spark-engineer/SKILL.md +0 -148
- package/skills/spark-engineer/references/partitioning-caching.md +0 -543
- package/skills/spark-engineer/references/performance-tuning.md +0 -544
- package/skills/spark-engineer/references/rdd-operations.md +0 -599
- package/skills/spark-engineer/references/spark-sql-dataframes.md +0 -474
- package/skills/spark-engineer/references/streaming-patterns.md +0 -786
- package/skills/spring-boot-engineer/SKILL.md +0 -195
- package/skills/spring-boot-engineer/references/cloud.md +0 -498
- package/skills/spring-boot-engineer/references/data.md +0 -381
- package/skills/spring-boot-engineer/references/security.md +0 -459
- package/skills/spring-boot-engineer/references/testing.md +0 -545
- package/skills/spring-boot-engineer/references/web.md +0 -295
- package/skills/sql-pro/SKILL.md +0 -129
- package/skills/sql-pro/references/database-design.md +0 -402
- package/skills/sql-pro/references/dialect-differences.md +0 -419
- package/skills/sql-pro/references/optimization.md +0 -384
- package/skills/sql-pro/references/query-patterns.md +0 -285
- package/skills/sql-pro/references/window-functions.md +0 -328
- package/skills/sre-engineer/SKILL.md +0 -181
- package/skills/sre-engineer/references/automation-toil.md +0 -492
- package/skills/sre-engineer/references/error-budget-policy.md +0 -334
- package/skills/sre-engineer/references/incident-chaos.md +0 -576
- package/skills/sre-engineer/references/monitoring-alerting.md +0 -424
- package/skills/sre-engineer/references/slo-sli-management.md +0 -238
- package/skills/swift-expert/SKILL.md +0 -163
- package/skills/swift-expert/references/async-concurrency.md +0 -360
- package/skills/swift-expert/references/memory-performance.md +0 -377
- package/skills/swift-expert/references/protocol-oriented.md +0 -354
- package/skills/swift-expert/references/swiftui-patterns.md +0 -291
- package/skills/swift-expert/references/testing-patterns.md +0 -399
- package/skills/systematic-debugging/CREATION-LOG.md +0 -119
- package/skills/systematic-debugging/SKILL.md +0 -296
- package/skills/systematic-debugging/condition-based-waiting-example.ts +0 -158
- package/skills/systematic-debugging/condition-based-waiting.md +0 -115
- package/skills/systematic-debugging/defense-in-depth.md +0 -122
- package/skills/systematic-debugging/find-polluter.sh +0 -63
- package/skills/systematic-debugging/root-cause-tracing.md +0 -169
- package/skills/systematic-debugging/test-academic.md +0 -14
- package/skills/systematic-debugging/test-pressure-1.md +0 -58
- package/skills/systematic-debugging/test-pressure-2.md +0 -68
- package/skills/systematic-debugging/test-pressure-3.md +0 -69
- package/skills/tdd-guide/assets/sample_coverage_report.lcov +0 -56
- package/skills/terraform-engineer/SKILL.md +0 -143
- package/skills/terraform-engineer/references/best-practices.md +0 -583
- package/skills/terraform-engineer/references/module-patterns.md +0 -297
- package/skills/terraform-engineer/references/providers.md +0 -452
- package/skills/terraform-engineer/references/state-management.md +0 -371
- package/skills/terraform-engineer/references/testing.md +0 -486
- package/skills/test-master/SKILL.md +0 -94
- package/skills/test-master/references/automation-frameworks.md +0 -294
- package/skills/test-master/references/e2e-testing.md +0 -128
- package/skills/test-master/references/integration-testing.md +0 -120
- package/skills/test-master/references/performance-testing.md +0 -118
- package/skills/test-master/references/qa-methodology.md +0 -247
- package/skills/test-master/references/security-testing.md +0 -127
- package/skills/test-master/references/tdd-iron-laws.md +0 -174
- package/skills/test-master/references/test-reports.md +0 -104
- package/skills/test-master/references/testing-anti-patterns.md +0 -231
- package/skills/test-master/references/unit-testing.md +0 -113
- package/skills/typescript-pro/SKILL.md +0 -145
- package/skills/typescript-pro/references/advanced-types.md +0 -259
- package/skills/typescript-pro/references/configuration.md +0 -445
- package/skills/typescript-pro/references/patterns.md +0 -484
- package/skills/typescript-pro/references/type-guards.md +0 -352
- package/skills/typescript-pro/references/utility-types.md +0 -329
- package/skills/using-git-worktrees/SKILL.md +0 -218
- package/skills/verification-before-completion/SKILL.md +0 -139
- package/skills/vue-expert/SKILL.md +0 -98
- package/skills/vue-expert/references/build-tooling.md +0 -480
- package/skills/vue-expert/references/components.md +0 -448
- package/skills/vue-expert/references/composition-api.md +0 -299
- package/skills/vue-expert/references/mobile-hybrid.md +0 -636
- package/skills/vue-expert/references/nuxt.md +0 -669
- package/skills/vue-expert/references/state-management.md +0 -449
- package/skills/vue-expert/references/typescript.md +0 -584
- package/skills/vue-expert-js/SKILL.md +0 -167
- package/skills/vue-expert-js/references/component-architecture.md +0 -219
- package/skills/vue-expert-js/references/composables-patterns.md +0 -183
- package/skills/vue-expert-js/references/jsdoc-typing.md +0 -535
- package/skills/vue-expert-js/references/state-management.md +0 -249
- package/skills/vue-expert-js/references/testing-patterns.md +0 -237
- package/skills/websocket-engineer/SKILL.md +0 -168
- package/skills/websocket-engineer/references/alternatives.md +0 -391
- package/skills/websocket-engineer/references/patterns.md +0 -400
- package/skills/websocket-engineer/references/protocol.md +0 -195
- package/skills/websocket-engineer/references/scaling.md +0 -333
- package/skills/websocket-engineer/references/security.md +0 -474
- package/skills/writing-plans/SKILL.md +0 -151
- package/skills/writing-plans/plan-document-reviewer-prompt.md +0 -49
- package/skills/writing-skills/SKILL.md +0 -655
- package/skills/writing-skills/anthropic-best-practices.md +0 -1150
- package/skills/writing-skills/examples/CLAUDE_MD_TESTING.md +0 -189
- package/skills/writing-skills/graphviz-conventions.dot +0 -172
- package/skills/writing-skills/persuasion-principles.md +0 -187
- package/skills/writing-skills/render-graphs.js +0 -168
- package/skills/writing-skills/testing-skills-with-subagents.md +0 -384
- /package/skills/{design-commands → frontend/design-commands}/design.md +0 -0
- /package/skills/{design-commands → frontend/design-commands}/handoff.md +0 -0
- /package/skills/{design-commands → frontend/design-commands}/prototype.md +0 -0
- /package/skills/{design-commands → frontend/design-commands}/spec.md +0 -0
- /package/skills/{design-commands → frontend/design-commands}/style.md +0 -0
- /package/skills/{senior-frontend → frontend/senior-frontend}/SKILL.md +0 -0
- /package/skills/{senior-frontend → frontend/senior-frontend}/references/frontend_best_practices.md +0 -0
- /package/skills/{senior-frontend → frontend/senior-frontend}/references/nextjs_optimization_guide.md +0 -0
- /package/skills/{senior-frontend → frontend/senior-frontend}/references/react_patterns.md +0 -0
- /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/bundle_analyzer.py +0 -0
- /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/component_generator.py +0 -0
- /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/frontend_scaffolder.py +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/SKILL.md +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/charts.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/colors.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/icons.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/landing.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/products.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/react-performance.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/astro.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/flutter.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/html-tailwind.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/jetpack-compose.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nextjs.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nuxt-ui.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nuxtjs.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/react-native.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/react.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/shadcn.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/svelte.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/swiftui.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/vue.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/styles.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/typography.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/ui-reasoning.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/ux-guidelines.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/web-interface.csv +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/core.py +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/design_system.py +0 -0
- /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/search.py +0 -0
- /package/skills/{competitive-analysis → product/competitive-analysis}/SKILL.md +0 -0
- /package/skills/{meeting-notes → product/meeting-notes}/SKILL.md +0 -0
- /package/skills/{prd-template → product/prd-template}/SKILL.md +0 -0
- /package/skills/{stakeholder-update → product/stakeholder-update}/SKILL.md +0 -0
- /package/skills/{user-research-synthesis → product/user-research-synthesis}/SKILL.md +0 -0
- /package/skills/{senior-qa → quality/senior-qa}/README.md +0 -0
- /package/skills/{senior-qa → quality/senior-qa}/SKILL.md +0 -0
- /package/skills/{senior-qa → quality/senior-qa}/references/qa_best_practices.md +0 -0
- /package/skills/{senior-qa → quality/senior-qa}/references/test_automation_patterns.md +0 -0
- /package/skills/{senior-qa → quality/senior-qa}/references/testing_strategies.md +0 -0
- /package/skills/{senior-qa → quality/senior-qa}/scripts/coverage_analyzer.py +0 -0
- /package/skills/{senior-qa → quality/senior-qa}/scripts/e2e_test_scaffolder.py +0 -0
- /package/skills/{senior-qa → quality/senior-qa}/scripts/test_suite_generator.py +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/HOW_TO_USE.md +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/README.md +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/SKILL.md +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/assets/expected_output.json +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/assets/sample_input_python.json +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/assets/sample_input_typescript.json +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/references/ci-integration.md +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/references/framework-guide.md +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/references/tdd-best-practices.md +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/scripts/coverage_analyzer.py +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/scripts/fixture_generator.py +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/scripts/format_detector.py +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/scripts/framework_adapter.py +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/scripts/metrics_calculator.py +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/scripts/output_formatter.py +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/scripts/tdd_workflow.py +0 -0
- /package/skills/{tdd-guide → quality/tdd-guide}/scripts/test_generator.py +0 -0
- /package/skills/{brainstorming → workflow/brainstorming}/scripts/frame-template.html +0 -0
- /package/skills/{brainstorming → workflow/brainstorming}/scripts/server.cjs +0 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: spark-engineer
|
|
3
|
+
description: Use when writing Spark jobs, debugging performance issues, or configuring cluster settings for Apache Spark applications, distributed data processing pipelines, or big data workloads. Invoke to write DataFrame transformations, optimize Spark SQL queries, implement RDD pipelines, tune shuffle operations, configure executor memory, process .parquet files, handle data partitioning, or build structured streaming analytics.
|
|
4
|
+
license: MIT
|
|
5
|
+
metadata:
|
|
6
|
+
author: https://github.com/Jeffallan
|
|
7
|
+
version: "1.1.0"
|
|
8
|
+
domain: data-ml
|
|
9
|
+
triggers: Apache Spark, PySpark, Spark SQL, distributed computing, big data, DataFrame API, RDD, Spark Streaming, structured streaming, data partitioning, Spark performance, cluster computing, data processing pipeline
|
|
10
|
+
role: expert
|
|
11
|
+
scope: implementation
|
|
12
|
+
output-format: code
|
|
13
|
+
related-skills: python-pro, sql-pro, devops-engineer
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
# Spark Engineer
|
|
17
|
+
|
|
18
|
+
Senior Apache Spark engineer specializing in high-performance distributed data processing, optimizing large-scale ETL pipelines, and building production-grade Spark applications.
|
|
19
|
+
|
|
20
|
+
## Core Workflow
|
|
21
|
+
|
|
22
|
+
1. **Analyze requirements** - Understand data volume, transformations, latency requirements, cluster resources
|
|
23
|
+
2. **Design pipeline** - Choose DataFrame vs RDD, plan partitioning strategy, identify broadcast opportunities
|
|
24
|
+
3. **Implement** - Write Spark code with optimized transformations, appropriate caching, proper error handling
|
|
25
|
+
4. **Optimize** - Analyze Spark UI, tune shuffle partitions, eliminate skew, optimize joins and aggregations
|
|
26
|
+
5. **Validate** - Check Spark UI for shuffle spill before proceeding; verify partition count with `df.rdd.getNumPartitions()`; if spill or skew detected, return to step 4; test with production-scale data, monitor resource usage, verify performance targets
|
|
27
|
+
|
|
28
|
+
## Reference Guide
|
|
29
|
+
|
|
30
|
+
Load detailed guidance based on context:
|
|
31
|
+
|
|
32
|
+
| Topic | Reference | Load When |
|
|
33
|
+
|-------|-----------|-----------|
|
|
34
|
+
| Spark SQL & DataFrames | `references/spark-sql-dataframes.md` | DataFrame API, Spark SQL, schemas, joins, aggregations |
|
|
35
|
+
| RDD Operations | `references/rdd-operations.md` | Transformations, actions, pair RDDs, custom partitioners |
|
|
36
|
+
| Partitioning & Caching | `references/partitioning-caching.md` | Data partitioning, persistence levels, broadcast variables |
|
|
37
|
+
| Performance Tuning | `references/performance-tuning.md` | Configuration, memory tuning, shuffle optimization, skew handling |
|
|
38
|
+
| Streaming Patterns | `references/streaming-patterns.md` | Structured Streaming, watermarks, stateful operations, sinks |
|
|
39
|
+
|
|
40
|
+
## Code Examples
|
|
41
|
+
|
|
42
|
+
### Quick-Start Mini-Pipeline (PySpark)
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from pyspark.sql import SparkSession
|
|
46
|
+
from pyspark.sql import functions as F
|
|
47
|
+
from pyspark.sql.types import StructType, StructField, StringType, LongType, DoubleType
|
|
48
|
+
|
|
49
|
+
spark = SparkSession.builder \
|
|
50
|
+
.appName("example-pipeline") \
|
|
51
|
+
.config("spark.sql.shuffle.partitions", "400") \
|
|
52
|
+
.config("spark.sql.adaptive.enabled", "true") \
|
|
53
|
+
.getOrCreate()
|
|
54
|
+
|
|
55
|
+
# Always define explicit schemas in production
|
|
56
|
+
schema = StructType([
|
|
57
|
+
StructField("user_id", StringType(), False),
|
|
58
|
+
StructField("event_ts", LongType(), False),
|
|
59
|
+
StructField("amount", DoubleType(), True),
|
|
60
|
+
])
|
|
61
|
+
|
|
62
|
+
df = spark.read.schema(schema).parquet("s3://bucket/events/")
|
|
63
|
+
|
|
64
|
+
result = df \
|
|
65
|
+
.filter(F.col("amount").isNotNull()) \
|
|
66
|
+
.groupBy("user_id") \
|
|
67
|
+
.agg(F.sum("amount").alias("total_amount"), F.count("*").alias("event_count"))
|
|
68
|
+
|
|
69
|
+
# Verify partition count before writing
|
|
70
|
+
print(f"Partition count: {result.rdd.getNumPartitions()}")
|
|
71
|
+
|
|
72
|
+
result.write.mode("overwrite").parquet("s3://bucket/output/")
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Broadcast Join (small dimension table < 200 MB)
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from pyspark.sql.functions import broadcast
|
|
79
|
+
|
|
80
|
+
# Spark only auto-broadcasts tables below spark.sql.autoBroadcastJoinThreshold (default 10 MB); the hint forces a broadcast of dim_df
|
|
81
|
+
enriched = large_fact_df.join(broadcast(dim_df), on="product_id", how="left")
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Handling Data Skew with Salting
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
import pyspark.sql.functions as F
|
|
88
|
+
|
|
89
|
+
SALT_BUCKETS = 50
|
|
90
|
+
|
|
91
|
+
# Add salt to the skewed key on both sides
|
|
92
|
+
skewed_df = skewed_df.withColumn("salt", (F.rand() * SALT_BUCKETS).cast("int")) \
|
|
93
|
+
.withColumn("salted_key", F.concat(F.col("skewed_key"), F.lit("_"), F.col("salt")))
|
|
94
|
+
|
|
95
|
+
other_df = other_df.withColumn("salt", F.explode(F.array([F.lit(i) for i in range(SALT_BUCKETS)]))) \
|
|
96
|
+
.withColumn("salted_key", F.concat(F.col("skewed_key"), F.lit("_"), F.col("salt")))
|
|
97
|
+
|
|
98
|
+
result = skewed_df.join(other_df, on="salted_key", how="inner") \
|
|
99
|
+
.drop("salt", "salted_key")
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Correct Caching Pattern
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
# Cache ONLY when the DataFrame is reused multiple times
|
|
106
|
+
df_cleaned = df.filter(...).withColumn(...).cache()
|
|
107
|
+
df_cleaned.count() # Materialize immediately; check Spark UI for spill
|
|
108
|
+
|
|
109
|
+
report_a = df_cleaned.groupBy("region").agg(...)
|
|
110
|
+
report_b = df_cleaned.groupBy("product").agg(...)
|
|
111
|
+
|
|
112
|
+
df_cleaned.unpersist() # Release when done
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Constraints
|
|
116
|
+
|
|
117
|
+
### MUST DO
|
|
118
|
+
- Use DataFrame API over RDD for structured data processing
|
|
119
|
+
- Define explicit schemas for production pipelines
|
|
120
|
+
- Partition data appropriately (2-4 partitions per executor core, targeting ~128-256MB per partition)
|
|
121
|
+
- Cache intermediate results only when reused multiple times
|
|
122
|
+
- Use broadcast joins for small dimension tables (<200MB)
|
|
123
|
+
- Handle data skew with salting or custom partitioning
|
|
124
|
+
- Monitor Spark UI for shuffle, spill, and GC metrics
|
|
125
|
+
- Test with production-scale data volumes
|
|
126
|
+
|
|
127
|
+
### MUST NOT DO
|
|
128
|
+
- Use collect() on large datasets (causes OOM)
|
|
129
|
+
- Skip schema definition and rely on inference in production
|
|
130
|
+
- Cache every DataFrame without measuring benefit
|
|
131
|
+
- Ignore shuffle partition tuning (default 200 often wrong)
|
|
132
|
+
- Use UDFs when built-in functions available (10-100x slower)
|
|
133
|
+
- Process small files without coalescing (small file problem)
|
|
134
|
+
- Run transformations without understanding lazy evaluation
|
|
135
|
+
- Ignore data skew warnings in Spark UI
|
|
136
|
+
|
|
137
|
+
## Output Templates
|
|
138
|
+
|
|
139
|
+
When implementing Spark solutions, provide:
|
|
140
|
+
1. Complete Spark code (PySpark or Scala) with type hints/types
|
|
141
|
+
2. Configuration recommendations (executors, memory, shuffle partitions)
|
|
142
|
+
3. Partitioning strategy explanation
|
|
143
|
+
4. Performance analysis (expected shuffle size, memory usage)
|
|
144
|
+
5. Monitoring recommendations (key Spark UI metrics to watch)
|
|
145
|
+
|
|
146
|
+
## Knowledge Reference
|
|
147
|
+
|
|
148
|
+
Spark DataFrame API, Spark SQL, RDD transformations/actions, catalyst optimizer, tungsten execution engine, partitioning strategies, broadcast variables, accumulators, structured streaming, watermarks, checkpointing, Spark UI analysis, memory management, shuffle optimization
|
|
@@ -0,0 +1,543 @@
|
|
|
1
|
+
# Partitioning and Caching
|
|
2
|
+
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
## Partitioning Fundamentals
|
|
6
|
+
|
|
7
|
+
### Why Partitioning Matters
|
|
8
|
+
|
|
9
|
+
- **Parallelism**: Each partition runs on a separate task
|
|
10
|
+
- **Data locality**: Minimize data movement across network
|
|
11
|
+
- **Memory efficiency**: Right-sized partitions prevent OOM
|
|
12
|
+
- **Join performance**: Co-partitioned data avoids shuffle
|
|
13
|
+
|
|
14
|
+
### Partition Count Guidelines
|
|
15
|
+
|
|
16
|
+
```python
|
|
17
|
+
# Rule of thumb: 2-4 partitions per CPU core
|
|
18
|
+
# For 100 executor cores: 200-400 partitions
|
|
19
|
+
|
|
20
|
+
# Check current partitions
|
|
21
|
+
print(f"Number of partitions: {df.rdd.getNumPartitions()}")
|
|
22
|
+
|
|
23
|
+
# Recommended formula
|
|
24
|
+
total_cores = num_executors * cores_per_executor
|
|
25
|
+
recommended_partitions = total_cores * 3  # pick a multiplier between 2 and 4
|
|
26
|
+
|
|
27
|
+
# Target partition size: 128MB - 256MB per partition
|
|
28
|
+
# For 100GB data with 128MB target: ~800 partitions
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### Optimal Partition Sizes
|
|
32
|
+
|
|
33
|
+
| Data Volume | Target Partition Size | Partition Count |
|
|
34
|
+
|-------------|----------------------|-----------------|
|
|
35
|
+
| < 1GB | 64MB | 8-16 |
|
|
36
|
+
| 1-10GB | 128MB | 8-80 |
|
|
37
|
+
| 10-100GB | 128-256MB | 40-800 |
|
|
38
|
+
| 100GB-1TB | 256MB | 400-4000 |
|
|
39
|
+
| > 1TB | 256MB | 4000+ |
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## DataFrame Partitioning
|
|
44
|
+
|
|
45
|
+
### Repartition (Full Shuffle)
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from pyspark.sql import functions as F
|
|
49
|
+
|
|
50
|
+
# Repartition to specific number
|
|
51
|
+
df_repart = df.repartition(200)
|
|
52
|
+
|
|
53
|
+
# Repartition by column(s) - same keys go to same partition
|
|
54
|
+
df_repart = df.repartition("user_id")
|
|
55
|
+
df_repart = df.repartition("user_id", "date")
|
|
56
|
+
|
|
57
|
+
# Repartition with count and columns
|
|
58
|
+
df_repart = df.repartition(100, "user_id")
|
|
59
|
+
|
|
60
|
+
# Range partitioning (for sorted access patterns)
|
|
61
|
+
df_range = df.repartitionByRange(100, "date")
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
```scala
|
|
65
|
+
// Scala repartition
|
|
66
|
+
val dfRepart = df.repartition(200)
|
|
67
|
+
val dfByCol = df.repartition($"user_id")
|
|
68
|
+
val dfRange = df.repartitionByRange(100, $"date")
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Coalesce (No Shuffle)
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
# Reduce partitions without shuffle - efficient!
|
|
75
|
+
# Use after filtering reduces data significantly
|
|
76
|
+
df_coalesced = df.coalesce(50)
|
|
77
|
+
|
|
78
|
+
# Common pattern: filter then coalesce
|
|
79
|
+
df_filtered = df.filter(F.col("active") == True)
|
|
80
|
+
# If filter reduced data by 80%, reduce partitions too
|
|
81
|
+
df_optimized = df_filtered.coalesce(40) # From 200 to 40
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
**When to use:**
|
|
85
|
+
- `repartition(n)`: Increase partitions, need even distribution, partition by column
|
|
86
|
+
- `coalesce(n)`: Decrease partitions only (merges existing partitions, avoiding a full shuffle)
|
|
87
|
+
- `repartitionByRange()`: Need sorted partitions for range queries
|
|
88
|
+
|
|
89
|
+
### Checking Partition Distribution
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
from pyspark.sql import functions as F
|
|
93
|
+
|
|
94
|
+
# Check partition count
|
|
95
|
+
print(f"Partitions: {df.rdd.getNumPartitions()}")
|
|
96
|
+
|
|
97
|
+
# Check partition sizes (row counts)
|
|
98
|
+
partition_counts = df.withColumn("partition_id", F.spark_partition_id()) \
|
|
99
|
+
.groupBy("partition_id") \
|
|
100
|
+
.count() \
|
|
101
|
+
.orderBy("partition_id")
|
|
102
|
+
|
|
103
|
+
partition_counts.show()
|
|
104
|
+
|
|
105
|
+
# Get partition statistics
|
|
106
|
+
stats = partition_counts.agg(
|
|
107
|
+
F.min("count").alias("min_rows"),
|
|
108
|
+
F.max("count").alias("max_rows"),
|
|
109
|
+
F.avg("count").alias("avg_rows"),
|
|
110
|
+
F.stddev("count").alias("stddev")
|
|
111
|
+
)
|
|
112
|
+
stats.show()
|
|
113
|
+
|
|
114
|
+
# Identify skew: max/avg ratio > 3 indicates skew
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## Shuffle Partitions
|
|
120
|
+
|
|
121
|
+
### Configuration
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
# Default shuffle partitions (200) - often suboptimal
|
|
125
|
+
spark.conf.set("spark.sql.shuffle.partitions", 200)
|
|
126
|
+
|
|
127
|
+
# For small data (<10GB), reduce
|
|
128
|
+
spark.conf.set("spark.sql.shuffle.partitions", 50)
|
|
129
|
+
|
|
130
|
+
# For large data (>100GB), increase
|
|
131
|
+
spark.conf.set("spark.sql.shuffle.partitions", 2000)
|
|
132
|
+
|
|
133
|
+
# Adaptive Query Execution (Spark 3.0+) - dynamic partition sizing
|
|
134
|
+
spark.conf.set("spark.sql.adaptive.enabled", "true")
|
|
135
|
+
spark.conf.set("spark.sql.adaptive.coalescePartitions.enabled", "true")
|
|
136
|
+
spark.conf.set("spark.sql.adaptive.coalescePartitions.minPartitionSize", "64MB")
|
|
137
|
+
spark.conf.set("spark.sql.adaptive.advisoryPartitionSizeInBytes", "128MB")
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### AQE Automatic Optimization (Spark 3.x)
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
# Enable full AQE suite
|
|
144
|
+
spark.conf.set("spark.sql.adaptive.enabled", "true")
|
|
145
|
+
|
|
146
|
+
# Auto-coalesce shuffle partitions
|
|
147
|
+
spark.conf.set("spark.sql.adaptive.coalescePartitions.enabled", "true")
|
|
148
|
+
spark.conf.set("spark.sql.adaptive.coalescePartitions.parallelismFirst", "false")
|
|
149
|
+
|
|
150
|
+
# Handle skewed partitions automatically
|
|
151
|
+
spark.conf.set("spark.sql.adaptive.skewJoin.enabled", "true")
|
|
152
|
+
spark.conf.set("spark.sql.adaptive.skewJoin.skewedPartitionFactor", 5)
|
|
153
|
+
spark.conf.set("spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes", "256MB")
|
|
154
|
+
|
|
155
|
+
# Local shuffle reader (avoid remote reads when possible)
|
|
156
|
+
spark.conf.set("spark.sql.adaptive.localShuffleReader.enabled", "true")
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
**Spark UI Check:** With AQE, check "Adaptive" badge in SQL tab. View coalesced partition counts in stage details.
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## Caching and Persistence
|
|
164
|
+
|
|
165
|
+
### When to Cache
|
|
166
|
+
|
|
167
|
+
**Cache when:**
|
|
168
|
+
- DataFrame is reused multiple times in same job
|
|
169
|
+
- DataFrame is expensive to compute (complex joins/aggregations)
|
|
170
|
+
- Iterative algorithms (ML training loops)
|
|
171
|
+
- Interactive exploration in notebooks
|
|
172
|
+
|
|
173
|
+
**Do NOT cache when:**
|
|
174
|
+
- DataFrame used only once
|
|
175
|
+
- Data doesn't fit in cluster memory
|
|
176
|
+
- Source data is already fast (local SSD, columnar formats)
|
|
177
|
+
- Storage level causes excessive GC
|
|
178
|
+
|
|
179
|
+
### Persistence Levels
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
from pyspark import StorageLevel
|
|
183
|
+
|
|
184
|
+
# Default for DataFrame cache() is MEMORY_AND_DISK (only RDD.cache() is memory-only)
|
|
185
|
+
df.cache() # Equivalent to persist(MEMORY_AND_DISK)
|
|
186
|
+
df.persist() # Same as cache()
|
|
187
|
+
|
|
188
|
+
# Specific storage levels
|
|
189
|
+
df.persist(StorageLevel.MEMORY_ONLY) # Fast, may lose partitions
|
|
190
|
+
df.persist(StorageLevel.MEMORY_AND_DISK) # Spill to disk if needed
|
|
191
|
+
df.persist(StorageLevel.MEMORY_ONLY_SER) # Serialized, less memory, slower (Scala/Java API; not defined in PySpark 3.x, where data is always serialized)
|
|
192
|
+
df.persist(StorageLevel.MEMORY_AND_DISK_SER) # Serialized with disk spill (Scala/Java API; not defined in PySpark 3.x, use MEMORY_AND_DISK)
|
|
193
|
+
df.persist(StorageLevel.DISK_ONLY) # Only disk, slowest
|
|
194
|
+
df.persist(StorageLevel.OFF_HEAP) # Off-heap memory
|
|
195
|
+
|
|
196
|
+
# With replication (for fault tolerance)
|
|
197
|
+
df.persist(StorageLevel.MEMORY_AND_DISK_2) # 2x replication
|
|
198
|
+
|
|
199
|
+
# Unpersist when done
|
|
200
|
+
df.unpersist()
|
|
201
|
+
df.unpersist(blocking=True) # Wait for completion
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
```scala
|
|
205
|
+
// Scala persistence
|
|
206
|
+
import org.apache.spark.storage.StorageLevel
|
|
207
|
+
|
|
208
|
+
df.cache()
|
|
209
|
+
df.persist(StorageLevel.MEMORY_AND_DISK_SER)
|
|
210
|
+
df.unpersist()
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
### Storage Level Selection Guide
|
|
214
|
+
|
|
215
|
+
| Storage Level | Use When |
|
|
216
|
+
|---------------|----------|
|
|
217
|
+
| MEMORY_ONLY | Enough memory, need fastest access |
|
|
218
|
+
| MEMORY_AND_DISK | Default, safe for most cases |
|
|
219
|
+
| MEMORY_ONLY_SER | Memory constrained, CPU available |
|
|
220
|
+
| MEMORY_AND_DISK_SER | Large data, memory constrained |
|
|
221
|
+
| DISK_ONLY | Very large data, memory scarce |
|
|
222
|
+
| OFF_HEAP | Using Tungsten off-heap memory |
|
|
223
|
+
|
|
224
|
+
### Caching Best Practices
|
|
225
|
+
|
|
226
|
+
```python
|
|
227
|
+
# Pattern 1: Cache after expensive transformation
|
|
228
|
+
expensive_df = source_df \
|
|
229
|
+
.join(lookup_df, "key") \
|
|
230
|
+
.groupBy("category").agg(F.sum("amount"))
|
|
231
|
+
|
|
232
|
+
expensive_df.cache()
|
|
233
|
+
|
|
234
|
+
# Trigger caching with action
|
|
235
|
+
expensive_df.count()
|
|
236
|
+
|
|
237
|
+
# Reuse cached data
|
|
238
|
+
result1 = expensive_df.filter(F.col("category") == "A")
|
|
239
|
+
result2 = expensive_df.filter(F.col("category") == "B")
|
|
240
|
+
|
|
241
|
+
# Clean up
|
|
242
|
+
expensive_df.unpersist()
|
|
243
|
+
|
|
244
|
+
# Pattern 2: Cache at checkpoint in iterative algorithm
|
|
245
|
+
for iteration in range(100):
|
|
246
|
+
df = df.transform(update_function)
|
|
247
|
+
if iteration % 10 == 0:
|
|
248
|
+
df.cache()
|
|
249
|
+
df.count() # Materialize
|
|
250
|
+
df.unpersist() # Clean previous
|
|
251
|
+
|
|
252
|
+
# Pattern 3: Checkpoint to break lineage (long pipelines)
|
|
253
|
+
spark.sparkContext.setCheckpointDir("hdfs://path/checkpoints/")
|
|
254
|
+
df = df.checkpoint() # Returns a new DataFrame with truncated lineage, saved to reliable storage
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
### Monitoring Cache Usage
|
|
258
|
+
|
|
259
|
+
```python
|
|
260
|
+
# Check if DataFrame is cached
|
|
261
|
+
print(df.storageLevel) # StorageLevel(False, False, False, False, 1) = not cached
|
|
262
|
+
|
|
263
|
+
# Check storage tab in Spark UI for:
|
|
264
|
+
# - Size in Memory
|
|
265
|
+
# - Size on Disk
|
|
266
|
+
# - Fraction Cached (should be 100%)
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
**Spark UI Check:** Storage tab shows cached RDDs/DataFrames. Monitor "Fraction Cached" - if < 100%, memory is insufficient.
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
## Broadcast Variables
|
|
274
|
+
|
|
275
|
+
### When to Use Broadcast
|
|
276
|
+
|
|
277
|
+
- Small lookup tables (< 200MB)
|
|
278
|
+
- Dimension tables joined to large fact tables
|
|
279
|
+
- Configuration data used across all tasks
|
|
280
|
+
- Avoiding shuffle in map-side joins
|
|
281
|
+
|
|
282
|
+
### DataFrame Broadcast Join
|
|
283
|
+
|
|
284
|
+
```python
|
|
285
|
+
from pyspark.sql.functions import broadcast
|
|
286
|
+
|
|
287
|
+
# Explicit broadcast hint
|
|
288
|
+
large_df = spark.read.parquet("s3://bucket/transactions/") # 100GB
|
|
289
|
+
small_df = spark.read.parquet("s3://bucket/categories/") # 50MB
|
|
290
|
+
|
|
291
|
+
# Broadcast small table for efficient join
|
|
292
|
+
result = large_df.join(broadcast(small_df), "category_id")
|
|
293
|
+
|
|
294
|
+
# Auto-broadcast threshold configuration
|
|
295
|
+
spark.conf.set("spark.sql.autoBroadcastJoinThreshold", 100 * 1024 * 1024) # 100MB
|
|
296
|
+
|
|
297
|
+
# Disable auto-broadcast (force sort-merge join)
|
|
298
|
+
spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1)
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
### RDD Broadcast Variables
|
|
302
|
+
|
|
303
|
+
```python
|
|
304
|
+
# Create broadcast variable
|
|
305
|
+
lookup_dict = {"A": 1, "B": 2, "C": 3}
|
|
306
|
+
broadcast_lookup = spark.sparkContext.broadcast(lookup_dict)
|
|
307
|
+
|
|
308
|
+
# Use in transformation
|
|
309
|
+
def enrich_with_lookup(row):
|
|
310
|
+
lookup = broadcast_lookup.value
|
|
311
|
+
return Row(
|
|
312
|
+
id=row.id,
|
|
313
|
+
code=row.code,
|
|
314
|
+
value=lookup.get(row.code, 0)
|
|
315
|
+
)
|
|
316
|
+
|
|
317
|
+
enriched_rdd = df.rdd.map(enrich_with_lookup)
|
|
318
|
+
|
|
319
|
+
# Clean up
|
|
320
|
+
broadcast_lookup.unpersist()
|
|
321
|
+
broadcast_lookup.destroy()
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
### Broadcast Size Limits
|
|
325
|
+
|
|
326
|
+
```python
|
|
327
|
+
# Auto-broadcast threshold (default 10MB, adjustable); Spark's hard limit for a broadcast table is 8GB
|
|
328
|
+
spark.conf.set("spark.sql.autoBroadcastJoinThreshold", 200 * 1024 * 1024) # 200MB
|
|
329
|
+
|
|
330
|
+
# For larger broadcasts
|
|
331
|
+
spark.conf.set("spark.driver.maxResultSize", "4g")
|
|
332
|
+
|
|
333
|
+
# Monitor broadcast time in Spark UI
|
|
334
|
+
# Long broadcast time indicates table too large
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
**Warning:** Broadcasting tables > 200MB can cause driver OOM and slow broadcast. Use sort-merge join instead.
|
|
338
|
+
|
|
339
|
+
---
|
|
340
|
+
|
|
341
|
+
## Partitioning Strategies for Common Patterns
|
|
342
|
+
|
|
343
|
+
### Time-Series Data
|
|
344
|
+
|
|
345
|
+
```python
|
|
346
|
+
# Partition by date for time-range queries
|
|
347
|
+
df_partitioned = df.repartition("date")
|
|
348
|
+
|
|
349
|
+
# Range partition for ordered access
|
|
350
|
+
df_range = df.repartitionByRange(365, "date") # One year
|
|
351
|
+
|
|
352
|
+
# Write partitioned by date
|
|
353
|
+
df.write.partitionBy("year", "month", "day").parquet("s3://bucket/data/")
|
|
354
|
+
|
|
355
|
+
# Read with partition pruning
|
|
356
|
+
df = spark.read.parquet("s3://bucket/data/") \
|
|
357
|
+
.filter(F.col("year") == 2024) # Only reads 2024 partitions
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
### User/Entity Data
|
|
361
|
+
|
|
362
|
+
```python
|
|
363
|
+
# Partition by user_id for user-specific queries
|
|
364
|
+
df_user_partitioned = df.repartition(1000, "user_id")
|
|
365
|
+
|
|
366
|
+
# Co-partition for efficient joins
|
|
367
|
+
users_partitioned = users.repartition(1000, "user_id")
|
|
368
|
+
orders_partitioned = orders.repartition(1000, "user_id")
|
|
369
|
+
|
|
370
|
+
# Join without shuffle (if partitioners match)
|
|
371
|
+
joined = users_partitioned.join(orders_partitioned, "user_id")
|
|
372
|
+
```
|
|
373
|
+
|
|
374
|
+
### Skewed Data
|
|
375
|
+
|
|
376
|
+
```python
|
|
377
|
+
# Salt skewed keys
|
|
378
|
+
salt_buckets = 10
|
|
379
|
+
|
|
380
|
+
# Add salt to skewed table
|
|
381
|
+
salted_df = large_df.withColumn(
|
|
382
|
+
"salted_key",
|
|
383
|
+
F.concat(
|
|
384
|
+
F.col("join_key"),
|
|
385
|
+
F.lit("_"),
|
|
386
|
+
(F.monotonically_increasing_id() % salt_buckets).cast("string")
|
|
387
|
+
)
|
|
388
|
+
)
|
|
389
|
+
|
|
390
|
+
# Explode small table to match
|
|
391
|
+
from pyspark.sql.functions import explode, array, lit
|
|
392
|
+
|
|
393
|
+
small_exploded = small_df.withColumn(
|
|
394
|
+
"salt",
|
|
395
|
+
explode(array([lit(i) for i in range(salt_buckets)]))
|
|
396
|
+
).withColumn(
|
|
397
|
+
"salted_key",
|
|
398
|
+
F.concat(F.col("join_key"), F.lit("_"), F.col("salt").cast("string"))
|
|
399
|
+
)
|
|
400
|
+
|
|
401
|
+
# Join on salted key
|
|
402
|
+
result = salted_df.join(small_exploded, "salted_key")
|
|
403
|
+
```
|
|
404
|
+
|
|
405
|
+
---
|
|
406
|
+
|
|
407
|
+
## File Partitioning (Write Optimization)
|
|
408
|
+
|
|
409
|
+
### Hive-Style Partitioning
|
|
410
|
+
|
|
411
|
+
```python
|
|
412
|
+
# Write with partitioning
|
|
413
|
+
df.write \
|
|
414
|
+
.mode("overwrite") \
|
|
415
|
+
.partitionBy("year", "month") \
|
|
416
|
+
.parquet("s3://bucket/data/")
|
|
417
|
+
|
|
418
|
+
# Result directory structure:
|
|
419
|
+
# s3://bucket/data/year=2024/month=01/part-*.parquet
|
|
420
|
+
# s3://bucket/data/year=2024/month=02/part-*.parquet
|
|
421
|
+
|
|
422
|
+
# Read with partition discovery
|
|
423
|
+
df = spark.read.parquet("s3://bucket/data/")
|
|
424
|
+
# Columns year, month automatically added from path
|
|
425
|
+
```
|
|
426
|
+
|
|
427
|
+
### Bucketing (Hash-Based File Partitioning)
|
|
428
|
+
|
|
429
|
+
```python
|
|
430
|
+
# Write bucketed table for optimized joins
|
|
431
|
+
df.write \
|
|
432
|
+
.mode("overwrite") \
|
|
433
|
+
.bucketBy(100, "user_id") \
|
|
434
|
+
.sortBy("timestamp") \
|
|
435
|
+
.saveAsTable("bucketed_orders")
|
|
436
|
+
|
|
437
|
+
# Read bucketed table
|
|
438
|
+
orders = spark.table("bucketed_orders")
|
|
439
|
+
users = spark.table("bucketed_users") # Same bucket count
|
|
440
|
+
|
|
441
|
+
# Bucket join - no shuffle if buckets match
|
|
442
|
+
result = orders.join(users, "user_id")
|
|
443
|
+
```
|
|
444
|
+
|
|
445
|
+
**Note:** Bucketing requires Hive metastore and saveAsTable. Doesn't work with direct file writes.
|
|
446
|
+
|
|
447
|
+
### Controlling Output Files
|
|
448
|
+
|
|
449
|
+
```python
|
|
450
|
+
# Control number of output files
|
|
451
|
+
# Single output file (each in-memory partition writes one file; coalesce(1) leaves one partition)
|
|
452
|
+
df.coalesce(1).write.parquet("s3://bucket/output/")
|
|
453
|
+
|
|
454
|
+
# Multiple output files (repartition(100) writes ~100 files, one per in-memory partition)
|
|
455
|
+
df.repartition(100).write.parquet("s3://bucket/output/")
|
|
456
|
+
|
|
457
|
+
# Max records per file
|
|
458
|
+
df.write \
|
|
459
|
+
.option("maxRecordsPerFile", 1000000) \
|
|
460
|
+
.parquet("s3://bucket/output/")
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
---
|
|
464
|
+
|
|
465
|
+
## Spark UI Analysis for Partitioning/Caching
|
|
466
|
+
|
|
467
|
+
### Jobs Tab
|
|
468
|
+
|
|
469
|
+
- Check if cached data shows "(cached)" in DAG
|
|
470
|
+
- Look for skipped stages (using cached data)
|
|
471
|
+
|
|
472
|
+
### Stages Tab
|
|
473
|
+
|
|
474
|
+
- **Shuffle Write Size**: Large values indicate repartition opportunities
|
|
475
|
+
- **Shuffle Read Size**: Should be similar across tasks (no skew)
|
|
476
|
+
- **Task Duration Distribution**: Wide variance indicates partition imbalance
|
|
477
|
+
|
|
478
|
+
### Storage Tab
|
|
479
|
+
|
|
480
|
+
- **Size in Memory**: Actual cached size
|
|
481
|
+
- **Size on Disk**: Spilled size
|
|
482
|
+
- **Fraction Cached**: Should be 100% if memory sufficient
|
|
483
|
+
|
|
484
|
+
### SQL Tab
|
|
485
|
+
|
|
486
|
+
- Look for "BroadcastExchange" - indicates broadcast join
|
|
487
|
+
- Look for "ShuffleExchange" - indicates data movement
|
|
488
|
+
- Check "Rows Output" at each stage for data flow
|
|
489
|
+
|
|
490
|
+
---
|
|
491
|
+
|
|
492
|
+
## Common Anti-Patterns
|
|
493
|
+
|
|
494
|
+
```python
|
|
495
|
+
# BAD: Caching without measuring benefit
|
|
496
|
+
for table in all_tables:
|
|
497
|
+
spark.read.parquet(table).cache() # Wastes memory
|
|
498
|
+
|
|
499
|
+
# GOOD: Cache only if reused
|
|
500
|
+
expensive_df.cache()
|
|
501
|
+
result1 = expensive_df.groupBy("a").count()
|
|
502
|
+
result2 = expensive_df.groupBy("b").count()
|
|
503
|
+
expensive_df.unpersist()
|
|
504
|
+
|
|
505
|
+
# BAD: Too many small partitions
|
|
506
|
+
df.repartition(10000) # Creates scheduling overhead
|
|
507
|
+
|
|
508
|
+
# GOOD: Right-size partitions (128MB-256MB each)
|
|
509
|
+
df.repartition(100)
|
|
510
|
+
|
|
511
|
+
# BAD: Too few partitions for large data
|
|
512
|
+
df.coalesce(1) # Single partition can't parallelize
|
|
513
|
+
|
|
514
|
+
# GOOD: Maintain parallelism
|
|
515
|
+
df.coalesce(max(1, target_size))
|
|
516
|
+
|
|
517
|
+
# BAD: Repartition before filter
|
|
518
|
+
df.repartition(1000).filter(F.col("active") == True) # Shuffles then filters
|
|
519
|
+
|
|
520
|
+
# GOOD: Filter then coalesce
|
|
521
|
+
df.filter(F.col("active") == True).coalesce(100) # Filter first, then resize
|
|
522
|
+
|
|
523
|
+
# BAD: Broadcasting large table
|
|
524
|
+
result = large.join(broadcast(also_large), "key") # OOM risk
|
|
525
|
+
|
|
526
|
+
# GOOD: Let Spark decide or use sort-merge
|
|
527
|
+
result = large.join(also_large, "key") # Sort-merge join
|
|
528
|
+
```
|
|
529
|
+
|
|
530
|
+
---
|
|
531
|
+
|
|
532
|
+
## Best Practices Summary
|
|
533
|
+
|
|
534
|
+
1. **Target 128-256MB partitions** - Not too small (overhead) or large (OOM)
|
|
535
|
+
2. **Use 2-4 partitions per core** - Maximize parallelism
|
|
536
|
+
3. **Enable AQE in Spark 3.x** - Automatic partition optimization
|
|
537
|
+
4. **Cache only reused DataFrames** - Measure before caching everything
|
|
538
|
+
5. **Use MEMORY_AND_DISK** - Safe default storage level
|
|
539
|
+
6. **Broadcast tables < 200MB** - Avoid shuffle for small dimension tables
|
|
540
|
+
7. **Coalesce after filters** - Reduce partitions when data shrinks
|
|
541
|
+
8. **Repartition for joins** - Co-partition related tables
|
|
542
|
+
9. **Partition writes by filter columns** - Enable partition pruning
|
|
543
|
+
10. **Monitor Storage tab** - Ensure cache fits in memory
|