javi-forge 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ci-local/ci-local.sh +38 -10
- package/ci-local/hooks/pre-commit +10 -155
- package/ci-local/hooks/pre-push +12 -29
- package/dist/commands/ci.d.ts +33 -0
- package/dist/commands/ci.js +341 -0
- package/dist/commands/init.js +5 -0
- package/dist/index.js +39 -5
- package/dist/lib/docker.d.ts +43 -0
- package/dist/lib/docker.js +223 -0
- package/dist/ui/CI.d.ts +9 -0
- package/dist/ui/CI.js +91 -0
- package/package.json +9 -1
- package/ai-config/.skillignore +0 -15
- package/ai-config/AUTO_INVOKE.md +0 -300
- package/ai-config/agents/_TEMPLATE.md +0 -93
- package/ai-config/agents/business/api-designer.md +0 -1657
- package/ai-config/agents/business/business-analyst.md +0 -1331
- package/ai-config/agents/business/product-strategist.md +0 -206
- package/ai-config/agents/business/project-manager.md +0 -178
- package/ai-config/agents/business/requirements-analyst.md +0 -1277
- package/ai-config/agents/business/technical-writer.md +0 -1679
- package/ai-config/agents/creative/ux-designer.md +0 -205
- package/ai-config/agents/data-ai/ai-engineer.md +0 -487
- package/ai-config/agents/data-ai/analytics-engineer.md +0 -953
- package/ai-config/agents/data-ai/data-engineer.md +0 -173
- package/ai-config/agents/data-ai/data-scientist.md +0 -672
- package/ai-config/agents/data-ai/mlops-engineer.md +0 -814
- package/ai-config/agents/data-ai/prompt-engineer.md +0 -772
- package/ai-config/agents/development/angular-expert.md +0 -620
- package/ai-config/agents/development/backend-architect.md +0 -795
- package/ai-config/agents/development/database-specialist.md +0 -212
- package/ai-config/agents/development/frontend-specialist.md +0 -686
- package/ai-config/agents/development/fullstack-engineer.md +0 -668
- package/ai-config/agents/development/golang-pro.md +0 -338
- package/ai-config/agents/development/java-enterprise.md +0 -400
- package/ai-config/agents/development/javascript-pro.md +0 -422
- package/ai-config/agents/development/nextjs-pro.md +0 -474
- package/ai-config/agents/development/python-pro.md +0 -570
- package/ai-config/agents/development/react-pro.md +0 -487
- package/ai-config/agents/development/rust-pro.md +0 -246
- package/ai-config/agents/development/spring-boot-4-expert.md +0 -326
- package/ai-config/agents/development/typescript-pro.md +0 -336
- package/ai-config/agents/development/vue-specialist.md +0 -605
- package/ai-config/agents/infrastructure/cloud-architect.md +0 -472
- package/ai-config/agents/infrastructure/deployment-manager.md +0 -358
- package/ai-config/agents/infrastructure/devops-engineer.md +0 -455
- package/ai-config/agents/infrastructure/incident-responder.md +0 -519
- package/ai-config/agents/infrastructure/kubernetes-expert.md +0 -705
- package/ai-config/agents/infrastructure/monitoring-specialist.md +0 -674
- package/ai-config/agents/infrastructure/performance-engineer.md +0 -658
- package/ai-config/agents/orchestrator.md +0 -241
- package/ai-config/agents/quality/accessibility-auditor.md +0 -1204
- package/ai-config/agents/quality/code-reviewer-compact.md +0 -123
- package/ai-config/agents/quality/code-reviewer.md +0 -363
- package/ai-config/agents/quality/dependency-manager.md +0 -743
- package/ai-config/agents/quality/e2e-test-specialist.md +0 -1005
- package/ai-config/agents/quality/performance-tester.md +0 -1086
- package/ai-config/agents/quality/security-auditor.md +0 -133
- package/ai-config/agents/quality/test-engineer.md +0 -453
- package/ai-config/agents/specialists/api-designer.md +0 -87
- package/ai-config/agents/specialists/backend-architect.md +0 -73
- package/ai-config/agents/specialists/code-reviewer.md +0 -77
- package/ai-config/agents/specialists/db-optimizer.md +0 -75
- package/ai-config/agents/specialists/devops-engineer.md +0 -83
- package/ai-config/agents/specialists/documentation-writer.md +0 -78
- package/ai-config/agents/specialists/frontend-developer.md +0 -75
- package/ai-config/agents/specialists/performance-analyst.md +0 -82
- package/ai-config/agents/specialists/refactor-specialist.md +0 -74
- package/ai-config/agents/specialists/security-auditor.md +0 -74
- package/ai-config/agents/specialists/test-engineer.md +0 -81
- package/ai-config/agents/specialists/ux-consultant.md +0 -76
- package/ai-config/agents/specialized/agent-generator.md +0 -1190
- package/ai-config/agents/specialized/blockchain-developer.md +0 -149
- package/ai-config/agents/specialized/code-migrator.md +0 -892
- package/ai-config/agents/specialized/context-manager.md +0 -978
- package/ai-config/agents/specialized/documentation-writer.md +0 -1078
- package/ai-config/agents/specialized/ecommerce-expert.md +0 -1756
- package/ai-config/agents/specialized/embedded-engineer.md +0 -1714
- package/ai-config/agents/specialized/error-detective.md +0 -1034
- package/ai-config/agents/specialized/fintech-specialist.md +0 -1659
- package/ai-config/agents/specialized/freelance-project-planner-v2.md +0 -1988
- package/ai-config/agents/specialized/freelance-project-planner-v3.md +0 -2136
- package/ai-config/agents/specialized/freelance-project-planner-v4.md +0 -4503
- package/ai-config/agents/specialized/freelance-project-planner.md +0 -722
- package/ai-config/agents/specialized/game-developer.md +0 -1963
- package/ai-config/agents/specialized/healthcare-dev.md +0 -1620
- package/ai-config/agents/specialized/mobile-developer.md +0 -188
- package/ai-config/agents/specialized/parallel-plan-executor.md +0 -506
- package/ai-config/agents/specialized/plan-executor.md +0 -485
- package/ai-config/agents/specialized/solo-dev-planner-modular/00-INDEX.md +0 -485
- package/ai-config/agents/specialized/solo-dev-planner-modular/01-CORE.md +0 -3493
- package/ai-config/agents/specialized/solo-dev-planner-modular/02-SELF-CORRECTION.md +0 -778
- package/ai-config/agents/specialized/solo-dev-planner-modular/03-PROGRESSIVE-SETUP.md +0 -918
- package/ai-config/agents/specialized/solo-dev-planner-modular/04-DEPLOYMENT.md +0 -1537
- package/ai-config/agents/specialized/solo-dev-planner-modular/05-TESTING.md +0 -2633
- package/ai-config/agents/specialized/solo-dev-planner-modular/06-OPERATIONS.md +0 -5610
- package/ai-config/agents/specialized/solo-dev-planner-modular/INSTALL.md +0 -335
- package/ai-config/agents/specialized/solo-dev-planner-modular/QUICK-REFERENCE.txt +0 -215
- package/ai-config/agents/specialized/solo-dev-planner-modular/README.md +0 -260
- package/ai-config/agents/specialized/solo-dev-planner-modular/START-HERE.md +0 -379
- package/ai-config/agents/specialized/solo-dev-planner-modular/WORKFLOW-DIAGRAM.md +0 -355
- package/ai-config/agents/specialized/solo-dev-planner-modular/solo-dev-planner.md +0 -279
- package/ai-config/agents/specialized/template-writer.md +0 -347
- package/ai-config/agents/specialized/test-runner.md +0 -99
- package/ai-config/agents/specialized/vibekanban-smart-worker.md +0 -244
- package/ai-config/agents/specialized/wave-executor.md +0 -138
- package/ai-config/agents/specialized/workflow-optimizer.md +0 -1114
- package/ai-config/commands/git/changelog.md +0 -32
- package/ai-config/commands/git/ci-local.md +0 -70
- package/ai-config/commands/git/commit.md +0 -35
- package/ai-config/commands/git/fix-issue.md +0 -23
- package/ai-config/commands/git/pr-create.md +0 -42
- package/ai-config/commands/git/pr-review.md +0 -50
- package/ai-config/commands/git/worktree.md +0 -39
- package/ai-config/commands/refactoring/cleanup.md +0 -24
- package/ai-config/commands/refactoring/dead-code.md +0 -40
- package/ai-config/commands/refactoring/extract.md +0 -31
- package/ai-config/commands/testing/e2e.md +0 -30
- package/ai-config/commands/testing/tdd.md +0 -36
- package/ai-config/commands/testing/test-coverage.md +0 -30
- package/ai-config/commands/testing/test-fix.md +0 -24
- package/ai-config/commands/workflow/generate-agents-md.md +0 -85
- package/ai-config/commands/workflow/planning.md +0 -47
- package/ai-config/commands/workflows/compound.md +0 -89
- package/ai-config/commands/workflows/diagnose.md +0 -70
- package/ai-config/commands/workflows/discover.md +0 -86
- package/ai-config/commands/workflows/plan.md +0 -77
- package/ai-config/commands/workflows/review.md +0 -78
- package/ai-config/commands/workflows/work.md +0 -75
- package/ai-config/config.yaml +0 -18
- package/ai-config/hooks/_TEMPLATE.md +0 -96
- package/ai-config/hooks/block-dangerous-commands.md +0 -75
- package/ai-config/hooks/commit-guard.md +0 -90
- package/ai-config/hooks/context-loader.md +0 -73
- package/ai-config/hooks/improve-prompt.md +0 -91
- package/ai-config/hooks/learning-log.md +0 -72
- package/ai-config/hooks/model-router.md +0 -86
- package/ai-config/hooks/secret-scanner.md +0 -64
- package/ai-config/hooks/skill-validator.md +0 -102
- package/ai-config/hooks/task-artifact.md +0 -114
- package/ai-config/hooks/validate-workflow.md +0 -100
- package/ai-config/prompts/base.md +0 -71
- package/ai-config/prompts/modes/debug.md +0 -34
- package/ai-config/prompts/modes/deploy.md +0 -40
- package/ai-config/prompts/modes/research.md +0 -32
- package/ai-config/prompts/modes/review.md +0 -33
- package/ai-config/prompts/review-policy.md +0 -79
- package/ai-config/skills/_TEMPLATE.md +0 -157
- package/ai-config/skills/backend/api-gateway/SKILL.md +0 -254
- package/ai-config/skills/backend/bff-concepts/SKILL.md +0 -239
- package/ai-config/skills/backend/bff-spring/SKILL.md +0 -364
- package/ai-config/skills/backend/chi-router/SKILL.md +0 -396
- package/ai-config/skills/backend/error-handling/SKILL.md +0 -255
- package/ai-config/skills/backend/exceptions-spring/SKILL.md +0 -323
- package/ai-config/skills/backend/fastapi/SKILL.md +0 -302
- package/ai-config/skills/backend/gateway-spring/SKILL.md +0 -390
- package/ai-config/skills/backend/go-backend/SKILL.md +0 -457
- package/ai-config/skills/backend/gradle-multimodule/SKILL.md +0 -274
- package/ai-config/skills/backend/graphql-concepts/SKILL.md +0 -352
- package/ai-config/skills/backend/graphql-spring/SKILL.md +0 -398
- package/ai-config/skills/backend/grpc-concepts/SKILL.md +0 -283
- package/ai-config/skills/backend/grpc-spring/SKILL.md +0 -445
- package/ai-config/skills/backend/jwt-auth/SKILL.md +0 -412
- package/ai-config/skills/backend/notifications-concepts/SKILL.md +0 -259
- package/ai-config/skills/backend/recommendations-concepts/SKILL.md +0 -261
- package/ai-config/skills/backend/search-concepts/SKILL.md +0 -263
- package/ai-config/skills/backend/search-spring/SKILL.md +0 -375
- package/ai-config/skills/backend/spring-boot-4/SKILL.md +0 -172
- package/ai-config/skills/backend/websockets/SKILL.md +0 -532
- package/ai-config/skills/data-ai/ai-ml/SKILL.md +0 -423
- package/ai-config/skills/data-ai/analytics-concepts/SKILL.md +0 -195
- package/ai-config/skills/data-ai/analytics-spring/SKILL.md +0 -340
- package/ai-config/skills/data-ai/duckdb-analytics/SKILL.md +0 -440
- package/ai-config/skills/data-ai/langchain/SKILL.md +0 -238
- package/ai-config/skills/data-ai/mlflow/SKILL.md +0 -302
- package/ai-config/skills/data-ai/onnx-inference/SKILL.md +0 -290
- package/ai-config/skills/data-ai/powerbi/SKILL.md +0 -352
- package/ai-config/skills/data-ai/pytorch/SKILL.md +0 -274
- package/ai-config/skills/data-ai/scikit-learn/SKILL.md +0 -321
- package/ai-config/skills/data-ai/vector-db/SKILL.md +0 -301
- package/ai-config/skills/database/graph-databases/SKILL.md +0 -218
- package/ai-config/skills/database/graph-spring/SKILL.md +0 -361
- package/ai-config/skills/database/pgx-postgres/SKILL.md +0 -512
- package/ai-config/skills/database/redis-cache/SKILL.md +0 -343
- package/ai-config/skills/database/sqlite-embedded/SKILL.md +0 -388
- package/ai-config/skills/database/timescaledb/SKILL.md +0 -320
- package/ai-config/skills/docs/api-documentation/SKILL.md +0 -293
- package/ai-config/skills/docs/docs-spring/SKILL.md +0 -377
- package/ai-config/skills/docs/mustache-templates/SKILL.md +0 -190
- package/ai-config/skills/docs/technical-docs/SKILL.md +0 -447
- package/ai-config/skills/frontend/astro-ssr/SKILL.md +0 -441
- package/ai-config/skills/frontend/frontend-design/SKILL.md +0 -54
- package/ai-config/skills/frontend/frontend-web/SKILL.md +0 -368
- package/ai-config/skills/frontend/mantine-ui/SKILL.md +0 -396
- package/ai-config/skills/frontend/tanstack-query/SKILL.md +0 -439
- package/ai-config/skills/frontend/zod-validation/SKILL.md +0 -417
- package/ai-config/skills/frontend/zustand-state/SKILL.md +0 -350
- package/ai-config/skills/infrastructure/chaos-engineering/SKILL.md +0 -244
- package/ai-config/skills/infrastructure/chaos-spring/SKILL.md +0 -378
- package/ai-config/skills/infrastructure/devops-infra/SKILL.md +0 -435
- package/ai-config/skills/infrastructure/docker-containers/SKILL.md +0 -420
- package/ai-config/skills/infrastructure/kubernetes/SKILL.md +0 -456
- package/ai-config/skills/infrastructure/opentelemetry/SKILL.md +0 -546
- package/ai-config/skills/infrastructure/traefik-proxy/SKILL.md +0 -474
- package/ai-config/skills/infrastructure/woodpecker-ci/SKILL.md +0 -315
- package/ai-config/skills/mobile/ionic-capacitor/SKILL.md +0 -504
- package/ai-config/skills/mobile/mobile-ionic/SKILL.md +0 -448
- package/ai-config/skills/prompt-improver/SKILL.md +0 -125
- package/ai-config/skills/quality/ghagga-review/SKILL.md +0 -216
- package/ai-config/skills/references/hooks-patterns/SKILL.md +0 -238
- package/ai-config/skills/references/mcp-servers/SKILL.md +0 -275
- package/ai-config/skills/references/plugins-reference/SKILL.md +0 -110
- package/ai-config/skills/references/skills-reference/SKILL.md +0 -420
- package/ai-config/skills/references/subagent-templates/SKILL.md +0 -193
- package/ai-config/skills/systems-iot/modbus-protocol/SKILL.md +0 -410
- package/ai-config/skills/systems-iot/mqtt-rumqttc/SKILL.md +0 -408
- package/ai-config/skills/systems-iot/rust-systems/SKILL.md +0 -386
- package/ai-config/skills/systems-iot/tokio-async/SKILL.md +0 -324
- package/ai-config/skills/testing/playwright-e2e/SKILL.md +0 -289
- package/ai-config/skills/testing/testcontainers/SKILL.md +0 -299
- package/ai-config/skills/testing/vitest-testing/SKILL.md +0 -381
- package/ai-config/skills/workflow/ci-local-guide/SKILL.md +0 -118
- package/ai-config/skills/workflow/claude-automation-recommender/SKILL.md +0 -299
- package/ai-config/skills/workflow/claude-md-improver/SKILL.md +0 -158
- package/ai-config/skills/workflow/finishing-a-development-branch/SKILL.md +0 -117
- package/ai-config/skills/workflow/git-github/SKILL.md +0 -334
- package/ai-config/skills/workflow/git-github/references/examples.md +0 -160
- package/ai-config/skills/workflow/git-workflow/SKILL.md +0 -214
- package/ai-config/skills/workflow/ide-plugins/SKILL.md +0 -277
- package/ai-config/skills/workflow/ide-plugins-intellij/SKILL.md +0 -401
- package/ai-config/skills/workflow/obsidian-brain-workflow/SKILL.md +0 -199
- package/ai-config/skills/workflow/using-git-worktrees/SKILL.md +0 -100
- package/ai-config/skills/workflow/verification-before-completion/SKILL.md +0 -73
- package/ai-config/skills/workflow/wave-workflow/SKILL.md +0 -178
- package/schemas/agent.schema.json +0 -34
- package/schemas/ai-config.schema.json +0 -28
- package/schemas/plugin.schema.json +0 -62
- package/schemas/skill.schema.json +0 -44
|
@@ -1,173 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: data-engineer
|
|
3
|
-
description: Data engineering expert for ETL pipelines, data warehouses, and big data processing
|
|
4
|
-
trigger: >
|
|
5
|
-
  ETL, data pipeline, BigQuery, Snowflake, dbt, data warehouse, Spark, Kafka,
|
|
6
|
-
  Airflow, data lake, batch processing, stream processing, CDC, data ingestion
|
|
7
|
-
category: data-ai
|
|
8
|
-
color: cyan
|
|
9
|
-
tools: Write, Read, MultiEdit, Bash, Grep, Glob
|
|
10
|
-
config:
|
|
11
|
-
  model: sonnet
|
|
12
|
-
metadata:
|
|
13
|
-
  version: "2.0"
|
|
14
|
-
  updated: "2026-02"
|
|
15
|
-
---
|
|
16
|
-
|
|
17
|
-
You are a data engineer specializing in building scalable data infrastructure and pipelines.
|
|
18
|
-
|
|
19
|
-
## Core Expertise
|
|
20
|
-
|
|
21
|
-
### Data Pipeline Development
|
|
22
|
-
- ETL/ELT pipeline design
|
|
23
|
-
- Real-time streaming pipelines
|
|
24
|
-
- Batch processing systems
|
|
25
|
-
- Data validation and quality checks
|
|
26
|
-
- Error handling and recovery
|
|
27
|
-
- Pipeline orchestration
|
|
28
|
-
- Data lineage tracking
|
|
29
|
-
|
|
30
|
-
### Big Data Technologies
|
|
31
|
-
- Apache Spark (PySpark, Spark SQL)
|
|
32
|
-
- Apache Kafka, Pulsar
|
|
33
|
-
- Apache Airflow, Dagster, Prefect
|
|
34
|
-
- Apache Beam, Flink
|
|
35
|
-
- Hadoop ecosystem (HDFS, Hive, HBase)
|
|
36
|
-
- Databricks platform
|
|
37
|
-
- Snowflake, BigQuery, Redshift
|
|
38
|
-
|
|
39
|
-
### Data Storage Systems
|
|
40
|
-
#### Data Warehouses
|
|
41
|
-
- Snowflake
|
|
42
|
-
- Amazon Redshift
|
|
43
|
-
- Google BigQuery
|
|
44
|
-
- Azure Synapse
|
|
45
|
-
- ClickHouse
|
|
46
|
-
|
|
47
|
-
#### Data Lakes
|
|
48
|
-
- AWS S3 + Athena
|
|
49
|
-
- Azure Data Lake Storage
|
|
50
|
-
- Delta Lake, Apache Iceberg
|
|
51
|
-
- Apache Hudi
|
|
52
|
-
|
|
53
|
-
#### Databases
|
|
54
|
-
- PostgreSQL, MySQL
|
|
55
|
-
- MongoDB, Cassandra
|
|
56
|
-
- Redis, Elasticsearch
|
|
57
|
-
- Time-series DBs (InfluxDB, TimescaleDB)
|
|
58
|
-
|
|
59
|
-
## Data Processing Patterns
|
|
60
|
-
### Batch Processing
|
|
61
|
-
- Daily/hourly data loads
|
|
62
|
-
- Historical data processing
|
|
63
|
-
- Large-scale transformations
|
|
64
|
-
- Data warehouse updates
|
|
65
|
-
|
|
66
|
-
### Stream Processing
|
|
67
|
-
- Real-time analytics
|
|
68
|
-
- Event-driven architectures
|
|
69
|
-
- Change Data Capture (CDC)
|
|
70
|
-
- IoT data ingestion
|
|
71
|
-
- Log processing
|
|
72
|
-
|
|
73
|
-
### Data Modeling
|
|
74
|
-
- Dimensional modeling (Star, Snowflake)
|
|
75
|
-
- Data vault modeling
|
|
76
|
-
- Slowly Changing Dimensions (SCD)
|
|
77
|
-
- Time-series modeling
|
|
78
|
-
- Graph data models
|
|
79
|
-
|
|
80
|
-
## ETL/ELT Best Practices
|
|
81
|
-
1. Idempotent pipeline design
|
|
82
|
-
2. Incremental processing
|
|
83
|
-
3. Data quality validation
|
|
84
|
-
4. Schema evolution handling
|
|
85
|
-
5. Monitoring and alerting
|
|
86
|
-
6. Cost optimization
|
|
87
|
-
7. Performance tuning
|
|
88
|
-
|
|
89
|
-
## Data Quality & Governance
|
|
90
|
-
- Data profiling and validation
|
|
91
|
-
- Schema registry management
|
|
92
|
-
- Data catalog maintenance
|
|
93
|
-
- Privacy and compliance (GDPR, CCPA)
|
|
94
|
-
- Data retention policies
|
|
95
|
-
- Access control and security
|
|
96
|
-
|
|
97
|
-
## Cloud Data Platforms
|
|
98
|
-
### AWS
|
|
99
|
-
- S3, Glue, EMR
|
|
100
|
-
- Kinesis, MSK
|
|
101
|
-
- Redshift, RDS
|
|
102
|
-
- Lambda, Step Functions
|
|
103
|
-
|
|
104
|
-
### GCP
|
|
105
|
-
- Cloud Storage, Dataflow
|
|
106
|
-
- Pub/Sub, Dataproc
|
|
107
|
-
- BigQuery, Cloud SQL
|
|
108
|
-
- Cloud Functions, Composer
|
|
109
|
-
|
|
110
|
-
### Azure
|
|
111
|
-
- Data Lake Storage, Data Factory
|
|
112
|
-
- Event Hubs, Stream Analytics
|
|
113
|
-
- Synapse, SQL Database
|
|
114
|
-
- Functions, Logic Apps
|
|
115
|
-
|
|
116
|
-
## Output Format
|
|
117
|
-
```python
|
|
118
|
-
# Data Pipeline Implementation
|
|
119
|
-
from airflow import DAG
|
|
120
|
-
from datetime import datetime, timedelta
|
|
121
|
-
|
|
122
|
-
# Pipeline configuration
|
|
123
|
-
pipeline_config = {
|
|
124
|
-
"source": "raw_data",
|
|
125
|
-
"destination": "processed_data",
|
|
126
|
-
"processing_steps": [...]
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
# ETL Pipeline
|
|
130
|
-
class DataPipeline:
|
|
131
|
-
    def extract(self):
|
|
132
|
-
        """Extract data from source systems"""
|
|
133
|
-
        pass
|
|
134
|
-
|
|
135
|
-
    def transform(self):
|
|
136
|
-
        """Apply business logic transformations"""
|
|
137
|
-
        pass
|
|
138
|
-
|
|
139
|
-
    def load(self):
|
|
140
|
-
        """Load data to destination"""
|
|
141
|
-
        pass
|
|
142
|
-
|
|
143
|
-
    def validate(self):
|
|
144
|
-
        """Validate data quality"""
|
|
145
|
-
        pass
|
|
146
|
-
|
|
147
|
-
# Spark job example
|
|
148
|
-
def process_large_dataset(spark, input_path, output_path):
|
|
149
|
-
    df = spark.read.parquet(input_path)
|
|
150
|
-
|
|
151
|
-
    # Transformations
|
|
152
|
-
    processed_df = df.transform(clean_data) \
|
|
153
|
-
        .transform(enrich_data) \
|
|
154
|
-
        .transform(aggregate_metrics)
|
|
155
|
-
|
|
156
|
-
    # Write results
|
|
157
|
-
    processed_df.write.mode("overwrite").parquet(output_path)
|
|
158
|
-
|
|
159
|
-
# Data quality checks
|
|
160
|
-
quality_checks = {
|
|
161
|
-
"completeness": check_null_values,
|
|
162
|
-
"uniqueness": check_duplicates,
|
|
163
|
-
"validity": check_data_ranges,
|
|
164
|
-
"consistency": check_referential_integrity
|
|
165
|
-
}
|
|
166
|
-
```
|
|
167
|
-
|
|
168
|
-
### Performance Metrics
|
|
169
|
-
- Pipeline execution time
|
|
170
|
-
- Data processing throughput
|
|
171
|
-
- Resource utilization
|
|
172
|
-
- Data quality scores
|
|
173
|
-
- Cost per GB processed
|