tech-hub-skills 1.2.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/README.md +291 -0
- package/.claude/bin/cli.js +266 -0
- package/.claude/package.json +46 -0
- package/.claude/roles/ai-engineer/skills/01-prompt-engineering/prompt_ab_tester.py +356 -0
- package/.claude/roles/ai-engineer/skills/01-prompt-engineering/prompt_template_manager.py +274 -0
- package/.claude/roles/ai-engineer/skills/01-prompt-engineering/token_cost_estimator.py +324 -0
- package/.claude/roles/ai-engineer/skills/02-rag-pipeline/document_chunker.py +336 -0
- package/.claude/roles/ai-engineer/skills/02-rag-pipeline/rag_pipeline.sql +213 -0
- package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/bronze_ingestion.py +337 -0
- package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/medallion_queries.sql +300 -0
- package/.claude/roles/data-scientist/skills/01-eda-automation/eda_generator.py +446 -0
- package/.claude/roles/system-design/skills/08-process-automation/ai_prompt_generator.py +744 -0
- package/.claude/roles/system-design/skills/08-process-automation/automation_recommender.py +688 -0
- package/.claude/roles/system-design/skills/08-process-automation/plan_generator.py +679 -0
- package/.claude/roles/system-design/skills/08-process-automation/process_analyzer.py +528 -0
- package/.claude/roles/system-design/skills/08-process-automation/process_parser.py +684 -0
- package/.claude/roles/system-design/skills/08-process-automation/role_matcher.py +615 -0
- package/.claude/skills/README.md +336 -0
- package/.claude/skills/ai-engineer.md +104 -0
- package/.claude/skills/aws.md +143 -0
- package/.claude/skills/azure.md +149 -0
- package/.claude/skills/backend-developer.md +108 -0
- package/.claude/skills/code-review.md +399 -0
- package/.claude/skills/compliance-automation.md +747 -0
- package/.claude/skills/compliance-officer.md +108 -0
- package/.claude/skills/data-engineer.md +113 -0
- package/.claude/skills/data-governance.md +102 -0
- package/.claude/skills/data-scientist.md +123 -0
- package/.claude/skills/database-admin.md +109 -0
- package/.claude/skills/devops.md +160 -0
- package/.claude/skills/docker.md +160 -0
- package/.claude/skills/enterprise-dashboard.md +613 -0
- package/.claude/skills/finops.md +184 -0
- package/.claude/skills/frontend-developer.md +108 -0
- package/.claude/skills/gcp.md +143 -0
- package/.claude/skills/ml-engineer.md +115 -0
- package/.claude/skills/mlops.md +187 -0
- package/.claude/skills/network-engineer.md +109 -0
- package/.claude/skills/optimization-advisor.md +329 -0
- package/.claude/skills/orchestrator.md +623 -0
- package/.claude/skills/platform-engineer.md +102 -0
- package/.claude/skills/process-automation.md +226 -0
- package/.claude/skills/process-changelog.md +184 -0
- package/.claude/skills/process-documentation.md +484 -0
- package/.claude/skills/process-kanban.md +324 -0
- package/.claude/skills/process-versioning.md +214 -0
- package/.claude/skills/product-designer.md +104 -0
- package/.claude/skills/project-starter.md +443 -0
- package/.claude/skills/qa-engineer.md +109 -0
- package/.claude/skills/security-architect.md +135 -0
- package/.claude/skills/sre.md +109 -0
- package/.claude/skills/system-design.md +126 -0
- package/.claude/skills/technical-writer.md +101 -0
- package/.gitattributes +2 -0
- package/GITHUB_COPILOT.md +106 -0
- package/README.md +117 -224
- package/package.json +4 -42
- package/bin/cli.js +0 -241
- /package/{LICENSE → .claude/LICENSE} +0 -0
- /package/{bin → .claude/bin}/copilot.js +0 -0
- /package/{bin → .claude/bin}/postinstall.js +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/README.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/ai-engineer.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/aws.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/azure.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/backend-developer.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/code-review.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/compliance-automation.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/compliance-officer.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/data-engineer.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/data-governance.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/data-scientist.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/database-admin.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/devops.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/docker.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/enterprise-dashboard.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/finops.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/frontend-developer.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/gcp.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/ml-engineer.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/mlops.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/network-engineer.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/optimization-advisor.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/orchestrator.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/platform-engineer.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/process-automation.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/process-changelog.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/process-documentation.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/process-kanban.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/process-versioning.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/product-designer.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/project-starter.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/qa-engineer.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/security-architect.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/sre.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/system-design.md +0 -0
- /package/{tech_hub_skills/skills → .claude/commands}/technical-writer.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/01-prompt-engineering/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/02-rag-pipeline/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/03-agent-orchestration/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/04-llm-guardrails/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/05-vector-embeddings/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/06-llm-evaluation/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/azure/skills/01-infrastructure-fundamentals/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/azure/skills/02-data-factory/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/azure/skills/03-synapse-analytics/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/azure/skills/04-databricks/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/azure/skills/05-functions/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/azure/skills/06-kubernetes-service/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/azure/skills/07-openai-service/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/azure/skills/08-machine-learning/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/azure/skills/09-storage-adls/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/azure/skills/10-networking/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/azure/skills/11-sql-cosmos/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/azure/skills/12-event-hubs/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/code-review/skills/01-automated-code-review/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/code-review/skills/02-pr-review-workflow/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/code-review/skills/03-code-quality-gates/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/code-review/skills/04-reviewer-assignment/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/code-review/skills/05-review-analytics/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-engineer/skills/01-lakehouse-architecture/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-engineer/skills/02-etl-pipeline/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-engineer/skills/03-data-quality/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-engineer/skills/04-streaming-pipelines/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-engineer/skills/05-performance-optimization/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-governance/skills/01-data-catalog/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-governance/skills/02-data-lineage/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-governance/skills/03-data-quality-framework/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-governance/skills/04-access-control/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-governance/skills/05-master-data-management/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-governance/skills/06-compliance-privacy/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-scientist/skills/01-eda-automation/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-scientist/skills/02-statistical-modeling/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-scientist/skills/03-feature-engineering/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-scientist/skills/04-predictive-modeling/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-scientist/skills/05-customer-analytics/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-scientist/skills/06-campaign-analysis/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-scientist/skills/07-experimentation/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/data-scientist/skills/08-data-visualization/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/devops/skills/01-cicd-pipeline/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/devops/skills/02-container-orchestration/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/devops/skills/03-infrastructure-as-code/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/devops/skills/04-gitops/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/devops/skills/05-environment-management/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/devops/skills/06-automated-testing/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/devops/skills/07-release-management/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/devops/skills/08-monitoring-alerting/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/devops/skills/09-devsecops/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/finops/skills/01-cost-visibility/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/finops/skills/02-resource-tagging/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/finops/skills/03-budget-management/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/finops/skills/04-reserved-instances/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/finops/skills/05-spot-optimization/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/finops/skills/06-storage-tiering/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/finops/skills/07-compute-rightsizing/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/finops/skills/08-chargeback/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/01-mlops-pipeline/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/02-feature-engineering/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/03-model-training/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/04-model-serving/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/05-model-monitoring/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/mlops/skills/01-ml-pipeline-orchestration/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/mlops/skills/02-experiment-tracking/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/mlops/skills/03-model-registry/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/mlops/skills/04-feature-store/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/mlops/skills/05-model-deployment/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/mlops/skills/06-model-observability/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/mlops/skills/07-data-versioning/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/mlops/skills/08-ab-testing/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/mlops/skills/09-automated-retraining/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/01-internal-developer-platform/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/02-self-service-infrastructure/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/03-slo-sli-management/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/04-developer-experience/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/05-incident-management/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/06-capacity-management/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/product-designer/skills/01-requirements-discovery/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/product-designer/skills/02-user-research/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/product-designer/skills/03-brainstorming-ideation/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/product-designer/skills/04-ux-design/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/product-designer/skills/05-product-market-fit/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/product-designer/skills/06-stakeholder-management/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/security-architect/skills/01-pii-detection/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/security-architect/skills/02-threat-modeling/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/security-architect/skills/03-infrastructure-security/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/security-architect/skills/04-iam/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/security-architect/skills/05-application-security/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/security-architect/skills/06-secrets-management/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/security-architect/skills/07-security-monitoring/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/system-design/skills/01-architecture-patterns/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/system-design/skills/02-requirements-engineering/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/system-design/skills/03-scalability/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/system-design/skills/04-high-availability/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/system-design/skills/05-cost-optimization-design/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/system-design/skills/06-api-design/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/system-design/skills/07-observability-architecture/README.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/system-design/skills/08-process-automation/PROCESS_TEMPLATE.md +0 -0
- /package/{tech_hub_skills → .claude}/roles/system-design/skills/08-process-automation/README.md +0 -0
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
-- Medallion Architecture SQL Patterns
|
|
2
|
+
-- Bronze → Silver → Gold transformations for Data Lakehouse
|
|
3
|
+
|
|
4
|
+
-- ================================================================
|
|
5
|
+
-- BRONZE LAYER - Raw Data Ingestion
|
|
6
|
+
-- ================================================================
|
|
7
|
+
|
|
8
|
+
-- View bronze layer with metadata: rows ingested recently, newest first.
-- `*` already returns every column of bronze.crm_leads, including the
-- _bronze_ingestion_timestamp, _bronze_source_path and _bronze_table_name
-- metadata columns, so they are not listed a second time; repeating them
-- produced duplicate column names in the result set.
-- Window: _bronze_ingestion_date on or after (today - 7 days).
SELECT
    *
FROM bronze.crm_leads
WHERE _bronze_ingestion_date >= CURRENT_DATE - INTERVAL '7 days'
ORDER BY _bronze_ingestion_timestamp DESC;
|
|
17
|
+
|
|
18
|
+
-- Duplicate detection in bronze: one row per lead_id that occurs more than
-- once, with how many times it occurs and the earliest / latest ingestion
-- timestamp recorded for it.
SELECT
    lead_id,
    COUNT(*)                         AS duplicate_count,
    MIN(_bronze_ingestion_timestamp) AS first_seen,
    MAX(_bronze_ingestion_timestamp) AS last_seen
FROM bronze.crm_leads
GROUP BY
    lead_id
HAVING COUNT(*) > 1;
|
|
27
|
+
|
|
28
|
+
-- Bronze data quality summary, one row per ingestion date, latest date first.
--   total_records   : all rows ingested on that date
--   unique_leads    : distinct non-NULL lead_id values
--   duplicates      : total_records minus unique_leads (COUNT(DISTINCT ...)
--                     skips NULLs, so rows with a NULL lead_id land here too)
--   missing_email   : rows whose email IS NULL
--   missing_company : rows whose company IS NULL
SELECT
    _bronze_ingestion_date,
    COUNT(*)                                        AS total_records,
    COUNT(DISTINCT lead_id)                         AS unique_leads,
    COUNT(*) - COUNT(DISTINCT lead_id)              AS duplicates,
    COUNT(CASE WHEN email IS NULL THEN 1 END)       AS missing_email,
    COUNT(CASE WHEN company IS NULL THEN 1 END)     AS missing_company
FROM bronze.crm_leads
GROUP BY
    _bronze_ingestion_date
ORDER BY
    _bronze_ingestion_date DESC;
|
|
39
|
+
|
|
40
|
+
-- ================================================================
|
|
41
|
+
-- SILVER LAYER - Cleaned & Standardized
|
|
42
|
+
-- ================================================================
|
|
43
|
+
|
|
44
|
+
-- Bronze → Silver full rebuild (deduplication and cleaning).
--   1. Rank the rows of each lead_id by ingestion timestamp, newest first.
--   2. Keep only the newest row, and only when its email is non-NULL and
--      contains '@'.
--   3. Standardize text: email trimmed and lower-cased, company trimmed,
--      industry and status upper-cased; a NULL lead_score becomes 0.
-- The email check runs after the ranking, so a lead whose newest row fails
-- it is left out entirely; an older valid row is not used in its place.
CREATE OR REPLACE TABLE silver.crm_leads_clean AS
WITH ranked AS (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY lead_id
            ORDER BY _bronze_ingestion_timestamp DESC
        ) AS recency_rank
    FROM bronze.crm_leads
)
SELECT
    lead_id,
    LOWER(TRIM(email))      AS email,
    TRIM(company)           AS company,
    UPPER(industry)         AS industry,
    company_size,
    job_title,
    lead_source,
    created_date,
    COALESCE(lead_score, 0) AS lead_score,
    UPPER(status)           AS status,
    -- Silver metadata: when this row was processed, plus the bronze
    -- ingestion timestamp carried over unchanged.
    CURRENT_TIMESTAMP()     AS _silver_processed_timestamp,
    _bronze_ingestion_timestamp
FROM ranked
WHERE recency_rank = 1        -- most recent version of each lead
  AND email IS NOT NULL       -- basic validation
  AND email LIKE '%@%';       -- minimal email format check
|
|
75
|
+
|
|
76
|
+
-- Silver quality metrics: a single summary row over silver.crm_leads_clean.
-- high_score_leads counts rows with lead_score >= 80; last_processed is the
-- newest _silver_processed_timestamp in the table.
SELECT
    COUNT(*)                                     AS total_records,
    COUNT(DISTINCT email)                        AS unique_emails,
    COUNT(CASE WHEN lead_score >= 80 THEN 1 END) AS high_score_leads,
    AVG(lead_score)                              AS avg_lead_score,
    COUNT(DISTINCT industry)                     AS unique_industries,
    COUNT(DISTINCT company)                      AS unique_companies,
    MAX(_silver_processed_timestamp)             AS last_processed
FROM silver.crm_leads_clean;
|
|
86
|
+
|
|
87
|
+
-- Schema drift detection (Silver): list the current columns of
-- silver.crm_leads_clean in ordinal order. total_columns repeats the overall
-- column count on every row, so a changed count is visible at a glance.
SELECT
    column_name,
    data_type,
    is_nullable,
    COUNT(*) OVER () AS total_columns
FROM information_schema.columns
WHERE table_schema = 'silver'
  AND table_name = 'crm_leads_clean'
ORDER BY
    ordinal_position;
|
|
97
|
+
|
|
98
|
+
-- ================================================================
|
|
99
|
+
-- GOLD LAYER - Business Logic & Aggregations
|
|
100
|
+
-- ================================================================
|
|
101
|
+
|
|
102
|
+
-- Silver → Gold: lead segmentation.
-- Copies each silver lead and adds four derived attributes plus a creation
-- timestamp. Because this is a CREATE TABLE ... AS, days_since_created and
-- _gold_created_timestamp are fixed at the time the table is (re)built.
CREATE OR REPLACE TABLE gold.lead_segments AS
SELECT
    lead_id,
    email,
    company,
    industry,
    company_size,
    job_title,
    lead_source,
    created_date,
    lead_score,
    status,

    -- Lead segment from score thresholds; the first matching branch wins.
    CASE
        WHEN lead_score >= 90 THEN 'HOT'
        WHEN lead_score >= 70 THEN 'WARM'
        WHEN lead_score >= 50 THEN 'QUALIFIED'
        ELSE 'COLD'
    END AS lead_segment,

    -- Seniority from case-insensitive substring matches on job_title.
    -- Branch order matters: a title containing both DIRECTOR and SENIOR is a
    -- 'Director'. A NULL or unmatched title falls through to 'IC'.
    -- NOTE(review): '%SR%' matches the letters SR anywhere in the title, not
    -- only the abbreviation "Sr" — confirm this is intended.
    CASE
        WHEN UPPER(job_title) LIKE '%VP%'
          OR UPPER(job_title) LIKE '%VICE PRESIDENT%' THEN 'VP+'
        WHEN UPPER(job_title) LIKE '%DIRECTOR%'       THEN 'Director'
        WHEN UPPER(job_title) LIKE '%MANAGER%'        THEN 'Manager'
        WHEN UPPER(job_title) LIKE '%SENIOR%'
          OR UPPER(job_title) LIKE '%SR%'             THEN 'Senior IC'
        ELSE 'IC'
    END AS seniority_level,

    -- Company category from the company_size bucket label; any label not
    -- listed here (including NULL) is treated as 'SMB'.
    CASE
        WHEN company_size IN ('1000+', '500-1000') THEN 'Enterprise'
        WHEN company_size IN ('100-500', '50-100') THEN 'Mid-Market'
        ELSE 'SMB'
    END AS company_category,

    -- Age of the lead in days as of the table build.
    DATEDIFF(CURRENT_DATE, created_date) AS days_since_created,

    -- Gold metadata
    CURRENT_TIMESTAMP() AS _gold_created_timestamp
FROM silver.crm_leads_clean;
|
|
141
|
+
|
|
142
|
+
-- Gold: daily lead metrics.
-- One row per (creation date, lead_source, lead_segment, company_category).
-- lead_segment is itself a grouping key, so hot_leads equals lead_count on
-- 'HOT' rows and is 0 on every other row.
CREATE OR REPLACE TABLE gold.daily_lead_metrics AS
SELECT
    DATE(created_date)                                  AS metric_date,
    lead_source,
    lead_segment,
    company_category,
    COUNT(*)                                            AS lead_count,
    AVG(lead_score)                                     AS avg_lead_score,
    COUNT(CASE WHEN lead_segment = 'HOT' THEN 1 END)    AS hot_leads,
    COUNT(CASE WHEN status = 'QUALIFIED' THEN 1 END)    AS qualified_leads,
    COUNT(DISTINCT company)                             AS unique_companies,
    COUNT(DISTINCT industry)                            AS unique_industries
FROM gold.lead_segments
GROUP BY
    DATE(created_date),
    lead_source,
    lead_segment,
    company_category;
|
|
161
|
+
|
|
162
|
+
-- Gold: lead source performance.
-- One row per lead_source, best quality_rate first.
--   quality_leads        : leads segmented 'HOT' or 'WARM'
--   quality_rate         : quality_leads as a percentage of total_leads,
--                          rounded to 2 decimals
--   days_since_last_lead : days between today and the newest created_date
CREATE OR REPLACE VIEW gold.lead_source_performance AS
SELECT
    lead_source,
    COUNT(*)                                                     AS total_leads,
    AVG(lead_score)                                              AS avg_score,
    COUNT(CASE WHEN lead_segment = 'HOT' THEN 1 END)             AS hot_leads,
    COUNT(CASE WHEN lead_segment IN ('HOT', 'WARM') THEN 1 END)  AS quality_leads,
    ROUND(
        100.0 * COUNT(CASE WHEN lead_segment IN ('HOT', 'WARM') THEN 1 END) / COUNT(*),
        2
    )                                                            AS quality_rate,
    COUNT(DISTINCT company)                                      AS unique_companies,
    MAX(created_date)                                            AS latest_lead_date,
    DATEDIFF(CURRENT_DATE, MAX(created_date))                    AS days_since_last_lead
FROM gold.lead_segments
GROUP BY
    lead_source
ORDER BY
    quality_rate DESC;
|
|
177
|
+
|
|
178
|
+
-- Gold: industry analysis.
-- One row per (industry, company_category) with at least 10 leads, ordered
-- by hot lead count and then by average score, both descending.
-- senior_decision_makers counts leads whose seniority_level is 'VP+' or
-- 'Director'; avg_age_days is the mean days_since_created, to 1 decimal.
CREATE OR REPLACE VIEW gold.industry_analysis AS
SELECT
    industry,
    company_category,
    COUNT(*)                                                             AS lead_count,
    AVG(lead_score)                                                      AS avg_lead_score,
    COUNT(CASE WHEN lead_segment = 'HOT' THEN 1 END)                     AS hot_leads,
    COUNT(CASE WHEN seniority_level IN ('VP+', 'Director') THEN 1 END)   AS senior_decision_makers,
    COUNT(DISTINCT company)                                              AS unique_companies,
    ROUND(AVG(days_since_created), 1)                                    AS avg_age_days
FROM gold.lead_segments
GROUP BY
    industry,
    company_category
HAVING COUNT(*) >= 10
ORDER BY
    hot_leads DESC,
    avg_lead_score DESC;
|
|
193
|
+
|
|
194
|
+
-- ================================================================
|
|
195
|
+
-- INCREMENTAL PROCESSING PATTERNS
|
|
196
|
+
-- ================================================================
|
|
197
|
+
|
|
198
|
+
-- Incremental load: Bronze → Silver (only new/updated records).
--
-- Source rows are the bronze rows ingested after the high-water mark, i.e.
-- the newest _bronze_ingestion_timestamp already present in silver. They are
-- reduced to the newest row per lead_id, validated (email non-NULL and
-- containing '@') and standardized with the same expressions as the full
-- rebuild of silver.crm_leads_clean.
--
-- Rows whose lead_id already exists in silver are overwritten; all others
-- are inserted. _silver_processed_timestamp is set to the merge time in
-- both branches.
--
-- When silver is empty the high-water mark falls back to 1900-01-01. The
-- fallback is cast to TIMESTAMP explicitly so that COALESCE and the
-- comparison do not depend on implicit string/timestamp coercion.
-- NOTE(review): assumes _bronze_ingestion_timestamp is a TIMESTAMP column —
-- confirm against the bronze table definition.
MERGE INTO silver.crm_leads_clean AS target
USING (
    SELECT
        lead_id,
        LOWER(TRIM(email))      AS email,
        TRIM(company)           AS company,
        UPPER(industry)         AS industry,
        company_size,
        job_title,
        lead_source,
        created_date,
        COALESCE(lead_score, 0) AS lead_score,
        UPPER(status)           AS status,
        _bronze_ingestion_timestamp
    FROM (
        SELECT
            *,
            ROW_NUMBER() OVER (
                PARTITION BY lead_id
                ORDER BY _bronze_ingestion_timestamp DESC
            ) AS rn
        FROM bronze.crm_leads
        WHERE _bronze_ingestion_timestamp > (
            SELECT
                COALESCE(
                    MAX(_bronze_ingestion_timestamp),
                    CAST('1900-01-01' AS TIMESTAMP)
                )
            FROM silver.crm_leads_clean
        )
    ) AS ranked
    WHERE rn = 1                -- newest version of each lead in this batch
      AND email IS NOT NULL
      AND email LIKE '%@%'
) AS source
ON target.lead_id = source.lead_id
WHEN MATCHED THEN
    UPDATE SET
        email = source.email,
        company = source.company,
        industry = source.industry,
        company_size = source.company_size,
        job_title = source.job_title,
        lead_source = source.lead_source,
        created_date = source.created_date,
        lead_score = source.lead_score,
        status = source.status,
        _silver_processed_timestamp = CURRENT_TIMESTAMP(),
        _bronze_ingestion_timestamp = source._bronze_ingestion_timestamp
WHEN NOT MATCHED THEN
    INSERT (
        lead_id, email, company, industry, company_size,
        job_title, lead_source, created_date, lead_score, status,
        _silver_processed_timestamp, _bronze_ingestion_timestamp
    )
    VALUES (
        source.lead_id, source.email, source.company, source.industry,
        source.company_size, source.job_title, source.lead_source,
        source.created_date, source.lead_score, source.status,
        CURRENT_TIMESTAMP(), source._bronze_ingestion_timestamp
    );
|
|
255
|
+
|
|
256
|
+
-- ================================================================
|
|
257
|
+
-- DATA QUALITY MONITORING
|
|
258
|
+
-- ================================================================
|
|
259
|
+
|
|
260
|
+
-- Cross-layer data quality dashboard: one row per layer with its row count,
-- distinct lead_id count and most recent activity timestamp.
-- last_update comes from a different column in each layer: ingestion time
-- for Bronze, processing time for Silver, creation time for Gold.
SELECT
    'Bronze'                         AS layer,
    COUNT(*)                         AS record_count,
    COUNT(DISTINCT lead_id)          AS unique_ids,
    MAX(_bronze_ingestion_timestamp) AS last_update
FROM bronze.crm_leads

UNION ALL

SELECT
    'Silver'                         AS layer,
    COUNT(*)                         AS record_count,
    COUNT(DISTINCT lead_id)          AS unique_ids,
    MAX(_silver_processed_timestamp) AS last_update
FROM silver.crm_leads_clean

UNION ALL

SELECT
    'Gold'                           AS layer,
    COUNT(*)                         AS record_count,
    COUNT(DISTINCT lead_id)          AS unique_ids,
    MAX(_gold_created_timestamp)     AS last_update
FROM gold.lead_segments;
|
|
285
|
+
|
|
286
|
+
-- ================================================================
|
|
287
|
+
-- PERFORMANCE OPTIMIZATION
|
|
288
|
+
-- ================================================================
|
|
289
|
+
|
|
290
|
+
-- Optimize Silver table (Optimize + Vacuum)
|
|
291
|
+
-- OPTIMIZE silver.crm_leads_clean ZORDER BY (lead_id, created_date);
|
|
292
|
+
-- VACUUM silver.crm_leads_clean RETAIN 168 HOURS; -- 7 days
|
|
293
|
+
|
|
294
|
+
-- Optimize Gold table
|
|
295
|
+
-- OPTIMIZE gold.lead_segments ZORDER BY (lead_segment, created_date, company_category);
|
|
296
|
+
-- VACUUM gold.lead_segments RETAIN 168 HOURS;
|
|
297
|
+
|
|
298
|
+
-- Table statistics for query optimization
|
|
299
|
+
-- ANALYZE TABLE silver.crm_leads_clean COMPUTE STATISTICS;
|
|
300
|
+
-- ANALYZE TABLE gold.lead_segments COMPUTE STATISTICS FOR ALL COLUMNS;
|