@cloudstreamsoftware/claude-tools 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +152 -37
- package/agents/INDEX.md +183 -0
- package/agents/architect.md +247 -0
- package/agents/build-error-resolver.md +555 -0
- package/agents/catalyst-deployer.md +132 -0
- package/agents/code-reviewer.md +121 -0
- package/agents/compliance-auditor.md +148 -0
- package/agents/creator-architect.md +395 -0
- package/agents/deluge-reviewer.md +98 -0
- package/agents/doc-updater.md +471 -0
- package/agents/e2e-runner.md +711 -0
- package/agents/planner.md +122 -0
- package/agents/refactor-cleaner.md +309 -0
- package/agents/security-reviewer.md +582 -0
- package/agents/tdd-guide.md +302 -0
- package/bin/cloudstream-setup.js +16 -6
- package/config/versions.json +63 -0
- package/dist/hooks/hooks.json +209 -0
- package/dist/index.js +47 -0
- package/dist/lib/asset-value.js +609 -0
- package/dist/lib/client-manager.js +300 -0
- package/dist/lib/command-matcher.js +242 -0
- package/dist/lib/cross-session-patterns.js +754 -0
- package/dist/lib/intent-classifier.js +1075 -0
- package/dist/lib/package-manager.js +374 -0
- package/dist/lib/recommendation-engine.js +597 -0
- package/dist/lib/session-memory.js +489 -0
- package/dist/lib/skill-effectiveness.js +486 -0
- package/dist/lib/skill-matcher.js +595 -0
- package/dist/lib/tutorial-metrics.js +242 -0
- package/dist/lib/tutorial-progress.js +209 -0
- package/dist/lib/tutorial-renderer.js +431 -0
- package/dist/lib/utils.js +380 -0
- package/dist/lib/verify-formatter.js +143 -0
- package/dist/lib/workflow-state.js +249 -0
- package/hooks/hooks.json +209 -0
- package/package.json +5 -1
- package/scripts/aggregate-sessions.js +290 -0
- package/scripts/branch-name-validator.js +291 -0
- package/scripts/build.js +101 -0
- package/scripts/commands/client-switch.js +231 -0
- package/scripts/deprecate-skill.js +610 -0
- package/scripts/diagnose.js +324 -0
- package/scripts/doc-freshness.js +168 -0
- package/scripts/generate-weekly-digest.js +393 -0
- package/scripts/health-check.js +270 -0
- package/scripts/hooks/credential-check.js +101 -0
- package/scripts/hooks/evaluate-session.js +81 -0
- package/scripts/hooks/pre-compact.js +66 -0
- package/scripts/hooks/prompt-analyzer.js +276 -0
- package/scripts/hooks/prompt-router.js +422 -0
- package/scripts/hooks/quality-gate-enforcer.js +371 -0
- package/scripts/hooks/session-end.js +156 -0
- package/scripts/hooks/session-start.js +195 -0
- package/scripts/hooks/skill-injector.js +333 -0
- package/scripts/hooks/suggest-compact.js +58 -0
- package/scripts/lib/asset-value.js +609 -0
- package/scripts/lib/client-manager.js +300 -0
- package/scripts/lib/command-matcher.js +242 -0
- package/scripts/lib/cross-session-patterns.js +754 -0
- package/scripts/lib/intent-classifier.js +1075 -0
- package/scripts/lib/package-manager.js +374 -0
- package/scripts/lib/recommendation-engine.js +597 -0
- package/scripts/lib/session-memory.js +489 -0
- package/scripts/lib/skill-effectiveness.js +486 -0
- package/scripts/lib/skill-matcher.js +595 -0
- package/scripts/lib/tutorial-metrics.js +242 -0
- package/scripts/lib/tutorial-progress.js +209 -0
- package/scripts/lib/tutorial-renderer.js +431 -0
- package/scripts/lib/utils.js +380 -0
- package/scripts/lib/verify-formatter.js +143 -0
- package/scripts/lib/workflow-state.js +249 -0
- package/scripts/onboard.js +363 -0
- package/scripts/quarterly-report.js +692 -0
- package/scripts/setup-package-manager.js +204 -0
- package/scripts/sync-upstream.js +391 -0
- package/scripts/test.js +108 -0
- package/scripts/tutorial-runner.js +351 -0
- package/scripts/validate-all.js +201 -0
- package/scripts/verifiers/agents.js +245 -0
- package/scripts/verifiers/config.js +186 -0
- package/scripts/verifiers/environment.js +123 -0
- package/scripts/verifiers/hooks.js +188 -0
- package/scripts/verifiers/index.js +38 -0
- package/scripts/verifiers/persistence.js +140 -0
- package/scripts/verifiers/plugin.js +215 -0
- package/scripts/verifiers/skills.js +209 -0
- package/scripts/verify-setup.js +164 -0
- package/skills/INDEX.md +157 -0
- package/skills/backend-patterns/SKILL.md +586 -0
- package/skills/backend-patterns/catalyst-patterns.md +128 -0
- package/skills/bigquery-patterns/SKILL.md +27 -0
- package/skills/bigquery-patterns/performance-optimization.md +518 -0
- package/skills/bigquery-patterns/query-patterns.md +372 -0
- package/skills/bigquery-patterns/schema-design.md +78 -0
- package/skills/cloudstream-project-template/SKILL.md +20 -0
- package/skills/cloudstream-project-template/structure.md +65 -0
- package/skills/coding-standards/SKILL.md +524 -0
- package/skills/coding-standards/deluge-standards.md +83 -0
- package/skills/compliance-patterns/SKILL.md +28 -0
- package/skills/compliance-patterns/hipaa/audit-requirements.md +251 -0
- package/skills/compliance-patterns/hipaa/baa-process.md +298 -0
- package/skills/compliance-patterns/hipaa/data-archival-strategy.md +387 -0
- package/skills/compliance-patterns/hipaa/phi-handling.md +52 -0
- package/skills/compliance-patterns/pci-dss/saq-a-requirements.md +307 -0
- package/skills/compliance-patterns/pci-dss/tokenization-patterns.md +382 -0
- package/skills/compliance-patterns/pci-dss/zoho-checkout-patterns.md +56 -0
- package/skills/compliance-patterns/soc2/access-controls.md +344 -0
- package/skills/compliance-patterns/soc2/audit-logging.md +458 -0
- package/skills/compliance-patterns/soc2/change-management.md +403 -0
- package/skills/compliance-patterns/soc2/deluge-execution-logging.md +407 -0
- package/skills/consultancy-workflows/SKILL.md +19 -0
- package/skills/consultancy-workflows/client-isolation.md +21 -0
- package/skills/consultancy-workflows/documentation-automation.md +454 -0
- package/skills/consultancy-workflows/handoff-procedures.md +257 -0
- package/skills/consultancy-workflows/knowledge-capture.md +513 -0
- package/skills/consultancy-workflows/time-tracking.md +26 -0
- package/skills/continuous-learning/SKILL.md +84 -0
- package/skills/continuous-learning/config.json +18 -0
- package/skills/continuous-learning/evaluate-session.sh +60 -0
- package/skills/continuous-learning-v2/SKILL.md +126 -0
- package/skills/continuous-learning-v2/config.json +61 -0
- package/skills/frontend-patterns/SKILL.md +635 -0
- package/skills/frontend-patterns/zoho-widget-patterns.md +103 -0
- package/skills/gcp-data-engineering/SKILL.md +36 -0
- package/skills/gcp-data-engineering/bigquery/performance-optimization.md +337 -0
- package/skills/gcp-data-engineering/dataflow/error-handling.md +496 -0
- package/skills/gcp-data-engineering/dataflow/pipeline-patterns.md +444 -0
- package/skills/gcp-data-engineering/dbt/model-organization.md +63 -0
- package/skills/gcp-data-engineering/dbt/testing-patterns.md +503 -0
- package/skills/gcp-data-engineering/medallion-architecture/bronze-layer.md +60 -0
- package/skills/gcp-data-engineering/medallion-architecture/gold-layer.md +311 -0
- package/skills/gcp-data-engineering/medallion-architecture/layer-transitions.md +517 -0
- package/skills/gcp-data-engineering/medallion-architecture/silver-layer.md +305 -0
- package/skills/gcp-data-engineering/zoho-to-gcp/data-extraction.md +543 -0
- package/skills/gcp-data-engineering/zoho-to-gcp/real-time-vs-batch.md +337 -0
- package/skills/security-review/SKILL.md +498 -0
- package/skills/security-review/compliance-checklist.md +53 -0
- package/skills/strategic-compact/SKILL.md +67 -0
- package/skills/tdd-workflow/SKILL.md +413 -0
- package/skills/tdd-workflow/zoho-testing.md +124 -0
- package/skills/tutorial/SKILL.md +249 -0
- package/skills/tutorial/docs/ACCESSIBILITY.md +169 -0
- package/skills/tutorial/lessons/00-philosophy-and-workflow.md +198 -0
- package/skills/tutorial/lessons/01-basics.md +81 -0
- package/skills/tutorial/lessons/02-training.md +86 -0
- package/skills/tutorial/lessons/03-commands.md +109 -0
- package/skills/tutorial/lessons/04-workflows.md +115 -0
- package/skills/tutorial/lessons/05-compliance.md +116 -0
- package/skills/tutorial/lessons/06-zoho.md +121 -0
- package/skills/tutorial/lessons/07-hooks-system.md +277 -0
- package/skills/tutorial/lessons/08-mcp-servers.md +316 -0
- package/skills/tutorial/lessons/09-client-management.md +215 -0
- package/skills/tutorial/lessons/10-testing-e2e.md +260 -0
- package/skills/tutorial/lessons/11-skills-deep-dive.md +272 -0
- package/skills/tutorial/lessons/12-rules-system.md +326 -0
- package/skills/tutorial/lessons/13-golden-standard-graduation.md +213 -0
- package/skills/tutorial/lessons/14-fork-setup-and-sync.md +312 -0
- package/skills/tutorial/lessons/15-living-examples-system.md +221 -0
- package/skills/tutorial/tracks/accelerated/README.md +134 -0
- package/skills/tutorial/tracks/accelerated/assessment/checkpoint-1.md +161 -0
- package/skills/tutorial/tracks/accelerated/assessment/checkpoint-2.md +175 -0
- package/skills/tutorial/tracks/accelerated/day-1-core-concepts.md +234 -0
- package/skills/tutorial/tracks/accelerated/day-2-essential-commands.md +270 -0
- package/skills/tutorial/tracks/accelerated/day-3-workflow-mastery.md +305 -0
- package/skills/tutorial/tracks/accelerated/day-4-compliance-zoho.md +304 -0
- package/skills/tutorial/tracks/accelerated/day-5-hooks-skills.md +344 -0
- package/skills/tutorial/tracks/accelerated/day-6-client-testing.md +386 -0
- package/skills/tutorial/tracks/accelerated/day-7-graduation.md +369 -0
- package/skills/zoho-patterns/CHANGELOG.md +108 -0
- package/skills/zoho-patterns/SKILL.md +446 -0
- package/skills/zoho-patterns/analytics/dashboard-patterns.md +352 -0
- package/skills/zoho-patterns/analytics/zoho-to-bigquery-pipeline.md +427 -0
- package/skills/zoho-patterns/catalyst/appsail-deployment.md +349 -0
- package/skills/zoho-patterns/catalyst/context-close-patterns.md +354 -0
- package/skills/zoho-patterns/catalyst/cron-batch-processing.md +374 -0
- package/skills/zoho-patterns/catalyst/function-patterns.md +439 -0
- package/skills/zoho-patterns/creator/form-design.md +304 -0
- package/skills/zoho-patterns/creator/publish-api-patterns.md +313 -0
- package/skills/zoho-patterns/creator/widget-integration.md +306 -0
- package/skills/zoho-patterns/creator/workflow-automation.md +253 -0
- package/skills/zoho-patterns/deluge/api-patterns.md +468 -0
- package/skills/zoho-patterns/deluge/batch-processing.md +403 -0
- package/skills/zoho-patterns/deluge/cross-app-integration.md +356 -0
- package/skills/zoho-patterns/deluge/error-handling.md +423 -0
- package/skills/zoho-patterns/deluge/syntax-reference.md +65 -0
- package/skills/zoho-patterns/integration/cors-proxy-architecture.md +426 -0
- package/skills/zoho-patterns/integration/crm-books-native-sync.md +277 -0
- package/skills/zoho-patterns/integration/oauth-token-management.md +461 -0
- package/skills/zoho-patterns/integration/zoho-flow-patterns.md +334 -0
@@ -0,0 +1,517 @@
# Layer Transitions - Bronze to Silver to Gold

> Orchestrating data movement between medallion layers using dbt, Dataflow, Cloud Functions, and Cloud Scheduler.

## Transition Overview

```
[GCS Landing Zone] → Cloud Function trigger → [Bronze BQ Tables]
                                                      │
                                          dbt run (staging models)
                                                      │
                                              [Silver BQ Tables]
                                                      │
                                           dbt run (mart models)
                                                      │
                                               [Gold BQ Tables]
                                                      │
                                   ┌──────────────────┼──────────────────┐
                             Looker Studio      Zoho Analytics      Custom Apps
```

## dbt Models: Bronze to Silver to Gold

### Project Structure

```yaml
# dbt_project.yml
models:
  cloudstream:
    staging:        # Bronze → Silver
      +materialized: incremental
      +schema: silver
      +tags: ['silver']
    intermediate:   # Silver internal transforms
      +materialized: ephemeral
      +schema: silver
    marts:          # Silver → Gold
      +materialized: table
      +schema: gold
      +tags: ['gold']
```

### Bronze to Silver (Staging Models)

```sql
-- models/staging/zoho_crm/stg_zoho__deals.sql
{{
  config(
    materialized='incremental',
    unique_key='record_id',
    partition_by={'field': '_ingestion_date', 'data_type': 'date'},
    cluster_by=['stage', 'owner_id'],
    on_schema_change='append_new_columns'
  )
}}

WITH source AS (
  SELECT *
  FROM {{ source('bronze', 'zoho_deals') }}
  {% if is_incremental() %}
  WHERE _ingestion_timestamp > (
    SELECT COALESCE(MAX(_ingestion_timestamp), TIMESTAMP('2020-01-01'))
    FROM {{ this }}
  )
  {% endif %}
),

deduplicated AS (
  SELECT *, ROW_NUMBER() OVER (
    PARTITION BY record_id ORDER BY modified_time DESC
  ) AS _rn
  FROM source
),

cleaned AS (
  SELECT
    record_id,
    NULLIF(TRIM(deal_name), '') AS deal_name,
    SAFE_CAST(amount AS FLOAT64) AS amount,
    COALESCE(stage, 'Unknown') AS stage,
    SAFE.PARSE_DATE('%Y-%m-%d', close_date) AS close_date,
    owner_id,
    account_id,
    SAFE_CAST(probability AS INT64) AS probability,
    _ingestion_date,
    _ingestion_timestamp,
    _source_system,
    _batch_id
  FROM deduplicated
  WHERE _rn = 1
    AND record_id IS NOT NULL
)

SELECT * FROM cleaned
```

### Silver to Gold (Mart Models)

```sql
-- models/marts/finance/fct_monthly_revenue.sql
{{
  config(
    materialized='table',
    partition_by={'field': 'revenue_month', 'data_type': 'date', 'granularity': 'month'},
    cluster_by=['customer_id']
  )
}}

WITH invoices AS (
  SELECT * FROM {{ ref('stg_zoho__invoices') }}
  WHERE status = 'paid'
),

customers AS (
  SELECT * FROM {{ ref('stg_zoho__accounts') }}
)

SELECT
  DATE_TRUNC(i.paid_date, MONTH) AS revenue_month,
  i.customer_id,
  c.account_name AS customer_name,
  c.industry,
  COUNT(*) AS invoice_count,
  SUM(i.total) AS total_revenue,
  SUM(i.tax_amount) AS total_tax,
  AVG(DATE_DIFF(i.paid_date, i.invoice_date, DAY)) AS avg_days_to_pay
FROM invoices i
LEFT JOIN customers c ON i.customer_id = c.record_id
GROUP BY 1, 2, 3, 4
```

## Dataflow Jobs for Streaming Transitions

### PubSub to Silver (Streaming)

```python
# streaming_silver_pipeline.py
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.io.gcp.bigquery import WriteToBigQuery, BigQueryDisposition
import json
from datetime import datetime

class ParseAndValidate(beam.DoFn):
    """Bronze→Silver: parse, validate, enrich."""

    def process(self, element, timestamp=beam.DoFn.TimestampParam):
        try:
            record = json.loads(element.decode('utf-8'))

            # Validation
            if not record.get('record_id'):
                yield beam.pvalue.TaggedOutput('quarantine', {
                    'raw': element.decode('utf-8'),
                    'error': 'missing_record_id',
                    'timestamp': datetime.utcnow().isoformat()
                })
                return

            # Type coercion
            cleaned = {
                'record_id': str(record['record_id']),
                'deal_name': (record.get('deal_name') or '').strip() or None,
                'amount': float(record['amount']) if record.get('amount') else 0.0,
                'stage': record.get('stage', 'Unknown'),
                'owner_id': record.get('owner_id'),
                'modified_time': record.get('modified_time'),
                '_ingestion_timestamp': datetime.utcnow().isoformat(),
                '_source_system': 'zoho_crm_webhook'
            }

            yield beam.pvalue.TaggedOutput('valid', cleaned)

        except Exception as e:
            yield beam.pvalue.TaggedOutput('quarantine', {
                'raw': element.decode('utf-8'),
                'error': str(e),
                'timestamp': datetime.utcnow().isoformat()
            })

def run():
    options = PipelineOptions(
        streaming=True,
        runner='DataflowRunner',
        project='cloudstream-prod',
        region='us-central1',
        temp_location='gs://cloudstream-dataflow-temp/tmp',
        autoscaling_algorithm='THROUGHPUT_BASED',
        max_num_workers=5
    )

    with beam.Pipeline(options=options) as p:
        messages = (
            p
            | 'ReadPubSub' >> beam.io.ReadFromPubSub(
                topic='projects/cloudstream-prod/topics/zoho-webhooks'
            )
        )

        results = messages | 'ParseValidate' >> beam.ParDo(
            ParseAndValidate()
        ).with_outputs('valid', 'quarantine')

        # Valid records → Silver
        # Note: CREATE_IF_NEEDED requires a schema argument when the table
        # does not already exist; here the table is assumed to be pre-created.
        results.valid | 'WriteToSilver' >> WriteToBigQuery(
            table='cloudstream-prod:silver.zoho_deals_streaming',
            write_disposition=BigQueryDisposition.WRITE_APPEND,
            create_disposition=BigQueryDisposition.CREATE_IF_NEEDED
        )

        # Bad records → Quarantine
        results.quarantine | 'WriteToQuarantine' >> WriteToBigQuery(
            table='cloudstream-prod:silver._quarantine_streaming',
            write_disposition=BigQueryDisposition.WRITE_APPEND
        )

if __name__ == '__main__':
    run()
```

## Cloud Function Triggers on GCS File Arrival

```python
# cloud_function/bronze_loader/main.py
"""Triggered when new file arrives in GCS landing zone.
Loads raw file into Bronze BigQuery table."""

from datetime import datetime
from google.cloud import bigquery
import functions_framework
import json

PROJECT = 'cloudstream-prod'
DATASET_BRONZE = 'bronze'

# Source-to-table mapping
SOURCE_MAP = {
    'zoho-crm/deals': 'zoho_deals',
    'zoho-crm/contacts': 'zoho_contacts',
    'zoho-books/invoices': 'zoho_invoices',
    'zoho-books/payments': 'zoho_payments',
}

@functions_framework.cloud_event
def load_to_bronze(cloud_event):
    """Triggered by GCS object finalize event."""
    data = cloud_event.data
    bucket_name = data['bucket']
    file_path = data['name']

    # Determine target table from file path
    # Expected: landing/{source}/{entity}/YYYY/MM/DD/file.json
    parts = file_path.split('/')
    if len(parts) < 4:
        print(f"Unexpected path format: {file_path}")
        return

    source_key = f"{parts[1]}/{parts[2]}"
    table_name = SOURCE_MAP.get(source_key)
    if not table_name:
        print(f"Unknown source: {source_key}")
        return

    client = bigquery.Client(project=PROJECT)
    table_ref = f"{PROJECT}.{DATASET_BRONZE}.{table_name}"

    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
        write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
        schema_update_options=[
            bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION
        ],
        # Partition new tables by ingestion date
        time_partitioning=bigquery.TimePartitioning(
            type_=bigquery.TimePartitioningType.DAY,
            field='_ingestion_date'
        ),
    )

    uri = f"gs://{bucket_name}/{file_path}"
    load_job = client.load_table_from_uri(uri, table_ref, job_config=job_config)
    load_job.result()  # Wait for completion

    print(f"Loaded {load_job.output_rows} rows from {uri} to {table_ref}")

    # Trigger silver refresh if needed
    trigger_silver_refresh(table_name)

def trigger_silver_refresh(table_name):
    """Publish message to trigger dbt staging model run."""
    from google.cloud import pubsub_v1
    publisher = pubsub_v1.PublisherClient()
    topic = f"projects/{PROJECT}/topics/silver-refresh-trigger"

    message = json.dumps({
        'source_table': table_name,
        'trigger_time': datetime.utcnow().isoformat()
    }).encode('utf-8')

    publisher.publish(topic, message)
```

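The `trigger_silver_refresh` publisher above needs a consumer on the other side of the topic. A minimal consumer sketch, assuming a Pub/Sub-triggered Cloud Function, the dbt Cloud job-trigger endpoint used in the Scheduler example below, and hypothetical account/job IDs plus a `DBT_CLOUD_TOKEN` environment variable:

```python
# cloud_function/silver_trigger/main.py - sketch, not part of the package.
# Consumes silver-refresh-trigger messages and starts the dbt Cloud job
# that runs the staging models. ACCOUNT_ID and JOB_ID are placeholders.
import base64
import json
import os

import functions_framework
import requests

ACCOUNT_ID = 12345  # hypothetical dbt Cloud account ID
JOB_ID = 12345      # hypothetical dbt Cloud job ID (staging models)

@functions_framework.cloud_event
def trigger_dbt_silver(cloud_event):
    """Triggered by a Pub/Sub message from the bronze loader."""
    payload = json.loads(
        base64.b64decode(cloud_event.data['message']['data']).decode('utf-8')
    )

    resp = requests.post(
        f"https://cloud.getdbt.com/api/v2/accounts/{ACCOUNT_ID}/jobs/{JOB_ID}/run/",
        headers={'Authorization': f"Token {os.environ['DBT_CLOUD_TOKEN']}"},
        json={'cause': f"Bronze load: {payload.get('source_table')}"},
        timeout=30,
    )
    resp.raise_for_status()
    print(f"Started dbt Cloud run {resp.json()['data']['id']}")
```
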
## Orchestration with Cloud Composer/Scheduler

### Cloud Composer DAG (Full Pipeline)

```python
# dags/medallion_pipeline.py
from airflow import DAG
from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator
from airflow.providers.dbt.cloud.operators.dbt import DbtCloudRunJobOperator
from airflow.utils.dates import days_ago
from datetime import timedelta

default_args = {
    'owner': 'cloudstream',
    'retries': 2,
    'retry_delay': timedelta(minutes=5),
    'email_on_failure': True,
    'email': ['data-alerts@cloudstreamsoftware.com'],
}

with DAG(
    'medallion_full_refresh',
    default_args=default_args,
    schedule_interval='0 6 * * *',  # 6 AM daily
    start_date=days_ago(1),
    catchup=False,
    tags=['medallion', 'production'],
) as dag:

    # Step 1: Bronze freshness check - ERROR() fails the query (and thus
    # the task) when any bronze table has gone more than 25 hours
    # without an update
    check_bronze = BigQueryInsertJobOperator(
        task_id='check_bronze_freshness',
        configuration={
            'query': {
                'query': """
                    SELECT IF(COUNT(*) = 0, 'fresh', ERROR('stale bronze tables detected'))
                    FROM (
                        SELECT table_name
                        FROM `cloudstream-prod.bronze.INFORMATION_SCHEMA.PARTITIONS`
                        GROUP BY table_name
                        HAVING MAX(last_modified_time) < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 25 HOUR)
                    )
                """,
                'useLegacySql': False,
            }
        },
    )

    # Step 2: dbt run staging (Bronze → Silver)
    dbt_silver = DbtCloudRunJobOperator(
        task_id='dbt_run_silver',
        job_id=12345,  # dbt Cloud job ID for staging models
        check_interval=30,
        timeout=1800,
    )

    # Step 3: dbt test silver
    dbt_test_silver = DbtCloudRunJobOperator(
        task_id='dbt_test_silver',
        job_id=12346,  # dbt Cloud job ID for staging tests
        check_interval=30,
        timeout=600,
    )

    # Step 4: dbt run marts (Silver → Gold)
    dbt_gold = DbtCloudRunJobOperator(
        task_id='dbt_run_gold',
        job_id=12347,
        check_interval=30,
        timeout=1800,
    )

    # Step 5: dbt test gold
    dbt_test_gold = DbtCloudRunJobOperator(
        task_id='dbt_test_gold',
        job_id=12348,
        check_interval=30,
        timeout=600,
    )

    check_bronze >> dbt_silver >> dbt_test_silver >> dbt_gold >> dbt_test_gold
```

### Cloud Scheduler (Lightweight Alternative)

```hcl
# terraform/scheduler.tf - For simpler orchestration without Composer
resource "google_cloud_scheduler_job" "dbt_silver_refresh" {
  name      = "dbt-silver-refresh"
  schedule  = "0 */4 * * *"  # Every 4 hours
  time_zone = "America/Chicago"

  http_target {
    uri         = "https://cloud.getdbt.com/api/v2/accounts/12345/jobs/67890/run/"
    http_method = "POST"
    headers     = { "Authorization" = "Token ${var.dbt_cloud_token}" }
    body        = base64encode(jsonencode({ "cause" = "Scheduled by Cloud Scheduler" }))
  }
}
```

## Dependency Management

```yaml
# models/staging/_staging__sources.yml
sources:
  - name: bronze
    database: cloudstream-prod
    schema: bronze
    freshness:
      warn_after: {count: 12, period: hour}
      error_after: {count: 24, period: hour}
    loaded_at_field: _ingestion_timestamp
    tables:
      - name: zoho_deals
      - name: zoho_contacts
      - name: zoho_invoices
      - name: zoho_payments

# Mart dependencies need no explicit wiring: dbt infers the DAG from the
# ref() calls inside each model (fct_monthly_revenue refs
# stg_zoho__invoices and stg_zoho__accounts). To force a dependency dbt
# cannot see, add a comment to the model's SQL file instead:
#   -- depends_on: {{ ref('stg_zoho__invoices') }}
```

## Failure Handling and Replay

```python
# Replay failed loads from GCS archive
def replay_failed_batch(batch_id: str, source_table: str):
    """Re-process a specific failed batch from GCS landing zone."""
    from google.cloud import bigquery, storage

    client = bigquery.Client()
    storage_client = storage.Client()

    # Find files for the failed batch
    bucket = storage_client.bucket('cloudstream-landing')
    blobs = bucket.list_blobs(prefix=f"archive/{source_table}/{batch_id}/")

    for blob in blobs:
        uri = f"gs://cloudstream-landing/{blob.name}"
        job_config = bigquery.LoadJobConfig(
            source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
            write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
        )

        job = client.load_table_from_uri(
            uri, f"cloudstream-prod.bronze.{source_table}", job_config=job_config
        )
        job.result()
        print(f"Replayed: {uri} → {job.output_rows} rows")
```

## Monitoring and Alerts

```yaml
# Cloud Monitoring alert policies
# Alert if silver layer is stale (no updates in 6+ hours)
alertPolicies:
  - displayName: "Silver Layer Staleness"
    conditions:
      - conditionThreshold:
          filter: 'resource.type="bigquery_table" AND metric.type="bigquery.googleapis.com/storage/last_modified_time"'
          comparison: COMPARISON_GT
          thresholdValue: 21600  # 6 hours in seconds
    notificationChannels:
      - "projects/cloudstream-prod/notificationChannels/slack-data-alerts"

  - displayName: "High Quarantine Rate"
    conditions:
      - conditionThreshold:
          filter: 'metric.type="custom.googleapis.com/silver/quarantine_rate"'
          comparison: COMPARISON_GT
          thresholdValue: 0.05  # >5% quarantine rate
```

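The second policy assumes something publishes `custom.googleapis.com/silver/quarantine_rate`. A minimal publisher sketch, assuming the rate has already been computed upstream (e.g. quarantined rows divided by total rows for the last window):

```python
# Sketch: publish the custom quarantine-rate metric that the
# "High Quarantine Rate" policy above filters on.
import time

from google.cloud import monitoring_v3

def publish_quarantine_rate(project_id: str, rate: float) -> None:
    """Write one point of custom.googleapis.com/silver/quarantine_rate."""
    client = monitoring_v3.MetricServiceClient()

    series = monitoring_v3.TimeSeries()
    series.metric.type = 'custom.googleapis.com/silver/quarantine_rate'
    series.resource.type = 'global'

    now = time.time()
    interval = monitoring_v3.TimeInterval(
        {'end_time': {'seconds': int(now), 'nanos': int((now % 1) * 1e9)}}
    )
    point = monitoring_v3.Point(
        {'interval': interval, 'value': {'double_value': rate}}
    )
    series.points = [point]

    client.create_time_series(name=f"projects/{project_id}", time_series=[series])

# e.g. publish_quarantine_rate('cloudstream-prod', quarantined / max(total, 1))
```
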
## SLA Tracking

```sql
-- Track layer transition SLAs
CREATE TABLE `project.ops._sla_tracking` (
  pipeline_name STRING,
  layer_transition STRING,  -- 'bronze_to_silver', 'silver_to_gold'
  started_at TIMESTAMP,
  completed_at TIMESTAMP,
  duration_seconds INT64,
  sla_seconds INT64,        -- Target SLA
  sla_met BOOL,
  record_count INT64
);

-- SLA dashboard query
SELECT
  layer_transition,
  DATE(started_at) AS run_date,
  AVG(duration_seconds) AS avg_duration,
  MAX(duration_seconds) AS max_duration,
  COUNTIF(sla_met = FALSE) AS sla_violations,
  COUNT(*) AS total_runs
FROM `project.ops._sla_tracking`
WHERE started_at >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
GROUP BY 1, 2
ORDER BY 2 DESC;
```

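The rows have to come from the orchestration layer itself. A minimal recorder sketch, assuming the caller times each transition and passes the target SLA:

```python
# Sketch: record one layer-transition run into _sla_tracking.
from datetime import datetime

from google.cloud import bigquery

def record_sla(pipeline: str, transition: str,
               started_at: datetime, completed_at: datetime,
               sla_seconds: int, record_count: int) -> None:
    """Insert a single run's timing into project.ops._sla_tracking."""
    duration = int((completed_at - started_at).total_seconds())
    row = {
        'pipeline_name': pipeline,
        'layer_transition': transition,  # e.g. 'bronze_to_silver'
        'started_at': started_at.isoformat(),
        'completed_at': completed_at.isoformat(),
        'duration_seconds': duration,
        'sla_seconds': sla_seconds,
        'sla_met': duration <= sla_seconds,
        'record_count': record_count,
    }
    errors = bigquery.Client().insert_rows_json('project.ops._sla_tracking', [row])
    if errors:
        raise RuntimeError(f"SLA insert failed: {errors}")
```
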
## Best Practices

1. **Never skip silver** - Even for "clean" sources, always apply deduplication and typing
2. **Use dbt `--select` by tag** - Run `tag:silver` and `tag:gold` independently
3. **Test between layers** - Run `dbt test` after silver before triggering gold (see the gating sketch below)
4. **Archive raw files** - Move GCS files to archive/ after successful bronze load
5. **Idempotent loads** - Use `unique_key` in incremental models for safe re-runs
6. **Monitor freshness** - dbt source freshness checks catch upstream delays early

> **WARNING**: Never run gold models if silver tests fail. Use `dbt build --select tag:silver` (runs + tests) before proceeding to gold.
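
A minimal sketch of that gate, assuming the dbt CLI is on PATH and models carry the tags from `dbt_project.yml` above:

```python
# Sketch: gate gold on silver. `dbt build` runs models and their tests
# together; check=True raises CalledProcessError on any failure, so the
# gold step never starts if silver is broken.
import subprocess

def run_medallion_refresh() -> None:
    subprocess.run(['dbt', 'build', '--select', 'tag:silver'], check=True)
    subprocess.run(['dbt', 'build', '--select', 'tag:gold'], check=True)

if __name__ == '__main__':
    run_medallion_refresh()
```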