@cloudstreamsoftware/claude-tools 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. package/README.md +152 -37
  2. package/agents/INDEX.md +183 -0
  3. package/agents/architect.md +247 -0
  4. package/agents/build-error-resolver.md +555 -0
  5. package/agents/catalyst-deployer.md +132 -0
  6. package/agents/code-reviewer.md +121 -0
  7. package/agents/compliance-auditor.md +148 -0
  8. package/agents/creator-architect.md +395 -0
  9. package/agents/deluge-reviewer.md +98 -0
  10. package/agents/doc-updater.md +471 -0
  11. package/agents/e2e-runner.md +711 -0
  12. package/agents/planner.md +122 -0
  13. package/agents/refactor-cleaner.md +309 -0
  14. package/agents/security-reviewer.md +582 -0
  15. package/agents/tdd-guide.md +302 -0
  16. package/bin/cloudstream-setup.js +16 -6
  17. package/config/versions.json +63 -0
  18. package/dist/hooks/hooks.json +209 -0
  19. package/dist/index.js +47 -0
  20. package/dist/lib/asset-value.js +609 -0
  21. package/dist/lib/client-manager.js +300 -0
  22. package/dist/lib/command-matcher.js +242 -0
  23. package/dist/lib/cross-session-patterns.js +754 -0
  24. package/dist/lib/intent-classifier.js +1075 -0
  25. package/dist/lib/package-manager.js +374 -0
  26. package/dist/lib/recommendation-engine.js +597 -0
  27. package/dist/lib/session-memory.js +489 -0
  28. package/dist/lib/skill-effectiveness.js +486 -0
  29. package/dist/lib/skill-matcher.js +595 -0
  30. package/dist/lib/tutorial-metrics.js +242 -0
  31. package/dist/lib/tutorial-progress.js +209 -0
  32. package/dist/lib/tutorial-renderer.js +431 -0
  33. package/dist/lib/utils.js +380 -0
  34. package/dist/lib/verify-formatter.js +143 -0
  35. package/dist/lib/workflow-state.js +249 -0
  36. package/hooks/hooks.json +209 -0
  37. package/package.json +5 -1
  38. package/scripts/aggregate-sessions.js +290 -0
  39. package/scripts/branch-name-validator.js +291 -0
  40. package/scripts/build.js +101 -0
  41. package/scripts/commands/client-switch.js +231 -0
  42. package/scripts/deprecate-skill.js +610 -0
  43. package/scripts/diagnose.js +324 -0
  44. package/scripts/doc-freshness.js +168 -0
  45. package/scripts/generate-weekly-digest.js +393 -0
  46. package/scripts/health-check.js +270 -0
  47. package/scripts/hooks/credential-check.js +101 -0
  48. package/scripts/hooks/evaluate-session.js +81 -0
  49. package/scripts/hooks/pre-compact.js +66 -0
  50. package/scripts/hooks/prompt-analyzer.js +276 -0
  51. package/scripts/hooks/prompt-router.js +422 -0
  52. package/scripts/hooks/quality-gate-enforcer.js +371 -0
  53. package/scripts/hooks/session-end.js +156 -0
  54. package/scripts/hooks/session-start.js +195 -0
  55. package/scripts/hooks/skill-injector.js +333 -0
  56. package/scripts/hooks/suggest-compact.js +58 -0
  57. package/scripts/lib/asset-value.js +609 -0
  58. package/scripts/lib/client-manager.js +300 -0
  59. package/scripts/lib/command-matcher.js +242 -0
  60. package/scripts/lib/cross-session-patterns.js +754 -0
  61. package/scripts/lib/intent-classifier.js +1075 -0
  62. package/scripts/lib/package-manager.js +374 -0
  63. package/scripts/lib/recommendation-engine.js +597 -0
  64. package/scripts/lib/session-memory.js +489 -0
  65. package/scripts/lib/skill-effectiveness.js +486 -0
  66. package/scripts/lib/skill-matcher.js +595 -0
  67. package/scripts/lib/tutorial-metrics.js +242 -0
  68. package/scripts/lib/tutorial-progress.js +209 -0
  69. package/scripts/lib/tutorial-renderer.js +431 -0
  70. package/scripts/lib/utils.js +380 -0
  71. package/scripts/lib/verify-formatter.js +143 -0
  72. package/scripts/lib/workflow-state.js +249 -0
  73. package/scripts/onboard.js +363 -0
  74. package/scripts/quarterly-report.js +692 -0
  75. package/scripts/setup-package-manager.js +204 -0
  76. package/scripts/sync-upstream.js +391 -0
  77. package/scripts/test.js +108 -0
  78. package/scripts/tutorial-runner.js +351 -0
  79. package/scripts/validate-all.js +201 -0
  80. package/scripts/verifiers/agents.js +245 -0
  81. package/scripts/verifiers/config.js +186 -0
  82. package/scripts/verifiers/environment.js +123 -0
  83. package/scripts/verifiers/hooks.js +188 -0
  84. package/scripts/verifiers/index.js +38 -0
  85. package/scripts/verifiers/persistence.js +140 -0
  86. package/scripts/verifiers/plugin.js +215 -0
  87. package/scripts/verifiers/skills.js +209 -0
  88. package/scripts/verify-setup.js +164 -0
  89. package/skills/INDEX.md +157 -0
  90. package/skills/backend-patterns/SKILL.md +586 -0
  91. package/skills/backend-patterns/catalyst-patterns.md +128 -0
  92. package/skills/bigquery-patterns/SKILL.md +27 -0
  93. package/skills/bigquery-patterns/performance-optimization.md +518 -0
  94. package/skills/bigquery-patterns/query-patterns.md +372 -0
  95. package/skills/bigquery-patterns/schema-design.md +78 -0
  96. package/skills/cloudstream-project-template/SKILL.md +20 -0
  97. package/skills/cloudstream-project-template/structure.md +65 -0
  98. package/skills/coding-standards/SKILL.md +524 -0
  99. package/skills/coding-standards/deluge-standards.md +83 -0
  100. package/skills/compliance-patterns/SKILL.md +28 -0
  101. package/skills/compliance-patterns/hipaa/audit-requirements.md +251 -0
  102. package/skills/compliance-patterns/hipaa/baa-process.md +298 -0
  103. package/skills/compliance-patterns/hipaa/data-archival-strategy.md +387 -0
  104. package/skills/compliance-patterns/hipaa/phi-handling.md +52 -0
  105. package/skills/compliance-patterns/pci-dss/saq-a-requirements.md +307 -0
  106. package/skills/compliance-patterns/pci-dss/tokenization-patterns.md +382 -0
  107. package/skills/compliance-patterns/pci-dss/zoho-checkout-patterns.md +56 -0
  108. package/skills/compliance-patterns/soc2/access-controls.md +344 -0
  109. package/skills/compliance-patterns/soc2/audit-logging.md +458 -0
  110. package/skills/compliance-patterns/soc2/change-management.md +403 -0
  111. package/skills/compliance-patterns/soc2/deluge-execution-logging.md +407 -0
  112. package/skills/consultancy-workflows/SKILL.md +19 -0
  113. package/skills/consultancy-workflows/client-isolation.md +21 -0
  114. package/skills/consultancy-workflows/documentation-automation.md +454 -0
  115. package/skills/consultancy-workflows/handoff-procedures.md +257 -0
  116. package/skills/consultancy-workflows/knowledge-capture.md +513 -0
  117. package/skills/consultancy-workflows/time-tracking.md +26 -0
  118. package/skills/continuous-learning/SKILL.md +84 -0
  119. package/skills/continuous-learning/config.json +18 -0
  120. package/skills/continuous-learning/evaluate-session.sh +60 -0
  121. package/skills/continuous-learning-v2/SKILL.md +126 -0
  122. package/skills/continuous-learning-v2/config.json +61 -0
  123. package/skills/frontend-patterns/SKILL.md +635 -0
  124. package/skills/frontend-patterns/zoho-widget-patterns.md +103 -0
  125. package/skills/gcp-data-engineering/SKILL.md +36 -0
  126. package/skills/gcp-data-engineering/bigquery/performance-optimization.md +337 -0
  127. package/skills/gcp-data-engineering/dataflow/error-handling.md +496 -0
  128. package/skills/gcp-data-engineering/dataflow/pipeline-patterns.md +444 -0
  129. package/skills/gcp-data-engineering/dbt/model-organization.md +63 -0
  130. package/skills/gcp-data-engineering/dbt/testing-patterns.md +503 -0
  131. package/skills/gcp-data-engineering/medallion-architecture/bronze-layer.md +60 -0
  132. package/skills/gcp-data-engineering/medallion-architecture/gold-layer.md +311 -0
  133. package/skills/gcp-data-engineering/medallion-architecture/layer-transitions.md +517 -0
  134. package/skills/gcp-data-engineering/medallion-architecture/silver-layer.md +305 -0
  135. package/skills/gcp-data-engineering/zoho-to-gcp/data-extraction.md +543 -0
  136. package/skills/gcp-data-engineering/zoho-to-gcp/real-time-vs-batch.md +337 -0
  137. package/skills/security-review/SKILL.md +498 -0
  138. package/skills/security-review/compliance-checklist.md +53 -0
  139. package/skills/strategic-compact/SKILL.md +67 -0
  140. package/skills/tdd-workflow/SKILL.md +413 -0
  141. package/skills/tdd-workflow/zoho-testing.md +124 -0
  142. package/skills/tutorial/SKILL.md +249 -0
  143. package/skills/tutorial/docs/ACCESSIBILITY.md +169 -0
  144. package/skills/tutorial/lessons/00-philosophy-and-workflow.md +198 -0
  145. package/skills/tutorial/lessons/01-basics.md +81 -0
  146. package/skills/tutorial/lessons/02-training.md +86 -0
  147. package/skills/tutorial/lessons/03-commands.md +109 -0
  148. package/skills/tutorial/lessons/04-workflows.md +115 -0
  149. package/skills/tutorial/lessons/05-compliance.md +116 -0
  150. package/skills/tutorial/lessons/06-zoho.md +121 -0
  151. package/skills/tutorial/lessons/07-hooks-system.md +277 -0
  152. package/skills/tutorial/lessons/08-mcp-servers.md +316 -0
  153. package/skills/tutorial/lessons/09-client-management.md +215 -0
  154. package/skills/tutorial/lessons/10-testing-e2e.md +260 -0
  155. package/skills/tutorial/lessons/11-skills-deep-dive.md +272 -0
  156. package/skills/tutorial/lessons/12-rules-system.md +326 -0
  157. package/skills/tutorial/lessons/13-golden-standard-graduation.md +213 -0
  158. package/skills/tutorial/lessons/14-fork-setup-and-sync.md +312 -0
  159. package/skills/tutorial/lessons/15-living-examples-system.md +221 -0
  160. package/skills/tutorial/tracks/accelerated/README.md +134 -0
  161. package/skills/tutorial/tracks/accelerated/assessment/checkpoint-1.md +161 -0
  162. package/skills/tutorial/tracks/accelerated/assessment/checkpoint-2.md +175 -0
  163. package/skills/tutorial/tracks/accelerated/day-1-core-concepts.md +234 -0
  164. package/skills/tutorial/tracks/accelerated/day-2-essential-commands.md +270 -0
  165. package/skills/tutorial/tracks/accelerated/day-3-workflow-mastery.md +305 -0
  166. package/skills/tutorial/tracks/accelerated/day-4-compliance-zoho.md +304 -0
  167. package/skills/tutorial/tracks/accelerated/day-5-hooks-skills.md +344 -0
  168. package/skills/tutorial/tracks/accelerated/day-6-client-testing.md +386 -0
  169. package/skills/tutorial/tracks/accelerated/day-7-graduation.md +369 -0
  170. package/skills/zoho-patterns/CHANGELOG.md +108 -0
  171. package/skills/zoho-patterns/SKILL.md +446 -0
  172. package/skills/zoho-patterns/analytics/dashboard-patterns.md +352 -0
  173. package/skills/zoho-patterns/analytics/zoho-to-bigquery-pipeline.md +427 -0
  174. package/skills/zoho-patterns/catalyst/appsail-deployment.md +349 -0
  175. package/skills/zoho-patterns/catalyst/context-close-patterns.md +354 -0
  176. package/skills/zoho-patterns/catalyst/cron-batch-processing.md +374 -0
  177. package/skills/zoho-patterns/catalyst/function-patterns.md +439 -0
  178. package/skills/zoho-patterns/creator/form-design.md +304 -0
  179. package/skills/zoho-patterns/creator/publish-api-patterns.md +313 -0
  180. package/skills/zoho-patterns/creator/widget-integration.md +306 -0
  181. package/skills/zoho-patterns/creator/workflow-automation.md +253 -0
  182. package/skills/zoho-patterns/deluge/api-patterns.md +468 -0
  183. package/skills/zoho-patterns/deluge/batch-processing.md +403 -0
  184. package/skills/zoho-patterns/deluge/cross-app-integration.md +356 -0
  185. package/skills/zoho-patterns/deluge/error-handling.md +423 -0
  186. package/skills/zoho-patterns/deluge/syntax-reference.md +65 -0
  187. package/skills/zoho-patterns/integration/cors-proxy-architecture.md +426 -0
  188. package/skills/zoho-patterns/integration/crm-books-native-sync.md +277 -0
  189. package/skills/zoho-patterns/integration/oauth-token-management.md +461 -0
  190. package/skills/zoho-patterns/integration/zoho-flow-patterns.md +334 -0
@@ -0,0 +1,517 @@
1
+ # Layer Transitions - Bronze to Silver to Gold
2
+
3
+ > Orchestrating data movement between medallion layers using dbt, Dataflow, Cloud Functions, and Cloud Scheduler.
4
+
5
+ ## Transition Overview
6
+
7
+ ```
8
+ [GCS Landing Zone] → Cloud Function trigger → [Bronze BQ Tables]
9
+
10
+ dbt run (staging models)
11
+
12
+ [Silver BQ Tables]
13
+
14
+ dbt run (mart models)
15
+
16
+ [Gold BQ Tables]
17
+
18
+ ┌──────────────────┼──────────────────┐
19
+ Looker Studio Zoho Analytics Custom Apps
20
+ ```
21
+
22
+ ## dbt Models: Bronze to Silver to Gold
23
+
24
+ ### Project Structure
25
+
26
+ ```yaml
27
+ # dbt_project.yml
28
+ models:
29
+ cloudstream:
30
+ staging: # Bronze → Silver
31
+ +materialized: incremental
32
+ +schema: silver
33
+ +tags: ['silver']
34
+ intermediate: # Silver internal transforms
35
+ +materialized: ephemeral
36
+ +schema: silver
37
+ marts: # Silver → Gold
38
+ +materialized: table
39
+ +schema: gold
40
+ +tags: ['gold']
41
+ ```
42
+
43
+ ### Bronze to Silver (Staging Models)
44
+
45
+ ```sql
46
+ -- models/staging/zoho_crm/stg_zoho__deals.sql
47
+ {{
48
+ config(
49
+ materialized='incremental',
50
+ unique_key='record_id',
51
+ partition_by={'field': '_ingestion_date', 'data_type': 'date'},
52
+ cluster_by=['stage', 'owner_id'],
53
+ on_schema_change='append_new_columns'
54
+ )
55
+ }}
56
+
57
+ WITH source AS (
58
+ SELECT *
59
+ FROM {{ source('bronze', 'zoho_deals') }}
60
+ {% if is_incremental() %}
61
+ WHERE _ingestion_timestamp > (
62
+ SELECT COALESCE(MAX(_ingestion_timestamp), TIMESTAMP('2020-01-01'))
63
+ FROM {{ this }}
64
+ )
65
+ {% endif %}
66
+ ),
67
+
68
+ deduplicated AS (
69
+ SELECT *, ROW_NUMBER() OVER (
70
+ PARTITION BY record_id ORDER BY modified_time DESC
71
+ ) AS _rn
72
+ FROM source
73
+ ),
74
+
75
+ cleaned AS (
76
+ SELECT
77
+ record_id,
78
+ NULLIF(TRIM(deal_name), '') AS deal_name,
79
+ SAFE_CAST(amount AS FLOAT64) AS amount,
80
+ COALESCE(stage, 'Unknown') AS stage,
81
+ SAFE.PARSE_DATE('%Y-%m-%d', close_date) AS close_date,
82
+ owner_id,
83
+ account_id,
84
+ SAFE_CAST(probability AS INT64) AS probability,
85
+ _ingestion_date,
86
+ _ingestion_timestamp,
87
+ _source_system,
88
+ _batch_id
89
+ FROM deduplicated
90
+ WHERE _rn = 1
91
+ AND record_id IS NOT NULL
92
+ )
93
+
94
+ SELECT * FROM cleaned
95
+ ```
96
+
97
+ ### Silver to Gold (Mart Models)
98
+
99
+ ```sql
100
+ -- models/marts/finance/fct_monthly_revenue.sql
101
+ {{
102
+ config(
103
+ materialized='table',
104
+ partition_by={'field': 'revenue_month', 'data_type': 'date', 'granularity': 'month'},
105
+ cluster_by=['customer_id']
106
+ )
107
+ }}
108
+
109
+ WITH invoices AS (
110
+ SELECT * FROM {{ ref('stg_zoho__invoices') }}
111
+ WHERE status = 'paid'
112
+ ),
113
+
114
+ customers AS (
115
+ SELECT * FROM {{ ref('stg_zoho__accounts') }}
116
+ )
117
+
118
+ SELECT
119
+ DATE_TRUNC(i.paid_date, MONTH) AS revenue_month,
120
+ i.customer_id,
121
+ c.account_name AS customer_name,
122
+ c.industry,
123
+ COUNT(*) AS invoice_count,
124
+ SUM(i.total) AS total_revenue,
125
+ SUM(i.tax_amount) AS total_tax,
126
+ AVG(DATE_DIFF(i.paid_date, i.invoice_date, DAY)) AS avg_days_to_pay
127
+ FROM invoices i
128
+ LEFT JOIN customers c ON i.customer_id = c.record_id
129
+ GROUP BY 1, 2, 3, 4
130
+ ```
131
+
132
+ ## Dataflow Jobs for Streaming Transitions
133
+
134
+ ### PubSub to Silver (Streaming)
135
+
136
+ ```python
137
+ # streaming_silver_pipeline.py
138
+ import apache_beam as beam
139
+ from apache_beam.options.pipeline_options import PipelineOptions
140
+ from apache_beam.io.gcp.bigquery import WriteToBigQuery, BigQueryDisposition
141
+ import json
142
+ from datetime import datetime
143
+
144
+ class ParseAndValidate(beam.DoFn):
145
+ """Bronze→Silver: parse, validate, enrich."""
146
+
147
+ def process(self, element, timestamp=beam.DoFn.TimestampParam):
148
+ try:
149
+ record = json.loads(element.decode('utf-8'))
150
+
151
+ # Validation
152
+ if not record.get('record_id'):
153
+ yield beam.pvalue.TaggedOutput('quarantine', {
154
+ 'raw': element.decode('utf-8'),
155
+ 'error': 'missing_record_id',
156
+ 'timestamp': datetime.utcnow().isoformat()
157
+ })
158
+ return
159
+
160
+ # Type coercion
161
+ cleaned = {
162
+ 'record_id': str(record['record_id']),
163
+ 'deal_name': (record.get('deal_name') or '').strip() or None,
164
+ 'amount': float(record['amount']) if record.get('amount') else 0.0,
165
+ 'stage': record.get('stage', 'Unknown'),
166
+ 'owner_id': record.get('owner_id'),
167
+ 'modified_time': record.get('modified_time'),
168
+ '_ingestion_timestamp': datetime.utcnow().isoformat(),
169
+ '_source_system': 'zoho_crm_webhook'
170
+ }
171
+
172
+ yield beam.pvalue.TaggedOutput('valid', cleaned)
173
+
174
+ except Exception as e:
175
+ yield beam.pvalue.TaggedOutput('quarantine', {
176
+ 'raw': element.decode('utf-8'),
177
+ 'error': str(e),
178
+ 'timestamp': datetime.utcnow().isoformat()
179
+ })
180
+
181
+ def run():
182
+ options = PipelineOptions(
183
+ streaming=True,
184
+ project='cloudstream-prod',
185
+ region='us-central1',
186
+ temp_location='gs://cloudstream-dataflow-temp/tmp',
187
+ autoscaling_algorithm='THROUGHPUT_BASED',
188
+ max_num_workers=5
189
+ )
190
+
191
+ with beam.Pipeline(options=options) as p:
192
+ messages = (
193
+ p
194
+ | 'ReadPubSub' >> beam.io.ReadFromPubSub(
195
+ topic='projects/cloudstream-prod/topics/zoho-webhooks'
196
+ )
197
+ )
198
+
199
+ results = messages | 'ParseValidate' >> beam.ParDo(
200
+ ParseAndValidate()
201
+ ).with_outputs('valid', 'quarantine')
202
+
203
+ # Valid records → Silver
204
+ results.valid | 'WriteToSilver' >> WriteToBigQuery(
205
+ table='cloudstream-prod:silver.zoho_deals_streaming',
206
+ write_disposition=BigQueryDisposition.WRITE_APPEND,
207
+ create_disposition=BigQueryDisposition.CREATE_IF_NEEDED
208
+ )
209
+
210
+ # Bad records → Quarantine
211
+ results.quarantine | 'WriteToQuarantine' >> WriteToBigQuery(
212
+ table='cloudstream-prod:silver._quarantine_streaming',
213
+ write_disposition=BigQueryDisposition.WRITE_APPEND
214
+ )
215
+
216
+ if __name__ == '__main__':
217
+ run()
218
+ ```
219
+
220
+ ## Cloud Function Triggers on GCS File Arrival
221
+
222
+ ```python
223
+ # cloud_function/bronze_loader/main.py
224
+ """Triggered when new file arrives in GCS landing zone.
225
+ Loads raw file into Bronze BigQuery table."""
226
+
227
+ from google.cloud import bigquery, storage
228
+ import functions_framework
229
+ import json
230
+
231
+ PROJECT = 'cloudstream-prod'
232
+ DATASET_BRONZE = 'bronze'
233
+
234
+ # Source-to-table mapping
235
+ SOURCE_MAP = {
236
+ 'zoho-crm/deals': 'zoho_deals',
237
+ 'zoho-crm/contacts': 'zoho_contacts',
238
+ 'zoho-books/invoices': 'zoho_invoices',
239
+ 'zoho-books/payments': 'zoho_payments',
240
+ }
241
+
242
+ @functions_framework.cloud_event
243
+ def load_to_bronze(cloud_event):
244
+ """Triggered by GCS object finalize event."""
245
+ data = cloud_event.data
246
+ bucket_name = data['bucket']
247
+ file_path = data['name']
248
+
249
+ # Determine target table from file path
250
+ # Expected: landing/{source}/{entity}/YYYY/MM/DD/file.json
251
+ parts = file_path.split('/')
252
+ if len(parts) < 4:
253
+ print(f"Unexpected path format: {file_path}")
254
+ return
255
+
256
+ source_key = f"{parts[1]}/{parts[2]}"
257
+ table_name = SOURCE_MAP.get(source_key)
258
+ if not table_name:
259
+ print(f"Unknown source: {source_key}")
260
+ return
261
+
262
+ client = bigquery.Client(project=PROJECT)
263
+ table_ref = f"{PROJECT}.{DATASET_BRONZE}.{table_name}"
264
+
265
+ job_config = bigquery.LoadJobConfig(
266
+ source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
267
+ write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
268
+ schema_update_options=[
269
+ bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION
270
+ ],
271
+ # Partition on _ingestion_date. Note: a load job does not add
+ # metadata columns — the field must already exist in the files.
272
+ time_partitioning=bigquery.TimePartitioning(
273
+ type_=bigquery.TimePartitioningType.DAY,
274
+ field='_ingestion_date'
275
+ ),
276
+ )
277
+
278
+ uri = f"gs://{bucket_name}/{file_path}"
279
+ load_job = client.load_table_from_uri(uri, table_ref, job_config=job_config)
280
+ load_job.result() # Wait for completion
281
+
282
+ print(f"Loaded {load_job.output_rows} rows from {uri} to {table_ref}")
283
+
284
+ # Trigger silver refresh if needed
285
+ trigger_silver_refresh(table_name)
286
+
287
+ def trigger_silver_refresh(table_name):
288
+ """Publish message to trigger dbt staging model run."""
289
+ from google.cloud import pubsub_v1
+ from datetime import datetime  # required for trigger_time below
290
+ publisher = pubsub_v1.PublisherClient()
291
+ topic = f"projects/{PROJECT}/topics/silver-refresh-trigger"
292
+
293
+ message = json.dumps({
294
+ 'source_table': table_name,
295
+ 'trigger_time': datetime.utcnow().isoformat()
296
+ }).encode('utf-8')
297
+
298
+ publisher.publish(topic, message)
299
+ ```
300
+
301
+ ## Orchestration with Cloud Composer/Scheduler
302
+
303
+ ### Cloud Composer DAG (Full Pipeline)
304
+
305
+ ```python
306
+ # dags/medallion_pipeline.py
307
+ from airflow import DAG
308
+ from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator
309
+ from airflow.providers.dbt.cloud.operators.dbt import DbtCloudRunJobOperator
310
+ from airflow.operators.python import PythonOperator
311
+ from airflow.utils.dates import days_ago
312
+ from datetime import timedelta
313
+
314
+ default_args = {
315
+ 'owner': 'cloudstream',
316
+ 'retries': 2,
317
+ 'retry_delay': timedelta(minutes=5),
318
+ 'email_on_failure': True,
319
+ 'email': ['data-alerts@cloudstreamsoftware.com'],
320
+ }
321
+
322
+ with DAG(
323
+ 'medallion_full_refresh',
324
+ default_args=default_args,
325
+ schedule_interval='0 6 * * *', # 6 AM daily
326
+ start_date=days_ago(1),
327
+ catchup=False,
328
+ tags=['medallion', 'production'],
329
+ ) as dag:
330
+
331
+ # Step 1: Bronze freshness check
332
+ check_bronze = BigQueryInsertJobOperator(
333
+ task_id='check_bronze_freshness',
334
+ configuration={
335
+ 'query': {
336
+ 'query': """
337
+ SELECT table_id, TIMESTAMP_MILLIS(last_modified_time) AS latest
338
+ FROM `cloudstream-prod.bronze.__TABLES__`
339
+ WHERE TIMESTAMP_MILLIS(last_modified_time)
340
+ < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 25 HOUR)
341
+ """,
342
+ 'useLegacySql': False,
343
+ }
344
+ },
345
+ )
346
+
347
+ # Step 2: dbt run staging (Bronze → Silver)
348
+ dbt_silver = DbtCloudRunJobOperator(
349
+ task_id='dbt_run_silver',
350
+ job_id=12345, # dbt Cloud job ID for staging models
351
+ check_interval=30,
352
+ timeout=1800,
353
+ )
354
+
355
+ # Step 3: dbt test silver
356
+ dbt_test_silver = DbtCloudRunJobOperator(
357
+ task_id='dbt_test_silver',
358
+ job_id=12346, # dbt Cloud job ID for staging tests
359
+ check_interval=30,
360
+ timeout=600,
361
+ )
362
+
363
+ # Step 4: dbt run marts (Silver → Gold)
364
+ dbt_gold = DbtCloudRunJobOperator(
365
+ task_id='dbt_run_gold',
366
+ job_id=12347,
367
+ check_interval=30,
368
+ timeout=1800,
369
+ )
370
+
371
+ # Step 5: dbt test gold
372
+ dbt_test_gold = DbtCloudRunJobOperator(
373
+ task_id='dbt_test_gold',
374
+ job_id=12348,
375
+ check_interval=30,
376
+ timeout=600,
377
+ )
378
+
379
+ check_bronze >> dbt_silver >> dbt_test_silver >> dbt_gold >> dbt_test_gold
380
+ ```
381
+
382
+ ### Cloud Scheduler (Lightweight Alternative)
383
+
384
+ ```yaml
385
+ # terraform/scheduler.tf - For simpler orchestration without Composer
386
+ resource "google_cloud_scheduler_job" "dbt_silver_refresh" {
387
+ name = "dbt-silver-refresh"
388
+ schedule = "0 */4 * * *" # Every 4 hours
389
+ time_zone = "America/Chicago"
390
+
391
+ http_target {
392
+ uri = "https://cloud.getdbt.com/api/v2/accounts/12345/jobs/67890/run/"
393
+ http_method = "POST"
394
+ headers = { "Authorization" = "Token ${var.dbt_cloud_token}" }
395
+ body = base64encode(jsonencode({ "cause" = "Scheduled by Cloud Scheduler" }))
396
+ }
397
+ }
398
+ ```
399
+
400
+ ## Dependency Management
401
+
402
+ ```yaml
403
+ # models/staging/_staging__sources.yml
404
+ sources:
405
+ - name: bronze
406
+ database: cloudstream-prod
407
+ schema: bronze
408
+ freshness:
409
+ warn_after: {count: 12, period: hour}
410
+ error_after: {count: 24, period: hour}
411
+ loaded_at_field: _ingestion_timestamp
412
+ tables:
413
+ - name: zoho_deals
414
+ - name: zoho_contacts
415
+ - name: zoho_invoices
416
+ - name: zoho_payments
417
+
418
+ # models/marts - note on explicit dependencies
419
+ # dbt infers model dependencies automatically from ref() calls in
420
+ # the model SQL; `depends_on` is not a valid model config key.
421
+ # For dependencies dbt cannot detect (e.g. refs built dynamically),
422
+ # add hint comments at the top of the model's .sql file instead:
423
+ #   -- depends_on: {{ ref('stg_zoho__invoices') }}
424
+ #   -- depends_on: {{ ref('stg_zoho__accounts') }}
425
+ ```
426
+
427
+ ## Failure Handling and Replay
428
+
429
+ ```python
430
+ # Replay failed loads from GCS archive
431
+ def replay_failed_batch(batch_id: str, source_table: str):
432
+ """Re-process a specific failed batch from GCS landing zone."""
433
+ from google.cloud import bigquery, storage
434
+
435
+ client = bigquery.Client()
436
+ storage_client = storage.Client()
437
+
438
+ # Find files for the failed batch
439
+ bucket = storage_client.bucket('cloudstream-landing')
440
+ blobs = bucket.list_blobs(prefix=f"archive/{source_table}/{batch_id}/")
441
+
442
+ for blob in blobs:
443
+ uri = f"gs://cloudstream-landing/{blob.name}"
444
+ job_config = bigquery.LoadJobConfig(
445
+ source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
446
+ write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
447
+ )
448
+
449
+ job = client.load_table_from_uri(
450
+ uri, f"cloudstream-prod.bronze.{source_table}", job_config=job_config
451
+ )
452
+ job.result()
453
+ print(f"Replayed: {uri} → {job.output_rows} rows")
454
+ ```
455
+
456
+ ## Monitoring and Alerts
457
+
458
+ ```yaml
459
+ # Cloud Monitoring alert policies
460
+ # Alert if silver layer is stale (no updates in 6+ hours)
461
+ alertPolicies:
462
+ - displayName: "Silver Layer Staleness"
463
+ conditions:
464
+ - conditionThreshold:
465
+ filter: 'resource.type="bigquery_table" AND metric.type="bigquery.googleapis.com/storage/last_modified_time"'
466
+ comparison: COMPARISON_GT
467
+ thresholdValue: 21600 # 6 hours in seconds
468
+ notificationChannels:
469
+ - "projects/cloudstream-prod/notificationChannels/slack-data-alerts"
470
+
471
+ - displayName: "High Quarantine Rate"
472
+ conditions:
473
+ - conditionThreshold:
474
+ filter: 'metric.type="custom.googleapis.com/silver/quarantine_rate"'
475
+ comparison: COMPARISON_GT
476
+ thresholdValue: 0.05 # >5% quarantine rate
477
+ ```
478
+
479
+ ## SLA Tracking
480
+
481
+ ```sql
482
+ -- Track layer transition SLAs
483
+ CREATE TABLE `project.ops._sla_tracking` (
484
+ pipeline_name STRING,
485
+ layer_transition STRING, -- 'bronze_to_silver', 'silver_to_gold'
486
+ started_at TIMESTAMP,
487
+ completed_at TIMESTAMP,
488
+ duration_seconds INT64,
489
+ sla_seconds INT64, -- Target SLA
490
+ sla_met BOOL,
491
+ record_count INT64
492
+ );
493
+
494
+ -- SLA dashboard query
495
+ SELECT
496
+ layer_transition,
497
+ DATE(started_at) AS run_date,
498
+ AVG(duration_seconds) AS avg_duration,
499
+ MAX(duration_seconds) AS max_duration,
500
+ COUNTIF(sla_met = FALSE) AS sla_violations,
501
+ COUNT(*) AS total_runs
502
+ FROM `project.ops._sla_tracking`
503
+ WHERE started_at >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
504
+ GROUP BY 1, 2
505
+ ORDER BY 2 DESC;
506
+ ```
507
+
508
+ ## Best Practices
509
+
510
+ 1. **Never skip silver** - Even for "clean" sources, always apply deduplication and typing
511
+ 2. **Use dbt `--select` by tag** - Run `tag:silver` and `tag:gold` independently
512
+ 3. **Test between layers** - Run `dbt test` after silver before triggering gold
513
+ 4. **Archive raw files** - Move GCS files to archive/ after successful bronze load
514
+ 5. **Idempotent loads** - Use `unique_key` in incremental models for safe re-runs
515
+ 6. **Monitor freshness** - dbt source freshness checks catch upstream delays early
516
+
517
+ > **WARNING**: Never run gold models if silver tests fail. Use `dbt build --select tag:silver` (runs + tests) before proceeding to gold.