@cloudstreamsoftware/claude-tools 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +152 -37
- package/agents/INDEX.md +183 -0
- package/agents/architect.md +247 -0
- package/agents/build-error-resolver.md +555 -0
- package/agents/catalyst-deployer.md +132 -0
- package/agents/code-reviewer.md +121 -0
- package/agents/compliance-auditor.md +148 -0
- package/agents/creator-architect.md +395 -0
- package/agents/deluge-reviewer.md +98 -0
- package/agents/doc-updater.md +471 -0
- package/agents/e2e-runner.md +711 -0
- package/agents/planner.md +122 -0
- package/agents/refactor-cleaner.md +309 -0
- package/agents/security-reviewer.md +582 -0
- package/agents/tdd-guide.md +302 -0
- package/bin/cloudstream-setup.js +16 -6
- package/config/versions.json +63 -0
- package/dist/hooks/hooks.json +209 -0
- package/dist/index.js +47 -0
- package/dist/lib/asset-value.js +609 -0
- package/dist/lib/client-manager.js +300 -0
- package/dist/lib/command-matcher.js +242 -0
- package/dist/lib/cross-session-patterns.js +754 -0
- package/dist/lib/intent-classifier.js +1075 -0
- package/dist/lib/package-manager.js +374 -0
- package/dist/lib/recommendation-engine.js +597 -0
- package/dist/lib/session-memory.js +489 -0
- package/dist/lib/skill-effectiveness.js +486 -0
- package/dist/lib/skill-matcher.js +595 -0
- package/dist/lib/tutorial-metrics.js +242 -0
- package/dist/lib/tutorial-progress.js +209 -0
- package/dist/lib/tutorial-renderer.js +431 -0
- package/dist/lib/utils.js +380 -0
- package/dist/lib/verify-formatter.js +143 -0
- package/dist/lib/workflow-state.js +249 -0
- package/hooks/hooks.json +209 -0
- package/package.json +5 -1
- package/scripts/aggregate-sessions.js +290 -0
- package/scripts/branch-name-validator.js +291 -0
- package/scripts/build.js +101 -0
- package/scripts/commands/client-switch.js +231 -0
- package/scripts/deprecate-skill.js +610 -0
- package/scripts/diagnose.js +324 -0
- package/scripts/doc-freshness.js +168 -0
- package/scripts/generate-weekly-digest.js +393 -0
- package/scripts/health-check.js +270 -0
- package/scripts/hooks/credential-check.js +101 -0
- package/scripts/hooks/evaluate-session.js +81 -0
- package/scripts/hooks/pre-compact.js +66 -0
- package/scripts/hooks/prompt-analyzer.js +276 -0
- package/scripts/hooks/prompt-router.js +422 -0
- package/scripts/hooks/quality-gate-enforcer.js +371 -0
- package/scripts/hooks/session-end.js +156 -0
- package/scripts/hooks/session-start.js +195 -0
- package/scripts/hooks/skill-injector.js +333 -0
- package/scripts/hooks/suggest-compact.js +58 -0
- package/scripts/lib/asset-value.js +609 -0
- package/scripts/lib/client-manager.js +300 -0
- package/scripts/lib/command-matcher.js +242 -0
- package/scripts/lib/cross-session-patterns.js +754 -0
- package/scripts/lib/intent-classifier.js +1075 -0
- package/scripts/lib/package-manager.js +374 -0
- package/scripts/lib/recommendation-engine.js +597 -0
- package/scripts/lib/session-memory.js +489 -0
- package/scripts/lib/skill-effectiveness.js +486 -0
- package/scripts/lib/skill-matcher.js +595 -0
- package/scripts/lib/tutorial-metrics.js +242 -0
- package/scripts/lib/tutorial-progress.js +209 -0
- package/scripts/lib/tutorial-renderer.js +431 -0
- package/scripts/lib/utils.js +380 -0
- package/scripts/lib/verify-formatter.js +143 -0
- package/scripts/lib/workflow-state.js +249 -0
- package/scripts/onboard.js +363 -0
- package/scripts/quarterly-report.js +692 -0
- package/scripts/setup-package-manager.js +204 -0
- package/scripts/sync-upstream.js +391 -0
- package/scripts/test.js +108 -0
- package/scripts/tutorial-runner.js +351 -0
- package/scripts/validate-all.js +201 -0
- package/scripts/verifiers/agents.js +245 -0
- package/scripts/verifiers/config.js +186 -0
- package/scripts/verifiers/environment.js +123 -0
- package/scripts/verifiers/hooks.js +188 -0
- package/scripts/verifiers/index.js +38 -0
- package/scripts/verifiers/persistence.js +140 -0
- package/scripts/verifiers/plugin.js +215 -0
- package/scripts/verifiers/skills.js +209 -0
- package/scripts/verify-setup.js +164 -0
- package/skills/INDEX.md +157 -0
- package/skills/backend-patterns/SKILL.md +586 -0
- package/skills/backend-patterns/catalyst-patterns.md +128 -0
- package/skills/bigquery-patterns/SKILL.md +27 -0
- package/skills/bigquery-patterns/performance-optimization.md +518 -0
- package/skills/bigquery-patterns/query-patterns.md +372 -0
- package/skills/bigquery-patterns/schema-design.md +78 -0
- package/skills/cloudstream-project-template/SKILL.md +20 -0
- package/skills/cloudstream-project-template/structure.md +65 -0
- package/skills/coding-standards/SKILL.md +524 -0
- package/skills/coding-standards/deluge-standards.md +83 -0
- package/skills/compliance-patterns/SKILL.md +28 -0
- package/skills/compliance-patterns/hipaa/audit-requirements.md +251 -0
- package/skills/compliance-patterns/hipaa/baa-process.md +298 -0
- package/skills/compliance-patterns/hipaa/data-archival-strategy.md +387 -0
- package/skills/compliance-patterns/hipaa/phi-handling.md +52 -0
- package/skills/compliance-patterns/pci-dss/saq-a-requirements.md +307 -0
- package/skills/compliance-patterns/pci-dss/tokenization-patterns.md +382 -0
- package/skills/compliance-patterns/pci-dss/zoho-checkout-patterns.md +56 -0
- package/skills/compliance-patterns/soc2/access-controls.md +344 -0
- package/skills/compliance-patterns/soc2/audit-logging.md +458 -0
- package/skills/compliance-patterns/soc2/change-management.md +403 -0
- package/skills/compliance-patterns/soc2/deluge-execution-logging.md +407 -0
- package/skills/consultancy-workflows/SKILL.md +19 -0
- package/skills/consultancy-workflows/client-isolation.md +21 -0
- package/skills/consultancy-workflows/documentation-automation.md +454 -0
- package/skills/consultancy-workflows/handoff-procedures.md +257 -0
- package/skills/consultancy-workflows/knowledge-capture.md +513 -0
- package/skills/consultancy-workflows/time-tracking.md +26 -0
- package/skills/continuous-learning/SKILL.md +84 -0
- package/skills/continuous-learning/config.json +18 -0
- package/skills/continuous-learning/evaluate-session.sh +60 -0
- package/skills/continuous-learning-v2/SKILL.md +126 -0
- package/skills/continuous-learning-v2/config.json +61 -0
- package/skills/frontend-patterns/SKILL.md +635 -0
- package/skills/frontend-patterns/zoho-widget-patterns.md +103 -0
- package/skills/gcp-data-engineering/SKILL.md +36 -0
- package/skills/gcp-data-engineering/bigquery/performance-optimization.md +337 -0
- package/skills/gcp-data-engineering/dataflow/error-handling.md +496 -0
- package/skills/gcp-data-engineering/dataflow/pipeline-patterns.md +444 -0
- package/skills/gcp-data-engineering/dbt/model-organization.md +63 -0
- package/skills/gcp-data-engineering/dbt/testing-patterns.md +503 -0
- package/skills/gcp-data-engineering/medallion-architecture/bronze-layer.md +60 -0
- package/skills/gcp-data-engineering/medallion-architecture/gold-layer.md +311 -0
- package/skills/gcp-data-engineering/medallion-architecture/layer-transitions.md +517 -0
- package/skills/gcp-data-engineering/medallion-architecture/silver-layer.md +305 -0
- package/skills/gcp-data-engineering/zoho-to-gcp/data-extraction.md +543 -0
- package/skills/gcp-data-engineering/zoho-to-gcp/real-time-vs-batch.md +337 -0
- package/skills/security-review/SKILL.md +498 -0
- package/skills/security-review/compliance-checklist.md +53 -0
- package/skills/strategic-compact/SKILL.md +67 -0
- package/skills/tdd-workflow/SKILL.md +413 -0
- package/skills/tdd-workflow/zoho-testing.md +124 -0
- package/skills/tutorial/SKILL.md +249 -0
- package/skills/tutorial/docs/ACCESSIBILITY.md +169 -0
- package/skills/tutorial/lessons/00-philosophy-and-workflow.md +198 -0
- package/skills/tutorial/lessons/01-basics.md +81 -0
- package/skills/tutorial/lessons/02-training.md +86 -0
- package/skills/tutorial/lessons/03-commands.md +109 -0
- package/skills/tutorial/lessons/04-workflows.md +115 -0
- package/skills/tutorial/lessons/05-compliance.md +116 -0
- package/skills/tutorial/lessons/06-zoho.md +121 -0
- package/skills/tutorial/lessons/07-hooks-system.md +277 -0
- package/skills/tutorial/lessons/08-mcp-servers.md +316 -0
- package/skills/tutorial/lessons/09-client-management.md +215 -0
- package/skills/tutorial/lessons/10-testing-e2e.md +260 -0
- package/skills/tutorial/lessons/11-skills-deep-dive.md +272 -0
- package/skills/tutorial/lessons/12-rules-system.md +326 -0
- package/skills/tutorial/lessons/13-golden-standard-graduation.md +213 -0
- package/skills/tutorial/lessons/14-fork-setup-and-sync.md +312 -0
- package/skills/tutorial/lessons/15-living-examples-system.md +221 -0
- package/skills/tutorial/tracks/accelerated/README.md +134 -0
- package/skills/tutorial/tracks/accelerated/assessment/checkpoint-1.md +161 -0
- package/skills/tutorial/tracks/accelerated/assessment/checkpoint-2.md +175 -0
- package/skills/tutorial/tracks/accelerated/day-1-core-concepts.md +234 -0
- package/skills/tutorial/tracks/accelerated/day-2-essential-commands.md +270 -0
- package/skills/tutorial/tracks/accelerated/day-3-workflow-mastery.md +305 -0
- package/skills/tutorial/tracks/accelerated/day-4-compliance-zoho.md +304 -0
- package/skills/tutorial/tracks/accelerated/day-5-hooks-skills.md +344 -0
- package/skills/tutorial/tracks/accelerated/day-6-client-testing.md +386 -0
- package/skills/tutorial/tracks/accelerated/day-7-graduation.md +369 -0
- package/skills/zoho-patterns/CHANGELOG.md +108 -0
- package/skills/zoho-patterns/SKILL.md +446 -0
- package/skills/zoho-patterns/analytics/dashboard-patterns.md +352 -0
- package/skills/zoho-patterns/analytics/zoho-to-bigquery-pipeline.md +427 -0
- package/skills/zoho-patterns/catalyst/appsail-deployment.md +349 -0
- package/skills/zoho-patterns/catalyst/context-close-patterns.md +354 -0
- package/skills/zoho-patterns/catalyst/cron-batch-processing.md +374 -0
- package/skills/zoho-patterns/catalyst/function-patterns.md +439 -0
- package/skills/zoho-patterns/creator/form-design.md +304 -0
- package/skills/zoho-patterns/creator/publish-api-patterns.md +313 -0
- package/skills/zoho-patterns/creator/widget-integration.md +306 -0
- package/skills/zoho-patterns/creator/workflow-automation.md +253 -0
- package/skills/zoho-patterns/deluge/api-patterns.md +468 -0
- package/skills/zoho-patterns/deluge/batch-processing.md +403 -0
- package/skills/zoho-patterns/deluge/cross-app-integration.md +356 -0
- package/skills/zoho-patterns/deluge/error-handling.md +423 -0
- package/skills/zoho-patterns/deluge/syntax-reference.md +65 -0
- package/skills/zoho-patterns/integration/cors-proxy-architecture.md +426 -0
- package/skills/zoho-patterns/integration/crm-books-native-sync.md +277 -0
- package/skills/zoho-patterns/integration/oauth-token-management.md +461 -0
- package/skills/zoho-patterns/integration/zoho-flow-patterns.md +334 -0
|
@@ -0,0 +1,503 @@
|
|
|
1
|
+
# dbt Testing Patterns
|
|
2
|
+
|
|
3
|
+
> Comprehensive testing strategies for CloudStream's dbt project, covering schema tests, data tests, source freshness, CI/CD integration, and custom macros.
|
|
4
|
+
|
|
5
|
+
## Schema Tests (Built-In)
|
|
6
|
+
|
|
7
|
+
### Core Tests in YAML
|
|
8
|
+
|
|
9
|
+
```yaml
|
|
10
|
+
# models/staging/_stg_zoho__models.yml
|
|
11
|
+
version: 2
|
|
12
|
+
|
|
13
|
+
models:
|
|
14
|
+
- name: stg_zoho__deals
|
|
15
|
+
description: "Cleansed Zoho CRM deals, deduplicated by record_id"
|
|
16
|
+
columns:
|
|
17
|
+
- name: record_id
|
|
18
|
+
description: "Primary key from Zoho"
|
|
19
|
+
tests:
|
|
20
|
+
- not_null
|
|
21
|
+
- unique
|
|
22
|
+
|
|
23
|
+
- name: stage
|
|
24
|
+
description: "Deal pipeline stage"
|
|
25
|
+
tests:
|
|
26
|
+
- not_null
|
|
27
|
+
- accepted_values:
|
|
28
|
+
values: [
|
|
29
|
+
'Qualification', 'Needs Analysis', 'Proposal',
|
|
30
|
+
'Negotiation', 'Closed Won', 'Closed Lost', 'Unknown'
|
|
31
|
+
]
|
|
32
|
+
config:
|
|
33
|
+
severity: warn # New stages from Zoho should warn, not fail
|
|
34
|
+
|
|
35
|
+
- name: amount
|
|
36
|
+
description: "Deal value in USD"
|
|
37
|
+
tests:
|
|
38
|
+
- not_null:
|
|
39
|
+
config:
|
|
40
|
+
where: "stage = 'Closed Won'" # Only required for won deals
|
|
41
|
+
|
|
42
|
+
- name: owner_id
|
|
43
|
+
description: "Sales rep who owns the deal"
|
|
44
|
+
tests:
|
|
45
|
+
- not_null
|
|
46
|
+
- relationships:
|
|
47
|
+
to: ref('stg_zoho__users')
|
|
48
|
+
field: user_id
|
|
49
|
+
config:
|
|
50
|
+
severity: warn # Orphaned owners may exist during sync
|
|
51
|
+
|
|
52
|
+
- name: account_id
|
|
53
|
+
description: "Related account"
|
|
54
|
+
tests:
|
|
55
|
+
- relationships:
|
|
56
|
+
to: ref('stg_zoho__accounts')
|
|
57
|
+
field: record_id
|
|
58
|
+
|
|
59
|
+
- name: stg_zoho__invoices
|
|
60
|
+
columns:
|
|
61
|
+
- name: record_id
|
|
62
|
+
tests:
|
|
63
|
+
- not_null
|
|
64
|
+
- unique
|
|
65
|
+
- name: invoice_number
|
|
66
|
+
tests:
|
|
67
|
+
- not_null
|
|
68
|
+
- unique
|
|
69
|
+
- name: total
|
|
70
|
+
tests:
|
|
71
|
+
- not_null
|
|
72
|
+
- dbt_utils.accepted_range:
|
|
73
|
+
min_value: 0
|
|
74
|
+
max_value: 10000000
|
|
75
|
+
config:
|
|
76
|
+
severity: warn
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Data Tests (Custom SQL Assertions)
|
|
80
|
+
|
|
81
|
+
```sql
|
|
82
|
+
-- tests/assert_no_future_close_dates.sql
|
|
83
|
+
-- Deals should not have close dates in the future for "Closed Won" stage
|
|
84
|
+
SELECT
|
|
85
|
+
record_id,
|
|
86
|
+
deal_name,
|
|
87
|
+
close_date,
|
|
88
|
+
stage
|
|
89
|
+
FROM {{ ref('stg_zoho__deals') }}
|
|
90
|
+
WHERE stage = 'Closed Won'
|
|
91
|
+
AND close_date > CURRENT_DATE()
|
|
92
|
+
|
|
93
|
+
-- If this returns any rows, the test FAILS
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
```sql
|
|
97
|
+
-- tests/assert_invoice_total_matches_lines.sql
|
|
98
|
+
-- Invoice total should match sum of line items
|
|
99
|
+
SELECT
|
|
100
|
+
i.record_id AS invoice_id,
|
|
101
|
+
i.total AS header_total,
|
|
102
|
+
SUM(l.amount) AS line_total,
|
|
103
|
+
ABS(i.total - SUM(l.amount)) AS discrepancy
|
|
104
|
+
FROM {{ ref('stg_zoho__invoices') }} i
|
|
105
|
+
JOIN {{ ref('stg_zoho__invoice_lines') }} l ON i.record_id = l.invoice_id
|
|
106
|
+
GROUP BY 1, 2
|
|
107
|
+
HAVING ABS(i.total - SUM(l.amount)) > 0.01 -- Allow 1 cent rounding
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
```sql
|
|
111
|
+
-- tests/assert_revenue_not_negative.sql
|
|
112
|
+
-- Monthly revenue should never be negative
|
|
113
|
+
SELECT
|
|
114
|
+
revenue_month,
|
|
115
|
+
customer_id,
|
|
116
|
+
total_revenue
|
|
117
|
+
FROM {{ ref('fct_monthly_revenue') }}
|
|
118
|
+
WHERE total_revenue < 0
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## dbt Expectations Package
|
|
122
|
+
|
|
123
|
+
```yaml
|
|
124
|
+
# packages.yml
|
|
125
|
+
packages:
|
|
126
|
+
- package: calogica/dbt_expectations
|
|
127
|
+
version: [">=0.10.0", "<0.11.0"]
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
```yaml
|
|
131
|
+
# models/staging/_stg_zoho__models.yml
|
|
132
|
+
models:
|
|
133
|
+
- name: stg_zoho__deals
|
|
134
|
+
tests:
|
|
135
|
+
# Table-level expectations
|
|
136
|
+
- dbt_expectations.expect_table_row_count_to_be_between:
|
|
137
|
+
min_value: 100 # We should always have at least 100 deals
|
|
138
|
+
config:
|
|
139
|
+
severity: error
|
|
140
|
+
|
|
141
|
+
- dbt_expectations.expect_table_row_count_to_equal_other_table:
|
|
142
|
+
compare_model: source('bronze', 'zoho_deals')
|
|
143
|
+
# NOTE: this test asserts EQUAL row counts. Dedup can leave silver with fewer rows than bronze, so a warning here is expected whenever duplicates were removed.
|
|
144
|
+
row_condition: "1=1"
|
|
145
|
+
compare_row_condition: "record_id IS NOT NULL"
|
|
146
|
+
config:
|
|
147
|
+
severity: warn
|
|
148
|
+
|
|
149
|
+
columns:
|
|
150
|
+
- name: amount
|
|
151
|
+
tests:
|
|
152
|
+
- dbt_expectations.expect_column_values_to_be_between:
|
|
153
|
+
min_value: 0
|
|
154
|
+
max_value: 50000000
|
|
155
|
+
row_condition: "stage != 'Closed Lost'"
|
|
156
|
+
|
|
157
|
+
- dbt_expectations.expect_column_mean_to_be_between:
|
|
158
|
+
min_value: 1000
|
|
159
|
+
max_value: 500000
|
|
160
|
+
config:
|
|
161
|
+
severity: warn
|
|
162
|
+
|
|
163
|
+
- name: email
|
|
164
|
+
tests:
|
|
165
|
+
- dbt_expectations.expect_column_values_to_match_regex:
|
|
166
|
+
regex: "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"
|
|
167
|
+
row_condition: "email IS NOT NULL"
|
|
168
|
+
config:
|
|
169
|
+
severity: warn
|
|
170
|
+
|
|
171
|
+
- name: close_date
|
|
172
|
+
tests:
|
|
173
|
+
- dbt_expectations.expect_column_values_to_be_between:
|
|
174
|
+
min_value: "'2015-01-01'"
|
|
175
|
+
max_value: "CAST(DATE_ADD(CURRENT_DATE(), INTERVAL 365 DAY) AS STRING)"
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## Source Freshness Checks
|
|
179
|
+
|
|
180
|
+
```yaml
|
|
181
|
+
# models/staging/_sources.yml
|
|
182
|
+
sources:
|
|
183
|
+
- name: bronze
|
|
184
|
+
database: cloudstream-prod
|
|
185
|
+
schema: bronze
|
|
186
|
+
freshness:
|
|
187
|
+
warn_after: {count: 6, period: hour}
|
|
188
|
+
error_after: {count: 24, period: hour}
|
|
189
|
+
loaded_at_field: _ingestion_timestamp
|
|
190
|
+
tables:
|
|
191
|
+
- name: zoho_deals
|
|
192
|
+
freshness:
|
|
193
|
+
warn_after: {count: 4, period: hour} # Deals sync every 4 hours
|
|
194
|
+
error_after: {count: 8, period: hour}
|
|
195
|
+
|
|
196
|
+
- name: zoho_invoices
|
|
197
|
+
freshness:
|
|
198
|
+
warn_after: {count: 12, period: hour} # Invoices sync twice daily
|
|
199
|
+
error_after: {count: 36, period: hour}
|
|
200
|
+
|
|
201
|
+
- name: zoho_contacts
|
|
202
|
+
freshness:
|
|
203
|
+
warn_after: {count: 24, period: hour} # Contacts sync daily
|
|
204
|
+
error_after: {count: 48, period: hour}
|
|
205
|
+
|
|
206
|
+
- name: zoho_realtime_events
|
|
207
|
+
freshness:
|
|
208
|
+
warn_after: {count: 30, period: minute} # Real-time should be fresh
|
|
209
|
+
error_after: {count: 2, period: hour}
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
```bash
|
|
213
|
+
# Check source freshness
|
|
214
|
+
dbt source freshness --select source:bronze
|
|
215
|
+
|
|
216
|
+
# Output example:
|
|
217
|
+
# 14:23:01 | WARN | source bronze.zoho_deals is stale (last loaded 5 hours ago)
|
|
218
|
+
# 14:23:01 | PASS | source bronze.zoho_invoices is fresh (last loaded 2 hours ago)
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
## Test Severity Levels
|
|
222
|
+
|
|
223
|
+
```yaml
|
|
224
|
+
# Severity configuration matrix
|
|
225
|
+
# ERROR: Pipeline stops, must fix before gold layer runs
|
|
226
|
+
# WARN: Pipeline continues, alert sent to Slack
|
|
227
|
+
|
|
228
|
+
models:
|
|
229
|
+
- name: stg_zoho__deals
|
|
230
|
+
columns:
|
|
231
|
+
- name: record_id
|
|
232
|
+
tests:
|
|
233
|
+
- not_null:
|
|
234
|
+
config:
|
|
235
|
+
severity: error # HARD STOP: Cannot have null PKs
|
|
236
|
+
- unique:
|
|
237
|
+
config:
|
|
238
|
+
severity: error # HARD STOP: Dedup failed
|
|
239
|
+
|
|
240
|
+
- name: stage
|
|
241
|
+
tests:
|
|
242
|
+
- accepted_values:
|
|
243
|
+
values: ['Qualification', 'Needs Analysis', 'Proposal',
|
|
244
|
+
'Negotiation', 'Closed Won', 'Closed Lost', 'Unknown']
|
|
245
|
+
config:
|
|
246
|
+
severity: warn # New values from Zoho are expected occasionally
|
|
247
|
+
|
|
248
|
+
- name: owner_id
|
|
249
|
+
tests:
|
|
250
|
+
- relationships:
|
|
251
|
+
to: ref('stg_zoho__users')
|
|
252
|
+
field: user_id
|
|
253
|
+
config:
|
|
254
|
+
severity: warn # Orphaned refs possible during sync windows
|
|
255
|
+
error_if: ">100" # But more than 100 orphans is suspicious
|
|
256
|
+
warn_if: ">10"
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
## Custom Test Macros
|
|
260
|
+
|
|
261
|
+
```sql
|
|
262
|
+
-- macros/tests/test_freshness_within_hours.sql
|
|
263
|
+
{% test freshness_within_hours(model, column_name, max_hours=24) %}
{#
  Generic test: fails when the newest value of `column_name` in `model` is more
  than `max_hours` hours old, or when there is no value at all.

  Args:
    model:       relation under test (supplied by dbt).
    column_name: timestamp column to inspect (supplied by dbt).
    max_hours:   maximum allowed age of the newest record, in hours (default 24).

  Returns a single row (latest_record, hours_since_latest) on failure, no rows on success.
#}
SELECT
    MAX({{ column_name }}) AS latest_record,
    TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), MAX({{ column_name }}), HOUR) AS hours_since_latest
FROM {{ model }}
HAVING
    -- An empty table (or an all-NULL column) makes MAX(...) NULL. Without this
    -- explicit check the age comparison below evaluates to NULL, the row is
    -- filtered out, and the test would pass even though nothing was loaded.
    MAX({{ column_name }}) IS NULL
    OR TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), MAX({{ column_name }}), HOUR) > {{ max_hours }}
{% endtest %}
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
```sql
|
|
281
|
+
-- macros/tests/test_no_orphaned_records.sql
|
|
282
|
+
{% test no_orphaned_records(model, column_name, parent_model, parent_column) %}
{#
  Generic test: returns every non-NULL `column_name` value in `model` that has
  no matching `parent_column` value in `parent_model`.

  Args:
    model:         child relation under test (supplied by dbt).
    column_name:   foreign-key column on the child (supplied by dbt).
    parent_model:  relation that should contain the referenced rows.
    parent_column: key column on the parent.

  One row is returned per orphaned child row; no rows means the test passes.
#}
SELECT
    c.{{ column_name }}
FROM {{ model }} AS c
WHERE c.{{ column_name }} IS NOT NULL
  AND NOT EXISTS (
      SELECT 1
      FROM {{ parent_model }} AS p
      WHERE p.{{ parent_column }} = c.{{ column_name }}
  )
{% endtest %}
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
```sql
|
|
294
|
+
-- macros/tests/test_row_count_anomaly.sql
|
|
295
|
+
-- Alert if today's row count deviates more than `threshold` (default 50%) from the trailing 7-day average
{% test row_count_anomaly(model, threshold=0.5) %}
{#
  Generic test: compares the number of rows ingested today with the average
  daily row count of the previous 7 days.

  Args:
    model:     relation under test (supplied by dbt); must expose `_ingestion_date`.
    threshold: maximum allowed relative deviation from the average (0.5 = 50%).

  Returns one row (date, today's count, average, deviation) on failure.
  Notes:
    - Days with no rows at all do not appear in `daily_counts`, so the average
      is taken over the days that actually have data.
    - With no history in the window the average is NULL and the test passes.
#}
WITH daily_counts AS (
    SELECT
        _ingestion_date,
        COUNT(*) AS row_count
    FROM {{ model }}
    -- 7 prior days plus today
    WHERE _ingestion_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY)
    GROUP BY 1
),
stats AS (
    SELECT
        AVG(row_count) AS avg_count
    FROM daily_counts
    WHERE _ingestion_date < CURRENT_DATE()  -- Exclude today from the baseline
),
today AS (
    -- Aggregate without GROUP BY always yields exactly one row, so a day with
    -- no ingested rows is reported as 0 instead of disappearing from the check.
    SELECT
        COALESCE(SUM(row_count), 0) AS row_count
    FROM daily_counts
    WHERE _ingestion_date = CURRENT_DATE()
)
SELECT
    CURRENT_DATE() AS _ingestion_date,
    t.row_count,
    s.avg_count,
    ABS(t.row_count - s.avg_count) / NULLIF(s.avg_count, 0) AS deviation_pct
FROM today t
CROSS JOIN stats s
WHERE ABS(t.row_count - s.avg_count) / NULLIF(s.avg_count, 0) > {{ threshold }}
{% endtest %}
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
Usage:
|
|
325
|
+
|
|
326
|
+
```yaml
|
|
327
|
+
models:
|
|
328
|
+
- name: stg_zoho__deals
|
|
329
|
+
tests:
|
|
330
|
+
- row_count_anomaly:
|
|
331
|
+
threshold: 0.5
|
|
332
|
+
config:
|
|
333
|
+
severity: warn
|
|
334
|
+
columns:
|
|
335
|
+
- name: _ingestion_timestamp
|
|
336
|
+
tests:
|
|
337
|
+
- freshness_within_hours:
|
|
338
|
+
max_hours: 8
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
## CI/CD Test Integration
|
|
342
|
+
|
|
343
|
+
```yaml
|
|
344
|
+
# .github/workflows/dbt-ci.yml
|
|
345
|
+
name: dbt CI
|
|
346
|
+
|
|
347
|
+
on:
|
|
348
|
+
pull_request:
|
|
349
|
+
paths:
|
|
350
|
+
- 'dbt/**'
|
|
351
|
+
- 'models/**'
|
|
352
|
+
- 'macros/**'
|
|
353
|
+
|
|
354
|
+
jobs:
|
|
355
|
+
dbt-test:
|
|
356
|
+
runs-on: ubuntu-latest
|
|
357
|
+
steps:
|
|
358
|
+
- uses: actions/checkout@v4
|
|
359
|
+
|
|
360
|
+
- name: Setup Python
|
|
361
|
+
uses: actions/setup-python@v5
|
|
362
|
+
with:
|
|
363
|
+
python-version: '3.11'
|
|
364
|
+
|
|
365
|
+
- name: Install dbt
|
|
366
|
+
run: pip install dbt-bigquery==1.7.*
|
|
367
|
+
|
|
368
|
+
- name: dbt deps
|
|
369
|
+
run: dbt deps --target ci
|
|
370
|
+
|
|
371
|
+
- name: dbt compile
|
|
372
|
+
run: dbt compile --target ci
|
|
373
|
+
|
|
374
|
+
- name: dbt run (modified models only)
|
|
375
|
+
run: dbt run --select state:modified+ --target ci --defer --state ./prod-manifest
|
|
376
|
+
env:
|
|
377
|
+
DBT_CI_DATASET: ci_pr_${{ github.event.pull_request.number }}
|
|
378
|
+
|
|
379
|
+
- name: dbt test (modified models only)
|
|
380
|
+
run: dbt test --select state:modified+ --target ci --defer --state ./prod-manifest
|
|
381
|
+
|
|
382
|
+
- name: Cleanup CI dataset
|
|
383
|
+
if: always()
|
|
384
|
+
run: |
|
|
385
|
+
bq rm -r -f cloudstream-prod:ci_pr_${{ github.event.pull_request.number }}
|
|
386
|
+
|
|
387
|
+
dbt-production:
|
|
388
|
+
runs-on: ubuntu-latest
|
|
389
|
+
if: github.ref == 'refs/heads/main'
|
|
390
|
+
steps:
|
|
391
|
+
- uses: actions/checkout@v4
|
|
392
|
+
|
|
393
|
+
- name: dbt run + test (full)
|
|
394
|
+
run: |
|
|
395
|
+
dbt run --target prod
|
|
396
|
+
dbt test --target prod --store-failures
|
|
397
|
+
|
|
398
|
+
- name: Upload test results
|
|
399
|
+
run: |
|
|
400
|
+
dbt source freshness --target prod
|
|
401
|
+
# Store failures in dedicated schema for investigation
|
|
402
|
+
```
|
|
403
|
+
|
|
404
|
+
## Test Coverage Reporting
|
|
405
|
+
|
|
406
|
+
```sql
|
|
407
|
+
-- macros/test_coverage_report.sql
|
|
408
|
+
-- Generate test coverage report
|
|
409
|
+
{% macro test_coverage_report() %}
  {#
    Logs "<model name>: <n> tests" for every model in the project, where n is
    the number of test nodes whose `depends_on.nodes` lists that model.

    Takes no arguments and returns nothing; output goes to the dbt log.
  #}
  {# `graph` is only populated at execution time, so skip the parse phase. #}
  {% if execute %}
    {% set models = graph.nodes.values() | selectattr("resource_type", "equalto", "model") | list %}
    {% set tests = graph.nodes.values() | selectattr("resource_type", "equalto", "test") | list %}

    {% for model in models %}
      {# A namespace is needed because plain `set` inside a loop does not leak out of it. #}
      {% set counter = namespace(total=0) %}
      {% for test_node in tests %}
        {# Explicit membership check: Jinja has an `in` test but no built-in `containing` test. #}
        {% if model.unique_id in test_node.depends_on.nodes %}
          {% set counter.total = counter.total + 1 %}
        {% endif %}
      {% endfor %}
      {{ log(model.name ~ ": " ~ counter.total ~ " tests", info=True) }}
    {% endfor %}
  {% endif %}
{% endmacro %}
|
|
418
|
+
```
|
|
419
|
+
|
|
420
|
+
```bash
|
|
421
|
+
# Quick coverage check
|
|
422
|
+
dbt ls --resource-type test | wc -l # Total tests
|
|
423
|
+
dbt ls --resource-type model | wc -l # Total models
|
|
424
|
+
# Aim for at least 2 tests per model (not_null PK + unique PK minimum)
|
|
425
|
+
```
|
|
426
|
+
|
|
427
|
+
## Performance Testing for Slow Models
|
|
428
|
+
|
|
429
|
+
```yaml
|
|
430
|
+
# models/marts/_performance_thresholds.yml
|
|
431
|
+
models:
|
|
432
|
+
- name: fct_monthly_revenue
|
|
433
|
+
config:
|
|
434
|
+
# Track model run time - alert if exceeds threshold
|
|
435
|
+
meta:
|
|
436
|
+
max_execution_seconds: 300 # 5 minutes max
|
|
437
|
+
owner: data-team
|
|
438
|
+
|
|
439
|
+
- name: kpi_daily_snapshot
|
|
440
|
+
config:
|
|
441
|
+
meta:
|
|
442
|
+
max_execution_seconds: 120 # 2 minutes max
|
|
443
|
+
owner: data-team
|
|
444
|
+
```
|
|
445
|
+
|
|
446
|
+
```python
|
|
447
|
+
# scripts/check_model_performance.py
|
|
448
|
+
"""Post-dbt-run check: alert if any model exceeded its time threshold."""
|
|
449
|
+
import json
|
|
450
|
+
import sys
|
|
451
|
+
|
|
452
|
+
def check_performance(run_results_path='target/run_results.json', manifest_path=None):
    """Exit with status 1 if any node in a dbt run exceeded its time threshold.

    Each entry in ``run_results.json`` is compared against the
    ``max_execution_seconds`` value declared under the node's ``meta`` config.
    The threshold is looked up in ``manifest.json`` by ``unique_id``; a ``node``
    object embedded in the result itself is used as a fallback when the
    manifest has no entry for it.

    Args:
        run_results_path: Path to dbt's ``run_results.json``.
        manifest_path: Path to dbt's ``manifest.json``. Defaults to a file
            named ``manifest.json`` in the same directory as
            ``run_results_path``. A missing manifest is not an error.

    Returns:
        None when every node finished within its threshold.

    Raises:
        SystemExit: with code 1 after printing the offending nodes.
    """
    import os  # local import so the block does not depend on file-level imports

    default_threshold = 600  # Default 10 min

    with open(run_results_path) as f:
        results = json.load(f)

    if manifest_path is None:
        manifest_path = os.path.join(os.path.dirname(run_results_path), 'manifest.json')

    # The per-model thresholds live in the node config, which is looked up in the manifest.
    manifest_nodes = {}
    if os.path.exists(manifest_path):
        with open(manifest_path) as f:
            manifest_nodes = json.load(f).get('nodes') or {}

    slow_models = []
    for result in results['results']:
        node = result['unique_id']
        execution_time = result.get('execution_time') or 0

        node_info = manifest_nodes.get(node) or result.get('node') or {}
        # `meta` may be declared under `config:` or directly on the node.
        meta = (node_info.get('config') or {}).get('meta') or node_info.get('meta') or {}
        threshold = meta.get('max_execution_seconds', default_threshold)

        if execution_time > threshold:
            slow_models.append({
                'model': node,
                'execution_time': round(execution_time, 1),
                'threshold': threshold
            })

    if slow_models:
        print("PERFORMANCE WARNING - Slow models detected:")
        for m in slow_models:
            print(f"  {m['model']}: {m['execution_time']}s (threshold: {m['threshold']}s)")
        sys.exit(1)
|
|
476
|
+
|
|
477
|
+
if __name__ == '__main__':
|
|
478
|
+
check_performance()
|
|
479
|
+
```
|
|
480
|
+
|
|
481
|
+
## Test Failure Playbook
|
|
482
|
+
|
|
483
|
+
| Test Type | Severity | Action |
|
|
484
|
+
|-----------|----------|--------|
|
|
485
|
+
| `not_null` on PK | ERROR | Block pipeline. Check source for missing IDs. |
|
|
486
|
+
| `unique` on PK | ERROR | Block pipeline. Deduplication logic is broken. |
|
|
487
|
+
| `accepted_values` | WARN | Notify Slack. Zoho may have new picklist values. |
|
|
488
|
+
| `relationships` | WARN | Notify. Sync timing issue or orphaned records. |
|
|
489
|
+
| `freshness` | WARN/ERROR | Check CData Sync or Catalyst Cron job status. |
|
|
490
|
+
| `row_count_anomaly` | WARN | Investigate source. May be legitimate (holiday, etc). |
|
|
491
|
+
| Custom range test | WARN | Review outlier records. May need quarantine. |
|
|
492
|
+
|
|
493
|
+
## Best Practices
|
|
494
|
+
|
|
495
|
+
1. **Every model needs at minimum**: `not_null` + `unique` on primary key
|
|
496
|
+
2. **Use `store_failures`** in production to persist failing rows for investigation
|
|
497
|
+
3. **Run `dbt source freshness`** before model runs to catch upstream delays
|
|
498
|
+
4. **Use `--select state:modified+`** in CI to only test affected models
|
|
499
|
+
5. **Set severity wisely** - too many ERRORs causes alert fatigue; too many WARNs gets ignored
|
|
500
|
+
6. **Custom tests for business rules** - SQL assertions catch domain-specific issues
|
|
501
|
+
7. **Performance test regularly** - Models that slow down signal data growth problems
|
|
502
|
+
|
|
503
|
+
> **WARNING**: `dbt test --store-failures` creates a table for each failing test, by default in a separate schema named `<target_schema>_dbt_test__audit`. Set up cleanup jobs, or configure a dedicated schema such as `dbt_test_failures`, to avoid clutter.
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# Bronze Layer (Raw Ingestion)
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Capture raw data from source systems with minimal transformation.
|
|
5
|
+
Preserve original data for reprocessing if needed.
|
|
6
|
+
|
|
7
|
+
## Zoho → GCS Ingestion Patterns
|
|
8
|
+
|
|
9
|
+
### Scheduled Pull (Catalyst Cron)
|
|
10
|
+
```javascript
|
|
11
|
+
// Catalyst Cron function: Pull Zoho CRM data to GCS
|
|
12
|
+
module.exports = async (cronDetails, context) => {
|
|
13
|
+
const { Storage } = require('@google-cloud/storage');
|
|
14
|
+
const storage = new Storage();
|
|
15
|
+
const bucket = storage.bucket('project-bronze');
|
|
16
|
+
|
|
17
|
+
const zohoToken = await refreshZohoToken();
|
|
18
|
+
const records = await fetchAllCRMRecords('Contacts', zohoToken);
|
|
19
|
+
|
|
20
|
+
const filename = `zoho-crm/contacts/${new Date().toISOString()}.json`;
|
|
21
|
+
const file = bucket.file(filename);
|
|
22
|
+
await file.save(JSON.stringify(records), {
|
|
23
|
+
contentType: 'application/json',
|
|
24
|
+
metadata: {
|
|
25
|
+
source: 'zoho-crm',
|
|
26
|
+
module: 'Contacts',
|
|
27
|
+
ingestion_time: new Date().toISOString(),
|
|
28
|
+
record_count: String(records.length)
|
|
29
|
+
}
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
console.log(`Ingested ${records.length} contacts to ${filename}`);
|
|
33
|
+
context.close();
|
|
34
|
+
};
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### Event-Driven (Webhook → Cloud Function)
|
|
38
|
+
```javascript
|
|
39
|
+
// Cloud Function: Receive Zoho webhook, write to GCS
|
|
40
|
+
exports.zohoWebhook = async (req, res) => {
|
|
41
|
+
// Validate webhook signature
|
|
42
|
+
if (!validateZohoSignature(req)) {
|
|
43
|
+
return res.status(401).send('Invalid signature');
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
const event = req.body;
|
|
47
|
+
const bucket = storage.bucket('project-bronze');
|
|
48
|
+
const filename = `zoho-events/${event.module}/${Date.now()}.json`;
|
|
49
|
+
|
|
50
|
+
await bucket.file(filename).save(JSON.stringify(event));
|
|
51
|
+
res.status(200).send('OK');
|
|
52
|
+
};
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Bronze Layer Rules
|
|
56
|
+
- Never modify source data
|
|
57
|
+
- Add metadata: source, timestamp, batch_id
|
|
58
|
+
- Partition by ingestion date
|
|
59
|
+
- Set retention policy (90 days typical for raw)
|
|
60
|
+
- Schema-on-read (don't enforce schema at ingestion)
|