@cloudstreamsoftware/claude-tools 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +152 -37
- package/agents/INDEX.md +183 -0
- package/agents/architect.md +247 -0
- package/agents/build-error-resolver.md +555 -0
- package/agents/catalyst-deployer.md +132 -0
- package/agents/code-reviewer.md +121 -0
- package/agents/compliance-auditor.md +148 -0
- package/agents/creator-architect.md +395 -0
- package/agents/deluge-reviewer.md +98 -0
- package/agents/doc-updater.md +471 -0
- package/agents/e2e-runner.md +711 -0
- package/agents/planner.md +122 -0
- package/agents/refactor-cleaner.md +309 -0
- package/agents/security-reviewer.md +582 -0
- package/agents/tdd-guide.md +302 -0
- package/config/versions.json +63 -0
- package/dist/hooks/hooks.json +209 -0
- package/dist/index.js +47 -0
- package/dist/lib/asset-value.js +609 -0
- package/dist/lib/client-manager.js +300 -0
- package/dist/lib/command-matcher.js +242 -0
- package/dist/lib/cross-session-patterns.js +754 -0
- package/dist/lib/intent-classifier.js +1075 -0
- package/dist/lib/package-manager.js +374 -0
- package/dist/lib/recommendation-engine.js +597 -0
- package/dist/lib/session-memory.js +489 -0
- package/dist/lib/skill-effectiveness.js +486 -0
- package/dist/lib/skill-matcher.js +595 -0
- package/dist/lib/tutorial-metrics.js +242 -0
- package/dist/lib/tutorial-progress.js +209 -0
- package/dist/lib/tutorial-renderer.js +431 -0
- package/dist/lib/utils.js +380 -0
- package/dist/lib/verify-formatter.js +143 -0
- package/dist/lib/workflow-state.js +249 -0
- package/hooks/hooks.json +209 -0
- package/package.json +5 -1
- package/scripts/aggregate-sessions.js +290 -0
- package/scripts/branch-name-validator.js +291 -0
- package/scripts/build.js +101 -0
- package/scripts/commands/client-switch.js +231 -0
- package/scripts/deprecate-skill.js +610 -0
- package/scripts/diagnose.js +324 -0
- package/scripts/doc-freshness.js +168 -0
- package/scripts/generate-weekly-digest.js +393 -0
- package/scripts/health-check.js +270 -0
- package/scripts/hooks/credential-check.js +101 -0
- package/scripts/hooks/evaluate-session.js +81 -0
- package/scripts/hooks/pre-compact.js +66 -0
- package/scripts/hooks/prompt-analyzer.js +276 -0
- package/scripts/hooks/prompt-router.js +422 -0
- package/scripts/hooks/quality-gate-enforcer.js +371 -0
- package/scripts/hooks/session-end.js +156 -0
- package/scripts/hooks/session-start.js +195 -0
- package/scripts/hooks/skill-injector.js +333 -0
- package/scripts/hooks/suggest-compact.js +58 -0
- package/scripts/lib/asset-value.js +609 -0
- package/scripts/lib/client-manager.js +300 -0
- package/scripts/lib/command-matcher.js +242 -0
- package/scripts/lib/cross-session-patterns.js +754 -0
- package/scripts/lib/intent-classifier.js +1075 -0
- package/scripts/lib/package-manager.js +374 -0
- package/scripts/lib/recommendation-engine.js +597 -0
- package/scripts/lib/session-memory.js +489 -0
- package/scripts/lib/skill-effectiveness.js +486 -0
- package/scripts/lib/skill-matcher.js +595 -0
- package/scripts/lib/tutorial-metrics.js +242 -0
- package/scripts/lib/tutorial-progress.js +209 -0
- package/scripts/lib/tutorial-renderer.js +431 -0
- package/scripts/lib/utils.js +380 -0
- package/scripts/lib/verify-formatter.js +143 -0
- package/scripts/lib/workflow-state.js +249 -0
- package/scripts/onboard.js +363 -0
- package/scripts/quarterly-report.js +692 -0
- package/scripts/setup-package-manager.js +204 -0
- package/scripts/sync-upstream.js +391 -0
- package/scripts/test.js +108 -0
- package/scripts/tutorial-runner.js +351 -0
- package/scripts/validate-all.js +201 -0
- package/scripts/verifiers/agents.js +245 -0
- package/scripts/verifiers/config.js +186 -0
- package/scripts/verifiers/environment.js +123 -0
- package/scripts/verifiers/hooks.js +188 -0
- package/scripts/verifiers/index.js +38 -0
- package/scripts/verifiers/persistence.js +140 -0
- package/scripts/verifiers/plugin.js +215 -0
- package/scripts/verifiers/skills.js +209 -0
- package/scripts/verify-setup.js +164 -0
- package/skills/INDEX.md +157 -0
- package/skills/backend-patterns/SKILL.md +586 -0
- package/skills/backend-patterns/catalyst-patterns.md +128 -0
- package/skills/bigquery-patterns/SKILL.md +27 -0
- package/skills/bigquery-patterns/performance-optimization.md +518 -0
- package/skills/bigquery-patterns/query-patterns.md +372 -0
- package/skills/bigquery-patterns/schema-design.md +78 -0
- package/skills/cloudstream-project-template/SKILL.md +20 -0
- package/skills/cloudstream-project-template/structure.md +65 -0
- package/skills/coding-standards/SKILL.md +524 -0
- package/skills/coding-standards/deluge-standards.md +83 -0
- package/skills/compliance-patterns/SKILL.md +28 -0
- package/skills/compliance-patterns/hipaa/audit-requirements.md +251 -0
- package/skills/compliance-patterns/hipaa/baa-process.md +298 -0
- package/skills/compliance-patterns/hipaa/data-archival-strategy.md +387 -0
- package/skills/compliance-patterns/hipaa/phi-handling.md +52 -0
- package/skills/compliance-patterns/pci-dss/saq-a-requirements.md +307 -0
- package/skills/compliance-patterns/pci-dss/tokenization-patterns.md +382 -0
- package/skills/compliance-patterns/pci-dss/zoho-checkout-patterns.md +56 -0
- package/skills/compliance-patterns/soc2/access-controls.md +344 -0
- package/skills/compliance-patterns/soc2/audit-logging.md +458 -0
- package/skills/compliance-patterns/soc2/change-management.md +403 -0
- package/skills/compliance-patterns/soc2/deluge-execution-logging.md +407 -0
- package/skills/consultancy-workflows/SKILL.md +19 -0
- package/skills/consultancy-workflows/client-isolation.md +21 -0
- package/skills/consultancy-workflows/documentation-automation.md +454 -0
- package/skills/consultancy-workflows/handoff-procedures.md +257 -0
- package/skills/consultancy-workflows/knowledge-capture.md +513 -0
- package/skills/consultancy-workflows/time-tracking.md +26 -0
- package/skills/continuous-learning/SKILL.md +84 -0
- package/skills/continuous-learning/config.json +18 -0
- package/skills/continuous-learning/evaluate-session.sh +60 -0
- package/skills/continuous-learning-v2/SKILL.md +126 -0
- package/skills/continuous-learning-v2/config.json +61 -0
- package/skills/frontend-patterns/SKILL.md +635 -0
- package/skills/frontend-patterns/zoho-widget-patterns.md +103 -0
- package/skills/gcp-data-engineering/SKILL.md +36 -0
- package/skills/gcp-data-engineering/bigquery/performance-optimization.md +337 -0
- package/skills/gcp-data-engineering/dataflow/error-handling.md +496 -0
- package/skills/gcp-data-engineering/dataflow/pipeline-patterns.md +444 -0
- package/skills/gcp-data-engineering/dbt/model-organization.md +63 -0
- package/skills/gcp-data-engineering/dbt/testing-patterns.md +503 -0
- package/skills/gcp-data-engineering/medallion-architecture/bronze-layer.md +60 -0
- package/skills/gcp-data-engineering/medallion-architecture/gold-layer.md +311 -0
- package/skills/gcp-data-engineering/medallion-architecture/layer-transitions.md +517 -0
- package/skills/gcp-data-engineering/medallion-architecture/silver-layer.md +305 -0
- package/skills/gcp-data-engineering/zoho-to-gcp/data-extraction.md +543 -0
- package/skills/gcp-data-engineering/zoho-to-gcp/real-time-vs-batch.md +337 -0
- package/skills/security-review/SKILL.md +498 -0
- package/skills/security-review/compliance-checklist.md +53 -0
- package/skills/strategic-compact/SKILL.md +67 -0
- package/skills/tdd-workflow/SKILL.md +413 -0
- package/skills/tdd-workflow/zoho-testing.md +124 -0
- package/skills/tutorial/SKILL.md +249 -0
- package/skills/tutorial/docs/ACCESSIBILITY.md +169 -0
- package/skills/tutorial/lessons/00-philosophy-and-workflow.md +198 -0
- package/skills/tutorial/lessons/01-basics.md +81 -0
- package/skills/tutorial/lessons/02-training.md +86 -0
- package/skills/tutorial/lessons/03-commands.md +109 -0
- package/skills/tutorial/lessons/04-workflows.md +115 -0
- package/skills/tutorial/lessons/05-compliance.md +116 -0
- package/skills/tutorial/lessons/06-zoho.md +121 -0
- package/skills/tutorial/lessons/07-hooks-system.md +277 -0
- package/skills/tutorial/lessons/08-mcp-servers.md +316 -0
- package/skills/tutorial/lessons/09-client-management.md +215 -0
- package/skills/tutorial/lessons/10-testing-e2e.md +260 -0
- package/skills/tutorial/lessons/11-skills-deep-dive.md +272 -0
- package/skills/tutorial/lessons/12-rules-system.md +326 -0
- package/skills/tutorial/lessons/13-golden-standard-graduation.md +213 -0
- package/skills/tutorial/lessons/14-fork-setup-and-sync.md +312 -0
- package/skills/tutorial/lessons/15-living-examples-system.md +221 -0
- package/skills/tutorial/tracks/accelerated/README.md +134 -0
- package/skills/tutorial/tracks/accelerated/assessment/checkpoint-1.md +161 -0
- package/skills/tutorial/tracks/accelerated/assessment/checkpoint-2.md +175 -0
- package/skills/tutorial/tracks/accelerated/day-1-core-concepts.md +234 -0
- package/skills/tutorial/tracks/accelerated/day-2-essential-commands.md +270 -0
- package/skills/tutorial/tracks/accelerated/day-3-workflow-mastery.md +305 -0
- package/skills/tutorial/tracks/accelerated/day-4-compliance-zoho.md +304 -0
- package/skills/tutorial/tracks/accelerated/day-5-hooks-skills.md +344 -0
- package/skills/tutorial/tracks/accelerated/day-6-client-testing.md +386 -0
- package/skills/tutorial/tracks/accelerated/day-7-graduation.md +369 -0
- package/skills/zoho-patterns/CHANGELOG.md +108 -0
- package/skills/zoho-patterns/SKILL.md +446 -0
- package/skills/zoho-patterns/analytics/dashboard-patterns.md +352 -0
- package/skills/zoho-patterns/analytics/zoho-to-bigquery-pipeline.md +427 -0
- package/skills/zoho-patterns/catalyst/appsail-deployment.md +349 -0
- package/skills/zoho-patterns/catalyst/context-close-patterns.md +354 -0
- package/skills/zoho-patterns/catalyst/cron-batch-processing.md +374 -0
- package/skills/zoho-patterns/catalyst/function-patterns.md +439 -0
- package/skills/zoho-patterns/creator/form-design.md +304 -0
- package/skills/zoho-patterns/creator/publish-api-patterns.md +313 -0
- package/skills/zoho-patterns/creator/widget-integration.md +306 -0
- package/skills/zoho-patterns/creator/workflow-automation.md +253 -0
- package/skills/zoho-patterns/deluge/api-patterns.md +468 -0
- package/skills/zoho-patterns/deluge/batch-processing.md +403 -0
- package/skills/zoho-patterns/deluge/cross-app-integration.md +356 -0
- package/skills/zoho-patterns/deluge/error-handling.md +423 -0
- package/skills/zoho-patterns/deluge/syntax-reference.md +65 -0
- package/skills/zoho-patterns/integration/cors-proxy-architecture.md +426 -0
- package/skills/zoho-patterns/integration/crm-books-native-sync.md +277 -0
- package/skills/zoho-patterns/integration/oauth-token-management.md +461 -0
- package/skills/zoho-patterns/integration/zoho-flow-patterns.md +334 -0
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
# Silver Layer - Cleansed & Conformed Data
|
|
2
|
+
|
|
3
|
+
> The silver layer transforms raw bronze data into cleansed, deduplicated, and validated records ready for business consumption.
|
|
4
|
+
|
|
5
|
+
## Architecture Overview
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
Bronze (raw JSON/CSV) → Silver (cleansed, typed, deduplicated) → Gold (aggregated)
|
|
9
|
+
│
|
|
10
|
+
├── Data quality rules applied
|
|
11
|
+
├── Deduplication by record_id + timestamp
|
|
12
|
+
├── Schema enforcement & evolution
|
|
13
|
+
├── SCD Type 2 tracking
|
|
14
|
+
└── Error quarantine routing
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Deduplication Strategy
|
|
18
|
+
|
|
19
|
+
CloudStream deduplicates Zoho source data using a `record_id + modified_time` composite key: for each `record_id`, only the row with the latest `modified_time` is kept.
|
|
20
|
+
|
|
21
|
+
```sql
|
|
22
|
+
-- Deduplication: keep latest record per entity (NOTE: CREATE OR REPLACE rebuilds the table from only the last 3 days of bronze data)
|
|
23
|
+
CREATE OR REPLACE TABLE `project.silver.zoho_contacts` AS
|
|
24
|
+
WITH ranked AS (
|
|
25
|
+
SELECT
|
|
26
|
+
*,
|
|
27
|
+
ROW_NUMBER() OVER (
|
|
28
|
+
PARTITION BY record_id
|
|
29
|
+
ORDER BY modified_time DESC, _ingestion_timestamp DESC
|
|
30
|
+
) AS row_num
|
|
31
|
+
FROM `project.bronze.zoho_contacts`
|
|
32
|
+
WHERE _ingestion_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 3 DAY)
|
|
33
|
+
)
|
|
34
|
+
SELECT * EXCEPT(row_num)
|
|
35
|
+
FROM ranked
|
|
36
|
+
WHERE row_num = 1;
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### dbt Deduplication Model
|
|
40
|
+
|
|
41
|
+
```sql
|
|
42
|
+
-- models/staging/stg_zoho__contacts.sql
|
|
43
|
+
{{
|
|
44
|
+
config(
|
|
45
|
+
materialized='incremental',
|
|
46
|
+
unique_key='record_id',
|
|
47
|
+
partition_by={'field': '_ingestion_date', 'data_type': 'date'},
|
|
48
|
+
cluster_by=['owner_id', 'account_id']
|
|
49
|
+
)
|
|
50
|
+
}}
|
|
51
|
+
|
|
52
|
+
WITH source AS (
|
|
53
|
+
SELECT * FROM {{ source('bronze', 'zoho_contacts') }}
|
|
54
|
+
{% if is_incremental() %}
|
|
55
|
+
WHERE _ingestion_timestamp > (SELECT MAX(_ingestion_timestamp) FROM {{ this }})
|
|
56
|
+
{% endif %}
|
|
57
|
+
),
|
|
58
|
+
|
|
59
|
+
deduplicated AS (
|
|
60
|
+
SELECT
|
|
61
|
+
*,
|
|
62
|
+
ROW_NUMBER() OVER (
|
|
63
|
+
PARTITION BY record_id
|
|
64
|
+
ORDER BY modified_time DESC
|
|
65
|
+
) AS _row_num
|
|
66
|
+
FROM source
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
SELECT * EXCEPT(_row_num)
|
|
70
|
+
FROM deduplicated
|
|
71
|
+
WHERE _row_num = 1
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Data Quality Rules
|
|
75
|
+
|
|
76
|
+
### Null Handling
|
|
77
|
+
|
|
78
|
+
| Column Type | Strategy | Example |
|
|
79
|
+
|-------------|----------|---------|
|
|
80
|
+
| Required ID | Reject to quarantine | `record_id IS NULL` → quarantine |
|
|
81
|
+
| Email | Coalesce with fallback | `COALESCE(email, 'unknown@placeholder.com')` |
|
|
82
|
+
| Numeric | Default to 0 or NULL | `COALESCE(SAFE_CAST(amount AS FLOAT64), 0.0)` |
|
|
83
|
+
| Date | Flag as suspect | Keep NULL, add `_date_missing = TRUE` |
|
|
84
|
+
| Name | Trim and validate | `NULLIF(TRIM(name), '')` |
|
|
85
|
+
|
|
86
|
+
### Type Coercion
|
|
87
|
+
|
|
88
|
+
```sql
|
|
89
|
+
-- Safe type casting with error tracking
|
|
90
|
+
SELECT
|
|
91
|
+
record_id,
|
|
92
|
+
SAFE_CAST(amount AS FLOAT64) AS amount,
|
|
93
|
+
SAFE.PARSE_DATE('%Y-%m-%d', date_field) AS parsed_date,
|
|
94
|
+
SAFE.PARSE_TIMESTAMP('%Y-%m-%dT%H:%M:%S%Ez', timestamp_field) AS parsed_ts,
|
|
95
|
+
-- Track coercion failures
|
|
96
|
+
CASE WHEN SAFE_CAST(amount AS FLOAT64) IS NULL AND amount IS NOT NULL
|
|
97
|
+
THEN TRUE ELSE FALSE END AS _amount_coercion_failed
|
|
98
|
+
FROM `project.bronze.zoho_invoices`
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Range Validation
|
|
102
|
+
|
|
103
|
+
```sql
|
|
104
|
+
-- Business rule validations
|
|
105
|
+
SELECT
|
|
106
|
+
*,
|
|
107
|
+
CASE
|
|
108
|
+
WHEN amount < 0 THEN 'NEGATIVE_AMOUNT'
|
|
109
|
+
WHEN amount > 10000000 THEN 'SUSPICIOUSLY_LARGE'
|
|
110
|
+
WHEN created_date > CURRENT_TIMESTAMP() THEN 'FUTURE_DATE'
|
|
111
|
+
WHEN created_date < TIMESTAMP('2015-01-01') THEN 'DATE_TOO_OLD'
|
|
112
|
+
ELSE NULL
|
|
113
|
+
END AS _validation_flag
|
|
114
|
+
FROM deduplicated_source
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Schema Evolution Handling
|
|
118
|
+
|
|
119
|
+
> **WARNING**: Schema changes from Zoho can break pipelines. Always use `SAFE_CAST` and handle missing columns gracefully.
|
|
120
|
+
|
|
121
|
+
```sql
|
|
122
|
+
-- Handle schema evolution with column existence checks (dbt macro). NOTE(review): `model_name` is not one of the macro's arguments - confirm where it is defined.
|
|
123
|
+
{% macro safe_column(column_name, default_value='NULL', cast_type=none) %}
|
|
124
|
+
{% if column_name in adapter.get_columns_in_relation(source('bronze', model_name)) | map(attribute='name') %}
|
|
125
|
+
{% if cast_type %}
|
|
126
|
+
SAFE_CAST({{ column_name }} AS {{ cast_type }})
|
|
127
|
+
{% else %}
|
|
128
|
+
{{ column_name }}
|
|
129
|
+
{% endif %}
|
|
130
|
+
{% else %}
|
|
131
|
+
{{ default_value }} AS {{ column_name }}
|
|
132
|
+
{% endif %}
|
|
133
|
+
{% endmacro %}
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## SCD Type 2 Implementation
|
|
137
|
+
|
|
138
|
+
```sql
|
|
139
|
+
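-- models/intermediate/int_zoho__contacts_scd2.sql
-- NOTE(review): this example only inserts new versions (effective_to = NULL, is_current = TRUE); it does not close out superseded rows - confirm that is handled elsewhere.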
|
|
140
|
+
{{
|
|
141
|
+
config(
|
|
142
|
+
materialized='incremental',
|
|
143
|
+
unique_key='surrogate_key',
|
|
144
|
+
partition_by={'field': 'effective_from', 'data_type': 'timestamp', 'granularity': 'day'}
|
|
145
|
+
)
|
|
146
|
+
}}
|
|
147
|
+
|
|
148
|
+
WITH current_records AS (
|
|
149
|
+
SELECT * FROM {{ ref('stg_zoho__contacts') }}
|
|
150
|
+
),
|
|
151
|
+
|
|
152
|
+
{% if is_incremental() %}
|
|
153
|
+
existing AS (
|
|
154
|
+
SELECT * FROM {{ this }}
|
|
155
|
+
WHERE is_current = TRUE
|
|
156
|
+
),
|
|
157
|
+
|
|
158
|
+
changes AS (
|
|
159
|
+
SELECT
|
|
160
|
+
c.*,
|
|
161
|
+
e.surrogate_key AS existing_key
|
|
162
|
+
FROM current_records c
|
|
163
|
+
LEFT JOIN existing e ON c.record_id = e.record_id
|
|
164
|
+
WHERE e.record_id IS NULL -- new record
|
|
165
|
+
OR MD5(CONCAT(
|
|
166
|
+
COALESCE(c.email,''), '|',
|
|
167
|
+
COALESCE(c.phone,''), '|',
|
|
168
|
+
COALESCE(c.owner_id,''), '|',
|
|
169
|
+
COALESCE(CAST(c.account_id AS STRING),'')
|
|
170
|
+
)) != e.hash_diff -- changed record
|
|
171
|
+
)
|
|
172
|
+
{% endif %}
|
|
173
|
+
|
|
174
|
+
SELECT
|
|
175
|
+
{{ dbt_utils.generate_surrogate_key(['record_id', 'modified_time']) }} AS surrogate_key,
|
|
176
|
+
record_id,
|
|
177
|
+
email, phone, owner_id, account_id,
|
|
178
|
+
MD5(CONCAT(
|
|
179
|
+
COALESCE(email,''), '|', COALESCE(phone,''), '|',
|
|
180
|
+
COALESCE(owner_id,''), '|', COALESCE(CAST(account_id AS STRING),'')
|
|
181
|
+
)) AS hash_diff,
|
|
182
|
+
modified_time AS effective_from,
|
|
183
|
+
CAST(NULL AS TIMESTAMP) AS effective_to,
|
|
184
|
+
TRUE AS is_current,
|
|
185
|
+
CURRENT_TIMESTAMP() AS _loaded_at
|
|
186
|
+
FROM {% if is_incremental() %} changes {% else %} current_records {% endif %}
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## Incremental Processing
|
|
190
|
+
|
|
191
|
+
```yaml
|
|
192
|
+
# dbt_project.yml - Silver layer incremental config
|
|
193
|
+
models:
|
|
194
|
+
cloudstream:
|
|
195
|
+
staging:
|
|
196
|
+
+materialized: incremental
|
|
197
|
+
+incremental_strategy: merge
|
|
198
|
+
+on_schema_change: append_new_columns
|
|
199
|
+
+partition_by:
|
|
200
|
+
field: _ingestion_date
|
|
201
|
+
data_type: date
|
|
202
|
+
granularity: day
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
## Partitioning Strategy
|
|
206
|
+
|
|
207
|
+
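Silver entity tables are partitioned by `_ingestion_date` for efficient incremental loads (the SCD2, quarantine, and quality-metrics tables in this document partition on their own date columns instead):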
|
|
208
|
+
|
|
209
|
+
```sql
|
|
210
|
+
CREATE TABLE `project.silver.zoho_deals` (
|
|
211
|
+
record_id STRING NOT NULL,
|
|
212
|
+
deal_name STRING,
|
|
213
|
+
amount FLOAT64,
|
|
214
|
+
stage STRING,
|
|
215
|
+
close_date DATE,
|
|
216
|
+
owner_id STRING,
|
|
217
|
+
_ingestion_date DATE NOT NULL,
|
|
218
|
+
_ingestion_timestamp TIMESTAMP NOT NULL,
|
|
219
|
+
_source_system STRING DEFAULT 'zoho_crm',
|
|
220
|
+
_batch_id STRING
|
|
221
|
+
)
|
|
222
|
+
PARTITION BY _ingestion_date
|
|
223
|
+
CLUSTER BY owner_id, stage
|
|
224
|
+
OPTIONS (
|
|
225
|
+
require_partition_filter = TRUE,
|
|
226
|
+
partition_expiration_days = 730 -- 2 year retention at silver
|
|
227
|
+
);
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
## Error Quarantine Tables
|
|
231
|
+
|
|
232
|
+
Records failing quality checks route to quarantine for investigation:
|
|
233
|
+
|
|
234
|
+
```sql
|
|
235
|
+
CREATE TABLE `project.silver._quarantine` (
|
|
236
|
+
source_table STRING,
|
|
237
|
+
record_id STRING,
|
|
238
|
+
error_type STRING, -- 'NULL_KEY', 'TYPE_COERCION', 'RANGE_VIOLATION', 'DUPLICATE'
|
|
239
|
+
error_detail STRING,
|
|
240
|
+
raw_record STRING, -- JSON serialized original
|
|
241
|
+
quarantined_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP(),
|
|
242
|
+
resolved_at TIMESTAMP,
|
|
243
|
+
resolution STRING -- 'FIXED', 'IGNORED', 'SOURCE_CORRECTED'
|
|
244
|
+
)
|
|
245
|
+
PARTITION BY DATE(quarantined_at);
|
|
246
|
+
|
|
247
|
+
-- Route bad records
|
|
248
|
+
INSERT INTO `project.silver._quarantine`
|
|
249
|
+
SELECT
|
|
250
|
+
'zoho_invoices' AS source_table,
|
|
251
|
+
record_id,
|
|
252
|
+
'NULL_KEY' AS error_type,
|
|
253
|
+
'record_id is NULL' AS error_detail,
|
|
254
|
+
TO_JSON_STRING(t) AS raw_record,
|
|
255
|
+
CURRENT_TIMESTAMP(),
|
|
256
|
+
NULL, NULL
|
|
257
|
+
FROM `project.bronze.zoho_invoices` t
|
|
258
|
+
WHERE record_id IS NULL;
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
## Quality Metrics Tracking
|
|
262
|
+
|
|
263
|
+
```sql
|
|
264
|
+
-- Track quality metrics per load
|
|
265
|
+
CREATE TABLE `project.silver._quality_metrics` (
|
|
266
|
+
table_name STRING,
|
|
267
|
+
metric_date DATE,
|
|
268
|
+
batch_id STRING,
|
|
269
|
+
total_records INT64,
|
|
270
|
+
valid_records INT64,
|
|
271
|
+
quarantined_records INT64,
|
|
272
|
+
duplicate_records INT64,
|
|
273
|
+
null_key_count INT64,
|
|
274
|
+
type_coercion_failures INT64,
|
|
275
|
+
processing_time_seconds FLOAT64,
|
|
276
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP()
|
|
277
|
+
)
|
|
278
|
+
PARTITION BY metric_date;
|
|
279
|
+
|
|
280
|
+
-- Example quality check query
|
|
281
|
+
SELECT
|
|
282
|
+
table_name,
|
|
283
|
+
metric_date,
|
|
284
|
+
ROUND(valid_records / NULLIF(total_records, 0) * 100, 2) AS quality_percentage,
|
|
285
|
+
quarantined_records,
|
|
286
|
+
CASE
|
|
287
|
+
WHEN valid_records / NULLIF(total_records, 0) < 0.95 THEN 'ALERT'
|
|
288
|
+
WHEN valid_records / NULLIF(total_records, 0) < 0.99 THEN 'WARNING'
|
|
289
|
+
ELSE 'HEALTHY'
|
|
290
|
+
END AS health_status
|
|
291
|
+
FROM `project.silver._quality_metrics`
|
|
292
|
+
WHERE metric_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY)
|
|
293
|
+
ORDER BY metric_date DESC;
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
## Best Practices
|
|
297
|
+
|
|
298
|
+
1. **Always partition silver tables by `_ingestion_date`** - enables efficient incremental loads
|
|
299
|
+
2. **Use `SAFE_CAST` everywhere** - never let a type error crash a pipeline
|
|
300
|
+
3. **Quarantine, do not drop** - bad records may contain fixable issues
|
|
301
|
+
4. **Track quality over time** - degrading quality signals source system problems
|
|
302
|
+
5. **Cluster by frequently filtered columns** - e.g. `owner_id`, `account_id`, `stage`
|
|
303
|
+
6. **Set `require_partition_filter = TRUE`** - prevents accidental full scans
|
|
304
|
+
|
|
305
|
+
> **COST WARNING**: Queries against silver tables with `require_partition_filter = FALSE` can trigger expensive full-table scans. Always enforce partition filters in production.
|