@cloudstreamsoftware/claude-tools 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +152 -37
- package/agents/INDEX.md +183 -0
- package/agents/architect.md +247 -0
- package/agents/build-error-resolver.md +555 -0
- package/agents/catalyst-deployer.md +132 -0
- package/agents/code-reviewer.md +121 -0
- package/agents/compliance-auditor.md +148 -0
- package/agents/creator-architect.md +395 -0
- package/agents/deluge-reviewer.md +98 -0
- package/agents/doc-updater.md +471 -0
- package/agents/e2e-runner.md +711 -0
- package/agents/planner.md +122 -0
- package/agents/refactor-cleaner.md +309 -0
- package/agents/security-reviewer.md +582 -0
- package/agents/tdd-guide.md +302 -0
- package/bin/cloudstream-setup.js +16 -6
- package/config/versions.json +63 -0
- package/dist/hooks/hooks.json +209 -0
- package/dist/index.js +47 -0
- package/dist/lib/asset-value.js +609 -0
- package/dist/lib/client-manager.js +300 -0
- package/dist/lib/command-matcher.js +242 -0
- package/dist/lib/cross-session-patterns.js +754 -0
- package/dist/lib/intent-classifier.js +1075 -0
- package/dist/lib/package-manager.js +374 -0
- package/dist/lib/recommendation-engine.js +597 -0
- package/dist/lib/session-memory.js +489 -0
- package/dist/lib/skill-effectiveness.js +486 -0
- package/dist/lib/skill-matcher.js +595 -0
- package/dist/lib/tutorial-metrics.js +242 -0
- package/dist/lib/tutorial-progress.js +209 -0
- package/dist/lib/tutorial-renderer.js +431 -0
- package/dist/lib/utils.js +380 -0
- package/dist/lib/verify-formatter.js +143 -0
- package/dist/lib/workflow-state.js +249 -0
- package/hooks/hooks.json +209 -0
- package/package.json +5 -1
- package/scripts/aggregate-sessions.js +290 -0
- package/scripts/branch-name-validator.js +291 -0
- package/scripts/build.js +101 -0
- package/scripts/commands/client-switch.js +231 -0
- package/scripts/deprecate-skill.js +610 -0
- package/scripts/diagnose.js +324 -0
- package/scripts/doc-freshness.js +168 -0
- package/scripts/generate-weekly-digest.js +393 -0
- package/scripts/health-check.js +270 -0
- package/scripts/hooks/credential-check.js +101 -0
- package/scripts/hooks/evaluate-session.js +81 -0
- package/scripts/hooks/pre-compact.js +66 -0
- package/scripts/hooks/prompt-analyzer.js +276 -0
- package/scripts/hooks/prompt-router.js +422 -0
- package/scripts/hooks/quality-gate-enforcer.js +371 -0
- package/scripts/hooks/session-end.js +156 -0
- package/scripts/hooks/session-start.js +195 -0
- package/scripts/hooks/skill-injector.js +333 -0
- package/scripts/hooks/suggest-compact.js +58 -0
- package/scripts/lib/asset-value.js +609 -0
- package/scripts/lib/client-manager.js +300 -0
- package/scripts/lib/command-matcher.js +242 -0
- package/scripts/lib/cross-session-patterns.js +754 -0
- package/scripts/lib/intent-classifier.js +1075 -0
- package/scripts/lib/package-manager.js +374 -0
- package/scripts/lib/recommendation-engine.js +597 -0
- package/scripts/lib/session-memory.js +489 -0
- package/scripts/lib/skill-effectiveness.js +486 -0
- package/scripts/lib/skill-matcher.js +595 -0
- package/scripts/lib/tutorial-metrics.js +242 -0
- package/scripts/lib/tutorial-progress.js +209 -0
- package/scripts/lib/tutorial-renderer.js +431 -0
- package/scripts/lib/utils.js +380 -0
- package/scripts/lib/verify-formatter.js +143 -0
- package/scripts/lib/workflow-state.js +249 -0
- package/scripts/onboard.js +363 -0
- package/scripts/quarterly-report.js +692 -0
- package/scripts/setup-package-manager.js +204 -0
- package/scripts/sync-upstream.js +391 -0
- package/scripts/test.js +108 -0
- package/scripts/tutorial-runner.js +351 -0
- package/scripts/validate-all.js +201 -0
- package/scripts/verifiers/agents.js +245 -0
- package/scripts/verifiers/config.js +186 -0
- package/scripts/verifiers/environment.js +123 -0
- package/scripts/verifiers/hooks.js +188 -0
- package/scripts/verifiers/index.js +38 -0
- package/scripts/verifiers/persistence.js +140 -0
- package/scripts/verifiers/plugin.js +215 -0
- package/scripts/verifiers/skills.js +209 -0
- package/scripts/verify-setup.js +164 -0
- package/skills/INDEX.md +157 -0
- package/skills/backend-patterns/SKILL.md +586 -0
- package/skills/backend-patterns/catalyst-patterns.md +128 -0
- package/skills/bigquery-patterns/SKILL.md +27 -0
- package/skills/bigquery-patterns/performance-optimization.md +518 -0
- package/skills/bigquery-patterns/query-patterns.md +372 -0
- package/skills/bigquery-patterns/schema-design.md +78 -0
- package/skills/cloudstream-project-template/SKILL.md +20 -0
- package/skills/cloudstream-project-template/structure.md +65 -0
- package/skills/coding-standards/SKILL.md +524 -0
- package/skills/coding-standards/deluge-standards.md +83 -0
- package/skills/compliance-patterns/SKILL.md +28 -0
- package/skills/compliance-patterns/hipaa/audit-requirements.md +251 -0
- package/skills/compliance-patterns/hipaa/baa-process.md +298 -0
- package/skills/compliance-patterns/hipaa/data-archival-strategy.md +387 -0
- package/skills/compliance-patterns/hipaa/phi-handling.md +52 -0
- package/skills/compliance-patterns/pci-dss/saq-a-requirements.md +307 -0
- package/skills/compliance-patterns/pci-dss/tokenization-patterns.md +382 -0
- package/skills/compliance-patterns/pci-dss/zoho-checkout-patterns.md +56 -0
- package/skills/compliance-patterns/soc2/access-controls.md +344 -0
- package/skills/compliance-patterns/soc2/audit-logging.md +458 -0
- package/skills/compliance-patterns/soc2/change-management.md +403 -0
- package/skills/compliance-patterns/soc2/deluge-execution-logging.md +407 -0
- package/skills/consultancy-workflows/SKILL.md +19 -0
- package/skills/consultancy-workflows/client-isolation.md +21 -0
- package/skills/consultancy-workflows/documentation-automation.md +454 -0
- package/skills/consultancy-workflows/handoff-procedures.md +257 -0
- package/skills/consultancy-workflows/knowledge-capture.md +513 -0
- package/skills/consultancy-workflows/time-tracking.md +26 -0
- package/skills/continuous-learning/SKILL.md +84 -0
- package/skills/continuous-learning/config.json +18 -0
- package/skills/continuous-learning/evaluate-session.sh +60 -0
- package/skills/continuous-learning-v2/SKILL.md +126 -0
- package/skills/continuous-learning-v2/config.json +61 -0
- package/skills/frontend-patterns/SKILL.md +635 -0
- package/skills/frontend-patterns/zoho-widget-patterns.md +103 -0
- package/skills/gcp-data-engineering/SKILL.md +36 -0
- package/skills/gcp-data-engineering/bigquery/performance-optimization.md +337 -0
- package/skills/gcp-data-engineering/dataflow/error-handling.md +496 -0
- package/skills/gcp-data-engineering/dataflow/pipeline-patterns.md +444 -0
- package/skills/gcp-data-engineering/dbt/model-organization.md +63 -0
- package/skills/gcp-data-engineering/dbt/testing-patterns.md +503 -0
- package/skills/gcp-data-engineering/medallion-architecture/bronze-layer.md +60 -0
- package/skills/gcp-data-engineering/medallion-architecture/gold-layer.md +311 -0
- package/skills/gcp-data-engineering/medallion-architecture/layer-transitions.md +517 -0
- package/skills/gcp-data-engineering/medallion-architecture/silver-layer.md +305 -0
- package/skills/gcp-data-engineering/zoho-to-gcp/data-extraction.md +543 -0
- package/skills/gcp-data-engineering/zoho-to-gcp/real-time-vs-batch.md +337 -0
- package/skills/security-review/SKILL.md +498 -0
- package/skills/security-review/compliance-checklist.md +53 -0
- package/skills/strategic-compact/SKILL.md +67 -0
- package/skills/tdd-workflow/SKILL.md +413 -0
- package/skills/tdd-workflow/zoho-testing.md +124 -0
- package/skills/tutorial/SKILL.md +249 -0
- package/skills/tutorial/docs/ACCESSIBILITY.md +169 -0
- package/skills/tutorial/lessons/00-philosophy-and-workflow.md +198 -0
- package/skills/tutorial/lessons/01-basics.md +81 -0
- package/skills/tutorial/lessons/02-training.md +86 -0
- package/skills/tutorial/lessons/03-commands.md +109 -0
- package/skills/tutorial/lessons/04-workflows.md +115 -0
- package/skills/tutorial/lessons/05-compliance.md +116 -0
- package/skills/tutorial/lessons/06-zoho.md +121 -0
- package/skills/tutorial/lessons/07-hooks-system.md +277 -0
- package/skills/tutorial/lessons/08-mcp-servers.md +316 -0
- package/skills/tutorial/lessons/09-client-management.md +215 -0
- package/skills/tutorial/lessons/10-testing-e2e.md +260 -0
- package/skills/tutorial/lessons/11-skills-deep-dive.md +272 -0
- package/skills/tutorial/lessons/12-rules-system.md +326 -0
- package/skills/tutorial/lessons/13-golden-standard-graduation.md +213 -0
- package/skills/tutorial/lessons/14-fork-setup-and-sync.md +312 -0
- package/skills/tutorial/lessons/15-living-examples-system.md +221 -0
- package/skills/tutorial/tracks/accelerated/README.md +134 -0
- package/skills/tutorial/tracks/accelerated/assessment/checkpoint-1.md +161 -0
- package/skills/tutorial/tracks/accelerated/assessment/checkpoint-2.md +175 -0
- package/skills/tutorial/tracks/accelerated/day-1-core-concepts.md +234 -0
- package/skills/tutorial/tracks/accelerated/day-2-essential-commands.md +270 -0
- package/skills/tutorial/tracks/accelerated/day-3-workflow-mastery.md +305 -0
- package/skills/tutorial/tracks/accelerated/day-4-compliance-zoho.md +304 -0
- package/skills/tutorial/tracks/accelerated/day-5-hooks-skills.md +344 -0
- package/skills/tutorial/tracks/accelerated/day-6-client-testing.md +386 -0
- package/skills/tutorial/tracks/accelerated/day-7-graduation.md +369 -0
- package/skills/zoho-patterns/CHANGELOG.md +108 -0
- package/skills/zoho-patterns/SKILL.md +446 -0
- package/skills/zoho-patterns/analytics/dashboard-patterns.md +352 -0
- package/skills/zoho-patterns/analytics/zoho-to-bigquery-pipeline.md +427 -0
- package/skills/zoho-patterns/catalyst/appsail-deployment.md +349 -0
- package/skills/zoho-patterns/catalyst/context-close-patterns.md +354 -0
- package/skills/zoho-patterns/catalyst/cron-batch-processing.md +374 -0
- package/skills/zoho-patterns/catalyst/function-patterns.md +439 -0
- package/skills/zoho-patterns/creator/form-design.md +304 -0
- package/skills/zoho-patterns/creator/publish-api-patterns.md +313 -0
- package/skills/zoho-patterns/creator/widget-integration.md +306 -0
- package/skills/zoho-patterns/creator/workflow-automation.md +253 -0
- package/skills/zoho-patterns/deluge/api-patterns.md +468 -0
- package/skills/zoho-patterns/deluge/batch-processing.md +403 -0
- package/skills/zoho-patterns/deluge/cross-app-integration.md +356 -0
- package/skills/zoho-patterns/deluge/error-handling.md +423 -0
- package/skills/zoho-patterns/deluge/syntax-reference.md +65 -0
- package/skills/zoho-patterns/integration/cors-proxy-architecture.md +426 -0
- package/skills/zoho-patterns/integration/crm-books-native-sync.md +277 -0
- package/skills/zoho-patterns/integration/oauth-token-management.md +461 -0
- package/skills/zoho-patterns/integration/zoho-flow-patterns.md +334 -0
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
# Real-Time vs Batch: Decision Framework
|
|
2
|
+
|
|
3
|
+
> When to use real-time streaming, near-real-time polling, or batch extraction for Zoho-to-GCP data flows at CloudStream.
|
|
4
|
+
|
|
5
|
+
## Decision Framework
|
|
6
|
+
|
|
7
|
+
### Primary Decision Criteria
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
Is the data needed within seconds?
|
|
11
|
+
YES → Real-time (Webhooks → PubSub → Dataflow → BigQuery)
|
|
12
|
+
NO →
|
|
13
|
+
Is the data needed within 15 minutes?
|
|
14
|
+
YES → Near-real-time (Catalyst Cron every 5 min)
|
|
15
|
+
NO →
|
|
16
|
+
Is the data volume > 50K records/day?
|
|
17
|
+
YES → Batch with Dataflow (nightly bulk extract)
|
|
18
|
+
NO → Batch with CData Sync (daily scheduled)
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
### Detailed Decision Matrix
|
|
22
|
+
|
|
23
|
+
| Factor | Real-Time | Near-Real-Time | Batch |
|
|
24
|
+
|--------|-----------|----------------|-------|
|
|
25
|
+
| **Latency** | < 30 seconds | 5-15 minutes | 1-24 hours |
|
|
26
|
+
| **Complexity** | High | Medium | Low |
|
|
27
|
+
| **Monthly Cost** | $200-500+ | $50-150 | $20-80 |
|
|
28
|
+
| **API Credits/Day** | Variable (webhook-driven) | ~2,000-5,000 | ~500-2,000 |
|
|
29
|
+
| **Reliability** | Requires DLQ, retry logic | Simpler retry | Very reliable |
|
|
30
|
+
| **Data Volume** | Any (event-driven) | Low-medium (API pagination) | Any (bulk API) |
|
|
31
|
+
| **Schema Changes** | Must handle dynamically | Easier to adapt | Easiest to adapt |
|
|
32
|
+
| **Monitoring** | Complex (streaming metrics) | Moderate | Simple (job success/fail) |
|
|
33
|
+
|
|
34
|
+
## Real-Time: Zoho Webhooks to PubSub to Dataflow to BigQuery
|
|
35
|
+
|
|
36
|
+
### Architecture
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
Zoho CRM/Books → Webhook → Cloud Function → PubSub → Dataflow → BigQuery (Bronze)
|
|
40
|
+
│
|
|
41
|
+
dbt (Silver/Gold)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### When to Use Real-Time
|
|
45
|
+
|
|
46
|
+
- Deal stage changes need instant dashboard updates
|
|
47
|
+
- Invoice payments require immediate AR reconciliation
|
|
48
|
+
- Customer support ticket creation triggers automation
|
|
49
|
+
- Multi-system sync (Zoho ↔ other platforms) requires consistency
|
|
50
|
+
|
|
51
|
+
### Implementation
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
# Streaming pipeline: PubSub → Bronze → Silver
|
|
55
|
+
# See dataflow/pipeline-patterns.md for full implementation
|
|
56
|
+
|
|
57
|
+
# Key configuration for real-time Zoho ingestion:
|
|
58
|
+
pipeline_options = {
|
|
59
|
+
'streaming': True,
|
|
60
|
+
'enable_streaming_engine': True,
|
|
61
|
+
'max_num_workers': 3, # Low worker count for webhook volume
|
|
62
|
+
'autoscaling_algorithm': 'THROUGHPUT_BASED',
|
|
63
|
+
'min_num_workers': 1, # Scale to 1 during off-hours
|
|
64
|
+
}
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Cost Breakdown (Real-Time)
|
|
68
|
+
|
|
69
|
+
| Component | Monthly Cost (Estimate) |
|
|
70
|
+
|-----------|------------------------|
|
|
71
|
+
| Cloud Function (webhook receiver) | $5-10 |
|
|
72
|
+
| PubSub (message throughput) | $10-20 |
|
|
73
|
+
| Dataflow (1-3 workers, 24/7) | $150-450 |
|
|
74
|
+
| BigQuery (streaming inserts) | $10-30 |
|
|
75
|
+
| **Total** | **$175-510** |
|
|
76
|
+
|
|
77
|
+
> **COST WARNING**: Dataflow streaming jobs run 24/7. A single n1-standard-2 worker costs roughly $150/month (see the Dataflow row above), so even with autoscaling down to 1 worker off-peak, this is the most expensive option.
|
|
78
|
+
|
|
79
|
+
## Near-Real-Time: Catalyst Cron Every 5 Minutes
|
|
80
|
+
|
|
81
|
+
### Architecture
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
Catalyst Cron (every 5 min) → Zoho API → PubSub → Cloud Function → BigQuery (Bronze)
|
|
85
|
+
or
|
|
86
|
+
Catalyst Cron (every 5 min) → Zoho API → Direct BigQuery Insert (Bronze)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### When to Use Near-Real-Time
|
|
90
|
+
|
|
91
|
+
- Dashboard data needs to be "fresh enough" (< 15 min)
|
|
92
|
+
- API credit budget allows frequent polling
|
|
93
|
+
- Data volume per poll is manageable (< 500 records)
|
|
94
|
+
- Real-time is overkill but daily is too stale
|
|
95
|
+
|
|
96
|
+
### Implementation
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
# Catalyst Cron configuration
|
|
100
|
+
# See zoho-to-gcp/data-extraction.md for full Catalyst implementation
|
|
101
|
+
|
|
102
|
+
# Key considerations:
|
|
103
|
+
# - 5-minute interval = 288 API calls/day per module
|
|
104
|
+
# - With 4 modules (Deals, Contacts, Accounts, Invoices) = 1,152 calls/day
|
|
105
|
+
# - Each call may paginate: 500 records / 200 per page = 3 calls
|
|
106
|
+
# - Worst case: ~3,500 API credits/day
|
|
107
|
+
|
|
108
|
+
# Optimization: Only poll high-priority modules frequently
|
|
109
|
+
POLL_SCHEDULES = {
|
|
110
|
+
'Deals': 5, # Every 5 minutes (critical for pipeline visibility)
|
|
111
|
+
'Contacts': 15, # Every 15 minutes
|
|
112
|
+
'Accounts': 30, # Every 30 minutes
|
|
113
|
+
'Invoices': 60, # Every hour (less frequent changes)
|
|
114
|
+
'Payments': 15, # Every 15 minutes (important for AR)
|
|
115
|
+
'Tasks': 30, # Every 30 minutes
|
|
116
|
+
}
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Cost Breakdown (Near-Real-Time)
|
|
120
|
+
|
|
121
|
+
| Component | Monthly Cost (Estimate) |
|
|
122
|
+
|-----------|------------------------|
|
|
123
|
+
| Catalyst Cron (Zoho plan included) | $0 (included in Zoho plan) |
|
|
124
|
+
| PubSub (lower throughput) | $2-5 |
|
|
125
|
+
| Cloud Function (trigger loads) | $5-10 |
|
|
126
|
+
| BigQuery (batch inserts) | $5-10 |
|
|
127
|
+
| Zoho API credits (3,500/day) | Included in plan |
|
|
128
|
+
| **Total** | **$12-25** |
|
|
129
|
+
|
|
130
|
+
## Batch: Daily CData Sync or Nightly Catalyst Cron
|
|
131
|
+
|
|
132
|
+
### Architecture
|
|
133
|
+
|
|
134
|
+
```
|
|
135
|
+
Option A: CData Sync (scheduled) → BigQuery (Bronze) → dbt → Silver/Gold
|
|
136
|
+
Option B: Catalyst Cron (nightly) → GCS (landing) → Cloud Function → BigQuery (Bronze)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### When to Use Batch
|
|
140
|
+
|
|
141
|
+
- Historical reporting (yesterday's numbers are fine)
|
|
142
|
+
- Large data volumes (full table syncs)
|
|
143
|
+
- Cost sensitivity is high
|
|
144
|
+
- Data does not drive real-time decisions
|
|
145
|
+
- Initial data load / backfill
|
|
146
|
+
|
|
147
|
+
### Implementation: CData Sync (Simplest)
|
|
148
|
+
|
|
149
|
+
```yaml
|
|
150
|
+
# CData Sync job configuration
|
|
151
|
+
sync_job:
|
|
152
|
+
name: nightly_full_sync
|
|
153
|
+
schedule: "0 2 * * *" # 2 AM nightly
|
|
154
|
+
source: zoho_crm
|
|
155
|
+
tables:
|
|
156
|
+
- Deals
|
|
157
|
+
- Contacts
|
|
158
|
+
- Accounts
|
|
159
|
+
- Invoices
|
|
160
|
+
- Payments
|
|
161
|
+
- Products
|
|
162
|
+
mode: incremental
|
|
163
|
+
incremental_column: Modified_Time
|
|
164
|
+
batch_size: 5000
|
|
165
|
+
retry_count: 3
|
|
166
|
+
notification_email: data-team@cloudstreamsoftware.com
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### Implementation: Nightly Catalyst Cron (More Control)
|
|
170
|
+
|
|
171
|
+
```python
|
|
172
|
+
# Catalyst Cron: Nightly incremental extraction (25-hour lookback window)
|
|
173
|
+
def nightly_extract(context, cronDetails):
|
|
174
|
+
"""Run at 2 AM - extract all modified records from past 25 hours."""
|
|
175
|
+
# 25 hours ensures overlap and no gaps
|
|
176
|
+
since = (datetime.utcnow() - timedelta(hours=25)).isoformat()
|
|
177
|
+
|
|
178
|
+
for module in ['Deals', 'Contacts', 'Accounts', 'Invoices', 'Payments']:
|
|
179
|
+
records = fetch_all_modified(module, since)
|
|
180
|
+
upload_to_gcs(records, module) # Land in GCS, Cloud Function loads to BQ
|
|
181
|
+
|
|
182
|
+
print(f"Nightly extract complete")
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### Cost Breakdown (Batch)
|
|
186
|
+
|
|
187
|
+
| Component | Monthly Cost (Estimate) |
|
|
188
|
+
|-----------|------------------------|
|
|
189
|
+
| CData Sync license | $50-200 (varies by plan) |
|
|
190
|
+
| OR Catalyst Cron | $0 (included) |
|
|
191
|
+
| BigQuery (load jobs, free) | $0 |
|
|
192
|
+
| BigQuery (storage) | $5-20 |
|
|
193
|
+
| GCS (landing zone) | $1-5 |
|
|
194
|
+
| **Total (CData)** | **$56-225** |
|
|
195
|
+
| **Total (Catalyst)** | **$6-25** |
|
|
196
|
+
|
|
197
|
+
## Hybrid Patterns
|
|
198
|
+
|
|
199
|
+
### Pattern 1: Real-Time for Critical Events + Batch for Everything Else
|
|
200
|
+
|
|
201
|
+
```
|
|
202
|
+
Critical Events (Deal Won, Payment Received):
|
|
203
|
+
Zoho Webhook → PubSub → Streaming Dataflow → Bronze
|
|
204
|
+
|
|
205
|
+
Everything Else (Contact updates, Task changes):
|
|
206
|
+
Nightly CData Sync → Bronze
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
### Pattern 2: Near-Real-Time Primary + Weekly Full Refresh
|
|
210
|
+
|
|
211
|
+
```
|
|
212
|
+
Daily Operations:
|
|
213
|
+
Catalyst Cron (every 15 min) → Incremental to Bronze
|
|
214
|
+
|
|
215
|
+
Weekly Reconciliation:
|
|
216
|
+
Sunday 2 AM → Full table refresh → Overwrite Bronze
|
|
217
|
+
Catches any missed incremental updates
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
### Pattern 3: Stream Events + Batch Dimensions
|
|
221
|
+
|
|
222
|
+
```
|
|
223
|
+
Fact/Event Data (Deals, Invoices, Payments):
|
|
224
|
+
Real-time or Near-real-time → Bronze → Silver facts
|
|
225
|
+
|
|
226
|
+
Dimension Data (Contacts, Accounts, Products):
|
|
227
|
+
Daily batch → Bronze → Silver dimensions
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
## Cost Comparison Table (Monthly)
|
|
231
|
+
|
|
232
|
+
| Scenario | Real-Time | Near-RT | Batch (CData) | Batch (Catalyst) |
|
|
233
|
+
|----------|-----------|---------|----------------|-------------------|
|
|
234
|
+
| 10K records/day | $200 | $15 | $70 | $10 |
|
|
235
|
+
| 50K records/day | $350 | $40 | $100 | $20 |
|
|
236
|
+
| 200K records/day | $500 | $100 | $200 | $50 |
|
|
237
|
+
| 1M records/day | $700 | N/A (API limits) | $300 | N/A (API limits) |
|
|
238
|
+
|
|
239
|
+
> **NOTE**: Near-real-time and Catalyst batch hit API credit limits for very high volumes (>200K records/day). Use CData Sync bulk connector or Dataflow for these scenarios.
|
|
240
|
+
|
|
241
|
+
## When to Upgrade from Batch to Streaming
|
|
242
|
+
|
|
243
|
+
### Upgrade Signals
|
|
244
|
+
|
|
245
|
+
1. **Business asks "why isn't this updated yet?"** more than twice a week
|
|
246
|
+
2. **Revenue decisions** depend on same-day deal data
|
|
247
|
+
3. **Customer-facing systems** consume the data (not just internal reports)
|
|
248
|
+
4. **Compliance requirements** mandate near-real-time audit trails
|
|
249
|
+
5. **Multi-system orchestration** requires event-driven triggers
|
|
250
|
+
|
|
251
|
+
### Migration Path
|
|
252
|
+
|
|
253
|
+
```
|
|
254
|
+
Phase 1: Daily batch (CData Sync)
|
|
255
|
+
└── Validate data quality, build silver/gold layers
|
|
256
|
+
|
|
257
|
+
Phase 2: Near-real-time (Catalyst Cron every 15 min)
|
|
258
|
+
└── Reduce latency for critical modules only
|
|
259
|
+
|
|
260
|
+
Phase 3: Real-time for critical events
|
|
261
|
+
└── Add webhooks for Deal Stage Changes, Payments
|
|
262
|
+
└── Keep batch for dimension tables
|
|
263
|
+
|
|
264
|
+
Phase 4: Full real-time (if needed)
|
|
265
|
+
└── All modules via webhooks + streaming Dataflow
|
|
266
|
+
└── Keep weekly full refresh for reconciliation
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
## Monitoring Latency SLAs
|
|
270
|
+
|
|
271
|
+
```sql
|
|
272
|
+
-- Track data freshness SLA compliance
|
|
273
|
+
CREATE TABLE `project.ops.freshness_sla` (
|
|
274
|
+
module STRING,
|
|
275
|
+
extraction_method STRING, -- 'webhook', 'catalyst_cron', 'cdata_sync'
|
|
276
|
+
target_latency_minutes INT64,
|
|
277
|
+
actual_latency_minutes FLOAT64,
|
|
278
|
+
measured_at TIMESTAMP,
|
|
279
|
+
sla_met BOOL
|
|
280
|
+
)
|
|
281
|
+
PARTITION BY DATE(measured_at);
|
|
282
|
+
|
|
283
|
+
-- Freshness check query (run every 15 minutes via Cloud Scheduler)
|
|
284
|
+
INSERT INTO `project.ops.freshness_sla`
|
|
285
|
+
SELECT
|
|
286
|
+
'Deals' AS module,
|
|
287
|
+
'catalyst_cron' AS extraction_method,
|
|
288
|
+
15 AS target_latency_minutes,
|
|
289
|
+
TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), MAX(_ingestion_timestamp), MINUTE) AS actual_latency_minutes,
|
|
290
|
+
CURRENT_TIMESTAMP() AS measured_at,
|
|
291
|
+
TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), MAX(_ingestion_timestamp), MINUTE) <= 15 AS sla_met
|
|
292
|
+
FROM `project.bronze.zoho_deals`;
|
|
293
|
+
|
|
294
|
+
-- SLA compliance dashboard
|
|
295
|
+
SELECT
|
|
296
|
+
module,
|
|
297
|
+
extraction_method,
|
|
298
|
+
COUNT(*) AS total_checks,
|
|
299
|
+
COUNTIF(sla_met) AS met_count,
|
|
300
|
+
ROUND(COUNTIF(sla_met) / COUNT(*) * 100, 1) AS sla_compliance_pct,
|
|
301
|
+
ROUND(AVG(actual_latency_minutes), 1) AS avg_latency_minutes,
|
|
302
|
+
MAX(actual_latency_minutes) AS max_latency_minutes
|
|
303
|
+
FROM `project.ops.freshness_sla`
|
|
304
|
+
WHERE measured_at >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY)
|
|
305
|
+
GROUP BY 1, 2
|
|
306
|
+
ORDER BY sla_compliance_pct ASC;
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
```hcl
|
|
310
|
+
# Alert when SLA is violated
|
|
311
|
+
resource "google_monitoring_alert_policy" "data_freshness_sla" {
|
|
312
|
+
display_name = "Data Freshness SLA Violation"
|
|
313
|
+
conditions {
|
|
314
|
+
display_name = "Deals data stale > 20 minutes"
|
|
315
|
+
condition_threshold {
|
|
316
|
+
filter = <<-EOT
|
|
317
|
+
metric.type="custom.googleapis.com/data_freshness/deals_latency_minutes"
|
|
318
|
+
AND resource.type="global"
|
|
319
|
+
EOT
|
|
320
|
+
comparison = "COMPARISON_GT"
|
|
321
|
+
threshold_value = 20
|
|
322
|
+
duration = "300s"
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
notification_channels = [google_monitoring_notification_channel.slack.id]
|
|
326
|
+
}
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
## Best Practices
|
|
330
|
+
|
|
331
|
+
1. **Start with batch, upgrade incrementally** - Avoid over-engineering on day one
|
|
332
|
+
2. **Use hybrid patterns** - Not everything needs real-time; mix methods by priority
|
|
333
|
+
3. **Monitor freshness, not just success** - A successful job that is 6 hours late is still a problem
|
|
334
|
+
4. **Budget API credits** - Near-real-time can exhaust daily limits if not controlled
|
|
335
|
+
5. **Keep weekly full refresh** regardless of primary method - catches edge cases
|
|
336
|
+
6. **Separate critical from nice-to-have** - Only stream what truly needs sub-minute latency
|
|
337
|
+
7. **Document SLA expectations** - Written agreement on what "fresh" means per dataset
|