@cloudstreamsoftware/claude-tools 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +152 -37
- package/agents/INDEX.md +183 -0
- package/agents/architect.md +247 -0
- package/agents/build-error-resolver.md +555 -0
- package/agents/catalyst-deployer.md +132 -0
- package/agents/code-reviewer.md +121 -0
- package/agents/compliance-auditor.md +148 -0
- package/agents/creator-architect.md +395 -0
- package/agents/deluge-reviewer.md +98 -0
- package/agents/doc-updater.md +471 -0
- package/agents/e2e-runner.md +711 -0
- package/agents/planner.md +122 -0
- package/agents/refactor-cleaner.md +309 -0
- package/agents/security-reviewer.md +582 -0
- package/agents/tdd-guide.md +302 -0
- package/bin/cloudstream-setup.js +16 -6
- package/config/versions.json +63 -0
- package/dist/hooks/hooks.json +209 -0
- package/dist/index.js +47 -0
- package/dist/lib/asset-value.js +609 -0
- package/dist/lib/client-manager.js +300 -0
- package/dist/lib/command-matcher.js +242 -0
- package/dist/lib/cross-session-patterns.js +754 -0
- package/dist/lib/intent-classifier.js +1075 -0
- package/dist/lib/package-manager.js +374 -0
- package/dist/lib/recommendation-engine.js +597 -0
- package/dist/lib/session-memory.js +489 -0
- package/dist/lib/skill-effectiveness.js +486 -0
- package/dist/lib/skill-matcher.js +595 -0
- package/dist/lib/tutorial-metrics.js +242 -0
- package/dist/lib/tutorial-progress.js +209 -0
- package/dist/lib/tutorial-renderer.js +431 -0
- package/dist/lib/utils.js +380 -0
- package/dist/lib/verify-formatter.js +143 -0
- package/dist/lib/workflow-state.js +249 -0
- package/hooks/hooks.json +209 -0
- package/package.json +5 -1
- package/scripts/aggregate-sessions.js +290 -0
- package/scripts/branch-name-validator.js +291 -0
- package/scripts/build.js +101 -0
- package/scripts/commands/client-switch.js +231 -0
- package/scripts/deprecate-skill.js +610 -0
- package/scripts/diagnose.js +324 -0
- package/scripts/doc-freshness.js +168 -0
- package/scripts/generate-weekly-digest.js +393 -0
- package/scripts/health-check.js +270 -0
- package/scripts/hooks/credential-check.js +101 -0
- package/scripts/hooks/evaluate-session.js +81 -0
- package/scripts/hooks/pre-compact.js +66 -0
- package/scripts/hooks/prompt-analyzer.js +276 -0
- package/scripts/hooks/prompt-router.js +422 -0
- package/scripts/hooks/quality-gate-enforcer.js +371 -0
- package/scripts/hooks/session-end.js +156 -0
- package/scripts/hooks/session-start.js +195 -0
- package/scripts/hooks/skill-injector.js +333 -0
- package/scripts/hooks/suggest-compact.js +58 -0
- package/scripts/lib/asset-value.js +609 -0
- package/scripts/lib/client-manager.js +300 -0
- package/scripts/lib/command-matcher.js +242 -0
- package/scripts/lib/cross-session-patterns.js +754 -0
- package/scripts/lib/intent-classifier.js +1075 -0
- package/scripts/lib/package-manager.js +374 -0
- package/scripts/lib/recommendation-engine.js +597 -0
- package/scripts/lib/session-memory.js +489 -0
- package/scripts/lib/skill-effectiveness.js +486 -0
- package/scripts/lib/skill-matcher.js +595 -0
- package/scripts/lib/tutorial-metrics.js +242 -0
- package/scripts/lib/tutorial-progress.js +209 -0
- package/scripts/lib/tutorial-renderer.js +431 -0
- package/scripts/lib/utils.js +380 -0
- package/scripts/lib/verify-formatter.js +143 -0
- package/scripts/lib/workflow-state.js +249 -0
- package/scripts/onboard.js +363 -0
- package/scripts/quarterly-report.js +692 -0
- package/scripts/setup-package-manager.js +204 -0
- package/scripts/sync-upstream.js +391 -0
- package/scripts/test.js +108 -0
- package/scripts/tutorial-runner.js +351 -0
- package/scripts/validate-all.js +201 -0
- package/scripts/verifiers/agents.js +245 -0
- package/scripts/verifiers/config.js +186 -0
- package/scripts/verifiers/environment.js +123 -0
- package/scripts/verifiers/hooks.js +188 -0
- package/scripts/verifiers/index.js +38 -0
- package/scripts/verifiers/persistence.js +140 -0
- package/scripts/verifiers/plugin.js +215 -0
- package/scripts/verifiers/skills.js +209 -0
- package/scripts/verify-setup.js +164 -0
- package/skills/INDEX.md +157 -0
- package/skills/backend-patterns/SKILL.md +586 -0
- package/skills/backend-patterns/catalyst-patterns.md +128 -0
- package/skills/bigquery-patterns/SKILL.md +27 -0
- package/skills/bigquery-patterns/performance-optimization.md +518 -0
- package/skills/bigquery-patterns/query-patterns.md +372 -0
- package/skills/bigquery-patterns/schema-design.md +78 -0
- package/skills/cloudstream-project-template/SKILL.md +20 -0
- package/skills/cloudstream-project-template/structure.md +65 -0
- package/skills/coding-standards/SKILL.md +524 -0
- package/skills/coding-standards/deluge-standards.md +83 -0
- package/skills/compliance-patterns/SKILL.md +28 -0
- package/skills/compliance-patterns/hipaa/audit-requirements.md +251 -0
- package/skills/compliance-patterns/hipaa/baa-process.md +298 -0
- package/skills/compliance-patterns/hipaa/data-archival-strategy.md +387 -0
- package/skills/compliance-patterns/hipaa/phi-handling.md +52 -0
- package/skills/compliance-patterns/pci-dss/saq-a-requirements.md +307 -0
- package/skills/compliance-patterns/pci-dss/tokenization-patterns.md +382 -0
- package/skills/compliance-patterns/pci-dss/zoho-checkout-patterns.md +56 -0
- package/skills/compliance-patterns/soc2/access-controls.md +344 -0
- package/skills/compliance-patterns/soc2/audit-logging.md +458 -0
- package/skills/compliance-patterns/soc2/change-management.md +403 -0
- package/skills/compliance-patterns/soc2/deluge-execution-logging.md +407 -0
- package/skills/consultancy-workflows/SKILL.md +19 -0
- package/skills/consultancy-workflows/client-isolation.md +21 -0
- package/skills/consultancy-workflows/documentation-automation.md +454 -0
- package/skills/consultancy-workflows/handoff-procedures.md +257 -0
- package/skills/consultancy-workflows/knowledge-capture.md +513 -0
- package/skills/consultancy-workflows/time-tracking.md +26 -0
- package/skills/continuous-learning/SKILL.md +84 -0
- package/skills/continuous-learning/config.json +18 -0
- package/skills/continuous-learning/evaluate-session.sh +60 -0
- package/skills/continuous-learning-v2/SKILL.md +126 -0
- package/skills/continuous-learning-v2/config.json +61 -0
- package/skills/frontend-patterns/SKILL.md +635 -0
- package/skills/frontend-patterns/zoho-widget-patterns.md +103 -0
- package/skills/gcp-data-engineering/SKILL.md +36 -0
- package/skills/gcp-data-engineering/bigquery/performance-optimization.md +337 -0
- package/skills/gcp-data-engineering/dataflow/error-handling.md +496 -0
- package/skills/gcp-data-engineering/dataflow/pipeline-patterns.md +444 -0
- package/skills/gcp-data-engineering/dbt/model-organization.md +63 -0
- package/skills/gcp-data-engineering/dbt/testing-patterns.md +503 -0
- package/skills/gcp-data-engineering/medallion-architecture/bronze-layer.md +60 -0
- package/skills/gcp-data-engineering/medallion-architecture/gold-layer.md +311 -0
- package/skills/gcp-data-engineering/medallion-architecture/layer-transitions.md +517 -0
- package/skills/gcp-data-engineering/medallion-architecture/silver-layer.md +305 -0
- package/skills/gcp-data-engineering/zoho-to-gcp/data-extraction.md +543 -0
- package/skills/gcp-data-engineering/zoho-to-gcp/real-time-vs-batch.md +337 -0
- package/skills/security-review/SKILL.md +498 -0
- package/skills/security-review/compliance-checklist.md +53 -0
- package/skills/strategic-compact/SKILL.md +67 -0
- package/skills/tdd-workflow/SKILL.md +413 -0
- package/skills/tdd-workflow/zoho-testing.md +124 -0
- package/skills/tutorial/SKILL.md +249 -0
- package/skills/tutorial/docs/ACCESSIBILITY.md +169 -0
- package/skills/tutorial/lessons/00-philosophy-and-workflow.md +198 -0
- package/skills/tutorial/lessons/01-basics.md +81 -0
- package/skills/tutorial/lessons/02-training.md +86 -0
- package/skills/tutorial/lessons/03-commands.md +109 -0
- package/skills/tutorial/lessons/04-workflows.md +115 -0
- package/skills/tutorial/lessons/05-compliance.md +116 -0
- package/skills/tutorial/lessons/06-zoho.md +121 -0
- package/skills/tutorial/lessons/07-hooks-system.md +277 -0
- package/skills/tutorial/lessons/08-mcp-servers.md +316 -0
- package/skills/tutorial/lessons/09-client-management.md +215 -0
- package/skills/tutorial/lessons/10-testing-e2e.md +260 -0
- package/skills/tutorial/lessons/11-skills-deep-dive.md +272 -0
- package/skills/tutorial/lessons/12-rules-system.md +326 -0
- package/skills/tutorial/lessons/13-golden-standard-graduation.md +213 -0
- package/skills/tutorial/lessons/14-fork-setup-and-sync.md +312 -0
- package/skills/tutorial/lessons/15-living-examples-system.md +221 -0
- package/skills/tutorial/tracks/accelerated/README.md +134 -0
- package/skills/tutorial/tracks/accelerated/assessment/checkpoint-1.md +161 -0
- package/skills/tutorial/tracks/accelerated/assessment/checkpoint-2.md +175 -0
- package/skills/tutorial/tracks/accelerated/day-1-core-concepts.md +234 -0
- package/skills/tutorial/tracks/accelerated/day-2-essential-commands.md +270 -0
- package/skills/tutorial/tracks/accelerated/day-3-workflow-mastery.md +305 -0
- package/skills/tutorial/tracks/accelerated/day-4-compliance-zoho.md +304 -0
- package/skills/tutorial/tracks/accelerated/day-5-hooks-skills.md +344 -0
- package/skills/tutorial/tracks/accelerated/day-6-client-testing.md +386 -0
- package/skills/tutorial/tracks/accelerated/day-7-graduation.md +369 -0
- package/skills/zoho-patterns/CHANGELOG.md +108 -0
- package/skills/zoho-patterns/SKILL.md +446 -0
- package/skills/zoho-patterns/analytics/dashboard-patterns.md +352 -0
- package/skills/zoho-patterns/analytics/zoho-to-bigquery-pipeline.md +427 -0
- package/skills/zoho-patterns/catalyst/appsail-deployment.md +349 -0
- package/skills/zoho-patterns/catalyst/context-close-patterns.md +354 -0
- package/skills/zoho-patterns/catalyst/cron-batch-processing.md +374 -0
- package/skills/zoho-patterns/catalyst/function-patterns.md +439 -0
- package/skills/zoho-patterns/creator/form-design.md +304 -0
- package/skills/zoho-patterns/creator/publish-api-patterns.md +313 -0
- package/skills/zoho-patterns/creator/widget-integration.md +306 -0
- package/skills/zoho-patterns/creator/workflow-automation.md +253 -0
- package/skills/zoho-patterns/deluge/api-patterns.md +468 -0
- package/skills/zoho-patterns/deluge/batch-processing.md +403 -0
- package/skills/zoho-patterns/deluge/cross-app-integration.md +356 -0
- package/skills/zoho-patterns/deluge/error-handling.md +423 -0
- package/skills/zoho-patterns/deluge/syntax-reference.md +65 -0
- package/skills/zoho-patterns/integration/cors-proxy-architecture.md +426 -0
- package/skills/zoho-patterns/integration/crm-books-native-sync.md +277 -0
- package/skills/zoho-patterns/integration/oauth-token-management.md +461 -0
- package/skills/zoho-patterns/integration/zoho-flow-patterns.md +334 -0
|
@@ -0,0 +1,427 @@
|
|
|
1
|
+
# Zoho to BigQuery Data Pipeline
|
|
2
|
+
|
|
3
|
+
## Pipeline Architecture Options
|
|
4
|
+
|
|
5
|
+
| Method | Complexity | Cost | Latency | Best For |
|
|
6
|
+
|--------|-----------|------|---------|----------|
|
|
7
|
+
| CData Sync | Low | $$$ | 15 min+ | Quick setup, many tables |
|
|
8
|
+
| Catalyst Cron API Pull | Medium | $ | Configurable | Custom logic, transformations |
|
|
9
|
+
| Zoho Analytics Export API | Medium | $ | Hours | Analytics-ready data |
|
|
10
|
+
| Zoho Flow + Cloud Function | Low-Med | $$ | Near real-time | Event-driven sync |
|
|
11
|
+
| Custom ETL (Dataflow/dbt) | High | $$ | Configurable | Complex transformations |
|
|
12
|
+
|
|
13
|
+
## Method 1: CData Sync (Managed ETL)
|
|
14
|
+
|
|
15
|
+
### Setup
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
1. CData Sync Console > Add Connection > Zoho CRM
|
|
19
|
+
2. Enter OAuth credentials (Client ID, Client Secret, Refresh Token)
|
|
20
|
+
3. Add Destination > Google BigQuery
|
|
21
|
+
4. Enter GCP project ID, dataset name, service account key
|
|
22
|
+
5. Select tables/modules to sync
|
|
23
|
+
6. Configure schedule (every 15 min, hourly, daily)
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### Pros/Cons
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
+ No code required
|
|
30
|
+
+ Handles schema changes automatically
|
|
31
|
+
+ Built-in incremental sync
|
|
32
|
+
+ Handles pagination and rate limits
|
|
33
|
+
- Expensive for large datasets ($$$)
|
|
34
|
+
- Limited transformation during transit
|
|
35
|
+
- Vendor lock-in
|
|
36
|
+
- May not support all Zoho modules
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Method 2: Catalyst Cron API Pull (Recommended)
|
|
40
|
+
|
|
41
|
+
### Architecture
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
Catalyst Cron (every 2 hours)
|
|
45
|
+
↓ Fetch records from Zoho API (paginated)
|
|
46
|
+
↓ Transform to BigQuery schema
|
|
47
|
+
↓ Write to BigQuery via API
|
|
48
|
+
↓ Update sync state (last_sync_timestamp)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Implementation
|
|
52
|
+
|
|
53
|
+
```javascript
|
|
54
|
+
const catalyst = require("zcatalyst-sdk-node");
|
|
55
|
+
const { BigQuery } = require("@google-cloud/bigquery");
|
|
56
|
+
const axios = require("axios");
|
|
57
|
+
|
|
58
|
+
module.exports = async (cronDetails, context) => {
|
|
59
|
+
const app = catalyst.initialize(context);
|
|
60
|
+
const bigquery = new BigQuery({
|
|
61
|
+
projectId: process.env.GCP_PROJECT_ID,
|
|
62
|
+
credentials: JSON.parse(process.env.GCP_SERVICE_ACCOUNT_KEY)
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
const startTime = Date.now();
|
|
66
|
+
const MAX_RUNTIME = 14 * 60 * 1000; // 14 minutes
|
|
67
|
+
|
|
68
|
+
try {
|
|
69
|
+
// Get last sync timestamp
|
|
70
|
+
const lastSync = await getLastSyncTime(app, "crm_deals");
|
|
71
|
+
|
|
72
|
+
// Fetch modified records from Zoho CRM
|
|
73
|
+
const records = await fetchModifiedRecords(
|
|
74
|
+
"Deals",
|
|
75
|
+
lastSync,
|
|
76
|
+
process.env.ZOHO_ACCESS_TOKEN
|
|
77
|
+
);
|
|
78
|
+
|
|
79
|
+
if (records.length === 0) {
|
|
80
|
+
console.log("No new records to sync");
|
|
81
|
+
context.close();
|
|
82
|
+
return;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
// Transform to BigQuery schema
|
|
86
|
+
const bqRows = records.map(transformDealToBQ);
|
|
87
|
+
|
|
88
|
+
// Write to BigQuery
|
|
89
|
+
const dataset = bigquery.dataset(process.env.BQ_DATASET);
|
|
90
|
+
const table = dataset.table("crm_deals");
|
|
91
|
+
|
|
92
|
+
// Use streaming insert for incremental
|
|
93
|
+
await table.insert(bqRows, {
|
|
94
|
+
skipInvalidRows: true,
|
|
95
|
+
ignoreUnknownValues: true
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
// Update sync state
|
|
99
|
+
await updateSyncTime(app, "crm_deals", new Date().toISOString());
|
|
100
|
+
|
|
101
|
+
console.log(`Synced ${bqRows.length} deals to BigQuery`);
|
|
102
|
+
context.close();
|
|
103
|
+
|
|
104
|
+
} catch (error) {
|
|
105
|
+
console.error("Pipeline failed:", error);
|
|
106
|
+
await notifyPipelineFailure(app, error);
|
|
107
|
+
context.close();
|
|
108
|
+
}
|
|
109
|
+
};
|
|
110
|
+
|
|
111
|
+
// Fetch records modified since last sync
|
|
112
|
+
/**
 * Fetches every record of a Zoho CRM module modified since a given instant,
 * following pagination until the API reports no further pages.
 *
 * @param {string} module - CRM module API name, e.g. "Deals".
 * @param {string|null|undefined} since - Timestamp of the previous sync; when falsy no modification filter is sent.
 * @param {string} accessToken - Zoho OAuth access token.
 * @returns {Promise<object[]>} All matching records; an empty array when nothing changed.
 */
async function fetchModifiedRecords(module, since, accessToken) {
  const headers = { "Authorization": `Zoho-oauthtoken ${accessToken}` };

  // Zoho CRM filters by modification time through the If-Modified-Since
  // request header. A `modified_since` query parameter is not part of the
  // Get Records API, so sending it there would fetch the whole module.
  // NOTE(review): the header expects an ISO 8601 datetime — confirm the
  // stored watermark format is accepted.
  if (since) {
    headers["If-Modified-Since"] = since;
  }

  const allRecords = [];
  let page = 1;
  let hasMore = true;

  // NOTE(review): Zoho documents a cap on how many records plain `page`
  // pagination can reach; very large result sets may need `page_token` —
  // confirm against the API reference.
  while (hasMore) {
    const response = await axios.get(
      `https://www.zohoapis.com/crm/v5/${module}`,
      {
        headers,
        params: {
          page: page,
          per_page: 200, // Max per page
          fields: "Deal_Name,Amount,Stage,Closing_Date,Created_Time,Modified_Time,Owner"
        },
        // 304 Not Modified means "nothing changed since the watermark".
        // axios rejects non-2xx statuses by default, so accept it explicitly.
        validateStatus: (status) => (status >= 200 && status < 300) || status === 304
      }
    );

    const body = response.data;
    if (response.status === 304 || !body || !Array.isArray(body.data)) {
      // No payload (304, or an empty/204 body): nothing more to read.
      hasMore = false;
    } else {
      allRecords.push(...body.data);
      // `info` may be absent on unexpected payloads; treat that as the last page.
      hasMore = Boolean(body.info && body.info.more_records);
      page++;
    }
  }

  return allRecords;
}
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Data Transformation
|
|
145
|
+
|
|
146
|
+
```javascript
|
|
147
|
+
// Transform Zoho CRM record to BigQuery schema
|
|
148
|
+
/**
 * Maps one Zoho CRM Deal record onto the flat row shape of the BigQuery
 * `crm_deals` table.
 *
 * Missing or empty source fields become `null`. An `Amount` of 0 is kept as
 * 0 (a plain truthiness check would have turned it into `null`), and an
 * amount that cannot be parsed as a number becomes `null` instead of `NaN`.
 *
 * @param {object} crmDeal - Deal record as returned by the Zoho CRM API.
 * @returns {object} Row with deal fields plus `_synced_at` (current time, ISO 8601) and `_source`.
 */
function transformDealToBQ(crmDeal) {
  const owner = crmDeal.Owner || {};

  const rawAmount = crmDeal.Amount;
  const hasAmount = rawAmount !== null && rawAmount !== undefined && rawAmount !== "";
  const parsedAmount = hasAmount ? parseFloat(rawAmount) : NaN;

  return {
    deal_id: crmDeal.id,
    deal_name: crmDeal.Deal_Name || null,
    amount: Number.isNaN(parsedAmount) ? null : parsedAmount,
    stage: crmDeal.Stage || null,
    closing_date: crmDeal.Closing_Date || null,
    owner_name: owner.name || null,
    owner_email: owner.email || null,
    created_at: crmDeal.Created_Time || null,
    modified_at: crmDeal.Modified_Time || null,
    // Metadata
    _synced_at: new Date().toISOString(),
    _source: "zoho_crm"
  };
}
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### BigQuery Schema
|
|
167
|
+
|
|
168
|
+
```json
|
|
169
|
+
[
|
|
170
|
+
{"name": "deal_id", "type": "STRING", "mode": "REQUIRED"},
|
|
171
|
+
{"name": "deal_name", "type": "STRING", "mode": "NULLABLE"},
|
|
172
|
+
{"name": "amount", "type": "FLOAT", "mode": "NULLABLE"},
|
|
173
|
+
{"name": "stage", "type": "STRING", "mode": "NULLABLE"},
|
|
174
|
+
{"name": "closing_date", "type": "DATE", "mode": "NULLABLE"},
|
|
175
|
+
{"name": "owner_name", "type": "STRING", "mode": "NULLABLE"},
|
|
176
|
+
{"name": "owner_email", "type": "STRING", "mode": "NULLABLE"},
|
|
177
|
+
{"name": "created_at", "type": "TIMESTAMP", "mode": "NULLABLE"},
|
|
178
|
+
{"name": "modified_at", "type": "TIMESTAMP", "mode": "NULLABLE"},
|
|
179
|
+
{"name": "_synced_at", "type": "TIMESTAMP", "mode": "REQUIRED"},
|
|
180
|
+
{"name": "_source", "type": "STRING", "mode": "REQUIRED"}
|
|
181
|
+
]
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## Method 3: Zoho Analytics Export API
|
|
185
|
+
|
|
186
|
+
```javascript
|
|
187
|
+
// Export data from Zoho Analytics to BigQuery
|
|
188
|
+
/**
 * Exports the rows of a Zoho Analytics view and loads them into the
 * BigQuery `analytics_export` table.
 *
 * @param {string} workspaceId - Zoho Analytics workspace ID.
 * @param {string} viewId - ID of the view (table/report) to export.
 * @param {string} [accessToken=process.env.ZOHO_ACCESS_TOKEN] - Zoho OAuth
 *   access token. Previously read from an unbound `accessToken` name; it now
 *   defaults to the same environment variable the CRM sync uses.
 * @returns {Promise<number>} Number of rows handed to BigQuery.
 * @throws {Error} When the response does not contain a `data` array.
 */
async function exportFromAnalytics(workspaceId, viewId, accessToken = process.env.ZOHO_ACCESS_TOKEN) {
  // Step 1: Request export
  // NOTE(review): the Analytics v2 export endpoint is documented with its
  // own request shape (method, CONFIG parameter, org-id header) — confirm
  // this call matches the current API reference.
  const exportResponse = await axios.post(
    `https://analyticsapi.zoho.com/restapi/v2/workspaces/${workspaceId}/views/${viewId}/data`,
    null,
    {
      headers: { "Authorization": `Zoho-oauthtoken ${accessToken}` },
      params: {
        responseFormat: "json",
        limit: 10000
      }
    }
  );

  const data = exportResponse.data ? exportResponse.data.data : undefined;
  if (!Array.isArray(data)) {
    // Fail with a descriptive message instead of a TypeError on `.map`.
    throw new Error(
      `Zoho Analytics export for view ${viewId} returned no data array`
    );
  }

  // Step 2: Transform and load to BigQuery
  const bqRows = data.map(row => transformAnalyticsRow(row));
  await loadToBigQuery("analytics_export", bqRows);

  return bqRows.length;
}
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## Incremental vs Full Loads
|
|
213
|
+
|
|
214
|
+
### Decision Matrix
|
|
215
|
+
|
|
216
|
+
| Factor | Incremental | Full Load |
|
|
217
|
+
|--------|-------------|-----------|
|
|
218
|
+
| Data volume | Large (>10K records) | Small (<10K records) |
|
|
219
|
+
| Update frequency | Frequent | Infrequent |
|
|
220
|
+
| Deleted records | Misses deletes | Catches deletes |
|
|
221
|
+
| Data consistency | Eventually consistent | Fully consistent |
|
|
222
|
+
| API quota usage | Low | High |
|
|
223
|
+
| BigQuery cost | Low (streaming insert) | Higher (table replace) |
|
|
224
|
+
| Complexity | Higher (state tracking) | Simple |
|
|
225
|
+
|
|
226
|
+
### Incremental Load Pattern
|
|
227
|
+
|
|
228
|
+
```javascript
|
|
229
|
+
// Only fetch records modified since last sync
|
|
230
|
+
/**
 * Incrementally syncs one Zoho CRM module into BigQuery using a staging
 * table and a MERGE (upsert) keyed on `deal_id`.
 *
 * Relies on a `bigquery` client being available in the enclosing scope.
 * NOTE(review): the MERGE column list is specific to the deals schema even
 * though `module`/`tableName` are parameters — confirm before reusing this
 * for other modules.
 * NOTE(review): the MERGE reads BigQuery-shaped columns (`deal_id`,
 * `_synced_at`, ...) from staging, so `loadToTempTable` presumably applies
 * the row transformation — confirm.
 *
 * @param {object} app - Initialised Catalyst app, used for sync-state storage.
 * @param {string} module - Zoho CRM module API name, e.g. "Deals".
 * @param {string} tableName - Target BigQuery table; staging is `<tableName>_staging`.
 * @returns {Promise<number>} Number of source records processed (0 when nothing changed).
 */
async function incrementalSync(app, module, tableName) {
  // Take the new watermark before fetching so that records modified while
  // this run is in progress are picked up by the next run instead of lost.
  const syncStartedAt = new Date().toISOString();
  const lastSync = await getLastSyncTime(app, tableName);

  // fetchModifiedRecords needs the OAuth token as its third argument;
  // omitting it sends "Zoho-oauthtoken undefined".
  const records = await fetchModifiedRecords(
    module,
    lastSync,
    process.env.ZOHO_ACCESS_TOKEN
  );
  if (records.length === 0) return 0;

  // Upsert pattern: Use MERGE in BigQuery
  const tempTable = `${tableName}_staging`;
  await loadToTempTable(records, tempTable);

  // Merge staging into main table. Every mutable column is refreshed;
  // previously closing_date and the owner columns were left stale on update.
  const mergeQuery = `
    MERGE \`${process.env.GCP_PROJECT_ID}.${process.env.BQ_DATASET}.${tableName}\` T
    USING \`${process.env.GCP_PROJECT_ID}.${process.env.BQ_DATASET}.${tempTable}\` S
    ON T.deal_id = S.deal_id
    WHEN MATCHED THEN
      UPDATE SET
        deal_name = S.deal_name,
        amount = S.amount,
        stage = S.stage,
        closing_date = S.closing_date,
        owner_name = S.owner_name,
        owner_email = S.owner_email,
        modified_at = S.modified_at,
        _synced_at = S._synced_at
    WHEN NOT MATCHED THEN
      INSERT ROW
  `;

  await bigquery.query({ query: mergeQuery });
  await updateSyncTime(app, tableName, syncStartedAt);

  return records.length;
}
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
### Full Load Pattern
|
|
264
|
+
|
|
265
|
+
```javascript
|
|
266
|
+
// Replace entire table (for small datasets or periodic reconciliation)
|
|
267
|
+
/**
 * Replaces a BigQuery table with a complete, fresh copy of a Zoho module.
 *
 * All pages are fetched, transformed, loaded into a uniquely named temp
 * table and then swapped into place with a single CREATE OR REPLACE
 * statement. The temp table is removed afterwards, whether or not the swap
 * succeeded.
 *
 * Relies on a `bigquery` client being available in the enclosing scope. The
 * project and dataset come from GCP_PROJECT_ID / BQ_DATASET, matching the
 * incremental sync (the original referenced an undefined `dataset`).
 * NOTE(review): CREATE OR REPLACE ... AS SELECT carries no PARTITION BY /
 * CLUSTER BY clause — confirm this is acceptable for partitioned targets.
 *
 * @param {string} module - Zoho module API name to read.
 * @param {string} tableName - Target BigQuery table name.
 * @returns {Promise<number>} Number of records fetched from Zoho.
 */
async function fullSync(module, tableName) {
  const projectId = process.env.GCP_PROJECT_ID;
  const datasetId = process.env.BQ_DATASET;

  const allRecords = [];
  let page = 1;
  let hasMore = true;

  // Fetch ALL records (paginated)
  while (hasMore) {
    const batch = await fetchPage(module, page, 200);
    allRecords.push(...batch.data);
    hasMore = batch.hasMore;
    page++;
  }

  // Transform
  const bqRows = allRecords.map(r => transformToBQ(r));

  // Load to temp table, then swap
  const tempTable = `${tableName}_full_load_${Date.now()}`;
  await loadToTable(bqRows, tempTable);

  try {
    // Atomic swap: the target is replaced in one statement
    await bigquery.query({
      query: `
        CREATE OR REPLACE TABLE \`${projectId}.${datasetId}.${tableName}\`
        AS SELECT * FROM \`${projectId}.${datasetId}.${tempTable}\`
      `
    });
  } finally {
    // Cleanup temp even when the swap failed, so failed runs do not leave
    // timestamped tables behind. A cleanup error is logged rather than
    // allowed to mask the swap result.
    try {
      await bigquery.dataset(datasetId).table(tempTable).delete();
    } catch (cleanupError) {
      console.error(`Failed to delete temp table ${tempTable}:`, cleanupError);
    }
  }

  return allRecords.length;
}
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
## Scheduling
|
|
303
|
+
|
|
304
|
+
### Recommended Schedule
|
|
305
|
+
|
|
306
|
+
| Data Type | Frequency | Method | Reason |
|
|
307
|
+
|-----------|-----------|--------|--------|
|
|
308
|
+
| CRM Deals | Every 2 hours | Incremental | Matches CRM-Books sync cycle |
|
|
309
|
+
| Invoices | Every 4 hours | Incremental | Less volatile |
|
|
310
|
+
| Contacts | Daily | Full load | Catch deletes |
|
|
311
|
+
| Products | Daily | Full load | Small dataset |
|
|
312
|
+
| Activities | Every 1 hour | Incremental | Time-sensitive |
|
|
313
|
+
| Analytics aggregates | Daily | Full load | Pre-computed |
|
|
314
|
+
|
|
315
|
+
### Catalyst Cron Schedule
|
|
316
|
+
|
|
317
|
+
```json
|
|
318
|
+
{
|
|
319
|
+
"crons": [
|
|
320
|
+
{
|
|
321
|
+
"function_name": "sync_crm_deals",
|
|
322
|
+
"cron_expression": "0 */2 * * *",
|
|
323
|
+
"description": "Sync CRM deals to BigQuery every 2 hours"
|
|
324
|
+
},
|
|
325
|
+
{
|
|
326
|
+
"function_name": "sync_invoices",
|
|
327
|
+
"cron_expression": "0 */4 * * *",
|
|
328
|
+
"description": "Sync invoices to BigQuery every 4 hours"
|
|
329
|
+
},
|
|
330
|
+
{
|
|
331
|
+
"function_name": "full_sync_contacts",
|
|
332
|
+
"cron_expression": "0 2 * * *",
|
|
333
|
+
"description": "Full sync contacts daily at 2 AM"
|
|
334
|
+
}
|
|
335
|
+
]
|
|
336
|
+
}
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
## Monitoring
|
|
340
|
+
|
|
341
|
+
### Pipeline Health Dashboard (BigQuery SQL)
|
|
342
|
+
|
|
343
|
+
```sql
|
|
344
|
+
-- Sync freshness per source system: when each source last landed data, how
-- many whole hours ago that was, and how much arrived in the last 24 hours.
WITH source_stats AS (
    -- One row per source with its latest sync instant and row counts
    SELECT
        d._source,
        MAX(d._synced_at) AS last_sync,
        COUNT(*) AS total_records,
        COUNTIF(
            d._synced_at > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 24 HOUR)
        ) AS synced_today
    FROM `project.dataset.crm_deals` AS d
    GROUP BY d._source
)
SELECT
    _source,
    last_sync,
    TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), last_sync, HOUR) AS hours_since_sync,
    total_records,
    synced_today
FROM source_stats

-- Alert if sync is stale (> 4 hours)
-- Set up BigQuery scheduled query + email alert
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
### Error Notification
|
|
359
|
+
|
|
360
|
+
```javascript
|
|
361
|
+
/**
 * Reports a pipeline failure through two independent channels: a row in the
 * Catalyst Data Store table `PipelineErrors` and an email alert.
 *
 * Each channel is attempted on its own, so a Data Store outage no longer
 * suppresses the email (and vice versa). Channel failures are logged and
 * swallowed: this function is called from error handlers and must not throw
 * a second error on top of the one being reported.
 *
 * @param {object} app - Initialised Catalyst app.
 * @param {Error|*} error - The failure to report; non-Error values are stringified.
 * @returns {Promise<void>}
 */
async function notifyPipelineFailure(app, error) {
  // One timestamp for both channels so the log row and the email agree.
  const occurredAt = new Date().toISOString();
  const message = error && error.message ? error.message : String(error);
  const stack = error && error.stack ? error.stack : "";

  // Log to Catalyst Data Store
  try {
    await app.datastore().table("PipelineErrors").insertRow({
      pipeline_name: "zoho_to_bigquery",
      error_message: message,
      error_stack: stack,
      timestamp: occurredAt
    });
  } catch (logError) {
    console.error("Failed to record pipeline error in Data Store:", logError);
  }

  // Email alert
  try {
    await app.email().sendMail({
      from_email: "noreply@catalyst-project.com",
      to_email: ["data-team@company.com"],
      subject: "[ALERT] Zoho→BigQuery Pipeline Failure",
      content: `Pipeline failed at ${occurredAt}\n\nError: ${message}\n\nStack: ${stack}`
    });
  } catch (mailError) {
    console.error("Failed to send pipeline failure email:", mailError);
  }
}
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
## Cost Optimization
|
|
381
|
+
|
|
382
|
+
| Optimization | Savings | Trade-off |
|
|
383
|
+
|-------------|---------|-----------|
|
|
384
|
+
| Incremental sync | 80-90% fewer API calls | May miss deletes |
|
|
385
|
+
| Batch insert (not streaming) | No streaming costs | Higher latency (minutes) |
|
|
386
|
+
| Partitioned tables | Reduced query costs | Slightly complex schema |
|
|
387
|
+
| Columnar selection | Less data transferred | Must specify fields |
|
|
388
|
+
| Schedule during off-peak | Lower Zoho API contention | Delayed data |
|
|
389
|
+
|
|
390
|
+
### BigQuery Table Partitioning
|
|
391
|
+
|
|
392
|
+
```sql
|
|
393
|
+
-- Create partitioned table for cost-effective queries.
-- The column list mirrors the rows the sync pipeline writes (see the JSON
-- schema earlier in this document): the owner columns, created_at and
-- _source were missing, and the freshness query groups by _source.
-- NOTE(review): FLOAT64 is inexact for currency; NUMERIC would be safer, but
-- the pipeline's JSON schema declares FLOAT — change both together.
CREATE TABLE IF NOT EXISTS `project.dataset.crm_deals` (
    deal_id STRING NOT NULL,
    deal_name STRING,
    amount FLOAT64,
    stage STRING,
    closing_date DATE,
    owner_name STRING,
    owner_email STRING,
    created_at TIMESTAMP,
    modified_at TIMESTAMP,
    _synced_at TIMESTAMP NOT NULL,
    _source STRING NOT NULL
)
PARTITION BY DATE(_synced_at)
CLUSTER BY stage, closing_date;

-- Queries that filter by _synced_at only scan relevant partitions
-- Queries that also filter by stage/closing_date benefit from clustering
|
|
408
|
+
```
|
|
409
|
+
|
|
410
|
+
## Medallion Architecture Mapping
|
|
411
|
+
|
|
412
|
+
```
|
|
413
|
+
Bronze (Raw):
|
|
414
|
+
zoho_raw.crm_deals ← Direct API response, minimal transformation
|
|
415
|
+
zoho_raw.books_invoices ← Raw JSON stored as-is
|
|
416
|
+
|
|
417
|
+
Silver (Cleaned):
|
|
418
|
+
zoho_clean.deals ← Deduplicated, typed, null-handled
|
|
419
|
+
zoho_clean.invoices ← Standardized amounts, dates
|
|
420
|
+
|
|
421
|
+
Gold (Business):
|
|
422
|
+
analytics.revenue_monthly ← Aggregated metrics
|
|
423
|
+
analytics.customer_ltv ← Calculated business KPIs
|
|
424
|
+
analytics.pipeline_health ← Dashboard-ready views
|
|
425
|
+
```
|
|
426
|
+
|
|
427
|
+
> **NOTE:** Use dbt for Silver→Gold transformations. Store Bronze in BigQuery with the raw sync pipeline described above.
|