@cloudstreamsoftware/claude-tools 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +152 -37
- package/agents/INDEX.md +183 -0
- package/agents/architect.md +247 -0
- package/agents/build-error-resolver.md +555 -0
- package/agents/catalyst-deployer.md +132 -0
- package/agents/code-reviewer.md +121 -0
- package/agents/compliance-auditor.md +148 -0
- package/agents/creator-architect.md +395 -0
- package/agents/deluge-reviewer.md +98 -0
- package/agents/doc-updater.md +471 -0
- package/agents/e2e-runner.md +711 -0
- package/agents/planner.md +122 -0
- package/agents/refactor-cleaner.md +309 -0
- package/agents/security-reviewer.md +582 -0
- package/agents/tdd-guide.md +302 -0
- package/bin/cloudstream-setup.js +16 -6
- package/config/versions.json +63 -0
- package/dist/hooks/hooks.json +209 -0
- package/dist/index.js +47 -0
- package/dist/lib/asset-value.js +609 -0
- package/dist/lib/client-manager.js +300 -0
- package/dist/lib/command-matcher.js +242 -0
- package/dist/lib/cross-session-patterns.js +754 -0
- package/dist/lib/intent-classifier.js +1075 -0
- package/dist/lib/package-manager.js +374 -0
- package/dist/lib/recommendation-engine.js +597 -0
- package/dist/lib/session-memory.js +489 -0
- package/dist/lib/skill-effectiveness.js +486 -0
- package/dist/lib/skill-matcher.js +595 -0
- package/dist/lib/tutorial-metrics.js +242 -0
- package/dist/lib/tutorial-progress.js +209 -0
- package/dist/lib/tutorial-renderer.js +431 -0
- package/dist/lib/utils.js +380 -0
- package/dist/lib/verify-formatter.js +143 -0
- package/dist/lib/workflow-state.js +249 -0
- package/hooks/hooks.json +209 -0
- package/package.json +5 -1
- package/scripts/aggregate-sessions.js +290 -0
- package/scripts/branch-name-validator.js +291 -0
- package/scripts/build.js +101 -0
- package/scripts/commands/client-switch.js +231 -0
- package/scripts/deprecate-skill.js +610 -0
- package/scripts/diagnose.js +324 -0
- package/scripts/doc-freshness.js +168 -0
- package/scripts/generate-weekly-digest.js +393 -0
- package/scripts/health-check.js +270 -0
- package/scripts/hooks/credential-check.js +101 -0
- package/scripts/hooks/evaluate-session.js +81 -0
- package/scripts/hooks/pre-compact.js +66 -0
- package/scripts/hooks/prompt-analyzer.js +276 -0
- package/scripts/hooks/prompt-router.js +422 -0
- package/scripts/hooks/quality-gate-enforcer.js +371 -0
- package/scripts/hooks/session-end.js +156 -0
- package/scripts/hooks/session-start.js +195 -0
- package/scripts/hooks/skill-injector.js +333 -0
- package/scripts/hooks/suggest-compact.js +58 -0
- package/scripts/lib/asset-value.js +609 -0
- package/scripts/lib/client-manager.js +300 -0
- package/scripts/lib/command-matcher.js +242 -0
- package/scripts/lib/cross-session-patterns.js +754 -0
- package/scripts/lib/intent-classifier.js +1075 -0
- package/scripts/lib/package-manager.js +374 -0
- package/scripts/lib/recommendation-engine.js +597 -0
- package/scripts/lib/session-memory.js +489 -0
- package/scripts/lib/skill-effectiveness.js +486 -0
- package/scripts/lib/skill-matcher.js +595 -0
- package/scripts/lib/tutorial-metrics.js +242 -0
- package/scripts/lib/tutorial-progress.js +209 -0
- package/scripts/lib/tutorial-renderer.js +431 -0
- package/scripts/lib/utils.js +380 -0
- package/scripts/lib/verify-formatter.js +143 -0
- package/scripts/lib/workflow-state.js +249 -0
- package/scripts/onboard.js +363 -0
- package/scripts/quarterly-report.js +692 -0
- package/scripts/setup-package-manager.js +204 -0
- package/scripts/sync-upstream.js +391 -0
- package/scripts/test.js +108 -0
- package/scripts/tutorial-runner.js +351 -0
- package/scripts/validate-all.js +201 -0
- package/scripts/verifiers/agents.js +245 -0
- package/scripts/verifiers/config.js +186 -0
- package/scripts/verifiers/environment.js +123 -0
- package/scripts/verifiers/hooks.js +188 -0
- package/scripts/verifiers/index.js +38 -0
- package/scripts/verifiers/persistence.js +140 -0
- package/scripts/verifiers/plugin.js +215 -0
- package/scripts/verifiers/skills.js +209 -0
- package/scripts/verify-setup.js +164 -0
- package/skills/INDEX.md +157 -0
- package/skills/backend-patterns/SKILL.md +586 -0
- package/skills/backend-patterns/catalyst-patterns.md +128 -0
- package/skills/bigquery-patterns/SKILL.md +27 -0
- package/skills/bigquery-patterns/performance-optimization.md +518 -0
- package/skills/bigquery-patterns/query-patterns.md +372 -0
- package/skills/bigquery-patterns/schema-design.md +78 -0
- package/skills/cloudstream-project-template/SKILL.md +20 -0
- package/skills/cloudstream-project-template/structure.md +65 -0
- package/skills/coding-standards/SKILL.md +524 -0
- package/skills/coding-standards/deluge-standards.md +83 -0
- package/skills/compliance-patterns/SKILL.md +28 -0
- package/skills/compliance-patterns/hipaa/audit-requirements.md +251 -0
- package/skills/compliance-patterns/hipaa/baa-process.md +298 -0
- package/skills/compliance-patterns/hipaa/data-archival-strategy.md +387 -0
- package/skills/compliance-patterns/hipaa/phi-handling.md +52 -0
- package/skills/compliance-patterns/pci-dss/saq-a-requirements.md +307 -0
- package/skills/compliance-patterns/pci-dss/tokenization-patterns.md +382 -0
- package/skills/compliance-patterns/pci-dss/zoho-checkout-patterns.md +56 -0
- package/skills/compliance-patterns/soc2/access-controls.md +344 -0
- package/skills/compliance-patterns/soc2/audit-logging.md +458 -0
- package/skills/compliance-patterns/soc2/change-management.md +403 -0
- package/skills/compliance-patterns/soc2/deluge-execution-logging.md +407 -0
- package/skills/consultancy-workflows/SKILL.md +19 -0
- package/skills/consultancy-workflows/client-isolation.md +21 -0
- package/skills/consultancy-workflows/documentation-automation.md +454 -0
- package/skills/consultancy-workflows/handoff-procedures.md +257 -0
- package/skills/consultancy-workflows/knowledge-capture.md +513 -0
- package/skills/consultancy-workflows/time-tracking.md +26 -0
- package/skills/continuous-learning/SKILL.md +84 -0
- package/skills/continuous-learning/config.json +18 -0
- package/skills/continuous-learning/evaluate-session.sh +60 -0
- package/skills/continuous-learning-v2/SKILL.md +126 -0
- package/skills/continuous-learning-v2/config.json +61 -0
- package/skills/frontend-patterns/SKILL.md +635 -0
- package/skills/frontend-patterns/zoho-widget-patterns.md +103 -0
- package/skills/gcp-data-engineering/SKILL.md +36 -0
- package/skills/gcp-data-engineering/bigquery/performance-optimization.md +337 -0
- package/skills/gcp-data-engineering/dataflow/error-handling.md +496 -0
- package/skills/gcp-data-engineering/dataflow/pipeline-patterns.md +444 -0
- package/skills/gcp-data-engineering/dbt/model-organization.md +63 -0
- package/skills/gcp-data-engineering/dbt/testing-patterns.md +503 -0
- package/skills/gcp-data-engineering/medallion-architecture/bronze-layer.md +60 -0
- package/skills/gcp-data-engineering/medallion-architecture/gold-layer.md +311 -0
- package/skills/gcp-data-engineering/medallion-architecture/layer-transitions.md +517 -0
- package/skills/gcp-data-engineering/medallion-architecture/silver-layer.md +305 -0
- package/skills/gcp-data-engineering/zoho-to-gcp/data-extraction.md +543 -0
- package/skills/gcp-data-engineering/zoho-to-gcp/real-time-vs-batch.md +337 -0
- package/skills/security-review/SKILL.md +498 -0
- package/skills/security-review/compliance-checklist.md +53 -0
- package/skills/strategic-compact/SKILL.md +67 -0
- package/skills/tdd-workflow/SKILL.md +413 -0
- package/skills/tdd-workflow/zoho-testing.md +124 -0
- package/skills/tutorial/SKILL.md +249 -0
- package/skills/tutorial/docs/ACCESSIBILITY.md +169 -0
- package/skills/tutorial/lessons/00-philosophy-and-workflow.md +198 -0
- package/skills/tutorial/lessons/01-basics.md +81 -0
- package/skills/tutorial/lessons/02-training.md +86 -0
- package/skills/tutorial/lessons/03-commands.md +109 -0
- package/skills/tutorial/lessons/04-workflows.md +115 -0
- package/skills/tutorial/lessons/05-compliance.md +116 -0
- package/skills/tutorial/lessons/06-zoho.md +121 -0
- package/skills/tutorial/lessons/07-hooks-system.md +277 -0
- package/skills/tutorial/lessons/08-mcp-servers.md +316 -0
- package/skills/tutorial/lessons/09-client-management.md +215 -0
- package/skills/tutorial/lessons/10-testing-e2e.md +260 -0
- package/skills/tutorial/lessons/11-skills-deep-dive.md +272 -0
- package/skills/tutorial/lessons/12-rules-system.md +326 -0
- package/skills/tutorial/lessons/13-golden-standard-graduation.md +213 -0
- package/skills/tutorial/lessons/14-fork-setup-and-sync.md +312 -0
- package/skills/tutorial/lessons/15-living-examples-system.md +221 -0
- package/skills/tutorial/tracks/accelerated/README.md +134 -0
- package/skills/tutorial/tracks/accelerated/assessment/checkpoint-1.md +161 -0
- package/skills/tutorial/tracks/accelerated/assessment/checkpoint-2.md +175 -0
- package/skills/tutorial/tracks/accelerated/day-1-core-concepts.md +234 -0
- package/skills/tutorial/tracks/accelerated/day-2-essential-commands.md +270 -0
- package/skills/tutorial/tracks/accelerated/day-3-workflow-mastery.md +305 -0
- package/skills/tutorial/tracks/accelerated/day-4-compliance-zoho.md +304 -0
- package/skills/tutorial/tracks/accelerated/day-5-hooks-skills.md +344 -0
- package/skills/tutorial/tracks/accelerated/day-6-client-testing.md +386 -0
- package/skills/tutorial/tracks/accelerated/day-7-graduation.md +369 -0
- package/skills/zoho-patterns/CHANGELOG.md +108 -0
- package/skills/zoho-patterns/SKILL.md +446 -0
- package/skills/zoho-patterns/analytics/dashboard-patterns.md +352 -0
- package/skills/zoho-patterns/analytics/zoho-to-bigquery-pipeline.md +427 -0
- package/skills/zoho-patterns/catalyst/appsail-deployment.md +349 -0
- package/skills/zoho-patterns/catalyst/context-close-patterns.md +354 -0
- package/skills/zoho-patterns/catalyst/cron-batch-processing.md +374 -0
- package/skills/zoho-patterns/catalyst/function-patterns.md +439 -0
- package/skills/zoho-patterns/creator/form-design.md +304 -0
- package/skills/zoho-patterns/creator/publish-api-patterns.md +313 -0
- package/skills/zoho-patterns/creator/widget-integration.md +306 -0
- package/skills/zoho-patterns/creator/workflow-automation.md +253 -0
- package/skills/zoho-patterns/deluge/api-patterns.md +468 -0
- package/skills/zoho-patterns/deluge/batch-processing.md +403 -0
- package/skills/zoho-patterns/deluge/cross-app-integration.md +356 -0
- package/skills/zoho-patterns/deluge/error-handling.md +423 -0
- package/skills/zoho-patterns/deluge/syntax-reference.md +65 -0
- package/skills/zoho-patterns/integration/cors-proxy-architecture.md +426 -0
- package/skills/zoho-patterns/integration/crm-books-native-sync.md +277 -0
- package/skills/zoho-patterns/integration/oauth-token-management.md +461 -0
- package/skills/zoho-patterns/integration/zoho-flow-patterns.md +334 -0
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# Zoho Widget Development Patterns
|
|
2
|
+
|
|
3
|
+
## Widget Initialization
|
|
4
|
+
```javascript
|
|
5
|
+
import React, { useEffect, useState } from 'react';
|
|
6
|
+
|
|
7
|
+
function ZohoWidget() {
|
|
8
|
+
const [sdk, setSdk] = useState(null);
|
|
9
|
+
const [loading, setLoading] = useState(true);
|
|
10
|
+
|
|
11
|
+
useEffect(() => {
|
|
12
|
+
// ZOHO SDK is loaded via script tag in index.html
|
|
13
|
+
window.ZOHO.CREATOR.init()
|
|
14
|
+
.then(() => {
|
|
15
|
+
setSdk(window.ZOHO.CREATOR);
|
|
16
|
+
setLoading(false);
|
|
17
|
+
})
|
|
18
|
+
.catch(err => {
|
|
19
|
+
console.error('SDK init failed:', err);
|
|
20
|
+
setLoading(false);
|
|
21
|
+
});
|
|
22
|
+
}, []);
|
|
23
|
+
|
|
24
|
+
if (loading) return <div>Loading...</div>;
|
|
25
|
+
|
|
26
|
+
return <WidgetContent sdk={sdk} />;
|
|
27
|
+
}
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Reading Form Data
|
|
31
|
+
```javascript
|
|
32
|
+
async function getRecordData(sdk, reportName, criteria) {
|
|
33
|
+
try {
|
|
34
|
+
const config = {
|
|
35
|
+
appName: 'your-app',
|
|
36
|
+
reportName: reportName,
|
|
37
|
+
criteria: criteria,
|
|
38
|
+
page: 1,
|
|
39
|
+
pageSize: 200
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
const response = await sdk.API.getAllRecords(config);
|
|
43
|
+
return response.data;
|
|
44
|
+
} catch (error) {
|
|
45
|
+
console.error('Failed to fetch records:', error);
|
|
46
|
+
return [];
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Writing Form Data
|
|
52
|
+
```javascript
|
|
53
|
+
async function createRecord(sdk, formName, data) {
|
|
54
|
+
try {
|
|
55
|
+
const config = {
|
|
56
|
+
appName: 'your-app',
|
|
57
|
+
formName: formName,
|
|
58
|
+
data: { data: data }
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
const response = await sdk.API.addRecord(config);
|
|
62
|
+
return response;
|
|
63
|
+
} catch (error) {
|
|
64
|
+
console.error('Failed to create record:', error);
|
|
65
|
+
throw error;
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Widget-to-Form Communication
|
|
71
|
+
```javascript
|
|
72
|
+
// Send data to parent form
|
|
73
|
+
sdk.API.setFieldValue({ appName: 'app', formName: 'form', fieldName: 'Status', value: 'Complete' });
|
|
74
|
+
|
|
75
|
+
// Listen for form events
|
|
76
|
+
sdk.API.onFormSubmit(function(data) {
|
|
77
|
+
console.log('Form submitted:', data);
|
|
78
|
+
});
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## CORS Proxy Architecture
|
|
82
|
+
Widgets cannot directly call external APIs due to CORS.
|
|
83
|
+
Use a Catalyst function as proxy:
|
|
84
|
+
|
|
85
|
+
```javascript
|
|
86
|
+
// Widget → Catalyst Function → External API
|
|
87
|
+
async function callExternalAPI(endpoint, data) {
|
|
88
|
+
const proxyURL = 'https://your-project.catalystserverless.com/server/proxy';
|
|
89
|
+
const response = await fetch(proxyURL, {
|
|
90
|
+
method: 'POST',
|
|
91
|
+
headers: { 'Content-Type': 'application/json' },
|
|
92
|
+
body: JSON.stringify({ endpoint, data })
|
|
93
|
+
});
|
|
94
|
+
return response.json();
|
|
95
|
+
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Critical Constraints
|
|
99
|
+
- Widgets do NOT work on published pages (use Publish API instead)
|
|
100
|
+
- Maximum 50 widgets per Zoho One account
|
|
101
|
+
- Widget SDK is loaded via script tag, not npm package
|
|
102
|
+
- Test locally with Zoho SDK mock before deployment
|
|
103
|
+
- Widget container has limited height/width - design responsively
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: gcp-data-engineering
|
|
3
|
+
description: GCP data engineering patterns for medallion architecture (bronze/silver/gold), BigQuery, Dataflow, and dbt. Used for data pipeline design and implementation.
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
status: active
|
|
6
|
+
introduced: 1.0.0
|
|
7
|
+
lastUpdated: 2026-01-25
|
|
8
|
+
activation: BigQuery, Dataflow, data pipeline tasks, medallion architecture, dbt models
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# GCP Data Engineering
|
|
12
|
+
|
|
13
|
+
Patterns for CloudStream's data engineering work using Google Cloud Platform.
|
|
14
|
+
|
|
15
|
+
## Medallion Architecture
|
|
16
|
+
- **Bronze**: Raw ingestion, schema-on-read, minimal transformation
|
|
17
|
+
- **Silver**: Cleansed, deduplicated, quality rules applied
|
|
18
|
+
- **Gold**: Business-ready aggregations for reporting
|
|
19
|
+
|
|
20
|
+
## Key Technologies
|
|
21
|
+
- BigQuery: Data warehouse (silver + gold layers)
|
|
22
|
+
- Cloud Storage: Raw file storage (bronze layer)
|
|
23
|
+
- Dataflow: Streaming and batch ETL (Apache Beam)
|
|
24
|
+
- dbt: SQL-based transformations (silver → gold)
|
|
25
|
+
- Cloud Functions: Event-driven ingestion triggers
|
|
26
|
+
- Looker Studio: Visualization on gold layer
|
|
27
|
+
|
|
28
|
+
## Naming Conventions
|
|
29
|
+
|
|
30
|
+
| Layer | Table Suffix | Example | Description |
|
|
31
|
+
|-------|-------------|---------|-------------|
|
|
32
|
+
| Bronze | `_raw` | `zoho_contacts_raw` | Raw ingested data, schema-on-read |
|
|
33
|
+
| Silver | `_cleaned` | `contacts_cleaned` | Deduplicated, validated, typed |
|
|
34
|
+
| Gold | `_agg` or descriptive | `customer_360`, `revenue_daily_agg` | Business aggregations |
|
|
35
|
+
|
|
36
|
+
Dataset naming: `project.bronze.*`, `project.silver.*`, `project.gold.*`
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
# BigQuery Performance Optimization
|
|
2
|
+
|
|
3
|
+
> Strategies for partitioning, clustering, slot management, and query optimization to minimize cost and maximize performance in CloudStream's BigQuery warehouse.
|
|
4
|
+
|
|
5
|
+
## Partitioning Strategies
|
|
6
|
+
|
|
7
|
+
### Time-Unit Partitioning (Recommended Default)
|
|
8
|
+
|
|
9
|
+
```sql
|
|
10
|
+
-- Partition by business date for time-series data
|
|
11
|
+
CREATE TABLE `project.silver.zoho_deals` (
|
|
12
|
+
record_id STRING,
|
|
13
|
+
deal_name STRING,
|
|
14
|
+
amount NUMERIC, -- exact decimal for currency (FLOAT64 accumulates rounding error in SUM/AVG)
|
|
15
|
+
close_date DATE,
|
|
16
|
+
_ingestion_date DATE
|
|
17
|
+
)
|
|
18
|
+
PARTITION BY _ingestion_date
|
|
19
|
+
OPTIONS (
|
|
20
|
+
require_partition_filter = TRUE,
|
|
21
|
+
partition_expiration_days = 730
|
|
22
|
+
);
|
|
23
|
+
|
|
24
|
+
-- Partition by month for aggregated tables (reduces partition count)
|
|
25
|
+
CREATE TABLE `project.gold.fct_monthly_revenue` (
|
|
26
|
+
revenue_month DATE,
|
|
27
|
+
customer_id STRING,
|
|
28
|
+
total_revenue NUMERIC -- exact decimal for currency
|
|
29
|
+
)
|
|
30
|
+
PARTITION BY DATE_TRUNC(revenue_month, MONTH);
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Integer-Range Partitioning (For Non-Date Keys)
|
|
34
|
+
|
|
35
|
+
```sql
|
|
36
|
+
-- Useful for customer ID-based sharding
|
|
37
|
+
CREATE TABLE `project.gold.dim_customers_sharded` (
|
|
38
|
+
customer_shard INT64, -- hash of customer_id mod 1000
|
|
39
|
+
customer_id STRING,
|
|
40
|
+
account_name STRING
|
|
41
|
+
)
|
|
42
|
+
PARTITION BY RANGE_BUCKET(customer_shard, GENERATE_ARRAY(0, 1000, 10));
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Ingestion-Time Partitioning (Bronze Layer Default)
|
|
46
|
+
|
|
47
|
+
```sql
|
|
48
|
+
-- Automatic partitioning by load time (no column needed)
|
|
49
|
+
CREATE TABLE `project.bronze.zoho_raw_events` (
|
|
50
|
+
event_data STRING,
|
|
51
|
+
source STRING
|
|
52
|
+
)
|
|
53
|
+
PARTITION BY _PARTITIONDATE
|
|
54
|
+
OPTIONS (require_partition_filter = TRUE);
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Partitioning Decision Matrix
|
|
58
|
+
|
|
59
|
+
| Data Type | Recommended Partition | Granularity | Expiration |
|
|
60
|
+
|-----------|----------------------|-------------|------------|
|
|
61
|
+
| Bronze raw | `_PARTITIONDATE` | Day | 365 days |
|
|
62
|
+
| Silver cleansed | `_ingestion_date` | Day | 730 days |
|
|
63
|
+
| Gold facts | Business date | Day/Month | No expiry |
|
|
64
|
+
| Gold KPIs | `snapshot_date` | Day | No expiry |
|
|
65
|
+
| Quarantine | `quarantined_at` | Day | 90 days |
|
|
66
|
+
|
|
67
|
+
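> **WARNING**: BigQuery limits a partitioned table to 10,000 partitions (4,000 before the quota was raised), and a single job can modify at most 4,000 partitions. Use MONTH granularity for tables whose daily partitions would approach these limits (e.g. multi-decade history or large backfills).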
|
|
68
|
+
|
|
69
|
+
## Clustering
|
|
70
|
+
|
|
71
|
+
Column order matters: place the most frequently filtered column first.
|
|
72
|
+
|
|
73
|
+
```sql
|
|
74
|
+
-- Optimal clustering for deal queries
|
|
75
|
+
CREATE TABLE `project.silver.zoho_deals` (
|
|
76
|
+
...
|
|
77
|
+
)
|
|
78
|
+
PARTITION BY _ingestion_date
|
|
79
|
+
CLUSTER BY owner_id, stage, account_id;
|
|
80
|
+
-- Query pattern: WHERE owner_id = 'x' AND stage = 'Closed Won'
|
|
81
|
+
-- Clustering eliminates scanning irrelevant blocks
|
|
82
|
+
|
|
83
|
+
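-- Change clustering on an existing table: BigQuery has no ALTER TABLE / SET OPTIONS setting for this.
|
|
84
|
+
-- Use the bq CLI (or the tables.update / tables.patch API); only data written afterwards is clustered on the new columns:
|
|
85
|
+
-- bq update --clustering_fields=owner_id,stage,account_id project:silver.zoho_deals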
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Clustering Best Practices
|
|
89
|
+
|
|
90
|
+
| Rule | Rationale |
|
|
91
|
+
|------|-----------|
|
|
92
|
+
| Max 4 columns | Diminishing returns beyond 4 |
|
|
93
|
+
| Most frequently filtered column first | Block pruning only applies when queries filter on a leading prefix of the clustering columns |
|
|
94
|
+
| Filter columns only | Clustering helps WHERE, not SELECT |
|
|
95
|
+
| String columns work | Unlike partitioning, strings are fine |
|
|
96
|
+
| Re-clusters automatically | No maintenance needed after initial set |
|
|
97
|
+
|
|
98
|
+
## BI Engine Reservations
|
|
99
|
+
|
|
100
|
+
```sql
|
|
101
|
+
-- Reserve BI Engine capacity for Looker Studio dashboards
|
|
102
|
+
-- This provides sub-second query responses for cached data
|
|
103
|
+
|
|
104
|
+
-- Check current BI Engine status
|
|
105
|
+
SELECT * FROM `region-us`.INFORMATION_SCHEMA.BI_CAPACITIES;
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
```hcl
|
|
109
|
+
# Terraform: Reserve BI Engine capacity
|
|
110
|
+
# terraform/bi_engine.tf
|
|
111
|
+
resource "google_bigquery_bi_reservation" "looker_cache" {
|
|
112
|
+
location = "us-central1"
|
|
113
|
+
size = 2147483648 # 2 GiB of BI Engine RAM; the provider expects bytes (starts at ~$36.50/GB/month)
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
# Preferred tables for BI Engine acceleration
|
|
117
|
+
resource "google_bigquery_table" "gold_kpi" {
|
|
118
|
+
# BI Engine automatically caches frequently-queried tables
|
|
119
|
+
# Prioritize gold layer dashboard tables
|
|
120
|
+
}
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
> **COST TIP**: BI Engine costs ~$36.50/GB/month. Start with 1-2 GB for your most critical dashboards. Monitor cache hit rate in Cloud Monitoring.
|
|
124
|
+
|
|
125
|
+
## Slot Management
|
|
126
|
+
|
|
127
|
+
### On-Demand vs Editions
|
|
128
|
+
|
|
129
|
+
| Factor | On-Demand | Standard Edition | Enterprise Edition |
|
|
130
|
+
|--------|-----------|-------------------|--------------------|
|
|
131
|
+
| Pricing | $6.25/TB scanned | $0.04/slot-hour | $0.06/slot-hour |
|
|
132
|
+
| Breakeven | <50 TB/month | 50-200 TB/month | 200+ TB/month |
|
|
133
|
+
| Autoscaling | Unlimited | Configurable | Configurable |
|
|
134
|
+
| Baseline slots | N/A | Not supported (autoscaling only) | Set minimum |
|
|
135
|
+
| Concurrency | Up to 2,000 slots per project (shared pool) | Based on slots | Based on slots |
|
|
136
|
+
| Best for | Unpredictable/low usage | Steady workloads | Heavy + governance |
|
|
137
|
+
|
|
138
|
+
```sql
|
|
139
|
+
-- Monitor slot utilization
|
|
140
|
+
SELECT
|
|
141
|
+
period_start,
|
|
142
|
+
project_id,
|
|
143
|
+
job_type,
|
|
144
|
+
SUM(period_slot_ms) / 1000 / 3600 AS slot_hours,
|
|
145
|
+
SUM(total_bytes_processed) / POW(1024, 4) AS tb_processed
|
|
146
|
+
FROM `region-us`.INFORMATION_SCHEMA.JOBS_TIMELINE_BY_PROJECT
|
|
147
|
+
WHERE period_start >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY)
|
|
148
|
+
GROUP BY 1, 2, 3
|
|
149
|
+
ORDER BY slot_hours DESC;
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Reservation Assignment
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
# Assign reservations to different workload types
|
|
156
|
+
from google.cloud import bigquery_reservation_v1
|
|
157
|
+
|
|
158
|
+
client = bigquery_reservation_v1.ReservationServiceClient()
|
|
159
|
+
|
|
160
|
+
# Create reservation for ETL workloads (off-peak slots)
|
|
161
|
+
reservation = client.create_reservation(
|
|
162
|
+
parent="projects/cloudstream-prod/locations/us-central1",
|
|
163
|
+
reservation_id="etl-workload",
|
|
164
|
+
reservation=bigquery_reservation_v1.Reservation(
|
|
165
|
+
slot_capacity=200, # 200 slots for ETL
|
|
166
|
+
)
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
# Create reservation for BI/dashboard queries (always available)
|
|
170
|
+
reservation_bi = client.create_reservation(
|
|
171
|
+
parent="projects/cloudstream-prod/locations/us-central1",
|
|
172
|
+
reservation_id="bi-workload",
|
|
173
|
+
reservation=bigquery_reservation_v1.Reservation(
|
|
174
|
+
slot_capacity=100, # 100 slots for dashboards
|
|
175
|
+
)
|
|
176
|
+
)
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
## Materialized Views for Common Queries
|
|
180
|
+
|
|
181
|
+
```sql
|
|
182
|
+
-- Identify top queries to materialize
|
|
183
|
+
SELECT
|
|
184
|
+
query,
|
|
185
|
+
COUNT(*) AS execution_count,
|
|
186
|
+
SUM(total_bytes_processed) / POW(1024, 3) AS total_gb_processed,
|
|
187
|
+
AVG(total_slot_ms) / 1000 AS avg_slot_seconds
|
|
188
|
+
FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
|
|
189
|
+
WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
|
|
190
|
+
AND job_type = 'QUERY'
|
|
191
|
+
AND state = 'DONE'
|
|
192
|
+
GROUP BY 1
|
|
193
|
+
HAVING COUNT(*) > 10
|
|
194
|
+
ORDER BY total_gb_processed DESC
|
|
195
|
+
LIMIT 20;
|
|
196
|
+
|
|
197
|
+
-- Create materialized view for frequently-run aggregation
|
|
198
|
+
CREATE MATERIALIZED VIEW `project.gold.mv_deal_stage_summary`
|
|
199
|
+
PARTITION BY _ingestion_date
|
|
200
|
+
CLUSTER BY stage
|
|
201
|
+
AS
|
|
202
|
+
SELECT
|
|
203
|
+
_ingestion_date,
|
|
204
|
+
stage,
|
|
205
|
+
owner_id,
|
|
206
|
+
COUNT(*) AS deal_count,
|
|
207
|
+
SUM(amount) AS total_amount,
|
|
208
|
+
AVG(amount) AS avg_amount,
|
|
209
|
+
MAX(modified_time) AS latest_update
|
|
210
|
+
FROM `project.silver.zoho_deals`
|
|
211
|
+
GROUP BY 1, 2, 3;
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## Query Optimization Patterns
|
|
215
|
+
|
|
216
|
+
### Avoid SELECT *
|
|
217
|
+
|
|
218
|
+
```sql
|
|
219
|
+
-- BAD: Scans all columns (expensive)
|
|
220
|
+
SELECT * FROM `project.silver.zoho_deals` WHERE stage = 'Closed Won';
|
|
221
|
+
|
|
222
|
+
-- GOOD: Only scan needed columns
|
|
223
|
+
SELECT record_id, deal_name, amount, close_date
|
|
224
|
+
FROM `project.silver.zoho_deals`
|
|
225
|
+
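WHERE _ingestion_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY) AND stage = 'Closed Won'; -- this table requires a partition filter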
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
### Use Approximate Functions
|
|
229
|
+
|
|
230
|
+
```sql
|
|
231
|
+
-- BAD: Exact distinct count (expensive for large tables)
|
|
232
|
+
SELECT COUNT(DISTINCT customer_id) FROM `project.gold.fct_invoices`;
|
|
233
|
+
|
|
234
|
+
-- GOOD: Approximate count (2% error, much faster)
|
|
235
|
+
SELECT APPROX_COUNT_DISTINCT(customer_id) FROM `project.gold.fct_invoices`;
|
|
236
|
+
|
|
237
|
+
-- GOOD: Approximate quantiles
|
|
238
|
+
SELECT APPROX_QUANTILES(amount, 100)[OFFSET(50)] AS median_amount
|
|
239
|
+
FROM `project.gold.fct_invoices`;
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
### Optimize Joins
|
|
243
|
+
|
|
244
|
+
```sql
|
|
245
|
+
-- BAD: Join before filter
|
|
246
|
+
SELECT d.*, c.account_name
|
|
247
|
+
FROM `project.gold.fct_deals` d
|
|
248
|
+
JOIN `project.gold.dim_customers` c ON d.customer_id = c.customer_id;
|
|
249
|
+
|
|
250
|
+
-- GOOD: Filter early, then join
|
|
251
|
+
WITH filtered_deals AS (
|
|
252
|
+
SELECT record_id, customer_id, amount
|
|
253
|
+
FROM `project.gold.fct_deals`
|
|
254
|
+
WHERE close_date >= '2024-01-01'
|
|
255
|
+
AND stage = 'Closed Won'
|
|
256
|
+
)
|
|
257
|
+
SELECT d.*, c.account_name
|
|
258
|
+
FROM filtered_deals d
|
|
259
|
+
JOIN `project.gold.dim_customers` c ON d.customer_id = c.customer_id;
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
## Cost Monitoring and Budgets
|
|
263
|
+
|
|
264
|
+
```sql
|
|
265
|
+
-- Daily cost tracking query
|
|
266
|
+
SELECT
|
|
267
|
+
DATE(creation_time) AS query_date,
|
|
268
|
+
user_email,
|
|
269
|
+
SUM(total_bytes_billed) / POW(1024, 4) AS tb_billed,
|
|
270
|
+
SUM(total_bytes_billed) / POW(1024, 4) * 6.25 AS estimated_cost_usd,
|
|
271
|
+
COUNT(*) AS query_count
|
|
272
|
+
FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
|
|
273
|
+
WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
|
|
274
|
+
AND job_type = 'QUERY'
|
|
275
|
+
GROUP BY 1, 2
|
|
276
|
+
ORDER BY estimated_cost_usd DESC;
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
```hcl
|
|
280
|
+
# Terraform: Budget alerts
|
|
281
|
+
resource "google_billing_budget" "bigquery_budget" {
|
|
282
|
+
billing_account = var.billing_account_id
|
|
283
|
+
display_name = "BigQuery Monthly Budget"
|
|
284
|
+
|
|
285
|
+
budget_filter {
|
|
286
|
+
services = ["services/24E6-581D-38E5"] # BigQuery service ID
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
amount {
|
|
290
|
+
specified_amount {
|
|
291
|
+
currency_code = "USD"
|
|
292
|
+
units = "500" # $500/month budget
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
threshold_rules {
|
|
297
|
+
threshold_percent = 0.5 # Alert at 50%
|
|
298
|
+
}
|
|
299
|
+
threshold_rules {
|
|
300
|
+
threshold_percent = 0.8 # Alert at 80%
|
|
301
|
+
}
|
|
302
|
+
threshold_rules {
|
|
303
|
+
threshold_percent = 1.0 # Alert at 100%
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
## Storage Optimization
|
|
309
|
+
|
|
310
|
+
> **COST TIP**: BigQuery automatically moves data to long-term storage after 90 days of no modifications, reducing cost from $0.02/GB to $0.01/GB. Design tables to be append-only where possible.
|
|
311
|
+
|
|
312
|
+
```sql
|
|
313
|
+
-- Identify tables eligible for long-term pricing
|
|
314
|
+
SELECT
|
|
315
|
+
table_schema,
|
|
316
|
+
table_name,
|
|
317
|
+
storage_last_modified_time AS last_modified,
|
|
318
|
+
ROUND(total_logical_bytes / POW(1024, 3), 2) AS size_gb,
|
|
319
|
+
CASE
|
|
320
|
+
WHEN storage_last_modified_time < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 90 DAY)
|
|
321
|
+
THEN 'LONG_TERM (50% discount)'
|
|
322
|
+
ELSE 'ACTIVE'
|
|
323
|
+
END AS storage_tier
|
|
324
|
+
FROM `project`.`region-us`.INFORMATION_SCHEMA.TABLE_STORAGE
|
|
325
|
+
ORDER BY total_logical_bytes DESC;
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
## Best Practices Summary
|
|
329
|
+
|
|
330
|
+
1. **Always partition** - Even small tables benefit from partition pruning
|
|
331
|
+
2. **Cluster by filter columns** - Place the most frequently filtered column first
|
|
332
|
+
3. **Require partition filters** - Prevent accidental full scans
|
|
333
|
+
4. **Use materialized views** - Auto-refresh, transparent to queries (storage and refresh are billed, so materialize only hot aggregations)
|
|
334
|
+
5. **Monitor with INFORMATION_SCHEMA** - Track bytes scanned, slot usage, cost
|
|
335
|
+
6. **Set budget alerts** - Never be surprised by a BigQuery bill
|
|
336
|
+
7. **Prefer approximate functions** - `APPROX_COUNT_DISTINCT` is 10x faster
|
|
337
|
+
8. **Design for long-term storage** - Append-only tables get 50% discount after 90 days
|