@cloudstreamsoftware/claude-tools 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +152 -37
- package/agents/INDEX.md +183 -0
- package/agents/architect.md +247 -0
- package/agents/build-error-resolver.md +555 -0
- package/agents/catalyst-deployer.md +132 -0
- package/agents/code-reviewer.md +121 -0
- package/agents/compliance-auditor.md +148 -0
- package/agents/creator-architect.md +395 -0
- package/agents/deluge-reviewer.md +98 -0
- package/agents/doc-updater.md +471 -0
- package/agents/e2e-runner.md +711 -0
- package/agents/planner.md +122 -0
- package/agents/refactor-cleaner.md +309 -0
- package/agents/security-reviewer.md +582 -0
- package/agents/tdd-guide.md +302 -0
- package/bin/cloudstream-setup.js +16 -6
- package/config/versions.json +63 -0
- package/dist/hooks/hooks.json +209 -0
- package/dist/index.js +47 -0
- package/dist/lib/asset-value.js +609 -0
- package/dist/lib/client-manager.js +300 -0
- package/dist/lib/command-matcher.js +242 -0
- package/dist/lib/cross-session-patterns.js +754 -0
- package/dist/lib/intent-classifier.js +1075 -0
- package/dist/lib/package-manager.js +374 -0
- package/dist/lib/recommendation-engine.js +597 -0
- package/dist/lib/session-memory.js +489 -0
- package/dist/lib/skill-effectiveness.js +486 -0
- package/dist/lib/skill-matcher.js +595 -0
- package/dist/lib/tutorial-metrics.js +242 -0
- package/dist/lib/tutorial-progress.js +209 -0
- package/dist/lib/tutorial-renderer.js +431 -0
- package/dist/lib/utils.js +380 -0
- package/dist/lib/verify-formatter.js +143 -0
- package/dist/lib/workflow-state.js +249 -0
- package/hooks/hooks.json +209 -0
- package/package.json +5 -1
- package/scripts/aggregate-sessions.js +290 -0
- package/scripts/branch-name-validator.js +291 -0
- package/scripts/build.js +101 -0
- package/scripts/commands/client-switch.js +231 -0
- package/scripts/deprecate-skill.js +610 -0
- package/scripts/diagnose.js +324 -0
- package/scripts/doc-freshness.js +168 -0
- package/scripts/generate-weekly-digest.js +393 -0
- package/scripts/health-check.js +270 -0
- package/scripts/hooks/credential-check.js +101 -0
- package/scripts/hooks/evaluate-session.js +81 -0
- package/scripts/hooks/pre-compact.js +66 -0
- package/scripts/hooks/prompt-analyzer.js +276 -0
- package/scripts/hooks/prompt-router.js +422 -0
- package/scripts/hooks/quality-gate-enforcer.js +371 -0
- package/scripts/hooks/session-end.js +156 -0
- package/scripts/hooks/session-start.js +195 -0
- package/scripts/hooks/skill-injector.js +333 -0
- package/scripts/hooks/suggest-compact.js +58 -0
- package/scripts/lib/asset-value.js +609 -0
- package/scripts/lib/client-manager.js +300 -0
- package/scripts/lib/command-matcher.js +242 -0
- package/scripts/lib/cross-session-patterns.js +754 -0
- package/scripts/lib/intent-classifier.js +1075 -0
- package/scripts/lib/package-manager.js +374 -0
- package/scripts/lib/recommendation-engine.js +597 -0
- package/scripts/lib/session-memory.js +489 -0
- package/scripts/lib/skill-effectiveness.js +486 -0
- package/scripts/lib/skill-matcher.js +595 -0
- package/scripts/lib/tutorial-metrics.js +242 -0
- package/scripts/lib/tutorial-progress.js +209 -0
- package/scripts/lib/tutorial-renderer.js +431 -0
- package/scripts/lib/utils.js +380 -0
- package/scripts/lib/verify-formatter.js +143 -0
- package/scripts/lib/workflow-state.js +249 -0
- package/scripts/onboard.js +363 -0
- package/scripts/quarterly-report.js +692 -0
- package/scripts/setup-package-manager.js +204 -0
- package/scripts/sync-upstream.js +391 -0
- package/scripts/test.js +108 -0
- package/scripts/tutorial-runner.js +351 -0
- package/scripts/validate-all.js +201 -0
- package/scripts/verifiers/agents.js +245 -0
- package/scripts/verifiers/config.js +186 -0
- package/scripts/verifiers/environment.js +123 -0
- package/scripts/verifiers/hooks.js +188 -0
- package/scripts/verifiers/index.js +38 -0
- package/scripts/verifiers/persistence.js +140 -0
- package/scripts/verifiers/plugin.js +215 -0
- package/scripts/verifiers/skills.js +209 -0
- package/scripts/verify-setup.js +164 -0
- package/skills/INDEX.md +157 -0
- package/skills/backend-patterns/SKILL.md +586 -0
- package/skills/backend-patterns/catalyst-patterns.md +128 -0
- package/skills/bigquery-patterns/SKILL.md +27 -0
- package/skills/bigquery-patterns/performance-optimization.md +518 -0
- package/skills/bigquery-patterns/query-patterns.md +372 -0
- package/skills/bigquery-patterns/schema-design.md +78 -0
- package/skills/cloudstream-project-template/SKILL.md +20 -0
- package/skills/cloudstream-project-template/structure.md +65 -0
- package/skills/coding-standards/SKILL.md +524 -0
- package/skills/coding-standards/deluge-standards.md +83 -0
- package/skills/compliance-patterns/SKILL.md +28 -0
- package/skills/compliance-patterns/hipaa/audit-requirements.md +251 -0
- package/skills/compliance-patterns/hipaa/baa-process.md +298 -0
- package/skills/compliance-patterns/hipaa/data-archival-strategy.md +387 -0
- package/skills/compliance-patterns/hipaa/phi-handling.md +52 -0
- package/skills/compliance-patterns/pci-dss/saq-a-requirements.md +307 -0
- package/skills/compliance-patterns/pci-dss/tokenization-patterns.md +382 -0
- package/skills/compliance-patterns/pci-dss/zoho-checkout-patterns.md +56 -0
- package/skills/compliance-patterns/soc2/access-controls.md +344 -0
- package/skills/compliance-patterns/soc2/audit-logging.md +458 -0
- package/skills/compliance-patterns/soc2/change-management.md +403 -0
- package/skills/compliance-patterns/soc2/deluge-execution-logging.md +407 -0
- package/skills/consultancy-workflows/SKILL.md +19 -0
- package/skills/consultancy-workflows/client-isolation.md +21 -0
- package/skills/consultancy-workflows/documentation-automation.md +454 -0
- package/skills/consultancy-workflows/handoff-procedures.md +257 -0
- package/skills/consultancy-workflows/knowledge-capture.md +513 -0
- package/skills/consultancy-workflows/time-tracking.md +26 -0
- package/skills/continuous-learning/SKILL.md +84 -0
- package/skills/continuous-learning/config.json +18 -0
- package/skills/continuous-learning/evaluate-session.sh +60 -0
- package/skills/continuous-learning-v2/SKILL.md +126 -0
- package/skills/continuous-learning-v2/config.json +61 -0
- package/skills/frontend-patterns/SKILL.md +635 -0
- package/skills/frontend-patterns/zoho-widget-patterns.md +103 -0
- package/skills/gcp-data-engineering/SKILL.md +36 -0
- package/skills/gcp-data-engineering/bigquery/performance-optimization.md +337 -0
- package/skills/gcp-data-engineering/dataflow/error-handling.md +496 -0
- package/skills/gcp-data-engineering/dataflow/pipeline-patterns.md +444 -0
- package/skills/gcp-data-engineering/dbt/model-organization.md +63 -0
- package/skills/gcp-data-engineering/dbt/testing-patterns.md +503 -0
- package/skills/gcp-data-engineering/medallion-architecture/bronze-layer.md +60 -0
- package/skills/gcp-data-engineering/medallion-architecture/gold-layer.md +311 -0
- package/skills/gcp-data-engineering/medallion-architecture/layer-transitions.md +517 -0
- package/skills/gcp-data-engineering/medallion-architecture/silver-layer.md +305 -0
- package/skills/gcp-data-engineering/zoho-to-gcp/data-extraction.md +543 -0
- package/skills/gcp-data-engineering/zoho-to-gcp/real-time-vs-batch.md +337 -0
- package/skills/security-review/SKILL.md +498 -0
- package/skills/security-review/compliance-checklist.md +53 -0
- package/skills/strategic-compact/SKILL.md +67 -0
- package/skills/tdd-workflow/SKILL.md +413 -0
- package/skills/tdd-workflow/zoho-testing.md +124 -0
- package/skills/tutorial/SKILL.md +249 -0
- package/skills/tutorial/docs/ACCESSIBILITY.md +169 -0
- package/skills/tutorial/lessons/00-philosophy-and-workflow.md +198 -0
- package/skills/tutorial/lessons/01-basics.md +81 -0
- package/skills/tutorial/lessons/02-training.md +86 -0
- package/skills/tutorial/lessons/03-commands.md +109 -0
- package/skills/tutorial/lessons/04-workflows.md +115 -0
- package/skills/tutorial/lessons/05-compliance.md +116 -0
- package/skills/tutorial/lessons/06-zoho.md +121 -0
- package/skills/tutorial/lessons/07-hooks-system.md +277 -0
- package/skills/tutorial/lessons/08-mcp-servers.md +316 -0
- package/skills/tutorial/lessons/09-client-management.md +215 -0
- package/skills/tutorial/lessons/10-testing-e2e.md +260 -0
- package/skills/tutorial/lessons/11-skills-deep-dive.md +272 -0
- package/skills/tutorial/lessons/12-rules-system.md +326 -0
- package/skills/tutorial/lessons/13-golden-standard-graduation.md +213 -0
- package/skills/tutorial/lessons/14-fork-setup-and-sync.md +312 -0
- package/skills/tutorial/lessons/15-living-examples-system.md +221 -0
- package/skills/tutorial/tracks/accelerated/README.md +134 -0
- package/skills/tutorial/tracks/accelerated/assessment/checkpoint-1.md +161 -0
- package/skills/tutorial/tracks/accelerated/assessment/checkpoint-2.md +175 -0
- package/skills/tutorial/tracks/accelerated/day-1-core-concepts.md +234 -0
- package/skills/tutorial/tracks/accelerated/day-2-essential-commands.md +270 -0
- package/skills/tutorial/tracks/accelerated/day-3-workflow-mastery.md +305 -0
- package/skills/tutorial/tracks/accelerated/day-4-compliance-zoho.md +304 -0
- package/skills/tutorial/tracks/accelerated/day-5-hooks-skills.md +344 -0
- package/skills/tutorial/tracks/accelerated/day-6-client-testing.md +386 -0
- package/skills/tutorial/tracks/accelerated/day-7-graduation.md +369 -0
- package/skills/zoho-patterns/CHANGELOG.md +108 -0
- package/skills/zoho-patterns/SKILL.md +446 -0
- package/skills/zoho-patterns/analytics/dashboard-patterns.md +352 -0
- package/skills/zoho-patterns/analytics/zoho-to-bigquery-pipeline.md +427 -0
- package/skills/zoho-patterns/catalyst/appsail-deployment.md +349 -0
- package/skills/zoho-patterns/catalyst/context-close-patterns.md +354 -0
- package/skills/zoho-patterns/catalyst/cron-batch-processing.md +374 -0
- package/skills/zoho-patterns/catalyst/function-patterns.md +439 -0
- package/skills/zoho-patterns/creator/form-design.md +304 -0
- package/skills/zoho-patterns/creator/publish-api-patterns.md +313 -0
- package/skills/zoho-patterns/creator/widget-integration.md +306 -0
- package/skills/zoho-patterns/creator/workflow-automation.md +253 -0
- package/skills/zoho-patterns/deluge/api-patterns.md +468 -0
- package/skills/zoho-patterns/deluge/batch-processing.md +403 -0
- package/skills/zoho-patterns/deluge/cross-app-integration.md +356 -0
- package/skills/zoho-patterns/deluge/error-handling.md +423 -0
- package/skills/zoho-patterns/deluge/syntax-reference.md +65 -0
- package/skills/zoho-patterns/integration/cors-proxy-architecture.md +426 -0
- package/skills/zoho-patterns/integration/crm-books-native-sync.md +277 -0
- package/skills/zoho-patterns/integration/oauth-token-management.md +461 -0
- package/skills/zoho-patterns/integration/zoho-flow-patterns.md +334 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: bigquery-patterns
|
|
3
|
+
description: BigQuery analytical patterns for CloudStream's data engineering work. Covers schema design, query optimization, partitioning, clustering, and cost management.
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
status: active
|
|
6
|
+
introduced: 1.0.0
|
|
7
|
+
lastUpdated: 2026-01-25
|
|
8
|
+
activation: BigQuery queries, data warehouse design, analytical reporting, medallion architecture
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# BigQuery Patterns
|
|
12
|
+
|
|
13
|
+
This skill covers BigQuery-specific patterns for CloudStream's medallion architecture (bronze/silver/gold layers), cost optimization, and analytical query patterns.
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
- Designing BigQuery table schemas
|
|
17
|
+
- Writing analytical queries
|
|
18
|
+
- Optimizing query cost and performance
|
|
19
|
+
- Building medallion layer transitions
|
|
20
|
+
- Creating materialized views for Looker
|
|
21
|
+
|
|
22
|
+
## Key Principles
|
|
23
|
+
- Partition by date/timestamp for time-series data
|
|
24
|
+
- Cluster by frequently filtered columns (max 4)
|
|
25
|
+
- Use materialized views for expensive aggregations
|
|
26
|
+
- Prefer STRUCT over JOINs for denormalized data
|
|
27
|
+
- Monitor slot usage and costs with INFORMATION_SCHEMA
|
|
@@ -0,0 +1,518 @@
|
|
|
1
|
+
# BigQuery Performance Optimization Patterns
|
|
2
|
+
|
|
3
|
+
> Advanced patterns for slot management, BI Engine acceleration, cost optimization, query tuning, and INFORMATION_SCHEMA monitoring at CloudStream.
|
|
4
|
+
|
|
5
|
+
## Slot Usage Analysis
|
|
6
|
+
|
|
7
|
+
### Understanding Slot Consumption
|
|
8
|
+
|
|
9
|
+
```sql
|
|
10
|
+
-- Identify top slot-consuming queries (last 7 days)
|
|
11
|
+
SELECT
|
|
12
|
+
user_email,
|
|
13
|
+
job_id,
|
|
14
|
+
query,
|
|
15
|
+
total_slot_ms / 1000 AS slot_seconds,
|
|
16
|
+
total_bytes_processed / POW(1024, 3) AS gb_processed,
|
|
17
|
+
TIMESTAMP_DIFF(end_time, start_time, SECOND) AS wall_clock_seconds,
|
|
18
|
+
-- Slot efficiency: high slot usage with low wall time = good parallelism
|
|
19
|
+
SAFE_DIVIDE(total_slot_ms / 1000, TIMESTAMP_DIFF(end_time, start_time, SECOND)) AS avg_slots_used
|
|
20
|
+
FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
|
|
21
|
+
WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY)
|
|
22
|
+
AND job_type = 'QUERY'
|
|
23
|
+
AND state = 'DONE'
|
|
24
|
+
AND error_result IS NULL
|
|
25
|
+
ORDER BY total_slot_ms DESC
|
|
26
|
+
LIMIT 25;
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### Slot Usage by Time of Day (Capacity Planning)
|
|
30
|
+
|
|
31
|
+
```sql
|
|
32
|
+
-- Hourly slot usage pattern - identifies peak hours for reservation sizing
|
|
33
|
+
SELECT
|
|
34
|
+
EXTRACT(HOUR FROM creation_time) AS hour_of_day,
|
|
35
|
+
EXTRACT(DAYOFWEEK FROM creation_time) AS day_of_week,
|
|
36
|
+
COUNT(*) AS query_count,
|
|
37
|
+
SUM(total_slot_ms) / 1000 / 3600 AS total_slot_hours,
|
|
38
|
+
MAX(total_slot_ms) / 1000 AS max_slot_seconds,
|
|
39
|
+
APPROX_QUANTILES(total_slot_ms / 1000, 100)[OFFSET(95)] AS p95_slot_seconds
|
|
40
|
+
FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
|
|
41
|
+
WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
|
|
42
|
+
AND job_type = 'QUERY'
|
|
43
|
+
GROUP BY 1, 2
|
|
44
|
+
ORDER BY 1, 2;
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Slot Contention Detection
|
|
48
|
+
|
|
49
|
+
```sql
|
|
50
|
+
-- Detect queries that queued due to slot unavailability
|
|
51
|
+
SELECT
|
|
52
|
+
job_id,
|
|
53
|
+
user_email,
|
|
54
|
+
creation_time,
|
|
55
|
+
start_time,
|
|
56
|
+
TIMESTAMP_DIFF(start_time, creation_time, SECOND) AS queue_wait_seconds,
|
|
57
|
+
total_slot_ms / 1000 AS slot_seconds,
|
|
58
|
+
SUBSTR(query, 1, 200) AS query_preview
|
|
59
|
+
FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
|
|
60
|
+
WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 24 HOUR)
|
|
61
|
+
AND job_type = 'QUERY'
|
|
62
|
+
AND TIMESTAMP_DIFF(start_time, creation_time, SECOND) > 5 -- Queued > 5s
|
|
63
|
+
ORDER BY queue_wait_seconds DESC
|
|
64
|
+
LIMIT 20;
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## BI Engine for Sub-Second Dashboards
|
|
68
|
+
|
|
69
|
+
### Reservation Setup
|
|
70
|
+
|
|
71
|
+
```hcl
|
|
72
|
+
# terraform/bi_engine.tf
|
|
73
|
+
resource "google_bigquery_bi_reservation" "dashboard_acceleration" {
|
|
74
|
+
location = "US" # Must match dataset location
|
|
75
|
+
size = 3221225472 # 3 GiB of BI Engine RAM (size is specified in bytes)
|
|
76
|
+
|
|
77
|
+
# Preferred tables are auto-detected, but you can influence priority
|
|
78
|
+
# by ensuring gold layer dashboard tables are queried frequently
|
|
79
|
+
}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### BI Engine Monitoring
|
|
83
|
+
|
|
84
|
+
```sql
|
|
85
|
+
-- Check BI Engine acceleration status
|
|
86
|
+
SELECT
|
|
87
|
+
project_id,
|
|
88
|
+
bi_engine_mode, -- 'FULL', 'PARTIAL', 'DISABLED'
|
|
89
|
+
bi_engine_reasons, -- Why partial/disabled
|
|
90
|
+
total_bytes_processed,
|
|
91
|
+
query
|
|
92
|
+
FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
|
|
93
|
+
WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 24 HOUR)
|
|
94
|
+
AND bi_engine_statistics IS NOT NULL
|
|
95
|
+
ORDER BY creation_time DESC
|
|
96
|
+
LIMIT 50;
|
|
97
|
+
|
|
98
|
+
-- BI Engine cache hit rate
|
|
99
|
+
SELECT
|
|
100
|
+
COUNT(*) AS total_queries,
|
|
101
|
+
COUNTIF(bi_engine_mode = 'FULL') AS fully_accelerated,
|
|
102
|
+
COUNTIF(bi_engine_mode = 'PARTIAL') AS partially_accelerated,
|
|
103
|
+
COUNTIF(bi_engine_mode = 'DISABLED') AS not_accelerated,
|
|
104
|
+
ROUND(COUNTIF(bi_engine_mode = 'FULL') / COUNT(*) * 100, 1) AS full_acceleration_pct
|
|
105
|
+
FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
|
|
106
|
+
WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY)
|
|
107
|
+
AND job_type = 'QUERY'
|
|
108
|
+
AND bi_engine_statistics IS NOT NULL;
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Optimizing for BI Engine
|
|
112
|
+
|
|
113
|
+
BI Engine works best with these table patterns:
|
|
114
|
+
|
|
115
|
+
| Optimization | Impact | Example |
|
|
116
|
+
|-------------|--------|---------|
|
|
117
|
+
| Fewer columns in query | Higher cache hit | SELECT only dashboard fields |
|
|
118
|
+
| Smaller tables (< reserved GB) | Full acceleration | Pre-aggregate in gold layer |
|
|
119
|
+
| Avoid complex functions | Better acceleration | Pre-compute in dbt model |
|
|
120
|
+
| Use standard types | Full support | Avoid STRUCT, ARRAY in dashboard tables |
|
|
121
|
+
| Cluster by dashboard filters | Better scanning | CLUSTER BY date, region |
|
|
122
|
+
|
|
123
|
+
```sql
|
|
124
|
+
-- Design gold layer views optimized for BI Engine
|
|
125
|
+
CREATE OR REPLACE VIEW `project.gold.v_bi_revenue_dashboard` AS
|
|
126
|
+
SELECT
|
|
127
|
+
-- Only columns used by Looker Studio dashboard
|
|
128
|
+
revenue_month,
|
|
129
|
+
customer_name,
|
|
130
|
+
region,
|
|
131
|
+
total_revenue,
|
|
132
|
+
invoice_count,
|
|
133
|
+
avg_days_to_pay,
|
|
134
|
+
-- Pre-computed comparisons (avoid window functions in dashboard query)
|
|
135
|
+
revenue_mom_pct,
|
|
136
|
+
revenue_yoy_pct
|
|
137
|
+
FROM `project.gold.fct_monthly_revenue_enriched`
|
|
138
|
+
WHERE revenue_month >= DATE_SUB(CURRENT_DATE(), INTERVAL 24 MONTH);
|
|
139
|
+
-- Limit date range to fit in BI Engine reservation
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
> **COST TIP**: BI Engine costs $36.50/GB/month. A 2 GB reservation (~$73/month) can accelerate most dashboard tables if you pre-aggregate to keep gold tables small. Monitor the acceleration percentage -- below 80% means you need more capacity or smaller tables.
|
|
143
|
+
|
|
144
|
+
## Cost Optimization: Flat-Rate vs On-Demand Breakeven
|
|
145
|
+
|
|
146
|
+
### Breakeven Analysis
|
|
147
|
+
|
|
148
|
+
```sql
|
|
149
|
+
-- Calculate your on-demand cost to compare with flat-rate
|
|
150
|
+
WITH monthly_usage AS (
|
|
151
|
+
SELECT
|
|
152
|
+
DATE_TRUNC(creation_time, MONTH) AS usage_month,
|
|
153
|
+
SUM(total_bytes_billed) / POW(1024, 4) AS tb_billed,
|
|
154
|
+
SUM(total_bytes_billed) / POW(1024, 4) * 6.25 AS on_demand_cost_usd,
|
|
155
|
+
SUM(total_slot_ms) / 1000 / 3600 AS total_slot_hours,
|
|
156
|
+
MAX(total_slot_ms) / 1000 AS max_query_slot_seconds
|
|
157
|
+
FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
|
|
158
|
+
WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 180 DAY) -- TIMESTAMP_SUB does not accept MONTH; ~6 months
|
|
159
|
+
AND job_type = 'QUERY'
|
|
160
|
+
GROUP BY 1
|
|
161
|
+
)
|
|
162
|
+
SELECT
|
|
163
|
+
usage_month,
|
|
164
|
+
tb_billed,
|
|
165
|
+
on_demand_cost_usd,
|
|
166
|
+
total_slot_hours,
|
|
167
|
+
-- Standard Edition comparison: $0.04/slot-hour, 100 slot baseline
|
|
168
|
+
100 * 730 * 0.04 AS standard_100_slots_monthly, -- $2,920/month
|
|
169
|
+
-- Is on-demand cheaper?
|
|
170
|
+
CASE
|
|
171
|
+
WHEN on_demand_cost_usd < 100 * 730 * 0.04 THEN 'ON_DEMAND_CHEAPER'
|
|
172
|
+
ELSE 'FLAT_RATE_CHEAPER'
|
|
173
|
+
END AS recommendation
|
|
174
|
+
FROM monthly_usage
|
|
175
|
+
ORDER BY usage_month DESC;
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Cost Optimization Decision Tree
|
|
179
|
+
|
|
180
|
+
```
|
|
181
|
+
Monthly BigQuery spend:
|
|
182
|
+
< $500/month → Stay on-demand (not worth flat-rate complexity)
|
|
183
|
+
$500-$3,000 → Consider Standard Edition (100 baseline slots)
|
|
184
|
+
$3,000-$10,000 → Standard Edition with autoscaling
|
|
185
|
+
> $10,000 → Enterprise Edition (governance + autoscaling)
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### On-Demand Cost Controls
|
|
189
|
+
|
|
190
|
+
```sql
|
|
191
|
+
-- Set maximum bytes billed per query (prevents runaway queries)
|
|
192
|
+
-- Apply via project-level or user-level settings
|
|
193
|
+
|
|
194
|
+
-- Project-level default (via API/Terraform)
|
|
195
|
+
-- terraform/bigquery.tf
|
|
196
|
+
resource "google_bigquery_reservation_assignment" "default" {
|
|
197
|
+
# Set custom quota per project
|
|
198
|
+
}
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
```python
|
|
202
|
+
# Set per-query byte limit in application code
|
|
203
|
+
from google.cloud import bigquery
|
|
204
|
+
|
|
205
|
+
client = bigquery.Client()
|
|
206
|
+
job_config = bigquery.QueryJobConfig(
|
|
207
|
+
maximum_bytes_billed=10 * 1024**3 # 10 GB limit per query
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
# This query will fail if it would scan > 10 GB
|
|
211
|
+
query_job = client.query(
|
|
212
|
+
"SELECT * FROM `project.silver.large_table`",
|
|
213
|
+
job_config=job_config
|
|
214
|
+
)
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
## Query Optimization Patterns
|
|
218
|
+
|
|
219
|
+
### Avoid Full Table Scans
|
|
220
|
+
|
|
221
|
+
```sql
|
|
222
|
+
-- BAD: No partition filter, scans entire table
|
|
223
|
+
SELECT COUNT(*) FROM `project.silver.zoho_deals`;
|
|
224
|
+
|
|
225
|
+
-- GOOD: Partition-pruned query
|
|
226
|
+
SELECT COUNT(*)
|
|
227
|
+
FROM `project.silver.zoho_deals`
|
|
228
|
+
WHERE _ingestion_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY);
|
|
229
|
+
|
|
230
|
+
-- Check if partition filter is being used (dry run)
|
|
231
|
+
-- Use the BigQuery UI "Execution Details" or:
|
|
232
|
+
SELECT
|
|
233
|
+
total_bytes_processed,
|
|
234
|
+
total_bytes_billed,
|
|
235
|
+
total_partitions_processed -- Should be < total partitions
|
|
236
|
+
FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
|
|
237
|
+
WHERE job_id = 'your-job-id';
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
### Using Clustering Effectively
|
|
241
|
+
|
|
242
|
+
```sql
|
|
243
|
+
-- Clustering eliminates scanning irrelevant data blocks
|
|
244
|
+
-- Column order matters: most selective first
|
|
245
|
+
|
|
246
|
+
-- Table clustered by: owner_id, stage, account_id
|
|
247
|
+
|
|
248
|
+
-- GOOD: Uses first cluster column (best pruning)
|
|
249
|
+
SELECT * FROM `project.silver.zoho_deals`
|
|
250
|
+
WHERE _ingestion_date = CURRENT_DATE()
|
|
251
|
+
AND owner_id = 'user_12345';
|
|
252
|
+
|
|
253
|
+
-- GOOD: Uses first two cluster columns
|
|
254
|
+
SELECT * FROM `project.silver.zoho_deals`
|
|
255
|
+
WHERE _ingestion_date = CURRENT_DATE()
|
|
256
|
+
AND owner_id = 'user_12345'
|
|
257
|
+
AND stage = 'Closed Won';
|
|
258
|
+
|
|
259
|
+
-- LESS EFFECTIVE: Skips first cluster column
|
|
260
|
+
SELECT * FROM `project.silver.zoho_deals`
|
|
261
|
+
WHERE _ingestion_date = CURRENT_DATE()
|
|
262
|
+
AND stage = 'Closed Won'; -- Skips owner_id, less block elimination
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
### Common Anti-Patterns
|
|
266
|
+
|
|
267
|
+
```sql
|
|
268
|
+
-- ANTI-PATTERN 1: SELECT * (scans all columns)
|
|
269
|
+
SELECT * FROM `project.gold.fct_deals` WHERE stage = 'Closed Won';
|
|
270
|
+
-- FIX: Select only needed columns
|
|
271
|
+
SELECT deal_id, amount, close_date FROM `project.gold.fct_deals` WHERE stage = 'Closed Won';
|
|
272
|
+
|
|
273
|
+
-- ANTI-PATTERN 2: Implicit comma join (becomes a cartesian product if the WHERE predicate is omitted or wrong)
|
|
274
|
+
SELECT * FROM table_a, table_b WHERE table_a.id = table_b.id;
|
|
275
|
+
-- FIX: Use explicit JOIN
|
|
276
|
+
SELECT * FROM table_a JOIN table_b ON table_a.id = table_b.id;
|
|
277
|
+
|
|
278
|
+
-- ANTI-PATTERN 3: Repeated subqueries
|
|
279
|
+
SELECT *, (SELECT AVG(amount) FROM deals) AS avg_amount FROM deals;
|
|
280
|
+
-- FIX: Use window function or CTE
|
|
281
|
+
SELECT *, AVG(amount) OVER () AS avg_amount FROM deals;
|
|
282
|
+
|
|
283
|
+
-- ANTI-PATTERN 4: DISTINCT on large result sets
|
|
284
|
+
SELECT DISTINCT * FROM `project.silver.zoho_deals`;
|
|
285
|
+
-- FIX: Use GROUP BY on specific columns or fix deduplication upstream
|
|
286
|
+
|
|
287
|
+
-- ANTI-PATTERN 5: ORDER BY without LIMIT
|
|
288
|
+
SELECT * FROM `project.gold.fct_invoices` ORDER BY invoice_date DESC;
|
|
289
|
+
-- FIX: Always pair ORDER BY with LIMIT
|
|
290
|
+
SELECT * FROM `project.gold.fct_invoices` ORDER BY invoice_date DESC LIMIT 1000;
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
## Storage Tier Management
|
|
294
|
+
|
|
295
|
+
```sql
|
|
296
|
+
-- Monitor storage tiers and costs
|
|
297
|
+
SELECT
|
|
298
|
+
table_schema AS dataset,
|
|
299
|
+
table_name,
|
|
300
|
+
ROUND(active_physical_bytes / POW(1024, 3), 2) AS active_gb,
|
|
301
|
+
ROUND(long_term_physical_bytes / POW(1024, 3), 2) AS long_term_gb,
|
|
302
|
+
ROUND(active_physical_bytes / POW(1024, 3) * 0.02, 2) AS active_monthly_cost,
|
|
303
|
+
ROUND(long_term_physical_bytes / POW(1024, 3) * 0.01, 2) AS long_term_monthly_cost,
|
|
304
|
+
TIMESTAMP_MILLIS(last_modified_time) AS last_modified
|
|
305
|
+
FROM `project`.`region-us`.INFORMATION_SCHEMA.TABLE_STORAGE
|
|
306
|
+
WHERE total_physical_bytes > 0
|
|
307
|
+
ORDER BY (active_physical_bytes + long_term_physical_bytes) DESC
|
|
308
|
+
LIMIT 50;
|
|
309
|
+
|
|
310
|
+
-- Tables that could benefit from partition expiration
|
|
311
|
+
SELECT
|
|
312
|
+
table_schema,
|
|
313
|
+
table_name,
|
|
314
|
+
ROUND(total_physical_bytes / POW(1024, 3), 2) AS total_gb,
|
|
315
|
+
TIMESTAMP_MILLIS(last_modified_time) AS last_modified,
|
|
316
|
+
CASE
|
|
317
|
+
WHEN TIMESTAMP_MILLIS(last_modified_time) < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 180 DAY)
|
|
318
|
+
THEN 'Consider archiving or expiring old partitions'
|
|
319
|
+
ELSE 'Active'
|
|
320
|
+
END AS recommendation
|
|
321
|
+
FROM `project`.`region-us`.INFORMATION_SCHEMA.TABLE_STORAGE
|
|
322
|
+
WHERE total_physical_bytes > 1 * POW(1024, 3) -- > 1 GB
|
|
323
|
+
ORDER BY total_physical_bytes DESC;
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
### Storage Best Practices
|
|
327
|
+
|
|
328
|
+
| Strategy | Savings | How |
|
|
329
|
+
|----------|---------|-----|
|
|
330
|
+
| Append-only tables | 50% after 90 days | Avoid UPDATE/DELETE on silver tables |
|
|
331
|
+
| Partition expiration | Variable | Set `partition_expiration_days` on bronze |
|
|
332
|
+
| Compression (Parquet source) | 60-80% | Use Parquet/Avro over CSV for loads |
|
|
333
|
+
| Drop unused tables | 100% | Audit with INFORMATION_SCHEMA quarterly |
|
|
334
|
+
| Time Travel reduction | Up to 7 days of storage | Set `max_time_travel_hours` (default 168h) |
|
|
335
|
+
|
|
336
|
+
```sql
|
|
337
|
+
-- Reduce time travel window for non-critical tables (saves storage)
|
|
338
|
+
ALTER SCHEMA `project.bronze` -- max_time_travel_hours is a dataset-level option, not a table option
|
|
339
|
+
SET OPTIONS (max_time_travel_hours = 48); -- 2 days instead of default 7
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
## Scheduled Queries for Materialization
|
|
343
|
+
|
|
344
|
+
```sql
|
|
345
|
+
-- Scheduled query: Refresh gold KPI table every hour
|
|
346
|
+
-- Configure via Cloud Console or Terraform
|
|
347
|
+
|
|
348
|
+
-- terraform/scheduled_queries.tf
|
|
349
|
+
resource "google_bigquery_data_transfer_config" "kpi_refresh" {
|
|
350
|
+
display_name = "Hourly KPI Refresh"
|
|
351
|
+
data_source_id = "scheduled_query"
|
|
352
|
+
schedule = "every 1 hours"
|
|
353
|
+
location = "US"
|
|
354
|
+
|
|
355
|
+
params = {
|
|
356
|
+
query = <<-EOT
|
|
357
|
+
CREATE OR REPLACE TABLE `project.gold.kpi_current` AS
|
|
358
|
+
SELECT
|
|
359
|
+
CURRENT_TIMESTAMP() AS refreshed_at,
|
|
360
|
+
(SELECT SUM(amount) FROM `project.silver.zoho_deals`
|
|
361
|
+
WHERE stage = 'Closed Won' AND close_date >= DATE_TRUNC(CURRENT_DATE(), MONTH)
|
|
362
|
+
) AS mtd_closed_won,
|
|
363
|
+
(SELECT COUNT(*) FROM `project.silver.zoho_deals`
|
|
364
|
+
WHERE stage NOT IN ('Closed Won', 'Closed Lost')
|
|
365
|
+
) AS open_deals,
|
|
366
|
+
(SELECT SUM(balance_due) FROM `project.silver.zoho_invoices`
|
|
367
|
+
WHERE status = 'overdue'
|
|
368
|
+
) AS total_overdue_ar
|
|
369
|
+
EOT
|
|
370
|
+
destination_table_name_template = "kpi_current"
|
|
371
|
+
write_disposition = "WRITE_TRUNCATE"
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
## INFORMATION_SCHEMA Monitoring
|
|
377
|
+
|
|
378
|
+
### Comprehensive Monitoring Dashboard Query
|
|
379
|
+
|
|
380
|
+
```sql
|
|
381
|
+
-- Daily monitoring report: cost, performance, anomalies
|
|
382
|
+
WITH daily_stats AS (
|
|
383
|
+
SELECT
|
|
384
|
+
DATE(creation_time) AS query_date,
|
|
385
|
+
COUNT(*) AS total_queries,
|
|
386
|
+
SUM(total_bytes_processed) / POW(1024, 4) AS tb_processed,
|
|
387
|
+
SUM(total_bytes_billed) / POW(1024, 4) * 6.25 AS estimated_cost,
|
|
388
|
+
SUM(total_slot_ms) / 1000 / 3600 AS slot_hours,
|
|
389
|
+
COUNTIF(error_result IS NOT NULL) AS failed_queries,
|
|
390
|
+
APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, start_time, SECOND), 100)[OFFSET(95)] AS p95_duration_sec,
|
|
391
|
+
MAX(total_bytes_processed) / POW(1024, 3) AS max_query_gb
|
|
392
|
+
FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
|
|
393
|
+
WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
|
|
394
|
+
AND job_type = 'QUERY'
|
|
395
|
+
GROUP BY 1
|
|
396
|
+
)
|
|
397
|
+
SELECT
|
|
398
|
+
query_date,
|
|
399
|
+
total_queries,
|
|
400
|
+
ROUND(tb_processed, 3) AS tb_processed,
|
|
401
|
+
ROUND(estimated_cost, 2) AS estimated_cost_usd,
|
|
402
|
+
ROUND(slot_hours, 1) AS slot_hours,
|
|
403
|
+
failed_queries,
|
|
404
|
+
p95_duration_sec,
|
|
405
|
+
ROUND(max_query_gb, 1) AS max_query_gb,
|
|
406
|
+
-- Anomaly detection
|
|
407
|
+
CASE
|
|
408
|
+
WHEN estimated_cost > 2 * AVG(estimated_cost) OVER (
|
|
409
|
+
ORDER BY query_date ROWS BETWEEN 7 PRECEDING AND 1 PRECEDING
|
|
410
|
+
) THEN 'COST_SPIKE'
|
|
411
|
+
WHEN failed_queries > 2 * AVG(failed_queries) OVER (
|
|
412
|
+
ORDER BY query_date ROWS BETWEEN 7 PRECEDING AND 1 PRECEDING
|
|
413
|
+
) THEN 'ERROR_SPIKE'
|
|
414
|
+
ELSE 'NORMAL'
|
|
415
|
+
END AS anomaly_flag
|
|
416
|
+
FROM daily_stats
|
|
417
|
+
ORDER BY query_date DESC;
|
|
418
|
+
```
|
|
419
|
+
|
|
420
|
+
### User Attribution
|
|
421
|
+
|
|
422
|
+
```sql
|
|
423
|
+
-- Who is spending the most? (accountability)
|
|
424
|
+
SELECT
|
|
425
|
+
user_email,
|
|
426
|
+
COUNT(*) AS query_count,
|
|
427
|
+
ROUND(SUM(total_bytes_billed) / POW(1024, 4) * 6.25, 2) AS cost_usd,
|
|
428
|
+
ROUND(SUM(total_bytes_billed) / POW(1024, 3), 1) AS gb_billed,
|
|
429
|
+
ROUND(AVG(total_slot_ms) / 1000, 1) AS avg_slot_seconds,
|
|
430
|
+
COUNTIF(total_bytes_billed > 10 * POW(1024, 3)) AS queries_over_10gb
|
|
431
|
+
FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
|
|
432
|
+
WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
|
|
433
|
+
AND job_type = 'QUERY'
|
|
434
|
+
GROUP BY 1
|
|
435
|
+
ORDER BY cost_usd DESC
|
|
436
|
+
LIMIT 20;
|
|
437
|
+
```
|
|
438
|
+
|
|
439
|
+
### Table-Level Usage Patterns
|
|
440
|
+
|
|
441
|
+
```sql
|
|
442
|
+
-- Most queried tables (optimization priority)
|
|
443
|
+
SELECT
|
|
444
|
+
REGEXP_EXTRACT(query, r'FROM\s+`([^`]+)`') AS table_referenced,
|
|
445
|
+
COUNT(*) AS query_count,
|
|
446
|
+
SUM(total_bytes_processed) / POW(1024, 3) AS total_gb_scanned,
|
|
447
|
+
AVG(total_bytes_processed) / POW(1024, 3) AS avg_gb_per_query,
|
|
448
|
+
-- Tables with high scan per query = optimization candidates
|
|
449
|
+
CASE
|
|
450
|
+
WHEN AVG(total_bytes_processed) / POW(1024, 3) > 10 THEN 'OPTIMIZE: Add partition/cluster'
|
|
451
|
+
WHEN COUNT(*) > 100 AND AVG(total_bytes_processed) / POW(1024, 3) > 1 THEN 'CONSIDER: Materialized view'
|
|
452
|
+
ELSE 'OK'
|
|
453
|
+
END AS recommendation
|
|
454
|
+
FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
|
|
455
|
+
WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
|
|
456
|
+
AND job_type = 'QUERY'
|
|
457
|
+
AND state = 'DONE'
|
|
458
|
+
GROUP BY 1
|
|
459
|
+
HAVING table_referenced IS NOT NULL
|
|
460
|
+
ORDER BY total_gb_scanned DESC
|
|
461
|
+
LIMIT 30;
|
|
462
|
+
```
|
|
463
|
+
|
|
464
|
+
### Automated Cost Alert Query
|
|
465
|
+
|
|
466
|
+
```python
|
|
467
|
+
# scripts/bq_cost_monitor.py
|
|
468
|
+
"""Run daily to check BigQuery spending and alert if over budget."""
|
|
469
|
+
|
|
470
|
+
from google.cloud import bigquery, monitoring_v3
|
|
471
|
+
from datetime import datetime, timedelta
|
|
472
|
+
|
|
473
|
+
DAILY_BUDGET_USD = 50.0 # Alert threshold
|
|
474
|
+
|
|
475
|
+
def check_daily_cost():
|
|
476
|
+
client = bigquery.Client()
|
|
477
|
+
|
|
478
|
+
query = """
|
|
479
|
+
SELECT
|
|
480
|
+
ROUND(SUM(total_bytes_billed) / POW(1024, 4) * 6.25, 2) AS today_cost_usd
|
|
481
|
+
FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
|
|
482
|
+
WHERE creation_time >= TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), DAY)
|
|
483
|
+
AND job_type = 'QUERY'
|
|
484
|
+
"""
|
|
485
|
+
|
|
486
|
+
result = list(client.query(query).result())[0]
|
|
487
|
+
today_cost = result.today_cost_usd or 0
|
|
488
|
+
|
|
489
|
+
if today_cost > DAILY_BUDGET_USD:
|
|
490
|
+
send_alert(
|
|
491
|
+
f"BigQuery daily cost alert: ${today_cost:.2f} "
|
|
492
|
+
f"(budget: ${DAILY_BUDGET_USD:.2f})"
|
|
493
|
+
)
|
|
494
|
+
|
|
495
|
+
# Also publish as custom metric for dashboards
|
|
496
|
+
publish_cost_metric(today_cost)
|
|
497
|
+
|
|
498
|
+
def send_alert(message):
|
|
499
|
+
"""Send Slack alert via webhook."""
|
|
500
|
+
import requests
|
|
501
|
+
requests.post(
|
|
502
|
+
"https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK",
|
|
503
|
+
json={"text": f":warning: {message}"}
|
|
504
|
+
)
|
|
505
|
+
```
|
|
506
|
+
|
|
507
|
+
## Best Practices Summary
|
|
508
|
+
|
|
509
|
+
1. **Monitor INFORMATION_SCHEMA weekly** - Catch cost spikes and inefficient queries early
|
|
510
|
+
2. **Reserve BI Engine for dashboard tables only** - Do not waste on ad-hoc queries
|
|
511
|
+
3. **Set `maximum_bytes_billed` on every query job run by service accounts** - It is a per-job setting (e.g. `QueryJobConfig`), and it prevents runaway queries
|
|
512
|
+
4. **Use slot analysis before committing to flat-rate** - Ensure breakeven is favorable
|
|
513
|
+
5. **Cluster gold tables by dashboard filter columns** - Matches Looker/Analytics query patterns
|
|
514
|
+
6. **Scheduled query materialization** - Cheaper than users scanning large tables repeatedly
|
|
515
|
+
7. **Audit storage quarterly** - Drop unused tables, reduce time travel on non-critical data
|
|
516
|
+
8. **Alert on anomalies** - Cost spikes usually indicate a broken query or missing partition filter
|
|
517
|
+
|
|
518
|
+
> **WARNING**: INFORMATION_SCHEMA queries themselves consume slots. Avoid running expensive monitoring queries more than once per hour. Cache results in a monitoring table for dashboard use.
|