@cloudstreamsoftware/claude-tools 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. package/README.md +152 -37
  2. package/agents/INDEX.md +183 -0
  3. package/agents/architect.md +247 -0
  4. package/agents/build-error-resolver.md +555 -0
  5. package/agents/catalyst-deployer.md +132 -0
  6. package/agents/code-reviewer.md +121 -0
  7. package/agents/compliance-auditor.md +148 -0
  8. package/agents/creator-architect.md +395 -0
  9. package/agents/deluge-reviewer.md +98 -0
  10. package/agents/doc-updater.md +471 -0
  11. package/agents/e2e-runner.md +711 -0
  12. package/agents/planner.md +122 -0
  13. package/agents/refactor-cleaner.md +309 -0
  14. package/agents/security-reviewer.md +582 -0
  15. package/agents/tdd-guide.md +302 -0
  16. package/bin/cloudstream-setup.js +16 -6
  17. package/config/versions.json +63 -0
  18. package/dist/hooks/hooks.json +209 -0
  19. package/dist/index.js +47 -0
  20. package/dist/lib/asset-value.js +609 -0
  21. package/dist/lib/client-manager.js +300 -0
  22. package/dist/lib/command-matcher.js +242 -0
  23. package/dist/lib/cross-session-patterns.js +754 -0
  24. package/dist/lib/intent-classifier.js +1075 -0
  25. package/dist/lib/package-manager.js +374 -0
  26. package/dist/lib/recommendation-engine.js +597 -0
  27. package/dist/lib/session-memory.js +489 -0
  28. package/dist/lib/skill-effectiveness.js +486 -0
  29. package/dist/lib/skill-matcher.js +595 -0
  30. package/dist/lib/tutorial-metrics.js +242 -0
  31. package/dist/lib/tutorial-progress.js +209 -0
  32. package/dist/lib/tutorial-renderer.js +431 -0
  33. package/dist/lib/utils.js +380 -0
  34. package/dist/lib/verify-formatter.js +143 -0
  35. package/dist/lib/workflow-state.js +249 -0
  36. package/hooks/hooks.json +209 -0
  37. package/package.json +5 -1
  38. package/scripts/aggregate-sessions.js +290 -0
  39. package/scripts/branch-name-validator.js +291 -0
  40. package/scripts/build.js +101 -0
  41. package/scripts/commands/client-switch.js +231 -0
  42. package/scripts/deprecate-skill.js +610 -0
  43. package/scripts/diagnose.js +324 -0
  44. package/scripts/doc-freshness.js +168 -0
  45. package/scripts/generate-weekly-digest.js +393 -0
  46. package/scripts/health-check.js +270 -0
  47. package/scripts/hooks/credential-check.js +101 -0
  48. package/scripts/hooks/evaluate-session.js +81 -0
  49. package/scripts/hooks/pre-compact.js +66 -0
  50. package/scripts/hooks/prompt-analyzer.js +276 -0
  51. package/scripts/hooks/prompt-router.js +422 -0
  52. package/scripts/hooks/quality-gate-enforcer.js +371 -0
  53. package/scripts/hooks/session-end.js +156 -0
  54. package/scripts/hooks/session-start.js +195 -0
  55. package/scripts/hooks/skill-injector.js +333 -0
  56. package/scripts/hooks/suggest-compact.js +58 -0
  57. package/scripts/lib/asset-value.js +609 -0
  58. package/scripts/lib/client-manager.js +300 -0
  59. package/scripts/lib/command-matcher.js +242 -0
  60. package/scripts/lib/cross-session-patterns.js +754 -0
  61. package/scripts/lib/intent-classifier.js +1075 -0
  62. package/scripts/lib/package-manager.js +374 -0
  63. package/scripts/lib/recommendation-engine.js +597 -0
  64. package/scripts/lib/session-memory.js +489 -0
  65. package/scripts/lib/skill-effectiveness.js +486 -0
  66. package/scripts/lib/skill-matcher.js +595 -0
  67. package/scripts/lib/tutorial-metrics.js +242 -0
  68. package/scripts/lib/tutorial-progress.js +209 -0
  69. package/scripts/lib/tutorial-renderer.js +431 -0
  70. package/scripts/lib/utils.js +380 -0
  71. package/scripts/lib/verify-formatter.js +143 -0
  72. package/scripts/lib/workflow-state.js +249 -0
  73. package/scripts/onboard.js +363 -0
  74. package/scripts/quarterly-report.js +692 -0
  75. package/scripts/setup-package-manager.js +204 -0
  76. package/scripts/sync-upstream.js +391 -0
  77. package/scripts/test.js +108 -0
  78. package/scripts/tutorial-runner.js +351 -0
  79. package/scripts/validate-all.js +201 -0
  80. package/scripts/verifiers/agents.js +245 -0
  81. package/scripts/verifiers/config.js +186 -0
  82. package/scripts/verifiers/environment.js +123 -0
  83. package/scripts/verifiers/hooks.js +188 -0
  84. package/scripts/verifiers/index.js +38 -0
  85. package/scripts/verifiers/persistence.js +140 -0
  86. package/scripts/verifiers/plugin.js +215 -0
  87. package/scripts/verifiers/skills.js +209 -0
  88. package/scripts/verify-setup.js +164 -0
  89. package/skills/INDEX.md +157 -0
  90. package/skills/backend-patterns/SKILL.md +586 -0
  91. package/skills/backend-patterns/catalyst-patterns.md +128 -0
  92. package/skills/bigquery-patterns/SKILL.md +27 -0
  93. package/skills/bigquery-patterns/performance-optimization.md +518 -0
  94. package/skills/bigquery-patterns/query-patterns.md +372 -0
  95. package/skills/bigquery-patterns/schema-design.md +78 -0
  96. package/skills/cloudstream-project-template/SKILL.md +20 -0
  97. package/skills/cloudstream-project-template/structure.md +65 -0
  98. package/skills/coding-standards/SKILL.md +524 -0
  99. package/skills/coding-standards/deluge-standards.md +83 -0
  100. package/skills/compliance-patterns/SKILL.md +28 -0
  101. package/skills/compliance-patterns/hipaa/audit-requirements.md +251 -0
  102. package/skills/compliance-patterns/hipaa/baa-process.md +298 -0
  103. package/skills/compliance-patterns/hipaa/data-archival-strategy.md +387 -0
  104. package/skills/compliance-patterns/hipaa/phi-handling.md +52 -0
  105. package/skills/compliance-patterns/pci-dss/saq-a-requirements.md +307 -0
  106. package/skills/compliance-patterns/pci-dss/tokenization-patterns.md +382 -0
  107. package/skills/compliance-patterns/pci-dss/zoho-checkout-patterns.md +56 -0
  108. package/skills/compliance-patterns/soc2/access-controls.md +344 -0
  109. package/skills/compliance-patterns/soc2/audit-logging.md +458 -0
  110. package/skills/compliance-patterns/soc2/change-management.md +403 -0
  111. package/skills/compliance-patterns/soc2/deluge-execution-logging.md +407 -0
  112. package/skills/consultancy-workflows/SKILL.md +19 -0
  113. package/skills/consultancy-workflows/client-isolation.md +21 -0
  114. package/skills/consultancy-workflows/documentation-automation.md +454 -0
  115. package/skills/consultancy-workflows/handoff-procedures.md +257 -0
  116. package/skills/consultancy-workflows/knowledge-capture.md +513 -0
  117. package/skills/consultancy-workflows/time-tracking.md +26 -0
  118. package/skills/continuous-learning/SKILL.md +84 -0
  119. package/skills/continuous-learning/config.json +18 -0
  120. package/skills/continuous-learning/evaluate-session.sh +60 -0
  121. package/skills/continuous-learning-v2/SKILL.md +126 -0
  122. package/skills/continuous-learning-v2/config.json +61 -0
  123. package/skills/frontend-patterns/SKILL.md +635 -0
  124. package/skills/frontend-patterns/zoho-widget-patterns.md +103 -0
  125. package/skills/gcp-data-engineering/SKILL.md +36 -0
  126. package/skills/gcp-data-engineering/bigquery/performance-optimization.md +337 -0
  127. package/skills/gcp-data-engineering/dataflow/error-handling.md +496 -0
  128. package/skills/gcp-data-engineering/dataflow/pipeline-patterns.md +444 -0
  129. package/skills/gcp-data-engineering/dbt/model-organization.md +63 -0
  130. package/skills/gcp-data-engineering/dbt/testing-patterns.md +503 -0
  131. package/skills/gcp-data-engineering/medallion-architecture/bronze-layer.md +60 -0
  132. package/skills/gcp-data-engineering/medallion-architecture/gold-layer.md +311 -0
  133. package/skills/gcp-data-engineering/medallion-architecture/layer-transitions.md +517 -0
  134. package/skills/gcp-data-engineering/medallion-architecture/silver-layer.md +305 -0
  135. package/skills/gcp-data-engineering/zoho-to-gcp/data-extraction.md +543 -0
  136. package/skills/gcp-data-engineering/zoho-to-gcp/real-time-vs-batch.md +337 -0
  137. package/skills/security-review/SKILL.md +498 -0
  138. package/skills/security-review/compliance-checklist.md +53 -0
  139. package/skills/strategic-compact/SKILL.md +67 -0
  140. package/skills/tdd-workflow/SKILL.md +413 -0
  141. package/skills/tdd-workflow/zoho-testing.md +124 -0
  142. package/skills/tutorial/SKILL.md +249 -0
  143. package/skills/tutorial/docs/ACCESSIBILITY.md +169 -0
  144. package/skills/tutorial/lessons/00-philosophy-and-workflow.md +198 -0
  145. package/skills/tutorial/lessons/01-basics.md +81 -0
  146. package/skills/tutorial/lessons/02-training.md +86 -0
  147. package/skills/tutorial/lessons/03-commands.md +109 -0
  148. package/skills/tutorial/lessons/04-workflows.md +115 -0
  149. package/skills/tutorial/lessons/05-compliance.md +116 -0
  150. package/skills/tutorial/lessons/06-zoho.md +121 -0
  151. package/skills/tutorial/lessons/07-hooks-system.md +277 -0
  152. package/skills/tutorial/lessons/08-mcp-servers.md +316 -0
  153. package/skills/tutorial/lessons/09-client-management.md +215 -0
  154. package/skills/tutorial/lessons/10-testing-e2e.md +260 -0
  155. package/skills/tutorial/lessons/11-skills-deep-dive.md +272 -0
  156. package/skills/tutorial/lessons/12-rules-system.md +326 -0
  157. package/skills/tutorial/lessons/13-golden-standard-graduation.md +213 -0
  158. package/skills/tutorial/lessons/14-fork-setup-and-sync.md +312 -0
  159. package/skills/tutorial/lessons/15-living-examples-system.md +221 -0
  160. package/skills/tutorial/tracks/accelerated/README.md +134 -0
  161. package/skills/tutorial/tracks/accelerated/assessment/checkpoint-1.md +161 -0
  162. package/skills/tutorial/tracks/accelerated/assessment/checkpoint-2.md +175 -0
  163. package/skills/tutorial/tracks/accelerated/day-1-core-concepts.md +234 -0
  164. package/skills/tutorial/tracks/accelerated/day-2-essential-commands.md +270 -0
  165. package/skills/tutorial/tracks/accelerated/day-3-workflow-mastery.md +305 -0
  166. package/skills/tutorial/tracks/accelerated/day-4-compliance-zoho.md +304 -0
  167. package/skills/tutorial/tracks/accelerated/day-5-hooks-skills.md +344 -0
  168. package/skills/tutorial/tracks/accelerated/day-6-client-testing.md +386 -0
  169. package/skills/tutorial/tracks/accelerated/day-7-graduation.md +369 -0
  170. package/skills/zoho-patterns/CHANGELOG.md +108 -0
  171. package/skills/zoho-patterns/SKILL.md +446 -0
  172. package/skills/zoho-patterns/analytics/dashboard-patterns.md +352 -0
  173. package/skills/zoho-patterns/analytics/zoho-to-bigquery-pipeline.md +427 -0
  174. package/skills/zoho-patterns/catalyst/appsail-deployment.md +349 -0
  175. package/skills/zoho-patterns/catalyst/context-close-patterns.md +354 -0
  176. package/skills/zoho-patterns/catalyst/cron-batch-processing.md +374 -0
  177. package/skills/zoho-patterns/catalyst/function-patterns.md +439 -0
  178. package/skills/zoho-patterns/creator/form-design.md +304 -0
  179. package/skills/zoho-patterns/creator/publish-api-patterns.md +313 -0
  180. package/skills/zoho-patterns/creator/widget-integration.md +306 -0
  181. package/skills/zoho-patterns/creator/workflow-automation.md +253 -0
  182. package/skills/zoho-patterns/deluge/api-patterns.md +468 -0
  183. package/skills/zoho-patterns/deluge/batch-processing.md +403 -0
  184. package/skills/zoho-patterns/deluge/cross-app-integration.md +356 -0
  185. package/skills/zoho-patterns/deluge/error-handling.md +423 -0
  186. package/skills/zoho-patterns/deluge/syntax-reference.md +65 -0
  187. package/skills/zoho-patterns/integration/cors-proxy-architecture.md +426 -0
  188. package/skills/zoho-patterns/integration/crm-books-native-sync.md +277 -0
  189. package/skills/zoho-patterns/integration/oauth-token-management.md +461 -0
  190. package/skills/zoho-patterns/integration/zoho-flow-patterns.md +334 -0
@@ -0,0 +1,27 @@
1
+ ---
2
+ name: bigquery-patterns
3
+ description: BigQuery analytical patterns for CloudStream's data engineering work. Covers schema design, query optimization, partitioning, clustering, and cost management.
4
+ version: 1.0.0
5
+ status: active
6
+ introduced: 1.0.0
7
+ lastUpdated: 2026-01-25
8
+ activation: BigQuery queries, data warehouse design, analytical reporting, medallion architecture
9
+ ---
10
+
11
+ # BigQuery Patterns
12
+
13
+ This skill covers BigQuery-specific patterns for CloudStream's medallion architecture (bronze/silver/gold layers), cost optimization, and analytical query patterns.
14
+
15
+ ## When to Use
16
+ - Designing BigQuery table schemas
17
+ - Writing analytical queries
18
+ - Optimizing query cost and performance
19
+ - Building medallion layer transitions
20
+ - Creating materialized views for Looker
21
+
22
+ ## Key Principles
23
+ - Partition by date/timestamp for time-series data
24
+ - Cluster by frequently filtered columns (max 4)
25
+ - Use materialized views for expensive aggregations
26
+ - Prefer STRUCT over JOINs for denormalized data
27
+ - Monitor slot usage and costs with INFORMATION_SCHEMA
@@ -0,0 +1,518 @@
1
+ # BigQuery Performance Optimization Patterns
2
+
3
+ > Advanced patterns for slot management, BI Engine acceleration, cost optimization, query tuning, and INFORMATION_SCHEMA monitoring at CloudStream.
4
+
5
+ ## Slot Usage Analysis
6
+
7
+ ### Understanding Slot Consumption
8
+
9
+ ```sql
10
+ -- Identify top slot-consuming queries (last 7 days)
11
+ SELECT
12
+ user_email,
13
+ job_id,
14
+ query,
15
+ total_slot_ms / 1000 AS slot_seconds,
16
+ total_bytes_processed / POW(1024, 3) AS gb_processed,
17
+ TIMESTAMP_DIFF(end_time, start_time, SECOND) AS wall_clock_seconds,
18
+ -- Slot efficiency: high slot usage with low wall time = good parallelism
19
+ SAFE_DIVIDE(total_slot_ms / 1000, TIMESTAMP_DIFF(end_time, start_time, SECOND)) AS avg_slots_used
20
+ FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
21
+ WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY)
22
+ AND job_type = 'QUERY'
23
+ AND state = 'DONE'
24
+ AND error_result IS NULL
25
+ ORDER BY total_slot_ms DESC
26
+ LIMIT 25;
27
+ ```
28
+
29
+ ### Slot Usage by Time of Day (Capacity Planning)
30
+
31
+ ```sql
32
+ -- Hourly slot usage pattern - identifies peak hours for reservation sizing
33
+ SELECT
34
+ EXTRACT(HOUR FROM creation_time) AS hour_of_day,
35
+ EXTRACT(DAYOFWEEK FROM creation_time) AS day_of_week,
36
+ COUNT(*) AS query_count,
37
+ SUM(total_slot_ms) / 1000 / 3600 AS total_slot_hours,
38
+ MAX(total_slot_ms) / 1000 AS max_slot_seconds,
39
+ APPROX_QUANTILES(total_slot_ms / 1000, 100)[OFFSET(95)] AS p95_slot_seconds
40
+ FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
41
+ WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
42
+ AND job_type = 'QUERY'
43
+ GROUP BY 1, 2
44
+ ORDER BY 1, 2;
45
+ ```
46
+
47
+ ### Slot Contention Detection
48
+
49
+ ```sql
50
+ -- Detect queries that queued due to slot unavailability
51
+ SELECT
52
+ job_id,
53
+ user_email,
54
+ creation_time,
55
+ start_time,
56
+ TIMESTAMP_DIFF(start_time, creation_time, SECOND) AS queue_wait_seconds,
57
+ total_slot_ms / 1000 AS slot_seconds,
58
+ SUBSTR(query, 1, 200) AS query_preview
59
+ FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
60
+ WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 24 HOUR)
61
+ AND job_type = 'QUERY'
62
+ AND TIMESTAMP_DIFF(start_time, creation_time, SECOND) > 5 -- Queued > 5s
63
+ ORDER BY queue_wait_seconds DESC
64
+ LIMIT 20;
65
+ ```
66
+
67
+ ## BI Engine for Sub-Second Dashboards
68
+
69
+ ### Reservation Setup
70
+
71
+ ```hcl
72
+ # terraform/bi_engine.tf
73
+ resource "google_bigquery_bi_reservation" "dashboard_acceleration" {
74
+ location = "US" # Must match dataset location
75
+ size = 3221225472 # 3 GiB of BI Engine RAM (size is specified in bytes)
76
+
77
+ # Preferred tables are auto-detected, but you can influence priority
78
+ # by ensuring gold layer dashboard tables are queried frequently
79
+ }
80
+ ```
81
+
82
+ ### BI Engine Monitoring
83
+
84
+ ```sql
85
+ -- Check BI Engine acceleration status
86
+ SELECT
87
+ project_id,
88
+ bi_engine_mode, -- 'FULL', 'PARTIAL', 'DISABLED'
89
+ bi_engine_reasons, -- Why partial/disabled
90
+ total_bytes_processed,
91
+ query
92
+ FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
93
+ WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 24 HOUR)
94
+ AND bi_engine_statistics IS NOT NULL
95
+ ORDER BY creation_time DESC
96
+ LIMIT 50;
97
+
98
+ -- BI Engine cache hit rate
99
+ SELECT
100
+ COUNT(*) AS total_queries,
101
+ COUNTIF(bi_engine_mode = 'FULL') AS fully_accelerated,
102
+ COUNTIF(bi_engine_mode = 'PARTIAL') AS partially_accelerated,
103
+ COUNTIF(bi_engine_mode = 'DISABLED') AS not_accelerated,
104
+ ROUND(COUNTIF(bi_engine_mode = 'FULL') / COUNT(*) * 100, 1) AS full_acceleration_pct
105
+ FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
106
+ WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY)
107
+ AND job_type = 'QUERY'
108
+ AND bi_engine_statistics IS NOT NULL;
109
+ ```
110
+
111
+ ### Optimizing for BI Engine
112
+
113
+ BI Engine works best with these table patterns:
114
+
115
+ | Optimization | Impact | Example |
116
+ |-------------|--------|---------|
117
+ | Fewer columns in query | Higher cache hit | SELECT only dashboard fields |
118
+ | Smaller tables (< reserved GB) | Full acceleration | Pre-aggregate in gold layer |
119
+ | Avoid complex functions | Better acceleration | Pre-compute in dbt model |
120
+ | Use standard types | Full support | Avoid STRUCT, ARRAY in dashboard tables |
121
+ | Cluster by dashboard filters | Better scanning | CLUSTER BY date, region |
122
+
123
+ ```sql
124
+ -- Design gold layer views optimized for BI Engine
125
+ CREATE OR REPLACE VIEW `project.gold.v_bi_revenue_dashboard` AS
126
+ SELECT
127
+ -- Only columns used by Looker Studio dashboard
128
+ revenue_month,
129
+ customer_name,
130
+ region,
131
+ total_revenue,
132
+ invoice_count,
133
+ avg_days_to_pay,
134
+ -- Pre-computed comparisons (avoid window functions in dashboard query)
135
+ revenue_mom_pct,
136
+ revenue_yoy_pct
137
+ FROM `project.gold.fct_monthly_revenue_enriched`
138
+ WHERE revenue_month >= DATE_SUB(CURRENT_DATE(), INTERVAL 24 MONTH);
139
+ -- Limit date range to fit in BI Engine reservation
140
+ ```
141
+
142
+ > **COST TIP**: BI Engine costs $36.50/GB/month. A 2 GB reservation (~$73/month) can accelerate most dashboard tables if you pre-aggregate to keep gold tables small. Monitor the acceleration percentage -- below 80% means you need more capacity or smaller tables.
143
+
144
+ ## Cost Optimization: Flat-Rate vs On-Demand Breakeven
145
+
146
+ ### Breakeven Analysis
147
+
148
+ ```sql
149
+ -- Calculate your on-demand cost to compare with flat-rate
150
+ WITH monthly_usage AS (
151
+ SELECT
152
+ DATE_TRUNC(creation_time, MONTH) AS usage_month,
153
+ SUM(total_bytes_billed) / POW(1024, 4) AS tb_billed,
154
+ SUM(total_bytes_billed) / POW(1024, 4) * 6.25 AS on_demand_cost_usd,
155
+ SUM(total_slot_ms) / 1000 / 3600 AS total_slot_hours,
156
+ MAX(total_slot_ms) / 1000 AS max_query_slot_seconds
157
+ FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
158
+ WHERE creation_time >= TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL 6 MONTH)) -- TIMESTAMP_SUB does not accept MONTH
159
+ AND job_type = 'QUERY'
160
+ GROUP BY 1
161
+ )
162
+ SELECT
163
+ usage_month,
164
+ tb_billed,
165
+ on_demand_cost_usd,
166
+ total_slot_hours,
167
+ -- Standard Edition comparison: $0.04/slot-hour, 100 slot baseline
168
+ 100 * 730 * 0.04 AS standard_100_slots_monthly, -- $2,920/month
169
+ -- Is on-demand cheaper?
170
+ CASE
171
+ WHEN on_demand_cost_usd < 100 * 730 * 0.04 THEN 'ON_DEMAND_CHEAPER'
172
+ ELSE 'FLAT_RATE_CHEAPER'
173
+ END AS recommendation
174
+ FROM monthly_usage
175
+ ORDER BY usage_month DESC;
176
+ ```
177
+
178
+ ### Cost Optimization Decision Tree
179
+
180
+ ```
181
+ Monthly BigQuery spend:
182
+ < $500/month → Stay on-demand (not worth flat-rate complexity)
183
+ $500-$3,000 → Consider Standard Edition (100 baseline slots)
184
+ $3,000-$10,000 → Standard Edition with autoscaling
185
+ > $10,000 → Enterprise Edition (governance + autoscaling)
186
+ ```
187
+
188
+ ### On-Demand Cost Controls
189
+
190
+ ```sql
191
+ -- Set maximum bytes billed per query (prevents runaway queries)
192
+ -- Apply via project-level or user-level settings
193
+
194
+ -- Project-level default (via API/Terraform)
195
+ -- terraform/bigquery.tf
196
+ resource "google_bigquery_reservation_assignment" "default" {
197
+ # Set custom quota per project
198
+ }
199
+ ```
200
+
201
+ ```python
202
+ # Set per-query byte limit in application code
203
+ from google.cloud import bigquery
204
+
205
+ client = bigquery.Client()
206
+ job_config = bigquery.QueryJobConfig(
207
+ maximum_bytes_billed=10 * 1024**3 # 10 GB limit per query
208
+ )
209
+
210
+ # This query will fail if it would scan > 10 GB
211
+ query_job = client.query(
212
+ "SELECT * FROM `project.silver.large_table`",
213
+ job_config=job_config
214
+ )
215
+ ```
216
+
217
+ ## Query Optimization Patterns
218
+
219
+ ### Avoid Full Table Scans
220
+
221
+ ```sql
222
+ -- BAD: No partition filter, scans entire table
223
+ SELECT COUNT(*) FROM `project.silver.zoho_deals`;
224
+
225
+ -- GOOD: Partition-pruned query
226
+ SELECT COUNT(*)
227
+ FROM `project.silver.zoho_deals`
228
+ WHERE _ingestion_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY);
229
+
230
+ -- Check whether partition pruning was applied (inspect the completed job's metadata)
231
+ -- Use the BigQuery UI "Execution Details" or:
232
+ SELECT
233
+ total_bytes_processed,
234
+ total_bytes_billed,
235
+ total_partitions_processed -- Should be < total partitions
236
+ FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
237
+ WHERE job_id = 'your-job-id';
238
+ ```
239
+
240
+ ### Using Clustering Effectively
241
+
242
+ ```sql
243
+ -- Clustering eliminates scanning irrelevant data blocks
244
+ -- Column order matters: most selective first
245
+
246
+ -- Table clustered by: owner_id, stage, account_id
247
+
248
+ -- GOOD: Uses first cluster column (best pruning)
249
+ SELECT * FROM `project.silver.zoho_deals`
250
+ WHERE _ingestion_date = CURRENT_DATE()
251
+ AND owner_id = 'user_12345';
252
+
253
+ -- GOOD: Uses first two cluster columns
254
+ SELECT * FROM `project.silver.zoho_deals`
255
+ WHERE _ingestion_date = CURRENT_DATE()
256
+ AND owner_id = 'user_12345'
257
+ AND stage = 'Closed Won';
258
+
259
+ -- LESS EFFECTIVE: Skips first cluster column
260
+ SELECT * FROM `project.silver.zoho_deals`
261
+ WHERE _ingestion_date = CURRENT_DATE()
262
+ AND stage = 'Closed Won'; -- Skips owner_id, less block elimination
263
+ ```
264
+
265
+ ### Common Anti-Patterns
266
+
267
+ ```sql
268
+ -- ANTI-PATTERN 1: SELECT * (scans all columns)
269
+ SELECT * FROM `project.gold.fct_deals` WHERE stage = 'Closed Won';
270
+ -- FIX: Select only needed columns
271
+ SELECT deal_id, amount, close_date FROM `project.gold.fct_deals` WHERE stage = 'Closed Won';
272
+
273
+ -- ANTI-PATTERN 2: Implicit comma join (dropping the WHERE clause silently yields a cartesian product)
274
+ SELECT * FROM table_a, table_b WHERE table_a.id = table_b.id;
275
+ -- FIX: Use explicit JOIN
276
+ SELECT * FROM table_a JOIN table_b ON table_a.id = table_b.id;
277
+
278
+ -- ANTI-PATTERN 3: Repeated subqueries
279
+ SELECT *, (SELECT AVG(amount) FROM deals) AS avg_amount FROM deals;
280
+ -- FIX: Use window function or CTE
281
+ SELECT *, AVG(amount) OVER () AS avg_amount FROM deals;
282
+
283
+ -- ANTI-PATTERN 4: DISTINCT on large result sets
284
+ SELECT DISTINCT * FROM `project.silver.zoho_deals`;
285
+ -- FIX: Use GROUP BY on specific columns or fix deduplication upstream
286
+
287
+ -- ANTI-PATTERN 5: ORDER BY without LIMIT
288
+ SELECT * FROM `project.gold.fct_invoices` ORDER BY invoice_date DESC;
289
+ -- FIX: Always pair ORDER BY with LIMIT
290
+ SELECT * FROM `project.gold.fct_invoices` ORDER BY invoice_date DESC LIMIT 1000;
291
+ ```
292
+
293
+ ## Storage Tier Management
294
+
295
+ ```sql
296
+ -- Monitor storage tiers and costs
297
+ SELECT
298
+ table_schema AS dataset,
299
+ table_name,
300
+ ROUND(active_physical_bytes / POW(1024, 3), 2) AS active_gb,
301
+ ROUND(long_term_physical_bytes / POW(1024, 3), 2) AS long_term_gb,
302
+ ROUND(active_physical_bytes / POW(1024, 3) * 0.02, 2) AS active_monthly_cost,
303
+ ROUND(long_term_physical_bytes / POW(1024, 3) * 0.01, 2) AS long_term_monthly_cost,
304
+ TIMESTAMP_MILLIS(last_modified_time) AS last_modified
305
+ FROM `project`.`region-us`.INFORMATION_SCHEMA.TABLE_STORAGE
306
+ WHERE total_physical_bytes > 0
307
+ ORDER BY (active_physical_bytes + long_term_physical_bytes) DESC
308
+ LIMIT 50;
309
+
310
+ -- Tables that could benefit from partition expiration
311
+ SELECT
312
+ table_schema,
313
+ table_name,
314
+ ROUND(total_physical_bytes / POW(1024, 3), 2) AS total_gb,
315
+ TIMESTAMP_MILLIS(last_modified_time) AS last_modified,
316
+ CASE
317
+ WHEN TIMESTAMP_MILLIS(last_modified_time) < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 180 DAY)
318
+ THEN 'Consider archiving or expiring old partitions'
319
+ ELSE 'Active'
320
+ END AS recommendation
321
+ FROM `project`.`region-us`.INFORMATION_SCHEMA.TABLE_STORAGE
322
+ WHERE total_physical_bytes > 1 * POW(1024, 3) -- > 1 GB
323
+ ORDER BY total_physical_bytes DESC;
324
+ ```
325
+
326
+ ### Storage Best Practices
327
+
328
+ | Strategy | Savings | How |
329
+ |----------|---------|-----|
330
+ | Append-only tables | 50% after 90 days | Avoid UPDATE/DELETE on silver tables |
331
+ | Partition expiration | Variable | Set `partition_expiration_days` on bronze |
332
+ | Compression (Parquet source) | 60-80% | Use Parquet/Avro over CSV for loads |
333
+ | Drop unused tables | 100% | Audit with INFORMATION_SCHEMA quarterly |
334
+ | Time Travel reduction | Up to 7 days of storage | Set `max_time_travel_hours` (default 168h) |
335
+
336
+ ```sql
337
+ -- Reduce time travel window for non-critical tables (saves storage)
338
+ ALTER TABLE `project.bronze.zoho_raw_events`
339
+ SET OPTIONS (max_time_travel_hours = 48); -- 2 days instead of default 7
340
+ ```
341
+
342
+ ## Scheduled Queries for Materialization
343
+
344
+ ```sql
345
+ -- Scheduled query: Refresh gold KPI table every hour
346
+ -- Configure via Cloud Console or Terraform
347
+
348
+ -- terraform/scheduled_queries.tf
349
+ resource "google_bigquery_data_transfer_config" "kpi_refresh" {
350
+ display_name = "Hourly KPI Refresh"
351
+ data_source_id = "scheduled_query"
352
+ schedule = "every 1 hours"
353
+ location = "US"
354
+
355
+ params = {
356
+ query = <<-EOT
357
+ CREATE OR REPLACE TABLE `project.gold.kpi_current` AS
358
+ SELECT
359
+ CURRENT_TIMESTAMP() AS refreshed_at,
360
+ (SELECT SUM(amount) FROM `project.silver.zoho_deals`
361
+ WHERE stage = 'Closed Won' AND close_date >= DATE_TRUNC(CURRENT_DATE(), MONTH)
362
+ ) AS mtd_closed_won,
363
+ (SELECT COUNT(*) FROM `project.silver.zoho_deals`
364
+ WHERE stage NOT IN ('Closed Won', 'Closed Lost')
365
+ ) AS open_deals,
366
+ (SELECT SUM(balance_due) FROM `project.silver.zoho_invoices`
367
+ WHERE status = 'overdue'
368
+ ) AS total_overdue_ar
369
+ EOT
370
+ destination_table_name_template = "kpi_current"
371
+ write_disposition = "WRITE_TRUNCATE"
372
+ }
373
+ }
374
+ ```
375
+
376
+ ## INFORMATION_SCHEMA Monitoring
377
+
378
+ ### Comprehensive Monitoring Dashboard Query
379
+
380
+ ```sql
381
+ -- Daily monitoring report: cost, performance, anomalies
382
+ WITH daily_stats AS (
383
+ SELECT
384
+ DATE(creation_time) AS query_date,
385
+ COUNT(*) AS total_queries,
386
+ SUM(total_bytes_processed) / POW(1024, 4) AS tb_processed,
387
+ SUM(total_bytes_billed) / POW(1024, 4) * 6.25 AS estimated_cost,
388
+ SUM(total_slot_ms) / 1000 / 3600 AS slot_hours,
389
+ COUNTIF(error_result IS NOT NULL) AS failed_queries,
390
+ APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, start_time, SECOND), 100)[OFFSET(95)] AS p95_duration_sec,
391
+ MAX(total_bytes_processed) / POW(1024, 3) AS max_query_gb
392
+ FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
393
+ WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
394
+ AND job_type = 'QUERY'
395
+ GROUP BY 1
396
+ )
397
+ SELECT
398
+ query_date,
399
+ total_queries,
400
+ ROUND(tb_processed, 3) AS tb_processed,
401
+ ROUND(estimated_cost, 2) AS estimated_cost_usd,
402
+ ROUND(slot_hours, 1) AS slot_hours,
403
+ failed_queries,
404
+ p95_duration_sec,
405
+ ROUND(max_query_gb, 1) AS max_query_gb,
406
+ -- Anomaly detection
407
+ CASE
408
+ WHEN estimated_cost > 2 * AVG(estimated_cost) OVER (
409
+ ORDER BY query_date ROWS BETWEEN 7 PRECEDING AND 1 PRECEDING
410
+ ) THEN 'COST_SPIKE'
411
+ WHEN failed_queries > 2 * AVG(failed_queries) OVER (
412
+ ORDER BY query_date ROWS BETWEEN 7 PRECEDING AND 1 PRECEDING
413
+ ) THEN 'ERROR_SPIKE'
414
+ ELSE 'NORMAL'
415
+ END AS anomaly_flag
416
+ FROM daily_stats
417
+ ORDER BY query_date DESC;
418
+ ```
419
+
420
+ ### User Attribution
421
+
422
+ ```sql
423
+ -- Who is spending the most? (accountability)
424
+ SELECT
425
+ user_email,
426
+ COUNT(*) AS query_count,
427
+ ROUND(SUM(total_bytes_billed) / POW(1024, 4) * 6.25, 2) AS cost_usd,
428
+ ROUND(SUM(total_bytes_billed) / POW(1024, 3), 1) AS gb_billed,
429
+ ROUND(AVG(total_slot_ms) / 1000, 1) AS avg_slot_seconds,
430
+ COUNTIF(total_bytes_billed > 10 * POW(1024, 3)) AS queries_over_10gb
431
+ FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
432
+ WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
433
+ AND job_type = 'QUERY'
434
+ GROUP BY 1
435
+ ORDER BY cost_usd DESC
436
+ LIMIT 20;
437
+ ```
438
+
439
+ ### Table-Level Usage Patterns
440
+
441
+ ```sql
442
+ -- Most queried tables (optimization priority)
443
+ SELECT
444
+ REGEXP_EXTRACT(query, r'FROM\s+`([^`]+)`') AS table_referenced,
445
+ COUNT(*) AS query_count,
446
+ SUM(total_bytes_processed) / POW(1024, 3) AS total_gb_scanned,
447
+ AVG(total_bytes_processed) / POW(1024, 3) AS avg_gb_per_query,
448
+ -- Tables with high scan per query = optimization candidates
449
+ CASE
450
+ WHEN AVG(total_bytes_processed) / POW(1024, 3) > 10 THEN 'OPTIMIZE: Add partition/cluster'
451
+ WHEN COUNT(*) > 100 AND AVG(total_bytes_processed) / POW(1024, 3) > 1 THEN 'CONSIDER: Materialized view'
452
+ ELSE 'OK'
453
+ END AS recommendation
454
+ FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
455
+ WHERE creation_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
456
+ AND job_type = 'QUERY'
457
+ AND state = 'DONE'
458
+ GROUP BY 1
459
+ HAVING table_referenced IS NOT NULL
460
+ ORDER BY total_gb_scanned DESC
461
+ LIMIT 30;
462
+ ```
463
+
464
+ ### Automated Cost Alert Query
465
+
466
+ ```python
467
+ # scripts/bq_cost_monitor.py
468
+ """Run daily to check BigQuery spending and alert if over budget."""
469
+
470
+ from google.cloud import bigquery, monitoring_v3
471
+ from datetime import datetime, timedelta
472
+
473
+ DAILY_BUDGET_USD = 50.0 # Alert threshold
474
+
475
+ def check_daily_cost():
476
+ client = bigquery.Client()
477
+
478
+ query = """
479
+ SELECT
480
+ ROUND(SUM(total_bytes_billed) / POW(1024, 4) * 6.25, 2) AS today_cost_usd
481
+ FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
482
+ WHERE creation_time >= TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), DAY)
483
+ AND job_type = 'QUERY'
484
+ """
485
+
486
+ result = list(client.query(query).result())[0]
487
+ today_cost = result.today_cost_usd or 0
488
+
489
+ if today_cost > DAILY_BUDGET_USD:
490
+ send_alert(
491
+ f"BigQuery daily cost alert: ${today_cost:.2f} "
492
+ f"(budget: ${DAILY_BUDGET_USD:.2f})"
493
+ )
494
+
495
+ # Also publish as custom metric for dashboards
496
+ publish_cost_metric(today_cost)
497
+
498
+ def send_alert(message):
499
+ """Send Slack alert via webhook."""
500
+ import requests
501
+ requests.post(
502
+ "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK",
503
+ json={"text": f":warning: {message}"}
504
+ )
505
+ ```
506
+
507
+ ## Best Practices Summary
508
+
509
+ 1. **Monitor INFORMATION_SCHEMA weekly** - Catch cost spikes and inefficient queries early
510
+ 2. **Reserve BI Engine for dashboard tables only** - Do not waste on ad-hoc queries
511
+ 3. **Set maximum_bytes_billed on every query job, including service-account workloads** - Prevent runaway queries
512
+ 4. **Use slot analysis before committing to flat-rate** - Ensure breakeven is favorable
513
+ 5. **Cluster gold tables by dashboard filter columns** - Matches Looker/Analytics query patterns
514
+ 6. **Scheduled query materialization** - Cheaper than users scanning large tables repeatedly
515
+ 7. **Audit storage quarterly** - Drop unused tables, reduce time travel on non-critical data
516
+ 8. **Alert on anomalies** - Cost spikes usually indicate a broken query or missing partition filter
517
+
518
+ > **WARNING**: INFORMATION_SCHEMA queries themselves consume slots. Avoid running expensive monitoring queries more than once per hour. Cache results in a monitoring table for dashboard use.