specweave 1.0.239 → 1.0.241
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +31 -30
- package/README.md +1 -1
- package/bin/specweave.js +16 -0
- package/dist/plugins/specweave-ado/lib/ado-permission-gate.d.ts.map +1 -1
- package/dist/plugins/specweave-ado/lib/ado-permission-gate.js +17 -2
- package/dist/plugins/specweave-ado/lib/ado-permission-gate.js.map +1 -1
- package/dist/plugins/specweave-github/lib/github-feature-sync.d.ts +7 -0
- package/dist/plugins/specweave-github/lib/github-feature-sync.d.ts.map +1 -1
- package/dist/plugins/specweave-github/lib/github-feature-sync.js +53 -0
- package/dist/plugins/specweave-github/lib/github-feature-sync.js.map +1 -1
- package/dist/plugins/specweave-jira/lib/jira-permission-gate.d.ts.map +1 -1
- package/dist/plugins/specweave-jira/lib/jira-permission-gate.js +17 -2
- package/dist/plugins/specweave-jira/lib/jira-permission-gate.js.map +1 -1
- package/dist/plugins/specweave-testing/lib/playwright-cli-detector.d.ts +1 -0
- package/dist/plugins/specweave-testing/lib/playwright-cli-detector.d.ts.map +1 -1
- package/dist/plugins/specweave-testing/lib/playwright-cli-detector.js +7 -3
- package/dist/plugins/specweave-testing/lib/playwright-cli-detector.js.map +1 -1
- package/dist/plugins/specweave-testing/lib/playwright-cli-runner.d.ts.map +1 -1
- package/dist/plugins/specweave-testing/lib/playwright-cli-runner.js +27 -19
- package/dist/plugins/specweave-testing/lib/playwright-cli-runner.js.map +1 -1
- package/dist/plugins/specweave-testing/lib/playwright-routing.d.ts +8 -0
- package/dist/plugins/specweave-testing/lib/playwright-routing.d.ts.map +1 -1
- package/dist/plugins/specweave-testing/lib/playwright-routing.js +10 -7
- package/dist/plugins/specweave-testing/lib/playwright-routing.js.map +1 -1
- package/dist/src/adapters/agents-md-generator.js +1 -1
- package/dist/src/adapters/agents-md-generator.js.map +1 -1
- package/dist/src/adapters/claude/README.md +1 -1
- package/dist/src/adapters/claude-md-generator.js +1 -1
- package/dist/src/adapters/claude-md-generator.js.map +1 -1
- package/dist/src/cli/commands/init.d.ts.map +1 -1
- package/dist/src/cli/commands/init.js +10 -1
- package/dist/src/cli/commands/init.js.map +1 -1
- package/dist/src/cli/commands/refresh-marketplace.d.ts.map +1 -1
- package/dist/src/cli/commands/refresh-marketplace.js +7 -67
- package/dist/src/cli/commands/refresh-marketplace.js.map +1 -1
- package/dist/src/cli/commands/team.d.ts +20 -0
- package/dist/src/cli/commands/team.d.ts.map +1 -0
- package/dist/src/cli/commands/team.js +101 -0
- package/dist/src/cli/commands/team.js.map +1 -0
- package/dist/src/cli/helpers/init/claude-settings-env.d.ts +16 -0
- package/dist/src/cli/helpers/init/claude-settings-env.d.ts.map +1 -0
- package/dist/src/cli/helpers/init/claude-settings-env.js +44 -0
- package/dist/src/cli/helpers/init/claude-settings-env.js.map +1 -0
- package/dist/src/cli/helpers/init/plugin-installer.d.ts.map +1 -1
- package/dist/src/cli/helpers/init/plugin-installer.js +9 -13
- package/dist/src/cli/helpers/init/plugin-installer.js.map +1 -1
- package/dist/src/cli/helpers/issue-tracker/index.d.ts.map +1 -1
- package/dist/src/cli/helpers/issue-tracker/index.js +12 -6
- package/dist/src/cli/helpers/issue-tracker/index.js.map +1 -1
- package/dist/src/cli/helpers/issue-tracker/types.d.ts +2 -0
- package/dist/src/cli/helpers/issue-tracker/types.d.ts.map +1 -1
- package/dist/src/cli/helpers/issue-tracker/types.js.map +1 -1
- package/dist/src/core/increment/discipline-checker.js +1 -1
- package/dist/src/core/increment/discipline-checker.js.map +1 -1
- package/dist/src/core/increment/status-commands.d.ts.map +1 -1
- package/dist/src/core/increment/status-commands.js +7 -0
- package/dist/src/core/increment/status-commands.js.map +1 -1
- package/dist/src/core/lazy-loading/llm-plugin-detector.d.ts +2 -2
- package/dist/src/core/lazy-loading/llm-plugin-detector.d.ts.map +1 -1
- package/dist/src/core/lazy-loading/llm-plugin-detector.js +63 -25
- package/dist/src/core/lazy-loading/llm-plugin-detector.js.map +1 -1
- package/dist/src/core/reflection/reflect-handler.js +2 -2
- package/dist/src/core/reflection/reflect-handler.js.map +1 -1
- package/dist/src/core/session/handoff-context.js +2 -2
- package/dist/src/core/session/handoff-context.js.map +1 -1
- package/dist/src/sync/ado-reconciler.d.ts.map +1 -1
- package/dist/src/sync/ado-reconciler.js +21 -2
- package/dist/src/sync/ado-reconciler.js.map +1 -1
- package/dist/src/sync/engine.d.ts.map +1 -1
- package/dist/src/sync/engine.js +2 -0
- package/dist/src/sync/engine.js.map +1 -1
- package/dist/src/sync/github-reconciler.d.ts.map +1 -1
- package/dist/src/sync/github-reconciler.js +52 -26
- package/dist/src/sync/github-reconciler.js.map +1 -1
- package/dist/src/sync/jira-reconciler.d.ts.map +1 -1
- package/dist/src/sync/jira-reconciler.js +16 -3
- package/dist/src/sync/jira-reconciler.js.map +1 -1
- package/dist/src/sync/providers/ado.d.ts.map +1 -1
- package/dist/src/sync/providers/ado.js +4 -2
- package/dist/src/sync/providers/ado.js.map +1 -1
- package/dist/src/sync/providers/github.d.ts.map +1 -1
- package/dist/src/sync/providers/github.js +11 -0
- package/dist/src/sync/providers/github.js.map +1 -1
- package/dist/src/sync/providers/jira.d.ts.map +1 -1
- package/dist/src/sync/providers/jira.js +14 -2
- package/dist/src/sync/providers/jira.js.map +1 -1
- package/dist/src/sync/sync-coordinator.d.ts.map +1 -1
- package/dist/src/sync/sync-coordinator.js +31 -6
- package/dist/src/sync/sync-coordinator.js.map +1 -1
- package/dist/src/utils/auto-install.js +4 -4
- package/dist/src/utils/auto-install.js.map +1 -1
- package/package.json +2 -2
- package/plugins/FINAL-AUDIT-RECOMMENDATIONS.md +3 -3
- package/plugins/SKILLS-VS-AGENTS.md +1 -1
- package/plugins/specweave/PLUGIN.md +0 -2
- package/plugins/specweave/commands/export-skills.md +1 -1
- package/plugins/specweave/commands/role-orchestrator.md +1 -1
- package/plugins/specweave/hooks/log-decision.sh +6 -0
- package/plugins/specweave/hooks/stop-auto-v5.sh +17 -1
- package/plugins/specweave/hooks/stop-reflect.sh +16 -2
- package/plugins/specweave/hooks/stop-sync.sh +17 -9
- package/plugins/specweave/hooks/user-prompt-submit.sh +119 -35
- package/plugins/specweave/lib/vendor/sync/github-reconciler.js +52 -26
- package/plugins/specweave/lib/vendor/sync/github-reconciler.js.map +1 -1
- package/plugins/specweave/scripts/read-grill-context.sh +149 -0
- package/plugins/specweave/skills/code-review/SKILL.md +608 -0
- package/plugins/specweave/skills/done/SKILL.md +1 -1
- package/plugins/specweave/skills/grill/SKILL.md +91 -0
- package/plugins/specweave/skills/performance/SKILL.md +6 -0
- package/plugins/specweave/skills/security/SKILL.md +7 -0
- package/plugins/specweave/skills/security-patterns/SKILL.md +6 -0
- package/plugins/specweave/skills/tdd-orchestrator/SKILL.md +1 -1
- package/plugins/specweave/skills/team-build/SKILL.md +1 -1
- package/plugins/specweave/skills/team-orchestrate/SKILL.md +1 -1
- package/plugins/specweave/skills/tech-lead/SKILL.md +7 -0
- package/plugins/specweave-ado/lib/ado-permission-gate.js +18 -2
- package/plugins/specweave-ado/lib/ado-permission-gate.ts +19 -2
- package/plugins/specweave-frontend/skills/frontend/SKILL.md +138 -2
- package/plugins/specweave-frontend/skills/i18n-expert/SKILL.md +989 -0
- package/plugins/specweave-github/hooks/github-auto-create-handler.sh +23 -1
- package/plugins/specweave-github/lib/github-feature-sync.js +41 -0
- package/plugins/specweave-github/lib/github-feature-sync.ts +62 -0
- package/plugins/specweave-infrastructure/PLUGIN.md +2 -1
- package/plugins/specweave-infrastructure/skills/gcp-deep-dive/SKILL.md +1172 -0
- package/plugins/specweave-infrastructure/skills/observability/SKILL.md +6 -0
- package/plugins/specweave-infrastructure/skills/opentelemetry/SKILL.md +6 -0
- package/plugins/specweave-jira/lib/jira-permission-gate.js +18 -2
- package/plugins/specweave-jira/lib/jira-permission-gate.ts +19 -2
- package/plugins/specweave-mobile/PLUGIN.md +1 -2
- package/plugins/specweave-mobile/README.md +13 -12
- package/plugins/specweave-mobile/skills/capacitor-ionic/SKILL.md +4 -18
- package/plugins/specweave-mobile/skills/deep-linking-push/SKILL.md +4 -22
- package/plugins/specweave-mobile/skills/expo/SKILL.md +4 -24
- package/plugins/specweave-mobile/skills/mobile-testing/SKILL.md +4 -22
- package/plugins/specweave-mobile/skills/react-native-expert/SKILL.md +404 -47
- package/plugins/specweave-testing/PLUGIN.md +3 -11
- package/plugins/specweave-testing/lib/playwright-cli-detector.js +8 -3
- package/plugins/specweave-testing/lib/playwright-cli-detector.ts +8 -3
- package/plugins/specweave-testing/lib/playwright-cli-runner.js +25 -20
- package/plugins/specweave-testing/lib/playwright-cli-runner.ts +24 -19
- package/plugins/specweave-testing/lib/playwright-routing.js +1 -6
- package/plugins/specweave-testing/lib/playwright-routing.ts +11 -8
- package/plugins/specweave-testing/skills/accessibility-testing/SKILL.md +998 -0
- package/plugins/specweave-testing/skills/e2e-testing/SKILL.md +29 -28
- package/plugins/specweave-testing/skills/mutation-testing/SKILL.md +769 -0
- package/plugins/specweave-testing/skills/performance-testing/SKILL.md +961 -0
- package/plugins/specweave-testing/skills/qa-engineer/SKILL.md +2 -0
- package/plugins/specweave/.specweave/logs/decisions.jsonl +0 -12
- package/plugins/specweave/.specweave/logs/reflect/reflect.log +0 -8
- package/plugins/specweave/.specweave/logs/stop-auto.log +0 -6
- package/plugins/specweave/.specweave/logs/stop-sync.log +0 -10
- package/plugins/specweave/.specweave/state/dashboard.json +0 -43
- package/plugins/specweave/skills/infrastructure/SKILL.md +0 -86
- package/plugins/specweave/skills/qa-lead/SKILL.md +0 -77
- package/plugins/specweave-mobile/skills/mobile-architect/SKILL.md +0 -30
- package/plugins/specweave-testing/commands/e2e-setup.md +0 -1103
- package/plugins/specweave-testing/commands/test-coverage.md +0 -983
- package/plugins/specweave-testing/commands/test-generate.md +0 -1160
- package/plugins/specweave-testing/commands/test-init.md +0 -413
- package/plugins/specweave-testing/commands/ui-automate.md +0 -182
- package/plugins/specweave-testing/commands/ui-inspect.md +0 -82
|
@@ -0,0 +1,1172 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: GCP deep dive expert for GKE, Cloud Run, Cloud Functions, networking, IAM, and data services. Designs production-grade architectures using Terraform, gcloud CLI, and Google Cloud best practices. Generates infrastructure ONE COMPONENT AT A TIME.
|
|
3
|
+
allowed-tools: Read, Write, Edit, Bash
|
|
4
|
+
model: opus
|
|
5
|
+
context: fork
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# GCP Deep Dive Expert
|
|
9
|
+
|
|
10
|
+
## Purpose
|
|
11
|
+
|
|
12
|
+
Design and implement production-grade Google Cloud infrastructure with deep expertise in GKE, Cloud Run, networking, IAM, and data services. Produces well-structured Terraform configurations and gcloud workflows following the Google Cloud Architecture Framework.
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- Designing GKE clusters with Workload Identity
|
|
17
|
+
- Building serverless architectures with Cloud Run or Cloud Functions
|
|
18
|
+
- VPC design, Cloud Load Balancing, and Cloud CDN
|
|
19
|
+
- IAM policy engineering and organization policies
|
|
20
|
+
- Cloud SQL, Cloud Spanner, Firestore, BigQuery architectures
|
|
21
|
+
- Terraform GCP provider patterns
|
|
22
|
+
- Cloud Build CI/CD pipelines
|
|
23
|
+
- Cloud Monitoring, Logging, and Trace setup
|
|
24
|
+
|
|
25
|
+
## Triggers
|
|
26
|
+
|
|
27
|
+
GCP, Google Cloud, GKE, Cloud Run, Cloud Functions, BigQuery, Cloud SQL, Firestore, Cloud Spanner, Cloud Build, Artifact Registry, Cloud Armor, Cloud CDN
|
|
28
|
+
|
|
29
|
+
## Generation Rules
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
CHUNKING RULE: Generate ONE COMPONENT AT A TIME.
|
|
33
|
+
Do NOT produce a complete infrastructure in one response.
|
|
34
|
+
|
|
35
|
+
Order:
|
|
36
|
+
1. Networking (VPC, subnets, firewall rules)
|
|
37
|
+
2. IAM (service accounts, Workload Identity)
|
|
38
|
+
3. Compute (GKE, Cloud Run, Cloud Functions)
|
|
39
|
+
4. Data (Cloud SQL, Spanner, Firestore, BigQuery)
|
|
40
|
+
5. Load Balancing & CDN
|
|
41
|
+
6. CI/CD (Cloud Build, Artifact Registry, Cloud Deploy)
|
|
42
|
+
7. Observability (Monitoring, Logging, Trace)
|
|
43
|
+
|
|
44
|
+
Each chunk: explain decisions, show Terraform + gcloud equivalent, note costs.
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## GKE (Google Kubernetes Engine)
|
|
48
|
+
|
|
49
|
+
### Cluster with Terraform
|
|
50
|
+
|
|
51
|
+
```hcl
|
|
52
|
+
# modules/gke/main.tf
|
|
53
|
+
resource "google_container_cluster" "primary" {
|
|
54
|
+
name = "gke-${var.project_name}-${var.environment}"
|
|
55
|
+
location = var.region
|
|
56
|
+
|
|
57
|
+
remove_default_node_pool = true
|
|
58
|
+
initial_node_count = 1
|
|
59
|
+
|
|
60
|
+
network = var.vpc_id
|
|
61
|
+
subnetwork = var.subnet_id
|
|
62
|
+
|
|
63
|
+
ip_allocation_policy {
|
|
64
|
+
cluster_secondary_range_name = "pods"
|
|
65
|
+
services_secondary_range_name = "services"
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
private_cluster_config {
|
|
69
|
+
enable_private_nodes = true
|
|
70
|
+
enable_private_endpoint = var.environment == "production"
|
|
71
|
+
master_ipv4_cidr_block = "172.16.0.0/28"
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
master_authorized_networks_config {
|
|
75
|
+
dynamic "cidr_blocks" {
|
|
76
|
+
for_each = var.authorized_networks
|
|
77
|
+
content {
|
|
78
|
+
cidr_block = cidr_blocks.value.cidr
|
|
79
|
+
display_name = cidr_blocks.value.name
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
workload_identity_config {
|
|
85
|
+
workload_pool = "${var.project_id}.svc.id.goog"
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
binary_authorization {
|
|
89
|
+
evaluation_mode = var.environment == "production" ? "PROJECT_SINGLETON_POLICY_ENFORCE" : "DISABLED"
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
maintenance_policy {
|
|
93
|
+
recurring_window {
|
|
94
|
+
start_time = "2024-01-01T02:00:00Z"
|
|
95
|
+
end_time = "2024-01-01T06:00:00Z"
|
|
96
|
+
recurrence = "FREQ=WEEKLY;BYDAY=TU,WE,TH"
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
release_channel {
|
|
101
|
+
channel = var.environment == "production" ? "REGULAR" : "RAPID"
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
logging_config {
|
|
105
|
+
enable_components = ["SYSTEM_COMPONENTS", "WORKLOADS"]
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
monitoring_config {
|
|
109
|
+
enable_components = ["SYSTEM_COMPONENTS", "DAEMONSET", "DEPLOYMENT", "STATEFULSET"]
|
|
110
|
+
managed_prometheus { enabled = true }
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
addons_config {
|
|
114
|
+
gce_persistent_disk_csi_driver_config { enabled = true }
|
|
115
|
+
gcs_fuse_csi_driver_config { enabled = true }
|
|
116
|
+
dns_cache_config { enabled = true }
|
|
117
|
+
http_load_balancing { disabled = false }
|
|
118
|
+
horizontal_pod_autoscaling { disabled = false }
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
cluster_autoscaling {
|
|
122
|
+
autoscaling_profile = "OPTIMIZE_UTILIZATION"
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
# General-purpose node pool
|
|
127
|
+
resource "google_container_node_pool" "general" {
|
|
128
|
+
name = "general"
|
|
129
|
+
cluster = google_container_cluster.primary.id
|
|
130
|
+
location = var.region
|
|
131
|
+
|
|
132
|
+
initial_node_count = var.environment == "production" ? 3 : 1
|
|
133
|
+
autoscaling {
|
|
134
|
+
min_node_count = var.environment == "production" ? 3 : 1
|
|
135
|
+
max_node_count = 20
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
node_config {
|
|
139
|
+
machine_type = "e2-standard-4"
|
|
140
|
+
disk_size_gb = 100
|
|
141
|
+
disk_type = "pd-ssd"
|
|
142
|
+
oauth_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
|
|
143
|
+
workload_metadata_config { mode = "GKE_METADATA" }
|
|
144
|
+
shielded_instance_config {
|
|
145
|
+
enable_secure_boot = true
|
|
146
|
+
enable_integrity_monitoring = true
|
|
147
|
+
}
|
|
148
|
+
labels = { workload-type = "general", environment = var.environment }
|
|
149
|
+
metadata = { disable-legacy-endpoints = "true" }
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
management {
  auto_repair  = true
  auto_upgrade = true
}
|
|
153
|
+
upgrade_settings {
  max_surge       = 1
  max_unavailable = 0
  strategy        = "SURGE"
}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
# Spot node pool (cost optimization)
|
|
157
|
+
resource "google_container_node_pool" "spot" {
|
|
158
|
+
name = "spot"
|
|
159
|
+
cluster = google_container_cluster.primary.id
|
|
160
|
+
location = var.region
|
|
161
|
+
|
|
162
|
+
initial_node_count = 0
|
|
163
|
+
autoscaling {
  min_node_count = 0
  max_node_count = 30
}
|
|
164
|
+
|
|
165
|
+
node_config {
|
|
166
|
+
machine_type = "e2-standard-4"
|
|
167
|
+
spot = true
|
|
168
|
+
disk_size_gb = 100
|
|
169
|
+
oauth_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
|
|
170
|
+
workload_metadata_config { mode = "GKE_METADATA" }
|
|
171
|
+
labels = { workload-type = "spot" }
|
|
172
|
+
taint {
|
|
173
|
+
key    = "cloud.google.com/gke-spot"
value  = "true"
effect = "NO_SCHEDULE"
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
management {
  auto_repair  = true
  auto_upgrade = true
}
|
|
177
|
+
}
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### GKE with gcloud CLI
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
# Autopilot cluster (recommended for most workloads)
|
|
184
|
+
gcloud container clusters create-auto "gke-myapp-production" \
|
|
185
|
+
--region="us-central1" --project="my-project-id" \
|
|
186
|
+
--network="vpc-myapp" --subnetwork="subnet-gke" \
|
|
187
|
+
--cluster-secondary-range-name="pods" \
|
|
188
|
+
--services-secondary-range-name="services" \
|
|
189
|
+
--enable-private-nodes --master-ipv4-cidr="172.16.0.0/28" \
|
|
190
|
+
--release-channel="regular" \
|
|
191
|
+
--enable-master-authorized-networks --master-authorized-networks="10.0.0.0/8"
|
|
192
|
+
|
|
193
|
+
# Standard cluster (full node control)
|
|
194
|
+
gcloud container clusters create "gke-myapp-production" \
|
|
195
|
+
--region="us-central1" --num-nodes=3 --machine-type="e2-standard-4" \
|
|
196
|
+
--enable-autoscaling --min-nodes=3 --max-nodes=20 \
|
|
197
|
+
--enable-ip-alias --network="vpc-myapp" --subnetwork="subnet-gke" \
|
|
198
|
+
--workload-pool="my-project-id.svc.id.goog" \
|
|
199
|
+
--enable-shielded-nodes --enable-private-nodes \
|
|
200
|
+
--master-ipv4-cidr="172.16.0.0/28" --release-channel="regular"
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
### Workload Identity Federation (GKE)
|
|
204
|
+
|
|
205
|
+
```hcl
|
|
206
|
+
resource "google_service_account" "app_sa" {
|
|
207
|
+
account_id = "myapp-workload"
|
|
208
|
+
display_name = "MyApp Workload Service Account"
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
resource "google_service_account_iam_binding" "workload_identity" {
|
|
212
|
+
service_account_id = google_service_account.app_sa.name
|
|
213
|
+
role = "roles/iam.workloadIdentityUser"
|
|
214
|
+
members = [
|
|
215
|
+
"serviceAccount:${var.project_id}.svc.id.goog[production/myapp-sa]"
|
|
216
|
+
]
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
resource "google_project_iam_member" "app_storage" {
|
|
220
|
+
project = var.project_id
|
|
221
|
+
role = "roles/storage.objectViewer"
|
|
222
|
+
member = "serviceAccount:${google_service_account.app_sa.email}"
|
|
223
|
+
}
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
```yaml
|
|
227
|
+
# Kubernetes ServiceAccount linked to GCP SA
|
|
228
|
+
apiVersion: v1
|
|
229
|
+
kind: ServiceAccount
|
|
230
|
+
metadata:
|
|
231
|
+
name: myapp-sa
|
|
232
|
+
namespace: production
|
|
233
|
+
annotations:
|
|
234
|
+
iam.gke.io/gcp-service-account: myapp-workload@my-project-id.iam.gserviceaccount.com
|
|
235
|
+
---
|
|
236
|
+
apiVersion: apps/v1
|
|
237
|
+
kind: Deployment
|
|
238
|
+
metadata:
|
|
239
|
+
name: myapp
|
|
240
|
+
namespace: production
|
|
241
|
+
spec:
|
|
242
|
+
template:
|
|
243
|
+
spec:
|
|
244
|
+
serviceAccountName: myapp-sa
|
|
245
|
+
nodeSelector:
|
|
246
|
+
iam.gke.io/gke-metadata-server-enabled: "true"
|
|
247
|
+
containers:
|
|
248
|
+
- name: myapp
|
|
249
|
+
image: us-central1-docker.pkg.dev/my-project-id/myapp/api:latest
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
### GKE Standard vs Autopilot
|
|
253
|
+
|
|
254
|
+
```
|
|
255
|
+
┌──────────────────────┬──────────────────────┬──────────────────────┐
|
|
256
|
+
│ Criteria │ GKE Standard │ GKE Autopilot │
|
|
257
|
+
├──────────────────────┼──────────────────────┼──────────────────────┤
|
|
258
|
+
│ Node management │ You manage │ Google manages │
|
|
259
|
+
│ Pricing │ Pay for nodes │ Pay for pods │
|
|
260
|
+
│ GPU / TPU │ Full support │ Supported │
|
|
261
|
+
│ DaemonSets │ Full control │ Restricted │
|
|
262
|
+
│ Privileged pods │ Allowed │ Not allowed │
|
|
263
|
+
│ Security posture │ You harden │ Pre-hardened │
|
|
264
|
+
│ Scale to zero │ Manual │ Built-in │
|
|
265
|
+
│ Best for │ Complex workloads │ Most workloads │
|
|
266
|
+
└──────────────────────┴──────────────────────┴──────────────────────┘
|
|
267
|
+
|
|
268
|
+
RULE: Default → Autopilot. Use Standard for DaemonSets, privileged pods,
|
|
269
|
+
custom node images, GPU/ML with specific node configs.
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
## Cloud Run
|
|
273
|
+
|
|
274
|
+
### Service Deployment with Terraform
|
|
275
|
+
|
|
276
|
+
```hcl
|
|
277
|
+
resource "google_cloud_run_v2_service" "api" {
|
|
278
|
+
name = "api-${var.environment}"
|
|
279
|
+
location = var.region
|
|
280
|
+
ingress = "INGRESS_TRAFFIC_INTERNAL_LOAD_BALANCER"
|
|
281
|
+
|
|
282
|
+
template {
|
|
283
|
+
service_account = google_service_account.run_sa.email
|
|
284
|
+
scaling {
|
|
285
|
+
min_instance_count = var.environment == "production" ? 2 : 0
|
|
286
|
+
max_instance_count = 100
|
|
287
|
+
}
|
|
288
|
+
containers {
|
|
289
|
+
image = "${var.region}-docker.pkg.dev/${var.project_id}/${var.repo}/api:${var.image_tag}"
|
|
290
|
+
ports { container_port = 8080 }
|
|
291
|
+
resources {
|
|
292
|
+
limits = { cpu = "2", memory = "1Gi" }
|
|
293
|
+
cpu_idle = var.environment != "production"
|
|
294
|
+
startup_cpu_boost = true
|
|
295
|
+
}
|
|
296
|
+
env {
|
|
297
|
+
name = "DB_HOST"
|
|
298
|
+
value = "/cloudsql/${google_sql_database_instance.main.connection_name}"
|
|
299
|
+
}
|
|
300
|
+
env {
|
|
301
|
+
name = "DB_PASSWORD"
|
|
302
|
+
value_source {
|
|
303
|
+
secret_key_ref {
|
|
304
|
+
secret = google_secret_manager_secret.db_password.secret_id
|
|
305
|
+
version = "latest"
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
startup_probe {
|
|
310
|
+
http_get { path = "/healthz" }
|
|
311
|
+
initial_delay_seconds = 5
|
|
312
|
+
period_seconds = 10
|
|
313
|
+
failure_threshold = 3
|
|
314
|
+
}
|
|
315
|
+
liveness_probe {
|
|
316
|
+
http_get { path = "/healthz" }
|
|
317
|
+
period_seconds = 30
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
vpc_access {
|
|
321
|
+
network_interfaces {
  network    = var.vpc_id
  subnetwork = var.subnet_id
}
|
|
322
|
+
egress = "PRIVATE_RANGES_ONLY"
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
traffic {
|
|
327
|
+
type = "TRAFFIC_TARGET_ALLOCATION_TYPE_LATEST"
|
|
328
|
+
percent = 100
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
resource "google_cloud_run_v2_service_iam_member" "invoker" {
|
|
333
|
+
name = google_cloud_run_v2_service.api.name
|
|
334
|
+
location = var.region
|
|
335
|
+
role = "roles/run.invoker"
|
|
336
|
+
member = var.public_api ? "allUsers" : "serviceAccount:${var.invoker_sa_email}"
|
|
337
|
+
}
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
### Cloud Run with gcloud
|
|
341
|
+
|
|
342
|
+
```bash
|
|
343
|
+
gcloud run deploy api-production \
|
|
344
|
+
--image="us-central1-docker.pkg.dev/my-project/myapp/api:v1.2.3" \
|
|
345
|
+
--region="us-central1" \
|
|
346
|
+
--service-account="api-runner@my-project.iam.gserviceaccount.com" \
|
|
347
|
+
--min-instances=2 --max-instances=100 --cpu=2 --memory=1Gi \
|
|
348
|
+
--set-env-vars="ENV=production" --set-secrets="DB_PASS=db-password:latest" \
|
|
349
|
+
--vpc-connector="projects/my-project/locations/us-central1/connectors/run-vpc" \
|
|
350
|
+
--vpc-egress=private-ranges-only \
|
|
351
|
+
--ingress=internal-and-cloud-load-balancing --no-allow-unauthenticated
|
|
352
|
+
|
|
353
|
+
# Traffic splitting (canary)
|
|
354
|
+
gcloud run services update-traffic api-production --region="us-central1" \
|
|
355
|
+
--to-revisions="api-production-v2=10,api-production-v1=90"
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
## Cloud Functions (2nd Gen)
|
|
359
|
+
|
|
360
|
+
```hcl
|
|
361
|
+
resource "google_cloudfunctions2_function" "processor" {
|
|
362
|
+
name = "order-processor-${var.environment}"
|
|
363
|
+
location = var.region
|
|
364
|
+
|
|
365
|
+
build_config {
|
|
366
|
+
runtime = "nodejs22"
|
|
367
|
+
entry_point = "processOrder"
|
|
368
|
+
source {
|
|
369
|
+
storage_source {
|
|
370
|
+
bucket = google_storage_bucket.functions_source.name
|
|
371
|
+
object = google_storage_bucket_object.function_zip.name
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
service_config {
|
|
377
|
+
min_instance_count = 0
|
|
378
|
+
max_instance_count = 100
|
|
379
|
+
available_memory = "512Mi"
|
|
380
|
+
timeout_seconds = 60
|
|
381
|
+
service_account_email = google_service_account.fn_sa.email
|
|
382
|
+
ingress_settings = "ALLOW_INTERNAL_ONLY"
|
|
383
|
+
|
|
384
|
+
environment_variables = { PROJECT_ID = var.project_id }
|
|
385
|
+
|
|
386
|
+
secret_environment_variables {
|
|
387
|
+
key = "API_KEY"
|
|
388
|
+
project_id = var.project_id
secret = google_secret_manager_secret.api_key.secret_id
|
|
389
|
+
version = "latest"
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
event_trigger {
|
|
394
|
+
trigger_region = var.region
|
|
395
|
+
event_type = "google.cloud.pubsub.topic.v1.messagePublished"
|
|
396
|
+
pubsub_topic = google_pubsub_topic.orders.id
|
|
397
|
+
retry_policy = "RETRY_POLICY_RETRY"
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
## Networking
|
|
403
|
+
|
|
404
|
+
### VPC Design with Terraform
|
|
405
|
+
|
|
406
|
+
```hcl
|
|
407
|
+
resource "google_compute_network" "vpc" {
|
|
408
|
+
name = "vpc-${var.project_name}-${var.environment}"
|
|
409
|
+
auto_create_subnetworks = false
|
|
410
|
+
routing_mode = "GLOBAL"
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
# Primary subnet with secondary ranges for GKE
|
|
414
|
+
resource "google_compute_subnetwork" "gke" {
|
|
415
|
+
name = "subnet-gke-${var.environment}"
|
|
416
|
+
ip_cidr_range = "10.0.0.0/20"
|
|
417
|
+
region = var.region
|
|
418
|
+
network = google_compute_network.vpc.id
|
|
419
|
+
|
|
420
|
+
secondary_ip_range {
  range_name    = "pods"
  ip_cidr_range = "10.4.0.0/14"
}
|
|
421
|
+
secondary_ip_range {
  range_name    = "services"
  ip_cidr_range = "10.8.0.0/20"
}
|
|
422
|
+
|
|
423
|
+
private_ip_google_access = true
|
|
424
|
+
log_config {
|
|
425
|
+
aggregation_interval = "INTERVAL_5_SEC"
|
|
426
|
+
flow_sampling = 0.5
|
|
427
|
+
metadata = "INCLUDE_ALL_METADATA"
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
# Serverless and data subnets
|
|
432
|
+
resource "google_compute_subnetwork" "serverless" {
|
|
433
|
+
name = "subnet-serverless-${var.environment}"
|
|
434
|
+
ip_cidr_range = "10.1.0.0/24"
region = var.region
|
|
435
|
+
network = google_compute_network.vpc.id
|
|
436
|
+
private_ip_google_access = true
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
resource "google_compute_subnetwork" "data" {
|
|
440
|
+
name = "subnet-data-${var.environment}"
|
|
441
|
+
ip_cidr_range = "10.2.0.0/24"
region = var.region
|
|
442
|
+
network = google_compute_network.vpc.id
|
|
443
|
+
private_ip_google_access = true
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
# Cloud NAT
|
|
447
|
+
resource "google_compute_router" "router" {
|
|
448
|
+
name = "router-${var.environment}"
region = var.region
|
|
449
|
+
network = google_compute_network.vpc.id
|
|
450
|
+
}
|
|
451
|
+
|
|
452
|
+
resource "google_compute_router_nat" "nat" {
|
|
453
|
+
name = "nat-${var.environment}"
|
|
454
|
+
router = google_compute_router.router.name
|
|
455
|
+
region = var.region
|
|
456
|
+
nat_ip_allocate_option = "AUTO_ONLY"
|
|
457
|
+
source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES"
|
|
458
|
+
log_config {
  enable = true
  filter = "ERRORS_ONLY"
}
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
# Private Service Access (Cloud SQL, Memorystore, etc.)
|
|
462
|
+
resource "google_compute_global_address" "private_services" {
|
|
463
|
+
name = "private-services-${var.environment}"
|
|
464
|
+
purpose = "VPC_PEERING"
address_type = "INTERNAL"
prefix_length = 20
|
|
465
|
+
network = google_compute_network.vpc.id
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
resource "google_service_networking_connection" "private_vpc" {
|
|
469
|
+
network = google_compute_network.vpc.id
|
|
470
|
+
service = "servicenetworking.googleapis.com"
|
|
471
|
+
reserved_peering_ranges = [google_compute_global_address.private_services.name]
|
|
472
|
+
}
|
|
473
|
+
```
|
|
474
|
+
|
|
475
|
+
### Firewall Rules
|
|
476
|
+
|
|
477
|
+
```hcl
|
|
478
|
+
resource "google_compute_firewall" "allow_internal" {
|
|
479
|
+
name = "allow-internal-${var.environment}"
|
|
480
|
+
network = google_compute_network.vpc.id
|
|
481
|
+
allow {
  protocol = "tcp"
  ports    = ["0-65535"]
}
|
|
482
|
+
allow {
  protocol = "udp"
  ports    = ["0-65535"]
}
|
|
483
|
+
allow { protocol = "icmp" }
|
|
484
|
+
source_ranges = ["10.0.0.0/8"]
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
resource "google_compute_firewall" "allow_health_checks" {
|
|
488
|
+
name = "allow-health-checks-${var.environment}"
|
|
489
|
+
network = google_compute_network.vpc.id
|
|
490
|
+
allow {
  protocol = "tcp"
  ports    = ["80", "443", "8080"]
}
|
|
491
|
+
source_ranges = ["35.191.0.0/16", "130.211.0.0/22"] # GCP health check ranges
|
|
492
|
+
target_tags = ["allow-health-check"]
|
|
493
|
+
}
|
|
494
|
+
```
|
|
495
|
+
|
|
496
|
+
### Cloud Load Balancing (External HTTPS with CDN)
|
|
497
|
+
|
|
498
|
+
```hcl
|
|
499
|
+
resource "google_compute_global_address" "lb_ip" { name = "lb-ip-${var.environment}" }
|
|
500
|
+
|
|
501
|
+
resource "google_compute_managed_ssl_certificate" "cert" {
|
|
502
|
+
name = "cert-${var.environment}"
|
|
503
|
+
managed { domains = [var.domain] }
|
|
504
|
+
}
|
|
505
|
+
|
|
506
|
+
resource "google_compute_backend_service" "api" {
|
|
507
|
+
name = "backend-api-${var.environment}"
|
|
508
|
+
protocol = "HTTPS"
|
|
509
|
+
timeout_sec = 30
|
|
510
|
+
load_balancing_scheme = "EXTERNAL_MANAGED"
|
|
511
|
+
# NOTE: no health_checks here — backend services that use a serverless NEG (Cloud Run) do not support health checks.
|
|
512
|
+
backend { group = google_compute_region_network_endpoint_group.run_neg.id }
|
|
513
|
+
|
|
514
|
+
enable_cdn = true
|
|
515
|
+
cdn_policy {
|
|
516
|
+
cache_mode = "CACHE_ALL_STATIC"
|
|
517
|
+
default_ttl = 3600
|
|
518
|
+
max_ttl = 86400
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
security_policy = google_compute_security_policy.armor.id
|
|
522
|
+
log_config {
  enable      = true
  sample_rate = 1.0
}
|
|
523
|
+
}
|
|
524
|
+
|
|
525
|
+
resource "google_compute_url_map" "lb" {
|
|
526
|
+
name = "lb-${var.environment}"
|
|
527
|
+
default_service = google_compute_backend_service.api.id
|
|
528
|
+
}
|
|
529
|
+
|
|
530
|
+
resource "google_compute_target_https_proxy" "lb" {
|
|
531
|
+
name = "lb-proxy-${var.environment}"
|
|
532
|
+
url_map = google_compute_url_map.lb.id
|
|
533
|
+
ssl_certificates = [google_compute_managed_ssl_certificate.cert.id]
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
resource "google_compute_global_forwarding_rule" "lb" {
|
|
537
|
+
name = "lb-rule-${var.environment}"
|
|
538
|
+
target = google_compute_target_https_proxy.lb.id
|
|
539
|
+
port_range = "443"
|
|
540
|
+
ip_address = google_compute_global_address.lb_ip.id
|
|
541
|
+
load_balancing_scheme = "EXTERNAL_MANAGED"
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
# Serverless NEG for Cloud Run
|
|
545
|
+
resource "google_compute_region_network_endpoint_group" "run_neg" {
|
|
546
|
+
name = "neg-run-${var.environment}"
|
|
547
|
+
region = var.region
|
|
548
|
+
network_endpoint_type = "SERVERLESS"
|
|
549
|
+
cloud_run { service = google_cloud_run_v2_service.api.name }
|
|
550
|
+
}
|
|
551
|
+
```
|
|
552
|
+
|
|
553
|
+
### Cloud Armor (WAF / DDoS)
|
|
554
|
+
|
|
555
|
+
```hcl
|
|
556
|
+
resource "google_compute_security_policy" "armor" {
|
|
557
|
+
name = "armor-${var.environment}"
|
|
558
|
+
|
|
559
|
+
rule {
|
|
560
|
+
action = "allow"; priority = 2147483647
|
|
561
|
+
match { versioned_expr = "SRC_IPS_V1"; config { src_ip_ranges = ["*"] } }
|
|
562
|
+
description = "Default allow"
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
rule {
|
|
566
|
+
action = "deny(403)"; priority = 1000
|
|
567
|
+
match { versioned_expr = "SRC_IPS_V1"; config { src_ip_ranges = var.blocked_ip_ranges } }
|
|
568
|
+
description = "Block known malicious IPs"
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
rule {
|
|
572
|
+
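action = "rate_based_ban"; priority = 4000 # evaluated after the WAF rules (3000/3100): a conforming match is allowed and stops rule evaluation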
|
|
573
|
+
match { versioned_expr = "SRC_IPS_V1"; config { src_ip_ranges = ["*"] } }
|
|
574
|
+
rate_limit_options {
|
|
575
|
+
conform_action = "allow"; exceed_action = "deny(429)"
|
|
576
|
+
rate_limit_threshold { count = 100; interval_sec = 60 }
|
|
577
|
+
ban_duration_sec = 300
|
|
578
|
+
}
|
|
579
|
+
description = "Rate limit: 100 req/min"
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
# OWASP CRS preconfigured rules
|
|
583
|
+
rule {
|
|
584
|
+
action = "deny(403)"; priority = 3000
|
|
585
|
+
match { expr { expression = "evaluatePreconfiguredExpr('sqli-v33-stable')" } }
|
|
586
|
+
description = "SQL injection protection"
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
rule {
|
|
590
|
+
action = "deny(403)"; priority = 3100
|
|
591
|
+
match { expr { expression = "evaluatePreconfiguredExpr('xss-v33-stable')" } }
|
|
592
|
+
description = "XSS protection"
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
adaptive_protection_config {
|
|
596
|
+
layer_7_ddos_defense_config { enable = var.environment == "production" }
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
```
|
|
600
|
+
|
|
601
|
+
## Identity & Security
|
|
602
|
+
|
|
603
|
+
### IAM Best Practices
|
|
604
|
+
|
|
605
|
+
```hcl
|
|
606
|
+
# Principle: Grant roles to groups, not individuals.
|
|
607
|
+
# Principle: Use custom roles when predefined roles are too broad.
|
|
608
|
+
|
|
609
|
+
resource "google_project_iam_custom_role" "app_reader" {
|
|
610
|
+
role_id = "appDataReader"
|
|
611
|
+
title = "Application Data Reader"
|
|
612
|
+
permissions = [
|
|
613
|
+
"cloudsql.instances.connect", "cloudsql.instances.get",
|
|
614
|
+
"storage.objects.get", "storage.objects.list",
|
|
615
|
+
"secretmanager.versions.access",
|
|
616
|
+
]
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
# Conditional IAM binding (weekdays only; note that iam_binding is authoritative for this role)
|
|
620
|
+
resource "google_project_iam_binding" "conditional" {
|
|
621
|
+
project = var.project_id
|
|
622
|
+
role = "roles/cloudsql.admin"
|
|
623
|
+
members = ["group:dba-team@company.com"]
|
|
624
|
+
condition {
|
|
625
|
+
title = "weekday_only"
|
|
626
|
+
expression = "request.time.getDayOfWeek('America/New_York') >= 1 && request.time.getDayOfWeek('America/New_York') <= 5"
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
```
|
|
630
|
+
|
|
631
|
+
### Service Accounts with Least Privilege
|
|
632
|
+
|
|
633
|
+
```hcl
|
|
634
|
+
# Dedicated SA per workload (never use default compute SA)
|
|
635
|
+
resource "google_service_account" "api_sa" {
|
|
636
|
+
account_id = "api-${var.environment}"
|
|
637
|
+
display_name = "API Service Account (${var.environment})"
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
resource "google_project_iam_member" "api_roles" {
|
|
641
|
+
for_each = toset([
|
|
642
|
+
"roles/cloudsql.client",
|
|
643
|
+
"roles/secretmanager.secretAccessor",
|
|
644
|
+
"roles/logging.logWriter",
|
|
645
|
+
"roles/monitoring.metricWriter",
|
|
646
|
+
"roles/cloudtrace.agent",
|
|
647
|
+
])
|
|
648
|
+
project = var.project_id
|
|
649
|
+
role = each.value
|
|
650
|
+
member = "serviceAccount:${google_service_account.api_sa.email}"
|
|
651
|
+
}
|
|
652
|
+
|
|
653
|
+
# Disable SA key creation (enforce Workload Identity)
|
|
654
|
+
resource "google_project_organization_policy" "disable_sa_keys" {
|
|
655
|
+
project = var.project_id
|
|
656
|
+
constraint = "iam.disableServiceAccountKeyCreation"
|
|
657
|
+
boolean_policy { enforced = true }
|
|
658
|
+
}
|
|
659
|
+
```
|
|
660
|
+
|
|
661
|
+
### Secret Manager
|
|
662
|
+
|
|
663
|
+
```hcl
|
|
664
|
+
resource "google_secret_manager_secret" "db_password" {
|
|
665
|
+
secret_id = "db-password-${var.environment}"
|
|
666
|
+
replication { auto {} }
|
|
667
|
+
labels = { environment = var.environment, managed_by = "terraform" }
|
|
668
|
+
rotation {
|
|
669
|
+
rotation_period = "7776000s" # 90 days
|
|
670
|
+
next_rotation_time = var.db_password_next_rotation_time # fixed RFC 3339 timestamp; timestamp() here would produce a diff on every plan
|
|
671
|
+
}
|
|
672
|
+
}
|
|
673
|
+
|
|
674
|
+
resource "google_secret_manager_secret_version" "db_password" {
|
|
675
|
+
secret = google_secret_manager_secret.db_password.id
|
|
676
|
+
secret_data = var.db_password
|
|
677
|
+
}
|
|
678
|
+
|
|
679
|
+
resource "google_secret_manager_secret_iam_member" "accessor" {
|
|
680
|
+
secret_id = google_secret_manager_secret.db_password.id
|
|
681
|
+
role = "roles/secretmanager.secretAccessor"
|
|
682
|
+
member = "serviceAccount:${google_service_account.api_sa.email}"
|
|
683
|
+
}
|
|
684
|
+
```
|
|
685
|
+
|
|
686
|
+
### Organization Policies
|
|
687
|
+
|
|
688
|
+
```hcl
|
|
689
|
+
resource "google_project_organization_policy" "no_external_ip" {
|
|
690
|
+
project = var.project_id
|
|
691
|
+
constraint = "compute.vmExternalIpAccess"
|
|
692
|
+
list_policy { deny { all = true } }
|
|
693
|
+
}
|
|
694
|
+
|
|
695
|
+
resource "google_project_organization_policy" "location_restriction" {
|
|
696
|
+
project = var.project_id
|
|
697
|
+
constraint = "gcp.resourceLocations"
|
|
698
|
+
list_policy { allow { values = ["in:us-locations"] } }
|
|
699
|
+
}
|
|
700
|
+
|
|
701
|
+
resource "google_project_organization_policy" "uniform_bucket" {
|
|
702
|
+
project = var.project_id
|
|
703
|
+
constraint = "storage.uniformBucketLevelAccess"
|
|
704
|
+
boolean_policy { enforced = true }
|
|
705
|
+
}
|
|
706
|
+
```
|
|
707
|
+
|
|
708
|
+
## Data Services
|
|
709
|
+
|
|
710
|
+
### Cloud SQL (PostgreSQL)
|
|
711
|
+
|
|
712
|
+
```hcl
|
|
713
|
+
resource "google_sql_database_instance" "main" {
|
|
714
|
+
name = "sql-${var.project_name}-${var.environment}"
|
|
715
|
+
database_version = "POSTGRES_16"
|
|
716
|
+
region = var.region
|
|
717
|
+
deletion_protection = var.environment == "production"
|
|
718
|
+
|
|
719
|
+
settings {
|
|
720
|
+
tier = var.environment == "production" ? "db-custom-4-16384" : "db-f1-micro"
|
|
721
|
+
availability_type = var.environment == "production" ? "REGIONAL" : "ZONAL"
|
|
722
|
+
disk_size = 100; disk_type = "PD_SSD"; disk_autoresize = true
|
|
723
|
+
|
|
724
|
+
backup_configuration {
|
|
725
|
+
enabled = true
|
|
726
|
+
point_in_time_recovery_enabled = true
|
|
727
|
+
start_time = "03:00"
|
|
728
|
+
transaction_log_retention_days = 7
|
|
729
|
+
backup_retention_settings {
|
|
730
|
+
retained_backups = var.environment == "production" ? 30 : 7
|
|
731
|
+
}
|
|
732
|
+
}
|
|
733
|
+
|
|
734
|
+
ip_configuration {
|
|
735
|
+
ipv4_enabled = false
|
|
736
|
+
private_network = var.vpc_id
|
|
737
|
+
enable_private_path_for_google_cloud_services = true
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
database_flags { name = "log_checkpoints"; value = "on" }
|
|
741
|
+
database_flags { name = "log_connections"; value = "on" }
|
|
742
|
+
database_flags { name = "log_min_duration_statement"; value = "1000" }
|
|
743
|
+
|
|
744
|
+
maintenance_window { day = 7; hour = 3; update_track = "stable" }
|
|
745
|
+
|
|
746
|
+
insights_config {
|
|
747
|
+
query_insights_enabled = true
|
|
748
|
+
query_plans_per_minute = 5
|
|
749
|
+
record_application_tags = true
|
|
750
|
+
record_client_address = true
|
|
751
|
+
}
|
|
752
|
+
}
|
|
753
|
+
|
|
754
|
+
depends_on = [google_service_networking_connection.private_vpc]
|
|
755
|
+
}
|
|
756
|
+
```
|
|
757
|
+
|
|
758
|
+
### Cloud Spanner
|
|
759
|
+
|
|
760
|
+
```hcl
|
|
761
|
+
resource "google_spanner_instance" "main" {
|
|
762
|
+
name = "spanner-${var.project_name}-${var.environment}"
|
|
763
|
+
config = "regional-${var.region}"
|
|
764
|
+
display_name = "${var.project_name} ${var.environment}"
|
|
765
|
+
|
|
766
|
+
autoscaling_config {
|
|
767
|
+
autoscaling_limits {
|
|
768
|
+
min_processing_units = 1000 # autoscaling limits must be multiples of 1000 processing units
|
|
769
|
+
max_processing_units = var.environment == "production" ? 10000 : 1000
|
|
770
|
+
}
|
|
771
|
+
autoscaling_targets {
|
|
772
|
+
high_priority_cpu_utilization_percent = 65
|
|
773
|
+
storage_utilization_percent = 90
|
|
774
|
+
}
|
|
775
|
+
}
|
|
776
|
+
}
|
|
777
|
+
|
|
778
|
+
resource "google_spanner_database" "app" {
|
|
779
|
+
instance = google_spanner_instance.main.name
|
|
780
|
+
name = "app-db"
|
|
781
|
+
ddl = [
|
|
782
|
+
"CREATE TABLE Users (UserId STRING(36) NOT NULL, Email STRING(255) NOT NULL, CreatedAt TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true)) PRIMARY KEY (UserId)",
|
|
783
|
+
"CREATE UNIQUE INDEX UsersByEmail ON Users(Email)",
|
|
784
|
+
]
|
|
785
|
+
deletion_protection = var.environment == "production"
|
|
786
|
+
}
|
|
787
|
+
```
|
|
788
|
+
|
|
789
|
+
### Firestore
|
|
790
|
+
|
|
791
|
+
```hcl
|
|
792
|
+
resource "google_firestore_database" "main" {
|
|
793
|
+
name = "(default)"
|
|
794
|
+
project = var.project_id
|
|
795
|
+
location_id = var.region
|
|
796
|
+
type = "FIRESTORE_NATIVE"
|
|
797
|
+
concurrency_mode = "OPTIMISTIC"
|
|
798
|
+
point_in_time_recovery_enablement = var.environment == "production" ? "POINT_IN_TIME_RECOVERY_ENABLED" : "POINT_IN_TIME_RECOVERY_DISABLED"
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
resource "google_firestore_index" "orders_by_user" {
|
|
802
|
+
project = var.project_id
|
|
803
|
+
database = google_firestore_database.main.name
|
|
804
|
+
collection = "orders"
|
|
805
|
+
fields { field_path = "userId"; order = "ASCENDING" }
|
|
806
|
+
fields { field_path = "createdAt"; order = "DESCENDING" }
|
|
807
|
+
}
|
|
808
|
+
```
|
|
809
|
+
|
|
810
|
+
### BigQuery
|
|
811
|
+
|
|
812
|
+
```hcl
|
|
813
|
+
resource "google_bigquery_dataset" "analytics" {
|
|
814
|
+
dataset_id = "analytics_${var.environment}"
|
|
815
|
+
location = var.region
|
|
816
|
+
labels = { environment = var.environment }
|
|
817
|
+
|
|
818
|
+
access { role = "OWNER"; special_group = "projectOwners" }
|
|
819
|
+
access { role = "READER"; group_by_email = "analysts@company.com" }
|
|
820
|
+
}
|
|
821
|
+
|
|
822
|
+
resource "google_bigquery_table" "events" {
|
|
823
|
+
dataset_id = google_bigquery_dataset.analytics.dataset_id
|
|
824
|
+
table_id = "events"
|
|
825
|
+
time_partitioning { type = "DAY"; field = "event_timestamp" }
|
|
826
|
+
clustering = ["event_name", "user_id"]
|
|
827
|
+
schema = jsonencode([
|
|
828
|
+
{ name = "event_id", type = "STRING", mode = "REQUIRED" },
|
|
829
|
+
{ name = "event_name", type = "STRING", mode = "REQUIRED" },
|
|
830
|
+
{ name = "event_timestamp", type = "TIMESTAMP", mode = "REQUIRED" },
|
|
831
|
+
{ name = "user_id", type = "STRING", mode = "NULLABLE" },
|
|
832
|
+
{ name = "properties", type = "JSON", mode = "NULLABLE" },
|
|
833
|
+
])
|
|
834
|
+
}
|
|
835
|
+
```
|
|
836
|
+
|
|
837
|
+
### Cloud Storage Patterns
|
|
838
|
+
|
|
839
|
+
```hcl
|
|
840
|
+
resource "google_storage_bucket" "data_lake" {
|
|
841
|
+
name = "${var.project_id}-data-lake-${var.environment}"
|
|
842
|
+
location = var.region
|
|
843
|
+
force_destroy = var.environment != "production"
|
|
844
|
+
uniform_bucket_level_access = true
|
|
845
|
+
public_access_prevention = "enforced"
|
|
846
|
+
|
|
847
|
+
versioning { enabled = var.environment == "production" }
|
|
848
|
+
|
|
849
|
+
lifecycle_rule { condition { age = 30 }; action { type = "SetStorageClass"; storage_class = "NEARLINE" } }
|
|
850
|
+
lifecycle_rule { condition { age = 90 }; action { type = "SetStorageClass"; storage_class = "COLDLINE" } }
|
|
851
|
+
lifecycle_rule { condition { age = 365 }; action { type = "SetStorageClass"; storage_class = "ARCHIVE" } }
|
|
852
|
+
|
|
853
|
+
encryption { default_kms_key_name = var.kms_key_id }
|
|
854
|
+
}
|
|
855
|
+
```
|
|
856
|
+
|
|
857
|
+
### Data Service Decision Framework
|
|
858
|
+
|
|
859
|
+
```
|
|
860
|
+
┌──────────────────┬───────────────┬───────────────┬───────────────────┐
|
|
861
|
+
│ Need │ Service │ Strengths │ Limits │
|
|
862
|
+
├──────────────────┼───────────────┼───────────────┼───────────────────┤
|
|
863
|
+
│ Relational OLTP │ Cloud SQL │ Managed PG/MY │ Single region │
|
|
864
|
+
│ Global relational│ Cloud Spanner │ Strong consis.│ Cost (min ~$65/mo)│
|
|
865
|
+
│ Document / mobile│ Firestore │ Realtime sync │ 1 write/sec/doc │
|
|
866
|
+
│ Analytics / OLAP │ BigQuery │ Serverless │ Not for OLTP │
|
|
867
|
+
│ Key-value cache │ Memorystore │ Redis compat. │ RAM-bound capacity│
|
|
868
|
+
│ Wide-column │ Bigtable │ Low latency │ No SQL, no joins │
|
|
869
|
+
│ Object storage │ Cloud Storage │ Unlimited │ No in-place edits │
|
|
870
|
+
└──────────────────┴───────────────┴───────────────┴───────────────────┘
|
|
871
|
+
|
|
872
|
+
GUIDANCE:
|
|
873
|
+
- Default OLTP -> Cloud SQL (PostgreSQL)
|
|
874
|
+
- Need global consistency -> Cloud Spanner
|
|
875
|
+
- Mobile/web realtime -> Firestore
|
|
876
|
+
- Analytics warehouse -> BigQuery
|
|
877
|
+
- High-throughput time-series -> Bigtable
|
|
878
|
+
```
|
|
879
|
+
|
|
880
|
+
## Infrastructure as Code
|
|
881
|
+
|
|
882
|
+
### Terraform GCP Provider Setup
|
|
883
|
+
|
|
884
|
+
```hcl
|
|
885
|
+
terraform {
|
|
886
|
+
required_version = ">= 1.7"
|
|
887
|
+
required_providers {
|
|
888
|
+
google = { source = "hashicorp/google", version = "~> 6.0" }
|
|
889
|
+
google-beta = { source = "hashicorp/google-beta", version = "~> 6.0" }
|
|
890
|
+
}
|
|
891
|
+
backend "gcs" { bucket = "my-project-terraform-state"; prefix = "terraform/state" }
|
|
892
|
+
}
|
|
893
|
+
|
|
894
|
+
provider "google" { project = var.project_id; region = var.region }
|
|
895
|
+
provider "google-beta" { project = var.project_id; region = var.region }
|
|
896
|
+
```
|
|
897
|
+
|
|
898
|
+
### Terraform Module Structure
|
|
899
|
+
|
|
900
|
+
```
|
|
901
|
+
infra/
|
|
902
|
+
├── environments/
|
|
903
|
+
│ ├── dev/ # main.tf, variables.tf, terraform.tfvars
|
|
904
|
+
│ ├── staging/
|
|
905
|
+
│ └── production/
|
|
906
|
+
├── modules/
|
|
907
|
+
│ ├── network/ # VPC, subnets, firewall, NAT
|
|
908
|
+
│ ├── gke/ # GKE cluster + node pools
|
|
909
|
+
│ ├── cloud-run/ # Cloud Run services
|
|
910
|
+
│ ├── database/ # Cloud SQL, Spanner
|
|
911
|
+
│ ├── observability/ # Monitoring, logging, alerting
|
|
912
|
+
│ └── security/ # IAM, org policies, KMS
|
|
913
|
+
└── shared/
|
|
914
|
+
└── state-bucket/ # Bootstrap: create state bucket
|
|
915
|
+
```
|
|
916
|
+
|
|
917
|
+
### Pulumi GCP Pattern (TypeScript)
|
|
918
|
+
|
|
919
|
+
```typescript
|
|
920
|
+
import * as gcp from "@pulumi/gcp";
|
|
921
|
+
import * as pulumi from "@pulumi/pulumi";
|
|
922
|
+
|
|
923
|
+
const config = new pulumi.Config();
|
|
924
|
+
const environment = config.require("environment");
|
|
925
|
+
|
|
926
|
+
const vpc = new gcp.compute.Network("vpc", {
|
|
927
|
+
name: `vpc-myapp-${environment}`,
|
|
928
|
+
autoCreateSubnetworks: false,
|
|
929
|
+
});
|
|
930
|
+
|
|
931
|
+
const cluster = new gcp.container.Cluster("gke", {
|
|
932
|
+
name: `gke-myapp-${environment}`,
|
|
933
|
+
location: "us-central1",
|
|
934
|
+
network: vpc.id,
|
|
935
|
+
removeDefaultNodePool: true,
|
|
936
|
+
initialNodeCount: 1,
|
|
937
|
+
workloadIdentityConfig: { workloadPool: `${gcp.config.project}.svc.id.goog` },
|
|
938
|
+
releaseChannel: { channel: "REGULAR" },
|
|
939
|
+
});
|
|
940
|
+
|
|
941
|
+
export const clusterName = cluster.name;
|
|
942
|
+
```
|
|
943
|
+
|
|
944
|
+
## CI/CD
|
|
945
|
+
|
|
946
|
+
### Cloud Build Pipeline
|
|
947
|
+
|
|
948
|
+
```yaml
|
|
949
|
+
# cloudbuild.yaml
|
|
950
|
+
steps:
|
|
951
|
+
- name: 'node:22-slim'
|
|
952
|
+
entrypoint: 'npm'
|
|
953
|
+
args: ['ci']
|
|
954
|
+
id: install
|
|
955
|
+
|
|
956
|
+
- name: 'node:22-slim'
|
|
957
|
+
entrypoint: 'npm'
|
|
958
|
+
args: ['test']
|
|
959
|
+
id: test
|
|
960
|
+
waitFor: ['install']
|
|
961
|
+
|
|
962
|
+
- name: 'gcr.io/cloud-builders/docker'
|
|
963
|
+
args: ['build', '-t', '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}:${SHORT_SHA}', '.']
|
|
964
|
+
id: build
|
|
965
|
+
waitFor: ['test']
|
|
966
|
+
|
|
967
|
+
- name: 'gcr.io/cloud-builders/docker'
|
|
968
|
+
args: ['push', '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}:${SHORT_SHA}']
|
|
969
|
+
id: push
|
|
970
|
+
waitFor: ['build']
|
|
971
|
+
|
|
972
|
+
- name: 'gcr.io/google.com/cloudsdktool/cloud-sdk'
|
|
973
|
+
entrypoint: 'gcloud'
|
|
974
|
+
args: ['run', 'deploy', '${_SERVICE}',
|
|
975
|
+
'--image=${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}:${SHORT_SHA}',
|
|
976
|
+
'--region=${_REGION}', '--platform=managed', '--quiet']
|
|
977
|
+
id: deploy
|
|
978
|
+
waitFor: ['push']
|
|
979
|
+
|
|
980
|
+
substitutions:
|
|
981
|
+
_REGION: us-central1
|
|
982
|
+
_REPO: myapp
|
|
983
|
+
_SERVICE: api
|
|
984
|
+
options:
|
|
985
|
+
logging: CLOUD_LOGGING_ONLY
|
|
986
|
+
machineType: 'E2_HIGHCPU_8'
|
|
987
|
+
```
|
|
988
|
+
|
|
989
|
+
### Artifact Registry
|
|
990
|
+
|
|
991
|
+
```hcl
|
|
992
|
+
resource "google_artifact_registry_repository" "docker" {
|
|
993
|
+
repository_id = "myapp"
|
|
994
|
+
location = var.region
|
|
995
|
+
format = "DOCKER"
|
|
996
|
+
|
|
997
|
+
cleanup_policies {
|
|
998
|
+
id = "keep-recent"; action = "KEEP"
|
|
999
|
+
most_recent_versions { keep_count = 10 }
|
|
1000
|
+
}
|
|
1001
|
+
cleanup_policies {
|
|
1002
|
+
id = "delete-old-untagged"; action = "DELETE"
|
|
1003
|
+
condition { tag_state = "UNTAGGED"; older_than = "604800s" }
|
|
1004
|
+
}
|
|
1005
|
+
}
|
|
1006
|
+
|
|
1007
|
+
resource "google_artifact_registry_repository_iam_member" "cloudbuild" {
|
|
1008
|
+
repository = google_artifact_registry_repository.docker.name
|
|
1009
|
+
location = var.region
|
|
1010
|
+
role = "roles/artifactregistry.writer"
|
|
1011
|
+
member = "serviceAccount:${data.google_project.current.number}@cloudbuild.gserviceaccount.com"
|
|
1012
|
+
}
|
|
1013
|
+
```
|
|
1014
|
+
|
|
1015
|
+
### Cloud Deploy (GKE Delivery Pipeline)
|
|
1016
|
+
|
|
1017
|
+
```bash
|
|
1018
|
+
# Create a release
|
|
1019
|
+
gcloud deploy releases create "v1.2.3" \
|
|
1020
|
+
--delivery-pipeline="myapp-pipeline" --region="us-central1" \
|
|
1021
|
+
--images="api=${REGION}-docker.pkg.dev/${PROJECT_ID}/myapp/api:v1.2.3"
|
|
1022
|
+
|
|
1023
|
+
# Approve promotion to production
|
|
1024
|
+
gcloud deploy rollouts approve "v1.2.3-to-production-0001" \
|
|
1025
|
+
--delivery-pipeline="myapp-pipeline" --release="v1.2.3" --region="us-central1"
|
|
1026
|
+
```
|
|
1027
|
+
|
|
1028
|
+
## Observability
|
|
1029
|
+
|
|
1030
|
+
### Cloud Monitoring (Alerting)
|
|
1031
|
+
|
|
1032
|
+
```hcl
|
|
1033
|
+
resource "google_monitoring_notification_channel" "pagerduty" {
|
|
1034
|
+
display_name = "PagerDuty - ${var.environment}"
|
|
1035
|
+
type = "pagerduty"
|
|
1036
|
+
labels = { service_key = var.pagerduty_service_key }
|
|
1037
|
+
}
|
|
1038
|
+
|
|
1039
|
+
resource "google_monitoring_uptime_check_config" "api" {
|
|
1040
|
+
display_name = "API Health - ${var.environment}"
|
|
1041
|
+
timeout = "10s"; period = "60s"
|
|
1042
|
+
http_check { path = "/healthz"; port = 443; use_ssl = true; validate_ssl = true }
|
|
1043
|
+
monitored_resource {
|
|
1044
|
+
type = "uptime_url"
|
|
1045
|
+
labels = { project_id = var.project_id, host = var.api_domain }
|
|
1046
|
+
}
|
|
1047
|
+
}
|
|
1048
|
+
|
|
1049
|
+
# Alert: High error rate (MQL)
|
|
1050
|
+
resource "google_monitoring_alert_policy" "error_rate" {
|
|
1051
|
+
display_name = "High Error Rate - ${var.environment}"
|
|
1052
|
+
combiner = "OR"
|
|
1053
|
+
conditions {
|
|
1054
|
+
display_name = "Error rate > 1%"
|
|
1055
|
+
condition_monitoring_query_language {
|
|
1056
|
+
query = <<-MQL
|
|
1057
|
+
fetch cloud_run_revision
|
|
1058
|
+
| metric 'run.googleapis.com/request_count'
|
|
1059
|
+
| filter resource.service_name == 'api-${var.environment}'
|
|
1060
|
+
| align rate(1m)
|
|
1061
|
+
| group_by [metric.response_code_class],
|
|
1062
|
+
[value_request_count_aggregate: aggregate(value.request_count)]
|
|
1063
|
+
| outer_join 0
|
|
1064
|
+
| value [error_rate:
|
|
1065
|
+
sum(if(metric.response_code_class = '5xx', value_request_count_aggregate, 0))
|
|
1066
|
+
/ sum(value_request_count_aggregate) * 100]
|
|
1067
|
+
| condition error_rate > 1
|
|
1068
|
+
MQL
|
|
1069
|
+
duration = "300s"
|
|
1070
|
+
}
|
|
1071
|
+
}
|
|
1072
|
+
notification_channels = [google_monitoring_notification_channel.pagerduty.id]
|
|
1073
|
+
alert_strategy { auto_close = "1800s" }
|
|
1074
|
+
}
|
|
1075
|
+
```
|
|
1076
|
+
|
|
1077
|
+
### Cloud Logging
|
|
1078
|
+
|
|
1079
|
+
```bash
|
|
1080
|
+
# Query logs
|
|
1081
|
+
gcloud logging read \
|
|
1082
|
+
'resource.type="cloud_run_revision" AND severity>=ERROR AND resource.labels.service_name="api-production"' \
|
|
1083
|
+
--limit=50 --format=json --freshness=1h
|
|
1084
|
+
|
|
1085
|
+
# Log-based metric
|
|
1086
|
+
gcloud logging metrics create api-errors \
|
|
1087
|
+
--description="Count of API 5xx errors" \
|
|
1088
|
+
--log-filter='resource.type="cloud_run_revision" AND httpRequest.status>=500'
|
|
1089
|
+
|
|
1090
|
+
# Log sink to BigQuery
|
|
1091
|
+
gcloud logging sinks create audit-to-bq \
|
|
1092
|
+
"bigquery.googleapis.com/projects/my-project/datasets/audit_logs" \
|
|
1093
|
+
--log-filter='logName:"cloudaudit.googleapis.com"'
|
|
1094
|
+
```
|
|
1095
|
+
|
|
1096
|
+
```hcl
|
|
1097
|
+
resource "google_logging_project_sink" "audit_bq" {
|
|
1098
|
+
name = "audit-to-bigquery"
|
|
1099
|
+
destination = "bigquery.googleapis.com/projects/${var.project_id}/datasets/${google_bigquery_dataset.audit.dataset_id}"
|
|
1100
|
+
filter = "logName:\"cloudaudit.googleapis.com\""
|
|
1101
|
+
unique_writer_identity = true
|
|
1102
|
+
bigquery_options { use_partitioned_tables = true }
|
|
1103
|
+
}
|
|
1104
|
+
|
|
1105
|
+
resource "google_bigquery_dataset_iam_member" "sink_writer" {
|
|
1106
|
+
dataset_id = google_bigquery_dataset.audit.dataset_id
|
|
1107
|
+
role = "roles/bigquery.dataEditor"
|
|
1108
|
+
member = google_logging_project_sink.audit_bq.writer_identity
|
|
1109
|
+
}
|
|
1110
|
+
```
|
|
1111
|
+
|
|
1112
|
+
### Cloud Trace and Error Reporting
|
|
1113
|
+
|
|
1114
|
+
```bash
|
|
1115
|
+
gcloud services enable cloudtrace.googleapis.com \
|
|
1116
|
+
clouderrorreporting.googleapis.com cloudprofiler.googleapis.com
|
|
1117
|
+
|
|
1118
|
+
# Cloud Trace is automatic for Cloud Run and GKE with the trace agent.
|
|
1119
|
+
# For custom instrumentation, use OpenTelemetry with the GCP exporter.
|
|
1120
|
+
# See the opentelemetry skill (sw-infra:opentelemetry) for full setup.
|
|
1121
|
+
```
|
|
1122
|
+
|
|
1123
|
+
### SLO Monitoring
|
|
1124
|
+
|
|
1125
|
+
```hcl
|
|
1126
|
+
resource "google_monitoring_slo" "api_availability" {
|
|
1127
|
+
service = google_monitoring_custom_service.api.service_id
|
|
1128
|
+
display_name = "API Availability SLO"
|
|
1129
|
+
goal = 0.999
|
|
1130
|
+
rolling_period_days = 28
|
|
1131
|
+
request_based_sli {
|
|
1132
|
+
good_total_ratio {
|
|
1133
|
+
good_service_filter = "resource.type=\"cloud_run_revision\" AND resource.labels.service_name=\"api-${var.environment}\" AND metric.type=\"run.googleapis.com/request_count\" AND metric.labels.response_code_class!=\"5xx\""
|
|
1134
|
+
total_service_filter = "resource.type=\"cloud_run_revision\" AND resource.labels.service_name=\"api-${var.environment}\" AND metric.type=\"run.googleapis.com/request_count\""
|
|
1135
|
+
}
|
|
1136
|
+
}
|
|
1137
|
+
}
|
|
1138
|
+
|
|
1139
|
+
resource "google_monitoring_alert_policy" "slo_burn" {
|
|
1140
|
+
display_name = "SLO Burn Rate Alert - ${var.environment}"
|
|
1141
|
+
combiner = "OR"
|
|
1142
|
+
conditions {
|
|
1143
|
+
display_name = "SLO burn rate high"
|
|
1144
|
+
condition_threshold {
|
|
1145
|
+
filter = "select_slo_burn_rate(\"${google_monitoring_slo.api_availability.id}\", \"60m\")"
|
|
1146
|
+
comparison = "COMPARISON_GT"
|
|
1147
|
+
threshold_value = 10
|
|
1148
|
+
duration = "0s"
|
|
1149
|
+
}
|
|
1150
|
+
}
|
|
1151
|
+
notification_channels = [google_monitoring_notification_channel.pagerduty.id]
|
|
1152
|
+
}
|
|
1153
|
+
```
|
|
1154
|
+
|
|
1155
|
+
## Cost Optimization Quick Reference
|
|
1156
|
+
|
|
1157
|
+
```
|
|
1158
|
+
┌─────────────────────────────┬──────────────────────────────────────┐
|
|
1159
|
+
│ Strategy │ Implementation │
|
|
1160
|
+
├─────────────────────────────┼──────────────────────────────────────┤
|
|
1161
|
+
│ Committed Use Discounts │ 1yr: up to ~37%, 3yr: up to ~55% off │
|
|
1162
|
+
│ Spot VMs │ Up to 91% off (can be preempted) │
|
|
1163
|
+
│ Cloud Run min=0 │ Scale to zero in non-prod │
|
|
1164
|
+
│ Cloud SQL shared-core │ db-f1-micro for dev/staging │
|
|
1165
|
+
│ GKE Autopilot │ Pay per pod, not per node │
|
|
1166
|
+
│ Storage lifecycle │ Standard->Nearline->Coldline->Archive│
|
|
1167
|
+
│ BigQuery editions (slots) │ Predictable cost at scale │
|
|
1168
|
+
│ Cloud NAT per-VM pricing │ Use Private Google Access, less NAT │
|
|
1169
|
+
│ Rightsizing recommendations │ gcloud recommender insights list │
|
|
1170
|
+
│ Budget alerts │ Set billing budgets + Pub/Sub alerts │
|
|
1171
|
+
└─────────────────────────────┴──────────────────────────────────────┘
|
|
1172
|
+
```
|