@pleri/olam-cli 0.1.160 → 0.1.161

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,273 @@
1
+ #!/usr/bin/env bash
2
+ # grafana-port-forward.sh — e2e smoke test: Grafana installs via Helm,
3
+ # port-forward is accessible, Loki datasource
4
+ # is pre-wired and reachable.
5
+ #
6
+ # Usage: scripts/e2e/grafana-port-forward.sh
7
+ #
8
+ # Pre-conditions:
9
+ # - kubectl context is set to a live k8s cluster (does NOT spin up k3d)
10
+ # - helm binary available
11
+ # - jq binary available
12
+ # - grafana Helm repo added (helm repo add grafana https://grafana.github.io/helm-charts)
13
+ # - Loki is already installed (scripts/e2e/loki-ingest.sh ran successfully
14
+ # OR `helm status olam-loki -n monitoring` is healthy)
15
+ #
16
+ # Idempotency: `helm upgrade --install` is idempotent; re-runs succeed on an
17
+ # existing cluster. The Secret is applied via --dry-run | kubectl apply;
18
+ # on re-runs the existing admin password is read back and reused, not rotated.
19
+ # The olam-dashboards ConfigMap is applied before helm install so
20
+ # Grafana's volume mount finds the ConfigMap on first boot.
21
+ #
22
+ # Cleanup: port-forward is killed on exit; Helm release is left in place so
23
+ # downstream tasks can reuse the same cluster.
24
+ #
25
+ # Refs: docs/plans/k3s-ingress-observability/phase-b-tasks.md — Task B2, B3
26
+ # Chart: grafana/grafana 8.5.2 (pinned; latest stable 2026-05-20)
27
+
28
# Strict mode: abort on a failing command (-e), on use of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Fixed settings for this smoke test. They are declared readonly so that no
# later step can reassign them by accident.
readonly NAMESPACE="monitoring"          # namespace used for every kubectl/helm call
readonly GRAFANA_RELEASE="olam-grafana"  # Helm release name; also the Service that is port-forwarded
readonly GRAFANA_CHART_VERSION="8.5.2"   # pinned grafana/grafana chart version
readonly LOCAL_PORT="3000"               # local end of the port-forward
readonly GRAFANA_SVC_PORT="80"           # Service port the forward targets
readonly PF_BIND_SECONDS=5               # seconds to sleep while the port-forward binds
36
+
37
# Write one progress line, tagged with the script name, to stderr.
log() {
  printf '[grafana-port-forward] %s\n' "$*" 1>&2
}

# Write a tagged FAIL line to stderr, then exit the current shell with status 1.
fail() {
  printf '[grafana-port-forward] FAIL: %s\n' "$*" 1>&2
  exit 1
}
39
+
40
# -------------------------------------------------------------------------
# Cleanup trap — kill port-forward on exit; leave Helm release in place
# -------------------------------------------------------------------------
PF_PID=""   # PID of the background port-forward; empty until it has been started

# EXIT handler: terminate the background port-forward if it was started and is
# still alive, then reap it so the child has fully exited before this script
# returns. Every step is guarded, so the handler itself never fails.
cleanup() {
  if [[ -n "$PF_PID" ]] && kill -0 "$PF_PID" 2>/dev/null; then
    kill "$PF_PID" 2>/dev/null || true
    # Block until the child is gone. Its "terminated by signal" status is
    # expected here and deliberately ignored.
    wait "$PF_PID" 2>/dev/null || true
  fi
}
trap cleanup EXIT
50
+
51
# -------------------------------------------------------------------------
# Pre-flight
# -------------------------------------------------------------------------
# Each required CLI must resolve on PATH; the first one missing aborts the run.
for required_tool in helm kubectl curl openssl; do
  if ! command -v "$required_tool" >/dev/null 2>&1; then
    fail "$required_tool not installed"
  fi
done
if ! command -v jq >/dev/null 2>&1; then
  fail "jq not installed (required for B3 dashboard assertion)"
fi

# The current kubectl context must point at a cluster that answers.
if ! kubectl cluster-info >/dev/null 2>&1; then
  fail "kubectl: no reachable cluster; set KUBECONFIG"
fi

log "pre-flight checks passed"
62
+
63
# -------------------------------------------------------------------------
# Ensure grafana Helm repo is present (idempotent — safe to re-run)
# -------------------------------------------------------------------------
# A failing `repo add` is tolerated and its stderr hidden. The refresh that
# follows is unguarded, so a failure there aborts the run.
helm repo add grafana https://grafana.github.io/helm-charts 2>/dev/null || true
helm repo update grafana

# Grafana (B2) builds on Loki (B1): stop early when the olam-loki release is
# not present in the namespace.
helm status "olam-loki" -n "$NAMESPACE" >/dev/null 2>&1 \
  || fail "olam-loki Helm release not found in namespace $NAMESPACE — run scripts/e2e/loki-ingest.sh first"
log "Loki pre-condition satisfied (olam-loki release found)"
74
+
75
# -------------------------------------------------------------------------
# Step 1: Resolve admin password (preserve existing on idempotent re-run)
# -------------------------------------------------------------------------
# Grafana persists the admin password in its internal SQLite on first
# deploy. Subsequent helm upgrades do NOT re-read GF_SECURITY_ADMIN_PASSWORD
# from the env (env value is set once at pod-start and not refreshed). So
# on a re-run, rotating the Secret leaves the in-Grafana password stale
# and breaks API auth.
#
# Idempotency contract: if the Secret already exists, reuse its current
# password. The Secret's value matches Grafana's stored value (set in
# concert on first install). Only generate a new password when the
# Secret doesn't exist yet — i.e. true first deploy.
if kubectl get secret olam-grafana-admin -n "$NAMESPACE" >/dev/null 2>&1; then
  log "reusing existing admin password from Secret olam-grafana-admin"
  GRAFANA_ADMIN_PW=$(kubectl get secret olam-grafana-admin -n "$NAMESPACE" \
    -o jsonpath='{.data.admin-password}' | base64 -d)
  # A Secret without a usable admin-password key decodes to an empty string.
  # Left unchecked, that empty value would be written back by the
  # Secret-apply step and used as the basic-auth password in the assertions.
  if [[ -z "$GRAFANA_ADMIN_PW" ]]; then
    fail "Secret olam-grafana-admin exists but its admin-password key is missing or empty — repair or delete the Secret, then re-run"
  fi
else
  log "generating fresh admin password (first deploy)"
  GRAFANA_ADMIN_PW=$(openssl rand -base64 24)
fi
export GRAFANA_ADMIN_PW
97
+
98
# -------------------------------------------------------------------------
# Step 2: Create / update the admin Secret idempotently
# -------------------------------------------------------------------------
log "applying Secret olam-grafana-admin in namespace $NAMESPACE"

# Render the Secret client-side only (nothing is sent to the cluster by the
# first kubectl), then pipe the manifest into `kubectl apply`, which works
# whether or not the Secret already exists.
admin_secret_args=(
  create secret generic olam-grafana-admin
  --from-literal=admin-user=admin
  --from-literal=admin-password="$GRAFANA_ADMIN_PW"
  --namespace "$NAMESPACE"
  --dry-run=client
  --output yaml
)
kubectl "${admin_secret_args[@]}" | kubectl apply --filename -

log "Secret applied"
110
+
111
# -------------------------------------------------------------------------
# Step 3a: Apply olam-dashboards ConfigMap BEFORE helm install
#          so Grafana's volume mount finds it on first boot (B3).
#          The ConfigMap is generated from grafana-dashboards/*.json by
#          packages/peripheral-services/scripts/sync-grafana-dashboards.sh.
# -------------------------------------------------------------------------
# Repository root: the git toplevel as seen from this script's directory, or
# the current working directory when git cannot provide one.
script_dir="$(dirname "$0")"
REPO_ROOT="$(git -C "$script_dir" rev-parse --show-toplevel 2>/dev/null || pwd)"
CONFIGMAP_MANIFEST="$REPO_ROOT/packages/peripheral-services/manifests/80-grafana-dashboard-configmap.yaml"

# A missing manifest is only warned about; the run continues.
if [[ ! -f "$CONFIGMAP_MANIFEST" ]]; then
  log "WARN: $CONFIGMAP_MANIFEST not found — Grafana will warn 'ConfigMap not found' until B3 is deployed"
else
  log "applying olam-dashboards ConfigMap from $CONFIGMAP_MANIFEST"
  kubectl apply -f "$CONFIGMAP_MANIFEST"
  log "ConfigMap applied"
fi
127
+
128
# -------------------------------------------------------------------------
# Step 3: Helm upgrade --install
# -------------------------------------------------------------------------
log "installing grafana/grafana ($GRAFANA_RELEASE) in namespace $NAMESPACE"

# Install-or-upgrade the pinned chart with the repository's values file;
# --wait with a 300s timeout is passed so helm blocks before returning.
helm_install_args=(
  upgrade --install "$GRAFANA_RELEASE" grafana/grafana
  --version "$GRAFANA_CHART_VERSION"
  --namespace "$NAMESPACE"
  --create-namespace
  --values "$REPO_ROOT/packages/peripheral-services/helm-values/grafana-values.yaml"
  --wait
  --timeout 300s
)
helm "${helm_install_args[@]}"

log "Grafana Helm install complete"
141
+
142
# -------------------------------------------------------------------------
# Step 4: Wait for Grafana pod Ready
# -------------------------------------------------------------------------
log "waiting for Grafana pod Ready (120s)"

# Block until the pods carrying the grafana name label report the Ready
# condition, or give up (non-zero exit) after 120 seconds.
kubectl wait pod \
  --namespace "$NAMESPACE" \
  --selector "app.kubernetes.io/name=grafana" \
  --for=condition=ready \
  --timeout=120s

log "Grafana pod Ready"
153
+
154
# -------------------------------------------------------------------------
# Step 5: Start port-forward in background
# -------------------------------------------------------------------------
pf_target="svc/$GRAFANA_RELEASE"
pf_mapping="${LOCAL_PORT}:${GRAFANA_SVC_PORT}"

log "port-forwarding svc/$GRAFANA_RELEASE $LOCAL_PORT:$GRAFANA_SVC_PORT in namespace $NAMESPACE"
kubectl port-forward -n "$NAMESPACE" "$pf_target" "$pf_mapping" &
PF_PID=$!   # remembered so the EXIT handler can stop the forwarder

log "port-forward PID $PF_PID; waiting ${PF_BIND_SECONDS}s for bind"
sleep "$PF_BIND_SECONDS"

# The forwarder must still be running once the grace period is over.
if ! kill -0 "$PF_PID" 2>/dev/null; then
  fail "port-forward process exited prematurely"
fi
169
+
170
# -------------------------------------------------------------------------
# Diagnostic helper — called on assertion failure
# -------------------------------------------------------------------------
# Print the last 50 log lines of the Grafana pods to stderr.
#
# Globals:  NAMESPACE (read)
# Outputs:  everything goes to stderr; nothing is written to stdout, so the
#           helper is safe to call from inside a command substitution.
# Returns:  always 0 — a failing `kubectl logs` is ignored on purpose, since
#           this is best-effort context for a failure already being reported.
dump_diagnostics() {
  log "DIAGNOSTIC: last 50 lines of Grafana pod logs:"
  # Only stdout needs redirecting: kubectl's stderr already goes to stderr.
  # (Writing `2>&1 >&2` instead would point BOTH streams at stdout.)
  kubectl logs -n "$NAMESPACE" \
    -l "app.kubernetes.io/name=grafana" \
    --tail=50 >&2 || true
}
179
+
180
# -------------------------------------------------------------------------
# Step 6: Assertion 1 — /api/health returns 200 with database: ok
# -------------------------------------------------------------------------
log "asserting Grafana health (GET /api/health)"

# Failure handling happens in the main shell, not inside the $(...): anything
# the diagnostics print on stdout then stays visible instead of being
# captured into the variable, and `fail` exits the script itself rather than
# only the substitution's subshell.
if ! HEALTH_RESPONSE=$(
  curl -sf \
    -u "admin:${GRAFANA_ADMIN_PW}" \
    "http://localhost:${LOCAL_PORT}/api/health"
); then
  dump_diagnostics
  fail "GET /api/health failed — Grafana not reachable on port $LOCAL_PORT"
fi

# The body must be JSON whose .database field equals "ok".
if ! jq -e '.database == "ok"' <<<"$HEALTH_RESPONSE" >/dev/null 2>&1; then
  log "DIAGNOSTIC: /api/health response:"
  printf '%s\n' "$HEALTH_RESPONSE" >&2
  dump_diagnostics
  fail '/api/health returned database != "ok" — Grafana DB layer not healthy'
fi

log "PASS: /api/health → database: ok"
199
+
200
# -------------------------------------------------------------------------
# Step 7: Assertion 2 — /api/datasources includes Loki entry with cluster URL
# -------------------------------------------------------------------------
log "asserting Loki datasource pre-wired (GET /api/datasources)"

# Failure handling happens in the main shell, not inside the $(...): anything
# the diagnostics print on stdout then stays visible instead of being
# captured into the variable, and `fail` exits the script itself rather than
# only the substitution's subshell.
if ! DS_RESPONSE=$(
  curl -sf \
    -u "admin:${GRAFANA_ADMIN_PW}" \
    "http://localhost:${LOCAL_PORT}/api/datasources"
); then
  dump_diagnostics
  fail "GET /api/datasources failed"
fi

EXPECTED_URL="olam-loki.monitoring.svc.cluster.local:3100"

# First check: at least one datasource of type "loki" exists.
if ! jq -e 'map(select(.type == "loki")) | length >= 1' <<<"$DS_RESPONSE" >/dev/null 2>&1; then
  log "DIAGNOSTIC: /api/datasources response:"
  printf '%s\n' "$DS_RESPONSE" >&2
  dump_diagnostics
  fail "datasources response contains no 'loki' type entry — datasource not provisioned"
fi

# Second check: a loki datasource whose url contains the expected
# cluster-local host:port.
if ! jq -e --arg url "$EXPECTED_URL" 'map(select(.type == "loki" and (.url | contains($url)))) | length >= 1' <<<"$DS_RESPONSE" >/dev/null 2>&1; then
  log "DIAGNOSTIC: /api/datasources response:"
  printf '%s\n' "$DS_RESPONSE" >&2
  dump_diagnostics
  fail "Loki datasource URL does not contain '$EXPECTED_URL' — check grafana-values.yaml datasources block"
fi

log "PASS: Loki datasource found with cluster-local URL $EXPECTED_URL"
228
+
229
# -------------------------------------------------------------------------
# Step 7b: Assertion 2b — dashboard provider loaded olam-home (catches mount-path bugs)
# -------------------------------------------------------------------------
log "asserting olam-home dashboard visible in /api/search (catches ConfigMap mount failures)"

# A failed request is tolerated here: it leaves the variable empty and the
# jq check below then reports the failure.
DASHBOARDS=$(
  curl -sf \
    -u "admin:${GRAFANA_ADMIN_PW}" \
    "http://localhost:${LOCAL_PORT}/api/search?type=dash-db&query=olam"
) || true

# Exactly one search hit must carry uid "olam-home".
if jq -e 'map(select(.uid == "olam-home")) | length == 1' <<<"$DASHBOARDS" >/dev/null 2>&1; then
  log "PASS: olam-home dashboard found via /api/search"
else
  log "DIAGNOSTIC: /api/search response:"
  printf '%s\n' "$DASHBOARDS" >&2
  dump_diagnostics
  fail "olam-home dashboard not found in /api/search — check ConfigMap mount path and dashboard provider config"
fi
248
+
249
# -------------------------------------------------------------------------
# Step 8: Assertion 3 — olam-home dashboard present (B3)
# -------------------------------------------------------------------------
log "asserting olam-home dashboard present (GET /api/dashboards/uid/olam-home)"

# Failure handling happens in the main shell, not inside the $(...): anything
# the diagnostics print on stdout then stays visible instead of being
# captured into the variable, and `fail` exits the script itself rather than
# only the substitution's subshell.
if ! DASHBOARD_RESPONSE=$(
  curl -sf \
    -u "admin:${GRAFANA_ADMIN_PW}" \
    "http://localhost:${LOCAL_PORT}/api/dashboards/uid/olam-home"
); then
  dump_diagnostics
  fail "GET /api/dashboards/uid/olam-home failed — dashboard not found or Grafana unreachable"
fi

# The returned document must report the uid that was asked for.
if ! jq -e '.dashboard.uid == "olam-home"' <<<"$DASHBOARD_RESPONSE" >/dev/null 2>&1; then
  log "DIAGNOSTIC: /api/dashboards/uid/olam-home response:"
  printf '%s\n' "$DASHBOARD_RESPONSE" >&2
  dump_diagnostics
  fail "olam-home dashboard uid mismatch or missing — check ConfigMap provisioning and Grafana provider config"
fi

log "PASS: olam-home dashboard present with uid=olam-home"

# -------------------------------------------------------------------------
# Final
# -------------------------------------------------------------------------
log "PASS: Grafana port-forward accessible; Loki datasource pre-wired; olam-home dashboard provisioned — Tasks B2+B3 verified"
exit 0