@pleri/olam-cli 0.1.160 → 0.1.162

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/README.md +11 -0
  2. package/dist/agent-stream/agent-sdk-to-chunks.js +20 -2
  3. package/dist/commands/bootstrap.d.ts +15 -0
  4. package/dist/commands/bootstrap.d.ts.map +1 -1
  5. package/dist/commands/bootstrap.js +58 -5
  6. package/dist/commands/bootstrap.js.map +1 -1
  7. package/dist/commands/flywheel/migrate-overlays.d.ts +1 -0
  8. package/dist/commands/flywheel/migrate-overlays.d.ts.map +1 -1
  9. package/dist/commands/flywheel/migrate-overlays.js +29 -3
  10. package/dist/commands/flywheel/migrate-overlays.js.map +1 -1
  11. package/dist/commands/skills-source.d.ts.map +1 -1
  12. package/dist/commands/skills-source.js +57 -2
  13. package/dist/commands/skills-source.js.map +1 -1
  14. package/dist/commands/skills.d.ts.map +1 -1
  15. package/dist/commands/skills.js +14 -0
  16. package/dist/commands/skills.js.map +1 -1
  17. package/dist/image-digests.json +7 -7
  18. package/dist/index.js +2424 -1781
  19. package/dist/lib/bootstrap-kubernetes.d.ts +42 -0
  20. package/dist/lib/bootstrap-kubernetes.d.ts.map +1 -0
  21. package/dist/lib/bootstrap-kubernetes.js +367 -0
  22. package/dist/lib/bootstrap-kubernetes.js.map +1 -0
  23. package/dist/lib/config.d.ts.map +1 -1
  24. package/dist/lib/config.js +6 -1
  25. package/dist/lib/config.js.map +1 -1
  26. package/dist/mcp-server.js +568 -368
  27. package/hermes-bundle/version.json +1 -1
  28. package/host-cp/k8s/manifests/50-deployment.yaml +1 -1
  29. package/host-cp/k8s/manifests/auth-service/50-deployment.yaml +1 -1
  30. package/host-cp/k8s/manifests/kg-service/50-deployment.yaml +1 -1
  31. package/host-cp/k8s/manifests/mcp-auth-service/50-deployment.yaml +1 -1
  32. package/host-cp/k8s/manifests/memory-service/50-deployment.yaml +1 -1
  33. package/host-cp/observability/grafana-port-forward.sh +283 -0
  34. package/host-cp/observability/kyverno-cardinality-mutate.sh +462 -0
  35. package/host-cp/observability/loki-ingest.sh +253 -0
  36. package/host-cp/observability/prom-no-double-grafana.sh +311 -0
  37. package/host-cp/peripheral-services/helm-values/grafana-values.yaml +159 -0
  38. package/host-cp/peripheral-services/helm-values/kube-prom-stack-values.yaml +229 -0
  39. package/host-cp/peripheral-services/helm-values/kyverno-values.yaml +85 -0
  40. package/host-cp/peripheral-services/helm-values/loki-values.yaml +166 -0
  41. package/host-cp/peripheral-services/helm-values/promtail-staging.yaml +92 -0
  42. package/host-cp/peripheral-services/helm-values/promtail-values.yaml +102 -0
  43. package/host-cp/peripheral-services/helm-values/traefik-values.yaml +73 -0
  44. package/host-cp/peripheral-services/manifests/20-namespace.yaml +6 -0
  45. package/host-cp/peripheral-services/manifests/24-deploy-kg-service.yaml +245 -0
  46. package/host-cp/peripheral-services/manifests/30-traefik-ingressroute-host-cp.yaml +22 -0
  47. package/host-cp/peripheral-services/manifests/40-traefik-ingressroute-kg.yaml +29 -0
  48. package/host-cp/peripheral-services/manifests/50-traefik-ingressroute-agent-memory.yaml +29 -0
  49. package/host-cp/peripheral-services/manifests/60-networkpolicy-ingress.yaml +80 -0
  50. package/host-cp/peripheral-services/manifests/65-networkpolicy-loki-prom-deny.yaml +67 -0
  51. package/host-cp/peripheral-services/manifests/80-grafana-dashboard-configmap.yaml +1349 -0
  52. package/host-cp/peripheral-services/manifests/90-prom-alert-cardinality.yaml +50 -0
  53. package/host-cp/peripheral-services/manifests/91-servicemonitor-host-cp.yaml +70 -0
  54. package/host-cp/peripheral-services/manifests/92-servicemonitor-kg-service.yaml +70 -0
  55. package/host-cp/peripheral-services/manifests/93-servicemonitor-memory-service.yaml +87 -0
  56. package/host-cp/peripheral-services/manifests/95-prom-recording-rules.yaml +108 -0
  57. package/host-cp/peripheral-services/manifests/96-kyverno-cardinality-mutate.yaml +195 -0
  58. package/host-cp/src/plan-chat-service.mjs +147 -1
  59. package/package.json +1 -1
@@ -0,0 +1,462 @@
1
+ #!/usr/bin/env bash
2
+ # kyverno-cardinality-mutate.sh — Phase C C8 follow-up e2e smoke test.
3
+ #
4
+ # Verifies that the Kyverno ClusterPolicy
5
+ # `enforce-cardinality-labeldrop` mutates incoming ServiceMonitor and
6
+ # PodMonitor objects at admission time, regardless of authorship,
7
+ # closing codex's "policy by convention" gap on PR #783.
8
+ #
9
+ # Test approach:
10
+ # 1. helm-install Kyverno (pinned 3.8.1) into the `kyverno` namespace.
11
+ # 2. Apply the ClusterPolicy.
12
+ # 3. POSITIVE test: apply ServiceMonitor `kyverno-mutate-positive-test`
13
+ # with selector `app: kyverno-mutate-positive-test` (no backing Service)
14
+ # and NO metricRelabelings; assert Kyverno mutated it; delete immediately.
15
+ # 4. IDEMPOTENCY test: apply ServiceMonitor `kyverno-mutate-idempotency-test`
16
+ # with selector `app: kyverno-mutate-idempotency-test` (different non-existent
17
+ # label) and the labeldrop already present; assert count stays at 1; delete.
18
+ # 5. SCRAPE-VERIFICATION test: deploy synthetic `kyverno-emitter` (Service +
19
+ # Deployment + ConfigMap) + dedicated ServiceMonitor `kyverno-emitter-sm`
20
+ # applied WITHOUT metricRelabelings; assert Kyverno mutates the SM at admission;
21
+ # wait for pod Ready; poll Prometheus for http_requests_total; assert
22
+ # world_id label is ABSENT.
23
+ #
24
+ # Key design decision: POSITIVE and IDEMPOTENCY tests use selectors that match
25
+ # no real Service, so they are isolated from each other and from the SCRAPE test.
26
+ # A single dedicated SM (`kyverno-emitter-sm`) owns the emitter endpoint, so
27
+ # prometheus-operator can reliably reconcile exactly one scrape config for it.
28
+ # Root cause of the prior failure (PR #828 CI run 26239574154): two SMs
29
+ # (naive-violator + pre-armoured-violator) competed for the same
30
+ # `app: kyverno-emitter` Endpoints; operator never reconciled either.
31
+ #
32
+ # Pre-conditions:
33
+ # - kube-prometheus-stack installed (cardinality-drop.sh ran; note the pre-flight errors below point to prom-no-double-grafana.sh).
34
+ # - kubectl context set to a live cluster; helm + jq + curl available.
35
+ #
36
+ # Idempotency: kubectl apply is idempotent; helm upgrade --install is
37
+ # idempotent. Cleanup trap removes synthetic resources on exit. The
38
+ # ClusterPolicy + Kyverno install are LEFT in the cluster (permanent
39
+ # C8 fixtures).
40
+ #
41
+ # Refs: docs/plans/k3s-ingress-observability/phase-c-tasks.md — C8
42
+ # codex review on PR #783 ("policy by convention" finding)
43
+ # PR #828 CI run 26239574154 (competing-SM root cause)
44
+
45
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Kyverno Helm chart version passed to `helm upgrade --install --version`.
KYVERNO_VERSION="3.8.1"
# Namespace the Kyverno chart is installed into.
KYVERNO_NAMESPACE="kyverno"
# Namespace holding Prometheus and every synthetic test fixture.
TEST_NAMESPACE="monitoring"
# Local end of the Prometheus port-forward.
# 9090, 9091 may be in use by sibling Phase C scripts
PROM_LOCAL_PORT="9092"
# Seconds to sleep after starting the port-forward before probing it.
PF_BIND_SECONDS=5
# Upper bound (seconds) on polling Prometheus for the emitter's samples.
TARGET_DISCOVERY_TIMEOUT=180
# Seconds between two consecutive Prometheus polls.
SCRAPE_POLL_INTERVAL=10
54
+
55
# Emit one informational line on stderr, tagged with the script prefix.
# Arguments: the message words (joined with spaces).
log() {
  printf '[kyverno-mutate] %s\n' "$*" >&2
}

# Emit one FAIL line on stderr and terminate the script with status 1.
# Arguments: the failure message words (joined with spaces).
fail() {
  printf '[kyverno-mutate] FAIL: %s\n' "$*" >&2
  exit 1
}
57
+
58
# Repository root as seen from the script's own directory; when that directory
# is not inside a git checkout, fall back to the current working directory.
REPO_ROOT="$(
  git -C "$(dirname "$0")" rev-parse --show-toplevel 2>/dev/null || pwd
)"

# Where the helm-values/ and manifests/ directories live:
#   - Published @pleri/olam-cli install (no monorepo): `olam setup` exports
#     OLAM_BUNDLE_ROOT=<install>/host-cp, so the bundled peripheral-services
#     directory sits directly beneath it.
#   - Monorepo checkout: OLAM_BUNDLE_ROOT is unset (or empty) and the source
#     directory under packages/peripheral-services/ is used instead.
case "${OLAM_BUNDLE_ROOT:-}" in
  "") PERIPHERAL_SERVICES_DIR="$REPO_ROOT/packages/peripheral-services" ;;
  *)  PERIPHERAL_SERVICES_DIR="$OLAM_BUNDLE_ROOT/peripheral-services" ;;
esac
69
+
70
+ # -------------------------------------------------------------------------
71
+ # Cleanup trap — kill port-forwards; remove synthetic resources on exit.
72
+ # Kyverno chart + ClusterPolicy stay (permanent C8 fixtures).
73
+ # -------------------------------------------------------------------------
74
# PID of the background Prometheus port-forward; empty until one is started.
PROM_PF_PID=""

# EXIT handler: stop the port-forward (if one was started) and delete every
# synthetic resource this script may have created. Each step is best-effort so
# cleanup never fails. The Kyverno chart and the ClusterPolicy are deliberately
# left in place.
cleanup() {
  if [[ -n "$PROM_PF_PID" ]]; then
    kill "$PROM_PF_PID" 2>/dev/null || true
  fi
  log "removing synthetic resources (idempotent)"

  # "<kind> <name>" pairs, in deletion order. The first two are the
  # mutation-test ServiceMonitors (normally already deleted inline;
  # --ignore-not-found makes the repeat harmless); the rest belong to the
  # scrape-verification test.
  local resource
  for resource in \
    "servicemonitor kyverno-mutate-positive-test" \
    "servicemonitor kyverno-mutate-idempotency-test" \
    "servicemonitor kyverno-emitter-sm" \
    "deployment kyverno-emitter" \
    "service kyverno-emitter-svc" \
    "configmap kyverno-emitter-config"; do
    kubectl delete "${resource%% *}" "${resource#* }" \
      -n "$TEST_NAMESPACE" --ignore-not-found=true 2>/dev/null || true
  done
}
trap cleanup EXIT
88
+
89
+ # -------------------------------------------------------------------------
90
+ # Pre-flight
91
+ # -------------------------------------------------------------------------
92
# Every external tool the script shells out to must be on PATH.
for required_tool in helm kubectl curl jq; do
  command -v "$required_tool" >/dev/null 2>&1 || fail "$required_tool not installed"
done

if ! kubectl cluster-info >/dev/null 2>&1; then
  fail "kubectl: no reachable cluster; set KUBECONFIG"
fi

# kube-prom-stack must already be up — the rest of the script relies on
# Prometheus and on the ServiceMonitor CRD existing.
if ! kubectl get crd servicemonitors.monitoring.coreos.com >/dev/null 2>&1; then
  fail "ServiceMonitor CRD not present — run prom-no-double-grafana.sh first"
fi

# NOTE(review): `kubectl get` with a label selector exits 0 even when nothing
# matches, so this only catches API/namespace errors, not a missing operator.
# Tightening it needs the operator's real labels — confirm before changing.
if ! kubectl get deployment -n "$TEST_NAMESPACE" -l "app.kubernetes.io/name=prometheus-operator" \
  >/dev/null 2>&1; then
  fail "prometheus-operator not found in $TEST_NAMESPACE — run prom-no-double-grafana.sh first"
fi

log "pre-flight checks passed"
107
+
108
+ # -------------------------------------------------------------------------
109
+ # Step 1: helm-install Kyverno
110
+ #
111
+ # Repo add is idempotent; helm upgrade --install handles fresh install + upgrade.
112
+ # `--wait` blocks until pods are Ready; admission webhook needs to be live
113
+ # before we apply the ClusterPolicy or our test ServiceMonitors.
114
+ # -------------------------------------------------------------------------
115
log "ensuring kyverno helm repo is configured"
# Best-effort on purpose: the repo may already be configured, and a stale
# index only matters if the pinned chart version cannot be resolved — in which
# case the install below fails loudly.
helm repo add kyverno https://kyverno.github.io/kyverno/ >/dev/null 2>&1 || true
helm repo update kyverno >/dev/null 2>&1 || true

log "installing kyverno chart $KYVERNO_VERSION (waits for admission webhook Ready)"
# pipefail (set at the top of the script) makes a helm failure abort the
# script even though its output is piped through tail.
helm upgrade --install olam-kyverno kyverno/kyverno \
  --version "$KYVERNO_VERSION" \
  --namespace "$KYVERNO_NAMESPACE" \
  --create-namespace \
  -f "$PERIPHERAL_SERVICES_DIR/helm-values/kyverno-values.yaml" \
  --wait --timeout 300s 2>&1 | tail -8

# Sanity: an admission-controller Deployment must exist. `kubectl get` with a
# label selector exits 0 even when nothing matches, so the exit status alone
# can never detect a missing Deployment — assert on the returned names instead.
admission_deployments="$(kubectl get deployment -n "$KYVERNO_NAMESPACE" \
  -l "app.kubernetes.io/component=admission-controller" -o name 2>/dev/null || true)"
[[ -n "$admission_deployments" ]] \
  || fail "kyverno admission controller not found in $KYVERNO_NAMESPACE"

log "waiting for kyverno admission webhook to be registered with apiserver"
# The webhook registration is the LAST thing kyverno does after pod-Ready;
# poll (every 5s, up to 120s) until the policy-validating webhook
# configuration exists so the ClusterPolicy can be admitted.
elapsed=0
while [ "$elapsed" -lt 120 ]; do
  if kubectl get validatingwebhookconfiguration kyverno-policy-validating-webhook-cfg \
    >/dev/null 2>&1; then
    log "kyverno webhooks registered after ${elapsed}s"
    break
  fi
  sleep 5
  elapsed=$((elapsed + 5))
done
# The loop only reaches 120 when no poll succeeded (a success breaks earlier).
if [ "$elapsed" -ge 120 ]; then
  fail "kyverno webhook registration timed out after 120s"
fi
148
+
149
+ # -------------------------------------------------------------------------
150
+ # Step 2: Apply the ClusterPolicy
151
+ # -------------------------------------------------------------------------
152
log "applying ClusterPolicy enforce-cardinality-labeldrop"
kubectl apply -f "$PERIPHERAL_SERVICES_DIR/manifests/96-kyverno-cardinality-mutate.yaml"

# Wait for the policy to be Ready. Kyverno reports readiness either through
# the legacy boolean `.status.ready` or through a `Ready` entry in
# `.status.conditions`; accept whichever is populated so the wait does not
# burn the full 60s on releases that only maintain the conditions list.
log "waiting up to 60s for ClusterPolicy to be Ready"
elapsed=0
while [ "$elapsed" -lt 60 ]; do
  # Output shape: "<status.ready>|<Ready condition status>"; absent fields
  # render as empty strings.
  policy_ready_state=$(kubectl get clusterpolicy enforce-cardinality-labeldrop \
    -o jsonpath='{.status.ready}{"|"}{.status.conditions[?(@.type=="Ready")].status}' \
    2>/dev/null || echo "")
  case "$policy_ready_state" in
    "true|"* | *"|True")
      log "ClusterPolicy Ready after ${elapsed}s"
      break
      ;;
  esac
  sleep 3
  elapsed=$((elapsed + 3))
done
# Not fatal: the mutation assertions that follow are the real proof that the
# policy is active.
if [ "$elapsed" -ge 60 ]; then
  log "WARN: ClusterPolicy status.ready not observed within 60s; proceeding (status field can lag)"
fi
172
+
173
+ # -------------------------------------------------------------------------
174
+ # Step 3: POSITIVE test — mutation only, no backing Service
175
+ #
176
+ # Uses selector `app: kyverno-mutate-positive-test` — a label that no
177
+ # real Service carries, so this SM never competes with anything for
178
+ # Endpoints. Its sole job is to exercise the Kyverno admission webhook.
179
+ #
180
+ # Deleted immediately after assertion so the SM space is clean when
181
+ # the scrape test runs.
182
+ # -------------------------------------------------------------------------
183
log "POSITIVE test: applying naive ServiceMonitor (no metricRelabelings, non-Service-backed selector)"
kubectl apply -f - <<'EOF'
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kyverno-mutate-positive-test
  namespace: monitoring
  labels:
    release: olam-prom
spec:
  namespaceSelector:
    matchNames:
      - monitoring
  selector:
    matchLabels:
      app: kyverno-mutate-positive-test
  endpoints:
    - port: metrics
      interval: 15s
      # NOTE: deliberately NO metricRelabelings — Kyverno must inject it.
EOF

# Fetch the object as stored by the apiserver (i.e. after admission) and keep
# the first endpoint's metricRelabelings as compact JSON ("[]" when absent).
ACTUAL=$(kubectl get servicemonitor kyverno-mutate-positive-test -n "$TEST_NAMESPACE" -o json \
  | jq -r '.spec.endpoints[0].metricRelabelings // [] | tojson')
log "kyverno-mutate-positive-test metricRelabelings after admission: $ACTUAL"

# Number of labeldrop rules whose regex mentions world_id.
INJECTED_COUNT=$(jq '[ .[] | select(.action == "labeldrop" and (.regex | contains("world_id"))) ] | length' <<<"$ACTUAL")
if [ "$INJECTED_COUNT" -lt 1 ]; then
  # Dump the policy for diagnosis before aborting.
  log "actual policy state:"
  kubectl get clusterpolicy enforce-cardinality-labeldrop -o yaml >&2 || true
  fail "POSITIVE test FAILED: Kyverno did not inject labeldrop into naive ServiceMonitor — third-party bypass gap NOT closed"
fi
log "PASS: naive ServiceMonitor was mutated at admission (labeldrop injected)"

log "deleting kyverno-mutate-positive-test (mutation-only test; SM space clean for scrape test)"
kubectl delete servicemonitor kyverno-mutate-positive-test -n "$TEST_NAMESPACE" --ignore-not-found=true
221
+
222
+ # -------------------------------------------------------------------------
223
+ # Step 4: IDEMPOTENCY test — mutation only, no backing Service
224
+ #
225
+ # Uses selector `app: kyverno-mutate-idempotency-test` — different from
226
+ # the positive test and from the scrape test label. No real Service.
227
+ # Deleted immediately after assertion.
228
+ # -------------------------------------------------------------------------
229
log "IDEMPOTENCY test: applying pre-armoured ServiceMonitor (labeldrop already present)"
kubectl apply -f - <<'EOF'
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kyverno-mutate-idempotency-test
  namespace: monitoring
  labels:
    release: olam-prom
spec:
  namespaceSelector:
    matchNames:
      - monitoring
  selector:
    matchLabels:
      app: kyverno-mutate-idempotency-test
  endpoints:
    - port: metrics
      interval: 15s
      metricRelabelings:
        - action: labeldrop
          regex: 'world_id|trace_id|user_id|request_id|operator_id'
EOF

# Count the world_id labeldrop rules on the stored object: exactly one means
# the policy left the pre-existing rule alone instead of appending a second.
DUP_COUNT=$(kubectl get servicemonitor kyverno-mutate-idempotency-test -n "$TEST_NAMESPACE" -o json \
  | jq '[ .spec.endpoints[0].metricRelabelings[] | select(.action == "labeldrop" and (.regex | contains("world_id"))) ] | length')
log "kyverno-mutate-idempotency-test labeldrop count: $DUP_COUNT"
if [ "$DUP_COUNT" -ne 1 ]; then
  # Show the full object so the unexpected rule list is visible in CI logs.
  kubectl get servicemonitor kyverno-mutate-idempotency-test -n "$TEST_NAMESPACE" -o yaml >&2
  fail "IDEMPOTENCY test FAILED: expected 1 labeldrop entry, got $DUP_COUNT — policy double-adds"
fi
log "PASS: pre-armoured ServiceMonitor has exactly 1 labeldrop (no double-add)"

log "deleting kyverno-mutate-idempotency-test (mutation-only test; SM space clean for scrape test)"
kubectl delete servicemonitor kyverno-mutate-idempotency-test -n "$TEST_NAMESPACE" --ignore-not-found=true
265
+
266
+ # -------------------------------------------------------------------------
267
+ # Step 5: SCRAPE-VERIFICATION test — dedicated SM + Service + Pod
268
+ #
269
+ # One SM (`kyverno-emitter-sm`) selects exactly one Service (`kyverno-emitter-svc`).
270
+ # No other SM in the cluster selects `app: kyverno-emitter`, so prometheus-operator
271
+ # reconciles a single clean scrape config.
272
+ #
273
+ # The SM is applied WITHOUT metricRelabelings so Kyverno's admission webhook
274
+ # fires — this is the load-bearing check that the policy applies during real
275
+ # scrape setup, not just on test fixtures.
276
+ #
277
+ # After admission we verify the spec has the labeldrop, then wait for the pod
278
+ # to be Ready and poll Prometheus for http_requests_total. We assert
279
+ # world_id is absent from all returned series.
280
+ #
281
+ # Mirrors the working pattern from dashboards-have-data.sh (single dedicated
282
+ # SM + co-located Service in `monitoring` namespace).
283
+ # -------------------------------------------------------------------------
284
log "SCRAPE-VERIFICATION test: deploying synthetic kyverno-emitter (emits http_requests_total{world_id})"
# Three objects in one apply:
#   - ConfigMap holding a static Prometheus exposition payload,
#   - Deployment running a tiny Python HTTP server that serves that payload
#     on :8080/metrics (404 for any other path),
#   - Service exposing the pod on a port named `metrics`.
kubectl apply -f - <<'EOF'
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: kyverno-emitter-config
  namespace: monitoring
data:
  metrics: |
    # HELP http_requests_total Synthetic counter; world_id is the cardinality bomb
    # TYPE http_requests_total counter
    http_requests_total{world_id="kyverno-world",route="/api",method="GET",status_code="200"} 1
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kyverno-emitter
  namespace: monitoring
  labels:
    app: kyverno-emitter
spec:
  replicas: 1
  selector:
    matchLabels:
      app: kyverno-emitter
  template:
    metadata:
      labels:
        app: kyverno-emitter
    spec:
      containers:
        - name: emitter
          image: python:3.11-alpine
          ports:
            - containerPort: 8080
          command: ["python3", "-c"]
          args:
            - |
              import http.server
              with open('/config/metrics') as f: METRICS = f.read().encode()
              class H(http.server.BaseHTTPRequestHandler):
                  def do_GET(self):
                      if self.path != '/metrics':
                          self.send_response(404); self.end_headers(); return
                      self.send_response(200)
                      self.send_header('Content-Type', 'text/plain; version=0.0.4; charset=utf-8')
                      self.end_headers()
                      self.wfile.write(METRICS)
                  def log_message(self, *a): pass
              http.server.HTTPServer(('0.0.0.0', 8080), H).serve_forever()
          volumeMounts:
            - name: config
              mountPath: /config
      volumes:
        - name: config
          configMap:
            name: kyverno-emitter-config
---
apiVersion: v1
kind: Service
metadata:
  name: kyverno-emitter-svc
  namespace: monitoring
  labels:
    app: kyverno-emitter
spec:
  selector:
    app: kyverno-emitter
  ports:
    - name: metrics
      port: 8080
      targetPort: 8080
EOF

# Block (up to 120s) until the emitter rollout completes; a timeout is a
# non-zero exit and aborts the script under `set -e`.
log "waiting for kyverno-emitter deployment Ready"
kubectl rollout status deployment/kyverno-emitter -n "$TEST_NAMESPACE" --timeout=120s
361
+
362
# The dedicated ServiceMonitor is submitted WITHOUT metricRelabelings, so the
# labeldrop can only be present afterwards if Kyverno added it at admission —
# this shows the policy fires on the SM that drives a real scrape, not merely
# on the POSITIVE test fixture.
log "applying kyverno-emitter-sm (no metricRelabelings — Kyverno must inject)"
kubectl apply -f - <<'EOF'
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kyverno-emitter-sm
  namespace: monitoring
  labels:
    release: olam-prom
spec:
  namespaceSelector:
    matchNames:
      - monitoring
  selector:
    matchLabels:
      app: kyverno-emitter
  endpoints:
    - port: metrics
      interval: 15s
      # NOTE: NO metricRelabelings — Kyverno must inject the labeldrop at admission.
EOF

# Belt-and-suspenders: read the stored object back and confirm the mutation
# landed on this SM too. Relabelings are kept as compact JSON ("[]" if absent).
SCRAPE_SM_ACTUAL=$(kubectl get servicemonitor kyverno-emitter-sm -n "$TEST_NAMESPACE" -o json \
  | jq -r '.spec.endpoints[0].metricRelabelings // [] | tojson')
log "kyverno-emitter-sm metricRelabelings after admission: $SCRAPE_SM_ACTUAL"

# Number of labeldrop rules whose regex mentions world_id.
SCRAPE_SM_INJECTED=$(jq '[ .[] | select(.action == "labeldrop" and (.regex | contains("world_id"))) ] | length' <<<"$SCRAPE_SM_ACTUAL")
if [ "$SCRAPE_SM_INJECTED" -lt 1 ]; then
  # Dump the policy for diagnosis before aborting.
  log "actual policy state:"
  kubectl get clusterpolicy enforce-cardinality-labeldrop -o yaml >&2 || true
  fail "SCRAPE-VERIFICATION test FAILED: Kyverno did not mutate kyverno-emitter-sm at admission"
fi
log "PASS: kyverno-emitter-sm was mutated at admission (labeldrop injected)"
401
+
402
# Open a background port-forward to Prometheus; its PID is recorded so the
# EXIT trap can kill it.
log "port-forwarding svc/prometheus-operated $PROM_LOCAL_PORT:9090"
kubectl port-forward -n "$TEST_NAMESPACE" "svc/prometheus-operated" "${PROM_LOCAL_PORT}:9090" &
PROM_PF_PID=$!

# Give the forwarder time to bind, then make sure the process is still alive.
sleep "$PF_BIND_SECONDS"
if ! kill -0 "$PROM_PF_PID" 2>/dev/null; then
  fail "Prometheus port-forward exited prematurely"
fi

# Base URL for all subsequent Prometheus HTTP API calls.
PROM_URL="http://localhost:${PROM_LOCAL_PORT}"
414
+
415
# Direct-metric polling rather than target-discovery polling.
#
# Rationale: polling /api/v1/targets for the emitter's job is brittle —
# operator reconciliation races, dropped-target filtering, and rare CRD
# revision lag have all surfaced as "target not in activeTargets" flakes
# during earlier ingress-integration runs. What we ACTUALLY care about is
# whether the mutated relabel was applied to a real scrape sample, so poll
# for the metric itself.
#
# The query is scoped to the emitter's Service via the `service` target label
# (the same label the diagnostics below read from activeTargets).
# `http_requests_total` is a very common metric name: an unscoped query can be
# satisfied by any other scraped workload, which would let this test pass
# without kyverno-emitter ever having been scraped.
EMITTER_QUERY='http_requests_total{service="kyverno-emitter-svc"}'

log "polling Prometheus for http_requests_total samples (up to ${TARGET_DISCOVERY_TIMEOUT}s)"
elapsed=0
RESULT=""
scrape_seen=false
while [ "$elapsed" -lt "$TARGET_DISCOVERY_TIMEOUT" ]; do
  # -G + --data-urlencode sends the selector as a correctly escaped query
  # string. A transport/HTTP error yields an empty RESULT and simply retries.
  RESULT=$(curl -sf -G --data-urlencode "query=${EMITTER_QUERY}" \
    "${PROM_URL}/api/v1/query" 2>/dev/null || echo "")
  if [ -n "$RESULT" ]; then
    # Unparseable output counts as "no series yet" rather than aborting.
    SERIES_COUNT=$(echo "$RESULT" | jq '.data.result | length' 2>/dev/null || echo "0")
    if [ "$SERIES_COUNT" -ge 1 ]; then
      log "http_requests_total returned $SERIES_COUNT series after ${elapsed}s"
      scrape_seen=true
      break
    fi
  fi
  sleep "$SCRAPE_POLL_INTERVAL"
  elapsed=$((elapsed + SCRAPE_POLL_INTERVAL))
done

if [ "$scrape_seen" != "true" ]; then
  # Timed out: dump everything useful for diagnosis (all best-effort).
  log "Active targets snapshot for diagnosis:"
  curl -sf "${PROM_URL}/api/v1/targets" | jq '.data.activeTargets[] | {job: .labels.job, service: .labels.service, namespace: .labels.namespace, health: .health, lastError: .lastError}' >&2 || true
  log "ServiceMonitor kyverno-emitter-sm status:"
  kubectl get servicemonitor kyverno-emitter-sm -n "$TEST_NAMESPACE" -o yaml >&2 || true
  log "prometheus-operator log tail (last 50 lines):"
  kubectl logs -n "$TEST_NAMESPACE" -l "app.kubernetes.io/name=prometheus-operator" --tail=50 >&2 || true
  fail "Prometheus did not scrape kyverno-emitter within ${TARGET_DISCOVERY_TIMEOUT}s"
fi
451
+
452
# RESULT holds the last Prometheus query response captured by the polling
# loop. Parsing it here also aborts the script (set -e) if it is not valid JSON.
SERIES_COUNT=$(jq '.data.result | length' <<<"$RESULT")

# "true" when at least one returned series still carries a world_id label.
LEAKED=$(jq '[.data.result[] | .metric | has("world_id")] | any' <<<"$RESULT")
if [ "$LEAKED" = "true" ]; then
  # Print every series' label set so the leaking one is visible.
  jq '.data.result[] | .metric' <<<"$RESULT" >&2
  fail "world_id label leaked into Prometheus — Kyverno-mutated relabel did NOT take effect at scrape time"
fi

log "PASS: kyverno-emitter scraped via kyverno-emitter-sm; world_id absent at scrape time"
log "PASS: C8 verified — Kyverno mutates third-party-shaped ServiceMonitors at admission and the mutation takes effect at scrape time"
exit 0