npm - @pleri/olam-cli - Versions diffs - 0.1.161 → 0.1.166 - Mend

@pleri/olam-cli 0.1.161 → 0.1.166

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

package/hermes-bundle/version.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
-  "bundledAt": "2026-05-22T07:50:42.070Z",
+  "bundledAt": "2026-05-22T11:04:18.339Z",
   "kgFirstSha": "29a9ccce1b115d049e375c4a90eb5cf7c123e610e2d0590270a4db2cdbc64a28"
 }

package/host-cp/k8s/manifests/50-deployment.yaml CHANGED Viewed

@@ -111,7 +111,7 @@ spec:
         # k3d), started by `olam upgrade` Step 0.7 — not inside this Pod.
       containers:
         - name: olam-host-cp
-          image: ghcr.io/pleri/olam-host-cp@sha256:a71a02ad25f03c1481d8b5a4f3cf50614eb1f9b02376935e4df5c65b9bd4fa8f
+          image: ghcr.io/pleri/olam-host-cp@sha256:7a49b44546d9b69c5a7448613130a43319e90e06a2999d688101657d7d851dda
           imagePullPolicy: IfNotPresent
           securityContext:
             runAsNonRoot: true

package/host-cp/k8s/manifests/auth-service/50-deployment.yaml CHANGED Viewed

@@ -70,7 +70,7 @@ spec:
               mountPath: /data
       containers:
         - name: olam-auth-service
-          image: ghcr.io/pleri/olam-auth@sha256:7ad7f92e5feafff3921f3219886a2aec312d83e00c66eaa568e53aac03b19b16
+          image: ghcr.io/pleri/olam-auth@sha256:d41a940bc9eb7016aeecc1c653e057d63d32d33c1e694d298b5340711d3d0bd8
           imagePullPolicy: IfNotPresent
           securityContext:
             runAsNonRoot: true

package/host-cp/k8s/manifests/kg-service/50-deployment.yaml CHANGED Viewed

@@ -61,7 +61,7 @@ spec:
               mountPath: /data
       containers:
         - name: olam-kg-service
-          image: ghcr.io/pleri/olam-kg-service@sha256:9c01fd288e136116abfd0e34c7230417a30c5036e411838b88a553c44a802f13
+          image: ghcr.io/pleri/olam-kg-service@sha256:b9a96be3cad11f298286d011a88309ac2e495074970bf4d860c032709a5ab72f
           imagePullPolicy: IfNotPresent
           securityContext:
             runAsNonRoot: true

package/host-cp/k8s/manifests/mcp-auth-service/50-deployment.yaml CHANGED Viewed

@@ -68,7 +68,7 @@ spec:
               mountPath: /data
       containers:
         - name: olam-mcp-auth-service
-          image: ghcr.io/pleri/olam-mcp-auth@sha256:ddd15d5ee0b18ed36a8916c4d8d985182f6e32b57fc2625295f5db19a14b37a0
+          image: ghcr.io/pleri/olam-mcp-auth@sha256:0322f65701dfda84a2d0672071914fd7276927772ccccf6c5f55c4c3617cd8fe
           imagePullPolicy: IfNotPresent
           securityContext:
             runAsNonRoot: true

package/host-cp/k8s/manifests/memory-service/50-deployment.yaml CHANGED Viewed

@@ -70,7 +70,7 @@ spec:
           # bootstrap-placeholder comment + run `npm run refresh:manifest-digests`
           # once ghcr.io/pleri/olam-memory-service has a real published digest.
           # bootstrap-placeholder: pre-publish; refresh after first release
-          image: ghcr.io/pleri/olam-memory-service@sha256:8dd1593af37b345a9b9b741803355254f1f719d9bf23e56339d9baed8dea9ac1
+          image: ghcr.io/pleri/olam-memory-service@sha256:70ae9c81efe07d8105c109aea970105709fc4daa50b0d688aa5d299a39a8b24a
           imagePullPolicy: IfNotPresent
           securityContext:
             runAsNonRoot: true

package/host-cp/observability/grafana-port-forward.sh CHANGED Viewed

@@ -115,7 +115,17 @@ log "Secret applied"
 #          packages/peripheral-services/scripts/sync-grafana-dashboards.sh.
 # -------------------------------------------------------------------------
 REPO_ROOT="$(git -C "$(dirname "$0")" rev-parse --show-toplevel 2>/dev/null || pwd)"
-CONFIGMAP_MANIFEST="$REPO_ROOT/packages/peripheral-services/manifests/80-grafana-dashboard-configmap.yaml"
+# When invoked from a published @pleri/olam-cli install (no monorepo), `olam
+# setup` exports OLAM_BUNDLE_ROOT=<install>/host-cp so the bundled
+# peripheral-services/{helm-values,manifests} directory is reachable.
+# Monorepo callers leave it unset; the script falls back to the source dir
+# under packages/peripheral-services/.
+if [[ -n "${OLAM_BUNDLE_ROOT:-}" ]]; then
+  PERIPHERAL_SERVICES_DIR="$OLAM_BUNDLE_ROOT/peripheral-services"
+else
+  PERIPHERAL_SERVICES_DIR="$REPO_ROOT/packages/peripheral-services"
+fi
+CONFIGMAP_MANIFEST="$PERIPHERAL_SERVICES_DIR/manifests/80-grafana-dashboard-configmap.yaml"
 if [[ -f "$CONFIGMAP_MANIFEST" ]]; then
   log "applying olam-dashboards ConfigMap from $CONFIGMAP_MANIFEST"
@@ -133,7 +143,7 @@ helm upgrade --install "$GRAFANA_RELEASE" grafana/grafana \
   --version "$GRAFANA_CHART_VERSION" \
   --namespace "$NAMESPACE" \
   --create-namespace \
-  -f "$REPO_ROOT/packages/peripheral-services/helm-values/grafana-values.yaml" \
+  -f "$PERIPHERAL_SERVICES_DIR/helm-values/grafana-values.yaml" \
   --wait \
   --timeout 300s

package/host-cp/observability/kyverno-cardinality-mutate.sh CHANGED Viewed

@@ -56,6 +56,16 @@ log()  { printf '[kyverno-mutate] %s\n' "$*" >&2; }
 fail() { printf '[kyverno-mutate] FAIL: %s\n' "$*" >&2; exit 1; }
 REPO_ROOT="$(git -C "$(dirname "$0")" rev-parse --show-toplevel 2>/dev/null || pwd)"
+# When invoked from a published @pleri/olam-cli install (no monorepo), `olam
+# setup` exports OLAM_BUNDLE_ROOT=<install>/host-cp so the bundled
+# peripheral-services/{helm-values,manifests} directory is reachable.
+# Monorepo callers leave it unset; the script falls back to the source dir
+# under packages/peripheral-services/.
+if [[ -n "${OLAM_BUNDLE_ROOT:-}" ]]; then
+  PERIPHERAL_SERVICES_DIR="$OLAM_BUNDLE_ROOT/peripheral-services"
+else
+  PERIPHERAL_SERVICES_DIR="$REPO_ROOT/packages/peripheral-services"
+fi
 # -------------------------------------------------------------------------
 # Cleanup trap — kill port-forwards; remove synthetic resources on exit.
@@ -111,7 +121,7 @@ helm upgrade --install olam-kyverno kyverno/kyverno \
   --version "$KYVERNO_VERSION" \
   --namespace "$KYVERNO_NAMESPACE" \
   --create-namespace \
-  -f "$REPO_ROOT/packages/peripheral-services/helm-values/kyverno-values.yaml" \
+  -f "$PERIPHERAL_SERVICES_DIR/helm-values/kyverno-values.yaml" \
   --wait --timeout 300s 2>&1 | tail -8
 # Sanity: kyverno-admission-controller Deployment Ready.
@@ -140,7 +150,7 @@ fi
 # Step 2: Apply the ClusterPolicy
 # -------------------------------------------------------------------------
 log "applying ClusterPolicy enforce-cardinality-labeldrop"
-kubectl apply -f "$REPO_ROOT/packages/peripheral-services/manifests/96-kyverno-cardinality-mutate.yaml"
+kubectl apply -f "$PERIPHERAL_SERVICES_DIR/manifests/96-kyverno-cardinality-mutate.yaml"
 # Wait for policy to be Ready (Kyverno controller picks it up and reports
 # readiness in status.ready / .conditions).

package/host-cp/observability/loki-ingest.sh CHANGED Viewed

@@ -66,6 +66,16 @@ log "pre-flight checks passed"
 # Resolve repo root so helm -f paths work regardless of invocation cwd
 # -------------------------------------------------------------------------
 REPO_ROOT="$(git -C "$(dirname "$0")" rev-parse --show-toplevel 2>/dev/null || pwd)"
+# When invoked from a published @pleri/olam-cli install (no monorepo), `olam
+# setup` exports OLAM_BUNDLE_ROOT=<install>/host-cp so the bundled
+# peripheral-services/{helm-values,manifests} directory is reachable.
+# Monorepo callers leave it unset; the script resolves the source dir under
+# packages/peripheral-services/.
+if [[ -n "${OLAM_BUNDLE_ROOT:-}" ]]; then
+  PERIPHERAL_SERVICES_DIR="$OLAM_BUNDLE_ROOT/peripheral-services"
+else
+  PERIPHERAL_SERVICES_DIR="$REPO_ROOT/packages/peripheral-services"
+fi
 # -------------------------------------------------------------------------
 # Ensure grafana Helm repo is present (idempotent — safe to re-run)
@@ -81,7 +91,7 @@ helm upgrade --install "$LOKI_RELEASE" grafana/loki \
   --version 6.7.4 \
   --namespace "$NAMESPACE" \
   --create-namespace \
-  -f "$REPO_ROOT/packages/peripheral-services/helm-values/loki-values.yaml" \
+  -f "$PERIPHERAL_SERVICES_DIR/helm-values/loki-values.yaml" \
   --wait \
   --timeout 300s
@@ -94,7 +104,7 @@ log "installing grafana/promtail ($PROMTAIL_RELEASE) in namespace $NAMESPACE"
 helm upgrade --install "$PROMTAIL_RELEASE" grafana/promtail \
   --version 6.16.6 \
   --namespace "$NAMESPACE" \
-  -f "$REPO_ROOT/packages/peripheral-services/helm-values/promtail-values.yaml" \
+  -f "$PERIPHERAL_SERVICES_DIR/helm-values/promtail-values.yaml" \
   --wait \
   --timeout 120s

package/host-cp/observability/prom-no-double-grafana.sh CHANGED Viewed

@@ -43,6 +43,16 @@ fail() { printf '[prom-no-double-grafana] FAIL: %s\n' "$*" >&2; exit 1; }
 # Resolve repo root so helm -f paths work regardless of invocation cwd
 # -------------------------------------------------------------------------
 REPO_ROOT="$(git -C "$(dirname "$0")" rev-parse --show-toplevel 2>/dev/null || pwd)"
+# When invoked from a published @pleri/olam-cli install (no monorepo), `olam
+# setup` exports OLAM_BUNDLE_ROOT=<install>/host-cp so the bundled
+# peripheral-services/{helm-values,manifests} directory is reachable.
+# Monorepo callers leave it unset; the script falls back to the source dir
+# under packages/peripheral-services/.
+if [[ -n "${OLAM_BUNDLE_ROOT:-}" ]]; then
+  PERIPHERAL_SERVICES_DIR="$OLAM_BUNDLE_ROOT/peripheral-services"
+else
+  PERIPHERAL_SERVICES_DIR="$REPO_ROOT/packages/peripheral-services"
+fi
 # -------------------------------------------------------------------------
 # Cleanup trap — kill port-forwards on exit; leave Helm releases in place
@@ -84,7 +94,7 @@ helm upgrade --install "$PROM_RELEASE" prometheus-community/kube-prometheus-stac
   --version "$PROM_CHART_VERSION" \
   --namespace "$NAMESPACE" \
   --create-namespace \
-  -f "$REPO_ROOT/packages/peripheral-services/helm-values/kube-prom-stack-values.yaml" \
+  -f "$PERIPHERAL_SERVICES_DIR/helm-values/kube-prom-stack-values.yaml" \
   --wait \
   --timeout 600s
@@ -131,7 +141,7 @@ log "upgrading Phase B charts with runtime --set serviceMonitor.enabled=true (pi
 helm upgrade olam-loki grafana/loki \
   --version "$LOKI_CHART_VERSION" \
   --namespace "$NAMESPACE" \
-  -f "$REPO_ROOT/packages/peripheral-services/helm-values/loki-values.yaml" \
+  -f "$PERIPHERAL_SERVICES_DIR/helm-values/loki-values.yaml" \
   --wait \
   --timeout 300s \
   --reuse-values \
@@ -142,7 +152,7 @@ log "olam-loki upgraded (ServiceMonitor enabled)"
 helm upgrade olam-promtail grafana/promtail \
   --version "$PROMTAIL_CHART_VERSION" \
   --namespace "$NAMESPACE" \
-  -f "$REPO_ROOT/packages/peripheral-services/helm-values/promtail-values.yaml" \
+  -f "$PERIPHERAL_SERVICES_DIR/helm-values/promtail-values.yaml" \
   --wait \
   --timeout 300s \
   --reuse-values \
@@ -153,7 +163,7 @@ log "olam-promtail upgraded (ServiceMonitor enabled)"
 helm upgrade "$GRAFANA_RELEASE" grafana/grafana \
   --version "$GRAFANA_CHART_VERSION" \
   --namespace "$NAMESPACE" \
-  -f "$REPO_ROOT/packages/peripheral-services/helm-values/grafana-values.yaml" \
+  -f "$PERIPHERAL_SERVICES_DIR/helm-values/grafana-values.yaml" \
   --wait \
   --timeout 300s \
   --reuse-values \
@@ -272,7 +282,7 @@ log "PASS: $ACTIVE active scrape target(s) found in Prometheus"
 PROM_URL="http://localhost:${PROM_LOCAL_PORT}"
 log "applying 95-prom-recording-rules.yaml (skipped by apply-manifests due to 9[0-9]-prom-* filter)"
-kubectl apply -f "$REPO_ROOT/packages/peripheral-services/manifests/95-prom-recording-rules.yaml"
+kubectl apply -f "$PERIPHERAL_SERVICES_DIR/manifests/95-prom-recording-rules.yaml"
 # Prometheus operator reconcile + config reload can take ~60-90s (C2 lesson).
 # Poll /api/v1/rules until our group appears (up to 180s).

package/host-cp/peripheral-services/helm-values/grafana-values.yaml ADDED Viewed

@@ -0,0 +1,159 @@
+# Grafana Helm values — k3s-ingress-observability Phase B Task B2
+#
+# STANDALONE grafana/grafana chart per OQ-p3-4 + Decision 16.
+#   - This is NOT the Grafana bundled with kube-prometheus-stack.
+#   - Phase C kube-prometheus-stack MUST set `grafana.enabled: false`
+#     explicitly to prevent a second Grafana Deployment from landing.
+#   - Port-forward only — NEVER expose via Traefik IngressRoute.
+#     See T7 in DESIGN.md: secret exfil mitigated by no ingress surface.
+#
+# Chart: grafana/grafana; pinned to 8.5.2 (latest stable as of 2026-05-20).
+# Upgrade discipline: chart version is embedded in the e2e script comment.
+# -------------------------------------------------------------------------
+# Admin credentials — loaded from a pre-existing Secret, NOT from chart
+# values. Secret is created by scripts/e2e/grafana-port-forward.sh before
+# helm install, or by the operator following the procedure in
+# packages/peripheral-services/manifests/README.md (§ "Grafana admin secret").
+# The placeholder manifest (70-grafana-secret.yaml) was removed 2026-05-21
+# (dogfood finding #4) because `kubectl apply` would overwrite the operator's
+# pre-created Secret with the placeholder value.
+# -------------------------------------------------------------------------
+admin:
+  existingSecret: olam-grafana-admin
+  userKey: admin-user
+  passwordKey: admin-password
+# -------------------------------------------------------------------------
+# Service: ClusterIP only.
+# Decision 16: port-forward only; never ingress-routed.
+# Access: `kubectl port-forward -n monitoring svc/olam-grafana 3000:80`
+# -------------------------------------------------------------------------
+service:
+  type: ClusterIP
+  port: 80
+# -------------------------------------------------------------------------
+# Ingress: disabled.
+# Decision 16 + OQ-p3-4: Grafana is never exposed via Traefik IngressRoute.
+# Port-forward is the sole operator access path. Enabling ingress here would
+# silently violate the access-control intent even if no IngressRoute manifest
+# is committed.
+# -------------------------------------------------------------------------
+ingress:
+  enabled: false   # Decision 16: port-forward only; never ingress-routed
+# -------------------------------------------------------------------------
+# Datasources: Loki (default) + Prometheus (added in Phase C Task C1).
+#
+# Dual-chart pattern:
+#   - kube-prometheus-stack (C1) provides Prometheus. Its bundled Grafana
+#     sub-chart is disabled (grafana.enabled: false in kube-prom-stack-values.yaml).
+#   - This standalone grafana/grafana chart (Phase B) is the only Grafana.
+#   - The Prometheus datasource URL points at `prometheus-operated`, which is
+#     the in-cluster Service that kube-prometheus-stack's Prometheus Operator
+#     creates for the managed Prometheus StatefulSet.
+#   - timeInterval: 15s matches the scrape interval in kube-prom-stack-values.yaml
+#     so Grafana's step calculation aligns with actual data granularity.
+#   - exemplarTraceIdDestinations.datasourceUid: tempo is harmless until Phase D
+#     adds Tempo; Grafana silently ignores unknown datasource UIDs.
+#
+# editable: false prevents accidental operator drift across sessions.
+# -------------------------------------------------------------------------
+datasources:
+  datasources.yaml:
+    apiVersion: 1
+    datasources:
+      - name: Loki
+        type: loki
+        access: proxy
+        url: http://olam-loki.monitoring.svc.cluster.local:3100
+        isDefault: true
+        editable: false
+      - name: Prometheus
+        type: prometheus
+        access: proxy
+        url: http://prometheus-operated.monitoring.svc.cluster.local:9090
+        isDefault: false
+        editable: false
+        jsonData:
+          timeInterval: 15s         # matches scrape interval in kube-prom-stack-values.yaml
+          exemplarTraceIdDestinations:
+            - name: trace_id
+              datasourceUid: tempo  # Phase D may add Tempo; harmless until then
+# -------------------------------------------------------------------------
+# Dashboard provisioner: file-based ConfigMap mount.
+# B3 lands the olam-dashboards ConfigMap and the actual JSON files.
+# B2 wires the loader so B3's ConfigMap is picked up automatically.
+# -------------------------------------------------------------------------
+dashboardProviders:
+  dashboardproviders.yaml:
+    apiVersion: 1
+    providers:
+      - name: olam-default
+        orgId: 1
+        folder: 'Olam'
+        type: file
+        disableDeletion: true
+        updateIntervalSeconds: 30
+        allowUiUpdates: false
+        options:
+          path: /var/lib/grafana/dashboards/olam-default
+# Wire the volume mount — B3 creates this ConfigMap with the actual JSON.
+# Grafana will warn "ConfigMap olam-dashboards not found" until B3 lands;
+# this is benign and does not block Grafana startup.
+dashboardsConfigMaps:
+  olam-default: olam-dashboards   # B3 creates this ConfigMap
+# -------------------------------------------------------------------------
+# Resources: tuned for single-operator k3s (<256Mi idle typical).
+# P2 acceptance criterion: <500MB idle / <1GB typical across full LGTM stack.
+# -------------------------------------------------------------------------
+resources:
+  requests:
+    cpu: 50m
+    memory: 128Mi
+  limits:
+    cpu: 200m
+    memory: 256Mi   # P2: keeps Grafana within its share of the LGTM RAM budget
+# -------------------------------------------------------------------------
+# Persistence: disabled for Phase B.
+# Grafana state (dashboards, users) lives in ConfigMaps / values files.
+# Phase C may enable a PV if fine-grained alert state or annotations
+# accumulate. For now, stateless Grafana is simpler and matches S2.
+# -------------------------------------------------------------------------
+persistence:
+  enabled: false   # S2: ConfigMap-mounted dashboards; no PV needed in Phase B
+# -------------------------------------------------------------------------
+# ServiceMonitor: Phase C Prometheus scrapes Grafana's /metrics endpoint.
+# Disabled in Phase B: the ServiceMonitor CRD (monitoring.coreos.com/v1) is
+# shipped by kube-prometheus-stack in Phase C. The earlier "enable now to
+# avoid a Phase C helm upgrade" rationale was wrong — Phase C will need a
+# helm upgrade anyway to wire Prometheus scrape targets. Flipping this on
+# pre-CRD breaks the install on chart versions that hard-validate.
+# -------------------------------------------------------------------------
+serviceMonitor:
+  # Disabled in the source-of-truth values file so a standalone Phase B install
+  # (without kube-prometheus-stack) does not hard-fail when the CRD is absent.
+  # The C1 e2e script flips this on at RUNTIME via
+  #   helm upgrade ... --reuse-values --set serviceMonitor.enabled=true
+  # AFTER kube-prom-stack has installed the ServiceMonitor CRD.
+  enabled: false
+# -------------------------------------------------------------------------
+# Grafana.ini overrides: anonymous access is left at its default (disabled).
+# Sets the server root_url so port-forward URLs render correctly in email /
+# share links (cosmetic; not a security seam), turns off analytics reporting
+# and update checks, and disallows embedding.
+# -------------------------------------------------------------------------
+grafana.ini:
+  server:
+    root_url: "%(protocol)s://%(domain)s:%(http_port)s/"
+  analytics:
+    reporting_enabled: false   # no telemetry to grafana.com
+    check_for_updates: false
+  security:
+    allow_embedding: false

package/host-cp/peripheral-services/helm-values/kube-prom-stack-values.yaml ADDED Viewed

@@ -0,0 +1,229 @@
+# kube-prometheus-stack Helm values — k3s-ingress-observability Phase C Task C1
+#
+# Chart: prometheus-community/kube-prometheus-stack; pinned to 85.2.0
+# (latest stable as of 2026-05-21).
+# Upgrade discipline: pin in this file + e2e script comment must stay in sync.
+#
+# CRITICAL: grafana.enabled MUST stay false.
+#   Phase B ships a standalone grafana/grafana chart (olam-grafana release).
+#   kube-prometheus-stack's bundled Grafana sub-chart is disabled to prevent
+#   a second Grafana Deployment from landing in the cluster.
+#   Decision 16 + OQ-p3-4: Phase B's standalone Grafana is canonical.
+#   Enabling the sub-chart here would violate that decision and create two
+#   Grafana instances — caught by prom-no-double-grafana.sh's single-Grafana
+#   assertion.
+#
+# Resource budget summary (Phase C contribution to P2 target <500MB idle / <1GB typical):
+#   prometheus-operator:  128Mi req / 512Mi limit
+#   prometheus:           512Mi req / 2Gi limit
+#   node-exporter:        64Mi req  / 128Mi limit
+#   kube-state-metrics:   128Mi req / 256Mi limit
+#   Total C1 addition:    ~832Mi req / ~3Gi limit (spread across nodes)
+#
+# Retention policy (Decision 14): scrape 15s / retention 15d / size cap 10GiB.
+# The size cap (T10 TSDB corruption mitigation) is the hard guard; retention 15d
+# is advisory — the size cap enforces first.
+#
+# Alertmanager: enabled. C1 shipped without alertmanager; C2 lands the first
+# alert rule (OlamActiveSeriesHigh, cardinality 80k) and sets
+# alertmanager.enabled: true. The alertmanager block in this file sets no
+# receivers; configure them separately.
+# -------------------------------------------------------------------------
+# CARDINALITY ENFORCEMENT — Task C2 (T1 cardinality bomb / P4 <100k active series)
+#
+# Goal: strip high-cardinality labels (world_id, trace_id, user_id,
+# request_id, operator_id) from every scraped series BEFORE TSDB ingest.
+#
+# Architecture finding (helm template verified, 2026-05-21):
+#   The prometheus-operator Prometheus CR has NO global metricRelabelConfigs
+#   field. The Prometheus CR spec exposes only per-ServiceMonitor endpoint
+#   metricRelabelings. There is no chart-level "apply to all scrapes" slot.
+#
+# Enforcement strategy (two-layer):
+#   Layer 1 — chart-managed ServiceMonitors: set metricRelabelings on every
+#     ServiceMonitor the chart controls (alertmanager, coreDns,
+#     prometheusOperator, prometheus self-scrape, node-exporter).
+#     Belt-and-suspenders; these services don't emit world_id etc. in
+#     practice, but the rule is free.
+#     Note: kube-state-metrics sub-chart has no metricRelabelings slot in
+#     its prometheus.monitor section at chart version 85.2.0 — omitted.
+#   Layer 2 — user-deployed ServiceMonitors: the cardinality-drop.sh e2e
+#     script's synthetic violator ServiceMonitor carries the same labeldrop
+#     rule (release: olam-prom label + metricRelabelings). New services
+#     MUST include the same block — enforced by docs + code review.
+#
+# Why labeldrop is the right action:
+#   action: labeldrop removes the matched labels from ALL series that carry
+#   them, regardless of metric name. This is the same semantic as Promtail's
+#   pipeline drop stages (promtail-values.yaml) — both layers stay in sync.
+#   world_id surfaces in dashboards via EXEMPLARS (Decision 9), not labels.
+#
+# Regex covers all five taxonomy labels from observability-label-taxonomy:
+#   world_id, trace_id, user_id, request_id, operator_id
+# -------------------------------------------------------------------------
+_cardinalityLabeldrop: &cardinality-labeldrop
+  - action: labeldrop
+    regex: 'world_id|trace_id|user_id|request_id|operator_id'
+# -------------------------------------------------------------------------
+# HARD REQUIREMENT: grafana sub-chart is off.
+# See top-of-file comment for rationale.
+# -------------------------------------------------------------------------
+grafana:
+  enabled: false   # HARD: Decision 16 + OQ-p3-4 — standalone Grafana (olam-grafana) is canonical
+# -------------------------------------------------------------------------
+# Alertmanager: enabled as of C2, which lands the first alert rule
+# (OlamActiveSeriesHigh). C1 shipped without alertmanager.
+# -------------------------------------------------------------------------
+alertmanager:
+  enabled: true   # C2: first alert rule (OlamActiveSeriesHigh) lands; alertmanager enabled
+  serviceMonitor:
+    metricRelabelings: *cardinality-labeldrop
+# -------------------------------------------------------------------------
+# Default kube-controller-manager / scheduler / proxy / etcd monitors.
+# These ServiceMonitors don't work on k3d/k3s because the endpoints are not
+# exposed via the usual ports. Disabling avoids noisy "endpoint not found"
+# warnings and scrape failures on every Prometheus eval cycle.
+# -------------------------------------------------------------------------
+kubeControllerManager:
+  enabled: false
+kubeScheduler:
+  enabled: false
+kubeProxy:
+  enabled: false
+kubeEtcd:
+  enabled: false
+# kube-apiserver and kubelet DO work on k3d but generate high-cardinality
+# label combinations. Disable for now; re-evaluate when per-service /metrics
+# (C3) and cardinality enforcement (C2) are in place.
+kubeApiServer:
+  enabled: false
+kubelet:
+  enabled: false
+# -------------------------------------------------------------------------
+# Default alerting rules: off.
+# The bundled default rules generate Alertmanager receivers and PrometheusRule
+# objects for kubelet, etcd, apiserver, etc. — most don't fire on k3d anyway
+# and add noise before C2's focused cardinality rule lands.
+# C2 will add targeted PrometheusRule objects separately.
+# -------------------------------------------------------------------------
+defaultRules:
+  create: false
+# -------------------------------------------------------------------------
+# coreDns — ServiceMonitor with labeldrop (Layer 1 cardinality enforcement)
+# -------------------------------------------------------------------------
+coreDns:
+  serviceMonitor:
+    metricRelabelings: *cardinality-labeldrop
+# -------------------------------------------------------------------------
+# CRDs: install via chart (default: true, explicit for clarity).
+# These CRDs (ServiceMonitor, PodMonitor, PrometheusRule, etc.) are required
+# before Phase B's loki/promtail/grafana charts can have serviceMonitor.enabled:true.
+# Phase C's e2e script waits for servicemonitors.monitoring.coreos.com to be
+# Established before helm-upgrading the Phase B charts.
+# -------------------------------------------------------------------------
+crds:
+  enabled: true
+# -------------------------------------------------------------------------
+# Prometheus Operator
+# -------------------------------------------------------------------------
+prometheusOperator:
+  enabled: true
+  serviceMonitor:
+    metricRelabelings: *cardinality-labeldrop
+  resources:
+    requests:
+      cpu: 100m
+      memory: 128Mi
+    limits:
+      cpu: 500m
+      memory: 512Mi
+# -------------------------------------------------------------------------
+# Prometheus core — Decision 14: scrape 15s / retention 15d / 10GiB cap
+# -------------------------------------------------------------------------
+prometheus:
+  serviceMonitor:
+    metricRelabelings: *cardinality-labeldrop
+  prometheusSpec:
+    scrapeInterval: 15s              # Decision 14
+    evaluationInterval: 15s
+    retention: 15d                   # Decision 14 — advisory; size cap enforces first
+    retentionSize: 10GiB             # Decision 14 — T10 TSDB corruption prevention
+    walCompression: true
+    enableAdminAPI: false            # security: admin API allows snapshot deletion + series deletion
+    enableRemoteWriteReceiver: false # not a remote-write target; no inbound writes
+    logLevel: warn                   # info is noisy at 15s scrape cycle
+    resources:
+      requests:
+        cpu: 200m
+        memory: 512Mi
+      limits:
+        cpu: 1000m
+        memory: 2Gi
+    # PersistentVolume for TSDB. 12Gi = 10GiB retention cap + ~20% headroom.
+    # local-path provisioner is used on k3d; cloud providers use their default SC.
+    storageSpec:
+      volumeClaimTemplate:
+        spec:
+          accessModes:
+            - ReadWriteOnce
+          resources:
+            requests:
+              storage: 12Gi   # 10GiB retention + 20% headroom for in-flight segments
+# -------------------------------------------------------------------------
+# Node exporter — keep enabled (host-level metrics: CPU, memory, disk, net).
+# -------------------------------------------------------------------------
+nodeExporter:
+  enabled: true
+prometheus-node-exporter:
+  prometheus:
+    monitor:
+      metricRelabelings: *cardinality-labeldrop
+  resources:
+    requests:
+      cpu: 30m
+      memory: 64Mi
+    limits:
+      cpu: 100m
+      memory: 128Mi
+# -------------------------------------------------------------------------
+# kube-state-metrics — keep enabled (k8s-level metrics: pod phases, deployments).
+# -------------------------------------------------------------------------
+kubeStateMetrics:
+  enabled: true
+kube-state-metrics:
+  resources:
+    requests:
+      cpu: 50m
+      memory: 128Mi
+    limits:
+      cpu: 200m
+      memory: 256Mi
+# -------------------------------------------------------------------------
+# Datasource auto-discovery note:
+#   kube-prometheus-stack's grafana.sidecar.datasources is N/A (grafana sub-chart
+#   is off). Phase B's standalone Grafana (grafana-values.yaml) has been updated
+#   in this same C1 PR to include a Prometheus datasource entry pointing at:
+#     http://prometheus-operated.monitoring.svc.cluster.local:9090
+#   This is the in-cluster Service that kube-prometheus-stack creates for the
+#   Prometheus StatefulSet (created by the Prometheus Operator from the
+#   Prometheus CR above).
+# -------------------------------------------------------------------------