ecip-observability-stack 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/CLAUDE.md +48 -0
  2. package/README.md +75 -0
  3. package/alerts/analysis-backlog.yaml +39 -0
  4. package/alerts/cache-degradation.yaml +44 -0
  5. package/alerts/dlq-depth.yaml +56 -0
  6. package/alerts/lsp-daemon.yaml +43 -0
  7. package/alerts/mcp-latency.yaml +46 -0
  8. package/alerts/security-anomaly.yaml +59 -0
  9. package/alerts/sla-latency.yaml +61 -0
  10. package/chaos/kafka-broker-restart.sh +168 -0
  11. package/chaos/kill-lsp-daemon.sh +148 -0
  12. package/chaos/redis-node-failure.sh +318 -0
  13. package/ci/check-observability-contract.js +285 -0
  14. package/ci/eslint-plugin-ecip/index.js +209 -0
  15. package/ci/eslint-plugin-ecip/package.json +12 -0
  16. package/ci/github-actions-observability-gate.yaml +180 -0
  17. package/ci/ruff-shared.toml +41 -0
  18. package/collector/otel-collector-config.yaml +226 -0
  19. package/collector/otel-collector-daemonset.yaml +168 -0
  20. package/collector/sampling-config.yaml +83 -0
  21. package/dashboards/_provisioning/grafana-dashboards.yaml +16 -0
  22. package/dashboards/analysis-throughput.json +166 -0
  23. package/dashboards/cache-performance.json +129 -0
  24. package/dashboards/cross-repo-fanout.json +93 -0
  25. package/dashboards/event-bus-dlq.json +129 -0
  26. package/dashboards/lsp-daemon-health.json +104 -0
  27. package/dashboards/mcp-call-graph.json +114 -0
  28. package/dashboards/query-latency.json +160 -0
  29. package/dashboards/security-events.json +131 -0
  30. package/docs/M08-Observability-Design.md +639 -0
  31. package/docs/PROGRESS.md +375 -0
  32. package/docs/module-documentation.md +64 -0
  33. package/elasticsearch/ilm-policy.json +57 -0
  34. package/elasticsearch/index-template.json +62 -0
  35. package/elasticsearch/kibana-space.yaml +53 -0
  36. package/helm/Chart.yaml +30 -0
  37. package/helm/templates/configmaps.yaml +25 -0
  38. package/helm/templates/elasticsearch.yaml +68 -0
  39. package/helm/templates/grafana-secret.yaml +22 -0
  40. package/helm/templates/grafana.yaml +19 -0
  41. package/helm/templates/loki.yaml +33 -0
  42. package/helm/templates/otel-collector.yaml +119 -0
  43. package/helm/templates/prometheus.yaml +43 -0
  44. package/helm/templates/tempo.yaml +16 -0
  45. package/helm/values.prod.yaml +159 -0
  46. package/helm/values.yaml +146 -0
  47. package/logging-lib/nodejs/package.json +57 -0
  48. package/logging-lib/nodejs/pnpm-lock.yaml +4576 -0
  49. package/logging-lib/python/pyproject.toml +45 -0
  50. package/logging-lib/python/src/__init__.py +19 -0
  51. package/logging-lib/python/src/logger.py +131 -0
  52. package/logging-lib/python/src/security_events.py +150 -0
  53. package/logging-lib/python/src/tracer.py +185 -0
  54. package/logging-lib/python/tests/test_logger.py +113 -0
  55. package/package.json +21 -0
  56. package/prometheus/prometheus-values.yaml +170 -0
  57. package/prometheus/recording-rules.yaml +97 -0
  58. package/prometheus/scrape-configs.yaml +122 -0
  59. package/runbooks/SDK-INTEGRATION.md +239 -0
  60. package/runbooks/alert-response/ANALYSIS_BACKLOG.md +128 -0
  61. package/runbooks/alert-response/DLQ_DEPTH_EXCEEDED.md +150 -0
  62. package/runbooks/alert-response/HIGH_QUERY_LATENCY.md +134 -0
  63. package/runbooks/alert-response/LSP_DAEMON_RESTART.md +118 -0
  64. package/runbooks/alert-response/SECURITY_ANOMALY.md +160 -0
  65. package/runbooks/dashboard-guide.md +169 -0
  66. package/scripts/lint-dashboards.js +184 -0
  67. package/tempo/tempo-datasource.yaml +46 -0
  68. package/tempo/tempo-values.yaml +94 -0
  69. package/tests/alert-threshold-config.test.ts +283 -0
  70. package/tests/log-schema-validation.test.ts +246 -0
  71. package/tests/metric-label-validation.test.ts +292 -0
  72. package/tests/otel-pipeline-integration.test.ts +420 -0
  73. package/tests/security-events.test.ts +417 -0
  74. package/tsconfig.json +17 -0
  75. package/vitest.config.ts +21 -0
  76. package/vitest.integration.config.ts +9 -0
@@ -0,0 +1,45 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "ecip-observability"
7
+ version = "1.0.0"
8
+ description = "ECIP shared observability library — structured logging, tracing, and security event helpers for Python services"
9
+ authors = [
10
+ { name = "ECIP Platform Team" }
11
+ ]
12
+ requires-python = ">=3.11"
13
+ dependencies = [
14
+ "structlog>=23.2.0",
15
+ "opentelemetry-api>=1.22.0",
16
+ "opentelemetry-sdk>=1.22.0",
17
+ "opentelemetry-exporter-otlp-proto-grpc>=1.22.0",
18
+ "opentelemetry-exporter-otlp-proto-http>=1.22.0",
19
+ "opentelemetry-instrumentation>=0.43b0",
20
+ "opentelemetry-instrumentation-grpc>=0.43b0",
21
+ "opentelemetry-instrumentation-requests>=0.43b0",
22
+ "opentelemetry-instrumentation-aiohttp-client>=0.43b0",
23
+ ]
24
+
25
+ [project.optional-dependencies]
26
+ dev = [
27
+ "pytest>=7.4.0",
28
+ "pytest-asyncio>=0.23.0",
29
+ "ruff>=0.2.0",
30
+ "mypy>=1.8.0",
31
+ ]
32
+
33
# The Python modules live directly in src/ (src/__init__.py, src/logger.py, ...),
# so automatic discovery with where = ["src"] finds no package at all.
# Map the import name used by callers (ecip_observability) onto that directory.
[tool.setuptools]
packages = ["ecip_observability"]

[tool.setuptools.package-dir]
ecip_observability = "src"
35
+
36
+ [tool.ruff]
37
+ target-version = "py311"
38
+ line-length = 120
39
+
40
+ [tool.ruff.lint]
41
+ select = ["E", "F", "W", "I", "N", "UP", "B", "A", "C4", "SIM"]
42
+
43
+ [tool.mypy]
44
+ python_version = "3.11"
45
+ strict = true
@@ -0,0 +1,19 @@
1
+ """
2
+ ecip_observability — Package init
3
+
4
+ Re-exports all public APIs.
5
+ """
6
+
7
+ from .logger import get_logger, MissingObservabilityContext
8
+ from .tracer import init_tracer, get_tracer, traced
9
+ from .security_events import emit_auth_failure, emit_rbac_denial
10
+
11
+ __all__ = [
12
+ "get_logger",
13
+ "MissingObservabilityContext",
14
+ "init_tracer",
15
+ "get_tracer",
16
+ "traced",
17
+ "emit_auth_failure",
18
+ "emit_rbac_denial",
19
+ ]
@@ -0,0 +1,131 @@
1
+ """
2
+ ecip_observability — Structured Logger (Python)
3
+
4
+ structlog-based logger with mandatory ECIP context fields.
5
+ Missing fields raise MissingObservabilityContext at logger creation time.
6
+
7
+ Usage:
8
+ from ecip_observability import get_logger
9
+ log = get_logger(repo="acme/auth", branch="main", user_id="u_abc", module="M02")
10
+ log.info("Analysis complete", duration_ms=14200, files_indexed=47)
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ import os
17
+ import sys
18
+ from datetime import datetime, timezone
19
+ from typing import Any
20
+
21
+ import structlog
22
+ from opentelemetry import trace
23
+
24
+
25
+ # ---------------------------------------------------------------------------
26
+ # Exceptions
27
+ # ---------------------------------------------------------------------------
28
+
29
class MissingObservabilityContext(Exception):
    """Signal that a mandatory ECIP observability field was not supplied.

    The logger factory raises this when a required context value is empty
    or when the module identifier is not one of the recognised values.
    """
32
+
33
+
34
+ # ---------------------------------------------------------------------------
35
+ # Valid module identifiers
36
+ # ---------------------------------------------------------------------------
37
+
38
+ VALID_MODULES = {"M01", "M02", "M03", "M04", "M05", "M06", "M07", "M08"}
39
+
40
+
41
+ # ---------------------------------------------------------------------------
42
+ # structlog configuration
43
+ # ---------------------------------------------------------------------------
44
+
45
def _configure_structlog() -> None:
    """Configure structlog to render each log call as one JSON object on stdout.

    The minimum level comes from the ``LOG_LEVEL`` environment variable
    (case-insensitive, surrounding whitespace ignored, default ``INFO``).
    An unrecognised value falls back to ``INFO`` rather than raising, because
    this function runs at module import time and a typo in the environment
    must not prevent the service from starting.
    """
    requested = os.environ.get("LOG_LEVEL", "INFO").strip().upper()
    # getLevelName() maps a known name to its integer, but returns the string
    # "Level <name>" for anything else; structlog cannot build a filtering
    # logger from that string, so only accept the integer result.
    resolved = logging.getLevelName(requested)
    min_level = resolved if isinstance(resolved, int) else logging.INFO

    structlog.configure(
        processors=[
            structlog.contextvars.merge_contextvars,
            structlog.processors.add_log_level,
            structlog.processors.TimeStamper(fmt="iso", utc=True),
            _add_trace_context,
            structlog.processors.StackInfoRenderer(),
            structlog.processors.format_exc_info,
            structlog.processors.UnicodeDecoder(),
            # The renderer must stay last: it turns the event dict into text.
            structlog.processors.JSONRenderer(),
        ],
        wrapper_class=structlog.make_filtering_bound_logger(min_level),
        context_class=dict,
        logger_factory=structlog.PrintLoggerFactory(file=sys.stdout),
        cache_logger_on_first_use=True,
    )
65
+
66
+
67
def _add_trace_context(
    logger: Any, method_name: str, event_dict: dict[str, Any]
) -> dict[str, Any]:
    """Stamp a log entry with the current OpenTelemetry trace and span IDs.

    This is a structlog processor: it receives the event dict, adds the
    ``trace_id`` and ``span_id`` keys, and returns the same dict.

    The IDs are taken whenever the current span context is valid, not only
    when the span is recording. A span that was sampled out is not recording
    but still carries the IDs of the trace it belongs to, and those IDs are
    what allows a log line to be correlated with that trace.

    Args:
        logger: The wrapped logger (unused; part of the processor signature).
        method_name: Name of the log method called (unused).
        event_dict: The event being built; mutated in place.

    Returns:
        ``event_dict`` with ``trace_id`` (32 hex chars) and ``span_id``
        (16 hex chars), or the ``no-active-*`` placeholders when there is
        no valid span context.
    """
    span_context = trace.get_current_span().get_span_context()
    if span_context.is_valid:
        event_dict["trace_id"] = format(span_context.trace_id, "032x")
        event_dict["span_id"] = format(span_context.span_id, "016x")
    else:
        event_dict["trace_id"] = "no-active-trace"
        event_dict["span_id"] = "no-active-span"
    return event_dict
80
+
81
+
82
+ # Initialize on module import
83
+ _configure_structlog()
84
+
85
+
86
+ # ---------------------------------------------------------------------------
87
+ # Logger factory
88
+ # ---------------------------------------------------------------------------
89
+
90
def get_logger(
    *,
    repo: str,
    branch: str,
    user_id: str,
    module: str,
) -> structlog.BoundLogger:
    """
    Create a structured ECIP logger with mandatory context fields.

    Every field is required. A value that is empty, ``None`` or made only of
    whitespace counts as missing, since a blank value carries no context.

    Args:
        repo: Repository in the form {org}/{repo}
        branch: Branch being operated on
        user_id: Hashed user identifier (no raw PII)
        module: ECIP module identifier (M01-M08)

    Returns:
        A structlog BoundLogger with ``repo``, ``branch``, ``user_id``,
        ``module`` and ``env`` bound. ``env`` is read from ``NODE_ENV``,
        then ``ENVIRONMENT``, and defaults to ``"development"``.

    Raises:
        MissingObservabilityContext: If a field is missing/blank or
            ``module`` is not one of ``VALID_MODULES``.
    """
    # Checked in declaration order so the first missing field is reported.
    required_fields = (
        ("repo", repo),
        ("branch", branch),
        ("user_id", user_id),
        ("module", module),
    )
    for field_name, value in required_fields:
        is_blank = isinstance(value, str) and not value.strip()
        if not value or is_blank:
            raise MissingObservabilityContext(f"'{field_name}' is required")

    if module not in VALID_MODULES:
        # Sorted so the message is stable across runs (set order is not).
        raise MissingObservabilityContext(
            f"'module' must be one of {sorted(VALID_MODULES)}, got '{module}'"
        )

    environment = os.environ.get("NODE_ENV", os.environ.get("ENVIRONMENT", "development"))
    return structlog.get_logger().bind(
        repo=repo,
        branch=branch,
        user_id=user_id,
        module=module,
        env=environment,
    )
@@ -0,0 +1,150 @@
1
+ """
2
+ ecip_observability — Security Event Helpers (Python)
3
+
4
+ Security events route to a dedicated Elasticsearch pipeline — NEVER to
5
+ the general log store. Use these helpers exclusively for auth/RBAC events.
6
+
7
+ Usage:
8
+ from ecip_observability import emit_auth_failure, emit_rbac_denial
9
+
10
+ emit_auth_failure(
11
+ user_id="u_abc",
12
+ reason="jwt_expired",
13
+ source_ip="10.0.14.22",
14
+ module="M02",
15
+ )
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import hashlib
21
+ import json
22
+ import sys
23
+ from datetime import datetime, timezone
24
+ from typing import Any, Literal
25
+
26
+ from opentelemetry import trace
27
+
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # Types
31
+ # ---------------------------------------------------------------------------
32
+
33
+ AuthFailureReason = Literal["jwt_expired", "jwt_invalid", "jwt_missing", "mtls_rejected"]
34
+ RbacAction = Literal["read", "write", "admin"]
35
+
36
+
37
+ # ---------------------------------------------------------------------------
38
+ # Internal helpers
39
+ # ---------------------------------------------------------------------------
40
+
41
+ def _hash_user_id(user_id: str) -> str:
42
+ """Hash a user ID to prevent raw PII in security logs."""
43
+ if user_id.startswith("u_"):
44
+ return user_id
45
+ hash_hex = hashlib.sha256(user_id.encode()).hexdigest()[:12]
46
+ return f"u_{hash_hex}"
47
+
48
+
49
def _get_trace_id() -> str:
    """Return the current trace ID as 32 hex characters, for correlation.

    The ID is returned whenever the current span context is valid. A span
    that was sampled out is not recording, yet it still belongs to a trace,
    and a security event should stay linkable to that trace.

    Returns:
        The trace ID, or ``"no-active-trace"`` when there is no valid
        span context.
    """
    span_context = trace.get_current_span().get_span_context()
    if not span_context.is_valid:
        return "no-active-trace"
    return format(span_context.trace_id, "032x")
56
+
57
+
58
+ def _emit_event(event: dict[str, Any]) -> None:
59
+ """
60
+ Emit a security event to stderr as JSON.
61
+ The OTel Collector's filelog receiver picks this up and routes it
62
+ through the security pipeline to Elasticsearch.
63
+ """
64
+ sys.stderr.write(json.dumps(event, default=str) + "\n")
65
+ sys.stderr.flush()
66
+
67
+
68
+ # ---------------------------------------------------------------------------
69
+ # Public API
70
+ # ---------------------------------------------------------------------------
71
+
72
def emit_auth_failure(
    *,
    user_id: str,
    reason: AuthFailureReason,
    source_ip: str,
    module: str,
    metadata: dict[str, Any] | None = None,
) -> None:
    """
    Emit an authentication-failure security event.

    The event is handed to the dedicated security event emitter; the
    general logger must not be used for this kind of event.

    Args:
        user_id: User identifier (hashed here if not already hashed)
        reason: Why authentication failed
        source_ip: Source IP address of the request
        module: ECIP module that detected the failure
        metadata: Optional extra context; attached under ``metadata``
            only when non-empty
    """
    occurred_at = datetime.now(timezone.utc).isoformat()
    payload: dict[str, Any] = {
        "@timestamp": occurred_at,
        "event.kind": "event",
        "event.category": "authentication",
        "event.type": "denied",
        "event.outcome": "failure",
        "trace.id": _get_trace_id(),
        "user.id": _hash_user_id(user_id),
        "source.ip": source_ip,
        "reason": reason,
        "module": module,
    }
    extras = {"metadata": metadata} if metadata else {}
    _emit_event({**payload, **extras})
109
+
110
+
111
def emit_rbac_denial(
    *,
    user_id: str,
    resource: str,
    action: RbacAction,
    reason: str,
    module: str,
    metadata: dict[str, Any] | None = None,
) -> None:
    """
    Emit an RBAC-denial security event.

    The event is handed to the dedicated security event emitter; the
    general logger must not be used for this kind of event.

    Args:
        user_id: User identifier (hashed here if not already hashed)
        resource: Resource the caller tried to access
        action: Action that was attempted
        reason: Why access was denied
        module: ECIP module that denied access
        metadata: Optional extra context; attached under ``metadata``
            only when non-empty
    """
    occurred_at = datetime.now(timezone.utc).isoformat()
    payload: dict[str, Any] = {
        "@timestamp": occurred_at,
        "event.kind": "event",
        "event.category": "authorization",
        "event.type": "denied",
        "event.outcome": "failure",
        "trace.id": _get_trace_id(),
        "user.id": _hash_user_id(user_id),
        "resource": resource,
        "action": action,
        "reason": reason,
        "module": module,
    }
    extras = {"metadata": metadata} if metadata else {}
    _emit_event({**payload, **extras})
@@ -0,0 +1,185 @@
1
+ """
2
+ ecip_observability — Distributed Tracer (Python)
3
+
4
+ Initializes the OpenTelemetry Python SDK and provides a @traced decorator
5
+ for automatic span creation.
6
+
7
+ Usage:
8
+ from ecip_observability import init_tracer, traced
9
+
10
+ init_tracer(service_name="ecip-analysis-engine")
11
+
12
+ @traced(name="lsp.symbol_extraction")
13
+ def extract_symbols(file_path: str) -> list:
14
+ ... # span automatically started/ended; exceptions auto-captured
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import functools
20
+ import os
21
+ from typing import Any, Callable, TypeVar, ParamSpec
22
+
23
+ from opentelemetry import trace
24
+ from opentelemetry.sdk.trace import TracerProvider
25
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor
26
+ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
27
+ from opentelemetry.sdk.resources import Resource, SERVICE_NAME, SERVICE_VERSION
28
+ from opentelemetry.trace import StatusCode, Span
29
+ from opentelemetry.instrumentation.grpc import GrpcInstrumentorClient, GrpcInstrumentorServer
30
+
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Types
34
+ # ---------------------------------------------------------------------------
35
+
36
+ P = ParamSpec("P")
37
+ T = TypeVar("T")
38
+
39
+ _initialized = False
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # SDK Initialization
44
+ # ---------------------------------------------------------------------------
45
+
46
def init_tracer(
    *,
    service_name: str,
    service_version: str = "0.0.0",
    otlp_endpoint: str | None = None,
    environment: str | None = None,
    resource_attributes: dict[str, str] | None = None,
) -> None:
    """
    Initialize the OpenTelemetry Python SDK.

    Intended to be called once at process entry, before server
    initialization. A second call emits a warning and changes nothing.

    Resource attributes are layered, later sources overriding earlier ones:
    the service name/version/environment, then ``OTEL_RESOURCE_ATTRIBUTES``
    (comma-separated ``key=value`` pairs), then ``resource_attributes``.

    Args:
        service_name: Service name (e.g., 'ecip-analysis-engine')
        service_version: Service version string
        otlp_endpoint: OTLP collector endpoint; falls back to
            ``OTEL_EXPORTER_OTLP_ENDPOINT`` and then to the in-cluster
            collector address
        environment: Deployment environment; falls back to ``ENVIRONMENT``
            and then to ``"development"``
        resource_attributes: Additional resource attributes
    """
    global _initialized
    import warnings

    if _initialized:
        warnings.warn("Tracer already initialized — skipping re-initialization", stacklevel=2)
        return

    endpoint = (
        otlp_endpoint
        or os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT")
        or "http://otel-collector.monitoring:4317"
    )
    env = environment or os.environ.get("ENVIRONMENT", "development")

    attrs: dict[str, str] = {
        SERVICE_NAME: service_name,
        SERVICE_VERSION: service_version,
        "deployment.environment": env,
    }

    # Parse OTEL_RESOURCE_ATTRIBUTES; entries without '=' or with an empty
    # key are ignored instead of producing a nameless attribute.
    for pair in os.environ.get("OTEL_RESOURCE_ATTRIBUTES", "").split(","):
        key, separator, value = pair.partition("=")
        key = key.strip()
        if separator and key:
            attrs[key] = value.strip()

    if resource_attributes:
        attrs.update(resource_attributes)

    # Only fall back to a plaintext channel when the endpoint does not ask
    # for TLS; forcing insecure=True would silently downgrade https:// URLs.
    use_plaintext = not endpoint.lower().startswith("https://")
    exporter = OTLPSpanExporter(endpoint=endpoint, insecure=use_plaintext)

    provider = TracerProvider(resource=Resource.create(attrs))
    provider.add_span_processor(BatchSpanProcessor(exporter))
    trace.set_tracer_provider(provider)

    # Auto-instrument gRPC. This stays best-effort (tracing must not stop
    # the service from starting), but a failure is reported, not hidden.
    try:
        GrpcInstrumentorClient().instrument()
        GrpcInstrumentorServer().instrument()
    except Exception as exc:
        warnings.warn(
            f"gRPC auto-instrumentation skipped: {exc!r}",
            RuntimeWarning,
            stacklevel=2,
        )

    _initialized = True
115
+
116
+
117
def get_tracer(name: str = "ecip-observability") -> trace.Tracer:
    """Return the OpenTelemetry tracer registered under *name*.

    The lookup is delegated to ``trace.get_tracer``.
    """
    tracer = trace.get_tracer(name)
    return tracer
120
+
121
+
122
+ # ---------------------------------------------------------------------------
123
+ # @traced decorator
124
+ # ---------------------------------------------------------------------------
125
+
126
def traced(
    name: str | None = None,
    attributes: dict[str, str] | None = None,
) -> Callable[[Callable[P, T]], Callable[P, T]]:
    """
    Decorator that wraps a function in an OpenTelemetry span.

    The span starts on function entry and ends on exit. On success the span
    status is set to OK. On an exception the span status is set to ERROR
    with the exception text, the exception is recorded once, and it is
    re-raised unchanged. Both plain and ``async def`` functions are
    supported.

    Args:
        name: Span name (defaults to ``<module>.<qualified name>``)
        attributes: Additional span attributes

    Usage:
        @traced(name="lsp.symbol_extraction")
        def extract_symbols(file_path: str) -> list:
            ...
    """
    import asyncio

    def decorator(fn: Callable[P, T]) -> Callable[P, T]:
        span_name = name or f"{fn.__module__}.{fn.__qualname__}"

        def open_span() -> Any:
            # The wrappers record the exception and set the status
            # themselves. The context manager does the same by default,
            # which would attach every exception to the span twice, so
            # its built-in handling is switched off here.
            return get_tracer().start_as_current_span(
                span_name,
                record_exception=False,
                set_status_on_exception=False,
            )

        def apply_attributes(span: Span) -> None:
            if attributes:
                for key, value in attributes.items():
                    span.set_attribute(key, value)

        def mark_failed(span: Span, exc: Exception) -> None:
            span.set_status(StatusCode.ERROR, str(exc))
            span.record_exception(exc)

        @functools.wraps(fn)
        def sync_wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
            with open_span() as span:
                apply_attributes(span)
                try:
                    result = fn(*args, **kwargs)
                except Exception as exc:
                    mark_failed(span, exc)
                    raise
                span.set_status(StatusCode.OK)
                return result

        @functools.wraps(fn)
        async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
            with open_span() as span:
                apply_attributes(span)
                try:
                    result = await fn(*args, **kwargs)  # type: ignore[misc]
                except Exception as exc:
                    mark_failed(span, exc)
                    raise
                span.set_status(StatusCode.OK)
                return result

        if asyncio.iscoroutinefunction(fn):
            return async_wrapper  # type: ignore[return-value]
        return sync_wrapper  # type: ignore[return-value]

    return decorator
@@ -0,0 +1,113 @@
1
+ """Tests for the ECIP Python structured logger."""
2
+
3
+ import json
4
+ import pytest
5
+
6
+ from unittest.mock import patch, MagicMock
7
+
8
+
9
class TestGetLogger:
    """Exercise the ``get_logger`` factory: happy path and validation errors."""

    # Baseline keyword arguments; each negative test overrides a single field.
    _BASE_CONTEXT = {
        "repo": "acme/auth",
        "branch": "main",
        "user_id": "u_abc",
        "module": "M02",
    }

    def _assert_rejected(self, pattern, **overrides):
        """Expect MissingObservabilityContext matching *pattern* for the overridden context."""
        from src.logger import get_logger, MissingObservabilityContext

        context = {**self._BASE_CONTEXT, **overrides}
        with pytest.raises(MissingObservabilityContext, match=pattern):
            get_logger(**context)

    def test_creates_logger_with_valid_context(self):
        from src.logger import get_logger

        context = {
            "repo": "acme-corp/auth-service",
            "branch": "main",
            "user_id": "u_8f3a1c",
            "module": "M02",
        }
        assert get_logger(**context) is not None

    def test_raises_on_missing_repo(self):
        self._assert_rejected("repo", repo="")

    def test_raises_on_missing_branch(self):
        self._assert_rejected("branch", branch="")

    def test_raises_on_missing_user_id(self):
        self._assert_rejected("user_id", user_id="")

    def test_raises_on_missing_module(self):
        self._assert_rejected("module", module="")

    def test_raises_on_invalid_module(self):
        self._assert_rejected("must be one of", module="M99")

    def test_all_valid_modules(self):
        from src.logger import get_logger, VALID_MODULES

        for module_id in VALID_MODULES:
            bound = get_logger(
                repo="test/repo",
                branch="main",
                user_id="u_test",
                module=module_id,
            )
            assert bound is not None
64
+
65
+
66
class TestSecurityEvents:
    """Test security event emission (events are written to stderr as JSON)."""

    def test_emit_auth_failure_format(self, capsys):
        from src.security_events import emit_auth_failure

        emit_auth_failure(
            user_id="u_abc123",
            reason="jwt_expired",
            source_ip="10.0.14.22",
            module="M01",
        )

        captured = capsys.readouterr()
        event = json.loads(captured.err.strip())
        assert event["event.category"] == "authentication"
        assert event["event.type"] == "denied"
        assert event["event.outcome"] == "failure"
        assert event["reason"] == "jwt_expired"
        assert event["user.id"] == "u_abc123"  # already hashed format

    def test_emit_rbac_denial_format(self, capsys):
        from src.security_events import emit_rbac_denial

        emit_rbac_denial(
            user_id="u_abc123",
            resource="acme-corp/auth-service",
            action="read",
            reason="rbac_insufficient_role",
            module="M06",
        )

        captured = capsys.readouterr()
        event = json.loads(captured.err.strip())
        assert event["event.category"] == "authorization"
        assert event["resource"] == "acme-corp/auth-service"
        assert event["action"] == "read"

    def test_user_id_hashing(self):
        from src.security_events import _hash_user_id

        # Already hashed — should return as-is
        assert _hash_user_id("u_abc123") == "u_abc123"

        # Raw ID — should be hashed
        hashed = _hash_user_id("john.doe@example.com")
        assert hashed.startswith("u_")
        # "u_" (2 chars) + 12 hex chars = 14; the previous expectation of 15
        # could never hold for the 12-character digest prefix.
        assert len(hashed) == 14
package/package.json ADDED
@@ -0,0 +1,21 @@
1
+ {
2
+ "name": "ecip-observability-stack",
3
+ "version": "1.0.0",
4
+ "private": false,
5
+ "description": "ECIP M08 — Observability Stack: OTel Collector, Prometheus, Tempo, Grafana, Elasticsearch",
6
+ "scripts": {
7
+ "test": "vitest run",
8
+ "test:watch": "vitest",
9
+ "test:coverage": "vitest run --coverage",
10
+ "lint:dashboards": "node scripts/lint-dashboards.js",
11
+ "lint:alerts": "promtool check rules alerts/*.yaml",
12
+ "validate": "npm run test && npm run lint:dashboards && npm run lint:alerts"
13
+ },
14
+ "devDependencies": {
15
+ "typescript": "^5.4.0",
16
+ "vitest": "^1.3.0",
17
+ "@vitest/coverage-v8": "^1.3.0",
18
+ "yaml": "^2.4.0",
19
+ "testcontainers": "^10.7.0"
20
+ }
21
+ }