foundry-mcp 0.8.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of foundry-mcp might be problematic. Click here for more details.
- foundry_mcp/__init__.py +13 -0
- foundry_mcp/cli/__init__.py +67 -0
- foundry_mcp/cli/__main__.py +9 -0
- foundry_mcp/cli/agent.py +96 -0
- foundry_mcp/cli/commands/__init__.py +37 -0
- foundry_mcp/cli/commands/cache.py +137 -0
- foundry_mcp/cli/commands/dashboard.py +148 -0
- foundry_mcp/cli/commands/dev.py +446 -0
- foundry_mcp/cli/commands/journal.py +377 -0
- foundry_mcp/cli/commands/lifecycle.py +274 -0
- foundry_mcp/cli/commands/modify.py +824 -0
- foundry_mcp/cli/commands/plan.py +640 -0
- foundry_mcp/cli/commands/pr.py +393 -0
- foundry_mcp/cli/commands/review.py +667 -0
- foundry_mcp/cli/commands/session.py +472 -0
- foundry_mcp/cli/commands/specs.py +686 -0
- foundry_mcp/cli/commands/tasks.py +807 -0
- foundry_mcp/cli/commands/testing.py +676 -0
- foundry_mcp/cli/commands/validate.py +982 -0
- foundry_mcp/cli/config.py +98 -0
- foundry_mcp/cli/context.py +298 -0
- foundry_mcp/cli/logging.py +212 -0
- foundry_mcp/cli/main.py +44 -0
- foundry_mcp/cli/output.py +122 -0
- foundry_mcp/cli/registry.py +110 -0
- foundry_mcp/cli/resilience.py +178 -0
- foundry_mcp/cli/transcript.py +217 -0
- foundry_mcp/config.py +1454 -0
- foundry_mcp/core/__init__.py +144 -0
- foundry_mcp/core/ai_consultation.py +1773 -0
- foundry_mcp/core/batch_operations.py +1202 -0
- foundry_mcp/core/cache.py +195 -0
- foundry_mcp/core/capabilities.py +446 -0
- foundry_mcp/core/concurrency.py +898 -0
- foundry_mcp/core/context.py +540 -0
- foundry_mcp/core/discovery.py +1603 -0
- foundry_mcp/core/error_collection.py +728 -0
- foundry_mcp/core/error_store.py +592 -0
- foundry_mcp/core/health.py +749 -0
- foundry_mcp/core/intake.py +933 -0
- foundry_mcp/core/journal.py +700 -0
- foundry_mcp/core/lifecycle.py +412 -0
- foundry_mcp/core/llm_config.py +1376 -0
- foundry_mcp/core/llm_patterns.py +510 -0
- foundry_mcp/core/llm_provider.py +1569 -0
- foundry_mcp/core/logging_config.py +374 -0
- foundry_mcp/core/metrics_persistence.py +584 -0
- foundry_mcp/core/metrics_registry.py +327 -0
- foundry_mcp/core/metrics_store.py +641 -0
- foundry_mcp/core/modifications.py +224 -0
- foundry_mcp/core/naming.py +146 -0
- foundry_mcp/core/observability.py +1216 -0
- foundry_mcp/core/otel.py +452 -0
- foundry_mcp/core/otel_stubs.py +264 -0
- foundry_mcp/core/pagination.py +255 -0
- foundry_mcp/core/progress.py +387 -0
- foundry_mcp/core/prometheus.py +564 -0
- foundry_mcp/core/prompts/__init__.py +464 -0
- foundry_mcp/core/prompts/fidelity_review.py +691 -0
- foundry_mcp/core/prompts/markdown_plan_review.py +515 -0
- foundry_mcp/core/prompts/plan_review.py +627 -0
- foundry_mcp/core/providers/__init__.py +237 -0
- foundry_mcp/core/providers/base.py +515 -0
- foundry_mcp/core/providers/claude.py +472 -0
- foundry_mcp/core/providers/codex.py +637 -0
- foundry_mcp/core/providers/cursor_agent.py +630 -0
- foundry_mcp/core/providers/detectors.py +515 -0
- foundry_mcp/core/providers/gemini.py +426 -0
- foundry_mcp/core/providers/opencode.py +718 -0
- foundry_mcp/core/providers/opencode_wrapper.js +308 -0
- foundry_mcp/core/providers/package-lock.json +24 -0
- foundry_mcp/core/providers/package.json +25 -0
- foundry_mcp/core/providers/registry.py +607 -0
- foundry_mcp/core/providers/test_provider.py +171 -0
- foundry_mcp/core/providers/validation.py +857 -0
- foundry_mcp/core/rate_limit.py +427 -0
- foundry_mcp/core/research/__init__.py +68 -0
- foundry_mcp/core/research/memory.py +528 -0
- foundry_mcp/core/research/models.py +1234 -0
- foundry_mcp/core/research/providers/__init__.py +40 -0
- foundry_mcp/core/research/providers/base.py +242 -0
- foundry_mcp/core/research/providers/google.py +507 -0
- foundry_mcp/core/research/providers/perplexity.py +442 -0
- foundry_mcp/core/research/providers/semantic_scholar.py +544 -0
- foundry_mcp/core/research/providers/tavily.py +383 -0
- foundry_mcp/core/research/workflows/__init__.py +25 -0
- foundry_mcp/core/research/workflows/base.py +298 -0
- foundry_mcp/core/research/workflows/chat.py +271 -0
- foundry_mcp/core/research/workflows/consensus.py +539 -0
- foundry_mcp/core/research/workflows/deep_research.py +4142 -0
- foundry_mcp/core/research/workflows/ideate.py +682 -0
- foundry_mcp/core/research/workflows/thinkdeep.py +405 -0
- foundry_mcp/core/resilience.py +600 -0
- foundry_mcp/core/responses.py +1624 -0
- foundry_mcp/core/review.py +366 -0
- foundry_mcp/core/security.py +438 -0
- foundry_mcp/core/spec.py +4119 -0
- foundry_mcp/core/task.py +2463 -0
- foundry_mcp/core/testing.py +839 -0
- foundry_mcp/core/validation.py +2357 -0
- foundry_mcp/dashboard/__init__.py +32 -0
- foundry_mcp/dashboard/app.py +119 -0
- foundry_mcp/dashboard/components/__init__.py +17 -0
- foundry_mcp/dashboard/components/cards.py +88 -0
- foundry_mcp/dashboard/components/charts.py +177 -0
- foundry_mcp/dashboard/components/filters.py +136 -0
- foundry_mcp/dashboard/components/tables.py +195 -0
- foundry_mcp/dashboard/data/__init__.py +11 -0
- foundry_mcp/dashboard/data/stores.py +433 -0
- foundry_mcp/dashboard/launcher.py +300 -0
- foundry_mcp/dashboard/views/__init__.py +12 -0
- foundry_mcp/dashboard/views/errors.py +217 -0
- foundry_mcp/dashboard/views/metrics.py +164 -0
- foundry_mcp/dashboard/views/overview.py +96 -0
- foundry_mcp/dashboard/views/providers.py +83 -0
- foundry_mcp/dashboard/views/sdd_workflow.py +255 -0
- foundry_mcp/dashboard/views/tool_usage.py +139 -0
- foundry_mcp/prompts/__init__.py +9 -0
- foundry_mcp/prompts/workflows.py +525 -0
- foundry_mcp/resources/__init__.py +9 -0
- foundry_mcp/resources/specs.py +591 -0
- foundry_mcp/schemas/__init__.py +38 -0
- foundry_mcp/schemas/intake-schema.json +89 -0
- foundry_mcp/schemas/sdd-spec-schema.json +414 -0
- foundry_mcp/server.py +150 -0
- foundry_mcp/tools/__init__.py +10 -0
- foundry_mcp/tools/unified/__init__.py +92 -0
- foundry_mcp/tools/unified/authoring.py +3620 -0
- foundry_mcp/tools/unified/context_helpers.py +98 -0
- foundry_mcp/tools/unified/documentation_helpers.py +268 -0
- foundry_mcp/tools/unified/environment.py +1341 -0
- foundry_mcp/tools/unified/error.py +479 -0
- foundry_mcp/tools/unified/health.py +225 -0
- foundry_mcp/tools/unified/journal.py +841 -0
- foundry_mcp/tools/unified/lifecycle.py +640 -0
- foundry_mcp/tools/unified/metrics.py +777 -0
- foundry_mcp/tools/unified/plan.py +876 -0
- foundry_mcp/tools/unified/pr.py +294 -0
- foundry_mcp/tools/unified/provider.py +589 -0
- foundry_mcp/tools/unified/research.py +1283 -0
- foundry_mcp/tools/unified/review.py +1042 -0
- foundry_mcp/tools/unified/review_helpers.py +314 -0
- foundry_mcp/tools/unified/router.py +102 -0
- foundry_mcp/tools/unified/server.py +565 -0
- foundry_mcp/tools/unified/spec.py +1283 -0
- foundry_mcp/tools/unified/task.py +3846 -0
- foundry_mcp/tools/unified/test.py +431 -0
- foundry_mcp/tools/unified/verification.py +520 -0
- foundry_mcp-0.8.22.dist-info/METADATA +344 -0
- foundry_mcp-0.8.22.dist-info/RECORD +153 -0
- foundry_mcp-0.8.22.dist-info/WHEEL +4 -0
- foundry_mcp-0.8.22.dist-info/entry_points.txt +3 -0
- foundry_mcp-0.8.22.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,1216 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Observability utilities for foundry-mcp.
|
|
3
|
+
|
|
4
|
+
Provides structured logging, metrics collection, and audit logging
|
|
5
|
+
for MCP tools and resources.
|
|
6
|
+
|
|
7
|
+
FastMCP Middleware Integration:
|
|
8
|
+
The decorators in this module can be applied to FastMCP tool and resource
|
|
9
|
+
handlers to provide consistent observability. Example:
|
|
10
|
+
|
|
11
|
+
from fastmcp import FastMCP
|
|
12
|
+
from foundry_mcp.core.observability import mcp_tool, audit_log
|
|
13
|
+
|
|
14
|
+
mcp = FastMCP("foundry-mcp")
|
|
15
|
+
|
|
16
|
+
@mcp.tool()
|
|
17
|
+
@mcp_tool(tool_name="list_specs")
|
|
18
|
+
async def list_specs(status: str = "all") -> str:
|
|
19
|
+
audit_log("tool_invocation", tool="list_specs", status=status)
|
|
20
|
+
# ... implementation
|
|
21
|
+
return result
|
|
22
|
+
|
|
23
|
+
For resources, use the mcp_resource decorator:
|
|
24
|
+
|
|
25
|
+
@mcp.resource("specs://{spec_id}")
|
|
26
|
+
@mcp_resource(resource_type="spec")
|
|
27
|
+
async def get_spec(spec_id: str) -> str:
|
|
28
|
+
# ... implementation
|
|
29
|
+
return spec_data
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
import logging
|
|
33
|
+
import functools
|
|
34
|
+
import re
|
|
35
|
+
import time
|
|
36
|
+
import json
|
|
37
|
+
from datetime import datetime, timezone
|
|
38
|
+
from typing import Final, Optional, Dict, Any, Callable, TypeVar, Union, List, Tuple
|
|
39
|
+
from dataclasses import dataclass, field
|
|
40
|
+
from enum import Enum
|
|
41
|
+
|
|
42
|
+
from foundry_mcp.core.context import (
|
|
43
|
+
get_correlation_id,
|
|
44
|
+
get_client_id,
|
|
45
|
+
generate_correlation_id,
|
|
46
|
+
sync_request_context,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
logger = logging.getLogger(__name__)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# =============================================================================
|
|
53
|
+
# Optional Dependencies Availability Flags
|
|
54
|
+
# =============================================================================
|
|
55
|
+
|
|
56
|
+
# Probe optional observability dependencies once at import time. The rest of
# the module checks these flags so it can degrade gracefully (no-op tracers,
# log-only metrics) when the extras are not installed.
try:
    import opentelemetry  # noqa: F401

    _OPENTELEMETRY_AVAILABLE = True
except ImportError:
    _OPENTELEMETRY_AVAILABLE = False

try:
    import prometheus_client  # noqa: F401

    _PROMETHEUS_AVAILABLE = True
except ImportError:
    _PROMETHEUS_AVAILABLE = False
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def get_observability_status() -> Dict[str, Any]:
    """Report the current state of the observability stack.

    Returns:
        Dict with keys:
        - opentelemetry_available: whether opentelemetry packages are installed
        - prometheus_available: whether prometheus_client is installed
        - opentelemetry_enabled: whether OTel is enabled (via the otel module)
        - version: installed foundry-mcp version, or "unknown"
    """
    # OTel counts as enabled only when the optional package is installed AND
    # the otel module itself reports it is switched on.
    enabled = False
    if _OPENTELEMETRY_AVAILABLE:
        try:
            from foundry_mcp.core.otel import is_enabled

            enabled = is_enabled()
        except ImportError:
            pass

    # Best-effort version lookup; never let metadata errors propagate.
    try:
        from importlib.metadata import version

        installed_version = version("foundry-mcp")
    except Exception:
        installed_version = "unknown"

    return {
        "opentelemetry_available": _OPENTELEMETRY_AVAILABLE,
        "prometheus_available": _PROMETHEUS_AVAILABLE,
        "opentelemetry_enabled": enabled,
        "version": installed_version,
    }
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# =============================================================================
|
|
111
|
+
# Observability Manager
|
|
112
|
+
# =============================================================================
|
|
113
|
+
|
|
114
|
+
import threading
|
|
115
|
+
from typing import TYPE_CHECKING
|
|
116
|
+
|
|
117
|
+
if TYPE_CHECKING:
|
|
118
|
+
from foundry_mcp.config import ObservabilityConfig
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class ObservabilityManager:
    """Thread-safe singleton manager for observability stack.

    Provides unified access to OpenTelemetry tracing and Prometheus metrics
    with graceful degradation when dependencies are not available.

    Usage:
        manager = ObservabilityManager.get_instance()
        manager.initialize(config)

        tracer = manager.get_tracer("my-module")
        with tracer.start_as_current_span("my-operation"):
            # ... do work
    """

    # Singleton slot plus the lock guarding both construction (__new__) and
    # initialization (initialize).
    _instance: Optional["ObservabilityManager"] = None
    _lock = threading.Lock()

    def __new__(cls) -> "ObservabilityManager":
        # Double-checked locking: the cheap unlocked read avoids contention on
        # the common path; the locked re-check guarantees a single instance.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    instance = super().__new__(cls)
                    # Fully populate the local instance before publishing it
                    # to cls._instance so no thread sees a half-built object.
                    instance._initialized = False
                    instance._config = None
                    instance._otel_initialized = False
                    instance._prometheus_initialized = False
                    cls._instance = instance
        return cls._instance

    @classmethod
    def get_instance(cls) -> "ObservabilityManager":
        """Get the singleton instance."""
        return cls()

    def initialize(self, config: "ObservabilityConfig") -> None:
        """Initialize observability with configuration.

        Idempotent: once initialized, later calls return immediately until
        shutdown() resets the flag. Backend failures are logged and swallowed
        so observability setup can never prevent server startup.

        Args:
            config: ObservabilityConfig instance from server config
        """
        if self._initialized:
            return

        with self._lock:
            # Re-check under the lock: another thread may have finished
            # initialization between our first check and acquiring the lock.
            if self._initialized:
                return

            self._config = config

            # Initialize OpenTelemetry if enabled
            if config.enabled and config.otel_enabled and _OPENTELEMETRY_AVAILABLE:
                try:
                    from foundry_mcp.core.otel import OTelConfig, initialize as init_otel

                    otel_config = OTelConfig(
                        enabled=True,
                        otlp_endpoint=config.otel_endpoint,
                        service_name=config.otel_service_name,
                        sample_rate=config.otel_sample_rate,
                    )
                    init_otel(otel_config)
                    self._otel_initialized = True
                except Exception as e:
                    logger.warning(f"Failed to initialize OpenTelemetry: {e}")

            # Initialize Prometheus if enabled
            if config.enabled and config.prometheus_enabled and _PROMETHEUS_AVAILABLE:
                try:
                    from foundry_mcp.core.prometheus import (
                        PrometheusConfig,
                        get_prometheus_exporter,
                        reset_exporter,
                    )

                    reset_exporter()  # Reset to apply new config
                    prom_config = PrometheusConfig(
                        enabled=True,
                        port=config.prometheus_port,
                        host=config.prometheus_host,
                        namespace=config.prometheus_namespace,
                    )
                    exporter = get_prometheus_exporter(prom_config)
                    # presumably a non-positive port means "configure metrics
                    # but do not serve the HTTP endpoint" — TODO confirm
                    if config.prometheus_port > 0:
                        exporter.start_server()
                    self._prometheus_initialized = True
                except Exception as e:
                    logger.warning(f"Failed to initialize Prometheus: {e}")

            self._initialized = True

    def is_tracing_enabled(self) -> bool:
        """Check if OTel tracing is enabled and initialized."""
        return self._otel_initialized

    def is_metrics_enabled(self) -> bool:
        """Check if Prometheus metrics are enabled and initialized."""
        return self._prometheus_initialized

    def get_tracer(self, name: str = __name__) -> Any:
        """Get a tracer instance (real or no-op).

        Args:
            name: Tracer name (typically module __name__)

        Returns:
            Tracer instance
        """
        if self._otel_initialized:
            from foundry_mcp.core.otel import get_tracer

            return get_tracer(name)

        # Fall back to a no-op tracer so callers never need to branch.
        from foundry_mcp.core.otel_stubs import get_noop_tracer

        return get_noop_tracer(name)

    def get_prometheus_exporter(self) -> Any:
        """Get the Prometheus exporter instance.

        Returns:
            PrometheusExporter instance (real or with no-op methods)
        """
        if self._prometheus_initialized:
            from foundry_mcp.core.prometheus import get_prometheus_exporter

            return get_prometheus_exporter()

        # Return a minimal no-op object mirroring the exporter's recording
        # API so callers can invoke it unconditionally.
        class NoOpExporter:
            def record_tool_invocation(self, *args: Any, **kwargs: Any) -> None:
                pass

            def record_tool_start(self, *args: Any, **kwargs: Any) -> None:
                pass

            def record_tool_end(self, *args: Any, **kwargs: Any) -> None:
                pass

            def record_resource_access(self, *args: Any, **kwargs: Any) -> None:
                pass

            def record_error(self, *args: Any, **kwargs: Any) -> None:
                pass

        return NoOpExporter()

    def shutdown(self) -> None:
        """Shutdown observability providers and flush pending data."""
        if self._otel_initialized:
            try:
                from foundry_mcp.core.otel import shutdown

                shutdown()
            except Exception:
                pass
            self._otel_initialized = False

        # NOTE(review): _prometheus_initialized is intentionally (?) left
        # unchanged here, so is_metrics_enabled() can still report True after
        # shutdown — confirm whether that is desired.
        self._initialized = False
        self._config = None
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
# Global manager instance accessor
|
|
284
|
+
def get_observability_manager() -> ObservabilityManager:
    """Return the process-wide ObservabilityManager singleton."""
    # Calling the class directly yields the singleton via its __new__.
    return ObservabilityManager()
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
# =============================================================================
|
|
290
|
+
# Sensitive Data Patterns for Redaction
|
|
291
|
+
# =============================================================================
|
|
292
|
+
# These patterns identify sensitive data that should be redacted from logs,
|
|
293
|
+
# error messages, and audit trails. See docs/mcp_best_practices/08-security-trust-boundaries.md
|
|
294
|
+
|
|
295
|
+
# (pattern, label) pairs applied in order by redact_sensitive_data(); most
# patterns are case-insensitive via an inline (?i) flag.
SENSITIVE_PATTERNS: Final[List[Tuple[str, str]]] = [
    # API Keys and Tokens
    (r"(?i)(api[_-]?key|apikey)\s*[:=]\s*['\"]?([a-zA-Z0-9_\-]{20,})['\"]?", "API_KEY"),
    (
        r"(?i)(secret[_-]?key|secretkey)\s*[:=]\s*['\"]?([a-zA-Z0-9_\-]{20,})['\"]?",
        "SECRET_KEY",
    ),
    (
        r"(?i)(access[_-]?token|accesstoken)\s*[:=]\s*['\"]?([a-zA-Z0-9_\-\.]{20,})['\"]?",
        "ACCESS_TOKEN",
    ),
    (r"(?i)bearer\s+([a-zA-Z0-9_\-\.]+)", "BEARER_TOKEN"),
    # Passwords
    (r"(?i)(password|passwd|pwd)\s*[:=]\s*['\"]?([^\s'\"]{4,})['\"]?", "PASSWORD"),
    # AWS Credentials
    (r"AKIA[0-9A-Z]{16}", "AWS_ACCESS_KEY"),
    (
        r"(?i)(aws[_-]?secret[_-]?access[_-]?key)\s*[:=]\s*['\"]?([a-zA-Z0-9/+=]{40})['\"]?",
        "AWS_SECRET",
    ),
    # Private Keys (PEM header is enough to flag the block)
    (r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----", "PRIVATE_KEY"),
    # Email Addresses (for PII protection)
    (r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", "EMAIL"),
    # Social Security Numbers (US)
    (r"\b\d{3}-\d{2}-\d{4}\b", "SSN"),
    # Credit Card Numbers (basic pattern)
    (r"\b(?:\d{4}[- ]?){3}\d{4}\b", "CREDIT_CARD"),
    # Phone Numbers (various formats)
    (r"\b(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b", "PHONE"),
    # GitHub/GitLab Tokens
    (r"gh[pousr]_[a-zA-Z0-9]{36,}", "GITHUB_TOKEN"),
    (r"glpat-[a-zA-Z0-9\-]{20,}", "GITLAB_TOKEN"),
    # Generic Base64-encoded secrets (long base64 strings in key contexts)
    (
        r"(?i)(token|secret|key|credential)\s*[:=]\s*['\"]?([a-zA-Z0-9+/]{40,}={0,2})['\"]?",
        "BASE64_SECRET",
    ),
]
"""Patterns for detecting sensitive data that should be redacted.

Each tuple contains:
- regex pattern: The pattern to match sensitive data
- label: A human-readable label for the type of sensitive data

Use with redact_sensitive_data() to sanitize logs and error messages.
"""
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def redact_sensitive_data(
    data: Any,
    *,
    patterns: Optional[List[Tuple[str, str]]] = None,
    redaction_format: str = "[REDACTED:{label}]",
    max_depth: int = 10,
) -> Any:
    """Recursively scrub sensitive values from nested data structures.

    Walks strings, dicts, lists, and tuples, replacing anything matching a
    sensitive pattern (API keys, passwords, PII, ...) with a redaction
    marker. Dict entries whose *key* is a known sensitive name have their
    value redacted wholesale. The input is never mutated; a sanitized copy
    is returned, so the result is safe to log or embed in error messages.

    Args:
        data: Value to sanitize (string, dict, list/tuple, or nesting thereof)
        patterns: Override patterns (defaults to SENSITIVE_PATTERNS)
        redaction_format: Marker template; ``{label}`` is substituted
        max_depth: Recursion guard against pathological nesting

    Returns:
        A sanitized copy of ``data``

    Example:
        >>> payload = {"api_key": "sk_live_abc123...", "user": "john"}
        >>> safe = redact_sensitive_data(payload)
        >>> logger.info("Request data", extra={"data": safe})
    """
    if max_depth <= 0:
        return "[MAX_DEPTH_EXCEEDED]"

    active = SENSITIVE_PATTERNS if patterns is None else patterns

    if isinstance(data, str):
        # Apply every pattern in order over the running result.
        scrubbed = data
        for regex, label in active:
            scrubbed = re.sub(regex, redaction_format.format(label=label), scrubbed)
        return scrubbed

    if isinstance(data, dict):
        # Keys whose values are redacted wholesale, regardless of content.
        blocked_keys = {
            "password",
            "passwd",
            "pwd",
            "secret",
            "token",
            "api_key",
            "apikey",
            "api-key",
            "access_token",
            "refresh_token",
            "private_key",
            "secret_key",
            "auth",
            "authorization",
            "credential",
            "credentials",
            "ssn",
            "credit_card",
        }
        sanitized: Dict[Any, Any] = {}
        for key, value in data.items():
            normalized = str(key).lower().replace("-", "_")
            if normalized in blocked_keys:
                sanitized[key] = f"[REDACTED:{normalized.upper()}]"
            else:
                sanitized[key] = redact_sensitive_data(
                    value,
                    patterns=active,
                    redaction_format=redaction_format,
                    max_depth=max_depth - 1,
                )
        return sanitized

    if isinstance(data, (list, tuple)):
        sanitized_items = [
            redact_sensitive_data(
                entry,
                patterns=active,
                redaction_format=redaction_format,
                max_depth=max_depth - 1,
            )
            for entry in data
        ]
        # Rebuild tuples through their own type; lists stay plain lists.
        return type(data)(sanitized_items) if isinstance(data, tuple) else sanitized_items

    # Scalars (int, float, bool, None, ...) pass through untouched.
    return data
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
def redact_for_logging(data: Any) -> str:
    """Redact *data* and serialize it to a JSON string for safe logging.

    Convenience wrapper around redact_sensitive_data() that also JSON-encodes
    the result, falling back to ``str()`` when the redacted structure is not
    JSON-serializable.

    Args:
        data: Data to redact and serialize

    Returns:
        JSON string with sensitive data redacted

    Example:
        >>> logger.info(f"Processing request: {redact_for_logging(request_data)}")
    """
    safe = redact_sensitive_data(data)
    try:
        return json.dumps(safe, default=str)
    except (TypeError, ValueError):
        return str(safe)
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
# Generic type variable for decorator/wrapper signatures in this module.
T = TypeVar("T")
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
class MetricType(Enum):
    """Types of metrics that can be emitted."""

    COUNTER = "counter"  # monotonically increasing count
    GAUGE = "gauge"  # point-in-time value
    HISTOGRAM = "histogram"  # distribution of observed values
    TIMER = "timer"  # duration measurement (milliseconds, per MetricsCollector.timer)
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
class AuditEventType(Enum):
    """Types of audit events for security logging."""

    AUTH_SUCCESS = "auth_success"
    AUTH_FAILURE = "auth_failure"
    RATE_LIMIT = "rate_limit"
    RESOURCE_ACCESS = "resource_access"
    TOOL_INVOCATION = "tool_invocation"
    PERMISSION_DENIED = "permission_denied"
    CONFIG_CHANGE = "config_change"
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
@dataclass
class Metric:
    """A single structured metric observation.

    Attributes are JSON-friendly so the record can be attached directly to a
    structured log entry via to_dict().
    """

    name: str
    value: Union[int, float]
    metric_type: MetricType
    labels: Dict[str, str] = field(default_factory=dict)
    # ISO-8601 UTC timestamp captured at construction time.
    timestamp: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        return dict(
            name=self.name,
            value=self.value,
            type=self.metric_type.value,
            labels=self.labels,
            timestamp=self.timestamp,
        )
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
@dataclass
class AuditEvent:
    """Structured audit event for security logging."""

    event_type: AuditEventType
    details: Dict[str, Any] = field(default_factory=dict)
    # ISO-8601 UTC timestamp captured at construction time.
    timestamp: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )
    correlation_id: Optional[str] = None
    client_id: Optional[str] = None
    user_id: Optional[str] = None
    ip_address: Optional[str] = None

    def __post_init__(self) -> None:
        """Auto-populate correlation_id and client_id from context if not set."""
        if self.correlation_id is None:
            self.correlation_id = get_correlation_id() or None
        if self.client_id is None:
            resolved = get_client_id()
            # "anonymous" is the context default; treat it as unset.
            if resolved and resolved != "anonymous":
                self.client_id = resolved

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization, omitting unset fields."""
        payload: Dict[str, Any] = {
            "event_type": self.event_type.value,
            "timestamp": self.timestamp,
            "details": self.details,
        }
        for key, val in (
            ("correlation_id", self.correlation_id),
            ("client_id", self.client_id),
            ("user_id", self.user_id),
            ("ip_address", self.ip_address),
        ):
            if val:
                payload[key] = val
        return payload
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
class MetricsCollector:
    """Collects metrics and forwards them to the logger and Prometheus.

    Every metric is emitted as a structured-JSON log line (parseable by log
    aggregation systems such as Datadog, Splunk, or CloudWatch). When the
    Prometheus exporter is initialized, counter and timer metrics that carry
    a ``tool`` label are additionally forwarded to it.
    """

    def __init__(self, prefix: str = "foundry_mcp"):
        self.prefix = prefix
        self._logger = logging.getLogger(f"{__name__}.metrics")

    def emit(self, metric: Metric) -> None:
        """Emit a metric to the logger and Prometheus (if enabled).

        Args:
            metric: The Metric to emit
        """
        # The structured log line is produced unconditionally.
        self._logger.info(
            f"METRIC: {self.prefix}.{metric.name}", extra={"metric": metric.to_dict()}
        )

        manager = get_observability_manager()
        if not manager.is_metrics_enabled():
            return
        # Only tool-labelled metrics have a mapping onto the Prometheus
        # exporter's recording API.
        if "tool" in metric.labels:
            exporter = manager.get_prometheus_exporter()
            tool_name = metric.labels["tool"]
            if metric.metric_type == MetricType.COUNTER:
                exporter.record_tool_invocation(
                    tool_name,
                    success=metric.labels.get("status") == "success",
                )
            elif metric.metric_type == MetricType.TIMER:
                exporter.record_tool_invocation(
                    tool_name,
                    success=True,
                    duration_ms=metric.value,
                )

    def _record(
        self,
        name: str,
        value: Union[int, float],
        metric_type: MetricType,
        labels: Optional[Dict[str, str]],
    ) -> None:
        """Build a Metric from the pieces and hand it to :meth:`emit`."""
        self.emit(
            Metric(
                name=name,
                value=value,
                metric_type=metric_type,
                labels=labels or {},
            )
        )

    def counter(
        self, name: str, value: int = 1, labels: Optional[Dict[str, str]] = None
    ) -> None:
        """Emit a counter metric."""
        self._record(name, value, MetricType.COUNTER, labels)

    def gauge(
        self,
        name: str,
        value: Union[int, float],
        labels: Optional[Dict[str, str]] = None,
    ) -> None:
        """Emit a gauge metric."""
        self._record(name, value, MetricType.GAUGE, labels)

    def timer(
        self, name: str, duration_ms: float, labels: Optional[Dict[str, str]] = None
    ) -> None:
        """Emit a timer metric (duration in milliseconds)."""
        self._record(name, duration_ms, MetricType.TIMER, labels)

    def histogram(
        self,
        name: str,
        value: Union[int, float],
        labels: Optional[Dict[str, str]] = None,
    ) -> None:
        """Emit a histogram metric for distribution tracking."""
        self._record(name, value, MetricType.HISTOGRAM, labels)
|
|
656
|
+
|
|
657
|
+
|
|
658
|
+
# Global metrics collector
|
|
659
|
+
_metrics = MetricsCollector()
|
|
660
|
+
|
|
661
|
+
|
|
662
|
+
def get_metrics() -> MetricsCollector:
    """Return the module-level :class:`MetricsCollector` singleton.

    All callers share the same instance, so metrics emitted anywhere in
    the process are aggregated by a single collector.
    """
    return _metrics
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
class AuditLogger:
|
|
668
|
+
"""
|
|
669
|
+
Structured audit logging for security events.
|
|
670
|
+
|
|
671
|
+
Audit logs are written to a separate logger for easy filtering
|
|
672
|
+
and compliance requirements.
|
|
673
|
+
"""
|
|
674
|
+
|
|
675
|
+
def __init__(self):
|
|
676
|
+
self._logger = logging.getLogger(f"{__name__}.audit")
|
|
677
|
+
|
|
678
|
+
def log(self, event: AuditEvent) -> None:
|
|
679
|
+
"""Log an audit event."""
|
|
680
|
+
self._logger.info(
|
|
681
|
+
f"AUDIT: {event.event_type.value}", extra={"audit": event.to_dict()}
|
|
682
|
+
)
|
|
683
|
+
|
|
684
|
+
def auth_success(self, client_id: Optional[str] = None, **details: Any) -> None:
|
|
685
|
+
"""Log successful authentication."""
|
|
686
|
+
self.log(
|
|
687
|
+
AuditEvent(
|
|
688
|
+
event_type=AuditEventType.AUTH_SUCCESS,
|
|
689
|
+
client_id=client_id,
|
|
690
|
+
details=details,
|
|
691
|
+
)
|
|
692
|
+
)
|
|
693
|
+
|
|
694
|
+
def auth_failure(
|
|
695
|
+
self,
|
|
696
|
+
reason: str,
|
|
697
|
+
client_id: Optional[str] = None,
|
|
698
|
+
ip_address: Optional[str] = None,
|
|
699
|
+
**details: Any,
|
|
700
|
+
) -> None:
|
|
701
|
+
"""Log failed authentication."""
|
|
702
|
+
self.log(
|
|
703
|
+
AuditEvent(
|
|
704
|
+
event_type=AuditEventType.AUTH_FAILURE,
|
|
705
|
+
client_id=client_id,
|
|
706
|
+
ip_address=ip_address,
|
|
707
|
+
details={"reason": reason, **details},
|
|
708
|
+
)
|
|
709
|
+
)
|
|
710
|
+
|
|
711
|
+
def rate_limit(
|
|
712
|
+
self,
|
|
713
|
+
client_id: Optional[str] = None,
|
|
714
|
+
limit: Optional[int] = None,
|
|
715
|
+
**details: Any,
|
|
716
|
+
) -> None:
|
|
717
|
+
"""Log rate limit event."""
|
|
718
|
+
self.log(
|
|
719
|
+
AuditEvent(
|
|
720
|
+
event_type=AuditEventType.RATE_LIMIT,
|
|
721
|
+
client_id=client_id,
|
|
722
|
+
details={"limit": limit, **details},
|
|
723
|
+
)
|
|
724
|
+
)
|
|
725
|
+
|
|
726
|
+
def resource_access(
|
|
727
|
+
self, resource_type: str, resource_id: str, action: str = "read", **details: Any
|
|
728
|
+
) -> None:
|
|
729
|
+
"""Log resource access."""
|
|
730
|
+
self.log(
|
|
731
|
+
AuditEvent(
|
|
732
|
+
event_type=AuditEventType.RESOURCE_ACCESS,
|
|
733
|
+
details={
|
|
734
|
+
"resource_type": resource_type,
|
|
735
|
+
"resource_id": resource_id,
|
|
736
|
+
"action": action,
|
|
737
|
+
**details,
|
|
738
|
+
},
|
|
739
|
+
)
|
|
740
|
+
)
|
|
741
|
+
|
|
742
|
+
def tool_invocation(
|
|
743
|
+
self,
|
|
744
|
+
tool_name: str,
|
|
745
|
+
success: bool = True,
|
|
746
|
+
duration_ms: Optional[float] = None,
|
|
747
|
+
correlation_id: Optional[str] = None,
|
|
748
|
+
**details: Any,
|
|
749
|
+
) -> None:
|
|
750
|
+
"""Log tool invocation."""
|
|
751
|
+
self.log(
|
|
752
|
+
AuditEvent(
|
|
753
|
+
event_type=AuditEventType.TOOL_INVOCATION,
|
|
754
|
+
correlation_id=correlation_id,
|
|
755
|
+
details={
|
|
756
|
+
"tool": tool_name,
|
|
757
|
+
"success": success,
|
|
758
|
+
"duration_ms": duration_ms,
|
|
759
|
+
**details,
|
|
760
|
+
},
|
|
761
|
+
)
|
|
762
|
+
)
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
# Global audit logger — module-level singleton used by the decorators
# below; external code should obtain it via get_audit_logger().
_audit = AuditLogger()
|
|
767
|
+
|
|
768
|
+
|
|
769
|
+
def get_audit_logger() -> AuditLogger:
    """Return the module-level :class:`AuditLogger` singleton.

    All callers share one instance so audit records land on a single
    ``<module>.audit`` logger.
    """
    return _audit
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
def audit_log(event_type: str, **details: Any) -> None:
    """
    Convenience function for audit logging.

    Args:
        event_type: Type of event (auth_success, auth_failure, rate_limit,
            resource_access, tool_invocation, permission_denied, config_change)
        **details: Additional details to include in the audit log
    """
    try:
        resolved = AuditEventType(event_type)
    except ValueError:
        # Unknown event types fall back to TOOL_INVOCATION; the original
        # string is preserved in the details for later inspection.
        resolved = AuditEventType.TOOL_INVOCATION
        details["original_event_type"] = event_type

    _audit.log(AuditEvent(event_type=resolved, details=details))
|
|
790
|
+
|
|
791
|
+
|
|
792
|
+
def _record_to_metrics_persistence(
|
|
793
|
+
tool_name: str, success: bool, duration_ms: float, action: Optional[str] = None
|
|
794
|
+
) -> None:
|
|
795
|
+
"""
|
|
796
|
+
Record tool invocation to metrics persistence for dashboard visibility.
|
|
797
|
+
|
|
798
|
+
Args:
|
|
799
|
+
tool_name: Name of the tool (router)
|
|
800
|
+
success: Whether the invocation succeeded
|
|
801
|
+
duration_ms: Duration in milliseconds
|
|
802
|
+
action: Optional action name for router tools (e.g., "list", "validate")
|
|
803
|
+
|
|
804
|
+
Fails silently if metrics persistence is not configured.
|
|
805
|
+
"""
|
|
806
|
+
try:
|
|
807
|
+
from foundry_mcp.core.metrics_persistence import get_metrics_collector
|
|
808
|
+
|
|
809
|
+
collector = get_metrics_collector()
|
|
810
|
+
if collector is not None and collector._config.enabled:
|
|
811
|
+
status = "success" if success else "error"
|
|
812
|
+
labels = {"tool": tool_name, "status": status}
|
|
813
|
+
if action:
|
|
814
|
+
labels["action"] = action
|
|
815
|
+
collector.record(
|
|
816
|
+
"tool_invocations_total",
|
|
817
|
+
1.0,
|
|
818
|
+
metric_type="counter",
|
|
819
|
+
labels=labels,
|
|
820
|
+
)
|
|
821
|
+
duration_labels = {"tool": tool_name}
|
|
822
|
+
if action:
|
|
823
|
+
duration_labels["action"] = action
|
|
824
|
+
collector.record(
|
|
825
|
+
"tool_duration_ms",
|
|
826
|
+
duration_ms,
|
|
827
|
+
metric_type="gauge",
|
|
828
|
+
labels=duration_labels,
|
|
829
|
+
)
|
|
830
|
+
except Exception:
|
|
831
|
+
# Never let metrics persistence failures affect tool execution
|
|
832
|
+
pass
|
|
833
|
+
|
|
834
|
+
|
|
835
|
+
def mcp_tool(
    tool_name: Optional[str] = None, emit_metrics: bool = True, audit: bool = True
) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """
    Decorator for MCP tool handlers with observability.

    Automatically:
    - Logs tool invocations
    - Emits latency and status metrics
    - Creates audit log entries
    - Creates OTel spans when tracing is enabled
    - Records Prometheus metrics when metrics are enabled

    Args:
        tool_name: Override tool name (defaults to function name)
        emit_metrics: Whether to emit metrics
        audit: Whether to create audit log entries
    """

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        name = tool_name or func.__name__

        @functools.wraps(func)
        async def async_wrapper(*args: Any, **kwargs: Any) -> T:
            # Reuse an existing correlation id when one is already in scope;
            # otherwise establish a fresh request context for this call.
            existing_corr_id = get_correlation_id()
            corr_id = existing_corr_id or generate_correlation_id(prefix="tool")

            # BUGFIX: args/kwargs are passed as a tuple/dict (not unpacked)
            # so that tool keyword arguments named e.g. "name" or "audit"
            # cannot collide with the impl's own parameters — the sync path
            # already used this convention.
            if not existing_corr_id:
                with sync_request_context(correlation_id=corr_id):
                    return await _async_tool_impl(
                        func, name, corr_id, emit_metrics, audit, args, kwargs
                    )
            else:
                return await _async_tool_impl(
                    func, name, corr_id, emit_metrics, audit, args, kwargs
                )

        async def _async_tool_impl(
            _wrapped_func: Callable[..., T],
            _tool_name: str,
            _corr_id: str,
            _emit_metrics: bool,
            _do_audit: bool,
            _args: tuple,
            _kwargs: dict,
        ) -> T:
            """Internal implementation for async tool execution.

            Note: Parameter names are prefixed with underscore to avoid
            conflicts with tool parameter names (e.g., 'name').
            """
            start = time.perf_counter()
            success = True
            error_msg = None

            # Get observability manager for OTel/Prometheus integration
            manager = get_observability_manager()
            tracer = manager.get_tracer(__name__) if manager.is_tracing_enabled() else None
            prom_exporter = (
                manager.get_prometheus_exporter() if manager.is_metrics_enabled() else None
            )

            # Start Prometheus active operation tracking
            if prom_exporter:
                prom_exporter.record_tool_start(_tool_name)

            # Create OTel span if tracing enabled (with correlation_id attribute)
            span_context = None
            if tracer:
                span_context = tracer.start_as_current_span(
                    f"tool:{_tool_name}",
                    attributes={
                        "tool.name": _tool_name,
                        "tool.type": "mcp_tool",
                        "request.correlation_id": _corr_id,
                    },
                )

            try:
                if span_context:
                    with span_context:
                        result = await _wrapped_func(*_args, **_kwargs)
                else:
                    result = await _wrapped_func(*_args, **_kwargs)
                return result
            except Exception as e:
                success = False
                error_msg = str(e)
                # Record error in Prometheus
                if prom_exporter:
                    prom_exporter.record_error(_tool_name, type(e).__name__)
                raise
            finally:
                duration_ms = (time.perf_counter() - start) * 1000

                # End Prometheus active operation tracking
                if prom_exporter:
                    prom_exporter.record_tool_end(_tool_name)
                    prom_exporter.record_tool_invocation(
                        _tool_name, success=success, duration_ms=duration_ms
                    )

                if _emit_metrics:
                    labels = {"tool": _tool_name, "status": "success" if success else "error"}
                    _metrics.counter("tool.invocations", labels=labels)
                    _metrics.timer("tool.latency", duration_ms, labels={"tool": _tool_name})

                if _do_audit:
                    _audit.tool_invocation(
                        tool_name=_tool_name,
                        success=success,
                        duration_ms=round(duration_ms, 2),
                        error=error_msg,
                        correlation_id=_corr_id,
                    )

                # Record to metrics persistence (for dashboard visibility)
                # Extract action from kwargs for router tools
                action = (
                    _kwargs.get("action") if isinstance(_kwargs.get("action"), str) else None
                )
                _record_to_metrics_persistence(_tool_name, success, duration_ms, action=action)

        @functools.wraps(func)
        def sync_wrapper(*args: Any, **kwargs: Any) -> T:
            # Set up request context if not already set
            existing_corr_id = get_correlation_id()
            corr_id = existing_corr_id or generate_correlation_id(prefix="tool")

            # Use context manager if we need to establish context
            if not existing_corr_id:
                with sync_request_context(correlation_id=corr_id):
                    return _sync_tool_impl(
                        func, name, corr_id, emit_metrics, audit, args, kwargs
                    )
            else:
                return _sync_tool_impl(
                    func, name, corr_id, emit_metrics, audit, args, kwargs
                )

        def _sync_tool_impl(
            _wrapped_func: Callable[..., T],
            _tool_name: str,
            _corr_id: str,
            _emit_metrics: bool,
            _do_audit: bool,
            _args: tuple,
            _kwargs: dict,
        ) -> T:
            """Internal implementation for sync tool execution.

            Note: Parameter names are prefixed with underscore to avoid
            conflicts with tool parameter names (e.g., 'name').
            """
            start = time.perf_counter()
            success = True
            error_msg = None

            # Get observability manager for OTel/Prometheus integration
            manager = get_observability_manager()
            tracer = manager.get_tracer(__name__) if manager.is_tracing_enabled() else None
            prom_exporter = (
                manager.get_prometheus_exporter() if manager.is_metrics_enabled() else None
            )

            # Start Prometheus active operation tracking
            if prom_exporter:
                prom_exporter.record_tool_start(_tool_name)

            # Create OTel span if tracing enabled (with correlation_id attribute)
            span_context = None
            if tracer:
                span_context = tracer.start_as_current_span(
                    f"tool:{_tool_name}",
                    attributes={
                        "tool.name": _tool_name,
                        "tool.type": "mcp_tool",
                        "request.correlation_id": _corr_id,
                    },
                )

            try:
                if span_context:
                    with span_context:
                        result = _wrapped_func(*_args, **_kwargs)
                else:
                    result = _wrapped_func(*_args, **_kwargs)
                return result
            except Exception as e:
                success = False
                error_msg = str(e)
                # Record error in Prometheus
                if prom_exporter:
                    prom_exporter.record_error(_tool_name, type(e).__name__)
                raise
            finally:
                duration_ms = (time.perf_counter() - start) * 1000

                # End Prometheus active operation tracking
                if prom_exporter:
                    prom_exporter.record_tool_end(_tool_name)
                    prom_exporter.record_tool_invocation(
                        _tool_name, success=success, duration_ms=duration_ms
                    )

                if _emit_metrics:
                    labels = {"tool": _tool_name, "status": "success" if success else "error"}
                    _metrics.counter("tool.invocations", labels=labels)
                    _metrics.timer("tool.latency", duration_ms, labels={"tool": _tool_name})

                if _do_audit:
                    _audit.tool_invocation(
                        tool_name=_tool_name,
                        success=success,
                        duration_ms=round(duration_ms, 2),
                        error=error_msg,
                        correlation_id=_corr_id,
                    )

                # Record to metrics persistence (for dashboard visibility)
                # Extract action from kwargs for router tools
                action = (
                    _kwargs.get("action") if isinstance(_kwargs.get("action"), str) else None
                )
                _record_to_metrics_persistence(_tool_name, success, duration_ms, action=action)

        # Return appropriate wrapper based on whether func is async
        import asyncio

        if asyncio.iscoroutinefunction(func):
            return async_wrapper
        return sync_wrapper

    return decorator
|
|
1058
|
+
|
|
1059
|
+
|
|
1060
|
+
def mcp_resource(
    resource_type: Optional[str] = None, emit_metrics: bool = True, audit: bool = True
) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """
    Decorator for MCP resource handlers with observability.

    Automatically:
    - Logs resource access
    - Emits latency and status metrics
    - Creates audit log entries
    - Creates OTel spans when tracing is enabled
    - Records Prometheus metrics when metrics are enabled

    Args:
        resource_type: Type of resource (e.g., "spec", "journal")
        emit_metrics: Whether to emit metrics
        audit: Whether to create audit log entries
    """

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        rtype = resource_type or "resource"

        @functools.wraps(func)
        async def async_wrapper(*args: Any, **kwargs: Any) -> T:
            start = time.perf_counter()
            success = True
            error_msg = None
            # Best-effort id extraction from keyword arguments only;
            # positionally-passed ids are reported as "unknown".
            resource_id = kwargs.get("spec_id") or kwargs.get("id") or "unknown"

            # Get observability manager for OTel/Prometheus integration
            manager = get_observability_manager()
            tracer = manager.get_tracer(__name__) if manager.is_tracing_enabled() else None
            prom_exporter = manager.get_prometheus_exporter() if manager.is_metrics_enabled() else None

            # Create OTel span if tracing enabled
            span_context = None
            if tracer:
                span_context = tracer.start_as_current_span(
                    f"resource:{rtype}",
                    attributes={
                        "resource.type": rtype,
                        "resource.id": str(resource_id),
                    },
                )

            try:
                if span_context:
                    with span_context:
                        result = await func(*args, **kwargs)
                else:
                    result = await func(*args, **kwargs)
                return result
            except Exception as e:
                success = False
                error_msg = str(e)
                # Record error in Prometheus
                if prom_exporter:
                    prom_exporter.record_error(f"resource:{rtype}", type(e).__name__)
                raise
            finally:
                # Runs on success and failure alike, so metrics/audit
                # always reflect the attempt.
                duration_ms = (time.perf_counter() - start) * 1000

                # Record Prometheus resource access (counted even on error)
                if prom_exporter:
                    prom_exporter.record_resource_access(rtype, "read")

                if emit_metrics:
                    labels = {
                        "resource_type": rtype,
                        "status": "success" if success else "error",
                    }
                    _metrics.counter("resource.access", labels=labels)
                    _metrics.timer(
                        "resource.latency", duration_ms, labels={"resource_type": rtype}
                    )

                if audit:
                    _audit.resource_access(
                        resource_type=rtype,
                        resource_id=str(resource_id),
                        action="read",
                        success=success,
                        duration_ms=round(duration_ms, 2),
                        error=error_msg,
                    )

        @functools.wraps(func)
        def sync_wrapper(*args: Any, **kwargs: Any) -> T:
            # Mirrors async_wrapper exactly, minus the await.
            start = time.perf_counter()
            success = True
            error_msg = None
            # Best-effort id extraction from keyword arguments only;
            # positionally-passed ids are reported as "unknown".
            resource_id = kwargs.get("spec_id") or kwargs.get("id") or "unknown"

            # Get observability manager for OTel/Prometheus integration
            manager = get_observability_manager()
            tracer = manager.get_tracer(__name__) if manager.is_tracing_enabled() else None
            prom_exporter = manager.get_prometheus_exporter() if manager.is_metrics_enabled() else None

            # Create OTel span if tracing enabled
            span_context = None
            if tracer:
                span_context = tracer.start_as_current_span(
                    f"resource:{rtype}",
                    attributes={
                        "resource.type": rtype,
                        "resource.id": str(resource_id),
                    },
                )

            try:
                if span_context:
                    with span_context:
                        result = func(*args, **kwargs)
                else:
                    result = func(*args, **kwargs)
                return result
            except Exception as e:
                success = False
                error_msg = str(e)
                # Record error in Prometheus
                if prom_exporter:
                    prom_exporter.record_error(f"resource:{rtype}", type(e).__name__)
                raise
            finally:
                duration_ms = (time.perf_counter() - start) * 1000

                # Record Prometheus resource access (counted even on error)
                if prom_exporter:
                    prom_exporter.record_resource_access(rtype, "read")

                if emit_metrics:
                    labels = {
                        "resource_type": rtype,
                        "status": "success" if success else "error",
                    }
                    _metrics.counter("resource.access", labels=labels)
                    _metrics.timer(
                        "resource.latency", duration_ms, labels={"resource_type": rtype}
                    )

                if audit:
                    _audit.resource_access(
                        resource_type=rtype,
                        resource_id=str(resource_id),
                        action="read",
                        success=success,
                        duration_ms=round(duration_ms, 2),
                        error=error_msg,
                    )

        # Select the wrapper matching the handler's calling convention.
        import asyncio

        if asyncio.iscoroutinefunction(func):
            return async_wrapper
        return sync_wrapper

    return decorator
|