foundry-mcp 0.8.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of foundry-mcp might be problematic. Click here for more details.
- foundry_mcp/__init__.py +13 -0
- foundry_mcp/cli/__init__.py +67 -0
- foundry_mcp/cli/__main__.py +9 -0
- foundry_mcp/cli/agent.py +96 -0
- foundry_mcp/cli/commands/__init__.py +37 -0
- foundry_mcp/cli/commands/cache.py +137 -0
- foundry_mcp/cli/commands/dashboard.py +148 -0
- foundry_mcp/cli/commands/dev.py +446 -0
- foundry_mcp/cli/commands/journal.py +377 -0
- foundry_mcp/cli/commands/lifecycle.py +274 -0
- foundry_mcp/cli/commands/modify.py +824 -0
- foundry_mcp/cli/commands/plan.py +640 -0
- foundry_mcp/cli/commands/pr.py +393 -0
- foundry_mcp/cli/commands/review.py +667 -0
- foundry_mcp/cli/commands/session.py +472 -0
- foundry_mcp/cli/commands/specs.py +686 -0
- foundry_mcp/cli/commands/tasks.py +807 -0
- foundry_mcp/cli/commands/testing.py +676 -0
- foundry_mcp/cli/commands/validate.py +982 -0
- foundry_mcp/cli/config.py +98 -0
- foundry_mcp/cli/context.py +298 -0
- foundry_mcp/cli/logging.py +212 -0
- foundry_mcp/cli/main.py +44 -0
- foundry_mcp/cli/output.py +122 -0
- foundry_mcp/cli/registry.py +110 -0
- foundry_mcp/cli/resilience.py +178 -0
- foundry_mcp/cli/transcript.py +217 -0
- foundry_mcp/config.py +1454 -0
- foundry_mcp/core/__init__.py +144 -0
- foundry_mcp/core/ai_consultation.py +1773 -0
- foundry_mcp/core/batch_operations.py +1202 -0
- foundry_mcp/core/cache.py +195 -0
- foundry_mcp/core/capabilities.py +446 -0
- foundry_mcp/core/concurrency.py +898 -0
- foundry_mcp/core/context.py +540 -0
- foundry_mcp/core/discovery.py +1603 -0
- foundry_mcp/core/error_collection.py +728 -0
- foundry_mcp/core/error_store.py +592 -0
- foundry_mcp/core/health.py +749 -0
- foundry_mcp/core/intake.py +933 -0
- foundry_mcp/core/journal.py +700 -0
- foundry_mcp/core/lifecycle.py +412 -0
- foundry_mcp/core/llm_config.py +1376 -0
- foundry_mcp/core/llm_patterns.py +510 -0
- foundry_mcp/core/llm_provider.py +1569 -0
- foundry_mcp/core/logging_config.py +374 -0
- foundry_mcp/core/metrics_persistence.py +584 -0
- foundry_mcp/core/metrics_registry.py +327 -0
- foundry_mcp/core/metrics_store.py +641 -0
- foundry_mcp/core/modifications.py +224 -0
- foundry_mcp/core/naming.py +146 -0
- foundry_mcp/core/observability.py +1216 -0
- foundry_mcp/core/otel.py +452 -0
- foundry_mcp/core/otel_stubs.py +264 -0
- foundry_mcp/core/pagination.py +255 -0
- foundry_mcp/core/progress.py +387 -0
- foundry_mcp/core/prometheus.py +564 -0
- foundry_mcp/core/prompts/__init__.py +464 -0
- foundry_mcp/core/prompts/fidelity_review.py +691 -0
- foundry_mcp/core/prompts/markdown_plan_review.py +515 -0
- foundry_mcp/core/prompts/plan_review.py +627 -0
- foundry_mcp/core/providers/__init__.py +237 -0
- foundry_mcp/core/providers/base.py +515 -0
- foundry_mcp/core/providers/claude.py +472 -0
- foundry_mcp/core/providers/codex.py +637 -0
- foundry_mcp/core/providers/cursor_agent.py +630 -0
- foundry_mcp/core/providers/detectors.py +515 -0
- foundry_mcp/core/providers/gemini.py +426 -0
- foundry_mcp/core/providers/opencode.py +718 -0
- foundry_mcp/core/providers/opencode_wrapper.js +308 -0
- foundry_mcp/core/providers/package-lock.json +24 -0
- foundry_mcp/core/providers/package.json +25 -0
- foundry_mcp/core/providers/registry.py +607 -0
- foundry_mcp/core/providers/test_provider.py +171 -0
- foundry_mcp/core/providers/validation.py +857 -0
- foundry_mcp/core/rate_limit.py +427 -0
- foundry_mcp/core/research/__init__.py +68 -0
- foundry_mcp/core/research/memory.py +528 -0
- foundry_mcp/core/research/models.py +1234 -0
- foundry_mcp/core/research/providers/__init__.py +40 -0
- foundry_mcp/core/research/providers/base.py +242 -0
- foundry_mcp/core/research/providers/google.py +507 -0
- foundry_mcp/core/research/providers/perplexity.py +442 -0
- foundry_mcp/core/research/providers/semantic_scholar.py +544 -0
- foundry_mcp/core/research/providers/tavily.py +383 -0
- foundry_mcp/core/research/workflows/__init__.py +25 -0
- foundry_mcp/core/research/workflows/base.py +298 -0
- foundry_mcp/core/research/workflows/chat.py +271 -0
- foundry_mcp/core/research/workflows/consensus.py +539 -0
- foundry_mcp/core/research/workflows/deep_research.py +4142 -0
- foundry_mcp/core/research/workflows/ideate.py +682 -0
- foundry_mcp/core/research/workflows/thinkdeep.py +405 -0
- foundry_mcp/core/resilience.py +600 -0
- foundry_mcp/core/responses.py +1624 -0
- foundry_mcp/core/review.py +366 -0
- foundry_mcp/core/security.py +438 -0
- foundry_mcp/core/spec.py +4119 -0
- foundry_mcp/core/task.py +2463 -0
- foundry_mcp/core/testing.py +839 -0
- foundry_mcp/core/validation.py +2357 -0
- foundry_mcp/dashboard/__init__.py +32 -0
- foundry_mcp/dashboard/app.py +119 -0
- foundry_mcp/dashboard/components/__init__.py +17 -0
- foundry_mcp/dashboard/components/cards.py +88 -0
- foundry_mcp/dashboard/components/charts.py +177 -0
- foundry_mcp/dashboard/components/filters.py +136 -0
- foundry_mcp/dashboard/components/tables.py +195 -0
- foundry_mcp/dashboard/data/__init__.py +11 -0
- foundry_mcp/dashboard/data/stores.py +433 -0
- foundry_mcp/dashboard/launcher.py +300 -0
- foundry_mcp/dashboard/views/__init__.py +12 -0
- foundry_mcp/dashboard/views/errors.py +217 -0
- foundry_mcp/dashboard/views/metrics.py +164 -0
- foundry_mcp/dashboard/views/overview.py +96 -0
- foundry_mcp/dashboard/views/providers.py +83 -0
- foundry_mcp/dashboard/views/sdd_workflow.py +255 -0
- foundry_mcp/dashboard/views/tool_usage.py +139 -0
- foundry_mcp/prompts/__init__.py +9 -0
- foundry_mcp/prompts/workflows.py +525 -0
- foundry_mcp/resources/__init__.py +9 -0
- foundry_mcp/resources/specs.py +591 -0
- foundry_mcp/schemas/__init__.py +38 -0
- foundry_mcp/schemas/intake-schema.json +89 -0
- foundry_mcp/schemas/sdd-spec-schema.json +414 -0
- foundry_mcp/server.py +150 -0
- foundry_mcp/tools/__init__.py +10 -0
- foundry_mcp/tools/unified/__init__.py +92 -0
- foundry_mcp/tools/unified/authoring.py +3620 -0
- foundry_mcp/tools/unified/context_helpers.py +98 -0
- foundry_mcp/tools/unified/documentation_helpers.py +268 -0
- foundry_mcp/tools/unified/environment.py +1341 -0
- foundry_mcp/tools/unified/error.py +479 -0
- foundry_mcp/tools/unified/health.py +225 -0
- foundry_mcp/tools/unified/journal.py +841 -0
- foundry_mcp/tools/unified/lifecycle.py +640 -0
- foundry_mcp/tools/unified/metrics.py +777 -0
- foundry_mcp/tools/unified/plan.py +876 -0
- foundry_mcp/tools/unified/pr.py +294 -0
- foundry_mcp/tools/unified/provider.py +589 -0
- foundry_mcp/tools/unified/research.py +1283 -0
- foundry_mcp/tools/unified/review.py +1042 -0
- foundry_mcp/tools/unified/review_helpers.py +314 -0
- foundry_mcp/tools/unified/router.py +102 -0
- foundry_mcp/tools/unified/server.py +565 -0
- foundry_mcp/tools/unified/spec.py +1283 -0
- foundry_mcp/tools/unified/task.py +3846 -0
- foundry_mcp/tools/unified/test.py +431 -0
- foundry_mcp/tools/unified/verification.py +520 -0
- foundry_mcp-0.8.22.dist-info/METADATA +344 -0
- foundry_mcp-0.8.22.dist-info/RECORD +153 -0
- foundry_mcp-0.8.22.dist-info/WHEEL +4 -0
- foundry_mcp-0.8.22.dist-info/entry_points.txt +3 -0
- foundry_mcp-0.8.22.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,564 @@
|
|
|
1
|
+
"""Prometheus metrics integration with graceful degradation.
|
|
2
|
+
|
|
3
|
+
This module provides Prometheus metrics integration that gracefully falls back
|
|
4
|
+
to no-op operations when the optional prometheus_client dependency is not installed.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from foundry_mcp.core.prometheus import get_prometheus_exporter
|
|
8
|
+
|
|
9
|
+
exporter = get_prometheus_exporter()
|
|
10
|
+
exporter.record_tool_invocation("list_specs", success=True, duration_ms=45.2)
|
|
11
|
+
|
|
12
|
+
# Optionally start HTTP server for /metrics endpoint
|
|
13
|
+
exporter.start_server(port=9090)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import os
|
|
19
|
+
import threading
|
|
20
|
+
import time
|
|
21
|
+
from dataclasses import dataclass
|
|
22
|
+
from typing import Any, Callable, Optional, TypeVar
|
|
23
|
+
|
|
24
|
+
# Try to import prometheus_client
|
|
25
|
+
try:
|
|
26
|
+
from prometheus_client import (
|
|
27
|
+
REGISTRY,
|
|
28
|
+
Counter,
|
|
29
|
+
Gauge,
|
|
30
|
+
Histogram,
|
|
31
|
+
start_http_server,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
_PROMETHEUS_AVAILABLE = True
|
|
35
|
+
except ImportError:
|
|
36
|
+
_PROMETHEUS_AVAILABLE = False
|
|
37
|
+
|
|
38
|
+
# Placeholders so type checkers don't complain.
|
|
39
|
+
Counter: Any = None
|
|
40
|
+
Gauge: Any = None
|
|
41
|
+
Histogram: Any = None
|
|
42
|
+
REGISTRY: Any = None
|
|
43
|
+
start_http_server: Any = None
|
|
44
|
+
|
|
45
|
+
F = TypeVar("F", bound=Callable[..., Any])
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# =============================================================================
|
|
49
|
+
# Configuration
|
|
50
|
+
# =============================================================================
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass
class PrometheusConfig:
    """Configuration for Prometheus metrics.

    Attributes:
        enabled: Whether Prometheus metrics are enabled
        port: HTTP server port for /metrics endpoint (0 = no server)
        host: HTTP server host
        namespace: Metric namespace prefix
    """

    enabled: bool = False
    port: int = 0  # 0 means don't start HTTP server
    host: str = "0.0.0.0"
    namespace: str = "foundry_mcp"

    @staticmethod
    def _env(name: str) -> Optional[str]:
        """Return the stripped value of env var ``name``, or None if unset/blank."""
        value = os.environ.get(name, "").strip()
        return value or None

    @classmethod
    def from_env_and_config(
        cls,
        config: Optional[dict[str, Any]] = None,
    ) -> "PrometheusConfig":
        """Load configuration from environment variables and optional config dict.

        Environment variables take precedence over config dict values. An
        environment variable that is unset, empty, or whitespace-only is
        treated as absent for every setting, so the config dict (and then the
        built-in default) applies.

        Env vars:
            PROMETHEUS_ENABLED: "true", "1" or "yes" (case-insensitive) to
                enable; any other non-blank value disables
            PROMETHEUS_PORT: HTTP server port (0 = no server); a value that
                is not an integer yields 0
            PROMETHEUS_HOST: HTTP server host
            PROMETHEUS_NAMESPACE: Metric namespace

        Args:
            config: Optional dict with config values (typically from TOML)

        Returns:
            PrometheusConfig instance
        """
        config = config or {}
        truthy = ("true", "1", "yes")

        # Enabled flag: env wins; config values may be real bools or strings.
        env_enabled = cls._env("PROMETHEUS_ENABLED")
        if env_enabled is not None:
            enabled = env_enabled.lower() in truthy
        else:
            raw_enabled = config.get("enabled", False)
            if isinstance(raw_enabled, str):
                enabled = raw_enabled.strip().lower() in truthy
            else:
                enabled = bool(raw_enabled)

        # Port: always end up with an int so later comparisons such as
        # ``port <= 0`` cannot fail on a string taken from a config file.
        raw_port: Any = cls._env("PROMETHEUS_PORT")
        if raw_port is None:
            raw_port = config.get("port", 0)
        try:
            port = int(raw_port)
        except (TypeError, ValueError, OverflowError):
            port = 0

        # Host / namespace: blank values fall through to the next source.
        host = cls._env("PROMETHEUS_HOST") or config.get("host") or "0.0.0.0"
        namespace = (
            cls._env("PROMETHEUS_NAMESPACE")
            or config.get("namespace")
            or "foundry_mcp"
        )

        return cls(
            enabled=enabled,
            port=port,
            host=host,
            namespace=namespace,
        )
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# =============================================================================
|
|
130
|
+
# Prometheus Exporter
|
|
131
|
+
# =============================================================================
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class PrometheusExporter:
    """Prometheus metrics exporter with graceful degradation.

    When prometheus_client is not installed or metrics are disabled,
    all recording methods become no-ops that silently do nothing.

    Metric objects are created once per metric name and shared between
    exporter instances, so building a second exporter with the same
    namespace (for example after ``reset_exporter()``) reuses the existing
    collectors instead of failing on duplicate registration.
    """

    # Collectors created by any exporter instance, keyed by metric name.
    # prometheus_client's default registry rejects a second collector with an
    # already-registered name (ValueError), so later instances look here.
    _collector_cache: dict[str, Any] = {}

    def __init__(self, config: Optional[PrometheusConfig] = None) -> None:
        """Initialize the exporter.

        Args:
            config: Prometheus configuration. If None, loads from env/defaults.
        """
        self._config = config or PrometheusConfig.from_env_and_config()
        self._initialized = False
        self._server_started = False
        self._lock = threading.Lock()

        # Metric instances (set during initialization)
        self._tool_invocations: Any = None
        self._tool_duration: Any = None
        self._tool_errors: Any = None
        self._resource_access: Any = None
        self._active_operations: Any = None

        # Manifest/discovery metrics
        self._manifest_tokens: Any = None
        self._manifest_tool_count: Any = None

        # Health check metrics
        self._health_status: Any = None
        self._dependency_health: Any = None
        self._health_check_duration: Any = None

        # Auto-initialize if enabled
        if self.is_enabled():
            self._initialize_metrics()

    def is_available(self) -> bool:
        """Check if prometheus_client is installed."""
        return _PROMETHEUS_AVAILABLE

    def is_enabled(self) -> bool:
        """Check if Prometheus metrics are enabled and available."""
        return self._config.enabled and _PROMETHEUS_AVAILABLE

    def _ready(self) -> bool:
        """Return True when metrics may be recorded, initializing lazily.

        The config object is mutable and reachable through ``get_config()``,
        so metrics can become enabled after construction. Creating the metric
        objects on demand keeps the recording methods from dereferencing
        attributes that are still None.
        """
        if not self.is_enabled():
            return False
        if not self._initialized:
            self._initialize_metrics()
        return self._initialized

    @classmethod
    def _get_or_create(
        cls,
        factory: Any,
        name: str,
        documentation: str,
        labelnames: list[str],
        **kwargs: Any,
    ) -> Any:
        """Create a collector, or reuse the one an earlier instance created.

        Args:
            factory: Metric class to instantiate (Counter, Gauge, Histogram)
            name: Full metric name including namespace prefix
            documentation: Help text for the metric
            labelnames: Label names for the metric
            **kwargs: Extra factory arguments (e.g. histogram ``buckets``)

        Returns:
            The newly created or previously cached collector

        Raises:
            ValueError: If creation fails and no collector with this name
                was created by this class before (e.g. invalid metric name).
        """
        try:
            collector = factory(name, documentation, labelnames, **kwargs)
        except ValueError:
            cached = cls._collector_cache.get(name)
            if cached is None:
                raise
            return cached
        cls._collector_cache[name] = collector
        return collector

    def _initialize_metrics(self) -> None:
        """Initialize Prometheus metric instances."""
        if self._initialized or not self.is_enabled():
            return

        with self._lock:
            # Re-check under the lock: another thread may have finished first.
            if self._initialized:
                return

            ns = self._config.namespace
            make = self._get_or_create

            # Tool invocation counter
            self._tool_invocations = make(
                Counter,
                f"{ns}_tool_invocations_total",
                "Total number of tool invocations",
                ["tool", "status"],
            )

            # Tool duration histogram
            self._tool_duration = make(
                Histogram,
                f"{ns}_tool_duration_seconds",
                "Tool execution duration in seconds",
                ["tool"],
                buckets=(0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0),
            )

            # Tool error counter
            self._tool_errors = make(
                Counter,
                f"{ns}_tool_errors_total",
                "Total number of tool errors",
                ["tool", "error_type"],
            )

            # Resource access counter
            self._resource_access = make(
                Counter,
                f"{ns}_resource_access_total",
                "Total number of resource accesses",
                ["resource_type", "action"],
            )

            # Active operations gauge
            self._active_operations = make(
                Gauge,
                f"{ns}_active_operations",
                "Number of currently active operations",
                ["operation_type"],
            )

            # Manifest/discovery gauges
            self._manifest_tokens = make(
                Gauge,
                f"{ns}_manifest_tokens",
                "Estimated token count for the advertised tool manifest",
                ["manifest"],  # unified|legacy
            )
            self._manifest_tool_count = make(
                Gauge,
                f"{ns}_manifest_tool_count",
                "Tool count for the advertised tool manifest",
                ["manifest"],  # unified|legacy
            )

            # Health check metrics
            self._health_status = make(
                Gauge,
                f"{ns}_health_status",
                "Current health status (0=unhealthy, 1=degraded, 2=healthy)",
                ["check_type"],  # liveness, readiness, health
            )

            self._dependency_health = make(
                Gauge,
                f"{ns}_dependency_health",
                "Dependency health status (0=unhealthy, 1=healthy)",
                ["dependency"],
            )

            self._health_check_duration = make(
                Histogram,
                f"{ns}_health_check_duration_seconds",
                "Health check duration in seconds",
                ["check_type"],
                buckets=(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0),
            )

            # Only flag success once every metric attribute has been assigned.
            self._initialized = True

    def start_server(
        self, port: Optional[int] = None, host: Optional[str] = None
    ) -> bool:
        """Start the HTTP server for /metrics endpoint.

        Args:
            port: Override port from config. None or 0 falls back to the
                configured port.
            host: Override host from config. None or "" falls back to the
                configured host.

        Returns:
            True if server started, False if already running, not enabled,
            the effective port is not positive, or the server failed to start
        """
        if not self.is_enabled():
            return False

        if self._server_started:
            return False

        with self._lock:
            if self._server_started:
                return False

            actual_port = port or self._config.port
            if actual_port <= 0:
                return False

            actual_host = host or self._config.host

            try:
                start_http_server(actual_port, addr=actual_host)
            except Exception:
                # Deliberate best effort: a failed bind (e.g. port in use)
                # must not take down the caller; it is reported as False.
                return False

            self._server_started = True
            return True

    def record_tool_invocation(
        self,
        tool_name: str,
        *,
        success: bool = True,
        duration_ms: Optional[float] = None,
    ) -> None:
        """Record a tool invocation.

        Args:
            tool_name: Name of the tool
            success: Whether the invocation was successful
            duration_ms: Duration in milliseconds (optional)
        """
        if not self._ready():
            return

        status = "success" if success else "error"
        self._tool_invocations.labels(tool=tool_name, status=status).inc()

        if duration_ms is not None:
            # Convert ms to seconds for Prometheus conventions
            self._tool_duration.labels(tool=tool_name).observe(duration_ms / 1000.0)

    def record_tool_start(self, tool_name: str) -> None:
        """Record tool execution start (increment active operations).

        Args:
            tool_name: Name of the tool
        """
        if not self._ready():
            return

        self._active_operations.labels(operation_type=f"tool:{tool_name}").inc()

    def record_tool_end(self, tool_name: str) -> None:
        """Record tool execution end (decrement active operations).

        Args:
            tool_name: Name of the tool
        """
        if not self._ready():
            return

        self._active_operations.labels(operation_type=f"tool:{tool_name}").dec()

    def record_resource_access(
        self,
        resource_type: str,
        action: str = "read",
    ) -> None:
        """Record a resource access.

        Args:
            resource_type: Type of resource (e.g., "spec", "task", "journal")
            action: Action performed (e.g., "read", "write", "delete")
        """
        if not self._ready():
            return

        self._resource_access.labels(resource_type=resource_type, action=action).inc()

    def record_error(
        self,
        tool_name: str,
        error_type: str = "unknown",
    ) -> None:
        """Record a tool error.

        Args:
            tool_name: Name of the tool
            error_type: Type/category of error
        """
        if not self._ready():
            return

        self._tool_errors.labels(tool=tool_name, error_type=error_type).inc()

    # -------------------------------------------------------------------------
    # Manifest/Discovery Metrics
    # -------------------------------------------------------------------------

    def record_manifest_snapshot(
        self,
        *,
        manifest: str,
        tokens: int,
        tool_count: int,
    ) -> None:
        """Record a manifest snapshot (token count + tool count).

        Args:
            manifest: Manifest label; an empty value is recorded as "unknown"
            tokens: Token count for the manifest (coerced with ``int``)
            tool_count: Number of tools in the manifest (coerced with ``int``)
        """
        if not self._ready():
            return

        manifest_label = manifest or "unknown"
        self._manifest_tokens.labels(manifest=manifest_label).set(int(tokens))
        self._manifest_tool_count.labels(manifest=manifest_label).set(int(tool_count))

    # -------------------------------------------------------------------------
    # Health Check Metrics
    # -------------------------------------------------------------------------

    def record_health_check(
        self,
        check_type: str,
        status: int,
        duration_seconds: Optional[float] = None,
    ) -> None:
        """Record a health check result.

        Args:
            check_type: Type of check (liveness, readiness, health)
            status: Health status (0=unhealthy, 1=degraded, 2=healthy)
            duration_seconds: Optional duration of the check in seconds
        """
        if not self._ready():
            return

        self._health_status.labels(check_type=check_type).set(status)

        if duration_seconds is not None:
            self._health_check_duration.labels(check_type=check_type).observe(
                duration_seconds
            )

    def record_dependency_health(
        self,
        dependency: str,
        healthy: bool,
    ) -> None:
        """Record dependency health status.

        Args:
            dependency: Name of the dependency (e.g., specs_dir, otel, prometheus)
            healthy: Whether the dependency is healthy
        """
        if not self._ready():
            return

        self._dependency_health.labels(dependency=dependency).set(1 if healthy else 0)

    def record_health_check_batch(
        self,
        check_type: str,
        status: int,
        dependencies: dict[str, bool],
        duration_seconds: Optional[float] = None,
    ) -> None:
        """Record a complete health check with all dependencies.

        Convenience method to record overall status and all dependency statuses.

        Args:
            check_type: Type of check (liveness, readiness, health)
            status: Health status (0=unhealthy, 1=degraded, 2=healthy)
            dependencies: Dict mapping dependency name to healthy status
            duration_seconds: Optional duration of the check in seconds
        """
        if not self._ready():
            return

        # Record overall status
        self.record_health_check(check_type, status, duration_seconds)

        # Record each dependency
        for dep_name, is_healthy in dependencies.items():
            self.record_dependency_health(dep_name, is_healthy)

    def get_config(self) -> PrometheusConfig:
        """Get the current configuration."""
        return self._config
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
# =============================================================================
|
|
469
|
+
# Singleton Instance
|
|
470
|
+
# =============================================================================
|
|
471
|
+
|
|
472
|
+
_exporter: Optional[PrometheusExporter] = None
|
|
473
|
+
_exporter_lock = threading.Lock()
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def get_prometheus_exporter(
    config: Optional[PrometheusConfig] = None,
) -> PrometheusExporter:
    """Return the process-wide Prometheus exporter, creating it on demand.

    The first call builds the exporter from ``config`` (or from
    environment/defaults when ``config`` is None). Every later call returns
    that same object and ignores ``config``.

    Args:
        config: Optional configuration; only consulted when the singleton
            does not exist yet

    Returns:
        PrometheusExporter singleton instance
    """
    global _exporter

    # Fast path: already built, no locking needed.
    if _exporter is not None:
        return _exporter

    with _exporter_lock:
        # Another thread may have created it while we waited for the lock.
        if _exporter is None:
            _exporter = PrometheusExporter(config)
        return _exporter
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def reset_exporter() -> None:
    """Drop the cached singleton exporter (mainly for testing).

    Only the module-level reference is cleared, under the singleton lock;
    the next ``get_prometheus_exporter()`` call constructs a new exporter.
    """
    global _exporter

    with _exporter_lock:
        _exporter = None
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
# =============================================================================
|
|
508
|
+
# Context Manager for Timing
|
|
509
|
+
# =============================================================================
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
class timed_operation:
    """Context manager for timing tool operations.

    On entry the tool is counted as an active operation; on exit the active
    count is decremented, the invocation is recorded with its duration and
    success flag, and, if the body raised, an error is recorded under the
    exception class name. Exceptions from the body are never suppressed.

    Usage:
        with timed_operation("my_tool") as timer:
            # do work
            pass
        # Automatically records duration

    Attributes:
        tool_name: Name of the tool being timed
        exporter: Exporter that receives the measurements
        start_time: ``time.perf_counter()`` reading taken on entry, or None
            before the context has been entered
        success: False once the body has exited with an exception
    """

    def __init__(
        self, tool_name: str, exporter: Optional[PrometheusExporter] = None
    ) -> None:
        """Prepare a timer for ``tool_name``.

        Args:
            tool_name: Name of the tool
            exporter: Exporter to record into; defaults to the singleton
        """
        self.tool_name = tool_name
        self.exporter = (
            exporter if exporter is not None else get_prometheus_exporter()
        )
        self.start_time: Optional[float] = None
        self.success = True

    def __enter__(self) -> "timed_operation":
        self.start_time = time.perf_counter()
        self.exporter.record_tool_start(self.tool_name)
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        # Without a start reading there is no meaningful duration; report
        # none rather than the raw clock value.
        duration_ms: Optional[float] = None
        if self.start_time is not None:
            duration_ms = (time.perf_counter() - self.start_time) * 1000
        self.success = exc_type is None

        self.exporter.record_tool_end(self.tool_name)
        self.exporter.record_tool_invocation(
            self.tool_name,
            success=self.success,
            duration_ms=duration_ms,
        )

        if exc_type is not None:
            self.exporter.record_error(self.tool_name, exc_type.__name__)
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
# =============================================================================
|
|
552
|
+
# Exports
|
|
553
|
+
# =============================================================================
|
|
554
|
+
|
|
555
|
+
__all__ = [
|
|
556
|
+
# Configuration
|
|
557
|
+
"PrometheusConfig",
|
|
558
|
+
# Exporter
|
|
559
|
+
"PrometheusExporter",
|
|
560
|
+
"get_prometheus_exporter",
|
|
561
|
+
"reset_exporter",
|
|
562
|
+
# Context manager
|
|
563
|
+
"timed_operation",
|
|
564
|
+
]
|