minder-cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. minder/__init__.py +12 -0
  2. minder/api/routers/prompts.py +177 -0
  3. minder/application/__init__.py +1 -0
  4. minder/application/admin/__init__.py +11 -0
  5. minder/application/admin/dto.py +453 -0
  6. minder/application/admin/jobs.py +327 -0
  7. minder/application/admin/use_cases.py +1895 -0
  8. minder/auth/__init__.py +12 -0
  9. minder/auth/context.py +26 -0
  10. minder/auth/middleware.py +70 -0
  11. minder/auth/principal.py +59 -0
  12. minder/auth/rate_limiter.py +89 -0
  13. minder/auth/rbac.py +60 -0
  14. minder/auth/service.py +541 -0
  15. minder/bootstrap/__init__.py +9 -0
  16. minder/bootstrap/providers.py +109 -0
  17. minder/bootstrap/transport.py +807 -0
  18. minder/cache/__init__.py +10 -0
  19. minder/cache/providers.py +140 -0
  20. minder/chunking/__init__.py +4 -0
  21. minder/chunking/code_splitter.py +184 -0
  22. minder/chunking/splitter.py +136 -0
  23. minder/cli.py +1542 -0
  24. minder/config.py +179 -0
  25. minder/continuity.py +363 -0
  26. minder/dev.py +160 -0
  27. minder/embedding/__init__.py +9 -0
  28. minder/embedding/base.py +7 -0
  29. minder/embedding/local.py +65 -0
  30. minder/embedding/openai.py +7 -0
  31. minder/graph/__init__.py +11 -0
  32. minder/graph/edges.py +13 -0
  33. minder/graph/executor.py +127 -0
  34. minder/graph/graph.py +263 -0
  35. minder/graph/nodes/__init__.py +27 -0
  36. minder/graph/nodes/evaluator.py +21 -0
  37. minder/graph/nodes/guard.py +64 -0
  38. minder/graph/nodes/llm.py +59 -0
  39. minder/graph/nodes/planning.py +30 -0
  40. minder/graph/nodes/reasoning.py +87 -0
  41. minder/graph/nodes/reranker.py +141 -0
  42. minder/graph/nodes/retriever.py +86 -0
  43. minder/graph/nodes/verification.py +230 -0
  44. minder/graph/nodes/workflow_planner.py +250 -0
  45. minder/graph/runtime.py +15 -0
  46. minder/graph/state.py +26 -0
  47. minder/llm/__init__.py +5 -0
  48. minder/llm/base.py +14 -0
  49. minder/llm/local.py +381 -0
  50. minder/llm/openai.py +89 -0
  51. minder/models/__init__.py +109 -0
  52. minder/models/base.py +10 -0
  53. minder/models/client.py +137 -0
  54. minder/models/document.py +34 -0
  55. minder/models/error.py +32 -0
  56. minder/models/graph.py +114 -0
  57. minder/models/history.py +32 -0
  58. minder/models/job.py +62 -0
  59. minder/models/prompt.py +41 -0
  60. minder/models/repository.py +62 -0
  61. minder/models/rule.py +68 -0
  62. minder/models/session.py +51 -0
  63. minder/models/skill.py +52 -0
  64. minder/models/user.py +41 -0
  65. minder/models/workflow.py +35 -0
  66. minder/observability/__init__.py +57 -0
  67. minder/observability/audit.py +243 -0
  68. minder/observability/logging.py +253 -0
  69. minder/observability/metrics.py +448 -0
  70. minder/observability/tracing.py +215 -0
  71. minder/presentation/__init__.py +1 -0
  72. minder/presentation/http/__init__.py +1 -0
  73. minder/presentation/http/admin/__init__.py +3 -0
  74. minder/presentation/http/admin/api.py +1309 -0
  75. minder/presentation/http/admin/context.py +94 -0
  76. minder/presentation/http/admin/dashboard.py +111 -0
  77. minder/presentation/http/admin/jobs.py +208 -0
  78. minder/presentation/http/admin/memories.py +185 -0
  79. minder/presentation/http/admin/prompts.py +219 -0
  80. minder/presentation/http/admin/routes.py +127 -0
  81. minder/presentation/http/admin/runtime.py +650 -0
  82. minder/presentation/http/admin/search.py +368 -0
  83. minder/presentation/http/admin/skills.py +230 -0
  84. minder/prompts/__init__.py +646 -0
  85. minder/prompts/formatter.py +142 -0
  86. minder/resources/__init__.py +318 -0
  87. minder/retrieval/__init__.py +5 -0
  88. minder/retrieval/hybrid.py +178 -0
  89. minder/retrieval/mmr.py +116 -0
  90. minder/retrieval/multi_hop.py +115 -0
  91. minder/runtime.py +15 -0
  92. minder/server.py +145 -0
  93. minder/store/__init__.py +64 -0
  94. minder/store/document.py +115 -0
  95. minder/store/error.py +82 -0
  96. minder/store/feedback.py +114 -0
  97. minder/store/graph.py +588 -0
  98. minder/store/history.py +57 -0
  99. minder/store/interfaces.py +512 -0
  100. minder/store/milvus/__init__.py +11 -0
  101. minder/store/milvus/client.py +26 -0
  102. minder/store/milvus/collections.py +15 -0
  103. minder/store/milvus/vector_store.py +232 -0
  104. minder/store/mongodb/__init__.py +11 -0
  105. minder/store/mongodb/client.py +49 -0
  106. minder/store/mongodb/indexes.py +90 -0
  107. minder/store/mongodb/operational_store.py +993 -0
  108. minder/store/relational.py +1087 -0
  109. minder/store/repo_state.py +58 -0
  110. minder/store/rule.py +93 -0
  111. minder/store/vector.py +79 -0
  112. minder/tools/__init__.py +47 -0
  113. minder/tools/auth.py +94 -0
  114. minder/tools/graph.py +839 -0
  115. minder/tools/ingest.py +353 -0
  116. minder/tools/memory.py +381 -0
  117. minder/tools/query.py +307 -0
  118. minder/tools/registry.py +269 -0
  119. minder/tools/repo_scanner.py +1266 -0
  120. minder/tools/search.py +15 -0
  121. minder/tools/session.py +316 -0
  122. minder/tools/skills.py +899 -0
  123. minder/tools/workflow.py +215 -0
  124. minder/transport/__init__.py +4 -0
  125. minder/transport/base.py +286 -0
  126. minder/transport/sse.py +252 -0
  127. minder/transport/stdio.py +29 -0
  128. minder_cli-0.2.0.dist-info/METADATA +318 -0
  129. minder_cli-0.2.0.dist-info/RECORD +132 -0
  130. minder_cli-0.2.0.dist-info/WHEEL +4 -0
  131. minder_cli-0.2.0.dist-info/entry_points.txt +2 -0
  132. minder_cli-0.2.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,253 @@
1
+ """Structured JSON logging and request correlation-ID middleware for Minder."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import logging
6
+ import time
7
+ import uuid
8
+ from contextvars import ContextVar
9
+ from typing import TYPE_CHECKING, Any, MutableMapping
10
+
11
+ if TYPE_CHECKING:
12
+ from starlette.types import ASGIApp, Receive, Scope, Send
13
+
14
+ # ---------------------------------------------------------------------------
15
+ # Correlation ID context variable
16
+ # ---------------------------------------------------------------------------
17
+
18
# Context-local slot for the current correlation ID; "" means "none bound".
_correlation_id: ContextVar[str] = ContextVar("correlation_id", default="")


def get_correlation_id() -> str:
    """Return the correlation ID bound in the current context.

    Returns:
        The bound ID, or an empty string when nothing has been bound.
    """
    # The ContextVar is created with default="", so no explicit fallback
    # argument is needed here.
    current = _correlation_id.get()
    return current


def set_correlation_id(cid: str) -> None:
    """Bind ``cid`` as the correlation ID for the current context.

    Args:
        cid: The identifier to store; later :func:`get_correlation_id` calls
            made in the same context return it.
    """
    _correlation_id.set(cid)
29
+
30
+
31
+ # ---------------------------------------------------------------------------
32
+ # JSON log formatter
33
+ # ---------------------------------------------------------------------------
34
+
35
+ _RESERVED_ATTRS: frozenset[str] = frozenset(
36
+ {
37
+ "args",
38
+ "created",
39
+ "exc_info",
40
+ "exc_text",
41
+ "filename",
42
+ "funcName",
43
+ "levelname",
44
+ "levelno",
45
+ "lineno",
46
+ "message",
47
+ "module",
48
+ "msecs",
49
+ "msg",
50
+ "name",
51
+ "pathname",
52
+ "process",
53
+ "processName",
54
+ "relativeCreated",
55
+ "stack_info",
56
+ "thread",
57
+ "threadName",
58
+ }
59
+ )
60
+
61
+
62
class JsonFormatter(logging.Formatter):
    """Format each log record as a single-line JSON object.

    The emitted keys are always:
        timestamp       ISO-8601 date-time in UTC (second resolution, no
                        zone suffix)
        level           log level name
        logger          logger name
        message         formatted message
        correlation_id  current request ID (empty string if not set)

    Any extra fields set via ``extra=`` on the log call are merged in; they
    never override the keys above.  When the record carries exception
    information, the formatted traceback is added under ``exc_info``.
    """

    # ``logging.Formatter`` converts timestamps with ``time.localtime`` by
    # default.  The documented contract is UTC, so convert with ``gmtime``.
    converter = time.gmtime

    def format(self, record: logging.LogRecord) -> str:  # noqa: A003
        """Serialise ``record`` to a JSON string.

        Args:
            record: The log record to render.

        Returns:
            A JSON document; values that are not natively serialisable are
            rendered with ``str``.
        """
        record.message = record.getMessage()
        payload: dict[str, object] = {
            "timestamp": self.formatTime(record, "%Y-%m-%dT%H:%M:%S"),
            "level": record.levelname,
            "logger": record.name,
            "message": record.message,
            # Prefer the context-bound ID; fall back to one passed via extra=.
            "correlation_id": get_correlation_id()
            or record.__dict__.get("correlation_id", ""),
        }
        # Merge caller-supplied extra keys.  setdefault keeps the core keys
        # above authoritative if an extra happens to reuse one of their names.
        for key, value in record.__dict__.items():
            if key not in _RESERVED_ATTRS and not key.startswith("_"):
                payload.setdefault(key, value)

        if record.exc_info:
            payload["exc_info"] = self.formatException(record.exc_info)
        return json.dumps(payload, default=str)
92
+
93
+
94
def configure_json_logging(level: str = "INFO") -> None:
    """Make the root logger emit JSON through a single stream handler.

    Every handler currently attached to the root logger is discarded and
    replaced by one ``StreamHandler`` using :class:`JsonFormatter`.  Intended
    to be called once at server startup.

    Args:
        level: Name of a ``logging`` level (case-insensitive).  Names that
            are not attributes of the ``logging`` module fall back to
            ``logging.INFO``.
    """
    json_handler = logging.StreamHandler()
    json_handler.setFormatter(JsonFormatter())

    root_logger = logging.getLogger()
    root_logger.handlers = [json_handler]

    resolved_level = getattr(logging, level.upper(), logging.INFO)
    root_logger.setLevel(resolved_level)
105
+
106
+
107
+ # ---------------------------------------------------------------------------
108
+ # Starlette ASGI correlation-ID middleware
109
+ # ---------------------------------------------------------------------------
110
+
111
+
112
class CorrelationIdMiddleware:
    """ASGI middleware that assigns a correlation ID to every request.

    The ID is taken from the incoming ``X-Correlation-ID`` header when
    present, or generated fresh as a UUID4 hex string.  It is:

    * Stored via :func:`set_correlation_id` (readable through
      :func:`get_correlation_id` in the same context).
    * Added to HTTP responses as the ``X-Correlation-ID`` header.

    Scopes other than ``http`` and ``websocket`` are passed through untouched.
    """

    def __init__(self, app: "ASGIApp") -> None:
        self.app = app

    async def __call__(
        self, scope: "Scope", receive: "Receive", send: "Send"
    ) -> None:
        if scope["type"] not in ("http", "websocket"):
            await self.app(scope, receive, send)
            return

        # Extract or generate a correlation ID.  Header values are raw bytes;
        # latin-1 maps every byte to one code point, so decoding cannot fail.
        headers = dict(scope.get("headers", []))
        raw_cid = headers.get(b"x-correlation-id", b"")
        cid = raw_cid.decode("latin-1") if raw_cid else uuid.uuid4().hex
        set_correlation_id(cid)

        # Encode with the same codec used for decoding so that a client-supplied
        # value is echoed back byte-for-byte (the default UTF-8 would rewrite
        # any non-ASCII byte into a two-byte sequence).
        cid_header = (b"x-correlation-id", cid.encode("latin-1"))

        async def send_with_correlation(message: MutableMapping[str, Any]) -> None:
            if message["type"] == "http.response.start":
                # Copy rather than mutate: the message belongs to the inner app.
                message = {
                    **message,
                    "headers": [*message.get("headers", []), cid_header],
                }
            await send(message)

        await self.app(scope, receive, send_with_correlation)
147
+
148
+
149
+ # ---------------------------------------------------------------------------
150
+ # HTTP request/response access-log middleware
151
+ # ---------------------------------------------------------------------------
152
+
153
+
154
class AccessLogMiddleware:
    """ASGI middleware that emits a structured access log entry per request
    and records the request in the Prometheus metrics registry.

    Only ``http`` scopes are logged; everything else is passed through.  The
    entry is written in a ``finally`` clause, so it is also emitted when the
    wrapped application raises (with status ``0`` if no response was started).
    """

    def __init__(self, app: "ASGIApp", logger_name: str = "minder.access") -> None:
        self.app = app
        self._log = logging.getLogger(logger_name)

    async def __call__(
        self, scope: "Scope", receive: "Receive", send: "Send"
    ) -> None:
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        started_at = time.perf_counter()
        status = 0  # stays 0 when the app never sends http.response.start

        async def capture_status(message: MutableMapping[str, Any]) -> None:
            nonlocal status
            if message["type"] == "http.response.start":
                status = message.get("status", 0)
            await send(message)

        try:
            await self.app(scope, receive, capture_status)
        finally:
            elapsed = time.perf_counter() - started_at
            method = scope.get("method", "")
            path = scope.get("path", "")
            self._log.info(
                "%s %s %s",
                method,
                path,
                status,
                extra={
                    "http_method": method,
                    "http_path": path,
                    "http_status": status,
                    "duration_ms": round(elapsed * 1000, 2),
                },
            )
            # Record into the Prometheus registry (import deferred to avoid
            # circular imports at module load time).  Metrics are best-effort:
            # a failure must never affect the request, but it is logged at
            # DEBUG so that it is not completely invisible.
            try:
                from minder.observability.metrics import record_http_request  # noqa: PLC0415

                record_http_request(method, path, status, elapsed)
            except Exception:  # noqa: BLE001
                self._log.debug(
                    "Failed to record HTTP request metrics", exc_info=True
                )
203
+
204
+
205
class GlobalExceptionMiddleware:
    """Catch-all middleware that ensures 500s are returned as clean JSON.

    Any exception escaping the wrapped application on an ``http`` scope is
    logged to the ``minder.errors`` logger.  If no response has been started
    yet, a JSON body of the form
    ``{"error": {"code", "message", "details"}}`` is sent with status 500.
    ``details`` carries the traceback only when the scope has a truthy
    ``"debug"`` entry; otherwise it is ``None`` so stack traces are not
    disclosed to clients.
    """

    def __init__(self, app: "ASGIApp") -> None:
        self.app = app

    async def __call__(self, scope: "Scope", receive: "Receive", send: "Send") -> None:
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        response_started = False

        async def send_wrapper(message: MutableMapping[str, Any]) -> None:
            nonlocal response_started
            if message["type"] == "http.response.start":
                response_started = True
            await send(message)

        try:
            await self.app(scope, receive, send_wrapper)
        except Exception as exc:
            logger = logging.getLogger("minder.errors")
            logger.exception("Unhandled exception in ASGI application: %s", exc)

            if response_started:
                # We can't send a clean JSON error response if we've already
                # sent the initial status code and headers; the failure has
                # been logged above, so just stop here.
                return

            # Deferred imports: only needed on this (rare) error path.
            import traceback

            from starlette.responses import JSONResponse

            # Only expose the traceback when debugging is explicitly enabled
            # on the scope; it must never be sent unconditionally.
            details = traceback.format_exc() if scope.get("debug") else None

            response = JSONResponse(
                {
                    "error": {
                        "code": "SYS_INTERNAL_ERROR",
                        "message": str(exc),
                        "details": details,
                    }
                },
                status_code=500,
            )
            await response(scope, receive, send)
@@ -0,0 +1,448 @@
1
+ """Prometheus metrics registry for Minder.
2
+
3
+ Registers all application-level counters, histograms, and gauges and
4
+ exposes a WSGI/ASGI-compatible handler that can be mounted at `/metrics`.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import TYPE_CHECKING, Any, cast
10
+
11
+ from prometheus_client import (
12
+ CONTENT_TYPE_LATEST,
13
+ CollectorRegistry,
14
+ Counter,
15
+ Gauge,
16
+ Histogram,
17
+ generate_latest,
18
+ )
19
+
20
+ if TYPE_CHECKING:
21
+ from starlette.requests import Request
22
+ from starlette.responses import Response
23
+
24
+ from minder.store.interfaces import IOperationalStore
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # Shared registry
28
+ # ---------------------------------------------------------------------------
29
+
30
# Dedicated collector registry for this module: every metric defined below is
# created with ``registry=REGISTRY`` and the /metrics handler serialises it.
REGISTRY = CollectorRegistry(auto_describe=True)
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Tool-call metrics
34
+ # ---------------------------------------------------------------------------
35
+
36
# Invocation counter, partitioned by tool and outcome.  client_id is
# high-cardinality, so it is stored in the audit DB rather than as a label.
TOOL_CALLS_TOTAL = Counter(
    "minder_tool_calls_total",
    "Total number of MCP tool invocations.",
    labelnames=("tool_name", "outcome"),
    registry=REGISTRY,
)

# Per-tool latency distribution; bucket upper bounds are in seconds.
TOOL_CALL_DURATION = Histogram(
    "minder_tool_call_duration_seconds",
    "MCP tool call latency in seconds.",
    labelnames=("tool_name",),
    registry=REGISTRY,
    buckets=(0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0),
)
53
+
54
+ # ---------------------------------------------------------------------------
55
+ # Auth / session metrics
56
+ # ---------------------------------------------------------------------------
57
+
58
# Auth event counter.  client_id is high-cardinality, so it is stored in the
# audit DB rather than as a label.
AUTH_EVENTS_TOTAL = Counter(
    "minder_auth_events_total",
    "Total number of authentication and authorisation events.",
    labelnames=("event_type", "outcome"),
    registry=REGISTRY,
)

# Unlabelled gauge holding the current number of tracked client sessions.
ACTIVE_CLIENT_SESSIONS = Gauge(
    "minder_active_client_sessions",
    "Number of active MCP client sessions tracked in the cache.",
    registry=REGISTRY,
)
70
+
71
+ # ---------------------------------------------------------------------------
72
+ # HTTP metrics
73
+ # ---------------------------------------------------------------------------
74
+
75
# Request counter, partitioned by method, path template and status.
HTTP_REQUESTS_TOTAL = Counter(
    "minder_http_requests_total",
    "Total HTTP requests handled.",
    labelnames=("method", "path_template", "status"),
    registry=REGISTRY,
)

# Request latency distribution; bucket upper bounds are in seconds.
HTTP_REQUEST_DURATION = Histogram(
    "minder_http_request_duration_seconds",
    "HTTP request latency in seconds.",
    labelnames=("method", "path_template"),
    registry=REGISTRY,
    buckets=(0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5),
)
89
+
90
+ # ---------------------------------------------------------------------------
91
+ # Admin-operation metrics
92
+ # ---------------------------------------------------------------------------
93
+
94
# Admin API operation counter, partitioned by operation name and outcome.
ADMIN_OPERATIONS_TOTAL = Counter(
    "minder_admin_operations_total",
    "Total admin API operations.",
    labelnames=("operation", "outcome"),
    registry=REGISTRY,
)
100
+
101
+ # ---------------------------------------------------------------------------
102
+ # Continuity quality metrics
103
+ # ---------------------------------------------------------------------------
104
+
105
# Packets emitted, partitioned by the emitting source.
CONTINUITY_PACKETS_TOTAL = Counter(
    "minder_continuity_packets_total",
    "Total continuity packets emitted by continuity-aware surfaces.",
    labelnames=("source",),
    registry=REGISTRY,
)

# Recall operations, partitioned by synthesis provider.
CONTINUITY_RECALLS_TOTAL = Counter(
    "minder_continuity_recalls_total",
    "Total continuity recall operations grouped by synthesis provider.",
    labelnames=("provider",),
    registry=REGISTRY,
)

# Unlabelled distribution of step-compatibility scores.
CONTINUITY_STEP_COMPATIBILITY = Histogram(
    "minder_continuity_step_compatibility",
    "Observed workflow-step compatibility scores for continuity-aware retrieval.",
    registry=REGISTRY,
    buckets=(0.0, 0.1, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5),
)

# Unlabelled distribution of skill-retrieval quality scores.
CONTINUITY_SKILL_QUALITY = Histogram(
    "minder_continuity_skill_quality",
    "Observed quality scores for workflow-aware skill retrieval.",
    registry=REGISTRY,
    buckets=(0.0, 0.1, 0.25, 0.5, 0.75, 1.0),
)

# Query prompt renders, partitioned by prompt source.
CONTINUITY_QUERY_PROMPTS_TOTAL = Counter(
    "minder_continuity_query_prompts_total",
    "Total query prompt renders grouped by prompt source.",
    labelnames=("source",),
    registry=REGISTRY,
)

# Corrective retries, partitioned by the kind of failure that caused them.
CONTINUITY_CORRECTION_RETRIES_TOTAL = Counter(
    "minder_continuity_correction_retries_total",
    "Total corrective retries triggered by continuity/workflow contract failures.",
    labelnames=("failure_kind",),
    registry=REGISTRY,
)

# Gate evaluations, partitioned by outcome.
CONTINUITY_GATES_TOTAL = Counter(
    "minder_continuity_gates_total",
    "Total continuity gate evaluations grouped by outcome.",
    labelnames=("outcome",),
    registry=REGISTRY,
)
153
+
154
+ # ---------------------------------------------------------------------------
155
+ # Helpers
156
+ # ---------------------------------------------------------------------------
157
+
158
+
159
def record_tool_call(
    tool_name: str,
    outcome: str,
    duration_seconds: float,
    client_id: str = "unknown",  # kept for API compat; stored in audit DB, not Prometheus label
) -> None:
    """Count one tool invocation and observe its latency.

    Args:
        tool_name: Name of the invoked tool (label on both metrics).
        outcome: Outcome label for the invocation counter.
        duration_seconds: Call latency, observed on the duration histogram.
        client_id: Accepted for backwards compatibility; not used here.
    """
    invocation_counter = TOOL_CALLS_TOTAL.labels(tool_name=tool_name, outcome=outcome)
    invocation_counter.inc()

    latency_histogram = TOOL_CALL_DURATION.labels(tool_name=tool_name)
    latency_histogram.observe(duration_seconds)
168
+
169
+
170
async def record_auth_event(
    event_type: str,
    outcome: str,
    client_id: str = "unknown",
    store: "IOperationalStore | None" = None,
) -> None:
    """Record an auth/session lifecycle event.

    Increments the Prometheus counter (synchronous) and then, when a store is
    supplied, writes an audit log entry to it.  The audit write is
    best-effort: a failure never propagates to the caller, but it is logged
    as a warning so that lost audit entries are visible.

    Args:
        event_type: Event type label, also stored on the audit entry.
        outcome: Outcome label, also stored on the audit entry.
        client_id: Stored on the audit entry as actor, resource and metadata;
            deliberately not used as a Prometheus label.
        store: Operational store receiving the audit entry, or ``None`` to
            skip the audit write.
    """
    AUTH_EVENTS_TOTAL.labels(event_type=event_type, outcome=outcome).inc()

    if store is None:
        return

    try:
        await store.create_audit_log(
            actor_type="auth",
            actor_id=client_id,
            event_type=event_type,
            resource_type="session",
            resource_id=client_id,
            outcome=outcome,
            audit_metadata={"client_id": client_id},
        )
    except Exception:  # noqa: BLE001
        # Best-effort: never fail the auth flow because auditing failed.
        import logging

        logging.getLogger("minder.metrics").warning(
            "Failed to write audit log for auth event %s", event_type, exc_info=True
        )
196
+
197
+
198
def record_http_request(
    method: str,
    path_template: str,
    status: int,
    duration_seconds: float,
) -> None:
    """Count one completed HTTP request and observe its latency.

    Args:
        method: HTTP method label.
        path_template: Path label shared by the counter and the histogram.
        status: Response status; stringified for the counter label.
        duration_seconds: Request latency, observed on the histogram.
    """
    route_labels = {"method": method, "path_template": path_template}

    request_counter = HTTP_REQUESTS_TOTAL.labels(**route_labels, status=str(status))
    request_counter.inc()

    latency_histogram = HTTP_REQUEST_DURATION.labels(**route_labels)
    latency_histogram.observe(duration_seconds)
211
+
212
+
213
def record_continuity_packet(source: str) -> None:
    """Count one emitted continuity packet; a falsy source is counted as "unknown"."""
    source_label = source or "unknown"
    CONTINUITY_PACKETS_TOTAL.labels(source=source_label).inc()
215
+
216
+
217
def record_continuity_recall(*, provider: str, step_compatibility: float) -> None:
    """Count one continuity recall and observe its step-compatibility score.

    A falsy ``provider`` is counted under the label "unknown".
    """
    provider_label = provider or "unknown"
    CONTINUITY_RECALLS_TOTAL.labels(provider=provider_label).inc()
    CONTINUITY_STEP_COMPATIBILITY.observe(step_compatibility)
220
+
221
+
222
def record_continuity_skill_recall(
    *, step_compatibility: float, quality_score: float
) -> None:
    """Observe the scores of one workflow-aware skill recall.

    ``quality_score`` is floored at 0.0 before being observed.
    """
    CONTINUITY_STEP_COMPATIBILITY.observe(step_compatibility)
    floored_quality = max(quality_score, 0.0)
    CONTINUITY_SKILL_QUALITY.observe(floored_quality)
227
+
228
+
229
def record_query_prompt_render(source: str, *, correction_retries: int = 0) -> None:
    """Count one query prompt render and any corrective retries it needed.

    Args:
        source: Prompt source label; a falsy value is counted as "unknown".
        correction_retries: Number of retries to add to the retry counter
            (under ``failure_kind="workflow_contract"``); ignored unless
            greater than zero.
    """
    source_label = source or "unknown"
    CONTINUITY_QUERY_PROMPTS_TOTAL.labels(source=source_label).inc()

    if not correction_retries > 0:
        return

    retry_counter = CONTINUITY_CORRECTION_RETRIES_TOTAL.labels(
        failure_kind="workflow_contract"
    )
    retry_counter.inc(correction_retries)
235
+
236
+
237
def record_continuity_gate(outcome: str) -> None:
    """Count one continuity gate evaluation; a falsy outcome is counted as "unknown"."""
    outcome_label = outcome or "unknown"
    CONTINUITY_GATES_TOTAL.labels(outcome=outcome_label).inc()
239
+
240
+
241
async def record_admin_operation(
    operation: str,
    outcome: str,
    actor_id: str = "unknown",
    store: IOperationalStore | None = None,
) -> None:
    """Record an admin API operation (outcome: 'success' | 'error').

    Increments the Prometheus counter and then, when a store is supplied,
    writes an audit log entry to it.  The audit write is best-effort: a
    failure never propagates to the caller, but it is logged as a warning so
    that lost audit entries are visible.

    Args:
        operation: Operation name; counter label and audit ``resource_id``.
        outcome: Outcome label, also stored on the audit entry.
        actor_id: Stored on the audit entry as the acting admin.
        store: Operational store receiving the audit entry, or ``None`` to
            skip the audit write.
    """
    ADMIN_OPERATIONS_TOTAL.labels(operation=operation, outcome=outcome).inc()

    if store is None:
        return

    try:
        await store.create_audit_log(
            actor_type="admin",
            actor_id=actor_id,
            event_type="admin_op",
            resource_type="admin_api",
            resource_id=operation,
            outcome=outcome,
            audit_metadata={"operation": operation},
        )
    except Exception:  # noqa: BLE001
        # Best-effort: never fail the admin call because auditing failed.
        import logging

        logging.getLogger("minder.metrics").warning(
            "Failed to write audit log for admin operation %s",
            operation,
            exc_info=True,
        )
263
+
264
+
265
+ # ---------------------------------------------------------------------------
266
+ # Starlette endpoint
267
+ # ---------------------------------------------------------------------------
268
+
269
+
270
async def metrics_endpoint(request: "Request") -> "Response":  # noqa: ARG001
    """Route handler that serves ``REGISTRY`` in Prometheus exposition format.

    Args:
        request: The incoming request; unused.

    Returns:
        A response whose body is ``generate_latest(REGISTRY)`` and whose
        media type is ``CONTENT_TYPE_LATEST``.
    """
    from starlette.responses import Response as StarletteResponse

    return StarletteResponse(
        content=generate_latest(REGISTRY),
        media_type=CONTENT_TYPE_LATEST,
    )
279
+
280
+
281
def get_registry_snapshot() -> dict[str, Any]:
    """Return a lightweight dict snapshot of registered metric names (for tests).

    Returns:
        A mapping from the name to the type of the first metric family each
        registered collector describes.
    """
    snapshot: dict[str, Any] = {}
    for collector in REGISTRY._names_to_collectors.values():  # noqa: SLF001
        if not hasattr(collector, "describe"):
            continue
        family = collector.describe()[0]  # type: ignore[union-attr]
        # The registry's values may contain the same collector several times;
        # repeated entries simply overwrite the same key.
        snapshot[family.name] = family.type
    return snapshot
288
+
289
+
290
+ def _counter_total(
291
+ counter: Counter, filter_label: str | None = None, filter_value: str | None = None
292
+ ) -> float:
293
+ """Sum all label-value combinations of a Counter, optionally filtering."""
294
+ total = 0.0
295
+ label_names = counter._labelnames # noqa: SLF001
296
+ filter_idx = (
297
+ label_names.index(filter_label) if filter_label in label_names else None
298
+ )
299
+
300
+ for label_tuple, child in counter._metrics.items(): # noqa: SLF001
301
+ if filter_idx is not None and filter_value:
302
+ if label_tuple[filter_idx] != filter_value:
303
+ continue
304
+ total += cast(Any, child)._value.get() # noqa: SLF001
305
+ return total
306
+
307
+
308
+ def _counter_by_label(
309
+ counter: Counter,
310
+ label_name: str,
311
+ filter_label: str | None = None,
312
+ filter_value: str | None = None,
313
+ ) -> dict[str, float]:
314
+ """Aggregate a Counter by a single label, optionally filtering."""
315
+ label_names: tuple[str, ...] = counter._labelnames # noqa: SLF001
316
+ if label_name not in label_names:
317
+ return {}
318
+
319
+ idx = label_names.index(label_name)
320
+ filter_idx = (
321
+ label_names.index(filter_label) if filter_label in label_names else None
322
+ )
323
+
324
+ result: dict[str, float] = {}
325
+ for label_tuple, child in counter._metrics.items(): # noqa: SLF001
326
+ if filter_idx is not None and filter_value:
327
+ if label_tuple[filter_idx] != filter_value:
328
+ continue
329
+ key = label_tuple[idx]
330
+ result[key] = (
331
+ result.get(key, 0.0) + cast(Any, child)._value.get()
332
+ ) # noqa: SLF001
333
+ return result
334
+
335
+
336
+ def _histogram_average(histogram: Histogram) -> float:
337
+ total = 0.0
338
+ count = 0.0
339
+ for metric in histogram.collect():
340
+ for sample in metric.samples:
341
+ if sample.name.endswith("_sum"):
342
+ total = float(sample.value)
343
+ elif sample.name.endswith("_count"):
344
+ count = float(sample.value)
345
+ if count <= 0:
346
+ return 0.0
347
+ return round(total / count, 4)
348
+
349
+
350
async def get_metrics_summary(
    store: IOperationalStore,
    active_sessions: int | None = None,
    client_id: str | None = None,
    event_type: str | None = None,
    outcome: str | None = None,
) -> dict[str, Any]:
    """Return a combined summary of persistent audit logs and runtime metrics.

    Tool-call, auth-event and admin-operation figures come from the
    operational store's audit summaries; session, HTTP and continuity figures
    come from the in-process Prometheus metrics.

    Args:
        store: Operational store queried through ``get_audit_summary``.
        active_sessions: Session count to report; when ``None`` the current
            value of the ``ACTIVE_CLIENT_SESSIONS`` gauge is used.
        client_id: Restricts the per-outcome and per-name tool-call summaries
            and the event-type summary to this actor.
        event_type: When truthy, restricts the event-type summary
            (``auth_events``) to this event type.  The tool-call and admin
            sections are already pinned to their own event types.
        outcome: Outcome filter forwarded to every store query.

    Returns:
        A dict with the keys ``active_client_sessions``, ``tool_calls``,
        ``auth_events``, ``http_requests``, ``admin_operations`` and
        ``continuity_quality``.
    """
    import logging

    logger = logging.getLogger("minder.metrics")

    # Metrics from Store (Persistent)
    # 1. Tool Calls
    tool_by_outcome = await store.get_audit_summary(
        actor_id=client_id, event_type="tool_call", outcome=outcome, group_by="outcome"
    )
    tool_by_client = await store.get_audit_summary(
        event_type="tool_call",
        outcome=outcome,
        group_by="audit_metadata.client_id",  # This depends on Mongo/SQL support for nested fields
    )
    tool_by_name = await store.get_audit_summary(
        actor_id=client_id,
        event_type="tool_call",
        outcome=outcome,
        group_by="tool_name",
    )
    tool_total = sum(tool_by_outcome.values())

    # 2. Auth Events.  Without an event_type filter this groups every audit
    # event by type (a "unified" view).  The caller's event_type filter was
    # previously accepted but ignored; it is only forwarded when set, so the
    # unfiltered call is unchanged.
    auth_filters: dict[str, Any] = {
        "actor_id": client_id,
        "outcome": outcome,
        "group_by": "event_type",
    }
    if event_type:
        auth_filters["event_type"] = event_type
    auth_by_type = await store.get_audit_summary(**auth_filters)
    auth_total = sum(auth_by_type.values())

    # 3. Admin Ops
    admin_by_outcome = await store.get_audit_summary(
        event_type="admin_op", outcome=outcome, group_by="outcome"
    )
    admin_total = sum(admin_by_outcome.values())

    # Runtime stats from Prometheus (Fallback/Ephemeral)
    effective_sessions = (
        active_sessions
        if active_sessions is not None
        else ACTIVE_CLIENT_SESSIONS._value.get()  # noqa: SLF001
    )

    logger.info(
        "Serving persistent metrics summary: sessions=%s, tool_calls=%s",
        effective_sessions,
        tool_total,
    )

    return {
        "active_client_sessions": effective_sessions,
        "tool_calls": {
            "total": tool_total,
            "by_outcome": tool_by_outcome,
            "by_client": tool_by_client,
            "by_name": tool_by_name,
        },
        "auth_events": {
            "total": auth_total,
            "by_type": auth_by_type,
        },
        "http_requests": {
            "total": _counter_total(HTTP_REQUESTS_TOTAL),
            "by_status": _counter_by_label(HTTP_REQUESTS_TOTAL, "status"),
        },
        "admin_operations": {
            "total": admin_total,
            "by_outcome": admin_by_outcome,
        },
        "continuity_quality": {
            "packets_emitted_total": _counter_total(CONTINUITY_PACKETS_TOTAL),
            "packets_by_source": _counter_by_label(CONTINUITY_PACKETS_TOTAL, "source"),
            "recalls_total": _counter_total(CONTINUITY_RECALLS_TOTAL),
            "recalls_by_provider": _counter_by_label(
                CONTINUITY_RECALLS_TOTAL, "provider"
            ),
            "average_step_compatibility": _histogram_average(
                CONTINUITY_STEP_COMPATIBILITY
            ),
            "average_skill_quality": _histogram_average(CONTINUITY_SKILL_QUALITY),
            "query_prompts_by_source": _counter_by_label(
                CONTINUITY_QUERY_PROMPTS_TOTAL, "source"
            ),
            "correction_retries_total": _counter_total(
                CONTINUITY_CORRECTION_RETRIES_TOTAL
            ),
            "gates_by_outcome": _counter_by_label(CONTINUITY_GATES_TOTAL, "outcome"),
        },
    }