minder-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minder/__init__.py +12 -0
- minder/api/routers/prompts.py +177 -0
- minder/application/__init__.py +1 -0
- minder/application/admin/__init__.py +11 -0
- minder/application/admin/dto.py +453 -0
- minder/application/admin/jobs.py +327 -0
- minder/application/admin/use_cases.py +1895 -0
- minder/auth/__init__.py +12 -0
- minder/auth/context.py +26 -0
- minder/auth/middleware.py +70 -0
- minder/auth/principal.py +59 -0
- minder/auth/rate_limiter.py +89 -0
- minder/auth/rbac.py +60 -0
- minder/auth/service.py +541 -0
- minder/bootstrap/__init__.py +9 -0
- minder/bootstrap/providers.py +109 -0
- minder/bootstrap/transport.py +807 -0
- minder/cache/__init__.py +10 -0
- minder/cache/providers.py +140 -0
- minder/chunking/__init__.py +4 -0
- minder/chunking/code_splitter.py +184 -0
- minder/chunking/splitter.py +136 -0
- minder/cli.py +1542 -0
- minder/config.py +179 -0
- minder/continuity.py +363 -0
- minder/dev.py +160 -0
- minder/embedding/__init__.py +9 -0
- minder/embedding/base.py +7 -0
- minder/embedding/local.py +65 -0
- minder/embedding/openai.py +7 -0
- minder/graph/__init__.py +11 -0
- minder/graph/edges.py +13 -0
- minder/graph/executor.py +127 -0
- minder/graph/graph.py +263 -0
- minder/graph/nodes/__init__.py +27 -0
- minder/graph/nodes/evaluator.py +21 -0
- minder/graph/nodes/guard.py +64 -0
- minder/graph/nodes/llm.py +59 -0
- minder/graph/nodes/planning.py +30 -0
- minder/graph/nodes/reasoning.py +87 -0
- minder/graph/nodes/reranker.py +141 -0
- minder/graph/nodes/retriever.py +86 -0
- minder/graph/nodes/verification.py +230 -0
- minder/graph/nodes/workflow_planner.py +250 -0
- minder/graph/runtime.py +15 -0
- minder/graph/state.py +26 -0
- minder/llm/__init__.py +5 -0
- minder/llm/base.py +14 -0
- minder/llm/local.py +381 -0
- minder/llm/openai.py +89 -0
- minder/models/__init__.py +109 -0
- minder/models/base.py +10 -0
- minder/models/client.py +137 -0
- minder/models/document.py +34 -0
- minder/models/error.py +32 -0
- minder/models/graph.py +114 -0
- minder/models/history.py +32 -0
- minder/models/job.py +62 -0
- minder/models/prompt.py +41 -0
- minder/models/repository.py +62 -0
- minder/models/rule.py +68 -0
- minder/models/session.py +51 -0
- minder/models/skill.py +52 -0
- minder/models/user.py +41 -0
- minder/models/workflow.py +35 -0
- minder/observability/__init__.py +57 -0
- minder/observability/audit.py +243 -0
- minder/observability/logging.py +253 -0
- minder/observability/metrics.py +448 -0
- minder/observability/tracing.py +215 -0
- minder/presentation/__init__.py +1 -0
- minder/presentation/http/__init__.py +1 -0
- minder/presentation/http/admin/__init__.py +3 -0
- minder/presentation/http/admin/api.py +1309 -0
- minder/presentation/http/admin/context.py +94 -0
- minder/presentation/http/admin/dashboard.py +111 -0
- minder/presentation/http/admin/jobs.py +208 -0
- minder/presentation/http/admin/memories.py +185 -0
- minder/presentation/http/admin/prompts.py +219 -0
- minder/presentation/http/admin/routes.py +127 -0
- minder/presentation/http/admin/runtime.py +650 -0
- minder/presentation/http/admin/search.py +368 -0
- minder/presentation/http/admin/skills.py +230 -0
- minder/prompts/__init__.py +646 -0
- minder/prompts/formatter.py +142 -0
- minder/resources/__init__.py +318 -0
- minder/retrieval/__init__.py +5 -0
- minder/retrieval/hybrid.py +178 -0
- minder/retrieval/mmr.py +116 -0
- minder/retrieval/multi_hop.py +115 -0
- minder/runtime.py +15 -0
- minder/server.py +145 -0
- minder/store/__init__.py +64 -0
- minder/store/document.py +115 -0
- minder/store/error.py +82 -0
- minder/store/feedback.py +114 -0
- minder/store/graph.py +588 -0
- minder/store/history.py +57 -0
- minder/store/interfaces.py +512 -0
- minder/store/milvus/__init__.py +11 -0
- minder/store/milvus/client.py +26 -0
- minder/store/milvus/collections.py +15 -0
- minder/store/milvus/vector_store.py +232 -0
- minder/store/mongodb/__init__.py +11 -0
- minder/store/mongodb/client.py +49 -0
- minder/store/mongodb/indexes.py +90 -0
- minder/store/mongodb/operational_store.py +993 -0
- minder/store/relational.py +1087 -0
- minder/store/repo_state.py +58 -0
- minder/store/rule.py +93 -0
- minder/store/vector.py +79 -0
- minder/tools/__init__.py +47 -0
- minder/tools/auth.py +94 -0
- minder/tools/graph.py +839 -0
- minder/tools/ingest.py +353 -0
- minder/tools/memory.py +381 -0
- minder/tools/query.py +307 -0
- minder/tools/registry.py +269 -0
- minder/tools/repo_scanner.py +1266 -0
- minder/tools/search.py +15 -0
- minder/tools/session.py +316 -0
- minder/tools/skills.py +899 -0
- minder/tools/workflow.py +215 -0
- minder/transport/__init__.py +4 -0
- minder/transport/base.py +286 -0
- minder/transport/sse.py +252 -0
- minder/transport/stdio.py +29 -0
- minder_cli-0.2.0.dist-info/METADATA +318 -0
- minder_cli-0.2.0.dist-info/RECORD +132 -0
- minder_cli-0.2.0.dist-info/WHEEL +4 -0
- minder_cli-0.2.0.dist-info/entry_points.txt +2 -0
- minder_cli-0.2.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
"""Structured JSON logging and request correlation-ID middleware for Minder."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
import time
|
|
7
|
+
import uuid
|
|
8
|
+
from contextvars import ContextVar
|
|
9
|
+
from typing import TYPE_CHECKING, Any, MutableMapping
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from starlette.types import ASGIApp, Receive, Scope, Send
|
|
13
|
+
|
|
14
|
+
# ---------------------------------------------------------------------------
|
|
15
|
+
# Correlation ID context variable
|
|
16
|
+
# ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
_correlation_id: ContextVar[str] = ContextVar("correlation_id", default="")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_correlation_id() -> str:
|
|
22
|
+
"""Return the correlation ID bound to the current async task."""
|
|
23
|
+
return _correlation_id.get("")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def set_correlation_id(cid: str) -> None:
    """Store *cid* as the correlation ID of the current context.

    The token returned by ``ContextVar.set`` is discarded, so the previous
    value is not restored by this helper.
    """
    _correlation_id.set(cid)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
# JSON log formatter
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
_RESERVED_ATTRS: frozenset[str] = frozenset(
|
|
36
|
+
{
|
|
37
|
+
"args",
|
|
38
|
+
"created",
|
|
39
|
+
"exc_info",
|
|
40
|
+
"exc_text",
|
|
41
|
+
"filename",
|
|
42
|
+
"funcName",
|
|
43
|
+
"levelname",
|
|
44
|
+
"levelno",
|
|
45
|
+
"lineno",
|
|
46
|
+
"message",
|
|
47
|
+
"module",
|
|
48
|
+
"msecs",
|
|
49
|
+
"msg",
|
|
50
|
+
"name",
|
|
51
|
+
"pathname",
|
|
52
|
+
"process",
|
|
53
|
+
"processName",
|
|
54
|
+
"relativeCreated",
|
|
55
|
+
"stack_info",
|
|
56
|
+
"thread",
|
|
57
|
+
"threadName",
|
|
58
|
+
}
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class JsonFormatter(logging.Formatter):
    """Format log records as single-line JSON objects.

    The emitted keys are always:
        timestamp       ISO-8601 UTC (``Z`` suffix)
        level           log level name
        logger          logger name
        message         formatted message
        correlation_id  current request ID (empty string if not set)

    Any extra fields set via ``extra=`` on the log call are merged in.
    ``exc_info`` and ``stack_info`` are added as formatted text when the
    record carries them.
    """

    # ``formatTime`` renders timestamps through ``self.converter``, which
    # defaults to ``time.localtime``.  Pin it to UTC so the output matches the
    # documented contract regardless of the host's time zone.
    converter = time.gmtime

    def format(self, record: logging.LogRecord) -> str:  # noqa: A003
        """Serialise *record* to a JSON string.

        Values that ``json`` cannot encode natively are stringified via
        ``default=str``.
        """
        record.message = record.getMessage()
        payload: dict[str, object] = {
            "timestamp": self.formatTime(record, "%Y-%m-%dT%H:%M:%SZ"),
            "level": record.levelname,
            "logger": record.name,
            "message": record.message,
            # Prefer the context-bound ID; fall back to one passed via extra=.
            "correlation_id": get_correlation_id()
            or record.__dict__.get("correlation_id", ""),
        }
        # Merge caller-supplied extra keys.  ``setdefault`` keeps the fixed
        # keys above from being overwritten by an extra of the same name.
        for key, value in record.__dict__.items():
            if key not in _RESERVED_ATTRS and not key.startswith("_"):
                payload.setdefault(key, value)

        if record.exc_info:
            payload["exc_info"] = self.formatException(record.exc_info)
        if record.stack_info:
            payload["stack_info"] = self.formatStack(record.stack_info)
        return json.dumps(payload, default=str)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def configure_json_logging(level: str = "INFO") -> None:
    """Install a single JSON-emitting stream handler on the root logger.

    Any handlers already attached to the root logger are replaced.  *level*
    is looked up by upper-cased name on the ``logging`` module; an unknown
    name falls back to ``logging.INFO``.

    Intended to be called once at server startup.
    """
    json_handler = logging.StreamHandler()
    json_handler.setFormatter(JsonFormatter())

    root_logger = logging.getLogger()
    root_logger.handlers = [json_handler]

    resolved_level = getattr(logging, level.upper(), logging.INFO)
    root_logger.setLevel(resolved_level)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# ---------------------------------------------------------------------------
|
|
108
|
+
# Starlette ASGI correlation-ID middleware
|
|
109
|
+
# ---------------------------------------------------------------------------
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class CorrelationIdMiddleware:
    """ASGI middleware that assigns a correlation ID to every request.

    The ID is taken from the incoming ``X-Correlation-ID`` header when
    present, or generated fresh as a UUID4 hex string.  It is:

    * Stored in the ``correlation_id`` ContextVar (readable via
      :func:`get_correlation_id`).
    * Added to the response as the ``X-Correlation-ID`` header.

    Only ``http`` and ``websocket`` scopes are handled; every other scope
    type is passed straight through to the wrapped application.
    """

    def __init__(self, app: "ASGIApp") -> None:
        self.app = app

    async def __call__(
        self, scope: "Scope", receive: "Receive", send: "Send"
    ) -> None:
        if scope["type"] not in ("http", "websocket"):
            await self.app(scope, receive, send)
            return

        # Extract or generate a correlation ID.
        headers = dict(scope.get("headers", []))
        raw_cid = headers.get(b"x-correlation-id", b"")
        # latin-1 maps every byte to exactly one code point, so decoding can
        # never fail and encoding with the same codec round-trips exactly.
        cid = raw_cid.decode("latin-1") if raw_cid else uuid.uuid4().hex
        set_correlation_id(cid)

        # Encode with the codec used for decoding; the default UTF-8 would
        # turn any byte >= 0x80 into two bytes and echo back a different value.
        header_value = cid.encode("latin-1")

        async def send_with_correlation(message: MutableMapping[str, Any]) -> None:
            if message["type"] == "http.response.start":
                # Build a new message rather than mutating the caller's dict.
                response_headers = list(message.get("headers", []))
                response_headers.append((b"x-correlation-id", header_value))
                message = {**message, "headers": response_headers}
            await send(message)

        await self.app(scope, receive, send_with_correlation)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# ---------------------------------------------------------------------------
|
|
150
|
+
# HTTP request/response access-log middleware
|
|
151
|
+
# ---------------------------------------------------------------------------
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class AccessLogMiddleware:
    """ASGI middleware that emits one structured access-log entry per HTTP
    request and records the request in the Prometheus metrics registry.

    Non-HTTP scopes are passed through untouched.  The log entry is written
    in a ``finally`` block, so it is emitted even when the wrapped
    application raises; in that case the status is whatever had been sent
    before the failure (``0`` if no response had started).
    """

    def __init__(self, app: "ASGIApp", logger_name: str = "minder.access") -> None:
        self.app = app
        self._log = logging.getLogger(logger_name)

    async def __call__(
        self, scope: "Scope", receive: "Receive", send: "Send"
    ) -> None:
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        started_at = time.perf_counter()
        status_code = 0

        async def capture_status(message: MutableMapping[str, Any]) -> None:
            # Remember the status of the response start message, then forward.
            nonlocal status_code
            if message["type"] == "http.response.start":
                status_code = message.get("status", 0)
            await send(message)

        try:
            await self.app(scope, receive, capture_status)
        finally:
            elapsed = time.perf_counter() - started_at
            method = scope.get("method", "")
            path = scope.get("path", "")
            self._log.info(
                "%s %s %s",
                method,
                path,
                status_code,
                extra={
                    "http_method": method,
                    "http_path": path,
                    "http_status": status_code,
                    "duration_ms": round(elapsed * 1000, 2),
                },
            )
            # Record into the Prometheus registry (import deferred to avoid
            # circular imports at module load time).
            # NOTE(review): the raw request path is passed as the
            # ``path_template`` label; paths with embedded IDs would create
            # one time series each — confirm whether a route template is
            # available here.
            try:
                from minder.observability.metrics import record_http_request  # noqa: PLC0415

                record_http_request(method, path, status_code, elapsed)
            except Exception:  # noqa: BLE001
                # Metrics are best-effort and must never break a request, but
                # leave a trace instead of failing silently.
                self._log.debug(
                    "Failed to record HTTP request metrics", exc_info=True
                )
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
class GlobalExceptionMiddleware:
    """Catch-all ASGI middleware that returns unhandled errors as JSON 500s.

    Every exception escaping the wrapped application is logged to the
    ``minder.errors`` logger.  If no response has started yet, a JSON body of
    the form ``{"error": {"code", "message", "details"}}`` is sent with
    status 500.  ``details`` carries the traceback only when debug mode is
    on; otherwise it is ``None`` so stack traces are not exposed to clients.
    """

    def __init__(self, app: "ASGIApp") -> None:
        self.app = app

    async def __call__(self, scope: "Scope", receive: "Receive", send: "Send") -> None:
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        response_started = False

        async def send_wrapper(message: MutableMapping[str, Any]) -> None:
            # Track whether status and headers have already gone out.
            nonlocal response_started
            if message["type"] == "http.response.start":
                response_started = True
            await send(message)

        try:
            await self.app(scope, receive, send_wrapper)
        except Exception as exc:
            import traceback

            logger = logging.getLogger("minder.errors")
            logger.exception("Unhandled exception in ASGI application: %s", exc)

            if response_started:
                # Status and headers are already on the wire, so a clean JSON
                # error can no longer be sent.  The failure is logged above;
                # return without completing the response.
                return

            from starlette.responses import JSONResponse

            # The original condition was ``scope.get("debug") or True``,
            # which is always true and sent the traceback to every client.
            # Honour an explicit ``debug`` flag on the scope, or on the
            # application object stored under ``scope["app"]``.
            # NOTE(review): confirm which of the two this deployment sets.
            debug_enabled = bool(scope.get("debug")) or bool(
                getattr(scope.get("app"), "debug", False)
            )

            response = JSONResponse(
                {
                    "error": {
                        "code": "SYS_INTERNAL_ERROR",
                        "message": str(exc),
                        "details": traceback.format_exc() if debug_enabled else None,
                    }
                },
                status_code=500,
            )
            await response(scope, receive, send)
|
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
"""Prometheus metrics registry for Minder.
|
|
2
|
+
|
|
3
|
+
Registers all application-level counters, histograms, and gauges and
|
|
4
|
+
exposes a WSGI/ASGI-compatible handler that can be mounted at `/metrics`.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
10
|
+
|
|
11
|
+
from prometheus_client import (
|
|
12
|
+
CONTENT_TYPE_LATEST,
|
|
13
|
+
CollectorRegistry,
|
|
14
|
+
Counter,
|
|
15
|
+
Gauge,
|
|
16
|
+
Histogram,
|
|
17
|
+
generate_latest,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from starlette.requests import Request
|
|
22
|
+
from starlette.responses import Response
|
|
23
|
+
|
|
24
|
+
from minder.store.interfaces import IOperationalStore
|
|
25
|
+
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
# Shared registry
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
# Dedicated registry: every metric below is registered here rather than in
# the prometheus_client default registry.
REGISTRY = CollectorRegistry(auto_describe=True)

# ---------------------------------------------------------------------------
# Tool-call metrics
# ---------------------------------------------------------------------------

# client_id is high-cardinality → stored in audit DB, not here
TOOL_CALLS_TOTAL = Counter(
    "minder_tool_calls_total",
    "Total number of MCP tool invocations.",
    labelnames=("tool_name", "outcome"),
    registry=REGISTRY,
)

TOOL_CALL_DURATION = Histogram(
    "minder_tool_call_duration_seconds",
    "MCP tool call latency in seconds.",
    labelnames=("tool_name",),
    buckets=(0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0),
    registry=REGISTRY,
)

# ---------------------------------------------------------------------------
# Auth / session metrics
# ---------------------------------------------------------------------------

# client_id is high-cardinality → stored in audit DB
AUTH_EVENTS_TOTAL = Counter(
    "minder_auth_events_total",
    "Total number of authentication and authorisation events.",
    labelnames=("event_type", "outcome"),
    registry=REGISTRY,
)

ACTIVE_CLIENT_SESSIONS = Gauge(
    "minder_active_client_sessions",
    "Number of active MCP client sessions tracked in the cache.",
    registry=REGISTRY,
)

# ---------------------------------------------------------------------------
# HTTP metrics
# ---------------------------------------------------------------------------

HTTP_REQUESTS_TOTAL = Counter(
    "minder_http_requests_total",
    "Total HTTP requests handled.",
    labelnames=("method", "path_template", "status"),
    registry=REGISTRY,
)

HTTP_REQUEST_DURATION = Histogram(
    "minder_http_request_duration_seconds",
    "HTTP request latency in seconds.",
    labelnames=("method", "path_template"),
    buckets=(0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5),
    registry=REGISTRY,
)

# ---------------------------------------------------------------------------
# Admin-operation metrics
# ---------------------------------------------------------------------------

ADMIN_OPERATIONS_TOTAL = Counter(
    "minder_admin_operations_total",
    "Total admin API operations.",
    labelnames=("operation", "outcome"),
    registry=REGISTRY,
)

# ---------------------------------------------------------------------------
# Continuity quality metrics
# ---------------------------------------------------------------------------

CONTINUITY_PACKETS_TOTAL = Counter(
    "minder_continuity_packets_total",
    "Total continuity packets emitted by continuity-aware surfaces.",
    labelnames=("source",),
    registry=REGISTRY,
)

CONTINUITY_RECALLS_TOTAL = Counter(
    "minder_continuity_recalls_total",
    "Total continuity recall operations grouped by synthesis provider.",
    labelnames=("provider",),
    registry=REGISTRY,
)

CONTINUITY_STEP_COMPATIBILITY = Histogram(
    "minder_continuity_step_compatibility",
    "Observed workflow-step compatibility scores for continuity-aware retrieval.",
    buckets=(0.0, 0.1, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5),
    registry=REGISTRY,
)

CONTINUITY_SKILL_QUALITY = Histogram(
    "minder_continuity_skill_quality",
    "Observed quality scores for workflow-aware skill retrieval.",
    buckets=(0.0, 0.1, 0.25, 0.5, 0.75, 1.0),
    registry=REGISTRY,
)

CONTINUITY_QUERY_PROMPTS_TOTAL = Counter(
    "minder_continuity_query_prompts_total",
    "Total query prompt renders grouped by prompt source.",
    labelnames=("source",),
    registry=REGISTRY,
)

CONTINUITY_CORRECTION_RETRIES_TOTAL = Counter(
    "minder_continuity_correction_retries_total",
    "Total corrective retries triggered by continuity/workflow contract failures.",
    labelnames=("failure_kind",),
    registry=REGISTRY,
)

CONTINUITY_GATES_TOTAL = Counter(
    "minder_continuity_gates_total",
    "Total continuity gate evaluations grouped by outcome.",
    labelnames=("outcome",),
    registry=REGISTRY,
)
|
|
153
|
+
|
|
154
|
+
# ---------------------------------------------------------------------------
|
|
155
|
+
# Helpers
|
|
156
|
+
# ---------------------------------------------------------------------------
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def record_tool_call(
    tool_name: str,
    outcome: str,
    duration_seconds: float,
    client_id: str = "unknown",  # kept for API compat; stored in audit DB, not Prometheus label
) -> None:
    """Count one tool invocation and observe its latency.

    Args:
        tool_name: Value for the ``tool_name`` label on both metrics.
        outcome: Value for the ``outcome`` label on the counter.
        duration_seconds: Latency observed on the duration histogram.
        client_id: Accepted but not used by this function.
    """
    invocation_counter = TOOL_CALLS_TOTAL.labels(tool_name=tool_name, outcome=outcome)
    invocation_counter.inc()

    latency_histogram = TOOL_CALL_DURATION.labels(tool_name=tool_name)
    latency_histogram.observe(duration_seconds)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
async def record_auth_event(
    event_type: str,
    outcome: str,
    client_id: str = "unknown",
    store: "IOperationalStore | None" = None,
) -> None:
    """Record an auth/session lifecycle event.

    The Prometheus counter is always incremented.  When *store* is given, an
    audit-log entry is also written through ``store.create_audit_log``; that
    write is best-effort and any exception it raises is swallowed.

    Args:
        event_type: Label value for the counter and the audit ``event_type``.
        outcome: Label value for the counter and the audit ``outcome``.
        client_id: Used as actor ID, resource ID and metadata of the audit entry.
        store: Optional operational store that receives the audit entry.
    """
    AUTH_EVENTS_TOTAL.labels(event_type=event_type, outcome=outcome).inc()

    if store is None:
        return

    audit_entry = {
        "actor_type": "auth",
        "actor_id": client_id,
        "event_type": event_type,
        "resource_type": "session",
        "resource_id": client_id,
        "outcome": outcome,
        "audit_metadata": {"client_id": client_id},
    }
    try:
        await store.create_audit_log(**audit_entry)
    except Exception:  # noqa: BLE001
        # Audit persistence must never break the auth flow.
        pass
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def record_http_request(
    method: str,
    path_template: str,
    status: int,
    duration_seconds: float,
) -> None:
    """Count one completed HTTP request and observe its latency.

    *status* is converted to a string before being used as a label value.
    """
    status_label = str(status)
    request_counter = HTTP_REQUESTS_TOTAL.labels(
        method=method, path_template=path_template, status=status_label
    )
    request_counter.inc()

    latency_histogram = HTTP_REQUEST_DURATION.labels(
        method=method, path_template=path_template
    )
    latency_histogram.observe(duration_seconds)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def record_continuity_packet(source: str) -> None:
    """Count one emitted continuity packet; a falsy *source* is labelled "unknown"."""
    source_label = source or "unknown"
    CONTINUITY_PACKETS_TOTAL.labels(source=source_label).inc()
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def record_continuity_recall(*, provider: str, step_compatibility: float) -> None:
    """Count one continuity recall and observe its step-compatibility score.

    A falsy *provider* is labelled "unknown".
    """
    provider_label = provider or "unknown"
    CONTINUITY_RECALLS_TOTAL.labels(provider=provider_label).inc()
    CONTINUITY_STEP_COMPATIBILITY.observe(step_compatibility)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def record_continuity_skill_recall(
    *, step_compatibility: float, quality_score: float
) -> None:
    """Observe the step-compatibility and quality scores of a skill recall.

    The quality score is floored at 0.0 before being observed.
    """
    CONTINUITY_STEP_COMPATIBILITY.observe(step_compatibility)
    floored_quality = max(quality_score, 0.0)
    CONTINUITY_SKILL_QUALITY.observe(floored_quality)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def record_query_prompt_render(source: str, *, correction_retries: int = 0) -> None:
    """Count one query-prompt render and any corrective retries it needed.

    A falsy *source* is labelled "unknown".  When *correction_retries* is
    positive, the retry counter is increased by that amount under the
    ``workflow_contract`` failure kind.
    """
    source_label = source or "unknown"
    CONTINUITY_QUERY_PROMPTS_TOTAL.labels(source=source_label).inc()

    if correction_retries > 0:
        retry_counter = CONTINUITY_CORRECTION_RETRIES_TOTAL.labels(
            failure_kind="workflow_contract"
        )
        retry_counter.inc(correction_retries)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def record_continuity_gate(outcome: str) -> None:
    """Count one continuity gate evaluation; a falsy *outcome* is labelled "unknown"."""
    outcome_label = outcome or "unknown"
    CONTINUITY_GATES_TOTAL.labels(outcome=outcome_label).inc()
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
async def record_admin_operation(
    operation: str,
    outcome: str,
    actor_id: str = "unknown",
    store: IOperationalStore | None = None,
) -> None:
    """Record an admin API operation (outcome: 'success' | 'error').

    The Prometheus counter is always incremented.  When *store* is given, an
    audit-log entry is also written through ``store.create_audit_log``; any
    exception raised by that write is swallowed.
    """
    ADMIN_OPERATIONS_TOTAL.labels(operation=operation, outcome=outcome).inc()

    if store is None:
        return

    audit_entry = {
        "actor_type": "admin",
        "actor_id": actor_id,
        "event_type": "admin_op",
        "resource_type": "admin_api",
        "resource_id": operation,
        "outcome": outcome,
        "audit_metadata": {"operation": operation},
    }
    try:
        await store.create_audit_log(**audit_entry)
    except Exception:
        # Audit persistence is best-effort; the counter above already moved.
        pass
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
# ---------------------------------------------------------------------------
|
|
266
|
+
# Starlette endpoint
|
|
267
|
+
# ---------------------------------------------------------------------------
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
async def metrics_endpoint(request: "Request") -> "Response":  # noqa: ARG001
    """Serve the contents of ``REGISTRY`` in the Prometheus exposition format.

    The *request* argument is unused.
    """
    from starlette.responses import Response as StarletteResponse

    exposition = generate_latest(REGISTRY)
    return StarletteResponse(content=exposition, media_type=CONTENT_TYPE_LATEST)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def get_registry_snapshot() -> dict[str, Any]:
    """Return a ``{metric name: metric type}`` mapping for ``REGISTRY`` (for tests).

    Reads the registry's private name-to-collector table.  Collectors
    without a ``describe`` method are skipped, and only the first described
    metric of each collector is used.
    """
    snapshot: dict[str, Any] = {}
    for collector in REGISTRY._names_to_collectors.values():  # noqa: SLF001
        if not hasattr(collector, "describe"):
            continue
        described = collector.describe()[0]  # type: ignore[union-attr]
        snapshot[described.name] = described.type
    return snapshot
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _counter_total(
|
|
291
|
+
counter: Counter, filter_label: str | None = None, filter_value: str | None = None
|
|
292
|
+
) -> float:
|
|
293
|
+
"""Sum all label-value combinations of a Counter, optionally filtering."""
|
|
294
|
+
total = 0.0
|
|
295
|
+
label_names = counter._labelnames # noqa: SLF001
|
|
296
|
+
filter_idx = (
|
|
297
|
+
label_names.index(filter_label) if filter_label in label_names else None
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
for label_tuple, child in counter._metrics.items(): # noqa: SLF001
|
|
301
|
+
if filter_idx is not None and filter_value:
|
|
302
|
+
if label_tuple[filter_idx] != filter_value:
|
|
303
|
+
continue
|
|
304
|
+
total += cast(Any, child)._value.get() # noqa: SLF001
|
|
305
|
+
return total
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def _counter_by_label(
|
|
309
|
+
counter: Counter,
|
|
310
|
+
label_name: str,
|
|
311
|
+
filter_label: str | None = None,
|
|
312
|
+
filter_value: str | None = None,
|
|
313
|
+
) -> dict[str, float]:
|
|
314
|
+
"""Aggregate a Counter by a single label, optionally filtering."""
|
|
315
|
+
label_names: tuple[str, ...] = counter._labelnames # noqa: SLF001
|
|
316
|
+
if label_name not in label_names:
|
|
317
|
+
return {}
|
|
318
|
+
|
|
319
|
+
idx = label_names.index(label_name)
|
|
320
|
+
filter_idx = (
|
|
321
|
+
label_names.index(filter_label) if filter_label in label_names else None
|
|
322
|
+
)
|
|
323
|
+
|
|
324
|
+
result: dict[str, float] = {}
|
|
325
|
+
for label_tuple, child in counter._metrics.items(): # noqa: SLF001
|
|
326
|
+
if filter_idx is not None and filter_value:
|
|
327
|
+
if label_tuple[filter_idx] != filter_value:
|
|
328
|
+
continue
|
|
329
|
+
key = label_tuple[idx]
|
|
330
|
+
result[key] = (
|
|
331
|
+
result.get(key, 0.0) + cast(Any, child)._value.get()
|
|
332
|
+
) # noqa: SLF001
|
|
333
|
+
return result
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def _histogram_average(histogram: Histogram) -> float:
|
|
337
|
+
total = 0.0
|
|
338
|
+
count = 0.0
|
|
339
|
+
for metric in histogram.collect():
|
|
340
|
+
for sample in metric.samples:
|
|
341
|
+
if sample.name.endswith("_sum"):
|
|
342
|
+
total = float(sample.value)
|
|
343
|
+
elif sample.name.endswith("_count"):
|
|
344
|
+
count = float(sample.value)
|
|
345
|
+
if count <= 0:
|
|
346
|
+
return 0.0
|
|
347
|
+
return round(total / count, 4)
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
async def get_metrics_summary(
    store: IOperationalStore,
    active_sessions: int | None = None,
    client_id: str | None = None,
    event_type: str | None = None,
    outcome: str | None = None,
) -> dict[str, Any]:
    """Build a combined summary of audit-log aggregates and runtime metrics.

    Tool-call, auth-event and admin-operation figures come from
    ``store.get_audit_summary``.  HTTP and continuity figures are read from
    the in-process Prometheus metrics.  The session count is *active_sessions*
    when given, otherwise the current value of the ``ACTIVE_CLIENT_SESSIONS``
    gauge.

    NOTE(review): the ``event_type`` parameter is accepted but never used in
    this function — confirm whether it was meant to filter the auth query.
    """
    import logging

    log = logging.getLogger("minder.metrics")

    # --- Persistent figures (operational store) ----------------------------
    # 1. Tool calls, grouped three ways.
    tool_by_outcome = await store.get_audit_summary(
        actor_id=client_id,
        event_type="tool_call",
        outcome=outcome,
        group_by="outcome",
    )
    # The per-client breakdown is not restricted by client_id.
    tool_by_client = await store.get_audit_summary(
        event_type="tool_call",
        outcome=outcome,
        group_by="audit_metadata.client_id",  # This depends on Mongo/SQL support for nested fields
    )
    tool_by_name = await store.get_audit_summary(
        actor_id=client_id,
        event_type="tool_call",
        outcome=outcome,
        group_by="tool_name",
    )
    tool_total = sum(tool_by_outcome.values())

    # 2. Auth events: no event_type filter is passed, so this groups every
    #    matching audit entry by its event type.
    auth_by_type = await store.get_audit_summary(
        actor_id=client_id, outcome=outcome, group_by="event_type"
    )
    auth_total = sum(auth_by_type.values())

    # 3. Admin operations.
    admin_by_outcome = await store.get_audit_summary(
        event_type="admin_op", outcome=outcome, group_by="outcome"
    )
    admin_total = sum(admin_by_outcome.values())

    # --- Ephemeral figures (Prometheus) -------------------------------------
    if active_sessions is not None:
        effective_sessions = active_sessions
    else:
        effective_sessions = ACTIVE_CLIENT_SESSIONS._value.get()

    log.info(
        "Serving persistent metrics summary: sessions=%s, tool_calls=%s",
        effective_sessions,
        tool_total,
    )

    tool_section = {
        "total": tool_total,
        "by_outcome": tool_by_outcome,
        "by_client": tool_by_client,
        "by_name": tool_by_name,
    }
    auth_section = {
        "total": auth_total,
        "by_type": auth_by_type,
    }
    http_section = {
        "total": _counter_total(HTTP_REQUESTS_TOTAL),
        "by_status": _counter_by_label(HTTP_REQUESTS_TOTAL, "status"),
    }
    admin_section = {
        "total": admin_total,
        "by_outcome": admin_by_outcome,
    }
    continuity_section = {
        "packets_emitted_total": _counter_total(CONTINUITY_PACKETS_TOTAL),
        "packets_by_source": _counter_by_label(CONTINUITY_PACKETS_TOTAL, "source"),
        "recalls_total": _counter_total(CONTINUITY_RECALLS_TOTAL),
        "recalls_by_provider": _counter_by_label(CONTINUITY_RECALLS_TOTAL, "provider"),
        "average_step_compatibility": _histogram_average(CONTINUITY_STEP_COMPATIBILITY),
        "average_skill_quality": _histogram_average(CONTINUITY_SKILL_QUALITY),
        "query_prompts_by_source": _counter_by_label(
            CONTINUITY_QUERY_PROMPTS_TOTAL, "source"
        ),
        "correction_retries_total": _counter_total(CONTINUITY_CORRECTION_RETRIES_TOTAL),
        "gates_by_outcome": _counter_by_label(CONTINUITY_GATES_TOTAL, "outcome"),
    }

    return {
        "active_client_sessions": effective_sessions,
        "tool_calls": tool_section,
        "auth_events": auth_section,
        "http_requests": http_section,
        "admin_operations": admin_section,
        "continuity_quality": continuity_section,
    }
|