monkeybrain-introspection 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ """Lemon — Observability Layer for AgentOS.
2
+
3
+ Complete end-to-end visibility into every aspect of the runtime.
4
+
5
+ Provides:
6
+ - Distributed tracing (Trace, Span)
7
+ - Metrics collection (counters, gauges, histograms)
8
+ - Structured logging (JSON, trace-enriched)
9
+ - Health monitoring (healthy, degraded, unhealthy)
10
+ - Alerting (rules, firing, resolution)
11
+
12
+ Lemon observes everything.
13
+ Lemon changes nothing.
14
+ """
15
+
16
+ from introspection.lemon import Lemon
17
+ from introspection.tracing import Tracer, Trace, Span
18
+ from introspection.metrics import MetricsCollector, Metric
19
+ from introspection.structured_logging import StructuredLogger, LogEntry
20
+ from introspection.health import HealthMonitor, HealthCheck
21
+ from introspection.alerting import AlertManager, Alert, AlertRule, AlertSeverity
22
+
23
+ __all__ = [
24
+ "Lemon",
25
+ "Tracer",
26
+ "Trace",
27
+ "Span",
28
+ "MetricsCollector",
29
+ "Metric",
30
+ "StructuredLogger",
31
+ "LogEntry",
32
+ "HealthMonitor",
33
+ "HealthCheck",
34
+ "AlertManager",
35
+ "Alert",
36
+ "AlertRule",
37
+ "AlertSeverity",
38
+ ]
@@ -0,0 +1,142 @@
1
+ """Alerting — event-driven alert system.
2
+
3
+ Generate alerts for:
4
+ - Failures
5
+ - Latency thresholds
6
+ - Memory pressure
7
+ - Queue saturation
8
+ - Capability failures
9
+ - Provider failures
10
+ - Database connectivity
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from dataclasses import dataclass, field
16
+ from datetime import datetime, timezone
17
+ from enum import Enum
18
+ from typing import Any, Callable
19
+ from uuid import uuid4
20
+
21
+
22
class AlertSeverity(str, Enum):
    """How serious an alert is.

    Members are also plain strings, so they compare equal to their values
    (e.g. ``AlertSeverity.INFO == "info"``).
    """

    # Listed from least to most severe.
    INFO = "info"
    WARNING = "warning"
    CRITICAL = "critical"
26
+
27
+
28
class AlertStatus(str, Enum):
    """Lifecycle state of an alert.

    Members are also plain strings, so they compare equal to their values
    (e.g. ``AlertStatus.FIRING == "firing"``).
    """

    FIRING = "firing"
    RESOLVED = "resolved"
    SILENCED = "silenced"
32
+
33
+
34
@dataclass
class Alert:
    """A single alert occurrence.

    Every field has a default, so an alert can be built from keyword
    arguments alone.
    """

    # Short random identifier of the form "alert-xxxxxxxx".
    alert_id: str = field(default_factory=lambda: f"alert-{uuid4().hex[:8]}")
    name: str = ""
    severity: AlertSeverity = AlertSeverity.WARNING
    status: AlertStatus = AlertStatus.FIRING
    message: str = ""
    source: str = ""
    # ISO-8601 creation time in UTC.
    timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
    # Free-form extra data attached by whoever raised the alert.
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the alert.

        Enum fields are emitted as their string values. ``metadata`` is
        included only when it is non-empty, so alerts without metadata
        serialize exactly as before.
        """
        payload: dict[str, Any] = {
            "alert_id": self.alert_id,
            "name": self.name,
            "severity": self.severity.value,
            "status": self.status.value,
            "message": self.message,
            "source": self.source,
            "timestamp": self.timestamp,
        }
        if self.metadata:
            # Previously dropped: metadata supplied when the alert was fired
            # never reached the serialized form.
            payload["metadata"] = self.metadata
        return payload
57
+
58
+
59
class AlertRule:
    """A named condition that produces an :class:`Alert` when it holds.

    Args:
        name: Name given to alerts produced by this rule.
        condition: Callable receiving the evaluation context and returning a
            truthy value when the alert should fire.
        severity: Severity assigned to produced alerts.
        message_template: ``str.format`` template rendered with the context
            as keyword arguments.
    """

    def __init__(
        self,
        name: str,
        condition: Callable[[dict], bool],
        severity: AlertSeverity = AlertSeverity.WARNING,
        message_template: str = "",
    ):
        self.name = name
        self.condition = condition
        self.severity = severity
        self.message_template = message_template

    def evaluate(self, context: dict) -> Alert | None:
        """Evaluate the rule against ``context``.

        Returns:
            A new alert when the condition is truthy, otherwise ``None``.
            A condition that raises is treated as "not firing".
        """
        try:
            triggered = self.condition(context)
        except Exception:
            # Best effort: a condition that cannot be evaluated on this
            # context must not break evaluation of the other rules.
            return None
        if not triggered:
            return None

        try:
            message = self.message_template.format(**context)
        except (LookupError, ValueError, TypeError, AttributeError):
            # A template that references data absent from the context used to
            # suppress the alert entirely; fall back to the raw template so a
            # true condition always produces an alert.
            message = self.message_template

        return Alert(name=self.name, severity=self.severity, message=message)
85
+
86
+
87
class AlertManager:
    """In-memory alert management.

    Responsibilities:
    - Evaluate alert rules
    - Fire alerts
    - Track alert history
    - Resolve active alerts

    NOTE(review): the original docstring also listed alert suppression, but
    no method of this class implements it.
    """

    def __init__(self):
        self._rules: list[AlertRule] = []
        # Every alert ever recorded, oldest first.
        self._alerts: list[Alert] = []
        # Alerts that have not been resolved yet, keyed by alert_id.
        self._active: dict[str, Alert] = {}

    def _record(self, alert: Alert) -> Alert:
        """Store ``alert`` in the history and mark it active."""
        self._alerts.append(alert)
        self._active[alert.alert_id] = alert
        return alert

    def add_rule(self, rule: AlertRule) -> None:
        """Register a rule to be checked by :meth:`evaluate`."""
        self._rules.append(rule)

    def evaluate(self, context: dict) -> list[Alert]:
        """Evaluate all rules against ``context``.

        Returns:
            The alerts produced by this call, in rule registration order.
            Each one is also recorded in the history and marked active.
        """
        new_alerts = []
        for rule in self._rules:
            alert = rule.evaluate(context)
            if alert is not None:
                new_alerts.append(self._record(alert))
        return new_alerts

    def fire(self, name: str, message: str, severity: AlertSeverity = AlertSeverity.WARNING, **metadata: Any) -> Alert:
        """Manually fire an alert; extra keyword arguments become its metadata."""
        return self._record(Alert(name=name, severity=severity, message=message, metadata=metadata))

    def resolve(self, alert_id: str) -> bool:
        """Resolve an active alert.

        Returns:
            ``True`` if the alert was active and is now resolved, ``False``
            if no active alert has that id.
        """
        alert = self._active.pop(alert_id, None)
        if alert is None:
            return False
        alert.status = AlertStatus.RESOLVED
        return True

    def get_active(self) -> list[Alert]:
        """Return the currently active alerts as a new list."""
        return list(self._active.values())

    def get_history(self, limit: int = 50) -> list[Alert]:
        """Return up to ``limit`` most recent alerts, oldest first.

        A non-positive ``limit`` yields an empty list. (``[-0:]`` would
        otherwise return the whole history.)
        """
        if limit <= 0:
            return []
        return self._alerts[-limit:]

    def summary(self) -> dict:
        """Return counts of rules, active alerts and recorded alerts."""
        return {
            "rules": len(self._rules),
            "active_alerts": len(self._active),
            "total_alerts": len(self._alerts),
        }
@@ -0,0 +1,101 @@
1
+ """Health — health monitoring for all subsystems.
2
+
3
+ Every subsystem reports health:
4
+ - Healthy
5
+ - Degraded
6
+ - Unhealthy
7
+ - Unknown
8
+
9
+ Health checks include dependencies.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from dataclasses import dataclass, field
15
+ from datetime import datetime, timezone
16
+ from typing import Any
17
+ from uuid import uuid4
18
+
19
+
20
@dataclass
class HealthCheck:
    """A single health check result."""

    # Short random identifier of the form "health-xxxxxxxx".
    check_id: str = field(default_factory=lambda: f"health-{uuid4().hex[:8]}")
    name: str = ""
    status: str = "healthy"  # healthy | degraded | unhealthy | unknown
    latency_ms: float = 0.0
    message: str = ""
    dependencies: list[dict[str, Any]] = field(default_factory=list)
    # ISO-8601 creation time in UTC.
    timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
    # Free-form extra data supplied by the reporter.
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the check.

        ``latency_ms`` is rounded to two decimals. ``metadata`` is included
        only when non-empty, so checks without metadata serialize exactly as
        before.
        """
        payload: dict[str, Any] = {
            "check_id": self.check_id,
            "name": self.name,
            "status": self.status,
            "latency_ms": round(self.latency_ms, 2),
            "message": self.message,
            "dependencies": self.dependencies,
            "timestamp": self.timestamp,
        }
        if self.metadata:
            # Previously dropped from the serialized form.
            payload["metadata"] = self.metadata
        return payload
43
+
44
+
45
class HealthMonitor:
    """Keeps the latest health check per subsystem plus a bounded history.

    Responsibilities:
    - Record health checks
    - Track health history
    - Aggregate health status
    """

    def __init__(self):
        # Latest result per check name.
        self._checks: dict[str, HealthCheck] = {}
        # Chronological record of all results (trimmed in ``check``).
        self._history: list[HealthCheck] = []

    def check(self, name: str, status: str = "healthy", message: str = "", **metadata: Any) -> HealthCheck:
        """Record a health check result and return it.

        The result replaces any previous one stored under ``name``. Once the
        history exceeds 1000 entries it is cut back to the newest 500.
        """
        result = HealthCheck(name=name, status=status, message=message, metadata=metadata)
        self._checks[name] = result
        self._history.append(result)
        too_long = len(self._history) > 1000
        if too_long:
            self._history = self._history[-500:]
        return result

    def healthy(self, name: str, **metadata: Any) -> HealthCheck:
        """Record ``name`` as healthy."""
        return self.check(name, "healthy", **metadata)

    def degraded(self, name: str, message: str = "", **metadata: Any) -> HealthCheck:
        """Record ``name`` as degraded."""
        return self.check(name, "degraded", message, **metadata)

    def unhealthy(self, name: str, message: str = "", **metadata: Any) -> HealthCheck:
        """Record ``name`` as unhealthy."""
        return self.check(name, "unhealthy", message, **metadata)

    def get_health(self, name: str) -> HealthCheck | None:
        """Return the latest result for ``name``, or ``None`` if never recorded."""
        return self._checks.get(name)

    def overall_status(self) -> str:
        """Aggregate the latest results into one status string.

        The worst status wins: any "unhealthy" beats any "degraded". The
        result is "healthy" only when every check is healthy, and "unknown"
        when there are no checks or some status is unrecognised.
        """
        current = [entry.status for entry in self._checks.values()]
        if len(current) == 0:
            return "unknown"
        for level in ("unhealthy", "degraded"):
            if level in current:
                return level
        for status in current:
            if status != "healthy":
                return "unknown"
        return "healthy"

    def summary(self) -> dict:
        """Return the overall status, the per-check statuses and the check count."""
        per_check = {}
        for name, entry in self._checks.items():
            per_check[name] = entry.status
        return {
            "overall": self.overall_status(),
            "checks": per_check,
            "total_checks": len(self._checks),
        }
introspection/lemon.py ADDED
@@ -0,0 +1,243 @@
1
+ """Lemon — Observability Layer for AgentOS.
2
+
3
+ Persists metrics, traces, and logs to Elasticsearch for querying and dashboards.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ from datetime import datetime, timezone
10
+ from typing import Any
11
+
12
+ from introspection.tracing import Tracer, Trace, Span
13
+ from introspection.metrics import MetricsCollector, Metric
14
+ from introspection.structured_logging import StructuredLogger, LogEntry
15
+ from introspection.health import HealthMonitor, HealthCheck
16
+ from introspection.alerting import AlertManager, Alert, AlertRule, AlertSeverity
17
+
18
+
19
+ class Lemon:
20
+ """Observability manager for AgentOS.
21
+
22
+ Persists all metrics to Elasticsearch for querying.
23
+ """
24
+
25
+ def __init__(self, elasticsearch_url: str = "http://localhost:9200"):
26
+ self.tracer = Tracer()
27
+ self.metrics = MetricsCollector()
28
+ self.logger = StructuredLogger()
29
+ self.health = HealthMonitor()
30
+ self.alerts = AlertManager()
31
+ self._es_url = elasticsearch_url
32
+ self._es_client = None
33
+ self._persist_buffer: list[dict] = []
34
+
35
+ async def connect_elasticsearch(self) -> None:
36
+ """Connect to Elasticsearch for persistence."""
37
+ try:
38
+ from elasticsearch import AsyncElasticsearch
39
+ self._es_client = AsyncElasticsearch([self._es_url])
40
+ await self._es_client.ping()
41
+ except Exception:
42
+ self._es_client = None
43
+
44
+ async def persist_metrics(self) -> dict[str, Any]:
45
+ """Persist all metrics to Elasticsearch."""
46
+ if not self._es_client:
47
+ return {"status": "no_elasticsearch"}
48
+
49
+ export = self.metrics.export()
50
+ timestamp = datetime.now(timezone.utc).isoformat()
51
+
52
+ doc = {
53
+ "timestamp": timestamp,
54
+ "counters": export.get("counters", {}),
55
+ "gauges": export.get("gauges", {}),
56
+ "histograms": export.get("histograms", {}),
57
+ }
58
+
59
+ try:
60
+ await self._es_client.index(index="agentos-metrics", document=doc)
61
+ return {"status": "persisted", "timestamp": timestamp}
62
+ except Exception as e:
63
+ return {"status": "error", "error": str(e)}
64
+
65
+ async def persist_traces(self) -> dict[str, Any]:
66
+ """Persist recent traces to Elasticsearch."""
67
+ if not self._es_client:
68
+ return {"status": "no_elasticsearch"}
69
+
70
+ traces = list(self.tracer._traces.values())[-100:]
71
+
72
+ for trace in traces:
73
+ doc = trace.to_dict()
74
+ try:
75
+ await self._es_client.index(index="agentos-traces", document=doc)
76
+ except Exception:
77
+ pass
78
+
79
+ return {"status": "persisted", "count": len(traces)}
80
+
81
+ async def persist_logs(self, limit: int = 100) -> dict[str, Any]:
82
+ """Persist recent logs to Elasticsearch."""
83
+ if not self._es_client:
84
+ return {"status": "no_elasticsearch"}
85
+
86
+ logs = self.logger.get_entries(limit=limit)
87
+
88
+ for log in logs:
89
+ doc = log.to_dict()
90
+ try:
91
+ await self._es_client.index(index="agentos-logs", document=doc)
92
+ except Exception:
93
+ pass
94
+
95
+ return {"status": "persisted", "count": len(logs)}
96
+
97
+ async def persist_all(self) -> dict[str, Any]:
98
+ """Persist all observability data to Elasticsearch."""
99
+ results = {}
100
+ results["metrics"] = await self.persist_metrics()
101
+ results["traces"] = await self.persist_traces()
102
+ results["logs"] = await self.persist_logs()
103
+ return results
104
+
105
+ async def query_metrics(self, metric_name: str | None = None, size: int = 100) -> list[dict]:
106
+ """Query metrics from Elasticsearch."""
107
+ if not self._es_client:
108
+ return []
109
+
110
+ try:
111
+ if metric_name:
112
+ query = {"query": {"term": {"counters." + metric_name: {"exists": True}}}}
113
+ else:
114
+ query = {"query": {"match_all": {}}, "sort": [{"timestamp": "desc"}]}
115
+
116
+ result = await self._es_client.search(index="agentos-metrics", body={"size": size, **query})
117
+ return [hit["_source"] for hit in result["hits"]["hits"]]
118
+ except Exception:
119
+ return []
120
+
121
+ async def query_traces(self, trace_id: str | None = None, size: int = 100) -> list[dict]:
122
+ """Query traces from Elasticsearch."""
123
+ if not self._es_client:
124
+ return []
125
+
126
+ try:
127
+ if trace_id:
128
+ query = {"query": {"term": {"trace_id": trace_id}}}
129
+ else:
130
+ query = {"query": {"match_all": {}}, "sort": [{"timestamp": "desc"}]}
131
+
132
+ result = await self._es_client.search(index="agentos-traces", body={"size": size, **query})
133
+ return [hit["_source"] for hit in result["hits"]["hits"]]
134
+ except Exception:
135
+ return []
136
+
137
+ async def query_logs(self, severity: str | None = None, component: str | None = None, size: int = 100) -> list[dict]:
138
+ """Query logs from Elasticsearch."""
139
+ if not self._es_client:
140
+ return []
141
+
142
+ try:
143
+ must = []
144
+ if severity:
145
+ must.append({"term": {"severity": severity}})
146
+ if component:
147
+ must.append({"term": {"component": component}})
148
+
149
+ query = {"query": {"bool": {"must": must or [{"match_all": {}}]}}}
150
+ result = await self._es_client.search(index="agentos-logs", body={"size": size, "sort": [{"timestamp": "desc"}], **query})
151
+ return [hit["_source"] for hit in result["hits"]["hits"]]
152
+ except Exception:
153
+ return []
154
+
155
+ async def get_dashboard(self) -> dict[str, Any]:
156
+ """Get dashboard data for all metrics."""
157
+ metrics = await self.query_metrics(size=1)
158
+ traces = await self.query_traces(size=10)
159
+ logs = await self.query_logs(size=10)
160
+
161
+ return {
162
+ "latest_metrics": metrics[0] if metrics else {},
163
+ "recent_traces": len(traces),
164
+ "recent_logs": len(logs),
165
+ "health": self.health.summary(),
166
+ "alerts": [a.to_dict() for a in self.alerts.get_active()],
167
+ }
168
+
169
+ # --- Tracing ---
170
+ def start_trace(self, name: str = "", **metadata: Any) -> Trace:
171
+ return self.tracer.start_trace(name, **metadata)
172
+
173
+ def start_span(self, name: str, component: str = "", **attributes: Any) -> Span:
174
+ return self.tracer.start_span(name, component, **attributes)
175
+
176
+ def finish_span(self, status: str = "ok") -> None:
177
+ self.tracer.finish_span(status)
178
+
179
+ def finish_trace(self) -> None:
180
+ self.tracer.finish_trace()
181
+
182
+ # --- Metrics ---
183
+ def record_metric(self, name: str, value: float, unit: str = "", **tags: str) -> Metric:
184
+ return self.metrics.record(name, value, unit, **tags)
185
+
186
+ def counter(self, name: str, increment: int = 1, **tags: str) -> None:
187
+ self.metrics.counter(name, increment, **tags)
188
+
189
+ def gauge(self, name: str, value: float, **tags: str) -> None:
190
+ self.metrics.gauge(name, value, **tags)
191
+
192
+ def histogram(self, name: str, value: float, **tags: str) -> None:
193
+ self.metrics.histogram(name, value, **tags)
194
+
195
+ # --- Logging ---
196
+ def log(self, severity: str, message: str, component: str = "", **kwargs: Any) -> LogEntry:
197
+ trace = self.tracer.get_current_trace()
198
+ if trace:
199
+ kwargs["trace_id"] = trace.trace_id
200
+ return self.logger.log(severity, message, component=component, **kwargs)
201
+
202
+ def info(self, message: str, component: str = "", **kwargs: Any) -> LogEntry:
203
+ return self.logger.info(message, component=component, **kwargs)
204
+
205
+ def warn(self, message: str, component: str = "", **kwargs: Any) -> LogEntry:
206
+ return self.logger.warn(message, component=component, **kwargs)
207
+
208
+ def error(self, message: str, component: str = "", **kwargs: Any) -> LogEntry:
209
+ return self.logger.error(message, component=component, **kwargs)
210
+
211
+ # --- Health ---
212
+ def health_check(self, name: str, status: str = "healthy", **metadata: Any) -> HealthCheck:
213
+ return self.health.check(name, status, **metadata)
214
+
215
+ def overall_health(self) -> str:
216
+ return self.health.overall_status()
217
+
218
+ # --- Alerts ---
219
+ def alert(self, name: str, message: str, severity: AlertSeverity = AlertSeverity.WARNING, **metadata: Any) -> Alert:
220
+ return self.alerts.fire(name, message, severity, **metadata)
221
+
222
+ def add_alert_rule(self, rule: AlertRule) -> None:
223
+ self.alerts.add_rule(rule)
224
+
225
+ # --- Summary ---
226
+ def summary(self) -> dict[str, Any]:
227
+ return {
228
+ "tracing": self.tracer.summary(),
229
+ "metrics": self.metrics.summary(),
230
+ "logging": self.logger.summary(),
231
+ "health": self.health.summary(),
232
+ "alerts": self.alerts.summary(),
233
+ "elasticsearch_connected": self._es_client is not None,
234
+ }
235
+
236
+ def export(self) -> dict[str, Any]:
237
+ """Export all observability data."""
238
+ return {
239
+ "summary": self.summary(),
240
+ "metrics": self.metrics.export(),
241
+ "health": self.health.summary(),
242
+ "active_alerts": [a.to_dict() for a in self.alerts.get_active()],
243
+ }
@@ -0,0 +1,106 @@
1
+ """Metrics — performance and operational metrics collection.
2
+
3
+ Captures execution time, latency, throughput, resource usage,
4
+ and AI-specific metrics across all subsystems.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import time
10
+ from dataclasses import dataclass, field
11
+ from datetime import datetime, timezone
12
+ from typing import Any
13
+ from uuid import uuid4
14
+ from collections import defaultdict
15
+
16
+
17
@dataclass
class Metric:
    """One recorded measurement of a named metric."""

    # Short random identifier of the form "metric-xxxxxxxx".
    metric_id: str = field(default_factory=lambda: f"metric-{uuid4().hex[:8]}")
    name: str = ""
    value: float = 0.0
    unit: str = ""  # ms | count | bytes | ratio | tokens
    # ISO-8601 creation time in UTC.
    timestamp: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )
    tags: dict[str, str] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)
28
+
29
+
30
class MetricsCollector:
    """Collects and aggregates metrics across all subsystems.

    Responsibilities:
    - Record individual metric measurements
    - Maintain counters, gauges and histograms
    - Summarize histograms (count, min, max, avg, percentiles)
    - Export metrics for dashboards

    Counters, gauges and histograms are stored per series. A series key is
    ``"<name>:<k1>=<v1>:<k2>=<v2>..."`` with tags sorted by key, which gives
    ``"<name>:"`` when there are no tags.
    """

    def __init__(self):
        self._metrics: dict[str, list[Metric]] = defaultdict(list)
        self._counters: dict[str, int] = defaultdict(int)
        self._gauges: dict[str, float] = {}
        self._histograms: dict[str, list[float]] = defaultdict(list)

    @staticmethod
    def _series_key(name: str, tags: dict[str, str]) -> str:
        """Build the storage key for ``name`` with ``tags`` (order-insensitive)."""
        tag_part = ":".join(f"{k}={v}" for k, v in sorted(tags.items()))
        return f"{name}:{tag_part}"

    @staticmethod
    def _summarize(values: list[float]) -> dict:
        """Compute count/min/max/avg/p50/p95/p99 for one histogram series."""
        count = len(values)
        if count == 0:
            return {"count": 0}
        ordered = sorted(values)  # sort once, reuse for every percentile
        return {
            "count": count,
            "min": ordered[0],
            "max": ordered[-1],
            "avg": sum(values) / count,
            "p50": ordered[count // 2],
            # int(count * q) < count for q < 1, so the index is always valid.
            "p95": ordered[int(count * 0.95)],
            "p99": ordered[int(count * 0.99)],
        }

    def record(self, name: str, value: float, unit: str = "", **tags: str) -> Metric:
        """Record a metric measurement and return it."""
        metric = Metric(name=name, value=value, unit=unit, tags=tags)
        self._metrics[name].append(metric)
        return metric

    def counter(self, name: str, increment: int = 1, **tags: str) -> None:
        """Increment a counter."""
        self._counters[self._series_key(name, tags)] += increment

    def gauge(self, name: str, value: float, **tags: str) -> None:
        """Set a gauge value."""
        self._gauges[self._series_key(name, tags)] = value

    def histogram(self, name: str, value: float, **tags: str) -> None:
        """Record a histogram value.

        Once a series holds more than 1000 values it is cut back to the
        newest 500.
        """
        key = self._series_key(name, tags)
        self._histograms[key].append(value)
        if len(self._histograms[key]) > 1000:
            self._histograms[key] = self._histograms[key][-500:]

    def get_counter(self, name: str, **tags: str) -> int:
        """Return the counter value for the series, or 0 if it was never incremented."""
        return self._counters.get(self._series_key(name, tags), 0)

    def get_gauge(self, name: str, **tags: str) -> float | None:
        """Return the gauge value for the series, or ``None`` if it was never set."""
        return self._gauges.get(self._series_key(name, tags))

    def get_histogram_stats(self, name: str, **tags: str) -> dict:
        """Return summary statistics for one histogram series.

        Returns ``{"count": 0}`` when the series has no values.
        """
        # .get() avoids creating an empty series in the defaultdict.
        return self._summarize(self._histograms.get(self._series_key(name, tags), []))

    def summary(self) -> dict:
        """Return the number of series per kind and of recorded measurements."""
        return {
            "counters": len(self._counters),
            "gauges": len(self._gauges),
            "histograms": len(self._histograms),
            "total_metrics": sum(len(v) for v in self._metrics.values()),
        }

    def export(self) -> dict[str, Any]:
        """Export all metrics for dashboards.

        Histogram statistics are computed from each stored series directly.
        The previous implementation looked them up again by bare name, which
        dropped the tags and returned wrong or empty statistics for tagged
        series.
        """
        return {
            "counters": dict(self._counters),
            "gauges": dict(self._gauges),
            "histograms": {key: self._summarize(values) for key, values in self._histograms.items()},
        }
@@ -0,0 +1,147 @@
1
+ """Logging — structured logging for all subsystems.
2
+
3
+ Every log entry includes:
4
+ - Timestamp
5
+ - Trace ID
6
+ - Span ID
7
+ - Workflow ID
8
+ - Pipeline ID
9
+ - Capability ID
10
+ - User ID
11
+ - Severity
12
+ - Component
13
+ - Duration
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ import logging
20
+ from dataclasses import dataclass, field
21
+ from datetime import datetime, timezone
22
+ from typing import Any
23
+ from uuid import uuid4
24
+
25
+
26
@dataclass
class LogEntry:
    """A structured log entry."""

    # Short random identifier of the form "log-xxxxxxxx".
    log_id: str = field(default_factory=lambda: f"log-{uuid4().hex[:8]}")
    # ISO-8601 creation time in UTC.
    timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
    severity: str = "info"  # debug | info | warn | error | fatal
    component: str = ""
    message: str = ""
    trace_id: str | None = None
    span_id: str | None = None
    workflow_id: str | None = None
    pipeline_id: str | None = None
    capability_id: str | None = None
    user_id: str | None = None
    duration_ms: float | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the entry.

        The five core fields are always present. Correlation IDs and
        ``metadata`` are added only when set/non-empty, and ``duration_ms``
        whenever it is not ``None`` (so a duration of 0 is kept).
        """
        d: dict[str, Any] = {
            "log_id": self.log_id,
            "timestamp": self.timestamp,
            "severity": self.severity,
            "component": self.component,
            "message": self.message,
        }
        # workflow_id was previously missing from this list and therefore
        # never serialized.
        for key in ("trace_id", "span_id", "workflow_id", "pipeline_id", "capability_id", "user_id"):
            value = getattr(self, key)
            if value:
                d[key] = value
        if self.duration_ms is not None:
            d["duration_ms"] = self.duration_ms
        if self.metadata:
            d["metadata"] = self.metadata
        return d

    def to_json(self) -> str:
        """Serialize :meth:`to_dict` as JSON, stringifying non-JSON values."""
        return json.dumps(self.to_dict(), default=str)
63
+
64
+
65
class StructuredLogger:
    """Structured logging manager.

    Responsibilities:
    - Create structured log entries
    - Attach trace/correlation IDs supplied by the caller
    - Filter by severity
    - Return stored entries on request

    Args:
        min_level: Entries whose severity ranks below this level are dropped.
    """

    def __init__(self, min_level: str = "info"):
        self._min_level = min_level
        self._entries: list[LogEntry] = []
        self._levels = {"debug": 0, "info": 1, "warn": 2, "error": 3, "fatal": 4}

    def _should_log(self, level: str) -> bool:
        """Return whether ``level`` ranks at or above the minimum level.

        Unknown level names rank like "debug" (0).
        """
        return self._levels.get(level, 0) >= self._levels.get(self._min_level, 0)

    def log(
        self,
        severity: str,
        message: str,
        component: str = "",
        trace_id: str | None = None,
        span_id: str | None = None,
        pipeline_id: str | None = None,
        capability_id: str | None = None,
        duration_ms: float | None = None,
        workflow_id: str | None = None,
        user_id: str | None = None,
        **metadata: Any,
    ) -> LogEntry | None:
        """Create and store a log entry.

        ``workflow_id`` and ``user_id`` are now explicit parameters (added
        after the existing ones, so positional calls are unaffected). They
        previously landed in ``metadata`` instead of the dedicated entry
        fields. Any other keyword arguments become the entry's metadata.

        Returns:
            The stored entry, or ``None`` when ``severity`` is below the
            configured minimum level.
        """
        if not self._should_log(severity):
            return None

        entry = LogEntry(
            severity=severity,
            component=component,
            message=message,
            trace_id=trace_id,
            span_id=span_id,
            workflow_id=workflow_id,
            pipeline_id=pipeline_id,
            capability_id=capability_id,
            user_id=user_id,
            duration_ms=duration_ms,
            metadata=metadata,
        )
        self._entries.append(entry)
        return entry

    def debug(self, message: str, **kwargs: Any) -> LogEntry | None:
        """Log at "debug" severity."""
        return self.log("debug", message, **kwargs)

    def info(self, message: str, **kwargs: Any) -> LogEntry | None:
        """Log at "info" severity."""
        return self.log("info", message, **kwargs)

    def warn(self, message: str, **kwargs: Any) -> LogEntry | None:
        """Log at "warn" severity."""
        return self.log("warn", message, **kwargs)

    def error(self, message: str, **kwargs: Any) -> LogEntry | None:
        """Log at "error" severity."""
        return self.log("error", message, **kwargs)

    def fatal(self, message: str, **kwargs: Any) -> LogEntry | None:
        """Log at "fatal" severity."""
        return self.log("fatal", message, **kwargs)

    def get_entries(
        self,
        severity: str | None = None,
        component: str | None = None,
        trace_id: str | None = None,
        limit: int = 100,
    ) -> list[LogEntry]:
        """Return up to ``limit`` most recent entries matching all given filters.

        Filters that are ``None`` or empty are ignored. A non-positive
        ``limit`` yields an empty list. (``[-0:]`` would otherwise return
        every entry.)
        """
        if limit <= 0:
            return []
        entries = self._entries
        if severity:
            entries = [e for e in entries if e.severity == severity]
        if component:
            entries = [e for e in entries if e.component == component]
        if trace_id:
            entries = [e for e in entries if e.trace_id == trace_id]
        return entries[-limit:]

    def summary(self) -> dict:
        """Return the total number of stored entries and a count per severity."""
        counts: dict[str, int] = {}
        for e in self._entries:
            counts[e.severity] = counts.get(e.severity, 0) + 1
        return {"total": len(self._entries), "by_severity": counts}
@@ -0,0 +1,162 @@
1
+ """Tracing — distributed trace management.
2
+
3
+ Every request receives a globally unique Trace ID.
4
+ Every operation inherits the same Trace ID.
5
+ Supports nested spans for hierarchical tracing.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import time
11
+ from dataclasses import dataclass, field
12
+ from datetime import datetime, timezone
13
+ from typing import Any
14
+ from uuid import uuid4
15
+
16
+
17
@dataclass
class Span:
    """One timed operation inside a trace."""

    # Short random identifier of the form "span-xxxxxxxxxxxx".
    span_id: str = field(default_factory=lambda: f"span-{uuid4().hex[:12]}")
    trace_id: str = ""
    parent_span_id: str | None = None
    name: str = ""
    component: str = ""
    # Monotonic-clock readings: only differences between them are meaningful.
    start_time: float = field(default_factory=time.monotonic)
    end_time: float | None = None
    status: str = "ok"  # ok | error | cancelled
    attributes: dict[str, Any] = field(default_factory=dict)
    events: list[dict[str, Any]] = field(default_factory=list)

    def finish(self, status: str = "ok") -> None:
        """Stamp the end time with the current monotonic clock and set ``status``."""
        self.end_time = time.monotonic()
        self.status = status

    @property
    def duration_ms(self) -> float:
        """Elapsed milliseconds; measured up to "now" while the span is open."""
        stop = time.monotonic() if self.end_time is None else self.end_time
        return (stop - self.start_time) * 1000

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view with the duration rounded to two decimals."""
        payload: dict[str, Any] = {}
        payload["span_id"] = self.span_id
        payload["trace_id"] = self.trace_id
        payload["parent_span_id"] = self.parent_span_id
        payload["name"] = self.name
        payload["component"] = self.component
        payload["duration_ms"] = round(self.duration_ms, 2)
        payload["status"] = self.status
        payload["attributes"] = self.attributes
        payload["events"] = self.events
        return payload
54
+
55
+
56
@dataclass
class Trace:
    """A complete trace of a request: a named, timed collection of spans."""

    # Short random identifier of the form "trace-xxxxxxxxxxxx".
    trace_id: str = field(default_factory=lambda: f"trace-{uuid4().hex[:12]}")
    name: str = ""
    # Monotonic-clock readings: only differences between them are meaningful.
    start_time: float = field(default_factory=time.monotonic)
    end_time: float | None = None
    spans: list[Span] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)
    # ISO-8601 creation time in UTC. start_time is monotonic and cannot be
    # used as a wall-clock time, so serialized traces need this to be
    # sortable by time. Added last to keep positional construction unchanged.
    timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())

    def finish(self) -> None:
        """Stamp the end time with the current monotonic clock."""
        self.end_time = time.monotonic()

    @property
    def duration_ms(self) -> float:
        """Elapsed milliseconds; measured up to "now" while the trace is open."""
        if self.end_time is None:
            return (time.monotonic() - self.start_time) * 1000
        return (self.end_time - self.start_time) * 1000

    def add_span(self, name: str, component: str = "", parent_span_id: str | None = None) -> Span:
        """Create a span belonging to this trace, append it and return it."""
        span = Span(
            trace_id=self.trace_id,
            parent_span_id=parent_span_id,
            name=name,
            component=component,
        )
        self.spans.append(span)
        return span

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view including every span and the creation timestamp."""
        return {
            "trace_id": self.trace_id,
            "name": self.name,
            "timestamp": self.timestamp,
            "duration_ms": round(self.duration_ms, 2),
            "span_count": len(self.spans),
            "spans": [s.to_dict() for s in self.spans],
            "metadata": self.metadata,
        }
95
+
96
+
97
class Tracer:
    """Trace manager holding one "current" trace and one "current" span.

    Responsibilities:
    - Create and manage traces
    - Create and manage spans
    - Track nested span relationships
    """

    def __init__(self):
        # Every trace ever started, keyed by trace_id.
        self._traces: dict[str, Trace] = {}
        self._current_trace: Trace | None = None
        self._current_span: Span | None = None

    def start_trace(self, name: str = "", **metadata: Any) -> Trace:
        """Start a new trace and make it current.

        Keyword arguments become the trace's metadata.
        """
        trace = Trace(name=name, metadata=metadata)
        self._traces[trace.trace_id] = trace
        self._current_trace = trace
        # A span left open by the previous trace must not become the parent
        # of spans in the new one.
        self._current_span = None
        return trace

    def start_span(self, name: str, component: str = "", **attributes: Any) -> Span:
        """Start a new span within the current trace.

        An unnamed trace is started first when none is active. The new span
        becomes a child of the current span (if any) and then becomes the
        current span itself. Keyword arguments become span attributes.
        """
        if self._current_trace is None:
            self.start_trace()

        parent_id = self._current_span.span_id if self._current_span else None
        span = self._current_trace.add_span(
            name=name,
            component=component,
            parent_span_id=parent_id,
        )
        span.attributes.update(attributes)
        self._current_span = span
        return span

    def finish_span(self, status: str = "ok") -> None:
        """Finish the current span and make its parent current.

        Does nothing when no span is open. The current span becomes ``None``
        when the finished span has no parent in the current trace.
        """
        finished = self._current_span
        if finished is None:
            return
        finished.finish(status)

        parent = None
        if self._current_trace is not None and finished.parent_span_id is not None:
            # Search newest-first: the parent is usually a recent span.
            for candidate in reversed(self._current_trace.spans):
                if candidate.span_id == finished.parent_span_id:
                    parent = candidate
                    break
        self._current_span = parent

    def finish_trace(self) -> None:
        """Finish the current trace and clear the current trace and span.

        The finished trace stays retrievable through :meth:`get_trace`.
        """
        if self._current_trace:
            self._current_trace.finish()
        self._current_trace = None
        self._current_span = None

    def get_trace(self, trace_id: str) -> Trace | None:
        """Return the trace with ``trace_id``, or ``None`` if unknown."""
        return self._traces.get(trace_id)

    def get_current_trace(self) -> Trace | None:
        """Return the active trace, or ``None`` when no trace is active."""
        return self._current_trace

    def summary(self) -> dict:
        """Return the number of known traces and the current trace ID (or ``None``)."""
        return {
            "total_traces": len(self._traces),
            "current_trace": self._current_trace.trace_id if self._current_trace else None,
        }
@@ -0,0 +1,52 @@
1
+ Metadata-Version: 2.4
2
+ Name: monkeybrain-introspection
3
+ Version: 1.0.0
4
+ Summary: MonkeyBrain Introspection — Lemon observability layer for the Cognitive Operating System
5
+ Author: Prashun Javeri
6
+ License: Proprietary
7
+ Keywords: observability,tracing,metrics,logging,health,alerting
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: Programming Language :: Python :: 3.14
15
+ Classifier: Topic :: System :: Monitoring
16
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
17
+ Requires-Python: >=3.11
18
+ Description-Content-Type: text/markdown
19
+
20
+ # MonkeyBrain Introspection
21
+
22
+ Lemon observability layer for the MonkeyBrain Cognitive Operating System.
23
+
24
+ ## Features
25
+
26
+ - **Lemon** — unified observability facade
27
+ - **Tracer** — distributed tracing with traces and spans
28
+ - **MetricsCollector** — counters, gauges, and histograms
29
+ - **StructuredLogger** — JSON structured logging with trace enrichment
30
+ - **HealthMonitor** — health checks (healthy, degraded, unhealthy)
31
+ - **AlertManager** — alert rules, firing, and resolution
32
+
33
+ ## Installation
34
+
35
+ ```bash
36
+ pip install monkeybrain-introspection
37
+ ```
38
+
39
+ ## Quick Start
40
+
41
+ ```python
42
+ from introspection import Lemon
43
+
44
+ lemon = Lemon()
45
+ lemon.start_trace("my-operation")
46
+ # ... do work ...
47
+ lemon.finish_trace()
48
+ ```
49
+
50
+ ## License
51
+
52
+ Proprietary
@@ -0,0 +1,11 @@
1
+ introspection/__init__.py,sha256=D8jRsv2OeQ5mAss92_KxAjS0lVITRv1W2YSkAgoR28w,1021
2
+ introspection/alerting.py,sha256=x-eUmT3WaC1RFjS4yH1P98ELZ4ftbk7Lzx5aKwqNw0k,4011
3
+ introspection/health.py,sha256=ngv5IV68JwIkMJYz78E-S3QZyLl6aob0aFsJi_PeMWE,3158
4
+ introspection/lemon.py,sha256=8LPut0ZJtJ5PmTZQIyqeusPSy8jB7cxntztO2R11_o4,9461
5
+ introspection/metrics.py,sha256=Kh2-PmPIbN_ePeeNU9P_KnGaJy_7cnRtMPV94SulMSM,4078
6
+ introspection/structured_logging.py,sha256=IWOjyoMIyc1IBPrRh9V161pPcBlKUM8sejRz4RHgQlw,4566
7
+ introspection/tracing.py,sha256=V3WAJxQP3vxt2FWQybJ5eATsHZPq_DiJf9SK2-caggc,5265
8
+ monkeybrain_introspection-1.0.0.dist-info/METADATA,sha256=jTuJTTtSdVq6c3WyqLjuomumG36ZKx3_5-Ku2r7wC50,1524
9
+ monkeybrain_introspection-1.0.0.dist-info/WHEEL,sha256=YLJXdYXQ2FQ0Uqn2J-6iEIC-3iOey8lH3xCtvFLkd8Q,91
10
+ monkeybrain_introspection-1.0.0.dist-info/top_level.txt,sha256=3RJ-vMpsRDQgpwxiTqt2ialeS0MJKPFWMwcU-JwGycs,14
11
+ monkeybrain_introspection-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (81.0.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ introspection