genxai-framework 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +3 -0
- cli/commands/__init__.py +6 -0
- cli/commands/approval.py +85 -0
- cli/commands/audit.py +127 -0
- cli/commands/metrics.py +25 -0
- cli/commands/tool.py +389 -0
- cli/main.py +32 -0
- genxai/__init__.py +81 -0
- genxai/api/__init__.py +5 -0
- genxai/api/app.py +21 -0
- genxai/config/__init__.py +5 -0
- genxai/config/settings.py +37 -0
- genxai/connectors/__init__.py +19 -0
- genxai/connectors/base.py +122 -0
- genxai/connectors/kafka.py +92 -0
- genxai/connectors/postgres_cdc.py +95 -0
- genxai/connectors/registry.py +44 -0
- genxai/connectors/sqs.py +94 -0
- genxai/connectors/webhook.py +73 -0
- genxai/core/__init__.py +37 -0
- genxai/core/agent/__init__.py +32 -0
- genxai/core/agent/base.py +206 -0
- genxai/core/agent/config_io.py +59 -0
- genxai/core/agent/registry.py +98 -0
- genxai/core/agent/runtime.py +970 -0
- genxai/core/communication/__init__.py +6 -0
- genxai/core/communication/collaboration.py +44 -0
- genxai/core/communication/message_bus.py +192 -0
- genxai/core/communication/protocols.py +35 -0
- genxai/core/execution/__init__.py +22 -0
- genxai/core/execution/metadata.py +181 -0
- genxai/core/execution/queue.py +201 -0
- genxai/core/graph/__init__.py +30 -0
- genxai/core/graph/checkpoints.py +77 -0
- genxai/core/graph/edges.py +131 -0
- genxai/core/graph/engine.py +813 -0
- genxai/core/graph/executor.py +516 -0
- genxai/core/graph/nodes.py +161 -0
- genxai/core/graph/trigger_runner.py +40 -0
- genxai/core/memory/__init__.py +19 -0
- genxai/core/memory/base.py +72 -0
- genxai/core/memory/embedding.py +327 -0
- genxai/core/memory/episodic.py +448 -0
- genxai/core/memory/long_term.py +467 -0
- genxai/core/memory/manager.py +543 -0
- genxai/core/memory/persistence.py +297 -0
- genxai/core/memory/procedural.py +461 -0
- genxai/core/memory/semantic.py +526 -0
- genxai/core/memory/shared.py +62 -0
- genxai/core/memory/short_term.py +303 -0
- genxai/core/memory/vector_store.py +508 -0
- genxai/core/memory/working.py +211 -0
- genxai/core/state/__init__.py +6 -0
- genxai/core/state/manager.py +293 -0
- genxai/core/state/schema.py +115 -0
- genxai/llm/__init__.py +14 -0
- genxai/llm/base.py +150 -0
- genxai/llm/factory.py +329 -0
- genxai/llm/providers/__init__.py +1 -0
- genxai/llm/providers/anthropic.py +249 -0
- genxai/llm/providers/cohere.py +274 -0
- genxai/llm/providers/google.py +334 -0
- genxai/llm/providers/ollama.py +147 -0
- genxai/llm/providers/openai.py +257 -0
- genxai/llm/routing.py +83 -0
- genxai/observability/__init__.py +6 -0
- genxai/observability/logging.py +327 -0
- genxai/observability/metrics.py +494 -0
- genxai/observability/tracing.py +372 -0
- genxai/performance/__init__.py +39 -0
- genxai/performance/cache.py +256 -0
- genxai/performance/pooling.py +289 -0
- genxai/security/audit.py +304 -0
- genxai/security/auth.py +315 -0
- genxai/security/cost_control.py +528 -0
- genxai/security/default_policies.py +44 -0
- genxai/security/jwt.py +142 -0
- genxai/security/oauth.py +226 -0
- genxai/security/pii.py +366 -0
- genxai/security/policy_engine.py +82 -0
- genxai/security/rate_limit.py +341 -0
- genxai/security/rbac.py +247 -0
- genxai/security/validation.py +218 -0
- genxai/tools/__init__.py +21 -0
- genxai/tools/base.py +383 -0
- genxai/tools/builtin/__init__.py +131 -0
- genxai/tools/builtin/communication/__init__.py +15 -0
- genxai/tools/builtin/communication/email_sender.py +159 -0
- genxai/tools/builtin/communication/notification_manager.py +167 -0
- genxai/tools/builtin/communication/slack_notifier.py +118 -0
- genxai/tools/builtin/communication/sms_sender.py +118 -0
- genxai/tools/builtin/communication/webhook_caller.py +136 -0
- genxai/tools/builtin/computation/__init__.py +15 -0
- genxai/tools/builtin/computation/calculator.py +101 -0
- genxai/tools/builtin/computation/code_executor.py +183 -0
- genxai/tools/builtin/computation/data_validator.py +259 -0
- genxai/tools/builtin/computation/hash_generator.py +129 -0
- genxai/tools/builtin/computation/regex_matcher.py +201 -0
- genxai/tools/builtin/data/__init__.py +15 -0
- genxai/tools/builtin/data/csv_processor.py +213 -0
- genxai/tools/builtin/data/data_transformer.py +299 -0
- genxai/tools/builtin/data/json_processor.py +233 -0
- genxai/tools/builtin/data/text_analyzer.py +288 -0
- genxai/tools/builtin/data/xml_processor.py +175 -0
- genxai/tools/builtin/database/__init__.py +15 -0
- genxai/tools/builtin/database/database_inspector.py +157 -0
- genxai/tools/builtin/database/mongodb_query.py +196 -0
- genxai/tools/builtin/database/redis_cache.py +167 -0
- genxai/tools/builtin/database/sql_query.py +145 -0
- genxai/tools/builtin/database/vector_search.py +163 -0
- genxai/tools/builtin/file/__init__.py +17 -0
- genxai/tools/builtin/file/directory_scanner.py +214 -0
- genxai/tools/builtin/file/file_compressor.py +237 -0
- genxai/tools/builtin/file/file_reader.py +102 -0
- genxai/tools/builtin/file/file_writer.py +122 -0
- genxai/tools/builtin/file/image_processor.py +186 -0
- genxai/tools/builtin/file/pdf_parser.py +144 -0
- genxai/tools/builtin/test/__init__.py +15 -0
- genxai/tools/builtin/test/async_simulator.py +62 -0
- genxai/tools/builtin/test/data_transformer.py +99 -0
- genxai/tools/builtin/test/error_generator.py +82 -0
- genxai/tools/builtin/test/simple_math.py +94 -0
- genxai/tools/builtin/test/string_processor.py +72 -0
- genxai/tools/builtin/web/__init__.py +15 -0
- genxai/tools/builtin/web/api_caller.py +161 -0
- genxai/tools/builtin/web/html_parser.py +330 -0
- genxai/tools/builtin/web/http_client.py +187 -0
- genxai/tools/builtin/web/url_validator.py +162 -0
- genxai/tools/builtin/web/web_scraper.py +170 -0
- genxai/tools/custom/my_test_tool_2.py +9 -0
- genxai/tools/dynamic.py +105 -0
- genxai/tools/mcp_server.py +167 -0
- genxai/tools/persistence/__init__.py +6 -0
- genxai/tools/persistence/models.py +55 -0
- genxai/tools/persistence/service.py +322 -0
- genxai/tools/registry.py +227 -0
- genxai/tools/security/__init__.py +11 -0
- genxai/tools/security/limits.py +214 -0
- genxai/tools/security/policy.py +20 -0
- genxai/tools/security/sandbox.py +248 -0
- genxai/tools/templates.py +435 -0
- genxai/triggers/__init__.py +19 -0
- genxai/triggers/base.py +104 -0
- genxai/triggers/file_watcher.py +75 -0
- genxai/triggers/queue.py +68 -0
- genxai/triggers/registry.py +82 -0
- genxai/triggers/schedule.py +66 -0
- genxai/triggers/webhook.py +68 -0
- genxai/utils/__init__.py +1 -0
- genxai/utils/tokens.py +295 -0
- genxai_framework-0.1.0.dist-info/METADATA +495 -0
- genxai_framework-0.1.0.dist-info/RECORD +156 -0
- genxai_framework-0.1.0.dist-info/WHEEL +5 -0
- genxai_framework-0.1.0.dist-info/entry_points.txt +2 -0
- genxai_framework-0.1.0.dist-info/licenses/LICENSE +21 -0
- genxai_framework-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,494 @@
|
|
|
1
|
+
"""Metrics collection for GenXAI with Prometheus support."""
|
|
2
|
+
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from contextlib import contextmanager
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
import time
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
from prometheus_client import Counter, Histogram, Gauge, CollectorRegistry, generate_latest
|
|
10
|
+
PROMETHEUS_AVAILABLE = True
|
|
11
|
+
except ImportError:
|
|
12
|
+
PROMETHEUS_AVAILABLE = False
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class MetricsCollector:
    """In-memory collector for counters, gauges, histograms and timers.

    Each series is stored under a string key built from the metric name and
    its optional tags (see ``_make_key``), so the same metric name recorded
    with different tags is tracked as separate series.
    """

    def __init__(self) -> None:
        """Initialize an empty metrics collector."""
        self._counters: Dict[str, int] = defaultdict(int)
        self._gauges: Dict[str, float] = {}
        self._histograms: Dict[str, list[float]] = defaultdict(list)
        # Start readings of running timers, keyed by the bare metric name
        # (tags are only applied when the timer is stopped).
        self._timers: Dict[str, float] = {}

    @contextmanager
    def time(self, metric: str, tags: Optional[Dict[str, str]] = None):
        """Context manager to time a code block.

        The elapsed time is recorded through ``timing`` even when the block
        raises, because the recording happens in a ``finally`` clause.

        Args:
            metric: Metric name
            tags: Optional tags
        """
        # Inside a method the name ``time`` resolves to the imported module,
        # not to this method (class scope is not visible from method bodies).
        # perf_counter is monotonic, so wall-clock adjustments cannot yield
        # negative or skewed durations.
        start = time.perf_counter()
        try:
            yield
        finally:
            self.timing(metric, time.perf_counter() - start, tags)

    def increment(self, metric: str, value: int = 1, tags: Optional[Dict[str, str]] = None) -> None:
        """Increment a counter metric.

        Args:
            metric: Metric name
            value: Value to increment by
            tags: Optional tags for the metric
        """
        self._counters[self._make_key(metric, tags)] += value

    def gauge(self, metric: str, value: float, tags: Optional[Dict[str, str]] = None) -> None:
        """Set a gauge metric, replacing any previous value.

        Args:
            metric: Metric name
            value: Gauge value
            tags: Optional tags for the metric
        """
        self._gauges[self._make_key(metric, tags)] = value

    def histogram(self, metric: str, value: float, tags: Optional[Dict[str, str]] = None) -> None:
        """Record a histogram value.

        Args:
            metric: Metric name
            value: Value to record
            tags: Optional tags for the metric
        """
        self._histograms[self._make_key(metric, tags)].append(value)

    def timing(self, metric: str, duration: float, tags: Optional[Dict[str, str]] = None) -> None:
        """Record a timing metric (stored as a histogram observation).

        Args:
            metric: Metric name
            duration: Duration in seconds
            tags: Optional tags for the metric
        """
        self.histogram(metric, duration, tags)

    def start_timer(self, metric: str) -> None:
        """Start a timer for a metric.

        Starting a timer that is already running overwrites its start
        reading.

        Args:
            metric: Metric name
        """
        self._timers[metric] = time.perf_counter()

    def stop_timer(self, metric: str, tags: Optional[Dict[str, str]] = None) -> float:
        """Stop a timer and record the duration.

        Args:
            metric: Metric name
            tags: Optional tags for the metric

        Returns:
            Duration in seconds, or 0.0 (with nothing recorded) when no
            timer is running for ``metric``
        """
        started_at = self._timers.pop(metric, None)
        if started_at is None:
            return 0.0

        duration = time.perf_counter() - started_at
        self.timing(metric, duration, tags)
        return duration

    def get_counter(self, metric: str, tags: Optional[Dict[str, str]] = None) -> int:
        """Get counter value.

        Args:
            metric: Metric name
            tags: Optional tags

        Returns:
            Counter value (0 when the counter was never incremented)
        """
        # .get() avoids creating an entry in the defaultdict on reads.
        return self._counters.get(self._make_key(metric, tags), 0)

    def get_gauge(self, metric: str, tags: Optional[Dict[str, str]] = None) -> Optional[float]:
        """Get gauge value.

        Args:
            metric: Metric name
            tags: Optional tags

        Returns:
            Gauge value or None
        """
        return self._gauges.get(self._make_key(metric, tags))

    def get_histogram_stats(
        self, metric: str, tags: Optional[Dict[str, str]] = None
    ) -> Dict[str, float]:
        """Get histogram statistics.

        Args:
            metric: Metric name
            tags: Optional tags

        Returns:
            Statistics dictionary (count, sum, avg, min, max); all zeros
            when nothing was recorded for the series
        """
        return self._stats_for_key(self._make_key(metric, tags))

    def get_all_metrics(self) -> Dict[str, Any]:
        """Get all metrics.

        Returns:
            Dictionary with ``counters`` and ``gauges`` (copies of the stored
            values) and ``histograms`` (statistics per stored series key)
        """
        return {
            "counters": dict(self._counters),
            "gauges": dict(self._gauges),
            # Summarise each stored key directly. Re-deriving the metric name
            # from the key would drop the tags and report the untagged
            # series' statistics for every tagged histogram.
            "histograms": {key: self._stats_for_key(key) for key in self._histograms},
        }

    def reset(self) -> None:
        """Reset all metrics, including timers that are still running."""
        self._counters.clear()
        self._gauges.clear()
        self._histograms.clear()
        self._timers.clear()

    def _stats_for_key(self, key: str) -> Dict[str, float]:
        """Summarise the histogram stored under an already-built series key.

        Args:
            key: Series key as produced by ``_make_key``

        Returns:
            Statistics dictionary (count, sum, avg, min, max)
        """
        # .get() avoids creating an entry in the defaultdict on reads.
        values = self._histograms.get(key, [])
        if not values:
            return {"count": 0, "sum": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0}

        total = sum(values)
        return {
            "count": len(values),
            "sum": total,
            "avg": total / len(values),
            "min": min(values),
            "max": max(values),
        }

    def _make_key(self, metric: str, tags: Optional[Dict[str, str]] = None) -> str:
        """Create metric key with tags.

        Tags are sorted by name so the key does not depend on the order in
        which the caller listed them.

        Args:
            metric: Metric name
            tags: Optional tags

        Returns:
            ``metric`` alone when there are no tags, otherwise
            ``"metric:k1=v1,k2=v2"``
        """
        if not tags:
            return metric

        tag_str = ",".join(f"{k}={v}" for k, v in sorted(tags.items()))
        return f"{metric}:{tag_str}"
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
# Prometheus metrics (if available)
|
|
201
|
+
if PROMETHEUS_AVAILABLE:
    # Dedicated registry: every metric below is registered here, and
    # get_prometheus_metrics() serialises exactly this registry.
    registry = CollectorRegistry()

    # --- Agent execution -------------------------------------------------
    agent_executions_total = Counter(
        "genxai_agent_executions_total",
        "Total number of agent executions",
        labelnames=["agent_id", "status"],
        registry=registry,
    )
    agent_errors_total = Counter(
        "genxai_agent_errors_total",
        "Total number of agent errors",
        labelnames=["agent_id", "error_type"],
        registry=registry,
    )
    agent_execution_duration_seconds = Histogram(
        "genxai_agent_execution_duration_seconds",
        "Agent execution duration in seconds",
        labelnames=["agent_id"],
        buckets=[0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0, 60.0],
        registry=registry,
    )
    agent_active_executions = Gauge(
        "genxai_agent_active_executions",
        "Number of currently active agent executions",
        labelnames=["agent_id"],
        registry=registry,
    )

    # --- Tool usage ------------------------------------------------------
    tool_calls_total = Counter(
        "genxai_tool_calls_total",
        "Total number of tool calls",
        labelnames=["tool_name", "status"],
        registry=registry,
    )
    tool_execution_duration_seconds = Histogram(
        "genxai_tool_execution_duration_seconds",
        "Tool execution duration in seconds",
        labelnames=["tool_name"],
        buckets=[0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0],
        registry=registry,
    )
    tool_errors_total = Counter(
        "genxai_tool_errors_total",
        "Total number of tool errors",
        labelnames=["tool_name", "error_type"],
        registry=registry,
    )

    # --- LLM requests ----------------------------------------------------
    llm_requests_total = Counter(
        "genxai_llm_requests_total",
        "Total number of LLM requests",
        labelnames=["provider", "model", "status"],
        registry=registry,
    )
    llm_tokens_total = Counter(
        "genxai_llm_tokens_total",
        "Total number of tokens used",
        labelnames=["provider", "model", "token_type"],
        registry=registry,
    )
    llm_cost_total = Counter(
        "genxai_llm_cost_total",
        "Total estimated cost in USD",
        labelnames=["provider", "model"],
        registry=registry,
    )
    llm_request_duration_seconds = Histogram(
        "genxai_llm_request_duration_seconds",
        "LLM request duration in seconds",
        labelnames=["provider", "model"],
        buckets=[0.5, 1.0, 2.0, 5.0, 10.0, 30.0, 60.0],
        registry=registry,
    )

    # --- Memory operations -----------------------------------------------
    memory_operations_total = Counter(
        "genxai_memory_operations_total",
        "Total number of memory operations",
        labelnames=["operation_type", "memory_type", "status"],
        registry=registry,
    )
    memory_operation_duration_seconds = Histogram(
        "genxai_memory_operation_duration_seconds",
        "Memory operation duration in seconds",
        labelnames=["operation_type", "memory_type"],
        buckets=[0.001, 0.01, 0.05, 0.1, 0.5, 1.0],
        registry=registry,
    )
    memory_size_bytes = Gauge(
        "genxai_memory_size_bytes",
        "Current memory size in bytes",
        labelnames=["agent_id", "memory_type"],
        registry=registry,
    )

    # --- Workflow execution ----------------------------------------------
    workflow_executions_total = Counter(
        "genxai_workflow_executions_total",
        "Total number of workflow executions",
        labelnames=["workflow_id", "status"],
        registry=registry,
    )
    workflow_execution_duration_seconds = Histogram(
        "genxai_workflow_execution_duration_seconds",
        "Workflow execution duration in seconds",
        labelnames=["workflow_id"],
        buckets=[1.0, 5.0, 10.0, 30.0, 60.0, 300.0, 600.0],
        registry=registry,
    )
    workflow_node_executions_total = Counter(
        "genxai_workflow_node_executions_total",
        "Total number of workflow node executions",
        labelnames=["workflow_id", "node_id", "status"],
        registry=registry,
    )

    def get_prometheus_metrics() -> bytes:
        """Serialise the module's registry in Prometheus text format.

        Returns:
            Output of ``generate_latest`` for the registry defined above
        """
        return generate_latest(registry)

else:
    # prometheus_client could not be imported: bind every metric name to
    # None so importers of this module still find the names; the _safe_*
    # helpers treat a None metric as a no-op.
    agent_executions_total = agent_errors_total = None
    agent_execution_duration_seconds = agent_active_executions = None
    tool_calls_total = tool_execution_duration_seconds = tool_errors_total = None
    llm_requests_total = llm_tokens_total = llm_cost_total = None
    llm_request_duration_seconds = None
    memory_operations_total = memory_operation_duration_seconds = None
    memory_size_bytes = None
    workflow_executions_total = workflow_execution_duration_seconds = None
    workflow_node_executions_total = None

    def get_prometheus_metrics() -> bytes:
        """Return a placeholder payload when prometheus_client is missing.

        Returns:
            A single comment line stating that the client is not installed
        """
        return b"# Prometheus client not installed\n"
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
# Global metrics collector: a single MetricsCollector created when this module
# is imported and returned by get_metrics_collector().
_global_metrics = MetricsCollector()
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def get_metrics_collector() -> MetricsCollector:
    """Get global metrics collector.

    Every call returns the same module-level ``_global_metrics`` object, so
    metrics recorded through it are visible to all callers of this function.

    Returns:
        Global metrics collector instance
    """
    return _global_metrics
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def _safe_inc(counter: Any, labels: Optional[Dict[str, str]] = None, value: int = 1) -> None:
|
|
381
|
+
if counter is None:
|
|
382
|
+
return
|
|
383
|
+
if labels:
|
|
384
|
+
counter.labels(**labels).inc(value)
|
|
385
|
+
else:
|
|
386
|
+
counter.inc(value)
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def _safe_observe(histogram: Any, labels: Optional[Dict[str, str]] = None, value: float = 0.0) -> None:
|
|
390
|
+
if histogram is None:
|
|
391
|
+
return
|
|
392
|
+
if labels:
|
|
393
|
+
histogram.labels(**labels).observe(value)
|
|
394
|
+
else:
|
|
395
|
+
histogram.observe(value)
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def _safe_set(gauge: Any, labels: Optional[Dict[str, str]] = None, value: float = 0.0) -> None:
|
|
399
|
+
if gauge is None:
|
|
400
|
+
return
|
|
401
|
+
if labels:
|
|
402
|
+
gauge.labels(**labels).set(value)
|
|
403
|
+
else:
|
|
404
|
+
gauge.set(value)
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def record_agent_execution(
    agent_id: str,
    duration: float,
    status: str = "success",
    error_type: Optional[str] = None,
) -> None:
    """Record the outcome and duration of one agent execution.

    Increments the execution counter for ``(agent_id, status)`` and observes
    ``duration`` on the agent duration histogram. The error counter is
    incremented only when ``status`` is not ``"success"`` and an
    ``error_type`` was supplied.

    Args:
        agent_id: Agent identifier
        duration: Execution duration in seconds
        status: Execution status (success/error)
        error_type: Optional error type
    """
    agent_label = {"agent_id": agent_id}

    _safe_inc(agent_executions_total, {**agent_label, "status": status})
    _safe_observe(agent_execution_duration_seconds, agent_label, duration)

    failed = status != "success"
    if failed and error_type:
        _safe_inc(agent_errors_total, {**agent_label, "error_type": error_type})
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def record_tool_execution(
    tool_name: str,
    duration: float,
    status: str = "success",
    error_type: Optional[str] = None,
) -> None:
    """Record the outcome and duration of one tool call.

    Increments the call counter for ``(tool_name, status)`` and observes
    ``duration`` on the tool duration histogram. The error counter is
    incremented only when ``status`` is not ``"success"`` and an
    ``error_type`` was supplied.

    Args:
        tool_name: Tool name used as the metric label
        duration: Execution duration in seconds
        status: Call status; ``"success"`` by default
        error_type: Optional error type
    """
    tool_label = {"tool_name": tool_name}

    _safe_inc(tool_calls_total, {**tool_label, "status": status})
    _safe_observe(tool_execution_duration_seconds, tool_label, duration)

    failed = status != "success"
    if failed and error_type:
        _safe_inc(tool_errors_total, {**tool_label, "error_type": error_type})
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def record_llm_request(
    provider: str,
    model: str,
    duration: float,
    status: str = "success",
    input_tokens: int = 0,
    output_tokens: int = 0,
    total_cost: float = 0.0,
) -> None:
    """Record request count, token usage, cost and duration of an LLM call.

    Args:
        provider: Provider name used as the metric label
        model: Model name used as the metric label
        duration: Request duration in seconds
        status: Request status; ``"success"`` by default
        input_tokens: Amount added to the token counter with
            ``token_type="input"``
        output_tokens: Amount added to the token counter with
            ``token_type="output"``
        total_cost: Amount added to the cost counter
    """
    model_labels = {"provider": provider, "model": model}

    _safe_inc(llm_requests_total, {**model_labels, "status": status})

    # Input tokens are recorded before output tokens, as two separate series.
    for token_type, token_count in (("input", input_tokens), ("output", output_tokens)):
        _safe_inc(llm_tokens_total, {**model_labels, "token_type": token_type}, token_count)

    _safe_inc(llm_cost_total, model_labels, total_cost)
    _safe_observe(llm_request_duration_seconds, model_labels, duration)
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def record_memory_operation(
    operation_type: str,
    memory_type: str,
    duration: float,
    status: str = "success",
) -> None:
    """Record the count and duration of one memory operation.

    Args:
        operation_type: Operation type used as the metric label
        memory_type: Memory type used as the metric label
        duration: Operation duration in seconds
        status: Operation status; ``"success"`` by default
    """
    operation_labels = {"operation_type": operation_type, "memory_type": memory_type}

    _safe_inc(memory_operations_total, {**operation_labels, "status": status})
    _safe_observe(memory_operation_duration_seconds, operation_labels, duration)
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def record_workflow_execution(
    workflow_id: str,
    duration: float,
    status: str = "success",
) -> None:
    """Record the count and duration of one workflow execution.

    Args:
        workflow_id: Workflow identifier used as the metric label
        duration: Execution duration in seconds
        status: Execution status; ``"success"`` by default
    """
    workflow_label = {"workflow_id": workflow_id}

    _safe_inc(workflow_executions_total, {**workflow_label, "status": status})
    _safe_observe(workflow_execution_duration_seconds, workflow_label, duration)
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
def record_workflow_node_execution(
    workflow_id: str,
    node_id: str,
    status: str = "success",
) -> None:
    """Count one execution of a workflow node.

    Args:
        workflow_id: Workflow identifier used as the metric label
        node_id: Node identifier used as the metric label
        status: Node execution status; ``"success"`` by default
    """
    node_labels = {"workflow_id": workflow_id, "node_id": node_id, "status": status}
    _safe_inc(workflow_node_executions_total, node_labels)
|