kailash 0.6.6__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +35 -5
- kailash/adapters/__init__.py +5 -0
- kailash/adapters/mcp_platform_adapter.py +273 -0
- kailash/channels/__init__.py +21 -0
- kailash/channels/api_channel.py +409 -0
- kailash/channels/base.py +271 -0
- kailash/channels/cli_channel.py +661 -0
- kailash/channels/event_router.py +496 -0
- kailash/channels/mcp_channel.py +648 -0
- kailash/channels/session.py +423 -0
- kailash/mcp_server/discovery.py +1 -1
- kailash/middleware/mcp/enhanced_server.py +22 -16
- kailash/nexus/__init__.py +21 -0
- kailash/nexus/factory.py +413 -0
- kailash/nexus/gateway.py +545 -0
- kailash/nodes/__init__.py +2 -0
- kailash/nodes/ai/iterative_llm_agent.py +988 -17
- kailash/nodes/ai/llm_agent.py +29 -9
- kailash/nodes/api/__init__.py +2 -2
- kailash/nodes/api/monitoring.py +1 -1
- kailash/nodes/base_async.py +54 -14
- kailash/nodes/code/async_python.py +1 -1
- kailash/nodes/data/bulk_operations.py +939 -0
- kailash/nodes/data/query_builder.py +373 -0
- kailash/nodes/data/query_cache.py +512 -0
- kailash/nodes/monitoring/__init__.py +10 -0
- kailash/nodes/monitoring/deadlock_detector.py +964 -0
- kailash/nodes/monitoring/performance_anomaly.py +1078 -0
- kailash/nodes/monitoring/race_condition_detector.py +1151 -0
- kailash/nodes/monitoring/transaction_metrics.py +790 -0
- kailash/nodes/monitoring/transaction_monitor.py +931 -0
- kailash/nodes/system/__init__.py +17 -0
- kailash/nodes/system/command_parser.py +820 -0
- kailash/nodes/transaction/__init__.py +48 -0
- kailash/nodes/transaction/distributed_transaction_manager.py +983 -0
- kailash/nodes/transaction/saga_coordinator.py +652 -0
- kailash/nodes/transaction/saga_state_storage.py +411 -0
- kailash/nodes/transaction/saga_step.py +467 -0
- kailash/nodes/transaction/transaction_context.py +756 -0
- kailash/nodes/transaction/two_phase_commit.py +978 -0
- kailash/nodes/transform/processors.py +17 -1
- kailash/nodes/validation/__init__.py +21 -0
- kailash/nodes/validation/test_executor.py +532 -0
- kailash/nodes/validation/validation_nodes.py +447 -0
- kailash/resources/factory.py +1 -1
- kailash/runtime/async_local.py +84 -21
- kailash/runtime/local.py +21 -2
- kailash/runtime/parameter_injector.py +187 -31
- kailash/security.py +16 -1
- kailash/servers/__init__.py +32 -0
- kailash/servers/durable_workflow_server.py +430 -0
- kailash/servers/enterprise_workflow_server.py +466 -0
- kailash/servers/gateway.py +183 -0
- kailash/servers/workflow_server.py +290 -0
- kailash/utils/data_validation.py +192 -0
- kailash/workflow/builder.py +291 -12
- kailash/workflow/validation.py +144 -8
- {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/METADATA +1 -1
- {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/RECORD +63 -25
- {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/WHEEL +0 -0
- {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/entry_points.txt +0 -0
- {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,790 @@
+"""Transaction metrics collection and analysis node.
+
+This module provides comprehensive transaction performance monitoring with
+support for timing, aggregation, and export to various monitoring backends.
+"""
+
+import json
+import logging
+import time
+from collections import defaultdict
+from dataclasses import dataclass, field
+from datetime import UTC, datetime
+from enum import Enum
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+from kailash.nodes.base import NodeParameter, register_node
+from kailash.nodes.base_async import AsyncNode
+from kailash.sdk_exceptions import NodeExecutionError
+
+logger = logging.getLogger(__name__)
+
+
+class MetricExportFormat(Enum):
+    """Supported metric export formats."""
+
+    JSON = "json"
+    PROMETHEUS = "prometheus"
+    CLOUDWATCH = "cloudwatch"
+    DATADOG = "datadog"
+    OPENTELEMETRY = "opentelemetry"
+
+
+class AggregationType(Enum):
+    """Types of metric aggregation."""
+
+    COUNT = "count"
+    SUM = "sum"
+    AVG = "avg"
+    MIN = "min"
+    MAX = "max"
+    P50 = "p50"
+    P75 = "p75"
+    P90 = "p90"
+    P95 = "p95"
+    P99 = "p99"
+    P999 = "p999"
+
+
+@dataclass
+class TransactionMetric:
+    """Represents a single transaction metric."""
+
+    transaction_id: str
+    name: str
+    start_time: float
+    end_time: Optional[float] = None
+    duration: Optional[float] = None
+    status: str = "pending"
+    error: Optional[str] = None
+    tags: Dict[str, str] = field(default_factory=dict)
+    custom_metrics: Dict[str, float] = field(default_factory=dict)
+
+
+@dataclass
+class AggregatedMetrics:
+    """Aggregated transaction metrics."""
+
+    name: str
+    count: int
+    sum_duration: float
+    min_duration: float
+    max_duration: float
+    avg_duration: float
+    percentiles: Dict[str, float]
+    success_count: int
+    error_count: int
+    error_rate: float
+    tags: Dict[str, str] = field(default_factory=dict)
+
+
+@register_node()
+class TransactionMetricsNode(AsyncNode):
+    """Node for collecting and analyzing transaction performance metrics.
+
+    This node provides comprehensive transaction monitoring including:
+    - Transaction timing and duration tracking
+    - Success/failure rate monitoring
+    - Latency percentile calculations (p50, p95, p99)
+    - Custom metric collection
+    - Multi-format export (Prometheus, CloudWatch, DataDog)
+    - Real-time and batch aggregation
+
+    Design Purpose:
+    - Enable production-grade performance monitoring
+    - Support SLA tracking and alerting
+    - Facilitate performance troubleshooting
+    - Integrate with enterprise monitoring systems
+
+    Examples:
+        >>> # Track individual transaction
+        >>> metrics_node = TransactionMetricsNode()
+        >>> result = await metrics_node.execute(
+        ...     operation="start_transaction",
+        ...     transaction_id="txn_12345",
+        ...     name="order_processing",
+        ...     tags={"region": "us-west", "customer_tier": "premium"}
+        ... )
+
+        >>> # Complete transaction with metrics
+        >>> result = await metrics_node.execute(
+        ...     operation="end_transaction",
+        ...     transaction_id="txn_12345",
+        ...     status="success",
+        ...     custom_metrics={"items_processed": 25, "db_queries": 3}
+        ... )
+
+        >>> # Get aggregated metrics
+        >>> result = await metrics_node.execute(
+        ...     operation="get_aggregated",
+        ...     metric_names=["order_processing"],
+        ...     aggregation_window=300,  # 5 minutes
+        ...     export_format="prometheus"
+        ... )
+    """
+
+    def __init__(self, **kwargs):
+        """Initialize the transaction metrics node."""
+        super().__init__(**kwargs)
+        self._active_transactions: Dict[str, TransactionMetric] = {}
+        self._completed_transactions: List[TransactionMetric] = []
+        self._metric_buffer = defaultdict(list)
+        self._last_aggregation_time = time.time()
+        self.logger.info(f"Initialized TransactionMetricsNode: {self.id}")
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        """Define the parameters this node accepts."""
+        return {
+            "operation": NodeParameter(
+                name="operation",
+                type=str,
+                required=True,
+                description="Operation to perform (start_transaction, end_transaction, get_metrics, get_aggregated)",
+            ),
+            "transaction_id": NodeParameter(
+                name="transaction_id",
+                type=str,
+                required=False,
+                description="Unique transaction identifier",
+            ),
+            "name": NodeParameter(
+                name="name",
+                type=str,
+                required=False,
+                description="Transaction/metric name",
+            ),
+            "status": NodeParameter(
+                name="status",
+                type=str,
+                required=False,
+                default="success",
+                description="Transaction status (success, error, timeout)",
+            ),
+            "error": NodeParameter(
+                name="error",
+                type=str,
+                required=False,
+                description="Error message if transaction failed",
+            ),
+            "tags": NodeParameter(
+                name="tags",
+                type=dict,
+                required=False,
+                default={},
+                description="Tags for metric grouping and filtering",
+            ),
+            "custom_metrics": NodeParameter(
+                name="custom_metrics",
+                type=dict,
+                required=False,
+                default={},
+                description="Custom metrics to attach to transaction",
+            ),
+            "metric_names": NodeParameter(
+                name="metric_names",
+                type=list,
+                required=False,
+                default=[],
+                description="List of metric names to retrieve",
+            ),
+            "aggregation_window": NodeParameter(
+                name="aggregation_window",
+                type=float,
+                required=False,
+                default=60.0,
+                description="Time window for aggregation in seconds",
+            ),
+            "aggregation_types": NodeParameter(
+                name="aggregation_types",
+                type=list,
+                required=False,
+                default=["count", "avg", "p50", "p95", "p99"],
+                description="Types of aggregation to perform",
+            ),
+            "export_format": NodeParameter(
+                name="export_format",
+                type=str,
+                required=False,
+                default="json",
+                description="Export format (json, prometheus, cloudwatch, datadog, opentelemetry)",
+            ),
+            "include_raw": NodeParameter(
+                name="include_raw",
+                type=bool,
+                required=False,
+                default=False,
+                description="Include raw transaction data in response",
+            ),
+        }
+
+    def get_output_schema(self) -> Dict[str, NodeParameter]:
+        """Define the output schema for this node."""
+        return {
+            "metrics": NodeParameter(
+                name="metrics",
+                type=Any,
+                description="Transaction metrics in requested format",
+            ),
+            "transaction_count": NodeParameter(
+                name="transaction_count",
+                type=int,
+                description="Number of transactions processed",
+            ),
+            "total_transactions": NodeParameter(
+                name="total_transactions",
+                type=int,
+                description="Total number of transactions (alias for transaction_count)",
+            ),
+            "success_rate": NodeParameter(
+                name="success_rate",
+                type=float,
+                description="Success rate of transactions (0.0 to 1.0)",
+            ),
+            "aggregations": NodeParameter(
+                name="aggregations", type=dict, description="Aggregated metric data"
+            ),
+            "export_format": NodeParameter(
+                name="export_format", type=str, description="Format of exported metrics"
+            ),
+            "timestamp": NodeParameter(
+                name="timestamp", type=str, description="ISO timestamp of operation"
+            ),
+            "status": NodeParameter(
+                name="status", type=str, description="Operation status"
+            ),
+        }
+
+    async def async_run(self, **kwargs) -> Dict[str, Any]:
+        """Execute transaction metrics operation."""
+        operation = kwargs.get("operation")
+
+        try:
+            if operation == "start_transaction":
+                return await self._start_transaction(**kwargs)
+            elif operation == "end_transaction":
+                return await self._end_transaction(**kwargs)
+            elif operation == "complete_transaction":
+                return await self._end_transaction(**kwargs)  # Same as end_transaction
+            elif operation == "get_metrics":
+                return await self._get_metrics(**kwargs)
+            elif operation == "get_aggregated":
+                return await self._get_aggregated_metrics(**kwargs)
+            else:
+                raise ValueError(f"Unknown operation: {operation}")
+
+        except Exception as e:
+            self.logger.error(f"Transaction metrics operation failed: {str(e)}")
+            raise NodeExecutionError(f"Failed to process transaction metrics: {str(e)}")
+
+    async def _start_transaction(self, **kwargs) -> Dict[str, Any]:
+        """Start tracking a new transaction."""
+        transaction_id = kwargs.get("transaction_id")
+        if not transaction_id:
+            raise ValueError("transaction_id is required for start_transaction")
+
+        name = kwargs.get("name", "unnamed_transaction")
+        tags = kwargs.get("tags", {})
+
+        # Create new transaction metric
+        metric = TransactionMetric(
+            transaction_id=transaction_id,
+            name=name,
+            start_time=time.time(),
+            tags=tags,
+            status="in_progress",
+        )
+
+        self._active_transactions[transaction_id] = metric
+
+        self.logger.debug(f"Started transaction {transaction_id} ({name})")
+
+        return {
+            "metrics": {"transaction_id": transaction_id, "status": "started"},
+            "transaction_count": 1,
+            "total_transactions": 1,  # Alias for backward compatibility
+            "success_rate": 1.0,  # Starting transaction is optimistically successful
+            "aggregations": {},
+            "export_format": "json",
+            "timestamp": datetime.now(UTC).isoformat(),
+            "status": "success",
+        }
+
+    async def _end_transaction(self, **kwargs) -> Dict[str, Any]:
+        """Complete a transaction and record metrics."""
+        transaction_id = kwargs.get("transaction_id")
+        if not transaction_id:
+            raise ValueError("transaction_id is required for end_transaction")
+
+        if transaction_id not in self._active_transactions:
+            raise ValueError(f"Transaction {transaction_id} not found")
+
+        metric = self._active_transactions.pop(transaction_id)
+
+        # Update transaction metrics
+        metric.end_time = time.time()
+        metric.duration = metric.end_time - metric.start_time
+        metric.status = kwargs.get("status", "success")
+        metric.error = kwargs.get("error")
+        metric.custom_metrics = kwargs.get("custom_metrics", {})
+
+        # Store completed transaction
+        self._completed_transactions.append(metric)
+        self._metric_buffer[metric.name].append(metric)
+
+        # Clean old metrics from buffer (keep last hour)
+        cutoff_time = time.time() - 3600
+        self._completed_transactions = [
+            m for m in self._completed_transactions if m.start_time > cutoff_time
+        ]
+
+        self.logger.debug(
+            f"Completed transaction {transaction_id} ({metric.name}) "
+            f"in {metric.duration:.3f}s with status {metric.status}"
+        )
+
+        return {
+            "metrics": {
+                "transaction_id": transaction_id,
+                "duration": metric.duration,
+                "status": metric.status,
+            },
+            "transaction_count": 1,
+            "total_transactions": 1,  # Alias for backward compatibility
+            "success_rate": (
+                1.0 if metric.status == "success" else 0.0
+            ),  # Based on this transaction
+            "aggregations": {},
+            "export_format": "json",
+            "timestamp": datetime.now(UTC).isoformat(),
+            "status": "success",
+        }
+
+    async def _get_metrics(self, **kwargs) -> Dict[str, Any]:
+        """Get raw transaction metrics."""
+        metric_names = kwargs.get("metric_names", [])
+        include_raw = kwargs.get("include_raw", False)
+        export_format = MetricExportFormat(kwargs.get("export_format", "json"))
+
+        # Filter metrics by name if specified
+        if metric_names:
+            filtered_metrics = [
+                m for m in self._completed_transactions if m.name in metric_names
+            ]
+        else:
+            filtered_metrics = self._completed_transactions
+
+        # Calculate success rate
+        total_metrics = len(filtered_metrics)
+        successful_metrics = len([m for m in filtered_metrics if m.status == "success"])
+        success_rate = successful_metrics / total_metrics if total_metrics > 0 else 1.0
+
+        # Format output
+        if export_format == MetricExportFormat.JSON:
+            if include_raw:
+                metrics_data = [self._serialize_metric(m) for m in filtered_metrics]
+            else:
+                metrics_data = {
+                    "transaction_count": len(filtered_metrics),
+                    "metric_names": list(set(m.name for m in filtered_metrics)),
+                    "success_rate": success_rate,
+                }
+        else:
+            metrics_data = self._format_metrics(filtered_metrics, export_format)
+
+        return {
+            "metrics": metrics_data,
+            "transaction_count": len(filtered_metrics),
+            "total_transactions": len(
+                filtered_metrics
+            ),  # Alias for backward compatibility
+            "success_rate": success_rate,  # Add success rate to top level
+            "aggregations": {},
+            "export_format": export_format.value,
+            "timestamp": datetime.now(UTC).isoformat(),
+            "status": "success",
+        }
+
+    async def _get_aggregated_metrics(self, **kwargs) -> Dict[str, Any]:
+        """Get aggregated transaction metrics."""
+        metric_names = kwargs.get("metric_names", [])
+        aggregation_window = kwargs.get("aggregation_window", 60.0)
+        aggregation_types = kwargs.get(
+            "aggregation_types", ["count", "avg", "p50", "p95", "p99"]
+        )
+        export_format = MetricExportFormat(kwargs.get("export_format", "json"))
+
+        # Calculate time window
+        current_time = time.time()
+        window_start = current_time - aggregation_window
+
+        # Aggregate metrics by name
+        aggregations = {}
+
+        for name, metrics in self._metric_buffer.items():
+            if metric_names and name not in metric_names:
+                continue
+
+            # Filter metrics within window
+            window_metrics = [
+                m
+                for m in metrics
+                if m.start_time >= window_start and m.duration is not None
+            ]
+
+            if not window_metrics:
+                continue
+
+            # Calculate aggregations
+            aggregations[name] = self._calculate_aggregations(
+                window_metrics, aggregation_types
+            )
+
+        # Convert aggregations to JSON-serializable format first
+        serialized_aggregations = {}
+        for name, agg in aggregations.items():
+            serialized_aggregations[name] = {
+                "name": agg.name,
+                "count": agg.count,
+                "sum_duration": agg.sum_duration,
+                "min_duration": agg.min_duration,
+                "max_duration": agg.max_duration,
+                "avg_duration": agg.avg_duration,
+                "percentiles": agg.percentiles,
+                "success_count": agg.success_count,
+                "error_count": agg.error_count,
+                "error_rate": agg.error_rate,
+                "tags": agg.tags,
+            }
+
+        # Format output
+        if export_format == MetricExportFormat.JSON:
+            formatted_metrics = serialized_aggregations
+        else:
+            formatted_metrics = self._format_aggregated_metrics(
+                aggregations, export_format
+            )
+
+        transaction_count = (
+            sum(agg.count for agg in aggregations.values()) if aggregations else 0
+        )
+
+        # Calculate overall success rate from aggregations
+        total_success = (
+            sum(agg.success_count for agg in aggregations.values())
+            if aggregations
+            else 0
+        )
+        success_rate = (
+            total_success / transaction_count if transaction_count > 0 else 1.0
+        )
+
+        return {
+            "metrics": formatted_metrics,
+            "transaction_count": transaction_count,
+            "total_transactions": transaction_count,  # Alias for backward compatibility
+            "success_rate": success_rate,  # Calculated from aggregations
+            "aggregations": serialized_aggregations,
+            "export_format": export_format.value,
+            "timestamp": datetime.now(UTC).isoformat(),
+            "status": "success",
+        }
+
+    def _calculate_aggregations(
+        self, metrics: List[TransactionMetric], aggregation_types: List[str]
+    ) -> AggregatedMetrics:
+        """Calculate aggregated metrics from transaction list."""
+        if not metrics:
+            return None
+
+        durations = [m.duration for m in metrics if m.duration is not None]
+        if not durations:
+            return None
+
+        # Sort durations for percentile calculations
+        sorted_durations = sorted(durations)
+
+        # Basic statistics
+        count = len(metrics)
+        sum_duration = sum(durations)
+        min_duration = min(durations)
+        max_duration = max(durations)
+        avg_duration = sum_duration / count
+
+        # Success/error counts
+        success_count = sum(1 for m in metrics if m.status == "success")
+        error_count = count - success_count
+        error_rate = error_count / count if count > 0 else 0.0
+
+        # Calculate percentiles
+        percentiles = {}
+        percentile_mappings = {
+            "p50": 50,
+            "p75": 75,
+            "p90": 90,
+            "p95": 95,
+            "p99": 99,
+            "p999": 99.9,
+        }
+
+        for agg_type in aggregation_types:
+            if agg_type in percentile_mappings:
+                percentile = percentile_mappings[agg_type]
+                index = int(len(sorted_durations) * (percentile / 100.0))
+                index = min(index, len(sorted_durations) - 1)
+                percentiles[agg_type] = sorted_durations[index]
+
+        # Aggregate tags (use most common values)
+        tag_counts = defaultdict(lambda: defaultdict(int))
+        for metric in metrics:
+            for tag_key, tag_value in metric.tags.items():
+                tag_counts[tag_key][tag_value] += 1
+
+        aggregated_tags = {}
+        for tag_key, value_counts in tag_counts.items():
+            # Use most common tag value
+            most_common = max(value_counts.items(), key=lambda x: x[1])
+            aggregated_tags[tag_key] = most_common[0]
+
+        return AggregatedMetrics(
+            name=metrics[0].name,
+            count=count,
+            sum_duration=sum_duration,
+            min_duration=min_duration,
+            max_duration=max_duration,
+            avg_duration=avg_duration,
+            percentiles=percentiles,
+            success_count=success_count,
+            error_count=error_count,
+            error_rate=error_rate,
+            tags=aggregated_tags,
+        )
+
+    def _serialize_metric(self, metric: TransactionMetric) -> Dict[str, Any]:
+        """Serialize a transaction metric to dictionary."""
+        return {
+            "transaction_id": metric.transaction_id,
+            "name": metric.name,
+            "start_time": metric.start_time,
+            "end_time": metric.end_time,
+            "duration": metric.duration,
+            "status": metric.status,
+            "error": metric.error,
+            "tags": metric.tags,
+            "custom_metrics": metric.custom_metrics,
+        }
+
+    def _format_metrics(
+        self, metrics: List[TransactionMetric], format: MetricExportFormat
+    ) -> Union[str, Dict[str, Any]]:
+        """Format metrics for export."""
+        if format == MetricExportFormat.PROMETHEUS:
+            return self._format_prometheus(metrics)
+        elif format == MetricExportFormat.CLOUDWATCH:
+            return self._format_cloudwatch(metrics)
+        elif format == MetricExportFormat.DATADOG:
+            return self._format_datadog(metrics)
+        elif format == MetricExportFormat.OPENTELEMETRY:
+            return self._format_opentelemetry(metrics)
+        else:
+            return [self._serialize_metric(m) for m in metrics]
+
+    def _format_prometheus(self, metrics: List[TransactionMetric]) -> str:
+        """Format metrics in Prometheus exposition format."""
+        lines = []
+
+        # Group by metric name
+        by_name = defaultdict(list)
+        for m in metrics:
+            by_name[m.name].append(m)
+
+        for name, metric_list in by_name.items():
+            # Transaction duration histogram
+            lines.append("# TYPE transaction_duration_seconds histogram")
+            lines.append(
+                "# HELP transaction_duration_seconds Transaction duration in seconds"
+            )
+
+            for metric in metric_list:
+                if metric.duration is not None:
+                    labels = self._format_prometheus_labels(metric.tags)
+                    lines.append(
+                        f'transaction_duration_seconds{{{labels},name="{name}"}} {metric.duration}'
+                    )
+
+            # Success/error counters
+            success_count = sum(1 for m in metric_list if m.status == "success")
+            error_count = len(metric_list) - success_count
+
+            lines.append("# TYPE transaction_total counter")
+            lines.append("# HELP transaction_total Total number of transactions")
+            lines.append(
+                f'transaction_total{{name="{name}",status="success"}} {success_count}'
+            )
+            lines.append(
+                f'transaction_total{{name="{name}",status="error"}} {error_count}'
+            )
+
+        return "\n".join(lines)
+
+    def _format_prometheus_labels(self, tags: Dict[str, str]) -> str:
+        """Format tags as Prometheus labels."""
+        label_parts = []
+        for k, v in tags.items():
+            # Escape quotes and backslashes
+            v = v.replace("\\", "\\\\").replace('"', '\\"')
+            label_parts.append(f'{k}="{v}"')
+        return ",".join(label_parts)
+
+    def _format_cloudwatch(self, metrics: List[TransactionMetric]) -> Dict[str, Any]:
+        """Format metrics for AWS CloudWatch."""
+        cloudwatch_metrics = []
+
+        for metric in metrics:
+            if metric.duration is not None:
+                cw_metric = {
+                    "MetricName": f"TransactionDuration_{metric.name}",
+                    "Value": metric.duration * 1000,  # Convert to milliseconds
+                    "Unit": "Milliseconds",
+                    "Timestamp": datetime.fromtimestamp(
+                        metric.start_time, UTC
+                    ).isoformat(),
+                    "Dimensions": [
+                        {"Name": k, "Value": v} for k, v in metric.tags.items()
+                    ],
+                }
+                cloudwatch_metrics.append(cw_metric)
+
+            # Add custom metrics
+            for custom_name, custom_value in metric.custom_metrics.items():
+                cw_custom = {
+                    "MetricName": f"Custom_{metric.name}_{custom_name}",
+                    "Value": custom_value,
+                    "Unit": "Count",
+                    "Timestamp": datetime.fromtimestamp(
+                        metric.start_time, UTC
+                    ).isoformat(),
+                    "Dimensions": [
+                        {"Name": k, "Value": v} for k, v in metric.tags.items()
+                    ],
+                }
+                cloudwatch_metrics.append(cw_custom)
+
+        return {"MetricData": cloudwatch_metrics}
+
+    def _format_datadog(self, metrics: List[TransactionMetric]) -> Dict[str, Any]:
+        """Format metrics for DataDog."""
+        series = []
+
+        for metric in metrics:
+            if metric.duration is not None:
+                # Duration metric
+                dd_metric = {
+                    "metric": "transaction.duration",
+                    "points": [[int(metric.start_time), metric.duration]],
+                    "type": "gauge",
+                    "tags": [f"{k}:{v}" for k, v in metric.tags.items()]
+                    + [f"transaction_name:{metric.name}"],
+                }
+                series.append(dd_metric)
+
+            # Status counter
+            status_metric = {
+                "metric": "transaction.count",
+                "points": [[int(metric.start_time), 1]],
+                "type": "count",
+                "tags": [f"{k}:{v}" for k, v in metric.tags.items()]
+                + [f"transaction_name:{metric.name}", f"status:{metric.status}"],
+            }
+            series.append(status_metric)
+
+        return {"series": series}
+
+    def _format_opentelemetry(self, metrics: List[TransactionMetric]) -> Dict[str, Any]:
+        """Format metrics in OpenTelemetry format."""
+        otel_metrics = []
+
+        for metric in metrics:
+            if metric.duration is not None:
+                otel_metric = {
+                    "name": "transaction.duration",
+                    "description": f"Duration of {metric.name} transaction",
+                    "unit": "s",
+                    "data": {
+                        "type": "Gauge",
+                        "data_points": [
+                            {
+                                "attributes": {
+                                    **metric.tags,
+                                    "transaction.name": metric.name,
+                                    "transaction.status": metric.status,
+                                },
+                                "time_unix_nano": int(metric.start_time * 1e9),
+                                "value": metric.duration,
+                            }
+                        ],
+                    },
+                }
+                otel_metrics.append(otel_metric)
+
+        return {"resource_metrics": [{"scope_metrics": [{"metrics": otel_metrics}]}]}
+
+    def _format_aggregated_metrics(
+        self, aggregations: Dict[str, AggregatedMetrics], format: MetricExportFormat
+    ) -> Union[str, Dict[str, Any]]:
+        """Format aggregated metrics for export."""
+        if format == MetricExportFormat.PROMETHEUS:
+            lines = []
+
+            for name, agg in aggregations.items():
+                labels = self._format_prometheus_labels(agg.tags)
+                base_labels = f'name="{name}"' + (f",{labels}" if labels else "")
+
+                # Summary metrics
+                lines.append("# TYPE transaction_duration_summary summary")
+                lines.append(
+                    f"transaction_duration_summary_count{{{base_labels}}} {agg.count}"
+                )
+                lines.append(
+                    f"transaction_duration_summary_sum{{{base_labels}}} {agg.sum_duration}"
+                )
+
+                # Percentiles
+                for percentile_name, value in agg.percentiles.items():
+                    quantile = percentile_name[1:]  # Remove 'p' prefix
+                    lines.append(
+                        f'transaction_duration_summary{{{base_labels},quantile="0.{quantile}"}} {value}'
+                    )
+
+                # Error rate
+                lines.append("# TYPE transaction_error_rate gauge")
+                lines.append(
+                    f"transaction_error_rate{{{base_labels}}} {agg.error_rate}"
+                )
+
+            return "\n".join(lines)
+
+        else:
+            # For other formats, return structured data
+            return {
+                name: {
+                    "count": agg.count,
+                    "duration": {
+                        "sum": agg.sum_duration,
+                        "min": agg.min_duration,
+                        "max": agg.max_duration,
+                        "avg": agg.avg_duration,
+                        **agg.percentiles,
+                    },
+                    "success_count": agg.success_count,
+                    "error_count": agg.error_count,
+                    "error_rate": agg.error_rate,
+                    "tags": agg.tags,
+                }
+                for name, agg in aggregations.items()
+            }
+
+    def run(self, **kwargs) -> Dict[str, Any]:
+        """Synchronous wrapper for compatibility."""
+        import asyncio
+
+        return asyncio.run(self.async_run(**kwargs))