aiecs-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of aiecs might be problematic.
- aiecs/__init__.py +75 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +295 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +341 -0
- aiecs/config/__init__.py +15 -0
- aiecs/config/config.py +117 -0
- aiecs/config/registry.py +19 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +150 -0
- aiecs/core/interface/storage_interface.py +214 -0
- aiecs/domain/__init__.py +20 -0
- aiecs/domain/context/__init__.py +28 -0
- aiecs/domain/context/content_engine.py +982 -0
- aiecs/domain/context/conversation_models.py +306 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +49 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +460 -0
- aiecs/domain/task/model.py +50 -0
- aiecs/domain/task/task_context.py +257 -0
- aiecs/infrastructure/__init__.py +26 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +341 -0
- aiecs/infrastructure/messaging/websocket_manager.py +289 -0
- aiecs/infrastructure/monitoring/__init__.py +12 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +138 -0
- aiecs/infrastructure/monitoring/structured_logger.py +50 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +376 -0
- aiecs/infrastructure/persistence/__init__.py +12 -0
- aiecs/infrastructure/persistence/database_manager.py +286 -0
- aiecs/infrastructure/persistence/file_storage.py +671 -0
- aiecs/infrastructure/persistence/redis_client.py +162 -0
- aiecs/llm/__init__.py +54 -0
- aiecs/llm/base_client.py +99 -0
- aiecs/llm/client_factory.py +339 -0
- aiecs/llm/custom_callbacks.py +228 -0
- aiecs/llm/openai_client.py +125 -0
- aiecs/llm/vertex_client.py +186 -0
- aiecs/llm/xai_client.py +184 -0
- aiecs/main.py +351 -0
- aiecs/scripts/DEPENDENCY_SYSTEM_SUMMARY.md +241 -0
- aiecs/scripts/README_DEPENDENCY_CHECKER.md +309 -0
- aiecs/scripts/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/dependency_checker.py +825 -0
- aiecs/scripts/dependency_fixer.py +348 -0
- aiecs/scripts/download_nlp_data.py +348 -0
- aiecs/scripts/fix_weasel_validator.py +121 -0
- aiecs/scripts/fix_weasel_validator.sh +82 -0
- aiecs/scripts/patch_weasel_library.sh +188 -0
- aiecs/scripts/quick_dependency_check.py +269 -0
- aiecs/scripts/run_weasel_patch.sh +41 -0
- aiecs/scripts/setup_nlp_data.sh +217 -0
- aiecs/tasks/__init__.py +2 -0
- aiecs/tasks/worker.py +111 -0
- aiecs/tools/__init__.py +196 -0
- aiecs/tools/base_tool.py +202 -0
- aiecs/tools/langchain_adapter.py +361 -0
- aiecs/tools/task_tools/__init__.py +82 -0
- aiecs/tools/task_tools/chart_tool.py +704 -0
- aiecs/tools/task_tools/classfire_tool.py +901 -0
- aiecs/tools/task_tools/image_tool.py +397 -0
- aiecs/tools/task_tools/office_tool.py +600 -0
- aiecs/tools/task_tools/pandas_tool.py +565 -0
- aiecs/tools/task_tools/report_tool.py +499 -0
- aiecs/tools/task_tools/research_tool.py +363 -0
- aiecs/tools/task_tools/scraper_tool.py +548 -0
- aiecs/tools/task_tools/search_api.py +7 -0
- aiecs/tools/task_tools/stats_tool.py +513 -0
- aiecs/tools/temp_file_manager.py +126 -0
- aiecs/tools/tool_executor/__init__.py +35 -0
- aiecs/tools/tool_executor/tool_executor.py +518 -0
- aiecs/utils/LLM_output_structor.py +409 -0
- aiecs/utils/__init__.py +23 -0
- aiecs/utils/base_callback.py +50 -0
- aiecs/utils/execution_utils.py +158 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +13 -0
- aiecs/utils/token_usage_repository.py +279 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +41 -0
- aiecs-1.0.0.dist-info/METADATA +610 -0
- aiecs-1.0.0.dist-info/RECORD +90 -0
- aiecs-1.0.0.dist-info/WHEEL +5 -0
- aiecs-1.0.0.dist-info/entry_points.txt +7 -0
- aiecs-1.0.0.dist-info/licenses/LICENSE +225 -0
- aiecs-1.0.0.dist-info/top_level.txt +1 -0
aiecs/infrastructure/messaging/websocket_manager.py
@@ -0,0 +1,289 @@
import asyncio
import json
import logging
import uuid
import websockets
from typing import Dict, Any, Set, Optional, Callable
from websockets import serve, ServerConnection
from pydantic import BaseModel

logger = logging.getLogger(__name__)


class UserConfirmation(BaseModel):
    proceed: bool
    feedback: Optional[str] = None


class TaskStepResult(BaseModel):
    step: str
    result: Any = None
    completed: bool = False
    message: str
    status: str
    error_code: Optional[str] = None
    error_message: Optional[str] = None


class WebSocketManager:
    """
    Specialized handler for WebSocket server and client communication
    """

    def __init__(self, host: str = "python-middleware-api", port: int = 8765):
        self.host = host
        self.port = port
        self.server = None
        self.callback_registry: Dict[str, Callable] = {}
        self.active_connections: Set[ServerConnection] = set()
        self._running = False

    async def start_server(self):
        """Start WebSocket server"""
        if self.server:
            logger.warning("WebSocket server is already running")
            return self.server

        try:
            self.server = await serve(
                self._handle_client_connection,
                self.host,
                self.port
            )
            self._running = True
            logger.info(f"WebSocket server started on {self.host}:{self.port}")
            return self.server
        except Exception as e:
            logger.error(f"Failed to start WebSocket server: {e}")
            raise

    async def stop_server(self):
        """Stop WebSocket server"""
        if self.server:
            self.server.close()
            await self.server.wait_closed()
            self._running = False
            logger.info("WebSocket server stopped")

        # Close all active connections
        if self.active_connections:
            await asyncio.gather(
                *[conn.close() for conn in self.active_connections],
                return_exceptions=True
            )
            self.active_connections.clear()

    async def _handle_client_connection(self, websocket: ServerConnection, path: str):
        """Handle client connection"""
        self.active_connections.add(websocket)
        client_addr = websocket.remote_address
        logger.info(f"New WebSocket connection from {client_addr}")

        try:
            async for message in websocket:
                await self._handle_client_message(websocket, message)
        except websockets.exceptions.ConnectionClosed:
            logger.info(f"WebSocket connection closed: {client_addr}")
        except Exception as e:
            logger.error(f"WebSocket error for {client_addr}: {e}")
        finally:
            self.active_connections.discard(websocket)
            if not websocket.closed:
                await websocket.close()

    async def _handle_client_message(self, websocket: ServerConnection, message: str):
        """Handle client message"""
        try:
            data = json.loads(message)
            action = data.get("action")

            if action == "confirm":
                await self._handle_confirmation(data)
            elif action == "cancel":
                await self._handle_cancellation(data)
            elif action == "ping":
                await self._handle_ping(websocket, data)
            elif action == "subscribe":
                await self._handle_subscription(websocket, data)
            else:
                logger.warning(f"Unknown action received: {action}")
                await self._send_error(websocket, f"Unknown action: {action}")

        except json.JSONDecodeError as e:
            logger.error(f"Invalid JSON received: {e}")
            await self._send_error(websocket, "Invalid JSON format")
        except Exception as e:
            logger.error(f"Error handling client message: {e}")
            await self._send_error(websocket, f"Internal error: {str(e)}")

    async def _handle_confirmation(self, data: Dict[str, Any]):
        """Handle user confirmation"""
        callback_id = data.get("callback_id")
        if callback_id and callback_id in self.callback_registry:
            callback = self.callback_registry[callback_id]
            confirmation = UserConfirmation(
                proceed=data.get("proceed", False),
                feedback=data.get("feedback")
            )
            try:
                callback(confirmation)
                del self.callback_registry[callback_id]
                logger.debug(f"Processed confirmation for callback {callback_id}")
            except Exception as e:
                logger.error(f"Error processing confirmation callback: {e}")
        else:
            logger.warning(f"No callback found for confirmation ID: {callback_id}")

    async def _handle_cancellation(self, data: Dict[str, Any]):
        """Handle task cancellation"""
        user_id = data.get("user_id")
        task_id = data.get("task_id")

        if user_id and task_id:
            # Task cancellation logic can be added here
            # Since database manager access is needed, this functionality may need to be implemented through callbacks
            logger.info(f"Task cancellation requested: user={user_id}, task={task_id}")
            await self.broadcast_message({
                "type": "task_cancelled",
                "user_id": user_id,
                "task_id": task_id,
                "timestamp": asyncio.get_event_loop().time()
            })
        else:
            logger.warning("Invalid cancellation request: missing user_id or task_id")

    async def _handle_ping(self, websocket: ServerConnection, data: Dict[str, Any]):
        """Handle heartbeat detection"""
        pong_data = {
            "type": "pong",
            "timestamp": asyncio.get_event_loop().time(),
            "original_data": data
        }
        await self._send_to_client(websocket, pong_data)

    async def _handle_subscription(self, websocket: ServerConnection, data: Dict[str, Any]):
        """Handle subscription request"""
        user_id = data.get("user_id")
        if user_id:
            # User-specific subscription logic can be implemented here
            logger.info(f"User {user_id} subscribed to updates")
            await self._send_to_client(websocket, {
                "type": "subscription_confirmed",
                "user_id": user_id
            })

    async def _send_error(self, websocket: ServerConnection, error_message: str):
        """Send error message to client"""
        error_data = {
            "type": "error",
            "message": error_message,
            "timestamp": asyncio.get_event_loop().time()
        }
        await self._send_to_client(websocket, error_data)

    async def _send_to_client(self, websocket: ServerConnection, data: Dict[str, Any]):
        """Send data to specific client"""
        try:
            if not websocket.closed:
                await websocket.send(json.dumps(data))
        except Exception as e:
            logger.error(f"Failed to send message to client: {e}")

    async def notify_user(self, step_result: TaskStepResult, user_id: str, task_id: str, step: int) -> UserConfirmation:
        """Notify user of task step result"""
        callback_id = str(uuid.uuid4())
        confirmation_future = asyncio.Future()

        # Register callback
        self.callback_registry[callback_id] = lambda confirmation: confirmation_future.set_result(confirmation)

        # Prepare notification data
        notification_data = {
            "type": "task_step_result",
            "callback_id": callback_id,
            "step": step,
            "message": step_result.message,
            "result": step_result.result,
            "status": step_result.status,
            "error_code": step_result.error_code,
            "error_message": step_result.error_message,
            "user_id": user_id,
            "task_id": task_id,
            "timestamp": asyncio.get_event_loop().time()
        }

        try:
            # Broadcast to all connected clients (can be optimized to send only to specific users)
            await self.broadcast_message(notification_data)

            # Wait for user confirmation with timeout
            try:
                return await asyncio.wait_for(confirmation_future, timeout=300)  # 5 minute timeout
            except asyncio.TimeoutError:
                logger.warning(f"User confirmation timeout for callback {callback_id}")
                # Clean up callback
                self.callback_registry.pop(callback_id, None)
                return UserConfirmation(proceed=True)  # Default to proceed

        except Exception as e:
            logger.error(f"WebSocket notification error: {e}")
            # Clean up callback
            self.callback_registry.pop(callback_id, None)
            return UserConfirmation(proceed=True)  # Default to proceed

    async def send_heartbeat(self, user_id: str, task_id: str, interval: int = 30):
        """Send heartbeat message"""
        heartbeat_data = {
            "type": "heartbeat",
            "status": "heartbeat",
            "message": "Task is still executing...",
            "user_id": user_id,
            "task_id": task_id,
            "timestamp": asyncio.get_event_loop().time()
        }

        while self._running:
            try:
                await self.broadcast_message(heartbeat_data)
                await asyncio.sleep(interval)
            except Exception as e:
                logger.error(f"WebSocket heartbeat error: {e}")
                break

    async def broadcast_message(self, message: Dict[str, Any]):
        """Broadcast message to all connected clients"""
        if not self.active_connections:
            logger.debug("No active WebSocket connections for broadcast")
            return

        # Filter out closed connections
        active_connections = [conn for conn in self.active_connections if not conn.closed]
        self.active_connections = set(active_connections)

        if active_connections:
            await asyncio.gather(
                *[self._send_to_client(conn, message) for conn in active_connections],
                return_exceptions=True
            )
            logger.debug(f"Broadcasted message to {len(active_connections)} clients")

    async def send_to_user(self, user_id: str, message: Dict[str, Any]):
        """Send message to specific user (requires user connection mapping implementation)"""
        # User ID to WebSocket connection mapping can be implemented here
        # Currently simplified to broadcast
        message["target_user_id"] = user_id
        await self.broadcast_message(message)

    def get_connection_count(self) -> int:
        """Get active connection count"""
        return len([conn for conn in self.active_connections if not conn.closed])

    def get_status(self) -> Dict[str, Any]:
        """Get WebSocket manager status"""
        return {
            "running": self._running,
            "host": self.host,
            "port": self.port,
            "active_connections": self.get_connection_count(),
            "pending_callbacks": len(self.callback_registry)
        }
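Usage sketch (editorial, not part of the package): one way the WebSocketManager above could be driven. The import path aiecs.infrastructure.messaging.websocket_manager is inferred from the file listing, and the host/port values are placeholders.

import asyncio

from aiecs.infrastructure.messaging.websocket_manager import TaskStepResult, WebSocketManager


async def main():
    # Illustrative only; host/port are placeholders and the import path is
    # inferred from the wheel's file layout.
    manager = WebSocketManager(host="localhost", port=8765)
    await manager.start_server()

    # Push a step result to connected clients and wait (up to 5 minutes) for a
    # {"action": "confirm", "callback_id": ...} reply; defaults to proceed on timeout.
    step_result = TaskStepResult(step="plan", message="Plan ready", status="pending")
    confirmation = await manager.notify_user(step_result, user_id="u1", task_id="t1", step=1)
    print("proceed:", confirmation.proceed, "connections:", manager.get_connection_count())

    await manager.stop_server()


if __name__ == "__main__":
    asyncio.run(main())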
aiecs/infrastructure/monitoring/__init__.py
@@ -0,0 +1,12 @@
"""Infrastructure monitoring module

Contains monitoring, metrics, and observability infrastructure.
"""

from .executor_metrics import ExecutorMetrics
from .tracing_manager import TracingManager

__all__ = [
    "ExecutorMetrics",
    "TracingManager",
]
aiecs/infrastructure/monitoring/executor_metrics.py
@@ -0,0 +1,138 @@
import logging
import functools
from typing import Dict, Optional, Any
from prometheus_client import Counter, Histogram, start_http_server

logger = logging.getLogger(__name__)


class ExecutorMetrics:
    """
    Specialized handler for executor performance monitoring and metrics collection
    """

    def __init__(self, enable_metrics: bool = True, metrics_port: int = 8001):
        self.enable_metrics = enable_metrics
        self.metrics_port = metrics_port
        self.metrics: Dict[str, Any] = {}

        if self.enable_metrics:
            self._init_prometheus_metrics()

    def _init_prometheus_metrics(self):
        """Initialize Prometheus metrics"""
        try:
            start_http_server(self.metrics_port)
            self.metrics = {
                "intent_latency": Histogram("intent_latency_seconds", "Latency of intent parsing"),
                "intent_success": Counter("intent_success_total", "Number of successful intent parsings"),
                "intent_retries": Counter("intent_retries_total", "Number of intent parsing retries"),
                "plan_latency": Histogram("plan_latency_seconds", "Latency of task planning"),
                "plan_success": Counter("plan_success_total", "Number of successful plans"),
                "plan_retries": Counter("plan_retries_total", "Number of plan retries"),
                "execute_latency": Histogram("execute_latency_seconds", "Latency of task execution", ["task_type"]),
                "execute_success": Counter("execute_success_total", "Number of successful executions", ["task_type"]),
                "execute_retries": Counter("execute_retries_total", "Number of execution retries", ["task_type"]),
            }
            logger.info(f"Prometheus metrics server started on port {self.metrics_port}")
        except Exception as e:
            logger.warning(f"Failed to start metrics server: {e}")
            self.metrics = {}

    def record_operation_latency(self, operation: str, duration: float):
        """Record operation latency"""
        if not self.enable_metrics or f"{operation}_latency" not in self.metrics:
            return
        self.metrics[f"{operation}_latency"].observe(duration)

    def record_operation_success(self, operation: str, labels: Optional[Dict[str, str]] = None):
        """Record operation success"""
        if not self.enable_metrics or f"{operation}_success" not in self.metrics:
            return
        metric = self.metrics[f"{operation}_success"]
        if labels:
            metric = metric.labels(**labels)
        metric.inc()

    def record_operation_failure(self, operation: str, error_type: str, labels: Optional[Dict[str, str]] = None):
        """Record operation failure"""
        if not self.enable_metrics:
            return
        # Failure metrics can be added
        logger.error(f"Operation {operation} failed with error type: {error_type}")

    def record_retry(self, operation: str, attempt_number: int):
        """Record retry"""
        if not self.enable_metrics or f"{operation}_retries" not in self.metrics:
            return
        if attempt_number > 1:
            self.metrics[f"{operation}_retries"].inc()

    def with_metrics(self, metric_name: str, labels: Optional[Dict[str, str]] = None):
        """Monitoring decorator"""
        def decorator(func):
            @functools.wraps(func)
            async def wrapper(*args, **kwargs):
                if not self.metrics or f"{metric_name}_latency" not in self.metrics:
                    return await func(*args, **kwargs)

                labels_dict = labels or {}
                metric = self.metrics[f"{metric_name}_latency"]
                if labels:
                    metric = metric.labels(**labels_dict)

                with metric.time():
                    try:
                        result = await func(*args, **kwargs)
                        if f"{metric_name}_success" in self.metrics:
                            success_metric = self.metrics[f"{metric_name}_success"]
                            if labels:
                                success_metric = success_metric.labels(**labels_dict)
                            success_metric.inc()
                        return result
                    except Exception as e:
                        logger.error(f"Error in {func.__name__}: {e}")
                        raise
            return wrapper
        return decorator

    def get_metrics_summary(self) -> Dict[str, Any]:
        """Get metrics summary"""
        if not self.enable_metrics:
            return {"metrics_enabled": False}

        return {
            "metrics_enabled": True,
            "metrics_port": self.metrics_port,
            "available_metrics": list(self.metrics.keys())
        }

    def record_operation(self, operation_type: str, success: bool = True, duration: Optional[float] = None, **kwargs):
        """Record a general operation for metrics tracking"""
        if not self.enable_metrics:
            return

        try:
            # Record operation success/failure
            if success:
                self.record_operation_success(operation_type, kwargs.get('labels'))
            else:
                error_type = kwargs.get('error_type', 'unknown')
                self.record_operation_failure(operation_type, error_type, kwargs.get('labels'))

            # Record operation latency if provided
            if duration is not None:
                self.record_operation_latency(operation_type, duration)

        except Exception as e:
            logger.warning(f"Failed to record operation metrics: {e}")

    def record_duration(self, operation: str, duration: float, labels: Optional[Dict[str, str]] = None):
        """Record operation duration for metrics tracking"""
        if not self.enable_metrics:
            return

        try:
            self.record_operation_latency(operation, duration)
        except Exception as e:
            logger.warning(f"Failed to record duration metrics: {e}")
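Usage sketch (editorial, not part of the package): how the ExecutorMetrics decorator above might be applied. The import path is inferred from the file listing; the "execute" metric family and its task_type label come from _init_prometheus_metrics.

import asyncio

from aiecs.infrastructure.monitoring.executor_metrics import ExecutorMetrics

# Starts a Prometheus endpoint on port 8001 (falls back to no-op metrics if the
# port is unavailable).
metrics = ExecutorMetrics(enable_metrics=True, metrics_port=8001)


@metrics.with_metrics("execute", labels={"task_type": "demo"})
async def run_task():
    # Latency is observed via execute_latency_seconds{task_type="demo"} and a
    # success increments execute_success_total{task_type="demo"}.
    await asyncio.sleep(0.1)
    return "done"


async def main():
    print(await run_task())
    # Manual recording for operations that are not wrapped by the decorator.
    metrics.record_operation("intent", success=True, duration=0.05)
    print(metrics.get_metrics_summary())


asyncio.run(main())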
aiecs/infrastructure/monitoring/structured_logger.py
@@ -0,0 +1,50 @@
"""Structured logging setup for aiecs."""
import logging
import sys
from typing import Optional


def setup_structured_logging(level: str = "INFO", format_type: str = "json") -> None:
    """
    Setup structured logging for the application.

    Args:
        level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
        format_type: Format type (json, text)
    """
    # Convert string level to logging level
    numeric_level = getattr(logging, level.upper(), logging.INFO)

    # Create formatter
    if format_type.lower() == "json":
        # Simple JSON-like format for now
        formatter = logging.Formatter(
            '{"timestamp": "%(asctime)s", "level": "%(levelname)s", "module": "%(name)s", "message": "%(message)s"}'
        )
    else:
        # Standard text format
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )

    # Setup root logger
    root_logger = logging.getLogger()
    root_logger.setLevel(numeric_level)

    # Remove existing handlers
    for handler in root_logger.handlers[:]:
        root_logger.removeHandler(handler)

    # Add console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(numeric_level)
    console_handler.setFormatter(formatter)
    root_logger.addHandler(console_handler)

    # Set specific logger levels
    logging.getLogger("aiecs").setLevel(numeric_level)

    # Suppress noisy third-party loggers
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    logging.getLogger("requests").setLevel(logging.WARNING)
    logging.getLogger("httpx").setLevel(logging.WARNING)