machinaos 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.template +71 -0
- package/LICENSE +21 -0
- package/README.md +87 -0
- package/bin/cli.js +159 -0
- package/client/.dockerignore +45 -0
- package/client/Dockerfile +68 -0
- package/client/eslint.config.js +29 -0
- package/client/index.html +13 -0
- package/client/nginx.conf +66 -0
- package/client/package.json +48 -0
- package/client/src/App.tsx +27 -0
- package/client/src/Dashboard.tsx +1173 -0
- package/client/src/ParameterPanel.tsx +301 -0
- package/client/src/components/AIAgentNode.tsx +321 -0
- package/client/src/components/APIKeyValidator.tsx +118 -0
- package/client/src/components/ClaudeChatModelNode.tsx +18 -0
- package/client/src/components/ConditionalEdge.tsx +189 -0
- package/client/src/components/CredentialsModal.tsx +306 -0
- package/client/src/components/EdgeConditionEditor.tsx +443 -0
- package/client/src/components/GeminiChatModelNode.tsx +18 -0
- package/client/src/components/GenericNode.tsx +357 -0
- package/client/src/components/LocationParameterPanel.tsx +154 -0
- package/client/src/components/ModelNode.tsx +286 -0
- package/client/src/components/OpenAIChatModelNode.tsx +18 -0
- package/client/src/components/OutputPanel.tsx +471 -0
- package/client/src/components/ParameterRenderer.tsx +1874 -0
- package/client/src/components/SkillEditorModal.tsx +417 -0
- package/client/src/components/SquareNode.tsx +797 -0
- package/client/src/components/StartNode.tsx +250 -0
- package/client/src/components/ToolkitNode.tsx +365 -0
- package/client/src/components/TriggerNode.tsx +463 -0
- package/client/src/components/auth/LoginPage.tsx +247 -0
- package/client/src/components/auth/ProtectedRoute.tsx +59 -0
- package/client/src/components/base/BaseChatModelNode.tsx +271 -0
- package/client/src/components/icons/AIProviderIcons.tsx +50 -0
- package/client/src/components/maps/GoogleMapsPicker.tsx +137 -0
- package/client/src/components/maps/MapsPreviewPanel.tsx +110 -0
- package/client/src/components/maps/index.ts +26 -0
- package/client/src/components/parameterPanel/InputSection.tsx +1094 -0
- package/client/src/components/parameterPanel/LocationPanelLayout.tsx +65 -0
- package/client/src/components/parameterPanel/MapsSection.tsx +92 -0
- package/client/src/components/parameterPanel/MiddleSection.tsx +571 -0
- package/client/src/components/parameterPanel/OutputSection.tsx +81 -0
- package/client/src/components/parameterPanel/ParameterPanelLayout.tsx +82 -0
- package/client/src/components/parameterPanel/ToolSchemaEditor.tsx +436 -0
- package/client/src/components/parameterPanel/index.ts +42 -0
- package/client/src/components/shared/DataPanel.tsx +142 -0
- package/client/src/components/shared/JSONTreeRenderer.tsx +106 -0
- package/client/src/components/ui/AIResultModal.tsx +204 -0
- package/client/src/components/ui/AndroidSettingsPanel.tsx +401 -0
- package/client/src/components/ui/CodeEditor.tsx +81 -0
- package/client/src/components/ui/CollapsibleSection.tsx +88 -0
- package/client/src/components/ui/ComponentItem.tsx +154 -0
- package/client/src/components/ui/ComponentPalette.tsx +321 -0
- package/client/src/components/ui/ConsolePanel.tsx +1074 -0
- package/client/src/components/ui/ErrorBoundary.tsx +196 -0
- package/client/src/components/ui/InputNodesPanel.tsx +204 -0
- package/client/src/components/ui/MapSelector.tsx +314 -0
- package/client/src/components/ui/Modal.tsx +149 -0
- package/client/src/components/ui/NodeContextMenu.tsx +192 -0
- package/client/src/components/ui/NodeOutputPanel.tsx +1150 -0
- package/client/src/components/ui/OutputDisplayPanel.tsx +381 -0
- package/client/src/components/ui/SettingsPanel.tsx +243 -0
- package/client/src/components/ui/TopToolbar.tsx +736 -0
- package/client/src/components/ui/WhatsAppSettingsPanel.tsx +345 -0
- package/client/src/components/ui/WorkflowSidebar.tsx +294 -0
- package/client/src/config/antdTheme.ts +186 -0
- package/client/src/config/api.ts +54 -0
- package/client/src/contexts/AuthContext.tsx +221 -0
- package/client/src/contexts/ThemeContext.tsx +42 -0
- package/client/src/contexts/WebSocketContext.tsx +1971 -0
- package/client/src/factories/baseChatModelFactory.ts +256 -0
- package/client/src/hooks/useAndroidOperations.ts +164 -0
- package/client/src/hooks/useApiKeyValidation.ts +107 -0
- package/client/src/hooks/useApiKeys.ts +238 -0
- package/client/src/hooks/useAppTheme.ts +17 -0
- package/client/src/hooks/useComponentPalette.ts +51 -0
- package/client/src/hooks/useCopyPaste.ts +155 -0
- package/client/src/hooks/useDragAndDrop.ts +124 -0
- package/client/src/hooks/useDragVariable.ts +88 -0
- package/client/src/hooks/useExecution.ts +313 -0
- package/client/src/hooks/useParameterPanel.ts +176 -0
- package/client/src/hooks/useReactFlowNodes.ts +189 -0
- package/client/src/hooks/useToolSchema.ts +209 -0
- package/client/src/hooks/useWhatsApp.ts +196 -0
- package/client/src/hooks/useWorkflowManagement.ts +46 -0
- package/client/src/index.css +315 -0
- package/client/src/main.tsx +19 -0
- package/client/src/nodeDefinitions/aiAgentNodes.ts +336 -0
- package/client/src/nodeDefinitions/aiModelNodes.ts +340 -0
- package/client/src/nodeDefinitions/androidDeviceNodes.ts +140 -0
- package/client/src/nodeDefinitions/androidServiceNodes.ts +383 -0
- package/client/src/nodeDefinitions/chatNodes.ts +135 -0
- package/client/src/nodeDefinitions/codeNodes.ts +54 -0
- package/client/src/nodeDefinitions/documentNodes.ts +379 -0
- package/client/src/nodeDefinitions/index.ts +15 -0
- package/client/src/nodeDefinitions/locationNodes.ts +463 -0
- package/client/src/nodeDefinitions/schedulerNodes.ts +220 -0
- package/client/src/nodeDefinitions/skillNodes.ts +211 -0
- package/client/src/nodeDefinitions/toolNodes.ts +198 -0
- package/client/src/nodeDefinitions/utilityNodes.ts +284 -0
- package/client/src/nodeDefinitions/whatsappNodes.ts +865 -0
- package/client/src/nodeDefinitions/workflowNodes.ts +41 -0
- package/client/src/nodeDefinitions.ts +104 -0
- package/client/src/schemas/workflowSchema.ts +264 -0
- package/client/src/services/dynamicParameterService.ts +96 -0
- package/client/src/services/execution/aiAgentExecutionService.ts +35 -0
- package/client/src/services/executionService.ts +232 -0
- package/client/src/services/workflowApi.ts +91 -0
- package/client/src/store/useAppStore.ts +582 -0
- package/client/src/styles/theme.ts +508 -0
- package/client/src/styles/zIndex.ts +17 -0
- package/client/src/types/ComponentTypes.ts +39 -0
- package/client/src/types/EdgeCondition.ts +231 -0
- package/client/src/types/INodeProperties.ts +288 -0
- package/client/src/types/NodeTypes.ts +28 -0
- package/client/src/utils/formatters.ts +33 -0
- package/client/src/utils/googleMapsLoader.ts +140 -0
- package/client/src/utils/locationUtils.ts +85 -0
- package/client/src/utils/nodeUtils.ts +31 -0
- package/client/src/utils/workflow.ts +30 -0
- package/client/src/utils/workflowExport.ts +120 -0
- package/client/src/vite-env.d.ts +12 -0
- package/client/tailwind.config.js +60 -0
- package/client/tsconfig.json +25 -0
- package/client/tsconfig.node.json +11 -0
- package/client/vite.config.js +35 -0
- package/docker-compose.prod.yml +107 -0
- package/docker-compose.yml +104 -0
- package/docs-MachinaOs/README.md +85 -0
- package/docs-MachinaOs/deployment/docker.mdx +228 -0
- package/docs-MachinaOs/deployment/production.mdx +345 -0
- package/docs-MachinaOs/docs.json +75 -0
- package/docs-MachinaOs/faq.mdx +309 -0
- package/docs-MachinaOs/favicon.svg +5 -0
- package/docs-MachinaOs/installation.mdx +160 -0
- package/docs-MachinaOs/introduction.mdx +114 -0
- package/docs-MachinaOs/logo/dark.svg +6 -0
- package/docs-MachinaOs/logo/light.svg +6 -0
- package/docs-MachinaOs/nodes/ai-agent.mdx +216 -0
- package/docs-MachinaOs/nodes/ai-models.mdx +240 -0
- package/docs-MachinaOs/nodes/android.mdx +411 -0
- package/docs-MachinaOs/nodes/overview.mdx +181 -0
- package/docs-MachinaOs/nodes/schedulers.mdx +316 -0
- package/docs-MachinaOs/nodes/webhooks.mdx +330 -0
- package/docs-MachinaOs/nodes/whatsapp.mdx +305 -0
- package/docs-MachinaOs/quickstart.mdx +119 -0
- package/docs-MachinaOs/tutorials/ai-agent-workflow.mdx +177 -0
- package/docs-MachinaOs/tutorials/android-automation.mdx +242 -0
- package/docs-MachinaOs/tutorials/first-workflow.mdx +134 -0
- package/docs-MachinaOs/tutorials/whatsapp-automation.mdx +185 -0
- package/nul +0 -0
- package/package.json +70 -0
- package/scripts/build.js +158 -0
- package/scripts/check-ports.ps1 +33 -0
- package/scripts/clean.js +40 -0
- package/scripts/docker.js +93 -0
- package/scripts/kill-port.ps1 +154 -0
- package/scripts/start.js +210 -0
- package/scripts/stop.js +325 -0
- package/server/.dockerignore +44 -0
- package/server/Dockerfile +45 -0
- package/server/constants.py +249 -0
- package/server/core/__init__.py +1 -0
- package/server/core/cache.py +461 -0
- package/server/core/config.py +128 -0
- package/server/core/container.py +99 -0
- package/server/core/database.py +1211 -0
- package/server/core/logging.py +314 -0
- package/server/main.py +289 -0
- package/server/middleware/__init__.py +5 -0
- package/server/middleware/auth.py +89 -0
- package/server/models/__init__.py +1 -0
- package/server/models/auth.py +52 -0
- package/server/models/cache.py +24 -0
- package/server/models/database.py +211 -0
- package/server/models/nodes.py +455 -0
- package/server/package.json +9 -0
- package/server/pyproject.toml +72 -0
- package/server/requirements.txt +83 -0
- package/server/routers/__init__.py +1 -0
- package/server/routers/android.py +294 -0
- package/server/routers/auth.py +203 -0
- package/server/routers/database.py +151 -0
- package/server/routers/maps.py +142 -0
- package/server/routers/nodejs_compat.py +289 -0
- package/server/routers/webhook.py +90 -0
- package/server/routers/websocket.py +2127 -0
- package/server/routers/whatsapp.py +761 -0
- package/server/routers/workflow.py +200 -0
- package/server/services/__init__.py +1 -0
- package/server/services/ai.py +2415 -0
- package/server/services/android/__init__.py +27 -0
- package/server/services/android/broadcaster.py +114 -0
- package/server/services/android/client.py +608 -0
- package/server/services/android/manager.py +78 -0
- package/server/services/android/protocol.py +165 -0
- package/server/services/android_service.py +588 -0
- package/server/services/auth.py +131 -0
- package/server/services/chat_client.py +160 -0
- package/server/services/deployment/__init__.py +12 -0
- package/server/services/deployment/manager.py +706 -0
- package/server/services/deployment/state.py +47 -0
- package/server/services/deployment/triggers.py +275 -0
- package/server/services/event_waiter.py +785 -0
- package/server/services/execution/__init__.py +77 -0
- package/server/services/execution/cache.py +769 -0
- package/server/services/execution/conditions.py +373 -0
- package/server/services/execution/dlq.py +132 -0
- package/server/services/execution/executor.py +1351 -0
- package/server/services/execution/models.py +531 -0
- package/server/services/execution/recovery.py +235 -0
- package/server/services/handlers/__init__.py +126 -0
- package/server/services/handlers/ai.py +355 -0
- package/server/services/handlers/android.py +260 -0
- package/server/services/handlers/code.py +278 -0
- package/server/services/handlers/document.py +598 -0
- package/server/services/handlers/http.py +193 -0
- package/server/services/handlers/polyglot.py +105 -0
- package/server/services/handlers/tools.py +845 -0
- package/server/services/handlers/triggers.py +107 -0
- package/server/services/handlers/utility.py +822 -0
- package/server/services/handlers/whatsapp.py +476 -0
- package/server/services/maps.py +289 -0
- package/server/services/memory_store.py +103 -0
- package/server/services/node_executor.py +375 -0
- package/server/services/parameter_resolver.py +218 -0
- package/server/services/polyglot_client.py +169 -0
- package/server/services/scheduler.py +155 -0
- package/server/services/skill_loader.py +417 -0
- package/server/services/status_broadcaster.py +826 -0
- package/server/services/temporal/__init__.py +23 -0
- package/server/services/temporal/activities.py +344 -0
- package/server/services/temporal/client.py +76 -0
- package/server/services/temporal/executor.py +147 -0
- package/server/services/temporal/worker.py +251 -0
- package/server/services/temporal/workflow.py +355 -0
- package/server/services/temporal/ws_client.py +236 -0
- package/server/services/text.py +111 -0
- package/server/services/user_auth.py +172 -0
- package/server/services/websocket_client.py +29 -0
- package/server/services/workflow.py +597 -0
- package/server/skills/android-skill/SKILL.md +82 -0
- package/server/skills/assistant-personality/SKILL.md +45 -0
- package/server/skills/code-skill/SKILL.md +140 -0
- package/server/skills/http-skill/SKILL.md +161 -0
- package/server/skills/maps-skill/SKILL.md +170 -0
- package/server/skills/memory-skill/SKILL.md +154 -0
- package/server/skills/scheduler-skill/SKILL.md +84 -0
- package/server/skills/whatsapp-skill/SKILL.md +283 -0
- package/server/uv.lock +2916 -0
- package/server/whatsapp-rpc/.dockerignore +30 -0
- package/server/whatsapp-rpc/Dockerfile +44 -0
- package/server/whatsapp-rpc/Dockerfile.web +17 -0
- package/server/whatsapp-rpc/README.md +139 -0
- package/server/whatsapp-rpc/cli.js +95 -0
- package/server/whatsapp-rpc/configs/config.yaml +7 -0
- package/server/whatsapp-rpc/docker-compose.yml +35 -0
- package/server/whatsapp-rpc/docs/API.md +410 -0
- package/server/whatsapp-rpc/go.mod +67 -0
- package/server/whatsapp-rpc/go.sum +203 -0
- package/server/whatsapp-rpc/package.json +30 -0
- package/server/whatsapp-rpc/schema.json +1294 -0
- package/server/whatsapp-rpc/scripts/clean.cjs +66 -0
- package/server/whatsapp-rpc/scripts/cli.js +162 -0
- package/server/whatsapp-rpc/src/go/cmd/server/main.go +91 -0
- package/server/whatsapp-rpc/src/go/config/config.go +49 -0
- package/server/whatsapp-rpc/src/go/rpc/rpc.go +446 -0
- package/server/whatsapp-rpc/src/go/rpc/server.go +112 -0
- package/server/whatsapp-rpc/src/go/whatsapp/history.go +166 -0
- package/server/whatsapp-rpc/src/go/whatsapp/messages.go +390 -0
- package/server/whatsapp-rpc/src/go/whatsapp/service.go +2130 -0
- package/server/whatsapp-rpc/src/go/whatsapp/types.go +261 -0
- package/server/whatsapp-rpc/src/python/pyproject.toml +15 -0
- package/server/whatsapp-rpc/src/python/whatsapp_rpc/__init__.py +4 -0
- package/server/whatsapp-rpc/src/python/whatsapp_rpc/client.py +427 -0
- package/server/whatsapp-rpc/web/app.py +609 -0
- package/server/whatsapp-rpc/web/requirements.txt +6 -0
- package/server/whatsapp-rpc/web/rpc_client.py +427 -0
- package/server/whatsapp-rpc/web/static/openapi.yaml +59 -0
- package/server/whatsapp-rpc/web/templates/base.html +150 -0
- package/server/whatsapp-rpc/web/templates/contacts.html +240 -0
- package/server/whatsapp-rpc/web/templates/dashboard.html +320 -0
- package/server/whatsapp-rpc/web/templates/groups.html +328 -0
- package/server/whatsapp-rpc/web/templates/messages.html +465 -0
- package/server/whatsapp-rpc/web/templates/messaging.html +681 -0
- package/server/whatsapp-rpc/web/templates/send.html +259 -0
- package/server/whatsapp-rpc/web/templates/settings.html +459 -0
The hunk below is for `package/server/services/execution/cache.py` (new file, +769 lines):

@@ -0,0 +1,769 @@

```python
"""Execution cache service for Redis persistence.

Provides:
- Result caching (Prefect pattern) for idempotency
- Execution state persistence
- Distributed locking (Conductor pattern)
- Transaction checkpointing
"""

import asyncio
import json
import time
import uuid
from contextlib import asynccontextmanager
from typing import Dict, Any, List, Optional, Set, Union

from core.logging import get_logger
from core.cache import CacheService
from .models import ExecutionContext, TaskStatus, WorkflowStatus, hash_inputs, DLQEntry

logger = get_logger(__name__)


def ensure_str(value: Union[str, bytes, None]) -> Optional[str]:
    """Ensure value is a string, handling both bytes and str.

    Redis with decode_responses=True returns strings directly.
    This helper handles both cases for compatibility.
    """
    if value is None:
        return None
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return value


class ExecutionCache:
    """Redis-backed cache for workflow execution state.

    Key schema:
        execution:{id}:state        -> HASH {status, workflow_id, etc}
        execution:{id}:nodes        -> HASH {node_id -> NodeExecution JSON}
        execution:{id}:outputs      -> HASH {node_id -> output JSON}
        execution:{id}:events       -> STREAM (immutable event log)
        result:{exec}:{node}:{hash} -> JSON (cached result)
        executions:active           -> SET {execution_ids}
        lock:execution:{id}         -> STRING (lock token)
        heartbeat:{exec}:{node}     -> STRING (timestamp)
    """

    def __init__(self, cache_service: CacheService):
        self.cache = cache_service
        self._local_locks: Dict[str, asyncio.Lock] = {}

    # =========================================================================
    # EXECUTION STATE PERSISTENCE
    # =========================================================================

    async def save_execution_state(self, ctx: ExecutionContext) -> bool:
        """Persist execution context to Redis.

        Args:
            ctx: ExecutionContext to save

        Returns:
            True if saved successfully
        """
        try:
            key = f"execution:{ctx.execution_id}:state"
            data = ctx.to_dict()

            # Use Redis HSET for structured storage
            if self.cache.is_redis_available():
                mapping = {
                    k: json.dumps(v) if isinstance(v, (dict, list)) else str(v)
                    for k, v in data.items()
                }
                if mapping:  # Only call hset if mapping is not empty
                    await self.cache.redis.hset(key, mapping=mapping)
                # Set TTL (24 hours for completed, no TTL for active)
                if ctx.status in (WorkflowStatus.COMPLETED, WorkflowStatus.FAILED,
                                  WorkflowStatus.CANCELLED):
                    await self.cache.redis.expire(key, 86400)

                # Track active executions
                if ctx.status == WorkflowStatus.RUNNING:
                    await self.cache.redis.sadd("executions:active", ctx.execution_id)
                else:
                    await self.cache.redis.srem("executions:active", ctx.execution_id)

                logger.debug("Saved execution state", execution_id=ctx.execution_id,
                             status=ctx.status.value)
                return True
            else:
                # Fallback to simple key-value
                await self.cache.set(key, data, ttl=86400)
                return True

        except Exception as e:
            logger.error("Failed to save execution state", execution_id=ctx.execution_id,
                         error=str(e))
            return False

    async def load_execution_state(self, execution_id: str,
                                   nodes: List[Dict] = None,
                                   edges: List[Dict] = None) -> Optional[ExecutionContext]:
        """Load execution context from Redis.

        Args:
            execution_id: Execution ID to load
            nodes: Workflow nodes (not stored in Redis due to size)
            edges: Workflow edges (not stored in Redis due to size)

        Returns:
            ExecutionContext if found, None otherwise
        """
        try:
            key = f"execution:{execution_id}:state"

            if self.cache.is_redis_available():
                raw_data = await self.cache.redis.hgetall(key)
                if not raw_data:
                    return None

                # Deserialize Redis hash values
                # With decode_responses=True, values are already strings
                data = {}
                for k, v in raw_data.items():
                    key_str = ensure_str(k)
                    val_str = ensure_str(v)
                    try:
                        data[key_str] = json.loads(val_str)
                    except (json.JSONDecodeError, TypeError):
                        data[key_str] = val_str

                return ExecutionContext.from_dict(data, nodes, edges)
            else:
                # Fallback to simple key-value
                data = await self.cache.get(key)
                if data:
                    return ExecutionContext.from_dict(data, nodes, edges)
                return None

        except Exception as e:
            logger.error("Failed to load execution state", execution_id=execution_id,
                         error=str(e))
            return None

    async def get_active_executions(self) -> Set[str]:
        """Get all active execution IDs.

        Returns:
            Set of execution IDs currently running
        """
        try:
            if self.cache.is_redis_available():
                members = await self.cache.redis.smembers("executions:active")
                return {ensure_str(m) for m in members}
            return set()
        except Exception as e:
            logger.error("Failed to get active executions", error=str(e))
            return set()

    async def delete_execution_state(self, execution_id: str) -> bool:
        """Delete execution state from Redis.

        Args:
            execution_id: Execution ID to delete

        Returns:
            True if deleted successfully
        """
        try:
            if self.cache.is_redis_available():
                keys = [
                    f"execution:{execution_id}:state",
                    f"execution:{execution_id}:events",
                ]
                await self.cache.redis.delete(*keys)
                await self.cache.redis.srem("executions:active", execution_id)
            return True
        except Exception as e:
            logger.error("Failed to delete execution state", execution_id=execution_id,
                         error=str(e))
            return False

    # =========================================================================
    # RESULT CACHING (Prefect pattern)
    # =========================================================================

    async def get_cached_result(self, execution_id: str, node_id: str,
                                inputs: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Get cached result for node execution (Prefect pattern).

        Args:
            execution_id: Execution ID
            node_id: Node ID
            inputs: Node inputs for cache key

        Returns:
            Cached result if found, None otherwise
        """
        try:
            input_hash = hash_inputs(inputs)
            cache_key = f"result:{execution_id}:{node_id}:{input_hash}"
            result = await self.cache.get(cache_key)
            if result:
                logger.debug("Cache hit", node_id=node_id, input_hash=input_hash[:8])
                return result
        except Exception as e:
            logger.error("Failed to get cached result", node_id=node_id, error=str(e))
        return None

    async def set_cached_result(self, execution_id: str, node_id: str,
                                inputs: Dict[str, Any], result: Dict[str, Any],
                                ttl: int = 3600) -> bool:
        """Cache node execution result (Prefect pattern).

        Args:
            execution_id: Execution ID
            node_id: Node ID
            inputs: Node inputs for cache key
            result: Execution result to cache
            ttl: Time-to-live in seconds (default 1 hour)

        Returns:
            True if cached successfully
        """
        try:
            input_hash = hash_inputs(inputs)
            cache_key = f"result:{execution_id}:{node_id}:{input_hash}"
            await self.cache.set(cache_key, result, ttl=ttl)
            logger.debug("Cached result", node_id=node_id, input_hash=input_hash[:8])
            return True
        except Exception as e:
            logger.error("Failed to cache result", node_id=node_id, error=str(e))
            return False

    # =========================================================================
    # DISTRIBUTED LOCKING (Conductor pattern)
    # =========================================================================

    @asynccontextmanager
    async def distributed_lock(self, lock_name: str, timeout: int = 60):
        """Acquire distributed lock using Redis (Conductor pattern).

        Used to prevent concurrent workflow_decide() calls.

        Args:
            lock_name: Name of the lock (e.g., "execution:{id}:decide")
            timeout: Lock timeout in seconds

        Yields:
            Lock token if acquired

        Raises:
            TimeoutError: If lock cannot be acquired
        """
        lock_key = f"lock:{lock_name}"
        lock_token = str(uuid.uuid4())
        acquired = False

        try:
            # Try to acquire lock
            if self.cache.is_redis_available():
                # Redis SETNX with expiry
                acquired = await self.cache.redis.set(
                    lock_key, lock_token,
                    ex=timeout,
                    nx=True  # Only set if not exists
                )
            else:
                # Fallback to local asyncio lock
                if lock_name not in self._local_locks:
                    self._local_locks[lock_name] = asyncio.Lock()
                await asyncio.wait_for(
                    self._local_locks[lock_name].acquire(),
                    timeout=timeout
                )
                acquired = True

            if not acquired:
                raise TimeoutError(f"Could not acquire lock: {lock_name}")

            logger.debug("Lock acquired", lock_name=lock_name, token=lock_token[:8])
            yield lock_token

        finally:
            # Release lock
            if acquired:
                if self.cache.is_redis_available():
                    # Only release if we hold the lock (check token)
                    # With decode_responses=True, current is already a string
                    current = await self.cache.redis.get(lock_key)
                    if current and current == lock_token:
                        await self.cache.redis.delete(lock_key)
                        logger.debug("Lock released", lock_name=lock_name)
                else:
                    if lock_name in self._local_locks:
                        self._local_locks[lock_name].release()

    # =========================================================================
    # HEARTBEATS (for crash recovery)
    # =========================================================================

    async def update_heartbeat(self, execution_id: str, node_id: str) -> bool:
        """Update heartbeat for running node (for crash detection).

        Args:
            execution_id: Execution ID
            node_id: Node ID

        Returns:
            True if updated successfully
        """
        try:
            key = f"heartbeat:{execution_id}:{node_id}"
            timestamp = str(time.time())
            if self.cache.is_redis_available():
                await self.cache.redis.setex(key, 300, timestamp)  # 5 min TTL
            else:
                await self.cache.set(key, timestamp, ttl=300)
            return True
        except Exception as e:
            logger.error("Failed to update heartbeat", node_id=node_id, error=str(e))
            return False

    async def get_heartbeat(self, execution_id: str, node_id: str) -> Optional[float]:
        """Get last heartbeat timestamp for a node.

        Args:
            execution_id: Execution ID
            node_id: Node ID

        Returns:
            Timestamp if found, None otherwise
        """
        try:
            key = f"heartbeat:{execution_id}:{node_id}"
            if self.cache.is_redis_available():
                val = await self.cache.redis.get(key)
                # With decode_responses=True, val is already a string
                return float(val) if val else None
            else:
                val = await self.cache.get(key)
                return float(val) if val else None
        except Exception as e:
            logger.error("Failed to get heartbeat", node_id=node_id, error=str(e))
            return None

    # =========================================================================
    # EVENT HISTORY (for debugging and recovery)
    # =========================================================================

    async def add_event(self, execution_id: str, event_type: str,
                        data: Dict[str, Any]) -> Optional[str]:
        """Add event to execution history stream.

        Args:
            execution_id: Execution ID
            event_type: Event type (e.g., 'node_started', 'node_completed')
            data: Event data

        Returns:
            Message ID if successful, None otherwise
        """
        try:
            stream_key = f"execution:{execution_id}:events"
            event_data = {
                "type": event_type,
                "timestamp": time.time(),
                **data
            }
            return await self.cache.stream_add(stream_key, event_data, maxlen=1000)
        except Exception as e:
            logger.error("Failed to add event", execution_id=execution_id, error=str(e))
            return None

    async def get_events(self, execution_id: str, count: int = 100) -> List[Dict[str, Any]]:
        """Get execution event history.

        Args:
            execution_id: Execution ID
            count: Maximum events to return

        Returns:
            List of events
        """
        try:
            stream_key = f"execution:{execution_id}:events"
            if not self.cache.is_redis_available():
                return []

            # Read from stream
            result = await self.cache.stream_read(
                {stream_key: "0"},
                count=count
            )

            events = []
            if result:
                for stream_name, messages in result:
                    for msg_id, msg_data in messages:
                        # Deserialize event data
                        event = {}
                        for k, v in msg_data.items():
                            key_str = ensure_str(k)
                            val_str = ensure_str(v)
                            try:
                                event[key_str] = json.loads(val_str)
                            except (json.JSONDecodeError, TypeError):
                                event[key_str] = val_str
                        events.append(event)

            return events
        except Exception as e:
            logger.error("Failed to get events", execution_id=execution_id, error=str(e))
            return []

    # =========================================================================
    # TRANSACTION CHECKPOINTS (Prefect pattern)
    # =========================================================================

    async def checkpoint_transaction(self, transaction_id: str, node_id: str,
                                     result: Dict[str, Any]) -> bool:
        """Save transaction checkpoint (Prefect pattern).

        Args:
            transaction_id: Transaction ID
            node_id: Node that completed
            result: Node result

        Returns:
            True if saved successfully
        """
        try:
            key = f"txn:{transaction_id}:checkpoints"
            checkpoint = {
                "node_id": node_id,
                "result": result,
                "timestamp": time.time()
            }
            if self.cache.is_redis_available():
                await self.cache.redis.rpush(key, json.dumps(checkpoint))
                await self.cache.redis.expire(key, 86400)  # 24 hour TTL
            return True
        except Exception as e:
            logger.error("Failed to checkpoint", transaction_id=transaction_id, error=str(e))
            return False

    async def rollback_transaction(self, transaction_id: str) -> bool:
        """Rollback transaction by clearing checkpoints.

        Args:
            transaction_id: Transaction ID to rollback

        Returns:
            True if rolled back successfully
        """
        try:
            key = f"txn:{transaction_id}:checkpoints"
            if self.cache.is_redis_available():
                await self.cache.redis.delete(key)
            logger.info("Transaction rolled back", transaction_id=transaction_id)
            return True
        except Exception as e:
            logger.error("Failed to rollback", transaction_id=transaction_id, error=str(e))
            return False

    async def get_transaction_checkpoints(self, transaction_id: str) -> List[Dict[str, Any]]:
        """Get transaction checkpoints for recovery.

        Args:
            transaction_id: Transaction ID

        Returns:
            List of checkpoints
        """
        try:
            key = f"txn:{transaction_id}:checkpoints"
            if not self.cache.is_redis_available():
                return []

            raw_list = await self.cache.redis.lrange(key, 0, -1)
            return [json.loads(ensure_str(item)) for item in raw_list]
        except Exception as e:
            logger.error("Failed to get checkpoints", transaction_id=transaction_id, error=str(e))
            return []

    # =========================================================================
    # DEAD LETTER QUEUE (for failed executions)
    # =========================================================================

    async def add_to_dlq(self, entry: DLQEntry) -> bool:
        """Add failed node execution to Dead Letter Queue.

        Stores the entry in multiple indices for querying:
        - dlq:entries:{id} - Individual entry data
        - dlq:workflow:{workflow_id} - List of entry IDs for workflow
        - dlq:node_type:{node_type} - List of entry IDs by node type
        - dlq:all - Set of all entry IDs

        Args:
            entry: DLQEntry to add

        Returns:
            True if added successfully
        """
        try:
            entry_data = entry.to_dict()

            if self.cache.is_redis_available():
                # Store entry data
                entry_key = f"dlq:entries:{entry.id}"
                mapping = {
                    k: json.dumps(v) if isinstance(v, (dict, list)) else str(v)
                    for k, v in entry_data.items()
                }
                if mapping:  # Only call hset if mapping is not empty
                    await self.cache.redis.hset(entry_key, mapping=mapping)
                # Set TTL (7 days for DLQ entries)
                await self.cache.redis.expire(entry_key, 604800)

                # Add to workflow index
                workflow_key = f"dlq:workflow:{entry.workflow_id}"
                await self.cache.redis.lpush(workflow_key, entry.id)
                await self.cache.redis.expire(workflow_key, 604800)

                # Add to node type index
                node_type_key = f"dlq:node_type:{entry.node_type}"
                await self.cache.redis.lpush(node_type_key, entry.id)
                await self.cache.redis.expire(node_type_key, 604800)

                # Add to global set
                await self.cache.redis.sadd("dlq:all", entry.id)

                logger.info("Added to DLQ", entry_id=entry.id, node_id=entry.node_id,
                            node_type=entry.node_type, error=entry.error[:100])
                return True
            else:
                # Fallback to simple key-value
                await self.cache.set(f"dlq:entries:{entry.id}", entry_data, ttl=604800)
                return True

        except Exception as e:
            logger.error("Failed to add to DLQ", entry_id=entry.id, error=str(e))
            return False

    async def get_dlq_entry(self, entry_id: str) -> Optional[DLQEntry]:
        """Get a single DLQ entry by ID.

        Args:
            entry_id: DLQ entry ID

        Returns:
            DLQEntry if found, None otherwise
        """
        try:
            entry_key = f"dlq:entries:{entry_id}"

            if self.cache.is_redis_available():
                raw_data = await self.cache.redis.hgetall(entry_key)
                if not raw_data:
                    return None

                # Deserialize Redis hash values
                data = {}
                for k, v in raw_data.items():
                    key_str = ensure_str(k)
                    val_str = ensure_str(v)
                    try:
                        data[key_str] = json.loads(val_str)
                    except (json.JSONDecodeError, TypeError):
                        data[key_str] = val_str

                return DLQEntry.from_dict(data)
            else:
                data = await self.cache.get(entry_key)
                if data:
                    return DLQEntry.from_dict(data)
                return None

        except Exception as e:
            logger.error("Failed to get DLQ entry", entry_id=entry_id, error=str(e))
            return None

    async def get_dlq_entries(self, workflow_id: Optional[str] = None,
                              node_type: Optional[str] = None,
                              limit: int = 100) -> List[DLQEntry]:
        """Get DLQ entries with optional filtering.

        Args:
            workflow_id: Filter by workflow ID
            node_type: Filter by node type
            limit: Maximum entries to return

        Returns:
            List of DLQEntry objects
        """
        try:
            if not self.cache.is_redis_available():
                return []

            # Determine which index to use
            if workflow_id:
                index_key = f"dlq:workflow:{workflow_id}"
            elif node_type:
                index_key = f"dlq:node_type:{node_type}"
            else:
                # Get all entries from global set
                entry_ids = await self.cache.redis.smembers("dlq:all")
                entry_ids = [ensure_str(eid) for eid in entry_ids][:limit]
                entries = []
                for entry_id in entry_ids:
                    entry = await self.get_dlq_entry(entry_id)
                    if entry:
                        entries.append(entry)
                # Sort by last_error_at descending
                entries.sort(key=lambda e: e.last_error_at, reverse=True)
                return entries

            # Get from LIST index
            raw_ids = await self.cache.redis.lrange(index_key, 0, limit - 1)
            entry_ids = [ensure_str(eid) for eid in raw_ids]

            entries = []
            for entry_id in entry_ids:
                entry = await self.get_dlq_entry(entry_id)
                if entry:
                    entries.append(entry)

            return entries

        except Exception as e:
            logger.error("Failed to get DLQ entries", error=str(e))
            return []

    async def remove_from_dlq(self, entry_id: str) -> bool:
        """Remove entry from DLQ after successful replay or manual purge.

        Args:
            entry_id: DLQ entry ID to remove

        Returns:
            True if removed successfully
        """
        try:
            if not self.cache.is_redis_available():
                return False

            # Get entry first to know which indices to update
            entry = await self.get_dlq_entry(entry_id)
            if not entry:
                return False

            # Remove from indices
            await self.cache.redis.lrem(f"dlq:workflow:{entry.workflow_id}", 0, entry_id)
            await self.cache.redis.lrem(f"dlq:node_type:{entry.node_type}", 0, entry_id)
            await self.cache.redis.srem("dlq:all", entry_id)

            # Delete entry data
            await self.cache.redis.delete(f"dlq:entries:{entry_id}")

            logger.info("Removed from DLQ", entry_id=entry_id)
            return True

        except Exception as e:
            logger.error("Failed to remove from DLQ", entry_id=entry_id, error=str(e))
            return False

    async def update_dlq_entry(self, entry_id: str, retry_count: int,
                               error: str) -> bool:
        """Update DLQ entry after failed retry attempt.

        Args:
            entry_id: DLQ entry ID
            retry_count: New retry count
            error: Latest error message

        Returns:
            True if updated successfully
        """
        try:
            if not self.cache.is_redis_available():
                return False

            entry_key = f"dlq:entries:{entry_id}"
            await self.cache.redis.hset(entry_key, mapping={
                "retry_count": str(retry_count),
                "error": error,
                "last_error_at": str(time.time())
            })

            logger.debug("Updated DLQ entry", entry_id=entry_id, retry_count=retry_count)
            return True

        except Exception as e:
            logger.error("Failed to update DLQ entry", entry_id=entry_id, error=str(e))
            return False

    async def get_dlq_stats(self) -> Dict[str, Any]:
        """Get DLQ statistics.

        Returns:
            Dictionary with DLQ stats (total count, by node type, by workflow)
        """
        try:
            if not self.cache.is_redis_available():
                return {"total": 0, "by_node_type": {}, "by_workflow": {}}

            # Get total count
            total = await self.cache.redis.scard("dlq:all")

            # Get all entries for breakdown
            entries = await self.get_dlq_entries(limit=1000)

            by_node_type = {}
            by_workflow = {}
            for entry in entries:
                by_node_type[entry.node_type] = by_node_type.get(entry.node_type, 0) + 1
                by_workflow[entry.workflow_id] = by_workflow.get(entry.workflow_id, 0) + 1

            return {
                "total": total,
                "by_node_type": by_node_type,
                "by_workflow": by_workflow
            }

        except Exception as e:
            logger.error("Failed to get DLQ stats", error=str(e))
            return {"total": 0, "by_node_type": {}, "by_workflow": {}}

    async def purge_dlq(self, workflow_id: Optional[str] = None,
                        node_type: Optional[str] = None,
                        older_than: Optional[float] = None) -> int:
        """Purge entries from DLQ.

        Args:
            workflow_id: Only purge entries for this workflow
            node_type: Only purge entries for this node type
            older_than: Only purge entries older than this timestamp

        Returns:
            Number of entries purged
        """
        try:
            if not self.cache.is_redis_available():
                return 0

            entries = await self.get_dlq_entries(workflow_id=workflow_id,
                                                 node_type=node_type,
                                                 limit=10000)

            purged = 0
            for entry in entries:
                # Check age filter
                if older_than and entry.created_at > older_than:
                    continue

                if await self.remove_from_dlq(entry.id):
                    purged += 1

            logger.info("Purged DLQ entries", count=purged, workflow_id=workflow_id,
                        node_type=node_type)
            return purged

        except Exception as e:
            logger.error("Failed to purge DLQ", error=str(e))
            return 0
```
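For orientation, here is a minimal sketch of how an executor might drive the `ExecutionCache` above, combining result caching, the distributed lock, heartbeats, and the event stream. Only `ExecutionCache` and its method signatures come from the file itself; the no-argument `CacheService()` construction, the import path `services.execution.cache`, and the `run_node` helper with its payloads are illustrative assumptions, not part of the package.

```python
# Illustrative sketch only. ExecutionCache and its methods are from cache.py
# above; CacheService() construction and the import path are assumptions.
import asyncio

from core.cache import CacheService
from services.execution.cache import ExecutionCache


async def run_node(cache: ExecutionCache, execution_id: str,
                   node_id: str, inputs: dict) -> dict:
    # Idempotent retry (Prefect pattern): identical inputs hit the cache.
    cached = await cache.get_cached_result(execution_id, node_id, inputs)
    if cached is not None:
        return cached

    # Serialize scheduling decisions for this execution (Conductor pattern).
    async with cache.distributed_lock(f"execution:{execution_id}:decide"):
        await cache.update_heartbeat(execution_id, node_id)  # crash detection
        result = {"echo": inputs}  # stand-in for real node work
        await cache.set_cached_result(execution_id, node_id, inputs, result)
        await cache.add_event(execution_id, "node_completed",
                              {"node_id": node_id})
        return result


async def main() -> None:
    cache = ExecutionCache(CacheService())  # constructor args are an assumption
    print(await run_node(cache, "exec-1", "node-a", {"x": 1}))


if __name__ == "__main__":
    asyncio.run(main())
```

Two details in the file are worth noticing when using it this way: results are keyed on `hash_inputs(inputs)`, so replaying a node with the same inputs is a cache hit rather than a re-execution, and the lock release compares the stored value against the caller's UUID token, so a worker cannot delete a lock that expired and was since re-acquired by another worker.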