machinaos 0.0.1 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (422)
  1. package/.env.template +71 -71
  2. package/LICENSE +21 -21
  3. package/README.md +163 -87
  4. package/bin/cli.js +62 -106
  5. package/client/.dockerignore +45 -45
  6. package/client/Dockerfile +68 -68
  7. package/client/dist/assets/index-DFSC53FP.css +1 -0
  8. package/client/dist/assets/index-fJ-1gTf5.js +613 -0
  9. package/client/dist/index.html +14 -0
  10. package/client/eslint.config.js +34 -16
  11. package/client/nginx.conf +66 -66
  12. package/client/package.json +61 -48
  13. package/client/src/App.tsx +27 -27
  14. package/client/src/Dashboard.tsx +1200 -1172
  15. package/client/src/ParameterPanel.tsx +302 -300
  16. package/client/src/components/AIAgentNode.tsx +315 -321
  17. package/client/src/components/APIKeyValidator.tsx +117 -117
  18. package/client/src/components/ClaudeChatModelNode.tsx +17 -17
  19. package/client/src/components/CredentialsModal.tsx +1200 -306
  20. package/client/src/components/GeminiChatModelNode.tsx +17 -17
  21. package/client/src/components/GenericNode.tsx +356 -356
  22. package/client/src/components/LocationParameterPanel.tsx +153 -153
  23. package/client/src/components/ModelNode.tsx +285 -285
  24. package/client/src/components/OpenAIChatModelNode.tsx +17 -17
  25. package/client/src/components/OutputPanel.tsx +470 -470
  26. package/client/src/components/ParameterRenderer.tsx +1873 -1873
  27. package/client/src/components/SkillEditorModal.tsx +3 -3
  28. package/client/src/components/SquareNode.tsx +812 -796
  29. package/client/src/components/ToolkitNode.tsx +365 -365
  30. package/client/src/components/auth/LoginPage.tsx +247 -247
  31. package/client/src/components/auth/ProtectedRoute.tsx +59 -59
  32. package/client/src/components/base/BaseChatModelNode.tsx +270 -270
  33. package/client/src/components/icons/AIProviderIcons.tsx +50 -50
  34. package/client/src/components/maps/GoogleMapsPicker.tsx +136 -136
  35. package/client/src/components/maps/MapsPreviewPanel.tsx +109 -109
  36. package/client/src/components/maps/index.ts +25 -25
  37. package/client/src/components/parameterPanel/InputSection.tsx +1094 -1094
  38. package/client/src/components/parameterPanel/LocationPanelLayout.tsx +64 -64
  39. package/client/src/components/parameterPanel/MapsSection.tsx +91 -91
  40. package/client/src/components/parameterPanel/MiddleSection.tsx +867 -571
  41. package/client/src/components/parameterPanel/OutputSection.tsx +80 -80
  42. package/client/src/components/parameterPanel/ParameterPanelLayout.tsx +81 -81
  43. package/client/src/components/parameterPanel/ToolSchemaEditor.tsx +436 -436
  44. package/client/src/components/parameterPanel/index.ts +41 -41
  45. package/client/src/components/shared/DataPanel.tsx +142 -142
  46. package/client/src/components/shared/JSONTreeRenderer.tsx +105 -105
  47. package/client/src/components/ui/AIResultModal.tsx +203 -203
  48. package/client/src/components/ui/ApiKeyInput.tsx +93 -0
  49. package/client/src/components/ui/CodeEditor.tsx +81 -81
  50. package/client/src/components/ui/CollapsibleSection.tsx +87 -87
  51. package/client/src/components/ui/ComponentItem.tsx +153 -153
  52. package/client/src/components/ui/ComponentPalette.tsx +320 -320
  53. package/client/src/components/ui/ConsolePanel.tsx +151 -43
  54. package/client/src/components/ui/ErrorBoundary.tsx +195 -195
  55. package/client/src/components/ui/InputNodesPanel.tsx +203 -203
  56. package/client/src/components/ui/MapSelector.tsx +313 -313
  57. package/client/src/components/ui/Modal.tsx +151 -148
  58. package/client/src/components/ui/NodeOutputPanel.tsx +1150 -1150
  59. package/client/src/components/ui/OutputDisplayPanel.tsx +381 -381
  60. package/client/src/components/ui/QRCodeDisplay.tsx +182 -0
  61. package/client/src/components/ui/TopToolbar.tsx +736 -736
  62. package/client/src/components/ui/WorkflowSidebar.tsx +293 -293
  63. package/client/src/config/antdTheme.ts +186 -186
  64. package/client/src/contexts/AuthContext.tsx +221 -221
  65. package/client/src/contexts/ThemeContext.tsx +42 -42
  66. package/client/src/contexts/WebSocketContext.tsx +2144 -1971
  67. package/client/src/factories/baseChatModelFactory.ts +255 -255
  68. package/client/src/hooks/useAndroidOperations.ts +118 -164
  69. package/client/src/hooks/useApiKeyValidation.ts +106 -106
  70. package/client/src/hooks/useApiKeys.ts +238 -238
  71. package/client/src/hooks/useAppTheme.ts +17 -17
  72. package/client/src/hooks/useComponentPalette.ts +50 -50
  73. package/client/src/hooks/useDragAndDrop.ts +123 -123
  74. package/client/src/hooks/useDragVariable.ts +88 -88
  75. package/client/src/hooks/useExecution.ts +319 -313
  76. package/client/src/hooks/useParameterPanel.ts +176 -176
  77. package/client/src/hooks/useReactFlowNodes.ts +188 -188
  78. package/client/src/hooks/useToolSchema.ts +209 -209
  79. package/client/src/hooks/useWhatsApp.ts +196 -196
  80. package/client/src/hooks/useWorkflowManagement.ts +45 -45
  81. package/client/src/index.css +314 -314
  82. package/client/src/nodeDefinitions/aiAgentNodes.ts +335 -335
  83. package/client/src/nodeDefinitions/aiModelNodes.ts +340 -340
  84. package/client/src/nodeDefinitions/androidServiceNodes.ts +383 -383
  85. package/client/src/nodeDefinitions/chatNodes.ts +135 -135
  86. package/client/src/nodeDefinitions/codeNodes.ts +54 -54
  87. package/client/src/nodeDefinitions/index.ts +14 -14
  88. package/client/src/nodeDefinitions/locationNodes.ts +462 -462
  89. package/client/src/nodeDefinitions/schedulerNodes.ts +220 -220
  90. package/client/src/nodeDefinitions/skillNodes.ts +17 -5
  91. package/client/src/nodeDefinitions/utilityNodes.ts +284 -284
  92. package/client/src/nodeDefinitions/whatsappNodes.ts +821 -865
  93. package/client/src/nodeDefinitions.ts +101 -103
  94. package/client/src/services/dynamicParameterService.ts +95 -95
  95. package/client/src/services/execution/aiAgentExecutionService.ts +34 -34
  96. package/client/src/services/executionService.ts +227 -231
  97. package/client/src/services/workflowApi.ts +91 -91
  98. package/client/src/store/useAppStore.ts +578 -581
  99. package/client/src/styles/theme.ts +513 -508
  100. package/client/src/styles/zIndex.ts +16 -16
  101. package/client/src/types/ComponentTypes.ts +38 -38
  102. package/client/src/types/INodeProperties.ts +287 -287
  103. package/client/src/types/NodeTypes.ts +27 -27
  104. package/client/src/utils/formatters.ts +32 -32
  105. package/client/src/utils/googleMapsLoader.ts +139 -139
  106. package/client/src/utils/locationUtils.ts +84 -84
  107. package/client/src/utils/nodeUtils.ts +30 -30
  108. package/client/src/utils/workflow.ts +29 -29
  109. package/client/src/vite-env.d.ts +12 -12
  110. package/client/tailwind.config.js +59 -59
  111. package/client/tsconfig.json +25 -25
  112. package/client/vite.config.js +35 -35
  113. package/install.ps1 +308 -0
  114. package/install.sh +343 -0
  115. package/package.json +81 -70
  116. package/scripts/build.js +174 -51
  117. package/scripts/clean.js +40 -40
  118. package/scripts/start.js +234 -210
  119. package/scripts/stop.js +301 -325
  120. package/server/.dockerignore +44 -44
  121. package/server/Dockerfile +45 -45
  122. package/server/constants.py +244 -249
  123. package/server/core/cache.py +460 -460
  124. package/server/core/config.py +127 -127
  125. package/server/core/container.py +98 -98
  126. package/server/core/database.py +1296 -1210
  127. package/server/core/logging.py +313 -313
  128. package/server/main.py +288 -288
  129. package/server/middleware/__init__.py +5 -5
  130. package/server/middleware/auth.py +89 -89
  131. package/server/models/auth.py +52 -52
  132. package/server/models/cache.py +24 -24
  133. package/server/models/database.py +235 -210
  134. package/server/models/nodes.py +435 -455
  135. package/server/pyproject.toml +75 -72
  136. package/server/requirements.txt +83 -83
  137. package/server/routers/android.py +294 -294
  138. package/server/routers/auth.py +203 -203
  139. package/server/routers/database.py +150 -150
  140. package/server/routers/maps.py +141 -141
  141. package/server/routers/nodejs_compat.py +288 -288
  142. package/server/routers/webhook.py +90 -90
  143. package/server/routers/websocket.py +2239 -2127
  144. package/server/routers/whatsapp.py +761 -761
  145. package/server/routers/workflow.py +199 -199
  146. package/server/services/ai.py +2444 -2414
  147. package/server/services/android_service.py +588 -588
  148. package/server/services/auth.py +130 -130
  149. package/server/services/chat_client.py +160 -160
  150. package/server/services/deployment/manager.py +706 -706
  151. package/server/services/event_waiter.py +675 -785
  152. package/server/services/execution/executor.py +1351 -1351
  153. package/server/services/execution/models.py +1 -1
  154. package/server/services/handlers/__init__.py +122 -126
  155. package/server/services/handlers/ai.py +390 -355
  156. package/server/services/handlers/android.py +69 -260
  157. package/server/services/handlers/code.py +278 -278
  158. package/server/services/handlers/http.py +193 -193
  159. package/server/services/handlers/tools.py +146 -32
  160. package/server/services/handlers/triggers.py +107 -107
  161. package/server/services/handlers/utility.py +822 -822
  162. package/server/services/handlers/whatsapp.py +423 -476
  163. package/server/services/maps.py +288 -288
  164. package/server/services/memory_store.py +103 -103
  165. package/server/services/node_executor.py +372 -375
  166. package/server/services/scheduler.py +155 -155
  167. package/server/services/skill_loader.py +1 -1
  168. package/server/services/status_broadcaster.py +834 -826
  169. package/server/services/temporal/__init__.py +23 -23
  170. package/server/services/temporal/activities.py +344 -344
  171. package/server/services/temporal/client.py +76 -76
  172. package/server/services/temporal/executor.py +147 -147
  173. package/server/services/temporal/worker.py +251 -251
  174. package/server/services/temporal/workflow.py +355 -355
  175. package/server/services/temporal/ws_client.py +236 -236
  176. package/server/services/text.py +110 -110
  177. package/server/services/user_auth.py +172 -172
  178. package/server/services/websocket_client.py +29 -29
  179. package/server/services/workflow.py +597 -597
  180. package/server/skills/android-skill/SKILL.md +4 -4
  181. package/server/skills/code-skill/SKILL.md +123 -89
  182. package/server/skills/maps-skill/SKILL.md +3 -3
  183. package/server/skills/memory-skill/SKILL.md +1 -1
  184. package/server/skills/web-search-skill/SKILL.md +154 -0
  185. package/server/skills/whatsapp-skill/SKILL.md +3 -3
  186. package/server/uv.lock +461 -100
  187. package/server/whatsapp-rpc/.dockerignore +30 -30
  188. package/server/whatsapp-rpc/Dockerfile +44 -44
  189. package/server/whatsapp-rpc/Dockerfile.web +17 -17
  190. package/server/whatsapp-rpc/README.md +139 -139
  191. package/server/whatsapp-rpc/bin/whatsapp-rpc-server +0 -0
  192. package/server/whatsapp-rpc/cli.js +95 -95
  193. package/server/whatsapp-rpc/configs/config.yaml +6 -6
  194. package/server/whatsapp-rpc/docker-compose.yml +35 -35
  195. package/server/whatsapp-rpc/docs/API.md +410 -410
  196. package/server/whatsapp-rpc/node_modules/.package-lock.json +259 -0
  197. package/server/whatsapp-rpc/node_modules/chalk/license +9 -0
  198. package/server/whatsapp-rpc/node_modules/chalk/package.json +83 -0
  199. package/server/whatsapp-rpc/node_modules/chalk/readme.md +297 -0
  200. package/server/whatsapp-rpc/node_modules/chalk/source/index.d.ts +325 -0
  201. package/server/whatsapp-rpc/node_modules/chalk/source/index.js +225 -0
  202. package/server/whatsapp-rpc/node_modules/chalk/source/utilities.js +33 -0
  203. package/server/whatsapp-rpc/node_modules/chalk/source/vendor/ansi-styles/index.d.ts +236 -0
  204. package/server/whatsapp-rpc/node_modules/chalk/source/vendor/ansi-styles/index.js +223 -0
  205. package/server/whatsapp-rpc/node_modules/chalk/source/vendor/supports-color/browser.d.ts +1 -0
  206. package/server/whatsapp-rpc/node_modules/chalk/source/vendor/supports-color/browser.js +34 -0
  207. package/server/whatsapp-rpc/node_modules/chalk/source/vendor/supports-color/index.d.ts +55 -0
  208. package/server/whatsapp-rpc/node_modules/chalk/source/vendor/supports-color/index.js +190 -0
  209. package/server/whatsapp-rpc/node_modules/commander/LICENSE +22 -0
  210. package/server/whatsapp-rpc/node_modules/commander/Readme.md +1148 -0
  211. package/server/whatsapp-rpc/node_modules/commander/esm.mjs +16 -0
  212. package/server/whatsapp-rpc/node_modules/commander/index.js +26 -0
  213. package/server/whatsapp-rpc/node_modules/commander/lib/argument.js +145 -0
  214. package/server/whatsapp-rpc/node_modules/commander/lib/command.js +2179 -0
  215. package/server/whatsapp-rpc/node_modules/commander/lib/error.js +43 -0
  216. package/server/whatsapp-rpc/node_modules/commander/lib/help.js +462 -0
  217. package/server/whatsapp-rpc/node_modules/commander/lib/option.js +329 -0
  218. package/server/whatsapp-rpc/node_modules/commander/lib/suggestSimilar.js +100 -0
  219. package/server/whatsapp-rpc/node_modules/commander/package-support.json +16 -0
  220. package/server/whatsapp-rpc/node_modules/commander/package.json +80 -0
  221. package/server/whatsapp-rpc/node_modules/commander/typings/esm.d.mts +3 -0
  222. package/server/whatsapp-rpc/node_modules/commander/typings/index.d.ts +884 -0
  223. package/server/whatsapp-rpc/node_modules/cross-spawn/LICENSE +21 -0
  224. package/server/whatsapp-rpc/node_modules/cross-spawn/README.md +89 -0
  225. package/server/whatsapp-rpc/node_modules/cross-spawn/index.js +39 -0
  226. package/server/whatsapp-rpc/node_modules/cross-spawn/lib/enoent.js +59 -0
  227. package/server/whatsapp-rpc/node_modules/cross-spawn/lib/parse.js +91 -0
  228. package/server/whatsapp-rpc/node_modules/cross-spawn/lib/util/escape.js +47 -0
  229. package/server/whatsapp-rpc/node_modules/cross-spawn/lib/util/readShebang.js +23 -0
  230. package/server/whatsapp-rpc/node_modules/cross-spawn/lib/util/resolveCommand.js +52 -0
  231. package/server/whatsapp-rpc/node_modules/cross-spawn/package.json +73 -0
  232. package/server/whatsapp-rpc/node_modules/execa/index.d.ts +955 -0
  233. package/server/whatsapp-rpc/node_modules/execa/index.js +309 -0
  234. package/server/whatsapp-rpc/node_modules/execa/lib/command.js +119 -0
  235. package/server/whatsapp-rpc/node_modules/execa/lib/error.js +87 -0
  236. package/server/whatsapp-rpc/node_modules/execa/lib/kill.js +102 -0
  237. package/server/whatsapp-rpc/node_modules/execa/lib/pipe.js +42 -0
  238. package/server/whatsapp-rpc/node_modules/execa/lib/promise.js +36 -0
  239. package/server/whatsapp-rpc/node_modules/execa/lib/stdio.js +49 -0
  240. package/server/whatsapp-rpc/node_modules/execa/lib/stream.js +133 -0
  241. package/server/whatsapp-rpc/node_modules/execa/lib/verbose.js +19 -0
  242. package/server/whatsapp-rpc/node_modules/execa/license +9 -0
  243. package/server/whatsapp-rpc/node_modules/execa/package.json +90 -0
  244. package/server/whatsapp-rpc/node_modules/execa/readme.md +822 -0
  245. package/server/whatsapp-rpc/node_modules/get-stream/license +9 -0
  246. package/server/whatsapp-rpc/node_modules/get-stream/package.json +53 -0
  247. package/server/whatsapp-rpc/node_modules/get-stream/readme.md +291 -0
  248. package/server/whatsapp-rpc/node_modules/get-stream/source/array-buffer.js +84 -0
  249. package/server/whatsapp-rpc/node_modules/get-stream/source/array.js +32 -0
  250. package/server/whatsapp-rpc/node_modules/get-stream/source/buffer.js +20 -0
  251. package/server/whatsapp-rpc/node_modules/get-stream/source/contents.js +101 -0
  252. package/server/whatsapp-rpc/node_modules/get-stream/source/index.d.ts +119 -0
  253. package/server/whatsapp-rpc/node_modules/get-stream/source/index.js +5 -0
  254. package/server/whatsapp-rpc/node_modules/get-stream/source/string.js +36 -0
  255. package/server/whatsapp-rpc/node_modules/get-stream/source/utils.js +11 -0
  256. package/server/whatsapp-rpc/node_modules/get-them-args/LICENSE +21 -0
  257. package/server/whatsapp-rpc/node_modules/get-them-args/README.md +95 -0
  258. package/server/whatsapp-rpc/node_modules/get-them-args/index.js +97 -0
  259. package/server/whatsapp-rpc/node_modules/get-them-args/package.json +36 -0
  260. package/server/whatsapp-rpc/node_modules/human-signals/LICENSE +201 -0
  261. package/server/whatsapp-rpc/node_modules/human-signals/README.md +168 -0
  262. package/server/whatsapp-rpc/node_modules/human-signals/build/src/core.js +273 -0
  263. package/server/whatsapp-rpc/node_modules/human-signals/build/src/main.d.ts +73 -0
  264. package/server/whatsapp-rpc/node_modules/human-signals/build/src/main.js +70 -0
  265. package/server/whatsapp-rpc/node_modules/human-signals/build/src/realtime.js +16 -0
  266. package/server/whatsapp-rpc/node_modules/human-signals/build/src/signals.js +34 -0
  267. package/server/whatsapp-rpc/node_modules/human-signals/package.json +61 -0
  268. package/server/whatsapp-rpc/node_modules/is-stream/index.d.ts +81 -0
  269. package/server/whatsapp-rpc/node_modules/is-stream/index.js +29 -0
  270. package/server/whatsapp-rpc/node_modules/is-stream/license +9 -0
  271. package/server/whatsapp-rpc/node_modules/is-stream/package.json +44 -0
  272. package/server/whatsapp-rpc/node_modules/is-stream/readme.md +60 -0
  273. package/server/whatsapp-rpc/node_modules/isexe/LICENSE +15 -0
  274. package/server/whatsapp-rpc/node_modules/isexe/README.md +51 -0
  275. package/server/whatsapp-rpc/node_modules/isexe/index.js +57 -0
  276. package/server/whatsapp-rpc/node_modules/isexe/mode.js +41 -0
  277. package/server/whatsapp-rpc/node_modules/isexe/package.json +31 -0
  278. package/server/whatsapp-rpc/node_modules/isexe/test/basic.js +221 -0
  279. package/server/whatsapp-rpc/node_modules/isexe/windows.js +42 -0
  280. package/server/whatsapp-rpc/node_modules/kill-port/.editorconfig +12 -0
  281. package/server/whatsapp-rpc/node_modules/kill-port/.gitattributes +1 -0
  282. package/server/whatsapp-rpc/node_modules/kill-port/LICENSE +21 -0
  283. package/server/whatsapp-rpc/node_modules/kill-port/README.md +140 -0
  284. package/server/whatsapp-rpc/node_modules/kill-port/cli.js +25 -0
  285. package/server/whatsapp-rpc/node_modules/kill-port/example.js +21 -0
  286. package/server/whatsapp-rpc/node_modules/kill-port/index.js +46 -0
  287. package/server/whatsapp-rpc/node_modules/kill-port/logo.png +0 -0
  288. package/server/whatsapp-rpc/node_modules/kill-port/package.json +41 -0
  289. package/server/whatsapp-rpc/node_modules/kill-port/pnpm-lock.yaml +4606 -0
  290. package/server/whatsapp-rpc/node_modules/kill-port/test.js +16 -0
  291. package/server/whatsapp-rpc/node_modules/merge-stream/LICENSE +21 -0
  292. package/server/whatsapp-rpc/node_modules/merge-stream/README.md +78 -0
  293. package/server/whatsapp-rpc/node_modules/merge-stream/index.js +41 -0
  294. package/server/whatsapp-rpc/node_modules/merge-stream/package.json +19 -0
  295. package/server/whatsapp-rpc/node_modules/mimic-fn/index.d.ts +52 -0
  296. package/server/whatsapp-rpc/node_modules/mimic-fn/index.js +71 -0
  297. package/server/whatsapp-rpc/node_modules/mimic-fn/license +9 -0
  298. package/server/whatsapp-rpc/node_modules/mimic-fn/package.json +45 -0
  299. package/server/whatsapp-rpc/node_modules/mimic-fn/readme.md +90 -0
  300. package/server/whatsapp-rpc/node_modules/npm-run-path/index.d.ts +90 -0
  301. package/server/whatsapp-rpc/node_modules/npm-run-path/index.js +52 -0
  302. package/server/whatsapp-rpc/node_modules/npm-run-path/license +9 -0
  303. package/server/whatsapp-rpc/node_modules/npm-run-path/node_modules/path-key/index.d.ts +31 -0
  304. package/server/whatsapp-rpc/node_modules/npm-run-path/node_modules/path-key/index.js +12 -0
  305. package/server/whatsapp-rpc/node_modules/npm-run-path/node_modules/path-key/license +9 -0
  306. package/server/whatsapp-rpc/node_modules/npm-run-path/node_modules/path-key/package.json +41 -0
  307. package/server/whatsapp-rpc/node_modules/npm-run-path/node_modules/path-key/readme.md +57 -0
  308. package/server/whatsapp-rpc/node_modules/npm-run-path/package.json +49 -0
  309. package/server/whatsapp-rpc/node_modules/npm-run-path/readme.md +104 -0
  310. package/server/whatsapp-rpc/node_modules/onetime/index.d.ts +59 -0
  311. package/server/whatsapp-rpc/node_modules/onetime/index.js +41 -0
  312. package/server/whatsapp-rpc/node_modules/onetime/license +9 -0
  313. package/server/whatsapp-rpc/node_modules/onetime/package.json +45 -0
  314. package/server/whatsapp-rpc/node_modules/onetime/readme.md +94 -0
  315. package/server/whatsapp-rpc/node_modules/path-key/index.d.ts +40 -0
  316. package/server/whatsapp-rpc/node_modules/path-key/index.js +16 -0
  317. package/server/whatsapp-rpc/node_modules/path-key/license +9 -0
  318. package/server/whatsapp-rpc/node_modules/path-key/package.json +39 -0
  319. package/server/whatsapp-rpc/node_modules/path-key/readme.md +61 -0
  320. package/server/whatsapp-rpc/node_modules/shebang-command/index.js +19 -0
  321. package/server/whatsapp-rpc/node_modules/shebang-command/license +9 -0
  322. package/server/whatsapp-rpc/node_modules/shebang-command/package.json +34 -0
  323. package/server/whatsapp-rpc/node_modules/shebang-command/readme.md +34 -0
  324. package/server/whatsapp-rpc/node_modules/shebang-regex/index.d.ts +22 -0
  325. package/server/whatsapp-rpc/node_modules/shebang-regex/index.js +2 -0
  326. package/server/whatsapp-rpc/node_modules/shebang-regex/license +9 -0
  327. package/server/whatsapp-rpc/node_modules/shebang-regex/package.json +35 -0
  328. package/server/whatsapp-rpc/node_modules/shebang-regex/readme.md +33 -0
  329. package/server/whatsapp-rpc/node_modules/shell-exec/LICENSE +21 -0
  330. package/server/whatsapp-rpc/node_modules/shell-exec/README.md +60 -0
  331. package/server/whatsapp-rpc/node_modules/shell-exec/index.js +47 -0
  332. package/server/whatsapp-rpc/node_modules/shell-exec/package.json +29 -0
  333. package/server/whatsapp-rpc/node_modules/signal-exit/LICENSE.txt +16 -0
  334. package/server/whatsapp-rpc/node_modules/signal-exit/README.md +74 -0
  335. package/server/whatsapp-rpc/node_modules/signal-exit/dist/cjs/browser.d.ts +12 -0
  336. package/server/whatsapp-rpc/node_modules/signal-exit/dist/cjs/browser.d.ts.map +1 -0
  337. package/server/whatsapp-rpc/node_modules/signal-exit/dist/cjs/browser.js +10 -0
  338. package/server/whatsapp-rpc/node_modules/signal-exit/dist/cjs/browser.js.map +1 -0
  339. package/server/whatsapp-rpc/node_modules/signal-exit/dist/cjs/index.d.ts +48 -0
  340. package/server/whatsapp-rpc/node_modules/signal-exit/dist/cjs/index.d.ts.map +1 -0
  341. package/server/whatsapp-rpc/node_modules/signal-exit/dist/cjs/index.js +279 -0
  342. package/server/whatsapp-rpc/node_modules/signal-exit/dist/cjs/index.js.map +1 -0
  343. package/server/whatsapp-rpc/node_modules/signal-exit/dist/cjs/package.json +3 -0
  344. package/server/whatsapp-rpc/node_modules/signal-exit/dist/cjs/signals.d.ts +29 -0
  345. package/server/whatsapp-rpc/node_modules/signal-exit/dist/cjs/signals.d.ts.map +1 -0
  346. package/server/whatsapp-rpc/node_modules/signal-exit/dist/cjs/signals.js +42 -0
  347. package/server/whatsapp-rpc/node_modules/signal-exit/dist/cjs/signals.js.map +1 -0
  348. package/server/whatsapp-rpc/node_modules/signal-exit/dist/mjs/browser.d.ts +12 -0
  349. package/server/whatsapp-rpc/node_modules/signal-exit/dist/mjs/browser.d.ts.map +1 -0
  350. package/server/whatsapp-rpc/node_modules/signal-exit/dist/mjs/browser.js +4 -0
  351. package/server/whatsapp-rpc/node_modules/signal-exit/dist/mjs/browser.js.map +1 -0
  352. package/server/whatsapp-rpc/node_modules/signal-exit/dist/mjs/index.d.ts +48 -0
  353. package/server/whatsapp-rpc/node_modules/signal-exit/dist/mjs/index.d.ts.map +1 -0
  354. package/server/whatsapp-rpc/node_modules/signal-exit/dist/mjs/index.js +275 -0
  355. package/server/whatsapp-rpc/node_modules/signal-exit/dist/mjs/index.js.map +1 -0
  356. package/server/whatsapp-rpc/node_modules/signal-exit/dist/mjs/package.json +3 -0
  357. package/server/whatsapp-rpc/node_modules/signal-exit/dist/mjs/signals.d.ts +29 -0
  358. package/server/whatsapp-rpc/node_modules/signal-exit/dist/mjs/signals.d.ts.map +1 -0
  359. package/server/whatsapp-rpc/node_modules/signal-exit/dist/mjs/signals.js +39 -0
  360. package/server/whatsapp-rpc/node_modules/signal-exit/dist/mjs/signals.js.map +1 -0
  361. package/server/whatsapp-rpc/node_modules/signal-exit/package.json +106 -0
  362. package/server/whatsapp-rpc/node_modules/strip-final-newline/index.js +14 -0
  363. package/server/whatsapp-rpc/node_modules/strip-final-newline/license +9 -0
  364. package/server/whatsapp-rpc/node_modules/strip-final-newline/package.json +43 -0
  365. package/server/whatsapp-rpc/node_modules/strip-final-newline/readme.md +35 -0
  366. package/server/whatsapp-rpc/node_modules/which/CHANGELOG.md +166 -0
  367. package/server/whatsapp-rpc/node_modules/which/LICENSE +15 -0
  368. package/server/whatsapp-rpc/node_modules/which/README.md +54 -0
  369. package/server/whatsapp-rpc/node_modules/which/bin/node-which +52 -0
  370. package/server/whatsapp-rpc/node_modules/which/package.json +43 -0
  371. package/server/whatsapp-rpc/node_modules/which/which.js +125 -0
  372. package/server/whatsapp-rpc/package-lock.json +272 -0
  373. package/server/whatsapp-rpc/package.json +30 -30
  374. package/server/whatsapp-rpc/schema.json +1294 -1294
  375. package/server/whatsapp-rpc/scripts/clean.cjs +66 -66
  376. package/server/whatsapp-rpc/scripts/cli.js +162 -162
  377. package/server/whatsapp-rpc/src/go/whatsapp/history.go +166 -166
  378. package/server/whatsapp-rpc/src/python/pyproject.toml +15 -15
  379. package/server/whatsapp-rpc/src/python/whatsapp_rpc/__init__.py +4 -4
  380. package/server/whatsapp-rpc/src/python/whatsapp_rpc/client.py +427 -427
  381. package/server/whatsapp-rpc/web/app.py +609 -609
  382. package/server/whatsapp-rpc/web/requirements.txt +6 -6
  383. package/server/whatsapp-rpc/web/rpc_client.py +427 -427
  384. package/server/whatsapp-rpc/web/static/openapi.yaml +59 -59
  385. package/server/whatsapp-rpc/web/templates/base.html +149 -149
  386. package/server/whatsapp-rpc/web/templates/contacts.html +240 -240
  387. package/server/whatsapp-rpc/web/templates/dashboard.html +319 -319
  388. package/server/whatsapp-rpc/web/templates/groups.html +328 -328
  389. package/server/whatsapp-rpc/web/templates/messages.html +465 -465
  390. package/server/whatsapp-rpc/web/templates/messaging.html +680 -680
  391. package/server/whatsapp-rpc/web/templates/send.html +258 -258
  392. package/server/whatsapp-rpc/web/templates/settings.html +459 -459
  393. package/client/src/components/ui/AndroidSettingsPanel.tsx +0 -401
  394. package/client/src/components/ui/WhatsAppSettingsPanel.tsx +0 -345
  395. package/client/src/nodeDefinitions/androidDeviceNodes.ts +0 -140
  396. package/docker-compose.prod.yml +0 -107
  397. package/docker-compose.yml +0 -104
  398. package/docs-MachinaOs/README.md +0 -85
  399. package/docs-MachinaOs/deployment/docker.mdx +0 -228
  400. package/docs-MachinaOs/deployment/production.mdx +0 -345
  401. package/docs-MachinaOs/docs.json +0 -75
  402. package/docs-MachinaOs/faq.mdx +0 -309
  403. package/docs-MachinaOs/favicon.svg +0 -5
  404. package/docs-MachinaOs/installation.mdx +0 -160
  405. package/docs-MachinaOs/introduction.mdx +0 -114
  406. package/docs-MachinaOs/logo/dark.svg +0 -6
  407. package/docs-MachinaOs/logo/light.svg +0 -6
  408. package/docs-MachinaOs/nodes/ai-agent.mdx +0 -216
  409. package/docs-MachinaOs/nodes/ai-models.mdx +0 -240
  410. package/docs-MachinaOs/nodes/android.mdx +0 -411
  411. package/docs-MachinaOs/nodes/overview.mdx +0 -181
  412. package/docs-MachinaOs/nodes/schedulers.mdx +0 -316
  413. package/docs-MachinaOs/nodes/webhooks.mdx +0 -330
  414. package/docs-MachinaOs/nodes/whatsapp.mdx +0 -305
  415. package/docs-MachinaOs/quickstart.mdx +0 -119
  416. package/docs-MachinaOs/tutorials/ai-agent-workflow.mdx +0 -177
  417. package/docs-MachinaOs/tutorials/android-automation.mdx +0 -242
  418. package/docs-MachinaOs/tutorials/first-workflow.mdx +0 -134
  419. package/docs-MachinaOs/tutorials/whatsapp-automation.mdx +0 -185
  420. package/nul +0 -0
  421. package/scripts/check-ports.ps1 +0 -33
  422. package/scripts/kill-port.ps1 +0 -154
@@ -1,1351 +1,1351 @@
1
- """Workflow executor with Conductor decide pattern and parallel execution.
2
-
3
- Implements:
4
- - Conductor-style workflow_decide() for orchestration
5
- - Prefect-style task caching for idempotency
6
- - Fork/Join parallel execution with asyncio.wait (FIRST_COMPLETED pattern)
7
- - Dynamic workflow branching at runtime
8
- - Proper handling of long-running trigger nodes in parallel batches
9
- """
10
-
11
- import asyncio
12
- import time
13
- from collections import defaultdict
14
- from datetime import datetime
15
- from typing import Dict, Any, List, Optional, Callable, Awaitable, Set
16
-
17
- from core.logging import get_logger
18
- from constants import WORKFLOW_TRIGGER_TYPES
19
- from .models import (
20
- ExecutionContext,
21
- TaskStatus,
22
- WorkflowStatus,
23
- NodeExecution,
24
- hash_inputs,
25
- RetryPolicy,
26
- get_retry_policy,
27
- )
28
- from .cache import ExecutionCache
29
- from .conditions import evaluate_condition, decide_next_edges
30
- from .dlq import create_dlq_handler, DLQHandlerProtocol, NullDLQHandler
31
-
32
- logger = get_logger(__name__)
33
-
34
-
35
def is_trigger_node(node_type: str) -> bool:
    """Return True when *node_type* marks a workflow entry point.

    Trigger nodes have no input handles and act as the starting points of a
    workflow; the recognised trigger types are declared in
    ``WORKFLOW_TRIGGER_TYPES`` (constants.py).

    Args:
        node_type: Node type identifier to test.

    Returns:
        True if ``node_type`` is one of the trigger types.
    """
    return node_type in WORKFLOW_TRIGGER_TYPES
48
-
49
-
50
- class WorkflowExecutor:
51
- """Executes workflows using Conductor decide pattern with parallel execution.
52
-
53
- Features:
54
- - Isolated ExecutionContext per workflow run
55
- - Parallel execution of independent nodes (Fork/Join)
56
- - Result caching for idempotency (Prefect pattern)
57
- - Distributed locking to prevent race conditions
58
- - Event history for debugging and recovery
59
- """
60
-
61
- def __init__(self, cache: ExecutionCache,
62
- node_executor: Callable[[str, str, Dict, Dict], Awaitable[Dict]],
63
- status_callback: Callable[[str, str, Dict], Awaitable[None]] = None,
64
- dlq_enabled: bool = False):
65
- """Initialize executor.
66
-
67
- Args:
68
- cache: ExecutionCache for Redis persistence
69
- node_executor: Async function to execute a single node
70
- Signature: async def execute(node_id, node_type, params, context) -> result
71
- status_callback: Optional async callback for status updates
72
- Signature: async def callback(node_id, status, data)
73
- dlq_enabled: Whether to add failed nodes to Dead Letter Queue
74
- """
75
- self.cache = cache
76
- self.node_executor = node_executor
77
- self.status_callback = status_callback
78
-
79
- # Create DLQ handler (modular - uses Null Object pattern when disabled)
80
- self.dlq = create_dlq_handler(cache, enabled=dlq_enabled)
81
-
82
- # Active executions (in-memory for fast lookup)
83
- self._active_contexts: Dict[str, ExecutionContext] = {}
84
-
85
- # =========================================================================
86
- # EXECUTION ENTRY POINTS
87
- # =========================================================================
88
-
89
    async def execute_workflow(self, workflow_id: str, nodes: List[Dict],
                               edges: List[Dict], session_id: str = "default",
                               enable_caching: bool = True) -> Dict[str, Any]:
        """Execute a workflow with parallel node execution.

        Creates an isolated ExecutionContext, persists initial state, runs
        the Conductor-style decide loop, and returns a summary dict. The
        context is removed from the in-memory registry on every exit path
        (see the ``finally`` clause).

        Args:
            workflow_id: Workflow identifier
            nodes: List of workflow nodes
            edges: List of edges connecting nodes
            session_id: Session identifier
            enable_caching: Whether to use result caching

        Returns:
            Execution result dict. Always includes ``success``,
            ``execution_id``, ``status`` and ``execution_time``; on the
            normal path it also carries ``nodes_executed``, ``outputs``,
            ``errors`` and ``timestamp``; on cancellation/failure an
            ``error`` string instead.
        """
        start_time = time.time()

        # Create isolated execution context (one per run; no shared state)
        ctx = ExecutionContext.create(
            workflow_id=workflow_id,
            session_id=session_id,
            nodes=nodes,
            edges=edges,
        )

        # Compute execution layers (for parallel batches)
        ctx.execution_order = self._compute_execution_layers(nodes, edges)

        logger.info("Starting workflow execution",
                    execution_id=ctx.execution_id,
                    workflow_id=workflow_id,
                    node_count=len(nodes),
                    layers=len(ctx.execution_order))

        # Track in memory so cancel_execution can find this run
        self._active_contexts[ctx.execution_id] = ctx

        # Persist initial state before any node runs
        ctx.status = WorkflowStatus.RUNNING
        ctx.started_at = time.time()
        await self.cache.save_execution_state(ctx)

        # Record workflow_started in the event history
        await self.cache.add_event(ctx.execution_id, "workflow_started", {
            "workflow_id": workflow_id,
            "node_count": len(nodes),
        })

        try:
            # Run the decide loop until nothing more can be scheduled
            await self._workflow_decide(ctx, enable_caching)

            # Determine final status; note: if neither branch matches the
            # status remains whatever the decide loop left it at
            if ctx.all_nodes_complete():
                ctx.status = WorkflowStatus.COMPLETED
            elif ctx.errors:
                ctx.status = WorkflowStatus.FAILED

            ctx.completed_at = time.time()
            await self.cache.save_execution_state(ctx)

            # Record workflow_completed in the event history
            await self.cache.add_event(ctx.execution_id, "workflow_completed", {
                "status": ctx.status.value,
                "completed_nodes": len(ctx.get_completed_nodes()),
                "execution_time": ctx.completed_at - ctx.started_at,
            })

            return {
                "success": ctx.status == WorkflowStatus.COMPLETED,
                "execution_id": ctx.execution_id,
                "status": ctx.status.value,
                "nodes_executed": ctx.get_completed_nodes(),
                "outputs": ctx.outputs,
                "errors": ctx.errors,
                "execution_time": time.time() - start_time,
                # NOTE(review): naive local timestamp — presumably intended;
                # confirm consumers don't expect UTC/aware datetimes
                "timestamp": datetime.now().isoformat(),
            }

        except asyncio.CancelledError:
            # Persist terminal state and report; returning here (rather than
            # re-raising) deliberately absorbs the cancellation
            ctx.status = WorkflowStatus.CANCELLED
            ctx.completed_at = time.time()
            await self.cache.save_execution_state(ctx)
            await self.cache.add_event(ctx.execution_id, "workflow_cancelled", {})
            return {
                "success": False,
                "execution_id": ctx.execution_id,
                "status": "cancelled",
                "error": "Cancelled by user",
                "execution_time": time.time() - start_time,
            }

        except Exception as e:
            # Unexpected failure: record the error and persist FAILED state
            logger.error("Workflow execution failed", execution_id=ctx.execution_id,
                         error=str(e))
            ctx.status = WorkflowStatus.FAILED
            ctx.errors.append({"error": str(e), "timestamp": time.time()})
            await self.cache.save_execution_state(ctx)
            await self.cache.add_event(ctx.execution_id, "workflow_failed", {
                "error": str(e),
            })
            return {
                "success": False,
                "execution_id": ctx.execution_id,
                "status": "failed",
                "error": str(e),
                "execution_time": time.time() - start_time,
            }

        finally:
            # Cleanup: drop from the in-memory registry on every exit path
            self._active_contexts.pop(ctx.execution_id, None)
201
-
202
- async def cancel_execution(self, execution_id: str) -> bool:
203
- """Cancel a running execution.
204
-
205
- Args:
206
- execution_id: Execution to cancel
207
-
208
- Returns:
209
- True if cancelled successfully
210
- """
211
- ctx = self._active_contexts.get(execution_id)
212
- if ctx:
213
- ctx.status = WorkflowStatus.CANCELLED
214
- for node_exec in ctx.node_executions.values():
215
- if node_exec.status in (TaskStatus.PENDING, TaskStatus.SCHEDULED,
216
- TaskStatus.RUNNING, TaskStatus.WAITING):
217
- node_exec.status = TaskStatus.CANCELLED
218
- await self.cache.save_execution_state(ctx)
219
- logger.info("Execution cancelled", execution_id=execution_id)
220
- return True
221
- return False
222
-
223
- # =========================================================================
224
- # CONDUCTOR DECIDE PATTERN
225
- # =========================================================================
226
-
227
- async def _workflow_decide(self, ctx: ExecutionContext,
228
- enable_caching: bool = True) -> None:
229
- """Core orchestration loop - Conductor's decide pattern.
230
-
231
- Evaluates current state, finds ready nodes, executes them in parallel,
232
- then recurses until all nodes complete or error occurs.
233
-
234
- Args:
235
- ctx: ExecutionContext to process
236
- enable_caching: Whether to use result caching
237
- """
238
- # Distributed lock prevents concurrent decides for same execution
239
- try:
240
- async with self.cache.distributed_lock(
241
- f"execution:{ctx.execution_id}:decide", timeout=60
242
- ):
243
- await self._decide_iteration(ctx, enable_caching)
244
- except TimeoutError:
245
- logger.warning("Could not acquire decide lock",
246
- execution_id=ctx.execution_id)
247
- # Retry after short delay
248
- await asyncio.sleep(0.5)
249
- await self._workflow_decide(ctx, enable_caching)
250
-
251
- async def _decide_iteration(self, ctx: ExecutionContext,
252
- enable_caching: bool) -> None:
253
- """Continuous scheduling loop - Temporal/Conductor pattern.
254
-
255
- When any node completes, immediately check for newly-ready dependents
256
- and start them without waiting for entire layer to complete.
257
-
258
- Example: Cron3 (5s) completes -> immediately start WS3,
259
- even while Cron1 (20s) is still running.
260
- """
261
- # Check if cancelled
262
- if ctx.status == WorkflowStatus.CANCELLED:
263
- return
264
-
265
- # Find initial ready nodes
266
- ready_nodes = self._find_ready_nodes(ctx)
267
-
268
- if not ready_nodes:
269
- if ctx.all_nodes_complete():
270
- logger.info("All nodes complete", execution_id=ctx.execution_id)
271
- else:
272
- pending = ctx.get_pending_nodes()
273
- if pending:
274
- logger.warning("Stuck: pending nodes with unsatisfied deps",
275
- execution_id=ctx.execution_id,
276
- pending=pending)
277
- return
278
-
279
- logger.info("Starting continuous execution",
280
- execution_id=ctx.execution_id,
281
- initial_batch=len(ready_nodes),
282
- nodes=[n.node_id for n in ready_nodes])
283
-
284
- # Execute with continuous scheduling - new pattern
285
- await self._execute_with_continuous_scheduling(ctx, ready_nodes, enable_caching)
286
-
287
- # Save final state
288
- await self.cache.save_execution_state(ctx)
289
-
290
- # =========================================================================
291
- # CONTINUOUS SCHEDULING (Temporal/Conductor Pattern)
292
- # =========================================================================
293
-
294
    async def _execute_with_continuous_scheduling(
        self,
        ctx: ExecutionContext,
        initial_nodes: List[NodeExecution],
        enable_caching: bool
    ) -> None:
        """Execute workflow with continuous scheduling.

        Modern pattern (Temporal/Conductor style): when any node completes,
        immediately check for and start newly-ready dependent nodes. This
        enables true parallel pipelines where each path progresses
        independently.

        Uses asyncio.wait(FIRST_COMPLETED) to process completions immediately.

        Args:
            ctx: ExecutionContext
            initial_nodes: Initial batch of ready nodes
            enable_caching: Whether to use result caching
        """
        # Track all running tasks: task -> NodeExecution
        task_to_node: Dict[asyncio.Task, NodeExecution] = {}
        pending_tasks: Set[asyncio.Task] = set()
        workflow_failed = False

        def create_node_task(node: NodeExecution) -> asyncio.Task:
            """Create and track a task for node execution."""
            node.status = TaskStatus.SCHEDULED
            task = asyncio.create_task(
                self._execute_node_with_retry(ctx, node, enable_caching),
                name=f"node_{node.node_id}"
            )
            task_to_node[task] = node
            # pending_tasks is rebound by asyncio.wait() below; the closure
            # always adds to whichever set is currently bound, so tasks
            # scheduled mid-loop join the next wait cycle.
            pending_tasks.add(task)
            return task

        # Start the initial batch of ready nodes
        for node in initial_nodes:
            create_node_task(node)
            await self._notify_status(node.node_id, "scheduled", {})
            logger.info("Scheduled node", node_id=node.node_id)

        # Process completions and schedule new nodes continuously
        while pending_tasks and not workflow_failed:
            if ctx.status == WorkflowStatus.CANCELLED:
                # Cooperative cancellation: cancel all pending tasks and stop
                for task in pending_tasks:
                    task.cancel()
                break

            # Wait for ANY task to complete (rebinds pending_tasks to the
            # still-running subset)
            done, pending_tasks = await asyncio.wait(
                pending_tasks,
                return_when=asyncio.FIRST_COMPLETED
            )

            # Process each completed task
            for task in done:
                node = task_to_node[task]
                newly_ready = []

                try:
                    result = task.result()

                    if isinstance(result, Exception):
                        # Defensive: an Exception *returned* (not raised) by
                        # the executor is treated as a node failure.
                        node.status = TaskStatus.FAILED
                        node.error = str(result)
                        node.completed_at = time.time()
                        ctx.errors.append({
                            "node_id": node.node_id,
                            "error": str(result),
                            "timestamp": time.time(),
                        })
                        await self._notify_status(node.node_id, "error", {"error": str(result)})
                        logger.error("Node failed", node_id=node.node_id, error=str(result))
                        workflow_failed = True

                    elif result.get("retries_exhausted"):
                        # Node failed after all retries; the retry wrapper has
                        # already routed it to the DLQ, so no notify here.
                        node.status = TaskStatus.FAILED
                        node.error = result.get("error", "Unknown error")
                        node.completed_at = time.time()
                        ctx.errors.append({
                            "node_id": node.node_id,
                            "error": node.error,
                            "retries_exhausted": True,
                            "timestamp": time.time(),
                        })
                        workflow_failed = True

                    elif not result.get("success"):
                        # Node reported failure without exhausting retries
                        node.status = TaskStatus.FAILED
                        node.error = result.get("error", "Unknown error")
                        node.completed_at = time.time()
                        ctx.errors.append({
                            "node_id": node.node_id,
                            "error": node.error,
                            "timestamp": time.time(),
                        })
                        await self._notify_status(node.node_id, "error", {"error": node.error})
                        logger.error("Node failed", node_id=node.node_id, error=node.error)
                        workflow_failed = True

                    else:
                        # Success - checkpoint and find newly ready nodes
                        ctx.add_checkpoint(node.node_id)
                        logger.info("Node completed", node_id=node.node_id)

                        # Find nodes that are now ready (their dependencies just completed)
                        newly_ready = self._find_ready_nodes(ctx)

                except asyncio.CancelledError:
                    node.status = TaskStatus.CANCELLED
                    node.completed_at = time.time()
                    logger.info("Node cancelled", node_id=node.node_id)

                except Exception as e:
                    # task.result() re-raised an exception from the node task
                    node.status = TaskStatus.FAILED
                    node.error = str(e)
                    node.completed_at = time.time()
                    ctx.errors.append({
                        "node_id": node.node_id,
                        "error": str(e),
                        "timestamp": time.time(),
                    })
                    await self._notify_status(node.node_id, "error", {"error": str(e)})
                    logger.error("Node exception", node_id=node.node_id, error=str(e))
                    workflow_failed = True

                # Schedule newly ready nodes immediately (without waiting for
                # the rest of the current batch)
                if newly_ready and not workflow_failed:
                    for ready_node in newly_ready:
                        create_node_task(ready_node)
                        await self._notify_status(ready_node.node_id, "scheduled", {})
                        logger.info("Scheduled dependent node",
                                    node_id=ready_node.node_id,
                                    triggered_by=node.node_id)

            # Periodic state save after each completion batch
            await self.cache.save_execution_state(ctx)

        # Handle workflow failure - cancel remaining tasks so long-running
        # trigger nodes don't block forever
        if workflow_failed and pending_tasks:
            logger.info("Workflow failed, cancelling remaining tasks",
                        pending_count=len(pending_tasks))

            for task in pending_tasks:
                task.cancel()

            # Wait for cancelled tasks to actually finish
            if pending_tasks:
                cancelled_done, _ = await asyncio.wait(
                    pending_tasks,
                    return_when=asyncio.ALL_COMPLETED
                )

                for task in cancelled_done:
                    node = task_to_node.get(task)
                    if node and node.status not in (TaskStatus.COMPLETED, TaskStatus.FAILED):
                        node.status = TaskStatus.CANCELLED
                        node.completed_at = time.time()

            ctx.status = WorkflowStatus.FAILED
455
-
456
- # =========================================================================
457
- # PARALLEL EXECUTION (Legacy - Fork/Join with FIRST_COMPLETED pattern)
458
- # =========================================================================
459
-
460
    async def _execute_parallel_nodes(self, ctx: ExecutionContext,
                                      nodes: List[NodeExecution],
                                      enable_caching: bool) -> None:
        """Execute multiple nodes in parallel using asyncio.wait with FIRST_COMPLETED.

        Uses the standard asyncio pattern for mixed task types:
        - Regular nodes complete quickly
        - Trigger nodes wait indefinitely for external events
        - If a regular node fails, cancel remaining trigger nodes immediately

        This follows Python asyncio best practices:
        https://docs.python.org/3/library/asyncio-task.html#asyncio.wait

        Legacy layer-at-a-time variant; see
        ``_execute_with_continuous_scheduling`` for the continuous pattern.

        Args:
            ctx: ExecutionContext
            nodes: List of NodeExecution to run in parallel
            enable_caching: Whether to use result caching
        """
        # Mark all as scheduled before any task starts
        for node in nodes:
            node.status = TaskStatus.SCHEDULED
            await self._notify_status(node.node_id, "scheduled", {})

        # Create named tasks for parallel execution
        # Using dicts to track node <-> task mapping for result handling
        node_to_task: Dict[str, asyncio.Task] = {}
        task_to_node: Dict[asyncio.Task, NodeExecution] = {}

        for node in nodes:
            task = asyncio.create_task(
                self._execute_node_with_retry(ctx, node, enable_caching),
                name=f"node_{node.node_id}"
            )
            node_to_task[node.node_id] = task
            task_to_node[task] = node

        pending: Set[asyncio.Task] = set(node_to_task.values())
        workflow_failed = False

        # Process tasks as they complete using FIRST_COMPLETED pattern
        while pending:
            # Wait for any task to complete
            done, pending = await asyncio.wait(
                pending,
                return_when=asyncio.FIRST_COMPLETED
            )

            # Process completed tasks
            for task in done:
                node = task_to_node[task]

                try:
                    result = task.result()

                    if isinstance(result, Exception):
                        # Defensive: Exception *returned* (not raised) by the
                        # executor is treated as a failure
                        node.status = TaskStatus.FAILED
                        node.error = str(result)
                        node.completed_at = time.time()
                        ctx.errors.append({
                            "node_id": node.node_id,
                            "error": str(result),
                            "timestamp": time.time(),
                        })
                        await self._notify_status(node.node_id, "error", {"error": str(result)})
                        logger.error("Parallel node failed",
                                     node_id=node.node_id, error=str(result))
                        workflow_failed = True

                    elif result.get("retries_exhausted"):
                        # Node failed after all retries - already in DLQ
                        node.status = TaskStatus.FAILED
                        node.error = result.get("error", "Unknown error")
                        node.completed_at = time.time()
                        ctx.errors.append({
                            "node_id": node.node_id,
                            "error": node.error,
                            "retries_exhausted": True,
                            "timestamp": time.time(),
                        })
                        workflow_failed = True

                    elif not result.get("success"):
                        # Node returned failure without exhausting retries
                        node.status = TaskStatus.FAILED
                        node.error = result.get("error", "Unknown error")
                        node.completed_at = time.time()
                        ctx.errors.append({
                            "node_id": node.node_id,
                            "error": node.error,
                            "timestamp": time.time(),
                        })
                        await self._notify_status(node.node_id, "error", {"error": node.error})
                        logger.error("Parallel node failed",
                                     node_id=node.node_id, error=node.error)
                        workflow_failed = True

                except asyncio.CancelledError:
                    # Task was cancelled (by us or externally)
                    node.status = TaskStatus.CANCELLED
                    node.completed_at = time.time()
                    logger.info("Parallel node cancelled", node_id=node.node_id)

                except Exception as e:
                    # Unexpected exception re-raised from task.result()
                    node.status = TaskStatus.FAILED
                    node.error = str(e)
                    node.completed_at = time.time()
                    ctx.errors.append({
                        "node_id": node.node_id,
                        "error": str(e),
                        "timestamp": time.time(),
                    })
                    await self._notify_status(node.node_id, "error", {"error": str(e)})
                    logger.error("Parallel node exception",
                                 node_id=node.node_id, error=str(e))
                    workflow_failed = True

            # If workflow failed, cancel remaining pending tasks
            # This prevents trigger nodes from blocking forever when a regular node fails
            if workflow_failed and pending:
                logger.info("Workflow failed, cancelling remaining tasks",
                            pending_count=len(pending))

                for task in pending:
                    task.cancel()

                # Wait for cancelled tasks to finish
                if pending:
                    cancelled_done, _ = await asyncio.wait(
                        pending,
                        return_when=asyncio.ALL_COMPLETED
                    )

                    # Mark cancelled nodes
                    for task in cancelled_done:
                        node = task_to_node[task]
                        if node.status not in (TaskStatus.COMPLETED, TaskStatus.FAILED):
                            node.status = TaskStatus.CANCELLED
                            node.completed_at = time.time()
                            logger.info("Cancelled pending node", node_id=node.node_id)

                pending = set()  # All done now - terminates the while loop

        # Mark workflow as failed if any node failed
        if workflow_failed:
            ctx.status = WorkflowStatus.FAILED
607
-
608
- async def _execute_single_node(self, ctx: ExecutionContext,
609
- node: NodeExecution,
610
- enable_caching: bool) -> None:
611
- """Execute a single node with retry logic.
612
-
613
- Args:
614
- ctx: ExecutionContext
615
- node: NodeExecution to run
616
- enable_caching: Whether to use result caching
617
- """
618
- node.status = TaskStatus.SCHEDULED
619
- await self._notify_status(node.node_id, "scheduled", {})
620
-
621
- try:
622
- result = await self._execute_node_with_retry(ctx, node, enable_caching)
623
-
624
- if result.get("retries_exhausted"):
625
- # Node failed after all retries - already in DLQ
626
- node.status = TaskStatus.FAILED
627
- node.error = result.get("error", "Unknown error")
628
- node.completed_at = time.time()
629
- ctx.errors.append({
630
- "node_id": node.node_id,
631
- "error": node.error,
632
- "retries_exhausted": True,
633
- "timestamp": time.time(),
634
- })
635
- ctx.status = WorkflowStatus.FAILED
636
-
637
- except Exception as e:
638
- node.status = TaskStatus.FAILED
639
- node.error = str(e)
640
- node.completed_at = time.time()
641
- ctx.errors.append({
642
- "node_id": node.node_id,
643
- "error": str(e),
644
- "timestamp": time.time(),
645
- })
646
- await self._notify_status(node.node_id, "error", {"error": str(e)})
647
- ctx.status = WorkflowStatus.FAILED
648
-
649
- # =========================================================================
650
- # RETRY LOGIC
651
- # =========================================================================
652
-
653
    async def _execute_node_with_retry(self, ctx: ExecutionContext,
                                       node: NodeExecution,
                                       enable_caching: bool) -> Dict[str, Any]:
        """Execute node with retry logic and DLQ on final failure.

        Uses an exponential-backoff retry policy resolved per node type
        (optionally overridden by the node's ``retryPolicy`` parameter).
        On exhausted retries, adds an entry to the Dead Letter Queue
        (a no-op handler when DLQ is disabled).

        Args:
            ctx: ExecutionContext
            node: NodeExecution to run
            enable_caching: Whether to use result caching

        Returns:
            Execution result dict; on terminal failure a dict with
            ``success=False`` and ``retries_exhausted=True``.
        """
        # Resolve the retry policy: per-node override wins over type default
        node_data = self._get_node_data(ctx, node.node_id)
        custom_policy = node_data.get("parameters", {}).get("retryPolicy")
        retry_policy = get_retry_policy(node.node_type, custom_policy)

        last_error: Optional[str] = None
        inputs = self._gather_node_inputs(ctx, node.node_id)

        for attempt in range(retry_policy.max_attempts):
            try:
                node.retry_count = attempt
                result = await self._execute_node_with_caching(ctx, node, enable_caching)

                # Success - return result immediately
                if result.get("success"):
                    return result

                # Execution returned failure (not exception)
                error = result.get("error", "Unknown error")
                last_error = error

                # Ask the policy whether this failure is retryable at this attempt
                if retry_policy.should_retry(error, attempt + 1):
                    delay = retry_policy.calculate_delay(attempt)
                    logger.info("Retrying node after failure",
                                node_id=node.node_id,
                                attempt=attempt + 1,
                                max_attempts=retry_policy.max_attempts,
                                delay=delay,
                                error=error[:100])

                    await self._notify_status(node.node_id, "retrying", {
                        "attempt": attempt + 1,
                        "max_attempts": retry_policy.max_attempts,
                        "delay": delay,
                        "error": error,
                    })

                    await asyncio.sleep(delay)

                    # Reset node status so the next attempt starts clean
                    node.status = TaskStatus.PENDING
                    node.error = None
                    continue
                else:
                    # Not retryable, break out to the DLQ path
                    break

            except asyncio.CancelledError:
                raise  # Propagate cancellation - never swallow it
            except Exception as e:
                last_error = str(e)
                logger.warning("Node execution exception",
                               node_id=node.node_id,
                               attempt=attempt + 1,
                               error=last_error)

                # Same retry decision for exceptions as for returned failures
                if retry_policy.should_retry(last_error, attempt + 1):
                    delay = retry_policy.calculate_delay(attempt)
                    logger.info("Retrying node after exception",
                                node_id=node.node_id,
                                attempt=attempt + 1,
                                delay=delay)

                    await asyncio.sleep(delay)
                    node.status = TaskStatus.PENDING
                    node.error = None
                    continue
                else:
                    break

        # All retries exhausted - add to DLQ (handler is no-op if disabled)
        await self.dlq.add_failed_node(ctx, node, inputs, last_error or "Unknown error")

        # Return terminal failure result; callers branch on retries_exhausted
        return {
            "success": False,
            "error": last_error or "Unknown error",
            "retries_exhausted": True,
            "retry_count": node.retry_count,
        }
751
-
752
- # =========================================================================
753
- # CACHED NODE EXECUTION (Prefect pattern)
754
- # =========================================================================
755
-
756
    async def _execute_node_with_caching(self, ctx: ExecutionContext,
                                         node: NodeExecution,
                                         enable_caching: bool) -> Dict[str, Any]:
        """Execute node with result caching (Prefect pattern).

        Checks the cache keyed on (execution_id, node_id, inputs) first; on a
        hit, marks the node CACHED and returns without invoking the executor.
        On a miss, runs the node, records events/heartbeat, and caches the
        successful output.

        Args:
            ctx: ExecutionContext
            node: NodeExecution to run
            enable_caching: Whether to check/populate the cache

        Returns:
            Execution result dict from the cache or the node executor.
        """
        # Get node parameters and inputs
        node_data = self._get_node_data(ctx, node.node_id)
        inputs = self._gather_node_inputs(ctx, node.node_id)

        # Check cache first (Prefect pattern) - idempotent re-execution
        if enable_caching:
            cached_result = await self.cache.get_cached_result(
                ctx.execution_id, node.node_id, inputs
            )
            if cached_result:
                logger.info("Cache hit", node_id=node.node_id)
                node.status = TaskStatus.CACHED
                node.output = cached_result
                node.input_hash = hash_inputs(inputs)
                node.completed_at = time.time()
                ctx.outputs[node.node_id] = cached_result
                await self._notify_status(node.node_id, "success",
                                          {"cached": True, **cached_result})
                await self.cache.add_event(ctx.execution_id, "node_cached", {
                    "node_id": node.node_id,
                })
                return cached_result

        # Cache miss - execute the node for real
        node.status = TaskStatus.RUNNING
        node.started_at = time.time()
        node.input_hash = hash_inputs(inputs)
        await self._notify_status(node.node_id, "executing", {})
        await self.cache.add_event(ctx.execution_id, "node_started", {
            "node_id": node.node_id,
            "node_type": node.node_type,
        })

        # Update heartbeat (for crash detection)
        await self.cache.update_heartbeat(ctx.execution_id, node.node_id)

        # Build execution context for node handler
        # workflow_id is included for per-workflow status scoping (n8n pattern)
        exec_context = {
            "nodes": ctx.nodes,
            "edges": ctx.edges,
            "session_id": ctx.session_id,
            "execution_id": ctx.execution_id,
            "workflow_id": ctx.workflow_id,  # For per-workflow status broadcasts
            "start_time": node.started_at,
            "outputs": ctx.outputs,  # Previous node outputs
        }

        # Call the actual node executor (injected in __init__)
        result = await self.node_executor(
            node.node_id,
            node.node_type,
            node_data.get("parameters", {}),
            exec_context
        )

        # Process result
        if result.get("success"):
            node.status = TaskStatus.COMPLETED
            node.output = result.get("result", {})
            node.completed_at = time.time()
            ctx.outputs[node.node_id] = node.output

            # Cache the successful output for idempotent replays (Prefect pattern)
            if enable_caching:
                await self.cache.set_cached_result(
                    ctx.execution_id, node.node_id, inputs, node.output
                )

            await self._notify_status(node.node_id, "success", node.output)
            await self.cache.add_event(ctx.execution_id, "node_completed", {
                "node_id": node.node_id,
                "execution_time": node.completed_at - node.started_at,
            })
        else:
            node.status = TaskStatus.FAILED
            node.error = result.get("error", "Unknown error")
            node.completed_at = time.time()
            ctx.errors.append({
                "node_id": node.node_id,
                "error": node.error,
                "timestamp": time.time(),
            })

            await self._notify_status(node.node_id, "error", {"error": node.error})
            await self.cache.add_event(ctx.execution_id, "node_failed", {
                "node_id": node.node_id,
                "error": node.error,
            })

            # Mark workflow as failed (retry wrapper may still reset and retry)
            ctx.status = WorkflowStatus.FAILED

        return result
863
-
864
- # =========================================================================
865
- # DAG ANALYSIS
866
- # =========================================================================
867
-
868
- def _compute_execution_layers(self, nodes: List[Dict],
869
- edges: List[Dict]) -> List[List[str]]:
870
- """Compute execution layers for parallel execution.
871
-
872
- Nodes in the same layer have no dependencies on each other
873
- and can execute in parallel. Layer 0 contains trigger nodes
874
- (workflow starting points with no input handles).
875
-
876
- Following n8n pattern: Trigger nodes are the starting point of every
877
- workflow. They listen for specific events/conditions and initiate
878
- the execution of the entire workflow.
879
-
880
- Config nodes and toolkit sub-nodes are excluded from layers since
881
- they don't execute as independent workflow nodes.
882
-
883
- Args:
884
- nodes: List of workflow nodes
885
- edges: List of edges
886
-
887
- Returns:
888
- List of layers, where each layer is a list of node IDs
889
- """
890
- from constants import CONFIG_NODE_TYPES, TOOLKIT_NODE_TYPES
891
-
892
- # Build node type lookup for trigger detection
893
- node_types: Dict[str, str] = {
894
- node["id"]: node.get("type", "unknown") for node in nodes
895
- }
896
-
897
- # Find toolkit sub-nodes (nodes that connect TO a toolkit)
898
- toolkit_node_ids = {n.get("id") for n in nodes if n.get("type") in TOOLKIT_NODE_TYPES}
899
-
900
- # Find AI Agent nodes (both aiAgent and chatAgent have config handles)
901
- ai_agent_node_ids = {n.get("id") for n in nodes if n.get("type") in ('aiAgent', 'chatAgent')}
902
-
903
- subnode_ids: set = set()
904
- for edge in edges:
905
- source = edge.get("source")
906
- target = edge.get("target")
907
- target_handle = edge.get("targetHandle")
908
-
909
- # Any node that connects TO a toolkit is a sub-node
910
- if target in toolkit_node_ids and source:
911
- subnode_ids.add(source)
912
-
913
- # Nodes connected to AI Agent/Chat Agent config handles are sub-nodes
914
- # These handles: input-memory, input-tools, input-skill
915
- if target in ai_agent_node_ids and source and target_handle:
916
- if target_handle in ('input-memory', 'input-tools', 'input-skill'):
917
- subnode_ids.add(source)
918
-
919
- # Filter out config nodes and sub-nodes from execution
920
- excluded_ids = set()
921
- for node in nodes:
922
- node_id = node.get("id")
923
- node_type = node.get("type", "unknown")
924
- if node_type in CONFIG_NODE_TYPES or node_id in subnode_ids:
925
- excluded_ids.add(node_id)
926
-
927
- # Build adjacency and in-degree maps (excluding filtered nodes)
928
- in_degree: Dict[str, int] = defaultdict(int)
929
- adjacency: Dict[str, List[str]] = defaultdict(list)
930
- node_ids = {node["id"] for node in nodes if node["id"] not in excluded_ids}
931
-
932
- for edge in edges:
933
- source = edge.get("source")
934
- target = edge.get("target")
935
- if source in node_ids and target in node_ids:
936
- adjacency[source].append(target)
937
- in_degree[target] += 1
938
-
939
- # Initialize in-degree for all nodes
940
- for node_id in node_ids:
941
- if node_id not in in_degree:
942
- in_degree[node_id] = 0
943
-
944
- # Kahn's algorithm for topological sort with layers
945
- layers = []
946
- remaining = set(node_ids)
947
- is_first_layer = True
948
-
949
- while remaining:
950
- # Find all nodes with in-degree 0 (no dependencies)
951
- layer = [
952
- node_id for node_id in remaining
953
- if in_degree[node_id] == 0
954
- ]
955
-
956
- if not layer:
957
- # Cycle detected or stuck
958
- logger.warning("Cycle detected or no start nodes",
959
- remaining=list(remaining))
960
- # Add remaining as single layer to avoid infinite loop
961
- layers.append(list(remaining))
962
- break
963
-
964
- # For layer 0, validate that starting nodes are trigger nodes
965
- if is_first_layer:
966
- trigger_nodes = []
967
- non_trigger_nodes = []
968
-
969
- for node_id in layer:
970
- node_type = node_types.get(node_id, "unknown")
971
- if is_trigger_node(node_type):
972
- trigger_nodes.append(node_id)
973
- else:
974
- non_trigger_nodes.append(node_id)
975
- logger.warning(
976
- "Non-trigger node found at graph entry point",
977
- node_id=node_id,
978
- node_type=node_type,
979
- expected_types=list(WORKFLOW_TRIGGER_TYPES)
980
- )
981
-
982
- # Log trigger node identification
983
- if trigger_nodes:
984
- logger.info(
985
- "Identified trigger nodes as workflow starting points",
986
- trigger_count=len(trigger_nodes),
987
- trigger_nodes=[
988
- f"{nid[:8]}({node_types.get(nid)})"
989
- for nid in trigger_nodes
990
- ]
991
- )
992
-
993
- is_first_layer = False
994
-
995
- layers.append(layer)
996
-
997
- # Remove layer nodes and update in-degrees
998
- for node_id in layer:
999
- remaining.remove(node_id)
1000
- for successor in adjacency[node_id]:
1001
- in_degree[successor] -= 1
1002
-
1003
- logger.debug("Computed execution layers",
1004
- layer_count=len(layers),
1005
- layers=[[n[:8] for n in l] for l in layers])
1006
-
1007
- return layers
1008
-
1009
    def _find_ready_nodes(self, ctx: ExecutionContext) -> List[NodeExecution]:
        """Find nodes ready to execute (dependencies satisfied + conditions met).

        A node is ready if:
        - Status is PENDING
        - Not disabled (n8n-style disable feature)
        - All upstream nodes are COMPLETED, CACHED, or SKIPPED
        - Edge conditions (if any) evaluate to True based on upstream outputs

        Supports runtime conditional branching (Prefect-style dynamic workflows).

        NOTE: this method has side effects — it flips disabled or
        condition-failed nodes to SKIPPED in place and fires a detached
        status-notification task for disabled nodes.

        Args:
            ctx: ExecutionContext

        Returns:
            List of NodeExecution ready to run
        """
        from constants import CONFIG_NODE_TYPES

        # Build set of completed nodes
        completed = set(ctx.get_completed_nodes())

        # Build map of node_id -> node_type for config node detection
        node_types: Dict[str, str] = {}
        for node in ctx.nodes:
            node_types[node.get("id", "")] = node.get("type", "unknown")

        # Build dependency map and track conditional edges
        # Skip edges from config nodes (they don't execute, provide config only)
        dependencies: Dict[str, Set[str]] = defaultdict(set)
        conditional_edges: Dict[str, List[Dict]] = defaultdict(list)  # target -> edges with conditions

        for edge in ctx.edges:
            target = edge.get("target")
            source = edge.get("source")
            if target and source:
                # Skip edges from config nodes - they provide configuration, not execution dependencies
                source_type = node_types.get(source, "unknown")
                if source_type in CONFIG_NODE_TYPES:
                    continue

                dependencies[target].add(source)
                # Track edges with conditions for evaluation
                if edge.get("data", {}).get("condition"):
                    conditional_edges[target].append(edge)

        # Find ready nodes
        ready = []
        for node_id, node_exec in ctx.node_executions.items():
            if node_exec.status != TaskStatus.PENDING:
                continue

            # Check if all dependencies are satisfied (subset test: every
            # dependency must appear in the completed set)
            deps = dependencies.get(node_id, set())
            if not deps <= completed:  # Not all deps completed
                continue

            # Check if node is disabled (n8n-style disable)
            node_data = self._get_node_data(ctx, node_id)
            if node_data.get("data", {}).get("disabled"):
                node_exec.status = TaskStatus.SKIPPED
                node_exec.completed_at = time.time()
                logger.debug("Skipping disabled node", node_id=node_id)
                # Notify status callback about skipped node.
                # NOTE(review): fire-and-forget create_task assumes a running
                # event loop — holds for the async decide loop callers here.
                asyncio.create_task(self._notify_status(node_id, "skipped", {"disabled": True}))
                continue

            # Check conditional edges for this node
            if node_id in conditional_edges:
                # Has conditional incoming edges - evaluate them
                conditions_met = self._evaluate_incoming_conditions(
                    ctx, node_id, conditional_edges[node_id]
                )
                if not conditions_met:
                    # Mark as SKIPPED if conditions not met and all deps done
                    node_exec.status = TaskStatus.SKIPPED
                    logger.info("Node skipped due to unmet conditions",
                               node_id=node_id)
                    continue

            ready.append(node_exec)

        return ready
1092
-
1093
- def _evaluate_incoming_conditions(self, ctx: ExecutionContext, target_node_id: str,
1094
- edges: List[Dict]) -> bool:
1095
- """Evaluate conditions on incoming edges to determine if node should run.
1096
-
1097
- Args:
1098
- ctx: ExecutionContext
1099
- target_node_id: The node we're checking
1100
- edges: Incoming edges with conditions
1101
-
1102
- Returns:
1103
- True if at least one conditional edge evaluates to True
1104
- """
1105
- for edge in edges:
1106
- source_id = edge.get("source")
1107
- condition = edge.get("data", {}).get("condition")
1108
-
1109
- if not condition:
1110
- continue
1111
-
1112
- # Get output from source node
1113
- source_output = ctx.outputs.get(source_id, {})
1114
-
1115
- # Evaluate condition
1116
- if evaluate_condition(condition, source_output):
1117
- logger.debug("Conditional edge matched",
1118
- source=source_id,
1119
- target=target_node_id,
1120
- condition=condition)
1121
- return True
1122
-
1123
- # No conditions matched
1124
- logger.debug("No conditional edges matched",
1125
- target=target_node_id,
1126
- edge_count=len(edges))
1127
- return False
1128
-
1129
- def _get_node_data(self, ctx: ExecutionContext, node_id: str) -> Dict[str, Any]:
1130
- """Get node data from context.
1131
-
1132
- Args:
1133
- ctx: ExecutionContext
1134
- node_id: Node ID
1135
-
1136
- Returns:
1137
- Node data dict
1138
- """
1139
- for node in ctx.nodes:
1140
- if node.get("id") == node_id:
1141
- return node
1142
- return {}
1143
-
1144
- def _gather_node_inputs(self, ctx: ExecutionContext, node_id: str) -> Dict[str, Any]:
1145
- """Gather inputs for a node from upstream outputs.
1146
-
1147
- Args:
1148
- ctx: ExecutionContext
1149
- node_id: Target node ID
1150
-
1151
- Returns:
1152
- Dict of upstream outputs keyed by source node type
1153
- """
1154
- inputs = {}
1155
- for edge in ctx.edges:
1156
- if edge.get("target") == node_id:
1157
- source_id = edge.get("source")
1158
- if source_id in ctx.outputs:
1159
- # Find source node type
1160
- source_node = self._get_node_data(ctx, source_id)
1161
- source_type = source_node.get("type", source_id)
1162
- inputs[source_type] = ctx.outputs[source_id]
1163
- return inputs
1164
-
1165
- # =========================================================================
1166
- # STATUS NOTIFICATIONS
1167
- # =========================================================================
1168
-
1169
- async def _notify_status(self, node_id: str, status: str,
1170
- data: Dict[str, Any]) -> None:
1171
- """Send status notification via callback.
1172
-
1173
- Args:
1174
- node_id: Node ID
1175
- status: Status string
1176
- data: Additional data
1177
- """
1178
- if self.status_callback:
1179
- try:
1180
- await self.status_callback(node_id, status, data)
1181
- except Exception as e:
1182
- logger.warning("Status callback failed", node_id=node_id, error=str(e))
1183
-
1184
- # =========================================================================
1185
- # RECOVERY
1186
- # =========================================================================
1187
-
1188
    async def recover_execution(self, execution_id: str,
                                nodes: List[Dict],
                                edges: List[Dict]) -> Optional[Dict[str, Any]]:
        """Recover and resume an interrupted execution.

        Loads persisted state from the cache, resets interrupted nodes, and
        re-enters the decide loop. Executions that already reached a terminal
        status are reported back without being re-run.

        Args:
            execution_id: Execution ID to recover
            nodes: Workflow nodes
            edges: Workflow edges

        Returns:
            Execution result if resumed, None if not found
        """
        ctx = await self.cache.load_execution_state(execution_id, nodes, edges)
        if not ctx:
            logger.warning("Execution not found for recovery", execution_id=execution_id)
            return None

        # Already terminal (completed/failed/cancelled) - nothing to resume
        if ctx.status != WorkflowStatus.RUNNING:
            logger.info("Execution already complete", execution_id=execution_id,
                       status=ctx.status.value)
            return {
                "success": ctx.status == WorkflowStatus.COMPLETED,
                "execution_id": execution_id,
                "status": ctx.status.value,
                "recovered": False,
            }

        logger.info("Recovering execution",
                   execution_id=execution_id,
                   checkpoints=ctx.checkpoints)

        # Reset any RUNNING nodes to PENDING (they were interrupted) so the
        # decide loop will schedule them again from scratch
        for node_exec in ctx.node_executions.values():
            if node_exec.status == TaskStatus.RUNNING:
                node_exec.status = TaskStatus.PENDING
                node_exec.started_at = None

        # Track in memory
        self._active_contexts[ctx.execution_id] = ctx

        # Resume decide loop; caching stays on so previously completed work
        # is not redone
        try:
            await self._workflow_decide(ctx, enable_caching=True)

            if ctx.all_nodes_complete():
                ctx.status = WorkflowStatus.COMPLETED
            elif ctx.errors:
                ctx.status = WorkflowStatus.FAILED

            ctx.completed_at = time.time()
            await self.cache.save_execution_state(ctx)

            return {
                "success": ctx.status == WorkflowStatus.COMPLETED,
                "execution_id": ctx.execution_id,
                "status": ctx.status.value,
                "recovered": True,
                "outputs": ctx.outputs,
            }

        finally:
            # Always drop the in-memory handle, even if the resume raised
            self._active_contexts.pop(ctx.execution_id, None)
1251
-
1252
- async def get_active_executions(self) -> List[str]:
1253
- """Get list of active execution IDs.
1254
-
1255
- Returns:
1256
- List of execution IDs currently running
1257
- """
1258
- return list(self._active_contexts.keys())
1259
-
1260
- # =========================================================================
1261
- # DLQ REPLAY
1262
- # =========================================================================
1263
-
1264
    async def replay_dlq_entry(self, entry_id: str,
                               nodes: List[Dict],
                               edges: List[Dict]) -> Dict[str, Any]:
        """Replay a failed node from the Dead Letter Queue.

        Creates a new execution context and attempts to re-execute the failed node.
        On success the entry is removed from the DLQ; on another failure the
        entry's retry count and error message are updated in place.

        Args:
            entry_id: DLQ entry ID to replay
            nodes: Workflow nodes
            edges: Workflow edges

        Returns:
            Execution result dict
        """
        # Get DLQ entry
        entry = await self.cache.get_dlq_entry(entry_id)
        if not entry:
            return {
                "success": False,
                "error": f"DLQ entry not found: {entry_id}",
            }

        logger.info("Replaying DLQ entry",
                   entry_id=entry_id,
                   node_id=entry.node_id,
                   node_type=entry.node_type,
                   original_execution=entry.execution_id)

        # Create a fresh execution context for the replay; the original
        # execution is referenced only for logging
        ctx = ExecutionContext.create(
            workflow_id=entry.workflow_id,
            session_id="dlq_replay",
            nodes=nodes,
            edges=edges,
        )

        # Get the node execution; the workflow definition may have changed
        # since the entry was queued, so the node can be missing
        node_exec = ctx.node_executions.get(entry.node_id)
        if not node_exec:
            return {
                "success": False,
                "error": f"Node not found in workflow: {entry.node_id}",
            }

        # Set up context with stored inputs
        ctx.outputs = entry.inputs  # Restore input state

        ctx.status = WorkflowStatus.RUNNING
        ctx.started_at = time.time()
        self._active_contexts[ctx.execution_id] = ctx

        try:
            # Execute the single node with retry; caching disabled so the
            # replay actually re-runs the node
            await self._execute_single_node(ctx, node_exec, enable_caching=False)

            if node_exec.status == TaskStatus.COMPLETED:
                # Success - remove from DLQ
                await self.cache.remove_from_dlq(entry_id)
                logger.info("DLQ replay succeeded",
                           entry_id=entry_id,
                           node_id=entry.node_id)

                return {
                    "success": True,
                    "execution_id": ctx.execution_id,
                    "node_id": entry.node_id,
                    "result": node_exec.output,
                    "removed_from_dlq": True,
                }
            else:
                # Still failing - update DLQ entry with incremented retry
                # count and the latest error
                await self.cache.update_dlq_entry(
                    entry_id,
                    entry.retry_count + 1,
                    node_exec.error or "Unknown error"
                )

                return {
                    "success": False,
                    "execution_id": ctx.execution_id,
                    "node_id": entry.node_id,
                    "error": node_exec.error,
                    "retry_count": entry.retry_count + 1,
                }

        finally:
            # Always drop the replay context from the active set
            self._active_contexts.pop(ctx.execution_id, None)
1
+ """Workflow executor with Conductor decide pattern and parallel execution.
2
+
3
+ Implements:
4
+ - Conductor-style workflow_decide() for orchestration
5
+ - Prefect-style task caching for idempotency
6
+ - Fork/Join parallel execution with asyncio.wait (FIRST_COMPLETED pattern)
7
+ - Dynamic workflow branching at runtime
8
+ - Proper handling of long-running trigger nodes in parallel batches
9
+ """
10
+
11
+ import asyncio
12
+ import time
13
+ from collections import defaultdict
14
+ from datetime import datetime
15
+ from typing import Dict, Any, List, Optional, Callable, Awaitable, Set
16
+
17
+ from core.logging import get_logger
18
+ from constants import WORKFLOW_TRIGGER_TYPES
19
+ from .models import (
20
+ ExecutionContext,
21
+ TaskStatus,
22
+ WorkflowStatus,
23
+ NodeExecution,
24
+ hash_inputs,
25
+ RetryPolicy,
26
+ get_retry_policy,
27
+ )
28
+ from .cache import ExecutionCache
29
+ from .conditions import evaluate_condition, decide_next_edges
30
+ from .dlq import create_dlq_handler, DLQHandlerProtocol, NullDLQHandler
31
+
32
+ logger = get_logger(__name__)
33
+
34
+
35
def is_trigger_node(node_type: str) -> bool:
    """Return True when *node_type* denotes a trigger node.

    Trigger nodes are workflow entry points: they expose no input handles
    and start execution in response to external events. The recognised
    types are declared in WORKFLOW_TRIGGER_TYPES (constants.py).

    Args:
        node_type: The node type string

    Returns:
        True if the node is a trigger type
    """
    recognised_triggers = WORKFLOW_TRIGGER_TYPES
    return node_type in recognised_triggers
48
+
49
+
50
+ class WorkflowExecutor:
51
+ """Executes workflows using Conductor decide pattern with parallel execution.
52
+
53
+ Features:
54
+ - Isolated ExecutionContext per workflow run
55
+ - Parallel execution of independent nodes (Fork/Join)
56
+ - Result caching for idempotency (Prefect pattern)
57
+ - Distributed locking to prevent race conditions
58
+ - Event history for debugging and recovery
59
+ """
60
+
61
+ def __init__(self, cache: ExecutionCache,
62
+ node_executor: Callable[[str, str, Dict, Dict], Awaitable[Dict]],
63
+ status_callback: Callable[[str, str, Dict], Awaitable[None]] = None,
64
+ dlq_enabled: bool = False):
65
+ """Initialize executor.
66
+
67
+ Args:
68
+ cache: ExecutionCache for Redis persistence
69
+ node_executor: Async function to execute a single node
70
+ Signature: async def execute(node_id, node_type, params, context) -> result
71
+ status_callback: Optional async callback for status updates
72
+ Signature: async def callback(node_id, status, data)
73
+ dlq_enabled: Whether to add failed nodes to Dead Letter Queue
74
+ """
75
+ self.cache = cache
76
+ self.node_executor = node_executor
77
+ self.status_callback = status_callback
78
+
79
+ # Create DLQ handler (modular - uses Null Object pattern when disabled)
80
+ self.dlq = create_dlq_handler(cache, enabled=dlq_enabled)
81
+
82
+ # Active executions (in-memory for fast lookup)
83
+ self._active_contexts: Dict[str, ExecutionContext] = {}
84
+
85
+ # =========================================================================
86
+ # EXECUTION ENTRY POINTS
87
+ # =========================================================================
88
+
89
+ async def execute_workflow(self, workflow_id: str, nodes: List[Dict],
90
+ edges: List[Dict], session_id: str = "default",
91
+ enable_caching: bool = True) -> Dict[str, Any]:
92
+ """Execute a workflow with parallel node execution.
93
+
94
+ Args:
95
+ workflow_id: Workflow identifier
96
+ nodes: List of workflow nodes
97
+ edges: List of edges connecting nodes
98
+ session_id: Session identifier
99
+ enable_caching: Whether to use result caching
100
+
101
+ Returns:
102
+ Execution result dict
103
+ """
104
+ start_time = time.time()
105
+
106
+ # Create isolated execution context
107
+ ctx = ExecutionContext.create(
108
+ workflow_id=workflow_id,
109
+ session_id=session_id,
110
+ nodes=nodes,
111
+ edges=edges,
112
+ )
113
+
114
+ # Compute execution layers (for parallel batches)
115
+ ctx.execution_order = self._compute_execution_layers(nodes, edges)
116
+
117
+ logger.info("Starting workflow execution",
118
+ execution_id=ctx.execution_id,
119
+ workflow_id=workflow_id,
120
+ node_count=len(nodes),
121
+ layers=len(ctx.execution_order))
122
+
123
+ # Track in memory
124
+ self._active_contexts[ctx.execution_id] = ctx
125
+
126
+ # Persist initial state
127
+ ctx.status = WorkflowStatus.RUNNING
128
+ ctx.started_at = time.time()
129
+ await self.cache.save_execution_state(ctx)
130
+
131
+ # Add workflow_started event
132
+ await self.cache.add_event(ctx.execution_id, "workflow_started", {
133
+ "workflow_id": workflow_id,
134
+ "node_count": len(nodes),
135
+ })
136
+
137
+ try:
138
+ # Run the decide loop
139
+ await self._workflow_decide(ctx, enable_caching)
140
+
141
+ # Determine final status
142
+ if ctx.all_nodes_complete():
143
+ ctx.status = WorkflowStatus.COMPLETED
144
+ elif ctx.errors:
145
+ ctx.status = WorkflowStatus.FAILED
146
+
147
+ ctx.completed_at = time.time()
148
+ await self.cache.save_execution_state(ctx)
149
+
150
+ # Add workflow_completed event
151
+ await self.cache.add_event(ctx.execution_id, "workflow_completed", {
152
+ "status": ctx.status.value,
153
+ "completed_nodes": len(ctx.get_completed_nodes()),
154
+ "execution_time": ctx.completed_at - ctx.started_at,
155
+ })
156
+
157
+ return {
158
+ "success": ctx.status == WorkflowStatus.COMPLETED,
159
+ "execution_id": ctx.execution_id,
160
+ "status": ctx.status.value,
161
+ "nodes_executed": ctx.get_completed_nodes(),
162
+ "outputs": ctx.outputs,
163
+ "errors": ctx.errors,
164
+ "execution_time": time.time() - start_time,
165
+ "timestamp": datetime.now().isoformat(),
166
+ }
167
+
168
+ except asyncio.CancelledError:
169
+ ctx.status = WorkflowStatus.CANCELLED
170
+ ctx.completed_at = time.time()
171
+ await self.cache.save_execution_state(ctx)
172
+ await self.cache.add_event(ctx.execution_id, "workflow_cancelled", {})
173
+ return {
174
+ "success": False,
175
+ "execution_id": ctx.execution_id,
176
+ "status": "cancelled",
177
+ "error": "Cancelled by user",
178
+ "execution_time": time.time() - start_time,
179
+ }
180
+
181
+ except Exception as e:
182
+ logger.error("Workflow execution failed", execution_id=ctx.execution_id,
183
+ error=str(e))
184
+ ctx.status = WorkflowStatus.FAILED
185
+ ctx.errors.append({"error": str(e), "timestamp": time.time()})
186
+ await self.cache.save_execution_state(ctx)
187
+ await self.cache.add_event(ctx.execution_id, "workflow_failed", {
188
+ "error": str(e),
189
+ })
190
+ return {
191
+ "success": False,
192
+ "execution_id": ctx.execution_id,
193
+ "status": "failed",
194
+ "error": str(e),
195
+ "execution_time": time.time() - start_time,
196
+ }
197
+
198
+ finally:
199
+ # Cleanup
200
+ self._active_contexts.pop(ctx.execution_id, None)
201
+
202
+ async def cancel_execution(self, execution_id: str) -> bool:
203
+ """Cancel a running execution.
204
+
205
+ Args:
206
+ execution_id: Execution to cancel
207
+
208
+ Returns:
209
+ True if cancelled successfully
210
+ """
211
+ ctx = self._active_contexts.get(execution_id)
212
+ if ctx:
213
+ ctx.status = WorkflowStatus.CANCELLED
214
+ for node_exec in ctx.node_executions.values():
215
+ if node_exec.status in (TaskStatus.PENDING, TaskStatus.SCHEDULED,
216
+ TaskStatus.RUNNING, TaskStatus.WAITING):
217
+ node_exec.status = TaskStatus.CANCELLED
218
+ await self.cache.save_execution_state(ctx)
219
+ logger.info("Execution cancelled", execution_id=execution_id)
220
+ return True
221
+ return False
222
+
223
+ # =========================================================================
224
+ # CONDUCTOR DECIDE PATTERN
225
+ # =========================================================================
226
+
227
+ async def _workflow_decide(self, ctx: ExecutionContext,
228
+ enable_caching: bool = True) -> None:
229
+ """Core orchestration loop - Conductor's decide pattern.
230
+
231
+ Evaluates current state, finds ready nodes, executes them in parallel,
232
+ then recurses until all nodes complete or error occurs.
233
+
234
+ Args:
235
+ ctx: ExecutionContext to process
236
+ enable_caching: Whether to use result caching
237
+ """
238
+ # Distributed lock prevents concurrent decides for same execution
239
+ try:
240
+ async with self.cache.distributed_lock(
241
+ f"execution:{ctx.execution_id}:decide", timeout=60
242
+ ):
243
+ await self._decide_iteration(ctx, enable_caching)
244
+ except TimeoutError:
245
+ logger.warning("Could not acquire decide lock",
246
+ execution_id=ctx.execution_id)
247
+ # Retry after short delay
248
+ await asyncio.sleep(0.5)
249
+ await self._workflow_decide(ctx, enable_caching)
250
+
251
+ async def _decide_iteration(self, ctx: ExecutionContext,
252
+ enable_caching: bool) -> None:
253
+ """Continuous scheduling loop - Temporal/Conductor pattern.
254
+
255
+ When any node completes, immediately check for newly-ready dependents
256
+ and start them without waiting for entire layer to complete.
257
+
258
+ Example: Cron3 (5s) completes -> immediately start WS3,
259
+ even while Cron1 (20s) is still running.
260
+ """
261
+ # Check if cancelled
262
+ if ctx.status == WorkflowStatus.CANCELLED:
263
+ return
264
+
265
+ # Find initial ready nodes
266
+ ready_nodes = self._find_ready_nodes(ctx)
267
+
268
+ if not ready_nodes:
269
+ if ctx.all_nodes_complete():
270
+ logger.info("All nodes complete", execution_id=ctx.execution_id)
271
+ else:
272
+ pending = ctx.get_pending_nodes()
273
+ if pending:
274
+ logger.warning("Stuck: pending nodes with unsatisfied deps",
275
+ execution_id=ctx.execution_id,
276
+ pending=pending)
277
+ return
278
+
279
+ logger.info("Starting continuous execution",
280
+ execution_id=ctx.execution_id,
281
+ initial_batch=len(ready_nodes),
282
+ nodes=[n.node_id for n in ready_nodes])
283
+
284
+ # Execute with continuous scheduling - new pattern
285
+ await self._execute_with_continuous_scheduling(ctx, ready_nodes, enable_caching)
286
+
287
+ # Save final state
288
+ await self.cache.save_execution_state(ctx)
289
+
290
+ # =========================================================================
291
+ # CONTINUOUS SCHEDULING (Temporal/Conductor Pattern)
292
+ # =========================================================================
293
+
294
    async def _execute_with_continuous_scheduling(
        self,
        ctx: ExecutionContext,
        initial_nodes: List[NodeExecution],
        enable_caching: bool
    ) -> None:
        """Execute workflow with continuous scheduling.

        Modern pattern: When any node completes, immediately check for and start
        newly-ready dependent nodes. This enables true parallel pipelines where
        each path progresses independently.

        Uses asyncio.wait(FIRST_COMPLETED) to process completions immediately.

        Args:
            ctx: ExecutionContext
            initial_nodes: Initial batch of ready nodes
            enable_caching: Whether to use result caching
        """
        # Track all running tasks: task -> NodeExecution
        task_to_node: Dict[asyncio.Task, NodeExecution] = {}
        pending_tasks: Set[asyncio.Task] = set()
        workflow_failed = False

        def create_node_task(node: NodeExecution) -> asyncio.Task:
            """Create and track a task for node execution."""
            node.status = TaskStatus.SCHEDULED
            task = asyncio.create_task(
                self._execute_node_with_retry(ctx, node, enable_caching),
                name=f"node_{node.node_id}"
            )
            task_to_node[task] = node
            pending_tasks.add(task)
            return task

        # Start initial nodes
        for node in initial_nodes:
            create_node_task(node)
            await self._notify_status(node.node_id, "scheduled", {})
            logger.info("Scheduled node", node_id=node.node_id)

        # Process completions and schedule new nodes continuously
        while pending_tasks and not workflow_failed:
            if ctx.status == WorkflowStatus.CANCELLED:
                # Cancel all pending tasks
                for task in pending_tasks:
                    task.cancel()
                break

            # Wait for ANY task to complete; asyncio.wait returns the new
            # pending set, which replaces ours
            done, pending_tasks = await asyncio.wait(
                pending_tasks,
                return_when=asyncio.FIRST_COMPLETED
            )

            # Process each completed task
            for task in done:
                node = task_to_node[task]
                newly_ready = []

                try:
                    result = task.result()

                    if isinstance(result, Exception):
                        # Executor returned (not raised) an exception object
                        node.status = TaskStatus.FAILED
                        node.error = str(result)
                        node.completed_at = time.time()
                        ctx.errors.append({
                            "node_id": node.node_id,
                            "error": str(result),
                            "timestamp": time.time(),
                        })
                        await self._notify_status(node.node_id, "error", {"error": str(result)})
                        logger.error("Node failed", node_id=node.node_id, error=str(result))
                        workflow_failed = True

                    elif result.get("retries_exhausted"):
                        # Node failed after all retries; no status notification
                        # here — presumably already emitted by the retry layer
                        node.status = TaskStatus.FAILED
                        node.error = result.get("error", "Unknown error")
                        node.completed_at = time.time()
                        ctx.errors.append({
                            "node_id": node.node_id,
                            "error": node.error,
                            "retries_exhausted": True,
                            "timestamp": time.time(),
                        })
                        workflow_failed = True

                    elif not result.get("success"):
                        # Node reported failure without exhausting retries
                        node.status = TaskStatus.FAILED
                        node.error = result.get("error", "Unknown error")
                        node.completed_at = time.time()
                        ctx.errors.append({
                            "node_id": node.node_id,
                            "error": node.error,
                            "timestamp": time.time(),
                        })
                        await self._notify_status(node.node_id, "error", {"error": node.error})
                        logger.error("Node failed", node_id=node.node_id, error=node.error)
                        workflow_failed = True

                    else:
                        # Success - checkpoint and find newly ready nodes.
                        # NOTE(review): node.status is not set COMPLETED here —
                        # presumably _execute_node_with_retry does that; confirm.
                        ctx.add_checkpoint(node.node_id)
                        logger.info("Node completed", node_id=node.node_id)

                        # Find nodes that are now ready (their dependencies just completed)
                        newly_ready = self._find_ready_nodes(ctx)

                except asyncio.CancelledError:
                    node.status = TaskStatus.CANCELLED
                    node.completed_at = time.time()
                    logger.info("Node cancelled", node_id=node.node_id)

                except Exception as e:
                    # task.result() re-raised an exception from the executor
                    node.status = TaskStatus.FAILED
                    node.error = str(e)
                    node.completed_at = time.time()
                    ctx.errors.append({
                        "node_id": node.node_id,
                        "error": str(e),
                        "timestamp": time.time(),
                    })
                    await self._notify_status(node.node_id, "error", {"error": str(e)})
                    logger.error("Node exception", node_id=node.node_id, error=str(e))
                    workflow_failed = True

                # Schedule newly ready nodes immediately (they flip to
                # SCHEDULED inside create_node_task, so a later pass in this
                # same batch cannot double-schedule them)
                if newly_ready and not workflow_failed:
                    for ready_node in newly_ready:
                        create_node_task(ready_node)
                        await self._notify_status(ready_node.node_id, "scheduled", {})
                        logger.info("Scheduled dependent node",
                                   node_id=ready_node.node_id,
                                   triggered_by=node.node_id)

            # Periodic state save
            await self.cache.save_execution_state(ctx)

        # Handle workflow failure - cancel remaining tasks
        if workflow_failed and pending_tasks:
            logger.info("Workflow failed, cancelling remaining tasks",
                       pending_count=len(pending_tasks))

            for task in pending_tasks:
                task.cancel()

            # Wait for cancelled tasks so their teardown finishes before we
            # record final statuses
            if pending_tasks:
                cancelled_done, _ = await asyncio.wait(
                    pending_tasks,
                    return_when=asyncio.ALL_COMPLETED
                )

                for task in cancelled_done:
                    node = task_to_node.get(task)
                    if node and node.status not in (TaskStatus.COMPLETED, TaskStatus.FAILED):
                        node.status = TaskStatus.CANCELLED
                        node.completed_at = time.time()

            ctx.status = WorkflowStatus.FAILED
455
+
456
+ # =========================================================================
457
+ # PARALLEL EXECUTION (Legacy - Fork/Join with FIRST_COMPLETED pattern)
458
+ # =========================================================================
459
+
460
async def _execute_parallel_nodes(self, ctx: ExecutionContext,
                                  nodes: List[NodeExecution],
                                  enable_caching: bool) -> None:
    """Execute multiple nodes in parallel using asyncio.wait with FIRST_COMPLETED.

    Uses the standard asyncio pattern for mixed task types:
    - Regular nodes complete quickly
    - Trigger nodes wait indefinitely for external events
    - If a regular node fails, cancel remaining trigger nodes immediately

    This follows Python asyncio best practices:
    https://docs.python.org/3/library/asyncio-task.html#asyncio.wait

    Side effects: mutates each node's status/error/completed_at, appends to
    ctx.errors, and sets ctx.status = FAILED if any node fails.

    Args:
        ctx: ExecutionContext
        nodes: List of NodeExecution to run in parallel
        enable_caching: Whether to use result caching
    """
    # Mark all as scheduled (and tell the status callback) before any task starts.
    for node in nodes:
        node.status = TaskStatus.SCHEDULED
        await self._notify_status(node.node_id, "scheduled", {})

    # Create named tasks for parallel execution.
    # Two maps are kept: node_id -> task (strong refs so tasks are not GC'd)
    # and task -> node (to attribute each completed task back to its node).
    node_to_task: Dict[str, asyncio.Task] = {}
    task_to_node: Dict[asyncio.Task, NodeExecution] = {}

    for node in nodes:
        task = asyncio.create_task(
            self._execute_node_with_retry(ctx, node, enable_caching),
            name=f"node_{node.node_id}"
        )
        node_to_task[node.node_id] = task
        task_to_node[task] = node

    pending: Set[asyncio.Task] = set(node_to_task.values())
    workflow_failed = False

    # Process tasks as they complete using the FIRST_COMPLETED pattern so a
    # fast failure is observed without waiting on long-lived trigger tasks.
    while pending:
        # Wait for any task to complete
        done, pending = await asyncio.wait(
            pending,
            return_when=asyncio.FIRST_COMPLETED
        )

        # Process completed tasks
        for task in done:
            node = task_to_node[task]

            try:
                result = task.result()

                if isinstance(result, Exception):
                    # NOTE(review): defensive branch — task.result() re-raises
                    # exceptions raised inside the coroutine, so this only fires
                    # if _execute_node_with_retry *returned* an Exception object.
                    node.status = TaskStatus.FAILED
                    node.error = str(result)
                    node.completed_at = time.time()
                    ctx.errors.append({
                        "node_id": node.node_id,
                        "error": str(result),
                        "timestamp": time.time(),
                    })
                    await self._notify_status(node.node_id, "error", {"error": str(result)})
                    logger.error("Parallel node failed",
                                 node_id=node.node_id, error=str(result))
                    workflow_failed = True

                elif result.get("retries_exhausted"):
                    # Node failed after all retries - already in DLQ.
                    # No "error" notification here: the retry path already
                    # notified per-attempt failures.
                    node.status = TaskStatus.FAILED
                    node.error = result.get("error", "Unknown error")
                    node.completed_at = time.time()
                    ctx.errors.append({
                        "node_id": node.node_id,
                        "error": node.error,
                        "retries_exhausted": True,
                        "timestamp": time.time(),
                    })
                    workflow_failed = True

                elif not result.get("success"):
                    # Node returned failure without exhausting retries
                    node.status = TaskStatus.FAILED
                    node.error = result.get("error", "Unknown error")
                    node.completed_at = time.time()
                    ctx.errors.append({
                        "node_id": node.node_id,
                        "error": node.error,
                        "timestamp": time.time(),
                    })
                    await self._notify_status(node.node_id, "error", {"error": node.error})
                    logger.error("Parallel node failed",
                                 node_id=node.node_id, error=node.error)
                    workflow_failed = True

            except asyncio.CancelledError:
                # Task was cancelled (by us or externally)
                node.status = TaskStatus.CANCELLED
                node.completed_at = time.time()
                logger.info("Parallel node cancelled", node_id=node.node_id)

            except Exception as e:
                # Unexpected exception from task.result() (i.e. the coroutine
                # raised) — record it and fail the workflow.
                node.status = TaskStatus.FAILED
                node.error = str(e)
                node.completed_at = time.time()
                ctx.errors.append({
                    "node_id": node.node_id,
                    "error": str(e),
                    "timestamp": time.time(),
                })
                await self._notify_status(node.node_id, "error", {"error": str(e)})
                logger.error("Parallel node exception",
                             node_id=node.node_id, error=str(e))
                workflow_failed = True

        # If workflow failed, cancel remaining pending tasks
        # This prevents trigger nodes from blocking forever when a regular node fails
        if workflow_failed and pending:
            logger.info("Workflow failed, cancelling remaining tasks",
                        pending_count=len(pending))

            for task in pending:
                task.cancel()

            # Wait for cancelled tasks to finish so their CancelledError is
            # consumed and their nodes can be marked below.
            if pending:
                cancelled_done, _ = await asyncio.wait(
                    pending,
                    return_when=asyncio.ALL_COMPLETED
                )

                # Mark cancelled nodes (skip any that raced to completion/failure)
                for task in cancelled_done:
                    node = task_to_node[task]
                    if node.status not in (TaskStatus.COMPLETED, TaskStatus.FAILED):
                        node.status = TaskStatus.CANCELLED
                        node.completed_at = time.time()
                        logger.info("Cancelled pending node", node_id=node.node_id)

            pending = set()  # All done now — terminates the while loop

    # Mark workflow as failed if any node failed
    if workflow_failed:
        ctx.status = WorkflowStatus.FAILED
607
+
608
async def _execute_single_node(self, ctx: ExecutionContext,
                               node: NodeExecution,
                               enable_caching: bool) -> None:
    """Execute a single node with retry logic.

    Marks the node scheduled, runs it through the retry pipeline, and on
    exhausted retries or an escaped exception records the error on the node,
    appends to ctx.errors, and marks the workflow FAILED.

    Args:
        ctx: ExecutionContext
        node: NodeExecution to run
        enable_caching: Whether to use result caching
    """
    node.status = TaskStatus.SCHEDULED
    await self._notify_status(node.node_id, "scheduled", {})

    try:
        outcome = await self._execute_node_with_retry(ctx, node, enable_caching)
    except Exception as exc:
        # Unexpected exception escaped the retry wrapper: record and fail.
        message = str(exc)
        node.status = TaskStatus.FAILED
        node.error = message
        node.completed_at = time.time()
        ctx.errors.append({
            "node_id": node.node_id,
            "error": message,
            "timestamp": time.time(),
        })
        await self._notify_status(node.node_id, "error", {"error": message})
        ctx.status = WorkflowStatus.FAILED
        return

    if not outcome.get("retries_exhausted"):
        return

    # Node failed after all retries - already in DLQ
    node.status = TaskStatus.FAILED
    node.error = outcome.get("error", "Unknown error")
    node.completed_at = time.time()
    ctx.errors.append({
        "node_id": node.node_id,
        "error": node.error,
        "retries_exhausted": True,
        "timestamp": time.time(),
    })
    ctx.status = WorkflowStatus.FAILED
648
+
649
+ # =========================================================================
650
+ # RETRY LOGIC
651
+ # =========================================================================
652
+
653
async def _execute_node_with_retry(self, ctx: ExecutionContext,
                                   node: NodeExecution,
                                   enable_caching: bool) -> Dict[str, Any]:
    """Execute node with retry logic and DLQ on final failure.

    Uses exponential backoff retry policy based on node type (a per-node
    "retryPolicy" parameter overrides the type default).
    On exhausted retries, adds an entry to the Dead Letter Queue.

    Args:
        ctx: ExecutionContext
        node: NodeExecution to run
        enable_caching: Whether to use result caching

    Returns:
        Execution result: the successful result dict from
        _execute_node_with_caching, or on final failure a dict with
        success=False, error, retries_exhausted=True and retry_count.
    """
    # Get retry policy for this node type (optionally overridden per node)
    node_data = self._get_node_data(ctx, node.node_id)
    custom_policy = node_data.get("parameters", {}).get("retryPolicy")
    retry_policy = get_retry_policy(node.node_type, custom_policy)

    last_error = None
    # Inputs snapshot captured up front so the DLQ entry can replay them later.
    inputs = self._gather_node_inputs(ctx, node.node_id)

    for attempt in range(retry_policy.max_attempts):
        try:
            node.retry_count = attempt
            result = await self._execute_node_with_caching(ctx, node, enable_caching)

            # Success - return result
            if result.get("success"):
                return result

            # Execution returned failure (not exception)
            error = result.get("error", "Unknown error")
            last_error = error

            # Check if we should retry (policy decides based on error text
            # and how many attempts have been made)
            if retry_policy.should_retry(error, attempt + 1):
                delay = retry_policy.calculate_delay(attempt)
                logger.info("Retrying node after failure",
                            node_id=node.node_id,
                            attempt=attempt + 1,
                            max_attempts=retry_policy.max_attempts,
                            delay=delay,
                            error=error[:100])

                await self._notify_status(node.node_id, "retrying", {
                    "attempt": attempt + 1,
                    "max_attempts": retry_policy.max_attempts,
                    "delay": delay,
                    "error": error,
                })

                await asyncio.sleep(delay)

                # Reset node status for retry
                node.status = TaskStatus.PENDING
                node.error = None
                continue
            else:
                # Not retryable, break out
                break

        except asyncio.CancelledError:
            raise  # Propagate cancellation — never swallow it
        except Exception as e:
            last_error = str(e)
            logger.warning("Node execution exception",
                           node_id=node.node_id,
                           attempt=attempt + 1,
                           error=last_error)

            # Check if we should retry
            if retry_policy.should_retry(last_error, attempt + 1):
                delay = retry_policy.calculate_delay(attempt)
                logger.info("Retrying node after exception",
                            node_id=node.node_id,
                            attempt=attempt + 1,
                            delay=delay)

                await asyncio.sleep(delay)
                node.status = TaskStatus.PENDING
                node.error = None
                continue
            else:
                break

    # All retries exhausted - add to DLQ (handler is no-op if disabled)
    await self.dlq.add_failed_node(ctx, node, inputs, last_error or "Unknown error")

    # Return failure result
    return {
        "success": False,
        "error": last_error or "Unknown error",
        "retries_exhausted": True,
        "retry_count": node.retry_count,
    }
751
+
752
+ # =========================================================================
753
+ # CACHED NODE EXECUTION (Prefect pattern)
754
+ # =========================================================================
755
+
756
async def _execute_node_with_caching(self, ctx: ExecutionContext,
                                     node: NodeExecution,
                                     enable_caching: bool) -> Dict[str, Any]:
    """Execute node with result caching (Prefect pattern).

    On a cache hit the node is marked CACHED and the cached result is
    returned without invoking the executor. Otherwise the node executor
    is called; success caches and records the output, failure records
    the error and marks the workflow FAILED.

    NOTE(review): the failure branch sets ctx.status = FAILED even though
    the caller (_execute_node_with_retry) may still retry and succeed —
    confirm the final workflow status is re-derived after retries.

    Args:
        ctx: ExecutionContext
        node: NodeExecution to run
        enable_caching: Whether to check cache

    Returns:
        Execution result dict from the node executor (or the cached result).
    """
    # Get node parameters and inputs
    node_data = self._get_node_data(ctx, node.node_id)
    inputs = self._gather_node_inputs(ctx, node.node_id)

    # Check cache first (Prefect pattern) — keyed by execution, node and inputs
    if enable_caching:
        cached_result = await self.cache.get_cached_result(
            ctx.execution_id, node.node_id, inputs
        )
        if cached_result:
            logger.info("Cache hit", node_id=node.node_id)
            node.status = TaskStatus.CACHED
            node.output = cached_result
            node.input_hash = hash_inputs(inputs)
            node.completed_at = time.time()
            ctx.outputs[node.node_id] = cached_result
            await self._notify_status(node.node_id, "success",
                                      {"cached": True, **cached_result})
            await self.cache.add_event(ctx.execution_id, "node_cached", {
                "node_id": node.node_id,
            })
            return cached_result

    # Execute node
    node.status = TaskStatus.RUNNING
    node.started_at = time.time()
    node.input_hash = hash_inputs(inputs)
    await self._notify_status(node.node_id, "executing", {})
    await self.cache.add_event(ctx.execution_id, "node_started", {
        "node_id": node.node_id,
        "node_type": node.node_type,
    })

    # Update heartbeat (for crash detection)
    await self.cache.update_heartbeat(ctx.execution_id, node.node_id)

    # Build execution context for node handler
    # workflow_id is included for per-workflow status scoping (n8n pattern)
    exec_context = {
        "nodes": ctx.nodes,
        "edges": ctx.edges,
        "session_id": ctx.session_id,
        "execution_id": ctx.execution_id,
        "workflow_id": ctx.workflow_id,  # For per-workflow status broadcasts
        "start_time": node.started_at,
        "outputs": ctx.outputs,  # Previous node outputs
    }

    # Call the actual node executor (injected callable)
    result = await self.node_executor(
        node.node_id,
        node.node_type,
        node_data.get("parameters", {}),
        exec_context
    )

    # Process result
    if result.get("success"):
        node.status = TaskStatus.COMPLETED
        node.output = result.get("result", {})
        node.completed_at = time.time()
        ctx.outputs[node.node_id] = node.output

        # Cache result (Prefect pattern)
        if enable_caching:
            await self.cache.set_cached_result(
                ctx.execution_id, node.node_id, inputs, node.output
            )

        await self._notify_status(node.node_id, "success", node.output)
        await self.cache.add_event(ctx.execution_id, "node_completed", {
            "node_id": node.node_id,
            "execution_time": node.completed_at - node.started_at,
        })
    else:
        node.status = TaskStatus.FAILED
        node.error = result.get("error", "Unknown error")
        node.completed_at = time.time()
        ctx.errors.append({
            "node_id": node.node_id,
            "error": node.error,
            "timestamp": time.time(),
        })

        await self._notify_status(node.node_id, "error", {"error": node.error})
        await self.cache.add_event(ctx.execution_id, "node_failed", {
            "node_id": node.node_id,
            "error": node.error,
        })

        # Mark workflow as failed
        ctx.status = WorkflowStatus.FAILED

    return result
863
+
864
+ # =========================================================================
865
+ # DAG ANALYSIS
866
+ # =========================================================================
867
+
868
def _compute_execution_layers(self, nodes: List[Dict],
                              edges: List[Dict]) -> List[List[str]]:
    """Compute execution layers for parallel execution.

    Nodes in the same layer have no dependencies on each other
    and can execute in parallel. Layer 0 contains trigger nodes
    (workflow starting points with no input handles).

    Following n8n pattern: Trigger nodes are the starting point of every
    workflow. They listen for specific events/conditions and initiate
    the execution of the entire workflow.

    Config nodes and toolkit sub-nodes are excluded from layers since
    they don't execute as independent workflow nodes.

    Robustness: nodes without an "id" key are ignored instead of raising
    KeyError, matching the defensive node.get("id") style used elsewhere
    in this method.

    Args:
        nodes: List of workflow nodes
        edges: List of edges

    Returns:
        List of layers, where each layer is a list of node IDs
    """
    from constants import CONFIG_NODE_TYPES, TOOLKIT_NODE_TYPES

    # Build node type lookup for trigger detection (skip malformed nodes)
    node_types: Dict[str, str] = {
        node["id"]: node.get("type", "unknown")
        for node in nodes
        if node.get("id") is not None
    }

    # Find toolkit sub-nodes (nodes that connect TO a toolkit)
    toolkit_node_ids = {n.get("id") for n in nodes if n.get("type") in TOOLKIT_NODE_TYPES}

    # Find AI Agent nodes (both aiAgent and chatAgent have config handles)
    ai_agent_node_ids = {n.get("id") for n in nodes if n.get("type") in ('aiAgent', 'chatAgent')}

    subnode_ids: set = set()
    for edge in edges:
        source = edge.get("source")
        target = edge.get("target")
        target_handle = edge.get("targetHandle")

        # Any node that connects TO a toolkit is a sub-node
        if target in toolkit_node_ids and source:
            subnode_ids.add(source)

        # Nodes connected to AI Agent/Zeenie config handles are sub-nodes
        # These handles: input-memory, input-tools, input-skill
        if target in ai_agent_node_ids and source and target_handle:
            if target_handle in ('input-memory', 'input-tools', 'input-skill'):
                subnode_ids.add(source)

    # Filter out config nodes and sub-nodes from execution
    excluded_ids = set()
    for node in nodes:
        node_id = node.get("id")
        node_type = node.get("type", "unknown")
        if node_id is not None and (node_type in CONFIG_NODE_TYPES or node_id in subnode_ids):
            excluded_ids.add(node_id)

    # Build adjacency and in-degree maps (excluding filtered nodes)
    in_degree: Dict[str, int] = defaultdict(int)
    adjacency: Dict[str, List[str]] = defaultdict(list)
    node_ids = {nid for nid in node_types if nid not in excluded_ids}

    for edge in edges:
        source = edge.get("source")
        target = edge.get("target")
        if source in node_ids and target in node_ids:
            adjacency[source].append(target)
            in_degree[target] += 1

    # Initialize in-degree for all nodes
    for node_id in node_ids:
        if node_id not in in_degree:
            in_degree[node_id] = 0

    # Kahn's algorithm for topological sort with layers
    layers = []
    remaining = set(node_ids)
    is_first_layer = True

    while remaining:
        # Find all nodes with in-degree 0 (no dependencies)
        layer = [
            node_id for node_id in remaining
            if in_degree[node_id] == 0
        ]

        if not layer:
            # Cycle detected or stuck
            logger.warning("Cycle detected or no start nodes",
                           remaining=list(remaining))
            # Add remaining as single layer to avoid infinite loop
            layers.append(list(remaining))
            break

        # For layer 0, validate that starting nodes are trigger nodes
        if is_first_layer:
            trigger_nodes = []
            non_trigger_nodes = []

            for node_id in layer:
                node_type = node_types.get(node_id, "unknown")
                if is_trigger_node(node_type):
                    trigger_nodes.append(node_id)
                else:
                    non_trigger_nodes.append(node_id)
                    logger.warning(
                        "Non-trigger node found at graph entry point",
                        node_id=node_id,
                        node_type=node_type,
                        expected_types=list(WORKFLOW_TRIGGER_TYPES)
                    )

            # Log trigger node identification
            if trigger_nodes:
                logger.info(
                    "Identified trigger nodes as workflow starting points",
                    trigger_count=len(trigger_nodes),
                    trigger_nodes=[
                        f"{nid[:8]}({node_types.get(nid)})"
                        for nid in trigger_nodes
                    ]
                )

            is_first_layer = False

        layers.append(layer)

        # Remove layer nodes and update in-degrees
        for node_id in layer:
            remaining.remove(node_id)
            for successor in adjacency[node_id]:
                in_degree[successor] -= 1

    logger.debug("Computed execution layers",
                 layer_count=len(layers),
                 layers=[[n[:8] for n in l] for l in layers])

    return layers
1008
+
1009
def _find_ready_nodes(self, ctx: ExecutionContext) -> List[NodeExecution]:
    """Find nodes ready to execute (dependencies satisfied + conditions met).

    A node is ready if:
    - Status is PENDING
    - Not disabled (n8n-style disable feature)
    - All upstream nodes are COMPLETED, CACHED, or SKIPPED
    - Edge conditions (if any) evaluate to True based on upstream outputs

    Supports runtime conditional branching (Prefect-style dynamic workflows).
    Disabled or condition-skipped nodes are marked SKIPPED as a side effect.

    Args:
        ctx: ExecutionContext

    Returns:
        List of NodeExecution ready to run
    """
    from constants import CONFIG_NODE_TYPES

    # Build set of completed nodes
    completed = set(ctx.get_completed_nodes())

    # Build map of node_id -> node_type for config node detection
    node_types: Dict[str, str] = {}
    for node in ctx.nodes:
        node_types[node.get("id", "")] = node.get("type", "unknown")

    # Build dependency map and track conditional edges
    # Skip edges from config nodes (they don't execute, provide config only)
    dependencies: Dict[str, Set[str]] = defaultdict(set)
    conditional_edges: Dict[str, List[Dict]] = defaultdict(list)  # target -> edges with conditions

    for edge in ctx.edges:
        target = edge.get("target")
        source = edge.get("source")
        if target and source:
            # Skip edges from config nodes - they provide configuration, not execution dependencies
            source_type = node_types.get(source, "unknown")
            if source_type in CONFIG_NODE_TYPES:
                continue

            dependencies[target].add(source)
            # Track edges with conditions for evaluation
            if edge.get("data", {}).get("condition"):
                conditional_edges[target].append(edge)

    # Find ready nodes
    ready = []
    for node_id, node_exec in ctx.node_executions.items():
        if node_exec.status != TaskStatus.PENDING:
            continue

        # Check if all dependencies are satisfied
        deps = dependencies.get(node_id, set())
        if not deps <= completed:  # Not all deps completed
            continue

        # Check if node is disabled (n8n-style disable)
        # NOTE(review): assumes the flag lives at node["data"]["disabled"] in
        # the frontend node shape — confirm against the client schema.
        node_data = self._get_node_data(ctx, node_id)
        if node_data.get("data", {}).get("disabled"):
            node_exec.status = TaskStatus.SKIPPED
            node_exec.completed_at = time.time()
            logger.debug("Skipping disabled node", node_id=node_id)
            # Notify status callback about skipped node (fire-and-forget).
            # Fix: the event loop keeps only weak references to tasks, so an
            # unreferenced task can be garbage-collected before it ever runs
            # (see asyncio.create_task docs). Hold a strong reference until
            # the task finishes, then let the done-callback drop it.
            notify_task = asyncio.create_task(
                self._notify_status(node_id, "skipped", {"disabled": True})
            )
            if not hasattr(self, "_notify_tasks"):
                self._notify_tasks = set()
            self._notify_tasks.add(notify_task)
            notify_task.add_done_callback(self._notify_tasks.discard)
            continue

        # Check conditional edges for this node
        if node_id in conditional_edges:
            # Has conditional incoming edges - evaluate them
            conditions_met = self._evaluate_incoming_conditions(
                ctx, node_id, conditional_edges[node_id]
            )
            if not conditions_met:
                # Mark as SKIPPED if conditions not met and all deps done
                node_exec.status = TaskStatus.SKIPPED
                logger.info("Node skipped due to unmet conditions",
                            node_id=node_id)
                continue

        ready.append(node_exec)

    return ready
1092
+
1093
def _evaluate_incoming_conditions(self, ctx: ExecutionContext, target_node_id: str,
                                  edges: List[Dict]) -> bool:
    """Evaluate conditions on incoming edges to decide if a node should run.

    Edges without a condition are ignored; the first conditional edge whose
    condition evaluates to True (against its source node's output) wins.

    Args:
        ctx: ExecutionContext
        target_node_id: The node we're checking
        edges: Incoming edges with conditions

    Returns:
        True if at least one conditional edge evaluates to True
    """
    for incoming in edges:
        predicate = incoming.get("data", {}).get("condition")
        if not predicate:
            continue

        src = incoming.get("source")
        # Output of the upstream node; empty dict when it produced nothing yet.
        upstream_output = ctx.outputs.get(src, {})

        if not evaluate_condition(predicate, upstream_output):
            continue

        logger.debug("Conditional edge matched",
                     source=src,
                     target=target_node_id,
                     condition=predicate)
        return True

    # No conditions matched
    logger.debug("No conditional edges matched",
                 target=target_node_id,
                 edge_count=len(edges))
    return False
1128
+
1129
+ def _get_node_data(self, ctx: ExecutionContext, node_id: str) -> Dict[str, Any]:
1130
+ """Get node data from context.
1131
+
1132
+ Args:
1133
+ ctx: ExecutionContext
1134
+ node_id: Node ID
1135
+
1136
+ Returns:
1137
+ Node data dict
1138
+ """
1139
+ for node in ctx.nodes:
1140
+ if node.get("id") == node_id:
1141
+ return node
1142
+ return {}
1143
+
1144
+ def _gather_node_inputs(self, ctx: ExecutionContext, node_id: str) -> Dict[str, Any]:
1145
+ """Gather inputs for a node from upstream outputs.
1146
+
1147
+ Args:
1148
+ ctx: ExecutionContext
1149
+ node_id: Target node ID
1150
+
1151
+ Returns:
1152
+ Dict of upstream outputs keyed by source node type
1153
+ """
1154
+ inputs = {}
1155
+ for edge in ctx.edges:
1156
+ if edge.get("target") == node_id:
1157
+ source_id = edge.get("source")
1158
+ if source_id in ctx.outputs:
1159
+ # Find source node type
1160
+ source_node = self._get_node_data(ctx, source_id)
1161
+ source_type = source_node.get("type", source_id)
1162
+ inputs[source_type] = ctx.outputs[source_id]
1163
+ return inputs
1164
+
1165
+ # =========================================================================
1166
+ # STATUS NOTIFICATIONS
1167
+ # =========================================================================
1168
+
1169
+ async def _notify_status(self, node_id: str, status: str,
1170
+ data: Dict[str, Any]) -> None:
1171
+ """Send status notification via callback.
1172
+
1173
+ Args:
1174
+ node_id: Node ID
1175
+ status: Status string
1176
+ data: Additional data
1177
+ """
1178
+ if self.status_callback:
1179
+ try:
1180
+ await self.status_callback(node_id, status, data)
1181
+ except Exception as e:
1182
+ logger.warning("Status callback failed", node_id=node_id, error=str(e))
1183
+
1184
+ # =========================================================================
1185
+ # RECOVERY
1186
+ # =========================================================================
1187
+
1188
async def recover_execution(self, execution_id: str,
                            nodes: List[Dict],
                            edges: List[Dict]) -> Optional[Dict[str, Any]]:
    """Recover and resume an interrupted execution.

    Reloads the persisted execution state, resets any RUNNING nodes back
    to PENDING (they were interrupted mid-flight), and re-enters the
    decide loop with caching enabled so already-completed nodes are not
    re-executed.

    Args:
        execution_id: Execution ID to recover
        nodes: Workflow nodes
        edges: Workflow edges

    Returns:
        Execution result dict if resumed (recovered=True) or already
        finished (recovered=False); None if the execution was not found.
    """
    ctx = await self.cache.load_execution_state(execution_id, nodes, edges)
    if not ctx:
        logger.warning("Execution not found for recovery", execution_id=execution_id)
        return None

    # Nothing to resume when the stored status is already terminal.
    if ctx.status != WorkflowStatus.RUNNING:
        logger.info("Execution already complete", execution_id=execution_id,
                    status=ctx.status.value)
        return {
            "success": ctx.status == WorkflowStatus.COMPLETED,
            "execution_id": execution_id,
            "status": ctx.status.value,
            "recovered": False,
        }

    logger.info("Recovering execution",
                execution_id=execution_id,
                checkpoints=ctx.checkpoints)

    # Reset any RUNNING nodes to PENDING (they were interrupted)
    for node_exec in ctx.node_executions.values():
        if node_exec.status == TaskStatus.RUNNING:
            node_exec.status = TaskStatus.PENDING
            node_exec.started_at = None

    # Track in memory
    self._active_contexts[ctx.execution_id] = ctx

    # Resume decide loop
    try:
        await self._workflow_decide(ctx, enable_caching=True)

        # Derive the final workflow status from node completion / errors.
        if ctx.all_nodes_complete():
            ctx.status = WorkflowStatus.COMPLETED
        elif ctx.errors:
            ctx.status = WorkflowStatus.FAILED

        ctx.completed_at = time.time()
        await self.cache.save_execution_state(ctx)

        return {
            "success": ctx.status == WorkflowStatus.COMPLETED,
            "execution_id": ctx.execution_id,
            "status": ctx.status.value,
            "recovered": True,
            "outputs": ctx.outputs,
        }

    finally:
        # Always drop the in-memory handle, even if the decide loop raised.
        self._active_contexts.pop(ctx.execution_id, None)
1251
+
1252
async def get_active_executions(self) -> List[str]:
    """Return the IDs of all executions currently tracked in memory.

    Returns:
        List of execution IDs currently running
    """
    return [execution_id for execution_id in self._active_contexts]
1259
+
1260
+ # =========================================================================
1261
+ # DLQ REPLAY
1262
+ # =========================================================================
1263
+
1264
async def replay_dlq_entry(self, entry_id: str,
                           nodes: List[Dict],
                           edges: List[Dict]) -> Dict[str, Any]:
    """Replay a failed node from the Dead Letter Queue.

    Creates a fresh execution context, restores the inputs captured when
    the node originally failed, and re-executes just that node (without
    caching). Success removes the entry from the DLQ; failure bumps the
    entry's retry count with the latest error.

    Args:
        entry_id: DLQ entry ID to replay
        nodes: Workflow nodes
        edges: Workflow edges

    Returns:
        Execution result dict (always has "success"; on success also the
        node result, on failure the error and updated retry_count)
    """
    # Get DLQ entry
    entry = await self.cache.get_dlq_entry(entry_id)
    if not entry:
        return {
            "success": False,
            "error": f"DLQ entry not found: {entry_id}",
        }

    logger.info("Replaying DLQ entry",
                entry_id=entry_id,
                node_id=entry.node_id,
                node_type=entry.node_type,
                original_execution=entry.execution_id)

    # Create new execution context for replay (distinct execution_id;
    # session is labelled "dlq_replay" so downstream consumers can tell)
    ctx = ExecutionContext.create(
        workflow_id=entry.workflow_id,
        session_id="dlq_replay",
        nodes=nodes,
        edges=edges,
    )

    # Get the node execution — the workflow may have changed since the
    # entry was recorded, so the node might no longer exist.
    node_exec = ctx.node_executions.get(entry.node_id)
    if not node_exec:
        return {
            "success": False,
            "error": f"Node not found in workflow: {entry.node_id}",
        }

    # Set up context with stored inputs
    # NOTE(review): entry.inputs is keyed the way _gather_node_inputs stored
    # it (by source node type), while ctx.outputs is normally keyed by node
    # ID — confirm the replayed node reads inputs the same way.
    ctx.outputs = entry.inputs  # Restore input state

    ctx.status = WorkflowStatus.RUNNING
    ctx.started_at = time.time()
    self._active_contexts[ctx.execution_id] = ctx

    try:
        # Execute the single node with retry (no caching: we want a real run)
        await self._execute_single_node(ctx, node_exec, enable_caching=False)

        if node_exec.status == TaskStatus.COMPLETED:
            # Success - remove from DLQ
            await self.cache.remove_from_dlq(entry_id)
            logger.info("DLQ replay succeeded",
                        entry_id=entry_id,
                        node_id=entry.node_id)

            return {
                "success": True,
                "execution_id": ctx.execution_id,
                "node_id": entry.node_id,
                "result": node_exec.output,
                "removed_from_dlq": True,
            }
        else:
            # Still failing - update DLQ entry
            await self.cache.update_dlq_entry(
                entry_id,
                entry.retry_count + 1,
                node_exec.error or "Unknown error"
            )

            return {
                "success": False,
                "execution_id": ctx.execution_id,
                "node_id": entry.node_id,
                "error": node_exec.error,
                "retry_count": entry.retry_count + 1,
            }

    finally:
        # Always untrack the replay context, even on unexpected errors.
        self._active_contexts.pop(ctx.execution_id, None)