whale-code 6.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -0
- package/bin/swag-agent.js +9 -0
- package/bin/swagmanager-mcp.js +321 -0
- package/dist/cli/app.d.ts +26 -0
- package/dist/cli/app.js +64 -0
- package/dist/cli/chat/AgentSelector.d.ts +14 -0
- package/dist/cli/chat/AgentSelector.js +14 -0
- package/dist/cli/chat/ChatApp.d.ts +9 -0
- package/dist/cli/chat/ChatApp.js +267 -0
- package/dist/cli/chat/ChatInput.d.ts +39 -0
- package/dist/cli/chat/ChatInput.js +509 -0
- package/dist/cli/chat/MarkdownText.d.ts +10 -0
- package/dist/cli/chat/MarkdownText.js +20 -0
- package/dist/cli/chat/MessageList.d.ts +37 -0
- package/dist/cli/chat/MessageList.js +80 -0
- package/dist/cli/chat/ModelSelector.d.ts +20 -0
- package/dist/cli/chat/ModelSelector.js +73 -0
- package/dist/cli/chat/RewindViewer.d.ts +26 -0
- package/dist/cli/chat/RewindViewer.js +185 -0
- package/dist/cli/chat/StoreSelector.d.ts +14 -0
- package/dist/cli/chat/StoreSelector.js +24 -0
- package/dist/cli/chat/StreamingText.d.ts +12 -0
- package/dist/cli/chat/StreamingText.js +12 -0
- package/dist/cli/chat/SubagentPanel.d.ts +45 -0
- package/dist/cli/chat/SubagentPanel.js +110 -0
- package/dist/cli/chat/TeamPanel.d.ts +21 -0
- package/dist/cli/chat/TeamPanel.js +42 -0
- package/dist/cli/chat/ToolIndicator.d.ts +25 -0
- package/dist/cli/chat/ToolIndicator.js +436 -0
- package/dist/cli/chat/hooks/useAgentLoop.d.ts +39 -0
- package/dist/cli/chat/hooks/useAgentLoop.js +382 -0
- package/dist/cli/chat/hooks/useSlashCommands.d.ts +37 -0
- package/dist/cli/chat/hooks/useSlashCommands.js +387 -0
- package/dist/cli/commands/config-cmd.d.ts +10 -0
- package/dist/cli/commands/config-cmd.js +99 -0
- package/dist/cli/commands/doctor.d.ts +14 -0
- package/dist/cli/commands/doctor.js +172 -0
- package/dist/cli/commands/init.d.ts +16 -0
- package/dist/cli/commands/init.js +278 -0
- package/dist/cli/commands/mcp.d.ts +12 -0
- package/dist/cli/commands/mcp.js +162 -0
- package/dist/cli/login/LoginApp.d.ts +7 -0
- package/dist/cli/login/LoginApp.js +157 -0
- package/dist/cli/print-mode.d.ts +31 -0
- package/dist/cli/print-mode.js +202 -0
- package/dist/cli/serve-mode.d.ts +37 -0
- package/dist/cli/serve-mode.js +636 -0
- package/dist/cli/services/agent-definitions.d.ts +25 -0
- package/dist/cli/services/agent-definitions.js +91 -0
- package/dist/cli/services/agent-events.d.ts +178 -0
- package/dist/cli/services/agent-events.js +175 -0
- package/dist/cli/services/agent-loop.d.ts +90 -0
- package/dist/cli/services/agent-loop.js +762 -0
- package/dist/cli/services/agent-worker-base.d.ts +97 -0
- package/dist/cli/services/agent-worker-base.js +220 -0
- package/dist/cli/services/auth-service.d.ts +30 -0
- package/dist/cli/services/auth-service.js +160 -0
- package/dist/cli/services/background-processes.d.ts +126 -0
- package/dist/cli/services/background-processes.js +318 -0
- package/dist/cli/services/browser-auth.d.ts +24 -0
- package/dist/cli/services/browser-auth.js +180 -0
- package/dist/cli/services/claude-md-loader.d.ts +16 -0
- package/dist/cli/services/claude-md-loader.js +58 -0
- package/dist/cli/services/config-store.d.ts +47 -0
- package/dist/cli/services/config-store.js +79 -0
- package/dist/cli/services/debug-log.d.ts +10 -0
- package/dist/cli/services/debug-log.js +52 -0
- package/dist/cli/services/error-logger.d.ts +58 -0
- package/dist/cli/services/error-logger.js +269 -0
- package/dist/cli/services/file-history.d.ts +21 -0
- package/dist/cli/services/file-history.js +83 -0
- package/dist/cli/services/format-server-response.d.ts +16 -0
- package/dist/cli/services/format-server-response.js +440 -0
- package/dist/cli/services/git-context.d.ts +11 -0
- package/dist/cli/services/git-context.js +66 -0
- package/dist/cli/services/hooks.d.ts +85 -0
- package/dist/cli/services/hooks.js +258 -0
- package/dist/cli/services/interactive-tools.d.ts +125 -0
- package/dist/cli/services/interactive-tools.js +260 -0
- package/dist/cli/services/keybinding-manager.d.ts +52 -0
- package/dist/cli/services/keybinding-manager.js +115 -0
- package/dist/cli/services/local-tools.d.ts +22 -0
- package/dist/cli/services/local-tools.js +697 -0
- package/dist/cli/services/lsp-manager.d.ts +18 -0
- package/dist/cli/services/lsp-manager.js +717 -0
- package/dist/cli/services/mcp-client.d.ts +48 -0
- package/dist/cli/services/mcp-client.js +157 -0
- package/dist/cli/services/memory-manager.d.ts +16 -0
- package/dist/cli/services/memory-manager.js +57 -0
- package/dist/cli/services/model-manager.d.ts +18 -0
- package/dist/cli/services/model-manager.js +71 -0
- package/dist/cli/services/model-router.d.ts +26 -0
- package/dist/cli/services/model-router.js +149 -0
- package/dist/cli/services/permission-modes.d.ts +13 -0
- package/dist/cli/services/permission-modes.js +43 -0
- package/dist/cli/services/rewind.d.ts +84 -0
- package/dist/cli/services/rewind.js +194 -0
- package/dist/cli/services/ripgrep.d.ts +28 -0
- package/dist/cli/services/ripgrep.js +138 -0
- package/dist/cli/services/sandbox.d.ts +29 -0
- package/dist/cli/services/sandbox.js +97 -0
- package/dist/cli/services/server-tools.d.ts +61 -0
- package/dist/cli/services/server-tools.js +543 -0
- package/dist/cli/services/session-persistence.d.ts +23 -0
- package/dist/cli/services/session-persistence.js +99 -0
- package/dist/cli/services/subagent-worker.d.ts +19 -0
- package/dist/cli/services/subagent-worker.js +41 -0
- package/dist/cli/services/subagent.d.ts +47 -0
- package/dist/cli/services/subagent.js +647 -0
- package/dist/cli/services/system-prompt.d.ts +7 -0
- package/dist/cli/services/system-prompt.js +198 -0
- package/dist/cli/services/team-lead.d.ts +73 -0
- package/dist/cli/services/team-lead.js +512 -0
- package/dist/cli/services/team-state.d.ts +77 -0
- package/dist/cli/services/team-state.js +398 -0
- package/dist/cli/services/teammate.d.ts +31 -0
- package/dist/cli/services/teammate.js +689 -0
- package/dist/cli/services/telemetry.d.ts +61 -0
- package/dist/cli/services/telemetry.js +209 -0
- package/dist/cli/services/tools/agent-tools.d.ts +14 -0
- package/dist/cli/services/tools/agent-tools.js +347 -0
- package/dist/cli/services/tools/file-ops.d.ts +15 -0
- package/dist/cli/services/tools/file-ops.js +487 -0
- package/dist/cli/services/tools/search-tools.d.ts +8 -0
- package/dist/cli/services/tools/search-tools.js +186 -0
- package/dist/cli/services/tools/shell-exec.d.ts +10 -0
- package/dist/cli/services/tools/shell-exec.js +168 -0
- package/dist/cli/services/tools/task-manager.d.ts +28 -0
- package/dist/cli/services/tools/task-manager.js +209 -0
- package/dist/cli/services/tools/web-tools.d.ts +11 -0
- package/dist/cli/services/tools/web-tools.js +395 -0
- package/dist/cli/setup/SetupApp.d.ts +9 -0
- package/dist/cli/setup/SetupApp.js +191 -0
- package/dist/cli/shared/MatrixIntro.d.ts +4 -0
- package/dist/cli/shared/MatrixIntro.js +83 -0
- package/dist/cli/shared/Theme.d.ts +74 -0
- package/dist/cli/shared/Theme.js +127 -0
- package/dist/cli/shared/WhaleBanner.d.ts +10 -0
- package/dist/cli/shared/WhaleBanner.js +12 -0
- package/dist/cli/shared/markdown.d.ts +21 -0
- package/dist/cli/shared/markdown.js +756 -0
- package/dist/cli/status/StatusApp.d.ts +4 -0
- package/dist/cli/status/StatusApp.js +105 -0
- package/dist/cli/stores/StoreApp.d.ts +7 -0
- package/dist/cli/stores/StoreApp.js +81 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +538 -0
- package/dist/local-agent/connection.d.ts +48 -0
- package/dist/local-agent/connection.js +332 -0
- package/dist/local-agent/discovery.d.ts +18 -0
- package/dist/local-agent/discovery.js +146 -0
- package/dist/local-agent/executor.d.ts +34 -0
- package/dist/local-agent/executor.js +241 -0
- package/dist/local-agent/index.d.ts +14 -0
- package/dist/local-agent/index.js +198 -0
- package/dist/node/adapters/base.d.ts +35 -0
- package/dist/node/adapters/base.js +10 -0
- package/dist/node/adapters/discord.d.ts +29 -0
- package/dist/node/adapters/discord.js +299 -0
- package/dist/node/adapters/email.d.ts +23 -0
- package/dist/node/adapters/email.js +218 -0
- package/dist/node/adapters/imessage.d.ts +17 -0
- package/dist/node/adapters/imessage.js +118 -0
- package/dist/node/adapters/slack.d.ts +26 -0
- package/dist/node/adapters/slack.js +259 -0
- package/dist/node/adapters/sms.d.ts +23 -0
- package/dist/node/adapters/sms.js +161 -0
- package/dist/node/adapters/telegram.d.ts +17 -0
- package/dist/node/adapters/telegram.js +101 -0
- package/dist/node/adapters/webchat.d.ts +27 -0
- package/dist/node/adapters/webchat.js +160 -0
- package/dist/node/adapters/whatsapp.d.ts +28 -0
- package/dist/node/adapters/whatsapp.js +230 -0
- package/dist/node/cli.d.ts +2 -0
- package/dist/node/cli.js +325 -0
- package/dist/node/config.d.ts +17 -0
- package/dist/node/config.js +31 -0
- package/dist/node/runtime.d.ts +50 -0
- package/dist/node/runtime.js +351 -0
- package/dist/server/handlers/__test-utils__/mock-supabase.d.ts +11 -0
- package/dist/server/handlers/__test-utils__/mock-supabase.js +393 -0
- package/dist/server/handlers/analytics.d.ts +17 -0
- package/dist/server/handlers/analytics.js +266 -0
- package/dist/server/handlers/api-keys.d.ts +6 -0
- package/dist/server/handlers/api-keys.js +221 -0
- package/dist/server/handlers/billing.d.ts +33 -0
- package/dist/server/handlers/billing.js +272 -0
- package/dist/server/handlers/browser.d.ts +10 -0
- package/dist/server/handlers/browser.js +517 -0
- package/dist/server/handlers/catalog.d.ts +99 -0
- package/dist/server/handlers/catalog.js +976 -0
- package/dist/server/handlers/comms.d.ts +254 -0
- package/dist/server/handlers/comms.js +588 -0
- package/dist/server/handlers/creations.d.ts +6 -0
- package/dist/server/handlers/creations.js +479 -0
- package/dist/server/handlers/crm.d.ts +89 -0
- package/dist/server/handlers/crm.js +538 -0
- package/dist/server/handlers/discovery.d.ts +6 -0
- package/dist/server/handlers/discovery.js +288 -0
- package/dist/server/handlers/embeddings.d.ts +92 -0
- package/dist/server/handlers/embeddings.js +197 -0
- package/dist/server/handlers/enrichment.d.ts +8 -0
- package/dist/server/handlers/enrichment.js +768 -0
- package/dist/server/handlers/image-gen.d.ts +6 -0
- package/dist/server/handlers/image-gen.js +409 -0
- package/dist/server/handlers/inventory.d.ts +319 -0
- package/dist/server/handlers/inventory.js +447 -0
- package/dist/server/handlers/kali.d.ts +10 -0
- package/dist/server/handlers/kali.js +210 -0
- package/dist/server/handlers/llm-providers.d.ts +6 -0
- package/dist/server/handlers/llm-providers.js +673 -0
- package/dist/server/handlers/local-agent.d.ts +6 -0
- package/dist/server/handlers/local-agent.js +118 -0
- package/dist/server/handlers/meta-ads.d.ts +111 -0
- package/dist/server/handlers/meta-ads.js +2279 -0
- package/dist/server/handlers/nodes.d.ts +33 -0
- package/dist/server/handlers/nodes.js +699 -0
- package/dist/server/handlers/operations.d.ts +138 -0
- package/dist/server/handlers/operations.js +131 -0
- package/dist/server/handlers/platform.d.ts +23 -0
- package/dist/server/handlers/platform.js +227 -0
- package/dist/server/handlers/supply-chain.d.ts +19 -0
- package/dist/server/handlers/supply-chain.js +327 -0
- package/dist/server/handlers/transcription.d.ts +17 -0
- package/dist/server/handlers/transcription.js +121 -0
- package/dist/server/handlers/video-gen.d.ts +6 -0
- package/dist/server/handlers/video-gen.js +466 -0
- package/dist/server/handlers/voice.d.ts +8 -0
- package/dist/server/handlers/voice.js +1146 -0
- package/dist/server/handlers/workflow-steps.d.ts +86 -0
- package/dist/server/handlers/workflow-steps.js +2349 -0
- package/dist/server/handlers/workflows.d.ts +7 -0
- package/dist/server/handlers/workflows.js +989 -0
- package/dist/server/index.d.ts +1 -0
- package/dist/server/index.js +2427 -0
- package/dist/server/lib/batch-client.d.ts +80 -0
- package/dist/server/lib/batch-client.js +467 -0
- package/dist/server/lib/code-worker-pool.d.ts +31 -0
- package/dist/server/lib/code-worker-pool.js +224 -0
- package/dist/server/lib/code-worker.d.ts +1 -0
- package/dist/server/lib/code-worker.js +188 -0
- package/dist/server/lib/compaction-service.d.ts +32 -0
- package/dist/server/lib/compaction-service.js +162 -0
- package/dist/server/lib/logger.d.ts +19 -0
- package/dist/server/lib/logger.js +46 -0
- package/dist/server/lib/otel.d.ts +38 -0
- package/dist/server/lib/otel.js +126 -0
- package/dist/server/lib/pg-rate-limiter.d.ts +21 -0
- package/dist/server/lib/pg-rate-limiter.js +86 -0
- package/dist/server/lib/prompt-sanitizer.d.ts +37 -0
- package/dist/server/lib/prompt-sanitizer.js +177 -0
- package/dist/server/lib/provider-capabilities.d.ts +85 -0
- package/dist/server/lib/provider-capabilities.js +190 -0
- package/dist/server/lib/provider-failover.d.ts +74 -0
- package/dist/server/lib/provider-failover.js +210 -0
- package/dist/server/lib/rate-limiter.d.ts +39 -0
- package/dist/server/lib/rate-limiter.js +147 -0
- package/dist/server/lib/server-agent-loop.d.ts +107 -0
- package/dist/server/lib/server-agent-loop.js +667 -0
- package/dist/server/lib/server-subagent.d.ts +78 -0
- package/dist/server/lib/server-subagent.js +203 -0
- package/dist/server/lib/session-checkpoint.d.ts +51 -0
- package/dist/server/lib/session-checkpoint.js +145 -0
- package/dist/server/lib/ssrf-guard.d.ts +13 -0
- package/dist/server/lib/ssrf-guard.js +240 -0
- package/dist/server/lib/supabase-client.d.ts +7 -0
- package/dist/server/lib/supabase-client.js +78 -0
- package/dist/server/lib/template-resolver.d.ts +31 -0
- package/dist/server/lib/template-resolver.js +215 -0
- package/dist/server/lib/utils.d.ts +16 -0
- package/dist/server/lib/utils.js +147 -0
- package/dist/server/local-agent-gateway.d.ts +82 -0
- package/dist/server/local-agent-gateway.js +426 -0
- package/dist/server/providers/anthropic.d.ts +20 -0
- package/dist/server/providers/anthropic.js +199 -0
- package/dist/server/providers/bedrock.d.ts +20 -0
- package/dist/server/providers/bedrock.js +194 -0
- package/dist/server/providers/gemini.d.ts +24 -0
- package/dist/server/providers/gemini.js +486 -0
- package/dist/server/providers/openai.d.ts +24 -0
- package/dist/server/providers/openai.js +522 -0
- package/dist/server/providers/registry.d.ts +32 -0
- package/dist/server/providers/registry.js +58 -0
- package/dist/server/providers/shared.d.ts +32 -0
- package/dist/server/providers/shared.js +124 -0
- package/dist/server/providers/types.d.ts +92 -0
- package/dist/server/providers/types.js +12 -0
- package/dist/server/proxy-handlers.d.ts +6 -0
- package/dist/server/proxy-handlers.js +89 -0
- package/dist/server/tool-router.d.ts +149 -0
- package/dist/server/tool-router.js +803 -0
- package/dist/server/validation.d.ts +24 -0
- package/dist/server/validation.js +301 -0
- package/dist/server/worker.d.ts +19 -0
- package/dist/server/worker.js +201 -0
- package/dist/setup.d.ts +8 -0
- package/dist/setup.js +181 -0
- package/dist/shared/agent-core.d.ts +157 -0
- package/dist/shared/agent-core.js +534 -0
- package/dist/shared/anthropic-types.d.ts +105 -0
- package/dist/shared/anthropic-types.js +7 -0
- package/dist/shared/api-client.d.ts +90 -0
- package/dist/shared/api-client.js +379 -0
- package/dist/shared/constants.d.ts +33 -0
- package/dist/shared/constants.js +80 -0
- package/dist/shared/sse-parser.d.ts +26 -0
- package/dist/shared/sse-parser.js +259 -0
- package/dist/shared/tool-dispatch.d.ts +52 -0
- package/dist/shared/tool-dispatch.js +191 -0
- package/dist/shared/types.d.ts +72 -0
- package/dist/shared/types.js +7 -0
- package/dist/updater.d.ts +25 -0
- package/dist/updater.js +140 -0
- package/dist/webchat/widget.d.ts +0 -0
- package/dist/webchat/widget.js +397 -0
- package/package.json +95 -0
- package/src/cli/services/builtin-skills/commit.md +19 -0
- package/src/cli/services/builtin-skills/review-pr.md +21 -0
- package/src/cli/services/builtin-skills/review.md +18 -0
|
@@ -0,0 +1,2349 @@
|
|
|
1
|
+
// server/handlers/workflow-steps.ts — Step executor engine
|
|
2
|
+
// Extracted from workflows.ts to separate step execution from workflow CRUD/management.
|
|
3
|
+
//
|
|
4
|
+
// Contains: step type executors, executeAndAdvance, inline chain execution,
|
|
5
|
+
// circuit breakers, code execution (JS/Python), cron parser, schedule/timeout processing,
|
|
6
|
+
// event trigger processing, flow control, webhook ingestion, and all step advancement helpers.
|
|
7
|
+
import { createHmac, timingSafeEqual, randomUUID } from "node:crypto";
|
|
8
|
+
import { resolveTemplate, evaluateCondition } from "../lib/template-resolver.js";
|
|
9
|
+
import { sanitizeError } from "../../shared/agent-core.js";
|
|
10
|
+
import { executeWithPool, initWorkerPool, getPoolStats, shutdownPool } from "../lib/code-worker-pool.js";
|
|
11
|
+
import { batchClient } from "../lib/batch-client.js";
|
|
12
|
+
import { getProvider } from "../../shared/constants.js";
|
|
13
|
+
import { createLogger } from "../lib/logger.js";
|
|
14
|
+
import { startSpan } from "../lib/otel.js";
|
|
15
|
+
// Module-scoped structured logger for all step-executor code paths.
const log = createLogger("workflow-steps");
// ============================================================================
// CONSTANTS
// ============================================================================
const MAX_INLINE_DEPTH = 50; // max nesting for inline chain execution
const CODE_TIMEOUT_MS = 5000; // wall-clock budget for code steps
const CODE_OUTPUT_MAX = 102_400; // 100KB
const MAX_FOR_EACH_ITEMS = 1000; // P2 FIX: Prevent unbounded for_each expansion
const MAX_PARALLEL_CHILDREN = 100; // P1 FIX: Cap parallel step fan-out
// Empty secret disables guest approvals entirely (see generateGuestApprovalUrl).
const GUEST_APPROVAL_SECRET = process.env.GUEST_APPROVAL_SECRET || process.env.FLY_INTERNAL_SECRET || "";
const GUEST_APPROVAL_BASE_URL = "https://whale-agent.fly.dev/approvals/guest";
|
|
26
|
+
// ============================================================================
// GUEST APPROVAL — HMAC-signed URLs for unauthenticated approvers
// ============================================================================
/**
 * Build a signed guest-approval URL for an approval/action pair.
 *
 * The HMAC covers `approvalId:action:expiresAt`, so none of the three can be
 * tampered with without invalidating the signature.
 *
 * @param {string} approvalId - Approval record ID (becomes the URL path segment).
 * @param {string} action - Action the link performs (sent as a query param).
 * @param {string} expiresAt - Expiry timestamp bound into the signature.
 * @returns {string|null} Signed URL, or null when guest approvals are disabled.
 */
export function generateGuestApprovalUrl(approvalId, action, expiresAt) {
    // Guest approvals are disabled when no signing secret is configured.
    if (!GUEST_APPROVAL_SECRET) {
        return null; // Guest approvals disabled — no signing secret configured
    }
    const payload = `${approvalId}:${action}:${expiresAt}`;
    const sig = createHmac("sha256", GUEST_APPROVAL_SECRET).update(payload).digest("hex");
    return `${GUEST_APPROVAL_BASE_URL}/${approvalId}?action=${action}&expires=${encodeURIComponent(expiresAt)}&sig=${sig}`;
}
|
|
36
|
+
/**
 * Verify a guest-approval URL signature in constant time.
 *
 * Recomputes the HMAC over `approvalId:action:expiresAt` and compares it to
 * the supplied hex signature with timingSafeEqual.
 *
 * @param {string} approvalId
 * @param {string} action
 * @param {string} expiresAt
 * @param {string} sig - Hex-encoded signature from the URL.
 * @returns {boolean} true only when the signature matches; false when guest
 *   approvals are disabled, the signature mismatches, or the hex is malformed.
 */
export function verifyGuestApprovalSignature(approvalId, action, expiresAt, sig) {
    if (!GUEST_APPROVAL_SECRET) {
        return false; // Guest approvals disabled — no signing secret configured
    }
    const expected = createHmac("sha256", GUEST_APPROVAL_SECRET)
        .update(`${approvalId}:${action}:${expiresAt}`)
        .digest("hex");
    try {
        return timingSafeEqual(Buffer.from(sig, "hex"), Buffer.from(expected, "hex"));
    }
    catch {
        // timingSafeEqual throws on length mismatch (e.g. truncated/invalid hex).
        return false;
    }
}
|
|
48
|
+
// ============================================================================
// EVENT JOURNAL — append-only state transition log
// ============================================================================
/**
 * Append one event to the workflow_events journal.
 *
 * Best-effort: an insert failure is logged as a warning and never thrown, so
 * journaling problems cannot break step execution.
 *
 * @param {object} supabase - Supabase client.
 * @param {string} runId - Workflow run ID.
 * @param {string} eventType - Event discriminator stored in event_type.
 * @param {object} payload - Arbitrary event payload.
 * @param {string} [stepRunId] - Optional step run ID (stored as null when absent).
 */
export async function logWorkflowEvent(supabase, runId, eventType, payload, stepRunId) {
    const row = {
        run_id: runId,
        step_run_id: stepRunId || null,
        event_type: eventType,
        payload,
    };
    const { error } = await supabase.from("workflow_events").insert(row);
    if (error) {
        log.warn({ err: error.message, runId, eventType }, "logWorkflowEvent insert failed");
    }
}
|
|
61
|
+
// ============================================================================
// FLOW CONTROL — concurrency + rate limiting at step level
// ============================================================================
/**
 * Decide whether a step run may proceed under its configured flow control.
 *
 * Two independent gates, both read from step.step_config:
 *   - concurrency_limit (+ optional concurrency_key): max simultaneously
 *     "running" step runs sharing the key.
 *   - rate_limit (+ rate_window_seconds, default 60): max success/running
 *     executions of this step_key started within the rolling window.
 *
 * P0 FIX: every query filters by store_id so one tenant's runs never
 * influence another tenant's flow-control decisions.
 *
 * @returns {Promise<{allowed: boolean, reason?: string}>}
 */
async function checkFlowControl(supabase, step) {
    const cfg = step.step_config;

    // --- Per-step concurrency limit ---
    const limit = cfg.concurrency_limit;
    if (limit && limit > 0) {
        const key = cfg.concurrency_key || step.step_key;
        const { count } = await supabase.from("workflow_step_runs")
            .select("id", { count: "exact", head: true })
            .eq("step_key", key)
            .eq("status", "running")
            .eq("store_id", step.store_id)
            .neq("id", step.step_run_id); // exclude the step run being evaluated
        if ((count || 0) >= limit) {
            return { allowed: false, reason: `Concurrency limit ${limit} reached for '${key}'` };
        }
    }

    // --- Per-step rate limit (max N executions per rolling window) ---
    const rateLimit = cfg.rate_limit;
    const rateWindowSec = cfg.rate_window_seconds || 60;
    if (rateLimit && rateLimit > 0) {
        const windowStart = new Date(Date.now() - rateWindowSec * 1000).toISOString();
        const { count } = await supabase.from("workflow_step_runs")
            .select("id", { count: "exact", head: true })
            .eq("step_key", step.step_key)
            .eq("store_id", step.store_id)
            .in("status", ["success", "running"])
            .gte("started_at", windowStart);
        if ((count || 0) >= rateLimit) {
            return { allowed: false, reason: `Rate limit ${rateLimit}/${rateWindowSec}s reached for '${step.step_key}'` };
        }
    }

    return { allowed: true };
}
|
|
99
|
+
// Late-bound executor hooks. The server injects these at startup via the
// setters below, keeping this module decoupled from the concrete tool/agent
// runtime implementations. All remain null until injected; executors guard
// against that (e.g. "Tool executor not initialized").
let _executeTool = null;
let _runAgentQuery = null;
let _broadcastToken = null;
let _broadcastStepError = null;
/** Inject the function used by tool steps to execute a named tool. */
export function setToolExecutor(fn) { _executeTool = fn; }
/** Inject the function used by agent steps to run an agent query. */
export function setAgentExecutor(fn) { _runAgentQuery = fn; }
/** Inject the SSE broadcaster for streaming agent tokens to clients. */
export function setTokenBroadcaster(fn) { _broadcastToken = fn; }
/** Inject the SSE broadcaster for surfacing step errors to clients. */
export function setStepErrorBroadcaster(fn) { _broadcastStepError = fn; }
|
|
107
|
+
/**
 * Broadcast a step error to SSE clients and persist error_details on the step run.
 *
 * Two-phase: (1) write a structured error_details object onto the
 * workflow_step_runs row, (2) push a "workflow_error" event to connected SSE
 * clients when a broadcaster has been injected.
 *
 * @param {object} supabase - Supabase client.
 * @param {object} step - Step run context (run_id, workflow_id, step_key, step_type, step_run_id).
 * @param {string} errorMessage - Human-readable error to surface.
 */
async function surfaceStepError(supabase, step, errorMessage) {
    const timestamp = new Date().toISOString();
    const details = {
        step_name: step.step_key,
        step_type: step.step_type,
        error_message: errorMessage,
        timestamp,
    };
    // 1. Persist structured error_details on the step run record.
    await supabase.from("workflow_step_runs")
        .update({ error_details: details })
        .eq("id", step.step_run_id);
    // 2. Broadcast via SSE so connected clients see the error in real time
    //    (no-op when no broadcaster has been injected).
    _broadcastStepError?.(step.run_id, {
        type: "workflow_error",
        workflow_id: step.workflow_id,
        run_id: step.run_id,
        step_name: step.step_key,
        step_type: step.step_type,
        error: errorMessage,
        timestamp,
    });
}
|
|
132
|
+
// ============================================================================
// STEP EXECUTORS
// ============================================================================
/**
 * Execute a `tool` step: resolve templated args and run the named tool via
 * the injected executor, gated by the tool's circuit breaker when tool_id is set.
 *
 * For email send/send_template, workflow context (trigger + prior step
 * outputs) is auto-injected as template_data unless the step provided one.
 *
 * @returns {Promise<{success: boolean, output?: any, error?: string}>}
 */
async function executeToolStep(supabase, config, ctx, storeId, traceId) {
    if (!_executeTool) {
        return { success: false, error: "Tool executor not initialized" };
    }
    const toolName = config.tool_name;
    if (!toolName) {
        return { success: false, error: "No tool_name in step config" };
    }
    const resolvedArgs = resolveTemplate(config.args_template || config.args || {}, ctx);
    // Email steps: auto-inject template_data from workflow context so
    // {{variable}} placeholders resolve. Later step outputs win on key clashes.
    const isEmailSend = toolName === "email"
        && (resolvedArgs.action === "send" || resolvedArgs.action === "send_template");
    if (isEmailSend && !resolvedArgs.template_data) {
        const mergedData = { ...(ctx.trigger || {}) };
        for (const stepData of Object.values(ctx.steps || {})) {
            if (stepData?.output && typeof stepData.output === "object") {
                Object.assign(mergedData, stepData.output);
            }
        }
        if (Object.keys(mergedData).length > 0) {
            resolvedArgs.template_data = mergedData;
        }
    }
    // Circuit breaker: refuse the call while the breaker is open.
    if (config.tool_id) {
        const cb = await checkToolCircuitBreaker(supabase, config.tool_id);
        if (!cb.allowed) {
            return { success: false, error: cb.reason };
        }
    }
    const result = await _executeTool(supabase, toolName, resolvedArgs, storeId, traceId);
    // Feed the outcome back into the breaker's success/failure accounting.
    if (config.tool_id) {
        await updateToolCircuitBreaker(supabase, config.tool_id, result.success, result.error);
    }
    if (result.success) {
        return { success: true, output: result.data };
    }
    return { success: false, error: result.error };
}
|
|
167
|
+
/**
 * Execute a `condition` step: evaluate a boolean expression against ctx and
 * pick the on_true/on_false branch.
 *
 * Requires at least one of on_true/on_false to be configured.
 * NOTE(review): the "must have at least on_true or on_false" failure reports
 * the message via output.error rather than the top-level error field, unlike
 * the other failures here — preserved as-is in case callers depend on it.
 *
 * @returns {{success: boolean, output?: object, error?: string, branch?: any}}
 */
function executeConditionStep(config, ctx) {
    const expression = config.expression;
    if (!expression) {
        return { success: false, error: "No expression in condition step" };
    }
    if (!config.on_true && !config.on_false) {
        return { success: false, output: { error: "Condition step must have at least on_true or on_false defined" } };
    }
    const result = evaluateCondition(expression, ctx);
    let branch;
    if (result) {
        branch = config.on_true || undefined;
    }
    else {
        branch = config.on_false || undefined;
    }
    return { success: true, output: { condition_result: result, branch }, branch };
}
|
|
178
|
+
/**
 * Execute a `transform` step: resolve the configured mapping template against
 * the workflow context and return the result as the step output.
 *
 * @returns {{success: boolean, output?: any, error?: string}}
 */
function executeTransformStep(config, ctx) {
    const mapping = config.mapping;
    if (!mapping) {
        return { success: false, error: "No mapping in transform step" };
    }
    const output = resolveTemplate(mapping, ctx);
    return { success: true, output };
}
|
|
184
|
+
/**
 * Execute an `agent` step: resolve the prompt template, apply tool-gating
 * directives, and run it through the injected agent executor.
 *
 * Tool gating appends [SYSTEM: ...] directives to the prompt for
 * allowed_tools / blocked_tools / require_approval_tools (tools already
 * approved live in step.input.approved_tools).
 *
 * When use_batch is true and max_turns <= 1, the request is routed through
 * the Batch API instead (~50% cost savings; no agentic tool loop support).
 * Otherwise the streaming path runs, broadcasting tokens over SSE when a
 * broadcaster is injected.
 *
 * @returns {Promise<{success: boolean, output?: object, error?: string}>}
 */
async function executeAgentStep(config, ctx, storeId, supabase, step, traceId) {
    if (!_runAgentQuery) {
        return { success: false, error: "Agent executor not initialized" };
    }
    const agentId = config.agent_id;
    if (!agentId) {
        return { success: false, error: "No agent_id in agent step config" };
    }
    const prompt = resolveTemplate(config.prompt_template || config.prompt || "", ctx);
    if (!prompt) {
        return { success: false, error: "No prompt resolved for agent step" };
    }
    // AI tool gating — inject allowed/blocked/approval-pending tool lists into the prompt.
    const allowedTools = config.allowed_tools;
    const blockedTools = config.blocked_tools;
    const requireApprovalTools = config.require_approval_tools;
    let gatedPrompt = prompt;
    if (allowedTools?.length) {
        gatedPrompt += `\n\n[SYSTEM: You may ONLY use these tools: ${allowedTools.join(", ")}. Refuse any other tool calls.]`;
    }
    if (blockedTools?.length) {
        gatedPrompt += `\n\n[SYSTEM: You must NEVER use these tools: ${blockedTools.join(", ")}. Use alternatives instead.]`;
    }
    if (requireApprovalTools?.length && step) {
        // Approval state granted by a prior approval step is carried on step.input.
        const approvedTools = step.input?.approved_tools;
        const pendingTools = requireApprovalTools.filter((t) => !approvedTools?.includes(t));
        if (pendingTools.length > 0) {
            gatedPrompt += `\n\n[SYSTEM: The following tools require human approval before use: ${pendingTools.join(", ")}. Do NOT call them — describe what you would do and why, then stop.]`;
        }
    }
    const maxTurns = config.max_turns || 5;
    // Batch mode: single-turn LLM call via Batch API for ~50% cost savings.
    // Only valid when no tool loop is needed (the batch API doesn't support agentic tool loops).
    if (config.use_batch === true && maxTurns <= 1) {
        const model = config.model || "claude-sonnet-4-6";
        const batchProvider = getProvider(model) === "openai" ? "openai" : "anthropic";
        const requestId = `wf_agent_${randomUUID().replace(/-/g, "").slice(0, 12)}`;
        try {
            const batchResult = await batchClient.processSingle(
                requestId,
                batchProvider,
                model,
                [{ role: "user", content: gatedPrompt }],
                { max_tokens: config.max_tokens || 4096, temperature: config.temperature },
            );
            if (!batchResult.success) {
                return { success: false, error: batchResult.error || "Batch agent request failed" };
            }
            return { success: true, output: { response: batchResult.text || "", usage: batchResult.usage, batch: true } };
        }
        catch (err) {
            return { success: false, error: sanitizeError(err) };
        }
    }
    // Streaming path: wire token broadcasting for SSE streaming to connected clients.
    let onToken;
    if (step && _broadcastToken) {
        onToken = (token) => _broadcastToken(step.run_id, step.step_key, token);
    }
    const result = await _runAgentQuery(supabase, agentId, gatedPrompt, storeId, maxTurns, onToken, traceId);
    if (!result.success) {
        return { success: false, error: result.error };
    }
    return { success: true, output: { response: result.response } };
}
|
|
241
|
+
// P1 FIX: Use shared SSRF guard module (enhanced with DNS resolve-then-check, IPv6-mapped, CGNAT)
|
|
242
|
+
import { validateUrl } from "../lib/ssrf-guard.js";
|
|
243
|
+
/**
 * Execute a `webhook_out` step: send an HTTP request to an external URL.
 *
 * Flow: resolve the URL template, SSRF-validate it (async DNS
 * resolve-then-check via validateUrl), resolve templated headers/body,
 * optionally HMAC-sign the body (X-Webhook-Signature: sha256=...), then fetch
 * with a 30s abort timeout.
 *
 * BUG FIX: the original called clearTimeout(timer) only after fetch resolved,
 * so a rejecting fetch (network error, abort) leaked the 30s timer; the
 * clearTimeout now sits in a finally so it always runs. Abort semantics are
 * unchanged: the timer is cleared as soon as fetch settles, before the
 * response body is read.
 *
 * NOTE(review): validateUrl checks the initial URL only; fetch follows
 * redirects by default, which could land on a blocked host — confirm whether
 * redirect re-validation is needed.
 *
 * @returns {Promise<{success: boolean, output?: object, error?: string}>}
 */
async function executeWebhookOutStep(config, ctx) {
    const url = resolveTemplate(config.url, ctx);
    if (!url)
        return { success: false, error: "No URL in webhook_out step" };
    // P0 FIX: Use async validateUrl (DNS resolve-then-check) instead of sync isBlockedUrl
    const ssrfError = await validateUrl(url);
    if (ssrfError)
        return { success: false, error: `Blocked: ${ssrfError}` };
    const method = (config.method || "POST").toUpperCase();
    const headers = {};
    if (config.headers && typeof config.headers === "object") {
        // Header values are templates too; resolve each against the workflow ctx.
        for (const [k, v] of Object.entries(config.headers)) {
            headers[k] = resolveTemplate(v, ctx);
        }
    }
    let body;
    if (method !== "GET" && method !== "HEAD") {
        const bodyTemplate = config.body_template || {};
        body = JSON.stringify(resolveTemplate(bodyTemplate, ctx));
        if (!headers["Content-Type"])
            headers["Content-Type"] = "application/json";
    }
    // Optional HMAC signing so receivers can authenticate the payload.
    if (config.hmac_secret && body) {
        const hmac = createHmac("sha256", config.hmac_secret).update(body).digest("hex");
        headers["X-Webhook-Signature"] = `sha256=${hmac}`;
    }
    try {
        const controller = new AbortController();
        const timer = setTimeout(() => controller.abort(), 30_000);
        let resp;
        try {
            resp = await fetch(url, { method, headers, body, signal: controller.signal });
        }
        finally {
            clearTimeout(timer); // always clear — previously leaked when fetch threw
        }
        const ct = resp.headers.get("content-type") || "";
        const data = ct.includes("json") ? await resp.json() : await resp.text();
        if (!resp.ok)
            return { success: false, error: `HTTP ${resp.status}: ${String(data).substring(0, 500)}` };
        return { success: true, output: { status: resp.status, data } };
    }
    catch (err) {
        if (err.name === "AbortError")
            return { success: false, error: "Webhook request timed out" };
        return { success: false, error: sanitizeError(err) };
    }
}
|
|
286
|
+
/**
 * No-op step executor — always succeeds and emits a `noop` marker output.
 * Useful as a placeholder or pass-through branch target in workflow graphs.
 *
 * @returns {{success: true, output: {noop: true}}}
 */
function executeNoopStep() {
    const output = { noop: true };
    return { success: true, output };
}
|
|
289
|
+
// ============================================================================
|
|
290
|
+
// BATCH LLM STEP — non-streaming LLM via Batch API (~50% cost savings)
|
|
291
|
+
// ============================================================================
|
|
292
|
+
/**
 * Execute an LLM request via the Batch API instead of streaming.
 * Designed for workflow steps that don't need real-time token streaming
 * (~50% cost savings per the Batch API pricing model — confirm with provider docs).
 *
 * Step config:
 *   model: string — model ID (e.g. "claude-sonnet-4-6", "gpt-5-mini")
 *   prompt / prompt_template: string — prompt template (resolved with ctx)
 *   system: string — optional system prompt template
 *   max_tokens: number — optional, default 4096
 *   temperature: number — optional
 *   tools: array — optional tool definitions for the LLM
 *
 * @param {object} config - step configuration (see fields above)
 * @param {object} ctx - template-resolution context
 * @returns {Promise<{success: boolean, output?: object, error?: string}>}
 */
async function executeLlmBatchStep(config, ctx) {
    const { model } = config;
    if (!model)
        return { success: false, error: "No model in llm_batch step config" };
    const prompt = resolveTemplate(config.prompt_template || config.prompt || "", ctx);
    if (!prompt)
        return { success: false, error: "No prompt resolved for llm_batch step" };
    const system = config.system ? resolveTemplate(config.system, ctx) : undefined;
    // Route to the right batch backend based on the model's provider.
    const provider = getProvider(model);
    const batchProvider = provider === "openai" ? "openai" : "anthropic";
    // Short random ID so the batch request can be correlated from logs/output.
    const requestId = `wf_${randomUUID().replace(/-/g, "").slice(0, 16)}`;
    try {
        const messages = [{ role: "user", content: prompt }];
        const result = await batchClient.processSingle(requestId, batchProvider, model, messages, {
            system,
            tools: config.tools,
            max_tokens: config.max_tokens || 4096,
            temperature: config.temperature,
        });
        if (!result.success)
            return { success: false, error: result.error || "Batch LLM request failed" };
        return {
            success: true,
            output: {
                response: result.text || "",
                content: result.content,
                usage: result.usage,
                batch_request_id: requestId,
            },
        };
    }
    catch (err) {
        return { success: false, error: sanitizeError(err) };
    }
}
|
|
342
|
+
// ============================================================================
|
|
343
|
+
// PHASE 2: APPROVAL STEP EXECUTOR
|
|
344
|
+
// ============================================================================
|
|
345
|
+
/**
 * Execute a human-approval step (two-pass protocol):
 *  - First pass: insert a row into `workflow_approval_requests`, generate optional
 *    signed guest-approval URLs, mark the step run "waiting", and return "waiting".
 *  - Second pass (step.input carries approval_status after resume): map the
 *    response to an approved/rejected result and pick the configured branch.
 *
 * @param {object} supabase - Supabase client
 * @param {object} step - step-run context (store_id, run_id, step_run_id, workflow_id, step_config, input)
 * @param {object} ctx - template-resolution context
 * @returns {Promise<object|"waiting">} step result object, or the literal "waiting"
 */
async function executeApprovalStep(supabase, step, ctx) {
    const config = step.step_config;
    // Second pass — step was resumed after approval response
    if (step.input && typeof step.input === "object" && step.input.approval_status) {
        const approvalData = step.input;
        // Both "approved" and "approve" count as an affirmative response.
        const isApproved = approvalData.approval_status === "approved" || approvalData.approval_status === "approve";
        return {
            success: true,
            output: {
                approved: isApproved,
                status: approvalData.approval_status,
                response_data: approvalData.approval_data,
                responded_by: approvalData.responded_by,
            },
            branch: isApproved ? config.on_approve : config.on_reject,
        };
    }
    // First pass — create approval request and wait
    const title = resolveTemplate((config.title || "Approval Required"), ctx);
    const description = config.description ? resolveTemplate(config.description, ctx) : null;
    const prompt = config.prompt ? resolveTemplate(config.prompt, ctx) : null;
    const options = config.options || ["approve", "reject"];
    const timeoutSeconds = config.timeout_seconds || 86400; // default: 24h
    const timeoutAction = config.timeout_action || "fail";
    const channels = config.notification_channels || ["push"];
    const expiresAt = new Date(Date.now() + timeoutSeconds * 1000).toISOString();
    const { error: insertErr } = await supabase.from("workflow_approval_requests").insert({
        store_id: step.store_id,
        run_id: step.run_id,
        step_run_id: step.step_run_id,
        workflow_id: step.workflow_id,
        title,
        description,
        prompt,
        options,
        form_schema: config.form_schema || null,
        assigned_to: config.assigned_to || null,
        assigned_role: config.assigned_role || null,
        expires_at: expiresAt,
        timeout_action: timeoutAction,
        notification_channels: channels,
    });
    // BUG FIX: the insert error was previously ignored — on failure the step was
    // still parked in "waiting" with no approval row that could ever resolve it.
    if (insertErr)
        return { success: false, error: `Failed to create approval request: ${insertErr.message}` };
    // Generate guest approval URLs (signed, no auth required) — only if signing secret is configured
    const guestUrls = {};
    const optionsList = Array.isArray(options) ? options : ["approve", "reject"];
    for (const opt of optionsList) {
        const url = generateGuestApprovalUrl(step.step_run_id, opt, expiresAt);
        if (url)
            guestUrls[opt] = url;
    }
    // Set step to waiting
    await supabase.from("workflow_step_runs").update({
        status: "waiting",
        output: { waiting_for: "approval", title, expires_at: expiresAt, guest_urls: guestUrls },
    }).eq("id", step.step_run_id);
    return "waiting";
}
|
|
402
|
+
// ============================================================================
|
|
403
|
+
// PHASE 7: ENHANCED CODE EXECUTION
|
|
404
|
+
// ============================================================================
|
|
405
|
+
// Re-export pool management for index.ts to initialize on startup
|
|
406
|
+
export { initWorkerPool, getPoolStats, shutdownPool };
|
|
407
|
+
// ============================================================================
|
|
408
|
+
// CRON EXPRESSION PARSER — 5-field (min hour dom mon dow)
|
|
409
|
+
// No external dependencies. Supports: *, */N, N-M, N,M, N
|
|
410
|
+
// ============================================================================
|
|
411
|
+
/**
 * Parse one field of a 5-field cron expression into a sorted array of matching
 * integers within [min, max]. Supports: `*`, `*\/N`, `A-B`, `A-B/N`, `A/N`,
 * comma-separated lists, and bare numbers. Invalid parts are skipped.
 *
 * @param {string} field - one cron field, e.g. "*\/15" or "1,3,5-9"
 * @param {number} min - smallest valid value for this field
 * @param {number} max - largest valid value for this field
 * @returns {number[]} ascending, de-duplicated matching values
 */
function parseCronField(field, min, max) {
    const values = new Set();
    for (const part of field.split(",")) {
        const trimmed = part.trim();
        if (trimmed === "*") {
            for (let i = min; i <= max; i++)
                values.add(i);
        }
        else if (trimmed.includes("/")) {
            const [range, stepStr] = trimmed.split("/");
            const step = parseInt(stepStr, 10);
            if (isNaN(step) || step <= 0)
                continue;
            let start = min, end = max;
            if (range !== "*") {
                if (range.includes("-")) {
                    [start, end] = range.split("-").map(Number);
                }
                else {
                    start = parseInt(range, 10);
                }
            }
            // BUG FIX: skip unparseable bounds and clamp to the field's domain —
            // previously "50-70/5" for minutes emitted invalid values 60, 65, 70.
            if (isNaN(start) || isNaN(end))
                continue;
            start = Math.max(start, min);
            end = Math.min(end, max);
            for (let i = start; i <= end; i += step)
                values.add(i);
        }
        else if (trimmed.includes("-")) {
            const [s, e] = trimmed.split("-").map(Number);
            if (isNaN(s) || isNaN(e))
                continue;
            // BUG FIX: clamp plain ranges too — "50-70" for minutes previously
            // emitted invalid values 60..70.
            for (let i = Math.max(s, min); i <= Math.min(e, max); i++)
                values.add(i);
        }
        else {
            const n = parseInt(trimmed, 10);
            if (!isNaN(n) && n >= min && n <= max)
                values.add(n);
        }
    }
    // Set preserves insertion order, not numeric order — sort numerically.
    return [...values].sort((a, b) => a - b);
}
|
|
449
|
+
/**
|
|
450
|
+
* Compute the next occurrence of a 5-field cron expression after `after`.
|
|
451
|
+
* Returns null if expression is invalid or no match found within 366 days.
|
|
452
|
+
*/
|
|
453
|
+
/**
 * Get the UTC offset in minutes for an IANA timezone at a specific instant.
 * Formats the instant's wall-clock fields in the target zone via
 * Intl.DateTimeFormat.formatToParts, then diffs a pseudo-UTC timestamp built
 * from those fields against the real epoch — no offset-string parsing.
 *
 * @param {Date} date - the instant to evaluate
 * @param {string} tz - IANA timezone name (e.g. "America/New_York")
 * @returns {number} minutes the wall clock is ahead of UTC (negative = behind)
 */
function getUtcOffsetMinutes(date, tz) {
    const formatter = new Intl.DateTimeFormat("en-US", {
        timeZone: tz,
        year: "numeric",
        month: "numeric",
        day: "numeric",
        hour: "numeric",
        minute: "numeric",
        second: "numeric",
        hour12: false,
    });
    const fields = {};
    for (const { type, value } of formatter.formatToParts(date)) {
        fields[type] = parseInt(value, 10);
    }
    // Some ICU builds render midnight as hour "24" under hour12:false; "% 24" normalizes it.
    const wallClockMs = Date.UTC(
        fields.year,
        fields.month - 1,
        fields.day,
        fields.hour % 24,
        fields.minute,
        fields.second,
    );
    return (wallClockMs - date.getTime()) / 60_000;
}
|
|
470
|
+
/**
 * Compute the next occurrence of a 5-field cron expression (min hour dom mon dow)
 * strictly after `after`, optionally evaluated in an IANA timezone.
 *
 * @param {string} expression - 5-field cron expression
 * @param {Date} [after=new Date()] - instant the result must be strictly after
 * @param {string} [timezone] - IANA timezone; invalid/missing values fall back to UTC
 * @returns {Date|null} next matching instant (real UTC), or null when the
 *   expression is invalid or no match exists within 366 days
 */
export function getNextCronTime(expression, after = new Date(), timezone) {
    const parts = expression.trim().split(/\s+/);
    if (parts.length !== 5)
        return null;
    const minutes = parseCronField(parts[0], 0, 59);
    const hours = parseCronField(parts[1], 0, 23);
    const doms = parseCronField(parts[2], 1, 31);
    const months = parseCronField(parts[3], 1, 12);
    const dows = parseCronField(parts[4], 0, 6); // 0=Sunday
    // Any field with zero matching values makes the expression unsatisfiable.
    if (!minutes.length || !hours.length || !doms.length || !months.length || !dows.length)
        return null;
    // DOM/DOW: POSIX semantics — when both are restricted, match EITHER (OR).
    // When only one is restricted, match only that one.
    const domRestricted = parts[2] !== "*";
    const dowRestricted = parts[4] !== "*";
    const useDomDowOr = domRestricted && dowRestricted;
    // Validate timezone
    let effectiveTz = "UTC";
    if (timezone) {
        try {
            Intl.DateTimeFormat(undefined, { timeZone: timezone });
            effectiveTz = timezone;
        }
        catch { /* invalid tz, stay UTC */ }
    }
    // Strategy: work in "local time" coordinates using a fake Date whose UTC fields
    // represent the wall-clock time in the target timezone. This lets us use the fast
    // jump-by-month/day/hour logic. Once we find a match, we convert back to real UTC.
    //
    // The offset is recomputed each time we cross a day boundary to handle DST transitions.
    let offsetMin = effectiveTz === "UTC" ? 0 : getUtcOffsetMinutes(after, effectiveTz);
    const localEpoch = after.getTime() + offsetMin * 60_000;
    const candidate = new Date(localEpoch);
    candidate.setUTCSeconds(0, 0);
    candidate.setUTCMinutes(candidate.getUTCMinutes() + 1); // 1 minute after `after`
    const maxMs = after.getTime() + 366 * 86_400_000;
    let lastOffsetDay = candidate.getUTCDate(); // Track day for offset recomputation
    // Iterate in local-time coordinates (fast jumps — same algorithm as before)
    while (true) {
        // Recompute offset on day boundaries to handle DST transitions correctly
        const currentDay = candidate.getUTCDate();
        if (effectiveTz !== "UTC" && currentDay !== lastOffsetDay) {
            // Convert current candidate back to approximate UTC, then get fresh offset
            const approxUtc = new Date(candidate.getTime() - offsetMin * 60_000);
            const newOffset = getUtcOffsetMinutes(approxUtc, effectiveTz);
            if (newOffset !== offsetMin) {
                // DST changed — adjust candidate to maintain correct local-time coordinates
                const drift = (newOffset - offsetMin) * 60_000;
                candidate.setTime(candidate.getTime() + drift);
                offsetMin = newOffset;
            }
            lastOffsetDay = candidate.getUTCDate();
        }
        // Safety: check if we've exceeded 366 days
        const realUtc = candidate.getTime() - offsetMin * 60_000;
        if (realUtc > maxMs)
            return null;
        // Read the candidate's field values in local-time coordinates.
        const mo = candidate.getUTCMonth() + 1;
        const day = candidate.getUTCDate();
        const hr = candidate.getUTCHours();
        const mi = candidate.getUTCMinutes();
        const dow = candidate.getUTCDay();
        // Fast jumps: on a mismatch, advance to the start of the next coarser unit.
        if (!months.includes(mo)) {
            candidate.setUTCMonth(candidate.getUTCMonth() + 1, 1);
            candidate.setUTCHours(0, 0, 0, 0);
            continue;
        }
        const domMatch = doms.includes(day);
        const dowMatch = dows.includes(dow);
        const dayMatch = useDomDowOr ? (domMatch || dowMatch) : (domMatch && dowMatch);
        if (!dayMatch) {
            candidate.setUTCDate(candidate.getUTCDate() + 1);
            candidate.setUTCHours(0, 0, 0, 0);
            continue;
        }
        if (!hours.includes(hr)) {
            candidate.setUTCHours(candidate.getUTCHours() + 1, 0, 0, 0);
            continue;
        }
        if (!minutes.includes(mi)) {
            candidate.setUTCMinutes(candidate.getUTCMinutes() + 1, 0, 0);
            continue;
        }
        // Match found in local coordinates — convert back to real UTC.
        if (effectiveTz === "UTC")
            return candidate;
        const result = new Date(candidate.getTime() - offsetMin * 60_000);
        // Verify: the result in UTC should map back to the same local time we matched.
        // DST spring-forward (gap): 2:00-3:00 doesn't exist → verifyOffset differs → we skip.
        // DST fall-back (ambiguity): 1:00-2:00 exists twice → first occurrence wins (same as cronie).
        const verifyOffset = getUtcOffsetMinutes(result, effectiveTz);
        if (verifyOffset !== offsetMin) {
            candidate.setUTCMinutes(candidate.getUTCMinutes() + 1, 0, 0);
            continue;
        }
        return result;
    }
}
|
|
568
|
+
// ============================================================================
|
|
569
|
+
// SCHEDULE TRIGGER PROCESSING — fires due cron workflows
|
|
570
|
+
// ============================================================================
|
|
571
|
+
/**
 * Fire due scheduled workflows (both recurring cron and one-time `next_run_at`).
 * For each due workflow: start a run via the `start_workflow_run` RPC (with an
 * idempotency key), execute it inline, then advance or clear `next_run_at`.
 * Processes at most 10 workflows per invocation.
 *
 * @param {object} supabase - Supabase client
 * @returns {Promise<number>} number of runs successfully started
 */
export async function processScheduleTriggers(supabase) {
    // Find workflows that are past due — supports both cron (recurring) and one-time (run_at)
    const { data: dueWorkflows } = await supabase.from("workflows")
        .select("id, store_id, cron_expression, timezone")
        .not("next_run_at", "is", null)
        .lte("next_run_at", new Date().toISOString())
        .eq("is_active", true)
        .eq("status", "active")
        .limit(10);
    if (!dueWorkflows?.length)
        return 0;
    let fired = 0;
    for (const wf of dueWorkflows) {
        try {
            // No cron expression means this was a one-shot schedule.
            const isOneTime = !wf.cron_expression;
            // Start the run
            const { data: result, error } = await supabase.rpc("start_workflow_run", {
                p_workflow_id: wf.id,
                p_store_id: wf.store_id,
                p_trigger_type: "schedule",
                p_trigger_payload: {
                    cron: wf.cron_expression || null,
                    one_time: isOneTime,
                    scheduled_at: new Date().toISOString(),
                },
                p_idempotency_key: wf.cron_expression
                    ? `schedule:${wf.id}:${new Date().toISOString().slice(0, 16)}` // Minute-granularity dedup for cron
                    : `schedule:${wf.id}:one_time`,
            });
            if (error || !result?.success) {
                log.error({ workflowId: wf.id, err: error?.message || result?.error }, "failed to start scheduled workflow");
                // Still update next_run_at to prevent infinite retries
            }
            else {
                fired++;
                // Generate trace_id for the new run
                const traceId = randomUUID();
                await supabase.from("workflow_runs").update({ trace_id: traceId }).eq("id", result.run_id);
                // Inline execution
                try {
                    await executeInlineChain(supabase, result.run_id);
                }
                catch (err) {
                    log.error({ runId: result.run_id, err: err.message }, "inline chain failed for scheduled run");
                }
            }
            // next_run_at bookkeeping runs even when the start failed (see note above).
            if (isOneTime) {
                // One-time schedule: clear next_run_at and deactivate
                await supabase.from("workflows").update({
                    last_scheduled_at: new Date().toISOString(),
                    next_run_at: null,
                    is_active: false,
                    status: "paused",
                }).eq("id", wf.id);
            }
            else {
                // Recurring cron: compute next run time
                const nextRun = getNextCronTime(wf.cron_expression, new Date(), wf.timezone || undefined);
                await supabase.from("workflows").update({
                    last_scheduled_at: new Date().toISOString(),
                    next_run_at: nextRun?.toISOString() || null,
                }).eq("id", wf.id);
            }
        }
        catch (err) {
            // One workflow's failure must not block the rest of the batch.
            log.error({ workflowId: wf.id, err: sanitizeError(err) }, "error processing scheduled workflow");
        }
    }
    return fired;
}
|
|
641
|
+
// ============================================================================
|
|
642
|
+
// WORKFLOW TIMEOUT ENFORCEMENT — cancel overtime runs
|
|
643
|
+
// ============================================================================
|
|
644
|
+
const DEFAULT_MAX_RUN_DURATION_SEC = 3600; // 1 hour hard ceiling for any workflow run
/**
 * Enforce per-run duration limits and finalize stuck runs. Two sweeps:
 *  1. Runs exceeding their configured `max_run_duration_seconds` (or the default
 *     ceiling) are marked timed_out and archived to the DLQ.
 *  2. "Zombie" runs — status "running" but with no non-terminal steps — are
 *     finalized via checkWorkflowCompletion (only if older than 2 minutes).
 *
 * @param {object} supabase - Supabase client
 * @returns {Promise<number>} number of runs timed out or finalized
 */
export async function enforceWorkflowTimeouts(supabase) {
    // Find running workflows that exceeded their duration limit
    // NOTE: the query fetches up to 50 running runs; the duration check itself
    // happens in-app below, per-run.
    const { data: timedOut } = await supabase.from("workflow_runs")
        .select("id, workflow_id, store_id, started_at, workflows!inner(max_run_duration_seconds, name)")
        .eq("status", "running")
        .not("started_at", "is", null)
        .limit(50);
    if (!timedOut?.length)
        return 0;
    let count = 0;
    const now = Date.now();
    for (const run of timedOut) {
        const wf = run.workflows;
        // Use configured max_duration or fall back to the hard ceiling
        const maxDuration = (wf?.max_run_duration_seconds && wf.max_run_duration_seconds > 0)
            ? wf.max_run_duration_seconds
            : DEFAULT_MAX_RUN_DURATION_SEC;
        const elapsed = now - new Date(run.started_at).getTime();
        if (elapsed < maxDuration * 1000)
            continue;
        // This run has timed out
        await completeWorkflowRun(supabase, run.id, run.workflow_id, run.store_id, "timed_out", `Workflow exceeded max duration of ${maxDuration}s (ran for ${Math.round(elapsed / 1000)}s)`);
        // Archive to DLQ
        await archiveToDlq(supabase, run.id, run.workflow_id, run.store_id, wf?.name);
        count++;
        log.warn({ runId: run.id, elapsedSec: Math.round(elapsed / 1000), maxDuration }, "workflow run timed out");
    }
    // Detect zombie runs: "running" with no active steps (all steps are terminal)
    const { data: zombieRuns } = await supabase.from("workflow_runs")
        .select("id, workflow_id, store_id, started_at, workflows!inner(name)")
        .eq("status", "running")
        .not("started_at", "is", null)
        .limit(50);
    if (zombieRuns?.length) {
        for (const run of zombieRuns) {
            const elapsed = now - new Date(run.started_at).getTime();
            if (elapsed < 120_000)
                continue; // Only check runs older than 2 min
            // limit(1): we only need to know whether ANY active step exists.
            const { data: activeSteps } = await supabase.from("workflow_step_runs")
                .select("id").eq("run_id", run.id)
                .in("status", ["pending", "running", "retrying", "waiting"]).limit(1);
            if (!activeSteps?.length) {
                // No active steps but run is still "running" — finalize it
                await checkWorkflowCompletion(supabase, run.id, run.workflow_id);
                count++;
                log.warn({ runId: run.id, elapsedSec: Math.round(elapsed / 1000) }, "finalized zombie workflow run");
            }
        }
    }
    return count;
}
|
|
696
|
+
// ============================================================================
|
|
697
|
+
// ORPHANED STEP CLEANUP — cancel step_runs whose parent run is terminal
|
|
698
|
+
// ============================================================================
|
|
699
|
+
let lastOrphanCleanupAt = 0;
const ORPHAN_CLEANUP_INTERVAL_MS = 60_000; // Run at most once per minute
/**
 * Cancel step_runs stuck in a non-terminal status whose parent workflow run has
 * already reached a terminal state. Throttled to at most one sweep per minute;
 * processes up to 100 orphans per sweep.
 *
 * @param {object} supabase - Supabase client
 * @returns {Promise<number>} number of step runs cancelled
 */
export async function cleanupOrphanedSteps(supabase) {
    if (Date.now() - lastOrphanCleanupAt < ORPHAN_CLEANUP_INTERVAL_MS)
        return 0;
    lastOrphanCleanupAt = Date.now();
    // Non-terminal steps joined (!inner) against terminal parent runs — the
    // .in filter applied to the JOINED table means Postgres only returns rows
    // whose run status is already terminal; no in-app filtering required.
    const { data: orphans } = await supabase.from("workflow_step_runs")
        .select("id, run_id, step_key, status, workflow_runs!workflow_step_runs_run_id_fkey!inner(status)")
        .in("status", ["pending", "retrying", "waiting"])
        .in("workflow_runs.status", ["success", "failed", "cancelled", "timed_out"])
        .limit(100);
    if (!orphans?.length)
        return 0;
    let cancelledCount = 0;
    for (const orphan of orphans) {
        const parentStatus = orphan.workflow_runs?.status;
        await supabase.from("workflow_step_runs").update({
            status: "cancelled",
            error_message: `Orphaned: parent run already ${parentStatus}`,
            completed_at: new Date().toISOString(),
        }).eq("id", orphan.id);
        cancelledCount++;
    }
    if (cancelledCount > 0) {
        log.info({ cleaned: cancelledCount }, "cleaned up orphaned step runs");
    }
    return cancelledCount;
}
|
|
730
|
+
// ============================================================================
|
|
731
|
+
// DLQ RETRY MECHANISM — retry transient DLQ failures
|
|
732
|
+
// ============================================================================
|
|
733
|
+
let lastDlqRetryAt = 0;
const DLQ_RETRY_INTERVAL_MS = 60_000; // Run at most once per minute
const DLQ_RETRY_BATCH_SIZE = 5;
const DLQ_MAX_RETRY_ATTEMPTS = 3;
/**
 * Retry DLQ entries whose error messages look transient (timeout / network).
 * Throttled to one pass per minute; each entry gets per-entry exponential
 * backoff (2/4/8 min) and at most DLQ_MAX_RETRY_ATTEMPTS attempts. A successful
 * restart marks the entry "retried" and executes the new run inline.
 *
 * @param {object} supabase - Supabase client
 * @returns {Promise<number>} number of DLQ entries successfully retried
 */
export async function processDlqRetries(supabase) {
    // Throttle: only run once per minute
    if (Date.now() - lastDlqRetryAt < DLQ_RETRY_INTERVAL_MS)
        return 0;
    lastDlqRetryAt = Date.now();
    // Fetch retryable DLQ entries: transient errors (timeout, network), not yet exhausted
    const { data: entries } = await supabase.from("workflow_dlq")
        .select("id, workflow_id, store_id, trigger_type, trigger_payload, error_message, retry_count, last_retry_at")
        .or("error_message.ilike.%timed out%,error_message.ilike.%timeout%,error_message.ilike.%network%,error_message.ilike.%ECONNREFUSED%,error_message.ilike.%fetch failed%")
        .eq("status", "pending")
        .lt("retry_count", DLQ_MAX_RETRY_ATTEMPTS)
        .order("created_at", { ascending: true })
        .limit(DLQ_RETRY_BATCH_SIZE);
    if (!entries?.length)
        return 0;
    let retried = 0;
    const now = Date.now();
    for (const entry of entries) {
        try {
            // Enforce per-entry exponential backoff: 2min, 4min, 8min
            if (entry.last_retry_at) {
                const backoffMs = Math.pow(2, entry.retry_count || 0) * 120_000;
                const elapsed = now - new Date(entry.last_retry_at).getTime();
                if (elapsed < backoffMs)
                    continue; // Too soon for this entry
            }
            // Idempotency key includes the attempt number so each retry is a new run.
            const { data: result, error } = await supabase.rpc("start_workflow_run", {
                p_workflow_id: entry.workflow_id,
                p_store_id: entry.store_id,
                p_trigger_type: entry.trigger_type || "dlq_retry",
                p_trigger_payload: { ...(entry.trigger_payload || {}), _dlq_retry: true, _dlq_entry_id: entry.id },
                p_idempotency_key: `dlq_retry:${entry.id}:${(entry.retry_count || 0) + 1}`,
            });
            if (error || !result?.success) {
                // Update retry count but keep in DLQ
                await supabase.from("workflow_dlq").update({
                    retry_count: (entry.retry_count || 0) + 1,
                    last_error: error?.message || result?.error || "retry failed",
                    last_retry_at: new Date().toISOString(),
                }).eq("id", entry.id);
                continue;
            }
            // Success — mark DLQ entry as retried
            await supabase.from("workflow_dlq").update({
                status: "retried",
                retry_count: (entry.retry_count || 0) + 1,
                retried_run_id: result.run_id,
                last_retry_at: new Date().toISOString(),
            }).eq("id", entry.id);
            // Execute inline
            if (result.run_id) {
                const traceId = randomUUID();
                await supabase.from("workflow_runs").update({ trace_id: traceId }).eq("id", result.run_id);
                try {
                    await executeInlineChain(supabase, result.run_id);
                }
                catch (err) {
                    // The run exists and the entry is already marked retried; just log.
                    log.error({ runId: result.run_id, err: err.message }, "inline chain failed for DLQ retry");
                }
            }
            retried++;
            log.info({ dlqEntryId: entry.id, workflowId: entry.workflow_id, retryCount: (entry.retry_count || 0) + 1 }, "DLQ entry retried");
        }
        catch (err) {
            // Unexpected failure — record it on the entry and keep it in the DLQ.
            log.warn({ dlqEntryId: entry.id, err: sanitizeError(err) }, "DLQ retry error");
            await supabase.from("workflow_dlq").update({
                retry_count: (entry.retry_count || 0) + 1,
                last_retry_at: new Date().toISOString(),
                last_error: sanitizeError(err),
            }).eq("id", entry.id);
        }
    }
    return retried;
}
|
|
811
|
+
// ============================================================================
|
|
812
|
+
// EVENT TRIGGER PROCESSING — match inbound events to workflow subscriptions
|
|
813
|
+
// ============================================================================
|
|
814
|
+
/**
 * Match pending inbound events to workflow event subscriptions and start runs.
 * Claims up to 20 events (atomically via the `claim_pending_events` RPC, with a
 * non-atomic SELECT+UPDATE fallback), batch-loads all relevant subscriptions,
 * evaluates optional filter expressions, starts one idempotent run per matching
 * subscription, executes it inline, and marks each event processed or failed.
 *
 * @param {object} supabase - Supabase client
 * @returns {Promise<number>} number of events marked processed
 */
export async function processEventTriggers(supabase) {
    // Atomically claim a batch of pending events using FOR UPDATE SKIP LOCKED
    // Falls back to SELECT+UPDATE if the RPC doesn't exist yet
    let events = null;
    const { data: claimed, error: claimErr } = await supabase.rpc("claim_pending_events", { batch_size: 20 });
    if (!claimErr && claimed?.length) {
        events = claimed;
    }
    else {
        // Fallback: non-atomic claim (SELECT then UPDATE)
        if (claimErr)
            log.debug({ err: claimErr.message }, "claim_pending_events RPC unavailable, using fallback");
        const { data: fallbackEvents } = await supabase.from("automation_events")
            .select("id, store_id, event_type, event_payload, source")
            .eq("status", "pending")
            .order("created_at", { ascending: true })
            .limit(20);
        if (fallbackEvents?.length) {
            const eventIds = fallbackEvents.map(e => e.id);
            await supabase.from("automation_events")
                .update({ status: "processing" })
                .in("id", eventIds);
            events = fallbackEvents;
        }
    }
    if (!events?.length)
        return 0;
    // P2 FIX: Batch-load ALL active subscriptions once instead of per-event queries.
    // Group by (store_id, event_type) for O(1) in-memory lookup per event.
    const uniqueStoreIds = Array.from(new Set(events.map(e => e.store_id)));
    const uniqueEventTypes = Array.from(new Set(events.map(e => e.event_type)));
    const { data: allSubs } = await supabase.from("workflow_event_subscriptions")
        .select("id, workflow_id, filter_expression, store_id, event_type")
        .in("store_id", uniqueStoreIds)
        .in("event_type", uniqueEventTypes)
        .eq("is_active", true);
    // Build lookup map: "store_id:event_type" -> subscriptions[]
    const subsMap = new Map();
    for (const sub of (allSubs || [])) {
        const key = `${sub.store_id}:${sub.event_type}`;
        if (!subsMap.has(key))
            subsMap.set(key, []);
        subsMap.get(key).push(sub);
    }
    let processed = 0;
    for (const event of events) {
        try {
            // P2 FIX: In-memory lookup instead of per-event DB query
            const subs = subsMap.get(`${event.store_id}:${event.event_type}`) || [];
            if (!subs.length) {
                // No subscribers — mark processed and move on
                await supabase.from("automation_events")
                    .update({ status: "processed", processed_at: new Date().toISOString() })
                    .eq("id", event.id);
                processed++;
                continue;
            }
            for (const sub of subs) {
                // Optional filter expression evaluation
                if (sub.filter_expression) {
                    try {
                        // Filters see the event payload as `trigger`; no step outputs yet.
                        const ctx = {
                            trigger: event.event_payload || {},
                            steps: {},
                        };
                        const pass = evaluateCondition(sub.filter_expression, ctx);
                        if (!pass)
                            continue; // Filter didn't match — skip this subscription
                    }
                    catch {
                        // Filter eval error — skip rather than block
                        continue;
                    }
                }
                // Start a workflow run for each matching subscription
                const idempotencyKey = `event:${event.id}:${sub.workflow_id}`;
                const { data: result, error: startErr } = await supabase.rpc("start_workflow_run", {
                    p_workflow_id: sub.workflow_id,
                    p_store_id: event.store_id,
                    p_trigger_type: "event",
                    p_trigger_payload: {
                        ...(event.event_payload || {}),
                        _event_id: event.id,
                        _event_type: event.event_type,
                        _event_source: event.source,
                    },
                    p_idempotency_key: idempotencyKey,
                });
                if (startErr || !result?.success) {
                    // One failed subscription does not fail the event for the others.
                    log.error({ workflowId: sub.workflow_id, eventId: event.id, err: startErr?.message || result?.error }, "failed to start event-triggered workflow");
                    continue;
                }
                // Assign trace ID and run inline chain for immediate execution
                if (result.run_id && !result.deduplicated) {
                    const traceId = randomUUID();
                    await supabase.from("workflow_runs").update({ trace_id: traceId }).eq("id", result.run_id);
                    try {
                        await executeInlineChain(supabase, result.run_id);
                    }
                    catch (err) {
                        log.error({ runId: result.run_id, err: err.message }, "inline chain failed for event run");
                    }
                }
            }
            // Mark event as processed
            await supabase.from("automation_events")
                .update({ status: "processed", processed_at: new Date().toISOString() })
                .eq("id", event.id);
            processed++;
        }
        catch (err) {
            // Mark event as failed
            await supabase.from("automation_events")
                .update({
                    status: "failed",
                    processed_at: new Date().toISOString(),
                    error_message: sanitizeError(err),
                })
                .eq("id", event.id);
            log.error({ eventId: event.id, err: sanitizeError(err) }, "error processing event trigger");
        }
    }
    return processed;
}
|
|
938
|
+
// ============================================================================
|
|
939
|
+
// DEAD LETTER QUEUE — archive failed runs for investigation
|
|
940
|
+
// ============================================================================
|
|
941
|
+
/**
 * Copy a failed run's details into the dead-letter queue (workflow_dlq) so it
 * can be inspected later. A missing run row or a failed insert is non-fatal:
 * the former returns silently, the latter only logs a warning.
 */
async function archiveToDlq(supabase, runId, workflowId, storeId, workflowName) {
    const lookup = await supabase.from("workflow_runs")
        .select("error_message, error_step_key, trigger_type, trigger_payload, step_outputs, duration_ms")
        .eq("id", runId)
        .single();
    const failedRun = lookup.data;
    if (!failedRun) {
        // Run row not found — nothing to archive
        return;
    }
    const dlqRow = {
        store_id: storeId,
        run_id: runId,
        workflow_id: workflowId,
        workflow_name: workflowName || null,
        error_message: failedRun.error_message,
        error_step_key: failedRun.error_step_key,
        trigger_type: failedRun.trigger_type,
        trigger_payload: failedRun.trigger_payload || {},
        step_outputs: failedRun.step_outputs || {},
        run_duration_ms: failedRun.duration_ms,
    };
    const { error: insertErr } = await supabase.from("workflow_dlq").insert(dlqRow);
    if (insertErr) {
        log.warn({ err: insertErr.message, runId, workflowId }, "archiveToDlq insert failed");
    }
}
|
|
962
|
+
/**
 * Run a user-supplied JavaScript code step inside the process-isolated worker
 * pool, so a crash, OOM, or infinite loop in user code cannot take down the
 * server. There is deliberately NO in-process fallback (P0: SSRF risk via
 * unrestricted fetch in the old sandbox) — pool failure is a hard error.
 * Python steps are rejected outright until a real sandbox exists.
 */
async function executeCodeStepIsolated(config, ctx) {
    const { code } = config;
    if (!code) {
        return { success: false, error: "No code in code step config" };
    }
    if ((config.language || "javascript") === "python") {
        // P0 FIX: Python regex sandbox is trivially bypassable (string concatenation,
        // __builtins__, globals() aliasing). Hard-fail until a real Python sandbox
        // (e.g. Pyodide/WASM or containerized execution) is available.
        return { success: false, error: "Python code execution is temporarily disabled — sandbox under hardening. Use JavaScript code steps instead." };
    }
    try {
        const poolResult = await executeWithPool({
            code,
            context: { steps: ctx.steps, trigger: ctx.trigger, input: ctx.input },
            timeoutMs: config.timeout_ms || CODE_TIMEOUT_MS,
        });
        // P3 FIX: Enforce same 100KB output cap as Python code steps
        if (poolResult.success && poolResult.output) {
            const serialized = JSON.stringify(poolResult.output);
            if (serialized.length > CODE_OUTPUT_MAX) {
                poolResult.output = {
                    result: serialized.slice(0, CODE_OUTPUT_MAX) + "\n[output truncated at 100KB]",
                    truncated: true,
                };
            }
        }
        return poolResult;
    }
    catch (err) {
        // P0 FIX: Hard failure — no in-process fallback (SSRF risk via unrestricted fetch in sandbox)
        log.error({ err: err.message }, "worker pool execution failed, no fallback");
        return { success: false, error: "Code execution unavailable — worker pool failed" };
    }
}
|
|
1000
|
+
// P0 FIX: executeCodeStepInProcess removed — in-process fallback was a security risk
|
|
1001
|
+
// (unrestricted fetch in sandbox = SSRF). All code steps now require the worker pool.
|
|
1002
|
+
/** Disabled stub: Python code steps always fail until a hardened sandbox ships. */
async function executePythonCode(_code, _ctx) {
    const failure = {
        success: false,
        error: "Python code execution is disabled — sandbox under hardening",
    };
    return failure;
}
|
|
1005
|
+
// ============================================================================
|
|
1006
|
+
// CIRCUIT BREAKER (per user_tool)
|
|
1007
|
+
// ============================================================================
|
|
1008
|
+
/**
 * Gate a tool call on its per-tool circuit breaker.
 * Returns { allowed } plus a human-readable reason when blocked. Unknown tools
 * fail open (execution will surface the real error). An open breaker whose
 * cooldown has elapsed is moved to "half_open" and the call is let through
 * as a probe.
 */
async function checkToolCircuitBreaker(supabase, toolId) {
    const { data: tool } = await supabase.from("user_tools")
        .select("circuit_breaker_state, circuit_breaker_tripped_at, circuit_breaker_cooldown_seconds")
        .eq("id", toolId)
        .single();
    if (!tool) {
        return { allowed: true };
    }
    if (tool.circuit_breaker_state !== "open") {
        return { allowed: true };
    }
    const trippedAtMs = new Date(tool.circuit_breaker_tripped_at).getTime();
    const cooldownEndsMs = trippedAtMs + (tool.circuit_breaker_cooldown_seconds || 300) * 1000;
    if (Date.now() < cooldownEndsMs) {
        return { allowed: false, reason: `Circuit breaker open (cooldown until ${new Date(cooldownEndsMs).toISOString()})` };
    }
    // Cooldown elapsed — allow one probe request through in half-open state
    await supabase.from("user_tools").update({ circuit_breaker_state: "half_open" }).eq("id", toolId);
    return { allowed: true };
}
|
|
1024
|
+
// P1 FIX: Transient error patterns — these indicate network/infra issues, not broken tools.
// Circuit breaker should only trip on persistent tool-level errors (auth, 404, bad config).
const TRANSIENT_ERROR_PATTERNS = [
    /timed?\s*out/i,
    /timeout/i,
    /ECONNREFUSED/i,
    /ECONNRESET/i,
    /ETIMEDOUT/i,
    /fetch failed/i,
    /network/i,
    /socket hang up/i,
    /EPIPE/i,
    /EHOSTUNREACH/i,
    /503 Service Unavailable/i,
    /502 Bad Gateway/i,
    /429 Too Many Requests/i,
];
/** True when the message matches a known transient (retryable) failure pattern. */
function isTransientError(errorMessage) {
    if (!errorMessage) {
        return false;
    }
    for (const pattern of TRANSIENT_ERROR_PATTERNS) {
        if (pattern.test(errorMessage)) {
            return true;
        }
    }
    return false;
}
|
|
1036
|
+
/**
 * Record a tool invocation outcome against the tool's circuit breaker.
 * Success resets the breaker to closed; persistent failures increment the
 * counter and trip the breaker open (with an audit log row) once the
 * configured threshold is reached. Transient network/timeout errors are
 * ignored entirely — see isTransientError.
 */
async function updateToolCircuitBreaker(supabase, toolId, success, errorMessage) {
    if (success) {
        await supabase.from("user_tools")
            .update({ circuit_breaker_state: "closed", circuit_breaker_failures: 0 })
            .eq("id", toolId);
        return;
    }
    // P1 FIX: Skip circuit breaker increment for transient errors (network, timeout, etc.)
    if (isTransientError(errorMessage)) {
        log.debug({ toolId, error: errorMessage }, "skipping circuit breaker increment for transient error");
        return;
    }
    const { data: breaker } = await supabase.from("user_tools")
        .select("circuit_breaker_failures, circuit_breaker_threshold")
        .eq("id", toolId)
        .single();
    if (!breaker) {
        return;
    }
    const failureCount = (breaker.circuit_breaker_failures || 0) + 1;
    if (failureCount < (breaker.circuit_breaker_threshold || 5)) {
        // Below threshold — just bump the counter
        await supabase.from("user_tools").update({ circuit_breaker_failures: failureCount }).eq("id", toolId);
        return;
    }
    // Threshold reached — trip the breaker open and leave an audit trail
    await supabase.from("user_tools").update({
        circuit_breaker_state: "open",
        circuit_breaker_failures: failureCount,
        circuit_breaker_tripped_at: new Date().toISOString(),
    }).eq("id", toolId);
    const { error: toolCbErr } = await supabase.from("audit_logs").insert({
        action: "workflow.circuit_breaker.tripped",
        severity: "warning",
        resource_type: "user_tool",
        resource_id: toolId,
        source: "workflow_engine",
        details: { failures: failureCount, threshold: breaker.circuit_breaker_threshold },
    });
    if (toolCbErr) {
        log.warn({ err: toolCbErr.message, toolId }, "tool circuit breaker audit failed");
    }
}
|
|
1068
|
+
/**
 * Record a run outcome against the workflow-level circuit breaker.
 * Mirrors updateToolCircuitBreaker but operates on the workflows table:
 * success resets the breaker, persistent failures count toward the threshold,
 * transient errors are ignored, and tripping writes an audit log row.
 */
async function handleWorkflowCircuitBreaker(supabase, workflowId, success, errorMessage) {
    if (success) {
        await supabase.from("workflows")
            .update({ circuit_breaker_state: "closed", circuit_breaker_failures: 0 })
            .eq("id", workflowId);
        return;
    }
    // P1 FIX: Skip circuit breaker increment for transient errors
    if (isTransientError(errorMessage)) {
        log.debug({ workflowId, error: errorMessage }, "skipping workflow circuit breaker increment for transient error");
        return;
    }
    const { data: breaker } = await supabase.from("workflows")
        .select("circuit_breaker_failures, circuit_breaker_threshold")
        .eq("id", workflowId)
        .single();
    if (!breaker) {
        return;
    }
    const failureCount = (breaker.circuit_breaker_failures || 0) + 1;
    if (failureCount < (breaker.circuit_breaker_threshold || 5)) {
        // Below threshold — just bump the counter
        await supabase.from("workflows").update({ circuit_breaker_failures: failureCount }).eq("id", workflowId);
        return;
    }
    // Threshold reached — trip the breaker open and leave an audit trail
    await supabase.from("workflows").update({
        circuit_breaker_state: "open",
        circuit_breaker_failures: failureCount,
        circuit_breaker_tripped_at: new Date().toISOString(),
    }).eq("id", workflowId);
    const { error: wfCbErr } = await supabase.from("audit_logs").insert({
        action: "workflow.circuit_breaker.tripped",
        severity: "warning",
        resource_type: "workflow",
        resource_id: workflowId,
        source: "workflow_engine",
        details: { failures: failureCount, threshold: breaker.circuit_breaker_threshold },
    });
    if (wfCbErr) {
        log.warn({ err: wfCbErr.message, workflowId }, "workflow circuit breaker audit failed");
    }
}
|
|
1100
|
+
// ============================================================================
|
|
1101
|
+
// CORE ENGINE
|
|
1102
|
+
// ============================================================================
|
|
1103
|
+
/**
 * Reclaim steps stuck in "running" status longer than their timeout + buffer.
 * This handles steps that crashed mid-execution (e.g., server restart, OOM):
 * steps with retries left are reset to "retrying" so the worker re-executes
 * them; steps with exhausted retries are marked "failed".
 * Returns the number of step runs touched.
 */
async function reclaimStaleSteps(supabase) {
    // P0 FIX: Use step.timeout_seconds * 2 (min 300s) as stale threshold to prevent
    // double-execution. The old 60s buffer was too aggressive for long-running steps.
    const MIN_STALE_THRESHOLD_MS = 300_000; // 5 min minimum before reclaim
    const MAX_STALE_AGE_MS = 1_200_000; // Hard ceiling: 20 min = always stale
    const { data: runningSteps } = await supabase.from("workflow_step_runs")
        .select(`
    id, step_key, run_id, attempt_count, max_attempts, started_at,
    workflow_steps!inner(timeout_seconds)
  `)
        .eq("status", "running")
        .not("started_at", "is", null)
        .limit(50);
    if (!runningSteps?.length) {
        return 0;
    }
    const nowMs = Date.now();
    let reclaimedCount = 0;
    for (const stepRun of runningSteps) {
        const timeoutSec = stepRun.workflow_steps?.timeout_seconds || 120;
        // P0 FIX: threshold = max(300s, timeout * 2), capped at the hard ceiling
        const thresholdMs = Math.min(Math.max(MIN_STALE_THRESHOLD_MS, timeoutSec * 2000), MAX_STALE_AGE_MS);
        const elapsedMs = nowMs - new Date(stepRun.started_at).getTime();
        if (elapsedMs < thresholdMs) {
            continue;
        }
        const exhaustedRetries = stepRun.attempt_count >= stepRun.max_attempts;
        if (exhaustedRetries) {
            // No retries left — mark as failed
            await supabase.from("workflow_step_runs").update({
                status: "failed",
                error_message: `Step stale: running for ${Math.round(elapsedMs / 1000)}s with no response (retries exhausted)`,
                completed_at: new Date().toISOString(),
            }).eq("id", stepRun.id);
        }
        else {
            // Reset to retrying so worker picks it up again
            await supabase.from("workflow_step_runs").update({
                status: "retrying",
                error_message: `Step stale: running for ${Math.round(elapsedMs / 1000)}s with no response (auto-reclaimed)`,
                next_retry_at: new Date(nowMs + 5000).toISOString(),
            }).eq("id", stepRun.id);
        }
        reclaimedCount++;
        log.warn({ stepRunId: stepRun.id, stepKey: stepRun.step_key, runId: stepRun.run_id, elapsedSec: Math.round(elapsedMs / 1000), exhaustedRetries }, "reclaimed stale step");
    }
    return reclaimedCount;
}
|
|
1154
|
+
/**
 * Main worker tick: reclaim stale steps, claim a batch of pending step runs,
 * enforce workflow circuit breakers (skipping steps from tripped workflows),
 * then execute each claimed step. Email for_each children are throttled
 * sequentially; everything else runs in parallel.
 *
 * @param {object} supabase - Supabase client (service role).
 * @param {number} [batchSize=10] - Max steps to claim per tick.
 * @returns {Promise<{processed: number, errors: number, reclaimed: number}>}
 */
export async function processWorkflowSteps(supabase, batchSize = 10) {
    // Reclaim stale steps first so they become available for claiming
    const reclaimed = await reclaimStaleSteps(supabase).catch(e => {
        log.warn({ err: e.message }, "reclaimStaleSteps failed");
        return 0;
    });
    const { data: claimedRaw, error: claimErr } = await supabase.rpc("claim_pending_steps", {
        batch_size: batchSize,
    });
    if (claimErr) {
        log.error({ err: claimErr.message }, "workflow claim error");
        return { processed: 0, errors: 1, reclaimed };
    }
    let claimed = Array.isArray(claimedRaw) ? claimedRaw : [];
    if (claimed.length === 0)
        return { processed: 0, errors: 0, reclaimed };
    // Circuit breaker enforcement — skip steps from workflows with open breakers
    const workflowIds = [...new Set(claimed.map(s => s.workflow_id))];
    const { data: openBreakers } = await supabase.from("workflows")
        .select("id")
        .in("id", workflowIds)
        .eq("circuit_breaker_state", "open");
    if (openBreakers?.length) {
        const blockedIds = new Set(openBreakers.map(w => w.id));
        const blocked = claimed.filter(s => blockedIds.has(s.workflow_id));
        claimed = claimed.filter(s => !blockedIds.has(s.workflow_id));
        // Mark blocked steps as skipped and finalize affected runs
        const affectedRuns = new Map();
        for (const step of blocked) {
            await supabase.from("workflow_step_runs").update({
                status: "skipped",
                error_message: "Workflow circuit breaker is open — step skipped",
                completed_at: new Date().toISOString(),
            }).eq("id", step.step_run_id);
            affectedRuns.set(step.run_id, step.workflow_id);
        }
        // Check completion for affected runs — prevents zombie "running" state
        for (const [runId, workflowId] of affectedRuns) {
            await checkWorkflowCompletion(supabase, runId, workflowId).catch(e => log.warn({ runId, err: e.message }, "completion check after circuit breaker skip failed"));
        }
        if (blocked.length)
            log.warn({ skippedSteps: blocked.length, blockedWorkflows: blockedIds.size }, "circuit breaker skipped steps");
        if (claimed.length === 0)
            return { processed: blocked.length, errors: 0, reclaimed };
    }
    log.info({ stepCount: claimed.length }, "processing workflow steps");
    let errors = 0;
    // Batch-fetch trace_ids for all runs in this batch
    const runIds = [...new Set(claimed.map(s => s.run_id))];
    const { data: runTraces } = await supabase.from("workflow_runs")
        .select("id, trace_id").in("id", runIds);
    const traceMap = new Map((runTraces || []).map(r => [r.id, r.trace_id]));
    // Partition: email tool steps from for_each need sequential processing with delays
    // to avoid overwhelming Resend's 2 req/s rate limit
    const isForEachEmailStep = (s) => s.parent_step_run_id && s.step_type === "tool" &&
        String(s.step_config?.tool_name || "").includes("email");
    const emailForEachSteps = claimed.filter(isForEachEmailStep);
    const otherSteps = claimed.filter(s => !isForEachEmailStep(s));
    const processStep = async (step) => {
        try {
            await applyVersionOverrides(supabase, step);
            await executeAndAdvance(supabase, step, traceMap.get(step.run_id));
        }
        catch (err) {
            errors++;
            const errMsg = sanitizeError(err);
            log.error({ stepKey: step.step_key, runId: step.run_id, err: errMsg }, "step execution error");
            await supabase.from("workflow_step_runs").update({
                status: "failed", error_message: errMsg,
                completed_at: new Date().toISOString(), duration_ms: 0,
            }).eq("id", step.step_run_id);
            // Surface error to clients (SSE broadcast + structured error_details on step run)
            await surfaceStepError(supabase, step, errMsg);
            // Check if this failure finalizes the run (prevents zombie runs from uncaught errors)
            await checkWorkflowCompletion(supabase, step.run_id, step.workflow_id).catch(e => log.warn({ runId: step.run_id, err: e.message }, "completion check after step error failed"));
        }
    };
    // Process non-email steps in parallel (existing behavior)
    await Promise.all(otherSteps.map(processStep));
    // Process email for_each children sequentially with 550ms throttle
    for (let i = 0; i < emailForEachSteps.length; i++) {
        if (i > 0)
            await new Promise(r => setTimeout(r, 550));
        await processStep(emailForEachSteps[i]);
    }
    // BUG FIX: every other return path includes `reclaimed`; this one previously
    // omitted it, giving callers an inconsistent result shape. Include it here too.
    return { processed: claimed.length, errors, reclaimed };
}
|
|
1241
|
+
/**
 * Check waiting steps: sub_workflow children completed, parallel/for_each children done.
 * Called by the persistent worker loop alongside processWorkflowSteps.
 *
 * Three phases:
 *   0. Best-effort expiry of pending approvals via RPC (non-fatal if missing).
 *   1. Resolve sub_workflow steps whose child run has reached success/failed.
 *   2. Resolve parallel/for_each parents whose children are all terminal —
 *      preferring an aggregate RPC, with an N+1 per-parent fallback when the
 *      RPC is not yet deployed.
 *
 * @param {object} supabase - Supabase client (service role).
 * @returns {Promise<number>} count of waiting steps resolved this pass.
 */
export async function processWaitingSteps(supabase) {
    let resolved = 0;
    // 0. Expire pending approvals (Phase 2)
    try {
        await supabase.rpc("expire_pending_approvals");
    }
    catch (err) {
        // Non-fatal — RPC may not exist yet if migration not applied
        log.warn({ err: sanitizeError(err) }, "expire_pending_approvals error");
    }
    // 1. Sub-workflow steps waiting for child runs to complete
    const { data: subWfSteps } = await supabase
        .from("workflow_step_runs")
        .select("id, run_id, step_key, child_run_id, step_type")
        .eq("status", "waiting")
        .eq("step_type", "sub_workflow")
        .not("child_run_id", "is", null)
        .limit(50);
    if (subWfSteps?.length) {
        const childRunIds = subWfSteps.map(s => s.child_run_id).filter(Boolean);
        // Only terminal child runs are fetched; still-running children leave the
        // parent step in "waiting" for a later pass.
        const { data: childRuns } = await supabase
            .from("workflow_runs")
            .select("id, status, step_outputs, error_message")
            .in("id", childRunIds)
            .in("status", ["success", "failed"]);
        if (childRuns?.length) {
            const runMap = new Map(childRuns.map(r => [r.id, r]));
            for (const step of subWfSteps) {
                const childRun = runMap.get(step.child_run_id);
                if (!childRun)
                    continue; // child not terminal yet — keep waiting
                const success = childRun.status === "success";
                // Mirror the child run's outcome onto the parent step run
                await supabase.from("workflow_step_runs").update({
                    status: success ? "success" : "failed",
                    output: childRun.step_outputs,
                    error_message: success ? null : childRun.error_message,
                    completed_at: new Date().toISOString(),
                }).eq("id", step.id);
                // Accumulate output + advance
                await accumulateAndAdvance(supabase, step.id, step.run_id, step.step_key, success, childRun.step_outputs, childRun.error_message);
                resolved++;
            }
        }
    }
    // 2. P1 FIX: Use aggregate RPC to eliminate N+1 queries (was 4 queries per parent)
    const { data: aggregatedParents, error: aggErr } = await supabase.rpc("get_waiting_parents_with_children");
    if (aggErr) {
        // Fallback to old N+1 pattern if RPC doesn't exist yet
        log.debug({ err: aggErr.message }, "get_waiting_parents_with_children RPC unavailable, using fallback");
        const { data: waitingParents } = await supabase
            .from("workflow_step_runs")
            .select("id, run_id, step_key, step_type, output")
            .eq("status", "waiting")
            .in("step_type", ["parallel", "for_each"])
            .limit(50);
        if (waitingParents?.length) {
            for (const parent of waitingParents) {
                // Two head-only count queries per parent: all children vs terminal children
                const { count: totalChildren } = await supabase
                    .from("workflow_step_runs")
                    .select("id", { count: "exact", head: true })
                    .eq("parent_step_run_id", parent.id);
                const { count: doneChildren } = await supabase
                    .from("workflow_step_runs")
                    .select("id", { count: "exact", head: true })
                    .eq("parent_step_run_id", parent.id)
                    .in("status", ["success", "failed", "skipped", "cancelled"]);
                if (totalChildren && doneChildren && doneChildren >= totalChildren) {
                    const { data: childOutputs } = await supabase
                        .from("workflow_step_runs")
                        .select("step_key, output, status, error_message")
                        .eq("parent_step_run_id", parent.id)
                        .order("created_at", { ascending: true });
                    const outputs = (childOutputs || []).map(c => c.output);
                    const failedKids = (childOutputs || []).filter(c => c.status === "failed");
                    // Note: "skipped"/"cancelled" children count as done but NOT as failures
                    const allSuccess = failedKids.length === 0;
                    await supabase.from("workflow_step_runs").update({
                        status: allSuccess ? "success" : "failed",
                        output: { children: outputs, total: totalChildren, failed: failedKids.length },
                        error_message: allSuccess ? null : failedKids[0]?.error_message,
                        completed_at: new Date().toISOString(),
                    }).eq("id", parent.id);
                    await accumulateAndAdvance(supabase, parent.id, parent.run_id, parent.step_key, allSuccess, { children: outputs }, allSuccess ? null : failedKids[0]?.error_message);
                    resolved++;
                }
            }
        }
    }
    else if (aggregatedParents?.length) {
        // RPC path: each row carries parent identifiers plus pre-aggregated child
        // counts and outputs. NOTE(review): assumes child_outputs rows expose
        // { output, status, error_message } in child order — confirm against the
        // get_waiting_parents_with_children RPC definition.
        for (const parent of aggregatedParents) {
            if (parent.total_children > 0 && parent.done_children >= parent.total_children) {
                const childOutputsArr = (parent.child_outputs || []);
                const outputs = childOutputsArr.map((c) => c.output);
                const allSuccess = parent.failed_children === 0;
                const firstError = childOutputsArr.find((c) => c.status === "failed")?.error_message;
                await supabase.from("workflow_step_runs").update({
                    status: allSuccess ? "success" : "failed",
                    output: { children: outputs, total: parent.total_children, failed: parent.failed_children },
                    error_message: allSuccess ? null : firstError,
                    completed_at: new Date().toISOString(),
                }).eq("id", parent.parent_id);
                await accumulateAndAdvance(supabase, parent.parent_id, parent.parent_run_id, parent.parent_step_key, allSuccess, { children: outputs }, allSuccess ? null : firstError);
                resolved++;
            }
        }
    }
    return resolved;
}
|
|
1352
|
+
/**
 * Helper: after a waiting step resolves, accumulate its output into the run
 * and advance the workflow.
 *
 * Records { output, status } under the step's key (atomically via the
 * accumulate_step_output RPC, falling back to a read-modify-write update if
 * the migration isn't applied yet), then routes to the step's on_success /
 * on_failure edge — or finalizes the run when no edge exists.
 *
 * @param {object} supabase - Supabase client.
 * @param {string} stepRunId - workflow_step_runs.id of the resolved step.
 * @param {string} runId - owning workflow_runs.id.
 * @param {string} stepKey - key under which output is accumulated.
 * @param {boolean} success - whether the step succeeded.
 * @param {object} output - step output payload.
 * @param {string|null} errorMessage - failure message when !success.
 */
async function accumulateAndAdvance(supabase, stepRunId, runId, stepKey, success, output, errorMessage) {
    // Load run to get current step_outputs + workflow_id + store_id
    const { data: run } = await supabase.from("workflow_runs")
        .select("workflow_id, step_outputs, store_id").eq("id", runId).single();
    if (!run)
        return;
    const { data: stepDef } = await supabase.from("workflow_step_runs")
        .select("step_id, step_key").eq("id", stepRunId).single();
    if (!stepDef)
        return;
    // Phase 4: Try versioned step def first, fall back to live table
    let wsDef = null;
    const versionedSteps = await loadVersionedSteps(supabase, runId);
    if (versionedSteps) {
        const vStep = versionedSteps.find((s) => s.step_key === stepKey);
        if (vStep)
            wsDef = { on_success: vStep.on_success, on_failure: vStep.on_failure, max_retries: vStep.max_retries };
    }
    if (!wsDef) {
        const { data } = await supabase.from("workflow_steps")
            .select("on_success, on_failure, max_retries").eq("id", stepDef.step_id).single();
        wsDef = data;
    }
    // P1 FIX: Atomic step output accumulation — use jsonb_set instead of read-modify-write
    // This prevents lost updates when multiple steps complete concurrently
    const stepOutput = { output, status: success ? "success" : "failed" };
    // IDIOM FIX: straight await instead of mixing `await` with `.then()` on the
    // same call — behavior is identical, control flow is explicit.
    const { error: rpcErr } = await supabase.rpc("accumulate_step_output", {
        p_run_id: runId,
        p_step_key: stepKey,
        p_step_output: stepOutput,
    });
    if (rpcErr) {
        // Fallback to direct (non-atomic) update if RPC doesn't exist yet (migration pending)
        const newOutputs = { ...(run.step_outputs || {}), [stepKey]: stepOutput };
        await supabase.from("workflow_runs").update({ step_outputs: newOutputs }).eq("id", runId);
    }
    if (success) {
        if (wsDef?.on_success) {
            await createNextStepRunByKey(supabase, runId, run.workflow_id, wsDef.on_success);
        }
        else {
            // No success edge — this may have been the last step; finalize if so
            await checkWorkflowCompletion(supabase, runId, run.workflow_id);
        }
    }
    else {
        if (wsDef?.on_failure) {
            await createNextStepRunByKey(supabase, runId, run.workflow_id, wsDef.on_failure);
        }
        else {
            // No failure edge — the run itself fails at this step
            await completeWorkflowRun(supabase, runId, run.workflow_id, run.store_id, "failed", errorMessage, stepKey);
        }
    }
}
|
|
1407
|
+
export async function executeAndAdvance(supabase, step, traceId) {
|
|
1408
|
+
const startTime = Date.now();
|
|
1409
|
+
// Phase 3.3: OTEL span for step execution
|
|
1410
|
+
const span = startSpan("workflow.step.execute", {
|
|
1411
|
+
"workflow.run_id": step.run_id,
|
|
1412
|
+
"workflow.step_key": step.step_key,
|
|
1413
|
+
"workflow.step_type": step.step_type,
|
|
1414
|
+
"workflow.attempt": step.attempt_count,
|
|
1415
|
+
...(traceId ? { "workflow.trace_id": traceId } : {}),
|
|
1416
|
+
});
|
|
1417
|
+
// Event journal — step started
|
|
1418
|
+
await logWorkflowEvent(supabase, step.run_id, "step_started", {
|
|
1419
|
+
step_key: step.step_key, step_type: step.step_type, attempt: step.attempt_count,
|
|
1420
|
+
}, step.step_run_id);
|
|
1421
|
+
// Step result caching — skip successful steps on retry (idempotent re-execution)
|
|
1422
|
+
if (step.attempt_count > 1) {
|
|
1423
|
+
const { data: prevRun } = await supabase.from("workflow_step_runs")
|
|
1424
|
+
.select("status, output").eq("run_id", step.run_id).eq("step_key", step.step_key)
|
|
1425
|
+
.eq("status", "success").neq("id", step.step_run_id).limit(1);
|
|
1426
|
+
if (prevRun?.length) {
|
|
1427
|
+
log.info({ stepKey: step.step_key, runId: step.run_id }, "step already succeeded, using cached result");
|
|
1428
|
+
await supabase.from("workflow_step_runs").update({
|
|
1429
|
+
status: "success", output: prevRun[0].output,
|
|
1430
|
+
completed_at: new Date().toISOString(), duration_ms: Date.now() - startTime,
|
|
1431
|
+
}).eq("id", step.step_run_id);
|
|
1432
|
+
await logWorkflowEvent(supabase, step.run_id, "step_cached", { step_key: step.step_key }, step.step_run_id);
|
|
1433
|
+
const nextStepKey = step.on_success;
|
|
1434
|
+
if (!nextStepKey) {
|
|
1435
|
+
await checkWorkflowCompletion(supabase, step.run_id, step.workflow_id);
|
|
1436
|
+
}
|
|
1437
|
+
else {
|
|
1438
|
+
await createNextStepRunByKey(supabase, step.run_id, step.workflow_id, nextStepKey);
|
|
1439
|
+
}
|
|
1440
|
+
return;
|
|
1441
|
+
}
|
|
1442
|
+
}
|
|
1443
|
+
// Flow control — check concurrency/rate limits before execution
|
|
1444
|
+
const flowCheck = await checkFlowControl(supabase, step);
|
|
1445
|
+
if (!flowCheck.allowed) {
|
|
1446
|
+
// Requeue step with short delay for flow control backoff
|
|
1447
|
+
await supabase.from("workflow_step_runs").update({
|
|
1448
|
+
status: "retrying",
|
|
1449
|
+
next_retry_at: new Date(Date.now() + 2000).toISOString(), // retry in 2s
|
|
1450
|
+
output: { flow_control: flowCheck.reason },
|
|
1451
|
+
}).eq("id", step.step_run_id);
|
|
1452
|
+
await logWorkflowEvent(supabase, step.run_id, "step_throttled", { reason: flowCheck.reason }, step.step_run_id);
|
|
1453
|
+
return;
|
|
1454
|
+
}
|
|
1455
|
+
// Build template context
|
|
1456
|
+
const ctx = {
|
|
1457
|
+
steps: {},
|
|
1458
|
+
trigger: step.trigger_payload || {},
|
|
1459
|
+
input: step.input || undefined,
|
|
1460
|
+
workflow: {
|
|
1461
|
+
id: step.workflow_id,
|
|
1462
|
+
store_id: step.store_id,
|
|
1463
|
+
},
|
|
1464
|
+
run: {
|
|
1465
|
+
id: step.run_id,
|
|
1466
|
+
workflow_id: step.workflow_id,
|
|
1467
|
+
store_id: step.store_id,
|
|
1468
|
+
},
|
|
1469
|
+
};
|
|
1470
|
+
if (step.step_outputs && typeof step.step_outputs === "object") {
|
|
1471
|
+
for (const [key, val] of Object.entries(step.step_outputs)) {
|
|
1472
|
+
if (val && typeof val === "object") {
|
|
1473
|
+
ctx.steps[key] = val;
|
|
1474
|
+
}
|
|
1475
|
+
}
|
|
1476
|
+
}
|
|
1477
|
+
let result;
|
|
1478
|
+
// Enforce step-level timeout from step column (default 30s)
|
|
1479
|
+
const stepTimeoutSec = step.timeout_seconds || step.step_config.timeout_seconds || 30;
|
|
1480
|
+
const stepTimeoutMs = stepTimeoutSec * 1000;
|
|
1481
|
+
let stepTimer;
|
|
1482
|
+
const stepTimeoutPromise = new Promise((_, reject) => {
|
|
1483
|
+
stepTimer = setTimeout(() => reject(new Error(`Step timed out after ${stepTimeoutSec}s`)), stepTimeoutMs);
|
|
1484
|
+
});
|
|
1485
|
+
try {
|
|
1486
|
+
switch (step.step_type) {
|
|
1487
|
+
case "tool":
|
|
1488
|
+
result = await Promise.race([executeToolStep(supabase, step.step_config, ctx, step.store_id, traceId), stepTimeoutPromise]);
|
|
1489
|
+
break;
|
|
1490
|
+
case "condition":
|
|
1491
|
+
result = executeConditionStep(step.step_config, ctx);
|
|
1492
|
+
break;
|
|
1493
|
+
case "transform":
|
|
1494
|
+
result = executeTransformStep(step.step_config, ctx);
|
|
1495
|
+
break;
|
|
1496
|
+
case "delay": {
|
|
1497
|
+
// First attempt: set the delay. Second attempt (after delay): success.
|
|
1498
|
+
if (step.attempt_count <= 1) {
|
|
1499
|
+
const delaySec = step.step_config.seconds || 60;
|
|
1500
|
+
await supabase.from("workflow_step_runs").update({
|
|
1501
|
+
status: "retrying",
|
|
1502
|
+
output: { delay_seconds: delaySec, resume_at: new Date(Date.now() + delaySec * 1000).toISOString() },
|
|
1503
|
+
next_retry_at: new Date(Date.now() + delaySec * 1000).toISOString(),
|
|
1504
|
+
}).eq("id", step.step_run_id);
|
|
1505
|
+
clearTimeout(stepTimer);
|
|
1506
|
+
return; // Worker picks it up after delay
|
|
1507
|
+
}
|
|
1508
|
+
result = { success: true, output: { delayed: true, seconds: step.step_config.seconds } };
|
|
1509
|
+
break;
|
|
1510
|
+
}
|
|
1511
|
+
case "agent":
|
|
1512
|
+
result = await Promise.race([executeAgentStep(step.step_config, ctx, step.store_id, supabase, step, traceId), stepTimeoutPromise]);
|
|
1513
|
+
break;
|
|
1514
|
+
case "sub_workflow": {
|
|
1515
|
+
const childWfId = resolveTemplate((step.step_config.workflow_id || ""), ctx);
|
|
1516
|
+
if (!childWfId) {
|
|
1517
|
+
result = { success: false, error: "No workflow_id in sub_workflow config" };
|
|
1518
|
+
break;
|
|
1519
|
+
}
|
|
1520
|
+
const payloadTemplate = (step.step_config.trigger_payload_template || step.step_config.trigger_payload || {});
|
|
1521
|
+
const payload = resolveTemplate(payloadTemplate, ctx);
|
|
1522
|
+
const { data: startResult } = await supabase.rpc("start_workflow_run", {
|
|
1523
|
+
p_workflow_id: childWfId,
|
|
1524
|
+
p_store_id: step.store_id,
|
|
1525
|
+
p_trigger_type: "sub_workflow",
|
|
1526
|
+
p_trigger_payload: payload,
|
|
1527
|
+
});
|
|
1528
|
+
if (!startResult?.success) {
|
|
1529
|
+
result = { success: false, error: startResult?.error || "Failed to start sub-workflow" };
|
|
1530
|
+
break;
|
|
1531
|
+
}
|
|
1532
|
+
// Set to waiting — processWaitingSteps will resolve when child completes
|
|
1533
|
+
await supabase.from("workflow_step_runs").update({
|
|
1534
|
+
status: "waiting",
|
|
1535
|
+
child_run_id: startResult.run_id,
|
|
1536
|
+
output: { child_run_id: startResult.run_id, child_workflow_id: childWfId },
|
|
1537
|
+
}).eq("id", step.step_run_id);
|
|
1538
|
+
clearTimeout(stepTimer);
|
|
1539
|
+
return;
|
|
1540
|
+
}
|
|
1541
|
+
case "parallel": {
|
|
1542
|
+
const stepKeys = (step.step_config.step_keys || step.step_config.child_steps || []);
|
|
1543
|
+
if (stepKeys.length === 0) {
|
|
1544
|
+
result = { success: true, output: { parallel: true, steps: [] } };
|
|
1545
|
+
break;
|
|
1546
|
+
}
|
|
1547
|
+
if (stepKeys.length > MAX_PARALLEL_CHILDREN) {
|
|
1548
|
+
result = { success: false, error: `Parallel step has ${stepKeys.length} children, exceeding limit of ${MAX_PARALLEL_CHILDREN}` };
|
|
1549
|
+
break;
|
|
1550
|
+
}
|
|
1551
|
+
const { data: steps } = await supabase.from("workflow_steps")
|
|
1552
|
+
.select("id, step_key, step_type, max_retries")
|
|
1553
|
+
.eq("workflow_id", step.workflow_id).in("step_key", stepKeys);
|
|
1554
|
+
if (steps?.length) {
|
|
1555
|
+
await supabase.from("workflow_step_runs").insert(steps.map(s => ({
|
|
1556
|
+
run_id: step.run_id, step_id: s.id, step_key: s.step_key,
|
|
1557
|
+
step_type: s.step_type, status: "pending",
|
|
1558
|
+
max_attempts: s.max_retries ?? 3, parent_step_run_id: step.step_run_id,
|
|
1559
|
+
})));
|
|
1560
|
+
}
|
|
1561
|
+
await supabase.from("workflow_step_runs").update({
|
|
1562
|
+
status: "waiting", output: { waiting_for: stepKeys },
|
|
1563
|
+
}).eq("id", step.step_run_id);
|
|
1564
|
+
clearTimeout(stepTimer);
|
|
1565
|
+
return; // processWaitingSteps resolves when all children complete
|
|
1566
|
+
}
|
|
1567
|
+
case "for_each": {
|
|
1568
|
+
const itemsExpr = step.step_config.items;
|
|
1569
|
+
const targetStepKey = step.step_config.step_key;
|
|
1570
|
+
if (!itemsExpr || !targetStepKey) {
|
|
1571
|
+
result = { success: false, error: "for_each requires items and step_key in config" };
|
|
1572
|
+
break;
|
|
1573
|
+
}
|
|
1574
|
+
const items = resolveTemplate(itemsExpr, ctx);
|
|
1575
|
+
if (!Array.isArray(items)) {
|
|
1576
|
+
result = { success: false, error: `for_each items resolved to ${typeof items}, expected array` };
|
|
1577
|
+
break;
|
|
1578
|
+
}
|
|
1579
|
+
if (items.length === 0) {
|
|
1580
|
+
result = { success: true, output: { children: [], total: 0 } };
|
|
1581
|
+
break;
|
|
1582
|
+
}
|
|
1583
|
+
// P2 FIX: Enforce max items limit to prevent runaway step creation
|
|
1584
|
+
const maxItems = step.step_config.max_items || MAX_FOR_EACH_ITEMS;
|
|
1585
|
+
if (items.length > maxItems) {
|
|
1586
|
+
result = { success: false, error: `for_each exceeded maximum of ${maxItems} items (got ${items.length}). Increase limit in step config or paginate.` };
|
|
1587
|
+
break;
|
|
1588
|
+
}
|
|
1589
|
+
// Look up target step definition
|
|
1590
|
+
const { data: targetStep } = await supabase.from("workflow_steps")
|
|
1591
|
+
.select("id, step_key, step_type, max_retries")
|
|
1592
|
+
.eq("workflow_id", step.workflow_id).eq("step_key", targetStepKey).single();
|
|
1593
|
+
if (!targetStep) {
|
|
1594
|
+
result = { success: false, error: `for_each target step '${targetStepKey}' not found` };
|
|
1595
|
+
break;
|
|
1596
|
+
}
|
|
1597
|
+
// Create a step_run per item with the item as input
|
|
1598
|
+
await supabase.from("workflow_step_runs").insert(items.map((item, idx) => ({
|
|
1599
|
+
run_id: step.run_id, step_id: targetStep.id,
|
|
1600
|
+
step_key: `${targetStepKey}[${idx}]`, step_type: targetStep.step_type,
|
|
1601
|
+
status: "pending", max_attempts: targetStep.max_retries ?? 3,
|
|
1602
|
+
parent_step_run_id: step.step_run_id, input: item,
|
|
1603
|
+
})));
|
|
1604
|
+
await supabase.from("workflow_step_runs").update({
|
|
1605
|
+
status: "waiting", output: { waiting_for_count: items.length, target_step: targetStepKey },
|
|
1606
|
+
}).eq("id", step.step_run_id);
|
|
1607
|
+
clearTimeout(stepTimer);
|
|
1608
|
+
return;
|
|
1609
|
+
}
|
|
1610
|
+
case "code": {
|
|
1611
|
+
result = await Promise.race([executeCodeStepIsolated(step.step_config, ctx), stepTimeoutPromise]);
|
|
1612
|
+
break;
|
|
1613
|
+
}
|
|
1614
|
+
case "webhook_out":
|
|
1615
|
+
result = await Promise.race([executeWebhookOutStep(step.step_config, ctx), stepTimeoutPromise]);
|
|
1616
|
+
break;
|
|
1617
|
+
case "noop":
|
|
1618
|
+
result = executeNoopStep();
|
|
1619
|
+
break;
|
|
1620
|
+
case "llm_batch":
|
|
1621
|
+
result = await Promise.race([executeLlmBatchStep(step.step_config, ctx), stepTimeoutPromise]);
|
|
1622
|
+
break;
|
|
1623
|
+
case "approval": {
|
|
1624
|
+
const approvalResult = await executeApprovalStep(supabase, step, ctx);
|
|
1625
|
+
if (approvalResult === "waiting") {
|
|
1626
|
+
clearTimeout(stepTimer);
|
|
1627
|
+
return;
|
|
1628
|
+
}
|
|
1629
|
+
result = approvalResult;
|
|
1630
|
+
break;
|
|
1631
|
+
}
|
|
1632
|
+
// Custom step — POSTs workflow context to a user-defined URL and uses the response
|
|
1633
|
+
case "custom": {
|
|
1634
|
+
const customUrl = resolveTemplate((step.step_config.url || step.step_config.endpoint), ctx);
|
|
1635
|
+
if (!customUrl) {
|
|
1636
|
+
result = { success: false, error: "Custom step requires url in config" };
|
|
1637
|
+
break;
|
|
1638
|
+
}
|
|
1639
|
+
// P0 FIX: Use async validateUrl (DNS resolve-then-check) instead of sync isBlockedUrl
|
|
1640
|
+
const customSsrfError = await validateUrl(customUrl);
|
|
1641
|
+
if (customSsrfError) {
|
|
1642
|
+
result = { success: false, error: `Custom step blocked: ${customSsrfError}` };
|
|
1643
|
+
break;
|
|
1644
|
+
}
|
|
1645
|
+
try {
|
|
1646
|
+
const customHeaders = { "Content-Type": "application/json" };
|
|
1647
|
+
if (step.step_config.headers && typeof step.step_config.headers === "object") {
|
|
1648
|
+
for (const [k, v] of Object.entries(step.step_config.headers)) {
|
|
1649
|
+
customHeaders[k] = resolveTemplate(v, ctx);
|
|
1650
|
+
}
|
|
1651
|
+
}
|
|
1652
|
+
const customBody = JSON.stringify({
|
|
1653
|
+
step_key: step.step_key,
|
|
1654
|
+
run_id: step.run_id,
|
|
1655
|
+
workflow_id: step.workflow_id,
|
|
1656
|
+
input: step.input,
|
|
1657
|
+
step_outputs: step.step_outputs,
|
|
1658
|
+
trigger_payload: step.trigger_payload,
|
|
1659
|
+
config: step.step_config.payload_config || {},
|
|
1660
|
+
});
|
|
1661
|
+
const ctrl = new AbortController();
|
|
1662
|
+
const timer = setTimeout(() => ctrl.abort(), 30_000);
|
|
1663
|
+
const resp = await fetch(customUrl, { method: "POST", headers: customHeaders, body: customBody, signal: ctrl.signal });
|
|
1664
|
+
clearTimeout(timer);
|
|
1665
|
+
const respData = resp.headers.get("content-type")?.includes("json")
|
|
1666
|
+
? await resp.json() : await resp.text();
|
|
1667
|
+
if (!resp.ok) {
|
|
1668
|
+
result = { success: false, error: `Custom step HTTP ${resp.status}: ${String(respData).substring(0, 500)}` };
|
|
1669
|
+
}
|
|
1670
|
+
else {
|
|
1671
|
+
// Support branch routing from custom step response
|
|
1672
|
+
const branch = typeof respData === "object" && respData?.branch ? respData.branch : undefined;
|
|
1673
|
+
result = { success: true, output: respData, branch };
|
|
1674
|
+
}
|
|
1675
|
+
}
|
|
1676
|
+
catch (err) {
|
|
1677
|
+
result = { success: false, error: err.name === "AbortError" ? "Custom step timed out" : sanitizeError(err) };
|
|
1678
|
+
}
|
|
1679
|
+
break;
|
|
1680
|
+
}
|
|
1681
|
+
// Waitpoint — generalized wait-for-external-signal (subsumes approval, webhook callback, cross-workflow)
|
|
1682
|
+
case "waitpoint": {
|
|
1683
|
+
// Second pass — resumed with completion data
|
|
1684
|
+
if (step.input && typeof step.input === "object" && step.input.waitpoint_completed) {
|
|
1685
|
+
result = { success: true, output: step.input.waitpoint_data || {} };
|
|
1686
|
+
break;
|
|
1687
|
+
}
|
|
1688
|
+
// First pass — create waitpoint token and pause
|
|
1689
|
+
const waitpointToken = randomUUID();
|
|
1690
|
+
const waitpointTimeout = step.step_config.timeout_seconds || 86400;
|
|
1691
|
+
const waitpointExpires = new Date(Date.now() + waitpointTimeout * 1000).toISOString();
|
|
1692
|
+
await supabase.from("waitpoint_tokens").insert({
|
|
1693
|
+
token: waitpointToken,
|
|
1694
|
+
run_id: step.run_id,
|
|
1695
|
+
step_run_id: step.step_run_id,
|
|
1696
|
+
store_id: step.store_id,
|
|
1697
|
+
expires_at: waitpointExpires,
|
|
1698
|
+
label: step.step_config.label || step.step_key,
|
|
1699
|
+
});
|
|
1700
|
+
await supabase.from("workflow_step_runs").update({
|
|
1701
|
+
status: "waiting",
|
|
1702
|
+
output: { waiting_for: "waitpoint", token: waitpointToken, expires_at: waitpointExpires },
|
|
1703
|
+
}).eq("id", step.step_run_id);
|
|
1704
|
+
await logWorkflowEvent(supabase, step.run_id, "waitpoint_created", { token: waitpointToken }, step.step_run_id);
|
|
1705
|
+
clearTimeout(stepTimer);
|
|
1706
|
+
return;
|
|
1707
|
+
}
|
|
1708
|
+
default:
|
|
1709
|
+
result = { success: false, error: `Unknown step type: ${step.step_type}` };
|
|
1710
|
+
}
|
|
1711
|
+
}
|
|
1712
|
+
catch (timeoutErr) {
|
|
1713
|
+
result = { success: false, error: timeoutErr.message || `Step timed out after ${stepTimeoutSec}s` };
|
|
1714
|
+
}
|
|
1715
|
+
finally {
|
|
1716
|
+
clearTimeout(stepTimer);
|
|
1717
|
+
}
|
|
1718
|
+
const durationMs = Date.now() - startTime;
|
|
1719
|
+
// Phase 3.3: End OTEL span with result attributes
|
|
1720
|
+
if (result.success) {
|
|
1721
|
+
span.end({ "workflow.duration_ms": durationMs, "workflow.status": "success" });
|
|
1722
|
+
}
|
|
1723
|
+
else {
|
|
1724
|
+
span.setError(result.error || "step failed");
|
|
1725
|
+
span.end({ "workflow.duration_ms": durationMs, "workflow.status": "failed" });
|
|
1726
|
+
}
|
|
1727
|
+
// Event journal — step completed
|
|
1728
|
+
await logWorkflowEvent(supabase, step.run_id, result.success ? "step_completed" : "step_failed", {
|
|
1729
|
+
step_key: step.step_key, duration_ms: durationMs,
|
|
1730
|
+
...(result.error ? { error: result.error } : {}),
|
|
1731
|
+
...(result.branch ? { branch: result.branch } : {}),
|
|
1732
|
+
}, step.step_run_id);
|
|
1733
|
+
// Persist step result
|
|
1734
|
+
await supabase.from("workflow_step_runs").update({
|
|
1735
|
+
status: result.success ? "success" : "failed",
|
|
1736
|
+
output: result.output || null,
|
|
1737
|
+
error_message: result.error || null,
|
|
1738
|
+
completed_at: new Date().toISOString(),
|
|
1739
|
+
duration_ms: durationMs,
|
|
1740
|
+
}).eq("id", step.step_run_id);
|
|
1741
|
+
// P4 FIX: Atomically merge step output using jsonb_set to prevent race conditions
|
|
1742
|
+
// Two concurrent steps can no longer overwrite each other's outputs
|
|
1743
|
+
const stepOutput = { output: result.output, status: result.success ? "success" : "failed", duration_ms: durationMs };
|
|
1744
|
+
const { error: rpcError } = await supabase.rpc("accumulate_step_output", {
|
|
1745
|
+
p_run_id: step.run_id,
|
|
1746
|
+
p_step_key: step.step_key,
|
|
1747
|
+
p_step_output: stepOutput,
|
|
1748
|
+
});
|
|
1749
|
+
if (rpcError) {
|
|
1750
|
+
// Retry once — transient connection errors are common under load
|
|
1751
|
+
log.warn({ err: rpcError.message, runId: step.run_id, stepKey: step.step_key }, "accumulate_step_output RPC error, retrying once");
|
|
1752
|
+
const { error: retryError } = await supabase.rpc("accumulate_step_output", {
|
|
1753
|
+
p_run_id: step.run_id,
|
|
1754
|
+
p_step_key: step.step_key,
|
|
1755
|
+
p_step_output: stepOutput,
|
|
1756
|
+
});
|
|
1757
|
+
if (retryError) {
|
|
1758
|
+
// Final fallback: advisory-lock-guarded RPC prevents parallel step race conditions
|
|
1759
|
+
log.warn({ err: retryError.message, runId: step.run_id, stepKey: step.step_key }, "accumulate_step_output retry failed, using locked fallback");
|
|
1760
|
+
const { error: lockError } = await supabase.rpc("accumulate_step_output_locked", {
|
|
1761
|
+
p_run_id: step.run_id,
|
|
1762
|
+
p_step_key: step.step_key,
|
|
1763
|
+
p_step_output: stepOutput,
|
|
1764
|
+
});
|
|
1765
|
+
if (lockError) {
|
|
1766
|
+
// P0 FIX: Throw so the step is marked as failed, not falsely as success
|
|
1767
|
+
throw new Error(`Failed to accumulate step output after all fallbacks: ${lockError.message}`);
|
|
1768
|
+
}
|
|
1769
|
+
}
|
|
1770
|
+
}
|
|
1771
|
+
// Checkpoint — snapshot state after each step for replay/debugging
|
|
1772
|
+
if (result.success) {
|
|
1773
|
+
const { error: cpError } = await supabase.from("workflow_checkpoints").insert({
|
|
1774
|
+
run_id: step.run_id, step_run_id: step.step_run_id, step_key: step.step_key,
|
|
1775
|
+
step_outputs: { ...(step.step_outputs || {}), [step.step_key]: { output: result.output, status: "success", duration_ms: durationMs } },
|
|
1776
|
+
trigger_payload: step.trigger_payload,
|
|
1777
|
+
sequence_number: Object.keys(step.step_outputs || {}).length + 1,
|
|
1778
|
+
});
|
|
1779
|
+
if (cpError)
|
|
1780
|
+
log.warn({ err: cpError.message, runId: step.run_id, stepKey: step.step_key }, "checkpoint insert failed");
|
|
1781
|
+
}
|
|
1782
|
+
// Audit
|
|
1783
|
+
const { error: stepAuditErr } = await supabase.from("audit_logs").insert({
|
|
1784
|
+
action: `workflow.step.${result.success ? "completed" : "failed"}`,
|
|
1785
|
+
severity: result.success ? "info" : "error",
|
|
1786
|
+
store_id: step.store_id, resource_type: "workflow_step_run",
|
|
1787
|
+
resource_id: step.step_run_id, source: "workflow_engine", duration_ms: durationMs,
|
|
1788
|
+
request_id: traceId || null,
|
|
1789
|
+
details: { workflow_id: step.workflow_id, run_id: step.run_id, step_key: step.step_key, step_type: step.step_type, attempt: step.attempt_count },
|
|
1790
|
+
error_message: result.error || null,
|
|
1791
|
+
});
|
|
1792
|
+
if (stepAuditErr)
|
|
1793
|
+
log.warn({ err: stepAuditErr.message, runId: step.run_id }, "step audit insert failed");
|
|
1794
|
+
// Surface step errors to clients via SSE broadcast + structured error_details
|
|
1795
|
+
if (!result.success && result.error) {
|
|
1796
|
+
await surfaceStepError(supabase, step, result.error);
|
|
1797
|
+
}
|
|
1798
|
+
// Child steps (parallel/for_each) — just save result, parent handles advancement
|
|
1799
|
+
if (step.parent_step_run_id) {
|
|
1800
|
+
// If child failed and has retries left, retry it (respects retry_policy)
|
|
1801
|
+
if (!result.success && step.attempt_count < step.max_attempts) {
|
|
1802
|
+
const retryPolicy = step.step_config.retry_policy;
|
|
1803
|
+
const backoffType = retryPolicy?.backoff_type || "exponential";
|
|
1804
|
+
const baseDelay = retryPolicy?.backoff_base_seconds || step.retry_delay_seconds || 10;
|
|
1805
|
+
const maxBackoff = retryPolicy?.max_backoff_seconds || 300;
|
|
1806
|
+
let backoffDelay;
|
|
1807
|
+
switch (backoffType) {
|
|
1808
|
+
case "fixed":
|
|
1809
|
+
backoffDelay = baseDelay;
|
|
1810
|
+
break;
|
|
1811
|
+
case "linear":
|
|
1812
|
+
backoffDelay = baseDelay * step.attempt_count;
|
|
1813
|
+
break;
|
|
1814
|
+
default:
|
|
1815
|
+
backoffDelay = baseDelay * Math.pow(2, step.attempt_count - 1);
|
|
1816
|
+
break;
|
|
1817
|
+
}
|
|
1818
|
+
backoffDelay = Math.min(backoffDelay, maxBackoff);
|
|
1819
|
+
// P3 FIX: Add jitter (50%-100% of computed delay) to prevent thundering herd
|
|
1820
|
+
backoffDelay *= (0.5 + Math.random() * 0.5);
|
|
1821
|
+
await supabase.from("workflow_step_runs").update({
|
|
1822
|
+
status: "retrying",
|
|
1823
|
+
next_retry_at: new Date(Date.now() + backoffDelay * 1000).toISOString(),
|
|
1824
|
+
}).eq("id", step.step_run_id);
|
|
1825
|
+
}
|
|
1826
|
+
return; // Parent's processWaitingSteps handles advancement
|
|
1827
|
+
}
|
|
1828
|
+
// Handle failure — configurable retry policy
|
|
1829
|
+
if (!result.success) {
|
|
1830
|
+
if (step.attempt_count < step.max_attempts) {
|
|
1831
|
+
// Check retry_on filter — only retry if error matches pattern (if configured)
|
|
1832
|
+
const retryPolicy = step.step_config.retry_policy;
|
|
1833
|
+
const retryOn = retryPolicy?.retry_on;
|
|
1834
|
+
const shouldRetry = !retryOn?.length || retryOn.some(pattern => result.error?.includes(pattern));
|
|
1835
|
+
if (shouldRetry) {
|
|
1836
|
+
const backoffType = retryPolicy?.backoff_type || "exponential";
|
|
1837
|
+
const baseDelay = retryPolicy?.backoff_base_seconds || step.retry_delay_seconds || 10;
|
|
1838
|
+
const maxBackoff = retryPolicy?.max_backoff_seconds || 300; // 5 min cap
|
|
1839
|
+
let backoffDelay;
|
|
1840
|
+
switch (backoffType) {
|
|
1841
|
+
case "fixed":
|
|
1842
|
+
backoffDelay = baseDelay;
|
|
1843
|
+
break;
|
|
1844
|
+
case "linear":
|
|
1845
|
+
backoffDelay = baseDelay * step.attempt_count;
|
|
1846
|
+
break;
|
|
1847
|
+
default:
|
|
1848
|
+
backoffDelay = baseDelay * Math.pow(2, step.attempt_count - 1);
|
|
1849
|
+
break; // exponential
|
|
1850
|
+
}
|
|
1851
|
+
backoffDelay = Math.min(backoffDelay, maxBackoff);
|
|
1852
|
+
// P3 FIX: Add jitter (50%-100% of computed delay) to prevent thundering herd
|
|
1853
|
+
backoffDelay *= (0.5 + Math.random() * 0.5);
|
|
1854
|
+
await supabase.from("workflow_step_runs").update({
|
|
1855
|
+
status: "retrying",
|
|
1856
|
+
next_retry_at: new Date(Date.now() + backoffDelay * 1000).toISOString(),
|
|
1857
|
+
}).eq("id", step.step_run_id);
|
|
1858
|
+
await logWorkflowEvent(supabase, step.run_id, "step_retrying", {
|
|
1859
|
+
step_key: step.step_key, attempt: step.attempt_count, backoff_type: backoffType, delay_seconds: backoffDelay,
|
|
1860
|
+
}, step.step_run_id);
|
|
1861
|
+
return;
|
|
1862
|
+
}
|
|
1863
|
+
// retry_on filter didn't match — fall through to failure handling
|
|
1864
|
+
}
|
|
1865
|
+
if (step.on_failure) {
|
|
1866
|
+
await createNextStepRunByKey(supabase, step.run_id, step.workflow_id, step.on_failure);
|
|
1867
|
+
}
|
|
1868
|
+
else {
|
|
1869
|
+
await completeWorkflowRun(supabase, step.run_id, step.workflow_id, step.store_id, "failed", result.error, step.step_key);
|
|
1870
|
+
}
|
|
1871
|
+
return;
|
|
1872
|
+
}
|
|
1873
|
+
// Advance — condition steps use branch, otherwise on_success
|
|
1874
|
+
const nextStepKey = result.branch || step.on_success;
|
|
1875
|
+
if (!nextStepKey) {
|
|
1876
|
+
await checkWorkflowCompletion(supabase, step.run_id, step.workflow_id);
|
|
1877
|
+
return;
|
|
1878
|
+
}
|
|
1879
|
+
await createNextStepRunByKey(supabase, step.run_id, step.workflow_id, nextStepKey);
|
|
1880
|
+
}
|
|
1881
|
+
// ============================================================================
|
|
1882
|
+
// WORKFLOW ADVANCEMENT HELPERS
|
|
1883
|
+
// ============================================================================
|
|
1884
|
+
// P1 FIX: Per-run in-memory cache for versioned steps (30s TTL)
// Prevents 4x redundant DB calls per step execution
// P0 FIX: Capped at 500 entries with LRU eviction to prevent unbounded memory growth
const VERSION_CACHE_MAX_SIZE = 500;
const versionedStepsCache = new Map();
const VERSION_CACHE_TTL_MS = 30_000;
/** Drop every cached version snapshot (exposed for tests / hot reloads). */
export function clearStepCache() {
    versionedStepsCache.clear();
}
/** Insert into cache with LRU eviction when size exceeds limit */
function versionedStepsCacheSet(key, value) {
    // Delete first (a no-op for new keys) so a re-inserted key lands at the
    // end of the Map's insertion order, i.e. becomes the most recent entry.
    versionedStepsCache.delete(key);
    versionedStepsCache.set(key, value);
    // Map iteration order is insertion order, so the front holds the oldest
    // entries — trim from there until we are back under the cap.
    while (versionedStepsCache.size > VERSION_CACHE_MAX_SIZE) {
        const oldestKey = versionedStepsCache.keys().next().value;
        versionedStepsCache.delete(oldestKey);
    }
}
|
|
1912
|
+
// Periodic cleanup to prevent memory leaks: sweep expired entries every
// minute so stale snapshots don't linger between LRU evictions.
const versionCacheCleanupTimer = setInterval(() => {
    const now = Date.now();
    for (const [key, entry] of versionedStepsCache) {
        if (now > entry.expiresAt)
            versionedStepsCache.delete(key);
    }
}, 60_000);
// FIX: unref the sweep timer so it cannot keep the Node.js process alive
// after all real work has finished. Optional call keeps this a no-op in
// environments where timers are plain numbers (e.g. browsers).
versionCacheCleanupTimer.unref?.();
|
|
1920
|
+
/**
 * Load the versioned steps array for a run. Returns null if no version.
 * Uses per-run in-memory cache with 30s TTL (both hits and misses are cached).
 */
async function loadVersionedSteps(supabase, runId) {
    // Serve from the cache while the entry is still fresh.
    const entry = versionedStepsCache.get(runId);
    if (entry && entry.expiresAt > Date.now())
        return entry.data;
    // Record the outcome — including "no version" — for the TTL window.
    const remember = (data) => {
        versionedStepsCacheSet(runId, { data, expiresAt: Date.now() + VERSION_CACHE_TTL_MS });
        return data;
    };
    const { data: run } = await supabase.from("workflow_runs")
        .select("version_id").eq("id", runId).single();
    if (!run?.version_id)
        return remember(null);
    const { data: version } = await supabase.from("workflow_versions")
        .select("steps").eq("id", run.version_id).single();
    const steps = Array.isArray(version?.steps) ? version.steps : null;
    return remember(steps);
}
|
|
1940
|
+
/**
 * Apply versioned overrides to a claimed step. If the run has a version_id,
 * replaces step_config, on_success, on_failure with values from the snapshot.
 * Mutates `step` in place; runs without a version snapshot are left untouched.
 */
async function applyVersionOverrides(supabase, step) {
    const snapshot = await loadVersionedSteps(supabase, step.run_id);
    const vStep = snapshot?.find((s) => s.step_key === step.step_key);
    if (!vStep)
        return;
    // A missing/falsy snapshot config keeps the live config; on_success and
    // on_failure only fall back when the snapshot value is null/undefined.
    step.step_config = vStep.step_config || step.step_config;
    step.on_success = vStep.on_success ?? step.on_success;
    step.on_failure = vStep.on_failure ?? step.on_failure;
}
|
|
1955
|
+
/**
 * Resolve a step definition by key. If the run has a version_id, load from
 * the versioned snapshot. Otherwise, load from the live workflow_steps table.
 * Returns null when the key is absent from the snapshot.
 */
async function resolveStepDef(supabase, runId, workflowId, stepKey) {
    const snapshot = await loadVersionedSteps(supabase, runId);
    // No version snapshot — fall back to the live table.
    if (!snapshot) {
        const { data } = await supabase.from("workflow_steps")
            .select("id, step_key, step_type, max_retries")
            .eq("workflow_id", workflowId).eq("step_key", stepKey).single();
        return data;
    }
    const match = snapshot.find((s) => s.step_key === stepKey);
    if (!match)
        return null;
    const { id, step_key, step_type, max_retries } = match;
    return { id, step_key, step_type, max_retries: max_retries ?? 3 };
}
|
|
1974
|
+
/**
 * Create the next pending step run for `stepKey` within a run.
 *
 * Resolves the step definition (versioned snapshot or live table), enforces
 * the per-run step-count limit, then inserts a pending workflow_step_runs row.
 * Fails the whole run when the key cannot be resolved or the limit is hit.
 *
 * @returns the new step_run id, or null when the run was failed instead.
 */
async function createNextStepRunByKey(supabase, runId, workflowId, stepKey) {
    const nextStep = await resolveStepDef(supabase, runId, workflowId, stepKey);
    if (!nextStep) {
        log.error({ stepKey, workflowId }, "step not found in workflow");
        const { data: run } = await supabase.from("workflow_runs").select("store_id").eq("id", runId).single();
        await completeWorkflowRun(supabase, runId, workflowId, run?.store_id, "failed", `Step '${stepKey}' not found`);
        return null;
    }
    // Check step count limit — PERF FIX: these three lookups are independent,
    // so issue them in parallel instead of awaiting each serially.
    const [{ data: run }, { count }, { data: wf }] = await Promise.all([
        supabase.from("workflow_runs").select("store_id").eq("id", runId).single(),
        supabase.from("workflow_step_runs").select("id", { count: "exact", head: true }).eq("run_id", runId),
        supabase.from("workflows").select("max_steps_per_run").eq("id", workflowId).single(),
    ]);
    const maxSteps = wf?.max_steps_per_run || 50;
    if ((count || 0) >= maxSteps) {
        await completeWorkflowRun(supabase, runId, workflowId, run?.store_id, "failed", `Step limit exceeded (${maxSteps})`);
        return null;
    }
    const { data: inserted } = await supabase.from("workflow_step_runs").insert({
        run_id: runId, step_id: nextStep.id, step_key: nextStep.step_key,
        step_type: nextStep.step_type, status: "pending", max_attempts: nextStep.max_retries ?? 3,
    }).select("id").single();
    return inserted?.id || null;
}
|
|
1999
|
+
// ============================================================================
|
|
2000
|
+
// PHASE 1: INLINE EXECUTION — execute steps immediately, no 5s wait
|
|
2001
|
+
// ============================================================================
|
|
2002
|
+
/**
 * Claim a single pending step for a specific run using atomic RPC.
 * P0 FIX: Uses claim_step_for_run RPC with FOR UPDATE SKIP LOCKED + attempt_count increment.
 * Replaces the old SELECT-then-UPDATE pattern that never incremented attempt_count (infinite retries).
 */
async function claimStepForRun(supabase, runId) {
    const { data, error } = await supabase.rpc("claim_step_for_run", { p_run_id: runId });
    if (error) {
        log.error({ err: error.message, runId }, "claim_step_for_run RPC failed");
        return null;
    }
    // RPC returns an array of rows; we expect 0 or 1
    const claimed = Array.isArray(data) ? data[0] : data;
    if (!claimed)
        return null;
    // Normalize nullable columns into the defaults the engine expects
    // (falsy DB values fall back, matching the original `||` semantics).
    const orDefault = (value, fallback) => value || fallback;
    return {
        step_run_id: claimed.step_run_id,
        run_id: claimed.run_id,
        workflow_id: claimed.workflow_id,
        store_id: claimed.store_id,
        step_id: claimed.step_id,
        step_key: claimed.step_key,
        step_type: claimed.step_type,
        step_config: orDefault(claimed.step_config, {}),
        on_success: claimed.on_success,
        on_failure: claimed.on_failure,
        timeout_seconds: orDefault(claimed.timeout_seconds, 60),
        input_schema: claimed.input_schema,
        step_outputs: orDefault(claimed.step_outputs, {}),
        trigger_payload: orDefault(claimed.trigger_payload, {}),
        attempt_count: orDefault(claimed.attempt_count, 1),
        max_attempts: orDefault(claimed.max_attempts, 3),
        max_steps_per_run: orDefault(claimed.max_steps_per_run, 50),
        input: claimed.input,
        parent_step_run_id: claimed.parent_step_run_id,
        retry_delay_seconds: orDefault(claimed.retry_delay_seconds, 10),
    };
}
|
|
2040
|
+
/**
 * Execute the first pending step of a run inline, then chain subsequent steps.
 * Depth guard prevents unbounded chaining — worker loop catches anything left.
 */
export async function executeInlineChain(supabase, runId, depth = 0, traceId) {
    // Resolve traceId from the run record if not passed (first hop only, to
    // avoid one query per chained step)
    if (!traceId && depth === 0) {
        const { data: runData } = await supabase.from("workflow_runs")
            .select("trace_id").eq("id", runId).single();
        traceId = runData?.trace_id || undefined;
    }
    // Steps that go async (delay, sub_workflow, parallel, for_each, approval) don't chain
    const asyncTypes = new Set(["delay", "sub_workflow", "parallel", "for_each", "approval", "waitpoint"]);
    // Iterative form of the chain: one loop pass per executed step.
    for (let hop = depth; ; hop += 1) {
        if (hop >= MAX_INLINE_DEPTH) {
            log.warn({ runId, depthLimit: MAX_INLINE_DEPTH }, "inline depth limit reached, deferring to worker");
            return;
        }
        // H1 FIX: Claim step specifically for this run — cannot steal from other runs
        const step = await claimStepForRun(supabase, runId);
        if (!step)
            return; // No pending steps for this run
        // Phase 4: Override step_config/on_success/on_failure from version snapshot
        await applyVersionOverrides(supabase, step);
        try {
            await executeAndAdvance(supabase, step, traceId);
        }
        catch (err) {
            const errMsg = sanitizeError(err);
            log.error({ stepKey: step.step_key, runId, err: errMsg }, "inline step execution error");
            await supabase.from("workflow_step_runs").update({
                status: "failed", error_message: errMsg,
                completed_at: new Date().toISOString(), duration_ms: 0,
            }).eq("id", step.step_run_id);
            // Surface error to clients (SSE broadcast + structured error_details on step run)
            await surfaceStepError(supabase, step, errMsg);
            return;
        }
        if (asyncTypes.has(step.step_type))
            return; // Step paused the run; worker resumes it later
        // Otherwise loop straight into the next pending step.
    }
}
|
|
2082
|
+
/**
 * Determines whether every step run of a workflow run has reached a terminal
 * state and, if so, finalizes the run with the appropriate outcome:
 * failed (unhandled step failure), failed (circuit-breaker skips), or success.
 *
 * @param {object} supabase - Supabase client.
 * @param {string} runId - Id of the run to inspect.
 * @param {string} workflowId - Owning workflow id, forwarded to completion.
 */
async function checkWorkflowCompletion(supabase, runId, workflowId) {
    // P1 FIX: Single atomic query to count all step statuses — eliminates race
    // windows between multiple queries that could see inconsistent state.
    const { data: stepRows } = await supabase.from("workflow_step_runs")
        .select("step_key, step_id, status, error_message")
        .eq("run_id", runId);
    if (!stepRows?.length) {
        // No steps at all — an empty workflow completes as a success.
        const { data: emptyRun } = await supabase.from("workflow_runs").select("store_id").eq("id", runId).single();
        await completeWorkflowRun(supabase, runId, workflowId, emptyRun?.store_id, "success");
        return;
    }
    // The run is still in progress while any step sits in a non-terminal status.
    const inFlight = new Set(["pending", "running", "retrying", "waiting"]);
    if (stepRows.some(row => inFlight.has(row.status))) {
        return; // Still in progress
    }
    // All steps are terminal — classify them to determine the outcome.
    const skipped = stepRows.filter(row => row.status === "skipped");
    const failures = stepRows.filter(row => row.status === "failed");
    // Only failures WITHOUT an on_failure handler should fail the whole run.
    // M14 FIX: Use versioned step defs when available.
    let unhandledFailures = [];
    if (failures.length) {
        const versionedSteps = await loadVersionedSteps(supabase, runId);
        if (versionedSteps) {
            // Consult the version snapshot's step definitions, keyed by step_key.
            const byKey = new Map(versionedSteps.map((v) => [v.step_key, v]));
            unhandledFailures = failures.filter(row => !byKey.get(row.step_key)?.on_failure);
        }
        else {
            // Fall back to the live workflow_steps table, keyed by step id.
            const ids = failures.map(row => row.step_id).filter(Boolean);
            if (ids.length) {
                const { data: defs } = await supabase.from("workflow_steps")
                    .select("id, on_failure").in("id", ids);
                const byId = new Map((defs || []).map(def => [def.id, def]));
                unhandledFailures = failures.filter(row => !byId.get(row.step_id)?.on_failure);
            }
        }
    }
    const { data: run } = await supabase.from("workflow_runs").select("store_id").eq("id", runId).single();
    if (unhandledFailures.length) {
        await completeWorkflowRun(supabase, runId, workflowId, run?.store_id, "failed", unhandledFailures[0].error_message, unhandledFailures[0].step_key);
    }
    else if (skipped.length) {
        // All remaining steps were skipped (circuit breaker) — fail the run.
        await completeWorkflowRun(supabase, runId, workflowId, run?.store_id, "failed", `${skipped.length} step(s) skipped by circuit breaker`, skipped[0].step_key);
    }
    else {
        await completeWorkflowRun(supabase, runId, workflowId, run?.store_id, "success");
    }
}
/**
 * Transitions a workflow run into a terminal state and performs all
 * completion side effects in order: aggregates step errors into run metadata,
 * guards against double completion, cancels leftover steps, journals the
 * event, updates the circuit breaker, writes an audit record, and on failure
 * sends notifications and archives the run to the dead-letter queue.
 *
 * @param {object} supabase - Supabase client.
 * @param {string} runId - Run to finalize.
 * @param {string|null} workflowId - Owning workflow (circuit breaker / audit / notifications).
 * @param {string|null|undefined} storeId - Store scope for audit and DLQ archival.
 * @param {string} status - Terminal status to record (e.g. "success" | "failed").
 * @param {string} [errorMessage] - Failure message, when status is "failed".
 * @param {string} [errorStepKey] - Step where the failure occurred.
 * @param {string} [traceId] - Trace id; falls back to the run's stored trace_id.
 */
export async function completeWorkflowRun(supabase, runId, workflowId, storeId, status, errorMessage, errorStepKey, traceId) {
    const { data: run } = await supabase.from("workflow_runs")
        .select("started_at, trace_id, metadata").eq("id", runId).single();
    // Duration is wall-clock from started_at; null when the run never started.
    const durationMs = run?.started_at ? Date.now() - new Date(run.started_at).getTime() : null;
    const resolvedTraceId = traceId || run?.trace_id || null;
    // Aggregate error_count and error_log from failed step runs
    const { data: failedStepRuns } = await supabase.from("workflow_step_runs")
        .select("step_key, step_type, error_message, error_details")
        .eq("run_id", runId)
        .eq("status", "failed");
    const errorCount = failedStepRuns?.length || 0;
    const errorLog = (failedStepRuns || []).map(sr => ({
        step_name: sr.step_key,
        step_type: sr.step_type,
        error_message: sr.error_message,
        // Only attach details when the step recorded structured error_details.
        ...(sr.error_details ? { details: sr.error_details } : {}),
    }));
    // Merge error aggregates into the existing run metadata (existing keys preserved).
    const updatedMetadata = {
        ...(run?.metadata || {}),
        error_count: errorCount,
        ...(errorLog.length ? { error_log: errorLog } : {}),
    };
    // Guard against double-completion: only update if still running.
    // The .eq("status", "running") filter plus .select("id") makes this a
    // compare-and-swap — zero returned rows means another caller won the race.
    const { data: updatedRows, error: updateErr } = await supabase.from("workflow_runs").update({
        status, error_message: errorMessage || null, error_step_key: errorStepKey || null,
        completed_at: new Date().toISOString(), duration_ms: durationMs, current_step_key: null,
        metadata: updatedMetadata,
    }).eq("id", runId).eq("status", "running").select("id");
    if (updateErr) {
        log.warn({ err: updateErr.message, runId, status }, "completeWorkflowRun update failed");
        return;
    }
    if (!updatedRows || updatedRows.length === 0) {
        log.warn({ runId, status }, "completeWorkflowRun skipped — run already completed by another caller");
        return;
    }
    // Cancel remaining pending steps so no worker picks them up after completion.
    await supabase.from("workflow_step_runs").update({ status: "cancelled" })
        .eq("run_id", runId).in("status", ["pending", "retrying", "waiting"]);
    // Event journal — run completed
    await logWorkflowEvent(supabase, runId, `run_${status}`, {
        workflow_id: workflowId, duration_ms: durationMs,
        ...(errorMessage ? { error: errorMessage } : {}),
    });
    // Circuit breaker: record success/failure against the workflow.
    if (workflowId)
        await handleWorkflowCircuitBreaker(supabase, workflowId, status === "success", errorMessage);
    // Audit — best-effort; a failed insert is logged but does not block completion.
    const { error: runAuditErr } = await supabase.from("audit_logs").insert({
        action: `workflow.run.${status}`, severity: status === "success" ? "info" : "error",
        store_id: storeId || null, resource_type: "workflow_run", resource_id: runId,
        source: "workflow_engine", duration_ms: durationMs,
        request_id: resolvedTraceId,
        details: { workflow_id: workflowId, run_id: runId },
        error_message: errorMessage || null,
    });
    if (runAuditErr)
        log.warn({ err: runAuditErr.message, runId }, "run completion audit failed");
    // Error notifications (only for failed runs with a known workflow).
    if (status === "failed" && workflowId) {
        await sendErrorNotification(supabase, workflowId, runId, storeId, errorMessage, errorStepKey);
        // Auto-archive to Dead Letter Queue (store-scoped; failures are non-fatal).
        if (storeId) {
            try {
                await archiveToDlq(supabase, runId, workflowId, storeId);
            }
            catch (e) {
                log.warn({ err: e?.message, runId }, "archiveToDlq failed");
            }
        }
    }
}
/**
 * Notifies a workflow's configured error channels (webhook and/or email)
 * that a run has failed. Best-effort: delivery failures are logged, never thrown.
 *
 * @param {object} supabase - Supabase client for the workflow lookup.
 * @param {string} workflowId - Id of the failed workflow.
 * @param {string} runId - Id of the failed run (included in notifications).
 * @param {string|null|undefined} storeId - Store scope; required for the email path.
 * @param {string} [errorMessage] - Failure message to report.
 * @param {string} [errorStepKey] - Step key where the run failed.
 */
async function sendErrorNotification(supabase, workflowId, runId, storeId, errorMessage, errorStepKey) {
    const { data: wf } = await supabase.from("workflows")
        .select("name, on_error_webhook_url, on_error_email").eq("id", workflowId).single();
    if (!wf)
        return;
    const errorPayload = {
        event: "workflow.run.failed",
        workflow_id: workflowId, workflow_name: wf.name,
        run_id: runId, error_message: errorMessage, error_step: errorStepKey,
        timestamp: new Date().toISOString(),
    };
    // Webhook notification (with SSRF protection). validateUrl returns a truthy
    // value when the URL is rejected — presumably a reason string; verify against
    // its definition.
    const errorWebhookSsrf = wf.on_error_webhook_url ? await validateUrl(wf.on_error_webhook_url) : "no URL";
    if (wf.on_error_webhook_url && !errorWebhookSsrf) {
        const controller = new AbortController();
        // Abort the request after 10s so a hung endpoint cannot stall completion.
        const timer = setTimeout(() => controller.abort(), 10_000);
        try {
            await fetch(wf.on_error_webhook_url, {
                method: "POST", headers: { "Content-Type": "application/json" },
                body: JSON.stringify(errorPayload), signal: controller.signal,
            });
        }
        catch (err) {
            log.error({ err: sanitizeError(err), workflowId }, "error notification webhook failed");
        }
        finally {
            // BUG FIX: the timer was previously cleared only on the success path,
            // so a rejected fetch left a live 10s timeout (keeping the Node event
            // loop alive and firing a useless abort). Clear it on every path.
            clearTimeout(timer);
        }
    }
    // Email notification — requires the injected tool executor and a store scope.
    if (wf.on_error_email && _executeTool && storeId) {
        try {
            await _executeTool(supabase, "email", {
                action: "send", to: wf.on_error_email,
                subject: `Workflow "${wf.name}" failed`,
                text: `Workflow "${wf.name}" failed at step "${errorStepKey || "unknown"}".\n\nError: ${errorMessage || "Unknown error"}\n\nRun ID: ${runId}\nTime: ${new Date().toISOString()}`,
            }, storeId);
        }
        catch (err) {
            log.error({ err: sanitizeError(err), workflowId }, "error notification email failed");
        }
    }
}
// ============================================================================
|
|
2255
|
+
// WEBHOOK INGESTION
|
|
2256
|
+
// ============================================================================
|
|
2257
|
+
/**
 * Handles an inbound webhook request for a named endpoint slug: resolves the
 * endpoint, enforces rate limiting and optional HMAC signature verification,
 * parses/transforms the payload, records stats and an audit entry, starts the
 * target workflow, and (optionally) polls for a synchronous result.
 *
 * Returns an HTTP-shaped result `{ status, body }`; it never throws for the
 * expected rejection paths (404/429/401/413/422).
 *
 * @param {object} supabase - Supabase client.
 * @param {string} slug - Endpoint slug from the request path.
 * @param {string|Buffer} rawBody - Raw request body; assumed unparsed — it is
 *   fed to the HMAC and to JSON.parse below (TODO confirm caller passes raw bytes).
 * @param {object} headers - Request headers; assumed lower-cased keys — verify
 *   against the HTTP framework in use.
 * @param {string} [storeId] - Optional store scope for multi-tenant lookup.
 * @returns {Promise<{status: number, body: object}>} HTTP status and JSON body.
 */
export async function handleWebhookIngestion(supabase, slug, rawBody, headers, storeId) {
    // P0 FIX: Multi-tenancy — scope webhook lookup by store_id when available
    let endpointQuery = supabase.from("webhook_endpoints")
        .select("*").eq("slug", slug).eq("is_active", true);
    if (storeId) {
        endpointQuery = endpointQuery.eq("store_id", storeId);
    }
    const { data: endpoints } = await endpointQuery.limit(1);
    const endpoint = endpoints?.[0];
    if (!endpoint)
        return { status: 404, body: { error: "Webhook endpoint not found" } };
    // Rate limit: count this endpoint's audit entries over the last minute.
    const oneMinAgo = new Date(Date.now() - 60_000).toISOString();
    const { count: recentCount } = await supabase.from("audit_logs")
        .select("id", { count: "exact", head: true })
        .eq("resource_type", "webhook_endpoint").eq("resource_id", endpoint.id)
        .gte("created_at", oneMinAgo);
    if ((recentCount || 0) >= endpoint.max_requests_per_minute) {
        return { status: 429, body: { error: "Rate limit exceeded" } };
    }
    // HMAC verification over the raw body, compared in constant time.
    if (endpoint.verify_signature) {
        const signature = headers["x-webhook-signature"] || headers["x-hub-signature-256"] || "";
        if (!signature)
            return { status: 401, body: { error: "Missing signature" } };
        const expected = `sha256=${createHmac("sha256", endpoint.signing_secret).update(rawBody).digest("hex")}`;
        try {
            const sigBuf = Buffer.from(signature);
            const expBuf = Buffer.from(expected);
            // Length must be checked first: timingSafeEqual throws on mismatched lengths.
            if (sigBuf.length !== expBuf.length || !timingSafeEqual(sigBuf, expBuf)) {
                return { status: 401, body: { error: "Invalid signature" } };
            }
        }
        catch {
            return { status: 401, body: { error: "Invalid signature" } };
        }
    }
    // P3 FIX: Reject oversized payloads before parsing to prevent memory exhaustion
    const MAX_TRIGGER_PAYLOAD_BYTES = 10_000_000; // 10MB
    if (rawBody.length > MAX_TRIGGER_PAYLOAD_BYTES) {
        return { status: 413, body: { error: `Trigger payload too large (${rawBody.length} bytes, max 10MB)` } };
    }
    // Non-JSON bodies are accepted and wrapped rather than rejected.
    let payload;
    try {
        payload = JSON.parse(rawBody);
    }
    catch {
        payload = { raw: rawBody };
    }
    // Optional declarative transform of the inbound payload before triggering.
    if (endpoint.payload_transform && typeof endpoint.payload_transform === "object") {
        payload = resolveTemplate(endpoint.payload_transform, { steps: {}, trigger: payload });
    }
    // Update stats.
    // NOTE(review): total_received is a read-modify-write on the row fetched
    // above, so concurrent deliveries can lose increments — an atomic DB-side
    // increment would be safer; confirm whether exact counts matter here.
    await supabase.from("webhook_endpoints").update({
        last_received_at: new Date().toISOString(),
        total_received: (endpoint.total_received || 0) + 1,
    }).eq("id", endpoint.id);
    // Audit — this insert also feeds the rate-limit count above.
    const { error: whAuditErr } = await supabase.from("audit_logs").insert({
        action: "webhook.received", severity: "info", store_id: endpoint.store_id,
        resource_type: "webhook_endpoint", resource_id: endpoint.id, source: "webhook",
        details: { slug, workflow_id: endpoint.workflow_id },
    });
    if (whAuditErr)
        log.warn({ err: whAuditErr.message, slug }, "webhook audit insert failed");
    // Start workflow via the database RPC (handles dedup — see deduplicated flag below).
    const { data: startResult } = await supabase.rpc("start_workflow_run", {
        p_workflow_id: endpoint.workflow_id, p_store_id: endpoint.store_id,
        p_trigger_type: "webhook", p_trigger_payload: payload,
    });
    if (!startResult?.success) {
        return { status: 422, body: { error: startResult?.error || "Failed to start workflow" } };
    }
    const runId = startResult.run_id;
    // Sync response — poll every 500ms until workflow completes or timeout.
    if (endpoint.sync_response) {
        const timeoutMs = (endpoint.sync_timeout_seconds || 30) * 1000;
        const deadline = Date.now() + timeoutMs;
        while (Date.now() < deadline) {
            await new Promise(r => setTimeout(r, 500));
            const { data: run } = await supabase.from("workflow_runs")
                .select("status, step_outputs, error_message").eq("id", runId).single();
            if (run?.status === "success") {
                return { status: 200, body: { success: true, run_id: runId, output: run.step_outputs } };
            }
            if (run?.status === "failed") {
                return { status: 422, body: { success: false, run_id: runId, error: run.error_message, output: run.step_outputs } };
            }
        }
        // Deadline elapsed while still running — hand back the run id for polling.
        return { status: 202, body: { success: true, run_id: runId, status: "running", message: "Workflow still in progress" } };
    }
    // Fire-and-forget mode: acknowledge immediately.
    return { status: 200, body: { success: true, run_id: runId, deduplicated: startResult.deduplicated || false } };
}