@synth-deploy/server 1.0.6 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. package/dist/agent/envoy-client.d.ts +65 -15
  2. package/dist/agent/envoy-client.d.ts.map +1 -1
  3. package/dist/agent/envoy-client.js +58 -8
  4. package/dist/agent/envoy-client.js.map +1 -1
  5. package/dist/agent/stale-deployment-detector.js +1 -1
  6. package/dist/agent/stale-deployment-detector.js.map +1 -1
  7. package/dist/agent/synth-agent.d.ts +7 -5
  8. package/dist/agent/synth-agent.d.ts.map +1 -1
  9. package/dist/agent/synth-agent.js +59 -50
  10. package/dist/agent/synth-agent.js.map +1 -1
  11. package/dist/alert-webhooks/alert-parsers.d.ts +21 -0
  12. package/dist/alert-webhooks/alert-parsers.d.ts.map +1 -0
  13. package/dist/alert-webhooks/alert-parsers.js +184 -0
  14. package/dist/alert-webhooks/alert-parsers.js.map +1 -0
  15. package/dist/api/agent.d.ts +0 -6
  16. package/dist/api/agent.d.ts.map +1 -1
  17. package/dist/api/agent.js +6 -459
  18. package/dist/api/agent.js.map +1 -1
  19. package/dist/api/alert-webhooks.d.ts +13 -0
  20. package/dist/api/alert-webhooks.d.ts.map +1 -0
  21. package/dist/api/alert-webhooks.js +279 -0
  22. package/dist/api/alert-webhooks.js.map +1 -0
  23. package/dist/api/envoy-reports.js +2 -2
  24. package/dist/api/envoy-reports.js.map +1 -1
  25. package/dist/api/envoys.js +1 -1
  26. package/dist/api/envoys.js.map +1 -1
  27. package/dist/api/fleet.d.ts.map +1 -1
  28. package/dist/api/fleet.js +14 -15
  29. package/dist/api/fleet.js.map +1 -1
  30. package/dist/api/graph.js +3 -3
  31. package/dist/api/graph.js.map +1 -1
  32. package/dist/api/operations.d.ts +7 -0
  33. package/dist/api/operations.d.ts.map +1 -0
  34. package/dist/api/operations.js +1900 -0
  35. package/dist/api/operations.js.map +1 -0
  36. package/dist/api/partitions.js +1 -1
  37. package/dist/api/partitions.js.map +1 -1
  38. package/dist/api/schemas.d.ts +434 -133
  39. package/dist/api/schemas.d.ts.map +1 -1
  40. package/dist/api/schemas.js +53 -25
  41. package/dist/api/schemas.js.map +1 -1
  42. package/dist/api/system.d.ts.map +1 -1
  43. package/dist/api/system.js +22 -21
  44. package/dist/api/system.js.map +1 -1
  45. package/dist/artifact-analyzer.js +2 -2
  46. package/dist/artifact-analyzer.js.map +1 -1
  47. package/dist/fleet/fleet-executor.js +3 -3
  48. package/dist/fleet/fleet-executor.js.map +1 -1
  49. package/dist/graph/graph-executor.d.ts.map +1 -1
  50. package/dist/graph/graph-executor.js +18 -4
  51. package/dist/graph/graph-executor.js.map +1 -1
  52. package/dist/index.js +89 -61
  53. package/dist/index.js.map +1 -1
  54. package/dist/mcp/resources.js +3 -3
  55. package/dist/mcp/resources.js.map +1 -1
  56. package/dist/mcp/tools.d.ts.map +1 -1
  57. package/dist/mcp/tools.js +2 -9
  58. package/dist/mcp/tools.js.map +1 -1
  59. package/dist/middleware/auth.js +1 -1
  60. package/dist/middleware/auth.js.map +1 -1
  61. package/package.json +1 -1
  62. package/src/agent/envoy-client.ts +111 -19
  63. package/src/agent/stale-deployment-detector.ts +1 -1
  64. package/src/agent/synth-agent.ts +76 -56
  65. package/src/alert-webhooks/alert-parsers.ts +291 -0
  66. package/src/api/agent.ts +9 -528
  67. package/src/api/alert-webhooks.ts +354 -0
  68. package/src/api/envoy-reports.ts +2 -2
  69. package/src/api/envoys.ts +1 -1
  70. package/src/api/fleet.ts +14 -15
  71. package/src/api/graph.ts +3 -3
  72. package/src/api/operations.ts +2260 -0
  73. package/src/api/partitions.ts +1 -1
  74. package/src/api/schemas.ts +59 -27
  75. package/src/api/system.ts +23 -21
  76. package/src/artifact-analyzer.ts +2 -2
  77. package/src/fleet/fleet-executor.ts +3 -3
  78. package/src/graph/graph-executor.ts +18 -4
  79. package/src/index.ts +91 -61
  80. package/src/mcp/resources.ts +3 -3
  81. package/src/mcp/tools.ts +5 -9
  82. package/src/middleware/auth.ts +1 -1
  83. package/tests/agent-mode.test.ts +5 -376
  84. package/tests/api-handlers.test.ts +27 -27
  85. package/tests/composite-operations.test.ts +557 -0
  86. package/tests/decision-diary.test.ts +62 -63
  87. package/tests/diary-reader.test.ts +14 -18
  88. package/tests/mcp-tools.test.ts +1 -1
  89. package/tests/orchestration.test.ts +34 -30
  90. package/tests/partition-isolation.test.ts +4 -9
  91. package/tests/rbac-enforcement.test.ts +8 -8
  92. package/tests/ui-journey.test.ts +9 -9
  93. package/dist/api/deployments.d.ts +0 -11
  94. package/dist/api/deployments.d.ts.map +0 -1
  95. package/dist/api/deployments.js +0 -1098
  96. package/dist/api/deployments.js.map +0 -1
  97. package/src/api/deployments.ts +0 -1347
@@ -0,0 +1,2260 @@
1
+ import type { FastifyInstance } from "fastify";
2
+ import { generatePostmortem, generatePostmortemAsync, resolveApprovalMode } from "@synth-deploy/core";
3
+ import type { LlmClient, IPartitionStore, IEnvironmentStore, IArtifactStore, ISettingsStore, IDeploymentStore, ITelemetryStore, DebriefWriter, DebriefReader, DebriefPinStore, DeploymentEnrichment, RecommendationVerdict, TelemetryAction } from "@synth-deploy/core";
4
+ import { requirePermission } from "../middleware/permissions.js";
5
+ import {
6
+ CreateOperationSchema,
7
+ ApproveDeploymentSchema,
8
+ RejectDeploymentSchema,
9
+ ModifyDeploymentPlanSchema,
10
+ SubmitPlanSchema,
11
+ DeploymentListQuerySchema,
12
+ DebriefQuerySchema,
13
+ ProgressEventSchema,
14
+ ReplanDeploymentSchema,
15
+ } from "./schemas.js";
16
+ import type { ProgressEventStore } from "./progress-event-store.js";
17
+ import { EnvoyClient } from "../agent/envoy-client.js";
18
+ import type { EnvoyRegistry } from "../agent/envoy-registry.js";
19
+
20
+ /**
21
+ * Returns the artifact ID of an operation whose input type is "deploy"; returns undefined for any other input
22
+ * type. Helper for this file's REST API routes — the traditional (non-MCP) interface for the web UI and integrations.
23
+ */
24
+ function getArtifactId(op: { input: import("@synth-deploy/core").OperationInput }): string | undefined {
25
+ return op.input.type === "deploy" ? op.input.artifactId : undefined;
26
+ }
27
+
28
+ export function registerOperationRoutes(
29
+ app: FastifyInstance,
30
+ deployments: IDeploymentStore,
31
+ debrief: DebriefWriter & DebriefReader & DebriefPinStore,
32
+ partitions: IPartitionStore,
33
+ environments: IEnvironmentStore,
34
+ artifactStore: IArtifactStore,
35
+ settings: ISettingsStore,
36
+ telemetry: ITelemetryStore,
37
+ progressStore?: ProgressEventStore,
38
+ envoyClient?: EnvoyClient,
39
+ envoyRegistry?: EnvoyRegistry,
40
+ llm?: LlmClient,
41
+ ): void {
42
+
43
+ // Create a deployment (plan phase)
44
+ app.post("/api/operations", { preHandler: [requirePermission("deployment.create")] }, async (request, reply) => {
45
+ const parsed = CreateOperationSchema.safeParse(request.body);
46
+ if (!parsed.success) {
47
+ return reply.status(400).send({ error: parsed.error.message });
48
+ }
49
+
50
+ const { artifactId, environmentId, partitionId, envoyId, version, type: operationType, intent, allowWrite, condition, responseIntent, parentOperationId, requireApproval } = parsed.data;
51
+
52
+ // Validate artifact exists (required for deploy operations)
53
+ if (operationType === "deploy" && !artifactId) {
54
+ return reply.status(400).send({ error: "artifactId is required for deploy operations" });
55
+ }
56
+ const artifact = artifactId ? artifactStore.get(artifactId) : undefined;
57
+ if (operationType === "deploy" && !artifact) {
58
+ return reply.status(404).send({ error: `Artifact not found: ${artifactId}` });
59
+ }
60
+
61
+ // Validate environment exists (optional when targeting a partition or envoy)
62
+ const environment = environmentId ? environments.get(environmentId) : undefined;
63
+ if (environmentId && !environment) {
64
+ return reply.status(404).send({ error: `Environment not found: ${environmentId}` });
65
+ }
66
+
67
+ // Validate partition if provided
68
+ const partition = partitionId ? partitions.get(partitionId) : undefined;
69
+ if (partitionId && !partition) {
70
+ return reply.status(404).send({ error: `Partition not found: ${partitionId}` });
71
+ }
72
+
73
+ // Validate envoy if provided
74
+ const targetEnvoy = envoyId ? envoyRegistry?.get(envoyId) : undefined;
75
+ if (envoyId && !targetEnvoy) {
76
+ return reply.status(404).send({ error: `Envoy not found: ${envoyId}` });
77
+ }
78
+
79
+ // Resolve variables — partition vars are base, environment vars take precedence if present
80
+ const envVars = environment ? environment.variables : {};
81
+ const partitionVars = partition?.variables ?? {};
82
+ const resolved: Record<string, string> = { ...partitionVars, ...envVars };
83
+
84
+ const operationInput = operationType === "deploy"
85
+ ? { type: "deploy" as const, artifactId: artifactId!, ...(version ? { artifactVersionId: version } : {}) }
86
+ : operationType === "trigger"
87
+ ? { type: "trigger" as const, condition: condition ?? intent ?? "", responseIntent: responseIntent ?? intent ?? "" }
88
+ : operationType === "composite"
89
+ ? { type: "composite" as const, operations: (parsed.data.operations ?? []) as import("@synth-deploy/core").OperationInput[] }
90
+ : operationType === "investigate"
91
+ ? { type: "investigate" as const, intent: intent ?? "", ...(allowWrite !== undefined ? { allowWrite } : {}) }
92
+ : { type: operationType as "maintain" | "query", intent: intent ?? "" };
93
+
94
+ const deployment = {
95
+ id: crypto.randomUUID(),
96
+ input: operationInput,
97
+ intent,
98
+ lineage: parentOperationId,
99
+ triggeredBy: parentOperationId ? ("user" as const) : undefined,
100
+ environmentId,
101
+ partitionId,
102
+ envoyId: targetEnvoy?.id,
103
+ version: version ?? "",
104
+ status: "pending" as const,
105
+ variables: resolved,
106
+ debriefEntryIds: [] as string[],
107
+ createdAt: new Date(),
108
+ ...(requireApproval ? { forceManualApproval: true } : {}),
109
+ };
110
+
111
+ deployments.save(deployment);
112
+ telemetry.record({ actor: (request.user?.email) ?? "anonymous", action: "operation.created", target: { type: "deployment", id: deployment.id }, details: { artifactId, environmentId, partitionId, envoyId } });
113
+
114
+ // Dispatch planning to the appropriate envoy asynchronously.
115
+ // The envoy reasons about the deployment (read-only) and POSTs back a plan,
116
+ // which transitions the deployment to awaiting_approval.
117
+ if (envoyRegistry) {
118
+ // Find the target envoy: explicit envoyId > environment-assigned > first available
119
+ const planningEnvoy = targetEnvoy
120
+ ?? (environment ? envoyRegistry.findForEnvironment(environment.name) : undefined)
121
+ ?? envoyRegistry.list()[0];
122
+
123
+ const needsArtifact = deployment.input.type === "deploy";
124
+ if (planningEnvoy && (!needsArtifact || artifact)) {
125
+ const planningClient = new EnvoyClient(planningEnvoy.url);
126
+ const environmentForPlanning = environment
127
+ ? { id: environment.id, name: environment.name, variables: environment.variables }
128
+ : { id: `direct:${planningEnvoy.id}`, name: planningEnvoy.name, variables: {} };
129
+
130
+ // Composite: orchestrate child planning separately — do not send composite to envoy directly
131
+ if (deployment.input.type === "composite") {
132
+ planCompositeChildren(deployment, envoyRegistry, planningEnvoy).catch((err) => {
133
+ const dep = deployments.get(deployment.id);
134
+ if (dep && (dep.status === "pending" || dep.status === "planning")) {
135
+ dep.status = "failed" as typeof dep.status;
136
+ dep.failureReason = `Composite planning failed unexpectedly: ${err instanceof Error ? err.message : String(err)}`;
137
+ deployments.save(dep);
138
+ }
139
+ });
140
+ return;
141
+ }
142
+
143
+ planningClient.requestPlan({
144
+ operationId: deployment.id,
145
+ operationType: deployment.input.type as "deploy" | "query" | "investigate" | "maintain" | "trigger",
146
+ intent: deployment.intent ?? (deployment.input.type === "trigger"
147
+ ? `Monitor: ${(deployment.input as { condition: string }).condition}. When triggered: ${(deployment.input as { responseIntent: string }).responseIntent}`
148
+ : undefined),
149
+ ...(deployment.input.type === "trigger" ? {
150
+ triggerCondition: (deployment.input as { condition: string }).condition,
151
+ triggerResponseIntent: (deployment.input as { responseIntent: string }).responseIntent,
152
+ } : {}),
153
+ ...(artifact ? {
154
+ artifact: {
155
+ id: artifact.id,
156
+ name: artifact.name,
157
+ type: artifact.type,
158
+ analysis: {
159
+ summary: artifact.analysis.summary,
160
+ dependencies: artifact.analysis.dependencies,
161
+ configurationExpectations: artifact.analysis.configurationExpectations,
162
+ deploymentIntent: artifact.analysis.deploymentIntent,
163
+ confidence: artifact.analysis.confidence,
164
+ },
165
+ },
166
+ } : {}),
167
+ ...(deployment.input.type === "investigate" && "allowWrite" in deployment.input
168
+ ? { allowWrite: deployment.input.allowWrite }
169
+ : {}),
170
+ environment: environmentForPlanning,
171
+ partition: partition
172
+ ? { id: partition.id, name: partition.name, variables: partition.variables }
173
+ : undefined,
174
+ version: deployment.version ?? "",
175
+ resolvedVariables: resolved,
176
+ }).then((result) => {
177
+ const dep = deployments.get(deployment.id);
178
+ if (!dep || dep.status !== "pending") return;
179
+
180
+ dep.plan = result.plan;
181
+ dep.rollbackPlan = result.rollbackPlan;
182
+ dep.envoyId = planningEnvoy.id;
183
+
184
+ // Trigger operations: construct MonitoringDirective from plan, present for approval
185
+ if (dep.input.type === "trigger" && !result.blocked) {
186
+ const triggerInput = dep.input as { type: "trigger"; condition: string; responseIntent: string };
187
+ // Use probes from the envoy's trigger planning response (embedded in scriptedPlan reasoning),
188
+ // or fall back to a default probe. The envoy's planTrigger generates these.
189
+ const directive: import("@synth-deploy/core").MonitoringDirective = {
190
+ id: dep.id,
191
+ operationId: dep.id,
192
+ probes: [{
193
+ command: "echo 0",
194
+ label: "default-probe",
195
+ parseAs: "numeric" as const,
196
+ }],
197
+ intervalMs: result.intervalMs ?? 60_000,
198
+ cooldownMs: result.cooldownMs ?? 300_000,
199
+ condition: triggerInput.condition,
200
+ responseIntent: triggerInput.responseIntent,
201
+ responseType: "maintain",
202
+ environmentId: dep.environmentId,
203
+ partitionId: dep.partitionId,
204
+ status: "active",
205
+ };
206
+ dep.monitoringDirective = directive;
207
+ dep.triggerStatus = "active";
208
+ dep.status = "awaiting_approval" as typeof dep.status;
209
+ dep.recommendation = computeRecommendation(dep, deployments, result.assessmentSummary);
210
+ deployments.save(dep);
211
+ // Debrief plan-generation entry is recorded by the envoy's planTrigger — no duplicate here.
212
+ return;
213
+ }
214
+
215
+ // Check approval mode for query/investigate operations with findings
216
+ if ((dep.input.type === "query" || dep.input.type === "investigate") &&
217
+ (result.queryFindings || result.investigationFindings)) {
218
+ if (result.queryFindings) dep.queryFindings = result.queryFindings;
219
+ if (result.investigationFindings) dep.investigationFindings = result.investigationFindings;
220
+
221
+ const currentSettings = settings.get();
222
+ const envLookup = (id: string) => environments.get(id)?.name;
223
+ const approvalMode = dep.forceManualApproval
224
+ ? "required"
225
+ : resolveApprovalMode(dep.input.type, dep.environmentId, currentSettings, envLookup);
226
+
227
+ if (approvalMode === "auto") {
228
+ // Auto-approve — findings are the deliverable
229
+ dep.status = "succeeded" as typeof dep.status;
230
+ dep.completedAt = new Date();
231
+ deployments.save(dep);
232
+
233
+ const decisionType = dep.input.type === "query"
234
+ ? "query-findings" as const
235
+ : "investigation-findings" as const;
236
+ const findings = result.queryFindings ?? result.investigationFindings!;
237
+ debrief.record({
238
+ partitionId: dep.partitionId ?? null,
239
+ operationId: dep.id,
240
+ agent: "envoy",
241
+ decisionType,
242
+ decision: `${dep.input.type === "query" ? "Query" : "Investigation"} complete — ${findings.targetsSurveyed.length} target(s) surveyed`,
243
+ reasoning: findings.summary,
244
+ context: { targetsSurveyed: findings.targetsSurveyed, findingCount: findings.findings.length },
245
+ });
246
+ return;
247
+ }
248
+ // approvalMode === "required" — fall through to standard approval gate
249
+ }
250
+
251
+ if (result.blocked) {
252
+ // Unrecoverable precondition failures — block execution, do not present for approval
253
+ dep.status = "failed" as typeof dep.status;
254
+ dep.failureReason = result.blockReason ?? "Plan blocked due to unrecoverable precondition failures";
255
+ deployments.save(dep);
256
+
257
+ debrief.record({
258
+ partitionId: dep.partitionId ?? null,
259
+ operationId: dep.id,
260
+ agent: "envoy",
261
+ decisionType: "plan-generation",
262
+ decision: `Operation plan blocked — infrastructure prerequisites not met`,
263
+ reasoning: result.blockReason ?? result.plan.reasoning,
264
+ context: { stepCount: result.plan.scriptedPlan.stepSummary.length, envoyId: planningEnvoy.id, blocked: true },
265
+ });
266
+ } else {
267
+ // Plan is valid — transition to awaiting_approval
268
+ dep.status = "awaiting_approval" as typeof dep.status;
269
+ dep.recommendation = computeRecommendation(dep, deployments, result.assessmentSummary);
270
+ deployments.save(dep);
271
+
272
+ debrief.record({
273
+ partitionId: dep.partitionId ?? null,
274
+ operationId: dep.id,
275
+ agent: "envoy",
276
+ decisionType: "plan-generation",
277
+ decision: `Operation plan generated with ${result.plan.scriptedPlan.stepSummary.length} steps`,
278
+ reasoning: result.plan.reasoning,
279
+ context: { stepCount: result.plan.scriptedPlan.stepSummary.length, envoyId: planningEnvoy.id, delta: result.delta },
280
+ });
281
+ }
282
+ }).catch((err) => {
283
+ // Planning failed — mark deployment failed so UI doesn't wait forever
284
+ const dep = deployments.get(deployment.id);
285
+ if (!dep || dep.status !== "pending") return;
286
+
287
+ dep.status = "failed" as typeof dep.status;
288
+ dep.failureReason = err instanceof Error ? err.message : "Planning failed";
289
+ deployments.save(dep);
290
+
291
+ debrief.record({
292
+ partitionId: dep.partitionId ?? null,
293
+ operationId: dep.id,
294
+ agent: "server",
295
+ decisionType: "deployment-failure",
296
+ decision: "Envoy planning failed",
297
+ reasoning: dep.failureReason!,
298
+ context: { error: dep.failureReason, envoyId: planningEnvoy.id },
299
+ });
300
+ });
301
+ }
302
+ }
303
+
304
+ return reply.status(201).send({ deployment });
305
+ });
306
+
307
+ // Get deployment by ID
308
+ app.get<{ Params: { id: string } }>("/api/operations/:id", { preHandler: [requirePermission("deployment.view")] }, async (request, reply) => {
309
+ const deployment = deployments.get(request.params.id);
310
+ if (!deployment) {
311
+ return reply.status(404).send({ error: "Operation not found" });
312
+ }
313
+
314
+ return {
315
+ deployment,
316
+ debrief: debrief.getByOperation(deployment.id),
317
+ };
318
+ });
319
+
320
+ // What's New — compare deployed artifact version against catalog latest
321
+ app.get<{ Params: { id: string } }>("/api/operations/:id/whats-new", { preHandler: [requirePermission("deployment.view")] }, async (request, reply) => {
322
+ const deployment = deployments.get(request.params.id);
323
+ if (!deployment) {
324
+ return reply.status(404).send({ error: "Operation not found" });
325
+ }
326
+
327
+ const versions = artifactStore.getVersions(getArtifactId(deployment) ?? "");
328
+ const sorted = versions.slice().sort(
329
+ (a, b) => new Date(b.createdAt).getTime() - new Date(a.createdAt).getTime(),
330
+ );
331
+ const latest = sorted[0] ?? null;
332
+ const deployedVersion = deployment.version;
333
+ const latestVersion = latest?.version ?? null;
334
+ const isLatest = latestVersion === null || latestVersion === deployedVersion;
335
+
336
+ return {
337
+ deployedVersion,
338
+ latestVersion,
339
+ isLatest,
340
+ latestCreatedAt: latest?.createdAt ? new Date(latest.createdAt).toISOString() : null,
341
+ };
342
+ });
343
+
344
+ // List deployments (optionally filtered by partition, artifact, or envoy)
345
+ app.get("/api/operations", { preHandler: [requirePermission("deployment.view")] }, async (request) => {
346
+ const qParsed = DeploymentListQuerySchema.safeParse(request.query);
347
+ const { partitionId, artifactId, envoyId } = qParsed.success ? qParsed.data : {};
348
+
349
+ let list;
350
+ if (partitionId) {
351
+ list = deployments.getByPartition(partitionId);
352
+ } else if (artifactId) {
353
+ list = deployments.getByArtifact(artifactId);
354
+ } else {
355
+ list = deployments.list();
356
+ }
357
+
358
+ if (envoyId) {
359
+ list = list.filter((d) => d.envoyId === envoyId);
360
+ }
361
+
362
+ return { deployments: list };
363
+ });
364
+
365
+ // Submit a plan from envoy — transitions deployment to awaiting_approval
366
+ app.post<{ Params: { id: string } }>(
367
+ "/api/operations/:id/plan",
368
+ { preHandler: [requirePermission("deployment.create")] },
369
+ async (request, reply) => {
370
+ const deployment = deployments.get(request.params.id);
371
+ if (!deployment) {
372
+ return reply.status(404).send({ error: "Operation not found" });
373
+ }
374
+
375
+ const parsed = SubmitPlanSchema.safeParse(request.body);
376
+ if (!parsed.success) {
377
+ return reply.status(400).send({ error: "Invalid plan submission", details: parsed.error.format() });
378
+ }
379
+
380
+ if ((deployment.status) !== "pending" && (deployment.status) !== "planning") {
381
+ return reply.status(409).send({ error: `Cannot submit plan for operation in "${deployment.status}" status` });
382
+ }
383
+
384
+ deployment.plan = parsed.data.plan;
385
+ deployment.rollbackPlan = parsed.data.rollbackPlan;
386
+ deployment.status = "awaiting_approval" as typeof deployment.status;
387
+
388
+ // Generate recommendation from enrichment context
389
+ deployment.recommendation = computeRecommendation(deployment, deployments);
390
+
391
+ deployments.save(deployment);
392
+
393
+ debrief.record({
394
+ partitionId: deployment.partitionId ?? null,
395
+ operationId: deployment.id,
396
+ agent: "envoy",
397
+ decisionType: "plan-generation",
398
+ decision: `Operation plan submitted with ${parsed.data.plan.scriptedPlan.stepSummary.length} steps`,
399
+ reasoning: parsed.data.plan.reasoning,
400
+ context: { stepCount: parsed.data.plan.scriptedPlan.stepSummary.length },
401
+ });
402
+
403
+ return reply.status(200).send({ deployment });
404
+ },
405
+ );
406
+
407
+ // Approve a deployment plan
408
+ app.post<{ Params: { id: string } }>(
409
+ "/api/operations/:id/approve",
410
+ { preHandler: [requirePermission("deployment.approve")] },
411
+ async (request, reply) => {
412
+ const deployment = deployments.get(request.params.id);
413
+ if (!deployment) {
414
+ return reply.status(404).send({ error: "Operation not found" });
415
+ }
416
+
417
+ const parsed = ApproveDeploymentSchema.safeParse(request.body);
418
+ if (!parsed.success) {
419
+ return reply.status(400).send({ error: parsed.error.message });
420
+ }
421
+
422
+ if ((deployment.status) !== "awaiting_approval") {
423
+ return reply.status(409).send({ error: `Cannot approve operation in "${deployment.status}" status — must be "awaiting_approval"` });
424
+ }
425
+
426
+ // Transition deployment status
427
+ deployment.approvedBy = parsed.data.approvedBy;
428
+ deployment.approvedAt = new Date();
429
+ deployment.status = "approved" as typeof deployment.status;
430
+ deployments.save(deployment);
431
+
432
+ const actor = (request.user?.email) ?? parsed.data.approvedBy;
433
+
434
+ // Record approval in debrief
435
+ debrief.record({
436
+ partitionId: deployment.partitionId ?? null,
437
+ operationId: deployment.id,
438
+ agent: "server",
439
+ decisionType: "system",
440
+ decision: `Operation approved by ${actor}`,
441
+ reasoning: parsed.data.modifications
442
+ ? `Approved with modifications: ${parsed.data.modifications}`
443
+ : "Approved without modifications",
444
+ context: { approvedBy: actor },
445
+ actor: request.user?.email,
446
+ });
447
+ telemetry.record({ actor, action: "operation.approved", target: { type: "deployment", id: deployment.id }, details: { modifications: parsed.data.modifications } });
448
+ telemetry.record({
449
+ actor,
450
+ action: parsed.data.modifications ? "agent.recommendation.overridden" : "agent.recommendation.followed",
451
+ target: { type: "deployment", id: deployment.id },
452
+ details: parsed.data.modifications
453
+ ? { modifications: parsed.data.modifications }
454
+ : { planStepCount: deployment.plan?.scriptedPlan.stepSummary.length ?? 0 },
455
+ });
456
+
457
+ // Composite operations: execute children sequentially
458
+ if (deployment.input.type === "composite") {
459
+ deployment.status = "running" as typeof deployment.status;
460
+ deployments.save(deployment);
461
+
462
+ const compositeChildren = deployments.list()
463
+ .filter((d) => d.lineage === deployment.id)
464
+ .sort((a, b) => ((a as { sequenceIndex?: number }).sequenceIndex ?? 0) - ((b as { sequenceIndex?: number }).sequenceIndex ?? 0));
465
+
466
+ // Approve all children before executing sequentially
467
+ for (const child of compositeChildren) {
468
+ child.approvedBy = parsed.data.approvedBy;
469
+ child.approvedAt = new Date();
470
+ child.status = "approved" as typeof child.status;
471
+ deployments.save(child);
472
+ }
473
+
474
+ executeCompositeSequentially(deployment.id, compositeChildren.map((c) => c.id)).catch((err) => {
475
+ const dep = deployments.get(deployment.id);
476
+ if (dep && dep.status === "running") {
477
+ dep.status = "failed" as typeof dep.status;
478
+ dep.failureReason = `Composite execution failed unexpectedly: ${err instanceof Error ? err.message : String(err)}`;
479
+ dep.completedAt = new Date();
480
+ deployments.save(dep);
481
+ }
482
+ });
483
+
484
+ return { deployment, approved: true };
485
+ }
486
+
487
+ // Trigger operations: install monitoring directive on envoy
488
+ if (deployment.input.type === "trigger" && deployment.monitoringDirective && envoyRegistry) {
489
+ const targetEnvoyForTrigger = deployment.envoyId
490
+ ? envoyRegistry.get(deployment.envoyId)
491
+ : envoyRegistry.list()[0];
492
+
493
+ if (targetEnvoyForTrigger) {
494
+ const triggerClient = new EnvoyClient(targetEnvoyForTrigger.url);
495
+ deployment.status = "running" as typeof deployment.status;
496
+ deployment.triggerStatus = "active";
497
+ deployments.save(deployment);
498
+
499
+ triggerClient.installMonitoringDirective(deployment.monitoringDirective).then(() => {
500
+ deployment.status = "succeeded" as typeof deployment.status;
501
+ deployment.completedAt = new Date();
502
+ deployments.save(deployment);
503
+
504
+ debrief.record({
505
+ partitionId: deployment.partitionId ?? null,
506
+ operationId: deployment.id,
507
+ agent: "server",
508
+ decisionType: "trigger-activated",
509
+ decision: `Monitoring directive installed on ${targetEnvoyForTrigger.name}`,
510
+ reasoning: `Trigger activated: monitoring "${deployment.monitoringDirective!.condition}" every ${deployment.monitoringDirective!.intervalMs / 1000}s with ${deployment.monitoringDirective!.cooldownMs / 1000}s cooldown`,
511
+ context: { envoyId: targetEnvoyForTrigger.id, directiveId: deployment.monitoringDirective!.id },
512
+ });
513
+ telemetry.record({ actor, action: "trigger.activated" as TelemetryAction, target: { type: "trigger", id: deployment.id }, details: { envoyId: targetEnvoyForTrigger.id } });
514
+ }).catch((err) => {
515
+ deployment.status = "failed" as typeof deployment.status;
516
+ deployment.triggerStatus = "disabled";
517
+ deployment.failureReason = err instanceof Error ? err.message : "Failed to install monitoring directive";
518
+ deployments.save(deployment);
519
+
520
+ debrief.record({
521
+ partitionId: deployment.partitionId ?? null,
522
+ operationId: deployment.id,
523
+ agent: "server",
524
+ decisionType: "deployment-failure",
525
+ decision: "Failed to install monitoring directive on envoy",
526
+ reasoning: deployment.failureReason!,
527
+ context: { error: deployment.failureReason },
528
+ });
529
+ });
530
+ }
531
+ }
532
+ // Normal operations: dispatch approved plan to envoy for execution
533
+ else if (envoyClient && deployment.plan && deployment.rollbackPlan) {
534
+ const artifact = artifactStore.get(getArtifactId(deployment) ?? "");
535
+ const serverPort = process.env.PORT ?? "9410";
536
+ const serverUrl = process.env.SYNTH_SERVER_URL ?? `http://localhost:${serverPort}`;
537
+ const progressCallbackUrl = `${serverUrl}/api/operations/${deployment.id}/progress`;
538
+ const callbackToken = envoyRegistry?.list().find(r => r.url === envoyClient.url)?.token;
539
+
540
+ deployment.status = "running" as typeof deployment.status;
541
+ deployments.save(deployment);
542
+
543
+ // Fire-and-forget: execution runs async, progress comes via callback
544
+ envoyClient.executeApprovedPlan({
545
+ operationId: deployment.id,
546
+ plan: deployment.plan,
547
+ rollbackPlan: deployment.rollbackPlan,
548
+ artifactType: artifact?.type ?? "unknown",
549
+ artifactName: artifact?.name ?? "unknown",
550
+ environmentId: deployment.environmentId ?? "",
551
+ progressCallbackUrl,
552
+ callbackToken,
553
+ }).catch((err) => {
554
+ // Execution dispatch failed — record failure
555
+ deployment.status = "failed" as typeof deployment.status;
556
+ deployment.failureReason = err instanceof Error ? err.message : "Execution dispatch failed";
557
+ deployments.save(deployment);
558
+
559
+ debrief.record({
560
+ partitionId: deployment.partitionId ?? null,
561
+ operationId: deployment.id,
562
+ agent: "server",
563
+ decisionType: "deployment-failure",
564
+ decision: "Failed to dispatch approved plan to envoy",
565
+ reasoning: deployment.failureReason!,
566
+ context: { error: deployment.failureReason },
567
+ });
568
+ });
569
+ }
570
+
571
+ return { deployment, approved: true };
572
+ },
573
+ );
574
+
575
// POST /api/operations/:id/reject
// Rejects a plan that is waiting for approval: stores the "rejected" status
// together with the caller-supplied reason, then logs the decision to the
// debrief and telemetry stores.
//
// Responses: 404 unknown operation · 400 body fails RejectDeploymentSchema ·
// 409 operation is not in "awaiting_approval" · otherwise { deployment, rejected }.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/reject",
  { preHandler: [requirePermission("deployment.reject")] },
  async (request, reply) => {
    const operation = deployments.get(request.params.id);
    if (!operation) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    const body = RejectDeploymentSchema.safeParse(request.body);
    if (!body.success) {
      return reply.status(400).send({ error: body.error.message });
    }
    const { reason } = body.data;

    // Only plans still waiting on a human decision can be rejected.
    const currentStatus = operation.status;
    if (currentStatus !== "awaiting_approval") {
      return reply.status(409).send({
        error: `Cannot reject operation in "${currentStatus}" status — must be "awaiting_approval"`,
      });
    }

    // Persist the state transition before any audit records are written.
    operation.status = "rejected" as typeof operation.status;
    operation.rejectionReason = reason;
    deployments.save(operation);

    // Telemetry always gets an actor string; the debrief keeps the raw
    // (possibly undefined) e-mail.
    const userEmail = request.user?.email;
    const actor = userEmail ?? "anonymous";

    debrief.record({
      partitionId: operation.partitionId ?? null,
      operationId: operation.id,
      agent: "server",
      decisionType: "system",
      decision: "Operation plan rejected",
      reasoning: reason,
      context: { reason },
      actor: userEmail,
    });
    telemetry.record({
      actor,
      action: "operation.rejected",
      target: { type: "deployment", id: operation.id },
      details: { reason },
    });

    return { deployment: operation, rejected: true };
  },
);
617
+
618
// POST /api/operations/:id/modify
// Lets a user edit the execution script (and optionally the rollback script)
// of a plan that is still awaiting approval.
//
// Flow:
//   1. Look up the operation and validate the body (ModifyDeploymentPlanSchema).
//   2. When an envoy client is configured and the plan has a scripted plan,
//      ask the envoy to validate the edited script. A validation verdict of
//      "invalid" aborts with 422. If the validation call itself throws, the
//      modification still goes ahead (best-effort), but the outcome is now
//      recorded in the debrief context instead of being silently dropped.
//   3. Save the edited plan, then write debrief + telemetry records.
//
// Responses: 404 unknown operation · 400 invalid body · 409 wrong status or
// no plan · 422 envoy rejected the edited plan · otherwise { deployment, modified }.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/modify",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const deployment = deployments.get(request.params.id);
    if (!deployment) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    const parsed = ModifyDeploymentPlanSchema.safeParse(request.body);
    if (!parsed.success) {
      return reply.status(400).send({ error: parsed.error.message });
    }

    if ((deployment.status) !== "awaiting_approval") {
      return reply.status(409).send({ error: `Cannot modify operation in "${deployment.status}" status — must be "awaiting_approval"` });
    }

    if (!deployment.plan) {
      return reply.status(409).send({ error: "Operation has no plan to modify" });
    }

    // Outcome of the optional envoy check:
    //   "skipped"     — no envoy client configured, or the plan has no scripted plan
    //   "passed"      — envoy validated the edited script
    //   "unreachable" — the validation call threw; edit accepted unvalidated
    let envoyValidation: "passed" | "skipped" | "unreachable" = "skipped";
    let envoyValidationError: string | undefined;

    if (envoyClient && deployment.plan.scriptedPlan) {
      try {
        const modifiedScript: import("@synth-deploy/core").ScriptedPlan = {
          ...deployment.plan.scriptedPlan,
          executionScript: parsed.data.executionScript,
          ...(parsed.data.rollbackScript !== undefined ? { rollbackScript: parsed.data.rollbackScript } : {}),
        };
        const validation = await envoyClient.validatePlan(modifiedScript);
        if (!validation.valid) {
          return reply.status(422).send({
            error: "Modified plan failed envoy validation",
            violations: validation.violations,
          });
        }
        envoyValidation = "passed";
      } catch (err) {
        // Deliberately non-fatal: the edit proceeds, but the audit trail
        // below states that it was not validated and why.
        envoyValidation = "unreachable";
        envoyValidationError = err instanceof Error ? err.message : String(err);
      }
    }

    // Compute diff description
    const oldScript = deployment.plan.scriptedPlan?.executionScript ?? "";
    const newScript = parsed.data.executionScript;
    const diffFromPreviousPlan = oldScript !== newScript
      ? "Execution script modified by user"
      : "Plan metadata changed (script unchanged)";

    // Apply modifications
    deployment.plan = {
      ...deployment.plan,
      scriptedPlan: {
        ...deployment.plan.scriptedPlan,
        executionScript: parsed.data.executionScript,
        ...(parsed.data.rollbackScript !== undefined ? { rollbackScript: parsed.data.rollbackScript } : {}),
      },
      diffFromPreviousPlan,
    };
    deployments.save(deployment);

    const actor = (request.user?.email) ?? "anonymous";

    // Record modification in debrief, including whether the envoy checked it
    debrief.record({
      partitionId: deployment.partitionId ?? null,
      operationId: deployment.id,
      agent: "server",
      decisionType: "plan-modification",
      decision: `Operation plan modified by ${actor}`,
      reasoning: parsed.data.reason,
      context: {
        modifiedBy: actor,
        reason: parsed.data.reason,
        envoyValidation,
        ...(envoyValidationError !== undefined ? { envoyValidationError } : {}),
      },
      actor: request.user?.email,
    });
    telemetry.record({
      actor,
      action: "operation.modified" as Parameters<typeof telemetry.record>[0]["action"],
      target: { type: "deployment", id: deployment.id },
      details: { reason: parsed.data.reason },
    });
    telemetry.record({
      actor,
      action: "agent.recommendation.overridden",
      target: { type: "deployment", id: deployment.id },
      details: { reason: parsed.data.reason, diff: diffFromPreviousPlan },
    });

    return { deployment, modified: true };
  },
);
712
+
713
// POST /api/operations/:id/replan
// Regenerates the plan of an operation that is awaiting approval, using
// free-text user feedback as additional planning input.
//
// Flow:
//   1. Guard: operation exists, is "awaiting_approval", body matches
//      ReplanDeploymentSchema, artifact exists, an envoy is available.
//   2. Ask the envoy to classify the feedback first. "rejection" → 422,
//      "response" → 200 with the envoy's message (no replan). If this call
//      throws, the replan continues (best-effort pre-check).
//   3. Mark the operation "planning" and request a new plan. On failure the
//      status is reset to "awaiting_approval" and 500 is returned.
//   4. If the envoy reports the new plan as blocked, the previous plan is
//      retained and 422 is returned. The initial-planning flow in this file
//      treats `result.blocked` as non-approvable; without this check a blocked
//      plan would have replaced the approvable one.
//      NOTE(review): retaining the old plan (rather than failing the
//      operation) is a conservative choice — confirm the desired product
//      behaviour.
//   5. Otherwise store the new plan, rollback plan and recommendation, set
//      the status back to "awaiting_approval", and record a debrief entry.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/replan",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const deploymentId = request.params.id;
    const deployment = deployments.get(deploymentId);
    if (!deployment) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    if ((deployment.status) !== "awaiting_approval") {
      return reply.status(409).send({ error: `Cannot replan operation in "${deployment.status}" status — must be "awaiting_approval"` });
    }

    const parsed = ReplanDeploymentSchema.safeParse(request.body);
    if (!parsed.success) {
      return reply.status(400).send({ error: parsed.error.message });
    }

    const artifact = artifactStore.get(getArtifactId(deployment) ?? "");
    if (!artifact) {
      return reply.status(404).send({ error: `Artifact not found: ${getArtifactId(deployment)}` });
    }

    const environment = deployment.environmentId ? environments.get(deployment.environmentId) : undefined;
    const partition = deployment.partitionId ? partitions.get(deployment.partitionId) : undefined;

    // Prefer the envoy already bound to the operation; otherwise fall back to
    // the first registered envoy.
    const planningEnvoy = deployment.envoyId ? envoyRegistry?.get(deployment.envoyId) : envoyRegistry?.list()[0];
    if (!planningEnvoy) {
      return reply.status(422).send({ error: "No envoy available for replanning" });
    }

    // One client serves both the feedback pre-check and the replan request.
    const planningClient = new EnvoyClient(planningEnvoy.url);

    // Validate feedback with LLM before triggering expensive replan
    try {
      const validation = await planningClient.validateRefinementFeedback({
        feedback: parsed.data.feedback,
        currentPlanSummary: (deployment.plan?.scriptedPlan?.stepSummary ?? []).map((s) => ({
          description: s.description,
          reversible: s.reversible,
        })),
        artifactName: artifact.name ?? "unknown",
        environmentName: environment?.name ?? "unknown",
      });
      if (validation.mode === "rejection") {
        return reply.status(422).send({ error: validation.message, mode: "rejection" });
      }
      if (validation.mode === "response") {
        return reply.status(200).send({ mode: "response", message: validation.message });
      }
      // mode === "replan" — fall through to full replan
    } catch {
      // Validation call failed — proceed with replan rather than blocking the user
    }

    deployment.status = "planning" as typeof deployment.status;
    deployments.save(deployment);

    // Operations without an environment are planned against a synthetic
    // "direct:<envoyId>" environment.
    const environmentForPlanning = environment
      ? { id: environment.id, name: environment.name, variables: environment.variables }
      : { id: `direct:${planningEnvoy.id}`, name: planningEnvoy.name, variables: {} };

    let result: Awaited<ReturnType<typeof planningClient.requestPlan>>;
    try {
      result = await planningClient.requestPlan({
        operationId: deploymentId,
        artifact: {
          id: artifact.id,
          name: artifact.name,
          type: artifact.type,
          analysis: {
            summary: artifact.analysis.summary,
            dependencies: artifact.analysis.dependencies,
            configurationExpectations: artifact.analysis.configurationExpectations,
            deploymentIntent: artifact.analysis.deploymentIntent,
            confidence: artifact.analysis.confidence,
          },
        },
        environment: environmentForPlanning,
        partition: partition
          ? { id: partition.id, name: partition.name, variables: partition.variables }
          : undefined,
        version: deployment.version ?? "",
        resolvedVariables: deployment.variables,
        refinementFeedback: parsed.data.feedback,
      });
    } catch (err) {
      // Undo the "planning" transition so the existing plan stays approvable.
      const dep = deployments.get(deploymentId);
      if (dep) {
        dep.status = "awaiting_approval" as typeof dep.status;
        deployments.save(dep);
      }
      return reply.status(500).send({ error: err instanceof Error ? err.message : "Replanning failed" });
    }

    // Re-read: the stored record may have changed while awaiting the envoy.
    const dep = deployments.get(deploymentId);
    if (!dep) {
      return reply.status(404).send({ error: "Operation not found after replanning" });
    }

    if (result.blocked) {
      // Do not install a blocked plan as the one awaiting approval.
      const blockReason = result.blockReason ?? "Plan blocked due to unrecoverable precondition failures";
      dep.status = "awaiting_approval" as typeof dep.status;
      deployments.save(dep);

      debrief.record({
        partitionId: dep.partitionId ?? null,
        operationId: dep.id,
        agent: "envoy",
        decisionType: "plan-generation",
        decision: "Replanned operation plan blocked — previous plan retained",
        reasoning: result.blockReason ?? result.plan.reasoning,
        context: {
          stepCount: result.plan.scriptedPlan.stepSummary.length,
          envoyId: planningEnvoy.id,
          blocked: true,
          refinementFeedback: parsed.data.feedback,
        },
      });

      return reply.status(422).send({ error: blockReason, blocked: true, deployment: dep });
    }

    dep.plan = result.plan;
    dep.rollbackPlan = result.rollbackPlan;
    dep.recommendation = computeRecommendation(dep, deployments, result.assessmentSummary);
    dep.status = "awaiting_approval" as typeof dep.status;
    deployments.save(dep);

    debrief.record({
      partitionId: dep.partitionId ?? null,
      operationId: dep.id,
      agent: "envoy",
      decisionType: "plan-generation",
      decision: `Plan regenerated with user feedback (${result.plan.scriptedPlan.stepSummary.length} steps)`,
      reasoning: result.plan.reasoning,
      context: { stepCount: result.plan.scriptedPlan.stepSummary.length, envoyId: planningEnvoy.id, refinementFeedback: parsed.data.feedback },
    });

    return { deployment: dep, replanned: true };
  },
);
834
+
835
// GET /api/operations/:id/context
// Returns cross-operation context for one operation:
//   - recentOperationsToEnv: operations to the same environment in the last 24 h
//   - previouslyRolledBack:  whether this artifact version has a "rolled_back" record
//   - conflictingOperations: ids of other running / approved / awaiting-approval
//                            operations targeting the same environment
//   - lastOperationToEnv:    latest operation to the environment, if it is not this one
// plus the stored recommendation (computed on the fly when none is stored).
// Environment-scoped values default to 0 / [] / undefined when the operation
// has no environmentId.
app.get<{ Params: { id: string } }>(
  "/api/operations/:id/context",
  { preHandler: [requirePermission("deployment.view")] },
  async (request, reply) => {
    const operation = deployments.get(request.params.id);
    if (!operation) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    const envId = operation.environmentId;
    const dayInMs = 24 * 60 * 60 * 1000;
    const windowStart = new Date(new Date().getTime() - dayInMs);

    // Volume of recent activity against the same environment.
    let recentOperationsToEnv = 0;
    if (envId) {
      recentOperationsToEnv = deployments.countByEnvironment(envId, windowStart);
    }

    // Has this exact artifact version been rolled back before?
    let previouslyRolledBack = false;
    if (operation.version) {
      const priorRollbacks = deployments.findByArtifactVersion(
        getArtifactId(operation) ?? "",
        operation.version,
        "rolled_back",
      );
      previouslyRolledBack = priorRollbacks.length > 0;
    }

    // Other operations that are still live against the same environment.
    const liveStatuses = new Set<string>(["running", "approved", "awaiting_approval"]);
    const conflictingOperations: Array<typeof operation.id> = [];
    if (envId) {
      for (const other of deployments.list()) {
        const sameEnvironment = other.environmentId === operation.environmentId;
        const isAnotherOperation = other.id !== operation.id;
        if (sameEnvironment && isAnotherOperation && liveStatuses.has(other.status)) {
          conflictingOperations.push(other.id);
        }
      }
    }

    // Most recent operation to the environment, unless that is this operation.
    const latest = envId ? deployments.findLatestByEnvironment(envId) : undefined;
    let lastOperationToEnv: DeploymentEnrichment["lastOperationToEnv"];
    if (latest && latest.id !== operation.id) {
      lastOperationToEnv = {
        id: latest.id,
        status: latest.status,
        version: latest.version ?? "",
        completedAt: latest.completedAt,
      };
    }

    const enrichment: DeploymentEnrichment = {
      recentOperationsToEnv,
      previouslyRolledBack,
      conflictingOperations,
      lastOperationToEnv,
    };

    const recommendation = operation.recommendation ?? computeRecommendation(operation, deployments);
    return { enrichment, recommendation };
  },
);
900
+
901
// POST /api/operations/:id/request-rollback-plan
// Asks an envoy to produce a rollback plan for a finished operation
// ("succeeded", "failed" or "rolled_back"), based on the steps that were
// recorded as executed. The generated plan is stored on the operation and
// returned to the caller.
//
// Responses: 404 unknown operation or artifact · 409 operation not finished ·
// 503 no envoy available · 500 envoy call (or the bookkeeping after it) threw ·
// 200 { deployment, rollbackPlan }.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/request-rollback-plan",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const deployment = deployments.get(request.params.id);
    if (!deployment) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    const isFinished = ["succeeded", "failed", "rolled_back"].includes(deployment.status);
    if (!isFinished) {
      return reply.status(409).send({
        error: `Cannot request rollback plan for operation in "${deployment.status}" status — operation must be finished`,
      });
    }

    const artifact = artifactStore.get(getArtifactId(deployment) ?? "");
    if (!artifact) {
      return reply.status(404).send({ error: "Artifact not found" });
    }

    // Use the envoy bound to the operation, else the first registered one.
    const targetEnvoy = deployment.envoyId
      ? envoyRegistry?.get(deployment.envoyId)
      : envoyRegistry?.list()[0];
    if (!targetEnvoy) {
      return reply.status(503).send({ error: "No envoy available to generate rollback plan" });
    }

    const environment = deployment.environmentId ? environments.get(deployment.environmentId) : undefined;

    type CompletedStep = {
      description: string;
      action: string;
      target: string;
      status: "completed" | "failed" | "rolled_back";
      output?: string;
    };

    // Source of truth is the execution record; when there is none, fall back
    // to the plan's step summaries (all reported as "completed"); otherwise
    // send an empty list.
    let completedSteps: CompletedStep[];
    if (deployment.executionRecord != null) {
      completedSteps = deployment.executionRecord.steps.map((step) => ({
        description: step.description,
        action: "script-step",
        target: "",
        status: step.status,
        output: step.output ?? step.error,
      }));
    } else if (deployment.plan?.scriptedPlan != null) {
      completedSteps = deployment.plan.scriptedPlan.stepSummary.map((step) => ({
        description: step.description,
        action: "script-step",
        target: "",
        status: "completed" as const,
      }));
    } else {
      completedSteps = [];
    }

    const rollbackClient = new EnvoyClient(targetEnvoy.url);

    try {
      const { analysis } = artifact;
      const rollbackPlan = await rollbackClient.requestRollbackPlan({
        operationId: deployment.id,
        artifact: {
          name: artifact.name,
          type: artifact.type,
          analysis: {
            summary: analysis.summary,
            dependencies: analysis.dependencies,
            configurationExpectations: analysis.configurationExpectations,
            deploymentIntent: analysis.deploymentIntent,
            confidence: analysis.confidence,
          },
        },
        environment: {
          id: deployment.environmentId ?? "",
          name: environment?.name ?? deployment.environmentId ?? "unknown",
        },
        completedSteps,
        deployedVariables: deployment.variables,
        version: deployment.version ?? "",
        failureReason: deployment.failureReason ?? undefined,
      });

      // Keep the plan on the operation so execute-rollback can run it later.
      deployment.rollbackPlan = rollbackPlan;
      deployments.save(deployment);

      const userEmail = request.user?.email;
      const actor = userEmail ?? "anonymous";
      const stepCount = rollbackPlan.scriptedPlan.stepSummary.length;

      debrief.record({
        partitionId: deployment.partitionId ?? null,
        operationId: deployment.id,
        agent: "server",
        decisionType: "plan-generation",
        decision: `Rollback plan requested and generated for ${artifact.name} v${deployment.version}`,
        reasoning: rollbackPlan.reasoning,
        context: {
          requestedBy: actor,
          stepCount,
          envoyId: targetEnvoy.id,
          deploymentStatus: deployment.status,
        },
        actor: userEmail,
      });
      telemetry.record({
        actor,
        action: "deployment.rollback-plan-requested" as Parameters<typeof telemetry.record>[0]["action"],
        target: { type: "deployment", id: deployment.id },
        details: { stepCount },
      });

      return reply.status(200).send({ deployment, rollbackPlan });
    } catch (err) {
      const details = err instanceof Error ? err.message : String(err);
      return reply.status(500).send({
        error: "Failed to generate rollback plan",
        details,
      });
    }
  },
);
1018
+
1019
// POST /api/operations/:id/execute-rollback
// Runs the rollback plan stored on a finished ("succeeded" or "failed")
// operation. The request is answered with 202 right after dispatch; the
// final status is written asynchronously when the envoy call settles:
//   - resolved, success      → "rolled_back"
//   - resolved, no success   → "failed" + failureReason
//   - rejected (dispatch error) → "failed" + failureReason
// Every outcome, including a dispatch error, writes a "rollback-execution"
// debrief entry so the audit trail explains why the operation ended up in
// its final state.
//
// Responses: 404 unknown operation · 409 no rollback plan / wrong status ·
// 503 no envoy available · 202 { deployment, accepted }.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/execute-rollback",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const deployment = deployments.get(request.params.id);
    if (!deployment) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    if (!deployment.rollbackPlan) {
      return reply.status(409).send({ error: "No rollback plan available — request one first" });
    }

    const finishedStatuses = new Set(["succeeded", "failed"]);
    if (!finishedStatuses.has(deployment.status)) {
      return reply.status(409).send({
        error: `Cannot execute rollback for operation in "${deployment.status}" status`,
      });
    }

    // The artifact is optional here: it is only used for labels and falls
    // back to "unknown" / the artifact id when it no longer exists.
    const artifact = artifactStore.get(getArtifactId(deployment) ?? "");
    const targetEnvoy = deployment.envoyId
      ? envoyRegistry?.get(deployment.envoyId)
      : envoyRegistry?.list()[0];

    if (!targetEnvoy) {
      return reply.status(503).send({ error: "No envoy available to execute rollback" });
    }

    const actor = (request.user?.email) ?? "anonymous";
    // URL the envoy posts progress events back to.
    const serverPort = process.env.PORT ?? "9410";
    const serverUrl = process.env.SYNTH_SERVER_URL ?? `http://localhost:${serverPort}`;
    const progressCallbackUrl = `${serverUrl}/api/operations/${deployment.id}/progress`;

    deployment.status = "running" as typeof deployment.status;
    deployments.save(deployment);

    debrief.record({
      partitionId: deployment.partitionId ?? null,
      operationId: deployment.id,
      agent: "server",
      decisionType: "rollback-execution",
      decision: `Rollback execution initiated for ${artifact?.name ?? getArtifactId(deployment)} v${deployment.version}`,
      reasoning: `Rollback requested by ${actor}. Executing ${deployment.rollbackPlan.scriptedPlan.stepSummary.length} rollback step(s).`,
      context: { initiatedBy: actor, stepCount: deployment.rollbackPlan.scriptedPlan.stepSummary.length },
      actor: request.user?.email,
    });
    telemetry.record({
      actor,
      action: "deployment.rollback-executed" as Parameters<typeof telemetry.record>[0]["action"],
      target: { type: "deployment", id: deployment.id },
      details: { stepCount: deployment.rollbackPlan.scriptedPlan.stepSummary.length },
    });

    const rollbackClient = new EnvoyClient(targetEnvoy.url);

    // Execute the rollback plan as if it were a forward plan — it IS a forward plan
    // (just in the reverse direction). Use an empty no-op plan as the "rollback of rollback".
    const emptyPlan: import("@synth-deploy/core").OperationPlan = {
      scriptedPlan: {
        platform: "bash",
        executionScript: "# No rollback of rollback",
        dryRunScript: null,
        rollbackScript: null,
        reasoning: "No rollback of rollback.",
        stepSummary: [],
      },
      reasoning: "No rollback of rollback.",
    };

    // Fire-and-forget: the handler returns 202 below without awaiting this.
    void rollbackClient.executeApprovedPlan({
      operationId: deployment.id,
      plan: deployment.rollbackPlan,
      rollbackPlan: emptyPlan,
      artifactType: artifact?.type ?? "unknown",
      artifactName: artifact?.name ?? "unknown",
      environmentId: deployment.environmentId ?? "",
      progressCallbackUrl,
      callbackToken: targetEnvoy.token,
    }).then((result) => {
      // Re-read the record; it may have been removed while the envoy worked.
      const dep = deployments.get(deployment.id);
      if (!dep) return;

      dep.status = result.success ? "rolled_back" as typeof dep.status : "failed" as typeof dep.status;
      if (!result.success) {
        dep.failureReason = result.failureReason ?? "Rollback execution failed";
      }
      dep.completedAt = new Date();
      deployments.save(dep);

      debrief.record({
        partitionId: dep.partitionId ?? null,
        operationId: dep.id,
        agent: "server",
        decisionType: "rollback-execution",
        decision: result.success
          ? `Rollback completed successfully for ${artifact?.name ?? getArtifactId(dep)} v${dep.version}`
          : `Rollback failed for ${artifact?.name ?? getArtifactId(dep)} v${dep.version}`,
        reasoning: result.success
          ? `All rollback steps executed successfully.`
          : `Rollback failed: ${result.failureReason}`,
        context: { success: result.success, failureReason: result.failureReason },
      });
    }).catch((err: unknown) => {
      const dep = deployments.get(deployment.id);
      if (!dep) return;

      const failureReason = err instanceof Error ? err.message : "Rollback execution dispatch failed";
      dep.status = "failed" as typeof dep.status;
      dep.failureReason = failureReason;
      deployments.save(dep);

      // Previously this path changed the status without leaving any debrief
      // entry, so the failure had no recorded explanation.
      debrief.record({
        partitionId: dep.partitionId ?? null,
        operationId: dep.id,
        agent: "server",
        decisionType: "rollback-execution",
        decision: `Rollback dispatch failed for ${artifact?.name ?? getArtifactId(dep)} v${dep.version}`,
        reasoning: failureReason,
        context: { success: false, failureReason, envoyId: targetEnvoy.id },
      });
    });

    return reply.status(202).send({ deployment, accepted: true });
  },
);
1135
+
1136
// POST /api/operations/:id/retry
// Creates a new "pending" operation that copies the input, environment,
// partition, envoy and version of an existing one (linked via `retryOf`),
// then dispatches planning to an envoy without waiting for the result.
//
// The response (201) carries the new operation, the source id and the
// attempt number; the plan arrives later through the promise callbacks,
// which move the operation to "awaiting_approval" or "failed".
//
// Responses: 404 when the source operation, its artifact, or a referenced
// environment / partition / envoy no longer exists.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/retry",
  { preHandler: [requirePermission("deployment.create")] },
  async (request, reply) => {
    const source = deployments.get(request.params.id);
    if (!source) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    // Attempt number: start at 2 (the source is attempt 1, the new operation
    // is one more) and add one for every retryOf link followed from the source.
    let attemptNumber = 2;
    let ancestor: typeof source | undefined = source;
    while (ancestor?.retryOf) {
      ancestor = deployments.get(ancestor.retryOf);
      attemptNumber += 1;
    }

    // Everything the source referred to must still exist.
    const artifact = artifactStore.get(getArtifactId(source) ?? "");
    if (!artifact) {
      return reply.status(404).send({ error: `Artifact not found: ${getArtifactId(source)}` });
    }

    const environment = source.environmentId ? environments.get(source.environmentId) : undefined;
    if (source.environmentId && !environment) {
      return reply.status(404).send({ error: `Environment not found: ${source.environmentId}` });
    }

    const partition = source.partitionId ? partitions.get(source.partitionId) : undefined;
    if (source.partitionId && !partition) {
      return reply.status(404).send({ error: `Partition not found: ${source.partitionId}` });
    }

    const targetEnvoy = source.envoyId ? envoyRegistry?.get(source.envoyId) : undefined;
    if (source.envoyId && !targetEnvoy) {
      return reply.status(404).send({ error: `Envoy not found: ${source.envoyId}` });
    }

    // Variables are re-resolved from the current partition and environment;
    // environment entries win on key collisions because they are spread last.
    const resolved: Record<string, string> = {
      ...(partition?.variables ?? {}),
      ...(environment ? environment.variables : {}),
    };

    const deployment = {
      id: crypto.randomUUID(),
      input: source.input,
      environmentId: source.environmentId,
      partitionId: source.partitionId,
      envoyId: targetEnvoy?.id,
      version: source.version ?? "",
      status: "pending" as const,
      variables: resolved,
      retryOf: source.id,
      debriefEntryIds: [] as string[],
      createdAt: new Date(),
    };

    deployments.save(deployment);

    const userEmail = request.user?.email;
    const actor = userEmail ?? "anonymous";
    telemetry.record({
      actor,
      action: "operation.created",
      target: { type: "deployment", id: deployment.id },
      details: {
        artifactId: getArtifactId(source),
        environmentId: source.environmentId,
        partitionId: source.partitionId,
        envoyId: source.envoyId,
        retryOf: source.id,
      },
    });

    debrief.record({
      partitionId: deployment.partitionId ?? null,
      operationId: deployment.id,
      agent: "server",
      decisionType: "system",
      decision: `Retry of operation ${source.id} (attempt #${attemptNumber})`,
      reasoning: `User initiated retry of operation ${source.id}. Same artifact, version, environment, and partition.`,
      context: { retryOf: source.id, attemptNumber, actor },
      actor: userEmail,
    });

    // Envoy choice, in order: the source's envoy, one matching the
    // environment name, the first registered envoy. When none is found the
    // operation simply remains "pending".
    const planningEnvoy = envoyRegistry
      ? targetEnvoy
        ?? (environment ? envoyRegistry.findForEnvironment(environment.name) : undefined)
        ?? envoyRegistry.list()[0]
      : undefined;

    if (planningEnvoy) {
      const planningClient = new EnvoyClient(planningEnvoy.url);
      const environmentForPlanning = environment
        ? { id: environment.id, name: environment.name, variables: environment.variables }
        : { id: `direct:${planningEnvoy.id}`, name: planningEnvoy.name, variables: {} };

      // Planning succeeded: store the plan and move on, unless the operation
      // is gone or no longer "pending".
      const onPlanReady = (result: Awaited<ReturnType<typeof planningClient.requestPlan>>) => {
        const dep = deployments.get(deployment.id);
        if (!dep || dep.status !== "pending") return;

        dep.plan = result.plan;
        dep.rollbackPlan = result.rollbackPlan;
        dep.envoyId = planningEnvoy.id;

        const stepCount = result.plan.scriptedPlan.stepSummary.length;

        if (result.blocked) {
          // The envoy could not produce an executable plan: fail the operation.
          dep.status = "failed" as typeof dep.status;
          dep.failureReason = result.blockReason ?? "Plan blocked due to unrecoverable precondition failures";
          deployments.save(dep);

          debrief.record({
            partitionId: dep.partitionId ?? null,
            operationId: dep.id,
            agent: "envoy",
            decisionType: "plan-generation",
            decision: `Operation plan blocked — infrastructure prerequisites not met`,
            reasoning: result.blockReason ?? result.plan.reasoning,
            context: { stepCount, envoyId: planningEnvoy.id, blocked: true },
          });
          return;
        }

        dep.status = "awaiting_approval" as typeof dep.status;
        dep.recommendation = computeRecommendation(dep, deployments, result.assessmentSummary);
        deployments.save(dep);

        debrief.record({
          partitionId: dep.partitionId ?? null,
          operationId: dep.id,
          agent: "envoy",
          decisionType: "plan-generation",
          decision: `Operation plan generated with ${stepCount} steps`,
          reasoning: result.plan.reasoning,
          context: { stepCount, envoyId: planningEnvoy.id, delta: result.delta },
        });
      };

      // Planning (or the success handler above) threw: fail the operation if
      // it is still "pending".
      const onPlanFailed = (err: unknown) => {
        const dep = deployments.get(deployment.id);
        if (!dep || dep.status !== "pending") return;

        dep.status = "failed" as typeof dep.status;
        dep.failureReason = err instanceof Error ? err.message : "Planning failed";
        deployments.save(dep);

        debrief.record({
          partitionId: dep.partitionId ?? null,
          operationId: dep.id,
          agent: "server",
          decisionType: "deployment-failure",
          decision: "Envoy planning failed",
          reasoning: dep.failureReason!,
          context: { error: dep.failureReason, envoyId: planningEnvoy.id },
        });
      };

      planningClient
        .requestPlan({
          operationId: deployment.id,
          artifact: {
            id: artifact.id,
            name: artifact.name,
            type: artifact.type,
            analysis: {
              summary: artifact.analysis.summary,
              dependencies: artifact.analysis.dependencies,
              configurationExpectations: artifact.analysis.configurationExpectations,
              deploymentIntent: artifact.analysis.deploymentIntent,
              confidence: artifact.analysis.confidence,
            },
          },
          environment: environmentForPlanning,
          partition: partition
            ? { id: partition.id, name: partition.name, variables: partition.variables }
            : undefined,
          version: deployment.version ?? "",
          resolvedVariables: resolved,
        })
        .then(onPlanReady)
        .catch(onPlanFailed);
    }

    return reply.status(201).send({ deployment, sourceDeploymentId: source.id, attemptNumber });
  },
);
1308
+
1309
// GET /api/operations/:id/postmortem
// Builds a postmortem for one operation from its debrief entries. The
// response always contains `postmortem` from generatePostmortem; the
// `llmPostmortem` field is added only when generatePostmortemAsync did not
// report `heuristicFallback`.
app.get<{ Params: { id: string } }>(
  "/api/operations/:id/postmortem",
  { preHandler: [requirePermission("deployment.view")] },
  async (request, reply) => {
    const operation = deployments.get(request.params.id);
    if (!operation) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    const debriefEntries = debrief.getByOperation(operation.id);
    const postmortem = generatePostmortem(debriefEntries, operation);
    const llmOutcome = await generatePostmortemAsync(debriefEntries, operation, llm);

    if (llmOutcome.heuristicFallback) {
      return { postmortem };
    }
    return { postmortem, llmPostmortem: llmOutcome.llmPostmortem };
  },
);
1328
+
1329
// GET /api/debrief
// Lists debrief entries. Query parameters (DebriefQuerySchema): `limit`
// (default 50), `partitionId`, `decisionType`, `q` (full-text search).
// A query that fails schema validation is treated as if no parameters
// were given.
//
// Resolution order:
//   1. `q` present           → debrief.search(q, limit), then narrowed by the
//                              other filters (so fewer than `limit` entries
//                              may be returned).
//   2. no filters            → debrief.getRecent(limit).
//   3. partition and/or type → filtered entries, newest first, cut to `limit`.
app.get("/api/debrief", { preHandler: [requirePermission("deployment.view")] }, async (request) => {
  const qParsed = DebriefQuerySchema.safeParse(request.query);
  const { limit, partitionId, decisionType, q: searchQuery } = qParsed.success ? qParsed.data : {};

  const max = limit ?? 50;

  // Full-text search — takes priority over filters
  if (searchQuery) {
    let entries = debrief.search(searchQuery, max);
    if (partitionId) entries = entries.filter((e) => e.partitionId === partitionId);
    if (decisionType) entries = entries.filter((e) => e.decisionType === decisionType);
    return { entries };
  }

  // No filters — fast path
  if (!partitionId && !decisionType) {
    return { entries: debrief.getRecent(max) };
  }

  // Start with the most selective filter, then narrow
  let entries: ReturnType<typeof debrief.getByPartition>;
  if (partitionId && decisionType) {
    entries = debrief.getByPartition(partitionId).filter(
      (e) => e.decisionType === decisionType,
    );
  } else if (partitionId) {
    entries = debrief.getByPartition(partitionId);
  } else {
    entries = debrief.getByType(decisionType as Parameters<typeof debrief.getByType>[0]);
  }

  // Sort a copy: Array.prototype.sort works in place, and the arrays handed
  // back by getByPartition / getByType may be owned by the debrief store.
  const newestFirst = [...entries].sort((a, b) => b.timestamp.getTime() - a.timestamp.getTime());
  return { entries: newestFirst.slice(0, max) };
});
1364
+
1365
// GET /api/operations/pinned
// Returns the pinned operations for quick access: `pinnedIds` is the raw id
// list from the debrief store, `operations` the matching records (ids that
// no longer resolve to an operation are left out of `operations` but stay
// in `pinnedIds`).
// NOTE(review): several parameterized /api/operations/:id/... routes are
// registered above this one in the file, so this static path does not rely
// on being registered first — presumably the router prefers static segments
// over parametric ones; confirm against the Fastify routing docs.
app.get("/api/operations/pinned", { preHandler: [requirePermission("deployment.view")] }, async () => {
  const pinnedIds = debrief.getPinnedOperationIds();

  const operations = [];
  for (const pinnedId of pinnedIds) {
    const operation = deployments.get(pinnedId);
    if (operation) {
      operations.push(operation);
    }
  }

  return { operations, pinnedIds };
});
1372
+
1373
// POST /api/operations/:id/pin
// Marks the given operation id as pinned in the debrief store. The id is
// passed through as-is; this handler does not look the operation up.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/pin",
  { preHandler: [requirePermission("deployment.view")] },
  async (request) => {
    const { id: operationId } = request.params;
    debrief.pinOperation(operationId);
    return { pinned: true };
  },
);
1381
+
1382
// DELETE /api/operations/:id/pin
// Removes the given operation id from the pinned set in the debrief store.
// The id is passed through as-is; this handler does not look the operation up.
app.delete<{ Params: { id: string } }>(
  "/api/operations/:id/pin",
  { preHandler: [requirePermission("deployment.view")] },
  async (request) => {
    const { id: operationId } = request.params;
    debrief.unpinOperation(operationId);
    return { pinned: false };
  },
);
1390
+
1391
+ // ---------------------------------------------------------------------------
1392
+ // Progress streaming — envoy callback and SSE endpoints
1393
+ // ---------------------------------------------------------------------------
1394
+
1395
// POST /api/operations/:id/progress — receives progress events from an envoy.
//
// This route has no permission preHandler; callers authenticate with an
// envoy token sent as `Authorization: Bearer <token>`. The auth scheme is
// matched case-insensitively, as HTTP auth schemes are case-insensitive
// (previously only the exact spelling "Bearer " was accepted).
//
// Responses: 501 no progress store configured · 401 missing/invalid token ·
// 400 body fails ProgressEventSchema or its deploymentId differs from the
// URL id · 200 { received: true } after the event is pushed to the store.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/progress",
  async (request, reply) => {
    if (!progressStore) {
      return reply.status(501).send({ error: "Progress streaming not configured" });
    }

    // Validate envoy token — this route is exempt from JWT auth.
    // NOTE(review): when no envoy registry is configured the token check is
    // skipped entirely and any caller may push events — confirm this is
    // intended for registry-less setups.
    if (envoyRegistry) {
      const authHeader = (request.headers.authorization ?? "") as string;
      const bearerPrefixLength = "Bearer ".length;
      const token = /^bearer /i.test(authHeader) ? authHeader.slice(bearerPrefixLength) : null;
      if (!token || !envoyRegistry.validateToken(token)) {
        return reply.status(401).send({ error: "Invalid or missing envoy token" });
      }
    }

    const parsed = ProgressEventSchema.safeParse(request.body);
    if (!parsed.success) {
      return reply.status(400).send({ error: "Invalid progress event", details: parsed.error.format() });
    }

    const event = parsed.data;

    // The event must belong to the operation named in the URL.
    if (event.deploymentId !== request.params.id) {
      return reply.status(400).send({ error: "Operation ID in URL does not match event body" });
    }

    progressStore.push(event);
    return reply.status(200).send({ received: true });
  },
);
1428
+
1429
// GET /api/operations/:id/stream — SSE endpoint for live progress
// Auth is via ?token= query param since EventSource cannot send headers
//
// Flow: replay stored events (all of them, or only those after Last-Event-ID on a
// reconnect), close immediately if the replay already ends with "deployment-completed",
// otherwise subscribe to the progress store until completion or disconnect.
app.get<{ Params: { id: string } }>(
  "/api/operations/:id/stream",
  { preHandler: [requirePermission("deployment.view")] },
  (request, reply) => {
    if (!progressStore) {
      reply.status(501).send({ error: "Progress streaming not configured" });
      return;
    }
    // Capture the narrowed store so the closures below need no non-null assertions.
    const store = progressStore;

    // Hijack the connection so Fastify does not finalize the response
    reply.hijack();

    // Set SSE headers
    reply.raw.writeHead(200, {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      "Connection": "keep-alive",
      "X-Accel-Buffering": "no",
    });

    const deploymentId = request.params.id;

    // Check for Last-Event-ID header (reconnection with replay).
    // A non-numeric header parses to NaN, which is falsy and therefore falls back to a full replay.
    const lastEventIdHeader = request.headers["last-event-id"];
    const lastEventId = lastEventIdHeader ? parseInt(String(lastEventIdHeader), 10) : 0;

    // Send catch-up events — either all (fresh connect) or since last ID (reconnect)
    const existing = lastEventId
      ? store.getEventsSince(deploymentId, lastEventId)
      : store.getEvents(deploymentId);
    for (const event of existing) {
      reply.raw.write(`id: ${event.id}\ndata: ${JSON.stringify(event)}\n\n`);
    }

    // Check if deployment already completed — if so, close after catch-up
    const lastEvent = existing[existing.length - 1];
    if (lastEvent?.type === "deployment-completed") {
      reply.raw.end();
      return;
    }

    // Unsubscribes at most once, however many paths request it
    // (completion, write failure, connection close).
    let detached = false;
    const detach = (): void => {
      if (detached) return;
      detached = true;
      store.removeListener(deploymentId, listener);
    };

    // Subscribe to new events
    const listener = (event: { id?: number; deploymentId: string; type: string }): void => {
      // Never write to a response that has already been ended or torn down.
      if (detached || reply.raw.writableEnded || reply.raw.destroyed) {
        detach();
        return;
      }

      try {
        reply.raw.write(`id: ${event.id}\ndata: ${JSON.stringify(event)}\n\n`);

        // Close the stream when deployment completes, and unsubscribe first so that
        // any later event for this id is not written to the ended response.
        if (event.type === "deployment-completed") {
          detach();
          reply.raw.end();
        }
      } catch {
        // Writing failed — stop listening
        detach();
      }
    };

    store.addListener(deploymentId, listener);

    // Clean up when the response closes (normal completion or client disconnect).
    // The response's "close" event is used because the request's "close" event
    // signals request completion rather than connection teardown.
    reply.raw.on("close", detach);
  },
);
1495
+
1496
// -- Health reports from envoys (trigger system) ---------------------------

// POST /api/health-reports — an envoy reports against a trigger operation.
//
// Steps:
//   1. Validate the envoy bearer token (only when an envoy registry is configured).
//   2. Validate the body against HealthReportSchema and load the trigger operation.
//   3. Record the report in the debrief.
//   4. If a child spawned by this trigger is still active, count a suppression and return 200.
//   5. Otherwise save a new child operation, update the trigger's fire stats, and — when a
//      registry is configured — request a plan for the child without awaiting it.
//
// Responses: 401 bad token · 400 invalid body · 404 unknown/non-trigger operation ·
//            200 deduplicated · 201 child spawned.
app.post("/api/health-reports", async (request, reply) => {
  // Validate envoy token — same pattern as /api/envoy/report
  if (envoyRegistry) {
    const authHeader = (request.headers.authorization ?? "") as string;
    const token = authHeader.startsWith("Bearer ") ? authHeader.slice(7) : null;
    if (!token || !envoyRegistry.validateToken(token)) {
      return reply.status(401).send({ error: "Invalid or missing envoy token" });
    }
  }

  const { HealthReportSchema } = await import("@synth-deploy/core");
  const parsed = HealthReportSchema.safeParse(request.body);
  if (!parsed.success) {
    return reply.status(400).send({ error: "Invalid health report", details: parsed.error.format() });
  }

  const report = parsed.data;

  // Find the trigger operation
  const triggerOp = deployments.get(report.triggerOperationId);
  if (!triggerOp || triggerOp.input.type !== "trigger") {
    return reply.status(404).send({ error: `Trigger operation not found: ${report.triggerOperationId}` });
  }

  // Record the health report
  debrief.record({
    partitionId: report.partitionId ?? null,
    operationId: triggerOp.id,
    agent: "envoy",
    decisionType: "health-report-received",
    decision: `Health report: ${report.summary}`,
    reasoning: `Trigger condition met on ${report.envoyId}. Probes: ${report.probeResults.map(p => `${p.label}=${p.parsedValue ?? p.output}`).join(", ")}`,
    context: { directiveId: report.directiveId, envoyId: report.envoyId, probeResults: report.probeResults },
  });

  // Deduplication: check for active child operations from this trigger
  const allOps = deployments.list();
  const activeChild = allOps.find(
    (op) => op.lineage === triggerOp.id &&
      ["pending", "planning", "awaiting_approval", "approved", "running"].includes(op.status),
  );

  if (activeChild) {
    // Suppress — record that we suppressed
    triggerOp.triggerSuppressedCount = (triggerOp.triggerSuppressedCount ?? 0) + 1;
    deployments.save(triggerOp);

    debrief.record({
      partitionId: report.partitionId ?? null,
      operationId: triggerOp.id,
      agent: "server",
      decisionType: "trigger-suppressed",
      decision: `Trigger suppressed — child operation ${activeChild.id} is still in progress (${activeChild.status})`,
      reasoning: `Deduplication: an operation spawned by this trigger is already active. Suppressed ${triggerOp.triggerSuppressedCount} time(s) total.`,
      context: { activeChildId: activeChild.id, activeChildStatus: activeChild.status, suppressedCount: triggerOp.triggerSuppressedCount },
    });

    return reply.status(200).send({ spawned: false, reason: "deduplicated", activeChildId: activeChild.id });
  }

  // Spawn child operation
  const triggerInput = triggerOp.input as { type: "trigger"; condition: string; responseIntent: string };
  const responseType = triggerOp.monitoringDirective?.responseType ?? "maintain";
  const childOp = {
    id: crypto.randomUUID(),
    input: responseType === "deploy"
      ? { type: "deploy" as const, artifactId: "" }
      : { type: "maintain" as const, intent: triggerInput.responseIntent },
    intent: triggerInput.responseIntent,
    lineage: triggerOp.id,
    triggeredBy: "trigger" as const,
    environmentId: report.environmentId ?? triggerOp.environmentId,
    partitionId: report.partitionId ?? triggerOp.partitionId,
    envoyId: report.envoyId,
    version: "",
    status: "pending" as const,
    variables: triggerOp.variables,
    debriefEntryIds: [] as string[],
    createdAt: new Date(),
  };

  deployments.save(childOp);

  // Update trigger stats
  triggerOp.triggerFireCount = (triggerOp.triggerFireCount ?? 0) + 1;
  triggerOp.triggerLastFiredAt = new Date();
  deployments.save(triggerOp);

  debrief.record({
    partitionId: childOp.partitionId ?? null,
    operationId: childOp.id,
    agent: "server",
    decisionType: "trigger-fired",
    decision: `Trigger fired — spawned child operation ${childOp.id}`,
    reasoning: `Condition "${triggerInput.condition}" met. Response: "${triggerInput.responseIntent}". Fire count: ${triggerOp.triggerFireCount}.`,
    context: { triggerId: triggerOp.id, envoyId: report.envoyId, fireCount: triggerOp.triggerFireCount },
  });
  telemetry.record({ actor: "agent", action: "trigger.fired" as TelemetryAction, target: { type: "trigger", id: triggerOp.id }, details: { childOperationId: childOp.id } });

  // Dispatch planning for the child operation (same as new operation flow)
  if (envoyRegistry) {
    const childEnvoy = report.envoyId
      ? envoyRegistry.get(report.envoyId)
      : envoyRegistry.list()[0];

    if (childEnvoy) {
      const planningClient = new EnvoyClient(childEnvoy.url);
      const environment = childOp.environmentId ? environments.get(childOp.environmentId) : undefined;
      const environmentForPlanning = environment
        ? { id: environment.id, name: environment.name, variables: environment.variables }
        : { id: `direct:${childEnvoy.id}`, name: childEnvoy.name, variables: {} };

      // Not awaited: the HTTP response is sent while planning continues in the background.
      // Both callbacks re-read the child and bail out unless it is still "pending".
      planningClient.requestPlan({
        operationId: childOp.id,
        operationType: responseType as "deploy" | "query" | "investigate" | "maintain" | "trigger",
        intent: childOp.intent,
        environment: environmentForPlanning,
        version: "",
        resolvedVariables: childOp.variables,
      }).then((result) => {
        const dep = deployments.get(childOp.id);
        if (!dep || dep.status !== "pending") return;

        dep.plan = result.plan;
        dep.rollbackPlan = result.rollbackPlan;
        dep.envoyId = childEnvoy.id;

        if (result.blocked) {
          dep.status = "failed" as typeof dep.status;
          dep.failureReason = result.blockReason ?? "Plan blocked";
          deployments.save(dep);
        } else {
          dep.status = "awaiting_approval" as typeof dep.status;
          dep.recommendation = computeRecommendation(dep, deployments, result.assessmentSummary);
          deployments.save(dep);
        }
      }).catch((err) => {
        const dep = deployments.get(childOp.id);
        if (!dep || dep.status !== "pending") return;
        dep.status = "failed" as typeof dep.status;
        dep.failureReason = err instanceof Error ? err.message : "Planning failed";
        deployments.save(dep);
      });
    } else {
      // No envoy could be resolved, so no plan request will ever be issued for this child.
      // Leaving it "pending" would make the deduplication check above treat it as an
      // active child and suppress later reports for this trigger, so fail it now.
      const dep = deployments.get(childOp.id);
      if (dep && dep.status === "pending") {
        dep.status = "failed" as typeof dep.status;
        dep.failureReason = "No envoy available to plan triggered operation";
        deployments.save(dep);
      }
    }
  }

  return reply.status(201).send({ spawned: true, childOperationId: childOp.id });
});
1646
+
1647
+ // -- Trigger management (pause/resume/disable) ----------------------------
1648
+
1649
// POST /api/operations/:id/trigger/pause — pauses an active trigger.
//
// Responses: 404 when the id is unknown or not a trigger operation; 409 when the trigger
// is not currently "active". The envoy call is awaited before any local state changes,
// so a rejected envoy call leaves the stored operation untouched.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/trigger/pause",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const trigger = deployments.get(request.params.id);
    if (!trigger || trigger.input.type !== "trigger") {
      return reply.status(404).send({ error: "Trigger operation not found" });
    }
    if (trigger.triggerStatus !== "active") {
      return reply.status(409).send({ error: `Cannot pause trigger in "${trigger.triggerStatus}" status` });
    }

    // Pause on the envoy, when the trigger is bound to one that the registry knows about.
    const boundEnvoy = trigger.envoyId && envoyRegistry
      ? envoyRegistry.get(trigger.envoyId)
      : undefined;
    if (boundEnvoy) {
      await new EnvoyClient(boundEnvoy.url).pauseMonitoringDirective(trigger.id);
    }

    // Mirror the new state on the operation and on its directive, then persist.
    trigger.triggerStatus = "paused";
    if (trigger.monitoringDirective) {
      trigger.monitoringDirective.status = "paused";
    }
    deployments.save(trigger);

    const userEmail = request.user?.email;
    const actor = userEmail ?? "anonymous";
    debrief.record({
      partitionId: trigger.partitionId ?? null,
      operationId: trigger.id,
      agent: "server",
      decisionType: "trigger-paused",
      decision: `Trigger paused by ${actor}`,
      reasoning: "User requested trigger pause",
      context: {},
      actor: userEmail,
    });
    telemetry.record({
      actor,
      action: "trigger.paused" as TelemetryAction,
      target: { type: "trigger", id: trigger.id },
      details: {},
    });

    return { operation: trigger, paused: true };
  },
);
1690
+
1691
// POST /api/operations/:id/trigger/resume — resumes a paused trigger.
//
// Responses: 404 when the id is unknown or not a trigger operation; 409 when the trigger
// is not currently "paused". The envoy call is awaited before any local state changes,
// so a rejected envoy call leaves the stored operation untouched.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/trigger/resume",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const trigger = deployments.get(request.params.id);
    if (!trigger || trigger.input.type !== "trigger") {
      return reply.status(404).send({ error: "Trigger operation not found" });
    }
    if (trigger.triggerStatus !== "paused") {
      return reply.status(409).send({ error: `Cannot resume trigger in "${trigger.triggerStatus}" status` });
    }

    // Resume on the envoy, when the trigger is bound to one that the registry knows about.
    const boundEnvoy = trigger.envoyId && envoyRegistry
      ? envoyRegistry.get(trigger.envoyId)
      : undefined;
    if (boundEnvoy) {
      await new EnvoyClient(boundEnvoy.url).resumeMonitoringDirective(trigger.id);
    }

    // Mirror the new state on the operation and on its directive, then persist.
    trigger.triggerStatus = "active";
    if (trigger.monitoringDirective) {
      trigger.monitoringDirective.status = "active";
    }
    deployments.save(trigger);

    const userEmail = request.user?.email;
    const actor = userEmail ?? "anonymous";
    debrief.record({
      partitionId: trigger.partitionId ?? null,
      operationId: trigger.id,
      agent: "server",
      decisionType: "trigger-resumed",
      decision: `Trigger resumed by ${actor}`,
      reasoning: "User requested trigger resume",
      context: {},
      actor: userEmail,
    });
    telemetry.record({
      actor,
      action: "trigger.resumed" as TelemetryAction,
      target: { type: "trigger", id: trigger.id },
      details: {},
    });

    return { operation: trigger, resumed: true };
  },
);
1732
+
1733
// POST /api/operations/:id/trigger/disable — disables a trigger regardless of its current
// trigger status.
//
// Responds 404 when the id is unknown or not a trigger operation. Removing the directive
// from the envoy is best-effort: a rejected envoy call is ignored and the trigger is
// still marked disabled locally.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/trigger/disable",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const trigger = deployments.get(request.params.id);
    if (!trigger || trigger.input.type !== "trigger") {
      return reply.status(404).send({ error: "Trigger operation not found" });
    }

    // Remove from the envoy, when the trigger is bound to one that the registry knows about.
    const boundEnvoy = trigger.envoyId && envoyRegistry
      ? envoyRegistry.get(trigger.envoyId)
      : undefined;
    if (boundEnvoy) {
      await new EnvoyClient(boundEnvoy.url).removeMonitoringDirective(trigger.id).catch(() => {});
    }

    // Mirror the new state on the operation and on its directive, then persist.
    trigger.triggerStatus = "disabled";
    if (trigger.monitoringDirective) {
      trigger.monitoringDirective.status = "disabled";
    }
    deployments.save(trigger);

    const userEmail = request.user?.email;
    const actor = userEmail ?? "anonymous";
    debrief.record({
      partitionId: trigger.partitionId ?? null,
      operationId: trigger.id,
      agent: "server",
      decisionType: "trigger-disabled",
      decision: `Trigger disabled by ${actor}`,
      reasoning: "User requested trigger disable",
      context: {},
      actor: userEmail,
    });
    telemetry.record({
      actor,
      action: "trigger.disabled" as TelemetryAction,
      target: { type: "trigger", id: trigger.id },
      details: {},
    });

    return { operation: trigger, disabled: true };
  },
);
1771
+
1772
+ // ---------------------------------------------------------------------------
1773
+ // Composite operation helpers — defined inside registerOperationRoutes so
1774
+ // they close over the stores and registry.
1775
+ // ---------------------------------------------------------------------------
1776
+
1777
/**
 * Creates one child operation per entry of a composite operation's input and asks
 * `planningEnvoy` to plan them one at a time, in sequence order.
 *
 * Outcomes written to the deployment store:
 * - no child inputs: the parent is marked failed and nothing else happens;
 * - a child plan is blocked or a plan request throws: that child is marked failed, the
 *   parent (if still "pending") is marked failed with a "composite-failed" debrief
 *   entry, and planning stops;
 * - every child planned: the parent (if still "pending") receives a combined plan and a
 *   placeholder rollback plan, and moves to "awaiting_approval".
 *
 * @param parentOp      The composite operation whose `input.operations` are planned.
 * @param _registry     Unused by this function.
 * @param planningEnvoy The envoy that receives every plan request.
 */
async function planCompositeChildren(
  parentOp: import("@synth-deploy/core").Operation,
  _registry: EnvoyRegistry,
  planningEnvoy: { id: string; name: string; url: string },
): Promise<void> {
  type Operation = import("@synth-deploy/core").Operation;
  type OperationInput = import("@synth-deploy/core").OperationInput;

  const { operations: childInputs } = parentOp.input as { type: "composite"; operations: OperationInput[] };

  if (childInputs.length === 0) {
    const emptyParent = deployments.get(parentOp.id);
    if (emptyParent) {
      emptyParent.status = "failed" as typeof emptyParent.status;
      emptyParent.failureReason = "Composite operation has no child operations";
      deployments.save(emptyParent);
    }
    return;
  }

  // Intent text for a child: its own `intent` field when it has one, a "Monitor: …"
  // line for triggers, otherwise undefined.
  const deriveIntent = (input: OperationInput): string | undefined => {
    if ("intent" in input) return (input as { intent?: string }).intent;
    if (input.type === "trigger") return `Monitor: ${(input as { condition: string }).condition}`;
    return undefined;
  };

  // Marks the stored parent failed and records why — only while it is still "pending".
  const failPendingParent = (
    failureReason: string,
    decision: string,
    reasoning: string,
    context: { childId: string; childType: string; error?: string },
  ): void => {
    const storedParent = deployments.get(parentOp.id);
    if (!storedParent || storedParent.status !== "pending") return;
    storedParent.status = "failed" as typeof storedParent.status;
    storedParent.failureReason = failureReason;
    deployments.save(storedParent);
    debrief.record({
      partitionId: storedParent.partitionId ?? null,
      operationId: storedParent.id,
      agent: "server",
      decisionType: "composite-failed",
      decision,
      reasoning,
      context,
    });
  };

  const environment = parentOp.environmentId ? environments.get(parentOp.environmentId) : undefined;
  const partition = parentOp.partitionId ? partitions.get(parentOp.partitionId) : undefined;

  // Phase 1 — persist every child up front, inheriting scope and variables from the parent.
  const childIds = childInputs.map((input, sequenceIndex) => {
    const newChild = {
      id: crypto.randomUUID(),
      input,
      intent: deriveIntent(input),
      lineage: parentOp.id,
      triggeredBy: "agent" as const,
      environmentId: parentOp.environmentId,
      partitionId: parentOp.partitionId,
      envoyId: planningEnvoy.id,
      version: parentOp.version ?? "",
      status: "pending" as const,
      variables: parentOp.variables,
      debriefEntryIds: [] as string[],
      createdAt: new Date(),
      sequenceIndex,
    };
    deployments.save(newChild);
    return newChild.id;
  });

  const childTypes = childInputs.map((input) => input.type);
  debrief.record({
    partitionId: parentOp.partitionId ?? null,
    operationId: parentOp.id,
    agent: "server",
    decisionType: "composite-started",
    decision: `Composite operation started — planning ${childIds.length} child operation(s) sequentially`,
    reasoning: `Sequential composite: ${childTypes.join(" → ")}`,
    context: { childIds, childCount: childIds.length, sequence: childInputs.map((input) => input.type) },
  });

  // Without a stored environment, plan against a synthetic one named after the envoy.
  const environmentForPlanning = environment
    ? { id: environment.id, name: environment.name, variables: environment.variables }
    : { id: `direct:${planningEnvoy.id}`, name: planningEnvoy.name, variables: {} };

  // Phase 2 — request a plan for each child in order; the first failure stops the sequence.
  let planningFailed = false;

  for (const childId of childIds) {
    const pendingChild = deployments.get(childId);
    if (!pendingChild) continue;
    const input = pendingChild.input;

    const artifact = input.type === "deploy"
      ? artifactStore.get((input as { artifactId: string }).artifactId)
      : undefined;

    const planningClient = new EnvoyClient(planningEnvoy.url);

    try {
      const result = await planningClient.requestPlan({
        operationId: childId,
        operationType: input.type as "deploy" | "query" | "investigate" | "maintain" | "trigger",
        intent: deriveIntent(input),
        ...(artifact ? {
          artifact: {
            id: artifact.id,
            name: artifact.name,
            type: artifact.type,
            analysis: artifact.analysis,
          },
        } : {}),
        ...(input.type === "investigate" && "allowWrite" in input
          ? { allowWrite: (input as { allowWrite?: boolean }).allowWrite }
          : {}),
        environment: environmentForPlanning,
        partition: partition ? { id: partition.id, name: partition.name, variables: partition.variables } : undefined,
        version: parentOp.version ?? "",
        resolvedVariables: parentOp.variables,
      });

      // Re-read the child: the plan request was awaited, so the stored copy is authoritative.
      const plannedChild = deployments.get(childId);
      if (!plannedChild) continue;

      if (result.blocked) {
        plannedChild.status = "failed" as typeof plannedChild.status;
        plannedChild.failureReason = result.blockReason ?? "Plan blocked";
        deployments.save(plannedChild);
        planningFailed = true;

        failPendingParent(
          `Child operation (${input.type}) plan blocked: ${plannedChild.failureReason}`,
          `Child operation planning blocked — composite cannot proceed`,
          plannedChild.failureReason,
          { childId, childType: input.type },
        );
        break;
      }

      plannedChild.plan = result.plan;
      plannedChild.rollbackPlan = result.rollbackPlan;
      plannedChild.envoyId = planningEnvoy.id;
      if (input.type === "query" && result.queryFindings) {
        plannedChild.queryFindings = result.queryFindings;
      }
      if (input.type === "investigate" && result.investigationFindings) {
        plannedChild.investigationFindings = result.investigationFindings;
      }
      plannedChild.status = "awaiting_approval" as typeof plannedChild.status;
      deployments.save(plannedChild);

      debrief.record({
        partitionId: plannedChild.partitionId ?? null,
        operationId: plannedChild.id,
        agent: "envoy",
        decisionType: "plan-generation",
        decision: `Child operation plan generated with ${result.plan.scriptedPlan.stepSummary.length} steps`,
        reasoning: result.plan.reasoning,
        context: { stepCount: result.plan.scriptedPlan.stepSummary.length, envoyId: planningEnvoy.id, parentOperationId: parentOp.id },
      });
    } catch (err) {
      const erroredChild = deployments.get(childId);
      if (erroredChild) {
        erroredChild.status = "failed" as typeof erroredChild.status;
        erroredChild.failureReason = err instanceof Error ? err.message : "Planning failed";
        deployments.save(erroredChild);
      }
      planningFailed = true;

      const parentReason = `Child operation (${input.type}) planning failed: ${err instanceof Error ? err.message : "unknown error"}`;
      failPendingParent(
        parentReason,
        `Child operation planning failed — composite cannot proceed`,
        parentReason,
        { childId, childType: input.type, error: parentReason },
      );
      break;
    }
  }

  if (planningFailed) return;

  // Phase 3 — all children planned: build the combined summary plan and await approval.
  const plannedChildren = childIds
    .map((id) => deployments.get(id))
    .filter(Boolean) as Operation[];
  const total = plannedChildren.length;

  const combinedStepSummary = plannedChildren.flatMap((child, idx) => {
    const scripted = child.plan?.scriptedPlan;
    if (!scripted) return [];
    return scripted.stepSummary.map((step) => ({
      ...step,
      description: `[${idx + 1}/${total}: ${child.input.type}] ${step.description}`,
    }));
  });

  const combinedReasoning = plannedChildren
    .map((child, idx) => `Step ${idx + 1} (${child.input.type}): ${child.plan?.reasoning ?? "no reasoning"}`)
    .join("\n\n");

  // Child execution scripts are concatenated under a per-child banner comment.
  const combinedScript = plannedChildren
    .map((child, idx) => {
      const banner = `# --- Child ${idx + 1}/${total}: ${child.input.type} ---`;
      const script = child.plan?.scriptedPlan?.executionScript ?? "# no script";
      return `${banner}\n${script}`;
    })
    .join("\n\n");

  const readyParent = deployments.get(parentOp.id);
  if (!readyParent || readyParent.status !== "pending") return;

  readyParent.plan = {
    scriptedPlan: {
      platform: "bash",
      executionScript: combinedScript,
      dryRunScript: null,
      rollbackScript: null,
      reasoning: combinedReasoning,
      stepSummary: combinedStepSummary,
    },
    reasoning: combinedReasoning,
  };
  readyParent.rollbackPlan = {
    scriptedPlan: {
      platform: "bash",
      executionScript: "# Child operations handle their own rollback",
      dryRunScript: null,
      rollbackScript: null,
      reasoning: "Child operations handle their own rollback",
      stepSummary: [],
    },
    reasoning: "Child operations handle their own rollback",
  };
  readyParent.status = "awaiting_approval" as typeof readyParent.status;
  readyParent.recommendation = computeRecommendation(readyParent, deployments);
  deployments.save(readyParent);

  debrief.record({
    partitionId: readyParent.partitionId ?? null,
    operationId: readyParent.id,
    agent: "server",
    decisionType: "composite-plan-ready",
    decision: `All ${total} child plans ready — composite awaiting approval`,
    reasoning: combinedReasoning,
    context: { childIds, totalSteps: combinedStepSummary.length },
  });
}
2007
+
2008
/**
 * Executes the already-planned children of a composite operation one at a time.
 *
 * For each child, in order: dispatch its approved plan to its envoy, then poll the
 * deployment store until the child reaches a terminal status or the timeout elapses.
 * The first child that cannot be dispatched, fails, or does not finish in time stops
 * the sequence and marks the parent failed. If every child succeeds, the parent is
 * marked succeeded.
 *
 * Every failure path stamps `completedAt` on the parent and writes a "composite-failed"
 * debrief entry. If the parent disappears or is marked failed/cancelled while a child
 * is being awaited, the function returns without touching the parent.
 *
 * @param parentId  Id of the composite operation.
 * @param childIds  Child operation ids, in execution order.
 * @param options   Optional polling settings; defaults are a 5-minute per-child timeout
 *                  (`timeoutMs`) and a 2-second poll interval (`pollIntervalMs`).
 */
async function executeCompositeSequentially(
  parentId: string,
  childIds: string[],
  options: { timeoutMs?: number; pollIntervalMs?: number } = {},
): Promise<void> {
  const { timeoutMs = 300_000, pollIntervalMs = 2_000 } = options;

  const parentOp = deployments.get(parentId);
  if (!parentOp) return;

  // Marks the stored parent failed, stamps completion time, and records the reason.
  const failParent = (
    failureReason: string,
    decision: string,
    context: Record<string, unknown>,
  ): void => {
    const dep = deployments.get(parentId);
    if (!dep) return;
    dep.status = "failed" as typeof dep.status;
    dep.failureReason = failureReason;
    dep.completedAt = new Date();
    deployments.save(dep);
    debrief.record({
      partitionId: dep.partitionId ?? null,
      operationId: dep.id,
      agent: "server",
      decisionType: "composite-failed",
      decision,
      reasoning: failureReason,
      context,
    });
  };

  debrief.record({
    partitionId: parentOp.partitionId ?? null,
    operationId: parentOp.id,
    agent: "server",
    decisionType: "composite-started",
    decision: `Composite execution started — running ${childIds.length} child operations sequentially`,
    reasoning: `Composite operation approved — executing children in order`,
    context: { childIds, totalChildren: childIds.length },
  });

  // Base URL envoys use to call back with progress; identical for every child.
  const serverPort = process.env.PORT ?? "9410";
  const serverUrl = process.env.SYNTH_SERVER_URL ?? `http://localhost:${serverPort}`;

  for (let i = 0; i < childIds.length; i++) {
    const childId = childIds[i];
    const child = deployments.get(childId);
    if (!child || !child.plan || !child.rollbackPlan) {
      failParent(
        `Child operation ${i + 1} has no plan — cannot execute`,
        `Child operation ${i + 1} missing plan — composite failed`,
        { childId, childIndex: i },
      );
      return;
    }

    // Prefer the envoy recorded on the child; otherwise fall back to the first registered one.
    const targetEnvoy = child.envoyId ? envoyRegistry?.get(child.envoyId) : envoyRegistry?.list()[0];
    if (!targetEnvoy) {
      failParent(
        `No envoy available for child operation ${i + 1}`,
        `No envoy available for child operation ${i + 1} — composite failed`,
        { childId, childIndex: i },
      );
      return;
    }

    child.status = "running" as typeof child.status;
    deployments.save(child);

    debrief.record({
      partitionId: child.partitionId ?? null,
      operationId: child.id,
      agent: "server",
      decisionType: "composite-child-started",
      decision: `Executing child operation ${i + 1}/${childIds.length} (${child.input.type})`,
      reasoning: `Sequential composite execution — child ${i + 1} of ${childIds.length}`,
      context: { childId, childIndex: i, parentOperationId: parentId, childType: child.input.type },
    });

    const artifact = artifactStore.get(getArtifactId(child) ?? "");
    const progressCallbackUrl = `${serverUrl}/api/operations/${child.id}/progress`;
    // The callback token is taken from the registry entry whose URL matches the target envoy.
    const callbackToken = envoyRegistry?.list().find((r) => r.url === targetEnvoy.url)?.token;

    const childEnvoyClient = new EnvoyClient(targetEnvoy.url);

    try {
      await childEnvoyClient.executeApprovedPlan({
        operationId: child.id,
        plan: child.plan,
        rollbackPlan: child.rollbackPlan,
        artifactType: artifact?.type ?? "unknown",
        artifactName: artifact?.name ?? "unknown",
        environmentId: child.environmentId ?? "",
        progressCallbackUrl,
        callbackToken,
      });
    } catch (err) {
      const failureReason = `Child operation ${i + 1} (${child.input.type}) execution dispatch failed: ${err instanceof Error ? err.message : "unknown error"}`;
      failParent(
        failureReason,
        `Child operation ${i + 1} execution dispatch failed`,
        { childId, childIndex: i, error: failureReason },
      );
      return;
    }

    // Wait for the child to reach a terminal status, polling the store.
    const start = Date.now();
    let childSucceeded = false;

    while (Date.now() - start < timeoutMs) {
      await new Promise<void>((resolve) => setTimeout(resolve, pollIntervalMs));
      const updated = deployments.get(childId);
      if (updated?.status === "succeeded") {
        childSucceeded = true;
        break;
      }
      if (updated?.status === "failed" || updated?.status === "rolled_back" || updated?.status === "cancelled") {
        break;
      }
      // Stop if the parent was externally cancelled or failed while we were waiting
      const parentNow = deployments.get(parentId);
      if (!parentNow || parentNow.status === "failed" || parentNow.status === "cancelled") {
        return;
      }
    }

    const finalChild = deployments.get(childId);
    if (!childSucceeded) {
      // Covers both a terminal non-success status and running out of time.
      const reason = finalChild?.failureReason ?? `Child operation ${i + 1} did not complete in time`;
      failParent(
        `Composite stopped at step ${i + 1}/${childIds.length} (${child.input.type}): ${reason}`,
        `Composite stopped at child ${i + 1}/${childIds.length} — ${child.input.type} failed`,
        { childId, childIndex: i, failedChildType: child.input.type, completedChildren: i },
      );
      return;
    }

    debrief.record({
      partitionId: finalChild?.partitionId ?? null,
      operationId: childId,
      agent: "server",
      decisionType: "composite-child-completed",
      decision: `Child operation ${i + 1}/${childIds.length} (${child.input.type}) completed successfully`,
      reasoning: `Child execution succeeded — proceeding to next child`,
      context: { childId, childIndex: i, parentOperationId: parentId },
    });
  }

  // All children succeeded
  const dep = deployments.get(parentId);
  if (dep) {
    dep.status = "succeeded" as typeof dep.status;
    dep.completedAt = new Date();
    deployments.save(dep);
    debrief.record({
      partitionId: dep.partitionId ?? null,
      operationId: dep.id,
      agent: "server",
      decisionType: "composite-completed",
      decision: `Composite operation completed — all ${childIds.length} child operations succeeded`,
      reasoning: `All child operations executed successfully in sequence`,
      context: { childIds, totalChildren: childIds.length },
    });
  }
}
2183
+ }
2184
+
2185
+ // ---------------------------------------------------------------------------
2186
+ // Recommendation engine — synthesizes enrichment context into a verdict
2187
+ // ---------------------------------------------------------------------------
2188
+
2189
/**
 * Builds a deployment recommendation from what the store knows about the
 * artifact version and the target environment.
 *
 * The verdict starts at "proceed" and is adjusted by these signals, in order:
 *  1. the store returns "rolled_back" records for this artifact/version -> "caution"
 *  2. other operations on the same environment are "running" or "approved" -> "hold"
 *  3. `store.countByEnvironment` reports more than 5 for the 24h window start
 *     -> "caution", unless the verdict was already raised
 *  4. the latest operation on the environment (when it is not this one)
 *     "failed" or was "rolled_back" -> "caution", unless already raised;
 *     a "succeeded" status is only noted as a factor
 *
 * Signals 2-4 are evaluated only when `deployment.environmentId` is set.
 *
 * @param deployment The operation being evaluated.
 * @param store Store queried for prior and concurrent operations.
 * @param llmSummary Optional summary text; when provided (not null/undefined)
 *   it replaces the canned summary for the final verdict.
 * @returns The verdict, a summary line, and the list of contributing factors.
 */
function computeRecommendation(
  deployment: import("@synth-deploy/core").Deployment,
  store: IDeploymentStore,
  llmSummary?: string,
): import("@synth-deploy/core").DeploymentRecommendation {
  const DAY_MS = 24 * 60 * 60 * 1000;
  const windowStart = new Date(new Date().getTime() - DAY_MS);

  const factors: string[] = [];
  let verdict: RecommendationVerdict = "proceed";

  // Signal 1: has this exact artifact version been rolled back before?
  if (deployment.version) {
    // Falls back to an empty artifact id when getArtifactId yields null/undefined.
    const artifactId = getArtifactId(deployment) ?? "";
    const priorRollbacks = store.findByArtifactVersion(
      artifactId,
      deployment.version,
      "rolled_back",
    );
    if (priorRollbacks.length > 0) {
      verdict = "caution";
      factors.push("This artifact version was previously rolled back");
    }
  }

  // Signals 2-4 only make sense when a target environment is known.
  const envId = deployment.environmentId;
  if (envId) {
    // Signal 2: other operations currently in flight for the same environment.
    const inFlight = store.list().filter((other) => {
      if (other.environmentId !== envId || other.id === deployment.id) {
        return false;
      }
      return other.status === "running" || other.status === "approved";
    });
    if (inFlight.length > 0) {
      verdict = "hold";
      factors.push(`${inFlight.length} other operation(s) in progress for this environment`);
    }

    // Signal 3: operation frequency (presumably counted from windowStart
    // onward; confirm against the store implementation).
    const recentCount = store.countByEnvironment(envId, windowStart);
    if (recentCount > 5) {
      if (verdict === "proceed") {
        verdict = "caution";
      }
      factors.push(`High operation frequency: ${recentCount} operations in the last 24h`);
    }

    // Signal 4: outcome of the most recent operation on this environment.
    const previous = store.findLatestByEnvironment(envId);
    if (previous && previous.id !== deployment.id) {
      switch (previous.status) {
        case "failed":
        case "rolled_back":
          if (verdict === "proceed") {
            verdict = "caution";
          }
          factors.push(`Last operation to this environment ${previous.status}`);
          break;
        case "succeeded":
          factors.push("Last operation to this environment succeeded");
          break;
        default:
          // Any other status contributes no factor.
          break;
      }
    }
  }

  if (factors.length === 0) {
    factors.push("No risk factors detected — target is stable");
  }

  // Canned summaries, used only when no LLM-provided summary is supplied.
  const defaultSummaries: Record<RecommendationVerdict, string> = {
    proceed: "Proceed — no conflicting operations, target environment is stable",
    caution: "Proceed with caution — review risk factors before greenlighting",
    hold: "Hold — resolve conflicting operations before proceeding",
  };
  const summary = llmSummary ?? defaultSummaries[verdict];

  return { verdict, summary, factors };
}