@synth-deploy/server 0.1.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/dist/agent/envoy-client.d.ts +62 -7
  2. package/dist/agent/envoy-client.d.ts.map +1 -1
  3. package/dist/agent/envoy-client.js +56 -6
  4. package/dist/agent/envoy-client.js.map +1 -1
  5. package/dist/agent/stale-deployment-detector.js +1 -1
  6. package/dist/agent/stale-deployment-detector.js.map +1 -1
  7. package/dist/agent/synth-agent.d.ts +7 -5
  8. package/dist/agent/synth-agent.d.ts.map +1 -1
  9. package/dist/agent/synth-agent.js +42 -39
  10. package/dist/agent/synth-agent.js.map +1 -1
  11. package/dist/alert-webhooks/alert-parsers.d.ts +21 -0
  12. package/dist/alert-webhooks/alert-parsers.d.ts.map +1 -0
  13. package/dist/alert-webhooks/alert-parsers.js +184 -0
  14. package/dist/alert-webhooks/alert-parsers.js.map +1 -0
  15. package/dist/api/agent.d.ts +0 -6
  16. package/dist/api/agent.d.ts.map +1 -1
  17. package/dist/api/agent.js +6 -459
  18. package/dist/api/agent.js.map +1 -1
  19. package/dist/api/alert-webhooks.d.ts +13 -0
  20. package/dist/api/alert-webhooks.d.ts.map +1 -0
  21. package/dist/api/alert-webhooks.js +279 -0
  22. package/dist/api/alert-webhooks.js.map +1 -0
  23. package/dist/api/envoy-reports.js +2 -2
  24. package/dist/api/envoy-reports.js.map +1 -1
  25. package/dist/api/envoys.js +1 -1
  26. package/dist/api/envoys.js.map +1 -1
  27. package/dist/api/fleet.d.ts.map +1 -1
  28. package/dist/api/fleet.js +14 -15
  29. package/dist/api/fleet.js.map +1 -1
  30. package/dist/api/graph.js +3 -3
  31. package/dist/api/graph.js.map +1 -1
  32. package/dist/api/operations.d.ts +7 -0
  33. package/dist/api/operations.d.ts.map +1 -0
  34. package/dist/api/operations.js +1883 -0
  35. package/dist/api/operations.js.map +1 -0
  36. package/dist/api/partitions.js +1 -1
  37. package/dist/api/partitions.js.map +1 -1
  38. package/dist/api/schemas.d.ts +194 -10
  39. package/dist/api/schemas.d.ts.map +1 -1
  40. package/dist/api/schemas.js +38 -5
  41. package/dist/api/schemas.js.map +1 -1
  42. package/dist/api/system.d.ts.map +1 -1
  43. package/dist/api/system.js +22 -21
  44. package/dist/api/system.js.map +1 -1
  45. package/dist/artifact-analyzer.js +2 -2
  46. package/dist/artifact-analyzer.js.map +1 -1
  47. package/dist/fleet/fleet-executor.js +1 -1
  48. package/dist/fleet/fleet-executor.js.map +1 -1
  49. package/dist/graph/graph-executor.js +2 -2
  50. package/dist/graph/graph-executor.js.map +1 -1
  51. package/dist/index.js +44 -40
  52. package/dist/index.js.map +1 -1
  53. package/dist/mcp/resources.js +3 -3
  54. package/dist/mcp/resources.js.map +1 -1
  55. package/dist/mcp/tools.d.ts.map +1 -1
  56. package/dist/mcp/tools.js +2 -9
  57. package/dist/mcp/tools.js.map +1 -1
  58. package/dist/middleware/auth.js +1 -1
  59. package/dist/middleware/auth.js.map +1 -1
  60. package/package.json +1 -1
  61. package/src/agent/envoy-client.ts +107 -15
  62. package/src/agent/stale-deployment-detector.ts +1 -1
  63. package/src/agent/synth-agent.ts +59 -45
  64. package/src/alert-webhooks/alert-parsers.ts +291 -0
  65. package/src/api/agent.ts +9 -528
  66. package/src/api/alert-webhooks.ts +354 -0
  67. package/src/api/envoy-reports.ts +2 -2
  68. package/src/api/envoys.ts +1 -1
  69. package/src/api/fleet.ts +14 -15
  70. package/src/api/graph.ts +3 -3
  71. package/src/api/operations.ts +2240 -0
  72. package/src/api/partitions.ts +1 -1
  73. package/src/api/schemas.ts +43 -7
  74. package/src/api/system.ts +23 -21
  75. package/src/artifact-analyzer.ts +2 -2
  76. package/src/fleet/fleet-executor.ts +1 -1
  77. package/src/graph/graph-executor.ts +2 -2
  78. package/src/index.ts +46 -40
  79. package/src/mcp/resources.ts +3 -3
  80. package/src/mcp/tools.ts +5 -9
  81. package/src/middleware/auth.ts +1 -1
  82. package/tests/agent-mode.test.ts +5 -376
  83. package/tests/api-handlers.test.ts +27 -27
  84. package/tests/composite-operations.test.ts +557 -0
  85. package/tests/decision-diary.test.ts +62 -63
  86. package/tests/diary-reader.test.ts +14 -18
  87. package/tests/mcp-tools.test.ts +1 -1
  88. package/tests/orchestration.test.ts +34 -30
  89. package/tests/partition-isolation.test.ts +4 -9
  90. package/tests/rbac-enforcement.test.ts +8 -8
  91. package/tests/ui-journey.test.ts +9 -9
  92. package/dist/api/deployments.d.ts +0 -11
  93. package/dist/api/deployments.d.ts.map +0 -1
  94. package/dist/api/deployments.js +0 -1098
  95. package/dist/api/deployments.js.map +0 -1
  96. package/src/api/deployments.ts +0 -1347
@@ -0,0 +1,2240 @@
1
+ import type { FastifyInstance } from "fastify";
2
+ import { generatePostmortem, generatePostmortemAsync, resolveApprovalMode } from "@synth-deploy/core";
3
+ import type { LlmClient, IPartitionStore, IEnvironmentStore, IArtifactStore, ISettingsStore, IDeploymentStore, ITelemetryStore, DebriefWriter, DebriefReader, DebriefPinStore, DeploymentEnrichment, RecommendationVerdict, TelemetryAction } from "@synth-deploy/core";
4
+ import { requirePermission } from "../middleware/permissions.js";
5
+ import {
6
+ CreateOperationSchema,
7
+ ApproveDeploymentSchema,
8
+ RejectDeploymentSchema,
9
+ ModifyDeploymentPlanSchema,
10
+ SubmitPlanSchema,
11
+ DeploymentListQuerySchema,
12
+ DebriefQuerySchema,
13
+ ProgressEventSchema,
14
+ ReplanDeploymentSchema,
15
+ } from "./schemas.js";
16
+ import type { ProgressEventStore } from "./progress-event-store.js";
17
+ import { EnvoyClient } from "../agent/envoy-client.js";
18
+ import type { EnvoyRegistry } from "../agent/envoy-registry.js";
19
+
20
+ /**
21
+ * Module overview: REST API routes for operations (deploy, trigger, composite,
22
+ * investigate, maintain, query). These are the traditional (non-MCP) interface for the web UI and integrations.
23
+ */
24
/**
 * Extracts the artifact ID from an operation's input.
 *
 * @param op - Any object carrying an operation `input`; only `input` is read.
 * @returns `input.artifactId` when `input.type` is "deploy"; `undefined` for
 *   every other input type.
 */
+ function getArtifactId(op: { input: import("@synth-deploy/core").OperationInput }): string | undefined {
25
+ return op.input.type === "deploy" ? op.input.artifactId : undefined;
26
+ }
27
+
28
+ export function registerOperationRoutes(
29
+ app: FastifyInstance,
30
+ deployments: IDeploymentStore,
31
+ debrief: DebriefWriter & DebriefReader & DebriefPinStore,
32
+ partitions: IPartitionStore,
33
+ environments: IEnvironmentStore,
34
+ artifactStore: IArtifactStore,
35
+ settings: ISettingsStore,
36
+ telemetry: ITelemetryStore,
37
+ progressStore?: ProgressEventStore,
38
+ envoyClient?: EnvoyClient,
39
+ envoyRegistry?: EnvoyRegistry,
40
+ llm?: LlmClient,
41
+ ): void {
42
+
43
+ // Create a deployment (plan phase)
44
+ app.post("/api/operations", { preHandler: [requirePermission("deployment.create")] }, async (request, reply) => {
45
+ const parsed = CreateOperationSchema.safeParse(request.body);
46
+ if (!parsed.success) {
47
+ return reply.status(400).send({ error: parsed.error.message });
48
+ }
49
+
50
+ const { artifactId, environmentId, partitionId, envoyId, version, type: operationType, intent, allowWrite, condition, responseIntent, parentOperationId, requireApproval } = parsed.data;
51
+
52
+ // Validate artifact exists (required for deploy operations)
53
+ if (operationType === "deploy" && !artifactId) {
54
+ return reply.status(400).send({ error: "artifactId is required for deploy operations" });
55
+ }
56
+ const artifact = artifactId ? artifactStore.get(artifactId) : undefined;
57
+ if (operationType === "deploy" && !artifact) {
58
+ return reply.status(404).send({ error: `Artifact not found: ${artifactId}` });
59
+ }
60
+
61
+ // Validate environment exists (optional when targeting a partition or envoy)
62
+ const environment = environmentId ? environments.get(environmentId) : undefined;
63
+ if (environmentId && !environment) {
64
+ return reply.status(404).send({ error: `Environment not found: ${environmentId}` });
65
+ }
66
+
67
+ // Validate partition if provided
68
+ const partition = partitionId ? partitions.get(partitionId) : undefined;
69
+ if (partitionId && !partition) {
70
+ return reply.status(404).send({ error: `Partition not found: ${partitionId}` });
71
+ }
72
+
73
+ // Validate envoy if provided
74
+ const targetEnvoy = envoyId ? envoyRegistry?.get(envoyId) : undefined;
75
+ if (envoyId && !targetEnvoy) {
76
+ return reply.status(404).send({ error: `Envoy not found: ${envoyId}` });
77
+ }
78
+
79
+ // Resolve variables — partition vars are base, environment vars take precedence if present
80
+ const envVars = environment ? environment.variables : {};
81
+ const partitionVars = partition?.variables ?? {};
82
+ const resolved: Record<string, string> = { ...partitionVars, ...envVars };
83
+
84
+ const operationInput = operationType === "deploy"
85
+ ? { type: "deploy" as const, artifactId: artifactId!, ...(version ? { artifactVersionId: version } : {}) }
86
+ : operationType === "trigger"
87
+ ? { type: "trigger" as const, condition: condition ?? intent ?? "", responseIntent: responseIntent ?? intent ?? "" }
88
+ : operationType === "composite"
89
+ ? { type: "composite" as const, operations: (parsed.data.operations ?? []) as import("@synth-deploy/core").OperationInput[] }
90
+ : operationType === "investigate"
91
+ ? { type: "investigate" as const, intent: intent ?? "", ...(allowWrite !== undefined ? { allowWrite } : {}) }
92
+ : { type: operationType as "maintain" | "query", intent: intent ?? "" };
93
+
94
+ const deployment = {
95
+ id: crypto.randomUUID(),
96
+ input: operationInput,
97
+ intent,
98
+ lineage: parentOperationId,
99
+ triggeredBy: parentOperationId ? ("user" as const) : undefined,
100
+ environmentId,
101
+ partitionId,
102
+ envoyId: targetEnvoy?.id,
103
+ version: version ?? "",
104
+ status: "pending" as const,
105
+ variables: resolved,
106
+ debriefEntryIds: [] as string[],
107
+ createdAt: new Date(),
108
+ ...(requireApproval ? { forceManualApproval: true } : {}),
109
+ };
110
+
111
+ deployments.save(deployment);
112
+ telemetry.record({ actor: (request.user?.email) ?? "anonymous", action: "operation.created", target: { type: "deployment", id: deployment.id }, details: { artifactId, environmentId, partitionId, envoyId } });
113
+
114
+ // Dispatch planning to the appropriate envoy asynchronously.
115
+ // The envoy reasons about the deployment (read-only) and POSTs back a plan,
116
+ // which transitions the deployment to awaiting_approval.
117
+ if (envoyRegistry) {
118
+ // Find the target envoy: explicit envoyId > environment-assigned > first available
119
+ const planningEnvoy = targetEnvoy
120
+ ?? (environment ? envoyRegistry.findForEnvironment(environment.name) : undefined)
121
+ ?? envoyRegistry.list()[0];
122
+
123
+ const needsArtifact = deployment.input.type === "deploy";
124
+ if (planningEnvoy && (!needsArtifact || artifact)) {
125
+ const planningClient = new EnvoyClient(planningEnvoy.url);
126
+ const environmentForPlanning = environment
127
+ ? { id: environment.id, name: environment.name, variables: environment.variables }
128
+ : { id: `direct:${planningEnvoy.id}`, name: planningEnvoy.name, variables: {} };
129
+
130
+ // Composite: orchestrate child planning separately — do not send composite to envoy directly
131
+ if (deployment.input.type === "composite") {
132
+ planCompositeChildren(deployment, envoyRegistry, planningEnvoy).catch((err) => {
133
+ const dep = deployments.get(deployment.id);
134
+ if (dep && (dep.status === "pending" || dep.status === "planning")) {
135
+ dep.status = "failed" as typeof dep.status;
136
+ dep.failureReason = `Composite planning failed unexpectedly: ${err instanceof Error ? err.message : String(err)}`;
137
+ deployments.save(dep);
138
+ }
139
+ });
140
+ return;
141
+ }
142
+
143
+ planningClient.requestPlan({
144
+ operationId: deployment.id,
145
+ operationType: deployment.input.type as "deploy" | "query" | "investigate" | "maintain" | "trigger",
146
+ intent: deployment.intent ?? (deployment.input.type === "trigger"
147
+ ? `Monitor: ${(deployment.input as { condition: string }).condition}. When triggered: ${(deployment.input as { responseIntent: string }).responseIntent}`
148
+ : undefined),
149
+ ...(deployment.input.type === "trigger" ? {
150
+ triggerCondition: (deployment.input as { condition: string }).condition,
151
+ triggerResponseIntent: (deployment.input as { responseIntent: string }).responseIntent,
152
+ } : {}),
153
+ ...(artifact ? {
154
+ artifact: {
155
+ id: artifact.id,
156
+ name: artifact.name,
157
+ type: artifact.type,
158
+ analysis: {
159
+ summary: artifact.analysis.summary,
160
+ dependencies: artifact.analysis.dependencies,
161
+ configurationExpectations: artifact.analysis.configurationExpectations,
162
+ deploymentIntent: artifact.analysis.deploymentIntent,
163
+ confidence: artifact.analysis.confidence,
164
+ },
165
+ },
166
+ } : {}),
167
+ ...(deployment.input.type === "investigate" && "allowWrite" in deployment.input
168
+ ? { allowWrite: deployment.input.allowWrite }
169
+ : {}),
170
+ environment: environmentForPlanning,
171
+ partition: partition
172
+ ? { id: partition.id, name: partition.name, variables: partition.variables }
173
+ : undefined,
174
+ version: deployment.version ?? "",
175
+ resolvedVariables: resolved,
176
+ }).then((result) => {
177
+ const dep = deployments.get(deployment.id);
178
+ if (!dep || dep.status !== "pending") return;
179
+
180
+ dep.plan = result.plan;
181
+ dep.rollbackPlan = result.rollbackPlan;
182
+ dep.envoyId = planningEnvoy.id;
183
+
184
+ // Trigger operations: construct MonitoringDirective from plan, present for approval
185
+ if (dep.input.type === "trigger" && !result.blocked) {
186
+ const triggerInput = dep.input as { type: "trigger"; condition: string; responseIntent: string; parameters?: Record<string, unknown> };
187
+ // Convert plan steps to monitoring probes
188
+ const probes = result.plan.steps.map((step) => ({
189
+ command: step.action,
190
+ label: step.description,
191
+ parseAs: (step.params?.parseAs === "exitCode" ? "exitCode" : "numeric") as "numeric" | "exitCode",
192
+ }));
193
+ const directive: import("@synth-deploy/core").MonitoringDirective = {
194
+ id: dep.id,
195
+ operationId: dep.id,
196
+ probes: probes.length > 0 ? probes : [{
197
+ command: "echo 0",
198
+ label: "default-probe",
199
+ parseAs: "numeric" as const,
200
+ }],
201
+ intervalMs: result.intervalMs ?? 60_000,
202
+ cooldownMs: result.cooldownMs ?? 300_000,
203
+ condition: triggerInput.condition,
204
+ responseIntent: triggerInput.responseIntent,
205
+ responseType: "maintain",
206
+ responseParameters: triggerInput.parameters,
207
+ environmentId: dep.environmentId,
208
+ partitionId: dep.partitionId,
209
+ status: "active",
210
+ };
211
+ dep.monitoringDirective = directive;
212
+ dep.triggerStatus = "active";
213
+ dep.status = "awaiting_approval" as typeof dep.status;
214
+ dep.recommendation = computeRecommendation(dep, deployments, result.assessmentSummary);
215
+ deployments.save(dep);
216
+ // Debrief plan-generation entry is recorded by the envoy's planTrigger — no duplicate here.
217
+ return;
218
+ }
219
+
220
+ // Check approval mode for query/investigate operations with findings
221
+ if ((dep.input.type === "query" || dep.input.type === "investigate") &&
222
+ (result.queryFindings || result.investigationFindings)) {
223
+ if (result.queryFindings) dep.queryFindings = result.queryFindings;
224
+ if (result.investigationFindings) dep.investigationFindings = result.investigationFindings;
225
+
226
+ const currentSettings = settings.get();
227
+ const envLookup = (id: string) => environments.get(id)?.name;
228
+ const approvalMode = dep.forceManualApproval
229
+ ? "required"
230
+ : resolveApprovalMode(dep.input.type, dep.environmentId, currentSettings, envLookup);
231
+
232
+ if (approvalMode === "auto") {
233
+ // Auto-approve — findings are the deliverable
234
+ dep.status = "succeeded" as typeof dep.status;
235
+ dep.completedAt = new Date();
236
+ deployments.save(dep);
237
+
238
+ const decisionType = dep.input.type === "query"
239
+ ? "query-findings" as const
240
+ : "investigation-findings" as const;
241
+ const findings = result.queryFindings ?? result.investigationFindings!;
242
+ debrief.record({
243
+ partitionId: dep.partitionId ?? null,
244
+ operationId: dep.id,
245
+ agent: "envoy",
246
+ decisionType,
247
+ decision: `${dep.input.type === "query" ? "Query" : "Investigation"} complete — ${findings.targetsSurveyed.length} target(s) surveyed`,
248
+ reasoning: findings.summary,
249
+ context: { targetsSurveyed: findings.targetsSurveyed, findingCount: findings.findings.length },
250
+ });
251
+ return;
252
+ }
253
+ // approvalMode === "required" — fall through to standard approval gate
254
+ }
255
+
256
+ if (result.blocked) {
257
+ // Unrecoverable precondition failures — block execution, do not present for approval
258
+ dep.status = "failed" as typeof dep.status;
259
+ dep.failureReason = result.blockReason ?? "Plan blocked due to unrecoverable precondition failures";
260
+ deployments.save(dep);
261
+
262
+ debrief.record({
263
+ partitionId: dep.partitionId ?? null,
264
+ operationId: dep.id,
265
+ agent: "envoy",
266
+ decisionType: "plan-generation",
267
+ decision: `Operation plan blocked — infrastructure prerequisites not met`,
268
+ reasoning: result.blockReason ?? result.plan.reasoning,
269
+ context: { stepCount: result.plan.steps.length, envoyId: planningEnvoy.id, blocked: true },
270
+ });
271
+ } else {
272
+ // Plan is valid — transition to awaiting_approval
273
+ dep.status = "awaiting_approval" as typeof dep.status;
274
+ dep.recommendation = computeRecommendation(dep, deployments, result.assessmentSummary);
275
+ deployments.save(dep);
276
+
277
+ debrief.record({
278
+ partitionId: dep.partitionId ?? null,
279
+ operationId: dep.id,
280
+ agent: "envoy",
281
+ decisionType: "plan-generation",
282
+ decision: `Operation plan generated with ${result.plan.steps.length} steps`,
283
+ reasoning: result.plan.reasoning,
284
+ context: { stepCount: result.plan.steps.length, envoyId: planningEnvoy.id, delta: result.delta },
285
+ });
286
+ }
287
+ }).catch((err) => {
288
+ // Planning failed — mark deployment failed so UI doesn't wait forever
289
+ const dep = deployments.get(deployment.id);
290
+ if (!dep || dep.status !== "pending") return;
291
+
292
+ dep.status = "failed" as typeof dep.status;
293
+ dep.failureReason = err instanceof Error ? err.message : "Planning failed";
294
+ deployments.save(dep);
295
+
296
+ debrief.record({
297
+ partitionId: dep.partitionId ?? null,
298
+ operationId: dep.id,
299
+ agent: "server",
300
+ decisionType: "deployment-failure",
301
+ decision: "Envoy planning failed",
302
+ reasoning: dep.failureReason!,
303
+ context: { error: dep.failureReason, envoyId: planningEnvoy.id },
304
+ });
305
+ });
306
+ }
307
+ }
308
+
309
+ return reply.status(201).send({ deployment });
310
+ });
311
+
312
+ // Get deployment by ID
313
+ app.get<{ Params: { id: string } }>("/api/operations/:id", { preHandler: [requirePermission("deployment.view")] }, async (request, reply) => {
314
+ const deployment = deployments.get(request.params.id);
315
+ if (!deployment) {
316
+ return reply.status(404).send({ error: "Operation not found" });
317
+ }
318
+
319
+ return {
320
+ deployment,
321
+ debrief: debrief.getByOperation(deployment.id),
322
+ };
323
+ });
324
+
325
+ // What's New — compare deployed artifact version against catalog latest
326
+ app.get<{ Params: { id: string } }>("/api/operations/:id/whats-new", { preHandler: [requirePermission("deployment.view")] }, async (request, reply) => {
327
+ const deployment = deployments.get(request.params.id);
328
+ if (!deployment) {
329
+ return reply.status(404).send({ error: "Operation not found" });
330
+ }
331
+
332
+ const versions = artifactStore.getVersions(getArtifactId(deployment) ?? "");
333
+ const sorted = versions.slice().sort(
334
+ (a, b) => new Date(b.createdAt).getTime() - new Date(a.createdAt).getTime(),
335
+ );
336
+ const latest = sorted[0] ?? null;
337
+ const deployedVersion = deployment.version;
338
+ const latestVersion = latest?.version ?? null;
339
+ const isLatest = latestVersion === null || latestVersion === deployedVersion;
340
+
341
+ return {
342
+ deployedVersion,
343
+ latestVersion,
344
+ isLatest,
345
+ latestCreatedAt: latest?.createdAt ? new Date(latest.createdAt).toISOString() : null,
346
+ };
347
+ });
348
+
349
+ // List deployments (optionally filtered by partition, artifact, or envoy)
350
+ app.get("/api/operations", { preHandler: [requirePermission("deployment.view")] }, async (request) => {
351
+ const qParsed = DeploymentListQuerySchema.safeParse(request.query);
352
+ const { partitionId, artifactId, envoyId } = qParsed.success ? qParsed.data : {};
353
+
354
+ let list;
355
+ if (partitionId) {
356
+ list = deployments.getByPartition(partitionId);
357
+ } else if (artifactId) {
358
+ list = deployments.getByArtifact(artifactId);
359
+ } else {
360
+ list = deployments.list();
361
+ }
362
+
363
+ if (envoyId) {
364
+ list = list.filter((d) => d.envoyId === envoyId);
365
+ }
366
+
367
+ return { deployments: list };
368
+ });
369
+
370
+ // Submit a plan from envoy — transitions deployment to awaiting_approval
371
+ app.post<{ Params: { id: string } }>(
372
+ "/api/operations/:id/plan",
373
+ { preHandler: [requirePermission("deployment.create")] },
374
+ async (request, reply) => {
375
+ const deployment = deployments.get(request.params.id);
376
+ if (!deployment) {
377
+ return reply.status(404).send({ error: "Operation not found" });
378
+ }
379
+
380
+ const parsed = SubmitPlanSchema.safeParse(request.body);
381
+ if (!parsed.success) {
382
+ return reply.status(400).send({ error: "Invalid plan submission", details: parsed.error.format() });
383
+ }
384
+
385
+ if ((deployment.status) !== "pending" && (deployment.status) !== "planning") {
386
+ return reply.status(409).send({ error: `Cannot submit plan for operation in "${deployment.status}" status` });
387
+ }
388
+
389
+ deployment.plan = parsed.data.plan;
390
+ deployment.rollbackPlan = parsed.data.rollbackPlan;
391
+ deployment.status = "awaiting_approval" as typeof deployment.status;
392
+
393
+ // Generate recommendation from enrichment context
394
+ deployment.recommendation = computeRecommendation(deployment, deployments);
395
+
396
+ deployments.save(deployment);
397
+
398
+ debrief.record({
399
+ partitionId: deployment.partitionId ?? null,
400
+ operationId: deployment.id,
401
+ agent: "envoy",
402
+ decisionType: "plan-generation",
403
+ decision: `Operation plan submitted with ${parsed.data.plan.steps.length} steps`,
404
+ reasoning: parsed.data.plan.reasoning,
405
+ context: { stepCount: parsed.data.plan.steps.length },
406
+ });
407
+
408
+ return reply.status(200).send({ deployment });
409
+ },
410
+ );
411
+
412
+ // Approve a deployment plan
413
+ app.post<{ Params: { id: string } }>(
414
+ "/api/operations/:id/approve",
415
+ { preHandler: [requirePermission("deployment.approve")] },
416
+ async (request, reply) => {
417
+ const deployment = deployments.get(request.params.id);
418
+ if (!deployment) {
419
+ return reply.status(404).send({ error: "Operation not found" });
420
+ }
421
+
422
+ const parsed = ApproveDeploymentSchema.safeParse(request.body);
423
+ if (!parsed.success) {
424
+ return reply.status(400).send({ error: parsed.error.message });
425
+ }
426
+
427
+ if ((deployment.status) !== "awaiting_approval") {
428
+ return reply.status(409).send({ error: `Cannot approve operation in "${deployment.status}" status — must be "awaiting_approval"` });
429
+ }
430
+
431
+ // Transition deployment status
432
+ deployment.approvedBy = parsed.data.approvedBy;
433
+ deployment.approvedAt = new Date();
434
+ deployment.status = "approved" as typeof deployment.status;
435
+ deployments.save(deployment);
436
+
437
+ const actor = (request.user?.email) ?? parsed.data.approvedBy;
438
+
439
+ // Record approval in debrief
440
+ debrief.record({
441
+ partitionId: deployment.partitionId ?? null,
442
+ operationId: deployment.id,
443
+ agent: "server",
444
+ decisionType: "system",
445
+ decision: `Operation approved by ${actor}`,
446
+ reasoning: parsed.data.modifications
447
+ ? `Approved with modifications: ${parsed.data.modifications}`
448
+ : "Approved without modifications",
449
+ context: { approvedBy: actor },
450
+ actor: request.user?.email,
451
+ });
452
+ telemetry.record({ actor, action: "operation.approved", target: { type: "deployment", id: deployment.id }, details: { modifications: parsed.data.modifications } });
453
+ telemetry.record({
454
+ actor,
455
+ action: parsed.data.modifications ? "agent.recommendation.overridden" : "agent.recommendation.followed",
456
+ target: { type: "deployment", id: deployment.id },
457
+ details: parsed.data.modifications
458
+ ? { modifications: parsed.data.modifications }
459
+ : { planStepCount: deployment.plan?.steps.length ?? 0 },
460
+ });
461
+
462
+ // Composite operations: execute children sequentially
463
+ if (deployment.input.type === "composite") {
464
+ deployment.status = "running" as typeof deployment.status;
465
+ deployments.save(deployment);
466
+
467
+ const compositeChildren = deployments.list()
468
+ .filter((d) => d.lineage === deployment.id)
469
+ .sort((a, b) => ((a as { sequenceIndex?: number }).sequenceIndex ?? 0) - ((b as { sequenceIndex?: number }).sequenceIndex ?? 0));
470
+
471
+ // Approve all children before executing sequentially
472
+ for (const child of compositeChildren) {
473
+ child.approvedBy = parsed.data.approvedBy;
474
+ child.approvedAt = new Date();
475
+ child.status = "approved" as typeof child.status;
476
+ deployments.save(child);
477
+ }
478
+
479
+ executeCompositeSequentially(deployment.id, compositeChildren.map((c) => c.id)).catch((err) => {
480
+ const dep = deployments.get(deployment.id);
481
+ if (dep && dep.status === "running") {
482
+ dep.status = "failed" as typeof dep.status;
483
+ dep.failureReason = `Composite execution failed unexpectedly: ${err instanceof Error ? err.message : String(err)}`;
484
+ dep.completedAt = new Date();
485
+ deployments.save(dep);
486
+ }
487
+ });
488
+
489
+ return { deployment, approved: true };
490
+ }
491
+
492
+ // Trigger operations: install monitoring directive on envoy
493
+ if (deployment.input.type === "trigger" && deployment.monitoringDirective && envoyRegistry) {
494
+ const targetEnvoyForTrigger = deployment.envoyId
495
+ ? envoyRegistry.get(deployment.envoyId)
496
+ : envoyRegistry.list()[0];
497
+
498
+ if (targetEnvoyForTrigger) {
499
+ const triggerClient = new EnvoyClient(targetEnvoyForTrigger.url);
500
+ deployment.status = "running" as typeof deployment.status;
501
+ deployment.triggerStatus = "active";
502
+ deployments.save(deployment);
503
+
504
+ triggerClient.installMonitoringDirective(deployment.monitoringDirective).then(() => {
505
+ deployment.status = "succeeded" as typeof deployment.status;
506
+ deployment.completedAt = new Date();
507
+ deployments.save(deployment);
508
+
509
+ debrief.record({
510
+ partitionId: deployment.partitionId ?? null,
511
+ operationId: deployment.id,
512
+ agent: "server",
513
+ decisionType: "trigger-activated",
514
+ decision: `Monitoring directive installed on ${targetEnvoyForTrigger.name}`,
515
+ reasoning: `Trigger activated: monitoring "${deployment.monitoringDirective!.condition}" every ${deployment.monitoringDirective!.intervalMs / 1000}s with ${deployment.monitoringDirective!.cooldownMs / 1000}s cooldown`,
516
+ context: { envoyId: targetEnvoyForTrigger.id, directiveId: deployment.monitoringDirective!.id },
517
+ });
518
+ telemetry.record({ actor, action: "trigger.activated" as TelemetryAction, target: { type: "trigger", id: deployment.id }, details: { envoyId: targetEnvoyForTrigger.id } });
519
+ }).catch((err) => {
520
+ deployment.status = "failed" as typeof deployment.status;
521
+ deployment.triggerStatus = "disabled";
522
+ deployment.failureReason = err instanceof Error ? err.message : "Failed to install monitoring directive";
523
+ deployments.save(deployment);
524
+
525
+ debrief.record({
526
+ partitionId: deployment.partitionId ?? null,
527
+ operationId: deployment.id,
528
+ agent: "server",
529
+ decisionType: "deployment-failure",
530
+ decision: "Failed to install monitoring directive on envoy",
531
+ reasoning: deployment.failureReason!,
532
+ context: { error: deployment.failureReason },
533
+ });
534
+ });
535
+ }
536
+ }
537
+ // Normal operations: dispatch approved plan to envoy for execution
538
+ else if (envoyClient && deployment.plan && deployment.rollbackPlan) {
539
+ const artifact = artifactStore.get(getArtifactId(deployment) ?? "");
540
+ const serverPort = process.env.PORT ?? "9410";
541
+ const serverUrl = process.env.SYNTH_SERVER_URL ?? `http://localhost:${serverPort}`;
542
+ const progressCallbackUrl = `${serverUrl}/api/operations/${deployment.id}/progress`;
543
+ const callbackToken = envoyRegistry?.list().find(r => r.url === envoyClient.url)?.token;
544
+
545
+ deployment.status = "running" as typeof deployment.status;
546
+ deployments.save(deployment);
547
+
548
+ // Fire-and-forget: execution runs async, progress comes via callback
549
+ envoyClient.executeApprovedPlan({
550
+ operationId: deployment.id,
551
+ plan: deployment.plan,
552
+ rollbackPlan: deployment.rollbackPlan,
553
+ artifactType: artifact?.type ?? "unknown",
554
+ artifactName: artifact?.name ?? "unknown",
555
+ environmentId: deployment.environmentId ?? "",
556
+ progressCallbackUrl,
557
+ callbackToken,
558
+ }).catch((err) => {
559
+ // Execution dispatch failed — record failure
560
+ deployment.status = "failed" as typeof deployment.status;
561
+ deployment.failureReason = err instanceof Error ? err.message : "Execution dispatch failed";
562
+ deployments.save(deployment);
563
+
564
+ debrief.record({
565
+ partitionId: deployment.partitionId ?? null,
566
+ operationId: deployment.id,
567
+ agent: "server",
568
+ decisionType: "deployment-failure",
569
+ decision: "Failed to dispatch approved plan to envoy",
570
+ reasoning: deployment.failureReason!,
571
+ context: { error: deployment.failureReason },
572
+ });
573
+ });
574
+ }
575
+
576
+ return { deployment, approved: true };
577
+ },
578
+ );
579
+
// Reject an operation plan that is awaiting approval.
//
// Responses:
//   404 — no operation with the given id
//   400 — request body does not satisfy RejectDeploymentSchema
//   409 — operation is not in "awaiting_approval"
// On success the status becomes "rejected", the reason is stored on the
// operation, and the rejection is written to the debrief and to telemetry.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/reject",
  { preHandler: [requirePermission("deployment.reject")] },
  async (request, reply) => {
    const deployment = deployments.get(request.params.id);
    if (!deployment) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    const body = RejectDeploymentSchema.safeParse(request.body);
    if (!body.success) {
      return reply.status(400).send({ error: body.error.message });
    }
    const { reason } = body.data;

    if (deployment.status !== "awaiting_approval") {
      return reply.status(409).send({
        error: `Cannot reject operation in "${deployment.status}" status — must be "awaiting_approval"`,
      });
    }

    // Persist the state transition together with the reason given by the user.
    deployment.status = "rejected" as typeof deployment.status;
    deployment.rejectionReason = reason;
    deployments.save(deployment);

    // The debrief keeps the raw (possibly undefined) email; telemetry falls
    // back to "anonymous".
    const userEmail = request.user?.email;

    debrief.record({
      partitionId: deployment.partitionId ?? null,
      operationId: deployment.id,
      agent: "server",
      decisionType: "system",
      decision: "Operation plan rejected",
      reasoning: reason,
      context: { reason },
      actor: userEmail,
    });
    telemetry.record({
      actor: userEmail ?? "anonymous",
      action: "operation.rejected",
      target: { type: "deployment", id: deployment.id },
      details: { reason },
    });

    return { deployment, rejected: true };
  },
);
622
+
// Modify an operation plan (user edits steps before approval).
//
// Responses:
//   404 — no operation with the given id
//   400 — request body does not satisfy ModifyDeploymentPlanSchema
//   409 — operation is not in "awaiting_approval", or has no plan
//   422 — the envoy validated the new steps and reported violations
// On success the plan's steps are replaced, a textual diff against the
// previous steps is stored as `diffFromPreviousPlan`, and the change is
// written to the debrief and to telemetry.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/modify",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const deployment = deployments.get(request.params.id);
    if (!deployment) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    const parsed = ModifyDeploymentPlanSchema.safeParse(request.body);
    if (!parsed.success) {
      return reply.status(400).send({ error: parsed.error.message });
    }

    if (deployment.status !== "awaiting_approval") {
      return reply.status(409).send({ error: `Cannot modify operation in "${deployment.status}" status — must be "awaiting_approval"` });
    }

    if (!deployment.plan) {
      return reply.status(409).send({ error: "Operation has no plan to modify" });
    }

    // Validate the modified plan with the envoy if one is configured.
    // Validation is best-effort: when the call itself fails the modification
    // is still accepted, but the skip is logged and flagged in the debrief
    // context so the audit trail shows the plan was not validated.
    let envoyValidationSkipped = false;
    if (envoyClient) {
      try {
        const validation = await envoyClient.validatePlan(parsed.data.steps);
        if (!validation.valid) {
          return reply.status(422).send({
            error: "Modified plan failed envoy validation",
            violations: validation.violations,
          });
        }
      } catch (err) {
        envoyValidationSkipped = true;
        request.log.warn(
          { err, operationId: deployment.id },
          "Envoy plan validation failed to run; accepting modified plan without validation",
        );
      }
    }

    // Build a positional diff: step i of the old plan is compared with
    // step i of the new plan.
    const oldSteps = deployment.plan.steps;
    const newSteps = parsed.data.steps;
    const diffLines: string[] = [];
    const maxLen = Math.max(oldSteps.length, newSteps.length);
    for (let i = 0; i < maxLen; i++) {
      const old = oldSteps[i];
      const cur = newSteps[i];
      if (!old) {
        diffLines.push(`+ Step ${i + 1} (added): ${cur.action} ${cur.target} — ${cur.description}`);
      } else if (!cur) {
        diffLines.push(`- Step ${i + 1} (removed): ${old.action} ${old.target} — ${old.description}`);
      } else if (old.action !== cur.action || old.target !== cur.target || old.description !== cur.description) {
        diffLines.push(`~ Step ${i + 1} (changed): ${old.action} ${old.target} → ${cur.action} ${cur.target}`);
        if (old.description !== cur.description) {
          diffLines.push(` was: ${old.description}`);
          diffLines.push(` now: ${cur.description}`);
        }
      }
    }
    const diffFromPreviousPlan = diffLines.length > 0
      ? diffLines.join("\n")
      : "Steps reordered or metadata changed (actions and targets unchanged)";

    // Apply modifications
    deployment.plan = {
      ...deployment.plan,
      steps: newSteps,
      diffFromPreviousPlan,
    };
    deployments.save(deployment);

    const actor = request.user?.email ?? "anonymous";
    const stepCount = newSteps.length;
    const { reason } = parsed.data;

    // Record modification in debrief
    debrief.record({
      partitionId: deployment.partitionId ?? null,
      operationId: deployment.id,
      agent: "server",
      decisionType: "plan-modification",
      decision: `Operation plan modified by ${actor}`,
      reasoning: reason,
      context: {
        modifiedBy: actor,
        stepCount,
        reason,
        envoyValidationSkipped,
      },
      actor: request.user?.email,
    });
    telemetry.record({
      actor,
      action: "operation.modified" as Parameters<typeof telemetry.record>[0]["action"],
      target: { type: "deployment", id: deployment.id },
      details: { reason, stepCount },
    });
    telemetry.record({
      actor,
      action: "agent.recommendation.overridden",
      target: { type: "deployment", id: deployment.id },
      details: { reason, stepCount, diff: diffFromPreviousPlan },
    });

    return { deployment, modified: true };
  },
);
726
+
// Replan an operation with user feedback — triggers a new planning pass on
// the envoy.
//
// Responses:
//   404 — operation or its artifact not found
//   409 — operation is not in "awaiting_approval"
//   400 — request body does not satisfy ReplanDeploymentSchema
//   422 — no envoy available, or the feedback was rejected by validation
//   200 — validation answered the feedback directly (mode "response")
//   500 — the planning request itself failed (status is restored first)
// On success the new plan, rollback plan and recommendation are stored and
// the operation returns to "awaiting_approval".
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/replan",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const deploymentId = request.params.id;
    const deployment = deployments.get(deploymentId);
    if (!deployment) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    if (deployment.status !== "awaiting_approval") {
      return reply.status(409).send({ error: `Cannot replan operation in "${deployment.status}" status — must be "awaiting_approval"` });
    }

    const parsed = ReplanDeploymentSchema.safeParse(request.body);
    if (!parsed.success) {
      return reply.status(400).send({ error: parsed.error.message });
    }
    const { feedback } = parsed.data;

    const artifactId = getArtifactId(deployment);
    const artifact = artifactStore.get(artifactId ?? "");
    if (!artifact) {
      return reply.status(404).send({ error: `Artifact not found: ${artifactId}` });
    }

    const environment = deployment.environmentId ? environments.get(deployment.environmentId) : undefined;
    const partition = deployment.partitionId ? partitions.get(deployment.partitionId) : undefined;

    // Use the envoy recorded on the operation, or the first registered envoy
    // when none is recorded.
    const planningEnvoy = deployment.envoyId ? envoyRegistry?.get(deployment.envoyId) : envoyRegistry?.list()[0];
    if (!planningEnvoy) {
      return reply.status(422).send({ error: "No envoy available for replanning" });
    }

    // One client serves both the feedback validation and the planning request.
    const planningClient = new EnvoyClient(planningEnvoy.url);

    // Validate the feedback before triggering the expensive replan.
    try {
      const validation = await planningClient.validateRefinementFeedback({
        feedback,
        currentPlanSteps: (deployment.plan?.steps ?? []).map((s) => ({
          description: s.description,
          action: s.action,
          target: s.target,
        })),
        artifactName: artifact.name ?? "unknown",
        environmentName: environment?.name ?? "unknown",
      });
      if (validation.mode === "rejection") {
        return reply.status(422).send({ error: validation.message, mode: "rejection" });
      }
      if (validation.mode === "response") {
        return reply.status(200).send({ mode: "response", message: validation.message });
      }
      // mode === "replan" — fall through to full replan
    } catch (err) {
      // Validation call failed — proceed with the replan rather than blocking
      // the user, but leave a trace of the failure.
      request.log.warn(
        { err, operationId: deploymentId },
        "Refinement feedback validation failed; proceeding with replan",
      );
    }

    deployment.status = "planning" as typeof deployment.status;
    deployments.save(deployment);

    // Without an environment, plan directly against the envoy.
    const environmentForPlanning = environment
      ? { id: environment.id, name: environment.name, variables: environment.variables }
      : { id: `direct:${planningEnvoy.id}`, name: planningEnvoy.name, variables: {} };

    let result: Awaited<ReturnType<typeof planningClient.requestPlan>>;
    try {
      result = await planningClient.requestPlan({
        operationId: deploymentId,
        artifact: {
          id: artifact.id,
          name: artifact.name,
          type: artifact.type,
          analysis: {
            summary: artifact.analysis.summary,
            dependencies: artifact.analysis.dependencies,
            configurationExpectations: artifact.analysis.configurationExpectations,
            deploymentIntent: artifact.analysis.deploymentIntent,
            confidence: artifact.analysis.confidence,
          },
        },
        environment: environmentForPlanning,
        partition: partition
          ? { id: partition.id, name: partition.name, variables: partition.variables }
          : undefined,
        version: deployment.version ?? "",
        resolvedVariables: deployment.variables,
        refinementFeedback: feedback,
      });
    } catch (err) {
      // Planning failed: put the operation back so the existing plan can
      // still be approved, rejected or replanned.
      const current = deployments.get(deploymentId);
      if (current) {
        current.status = "awaiting_approval" as typeof current.status;
        deployments.save(current);
      }
      return reply.status(500).send({ error: err instanceof Error ? err.message : "Replanning failed" });
    }

    // Re-read the operation: the planning call was awaited, so the stored
    // record is fetched again before it is updated.
    const dep = deployments.get(deploymentId);
    if (!dep) {
      return reply.status(404).send({ error: "Operation not found after replanning" });
    }

    // NOTE(review): `result.blocked` is not inspected here, whereas the retry
    // route marks a blocked plan as failed — confirm whether a blocked replan
    // should be surfaced instead of returning to "awaiting_approval".
    dep.plan = result.plan;
    dep.rollbackPlan = result.rollbackPlan;
    dep.recommendation = computeRecommendation(dep, deployments, result.assessmentSummary);
    dep.status = "awaiting_approval" as typeof dep.status;
    deployments.save(dep);

    debrief.record({
      partitionId: dep.partitionId ?? null,
      operationId: dep.id,
      agent: "envoy",
      decisionType: "plan-generation",
      decision: `Plan regenerated with user feedback (${result.plan.steps.length} steps)`,
      reasoning: result.plan.reasoning,
      context: { stepCount: result.plan.steps.length, envoyId: planningEnvoy.id, refinementFeedback: feedback },
    });

    return { deployment: dep, replanned: true };
  },
);
849
+
// Return cross-system enrichment context for an operation: recent activity
// in its environment, whether the same artifact version was rolled back
// before, other active operations in the environment, and the latest
// operation in that environment. Also returns the stored recommendation,
// or a freshly computed one when none is stored. 404 when the id is unknown.
app.get<{ Params: { id: string } }>(
  "/api/operations/:id/context",
  { preHandler: [requirePermission("deployment.view")] },
  async (request, reply) => {
    const deployment = deployments.get(request.params.id);
    if (!deployment) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    const { environmentId } = deployment;
    const windowStart = new Date(Date.now() - 24 * 60 * 60 * 1000);

    // Operations in the same environment during the last 24 hours
    // (zero when the operation has no environment).
    let recentOperationsToEnv = 0;
    if (environmentId) {
      recentOperationsToEnv = deployments.countByEnvironment(environmentId, windowStart);
    }

    // Was this artifact version rolled back before?
    let previouslyRolledBack = false;
    if (deployment.version) {
      const rolledBack = deployments.findByArtifactVersion(
        getArtifactId(deployment) ?? "",
        deployment.version,
        "rolled_back",
      );
      previouslyRolledBack = rolledBack.length > 0;
    }

    // Other operations in the same environment that are running, approved,
    // or awaiting approval.
    const activeStatuses = ["running", "approved", "awaiting_approval"];
    const conflictingOperations = environmentId
      ? deployments
          .list()
          .filter((other) =>
            other.environmentId === environmentId &&
            other.id !== deployment.id &&
            activeStatuses.includes(other.status))
          .map((other) => other.id)
      : [];

    // Latest operation in the environment, unless it is this very operation.
    const latest = environmentId
      ? deployments.findLatestByEnvironment(environmentId)
      : undefined;
    const lastOperationToEnv = latest && latest.id !== deployment.id
      ? {
          id: latest.id,
          status: latest.status,
          version: latest.version ?? "",
          completedAt: latest.completedAt,
        }
      : undefined;

    const enrichment: DeploymentEnrichment = {
      recentOperationsToEnv,
      previouslyRolledBack,
      conflictingOperations,
      lastOperationToEnv,
    };
    const recommendation = deployment.recommendation ?? computeRecommendation(deployment, deployments);

    return { enrichment, recommendation };
  },
);
915
+
// Request a post-hoc rollback plan: the envoy is given the steps that were
// executed (or the planned steps when no execution record exists) and asked
// to produce a rollback plan, which is then stored on the operation.
//
// Responses:
//   404 — operation or artifact not found
//   409 — operation is not "succeeded", "failed" or "rolled_back"
//   503 — no envoy available
//   500 — the envoy request (or the bookkeeping after it) threw
//   200 — { deployment, rollbackPlan }
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/request-rollback-plan",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const deployment = deployments.get(request.params.id);
    if (!deployment) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    const isFinished = ["succeeded", "failed", "rolled_back"].includes(deployment.status);
    if (!isFinished) {
      return reply.status(409).send({
        error: `Cannot request rollback plan for operation in "${deployment.status}" status — operation must be finished`,
      });
    }

    const artifact = artifactStore.get(getArtifactId(deployment) ?? "");
    if (!artifact) {
      return reply.status(404).send({ error: "Artifact not found" });
    }

    // Ask the envoy recorded on the operation, or the first registered envoy.
    const targetEnvoy = deployment.envoyId
      ? envoyRegistry?.get(deployment.envoyId)
      : envoyRegistry?.list()[0];
    if (!targetEnvoy) {
      return reply.status(503).send({ error: "No envoy available to generate rollback plan" });
    }

    const environment = deployment.environmentId ? environments.get(deployment.environmentId) : undefined;

    // Executed steps carry no action/target of their own, so those are looked
    // up on the plan step with the same description. Without an execution
    // record every planned step is reported as "completed".
    const plannedSteps = deployment.plan?.steps;
    const completedSteps: Array<{
      description: string;
      action: string;
      target: string;
      status: "completed" | "failed" | "rolled_back";
      output?: string;
    }> = deployment.executionRecord?.steps.map((executed) => {
      const planned = plannedSteps?.find((p) => p.description === executed.description);
      return {
        description: executed.description,
        action: planned?.action ?? "unknown",
        target: planned?.target ?? "",
        status: executed.status,
        output: executed.output ?? executed.error,
      };
    }) ?? plannedSteps?.map((planned) => ({
      description: planned.description,
      action: planned.action,
      target: planned.target,
      status: "completed" as const,
    })) ?? [];

    const rollbackClient = new EnvoyClient(targetEnvoy.url);

    try {
      const { analysis } = artifact;
      const rollbackPlan = await rollbackClient.requestRollbackPlan({
        operationId: deployment.id,
        artifact: {
          name: artifact.name,
          type: artifact.type,
          analysis: {
            summary: analysis.summary,
            dependencies: analysis.dependencies,
            configurationExpectations: analysis.configurationExpectations,
            deploymentIntent: analysis.deploymentIntent,
            confidence: analysis.confidence,
          },
        },
        environment: {
          id: deployment.environmentId ?? "",
          name: environment?.name ?? deployment.environmentId ?? "unknown",
        },
        completedSteps,
        deployedVariables: deployment.variables,
        version: deployment.version ?? "",
        failureReason: deployment.failureReason ?? undefined,
      });

      // Keep the generated plan on the operation.
      deployment.rollbackPlan = rollbackPlan;
      deployments.save(deployment);

      const userEmail = request.user?.email;
      const actor = userEmail ?? "anonymous";
      const stepCount = rollbackPlan.steps.length;

      debrief.record({
        partitionId: deployment.partitionId ?? null,
        operationId: deployment.id,
        agent: "server",
        decisionType: "plan-generation",
        decision: `Rollback plan requested and generated for ${artifact.name} v${deployment.version}`,
        reasoning: rollbackPlan.reasoning,
        context: {
          requestedBy: actor,
          stepCount,
          envoyId: targetEnvoy.id,
          deploymentStatus: deployment.status,
        },
        actor: userEmail,
      });
      telemetry.record({
        actor,
        action: "deployment.rollback-plan-requested" as Parameters<typeof telemetry.record>[0]["action"],
        target: { type: "deployment", id: deployment.id },
        details: { stepCount },
      });

      return reply.status(200).send({ deployment, rollbackPlan });
    } catch (err) {
      const details = err instanceof Error ? err.message : String(err);
      return reply.status(500).send({ error: "Failed to generate rollback plan", details });
    }
  },
);
1033
+
// Execute rollback — runs the stored rollback plan against the envoy.
//
// Responses:
//   404 — operation not found
//   409 — no rollback plan stored, or operation is not "succeeded"/"failed"
//   503 — no envoy available
//   202 — rollback dispatched; the outcome is applied asynchronously
// The handler marks the operation "running" and returns immediately. When
// the envoy call settles, the operation becomes "rolled_back" or "failed",
// `completedAt` is set, and a debrief entry is written — including when the
// dispatch itself rejects.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/execute-rollback",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const deployment = deployments.get(request.params.id);
    if (!deployment) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    const rollbackPlan = deployment.rollbackPlan;
    if (!rollbackPlan) {
      return reply.status(409).send({ error: "No rollback plan available — request one first" });
    }

    const finishedStatuses = new Set(["succeeded", "failed"]);
    if (!finishedStatuses.has(deployment.status)) {
      return reply.status(409).send({
        error: `Cannot execute rollback for operation in "${deployment.status}" status`,
      });
    }

    // A missing artifact is tolerated here: names fall back to the artifact
    // id / "unknown" below.
    const artifact = artifactStore.get(getArtifactId(deployment) ?? "");
    const targetEnvoy = deployment.envoyId
      ? envoyRegistry?.get(deployment.envoyId)
      : envoyRegistry?.list()[0];

    if (!targetEnvoy) {
      return reply.status(503).send({ error: "No envoy available to execute rollback" });
    }

    const actor = request.user?.email ?? "anonymous";
    const serverPort = process.env.PORT ?? "9410";
    const serverUrl = process.env.SYNTH_SERVER_URL ?? `http://localhost:${serverPort}`;
    const progressCallbackUrl = `${serverUrl}/api/operations/${deployment.id}/progress`;
    const stepCount = rollbackPlan.steps.length;

    deployment.status = "running" as typeof deployment.status;
    deployments.save(deployment);

    debrief.record({
      partitionId: deployment.partitionId ?? null,
      operationId: deployment.id,
      agent: "server",
      decisionType: "rollback-execution",
      decision: `Rollback execution initiated for ${artifact?.name ?? getArtifactId(deployment)} v${deployment.version}`,
      reasoning: `Rollback requested by ${actor}. Executing ${stepCount} rollback step(s).`,
      context: { initiatedBy: actor, stepCount },
      actor: request.user?.email,
    });
    telemetry.record({
      actor,
      action: "deployment.rollback-executed" as Parameters<typeof telemetry.record>[0]["action"],
      target: { type: "deployment", id: deployment.id },
      details: { stepCount },
    });

    const rollbackClient = new EnvoyClient(targetEnvoy.url);

    // The rollback plan is executed like a forward plan; an empty plan is
    // passed as its own "rollback of rollback".
    const emptyPlan = { steps: [], reasoning: "No rollback of rollback." };

    // Fire-and-forget: the 202 below is sent without waiting for the envoy.
    void rollbackClient.executeApprovedPlan({
      operationId: deployment.id,
      plan: rollbackPlan,
      rollbackPlan: emptyPlan,
      artifactType: artifact?.type ?? "unknown",
      artifactName: artifact?.name ?? "unknown",
      environmentId: deployment.environmentId ?? "",
      progressCallbackUrl,
      callbackToken: targetEnvoy.token,
    }).then((result) => {
      const dep = deployments.get(deployment.id);
      if (!dep) return;

      dep.status = result.success ? "rolled_back" as typeof dep.status : "failed" as typeof dep.status;
      if (!result.success) {
        dep.failureReason = result.failureReason ?? "Rollback execution failed";
      }
      dep.completedAt = new Date();
      deployments.save(dep);

      debrief.record({
        partitionId: dep.partitionId ?? null,
        operationId: dep.id,
        agent: "server",
        decisionType: "rollback-execution",
        decision: result.success
          ? `Rollback completed successfully for ${artifact?.name ?? getArtifactId(dep)} v${dep.version}`
          : `Rollback failed for ${artifact?.name ?? getArtifactId(dep)} v${dep.version}`,
        reasoning: result.success
          ? `All rollback steps executed successfully.`
          : `Rollback failed: ${result.failureReason}`,
        context: { success: result.success, failureReason: result.failureReason },
      });
    }).catch((err: unknown) => {
      const dep = deployments.get(deployment.id);
      if (!dep) return;

      const failureReason = err instanceof Error ? err.message : "Rollback execution dispatch failed";
      dep.status = "failed" as typeof dep.status;
      dep.failureReason = failureReason;
      // Record a completion time on this path too, matching the result path.
      dep.completedAt = new Date();
      deployments.save(dep);

      // Leave an audit entry for the failed dispatch. This handler is the end
      // of the promise chain, so a throw from the debrief store is logged
      // instead of becoming an unhandled rejection.
      try {
        debrief.record({
          partitionId: dep.partitionId ?? null,
          operationId: dep.id,
          agent: "server",
          decisionType: "rollback-execution",
          decision: `Rollback failed for ${artifact?.name ?? getArtifactId(dep)} v${dep.version}`,
          reasoning: `Rollback failed: ${failureReason}`,
          context: { success: false, failureReason },
        });
      } catch (recordErr) {
        request.log.error(
          { err: recordErr, operationId: dep.id },
          "Failed to record rollback dispatch failure in debrief",
        );
      }
    });

    return reply.status(202).send({ deployment, accepted: true });
  },
);
1140
+
// Retry (redeploy) — creates a new "pending" operation that copies the
// source operation's input, environment, partition, envoy and version, then
// dispatches planning to an envoy without waiting for the result.
//
// Responses:
//   404 — source operation, artifact, environment, partition or envoy missing
//   201 — { deployment, sourceDeploymentId, attemptNumber }
// When planning settles, the new operation moves to "awaiting_approval", or
// to "failed" if the plan is blocked or the planning call rejects. Late
// results are ignored once the operation has left "pending".
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/retry",
  { preHandler: [requirePermission("deployment.create")] },
  async (request, reply) => {
    const source = deployments.get(request.params.id);
    if (!source) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    // Attempt number: one for the original, one per retryOf link followed
    // back from the source, plus one for the operation created here.
    let attemptNumber = 2;
    for (
      let ancestor: typeof source | undefined = source;
      ancestor?.retryOf;
      ancestor = deployments.get(ancestor.retryOf)
    ) {
      attemptNumber++;
    }

    // Everything the source referenced must still exist.
    const artifactId = getArtifactId(source);
    const artifact = artifactStore.get(artifactId ?? "");
    if (!artifact) {
      return reply.status(404).send({ error: `Artifact not found: ${artifactId}` });
    }

    const environment = source.environmentId ? environments.get(source.environmentId) : undefined;
    if (source.environmentId && !environment) {
      return reply.status(404).send({ error: `Environment not found: ${source.environmentId}` });
    }

    const partition = source.partitionId ? partitions.get(source.partitionId) : undefined;
    if (source.partitionId && !partition) {
      return reply.status(404).send({ error: `Partition not found: ${source.partitionId}` });
    }

    const targetEnvoy = source.envoyId ? envoyRegistry?.get(source.envoyId) : undefined;
    if (source.envoyId && !targetEnvoy) {
      return reply.status(404).send({ error: `Envoy not found: ${source.envoyId}` });
    }

    // Partition variables first, environment variables spread on top.
    const resolved: Record<string, string> = {
      ...(partition?.variables ?? {}),
      ...(environment ? environment.variables : {}),
    };

    const deployment = {
      id: crypto.randomUUID(),
      input: source.input,
      environmentId: source.environmentId,
      partitionId: source.partitionId,
      envoyId: targetEnvoy?.id,
      version: source.version ?? "",
      status: "pending" as const,
      variables: resolved,
      retryOf: source.id,
      debriefEntryIds: [] as string[],
      createdAt: new Date(),
    };
    deployments.save(deployment);

    const userEmail = request.user?.email;
    const actor = userEmail ?? "anonymous";
    telemetry.record({
      actor,
      action: "operation.created",
      target: { type: "deployment", id: deployment.id },
      details: {
        artifactId,
        environmentId: source.environmentId,
        partitionId: source.partitionId,
        envoyId: source.envoyId,
        retryOf: source.id,
      },
    });

    debrief.record({
      partitionId: deployment.partitionId ?? null,
      operationId: deployment.id,
      agent: "server",
      decisionType: "system",
      decision: `Retry of operation ${source.id} (attempt #${attemptNumber})`,
      reasoning: `User initiated retry of operation ${source.id}. Same artifact, version, environment, and partition.`,
      context: { retryOf: source.id, attemptNumber, actor },
      actor: userEmail,
    });

    // Pick a planning envoy: the source's envoy, else one matched to the
    // environment name, else the first registered envoy.
    const planningEnvoy = envoyRegistry
      ? targetEnvoy
        ?? (environment ? envoyRegistry.findForEnvironment(environment.name) : undefined)
        ?? envoyRegistry.list()[0]
      : undefined;

    if (planningEnvoy) {
      const planningClient = new EnvoyClient(planningEnvoy.url);
      const environmentForPlanning = environment
        ? { id: environment.id, name: environment.name, variables: environment.variables }
        : { id: `direct:${planningEnvoy.id}`, name: planningEnvoy.name, variables: {} };
      const { analysis } = artifact;

      // Not awaited: the 201 response goes out while planning runs.
      planningClient.requestPlan({
        operationId: deployment.id,
        artifact: {
          id: artifact.id,
          name: artifact.name,
          type: artifact.type,
          analysis: {
            summary: analysis.summary,
            dependencies: analysis.dependencies,
            configurationExpectations: analysis.configurationExpectations,
            deploymentIntent: analysis.deploymentIntent,
            confidence: analysis.confidence,
          },
        },
        environment: environmentForPlanning,
        partition: partition
          ? { id: partition.id, name: partition.name, variables: partition.variables }
          : undefined,
        version: deployment.version,
        resolvedVariables: resolved,
      }).then((result) => {
        const dep = deployments.get(deployment.id);
        if (!dep || dep.status !== "pending") return;

        dep.plan = result.plan;
        dep.rollbackPlan = result.rollbackPlan;
        dep.envoyId = planningEnvoy.id;

        const stepCount = result.plan.steps.length;

        if (result.blocked) {
          // The envoy refused to plan: the operation fails right away.
          dep.status = "failed" as typeof dep.status;
          dep.failureReason = result.blockReason ?? "Plan blocked due to unrecoverable precondition failures";
          deployments.save(dep);

          debrief.record({
            partitionId: dep.partitionId ?? null,
            operationId: dep.id,
            agent: "envoy",
            decisionType: "plan-generation",
            decision: `Operation plan blocked — infrastructure prerequisites not met`,
            reasoning: result.blockReason ?? result.plan.reasoning,
            context: { stepCount, envoyId: planningEnvoy.id, blocked: true },
          });
          return;
        }

        dep.status = "awaiting_approval" as typeof dep.status;
        dep.recommendation = computeRecommendation(dep, deployments, result.assessmentSummary);
        deployments.save(dep);

        debrief.record({
          partitionId: dep.partitionId ?? null,
          operationId: dep.id,
          agent: "envoy",
          decisionType: "plan-generation",
          decision: `Operation plan generated with ${stepCount} steps`,
          reasoning: result.plan.reasoning,
          context: { stepCount, envoyId: planningEnvoy.id, delta: result.delta },
        });
      }).catch((err) => {
        const dep = deployments.get(deployment.id);
        if (!dep || dep.status !== "pending") return;

        dep.status = "failed" as typeof dep.status;
        dep.failureReason = err instanceof Error ? err.message : "Planning failed";
        deployments.save(dep);

        debrief.record({
          partitionId: dep.partitionId ?? null,
          operationId: dep.id,
          agent: "server",
          decisionType: "deployment-failure",
          decision: "Envoy planning failed",
          reasoning: dep.failureReason!,
          context: { error: dep.failureReason, envoyId: planningEnvoy.id },
        });
      });
    }

    return reply.status(201).send({ deployment, sourceDeploymentId: source.id, attemptNumber });
  },
);
1313
+
// Return the postmortem for an operation, built from its debrief entries.
// The synchronously generated postmortem is always included; `llmPostmortem`
// is added only when the async generator did not report a heuristic
// fallback. 404 when the id is unknown.
app.get<{ Params: { id: string } }>(
  "/api/operations/:id/postmortem",
  { preHandler: [requirePermission("deployment.view")] },
  async (request, reply) => {
    const deployment = deployments.get(request.params.id);
    if (!deployment) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    const entries = debrief.getByOperation(deployment.id);
    const postmortem = generatePostmortem(entries, deployment);
    const { heuristicFallback, llmPostmortem } = await generatePostmortemAsync(entries, deployment, llm);

    if (heuristicFallback) {
      return { postmortem };
    }
    return { postmortem, llmPostmortem };
  },
);
1333
+
// Get recent debrief entries, optionally filtered by partition and decision
// type, or matched by full-text search.
//
// Query parameters are parsed with DebriefQuerySchema; when parsing fails
// all parameters are treated as absent. `limit` defaults to 50.
// Returns { entries }, newest first on the filtered (non-search) path.
app.get("/api/debrief", { preHandler: [requirePermission("deployment.view")] }, async (request) => {
  const qParsed = DebriefQuerySchema.safeParse(request.query);
  const { limit, partitionId, decisionType, q: searchQuery } = qParsed.success ? qParsed.data : {};

  const max = limit ?? 50;

  // Full-text search — takes priority over filters. The filters are applied
  // to the (already limited) search result, so fewer than `max` entries may
  // be returned.
  if (searchQuery) {
    let entries = debrief.search(searchQuery, max);
    if (partitionId) entries = entries.filter((e) => e.partitionId === partitionId);
    if (decisionType) entries = entries.filter((e) => e.decisionType === decisionType);
    return { entries };
  }

  // No filters — fast path
  if (!partitionId && !decisionType) {
    return { entries: debrief.getRecent(max) };
  }

  // Start from the partition when one is given, otherwise from the decision
  // type; then narrow by decision type if both were supplied.
  const base: ReturnType<typeof debrief.getByPartition> = partitionId
    ? debrief.getByPartition(partitionId)
    : debrief.getByType(decisionType as Parameters<typeof debrief.getByType>[0]);
  const matching = partitionId && decisionType
    ? base.filter((e) => e.decisionType === decisionType)
    : base;

  // Sort a copy: Array.prototype.sort works in place, and the array returned
  // by the debrief store may be shared with the store itself.
  const newestFirst = [...matching].sort((a, b) => b.timestamp.getTime() - a.timestamp.getTime());
  return { entries: newestFirst.slice(0, max) };
});
1369
+
// Pinned operations (quick access).
//
// GET    /api/operations/pinned  — pinned ids plus the operations that still
//                                  resolve in the store
// POST   /api/operations/:id/pin — pin an operation
// DELETE /api/operations/:id/pin — unpin an operation
//
// "/api/operations/pinned" is a static path that sits alongside the
// parameterized ":id" routes.
app.get("/api/operations/pinned", { preHandler: [requirePermission("deployment.view")] }, async () => {
  const pinnedIds = debrief.getPinnedOperationIds();
  // Ids whose operation can no longer be found are dropped from `operations`
  // but are still reported in `pinnedIds`.
  const operations = pinnedIds
    .map((operationId) => deployments.get(operationId))
    .filter(Boolean);
  return { operations, pinnedIds };
});

app.post<{ Params: { id: string } }>(
  "/api/operations/:id/pin",
  { preHandler: [requirePermission("deployment.view")] },
  async (request) => {
    const { id } = request.params;
    debrief.pinOperation(id);
    return { pinned: true };
  },
);

app.delete<{ Params: { id: string } }>(
  "/api/operations/:id/pin",
  { preHandler: [requirePermission("deployment.view")] },
  async (request) => {
    const { id } = request.params;
    debrief.unpinOperation(id);
    return { pinned: false };
  },
);
1395
+
1396
+ // ---------------------------------------------------------------------------
1397
+ // Progress streaming — envoy callback and SSE endpoints
1398
+ // ---------------------------------------------------------------------------
1399
+
// POST /api/operations/:id/progress — receives progress events from an envoy
// and pushes them into the progress store.
//
// Responses:
//   501 — no progress store configured
//   401 — an envoy registry exists and the Bearer token is missing/invalid
//   400 — body fails ProgressEventSchema, or its deploymentId differs from
//         the id in the URL
//   200 — { received: true }
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/progress",
  async (request, reply) => {
    if (!progressStore) {
      return reply.status(501).send({ error: "Progress streaming not configured" });
    }

    // This route is exempt from JWT auth; callers authenticate with the
    // envoy token instead. The check only runs when an envoy registry is
    // configured.
    if (envoyRegistry) {
      const bearerPrefix = "Bearer ";
      const authorization = (request.headers.authorization ?? "") as string;
      const token = authorization.startsWith(bearerPrefix)
        ? authorization.slice(bearerPrefix.length)
        : null;
      const tokenAccepted = Boolean(token) && envoyRegistry.validateToken(token as string);
      if (!tokenAccepted) {
        return reply.status(401).send({ error: "Invalid or missing envoy token" });
      }
    }

    const parsed = ProgressEventSchema.safeParse(request.body);
    if (!parsed.success) {
      return reply.status(400).send({ error: "Invalid progress event", details: parsed.error.format() });
    }

    // The id in the URL and the id inside the event must agree.
    const event = parsed.data;
    if (event.deploymentId !== request.params.id) {
      return reply.status(400).send({ error: "Operation ID in URL does not match event body" });
    }

    progressStore.push(event);
    return reply.status(200).send({ received: true });
  },
);
1433
+
1434
// GET /api/operations/:id/stream — SSE endpoint for live progress.
//
// Replays stored events first (all of them, or only those after Last-Event-ID
// on reconnect), then forwards new events until "deployment-completed".
//
// NOTE(review): an earlier comment stated that auth is via a ?token= query
// param because EventSource cannot send headers. That handling is not visible
// in this handler (only the requirePermission preHandler is) — confirm where
// the query token is consumed.
app.get<{ Params: { id: string } }>(
  "/api/operations/:id/stream",
  { preHandler: [requirePermission("deployment.view")] },
  (request, reply) => {
    if (!progressStore) {
      reply.status(501).send({ error: "Progress streaming not configured" });
      return;
    }
    // Local alias keeps the narrowed (non-null) type inside the closures below.
    const store = progressStore;

    // Hijack the connection so Fastify does not finalize the response
    reply.hijack();

    // Set SSE headers
    reply.raw.writeHead(200, {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      "Connection": "keep-alive",
      "X-Accel-Buffering": "no",
    });

    const deploymentId = request.params.id;

    // Last-Event-ID header (reconnection with replay). A missing, non-numeric
    // or non-positive value means "fresh connect" and triggers a full replay.
    const lastEventIdHeader = request.headers["last-event-id"];
    const parsedLastEventId = lastEventIdHeader ? parseInt(String(lastEventIdHeader), 10) : 0;
    const lastEventId =
      Number.isFinite(parsedLastEventId) && parsedLastEventId > 0 ? parsedLastEventId : 0;

    // Send catch-up events — either all (fresh connect) or since last ID (reconnect)
    const existing = lastEventId
      ? store.getEventsSince(deploymentId, lastEventId)
      : store.getEvents(deploymentId);
    for (const event of existing) {
      reply.raw.write(`id: ${event.id}\ndata: ${JSON.stringify(event)}\n\n`);
    }

    // If the deployment already completed, close right after the catch-up
    const lastEvent = existing[existing.length - 1];
    if (lastEvent?.type === "deployment-completed") {
      reply.raw.end();
      return;
    }

    const detach = (): void => {
      store.removeListener(deploymentId, listener);
    };

    // Subscribe to new events
    const listener = (event: { id?: number; deploymentId: string; type: string }): void => {
      // Never write to a response that has been ended or torn down. A
      // write-after-end is reported asynchronously as an 'error' event, so
      // the try/catch below would not intercept it.
      if (reply.raw.writableEnded || reply.raw.destroyed) {
        return;
      }
      try {
        reply.raw.write(`id: ${event.id}\ndata: ${JSON.stringify(event)}\n\n`);

        // Close the stream when deployment completes; the 'close' handlers
        // registered below then detach this listener.
        if (event.type === "deployment-completed") {
          reply.raw.end();
        }
      } catch {
        // Client disconnected — clean up
        detach();
      }
    };

    store.addListener(deploymentId, listener);

    // Clean up when either side of the exchange closes: the client going away
    // or the response being finished by us.
    request.raw.on("close", detach);
    reply.raw.on("close", detach);
  },
);
1500
+
1501
+ // -- Health reports from envoys (trigger system) ---------------------------
1502
+
1503
// POST /api/health-reports — an envoy reports that a trigger's condition was met.
//
// Flow: authenticate the envoy, validate the report, look up the trigger
// operation, record the report, then either
//   - ignore it because the trigger is paused/disabled,
//   - suppress it because a child spawned by this trigger is still active, or
//   - spawn a child operation and dispatch planning for it (fire-and-forget).
app.post("/api/health-reports", async (request, reply) => {
  // Validate envoy token — same pattern as /api/envoy/report
  if (envoyRegistry) {
    const authHeader = (request.headers.authorization ?? "") as string;
    const token = authHeader.startsWith("Bearer ") ? authHeader.slice(7) : null;
    if (!token || !envoyRegistry.validateToken(token)) {
      return reply.status(401).send({ error: "Invalid or missing envoy token" });
    }
  }

  const { HealthReportSchema } = await import("@synth-deploy/core");
  const parsed = HealthReportSchema.safeParse(request.body);
  if (!parsed.success) {
    return reply.status(400).send({ error: "Invalid health report", details: parsed.error.format() });
  }

  const report = parsed.data;

  // Find the trigger operation
  const triggerOp = deployments.get(report.triggerOperationId);
  if (!triggerOp || triggerOp.input.type !== "trigger") {
    return reply.status(404).send({ error: `Trigger operation not found: ${report.triggerOperationId}` });
  }

  // Record the health report
  debrief.record({
    partitionId: report.partitionId ?? null,
    operationId: triggerOp.id,
    agent: "envoy",
    decisionType: "health-report-received",
    decision: `Health report: ${report.summary}`,
    reasoning: `Trigger condition met on ${report.envoyId}. Probes: ${report.probeResults.map((p) => `${p.label}=${p.parsedValue ?? p.output}`).join(", ")}`,
    context: { directiveId: report.directiveId, envoyId: report.envoyId, probeResults: report.probeResults },
  });

  // A paused or disabled trigger must not fire. The envoy is normally told to
  // stop monitoring, but the disable route tolerates a failed removal on the
  // envoy, so reports can still arrive here and have to be ignored server-side.
  if (triggerOp.triggerStatus === "paused" || triggerOp.triggerStatus === "disabled") {
    return reply.status(200).send({ spawned: false, reason: `trigger-${triggerOp.triggerStatus}` });
  }

  // Deduplication: check for active child operations from this trigger
  const activeStatuses = ["pending", "planning", "awaiting_approval", "approved", "running"];
  const activeChild = deployments
    .list()
    .find((op) => op.lineage === triggerOp.id && activeStatuses.includes(op.status));

  if (activeChild) {
    // Suppress — record that we suppressed
    triggerOp.triggerSuppressedCount = (triggerOp.triggerSuppressedCount ?? 0) + 1;
    deployments.save(triggerOp);

    debrief.record({
      partitionId: report.partitionId ?? null,
      operationId: triggerOp.id,
      agent: "server",
      decisionType: "trigger-suppressed",
      decision: `Trigger suppressed — child operation ${activeChild.id} is still in progress (${activeChild.status})`,
      reasoning: `Deduplication: an operation spawned by this trigger is already active. Suppressed ${triggerOp.triggerSuppressedCount} time(s) total.`,
      context: { activeChildId: activeChild.id, activeChildStatus: activeChild.status, suppressedCount: triggerOp.triggerSuppressedCount },
    });

    return reply.status(200).send({ spawned: false, reason: "deduplicated", activeChildId: activeChild.id });
  }

  // Spawn child operation
  const triggerInput = triggerOp.input as { type: "trigger"; condition: string; responseIntent: string; parameters?: Record<string, unknown> };
  const responseType = triggerOp.monitoringDirective?.responseType ?? "maintain";
  const childOp = {
    id: crypto.randomUUID(),
    input: responseType === "deploy"
      ? { type: "deploy" as const, artifactId: "" }
      : { type: "maintain" as const, intent: triggerInput.responseIntent, parameters: triggerInput.parameters },
    intent: triggerInput.responseIntent,
    lineage: triggerOp.id,
    triggeredBy: "trigger" as const,
    environmentId: report.environmentId ?? triggerOp.environmentId,
    partitionId: report.partitionId ?? triggerOp.partitionId,
    envoyId: report.envoyId,
    version: "",
    status: "pending" as const,
    variables: triggerOp.variables,
    debriefEntryIds: [] as string[],
    createdAt: new Date(),
  };

  deployments.save(childOp);

  // Update trigger stats
  triggerOp.triggerFireCount = (triggerOp.triggerFireCount ?? 0) + 1;
  triggerOp.triggerLastFiredAt = new Date();
  deployments.save(triggerOp);

  debrief.record({
    partitionId: childOp.partitionId ?? null,
    operationId: childOp.id,
    agent: "server",
    decisionType: "trigger-fired",
    decision: `Trigger fired — spawned child operation ${childOp.id}`,
    reasoning: `Condition "${triggerInput.condition}" met. Response: "${triggerInput.responseIntent}". Fire count: ${triggerOp.triggerFireCount}.`,
    context: { triggerId: triggerOp.id, envoyId: report.envoyId, fireCount: triggerOp.triggerFireCount },
  });
  telemetry.record({ actor: "agent", action: "trigger.fired" as TelemetryAction, target: { type: "trigger", id: triggerOp.id }, details: { childOperationId: childOp.id } });

  // Dispatch planning for the child operation (same as new operation flow).
  // The request is intentionally not awaited: the HTTP response is sent right
  // away and the child is updated when (and if) the envoy answers.
  if (envoyRegistry) {
    const childEnvoy = report.envoyId
      ? envoyRegistry.get(report.envoyId)
      : envoyRegistry.list()[0];

    if (childEnvoy) {
      const planningClient = new EnvoyClient(childEnvoy.url);
      const environment = childOp.environmentId ? environments.get(childOp.environmentId) : undefined;
      const environmentForPlanning = environment
        ? { id: environment.id, name: environment.name, variables: environment.variables }
        : { id: `direct:${childEnvoy.id}`, name: childEnvoy.name, variables: {} };

      void planningClient.requestPlan({
        operationId: childOp.id,
        operationType: responseType as "deploy" | "query" | "investigate" | "maintain" | "trigger",
        intent: childOp.intent,
        environment: environmentForPlanning,
        version: "",
        resolvedVariables: childOp.variables,
      }).then((result) => {
        // Only touch the child if nothing else moved it on in the meantime.
        const dep = deployments.get(childOp.id);
        if (!dep || dep.status !== "pending") return;

        dep.plan = result.plan;
        dep.rollbackPlan = result.rollbackPlan;
        dep.envoyId = childEnvoy.id;

        if (result.blocked) {
          dep.status = "failed" as typeof dep.status;
          dep.failureReason = result.blockReason ?? "Plan blocked";
          deployments.save(dep);
        } else {
          dep.status = "awaiting_approval" as typeof dep.status;
          dep.recommendation = computeRecommendation(dep, deployments, result.assessmentSummary);
          deployments.save(dep);
        }
      }).catch((err) => {
        const dep = deployments.get(childOp.id);
        if (!dep || dep.status !== "pending") return;
        dep.status = "failed" as typeof dep.status;
        dep.failureReason = err instanceof Error ? err.message : "Planning failed";
        deployments.save(dep);
      });
    }
  }

  return reply.status(201).send({ spawned: true, childOperationId: childOp.id });
});
1651
+
1652
+ // -- Trigger management (pause/resume/disable) ----------------------------
1653
+
1654
// Pause an active trigger: tell the owning envoy (when it can be resolved) to
// pause the monitoring directive, then persist the paused state and audit it.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/trigger/pause",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const triggerOp = deployments.get(request.params.id);
    if (!triggerOp || triggerOp.input.type !== "trigger") {
      return reply.status(404).send({ error: "Trigger operation not found" });
    }
    if (triggerOp.triggerStatus !== "active") {
      return reply.status(409).send({ error: `Cannot pause trigger in "${triggerOp.triggerStatus}" status` });
    }

    // Pause on envoy
    const owningEnvoy = triggerOp.envoyId && envoyRegistry
      ? envoyRegistry.get(triggerOp.envoyId)
      : undefined;
    if (owningEnvoy) {
      await new EnvoyClient(owningEnvoy.url).pauseMonitoringDirective(triggerOp.id);
    }

    triggerOp.triggerStatus = "paused";
    if (triggerOp.monitoringDirective) {
      triggerOp.monitoringDirective.status = "paused";
    }
    deployments.save(triggerOp);

    const userEmail = request.user?.email;
    const actor = userEmail ?? "anonymous";
    debrief.record({
      partitionId: triggerOp.partitionId ?? null,
      operationId: triggerOp.id,
      agent: "server",
      decisionType: "trigger-paused",
      decision: `Trigger paused by ${actor}`,
      reasoning: "User requested trigger pause",
      context: {},
      actor: userEmail,
    });
    telemetry.record({
      actor,
      action: "trigger.paused" as TelemetryAction,
      target: { type: "trigger", id: triggerOp.id },
      details: {},
    });

    return { operation: triggerOp, paused: true };
  },
);
1695
+
1696
// Resume a paused trigger: tell the owning envoy (when it can be resolved) to
// resume the monitoring directive, then persist the active state and audit it.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/trigger/resume",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const triggerOp = deployments.get(request.params.id);
    if (!triggerOp || triggerOp.input.type !== "trigger") {
      return reply.status(404).send({ error: "Trigger operation not found" });
    }
    if (triggerOp.triggerStatus !== "paused") {
      return reply.status(409).send({ error: `Cannot resume trigger in "${triggerOp.triggerStatus}" status` });
    }

    // Resume on envoy
    const owningEnvoy = triggerOp.envoyId && envoyRegistry
      ? envoyRegistry.get(triggerOp.envoyId)
      : undefined;
    if (owningEnvoy) {
      await new EnvoyClient(owningEnvoy.url).resumeMonitoringDirective(triggerOp.id);
    }

    triggerOp.triggerStatus = "active";
    if (triggerOp.monitoringDirective) {
      triggerOp.monitoringDirective.status = "active";
    }
    deployments.save(triggerOp);

    const userEmail = request.user?.email;
    const actor = userEmail ?? "anonymous";
    debrief.record({
      partitionId: triggerOp.partitionId ?? null,
      operationId: triggerOp.id,
      agent: "server",
      decisionType: "trigger-resumed",
      decision: `Trigger resumed by ${actor}`,
      reasoning: "User requested trigger resume",
      context: {},
      actor: userEmail,
    });
    telemetry.record({
      actor,
      action: "trigger.resumed" as TelemetryAction,
      target: { type: "trigger", id: triggerOp.id },
      details: {},
    });

    return { operation: triggerOp, resumed: true };
  },
);
1737
+
1738
// Disable a trigger regardless of its current trigger status. Removing the
// monitoring directive from the envoy is best-effort: a failure there is
// ignored and the trigger is still marked disabled on the server.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/trigger/disable",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const triggerOp = deployments.get(request.params.id);
    if (!triggerOp || triggerOp.input.type !== "trigger") {
      return reply.status(404).send({ error: "Trigger operation not found" });
    }

    // Remove from envoy
    const owningEnvoy = triggerOp.envoyId && envoyRegistry
      ? envoyRegistry.get(triggerOp.envoyId)
      : undefined;
    if (owningEnvoy) {
      await new EnvoyClient(owningEnvoy.url).removeMonitoringDirective(triggerOp.id).catch(() => {});
    }

    triggerOp.triggerStatus = "disabled";
    if (triggerOp.monitoringDirective) {
      triggerOp.monitoringDirective.status = "disabled";
    }
    deployments.save(triggerOp);

    const userEmail = request.user?.email;
    const actor = userEmail ?? "anonymous";
    debrief.record({
      partitionId: triggerOp.partitionId ?? null,
      operationId: triggerOp.id,
      agent: "server",
      decisionType: "trigger-disabled",
      decision: `Trigger disabled by ${actor}`,
      reasoning: "User requested trigger disable",
      context: {},
      actor: userEmail,
    });
    telemetry.record({
      actor,
      action: "trigger.disabled" as TelemetryAction,
      target: { type: "trigger", id: triggerOp.id },
      details: {},
    });

    return { operation: triggerOp, disabled: true };
  },
);
1776
+
1777
+ // ---------------------------------------------------------------------------
1778
+ // Composite operation helpers — defined inside registerOperationRoutes so
1779
+ // they close over the stores and registry.
1780
+ // ---------------------------------------------------------------------------
1781
+
1782
/**
 * Creates one child operation per entry of a composite operation's input and
 * requests a plan for each child, one after another, from `planningEnvoy`.
 *
 * - No child inputs: the parent is marked failed.
 * - A child plan is blocked or the plan request throws: that child and (if it
 *   is still pending) the parent are marked failed and planning stops.
 * - Every child planned: the parent receives a combined plan made of all child
 *   steps and moves to "awaiting_approval" (if it is still pending).
 *
 * @param parentOp      The composite operation whose children are planned.
 * @param _registry     Unused by this function.
 * @param planningEnvoy The envoy that receives every plan request.
 */
async function planCompositeChildren(
  parentOp: import("@synth-deploy/core").Operation,
  _registry: EnvoyRegistry,
  planningEnvoy: { id: string; name: string; url: string },
): Promise<void> {
  type ChildInput = import("@synth-deploy/core").OperationInput;

  const { operations: childInputs } = parentOp.input as { type: "composite"; operations: ChildInput[] };

  if (childInputs.length === 0) {
    const emptyParent = deployments.get(parentOp.id);
    if (emptyParent) {
      emptyParent.status = "failed" as typeof emptyParent.status;
      emptyParent.failureReason = "Composite operation has no child operations";
      deployments.save(emptyParent);
    }
    return;
  }

  // Intent text for a child: its own intent when it has one, a synthesized
  // "Monitor: …" line for triggers, otherwise nothing.
  const intentOf = (input: ChildInput): string | undefined => {
    if ("intent" in input) return (input as { intent?: string }).intent;
    if (input.type === "trigger") return `Monitor: ${(input as { condition: string }).condition}`;
    return undefined;
  };

  // Marks the parent failed and records why — only while it is still pending.
  const failPendingParent = (
    failureReason: string,
    decision: string,
    reasoning: string | undefined,
    context: Record<string, unknown>,
  ): void => {
    const parentDep = deployments.get(parentOp.id);
    if (!parentDep || parentDep.status !== "pending") return;
    parentDep.status = "failed" as typeof parentDep.status;
    parentDep.failureReason = failureReason;
    deployments.save(parentDep);
    debrief.record({
      partitionId: parentDep.partitionId ?? null,
      operationId: parentDep.id,
      agent: "server",
      decisionType: "composite-failed",
      decision,
      reasoning: reasoning!,
      context,
    });
  };

  const environment = parentOp.environmentId ? environments.get(parentOp.environmentId) : undefined;
  const partition = parentOp.partitionId ? partitions.get(parentOp.partitionId) : undefined;

  // Persist every child up front, remembering its position in the sequence.
  const childIds: string[] = childInputs.map((childInput, sequenceIndex) => {
    const newChild = {
      id: crypto.randomUUID(),
      input: childInput,
      intent: intentOf(childInput),
      lineage: parentOp.id,
      triggeredBy: "agent" as const,
      environmentId: parentOp.environmentId,
      partitionId: parentOp.partitionId,
      envoyId: planningEnvoy.id,
      version: parentOp.version ?? "",
      status: "pending" as const,
      variables: parentOp.variables,
      debriefEntryIds: [] as string[],
      createdAt: new Date(),
      sequenceIndex,
    };
    deployments.save(newChild);
    return newChild.id;
  });

  const sequence = childInputs.map((c) => c.type);
  debrief.record({
    partitionId: parentOp.partitionId ?? null,
    operationId: parentOp.id,
    agent: "server",
    decisionType: "composite-started",
    decision: `Composite operation started — planning ${childIds.length} child operation(s) sequentially`,
    reasoning: `Sequential composite: ${sequence.join(" → ")}`,
    context: { childIds, childCount: childIds.length, sequence },
  });

  const environmentForPlanning = environment
    ? { id: environment.id, name: environment.name, variables: environment.variables }
    : { id: `direct:${planningEnvoy.id}`, name: planningEnvoy.name, variables: {} };

  // Plans a single child. "skipped" means the child record had disappeared.
  const planChild = async (childId: string): Promise<"planned" | "skipped" | "failed"> => {
    const child = deployments.get(childId);
    if (!child) return "skipped";
    const childInput = child.input;

    const childArtifact = childInput.type === "deploy"
      ? artifactStore.get((childInput as { artifactId: string }).artifactId)
      : undefined;

    const planningClient = new EnvoyClient(planningEnvoy.url);

    try {
      const result = await planningClient.requestPlan({
        operationId: childId,
        operationType: childInput.type as "deploy" | "query" | "investigate" | "maintain" | "trigger",
        intent: intentOf(childInput),
        ...(childArtifact ? {
          artifact: {
            id: childArtifact.id,
            name: childArtifact.name,
            type: childArtifact.type,
            analysis: childArtifact.analysis,
          },
        } : {}),
        ...(childInput.type === "investigate" && "allowWrite" in childInput
          ? { allowWrite: (childInput as { allowWrite?: boolean }).allowWrite }
          : {}),
        environment: environmentForPlanning,
        partition: partition ? { id: partition.id, name: partition.name, variables: partition.variables } : undefined,
        version: parentOp.version ?? "",
        resolvedVariables: parentOp.variables,
      });

      const childDep = deployments.get(childId);
      if (!childDep) return "skipped";

      if (result.blocked) {
        childDep.status = "failed" as typeof childDep.status;
        childDep.failureReason = result.blockReason ?? "Plan blocked";
        deployments.save(childDep);

        failPendingParent(
          `Child operation (${childInput.type}) plan blocked: ${childDep.failureReason}`,
          `Child operation planning blocked — composite cannot proceed`,
          childDep.failureReason,
          { childId, childType: childInput.type },
        );
        return "failed";
      }

      childDep.plan = result.plan;
      childDep.rollbackPlan = result.rollbackPlan;
      childDep.envoyId = planningEnvoy.id;
      if (childInput.type === "query" && result.queryFindings) {
        childDep.queryFindings = result.queryFindings;
      }
      if (childInput.type === "investigate" && result.investigationFindings) {
        childDep.investigationFindings = result.investigationFindings;
      }
      childDep.status = "awaiting_approval" as typeof childDep.status;
      deployments.save(childDep);

      debrief.record({
        partitionId: childDep.partitionId ?? null,
        operationId: childDep.id,
        agent: "envoy",
        decisionType: "plan-generation",
        decision: `Child operation plan generated with ${result.plan.steps.length} steps`,
        reasoning: result.plan.reasoning,
        context: { stepCount: result.plan.steps.length, envoyId: planningEnvoy.id, parentOperationId: parentOp.id },
      });
      return "planned";
    } catch (err) {
      const childDep = deployments.get(childId);
      if (childDep) {
        childDep.status = "failed" as typeof childDep.status;
        childDep.failureReason = err instanceof Error ? err.message : "Planning failed";
        deployments.save(childDep);
      }

      const parentFailure = `Child operation (${childInput.type}) planning failed: ${err instanceof Error ? err.message : "unknown error"}`;
      failPendingParent(
        parentFailure,
        `Child operation planning failed — composite cannot proceed`,
        parentFailure,
        { childId, childType: childInput.type, error: parentFailure },
      );
      return "failed";
    }
  };

  // Plan strictly in sequence; the first failure ends planning.
  for (const childId of childIds) {
    if ((await planChild(childId)) === "failed") return;
  }

  // All children planned — build combined summary plan and await approval
  const allChildren = childIds
    .map((id) => deployments.get(id))
    .filter(Boolean) as import("@synth-deploy/core").Operation[];

  const combinedSteps = allChildren.flatMap((c, idx) => {
    if (!c.plan) return [];
    return c.plan.steps.map((step) => ({
      ...step,
      description: `[${idx + 1}/${allChildren.length}: ${c.input.type}] ${step.description}`,
    }));
  });

  const combinedReasoning = allChildren
    .map((c, idx) => `Step ${idx + 1} (${c.input.type}): ${c.plan?.reasoning ?? "no reasoning"}`)
    .join("\n\n");

  const readyParent = deployments.get(parentOp.id);
  if (!readyParent || readyParent.status !== "pending") return;

  readyParent.plan = { steps: combinedSteps, reasoning: combinedReasoning };
  readyParent.rollbackPlan = { steps: [], reasoning: "Child operations handle their own rollback" };
  readyParent.status = "awaiting_approval" as typeof readyParent.status;
  readyParent.recommendation = computeRecommendation(readyParent, deployments);
  deployments.save(readyParent);

  debrief.record({
    partitionId: readyParent.partitionId ?? null,
    operationId: readyParent.id,
    agent: "server",
    decisionType: "composite-plan-ready",
    decision: `All ${allChildren.length} child plans ready — composite awaiting approval`,
    reasoning: combinedReasoning,
    context: { childIds, totalSteps: combinedSteps.length },
  });
}
1987
+
1988
/**
 * Executes the already-planned children of a composite operation one at a
 * time, in the order given by `childIds`.
 *
 * For each child: dispatch its approved plan to its envoy, then poll the
 * deployment store every 2 seconds (for at most 5 minutes) until the child
 * reaches a terminal status. The first child that cannot be dispatched, fails,
 * or does not finish in time fails the parent and ends the run. When every
 * child succeeds the parent is marked "succeeded".
 *
 * Every failure path stamps `completedAt` on the parent and writes a
 * "composite-failed" debrief entry, so a failed composite always carries an
 * end time and an audit record.
 *
 * @param parentId ID of the composite (parent) operation.
 * @param childIds IDs of the child operations, in execution order.
 */
async function executeCompositeSequentially(
  parentId: string,
  childIds: string[],
): Promise<void> {
  const parentOp = deployments.get(parentId);
  if (!parentOp) return;

  // Marks the parent failed (with an end time) and records the reason.
  const failComposite = (
    failureReason: string,
    decision: string,
    context: Record<string, unknown>,
  ): void => {
    const dep = deployments.get(parentId);
    if (!dep) return;
    dep.status = "failed" as typeof dep.status;
    dep.failureReason = failureReason;
    dep.completedAt = new Date();
    deployments.save(dep);
    debrief.record({
      partitionId: dep.partitionId ?? null,
      operationId: dep.id,
      agent: "server",
      decisionType: "composite-failed",
      decision,
      reasoning: failureReason,
      context,
    });
  };

  debrief.record({
    partitionId: parentOp.partitionId ?? null,
    operationId: parentOp.id,
    agent: "server",
    decisionType: "composite-started",
    decision: `Composite execution started — running ${childIds.length} child operations sequentially`,
    reasoning: `Composite operation approved — executing children in order`,
    context: { childIds, totalChildren: childIds.length },
  });

  // Loop-invariant settings: where envoys post progress, and how long to wait.
  const serverPort = process.env.PORT ?? "9410";
  const serverUrl = process.env.SYNTH_SERVER_URL ?? `http://localhost:${serverPort}`;
  const timeoutMs = 300_000;
  const pollIntervalMs = 2_000;

  for (let i = 0; i < childIds.length; i++) {
    const childId = childIds[i];
    const child = deployments.get(childId);
    if (!child || !child.plan || !child.rollbackPlan) {
      failComposite(
        `Child operation ${i + 1} has no plan — cannot execute`,
        `Child operation ${i + 1} missing plan — composite failed`,
        { childId, childIndex: i },
      );
      return;
    }

    const targetEnvoy = child.envoyId ? envoyRegistry?.get(child.envoyId) : envoyRegistry?.list()[0];
    if (!targetEnvoy) {
      failComposite(
        `No envoy available for child operation ${i + 1}`,
        `No envoy available for child operation ${i + 1} — composite failed`,
        { childId, childIndex: i },
      );
      return;
    }
    const envoyUrl = (targetEnvoy as { url: string }).url;

    child.status = "running" as typeof child.status;
    deployments.save(child);

    debrief.record({
      partitionId: child.partitionId ?? null,
      operationId: child.id,
      agent: "server",
      decisionType: "composite-child-started",
      decision: `Executing child operation ${i + 1}/${childIds.length} (${child.input.type})`,
      reasoning: `Sequential composite execution — child ${i + 1} of ${childIds.length}`,
      context: { childId, childIndex: i, parentOperationId: parentId, childType: child.input.type },
    });

    const artifact = artifactStore.get(getArtifactId(child) ?? "");
    const progressCallbackUrl = `${serverUrl}/api/operations/${child.id}/progress`;
    // Token the envoy presents when it calls back with progress events.
    const callbackToken = envoyRegistry?.list().find((r) => r.url === envoyUrl)?.token;

    const childEnvoyClient = new EnvoyClient(envoyUrl);

    try {
      await childEnvoyClient.executeApprovedPlan({
        operationId: child.id,
        plan: child.plan,
        rollbackPlan: child.rollbackPlan,
        artifactType: artifact?.type ?? "unknown",
        artifactName: artifact?.name ?? "unknown",
        environmentId: child.environmentId ?? "",
        progressCallbackUrl,
        callbackToken,
      });
    } catch (err) {
      const failureReason = `Child operation ${i + 1} (${child.input.type}) execution dispatch failed: ${err instanceof Error ? err.message : "unknown error"}`;
      failComposite(
        failureReason,
        `Child operation ${i + 1} execution dispatch failed`,
        { childId, childIndex: i, error: failureReason },
      );
      return;
    }

    // Wait for child to complete (poll every 2 seconds, 5-minute timeout)
    const start = Date.now();
    let childSucceeded = false;

    while (Date.now() - start < timeoutMs) {
      await new Promise<void>((resolve) => setTimeout(resolve, pollIntervalMs));
      const updated = deployments.get(childId);
      if (updated?.status === "succeeded") {
        childSucceeded = true;
        break;
      }
      if (updated?.status === "failed" || updated?.status === "rolled_back" || updated?.status === "cancelled") {
        break;
      }
      // Stop if the parent was externally cancelled or failed while we were waiting
      const parentNow = deployments.get(parentId);
      if (!parentNow || parentNow.status === "failed" || parentNow.status === "cancelled") {
        return;
      }
    }

    const finalChild = deployments.get(childId);
    if (!childSucceeded) {
      const reason = finalChild?.failureReason ?? `Child operation ${i + 1} did not complete in time`;
      failComposite(
        `Composite stopped at step ${i + 1}/${childIds.length} (${child.input.type}): ${reason}`,
        `Composite stopped at child ${i + 1}/${childIds.length} — ${child.input.type} failed`,
        { childId, childIndex: i, failedChildType: child.input.type, completedChildren: i },
      );
      return;
    }

    debrief.record({
      partitionId: finalChild?.partitionId ?? null,
      operationId: childId,
      agent: "server",
      decisionType: "composite-child-completed",
      decision: `Child operation ${i + 1}/${childIds.length} (${child.input.type}) completed successfully`,
      reasoning: `Child execution succeeded — proceeding to next child`,
      context: { childId, childIndex: i, parentOperationId: parentId },
    });
  }

  // All children succeeded
  const dep = deployments.get(parentId);
  if (dep) {
    dep.status = "succeeded" as typeof dep.status;
    dep.completedAt = new Date();
    deployments.save(dep);
    debrief.record({
      partitionId: dep.partitionId ?? null,
      operationId: dep.id,
      agent: "server",
      decisionType: "composite-completed",
      decision: `Composite operation completed — all ${childIds.length} child operations succeeded`,
      reasoning: `All child operations executed successfully in sequence`,
      context: { childIds, totalChildren: childIds.length },
    });
  }
}
2163
+ }
2164
+
2165
+ // ---------------------------------------------------------------------------
2166
+ // Recommendation engine — synthesizes enrichment context into a verdict
2167
+ // ---------------------------------------------------------------------------
2168
+
2169
/**
 * Derives a proceed / caution / hold recommendation for an operation from
 * what the deployment store knows about it and its target environment.
 *
 * Signals, evaluated in this order:
 *  1. the same artifact version was rolled back before            → caution
 *  2. other running/approved operations in the same environment   → hold
 *  3. more than 5 operations in the environment in the last 24h   → caution
 *  4. the latest operation in the environment failed/rolled back  → caution
 *     (a succeeded one only adds an informational factor)
 * A verdict is only ever raised, never lowered, by a later signal.
 *
 * @param deployment The operation being assessed.
 * @param store      Deployment store queried for history and conflicts.
 * @param llmSummary Optional summary text; replaces the built-in summary when given.
 * @returns The verdict, a one-line summary, and the contributing factors.
 */
function computeRecommendation(
  deployment: import("@synth-deploy/core").Deployment,
  store: IDeploymentStore,
  llmSummary?: string,
): import("@synth-deploy/core").DeploymentRecommendation {
  const severity: Record<RecommendationVerdict, number> = { proceed: 0, caution: 1, hold: 2 };
  const factors: string[] = [];
  let verdict: RecommendationVerdict = "proceed";

  // Raise the verdict to `level` unless it is already at least that severe.
  const escalate = (level: RecommendationVerdict): void => {
    if (severity[level] > severity[verdict]) verdict = level;
  };

  const windowStart = new Date(new Date().getTime() - 24 * 60 * 60 * 1000);
  const environmentId = deployment.environmentId;

  // Previously rolled-back version
  if (deployment.version) {
    const priorRollbacks = store.findByArtifactVersion(
      getArtifactId(deployment) ?? "",
      deployment.version,
      "rolled_back",
    );
    if (priorRollbacks.length > 0) {
      escalate("caution");
      factors.push("This artifact version was previously rolled back");
    }
  }

  // The remaining signals are only meaningful when an environment is targeted.
  if (environmentId) {
    // Conflicting operations
    const inFlight = store.list().filter(
      (other) =>
        other.id !== deployment.id &&
        other.environmentId === environmentId &&
        (other.status === "running" || other.status === "approved"),
    );
    if (inFlight.length > 0) {
      escalate("hold");
      factors.push(`${inFlight.length} other operation(s) in progress for this environment`);
    }

    // Operation frequency
    const recentCount = store.countByEnvironment(environmentId, windowStart);
    if (recentCount > 5) {
      escalate("caution");
      factors.push(`High operation frequency: ${recentCount} operations in the last 24h`);
    }

    // Outcome of the most recent operation
    const latest = store.findLatestByEnvironment(environmentId);
    if (latest && latest.id !== deployment.id) {
      switch (latest.status) {
        case "failed":
        case "rolled_back":
          escalate("caution");
          factors.push(`Last operation to this environment ${latest.status}`);
          break;
        case "succeeded":
          factors.push("Last operation to this environment succeeded");
          break;
        default:
          break;
      }
    }
  }

  if (factors.length === 0) {
    factors.push("No risk factors detected — target is stable");
  }

  const summaryMap: Record<RecommendationVerdict, string> = {
    proceed: "Proceed — no conflicting operations, target environment is stable",
    caution: "Proceed with caution — review risk factors before greenlighting",
    hold: "Hold — resolve conflicting operations before proceeding",
  };

  return { verdict, summary: llmSummary ?? summaryMap[verdict], factors };
}