@synth-deploy/server 0.1.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/envoy-client.d.ts +62 -7
- package/dist/agent/envoy-client.d.ts.map +1 -1
- package/dist/agent/envoy-client.js +56 -6
- package/dist/agent/envoy-client.js.map +1 -1
- package/dist/agent/stale-deployment-detector.js +1 -1
- package/dist/agent/stale-deployment-detector.js.map +1 -1
- package/dist/agent/synth-agent.d.ts +7 -5
- package/dist/agent/synth-agent.d.ts.map +1 -1
- package/dist/agent/synth-agent.js +42 -39
- package/dist/agent/synth-agent.js.map +1 -1
- package/dist/alert-webhooks/alert-parsers.d.ts +21 -0
- package/dist/alert-webhooks/alert-parsers.d.ts.map +1 -0
- package/dist/alert-webhooks/alert-parsers.js +184 -0
- package/dist/alert-webhooks/alert-parsers.js.map +1 -0
- package/dist/api/agent.d.ts +0 -6
- package/dist/api/agent.d.ts.map +1 -1
- package/dist/api/agent.js +6 -459
- package/dist/api/agent.js.map +1 -1
- package/dist/api/alert-webhooks.d.ts +13 -0
- package/dist/api/alert-webhooks.d.ts.map +1 -0
- package/dist/api/alert-webhooks.js +279 -0
- package/dist/api/alert-webhooks.js.map +1 -0
- package/dist/api/envoy-reports.js +2 -2
- package/dist/api/envoy-reports.js.map +1 -1
- package/dist/api/envoys.js +1 -1
- package/dist/api/envoys.js.map +1 -1
- package/dist/api/fleet.d.ts.map +1 -1
- package/dist/api/fleet.js +14 -15
- package/dist/api/fleet.js.map +1 -1
- package/dist/api/graph.js +3 -3
- package/dist/api/graph.js.map +1 -1
- package/dist/api/operations.d.ts +7 -0
- package/dist/api/operations.d.ts.map +1 -0
- package/dist/api/operations.js +1883 -0
- package/dist/api/operations.js.map +1 -0
- package/dist/api/partitions.js +1 -1
- package/dist/api/partitions.js.map +1 -1
- package/dist/api/schemas.d.ts +194 -10
- package/dist/api/schemas.d.ts.map +1 -1
- package/dist/api/schemas.js +38 -5
- package/dist/api/schemas.js.map +1 -1
- package/dist/api/system.d.ts.map +1 -1
- package/dist/api/system.js +22 -21
- package/dist/api/system.js.map +1 -1
- package/dist/artifact-analyzer.js +2 -2
- package/dist/artifact-analyzer.js.map +1 -1
- package/dist/fleet/fleet-executor.js +1 -1
- package/dist/fleet/fleet-executor.js.map +1 -1
- package/dist/graph/graph-executor.js +2 -2
- package/dist/graph/graph-executor.js.map +1 -1
- package/dist/index.js +44 -40
- package/dist/index.js.map +1 -1
- package/dist/mcp/resources.js +3 -3
- package/dist/mcp/resources.js.map +1 -1
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +2 -9
- package/dist/mcp/tools.js.map +1 -1
- package/dist/middleware/auth.js +1 -1
- package/dist/middleware/auth.js.map +1 -1
- package/package.json +1 -1
- package/src/agent/envoy-client.ts +107 -15
- package/src/agent/stale-deployment-detector.ts +1 -1
- package/src/agent/synth-agent.ts +59 -45
- package/src/alert-webhooks/alert-parsers.ts +291 -0
- package/src/api/agent.ts +9 -528
- package/src/api/alert-webhooks.ts +354 -0
- package/src/api/envoy-reports.ts +2 -2
- package/src/api/envoys.ts +1 -1
- package/src/api/fleet.ts +14 -15
- package/src/api/graph.ts +3 -3
- package/src/api/operations.ts +2240 -0
- package/src/api/partitions.ts +1 -1
- package/src/api/schemas.ts +43 -7
- package/src/api/system.ts +23 -21
- package/src/artifact-analyzer.ts +2 -2
- package/src/fleet/fleet-executor.ts +1 -1
- package/src/graph/graph-executor.ts +2 -2
- package/src/index.ts +46 -40
- package/src/mcp/resources.ts +3 -3
- package/src/mcp/tools.ts +5 -9
- package/src/middleware/auth.ts +1 -1
- package/tests/agent-mode.test.ts +5 -376
- package/tests/api-handlers.test.ts +27 -27
- package/tests/composite-operations.test.ts +557 -0
- package/tests/decision-diary.test.ts +62 -63
- package/tests/diary-reader.test.ts +14 -18
- package/tests/mcp-tools.test.ts +1 -1
- package/tests/orchestration.test.ts +34 -30
- package/tests/partition-isolation.test.ts +4 -9
- package/tests/rbac-enforcement.test.ts +8 -8
- package/tests/ui-journey.test.ts +9 -9
- package/dist/api/deployments.d.ts +0 -11
- package/dist/api/deployments.d.ts.map +0 -1
- package/dist/api/deployments.js +0 -1098
- package/dist/api/deployments.js.map +0 -1
- package/src/api/deployments.ts +0 -1347
|
@@ -0,0 +1,2240 @@
|
|
|
1
|
+
import type { FastifyInstance } from "fastify";
|
|
2
|
+
import { generatePostmortem, generatePostmortemAsync, resolveApprovalMode } from "@synth-deploy/core";
|
|
3
|
+
import type { LlmClient, IPartitionStore, IEnvironmentStore, IArtifactStore, ISettingsStore, IDeploymentStore, ITelemetryStore, DebriefWriter, DebriefReader, DebriefPinStore, DeploymentEnrichment, RecommendationVerdict, TelemetryAction } from "@synth-deploy/core";
|
|
4
|
+
import { requirePermission } from "../middleware/permissions.js";
|
|
5
|
+
import {
|
|
6
|
+
CreateOperationSchema,
|
|
7
|
+
ApproveDeploymentSchema,
|
|
8
|
+
RejectDeploymentSchema,
|
|
9
|
+
ModifyDeploymentPlanSchema,
|
|
10
|
+
SubmitPlanSchema,
|
|
11
|
+
DeploymentListQuerySchema,
|
|
12
|
+
DebriefQuerySchema,
|
|
13
|
+
ProgressEventSchema,
|
|
14
|
+
ReplanDeploymentSchema,
|
|
15
|
+
} from "./schemas.js";
|
|
16
|
+
import type { ProgressEventStore } from "./progress-event-store.js";
|
|
17
|
+
import { EnvoyClient } from "../agent/envoy-client.js";
|
|
18
|
+
import type { EnvoyRegistry } from "../agent/envoy-registry.js";
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* REST API routes for operations (served under /api/operations). These are the traditional (non-MCP) interface
|
|
22
|
+
* for the web UI and integrations.
|
|
23
|
+
*/
|
|
24
|
+
/**
 * Returns the artifact ID carried by an operation's input.
 *
 * Only inputs whose `type` is `"deploy"` are read for an artifact ID;
 * any other input type yields `undefined`.
 *
 * @param op - Object exposing the operation's `input`.
 * @returns The input's `artifactId` for deploy inputs, otherwise `undefined`.
 */
function getArtifactId(op: { input: import("@synth-deploy/core").OperationInput }): string | undefined {
  const { input } = op;
  // Guard clause: non-deploy inputs are never read for an artifact ID.
  if (input.type !== "deploy") {
    return undefined;
  }
  return input.artifactId;
}
|
|
27
|
+
|
|
28
|
+
export function registerOperationRoutes(
|
|
29
|
+
app: FastifyInstance,
|
|
30
|
+
deployments: IDeploymentStore,
|
|
31
|
+
debrief: DebriefWriter & DebriefReader & DebriefPinStore,
|
|
32
|
+
partitions: IPartitionStore,
|
|
33
|
+
environments: IEnvironmentStore,
|
|
34
|
+
artifactStore: IArtifactStore,
|
|
35
|
+
settings: ISettingsStore,
|
|
36
|
+
telemetry: ITelemetryStore,
|
|
37
|
+
progressStore?: ProgressEventStore,
|
|
38
|
+
envoyClient?: EnvoyClient,
|
|
39
|
+
envoyRegistry?: EnvoyRegistry,
|
|
40
|
+
llm?: LlmClient,
|
|
41
|
+
): void {
|
|
42
|
+
|
|
43
|
+
// Create a deployment (plan phase)
|
|
44
|
+
app.post("/api/operations", { preHandler: [requirePermission("deployment.create")] }, async (request, reply) => {
|
|
45
|
+
const parsed = CreateOperationSchema.safeParse(request.body);
|
|
46
|
+
if (!parsed.success) {
|
|
47
|
+
return reply.status(400).send({ error: parsed.error.message });
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
const { artifactId, environmentId, partitionId, envoyId, version, type: operationType, intent, allowWrite, condition, responseIntent, parentOperationId, requireApproval } = parsed.data;
|
|
51
|
+
|
|
52
|
+
// Validate artifact exists (required for deploy operations)
|
|
53
|
+
if (operationType === "deploy" && !artifactId) {
|
|
54
|
+
return reply.status(400).send({ error: "artifactId is required for deploy operations" });
|
|
55
|
+
}
|
|
56
|
+
const artifact = artifactId ? artifactStore.get(artifactId) : undefined;
|
|
57
|
+
if (operationType === "deploy" && !artifact) {
|
|
58
|
+
return reply.status(404).send({ error: `Artifact not found: ${artifactId}` });
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// Validate environment exists (optional when targeting a partition or envoy)
|
|
62
|
+
const environment = environmentId ? environments.get(environmentId) : undefined;
|
|
63
|
+
if (environmentId && !environment) {
|
|
64
|
+
return reply.status(404).send({ error: `Environment not found: ${environmentId}` });
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// Validate partition if provided
|
|
68
|
+
const partition = partitionId ? partitions.get(partitionId) : undefined;
|
|
69
|
+
if (partitionId && !partition) {
|
|
70
|
+
return reply.status(404).send({ error: `Partition not found: ${partitionId}` });
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
// Validate envoy if provided
|
|
74
|
+
const targetEnvoy = envoyId ? envoyRegistry?.get(envoyId) : undefined;
|
|
75
|
+
if (envoyId && !targetEnvoy) {
|
|
76
|
+
return reply.status(404).send({ error: `Envoy not found: ${envoyId}` });
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// Resolve variables — partition vars are base, environment vars take precedence if present
|
|
80
|
+
const envVars = environment ? environment.variables : {};
|
|
81
|
+
const partitionVars = partition?.variables ?? {};
|
|
82
|
+
const resolved: Record<string, string> = { ...partitionVars, ...envVars };
|
|
83
|
+
|
|
84
|
+
const operationInput = operationType === "deploy"
|
|
85
|
+
? { type: "deploy" as const, artifactId: artifactId!, ...(version ? { artifactVersionId: version } : {}) }
|
|
86
|
+
: operationType === "trigger"
|
|
87
|
+
? { type: "trigger" as const, condition: condition ?? intent ?? "", responseIntent: responseIntent ?? intent ?? "" }
|
|
88
|
+
: operationType === "composite"
|
|
89
|
+
? { type: "composite" as const, operations: (parsed.data.operations ?? []) as import("@synth-deploy/core").OperationInput[] }
|
|
90
|
+
: operationType === "investigate"
|
|
91
|
+
? { type: "investigate" as const, intent: intent ?? "", ...(allowWrite !== undefined ? { allowWrite } : {}) }
|
|
92
|
+
: { type: operationType as "maintain" | "query", intent: intent ?? "" };
|
|
93
|
+
|
|
94
|
+
const deployment = {
|
|
95
|
+
id: crypto.randomUUID(),
|
|
96
|
+
input: operationInput,
|
|
97
|
+
intent,
|
|
98
|
+
lineage: parentOperationId,
|
|
99
|
+
triggeredBy: parentOperationId ? ("user" as const) : undefined,
|
|
100
|
+
environmentId,
|
|
101
|
+
partitionId,
|
|
102
|
+
envoyId: targetEnvoy?.id,
|
|
103
|
+
version: version ?? "",
|
|
104
|
+
status: "pending" as const,
|
|
105
|
+
variables: resolved,
|
|
106
|
+
debriefEntryIds: [] as string[],
|
|
107
|
+
createdAt: new Date(),
|
|
108
|
+
...(requireApproval ? { forceManualApproval: true } : {}),
|
|
109
|
+
};
|
|
110
|
+
|
|
111
|
+
deployments.save(deployment);
|
|
112
|
+
telemetry.record({ actor: (request.user?.email) ?? "anonymous", action: "operation.created", target: { type: "deployment", id: deployment.id }, details: { artifactId, environmentId, partitionId, envoyId } });
|
|
113
|
+
|
|
114
|
+
// Dispatch planning to the appropriate envoy asynchronously.
|
|
115
|
+
// The envoy reasons about the deployment (read-only) and POSTs back a plan,
|
|
116
|
+
// which transitions the deployment to awaiting_approval.
|
|
117
|
+
if (envoyRegistry) {
|
|
118
|
+
// Find the target envoy: explicit envoyId > environment-assigned > first available
|
|
119
|
+
const planningEnvoy = targetEnvoy
|
|
120
|
+
?? (environment ? envoyRegistry.findForEnvironment(environment.name) : undefined)
|
|
121
|
+
?? envoyRegistry.list()[0];
|
|
122
|
+
|
|
123
|
+
const needsArtifact = deployment.input.type === "deploy";
|
|
124
|
+
if (planningEnvoy && (!needsArtifact || artifact)) {
|
|
125
|
+
const planningClient = new EnvoyClient(planningEnvoy.url);
|
|
126
|
+
const environmentForPlanning = environment
|
|
127
|
+
? { id: environment.id, name: environment.name, variables: environment.variables }
|
|
128
|
+
: { id: `direct:${planningEnvoy.id}`, name: planningEnvoy.name, variables: {} };
|
|
129
|
+
|
|
130
|
+
// Composite: orchestrate child planning separately — do not send composite to envoy directly
|
|
131
|
+
if (deployment.input.type === "composite") {
|
|
132
|
+
planCompositeChildren(deployment, envoyRegistry, planningEnvoy).catch((err) => {
|
|
133
|
+
const dep = deployments.get(deployment.id);
|
|
134
|
+
if (dep && (dep.status === "pending" || dep.status === "planning")) {
|
|
135
|
+
dep.status = "failed" as typeof dep.status;
|
|
136
|
+
dep.failureReason = `Composite planning failed unexpectedly: ${err instanceof Error ? err.message : String(err)}`;
|
|
137
|
+
deployments.save(dep);
|
|
138
|
+
}
|
|
139
|
+
});
|
|
140
|
+
return;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
planningClient.requestPlan({
|
|
144
|
+
operationId: deployment.id,
|
|
145
|
+
operationType: deployment.input.type as "deploy" | "query" | "investigate" | "maintain" | "trigger",
|
|
146
|
+
intent: deployment.intent ?? (deployment.input.type === "trigger"
|
|
147
|
+
? `Monitor: ${(deployment.input as { condition: string }).condition}. When triggered: ${(deployment.input as { responseIntent: string }).responseIntent}`
|
|
148
|
+
: undefined),
|
|
149
|
+
...(deployment.input.type === "trigger" ? {
|
|
150
|
+
triggerCondition: (deployment.input as { condition: string }).condition,
|
|
151
|
+
triggerResponseIntent: (deployment.input as { responseIntent: string }).responseIntent,
|
|
152
|
+
} : {}),
|
|
153
|
+
...(artifact ? {
|
|
154
|
+
artifact: {
|
|
155
|
+
id: artifact.id,
|
|
156
|
+
name: artifact.name,
|
|
157
|
+
type: artifact.type,
|
|
158
|
+
analysis: {
|
|
159
|
+
summary: artifact.analysis.summary,
|
|
160
|
+
dependencies: artifact.analysis.dependencies,
|
|
161
|
+
configurationExpectations: artifact.analysis.configurationExpectations,
|
|
162
|
+
deploymentIntent: artifact.analysis.deploymentIntent,
|
|
163
|
+
confidence: artifact.analysis.confidence,
|
|
164
|
+
},
|
|
165
|
+
},
|
|
166
|
+
} : {}),
|
|
167
|
+
...(deployment.input.type === "investigate" && "allowWrite" in deployment.input
|
|
168
|
+
? { allowWrite: deployment.input.allowWrite }
|
|
169
|
+
: {}),
|
|
170
|
+
environment: environmentForPlanning,
|
|
171
|
+
partition: partition
|
|
172
|
+
? { id: partition.id, name: partition.name, variables: partition.variables }
|
|
173
|
+
: undefined,
|
|
174
|
+
version: deployment.version ?? "",
|
|
175
|
+
resolvedVariables: resolved,
|
|
176
|
+
}).then((result) => {
|
|
177
|
+
const dep = deployments.get(deployment.id);
|
|
178
|
+
if (!dep || dep.status !== "pending") return;
|
|
179
|
+
|
|
180
|
+
dep.plan = result.plan;
|
|
181
|
+
dep.rollbackPlan = result.rollbackPlan;
|
|
182
|
+
dep.envoyId = planningEnvoy.id;
|
|
183
|
+
|
|
184
|
+
// Trigger operations: construct MonitoringDirective from plan, present for approval
|
|
185
|
+
if (dep.input.type === "trigger" && !result.blocked) {
|
|
186
|
+
const triggerInput = dep.input as { type: "trigger"; condition: string; responseIntent: string; parameters?: Record<string, unknown> };
|
|
187
|
+
// Convert plan steps to monitoring probes
|
|
188
|
+
const probes = result.plan.steps.map((step) => ({
|
|
189
|
+
command: step.action,
|
|
190
|
+
label: step.description,
|
|
191
|
+
parseAs: (step.params?.parseAs === "exitCode" ? "exitCode" : "numeric") as "numeric" | "exitCode",
|
|
192
|
+
}));
|
|
193
|
+
const directive: import("@synth-deploy/core").MonitoringDirective = {
|
|
194
|
+
id: dep.id,
|
|
195
|
+
operationId: dep.id,
|
|
196
|
+
probes: probes.length > 0 ? probes : [{
|
|
197
|
+
command: "echo 0",
|
|
198
|
+
label: "default-probe",
|
|
199
|
+
parseAs: "numeric" as const,
|
|
200
|
+
}],
|
|
201
|
+
intervalMs: result.intervalMs ?? 60_000,
|
|
202
|
+
cooldownMs: result.cooldownMs ?? 300_000,
|
|
203
|
+
condition: triggerInput.condition,
|
|
204
|
+
responseIntent: triggerInput.responseIntent,
|
|
205
|
+
responseType: "maintain",
|
|
206
|
+
responseParameters: triggerInput.parameters,
|
|
207
|
+
environmentId: dep.environmentId,
|
|
208
|
+
partitionId: dep.partitionId,
|
|
209
|
+
status: "active",
|
|
210
|
+
};
|
|
211
|
+
dep.monitoringDirective = directive;
|
|
212
|
+
dep.triggerStatus = "active";
|
|
213
|
+
dep.status = "awaiting_approval" as typeof dep.status;
|
|
214
|
+
dep.recommendation = computeRecommendation(dep, deployments, result.assessmentSummary);
|
|
215
|
+
deployments.save(dep);
|
|
216
|
+
// Debrief plan-generation entry is recorded by the envoy's planTrigger — no duplicate here.
|
|
217
|
+
return;
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
// Check approval mode for query/investigate operations with findings
|
|
221
|
+
if ((dep.input.type === "query" || dep.input.type === "investigate") &&
|
|
222
|
+
(result.queryFindings || result.investigationFindings)) {
|
|
223
|
+
if (result.queryFindings) dep.queryFindings = result.queryFindings;
|
|
224
|
+
if (result.investigationFindings) dep.investigationFindings = result.investigationFindings;
|
|
225
|
+
|
|
226
|
+
const currentSettings = settings.get();
|
|
227
|
+
const envLookup = (id: string) => environments.get(id)?.name;
|
|
228
|
+
const approvalMode = dep.forceManualApproval
|
|
229
|
+
? "required"
|
|
230
|
+
: resolveApprovalMode(dep.input.type, dep.environmentId, currentSettings, envLookup);
|
|
231
|
+
|
|
232
|
+
if (approvalMode === "auto") {
|
|
233
|
+
// Auto-approve — findings are the deliverable
|
|
234
|
+
dep.status = "succeeded" as typeof dep.status;
|
|
235
|
+
dep.completedAt = new Date();
|
|
236
|
+
deployments.save(dep);
|
|
237
|
+
|
|
238
|
+
const decisionType = dep.input.type === "query"
|
|
239
|
+
? "query-findings" as const
|
|
240
|
+
: "investigation-findings" as const;
|
|
241
|
+
const findings = result.queryFindings ?? result.investigationFindings!;
|
|
242
|
+
debrief.record({
|
|
243
|
+
partitionId: dep.partitionId ?? null,
|
|
244
|
+
operationId: dep.id,
|
|
245
|
+
agent: "envoy",
|
|
246
|
+
decisionType,
|
|
247
|
+
decision: `${dep.input.type === "query" ? "Query" : "Investigation"} complete — ${findings.targetsSurveyed.length} target(s) surveyed`,
|
|
248
|
+
reasoning: findings.summary,
|
|
249
|
+
context: { targetsSurveyed: findings.targetsSurveyed, findingCount: findings.findings.length },
|
|
250
|
+
});
|
|
251
|
+
return;
|
|
252
|
+
}
|
|
253
|
+
// approvalMode === "required" — fall through to standard approval gate
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
if (result.blocked) {
|
|
257
|
+
// Unrecoverable precondition failures — block execution, do not present for approval
|
|
258
|
+
dep.status = "failed" as typeof dep.status;
|
|
259
|
+
dep.failureReason = result.blockReason ?? "Plan blocked due to unrecoverable precondition failures";
|
|
260
|
+
deployments.save(dep);
|
|
261
|
+
|
|
262
|
+
debrief.record({
|
|
263
|
+
partitionId: dep.partitionId ?? null,
|
|
264
|
+
operationId: dep.id,
|
|
265
|
+
agent: "envoy",
|
|
266
|
+
decisionType: "plan-generation",
|
|
267
|
+
decision: `Operation plan blocked — infrastructure prerequisites not met`,
|
|
268
|
+
reasoning: result.blockReason ?? result.plan.reasoning,
|
|
269
|
+
context: { stepCount: result.plan.steps.length, envoyId: planningEnvoy.id, blocked: true },
|
|
270
|
+
});
|
|
271
|
+
} else {
|
|
272
|
+
// Plan is valid — transition to awaiting_approval
|
|
273
|
+
dep.status = "awaiting_approval" as typeof dep.status;
|
|
274
|
+
dep.recommendation = computeRecommendation(dep, deployments, result.assessmentSummary);
|
|
275
|
+
deployments.save(dep);
|
|
276
|
+
|
|
277
|
+
debrief.record({
|
|
278
|
+
partitionId: dep.partitionId ?? null,
|
|
279
|
+
operationId: dep.id,
|
|
280
|
+
agent: "envoy",
|
|
281
|
+
decisionType: "plan-generation",
|
|
282
|
+
decision: `Operation plan generated with ${result.plan.steps.length} steps`,
|
|
283
|
+
reasoning: result.plan.reasoning,
|
|
284
|
+
context: { stepCount: result.plan.steps.length, envoyId: planningEnvoy.id, delta: result.delta },
|
|
285
|
+
});
|
|
286
|
+
}
|
|
287
|
+
}).catch((err) => {
|
|
288
|
+
// Planning failed — mark deployment failed so UI doesn't wait forever
|
|
289
|
+
const dep = deployments.get(deployment.id);
|
|
290
|
+
if (!dep || dep.status !== "pending") return;
|
|
291
|
+
|
|
292
|
+
dep.status = "failed" as typeof dep.status;
|
|
293
|
+
dep.failureReason = err instanceof Error ? err.message : "Planning failed";
|
|
294
|
+
deployments.save(dep);
|
|
295
|
+
|
|
296
|
+
debrief.record({
|
|
297
|
+
partitionId: dep.partitionId ?? null,
|
|
298
|
+
operationId: dep.id,
|
|
299
|
+
agent: "server",
|
|
300
|
+
decisionType: "deployment-failure",
|
|
301
|
+
decision: "Envoy planning failed",
|
|
302
|
+
reasoning: dep.failureReason!,
|
|
303
|
+
context: { error: dep.failureReason, envoyId: planningEnvoy.id },
|
|
304
|
+
});
|
|
305
|
+
});
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
return reply.status(201).send({ deployment });
|
|
310
|
+
});
|
|
311
|
+
|
|
312
|
+
// Get deployment by ID
|
|
313
|
+
app.get<{ Params: { id: string } }>("/api/operations/:id", { preHandler: [requirePermission("deployment.view")] }, async (request, reply) => {
|
|
314
|
+
const deployment = deployments.get(request.params.id);
|
|
315
|
+
if (!deployment) {
|
|
316
|
+
return reply.status(404).send({ error: "Operation not found" });
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
return {
|
|
320
|
+
deployment,
|
|
321
|
+
debrief: debrief.getByOperation(deployment.id),
|
|
322
|
+
};
|
|
323
|
+
});
|
|
324
|
+
|
|
325
|
+
// What's New — compare deployed artifact version against catalog latest
|
|
326
|
+
app.get<{ Params: { id: string } }>("/api/operations/:id/whats-new", { preHandler: [requirePermission("deployment.view")] }, async (request, reply) => {
|
|
327
|
+
const deployment = deployments.get(request.params.id);
|
|
328
|
+
if (!deployment) {
|
|
329
|
+
return reply.status(404).send({ error: "Operation not found" });
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
const versions = artifactStore.getVersions(getArtifactId(deployment) ?? "");
|
|
333
|
+
const sorted = versions.slice().sort(
|
|
334
|
+
(a, b) => new Date(b.createdAt).getTime() - new Date(a.createdAt).getTime(),
|
|
335
|
+
);
|
|
336
|
+
const latest = sorted[0] ?? null;
|
|
337
|
+
const deployedVersion = deployment.version;
|
|
338
|
+
const latestVersion = latest?.version ?? null;
|
|
339
|
+
const isLatest = latestVersion === null || latestVersion === deployedVersion;
|
|
340
|
+
|
|
341
|
+
return {
|
|
342
|
+
deployedVersion,
|
|
343
|
+
latestVersion,
|
|
344
|
+
isLatest,
|
|
345
|
+
latestCreatedAt: latest?.createdAt ? new Date(latest.createdAt).toISOString() : null,
|
|
346
|
+
};
|
|
347
|
+
});
|
|
348
|
+
|
|
349
|
+
// List deployments (optionally filtered by partition, artifact, or envoy)
|
|
350
|
+
app.get("/api/operations", { preHandler: [requirePermission("deployment.view")] }, async (request) => {
|
|
351
|
+
const qParsed = DeploymentListQuerySchema.safeParse(request.query);
|
|
352
|
+
const { partitionId, artifactId, envoyId } = qParsed.success ? qParsed.data : {};
|
|
353
|
+
|
|
354
|
+
let list;
|
|
355
|
+
if (partitionId) {
|
|
356
|
+
list = deployments.getByPartition(partitionId);
|
|
357
|
+
} else if (artifactId) {
|
|
358
|
+
list = deployments.getByArtifact(artifactId);
|
|
359
|
+
} else {
|
|
360
|
+
list = deployments.list();
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
if (envoyId) {
|
|
364
|
+
list = list.filter((d) => d.envoyId === envoyId);
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
return { deployments: list };
|
|
368
|
+
});
|
|
369
|
+
|
|
370
|
+
// Submit a plan from envoy — transitions deployment to awaiting_approval
|
|
371
|
+
app.post<{ Params: { id: string } }>(
|
|
372
|
+
"/api/operations/:id/plan",
|
|
373
|
+
{ preHandler: [requirePermission("deployment.create")] },
|
|
374
|
+
async (request, reply) => {
|
|
375
|
+
const deployment = deployments.get(request.params.id);
|
|
376
|
+
if (!deployment) {
|
|
377
|
+
return reply.status(404).send({ error: "Operation not found" });
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
const parsed = SubmitPlanSchema.safeParse(request.body);
|
|
381
|
+
if (!parsed.success) {
|
|
382
|
+
return reply.status(400).send({ error: "Invalid plan submission", details: parsed.error.format() });
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
if ((deployment.status) !== "pending" && (deployment.status) !== "planning") {
|
|
386
|
+
return reply.status(409).send({ error: `Cannot submit plan for operation in "${deployment.status}" status` });
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
deployment.plan = parsed.data.plan;
|
|
390
|
+
deployment.rollbackPlan = parsed.data.rollbackPlan;
|
|
391
|
+
deployment.status = "awaiting_approval" as typeof deployment.status;
|
|
392
|
+
|
|
393
|
+
// Generate recommendation from enrichment context
|
|
394
|
+
deployment.recommendation = computeRecommendation(deployment, deployments);
|
|
395
|
+
|
|
396
|
+
deployments.save(deployment);
|
|
397
|
+
|
|
398
|
+
debrief.record({
|
|
399
|
+
partitionId: deployment.partitionId ?? null,
|
|
400
|
+
operationId: deployment.id,
|
|
401
|
+
agent: "envoy",
|
|
402
|
+
decisionType: "plan-generation",
|
|
403
|
+
decision: `Operation plan submitted with ${parsed.data.plan.steps.length} steps`,
|
|
404
|
+
reasoning: parsed.data.plan.reasoning,
|
|
405
|
+
context: { stepCount: parsed.data.plan.steps.length },
|
|
406
|
+
});
|
|
407
|
+
|
|
408
|
+
return reply.status(200).send({ deployment });
|
|
409
|
+
},
|
|
410
|
+
);
|
|
411
|
+
|
|
412
|
+
// Approve a deployment plan
|
|
413
|
+
app.post<{ Params: { id: string } }>(
|
|
414
|
+
"/api/operations/:id/approve",
|
|
415
|
+
{ preHandler: [requirePermission("deployment.approve")] },
|
|
416
|
+
async (request, reply) => {
|
|
417
|
+
const deployment = deployments.get(request.params.id);
|
|
418
|
+
if (!deployment) {
|
|
419
|
+
return reply.status(404).send({ error: "Operation not found" });
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
const parsed = ApproveDeploymentSchema.safeParse(request.body);
|
|
423
|
+
if (!parsed.success) {
|
|
424
|
+
return reply.status(400).send({ error: parsed.error.message });
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
if ((deployment.status) !== "awaiting_approval") {
|
|
428
|
+
return reply.status(409).send({ error: `Cannot approve operation in "${deployment.status}" status — must be "awaiting_approval"` });
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
// Transition deployment status
|
|
432
|
+
deployment.approvedBy = parsed.data.approvedBy;
|
|
433
|
+
deployment.approvedAt = new Date();
|
|
434
|
+
deployment.status = "approved" as typeof deployment.status;
|
|
435
|
+
deployments.save(deployment);
|
|
436
|
+
|
|
437
|
+
const actor = (request.user?.email) ?? parsed.data.approvedBy;
|
|
438
|
+
|
|
439
|
+
// Record approval in debrief
|
|
440
|
+
debrief.record({
|
|
441
|
+
partitionId: deployment.partitionId ?? null,
|
|
442
|
+
operationId: deployment.id,
|
|
443
|
+
agent: "server",
|
|
444
|
+
decisionType: "system",
|
|
445
|
+
decision: `Operation approved by ${actor}`,
|
|
446
|
+
reasoning: parsed.data.modifications
|
|
447
|
+
? `Approved with modifications: ${parsed.data.modifications}`
|
|
448
|
+
: "Approved without modifications",
|
|
449
|
+
context: { approvedBy: actor },
|
|
450
|
+
actor: request.user?.email,
|
|
451
|
+
});
|
|
452
|
+
telemetry.record({ actor, action: "operation.approved", target: { type: "deployment", id: deployment.id }, details: { modifications: parsed.data.modifications } });
|
|
453
|
+
telemetry.record({
|
|
454
|
+
actor,
|
|
455
|
+
action: parsed.data.modifications ? "agent.recommendation.overridden" : "agent.recommendation.followed",
|
|
456
|
+
target: { type: "deployment", id: deployment.id },
|
|
457
|
+
details: parsed.data.modifications
|
|
458
|
+
? { modifications: parsed.data.modifications }
|
|
459
|
+
: { planStepCount: deployment.plan?.steps.length ?? 0 },
|
|
460
|
+
});
|
|
461
|
+
|
|
462
|
+
// Composite operations: execute children sequentially
|
|
463
|
+
if (deployment.input.type === "composite") {
|
|
464
|
+
deployment.status = "running" as typeof deployment.status;
|
|
465
|
+
deployments.save(deployment);
|
|
466
|
+
|
|
467
|
+
const compositeChildren = deployments.list()
|
|
468
|
+
.filter((d) => d.lineage === deployment.id)
|
|
469
|
+
.sort((a, b) => ((a as { sequenceIndex?: number }).sequenceIndex ?? 0) - ((b as { sequenceIndex?: number }).sequenceIndex ?? 0));
|
|
470
|
+
|
|
471
|
+
// Approve all children before executing sequentially
|
|
472
|
+
for (const child of compositeChildren) {
|
|
473
|
+
child.approvedBy = parsed.data.approvedBy;
|
|
474
|
+
child.approvedAt = new Date();
|
|
475
|
+
child.status = "approved" as typeof child.status;
|
|
476
|
+
deployments.save(child);
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
executeCompositeSequentially(deployment.id, compositeChildren.map((c) => c.id)).catch((err) => {
|
|
480
|
+
const dep = deployments.get(deployment.id);
|
|
481
|
+
if (dep && dep.status === "running") {
|
|
482
|
+
dep.status = "failed" as typeof dep.status;
|
|
483
|
+
dep.failureReason = `Composite execution failed unexpectedly: ${err instanceof Error ? err.message : String(err)}`;
|
|
484
|
+
dep.completedAt = new Date();
|
|
485
|
+
deployments.save(dep);
|
|
486
|
+
}
|
|
487
|
+
});
|
|
488
|
+
|
|
489
|
+
return { deployment, approved: true };
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
// Trigger operations: install monitoring directive on envoy
|
|
493
|
+
if (deployment.input.type === "trigger" && deployment.monitoringDirective && envoyRegistry) {
|
|
494
|
+
const targetEnvoyForTrigger = deployment.envoyId
|
|
495
|
+
? envoyRegistry.get(deployment.envoyId)
|
|
496
|
+
: envoyRegistry.list()[0];
|
|
497
|
+
|
|
498
|
+
if (targetEnvoyForTrigger) {
|
|
499
|
+
const triggerClient = new EnvoyClient(targetEnvoyForTrigger.url);
|
|
500
|
+
deployment.status = "running" as typeof deployment.status;
|
|
501
|
+
deployment.triggerStatus = "active";
|
|
502
|
+
deployments.save(deployment);
|
|
503
|
+
|
|
504
|
+
triggerClient.installMonitoringDirective(deployment.monitoringDirective).then(() => {
|
|
505
|
+
deployment.status = "succeeded" as typeof deployment.status;
|
|
506
|
+
deployment.completedAt = new Date();
|
|
507
|
+
deployments.save(deployment);
|
|
508
|
+
|
|
509
|
+
debrief.record({
|
|
510
|
+
partitionId: deployment.partitionId ?? null,
|
|
511
|
+
operationId: deployment.id,
|
|
512
|
+
agent: "server",
|
|
513
|
+
decisionType: "trigger-activated",
|
|
514
|
+
decision: `Monitoring directive installed on ${targetEnvoyForTrigger.name}`,
|
|
515
|
+
reasoning: `Trigger activated: monitoring "${deployment.monitoringDirective!.condition}" every ${deployment.monitoringDirective!.intervalMs / 1000}s with ${deployment.monitoringDirective!.cooldownMs / 1000}s cooldown`,
|
|
516
|
+
context: { envoyId: targetEnvoyForTrigger.id, directiveId: deployment.monitoringDirective!.id },
|
|
517
|
+
});
|
|
518
|
+
telemetry.record({ actor, action: "trigger.activated" as TelemetryAction, target: { type: "trigger", id: deployment.id }, details: { envoyId: targetEnvoyForTrigger.id } });
|
|
519
|
+
}).catch((err) => {
|
|
520
|
+
deployment.status = "failed" as typeof deployment.status;
|
|
521
|
+
deployment.triggerStatus = "disabled";
|
|
522
|
+
deployment.failureReason = err instanceof Error ? err.message : "Failed to install monitoring directive";
|
|
523
|
+
deployments.save(deployment);
|
|
524
|
+
|
|
525
|
+
debrief.record({
|
|
526
|
+
partitionId: deployment.partitionId ?? null,
|
|
527
|
+
operationId: deployment.id,
|
|
528
|
+
agent: "server",
|
|
529
|
+
decisionType: "deployment-failure",
|
|
530
|
+
decision: "Failed to install monitoring directive on envoy",
|
|
531
|
+
reasoning: deployment.failureReason!,
|
|
532
|
+
context: { error: deployment.failureReason },
|
|
533
|
+
});
|
|
534
|
+
});
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
// Normal operations: dispatch approved plan to envoy for execution
|
|
538
|
+
else if (envoyClient && deployment.plan && deployment.rollbackPlan) {
|
|
539
|
+
const artifact = artifactStore.get(getArtifactId(deployment) ?? "");
|
|
540
|
+
const serverPort = process.env.PORT ?? "9410";
|
|
541
|
+
const serverUrl = process.env.SYNTH_SERVER_URL ?? `http://localhost:${serverPort}`;
|
|
542
|
+
const progressCallbackUrl = `${serverUrl}/api/operations/${deployment.id}/progress`;
|
|
543
|
+
const callbackToken = envoyRegistry?.list().find(r => r.url === envoyClient.url)?.token;
|
|
544
|
+
|
|
545
|
+
deployment.status = "running" as typeof deployment.status;
|
|
546
|
+
deployments.save(deployment);
|
|
547
|
+
|
|
548
|
+
// Fire-and-forget: execution runs async, progress comes via callback
|
|
549
|
+
envoyClient.executeApprovedPlan({
|
|
550
|
+
operationId: deployment.id,
|
|
551
|
+
plan: deployment.plan,
|
|
552
|
+
rollbackPlan: deployment.rollbackPlan,
|
|
553
|
+
artifactType: artifact?.type ?? "unknown",
|
|
554
|
+
artifactName: artifact?.name ?? "unknown",
|
|
555
|
+
environmentId: deployment.environmentId ?? "",
|
|
556
|
+
progressCallbackUrl,
|
|
557
|
+
callbackToken,
|
|
558
|
+
}).catch((err) => {
|
|
559
|
+
// Execution dispatch failed — record failure
|
|
560
|
+
deployment.status = "failed" as typeof deployment.status;
|
|
561
|
+
deployment.failureReason = err instanceof Error ? err.message : "Execution dispatch failed";
|
|
562
|
+
deployments.save(deployment);
|
|
563
|
+
|
|
564
|
+
debrief.record({
|
|
565
|
+
partitionId: deployment.partitionId ?? null,
|
|
566
|
+
operationId: deployment.id,
|
|
567
|
+
agent: "server",
|
|
568
|
+
decisionType: "deployment-failure",
|
|
569
|
+
decision: "Failed to dispatch approved plan to envoy",
|
|
570
|
+
reasoning: deployment.failureReason!,
|
|
571
|
+
context: { error: deployment.failureReason },
|
|
572
|
+
});
|
|
573
|
+
});
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
return { deployment, approved: true };
|
|
577
|
+
},
|
|
578
|
+
);
|
|
579
|
+
|
|
580
|
+
// POST /api/operations/:id/reject — reject a plan that is waiting for approval.
// Guarded by the "deployment.reject" permission.
//   404 — no operation with that id
//   400 — request body does not satisfy RejectDeploymentSchema
//   409 — operation is not in "awaiting_approval"
// On success the operation is saved as "rejected" with the supplied reason,
// a debrief entry and a telemetry event are recorded, and the handler
// returns { deployment, rejected: true }.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/reject",
  { preHandler: [requirePermission("deployment.reject")] },
  async (request, reply) => {
    const op = deployments.get(request.params.id);
    if (!op) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    const body = RejectDeploymentSchema.safeParse(request.body);
    if (!body.success) {
      return reply.status(400).send({ error: body.error.message });
    }

    if (op.status !== "awaiting_approval") {
      return reply.status(409).send({
        error: `Cannot reject operation in "${op.status}" status — must be "awaiting_approval"`,
      });
    }

    const { reason } = body.data;

    // Persist the status change together with the reviewer's reason.
    op.status = "rejected" as typeof op.status;
    op.rejectionReason = reason;
    deployments.save(op);

    // The debrief keeps the raw (possibly undefined) email; telemetry gets a
    // non-empty actor label.
    const reviewerEmail = request.user?.email;
    const actor = reviewerEmail ?? "anonymous";

    debrief.record({
      partitionId: op.partitionId ?? null,
      operationId: op.id,
      agent: "server",
      decisionType: "system",
      decision: "Operation plan rejected",
      reasoning: reason,
      context: { reason },
      actor: reviewerEmail,
    });
    telemetry.record({
      actor,
      action: "operation.rejected",
      target: { type: "deployment", id: op.id },
      details: { reason },
    });

    return { deployment: op, rejected: true };
  },
);
|
|
622
|
+
|
|
623
|
+
// POST /api/operations/:id/modify — replace the steps of a plan that is
// waiting for approval (user edits steps before approving).
// Guarded by the "deployment.approve" permission.
//   404 — no operation with that id
//   400 — request body does not satisfy ModifyDeploymentPlanSchema
//   409 — operation is not in "awaiting_approval", or it has no plan
//   422 — the envoy validated the new steps and reported violations
// When an envoy client is configured the new steps are validated first.
// If that call throws, the modification still goes through (best effort),
// and the debrief entry records that validation was skipped and why.
// On success returns { deployment, modified: true }.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/modify",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const deployment = deployments.get(request.params.id);
    if (!deployment) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    const parsed = ModifyDeploymentPlanSchema.safeParse(request.body);
    if (!parsed.success) {
      return reply.status(400).send({ error: parsed.error.message });
    }

    if ((deployment.status) !== "awaiting_approval") {
      return reply.status(409).send({ error: `Cannot modify operation in "${deployment.status}" status — must be "awaiting_approval"` });
    }

    if (!deployment.plan) {
      return reply.status(409).send({ error: "Operation has no plan to modify" });
    }

    // Validate modified plan with envoy if available. The outcome is kept so
    // it can be written to the debrief below instead of being dropped.
    let envoyValidation: "passed" | "skipped" = "skipped";
    let envoyValidationError: string | undefined;
    if (envoyClient) {
      try {
        const validation = await envoyClient.validatePlan(parsed.data.steps);
        if (!validation.valid) {
          return reply.status(422).send({
            error: "Modified plan failed envoy validation",
            violations: validation.violations,
          });
        }
        envoyValidation = "passed";
      } catch (err) {
        // Validation call failed (e.g. envoy unreachable) — proceed without
        // validation, but remember the reason for the audit trail.
        envoyValidationError = err instanceof Error ? err.message : String(err);
      }
    }

    // Build structured diff: compare old and new steps position by position.
    const oldSteps = deployment.plan.steps;
    const newSteps = parsed.data.steps;
    const diffLines: string[] = [];
    const maxLen = Math.max(oldSteps.length, newSteps.length);
    for (let i = 0; i < maxLen; i++) {
      const old = oldSteps[i];
      const cur = newSteps[i];
      if (!old) {
        // Index exists only in the new list (i < maxLen guarantees one side is present).
        diffLines.push(`+ Step ${i + 1} (added): ${cur.action} ${cur.target} — ${cur.description}`);
      } else if (!cur) {
        diffLines.push(`- Step ${i + 1} (removed): ${old.action} ${old.target} — ${old.description}`);
      } else if (old.action !== cur.action || old.target !== cur.target || old.description !== cur.description) {
        diffLines.push(`~ Step ${i + 1} (changed): ${old.action} ${old.target} → ${cur.action} ${cur.target}`);
        if (old.description !== cur.description) {
          diffLines.push(` was: ${old.description}`);
          diffLines.push(` now: ${cur.description}`);
        }
      }
    }
    // The fallback text is used when no position differs in action, target or
    // description and both lists have the same length.
    const diffFromPreviousPlan = diffLines.length > 0
      ? diffLines.join("\n")
      : "Steps reordered or metadata changed (actions and targets unchanged)";

    // Apply modifications
    deployment.plan = {
      ...deployment.plan,
      steps: parsed.data.steps,
      diffFromPreviousPlan,
    };
    deployments.save(deployment);

    const actor = (request.user?.email) ?? "anonymous";

    // Record modification in debrief
    debrief.record({
      partitionId: deployment.partitionId ?? null,
      operationId: deployment.id,
      agent: "server",
      decisionType: "plan-modification",
      decision: `Operation plan modified by ${actor}`,
      reasoning: parsed.data.reason,
      context: {
        modifiedBy: actor,
        stepCount: parsed.data.steps.length,
        reason: parsed.data.reason,
        envoyValidation,
        ...(envoyValidationError !== undefined ? { envoyValidationError } : {}),
      },
      actor: request.user?.email,
    });
    telemetry.record({
      actor,
      action: "operation.modified" as Parameters<typeof telemetry.record>[0]["action"],
      target: { type: "deployment", id: deployment.id },
      details: { reason: parsed.data.reason, stepCount: parsed.data.steps.length },
    });
    telemetry.record({
      actor,
      action: "agent.recommendation.overridden",
      target: { type: "deployment", id: deployment.id },
      details: { reason: parsed.data.reason, stepCount: parsed.data.steps.length, diff: diffFromPreviousPlan },
    });

    return { deployment, modified: true };
  },
);
|
|
726
|
+
|
|
727
|
+
// POST /api/operations/:id/replan — regenerate the plan using user feedback.
// Guarded by the "deployment.approve" permission.
//   404 — operation or its artifact not found
//   409 — operation is not in "awaiting_approval"
//   400 — request body does not satisfy ReplanDeploymentSchema
//   422 — no envoy available, or the envoy classified the feedback as a rejection
//   200 { mode: "response" } — the envoy answered the feedback without replanning
//   500 — the planning request itself failed (status is restored to "awaiting_approval")
// On success the new plan, rollback plan and recommendation are stored, the
// operation returns to "awaiting_approval", and { deployment, replanned: true }
// is returned.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/replan",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const deploymentId = request.params.id;
    const deployment = deployments.get(deploymentId);
    if (!deployment) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    if ((deployment.status) !== "awaiting_approval") {
      return reply.status(409).send({ error: `Cannot replan operation in "${deployment.status}" status — must be "awaiting_approval"` });
    }

    const parsed = ReplanDeploymentSchema.safeParse(request.body);
    if (!parsed.success) {
      return reply.status(400).send({ error: parsed.error.message });
    }

    const artifact = artifactStore.get(getArtifactId(deployment) ?? "");
    if (!artifact) {
      return reply.status(404).send({ error: `Artifact not found: ${getArtifactId(deployment)}` });
    }

    const environment = deployment.environmentId ? environments.get(deployment.environmentId) : undefined;
    const partition = deployment.partitionId ? partitions.get(deployment.partitionId) : undefined;

    // Use the envoy recorded on the operation, else the first registered one.
    const planningEnvoy = deployment.envoyId ? envoyRegistry?.get(deployment.envoyId) : envoyRegistry?.list()[0];
    if (!planningEnvoy) {
      return reply.status(422).send({ error: "No envoy available for replanning" });
    }

    // One client serves both the feedback validation and the planning call.
    const planningClient = new EnvoyClient(planningEnvoy.url);

    // Validate feedback with LLM before triggering expensive replan
    try {
      const validation = await planningClient.validateRefinementFeedback({
        feedback: parsed.data.feedback,
        currentPlanSteps: (deployment.plan?.steps ?? []).map((s) => ({
          description: s.description,
          action: s.action,
          target: s.target,
        })),
        artifactName: artifact.name ?? "unknown",
        environmentName: environment?.name ?? "unknown",
      });
      if (validation.mode === "rejection") {
        return reply.status(422).send({ error: validation.message, mode: "rejection" });
      }
      if (validation.mode === "response") {
        return reply.status(200).send({ mode: "response", message: validation.message });
      }
      // mode === "replan" — fall through to full replan
    } catch {
      // Validation call failed — proceed with replan rather than blocking the user
    }

    // Mark the operation as planning while the envoy works on the new plan.
    deployment.status = "planning" as typeof deployment.status;
    deployments.save(deployment);

    // Without an environment, plan against a synthetic "direct:<envoy id>" target.
    const environmentForPlanning = environment
      ? { id: environment.id, name: environment.name, variables: environment.variables }
      : { id: `direct:${planningEnvoy.id}`, name: planningEnvoy.name, variables: {} };

    let result: Awaited<ReturnType<typeof planningClient.requestPlan>>;
    try {
      result = await planningClient.requestPlan({
        operationId: deploymentId,
        artifact: {
          id: artifact.id,
          name: artifact.name,
          type: artifact.type,
          analysis: {
            summary: artifact.analysis.summary,
            dependencies: artifact.analysis.dependencies,
            configurationExpectations: artifact.analysis.configurationExpectations,
            deploymentIntent: artifact.analysis.deploymentIntent,
            confidence: artifact.analysis.confidence,
          },
        },
        environment: environmentForPlanning,
        partition: partition
          ? { id: partition.id, name: partition.name, variables: partition.variables }
          : undefined,
        version: deployment.version ?? "",
        resolvedVariables: deployment.variables,
        refinementFeedback: parsed.data.feedback,
      });
    } catch (err) {
      // Planning failed: re-read the operation and put it back into review.
      const dep = deployments.get(deploymentId);
      if (dep) {
        dep.status = "awaiting_approval" as typeof dep.status;
        deployments.save(dep);
      }
      return reply.status(500).send({ error: err instanceof Error ? err.message : "Replanning failed" });
    }

    // Re-read after the await; the stored operation is the one that gets updated.
    const dep = deployments.get(deploymentId);
    if (!dep) {
      return reply.status(404).send({ error: "Operation not found after replanning" });
    }

    dep.plan = result.plan;
    dep.rollbackPlan = result.rollbackPlan;
    dep.recommendation = computeRecommendation(dep, deployments, result.assessmentSummary);
    dep.status = "awaiting_approval" as typeof dep.status;
    deployments.save(dep);

    debrief.record({
      partitionId: dep.partitionId ?? null,
      operationId: dep.id,
      agent: "envoy",
      decisionType: "plan-generation",
      decision: `Plan regenerated with user feedback (${result.plan.steps.length} steps)`,
      reasoning: result.plan.reasoning,
      context: { stepCount: result.plan.steps.length, envoyId: planningEnvoy.id, refinementFeedback: parsed.data.feedback },
    });

    return { deployment: dep, replanned: true };
  },
);
|
|
849
|
+
|
|
850
|
+
// GET /api/operations/:id/context — cross-system enrichment for one operation.
// Guarded by the "deployment.view" permission; 404 when the id is unknown.
// Returns { enrichment, recommendation } where enrichment holds:
//   recentOperationsToEnv — count for the same environment since 24 hours ago
//   previouslyRolledBack  — whether this artifact/version has a "rolled_back" record
//   conflictingOperations — ids of other running/approved/awaiting_approval
//                           operations on the same environment
//   lastOperationToEnv    — summary of the latest operation on that environment,
//                           omitted when it is this very operation
// Environment-based values fall back to 0 / [] / undefined when the operation
// has no environmentId.
app.get<{ Params: { id: string } }>(
  "/api/operations/:id/context",
  { preHandler: [requirePermission("deployment.view")] },
  async (request, reply) => {
    const current = deployments.get(request.params.id);
    if (!current) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    const envId = current.environmentId;
    const windowStart = new Date(new Date().getTime() - 24 * 60 * 60 * 1000);

    const recentOperationsToEnv = envId
      ? deployments.countByEnvironment(envId, windowStart)
      : 0;

    // Only meaningful when the operation carries a version.
    let previouslyRolledBack = false;
    if (current.version) {
      const priorRollbacks = deployments.findByArtifactVersion(
        getArtifactId(current) ?? "",
        current.version,
        "rolled_back",
      );
      previouslyRolledBack = priorRollbacks.length > 0;
    }

    const isInFlight = (status: string): boolean =>
      status === "running" || status === "approved" || status === "awaiting_approval";

    const conflictingOperations = envId
      ? deployments
          .list()
          .filter((other) => other.environmentId === envId && other.id !== current.id && isInFlight(other.status))
          .map((other) => other.id)
      : [];

    const latestForEnv = envId ? deployments.findLatestByEnvironment(envId) : undefined;
    const lastOperationToEnv = latestForEnv && latestForEnv.id !== current.id
      ? {
          id: latestForEnv.id,
          status: latestForEnv.status,
          version: latestForEnv.version ?? "",
          completedAt: latestForEnv.completedAt,
        }
      : undefined;

    const enrichment: DeploymentEnrichment = {
      recentOperationsToEnv,
      previouslyRolledBack,
      conflictingOperations,
      lastOperationToEnv,
    };

    // Prefer the stored recommendation; compute one only when none is stored.
    const recommendation = current.recommendation ?? computeRecommendation(current, deployments);

    return { enrichment, recommendation };
  },
);
|
|
915
|
+
|
|
916
|
+
// POST /api/operations/:id/request-rollback-plan — ask an envoy to produce a
// rollback plan for a finished operation, based on the steps that ran.
// Guarded by the "deployment.approve" permission.
//   404 — operation or artifact not found
//   409 — operation status is not succeeded / failed / rolled_back
//   503 — no envoy available
//   500 — anything inside the request/store/record sequence threw
//   200 — { deployment, rollbackPlan }; the plan is also saved on the operation
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/request-rollback-plan",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const deployment = deployments.get(request.params.id);
    if (!deployment) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    const isFinished = ["succeeded", "failed", "rolled_back"].includes(deployment.status);
    if (!isFinished) {
      return reply.status(409).send({
        error: `Cannot request rollback plan for operation in "${deployment.status}" status — operation must be finished`,
      });
    }

    const artifact = artifactStore.get(getArtifactId(deployment) ?? "");
    if (!artifact) {
      return reply.status(404).send({ error: "Artifact not found" });
    }

    // Envoy recorded on the operation, else the first registered one.
    const targetEnvoy = deployment.envoyId
      ? envoyRegistry?.get(deployment.envoyId)
      : envoyRegistry?.list()[0];
    if (!targetEnvoy) {
      return reply.status(503).send({ error: "No envoy available to generate rollback plan" });
    }

    const environment = deployment.environmentId ? environments.get(deployment.environmentId) : undefined;

    // Steps handed to the envoy: the execution record when present, otherwise
    // the plan's steps (all reported as "completed"), otherwise nothing.
    let completedSteps: Array<{
      description: string;
      action: string;
      target: string;
      status: "completed" | "failed" | "rolled_back";
      output?: string;
    }>;
    if (deployment.executionRecord) {
      completedSteps = deployment.executionRecord.steps.map((executed) => {
        // Execution records are matched to plan steps by description.
        const planned = deployment.plan?.steps.find((p) => p.description === executed.description);
        return {
          description: executed.description,
          action: planned?.action ?? "unknown",
          target: planned?.target ?? "",
          status: executed.status,
          output: executed.output ?? executed.error,
        };
      });
    } else if (deployment.plan) {
      completedSteps = deployment.plan.steps.map((planned) => ({
        description: planned.description,
        action: planned.action,
        target: planned.target,
        status: "completed" as const,
      }));
    } else {
      completedSteps = [];
    }

    const rollbackClient = new EnvoyClient(targetEnvoy.url);

    try {
      const { analysis } = artifact;
      const rollbackPlan = await rollbackClient.requestRollbackPlan({
        operationId: deployment.id,
        artifact: {
          name: artifact.name,
          type: artifact.type,
          analysis: {
            summary: analysis.summary,
            dependencies: analysis.dependencies,
            configurationExpectations: analysis.configurationExpectations,
            deploymentIntent: analysis.deploymentIntent,
            confidence: analysis.confidence,
          },
        },
        environment: {
          id: deployment.environmentId ?? "",
          name: environment?.name ?? deployment.environmentId ?? "unknown",
        },
        completedSteps,
        deployedVariables: deployment.variables,
        version: deployment.version ?? "",
        failureReason: deployment.failureReason ?? undefined,
      });

      // Keep the generated plan on the operation for a later execute-rollback.
      deployment.rollbackPlan = rollbackPlan;
      deployments.save(deployment);

      const requesterEmail = request.user?.email;
      const actor = requesterEmail ?? "anonymous";
      const stepCount = rollbackPlan.steps.length;

      debrief.record({
        partitionId: deployment.partitionId ?? null,
        operationId: deployment.id,
        agent: "server",
        decisionType: "plan-generation",
        decision: `Rollback plan requested and generated for ${artifact.name} v${deployment.version}`,
        reasoning: rollbackPlan.reasoning,
        context: {
          requestedBy: actor,
          stepCount,
          envoyId: targetEnvoy.id,
          deploymentStatus: deployment.status,
        },
        actor: requesterEmail,
      });
      telemetry.record({
        actor,
        action: "deployment.rollback-plan-requested" as Parameters<typeof telemetry.record>[0]["action"],
        target: { type: "deployment", id: deployment.id },
        details: { stepCount },
      });

      return reply.status(200).send({ deployment, rollbackPlan });
    } catch (err) {
      const details = err instanceof Error ? err.message : String(err);
      return reply.status(500).send({
        error: "Failed to generate rollback plan",
        details,
      });
    }
  },
);
|
|
1033
|
+
|
|
1034
|
+
// POST /api/operations/:id/execute-rollback — run the stored rollback plan
// against an envoy.
// Guarded by the "deployment.approve" permission.
//   404 — no operation with that id
//   409 — no rollback plan stored, or status is not succeeded / failed
//   503 — no envoy available
//   202 — { deployment, accepted: true }; execution continues in the background
// The operation is saved as "running" before dispatch. When the envoy call
// settles, the operation becomes "rolled_back" or "failed", gets a
// completedAt timestamp, and a debrief entry is written. This holds both when
// the envoy reports a failed result and when the call itself rejects.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/execute-rollback",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const deployment = deployments.get(request.params.id);
    if (!deployment) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    if (!deployment.rollbackPlan) {
      return reply.status(409).send({ error: "No rollback plan available — request one first" });
    }

    // "rolled_back" is not accepted here, unlike in request-rollback-plan.
    const finishedStatuses = new Set(["succeeded", "failed"]);
    if (!finishedStatuses.has(deployment.status)) {
      return reply.status(409).send({
        error: `Cannot execute rollback for operation in "${deployment.status}" status`,
      });
    }

    const artifact = artifactStore.get(getArtifactId(deployment) ?? "");
    const targetEnvoy = deployment.envoyId
      ? envoyRegistry?.get(deployment.envoyId)
      : envoyRegistry?.list()[0];

    if (!targetEnvoy) {
      return reply.status(503).send({ error: "No envoy available to execute rollback" });
    }

    const actor = (request.user?.email) ?? "anonymous";
    // URL passed to the envoy as progressCallbackUrl.
    const serverPort = process.env.PORT ?? "9410";
    const serverUrl = process.env.SYNTH_SERVER_URL ?? `http://localhost:${serverPort}`;
    const progressCallbackUrl = `${serverUrl}/api/operations/${deployment.id}/progress`;

    deployment.status = "running" as typeof deployment.status;
    deployments.save(deployment);

    debrief.record({
      partitionId: deployment.partitionId ?? null,
      operationId: deployment.id,
      agent: "server",
      decisionType: "rollback-execution",
      decision: `Rollback execution initiated for ${artifact?.name ?? getArtifactId(deployment)} v${deployment.version}`,
      reasoning: `Rollback requested by ${actor}. Executing ${deployment.rollbackPlan.steps.length} rollback step(s).`,
      context: { initiatedBy: actor, stepCount: deployment.rollbackPlan.steps.length },
      actor: request.user?.email,
    });
    telemetry.record({
      actor,
      action: "deployment.rollback-executed" as Parameters<typeof telemetry.record>[0]["action"],
      target: { type: "deployment", id: deployment.id },
      details: { stepCount: deployment.rollbackPlan.steps.length },
    });

    const rollbackClient = new EnvoyClient(targetEnvoy.url);

    // Execute the rollback plan as if it were a forward plan — it IS a forward plan
    // (just in the reverse direction). Use an empty no-op plan as the "rollback of rollback".
    const emptyPlan = { steps: [], reasoning: "No rollback of rollback." };

    // Not awaited: the 202 below is sent while execution is still in flight.
    rollbackClient.executeApprovedPlan({
      operationId: deployment.id,
      plan: deployment.rollbackPlan,
      rollbackPlan: emptyPlan,
      artifactType: artifact?.type ?? "unknown",
      artifactName: artifact?.name ?? "unknown",
      environmentId: deployment.environmentId ?? "",
      progressCallbackUrl,
      callbackToken: targetEnvoy.token,
    }).then((result) => {
      // Re-read: the stored operation may have changed since dispatch.
      const dep = deployments.get(deployment.id);
      if (!dep) return;

      dep.status = result.success ? "rolled_back" as typeof dep.status : "failed" as typeof dep.status;
      if (!result.success) {
        dep.failureReason = result.failureReason ?? "Rollback execution failed";
      }
      dep.completedAt = new Date();
      deployments.save(dep);

      debrief.record({
        partitionId: dep.partitionId ?? null,
        operationId: dep.id,
        agent: "server",
        decisionType: "rollback-execution",
        decision: result.success
          ? `Rollback completed successfully for ${artifact?.name ?? getArtifactId(dep)} v${dep.version}`
          : `Rollback failed for ${artifact?.name ?? getArtifactId(dep)} v${dep.version}`,
        reasoning: result.success
          ? `All rollback steps executed successfully.`
          : `Rollback failed: ${result.failureReason}`,
        context: { success: result.success, failureReason: result.failureReason },
      });
    }).catch((err) => {
      const dep = deployments.get(deployment.id);
      if (!dep) return;

      // Treat a rejected call like a failed result: terminal status,
      // completion timestamp, and an audit entry.
      dep.status = "failed" as typeof dep.status;
      dep.failureReason = err instanceof Error ? err.message : "Rollback execution dispatch failed";
      dep.completedAt = new Date();
      deployments.save(dep);

      debrief.record({
        partitionId: dep.partitionId ?? null,
        operationId: dep.id,
        agent: "server",
        decisionType: "rollback-execution",
        decision: `Rollback failed for ${artifact?.name ?? getArtifactId(dep)} v${dep.version}`,
        reasoning: `Rollback failed: ${dep.failureReason}`,
        context: { success: false, failureReason: dep.failureReason },
      });
    });

    return reply.status(202).send({ deployment, accepted: true });
  },
);
|
|
1140
|
+
|
|
1141
|
+
// Retry (redeploy) — create a new deployment with the same parameters as the source
|
|
1142
|
+
app.post<{ Params: { id: string } }>(
|
|
1143
|
+
"/api/operations/:id/retry",
|
|
1144
|
+
{ preHandler: [requirePermission("deployment.create")] },
|
|
1145
|
+
async (request, reply) => {
|
|
1146
|
+
const source = deployments.get(request.params.id);
|
|
1147
|
+
if (!source) {
|
|
1148
|
+
return reply.status(404).send({ error: "Operation not found" });
|
|
1149
|
+
}
|
|
1150
|
+
|
|
1151
|
+
// Calculate attempt number by following the retryOf chain
|
|
1152
|
+
let attemptNumber = 1;
|
|
1153
|
+
let cursor: typeof source | undefined = source;
|
|
1154
|
+
while (cursor?.retryOf) {
|
|
1155
|
+
attemptNumber++;
|
|
1156
|
+
cursor = deployments.get(cursor.retryOf);
|
|
1157
|
+
}
|
|
1158
|
+
attemptNumber++; // this new deployment is one more
|
|
1159
|
+
|
|
1160
|
+
// Validate artifact still exists
|
|
1161
|
+
const artifact = artifactStore.get(getArtifactId(source) ?? "");
|
|
1162
|
+
if (!artifact) {
|
|
1163
|
+
return reply.status(404).send({ error: `Artifact not found: ${getArtifactId(source)}` });
|
|
1164
|
+
}
|
|
1165
|
+
|
|
1166
|
+
// Validate environment still exists (if present on source)
|
|
1167
|
+
const environment = source.environmentId ? environments.get(source.environmentId) : undefined;
|
|
1168
|
+
if (source.environmentId && !environment) {
|
|
1169
|
+
return reply.status(404).send({ error: `Environment not found: ${source.environmentId}` });
|
|
1170
|
+
}
|
|
1171
|
+
|
|
1172
|
+
// Validate partition still exists (if present on source)
|
|
1173
|
+
const partition = source.partitionId ? partitions.get(source.partitionId) : undefined;
|
|
1174
|
+
if (source.partitionId && !partition) {
|
|
1175
|
+
return reply.status(404).send({ error: `Partition not found: ${source.partitionId}` });
|
|
1176
|
+
}
|
|
1177
|
+
|
|
1178
|
+
// Validate envoy still exists (if present on source)
|
|
1179
|
+
const targetEnvoy = source.envoyId ? envoyRegistry?.get(source.envoyId) : undefined;
|
|
1180
|
+
if (source.envoyId && !targetEnvoy) {
|
|
1181
|
+
return reply.status(404).send({ error: `Envoy not found: ${source.envoyId}` });
|
|
1182
|
+
}
|
|
1183
|
+
|
|
1184
|
+
// Resolve variables — same logic as POST /api/deployments
|
|
1185
|
+
const envVars = environment ? environment.variables : {};
|
|
1186
|
+
const partitionVars = partition?.variables ?? {};
|
|
1187
|
+
const resolved: Record<string, string> = { ...partitionVars, ...envVars };
|
|
1188
|
+
|
|
1189
|
+
const deployment = {
|
|
1190
|
+
id: crypto.randomUUID(),
|
|
1191
|
+
input: source.input,
|
|
1192
|
+
environmentId: source.environmentId,
|
|
1193
|
+
partitionId: source.partitionId,
|
|
1194
|
+
envoyId: targetEnvoy?.id,
|
|
1195
|
+
version: source.version ?? "",
|
|
1196
|
+
status: "pending" as const,
|
|
1197
|
+
variables: resolved,
|
|
1198
|
+
retryOf: source.id,
|
|
1199
|
+
debriefEntryIds: [] as string[],
|
|
1200
|
+
createdAt: new Date(),
|
|
1201
|
+
};
|
|
1202
|
+
|
|
1203
|
+
deployments.save(deployment);
|
|
1204
|
+
|
|
1205
|
+
const actor = (request.user?.email) ?? "anonymous";
|
|
1206
|
+
telemetry.record({ actor, action: "operation.created", target: { type: "deployment", id: deployment.id }, details: { artifactId: getArtifactId(source), environmentId: source.environmentId, partitionId: source.partitionId, envoyId: source.envoyId, retryOf: source.id } });
|
|
1207
|
+
|
|
1208
|
+
// Record retry debrief entry
|
|
1209
|
+
debrief.record({
|
|
1210
|
+
partitionId: deployment.partitionId ?? null,
|
|
1211
|
+
operationId: deployment.id,
|
|
1212
|
+
agent: "server",
|
|
1213
|
+
decisionType: "system",
|
|
1214
|
+
decision: `Retry of operation ${source.id} (attempt #${attemptNumber})`,
|
|
1215
|
+
reasoning: `User initiated retry of operation ${source.id}. Same artifact, version, environment, and partition.`,
|
|
1216
|
+
context: { retryOf: source.id, attemptNumber, actor },
|
|
1217
|
+
actor: request.user?.email,
|
|
1218
|
+
});
|
|
1219
|
+
|
|
1220
|
+
// Dispatch planning — same logic as POST /api/deployments
|
|
1221
|
+
if (envoyRegistry) {
|
|
1222
|
+
const planningEnvoy = targetEnvoy
|
|
1223
|
+
?? (environment ? envoyRegistry.findForEnvironment(environment.name) : undefined)
|
|
1224
|
+
?? envoyRegistry.list()[0];
|
|
1225
|
+
|
|
1226
|
+
if (planningEnvoy) {
|
|
1227
|
+
const planningClient = new EnvoyClient(planningEnvoy.url);
|
|
1228
|
+
const environmentForPlanning = environment
|
|
1229
|
+
? { id: environment.id, name: environment.name, variables: environment.variables }
|
|
1230
|
+
: { id: `direct:${planningEnvoy.id}`, name: planningEnvoy.name, variables: {} };
|
|
1231
|
+
|
|
1232
|
+
planningClient.requestPlan({
|
|
1233
|
+
operationId: deployment.id,
|
|
1234
|
+
artifact: {
|
|
1235
|
+
id: artifact.id,
|
|
1236
|
+
name: artifact.name,
|
|
1237
|
+
type: artifact.type,
|
|
1238
|
+
analysis: {
|
|
1239
|
+
summary: artifact.analysis.summary,
|
|
1240
|
+
dependencies: artifact.analysis.dependencies,
|
|
1241
|
+
configurationExpectations: artifact.analysis.configurationExpectations,
|
|
1242
|
+
deploymentIntent: artifact.analysis.deploymentIntent,
|
|
1243
|
+
confidence: artifact.analysis.confidence,
|
|
1244
|
+
},
|
|
1245
|
+
},
|
|
1246
|
+
environment: environmentForPlanning,
|
|
1247
|
+
partition: partition
|
|
1248
|
+
? { id: partition.id, name: partition.name, variables: partition.variables }
|
|
1249
|
+
: undefined,
|
|
1250
|
+
version: deployment.version ?? "",
|
|
1251
|
+
resolvedVariables: resolved,
|
|
1252
|
+
}).then((result) => {
|
|
1253
|
+
const dep = deployments.get(deployment.id);
|
|
1254
|
+
if (!dep || dep.status !== "pending") return;
|
|
1255
|
+
|
|
1256
|
+
dep.plan = result.plan;
|
|
1257
|
+
dep.rollbackPlan = result.rollbackPlan;
|
|
1258
|
+
dep.envoyId = planningEnvoy.id;
|
|
1259
|
+
|
|
1260
|
+
if (result.blocked) {
|
|
1261
|
+
dep.status = "failed" as typeof dep.status;
|
|
1262
|
+
dep.failureReason = result.blockReason ?? "Plan blocked due to unrecoverable precondition failures";
|
|
1263
|
+
deployments.save(dep);
|
|
1264
|
+
|
|
1265
|
+
debrief.record({
|
|
1266
|
+
partitionId: dep.partitionId ?? null,
|
|
1267
|
+
operationId: dep.id,
|
|
1268
|
+
agent: "envoy",
|
|
1269
|
+
decisionType: "plan-generation",
|
|
1270
|
+
decision: `Operation plan blocked — infrastructure prerequisites not met`,
|
|
1271
|
+
reasoning: result.blockReason ?? result.plan.reasoning,
|
|
1272
|
+
context: { stepCount: result.plan.steps.length, envoyId: planningEnvoy.id, blocked: true },
|
|
1273
|
+
});
|
|
1274
|
+
} else {
|
|
1275
|
+
dep.status = "awaiting_approval" as typeof dep.status;
|
|
1276
|
+
dep.recommendation = computeRecommendation(dep, deployments, result.assessmentSummary);
|
|
1277
|
+
deployments.save(dep);
|
|
1278
|
+
|
|
1279
|
+
debrief.record({
|
|
1280
|
+
partitionId: dep.partitionId ?? null,
|
|
1281
|
+
operationId: dep.id,
|
|
1282
|
+
agent: "envoy",
|
|
1283
|
+
decisionType: "plan-generation",
|
|
1284
|
+
decision: `Operation plan generated with ${result.plan.steps.length} steps`,
|
|
1285
|
+
reasoning: result.plan.reasoning,
|
|
1286
|
+
context: { stepCount: result.plan.steps.length, envoyId: planningEnvoy.id, delta: result.delta },
|
|
1287
|
+
});
|
|
1288
|
+
}
|
|
1289
|
+
}).catch((err) => {
|
|
1290
|
+
const dep = deployments.get(deployment.id);
|
|
1291
|
+
if (!dep || dep.status !== "pending") return;
|
|
1292
|
+
|
|
1293
|
+
dep.status = "failed" as typeof dep.status;
|
|
1294
|
+
dep.failureReason = err instanceof Error ? err.message : "Planning failed";
|
|
1295
|
+
deployments.save(dep);
|
|
1296
|
+
|
|
1297
|
+
debrief.record({
|
|
1298
|
+
partitionId: dep.partitionId ?? null,
|
|
1299
|
+
operationId: dep.id,
|
|
1300
|
+
agent: "server",
|
|
1301
|
+
decisionType: "deployment-failure",
|
|
1302
|
+
decision: "Envoy planning failed",
|
|
1303
|
+
reasoning: dep.failureReason!,
|
|
1304
|
+
context: { error: dep.failureReason, envoyId: planningEnvoy.id },
|
|
1305
|
+
});
|
|
1306
|
+
});
|
|
1307
|
+
}
|
|
1308
|
+
}
|
|
1309
|
+
|
|
1310
|
+
return reply.status(201).send({ deployment, sourceDeploymentId: source.id, attemptNumber });
|
|
1311
|
+
},
|
|
1312
|
+
);
|
|
1313
|
+
|
|
1314
|
+
// Postmortem for one operation.
// Always returns the heuristic postmortem; the LLM-written postmortem is added
// only when the async generator did not report a heuristic fallback.
app.get<{ Params: { id: string } }>(
  "/api/operations/:id/postmortem",
  { preHandler: [requirePermission("deployment.view")] },
  async (request, reply) => {
    const operation = deployments.get(request.params.id);
    if (!operation) {
      return reply.status(404).send({ error: "Operation not found" });
    }

    const debriefEntries = debrief.getByOperation(operation.id);
    const postmortem = generatePostmortem(debriefEntries, operation);
    const llmOutcome = await generatePostmortemAsync(debriefEntries, operation, llm);

    if (llmOutcome.heuristicFallback) {
      return { postmortem };
    }
    return { postmortem, llmPostmortem: llmOutcome.llmPostmortem };
  },
);
|
|
1333
|
+
|
|
1334
|
+
// Get recent debrief entries.
// Query parameters (validated by DebriefQuerySchema; an invalid query is
// treated as "no parameters"):
//   limit        — maximum number of entries returned (defaults to 50)
//   partitionId  — keep only entries for this partition
//   decisionType — keep only entries of this decision type
//   q            — full-text search; takes priority over the filter-only paths
app.get("/api/debrief", { preHandler: [requirePermission("deployment.view")] }, async (request) => {
  const qParsed = DebriefQuerySchema.safeParse(request.query);
  const { limit, partitionId, decisionType, q: searchQuery } = qParsed.success ? qParsed.data : {};

  const max = limit ?? 50;

  // Full-text search — takes priority over filters. The filters narrow the
  // (already limited) search result, so fewer than `max` entries may come back.
  if (searchQuery) {
    let found = debrief.search(searchQuery, max);
    if (partitionId) found = found.filter((e) => e.partitionId === partitionId);
    if (decisionType) found = found.filter((e) => e.decisionType === decisionType);
    return { entries: found };
  }

  // No filters — fast path
  if (!partitionId && !decisionType) {
    return { entries: debrief.getRecent(max) };
  }

  // Start from the partition lookup when a partition is given, otherwise from
  // the decision-type lookup; then narrow by decision type if both were given.
  const candidates = partitionId
    ? debrief.getByPartition(partitionId)
    : debrief.getByType(decisionType as Parameters<typeof debrief.getByType>[0]);
  const matching = partitionId && decisionType
    ? candidates.filter((e) => e.decisionType === decisionType)
    : candidates;

  // Sort a copy, newest first: Array.prototype.sort works in place, and the
  // store may hand back an array it still owns, so sorting it directly could
  // reorder store state as a side effect of a read-only request.
  const newestFirst = [...matching].sort((a, b) => b.timestamp.getTime() - a.timestamp.getTime());
  return { entries: newestFirst.slice(0, max) };
});
|
|
1369
|
+
|
|
1370
|
+
// Pinned operations for quick access: list endpoint.
// Static route registered before parameterized :id routes to avoid shadowing
app.get("/api/operations/pinned", { preHandler: [requirePermission("deployment.view")] }, async () => {
  const pinnedIds = debrief.getPinnedOperationIds();

  // Resolve each pinned id; ids with no matching operation are left out of
  // `operations` but still reported in `pinnedIds`.
  const operations = [];
  for (const pinnedId of pinnedIds) {
    const operation = deployments.get(pinnedId);
    if (operation) operations.push(operation);
  }

  return { operations, pinnedIds };
});
|
|
1377
|
+
|
|
1378
|
+
// Mark the operation with the given id as pinned.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/pin",
  { preHandler: [requirePermission("deployment.view")] },
  async ({ params }) => {
    const { id: operationId } = params;
    debrief.pinOperation(operationId);
    return { pinned: true };
  },
);
|
|
1386
|
+
|
|
1387
|
+
// Remove the pinned mark from the operation with the given id.
app.delete<{ Params: { id: string } }>(
  "/api/operations/:id/pin",
  { preHandler: [requirePermission("deployment.view")] },
  async ({ params }) => {
    const { id: operationId } = params;
    debrief.unpinOperation(operationId);
    return { pinned: false };
  },
);
|
|
1395
|
+
|
|
1396
|
+
// ---------------------------------------------------------------------------
|
|
1397
|
+
// Progress streaming — envoy callback and SSE endpoints
|
|
1398
|
+
// ---------------------------------------------------------------------------
|
|
1399
|
+
|
|
1400
|
+
// POST /api/operations/:id/progress — receives progress events from envoy.
// Responses: 501 when no progress store is configured, 401 on a bad envoy
// token, 400 on an invalid body or an id mismatch, 200 once the event is stored.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/progress",
  async (request, reply) => {
    if (!progressStore) {
      return reply.status(501).send({ error: "Progress streaming not configured" });
    }

    // Validate envoy token — this route is exempt from JWT auth.
    // The check only runs when an envoy registry is configured.
    if (envoyRegistry) {
      const bearerPrefix = "Bearer ";
      const rawHeader = (request.headers.authorization ?? "") as string;
      const envoyToken = rawHeader.startsWith(bearerPrefix) ? rawHeader.slice(bearerPrefix.length) : null;
      const tokenAccepted = Boolean(envoyToken) && envoyRegistry.validateToken(envoyToken as string);
      if (!tokenAccepted) {
        return reply.status(401).send({ error: "Invalid or missing envoy token" });
      }
    }

    const validation = ProgressEventSchema.safeParse(request.body);
    if (!validation.success) {
      return reply.status(400).send({ error: "Invalid progress event", details: validation.error.format() });
    }

    const progressEvent = validation.data;

    // The id in the URL must agree with the deploymentId carried in the body
    const idsAgree = progressEvent.deploymentId === request.params.id;
    if (!idsAgree) {
      return reply.status(400).send({ error: "Operation ID in URL does not match event body" });
    }

    progressStore.push(progressEvent);
    return reply.status(200).send({ received: true });
  },
);
|
|
1433
|
+
|
|
1434
|
+
// GET /api/operations/:id/stream — SSE endpoint for live progress.
// Access is gated by the requirePermission("deployment.view") preHandler.
// NOTE(review): the original note says auth arrives via a ?token= query param
// because EventSource cannot send headers; that handling is not visible in
// this block — confirm against the auth middleware.
//
// Flow: replay stored events (all of them, or only those after Last-Event-ID
// on reconnect), close immediately if the replay already ends with
// "deployment-completed", otherwise subscribe for live events until the
// operation completes or the client disconnects.
app.get<{ Params: { id: string } }>(
  "/api/operations/:id/stream",
  { preHandler: [requirePermission("deployment.view")] },
  (request, reply) => {
    if (!progressStore) {
      reply.status(501).send({ error: "Progress streaming not configured" });
      return;
    }
    // Captured once so the closures below need no non-null assertions.
    const store = progressStore;

    // Hijack the connection so Fastify does not finalize the response
    reply.hijack();

    // Set SSE headers
    reply.raw.writeHead(200, {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      "Connection": "keep-alive",
      "X-Accel-Buffering": "no",
    });

    const deploymentId = request.params.id;

    // One SSE frame per event: the event id line followed by the JSON payload.
    const toFrame = (event: { id?: unknown }): string =>
      `id: ${event.id}\ndata: ${JSON.stringify(event)}\n\n`;

    // Check for Last-Event-ID header (reconnection with replay).
    // A missing or non-numeric header yields 0/NaN, both falsy → full replay.
    const lastEventIdHeader = request.headers["last-event-id"];
    const lastEventId = lastEventIdHeader ? parseInt(String(lastEventIdHeader), 10) : 0;

    // Send catch-up events — either all (fresh connect) or since last ID (reconnect)
    const existing = lastEventId
      ? store.getEventsSince(deploymentId, lastEventId)
      : store.getEvents(deploymentId);
    for (const event of existing) {
      reply.raw.write(toFrame(event));
    }

    // Check if deployment already completed — if so, close after catch-up
    const lastEvent = existing[existing.length - 1];
    if (lastEvent?.type === "deployment-completed") {
      reply.raw.end();
      return;
    }

    // Subscribe to new events
    const listener = (event: { id?: number; deploymentId: string; type: string }) => {
      try {
        reply.raw.write(toFrame(event));

        // Close the stream when deployment completes. Unsubscribe first so no
        // later event for this operation is written to an ended response.
        if (event.type === "deployment-completed") {
          unsubscribe();
          reply.raw.end();
        }
      } catch {
        // Write failed (e.g. client disconnected) — clean up
        unsubscribe();
      }
    };

    // Safe to call more than once: each call just asks the store to drop the listener.
    const unsubscribe = (): void => {
      store.removeListener(deploymentId, listener);
    };

    store.addListener(deploymentId, listener);

    // Clean up on client disconnect
    request.raw.on("close", unsubscribe);
  },
);
|
|
1500
|
+
|
|
1501
|
+
// -- Health reports from envoys (trigger system) ---------------------------
|
|
1502
|
+
|
|
1503
|
+
// POST /api/health-reports — an envoy reports that a trigger's condition fired.
//
// Steps:
//   1. Validate the envoy bearer token (only when an envoy registry is configured).
//   2. Validate the body against HealthReportSchema and look up the trigger operation.
//   3. Record the report in the debrief log.
//   4. Suppress the report if the trigger is paused/disabled, or if a child
//      operation spawned by this trigger is still active (deduplication).
//   5. Otherwise spawn a child operation, update trigger stats and dispatch
//      planning to an envoy without awaiting it (the result is applied later).
//
// Responses: 401 bad token, 400 invalid body, 404 unknown trigger,
// 200 { spawned: false, reason } when suppressed, 201 { spawned: true } otherwise.
app.post("/api/health-reports", async (request, reply) => {
  // Validate envoy token — same pattern as /api/envoy/report
  if (envoyRegistry) {
    const authHeader = (request.headers.authorization ?? "") as string;
    const token = authHeader.startsWith("Bearer ") ? authHeader.slice(7) : null;
    if (!token || !envoyRegistry.validateToken(token)) {
      return reply.status(401).send({ error: "Invalid or missing envoy token" });
    }
  }

  const { HealthReportSchema } = await import("@synth-deploy/core");
  const parsed = HealthReportSchema.safeParse(request.body);
  if (!parsed.success) {
    return reply.status(400).send({ error: "Invalid health report", details: parsed.error.format() });
  }

  const report = parsed.data;

  // Find the trigger operation
  const triggerOp = deployments.get(report.triggerOperationId);
  if (!triggerOp || triggerOp.input.type !== "trigger") {
    return reply.status(404).send({ error: `Trigger operation not found: ${report.triggerOperationId}` });
  }

  // Record the health report
  debrief.record({
    partitionId: report.partitionId ?? null,
    operationId: triggerOp.id,
    agent: "envoy",
    decisionType: "health-report-received",
    decision: `Health report: ${report.summary}`,
    reasoning: `Trigger condition met on ${report.envoyId}. Probes: ${report.probeResults.map(p => `${p.label}=${p.parsedValue ?? p.output}`).join(", ")}`,
    context: { directiveId: report.directiveId, envoyId: report.envoyId, probeResults: report.probeResults },
  });

  // A paused or disabled trigger must not spawn work. Reports can still arrive
  // in that state: the disable route ignores envoy-side removal failures, and a
  // report may already be in flight when a pause takes effect.
  if (triggerOp.triggerStatus === "paused" || triggerOp.triggerStatus === "disabled") {
    debrief.record({
      partitionId: report.partitionId ?? null,
      operationId: triggerOp.id,
      agent: "server",
      decisionType: "trigger-suppressed",
      decision: `Trigger suppressed — trigger is ${triggerOp.triggerStatus}`,
      reasoning: `Health report from ${report.envoyId} ignored because the trigger is ${triggerOp.triggerStatus}; no child operation was spawned.`,
      context: { envoyId: report.envoyId, triggerStatus: triggerOp.triggerStatus },
    });
    return reply.status(200).send({ spawned: false, reason: `trigger-${triggerOp.triggerStatus}` });
  }

  // Deduplication: check for active child operations from this trigger
  const allOps = deployments.list();
  const activeChild = allOps.find(
    (op) => op.lineage === triggerOp.id &&
      ["pending", "planning", "awaiting_approval", "approved", "running"].includes(op.status),
  );

  if (activeChild) {
    // Suppress — record that we suppressed
    triggerOp.triggerSuppressedCount = (triggerOp.triggerSuppressedCount ?? 0) + 1;
    deployments.save(triggerOp);

    debrief.record({
      partitionId: report.partitionId ?? null,
      operationId: triggerOp.id,
      agent: "server",
      decisionType: "trigger-suppressed",
      decision: `Trigger suppressed — child operation ${activeChild.id} is still in progress (${activeChild.status})`,
      reasoning: `Deduplication: an operation spawned by this trigger is already active. Suppressed ${triggerOp.triggerSuppressedCount} time(s) total.`,
      context: { activeChildId: activeChild.id, activeChildStatus: activeChild.status, suppressedCount: triggerOp.triggerSuppressedCount },
    });

    return reply.status(200).send({ spawned: false, reason: "deduplicated", activeChildId: activeChild.id });
  }

  // Spawn child operation. The response type comes from the trigger's
  // monitoring directive and defaults to "maintain".
  const triggerInput = triggerOp.input as { type: "trigger"; condition: string; responseIntent: string; parameters?: Record<string, unknown> };
  const responseType = triggerOp.monitoringDirective?.responseType ?? "maintain";
  const childOp = {
    id: crypto.randomUUID(),
    // NOTE(review): a "deploy" response is created with an empty artifactId and
    // no artifact is sent with the plan request below — confirm this is intended.
    input: responseType === "deploy"
      ? { type: "deploy" as const, artifactId: "" }
      : { type: "maintain" as const, intent: triggerInput.responseIntent, parameters: triggerInput.parameters },
    intent: triggerInput.responseIntent,
    lineage: triggerOp.id,
    triggeredBy: "trigger" as const,
    // Values from the report win over the trigger's own environment/partition
    environmentId: report.environmentId ?? triggerOp.environmentId,
    partitionId: report.partitionId ?? triggerOp.partitionId,
    envoyId: report.envoyId,
    version: "",
    status: "pending" as const,
    variables: triggerOp.variables,
    debriefEntryIds: [] as string[],
    createdAt: new Date(),
  };

  deployments.save(childOp);

  // Update trigger stats
  triggerOp.triggerFireCount = (triggerOp.triggerFireCount ?? 0) + 1;
  triggerOp.triggerLastFiredAt = new Date();
  deployments.save(triggerOp);

  debrief.record({
    partitionId: childOp.partitionId ?? null,
    operationId: childOp.id,
    agent: "server",
    decisionType: "trigger-fired",
    decision: `Trigger fired — spawned child operation ${childOp.id}`,
    reasoning: `Condition "${triggerInput.condition}" met. Response: "${triggerInput.responseIntent}". Fire count: ${triggerOp.triggerFireCount}.`,
    context: { triggerId: triggerOp.id, envoyId: report.envoyId, fireCount: triggerOp.triggerFireCount },
  });
  telemetry.record({ actor: "agent", action: "trigger.fired" as TelemetryAction, target: { type: "trigger", id: triggerOp.id }, details: { childOperationId: childOp.id } });

  // Dispatch planning for the child operation (same as new operation flow).
  // The plan request is not awaited: the 201 below is sent right away and the
  // promise callbacks update the stored child once the envoy answers.
  if (envoyRegistry) {
    // Prefer the reporting envoy; fall back to the first registered one
    const childEnvoy = report.envoyId
      ? envoyRegistry.get(report.envoyId)
      : envoyRegistry.list()[0];

    if (childEnvoy) {
      const planningClient = new EnvoyClient(childEnvoy.url);
      const environment = childOp.environmentId ? environments.get(childOp.environmentId) : undefined;
      // Without a known environment, plan against a synthetic "direct:<envoy>" one
      const environmentForPlanning = environment
        ? { id: environment.id, name: environment.name, variables: environment.variables }
        : { id: `direct:${childEnvoy.id}`, name: childEnvoy.name, variables: {} };

      planningClient.requestPlan({
        operationId: childOp.id,
        operationType: responseType as "deploy" | "query" | "investigate" | "maintain" | "trigger",
        intent: childOp.intent,
        environment: environmentForPlanning,
        version: "",
        resolvedVariables: childOp.variables,
      }).then((result) => {
        // Re-read the child: only apply the plan if it is still pending
        const dep = deployments.get(childOp.id);
        if (!dep || dep.status !== "pending") return;

        dep.plan = result.plan;
        dep.rollbackPlan = result.rollbackPlan;
        dep.envoyId = childEnvoy.id;

        if (result.blocked) {
          dep.status = "failed" as typeof dep.status;
          dep.failureReason = result.blockReason ?? "Plan blocked";
          deployments.save(dep);
        } else {
          dep.status = "awaiting_approval" as typeof dep.status;
          dep.recommendation = computeRecommendation(dep, deployments, result.assessmentSummary);
          deployments.save(dep);
        }
      }).catch((err) => {
        // Planning failed — mark the child failed if it is still pending
        const dep = deployments.get(childOp.id);
        if (!dep || dep.status !== "pending") return;
        dep.status = "failed" as typeof dep.status;
        dep.failureReason = err instanceof Error ? err.message : "Planning failed";
        deployments.save(dep);
      });
    }
  }

  return reply.status(201).send({ spawned: true, childOperationId: childOp.id });
});
|
|
1651
|
+
|
|
1652
|
+
// -- Trigger management (pause/resume/disable) ----------------------------
|
|
1653
|
+
|
|
1654
|
+
// Pause an active trigger.
// 404 when the id is not a trigger operation, 409 when the trigger is not
// currently "active". The envoy is asked to pause first; local state is only
// updated after that call returns.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/trigger/pause",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const trigger = deployments.get(request.params.id);
    if (!trigger || trigger.input.type !== "trigger") {
      return reply.status(404).send({ error: "Trigger operation not found" });
    }
    if (trigger.triggerStatus !== "active") {
      return reply.status(409).send({ error: `Cannot pause trigger in "${trigger.triggerStatus}" status` });
    }

    // Pause on envoy (skipped when the trigger has no envoy, no registry is
    // configured, or the envoy is not registered)
    const owningEnvoy = trigger.envoyId && envoyRegistry ? envoyRegistry.get(trigger.envoyId) : undefined;
    if (owningEnvoy) {
      const envoyClient = new EnvoyClient(owningEnvoy.url);
      await envoyClient.pauseMonitoringDirective(trigger.id);
    }

    trigger.triggerStatus = "paused";
    if (trigger.monitoringDirective) {
      trigger.monitoringDirective.status = "paused";
    }
    deployments.save(trigger);

    const userEmail = request.user?.email;
    const actor = userEmail ?? "anonymous";
    debrief.record({
      partitionId: trigger.partitionId ?? null,
      operationId: trigger.id,
      agent: "server",
      decisionType: "trigger-paused",
      decision: `Trigger paused by ${actor}`,
      reasoning: "User requested trigger pause",
      context: {},
      actor: userEmail,
    });
    telemetry.record({ actor, action: "trigger.paused" as TelemetryAction, target: { type: "trigger", id: trigger.id }, details: {} });

    return { operation: trigger, paused: true };
  },
);
|
|
1695
|
+
|
|
1696
|
+
// Resume a paused trigger.
// 404 when the id is not a trigger operation, 409 when the trigger is not
// currently "paused". The envoy is asked to resume first; local state is only
// updated after that call returns.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/trigger/resume",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const trigger = deployments.get(request.params.id);
    if (!trigger || trigger.input.type !== "trigger") {
      return reply.status(404).send({ error: "Trigger operation not found" });
    }
    if (trigger.triggerStatus !== "paused") {
      return reply.status(409).send({ error: `Cannot resume trigger in "${trigger.triggerStatus}" status` });
    }

    // Resume on envoy (skipped when the trigger has no envoy, no registry is
    // configured, or the envoy is not registered)
    const owningEnvoy = trigger.envoyId && envoyRegistry ? envoyRegistry.get(trigger.envoyId) : undefined;
    if (owningEnvoy) {
      const envoyClient = new EnvoyClient(owningEnvoy.url);
      await envoyClient.resumeMonitoringDirective(trigger.id);
    }

    trigger.triggerStatus = "active";
    if (trigger.monitoringDirective) {
      trigger.monitoringDirective.status = "active";
    }
    deployments.save(trigger);

    const userEmail = request.user?.email;
    const actor = userEmail ?? "anonymous";
    debrief.record({
      partitionId: trigger.partitionId ?? null,
      operationId: trigger.id,
      agent: "server",
      decisionType: "trigger-resumed",
      decision: `Trigger resumed by ${actor}`,
      reasoning: "User requested trigger resume",
      context: {},
      actor: userEmail,
    });
    telemetry.record({ actor, action: "trigger.resumed" as TelemetryAction, target: { type: "trigger", id: trigger.id }, details: {} });

    return { operation: trigger, resumed: true };
  },
);
|
|
1737
|
+
|
|
1738
|
+
// Disable a trigger.
// 404 when the id is not a trigger operation. Unlike pause/resume there is no
// status precondition, and a failure to remove the directive on the envoy is
// deliberately ignored so the trigger is still marked disabled locally.
app.post<{ Params: { id: string } }>(
  "/api/operations/:id/trigger/disable",
  { preHandler: [requirePermission("deployment.approve")] },
  async (request, reply) => {
    const trigger = deployments.get(request.params.id);
    if (!trigger || trigger.input.type !== "trigger") {
      return reply.status(404).send({ error: "Trigger operation not found" });
    }

    // Remove from envoy (best effort; skipped when the trigger has no envoy,
    // no registry is configured, or the envoy is not registered)
    const owningEnvoy = trigger.envoyId && envoyRegistry ? envoyRegistry.get(trigger.envoyId) : undefined;
    if (owningEnvoy) {
      const envoyClient = new EnvoyClient(owningEnvoy.url);
      await envoyClient.removeMonitoringDirective(trigger.id).catch(() => {});
    }

    trigger.triggerStatus = "disabled";
    if (trigger.monitoringDirective) {
      trigger.monitoringDirective.status = "disabled";
    }
    deployments.save(trigger);

    const userEmail = request.user?.email;
    const actor = userEmail ?? "anonymous";
    debrief.record({
      partitionId: trigger.partitionId ?? null,
      operationId: trigger.id,
      agent: "server",
      decisionType: "trigger-disabled",
      decision: `Trigger disabled by ${actor}`,
      reasoning: "User requested trigger disable",
      context: {},
      actor: userEmail,
    });
    telemetry.record({ actor, action: "trigger.disabled" as TelemetryAction, target: { type: "trigger", id: trigger.id }, details: {} });

    return { operation: trigger, disabled: true };
  },
);
|
|
1776
|
+
|
|
1777
|
+
// ---------------------------------------------------------------------------
|
|
1778
|
+
// Composite operation helpers — defined inside registerOperationRoutes so
|
|
1779
|
+
// they close over the stores and registry.
|
|
1780
|
+
// ---------------------------------------------------------------------------
|
|
1781
|
+
|
|
1782
|
+
async function planCompositeChildren(
|
|
1783
|
+
parentOp: import("@synth-deploy/core").Operation,
|
|
1784
|
+
_registry: EnvoyRegistry,
|
|
1785
|
+
planningEnvoy: { id: string; name: string; url: string },
|
|
1786
|
+
): Promise<void> {
|
|
1787
|
+
const compositeInput = parentOp.input as { type: "composite"; operations: import("@synth-deploy/core").OperationInput[] };
|
|
1788
|
+
const childInputs = compositeInput.operations;
|
|
1789
|
+
|
|
1790
|
+
if (childInputs.length === 0) {
|
|
1791
|
+
const dep = deployments.get(parentOp.id);
|
|
1792
|
+
if (dep) {
|
|
1793
|
+
dep.status = "failed" as typeof dep.status;
|
|
1794
|
+
dep.failureReason = "Composite operation has no child operations";
|
|
1795
|
+
deployments.save(dep);
|
|
1796
|
+
}
|
|
1797
|
+
return;
|
|
1798
|
+
}
|
|
1799
|
+
|
|
1800
|
+
const childIds: string[] = [];
|
|
1801
|
+
const environment = parentOp.environmentId ? environments.get(parentOp.environmentId) : undefined;
|
|
1802
|
+
const partition = parentOp.partitionId ? partitions.get(parentOp.partitionId) : undefined;
|
|
1803
|
+
|
|
1804
|
+
for (let seqIdx = 0; seqIdx < childInputs.length; seqIdx++) {
|
|
1805
|
+
const childInput = childInputs[seqIdx];
|
|
1806
|
+
const childOp = {
|
|
1807
|
+
id: crypto.randomUUID(),
|
|
1808
|
+
input: childInput,
|
|
1809
|
+
intent: "intent" in childInput ? (childInput as { intent: string }).intent
|
|
1810
|
+
: childInput.type === "trigger" ? `Monitor: ${(childInput as { condition: string }).condition}`
|
|
1811
|
+
: undefined,
|
|
1812
|
+
lineage: parentOp.id,
|
|
1813
|
+
triggeredBy: "agent" as const,
|
|
1814
|
+
environmentId: parentOp.environmentId,
|
|
1815
|
+
partitionId: parentOp.partitionId,
|
|
1816
|
+
envoyId: planningEnvoy.id,
|
|
1817
|
+
version: parentOp.version ?? "",
|
|
1818
|
+
status: "pending" as const,
|
|
1819
|
+
variables: parentOp.variables,
|
|
1820
|
+
debriefEntryIds: [] as string[],
|
|
1821
|
+
createdAt: new Date(),
|
|
1822
|
+
sequenceIndex: seqIdx,
|
|
1823
|
+
};
|
|
1824
|
+
deployments.save(childOp);
|
|
1825
|
+
childIds.push(childOp.id);
|
|
1826
|
+
}
|
|
1827
|
+
|
|
1828
|
+
debrief.record({
|
|
1829
|
+
partitionId: parentOp.partitionId ?? null,
|
|
1830
|
+
operationId: parentOp.id,
|
|
1831
|
+
agent: "server",
|
|
1832
|
+
decisionType: "composite-started",
|
|
1833
|
+
decision: `Composite operation started — planning ${childIds.length} child operation(s) sequentially`,
|
|
1834
|
+
reasoning: `Sequential composite: ${childInputs.map((c) => c.type).join(" → ")}`,
|
|
1835
|
+
context: { childIds, childCount: childIds.length, sequence: childInputs.map((c) => c.type) },
|
|
1836
|
+
});
|
|
1837
|
+
|
|
1838
|
+
const environmentForPlanning = environment
|
|
1839
|
+
? { id: environment.id, name: environment.name, variables: environment.variables }
|
|
1840
|
+
: { id: `direct:${planningEnvoy.id}`, name: planningEnvoy.name, variables: {} };
|
|
1841
|
+
|
|
1842
|
+
let anyFailed = false;
|
|
1843
|
+
|
|
1844
|
+
for (const childId of childIds) {
|
|
1845
|
+
const child = deployments.get(childId);
|
|
1846
|
+
if (!child) continue;
|
|
1847
|
+
const childInput = child.input;
|
|
1848
|
+
|
|
1849
|
+
const childArtifact = childInput.type === "deploy"
|
|
1850
|
+
? artifactStore.get((childInput as { artifactId: string }).artifactId)
|
|
1851
|
+
: undefined;
|
|
1852
|
+
|
|
1853
|
+
const planningClient = new EnvoyClient(planningEnvoy.url);
|
|
1854
|
+
|
|
1855
|
+
try {
|
|
1856
|
+
const result = await planningClient.requestPlan({
|
|
1857
|
+
operationId: childId,
|
|
1858
|
+
operationType: childInput.type as "deploy" | "query" | "investigate" | "maintain" | "trigger",
|
|
1859
|
+
intent: "intent" in childInput ? (childInput as { intent?: string }).intent
|
|
1860
|
+
: childInput.type === "trigger" ? `Monitor: ${(childInput as { condition: string }).condition}`
|
|
1861
|
+
: undefined,
|
|
1862
|
+
...(childArtifact ? {
|
|
1863
|
+
artifact: {
|
|
1864
|
+
id: childArtifact.id,
|
|
1865
|
+
name: childArtifact.name,
|
|
1866
|
+
type: childArtifact.type,
|
|
1867
|
+
analysis: childArtifact.analysis,
|
|
1868
|
+
},
|
|
1869
|
+
} : {}),
|
|
1870
|
+
...(childInput.type === "investigate" && "allowWrite" in childInput
|
|
1871
|
+
? { allowWrite: (childInput as { allowWrite?: boolean }).allowWrite }
|
|
1872
|
+
: {}),
|
|
1873
|
+
environment: environmentForPlanning,
|
|
1874
|
+
partition: partition ? { id: partition.id, name: partition.name, variables: partition.variables } : undefined,
|
|
1875
|
+
version: parentOp.version ?? "",
|
|
1876
|
+
resolvedVariables: parentOp.variables,
|
|
1877
|
+
});
|
|
1878
|
+
|
|
1879
|
+
const childDep = deployments.get(childId);
|
|
1880
|
+
if (!childDep) continue;
|
|
1881
|
+
|
|
1882
|
+
if (result.blocked) {
|
|
1883
|
+
childDep.status = "failed" as typeof childDep.status;
|
|
1884
|
+
childDep.failureReason = result.blockReason ?? "Plan blocked";
|
|
1885
|
+
deployments.save(childDep);
|
|
1886
|
+
anyFailed = true;
|
|
1887
|
+
|
|
1888
|
+
const parentDep = deployments.get(parentOp.id);
|
|
1889
|
+
if (parentDep && parentDep.status === "pending") {
|
|
1890
|
+
parentDep.status = "failed" as typeof parentDep.status;
|
|
1891
|
+
parentDep.failureReason = `Child operation (${childInput.type}) plan blocked: ${childDep.failureReason}`;
|
|
1892
|
+
deployments.save(parentDep);
|
|
1893
|
+
debrief.record({
|
|
1894
|
+
partitionId: parentDep.partitionId ?? null,
|
|
1895
|
+
operationId: parentDep.id,
|
|
1896
|
+
agent: "server",
|
|
1897
|
+
decisionType: "composite-failed",
|
|
1898
|
+
decision: `Child operation planning blocked — composite cannot proceed`,
|
|
1899
|
+
reasoning: childDep.failureReason,
|
|
1900
|
+
context: { childId, childType: childInput.type },
|
|
1901
|
+
});
|
|
1902
|
+
}
|
|
1903
|
+
break;
|
|
1904
|
+
}
|
|
1905
|
+
|
|
1906
|
+
childDep.plan = result.plan;
|
|
1907
|
+
childDep.rollbackPlan = result.rollbackPlan;
|
|
1908
|
+
childDep.envoyId = planningEnvoy.id;
|
|
1909
|
+
if (childInput.type === "query" && result.queryFindings) childDep.queryFindings = result.queryFindings;
|
|
1910
|
+
if (childInput.type === "investigate" && result.investigationFindings) childDep.investigationFindings = result.investigationFindings;
|
|
1911
|
+
childDep.status = "awaiting_approval" as typeof childDep.status;
|
|
1912
|
+
deployments.save(childDep);
|
|
1913
|
+
|
|
1914
|
+
debrief.record({
|
|
1915
|
+
partitionId: childDep.partitionId ?? null,
|
|
1916
|
+
operationId: childDep.id,
|
|
1917
|
+
agent: "envoy",
|
|
1918
|
+
decisionType: "plan-generation",
|
|
1919
|
+
decision: `Child operation plan generated with ${result.plan.steps.length} steps`,
|
|
1920
|
+
reasoning: result.plan.reasoning,
|
|
1921
|
+
context: { stepCount: result.plan.steps.length, envoyId: planningEnvoy.id, parentOperationId: parentOp.id },
|
|
1922
|
+
});
|
|
1923
|
+
} catch (err) {
|
|
1924
|
+
const childDep = deployments.get(childId);
|
|
1925
|
+
if (childDep) {
|
|
1926
|
+
childDep.status = "failed" as typeof childDep.status;
|
|
1927
|
+
childDep.failureReason = err instanceof Error ? err.message : "Planning failed";
|
|
1928
|
+
deployments.save(childDep);
|
|
1929
|
+
}
|
|
1930
|
+
anyFailed = true;
|
|
1931
|
+
|
|
1932
|
+
const parentDep = deployments.get(parentOp.id);
|
|
1933
|
+
if (parentDep && parentDep.status === "pending") {
|
|
1934
|
+
parentDep.status = "failed" as typeof parentDep.status;
|
|
1935
|
+
parentDep.failureReason = `Child operation (${childInput.type}) planning failed: ${err instanceof Error ? err.message : "unknown error"}`;
|
|
1936
|
+
deployments.save(parentDep);
|
|
1937
|
+
debrief.record({
|
|
1938
|
+
partitionId: parentDep.partitionId ?? null,
|
|
1939
|
+
operationId: parentDep.id,
|
|
1940
|
+
agent: "server",
|
|
1941
|
+
decisionType: "composite-failed",
|
|
1942
|
+
decision: `Child operation planning failed — composite cannot proceed`,
|
|
1943
|
+
reasoning: parentDep.failureReason!,
|
|
1944
|
+
context: { childId, childType: childInput.type, error: parentDep.failureReason },
|
|
1945
|
+
});
|
|
1946
|
+
}
|
|
1947
|
+
break;
|
|
1948
|
+
}
|
|
1949
|
+
}
|
|
1950
|
+
|
|
1951
|
+
if (!anyFailed) {
|
|
1952
|
+
// All children planned — build combined summary plan and await approval
|
|
1953
|
+
const allChildren = childIds.map((id) => deployments.get(id)).filter(Boolean) as import("@synth-deploy/core").Operation[];
|
|
1954
|
+
|
|
1955
|
+
const combinedSteps = allChildren.flatMap((c, idx) => {
|
|
1956
|
+
if (!c.plan) return [];
|
|
1957
|
+
return c.plan.steps.map((step) => ({
|
|
1958
|
+
...step,
|
|
1959
|
+
description: `[${idx + 1}/${allChildren.length}: ${c.input.type}] ${step.description}`,
|
|
1960
|
+
}));
|
|
1961
|
+
});
|
|
1962
|
+
|
|
1963
|
+
const combinedReasoning = allChildren.map((c, idx) =>
|
|
1964
|
+
`Step ${idx + 1} (${c.input.type}): ${c.plan?.reasoning ?? "no reasoning"}`
|
|
1965
|
+
).join("\n\n");
|
|
1966
|
+
|
|
1967
|
+
const parentDep = deployments.get(parentOp.id);
|
|
1968
|
+
if (parentDep && parentDep.status === "pending") {
|
|
1969
|
+
parentDep.plan = { steps: combinedSteps, reasoning: combinedReasoning };
|
|
1970
|
+
parentDep.rollbackPlan = { steps: [], reasoning: "Child operations handle their own rollback" };
|
|
1971
|
+
parentDep.status = "awaiting_approval" as typeof parentDep.status;
|
|
1972
|
+
parentDep.recommendation = computeRecommendation(parentDep, deployments);
|
|
1973
|
+
deployments.save(parentDep);
|
|
1974
|
+
|
|
1975
|
+
debrief.record({
|
|
1976
|
+
partitionId: parentDep.partitionId ?? null,
|
|
1977
|
+
operationId: parentDep.id,
|
|
1978
|
+
agent: "server",
|
|
1979
|
+
decisionType: "composite-plan-ready",
|
|
1980
|
+
decision: `All ${allChildren.length} child plans ready — composite awaiting approval`,
|
|
1981
|
+
reasoning: combinedReasoning,
|
|
1982
|
+
context: { childIds, totalSteps: combinedSteps.length },
|
|
1983
|
+
});
|
|
1984
|
+
}
|
|
1985
|
+
}
|
|
1986
|
+
}
|
|
1987
|
+
|
|
1988
|
+
/**
 * Runs the children of an approved composite operation one after another.
 *
 * For each child, in `childIds` order, this:
 *   1. verifies the child has both a plan and a rollback plan,
 *   2. resolves the envoy to run it on (the child's own `envoyId` when set,
 *      otherwise the first envoy returned by the registry),
 *   3. marks the child "running" and dispatches its plan to that envoy,
 *   4. polls the store every 2 seconds (up to 5 minutes) until the child
 *      reaches a terminal status.
 *
 * The first child that cannot be started, cannot be dispatched, or does not
 * end in "succeeded" fails the parent and stops the sequence; later children
 * are never started. When every child succeeds the parent is marked
 * "succeeded". Every state transition is also written to the debrief log.
 *
 * The function returns silently (without touching the parent) when the parent
 * no longer exists, or when the parent is found "failed"/"cancelled" while a
 * child is being awaited.
 *
 * @param parentId - ID of the composite (parent) operation.
 * @param childIds - IDs of the child operations, in execution order.
 * @returns A promise that resolves once the sequence has finished or stopped.
 */
async function executeCompositeSequentially(
  parentId: string,
  childIds: string[],
): Promise<void> {
  const parentOp = deployments.get(parentId);
  if (!parentOp) return;

  /**
   * Marks the parent composite as failed and records a "composite-failed"
   * debrief entry. All failure paths go through here so that each of them
   * consistently stamps `completedAt` and leaves an audit record.
   * Does nothing when the parent can no longer be found in the store.
   */
  const failComposite = (
    failureReason: string,
    decision: string,
    buildContext: (reason: string) => Record<string, unknown>,
  ): void => {
    const dep = deployments.get(parentId);
    if (!dep) return;
    dep.status = "failed" as typeof dep.status;
    dep.failureReason = failureReason;
    dep.completedAt = new Date();
    deployments.save(dep);
    debrief.record({
      partitionId: dep.partitionId ?? null,
      operationId: dep.id,
      agent: "server",
      decisionType: "composite-failed",
      decision,
      reasoning: failureReason,
      context: buildContext(failureReason),
    });
  };

  debrief.record({
    partitionId: parentOp.partitionId ?? null,
    operationId: parentOp.id,
    agent: "server",
    decisionType: "composite-started",
    decision: `Composite execution started — running ${childIds.length} child operations sequentially`,
    reasoning: `Composite operation approved — executing children in order`,
    context: { childIds, totalChildren: childIds.length },
  });

  // Polling parameters for awaiting each child: check every 2 seconds, give up after 5 minutes.
  const timeoutMs = 300_000;
  const pollIntervalMs = 2_000;

  for (let i = 0; i < childIds.length; i++) {
    const childId = childIds[i];
    const child = deployments.get(childId);

    // A child without a plan or rollback plan cannot be executed.
    if (!child || !child.plan || !child.rollbackPlan) {
      failComposite(
        `Child operation ${i + 1} has no plan — cannot execute`,
        `Child operation ${i + 1} missing plan — composite failed`,
        () => ({ childId, childIndex: i }),
      );
      return;
    }

    // Prefer the envoy recorded on the child; otherwise fall back to the first registered envoy.
    const targetEnvoy = child.envoyId ? envoyRegistry?.get(child.envoyId) : envoyRegistry?.list()[0];
    if (!targetEnvoy) {
      failComposite(
        `No envoy available for child operation ${i + 1}`,
        `No envoy available for child operation ${i + 1} — composite failed`,
        () => ({ childId, childIndex: i }),
      );
      return;
    }

    child.status = "running" as typeof child.status;
    deployments.save(child);

    debrief.record({
      partitionId: child.partitionId ?? null,
      operationId: child.id,
      agent: "server",
      decisionType: "composite-child-started",
      decision: `Executing child operation ${i + 1}/${childIds.length} (${child.input.type})`,
      reasoning: `Sequential composite execution — child ${i + 1} of ${childIds.length}`,
      context: { childId, childIndex: i, parentOperationId: parentId, childType: child.input.type },
    });

    const artifact = artifactStore.get(getArtifactId(child) ?? "");
    const serverPort = process.env.PORT ?? "9410";
    const serverUrl = process.env.SYNTH_SERVER_URL ?? `http://localhost:${serverPort}`;
    // URL handed to the envoy as the progress callback for this child.
    const progressCallbackUrl = `${serverUrl}/api/operations/${child.id}/progress`;
    const envoyUrl = (targetEnvoy as { url: string }).url;
    // Token of the registry entry whose URL matches the target envoy (undefined if none matches).
    const callbackToken = envoyRegistry?.list().find((r) => r.url === envoyUrl)?.token;

    const childEnvoyClient = new EnvoyClient(envoyUrl);

    try {
      await childEnvoyClient.executeApprovedPlan({
        operationId: child.id,
        plan: child.plan,
        rollbackPlan: child.rollbackPlan,
        artifactType: artifact?.type ?? "unknown",
        artifactName: artifact?.name ?? "unknown",
        environmentId: child.environmentId ?? "",
        progressCallbackUrl,
        callbackToken,
      });
    } catch (err) {
      const errorMessage = err instanceof Error ? err.message : "unknown error";

      // The child was saved as "running" just before dispatch. If dispatch threw,
      // nothing in this function would ever move it out of that state, so close
      // it out here — but only if it is still "running" in the store.
      const stuckChild = deployments.get(childId);
      if (stuckChild && stuckChild.status === "running") {
        stuckChild.status = "failed" as typeof stuckChild.status;
        stuckChild.failureReason = `Execution dispatch failed: ${errorMessage}`;
        stuckChild.completedAt = new Date();
        deployments.save(stuckChild);
      }

      failComposite(
        `Child operation ${i + 1} (${child.input.type}) execution dispatch failed: ${errorMessage}`,
        `Child operation ${i + 1} execution dispatch failed`,
        (reason) => ({ childId, childIndex: i, error: reason }),
      );
      return;
    }

    // Wait for child to complete (poll every 2 seconds, 5-minute timeout)
    const start = Date.now();
    let childSucceeded = false;

    while (Date.now() - start < timeoutMs) {
      // Sleep first: the child has only just been dispatched.
      await new Promise<void>((resolve) => setTimeout(resolve, pollIntervalMs));
      const updated = deployments.get(childId);
      if (updated?.status === "succeeded") {
        childSucceeded = true;
        break;
      }
      if (updated?.status === "failed" || updated?.status === "rolled_back" || updated?.status === "cancelled") {
        break;
      }
      // Stop if the parent was externally cancelled or failed while we were waiting
      const parentNow = deployments.get(parentId);
      if (!parentNow || parentNow.status === "failed" || parentNow.status === "cancelled") {
        return;
      }
    }

    const finalChild = deployments.get(childId);
    if (!childSucceeded) {
      // Either the child ended in a non-success terminal state or the poll timed out.
      const reason = finalChild?.failureReason ?? `Child operation ${i + 1} did not complete in time`;
      failComposite(
        `Composite stopped at step ${i + 1}/${childIds.length} (${child.input.type}): ${reason}`,
        `Composite stopped at child ${i + 1}/${childIds.length} — ${child.input.type} failed`,
        () => ({ childId, childIndex: i, failedChildType: child.input.type, completedChildren: i }),
      );
      return;
    }

    debrief.record({
      partitionId: finalChild?.partitionId ?? null,
      operationId: childId,
      agent: "server",
      decisionType: "composite-child-completed",
      decision: `Child operation ${i + 1}/${childIds.length} (${child.input.type}) completed successfully`,
      reasoning: `Child execution succeeded — proceeding to next child`,
      context: { childId, childIndex: i, parentOperationId: parentId },
    });
  }

  // All children succeeded
  const dep = deployments.get(parentId);
  if (dep) {
    dep.status = "succeeded" as typeof dep.status;
    dep.completedAt = new Date();
    deployments.save(dep);
    debrief.record({
      partitionId: dep.partitionId ?? null,
      operationId: dep.id,
      agent: "server",
      decisionType: "composite-completed",
      decision: `Composite operation completed — all ${childIds.length} child operations succeeded`,
      reasoning: `All child operations executed successfully in sequence`,
      context: { childIds, totalChildren: childIds.length },
    });
  }
}
|
|
2163
|
+
}
|
|
2164
|
+
|
|
2165
|
+
// ---------------------------------------------------------------------------
|
|
2166
|
+
// Recommendation engine — synthesizes enrichment context into a verdict
|
|
2167
|
+
// ---------------------------------------------------------------------------
|
|
2168
|
+
|
|
2169
|
+
/**
 * Derives a proceed / caution / hold recommendation for an operation from
 * what the deployment store knows about its artifact version and environment.
 *
 * Signals examined, in order:
 *   - the same artifact version was rolled back before            -> "caution"
 *   - another operation in the same environment is running or
 *     approved                                                     -> "hold"
 *   - more than 5 operations hit the environment in the last 24h   -> at least "caution"
 *   - the latest operation in the environment failed or was
 *     rolled back                                                  -> at least "caution"
 *
 * The environment-based signals are only evaluated when the operation has an
 * `environmentId`. A "hold" verdict is never downgraded by later signals.
 *
 * @param deployment - The operation being assessed.
 * @param store - Deployment store queried for history and concurrent operations.
 * @param llmSummary - Optional summary text; when provided it replaces the
 *   built-in summary for the resulting verdict.
 * @returns The verdict, a summary line, and the list of contributing factors
 *   (never empty — a "no risk factors" entry is added when nothing was found).
 */
function computeRecommendation(
  deployment: import("@synth-deploy/core").Deployment,
  store: IDeploymentStore,
  llmSummary?: string,
): import("@synth-deploy/core").DeploymentRecommendation {
  const riskNotes: string[] = [];
  let outcome: RecommendationVerdict = "proceed";

  // Start of the 24-hour look-back window used for the frequency check.
  const dayInMs = 24 * 60 * 60 * 1000;
  const windowStart = new Date(new Date().getTime() - dayInMs);

  const envId = deployment.environmentId;

  // Signal 1: this exact artifact version has been rolled back before.
  if (deployment.version) {
    const priorRollbacks = store.findByArtifactVersion(
      getArtifactId(deployment) ?? "",
      deployment.version,
      "rolled_back",
    );
    if (priorRollbacks.length > 0) {
      outcome = "caution";
      riskNotes.push("This artifact version was previously rolled back");
    }
  }

  // Signal 2: other operations currently in flight for the same environment.
  if (envId) {
    let inFlight = 0;
    for (const other of store.list()) {
      if (other.environmentId !== deployment.environmentId) continue;
      if (other.id === deployment.id) continue;
      if ((other.status) === "running" || (other.status) === "approved") {
        inFlight += 1;
      }
    }
    if (inFlight > 0) {
      outcome = "hold";
      riskNotes.push(`${inFlight} other operation(s) in progress for this environment`);
    }
  }

  // Signal 3: how busy the environment has been over the last 24 hours.
  let opsInWindow = 0;
  if (envId) {
    opsInWindow = store.countByEnvironment(envId, windowStart);
  }
  if (opsInWindow > 5) {
    if (outcome === "proceed") {
      outcome = "caution";
    }
    riskNotes.push(`High operation frequency: ${opsInWindow} operations in the last 24h`);
  }

  // Signal 4: how the most recent operation in this environment ended.
  const previous = envId ? store.findLatestByEnvironment(envId) : undefined;
  if (previous && previous.id !== deployment.id) {
    switch (previous.status) {
      case "failed":
      case "rolled_back":
        if (outcome === "proceed") {
          outcome = "caution";
        }
        riskNotes.push(`Last operation to this environment ${previous.status}`);
        break;
      case "succeeded":
        riskNotes.push("Last operation to this environment succeeded");
        break;
      default:
        break;
    }
  }

  if (riskNotes.length === 0) {
    riskNotes.push("No risk factors detected — target is stable");
  }

  // Default summary line per verdict, used when no LLM summary was supplied.
  const defaultSummaries: Record<RecommendationVerdict, string> = {
    proceed: "Proceed — no conflicting operations, target environment is stable",
    caution: "Proceed with caution — review risk factors before greenlighting",
    hold: "Hold — resolve conflicting operations before proceeding",
  };

  return {
    verdict: outcome,
    summary: llmSummary ?? defaultSummaries[outcome],
    factors: riskNotes,
  };
}
|