@synth-deploy/server 1.0.6 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. package/dist/agent/envoy-client.d.ts +65 -15
  2. package/dist/agent/envoy-client.d.ts.map +1 -1
  3. package/dist/agent/envoy-client.js +58 -8
  4. package/dist/agent/envoy-client.js.map +1 -1
  5. package/dist/agent/stale-deployment-detector.js +1 -1
  6. package/dist/agent/stale-deployment-detector.js.map +1 -1
  7. package/dist/agent/synth-agent.d.ts +7 -5
  8. package/dist/agent/synth-agent.d.ts.map +1 -1
  9. package/dist/agent/synth-agent.js +59 -50
  10. package/dist/agent/synth-agent.js.map +1 -1
  11. package/dist/alert-webhooks/alert-parsers.d.ts +21 -0
  12. package/dist/alert-webhooks/alert-parsers.d.ts.map +1 -0
  13. package/dist/alert-webhooks/alert-parsers.js +184 -0
  14. package/dist/alert-webhooks/alert-parsers.js.map +1 -0
  15. package/dist/api/agent.d.ts +0 -6
  16. package/dist/api/agent.d.ts.map +1 -1
  17. package/dist/api/agent.js +6 -459
  18. package/dist/api/agent.js.map +1 -1
  19. package/dist/api/alert-webhooks.d.ts +13 -0
  20. package/dist/api/alert-webhooks.d.ts.map +1 -0
  21. package/dist/api/alert-webhooks.js +279 -0
  22. package/dist/api/alert-webhooks.js.map +1 -0
  23. package/dist/api/envoy-reports.js +2 -2
  24. package/dist/api/envoy-reports.js.map +1 -1
  25. package/dist/api/envoys.js +1 -1
  26. package/dist/api/envoys.js.map +1 -1
  27. package/dist/api/fleet.d.ts.map +1 -1
  28. package/dist/api/fleet.js +14 -15
  29. package/dist/api/fleet.js.map +1 -1
  30. package/dist/api/graph.js +3 -3
  31. package/dist/api/graph.js.map +1 -1
  32. package/dist/api/operations.d.ts +7 -0
  33. package/dist/api/operations.d.ts.map +1 -0
  34. package/dist/api/operations.js +1900 -0
  35. package/dist/api/operations.js.map +1 -0
  36. package/dist/api/partitions.js +1 -1
  37. package/dist/api/partitions.js.map +1 -1
  38. package/dist/api/schemas.d.ts +434 -133
  39. package/dist/api/schemas.d.ts.map +1 -1
  40. package/dist/api/schemas.js +53 -25
  41. package/dist/api/schemas.js.map +1 -1
  42. package/dist/api/system.d.ts.map +1 -1
  43. package/dist/api/system.js +22 -21
  44. package/dist/api/system.js.map +1 -1
  45. package/dist/artifact-analyzer.js +2 -2
  46. package/dist/artifact-analyzer.js.map +1 -1
  47. package/dist/fleet/fleet-executor.js +3 -3
  48. package/dist/fleet/fleet-executor.js.map +1 -1
  49. package/dist/graph/graph-executor.d.ts.map +1 -1
  50. package/dist/graph/graph-executor.js +18 -4
  51. package/dist/graph/graph-executor.js.map +1 -1
  52. package/dist/index.js +89 -61
  53. package/dist/index.js.map +1 -1
  54. package/dist/mcp/resources.js +3 -3
  55. package/dist/mcp/resources.js.map +1 -1
  56. package/dist/mcp/tools.d.ts.map +1 -1
  57. package/dist/mcp/tools.js +2 -9
  58. package/dist/mcp/tools.js.map +1 -1
  59. package/dist/middleware/auth.js +1 -1
  60. package/dist/middleware/auth.js.map +1 -1
  61. package/package.json +1 -1
  62. package/src/agent/envoy-client.ts +111 -19
  63. package/src/agent/stale-deployment-detector.ts +1 -1
  64. package/src/agent/synth-agent.ts +76 -56
  65. package/src/alert-webhooks/alert-parsers.ts +291 -0
  66. package/src/api/agent.ts +9 -528
  67. package/src/api/alert-webhooks.ts +354 -0
  68. package/src/api/envoy-reports.ts +2 -2
  69. package/src/api/envoys.ts +1 -1
  70. package/src/api/fleet.ts +14 -15
  71. package/src/api/graph.ts +3 -3
  72. package/src/api/operations.ts +2260 -0
  73. package/src/api/partitions.ts +1 -1
  74. package/src/api/schemas.ts +59 -27
  75. package/src/api/system.ts +23 -21
  76. package/src/artifact-analyzer.ts +2 -2
  77. package/src/fleet/fleet-executor.ts +3 -3
  78. package/src/graph/graph-executor.ts +18 -4
  79. package/src/index.ts +91 -61
  80. package/src/mcp/resources.ts +3 -3
  81. package/src/mcp/tools.ts +5 -9
  82. package/src/middleware/auth.ts +1 -1
  83. package/tests/agent-mode.test.ts +5 -376
  84. package/tests/api-handlers.test.ts +27 -27
  85. package/tests/composite-operations.test.ts +557 -0
  86. package/tests/decision-diary.test.ts +62 -63
  87. package/tests/diary-reader.test.ts +14 -18
  88. package/tests/mcp-tools.test.ts +1 -1
  89. package/tests/orchestration.test.ts +34 -30
  90. package/tests/partition-isolation.test.ts +4 -9
  91. package/tests/rbac-enforcement.test.ts +8 -8
  92. package/tests/ui-journey.test.ts +9 -9
  93. package/dist/api/deployments.d.ts +0 -11
  94. package/dist/api/deployments.d.ts.map +0 -1
  95. package/dist/api/deployments.js +0 -1098
  96. package/dist/api/deployments.js.map +0 -1
  97. package/src/api/deployments.ts +0 -1347
package/src/index.ts CHANGED
@@ -9,13 +9,13 @@ import fastifyStatic from "@fastify/static";
9
9
  import fastifyFormBody from "@fastify/formbody";
10
10
  import fastifyMultipart from "@fastify/multipart";
11
11
  import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
12
- import { PersistentDecisionDebrief, openEntityDatabase, PersistentPartitionStore, PersistentEnvironmentStore, PersistentSettingsStore, PersistentDeploymentStore, PersistentArtifactStore, PersistentSecurityBoundaryStore, PersistentTelemetryStore, PersistentUserStore, PersistentRoleStore, PersistentUserRoleStore, PersistentSessionStore, PersistentIdpProviderStore, PersistentRoleMappingStore, PersistentApiKeyStore, PersistentEnvoyRegistryStore, PersistentRegistryPollerVersionStore, LlmClient, buildLlmConfigFromSettings, initEdition, EditionError } from "@synth-deploy/core";
12
+ import { PersistentDecisionDebrief, openEntityDatabase, PersistentPartitionStore, PersistentEnvironmentStore, PersistentSettingsStore, PersistentDeploymentStore, PersistentArtifactStore, PersistentSecurityBoundaryStore, PersistentTelemetryStore, PersistentUserStore, PersistentRoleStore, PersistentUserRoleStore, PersistentSessionStore, PersistentIdpProviderStore, PersistentRoleMappingStore, PersistentApiKeyStore, PersistentEnvoyRegistryStore, PersistentRegistryPollerVersionStore, PersistentAlertWebhookStore, LlmClient, buildLlmConfigFromSettings, initEdition, EditionError } from "@synth-deploy/core";
13
13
  import type { Deployment, Artifact, ArtifactVersion, SecurityBoundary, Permission, RoleId } from "@synth-deploy/core";
14
14
  import { SynthAgent } from "./agent/synth-agent.js";
15
15
  import { EnvoyHealthChecker } from "./agent/health-checker.js";
16
16
  import { McpClientManager } from "./agent/mcp-client-manager.js";
17
17
  import { createMcpServer } from "./mcp/server.js";
18
- import { registerDeploymentRoutes } from "./api/deployments.js";
18
+ import { registerOperationRoutes } from "./api/operations.js";
19
19
  import { registerHealthRoutes } from "./api/health.js";
20
20
  import { registerEnvoyReportRoutes } from "./api/envoy-reports.js";
21
21
  import { registerArtifactRoutes } from "./api/artifacts.js";
@@ -41,6 +41,7 @@ import { registerFleetRoutes } from "./api/fleet.js";
41
41
  import { FleetDeploymentStore, FleetExecutor } from "./fleet/index.js";
42
42
  import { IntakeChannelStore, IntakeEventStore, IntakeProcessor, RegistryPoller } from "./intake/index.js";
43
43
  import { registerIntakeRoutes } from "./api/intake.js";
44
+ import { registerAlertWebhookRoutes } from "./api/alert-webhooks.js";
44
45
  import { ArtifactAnalyzer } from "./artifact-analyzer.js";
45
46
  import { DeploymentGraphStore, GraphInferenceEngine } from "./graph/index.js";
46
47
  import { registerGraphRoutes } from "./api/graph.js";
@@ -291,17 +292,27 @@ if (process.env.SYNTH_SEED_DEMO !== 'false' && partitions.list().length === 0) {
291
292
  // --- Deployments (mix of statuses and ages) ---
292
293
 
293
294
  const dep1: Deployment = {
294
- id: crypto.randomUUID() as Deployment["id"], artifactId: webAppArtifact.id as Deployment["artifactId"], partitionId: acmePartition.id as Deployment["partitionId"],
295
+ id: crypto.randomUUID() as Deployment["id"], input: { type: "deploy" as const, artifactId: webAppArtifact.id }, partitionId: acmePartition.id as Deployment["partitionId"],
295
296
  environmentId: prodEnv.id as Deployment["environmentId"], version: "2.3.0", status: "succeeded",
296
297
  variables: { ...acmePartition.variables, ...prodEnv.variables },
297
298
  plan: {
298
- steps: [
299
- { description: "Stop service", action: "systemctl stop web-app", target: "prd-web-01", reversible: true, rollbackAction: "systemctl start web-app", execPreview: "systemctl stop web-app" },
300
- { description: "Backup current binaries", action: "cp -r /opt/web-app/ /opt/web-app.bak/", target: "prd-web-01", reversible: false, execPreview: "cp -r /opt/web-app/ /opt/web-app.bak/" },
301
- { description: "Deploy new artifact", action: "tar -xzf web-app-2.3.0.tar.gz -C /opt/web-app/", target: "prd-web-01", reversible: true, rollbackAction: "cp -r /opt/web-app.bak/ /opt/web-app/", execPreview: "tar -xzf /opt/releases/web-app-2.3.0.tar.gz -C /opt/web-app/" },
302
- { description: "Apply environment config (1 variable changed: API_ENDPOINT)", action: "envsubst < config.template > /opt/web-app/.env", target: "prd-web-01", reversible: true, rollbackAction: "cp /opt/web-app.bak/.env /opt/web-app/.env", execPreview: "envsubst < /opt/web-app/config.template > /opt/web-app/.env" },
303
- { description: "Start service and verify health endpoint → 200 OK", action: "systemctl start web-app && curl -f http://localhost:8080/health", target: "prd-web-01", reversible: true, rollbackAction: "systemctl stop web-app", execPreview: "systemctl start web-app" },
304
- ],
299
+ scriptedPlan: {
300
+ platform: "bash",
301
+ executionScript: "#!/usr/bin/env bash\nset -euo pipefail\nsystemctl stop web-app\ncp -r /opt/web-app/ /opt/web-app.bak/\ntar -xzf /opt/releases/web-app-2.3.0.tar.gz -C /opt/web-app/\nenvsubst < /opt/web-app/config.template > /opt/web-app/.env\nsystemctl start web-app\ncurl -f --retry 3 --retry-delay 5 http://localhost:8080/health",
302
+ dryRunScript: null,
303
+ rollbackScript: "#!/usr/bin/env bash\nset -euo pipefail\nsystemctl stop web-app\ncp -r /opt/web-app.bak/ /opt/web-app/\ncp /opt/web-app.bak/.env /opt/web-app/.env\nsystemctl start web-app",
304
+ reasoning: "Standard 5-step deploy: stop, backup, extract, config, start. One config change: API_ENDPOINT updated to v2 endpoint validated in staging for 4h.",
305
+ stepSummary: [
306
+ { description: "Stop service", reversible: true },
307
+ { description: "Backup current binaries", reversible: false },
308
+ { description: "Deploy new artifact", reversible: true },
309
+ { description: "Apply environment config (1 variable changed: API_ENDPOINT)", reversible: true },
310
+ { description: "Start service and verify health endpoint → 200 OK", reversible: true },
311
+ ],
312
+ diffFromCurrent: [
313
+ { key: "API_ENDPOINT", from: "https://api.acme.corp/v1", to: "https://api.acme.corp/v2" },
314
+ ],
315
+ },
305
316
  reasoning: "Standard 5-step deploy: stop, backup, extract, config, start. One config change: API_ENDPOINT updated to v2 endpoint validated in staging for 4h.",
306
317
  diffFromCurrent: [
307
318
  { key: "API_ENDPOINT", from: "https://api.acme.corp/v1", to: "https://api.acme.corp/v2" },
@@ -311,21 +322,21 @@ if (process.env.SYNTH_SEED_DEMO !== 'false' && partitions.list().length === 0) {
311
322
  createdAt: hoursAgo(72), completedAt: hoursAgo(71.5), failureReason: undefined,
312
323
  };
313
324
  const dep2: Deployment = {
314
- id: crypto.randomUUID() as Deployment["id"], artifactId: webAppArtifact.id as Deployment["artifactId"], partitionId: acmePartition.id as Deployment["partitionId"],
325
+ id: crypto.randomUUID() as Deployment["id"], input: { type: "deploy" as const, artifactId: webAppArtifact.id }, partitionId: acmePartition.id as Deployment["partitionId"],
315
326
  environmentId: prodEnv.id as Deployment["environmentId"], version: "2.4.0", status: "succeeded",
316
327
  variables: { ...acmePartition.variables, ...prodEnv.variables },
317
328
  debriefEntryIds: [],
318
329
  createdAt: hoursAgo(48), completedAt: hoursAgo(47.8), failureReason: undefined,
319
330
  };
320
331
  const dep3: Deployment = {
321
- id: crypto.randomUUID() as Deployment["id"], artifactId: webAppArtifact.id as Deployment["artifactId"], partitionId: acmePartition.id as Deployment["partitionId"],
332
+ id: crypto.randomUUID() as Deployment["id"], input: { type: "deploy" as const, artifactId: webAppArtifact.id }, partitionId: acmePartition.id as Deployment["partitionId"],
322
333
  environmentId: prodEnv.id as Deployment["environmentId"], version: "2.4.1", status: "succeeded",
323
334
  variables: { ...acmePartition.variables, ...prodEnv.variables },
324
335
  debriefEntryIds: [],
325
336
  createdAt: hoursAgo(24), completedAt: hoursAgo(23.7), failureReason: undefined,
326
337
  };
327
338
  const dep4: Deployment = {
328
- id: crypto.randomUUID() as Deployment["id"], artifactId: apiArtifact.id as Deployment["artifactId"], partitionId: acmePartition.id as Deployment["partitionId"],
339
+ id: crypto.randomUUID() as Deployment["id"], input: { type: "deploy" as const, artifactId: apiArtifact.id }, partitionId: acmePartition.id as Deployment["partitionId"],
329
340
  environmentId: prodEnv.id as Deployment["environmentId"], version: "1.11.0", status: "failed",
330
341
  variables: { ...acmePartition.variables, ...prodEnv.variables },
331
342
  debriefEntryIds: [],
@@ -333,21 +344,21 @@ if (process.env.SYNTH_SEED_DEMO !== 'false' && partitions.list().length === 0) {
333
344
  failureReason: "Health check failed after 3 retries: connection refused on port 8080",
334
345
  };
335
346
  const dep5: Deployment = {
336
- id: crypto.randomUUID() as Deployment["id"], artifactId: apiArtifact.id as Deployment["artifactId"], partitionId: acmePartition.id as Deployment["partitionId"],
347
+ id: crypto.randomUUID() as Deployment["id"], input: { type: "deploy" as const, artifactId: apiArtifact.id }, partitionId: acmePartition.id as Deployment["partitionId"],
337
348
  environmentId: prodEnv.id as Deployment["environmentId"], version: "1.12.0", status: "succeeded",
338
349
  variables: { ...acmePartition.variables, ...prodEnv.variables },
339
350
  debriefEntryIds: [],
340
351
  createdAt: hoursAgo(12), completedAt: hoursAgo(11.8), failureReason: undefined,
341
352
  };
342
353
  const dep6: Deployment = {
343
- id: crypto.randomUUID() as Deployment["id"], artifactId: webAppArtifact.id as Deployment["artifactId"], partitionId: globexPartition.id as Deployment["partitionId"],
354
+ id: crypto.randomUUID() as Deployment["id"], input: { type: "deploy" as const, artifactId: webAppArtifact.id }, partitionId: globexPartition.id as Deployment["partitionId"],
344
355
  environmentId: stagingEnv.id as Deployment["environmentId"], version: "2.5.0-rc.1", status: "succeeded",
345
356
  variables: { ...globexPartition.variables, ...stagingEnv.variables },
346
357
  debriefEntryIds: [],
347
358
  createdAt: hoursAgo(6), completedAt: hoursAgo(5.8), failureReason: undefined,
348
359
  };
349
360
  const dep7: Deployment = {
350
- id: crypto.randomUUID() as Deployment["id"], artifactId: workerArtifact.id as Deployment["artifactId"], partitionId: initechPartition.id as Deployment["partitionId"],
361
+ id: crypto.randomUUID() as Deployment["id"], input: { type: "deploy" as const, artifactId: workerArtifact.id }, partitionId: initechPartition.id as Deployment["partitionId"],
351
362
  environmentId: prodEnv.id as Deployment["environmentId"], version: "2.9.0", status: "failed",
352
363
  variables: { ...initechPartition.variables, ...prodEnv.variables },
353
364
  debriefEntryIds: [],
@@ -355,48 +366,62 @@ if (process.env.SYNTH_SEED_DEMO !== 'false' && partitions.list().length === 0) {
355
366
  failureReason: "Queue depth exceeded threshold (342 > 100) during verification",
356
367
  };
357
368
  const dep8: Deployment = {
358
- id: crypto.randomUUID() as Deployment["id"], artifactId: workerArtifact.id as Deployment["artifactId"], partitionId: initechPartition.id as Deployment["partitionId"],
369
+ id: crypto.randomUUID() as Deployment["id"], input: { type: "deploy" as const, artifactId: workerArtifact.id }, partitionId: initechPartition.id as Deployment["partitionId"],
359
370
  environmentId: prodEnv.id as Deployment["environmentId"], version: "3.0.0", status: "succeeded",
360
371
  variables: { ...initechPartition.variables, ...prodEnv.variables },
361
372
  debriefEntryIds: [],
362
373
  createdAt: hoursAgo(3), completedAt: hoursAgo(2.7), failureReason: undefined,
363
374
  };
364
375
  const dep9: Deployment = {
365
- id: crypto.randomUUID() as Deployment["id"], artifactId: apiArtifact.id as Deployment["artifactId"], partitionId: globexPartition.id as Deployment["partitionId"],
376
+ id: crypto.randomUUID() as Deployment["id"], input: { type: "deploy" as const, artifactId: apiArtifact.id }, partitionId: globexPartition.id as Deployment["partitionId"],
366
377
  environmentId: stagingEnv.id as Deployment["environmentId"], version: "1.13.0-beta.2", status: "running",
367
378
  variables: { ...globexPartition.variables, ...stagingEnv.variables },
368
379
  plan: {
369
- steps: [
370
- { description: "Pull latest image from registry", action: "docker pull", target: "registry.internal/api:1.13.0-beta.2", reversible: true, rollbackAction: "docker pull registry.internal/api:1.12.0", execPreview: "docker pull registry.internal/api:1.13.0-beta.2" },
371
- { description: "Stop running container", action: "docker stop", target: "api-staging", reversible: true, rollbackAction: "docker start api-staging", execPreview: "docker stop api-staging" },
372
- { description: "Start new container with updated image", action: "docker run", target: "registry.internal/api:1.13.0-beta.2", reversible: true, rollbackAction: "docker stop api-staging && docker run ... api:1.12.0", execPreview: "docker run -d --name api-staging --env-file /opt/api/.env -p 8080:8080 registry.internal/api:1.13.0-beta.2" },
373
- { description: "Verify health endpoint returns 200", action: "verify health", target: "http://localhost:8080/health", reversible: false, execPreview: "curl -f --retry 3 --retry-delay 5 http://localhost:8080/health" },
374
- ],
380
+ scriptedPlan: {
381
+ platform: "bash",
382
+ executionScript: "#!/usr/bin/env bash\nset -euo pipefail\ndocker pull registry.internal/api:1.13.0-beta.2\ndocker stop api-staging\ndocker run -d --name api-staging --env-file /opt/api/.env -p 8080:8080 registry.internal/api:1.13.0-beta.2\ncurl -f --retry 3 --retry-delay 5 http://localhost:8080/health",
383
+ dryRunScript: null,
384
+ rollbackScript: "#!/usr/bin/env bash\nset -euo pipefail\ndocker stop api-staging\ndocker pull registry.internal/api:1.12.0\ndocker start api-staging",
385
+ reasoning: "Container swap: pull new image, stop old container, start new one, verify health. Staging environment — rollback is fast via image tag swap.",
386
+ stepSummary: [
387
+ { description: "Pull latest image from registry", reversible: true },
388
+ { description: "Stop running container", reversible: true },
389
+ { description: "Start new container with updated image", reversible: true },
390
+ { description: "Verify health endpoint returns 200", reversible: false },
391
+ ],
392
+ },
375
393
  reasoning: "Container swap: pull new image, stop old container, start new one, verify health. Staging environment — rollback is fast via image tag swap.",
376
394
  },
377
395
  debriefEntryIds: [],
378
396
  createdAt: hoursAgo(0.5),
379
397
  };
380
398
  const dep11: Deployment = {
381
- id: crypto.randomUUID() as Deployment["id"], artifactId: workerArtifact.id as Deployment["artifactId"], partitionId: globexPartition.id as Deployment["partitionId"],
399
+ id: crypto.randomUUID() as Deployment["id"], input: { type: "deploy" as const, artifactId: workerArtifact.id }, partitionId: globexPartition.id as Deployment["partitionId"],
382
400
  environmentId: prodEnv.id as Deployment["environmentId"], version: "3.1.0", status: "awaiting_approval",
383
401
  variables: { ...globexPartition.variables, ...prodEnv.variables },
384
402
  plan: {
385
- steps: [
386
- { description: "Drain queue — wait for in-flight jobs to complete", action: "run command", target: "worker-drain", reversible: false, execPreview: "npm run worker:drain --timeout=120" },
387
- { description: "Stop worker processes on all nodes", action: "systemctl stop", target: "synth-worker", reversible: true, rollbackAction: "systemctl start synth-worker", execPreview: "systemctl stop synth-worker" },
388
- { description: "Deploy new worker binary", action: "copy file", target: "/opt/worker/", reversible: true, rollbackAction: "restore /opt/worker/ from backup", execPreview: "cp -r /opt/releases/worker-3.1.0/* /opt/worker/" },
389
- { description: "Update queue concurrency config (WORKER_CONCURRENCY: 4 8)", action: "write config", target: "/opt/worker/.env", reversible: true, rollbackAction: "restore previous .env", execPreview: "envsubst < /opt/worker/config.template > /opt/worker/.env" },
390
- { description: "Start worker and verify queue depth drops", action: "systemctl start", target: "synth-worker", reversible: true, rollbackAction: "systemctl stop synth-worker", execPreview: "systemctl start synth-worker" },
391
- { description: "Verify queue processing resumes within 30s", action: "verify health", target: "http://localhost:9090/metrics", reversible: false, execPreview: "curl -f --retry 6 --retry-delay 5 http://localhost:9090/metrics" },
392
- ],
403
+ scriptedPlan: {
404
+ platform: "bash",
405
+ executionScript: "#!/usr/bin/env bash\nset -euo pipefail\nnpm run worker:drain --timeout=120\nsystemctl stop synth-worker\ncp -r /opt/releases/worker-3.1.0/* /opt/worker/\nenvsubst < /opt/worker/config.template > /opt/worker/.env\nsystemctl start synth-worker\ncurl -f --retry 6 --retry-delay 5 http://localhost:9090/metrics",
406
+ dryRunScript: null,
407
+ rollbackScript: "#!/usr/bin/env bash\nset -euo pipefail\nsystemctl stop synth-worker\ncp -r /opt/worker.bak/* /opt/worker/\ncp /opt/worker.bak/.env /opt/worker/.env\nsystemctl start synth-worker",
408
+ reasoning: "Worker upgrade with concurrency increase. Drain first to avoid job loss, then replace binary and config atomically. Queue depth check confirms processing resumed.",
409
+ stepSummary: [
410
+ { description: "Drain queue — wait for in-flight jobs to complete", reversible: false },
411
+ { description: "Stop worker processes on all nodes", reversible: true },
412
+ { description: "Deploy new worker binary", reversible: true },
413
+ { description: "Update queue concurrency config (WORKER_CONCURRENCY: 4 → 8)", reversible: true },
414
+ { description: "Start worker and verify queue depth drops", reversible: true },
415
+ { description: "Verify queue processing resumes within 30s", reversible: false },
416
+ ],
417
+ },
393
418
  reasoning: "Worker upgrade with concurrency increase. Drain first to avoid job loss, then replace binary and config atomically. Queue depth check confirms processing resumed.",
394
419
  },
395
420
  debriefEntryIds: [],
396
421
  createdAt: hoursAgo(0.1),
397
422
  };
398
423
  const dep10: Deployment = {
399
- id: crypto.randomUUID() as Deployment["id"], artifactId: webAppArtifact.id as Deployment["artifactId"], partitionId: initechPartition.id as Deployment["partitionId"],
424
+ id: crypto.randomUUID() as Deployment["id"], input: { type: "deploy" as const, artifactId: webAppArtifact.id }, partitionId: initechPartition.id as Deployment["partitionId"],
400
425
  environmentId: prodEnv.id as Deployment["environmentId"], version: "2.4.1", status: "rolled_back",
401
426
  variables: { ...initechPartition.variables, ...prodEnv.variables },
402
427
  debriefEntryIds: [],
@@ -428,7 +453,7 @@ if (process.env.SYNTH_SEED_DEMO !== 'false' && partitions.list().length === 0) {
428
453
  // --- Debrief entries (rich decision diary) ---
429
454
 
430
455
  debrief.record({
431
- partitionId: null, deploymentId: null, agent: "server", decisionType: "system",
456
+ partitionId: null, operationId: null, agent: "server", decisionType: "system",
432
457
  decision: "Command initialized with demo data",
433
458
  reasoning: "Seeded 3 partitions, 3 environments, 3 artifacts, 10 deployments, and 2 envoy security boundary sets.",
434
459
  context: { partitions: 3, environments: 3, deployments: 10, artifacts: 3, securityBoundaries: 2 },
@@ -436,31 +461,31 @@ if (process.env.SYNTH_SEED_DEMO !== 'false' && partitions.list().length === 0) {
436
461
 
437
462
  // dep1 — web-app 2.3.0 succeeded
438
463
  debrief.record({
439
- partitionId: acmePartition.id, deploymentId: dep1.id, agent: "server", decisionType: "pipeline-plan",
464
+ partitionId: acmePartition.id, operationId: dep1.id, agent: "server", decisionType: "pipeline-plan",
440
465
  decision: "Planned deployment pipeline for web-app v2.3.0 to Acme Corp production",
441
466
  reasoning: "Standard 3-step pipeline: install deps, run migrations, health check. No variable conflicts.",
442
467
  context: { version: "2.3.0", steps: 3 },
443
468
  });
444
469
  debrief.record({
445
- partitionId: acmePartition.id, deploymentId: dep1.id, agent: "server", decisionType: "configuration-resolved",
470
+ partitionId: acmePartition.id, operationId: dep1.id, agent: "server", decisionType: "configuration-resolved",
446
471
  decision: "Resolved 4 variables for Acme Corp production (partition + environment merged)",
447
472
  reasoning: "Merged partition variables (APP_ENV, DB_HOST, REGION) with environment variables (APP_ENV, LOG_LEVEL). APP_ENV conflict resolved: environment value takes precedence.",
448
473
  context: { resolvedCount: 4, conflicts: 1, policy: "environment-wins" },
449
474
  });
450
475
  debrief.record({
451
- partitionId: acmePartition.id, deploymentId: dep1.id, agent: "envoy", decisionType: "deployment-execution",
476
+ partitionId: acmePartition.id, operationId: dep1.id, agent: "envoy", decisionType: "deployment-execution",
452
477
  decision: "Executed deployment web-app v2.3.0 on Acme Corp production",
453
478
  reasoning: "All 3 steps completed. Total execution time: 28.4s.",
454
479
  context: { duration: 28400 },
455
480
  });
456
481
  debrief.record({
457
- partitionId: acmePartition.id, deploymentId: dep1.id, agent: "envoy", decisionType: "health-check",
482
+ partitionId: acmePartition.id, operationId: dep1.id, agent: "envoy", decisionType: "health-check",
458
483
  decision: "Health check passed on first attempt",
459
484
  reasoning: "GET /health returned 200 with body {\"status\":\"ok\"} in 45ms.",
460
485
  context: { attempts: 1, responseTime: 45 },
461
486
  });
462
487
  debrief.record({
463
- partitionId: acmePartition.id, deploymentId: dep1.id, agent: "server", decisionType: "deployment-completion",
488
+ partitionId: acmePartition.id, operationId: dep1.id, agent: "server", decisionType: "deployment-completion",
464
489
  decision: "Deployment web-app v2.3.0 completed successfully",
465
490
  reasoning: "All pipeline steps passed. Health check confirmed. Marked as succeeded.",
466
491
  context: { status: "succeeded" },
@@ -468,31 +493,31 @@ if (process.env.SYNTH_SEED_DEMO !== 'false' && partitions.list().length === 0) {
468
493
 
469
494
  // dep4 — api-service 1.11.0 failed
470
495
  debrief.record({
471
- partitionId: acmePartition.id, deploymentId: dep4.id, agent: "server", decisionType: "pipeline-plan",
496
+ partitionId: acmePartition.id, operationId: dep4.id, agent: "server", decisionType: "pipeline-plan",
472
497
  decision: "Planned deployment pipeline for api-service v1.11.0 to Acme Corp production",
473
498
  reasoning: "2-step pipeline: pull image, verify endpoint.",
474
499
  context: { version: "1.11.0", steps: 2 },
475
500
  });
476
501
  debrief.record({
477
- partitionId: acmePartition.id, deploymentId: dep4.id, agent: "envoy", decisionType: "deployment-execution",
502
+ partitionId: acmePartition.id, operationId: dep4.id, agent: "envoy", decisionType: "deployment-execution",
478
503
  decision: "Image pull succeeded, starting verification",
479
504
  reasoning: "docker pull completed in 12.3s. Image sha256:a4f8e... verified.",
480
505
  context: { step: "Pull image", duration: 12300 },
481
506
  });
482
507
  debrief.record({
483
- partitionId: acmePartition.id, deploymentId: dep4.id, agent: "envoy", decisionType: "health-check",
508
+ partitionId: acmePartition.id, operationId: dep4.id, agent: "envoy", decisionType: "health-check",
484
509
  decision: "Health check failed after 3 retries",
485
510
  reasoning: "Connection refused on port 8080. Retry 1: refused (5s). Retry 2: refused (10s). Retry 3: refused (15s). Container logs: \"Error: EADDRINUSE :::8080\".",
486
511
  context: { attempts: 3, lastError: "ECONNREFUSED", containerLog: "EADDRINUSE" },
487
512
  });
488
513
  debrief.record({
489
- partitionId: acmePartition.id, deploymentId: dep4.id, agent: "envoy", decisionType: "diagnostic-investigation",
514
+ partitionId: acmePartition.id, operationId: dep4.id, agent: "envoy", decisionType: "diagnostic-investigation",
490
515
  decision: "Root cause: port 8080 bound by stale process from previous deployment",
491
516
  reasoning: "Found zombie process from api-service v1.10.0 holding port 8080. Previous deployment did not cleanly shut down.",
492
517
  context: { rootCause: "port-conflict", stalePid: 14823 },
493
518
  });
494
519
  debrief.record({
495
- partitionId: acmePartition.id, deploymentId: dep4.id, agent: "server", decisionType: "deployment-failure",
520
+ partitionId: acmePartition.id, operationId: dep4.id, agent: "server", decisionType: "deployment-failure",
496
521
  decision: "Deployment api-service v1.11.0 failed — health check could not connect",
497
522
  reasoning: "Envoy diagnostic identified port conflict from stale process. Recommend adding a pre-deploy cleanup step.",
498
523
  context: { status: "failed", recommendation: "Add cleanup step" },
@@ -500,25 +525,25 @@ if (process.env.SYNTH_SEED_DEMO !== 'false' && partitions.list().length === 0) {
500
525
 
501
526
  // dep7 — worker-service 2.9.0 failed
502
527
  debrief.record({
503
- partitionId: initechPartition.id, deploymentId: dep7.id, agent: "server", decisionType: "pipeline-plan",
528
+ partitionId: initechPartition.id, operationId: dep7.id, agent: "server", decisionType: "pipeline-plan",
504
529
  decision: "Planned deployment pipeline for worker-service v2.9.0 to Initech production",
505
530
  reasoning: "4-step pipeline with full verification strategy.",
506
531
  context: { version: "2.9.0", steps: 4, verificationStrategy: "full" },
507
532
  });
508
533
  debrief.record({
509
- partitionId: initechPartition.id, deploymentId: dep7.id, agent: "envoy", decisionType: "deployment-execution",
534
+ partitionId: initechPartition.id, operationId: dep7.id, agent: "envoy", decisionType: "deployment-execution",
510
535
  decision: "Workers stopped and binary deployed successfully",
511
536
  reasoning: "Pre-deploy steps completed. Workers stopped gracefully (0 in-flight jobs lost). Binary copied.",
512
537
  context: { stepsCompleted: 2, jobsLost: 0 },
513
538
  });
514
539
  debrief.record({
515
- partitionId: initechPartition.id, deploymentId: dep7.id, agent: "envoy", decisionType: "deployment-verification",
540
+ partitionId: initechPartition.id, operationId: dep7.id, agent: "envoy", decisionType: "deployment-verification",
516
541
  decision: "Verification failed: queue depth 342 exceeds threshold of 100",
517
542
  reasoning: "Workers restarted but queue depth grew rapidly. v2.9.0 introduced a regression in the message processing loop causing 10x slowdown.",
518
543
  context: { queueDepth: 342, threshold: 100, processingRate: "0.3/s vs expected 3/s" },
519
544
  });
520
545
  debrief.record({
521
- partitionId: initechPartition.id, deploymentId: dep7.id, agent: "server", decisionType: "deployment-failure",
546
+ partitionId: initechPartition.id, operationId: dep7.id, agent: "server", decisionType: "deployment-failure",
522
547
  decision: "Deployment worker-service v2.9.0 failed — queue depth exceeded threshold",
523
548
  reasoning: "Queue depth check returned 342 (max 100). Processing regression in v2.9.0.",
524
549
  context: { status: "failed" },
@@ -526,25 +551,25 @@ if (process.env.SYNTH_SEED_DEMO !== 'false' && partitions.list().length === 0) {
526
551
 
527
552
  // dep10 — web-app 2.4.1 rolled back
528
553
  debrief.record({
529
- partitionId: initechPartition.id, deploymentId: dep10.id, agent: "server", decisionType: "pipeline-plan",
554
+ partitionId: initechPartition.id, operationId: dep10.id, agent: "server", decisionType: "pipeline-plan",
530
555
  decision: "Planned deployment pipeline for web-app v2.4.1 to Initech production",
531
556
  reasoning: "Standard 3-step pipeline.",
532
557
  context: { version: "2.4.1", steps: 3 },
533
558
  });
534
559
  debrief.record({
535
- partitionId: initechPartition.id, deploymentId: dep10.id, agent: "envoy", decisionType: "deployment-execution",
560
+ partitionId: initechPartition.id, operationId: dep10.id, agent: "envoy", decisionType: "deployment-execution",
536
561
  decision: "All deployment steps completed, starting post-deploy verification",
537
562
  reasoning: "Dependencies installed (14.2s), migrations ran (3.1s), health check passed (0.2s).",
538
563
  context: { totalDuration: 17500 },
539
564
  });
540
565
  debrief.record({
541
- partitionId: initechPartition.id, deploymentId: dep10.id, agent: "envoy", decisionType: "deployment-verification",
566
+ partitionId: initechPartition.id, operationId: dep10.id, agent: "envoy", decisionType: "deployment-verification",
542
567
  decision: "Post-deploy smoke test detected 502 errors on /api/v2/users",
543
568
  reasoning: "12 endpoint checks: 10 passed, 2 returned 502 (GET and POST /api/v2/users). The v2 users endpoint depends on a schema migration that was partially applied.",
544
569
  context: { passed: 10, failed: 2, failedEndpoints: ["/api/v2/users"] },
545
570
  });
546
571
  debrief.record({
547
- partitionId: initechPartition.id, deploymentId: dep10.id, agent: "server", decisionType: "deployment-failure",
572
+ partitionId: initechPartition.id, operationId: dep10.id, agent: "server", decisionType: "deployment-failure",
548
573
  decision: "Initiated rollback of web-app v2.4.1 on Initech production",
549
574
  reasoning: "502 errors on critical user endpoints. Rolling back to previous known-good version.",
550
575
  context: { status: "rolled_back", rolledBackFrom: "2.4.1" },
@@ -552,19 +577,19 @@ if (process.env.SYNTH_SEED_DEMO !== 'false' && partitions.list().length === 0) {
552
577
 
553
578
  // dep6 — web-app 2.5.0-rc.1 with variable conflict
554
579
  debrief.record({
555
- partitionId: globexPartition.id, deploymentId: dep6.id, agent: "server", decisionType: "pipeline-plan",
580
+ partitionId: globexPartition.id, operationId: dep6.id, agent: "server", decisionType: "pipeline-plan",
556
581
  decision: "Planned deployment for web-app v2.5.0-rc.1 to Globex staging",
557
582
  reasoning: "Standard 3-step pipeline. Release candidate — permissive conflict policy.",
558
583
  context: { version: "2.5.0-rc.1", steps: 3 },
559
584
  });
560
585
  debrief.record({
561
- partitionId: globexPartition.id, deploymentId: dep6.id, agent: "server", decisionType: "variable-conflict",
586
+ partitionId: globexPartition.id, operationId: dep6.id, agent: "server", decisionType: "variable-conflict",
562
587
  decision: "Variable conflict: APP_ENV defined in both partition and environment",
563
588
  reasoning: "Partition sets APP_ENV=production, environment sets APP_ENV=staging. Permissive policy — using environment value.",
564
589
  context: { variable: "APP_ENV", partitionValue: "production", environmentValue: "staging", resolution: "environment-wins" },
565
590
  });
566
591
  debrief.record({
567
- partitionId: globexPartition.id, deploymentId: dep6.id, agent: "server", decisionType: "deployment-completion",
592
+ partitionId: globexPartition.id, operationId: dep6.id, agent: "server", decisionType: "deployment-completion",
568
593
  decision: "Deployment web-app v2.5.0-rc.1 completed on Globex staging",
569
594
  reasoning: "All steps passed despite variable conflict. RC verified in staging.",
570
595
  context: { status: "succeeded" },
@@ -572,13 +597,13 @@ if (process.env.SYNTH_SEED_DEMO !== 'false' && partitions.list().length === 0) {
572
597
 
573
598
  // dep9 — in-progress
574
599
  debrief.record({
575
- partitionId: globexPartition.id, deploymentId: dep9.id, agent: "server", decisionType: "pipeline-plan",
600
+ partitionId: globexPartition.id, operationId: dep9.id, agent: "server", decisionType: "pipeline-plan",
576
601
  decision: "Planned deployment for api-service v1.13.0-beta.2 to Globex staging",
577
602
  reasoning: "2-step pipeline for staging. Beta version — monitoring closely.",
578
603
  context: { version: "1.13.0-beta.2", steps: 2 },
579
604
  });
580
605
  debrief.record({
581
- partitionId: globexPartition.id, deploymentId: dep9.id, agent: "envoy", decisionType: "deployment-execution",
606
+ partitionId: globexPartition.id, operationId: dep9.id, agent: "envoy", decisionType: "deployment-execution",
582
607
  decision: "Image pull in progress for api-service v1.13.0-beta.2",
583
608
  reasoning: "Pulling docker image from registry. Download progress: 67%.",
584
609
  context: { step: "Pull image", progress: "67%" },
@@ -586,13 +611,13 @@ if (process.env.SYNTH_SEED_DEMO !== 'false' && partitions.list().length === 0) {
586
611
 
587
612
  // Environment scans
588
613
  debrief.record({
589
- partitionId: acmePartition.id, deploymentId: null, agent: "envoy", decisionType: "environment-scan",
614
+ partitionId: acmePartition.id, operationId: null, agent: "envoy", decisionType: "environment-scan",
590
615
  decision: "Environment scan completed for Acme Corp production",
591
616
  reasoning: "Current versions: web-app v2.4.1, api-service v1.12.0. Disk: 62%. Memory: 71%. No drift detected.",
592
617
  context: { versions: { "web-app": "2.4.1", "api-service": "1.12.0" }, diskUsage: "62%", memoryUsage: "71%" },
593
618
  });
594
619
  debrief.record({
595
- partitionId: initechPartition.id, deploymentId: null, agent: "envoy", decisionType: "environment-scan",
620
+ partitionId: initechPartition.id, operationId: null, agent: "envoy", decisionType: "environment-scan",
596
621
  decision: "Environment scan for Initech production — drift detected",
597
622
  reasoning: "worker-service v3.0.0 running. web-app at v2.4.0 (v2.4.1 was rolled back). Drift: LOG_LEVEL manually changed from 'warn' to 'debug' outside deployment pipeline.",
598
623
  context: { drift: true, driftDetails: "LOG_LEVEL changed outside pipeline" },
@@ -706,7 +731,7 @@ registerHealthRoutes(app, {
706
731
  });
707
732
  const progressStore = new ProgressEventStore();
708
733
  const defaultEnvoyClient = new EnvoyClient(settings.get().envoy.url, settings.get().envoy.timeoutMs);
709
- registerDeploymentRoutes(app, deployments, debrief, partitions, environments, artifactStore, settings, telemetryStore, progressStore, defaultEnvoyClient, envoyRegistry, llm);
734
+ registerOperationRoutes(app, deployments, debrief, partitions, environments, artifactStore, settings, telemetryStore, progressStore, defaultEnvoyClient, envoyRegistry, llm);
710
735
  registerEnvoyReportRoutes(app, debrief, deployments, envoyRegistry);
711
736
  registerArtifactRoutes(app, artifactStore, telemetryStore, artifactAnalyzer);
712
737
  registerSecurityBoundaryRoutes(app, securityBoundaryStore, telemetryStore);
@@ -757,6 +782,11 @@ for (const ch of intakeChannelStore.list()) {
757
782
  }
758
783
  }
759
784
 
785
+ // --- Alert Webhooks (external monitoring triggers) ---
786
+
787
+ const alertWebhookStore = new PersistentAlertWebhookStore(entityDb);
788
+ registerAlertWebhookRoutes(app, alertWebhookStore, deployments, debrief, environments, partitions, telemetryStore, envoyRegistry);
789
+
760
790
  // --- Serve UI static files if built ---
761
791
 
762
792
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
@@ -50,9 +50,9 @@ export function registerResources(
50
50
  async (uri, { deploymentId }) => {
51
51
  const deployment = deployments.get(deploymentId as string);
52
52
  if (!deployment) {
53
- return { contents: [{ uri: uri.href, text: JSON.stringify({ error: "Deployment not found" }) }] };
53
+ return { contents: [{ uri: uri.href, text: JSON.stringify({ error: "Operation not found" }) }] };
54
54
  }
55
- const entries = debrief.getByDeployment(deploymentId as string);
55
+ const entries = debrief.getByOperation(deploymentId as string);
56
56
  return {
57
57
  contents: [
58
58
  {
@@ -71,7 +71,7 @@ export function registerResources(
71
71
  list: async () => ({
72
72
  resources: deployments.list().map((d) => ({
73
73
  uri: `deployment://${d.id}`,
74
- name: `${d.artifactId} v${d.version} → ${d.environmentId}`,
74
+ name: `${d.input.type === 'deploy' ? d.input.artifactId : d.intent ?? d.input.type} v${d.version ?? ''} → ${d.environmentId}`,
75
75
  })),
76
76
  }),
77
77
  }),
package/src/mcp/tools.ts CHANGED
@@ -57,14 +57,10 @@ export function registerTools(
57
57
  }
58
58
  }
59
59
 
60
- const deployment = await agent.triggerDeployment({
61
- artifactId,
62
- artifactVersionId: version,
63
- environmentId,
64
- partitionId,
65
- triggeredBy: "agent",
66
- variables,
67
- });
60
+ const deployment = await agent.triggerOperation(
61
+ { type: "deploy", artifactId, ...(version ? { artifactVersionId: version } : {}) },
62
+ { environmentId, partitionId, triggeredBy: "agent", variables },
63
+ );
68
64
 
69
65
  return {
70
66
  content: [
@@ -99,7 +95,7 @@ export function registerTools(
99
95
  const deployment = deployments.get(deploymentId);
100
96
  if (!deployment) {
101
97
  return {
102
- content: [{ type: "text", text: `Error: Deployment not found: ${deploymentId}` }],
98
+ content: [{ type: "text", text: `Error: Operation not found: ${deploymentId}` }],
103
99
  isError: true,
104
100
  };
105
101
  }
@@ -16,7 +16,7 @@ declare module "fastify" {
16
16
  }
17
17
 
18
18
  const EXEMPT_ROUTES = ["/health", "/api/health", "/api/auth/login", "/api/auth/register", "/api/auth/refresh", "/api/auth/status", "/api/auth/providers", "/api/envoy/report"];
19
- const EXEMPT_PREFIXES = ["/api/auth/oidc/", "/api/auth/callback/oidc/", "/api/auth/saml/", "/api/auth/callback/saml/", "/api/auth/ldap/", "/api/intake/webhook/"];
19
+ const EXEMPT_PREFIXES = ["/api/auth/oidc/", "/api/auth/callback/oidc/", "/api/auth/saml/", "/api/auth/callback/saml/", "/api/auth/ldap/", "/api/intake/webhook/", "/api/alert-webhooks/receive/"];
20
20
  // Envoy callback endpoints — validated by envoy token, not user JWT
21
21
  const EXEMPT_PATTERNS = [/^\/api\/deployments\/[^/]+\/progress$/];
22
22