@synth-deploy/server 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/debrief-retention.d.ts +12 -0
- package/dist/agent/debrief-retention.d.ts.map +1 -0
- package/dist/agent/debrief-retention.js +27 -0
- package/dist/agent/debrief-retention.js.map +1 -0
- package/dist/agent/envoy-client.d.ts +216 -0
- package/dist/agent/envoy-client.d.ts.map +1 -0
- package/dist/agent/envoy-client.js +266 -0
- package/dist/agent/envoy-client.js.map +1 -0
- package/dist/agent/envoy-registry.d.ts +102 -0
- package/dist/agent/envoy-registry.d.ts.map +1 -0
- package/dist/agent/envoy-registry.js +319 -0
- package/dist/agent/envoy-registry.js.map +1 -0
- package/dist/agent/health-checker.d.ts +39 -0
- package/dist/agent/health-checker.d.ts.map +1 -0
- package/dist/agent/health-checker.js +49 -0
- package/dist/agent/health-checker.js.map +1 -0
- package/dist/agent/mcp-client-manager.d.ts +36 -0
- package/dist/agent/mcp-client-manager.d.ts.map +1 -0
- package/dist/agent/mcp-client-manager.js +106 -0
- package/dist/agent/mcp-client-manager.js.map +1 -0
- package/dist/agent/stale-deployment-detector.d.ts +15 -0
- package/dist/agent/stale-deployment-detector.d.ts.map +1 -0
- package/dist/agent/stale-deployment-detector.js +50 -0
- package/dist/agent/stale-deployment-detector.js.map +1 -0
- package/dist/agent/step-runner.d.ts +31 -0
- package/dist/agent/step-runner.d.ts.map +1 -0
- package/dist/agent/step-runner.js +80 -0
- package/dist/agent/step-runner.js.map +1 -0
- package/dist/agent/synth-agent.d.ts +168 -0
- package/dist/agent/synth-agent.d.ts.map +1 -0
- package/dist/agent/synth-agent.js +1195 -0
- package/dist/agent/synth-agent.js.map +1 -0
- package/dist/api/agent.d.ts +36 -0
- package/dist/api/agent.d.ts.map +1 -0
- package/dist/api/agent.js +867 -0
- package/dist/api/agent.js.map +1 -0
- package/dist/api/api-keys.d.ts +4 -0
- package/dist/api/api-keys.d.ts.map +1 -0
- package/dist/api/api-keys.js +118 -0
- package/dist/api/api-keys.js.map +1 -0
- package/dist/api/artifacts.d.ts +5 -0
- package/dist/api/artifacts.d.ts.map +1 -0
- package/dist/api/artifacts.js +142 -0
- package/dist/api/artifacts.js.map +1 -0
- package/dist/api/auth.d.ts +4 -0
- package/dist/api/auth.d.ts.map +1 -0
- package/dist/api/auth.js +280 -0
- package/dist/api/auth.js.map +1 -0
- package/dist/api/deployments.d.ts +11 -0
- package/dist/api/deployments.d.ts.map +1 -0
- package/dist/api/deployments.js +1098 -0
- package/dist/api/deployments.js.map +1 -0
- package/dist/api/environments.d.ts +5 -0
- package/dist/api/environments.d.ts.map +1 -0
- package/dist/api/environments.js +69 -0
- package/dist/api/environments.js.map +1 -0
- package/dist/api/envoy-reports.d.ts +17 -0
- package/dist/api/envoy-reports.d.ts.map +1 -0
- package/dist/api/envoy-reports.js +138 -0
- package/dist/api/envoy-reports.js.map +1 -0
- package/dist/api/envoys.d.ts +5 -0
- package/dist/api/envoys.d.ts.map +1 -0
- package/dist/api/envoys.js +192 -0
- package/dist/api/envoys.js.map +1 -0
- package/dist/api/fleet.d.ts +11 -0
- package/dist/api/fleet.d.ts.map +1 -0
- package/dist/api/fleet.js +394 -0
- package/dist/api/fleet.js.map +1 -0
- package/dist/api/graph.d.ts +8 -0
- package/dist/api/graph.d.ts.map +1 -0
- package/dist/api/graph.js +355 -0
- package/dist/api/graph.js.map +1 -0
- package/dist/api/health.d.ts +20 -0
- package/dist/api/health.d.ts.map +1 -0
- package/dist/api/health.js +248 -0
- package/dist/api/health.js.map +1 -0
- package/dist/api/idp-schemas.d.ts +41 -0
- package/dist/api/idp-schemas.d.ts.map +1 -0
- package/dist/api/idp-schemas.js +17 -0
- package/dist/api/idp-schemas.js.map +1 -0
- package/dist/api/idp.d.ts +6 -0
- package/dist/api/idp.d.ts.map +1 -0
- package/dist/api/idp.js +620 -0
- package/dist/api/idp.js.map +1 -0
- package/dist/api/intake.d.ts +10 -0
- package/dist/api/intake.d.ts.map +1 -0
- package/dist/api/intake.js +418 -0
- package/dist/api/intake.js.map +1 -0
- package/dist/api/partitions.d.ts +5 -0
- package/dist/api/partitions.d.ts.map +1 -0
- package/dist/api/partitions.js +113 -0
- package/dist/api/partitions.js.map +1 -0
- package/dist/api/progress-event-store.d.ts +62 -0
- package/dist/api/progress-event-store.d.ts.map +1 -0
- package/dist/api/progress-event-store.js +118 -0
- package/dist/api/progress-event-store.js.map +1 -0
- package/dist/api/schemas.d.ts +1000 -0
- package/dist/api/schemas.d.ts.map +1 -0
- package/dist/api/schemas.js +328 -0
- package/dist/api/schemas.js.map +1 -0
- package/dist/api/security-boundaries.d.ts +4 -0
- package/dist/api/security-boundaries.d.ts.map +1 -0
- package/dist/api/security-boundaries.js +32 -0
- package/dist/api/security-boundaries.js.map +1 -0
- package/dist/api/settings.d.ts +4 -0
- package/dist/api/settings.d.ts.map +1 -0
- package/dist/api/settings.js +99 -0
- package/dist/api/settings.js.map +1 -0
- package/dist/api/system.d.ts +75 -0
- package/dist/api/system.d.ts.map +1 -0
- package/dist/api/system.js +558 -0
- package/dist/api/system.js.map +1 -0
- package/dist/api/telemetry.d.ts +4 -0
- package/dist/api/telemetry.d.ts.map +1 -0
- package/dist/api/telemetry.js +24 -0
- package/dist/api/telemetry.js.map +1 -0
- package/dist/api/users.d.ts +4 -0
- package/dist/api/users.d.ts.map +1 -0
- package/dist/api/users.js +173 -0
- package/dist/api/users.js.map +1 -0
- package/dist/archive-unpacker.d.ts +24 -0
- package/dist/archive-unpacker.d.ts.map +1 -0
- package/dist/archive-unpacker.js +239 -0
- package/dist/archive-unpacker.js.map +1 -0
- package/dist/artifact-analyzer.d.ts +59 -0
- package/dist/artifact-analyzer.d.ts.map +1 -0
- package/dist/artifact-analyzer.js +334 -0
- package/dist/artifact-analyzer.js.map +1 -0
- package/dist/auth/idp/index.d.ts +9 -0
- package/dist/auth/idp/index.d.ts.map +1 -0
- package/dist/auth/idp/index.js +5 -0
- package/dist/auth/idp/index.js.map +1 -0
- package/dist/auth/idp/ldap.d.ts +56 -0
- package/dist/auth/idp/ldap.d.ts.map +1 -0
- package/dist/auth/idp/ldap.js +276 -0
- package/dist/auth/idp/ldap.js.map +1 -0
- package/dist/auth/idp/oidc.d.ts +27 -0
- package/dist/auth/idp/oidc.d.ts.map +1 -0
- package/dist/auth/idp/oidc.js +97 -0
- package/dist/auth/idp/oidc.js.map +1 -0
- package/dist/auth/idp/role-mapping.d.ts +9 -0
- package/dist/auth/idp/role-mapping.d.ts.map +1 -0
- package/dist/auth/idp/role-mapping.js +16 -0
- package/dist/auth/idp/role-mapping.js.map +1 -0
- package/dist/auth/idp/saml.d.ts +40 -0
- package/dist/auth/idp/saml.d.ts.map +1 -0
- package/dist/auth/idp/saml.js +117 -0
- package/dist/auth/idp/saml.js.map +1 -0
- package/dist/auth/idp/types.d.ts +23 -0
- package/dist/auth/idp/types.d.ts.map +1 -0
- package/dist/auth/idp/types.js +2 -0
- package/dist/auth/idp/types.js.map +1 -0
- package/dist/fleet/fleet-executor.d.ts +35 -0
- package/dist/fleet/fleet-executor.d.ts.map +1 -0
- package/dist/fleet/fleet-executor.js +228 -0
- package/dist/fleet/fleet-executor.js.map +1 -0
- package/dist/fleet/fleet-store.d.ts +13 -0
- package/dist/fleet/fleet-store.d.ts.map +1 -0
- package/dist/fleet/fleet-store.js +13 -0
- package/dist/fleet/fleet-store.js.map +1 -0
- package/dist/fleet/index.d.ts +5 -0
- package/dist/fleet/index.d.ts.map +1 -0
- package/dist/fleet/index.js +4 -0
- package/dist/fleet/index.js.map +1 -0
- package/dist/fleet/representative-selector.d.ts +15 -0
- package/dist/fleet/representative-selector.d.ts.map +1 -0
- package/dist/fleet/representative-selector.js +71 -0
- package/dist/fleet/representative-selector.js.map +1 -0
- package/dist/graph/graph-executor.d.ts +36 -0
- package/dist/graph/graph-executor.d.ts.map +1 -0
- package/dist/graph/graph-executor.js +348 -0
- package/dist/graph/graph-executor.js.map +1 -0
- package/dist/graph/graph-inference.d.ts +22 -0
- package/dist/graph/graph-inference.d.ts.map +1 -0
- package/dist/graph/graph-inference.js +149 -0
- package/dist/graph/graph-inference.js.map +1 -0
- package/dist/graph/graph-store.d.ts +12 -0
- package/dist/graph/graph-store.d.ts.map +1 -0
- package/dist/graph/graph-store.js +61 -0
- package/dist/graph/graph-store.js.map +1 -0
- package/dist/graph/index.d.ts +5 -0
- package/dist/graph/index.d.ts.map +1 -0
- package/dist/graph/index.js +4 -0
- package/dist/graph/index.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +837 -0
- package/dist/index.js.map +1 -0
- package/dist/intake/index.d.ts +6 -0
- package/dist/intake/index.d.ts.map +1 -0
- package/dist/intake/index.js +5 -0
- package/dist/intake/index.js.map +1 -0
- package/dist/intake/intake-processor.d.ts +17 -0
- package/dist/intake/intake-processor.d.ts.map +1 -0
- package/dist/intake/intake-processor.js +99 -0
- package/dist/intake/intake-processor.js.map +1 -0
- package/dist/intake/intake-store.d.ts +7 -0
- package/dist/intake/intake-store.d.ts.map +1 -0
- package/dist/intake/intake-store.js +7 -0
- package/dist/intake/intake-store.js.map +1 -0
- package/dist/intake/registry-poller.d.ts +41 -0
- package/dist/intake/registry-poller.d.ts.map +1 -0
- package/dist/intake/registry-poller.js +202 -0
- package/dist/intake/registry-poller.js.map +1 -0
- package/dist/intake/webhook-handlers.d.ts +37 -0
- package/dist/intake/webhook-handlers.d.ts.map +1 -0
- package/dist/intake/webhook-handlers.js +268 -0
- package/dist/intake/webhook-handlers.js.map +1 -0
- package/dist/logger.d.ts +5 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +15 -0
- package/dist/logger.js.map +1 -0
- package/dist/mcp/resources.d.ts +9 -0
- package/dist/mcp/resources.d.ts.map +1 -0
- package/dist/mcp/resources.js +72 -0
- package/dist/mcp/resources.js.map +1 -0
- package/dist/mcp/server.d.ts +15 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/server.js +20 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/mcp/tools.d.ts +9 -0
- package/dist/mcp/tools.d.ts.map +1 -0
- package/dist/mcp/tools.js +88 -0
- package/dist/mcp/tools.js.map +1 -0
- package/dist/middleware/auth.d.ts +29 -0
- package/dist/middleware/auth.d.ts.map +1 -0
- package/dist/middleware/auth.js +76 -0
- package/dist/middleware/auth.js.map +1 -0
- package/dist/middleware/permissions.d.ts +13 -0
- package/dist/middleware/permissions.d.ts.map +1 -0
- package/dist/middleware/permissions.js +32 -0
- package/dist/middleware/permissions.js.map +1 -0
- package/dist/pattern-store.d.ts +104 -0
- package/dist/pattern-store.d.ts.map +1 -0
- package/dist/pattern-store.js +299 -0
- package/dist/pattern-store.js.map +1 -0
- package/package.json +54 -0
- package/src/agent/debrief-retention.ts +44 -0
- package/src/agent/envoy-client.ts +474 -0
- package/src/agent/envoy-registry.ts +384 -0
- package/src/agent/health-checker.ts +70 -0
- package/src/agent/mcp-client-manager.ts +131 -0
- package/src/agent/stale-deployment-detector.ts +79 -0
- package/src/agent/step-runner.ts +124 -0
- package/src/agent/synth-agent.ts +1567 -0
- package/src/api/agent.ts +1075 -0
- package/src/api/api-keys.ts +129 -0
- package/src/api/artifacts.ts +194 -0
- package/src/api/auth.ts +320 -0
- package/src/api/deployments.ts +1347 -0
- package/src/api/environments.ts +97 -0
- package/src/api/envoy-reports.ts +159 -0
- package/src/api/envoys.ts +237 -0
- package/src/api/fleet.ts +510 -0
- package/src/api/graph.ts +516 -0
- package/src/api/health.ts +311 -0
- package/src/api/idp-schemas.ts +19 -0
- package/src/api/idp.ts +735 -0
- package/src/api/intake.ts +537 -0
- package/src/api/partitions.ts +147 -0
- package/src/api/progress-event-store.ts +153 -0
- package/src/api/schemas.ts +376 -0
- package/src/api/security-boundaries.ts +54 -0
- package/src/api/settings.ts +118 -0
- package/src/api/system.ts +704 -0
- package/src/api/telemetry.ts +32 -0
- package/src/api/users.ts +210 -0
- package/src/archive-unpacker.ts +271 -0
- package/src/artifact-analyzer.ts +438 -0
- package/src/auth/idp/index.ts +8 -0
- package/src/auth/idp/ldap.ts +340 -0
- package/src/auth/idp/oidc.ts +117 -0
- package/src/auth/idp/role-mapping.ts +22 -0
- package/src/auth/idp/saml.ts +148 -0
- package/src/auth/idp/types.ts +22 -0
- package/src/fleet/fleet-executor.ts +309 -0
- package/src/fleet/fleet-store.ts +13 -0
- package/src/fleet/index.ts +4 -0
- package/src/fleet/representative-selector.ts +83 -0
- package/src/graph/graph-executor.ts +446 -0
- package/src/graph/graph-inference.ts +184 -0
- package/src/graph/graph-store.ts +75 -0
- package/src/graph/index.ts +4 -0
- package/src/index.ts +916 -0
- package/src/intake/index.ts +5 -0
- package/src/intake/intake-processor.ts +111 -0
- package/src/intake/intake-store.ts +7 -0
- package/src/intake/registry-poller.ts +230 -0
- package/src/intake/webhook-handlers.ts +328 -0
- package/src/logger.ts +19 -0
- package/src/mcp/resources.ts +98 -0
- package/src/mcp/server.ts +34 -0
- package/src/mcp/tools.ts +117 -0
- package/src/middleware/auth.ts +103 -0
- package/src/middleware/permissions.ts +35 -0
- package/src/pattern-store.ts +409 -0
- package/tests/agent-mode.test.ts +536 -0
- package/tests/api-handlers.test.ts +1245 -0
- package/tests/archive-unpacker.test.ts +179 -0
- package/tests/artifact-analyzer.test.ts +240 -0
- package/tests/auth-middleware.test.ts +189 -0
- package/tests/decision-diary.test.ts +957 -0
- package/tests/diary-reader.test.ts +782 -0
- package/tests/envoy-client.test.ts +342 -0
- package/tests/envoy-reports.test.ts +156 -0
- package/tests/mcp-tools.test.ts +213 -0
- package/tests/orchestration.test.ts +536 -0
- package/tests/partition-deletion.test.ts +143 -0
- package/tests/partition-isolation.test.ts +830 -0
- package/tests/pattern-store.test.ts +371 -0
- package/tests/rbac-enforcement.test.ts +409 -0
- package/tests/ssrf-validation.test.ts +56 -0
- package/tests/stale-deployment.test.ts +85 -0
- package/tests/step-runner.test.ts +308 -0
- package/tests/ui-journey.test.ts +330 -0
- package/tsconfig.json +11 -0
- package/vitest.config.ts +27 -0
|
@@ -0,0 +1,1195 @@
|
|
|
1
|
+
import crypto from "node:crypto";
|
|
2
|
+
import { DefaultHealthChecker } from "./health-checker.js";
|
|
3
|
+
import { EnvoyClient } from "./envoy-client.js";
|
|
4
|
+
import { serverLog, serverError } from "../logger.js";
|
|
5
|
+
/**
 * Baseline agent options, used for any option that is not supplied by
 * explicit constructor options or by global settings.
 *
 * Frozen so the shared defaults cannot be altered through a stray reference;
 * consumers copy them (object spread) rather than mutating them.
 */
const DEFAULT_OPTIONS = Object.freeze({
    healthCheckRetries: 1,
    healthCheckBackoffMs: 500,
    executionDelayMs: 10,
});
|
|
10
|
+
/**
 * Variable name patterns that warrant extra scrutiny when overridden.
 *
 * "key" has to appear as its own name segment so that unrelated words such
 * as "monkey" are not flagged. Segment edges are the start/end of the name or
 * any non-alphanumeric character. A plain `\b` is not used because it treats
 * "_" as a word character and so never matches names like "API_KEY".
 */
const SENSITIVE_VARIABLE_PATTERNS = [
    /secret/i,
    /password/i,
    /(?:^|[^a-z0-9])key(?:$|[^a-z0-9])/i,
    /token/i,
    /credential/i,
];
|
|
20
|
+
/**
 * Variable name patterns indicating network connectivity configuration.
 * Overriding these cross-environment can route traffic or data to the
 * wrong infrastructure.
 *
 * Short fragments ("url", "port", "uri", and the prefixes "addr" / "conn")
 * are anchored to a name-segment edge so that words like "export", "support"
 * or "reconnect" are not flagged. The edge is the start/end of the name or any
 * non-alphanumeric character. A plain `\b` is not used because it treats "_"
 * as a word character and so never matches names like "DATABASE_URL".
 */
const CONNECTIVITY_VARIABLE_PATTERNS = [
    /host/i,
    /(?:^|[^a-z0-9])url(?:$|[^a-z0-9])/i,
    /endpoint/i,
    /(?:^|[^a-z0-9])port(?:$|[^a-z0-9])/i,
    /(?:^|[^a-z0-9])addr/i,
    /(?:^|[^a-z0-9])uri(?:$|[^a-z0-9])/i,
    /(?:^|[^a-z0-9])conn/i,
];
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
// OrchestrationError
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
/**
 * Thrown when a pipeline step fails after the agent has reasoned through it
 * and determined the deployment cannot proceed.
 *
 * Carries structured reasoning so the final debrief entry can explain
 * exactly why the deployment was aborted.
 */
export class OrchestrationError extends Error {
    /** Identifier of the pipeline step that raised the error. */
    step;
    /** Human-readable explanation recorded alongside the failure. */
    reasoning;
    /**
     * @param step      Pipeline step identifier (e.g. "resolve-entities").
     * @param message   Short failure summary; becomes `Error.message`.
     * @param reasoning Longer explanation of why the deployment was aborted.
     * @param options   Optional standard `Error` options; pass `{ cause }` to
     *                  keep the underlying error attached. Omitting it behaves
     *                  exactly like the three-argument form.
     */
    constructor(step, message, reasoning, options) {
        super(message, options);
        this.step = step;
        this.reasoning = reasoning;
        this.name = "OrchestrationError";
    }
}
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
// SynthAgent — the deployment orchestration engine
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
/**
|
|
58
|
+
* Command Agent — the reasoning engine that orchestrates deployments.
|
|
59
|
+
*
|
|
60
|
+
* Processes deployment requests through a structured pipeline. When a step
|
|
61
|
+
* encounters an unexpected situation, the agent evaluates the specifics —
|
|
62
|
+
* error type, environment context, conflict severity — and makes a
|
|
63
|
+
* context-dependent decision about how to proceed.
|
|
64
|
+
*
|
|
65
|
+
* Key reasoning behaviors:
|
|
66
|
+
*
|
|
67
|
+
* Health check failures:
|
|
68
|
+
* - DNS errors abort immediately (retrying won't resolve infrastructure config)
|
|
69
|
+
* - Timeouts on production get extended backoff (service may be under load)
|
|
70
|
+
* - Connection refused gets standard retry (process may be restarting)
|
|
71
|
+
*
|
|
72
|
+
* Variable conflicts:
|
|
73
|
+
* - Multiple connectivity vars pointing cross-environment → block deployment
|
|
74
|
+
* - Single cross-env connectivity var → proceed with operator warning
|
|
75
|
+
* - Sensitive variable overrides → proceed, log for audit without exposing values
|
|
76
|
+
* - Standard overrides → proceed with precedence rules
|
|
77
|
+
*
|
|
78
|
+
* Every decision is recorded to the Debrief. No silent actions.
|
|
79
|
+
*/
|
|
80
|
+
export class SynthAgent {
|
|
81
|
+
debrief;
|
|
82
|
+
deployments;
|
|
83
|
+
artifactStore;
|
|
84
|
+
environmentStore;
|
|
85
|
+
partitionStore;
|
|
86
|
+
healthChecker;
|
|
87
|
+
settingsReader;
|
|
88
|
+
options;
|
|
89
|
+
explicitOptions;
|
|
90
|
+
mcpClientManager;
|
|
91
|
+
constructor(debrief, deployments, artifactStore, environmentStore, partitionStore, healthChecker = new DefaultHealthChecker(), options = {}, settingsReader) {
|
|
92
|
+
this.debrief = debrief;
|
|
93
|
+
this.deployments = deployments;
|
|
94
|
+
this.artifactStore = artifactStore;
|
|
95
|
+
this.environmentStore = environmentStore;
|
|
96
|
+
this.partitionStore = partitionStore;
|
|
97
|
+
this.healthChecker = healthChecker;
|
|
98
|
+
this.settingsReader = settingsReader;
|
|
99
|
+
this.explicitOptions = options;
|
|
100
|
+
this.options = { ...DEFAULT_OPTIONS, ...options };
|
|
101
|
+
}
|
|
102
|
+
/**
|
|
103
|
+
* Returns effective agent options. Precedence (highest wins):
|
|
104
|
+
* 1. Explicit constructor options
|
|
105
|
+
* 2. Global settings from SettingsStore
|
|
106
|
+
* 3. DEFAULT_OPTIONS
|
|
107
|
+
*/
|
|
108
|
+
getEffectiveOptions() {
|
|
109
|
+
if (!this.settingsReader)
|
|
110
|
+
return this.options;
|
|
111
|
+
const settings = this.settingsReader.get();
|
|
112
|
+
return {
|
|
113
|
+
...DEFAULT_OPTIONS,
|
|
114
|
+
healthCheckRetries: settings.agent.defaultHealthCheckRetries,
|
|
115
|
+
...this.explicitOptions,
|
|
116
|
+
};
|
|
117
|
+
}
|
|
118
|
+
// -----------------------------------------------------------------------
|
|
119
|
+
// RBAC permission check (not yet enforced)
|
|
120
|
+
// -----------------------------------------------------------------------
|
|
121
|
+
/**
|
|
122
|
+
* Check whether the caller has the required permission.
|
|
123
|
+
* RBAC enforcement is not yet implemented — always returns true.
|
|
124
|
+
*/
|
|
125
|
+
checkCallerPermission(_actor, _requiredPermission) {
|
|
126
|
+
// RBAC enforcement is not yet implemented. All callers are permitted.
|
|
127
|
+
return true;
|
|
128
|
+
}
|
|
129
|
+
// -----------------------------------------------------------------------
|
|
130
|
+
// Main entry point
|
|
131
|
+
// -----------------------------------------------------------------------
|
|
132
|
+
async triggerDeployment(trigger) {
|
|
133
|
+
const deploymentId = crypto.randomUUID();
|
|
134
|
+
// --- Look up entities from stores -----------------------------------------------
|
|
135
|
+
serverLog("DEPLOY-TRIGGER", { deploymentId, artifactId: trigger.artifactId, environmentId: trigger.environmentId, partitionId: trigger.partitionId ?? null, triggeredBy: trigger.triggeredBy });
|
|
136
|
+
const environment = this.environmentStore.get(trigger.environmentId);
|
|
137
|
+
if (!environment) {
|
|
138
|
+
throw new OrchestrationError("resolve-entities", `Environment not found: ${trigger.environmentId}`, `The trigger references environment ID "${trigger.environmentId}" which does not exist in the environment store. ` +
|
|
139
|
+
`Verify the environment ID is correct and the environment has been created.`);
|
|
140
|
+
}
|
|
141
|
+
let partition;
|
|
142
|
+
if (trigger.partitionId) {
|
|
143
|
+
partition = this.partitionStore.get(trigger.partitionId);
|
|
144
|
+
if (!partition) {
|
|
145
|
+
throw new OrchestrationError("resolve-entities", `Partition not found: ${trigger.partitionId}`, `The trigger references partition ID "${trigger.partitionId}" which does not exist in the partition store. ` +
|
|
146
|
+
`Verify the partition ID is correct and the partition has been created.`);
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
const artifact = this.artifactStore.get(trigger.artifactId);
|
|
150
|
+
if (!artifact) {
|
|
151
|
+
throw new OrchestrationError("resolve-entities", `Artifact not found: ${trigger.artifactId}`, `The trigger references artifact ID "${trigger.artifactId}" which does not exist in the artifact store. ` +
|
|
152
|
+
`Verify the artifact ID is correct and the artifact has been created.`);
|
|
153
|
+
}
|
|
154
|
+
// --- RBAC check (not yet enforced) --------------------------------------------
|
|
155
|
+
const actor = trigger.triggeredBy === "user" ? "user" : "agent";
|
|
156
|
+
if (!this.checkCallerPermission(actor, "deployment.create")) {
|
|
157
|
+
throw new OrchestrationError("permission-check", `Caller "${actor}" lacks deployment.create permission`, `The deployment was rejected because the caller does not have the required ` +
|
|
158
|
+
`"deployment.create" permission. Contact an administrator to request access.`);
|
|
159
|
+
}
|
|
160
|
+
// --- Step 0: Artifact analysis ------------------------------------------------
|
|
161
|
+
const analysisEntry = this.debrief.record({
|
|
162
|
+
partitionId: trigger.partitionId ?? null,
|
|
163
|
+
deploymentId,
|
|
164
|
+
agent: "server",
|
|
165
|
+
decisionType: "artifact-analysis",
|
|
166
|
+
decision: `Analyzed artifact "${artifact.name}" (${artifact.type}) — confidence ${artifact.analysis.confidence}`,
|
|
167
|
+
reasoning: `Artifact "${artifact.name}" is a ${artifact.type} artifact. ` +
|
|
168
|
+
`Analysis summary: ${artifact.analysis.summary} ` +
|
|
169
|
+
`Dependencies: ${artifact.analysis.dependencies.length > 0 ? artifact.analysis.dependencies.join(", ") : "none identified"}. ` +
|
|
170
|
+
`Deployment intent: ${artifact.analysis.deploymentIntent ?? "not specified"}. ` +
|
|
171
|
+
`Analysis confidence: ${artifact.analysis.confidence}. ` +
|
|
172
|
+
`${artifact.annotations.length} operator annotation(s) applied. ` +
|
|
173
|
+
`${artifact.learningHistory.length} learning history entries.`,
|
|
174
|
+
context: {
|
|
175
|
+
artifactId: artifact.id,
|
|
176
|
+
artifactName: artifact.name,
|
|
177
|
+
artifactType: artifact.type,
|
|
178
|
+
confidence: artifact.analysis.confidence,
|
|
179
|
+
dependencies: artifact.analysis.dependencies,
|
|
180
|
+
annotationCount: artifact.annotations.length,
|
|
181
|
+
},
|
|
182
|
+
});
|
|
183
|
+
// --- Step 1: Plan the pipeline -----------------------------------------
|
|
184
|
+
const version = trigger.artifactVersionId ?? "latest";
|
|
185
|
+
const pipelineSteps = [
|
|
186
|
+
"resolve-configuration",
|
|
187
|
+
"preflight-health-check",
|
|
188
|
+
"execute-deployment",
|
|
189
|
+
"post-deploy-verify",
|
|
190
|
+
];
|
|
191
|
+
const planEntry = this.debrief.record({
|
|
192
|
+
partitionId: trigger.partitionId ?? null,
|
|
193
|
+
deploymentId,
|
|
194
|
+
agent: "server",
|
|
195
|
+
decisionType: "pipeline-plan",
|
|
196
|
+
decision: `Planned deployment pipeline: ${pipelineSteps.join(" → ")}`,
|
|
197
|
+
reasoning: `Deployment of "${artifact.name}" v${version} to ${environment.name}` +
|
|
198
|
+
(partition ? ` for partition "${partition.name}"` : "") +
|
|
199
|
+
`. Pipeline includes pre-flight health check to verify ` +
|
|
200
|
+
`target environment is reachable before deploying, and post-deployment verification ` +
|
|
201
|
+
`to confirm the deployment took effect.`,
|
|
202
|
+
context: {
|
|
203
|
+
steps: pipelineSteps,
|
|
204
|
+
artifactId: artifact.id,
|
|
205
|
+
artifactName: artifact.name,
|
|
206
|
+
version,
|
|
207
|
+
environmentName: environment.name,
|
|
208
|
+
partitionName: partition?.name ?? null,
|
|
209
|
+
},
|
|
210
|
+
});
|
|
211
|
+
// --- Create Deployment record ------------------------------------------
|
|
212
|
+
// Initial plan — the Envoy generates the real deployment plan during execution
|
|
213
|
+
const initialPlan = {
|
|
214
|
+
steps: [
|
|
215
|
+
{
|
|
216
|
+
description: `Deploy ${artifact.name} v${version}`,
|
|
217
|
+
action: "deploy",
|
|
218
|
+
target: environment.name,
|
|
219
|
+
reversible: true,
|
|
220
|
+
rollbackAction: "rollback to previous version",
|
|
221
|
+
},
|
|
222
|
+
],
|
|
223
|
+
reasoning: `Initial plan for "${artifact.name}". ` +
|
|
224
|
+
`Deployment intent: ${artifact.analysis.deploymentIntent ?? "standard deployment"}. ` +
|
|
225
|
+
`The Envoy will generate a detailed execution plan based on artifact analysis and environment state.`,
|
|
226
|
+
};
|
|
227
|
+
const planGenEntry = this.debrief.record({
|
|
228
|
+
partitionId: trigger.partitionId ?? null,
|
|
229
|
+
deploymentId,
|
|
230
|
+
agent: "server",
|
|
231
|
+
decisionType: "plan-generation",
|
|
232
|
+
decision: `Generated deployment plan for "${artifact.name}" v${version} — ${initialPlan.steps.length} step(s)`,
|
|
233
|
+
reasoning: initialPlan.reasoning,
|
|
234
|
+
context: {
|
|
235
|
+
stepCount: initialPlan.steps.length,
|
|
236
|
+
artifactName: artifact.name,
|
|
237
|
+
version,
|
|
238
|
+
},
|
|
239
|
+
});
|
|
240
|
+
const approvalEntry = this.debrief.record({
|
|
241
|
+
partitionId: trigger.partitionId ?? null,
|
|
242
|
+
deploymentId,
|
|
243
|
+
agent: "server",
|
|
244
|
+
decisionType: "plan-approval",
|
|
245
|
+
decision: `Auto-approved deployment plan for "${artifact.name}" v${version}`,
|
|
246
|
+
reasoning: `Plan auto-approved without explicit user review. The approval workflow will be ` +
|
|
247
|
+
`presented via the plan review UI before execution begins.`,
|
|
248
|
+
context: {
|
|
249
|
+
autoApproved: true,
|
|
250
|
+
approvedBy: actor,
|
|
251
|
+
},
|
|
252
|
+
});
|
|
253
|
+
const deployment = {
|
|
254
|
+
id: deploymentId,
|
|
255
|
+
artifactId: trigger.artifactId,
|
|
256
|
+
artifactVersionId: trigger.artifactVersionId,
|
|
257
|
+
environmentId: trigger.environmentId,
|
|
258
|
+
partitionId: trigger.partitionId,
|
|
259
|
+
version,
|
|
260
|
+
status: "pending",
|
|
261
|
+
variables: {},
|
|
262
|
+
plan: initialPlan,
|
|
263
|
+
approvedBy: actor,
|
|
264
|
+
approvedAt: new Date(),
|
|
265
|
+
debriefEntryIds: [analysisEntry.id, planEntry.id, planGenEntry.id, approvalEntry.id],
|
|
266
|
+
createdAt: new Date(),
|
|
267
|
+
};
|
|
268
|
+
this.deployments.save(deployment);
|
|
269
|
+
try {
|
|
270
|
+
// --- Step 2: Resolve configuration -----------------------------------
|
|
271
|
+
serverLog("DEPLOY-CONFIG-RESOLVE", { deploymentId: deployment.id });
|
|
272
|
+
const { variables, hasConflicts } = this.resolveConfiguration(deployment, trigger.variables, partition, environment, artifact);
|
|
273
|
+
deployment.variables = variables;
|
|
274
|
+
// --- Step 3: Pre-flight health check ---------------------------------
|
|
275
|
+
deployment.status = "running";
|
|
276
|
+
this.deployments.save(deployment);
|
|
277
|
+
serverLog("DEPLOY-HEALTH-CHECK", { deploymentId: deployment.id, environment: environment.name });
|
|
278
|
+
await this.preflightHealthCheck(deployment, partition, environment, artifact);
|
|
279
|
+
// --- Step 4: Execute deployment ----------------------------------------
|
|
280
|
+
serverLog("DEPLOY-EXECUTE", { deploymentId: deployment.id, artifact: artifact.name, version: deployment.version, environment: environment.name });
|
|
281
|
+
const delegated = await this.executeDeployment(deployment, partition, environment, artifact);
|
|
282
|
+
// --- Step 5: Post-deploy verify ----------------------------------------
|
|
283
|
+
// Only run when Envoy did NOT handle execution — Envoy ingests its own
|
|
284
|
+
// verification debrief entries via delegateToEnvoy(), so calling this
|
|
285
|
+
// after delegation would produce a duplicate/false entry.
|
|
286
|
+
if (!delegated) {
|
|
287
|
+
await this.postDeployVerify(deployment, partition, environment, artifact);
|
|
288
|
+
}
|
|
289
|
+
// --- Success ---------------------------------------------------------
|
|
290
|
+
deployment.status = "succeeded";
|
|
291
|
+
deployment.completedAt = new Date();
|
|
292
|
+
serverLog("DEPLOY-SUCCEEDED", { deploymentId: deployment.id, artifact: artifact.name, version: deployment.version, environment: environment.name, durationMs: deployment.completedAt.getTime() - deployment.createdAt.getTime() });
|
|
293
|
+
const completionEntry = this.debrief.record({
|
|
294
|
+
partitionId: deployment.partitionId ?? null,
|
|
295
|
+
deploymentId: deployment.id,
|
|
296
|
+
agent: "server",
|
|
297
|
+
decisionType: "deployment-completion",
|
|
298
|
+
decision: `Marking deployment of ${artifact.name} v${deployment.version} as succeeded on "${environment.name}"`,
|
|
299
|
+
reasoning: `All four pipeline steps completed: configuration accepted, health check passed, ` +
|
|
300
|
+
`execution finished, post-deploy verification confirmed. ` +
|
|
301
|
+
`${Object.keys(deployment.variables).length} variable(s) applied` +
|
|
302
|
+
(partition ? ` for partition "${partition.name}"` : "") + `. ` +
|
|
303
|
+
(hasConflicts
|
|
304
|
+
? "Variable conflicts were resolved via precedence rules — see earlier debrief entries for per-conflict reasoning."
|
|
305
|
+
: "No variable conflicts encountered — configuration was unambiguous.") +
|
|
306
|
+
` Total duration: ${deployment.completedAt.getTime() - deployment.createdAt.getTime()}ms.`,
|
|
307
|
+
context: {
|
|
308
|
+
durationMs: deployment.completedAt.getTime() - deployment.createdAt.getTime(),
|
|
309
|
+
status: deployment.status,
|
|
310
|
+
variableCount: Object.keys(deployment.variables).length,
|
|
311
|
+
},
|
|
312
|
+
});
|
|
313
|
+
deployment.debriefEntryIds.push(completionEntry.id);
|
|
314
|
+
}
|
|
315
|
+
catch (error) {
|
|
316
|
+
deployment.status = "failed";
|
|
317
|
+
deployment.completedAt = new Date();
|
|
318
|
+
deployment.failureReason =
|
|
319
|
+
error instanceof OrchestrationError
|
|
320
|
+
? error.message
|
|
321
|
+
: `Unexpected error: ${error instanceof Error ? error.message : String(error)}`;
|
|
322
|
+
serverError("DEPLOY-FAILED", { deploymentId: deployment.id, reason: deployment.failureReason, durationMs: deployment.completedAt.getTime() - deployment.createdAt.getTime() });
|
|
323
|
+
const failEntry = this.debrief.record({
|
|
324
|
+
partitionId: deployment.partitionId ?? null,
|
|
325
|
+
deploymentId: deployment.id,
|
|
326
|
+
agent: "server",
|
|
327
|
+
decisionType: "deployment-failure",
|
|
328
|
+
decision: `Deployment failed: ${deployment.failureReason}`,
|
|
329
|
+
reasoning: error instanceof OrchestrationError
|
|
330
|
+
? error.reasoning
|
|
331
|
+
: `Unexpected error during "${artifact.name}" v${version} ` +
|
|
332
|
+
`deployment to "${environment.name}"` +
|
|
333
|
+
(partition ? ` for partition "${partition.name}"` : "") + `: ` +
|
|
334
|
+
`${error instanceof Error ? error.message : String(error)}. ` +
|
|
335
|
+
`This error did not come from the orchestration pipeline (not a health check, ` +
|
|
336
|
+
`configuration, or execution failure) — it may indicate a server-side bug or ` +
|
|
337
|
+
`infrastructure issue. The deployment has been marked as failed and no changes ` +
|
|
338
|
+
`were applied to the target environment. Recommended action: check the server ` +
|
|
339
|
+
`process logs for a stack trace, then re-trigger the deployment. If the error ` +
|
|
340
|
+
`recurs, investigate the server runtime environment.`,
|
|
341
|
+
context: {
|
|
342
|
+
durationMs: deployment.completedAt.getTime() - deployment.createdAt.getTime(),
|
|
343
|
+
status: deployment.status,
|
|
344
|
+
step: error instanceof OrchestrationError ? error.step : "unknown",
|
|
345
|
+
...(error instanceof Error ? { errorMessage: error.message } : {}),
|
|
346
|
+
},
|
|
347
|
+
});
|
|
348
|
+
deployment.debriefEntryIds.push(failEntry.id);
|
|
349
|
+
}
|
|
350
|
+
this.deployments.save(deployment);
|
|
351
|
+
return deployment;
|
|
352
|
+
}
|
|
353
|
+
// -----------------------------------------------------------------------
|
|
354
|
+
// External MCP checks — pre-deployment intelligence from external servers
|
|
355
|
+
// -----------------------------------------------------------------------
|
|
356
|
+
/**
|
|
357
|
+
* Survey connected MCP servers and record available external intelligence
|
|
358
|
+
* to the Debrief. This runs before deployment to surface any relevant
|
|
359
|
+
* monitoring data, incident context, or diagnostic tools.
|
|
360
|
+
*
|
|
361
|
+
* Returns the list of tool call results. No tools are invoked here yet, so the list is currently always empty.
|
|
362
|
+
* Never throws — external server failures must not block deployments.
|
|
363
|
+
*/
|
|
364
|
+
async runExternalChecks(partitionId, environmentId) {
|
|
365
|
+
if (!this.mcpClientManager ||
|
|
366
|
+
this.mcpClientManager.getConnectedServers().length === 0) {
|
|
367
|
+
return [];
|
|
368
|
+
}
|
|
369
|
+
const results = [];
|
|
370
|
+
try {
|
|
371
|
+
const tools = await this.mcpClientManager.listTools();
|
|
372
|
+
const connectedServers = this.mcpClientManager.getConnectedServers();
|
|
373
|
+
this.debrief.record({
|
|
374
|
+
partitionId,
|
|
375
|
+
deploymentId: null,
|
|
376
|
+
agent: "server",
|
|
377
|
+
decisionType: "diagnostic-investigation",
|
|
378
|
+
decision: `${tools.length} external tool(s) available from ${connectedServers.length} MCP server(s)`,
|
|
379
|
+
reasoning: "Pre-deployment check: surveyed connected MCP servers for available intelligence. " +
|
|
380
|
+
`Servers: ${connectedServers.join(", ")}. ` +
|
|
381
|
+
(tools.length > 0
|
|
382
|
+
? `Available tools: ${tools.map((t) => `${t.server}/${t.name}`).join(", ")}.`
|
|
383
|
+
: "No tools exposed by connected servers."),
|
|
384
|
+
context: {
|
|
385
|
+
servers: connectedServers,
|
|
386
|
+
toolCount: tools.length,
|
|
387
|
+
tools: tools.map((t) => ({
|
|
388
|
+
server: t.server,
|
|
389
|
+
name: t.name,
|
|
390
|
+
description: t.description,
|
|
391
|
+
})),
|
|
392
|
+
environmentId,
|
|
393
|
+
},
|
|
394
|
+
});
|
|
395
|
+
}
|
|
396
|
+
catch (error) {
|
|
397
|
+
// Never let external checks block the deployment
|
|
398
|
+
this.debrief.record({
|
|
399
|
+
partitionId,
|
|
400
|
+
deploymentId: null,
|
|
401
|
+
agent: "server",
|
|
402
|
+
decisionType: "diagnostic-investigation",
|
|
403
|
+
decision: "External MCP check failed — proceeding without external intelligence",
|
|
404
|
+
reasoning: `Error surveying MCP servers: ${error instanceof Error ? error.message : String(error)}. ` +
|
|
405
|
+
"This does not affect the deployment. External data sources are supplementary.",
|
|
406
|
+
context: {
|
|
407
|
+
error: error instanceof Error ? error.message : String(error),
|
|
408
|
+
environmentId,
|
|
409
|
+
},
|
|
410
|
+
});
|
|
411
|
+
}
|
|
412
|
+
return results;
|
|
413
|
+
}
|
|
414
|
+
// -----------------------------------------------------------------------
|
|
415
|
+
// Pipeline step: resolve configuration
|
|
416
|
+
// -----------------------------------------------------------------------
|
|
417
|
+
resolveConfiguration(deployment, triggerVariables, partition, environment, artifact) {
|
|
418
|
+
// Precedence: environment → partition (if present) → artifact defaults → trigger overrides
|
|
419
|
+
const resolved = { ...environment.variables };
|
|
420
|
+
const conflicts = [];
|
|
421
|
+
// Partition overrides environment (if partition is provided)
|
|
422
|
+
if (partition) {
|
|
423
|
+
for (const [key, value] of Object.entries(partition.variables)) {
|
|
424
|
+
if (key in resolved && resolved[key] !== value) {
|
|
425
|
+
conflicts.push({
|
|
426
|
+
variable: key,
|
|
427
|
+
winner: "partition",
|
|
428
|
+
winnerValue: value,
|
|
429
|
+
loserValue: resolved[key],
|
|
430
|
+
loserLevel: "environment",
|
|
431
|
+
});
|
|
432
|
+
}
|
|
433
|
+
resolved[key] = value;
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
// Artifact configuration expectations as defaults (only fill gaps, don't override)
|
|
437
|
+
// These are hints from artifact analysis, not hard overrides
|
|
438
|
+
if (artifact.analysis.configurationExpectations) {
|
|
439
|
+
for (const [key, _description] of Object.entries(artifact.analysis.configurationExpectations)) {
|
|
440
|
+
// Only add if not already set — these are expectations, not values
|
|
441
|
+
// The artifact analysis tells us what variables are expected, not what values to use
|
|
442
|
+
// So we skip actually setting them — they serve as documentation
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
// Trigger overrides everything
|
|
446
|
+
if (triggerVariables) {
|
|
447
|
+
for (const [key, value] of Object.entries(triggerVariables)) {
|
|
448
|
+
if (key in resolved && resolved[key] !== value) {
|
|
449
|
+
conflicts.push({
|
|
450
|
+
variable: key,
|
|
451
|
+
winner: "trigger",
|
|
452
|
+
winnerValue: value,
|
|
453
|
+
loserValue: resolved[key],
|
|
454
|
+
loserLevel: partition && key in partition.variables ? "partition" : "environment",
|
|
455
|
+
});
|
|
456
|
+
}
|
|
457
|
+
resolved[key] = value;
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
if (conflicts.length > 0) {
|
|
461
|
+
// Assess risk across ALL conflicts together, then act on the assessment
|
|
462
|
+
const assessment = this.assessConflictRisk(conflicts, environment);
|
|
463
|
+
this.recordConflictReasoning(deployment, assessment, environment);
|
|
464
|
+
if (assessment.action === "block") {
|
|
465
|
+
throw new OrchestrationError("resolve-configuration", `Deployment blocked: ${assessment.riskLevel}-risk variable configuration detected`, assessment.reasoning);
|
|
466
|
+
}
|
|
467
|
+
}
|
|
468
|
+
const partitionVarCount = partition ? Object.keys(partition.variables).length : 0;
|
|
469
|
+
const configEntry = this.debrief.record({
|
|
470
|
+
partitionId: deployment.partitionId ?? null,
|
|
471
|
+
deploymentId: deployment.id,
|
|
472
|
+
agent: "server",
|
|
473
|
+
decisionType: "configuration-resolved",
|
|
474
|
+
decision: conflicts.length === 0
|
|
475
|
+
? `Accepted configuration for "${environment.name}" — ${Object.keys(resolved).length} variable(s) merged, no conflicts`
|
|
476
|
+
: `Accepted configuration for "${environment.name}" — ${conflicts.length} conflict(s) resolved via precedence, proceeding with merged result`,
|
|
477
|
+
reasoning: conflicts.length === 0
|
|
478
|
+
? `Environment "${environment.name}" provided ${Object.keys(environment.variables).length} base variable(s), ` +
|
|
479
|
+
(partition ? `partition added ${partitionVarCount}, ` : "") +
|
|
480
|
+
`trigger added ${Object.keys(triggerVariables ?? {}).length}. ` +
|
|
481
|
+
`No values collided across levels, so the merged configuration is unambiguous. ` +
|
|
482
|
+
`Accepting ${Object.keys(resolved).length} final variable(s) as the deployment configuration.`
|
|
483
|
+
: `${conflicts.length} variable(s) had different values at multiple precedence levels. ` +
|
|
484
|
+
`Resolved using the hierarchy trigger > partition > environment. ` +
|
|
485
|
+
`See preceding debrief entries for per-conflict risk assessment and reasoning. ` +
|
|
486
|
+
`Accepting the merged result as the deployment configuration.`,
|
|
487
|
+
context: {
|
|
488
|
+
variableCount: Object.keys(resolved).length,
|
|
489
|
+
conflictCount: conflicts.length,
|
|
490
|
+
sources: {
|
|
491
|
+
environment: Object.keys(environment.variables).length,
|
|
492
|
+
partition: partitionVarCount,
|
|
493
|
+
trigger: Object.keys(triggerVariables ?? {}).length,
|
|
494
|
+
},
|
|
495
|
+
},
|
|
496
|
+
});
|
|
497
|
+
deployment.debriefEntryIds.push(configEntry.id);
|
|
498
|
+
return { variables: resolved, hasConflicts: conflicts.length > 0 };
|
|
499
|
+
}
|
|
500
|
+
// -----------------------------------------------------------------------
|
|
501
|
+
// Reasoning: variable conflict risk assessment
|
|
502
|
+
// -----------------------------------------------------------------------
|
|
503
|
+
/**
|
|
504
|
+
* Analyze all variable conflicts together and produce a risk assessment.
|
|
505
|
+
*
|
|
506
|
+
* This is where genuine reasoning happens — the decision depends on
|
|
507
|
+
* the combination of factors across all conflicts, not just individual
|
|
508
|
+
* pattern matches:
|
|
509
|
+
*
|
|
510
|
+
* - A single cross-env connectivity var might be intentional partition config
|
|
511
|
+
* - Multiple cross-env connectivity vars are almost certainly misconfiguration
|
|
512
|
+
* - Sensitive vars get audit logging regardless of other factors
|
|
513
|
+
* - The assessed risk level determines whether to proceed or block
|
|
514
|
+
*/
|
|
515
|
+
assessConflictRisk(conflicts, environment) {
|
|
516
|
+
const details = [];
|
|
517
|
+
let crossEnvConnectivityCount = 0;
|
|
518
|
+
const crossEnvConnectivityVars = [];
|
|
519
|
+
for (const conflict of conflicts) {
|
|
520
|
+
const isCrossEnv = this.detectCrossEnvironmentPattern(conflict, environment.name);
|
|
521
|
+
const isConnectivity = CONNECTIVITY_VARIABLE_PATTERNS.some((p) => p.test(conflict.variable));
|
|
522
|
+
const isSensitive = SENSITIVE_VARIABLE_PATTERNS.some((p) => p.test(conflict.variable));
|
|
523
|
+
if (isCrossEnv && isConnectivity) {
|
|
524
|
+
crossEnvConnectivityCount++;
|
|
525
|
+
crossEnvConnectivityVars.push(conflict.variable);
|
|
526
|
+
details.push({
|
|
527
|
+
conflict,
|
|
528
|
+
category: "cross-env-connectivity",
|
|
529
|
+
riskContribution: `${conflict.variable} is a connectivity variable pointing to ` +
|
|
530
|
+
`"${conflict.winnerValue}" in a ${environment.name} deployment — ` +
|
|
531
|
+
`this could route traffic or data to the wrong environment`,
|
|
532
|
+
});
|
|
533
|
+
}
|
|
534
|
+
else if (isCrossEnv) {
|
|
535
|
+
details.push({
|
|
536
|
+
conflict,
|
|
537
|
+
category: "cross-env",
|
|
538
|
+
riskContribution: `${conflict.variable} value "${conflict.winnerValue}" references ` +
|
|
539
|
+
`a different environment than target "${environment.name}"`,
|
|
540
|
+
});
|
|
541
|
+
}
|
|
542
|
+
else if (isSensitive) {
|
|
543
|
+
details.push({
|
|
544
|
+
conflict,
|
|
545
|
+
category: "sensitive",
|
|
546
|
+
riskContribution: `${conflict.variable} is security-sensitive and overridden at ${conflict.winner} level`,
|
|
547
|
+
});
|
|
548
|
+
}
|
|
549
|
+
else {
|
|
550
|
+
details.push({
|
|
551
|
+
conflict,
|
|
552
|
+
category: "standard",
|
|
553
|
+
riskContribution: `${conflict.variable}: ${conflict.winner} value overrides ${conflict.loserLevel} value`,
|
|
554
|
+
});
|
|
555
|
+
}
|
|
556
|
+
}
|
|
557
|
+
// --- Decision logic: compound risk assessment ---
|
|
558
|
+
// Multiple connectivity variables pointing cross-environment = block.
|
|
559
|
+
// One might be intentional. Two or more is a pattern that indicates
|
|
560
|
+
// the partition's variable bindings are wrong for this environment.
|
|
561
|
+
if (crossEnvConnectivityCount >= 2) {
|
|
562
|
+
return {
|
|
563
|
+
action: "block",
|
|
564
|
+
riskLevel: "high",
|
|
565
|
+
reasoning: `${crossEnvConnectivityCount} connectivity variables ` +
|
|
566
|
+
`(${crossEnvConnectivityVars.join(", ")}) are overridden with values ` +
|
|
567
|
+
`referencing a different environment than the deployment target ` +
|
|
568
|
+
`"${environment.name}". ${details.filter((d) => d.category === "cross-env-connectivity").map((d) => d.riskContribution).join(". ")}. ` +
|
|
569
|
+
`A single cross-environment connectivity override might reflect ` +
|
|
570
|
+
`intentional partition-specific infrastructure, but multiple overrides ` +
|
|
571
|
+
`strongly suggest the partition's variable bindings are misconfigured ` +
|
|
572
|
+
`for this environment. Blocking deployment to prevent cross-environment ` +
|
|
573
|
+
`data access or traffic routing. To deploy with this configuration, ` +
|
|
574
|
+
`verify the partition's variables are correct and re-trigger with explicit ` +
|
|
575
|
+
`overrides at the trigger level.`,
|
|
576
|
+
details,
|
|
577
|
+
};
|
|
578
|
+
}
|
|
579
|
+
// Single cross-env connectivity var: proceed but flag as medium risk.
|
|
580
|
+
if (crossEnvConnectivityCount === 1) {
|
|
581
|
+
return {
|
|
582
|
+
action: "proceed",
|
|
583
|
+
riskLevel: "medium",
|
|
584
|
+
reasoning: `One connectivity variable (${crossEnvConnectivityVars[0]}) is overridden ` +
|
|
585
|
+
`with a value referencing a different environment than "${environment.name}". ` +
|
|
586
|
+
`This may reflect intentional partition-specific infrastructure (e.g., a partition ` +
|
|
587
|
+
`that maintains a shared database across environments) or may be ` +
|
|
588
|
+
`misconfiguration. Proceeding with partition-level precedence because a single ` +
|
|
589
|
+
`override does not establish a pattern of misconfiguration. The operator ` +
|
|
590
|
+
`should verify this override is intentional.`,
|
|
591
|
+
details,
|
|
592
|
+
};
|
|
593
|
+
}
|
|
594
|
+
// Cross-env non-connectivity or sensitive-only: low risk, proceed
|
|
595
|
+
return {
|
|
596
|
+
action: "proceed",
|
|
597
|
+
riskLevel: "low",
|
|
598
|
+
reasoning: `Variable conflicts resolved via standard precedence rules ` +
|
|
599
|
+
`(trigger > partition > environment). No high-risk cross-environment ` +
|
|
600
|
+
`connectivity patterns detected.`,
|
|
601
|
+
details,
|
|
602
|
+
};
|
|
603
|
+
}
|
|
604
|
+
/**
|
|
605
|
+
* Record debrief entries for each conflict category found in the assessment.
|
|
606
|
+
*/
|
|
607
|
+
recordConflictReasoning(deployment, assessment, environment) {
|
|
608
|
+
// Group details by category for debrief entries
|
|
609
|
+
const byCategory = new Map();
|
|
610
|
+
for (const detail of assessment.details) {
|
|
611
|
+
const existing = byCategory.get(detail.category) ?? [];
|
|
612
|
+
existing.push(detail);
|
|
613
|
+
byCategory.set(detail.category, existing);
|
|
614
|
+
}
|
|
615
|
+
// Cross-env connectivity (the high-risk ones)
|
|
616
|
+
const crossEnvConn = byCategory.get("cross-env-connectivity");
|
|
617
|
+
if (crossEnvConn) {
|
|
618
|
+
const entry = this.debrief.record({
|
|
619
|
+
partitionId: deployment.partitionId ?? null,
|
|
620
|
+
deploymentId: deployment.id,
|
|
621
|
+
agent: "server",
|
|
622
|
+
decisionType: "variable-conflict",
|
|
623
|
+
decision: assessment.action === "block"
|
|
624
|
+
? `Blocking deployment: ${crossEnvConn.length} cross-environment connectivity conflict(s)`
|
|
625
|
+
: `Cross-environment connectivity override detected in ${crossEnvConn.length} variable(s)`,
|
|
626
|
+
reasoning: assessment.reasoning,
|
|
627
|
+
context: {
|
|
628
|
+
category: "cross-environment",
|
|
629
|
+
riskLevel: assessment.riskLevel,
|
|
630
|
+
action: assessment.action,
|
|
631
|
+
conflicts: crossEnvConn.map((d) => ({
|
|
632
|
+
variable: d.conflict.variable,
|
|
633
|
+
winnerValue: d.conflict.winnerValue,
|
|
634
|
+
loserValue: d.conflict.loserValue,
|
|
635
|
+
})),
|
|
636
|
+
targetEnvironment: environment.name,
|
|
637
|
+
},
|
|
638
|
+
});
|
|
639
|
+
deployment.debriefEntryIds.push(entry.id);
|
|
640
|
+
}
|
|
641
|
+
// Cross-env non-connectivity
|
|
642
|
+
const crossEnv = byCategory.get("cross-env");
|
|
643
|
+
if (crossEnv) {
|
|
644
|
+
const details = crossEnv
|
|
645
|
+
.map((d) => d.riskContribution)
|
|
646
|
+
.join("; ");
|
|
647
|
+
const entry = this.debrief.record({
|
|
648
|
+
partitionId: deployment.partitionId ?? null,
|
|
649
|
+
deploymentId: deployment.id,
|
|
650
|
+
agent: "server",
|
|
651
|
+
decisionType: "variable-conflict",
|
|
652
|
+
decision: `Cross-environment variable pattern in ${crossEnv.length} non-connectivity variable(s)`,
|
|
653
|
+
reasoning: `Detected non-connectivity variable(s) referencing a different environment: ${details}. ` +
|
|
654
|
+
`These are lower risk than connectivity variables because they don't affect ` +
|
|
655
|
+
`data routing. Proceeding with standard precedence.`,
|
|
656
|
+
context: {
|
|
657
|
+
category: "cross-environment-non-connectivity",
|
|
658
|
+
conflicts: crossEnv.map((d) => ({
|
|
659
|
+
variable: d.conflict.variable,
|
|
660
|
+
winnerValue: d.conflict.winnerValue,
|
|
661
|
+
loserValue: d.conflict.loserValue,
|
|
662
|
+
})),
|
|
663
|
+
},
|
|
664
|
+
});
|
|
665
|
+
deployment.debriefEntryIds.push(entry.id);
|
|
666
|
+
}
|
|
667
|
+
// Sensitive overrides
|
|
668
|
+
const sensitiveDetails = byCategory.get("sensitive");
|
|
669
|
+
if (sensitiveDetails) {
|
|
670
|
+
const entry = this.debrief.record({
|
|
671
|
+
partitionId: deployment.partitionId ?? null,
|
|
672
|
+
deploymentId: deployment.id,
|
|
673
|
+
agent: "server",
|
|
674
|
+
decisionType: "variable-conflict",
|
|
675
|
+
decision: `Security-sensitive variable(s) overridden: ${sensitiveDetails.map((d) => d.conflict.variable).join(", ")}`,
|
|
676
|
+
reasoning: `${sensitiveDetails.length} variable(s) matching security-sensitive patterns ` +
|
|
677
|
+
`(secrets, keys, tokens, credentials) are being overridden by higher-precedence ` +
|
|
678
|
+
`levels. ${sensitiveDetails.map((d) => d.riskContribution).join("; ")}. ` +
|
|
679
|
+
`Applying precedence rules as configured. These overrides are recorded for ` +
|
|
680
|
+
`audit purposes.`,
|
|
681
|
+
context: {
|
|
682
|
+
category: "sensitive-override",
|
|
683
|
+
// Intentionally omit actual values for sensitive variables
|
|
684
|
+
variables: sensitiveDetails.map((d) => ({
|
|
685
|
+
variable: d.conflict.variable,
|
|
686
|
+
overriddenBy: d.conflict.winner,
|
|
687
|
+
})),
|
|
688
|
+
},
|
|
689
|
+
});
|
|
690
|
+
deployment.debriefEntryIds.push(entry.id);
|
|
691
|
+
}
|
|
692
|
+
// Standard overrides
|
|
693
|
+
const standardDetails = byCategory.get("standard");
|
|
694
|
+
if (standardDetails) {
|
|
695
|
+
const details = standardDetails
|
|
696
|
+
.map((d) => `${d.conflict.variable}: used ${d.conflict.winner} value ` +
|
|
697
|
+
`"${d.conflict.winnerValue}" over ${d.conflict.loserLevel} value ` +
|
|
698
|
+
`"${d.conflict.loserValue}"`)
|
|
699
|
+
.join("; ");
|
|
700
|
+
const entry = this.debrief.record({
|
|
701
|
+
partitionId: deployment.partitionId ?? null,
|
|
702
|
+
deploymentId: deployment.id,
|
|
703
|
+
agent: "server",
|
|
704
|
+
decisionType: "variable-conflict",
|
|
705
|
+
decision: `Resolved ${standardDetails.length} variable conflict(s) via precedence rules`,
|
|
706
|
+
reasoning: `Standard precedence applied (trigger > partition > environment). ` +
|
|
707
|
+
`Conflicts: ${details}. These are routine overrides consistent ` +
|
|
708
|
+
`with the configuration hierarchy.`,
|
|
709
|
+
context: {
|
|
710
|
+
category: "standard-override",
|
|
711
|
+
conflicts: standardDetails.map((d) => ({
|
|
712
|
+
variable: d.conflict.variable,
|
|
713
|
+
winner: d.conflict.winner,
|
|
714
|
+
winnerValue: d.conflict.winnerValue,
|
|
715
|
+
loserLevel: d.conflict.loserLevel,
|
|
716
|
+
loserValue: d.conflict.loserValue,
|
|
717
|
+
})),
|
|
718
|
+
},
|
|
719
|
+
});
|
|
720
|
+
deployment.debriefEntryIds.push(entry.id);
|
|
721
|
+
}
|
|
722
|
+
}
|
|
723
|
+
/**
|
|
724
|
+
* Detect if a variable's winning value might reference the wrong environment.
|
|
725
|
+
*/
|
|
726
|
+
detectCrossEnvironmentPattern(conflict, targetEnvName) {
|
|
727
|
+
const envPatterns = {
|
|
728
|
+
production: [/\bstag/i, /\bdev\b/i, /\btest\b/i],
|
|
729
|
+
staging: [/\bprod/i],
|
|
730
|
+
development: [/\bprod/i, /\bstag/i],
|
|
731
|
+
};
|
|
732
|
+
const patternsToCheck = envPatterns[targetEnvName.toLowerCase()];
|
|
733
|
+
if (!patternsToCheck)
|
|
734
|
+
return false;
|
|
735
|
+
return patternsToCheck.some((p) => p.test(conflict.winnerValue));
|
|
736
|
+
}
|
|
737
|
+
// -----------------------------------------------------------------------
|
|
738
|
+
// Pipeline step: pre-flight health check
|
|
739
|
+
// -----------------------------------------------------------------------
|
|
740
|
+
/**
|
|
741
|
+
* Pre-flight health check with context-dependent retry logic.
|
|
742
|
+
*
|
|
743
|
+
* The retry strategy depends on the error type:
|
|
744
|
+
* - DNS failure → abort immediately (retrying won't fix infrastructure config)
|
|
745
|
+
* - Timeout in production → retry with extended backoff (service under load)
|
|
746
|
+
* - Connection refused → retry with standard backoff (process restarting)
|
|
747
|
+
* - After retries exhausted → fail with environment-appropriate reasoning
|
|
748
|
+
*/
|
|
749
|
+
async preflightHealthCheck(deployment, partition, environment, artifact) {
|
|
750
|
+
const serviceId = `${artifact.name}/${environment.name}`;
|
|
751
|
+
const opts = this.getEffectiveOptions();
|
|
752
|
+
const maxAttempts = opts.healthCheckRetries + 1;
|
|
753
|
+
let attempt = 1;
|
|
754
|
+
const firstCheck = await this.healthChecker.check(serviceId, {
|
|
755
|
+
partitionId: partition?.id ?? "",
|
|
756
|
+
environmentName: environment.name,
|
|
757
|
+
});
|
|
758
|
+
if (firstCheck.reachable) {
|
|
759
|
+
const entry = this.debrief.record({
|
|
760
|
+
partitionId: deployment.partitionId ?? null,
|
|
761
|
+
deploymentId: deployment.id,
|
|
762
|
+
agent: "server",
|
|
763
|
+
decisionType: "health-check",
|
|
764
|
+
decision: `Proceeding with deployment — target environment "${environment.name}" confirmed healthy in ${firstCheck.responseTimeMs}ms`,
|
|
765
|
+
reasoning: `Health check to "${environment.name}" returned a successful response ` +
|
|
766
|
+
`in ${firstCheck.responseTimeMs}ms on the first attempt. This confirms ` +
|
|
767
|
+
`the target infrastructure is running and network-accessible. No reason ` +
|
|
768
|
+
`to delay — proceeding to deployment execution.`,
|
|
769
|
+
context: {
|
|
770
|
+
serviceId,
|
|
771
|
+
responseTimeMs: firstCheck.responseTimeMs,
|
|
772
|
+
attempt: 1,
|
|
773
|
+
},
|
|
774
|
+
});
|
|
775
|
+
deployment.debriefEntryIds.push(entry.id);
|
|
776
|
+
return;
|
|
777
|
+
}
|
|
778
|
+
// First check failed — reason about what to do
|
|
779
|
+
const decision = this.reasonAboutHealthFailure(firstCheck, environment, attempt, maxAttempts, opts);
|
|
780
|
+
if (decision.action === "abort") {
|
|
781
|
+
// Reasoning determined retrying won't help (e.g., DNS failure)
|
|
782
|
+
const abortEntry = this.debrief.record({
|
|
783
|
+
partitionId: deployment.partitionId ?? null,
|
|
784
|
+
deploymentId: deployment.id,
|
|
785
|
+
agent: "server",
|
|
786
|
+
decisionType: "health-check",
|
|
787
|
+
decision: "Pre-flight health check failed — aborting without retry",
|
|
788
|
+
reasoning: decision.reasoning,
|
|
789
|
+
context: {
|
|
790
|
+
serviceId,
|
|
791
|
+
error: firstCheck.error,
|
|
792
|
+
errorCategory: this.categorizeError(firstCheck.error),
|
|
793
|
+
attempt,
|
|
794
|
+
retriesSkipped: true,
|
|
795
|
+
},
|
|
796
|
+
});
|
|
797
|
+
deployment.debriefEntryIds.push(abortEntry.id);
|
|
798
|
+
throw new OrchestrationError("preflight-health-check", `Target environment "${environment.name}" unreachable: ${firstCheck.error}`, decision.reasoning);
|
|
799
|
+
}
|
|
800
|
+
// Decision is to retry
|
|
801
|
+
const retryEntry = this.debrief.record({
|
|
802
|
+
partitionId: deployment.partitionId ?? null,
|
|
803
|
+
deploymentId: deployment.id,
|
|
804
|
+
agent: "server",
|
|
805
|
+
decisionType: "health-check",
|
|
806
|
+
decision: "Pre-flight health check failed — attempting retry",
|
|
807
|
+
reasoning: decision.reasoning,
|
|
808
|
+
context: {
|
|
809
|
+
serviceId,
|
|
810
|
+
error: firstCheck.error,
|
|
811
|
+
errorCategory: this.categorizeError(firstCheck.error),
|
|
812
|
+
backoffMs: decision.delayMs,
|
|
813
|
+
retriesRemaining: maxAttempts - attempt,
|
|
814
|
+
attempt,
|
|
815
|
+
},
|
|
816
|
+
});
|
|
817
|
+
deployment.debriefEntryIds.push(retryEntry.id);
|
|
818
|
+
// Retry loop — each iteration re-evaluates the situation
|
|
819
|
+
for (let i = 0; i < opts.healthCheckRetries; i++) {
|
|
820
|
+
attempt++;
|
|
821
|
+
await this.delay(decision.delayMs);
|
|
822
|
+
const retryCheck = await this.healthChecker.check(serviceId, {
|
|
823
|
+
partitionId: partition?.id ?? "",
|
|
824
|
+
environmentName: environment.name,
|
|
825
|
+
});
|
|
826
|
+
if (retryCheck.reachable) {
|
|
827
|
+
const recoveryEntry = this.debrief.record({
|
|
828
|
+
partitionId: deployment.partitionId ?? null,
|
|
829
|
+
deploymentId: deployment.id,
|
|
830
|
+
agent: "server",
|
|
831
|
+
decisionType: "health-check",
|
|
832
|
+
decision: "Health check recovered on retry — proceeding with deployment",
|
|
833
|
+
reasoning: `Retry attempt ${i + 1} succeeded (response time: ` +
|
|
834
|
+
`${retryCheck.responseTimeMs}ms). The initial failure was transient — ` +
|
|
835
|
+
`likely caused by a brief service restart or momentary load spike. ` +
|
|
836
|
+
`Target environment "${environment.name}" is now confirmed healthy. ` +
|
|
837
|
+
`Proceeding with deployment.`,
|
|
838
|
+
context: {
|
|
839
|
+
serviceId,
|
|
840
|
+
responseTimeMs: retryCheck.responseTimeMs,
|
|
841
|
+
attempt,
|
|
842
|
+
recoveredAfterMs: decision.delayMs * (i + 1),
|
|
843
|
+
},
|
|
844
|
+
});
|
|
845
|
+
deployment.debriefEntryIds.push(recoveryEntry.id);
|
|
846
|
+
return;
|
|
847
|
+
}
|
|
848
|
+
}
|
|
849
|
+
// All retries exhausted — produce context-aware failure
|
|
850
|
+
const exhaustedDecision = this.reasonAboutHealthFailure(firstCheck, environment, maxAttempts, maxAttempts, opts);
|
|
851
|
+
throw new OrchestrationError("preflight-health-check", `Target environment "${environment.name}" unreachable after ${maxAttempts} attempt(s)`, exhaustedDecision.reasoning);
|
|
852
|
+
}
|
|
853
|
+
// -----------------------------------------------------------------------
|
|
854
|
+
// Reasoning: health check failure analysis
|
|
855
|
+
// -----------------------------------------------------------------------
|
|
856
|
+
/**
|
|
857
|
+
* Analyze a health check failure and decide what to do.
|
|
858
|
+
*
|
|
859
|
+
* The decision depends on three factors:
|
|
860
|
+
* 1. Error type (DNS vs timeout vs connection refused vs server error)
|
|
861
|
+
* 2. Environment context (production gets more patience)
|
|
862
|
+
* 3. Whether retries remain
|
|
863
|
+
*
|
|
864
|
+
* Different factor combinations produce different actions:
|
|
865
|
+
* - DNS failure → abort immediately regardless of retries remaining
|
|
866
|
+
* - Timeout + production + retries remaining → retry with extended backoff
|
|
867
|
+
* - Connection refused + retries remaining → retry with standard backoff
|
|
868
|
+
* - Any error + no retries remaining → abort with environment-specific message
|
|
869
|
+
*/
|
|
870
|
+
reasonAboutHealthFailure(checkResult, environment, attempt, maxAttempts, opts = this.options) {
|
|
871
|
+
const errorCategory = this.categorizeError(checkResult.error);
|
|
872
|
+
const isProduction = environment.name.toLowerCase() === "production";
|
|
873
|
+
const retriesRemaining = maxAttempts - attempt;
|
|
874
|
+
// DNS failures are infrastructure-level — retrying won't help
|
|
875
|
+
if (errorCategory === "dns") {
|
|
876
|
+
return {
|
|
877
|
+
action: "abort",
|
|
878
|
+
delayMs: 0,
|
|
879
|
+
reasoning: `DNS resolution failed for "${environment.name}" (${checkResult.error}). ` +
|
|
880
|
+
`This is an infrastructure configuration issue, not a transient failure — ` +
|
|
881
|
+
`retrying will not resolve it. The environment's hostname cannot be resolved, ` +
|
|
882
|
+
`which typically indicates the service has not been provisioned or DNS records ` +
|
|
883
|
+
`are misconfigured. Recommended action: verify DNS configuration for the ` +
|
|
884
|
+
`target environment.`,
|
|
885
|
+
};
|
|
886
|
+
}
|
|
887
|
+
// No retries remaining — produce context-aware abort
|
|
888
|
+
if (retriesRemaining <= 0) {
|
|
889
|
+
const envContext = isProduction
|
|
890
|
+
? `This is a production environment — deploying to unreachable production ` +
|
|
891
|
+
`infrastructure would create a silent failure with no running service ` +
|
|
892
|
+
`to handle traffic.`
|
|
893
|
+
: `Aborting to prevent deploying artifacts to infrastructure that ` +
|
|
894
|
+
`cannot serve them.`;
|
|
895
|
+
return {
|
|
896
|
+
action: "abort",
|
|
897
|
+
delayMs: 0,
|
|
898
|
+
reasoning: `${attempt} health check attempt(s) to "${environment.name}" all failed ` +
|
|
899
|
+
`(error: ${checkResult.error ?? "service unreachable"}, ` +
|
|
900
|
+
`category: ${errorCategory}). Consecutive failures indicate a persistent ` +
|
|
901
|
+
`infrastructure issue rather than a transient glitch. ${envContext} ` +
|
|
902
|
+
`Recommended action: verify the target environment's infrastructure is ` +
|
|
903
|
+
`running and network-accessible, then re-trigger the deployment.`,
|
|
904
|
+
};
|
|
905
|
+
}
|
|
906
|
+
// Timeout in production → extended backoff (service may be under load)
|
|
907
|
+
if (errorCategory === "timeout" && isProduction) {
|
|
908
|
+
const extendedDelay = opts.healthCheckBackoffMs * 2;
|
|
909
|
+
return {
|
|
910
|
+
action: "retry",
|
|
911
|
+
delayMs: extendedDelay,
|
|
912
|
+
reasoning: `Health check to production environment "${environment.name}" timed out ` +
|
|
913
|
+
`(${checkResult.error}). Production services under heavy load may respond ` +
|
|
914
|
+
`slowly rather than refusing connections outright. Using extended backoff ` +
|
|
915
|
+
`(${extendedDelay}ms instead of ${opts.healthCheckBackoffMs}ms) ` +
|
|
916
|
+
`to allow the service time to recover before retrying. ` +
|
|
917
|
+
`${retriesRemaining} retry attempt(s) remaining.`,
|
|
918
|
+
};
|
|
919
|
+
}
|
|
920
|
+
// Server error (5xx) → the service is running but unhealthy
|
|
921
|
+
if (errorCategory === "server_error") {
|
|
922
|
+
return {
|
|
923
|
+
action: "retry",
|
|
924
|
+
delayMs: opts.healthCheckBackoffMs,
|
|
925
|
+
reasoning: `Health check to "${environment.name}" returned a server error ` +
|
|
926
|
+
`(${checkResult.error}). The service is running and network-reachable ` +
|
|
927
|
+
`but reporting unhealthy status — this could be a transient condition ` +
|
|
928
|
+
`during startup or a cascading failure from an upstream dependency. ` +
|
|
929
|
+
`Retrying in ${opts.healthCheckBackoffMs}ms. ` +
|
|
930
|
+
`${retriesRemaining} retry attempt(s) remaining.`,
|
|
931
|
+
};
|
|
932
|
+
}
|
|
933
|
+
// Connection refused or unknown → standard retry
|
|
934
|
+
return {
|
|
935
|
+
action: "retry",
|
|
936
|
+
delayMs: opts.healthCheckBackoffMs,
|
|
937
|
+
reasoning: `Health check to "${environment.name}" failed ` +
|
|
938
|
+
`(${checkResult.error ?? "service unreachable"}, category: ${errorCategory}). ` +
|
|
939
|
+
`The service process may be restarting or not yet started. ` +
|
|
940
|
+
`Retrying in ${opts.healthCheckBackoffMs}ms. ` +
|
|
941
|
+
`${retriesRemaining} retry attempt(s) remaining.`,
|
|
942
|
+
};
|
|
943
|
+
}
|
|
944
|
+
/**
|
|
945
|
+
* Categorize a health check error string into a semantic type.
|
|
946
|
+
* This drives the retry/abort decision tree.
|
|
947
|
+
*/
|
|
948
|
+
categorizeError(error) {
|
|
949
|
+
if (!error)
|
|
950
|
+
return "unknown";
|
|
951
|
+
const lower = error.toLowerCase();
|
|
952
|
+
if (lower.includes("dns") ||
|
|
953
|
+
lower.includes("enotfound") ||
|
|
954
|
+
lower.includes("getaddrinfo"))
|
|
955
|
+
return "dns";
|
|
956
|
+
if (lower.includes("timeout") ||
|
|
957
|
+
lower.includes("etimedout") ||
|
|
958
|
+
lower.includes("timed out"))
|
|
959
|
+
return "timeout";
|
|
960
|
+
if (lower.includes("econnrefused") ||
|
|
961
|
+
lower.includes("connection refused"))
|
|
962
|
+
return "connection_refused";
|
|
963
|
+
if (lower.includes("500") ||
|
|
964
|
+
lower.includes("502") ||
|
|
965
|
+
lower.includes("503"))
|
|
966
|
+
return "server_error";
|
|
967
|
+
return "unknown";
|
|
968
|
+
}
|
|
969
|
+
// -----------------------------------------------------------------------
|
|
970
|
+
// Pipeline steps: execute and verify
|
|
971
|
+
// -----------------------------------------------------------------------
|
|
972
|
+
async executeDeployment(deployment, partition, environment, artifact) {
|
|
973
|
+
const envoyConfig = this.settingsReader?.get().envoy;
|
|
974
|
+
// When a settingsReader is present (production), Command delegates all
|
|
975
|
+
// execution to Envoy. If Envoy is unreachable, the deployment fails
|
|
976
|
+
// explicitly — no silent local fallback.
|
|
977
|
+
if (this.settingsReader) {
|
|
978
|
+
if (!envoyConfig?.url) {
|
|
979
|
+
throw new OrchestrationError("execute-deployment", "No Envoy configured — cannot execute deployment", `Command cannot execute deployment steps locally. An Envoy must be configured ` +
|
|
980
|
+
`and reachable to run deployments. Configure an Envoy URL in settings ` +
|
|
981
|
+
`(Settings → envoy.url) and ensure the Envoy is running and healthy.`);
|
|
982
|
+
}
|
|
983
|
+
await this.delegateToEnvoy(deployment, partition, environment, artifact, envoyConfig);
|
|
984
|
+
return true;
|
|
985
|
+
}
|
|
986
|
+
// No settingsReader configured — execution skipped (test/offline environment)
|
|
987
|
+
const execEntry = this.debrief.record({
|
|
988
|
+
partitionId: deployment.partitionId ?? null,
|
|
989
|
+
deploymentId: deployment.id,
|
|
990
|
+
agent: "server",
|
|
991
|
+
decisionType: "deployment-execution",
|
|
992
|
+
decision: `Skipped Envoy delegation for ${artifact.name} v${deployment.version} — no settings reader configured`,
|
|
993
|
+
reasoning: `No settings reader is configured in this environment, so Envoy delegation was skipped. ` +
|
|
994
|
+
`${Object.keys(deployment.variables).length} variable(s) were resolved but not injected.`,
|
|
995
|
+
context: {
|
|
996
|
+
step: "execute-deployment",
|
|
997
|
+
envoySkipped: true,
|
|
998
|
+
variableCount: Object.keys(deployment.variables).length,
|
|
999
|
+
},
|
|
1000
|
+
});
|
|
1001
|
+
deployment.debriefEntryIds.push(execEntry.id);
|
|
1002
|
+
return false;
|
|
1003
|
+
}
|
|
1004
|
+
/**
|
|
1005
|
+
* Delegate deployment execution to the configured Envoy.
|
|
1006
|
+
* Throws OrchestrationError if the Envoy is unreachable or reports failure.
|
|
1007
|
+
*/
|
|
1008
|
+
async delegateToEnvoy(deployment, partition, environment, artifact, envoyConfig) {
|
|
1009
|
+
const client = new EnvoyClient(envoyConfig.url, envoyConfig.timeoutMs);
|
|
1010
|
+
// Pre-flight: check if the Envoy is healthy before delegating
|
|
1011
|
+
try {
|
|
1012
|
+
const health = await client.checkHealth();
|
|
1013
|
+
if (health.status !== "healthy" || !health.readiness.ready) {
|
|
1014
|
+
throw new OrchestrationError("execute-deployment", `Envoy at ${envoyConfig.url} is not ready (${health.status}: ${health.readiness.reason})`, `Deployment cannot proceed because the Envoy at ${envoyConfig.url} reports ` +
|
|
1015
|
+
`status "${health.status}" with readiness: "${health.readiness.reason}". ` +
|
|
1016
|
+
`The target machine's Envoy must be healthy before deployments can run. ` +
|
|
1017
|
+
`Check the Envoy process, then re-trigger the deployment.`);
|
|
1018
|
+
}
|
|
1019
|
+
}
|
|
1020
|
+
catch (err) {
|
|
1021
|
+
if (err instanceof OrchestrationError)
|
|
1022
|
+
throw err;
|
|
1023
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
1024
|
+
throw new OrchestrationError("execute-deployment", `Envoy at ${envoyConfig.url} is unreachable: ${message}`, `Deployment cannot proceed because the Envoy at ${envoyConfig.url} is not responding: ${message}. ` +
|
|
1025
|
+
`Command does not execute deployment steps locally — an Envoy must be running on the target machine. ` +
|
|
1026
|
+
`Verify the Envoy process is running, check network connectivity, then re-trigger the deployment.`);
|
|
1027
|
+
}
|
|
1028
|
+
// Envoy is healthy — delegate the deployment
|
|
1029
|
+
const delegateEntry = this.debrief.record({
|
|
1030
|
+
partitionId: deployment.partitionId ?? null,
|
|
1031
|
+
deploymentId: deployment.id,
|
|
1032
|
+
agent: "server",
|
|
1033
|
+
decisionType: "deployment-execution",
|
|
1034
|
+
decision: `Delegating execution of ${artifact.name} v${deployment.version} to Envoy at ${envoyConfig.url}`,
|
|
1035
|
+
reasoning: `Envoy at ${envoyConfig.url} is healthy and ready. Delegating full deployment ` +
|
|
1036
|
+
`execution for "${artifact.name}" v${deployment.version} on "${environment.name}"` +
|
|
1037
|
+
(partition ? ` for partition "${partition.name}"` : "") +
|
|
1038
|
+
`. The Envoy will execute all steps, verify artifacts, ` +
|
|
1039
|
+
`and return debrief entries for ingestion into Command's unified decision diary.`,
|
|
1040
|
+
context: {
|
|
1041
|
+
step: "execute-deployment",
|
|
1042
|
+
envoyUrl: envoyConfig.url,
|
|
1043
|
+
delegated: true,
|
|
1044
|
+
},
|
|
1045
|
+
});
|
|
1046
|
+
deployment.debriefEntryIds.push(delegateEntry.id);
|
|
1047
|
+
let envoyResult;
|
|
1048
|
+
try {
|
|
1049
|
+
// Construct progress callback URL so the envoy can stream execution events back
|
|
1050
|
+
const commandPort = parseInt(process.env.PORT ?? "9410", 10);
|
|
1051
|
+
const commandHost = process.env.SYNTH_COMMAND_HOST ?? "localhost";
|
|
1052
|
+
const progressCallbackUrl = `http://${commandHost}:${commandPort}/api/deployments/${deployment.id}/progress`;
|
|
1053
|
+
envoyResult = await client.deploy({
|
|
1054
|
+
deploymentId: deployment.id,
|
|
1055
|
+
partitionId: deployment.partitionId ?? "",
|
|
1056
|
+
environmentId: deployment.environmentId ?? "",
|
|
1057
|
+
operationId: deployment.artifactId, // Envoy still uses operationId in its API
|
|
1058
|
+
version: deployment.version,
|
|
1059
|
+
variables: deployment.variables,
|
|
1060
|
+
environmentName: environment.name,
|
|
1061
|
+
partitionName: partition?.name ?? "",
|
|
1062
|
+
progressCallbackUrl,
|
|
1063
|
+
});
|
|
1064
|
+
}
|
|
1065
|
+
catch (err) {
|
|
1066
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
1067
|
+
throw new OrchestrationError("execute-deployment", `Envoy delegation failed: ${message}`, `Command delegated deployment to Envoy at ${envoyConfig.url} but the request failed: ${message}. ` +
|
|
1068
|
+
`The Envoy may have gone down during execution. The deployment state on the Envoy is unknown — ` +
|
|
1069
|
+
`check the Envoy's health and local state before re-triggering.`);
|
|
1070
|
+
}
|
|
1071
|
+
// Ingest debrief entries from the Envoy into Command's unified diary
|
|
1072
|
+
if (envoyResult.debriefEntries && envoyResult.debriefEntries.length > 0) {
|
|
1073
|
+
for (const entry of envoyResult.debriefEntries) {
|
|
1074
|
+
const ingested = this.debrief.record({
|
|
1075
|
+
partitionId: entry.partitionId ?? deployment.partitionId ?? null,
|
|
1076
|
+
deploymentId: entry.deploymentId ?? deployment.id,
|
|
1077
|
+
agent: entry.agent,
|
|
1078
|
+
decisionType: entry.decisionType,
|
|
1079
|
+
decision: entry.decision,
|
|
1080
|
+
reasoning: entry.reasoning,
|
|
1081
|
+
context: {
|
|
1082
|
+
...entry.context,
|
|
1083
|
+
_envoyDelegation: {
|
|
1084
|
+
envoyUrl: envoyConfig.url,
|
|
1085
|
+
originalEntryId: entry.id,
|
|
1086
|
+
originalTimestamp: entry.timestamp,
|
|
1087
|
+
},
|
|
1088
|
+
},
|
|
1089
|
+
});
|
|
1090
|
+
deployment.debriefEntryIds.push(ingested.id);
|
|
1091
|
+
}
|
|
1092
|
+
}
|
|
1093
|
+
// Handle Envoy result
|
|
1094
|
+
if (!envoyResult.success) {
|
|
1095
|
+
const reason = envoyResult.failureReason ?? "Envoy reported deployment failure with no reason";
|
|
1096
|
+
throw new OrchestrationError("execute-deployment", `Envoy deployment failed: ${reason}`, `Deployment was delegated to Envoy at ${envoyConfig.url}. The Envoy executed the deployment ` +
|
|
1097
|
+
`and reported failure: ${reason}. ` +
|
|
1098
|
+
`Execution took ${envoyResult.executionDurationMs}ms. ` +
|
|
1099
|
+
`${envoyResult.debriefEntries?.length ?? 0} debrief entries were ingested from the Envoy. ` +
|
|
1100
|
+
`Check the Envoy debrief entries above for detailed step-by-step reasoning about the failure.`);
|
|
1101
|
+
}
|
|
1102
|
+
// Record successful delegation completion
|
|
1103
|
+
const completionEntry = this.debrief.record({
|
|
1104
|
+
partitionId: deployment.partitionId ?? null,
|
|
1105
|
+
deploymentId: deployment.id,
|
|
1106
|
+
agent: "server",
|
|
1107
|
+
decisionType: "deployment-execution",
|
|
1108
|
+
decision: `Envoy completed deployment successfully in ${envoyResult.executionDurationMs}ms — ${envoyResult.artifacts.length} artifact(s) produced`,
|
|
1109
|
+
reasoning: `Envoy at ${envoyConfig.url} executed all deployment steps and verification checks successfully. ` +
|
|
1110
|
+
`Execution: ${envoyResult.executionDurationMs}ms, total: ${envoyResult.totalDurationMs}ms. ` +
|
|
1111
|
+
`Artifacts: ${envoyResult.artifacts.length > 0 ? envoyResult.artifacts.join(", ") : "none"}. ` +
|
|
1112
|
+
`Verification: ${envoyResult.verificationPassed ? "passed" : "skipped"} ` +
|
|
1113
|
+
`(${envoyResult.verificationChecks.length} check(s)). ` +
|
|
1114
|
+
`${envoyResult.debriefEntries?.length ?? 0} debrief entries ingested into Command's unified diary.`,
|
|
1115
|
+
context: {
|
|
1116
|
+
step: "execute-deployment",
|
|
1117
|
+
envoyUrl: envoyConfig.url,
|
|
1118
|
+
delegated: true,
|
|
1119
|
+
executionDurationMs: envoyResult.executionDurationMs,
|
|
1120
|
+
totalDurationMs: envoyResult.totalDurationMs,
|
|
1121
|
+
artifactCount: envoyResult.artifacts.length,
|
|
1122
|
+
verificationPassed: envoyResult.verificationPassed,
|
|
1123
|
+
debriefEntriesIngested: envoyResult.debriefEntries?.length ?? 0,
|
|
1124
|
+
},
|
|
1125
|
+
});
|
|
1126
|
+
deployment.debriefEntryIds.push(completionEntry.id);
|
|
1127
|
+
}
|
|
1128
|
+
async postDeployVerify(deployment, partition, environment, artifact) {
|
|
1129
|
+
const entry = this.debrief.record({
|
|
1130
|
+
partitionId: deployment.partitionId ?? null,
|
|
1131
|
+
deploymentId: deployment.id,
|
|
1132
|
+
agent: "server",
|
|
1133
|
+
decisionType: "deployment-verification",
|
|
1134
|
+
decision: `Post-deploy verification skipped for ${artifact.name} v${deployment.version} — no Envoy configured`,
|
|
1135
|
+
reasoning: `No Envoy is configured in this environment, so post-deploy verification was skipped. ` +
|
|
1136
|
+
`Configure an Envoy to enable real verification of "${artifact.name}" v${deployment.version} on "${environment.name}".`,
|
|
1137
|
+
context: {
|
|
1138
|
+
step: "post-deploy-verify",
|
|
1139
|
+
envoySkipped: true,
|
|
1140
|
+
variableCount: Object.keys(deployment.variables).length,
|
|
1141
|
+
artifactName: artifact.name,
|
|
1142
|
+
version: deployment.version,
|
|
1143
|
+
},
|
|
1144
|
+
});
|
|
1145
|
+
deployment.debriefEntryIds.push(entry.id);
|
|
1146
|
+
}
|
|
1147
|
+
// -----------------------------------------------------------------------
|
|
1148
|
+
// Utilities
|
|
1149
|
+
// -----------------------------------------------------------------------
|
|
1150
|
+
delay(ms) {
|
|
1151
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
1152
|
+
}
|
|
1153
|
+
}
|
|
1154
|
+
// ---------------------------------------------------------------------------
|
|
1155
|
+
// In-memory deployment store
|
|
1156
|
+
// ---------------------------------------------------------------------------
|
|
1157
|
+
/**
 * Deployment store backed by a Map keyed on deployment id.
 *
 * Records are stored by reference (no copying) and queries return them in
 * insertion order unless a method says otherwise.
 */
export class InMemoryDeploymentStore {
    deployments = new Map();

    /** Insert the deployment, replacing any existing record with the same id. */
    save(deployment) {
        this.deployments.set(deployment.id, deployment);
    }

    /** Look up one deployment by id; undefined when absent. */
    get(id) {
        return this.deployments.get(id);
    }

    /** All deployments whose partitionId is strictly equal to the given id. */
    getByPartition(partitionId) {
        const matches = [];
        for (const record of this.deployments.values()) {
            if (record.partitionId === partitionId) {
                matches.push(record);
            }
        }
        return matches;
    }

    /** All deployments whose artifactId is strictly equal to the given id. */
    getByArtifact(artifactId) {
        const matches = [];
        for (const record of this.deployments.values()) {
            if (record.artifactId === artifactId) {
                matches.push(record);
            }
        }
        return matches;
    }

    /** Every stored deployment, as a fresh array. */
    list() {
        return Array.from(this.deployments.values());
    }

    /** Number of deployments in `envId` created at or after `since` (a Date). */
    countByEnvironment(envId, since) {
        let total = 0;
        for (const record of this.deployments.values()) {
            // `since` is only consulted for records in the right environment.
            if (record.environmentId === envId &&
                new Date(record.createdAt).getTime() >= since.getTime()) {
                total += 1;
            }
        }
        return total;
    }

    /** Deployments of one artifact version; a falsy `status` disables the status filter. */
    findByArtifactVersion(artifactId, version, status) {
        const matches = [];
        for (const record of this.deployments.values()) {
            if (record.artifactId !== artifactId || record.version !== version) {
                continue;
            }
            if (status && record.status !== status) {
                continue;
            }
            matches.push(record);
        }
        return matches;
    }

    /**
     * Deployments of an artifact created at or after `since`, newest first.
     * A falsy `status` disables the status filter.
     */
    findRecentByArtifact(artifactId, since, status) {
        const createdMs = (record) => new Date(record.createdAt).getTime();
        const matches = [];
        for (const record of this.deployments.values()) {
            if (record.artifactId === artifactId &&
                createdMs(record) >= since.getTime() &&
                (!status || record.status === status)) {
                matches.push(record);
            }
        }
        matches.sort((a, b) => createdMs(b) - createdMs(a));
        return matches;
    }

    /** Most recently created deployment in an environment; undefined when none exist. */
    findLatestByEnvironment(envId) {
        const createdMs = (record) => new Date(record.createdAt).getTime();
        const inEnvironment = [];
        for (const record of this.deployments.values()) {
            if (record.environmentId === envId) {
                inEnvironment.push(record);
            }
        }
        // Sorting (rather than scanning for a max) keeps tie ordering unchanged.
        inEnvironment.sort((a, b) => createdMs(b) - createdMs(a));
        const [latest] = inEnvironment;
        return latest;
    }
}
|
|
1195
|
+
//# sourceMappingURL=synth-agent.js.map
|