@bridge_gpt/mcp-server 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/README.md +97 -15
  2. package/build/agent-config-credential-migration.js +272 -0
  3. package/build/agents.generated.js +1 -1
  4. package/build/chain-orchestrator.js +16 -1
  5. package/build/commands.generated.js +9 -7
  6. package/build/conductor/bridge-api-client.js +625 -0
  7. package/build/conductor/claude-hook.js +251 -0
  8. package/build/conductor/cli.js +1048 -0
  9. package/build/conductor/data-normalization.js +114 -0
  10. package/build/conductor/doctor.js +164 -0
  11. package/build/conductor/done-gate.js +325 -0
  12. package/build/conductor/epic-reconcile.js +139 -0
  13. package/build/conductor/epic-runtime.js +611 -0
  14. package/build/conductor/epic-state.js +125 -0
  15. package/build/conductor/errors.js +85 -0
  16. package/build/conductor/git-ci-types.js +129 -0
  17. package/build/conductor/git-hooks.js +218 -0
  18. package/build/conductor/git-inspection.js +185 -0
  19. package/build/conductor/git-producer.js +137 -0
  20. package/build/conductor/merge-ledger.js +198 -0
  21. package/build/conductor/paths.js +224 -0
  22. package/build/conductor/plan.js +77 -0
  23. package/build/conductor/pr-ci-producer.js +427 -0
  24. package/build/conductor/pr-discovery.js +135 -0
  25. package/build/conductor/producer-ledger.js +125 -0
  26. package/build/conductor/redaction.js +112 -0
  27. package/build/conductor/store.js +1156 -0
  28. package/build/conductor/supervisor-config.js +150 -0
  29. package/build/conductor/supervisor-escalation.js +244 -0
  30. package/build/conductor/supervisor-judgment-python.js +141 -0
  31. package/build/conductor/supervisor-judgment.js +215 -0
  32. package/build/conductor/supervisor-ledger.js +119 -0
  33. package/build/conductor/supervisor-merge.js +127 -0
  34. package/build/conductor/supervisor-message-relay.js +61 -0
  35. package/build/conductor/supervisor-notification.js +39 -0
  36. package/build/conductor/supervisor-runtime.js +351 -0
  37. package/build/conductor/supervisor-state.js +572 -0
  38. package/build/conductor/supervisor-types.js +16 -0
  39. package/build/conductor/taxonomy.js +58 -0
  40. package/build/conductor/tools.js +367 -0
  41. package/build/conductor/types.js +9 -0
  42. package/build/conductor-bin.js +21 -0
  43. package/build/conductor-claude-hook-bin.js +21 -0
  44. package/build/credential-store.js +175 -4
  45. package/build/credentials-cli.js +223 -0
  46. package/build/decision-page-schema.js +60 -0
  47. package/build/decision-page-template.js +262 -10
  48. package/build/doctor.js +5 -1
  49. package/build/index.js +468 -59
  50. package/build/pipeline-orchestrator.js +5 -1
  51. package/build/pipeline-utils.js +45 -5
  52. package/build/pipelines.generated.js +37 -9
  53. package/build/readme.generated.js +1 -1
  54. package/build/review-tickets.js +596 -0
  55. package/build/scheduled-prompt.js +16 -10
  56. package/build/start-tickets-conductor.js +496 -0
  57. package/build/start-tickets-prereqs.js +32 -23
  58. package/build/start-tickets-repo.js +49 -0
  59. package/build/start-tickets.js +682 -81
  60. package/build/version.generated.js +1 -1
  61. package/design-assets/favicon/android-chrome-192x192.png +0 -0
  62. package/design-assets/favicon/android-chrome-512x512.png +0 -0
  63. package/design-assets/favicon/apple-touch-icon.png +0 -0
  64. package/design-assets/favicon/favicon-16x16.png +0 -0
  65. package/design-assets/favicon/favicon-32x32.png +0 -0
  66. package/design-assets/favicon/favicon.ico +0 -0
  67. package/design-assets/favicon/site.webmanifest +1 -0
  68. package/design-assets/just-logo-rough-draft.png +0 -0
  69. package/package.json +17 -5
  70. package/pipelines/idea-to-ticket.json +5 -0
  71. package/pipelines/plan-epic.json +16 -1
  72. package/pipelines/review-ticket.json +2 -1
  73. package/public/css/main.min.css +2 -0
  74. package/public/css/main.min.css.map +1 -0
  75. package/public/fonts/OFL.txt +93 -0
  76. package/public/fonts/SourceSansPro-Black.ttf +0 -0
  77. package/public/fonts/SourceSansPro-BlackItalic.ttf +0 -0
  78. package/public/fonts/SourceSansPro-Bold.ttf +0 -0
  79. package/public/fonts/SourceSansPro-BoldItalic.ttf +0 -0
  80. package/public/fonts/SourceSansPro-ExtraLight.ttf +0 -0
  81. package/public/fonts/SourceSansPro-ExtraLightItalic.ttf +0 -0
  82. package/public/fonts/SourceSansPro-Italic.ttf +0 -0
  83. package/public/fonts/SourceSansPro-Light.ttf +0 -0
  84. package/public/fonts/SourceSansPro-LightItalic.ttf +0 -0
  85. package/public/fonts/SourceSansPro-Regular.ttf +0 -0
  86. package/public/fonts/SourceSansPro-SemiBold.ttf +0 -0
  87. package/public/fonts/SourceSansPro-SemiBoldItalic.ttf +0 -0
  88. package/public/img/bridge-logo-160x51.webp +0 -0
  89. package/public/img/bridge-logo-300x92.webp +0 -0
  90. package/public/img/favicon/android-chrome-192x192.png +0 -0
  91. package/public/img/favicon/android-chrome-512x512.png +0 -0
  92. package/public/img/favicon/apple-touch-icon.png +0 -0
  93. package/public/img/favicon/favicon-16x16.png +0 -0
  94. package/public/img/favicon/favicon-32x32.png +0 -0
  95. package/public/img/favicon/favicon.ico +0 -0
  96. package/public/img/favicon/site.webmanifest +1 -0
  97. package/public/img/installation/bitbucket/app-password-1.png +0 -0
  98. package/public/img/installation/bitbucket/app-password-2.png +0 -0
  99. package/public/img/installation/bitbucket/create-token-1.png +0 -0
  100. package/public/img/installation/bitbucket/create-token-2.png +0 -0
  101. package/public/img/installation/bitbucket/webhook-1.png +0 -0
  102. package/public/img/installation/github/github-review-webhook.png +0 -0
  103. package/public/img/installation/jira/credentials/api-key.png +0 -0
  104. package/public/img/installation/jira/webhook/create-rule.png +0 -0
  105. package/public/img/installation/jira/webhook/project-settings.png +0 -0
  106. package/public/img/installation/jira/webhook/rule-create-1.png +0 -0
  107. package/public/img/installation/jira/webhook/rule-create-2.png +0 -0
  108. package/public/img/installation/jira/webhook/rule-create-3.png +0 -0
  109. package/public/img/installation/pinecone/pinecone-api-key.png +0 -0
  110. package/public/img/installation/pinecone/pinecone-index.png +0 -0
  111. package/public/js/main.min.js +2 -0
  112. package/public/js/main.min.js.map +1 -0
  113. package/smoke-test/SMOKE-TEST.md +16 -8
@@ -0,0 +1,150 @@
1
+ /**
2
+ * Supervisor configuration resolution (BAPI-396, conductor C4).
3
+ *
4
+ * Centralizes the supervisor's wake cadence, global timeout, per-state stall
5
+ * thresholds, liveness thresholds, escalation cooldown, and LLM budget caps.
6
+ * All values are resolved from `process.env` with conservative, bounded defaults
7
+ * following the style of {@link resolveConductorStoreConfig} in `store.ts`:
8
+ * malformed values fall back to the default and excessive values are clamped.
9
+ *
10
+ * Token discipline: the WAKE cadence (frequent, deterministic event polling) is
11
+ * intentionally separate from the LLM cadence (judgment-triggered only). The
12
+ * deterministic loop runs every `wake_interval_ms`; the LLM is consulted at most
13
+ * `llm_max_calls` times per run and only for ambiguous escalation candidates.
14
+ */
15
// Base time units, in milliseconds.
const MINUTE_MS = 60 * 1000;
const HOUR_MS = MINUTE_MS * 60;

// Wake cadence of the deterministic loop: default / lower bound / upper bound.
const WAKE_INTERVAL_DEFAULT_MS = 45 * 1000;
const WAKE_INTERVAL_MIN_MS = 30 * 1000;
const WAKE_INTERVAL_MAX_MS = 60 * 1000;

// Global run timeout: one day by default, bounded between five minutes and a week.
const GLOBAL_TIMEOUT_DEFAULT_MS = HOUR_MS * 24;
const GLOBAL_TIMEOUT_MIN_MS = MINUTE_MS * 5;
const GLOBAL_TIMEOUT_MAX_MS = HOUR_MS * 24 * 7;

// Escalation cooldown: fifteen minutes by default.
const ESCALATION_COOLDOWN_DEFAULT_MS = MINUTE_MS * 15;
const ESCALATION_COOLDOWN_MIN_MS = MINUTE_MS * 5;
const ESCALATION_COOLDOWN_MAX_MS = HOUR_MS * 24;

// Liveness thresholds (quiet -> stalled -> dead), each with its own bounds.
const QUIET_AFTER_DEFAULT_MS = MINUTE_MS * 5;
const QUIET_AFTER_MIN_MS = MINUTE_MS;
const QUIET_AFTER_MAX_MS = HOUR_MS;
const LIVENESS_STALLED_AFTER_DEFAULT_MS = MINUTE_MS * 20;
const LIVENESS_STALLED_AFTER_MIN_MS = MINUTE_MS * 5;
const LIVENESS_STALLED_AFTER_MAX_MS = HOUR_MS * 4;
const DEAD_AFTER_DEFAULT_MS = HOUR_MS * 2;
const DEAD_AFTER_MIN_MS = MINUTE_MS * 10;
const DEAD_AFTER_MAX_MS = HOUR_MS * 24;

// LLM budget: number of calls per run and per-call timeout.
const LLM_MAX_CALLS_DEFAULT = 10;
const LLM_MAX_CALLS_MIN = 0;
const LLM_MAX_CALLS_MAX = 1000;
const LLM_TIMEOUT_DEFAULT_MS = 30 * 1000;
const LLM_TIMEOUT_MIN_MS = 1000;
const LLM_TIMEOUT_MAX_MS = 120 * 1000;
46
/**
 * Parse a bounded base-10 integer from an env-style string.
 *
 * Resolution rules:
 *  - anything that is not a string (undefined, null, numbers, objects) -> `fallback`
 *  - blank / whitespace-only -> `fallback`
 *  - not a plain signed integer ("abc", "12.5", "Infinity", "1e3") -> `fallback`
 *  - integer so large that it parses to a non-finite number -> `fallback`
 *  - otherwise the parsed integer clamped to `[min, max]`
 *
 * @param {string | undefined} raw - Raw value, typically `process.env.X`.
 * @param {number} fallback - Value returned for missing or malformed input.
 * @param {number} min - Inclusive lower bound.
 * @param {number} max - Inclusive upper bound.
 * @returns {number} The resolved integer.
 */
export function parseBoundedSupervisorInt(raw, fallback, min, max) {
    // Real env values are `string | undefined`, but callers may inject their
    // own env object. Treat every non-string as "unset" instead of throwing on
    // `.trim()`.
    if (typeof raw !== "string") {
        return fallback;
    }
    const trimmed = raw.trim();
    if (trimmed.length === 0) {
        return fallback;
    }
    // Strict integer syntax only: parseInt alone would silently truncate
    // "12.5" to 12 and accept "1e3" as 1, hiding malformed configuration.
    if (!/^[+-]?\d+$/.test(trimmed)) {
        return fallback;
    }
    const parsed = Number.parseInt(trimmed, 10);
    if (!Number.isFinite(parsed)) {
        return fallback;
    }
    return Math.min(max, Math.max(min, parsed));
}
68
// Bounds for the supervisor poll limit (default / minimum / maximum).
const POLL_LIMIT_DEFAULT = 200;
const POLL_LIMIT_MIN = 1;
const POLL_LIMIT_MAX = 1000;
/**
 * Parse a boolean-ish env string, case-insensitively and ignoring surrounding
 * whitespace.
 *
 *  - `0` / `false` / `off` / `no`  -> false
 *  - `1` / `true` / `on` / `yes`   -> true
 *  - anything else (including blank and non-string values) -> `fallback`
 *
 * @param {string | undefined} raw - Raw value, typically `process.env.X`.
 * @param {boolean} fallback - Value used when `raw` is missing or unrecognized.
 * @returns {boolean} The resolved flag.
 */
function parseBoolEnv(raw, fallback) {
    // Non-strings (undefined, null, numbers, ...) are treated as "unset" so an
    // injected env object can never make `.trim()` throw.
    if (typeof raw !== "string") {
        return fallback;
    }
    switch (raw.trim().toLowerCase()) {
        case "0":
        case "false":
        case "off":
        case "no":
            return false;
        case "1":
        case "true":
        case "on":
        case "yes":
            return true;
        default:
            // Covers the empty string as well as unrecognized words.
            return fallback;
    }
}
84
/**
 * Resolve the supervisor configuration.
 *
 * Precedence per field: a direct override (when provided AND valid) wins over
 * the environment variable, which wins over the built-in default. Numeric
 * values are always clamped to their `[min, max]` bounds.
 *
 * Only `wake_interval_ms`, `global_timeout_ms`, `escalation_cooldown_ms`,
 * `llm_enabled`, `llm_max_calls` and `llm_timeout_ms` accept direct overrides;
 * the liveness thresholds, the poll limit and the per-state stall thresholds
 * are resolved from the environment only.
 *
 * @param {object} [overrides] - Direct overrides (e.g. from CLI flags).
 * @param {object} [env] - Environment map; defaults to `process.env`.
 * @returns {object} The resolved configuration object.
 */
export function resolveSupervisorConfig(overrides = {}, env = process.env) {
    // The default parameter only covers `undefined`; an explicit `null` would
    // otherwise crash on the first property read.
    const direct = overrides ?? {};
    // env-only field: parse and clamp the named variable.
    const envInt = (name, fallback, min, max) => parseBoundedSupervisorInt(env[name], fallback, min, max);
    // overridable field: direct override (clamped) on top of the env/default value.
    const overridable = (override, name, fallback, min, max) => clampOverride(override, envInt(name, fallback, min, max), min, max);
    // LLM is enabled by default. Only a real boolean counts as a valid override
    // (e.g. CLI `--no-llm` -> `false`); any other value (null, "false", 0, ...)
    // is ignored and the env/default applies, so `llm_enabled` is always a
    // boolean in the returned config.
    const llm_enabled = typeof direct.llm_enabled === "boolean"
        ? direct.llm_enabled
        : parseBoolEnv(env.BAPI_CONDUCTOR_LLM_ENABLED, true);
    return {
        wake_interval_ms: overridable(direct.wake_interval_ms, "BAPI_CONDUCTOR_WAKE_INTERVAL_MS", WAKE_INTERVAL_DEFAULT_MS, WAKE_INTERVAL_MIN_MS, WAKE_INTERVAL_MAX_MS),
        global_timeout_ms: overridable(direct.global_timeout_ms, "BAPI_CONDUCTOR_GLOBAL_TIMEOUT_MS", GLOBAL_TIMEOUT_DEFAULT_MS, GLOBAL_TIMEOUT_MIN_MS, GLOBAL_TIMEOUT_MAX_MS),
        stall_thresholds_ms: resolveStallThresholds(env),
        liveness: {
            quiet_after_ms: envInt("BAPI_CONDUCTOR_QUIET_AFTER_MS", QUIET_AFTER_DEFAULT_MS, QUIET_AFTER_MIN_MS, QUIET_AFTER_MAX_MS),
            stalled_after_ms: envInt("BAPI_CONDUCTOR_LIVENESS_STALLED_AFTER_MS", LIVENESS_STALLED_AFTER_DEFAULT_MS, LIVENESS_STALLED_AFTER_MIN_MS, LIVENESS_STALLED_AFTER_MAX_MS),
            dead_after_ms: envInt("BAPI_CONDUCTOR_DEAD_AFTER_MS", DEAD_AFTER_DEFAULT_MS, DEAD_AFTER_MIN_MS, DEAD_AFTER_MAX_MS),
        },
        escalation_cooldown_ms: overridable(direct.escalation_cooldown_ms, "BAPI_CONDUCTOR_ESCALATION_COOLDOWN_MS", ESCALATION_COOLDOWN_DEFAULT_MS, ESCALATION_COOLDOWN_MIN_MS, ESCALATION_COOLDOWN_MAX_MS),
        llm_enabled,
        llm_max_calls: overridable(direct.llm_max_calls, "BAPI_CONDUCTOR_LLM_MAX_CALLS", LLM_MAX_CALLS_DEFAULT, LLM_MAX_CALLS_MIN, LLM_MAX_CALLS_MAX),
        llm_timeout_ms: overridable(direct.llm_timeout_ms, "BAPI_CONDUCTOR_LLM_TIMEOUT_MS", LLM_TIMEOUT_DEFAULT_MS, LLM_TIMEOUT_MIN_MS, LLM_TIMEOUT_MAX_MS),
        poll_limit: envInt("BAPI_CONDUCTOR_SUPERVISOR_POLL_LIMIT", POLL_LIMIT_DEFAULT, POLL_LIMIT_MIN, POLL_LIMIT_MAX),
    };
}
123
/**
 * Apply a direct numeric override on top of an already-resolved base value.
 *
 * The override is used only when it is a finite number; it is floored to an
 * integer and clamped to `[min, max]`. In every other case `base` is returned
 * untouched.
 */
function clampOverride(override, base, min, max) {
    const usable = override !== undefined && Number.isFinite(override);
    if (!usable) {
        return base;
    }
    const floored = Math.floor(override);
    const lowerBounded = Math.max(min, floored);
    return Math.min(max, lowerBounded);
}
129
/**
 * Resolve the per-state stall thresholds (milliseconds) from the environment.
 *
 * Each tunable state has its own env variable, default and `[min, max]`
 * bounds. The terminal states `complete` and `failed` are fixed at
 * `Number.MAX_SAFE_INTEGER`.
 */
function resolveStallThresholds(env) {
    // [state, env variable, default, min, max]
    const tunableStates = [
        // A worker that never starts should surface quickly.
        ["not_started", "BAPI_CONDUCTOR_STALL_NOT_STARTED_MS", 15 * MINUTE_MS, MINUTE_MS, HOUR_MS],
        // Active work (tests/builds) gets a long, tolerant budget.
        ["active", "BAPI_CONDUCTOR_STALL_ACTIVE_MS", 2 * HOUR_MS, 10 * MINUTE_MS, 12 * HOUR_MS],
        // Already-stalled workers re-escalate on a slower cadence.
        ["stalled", "BAPI_CONDUCTOR_STALL_STALLED_MS", 30 * MINUTE_MS, 5 * MINUTE_MS, 6 * HOUR_MS],
        // Blocked workers may legitimately wait a long time (human input, deps).
        ["blocked", "BAPI_CONDUCTOR_STALL_BLOCKED_MS", 24 * HOUR_MS, 30 * MINUTE_MS, 7 * 24 * HOUR_MS],
        // A done candidate that never gets verified should surface.
        ["candidate_done", "BAPI_CONDUCTOR_STALL_CANDIDATE_DONE_MS", 30 * MINUTE_MS, 5 * MINUTE_MS, 6 * HOUR_MS],
        // Verification (CI) is as tolerant as active work.
        ["verifying", "BAPI_CONDUCTOR_STALL_VERIFYING_MS", 2 * HOUR_MS, 10 * MINUTE_MS, 12 * HOUR_MS],
        // Unknown-state workers surface on a moderate cadence.
        ["unknown", "BAPI_CONDUCTOR_STALL_UNKNOWN_MS", 30 * MINUTE_MS, 5 * MINUTE_MS, 6 * HOUR_MS],
    ];
    const thresholds = {};
    for (const [stateName, envName, fallback, min, max] of tunableStates) {
        thresholds[stateName] = parseBoundedSupervisorInt(env[envName], fallback, min, max);
    }
    // Terminal states never "stall"; a huge sentinel keeps comparisons uniform.
    thresholds.complete = Number.MAX_SAFE_INTEGER;
    thresholds.failed = Number.MAX_SAFE_INTEGER;
    return thresholds;
}
@@ -0,0 +1,244 @@
1
+ /**
2
+ * Deterministic escalation policy (BAPI-396, conductor C4).
3
+ *
4
+ * Escalation v1 is TERMINAL OUTPUT + a ledger `supervisor.assessment` event
5
+ * only — it never merges, transitions Jira, emails, pays, or takes any other
6
+ * privileged action. This module is the deterministic core of that policy:
7
+ *
8
+ * - {@link findSupervisorEscalationCandidates} inspects worker state/liveness
9
+ * and the global deadline and returns machine-reasoned candidates.
10
+ * - {@link shouldEmitEscalation} enforces a per-worker, per-reason cooldown
11
+ * and returns the cooldown window identifier used to build the idempotency
12
+ * key — so the same escalation is not re-emitted within a window even across
13
+ * crash/restart (the window is time-bucketed and the history is persisted in
14
+ * the projection summary).
15
+ * - {@link recordEscalationResult} records the emit outcome into state.
16
+ * - {@link formatEscalationForTerminal} renders a concise, secret-free line.
17
+ */
18
/** Coarse kind embedded in the idempotency key for all escalations. */
export const ESCALATION_KIND = "escalation";
/**
 * Milliseconds between `now` and the newest parseable timestamp among the
 * worker's `last_event_time`, `last_progress_time` and `last_heartbeat_time`.
 * Returns 0 when none of them is present and parseable; never negative.
 */
function elapsedSinceSignal(worker, now) {
    const stamps = [
        worker.last_event_time,
        worker.last_progress_time,
        worker.last_heartbeat_time,
    ];
    let newest = -Infinity;
    let sawSignal = false;
    for (const iso of stamps) {
        if (!iso) {
            continue;
        }
        const at = Date.parse(iso);
        if (!Number.isFinite(at)) {
            continue;
        }
        sawSignal = true;
        if (at > newest) {
            newest = at;
        }
    }
    if (!sawSignal) {
        return 0;
    }
    return Math.max(0, now - newest);
}
33
/**
 * Inspect run state and return deterministic escalation candidates.
 *
 * Per worker (terminal `complete` / `failed` workers are skipped):
 *  - liveness `dead`                          -> `worker_dead` (any state)
 *  - `not_started` and liveness not `alive`   -> `worker_not_started`
 *  - `blocked`                                -> `worker_blocked`
 *  - `stalled`                                -> `worker_stalled` (ambiguous)
 *  - `candidate_done` and liveness not `alive`-> `candidate_done_stuck` (ambiguous)
 *  - `verifying` and liveness `stalled`       -> `verification_stalled` (ambiguous)
 *  - any other state                          -> nothing
 *
 * Run level: once `now` reaches `state.global_deadline_at`, a single
 * `global_timeout` candidate is appended.
 *
 * `ambiguous` marks candidates that benefit from an LLM judgment. The `config`
 * parameter is accepted for signature stability but not read here.
 */
export function findSupervisorEscalationCandidates(state, config, now) {
    const found = [];
    for (const worker of Object.values(state.workers)) {
        const workerState = worker.state;
        const liveness = worker.liveness;
        if (workerState === "complete" || workerState === "failed") {
            continue;
        }
        const elapsedMs = elapsedSinceSignal(worker, now);
        const baseContext = {
            worker_id: worker.worker_id,
            ticket_key: worker.ticket_key,
            state: workerState,
            liveness,
        };
        // Shared candidate shape for every per-worker reason.
        const flag = (reason, ambiguous, context = baseContext) => {
            found.push({
                reason,
                kind: ESCALATION_KIND,
                worker_id: worker.worker_id,
                state: workerState,
                liveness,
                elapsed_ms: elapsedMs,
                ambiguous,
                context,
            });
        };
        // A dead worker is the strongest liveness signal; it is surfaced
        // regardless of progress state and pre-empts every other reason.
        if (liveness === "dead") {
            flag("worker_dead", false);
            continue;
        }
        const notAlive = liveness !== "alive";
        if (workerState === "not_started") {
            // Worker tabs spawn after `run.started`, so every roster worker is
            // `not_started` on the first iteration. Only surface it once it is
            // no longer alive; a worker that later crosses its stall threshold
            // is promoted to `stalled` by housekeeping and surfaces as
            // `worker_stalled`.
            if (notAlive) {
                flag("worker_not_started", false);
            }
        }
        else if (workerState === "blocked") {
            flag("worker_blocked", false, { ...baseContext, blocked_reason: worker.blocked_reason });
        }
        else if (workerState === "stalled") {
            flag("worker_stalled", true);
        }
        else if (workerState === "candidate_done") {
            // `gate.met` flips a worker to `candidate_done` the instant the gate
            // is reached, which is healthy progress. Only a done-candidate that
            // is no longer alive is worth surfacing.
            if (notAlive) {
                flag("candidate_done_stuck", true);
            }
        }
        else if (workerState === "verifying") {
            // Stricter than the gates above: only liveness `stalled` counts, so
            // a healthy or merely quiet verification is not an escalation.
            if (liveness === "stalled") {
                flag("verification_stalled", true);
            }
        }
    }
    // Run-level: global timeout.
    const deadlineMs = state.global_deadline_at ? Date.parse(state.global_deadline_at) : NaN;
    const deadlineReached = Number.isFinite(deadlineMs) && now >= deadlineMs;
    if (deadlineReached) {
        found.push({
            reason: "global_timeout",
            kind: ESCALATION_KIND,
            worker_id: null,
            state: null,
            liveness: null,
            elapsed_ms: Math.max(0, now - deadlineMs),
            ambiguous: false,
            context: {
                run_id: state.run_id,
                deadline_at: state.global_deadline_at,
                worker_count: Object.keys(state.workers).length,
            },
        });
    }
    return found;
}
173
/**
 * Bucket `now` into a stable cooldown window id of width `cooldownMs`.
 * A non-positive (or NaN) width degrades to 1 ms so the division is defined.
 */
function cooldownWindowFor(now, cooldownMs) {
    const width = cooldownMs > 0 ? cooldownMs : 1;
    return String(Math.floor(now / width));
}
/**
 * Decide whether a candidate should be emitted now.
 *
 * A candidate is SUPPRESSED when a binding decision (`emitted`, `duplicate`,
 * or an explicit `suppressed`) for the same worker + reason already exists
 * and either
 *   1. it was recorded in the SAME time-bucketed cooldown window, or
 *   2. its `recorded_at` timestamp is less than `escalation_cooldown_ms` old.
 *
 * Rule 2 closes the bucket-boundary gap: with fixed buckets alone, an
 * escalation emitted seconds before a window rolls over could be re-emitted
 * seconds after it, so the cooldown was not a true minimum spacing. Records
 * without a parseable `recorded_at` fall back to rule 1 only.
 *
 * @param {object} state - Run state; `state.escalations` is the decision history.
 * @param {object} candidate - Candidate with `reason` and optional `worker_id`.
 * @param {object} config - Supervisor config; reads `escalation_cooldown_ms`.
 * @param {number} now - Current time in epoch milliseconds.
 * @returns {{ emit: boolean, cooldown_window: string }} `cooldown_window` is
 *   the current bucket id, which the caller uses to build the idempotency key.
 */
export function shouldEmitEscalation(state, candidate, config, now) {
    const cooldownMs = config.escalation_cooldown_ms;
    const cooldownWindow = cooldownWindowFor(now, cooldownMs);
    const candidateWorker = candidate.worker_id ?? null;
    // A prior emit/duplicate OR an explicit LLM "do not escalate" (suppressed)
    // decision is binding: do not re-decide (and, for ambiguous candidates, do
    // not re-query the LLM) until the cooldown has passed.
    const bindingOutcomes = ["emitted", "duplicate", "suppressed"];
    const alreadyDecided = state.escalations.some((record) => {
        if (record.reason !== candidate.reason) {
            return false;
        }
        if ((record.worker_id ?? null) !== candidateWorker) {
            return false;
        }
        if (!bindingOutcomes.includes(record.outcome)) {
            return false;
        }
        if (record.cooldown_window === cooldownWindow) {
            return true;
        }
        // Sliding guard across the bucket boundary. Records stamped in the
        // future (clock skew) are ignored here rather than suppressing
        // indefinitely.
        if (!(cooldownMs > 0) || typeof record.recorded_at !== "string") {
            return false;
        }
        const recordedAt = Date.parse(record.recorded_at);
        if (!Number.isFinite(recordedAt)) {
            return false;
        }
        const age = now - recordedAt;
        return age >= 0 && age < cooldownMs;
    });
    return { emit: !alreadyDecided, cooldown_window: cooldownWindow };
}
198
/**
 * Append the outcome of an escalation emit attempt to `state.escalations` and
 * return the appended record.
 *
 * The record carries the idempotency key, worker id (normalized to `null`
 * when absent), reason, kind, cooldown window, outcome and an ISO-8601
 * `recorded_at` timestamp derived from `now` (epoch milliseconds).
 */
export function recordEscalationResult(state, candidate, cooldownWindow, idempotencyKey, outcome, now) {
    const { worker_id: workerId, reason, kind } = candidate;
    const recordedAt = new Date(now).toISOString();
    const entry = {
        idempotency_key: idempotencyKey,
        worker_id: workerId ?? null,
        reason,
        kind,
        cooldown_window: cooldownWindow,
        outcome,
        recorded_at: recordedAt,
    };
    state.escalations.push(entry);
    return entry;
}
216
/**
 * Format a compact elapsed duration like `1h2m`, `3m`, `45s`.
 * Negative and non-finite inputs are rendered as `0s`.
 */
function formatElapsed(ms) {
    // NaN / Infinity would otherwise print as "NaNs" / "InfinityhNaNm".
    const safeMs = Number.isFinite(ms) ? ms : 0;
    const totalSeconds = Math.max(0, Math.floor(safeMs / 1000));
    const hours = Math.floor(totalSeconds / 3600);
    const minutes = Math.floor((totalSeconds % 3600) / 60);
    const seconds = totalSeconds % 60;
    if (hours > 0) {
        return `${hours}h${minutes}m`;
    }
    if (minutes > 0) {
        return `${minutes}m`;
    }
    return `${seconds}s`;
}
/**
 * Render a concise, single-line terminal escalation message.
 *
 * The line contains the run id, worker id (when set), reason, state and
 * liveness (when set), elapsed time, and optional drafted text after ` :: `.
 *
 * The drafted text may come from an LLM, so it is untrusted terminal input:
 * ANSI CSI escape sequences are removed, remaining control characters are
 * turned into spaces, and whitespace is collapsed so the text stays on one
 * line. Nothing is appended when the text is empty after this cleanup.
 *
 * @param {string} runId - Run identifier.
 * @param {object} candidate - Escalation candidate (`worker_id`, `reason`,
 *   `state`, `liveness`, `elapsed_ms`).
 * @param {string} [draftText] - Optional free-form text to append.
 * @returns {string} The formatted line.
 */
export function formatEscalationForTerminal(runId, candidate, draftText) {
    const worker = candidate.worker_id ? ` worker=${candidate.worker_id}` : "";
    const stateBit = candidate.state ? ` state=${candidate.state}` : "";
    const liveBit = candidate.liveness ? ` liveness=${candidate.liveness}` : "";
    const elapsed = ` elapsed=${formatElapsed(candidate.elapsed_ms)}`;
    const line = `[supervisor] run=${runId}${worker} reason=${candidate.reason}${stateBit}${liveBit}${elapsed}`;
    if (typeof draftText !== "string") {
        return line;
    }
    const cleaned = draftText
        // Strip complete ANSI CSI sequences (colors, cursor movement, ...).
        .replace(/\u001b\[[0-?]*[ -\/]*[@-~]/g, "")
        // Neutralize any remaining C0/C1 control characters (ESC, BEL, ...).
        .replace(/[\u0000-\u001f\u007f-\u009f]/g, " ")
        // Single-line the text so the terminal stays scannable.
        .replace(/\s+/g, " ")
        .trim();
    return cleaned.length > 0 ? `${line} :: ${cleaned}` : line;
}
@@ -0,0 +1,141 @@
1
+ /**
2
+ * TypeScript-to-Python judgment adapter (BAPI-396, conductor C4).
3
+ *
4
+ * Lets the Node conductor CLI invoke the approved Python LLM boundary
5
+ * (`src.python.conductor.supervisor_judgment`) WITHOUT any direct model-provider
6
+ * code in TypeScript. The compact judgment request is passed to the Python
7
+ * module over stdin as JSON, the module is spawned with `shell: false` and a
8
+ * list of arguments (never a shell string), and stdout is validated through the
9
+ * shared {@link parseSupervisorJudgmentResponse}.
10
+ *
11
+ * Every boundary failure (missing Python, timeout, non-zero exit, malformed
12
+ * output) is converted to a SANITIZED {@link SupervisorJudgmentError}: the
13
+ * caller ({@link assessSupervisorCandidate}) catches it and degrades to a
14
+ * deterministic assessment. Secrets, stderr text, and stack traces are never
15
+ * placed in the thrown error.
16
+ */
17
+ import { spawn as nodeSpawn } from "node:child_process";
18
+ import path from "node:path";
19
+ import { fileURLToPath } from "node:url";
20
+ import { SupervisorJudgmentError, parseSupervisorJudgmentResponse, } from "./supervisor-judgment.js";
21
/** Dotted name of the Python module the adapter runs via `python -m <module>`. */
export const SUPERVISOR_JUDGMENT_PYTHON_MODULE = "src.python.conductor.supervisor_judgment";
/** True only for strings that still contain something after trimming. */
function nonEmpty(value) {
    if (typeof value !== "string") {
        return false;
    }
    return value.trim().length > 0;
}
26
/**
 * Pick the Python executable for the judgment subprocess: the trimmed
 * `BAPI_CONDUCTOR_PYTHON` value when it is non-blank, otherwise `python3`.
 * The result is meant to be passed as `spawn`'s command argument, never
 * interpolated into a shell string.
 */
export function resolveSupervisorJudgmentCommand(env = process.env) {
    const override = env.BAPI_CONDUCTOR_PYTHON;
    return nonEmpty(override) ? override.trim() : "python3";
}
36
/**
 * Pick the working directory for the Python module (so that `src.python...`
 * imports resolve): the trimmed `BAPI_CONDUCTOR_PYTHON_CWD` value when it is
 * non-blank, otherwise the directory three levels above this module's own
 * directory. The fallback assumes the compiled file lives at
 * `<repo>/mcp_server/build/conductor/<file>.js`.
 */
export function resolveSupervisorJudgmentCwd(env = process.env) {
    const override = env.BAPI_CONDUCTOR_PYTHON_CWD;
    if (nonEmpty(override)) {
        return override.trim();
    }
    const moduleDir = path.dirname(fileURLToPath(import.meta.url));
    // conductor -> build -> mcp_server -> repo root.
    const upToRepoRoot = ["..", "..", ".."];
    return path.resolve(moduleDir, ...upToRepoRoot);
}
49
/**
 * Build the stdin payload for the Python module. Only `run_id`, `candidate`
 * and `worker` are copied from the request; `repo_name` and `session_id` are
 * added from `BAPI_CONDUCTOR_REPO_NAME` / `BAPI_CONDUCTOR_RUN_ID` when those
 * variables are non-blank. No other env value is forwarded.
 */
function buildRequestPayload(request, env) {
    const { run_id, candidate, worker } = request;
    const payload = { run_id, candidate, worker };
    const optionalFields = [
        ["repo_name", env.BAPI_CONDUCTOR_REPO_NAME],
        ["session_id", env.BAPI_CONDUCTOR_RUN_ID],
    ];
    for (const [field, raw] of optionalFields) {
        if (nonEmpty(raw)) {
            payload[field] = raw.trim();
        }
    }
    return payload;
}
62
/**
 * Spawn the Python judgment module and resolve with the validated response.
 *
 * The request is written to the child's stdin as JSON (keeping payloads out of
 * the process argument list) and the child's stdout is validated through
 * `parseSupervisorJudgmentResponse`. The child is spawned with `shell: false`
 * and an argument list.
 *
 * Every boundary failure rejects with a sanitized `SupervisorJudgmentError`
 * whose message is one of a fixed set of strings; no stderr text or underlying
 * error is attached:
 *  - spawn throws synchronously       -> "could not be started"
 *  - child emits `error`              -> "process error"
 *  - `config.llm_timeout_ms` elapses  -> child is SIGKILLed, "timed out"
 *  - exit code is not 0               -> "exited non-zero"
 *  - stdout fails validation          -> "returned malformed output"
 *  - stdin write fails (sync or async)-> child is killed, "stdin write failed"
 *
 * @param {object} request - Judgment request (`run_id`, `candidate`, `worker`).
 * @param {object} config - Supervisor config; reads `llm_timeout_ms`.
 * @param {object} [deps] - Optional `spawn` and `env` injection points.
 * @returns {Promise<object>} The parsed judgment response.
 */
export function requestPythonSupervisorJudgment(request, config, deps = {}) {
    const spawnFn = deps.spawn ?? nodeSpawn;
    const env = deps.env ?? process.env;
    const command = resolveSupervisorJudgmentCommand(env);
    const cwd = resolveSupervisorJudgmentCwd(env);
    return new Promise((resolve, reject) => {
        let settled = false;
        // Raw chunks are kept and decoded once at the end so a multi-byte UTF-8
        // character split across two chunks is not corrupted.
        const stdoutChunks = [];
        let child;
        try {
            child = spawnFn(command, ["-m", SUPERVISOR_JUDGMENT_PYTHON_MODULE], {
                cwd,
                shell: false,
                stdio: ["pipe", "pipe", "pipe"],
            });
        }
        catch {
            reject(new SupervisorJudgmentError("python judgment process could not be started"));
            return;
        }
        // Settle at most once and always release the timeout timer.
        const finish = (fn, value) => {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timer);
            fn(value);
        };
        const killChild = () => {
            try {
                child.kill("SIGKILL");
            }
            catch {
                /* best-effort */
            }
        };
        const failStdin = () => {
            // A child that never received its request would otherwise linger.
            killChild();
            finish(reject, new SupervisorJudgmentError("python judgment stdin write failed"));
        };
        // NOTE: the timer is intentionally NOT unref'd. It must keep the event
        // loop alive so the timeout actually fires and rejects when the Python
        // subprocess hangs. It is always cleared in `finish`.
        const timer = setTimeout(() => {
            killChild();
            finish(reject, new SupervisorJudgmentError("python judgment timed out"));
        }, config.llm_timeout_ms);
        child.on("error", () => finish(reject, new SupervisorJudgmentError("python judgment process error")));
        child.stdout?.on("data", (chunk) => {
            const isBytes = Buffer.isBuffer(chunk) || chunk instanceof Uint8Array;
            stdoutChunks.push(isBytes ? chunk : Buffer.from(String(chunk), "utf8"));
        });
        // stderr is piped, so it must be drained: an unread pipe fills up and
        // blocks a chatty child until the timeout. The content is discarded on
        // purpose (it must never reach the thrown error).
        child.stderr?.on?.("data", () => { });
        // A child that exits before reading stdin makes the write fail
        // asynchronously (EPIPE). Without a listener that `error` event would be
        // thrown as an uncaught exception.
        child.stdin?.on?.("error", failStdin);
        child.on("close", (code) => {
            if (code !== 0) {
                finish(reject, new SupervisorJudgmentError("python judgment exited non-zero"));
                return;
            }
            try {
                const stdout = Buffer.concat(stdoutChunks).toString("utf8");
                const parsed = parseSupervisorJudgmentResponse(stdout.trim());
                finish(resolve, parsed);
            }
            catch {
                finish(reject, new SupervisorJudgmentError("python judgment returned malformed output"));
            }
        });
        try {
            child.stdin?.write(JSON.stringify(buildRequestPayload(request, env)));
            child.stdin?.end();
        }
        catch {
            failStdin();
        }
    });
}
134
/**
 * Create the default judgment client used by the runtime: a one-argument
 * function that forwards each request to `requestPythonSupervisorJudgment`
 * together with the `config` and `deps` captured here.
 */
export function createDefaultSupervisorJudgmentClient(config, deps = {}) {
    const forwardToPython = (request) => {
        return requestPythonSupervisorJudgment(request, config, deps);
    };
    return forwardToPython;
}