@bridge_gpt/mcp-server 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -54
- package/build/agent-launchers/claude.js +25 -17
- package/build/agent-launchers/cursor.js +65 -0
- package/build/agent-launchers/index.js +23 -8
- package/build/agent-registry.js +68 -0
- package/build/command-catalog.js +376 -0
- package/build/commands.generated.js +8 -5
- package/build/index.js +406 -120
- package/build/mcp-provisioning.js +94 -1
- package/build/pipeline-utils.js +0 -33
- package/build/pipelines.generated.js +2 -31
- package/build/readme.generated.js +3 -0
- package/build/schedule-run.js +436 -88
- package/build/schedule-store.js +41 -1
- package/build/scheduled-prompt.js +109 -0
- package/build/scheduler-backends/at-fallback.js +5 -10
- package/build/scheduler-backends/escaping.js +40 -10
- package/build/scheduler-backends/launchd.js +23 -14
- package/build/scheduler-backends/systemd-user.js +32 -19
- package/build/scheduler-backends/task-scheduler.js +8 -13
- package/build/start-tickets.js +459 -30
- package/build/version.generated.js +1 -1
- package/package.json +4 -3
- package/pipelines/implement-ticket.json +2 -28
- package/smoke-test/SMOKE-TEST.md +61 -18
|
@@ -114,6 +114,40 @@ export function getWorktreeMcpRegistrationTargets(worktreePath, platform) {
|
|
|
114
114
|
{ filePath: api.join(worktreePath, ".cursor", "mcp.json"), topLevelKey: "mcpServers" },
|
|
115
115
|
];
|
|
116
116
|
}
|
|
117
|
+
/**
 * Resolve the path of the worktree's Claude local settings file
 * (`<worktree>/.claude/settings.local.json`).
 *
 * The server-trust pre-approval list (`enabledMcpjsonServers`) is written to
 * this file so that servers just registered in `.mcp.json` do not trigger
 * Claude Code's "use this MCP server?" prompt in a fresh worktree path.
 *
 * Kept separate from `getWorktreeMcpRegistrationTargets` on purpose: this file
 * is Claude-only and uses a different top-level key than an MCP registration.
 *
 * @param {string} worktreePath - Root directory of the worktree.
 * @param {string} platform - Provisioning platform; selects the path API used
 *   to join the segments.
 * @returns {string} `worktreePath` joined with `.claude/settings.local.json`.
 */
export function claudeSettingsTargetForWorktree(worktreePath, platform) {
    const segments = [worktreePath, ".claude", "settings.local.json"];
    return pathApiForProvisioningPlatform(platform).join(...segments);
}
|
|
130
|
+
/**
 * Merge server names into a parsed settings document's
 * `enabledMcpjsonServers` array.
 *
 * Unrelated top-level fields are preserved. The resulting array is the deduped
 * union of the existing names and the new ones, in stable order (existing
 * names first, then newly-added names). Non-string entries are dropped from
 * both sources, and duplicates already present in the existing array are
 * collapsed to their first occurrence.
 *
 * The input document is never mutated; a shallow copy is returned.
 *
 * @param {unknown} existing - Parsed settings document. Anything that is not a
 *   non-array object (including `undefined`/`null`) is treated as empty.
 * @param {Iterable<string> | null | undefined} serverNames - Names to
 *   pre-approve. A nullish value is treated as "no names".
 * @returns {object} Copy of the document with `enabledMcpjsonServers` set to
 *   the merged array.
 */
export function mergeEnabledMcpjsonServers(existing, serverNames) {
    const isDocument = existing !== null && typeof existing === "object" && !Array.isArray(existing);
    const result = isDocument ? { ...existing } : {};
    const current = Array.isArray(result.enabledMcpjsonServers)
        ? result.enabledMcpjsonServers
        : [];
    const incoming = serverNames == null ? [] : serverNames;
    // A Set gives O(1) membership checks and also collapses duplicates that
    // were already present in the existing array.
    const seen = new Set();
    const merged = [];
    for (const name of [...current, ...incoming]) {
        if (typeof name !== "string" || seen.has(name)) {
            continue;
        }
        seen.add(name);
        merged.push(name);
    }
    result.enabledMcpjsonServers = merged;
    return result;
}
|
|
117
151
|
/**
|
|
118
152
|
* Merge multiple shim entries into an existing parsed registration document.
|
|
119
153
|
* Unrelated top-level fields and unrelated MCP servers are preserved; only the
|
|
@@ -182,6 +216,51 @@ export async function writeMcpRegistrationFile(target, entries, deps) {
|
|
|
182
216
|
}
|
|
183
217
|
return { ok: true };
|
|
184
218
|
}
|
|
219
|
+
/**
 * Write (or merge into) the worktree's `.claude/settings.local.json` so the
 * given `.mcp.json` server names are pre-approved via `enabledMcpjsonServers`.
 * This suppresses Claude Code's per-project "use this MCP server?" trust
 * prompt that would otherwise re-appear in every freshly-created worktree.
 *
 * Read-merge-write contract:
 * - missing file (`ENOENT`): a fresh document is created;
 * - existing file holding a JSON object: the names are merged in and every
 *   other top-level field is preserved;
 * - existing file that is malformed JSON, or valid JSON whose top level is not
 *   an object: left untouched and reported as a failure, so user content is
 *   never clobbered;
 * - any other read error, or a failed mkdir/write: reported as a failure.
 *
 * This function never throws for I/O problems; failures come back as
 * `{ ok: false, error }` so the caller can degrade to a warning. Error strings
 * contain only the file path, never the underlying error text.
 *
 * @param {string} worktreePath - Root directory of the worktree.
 * @param {Iterable<string>} serverNames - Server names to pre-approve.
 * @param {object} deps - Injected dependencies: `platform`, plus async
 *   `readFile(path)`, `mkdir(path, options)` and `writeFile(path, data)`.
 * @returns {Promise<{ok: true} | {ok: false, error: string}>}
 */
export async function writeClaudeServerTrustSettings(worktreePath, serverNames, deps) {
    const api = pathApiForProvisioningPlatform(deps.platform);
    const filePath = claudeSettingsTargetForWorktree(worktreePath, deps.platform);
    let raw;
    let fileExists = true;
    try {
        raw = await deps.readFile(filePath);
    }
    catch (err) {
        const code = err && typeof err === "object" ? err.code : undefined;
        if (code !== "ENOENT") {
            return { ok: false, error: `unable to read ${filePath}` };
        }
        // ENOENT: create a fresh document below.
        fileExists = false;
    }
    let existing;
    if (fileExists) {
        try {
            existing = JSON.parse(raw);
        }
        catch {
            return {
                ok: false,
                error: `existing ${filePath} contains malformed JSON; not overwriting`,
            };
        }
        // Valid JSON that is not an object (array, string, number, null) cannot
        // be merged into; refuse to replace it rather than silently discarding
        // whatever the user put there.
        if (existing === null || typeof existing !== "object" || Array.isArray(existing)) {
            return {
                ok: false,
                error: `existing ${filePath} is not a JSON object; not overwriting`,
            };
        }
    }
    const merged = mergeEnabledMcpjsonServers(existing, serverNames);
    try {
        // The `.claude` directory may not exist yet in a fresh worktree.
        await deps.mkdir(api.dirname(filePath), { recursive: true });
        await deps.writeFile(filePath, `${JSON.stringify(merged, null, 2)}\n`);
    }
    catch {
        return { ok: false, error: `failed to write ${filePath}` };
    }
    return { ok: true };
}
|
|
185
264
|
// ---------------------------------------------------------------------------
|
|
186
265
|
// Per-worktree orchestration
|
|
187
266
|
// ---------------------------------------------------------------------------
|
|
@@ -205,6 +284,11 @@ function withWarnings(row, warnings) {
|
|
|
205
284
|
* add a secret-free warning but never abort provisioning.
|
|
206
285
|
* - A required write failure (or a malformed existing registration file) marks
|
|
207
286
|
* only this row `spawn-failed` with a descriptive error; other rows continue.
|
|
287
|
+
* - After the registration files are written, the worktree's
|
|
288
|
+
* `.claude/settings.local.json` is updated to pre-approve those servers via
|
|
289
|
+
* `enabledMcpjsonServers` (suppressing Claude Code's per-project trust
|
|
290
|
+
* prompt). This step is best-effort: any failure degrades to a warning and
|
|
291
|
+
* never blocks the spawn.
|
|
208
292
|
*/
|
|
209
293
|
export async function provisionMcpRegistrationForWorktree(row, deps) {
|
|
210
294
|
if (row.status !== "created" || !row.path) {
|
|
@@ -234,7 +318,16 @@ export async function provisionMcpRegistrationForWorktree(row, deps) {
|
|
|
234
318
|
return { ...row, status: "spawn-failed", error: `MCP provisioning failed: ${result.error}` };
|
|
235
319
|
}
|
|
236
320
|
}
|
|
237
|
-
|
|
321
|
+
// Pre-approve the just-registered servers so Claude Code does not prompt to
|
|
322
|
+
// trust them in this fresh worktree path. Best-effort: a failure here only
|
|
323
|
+
// loses the convenience of skipping the prompt, so warn and still spawn.
|
|
324
|
+
let result = withWarnings(row, built.warnings);
|
|
325
|
+
const serverNames = Object.keys(built.entries);
|
|
326
|
+
const trust = await writeClaudeServerTrustSettings(normalized.path, serverNames, deps);
|
|
327
|
+
if (!trust.ok) {
|
|
328
|
+
result = withWarning(result, `Claude MCP trust pre-approval skipped: ${trust.error}`);
|
|
329
|
+
}
|
|
330
|
+
return result;
|
|
238
331
|
}
|
|
239
332
|
/**
|
|
240
333
|
* Provision MCP registrations for every created worktree row, in order.
|
package/build/pipeline-utils.js
CHANGED
|
@@ -6,35 +6,6 @@
|
|
|
6
6
|
import { readdir, readFile } from "fs/promises";
|
|
7
7
|
import path from "path";
|
|
8
8
|
// ---------------------------------------------------------------------------
|
|
9
|
-
// Tiered section-graph contract metadata (BAPI-345, Ticket 1)
|
|
10
|
-
// ---------------------------------------------------------------------------
|
|
11
|
-
//
|
|
12
|
-
// Static, server-owned metadata advertised on every resolved recipe envelope so
|
|
13
|
-
// MCP consumers can detect that the section-graph contract exists. Ticket 1 is
|
|
14
|
-
// contract-only: every execution-related capability is disabled, and the
|
|
15
|
-
// section graph itself is NEVER embedded in the recipe envelope (it lives on the
|
|
16
|
-
// plan-generation response, not here). This descriptor is NOT derived from the
|
|
17
|
-
// repo's `tiered_execution` config value.
|
|
18
|
-
export const PIPELINE_CONTRACT_VERSION = "2";
|
|
19
|
-
export const TIERED_SECTION_CAPABILITY_DESCRIPTOR = {
|
|
20
|
-
section_graph: {
|
|
21
|
-
supported: true,
|
|
22
|
-
execution_enabled: false,
|
|
23
|
-
payload_location: "plan_generation_response.sections",
|
|
24
|
-
},
|
|
25
|
-
tiers: ["cheap", "basic", "premium"],
|
|
26
|
-
risk_levels: ["low", "medium", "high"],
|
|
27
|
-
activities: ["implement", "test", "docs", "debug", "config"],
|
|
28
|
-
tiered_execution_modes: ["off", "claude_code_only", "all_capable"],
|
|
29
|
-
capabilities: {
|
|
30
|
-
// All Ticket 2 execution features are disabled / not implemented in Ticket 1.
|
|
31
|
-
escalation: false,
|
|
32
|
-
checkpoint: false,
|
|
33
|
-
rollback: false,
|
|
34
|
-
subagent_dispatch: false,
|
|
35
|
-
},
|
|
36
|
-
};
|
|
37
|
-
// ---------------------------------------------------------------------------
|
|
38
9
|
// Schema Validation
|
|
39
10
|
// ---------------------------------------------------------------------------
|
|
40
11
|
export function validatePipelineSchema(json) {
|
|
@@ -241,10 +212,6 @@ export function resolveRecipe(pipeline, instructions, variables, skipSteps, auto
|
|
|
241
212
|
agent_instructions: baseInstructions + autoApproveSuffix,
|
|
242
213
|
auto_approve: !!autoApprove,
|
|
243
214
|
steps: resolvedSteps,
|
|
244
|
-
// BAPI-345 (Ticket 1): additive static contract metadata. The section graph
|
|
245
|
-
// is NEVER embedded here — it is delivered on the plan-generation response.
|
|
246
|
-
contract_version: PIPELINE_CONTRACT_VERSION,
|
|
247
|
-
capability_descriptor: TIERED_SECTION_CAPABILITY_DESCRIPTOR,
|
|
248
215
|
};
|
|
249
216
|
}
|
|
250
217
|
// ---------------------------------------------------------------------------
|
|
@@ -139,7 +139,7 @@ export const PIPELINES = {
|
|
|
139
139
|
},
|
|
140
140
|
{
|
|
141
141
|
"type": "agent_task",
|
|
142
|
-
"instruction_file": "execute-plan
|
|
142
|
+
"instruction_file": "execute-plan.md",
|
|
143
143
|
"description": "Execute the implementation plan"
|
|
144
144
|
},
|
|
145
145
|
{
|
|
@@ -177,35 +177,7 @@ export const PIPELINES = {
|
|
|
177
177
|
"instruction_file": "monitor-ci-checks.md",
|
|
178
178
|
"description": "Monitor CI checks and report results"
|
|
179
179
|
}
|
|
180
|
-
]
|
|
181
|
-
"tiered_executor_policy": {
|
|
182
|
-
"supported_hosts": [
|
|
183
|
-
"claude_code"
|
|
184
|
-
],
|
|
185
|
-
"sequential_only": true,
|
|
186
|
-
"tier_model_mapping": {
|
|
187
|
-
"cheap": "haiku",
|
|
188
|
-
"basic": "sonnet",
|
|
189
|
-
"premium": "opus"
|
|
190
|
-
},
|
|
191
|
-
"final_review_policy": {
|
|
192
|
-
"default": "premium_whole_diff_when_below_coordinator_tier_touched_code",
|
|
193
|
-
"skip_when": "entire_run_inline_default_at_coordinator_tier"
|
|
194
|
-
},
|
|
195
|
-
"escalation_policy": {
|
|
196
|
-
"max_section_escalations": 1,
|
|
197
|
-
"allowed_hops": {
|
|
198
|
-
"cheap": "basic",
|
|
199
|
-
"basic": "premium"
|
|
200
|
-
},
|
|
201
|
-
"final_review_fix_reverify_passes": 1
|
|
202
|
-
},
|
|
203
|
-
"budget_policy": {
|
|
204
|
-
"cache_hit_rate_source": "measurement_spike_go_marker",
|
|
205
|
-
"default_cache_hit_rate": 0,
|
|
206
|
-
"abort_mode": "inline_default"
|
|
207
|
-
}
|
|
208
|
-
}
|
|
180
|
+
]
|
|
209
181
|
},
|
|
210
182
|
"learn-repository": {
|
|
211
183
|
"name": "learn-repository",
|
|
@@ -655,7 +627,6 @@ export const INSTRUCTIONS = {
|
|
|
655
627
|
"duplicate-and-context-scan.md": "Detect existing Jira tickets that duplicate or relate to this idea before any Jira mutation.\n\n## Inputs\n\n- Run manifest: `{docs_dir}/idea-to-ticket/{slug}-{run_id}/run-manifest.json`.\n- Research pack: `{docs_dir}/idea-to-ticket/{slug}-{run_id}/research-pack.md` (if produced).\n- Pipeline variable `allow_duplicate` controls override behavior (for this run, `allow_duplicate` = `{allow_duplicate}`). Treat the literal string `\"true\"` as override; any other value (including `\"false\"`, missing, or empty) is non-override.\n\n## Instructions\n\n> **Orchestrator-directed step.** This agent task is part of the full-automation chain and is authorized to call `get_tickets` as directed below — performing an orchestrator-directed tool call is not \"re-orchestrating\".\n\n1. Build at least two Jira search queries from the manifest:\n - **Title/keyword query**: use the most salient nouns from `idea` and `slug` as title/text keywords. Prefer 2-4 concrete terms over long natural-language sentences. Run via `get_tickets`.\n - **Stable idea-hash query** (the reliable cross-run dedup): run `get_tickets` with its `labels` parameter set to `bapi-idea-hash-{idea_hash}`. This label is identical for every run of the same idea, so it catches a PRIOR run that already created a ticket for this idea — even one created days ago. A hit here is a strong `duplicate` signal.\n - **Idempotency-label query**: run `get_tickets` with its `labels` parameter set to `bapi-idea-to-ticket-{run_id}` (the tool builds the `labels in (...)` JQL for you — do not pass a raw JQL string). This per-run label only matches a partial run of THIS same run, so it supports resume behavior.\n\n2. For each returned ticket, capture: ticket key, summary, status, and a short reason it matched (which query, which keyword).\n\n3. 
Classify the overall verdict as one of:\n - `duplicate` — at least one returned ticket clearly describes the same work as `idea`.\n - `related` — returned tickets are adjacent or partial overlaps but not the same work.\n - `none_found` — no meaningful matches.\n - `unable_to_check` — the Jira search itself failed (network error, auth error, JQL rejection). Record the failure and pick this verdict.\n\n4. Write `{docs_dir}/idea-to-ticket/{slug}-{run_id}/duplicate-assessment.json` with at minimum:\n - `verdict` — one of the four values above.\n - `matches` — array of `{ticket_key, summary, status, reason}` objects (may be empty).\n - `queries_used` — array of the actual JQL/search strings sent.\n - `allow_duplicate` — the resolved value of `{allow_duplicate}` for this run.\n\n5. Halt behavior:\n - If `verdict` is `duplicate` and `allow_duplicate` is not `\"true\"`, halt locally. Do not continue the pipeline. Tell the user that the duplicate halt is strict and that re-running with `--allow-duplicate` overrides it.\n - If `verdict` is `duplicate` and `allow_duplicate` is `\"true\"`, continue the pipeline but keep the duplicate evidence in the assessment file so downstream steps can reference it (e.g., to add a \"supersedes\" note to the draft).\n - For `related`, `none_found`, and `unable_to_check`, continue without halting.\n\n## Return\n\nConfirm `duplicate-assessment.json` was written, report `verdict`, and report whether the run is halting or continuing.\n",
|
|
656
628
|
"evaluate-and-recommend.md": "Evaluate the clarifying questions and ticket critiques generated for {ticket_key} against the actual codebase, then decorate every actionable item with the resolution guidance the reviewer will need on the decision page. The result is a single combined review-and-resolution document.\n\n1. Fetch the current ticket description using the `get_ticket` tool with ticket_number `{ticket_key}` exactly once at the top of this procedure.\n\n2. Gather the clarifying questions and critique documents from the preceding pipeline steps. The local files at `{docs_dir}/clarifying-questions/{ticket_key}-clarifying-questions.md` and `{docs_dir}/ticket-critiques/{ticket_key}-ticket-quality-critique.md` are the canonical source. After a second-opinion run, each document has this shape:\n\n - A top-level H1 (`# Ticket Analysis` for clarifier docs, `# Ticket Quality Critique` for critique docs) followed by an italic provider-attribution line of the form `_This analysis was generated by GPT|Claude|Gemini._`. The attribution names the LLM family that produced the **first round**.\n - The first-round questions / critique items, exactly as written by the first-round model.\n - **Inline second-opinion blockquotes** nested directly under each prior item the second round addressed. Each blockquote starts with `> **Second opinion (<provider>) - <stance>.**` where `<provider>` is `GPT|Claude|Gemini` and `<stance>` is `concurrence|refinement|disagreement`. The blockquote is followed by `> *Citations: <comma-separated grounding refs>*`. Items the second round did **not** comment on have no blockquote — that is the \"weak concurrence\" signal. Use the provider name in the blockquote header to attribute the comment to the second-round LLM family in your evaluation prose where helpful.\n - A **`## New in Second Opinion`** tail block listing items the second round added on top of the first round. 
Immediately under the H2 you will find a second italic attribution line of the form `_These additional points were raised by GPT|Claude|Gemini._` — this names the second-round LLM family. Sub-headings are agent-specific:\n - Clarifier docs: `### New Requirements Questions` and `### New Technical Questions` — numbering continues from the prior section.\n - Critique docs: `### New Requested Changes` and `### New Points to Consider` — numbering continues from the prior section.\n Each new item has its own `*Citations: ...*` line.\n - A final **`## Second Opinion Summary`** footer (1-3 sentences) capturing the second round's overall position. This always renders, even when the second round had no inline comments and no new items.\n\n **Legacy fallback shape**: in rare cases (model lacks JSON-schema support, the JSON call failed, or the response could not be parsed), the document may instead end with `\\n\\n---\\n\\n` followed by a `## Second Opinion` section containing `### Response to Prior Items` and `### Additional Points` subsections. If you detect this fallback shape, treat it equivalently: subsection responses tagged `concurrence` map to weak/strong concurrence (use the body length to disambiguate — bare one-line concurrences are weak), `refinement`/`disagreement` map to the disagree buckets, and items under `### Additional Points` map to the gap-captured bucket below.\n\n **Partial-source-doc tolerance**: if the clarifying-questions doc OR the ticket-critique doc is missing or unreadable, skip that document silently and produce items only for the surviving doc. Do not fail. If **both** documents are absent, still write the combined output file at `{docs_dir}/review/{ticket_key}-review-and-resolution.md` with the standard top-level sections (`Confirmed Improvements`, `Needs Scrutiny`, `Open Questions`, `Round Agreement Summary`) present but no emitted E-items in any section. 
This preserves downstream file-existence expectations for the capture-review-decisions step.\n\n3. Determine **Round Agreement** for every clarifying question and critique point using these rules:\n\n - **Both rounds agree (weak concurrence)** — the prior item has NO inline blockquote AND is not in `## New in Second Opinion`. The second round did not object to the point and did not consider it important enough to comment on. Briefly validate the answer's groundedness against the codebase. If validation surfaces concerns, demote this item to **rounds disagree** (single round only depth) and treat as Needs Scrutiny.\n - **Both rounds agree (strong concurrence)** — the prior item carries an inline `> **Second opinion (<provider>) - concurrence.** ...` blockquote. The second round explicitly reinforced the prior point. Reuse the blockquote's `*Citations:*` as starting evidence; verify briefly.\n - **Rounds disagree (refinement)** — the prior item carries an inline `> **Second opinion (<provider>) - refinement.** ...` blockquote. The second round modified or added detail. Apply full disagreement-depth analysis; reuse blockquote citations.\n - **Rounds disagree (disagreement)** — the prior item carries an inline `> **Second opinion (<provider>) - disagreement.** ...` blockquote. The second round contradicts the prior. Apply full disagreement-depth analysis; categorize the outcome based on which position the codebase supports.\n - **Gap captured** — the item lives under `## New in Second Opinion > ### New <category>` (one of: New Requirements Questions, New Technical Questions, New Requested Changes, New Points to Consider). Apply the two-axis check below. Reuse the new item's `*Citations:*` as starting evidence.\n - **Single round only** — the document has none of the above markers (no inline blockquotes, no `## New in Second Opinion` block, no `## Second Opinion Summary` footer). The pipeline ran only one round. 
Treat every item as a disagreement: cite 2+ codebase locations and give full analytical depth.\n\n Apply these depth and categorization rules:\n\n - **Both rounds agree (weak concurrence)**: 1 codebase citation, 1-2 sentence assessment confirming grounding. Categorize as Confirmed Improvement if grounded; demote to Needs Scrutiny if validation finds problems.\n - **Both rounds agree (strong concurrence)**: 1 codebase citation (may reuse a blockquote citation), 1-2 sentence assessment. Categorize as Confirmed Improvement.\n - **Rounds disagree (refinement or disagreement)**: 2+ codebase citations, 3-4 sentence assessment that explicitly weighs the prior-round position against the second-opinion position. Categorize based on which position the evidence supports. Always include both positions in the Assessment.\n - **Gap captured — two-axis check** (for items in `## New in Second Opinion`):\n - If both the question is grounded in the codebase/standards AND the best-guess answer is sensible → **Confirmed Improvement** with a 1-2 sentence assessment and 1 citation.\n - If the question is genuine but the best-guess answer is flawed → **Needs Scrutiny**. Cite 2+ files. Use disagreement-depth.\n - If the question itself does not hold up → **Needs Scrutiny** with evidence of what the code actually does. Disagreement-depth.\n - If neither codebase nor standards can settle the question → **Open Questions**. Disagreement-depth.\n - **Single round only**: Treat as a disagreement — cite 2+ codebase locations and give full analytical depth.\n\n For critique points (Requested Changes and Points to Consider), apply the same Round Agreement rules. 
The signal locations are inline `> **Second opinion (<provider>) - ...**` blockquotes nested under items in `### Requested Changes` / `### Points to Consider`, and gap-captured items under `## New in Second Opinion > ### New Requested Changes` / `### New Points to Consider`.\n\n **Depth calibration**:\n - When Round Agreement is `both rounds agree (weak concurrence)`, `both rounds agree (strong concurrence)`, or `gap captured` (passes both axes), keep Assessment to 1-2 sentences and Codebase Evidence to 1 citation — the validation step or the consensus does the heavy lifting.\n - When Round Agreement is `rounds disagree (refinement)`, `rounds disagree (disagreement)`, or `single round only`, Assessment should be 3-4 sentences and Codebase Evidence should cite 2+ files explaining the discrepancy.\n - A `gap captured` item that FAILS the two-axis check uses the disagreement depth, not the gap-captured depth.\n - A `weak concurrence` item that FAILS your validation gets demoted: change Round Agreement to `rounds disagree (single round only)`, expand Assessment to 3-4 sentences, and add a 2nd citation.\n\n **Source field conventions** — the `**Source**` string disambiguates where in the source doc the item lives so the downstream `capture-review-decisions` step can route the rewrite correctly. 
Use these forms:\n\n - **Weak concurrence (silent prior item)**: `Clarifying Q3 (prior round, weak concurrence)` or `Critique: Requested Change 2 (prior round, weak concurrence)`.\n - **Strong concurrence (explicit blockquote)**: `Clarifying Q9 (prior round, concurrence inline)` or `Critique: Points to Consider 1 (prior round, concurrence inline)`.\n - **Refinement (inline blockquote)**: `Clarifying Q3 (prior round, refinement inline)`.\n - **Disagreement (inline blockquote)**: `Clarifying Q5 (prior round, disagreement inline)`.\n - **Gap captured (tail-block item)**: `Clarifying Q11 (new in second opinion → New Requirements Questions)` or `Critique: Requested Change N+1 (new in second opinion → New Requested Changes)`. Always spell out the sub-section name after the arrow — capture-review-decisions uses it to find the rewrite target.\n - **Single round only**: `Clarifying Q3 (single round)`.\n\n## Phase 1 — Evaluate and classify every item\n\nNumber every item sequentially across all sections (E-1, E-2, E-3, …). When the same underlying issue is raised in BOTH the clarifying-questions doc and the critique doc, consolidate it into a SINGLE E-item rather than emitting one per source, and cite both origins in its `**Source**` field (e.g. `Clarifying Q3 + Critique: Requested Change 2`); keep the numbering sequential with no gaps. Classify every clarifying question and every critique point into exactly one of three buckets using the Round Agreement rules, codebase groundedness checks, and the `gap captured` two-axis check before producing any recommendation decoration:\n\n- **Confirmed Improvements**: Suggestions that are grounded and would genuinely improve the ticket by closing significant gaps or correcting design issues. 
Includes weak-concurrence items that passed validation, strong-concurrence items, and `gap captured` items that passed both axes.\n- **Needs Scrutiny**: Suggestions based on inaccurate codebase assumptions, with evidence of the actual code behavior. Includes `gap captured` items that failed either axis, weak-concurrence items demoted by validation, and the loser of any rounds-disagree pair.\n- **Open Questions**: Legitimate ambiguities that require human input to resolve.\n\nPhase 1 must complete before Phase 2 begins — do not start decorating an item with a decision tree, recommendation index, or clarity fields until classification is final.\n\n## Phase 2 — Decorate actionable items with resolution guidance\n\nPhase 2 applies **only** to items in the `Needs Scrutiny` and `Open Questions` buckets. Confirmed Improvements remain compact and undecorated (see \"Confirmed Improvements output\" below).\n\nFor every actionable (Needs Scrutiny / Open Questions) item, produce the following template using these stable labels:\n\n```\n### E-<sequential number>: <concise title>\n\n**Source**: <where this item lives in the source doc — see Source field conventions above>\n\n**Round Agreement**: <one of the six values> — <1 sentence on what the second round contributed>\n\n**Confidence**: <High|Medium|Low>\n\n**Resolution path**: <\"resolve at your desk\" or \"needs a conversation\">\n\n**Decision tree**:\n- If <condition 1>, then <action 1>. See `file:line`. <1-2 sentence rationale.>\n- If <condition 2>, then <action 2>. See `file:line`. <1-2 sentence rationale.>\n- If <condition 3>, then <action 3>. See `file:line`. 
<1-2 sentence rationale.>\n\n**Recommendation Index**: <0-based index of the recommended branch in the decision tree above>\n\n**Recommendation**: <which branch the evidence best supports and why, 1-2 sentences>\n\n**Original question**: <the clarifying-question or critique point as it was originally raised, sourced verbatim or near-verbatim from the original clarifying-questions / critique docs. Light rephrasing is allowed; do NOT introduce new technical content. Soft cap ~30 words.>\n\n**Option consequences**:\n- <consequence for branch 1 — describe the behavioral consequence of choosing this option, not its rationale. ~25 words.>\n- <consequence for branch 2 — same shape. ~25 words.>\n- <consequence for branch 3 — same shape. ~25 words.>\n\n**Why it matters**: <one concrete sentence on the impact this decision has on the ticket, the users, or the affected code paths. Soft cap ~40 words.>\n\n**Recommendation explanation**: <explain why the recommended branch is the best choice, tied to the codebase evidence and the consequences of each option. Soft cap ~60 words.>\n\n**Assessment**: <three-point structure>\n1. **State the original suggestion**: What did the clarifying question or critique point propose?\n2. **State the codebase evidence**: What does the actual code show about this suggestion?\n3. **State the implication**: Does the evidence confirm the suggestion, contradict it, or leave it unresolved?\n\n**Codebase Evidence**:\n- `path/to/file.ts:42` — <what this line/block demonstrates>\n- `path/to/other.ts:110-125` — <what this range demonstrates>\n\n<If no direct codebase evidence exists, state: \"No direct codebase evidence found.\">\n```\n\n**Writing quality**: Write each Assessment as if explaining to a colleague who has NOT read the original clarifying questions or critique documents. Each assessment should be self-contained and understandable without cross-referencing the source material. 
The three-point Assessment structure ensures every assessment tells a complete story rather than assuming the reader already knows what was suggested and why.\n\n**Decision tree rules**:\n- Each decision tree must have **2–4 branches**. Do not exceed 4 and do not produce only 1.\n- **Strict lower bound — reclassify on single-branch items**: If you can think of only one branch for a `Needs Scrutiny` or `Open Questions` item — that is, the resolution is effectively forced — you must reclassify the item as a **Confirmed Improvement** instead of emitting a single-branch decision tree. The 2-branch lower bound is a hard rule; do not work around it by stretching to a contrived second branch. If a single answer is genuinely the only path, the item belongs in Confirmed Improvements.\n- Each branch must end with a concrete, actionable step (not \"investigate further\").\n- Cite relevant code in `file:line` format where possible. If no code reference exists, omit the citation rather than fabricating one.\n- Cap each branch at 2-3 sentences total (including the action and rationale).\n- `**Recommendation Index**` must be the 0-based index of the recommended branch in the decision tree above. The first branch is index 0, the second is index 1, etc.\n- **Option consequences** must be a list parallel to the decision-tree branches: one entry per branch, in the same order. Describe the behavioral consequence of choosing that option, not its rationale.\n- **\"resolve at your desk\"**: The item can be resolved through technical investigation — reading code, running tests, or checking configuration. 
No stakeholder input needed.\n- **\"needs a conversation\"**: The item involves a product decision, scope question, or cross-team dependency that cannot be resolved from the codebase alone.\n\n**Confidence Tags** — assign confidence based on codebase evidence strength:\n- **High**: Cite specific `file:line` references that directly support the assessment.\n- **Medium**: Reference related code patterns or architectural conventions, but not the exact code in question.\n- **Low**: No direct codebase evidence. Assessment is based on general reasoning or domain knowledge.\n\n### Confirmed Improvements output\n\nRender each Confirmed Improvement as a single bullet in a compact list. No headings per item, no decision trees, no clarity-field decoration:\n\n- **E-<number>: <title>** — Source: <source string>; Round Agreement: <one of the six values>; Confidence: <High|Medium|Low>. <recommended action, 1 sentence.>\n\nThe compact bullet still includes `Source`, `Round Agreement`, `Confidence`, and the one-sentence recommended action so `capture-review-decisions.md` can map these items to its `clear_improvements` array.\n\n## Round Agreement Summary\n\nAfter all items are processed, produce a summary section that groups items by round agreement status:\n\n### Points of Disagreement\nFor items where the evaluation marked `rounds disagree (refinement)`, `rounds disagree (disagreement)`, or `single round only` — including `gap captured` items that failed the two-axis check and landed in Needs Scrutiny — list as bullets with the E-number, the nature of the disagreement, and a 1-sentence explanation of why this disagreement matters for the ticket (e.g., it indicates an architectural ambiguity, a scope question, or a standards gap).\n\nIf no items were marked as disagreements, write: \"All reviewed points had round consensus. 
No disagreement-driven risks identified.\"\n\n### Points of Agreement\nSplit this section into two sub-bullets to surface the difference between the second round explicitly reinforcing a point versus tacitly accepting it:\n\n**Strong agreement** — items where the evaluation marked `both rounds agree (strong concurrence)`. The second round took the trouble to write an explicit `concurrence` blockquote; this is a soft signal that the point is important enough that the second round wanted to underline it. List as bullets with the E-number and a half-sentence noting the shared conclusion.\n\n**Weak agreement** — items where the evaluation marked `both rounds agree (weak concurrence)`. The second round did not object and did not consider the item important enough to comment on; the local agent's brief validation found no concerns. List as bullets with the E-number and a half-sentence noting the conclusion. Lower priority for human review than strong-agreement items.\n\nIf a sub-bullet has no items, omit it (rather than writing a \"no items\" note for each — keep the section tidy).\n\n### Gaps Captured by Second Round\nFor items where the evaluation marked `gap captured` (sound second-opinion Additional Points confirmed as Confirmed Improvements): list as bullets with the E-number and a half-sentence noting the gap the second round surfaced. These items did not require a decision — they are already in Confirmed Improvements — but are surfaced here so the reviewer sees what the second-round analysis added on top of the first round.\n\nIf no gaps were captured, write: \"The second round did not surface any net-new confirmed improvements.\"\n\n## Edge Cases\n\n- If the evaluation contains zero items in Needs Scrutiny, write: \"No items flagged for scrutiny. All reviewed suggestions were either confirmed or remain open questions.\"\n- If the evaluation contains zero items in Open Questions, write: \"No open questions identified. 
All ambiguities were resolved through codebase analysis.\"\n- If both Needs Scrutiny and Open Questions are empty, include only the Confirmed Improvements section and add a summary: \"All suggestions from the review were confirmed as grounded improvements. No decision trees are needed.\"\n- If both source documents are absent, still write the combined file with the standard top-level sections present but no emitted E-items rather than failing.\n\n## Example of a Well-Written E-Item (Weak Concurrence — Confirmed Improvement)\n\n### E-2: Caching of analysis-type lookups\n\n**Source**: Clarifying Q4 (prior round, weak concurrence)\n\n**Round Agreement**: both rounds agree (weak concurrence) — the second round did not comment on this item; brief validation confirms the answer is grounded.\n\n**Assessment**: The prior round suggested caching `ANALYSIS_TYPES` lookups in a module-level variable to avoid repeated DB round trips. The codebase already does this at `src/python/learn_repository/__init__.py:14`, so the suggestion is grounded and the second round's silence is consistent with tacit agreement.\n\n**Codebase Evidence**:\n- `src/python/learn_repository/__init__.py:14` — module-level constant pattern is the established convention\n\n(Confirmed Improvements compact bullet form: **E-2: Caching of analysis-type lookups** — Source: Clarifying Q4 (prior round, weak concurrence); Round Agreement: both rounds agree (weak concurrence); Confidence: High. 
Confirm the existing module-level cache and add a short comment naming the pattern.)\n\n## Example of a Well-Written E-Item (Strong Concurrence — Confirmed Improvement)\n\n### E-4: Sequential per-type review_repository fan-out\n\n**Source**: Clarifying Technical Q2 (prior round, concurrence inline)\n\n**Round Agreement**: both rounds agree (strong concurrence) — the second round explicitly reinforced the prior recommendation, citing per-type lock release simplicity as the deciding factor.\n\n**Assessment**: The prior round recommended sequential per-type execution; the second-opinion blockquote reinforced this, noting that the per-type lock release contract becomes trivial under sequential execution. `review_repository` already uses internal `asyncio.gather` for chunk-level concurrency, so wrapping it in another concurrency layer would not buy throughput and would complicate the abort/finally cleanup contract.\n\n**Codebase Evidence**:\n- `src/python/learn_repository/review_repository.py:369-387` — review_repository internally gathers chunks with return_exceptions=True\n\n## Example of a Well-Written E-Item (Rounds Disagree — Needs Scrutiny with full clarity fields)\n\n### E-5: Authentication middleware placement for new endpoint\n\n**Source**: Clarifying Q2 (prior round, disagreement inline)\n\n**Round Agreement**: rounds disagree (disagreement) — the prior round recommended adding auth at the router level; the second-opinion blockquote argued the existing middleware stack already covers it.\n\n**Confidence**: High\n\n**Resolution path**: resolve at your desk\n\n**Decision tree**:\n- If the global middleware stack already enforces auth on `/api/*` routes, then drop the explicit `Depends(require_api_key)` from the new endpoint. See `main.py:45-52`.\n- If routers each opt in to auth via dependencies, then add `Depends(require_api_key)` to the new endpoint. 
See `api/routes/__init__.py:18-30`.\n- If only certain `/api/*` sub-paths need auth, then carve out a sub-router with its own dependency. See `api/routes/__init__.py:18-30`.\n\n**Recommendation Index**: 1\n\n**Recommendation**: The existing routers each opt in to auth, so the new endpoint must do the same. Adding `Depends(require_api_key)` is the smallest correct change.\n\n**Original question**: Should the new `/api/exports` endpoint declare an explicit auth dependency, or is it covered by the global middleware?\n\n**Option consequences**:\n- Endpoint becomes publicly reachable; protected data leaks via the new path.\n- Endpoint requires a valid API key, matching every other `/api/*` route.\n- Adds a parallel router; doubles the auth surface that has to be kept consistent.\n\n**Why it matters**: Authentication on `/api/exports` directly determines whether protected data leaks; the wrong default is a security regression, not a stylistic choice.\n\n**Recommendation explanation**: The codebase pattern in `api/routes/__init__.py:18-30` shows each router declaring its own `Depends(require_api_key)`. Following that convention adds two lines, keeps auth uniform across endpoints, and avoids a parallel sub-router that future maintainers would have to keep in sync.\n\n**Assessment**: The prior round suggested that the new `/api/exports` endpoint needs an explicit `Depends(require_api_key)` guard because it is not covered by the global middleware. The second opinion disagreed, claiming the middleware stack in `main.py` handles authentication for all `/api/*` routes. Codebase analysis shows that `main.py:45-52` applies rate limiting globally but authentication is applied per-router in `api/routes/__init__.py:18-30` — each router must opt in via `Depends(require_api_key)`. 
This supports the prior round's position: the new endpoint needs an explicit auth dependency.\n\n**Codebase Evidence**:\n- `main.py:45-52` — global middleware applies rate limiting and CORS, but not authentication\n- `api/routes/__init__.py:18-30` — each router includes its own auth dependency; there is no catch-all auth middleware\n\n## Example of a Well-Written E-Item (Gap Captured — Confirmed Improvement)\n\n### E-7: Missing Alembic migration for new role-scope column\n\n**Source**: Critique: Requested Change N+1 (new in second opinion → New Requested Changes)\n\n**Round Agreement**: gap captured — the second opinion surfaced a missing migration that the prior round did not raise, and recommended adding an Alembic revision.\n\n**Assessment**: The ticket introduces a new `role_scope` column on the `users` table but does not mention a migration. The second opinion flagged this gap and recommended adding an Alembic revision; both the gap and the recommendation are grounded, since `db/alembic/versions/` is the established location for schema changes per the project's database guide.\n\n**Codebase Evidence**:\n- `db/alembic/versions/` — all schema changes land here as autogenerated revisions\n\n## Save rule\n\nSave the combined review-and-resolution document to `{docs_dir}/review/{ticket_key}-review-and-resolution.md`. Output only the combined review-and-resolution document — no meta-commentary.\n\n## Return\n\nConfirm \"Review-and-resolution document written to `{docs_dir}/review/{ticket_key}-review-and-resolution.md`.\" and report the total count of E-items captured.\n",
|
|
657
629
|
"execute-epic-research.md": "Execute the research plan and write findings.\n\n## Instructions\n\n1. Read the research plan from `{docs_dir}/epic-plans/{epic_slug}/research-plan.md`.\n\n2. Execute the plan based on the Research Mode:\n\n **If mode is `deep`**:\n - Call the `request_deep_research` MCP tool with:\n - `query`: the Deep Research Query from the plan\n - `context`: \"Bridge API is a Python/FastAPI application with PostgreSQL, LiteLLM, and Pinecone. This research supports epic planning for: {epic_description}\"\n - `wait_for_result`: true\n - `save_locally`: true\n - If deep research fails, log a warning and fall back to web searches using the Web Search Topics from the plan. Do NOT halt.\n\n **If mode is `web`**:\n - Perform web searches for each topic listed in the plan.\n - Capture relevant findings from each search.\n\n **If mode is `none`**:\n - Write a brief note: \"No external research needed. Proceeding with codebase exploration.\"\n\n3. Write all findings to `{docs_dir}/epic-plans/{epic_slug}/research-findings.md` with this structure:\n\n```markdown\n# Research Findings\n\n## Mode\n{deep | web | none}\n\n## Findings\n{Synthesized research results organized by topic. Include source references where applicable.}\n\n## Key Takeaways\n{Bullet points summarizing the most important findings that will inform the codebase exploration and epic decomposition.}\n```\n\n## Return\n\nConfirm research findings were written to `{docs_dir}/epic-plans/{epic_slug}/research-findings.md` and report the mode used (`deep`, `web`, or `none`) plus a one-line summary of the key takeaways.\n",
|
|
658
|
-
"execute-plan-sectioned.md": "Execute the AI-generated implementation plan for ticket {ticket_key} using the\nClaude Code tiered section executor (BAPI-346).\n\nThis executor is **gate-first and fail-open**: it only uses tiered sub-agent\ndispatch when every eligibility gate passes AND a measurement GO marker proves it\npays off. In every other case it degrades — with a visible Warn notice — to the\nexact inline executor behavior used today, which is risk-free. Tiered execution\nbeing unavailable must NEVER fail the `implement-ticket` pipeline.\n\nSections run **strictly sequentially**. There is no parallel scheduling, no\nworktree isolation, and no merge logic.\n\nFor this run, `auto_approve` = `{auto_approve}`. When `auto_approve` is the\nliteral `true`, do not pause for confirmation at any optional gate (e.g.\nverification-command preview); apply the documented defaults and proceed.\n\n---\n\n## Stage 0 — Measurement Spike Gate and Runtime Eligibility\n\nDo all of the following before deciding how to execute:\n\n1. Read the plan from `{docs_dir}/plans/{ticket_key}-plan.md`.\n2. Read the measurement GO marker from\n `{docs_dir}/tiered-section-executor/measurement-go.json` if present.\n3. 
Call `get_config_field` with `field_name` set to `\"tiered_execution\"`.\n\nContinue to **tiered execution** only when ALL of these are true:\n\n- `tiered_execution` is `claude_code_only` or `all_capable`.\n- The host is **Claude Code** and the Task tool supports a per-call `model`\n override.\n- The BAPI-345 `contract_version` is understood (the recipe envelope advertised\n a `contract_version` this asset recognizes).\n- A non-empty `sections[]` graph is present from the BAPI-345 contract\n payload/context (delivered on the plan-generation response — it is never\n embedded in the recipe envelope).\n- The measurement marker exists, is valid JSON describing an object with the\n required keys, and has `decision` equal to `\"GO\"`.\n- The marker's `token_reduction_ratio` is greater than or equal to `0.30`.\n\n`all_capable` behaves **like `claude_code_only` only for Claude Code**. On any\nother host, `all_capable` falls back to inline-default.\n\nIf ANY gate fails — including a missing, malformed, or non-GO marker, a\n`token_reduction_ratio` below `0.30`, a missing/invalid section graph, or a host\nthat is not Claude Code — print a **visible Warn notice naming the specific gate\nthat failed** and execute the **Fallback Inline Executor** below. Do not fail the\npipeline.\n\n---\n\n## Fallback Inline Executor (the safe default)\n\nThis preserves today's behavior exactly. Use it whenever Stage 0 degrades.\n\n1. Read the plan from `{docs_dir}/plans/{ticket_key}-plan.md` and count the steps.\n Announce: **\"Plan contains N steps.\"**\n2. For each step, in strict order:\n - Announce: **\"Step X of N: <step title from plan>\"**\n - Execute the step, making code changes as directed.\n - Confirm: **\"Step X complete — <brief summary>.\"**\n3. Rules:\n - Execute steps in strict sequential order. 
Do not skip, reorder, or combine\n steps.\n - Run only the safe verification explicitly specified by the plan's review\n steps.\n - Do NOT run `git commit` or `git push` — leave all changes uncommitted for\n developer review.\n - If a step is ambiguous or blocked, note the issue clearly (what is\n ambiguous and why) and continue with the next step.\n4. Final audit: re-read the plan, verify every step was addressed, and list any\n skipped/partial steps with reasons.\n\nFallback mode is **risk-free** and must not fail the pipeline merely because\ntiered execution was unavailable.\n\n---\n\n## Stage 1 — Section Graph Parsing and Topological Ordering\n\nWhen Stage 0 permits tiered execution:\n\n- Treat the BAPI-345 `sections[]` graph as **authoritative**. Do NOT infer\n tier, risk, dependencies, or order from the markdown prose.\n- Topologically sort sections by their `depends_on` edges.\n- Enforce **strict sequential** execution of the sorted order. No parallel\n scheduling, no worktree isolation, no merge logic.\n- If the graph has a cycle, a missing dependency reference, a duplicate section\n `id`, or a malformed section object, print a Warn notice and **degrade to the\n Fallback Inline Executor**.\n\n---\n\n## Stage 2 — Tier and Mode Resolution\n\nThe tier-to-model mapping is **asset-local** (defined here, not read from config):\n\n- `cheap` maps to model `haiku`\n- `basic` maps to model `sonnet`\n- `premium` maps to model `opus`\n\nExecution modes:\n\n- `sub_agent` — dispatch the section to a Claude Code Task sub-agent at the\n resolved model tier.\n- `inline_tiered` — run the section in-thread at the resolved model tier (no\n sub-agent dispatch).\n- `inline_default` — run the section in-thread at coordinator tier (the\n fallback/degraded mode).\n\nSelection rules for each section:\n\n- Select `sub_agent` ONLY when `subagent_eligible` is `true`, the Mechanical Risk\n Floor permits it, AND the Budget Guard permits it.\n- Select `inline_tiered` when sub-agent 
dispatch is not allowed but the host can\n still run the section at the resolved model tier.\n- Select `inline_default` for all fallback/degraded cases.\n\nWhenever the **mode actually run** is lower / less-tiered than the **mode\nintended**, emit a **visible Warn notice** with the reason.\n\n---\n\n## Stage 3 — Mechanical Risk Floor\n\nTrust the BAPI-345-derived `subagent_eligible` / `requires_review` flags, but\n**re-enforce the invariants locally** (defense in depth):\n\n- If `risk_level` is not `\"low\"`, OR `requires_review` is `true`, the section is\n **never** dispatched to a cheap/basic unsupervised worker. Run it at\n coordinator tier and require the Stage 9 review gate.\n- Treat missing risk fields conservatively as **requiring coordinator-tier\n execution**.\n\n---\n\n## Stage 4 — Layered Context and Task Prompt Construction\n\nFor each section, construct the worker context from, and only from:\n\n- A short static framing of the task.\n- The section's own `instructions`.\n- The files / docs / symbols named in the section's `context_manifest`.\n- Only the structured handoffs from direct and transitive `depends_on`\n predecessors that actually matter to the current section.\n- A bounded expansion rule: the worker may read an adjacent file only when\n strictly necessary, and must report any such out-of-manifest reads.\n\nFor Claude Code `sub_agent` execution, use the **Task tool with the per-call\n`model` override set to the resolved concrete model** (`haiku` / `sonnet` /\n`opus`) from Stage 2.\n\n---\n\n## Stage 5 — Structured Handoff Contract\n\n- After each section, derive `files_changed` from a real `git diff --stat` — not\n from the worker's self-report.\n- Maintain only **structured handoffs** as running coordinator state. 
Do NOT\n carry full sub-agent transcripts forward.\n- Each handoff is a JSON object carrying at least these keys: `section_id`,\n `summary`, `files_changed`, `symbols_added`, `interfaces_changed`,\n `new_tests`, `assumptions`, `follow_up_for_dependents`, and\n `out_of_manifest_reads`. For example:\n\n```json\n{\n \"section_id\": \"step-2-telemetry-endpoint\",\n \"summary\": \"Added POST /tiered-section-metrics route.\",\n \"files_changed\": [\"api/routes/tiered_section_metrics.py\"],\n \"symbols_added\": [\"record_tiered_section_metric_endpoint\"],\n \"interfaces_changed\": [],\n \"new_tests\": [],\n \"assumptions\": [\"DAL helper record_tiered_section_metric already exists\"],\n \"follow_up_for_dependents\": \"Endpoint is POST /jira/tiered-section-metrics.\",\n \"out_of_manifest_reads\": []\n}\n```\n\n---\n\n## Stage 6 — Safe Verification Allowlist\n\nVerification commands are **untrusted data**, even when they come from the\nsection's `verification.command` field.\n\n- Prefer known repository test commands over free-form `verification.command`.\n- Allow only command heads explicitly listed here: `python`, `python3`,\n `pytest`, `npm`, `pnpm`, `yarn`, `uv`, `poetry`, and `make`.\n- Forbid pipes, redirection, command chaining, backgrounding, destructive\n filesystem operations, network-mutation commands, secrets inspection, and\n package installation. 
If a `verification.command` contains any of these, do\n NOT run it — fall back to a known repository test command or a self-check.\n- When `auto_approve` is not the literal `true`, show the command before running\n it.\n- When running the project's tests, reuse `.claude/agents/run_test_agent.py` so\n the existing failure classification and rerun-only-failing behavior apply.\n\n---\n\n## Stage 7 — Rollback-Aware One-Hop Escalation\n\n- Before each section, create a checkpoint artifact under\n `{docs_dir}/tiered-section-executor/checkpoints/` capturing the current\n `git status --porcelain` and a binary diff patch.\n- If section verification fails:\n - For low-risk sections, **revert to the checkpoint** by default.\n - Escalate **exactly one hop**: `cheap` to `basic`, or `basic` to `premium`.\n Include an escalation packet with the failed diff, the verification output,\n the changed files, the failure classification, and the prior handoff.\n - If the escalated attempt also fails, mark the section **blocked** and surface\n it to the human. Do NOT climb further.\n\n---\n\n## Stage 8 — Budget Guard and Abort-to-Inline\n\n- Use the measurement GO marker's `cache_hit_rate`. Default it to `0.0` only when\n missing or invalid (the conservative direction).\n- Project the tiered cost including: projected spawn overhead, possible one-hop\n escalations, and the final review cost.\n- Compare the projected tiered cost to the **single-premium-agent baseline** from\n the GO marker (`single_agent_baseline`).\n- If projected cost **exceeds** the baseline, **abort remaining tiering** and run\n all remaining sections `inline_default`. 
Print a **visible Warn notice** naming\n the degradation reason.\n\n---\n\n## Stage 9 — Mandatory Higher-Tier Diff Review Gate\n\n- Require a higher-tier review of section outputs according to the section's risk\n and tier.\n- Require a **final premium whole-diff review** whenever anything below\n coordinator tier touched code.\n- Skip the final premium review **only** when the entire run stayed\n `inline_default` at coordinator tier.\n- Review findings must be fixed. Allow **exactly one** final\n review → fixer → reverify pass.\n\n---\n\n## Stage 10 — Telemetry Emission\n\nAfter **every** section attempt, call the MCP tool `record_tiered_section_metric`\nwith: `ticket_number`, `section_id`, `tier_assigned`, `mode_run`, and a `metrics`\nobject. Populate `metrics` with at least: `contract_version`, `mode_intended`,\n`model_resolved`, `activity`, `risk_level`, `risk_categories`, `isolation`,\n`subagent_eligible`, `requires_review`, `wall_clock_ms`, `tokens`, `cache`,\n`verification`, `escalation_count`, `budget_snapshot`, `files_changed`,\n`handoff`, and `degraded_reason`.\n\nIf telemetry recording fails, **Warn and continue** — never fail the\nimplementation over a telemetry write.\n\n---\n\n## Final Rules (both paths)\n\n- Do NOT run `git commit` or `git push`. Leave all changes uncommitted for\n developer review.\n- After all sections/steps are handled, re-read the plan and verify every step\n was addressed; list any blocked/partial sections with reasons.\n\n## Return\n\nSummarize for the developer: the ticket key; sections completed; sections\nblocked; the modes actually run; escalations used; whether the final premium\nreview ran; whether execution degraded to inline-default and why; and an explicit\nreminder that no commit or push was performed. If execution ran the Fallback\nInline Executor, say so and name the gate that triggered the fallback.\n",
|
|
659
630
|
"execute-plan.md": "Execute the AI-generated implementation plan for ticket {ticket_key}.\n\n---\n\n## Step 1 — Load and Enumerate the Plan\n\n1. Read the plan from `{docs_dir}/plans/{ticket_key}-plan.md`.\n2. Count the total number of implementation steps in the plan.\n3. Announce: **\"Plan contains N steps.\"**\n\n## Step 2 — Execute Each Step Sequentially\n\nFor each step in the plan:\n\n1. **Announce** before starting: **\"Step X of N: <step title from plan>\"**\n2. **Execute** the step, making code changes as directed.\n3. **Confirm** after completing: **\"Step X complete — <brief summary of what was done>.\"**\n\n### Rules\n\n- Execute steps in strict sequential order. Do not skip, reorder, or combine steps.\n- Run any tests or checks specified in the plan's review steps.\n- Do NOT run `git commit` or `git push` — leave all changes uncommitted for developer review.\n- If a step is ambiguous or blocked, note the issue clearly (what is ambiguous and why) and continue with the next step.\n\n## Step 3 — Final Audit\n\nAfter all steps are executed:\n\n1. Re-read the plan file at `{docs_dir}/plans/{ticket_key}-plan.md`.\n2. Compare the plan against the work completed. Verify every step was addressed.\n3. List any steps that were skipped or only partially completed, with reasons.\n4. Announce: **\"Audit complete — N of N steps fully addressed.\"** (or note discrepancies).\n\n## Return\n\nConfirm \"Audit complete — N of N steps fully addressed.\" (or list discrepancies — which steps were skipped/partial and why).\n",
|
|
660
631
|
"execute-research.md": "Execute the research plan and produce a consolidated research pack.\n\n## Inputs\n\n- Research plan: `{docs_dir}/idea-to-ticket/{slug}-{run_id}/research-plan.json`.\n- Run manifest: `{docs_dir}/idea-to-ticket/{slug}-{run_id}/run-manifest.json`.\n\n## Instructions\n\n1. Read `{docs_dir}/idea-to-ticket/{slug}-{run_id}/research-plan.json`. Execute only the tools listed in `selected_tools`. Do not invoke any tool that is not in that list.\n\n2. For each selected tool:\n - **codebase_search**: search the local working tree using the listed `codebase_search_topics`. Capture file paths, function names, and short excerpts as evidence.\n - **web_search**: run narrow, targeted searches for each item in `web_search_topics`. Capture the source URL and a short summary for each result.\n - **deep_research**: run the deep research query exactly once with the planned `deep_research_query`. Capture the consolidated answer plus the cited URLs.\n\n3. Tool failures must be recorded, not silently dropped:\n - If a tool returns an error, missing-credential message, or empty result, record the failure under `per_tool_failures` in the research pack and continue with the remaining tools.\n - A partial research pack is preferable to no research pack. Do not halt the pipeline because one tool failed.\n\n4. Write two artifacts to the run directory:\n - `{docs_dir}/idea-to-ticket/{slug}-{run_id}/research-pack.md` — a human-readable consolidated brief. It must include these sections:\n - **Evidence table** — a structured list of evidence rows: claim, source (file path / URL), and tool that produced it. 
Render as a bulleted list, not a markdown table (BAPI-320 hygiene).\n - **Codebase references** — file paths and function names worth citing in the ticket.\n - **External references** — only present when web/deep search ran; URL + short summary per item.\n - **Unresolved unknowns** — questions the research could not answer.\n - **Per-tool failures** — any tool that failed, with the failure reason.\n - `{docs_dir}/idea-to-ticket/{slug}-{run_id}/research-pack.json` — machine-readable counterpart with the same evidence rows, references, unresolved unknowns, and per-tool failures arrays.\n\n5. Mark research failures as warnings in `research-pack.json` so downstream steps can branch on them: include `partial: true` when any selected tool failed.\n\n## Return\n\nConfirm `research-pack.md` and `research-pack.json` were written, and list any tools that failed.\n",
|
|
661
632
|
"explore-epic-codebase.md": "Perform a holistic, epic-level codebase exploration.\n\n## Epic Description\n\n{epic_description}\n\n## Instructions\n\n1. Read the research findings from `{docs_dir}/epic-plans/{epic_slug}/research-findings.md` to establish context. If the file does not exist or is empty, proceed without it.\n\n2. Explore the codebase with a focus on breadth rather than depth. The goal is to build a \"lay of the land\" understanding for the entire epic, not to deeply analyze any single sub-task. Search by filename pattern, search file contents by text pattern, and read relevant files to find:\n - Files, modules, and directories relevant to the epic\n - Architectural patterns used in similar features\n - Integration points and dependencies between modules\n - Existing conventions for the type of work this epic involves\n - Database models, API routes, agent flows, and utilities that may be affected\n\n3. Build a mental model of:\n - What exists today that relates to the epic\n - What patterns and conventions are used in similar features\n - What dependencies, data flows, and integration points are involved\n - What areas of the codebase will likely need changes\n\n4. 
Write the exploration findings to `{docs_dir}/epic-plans/{epic_slug}/codebase-exploration.md` with this structure:\n\n```markdown\n# Codebase Exploration\n\n## Architecture Overview\n{High-level description of how the relevant parts of the codebase are structured.}\n\n## Relevant Code Areas\n{List of key files, modules, and directories with brief descriptions of their relevance to the epic.}\n\n## Existing Patterns\n{Patterns and conventions discovered that should be followed when implementing the epic.}\n\n## Integration Points\n{Dependencies, data flows, and integration points that the epic will need to account for.}\n\n## Potential Challenges\n{Any architectural constraints, technical debt, or complexity that could affect implementation.}\n```\n\n## Return\n\nConfirm the codebase exploration was written to `{docs_dir}/epic-plans/{epic_slug}/codebase-exploration.md` and return a concise summary of the discovered codebase areas, naming the key files and patterns relevant to the epic.\n",
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
// AUTO-GENERATED — do not edit manually. Regenerate with: npm run build
|
|
2
|
+
// This file is produced by scripts/bundle-readme.js
|
|
3
|
+
export const README = "# @bridge_gpt/mcp-server\n\nMCP server for [Bridge API](https://bridgegpt-api.com) — exposes Jira integration endpoints as MCP tools for AI coding agents. Works with Claude Code, VS Code/Copilot, Cursor, Windsurf, and OpenAI Codex.\n\n> **New here?** Jump to [Usage Documentation](#usage-documentation) for what you can actually do with Bridge, grouped by how often you'll reach for it.\n\n## Getting Started\n\n### 1. Install the Package\n\nFrom your **project root**, install the MCP server and scaffold slash commands:\n\n```bash\nnpm i @bridge_gpt/mcp-server\nnpx -y @bridge_gpt/mcp-server --init\n```\n\n`--init` must be run from the directory containing your `package.json`. It:\n\n- Creates slash commands in `.claude/commands/` and `.cursor/commands/`\n- Detects existing MCP config files and sets `BAPI_PROJECT_ROOT` so local file output resolves correctly\n- Scaffolds `.bridge/pipelines/` for custom pipeline authoring\n\nRe-run `--init` after upgrading the package to get updated commands.\n\n### 2. Generate an API Key\n\n1. Log in to [Bridge API](https://bridgegpt-api.com) and navigate to your project's **Security** page\n2. Click **Create New Key**\n3. Enter your email, an optional label (e.g., \"MCP Server\"), and select the **Admin** role\n4. Click **Create Key**\n5. **Copy the key immediately** — it will not be shown again\n\n### 3. Configure the MCP Server\n\nAdd the following to your editor's MCP configuration file, pasting in the API key from step 2:\n\n<details>\n<summary><strong>Claude Code (.mcp.json)</strong></summary>\n\nThe `--init` command (step 1) detects Claude Code and creates a `.mcp.json` at your project root with placeholder values. 
Open it and replace `your-repo` and `your-api-key` with your actual values from step 2:\n\n```json\n{\n \"mcpServers\": {\n \"bridge-api\": {\n \"command\": \"npx\",\n \"args\": [\"-y\", \"@bridge_gpt/mcp-server\"],\n \"env\": {\n \"BAPI_BASE_URL\": \"https://bridgegpt-api.com\",\n \"BAPI_REPO_NAME\": \"your-repo\",\n \"BAPI_API_KEY\": \"your-api-key\",\n \"BAPI_DOCS_DIR\": \"docs/tmp\"\n }\n }\n }\n}\n```\n</details>\n\n<details>\n<summary><strong>VS Code / Copilot (.vscode/mcp.json)</strong></summary>\n\n```json\n{\n \"servers\": {\n \"bridge-api\": {\n \"command\": \"npx\",\n \"args\": [\"-y\", \"@bridge_gpt/mcp-server\"],\n \"env\": {\n \"BAPI_BASE_URL\": \"https://bridgegpt-api.com\",\n \"BAPI_REPO_NAME\": \"your-repo\",\n \"BAPI_API_KEY\": \"your-api-key\",\n \"BAPI_DOCS_DIR\": \"docs/tmp\"\n }\n }\n }\n}\n```\n</details>\n\n<details>\n<summary><strong>Cursor (.cursor/mcp.json)</strong></summary>\n\n```json\n{\n \"mcpServers\": {\n \"bridge-api\": {\n \"command\": \"npx\",\n \"args\": [\"-y\", \"@bridge_gpt/mcp-server\"],\n \"env\": {\n \"BAPI_BASE_URL\": \"https://bridgegpt-api.com\",\n \"BAPI_REPO_NAME\": \"your-repo\",\n \"BAPI_API_KEY\": \"your-api-key\",\n \"BAPI_DOCS_DIR\": \"docs/tmp\"\n }\n }\n }\n}\n```\n\n> If project-local config is not supported in your Cursor version, use `~/.cursor/config/mcp.json` instead.\n</details>\n\n<details>\n<summary><strong>Windsurf (~/.codeium/windsurf/mcp_config.json)</strong></summary>\n\nWindsurf only supports global MCP configuration.\n\n```json\n{\n \"mcpServers\": {\n \"bridge-api\": {\n \"command\": \"npx\",\n \"args\": [\"-y\", \"@bridge_gpt/mcp-server\"],\n \"env\": {\n \"BAPI_BASE_URL\": \"https://bridgegpt-api.com\",\n \"BAPI_REPO_NAME\": \"your-repo\",\n \"BAPI_API_KEY\": \"your-api-key\",\n \"BAPI_DOCS_DIR\": \"docs/tmp\"\n }\n }\n }\n}\n```\n</details>\n\n<details>\n<summary><strong>OpenAI Codex (~/.codex/config.toml)</strong></summary>\n\n```toml\n[mcp_servers.bridge-api]\ncommand = \"npx\"\nargs = 
[\"-y\", \"@bridge_gpt/mcp-server\"]\n\n[mcp_servers.bridge-api.env]\nBAPI_BASE_URL = \"https://bridgegpt-api.com\"\nBAPI_REPO_NAME = \"your-repo\"\nBAPI_API_KEY = \"your-api-key\"\nBAPI_DOCS_DIR = \"docs/tmp\"\n```\n\n> Codex users: set `BAPI_PROJECT_ROOT` manually in your config (see Environment Variables below).\n</details>\n\nAfter saving the config, restart your editor or reload the MCP server connection. Verify connectivity by asking your AI assistant to call the `ping` tool.\n\n### 4. First-Time Setup: Teach Bridge Your Codebase\n\nIf you're the first person to install Bridge API on your project, run the `/learn-repository` slash command after completing setup. This analyzes your codebase's architecture, testing patterns, code review standards, and documentation conventions, then uploads the findings to Bridge API. This gives Bridge the context it needs to generate implementation plans, ticket critiques, and code reviews that are consistent with your project's actual architecture and conventions.\n\nYou only need to do this once per project — the learned standards persist for all team members.\n\n### Upgrading\n\nTo upgrade to the latest version and refresh all scaffolded artifacts in one step:\n\n```bash\nnpx -y @bridge_gpt/mcp-server --upgrade\n```\n\nThis runs `npm i @bridge_gpt/mcp-server@latest`, prints a before/after version summary, then re-runs the full `--init` scaffolding flow to update your slash commands, agents, and pipeline definitions.\n\nThe MCP server also checks for updates automatically on startup. If a newer version is available, you'll see a notice in your editor's MCP output logs with the upgrade command to run. This check is cached for 24 hours and never blocks server startup.\n\n## Usage Documentation\n\nThis is the Bridge API tooling worth knowing about as a software engineer — the things you'd ask an agent to do — grouped by how often you would use them. Each entry covers **what it does**, **when it's useful** and **how to use it**. 
The behind-the-scenes plumbing is summarized at the end under [Extra Capabilities](#extra-capabilities), and a full enumeration lives in [Reference](#reference).\n\nFor invocation, prefer the slash command — it's deterministic. A free-text example is shown only where natural-language phrasing reliably maps to the right automation; high-consequence or easily-misread automations show only the slash command on purpose.\n\n### Tier 1 — Regularly useful\n\nThese features are useful for most tickets.\n\n**1. Review Ticket**\n- **What it does:** Runs a two-round quality review of a ticket (clarifying questions + critique, plus an alternate-model second opinion) and produces a decision page to accept/reject findings.\n- **When it's useful:** (Refinement) Right after a ticket is drafted, before anyone starts building — to surface gaps and tighten it.\n- **How to use it:** `/review-ticket BAPI-123` (command only — \"review\" as free text is easily mistaken for a freehand agent review).\n- **Flags:** `--auto` auto-accept findings / skip the approval gates.\n\n**2. Start Tickets**\n- **What it does:** Creates one git worktree per ticket and spawns an agent session in each to implement them in parallel.\n- **When it's useful:** (Implementation | Automation) When you're ready to start building one or more refined tickets concurrently.\n- **How to use it:** `/start-tickets BAPI-248 BAPI-250` (see [CLI Subcommands](#cli-subcommands) for the full flag table and cross-platform behavior).\n- **Flags:** `--auto` skip the approval gates · `--base-branch <branch>` branch off something other than the default.\n\n**3. Brainstorm**\n- **What it does:** Fans your problem out to two different LLMs and synthesizes their approaches into one set of options. 
Runs in one of two modes: a **standard** brainstorm (implementation/architecture approaches) or a **design** brainstorm (UI/UX and visual direction).\n- **When it's useful:** (Architecture | Refinement) Early, when you want a spread of approaches — standard for *how to build it*, design for *how it should look*.\n- **How to use it:** ask your agent to brainstorm — *\"Brainstorm approaches for adding rate limiting to the LLM client; fan it out to multiple models and synthesize.\"* For a design pass: *\"Run a design brainstorm for the evidence-freshness dashboard UI.\"*\n\n**4. Deep Research**\n- **What it does:** Runs multi-source, fact-checked web research on a technical topic and returns a cited report.\n- **When it's useful:** (Architecture | Refinement) When a decision hinges on outside knowledge (libraries, best practices, standards) you don't already have.\n- **How to use it:** `/bridge-research <question>`\n\n**5. Jira Ticket Writer**\n- **What it does:** An agent that drafts a well-structured Jira ticket from a plain description, applying your project's standards.\n- **When it's useful:** (Refinement) When you have an idea in your head and want a properly-formatted ticket draft without writing it by hand.\n- **How to use it:** `/write-ticket <description>` (or ask your agent) — *\"Use the jira ticket writer to turn our conversation into a ticket.\"*\n- **Flags:** `--standards <path>` apply a specific standards file when drafting.\n\n**6. 
Upload Ticket**\n- **What it does:** Pushes a drafted ticket up to Jira as a real issue (the `create_ticket` capability); handles markdown and child tickets under an Epic.\n- **When it's useful:** (Refinement) The final step after drafting — to get the ticket into Jira so it can be tracked and worked.\n- **How to use it:** Ask your agent to create the ticket, and it should confirm before creating the live Jira issue.\n- **Options:** Describe the issue type (Bug / Story / Task / Epic) and, for a child under an Epic, the parent key.\n\n### Tier 2 — Occasionally useful\n\nThese features are good to know, but you probably won't use them every day.\n\n**1. Plan Ticket**\n- **What it does:** Generates a step-by-step implementation plan for a ticket, with references to real code files, and saves it locally.\n- **When it's useful:** (Refinement | Implementation) Once a ticket is solid and you want a concrete build plan before (or instead of) auto-implementing.\n- **How to use it:** `/plan-ticket BAPI-123`\n- **Flags:** `--provider <name>` choose the model provider · `--second-opinion <provider>` cross-check the plan with a second provider.\n\n**2. Clarify Ticket**\n- **What it does:** Generates clarifying questions for a ticket (or debugging guidance for bugs) and saves them locally.\n- **When it's useful:** (Refinement) When a ticket feels under-specified and you want the open questions made explicit.\n- **How to use it:** `/clarify-ticket BAPI-123` — *\"Generate clarifying questions for BAPI-123\"*\n- **Flags:** `--provider <name>` choose the model provider · `--second-opinion <provider>` cross-check with a second provider.\n\n**3. 
Critique Ticket**\n- **What it does:** Critiques a ticket's quality against your project standards and lists deviations + improvements.\n- **When it's useful:** (Refinement) When you want a quality gate on a ticket before it's worked.\n- **How to use it:** `/critique-ticket BAPI-123` — *\"Critique BAPI-123 against our project standards and list what's missing or deviating.\"*\n- **Flags:** `--provider <name>` choose the model provider · `--second-opinion <provider>` cross-check with a second provider.\n\n**4. Explore Ticket**\n- **What it does:** Explores the codebase for a task and recommends implementation options or surfaces clarifying questions, with optional research.\n- **When it's useful:** (Architecture | Refinement) Before writing a ticket or plan, when you're unsure how a change would fit the existing code.\n- **How to use it:** `/explore-ticket <task>` — *\"Explore the codebase for how we'd add a Mistral LLM provider and recommend 2–3 implementation options.\"*\n\n**5. Second Opinion**\n- **What it does:** Gets an immediate critique of any text from a different model family — no artifact saved, just the reply.\n- **When it's useful:** (Architecture | Refinement | Implementation) Any time you want a quick sanity check on a plan, draft, or decision from a fresh perspective.\n- **How to use it:** ask your agent — *\"Get a second opinion from Gemini on whether the BAPI-123 plan's migration step is safe to run against prod.\"*\n- **Options:** pick the provider (anthropic / openai / gemini) and the tier (cheap / basic / premium).\n\n**6. Generate Image**\n- **What it does:** Generates an image from a text prompt using a provider image model (OpenAI `gpt-image-2` by default, or Google Imagen) and returns the image directly. 
Spends provider credits on every call.\n- **When it's useful:** (Architecture | Refinement) When you want a quick visual — a UI mockup, diagram, or illustration — to anchor a design discussion or attach to a ticket.\n- **How to use it:** ask your agent — *\"Generate an image of a dashboard showing SOC2 evidence freshness as a traffic-light grid.\"*\n- **Options:** `provider` openai (`gpt-image-2`) / gemini (Imagen — adds an invisible SynthID watermark) · `quality` low (default, cheapest) / medium / high · `size` 1024x1024 / 1024x1536 / 1536x1024. The image is always saved to `BAPI_DOCS_DIR/images/` and also returned inline.\n\n**7. Implement Ticket**\n- **What it does:** Full build for one ticket: generate a plan, write the code, commit, open a PR, and monitor CI.\n- **When it's useful:** (Implementation) When a ticket is ready and you want it taken from plan to open PR in one go.\n- **How to use it:** `/implement-ticket BAPI-123` (command only — \"implement X\" as free text almost always triggers a freehand build instead of the Bridge plan→code→PR→CI pipeline).\n- **Flags:** `--auto` skip the approval gates (e.g. auto-commit/push).\n\n**8. Full Automation**\n- **What it does:** Drives the whole chain end-to-end: idea → ticket(s) → review each → spawn worktrees to implement.\n- **When it's useful:** (Automation) When you want to go from a raw idea to in-progress implementation with minimal hands-on steps.\n- **How to use it:** `/full-automation <idea>` (command only — creates tickets, spawns worktrees, and carries scheduling/`--max-children` flags that free text can't).\n- **Flags:** `--require-approval` toggle the approval gates; full automation runs end to end by default.\n\n**9. 
Idea to Ticket**\n- **What it does:** Turns a one-line idea into a Jira Task/Spike (or an Epic plus child tickets), with research, duplicate detection, and a critique pass built in.\n- **When it's useful:** (Refinement | Automation) When you have a rough idea and want a fully-formed, uploaded ticket without the manual draft-and-refine loop.\n- **How to use it:** `/idea-to-ticket <idea>`\n\n### Tier 3 — Now and then\n\nThese features are useful once in a while, but you probably won't need them every day.\n\n**1. Reimplement Ticket**\n- **What it does:** Pulls in new context/attachments since the last pass and implements small follow-up changes on an already-built ticket.\n- **When it's useful:** (Implementation) After review feedback or new screenshots, when you need a targeted second pass rather than a fresh build.\n- **How to use it:** `/reimplement-ticket BAPI-123`\n\n**2. Run Tests**\n- **What it does:** Runs the unit and E2E suites and autonomously triages/fixes failures (via the test-correction agent).\n- **When it's useful:** (Implementation) After making changes, to confirm everything passes and auto-fix straightforward breakages.\n- **How to use it:** `/run-tests` (`--unit-only`, `--skip-e2e`)\n\n**3. Plan Epic**\n- **What it does:** Decomposes a large epic into sub-tasks with a structured exploration doc for each.\n- **When it's useful:** (Architecture | Refinement) When a feature is too big for one ticket and you need it broken down and scoped.\n- **How to use it:** `/plan-epic <epic>` — *\"Decompose the epic 'migrate PayPal token storage off Custom Objects' into sub-tasks with an exploration doc for each.\"*\n\n**4. 
Update Ticket**\n- **What it does:** Synthesizes a ticket's clarifying answers and critique into a rewritten description and pushes it to Jira.\n- **When it's useful:** (Refinement) After review, to fold the resolved questions and fixes back into the ticket itself.\n- **How to use it:** `/update-ticket BAPI-123` (command only — does a full overwrite of the live Jira description; \"update\" as free text is both vague and hard to reverse).\n\n**5. Get Ticket**\n- **What it does:** Retrieves the full details of a Jira ticket (summary, status, description, etc.).\n- **When it's useful:** (Refinement | Implementation) Any time you want the agent to read a ticket before acting on it.\n- **How to use it:** ask your agent — *\"Pull up BAPI-123 and show me its description, status, and acceptance criteria.\"*\n\n**6. Write Comment**\n- **What it does:** Posts a comment on a Jira ticket (markdown; long ones can attach as a file).\n- **When it's useful:** (Refinement | Implementation) To leave context, status, or a decision trail on the ticket.\n- **How to use it:** ask your agent — *\"Post a comment on BAPI-123: blocked on the expired Atlassian token — will retry after it's rotated.\"*\n\n**7. Download / Upload Attachment**\n- **What it does:** Pulls files off a Jira ticket to disk, or attaches a local file to a ticket.\n- **When it's useful:** (Refinement | Implementation) When a ticket has design files/logs you need locally, or you want to attach output back to it.\n- **How to use it:** ask your agent — *\"Download the design mockups attached to BAPI-123 into my docs folder.\"* / *\"Attach build-log.txt to BAPI-123.\"*\n\n**8. 
Learn Repository**\n- **What it does:** Researches and documents the repo's architecture, testing, review, and correctness standards, then saves them to Bridge for future agents.\n- **When it's useful:** (Setup/Learning) When onboarding a new repo, or after big changes, so Bridge's agents follow your conventions.\n- **How to use it:** `/learn-repository`\n\n**9. Teach Bridge**\n- **What it does:** Takes a plain-English instruction, figures out which standards field it belongs to, and merges it in (admin only).\n- **When it's useful:** (Setup/Learning) When you notice the agents missing a convention and want to correct it in one sentence.\n- **How to use it:** `/teach-bridge <teaching>` — *\"Teach Bridge: always use data-testid selectors in E2E tests.\"*\n\n### Operational commands\n\nWorkflow commands you'll reach for during implementation and CI, beyond the tiers above:\n\n| Command | What it does |\n|---|---|\n| `/code-ticket PROJ-123` | Download the implementation plan and questions, then execute the plan inline |\n| `/commit-ticket PROJ-123` | Stage, commit, and push changes; transition Jira status; post a smoke-test comment |\n| `/create-pr PROJ-123` | Commit staged changes and open a pull request |\n| `/check-ci [PROJ-123]` | Monitor CI checks for the current branch, triage failures, apply fixes, and report results |\n| `/parse-repository` | Queue a background job to index the repository for Bridge AI agents |\n| `/check-parse-status` | Check whether a background repository parse job is still running |\n| `/scan-tickets` | Sync recently-updated Jira tickets and backfill workflow timestamps |\n\n> Commands are designed for Claude Code. 
Other editors may support slash commands differently — check your editor's documentation for how to invoke prompt files.\n\n### Extra Capabilities\n\nBehind-the-scenes capabilities an agent gains from the MCP tools — mostly invoked automatically by the commands above, rarely requested by name:\n\n- **Ship a PR end-to-end:** commit & push, open a pull request, transition the Jira status, and discover/poll CI checks (powers `commit-ticket`, `create-pr`, `check-ci`, `implement-ticket`).\n- **Architecture plan** for a ticket (design-level guidance, separate from the implementation plan).\n- **Index the codebase** so Bridge's agents can reason about it: queue/parse the repo, check parse status, regenerate the directory map.\n- **Read & tune project config/standards:** list/read/update config fields, fetch project standards, and the per-topic `learn-*` commands that populate them.\n- **Ticket lifecycle bookkeeping:** track tickets and backfill workflow-state timestamps (`scan-tickets`), search across tickets, read comments, list attachments.\n- **Pipeline machinery:** list/inspect pipeline recipes and run/resume/list/delete pipeline runs (the engine under the orchestration commands).\n- **Decision page** generation for capturing human review decisions as structured data.\n- **Connectivity & identity checks:** ping Bridge, check your role, resolve the local docs directory.\n- **Retrieve any generated artifact** (`get_*` for plans, critiques, questions, brainstorms, research, architecture) without regenerating it.\n- **Tiered-section execution telemetry** recording (internal measurement).\n\n## CLI Subcommands\n\nBeyond `--init` / `--upgrade`, the package ships operational subcommands of the **single `bridge-api-mcp-server` bin** (not separate binaries) — so they travel with the package to every consumer. 
See [Usage Documentation → Start Tickets](#tier-1--regularly-useful) for *when* to use `start-tickets`; this section is the full CLI reference.\n\n### `start-tickets`\n\nSpawns one Worktrunk worktree + selected-agent session per Jira ticket and backs the `/start-tickets` slash command. The agent defaults to **Claude Code** (`claude`) and is configurable via `--agent`.\n\n```\nnpx -y @bridge_gpt/mcp-server start-tickets [flags] KEY [KEY ...]\n```\n\n| Flag | Default | Meaning |\n|---|---|---|\n| `--agent claude\\|cursor-agent` | `claude` | Agent command to launch in each worktree |\n| `--terminal terminal\\|iterm` | auto-detect via `$TERM_PROGRAM` | Override the macOS terminal app (honored on macOS only) |\n| `--dry-run` | off | Print intended actions; create no worktrees, open no tabs (any OS) |\n| `--branch KEY=BRANCH` | `feature/<KEY>` | Use a custom branch for that ticket (repeatable) |\n| `--base-branch <BRANCH>` | `main` | Cut new worktrees from `<BRANCH>` and refresh `origin/<BRANCH>` instead of `main` |\n| `--no-refresh-main` | off (the configured base branch is refreshed) | Skip refresh of the configured base branch (default `main`). Historical flag name preserved for backward compatibility — despite the name, it now skips refresh of whatever `--base-branch` resolves to. |\n| `--max-parallel N` | `3` | Max worktrees created concurrently |\n| `-h`, `--help` | — | Show usage |\n\nEach `KEY` must match `[A-Z]+-[0-9]+` (e.g., `BAPI-248`). The CLI creates/switches each worktree up front (throttled by `--max-parallel`), then opens one tab/session per successful worktree running the selected agent's `'/implement-ticket <KEY>'` — `claude '/implement-ticket <KEY>'` by default, or `cursor-agent '/implement-ticket <KEY>'` with `--agent cursor-agent`. The `/implement-ticket <KEY>` prompt is unchanged for both agents. 
To launch Cursor Agent instead of Claude Code:\n\n```\nnpx -y @bridge_gpt/mcp-server start-tickets --agent cursor-agent BAPI-248\n```\n\n**Difficulty-based model routing.** Before launching each agent, the CLI selects an implementation **model tier** from the ticket's `difficulty` (1-2 → cheap, 3-5 → basic, 6+ → premium) and injects it as a `--model` flag at the spawn boundary. The Python backend returns only the coarse tier (`GET /jira/tickets/{KEY}/model-tier`, computing + caching difficulty on demand); this CLI alone maps a tier to the agent-specific alias (`claude`: `haiku`/`sonnet`/`opus`; `cursor-agent`: version-suffixed strings validated against `cursor-agent --list-models`). It is gated per repo by `difficulty_model_routing_enabled` (default **ON**) with an optional `difficulty_model_tier_overrides` JSON map (tier → alias). Routing is **fail-open**: missing credentials, an evaluation failure/timeout, a backend `fallback`, an invalid/unavailable alias, an unadvertised Cursor model, or an agent without `--model` support all omit `--model` (the agent uses its default) and surface a per-ticket warning rather than failing the spawn. `--dry-run` does **not** fetch tiers or inject `--model`.\n\n**Cross-platform spawning.** The CLI routes spawning per platform; `--dry-run` previews the platform-correct command form on any OS. An unsupported `process.platform` (not `darwin`/`win32`/`linux`) fails fast with a clear \"unsupported platform\" message.\n\n- **macOS** — opens a Terminal.app or iTerm tab via `osascript`.\n- **Windows** — creates worktrees with **`git-wt`** (Worktrunk's winget alias) and opens a tab via **Windows Terminal (`wt.exe new-tab`)**, falling back to **`Start-Process powershell.exe`** when Windows Terminal is absent. Requires **Git for Windows / Git Bash** (Worktrunk runs its `pre-start` / `post-start` hooks via Git Bash). 
The Worktrunk binary (`git-wt`) and the tab launcher (`wt.exe`) are resolved independently and never conflated.\n- **Linux** — creates one detached **tmux** session per ticket (pane kept open after the agent exits); attach with `tmux attach -t <session>`. A missing `tmux` produces a clear, actionable error.\n\nPer-OS prerequisites: macOS `wt`, `git`, `osascript`; Windows `git-wt`, Git for Windows / Git Bash, Windows Terminal or PowerShell; Linux `wt`, `git`, `tmux`. Set `BAPI_WORKTRUNK_BIN` to override the Worktrunk executable name/path for nonstandard installs (`doctor` honors it too). The read-only `doctor` subcommand (below) additionally surfaces a missing `uv` — Worktrunk's `pre-start` hook runs `uv`, but live preflight does not check it — and the selected agent's command; run `doctor --agent cursor-agent` to also check `cursor-agent` (it prints `cursor-agent login` as an informational auth reminder).\n\n### `doctor`\n\nThe package also ships a strictly **read-only** `doctor` subcommand that diagnoses the `start-tickets` prerequisites for the current OS without changing anything:\n\n```\nnpx -y @bridge_gpt/mcp-server doctor [--agent <name>]\n```\n\nIt is **read-only**: it never installs anything, modifies your system, adds an npm `postinstall`, spawns a terminal, or starts the MCP server, and there is no `--fix`. For each prerequisite it prints found/missing and, when missing, the exact per-OS install command **as a manual instruction you run yourself**. The checked set is the `start-tickets` preflight prerequisites **plus `uv`** **plus the selected agent's command** (`claude` by default, or `cursor-agent` with `--agent cursor-agent`). The Worktrunk binary is probed via the resolved name (honoring `BAPI_WORKTRUNK_BIN`), not a hard-coded one. **Exit code:** `0` when all required prerequisites are present, non-zero when any is missing or the platform is unsupported. 
A failing `start-tickets` preflight now hints you to run `doctor` for an actionable diagnostics report.\n\n## Custom Pipelines\n\nYou can create your own pipelines by adding JSON files to `.bridge/pipelines/`. Running `--init` scaffolds this directory with a `README.md` and an example pipeline to get you started.\n\nThe easiest way to write a custom pipeline is to describe what you want to automate to your AI coding agent and have it draft the JSON for you. The schema is straightforward, and agents like Claude Code understand it well — just describe the steps you want, and the agent will produce a working pipeline file.\n\n**What you can build:**\n\n- Any sequence of Bridge MCP tool calls and free-form agent tasks\n- Parameterized workflows using variables (e.g., `{ticket_key}`)\n- Approval gates that pause for user confirmation before sensitive steps\n- Per-step error handling — halt immediately or warn and continue\n\n**Ideas for custom pipelines:**\n\n- A standup pipeline that fetches your open tickets and summarizes their status\n- A ticket triage pipeline that runs critiques on a batch of new tickets\n- A pre-merge checklist that runs tests, checks linting, and posts a summary comment\n\n**Step types:**\n\n| Type | What it does |\n|---|---|\n| `mcp_call` | Calls an MCP tool with the given params |\n| `agent_task` | Gives the AI a free-form instruction (inline or from a file in `.bridge/instructions/`) |\n\nVariables are declared in the `variables` array and referenced as `{variable_name}` in params and instructions. Each step supports `on_error: \"halt\"` (default) or `\"warn_and_continue\"`, and `requires_approval: true` to pause before execution.\n\n**System variables:**\n\nTwo variables are automatically available in every pipeline without declaring them:\n\n| Variable | What it does |\n|---|---|\n| `{provider}` | Routes AI generation to a specific LLM provider (`openai`, `anthropic`, or `gemini`). 
Pass it through to any `request_*` tool param to control which provider handles that step. Omit it (or leave it empty) to use the project default. |\n| `{second_opinion}` | Runs AI generation through a different provider than the default, acting as a cross-check. Set to `\"auto\"` to let Bridge pick the second provider automatically. When set, it takes precedence over `{provider}`. Use this when you want two independent AI perspectives on the same task — for example, running clarifying questions and a critique twice (once with each provider) produces better results than a single pass. |\n\nSee `.bridge/pipelines/README.md` for the full schema reference.\n\nIf a custom pipeline has the same key as a built-in pipeline, the custom version takes precedence (a warning is logged at startup).\n\n## Smoke testing\n\nThe package ships a canonical, **opt-in** in-host smoke-test runbook at\n`smoke-test/SMOKE-TEST.md`. An AI agent running inside your host (Claude Code,\nCursor, Codex, Windsurf, or VS Code/Copilot) executes it to verify that the MCP\nserver actually works end-to-end *inside that host* — it calls the real tools and\nrecords a PASS/FAIL verdict for each one in a markdown report.\n\n- `smoke-test/SMOKE-TEST.md` **ships with the npm package** and is the\n **canonical** source of truth for the smoke test.\n- The smoke test **adds no MCP tool** and **does not change the registered\n tool surface** (the server still registers its existing 55 tools).\n- It is **opt-in**: default `--init` **does not scaffold `/smoke-test-mcp`**, so\n consumer command palettes are not polluted.\n\n### Running it\n\nYou have two options:\n\n1. 
**Copy the opt-in command manually.** Copy the packaged command stub into your\n host's command directory, then invoke `/smoke-test-mcp`:\n\n ```bash\n # Claude Code\n cp node_modules/@bridge_gpt/mcp-server/smoke-test/smoke-test-mcp.md .claude/commands/smoke-test-mcp.md\n # Cursor\n cp node_modules/@bridge_gpt/mcp-server/smoke-test/smoke-test-mcp.md .cursor/commands/smoke-test-mcp.md\n ```\n\n2. **Open the runbook directly.** Alternatively, open\n `smoke-test/SMOKE-TEST.md` and ask the host agent to execute it.\n\nReports are written to `<BAPI_DOCS_DIR>/smoke-test/REPORT-<host>-<timestamp>.md`.\n\n## Environment Variables\n\n| Variable | Required | Default | Description |\n|---|---|---|---|\n| `BAPI_BASE_URL` | Yes | `https://bridgegpt-api.com` | Bridge API base URL |\n| `BAPI_REPO_NAME` | Yes | _(none)_ | Jira project/repository identifier configured in Bridge API |\n| `BAPI_API_KEY` | Yes | _(none)_ | API key obtained from the Bridge API setup UI |\n| `BAPI_PROJECT_ROOT` | No | _(auto-set by --init)_ | Absolute path to project root. Anchors `BAPI_DOCS_DIR` and `BAPI_PIPELINES_DIR` resolution |\n| `BAPI_DOCS_DIR` | No | `docs/tmp` | Local directory for saving plans, critiques, and research reports |\n| `BAPI_PIPELINES_DIR` | No | `.bridge/pipelines` | Directory for user-defined custom pipeline JSON files |\n| `BAPI_WORKTRUNK_BIN` | No | `wt` (`git-wt` on Windows) | Override the Worktrunk executable name/path used by `start-tickets` for nonstandard installs |\n| `BAPI_TMUX_SESSION` | No | `bridge-start-tickets` | Override the tmux session-name prefix used by `start-tickets` on Linux |\n\n## Worktree credentials and the `mcp-invoke` shim\n\nWhen `start-tickets` creates a git worktree, it provisions a Bridge API MCP\nregistration into that worktree so Claude Code (`.mcp.json`) and Cursor\n(`.cursor/mcp.json`) can reach the server immediately. These registrations are\n**secret-free**: they contain no `env` block and no API key. 
Instead they point\nat an internal subcommand of the published single CLI bin, `mcp-invoke`:\n\n```bash\nnpx -y @bridge_gpt/mcp-server@<VERSION> mcp-invoke --target bapi --project-root <ABS_WORKTREE_PATH>\n```\n\n`mcp-invoke` is not a separate binary — it is a positional subcommand of\n`bridge-api-mcp-server`. It resolves the repo identity from the absolute\n`--project-root` (the committed `.bridge/config` manifest, falling back to the\ngit common dir), resolves credentials from the home-directory credential store,\nand then spawns the real MCP server with that environment.\n\n### Credential store\n\nCredentials live outside the repository, keyed by `bapi:<repo_name>`:\n\n```json\n{\n \"bapi:<repo_name>\": {\n \"BAPI_API_KEY\": \"...\"\n }\n}\n```\n\nResolution order:\n\n1. `BAPI_API_KEY` in the parent environment (overrides the file entirely).\n2. `$XDG_CONFIG_HOME/bridge/credentials.json`, else `~/.config/bridge/credentials.json`.\n3. `~/.bridge/credentials.json` (only when the primary path is absent).\n\nOn POSIX systems, lock the file down so only you can read it:\n\n```bash\nchmod 600 ~/.config/bridge/credentials.json\n```\n\n`mcp-invoke` warns (but continues) if the file is group/world-readable, and it\nnever creates or initializes the credential file for you.\n\n## Reference\n\nThe full surface, for when you need the complete enumeration. Day-to-day, use [Usage Documentation](#usage-documentation) instead — you don't call MCP tools directly; you ask your AI assistant to perform a task, or compose tools into a pipeline.\n\n### MCP tools\n\nThe server registers **55 tools**. 
Async AI tools follow a request/get pattern: call the `request_*` tool to kick off generation, then the matching `get_*` tool to retrieve the result (or pass `wait_for_result: true` to poll automatically).\n\n- **Connectivity & identity** — `ping`, `get_my_role`, `get_docs_dir`\n- **Jira tickets** — `get_tickets`, `get_ticket`, `create_ticket`, `update_ticket_description`, `add_comment`, `get_comments`\n- **Attachments** — `list_attachments`, `upload_attachment`, `download_attachment`\n- **AI generation (request/get)** — `request_plan_generation`/`get_plan`, `request_architecture`/`get_architecture`, `request_clarifying_questions`/`get_clarifying_questions`, `request_ticket_critique`/`get_ticket_critique`, `request_ticket_review`, `request_reimplement_context`/`get_reimplement_context`, `request_brainstorm`/`get_brainstorm`, `request_deep_research`/`get_deep_research`\n- **Other AI** — `second_opinion`, `generate_image`, `generate_decision_page`\n- **Ticket lifecycle** — `track_ticket`, `update_ticket_state`, `get_ticket_state`\n- **Jira status** — `get_jira_transitions`, `update_jira_status`, `resolve_target_status`\n- **Repository & CI** — `parse_repository`, `get_parse_status`, `regenerate_directory_map`, `create_pull_request`, `resolve_ci_checks`, `poll_ci_checks`\n- **Pipelines & automation** — `list_pipelines`, `get_pipeline_recipe`, `run_pipeline`, `resume_pipeline`, `list_pipeline_runs`, `delete_pipeline_run`, `run_full_automation`, `resume_full_automation`\n- **Config** — `get_project_standards`, `list_config_fields`, `get_config_field`, `update_config_field`\n\n### Bundled pipelines\n\nPipelines are declarative, multi-step workflows your AI agent executes step-by-step — each a JSON recipe chaining MCP tool calls and free-form agent tasks, with variable substitution, per-step error handling, and optional approval gates. 
You can also write your own (see [Custom Pipelines](#custom-pipelines)).\n\n| Pipeline | Description | Invoke with |\n|---|---|---|\n| `implement-ticket` | Generate a plan, execute the implementation, commit, open a PR, and monitor CI | `/implement-ticket PROJ-123` |\n| `review-ticket` | Two-round ticket quality review: clarifying questions and critique from multiple providers | `/review-ticket PROJ-123` |\n| `idea-to-ticket` | Turn an idea into a Jira Task/Spike (or Epic + children) with research, dedup, and critique | `/idea-to-ticket \"<idea>\"` |\n| `plan-epic` | Decompose an epic into sub-tasks with a structured exploration doc for each | `/plan-epic \"<epic>\"` |\n| `full-automation` | Chain: idea → ticket(s) → review each → spawn worktrees to implement | `/full-automation \"<idea>\"` |\n| `pr-ticket` | Commit changes and open a pull request | `/create-pr PROJ-123` |\n| `check-ci-ticket` | Commit, open a PR, then monitor CI checks until they pass or fail | `/check-ci PROJ-123` |\n| `learn-repository` | Analyze codebase architecture, testing, review, and documentation standards, then upload to Bridge | `/learn-repository` |\n\n### Pipeline response envelope\n\n`run_pipeline`, `resume_pipeline`, and `list_pipeline_runs` share a unified envelope keyed on `status`:\n\n- `completed` — terminal success; `results` holds per-step output.\n- `needs_agent_task` — the orchestrator paused. Read `instruction`, perform the task, then call `resume_pipeline` with `pipeline_run_id` and a string `agent_result`.\n- `failed` — terminal failure. `error_code` is one of `VALIDATION`, `NOT_FOUND`, `EXPIRED`, `REPO_MISMATCH`, `TOOL_ERROR`.\n\nPaused runs auto-expire after an idle TTL (default 24 hours; override with `ttl_seconds`). The TTL is reset on every state transition. List output is metadata-only — it never includes resolved recipes, params, instructions, results, or agent outputs.\n";
|