@pugi/cli 0.1.0-alpha.9 → 0.1.0-beta.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -0
- package/THIRD_PARTY_NOTICES.md +40 -0
- package/assets/pugi-mascot.ansi +16 -0
- package/dist/commands/deploy.js +439 -0
- package/dist/core/agents/loader.js +104 -0
- package/dist/core/agents/registry.js +1 -1
- package/dist/core/consensus/anvil-fanout.js +276 -0
- package/dist/core/consensus/diff-capture.js +382 -0
- package/dist/core/consensus/rubric.js +233 -0
- package/dist/core/context/index.js +21 -0
- package/dist/core/context/pugiignore.js +316 -0
- package/dist/core/context/repo-skeleton.js +533 -0
- package/dist/core/context/watcher.js +342 -0
- package/dist/core/context/working-set.js +165 -0
- package/dist/core/edits/dispatch.js +185 -0
- package/dist/core/edits/index.js +15 -0
- package/dist/core/edits/layer-a-apply.js +217 -0
- package/dist/core/edits/layer-b-apply.js +211 -0
- package/dist/core/edits/layer-c-apply.js +160 -0
- package/dist/core/edits/layer-d-ast.js +29 -0
- package/dist/core/edits/marker-parser.js +401 -0
- package/dist/core/edits/security-gate.js +223 -0
- package/dist/core/edits/worktree.js +322 -0
- package/dist/core/engine/native-pugi.js +6 -1
- package/dist/core/engine/prompts.js +8 -0
- package/dist/core/engine/tool-bridge.js +33 -1
- package/dist/core/lsp/client.js +719 -0
- package/dist/core/repl/ask.js +512 -0
- package/dist/core/repl/cancellation.js +98 -0
- package/dist/core/repl/dispatch-fsm.js +220 -0
- package/dist/core/repl/privacy-banner.js +71 -0
- package/dist/core/repl/session.js +1908 -13
- package/dist/core/repl/slash-commands.js +92 -32
- package/dist/core/repl/store/index.js +12 -0
- package/dist/core/repl/store/jsonl-log.js +321 -0
- package/dist/core/repl/store/lockfile.js +155 -0
- package/dist/core/repl/store/session-store.js +792 -0
- package/dist/core/repl/store/types.js +44 -0
- package/dist/core/repl/store/uuid-v7.js +68 -0
- package/dist/core/repl/workspace-context.js +72 -1
- package/dist/core/skills/defaults.js +457 -0
- package/dist/core/skills/loader.js +454 -0
- package/dist/core/skills/sources.js +480 -0
- package/dist/core/skills/trust.js +172 -0
- package/dist/runtime/cli.js +998 -12
- package/dist/runtime/commands/agents.js +385 -0
- package/dist/runtime/commands/config.js +338 -8
- package/dist/runtime/commands/delegate.js +289 -0
- package/dist/runtime/commands/lsp.js +206 -0
- package/dist/runtime/commands/patch.js +128 -0
- package/dist/runtime/commands/review-consensus.js +399 -0
- package/dist/runtime/commands/roster.js +117 -0
- package/dist/runtime/commands/skills.js +401 -0
- package/dist/runtime/commands/worktree.js +177 -0
- package/dist/runtime/plan-decompose.js +531 -0
- package/dist/tools/apply-patch.js +495 -0
- package/dist/tools/file-tools.js +90 -0
- package/dist/tools/lsp-tools.js +189 -0
- package/dist/tools/registry.js +26 -0
- package/dist/tools/web-fetch.js +1 -1
- package/dist/tui/agent-tree-pane.js +9 -0
- package/dist/tui/ask-cli.js +52 -0
- package/dist/tui/ask-modal.js +211 -0
- package/dist/tui/conversation-pane.js +48 -3
- package/dist/tui/input-box.js +48 -5
- package/dist/tui/markdown-render.js +266 -0
- package/dist/tui/repl-render.js +319 -3
- package/dist/tui/repl-splash-mascot.js +130 -0
- package/dist/tui/repl-splash.js +7 -1
- package/dist/tui/repl.js +96 -12
- package/dist/tui/status-bar.js +63 -3
- package/dist/tui/tool-stream-pane.js +91 -0
- package/docs/examples/codegraph.mcp.json +10 -0
- package/package.json +14 -6
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Anvil fan-out — `pugi review --consensus` (α6.7).
|
|
3
|
+
*
|
|
4
|
+
* Posts the captured diff to Anvil's consensus endpoint and consumes the
|
|
5
|
+
* SSE stream that interleaves per-reviewer events (`type:"verdict"`) and
|
|
6
|
+
* the final consensus event (`type:"consensus"`).
|
|
7
|
+
*
|
|
8
|
+
* Endpoint contract (admin-api side, ships as α6.7.1 follow-up):
|
|
9
|
+
*
|
|
10
|
+
* POST {apiUrl}/api/pugi/review-consensus
|
|
11
|
+
* Authorization: Bearer {apiKey}
|
|
12
|
+
* Content-Type: application/json
|
|
13
|
+
* Body: { diff, context: { branch, commit, title } }
|
|
14
|
+
* Response: text/event-stream
|
|
15
|
+
* data: { reviewer: "codex"|"claude"|"deepseek", type:"started" }
|
|
16
|
+
* data: { reviewer, type:"verdict", severity:"P0|P1|P2|P3|CLEAN",
|
|
17
|
+
* rawContent:"<reviewer text>", latencyMs, error? }
|
|
18
|
+
* data: { type:"consensus", rubric_verdict, reasoning }
|
|
19
|
+
*
|
|
20
|
+
* The CLI side does NOT trust the server's `rubric_verdict` — we recompute
|
|
21
|
+
* it locally from the per-reviewer verdicts so a malformed / forged server
|
|
22
|
+
* cannot weaken the gate. The server-side verdict comes through as a
|
|
23
|
+
* cross-check (logged when it disagrees with the client rubric).
|
|
24
|
+
*
|
|
25
|
+
* Graceful degradation:
|
|
26
|
+
*
|
|
27
|
+
* - 404 from runtime → "endpoint_missing" (admin-api endpoint pending,
|
|
28
|
+
* α6.7 ships CLI-only). Caller falls back to the
|
|
29
|
+
* legacy `pugi review --triple --remote` flow OR
|
|
30
|
+
* prints a "backend not deployed" notice depending
|
|
31
|
+
* on the operator's invocation.
|
|
32
|
+
* - 401/403 / 429 → matching status with an actionable message.
|
|
33
|
+
* - 5xx / timeout → "failed" with the truncated body.
|
|
34
|
+
*
|
|
35
|
+
* Local-first contract (ADR-0037): this module never touches the disk,
|
|
36
|
+
* never logs the diff payload, and never retries on transient errors.
|
|
37
|
+
*/
|
|
38
|
+
/**
 * Dispatch the consensus request and stream events back through `sink`
 * until the SSE stream closes OR a transport error occurs.
 *
 * Failures that happen once the request has been issued are not thrown:
 * HTTP errors, transport errors, idle timeouts and exceptions raised
 * while consuming the stream (including ones thrown by `sink`) are all
 * reported through the `status` field of the returned object.
 *
 * @param {{ apiUrl: string, apiKey: string, timeoutMs: number }} config
 *   Runtime base URL, bearer token and idle-timeout budget (ms).
 * @param {object} request JSON-serialisable request body.
 * @param {(event: object) => void} sink Called once per parsed SSE event,
 *   in arrival order.
 * @returns {Promise<object>} One of:
 *   - `{ status: 'ok', serverVerdict, reviewerEvents }` where
 *     `serverVerdict` is the last `type:"consensus"` event (or `null`)
 *     and `reviewerEvents` holds every other event;
 *   - `{ status: 'rate_limited', code, retryAfterMs, message }`;
 *   - `{ status: 'endpoint_missing' | 'unauthenticated' | 'failed', code, message }`.
 */
export async function dispatchConsensus(config, request, sink) {
  const url = `${config.apiUrl.replace(/\/+$/, '')}/api/pugi/review-consensus`;
  const controller = new AbortController();
  // Idle timeout: aborts the request when no SSE chunk has been received
  // for `timeoutMs`. A one-shot timer from request start would kill
  // long-running streams even while the server is still sending events,
  // so the timer is re-armed by `parseSseStream` on every chunk.
  let idleTimer = null;
  const resetIdleTimeout = () => {
    if (idleTimer) {
      clearTimeout(idleTimer);
    }
    idleTimer = setTimeout(() => controller.abort(), config.timeoutMs);
  };
  resetIdleTimeout();
  try {
    const res = await fetch(url, {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        accept: 'text/event-stream',
        authorization: `Bearer ${config.apiKey}`,
        'user-agent': 'pugi-cli-consensus/0.1.0',
      },
      body: JSON.stringify(request),
      signal: controller.signal,
    });
    const code = res.status;
    if (code === 404) {
      return {
        status: 'endpoint_missing',
        code,
        message: 'POST /api/pugi/review-consensus not deployed on this runtime (α6.7.1 follow-up).',
      };
    }
    if (code === 401 || code === 403) {
      return {
        status: 'unauthenticated',
        code,
        message: `runtime rejected credentials (HTTP ${code})`,
      };
    }
    if (code === 429) {
      return {
        status: 'rate_limited',
        code,
        // Handles both delay-seconds and HTTP-date; never negative.
        retryAfterMs: parseRetryAfterMs(res.headers.get('retry-after'), 60_000),
        message: 'runtime rate limit reached for this tenant',
      };
    }
    if (code !== 200) {
      const text = await safeText(res);
      return {
        status: 'failed',
        code,
        message: `runtime returned HTTP ${code}${text ? `: ${text.slice(0, 200)}` : ''}`,
      };
    }
    // 200 — consume the SSE stream. Surface a graceful failure if the
    // body is missing.
    if (!res.body) {
      return {
        status: 'failed',
        code,
        message: 'runtime returned 200 but no SSE body',
      };
    }
    const reviewerEvents = [];
    let serverVerdict = null;
    for await (const event of parseSseStream(res.body, resetIdleTimeout)) {
      if (event.type === 'consensus') {
        serverVerdict = event;
      } else {
        reviewerEvents.push(event);
      }
      sink(event);
    }
    return { status: 'ok', serverVerdict, reviewerEvents };
  } catch (error) {
    let message = 'unknown error';
    if (error instanceof Error) {
      message = error.name === 'AbortError'
        ? `runtime call idle for more than ${config.timeoutMs}ms`
        : error.message;
    }
    return { status: 'failed', code: 0, message };
  } finally {
    if (idleTimer) {
      clearTimeout(idleTimer);
    }
    // Release the connection on every exit path. Early returns (404,
    // 401, 429, ...) and a throwing `sink` leave the response body
    // unread; aborting tears the request down instead of leaving it
    // open. After a fully consumed stream this is a no-op.
    controller.abort();
  }
}

/**
 * Convert a `Retry-After` header value to milliseconds.
 *
 * Accepts the two forms the header may take: a non-negative integer
 * number of seconds, or an HTTP-date in IMF-fixdate form (e.g.
 * `Wed, 21 Oct 2015 07:28:00 GMT`). A date in the past yields `0`.
 * Anything else (missing header, negative number, garbage) yields
 * `fallbackMs`.
 *
 * @param {string | null} header Raw header value.
 * @param {number} fallbackMs Value to use when the header is unusable.
 * @returns {number} Delay in milliseconds, never negative.
 */
function parseRetryAfterMs(header, fallbackMs) {
  if (typeof header !== 'string') {
    return fallbackMs;
  }
  const value = header.trim();
  if (/^\d+$/.test(value)) {
    const delayMs = Number.parseInt(value, 10) * 1000;
    return Number.isFinite(delayMs) ? delayMs : fallbackMs;
  }
  // Only a strict IMF-fixdate is handed to Date.parse: the engine's
  // legacy date parser accepts many unrelated strings.
  if (/^[A-Za-z]{3}, \d{2} [A-Za-z]{3} \d{4} \d{2}:\d{2}:\d{2} GMT$/.test(value)) {
    const at = Date.parse(value);
    if (Number.isFinite(at)) {
      return Math.max(0, at - Date.now());
    }
  }
  return fallbackMs;
}
|
|
142
|
+
/**
 * Async generator that turns a byte stream into parsed SSE events.
 *
 * Incoming chunks are decoded as UTF-8 (in streaming mode, so a
 * multi-byte character split across chunks is reassembled) and appended
 * to a text buffer. Every complete frame found by
 * `findNextEventBoundary` is cut off the front of the buffer and handed
 * to `parseSseEvent`; frames for which that helper returns a falsy value
 * are skipped silently and iteration continues. Whatever non-blank text
 * is left once the stream ends is parsed as one final frame, which covers
 * a server that omits the closing blank line.
 *
 * @param {object} body Response body; adapted via `toAsyncIterable`.
 * @param {() => void} [onChunk] Invoked once per received chunk, before
 *   decoding, so the caller can treat any received bytes as liveness.
 * @yields {object} Each event accepted by `parseSseEvent`, in order.
 */
export async function* parseSseStream(body, onChunk) {
  const utf8 = new TextDecoder('utf-8');
  let pending = '';
  for await (const bytes of toAsyncIterable(body)) {
    if (onChunk) {
      onChunk();
    }
    pending += utf8.decode(bytes, { stream: true });
    // Drain every complete frame currently sitting in the buffer.
    for (
      let cut = findNextEventBoundary(pending);
      cut !== null;
      cut = findNextEventBoundary(pending)
    ) {
      const frame = pending.slice(0, cut.start);
      pending = pending.slice(cut.end);
      const event = parseSseEvent(frame);
      if (event) {
        yield event;
      }
    }
  }
  // Trailing frame without a terminating blank line.
  const leftover = pending.trim();
  if (leftover.length === 0) {
    return;
  }
  const lastEvent = parseSseEvent(leftover);
  if (lastEvent) {
    yield lastEvent;
  }
}
|
|
194
|
+
/**
 * Locate the earliest blank-line delimiter in `buffer`.
 *
 * Two delimiter spellings are recognised: LF LF and CR LF CR LF.
 * When both occur, the one with the smaller index wins, so a stream
 * that mixes line-ending styles is still split deterministically.
 *
 * @param {string} buffer Text accumulated so far.
 * @returns {{ start: number, end: number } | null} `start` is the index
 *   of the delimiter (i.e. where the frame ends) and `end` the index
 *   just past it (where the next frame begins); `null` when no complete
 *   delimiter is buffered yet.
 */
function findNextEventBoundary(buffer) {
  const hits = [
    { at: buffer.indexOf('\n\n'), width: 2 },
    { at: buffer.indexOf('\r\n\r\n'), width: 4 },
  ].filter((hit) => hit.at !== -1);
  if (hits.length === 0) {
    return null;
  }
  // The two delimiters can never start at the same index, so there is
  // no tie to break here.
  const first = hits.reduce((best, hit) => (hit.at < best.at ? hit : best));
  return { start: first.at, end: first.at + first.width };
}
|
|
217
|
+
/**
 * Parse one raw SSE frame into an event object.
 *
 * Only `data:` lines are considered; every other line (comments,
 * `event:`, `id:`, `retry:`) is ignored. One leading space after the
 * colon is stripped, the remaining values are joined with `\n`, and the
 * result is parsed as JSON.
 *
 * @param {string} raw Text of a single frame, without its delimiter.
 * @returns {object | null} The decoded JSON value when it is a non-null
 *   object carrying a string `type` property; `null` for frames with no
 *   data, blank data, invalid JSON, or any other JSON shape.
 */
function parseSseEvent(raw) {
  const FIELD = 'data:';
  const dataLines = raw
    .split(/\r?\n/)
    .filter((line) => line.startsWith(FIELD))
    .map((line) => {
      const value = line.slice(FIELD.length);
      return value.startsWith(' ') ? value.slice(1) : value;
    });
  if (dataLines.length === 0) {
    return null;
  }
  const payload = dataLines.join('\n').trim();
  if (payload.length === 0) {
    return null;
  }
  let decoded;
  try {
    decoded = JSON.parse(payload);
  } catch {
    // Malformed payload: drop this frame.
    return null;
  }
  // Forward anything that is an object with a string `type`; the shape
  // of the remaining fields is not validated here.
  const usable = decoded && typeof decoded === 'object' && typeof decoded['type'] === 'string';
  return usable ? decoded : null;
}
|
|
245
|
+
/**
 * Normalise a response body to something `for await` can consume.
 *
 * A body exposing `getReader()` is treated as a web `ReadableStream` and
 * wrapped by `webStreamToAsyncIterable`; anything else is returned
 * untouched and is expected to already be async-iterable.
 *
 * @param {object} body Response body.
 * @returns {AsyncIterable<Uint8Array>} Chunk source.
 */
function toAsyncIterable(body) {
  const isWebStream = typeof body.getReader === 'function';
  return isWebStream ? webStreamToAsyncIterable(body) : body;
}
|
|
253
|
+
/**
 * Adapt a web `ReadableStream` to an async generator of its chunks.
 *
 * Falsy chunks are skipped. When iteration stops before the stream
 * reports `done` — the consumer breaks out of its loop, or a read
 * rejects — the stream is cancelled before the lock is released, so the
 * underlying source is told to stop producing instead of being left
 * open with nobody reading it.
 *
 * @param {ReadableStream} stream Stream to drain.
 * @yields {Uint8Array} Each non-empty chunk read from the stream.
 */
async function* webStreamToAsyncIterable(stream) {
  const reader = stream.getReader();
  let drained = false;
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) {
        drained = true;
        return;
      }
      if (value) {
        yield value;
      }
    }
  } finally {
    if (!drained) {
      try {
        await reader.cancel();
      } catch {
        // Best effort: cancelling an already-errored stream rejects, and
        // that must not mask the error that ended the iteration.
      }
    }
    reader.releaseLock();
  }
}
|
|
268
|
+
/**
 * Read a response body as text without ever rejecting.
 *
 * @param {{ text: () => Promise<string> }} res Response-like object.
 * @returns {Promise<string>} The body text, or `''` when reading it
 *   fails for any reason.
 */
async function safeText(res) {
  let text = '';
  try {
    text = await res.text();
  } catch {
    // Unreadable body: the caller only uses this for diagnostics.
  }
  return text;
}
|
|
276
|
+
//# sourceMappingURL=anvil-fanout.js.map
|
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Diff capture — `pugi review --consensus` (α6.7).
|
|
3
|
+
*
|
|
4
|
+
* Captures the diff that the consensus fan-out will send to Anvil. Four
|
|
5
|
+
* supported source kinds (in order of precedence):
|
|
6
|
+
*
|
|
7
|
+
* 1. `--pr <number>` — fetches the PR head with `git fetch` and diffs it locally (gh CLI required for PR metadata).
|
|
8
|
+
* 2. `--commit <sha>` — diff of that commit vs its first parent.
|
|
9
|
+
* 3. `--branch <name>` — diff of the named branch (`origin/<name>` when unqualified) vs its merge-base with the base ref.
|
|
10
|
+
* 4. (default) — diff of HEAD vs `origin/main` merge-base
|
|
11
|
+
* covering BOTH committed-since-base AND
|
|
12
|
+
* uncommitted (staged + working tree) edits.
|
|
13
|
+
*
|
|
14
|
+
* The shape mirrors the existing `performRemoteTripleReview` flow:
|
|
15
|
+
* uncommitted edits are deliberately included by computing the diff
|
|
16
|
+
* against the merge-base SHA rather than `base...HEAD`, otherwise the
|
|
17
|
+
* common case ("review what I'm about to commit") would lose signal.
|
|
18
|
+
*
|
|
19
|
+
* Protected paths (`.env*`, `*.key`, `*.pem`, `*.sql` etc) are excluded
|
|
20
|
+
* at the git layer so a secret cannot leak into the egress payload even
|
|
21
|
+
* if the operator has it staged.
|
|
22
|
+
*/
|
|
23
|
+
import { execFileSync } from 'node:child_process';
|
|
24
|
+
/**
 * Upper bound, in bytes, on the diff payload this module hands to the
 * egress request (1 MiB). The original note describes it as
 * defense-in-depth on top of a server-side cap, sized at roughly 30k
 * lines of code.
 */
export const DIFF_MAX_BYTES = 1024 * 1024;
|
|
31
|
+
// Git pathspec exclusions for sensitive blobs, appended to every
// `git diff` this module runs. Per the original note this list is the
// source of truth and the legacy `PROTECTED_DIFF_EXCLUDES` in cli.ts
// must be kept in sync with it when patterns are added.
//
// Coverage policy: a credential committed under any plausible filename
// must be excluded, including unconventional names (deploy.crt,
// credentials, .netrc) chosen to slip past a narrow list.
//
// Every entry is emitted as `:(exclude,glob)**/<pattern>`. The leading
// double-star makes the pattern match at the repo root and in any
// subdirectory. (Line comments are used here because that pathspec
// text contains the block-comment terminator.)
export const PROTECTED_PATHSPEC_EXCLUDES = Object.freeze([
  // Dotfiles + RC files that frequently hold tokens.
  '.env',
  '.env.*',
  '.npmrc',
  '.yarnrc',
  '.pypirc',
  '.gitconfig',
  '.netrc',
  // SSH private keys.
  'id_rsa',
  'id_ed25519',
  'id_ecdsa',
  'id_dsa',
  // PEM-encoded + DER-encoded private keys / certs / containers.
  '*.pem',
  '*.key',
  '*.crt',
  '*.cer',
  '*.der',
  '*.pfx',
  '*.p12',
  // SQL dumps / DB exports often contain real PII + credentials.
  '*.dump',
  '*.sql',
  // Generic credential blobs under any directory.
  '*.secret',
  'credentials',
  'credentials.json',
  // Recursive form: with glob magic a single `*` does not cross path
  // separators, so `secrets/*` would miss nested files such as
  // `secrets/prod/token.txt`.
  'secrets/**',
].map((pattern) => `:(exclude,glob)**/${pattern}`));
|
|
82
|
+
/**
 * Entry point: pick the capture strategy for `spec` and run it.
 *
 * Precedence is pr > commit > branch > default. A `pr` only counts when
 * it is a finite number greater than zero; `commit` and `branch` only
 * count when they are non-empty strings. The base ref defaults to
 * `origin/main` and the working directory to `process.cwd()`.
 *
 * This function does not catch anything: whatever the selected capture
 * helper throws propagates to the caller.
 *
 * @param {{ cwd?: string, pr?: number, commit?: string, branch?: string, baseRef?: string }} spec
 * @returns {object} Whatever the selected capture helper returns.
 */
export function captureDiff(spec) {
  const DEFAULT_BASE = 'origin/main';
  const cwd = spec.cwd ?? process.cwd();
  const { pr, commit, branch } = spec;

  const hasPr = typeof pr === 'number' && Number.isFinite(pr) && pr > 0;
  if (hasPr) {
    return captureFromPr(cwd, pr);
  }
  const hasCommit = typeof commit === 'string' && commit.length > 0;
  if (hasCommit) {
    return captureFromCommit(cwd, commit);
  }
  const baseRef = spec.baseRef ?? DEFAULT_BASE;
  const hasBranch = typeof branch === 'string' && branch.length > 0;
  return hasBranch
    ? captureFromBranch(cwd, branch, baseRef)
    : captureFromBase(cwd, baseRef);
}
|
|
105
|
+
/**
 * Capture the diff of a GitHub pull request.
 *
 * `gh pr diff` is deliberately NOT used: it would bypass
 * PROTECTED_PATHSPEC_EXCLUDES. Instead the PR head is fetched into a
 * private ref and diffed locally with the same pathspec excludes as
 * every other capture path. `gh pr view` is used only for metadata.
 *
 * @param {string} cwd Repository working directory.
 * @param {number} pr Pull-request number.
 * @returns {{ diff: string, context: object }} Capped diff plus
 *   branch / commit / title / ref / stats context.
 * @throws {Error} When `gh pr view`, the fetch or the diff fails.
 */
function captureFromPr(cwd, pr) {
  const metaFields = 'title,headRefName,headRefOid,baseRefName';
  const meta = safeParseJson(safeExec(cwd, 'gh', ['pr', 'view', String(pr), '--json', metaFields]));
  const tempRef = `refs/pugi/consensus-pr-${pr}`;
  // `pull/<num>/head` is the refspec the remote exposes for the PR head;
  // landing it in a private ref gives us local objects to diff.
  safeExec(cwd, 'git', ['fetch', 'origin', `pull/${pr}/head:${tempRef}`]);
  try {
    // Diff from the merge-base with the PR's declared base (falling back
    // to `origin/main`) so a PR that is behind its base still shows only
    // its own hunks. If no merge-base can be computed, diff from the
    // base ref itself.
    let baseRef = 'origin/main';
    if (meta?.baseRefName) {
      baseRef = `origin/${meta.baseRefName}`;
    }
    const mergeBase = safeExecOptional(cwd, 'git', ['merge-base', baseRef, tempRef]).trim();
    const from = mergeBase || baseRef;
    const diffArgs = ['diff', `${from}..${tempRef}`, '--', '.', ...PROTECTED_PATHSPEC_EXCLUDES];
    const cappedDiff = capDiff(safeExec(cwd, 'git', diffArgs));
    const stats = computeStats(cappedDiff);
    const context = {
      branch: meta?.headRefName ?? `pr-${pr}`,
      commit: shortSha(meta?.headRefOid ?? ''),
      title: meta?.title ?? `PR #${pr}`,
      ref: `pr:${pr}`,
      stats,
    };
    return { diff: cappedDiff, context };
  } finally {
    // Best-effort removal of the private ref; a failure here must not
    // replace the primary error (if any).
    try {
      safeExec(cwd, 'git', ['update-ref', '-d', tempRef]);
    } catch {
      // A leftover ref under refs/pugi/ is tolerated.
    }
  }
}
|
|
157
|
+
/**
 * Capture the diff introduced by a single commit (vs its first parent).
 *
 * The ref is user-supplied, so it is validated before anything is built
 * from it: refs that look like command-line options are rejected, and
 * `git rev-parse --verify` is used so that the input must resolve to
 * exactly one object. Without `--verify`, rev-parse echoes unknown
 * options and prints several lines for ranges, and that output would be
 * treated as the SHA.
 *
 * @param {string} cwd Repository working directory.
 * @param {string} commit Commit-ish to review.
 * @returns {{ diff: string, context: object }} Capped diff plus
 *   branch / commit / title / ref / stats context.
 * @throws {Error} When the ref is option-like or cannot be resolved, or
 *   when any required git command fails.
 */
function captureFromCommit(cwd, commit) {
  if (commit.startsWith('-')) {
    // Never let a ref be interpreted as an option by git.
    throw new Error(`Unknown commit ref: ${commit}`);
  }
  const fullSha = safeExec(cwd, 'git', ['rev-parse', '--verify', commit]).trim();
  if (!fullSha) {
    throw new Error(`Unknown commit ref: ${commit}`);
  }
  // `<sha>~1..<sha>` covers exactly that commit's changes. A root commit
  // has no `~1`; the probe below exits non-zero for it, and we then diff
  // against the empty tree so the first commit's content still shows up.
  const hasParent = safeExecOptional(cwd, 'git', ['rev-parse', '--verify', `${fullSha}~1`]).trim().length > 0;
  // The well-known SHA-1 of git's empty tree
  // (`git hash-object -t tree /dev/null`).
  const EMPTY_TREE_SHA = '4b825dc642cb6eb9a060e54bf8d69288fbee4904';
  const range = hasParent ? `${fullSha}~1..${fullSha}` : `${EMPTY_TREE_SHA}..${fullSha}`;
  const diff = safeExec(cwd, 'git', [
    'diff',
    range,
    '--',
    '.',
    ...PROTECTED_PATHSPEC_EXCLUDES,
  ]);
  const cappedDiff = capDiff(diff);
  const subject = safeExec(cwd, 'git', ['log', '-1', '--pretty=%s', fullSha]).trim();
  const branch = safeExec(cwd, 'git', ['name-rev', '--name-only', fullSha]).trim() || 'detached';
  const stats = computeStats(cappedDiff);
  return {
    diff: cappedDiff,
    context: {
      branch,
      commit: shortSha(fullSha),
      title: subject || `commit ${shortSha(fullSha)}`,
      ref: `commit:${shortSha(fullSha)}`,
      stats,
    },
  };
}
|
|
195
|
+
/**
 * Capture the diff of a branch against its merge-base with `baseRef`.
 *
 * Ref resolution:
 *   - a name without `/` is looked up as `origin/<name>`;
 *   - a name containing `/` is used as given when it resolves to a
 *     commit (e.g. `upstream/dev`, or a local `feature/x`); otherwise
 *     `origin/<name>` is tried, so a remote-only `feature/x` works in
 *     checkouts that have no local branches. If neither resolves, the
 *     name is used as given and the merge-base call reports the failure.
 *
 * Both refs are user-supplied; values that look like command-line
 * options are rejected before any git command runs.
 *
 * @param {string} cwd Repository working directory.
 * @param {string} branch Branch to review.
 * @param {string} baseRef Ref the branch is compared against.
 * @returns {{ diff: string, context: object }} Capped diff plus
 *   branch / commit / title / ref / stats context.
 * @throws {Error} On an option-like ref, a missing merge-base, or any
 *   failing git command.
 */
function captureFromBranch(cwd, branch, baseRef) {
  for (const ref of [branch, baseRef]) {
    if (ref.startsWith('-')) {
      throw new Error(`Invalid ref (must not start with "-"): ${ref}`);
    }
  }
  // True when `ref` names an existing commit. Non-throwing probe.
  const resolves = (ref) =>
    safeExecOptional(cwd, 'git', ['rev-parse', '--verify', '--quiet', `${ref}^{commit}`]).trim().length > 0;

  let remoteRef = `origin/${branch}`;
  if (branch.includes('/')) {
    // Prefer the name as given; only fall back to origin/ when that is
    // the one that actually exists.
    const useOrigin = !resolves(branch) && resolves(`origin/${branch}`);
    remoteRef = useOrigin ? `origin/${branch}` : branch;
  }

  const mergeBase = safeExec(cwd, 'git', ['merge-base', baseRef, remoteRef]).trim();
  if (!mergeBase) {
    throw new Error(`Cannot compute merge-base of ${baseRef} and ${remoteRef}`);
  }
  const diff = safeExec(cwd, 'git', [
    'diff',
    `${mergeBase}..${remoteRef}`,
    '--',
    '.',
    ...PROTECTED_PATHSPEC_EXCLUDES,
  ]);
  const cappedDiff = capDiff(diff);
  const head = safeExec(cwd, 'git', ['rev-parse', remoteRef]).trim();
  const subject = safeExec(cwd, 'git', ['log', '-1', '--pretty=%s', remoteRef]).trim();
  const stats = computeStats(cappedDiff);
  return {
    diff: cappedDiff,
    context: {
      branch,
      commit: shortSha(head),
      title: subject || `branch ${branch}`,
      ref: `branch:${branch}`,
      stats,
    },
  };
}
|
|
222
|
+
/**
 * Default capture: everything on HEAD since it diverged from `baseRef`,
 * plus uncommitted edits.
 *
 * When a merge-base exists the payload is two diffs joined by a newline:
 *   1. committed since base — `<merge-base>..HEAD`
 *   2. uncommitted — `git diff HEAD`, which already covers staged AND
 *      working-tree changes in one diff (so no separate `--cached` run,
 *      which would repeat the staged hunks).
 * Each part is trimmed and empty parts are dropped.
 *
 * When the merge-base lookup yields nothing, only `git diff HEAD` is
 * captured (untrimmed) and the context `ref` is `head-only`.
 *
 * @param {string} cwd Repository working directory.
 * @param {string} baseRef Ref to compute the merge-base against.
 * @returns {{ diff: string, context: object }} Capped diff plus
 *   branch / commit / title / ref / stats context.
 * @throws {Error} When a required git command fails.
 */
function captureFromBase(cwd, baseRef) {
  const mergeBase = safeExecOptional(cwd, 'git', ['merge-base', baseRef, 'HEAD']).trim();
  const pathspec = ['--', '.', ...PROTECTED_PATHSPEC_EXCLUDES];

  let rawDiff;
  let ref;
  if (mergeBase) {
    const committedDiff = safeExec(cwd, 'git', ['diff', `${mergeBase}..HEAD`, ...pathspec]);
    const uncommittedDiff = safeExec(cwd, 'git', ['diff', 'HEAD', ...pathspec]);
    const parts = [];
    for (const piece of [committedDiff, uncommittedDiff]) {
      const trimmed = piece.trim();
      if (trimmed.length > 0) {
        parts.push(trimmed);
      }
    }
    rawDiff = parts.join('\n');
    ref = `merge-base:${baseRef}`;
  } else {
    // No merge-base available: fall back to the uncommitted changes so
    // the caller still gets a diff instead of an error.
    rawDiff = safeExec(cwd, 'git', ['diff', 'HEAD', ...pathspec]);
    ref = 'head-only';
  }

  // Metadata is gathered the same way on both paths.
  const cappedDiff = capDiff(rawDiff);
  const branch = safeExec(cwd, 'git', ['branch', '--show-current']).trim() || 'detached';
  const head = safeExec(cwd, 'git', ['rev-parse', 'HEAD']).trim();
  const subject = safeExec(cwd, 'git', ['log', '-1', '--pretty=%s', 'HEAD']).trim();
  const stats = computeStats(cappedDiff);
  return {
    diff: cappedDiff,
    context: {
      branch,
      commit: shortSha(head),
      title: subject || branch,
      ref,
      stats,
    },
  };
}
|
|
292
|
+
/**
 * `safeExec` for commands that are allowed to fail.
 *
 * @param {string} cwd Working directory.
 * @param {string} file Executable to run.
 * @param {string[]} args Argument vector.
 * @returns {string} Whatever `safeExec` returns, or `''` when it throws.
 */
function safeExecOptional(cwd, file, args) {
  let stdout = '';
  try {
    stdout = safeExec(cwd, file, args);
  } catch {
    // Expected for optional lookups such as a merge-base that does not
    // exist; the caller treats the empty string as "not available".
  }
  return stdout;
}
|
|
305
|
+
/**
 * Compute file / insertion / deletion counts from a unified diff.
 *
 * The text is walked with a small state machine rather than by prefix
 * alone. The `---` / `+++` file headers only ever appear between a
 * `diff --…` line and the first `@@` hunk header, so only lines inside
 * a hunk are counted. This matters for content that itself starts with
 * `++` or `--`: an added `++i;` is emitted as `+++i;` and a removed SQL
 * comment `-- note` as `--- note`, and both must count as changes
 * instead of being mistaken for file headers.
 *
 * @param {string} diff Unified diff text (LF or CRLF line endings).
 * @returns {{ filesChanged: number, insertions: number, deletions: number }}
 *   `filesChanged` counts `diff --git` headers; the other two count `+`
 *   and `-` lines inside hunks.
 */
function computeStats(diff) {
  let filesChanged = 0;
  let insertions = 0;
  let deletions = 0;
  let inHunk = false;
  for (const line of diff.split(/\r?\n/)) {
    if (line.startsWith('diff --')) {
      // Start of a new file section. Hunk content is always prefixed
      // with ' ', '+', '-' or '\', so this can only be a header.
      inHunk = false;
      if (line.startsWith('diff --git ')) {
        filesChanged += 1;
      }
    } else if (line.startsWith('@@')) {
      inHunk = true;
    } else if (inHunk) {
      if (line.startsWith('+')) {
        insertions += 1;
      } else if (line.startsWith('-')) {
        deletions += 1;
      }
    }
  }
  return { filesChanged, insertions, deletions };
}
|
|
320
|
+
/**
 * Truncate the diff if it grows past the byte cap. Truncation is marked
 * with a sentinel comment so reviewers see the cap explicitly instead of
 * silently reasoning over a partial patch.
 *
 * The cut is made on the UTF-8 byte representation, not on UTF-16 code
 * units: the kept prefix is the longest one that fits in `maxBytes`
 * without splitting a multi-byte character. For pure-ASCII input this is
 * exactly the first `maxBytes` characters. The sentinel is appended after
 * the cut, so the returned string is slightly longer than `maxBytes`.
 *
 * @param {string} diff Diff text.
 * @param {number} [maxBytes=DIFF_MAX_BYTES] Byte budget for the diff body.
 * @returns {string} `diff` unchanged when it fits, otherwise the
 *   truncated prefix followed by the sentinel.
 */
function capDiff(diff, maxBytes = DIFF_MAX_BYTES) {
  // `Buffer.byteLength` is required — `.length` counts UTF-16 code units
  // and underestimates multi-byte content.
  if (Buffer.byteLength(diff, 'utf8') <= maxBytes) {
    return diff;
  }
  const bytes = Buffer.from(diff, 'utf8');
  // `bytes[end]` is the first byte that would be dropped. If it is a
  // continuation byte (0b10xxxxxx) the cut falls inside a character, so
  // back up to that character's lead byte.
  let end = Math.max(0, maxBytes);
  while (end > 0 && (bytes[end] & 0xc0) === 0x80) {
    end -= 1;
  }
  const slice = bytes.toString('utf8', 0, end);
  return `${slice}\n\n# [pugi-cli] diff truncated at ${maxBytes} bytes; reviewers see a partial patch.\n`;
}
|
|
340
|
+
/**
 * Abbreviate a SHA to its first seven characters.
 *
 * @param {string} sha Full or already-short SHA; may be empty.
 * @returns {string} `''` for a falsy input, the input itself when it is
 *   seven characters or fewer, otherwise its first seven characters.
 */
function shortSha(sha) {
  const ABBREV_LENGTH = 7;
  if (sha && sha.length > ABBREV_LENGTH) {
    return sha.slice(0, ABBREV_LENGTH);
  }
  return sha || '';
}
|
|
345
|
+
/**
 * Wrapper around `execFileSync` that returns stdout as a UTF-8 string
 * and throws an `Error` with a stable message shape on failure:
 *
 *     `<file> <arg0> <arg1> failed: <detail>`
 *
 * `<detail>` is the first non-empty line the command wrote to stderr
 * when there is one, because that is where the tool explains what went
 * wrong. Otherwise it is the first line of the underlying error message
 * (for example when the executable cannot be spawned). The original
 * error is attached as `cause`.
 *
 * `execFileSync` spawns no shell, so arguments are passed to the program
 * verbatim; this matters because user-supplied refs / branch names end
 * up on the command line.
 *
 * @param {string} cwd Working directory for the child process.
 * @param {string} file Executable to run.
 * @param {string[]} args Argument vector.
 * @returns {string} Captured stdout.
 * @throws {Error} On non-zero exit, spawn failure, or output beyond the
 *   32 MiB buffer.
 */
function safeExec(cwd, file, args) {
  try {
    const out = execFileSync(file, args, {
      cwd,
      stdio: ['ignore', 'pipe', 'pipe'],
      // 32 MiB buffer — large enough to capture a big diff in full; the
      // 1 MiB cap is applied by the caller afterwards so truncation can
      // be reported honestly.
      maxBuffer: 32 * 1024 * 1024,
      encoding: 'utf8',
    });
    // `encoding: 'utf8'` already yields a string; `String()` keeps the
    // return type unconditional.
    return String(out);
  } catch (error) {
    const stderrLine = String(error?.stderr ?? '')
      .split(/\r?\n/)
      .map((line) => line.trim())
      .find((line) => line.length > 0);
    const message = error instanceof Error ? error.message : String(error);
    const detail = stderrLine ?? message.split('\n')[0];
    throw new Error(`${file} ${args.slice(0, 2).join(' ')} failed: ${detail}`, { cause: error });
  }
}
|
|
374
|
+
/**
 * `JSON.parse` that never throws.
 *
 * @param {string} raw JSON text.
 * @returns {*} The parsed value, or `null` when `raw` is not valid JSON.
 */
function safeParseJson(raw) {
  let value = null;
  try {
    value = JSON.parse(raw);
  } catch {
    // Invalid JSON: callers fall back to defaults on `null`.
    value = null;
  }
  return value;
}
|
|
382
|
+
//# sourceMappingURL=diff-capture.js.map
|