@made-by-moonlight/athene-plugin-scm-github 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1007 @@
1
+ /**
2
+ * GraphQL Batch PR Enrichment
3
+ *
4
+ * Efficiently fetches data for multiple PRs using GraphQL aliases.
5
+ * Reduces API calls from N×3 to 1 (or a few if batching needed).
6
+ */
7
+ import { execFile } from "node:child_process";
8
+ import { promisify } from "node:util";
9
+ import { execGhObserved, recordActivityEvent, } from "@made-by-moonlight/athene-core";
10
+ import { LRUCache } from "./lru-cache.js";
11
// Promise-returning execFile. Reassigned by setExecFileAsync() so tests can inject a mock.
let execFileAsync = promisify(execFile);
// Default gh runner: forwards to execGhObserved, tagging each call with this
// component name and the caller-supplied operation label.
let execGhAsync = async (args, timeout, operation) => {
    const traceContext = { component: "scm-github-batch", operation };
    return execGhObserved(args, traceContext, timeout);
};
13
/**
 * Test hook: swap the promisified execFile used by this module.
 *
 * Besides replacing execFileAsync, this rebuilds execGhAsync on top of the
 * injected function, so subsequent gh calls run `fn("gh", args, options)`
 * directly and no longer go through execGhObserved. Tests that need the
 * observed path should mock execGhObserved or call setExecGhAsync instead.
 *
 * @param fn - Replacement with the promisified execFile call shape; its
 *   result must expose a string `stdout`.
 */
export function setExecFileAsync(fn) {
    execFileAsync = fn;
    execGhAsync = async (args, timeout) => {
        // 10 MiB output cap; the timeout is passed through unchanged.
        const options = {
            maxBuffer: 10 * 1024 * 1024,
            timeout,
        };
        const result = await execFileAsync("gh", args, options);
        return result.stdout.trim();
    };
}
31
/**
 * Test hook: replace the module's gh runner (execGhAsync) wholesale.
 *
 * Every gh call made through execGhAsync in this module (ETag guards and the
 * GraphQL batch query) will invoke `fn(args, timeout, operation)` afterwards.
 * NOTE(review): the original comment says this "preserves tracer in the call
 * chain"; that only holds if `fn` itself delegates to the traced executor —
 * confirm against the tests that use it.
 *
 * @param fn - Async function `(args, timeout, operation) => string`.
 */
export function setExecGhAsync(fn) {
    execGhAsync = fn;
}
38
/**
 * Capacity limits passed to the LRUCache constructors below.
 * Each value bounds how many entries the corresponding cache may hold.
 */
const MAX_PR_LIST_ETAGS = 100; // PR-list ETags, keyed per repository
const MAX_COMMIT_STATUS_ETAGS = 500; // Commit-status ETags, keyed per commit SHA
const MAX_REVIEW_COMMENTS_ETAGS = 500; // Review-comment ETags, keyed per PR
const MAX_PR_METADATA = 200; // PR metadata / full enrichment entries, keyed per PR
46
/**
 * Module-level ETag store shared by all guard checks in this module.
 *
 * - prList: keyed "owner/repo"
 * - commitStatus: keyed "owner/repo#sha"
 * - reviewComments: keyed "owner/repo#prNumber"
 *
 * Each entry is an LRUCache sized by the constants above, so memory stays bounded.
 */
const etagCache = {
    prList: new LRUCache(MAX_PR_LIST_ETAGS),
    commitStatus: new LRUCache(MAX_COMMIT_STATUS_ETAGS),
    reviewComments: new LRUCache(MAX_REVIEW_COMMENTS_ETAGS),
};
58
/**
 * Drop every stored ETag (PR list, commit status, review comments).
 * Intended for tests or for forcing the next guard checks to be unconditional.
 */
export function clearETagCache() {
    const { prList, commitStatus, reviewComments } = etagCache;
    for (const cache of [prList, commitStatus, reviewComments]) {
        cache.clear();
    }
}
67
/**
 * Look up the stored PR-list ETag for a repository.
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @returns Whatever the prList cache holds under "owner/repo".
 */
export function getPRListETag(owner, repo) {
    const repoKey = `${owner}/${repo}`;
    return etagCache.prList.get(repoKey);
}
73
/**
 * Look up the stored commit-status ETag for one commit.
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param sha - Commit SHA.
 * @returns Whatever the commitStatus cache holds under "owner/repo#sha".
 */
export function getCommitStatusETag(owner, repo, sha) {
    const commitKey = `${owner}/${repo}#${sha}`;
    return etagCache.commitStatus.get(commitKey);
}
79
/**
 * Store the PR-list ETag for a repository under "owner/repo".
 * Used by the PR-list guard and exported so tests can seed the cache.
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param etag - Validator string to remember.
 */
export function setPRListETag(owner, repo, etag) {
    const repoKey = `${owner}/${repo}`;
    etagCache.prList.set(repoKey, etag);
}
86
/**
 * Store the commit-status ETag for one commit under "owner/repo#sha".
 * Used by the commit-status guard and exported so tests can seed the cache.
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param sha - Commit SHA.
 * @param etag - Validator string to remember.
 */
export function setCommitStatusETag(owner, repo, sha, etag) {
    const commitKey = `${owner}/${repo}#${sha}`;
    etagCache.commitStatus.set(commitKey, etag);
}
93
/**
 * Per-PR metadata used by the commit-status guard.
 * Entries are written by updatePRMetadataCache() as `{ headSha, ciStatus }`
 * (or injected by tests through setPRMetadata()).
 * Key: "${owner}/${repo}#${number}"
 *
 * LRU-bounded to MAX_PR_METADATA entries.
 */
const prMetadataCache = new LRUCache(MAX_PR_METADATA);
101
/**
 * Per-PR cache of the full enrichment object.
 * Written alongside prMetadataCache by updatePRMetadataCache() so the last
 * known enrichment can be reused when the ETag guards report no change.
 * Key: "${owner}/${repo}#${number}"
 *
 * LRU-bounded to MAX_PR_METADATA entries.
 */
const prEnrichmentDataCache = new LRUCache(MAX_PR_METADATA);
110
/**
 * Record the latest enrichment result for a PR in both per-PR caches.
 *
 * @param prKey - Cache key, "${owner}/${repo}#${number}".
 * @param enrichment - Enrichment object; its `ciStatus` is copied into the metadata entry.
 * @param headSha - Head commit SHA for the PR (may be null when unknown).
 */
function updatePRMetadataCache(prKey, enrichment, headSha) {
    // Slim entry consulted by the commit-status guard.
    const { ciStatus } = enrichment;
    const metadata = { headSha, ciStatus };
    prMetadataCache.set(prKey, metadata);
    // Full payload, reusable when the guards decide no refresh is needed.
    prEnrichmentDataCache.set(prKey, enrichment);
}
122
/**
 * 2-Guard ETag strategy: decide whether the GraphQL batch enrichment must run.
 *
 * Guard 1 (per repository): conditional request against the PR list. Any
 * repository whose list changed forces a refresh; unchanged repositories are
 * collected in `prListUnchangedRepos`.
 *
 * Guard 2 (per PR): runs only when Guard 1 found no change anywhere. For
 * every PR with a cached head SHA, a conditional request against the commit's
 * check-runs detects CI transitions. A cached entry whose `headSha` is
 * exactly null is treated as incomplete and forces a refresh. PRs without
 * cached metadata are skipped.
 *
 * Guard requests are issued sequentially.
 *
 * @param prs - PRs to check; each needs `owner`, `repo` and `number`.
 * @param extraRepos - Additional "owner/repo" keys to run Guard 1 for even
 *   though no PR in `prs` belongs to them.
 * @param observer - Optional logger forwarded to the guard checks.
 * @returns `{ shouldRefresh, details, prListUnchangedRepos }` where `details`
 *   lists human-readable reasons and `prListUnchangedRepos` is a Set of
 *   "owner/repo" keys whose PR list was unchanged.
 */
export async function shouldRefreshPREnrichment(prs, extraRepos = [], observer) {
    // Distinct repositories, in first-seen order: PR repos first, then extras.
    const repoKeys = new Set();
    for (const pr of prs) {
        repoKeys.add(`${pr.owner}/${pr.repo}`);
    }
    for (const repoKey of extraRepos) {
        repoKeys.add(repoKey);
    }
    if (repoKeys.size === 0) {
        return {
            shouldRefresh: false,
            details: ["No repos to check"],
            prListUnchangedRepos: new Set(),
        };
    }
    const details = [];
    const prListUnchangedRepos = new Set();
    // Guard 1: PR list ETag per repository.
    let anyPRListChanged = false;
    for (const repoKey of repoKeys) {
        const [owner, repo] = repoKey.split("/");
        if (await checkPRListETag(owner, repo, observer)) {
            anyPRListChanged = true;
            details.push(`PR list changed for ${repoKey} (Guard 1)`);
        }
        else {
            prListUnchangedRepos.add(repoKey);
        }
    }
    // A changed PR list already forces a full refresh, so Guard 2 is skipped.
    if (anyPRListChanged) {
        return { shouldRefresh: true, details, prListUnchangedRepos };
    }
    // Guard 2: commit-status ETag for every PR with a usable cached head SHA.
    // All such PRs are checked (not only pending ones) to catch any CI transition.
    let shouldRefresh = false;
    for (const pr of prs) {
        const cached = prMetadataCache.get(`${pr.owner}/${pr.repo}#${pr.number}`);
        if (!cached) {
            // Nothing cached and the PR list is unchanged: nothing to compare.
            continue;
        }
        if (cached.headSha === null) {
            // Cached entry without a head SHA is incomplete; refresh to capture it.
            shouldRefresh = true;
            details.push(`First time seeing PR #${pr.number} (Guard 2: no cached head SHA)`);
            continue;
        }
        if (!cached.headSha) {
            continue;
        }
        const statusChanged = await checkCommitStatusETag(pr.owner, pr.repo, cached.headSha, observer);
        if (statusChanged) {
            shouldRefresh = true;
            details.push(`CI status changed for ${pr.owner}/${pr.repo}#${pr.number} (Guard 2)`);
        }
    }
    return { shouldRefresh, details, prListUnchangedRepos };
}
219
/**
 * Test helper: expose the PR metadata cache contents.
 *
 * @returns The result of `prMetadataCache.toMap()`.
 */
export function getPRMetadataCache() {
    return prMetadataCache.toMap();
}
225
/**
 * Test helper: expose the full-enrichment cache contents.
 *
 * @returns The result of `prEnrichmentDataCache.toMap()`.
 */
export function getPREnrichmentDataCache() {
    return prEnrichmentDataCache.toMap();
}
231
/**
 * Test helper: write an entry directly into the PR metadata cache.
 * Only prMetadataCache is touched; the full-enrichment cache is not updated.
 *
 * @param key - Cache key, "${owner}/${repo}#${number}".
 * @param metadata - Entry to store (the guards read its `headSha`).
 */
export function setPRMetadata(key, metadata) {
    prMetadataCache.set(key, metadata);
}
237
/**
 * Test helper: empty both per-PR caches (metadata and full enrichment data).
 */
export function clearPRMetadataCache() {
    for (const cache of [prMetadataCache, prEnrichmentDataCache]) {
        cache.clear();
    }
}
244
// Once-per-process latch: verifyGhCLI() records the scm.gh_unavailable
// activity event only while this is false, then flips it to true.
// The failure is system-wide (gh missing globally), not session-specific.
let ghUnavailableEmitted = false;
// Dedup set for batch-extraction failure reporting; cleared by
// _resetBatchEnrichPRFailedEmittedForTesting(). Its writers are outside this view.
const batchEnrichPRFailedEmitted = new Set();
248
/**
 * Pre-flight check: make sure the gh CLI can be executed before issuing a
 * GraphQL batch query, so failures surface explicitly instead of silently.
 *
 * Runs `gh --version` with a 5 second timeout. On failure it records a
 * `scm.gh_unavailable` activity event (at most once per process) and throws.
 * NOTE(review): only `gh --version` is executed here; whether that exercises
 * authentication is not visible from this file.
 *
 * @throws Error whose `cause` is the string "GH_CLI_UNAVAILABLE".
 */
async function verifyGhCLI() {
    let probeFailure;
    try {
        await execFileAsync("gh", ["--version"], { timeout: 5000 });
        return;
    }
    catch (err) {
        probeFailure = err;
    }
    if (!ghUnavailableEmitted) {
        // Latch first so the event is emitted only once per process.
        ghUnavailableEmitted = true;
        const errorMessage = probeFailure instanceof Error ? probeFailure.message : String(probeFailure);
        recordActivityEvent({
            source: "scm",
            kind: "scm.gh_unavailable",
            level: "error",
            summary: "gh CLI not available or not authenticated",
            data: {
                plugin: "scm-github",
                errorMessage,
            },
        });
    }
    const unavailable = new Error("gh CLI not available or not authenticated. GraphQL batch enrichment requires gh CLI to be installed and configured.");
    // Marker string assigned as `cause`, kept exactly as in the original behaviour.
    unavailable.cause = "GH_CLI_UNAVAILABLE";
    throw unavailable;
}
276
/**
 * Test-only: re-arm the once-per-process gh_unavailable latch so the next
 * verifyGhCLI() failure records the activity event again.
 */
export function _resetGhUnavailableEmittedForTesting() {
    ghUnavailableEmitted = false;
}
280
/**
 * Test-only: clear the set that deduplicates per-PR batch extraction
 * failure reports.
 */
export function _resetBatchEnrichPRFailedEmittedForTesting() {
    batchEnrichPRFailedEmitted.clear();
}
284
/**
 * Maximum number of PRs to query in a single GraphQL batch.
 * Chosen to stay well under GitHub's query-complexity limits; the per-PR
 * `contexts(first: 11)` budget in PR_FIELDS is sized for batches of this size.
 */
export const MAX_BATCH_SIZE = 25;
289
/**
 * Report whether `gh api -i` output (or an error message) carries an
 * HTTP 304 Not Modified status line.
 *
 * Only the header section is inspected — the text before the first blank
 * line — and the status line must start at the beginning of a line. This
 * prevents a 200 response whose JSON body merely mentions "HTTP/1.1 304"
 * (for example inside a PR description or review comment) from being
 * misread as Not Modified, which would suppress a needed refresh. URL
 * fragments such as "pulls/304/comments" never match either.
 *
 * Handles HTTP/1.1, HTTP/2 and HTTP/2.0 style status lines.
 *
 * @param output - Raw text to inspect; non-string or empty input yields false.
 * @returns true when a 304 status line is present in the header section.
 */
function is304(output) {
    if (typeof output !== "string" || output.length === 0) {
        return false;
    }
    const text = output.trimStart();
    // Headers end at the first blank line (CRLF or LF); the body is ignored.
    const headerEnd = text.search(/\r?\n\r?\n/);
    const headerSection = headerEnd === -1 ? text : text.slice(0, headerEnd);
    return /^HTTP\/[\d.]+ 304\b/im.test(headerSection);
}
296
/**
 * Pull the captured process output out of a rejected exec call.
 *
 * gh can exit non-zero while still printing the HTTP response (e.g. on a
 * 304), so the guards inspect stdout and stderr of the error object.
 * Property access is null-safe: a rejection value of null or undefined (or a
 * primitive) yields null instead of throwing a TypeError inside the caller's
 * catch block.
 *
 * @param err - Value caught from the failed call.
 * @returns stdout followed by stderr when at least one is a non-empty
 *   string, otherwise null.
 */
function extractErrorOutput(err) {
    const stdout = typeof err?.stdout === "string" ? err.stdout : "";
    const stderr = typeof err?.stderr === "string" ? err.stderr : "";
    const combined = stdout + stderr;
    return combined.length > 0 ? combined : null;
}
307
/**
 * Extract the ETag validator from HTTP response output.
 *
 * Used on both the 200 and 304 paths — RFC 7232 allows a server to send a
 * new validator with a 304, so the value is re-read even when unchanged.
 *
 * The match is anchored to the start of a line so that only a header named
 * exactly `ETag` (any letter case) is accepted; headers that merely end in
 * "etag" (e.g. `X-Upstream-Etag:`) are ignored. Whitespace after the colon
 * is limited to spaces and tabs so an empty value cannot swallow the line
 * break and capture the next header.
 *
 * @param output - Raw response text including headers.
 * @returns The trimmed ETag value, or undefined when none is present or the
 *   input is not a string.
 */
function extractETag(output) {
    if (typeof output !== "string") {
        return undefined;
    }
    const match = /^etag:[ \t]*(\S.*)$/im.exec(output);
    return match ? match[1].trim() : undefined;
}
316
/**
 * Guard 1: PR list ETag check for one repository.
 *
 * Issues a conditional `gh api -i` GET against
 * `repos/{owner}/{repo}/pulls?state=open&sort=updated&direction=desc&per_page=1`,
 * sending `If-None-Match` when an ETag is cached for the repository. Any ETag
 * found in the response (200 or 304) replaces the cached one.
 *
 * CI status changes are not covered by this guard (see Guard 2).
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param observer - Optional logger; `log("warn", …)` is called on unexpected failures.
 * @returns false when the server answered 304 Not Modified; true when the
 *   list changed or the check failed (failure is treated as "changed").
 */
async function checkPRListETag(owner, repo, observer) {
    const repoKey = `${owner}/${repo}`;
    const previousETag = etagCache.prList.get(repoKey);
    // -i makes gh print the response headers, which carry the status line and ETag.
    const args = [
        "api",
        "--method",
        "GET",
        `repos/${repoKey}/pulls?state=open&sort=updated&direction=desc&per_page=1`,
        "-i",
    ];
    if (previousETag) {
        args.push("-H", `If-None-Match: ${previousETag}`);
    }
    // Store whatever validator the response carries (it may rotate even on a 304).
    const rememberETag = (responseText) => {
        const etag = extractETag(responseText);
        if (etag) {
            setPRListETag(owner, repo, etag);
        }
    };
    try {
        const response = await execGhAsync(args, 10_000, "gh.api.guard-pr-list");
        const notModified = is304(response);
        rememberETag(response);
        return !notModified;
    }
    catch (err) {
        // gh exits non-zero on 304; the status line is then in the captured output.
        const captured = extractErrorOutput(err);
        if (captured && is304(captured)) {
            rememberETag(captured);
            return false;
        }
        // Some failures only expose the status through the error message.
        const errorMsg = err instanceof Error ? err.message : String(err);
        if (is304(errorMsg)) {
            return false;
        }
        observer?.log("warn", `[ETag Guard 1] PR list check failed for ${repoKey}: ${errorMsg}`);
        return true; // Assume changed to be safe
    }
}
375
/**
 * Guard 2: check-runs ETag check for one commit.
 *
 * Issues a conditional `gh api -i` GET against
 * `repos/{owner}/{repo}/commits/{sha}/check-runs?per_page=1`, sending
 * `If-None-Match` when an ETag is cached for the commit. The check-runs
 * endpoint is used (rather than the legacy status endpoint) to line up with
 * the `statusCheckRollup` data read by the batch query. Any ETag found in
 * the response (200 or 304) replaces the cached one.
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param sha - Head commit SHA to check.
 * @param observer - Optional logger; `log("warn", …)` is called on unexpected failures.
 * @returns false when the server answered 304 Not Modified; true when CI
 *   data changed or the check failed (failure is treated as "changed").
 */
async function checkCommitStatusETag(owner, repo, sha, observer) {
    const commitKey = `${owner}/${repo}#${sha}`;
    const previousETag = etagCache.commitStatus.get(commitKey);
    // per_page=1 keeps the payload small; -i prints the response headers.
    const args = [
        "api",
        "--method",
        "GET",
        `repos/${owner}/${repo}/commits/${sha}/check-runs?per_page=1`,
        "-i",
    ];
    if (previousETag) {
        args.push("-H", `If-None-Match: ${previousETag}`);
    }
    // Store whatever validator the response carries (it may rotate even on a 304).
    const rememberETag = (responseText) => {
        const etag = extractETag(responseText);
        if (etag) {
            setCommitStatusETag(owner, repo, sha, etag);
        }
    };
    try {
        const response = await execGhAsync(args, 10_000, "gh.api.guard-commit-status");
        const notModified = is304(response);
        rememberETag(response);
        return !notModified;
    }
    catch (err) {
        // gh exits non-zero on 304; the status line is then in the captured output.
        const captured = extractErrorOutput(err);
        if (captured && is304(captured)) {
            rememberETag(captured);
            return false;
        }
        const errorMsg = err instanceof Error ? err.message : String(err);
        if (is304(errorMsg)) {
            return false;
        }
        observer?.log("warn", `[ETag Guard 2] Commit status check failed for ${commitKey}: ${errorMsg}`);
        return true; // Assume changed to be safe
    }
}
433
/**
 * Guard 3: review comments ETag check for one PR.
 *
 * Issues a conditional `gh api -i` GET against
 * `repos/{owner}/{repo}/pulls/{prNumber}/comments`, sending `If-None-Match`
 * when an ETag is cached for the PR. Callers can use the result to decide
 * whether cached review-thread data is still valid.
 *
 * No `per_page` is set on purpose: with `per_page=1` the validator would only
 * cover the first page, so comments landing on later pages would never
 * invalidate it. Any ETag found in the response (200 or 304) replaces the
 * cached one.
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param prNumber - Pull request number.
 * @param observer - Optional logger; `log("warn", …)` is called on unexpected failures.
 * @returns false when the server answered 304 Not Modified; true when
 *   comments changed or the check failed (failure is treated as "changed").
 */
export async function checkReviewCommentsETag(owner, repo, prNumber, observer) {
    const cacheKey = `${owner}/${repo}#${prNumber}`;
    const previousETag = etagCache.reviewComments.get(cacheKey);
    const args = [
        "api",
        "--method",
        "GET",
        `repos/${owner}/${repo}/pulls/${prNumber}/comments`,
        "-i",
    ];
    if (previousETag) {
        args.push("-H", `If-None-Match: ${previousETag}`);
    }
    // Store whatever validator the response carries (it may rotate even on a 304).
    const rememberETag = (responseText) => {
        const etag = extractETag(responseText);
        if (etag) {
            etagCache.reviewComments.set(cacheKey, etag);
        }
    };
    try {
        const response = await execGhAsync(args, 10_000, "gh.api.guard-review-comments");
        const notModified = is304(response);
        rememberETag(response);
        return !notModified;
    }
    catch (err) {
        // gh exits non-zero on 304; the status line is then in the captured output.
        const captured = extractErrorOutput(err);
        if (captured && is304(captured)) {
            rememberETag(captured);
            return false;
        }
        const errorMsg = err instanceof Error ? err.message : String(err);
        if (is304(errorMsg)) {
            return false;
        }
        observer?.log("warn", `[ETag Guard 3] Review comments check failed for ${cacheKey}: ${errorMsg}`);
        return true; // Assume changed to be safe
    }
}
486
/**
 * GraphQL selection set requested for every pull request in a batch.
 *
 * Covers the fields read by extractPREnrichment(): title/state/size, draft
 * and merge status, review decision, the head commit OID (`headRefOid`, used
 * as the head SHA for ETag Guard 2) and the last commit's statusCheckRollup
 * with up to 11 individual check contexts plus `pageInfo.hasNextPage` so
 * truncation can be detected.
 *
 * The text is interpolated verbatim into the query by generateBatchQuery().
 */
const PR_FIELDS = `
  title
  state
  additions
  deletions
  isDraft
  mergeable
  mergeStateStatus
  reviewDecision
  headRefName
  headRefOid
  commits(last: 1) {
    nodes {
      commit {
        statusCheckRollup {
          state
          # 11 keeps per-PR node cost under budget for 25-PR batch queries
          # (total cost ≤5000). Repos with >11 checks lose individual check
          # visibility, but the rollup "state" still reflects all checks —
          # overall pass/fail detection remains correct.
          contexts(first: 11) {
            nodes {
              ... on CheckRun {
                name
                status
                conclusion
                detailsUrl
              }
              ... on StatusContext {
                context
                state
                targetUrl
              }
            }
            pageInfo {
              hasNextPage
            }
          }
        }
      }
    }
  }
`;
534
/**
 * Build one GraphQL query that fetches PR_FIELDS for many PRs at once.
 *
 * Each PR gets its own alias (`pr0`, `pr1`, …) and three variables
 * (`<alias>Owner`, `<alias>Name`, `<alias>Number`). Variable types are
 * declared from the runtime value: numbers become `Int!`, everything else
 * `String!`. A `rateLimit` selection is appended to the query.
 *
 * @param prs - PRs to include; each needs `owner`, `repo` and `number`.
 * @returns `{ query, variables }`; for an empty list the query is "" and the
 *   variables object is empty, which callers must treat as "nothing to run".
 */
export function generateBatchQuery(prs) {
    if (prs.length === 0) {
        return { query: "", variables: {} };
    }
    const variables = {};
    const selections = [];
    prs.forEach(({ owner, repo, number }, index) => {
        const alias = `pr${index}`;
        variables[`${alias}Owner`] = owner;
        variables[`${alias}Name`] = repo;
        variables[`${alias}Number`] = number;
        // Inline fragment on Repository copes with the nullable repository field.
        selections.push(`
      ${alias}: repository(owner: $${alias}Owner, name: $${alias}Name) {
        ... on Repository {
          pullRequest(number: $${alias}Number) { ${PR_FIELDS} }
        }
      }
    `);
    });
    const variableDefs = Object.keys(variables)
        .map((name) => {
        const gqlType = typeof variables[name] === "number" ? "Int!" : "String!";
        return `$${name}: ${gqlType}`;
    })
        .join(", ");
    const query = `query BatchPRs(${variableDefs}) {
      ${selections.join("\n")}
      rateLimit { cost remaining resetAt }
    }`;
    return { query, variables };
}
575
/**
 * Run the batch GraphQL query for the given PRs through the gh CLI.
 *
 * Steps: build the query, verify gh is runnable, pass string variables with
 * `-f` and all other variables with `-F`, run `gh api graphql -i` with a
 * timeout that grows with the batch size, strip the HTTP headers that `-i`
 * prepends, then parse the JSON body.
 *
 * @param prs - PRs to query.
 * @returns The `data` object of the GraphQL response, or `{}` when there is
 *   nothing to query or the response has no `data`.
 * @throws Error when gh is unavailable, the body is not valid JSON, or the
 *   response contains GraphQL errors.
 */
async function executeBatchQuery(prs) {
    const { query, variables } = generateBatchQuery(prs);
    if (prs.length === 0 || !query) {
        return {};
    }
    await verifyGhCLI();
    // -f sends the value as a raw string; -F is used for non-string values.
    const varArgs = Object.entries(variables).flatMap(([name, value]) => [
        typeof value === "string" ? "-f" : "-F",
        `${name}=${value}`,
    ]);
    const args = ["api", "graphql", "-i", ...varArgs, "-f", `query=${query}`];
    // 30s base, plus 2s for every PR beyond the first 10.
    const prsBeyondTen = Math.max(0, prs.length - 10);
    const adaptiveTimeout = 30_000 + prsBeyondTen * 2000;
    const stdout = await execGhAsync(args, adaptiveTimeout, "gh.api.graphql-batch");
    // With -i the output is headers, a blank line, then the JSON body.
    // Cut at whichever blank-line style (CRLF or LF) appears first.
    const crlfBreak = stdout.indexOf("\r\n\r\n");
    const lfBreak = stdout.indexOf("\n\n");
    let bodyStart = 0;
    if (crlfBreak >= 0 && (lfBreak < 0 || crlfBreak < lfBreak)) {
        bodyStart = crlfBreak + 4;
    }
    else if (lfBreak >= 0) {
        bodyStart = lfBreak + 2;
    }
    const body = stdout.slice(bodyStart);
    const result = JSON.parse(body.trim());
    // Surface GraphQL errors as an exception so callers can fall back.
    if (result.errors && result.errors.length > 0) {
        const errorMsg = result.errors.map((e) => e.message).join("; ");
        throw new Error(`GraphQL query errors: ${errorMsg}`);
    }
    return (result.data ?? {});
}
623
/**
 * Convert `statusCheckRollup.contexts` into a flat list of CI checks.
 *
 * Two node shapes are recognised:
 * - CheckRun (string `name` and `status`): COMPLETED maps by conclusion —
 *   SUCCESS → "passed"; FAILURE, TIMED_OUT, CANCELLED, ACTION_REQUIRED and
 *   ERROR → "failed"; anything else (including a missing conclusion) →
 *   "skipped". IN_PROGRESS → "running"; every other status → "pending".
 * - StatusContext (string `context` and `state`): SUCCESS → "passed";
 *   FAILURE or ERROR → "failed"; anything else → "pending".
 *
 * Conclusions and states are upper-cased so results are comparable across
 * data sources. Nodes matching neither shape are dropped.
 *
 * @param contexts - The `contexts` connection object; may be null/undefined.
 * @returns Array of `{ name, status, conclusion, url }`; empty when
 *   `contexts` has no `nodes` array.
 */
function parseCheckContexts(contexts) {
    const nodes = contexts && typeof contexts === "object" ? contexts["nodes"] : undefined;
    if (!Array.isArray(nodes)) {
        return [];
    }
    const failedConclusions = new Set([
        "FAILURE",
        "TIMED_OUT",
        "CANCELLED",
        "ACTION_REQUIRED",
        "ERROR",
    ]);
    const optionalString = (value) => (typeof value === "string" ? value : undefined);
    const upperOrNull = (value) => (typeof value === "string" ? value.toUpperCase() : null);
    // Status of a CheckRun given its upper-cased status and conclusion.
    const checkRunStatus = (runStatus, conclusion) => {
        if (runStatus === "IN_PROGRESS") {
            return "running";
        }
        if (runStatus !== "COMPLETED") {
            // QUEUED, WAITING and anything else not yet finished.
            return "pending";
        }
        if (conclusion === "SUCCESS") {
            return "passed";
        }
        // Every non-failure terminal conclusion (SKIPPED, NEUTRAL, STALE, …,
        // unknown values, or none at all) counts as skipped.
        return failedConclusions.has(conclusion) ? "failed" : "skipped";
    };
    // Status of a legacy StatusContext given its upper-cased state.
    const statusContextStatus = (contextState) => {
        if (contextState === "SUCCESS") {
            return "passed";
        }
        return contextState === "FAILURE" || contextState === "ERROR" ? "failed" : "pending";
    };
    return nodes.flatMap((node) => {
        if (!node || typeof node !== "object") {
            return [];
        }
        if (typeof node["name"] === "string" && typeof node["status"] === "string") {
            const conclusion = upperOrNull(node["conclusion"]);
            return [
                {
                    name: node["name"],
                    status: checkRunStatus(node["status"].toUpperCase(), conclusion),
                    conclusion: conclusion ?? undefined,
                    url: optionalString(node["detailsUrl"]),
                },
            ];
        }
        if (typeof node["context"] === "string" && typeof node["state"] === "string") {
            const contextState = node["state"].toUpperCase();
            return [
                {
                    name: node["context"],
                    status: statusContextStatus(contextState),
                    conclusion: contextState,
                    url: optionalString(node["targetUrl"]),
                },
            ];
        }
        return [];
    });
}
716
/**
 * Map the aggregate `statusCheckRollup.state` to the overall CI status.
 *
 * Only the top-level state is consulted here; individual checks are handled
 * by parseCheckContexts(). The state is compared case-insensitively.
 *
 * @param statusCheckRollup - Rollup object from the GraphQL response.
 * @returns "passing", "failing", "pending", or "none" when the rollup is
 *   missing, not an object, or carries an unrecognised state.
 */
function parseCIState(statusCheckRollup) {
    if (!statusCheckRollup || typeof statusCheckRollup !== "object") {
        return "none";
    }
    const rawState = statusCheckRollup["state"];
    const state = typeof rawState === "string" ? rawState.toUpperCase() : "";
    switch (state) {
        case "SUCCESS":
            return "passing";
        case "FAILURE":
        case "ERROR":
        case "TIMED_OUT":
        case "CANCELLED":
        case "ACTION_REQUIRED":
            return "failing";
        case "PENDING":
        case "EXPECTED":
        case "QUEUED":
        case "IN_PROGRESS":
        case "WAITING":
            return "pending";
        default:
            return "none";
    }
}
745
/**
 * Map the GraphQL `reviewDecision` value to the internal review status.
 *
 * @param reviewDecision - Raw value from the response (compared case-insensitively).
 * @returns "approved", "changes_requested", "pending" (review required), or
 *   "none" for a non-string or unrecognised value.
 */
function parseReviewDecision(reviewDecision) {
    if (typeof reviewDecision !== "string") {
        return "none";
    }
    switch (reviewDecision.toUpperCase()) {
        case "APPROVED":
            return "approved";
        case "CHANGES_REQUESTED":
            return "changes_requested";
        case "REVIEW_REQUIRED":
            return "pending";
        default:
            return "none";
    }
}
758
/**
 * Map the GraphQL PR `state` value to the internal PR state.
 *
 * @param state - Raw value from the response (compared case-insensitively).
 * @returns "merged", "closed", or "open" for everything else, including
 *   non-string input.
 */
function parsePRState(state) {
    const normalized = typeof state === "string" ? state.toUpperCase() : "";
    switch (normalized) {
        case "MERGED":
            return "merged";
        case "CLOSED":
            return "closed";
        default:
            return "open";
    }
}
769
/**
 * Turn one `pullRequest` node from the batch response into enrichment data.
 *
 * @param pullRequest - Raw pullRequest object from the GraphQL response.
 * @returns `{ data, headSha }`, or null when the input is not an object or
 *   has none of `state`, `title`, `commits`. `headSha` comes from
 *   `headRefOid` (falling back to `headSha`) and is null when neither is a
 *   string. `data.ciChecks` is present only when the contexts list was
 *   returned without truncation.
 */
function extractPREnrichment(pullRequest) {
    if (!pullRequest || typeof pullRequest !== "object") {
        return null;
    }
    const pr = pullRequest;
    // A real PR node carries at least one of these fields.
    const hasNoKnownField = ["state", "title", "commits"].every((field) => pr[field] === undefined);
    if (hasNoKnownField) {
        return null;
    }
    const stringOrUndefined = (value) => (typeof value === "string" ? value : undefined);
    const numberOrZero = (value) => (typeof value === "number" ? value : 0);
    // Basic PR facts.
    const state = parsePRState(pr["state"]);
    const title = stringOrUndefined(pr["title"]);
    const additions = numberOrZero(pr["additions"]);
    const deletions = numberOrZero(pr["deletions"]);
    const isDraft = pr["isDraft"] === true;
    // Head SHA feeds the commit-status ETag guard.
    const headSha = stringOrUndefined(pr["headRefOid"]) ?? stringOrUndefined(pr["headSha"]) ?? null;
    // Merge state.
    const mergeStateStatus = (stringOrUndefined(pr["mergeStateStatus"]) ?? "").toUpperCase();
    const hasConflicts = pr["mergeable"] === "CONFLICTING";
    const isBehind = mergeStateStatus === "BEHIND";
    const reviewDecision = parseReviewDecision(pr["reviewDecision"]);
    // CI: aggregate state plus, when complete, the individual checks.
    const statusCheckRollup = pr["commits"]?.nodes?.[0]?.commit?.statusCheckRollup;
    const ciStatus = statusCheckRollup ? parseCIState(statusCheckRollup) : "none";
    // The query caps contexts per PR (see `contexts(first: …)` in PR_FIELDS).
    // When the list is truncated the failing check may be missing, so ciChecks
    // is left undefined, letting consumers fetch the full list another way.
    const contextsField = statusCheckRollup?.["contexts"];
    const pageInfo = contextsField?.["pageInfo"];
    const isTruncated = typeof pageInfo === "object" && pageInfo !== null && pageInfo["hasNextPage"] === true;
    const ciChecks = contextsField && !isTruncated ? parseCheckContexts(contextsField) : undefined;
    // Human-readable reasons the PR cannot be merged yet, in fixed order.
    const blockers = [
        [ciStatus === "failing", "CI is failing"],
        [reviewDecision === "changes_requested", "Changes requested in review"],
        [reviewDecision === "pending", "Review required"],
        [hasConflicts, "Merge conflicts"],
        [isBehind, "Branch is behind base branch"],
        [isDraft, "PR is still a draft"],
    ]
        .filter(([applies]) => applies)
        .map(([, message]) => message);
    // Merge-ready requires an open PR, CI passing (or no CI configured),
    // review approved or not required, and no conflict/behind/draft blocker.
    const ciPassing = ciStatus === "passing" || ciStatus === "none";
    const reviewSatisfied = reviewDecision === "approved" || reviewDecision === "none";
    const mergeReady = state === "open" && ciPassing && reviewSatisfied && !hasConflicts && !isBehind && !isDraft;
    const data = {
        state,
        ciStatus,
        reviewDecision,
        mergeable: mergeReady,
        title,
        additions,
        deletions,
        isDraft,
        hasConflicts,
        isBehind,
        blockers,
    };
    if (ciChecks !== undefined) {
        data.ciChecks = ciChecks;
    }
    return { data, headSha };
}
858
/**
 * Main batch enrichment function with 2-Guard ETag Strategy.
 *
 * Before running expensive GraphQL batch queries, uses two lightweight REST API
 * ETag checks to detect if anything actually changed:
 *
 * 1. Guard 1: PR List ETag Check (per repo)
 *    - Detects PR metadata changes (commits, reviews, labels, state)
 *    - Cost: 1 REST point if changed, 0 if unchanged (304)
 *
 * 2. Guard 2: Commit Status ETag Check (per PR with pending CI)
 *    - Detects CI status changes for PRs with pending CI
 *    - Cost: 1 REST point if changed, 0 if unchanged (304)
 *
 * If the guards indicate no changes, cached enrichment is served from
 * prEnrichmentDataCache and GraphQL is only used for PRs missing from that
 * cache (skipped entirely when every PR is cached). If any guard detects a
 * change, GraphQL batch queries run for all requested PRs.
 *
 * PRs that GraphQL does not return, that cannot be parsed, or that belong to a
 * batch whose query threw are simply absent from the returned map; no
 * placeholder entries are written, so callers can fall back to per-PR calls.
 *
 * @param {Array<{owner: string, repo: string, number: number}>} prs
 *   PRs to enrich. The array itself is never modified.
 * @param {object} [observer]
 *   Optional observability hooks. `log`, `recordSuccess` and `recordFailure`
 *   are invoked when an observer is supplied; `reportPRListUnchangedRepos` is
 *   invoked only if present.
 * @param {Array} [repos=[]]
 *   Forwarded to shouldRefreshPREnrichment so Guard 1 can also cover repos
 *   that have no PRs yet.
 * @returns {Promise<{enrichment: Map<string, object>, prListUnchangedRepos: *}>}
 *   `enrichment` is keyed by "${owner}/${repo}#${number}";
 *   `prListUnchangedRepos` is passed through from the guard result.
 */
export async function enrichSessionsPRBatch(prs, observer, repos = []) {
    const result = new Map();
    // Step 1: Check if we need to refresh using the 2-Guard ETag Strategy.
    // Guard 1 runs for all repos (including those with no PRs yet) so the
    // lifecycle manager knows whether detectPR can be skipped.
    const guardResult = await shouldRefreshPREnrichment(prs, repos, observer);
    // Report which repos had no PR list changes so the lifecycle can skip detectPR.
    observer?.reportPRListUnchangedRepos?.(guardResult.prListUnchangedRepos);
    // PRs that still need a GraphQL round trip. Kept separate from `prs` so the
    // parameter always means "what the caller asked for".
    let prsToFetch = prs;
    if (!guardResult.shouldRefresh) {
        // No changes detected - serve from cache and collect whatever is missing.
        const missingPRs = [];
        for (const pr of prs) {
            const prKey = `${pr.owner}/${pr.repo}#${pr.number}`;
            const cachedData = prEnrichmentDataCache.get(prKey);
            if (cachedData) {
                result.set(prKey, cachedData);
            }
            else {
                missingPRs.push(pr);
            }
        }
        if (missingPRs.length === 0) {
            // All PRs cached - return cached data without touching GraphQL.
            observer?.log("info", `[ETag Guard] Skipping GraphQL batch - all ${result.size} PRs cached. Reasons: ${guardResult.details.join(", ")}`);
            return { enrichment: result, prListUnchangedRepos: guardResult.prListUnchangedRepos };
        }
        // Some PRs not cached - only those go through GraphQL below.
        observer?.log("info", `[ETag Guard] Partial cache: ${result.size} cached, ${missingPRs.length} missing. Fetching missing PRs via GraphQL.`);
        prsToFetch = missingPRs;
    }
    // Step 2: Split into batches if we have too many PRs.
    const batches = [];
    for (let start = 0; start < prsToFetch.length; start += MAX_BATCH_SIZE) {
        batches.push(prsToFetch.slice(start, start + MAX_BATCH_SIZE));
    }
    // Step 3: Execute each batch. A failing batch is reported and skipped so
    // the remaining batches can still contribute partial results.
    for (const [batchIndex, batch] of batches.entries()) {
        const prCountBefore = result.size;
        const batchStartTime = Date.now();
        try {
            const data = await executeBatchQuery(batch);
            const durationMs = Date.now() - batchStartTime;
            // Extract results for each PR in the batch; aliases are positional (pr0, pr1, ...).
            for (const [index, pr] of batch.entries()) {
                const prKey = `${pr.owner}/${pr.repo}#${pr.number}`;
                const pullRequest = data[`pr${index}`]?.pullRequest;
                if (!pullRequest) {
                    // PR not found (deleted/closed/permission issue).
                    // Don't add to result or cache, so lifecycle-manager can fall
                    // back to individual API calls on the next poll.
                    continue;
                }
                const extracted = extractPREnrichment(pullRequest);
                if (extracted) {
                    const { data: enrichment, headSha } = extracted;
                    result.set(prKey, enrichment);
                    // Update PR metadata cache for future ETag checks.
                    updatePRMetadataCache(prKey, enrichment, headSha);
                    continue;
                }
                // GraphQL returned a PR object but extractPREnrichment couldn't
                // parse it (missing fields, schema drift). Distinct from the
                // whole-batch failure (D02) handled in the catch below.
                // Emitted at most once per PR key.
                if (batchEnrichPRFailedEmitted.has(prKey)) {
                    continue;
                }
                batchEnrichPRFailedEmitted.add(prKey);
                recordActivityEvent({
                    source: "scm",
                    kind: "scm.batch_enrich_pr_failed",
                    level: "warn",
                    summary: `batch enrich extraction failed for PR #${pr.number}`,
                    data: {
                        plugin: "scm-github",
                        prNumber: pr.number,
                        prOwner: pr.owner,
                        prRepo: pr.repo,
                    },
                });
            }
            // Log observability metric only when the batch actually added entries.
            const addedCount = result.size - prCountBefore;
            if (addedCount > 0) {
                observer?.recordSuccess({
                    batchIndex,
                    totalBatches: batches.length,
                    prCount: addedCount,
                    durationMs,
                });
                observer?.log("info", `[GraphQL Batch Success] Batch ${batchIndex + 1}/${batches.length} succeeded: added ${addedCount} PRs to cache (${durationMs}ms)`);
            }
        }
        catch (err) {
            // Calculate duration even on failure.
            const durationMs = Date.now() - batchStartTime;
            const errorMsg = err instanceof Error ? err.message : String(err);
            // Record failure for observability.
            observer?.recordFailure({
                batchIndex,
                totalBatches: batches.length,
                prCount: batch.length,
                error: errorMsg,
                durationMs,
            });
            observer?.log("error", `[GraphQL Batch] Batch enrichment partially failed: ${errorMsg}`);
            // Deliberately no placeholder entries and no rethrow: the PRs of this
            // batch stay absent from the result and the loop continues with the
            // next batch to allow partial success.
        }
    }
    return { enrichment: result, prListUnchangedRepos: guardResult.prListUnchangedRepos };
}
1003
+ // Export internal functions for testing
1004
+ export { parseCIState, parseReviewDecision, parsePRState, extractPREnrichment, checkPRListETag, checkCommitStatusETag,
1005
+ // shouldRefreshPREnrichment is already exported as async function
1006
+ updatePRMetadataCache, };
1007
+ //# sourceMappingURL=graphql-batch.js.map