@oss-scout/core 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Anti-LLM Policy — scans repo policy docs (CONTRIBUTING.md, CODE_OF_CONDUCT.md,
3
+ * README.md) for keywords that signal an anti-AI / anti-LLM contribution policy
4
+ * (e.g. "no AI-generated code", "human-authored only", "no Copilot contributions").
5
+ *
6
+ * The keyword table lives here as a single source of truth so consumers
7
+ * can rely on a structured `AntiLLMPolicyResult` rather than re-implementing
8
+ * the scan in agent prose.
9
+ */
10
+ import { Octokit } from "@octokit/rest";
11
+ import type { AntiLLMPolicyResult } from "./types.js";
12
+ /**
13
+ * Conservative anti-LLM keyword phrases. Each entry is a lowercase substring
14
+ * that — when present in policy text — is a strong signal of an anti-AI policy.
15
+ * Phrases are deliberately narrow to avoid flagging "we use Copilot internally"
16
+ * style mentions; the table can grow as new patterns are observed. Entries must stay lowercase: the scan lowercases only the scanned text, not the table.
17
+ */
18
+ export declare const ANTI_LLM_KEYWORDS: readonly string[];
19
+ /**
20
+ * Pure scan: does this text contain any anti-LLM keyword?
21
+ * Case-insensitive; returns the matched keywords (each table entry at most once, in table order). Empty text yields `{matched: false, matchedKeywords: []}`.
22
+ */
23
+ export declare function scanForAntiLLMPolicy(text: string): {
24
+ matched: boolean;
25
+ matchedKeywords: string[];
26
+ };
27
+ /**
28
+ * Optional caller hints to avoid duplicate fetches.
29
+ *
30
+ * `contributingText`:
31
+ * - `string` — caller already fetched CONTRIBUTING; scan this text directly (an empty string is skipped like an absent file).
32
+ * - `null` — caller fetched and CONTRIBUTING is known absent; skip the family.
33
+ * - `undefined` (omitted) — fetch as normal.
34
+ *
35
+ * Note: the per-repo result cache (1-hour TTL) is consulted before this hint.
36
+ * On a cache hit the cached result wins regardless of what is passed here.
37
+ */
38
+ export interface AntiLLMPolicyOptions {
39
+ contributingText?: string | null;
40
+ }
41
+ /**
42
+ * Fetch CONTRIBUTING/CODE_OF_CONDUCT/README in priority order and return the
43
+ * first family whose text matches an anti-LLM keyword. Returns
44
+ * `{matched: false, matchedKeywords: [], sourceFile: null}` when no source
45
+ * file matches. Cached per-repo for POLICY_SCAN_CACHE_TTL_MS (1 hour); a no-match result is not cached when a probe failed transiently.
46
+ *
47
+ * Sequential by design (across families; paths within one family are probed in parallel): if CONTRIBUTING throws auth/rate-limit, we want to
48
+ * short-circuit rather than burn API budget on COC + README probes.
49
+ */
50
+ export declare function fetchAndScanAntiLLMPolicy(octokit: Octokit, owner: string, repo: string, options?: AntiLLMPolicyOptions): Promise<AntiLLMPolicyResult>;
@@ -0,0 +1,207 @@
1
+ /**
2
+ * Anti-LLM Policy — scans repo policy docs (CONTRIBUTING.md, CODE_OF_CONDUCT.md,
3
+ * README.md) for keywords that signal an anti-AI / anti-LLM contribution policy
4
+ * (e.g. "no AI-generated code", "human-authored only", "no Copilot contributions").
5
+ *
6
+ * The keyword table lives here as a single source of truth so consumers
7
+ * can rely on a structured `AntiLLMPolicyResult` rather than re-implementing
8
+ * the scan in agent prose.
9
+ */
10
+ import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
11
+ import { warn } from "./logger.js";
12
+ import { getHttpCache } from "./http-cache.js";
13
+ const MODULE = "anti-llm-policy";
14
+ /** TTL for cached anti-LLM policy scan results (1 hour). Policy docs change rarely. */
15
+ const POLICY_SCAN_CACHE_TTL_MS = 60 * 60 * 1000;
16
+ /**
17
+ * Conservative anti-LLM keyword phrases. Each entry is a lowercase substring
18
+ * that — when present in policy text — is a strong signal of an anti-AI policy.
19
+ * Phrases are deliberately narrow to avoid flagging "we use Copilot internally"
20
+ * style mentions; the table can grow as new patterns are observed. Entries must stay lowercase: scanForAntiLLMPolicy lowercases only the scanned text, not the table.
21
+ */
22
+ export const ANTI_LLM_KEYWORDS = [
23
+ "no ai-generated",
24
+ "no ai generated",
25
+ "no ai-assisted",
26
+ "no ai assisted",
27
+ "no llm-generated",
28
+ "no llm generated",
29
+ "no copilot-generated",
30
+ "no chatgpt-generated",
31
+ "human-authored only",
32
+ "human authored only",
33
+ "human-written only",
34
+ "human written only",
35
+ "ai-free contributions",
36
+ "llm-free contributions",
37
+ "ai-generated code is not allowed",
38
+ "ai-generated code will not be accepted",
39
+ "do not submit ai-generated",
40
+ "do not submit llm-generated",
41
+ "do not use ai to",
42
+ "do not use llms",
43
+ "do not use copilot",
44
+ "do not use chatgpt",
45
+ "without ai assistance",
46
+ "without llm assistance",
47
+ "no use of generative ai",
48
+ "ban on ai-generated",
49
+ "prohibit ai-generated",
50
+ "prohibits ai-generated",
51
+ ];
52
+ /**
53
+ * Pure scan: does this text contain any anti-LLM keyword?
54
+ * Case-insensitive (the text is lowercased and compared against the lowercase table); returns the matched keywords (each table entry at most once, in table order). Falsy text yields `{matched: false, matchedKeywords: []}`.
55
+ */
56
+ export function scanForAntiLLMPolicy(text) {
57
+ if (!text)
58
+ return { matched: false, matchedKeywords: [] };
59
+ const haystack = text.toLowerCase();
60
+ const matchedKeywords = ANTI_LLM_KEYWORDS.filter((kw) => haystack.includes(kw));
61
+ return { matched: matchedKeywords.length > 0, matchedKeywords };
62
+ }
63
+ /** Source-file probe families, in priority order; the first family whose text matches wins. `canonical` is the name reported as `sourceFile`, `paths` are the candidate locations probed for that family. */
64
+ const SOURCE_FILE_FAMILIES = [
65
+ {
66
+ canonical: "CONTRIBUTING.md",
67
+ paths: [
68
+ "CONTRIBUTING.md",
69
+ ".github/CONTRIBUTING.md",
70
+ "docs/CONTRIBUTING.md",
71
+ "contributing.md",
72
+ ],
73
+ },
74
+ {
75
+ canonical: "CODE_OF_CONDUCT.md",
76
+ paths: [
77
+ "CODE_OF_CONDUCT.md",
78
+ ".github/CODE_OF_CONDUCT.md",
79
+ "docs/CODE_OF_CONDUCT.md",
80
+ "code_of_conduct.md",
81
+ ],
82
+ },
83
+ {
84
+ canonical: "README.md",
85
+ paths: ["README.md", "readme.md", "Readme.md"],
86
+ },
87
+ ];
88
+ /**
89
+ * Fetch one path's raw text content. The `transient` flag distinguishes a
90
+ * clean miss (404 — file absent) from a degraded miss (any other non-auth, non-rate-limit error, e.g. 5xx or network) so the
91
+ * caller can decide whether to cache "no policy" or retry. Throws on
92
+ * 401/auth and rate-limit per documented project error strategy. A response without a string `content` field is also reported as a clean miss; on success `content` is decoded from base64 as UTF-8.
93
+ */
94
+ async function fetchFileText(octokit, owner, repo, path) {
95
+ try {
96
+ const { data } = await octokit.repos.getContent({ owner, repo, path });
97
+ if ("content" in data && typeof data.content === "string") {
98
+ return {
99
+ text: Buffer.from(data.content, "base64").toString("utf-8"),
100
+ transient: false,
101
+ };
102
+ }
103
+ return { text: null, transient: false };
104
+ }
105
+ catch (error) {
106
+ const status = getHttpStatusCode(error);
107
+ if (status === 404)
108
+ return { text: null, transient: false };
109
+ if (status === 401 || isRateLimitError(error))
110
+ throw error;
111
+ warn(MODULE, `Unexpected error fetching ${path} from ${owner}/${repo}: ${errorMessage(error)}`);
112
+ return { text: null, transient: true };
113
+ }
114
+ }
115
+ /**
116
+ * Fetch the first available file from a family (first in `paths` order, not first to resolve). Probes are issued in parallel,
117
+ * but auth/rate-limit rejections re-throw so the IssueVetter's existing
118
+ * rate-limit handling kicks in instead of silently caching a wrong answer. NOTE(review): settled results are walked in `paths` order and the walk returns at the first probe that produced text, so an auth/rate-limit rejection from a later path is never inspected in that case — fetchContributionGuidelines pre-scans all rejections first; confirm the difference is intended.
119
+ */
120
+ async function fetchFamilyText(octokit, owner, repo, paths) {
121
+ const results = await Promise.allSettled(paths.map((p) => fetchFileText(octokit, owner, repo, p)));
122
+ let hadTransientFailure = false;
123
+ for (const result of results) {
124
+ if (result.status === "fulfilled") {
125
+ if (result.value.transient)
126
+ hadTransientFailure = true;
127
+ if (result.value.text)
128
+ return { text: result.value.text, hadTransientFailure };
129
+ }
130
+ else {
131
+ // Re-throw so vetIssuesParallel's isRateLimitError classifier sees it.
132
+ if (isRateLimitError(result.reason) ||
133
+ getHttpStatusCode(result.reason) === 401) {
134
+ throw result.reason;
135
+ }
136
+ hadTransientFailure = true;
137
+ }
138
+ }
139
+ return { text: null, hadTransientFailure };
140
+ }
141
+ /** Runtime shape check for a cached value: `matched` must be a boolean, `matchedKeywords` an array (element types are not checked), and `sourceFile` a string or null. */
142
+ function isAntiLLMPolicyResult(value) {
143
+ if (!value || typeof value !== "object")
144
+ return false;
145
+ const v = value;
146
+ if (typeof v.matched !== "boolean")
147
+ return false;
148
+ if (!Array.isArray(v.matchedKeywords))
149
+ return false;
150
+ if (v.sourceFile !== null && typeof v.sourceFile !== "string")
151
+ return false;
152
+ return true;
153
+ }
154
+ /**
155
+ * Fetch CONTRIBUTING/CODE_OF_CONDUCT/README in priority order and return the
156
+ * first family whose text matches an anti-LLM keyword. Returns
157
+ * `{matched: false, matchedKeywords: [], sourceFile: null}` when no source
158
+ * file matches. Cached per-repo for POLICY_SCAN_CACHE_TTL_MS: a fresh, well-shaped cache entry is returned before any fetch, and a no-match result is not cached when a probe failed transiently.
159
+ *
160
+ * Sequential by design (across families; paths within one family are probed in parallel): if CONTRIBUTING throws auth/rate-limit, we want to
161
+ * short-circuit rather than burn API budget on COC + README probes.
162
+ */
163
+ export async function fetchAndScanAntiLLMPolicy(octokit, owner, repo, options) {
164
+ const cache = getHttpCache();
165
+ const cacheKey = `anti-llm-policy:${owner}/${repo}`;
166
+ const cached = cache.getIfFresh(cacheKey, POLICY_SCAN_CACHE_TTL_MS);
167
+ if (isAntiLLMPolicyResult(cached))
168
+ return cached;
169
+ let anyTransientFailure = false;
170
+ for (const family of SOURCE_FILE_FAMILIES) {
171
+ let text;
172
+ let hadTransientFailure = false;
173
+ if (family.canonical === "CONTRIBUTING.md" &&
174
+ options?.contributingText !== undefined) {
175
+ // Use caller-provided text. null = known absent, string = use directly (an empty string is skipped below, like null).
176
+ text = options.contributingText;
177
+ }
178
+ else {
179
+ ({ text, hadTransientFailure } = await fetchFamilyText(octokit, owner, repo, family.paths));
180
+ }
181
+ if (hadTransientFailure)
182
+ anyTransientFailure = true;
183
+ if (!text)
184
+ continue;
185
+ const { matched, matchedKeywords } = scanForAntiLLMPolicy(text);
186
+ if (matched) {
187
+ const result = {
188
+ matched: true,
189
+ matchedKeywords,
190
+ sourceFile: family.canonical,
191
+ };
192
+ cache.set(cacheKey, "", result);
193
+ return result;
194
+ }
195
+ }
196
+ const noMatch = {
197
+ matched: false,
198
+ matchedKeywords: [],
199
+ sourceFile: null,
200
+ };
201
+ // Skip the cache write when probes failed transiently — otherwise a
202
+ // single 5xx would pin "no policy" for an hour for a repo that may actually have one.
203
+ if (!anyTransientFailure) {
204
+ cache.set(cacheKey, "", noMatch);
205
+ }
206
+ return noMatch;
207
+ }
@@ -7,6 +7,7 @@ export interface BootstrapResult {
7
7
  starredRepoCount: number;
8
8
  mergedPRCount: number;
9
9
  closedPRCount: number;
10
+ openPRCount: number;
10
11
  reposScoredCount: number;
11
12
  skippedDueToRateLimit: boolean;
12
13
  errors: string[];
@@ -4,7 +4,7 @@
4
4
  */
5
5
  import { getOctokit, checkRateLimit } from "./github.js";
6
6
  import { debug, warn } from "./logger.js";
7
- import { ConfigurationError, errorMessage } from "./errors.js";
7
+ import { ConfigurationError, errorMessage, getHttpStatusCode, isRateLimitError, } from "./errors.js";
8
8
  import { extractRepoFromUrl } from "./utils.js";
9
9
  const MODULE = "bootstrap";
10
10
  const STARRED_MAX_PAGES = 5;
@@ -23,6 +23,7 @@ export async function bootstrapScout(scout, token) {
23
23
  starredRepoCount: 0,
24
24
  mergedPRCount: 0,
25
25
  closedPRCount: 0,
26
+ openPRCount: 0,
26
27
  reposScoredCount: 0,
27
28
  skippedDueToRateLimit: true,
28
29
  errors: [],
@@ -80,6 +81,8 @@ export async function bootstrapScout(scout, token) {
80
81
  debug(MODULE, `Imported ${mergedPRCount} merged PRs`);
81
82
  }
82
83
  catch (err) {
84
+ if (getHttpStatusCode(err) === 401 || isRateLimitError(err))
85
+ throw err;
83
86
  warn(MODULE, `Failed to fetch merged PRs: ${errorMessage(err)}`);
84
87
  errors.push("merged PR fetch failed");
85
88
  }
@@ -110,15 +113,50 @@ export async function bootstrapScout(scout, token) {
110
113
  debug(MODULE, `Imported ${closedPRCount} closed PRs`);
111
114
  }
112
115
  catch (err) {
116
+ if (getHttpStatusCode(err) === 401 || isRateLimitError(err))
117
+ throw err;
113
118
  warn(MODULE, `Failed to fetch closed PRs: ${errorMessage(err)}`);
114
119
  errors.push("closed PR fetch failed");
115
120
  }
121
+ // 4. Fetch currently-open PRs via Search API
122
+ let openPRCount = 0;
123
+ try {
124
+ for (let page = 1; page <= SEARCH_MAX_PAGES; page++) {
125
+ const { data } = await octokit.search.issuesAndPullRequests({
126
+ q: `is:pr is:open author:${username}`,
127
+ per_page: PER_PAGE,
128
+ page,
129
+ });
130
+ for (const item of data.items) {
131
+ const repo = extractRepoFromUrl(item.html_url);
132
+ if (!repo)
133
+ continue;
134
+ scout.recordOpenPR({
135
+ url: item.html_url,
136
+ title: item.title,
137
+ openedAt: item.created_at ?? new Date().toISOString(),
138
+ repo,
139
+ });
140
+ openPRCount++;
141
+ }
142
+ if (data.items.length < PER_PAGE)
143
+ break;
144
+ }
145
+ debug(MODULE, `Imported ${openPRCount} open PRs`);
146
+ }
147
+ catch (err) {
148
+ if (getHttpStatusCode(err) === 401 || isRateLimitError(err))
149
+ throw err;
150
+ warn(MODULE, `Failed to fetch open PRs: ${errorMessage(err)}`);
151
+ errors.push("open PR fetch failed");
152
+ }
116
153
  const state = scout.getState();
117
154
  const reposScoredCount = Object.keys(state.repoScores).length;
118
155
  return {
119
156
  starredRepoCount: starredRepos.length,
120
157
  mergedPRCount,
121
158
  closedPRCount,
159
+ openPRCount,
122
160
  reposScoredCount,
123
161
  skippedDueToRateLimit: false,
124
162
  errors,
@@ -88,7 +88,7 @@ export declare class GistStateStore {
88
88
  /**
89
89
  * Merge two ScoutState objects with conflict resolution:
90
90
  * - repoScores: per-repo, keep the one with more total PR activity
91
- * - mergedPRs/closedPRs: union by URL
91
+ * - mergedPRs/closedPRs/openPRs: union by URL
92
92
  * - preferences: remote wins
93
93
  * - starredRepos: keep the list with the fresher timestamp
94
94
  * - savedResults: union by issueUrl, keep newer lastSeenAt
@@ -238,7 +238,7 @@ export class GistStateStore {
238
238
  /**
239
239
  * Merge two ScoutState objects with conflict resolution:
240
240
  * - repoScores: per-repo, keep the one with more total PR activity
241
- * - mergedPRs/closedPRs: union by URL
241
+ * - mergedPRs/closedPRs/openPRs: union by URL
242
242
  * - preferences: remote wins
243
243
  * - starredRepos: keep the list with the fresher timestamp
244
244
  * - savedResults: union by issueUrl, keep newer lastSeenAt
@@ -252,6 +252,7 @@ export function mergeStates(local, remote) {
252
252
  starredReposLastFetched: pickFresherTimestamp(local.starredReposLastFetched, remote.starredReposLastFetched),
253
253
  mergedPRs: unionByUrl(local.mergedPRs, remote.mergedPRs),
254
254
  closedPRs: unionByUrl(local.closedPRs, remote.closedPRs),
255
+ openPRs: unionByUrl(local.openPRs ?? [], remote.openPRs ?? []),
255
256
  savedResults: mergeSavedResults(local.savedResults ?? [], remote.savedResults ?? []),
256
257
  skippedIssues: mergeSkippedIssues(local.skippedIssues ?? [], remote.skippedIssues ?? []),
257
258
  lastSearchAt: pickFresherTimestamp(local.lastSearchAt, remote.lastSearchAt),
@@ -326,6 +326,7 @@ export class IssueDiscovery {
326
326
  }
327
327
  // Derive search context
328
328
  const mergedPRRepos = this.stateReader.getReposWithMergedPRs();
329
+ const openPRRepos = this.stateReader.getReposWithOpenPRs();
329
330
  const starredRepos = this.getStarredRepos();
330
331
  const starredRepoSet = new Set(starredRepos);
331
332
  const lowScoringRepos = new Set(this.deriveLowScoringRepos(config.minRepoScoreThreshold));
@@ -352,8 +353,19 @@ export class IssueDiscovery {
352
353
  now: new Date(),
353
354
  includeDocIssues: config.includeDocIssues ?? true,
354
355
  });
355
- // Phase 0: Merged-PR repos
356
- const phase0Repos = mergedPRRepos.slice(0, 10);
356
+ // Phase 0: Repos the user has engaged with — merged PRs first (strongest
357
+ // signal), then open PRs (active engagement even without a merge yet).
358
+ // Deduped and capped so REST cost stays bounded.
359
+ const seenPhase0 = new Set();
360
+ const phase0Repos = [];
361
+ for (const repo of [...mergedPRRepos, ...openPRRepos]) {
362
+ if (seenPhase0.has(repo))
363
+ continue;
364
+ seenPhase0.add(repo);
365
+ phase0Repos.push(repo);
366
+ if (phase0Repos.length >= 10)
367
+ break;
368
+ }
357
369
  const phase0RepoSet = new Set(phase0Repos);
358
370
  if (phase0Repos.length > 0 && enabledStrategies.has("merged")) {
359
371
  const remaining = maxResults - allCandidates.length;
@@ -6,13 +6,17 @@
6
6
  * Extracted from issue-vetting.ts to isolate eligibility logic.
7
7
  */
8
8
  import { Octokit } from "@octokit/rest";
9
- import type { CheckResult } from "./types.js";
9
+ import type { CheckResult, LinkedPR } from "./types.js";
10
+ /** Result of the existing-PR check, including metadata for the first linked PR whose metadata could be built; `linkedPR` is `null` when there is none or the check itself failed. */
11
+ export interface ExistingPRCheckResult extends CheckResult {
12
+ linkedPR: LinkedPR | null;
13
+ }
10
14
  /**
11
15
  * Check whether an open PR already exists for the given issue.
12
16
  * Uses the timeline API (REST) to detect cross-referenced PRs, avoiding
13
17
  * the Search API's strict 30 req/min rate limit.
14
18
  */
15
- export declare function checkNoExistingPR(octokit: Octokit, owner: string, repo: string, issueNumber: number): Promise<CheckResult>;
19
+ export declare function checkNoExistingPR(octokit: Octokit, owner: string, repo: string, issueNumber: number): Promise<ExistingPRCheckResult>;
16
20
  /**
17
21
  * Check how many merged PRs the authenticated user has in a repo.
18
22
  * Uses GitHub Search API. Returns 0 on error (non-fatal).
@@ -6,10 +6,44 @@
6
6
  * Extracted from issue-vetting.ts to isolate eligibility logic.
7
7
  */
8
8
  import { paginateAll } from "./pagination.js";
9
- import { errorMessage } from "./errors.js";
9
+ import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
10
10
  import { warn } from "./logger.js";
11
11
  import { getHttpCache } from "./http-cache.js";
12
12
  import { getSearchBudgetTracker } from "./search-budget.js";
13
+ function isLinkedPREvent(e) { // True for a "cross-referenced" timeline event whose source issue carries a `pull_request` field.
14
+ return e.event === "cross-referenced" && !!e.source?.issue?.pull_request;
15
+ }
16
+ /**
17
+ * Build a LinkedPR from a cross-referenced timeline event's source.issue.
18
+ * Returns null if required fields (`number`, `user.login`, `html_url`) are missing — and warns, because callers
19
+ * only invoke this after asserting the event is a linked-PR event, so a
20
+ * null return signals API shape drift or an unusual PR (e.g. no `user.login`), not ordinary absent data. `state` is normalized to "closed" or "open" (anything other than "closed" becomes "open"); `merged` is true when `pull_request.merged_at` is set. `context` (owner, repo, issueNumber) is used only to label the warnings.
21
+ */
22
+ function buildLinkedPRFromTimelineEvent(e, context) {
23
+ const issue = e.source?.issue;
24
+ const ctx = `${context.owner}/${context.repo}#${context.issueNumber}`;
25
+ if (!issue || typeof issue.number !== "number") {
26
+ warn(MODULE, `Cross-referenced timeline event for ${ctx} missing source.issue.number — possible API shape drift`);
27
+ return null;
28
+ }
29
+ const author = issue.user?.login;
30
+ if (!author) {
31
+ warn(MODULE, `Cross-referenced PR #${issue.number} for ${ctx} has no user.login (deleted user?) — skipping linkedPR metadata`);
32
+ return null;
33
+ }
34
+ const url = issue.html_url;
35
+ if (!url) {
36
+ warn(MODULE, `Cross-referenced PR #${issue.number} for ${ctx} missing html_url — skipping linkedPR metadata`);
37
+ return null;
38
+ }
39
+ return {
40
+ number: issue.number,
41
+ author,
42
+ state: issue.state === "closed" ? "closed" : "open",
43
+ merged: !!issue.pull_request?.merged_at,
44
+ url,
45
+ };
46
+ }
13
47
  const MODULE = "issue-eligibility";
14
48
  /** Phrases that indicate someone has already claimed an issue. */
15
49
  const CLAIM_PHRASES = [
@@ -48,16 +82,31 @@ export async function checkNoExistingPR(octokit, owner, repo, issueNumber) {
48
82
  per_page: 100,
49
83
  page,
50
84
  }));
51
- const linkedPRs = timeline.filter((event) => {
85
+ // Single pass: count linked-PR events and capture metadata for the
86
+ // first valid one, so consumers can classify (own vs. competing,
87
+ // open vs. closed-unmerged) without a separate fetch.
88
+ let linkedPRCount = 0;
89
+ let linkedPR = null;
90
+ for (const event of timeline) {
52
91
  const e = event;
53
- return e.event === "cross-referenced" && e.source?.issue?.pull_request;
54
- });
55
- return { passed: linkedPRs.length === 0 };
92
+ if (!isLinkedPREvent(e))
93
+ continue;
94
+ linkedPRCount++;
95
+ linkedPR ??= buildLinkedPRFromTimelineEvent(e, {
96
+ owner,
97
+ repo,
98
+ issueNumber,
99
+ });
100
+ }
101
+ return { passed: linkedPRCount === 0, linkedPR };
56
102
  }
57
103
  catch (error) {
104
+ if (getHttpStatusCode(error) === 401 || isRateLimitError(error)) {
105
+ throw error;
106
+ }
58
107
  const errMsg = errorMessage(error);
59
108
  warn(MODULE, `Failed to check for existing PRs on ${owner}/${repo}#${issueNumber}: ${errMsg}. Assuming no existing PR.`);
60
- return { passed: true, inconclusive: true, reason: errMsg };
109
+ return { passed: true, inconclusive: true, reason: errMsg, linkedPR: null };
61
110
  }
62
111
  }
63
112
  /** TTL for cached merged-PR counts per repo (15 minutes). */
@@ -97,6 +146,9 @@ export async function checkUserMergedPRsInRepo(octokit, owner, repo) {
97
146
  }
98
147
  }
99
148
  catch (error) {
149
+ if (getHttpStatusCode(error) === 401 || isRateLimitError(error)) {
150
+ throw error;
151
+ }
100
152
  const errMsg = errorMessage(error);
101
153
  warn(MODULE, `Could not check merged PRs in ${owner}/${repo}: ${errMsg}. Defaulting to 0.`);
102
154
  return 0; // Not cached — next call will retry
@@ -128,6 +180,9 @@ export async function checkNotClaimed(octokit, owner, repo, issueNumber, comment
128
180
  return { passed: true };
129
181
  }
130
182
  catch (error) {
183
+ if (getHttpStatusCode(error) === 401 || isRateLimitError(error)) {
184
+ throw error;
185
+ }
131
186
  const errMsg = errorMessage(error);
132
187
  warn(MODULE, `Failed to check claim status on ${owner}/${repo}#${issueNumber}: ${errMsg}. Assuming not claimed.`);
133
188
  return { passed: true, inconclusive: true, reason: errMsg };
@@ -15,6 +15,8 @@ import { type SearchPriority, type IssueCandidate, type ProjectCategory } from "
15
15
  export interface ScoutStateReader {
16
16
  /** Repos where the user has at least one merged PR. */
17
17
  getReposWithMergedPRs(): string[];
18
+ /** Repos where the user has at least one open PR. */
19
+ getReposWithOpenPRs(): string[];
18
20
  /** User's starred repos (from GitHub). */
19
21
  getStarredRepos(): string[];
20
22
  /** Preferred project categories from user preferences. */
@@ -13,6 +13,7 @@ import { calculateRepoQualityBonus, calculateViabilityScore, } from "./issue-sco
13
13
  import { repoBelongsToCategory } from "./category-mapping.js";
14
14
  import { checkNoExistingPR, checkNotClaimed, checkUserMergedPRsInRepo, analyzeRequirements, } from "./issue-eligibility.js";
15
15
  import { checkProjectHealth, fetchContributionGuidelines, } from "./repo-health.js";
16
+ import { fetchAndScanAntiLLMPolicy } from "./anti-llm-policy.js";
16
17
  import { getHttpCache } from "./http-cache.js";
17
18
  const MODULE = "issue-vetting";
18
19
  /** Vetting concurrency: kept low to reduce burst pressure on GitHub's secondary rate limit. */
@@ -68,6 +69,13 @@ export class IssueVetter {
68
69
  ? Promise.resolve(0)
69
70
  : checkUserMergedPRsInRepo(this.octokit, owner, repo),
70
71
  ]);
72
+ // Anti-LLM scan reuses the CONTRIBUTING text just fetched above —
73
+ // dedup'd to avoid 4 redundant getContent calls on cold-cache repos.
74
+ // We deliberately pass undefined (not null) when guidelines is missing,
75
+ // because fetchContributionGuidelines returns undefined for BOTH a 404
76
+ // and a transient 5xx — collapsing them to null would bypass the
77
+ // anti-llm-policy transient-failure cache safeguard.
78
+ const antiLLMPolicy = await fetchAndScanAntiLLMPolicy(this.octokit, owner, repo, { contributingText: contributionGuidelines?.rawContent });
71
79
  const noExistingPR = existingPRCheck.passed;
72
80
  const notClaimed = claimCheck.passed;
73
81
  // Analyze issue quality
@@ -87,6 +95,7 @@ export class IssueVetter {
87
95
  contributionGuidelinesFound: !!contributionGuidelines,
88
96
  },
89
97
  contributionGuidelines,
98
+ linkedPR: existingPRCheck.linkedPR,
90
99
  notes: [],
91
100
  };
92
101
  // Build notes
@@ -216,6 +225,7 @@ export class IssueVetter {
216
225
  issue: trackedIssue,
217
226
  vettingResult,
218
227
  projectHealth,
228
+ antiLLMPolicy,
219
229
  recommendation,
220
230
  reasonsToSkip,
221
231
  reasonsToApprove,
@@ -5,7 +5,7 @@
5
5
  * from issue-level eligibility logic.
6
6
  */
7
7
  import { daysBetween } from "./utils.js";
8
- import { errorMessage } from "./errors.js";
8
+ import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
9
9
  import { warn } from "./logger.js";
10
10
  import { getHttpCache, cachedRequest, cachedTimeBased } from "./http-cache.js";
11
11
  const MODULE = "repo-health";
@@ -114,6 +114,17 @@ export async function fetchContributionGuidelines(octokit, owner, repo) {
114
114
  }
115
115
  return null;
116
116
  })));
117
+ // Pre-scan: auth/rate-limit must propagate even if a faster probe succeeded —
118
+ // otherwise a path-restricted token that 401s on .github/CONTRIBUTING.md but
119
+ // wins on CONTRIBUTING.md would silently hide the auth misconfiguration.
120
+ for (const result of results) {
121
+ if (result.status !== "rejected")
122
+ continue;
123
+ if (getHttpStatusCode(result.reason) === 401 ||
124
+ isRateLimitError(result.reason)) {
125
+ throw result.reason;
126
+ }
127
+ }
117
128
  for (let i = 0; i < results.length; i++) {
118
129
  const result = results[i];
119
130
  if (result.status === "fulfilled" && result.value) {
@@ -123,11 +134,9 @@ export async function fetchContributionGuidelines(octokit, owner, repo) {
123
134
  return guidelines;
124
135
  }
125
136
  if (result.status === "rejected") {
126
- const msg = result.reason instanceof Error
127
- ? result.reason.message
128
- : String(result.reason);
129
- if (!msg.includes("404") && !msg.includes("Not Found")) {
130
- warn(MODULE, `Unexpected error fetching ${filesToCheck[i]} from ${owner}/${repo}: ${msg}`);
137
+ const status = getHttpStatusCode(result.reason);
138
+ if (status !== 404) {
139
+ warn(MODULE, `Unexpected error fetching ${filesToCheck[i]} from ${owner}/${repo}: ${errorMessage(result.reason)}`);
131
140
  }
132
141
  }
133
142
  }