@oss-scout/core 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.bundle.cjs +40 -39
- package/dist/core/anti-llm-policy.d.ts +50 -0
- package/dist/core/anti-llm-policy.js +207 -0
- package/dist/core/issue-eligibility.d.ts +6 -2
- package/dist/core/issue-eligibility.js +61 -6
- package/dist/core/issue-vetting.d.ts +9 -0
- package/dist/core/issue-vetting.js +28 -0
- package/dist/core/repo-health.js +15 -6
- package/dist/core/schemas.d.ts +35 -0
- package/dist/core/schemas.js +21 -0
- package/dist/core/slm-triage.d.ts +72 -0
- package/dist/core/slm-triage.js +135 -0
- package/dist/core/types.d.ts +24 -1
- package/dist/index.d.ts +3 -2
- package/dist/index.js +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Anti-LLM Policy — scans repo policy docs (CONTRIBUTING.md, CODE_OF_CONDUCT.md,
|
|
3
|
+
* README.md) for keywords that signal an anti-AI / anti-LLM contribution policy
|
|
4
|
+
* (e.g. "no AI-generated code", "human-authored only", "no Copilot contributions").
|
|
5
|
+
*
|
|
6
|
+
* The keyword table lives here as a single source of truth so consumers
|
|
7
|
+
* can rely on a structured `AntiLLMPolicyResult` rather than re-implementing
|
|
8
|
+
* the scan in agent prose.
|
|
9
|
+
*/
|
|
10
|
+
import { Octokit } from "@octokit/rest";
|
|
11
|
+
import type { AntiLLMPolicyResult } from "./types.js";
|
|
12
|
+
/**
|
|
13
|
+
* Conservative anti-LLM keyword phrases. Each entry is a lowercase substring
|
|
14
|
+
* that — when present in policy text — is a strong signal of an anti-AI policy.
|
|
15
|
+
* Phrases are deliberately narrow to avoid flagging "we use Copilot internally"
|
|
16
|
+
* style mentions; the table can grow as new patterns are observed.
|
|
17
|
+
*/
|
|
18
|
+
export declare const ANTI_LLM_KEYWORDS: readonly string[];
|
|
19
|
+
/**
|
|
20
|
+
* Pure scan: does this text contain any anti-LLM keyword?
|
|
21
|
+
* Case-insensitive; returns the matched keywords (deduped, in table order).
|
|
22
|
+
*/
|
|
23
|
+
export declare function scanForAntiLLMPolicy(text: string): {
|
|
24
|
+
matched: boolean;
|
|
25
|
+
matchedKeywords: string[];
|
|
26
|
+
};
|
|
27
|
+
/**
|
|
28
|
+
* Optional caller hints to avoid duplicate fetches.
|
|
29
|
+
*
|
|
30
|
+
* `contributingText`:
|
|
31
|
+
* - `string` — caller already fetched CONTRIBUTING; scan this text directly.
|
|
32
|
+
* - `null` — caller fetched and CONTRIBUTING is known absent; skip the family.
|
|
33
|
+
* - `undefined` (omitted) — fetch as normal.
|
|
34
|
+
*
|
|
35
|
+
* Note: the per-repo result cache (1-hour TTL) is consulted before this hint.
|
|
36
|
+
* On a cache hit the cached result wins regardless of what is passed here.
|
|
37
|
+
*/
|
|
38
|
+
export interface AntiLLMPolicyOptions {
|
|
39
|
+
contributingText?: string | null;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Fetch CONTRIBUTING/CODE_OF_CONDUCT/README in priority order and return the
|
|
43
|
+
* first family whose text matches an anti-LLM keyword. Returns
|
|
44
|
+
* `{matched: false, matchedKeywords: [], sourceFile: null}` when no source
|
|
45
|
+
* file matches. Cached per-repo for POLICY_SCAN_CACHE_TTL_MS.
|
|
46
|
+
*
|
|
47
|
+
* Sequential by design: if CONTRIBUTING throws auth/rate-limit, we want to
|
|
48
|
+
* short-circuit rather than burn API budget on COC + README probes.
|
|
49
|
+
*/
|
|
50
|
+
export declare function fetchAndScanAntiLLMPolicy(octokit: Octokit, owner: string, repo: string, options?: AntiLLMPolicyOptions): Promise<AntiLLMPolicyResult>;
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Anti-LLM Policy — scans repo policy docs (CONTRIBUTING.md, CODE_OF_CONDUCT.md,
|
|
3
|
+
* README.md) for keywords that signal an anti-AI / anti-LLM contribution policy
|
|
4
|
+
* (e.g. "no AI-generated code", "human-authored only", "no Copilot contributions").
|
|
5
|
+
*
|
|
6
|
+
* The keyword table lives here as a single source of truth so consumers
|
|
7
|
+
* can rely on a structured `AntiLLMPolicyResult` rather than re-implementing
|
|
8
|
+
* the scan in agent prose.
|
|
9
|
+
*/
|
|
10
|
+
import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
|
|
11
|
+
import { warn } from "./logger.js";
|
|
12
|
+
import { getHttpCache } from "./http-cache.js";
|
|
13
|
+
const MODULE = "anti-llm-policy";
|
|
14
|
+
/** TTL for cached anti-LLM policy scan results (1 hour). Policy docs change rarely. */
|
|
15
|
+
const POLICY_SCAN_CACHE_TTL_MS = 60 * 60 * 1000;
|
|
16
|
+
/**
|
|
17
|
+
* Conservative anti-LLM keyword phrases. Each entry is a lowercase substring
|
|
18
|
+
* that — when present in policy text — is a strong signal of an anti-AI policy.
|
|
19
|
+
* Phrases are deliberately narrow to avoid flagging "we use Copilot internally"
|
|
20
|
+
* style mentions; the table can grow as new patterns are observed.
|
|
21
|
+
*/
|
|
22
|
+
export const ANTI_LLM_KEYWORDS = [
|
|
23
|
+
"no ai-generated",
|
|
24
|
+
"no ai generated",
|
|
25
|
+
"no ai-assisted",
|
|
26
|
+
"no ai assisted",
|
|
27
|
+
"no llm-generated",
|
|
28
|
+
"no llm generated",
|
|
29
|
+
"no copilot-generated",
|
|
30
|
+
"no chatgpt-generated",
|
|
31
|
+
"human-authored only",
|
|
32
|
+
"human authored only",
|
|
33
|
+
"human-written only",
|
|
34
|
+
"human written only",
|
|
35
|
+
"ai-free contributions",
|
|
36
|
+
"llm-free contributions",
|
|
37
|
+
"ai-generated code is not allowed",
|
|
38
|
+
"ai-generated code will not be accepted",
|
|
39
|
+
"do not submit ai-generated",
|
|
40
|
+
"do not submit llm-generated",
|
|
41
|
+
"do not use ai to",
|
|
42
|
+
"do not use llms",
|
|
43
|
+
"do not use copilot",
|
|
44
|
+
"do not use chatgpt",
|
|
45
|
+
"without ai assistance",
|
|
46
|
+
"without llm assistance",
|
|
47
|
+
"no use of generative ai",
|
|
48
|
+
"ban on ai-generated",
|
|
49
|
+
"prohibit ai-generated",
|
|
50
|
+
"prohibits ai-generated",
|
|
51
|
+
];
|
|
52
|
+
/**
|
|
53
|
+
* Pure scan: does this text contain any anti-LLM keyword?
|
|
54
|
+
* Case-insensitive; returns the matched keywords (deduped, in table order).
|
|
55
|
+
*/
|
|
56
|
+
export function scanForAntiLLMPolicy(text) {
|
|
57
|
+
if (!text)
|
|
58
|
+
return { matched: false, matchedKeywords: [] };
|
|
59
|
+
const haystack = text.toLowerCase();
|
|
60
|
+
const matchedKeywords = ANTI_LLM_KEYWORDS.filter((kw) => haystack.includes(kw));
|
|
61
|
+
return { matched: matchedKeywords.length > 0, matchedKeywords };
|
|
62
|
+
}
|
|
63
|
+
/** Source-file probe families, in priority order. First match wins. */
|
|
64
|
+
const SOURCE_FILE_FAMILIES = [
|
|
65
|
+
{
|
|
66
|
+
canonical: "CONTRIBUTING.md",
|
|
67
|
+
paths: [
|
|
68
|
+
"CONTRIBUTING.md",
|
|
69
|
+
".github/CONTRIBUTING.md",
|
|
70
|
+
"docs/CONTRIBUTING.md",
|
|
71
|
+
"contributing.md",
|
|
72
|
+
],
|
|
73
|
+
},
|
|
74
|
+
{
|
|
75
|
+
canonical: "CODE_OF_CONDUCT.md",
|
|
76
|
+
paths: [
|
|
77
|
+
"CODE_OF_CONDUCT.md",
|
|
78
|
+
".github/CODE_OF_CONDUCT.md",
|
|
79
|
+
"docs/CODE_OF_CONDUCT.md",
|
|
80
|
+
"code_of_conduct.md",
|
|
81
|
+
],
|
|
82
|
+
},
|
|
83
|
+
{
|
|
84
|
+
canonical: "README.md",
|
|
85
|
+
paths: ["README.md", "readme.md", "Readme.md"],
|
|
86
|
+
},
|
|
87
|
+
];
|
|
88
|
+
/**
|
|
89
|
+
* Fetch one path's raw text content. The `transient` flag distinguishes a
|
|
90
|
+
* clean miss (404 — file absent) from a degraded miss (5xx, network) so the
|
|
91
|
+
* caller can decide whether to cache "no policy" or retry. Throws on
|
|
92
|
+
* 401/auth and rate-limit per documented project error strategy.
|
|
93
|
+
*/
|
|
94
|
+
async function fetchFileText(octokit, owner, repo, path) {
|
|
95
|
+
try {
|
|
96
|
+
const { data } = await octokit.repos.getContent({ owner, repo, path });
|
|
97
|
+
if ("content" in data && typeof data.content === "string") {
|
|
98
|
+
return {
|
|
99
|
+
text: Buffer.from(data.content, "base64").toString("utf-8"),
|
|
100
|
+
transient: false,
|
|
101
|
+
};
|
|
102
|
+
}
|
|
103
|
+
return { text: null, transient: false };
|
|
104
|
+
}
|
|
105
|
+
catch (error) {
|
|
106
|
+
const status = getHttpStatusCode(error);
|
|
107
|
+
if (status === 404)
|
|
108
|
+
return { text: null, transient: false };
|
|
109
|
+
if (status === 401 || isRateLimitError(error))
|
|
110
|
+
throw error;
|
|
111
|
+
warn(MODULE, `Unexpected error fetching ${path} from ${owner}/${repo}: ${errorMessage(error)}`);
|
|
112
|
+
return { text: null, transient: true };
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* Fetch the first available file from a family. Probes are issued in parallel,
|
|
117
|
+
* but auth/rate-limit rejections re-throw so the IssueVetter's existing
|
|
118
|
+
* rate-limit handling kicks in instead of silently caching a wrong answer.
|
|
119
|
+
*/
|
|
120
|
+
async function fetchFamilyText(octokit, owner, repo, paths) {
|
|
121
|
+
const results = await Promise.allSettled(paths.map((p) => fetchFileText(octokit, owner, repo, p)));
|
|
122
|
+
let hadTransientFailure = false;
|
|
123
|
+
for (const result of results) {
|
|
124
|
+
if (result.status === "fulfilled") {
|
|
125
|
+
if (result.value.transient)
|
|
126
|
+
hadTransientFailure = true;
|
|
127
|
+
if (result.value.text)
|
|
128
|
+
return { text: result.value.text, hadTransientFailure };
|
|
129
|
+
}
|
|
130
|
+
else {
|
|
131
|
+
// Re-throw so vetIssuesParallel's isRateLimitError classifier sees it.
|
|
132
|
+
if (isRateLimitError(result.reason) ||
|
|
133
|
+
getHttpStatusCode(result.reason) === 401) {
|
|
134
|
+
throw result.reason;
|
|
135
|
+
}
|
|
136
|
+
hadTransientFailure = true;
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
return { text: null, hadTransientFailure };
|
|
140
|
+
}
|
|
141
|
+
/** Cached value passes runtime shape checks for AntiLLMPolicyResult. */
|
|
142
|
+
function isAntiLLMPolicyResult(value) {
|
|
143
|
+
if (!value || typeof value !== "object")
|
|
144
|
+
return false;
|
|
145
|
+
const v = value;
|
|
146
|
+
if (typeof v.matched !== "boolean")
|
|
147
|
+
return false;
|
|
148
|
+
if (!Array.isArray(v.matchedKeywords))
|
|
149
|
+
return false;
|
|
150
|
+
if (v.sourceFile !== null && typeof v.sourceFile !== "string")
|
|
151
|
+
return false;
|
|
152
|
+
return true;
|
|
153
|
+
}
|
|
154
|
+
/**
|
|
155
|
+
* Fetch CONTRIBUTING/CODE_OF_CONDUCT/README in priority order and return the
|
|
156
|
+
* first family whose text matches an anti-LLM keyword. Returns
|
|
157
|
+
* `{matched: false, matchedKeywords: [], sourceFile: null}` when no source
|
|
158
|
+
* file matches. Cached per-repo for POLICY_SCAN_CACHE_TTL_MS.
|
|
159
|
+
*
|
|
160
|
+
* Sequential by design: if CONTRIBUTING throws auth/rate-limit, we want to
|
|
161
|
+
* short-circuit rather than burn API budget on COC + README probes.
|
|
162
|
+
*/
|
|
163
|
+
export async function fetchAndScanAntiLLMPolicy(octokit, owner, repo, options) {
|
|
164
|
+
const cache = getHttpCache();
|
|
165
|
+
const cacheKey = `anti-llm-policy:${owner}/${repo}`;
|
|
166
|
+
const cached = cache.getIfFresh(cacheKey, POLICY_SCAN_CACHE_TTL_MS);
|
|
167
|
+
if (isAntiLLMPolicyResult(cached))
|
|
168
|
+
return cached;
|
|
169
|
+
let anyTransientFailure = false;
|
|
170
|
+
for (const family of SOURCE_FILE_FAMILIES) {
|
|
171
|
+
let text;
|
|
172
|
+
let hadTransientFailure = false;
|
|
173
|
+
if (family.canonical === "CONTRIBUTING.md" &&
|
|
174
|
+
options?.contributingText !== undefined) {
|
|
175
|
+
// Use caller-provided text. null = known absent, string = use directly.
|
|
176
|
+
text = options.contributingText;
|
|
177
|
+
}
|
|
178
|
+
else {
|
|
179
|
+
({ text, hadTransientFailure } = await fetchFamilyText(octokit, owner, repo, family.paths));
|
|
180
|
+
}
|
|
181
|
+
if (hadTransientFailure)
|
|
182
|
+
anyTransientFailure = true;
|
|
183
|
+
if (!text)
|
|
184
|
+
continue;
|
|
185
|
+
const { matched, matchedKeywords } = scanForAntiLLMPolicy(text);
|
|
186
|
+
if (matched) {
|
|
187
|
+
const result = {
|
|
188
|
+
matched: true,
|
|
189
|
+
matchedKeywords,
|
|
190
|
+
sourceFile: family.canonical,
|
|
191
|
+
};
|
|
192
|
+
cache.set(cacheKey, "", result);
|
|
193
|
+
return result;
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
const noMatch = {
|
|
197
|
+
matched: false,
|
|
198
|
+
matchedKeywords: [],
|
|
199
|
+
sourceFile: null,
|
|
200
|
+
};
|
|
201
|
+
// Skip the cache write when probes failed transiently — otherwise a
|
|
202
|
+
// single 5xx pins "no policy" for an hour for a repo that may actually have one.
|
|
203
|
+
if (!anyTransientFailure) {
|
|
204
|
+
cache.set(cacheKey, "", noMatch);
|
|
205
|
+
}
|
|
206
|
+
return noMatch;
|
|
207
|
+
}
|
|
@@ -6,13 +6,17 @@
|
|
|
6
6
|
* Extracted from issue-vetting.ts to isolate eligibility logic.
|
|
7
7
|
*/
|
|
8
8
|
import { Octokit } from "@octokit/rest";
|
|
9
|
-
import type { CheckResult } from "./types.js";
|
|
9
|
+
import type { CheckResult, LinkedPR } from "./types.js";
|
|
10
|
+
/** Result of the existing-PR check, including metadata for the first linked PR (if any). */
|
|
11
|
+
export interface ExistingPRCheckResult extends CheckResult {
|
|
12
|
+
linkedPR: LinkedPR | null;
|
|
13
|
+
}
|
|
10
14
|
/**
|
|
11
15
|
* Check whether an open PR already exists for the given issue.
|
|
12
16
|
* Uses the timeline API (REST) to detect cross-referenced PRs, avoiding
|
|
13
17
|
* the Search API's strict 30 req/min rate limit.
|
|
14
18
|
*/
|
|
15
|
-
export declare function checkNoExistingPR(octokit: Octokit, owner: string, repo: string, issueNumber: number): Promise<
|
|
19
|
+
export declare function checkNoExistingPR(octokit: Octokit, owner: string, repo: string, issueNumber: number): Promise<ExistingPRCheckResult>;
|
|
16
20
|
/**
|
|
17
21
|
* Check how many merged PRs the authenticated user has in a repo.
|
|
18
22
|
* Uses GitHub Search API. Returns 0 on error (non-fatal), except 401 and rate-limit errors, which are rethrown.
|
|
@@ -6,10 +6,44 @@
|
|
|
6
6
|
* Extracted from issue-vetting.ts to isolate eligibility logic.
|
|
7
7
|
*/
|
|
8
8
|
import { paginateAll } from "./pagination.js";
|
|
9
|
-
import { errorMessage } from "./errors.js";
|
|
9
|
+
import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
|
|
10
10
|
import { warn } from "./logger.js";
|
|
11
11
|
import { getHttpCache } from "./http-cache.js";
|
|
12
12
|
import { getSearchBudgetTracker } from "./search-budget.js";
|
|
13
|
+
function isLinkedPREvent(e) {
|
|
14
|
+
return e.event === "cross-referenced" && !!e.source?.issue?.pull_request;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Build a LinkedPR from a cross-referenced timeline event's source.issue.
|
|
18
|
+
* Returns null if required fields are missing — and warns, because callers
|
|
19
|
+
* only invoke this after asserting the event is a linked-PR event, so a
|
|
20
|
+
* null return signals API shape drift, not absent data.
|
|
21
|
+
*/
|
|
22
|
+
function buildLinkedPRFromTimelineEvent(e, context) {
|
|
23
|
+
const issue = e.source?.issue;
|
|
24
|
+
const ctx = `${context.owner}/${context.repo}#${context.issueNumber}`;
|
|
25
|
+
if (!issue || typeof issue.number !== "number") {
|
|
26
|
+
warn(MODULE, `Cross-referenced timeline event for ${ctx} missing source.issue.number — possible API shape drift`);
|
|
27
|
+
return null;
|
|
28
|
+
}
|
|
29
|
+
const author = issue.user?.login;
|
|
30
|
+
if (!author) {
|
|
31
|
+
warn(MODULE, `Cross-referenced PR #${issue.number} for ${ctx} has no user.login (deleted user?) — skipping linkedPR metadata`);
|
|
32
|
+
return null;
|
|
33
|
+
}
|
|
34
|
+
const url = issue.html_url;
|
|
35
|
+
if (!url) {
|
|
36
|
+
warn(MODULE, `Cross-referenced PR #${issue.number} for ${ctx} missing html_url — skipping linkedPR metadata`);
|
|
37
|
+
return null;
|
|
38
|
+
}
|
|
39
|
+
return {
|
|
40
|
+
number: issue.number,
|
|
41
|
+
author,
|
|
42
|
+
state: issue.state === "closed" ? "closed" : "open",
|
|
43
|
+
merged: !!issue.pull_request?.merged_at,
|
|
44
|
+
url,
|
|
45
|
+
};
|
|
46
|
+
}
|
|
13
47
|
const MODULE = "issue-eligibility";
|
|
14
48
|
/** Phrases that indicate someone has already claimed an issue. */
|
|
15
49
|
const CLAIM_PHRASES = [
|
|
@@ -48,16 +82,31 @@ export async function checkNoExistingPR(octokit, owner, repo, issueNumber) {
|
|
|
48
82
|
per_page: 100,
|
|
49
83
|
page,
|
|
50
84
|
}));
|
|
51
|
-
|
|
85
|
+
// Single pass: count linked-PR events and capture metadata for the
|
|
86
|
+
// first valid one, so consumers can classify (own vs. competing,
|
|
87
|
+
// open vs. closed-unmerged) without a separate fetch.
|
|
88
|
+
let linkedPRCount = 0;
|
|
89
|
+
let linkedPR = null;
|
|
90
|
+
for (const event of timeline) {
|
|
52
91
|
const e = event;
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
92
|
+
if (!isLinkedPREvent(e))
|
|
93
|
+
continue;
|
|
94
|
+
linkedPRCount++;
|
|
95
|
+
linkedPR ??= buildLinkedPRFromTimelineEvent(e, {
|
|
96
|
+
owner,
|
|
97
|
+
repo,
|
|
98
|
+
issueNumber,
|
|
99
|
+
});
|
|
100
|
+
}
|
|
101
|
+
return { passed: linkedPRCount === 0, linkedPR };
|
|
56
102
|
}
|
|
57
103
|
catch (error) {
|
|
104
|
+
if (getHttpStatusCode(error) === 401 || isRateLimitError(error)) {
|
|
105
|
+
throw error;
|
|
106
|
+
}
|
|
58
107
|
const errMsg = errorMessage(error);
|
|
59
108
|
warn(MODULE, `Failed to check for existing PRs on ${owner}/${repo}#${issueNumber}: ${errMsg}. Assuming no existing PR.`);
|
|
60
|
-
return { passed: true, inconclusive: true, reason: errMsg };
|
|
109
|
+
return { passed: true, inconclusive: true, reason: errMsg, linkedPR: null };
|
|
61
110
|
}
|
|
62
111
|
}
|
|
63
112
|
/** TTL for cached merged-PR counts per repo (15 minutes). */
|
|
@@ -97,6 +146,9 @@ export async function checkUserMergedPRsInRepo(octokit, owner, repo) {
|
|
|
97
146
|
}
|
|
98
147
|
}
|
|
99
148
|
catch (error) {
|
|
149
|
+
if (getHttpStatusCode(error) === 401 || isRateLimitError(error)) {
|
|
150
|
+
throw error;
|
|
151
|
+
}
|
|
100
152
|
const errMsg = errorMessage(error);
|
|
101
153
|
warn(MODULE, `Could not check merged PRs in ${owner}/${repo}: ${errMsg}. Defaulting to 0.`);
|
|
102
154
|
return 0; // Not cached — next call will retry
|
|
@@ -128,6 +180,9 @@ export async function checkNotClaimed(octokit, owner, repo, issueNumber, comment
|
|
|
128
180
|
return { passed: true };
|
|
129
181
|
}
|
|
130
182
|
catch (error) {
|
|
183
|
+
if (getHttpStatusCode(error) === 401 || isRateLimitError(error)) {
|
|
184
|
+
throw error;
|
|
185
|
+
}
|
|
131
186
|
const errMsg = errorMessage(error);
|
|
132
187
|
warn(MODULE, `Failed to check claim status on ${owner}/${repo}#${issueNumber}: ${errMsg}. Assuming not claimed.`);
|
|
133
188
|
return { passed: true, inconclusive: true, reason: errMsg };
|
|
@@ -23,6 +23,15 @@ export interface ScoutStateReader {
|
|
|
23
23
|
getProjectCategories(): ProjectCategory[];
|
|
24
24
|
/** Numeric quality score for a repo, or null if not evaluated. */
|
|
25
25
|
getRepoScore(repo: string): number | null;
|
|
26
|
+
/**
|
|
27
|
+
* SLM pre-triage config (oss-autopilot#1122). Returns the configured
|
|
28
|
+
* model id and Ollama host, or empty strings when not configured —
|
|
29
|
+
* vetIssue skips the SLM call when the model is empty; an empty host selects the default host.
|
|
30
|
+
*/
|
|
31
|
+
getSLMTriageConfig?(): {
|
|
32
|
+
model: string;
|
|
33
|
+
host: string;
|
|
34
|
+
};
|
|
26
35
|
}
|
|
27
36
|
export declare class IssueVetter {
|
|
28
37
|
private octokit;
|
|
@@ -13,7 +13,9 @@ import { calculateRepoQualityBonus, calculateViabilityScore, } from "./issue-sco
|
|
|
13
13
|
import { repoBelongsToCategory } from "./category-mapping.js";
|
|
14
14
|
import { checkNoExistingPR, checkNotClaimed, checkUserMergedPRsInRepo, analyzeRequirements, } from "./issue-eligibility.js";
|
|
15
15
|
import { checkProjectHealth, fetchContributionGuidelines, } from "./repo-health.js";
|
|
16
|
+
import { fetchAndScanAntiLLMPolicy } from "./anti-llm-policy.js";
|
|
16
17
|
import { getHttpCache } from "./http-cache.js";
|
|
18
|
+
import { triageWithSLM, buildTriageInput, } from "./slm-triage.js";
|
|
17
19
|
const MODULE = "issue-vetting";
|
|
18
20
|
/** Vetting concurrency: kept low to reduce burst pressure on GitHub's secondary rate limit. */
|
|
19
21
|
const MAX_CONCURRENT_VETTING = 3;
|
|
@@ -68,6 +70,13 @@ export class IssueVetter {
|
|
|
68
70
|
? Promise.resolve(0)
|
|
69
71
|
: checkUserMergedPRsInRepo(this.octokit, owner, repo),
|
|
70
72
|
]);
|
|
73
|
+
// Anti-LLM scan reuses the CONTRIBUTING text just fetched above —
|
|
74
|
+
// dedup'd to avoid 4 redundant getContent calls on cold-cache repos.
|
|
75
|
+
// We deliberately pass undefined (not null) when guidelines is missing,
|
|
76
|
+
// because fetchContributionGuidelines returns undefined for BOTH a 404
|
|
77
|
+
// and a transient 5xx — collapsing them to null would bypass the
|
|
78
|
+
// anti-llm-policy transient-failure cache safeguard.
|
|
79
|
+
const antiLLMPolicy = await fetchAndScanAntiLLMPolicy(this.octokit, owner, repo, { contributingText: contributionGuidelines?.rawContent });
|
|
71
80
|
const noExistingPR = existingPRCheck.passed;
|
|
72
81
|
const notClaimed = claimCheck.passed;
|
|
73
82
|
// Analyze issue quality
|
|
@@ -87,6 +96,7 @@ export class IssueVetter {
|
|
|
87
96
|
contributionGuidelinesFound: !!contributionGuidelines,
|
|
88
97
|
},
|
|
89
98
|
contributionGuidelines,
|
|
99
|
+
linkedPR: existingPRCheck.linkedPR,
|
|
90
100
|
notes: [],
|
|
91
101
|
};
|
|
92
102
|
// Build notes
|
|
@@ -212,10 +222,28 @@ export class IssueVetter {
|
|
|
212
222
|
else if (starredRepos.includes(repoFullName)) {
|
|
213
223
|
searchPriority = "starred";
|
|
214
224
|
}
|
|
225
|
+
// Optional SLM pre-triage (oss-autopilot#1122). Fail-open: any error
|
|
226
|
+
// path returns null and the rest of the pipeline is unaffected.
|
|
227
|
+
const slmConfig = this.stateReader.getSLMTriageConfig?.() ?? {
|
|
228
|
+
model: "",
|
|
229
|
+
host: "",
|
|
230
|
+
};
|
|
231
|
+
let slmTriage = null;
|
|
232
|
+
if (slmConfig.model) {
|
|
233
|
+
const slmOpts = { model: slmConfig.model };
|
|
234
|
+
if (slmConfig.host)
|
|
235
|
+
slmOpts.host = slmConfig.host;
|
|
236
|
+
slmTriage = await triageWithSLM(buildTriageInput({
|
|
237
|
+
issue: { ...trackedIssue, body: ghIssue.body ?? "" },
|
|
238
|
+
linkedPR: existingPRCheck.linkedPR ?? null,
|
|
239
|
+
}), slmOpts);
|
|
240
|
+
}
|
|
215
241
|
const result = {
|
|
216
242
|
issue: trackedIssue,
|
|
217
243
|
vettingResult,
|
|
218
244
|
projectHealth,
|
|
245
|
+
antiLLMPolicy,
|
|
246
|
+
slmTriage,
|
|
219
247
|
recommendation,
|
|
220
248
|
reasonsToSkip,
|
|
221
249
|
reasonsToApprove,
|
package/dist/core/repo-health.js
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* from issue-level eligibility logic.
|
|
6
6
|
*/
|
|
7
7
|
import { daysBetween } from "./utils.js";
|
|
8
|
-
import { errorMessage } from "./errors.js";
|
|
8
|
+
import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
|
|
9
9
|
import { warn } from "./logger.js";
|
|
10
10
|
import { getHttpCache, cachedRequest, cachedTimeBased } from "./http-cache.js";
|
|
11
11
|
const MODULE = "repo-health";
|
|
@@ -114,6 +114,17 @@ export async function fetchContributionGuidelines(octokit, owner, repo) {
|
|
|
114
114
|
}
|
|
115
115
|
return null;
|
|
116
116
|
})));
|
|
117
|
+
// Pre-scan: auth/rate-limit must propagate even if a faster probe succeeded —
|
|
118
|
+
// otherwise a path-restricted token that 401s on .github/CONTRIBUTING.md but
|
|
119
|
+
// wins on CONTRIBUTING.md would silently hide the auth misconfiguration.
|
|
120
|
+
for (const result of results) {
|
|
121
|
+
if (result.status !== "rejected")
|
|
122
|
+
continue;
|
|
123
|
+
if (getHttpStatusCode(result.reason) === 401 ||
|
|
124
|
+
isRateLimitError(result.reason)) {
|
|
125
|
+
throw result.reason;
|
|
126
|
+
}
|
|
127
|
+
}
|
|
117
128
|
for (let i = 0; i < results.length; i++) {
|
|
118
129
|
const result = results[i];
|
|
119
130
|
if (result.status === "fulfilled" && result.value) {
|
|
@@ -123,11 +134,9 @@ export async function fetchContributionGuidelines(octokit, owner, repo) {
|
|
|
123
134
|
return guidelines;
|
|
124
135
|
}
|
|
125
136
|
if (result.status === "rejected") {
|
|
126
|
-
const
|
|
127
|
-
|
|
128
|
-
:
|
|
129
|
-
if (!msg.includes("404") && !msg.includes("Not Found")) {
|
|
130
|
-
warn(MODULE, `Unexpected error fetching ${filesToCheck[i]} from ${owner}/${repo}: ${msg}`);
|
|
137
|
+
const status = getHttpStatusCode(result.reason);
|
|
138
|
+
if (status !== 404) {
|
|
139
|
+
warn(MODULE, `Unexpected error fetching ${filesToCheck[i]} from ${owner}/${repo}: ${errorMessage(result.reason)}`);
|
|
131
140
|
}
|
|
132
141
|
}
|
|
133
142
|
}
|
package/dist/core/schemas.d.ts
CHANGED
|
@@ -85,6 +85,16 @@ export declare const ContributionGuidelinesSchema: z.ZodObject<{
|
|
|
85
85
|
claRequired: z.ZodOptional<z.ZodBoolean>;
|
|
86
86
|
rawContent: z.ZodOptional<z.ZodString>;
|
|
87
87
|
}, z.core.$strip>;
|
|
88
|
+
export declare const LinkedPRSchema: z.ZodObject<{
|
|
89
|
+
number: z.ZodNumber;
|
|
90
|
+
author: z.ZodString;
|
|
91
|
+
state: z.ZodEnum<{
|
|
92
|
+
closed: "closed";
|
|
93
|
+
open: "open";
|
|
94
|
+
}>;
|
|
95
|
+
merged: z.ZodBoolean;
|
|
96
|
+
url: z.ZodString;
|
|
97
|
+
}, z.core.$strip>;
|
|
88
98
|
export declare const IssueVettingResultSchema: z.ZodObject<{
|
|
89
99
|
passedAllChecks: z.ZodBoolean;
|
|
90
100
|
checks: z.ZodObject<{
|
|
@@ -110,6 +120,16 @@ export declare const IssueVettingResultSchema: z.ZodObject<{
|
|
|
110
120
|
claRequired: z.ZodOptional<z.ZodBoolean>;
|
|
111
121
|
rawContent: z.ZodOptional<z.ZodString>;
|
|
112
122
|
}, z.core.$strip>>;
|
|
123
|
+
linkedPR: z.ZodOptional<z.ZodNullable<z.ZodObject<{
|
|
124
|
+
number: z.ZodNumber;
|
|
125
|
+
author: z.ZodString;
|
|
126
|
+
state: z.ZodEnum<{
|
|
127
|
+
closed: "closed";
|
|
128
|
+
open: "open";
|
|
129
|
+
}>;
|
|
130
|
+
merged: z.ZodBoolean;
|
|
131
|
+
url: z.ZodString;
|
|
132
|
+
}, z.core.$strip>>>;
|
|
113
133
|
notes: z.ZodArray<z.ZodString>;
|
|
114
134
|
}, z.core.$strip>;
|
|
115
135
|
export declare const TrackedIssueSchema: z.ZodObject<{
|
|
@@ -153,6 +173,16 @@ export declare const TrackedIssueSchema: z.ZodObject<{
|
|
|
153
173
|
claRequired: z.ZodOptional<z.ZodBoolean>;
|
|
154
174
|
rawContent: z.ZodOptional<z.ZodString>;
|
|
155
175
|
}, z.core.$strip>>;
|
|
176
|
+
linkedPR: z.ZodOptional<z.ZodNullable<z.ZodObject<{
|
|
177
|
+
number: z.ZodNumber;
|
|
178
|
+
author: z.ZodString;
|
|
179
|
+
state: z.ZodEnum<{
|
|
180
|
+
closed: "closed";
|
|
181
|
+
open: "open";
|
|
182
|
+
}>;
|
|
183
|
+
merged: z.ZodBoolean;
|
|
184
|
+
url: z.ZodString;
|
|
185
|
+
}, z.core.$strip>>>;
|
|
156
186
|
notes: z.ZodArray<z.ZodString>;
|
|
157
187
|
}, z.core.$strip>>;
|
|
158
188
|
}, z.core.$strip>;
|
|
@@ -222,6 +252,8 @@ export declare const ScoutPreferencesSchema: z.ZodObject<{
|
|
|
222
252
|
}>>>;
|
|
223
253
|
broadPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
|
|
224
254
|
skipBroadWhenSufficientResults: z.ZodDefault<z.ZodNumber>;
|
|
255
|
+
slmTriageModel: z.ZodDefault<z.ZodString>;
|
|
256
|
+
slmTriageHost: z.ZodDefault<z.ZodString>;
|
|
225
257
|
}, z.core.$strip>;
|
|
226
258
|
export declare const ScoutStateSchema: z.ZodObject<{
|
|
227
259
|
version: z.ZodLiteral<1>;
|
|
@@ -263,6 +295,8 @@ export declare const ScoutStateSchema: z.ZodObject<{
|
|
|
263
295
|
}>>>;
|
|
264
296
|
broadPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
|
|
265
297
|
skipBroadWhenSufficientResults: z.ZodDefault<z.ZodNumber>;
|
|
298
|
+
slmTriageModel: z.ZodDefault<z.ZodString>;
|
|
299
|
+
slmTriageHost: z.ZodDefault<z.ZodString>;
|
|
266
300
|
}, z.core.$strip>>;
|
|
267
301
|
repoScores: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodObject<{
|
|
268
302
|
repo: z.ZodString;
|
|
@@ -334,6 +368,7 @@ export type StoredMergedPR = z.infer<typeof StoredMergedPRSchema>;
|
|
|
334
368
|
export type StoredClosedPR = z.infer<typeof StoredClosedPRSchema>;
|
|
335
369
|
export type StoredOpenPR = z.infer<typeof StoredOpenPRSchema>;
|
|
336
370
|
export type ContributionGuidelines = z.infer<typeof ContributionGuidelinesSchema>;
|
|
371
|
+
export type LinkedPR = z.infer<typeof LinkedPRSchema>;
|
|
337
372
|
export type IssueVettingResult = z.infer<typeof IssueVettingResultSchema>;
|
|
338
373
|
export type TrackedIssue = z.infer<typeof TrackedIssueSchema>;
|
|
339
374
|
export type ScoutPreferences = z.infer<typeof ScoutPreferencesSchema>;
|
package/dist/core/schemas.js
CHANGED
|
@@ -89,6 +89,13 @@ export const ContributionGuidelinesSchema = z.object({
|
|
|
89
89
|
claRequired: z.boolean().optional(),
|
|
90
90
|
rawContent: z.string().optional(),
|
|
91
91
|
});
|
|
92
|
+
export const LinkedPRSchema = z.object({
|
|
93
|
+
number: z.number(),
|
|
94
|
+
author: z.string(),
|
|
95
|
+
state: z.enum(["open", "closed"]),
|
|
96
|
+
merged: z.boolean(),
|
|
97
|
+
url: z.string(),
|
|
98
|
+
});
|
|
92
99
|
export const IssueVettingResultSchema = z.object({
|
|
93
100
|
passedAllChecks: z.boolean(),
|
|
94
101
|
checks: z.object({
|
|
@@ -99,6 +106,7 @@ export const IssueVettingResultSchema = z.object({
|
|
|
99
106
|
contributionGuidelinesFound: z.boolean(),
|
|
100
107
|
}),
|
|
101
108
|
contributionGuidelines: ContributionGuidelinesSchema.optional(),
|
|
109
|
+
linkedPR: LinkedPRSchema.nullable().optional(),
|
|
102
110
|
notes: z.array(z.string()),
|
|
103
111
|
});
|
|
104
112
|
export const TrackedIssueSchema = z.object({
|
|
@@ -156,6 +164,19 @@ export const ScoutPreferencesSchema = z.object({
|
|
|
156
164
|
defaultStrategy: z.array(SearchStrategySchema).optional(),
|
|
157
165
|
broadPhaseDelayMs: z.number().min(0).max(300000).default(90000),
|
|
158
166
|
skipBroadWhenSufficientResults: z.number().int().min(0).max(100).default(15),
|
|
167
|
+
/**
|
|
168
|
+
* Optional Ollama model id used for SLM pre-triage during vetting
|
|
169
|
+
* (oss-autopilot#1122). Empty disables the feature. Recommended values:
|
|
170
|
+
* `gemma4:e4b` (suggested for capable hardware) or `gemma4:e2b` /
|
|
171
|
+
* `qwen3:1.7b` for low-RAM machines.
|
|
172
|
+
*/
|
|
173
|
+
slmTriageModel: z.string().default(""),
|
|
174
|
+
/**
|
|
175
|
+
* Override the Ollama HTTP host. Defaults to `http://127.0.0.1:11434`
|
|
176
|
+
* when empty. Useful when Ollama runs on a different machine on the
|
|
177
|
+
* local network.
|
|
178
|
+
*/
|
|
179
|
+
slmTriageHost: z.string().default(""),
|
|
159
180
|
});
|
|
160
181
|
// ── Root state schema ───────────────────────────────────────────────
|
|
161
182
|
export const ScoutStateSchema = z.object({
|