@oss-scout/core 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.bundle.cjs +48 -48
- package/dist/cli.js +110 -86
- package/dist/commands/config.d.ts +1 -1
- package/dist/commands/config.js +77 -71
- package/dist/commands/results.d.ts +1 -1
- package/dist/commands/results.js +1 -1
- package/dist/commands/search.d.ts +2 -2
- package/dist/commands/search.js +16 -6
- package/dist/commands/setup.d.ts +1 -1
- package/dist/commands/setup.js +27 -21
- package/dist/commands/validation.d.ts +1 -1
- package/dist/commands/validation.js +1 -1
- package/dist/commands/vet-list.d.ts +2 -2
- package/dist/commands/vet-list.js +12 -5
- package/dist/commands/vet.d.ts +3 -3
- package/dist/commands/vet.js +9 -5
- package/dist/core/bootstrap.d.ts +1 -1
- package/dist/core/bootstrap.js +20 -16
- package/dist/core/category-mapping.d.ts +1 -1
- package/dist/core/category-mapping.js +104 -13
- package/dist/core/errors.d.ts +8 -1
- package/dist/core/errors.js +31 -19
- package/dist/core/gist-state-store.d.ts +1 -1
- package/dist/core/gist-state-store.js +36 -27
- package/dist/core/github.d.ts +1 -1
- package/dist/core/github.js +5 -5
- package/dist/core/http-cache.js +26 -22
- package/dist/core/issue-discovery.d.ts +3 -3
- package/dist/core/issue-discovery.js +325 -270
- package/dist/core/issue-eligibility.d.ts +2 -2
- package/dist/core/issue-eligibility.js +26 -21
- package/dist/core/issue-filtering.js +23 -15
- package/dist/core/issue-scoring.js +1 -1
- package/dist/core/issue-vetting.d.ts +2 -2
- package/dist/core/issue-vetting.js +66 -53
- package/dist/core/local-state.d.ts +1 -1
- package/dist/core/local-state.js +16 -14
- package/dist/core/repo-health.d.ts +2 -2
- package/dist/core/repo-health.js +46 -35
- package/dist/core/schemas.d.ts +3 -1
- package/dist/core/schemas.js +41 -18
- package/dist/core/search-budget.js +3 -3
- package/dist/core/search-phases.d.ts +6 -6
- package/dist/core/search-phases.js +23 -19
- package/dist/core/types.d.ts +9 -9
- package/dist/core/types.js +15 -3
- package/dist/core/utils.d.ts +10 -1
- package/dist/core/utils.js +44 -25
- package/dist/formatters/json.d.ts +1 -1
- package/dist/index.d.ts +7 -7
- package/dist/index.js +5 -5
- package/dist/scout.d.ts +4 -5
- package/dist/scout.js +72 -31
- package/package.json +1 -1
|
@@ -11,24 +11,218 @@
|
|
|
11
11
|
*
|
|
12
12
|
* All state is injected via constructor parameters (ScoutStateReader + ScoutPreferences).
|
|
13
13
|
*/
|
|
14
|
-
import { getOctokit, checkRateLimit } from
|
|
15
|
-
import { getSearchBudgetTracker } from
|
|
16
|
-
import { daysBetween, sleep } from
|
|
17
|
-
import { SCOPE_LABELS } from
|
|
18
|
-
import { CONCRETE_STRATEGIES } from
|
|
19
|
-
import { ValidationError, errorMessage, getHttpStatusCode, isRateLimitError } from
|
|
20
|
-
import { debug, info, warn } from
|
|
21
|
-
import { isDocOnlyIssue, applyPerRepoCap } from
|
|
22
|
-
import { IssueVetter } from
|
|
23
|
-
import { getTopicsForCategories } from
|
|
24
|
-
import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, filterVetAndScore, searchInRepos, searchWithChunkedLabels, } from
|
|
25
|
-
const MODULE =
|
|
14
|
+
import { getOctokit, checkRateLimit } from "./github.js";
|
|
15
|
+
import { getSearchBudgetTracker } from "./search-budget.js";
|
|
16
|
+
import { daysBetween, extractRepoFromUrl, sleep } from "./utils.js";
|
|
17
|
+
import { SCOPE_LABELS, } from "./types.js";
|
|
18
|
+
import { CONCRETE_STRATEGIES } from "./schemas.js";
|
|
19
|
+
import { ValidationError, errorMessage, getHttpStatusCode, isRateLimitError, } from "./errors.js";
|
|
20
|
+
import { debug, info, warn } from "./logger.js";
|
|
21
|
+
import { isDocOnlyIssue, applyPerRepoCap, } from "./issue-filtering.js";
|
|
22
|
+
import { IssueVetter } from "./issue-vetting.js";
|
|
23
|
+
import { getTopicsForCategories } from "./category-mapping.js";
|
|
24
|
+
import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, filterVetAndScore, searchInRepos, searchWithChunkedLabels, } from "./search-phases.js";
|
|
25
|
+
const MODULE = "issue-discovery";
|
|
26
26
|
/** Delay between major search phases to let GitHub's rate limit window cool down. */
|
|
27
27
|
const INTER_PHASE_DELAY_MS = 2000;
|
|
28
28
|
/** If remaining search quota is below this, skip heavy phases (2, 3). */
|
|
29
29
|
const LOW_BUDGET_THRESHOLD = 20;
|
|
30
30
|
/** If remaining search quota is below this, only run Phase 0. */
|
|
31
31
|
const CRITICAL_BUDGET_THRESHOLD = 10;
|
|
32
|
+
/** Build a reusable filter function from config. */
|
|
33
|
+
function buildIssueFilter(config) {
|
|
34
|
+
return (items) => {
|
|
35
|
+
return items.filter((item) => {
|
|
36
|
+
const repoFullName = extractRepoFromUrl(item.repository_url);
|
|
37
|
+
if (!repoFullName)
|
|
38
|
+
return false;
|
|
39
|
+
if (config.excludedRepos.has(repoFullName))
|
|
40
|
+
return false;
|
|
41
|
+
if (config.excludeOrgs.size > 0) {
|
|
42
|
+
const orgName = repoFullName.split("/")[0]?.toLowerCase();
|
|
43
|
+
if (orgName && config.excludeOrgs.has(orgName))
|
|
44
|
+
return false;
|
|
45
|
+
}
|
|
46
|
+
if (config.aiBlocklisted.has(repoFullName))
|
|
47
|
+
return false;
|
|
48
|
+
if (config.lowScoringRepos.has(repoFullName))
|
|
49
|
+
return false;
|
|
50
|
+
const updatedAt = new Date(item.updated_at);
|
|
51
|
+
const ageDays = daysBetween(updatedAt, config.now);
|
|
52
|
+
if (ageDays > config.maxAgeDays)
|
|
53
|
+
return false;
|
|
54
|
+
if (!config.includeDocIssues && isDocOnlyIssue(item))
|
|
55
|
+
return false;
|
|
56
|
+
return true;
|
|
57
|
+
});
|
|
58
|
+
};
|
|
59
|
+
}
|
|
60
|
+
/** Phase 0: Search repos where user has merged PRs (highest merge probability). */
|
|
61
|
+
async function runPhase0(octokit, vetter, repos, baseQualifiers, maxResults, filterIssues) {
|
|
62
|
+
info(MODULE, `Phase 0: Searching issues in ${repos.length} merged-PR repos (no label filter)...`);
|
|
63
|
+
const { candidates, allBatchesFailed, rateLimitHit } = await searchInRepos(octokit, vetter, repos, baseQualifiers, [], maxResults, "merged_pr", filterIssues);
|
|
64
|
+
info(MODULE, `Found ${candidates.length} candidates from merged-PR repos`);
|
|
65
|
+
return {
|
|
66
|
+
candidates,
|
|
67
|
+
error: allBatchesFailed ? "All merged-PR repo batches failed" : null,
|
|
68
|
+
rateLimitHit,
|
|
69
|
+
};
|
|
70
|
+
}
|
|
71
|
+
/** Phase 0.5: Search preferred organizations. */
|
|
72
|
+
async function runPhase05(octokit, vetter, orgsToSearch, baseQualifiers, labels, maxResults, phase0RepoSet, filterIssues) {
|
|
73
|
+
info(MODULE, `Phase 0.5: Searching issues in ${orgsToSearch.length} preferred org(s)...`);
|
|
74
|
+
const orgRepoFilter = orgsToSearch.map((org) => `org:${org}`).join(" OR ");
|
|
75
|
+
const orgOps = orgsToSearch.length - 1;
|
|
76
|
+
try {
|
|
77
|
+
const allItems = await searchWithChunkedLabels(octokit, labels, orgOps, (labelQ) => `${baseQualifiers} ${labelQ} (${orgRepoFilter})`
|
|
78
|
+
.replace(/ +/g, " ")
|
|
79
|
+
.trim(), maxResults * 3);
|
|
80
|
+
if (allItems.length === 0) {
|
|
81
|
+
return { candidates: [], error: null, rateLimitHit: false };
|
|
82
|
+
}
|
|
83
|
+
const filtered = filterIssues(allItems).filter((item) => {
|
|
84
|
+
const repoFullName = extractRepoFromUrl(item.repository_url);
|
|
85
|
+
if (!repoFullName)
|
|
86
|
+
return false;
|
|
87
|
+
return !phase0RepoSet.has(repoFullName);
|
|
88
|
+
});
|
|
89
|
+
const { candidates, allFailed: allVetFailed, rateLimitHit, } = await vetter.vetIssuesParallel(filtered.slice(0, maxResults * 2).map((i) => i.html_url), maxResults, "preferred_org");
|
|
90
|
+
info(MODULE, `Found ${candidates.length} candidates from preferred orgs`);
|
|
91
|
+
return {
|
|
92
|
+
candidates,
|
|
93
|
+
error: allVetFailed ? "All preferred org issue vetting failed" : null,
|
|
94
|
+
rateLimitHit,
|
|
95
|
+
};
|
|
96
|
+
}
|
|
97
|
+
catch (error) {
|
|
98
|
+
const errMsg = errorMessage(error);
|
|
99
|
+
warn(MODULE, `Error searching preferred orgs: ${errMsg}`);
|
|
100
|
+
return {
|
|
101
|
+
candidates: [],
|
|
102
|
+
error: errMsg,
|
|
103
|
+
rateLimitHit: isRateLimitError(error),
|
|
104
|
+
};
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
/** Phase 1: Search starred repos. */
|
|
108
|
+
async function runPhase1(octokit, vetter, repos, baseQualifiers, labels, maxResults, filterIssues) {
|
|
109
|
+
info(MODULE, `Phase 1: Searching issues in ${repos.length} starred repos...`);
|
|
110
|
+
// Cap labels to reduce Search API calls: starred repos already signal user
|
|
111
|
+
// interest, so fewer labels suffice.
|
|
112
|
+
const phase1Labels = labels.slice(0, 3);
|
|
113
|
+
const { candidates, allBatchesFailed, rateLimitHit } = await searchInRepos(octokit, vetter, repos.slice(0, 10), baseQualifiers, phase1Labels, maxResults, "starred", filterIssues);
|
|
114
|
+
info(MODULE, `Found ${candidates.length} candidates from starred repos`);
|
|
115
|
+
return {
|
|
116
|
+
candidates,
|
|
117
|
+
error: allBatchesFailed ? "All starred repo batches failed" : null,
|
|
118
|
+
rateLimitHit,
|
|
119
|
+
};
|
|
120
|
+
}
|
|
121
|
+
/** Phase 2: General label-filtered search with multi-tier interleaving. */
|
|
122
|
+
async function runPhase2(octokit, vetter, scopes, labels, configLabels, baseQualifiers, maxResults, minStars, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues) {
|
|
123
|
+
info(MODULE, "Phase 2: General issue search...");
|
|
124
|
+
const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
|
|
125
|
+
// Build per-tier label groups. Multi-tier when 2+ scopes; single-tier otherwise.
|
|
126
|
+
const tierLabelGroups = [];
|
|
127
|
+
if (scopes && scopes.length > 1) {
|
|
128
|
+
for (const scope of scopes) {
|
|
129
|
+
const scopeLabels = SCOPE_LABELS[scope] ?? [];
|
|
130
|
+
if (scopeLabels.length === 0) {
|
|
131
|
+
warn(MODULE, `Scope "${scope}" has no labels, skipping tier`);
|
|
132
|
+
continue;
|
|
133
|
+
}
|
|
134
|
+
tierLabelGroups.push({ tier: scope, tierLabels: scopeLabels });
|
|
135
|
+
}
|
|
136
|
+
const allScopeLabels = new Set(scopes.flatMap((s) => SCOPE_LABELS[s] ?? []));
|
|
137
|
+
const customOnly = configLabels.filter((l) => !allScopeLabels.has(l));
|
|
138
|
+
if (customOnly.length > 0) {
|
|
139
|
+
tierLabelGroups.push({ tier: "custom", tierLabels: customOnly });
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
else {
|
|
143
|
+
tierLabelGroups.push({ tier: "general", tierLabels: labels });
|
|
144
|
+
}
|
|
145
|
+
const budgetPerTier = Math.ceil(maxResults / tierLabelGroups.length);
|
|
146
|
+
const tierResults = [];
|
|
147
|
+
let error = null;
|
|
148
|
+
let rateLimitHit = false;
|
|
149
|
+
for (const { tier, tierLabels } of tierLabelGroups) {
|
|
150
|
+
try {
|
|
151
|
+
const allItems = await searchWithChunkedLabels(octokit, tierLabels, 0, (labelQ) => `${baseQualifiers} ${labelQ}`.replace(/ +/g, " ").trim(), budgetPerTier * 3);
|
|
152
|
+
info(MODULE, `Phase 2 [${tier}]: processing ${allItems.length} items...`);
|
|
153
|
+
const { candidates: tierCandidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, allItems, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], budgetPerTier, minStars, `Phase 2 [${tier}]`);
|
|
154
|
+
tierResults.push(tierCandidates);
|
|
155
|
+
for (const c of tierCandidates)
|
|
156
|
+
seenRepos.add(c.issue.repo);
|
|
157
|
+
if (allVetFailed) {
|
|
158
|
+
error = (error ? error + "; " : "") + `${tier}: all vetting failed`;
|
|
159
|
+
}
|
|
160
|
+
if (vetRateLimitHit) {
|
|
161
|
+
rateLimitHit = true;
|
|
162
|
+
}
|
|
163
|
+
info(MODULE, `Found ${tierCandidates.length} candidates from ${tier} tier`);
|
|
164
|
+
}
|
|
165
|
+
catch (err) {
|
|
166
|
+
if (getHttpStatusCode(err) === 401)
|
|
167
|
+
throw err;
|
|
168
|
+
const errMsg = errorMessage(err);
|
|
169
|
+
error = (error ? error + "; " : "") + `${tier}: ${errMsg}`;
|
|
170
|
+
if (isRateLimitError(err)) {
|
|
171
|
+
rateLimitHit = true;
|
|
172
|
+
}
|
|
173
|
+
warn(MODULE, `Error in ${tier} tier search: ${errMsg}`);
|
|
174
|
+
tierResults.push([]);
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
const interleaved = interleaveArrays(tierResults);
|
|
178
|
+
if (interleaved.length === 0 && error) {
|
|
179
|
+
warn(MODULE, `All ${tierLabelGroups.length} scope tiers failed in Phase 2: ${error}`);
|
|
180
|
+
}
|
|
181
|
+
return {
|
|
182
|
+
candidates: interleaved.slice(0, maxResults),
|
|
183
|
+
error,
|
|
184
|
+
rateLimitHit,
|
|
185
|
+
};
|
|
186
|
+
}
|
|
187
|
+
/** Phase 3: Actively maintained repos. */
|
|
188
|
+
async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories, maxResults, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues) {
|
|
189
|
+
info(MODULE, "Phase 3: Searching actively maintained repos...");
|
|
190
|
+
const thirtyDaysAgo = new Date();
|
|
191
|
+
thirtyDaysAgo.setDate(thirtyDaysAgo.getDate() - 30);
|
|
192
|
+
const pushedSince = thirtyDaysAgo.toISOString().split("T")[0];
|
|
193
|
+
const categoryTopics = getTopicsForCategories(projectCategories);
|
|
194
|
+
const topicQuery = categoryTopics.length > 0 ? `topic:${categoryTopics[0]}` : "";
|
|
195
|
+
const phase3Query = `is:issue is:open no:assignee ${langQuery} ${topicQuery} stars:>=${minStars} pushed:>=${pushedSince} archived:false`
|
|
196
|
+
.replace(/ +/g, " ")
|
|
197
|
+
.trim();
|
|
198
|
+
try {
|
|
199
|
+
const data = await cachedSearchIssues(octokit, {
|
|
200
|
+
q: phase3Query,
|
|
201
|
+
sort: "updated",
|
|
202
|
+
order: "desc",
|
|
203
|
+
per_page: maxResults * 3,
|
|
204
|
+
});
|
|
205
|
+
info(MODULE, `Found ${data.total_count} issues in maintained-repo search, processing top ${data.items.length}...`);
|
|
206
|
+
const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
|
|
207
|
+
const { candidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, data.items, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], maxResults, minStars, "Phase 3");
|
|
208
|
+
info(MODULE, `Found ${candidates.length} candidates from maintained-repo search`);
|
|
209
|
+
return {
|
|
210
|
+
candidates,
|
|
211
|
+
error: allVetFailed ? "all vetting failed" : null,
|
|
212
|
+
rateLimitHit: vetRateLimitHit,
|
|
213
|
+
};
|
|
214
|
+
}
|
|
215
|
+
catch (error) {
|
|
216
|
+
const errMsg = errorMessage(error);
|
|
217
|
+
warn(MODULE, `Error in maintained-repo search: ${errMsg}`);
|
|
218
|
+
return {
|
|
219
|
+
candidates: [],
|
|
220
|
+
error: errMsg,
|
|
221
|
+
rateLimitHit: isRateLimitError(error),
|
|
222
|
+
};
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
// ── IssueDiscovery class ─────────────────────────────────────────────
|
|
32
226
|
/**
|
|
33
227
|
* Multi-phase issue discovery engine that searches GitHub for contributable issues.
|
|
34
228
|
*
|
|
@@ -95,29 +289,30 @@ export class IssueDiscovery {
|
|
|
95
289
|
async searchIssues(options = {}) {
|
|
96
290
|
const config = this.preferences;
|
|
97
291
|
const languages = options.languages || config.languages;
|
|
98
|
-
const scopes = config.scope;
|
|
99
|
-
const labels = options.labels ||
|
|
292
|
+
const scopes = config.scope;
|
|
293
|
+
const labels = options.labels ||
|
|
294
|
+
(scopes ? buildEffectiveLabels(scopes, config.labels) : config.labels);
|
|
100
295
|
const maxResults = options.maxResults || 10;
|
|
101
296
|
const minStars = config.minStars ?? 50;
|
|
102
|
-
// Strategy selection
|
|
297
|
+
// Strategy selection
|
|
103
298
|
const ALL_STRATEGIES = CONCRETE_STRATEGIES;
|
|
104
|
-
const rawStrategies = options.strategies ??
|
|
105
|
-
|
|
299
|
+
const rawStrategies = options.strategies ??
|
|
300
|
+
config.defaultStrategy ?? ["all"];
|
|
301
|
+
const enabledStrategies = new Set(rawStrategies.includes("all") ? ALL_STRATEGIES : rawStrategies);
|
|
106
302
|
const strategiesUsed = [];
|
|
107
303
|
const allCandidates = [];
|
|
108
|
-
|
|
109
|
-
let phase1Error = null;
|
|
304
|
+
const phaseErrors = {};
|
|
110
305
|
let rateLimitHitDuringSearch = false;
|
|
111
|
-
// Pre-flight rate limit check
|
|
306
|
+
// Pre-flight rate limit check
|
|
112
307
|
this.rateLimitWarning = null;
|
|
113
308
|
const tracker = getSearchBudgetTracker();
|
|
114
|
-
let searchBudget = LOW_BUDGET_THRESHOLD - 1;
|
|
309
|
+
let searchBudget = LOW_BUDGET_THRESHOLD - 1;
|
|
115
310
|
try {
|
|
116
311
|
const rateLimit = await checkRateLimit(this.githubToken);
|
|
117
312
|
searchBudget = rateLimit.remaining;
|
|
118
313
|
tracker.init(rateLimit.remaining, rateLimit.resetAt);
|
|
119
314
|
if (rateLimit.remaining < 5) {
|
|
120
|
-
const resetTime = new Date(rateLimit.resetAt).toLocaleTimeString(
|
|
315
|
+
const resetTime = new Date(rateLimit.resetAt).toLocaleTimeString("en-US", { hour12: false });
|
|
121
316
|
this.rateLimitWarning = `GitHub search API quota low (${rateLimit.remaining}/${rateLimit.limit} remaining, resets at ${resetTime}). Search may be slow.`;
|
|
122
317
|
warn(MODULE, this.rateLimitWarning);
|
|
123
318
|
}
|
|
@@ -129,271 +324,122 @@ export class IssueDiscovery {
|
|
|
129
324
|
}
|
|
130
325
|
}
|
|
131
326
|
catch (error) {
|
|
132
|
-
|
|
133
|
-
if (getHttpStatusCode(error) === 401) {
|
|
327
|
+
if (getHttpStatusCode(error) === 401)
|
|
134
328
|
throw error;
|
|
135
|
-
}
|
|
136
|
-
// Non-fatal: proceed with conservative budget for transient/network errors.
|
|
137
|
-
// Initialize tracker with conservative defaults so it doesn't fly blind.
|
|
138
329
|
tracker.init(CRITICAL_BUDGET_THRESHOLD, new Date(Date.now() + 60000).toISOString());
|
|
139
|
-
warn(MODULE,
|
|
330
|
+
warn(MODULE, "Could not check rate limit — using conservative budget, skipping heavy phases:", errorMessage(error));
|
|
140
331
|
}
|
|
141
|
-
//
|
|
332
|
+
// Derive search context
|
|
142
333
|
const mergedPRRepos = this.stateReader.getReposWithMergedPRs();
|
|
143
|
-
// Get starred repos (from local cache or state reader)
|
|
144
334
|
const starredRepos = this.getStarredRepos();
|
|
145
335
|
const starredRepoSet = new Set(starredRepos);
|
|
146
|
-
|
|
147
|
-
const minRepoScoreThreshold = config.minRepoScoreThreshold;
|
|
148
|
-
const lowScoringRepos = new Set(this.deriveLowScoringRepos(minRepoScoreThreshold));
|
|
149
|
-
// Common filters
|
|
150
|
-
const excludedRepos = new Set(config.excludeRepos);
|
|
151
|
-
const maxAgeDays = config.maxIssueAgeDays || 90;
|
|
152
|
-
const now = new Date();
|
|
336
|
+
const lowScoringRepos = new Set(this.deriveLowScoringRepos(config.minRepoScoreThreshold));
|
|
153
337
|
// Build query parts
|
|
154
|
-
|
|
155
|
-
const
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
338
|
+
const isAnyLanguage = languages.some((l) => l.toLowerCase() === "any");
|
|
339
|
+
const langQuery = isAnyLanguage
|
|
340
|
+
? ""
|
|
341
|
+
: languages.map((l) => `language:${l}`).join(" ");
|
|
342
|
+
const baseQualifiers = `is:issue is:open ${langQuery} no:assignee`
|
|
343
|
+
.replace(/ +/g, " ")
|
|
344
|
+
.trim();
|
|
345
|
+
// Build reusable filter
|
|
162
346
|
const aiBlocklisted = new Set(config.aiPolicyBlocklist);
|
|
163
347
|
if (aiBlocklisted.size > 0) {
|
|
164
|
-
debug(MODULE, `[AI_POLICY_FILTER] Filtering issues from ${aiBlocklisted.size} blocklisted repo(s): ${[...aiBlocklisted].join(
|
|
348
|
+
debug(MODULE, `[AI_POLICY_FILTER] Filtering issues from ${aiBlocklisted.size} blocklisted repo(s): ${[...aiBlocklisted].join(", ")}`);
|
|
165
349
|
}
|
|
166
|
-
const filterIssues = (
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
return false;
|
|
177
|
-
// Filter by issue age based on updated_at
|
|
178
|
-
const updatedAt = new Date(item.updated_at);
|
|
179
|
-
const ageDays = daysBetween(updatedAt, now);
|
|
180
|
-
if (ageDays > maxAgeDays)
|
|
181
|
-
return false;
|
|
182
|
-
// Filter out doc-only issues unless opted in
|
|
183
|
-
if (!includeDocIssues && isDocOnlyIssue(item))
|
|
184
|
-
return false;
|
|
185
|
-
return true;
|
|
186
|
-
});
|
|
187
|
-
};
|
|
188
|
-
// Phase 0: Search repos where user has merged PRs (highest merge probability)
|
|
350
|
+
const filterIssues = buildIssueFilter({
|
|
351
|
+
excludedRepos: new Set(config.excludeRepos),
|
|
352
|
+
excludeOrgs: new Set((config.excludeOrgs ?? []).map((o) => o.toLowerCase())),
|
|
353
|
+
aiBlocklisted,
|
|
354
|
+
lowScoringRepos,
|
|
355
|
+
maxAgeDays: config.maxIssueAgeDays || 90,
|
|
356
|
+
now: new Date(),
|
|
357
|
+
includeDocIssues: config.includeDocIssues ?? true,
|
|
358
|
+
});
|
|
359
|
+
// Phase 0: Merged-PR repos
|
|
189
360
|
const phase0Repos = mergedPRRepos.slice(0, 10);
|
|
190
361
|
const phase0RepoSet = new Set(phase0Repos);
|
|
191
|
-
if (phase0Repos.length > 0 && enabledStrategies.has(
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
if (
|
|
198
|
-
phase0Error = 'All merged-PR repo batches failed';
|
|
199
|
-
}
|
|
200
|
-
if (rateLimitHit) {
|
|
362
|
+
if (phase0Repos.length > 0 && enabledStrategies.has("merged")) {
|
|
363
|
+
const remaining = maxResults - allCandidates.length;
|
|
364
|
+
if (remaining > 0) {
|
|
365
|
+
const result = await runPhase0(this.octokit, this.vetter, phase0Repos, baseQualifiers, remaining, filterIssues);
|
|
366
|
+
allCandidates.push(...result.candidates);
|
|
367
|
+
phaseErrors["0"] = result.error;
|
|
368
|
+
if (result.rateLimitHit)
|
|
201
369
|
rateLimitHitDuringSearch = true;
|
|
202
|
-
}
|
|
203
|
-
info(MODULE, `Found ${mergedCandidates.length} candidates from merged-PR repos`);
|
|
204
370
|
}
|
|
205
|
-
strategiesUsed.push(
|
|
371
|
+
strategiesUsed.push("merged");
|
|
206
372
|
}
|
|
207
|
-
// Phase 0.5:
|
|
208
|
-
// Skip if budget is critical — Phase 0 results are sufficient
|
|
209
|
-
let phase0_5Error = null;
|
|
373
|
+
// Phase 0.5: Preferred organizations
|
|
210
374
|
const preferredOrgs = config.preferredOrgs ?? [];
|
|
211
|
-
if (allCandidates.length < maxResults &&
|
|
212
|
-
|
|
375
|
+
if (allCandidates.length < maxResults &&
|
|
376
|
+
preferredOrgs.length > 0 &&
|
|
377
|
+
searchBudget >= CRITICAL_BUDGET_THRESHOLD &&
|
|
378
|
+
enabledStrategies.has("orgs")) {
|
|
213
379
|
if (phase0Repos.length > 0)
|
|
214
380
|
await sleep(INTER_PHASE_DELAY_MS);
|
|
215
|
-
|
|
216
|
-
const
|
|
217
|
-
|
|
381
|
+
const phase0Orgs = new Set(phase0Repos.map((r) => r.split("/")[0]?.toLowerCase()));
|
|
382
|
+
const orgsToSearch = preferredOrgs
|
|
383
|
+
.filter((org) => !phase0Orgs.has(org.toLowerCase()))
|
|
384
|
+
.slice(0, 5);
|
|
218
385
|
if (orgsToSearch.length > 0) {
|
|
219
|
-
|
|
220
|
-
const
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
if (allItems.length > 0) {
|
|
226
|
-
const filtered = filterIssues(allItems).filter((item) => {
|
|
227
|
-
const repoFullName = item.repository_url.split('/').slice(-2).join('/');
|
|
228
|
-
return !phase0RepoSet.has(repoFullName);
|
|
229
|
-
});
|
|
230
|
-
const { candidates: orgCandidates, allFailed: allVetFailed, rateLimitHit, } = await this.vetter.vetIssuesParallel(filtered.slice(0, remainingNeeded * 2).map((i) => i.html_url), remainingNeeded, 'preferred_org');
|
|
231
|
-
allCandidates.push(...orgCandidates);
|
|
232
|
-
if (allVetFailed) {
|
|
233
|
-
phase0_5Error = 'All preferred org issue vetting failed';
|
|
234
|
-
}
|
|
235
|
-
if (rateLimitHit) {
|
|
236
|
-
rateLimitHitDuringSearch = true;
|
|
237
|
-
}
|
|
238
|
-
info(MODULE, `Found ${orgCandidates.length} candidates from preferred orgs`);
|
|
239
|
-
}
|
|
240
|
-
}
|
|
241
|
-
catch (error) {
|
|
242
|
-
const errMsg = errorMessage(error);
|
|
243
|
-
phase0_5Error = errMsg;
|
|
244
|
-
if (isRateLimitError(error)) {
|
|
245
|
-
rateLimitHitDuringSearch = true;
|
|
246
|
-
}
|
|
247
|
-
warn(MODULE, `Error searching preferred orgs: ${errMsg}`);
|
|
248
|
-
}
|
|
386
|
+
const remaining = maxResults - allCandidates.length;
|
|
387
|
+
const result = await runPhase05(this.octokit, this.vetter, orgsToSearch, baseQualifiers, labels, remaining, phase0RepoSet, filterIssues);
|
|
388
|
+
allCandidates.push(...result.candidates);
|
|
389
|
+
phaseErrors["0.5"] = result.error;
|
|
390
|
+
if (result.rateLimitHit)
|
|
391
|
+
rateLimitHitDuringSearch = true;
|
|
249
392
|
}
|
|
250
|
-
strategiesUsed.push(
|
|
393
|
+
strategiesUsed.push("orgs");
|
|
251
394
|
}
|
|
252
|
-
// Phase 1:
|
|
253
|
-
|
|
254
|
-
|
|
395
|
+
// Phase 1: Starred repos
|
|
396
|
+
if (allCandidates.length < maxResults &&
|
|
397
|
+
starredRepos.length > 0 &&
|
|
398
|
+
searchBudget >= CRITICAL_BUDGET_THRESHOLD &&
|
|
399
|
+
enabledStrategies.has("starred")) {
|
|
255
400
|
await sleep(INTER_PHASE_DELAY_MS);
|
|
256
401
|
const reposToSearch = starredRepos.filter((r) => !phase0RepoSet.has(r));
|
|
257
402
|
if (reposToSearch.length > 0) {
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
// from ~12 to ~4.
|
|
265
|
-
const phase1Labels = labels.slice(0, 3);
|
|
266
|
-
const { candidates: starredCandidates, allBatchesFailed, rateLimitHit, } = await searchInRepos(this.octokit, this.vetter, reposToSearch.slice(0, 10), baseQualifiers, phase1Labels, remainingNeeded, 'starred', filterIssues);
|
|
267
|
-
allCandidates.push(...starredCandidates);
|
|
268
|
-
if (allBatchesFailed) {
|
|
269
|
-
phase1Error = 'All starred repo batches failed';
|
|
270
|
-
}
|
|
271
|
-
if (rateLimitHit) {
|
|
403
|
+
const remaining = maxResults - allCandidates.length;
|
|
404
|
+
if (remaining > 0) {
|
|
405
|
+
const result = await runPhase1(this.octokit, this.vetter, reposToSearch, baseQualifiers, labels, remaining, filterIssues);
|
|
406
|
+
allCandidates.push(...result.candidates);
|
|
407
|
+
phaseErrors["1"] = result.error;
|
|
408
|
+
if (result.rateLimitHit)
|
|
272
409
|
rateLimitHitDuringSearch = true;
|
|
273
|
-
}
|
|
274
|
-
info(MODULE, `Found ${starredCandidates.length} candidates from starred repos`);
|
|
275
410
|
}
|
|
276
411
|
}
|
|
277
|
-
strategiesUsed.push(
|
|
412
|
+
strategiesUsed.push("starred");
|
|
278
413
|
}
|
|
279
|
-
// Phase 2: General search
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
// beginner results.
|
|
284
|
-
let phase2Error = null;
|
|
285
|
-
if (allCandidates.length < maxResults && searchBudget >= LOW_BUDGET_THRESHOLD && enabledStrategies.has('broad')) {
|
|
414
|
+
// Phase 2: General search
|
|
415
|
+
if (allCandidates.length < maxResults &&
|
|
416
|
+
searchBudget >= LOW_BUDGET_THRESHOLD &&
|
|
417
|
+
enabledStrategies.has("broad")) {
|
|
286
418
|
await sleep(INTER_PHASE_DELAY_MS);
|
|
287
|
-
|
|
288
|
-
const
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
const scopeLabels = SCOPE_LABELS[scope] ?? [];
|
|
295
|
-
if (scopeLabels.length === 0) {
|
|
296
|
-
warn(MODULE, `Scope "${scope}" has no labels, skipping tier`);
|
|
297
|
-
continue;
|
|
298
|
-
}
|
|
299
|
-
tierLabelGroups.push({ tier: scope, tierLabels: scopeLabels });
|
|
300
|
-
}
|
|
301
|
-
// Custom labels not in any tier get their own pseudo-tier
|
|
302
|
-
const allScopeLabels = new Set(scopes.flatMap((s) => SCOPE_LABELS[s] ?? []));
|
|
303
|
-
const customOnly = config.labels.filter((l) => !allScopeLabels.has(l));
|
|
304
|
-
if (customOnly.length > 0) {
|
|
305
|
-
tierLabelGroups.push({ tier: 'custom', tierLabels: customOnly });
|
|
306
|
-
}
|
|
307
|
-
}
|
|
308
|
-
else {
|
|
309
|
-
tierLabelGroups.push({ tier: 'general', tierLabels: labels });
|
|
310
|
-
}
|
|
311
|
-
const budgetPerTier = Math.ceil(remainingNeeded / tierLabelGroups.length);
|
|
312
|
-
const tierResults = [];
|
|
313
|
-
for (const { tier, tierLabels } of tierLabelGroups) {
|
|
314
|
-
try {
|
|
315
|
-
const allItems = await searchWithChunkedLabels(this.octokit, tierLabels, 0, // no repo/org ORs in Phase 2
|
|
316
|
-
(labelQ) => `${baseQualifiers} ${labelQ}`.replace(/ +/g, ' ').trim(), budgetPerTier * 3);
|
|
317
|
-
info(MODULE, `Phase 2 [${tier}]: processing ${allItems.length} items...`);
|
|
318
|
-
const { candidates: tierCandidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(this.vetter, allItems, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], budgetPerTier, minStars, `Phase 2 [${tier}]`);
|
|
319
|
-
tierResults.push(tierCandidates);
|
|
320
|
-
// Update seenRepos so later tiers don't return duplicate repos
|
|
321
|
-
for (const c of tierCandidates)
|
|
322
|
-
seenRepos.add(c.issue.repo);
|
|
323
|
-
if (allVetFailed) {
|
|
324
|
-
phase2Error = (phase2Error ? phase2Error + '; ' : '') + `${tier}: all vetting failed`;
|
|
325
|
-
}
|
|
326
|
-
if (vetRateLimitHit) {
|
|
327
|
-
rateLimitHitDuringSearch = true;
|
|
328
|
-
}
|
|
329
|
-
info(MODULE, `Found ${tierCandidates.length} candidates from ${tier} tier`);
|
|
330
|
-
}
|
|
331
|
-
catch (error) {
|
|
332
|
-
if (getHttpStatusCode(error) === 401)
|
|
333
|
-
throw error;
|
|
334
|
-
const errMsg = errorMessage(error);
|
|
335
|
-
phase2Error = (phase2Error ? phase2Error + '; ' : '') + `${tier}: ${errMsg}`;
|
|
336
|
-
if (isRateLimitError(error)) {
|
|
337
|
-
rateLimitHitDuringSearch = true;
|
|
338
|
-
}
|
|
339
|
-
warn(MODULE, `Error in ${tier} tier search: ${errMsg}`);
|
|
340
|
-
tierResults.push([]);
|
|
341
|
-
}
|
|
342
|
-
}
|
|
343
|
-
const interleaved = interleaveArrays(tierResults);
|
|
344
|
-
if (interleaved.length === 0 && phase2Error) {
|
|
345
|
-
warn(MODULE, `All ${tierLabelGroups.length} scope tiers failed in Phase 2: ${phase2Error}`);
|
|
346
|
-
}
|
|
347
|
-
allCandidates.push(...interleaved.slice(0, remainingNeeded));
|
|
348
|
-
strategiesUsed.push('broad');
|
|
419
|
+
const remaining = maxResults - allCandidates.length;
|
|
420
|
+
const result = await runPhase2(this.octokit, this.vetter, scopes, labels, config.labels, baseQualifiers, remaining, minStars, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
|
|
421
|
+
allCandidates.push(...result.candidates);
|
|
422
|
+
phaseErrors["2"] = result.error;
|
|
423
|
+
if (result.rateLimitHit)
|
|
424
|
+
rateLimitHitDuringSearch = true;
|
|
425
|
+
strategiesUsed.push("broad");
|
|
349
426
|
}
|
|
350
427
|
// Phase 3: Actively maintained repos
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
428
|
+
if (allCandidates.length < maxResults &&
|
|
429
|
+
searchBudget >= LOW_BUDGET_THRESHOLD &&
|
|
430
|
+
enabledStrategies.has("maintained")) {
|
|
354
431
|
await sleep(INTER_PHASE_DELAY_MS);
|
|
355
|
-
|
|
356
|
-
const
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
const phase3Query = `is:issue is:open no:assignee ${langQuery} ${topicQuery} stars:>=${minStars} pushed:>=${pushedSince} archived:false`
|
|
363
|
-
.replace(/ +/g, ' ')
|
|
364
|
-
.trim();
|
|
365
|
-
try {
|
|
366
|
-
const data = await cachedSearchIssues(this.octokit, {
|
|
367
|
-
q: phase3Query,
|
|
368
|
-
sort: 'updated',
|
|
369
|
-
order: 'desc',
|
|
370
|
-
per_page: remainingNeeded * 3,
|
|
371
|
-
});
|
|
372
|
-
info(MODULE, `Found ${data.total_count} issues in maintained-repo search, processing top ${data.items.length}...`);
|
|
373
|
-
const seenRepos = new Set(allCandidates.map((c) => c.issue.repo));
|
|
374
|
-
const { candidates: starFiltered, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(this.vetter, data.items, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], remainingNeeded, minStars, 'Phase 3');
|
|
375
|
-
allCandidates.push(...starFiltered);
|
|
376
|
-
if (allVetFailed) {
|
|
377
|
-
phase3Error = 'all vetting failed';
|
|
378
|
-
}
|
|
379
|
-
if (vetRateLimitHit) {
|
|
380
|
-
rateLimitHitDuringSearch = true;
|
|
381
|
-
}
|
|
382
|
-
info(MODULE, `Found ${starFiltered.length} candidates from maintained-repo search`);
|
|
383
|
-
}
|
|
384
|
-
catch (error) {
|
|
385
|
-
const errMsg = errorMessage(error);
|
|
386
|
-
phase3Error = errMsg;
|
|
387
|
-
if (isRateLimitError(error)) {
|
|
388
|
-
rateLimitHitDuringSearch = true;
|
|
389
|
-
}
|
|
390
|
-
warn(MODULE, `Error in maintained-repo search: ${errMsg}`);
|
|
391
|
-
}
|
|
392
|
-
strategiesUsed.push('maintained');
|
|
432
|
+
const remaining = maxResults - allCandidates.length;
|
|
433
|
+
const result = await runPhase3(this.octokit, this.vetter, langQuery, minStars, config.projectCategories ?? [], remaining, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
|
|
434
|
+
allCandidates.push(...result.candidates);
|
|
435
|
+
phaseErrors["3"] = result.error;
|
|
436
|
+
if (result.rateLimitHit)
|
|
437
|
+
rateLimitHitDuringSearch = true;
|
|
438
|
+
strategiesUsed.push("maintained");
|
|
393
439
|
}
|
|
394
|
-
//
|
|
440
|
+
// Build result / error summary
|
|
395
441
|
const phasesSkippedForBudget = searchBudget < LOW_BUDGET_THRESHOLD;
|
|
396
|
-
let budgetNote =
|
|
442
|
+
let budgetNote = "";
|
|
397
443
|
if (searchBudget < CRITICAL_BUDGET_THRESHOLD) {
|
|
398
444
|
budgetNote = ` Most search phases were skipped due to critically low API quota (${searchBudget} remaining).`;
|
|
399
445
|
}
|
|
@@ -401,14 +447,22 @@ export class IssueDiscovery {
|
|
|
401
447
|
budgetNote = ` Some search phases were skipped due to low API quota (${searchBudget} remaining).`;
|
|
402
448
|
}
|
|
403
449
|
if (allCandidates.length === 0) {
|
|
404
|
-
const
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
450
|
+
const errorDetails = [
|
|
451
|
+
phaseErrors["0"]
|
|
452
|
+
? `Phase 0 (merged-PR repos): ${phaseErrors["0"]}`
|
|
453
|
+
: null,
|
|
454
|
+
phaseErrors["0.5"]
|
|
455
|
+
? `Phase 0.5 (preferred orgs): ${phaseErrors["0.5"]}`
|
|
456
|
+
: null,
|
|
457
|
+
phaseErrors["1"]
|
|
458
|
+
? `Phase 1 (starred repos): ${phaseErrors["1"]}`
|
|
459
|
+
: null,
|
|
460
|
+
phaseErrors["2"] ? `Phase 2 (general): ${phaseErrors["2"]}` : null,
|
|
461
|
+
phaseErrors["3"]
|
|
462
|
+
? `Phase 3 (maintained repos): ${phaseErrors["3"]}`
|
|
463
|
+
: null,
|
|
410
464
|
].filter(Boolean);
|
|
411
|
-
const details =
|
|
465
|
+
const details = errorDetails.length > 0 ? ` ${errorDetails.join(". ")}.` : "";
|
|
412
466
|
if (rateLimitHitDuringSearch || phasesSkippedForBudget) {
|
|
413
467
|
this.rateLimitWarning =
|
|
414
468
|
`Search returned no results due to GitHub API rate limits.${details}${budgetNote} ` +
|
|
@@ -416,28 +470,32 @@ export class IssueDiscovery {
|
|
|
416
470
|
return { candidates: [], strategiesUsed };
|
|
417
471
|
}
|
|
418
472
|
throw new ValidationError(`No issue candidates found across all search phases.${details} ` +
|
|
419
|
-
|
|
473
|
+
"Try adjusting your search criteria (languages, labels) or check your network connection.");
|
|
420
474
|
}
|
|
421
|
-
// Surface rate limit warning even with partial results
|
|
422
475
|
if (rateLimitHitDuringSearch || phasesSkippedForBudget) {
|
|
423
476
|
this.rateLimitWarning =
|
|
424
477
|
`Search results may be incomplete: GitHub API rate limits were hit during search.${budgetNote} ` +
|
|
425
|
-
`Found ${allCandidates.length} candidate${allCandidates.length === 1 ?
|
|
478
|
+
`Found ${allCandidates.length} candidate${allCandidates.length === 1 ? "" : "s"} but some search phases were limited. ` +
|
|
426
479
|
`Try again after the rate limit resets for complete results.`;
|
|
427
480
|
}
|
|
428
|
-
// Sort by priority
|
|
481
|
+
// Sort by priority, recommendation, then viability score
|
|
429
482
|
allCandidates.sort((a, b) => {
|
|
430
|
-
const priorityOrder = {
|
|
483
|
+
const priorityOrder = {
|
|
484
|
+
merged_pr: 0,
|
|
485
|
+
preferred_org: 1,
|
|
486
|
+
starred: 2,
|
|
487
|
+
normal: 3,
|
|
488
|
+
};
|
|
431
489
|
const priorityDiff = priorityOrder[a.searchPriority] - priorityOrder[b.searchPriority];
|
|
432
490
|
if (priorityDiff !== 0)
|
|
433
491
|
return priorityDiff;
|
|
434
492
|
const recommendationOrder = { approve: 0, needs_review: 1, skip: 2 };
|
|
435
|
-
const recDiff = recommendationOrder[a.recommendation] -
|
|
493
|
+
const recDiff = recommendationOrder[a.recommendation] -
|
|
494
|
+
recommendationOrder[b.recommendation];
|
|
436
495
|
if (recDiff !== 0)
|
|
437
496
|
return recDiff;
|
|
438
497
|
return b.viabilityScore - a.viabilityScore;
|
|
439
498
|
});
|
|
440
|
-
// Apply per-repo cap: max 2 issues from any single repo
|
|
441
499
|
const capped = applyPerRepoCap(allCandidates, 2);
|
|
442
500
|
info(MODULE, `Search complete: ${tracker.getTotalCalls()} Search API calls used, ${capped.length} candidates returned`);
|
|
443
501
|
return { candidates: capped.slice(0, maxResults), strategiesUsed };
|
|
@@ -457,9 +515,6 @@ export class IssueDiscovery {
|
|
|
457
515
|
*/
|
|
458
516
|
deriveLowScoringRepos(threshold) {
|
|
459
517
|
const lowScoring = [];
|
|
460
|
-
// The ScoutStateReader doesn't expose a bulk "get all repos with scores" method,
|
|
461
|
-
// so we rely on the mergedPRRepos + starredRepos as the universe of known repos
|
|
462
|
-
// and check each one's score. Repos not in state simply return null (no penalty).
|
|
463
518
|
const knownRepos = new Set([
|
|
464
519
|
...this.stateReader.getReposWithMergedPRs(),
|
|
465
520
|
...this.stateReader.getStarredRepos(),
|