@oss-scout/core 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.bundle.cjs +51 -47
- package/dist/cli.js +218 -87
- package/dist/commands/config.d.ts +2 -4
- package/dist/commands/config.js +76 -78
- package/dist/commands/results.d.ts +1 -1
- package/dist/commands/results.js +1 -1
- package/dist/commands/search.d.ts +2 -2
- package/dist/commands/search.js +16 -6
- package/dist/commands/setup.d.ts +1 -1
- package/dist/commands/setup.js +25 -25
- package/dist/commands/skip.d.ts +33 -0
- package/dist/commands/skip.js +89 -0
- package/dist/commands/validation.d.ts +1 -1
- package/dist/commands/validation.js +1 -1
- package/dist/commands/vet-list.d.ts +2 -2
- package/dist/commands/vet-list.js +12 -5
- package/dist/commands/vet.d.ts +3 -3
- package/dist/commands/vet.js +9 -5
- package/dist/core/bootstrap.d.ts +1 -1
- package/dist/core/bootstrap.js +20 -16
- package/dist/core/category-mapping.d.ts +1 -1
- package/dist/core/category-mapping.js +104 -13
- package/dist/core/errors.d.ts +8 -1
- package/dist/core/errors.js +31 -19
- package/dist/core/gist-state-store.d.ts +1 -1
- package/dist/core/gist-state-store.js +55 -28
- package/dist/core/github.d.ts +1 -1
- package/dist/core/github.js +5 -5
- package/dist/core/http-cache.js +26 -22
- package/dist/core/issue-discovery.d.ts +6 -6
- package/dist/core/issue-discovery.js +279 -286
- package/dist/core/issue-eligibility.d.ts +2 -2
- package/dist/core/issue-eligibility.js +26 -21
- package/dist/core/issue-filtering.js +23 -15
- package/dist/core/issue-scoring.js +1 -1
- package/dist/core/issue-vetting.d.ts +2 -4
- package/dist/core/issue-vetting.js +65 -56
- package/dist/core/local-state.d.ts +1 -1
- package/dist/core/local-state.js +16 -14
- package/dist/core/repo-health.d.ts +2 -2
- package/dist/core/repo-health.js +46 -35
- package/dist/core/schemas.d.ts +17 -9
- package/dist/core/schemas.js +47 -19
- package/dist/core/search-budget.js +3 -3
- package/dist/core/search-phases.d.ts +6 -6
- package/dist/core/search-phases.js +23 -19
- package/dist/core/types.d.ts +9 -9
- package/dist/core/types.js +15 -3
- package/dist/core/utils.d.ts +10 -1
- package/dist/core/utils.js +44 -25
- package/dist/formatters/json.d.ts +1 -1
- package/dist/index.d.ts +7 -7
- package/dist/index.js +5 -5
- package/dist/scout.d.ts +30 -6
- package/dist/scout.js +141 -34
- package/package.json +7 -3
|
@@ -11,30 +11,189 @@
|
|
|
11
11
|
*
|
|
12
12
|
* All state is injected via constructor parameters (ScoutStateReader + ScoutPreferences).
|
|
13
13
|
*/
|
|
14
|
-
import { getOctokit, checkRateLimit } from
|
|
15
|
-
import { getSearchBudgetTracker } from
|
|
16
|
-
import { daysBetween, sleep } from
|
|
17
|
-
import { SCOPE_LABELS } from
|
|
18
|
-
import { CONCRETE_STRATEGIES } from
|
|
19
|
-
import { ValidationError, errorMessage, getHttpStatusCode, isRateLimitError } from
|
|
20
|
-
import { debug, info, warn } from
|
|
21
|
-
import { isDocOnlyIssue, applyPerRepoCap } from
|
|
22
|
-
import { IssueVetter } from
|
|
23
|
-
import { getTopicsForCategories } from
|
|
24
|
-
import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, filterVetAndScore, searchInRepos, searchWithChunkedLabels, } from
|
|
25
|
-
const MODULE =
|
|
14
|
+
import { getOctokit, checkRateLimit } from "./github.js";
|
|
15
|
+
import { getSearchBudgetTracker } from "./search-budget.js";
|
|
16
|
+
import { daysBetween, extractRepoFromUrl, sleep } from "./utils.js";
|
|
17
|
+
import { SCOPE_LABELS, } from "./types.js";
|
|
18
|
+
import { CONCRETE_STRATEGIES } from "./schemas.js";
|
|
19
|
+
import { ValidationError, errorMessage, getHttpStatusCode, isRateLimitError, } from "./errors.js";
|
|
20
|
+
import { debug, info, warn } from "./logger.js";
|
|
21
|
+
import { isDocOnlyIssue, applyPerRepoCap, } from "./issue-filtering.js";
|
|
22
|
+
import { IssueVetter } from "./issue-vetting.js";
|
|
23
|
+
import { getTopicsForCategories } from "./category-mapping.js";
|
|
24
|
+
import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, filterVetAndScore, searchInRepos, searchWithChunkedLabels, } from "./search-phases.js";
|
|
25
|
+
const MODULE = "issue-discovery";
|
|
26
26
|
/** Delay between major search phases to let GitHub's rate limit window cool down. */
|
|
27
27
|
const INTER_PHASE_DELAY_MS = 2000;
|
|
28
28
|
/** If remaining search quota is below this, skip heavy phases (2, 3). */
|
|
29
29
|
const LOW_BUDGET_THRESHOLD = 20;
|
|
30
30
|
/** If remaining search quota is below this, only run Phase 0. */
|
|
31
31
|
const CRITICAL_BUDGET_THRESHOLD = 10;
|
|
32
|
+
/**
 * Create the issue filter that is handed to every search phase.
 *
 * @param {object} config - Filter settings.
 * @param {Set<string>} config.excludedRepos - Repo full names to reject.
 * @param {Set<string>} config.excludeOrgs - Owner names to reject; matched against the lower-cased owner part of the repo name.
 * @param {Set<string>} config.aiBlocklisted - Repo full names on the AI-policy blocklist.
 * @param {Set<string>} config.lowScoringRepos - Repo full names to reject as low-scoring.
 * @param {Set<string>} config.skippedUrls - Issue `html_url` values to reject.
 * @param {number} config.maxAgeDays - Reject items whose `updated_at` is more than this many days before `config.now`.
 * @param {Date} config.now - Reference time for the age check.
 * @param {boolean} config.includeDocIssues - When falsy, items for which `isDocOnlyIssue` is true are rejected.
 * @returns {(items: object[]) => object[]} Function returning only the items that pass every check.
 */
function buildIssueFilter(config) {
    // Org exclusion is skipped entirely when no orgs are configured.
    const isOwnerExcluded = (repoName) => {
        if (config.excludeOrgs.size === 0) {
            return false;
        }
        const owner = repoName.split("/")[0]?.toLowerCase();
        return Boolean(owner) && config.excludeOrgs.has(owner);
    };
    const passesAllChecks = (item) => {
        const repoName = extractRepoFromUrl(item.repository_url);
        if (!repoName) {
            return false;
        }
        // Repo-level rejections, evaluated in the same order as before.
        const repoRejected = config.excludedRepos.has(repoName) ||
            isOwnerExcluded(repoName) ||
            config.aiBlocklisted.has(repoName) ||
            config.lowScoringRepos.has(repoName);
        if (repoRejected) {
            return false;
        }
        if (config.skippedUrls.has(item.html_url)) {
            return false;
        }
        const daysSinceUpdate = daysBetween(new Date(item.updated_at), config.now);
        if (daysSinceUpdate > config.maxAgeDays) {
            return false;
        }
        return Boolean(config.includeDocIssues) || !isDocOnlyIssue(item);
    };
    return (items) => items.filter((item) => passesAllChecks(item));
}
|
|
62
|
+
/**
 * Phase 0: search repos where the user has merged PRs (highest merge probability).
 *
 * Delegates to `searchInRepos` with an empty label list (no label filter) and
 * the "merged_pr" source tag.
 *
 * @param octokit - Client forwarded to `searchInRepos`.
 * @param vetter - Vetter forwarded to `searchInRepos`.
 * @param {string[]} repos - Repos to search.
 * @param {string} baseQualifiers - Base search qualifiers.
 * @param {number} maxResults - Result limit forwarded to `searchInRepos`.
 * @param {Function} filterIssues - Issue filter forwarded to `searchInRepos`.
 * @returns {Promise<{candidates: Array, error: (string|null), rateLimitHit: *}>}
 *   `error` is set only when `searchInRepos` reports `allBatchesFailed`.
 */
async function runPhase0(octokit, vetter, repos, baseQualifiers, maxResults, filterIssues) {
    info(MODULE, `Phase 0: Searching issues in ${repos.length} merged-PR repos (no label filter)...`);
    const noLabelFilter = [];
    const outcome = await searchInRepos(octokit, vetter, repos, baseQualifiers, noLabelFilter, maxResults, "merged_pr", filterIssues);
    const found = outcome.candidates;
    info(MODULE, `Found ${found.length} candidates from merged-PR repos`);
    let error = null;
    if (outcome.allBatchesFailed) {
        error = "All merged-PR repo batches failed";
    }
    return { candidates: found, error, rateLimitHit: outcome.rateLimitHit };
}
|
|
73
|
+
/**
 * Phase 1: search starred repos.
 *
 * Only the first 10 repos and the first 3 labels are used, to reduce Search
 * API calls: starred repos already signal user interest, so fewer labels
 * suffice. The log line reports the number of repos actually searched.
 *
 * @param octokit - Client forwarded to `searchInRepos`.
 * @param vetter - Vetter forwarded to `searchInRepos`.
 * @param {string[]} repos - Starred repos to search (truncated to the first 10).
 * @param {string} baseQualifiers - Base search qualifiers.
 * @param {string[]} labels - Labels to search for (truncated to the first 3).
 * @param {number} maxResults - Result limit forwarded to `searchInRepos`.
 * @param {Function} filterIssues - Issue filter forwarded to `searchInRepos`.
 * @returns {Promise<{candidates: Array, error: (string|null), rateLimitHit: *}>}
 *   `error` is set only when `searchInRepos` reports `allBatchesFailed`.
 */
async function runPhase1(octokit, vetter, repos, baseQualifiers, labels, maxResults, filterIssues) {
    const maxRepos = 10;
    const maxLabels = 3;
    const reposToSearch = repos.slice(0, maxRepos);
    const phase1Labels = labels.slice(0, maxLabels);
    // Report the truncated count, not repos.length, so the log matches what is searched.
    info(MODULE, `Phase 1: Searching issues in ${reposToSearch.length} starred repos...`);
    const { candidates, allBatchesFailed, rateLimitHit } = await searchInRepos(octokit, vetter, reposToSearch, baseQualifiers, phase1Labels, maxResults, "starred", filterIssues);
    info(MODULE, `Found ${candidates.length} candidates from starred repos`);
    return {
        candidates,
        error: allBatchesFailed ? "All starred repo batches failed" : null,
        rateLimitHit,
    };
}
|
|
87
|
+
/**
 * Phase 2: general label-filtered search with multi-tier interleaving.
 *
 * With two or more scopes, each scope that has labels in `SCOPE_LABELS`
 * becomes its own tier, and config labels that belong to no scope form an
 * extra "custom" tier. Otherwise a single "general" tier uses `labels`.
 * Each tier gets an equal share of `maxResults`; tier results are interleaved
 * and truncated to `maxResults`.
 *
 * @param octokit - Client forwarded to `searchWithChunkedLabels`.
 * @param vetter - Vetter forwarded to `filterVetAndScore`.
 * @param {string[]|undefined} scopes - Configured scopes.
 * @param {string[]} labels - Effective labels, used for the single-tier case.
 * @param {string[]|undefined} configLabels - Labels from config, used to derive the "custom" tier.
 * @param {string} baseQualifiers - Base search qualifiers prepended to each label query.
 * @param {number} maxResults - Maximum number of candidates to return.
 * @param {number} minStars - Minimum star count forwarded to `filterVetAndScore`.
 * @param {Set<string>} phase0RepoSet - Repos already covered by Phase 0.
 * @param {Set<string>} starredRepoSet - Starred repos.
 * @param {Array} existingCandidates - Candidates found so far; their repos seed the seen-repo set.
 * @param {Function} filterIssues - Issue filter forwarded to `filterVetAndScore`.
 * @returns {Promise<{candidates: Array, error: (string|null), rateLimitHit: boolean}>}
 *   `error` joins per-tier failure messages with "; ".
 * @throws Rethrows any error whose HTTP status is 401.
 */
async function runPhase2(octokit, vetter, scopes, labels, configLabels, baseQualifiers, maxResults, minStars, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues) {
    info(MODULE, "Phase 2: General issue search...");
    const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
    // Build per-tier label groups. Multi-tier when 2+ scopes; single-tier otherwise.
    const tierLabelGroups = [];
    if (scopes && scopes.length > 1) {
        for (const scope of scopes) {
            const scopeLabels = SCOPE_LABELS[scope] ?? [];
            if (scopeLabels.length === 0) {
                warn(MODULE, `Scope "${scope}" has no labels, skipping tier`);
                continue;
            }
            tierLabelGroups.push({ tier: scope, tierLabels: scopeLabels });
        }
        // Config labels that belong to no scope get their own pseudo-tier.
        const allScopeLabels = new Set(scopes.flatMap((s) => SCOPE_LABELS[s] ?? []));
        const customOnly = (configLabels ?? []).filter((l) => !allScopeLabels.has(l));
        if (customOnly.length > 0) {
            tierLabelGroups.push({ tier: "custom", tierLabels: customOnly });
        }
    }
    else {
        tierLabelGroups.push({ tier: "general", tierLabels: labels });
    }
    // Every scope lacked labels and there were no custom labels: nothing to
    // search. Bail out explicitly instead of computing an Infinity budget below.
    if (tierLabelGroups.length === 0) {
        warn(MODULE, "Phase 2: no label tiers to search, skipping general search");
        return { candidates: [], error: null, rateLimitHit: false };
    }
    const budgetPerTier = Math.ceil(maxResults / tierLabelGroups.length);
    const buildQuery = (labelQ) => `${baseQualifiers} ${labelQ}`.replace(/ +/g, " ").trim();
    const tierResults = [];
    const tierErrors = [];
    let rateLimitHit = false;
    for (const { tier, tierLabels } of tierLabelGroups) {
        try {
            // 0 = no repo/org OR-operators in Phase 2 queries.
            const allItems = await searchWithChunkedLabels(octokit, tierLabels, 0, buildQuery, budgetPerTier * 3);
            info(MODULE, `Phase 2 [${tier}]: processing ${allItems.length} items...`);
            const { candidates: tierCandidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, allItems, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], budgetPerTier, minStars, `Phase 2 [${tier}]`);
            tierResults.push(tierCandidates);
            // Update seenRepos so later tiers don't return duplicate repos.
            for (const c of tierCandidates) {
                seenRepos.add(c.issue.repo);
            }
            if (allVetFailed) {
                tierErrors.push(`${tier}: all vetting failed`);
            }
            if (vetRateLimitHit) {
                rateLimitHit = true;
            }
            info(MODULE, `Found ${tierCandidates.length} candidates from ${tier} tier`);
        }
        catch (err) {
            // Authentication failures are fatal; everything else is recorded per tier.
            if (getHttpStatusCode(err) === 401) {
                throw err;
            }
            const errMsg = errorMessage(err);
            tierErrors.push(`${tier}: ${errMsg}`);
            if (isRateLimitError(err)) {
                rateLimitHit = true;
            }
            warn(MODULE, `Error in ${tier} tier search: ${errMsg}`);
            // Keep one slot per tier in tierResults even when the tier failed.
            tierResults.push([]);
        }
    }
    const error = tierErrors.length > 0 ? tierErrors.join("; ") : null;
    const interleaved = interleaveArrays(tierResults);
    if (interleaved.length === 0 && error) {
        warn(MODULE, `All ${tierLabelGroups.length} scope tiers failed in Phase 2: ${error}`);
    }
    return {
        candidates: interleaved.slice(0, maxResults),
        error,
        rateLimitHit,
    };
}
|
|
153
|
+
/**
 * Phase 3: search issues in actively maintained repos.
 *
 * Builds one query restricted to unassigned open issues in non-archived repos
 * with at least `minStars` stars that were pushed to within the last 30 days.
 * If project categories map to topics, only the first topic is added to the
 * query. Results are then passed through `filterVetAndScore`.
 *
 * @param octokit - Client forwarded to `cachedSearchIssues`.
 * @param vetter - Vetter forwarded to `filterVetAndScore`.
 * @param {string} langQuery - Language qualifiers (may be empty).
 * @param {number} minStars - Minimum star count used in the query and forwarded to `filterVetAndScore`.
 * @param {string[]} projectCategories - Categories passed to `getTopicsForCategories`.
 * @param {number} maxResults - Maximum number of candidates wanted.
 * @param {Set<string>} phase0RepoSet - Repos already covered by Phase 0.
 * @param {Set<string>} starredRepoSet - Starred repos.
 * @param {Array} existingCandidates - Candidates found so far; their repos seed the seen-repo set.
 * @param {Function} filterIssues - Issue filter forwarded to `filterVetAndScore`.
 * @returns {Promise<{candidates: Array, error: (string|null), rateLimitHit: *}>}
 * @throws Rethrows any error whose HTTP status is 401, matching Phase 2 and the
 *   pre-flight rate-limit check; other errors are reported in `error`.
 */
async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories, maxResults, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues) {
    info(MODULE, "Phase 3: Searching actively maintained repos...");
    const thirtyDaysAgo = new Date();
    thirtyDaysAgo.setDate(thirtyDaysAgo.getDate() - 30);
    const pushedSince = thirtyDaysAgo.toISOString().split("T")[0];
    const categoryTopics = getTopicsForCategories(projectCategories);
    const topicQuery = categoryTopics.length > 0 ? `topic:${categoryTopics[0]}` : "";
    // Collapse the double spaces left behind by empty langQuery/topicQuery.
    const phase3Query = `is:issue is:open no:assignee ${langQuery} ${topicQuery} stars:>=${minStars} pushed:>=${pushedSince} archived:false`
        .replace(/ +/g, " ")
        .trim();
    // Over-fetch 3x to leave room for filtering, but stay within the
    // Search API's documented maximum of 100 results per page.
    const perPage = Math.min(maxResults * 3, 100);
    try {
        const data = await cachedSearchIssues(octokit, {
            q: phase3Query,
            sort: "updated",
            order: "desc",
            per_page: perPage,
        });
        info(MODULE, `Found ${data.total_count} issues in maintained-repo search, processing top ${data.items.length}...`);
        const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
        const { candidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, data.items, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], maxResults, minStars, "Phase 3");
        info(MODULE, `Found ${candidates.length} candidates from maintained-repo search`);
        return {
            candidates,
            error: allVetFailed ? "all vetting failed" : null,
            rateLimitHit: vetRateLimitHit,
        };
    }
    catch (error) {
        // An invalid token must not be reported as "no results".
        if (getHttpStatusCode(error) === 401) {
            throw error;
        }
        const errMsg = errorMessage(error);
        warn(MODULE, `Error in maintained-repo search: ${errMsg}`);
        return {
            candidates: [],
            error: errMsg,
            rateLimitHit: isRateLimitError(error),
        };
    }
}
|
|
191
|
+
// ── IssueDiscovery class ─────────────────────────────────────────────
|
|
32
192
|
/**
|
|
33
193
|
* Multi-phase issue discovery engine that searches GitHub for contributable issues.
|
|
34
194
|
*
|
|
35
195
|
* Search phases (in priority order):
|
|
36
196
|
* 0. Repos where user has merged PRs (highest merge probability)
|
|
37
|
-
* 0.5. Preferred organizations
|
|
38
197
|
* 1. Starred repos
|
|
39
198
|
* 2. General label-filtered search
|
|
40
199
|
* 3. Actively maintained repos
|
|
@@ -70,8 +229,8 @@ export class IssueDiscovery {
|
|
|
70
229
|
}
|
|
71
230
|
/**
|
|
72
231
|
* Search for issues matching our criteria.
|
|
73
|
-
* Searches in priority order: merged-PR repos first (no label filter), then
|
|
74
|
-
*
|
|
232
|
+
* Searches in priority order: merged-PR repos first (no label filter), then starred
|
|
233
|
+
* repos, then general search, then actively maintained repos.
|
|
75
234
|
* Filters out issues from low-scoring and excluded repos.
|
|
76
235
|
*
|
|
77
236
|
* @param options - Search configuration
|
|
@@ -95,29 +254,30 @@ export class IssueDiscovery {
|
|
|
95
254
|
async searchIssues(options = {}) {
|
|
96
255
|
const config = this.preferences;
|
|
97
256
|
const languages = options.languages || config.languages;
|
|
98
|
-
const scopes = config.scope;
|
|
99
|
-
const labels = options.labels ||
|
|
257
|
+
const scopes = config.scope;
|
|
258
|
+
const labels = options.labels ||
|
|
259
|
+
(scopes ? buildEffectiveLabels(scopes, config.labels) : config.labels);
|
|
100
260
|
const maxResults = options.maxResults || 10;
|
|
101
261
|
const minStars = config.minStars ?? 50;
|
|
102
|
-
// Strategy selection
|
|
262
|
+
// Strategy selection
|
|
103
263
|
const ALL_STRATEGIES = CONCRETE_STRATEGIES;
|
|
104
|
-
const rawStrategies = options.strategies ??
|
|
105
|
-
|
|
264
|
+
const rawStrategies = options.strategies ??
|
|
265
|
+
config.defaultStrategy ?? ["all"];
|
|
266
|
+
const enabledStrategies = new Set(rawStrategies.includes("all") ? ALL_STRATEGIES : rawStrategies);
|
|
106
267
|
const strategiesUsed = [];
|
|
107
268
|
const allCandidates = [];
|
|
108
|
-
|
|
109
|
-
let phase1Error = null;
|
|
269
|
+
const phaseErrors = {};
|
|
110
270
|
let rateLimitHitDuringSearch = false;
|
|
111
|
-
// Pre-flight rate limit check
|
|
271
|
+
// Pre-flight rate limit check
|
|
112
272
|
this.rateLimitWarning = null;
|
|
113
273
|
const tracker = getSearchBudgetTracker();
|
|
114
|
-
let searchBudget = LOW_BUDGET_THRESHOLD - 1;
|
|
274
|
+
let searchBudget = LOW_BUDGET_THRESHOLD - 1;
|
|
115
275
|
try {
|
|
116
276
|
const rateLimit = await checkRateLimit(this.githubToken);
|
|
117
277
|
searchBudget = rateLimit.remaining;
|
|
118
278
|
tracker.init(rateLimit.remaining, rateLimit.resetAt);
|
|
119
279
|
if (rateLimit.remaining < 5) {
|
|
120
|
-
const resetTime = new Date(rateLimit.resetAt).toLocaleTimeString(
|
|
280
|
+
const resetTime = new Date(rateLimit.resetAt).toLocaleTimeString("en-US", { hour12: false });
|
|
121
281
|
this.rateLimitWarning = `GitHub search API quota low (${rateLimit.remaining}/${rateLimit.limit} remaining, resets at ${resetTime}). Search may be slow.`;
|
|
122
282
|
warn(MODULE, this.rateLimitWarning);
|
|
123
283
|
}
|
|
@@ -129,278 +289,106 @@ export class IssueDiscovery {
|
|
|
129
289
|
}
|
|
130
290
|
}
|
|
131
291
|
catch (error) {
|
|
132
|
-
|
|
133
|
-
if (getHttpStatusCode(error) === 401) {
|
|
292
|
+
if (getHttpStatusCode(error) === 401)
|
|
134
293
|
throw error;
|
|
135
|
-
}
|
|
136
|
-
// Non-fatal: proceed with conservative budget for transient/network errors.
|
|
137
|
-
// Initialize tracker with conservative defaults so it doesn't fly blind.
|
|
138
294
|
tracker.init(CRITICAL_BUDGET_THRESHOLD, new Date(Date.now() + 60000).toISOString());
|
|
139
|
-
warn(MODULE,
|
|
295
|
+
warn(MODULE, "Could not check rate limit — using conservative budget, skipping heavy phases:", errorMessage(error));
|
|
140
296
|
}
|
|
141
|
-
|
|
297
|
+
if (searchBudget <= 0) {
|
|
298
|
+
this.rateLimitWarning =
|
|
299
|
+
"GitHub search API quota exhausted. Try again after the rate limit resets.";
|
|
300
|
+
return { candidates: [], strategiesUsed: [] };
|
|
301
|
+
}
|
|
302
|
+
// Derive search context
|
|
142
303
|
const mergedPRRepos = this.stateReader.getReposWithMergedPRs();
|
|
143
|
-
// Get starred repos (from local cache or state reader)
|
|
144
304
|
const starredRepos = this.getStarredRepos();
|
|
145
305
|
const starredRepoSet = new Set(starredRepos);
|
|
146
|
-
|
|
147
|
-
const minRepoScoreThreshold = config.minRepoScoreThreshold;
|
|
148
|
-
const lowScoringRepos = new Set(this.deriveLowScoringRepos(minRepoScoreThreshold));
|
|
149
|
-
// Common filters
|
|
150
|
-
const excludedRepos = new Set(config.excludeRepos);
|
|
151
|
-
const excludeOrgs = new Set((config.excludeOrgs ?? []).map(o => o.toLowerCase()));
|
|
152
|
-
const maxAgeDays = config.maxIssueAgeDays || 90;
|
|
153
|
-
const now = new Date();
|
|
306
|
+
const lowScoringRepos = new Set(this.deriveLowScoringRepos(config.minRepoScoreThreshold));
|
|
154
307
|
// Build query parts
|
|
155
|
-
|
|
156
|
-
const
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
308
|
+
const isAnyLanguage = languages.some((l) => l.toLowerCase() === "any");
|
|
309
|
+
const langQuery = isAnyLanguage
|
|
310
|
+
? ""
|
|
311
|
+
: languages.map((l) => `language:${l}`).join(" ");
|
|
312
|
+
const baseQualifiers = `is:issue is:open ${langQuery} no:assignee`
|
|
313
|
+
.replace(/ +/g, " ")
|
|
314
|
+
.trim();
|
|
315
|
+
// Build reusable filter
|
|
163
316
|
const aiBlocklisted = new Set(config.aiPolicyBlocklist);
|
|
164
317
|
if (aiBlocklisted.size > 0) {
|
|
165
|
-
debug(MODULE, `[AI_POLICY_FILTER] Filtering issues from ${aiBlocklisted.size} blocklisted repo(s): ${[...aiBlocklisted].join(
|
|
318
|
+
debug(MODULE, `[AI_POLICY_FILTER] Filtering issues from ${aiBlocklisted.size} blocklisted repo(s): ${[...aiBlocklisted].join(", ")}`);
|
|
166
319
|
}
|
|
167
|
-
const filterIssues = (
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
// Filter repos with known anti-AI contribution policies
|
|
179
|
-
if (aiBlocklisted.has(repoFullName))
|
|
180
|
-
return false;
|
|
181
|
-
// Filter OUT low-scoring repos
|
|
182
|
-
if (lowScoringRepos.has(repoFullName))
|
|
183
|
-
return false;
|
|
184
|
-
// Filter by issue age based on updated_at
|
|
185
|
-
const updatedAt = new Date(item.updated_at);
|
|
186
|
-
const ageDays = daysBetween(updatedAt, now);
|
|
187
|
-
if (ageDays > maxAgeDays)
|
|
188
|
-
return false;
|
|
189
|
-
// Filter out doc-only issues unless opted in
|
|
190
|
-
if (!includeDocIssues && isDocOnlyIssue(item))
|
|
191
|
-
return false;
|
|
192
|
-
return true;
|
|
193
|
-
});
|
|
194
|
-
};
|
|
195
|
-
// Phase 0: Search repos where user has merged PRs (highest merge probability)
|
|
320
|
+
const filterIssues = buildIssueFilter({
|
|
321
|
+
excludedRepos: new Set(config.excludeRepos),
|
|
322
|
+
excludeOrgs: new Set((config.excludeOrgs ?? []).map((o) => o.toLowerCase())),
|
|
323
|
+
aiBlocklisted,
|
|
324
|
+
lowScoringRepos,
|
|
325
|
+
skippedUrls: options.skippedUrls ?? new Set(),
|
|
326
|
+
maxAgeDays: config.maxIssueAgeDays || 90,
|
|
327
|
+
now: new Date(),
|
|
328
|
+
includeDocIssues: config.includeDocIssues ?? true,
|
|
329
|
+
});
|
|
330
|
+
// Phase 0: Merged-PR repos
|
|
196
331
|
const phase0Repos = mergedPRRepos.slice(0, 10);
|
|
197
332
|
const phase0RepoSet = new Set(phase0Repos);
|
|
198
|
-
if (phase0Repos.length > 0 && enabledStrategies.has(
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
if (
|
|
205
|
-
phase0Error = 'All merged-PR repo batches failed';
|
|
206
|
-
}
|
|
207
|
-
if (rateLimitHit) {
|
|
333
|
+
if (phase0Repos.length > 0 && enabledStrategies.has("merged")) {
|
|
334
|
+
const remaining = maxResults - allCandidates.length;
|
|
335
|
+
if (remaining > 0) {
|
|
336
|
+
const result = await runPhase0(this.octokit, this.vetter, phase0Repos, baseQualifiers, remaining, filterIssues);
|
|
337
|
+
allCandidates.push(...result.candidates);
|
|
338
|
+
phaseErrors["0"] = result.error;
|
|
339
|
+
if (result.rateLimitHit)
|
|
208
340
|
rateLimitHitDuringSearch = true;
|
|
209
|
-
}
|
|
210
|
-
info(MODULE, `Found ${mergedCandidates.length} candidates from merged-PR repos`);
|
|
211
341
|
}
|
|
212
|
-
strategiesUsed.push(
|
|
342
|
+
strategiesUsed.push("merged");
|
|
213
343
|
}
|
|
214
|
-
// Phase
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
// Inter-phase delay to let GitHub's rate limit window cool down
|
|
220
|
-
if (phase0Repos.length > 0)
|
|
221
|
-
await sleep(INTER_PHASE_DELAY_MS);
|
|
222
|
-
// Filter out orgs already covered by Phase 0 repos
|
|
223
|
-
const phase0Orgs = new Set(phase0Repos.map((r) => r.split('/')[0]?.toLowerCase()));
|
|
224
|
-
const orgsToSearch = preferredOrgs.filter((org) => !phase0Orgs.has(org.toLowerCase())).slice(0, 5);
|
|
225
|
-
if (orgsToSearch.length > 0) {
|
|
226
|
-
info(MODULE, `Phase 0.5: Searching issues in ${orgsToSearch.length} preferred org(s)...`);
|
|
227
|
-
const remainingNeeded = maxResults - allCandidates.length;
|
|
228
|
-
const orgRepoFilter = orgsToSearch.map((org) => `org:${org}`).join(' OR ');
|
|
229
|
-
const orgOps = orgsToSearch.length - 1;
|
|
230
|
-
try {
|
|
231
|
-
const allItems = await searchWithChunkedLabels(this.octokit, labels, orgOps, (labelQ) => `${baseQualifiers} ${labelQ} (${orgRepoFilter})`.replace(/ +/g, ' ').trim(), remainingNeeded * 3);
|
|
232
|
-
if (allItems.length > 0) {
|
|
233
|
-
const filtered = filterIssues(allItems).filter((item) => {
|
|
234
|
-
const repoFullName = item.repository_url.split('/').slice(-2).join('/');
|
|
235
|
-
return !phase0RepoSet.has(repoFullName);
|
|
236
|
-
});
|
|
237
|
-
const { candidates: orgCandidates, allFailed: allVetFailed, rateLimitHit, } = await this.vetter.vetIssuesParallel(filtered.slice(0, remainingNeeded * 2).map((i) => i.html_url), remainingNeeded, 'preferred_org');
|
|
238
|
-
allCandidates.push(...orgCandidates);
|
|
239
|
-
if (allVetFailed) {
|
|
240
|
-
phase0_5Error = 'All preferred org issue vetting failed';
|
|
241
|
-
}
|
|
242
|
-
if (rateLimitHit) {
|
|
243
|
-
rateLimitHitDuringSearch = true;
|
|
244
|
-
}
|
|
245
|
-
info(MODULE, `Found ${orgCandidates.length} candidates from preferred orgs`);
|
|
246
|
-
}
|
|
247
|
-
}
|
|
248
|
-
catch (error) {
|
|
249
|
-
const errMsg = errorMessage(error);
|
|
250
|
-
phase0_5Error = errMsg;
|
|
251
|
-
if (isRateLimitError(error)) {
|
|
252
|
-
rateLimitHitDuringSearch = true;
|
|
253
|
-
}
|
|
254
|
-
warn(MODULE, `Error searching preferred orgs: ${errMsg}`);
|
|
255
|
-
}
|
|
256
|
-
}
|
|
257
|
-
strategiesUsed.push('orgs');
|
|
258
|
-
}
|
|
259
|
-
// Phase 1: Search starred repos (filter out already-searched Phase 0 repos)
|
|
260
|
-
// Skip if budget is critical
|
|
261
|
-
if (allCandidates.length < maxResults && starredRepos.length > 0 && searchBudget >= CRITICAL_BUDGET_THRESHOLD && enabledStrategies.has('starred')) {
|
|
344
|
+
// Phase 1: Starred repos
|
|
345
|
+
if (allCandidates.length < maxResults &&
|
|
346
|
+
starredRepos.length > 0 &&
|
|
347
|
+
searchBudget >= CRITICAL_BUDGET_THRESHOLD &&
|
|
348
|
+
enabledStrategies.has("starred")) {
|
|
262
349
|
await sleep(INTER_PHASE_DELAY_MS);
|
|
263
350
|
const reposToSearch = starredRepos.filter((r) => !phase0RepoSet.has(r));
|
|
264
351
|
if (reposToSearch.length > 0) {
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
// from ~12 to ~4.
|
|
272
|
-
const phase1Labels = labels.slice(0, 3);
|
|
273
|
-
const { candidates: starredCandidates, allBatchesFailed, rateLimitHit, } = await searchInRepos(this.octokit, this.vetter, reposToSearch.slice(0, 10), baseQualifiers, phase1Labels, remainingNeeded, 'starred', filterIssues);
|
|
274
|
-
allCandidates.push(...starredCandidates);
|
|
275
|
-
if (allBatchesFailed) {
|
|
276
|
-
phase1Error = 'All starred repo batches failed';
|
|
277
|
-
}
|
|
278
|
-
if (rateLimitHit) {
|
|
352
|
+
const remaining = maxResults - allCandidates.length;
|
|
353
|
+
if (remaining > 0) {
|
|
354
|
+
const result = await runPhase1(this.octokit, this.vetter, reposToSearch, baseQualifiers, labels, remaining, filterIssues);
|
|
355
|
+
allCandidates.push(...result.candidates);
|
|
356
|
+
phaseErrors["1"] = result.error;
|
|
357
|
+
if (result.rateLimitHit)
|
|
279
358
|
rateLimitHitDuringSearch = true;
|
|
280
|
-
}
|
|
281
|
-
info(MODULE, `Found ${starredCandidates.length} candidates from starred repos`);
|
|
282
359
|
}
|
|
283
360
|
}
|
|
284
|
-
strategiesUsed.push(
|
|
361
|
+
strategiesUsed.push("starred");
|
|
285
362
|
}
|
|
286
|
-
// Phase 2: General search
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
// beginner results.
|
|
291
|
-
let phase2Error = null;
|
|
292
|
-
if (allCandidates.length < maxResults && searchBudget >= LOW_BUDGET_THRESHOLD && enabledStrategies.has('broad')) {
|
|
363
|
+
// Phase 2: General search
|
|
364
|
+
if (allCandidates.length < maxResults &&
|
|
365
|
+
searchBudget >= LOW_BUDGET_THRESHOLD &&
|
|
366
|
+
enabledStrategies.has("broad")) {
|
|
293
367
|
await sleep(INTER_PHASE_DELAY_MS);
|
|
294
|
-
|
|
295
|
-
const
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
const scopeLabels = SCOPE_LABELS[scope] ?? [];
|
|
302
|
-
if (scopeLabels.length === 0) {
|
|
303
|
-
warn(MODULE, `Scope "${scope}" has no labels, skipping tier`);
|
|
304
|
-
continue;
|
|
305
|
-
}
|
|
306
|
-
tierLabelGroups.push({ tier: scope, tierLabels: scopeLabels });
|
|
307
|
-
}
|
|
308
|
-
// Custom labels not in any tier get their own pseudo-tier
|
|
309
|
-
const allScopeLabels = new Set(scopes.flatMap((s) => SCOPE_LABELS[s] ?? []));
|
|
310
|
-
const customOnly = config.labels.filter((l) => !allScopeLabels.has(l));
|
|
311
|
-
if (customOnly.length > 0) {
|
|
312
|
-
tierLabelGroups.push({ tier: 'custom', tierLabels: customOnly });
|
|
313
|
-
}
|
|
314
|
-
}
|
|
315
|
-
else {
|
|
316
|
-
tierLabelGroups.push({ tier: 'general', tierLabels: labels });
|
|
317
|
-
}
|
|
318
|
-
const budgetPerTier = Math.ceil(remainingNeeded / tierLabelGroups.length);
|
|
319
|
-
const tierResults = [];
|
|
320
|
-
for (const { tier, tierLabels } of tierLabelGroups) {
|
|
321
|
-
try {
|
|
322
|
-
const allItems = await searchWithChunkedLabels(this.octokit, tierLabels, 0, // no repo/org ORs in Phase 2
|
|
323
|
-
(labelQ) => `${baseQualifiers} ${labelQ}`.replace(/ +/g, ' ').trim(), budgetPerTier * 3);
|
|
324
|
-
info(MODULE, `Phase 2 [${tier}]: processing ${allItems.length} items...`);
|
|
325
|
-
const { candidates: tierCandidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(this.vetter, allItems, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], budgetPerTier, minStars, `Phase 2 [${tier}]`);
|
|
326
|
-
tierResults.push(tierCandidates);
|
|
327
|
-
// Update seenRepos so later tiers don't return duplicate repos
|
|
328
|
-
for (const c of tierCandidates)
|
|
329
|
-
seenRepos.add(c.issue.repo);
|
|
330
|
-
if (allVetFailed) {
|
|
331
|
-
phase2Error = (phase2Error ? phase2Error + '; ' : '') + `${tier}: all vetting failed`;
|
|
332
|
-
}
|
|
333
|
-
if (vetRateLimitHit) {
|
|
334
|
-
rateLimitHitDuringSearch = true;
|
|
335
|
-
}
|
|
336
|
-
info(MODULE, `Found ${tierCandidates.length} candidates from ${tier} tier`);
|
|
337
|
-
}
|
|
338
|
-
catch (error) {
|
|
339
|
-
if (getHttpStatusCode(error) === 401)
|
|
340
|
-
throw error;
|
|
341
|
-
const errMsg = errorMessage(error);
|
|
342
|
-
phase2Error = (phase2Error ? phase2Error + '; ' : '') + `${tier}: ${errMsg}`;
|
|
343
|
-
if (isRateLimitError(error)) {
|
|
344
|
-
rateLimitHitDuringSearch = true;
|
|
345
|
-
}
|
|
346
|
-
warn(MODULE, `Error in ${tier} tier search: ${errMsg}`);
|
|
347
|
-
tierResults.push([]);
|
|
348
|
-
}
|
|
349
|
-
}
|
|
350
|
-
const interleaved = interleaveArrays(tierResults);
|
|
351
|
-
if (interleaved.length === 0 && phase2Error) {
|
|
352
|
-
warn(MODULE, `All ${tierLabelGroups.length} scope tiers failed in Phase 2: ${phase2Error}`);
|
|
353
|
-
}
|
|
354
|
-
allCandidates.push(...interleaved.slice(0, remainingNeeded));
|
|
355
|
-
strategiesUsed.push('broad');
|
|
368
|
+
const remaining = maxResults - allCandidates.length;
|
|
369
|
+
const result = await runPhase2(this.octokit, this.vetter, scopes, labels, config.labels, baseQualifiers, remaining, minStars, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
|
|
370
|
+
allCandidates.push(...result.candidates);
|
|
371
|
+
phaseErrors["2"] = result.error;
|
|
372
|
+
if (result.rateLimitHit)
|
|
373
|
+
rateLimitHitDuringSearch = true;
|
|
374
|
+
strategiesUsed.push("broad");
|
|
356
375
|
}
|
|
357
376
|
// Phase 3: Actively maintained repos
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
377
|
+
if (allCandidates.length < maxResults &&
|
|
378
|
+
searchBudget >= LOW_BUDGET_THRESHOLD &&
|
|
379
|
+
enabledStrategies.has("maintained")) {
|
|
361
380
|
await sleep(INTER_PHASE_DELAY_MS);
|
|
362
|
-
|
|
363
|
-
const
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
const phase3Query = `is:issue is:open no:assignee ${langQuery} ${topicQuery} stars:>=${minStars} pushed:>=${pushedSince} archived:false`
|
|
370
|
-
.replace(/ +/g, ' ')
|
|
371
|
-
.trim();
|
|
372
|
-
try {
|
|
373
|
-
const data = await cachedSearchIssues(this.octokit, {
|
|
374
|
-
q: phase3Query,
|
|
375
|
-
sort: 'updated',
|
|
376
|
-
order: 'desc',
|
|
377
|
-
per_page: remainingNeeded * 3,
|
|
378
|
-
});
|
|
379
|
-
info(MODULE, `Found ${data.total_count} issues in maintained-repo search, processing top ${data.items.length}...`);
|
|
380
|
-
const seenRepos = new Set(allCandidates.map((c) => c.issue.repo));
|
|
381
|
-
const { candidates: starFiltered, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(this.vetter, data.items, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], remainingNeeded, minStars, 'Phase 3');
|
|
382
|
-
allCandidates.push(...starFiltered);
|
|
383
|
-
if (allVetFailed) {
|
|
384
|
-
phase3Error = 'all vetting failed';
|
|
385
|
-
}
|
|
386
|
-
if (vetRateLimitHit) {
|
|
387
|
-
rateLimitHitDuringSearch = true;
|
|
388
|
-
}
|
|
389
|
-
info(MODULE, `Found ${starFiltered.length} candidates from maintained-repo search`);
|
|
390
|
-
}
|
|
391
|
-
catch (error) {
|
|
392
|
-
const errMsg = errorMessage(error);
|
|
393
|
-
phase3Error = errMsg;
|
|
394
|
-
if (isRateLimitError(error)) {
|
|
395
|
-
rateLimitHitDuringSearch = true;
|
|
396
|
-
}
|
|
397
|
-
warn(MODULE, `Error in maintained-repo search: ${errMsg}`);
|
|
398
|
-
}
|
|
399
|
-
strategiesUsed.push('maintained');
|
|
381
|
+
const remaining = maxResults - allCandidates.length;
|
|
382
|
+
const result = await runPhase3(this.octokit, this.vetter, langQuery, minStars, config.projectCategories ?? [], remaining, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
|
|
383
|
+
allCandidates.push(...result.candidates);
|
|
384
|
+
phaseErrors["3"] = result.error;
|
|
385
|
+
if (result.rateLimitHit)
|
|
386
|
+
rateLimitHitDuringSearch = true;
|
|
387
|
+
strategiesUsed.push("maintained");
|
|
400
388
|
}
|
|
401
|
-
//
|
|
389
|
+
// Build result / error summary
|
|
402
390
|
const phasesSkippedForBudget = searchBudget < LOW_BUDGET_THRESHOLD;
|
|
403
|
-
let budgetNote = '';
|
|
391
|
+
let budgetNote = "";
|
|
404
392
|
if (searchBudget < CRITICAL_BUDGET_THRESHOLD) {
|
|
405
393
|
budgetNote = ` Most search phases were skipped due to critically low API quota (${searchBudget} remaining).`;
|
|
406
394
|
}
|
|
@@ -408,14 +396,19 @@ export class IssueDiscovery {
|
|
|
408
396
|
budgetNote = ` Some search phases were skipped due to low API quota (${searchBudget} remaining).`;
|
|
409
397
|
}
|
|
410
398
|
if (allCandidates.length === 0) {
|
|
411
|
-
const
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
399
|
+
const errorDetails = [
|
|
400
|
+
phaseErrors["0"]
|
|
401
|
+
? `Phase 0 (merged-PR repos): ${phaseErrors["0"]}`
|
|
402
|
+
: null,
|
|
403
|
+
phaseErrors["1"]
|
|
404
|
+
? `Phase 1 (starred repos): ${phaseErrors["1"]}`
|
|
405
|
+
: null,
|
|
406
|
+
phaseErrors["2"] ? `Phase 2 (general): ${phaseErrors["2"]}` : null,
|
|
407
|
+
phaseErrors["3"]
|
|
408
|
+
? `Phase 3 (maintained repos): ${phaseErrors["3"]}`
|
|
409
|
+
: null,
|
|
417
410
|
].filter(Boolean);
|
|
418
|
-
const details =
|
|
411
|
+
const details = errorDetails.length > 0 ? ` ${errorDetails.join(". ")}.` : "";
|
|
419
412
|
if (rateLimitHitDuringSearch || phasesSkippedForBudget) {
|
|
420
413
|
this.rateLimitWarning =
|
|
421
414
|
`Search returned no results due to GitHub API rate limits.${details}${budgetNote} ` +
|
|
@@ -423,28 +416,31 @@ export class IssueDiscovery {
|
|
|
423
416
|
return { candidates: [], strategiesUsed };
|
|
424
417
|
}
|
|
425
418
|
throw new ValidationError(`No issue candidates found across all search phases.${details} ` +
|
|
426
|
-
|
|
419
|
+
"Try adjusting your search criteria (languages, labels) or check your network connection.");
|
|
427
420
|
}
|
|
428
|
-
// Surface rate limit warning even with partial results
|
|
429
421
|
if (rateLimitHitDuringSearch || phasesSkippedForBudget) {
|
|
430
422
|
this.rateLimitWarning =
|
|
431
423
|
`Search results may be incomplete: GitHub API rate limits were hit during search.${budgetNote} ` +
|
|
432
|
-
`Found ${allCandidates.length} candidate${allCandidates.length === 1 ? '' : 's'} but some search phases were limited. ` +
|
|
424
|
+
`Found ${allCandidates.length} candidate${allCandidates.length === 1 ? "" : "s"} but some search phases were limited. ` +
|
|
433
425
|
`Try again after the rate limit resets for complete results.`;
|
|
434
426
|
}
|
|
435
|
-
// Sort by priority
|
|
427
|
+
// Sort by priority, recommendation, then viability score
|
|
436
428
|
allCandidates.sort((a, b) => {
|
|
437
|
-
const priorityOrder = { merged_pr: 0, starred: 1, normal: 2 };
|
|
429
|
+
const priorityOrder = {
|
|
430
|
+
merged_pr: 0,
|
|
431
|
+
starred: 1,
|
|
432
|
+
normal: 2,
|
|
433
|
+
};
|
|
438
434
|
const priorityDiff = priorityOrder[a.searchPriority] - priorityOrder[b.searchPriority];
|
|
439
435
|
if (priorityDiff !== 0)
|
|
440
436
|
return priorityDiff;
|
|
441
437
|
const recommendationOrder = { approve: 0, needs_review: 1, skip: 2 };
|
|
442
|
-
const recDiff = recommendationOrder[a.recommendation] - recommendationOrder[b.recommendation];
|
|
438
|
+
const recDiff = recommendationOrder[a.recommendation] -
|
|
439
|
+
recommendationOrder[b.recommendation];
|
|
443
440
|
if (recDiff !== 0)
|
|
444
441
|
return recDiff;
|
|
445
442
|
return b.viabilityScore - a.viabilityScore;
|
|
446
443
|
});
|
|
447
|
-
// Apply per-repo cap: max 2 issues from any single repo
|
|
448
444
|
const capped = applyPerRepoCap(allCandidates, 2);
|
|
449
445
|
info(MODULE, `Search complete: ${tracker.getTotalCalls()} Search API calls used, ${capped.length} candidates returned`);
|
|
450
446
|
return { candidates: capped.slice(0, maxResults), strategiesUsed };
|
|
@@ -464,9 +460,6 @@ export class IssueDiscovery {
|
|
|
464
460
|
*/
|
|
465
461
|
deriveLowScoringRepos(threshold) {
|
|
466
462
|
const lowScoring = [];
|
|
467
|
-
// The ScoutStateReader doesn't expose a bulk "get all repos with scores" method,
|
|
468
|
-
// so we rely on the mergedPRRepos + starredRepos as the universe of known repos
|
|
469
|
-
// and check each one's score. Repos not in state simply return null (no penalty).
|
|
470
463
|
const knownRepos = new Set([
|
|
471
464
|
...this.stateReader.getReposWithMergedPRs(),
|
|
472
465
|
...this.stateReader.getStarredRepos(),
|