@oss-scout/core 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/dist/cli.bundle.cjs +42 -42
  2. package/dist/cli.js +110 -86
  3. package/dist/commands/config.d.ts +1 -1
  4. package/dist/commands/config.js +76 -72
  5. package/dist/commands/results.d.ts +1 -1
  6. package/dist/commands/results.js +1 -1
  7. package/dist/commands/search.d.ts +2 -2
  8. package/dist/commands/search.js +16 -6
  9. package/dist/commands/setup.d.ts +1 -1
  10. package/dist/commands/setup.js +27 -21
  11. package/dist/commands/validation.d.ts +1 -1
  12. package/dist/commands/validation.js +1 -1
  13. package/dist/commands/vet-list.d.ts +2 -2
  14. package/dist/commands/vet-list.js +12 -5
  15. package/dist/commands/vet.d.ts +3 -3
  16. package/dist/commands/vet.js +9 -5
  17. package/dist/core/bootstrap.d.ts +1 -1
  18. package/dist/core/bootstrap.js +20 -16
  19. package/dist/core/category-mapping.d.ts +1 -1
  20. package/dist/core/category-mapping.js +104 -13
  21. package/dist/core/errors.d.ts +8 -1
  22. package/dist/core/errors.js +31 -19
  23. package/dist/core/gist-state-store.d.ts +1 -1
  24. package/dist/core/gist-state-store.js +36 -27
  25. package/dist/core/github.d.ts +1 -1
  26. package/dist/core/github.js +5 -5
  27. package/dist/core/http-cache.js +26 -22
  28. package/dist/core/issue-discovery.d.ts +3 -3
  29. package/dist/core/issue-discovery.js +325 -277
  30. package/dist/core/issue-eligibility.d.ts +2 -2
  31. package/dist/core/issue-eligibility.js +26 -21
  32. package/dist/core/issue-filtering.js +23 -15
  33. package/dist/core/issue-scoring.js +1 -1
  34. package/dist/core/issue-vetting.d.ts +2 -2
  35. package/dist/core/issue-vetting.js +66 -53
  36. package/dist/core/local-state.d.ts +1 -1
  37. package/dist/core/local-state.js +16 -14
  38. package/dist/core/repo-health.d.ts +2 -2
  39. package/dist/core/repo-health.js +46 -35
  40. package/dist/core/schemas.d.ts +1 -1
  41. package/dist/core/schemas.js +40 -18
  42. package/dist/core/search-budget.js +3 -3
  43. package/dist/core/search-phases.d.ts +6 -6
  44. package/dist/core/search-phases.js +23 -19
  45. package/dist/core/types.d.ts +9 -9
  46. package/dist/core/types.js +15 -3
  47. package/dist/core/utils.d.ts +10 -1
  48. package/dist/core/utils.js +44 -25
  49. package/dist/formatters/json.d.ts +1 -1
  50. package/dist/index.d.ts +7 -7
  51. package/dist/index.js +5 -5
  52. package/dist/scout.d.ts +4 -5
  53. package/dist/scout.js +72 -31
  54. package/package.json +1 -1
@@ -11,24 +11,218 @@
11
11
  *
12
12
  * All state is injected via constructor parameters (ScoutStateReader + ScoutPreferences).
13
13
  */
14
- import { getOctokit, checkRateLimit } from './github.js';
15
- import { getSearchBudgetTracker } from './search-budget.js';
16
- import { daysBetween, sleep } from './utils.js';
17
- import { SCOPE_LABELS } from './types.js';
18
- import { CONCRETE_STRATEGIES } from './schemas.js';
19
- import { ValidationError, errorMessage, getHttpStatusCode, isRateLimitError } from './errors.js';
20
- import { debug, info, warn } from './logger.js';
21
- import { isDocOnlyIssue, applyPerRepoCap } from './issue-filtering.js';
22
- import { IssueVetter } from './issue-vetting.js';
23
- import { getTopicsForCategories } from './category-mapping.js';
24
- import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, filterVetAndScore, searchInRepos, searchWithChunkedLabels, } from './search-phases.js';
25
- const MODULE = 'issue-discovery';
14
+ import { getOctokit, checkRateLimit } from "./github.js";
15
+ import { getSearchBudgetTracker } from "./search-budget.js";
16
+ import { daysBetween, extractRepoFromUrl, sleep } from "./utils.js";
17
+ import { SCOPE_LABELS, } from "./types.js";
18
+ import { CONCRETE_STRATEGIES } from "./schemas.js";
19
+ import { ValidationError, errorMessage, getHttpStatusCode, isRateLimitError, } from "./errors.js";
20
+ import { debug, info, warn } from "./logger.js";
21
+ import { isDocOnlyIssue, applyPerRepoCap, } from "./issue-filtering.js";
22
+ import { IssueVetter } from "./issue-vetting.js";
23
+ import { getTopicsForCategories } from "./category-mapping.js";
24
+ import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, filterVetAndScore, searchInRepos, searchWithChunkedLabels, } from "./search-phases.js";
25
+ const MODULE = "issue-discovery";
26
26
  /** Delay between major search phases to let GitHub's rate limit window cool down. */
27
27
  const INTER_PHASE_DELAY_MS = 2000;
28
28
  /** If remaining search quota is below this, skip heavy phases (2, 3). */
29
29
  const LOW_BUDGET_THRESHOLD = 20;
30
30
  /** If remaining search quota is below this, only run Phase 0. */
31
31
  const CRITICAL_BUDGET_THRESHOLD = 10;
32
+ /** Build a reusable filter function from config. */
33
+ function buildIssueFilter(config) {
34
+ return (items) => {
35
+ return items.filter((item) => {
36
+ const repoFullName = extractRepoFromUrl(item.repository_url);
37
+ if (!repoFullName)
38
+ return false;
39
+ if (config.excludedRepos.has(repoFullName))
40
+ return false;
41
+ if (config.excludeOrgs.size > 0) {
42
+ const orgName = repoFullName.split("/")[0]?.toLowerCase();
43
+ if (orgName && config.excludeOrgs.has(orgName))
44
+ return false;
45
+ }
46
+ if (config.aiBlocklisted.has(repoFullName))
47
+ return false;
48
+ if (config.lowScoringRepos.has(repoFullName))
49
+ return false;
50
+ const updatedAt = new Date(item.updated_at);
51
+ const ageDays = daysBetween(updatedAt, config.now);
52
+ if (ageDays > config.maxAgeDays)
53
+ return false;
54
+ if (!config.includeDocIssues && isDocOnlyIssue(item))
55
+ return false;
56
+ return true;
57
+ });
58
+ };
59
+ }
60
+ /** Phase 0: Search repos where user has merged PRs (highest merge probability). */
61
+ async function runPhase0(octokit, vetter, repos, baseQualifiers, maxResults, filterIssues) {
62
+ info(MODULE, `Phase 0: Searching issues in ${repos.length} merged-PR repos (no label filter)...`);
63
+ const { candidates, allBatchesFailed, rateLimitHit } = await searchInRepos(octokit, vetter, repos, baseQualifiers, [], maxResults, "merged_pr", filterIssues);
64
+ info(MODULE, `Found ${candidates.length} candidates from merged-PR repos`);
65
+ return {
66
+ candidates,
67
+ error: allBatchesFailed ? "All merged-PR repo batches failed" : null,
68
+ rateLimitHit,
69
+ };
70
+ }
71
+ /** Phase 0.5: Search preferred organizations. */
72
+ async function runPhase05(octokit, vetter, orgsToSearch, baseQualifiers, labels, maxResults, phase0RepoSet, filterIssues) {
73
+ info(MODULE, `Phase 0.5: Searching issues in ${orgsToSearch.length} preferred org(s)...`);
74
+ const orgRepoFilter = orgsToSearch.map((org) => `org:${org}`).join(" OR ");
75
+ const orgOps = orgsToSearch.length - 1;
76
+ try {
77
+ const allItems = await searchWithChunkedLabels(octokit, labels, orgOps, (labelQ) => `${baseQualifiers} ${labelQ} (${orgRepoFilter})`
78
+ .replace(/ +/g, " ")
79
+ .trim(), maxResults * 3);
80
+ if (allItems.length === 0) {
81
+ return { candidates: [], error: null, rateLimitHit: false };
82
+ }
83
+ const filtered = filterIssues(allItems).filter((item) => {
84
+ const repoFullName = extractRepoFromUrl(item.repository_url);
85
+ if (!repoFullName)
86
+ return false;
87
+ return !phase0RepoSet.has(repoFullName);
88
+ });
89
+ const { candidates, allFailed: allVetFailed, rateLimitHit, } = await vetter.vetIssuesParallel(filtered.slice(0, maxResults * 2).map((i) => i.html_url), maxResults, "preferred_org");
90
+ info(MODULE, `Found ${candidates.length} candidates from preferred orgs`);
91
+ return {
92
+ candidates,
93
+ error: allVetFailed ? "All preferred org issue vetting failed" : null,
94
+ rateLimitHit,
95
+ };
96
+ }
97
+ catch (error) {
98
+ const errMsg = errorMessage(error);
99
+ warn(MODULE, `Error searching preferred orgs: ${errMsg}`);
100
+ return {
101
+ candidates: [],
102
+ error: errMsg,
103
+ rateLimitHit: isRateLimitError(error),
104
+ };
105
+ }
106
+ }
107
+ /** Phase 1: Search starred repos. */
108
+ async function runPhase1(octokit, vetter, repos, baseQualifiers, labels, maxResults, filterIssues) {
109
+ info(MODULE, `Phase 1: Searching issues in ${repos.length} starred repos...`);
110
+ // Cap labels to reduce Search API calls: starred repos already signal user
111
+ // interest, so fewer labels suffice.
112
+ const phase1Labels = labels.slice(0, 3);
113
+ const { candidates, allBatchesFailed, rateLimitHit } = await searchInRepos(octokit, vetter, repos.slice(0, 10), baseQualifiers, phase1Labels, maxResults, "starred", filterIssues);
114
+ info(MODULE, `Found ${candidates.length} candidates from starred repos`);
115
+ return {
116
+ candidates,
117
+ error: allBatchesFailed ? "All starred repo batches failed" : null,
118
+ rateLimitHit,
119
+ };
120
+ }
121
+ /** Phase 2: General label-filtered search with multi-tier interleaving. */
122
+ async function runPhase2(octokit, vetter, scopes, labels, configLabels, baseQualifiers, maxResults, minStars, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues) {
123
+ info(MODULE, "Phase 2: General issue search...");
124
+ const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
125
+ // Build per-tier label groups. Multi-tier when 2+ scopes; single-tier otherwise.
126
+ const tierLabelGroups = [];
127
+ if (scopes && scopes.length > 1) {
128
+ for (const scope of scopes) {
129
+ const scopeLabels = SCOPE_LABELS[scope] ?? [];
130
+ if (scopeLabels.length === 0) {
131
+ warn(MODULE, `Scope "${scope}" has no labels, skipping tier`);
132
+ continue;
133
+ }
134
+ tierLabelGroups.push({ tier: scope, tierLabels: scopeLabels });
135
+ }
136
+ const allScopeLabels = new Set(scopes.flatMap((s) => SCOPE_LABELS[s] ?? []));
137
+ const customOnly = configLabels.filter((l) => !allScopeLabels.has(l));
138
+ if (customOnly.length > 0) {
139
+ tierLabelGroups.push({ tier: "custom", tierLabels: customOnly });
140
+ }
141
+ }
142
+ else {
143
+ tierLabelGroups.push({ tier: "general", tierLabels: labels });
144
+ }
145
+ const budgetPerTier = Math.ceil(maxResults / tierLabelGroups.length);
146
+ const tierResults = [];
147
+ let error = null;
148
+ let rateLimitHit = false;
149
+ for (const { tier, tierLabels } of tierLabelGroups) {
150
+ try {
151
+ const allItems = await searchWithChunkedLabels(octokit, tierLabels, 0, (labelQ) => `${baseQualifiers} ${labelQ}`.replace(/ +/g, " ").trim(), budgetPerTier * 3);
152
+ info(MODULE, `Phase 2 [${tier}]: processing ${allItems.length} items...`);
153
+ const { candidates: tierCandidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, allItems, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], budgetPerTier, minStars, `Phase 2 [${tier}]`);
154
+ tierResults.push(tierCandidates);
155
+ for (const c of tierCandidates)
156
+ seenRepos.add(c.issue.repo);
157
+ if (allVetFailed) {
158
+ error = (error ? error + "; " : "") + `${tier}: all vetting failed`;
159
+ }
160
+ if (vetRateLimitHit) {
161
+ rateLimitHit = true;
162
+ }
163
+ info(MODULE, `Found ${tierCandidates.length} candidates from ${tier} tier`);
164
+ }
165
+ catch (err) {
166
+ if (getHttpStatusCode(err) === 401)
167
+ throw err;
168
+ const errMsg = errorMessage(err);
169
+ error = (error ? error + "; " : "") + `${tier}: ${errMsg}`;
170
+ if (isRateLimitError(err)) {
171
+ rateLimitHit = true;
172
+ }
173
+ warn(MODULE, `Error in ${tier} tier search: ${errMsg}`);
174
+ tierResults.push([]);
175
+ }
176
+ }
177
+ const interleaved = interleaveArrays(tierResults);
178
+ if (interleaved.length === 0 && error) {
179
+ warn(MODULE, `All ${tierLabelGroups.length} scope tiers failed in Phase 2: ${error}`);
180
+ }
181
+ return {
182
+ candidates: interleaved.slice(0, maxResults),
183
+ error,
184
+ rateLimitHit,
185
+ };
186
+ }
187
+ /** Phase 3: Actively maintained repos. */
188
+ async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories, maxResults, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues) {
189
+ info(MODULE, "Phase 3: Searching actively maintained repos...");
190
+ const thirtyDaysAgo = new Date();
191
+ thirtyDaysAgo.setDate(thirtyDaysAgo.getDate() - 30);
192
+ const pushedSince = thirtyDaysAgo.toISOString().split("T")[0];
193
+ const categoryTopics = getTopicsForCategories(projectCategories);
194
+ const topicQuery = categoryTopics.length > 0 ? `topic:${categoryTopics[0]}` : "";
195
+ const phase3Query = `is:issue is:open no:assignee ${langQuery} ${topicQuery} stars:>=${minStars} pushed:>=${pushedSince} archived:false`
196
+ .replace(/ +/g, " ")
197
+ .trim();
198
+ try {
199
+ const data = await cachedSearchIssues(octokit, {
200
+ q: phase3Query,
201
+ sort: "updated",
202
+ order: "desc",
203
+ per_page: maxResults * 3,
204
+ });
205
+ info(MODULE, `Found ${data.total_count} issues in maintained-repo search, processing top ${data.items.length}...`);
206
+ const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
207
+ const { candidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, data.items, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], maxResults, minStars, "Phase 3");
208
+ info(MODULE, `Found ${candidates.length} candidates from maintained-repo search`);
209
+ return {
210
+ candidates,
211
+ error: allVetFailed ? "all vetting failed" : null,
212
+ rateLimitHit: vetRateLimitHit,
213
+ };
214
+ }
215
+ catch (error) {
216
+ const errMsg = errorMessage(error);
217
+ warn(MODULE, `Error in maintained-repo search: ${errMsg}`);
218
+ return {
219
+ candidates: [],
220
+ error: errMsg,
221
+ rateLimitHit: isRateLimitError(error),
222
+ };
223
+ }
224
+ }
225
+ // ── IssueDiscovery class ─────────────────────────────────────────────
32
226
  /**
33
227
  * Multi-phase issue discovery engine that searches GitHub for contributable issues.
34
228
  *
@@ -95,29 +289,30 @@ export class IssueDiscovery {
95
289
  async searchIssues(options = {}) {
96
290
  const config = this.preferences;
97
291
  const languages = options.languages || config.languages;
98
- const scopes = config.scope; // undefined = legacy mode
99
- const labels = options.labels || (scopes ? buildEffectiveLabels(scopes, config.labels) : config.labels);
292
+ const scopes = config.scope;
293
+ const labels = options.labels ||
294
+ (scopes ? buildEffectiveLabels(scopes, config.labels) : config.labels);
100
295
  const maxResults = options.maxResults || 10;
101
296
  const minStars = config.minStars ?? 50;
102
- // Strategy selection: resolve which phases to run
297
+ // Strategy selection
103
298
  const ALL_STRATEGIES = CONCRETE_STRATEGIES;
104
- const rawStrategies = options.strategies ?? config.defaultStrategy ?? ['all'];
105
- const enabledStrategies = new Set(rawStrategies.includes('all') ? ALL_STRATEGIES : rawStrategies);
299
+ const rawStrategies = options.strategies ??
300
+ config.defaultStrategy ?? ["all"];
301
+ const enabledStrategies = new Set(rawStrategies.includes("all") ? ALL_STRATEGIES : rawStrategies);
106
302
  const strategiesUsed = [];
107
303
  const allCandidates = [];
108
- let phase0Error = null;
109
- let phase1Error = null;
304
+ const phaseErrors = {};
110
305
  let rateLimitHitDuringSearch = false;
111
- // Pre-flight rate limit check — also determines adaptive phase budget
306
+ // Pre-flight rate limit check
112
307
  this.rateLimitWarning = null;
113
308
  const tracker = getSearchBudgetTracker();
114
- let searchBudget = LOW_BUDGET_THRESHOLD - 1; // conservative: below threshold to skip heavy phases
309
+ let searchBudget = LOW_BUDGET_THRESHOLD - 1;
115
310
  try {
116
311
  const rateLimit = await checkRateLimit(this.githubToken);
117
312
  searchBudget = rateLimit.remaining;
118
313
  tracker.init(rateLimit.remaining, rateLimit.resetAt);
119
314
  if (rateLimit.remaining < 5) {
120
- const resetTime = new Date(rateLimit.resetAt).toLocaleTimeString('en-US', { hour12: false });
315
+ const resetTime = new Date(rateLimit.resetAt).toLocaleTimeString("en-US", { hour12: false });
121
316
  this.rateLimitWarning = `GitHub search API quota low (${rateLimit.remaining}/${rateLimit.limit} remaining, resets at ${resetTime}). Search may be slow.`;
122
317
  warn(MODULE, this.rateLimitWarning);
123
318
  }
@@ -129,278 +324,122 @@ export class IssueDiscovery {
129
324
  }
130
325
  }
131
326
  catch (error) {
132
- // Fail fast on auth errors — no point searching with a bad token
133
- if (getHttpStatusCode(error) === 401) {
327
+ if (getHttpStatusCode(error) === 401)
134
328
  throw error;
135
- }
136
- // Non-fatal: proceed with conservative budget for transient/network errors.
137
- // Initialize tracker with conservative defaults so it doesn't fly blind.
138
329
  tracker.init(CRITICAL_BUDGET_THRESHOLD, new Date(Date.now() + 60000).toISOString());
139
- warn(MODULE, 'Could not check rate limit — using conservative budget, skipping heavy phases:', errorMessage(error));
330
+ warn(MODULE, "Could not check rate limit — using conservative budget, skipping heavy phases:", errorMessage(error));
140
331
  }
141
- // Get merged-PR repos (highest merge probability)
332
+ // Derive search context
142
333
  const mergedPRRepos = this.stateReader.getReposWithMergedPRs();
143
- // Get starred repos (from local cache or state reader)
144
334
  const starredRepos = this.getStarredRepos();
145
335
  const starredRepoSet = new Set(starredRepos);
146
- // Get low-scoring repos from state reader
147
- const minRepoScoreThreshold = config.minRepoScoreThreshold;
148
- const lowScoringRepos = new Set(this.deriveLowScoringRepos(minRepoScoreThreshold));
149
- // Common filters
150
- const excludedRepos = new Set(config.excludeRepos);
151
- const excludeOrgs = new Set((config.excludeOrgs ?? []).map(o => o.toLowerCase()));
152
- const maxAgeDays = config.maxIssueAgeDays || 90;
153
- const now = new Date();
336
+ const lowScoringRepos = new Set(this.deriveLowScoringRepos(config.minRepoScoreThreshold));
154
337
  // Build query parts
155
- // When languages includes 'any', omit the language filter entirely
156
- const isAnyLanguage = languages.some((l) => l.toLowerCase() === 'any');
157
- const langQuery = isAnyLanguage ? '' : languages.map((l) => `language:${l}`).join(' ');
158
- // Phase 0 uses a broader query — established contributors don't need beginner labels
159
- // Phases 1+ pass labels separately to searchInRepos/searchWithChunkedLabels
160
- const baseQualifiers = `is:issue is:open ${langQuery} no:assignee`.replace(/ +/g, ' ').trim();
161
- // Helper to filter issues
162
- const includeDocIssues = config.includeDocIssues ?? true;
338
+ const isAnyLanguage = languages.some((l) => l.toLowerCase() === "any");
339
+ const langQuery = isAnyLanguage
340
+ ? ""
341
+ : languages.map((l) => `language:${l}`).join(" ");
342
+ const baseQualifiers = `is:issue is:open ${langQuery} no:assignee`
343
+ .replace(/ +/g, " ")
344
+ .trim();
345
+ // Build reusable filter
163
346
  const aiBlocklisted = new Set(config.aiPolicyBlocklist);
164
347
  if (aiBlocklisted.size > 0) {
165
- debug(MODULE, `[AI_POLICY_FILTER] Filtering issues from ${aiBlocklisted.size} blocklisted repo(s): ${[...aiBlocklisted].join(', ')}`);
348
+ debug(MODULE, `[AI_POLICY_FILTER] Filtering issues from ${aiBlocklisted.size} blocklisted repo(s): ${[...aiBlocklisted].join(", ")}`);
166
349
  }
167
- const filterIssues = (items) => {
168
- return items.filter((item) => {
169
- const repoFullName = item.repository_url.split('/').slice(-2).join('/');
170
- if (excludedRepos.has(repoFullName))
171
- return false;
172
- // Filter out entire orgs
173
- if (excludeOrgs.size > 0) {
174
- const orgName = repoFullName.split('/')[0]?.toLowerCase();
175
- if (orgName && excludeOrgs.has(orgName))
176
- return false;
177
- }
178
- // Filter repos with known anti-AI contribution policies
179
- if (aiBlocklisted.has(repoFullName))
180
- return false;
181
- // Filter OUT low-scoring repos
182
- if (lowScoringRepos.has(repoFullName))
183
- return false;
184
- // Filter by issue age based on updated_at
185
- const updatedAt = new Date(item.updated_at);
186
- const ageDays = daysBetween(updatedAt, now);
187
- if (ageDays > maxAgeDays)
188
- return false;
189
- // Filter out doc-only issues unless opted in
190
- if (!includeDocIssues && isDocOnlyIssue(item))
191
- return false;
192
- return true;
193
- });
194
- };
195
- // Phase 0: Search repos where user has merged PRs (highest merge probability)
350
+ const filterIssues = buildIssueFilter({
351
+ excludedRepos: new Set(config.excludeRepos),
352
+ excludeOrgs: new Set((config.excludeOrgs ?? []).map((o) => o.toLowerCase())),
353
+ aiBlocklisted,
354
+ lowScoringRepos,
355
+ maxAgeDays: config.maxIssueAgeDays || 90,
356
+ now: new Date(),
357
+ includeDocIssues: config.includeDocIssues ?? true,
358
+ });
359
+ // Phase 0: Merged-PR repos
196
360
  const phase0Repos = mergedPRRepos.slice(0, 10);
197
361
  const phase0RepoSet = new Set(phase0Repos);
198
- if (phase0Repos.length > 0 && enabledStrategies.has('merged')) {
199
- info(MODULE, `Phase 0: Searching issues in ${phase0Repos.length} merged-PR repos (no label filter)...`);
200
- const remainingNeeded = maxResults - allCandidates.length;
201
- if (remainingNeeded > 0) {
202
- const { candidates: mergedCandidates, allBatchesFailed, rateLimitHit, } = await searchInRepos(this.octokit, this.vetter, phase0Repos, baseQualifiers, [], remainingNeeded, 'merged_pr', filterIssues);
203
- allCandidates.push(...mergedCandidates);
204
- if (allBatchesFailed) {
205
- phase0Error = 'All merged-PR repo batches failed';
206
- }
207
- if (rateLimitHit) {
362
+ if (phase0Repos.length > 0 && enabledStrategies.has("merged")) {
363
+ const remaining = maxResults - allCandidates.length;
364
+ if (remaining > 0) {
365
+ const result = await runPhase0(this.octokit, this.vetter, phase0Repos, baseQualifiers, remaining, filterIssues);
366
+ allCandidates.push(...result.candidates);
367
+ phaseErrors["0"] = result.error;
368
+ if (result.rateLimitHit)
208
369
  rateLimitHitDuringSearch = true;
209
- }
210
- info(MODULE, `Found ${mergedCandidates.length} candidates from merged-PR repos`);
211
370
  }
212
- strategiesUsed.push('merged');
371
+ strategiesUsed.push("merged");
213
372
  }
214
- // Phase 0.5: Search preferred organizations (explicit user preference)
215
- // Skip if budget is critical — Phase 0 results are sufficient
216
- let phase0_5Error = null;
373
+ // Phase 0.5: Preferred organizations
217
374
  const preferredOrgs = config.preferredOrgs ?? [];
218
- if (allCandidates.length < maxResults && preferredOrgs.length > 0 && searchBudget >= CRITICAL_BUDGET_THRESHOLD && enabledStrategies.has('orgs')) {
219
- // Inter-phase delay to let GitHub's rate limit window cool down
375
+ if (allCandidates.length < maxResults &&
376
+ preferredOrgs.length > 0 &&
377
+ searchBudget >= CRITICAL_BUDGET_THRESHOLD &&
378
+ enabledStrategies.has("orgs")) {
220
379
  if (phase0Repos.length > 0)
221
380
  await sleep(INTER_PHASE_DELAY_MS);
222
- // Filter out orgs already covered by Phase 0 repos
223
- const phase0Orgs = new Set(phase0Repos.map((r) => r.split('/')[0]?.toLowerCase()));
224
- const orgsToSearch = preferredOrgs.filter((org) => !phase0Orgs.has(org.toLowerCase())).slice(0, 5);
381
+ const phase0Orgs = new Set(phase0Repos.map((r) => r.split("/")[0]?.toLowerCase()));
382
+ const orgsToSearch = preferredOrgs
383
+ .filter((org) => !phase0Orgs.has(org.toLowerCase()))
384
+ .slice(0, 5);
225
385
  if (orgsToSearch.length > 0) {
226
- info(MODULE, `Phase 0.5: Searching issues in ${orgsToSearch.length} preferred org(s)...`);
227
- const remainingNeeded = maxResults - allCandidates.length;
228
- const orgRepoFilter = orgsToSearch.map((org) => `org:${org}`).join(' OR ');
229
- const orgOps = orgsToSearch.length - 1;
230
- try {
231
- const allItems = await searchWithChunkedLabels(this.octokit, labels, orgOps, (labelQ) => `${baseQualifiers} ${labelQ} (${orgRepoFilter})`.replace(/ +/g, ' ').trim(), remainingNeeded * 3);
232
- if (allItems.length > 0) {
233
- const filtered = filterIssues(allItems).filter((item) => {
234
- const repoFullName = item.repository_url.split('/').slice(-2).join('/');
235
- return !phase0RepoSet.has(repoFullName);
236
- });
237
- const { candidates: orgCandidates, allFailed: allVetFailed, rateLimitHit, } = await this.vetter.vetIssuesParallel(filtered.slice(0, remainingNeeded * 2).map((i) => i.html_url), remainingNeeded, 'preferred_org');
238
- allCandidates.push(...orgCandidates);
239
- if (allVetFailed) {
240
- phase0_5Error = 'All preferred org issue vetting failed';
241
- }
242
- if (rateLimitHit) {
243
- rateLimitHitDuringSearch = true;
244
- }
245
- info(MODULE, `Found ${orgCandidates.length} candidates from preferred orgs`);
246
- }
247
- }
248
- catch (error) {
249
- const errMsg = errorMessage(error);
250
- phase0_5Error = errMsg;
251
- if (isRateLimitError(error)) {
252
- rateLimitHitDuringSearch = true;
253
- }
254
- warn(MODULE, `Error searching preferred orgs: ${errMsg}`);
255
- }
386
+ const remaining = maxResults - allCandidates.length;
387
+ const result = await runPhase05(this.octokit, this.vetter, orgsToSearch, baseQualifiers, labels, remaining, phase0RepoSet, filterIssues);
388
+ allCandidates.push(...result.candidates);
389
+ phaseErrors["0.5"] = result.error;
390
+ if (result.rateLimitHit)
391
+ rateLimitHitDuringSearch = true;
256
392
  }
257
- strategiesUsed.push('orgs');
393
+ strategiesUsed.push("orgs");
258
394
  }
259
- // Phase 1: Search starred repos (filter out already-searched Phase 0 repos)
260
- // Skip if budget is critical
261
- if (allCandidates.length < maxResults && starredRepos.length > 0 && searchBudget >= CRITICAL_BUDGET_THRESHOLD && enabledStrategies.has('starred')) {
395
+ // Phase 1: Starred repos
396
+ if (allCandidates.length < maxResults &&
397
+ starredRepos.length > 0 &&
398
+ searchBudget >= CRITICAL_BUDGET_THRESHOLD &&
399
+ enabledStrategies.has("starred")) {
262
400
  await sleep(INTER_PHASE_DELAY_MS);
263
401
  const reposToSearch = starredRepos.filter((r) => !phase0RepoSet.has(r));
264
402
  if (reposToSearch.length > 0) {
265
- info(MODULE, `Phase 1: Searching issues in ${reposToSearch.length} starred repos...`);
266
- const remainingNeeded = maxResults - allCandidates.length;
267
- if (remainingNeeded > 0) {
268
- // Cap labels to reduce Search API calls: starred repos already signal user
269
- // interest, so fewer labels suffice. With 3 labels and batch size 3 (2 repo ORs),
270
- // each batch fits in a single label chunk instead of 3+, cutting Phase 1 calls
271
- // from ~12 to ~4.
272
- const phase1Labels = labels.slice(0, 3);
273
- const { candidates: starredCandidates, allBatchesFailed, rateLimitHit, } = await searchInRepos(this.octokit, this.vetter, reposToSearch.slice(0, 10), baseQualifiers, phase1Labels, remainingNeeded, 'starred', filterIssues);
274
- allCandidates.push(...starredCandidates);
275
- if (allBatchesFailed) {
276
- phase1Error = 'All starred repo batches failed';
277
- }
278
- if (rateLimitHit) {
403
+ const remaining = maxResults - allCandidates.length;
404
+ if (remaining > 0) {
405
+ const result = await runPhase1(this.octokit, this.vetter, reposToSearch, baseQualifiers, labels, remaining, filterIssues);
406
+ allCandidates.push(...result.candidates);
407
+ phaseErrors["1"] = result.error;
408
+ if (result.rateLimitHit)
279
409
  rateLimitHitDuringSearch = true;
280
- }
281
- info(MODULE, `Found ${starredCandidates.length} candidates from starred repos`);
282
410
  }
283
411
  }
284
- strategiesUsed.push('starred');
412
+ strategiesUsed.push("starred");
285
413
  }
286
- // Phase 2: General search (if still need more)
287
- // Skip if budget is low — Phases 0, 0.5, 1 are cheaper and higher-value
288
- // When multiple scope tiers are active, fire one query per tier and interleave
289
- // results to prevent high-volume tiers (e.g., "enhancement") from drowning out
290
- // beginner results.
291
- let phase2Error = null;
292
- if (allCandidates.length < maxResults && searchBudget >= LOW_BUDGET_THRESHOLD && enabledStrategies.has('broad')) {
414
+ // Phase 2: General search
415
+ if (allCandidates.length < maxResults &&
416
+ searchBudget >= LOW_BUDGET_THRESHOLD &&
417
+ enabledStrategies.has("broad")) {
293
418
  await sleep(INTER_PHASE_DELAY_MS);
294
- info(MODULE, 'Phase 2: General issue search...');
295
- const remainingNeeded = maxResults - allCandidates.length;
296
- const seenRepos = new Set(allCandidates.map((c) => c.issue.repo));
297
- // Build per-tier label groups. Multi-tier when 2+ scopes; single-tier otherwise.
298
- const tierLabelGroups = [];
299
- if (scopes && scopes.length > 1) {
300
- for (const scope of scopes) {
301
- const scopeLabels = SCOPE_LABELS[scope] ?? [];
302
- if (scopeLabels.length === 0) {
303
- warn(MODULE, `Scope "${scope}" has no labels, skipping tier`);
304
- continue;
305
- }
306
- tierLabelGroups.push({ tier: scope, tierLabels: scopeLabels });
307
- }
308
- // Custom labels not in any tier get their own pseudo-tier
309
- const allScopeLabels = new Set(scopes.flatMap((s) => SCOPE_LABELS[s] ?? []));
310
- const customOnly = config.labels.filter((l) => !allScopeLabels.has(l));
311
- if (customOnly.length > 0) {
312
- tierLabelGroups.push({ tier: 'custom', tierLabels: customOnly });
313
- }
314
- }
315
- else {
316
- tierLabelGroups.push({ tier: 'general', tierLabels: labels });
317
- }
318
- const budgetPerTier = Math.ceil(remainingNeeded / tierLabelGroups.length);
319
- const tierResults = [];
320
- for (const { tier, tierLabels } of tierLabelGroups) {
321
- try {
322
- const allItems = await searchWithChunkedLabels(this.octokit, tierLabels, 0, // no repo/org ORs in Phase 2
323
- (labelQ) => `${baseQualifiers} ${labelQ}`.replace(/ +/g, ' ').trim(), budgetPerTier * 3);
324
- info(MODULE, `Phase 2 [${tier}]: processing ${allItems.length} items...`);
325
- const { candidates: tierCandidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(this.vetter, allItems, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], budgetPerTier, minStars, `Phase 2 [${tier}]`);
326
- tierResults.push(tierCandidates);
327
- // Update seenRepos so later tiers don't return duplicate repos
328
- for (const c of tierCandidates)
329
- seenRepos.add(c.issue.repo);
330
- if (allVetFailed) {
331
- phase2Error = (phase2Error ? phase2Error + '; ' : '') + `${tier}: all vetting failed`;
332
- }
333
- if (vetRateLimitHit) {
334
- rateLimitHitDuringSearch = true;
335
- }
336
- info(MODULE, `Found ${tierCandidates.length} candidates from ${tier} tier`);
337
- }
338
- catch (error) {
339
- if (getHttpStatusCode(error) === 401)
340
- throw error;
341
- const errMsg = errorMessage(error);
342
- phase2Error = (phase2Error ? phase2Error + '; ' : '') + `${tier}: ${errMsg}`;
343
- if (isRateLimitError(error)) {
344
- rateLimitHitDuringSearch = true;
345
- }
346
- warn(MODULE, `Error in ${tier} tier search: ${errMsg}`);
347
- tierResults.push([]);
348
- }
349
- }
350
- const interleaved = interleaveArrays(tierResults);
351
- if (interleaved.length === 0 && phase2Error) {
352
- warn(MODULE, `All ${tierLabelGroups.length} scope tiers failed in Phase 2: ${phase2Error}`);
353
- }
354
- allCandidates.push(...interleaved.slice(0, remainingNeeded));
355
- strategiesUsed.push('broad');
419
+ const remaining = maxResults - allCandidates.length;
420
+ const result = await runPhase2(this.octokit, this.vetter, scopes, labels, config.labels, baseQualifiers, remaining, minStars, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
421
+ allCandidates.push(...result.candidates);
422
+ phaseErrors["2"] = result.error;
423
+ if (result.rateLimitHit)
424
+ rateLimitHitDuringSearch = true;
425
+ strategiesUsed.push("broad");
356
426
  }
357
427
  // Phase 3: Actively maintained repos
358
- // Skip if budget is low — this phase is API-heavy with broad queries
359
- let phase3Error = null;
360
- if (allCandidates.length < maxResults && searchBudget >= LOW_BUDGET_THRESHOLD && enabledStrategies.has('maintained')) {
428
+ if (allCandidates.length < maxResults &&
429
+ searchBudget >= LOW_BUDGET_THRESHOLD &&
430
+ enabledStrategies.has("maintained")) {
361
431
  await sleep(INTER_PHASE_DELAY_MS);
362
- info(MODULE, 'Phase 3: Searching actively maintained repos...');
363
- const remainingNeeded = maxResults - allCandidates.length;
364
- const thirtyDaysAgo = new Date();
365
- thirtyDaysAgo.setDate(thirtyDaysAgo.getDate() - 30);
366
- const pushedSince = thirtyDaysAgo.toISOString().split('T')[0];
367
- const categoryTopics = getTopicsForCategories(config.projectCategories ?? []);
368
- const topicQuery = categoryTopics.length > 0 ? `topic:${categoryTopics[0]}` : '';
369
- const phase3Query = `is:issue is:open no:assignee ${langQuery} ${topicQuery} stars:>=${minStars} pushed:>=${pushedSince} archived:false`
370
- .replace(/ +/g, ' ')
371
- .trim();
372
- try {
373
- const data = await cachedSearchIssues(this.octokit, {
374
- q: phase3Query,
375
- sort: 'updated',
376
- order: 'desc',
377
- per_page: remainingNeeded * 3,
378
- });
379
- info(MODULE, `Found ${data.total_count} issues in maintained-repo search, processing top ${data.items.length}...`);
380
- const seenRepos = new Set(allCandidates.map((c) => c.issue.repo));
381
- const { candidates: starFiltered, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(this.vetter, data.items, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], remainingNeeded, minStars, 'Phase 3');
382
- allCandidates.push(...starFiltered);
383
- if (allVetFailed) {
384
- phase3Error = 'all vetting failed';
385
- }
386
- if (vetRateLimitHit) {
387
- rateLimitHitDuringSearch = true;
388
- }
389
- info(MODULE, `Found ${starFiltered.length} candidates from maintained-repo search`);
390
- }
391
- catch (error) {
392
- const errMsg = errorMessage(error);
393
- phase3Error = errMsg;
394
- if (isRateLimitError(error)) {
395
- rateLimitHitDuringSearch = true;
396
- }
397
- warn(MODULE, `Error in maintained-repo search: ${errMsg}`);
398
- }
399
- strategiesUsed.push('maintained');
432
+ const remaining = maxResults - allCandidates.length;
433
+ const result = await runPhase3(this.octokit, this.vetter, langQuery, minStars, config.projectCategories ?? [], remaining, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
434
+ allCandidates.push(...result.candidates);
435
+ phaseErrors["3"] = result.error;
436
+ if (result.rateLimitHit)
437
+ rateLimitHitDuringSearch = true;
438
+ strategiesUsed.push("maintained");
400
439
  }
401
- // Determine if phases were skipped due to budget constraints
440
+ // Build result / error summary
402
441
  const phasesSkippedForBudget = searchBudget < LOW_BUDGET_THRESHOLD;
403
- let budgetNote = '';
442
+ let budgetNote = "";
404
443
  if (searchBudget < CRITICAL_BUDGET_THRESHOLD) {
405
444
  budgetNote = ` Most search phases were skipped due to critically low API quota (${searchBudget} remaining).`;
406
445
  }
@@ -408,14 +447,22 @@ export class IssueDiscovery {
408
447
  budgetNote = ` Some search phases were skipped due to low API quota (${searchBudget} remaining).`;
409
448
  }
410
449
  if (allCandidates.length === 0) {
411
- const phaseErrors = [
412
- phase0Error ? `Phase 0 (merged-PR repos): ${phase0Error}` : null,
413
- phase0_5Error ? `Phase 0.5 (preferred orgs): ${phase0_5Error}` : null,
414
- phase1Error ? `Phase 1 (starred repos): ${phase1Error}` : null,
415
- phase2Error ? `Phase 2 (general): ${phase2Error}` : null,
416
- phase3Error ? `Phase 3 (maintained repos): ${phase3Error}` : null,
450
+ const errorDetails = [
451
+ phaseErrors["0"]
452
+ ? `Phase 0 (merged-PR repos): ${phaseErrors["0"]}`
453
+ : null,
454
+ phaseErrors["0.5"]
455
+ ? `Phase 0.5 (preferred orgs): ${phaseErrors["0.5"]}`
456
+ : null,
457
+ phaseErrors["1"]
458
+ ? `Phase 1 (starred repos): ${phaseErrors["1"]}`
459
+ : null,
460
+ phaseErrors["2"] ? `Phase 2 (general): ${phaseErrors["2"]}` : null,
461
+ phaseErrors["3"]
462
+ ? `Phase 3 (maintained repos): ${phaseErrors["3"]}`
463
+ : null,
417
464
  ].filter(Boolean);
418
- const details = phaseErrors.length > 0 ? ` ${phaseErrors.join('. ')}.` : '';
465
+ const details = errorDetails.length > 0 ? ` ${errorDetails.join(". ")}.` : "";
419
466
  if (rateLimitHitDuringSearch || phasesSkippedForBudget) {
420
467
  this.rateLimitWarning =
421
468
  `Search returned no results due to GitHub API rate limits.${details}${budgetNote} ` +
@@ -423,28 +470,32 @@ export class IssueDiscovery {
423
470
  return { candidates: [], strategiesUsed };
424
471
  }
425
472
  throw new ValidationError(`No issue candidates found across all search phases.${details} ` +
426
- 'Try adjusting your search criteria (languages, labels) or check your network connection.');
473
+ "Try adjusting your search criteria (languages, labels) or check your network connection.");
427
474
  }
428
- // Surface rate limit warning even with partial results
429
475
  if (rateLimitHitDuringSearch || phasesSkippedForBudget) {
430
476
  this.rateLimitWarning =
431
477
  `Search results may be incomplete: GitHub API rate limits were hit during search.${budgetNote} ` +
432
- `Found ${allCandidates.length} candidate${allCandidates.length === 1 ? '' : 's'} but some search phases were limited. ` +
478
+ `Found ${allCandidates.length} candidate${allCandidates.length === 1 ? "" : "s"} but some search phases were limited. ` +
433
479
  `Try again after the rate limit resets for complete results.`;
434
480
  }
435
- // Sort by priority first, then by recommendation, then by viability score
481
+ // Sort by priority, recommendation, then viability score
436
482
  allCandidates.sort((a, b) => {
437
- const priorityOrder = { merged_pr: 0, preferred_org: 1, starred: 2, normal: 3 };
483
+ const priorityOrder = {
484
+ merged_pr: 0,
485
+ preferred_org: 1,
486
+ starred: 2,
487
+ normal: 3,
488
+ };
438
489
  const priorityDiff = priorityOrder[a.searchPriority] - priorityOrder[b.searchPriority];
439
490
  if (priorityDiff !== 0)
440
491
  return priorityDiff;
441
492
  const recommendationOrder = { approve: 0, needs_review: 1, skip: 2 };
442
- const recDiff = recommendationOrder[a.recommendation] - recommendationOrder[b.recommendation];
493
+ const recDiff = recommendationOrder[a.recommendation] -
494
+ recommendationOrder[b.recommendation];
443
495
  if (recDiff !== 0)
444
496
  return recDiff;
445
497
  return b.viabilityScore - a.viabilityScore;
446
498
  });
447
- // Apply per-repo cap: max 2 issues from any single repo
448
499
  const capped = applyPerRepoCap(allCandidates, 2);
449
500
  info(MODULE, `Search complete: ${tracker.getTotalCalls()} Search API calls used, ${capped.length} candidates returned`);
450
501
  return { candidates: capped.slice(0, maxResults), strategiesUsed };
@@ -464,9 +515,6 @@ export class IssueDiscovery {
464
515
  */
465
516
  deriveLowScoringRepos(threshold) {
466
517
  const lowScoring = [];
467
- // The ScoutStateReader doesn't expose a bulk "get all repos with scores" method,
468
- // so we rely on the mergedPRRepos + starredRepos as the universe of known repos
469
- // and check each one's score. Repos not in state simply return null (no penalty).
470
518
  const knownRepos = new Set([
471
519
  ...this.stateReader.getReposWithMergedPRs(),
472
520
  ...this.stateReader.getStarredRepos(),