@oss-scout/core 0.11.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68):
  1. package/dist/cli.bundle.cjs +89 -66
  2. package/dist/cli.js +302 -436
  3. package/dist/commands/command-scout.d.ts +21 -0
  4. package/dist/commands/command-scout.js +21 -0
  5. package/dist/commands/config.js +10 -128
  6. package/dist/commands/features.js +15 -28
  7. package/dist/commands/results.d.ts +13 -2
  8. package/dist/commands/results.js +29 -2
  9. package/dist/commands/search.d.ts +4 -0
  10. package/dist/commands/search.js +65 -70
  11. package/dist/commands/setup.d.ts +2 -0
  12. package/dist/commands/setup.js +35 -6
  13. package/dist/commands/skip.d.ts +4 -0
  14. package/dist/commands/skip.js +45 -55
  15. package/dist/commands/sync.d.ts +10 -0
  16. package/dist/commands/sync.js +10 -0
  17. package/dist/commands/vet-list.js +3 -19
  18. package/dist/commands/vet.js +18 -25
  19. package/dist/commands/with-scout.d.ts +32 -0
  20. package/dist/commands/with-scout.js +41 -0
  21. package/dist/core/anti-llm-policy.js +5 -33
  22. package/dist/core/bootstrap.d.ts +2 -2
  23. package/dist/core/bootstrap.js +5 -9
  24. package/dist/core/errors.d.ts +10 -0
  25. package/dist/core/errors.js +20 -5
  26. package/dist/core/feature-discovery.d.ts +13 -1
  27. package/dist/core/feature-discovery.js +104 -81
  28. package/dist/core/gist-state-store.d.ts +13 -12
  29. package/dist/core/gist-state-store.js +128 -53
  30. package/dist/core/http-cache.d.ts +32 -2
  31. package/dist/core/http-cache.js +74 -19
  32. package/dist/core/issue-discovery.d.ts +12 -1
  33. package/dist/core/issue-discovery.js +94 -67
  34. package/dist/core/issue-eligibility.d.ts +11 -4
  35. package/dist/core/issue-eligibility.js +124 -69
  36. package/dist/core/issue-graphql.d.ts +58 -0
  37. package/dist/core/issue-graphql.js +108 -0
  38. package/dist/core/issue-vetting.d.ts +115 -9
  39. package/dist/core/issue-vetting.js +246 -109
  40. package/dist/core/local-state.d.ts +6 -2
  41. package/dist/core/local-state.js +23 -5
  42. package/dist/core/logger.d.ts +12 -4
  43. package/dist/core/logger.js +33 -7
  44. package/dist/core/personalization.d.ts +30 -10
  45. package/dist/core/personalization.js +64 -24
  46. package/dist/core/preference-fields.d.ts +47 -0
  47. package/dist/core/preference-fields.js +180 -0
  48. package/dist/core/probe-repo-file.d.ts +47 -0
  49. package/dist/core/probe-repo-file.js +57 -0
  50. package/dist/core/repo-health.js +40 -32
  51. package/dist/core/roadmap.js +26 -22
  52. package/dist/core/schemas.d.ts +148 -26
  53. package/dist/core/schemas.js +83 -17
  54. package/dist/core/search-budget.d.ts +9 -0
  55. package/dist/core/search-budget.js +36 -3
  56. package/dist/core/search-phases.d.ts +4 -21
  57. package/dist/core/search-phases.js +37 -89
  58. package/dist/core/types.d.ts +151 -38
  59. package/dist/core/utils.js +60 -26
  60. package/dist/formatters/human.d.ts +60 -0
  61. package/dist/formatters/human.js +199 -0
  62. package/dist/formatters/markdown.d.ts +10 -0
  63. package/dist/formatters/markdown.js +31 -0
  64. package/dist/index.d.ts +6 -2
  65. package/dist/index.js +8 -0
  66. package/dist/scout.d.ts +75 -12
  67. package/dist/scout.js +265 -26
  68. package/package.json +1 -1
@@ -16,6 +16,25 @@ import { getCacheDir } from "./utils.js";
16
16
  import { debug, warn } from "./logger.js";
17
17
  import { errorMessage, getHttpStatusCode } from "./errors.js";
18
18
  const MODULE = "http-cache";
19
+ /**
20
+ * Schema version for cache entries whose body is an oss-scout-defined shape
21
+ * (vetting results, search payloads, policy scans, merged-PR counts) rather
22
+ * than a raw GitHub API response. These are deserialized with an unchecked
23
+ * cast, so a shape change between releases would otherwise let a new build read
24
+ * a stale-shaped entry. Bump this whenever one of those cached shapes changes:
25
+ * old entries then miss the version-prefixed key and are refetched instead of
26
+ * misread (#158). Raw ETag-keyed GitHub responses are not versioned — their
27
+ * shape is owned by GitHub, not us.
28
+ */
29
+ export const CACHE_SCHEMA_VERSION = "v1";
30
+ /**
31
+ * Prefix a synthetic (non-URL) cache key with the schema version so a shape
32
+ * change invalidates old entries. Use for every key whose body is read back
33
+ * with an unchecked cast.
34
+ */
35
+ export function versionedCacheKey(key) {
36
+ return `${CACHE_SCHEMA_VERSION}:${key}`;
37
+ }
19
38
  /**
20
39
  * Maximum age (in ms) before a cache entry is considered stale and eligible for
21
40
  * eviction during cleanup. Defaults to 24 hours. Entries older than this are
@@ -51,13 +70,21 @@ export class HttpCache {
51
70
  * (e.g., caching aggregated results from paginated API calls).
52
71
  */
53
72
  getIfFresh(key, maxAgeMs) {
73
+ return this.getEntryIfFresh(key, maxAgeMs)?.body ?? null;
74
+ }
75
+ /**
76
+ * Like {@link getIfFresh}, but returns the whole entry so callers can
77
+ * distinguish "no fresh entry" (null) from a legitimately cached falsy
78
+ * body (`0`, `""`, `false`, `null`).
79
+ */
80
+ getEntryIfFresh(key, maxAgeMs) {
54
81
  const entry = this.get(key);
55
82
  if (!entry)
56
83
  return null;
57
84
  const age = Date.now() - new Date(entry.cachedAt).getTime();
58
85
  if (!Number.isFinite(age) || age < 0 || age > maxAgeMs)
59
86
  return null;
60
- return entry.body;
87
+ return entry;
61
88
  }
62
89
  /**
63
90
  * Look up a cached response. Returns `null` if no cache entry exists.
@@ -246,13 +273,29 @@ export function getHttpCache() {
246
273
  * cached body without consuming a rate-limit point.
247
274
  * 3. On a fresh 200, caches the ETag + body for next time.
248
275
  */
249
- export async function cachedRequest(cache, url, fetcher) {
250
- // --- Deduplication ---
251
- const existing = cache.getInflight(url);
276
+ /**
277
+ * Share one in-flight computation per key: concurrent callers for the same
278
+ * key await the same promise instead of paying duplicate API calls (#124).
279
+ * The check-then-register pair runs without an intervening await, so two
280
+ * concurrent callers cannot both miss. Rejections propagate to every waiter
281
+ * and are never cached.
282
+ */
283
+ export async function withInflightDedup(cache, key, fn) {
284
+ const existing = cache.getInflight(key);
252
285
  if (existing) {
253
- debug(MODULE, `Dedup hit for ${url}`);
286
+ debug(MODULE, `Dedup hit for ${key}`);
254
287
  return (await existing);
255
288
  }
289
+ const promise = fn();
290
+ const cleanup = cache.setInflight(key, promise);
291
+ try {
292
+ return await promise;
293
+ }
294
+ finally {
295
+ cleanup();
296
+ }
297
+ }
298
+ export async function cachedRequest(cache, url, fetcher) {
256
299
  const doFetch = async () => {
257
300
  const extraHeaders = {};
258
301
  const cached = cache.get(url);
@@ -276,19 +319,21 @@ export async function cachedRequest(cache, url, fetcher) {
276
319
  debug(MODULE, `304 cache hit for ${url}`);
277
320
  return freshCached.body;
278
321
  }
322
+ // The entry that supplied If-None-Match vanished mid-flight (e.g. a
323
+ // concurrent process deleted it). Refetch unconditionally; without
324
+ // the conditional header the server cannot answer 304 again.
325
+ debug(MODULE, `304 but cache entry vanished for ${url}, refetching`);
326
+ const response = await fetcher({});
327
+ const etag = response.headers?.["etag"];
328
+ if (etag) {
329
+ cache.set(url, etag, response.data);
330
+ }
331
+ return response.data;
279
332
  }
280
333
  throw err;
281
334
  }
282
335
  };
283
- const promise = doFetch();
284
- const cleanup = cache.setInflight(url, promise);
285
- try {
286
- const result = await promise;
287
- return result;
288
- }
289
- finally {
290
- cleanup();
291
- }
336
+ return withInflightDedup(cache, url, doFetch);
292
337
  }
293
338
  /**
294
339
  * Time-based cache wrapper (no ETag / conditional requests).
@@ -300,14 +345,24 @@ export async function cachedRequest(cache, url, fetcher) {
300
345
  * (e.g. search queries, project health checks).
301
346
  */
302
347
  export async function cachedTimeBased(cache, key, maxAgeMs, fetcher) {
303
- const cached = cache.getIfFresh(key, maxAgeMs);
348
+ const cached = cache.getEntryIfFresh(key, maxAgeMs);
304
349
  if (cached) {
305
350
  debug(MODULE, `Time-based cache hit for ${key}`);
306
- return cached;
351
+ return cached.body;
307
352
  }
308
- const result = await fetcher();
309
- cache.set(key, "", result);
310
- return result;
353
+ // Concurrent same-key callers (parallel vetting hitting one repo) share
354
+ // a single fetch instead of stampeding the API (#124)
355
+ return withInflightDedup(cache, key, async () => {
356
+ // Re-check inside the dedup window: a caller that finished while we
357
+ // queued may have populated the cache
358
+ const fresh = cache.getEntryIfFresh(key, maxAgeMs);
359
+ if (fresh) {
360
+ return fresh.body;
361
+ }
362
+ const result = await fetcher();
363
+ cache.set(key, "", result);
364
+ return result;
365
+ });
311
366
  }
312
367
  /**
313
368
  * Detect whether an error is a 304 Not Modified response.
@@ -11,6 +11,7 @@
11
11
  *
12
12
  * All state is injected via constructor parameters (ScoutStateReader + ScoutPreferences).
13
13
  */
14
+ import { type SearchBudgetTracker } from "./search-budget.js";
14
15
  import { type IssueCandidate } from "./types.js";
15
16
  import type { ScoutPreferences, SearchStrategy } from "./schemas.js";
16
17
  import { type ScoutStateReader } from "./issue-vetting.js";
@@ -31,14 +32,20 @@ export declare class IssueDiscovery {
31
32
  private octokit;
32
33
  private githubToken;
33
34
  private vetter;
35
+ private budgetTracker;
34
36
  /** Set after searchIssues() runs if rate limits affected the search (low pre-flight quota or mid-search rate limit hits). */
35
37
  rateLimitWarning: string | null;
36
38
  /**
37
39
  * @param githubToken - GitHub personal access token or token from `gh auth token`
38
40
  * @param preferences - User's search preferences (languages, labels, scopes, etc.)
39
41
  * @param stateReader - Read-only interface for accessing scout state (merged PRs, starred repos, etc.)
42
+ * @param budgetTracker - Search budget tracker. Defaults to the shared
43
+ * singleton so existing callers behave identically. A long-lived host
44
+ * serving concurrent searches can inject a per-search instance so one
45
+ * search's init() no longer resets the budget state of another (the
46
+ * shared-singleton concurrency hazard, #156).
40
47
  */
41
- constructor(githubToken: string, preferences: ScoutPreferences, stateReader: ScoutStateReader);
48
+ constructor(githubToken: string, preferences: ScoutPreferences, stateReader: ScoutStateReader, budgetTracker?: SearchBudgetTracker);
42
49
  /**
43
50
  * Get starred repos from the state reader.
44
51
  * @returns Array of starred repo names in "owner/repo" format
@@ -76,7 +83,11 @@ export declare class IssueDiscovery {
76
83
  skippedUrls?: Set<string>;
77
84
  preferLanguages?: string[];
78
85
  preferRepos?: string[];
86
+ avoidRepos?: string[];
87
+ boostIssueTypes?: string[];
79
88
  diversityRatio?: number;
89
+ interPhaseDelayMs?: number;
90
+ broadPhaseDelayMs?: number;
80
91
  }): Promise<{
81
92
  candidates: IssueCandidate[];
82
93
  strategiesUsed: SearchStrategy[];
@@ -12,7 +12,7 @@
12
12
  * All state is injected via constructor parameters (ScoutStateReader + ScoutPreferences).
13
13
  */
14
14
  import { getOctokit, checkRateLimit } from "./github.js";
15
- import { getSearchBudgetTracker } from "./search-budget.js";
15
+ import { getSearchBudgetTracker, } from "./search-budget.js";
16
16
  import { daysBetween, extractRepoFromUrl, sleep } from "./utils.js";
17
17
  import { SCOPE_LABELS, } from "./types.js";
18
18
  import { CONCRETE_STRATEGIES } from "./schemas.js";
@@ -22,7 +22,7 @@ import { isDocOnlyIssue, applyPerRepoCap, } from "./issue-filtering.js";
22
22
  import { IssueVetter } from "./issue-vetting.js";
23
23
  import { getTopicsForCategories } from "./category-mapping.js";
24
24
  import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, fetchIssuesFromMaintainedRepos, filterVetAndScore, fetchIssuesFromKnownRepos, searchAcrossLanguagesAndLabels, } from "./search-phases.js";
25
- import { annotateBoost, applyDiversityRatio } from "./personalization.js";
25
+ import { annotateBoost, applyDiversityRatio, boostScoreOf, } from "./personalization.js";
26
26
  const MODULE = "issue-discovery";
27
27
  /** If remaining search quota is below this, skip heavy phases (2, 3). */
28
28
  const LOW_BUDGET_THRESHOLD = 20;
@@ -35,16 +35,19 @@ function buildIssueFilter(config) {
35
35
  const repoFullName = extractRepoFromUrl(item.repository_url);
36
36
  if (!repoFullName)
37
37
  return false;
38
- if (config.excludedRepos.has(repoFullName))
38
+ // Repo-name sets are lowercased at construction; compare lowercased so
39
+ // user-typed casing (Microsoft/TypeScript) still matches API casing.
40
+ const repoLower = repoFullName.toLowerCase();
41
+ if (config.excludedRepos.has(repoLower))
39
42
  return false;
40
43
  if (config.excludeOrgs.size > 0) {
41
- const orgName = repoFullName.split("/")[0]?.toLowerCase();
44
+ const orgName = repoLower.split("/")[0];
42
45
  if (orgName && config.excludeOrgs.has(orgName))
43
46
  return false;
44
47
  }
45
- if (config.aiBlocklisted.has(repoFullName))
48
+ if (config.aiBlocklisted.has(repoLower))
46
49
  return false;
47
- if (config.lowScoringRepos.has(repoFullName))
50
+ if (config.lowScoringRepos.has(repoLower))
48
51
  return false;
49
52
  if (config.skippedUrls.has(item.html_url))
50
53
  return false;
@@ -84,7 +87,7 @@ async function runPhase1(octokit, vetter, repos, labels, maxResults, filterIssue
84
87
  };
85
88
  }
86
89
  /** Phase 2: General label-filtered search with multi-tier interleaving. */
87
- async function runPhase2(octokit, vetter, scopes, labels, configLabels, languages, isAnyLanguage, maxResults, minStars, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues) {
90
+ async function runPhase2(octokit, vetter, scopes, labels, configLabels, languages, isAnyLanguage, maxResults, minStars, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues, tracker) {
88
91
  info(MODULE, "Phase 2: General issue search...");
89
92
  const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
90
93
  // Build per-tier label groups. Multi-tier when 2+ scopes; single-tier otherwise.
@@ -113,7 +116,7 @@ async function runPhase2(octokit, vetter, scopes, labels, configLabels, language
113
116
  let rateLimitHit = false;
114
117
  for (const { tier, tierLabels } of tierLabelGroups) {
115
118
  try {
116
- const allItems = await searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, tierLabels, (langQ) => `is:issue is:open ${langQ} no:assignee`.replace(/ +/g, " ").trim(), budgetPerTier * 3);
119
+ const allItems = await searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, tierLabels, (langQ) => `is:issue is:open ${langQ} no:assignee`.replace(/ +/g, " ").trim(), budgetPerTier * 3, tracker);
117
120
  info(MODULE, `Phase 2 [${tier}]: processing ${allItems.length} items...`);
118
121
  const { candidates: tierCandidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, allItems, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], budgetPerTier, minStars, `Phase 2 [${tier}]`);
119
122
  tierResults.push(tierCandidates);
@@ -150,7 +153,7 @@ async function runPhase2(octokit, vetter, scopes, labels, configLabels, language
150
153
  };
151
154
  }
152
155
  /** Phase 3: Actively maintained repos (REST-first, Search API fallback). */
153
- async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories, maxResults, phase0RepoSet, starredRepoSet, starredRepos, existingCandidates, filterIssues) {
156
+ async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories, maxResults, phase0RepoSet, starredRepoSet, starredRepos, existingCandidates, filterIssues, tracker) {
154
157
  info(MODULE, "Phase 3: Searching actively maintained repos...");
155
158
  const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
156
159
  // Step 1: Try REST API with starred repos first (no Search API quota used)
@@ -193,7 +196,7 @@ async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories
193
196
  sort: "updated",
194
197
  order: "desc",
195
198
  per_page: maxResults * 3,
196
- });
199
+ }, tracker);
197
200
  info(MODULE, `Found ${data.total_count} issues in maintained-repo search, processing top ${data.items.length}...`);
198
201
  const { candidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, data.items, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], maxResults, minStars, "Phase 3");
199
202
  info(MODULE, `Found ${candidates.length} candidates from maintained-repo search`);
@@ -233,19 +236,28 @@ export class IssueDiscovery {
233
236
  octokit;
234
237
  githubToken;
235
238
  vetter;
239
+ budgetTracker;
236
240
  /** Set after searchIssues() runs if rate limits affected the search (low pre-flight quota or mid-search rate limit hits). */
237
241
  rateLimitWarning = null;
238
242
  /**
239
243
  * @param githubToken - GitHub personal access token or token from `gh auth token`
240
244
  * @param preferences - User's search preferences (languages, labels, scopes, etc.)
241
245
  * @param stateReader - Read-only interface for accessing scout state (merged PRs, starred repos, etc.)
246
+ * @param budgetTracker - Search budget tracker. Defaults to the shared
247
+ * singleton so existing callers behave identically. A long-lived host
248
+ * serving concurrent searches can inject a per-search instance so one
249
+ * search's init() no longer resets the budget state of another (the
250
+ * shared-singleton concurrency hazard, #156).
242
251
  */
243
- constructor(githubToken, preferences, stateReader) {
252
+ constructor(githubToken, preferences, stateReader, budgetTracker = getSearchBudgetTracker()) {
244
253
  this.preferences = preferences;
245
254
  this.stateReader = stateReader;
246
255
  this.githubToken = githubToken;
247
256
  this.octokit = getOctokit(githubToken);
248
- this.vetter = new IssueVetter(this.octokit, this.stateReader);
257
+ this.budgetTracker = budgetTracker;
258
+ // Thread the same tracker into the vetter so the merged-PR Search API
259
+ // call (checkUserMergedPRsInRepo) pays the same budget as the search phases.
260
+ this.vetter = new IssueVetter(this.octokit, this.stateReader, this.budgetTracker);
249
261
  }
250
262
  /**
251
263
  * Get starred repos from the state reader.
@@ -286,19 +298,38 @@ export class IssueDiscovery {
286
298
  (scopes ? buildEffectiveLabels(scopes, config.labels) : config.labels);
287
299
  const maxResults = options.maxResults || 10;
288
300
  const minStars = config.minStars ?? 50;
289
- const interPhaseDelay = config.interPhaseDelayMs ?? 30000;
290
- // Strategy selection
301
+ const interPhaseDelay = options.interPhaseDelayMs ?? config.interPhaseDelayMs ?? 30000;
302
+ // Strategy selection. Empty arrays count as "unset" so a stored
303
+ // defaultStrategy of [] can't silently produce zero-strategy searches.
291
304
  const ALL_STRATEGIES = CONCRETE_STRATEGIES;
292
- const rawStrategies = options.strategies ??
293
- config.defaultStrategy ?? ["all"];
305
+ const pickStrategies = (...candidates) => candidates.find((c) => c && c.length > 0) ?? ["all"];
306
+ const rawStrategies = pickStrategies(options.strategies, config.defaultStrategy);
294
307
  const enabledStrategies = new Set(rawStrategies.includes("all") ? ALL_STRATEGIES : rawStrategies);
295
308
  const strategiesUsed = [];
296
309
  const allCandidates = [];
297
310
  const phaseErrors = {};
298
311
  let rateLimitHitDuringSearch = false;
312
+ // The standard inter-phase pause for rate-limit management. Phases 1, 2,
313
+ // and 3 all apply this identical delay before querying (Phase 0 is first,
314
+ // so it never waits). The broad phase wraps this with an extra cooldown.
315
+ const applyInterPhaseDelay = async () => {
316
+ if (interPhaseDelay > 0) {
317
+ info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
318
+ await sleep(interPhaseDelay);
319
+ }
320
+ };
321
+ // Fold a phase's result into the running totals. Every phase accumulates
322
+ // candidates, records its error under a stable key, and flips the
323
+ // rate-limit flag the same way; only the key and the result differ.
324
+ const recordPhaseResult = (key, result) => {
325
+ allCandidates.push(...result.candidates);
326
+ phaseErrors[key] = result.error;
327
+ if (result.rateLimitHit)
328
+ rateLimitHitDuringSearch = true;
329
+ };
299
330
  // Pre-flight rate limit check
300
331
  this.rateLimitWarning = null;
301
- const tracker = getSearchBudgetTracker();
332
+ const tracker = this.budgetTracker;
302
333
  let searchBudget = LOW_BUDGET_THRESHOLD - 1;
303
334
  try {
304
335
  const rateLimit = await checkRateLimit(this.githubToken);
@@ -332,19 +363,19 @@ export class IssueDiscovery {
332
363
  const openPRRepos = this.stateReader.getReposWithOpenPRs();
333
364
  const starredRepos = this.getStarredRepos();
334
365
  const starredRepoSet = new Set(starredRepos);
335
- const lowScoringRepos = new Set(this.deriveLowScoringRepos(config.minRepoScoreThreshold));
366
+ const lowScoringRepos = new Set(this.deriveLowScoringRepos(config.minRepoScoreThreshold).map((r) => r.toLowerCase()));
336
367
  // Build query parts
337
368
  const isAnyLanguage = languages.some((l) => l.toLowerCase() === "any");
338
369
  const langQuery = isAnyLanguage
339
370
  ? ""
340
371
  : languages.map((l) => `language:${l}`).join(" ");
341
- // Build reusable filter
342
- const aiBlocklisted = new Set(config.aiPolicyBlocklist);
372
+ // Build reusable filter (repo-name sets lowercased; see buildIssueFilter)
373
+ const aiBlocklisted = new Set(config.aiPolicyBlocklist.map((r) => r.toLowerCase()));
343
374
  if (aiBlocklisted.size > 0) {
344
375
  debug(MODULE, `[AI_POLICY_FILTER] Filtering issues from ${aiBlocklisted.size} blocklisted repo(s): ${[...aiBlocklisted].join(", ")}`);
345
376
  }
346
377
  const filterIssues = buildIssueFilter({
347
- excludedRepos: new Set(config.excludeRepos),
378
+ excludedRepos: new Set(config.excludeRepos.map((r) => r.toLowerCase())),
348
379
  excludeOrgs: new Set((config.excludeOrgs ?? []).map((o) => o.toLowerCase())),
349
380
  aiBlocklisted,
350
381
  lowScoringRepos,
@@ -371,10 +402,7 @@ export class IssueDiscovery {
371
402
  const remaining = maxResults - allCandidates.length;
372
403
  if (remaining > 0) {
373
404
  const result = await runPhase0(this.octokit, this.vetter, phase0Repos, remaining, filterIssues);
374
- allCandidates.push(...result.candidates);
375
- phaseErrors["0"] = result.error;
376
- if (result.rateLimitHit)
377
- rateLimitHitDuringSearch = true;
405
+ recordPhaseResult("0", result);
378
406
  }
379
407
  strategiesUsed.push("merged");
380
408
  }
@@ -383,26 +411,28 @@ export class IssueDiscovery {
383
411
  starredRepos.length > 0 &&
384
412
  searchBudget >= CRITICAL_BUDGET_THRESHOLD &&
385
413
  enabledStrategies.has("starred")) {
386
- if (interPhaseDelay > 0) {
387
- info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
388
- await sleep(interPhaseDelay);
389
- }
414
+ await applyInterPhaseDelay();
390
415
  const reposToSearch = starredRepos.filter((r) => !phase0RepoSet.has(r));
391
416
  if (reposToSearch.length > 0) {
392
417
  const remaining = maxResults - allCandidates.length;
393
418
  if (remaining > 0) {
394
419
  const result = await runPhase1(this.octokit, this.vetter, reposToSearch, labels, remaining, filterIssues);
395
- allCandidates.push(...result.candidates);
396
- phaseErrors["1"] = result.error;
397
- if (result.rateLimitHit)
398
- rateLimitHitDuringSearch = true;
420
+ recordPhaseResult("1", result);
421
+ // Recorded only when the phase actually queried (#130)
422
+ strategiesUsed.push("starred");
399
423
  }
400
424
  }
401
- strategiesUsed.push("starred");
402
425
  }
403
426
  // Phase 2: General search (with rate limit mitigation)
404
- const broadDelay = config.broadPhaseDelayMs ?? 90000;
405
- const skipThreshold = config.skipBroadWhenSufficientResults ?? 15;
427
+ const broadDelay = options.broadPhaseDelayMs ?? config.broadPhaseDelayMs ?? 90000;
428
+ // Clamp to maxResults - 1: the phase gate below already skips the whole
429
+ // phase at >= maxResults, so any larger threshold would be unsatisfiable
430
+ // (the default 15 vs default maxResults 10 made this dead config). 0
431
+ // stays "never skip".
432
+ const configuredSkipThreshold = config.skipBroadWhenSufficientResults ?? 8;
433
+ const skipThreshold = configuredSkipThreshold > 0
434
+ ? Math.min(configuredSkipThreshold, maxResults - 1)
435
+ : 0;
406
436
  if (allCandidates.length < maxResults &&
407
437
  searchBudget >= LOW_BUDGET_THRESHOLD &&
408
438
  enabledStrategies.has("broad")) {
@@ -412,10 +442,7 @@ export class IssueDiscovery {
412
442
  }
413
443
  else {
414
444
  // Always apply baseline inter-phase delay
415
- if (interPhaseDelay > 0) {
416
- info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
417
- await sleep(interPhaseDelay);
418
- }
445
+ await applyInterPhaseDelay();
419
446
  // Apply additional broad-phase cooldown, but skip if previous phases found nothing
420
447
  if (allCandidates.length > 0 && broadDelay > 0) {
421
448
  info(MODULE, `Waiting ${(broadDelay / 1000).toFixed(0)}s for rate limit cooldown before broad search...`);
@@ -425,28 +452,21 @@ export class IssueDiscovery {
425
452
  info(MODULE, `Skipping broad phase delay: no results from previous phases, proceeding immediately`);
426
453
  }
427
454
  const remaining = maxResults - allCandidates.length;
428
- const result = await runPhase2(this.octokit, this.vetter, scopes, labels, config.labels, languages, isAnyLanguage, remaining, minStars, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
429
- allCandidates.push(...result.candidates);
430
- phaseErrors["2"] = result.error;
431
- if (result.rateLimitHit)
432
- rateLimitHitDuringSearch = true;
455
+ const result = await runPhase2(this.octokit, this.vetter, scopes, labels, config.labels, languages, isAnyLanguage, remaining, minStars, phase0RepoSet, starredRepoSet, allCandidates, filterIssues, tracker);
456
+ recordPhaseResult("2", result);
457
+ // Recorded only when the phase actually queried, not when the
458
+ // skip-threshold branch short-circuited it (#130)
459
+ strategiesUsed.push("broad");
433
460
  }
434
- strategiesUsed.push("broad");
435
461
  }
436
462
  // Phase 3: Actively maintained repos
437
463
  if (allCandidates.length < maxResults &&
438
464
  searchBudget >= LOW_BUDGET_THRESHOLD &&
439
465
  enabledStrategies.has("maintained")) {
440
- if (interPhaseDelay > 0) {
441
- info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
442
- await sleep(interPhaseDelay);
443
- }
466
+ await applyInterPhaseDelay();
444
467
  const remaining = maxResults - allCandidates.length;
445
- const result = await runPhase3(this.octokit, this.vetter, langQuery, minStars, config.projectCategories ?? [], remaining, phase0RepoSet, starredRepoSet, starredRepos, allCandidates, filterIssues);
446
- allCandidates.push(...result.candidates);
447
- phaseErrors["3"] = result.error;
448
- if (result.rateLimitHit)
449
- rateLimitHitDuringSearch = true;
468
+ const result = await runPhase3(this.octokit, this.vetter, langQuery, minStars, config.projectCategories ?? [], remaining, phase0RepoSet, starredRepoSet, starredRepos, allCandidates, filterIssues, tracker);
469
+ recordPhaseResult("3", result);
450
470
  strategiesUsed.push("maintained");
451
471
  }
452
472
  // Build result / error summary
@@ -487,12 +507,19 @@ export class IssueDiscovery {
487
507
  `Found ${allCandidates.length} candidate${allCandidates.length === 1 ? "" : "s"} but some search phases were limited. ` +
488
508
  `Try again after the rate limit resets for complete results.`;
489
509
  }
490
- // Personalization annotation (#1244): tag each candidate with
491
- // boostScore + boostReasons before sorting so the new sort tier has
492
- // values to read. No-op when neither preference list is supplied.
493
- annotateBoost(allCandidates, options.preferLanguages, options.preferRepos);
510
+ // Personalization annotation (#1244, extended #168): tag candidates with a
511
+ // net `personalization` marker (preferRepos/preferLanguages/boostIssueTypes
512
+ // add, avoidRepos subtracts) before sorting so the sort tier has values to
513
+ // read. Returns a new array (no in-place candidate mutation, #158); a no-op
514
+ // when none of the bias lists are supplied.
515
+ const ranked = annotateBoost(allCandidates, {
516
+ preferLanguages: options.preferLanguages,
517
+ preferRepos: options.preferRepos,
518
+ avoidRepos: options.avoidRepos,
519
+ boostIssueTypes: options.boostIssueTypes,
520
+ });
494
521
  // Sort by priority, recommendation, boost (#1244), then viability score
495
- allCandidates.sort((a, b) => {
522
+ ranked.sort((a, b) => {
496
523
  const priorityOrder = {
497
524
  merged_pr: 0,
498
525
  starred: 1,
@@ -506,17 +533,17 @@ export class IssueDiscovery {
506
533
  recommendationOrder[b.recommendation];
507
534
  if (recDiff !== 0)
508
535
  return recDiff;
509
- // Personalization tier (#1244): higher boostScore wins. Treats
510
- // undefined as 0 so unboosted candidates rank below boosted peers
511
- // but stay ordered among themselves by viabilityScore. No-op when
512
- // `preferLanguages`/`preferRepos` are absent — all candidates carry
513
- // `boostScore: undefined` and the difference collapses to 0.
514
- const boostDiff = (b.boostScore ?? 0) - (a.boostScore ?? 0);
536
+ // Personalization tier (#1244): higher boost wins. boostScoreOf treats
537
+ // an unboosted candidate as 0 so they rank below boosted peers but stay
538
+ // ordered among themselves by viabilityScore. No-op when
539
+ // `preferLanguages`/`preferRepos` are absent — every candidate scores 0
540
+ // and the difference collapses.
541
+ const boostDiff = boostScoreOf(b) - boostScoreOf(a);
515
542
  if (boostDiff !== 0)
516
543
  return boostDiff;
517
544
  return b.viabilityScore - a.viabilityScore;
518
545
  });
519
- const capped = applyPerRepoCap(allCandidates, 2);
546
+ const capped = applyPerRepoCap(ranked, 2);
520
547
  // Diversity counterweight (#1244): when `diversityRatio > 0`, reserve
521
548
  // a fraction of the final slots for candidates that matched neither
522
549
  // preference list. No-op when the ratio is 0 or absent — collapses to
@@ -6,11 +6,18 @@
6
6
  * Extracted from issue-vetting.ts to isolate eligibility logic.
7
7
  */
8
8
  import { Octokit } from "@octokit/rest";
9
+ import { type SearchBudgetTracker } from "./search-budget.js";
9
10
  import type { CheckResult, LinkedPR } from "./types.js";
10
- /** Result of the existing-PR check, including metadata for the first linked PR (if any). */
11
- export interface ExistingPRCheckResult extends CheckResult {
11
+ /**
12
+ * Result of the existing-PR check, including metadata for the first linked PR
13
+ * (if any). An intersection (not `extends`) because CheckResult is now a
14
+ * discriminated union (#158); the `& { linkedPR }` distributes over both arms.
15
+ */
16
+ export type ExistingPRCheckResult = CheckResult & {
12
17
  linkedPR: LinkedPR | null;
13
- }
18
+ };
19
+ /** True when a single comment body claims the issue. */
20
+ export declare function commentClaimsIssue(body: string): boolean;
14
21
  /**
15
22
  * Check whether an open PR already exists for the given issue.
16
23
  * Uses the timeline API (REST) to detect cross-referenced PRs, avoiding
@@ -23,7 +30,7 @@ export declare function checkNoExistingPR(octokit: Octokit, owner: string, repo:
23
30
  * Results are cached per-repo for 15 minutes to avoid redundant Search API
24
31
  * calls when multiple issues from the same repo are vetted.
25
32
  */
26
- export declare function checkUserMergedPRsInRepo(octokit: Octokit, owner: string, repo: string): Promise<number>;
33
+ export declare function checkUserMergedPRsInRepo(octokit: Octokit, owner: string, repo: string, tracker?: SearchBudgetTracker): Promise<number | null>;
27
34
  /**
28
35
  * Check whether an issue has been claimed by another contributor
29
36
  * by scanning recent comments for claim phrases.