npm - @oss-scout/core - Versions diffs - 0.11.0 → 1.1.0 - Mend

@oss-scout/core 0.11.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68)

package/dist/cli.bundle.cjs +89 -66
package/dist/cli.js +302 -436
package/dist/commands/command-scout.d.ts +21 -0
package/dist/commands/command-scout.js +21 -0
package/dist/commands/config.js +10 -128
package/dist/commands/features.js +15 -28
package/dist/commands/results.d.ts +13 -2
package/dist/commands/results.js +29 -2
package/dist/commands/search.d.ts +4 -0
package/dist/commands/search.js +65 -70
package/dist/commands/setup.d.ts +2 -0
package/dist/commands/setup.js +35 -6
package/dist/commands/skip.d.ts +4 -0
package/dist/commands/skip.js +45 -55
package/dist/commands/sync.d.ts +10 -0
package/dist/commands/sync.js +10 -0
package/dist/commands/vet-list.js +3 -19
package/dist/commands/vet.js +18 -25
package/dist/commands/with-scout.d.ts +32 -0
package/dist/commands/with-scout.js +41 -0
package/dist/core/anti-llm-policy.js +5 -33
package/dist/core/bootstrap.d.ts +2 -2
package/dist/core/bootstrap.js +5 -9
package/dist/core/errors.d.ts +10 -0
package/dist/core/errors.js +20 -5
package/dist/core/feature-discovery.d.ts +13 -1
package/dist/core/feature-discovery.js +104 -81
package/dist/core/gist-state-store.d.ts +13 -12
package/dist/core/gist-state-store.js +128 -53
package/dist/core/http-cache.d.ts +32 -2
package/dist/core/http-cache.js +74 -19
package/dist/core/issue-discovery.d.ts +12 -1
package/dist/core/issue-discovery.js +94 -67
package/dist/core/issue-eligibility.d.ts +11 -4
package/dist/core/issue-eligibility.js +124 -69
package/dist/core/issue-graphql.d.ts +58 -0
package/dist/core/issue-graphql.js +108 -0
package/dist/core/issue-vetting.d.ts +115 -9
package/dist/core/issue-vetting.js +246 -109
package/dist/core/local-state.d.ts +6 -2
package/dist/core/local-state.js +23 -5
package/dist/core/logger.d.ts +12 -4
package/dist/core/logger.js +33 -7
package/dist/core/personalization.d.ts +30 -10
package/dist/core/personalization.js +64 -24
package/dist/core/preference-fields.d.ts +47 -0
package/dist/core/preference-fields.js +180 -0
package/dist/core/probe-repo-file.d.ts +47 -0
package/dist/core/probe-repo-file.js +57 -0
package/dist/core/repo-health.js +40 -32
package/dist/core/roadmap.js +26 -22
package/dist/core/schemas.d.ts +148 -26
package/dist/core/schemas.js +83 -17
package/dist/core/search-budget.d.ts +9 -0
package/dist/core/search-budget.js +36 -3
package/dist/core/search-phases.d.ts +4 -21
package/dist/core/search-phases.js +37 -89
package/dist/core/types.d.ts +151 -38
package/dist/core/utils.js +60 -26
package/dist/formatters/human.d.ts +60 -0
package/dist/formatters/human.js +199 -0
package/dist/formatters/markdown.d.ts +10 -0
package/dist/formatters/markdown.js +31 -0
package/dist/index.d.ts +6 -2
package/dist/index.js +8 -0
package/dist/scout.d.ts +75 -12
package/dist/scout.js +265 -26
package/package.json +1 -1

package/dist/core/http-cache.js CHANGED

@@ -16,6 +16,25 @@ import { getCacheDir } from "./utils.js";
 import { debug, warn } from "./logger.js";
 import { errorMessage, getHttpStatusCode } from "./errors.js";
 const MODULE = "http-cache";
+/**
+ * Schema version for cache entries whose body is an oss-scout-defined shape
+ * (vetting results, search payloads, policy scans, merged-PR counts) rather
+ * than a raw GitHub API response. These are deserialized with an unchecked
+ * cast, so a shape change between releases would otherwise let a new build read
+ * a stale-shaped entry. Bump this whenever one of those cached shapes changes:
+ * old entries then miss the version-prefixed key and are refetched instead of
+ * misread (#158). Raw ETag-keyed GitHub responses are not versioned — their
+ * shape is owned by GitHub, not us.
+ */
+export const CACHE_SCHEMA_VERSION = "v1";
+/**
+ * Prefix a synthetic (non-URL) cache key with the schema version so a shape
+ * change invalidates old entries. Use for every key whose body is read back
+ * with an unchecked cast.
+ */
+export function versionedCacheKey(key) {
+    return `${CACHE_SCHEMA_VERSION}:${key}`;
+}
 /**
  * Maximum age (in ms) before a cache entry is considered stale and eligible for
  * eviction during cleanup. Defaults to 24 hours. Entries older than this are
@@ -51,13 +70,21 @@ export class HttpCache {
      * (e.g., caching aggregated results from paginated API calls).
      */
     getIfFresh(key, maxAgeMs) {
+        return this.getEntryIfFresh(key, maxAgeMs)?.body ?? null;
+    }
+    /**
+     * Like {@link getIfFresh}, but returns the whole entry so callers can
+     * distinguish "no fresh entry" (null) from a legitimately cached falsy
+     * body (`0`, `""`, `false`, `null`).
+     */
+    getEntryIfFresh(key, maxAgeMs) {
         const entry = this.get(key);
         if (!entry)
             return null;
         const age = Date.now() - new Date(entry.cachedAt).getTime();
         if (!Number.isFinite(age) || age < 0 || age > maxAgeMs)
             return null;
-        return entry.body;
+        return entry;
     }
     /**
      * Look up a cached response. Returns `null` if no cache entry exists.
@@ -246,13 +273,29 @@ export function getHttpCache() {
  *    cached body without consuming a rate-limit point.
  * 3. On a fresh 200, caches the ETag + body for next time.
  */
-export async function cachedRequest(cache, url, fetcher) {
-    // --- Deduplication ---
-    const existing = cache.getInflight(url);
+/**
+ * Share one in-flight computation per key: concurrent callers for the same
+ * key await the same promise instead of paying duplicate API calls (#124).
+ * The check-then-register pair runs without an intervening await, so two
+ * concurrent callers cannot both miss. Rejections propagate to every waiter
+ * and are never cached.
+ */
+export async function withInflightDedup(cache, key, fn) {
+    const existing = cache.getInflight(key);
     if (existing) {
-        debug(MODULE, `Dedup hit for ${url}`);
+        debug(MODULE, `Dedup hit for ${key}`);
         return (await existing);
     }
+    const promise = fn();
+    const cleanup = cache.setInflight(key, promise);
+    try {
+        return await promise;
+    }
+    finally {
+        cleanup();
+    }
+}
+export async function cachedRequest(cache, url, fetcher) {
     const doFetch = async () => {
         const extraHeaders = {};
         const cached = cache.get(url);
@@ -276,19 +319,21 @@ export async function cachedRequest(cache, url, fetcher) {
                     debug(MODULE, `304 cache hit for ${url}`);
                     return freshCached.body;
                 }
+                // The entry that supplied If-None-Match vanished mid-flight (e.g. a
+                // concurrent process deleted it). Refetch unconditionally; without
+                // the conditional header the server cannot answer 304 again.
+                debug(MODULE, `304 but cache entry vanished for ${url}, refetching`);
+                const response = await fetcher({});
+                const etag = response.headers?.["etag"];
+                if (etag) {
+                    cache.set(url, etag, response.data);
+                }
+                return response.data;
             }
             throw err;
         }
     };
-    const promise = doFetch();
-    const cleanup = cache.setInflight(url, promise);
-    try {
-        const result = await promise;
-        return result;
-    }
-    finally {
-        cleanup();
-    }
+    return withInflightDedup(cache, url, doFetch);
 }
 /**
  * Time-based cache wrapper (no ETag / conditional requests).
@@ -300,14 +345,24 @@ export async function cachedRequest(cache, url, fetcher) {
  * (e.g. search queries, project health checks).
  */
 export async function cachedTimeBased(cache, key, maxAgeMs, fetcher) {
-    const cached = cache.getIfFresh(key, maxAgeMs);
+    const cached = cache.getEntryIfFresh(key, maxAgeMs);
     if (cached) {
         debug(MODULE, `Time-based cache hit for ${key}`);
-        return cached;
+        return cached.body;
     }
-    const result = await fetcher();
-    cache.set(key, "", result);
-    return result;
+    // Concurrent same-key callers (parallel vetting hitting one repo) share
+    // a single fetch instead of stampeding the API (#124)
+    return withInflightDedup(cache, key, async () => {
+        // Re-check inside the dedup window: a caller that finished while we
+        // queued may have populated the cache
+        const fresh = cache.getEntryIfFresh(key, maxAgeMs);
+        if (fresh) {
+            return fresh.body;
+        }
+        const result = await fetcher();
+        cache.set(key, "", result);
+        return result;
+    });
 }
 /**
  * Detect whether an error is a 304 Not Modified response.

package/dist/core/issue-discovery.d.ts CHANGED

@@ -11,6 +11,7 @@
  *
  * All state is injected via constructor parameters (ScoutStateReader + ScoutPreferences).
  */
+import { type SearchBudgetTracker } from "./search-budget.js";
 import { type IssueCandidate } from "./types.js";
 import type { ScoutPreferences, SearchStrategy } from "./schemas.js";
 import { type ScoutStateReader } from "./issue-vetting.js";
@@ -31,14 +32,20 @@ export declare class IssueDiscovery {
     private octokit;
     private githubToken;
     private vetter;
+    private budgetTracker;
     /** Set after searchIssues() runs if rate limits affected the search (low pre-flight quota or mid-search rate limit hits). */
     rateLimitWarning: string | null;
     /**
      * @param githubToken  - GitHub personal access token or token from `gh auth token`
      * @param preferences  - User's search preferences (languages, labels, scopes, etc.)
      * @param stateReader  - Read-only interface for accessing scout state (merged PRs, starred repos, etc.)
+     * @param budgetTracker - Search budget tracker. Defaults to the shared
+     *   singleton so existing callers behave identically. A long-lived host
+     *   serving concurrent searches can inject a per-search instance so one
+     *   search's init() no longer resets the budget state of another (the
+     *   shared-singleton concurrency hazard, #156).
      */
-    constructor(githubToken: string, preferences: ScoutPreferences, stateReader: ScoutStateReader);
+    constructor(githubToken: string, preferences: ScoutPreferences, stateReader: ScoutStateReader, budgetTracker?: SearchBudgetTracker);
     /**
      * Get starred repos from the state reader.
      * @returns Array of starred repo names in "owner/repo" format
@@ -76,7 +83,11 @@ export declare class IssueDiscovery {
         skippedUrls?: Set<string>;
         preferLanguages?: string[];
         preferRepos?: string[];
+        avoidRepos?: string[];
+        boostIssueTypes?: string[];
         diversityRatio?: number;
+        interPhaseDelayMs?: number;
+        broadPhaseDelayMs?: number;
     }): Promise<{
         candidates: IssueCandidate[];
         strategiesUsed: SearchStrategy[];

package/dist/core/issue-discovery.js CHANGED

@@ -12,7 +12,7 @@
  * All state is injected via constructor parameters (ScoutStateReader + ScoutPreferences).
  */
 import { getOctokit, checkRateLimit } from "./github.js";
-import { getSearchBudgetTracker } from "./search-budget.js";
+import { getSearchBudgetTracker, } from "./search-budget.js";
 import { daysBetween, extractRepoFromUrl, sleep } from "./utils.js";
 import { SCOPE_LABELS, } from "./types.js";
 import { CONCRETE_STRATEGIES } from "./schemas.js";
@@ -22,7 +22,7 @@ import { isDocOnlyIssue, applyPerRepoCap, } from "./issue-filtering.js";
 import { IssueVetter } from "./issue-vetting.js";
 import { getTopicsForCategories } from "./category-mapping.js";
 import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, fetchIssuesFromMaintainedRepos, filterVetAndScore, fetchIssuesFromKnownRepos, searchAcrossLanguagesAndLabels, } from "./search-phases.js";
-import { annotateBoost, applyDiversityRatio } from "./personalization.js";
+import { annotateBoost, applyDiversityRatio, boostScoreOf, } from "./personalization.js";
 const MODULE = "issue-discovery";
 /** If remaining search quota is below this, skip heavy phases (2, 3). */
 const LOW_BUDGET_THRESHOLD = 20;
@@ -35,16 +35,19 @@ function buildIssueFilter(config) {
             const repoFullName = extractRepoFromUrl(item.repository_url);
             if (!repoFullName)
                 return false;
-            if (config.excludedRepos.has(repoFullName))
+            // Repo-name sets are lowercased at construction; compare lowercased so
+            // user-typed casing (Microsoft/TypeScript) still matches API casing.
+            const repoLower = repoFullName.toLowerCase();
+            if (config.excludedRepos.has(repoLower))
                 return false;
             if (config.excludeOrgs.size > 0) {
-                const orgName = repoFullName.split("/")[0]?.toLowerCase();
+                const orgName = repoLower.split("/")[0];
                 if (orgName && config.excludeOrgs.has(orgName))
                     return false;
             }
-            if (config.aiBlocklisted.has(repoFullName))
+            if (config.aiBlocklisted.has(repoLower))
                 return false;
-            if (config.lowScoringRepos.has(repoFullName))
+            if (config.lowScoringRepos.has(repoLower))
                 return false;
             if (config.skippedUrls.has(item.html_url))
                 return false;
@@ -84,7 +87,7 @@ async function runPhase1(octokit, vetter, repos, labels, maxResults, filterIssue
     };
 }
 /** Phase 2: General label-filtered search with multi-tier interleaving. */
-async function runPhase2(octokit, vetter, scopes, labels, configLabels, languages, isAnyLanguage, maxResults, minStars, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues) {
+async function runPhase2(octokit, vetter, scopes, labels, configLabels, languages, isAnyLanguage, maxResults, minStars, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues, tracker) {
     info(MODULE, "Phase 2: General issue search...");
     const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
     // Build per-tier label groups. Multi-tier when 2+ scopes; single-tier otherwise.
@@ -113,7 +116,7 @@ async function runPhase2(octokit, vetter, scopes, labels, configLabels, language
     let rateLimitHit = false;
     for (const { tier, tierLabels } of tierLabelGroups) {
         try {
-            const allItems = await searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, tierLabels, (langQ) => `is:issue is:open ${langQ} no:assignee`.replace(/  +/g, " ").trim(), budgetPerTier * 3);
+            const allItems = await searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, tierLabels, (langQ) => `is:issue is:open ${langQ} no:assignee`.replace(/  +/g, " ").trim(), budgetPerTier * 3, tracker);
             info(MODULE, `Phase 2 [${tier}]: processing ${allItems.length} items...`);
             const { candidates: tierCandidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, allItems, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], budgetPerTier, minStars, `Phase 2 [${tier}]`);
             tierResults.push(tierCandidates);
@@ -150,7 +153,7 @@ async function runPhase2(octokit, vetter, scopes, labels, configLabels, language
     };
 }
 /** Phase 3: Actively maintained repos (REST-first, Search API fallback). */
-async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories, maxResults, phase0RepoSet, starredRepoSet, starredRepos, existingCandidates, filterIssues) {
+async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories, maxResults, phase0RepoSet, starredRepoSet, starredRepos, existingCandidates, filterIssues, tracker) {
     info(MODULE, "Phase 3: Searching actively maintained repos...");
     const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
     // Step 1: Try REST API with starred repos first (no Search API quota used)
@@ -193,7 +196,7 @@ async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories
             sort: "updated",
             order: "desc",
             per_page: maxResults * 3,
-        });
+        }, tracker);
         info(MODULE, `Found ${data.total_count} issues in maintained-repo search, processing top ${data.items.length}...`);
         const { candidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, data.items, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], maxResults, minStars, "Phase 3");
         info(MODULE, `Found ${candidates.length} candidates from maintained-repo search`);
@@ -233,19 +236,28 @@ export class IssueDiscovery {
     octokit;
     githubToken;
     vetter;
+    budgetTracker;
     /** Set after searchIssues() runs if rate limits affected the search (low pre-flight quota or mid-search rate limit hits). */
     rateLimitWarning = null;
     /**
      * @param githubToken  - GitHub personal access token or token from `gh auth token`
      * @param preferences  - User's search preferences (languages, labels, scopes, etc.)
      * @param stateReader  - Read-only interface for accessing scout state (merged PRs, starred repos, etc.)
+     * @param budgetTracker - Search budget tracker. Defaults to the shared
+     *   singleton so existing callers behave identically. A long-lived host
+     *   serving concurrent searches can inject a per-search instance so one
+     *   search's init() no longer resets the budget state of another (the
+     *   shared-singleton concurrency hazard, #156).
      */
-    constructor(githubToken, preferences, stateReader) {
+    constructor(githubToken, preferences, stateReader, budgetTracker = getSearchBudgetTracker()) {
         this.preferences = preferences;
         this.stateReader = stateReader;
         this.githubToken = githubToken;
         this.octokit = getOctokit(githubToken);
-        this.vetter = new IssueVetter(this.octokit, this.stateReader);
+        this.budgetTracker = budgetTracker;
+        // Thread the same tracker into the vetter so the merged-PR Search API
+        // call (checkUserMergedPRsInRepo) pays the same budget as the search phases.
+        this.vetter = new IssueVetter(this.octokit, this.stateReader, this.budgetTracker);
     }
     /**
      * Get starred repos from the state reader.
@@ -286,19 +298,38 @@ export class IssueDiscovery {
             (scopes ? buildEffectiveLabels(scopes, config.labels) : config.labels);
         const maxResults = options.maxResults || 10;
         const minStars = config.minStars ?? 50;
-        const interPhaseDelay = config.interPhaseDelayMs ?? 30000;
-        // Strategy selection
+        const interPhaseDelay = options.interPhaseDelayMs ?? config.interPhaseDelayMs ?? 30000;
+        // Strategy selection. Empty arrays count as "unset" so a stored
+        // defaultStrategy of [] can't silently produce zero-strategy searches.
         const ALL_STRATEGIES = CONCRETE_STRATEGIES;
-        const rawStrategies = options.strategies ??
-            config.defaultStrategy ?? ["all"];
+        const pickStrategies = (...candidates) => candidates.find((c) => c && c.length > 0) ?? ["all"];
+        const rawStrategies = pickStrategies(options.strategies, config.defaultStrategy);
         const enabledStrategies = new Set(rawStrategies.includes("all") ? ALL_STRATEGIES : rawStrategies);
         const strategiesUsed = [];
         const allCandidates = [];
         const phaseErrors = {};
         let rateLimitHitDuringSearch = false;
+        // The standard inter-phase pause for rate-limit management. Phases 1, 2,
+        // and 3 all apply this identical delay before querying (Phase 0 is first,
+        // so it never waits). The broad phase wraps this with an extra cooldown.
+        const applyInterPhaseDelay = async () => {
+            if (interPhaseDelay > 0) {
+                info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
+                await sleep(interPhaseDelay);
+            }
+        };
+        // Fold a phase's result into the running totals. Every phase accumulates
+        // candidates, records its error under a stable key, and flips the
+        // rate-limit flag the same way; only the key and the result differ.
+        const recordPhaseResult = (key, result) => {
+            allCandidates.push(...result.candidates);
+            phaseErrors[key] = result.error;
+            if (result.rateLimitHit)
+                rateLimitHitDuringSearch = true;
+        };
         // Pre-flight rate limit check
         this.rateLimitWarning = null;
-        const tracker = getSearchBudgetTracker();
+        const tracker = this.budgetTracker;
         let searchBudget = LOW_BUDGET_THRESHOLD - 1;
         try {
             const rateLimit = await checkRateLimit(this.githubToken);
@@ -332,19 +363,19 @@ export class IssueDiscovery {
         const openPRRepos = this.stateReader.getReposWithOpenPRs();
         const starredRepos = this.getStarredRepos();
         const starredRepoSet = new Set(starredRepos);
-        const lowScoringRepos = new Set(this.deriveLowScoringRepos(config.minRepoScoreThreshold));
+        const lowScoringRepos = new Set(this.deriveLowScoringRepos(config.minRepoScoreThreshold).map((r) => r.toLowerCase()));
         // Build query parts
         const isAnyLanguage = languages.some((l) => l.toLowerCase() === "any");
         const langQuery = isAnyLanguage
             ? ""
             : languages.map((l) => `language:${l}`).join(" ");
-        // Build reusable filter
-        const aiBlocklisted = new Set(config.aiPolicyBlocklist);
+        // Build reusable filter (repo-name sets lowercased; see buildIssueFilter)
+        const aiBlocklisted = new Set(config.aiPolicyBlocklist.map((r) => r.toLowerCase()));
         if (aiBlocklisted.size > 0) {
             debug(MODULE, `[AI_POLICY_FILTER] Filtering issues from ${aiBlocklisted.size} blocklisted repo(s): ${[...aiBlocklisted].join(", ")}`);
         }
         const filterIssues = buildIssueFilter({
-            excludedRepos: new Set(config.excludeRepos),
+            excludedRepos: new Set(config.excludeRepos.map((r) => r.toLowerCase())),
             excludeOrgs: new Set((config.excludeOrgs ?? []).map((o) => o.toLowerCase())),
             aiBlocklisted,
             lowScoringRepos,
@@ -371,10 +402,7 @@ export class IssueDiscovery {
             const remaining = maxResults - allCandidates.length;
             if (remaining > 0) {
                 const result = await runPhase0(this.octokit, this.vetter, phase0Repos, remaining, filterIssues);
-                allCandidates.push(...result.candidates);
-                phaseErrors["0"] = result.error;
-                if (result.rateLimitHit)
-                    rateLimitHitDuringSearch = true;
+                recordPhaseResult("0", result);
             }
             strategiesUsed.push("merged");
         }
@@ -383,26 +411,28 @@ export class IssueDiscovery {
             starredRepos.length > 0 &&
             searchBudget >= CRITICAL_BUDGET_THRESHOLD &&
             enabledStrategies.has("starred")) {
-            if (interPhaseDelay > 0) {
-                info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
-                await sleep(interPhaseDelay);
-            }
+            await applyInterPhaseDelay();
             const reposToSearch = starredRepos.filter((r) => !phase0RepoSet.has(r));
             if (reposToSearch.length > 0) {
                 const remaining = maxResults - allCandidates.length;
                 if (remaining > 0) {
                     const result = await runPhase1(this.octokit, this.vetter, reposToSearch, labels, remaining, filterIssues);
-                    allCandidates.push(...result.candidates);
-                    phaseErrors["1"] = result.error;
-                    if (result.rateLimitHit)
-                        rateLimitHitDuringSearch = true;
+                    recordPhaseResult("1", result);
+                    // Recorded only when the phase actually queried (#130)
+                    strategiesUsed.push("starred");
                 }
             }
-            strategiesUsed.push("starred");
         }
         // Phase 2: General search (with rate limit mitigation)
-        const broadDelay = config.broadPhaseDelayMs ?? 90000;
-        const skipThreshold = config.skipBroadWhenSufficientResults ?? 15;
+        const broadDelay = options.broadPhaseDelayMs ?? config.broadPhaseDelayMs ?? 90000;
+        // Clamp to maxResults - 1: the phase gate below already skips the whole
+        // phase at >= maxResults, so any larger threshold would be unsatisfiable
+        // (the default 15 vs default maxResults 10 made this dead config). 0
+        // stays "never skip".
+        const configuredSkipThreshold = config.skipBroadWhenSufficientResults ?? 8;
+        const skipThreshold = configuredSkipThreshold > 0
+            ? Math.min(configuredSkipThreshold, maxResults - 1)
+            : 0;
         if (allCandidates.length < maxResults &&
             searchBudget >= LOW_BUDGET_THRESHOLD &&
             enabledStrategies.has("broad")) {
@@ -412,10 +442,7 @@ export class IssueDiscovery {
             }
             else {
                 // Always apply baseline inter-phase delay
-                if (interPhaseDelay > 0) {
-                    info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
-                    await sleep(interPhaseDelay);
-                }
+                await applyInterPhaseDelay();
                 // Apply additional broad-phase cooldown, but skip if previous phases found nothing
                 if (allCandidates.length > 0 && broadDelay > 0) {
                     info(MODULE, `Waiting ${(broadDelay / 1000).toFixed(0)}s for rate limit cooldown before broad search...`);
@@ -425,28 +452,21 @@ export class IssueDiscovery {
                     info(MODULE, `Skipping broad phase delay: no results from previous phases, proceeding immediately`);
                 }
                 const remaining = maxResults - allCandidates.length;
-                const result = await runPhase2(this.octokit, this.vetter, scopes, labels, config.labels, languages, isAnyLanguage, remaining, minStars, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
-                allCandidates.push(...result.candidates);
-                phaseErrors["2"] = result.error;
-                if (result.rateLimitHit)
-                    rateLimitHitDuringSearch = true;
+                const result = await runPhase2(this.octokit, this.vetter, scopes, labels, config.labels, languages, isAnyLanguage, remaining, minStars, phase0RepoSet, starredRepoSet, allCandidates, filterIssues, tracker);
+                recordPhaseResult("2", result);
+                // Recorded only when the phase actually queried, not when the
+                // skip-threshold branch short-circuited it (#130)
+                strategiesUsed.push("broad");
             }
-            strategiesUsed.push("broad");
         }
         // Phase 3: Actively maintained repos
         if (allCandidates.length < maxResults &&
             searchBudget >= LOW_BUDGET_THRESHOLD &&
             enabledStrategies.has("maintained")) {
-            if (interPhaseDelay > 0) {
-                info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
-                await sleep(interPhaseDelay);
-            }
+            await applyInterPhaseDelay();
             const remaining = maxResults - allCandidates.length;
-            const result = await runPhase3(this.octokit, this.vetter, langQuery, minStars, config.projectCategories ?? [], remaining, phase0RepoSet, starredRepoSet, starredRepos, allCandidates, filterIssues);
-            allCandidates.push(...result.candidates);
-            phaseErrors["3"] = result.error;
-            if (result.rateLimitHit)
-                rateLimitHitDuringSearch = true;
+            const result = await runPhase3(this.octokit, this.vetter, langQuery, minStars, config.projectCategories ?? [], remaining, phase0RepoSet, starredRepoSet, starredRepos, allCandidates, filterIssues, tracker);
+            recordPhaseResult("3", result);
             strategiesUsed.push("maintained");
         }
         // Build result / error summary
@@ -487,12 +507,19 @@ export class IssueDiscovery {
                     `Found ${allCandidates.length} candidate${allCandidates.length === 1 ? "" : "s"} but some search phases were limited. ` +
                     `Try again after the rate limit resets for complete results.`;
         }
-        // Personalization annotation (#1244): tag each candidate with
-        // boostScore + boostReasons before sorting so the new sort tier has
-        // values to read. No-op when neither preference list is supplied.
-        annotateBoost(allCandidates, options.preferLanguages, options.preferRepos);
+        // Personalization annotation (#1244, extended #168): tag candidates with a
+        // net `personalization` marker (preferRepos/preferLanguages/boostIssueTypes
+        // add, avoidRepos subtracts) before sorting so the sort tier has values to
+        // read. Returns a new array (no in-place candidate mutation, #158); a no-op
+        // when none of the bias lists are supplied.
+        const ranked = annotateBoost(allCandidates, {
+            preferLanguages: options.preferLanguages,
+            preferRepos: options.preferRepos,
+            avoidRepos: options.avoidRepos,
+            boostIssueTypes: options.boostIssueTypes,
+        });
         // Sort by priority, recommendation, boost (#1244), then viability score
-        allCandidates.sort((a, b) => {
+        ranked.sort((a, b) => {
             const priorityOrder = {
                 merged_pr: 0,
                 starred: 1,
@@ -506,17 +533,17 @@ export class IssueDiscovery {
                 recommendationOrder[b.recommendation];
             if (recDiff !== 0)
                 return recDiff;
-            // Personalization tier (#1244): higher boostScore wins. Treats
-            // undefined as 0 so unboosted candidates rank below boosted peers
-            // but stay ordered among themselves by viabilityScore. No-op when
-            // `preferLanguages`/`preferRepos` are absent — all candidates carry
-            // `boostScore: undefined` and the difference collapses to 0.
-            const boostDiff = (b.boostScore ?? 0) - (a.boostScore ?? 0);
+            // Personalization tier (#1244): higher boost wins. boostScoreOf treats
+            // an unboosted candidate as 0 so they rank below boosted peers but stay
+            // ordered among themselves by viabilityScore. No-op when
+            // `preferLanguages`/`preferRepos` are absent — every candidate scores 0
+            // and the difference collapses.
+            const boostDiff = boostScoreOf(b) - boostScoreOf(a);
             if (boostDiff !== 0)
                 return boostDiff;
             return b.viabilityScore - a.viabilityScore;
         });
-        const capped = applyPerRepoCap(allCandidates, 2);
+        const capped = applyPerRepoCap(ranked, 2);
         // Diversity counterweight (#1244): when `diversityRatio > 0`, reserve
         // a fraction of the final slots for candidates that matched neither
         // preference list. No-op when the ratio is 0 or absent — collapses to

package/dist/core/issue-eligibility.d.ts CHANGED

@@ -6,11 +6,18 @@
  * Extracted from issue-vetting.ts to isolate eligibility logic.
  */
 import { Octokit } from "@octokit/rest";
+import { type SearchBudgetTracker } from "./search-budget.js";
 import type { CheckResult, LinkedPR } from "./types.js";
-/** Result of the existing-PR check, including metadata for the first linked PR (if any). */
-export interface ExistingPRCheckResult extends CheckResult {
+/**
+ * Result of the existing-PR check, including metadata for the first linked PR
+ * (if any). An intersection (not `extends`) because CheckResult is now a
+ * discriminated union (#158); the `& { linkedPR }` distributes over both arms.
+ */
+export type ExistingPRCheckResult = CheckResult & {
     linkedPR: LinkedPR | null;
-}
+};
+/** True when a single comment body claims the issue. */
+export declare function commentClaimsIssue(body: string): boolean;
 /**
  * Check whether an open PR already exists for the given issue.
  * Uses the timeline API (REST) to detect cross-referenced PRs, avoiding
@@ -23,7 +30,7 @@ export declare function checkNoExistingPR(octokit: Octokit, owner: string, repo:
  * Results are cached per-repo for 15 minutes to avoid redundant Search API
  * calls when multiple issues from the same repo are vetted.
  */
-export declare function checkUserMergedPRsInRepo(octokit: Octokit, owner: string, repo: string): Promise<number>;
+export declare function checkUserMergedPRsInRepo(octokit: Octokit, owner: string, repo: string, tracker?: SearchBudgetTracker): Promise<number | null>;
 /**
  * Check whether an issue has been claimed by another contributor
  * by scanning recent comments for claim phrases.