npm - @oss-scout/core - Versions diffs - 0.11.0 → 1.1.0 - Mend

@oss-scout/core 0.11.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68)

package/dist/cli.bundle.cjs +89 -66
package/dist/cli.js +302 -436
package/dist/commands/command-scout.d.ts +21 -0
package/dist/commands/command-scout.js +21 -0
package/dist/commands/config.js +10 -128
package/dist/commands/features.js +15 -28
package/dist/commands/results.d.ts +13 -2
package/dist/commands/results.js +29 -2
package/dist/commands/search.d.ts +4 -0
package/dist/commands/search.js +65 -70
package/dist/commands/setup.d.ts +2 -0
package/dist/commands/setup.js +35 -6
package/dist/commands/skip.d.ts +4 -0
package/dist/commands/skip.js +45 -55
package/dist/commands/sync.d.ts +10 -0
package/dist/commands/sync.js +10 -0
package/dist/commands/vet-list.js +3 -19
package/dist/commands/vet.js +18 -25
package/dist/commands/with-scout.d.ts +32 -0
package/dist/commands/with-scout.js +41 -0
package/dist/core/anti-llm-policy.js +5 -33
package/dist/core/bootstrap.d.ts +2 -2
package/dist/core/bootstrap.js +5 -9
package/dist/core/errors.d.ts +10 -0
package/dist/core/errors.js +20 -5
package/dist/core/feature-discovery.d.ts +13 -1
package/dist/core/feature-discovery.js +104 -81
package/dist/core/gist-state-store.d.ts +13 -12
package/dist/core/gist-state-store.js +128 -53
package/dist/core/http-cache.d.ts +32 -2
package/dist/core/http-cache.js +74 -19
package/dist/core/issue-discovery.d.ts +12 -1
package/dist/core/issue-discovery.js +94 -67
package/dist/core/issue-eligibility.d.ts +11 -4
package/dist/core/issue-eligibility.js +124 -69
package/dist/core/issue-graphql.d.ts +58 -0
package/dist/core/issue-graphql.js +108 -0
package/dist/core/issue-vetting.d.ts +115 -9
package/dist/core/issue-vetting.js +246 -109
package/dist/core/local-state.d.ts +6 -2
package/dist/core/local-state.js +23 -5
package/dist/core/logger.d.ts +12 -4
package/dist/core/logger.js +33 -7
package/dist/core/personalization.d.ts +30 -10
package/dist/core/personalization.js +64 -24
package/dist/core/preference-fields.d.ts +47 -0
package/dist/core/preference-fields.js +180 -0
package/dist/core/probe-repo-file.d.ts +47 -0
package/dist/core/probe-repo-file.js +57 -0
package/dist/core/repo-health.js +40 -32
package/dist/core/roadmap.js +26 -22
package/dist/core/schemas.d.ts +148 -26
package/dist/core/schemas.js +83 -17
package/dist/core/search-budget.d.ts +9 -0
package/dist/core/search-budget.js +36 -3
package/dist/core/search-phases.d.ts +4 -21
package/dist/core/search-phases.js +37 -89
package/dist/core/types.d.ts +151 -38
package/dist/core/utils.js +60 -26
package/dist/formatters/human.d.ts +60 -0
package/dist/formatters/human.js +199 -0
package/dist/formatters/markdown.d.ts +10 -0
package/dist/formatters/markdown.js +31 -0
package/dist/index.d.ts +6 -2
package/dist/index.js +8 -0
package/dist/scout.d.ts +75 -12
package/dist/scout.js +265 -26
package/package.json +1 -1

package/dist/core/search-phases.d.ts CHANGED

@@ -8,6 +8,7 @@ import { Octokit } from "@octokit/rest";
 import { type SearchPriority, type IssueCandidate, type IssueScope } from "./types.js";
 import { type GitHubSearchItem } from "./issue-filtering.js";
 import { IssueVetter } from "./issue-vetting.js";
+import { type SearchBudgetTracker } from "./search-budget.js";
 /** Resolve scope tiers into a flat label list, merged with custom labels. */
 export declare function buildEffectiveLabels(scopes: IssueScope[], customLabels: string[]): string[];
 /** Round-robin interleave multiple arrays. */
@@ -22,7 +23,7 @@ export declare function cachedSearchIssues(octokit: Octokit, params: {
     sort: "created" | "updated" | "comments" | "reactions" | "interactions";
     order: "asc" | "desc";
     per_page: number;
-}): Promise<{
+}, tracker?: SearchBudgetTracker): Promise<{
     total_count: number;
     items: GitHubSearchItem[];
 }>;
@@ -60,7 +61,7 @@ export declare function fetchIssuesFromKnownRepos(octokit: Octokit, vetter: Issu
  * @param buildQuery   Callback that receives a label query string and returns the full search query
  * @param perPage      Number of results per API call
  */
-export declare function searchWithChunkedLabels(octokit: Octokit, labels: string[], reservedOps: number, buildQuery: (labelQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
+export declare function searchWithChunkedLabels(octokit: Octokit, labels: string[], reservedOps: number, buildQuery: (labelQuery: string) => string, perPage: number, tracker?: SearchBudgetTracker): Promise<GitHubSearchItem[]>;
 /**
  * Build per-call language qualifier strings, fanning out across languages
  * when a multi-language + labels combination would trip GitHub Search's
@@ -84,7 +85,7 @@ export declare function buildLanguageVariants(languages: string[], isAnyLanguage
  *                        e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
  * @param perPage         Results per API call
  */
-export declare function searchAcrossLanguagesAndLabels(octokit: Octokit, languages: string[], isAnyLanguage: boolean, labels: string[], buildBaseQuery: (langQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
+export declare function searchAcrossLanguagesAndLabels(octokit: Octokit, languages: string[], isAnyLanguage: boolean, labels: string[], buildBaseQuery: (langQuery: string) => string, perPage: number, tracker?: SearchBudgetTracker): Promise<GitHubSearchItem[]>;
 /**
  * Shared pipeline: spam-filter, repo-exclusion, vetting, and star-count filter.
  * Used by Phases 2 and 3 to convert raw search results into vetted candidates.
@@ -94,21 +95,3 @@ export declare function filterVetAndScore(vetter: IssueVetter, items: GitHubSear
     allVetFailed: boolean;
     rateLimitHit: boolean;
 }>;
-/**
- * Search for issues within specific repos using batched queries.
- *
- * To avoid GitHub's secondary rate limit (30 requests/minute), we batch
- * multiple repos into a single search query using OR syntax:
- *   repo:owner1/repo1 OR repo:owner2/repo2 OR repo:owner3/repo3
- *
- * Labels are chunked separately to stay within GitHub's 5 boolean operator limit.
- * Each batch of repos consumes (batch.length - 1) OR operators, and the remaining
- * budget is used for label OR operators.
- *
- * This reduces API calls from N (one per repo) to ceil(N/BATCH_SIZE) * label_chunks.
- */
-export declare function searchInRepos(octokit: Octokit, vetter: IssueVetter, repos: string[], baseQualifiers: string, labels: string[], maxResults: number, priority: SearchPriority, filterFn: (items: GitHubSearchItem[]) => GitHubSearchItem[]): Promise<{
-    candidates: IssueCandidate[];
-    allBatchesFailed: boolean;
-    rateLimitHit: boolean;
-}>;

package/dist/core/search-phases.js CHANGED

@@ -7,10 +7,10 @@
 import { SCOPE_LABELS, } from "./types.js";
 import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
 import { debug, warn } from "./logger.js";
-import { getHttpCache } from "./http-cache.js";
+import { getHttpCache, versionedCacheKey } from "./http-cache.js";
 import { detectLabelFarmingRepos, } from "./issue-filtering.js";
 import { extractRepoFromUrl, sleep } from "./utils.js";
-import { getSearchBudgetTracker } from "./search-budget.js";
+import { getSearchBudgetTracker, } from "./search-budget.js";
 const MODULE = "search-phases";
 /** GitHub Search API enforces a max of 5 AND/OR/NOT operators per query. */
 const GITHUB_MAX_BOOLEAN_OPS = 5;
@@ -18,8 +18,6 @@ const GITHUB_MAX_BOOLEAN_OPS = 5;
  * Set to 2000ms as a safety floor (max 30/min at the limit). The SearchBudgetTracker
  * adds additional adaptive delays when needed. */
 const INTER_QUERY_DELAY_MS = 2000;
-/** Batch size for repo queries. 3 repos = 2 OR operators, leaving room for labels. */
-const BATCH_SIZE = 3;
 /**
  * Chunk labels into groups that fit within the operator budget.
  * N labels require N-1 OR operators, so maxPerChunk = budget + 1.
@@ -77,14 +75,6 @@ export function interleaveArrays(arrays) {
     }
     return result;
 }
-/** Split repos into batches of the specified size. */
-function batchRepos(repos, batchSize) {
-    const batches = [];
-    for (let i = 0; i < repos.length; i += batchSize) {
-        batches.push(repos.slice(i, i + batchSize));
-    }
-    return batches;
-}
 // ── Search caching ──
 /** TTL for cached search API results (15 minutes). */
 const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
@@ -93,8 +83,12 @@ const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
  * Repeated identical queries within SEARCH_CACHE_TTL_MS return cached results
  * without consuming GitHub API rate limit points.
  */
-export async function cachedSearchIssues(octokit, params) {
-    const cacheKey = `search:${params.q}:${params.sort}:${params.order}:${params.per_page}`;
+export async function cachedSearchIssues(octokit, params,
+// Optional injected budget tracker. Defaults to the shared singleton so
+// existing callers keep the exact same global budget accounting; a host
+// serving concurrent searches can inject a per-search tracker for isolation.
+tracker = getSearchBudgetTracker()) {
+    const cacheKey = versionedCacheKey(`search:${params.q}:${params.sort}:${params.order}:${params.per_page}`);
     const cache = getHttpCache();
     // Check cache first
     const cached = cache.getIfFresh(cacheKey, SEARCH_CACHE_TTL_MS);
@@ -103,7 +97,6 @@ export async function cachedSearchIssues(octokit, params) {
         return cached;
     }
     // Fetch from API
-    const tracker = getSearchBudgetTracker();
     await tracker.waitForBudget();
     let data;
     try {
@@ -206,17 +199,32 @@ export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels,
         const repoFullName = repos[i];
         const [owner, repo] = repoFullName.split("/");
         try {
-            const response = await octokit.issues.listForRepo({
-                owner,
-                repo,
-                state: "open",
-                sort: "created",
-                direction: "desc",
-                per_page: 5,
-                ...(labels.length > 0 ? { labels: labels.join(",") } : {}),
-            });
+            // One query per label: the REST `labels` parameter is AND semantics
+            // (issues carrying ALL listed labels), so a comma-joined list like
+            // "good first issue,help wanted" returned ~nothing (#118). Querying
+            // per label and merging restores the intended any-of behavior.
+            const labelFilters = labels.length > 0 ? labels : [undefined];
+            const seenUrls = new Set();
+            const rawIssues = [];
+            for (const label of labelFilters) {
+                const response = await octokit.issues.listForRepo({
+                    owner,
+                    repo,
+                    state: "open",
+                    sort: "created",
+                    direction: "desc",
+                    per_page: 5,
+                    ...(label !== undefined ? { labels: label } : {}),
+                });
+                for (const issue of response.data) {
+                    if (seenUrls.has(issue.html_url))
+                        continue;
+                    seenUrls.add(issue.html_url);
+                    rawIssues.push(issue);
+                }
+            }
             // Filter out pull requests (REST issues endpoint returns both) and assigned issues
-            const issuesOnly = response.data.filter((item) => !("pull_request" in item) && !item.assignee);
+            const issuesOnly = rawIssues.filter((item) => !("pull_request" in item) && !item.assignee);
             const mapped = issuesOnly.map((issue) => ({
                 html_url: issue.html_url,
                 repository_url: `https://api.github.com/repos/${repoFullName}`,
@@ -268,7 +276,7 @@ export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels,
  * @param buildQuery   Callback that receives a label query string and returns the full search query
  * @param perPage      Number of results per API call
  */
-export async function searchWithChunkedLabels(octokit, labels, reservedOps, buildQuery, perPage) {
+export async function searchWithChunkedLabels(octokit, labels, reservedOps, buildQuery, perPage, tracker = getSearchBudgetTracker()) {
     const labelChunks = chunkLabels(labels, reservedOps);
     const seenUrls = new Set();
     const allItems = [];
@@ -281,7 +289,7 @@ export async function searchWithChunkedLabels(octokit, labels, reservedOps, buil
             sort: "created",
             order: "desc",
             per_page: perPage,
-        });
+        }, tracker);
         for (const item of data.items) {
             if (!seenUrls.has(item.html_url)) {
                 seenUrls.add(item.html_url);
@@ -322,7 +330,7 @@ export function buildLanguageVariants(languages, isAnyLanguage, hasLabels) {
  *                        e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
  * @param perPage         Results per API call
  */
-export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, labels, buildBaseQuery, perPage) {
+export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, labels, buildBaseQuery, perPage, tracker = getSearchBudgetTracker()) {
     const langVariants = buildLanguageVariants(languages, isAnyLanguage, labels.length > 0);
     const seenUrls = new Set();
     const allItems = [];
@@ -331,7 +339,7 @@ export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLa
             await sleep(INTER_QUERY_DELAY_MS);
         const items = await searchWithChunkedLabels(octokit, labels, 0, (labelQ) => `${buildBaseQuery(langVariants[i])} ${labelQ}`
             .replace(/  +/g, " ")
-            .trim(), perPage);
+            .trim(), perPage, tracker);
         for (const item of items) {
             if (!seenUrls.has(item.html_url)) {
                 seenUrls.add(item.html_url);
@@ -378,63 +386,3 @@ export async function filterVetAndScore(vetter, items, filterIssues, excludedRep
     }
     return { candidates: starFiltered, allVetFailed, rateLimitHit };
 }
-/**
- * Search for issues within specific repos using batched queries.
- *
- * To avoid GitHub's secondary rate limit (30 requests/minute), we batch
- * multiple repos into a single search query using OR syntax:
- *   repo:owner1/repo1 OR repo:owner2/repo2 OR repo:owner3/repo3
- *
- * Labels are chunked separately to stay within GitHub's 5 boolean operator limit.
- * Each batch of repos consumes (batch.length - 1) OR operators, and the remaining
- * budget is used for label OR operators.
- *
- * This reduces API calls from N (one per repo) to ceil(N/BATCH_SIZE) * label_chunks.
- */
-export async function searchInRepos(octokit, vetter, repos, baseQualifiers, labels, maxResults, priority, filterFn) {
-    const candidates = [];
-    const batches = batchRepos(repos, BATCH_SIZE);
-    let failedBatches = 0;
-    let rateLimitFailures = 0;
-    for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) {
-        const batch = batches[batchIdx];
-        if (candidates.length >= maxResults)
-            break;
-        // Delay between batches to avoid secondary rate limits
-        if (batchIdx > 0)
-            await sleep(INTER_QUERY_DELAY_MS);
-        try {
-            const repoFilter = batch.map((r) => `repo:${r}`).join(" OR ");
-            const repoOps = batch.length - 1;
-            const perPage = Math.min(30, (maxResults - candidates.length) * 3);
-            const allItems = await searchWithChunkedLabels(octokit, labels, repoOps, (labelQ) => `${baseQualifiers} ${labelQ} (${repoFilter})`
-                .replace(/  +/g, " ")
-                .trim(), perPage);
-            if (allItems.length > 0) {
-                const filtered = filterFn(allItems);
-                const remainingNeeded = maxResults - candidates.length;
-                const { candidates: vetted, rateLimitHit: vetRateLimitHit } = await vetter.vetIssuesParallel(filtered.slice(0, remainingNeeded * 2).map((i) => i.html_url), remainingNeeded, priority);
-                candidates.push(...vetted);
-                if (vetRateLimitHit)
-                    rateLimitFailures++;
-            }
-        }
-        catch (error) {
-            if (getHttpStatusCode(error) === 401)
-                throw error;
-            failedBatches++;
-            if (isRateLimitError(error)) {
-                rateLimitFailures++;
-            }
-            const batchReposStr = batch.join(", ");
-            warn(MODULE, `Error searching issues in batch [${batchReposStr}]:`, errorMessage(error));
-        }
-    }
-    const allBatchesFailed = failedBatches === batches.length && batches.length > 0;
-    const rateLimitHit = rateLimitFailures > 0;
-    if (allBatchesFailed) {
-        warn(MODULE, `All ${batches.length} batch(es) failed for ${priority} phase. ` +
-            `This may indicate a systemic issue (rate limit, auth, network).`);
-    }
-    return { candidates, allBatchesFailed, rateLimitHit };
-}

package/dist/core/types.d.ts CHANGED

@@ -2,9 +2,10 @@
  * Core types for oss-scout — ephemeral types that are never persisted.
  */
 import type { RepoSignals, TrackedIssue, IssueVettingResult, IssueScope, ScoutState, SearchStrategy } from "./schemas.js";
+import type { LogLevel } from "./logger.js";
 export type { ProjectCategory, IssueScope, RepoSignals, RepoScore, StoredMergedPR, StoredClosedPR, ContributionGuidelines, IssueVettingResult, LinkedPR, TrackedIssue, ScoutPreferences, SavedCandidate, ScoutState, SearchStrategy, } from "./schemas.js";
-/** Health snapshot of a GitHub repository. */
-export interface ProjectHealth {
+/** A successful health snapshot of a GitHub repository. */
+export interface ProjectHealthData {
     repo: string;
     lastCommitAt: string;
     daysSinceLastCommit: number;
@@ -15,9 +16,26 @@ export interface ProjectHealth {
     stargazersCount?: number;
     forksCount?: number;
     language?: string | null;
-    checkFailed?: boolean;
-    failureReason?: string;
+    /** Discriminant: a real snapshot is never `checkFailed`. */
+    checkFailed?: false;
+    failureReason?: undefined;
 }
+/**
+ * The health check itself failed (transient API error). Only the repo and the
+ * failure reason are known — none of the snapshot fields are meaningful, so the
+ * type does not carry them. Narrow on `checkFailed` to reach a real snapshot.
+ */
+export interface ProjectHealthFailure {
+    repo: string;
+    checkFailed: true;
+    failureReason: string;
+}
+/**
+ * Health snapshot of a GitHub repository, or a marker that the check failed.
+ * A discriminated union (on `checkFailed`) so the "failure" shape can't be read
+ * as if it carried real snapshot data. Narrow before reading snapshot fields.
+ */
+export type ProjectHealth = ProjectHealthData | ProjectHealthFailure;
 /** Priority tier for issue search results. */
 export type SearchPriority = "merged_pr" | "starred" | "normal";
 /** Source file the anti-LLM policy match came from, or null when no file matched. */
@@ -43,6 +61,13 @@ export interface SLMTriageSummary {
 /** A fully vetted issue candidate with scoring. */
 export interface IssueCandidate {
     issue: TrackedIssue;
+    /**
+     * GitHub issue state at vet time (#120). GitHub answers 200 for closed
+     * issues, so without this vet-list classified them still_available and
+     * --prune kept them. Optional: cached candidates from older versions
+     * lack it and read as open.
+     */
+    issueState?: "open" | "closed";
     vettingResult: IssueVettingResult;
     projectHealth: ProjectHealth;
     antiLLMPolicy: AntiLLMPolicyResult;
@@ -54,26 +79,22 @@ export interface IssueCandidate {
     viabilityScore: number;
     searchPriority: SearchPriority;
     /**
-     * Personalization sort tier (#1244). Populated only when the caller
-     * passes `preferLanguages` / `preferRepos` to `search()` *and* the
-     * candidate matches at least one. Affects sort order between the
-     * `recommendation` tier and `viabilityScore`; never used as a filter.
-     */
-    boostScore?: number;
-    /**
-     * Human-readable reasons the candidate matched personalization bias
-     * (#1244). Mirrors `reasonsToApprove`/`reasonsToSkip` shape for
-     * symmetry with the existing surface.
-     */
-    boostReasons?: string[];
-    /**
-     * Marks a candidate that filled a reserved diversity slot (#1244).
-     * Populated only when `diversityRatio > 0` was passed AND the
-     * candidate matched no personalization bias. Mutually exclusive with
-     * a non-zero `boostScore` (a candidate cannot be both biased-toward
-     * and a diversity slot in the same result set).
+     * Personalization marker (#1244). A candidate is EITHER boosted (it matched
+     * a `preferLanguages` / `preferRepos` bias and gets a soft sort boost between
+     * the `recommendation` tier and `viabilityScore`) OR a diversity slot (it
+     * matched no bias and filled a slot reserved by `diversityRatio`) — never
+     * both. Modelling it as a single discriminated field makes that mutual
+     * exclusivity structural instead of prose across three optional fields.
+     * Absent when no personalization was requested or the candidate matched
+     * nothing.
      */
-    diversitySlot?: boolean;
+    personalization?: {
+        kind: "boosted";
+        score: number;
+        reasons: string[];
+    } | {
+        kind: "diversity";
+    };
 }
 /** Subset of RepoScore fields that callers may update. */
 export interface RepoScoreUpdate {
@@ -85,29 +106,51 @@ export interface RepoScoreUpdate {
     stargazersCount?: number;
     language?: string | null;
 }
-/** Result of a check (e.g., no existing PR, not claimed). */
-export interface CheckResult {
+/**
+ * Result of a check (e.g., no existing PR, not claimed). Discriminated on
+ * `inconclusive`: a `reason` exists only when the check could not be completed
+ * (a transient API error), and an inconclusive check always reports `passed:
+ * true` because the caller assumes the issue is still eligible. A conclusive
+ * result carries no `reason`.
+ */
+export type CheckResult = {
     passed: boolean;
-    inconclusive?: boolean;
-    reason?: string;
-}
+    inconclusive?: false;
+    reason?: undefined;
+} | {
+    passed: true;
+    inconclusive: true;
+    reason: string;
+};
 export declare const SCOPE_LABELS: Record<IssueScope, string[]>;
 /** Options for batch vetting saved results. */
 export interface VetListOptions {
     concurrency?: number;
     prune?: boolean;
 }
-/** A single entry in the vet-list result. */
-export interface VetListEntry {
+/** Identity fields shared by every vet-list entry, regardless of outcome. */
+export interface VetListEntryBase {
     issueUrl: string;
     repo: string;
     number: number;
     title: string;
     status: "still_available" | "claimed" | "closed" | "has_pr" | "error";
-    recommendation?: "approve" | "skip" | "needs_review";
-    viabilityScore?: number;
-    errorMessage?: string;
 }
+/**
+ * A single entry in the vet-list result. Discriminated on `ok`: a completed vet
+ * (`ok: true`) carries `recommendation` + `viabilityScore` and never an
+ * `errorMessage`; a vet that threw (`ok: false`, including a 404/410 that
+ * classifies the issue as `closed`) carries only the `errorMessage`. This makes
+ * the "score xor error" invariant structural instead of prose.
+ */
+export type VetListEntry = (VetListEntryBase & {
+    ok: true;
+    recommendation: "approve" | "skip" | "needs_review";
+    viabilityScore: number;
+}) | (VetListEntryBase & {
+    ok: false;
+    errorMessage: string;
+});
 /** Summary counts for a vet-list run. */
 export interface VetListSummary {
     total: number;
@@ -117,27 +160,69 @@ export interface VetListSummary {
     hasPR: number;
     errors: number;
 }
+/** Result of reconciling tracked open PRs against their current GitHub state (#164). */
+export interface SyncResult {
+    /** Open PRs checked. */
+    checked: number;
+    /** Transitioned to merged. */
+    merged: number;
+    /** Transitioned to closed-without-merge. */
+    closed: number;
+    /** Still open (kept). */
+    stillOpen: number;
+    /** Could not be checked (parse failure or transient API error). */
+    errors: number;
+}
+/** A saved result whose availability status changed since the last vet-list (#165). */
+export interface VetStatusTransition {
+    issueUrl: string;
+    repo: string;
+    number: number;
+    from: VetListEntry["status"];
+    to: VetListEntry["status"];
+}
 /** Result of a batch vet-list operation. */
 export interface VetListResult {
     results: VetListEntry[];
     summary: VetListSummary;
     prunedCount?: number;
+    /**
+     * Status changes since the previous vet-list run, computed from each saved
+     * result's `lastStatus`. Empty on a first run (no prior status to compare).
+     */
+    transitions: VetStatusTransition[];
 }
 /** Configuration for creating an OssScout instance. */
 export type ScoutConfig = {
-    /** GitHub token with `repo` read scope. Add `gist` scope for persistence. */
+    /** GitHub token with `repo` read scope. Add `gist` scope for gist persistence. */
     githubToken: string;
-    /** Use gist-backed persistence (default for standalone CLI). */
-    persistence?: "gist";
-    /** Gist ID override. Skips gist discovery/creation if provided. */
+    /**
+     * State storage. Omitted defaults to `"local"`: load and persist
+     * `~/.oss-scout/state.json`, no network on construct. `"gist"` syncs
+     * via a private GitHub gist (needs the `gist` token scope).
+     */
+    persistence?: "local" | "gist";
+    /** Gist ID override (gist mode). Skips gist discovery/creation if provided. */
     gistId?: string;
+    /**
+     * Minimum log level emitted to stderr. Omitted leaves the global level
+     * (default "info"). Hosts that don't want the "[INFO] Phase 0..."
+     * chatter can pass "warn" or "silent" (#156).
+     */
+    logLevel?: LogLevel;
 } | {
     /** GitHub token with `repo` read scope. */
     githubToken: string;
-    /** Caller provides state directly. */
+    /** Caller provides and owns state directly (embedding hosts). */
     persistence: "provided";
     /** Pre-loaded state. Required when persistence is 'provided'. */
     initialState: ScoutState;
+    /**
+     * Minimum log level emitted to stderr. Omitted leaves the global level
+     * (default "info"). Hosts that don't want the "[INFO] Phase 0..."
+     * chatter can pass "warn" or "silent" (#156).
+     */
+    logLevel?: LogLevel;
 };
 /** Options for the search method. */
 export interface SearchOptions {
@@ -158,6 +243,21 @@ export interface SearchOptions {
      * disables the boost.
      */
     preferRepos?: string[];
+    /**
+     * Per-call personalization bias: a SOFT penalty (milder than the hard
+     * `excludeRepos` filter) for candidates in one of these `owner/repo` slugs
+     * (#168). They are pushed below equally-recommended non-matches but not
+     * removed; a strong boost can still outweigh the penalty. Empty / undefined
+     * disables it.
+     */
+    avoidRepos?: string[];
+    /**
+     * Per-call personalization bias: a soft boost for candidates whose issue
+     * labels match one of these types, case-insensitive (e.g. "bug",
+     * "good first issue") (#168). Same tier as a language match. Does not filter
+     * results, does not change `viabilityScore`. Empty / undefined disables it.
+     */
+    boostIssueTypes?: string[];
     /**
      * Counterweight against echo-chamber bias as `preferLanguages` /
      * `preferRepos` boosts accumulate over time (#1244). A value of 0.2
@@ -168,6 +268,19 @@ export interface SearchOptions {
      * clamped to [0, 1].
      */
     diversityRatio?: number;
+    /**
+     * Per-call override for the delay between search phases (ms). Defaults to
+     * the `interPhaseDelayMs` preference (30s). Latency-sensitive callers like
+     * the MCP server pass 0; the sliding-window budget tracker still paces the
+     * actual API calls, so the fixed sleep is the only thing removed (#143).
+     */
+    interPhaseDelayMs?: number;
+    /**
+     * Per-call override for the extra cooldown before the broad phase (ms).
+     * Defaults to the `broadPhaseDelayMs` preference (90s). See
+     * `interPhaseDelayMs` for the rationale (#143).
+     */
+    broadPhaseDelayMs?: number;
 }
 /** Result of a search operation. */
 export interface SearchResult {

package/dist/core/utils.js CHANGED

@@ -36,14 +36,29 @@ export function getCacheDir() {
  * - https://api.github.com/repos/owner/repo/...
  */
 export function extractRepoFromUrl(url) {
+    // Real URL parsing: the previous regexes were unanchored (any host
+    // containing "github.com" matched) and leaked query/fragment text into
+    // the repo segment ("repo?tab=readme").
+    let parsed;
+    try {
+        parsed = new URL(url);
+    }
+    catch {
+        return null;
+    }
+    const host = parsed.hostname.toLowerCase().replace(/^www\./, "");
+    const segments = parsed.pathname.split("/").filter(Boolean);
     // API URLs: https://api.github.com/repos/owner/repo[/...]
-    const apiMatch = url.match(/api\.github\.com\/repos\/([^/]+\/[^/]+)/);
-    if (apiMatch)
-        return apiMatch[1];
+    if (host === "api.github.com") {
+        if (segments[0] === "repos" && segments.length >= 3) {
+            return `${segments[1]}/${segments[2]}`;
+        }
+        return null;
+    }
     // Web URLs: https://github.com/owner/repo[/...]
-    const webMatch = url.match(/github\.com\/([^/]+\/[^/]+)/);
-    if (webMatch)
-        return webMatch[1];
+    if (host === "github.com" && segments.length >= 2) {
+        return `${segments[0]}/${segments[1]}`;
+    }
     return null;
 }
 const OWNER_PATTERN = /^[a-zA-Z0-9_-]+$/;
@@ -52,25 +67,38 @@ function isValidOwnerRepo(owner, repo) {
     return OWNER_PATTERN.test(owner) && REPO_PATTERN.test(repo);
 }
 export function parseGitHubUrl(url) {
-    if (!url.startsWith("https://github.com/"))
-        return null;
-    const prMatch = url.match(/github\.com\/([^/]+)\/([^/]+)\/pull\/(\d+)/);
-    if (prMatch) {
-        const owner = prMatch[1];
-        const repo = prMatch[2];
-        if (!isValidOwnerRepo(owner, repo))
-            return null;
-        return { owner, repo, number: parseInt(prMatch[3], 10), type: "pull" };
+    // Accept pasteable variants: http://, www., and bare github.com/... forms
+    // normalize to a parseable URL. Strict canonical-form validation for
+    // command input lives in commands/validation.ts; this parser is lenient.
+    const normalized = /^(?:www\.)?github\.com\//i.test(url)
+        ? `https://${url}`
+        : url;
+    let parsed;
+    try {
+        parsed = new URL(normalized);
     }
-    const issueMatch = url.match(/github\.com\/([^/]+)\/([^/]+)\/issues\/(\d+)/);
-    if (issueMatch) {
-        const owner = issueMatch[1];
-        const repo = issueMatch[2];
-        if (!isValidOwnerRepo(owner, repo))
-            return null;
-        return { owner, repo, number: parseInt(issueMatch[3], 10), type: "issues" };
+    catch {
+        return null;
     }
-    return null;
+    if (parsed.protocol !== "https:" && parsed.protocol !== "http:")
+        return null;
+    const host = parsed.hostname.toLowerCase().replace(/^www\./, "");
+    if (host !== "github.com")
+        return null;
+    // Exactly owner/repo/(pull|issues)/<digits>; trailing slash tolerated via
+    // filter(Boolean), query/fragment excluded by pathname. A malformed number
+    // segment ("123abc") no longer half-parses to 123.
+    const segments = parsed.pathname.split("/").filter(Boolean);
+    if (segments.length !== 4)
+        return null;
+    const [owner, repo, type, num] = segments;
+    if (type !== "pull" && type !== "issues")
+        return null;
+    if (!isValidOwnerRepo(owner, repo))
+        return null;
+    if (!/^\d+$/.test(num))
+        return null;
+    return { owner, repo, number: parseInt(num, 10), type };
 }
 export function daysBetween(from, to = new Date()) {
     return Math.max(0, Math.floor((to.getTime() - from.getTime()) / (1000 * 60 * 60 * 24)));
@@ -91,8 +119,12 @@ export function getGitHubToken() {
     if (tokenFetchAttempted)
         return null;
     tokenFetchAttempted = true;
-    if (process.env.GITHUB_TOKEN) {
-        cachedGitHubToken = process.env.GITHUB_TOKEN;
+    // Trim: a trailing newline (e.g. GITHUB_TOKEN=$(cat file)) produces a
+    // malformed Authorization header with confusing 401s. A whitespace-only
+    // value falls through to the gh CLI.
+    const envToken = process.env.GITHUB_TOKEN?.trim();
+    if (envToken) {
+        cachedGitHubToken = envToken;
         return cachedGitHubToken;
     }
     try {
@@ -108,7 +140,9 @@ export function getGitHubToken() {
         }
     }
     catch (err) {
-        debug(MODULE, "gh auth token failed", err);
+        // Log only the message: the raw execFileSync error carries stdout/stderr
+        // buffers that could include a token if gh half-succeeded.
+        debug(MODULE, `gh auth token failed: ${errorMessage(err)}`);
     }
     return null;
 }