npm - @oss-scout/core - Versions diffs - 1.0.0 → 1.2.0 - Mend

@oss-scout/core 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/dist/cli.bundle.cjs +70 -64
package/dist/cli.js +19 -129
package/dist/commands/search.d.ts +4 -0
package/dist/commands/search.js +2 -0
package/dist/core/anti-llm-policy.js +3 -30
package/dist/core/issue-discovery.d.ts +10 -1
package/dist/core/issue-discovery.js +83 -48
package/dist/core/issue-eligibility.d.ts +2 -1
package/dist/core/issue-eligibility.js +6 -3
package/dist/core/issue-vetting.d.ts +10 -1
package/dist/core/issue-vetting.js +12 -2
package/dist/core/personalization.d.ts +27 -12
package/dist/core/personalization.js +50 -18
package/dist/core/preference-fields.js +2 -0
package/dist/core/probe-repo-file.d.ts +47 -0
package/dist/core/probe-repo-file.js +57 -0
package/dist/core/repo-health.js +9 -17
package/dist/core/roadmap.js +11 -21
package/dist/core/schemas.d.ts +4 -0
package/dist/core/schemas.js +9 -0
package/dist/core/search-phases.d.ts +5 -4
package/dist/core/search-phases.js +12 -9
package/dist/core/types.d.ts +15 -0
package/dist/formatters/human.d.ts +60 -0
package/dist/formatters/human.js +199 -0
package/dist/scout.d.ts +24 -10
package/dist/scout.js +29 -14
package/package.json +1 -1

package/dist/core/issue-vetting.d.ts CHANGED Viewed

@@ -9,6 +9,7 @@
 import { Octokit } from "@octokit/rest";
 import { type SearchPriority, type IssueCandidate, type ProjectCategory, type ScoutPreferences, type ScoutState, type MergedPRRecord, type ClosedPRRecord, type OpenPRRecord } from "./types.js";
 import { type PrefetchedIssueCore } from "./issue-graphql.js";
+import { type SearchBudgetTracker } from "./search-budget.js";
 /**
  * Feature-mode signals supplied by the caller (orchestrator) — the vetter
  * does NOT extract these from the GitHub issue itself. When passed, they
@@ -142,7 +143,15 @@ export declare function deriveRecommendation(input: RecommendationInput): Recomm
 export declare class IssueVetter {
     private octokit;
     private stateReader;
-    constructor(octokit: Octokit, stateReader: ScoutStateReader);
+    private budgetTracker;
+    /**
+     * @param octokit      - Authenticated Octokit instance
+     * @param stateReader  - Read-only scout state interface
+     * @param budgetTracker - Search budget tracker. Defaults to the shared
+     *   singleton so existing callers behave identically; inject a per-search
+     *   instance to isolate budget accounting in a long-lived concurrent host.
+     */
+    constructor(octokit: Octokit, stateReader: ScoutStateReader, budgetTracker?: SearchBudgetTracker);
     /**
      * Vet a specific issue — runs all checks and computes recommendation + viability score.
      * Results are cached for 15 minutes to avoid redundant API calls on repeated searches.

package/dist/core/issue-vetting.js CHANGED Viewed

@@ -16,6 +16,7 @@ import { checkProjectHealth, fetchContributionGuidelines, } from "./repo-health.
 import { fetchAndScanAntiLLMPolicy } from "./anti-llm-policy.js";
 import { prefetchIssueCores, issueCoreKey, } from "./issue-graphql.js";
 import { getHttpCache, versionedCacheKey } from "./http-cache.js";
+import { getSearchBudgetTracker, } from "./search-budget.js";
 import { triageWithSLM, buildTriageInput, } from "./slm-triage.js";
 const MODULE = "issue-vetting";
 /** Vetting concurrency: kept low to reduce burst pressure on GitHub's secondary rate limit. */
@@ -119,9 +120,18 @@ export function deriveRecommendation(input) {
 export class IssueVetter {
     octokit;
     stateReader;
-    constructor(octokit, stateReader) {
+    budgetTracker;
+    /**
+     * @param octokit      - Authenticated Octokit instance
+     * @param stateReader  - Read-only scout state interface
+     * @param budgetTracker - Search budget tracker. Defaults to the shared
+     *   singleton so existing callers behave identically; inject a per-search
+     *   instance to isolate budget accounting in a long-lived concurrent host.
+     */
+    constructor(octokit, stateReader, budgetTracker = getSearchBudgetTracker()) {
         this.octokit = octokit;
         this.stateReader = stateReader;
+        this.budgetTracker = budgetTracker;
     }
     /**
      * Vet a specific issue — runs all checks and computes recommendation + viability score.
@@ -170,7 +180,7 @@ export class IssueVetter {
             fetchContributionGuidelines(this.octokit, owner, repo),
             hasMergedPRsInRepo
                 ? Promise.resolve(0)
-                : checkUserMergedPRsInRepo(this.octokit, owner, repo),
+                : checkUserMergedPRsInRepo(this.octokit, owner, repo, this.budgetTracker),
         ]);
         // Anti-LLM scan reuses the CONTRIBUTING text just fetched above —
         // dedup'd to avoid 4 redundant getContent calls on cold-cache repos.

package/dist/core/personalization.d.ts CHANGED Viewed

@@ -27,24 +27,39 @@ import type { IssueCandidate } from "./types.js";
  */
 export declare const REPO_BOOST = 20;
 export declare const LANGUAGE_BOOST = 10;
+/** Soft boost for an issue-label ("issue type") match (#168). Language-tier. */
+export declare const ISSUE_TYPE_BOOST = 10;
 /**
- * The personalization sort weight of a candidate: its boost score, or 0 when it
- * is not boosted (unboosted or a diversity slot). Reads the structural
- * `personalization` field (#158) so callers never poke at the old loose
- * `boostScore` field.
+ * Soft penalty for an avoidRepos match (#168). Milder than the hard
+ * excludeRepos filter: it pushes the candidate down but a strong boost (e.g. a
+ * preferRepos affinity, +20) can still outweigh it.
+ */
+export declare const AVOID_PENALTY = 15;
+/** Per-call personalization bias lists (#168). All optional; empty = no effect. */
+export interface PersonalizationBias {
+    preferLanguages?: string[];
+    preferRepos?: string[];
+    avoidRepos?: string[];
+    boostIssueTypes?: string[];
+}
+/**
+ * The personalization sort weight of a candidate: its net score, or 0 when it
+ * carries no personalization marker. Reads the structural `personalization`
+ * field (#158). The score can be negative when avoidRepos applied (#168).
  */
 export declare function boostScoreOf(candidate: IssueCandidate): number;
 /**
- * Return a new candidate list where each candidate that matches a
- * caller-supplied preference carries `personalization: { kind: "boosted", ... }`.
- * Does NOT mutate the input candidates (#158) — matched candidates are shallow
- * copies with the field set; unmatched candidates are passed through unchanged.
- * The caller re-sorts the returned array.
+ * Return a new candidate list where each candidate matching a caller-supplied
+ * bias carries a `personalization` marker with a NET score (#168): preferRepos,
+ * preferLanguages and boostIssueTypes add; avoidRepos subtracts. The score may
+ * be negative (avoid-only) — boostScoreOf sorts those below neutral candidates.
+ * Does NOT mutate the input (#158): matched candidates are shallow copies,
+ * unmatched ones pass through unchanged.
  *
- * No-op when both preference lists are empty or undefined: the input array is
- * returned as-is and the sort tier collapses to 0 for every candidate.
+ * No-op when every bias list is empty/undefined: the input array is returned
+ * as-is and the sort tier collapses to 0 for every candidate.
  */
-export declare function annotateBoost(candidates: IssueCandidate[], preferLanguages?: string[], preferRepos?: string[]): IssueCandidate[];
+export declare function annotateBoost(candidates: IssueCandidate[], bias?: PersonalizationBias): IssueCandidate[];
 /**
  * Apply a diversity-counterweight pass over a pre-sorted candidate list
  * (#1244). Returns the first `maxResults` picks in priority order:

package/dist/core/personalization.js CHANGED Viewed

@@ -26,36 +26,54 @@
  */
 export const REPO_BOOST = 20;
 export const LANGUAGE_BOOST = 10;
+/** Soft boost for an issue-label ("issue type") match (#168). Language-tier. */
+export const ISSUE_TYPE_BOOST = 10;
 /**
- * The personalization sort weight of a candidate: its boost score, or 0 when it
- * is not boosted (unboosted or a diversity slot). Reads the structural
- * `personalization` field (#158) so callers never poke at the old loose
- * `boostScore` field.
+ * Soft penalty for an avoidRepos match (#168). Milder than the hard
+ * excludeRepos filter: it pushes the candidate down but a strong boost (e.g. a
+ * preferRepos affinity, +20) can still outweigh it.
+ */
+export const AVOID_PENALTY = 15;
+/**
+ * The personalization sort weight of a candidate: its net score, or 0 when it
+ * carries no personalization marker. Reads the structural `personalization`
+ * field (#158). The score can be negative when avoidRepos applied (#168).
  */
 export function boostScoreOf(candidate) {
     return candidate.personalization?.kind === "boosted"
         ? candidate.personalization.score
         : 0;
 }
+function normalizeSet(values) {
+    return new Set((values ?? []).map((v) => v.trim().toLowerCase()).filter(Boolean));
+}
 /**
- * Return a new candidate list where each candidate that matches a
- * caller-supplied preference carries `personalization: { kind: "boosted", ... }`.
- * Does NOT mutate the input candidates (#158) — matched candidates are shallow
- * copies with the field set; unmatched candidates are passed through unchanged.
- * The caller re-sorts the returned array.
+ * Return a new candidate list where each candidate matching a caller-supplied
+ * bias carries a `personalization` marker with a NET score (#168): preferRepos,
+ * preferLanguages and boostIssueTypes add; avoidRepos subtracts. The score may
+ * be negative (avoid-only) — boostScoreOf sorts those below neutral candidates.
+ * Does NOT mutate the input (#158): matched candidates are shallow copies,
+ * unmatched ones pass through unchanged.
  *
- * No-op when both preference lists are empty or undefined: the input array is
- * returned as-is and the sort tier collapses to 0 for every candidate.
+ * No-op when every bias list is empty/undefined: the input array is returned
+ * as-is and the sort tier collapses to 0 for every candidate.
  */
-export function annotateBoost(candidates, preferLanguages, preferRepos) {
-    const langSet = new Set((preferLanguages ?? []).map((l) => l.trim().toLowerCase()).filter(Boolean));
-    const repoSet = new Set((preferRepos ?? []).map((r) => r.trim().toLowerCase()).filter(Boolean));
-    if (langSet.size === 0 && repoSet.size === 0)
+export function annotateBoost(candidates, bias = {}) {
+    const langSet = normalizeSet(bias.preferLanguages);
+    const repoSet = normalizeSet(bias.preferRepos);
+    const avoidSet = normalizeSet(bias.avoidRepos);
+    const typeSet = normalizeSet(bias.boostIssueTypes);
+    if (langSet.size === 0 &&
+        repoSet.size === 0 &&
+        avoidSet.size === 0 &&
+        typeSet.size === 0) {
         return candidates;
+    }
     return candidates.map((c) => {
         let score = 0;
         const reasons = [];
-        if (repoSet.size > 0 && repoSet.has(c.issue.repo.toLowerCase())) {
+        const repoLower = c.issue.repo.toLowerCase();
+        if (repoSet.size > 0 && repoSet.has(repoLower)) {
             score += REPO_BOOST;
             reasons.push(`repo affinity: ${c.issue.repo}`);
         }
@@ -64,7 +82,18 @@ export function annotateBoost(candidates, preferLanguages, preferRepos) {
             score += LANGUAGE_BOOST;
             reasons.push(`language match: ${lang}`);
         }
-        if (score === 0)
+        if (typeSet.size > 0) {
+            const matched = c.issue.labels.find((l) => typeSet.has(l.toLowerCase()));
+            if (matched) {
+                score += ISSUE_TYPE_BOOST;
+                reasons.push(`issue type: ${matched}`);
+            }
+        }
+        if (avoidSet.size > 0 && avoidSet.has(repoLower)) {
+            score -= AVOID_PENALTY;
+            reasons.push(`avoided repo: ${c.issue.repo}`);
+        }
+        if (reasons.length === 0)
             return c;
         return { ...c, personalization: { kind: "boosted", score, reasons } };
     });
@@ -116,7 +145,10 @@ export function applyDiversityRatio(candidates, maxResults, diversityRatio) {
             break;
         if (seen.has(c.issue.url))
             continue;
-        if (boostScoreOf(c) > 0)
+        // Diversity slots are for candidates that matched NO personalization bias.
+        // Exclude both boosted (>0) and avoided (<0) candidates — resurfacing an
+        // avoided repo via a diversity slot would defeat the avoid (#168).
+        if (boostScoreOf(c) !== 0)
             continue;
         // Tag a shallow copy rather than mutating the shared candidate (#158).
         picks.push({ ...c, personalization: { kind: "diversity" } });

package/dist/core/preference-fields.js CHANGED Viewed

@@ -36,6 +36,8 @@ export const FIELD_CONFIGS = {
     preferLanguages: { type: "array" },
     preferRepos: { type: "array" },
     diversityRatio: { type: "float" },
+    avoidRepos: { type: "array" },
+    boostIssueTypes: { type: "array" },
     slmTriageModel: { type: "string" },
     slmTriageHost: { type: "string" },
     featuresAnchorThreshold: { type: "number" },

package/dist/core/probe-repo-file.d.ts ADDED Viewed

@@ -0,0 +1,47 @@
+/**
+ * Single-path repo-file probe (#156).
+ *
+ * Three modules (repo-health, roadmap, anti-llm-policy) independently fetch a
+ * repo doc by trying a list of candidate paths and stopping at the first hit.
+ * The per-path fetch was copy-pasted three times, each re-deriving the same
+ * 404-continue / fatal-propagate / base64-decode logic. This is the one
+ * genuinely-shared primitive.
+ *
+ * The orchestration around it stays per-caller (parallel 4-path probe,
+ * sequential 5-path probe, sequential family probe) and so do the return shapes
+ * (parsed guidelines, issue-ref set, policy scan). Only the single GET is
+ * shared.
+ *
+ * The `transient` flag is load-bearing: it distinguishes a clean miss (404 —
+ * file absent) from a degraded miss (5xx, network) so callers can decide
+ * whether to cache a negative result or leave it open to retry. Collapsing the
+ * two would bypass anti-llm-policy's transient-failure cache safeguard, so the
+ * primitive must keep them separate.
+ */
+import type { Octokit } from "@octokit/rest";
+/**
+ * Result of probing one repo file path.
+ *
+ * - `text` — decoded UTF-8 content on a 200 with a file payload, else `null`
+ *   (404, a non-content payload such as a directory listing, or a soft error).
+ * - `transient` — `true` only when the miss was a degraded failure (5xx,
+ *   network) rather than a clean 404 / missing file. A `true` value means the
+ *   `null` may be incomplete and the caller should avoid caching it as a known
+ *   absence.
+ */
+export interface ProbeRepoFileResult {
+    text: string | null;
+    transient: boolean;
+}
+/**
+ * GET one repo file path. Returns decoded content on a 200 file payload, a
+ * clean `null` on 404 or a non-content payload, and a transient `null` on a
+ * soft error (5xx, network) after logging it. Rethrows fatal errors (401 auth,
+ * rate limit) so the caller's existing rate-limit handling sees them.
+ *
+ * Callers that need 401/rate-limit to surface across a *parallel* batch (where
+ * a faster path may have already resolved) must inspect the rejected reasons
+ * themselves; this primitive only rethrows for the single path it owns. See
+ * repo-health and anti-llm-policy for that pre-scan.
+ */
+export declare function probeRepoFile(octokit: Octokit, owner: string, repo: string, path: string): Promise<ProbeRepoFileResult>;

package/dist/core/probe-repo-file.js ADDED Viewed

@@ -0,0 +1,57 @@
+/**
+ * Single-path repo-file probe (#156).
+ *
+ * Three modules (repo-health, roadmap, anti-llm-policy) independently fetch a
+ * repo doc by trying a list of candidate paths and stopping at the first hit.
+ * The per-path fetch was copy-pasted three times, each re-deriving the same
+ * 404-continue / fatal-propagate / base64-decode logic. This is the one
+ * genuinely-shared primitive.
+ *
+ * The orchestration around it stays per-caller (parallel 4-path probe,
+ * sequential 5-path probe, sequential family probe) and so do the return shapes
+ * (parsed guidelines, issue-ref set, policy scan). Only the single GET is
+ * shared.
+ *
+ * The `transient` flag is load-bearing: it distinguishes a clean miss (404 —
+ * file absent) from a degraded miss (5xx, network) so callers can decide
+ * whether to cache a negative result or leave it open to retry. Collapsing the
+ * two would bypass anti-llm-policy's transient-failure cache safeguard, so the
+ * primitive must keep them separate.
+ */
+import { errorMessage, getHttpStatusCode, rethrowIfFatal } from "./errors.js";
+import { warn } from "./logger.js";
+const MODULE = "probe-repo-file";
+/**
+ * GET one repo file path. Returns decoded content on a 200 file payload, a
+ * clean `null` on 404 or a non-content payload, and a transient `null` on a
+ * soft error (5xx, network) after logging it. Rethrows fatal errors (401 auth,
+ * rate limit) so the caller's existing rate-limit handling sees them.
+ *
+ * Callers that need 401/rate-limit to surface across a *parallel* batch (where
+ * a faster path may have already resolved) must inspect the rejected reasons
+ * themselves; this primitive only rethrows for the single path it owns. See
+ * repo-health and anti-llm-policy for that pre-scan.
+ */
+export async function probeRepoFile(octokit, owner, repo, path) {
+    try {
+        const { data } = await octokit.repos.getContent({ owner, repo, path });
+        if (data &&
+            typeof data === "object" &&
+            "content" in data &&
+            typeof data.content === "string") {
+            return {
+                text: Buffer.from(data.content, "base64").toString("utf-8"),
+                transient: false,
+            };
+        }
+        return { text: null, transient: false };
+    }
+    catch (error) {
+        const status = getHttpStatusCode(error);
+        if (status === 404)
+            return { text: null, transient: false };
+        rethrowIfFatal(error);
+        warn(MODULE, `Unexpected error fetching ${path} from ${owner}/${repo}: ${errorMessage(error)}`);
+        return { text: null, transient: true };
+    }
+}

package/dist/core/repo-health.js CHANGED Viewed

@@ -8,6 +8,7 @@ import { daysBetween } from "./utils.js";
 import { errorMessage, getHttpStatusCode, isRateLimitError, rethrowIfFatal, } from "./errors.js";
 import { warn } from "./logger.js";
 import { getHttpCache, cachedRequest, cachedTimeBased } from "./http-cache.js";
+import { probeRepoFile } from "./probe-repo-file.js";
 const MODULE = "repo-health";
 // ── Cache for contribution guidelines ──
 const guidelinesCache = new Map();
@@ -121,13 +122,11 @@ async function fetchContributionGuidelinesUncached(octokit, owner, repo) {
         "docs/CONTRIBUTING.md",
         "contributing.md",
     ];
-    // Probe all paths in parallel — take the first success in priority order
-    const results = await Promise.allSettled(filesToCheck.map((file) => octokit.repos.getContent({ owner, repo, path: file }).then(({ data }) => {
-        if ("content" in data) {
-            return Buffer.from(data.content, "base64").toString("utf-8");
-        }
-        return null;
-    })));
+    // Probe all paths in parallel — take the first success in priority order.
+    // probeRepoFile rethrows 401/rate-limit, so those still surface here as
+    // rejected results for the pre-scan below; 404s and 5xx come back as a null
+    // text (the primitive warns on 5xx, so no extra warn is needed here).
+    const results = await Promise.allSettled(filesToCheck.map((file) => probeRepoFile(octokit, owner, repo, file)));
     // Pre-scan: auth/rate-limit must propagate even if a faster probe succeeded —
     // otherwise a path-restricted token that 401s on .github/CONTRIBUTING.md but
     // wins on CONTRIBUTING.md would silently hide the auth misconfiguration.
@@ -139,20 +138,13 @@ async function fetchContributionGuidelinesUncached(octokit, owner, repo) {
             throw result.reason;
         }
     }
-    for (let i = 0; i < results.length; i++) {
-        const result = results[i];
-        if (result.status === "fulfilled" && result.value) {
-            const guidelines = parseContributionGuidelines(result.value);
+    for (const result of results) {
+        if (result.status === "fulfilled" && result.value.text) {
+            const guidelines = parseContributionGuidelines(result.value.text);
             guidelinesCache.set(cacheKey, { guidelines, fetchedAt: Date.now() });
             pruneCache();
             return guidelines;
         }
-        if (result.status === "rejected") {
-            const status = getHttpStatusCode(result.reason);
-            if (status !== 404) {
-                warn(MODULE, `Unexpected error fetching ${filesToCheck[i]} from ${owner}/${repo}: ${errorMessage(result.reason)}`);
-            }
-        }
     }
     // Cache the negative result too and prune if needed
     guidelinesCache.set(cacheKey, {

package/dist/core/roadmap.js CHANGED Viewed

@@ -10,9 +10,7 @@
  * Auth (401) and rate-limit errors propagate, matching the rest of the
  * codebase's error strategy. Other errors degrade gracefully (warn + empty).
  */
-import { errorMessage, getHttpStatusCode, rethrowIfFatal } from "./errors.js";
-import { warn } from "./logger.js";
-const MODULE = "roadmap";
+import { probeRepoFile } from "./probe-repo-file.js";
 /** TTL for roadmap fetch results (1 hour). */
 const CACHE_TTL_MS = 60 * 60 * 1000;
 /** Paths probed in priority order. First success wins. */
@@ -113,24 +111,16 @@ export async function fetchRoadmapIssueRefs(octokit, owner, repo) {
 const roadmapInflight = new Map();
 async function fetchRoadmapIssueRefsUncached(octokit, owner, repo, cacheKey) {
     for (const path of ROADMAP_PATHS) {
-        try {
-            const { data } = await octokit.repos.getContent({ owner, repo, path });
-            if (!("content" in data))
-                continue;
-            const content = Buffer.from(data.content, "base64").toString("utf-8");
-            const refs = parseRoadmapIssueRefs(content, owner, repo);
-            roadmapCache.set(cacheKey, { refs, fetchedAt: Date.now() });
-            pruneCache();
-            return refs;
-        }
-        catch (err) {
-            rethrowIfFatal(err);
-            const status = getHttpStatusCode(err);
-            if (status === 404)
-                continue; // path missing — try next
-            warn(MODULE, `Unexpected error fetching ${path} from ${owner}/${repo}: ${errorMessage(err)}`);
-            // Fall through and try next path.
-        }
+        // probeRepoFile rethrows 401/rate-limit, treats 404 and non-content
+        // payloads as a null text, and warns on 5xx — all of which we degrade past
+        // by trying the next path.
+        const { text } = await probeRepoFile(octokit, owner, repo, path);
+        if (!text)
+            continue;
+        const refs = parseRoadmapIssueRefs(text, owner, repo);
+        roadmapCache.set(cacheKey, { refs, fetchedAt: Date.now() });
+        pruneCache();
+        return refs;
     }
     // No roadmap found (or all probes errored softly). Cache the empty result
     // so we don't re-probe every run.

package/dist/core/schemas.d.ts CHANGED Viewed

@@ -279,6 +279,8 @@ export declare const ScoutPreferencesSchema: z.ZodObject<{
     preferLanguages: z.ZodDefault<z.ZodArray<z.ZodString>>;
     preferRepos: z.ZodDefault<z.ZodArray<z.ZodString>>;
     diversityRatio: z.ZodDefault<z.ZodNumber>;
+    avoidRepos: z.ZodDefault<z.ZodArray<z.ZodString>>;
+    boostIssueTypes: z.ZodDefault<z.ZodArray<z.ZodString>>;
     broadPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
     /**
      * Skip the expensive broad phase once this many candidates were found by
@@ -359,6 +361,8 @@ export declare const ScoutStateSchema: z.ZodObject<{
         preferLanguages: z.ZodDefault<z.ZodArray<z.ZodString>>;
         preferRepos: z.ZodDefault<z.ZodArray<z.ZodString>>;
         diversityRatio: z.ZodDefault<z.ZodNumber>;
+        avoidRepos: z.ZodDefault<z.ZodArray<z.ZodString>>;
+        boostIssueTypes: z.ZodDefault<z.ZodArray<z.ZodString>>;
         broadPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
         /**
          * Skip the expensive broad phase once this many candidates were found by

package/dist/core/schemas.js CHANGED Viewed

@@ -42,6 +42,9 @@ export const CONCRETE_STRATEGIES = [
 // ── Leaf schemas ────────────────────────────────────────────────────
 export const RepoSignalsSchema = z.looseObject({
     hasActiveMaintainers: z.boolean(),
+    // Retained for backward compatibility but no longer affects the repo score
+    // (#167): nothing computes it, and hasActiveMaintainers is the live activity
+    // proxy. Kept so old persisted state and the search JSON output still parse.
     isResponsive: z.boolean(),
     hasHostileComments: z.boolean(),
 });
@@ -190,6 +193,12 @@ export const ScoutPreferencesSchema = z.looseObject({
     preferLanguages: z.array(z.string()).default([]),
     preferRepos: z.array(z.string()).default([]),
     diversityRatio: z.number().min(0).max(1).default(0),
+    // Soft penalty (milder than the hard excludeRepos filter): candidates in
+    // these `owner/repo` slugs are pushed down the ranking but not removed (#168).
+    avoidRepos: z.array(z.string()).default([]),
+    // Soft boost for candidates whose issue labels match one of these types,
+    // case-insensitive (e.g. "bug", "good first issue") (#168).
+    boostIssueTypes: z.array(z.string()).default([]),
     broadPhaseDelayMs: z.number().min(0).max(300000).default(90000),
     /**
      * Skip the expensive broad phase once this many candidates were found by

package/dist/core/search-phases.d.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import { Octokit } from "@octokit/rest";
 import { type SearchPriority, type IssueCandidate, type IssueScope } from "./types.js";
 import { type GitHubSearchItem } from "./issue-filtering.js";
 import { IssueVetter } from "./issue-vetting.js";
+import { type SearchBudgetTracker } from "./search-budget.js";
 /** Resolve scope tiers into a flat label list, merged with custom labels. */
 export declare function buildEffectiveLabels(scopes: IssueScope[], customLabels: string[]): string[];
 /** Round-robin interleave multiple arrays. */
@@ -22,7 +23,7 @@ export declare function cachedSearchIssues(octokit: Octokit, params: {
     sort: "created" | "updated" | "comments" | "reactions" | "interactions";
     order: "asc" | "desc";
     per_page: number;
-}): Promise<{
+}, tracker?: SearchBudgetTracker): Promise<{
     total_count: number;
     items: GitHubSearchItem[];
 }>;
@@ -42,7 +43,7 @@ export declare function fetchIssuesFromMaintainedRepos(octokit: Octokit, repos:
  * calls `GET /repos/{owner}/{repo}/issues` which counts against the much
  * larger Core API rate limit and avoids consuming the scarce Search quota.
  */
-export declare function fetchIssuesFromKnownRepos(octokit: Octokit, vetter: IssueVetter, repos: string[], labels: string[], maxResults: number, priority: SearchPriority, filterFn: (items: GitHubSearchItem[]) => GitHubSearchItem[]): Promise<{
+export declare function fetchIssuesFromKnownRepos(octokit: Octokit, vetter: IssueVetter, repos: string[], labels: string[], maxResults: number, priority: SearchPriority, filterFn: (items: GitHubSearchItem[]) => GitHubSearchItem[], perPage?: number): Promise<{
     candidates: IssueCandidate[];
     allReposFailed: boolean;
     rateLimitHit: boolean;
@@ -60,7 +61,7 @@ export declare function fetchIssuesFromKnownRepos(octokit: Octokit, vetter: Issu
  * @param buildQuery   Callback that receives a label query string and returns the full search query
  * @param perPage      Number of results per API call
  */
-export declare function searchWithChunkedLabels(octokit: Octokit, labels: string[], reservedOps: number, buildQuery: (labelQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
+export declare function searchWithChunkedLabels(octokit: Octokit, labels: string[], reservedOps: number, buildQuery: (labelQuery: string) => string, perPage: number, tracker?: SearchBudgetTracker): Promise<GitHubSearchItem[]>;
 /**
  * Build per-call language qualifier strings, fanning out across languages
  * when a multi-language + labels combination would trip GitHub Search's
@@ -84,7 +85,7 @@ export declare function buildLanguageVariants(languages: string[], isAnyLanguage
  *                        e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
  * @param perPage         Results per API call
  */
-export declare function searchAcrossLanguagesAndLabels(octokit: Octokit, languages: string[], isAnyLanguage: boolean, labels: string[], buildBaseQuery: (langQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
+export declare function searchAcrossLanguagesAndLabels(octokit: Octokit, languages: string[], isAnyLanguage: boolean, labels: string[], buildBaseQuery: (langQuery: string) => string, perPage: number, tracker?: SearchBudgetTracker): Promise<GitHubSearchItem[]>;
 /**
  * Shared pipeline: spam-filter, repo-exclusion, vetting, and star-count filter.
  * Used by Phases 2 and 3 to convert raw search results into vetted candidates.

package/dist/core/search-phases.js CHANGED Viewed

@@ -10,7 +10,7 @@ import { debug, warn } from "./logger.js";
 import { getHttpCache, versionedCacheKey } from "./http-cache.js";
 import { detectLabelFarmingRepos, } from "./issue-filtering.js";
 import { extractRepoFromUrl, sleep } from "./utils.js";
-import { getSearchBudgetTracker } from "./search-budget.js";
+import { getSearchBudgetTracker, } from "./search-budget.js";
 const MODULE = "search-phases";
 /** GitHub Search API enforces a max of 5 AND/OR/NOT operators per query. */
 const GITHUB_MAX_BOOLEAN_OPS = 5;
@@ -83,7 +83,11 @@ const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
  * Repeated identical queries within SEARCH_CACHE_TTL_MS return cached results
  * without consuming GitHub API rate limit points.
  */
-export async function cachedSearchIssues(octokit, params) {
+export async function cachedSearchIssues(octokit, params,
+// Optional injected budget tracker. Defaults to the shared singleton so
+// existing callers keep the exact same global budget accounting; a host
+// serving concurrent searches can inject a per-search tracker for isolation.
+tracker = getSearchBudgetTracker()) {
     const cacheKey = versionedCacheKey(`search:${params.q}:${params.sort}:${params.order}:${params.per_page}`);
     const cache = getHttpCache();
     // Check cache first
@@ -93,7 +97,6 @@ export async function cachedSearchIssues(octokit, params) {
         return cached;
     }
     // Fetch from API
-    const tracker = getSearchBudgetTracker();
     await tracker.waitForBudget();
     let data;
     try {
@@ -183,7 +186,7 @@ export async function fetchIssuesFromMaintainedRepos(octokit, repos, minStars, m
  * calls `GET /repos/{owner}/{repo}/issues` which counts against the much
  * larger Core API rate limit and avoids consuming the scarce Search quota.
  */
-export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels, maxResults, priority, filterFn) {
+export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels, maxResults, priority, filterFn, perPage = 5) {
     const candidates = [];
     let failedRepos = 0;
     let rateLimitFailures = 0;
@@ -210,7 +213,7 @@ export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels,
                     state: "open",
                     sort: "created",
                     direction: "desc",
-                    per_page: 5,
+                    per_page: perPage,
                     ...(label !== undefined ? { labels: label } : {}),
                 });
                 for (const issue of response.data) {
@@ -273,7 +276,7 @@ export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels,
  * @param buildQuery   Callback that receives a label query string and returns the full search query
  * @param perPage      Number of results per API call
  */
-export async function searchWithChunkedLabels(octokit, labels, reservedOps, buildQuery, perPage) {
+export async function searchWithChunkedLabels(octokit, labels, reservedOps, buildQuery, perPage, tracker = getSearchBudgetTracker()) {
     const labelChunks = chunkLabels(labels, reservedOps);
     const seenUrls = new Set();
     const allItems = [];
@@ -286,7 +289,7 @@ export async function searchWithChunkedLabels(octokit, labels, reservedOps, buil
             sort: "created",
             order: "desc",
             per_page: perPage,
-        });
+        }, tracker);
         for (const item of data.items) {
             if (!seenUrls.has(item.html_url)) {
                 seenUrls.add(item.html_url);
@@ -327,7 +330,7 @@ export function buildLanguageVariants(languages, isAnyLanguage, hasLabels) {
  *                        e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
  * @param perPage         Results per API call
  */
-export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, labels, buildBaseQuery, perPage) {
+export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, labels, buildBaseQuery, perPage, tracker = getSearchBudgetTracker()) {
     const langVariants = buildLanguageVariants(languages, isAnyLanguage, labels.length > 0);
     const seenUrls = new Set();
     const allItems = [];
@@ -336,7 +339,7 @@ export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLa
             await sleep(INTER_QUERY_DELAY_MS);
         const items = await searchWithChunkedLabels(octokit, labels, 0, (labelQ) => `${buildBaseQuery(langVariants[i])} ${labelQ}`
             .replace(/  +/g, " ")
-            .trim(), perPage);
+            .trim(), perPage, tracker);
         for (const item of items) {
             if (!seenUrls.has(item.html_url)) {
                 seenUrls.add(item.html_url);

package/dist/core/types.d.ts CHANGED Viewed

@@ -243,6 +243,21 @@ export interface SearchOptions {
      * disables the boost.
      */
     preferRepos?: string[];
+    /**
+     * Per-call personalization bias: a SOFT penalty (milder than the hard
+     * `excludeRepos` filter) for candidates in one of these `owner/repo` slugs
+     * (#168). They are pushed below equally-recommended non-matches but not
+     * removed; a strong boost can still outweigh the penalty. Empty / undefined
+     * disables it.
+     */
+    avoidRepos?: string[];
+    /**
+     * Per-call personalization bias: a soft boost for candidates whose issue
+     * labels match one of these types, case-insensitive (e.g. "bug",
+     * "good first issue") (#168). Same tier as a language match. Does not filter
+     * results, does not change `viabilityScore`. Empty / undefined disables it.
+     */
+    boostIssueTypes?: string[];
     /**
      * Counterweight against echo-chamber bias as `preferLanguages` /
      * `preferRepos` boosts accumulate over time (#1244). A value of 0.2