npm - @oss-scout/core - Versions diffs - 0.9.1 → 0.11.0 - Mend

@oss-scout/core 0.9.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/cli.bundle.cjs +33 -33
package/dist/cli.js +29 -1
package/dist/commands/search.d.ts +19 -0
package/dist/commands/search.js +6 -0
package/dist/core/issue-discovery.d.ts +3 -0
package/dist/core/issue-discovery.js +21 -3
package/dist/core/personalization.d.ts +67 -0
package/dist/core/personalization.js +126 -0
package/dist/core/types.d.ts +46 -0
package/dist/scout.js +3 -0
package/package.json +1 -1

package/dist/cli.js CHANGED Viewed

@@ -89,6 +89,9 @@ program
     .description("Search for contributable issues using multi-strategy discovery")
     .option("--json", "Output as JSON")
     .option("--strategy <strategies>", `Search strategies (${CONCRETE_STRATEGIES.join(",")},all)`, "all")
+    .option("--prefer-languages <list>", "Comma-separated languages to soft-boost in ranking (#1244). Candidates whose repo language matches sort above equally-recommended non-matches. Does not filter results.")
+    .option("--prefer-repos <list>", "Comma-separated `owner/repo` slugs to soft-boost in ranking (#1244). Stronger weight than language match. Does not filter results.")
+    .option("--diversity-ratio <n>", "Fraction of result slots (0-1) reserved for candidates that matched NEITHER preference list (#1244). Counterweights echo-chamber bias as boosts accumulate. Default 0 (disabled).")
     .action(async (count, options) => {
     try {
         if (!hasLocalState()) {
@@ -124,7 +127,32 @@ program
             }
             strategies.push(parsed.data);
         }
-        const results = await runSearch({ maxResults, state, strategies });
+        const splitCsv = (raw) => {
+            if (!raw)
+                return undefined;
+            const parts = raw
+                .split(",")
+                .map((s) => s.trim())
+                .filter(Boolean);
+            return parts.length > 0 ? parts : undefined;
+        };
+        let diversityRatio;
+        if (options.diversityRatio !== undefined) {
+            const parsed = Number(options.diversityRatio);
+            if (!Number.isFinite(parsed) || parsed < 0 || parsed > 1) {
+                console.error(`Error: --diversity-ratio must be a number in [0, 1] (got "${options.diversityRatio}")`);
+                process.exit(1);
+            }
+            diversityRatio = parsed;
+        }
+        const results = await runSearch({
+            maxResults,
+            state,
+            strategies,
+            preferLanguages: splitCsv(options.preferLanguages),
+            preferRepos: splitCsv(options.preferRepos),
+            diversityRatio,
+        });
         if (options.json) {
             console.log(formatJsonSuccess(results));
         }

package/dist/commands/search.d.ts CHANGED Viewed

@@ -37,6 +37,19 @@ export interface SearchOutput {
             updatedAt?: string;
             isStalled: boolean;
         };
+        /**
+         * Personalization sort-tier signal (#1244). Present only when the
+         * caller passed `preferLanguages` / `preferRepos` *and* this
+         * candidate matched at least one of them. `boostReasons` is the
+         * human-readable explanation (e.g. `"repo affinity: vercel/next.js"`).
+         */
+        boostScore?: number;
+        boostReasons?: string[];
+        /**
+         * Marks a candidate that filled a reserved diversity slot (#1244).
+         * Mutually exclusive with a non-zero `boostScore`.
+         */
+        diversitySlot?: boolean;
     }>;
     excludedRepos: string[];
     aiPolicyBlocklist: string[];
@@ -47,6 +60,12 @@ interface SearchCommandOptions {
     maxResults: number;
     state?: ScoutState;
     strategies?: SearchStrategy[];
+    /** Soft sort boost for candidates whose repo language matches (#1244). */
+    preferLanguages?: string[];
+    /** Soft sort boost for candidates in these `owner/repo` slugs (#1244). */
+    preferRepos?: string[];
+    /** Diversity counterweight: fraction of slots reserved for unboosted candidates (#1244). */
+    diversityRatio?: number;
 }
 export declare function runSearch(options: SearchCommandOptions): Promise<SearchOutput>;
 export {};

package/dist/commands/search.js CHANGED Viewed

@@ -17,6 +17,9 @@ export async function runSearch(options) {
     const result = await scout.search({
         maxResults: options.maxResults,
         strategies: options.strategies,
+        preferLanguages: options.preferLanguages,
+        preferRepos: options.preferRepos,
+        diversityRatio: options.diversityRatio,
     });
     // Persist results to local state and gist
     scout.saveResults(result.candidates);
@@ -60,6 +63,9 @@ export async function runSearch(options) {
                         isStalled: isLinkedPRStalled(c.vettingResult.linkedPR),
                     }
                     : undefined,
+                boostScore: c.boostScore,
+                boostReasons: c.boostReasons,
+                diversitySlot: c.diversitySlot,
             };
         }),
         excludedRepos: result.excludedRepos,

package/dist/core/issue-discovery.d.ts CHANGED Viewed

@@ -74,6 +74,9 @@ export declare class IssueDiscovery {
         maxResults?: number;
         strategies?: SearchStrategy[];
         skippedUrls?: Set<string>;
+        preferLanguages?: string[];
+        preferRepos?: string[];
+        diversityRatio?: number;
     }): Promise<{
         candidates: IssueCandidate[];
         strategiesUsed: SearchStrategy[];

package/dist/core/issue-discovery.js CHANGED Viewed

@@ -22,6 +22,7 @@ import { isDocOnlyIssue, applyPerRepoCap, } from "./issue-filtering.js";
 import { IssueVetter } from "./issue-vetting.js";
 import { getTopicsForCategories } from "./category-mapping.js";
 import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, fetchIssuesFromMaintainedRepos, filterVetAndScore, fetchIssuesFromKnownRepos, searchAcrossLanguagesAndLabels, } from "./search-phases.js";
+import { annotateBoost, applyDiversityRatio } from "./personalization.js";
 const MODULE = "issue-discovery";
 /** If remaining search quota is below this, skip heavy phases (2, 3). */
 const LOW_BUDGET_THRESHOLD = 20;
@@ -486,7 +487,11 @@ export class IssueDiscovery {
                     `Found ${allCandidates.length} candidate${allCandidates.length === 1 ? "" : "s"} but some search phases were limited. ` +
                     `Try again after the rate limit resets for complete results.`;
         }
-        // Sort by priority, recommendation, then viability score
+        // Personalization annotation (#1244): tag each candidate with
+        // boostScore + boostReasons before sorting so the new sort tier has
+        // values to read. No-op when neither preference list is supplied.
+        annotateBoost(allCandidates, options.preferLanguages, options.preferRepos);
+        // Sort by priority, recommendation, boost (#1244), then viability score
         allCandidates.sort((a, b) => {
             const priorityOrder = {
                 merged_pr: 0,
@@ -501,11 +506,24 @@ export class IssueDiscovery {
                 recommendationOrder[b.recommendation];
             if (recDiff !== 0)
                 return recDiff;
+            // Personalization tier (#1244): higher boostScore wins. Treats
+            // undefined as 0 so unboosted candidates rank below boosted peers
+            // but stay ordered among themselves by viabilityScore. No-op when
+            // `preferLanguages`/`preferRepos` are absent — all candidates carry
+            // `boostScore: undefined` and the difference collapses to 0.
+            const boostDiff = (b.boostScore ?? 0) - (a.boostScore ?? 0);
+            if (boostDiff !== 0)
+                return boostDiff;
             return b.viabilityScore - a.viabilityScore;
         });
         const capped = applyPerRepoCap(allCandidates, 2);
-        info(MODULE, `Search complete: ${tracker.getTotalCalls()} Search API calls used, ${capped.length} candidates returned`);
-        return { candidates: capped.slice(0, maxResults), strategiesUsed };
+        // Diversity counterweight (#1244): when `diversityRatio > 0`, reserve
+        // a fraction of the final slots for candidates that matched neither
+        // preference list. No-op when the ratio is 0 or absent — collapses to
+        // the original `slice(0, maxResults)` behavior.
+        const finalPicks = applyDiversityRatio(capped, maxResults, options.diversityRatio ?? 0);
+        info(MODULE, `Search complete: ${tracker.getTotalCalls()} Search API calls used, ${finalPicks.length} candidates returned`);
+        return { candidates: finalPicks, strategiesUsed };
     }
     /**
      * Vet a specific issue for claimability and project health.

package/dist/core/personalization.d.ts ADDED Viewed

@@ -0,0 +1,67 @@
+/**
+ * Personalization signals for search ranking (#1244).
+ *
+ * Two passes:
+ *
+ *   - `annotateBoost` translates `preferLanguages` / `preferRepos`
+ *     into a soft `boostScore` consumed by issue-discovery's final
+ *     sort tier between `recommendation` and `viabilityScore`.
+ *   - `applyDiversityRatio` reserves a fraction of the final slot
+ *     budget for candidates that matched no preference, counterweighting
+ *     echo-chamber bias as recommendations accumulate over time.
+ *
+ * Still out of scope for #1244: `boostIssueTypes`, `avoidRepos`, and
+ * render-time annotation of `boostReasons` / `diversitySlot` in the CLI
+ * non-JSON output. Those follow up in separate PRs.
+ */
+import type { IssueCandidate } from "./types.js";
+/**
+ * Boost weights. Tuned conservatively so personalization tips equally-
+ * scored candidates without drowning out high-viability normal results.
+ *
+ * Rationale:
+ *   - Repo affinity is the strongest signal — a candidate in a repo the
+ *     user has merged PRs into has real relationship context. Worth the
+ *     higher boost.
+ *   - Language match is broad and easy to satisfy. Lower weight.
+ *
+ * The weights are additive: a candidate that matches both preference
+ * lists receives `REPO_BOOST + LANGUAGE_BOOST` (30) as its `boostScore`.
+ *
+ * NOTE(review): the issue-discovery comparator in this release compares
+ * `boostScore` before `viabilityScore`, so within one recommendation
+ * tier any non-zero boost outranks every unboosted candidate regardless
+ * of viability. The magnitudes only order boosted candidates among
+ * themselves (30 > 20 > 10) — confirm this matches the "without
+ * drowning out" intent stated above.
+ */
+export declare const REPO_BOOST = 20; // weight for a `preferRepos` match
+export declare const LANGUAGE_BOOST = 10; // weight for a `preferLanguages` match
+/**
+ * Annotate each candidate with `boostScore` and `boostReasons` based on
+ * the caller-supplied preference lists. Mutates the candidate objects in
+ * place (the array itself is not reordered); the caller is responsible
+ * for re-sorting afterwards.
+ *
+ * Mutation (rather than returning new objects) keeps the personalization
+ * step a single linear pass over the array the caller already holds —
+ * the sort step reads back from the same objects.
+ *
+ * No-op when both preference lists are empty or undefined: candidates
+ * are left untouched and the sort tier collapses to 0.
+ *
+ * Matching rules implemented in `personalization.js`:
+ *   - Languages are trimmed and compared case-insensitively against
+ *     `projectHealth.language`.
+ *   - Repos are trimmed and compared to `issue.repo` by exact,
+ *     case-sensitive string equality.
+ *   - Both can match; the weights add.
+ *
+ * @param candidates      Candidates to annotate (mutated in place).
+ * @param preferLanguages Languages to boost; blank entries are ignored.
+ * @param preferRepos     `owner/repo` slugs to boost; blank entries are ignored.
+ */
+export declare function annotateBoost(candidates: IssueCandidate[], preferLanguages?: string[], preferRepos?: string[]): void;
+/**
+ * Apply a diversity-counterweight pass over a pre-sorted candidate list
+ * (#1244). Returns the first `maxResults` picks in priority order:
+ *
+ *   1. Main slots: `maxResults - floor(maxResults * diversityRatio)`
+ *      top candidates from the input. Personalization-biased candidates
+ *      win these slots when present (since the input is already sorted
+ *      by the personalization tier).
+ *   2. Diversity slots: the highest-ranked candidates that carry NO
+ *      `boostScore` — i.e. they matched neither `preferLanguages` nor
+ *      `preferRepos`. Tagged with `diversitySlot: true` for caller
+ *      transparency.
+ *   3. Top-up: if the diversity pool was thinner than the reserve, fall
+ *      back to the remaining sorted candidates so the user gets
+ *      `maxResults` slots whenever the source has enough material.
+ *
+ * The returned array is the concatenation of those three passes; it is
+ * not re-sorted, so a diversity pick can follow a lower-ranked main pick.
+ * Tagging writes `diversitySlot` onto the candidate objects themselves.
+ *
+ * Finite `diversityRatio` values are clamped to [0, 1]. 0 is a no-op
+ * (just slices the input), as is any ratio small enough that
+ * `floor(maxResults * ratio)` is 0. 1 means every slot is a diversity
+ * slot — useful for deliberately suppressing personalization without
+ * disabling it.
+ *
+ * NOTE(review): `NaN` is not caught by the clamp in the implementation;
+ * callers should validate the ratio before passing it (the CLI does).
+ *
+ * @param candidates    Pre-sorted candidate list (output of issue-discovery)
+ * @param maxResults    Total slots to fill
+ * @param diversityRatio Fraction of slots reserved for unboosted candidates
+ * @returns A new array of at most `maxResults` candidates (empty when `maxResults <= 0`)
+ */
+export declare function applyDiversityRatio(candidates: IssueCandidate[], maxResults: number, diversityRatio: number): IssueCandidate[];

package/dist/core/personalization.js ADDED Viewed

@@ -0,0 +1,126 @@
+/**
+ * Personalization signals for search ranking (#1244).
+ *
+ * Two passes:
+ *
+ *   - `annotateBoost` translates `preferLanguages` / `preferRepos`
+ *     into a soft `boostScore` consumed by issue-discovery's final
+ *     sort tier between `recommendation` and `viabilityScore`.
+ *   - `applyDiversityRatio` reserves a fraction of the final slot
+ *     budget for candidates that matched no preference, counterweighting
+ *     echo-chamber bias as recommendations accumulate over time.
+ *
+ * Still out of scope for #1244: `boostIssueTypes`, `avoidRepos`, and
+ * render-time annotation of `boostReasons` / `diversitySlot` in the CLI
+ * non-JSON output. Those follow up in separate PRs.
+ */
+/**
+ * Boost weights. Tuned conservatively so personalization tips equally-
+ * scored candidates without drowning out high-viability normal results.
+ *
+ * Rationale:
+ *   - Repo affinity is the strongest signal — a candidate in a repo the
+ *     user has merged PRs into has real relationship context. Worth the
+ *     higher boost.
+ *   - Language match is broad and easy to satisfy. Lower weight.
+ *
+ * The weights are additive: `annotateBoost` sums them, so a candidate
+ * that matches both preference lists gets a `boostScore` of 30.
+ *
+ * NOTE(review): the issue-discovery comparator in this release compares
+ * `boostScore` before `viabilityScore`, so within one recommendation
+ * tier any non-zero boost outranks every unboosted candidate regardless
+ * of viability. The magnitudes only order boosted candidates among
+ * themselves (30 > 20 > 10) — confirm this matches the "without
+ * drowning out" intent stated above.
+ */
+export const REPO_BOOST = 20; // added when `issue.repo` is in `preferRepos`
+export const LANGUAGE_BOOST = 10; // added when the repo language is in `preferLanguages`
+/**
+ * Annotate each candidate with `boostScore` and `boostReasons` based on
+ * the caller-supplied preference lists. Mutates the candidate objects in
+ * place (the array itself is not reordered); the caller is responsible
+ * for re-sorting afterwards.
+ *
+ * Mutation (rather than returning new objects) keeps the personalization
+ * step a single linear pass over the array the caller already holds —
+ * the sort step reads back from the same objects.
+ *
+ * No-op when both preference lists are empty or undefined: candidates
+ * are left untouched and the sort tier collapses to 0.
+ *
+ * Matching rules:
+ *   - Languages: entries are trimmed and lowercased, then compared with
+ *     the lowercased `projectHealth.language` — case-insensitive.
+ *   - Repos: entries are trimmed only and compared with `issue.repo` by
+ *     exact string equality — case-sensitive.
+ *   - Both can match; the weights add (`REPO_BOOST + LANGUAGE_BOOST`),
+ *     and `boostReasons` lists the repo reason before the language one.
+ *
+ * NOTE(review): repo slugs are matched case-sensitively while languages
+ * are not — confirm whether `Owner/Repo` vs `owner/repo` should match.
+ *
+ * NOTE(review): fields are written only when the score is positive. A
+ * candidate that arrives with `boostScore` / `boostReasons` already set
+ * and matches nothing here keeps the old values — relevant if candidate
+ * objects are ever reused across searches; verify against callers.
+ *
+ * @param candidates      Candidates to annotate (mutated in place).
+ * @param preferLanguages Languages to boost; blank entries are ignored.
+ * @param preferRepos     `owner/repo` slugs to boost; blank entries are ignored.
+ * @returns Nothing; results are written onto the candidates.
+ */
+export function annotateBoost(candidates, preferLanguages, preferRepos) {
+    const langSet = new Set((preferLanguages ?? []).map((l) => l.trim().toLowerCase()).filter(Boolean)); // normalized to lowercase for case-insensitive lookup
+    const repoSet = new Set((preferRepos ?? []).map((r) => r.trim()).filter(Boolean)); // trimmed only — original casing is kept
+    if (langSet.size === 0 && repoSet.size === 0)
+        return; // no usable preferences: leave every candidate untouched
+    for (const c of candidates) {
+        let score = 0;
+        const reasons = [];
+        if (repoSet.size > 0 && repoSet.has(c.issue.repo)) {
+            score += REPO_BOOST;
+            reasons.push(`repo affinity: ${c.issue.repo}`);
+        }
+        const lang = c.projectHealth.language;
+        if (langSet.size > 0 && lang && langSet.has(lang.toLowerCase())) { // a missing/empty language never matches
+            score += LANGUAGE_BOOST;
+            reasons.push(`language match: ${lang}`); // reports the repo's original casing
+        }
+        if (score > 0) { // unmatched candidates are not written to at all
+            c.boostScore = score;
+            c.boostReasons = reasons;
+        }
+    }
+}
+/**
+ * Apply a diversity-counterweight pass over a pre-sorted candidate list
+ * (#1244). Returns the first `maxResults` picks in priority order:
+ *
+ *   1. Main slots: `maxResults - floor(maxResults * diversityRatio)`
+ *      top candidates from the input. Personalization-biased candidates
+ *      win these slots when present (since the input is already sorted
+ *      by the personalization tier).
+ *   2. Diversity slots: the highest-ranked candidates that carry NO
+ *      `boostScore` — i.e. they matched neither `preferLanguages` nor
+ *      `preferRepos`. Tagged with `diversitySlot: true` for caller
+ *      transparency.
+ *   3. Top-up: if the diversity pool was thinner than the reserve, fall
+ *      back to the remaining sorted candidates so the user gets
+ *      `maxResults` slots whenever the source has enough material.
+ *
+ * The returned array is the concatenation of those three passes; it is
+ * not re-sorted, so a diversity pick can follow a lower-ranked main pick.
+ * Unboosted candidates that land in a main slot are NOT tagged; only
+ * picks made by pass 2 get `diversitySlot`, and the flag is written onto
+ * the shared candidate object rather than a copy.
+ *
+ * Finite `diversityRatio` values are clamped to [0, 1]. 0 is a no-op
+ * (just slices the input), as is any ratio small enough that
+ * `floor(maxResults * ratio)` is 0. 1 means every slot is a diversity
+ * slot — useful for deliberately suppressing personalization without
+ * disabling it.
+ *
+ * NOTE(review): `NaN` survives the `Math.max`/`Math.min` clamp, which
+ * makes `mainBudget` NaN; `picks.length >= NaN` is always false, so the
+ * first loop then returns every candidate regardless of `maxResults`.
+ * The CLI rejects non-finite ratios, but library callers are not
+ * guarded here — consider a `Number.isFinite` check.
+ *
+ * @param candidates    Pre-sorted candidate list (output of issue-discovery)
+ * @param maxResults    Total slots to fill
+ * @param diversityRatio Fraction of slots reserved for unboosted candidates
+ * @returns A new array of picks; empty when `maxResults <= 0`
+ */
+export function applyDiversityRatio(candidates, maxResults, diversityRatio) {
+    if (maxResults <= 0)
+        return [];
+    const ratio = Math.max(0, Math.min(1, diversityRatio)); // clamp to [0, 1]; NaN passes through unchanged
+    if (ratio === 0)
+        return candidates.slice(0, maxResults); // disabled: plain top-N
+    const diversityReserve = Math.min(Math.floor(maxResults * ratio), maxResults);
+    if (diversityReserve === 0)
+        return candidates.slice(0, maxResults); // ratio too small to reserve a whole slot
+    const mainBudget = maxResults - diversityReserve;
+    const picks = [];
+    const seen = new Set(); // issue URLs already picked, so later passes skip them
+    for (const c of candidates) { // pass 1: main slots, straight from the sorted input
+        if (picks.length >= mainBudget)
+            break;
+        picks.push(c);
+        seen.add(c.issue.url);
+    }
+    for (const c of candidates) { // pass 2: reserved slots, unboosted candidates only
+        if (picks.length >= maxResults)
+            break;
+        if (seen.has(c.issue.url))
+            continue;
+        if (c.boostScore && c.boostScore > 0)
+            continue; // boosted candidates are not eligible for a diversity slot
+        c.diversitySlot = true; // mutates the caller's candidate object
+        picks.push(c);
+        seen.add(c.issue.url);
+    }
+    for (const c of candidates) { // pass 3: top-up with whatever is left, boosted or not
+        if (picks.length >= maxResults)
+            break;
+        if (seen.has(c.issue.url))
+            continue;
+        picks.push(c);
+        seen.add(c.issue.url);
+    }
+    return picks;
+}

package/dist/core/types.d.ts CHANGED Viewed

@@ -53,6 +53,27 @@ export interface IssueCandidate {
     reasonsToApprove: string[];
     viabilityScore: number;
     searchPriority: SearchPriority;
+    /**
+     * Personalization sort tier (#1244). Populated only when the caller
+     * passes `preferLanguages` / `preferRepos` to `search()` *and* the
+     * candidate matches at least one. Affects sort order between the
+     * `recommendation` tier and `viabilityScore`; never used as a filter.
+     */
+    boostScore?: number;
+    /**
+     * Human-readable reasons the candidate matched personalization bias
+     * (#1244). Mirrors `reasonsToApprove`/`reasonsToSkip` shape for
+     * symmetry with the existing surface.
+     */
+    boostReasons?: string[];
+    /**
+     * Marks a candidate that filled a reserved diversity slot (#1244).
+     * Populated only when `diversityRatio > 0` was passed AND the
+     * candidate matched no personalization bias. Mutually exclusive with
+     * a non-zero `boostScore` (a candidate cannot be both biased-toward
+     * and a diversity slot in the same result set).
+     */
+    diversitySlot?: boolean;
 }
 /** Subset of RepoScore fields that callers may update. */
 export interface RepoScoreUpdate {
@@ -122,6 +143,31 @@ export type ScoutConfig = {
 export interface SearchOptions {
     maxResults?: number;
     strategies?: SearchStrategy[];
+    /**
+     * Per-call personalization bias: candidates whose repo language matches
+     * one of these (case-insensitive) get a soft sort boost above
+     * equally-recommended non-matches (#1244). Does not filter results, does
+     * not change `viabilityScore`. Empty / undefined disables the boost.
+     */
+    preferLanguages?: string[];
+    /**
+     * Per-call personalization bias: candidates in one of these
+     * `owner/repo` slugs get a soft sort boost above equally-recommended
+     * non-matches (#1244). Stronger weight than language match. Does not
+     * filter results, does not change `viabilityScore`. Empty / undefined
+     * disables the boost.
+     */
+    preferRepos?: string[];
+    /**
+     * Counterweight against echo-chamber bias as `preferLanguages` /
+     * `preferRepos` boosts accumulate over time (#1244). A value of 0.2
+     * means "reserve roughly 20% of the final slots for candidates that
+     * matched NEITHER preference list," filling them from the same sorted
+     * pool but skipping any candidate carrying a `boostScore`. 0 disables
+     * the counterweight; 1 makes every slot a diversity slot. Range
+     * clamped to [0, 1].
+     */
+    diversityRatio?: number;
 }
 /** Result of a search operation. */
 export interface SearchResult {

package/dist/scout.js CHANGED Viewed

@@ -148,6 +148,9 @@ export class OssScout {
             maxResults: options?.maxResults,
             strategies: options?.strategies,
             skippedUrls,
+            preferLanguages: options?.preferLanguages,
+            preferRepos: options?.preferRepos,
+            diversityRatio: options?.diversityRatio,
         });
         this.state.lastSearchAt = new Date().toISOString();
         this.dirty = true;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@oss-scout/core",
-  "version": "0.9.1",
+  "version": "0.11.0",
   "description": "Personalized GitHub issue finder with multi-strategy search, deep vetting, and viability scoring — CLI, library, MCP server, and Claude Code plugin",
   "type": "module",
   "bin": {