npm - @oss-scout/core - Versions diffs - 0.9.0 → 0.10.0 - Mend

@oss-scout/core 0.9.0 → 0.10.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Files changed (13)

package/dist/cli.bundle.cjs +49 -49
package/dist/cli.js +18 -1
package/dist/commands/search.d.ts +12 -0
package/dist/commands/search.js +4 -0
package/dist/core/issue-discovery.d.ts +2 -0
package/dist/core/issue-discovery.js +18 -8
package/dist/core/personalization.d.ts +39 -0
package/dist/core/personalization.js +60 -0
package/dist/core/search-phases.d.ts +24 -0
package/dist/core/search-phases.js +50 -0
package/dist/core/types.d.ts +28 -0
package/dist/scout.js +2 -0
package/package.json +1 -1

package/dist/cli.js CHANGED Viewed

@@ -89,6 +89,8 @@ program
     .description("Search for contributable issues using multi-strategy discovery")
     .option("--json", "Output as JSON")
     .option("--strategy <strategies>", `Search strategies (${CONCRETE_STRATEGIES.join(",")},all)`, "all")
+    .option("--prefer-languages <list>", "Comma-separated languages to soft-boost in ranking (#1244). Candidates whose repo language matches sort above equally-recommended non-matches. Does not filter results.")
+    .option("--prefer-repos <list>", "Comma-separated `owner/repo` slugs to soft-boost in ranking (#1244). Stronger weight than language match. Does not filter results.")
     .action(async (count, options) => {
     try {
         if (!hasLocalState()) {
@@ -124,7 +126,22 @@ program
             }
             strategies.push(parsed.data);
         }
-        const results = await runSearch({ maxResults, state, strategies });
+        const splitCsv = (raw) => { // Turns a comma-separated option value into an array of trimmed, non-empty entries.
+            if (!raw)
+                return undefined; // option omitted or empty string: forward "no preference"
+            const parts = raw
+                .split(",")
+                .map((s) => s.trim())
+                .filter(Boolean); // drops empty entries left by stray or trailing commas
+            return parts.length > 0 ? parts : undefined; // input such as "," or " , " yields undefined, never []
+        };
+        const results = await runSearch({
+            maxResults,
+            state,
+            strategies,
+            preferLanguages: splitCsv(options.preferLanguages),
+            preferRepos: splitCsv(options.preferRepos),
+        });
         if (options.json) {
             console.log(formatJsonSuccess(results));
         }

package/dist/commands/search.d.ts CHANGED Viewed

@@ -37,6 +37,14 @@ export interface SearchOutput {
             updatedAt?: string;
             isStalled: boolean;
         };
+        /**
+         * Personalization sort-tier signal (#1244). Present only when the
+         * caller passed `preferLanguages` / `preferRepos` *and* this
+         * candidate matched at least one of them. `boostReasons` is the
+         * human-readable explanation (e.g. `"repo affinity: vercel/next.js"`).
+         */
+        boostScore?: number;
+        boostReasons?: string[];
     }>;
     excludedRepos: string[];
     aiPolicyBlocklist: string[];
@@ -47,6 +55,10 @@ interface SearchCommandOptions {
     maxResults: number;
     state?: ScoutState;
     strategies?: SearchStrategy[];
+    /** Soft sort boost for candidates whose repo language matches (#1244). */
+    preferLanguages?: string[];
+    /** Soft sort boost for candidates in these `owner/repo` slugs (#1244). */
+    preferRepos?: string[];
 }
 export declare function runSearch(options: SearchCommandOptions): Promise<SearchOutput>;
 export {};

package/dist/commands/search.js CHANGED Viewed

@@ -17,6 +17,8 @@ export async function runSearch(options) {
     const result = await scout.search({
         maxResults: options.maxResults,
         strategies: options.strategies,
+        preferLanguages: options.preferLanguages,
+        preferRepos: options.preferRepos,
     });
     // Persist results to local state and gist
     scout.saveResults(result.candidates);
@@ -60,6 +62,8 @@ export async function runSearch(options) {
                         isStalled: isLinkedPRStalled(c.vettingResult.linkedPR),
                     }
                     : undefined,
+                boostScore: c.boostScore,
+                boostReasons: c.boostReasons,
             };
         }),
         excludedRepos: result.excludedRepos,

package/dist/core/issue-discovery.d.ts CHANGED Viewed

@@ -74,6 +74,8 @@ export declare class IssueDiscovery {
         maxResults?: number;
         strategies?: SearchStrategy[];
         skippedUrls?: Set<string>;
+        preferLanguages?: string[];
+        preferRepos?: string[];
     }): Promise<{
         candidates: IssueCandidate[];
         strategiesUsed: SearchStrategy[];

package/dist/core/issue-discovery.js CHANGED Viewed

@@ -21,7 +21,8 @@ import { debug, info, warn } from "./logger.js";
 import { isDocOnlyIssue, applyPerRepoCap, } from "./issue-filtering.js";
 import { IssueVetter } from "./issue-vetting.js";
 import { getTopicsForCategories } from "./category-mapping.js";
-import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, fetchIssuesFromMaintainedRepos, filterVetAndScore, fetchIssuesFromKnownRepos, searchWithChunkedLabels, } from "./search-phases.js";
+import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, fetchIssuesFromMaintainedRepos, filterVetAndScore, fetchIssuesFromKnownRepos, searchAcrossLanguagesAndLabels, } from "./search-phases.js";
+import { annotateBoost } from "./personalization.js";
 const MODULE = "issue-discovery";
 /** If remaining search quota is below this, skip heavy phases (2, 3). */
 const LOW_BUDGET_THRESHOLD = 20;
@@ -83,7 +84,7 @@ async function runPhase1(octokit, vetter, repos, labels, maxResults, filterIssue
     };
 }
 /** Phase 2: General label-filtered search with multi-tier interleaving. */
-async function runPhase2(octokit, vetter, scopes, labels, configLabels, baseQualifiers, maxResults, minStars, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues) {
+async function runPhase2(octokit, vetter, scopes, labels, configLabels, languages, isAnyLanguage, maxResults, minStars, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues) {
     info(MODULE, "Phase 2: General issue search...");
     const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
     // Build per-tier label groups. Multi-tier when 2+ scopes; single-tier otherwise.
@@ -112,7 +113,7 @@ async function runPhase2(octokit, vetter, scopes, labels, configLabels, baseQual
     let rateLimitHit = false;
     for (const { tier, tierLabels } of tierLabelGroups) {
         try {
-            const allItems = await searchWithChunkedLabels(octokit, tierLabels, 0, (labelQ) => `${baseQualifiers} ${labelQ}`.replace(/  +/g, " ").trim(), budgetPerTier * 3);
+            const allItems = await searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, tierLabels, (langQ) => `is:issue is:open ${langQ} no:assignee`.replace(/  +/g, " ").trim(), budgetPerTier * 3);
             info(MODULE, `Phase 2 [${tier}]: processing ${allItems.length} items...`);
             const { candidates: tierCandidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, allItems, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], budgetPerTier, minStars, `Phase 2 [${tier}]`);
             tierResults.push(tierCandidates);
@@ -337,9 +338,6 @@ export class IssueDiscovery {
         const langQuery = isAnyLanguage
             ? ""
             : languages.map((l) => `language:${l}`).join(" ");
-        const baseQualifiers = `is:issue is:open ${langQuery} no:assignee`
-            .replace(/  +/g, " ")
-            .trim();
         // Build reusable filter
         const aiBlocklisted = new Set(config.aiPolicyBlocklist);
         if (aiBlocklisted.size > 0) {
@@ -427,7 +425,7 @@ export class IssueDiscovery {
                     info(MODULE, `Skipping broad phase delay: no results from previous phases, proceeding immediately`);
                 }
                 const remaining = maxResults - allCandidates.length;
-                const result = await runPhase2(this.octokit, this.vetter, scopes, labels, config.labels, baseQualifiers, remaining, minStars, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
+                const result = await runPhase2(this.octokit, this.vetter, scopes, labels, config.labels, languages, isAnyLanguage, remaining, minStars, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
                 allCandidates.push(...result.candidates);
                 phaseErrors["2"] = result.error;
                 if (result.rateLimitHit)
@@ -489,7 +487,11 @@ export class IssueDiscovery {
                     `Found ${allCandidates.length} candidate${allCandidates.length === 1 ? "" : "s"} but some search phases were limited. ` +
                     `Try again after the rate limit resets for complete results.`;
         }
-        // Sort by priority, recommendation, then viability score
+        // Personalization annotation (#1244): tag each candidate with
+        // boostScore + boostReasons before sorting so the new sort tier has
+        // values to read. No-op when neither preference list is supplied.
+        annotateBoost(allCandidates, options.preferLanguages, options.preferRepos);
+        // Sort by priority, recommendation, boost (#1244), then viability score
         allCandidates.sort((a, b) => {
             const priorityOrder = {
                 merged_pr: 0,
@@ -504,6 +506,14 @@ export class IssueDiscovery {
                 recommendationOrder[b.recommendation];
             if (recDiff !== 0)
                 return recDiff;
+            // Personalization tier (#1244): higher boostScore wins. Treats
+            // undefined as 0 so unboosted candidates rank below boosted peers
+            // but stay ordered among themselves by viabilityScore. No-op when
+            // `preferLanguages`/`preferRepos` are absent — all candidates carry
+            // `boostScore: undefined` and the difference collapses to 0.
+            const boostDiff = (b.boostScore ?? 0) - (a.boostScore ?? 0);
+            if (boostDiff !== 0)
+                return boostDiff;
             return b.viabilityScore - a.viabilityScore;
         });
         const capped = applyPerRepoCap(allCandidates, 2);

package/dist/core/personalization.d.ts ADDED Viewed

@@ -0,0 +1,39 @@
+/**
+ * Personalization signals for search ranking (#1244).
+ *
+ * Translates caller-supplied `preferLanguages` / `preferRepos` lists
+ * into a soft `boostScore` on each `IssueCandidate`. The final search
+ * sort consults this score between the `recommendation` tier and the
+ * raw `viabilityScore`, so personalization reorders ties without
+ * changing which candidates pass vetting.
+ *
+ * This is the minimum-viable subset of Option A in #1244: only language
+ * and repo bias, no `boostIssueTypes` / `avoidRepos` / `diversityRatio`
+ * yet. Those follow up in separate PRs.
+ */
+import type { IssueCandidate } from "./types.js";
+/**
+ * Boost weights. Tuned conservatively so personalization tips equally-
+ * scored candidates without drowning out high-viability normal results.
+ *
+ * Rationale:
+ *   - Repo affinity is the strongest signal — a candidate in a repo the
+ *     user has merged PRs into has real relationship context. Worth the
+ *     higher boost.
+ *   - Language match is broad and easy to satisfy. Lower weight.
+ *
+ * NOTE(review): the final sort in issue-discovery.js compares
+ * `boostScore` as its own tier before `viabilityScore`; the weights are
+ * never added to or weighed against viability. Within the same priority
+ * and recommendation, any non-zero boost outranks every unboosted
+ * candidate, so the magnitudes only order boosted candidates among
+ * themselves (repo + language = 30 > repo = 20 > language = 10).
+ */
+export declare const REPO_BOOST = 20;
+export declare const LANGUAGE_BOOST = 10;
+/**
+ * Annotate each candidate with `boostScore` and `boostReasons` based on
+ * the caller-supplied preference lists. Mutates the array in place; the
+ * caller is responsible for re-sorting afterwards.
+ *
+ * Mutation (rather than returning new objects) keeps the personalization
+ * step a single linear pass over the array the caller already holds —
+ * the sort step reads back from the same objects.
+ *
+ * No-op when both preference lists are empty or undefined: candidates
+ * retain `boostScore: undefined` and the sort tier collapses to 0.
+ *
+ * Per the implementation in personalization.js: language entries are
+ * matched case-insensitively against `projectHealth.language`, repo
+ * entries are matched exactly (case-sensitively) against `issue.repo`,
+ * and only candidates that match at least one list are written to. The
+ * array itself is not reordered.
+ *
+ * @param candidates       Candidates whose objects are annotated in place
+ * @param preferLanguages  Languages to boost; blank entries are ignored
+ * @param preferRepos      `owner/repo` slugs to boost; blank entries are ignored
+ */
+export declare function annotateBoost(candidates: IssueCandidate[], preferLanguages?: string[], preferRepos?: string[]): void;

package/dist/core/personalization.js ADDED Viewed

@@ -0,0 +1,60 @@
+/**
+ * Personalization signals for search ranking (#1244).
+ *
+ * Translates caller-supplied `preferLanguages` / `preferRepos` lists
+ * into a soft `boostScore` on each `IssueCandidate`. The final search
+ * sort consults this score between the `recommendation` tier and the
+ * raw `viabilityScore`, so personalization reorders ties without
+ * changing which candidates pass vetting.
+ *
+ * This is the minimum-viable subset of Option A in #1244: only language
+ * and repo bias, no `boostIssueTypes` / `avoidRepos` / `diversityRatio`
+ * yet. Those follow up in separate PRs.
+ */
+/**
+ * Boost weights. Tuned conservatively so personalization tips equally-
+ * scored candidates without drowning out high-viability normal results.
+ *
+ * Rationale:
+ *   - Repo affinity is the strongest signal — a candidate in a repo the
+ *     user has merged PRs into has real relationship context. Worth the
+ *     higher boost.
+ *   - Language match is broad and easy to satisfy. Lower weight.
+ *
+ * NOTE(review): the final sort in issue-discovery.js compares
+ * `boostScore` as its own tier before `viabilityScore`; the weights are
+ * never added to or weighed against viability. Within the same priority
+ * and recommendation, any non-zero boost outranks every unboosted
+ * candidate, so the magnitudes only order boosted candidates among
+ * themselves (repo + language = 30 > repo = 20 > language = 10).
+ */
+export const REPO_BOOST = 20; // added when the candidate's repo is in the preferred-repo list
+export const LANGUAGE_BOOST = 10; // added when the candidate's repo language is in the preferred-language list
+/**
+ * Annotate each candidate with `boostScore` and `boostReasons` based on
+ * the caller-supplied preference lists. Mutates the array in place; the
+ * caller is responsible for re-sorting afterwards.
+ *
+ * Mutation (rather than returning new objects) keeps the personalization
+ * step a single linear pass over the array the caller already holds —
+ * the sort step reads back from the same objects.
+ *
+ * No-op when both preference lists are empty or undefined: candidates
+ * retain `boostScore: undefined` and the sort tier collapses to 0.
+ *
+ * Matching rules:
+ *   - Languages are trimmed and lower-cased, then compared against the
+ *     lower-cased `projectHealth.language`; a candidate with no language
+ *     never matches.
+ *   - Repos are trimmed only and compared to `issue.repo` exactly.
+ *   - A candidate matching both lists gets REPO_BOOST + LANGUAGE_BOOST,
+ *     with the repo reason listed before the language reason.
+ *
+ * Only the candidate objects are written to; the array is not reordered,
+ * and candidates that match nothing are left exactly as they were.
+ *
+ * NOTE(review): repo matching is case-sensitive while language matching
+ * is not. GitHub resolves owner/repo names case-insensitively, so an
+ * entry such as `Vercel/Next.js` would silently earn no boost against
+ * `vercel/next.js` — confirm whether callers normalise slugs first.
+ *
+ * NOTE(review): the fields are only ever set, never cleared. If a
+ * candidate object already carries `boostScore`/`boostReasons` (e.g. from
+ * an earlier call) and does not match this time, the old values remain —
+ * verify candidates are always freshly built before this runs.
+ *
+ * @param candidates       Candidates whose objects are annotated in place
+ * @param preferLanguages  Languages to boost; blank entries are ignored
+ * @param preferRepos      `owner/repo` slugs to boost; blank entries are ignored
+ */
+export function annotateBoost(candidates, preferLanguages, preferRepos) {
+    const langSet = new Set((preferLanguages ?? []).map((l) => l.trim().toLowerCase()).filter(Boolean)); // normalised for case-insensitive lookup
+    const repoSet = new Set((preferRepos ?? []).map((r) => r.trim()).filter(Boolean)); // trimmed only; original casing is kept
+    if (langSet.size === 0 && repoSet.size === 0)
+        return; // nothing to prefer: leave every candidate untouched
+    for (const c of candidates) {
+        let score = 0;
+        const reasons = [];
+        if (repoSet.size > 0 && repoSet.has(c.issue.repo)) { // exact string match on the slug
+            score += REPO_BOOST;
+            reasons.push(`repo affinity: ${c.issue.repo}`);
+        }
+        const lang = c.projectHealth.language;
+        if (langSet.size > 0 && lang && langSet.has(lang.toLowerCase())) { // falsy `lang` (no detected language) never matches
+            score += LANGUAGE_BOOST;
+            reasons.push(`language match: ${lang}`); // reports the language as stored on the candidate, not the caller's spelling
+        }
+        if (score > 0) { // write fields only on a match so unmatched candidates keep `boostScore` unset
+            c.boostScore = score;
+            c.boostReasons = reasons;
+        }
+    }
+}

package/dist/core/search-phases.d.ts CHANGED Viewed

@@ -61,6 +61,30 @@ export declare function fetchIssuesFromKnownRepos(octokit: Octokit, vetter: Issu
  * @param perPage      Number of results per API call
  */
 export declare function searchWithChunkedLabels(octokit: Octokit, labels: string[], reservedOps: number, buildQuery: (labelQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
+/**
+ * Build per-call language qualifier strings, fanning out across languages
+ * when a multi-language + labels combination would trip GitHub Search's
+ * empty-result edge case (multi-`language:` AND with a label OR-group
+ * silently returns 0 — see https://github.com/costajohnt/oss-autopilot/issues/1331).
+ *
+ * Per the implementation in search-phases.js, the result always has at
+ * least one element: `[""]` when `isAnyLanguage` is true or `languages`
+ * is empty, a single `language:X` for one language, one space-joined
+ * string of all qualifiers for several languages without labels, and one
+ * `language:X` string per language for several languages with labels.
+ *
+ * @param languages      Language names to turn into `language:X` qualifiers
+ * @param isAnyLanguage  When true, no language qualifier is produced
+ * @param hasLabels      Whether the caller will combine the qualifier with labels
+ * @returns One qualifier string per search call the caller should issue
+ */
+export declare function buildLanguageVariants(languages: string[], isAnyLanguage: boolean, hasLabels: boolean): string[];
+/**
+ * Search across languages with label chunking, deduplicating results.
+ *
+ * Fans out one query per language when 2+ languages are paired with labels
+ * (works around a GitHub Search backend edge case where the multi-language
+ * AND combined with a label OR-group returns 0). For each language variant,
+ * delegates to searchWithChunkedLabels to keep within GitHub's 5-operator limit.
+ *
+ * Per the implementation in search-phases.js, variants are searched one
+ * after another with a pause between them, and results are deduplicated
+ * by `html_url` in first-seen order.
+ *
+ * @param octokit         Authenticated Octokit instance
+ * @param languages       Configured languages (used as `language:X` qualifiers)
+ * @param isAnyLanguage   When true, skip language qualifiers entirely
+ * @param labels          Label list passed to searchWithChunkedLabels
+ * @param buildBaseQuery  Builds the query prefix from a language qualifier string;
+ *                        e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
+ * @param perPage         Results per API call
+ * @returns Deduplicated search items from all language variants
+ */
+export declare function searchAcrossLanguagesAndLabels(octokit: Octokit, languages: string[], isAnyLanguage: boolean, labels: string[], buildBaseQuery: (langQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
 /**
  * Shared pipeline: spam-filter, repo-exclusion, vetting, and star-count filter.
  * Used by Phases 2 and 3 to convert raw search results into vetted candidates.

package/dist/core/search-phases.js CHANGED Viewed

@@ -291,6 +291,56 @@ export async function searchWithChunkedLabels(octokit, labels, reservedOps, buil
     }
     return allItems;
 }
+/**
+ * Build per-call language qualifier strings, fanning out across languages
+ * when a multi-language + labels combination would trip GitHub Search's
+ * empty-result edge case (multi-`language:` AND with a label OR-group
+ * silently returns 0 — see https://github.com/costajohnt/oss-autopilot/issues/1331).
+ *
+ * The result always has at least one element, so callers can loop over it
+ * unconditionally; an empty-string element means "no language qualifier".
+ *
+ * NOTE(review): language names are interpolated unquoted into
+ * `language:${l}`. A name containing a space would presumably need
+ * quoting to form a single qualifier — confirm languages are validated
+ * or normalised before they reach this function.
+ *
+ * @param languages      Language names to turn into `language:X` qualifiers
+ * @param isAnyLanguage  When true, no language qualifier is produced
+ * @param hasLabels      Whether the caller will combine the qualifier with labels
+ * @returns One qualifier string per search call the caller should issue
+ */
+export function buildLanguageVariants(languages, isAnyLanguage, hasLabels) {
+    if (isAnyLanguage || languages.length === 0)
+        return [""]; // single unqualified variant
+    if (languages.length === 1)
+        return [`language:${languages[0]}`]; // one language: no fan-out needed, with or without labels
+    if (!hasLabels)
+        return [languages.map((l) => `language:${l}`).join(" ")]; // several languages, no labels: keep one combined query
+    return languages.map((l) => `language:${l}`); // several languages with labels: one variant (and one search) per language
+}
+/**
+ * Search across languages with label chunking, deduplicating results.
+ *
+ * Fans out one query per language when 2+ languages are paired with labels
+ * (works around a GitHub Search backend edge case where the multi-language
+ * AND combined with a label OR-group returns 0). For each language variant,
+ * delegates to searchWithChunkedLabels to keep within GitHub's 5-operator limit.
+ *
+ * Variants are searched sequentially, sleeping INTER_QUERY_DELAY_MS
+ * between consecutive variants (not before the first). Items are
+ * deduplicated by `html_url`, keeping the first occurrence and its
+ * position. There is no try/catch here, so a rejection from
+ * searchWithChunkedLabels propagates to the caller and discards items
+ * collected from earlier variants.
+ *
+ * NOTE(review): `perPage` is forwarded unchanged to every variant, so
+ * with N language variants the combined result can presumably hold up to
+ * N times as many items as a single-variant search — confirm callers
+ * tolerate that (Phase 2 applies its own budget afterwards).
+ *
+ * @param octokit         Authenticated Octokit instance
+ * @param languages       Configured languages (used as `language:X` qualifiers)
+ * @param isAnyLanguage   When true, skip language qualifiers entirely
+ * @param labels          Label list passed to searchWithChunkedLabels
+ * @param buildBaseQuery  Builds the query prefix from a language qualifier string;
+ *                        e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
+ * @param perPage         Results per API call
+ * @returns Deduplicated search items from all language variants
+ */
+export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, labels, buildBaseQuery, perPage) {
+    const langVariants = buildLanguageVariants(languages, isAnyLanguage, labels.length > 0); // always at least one variant
+    const seenUrls = new Set(); // html_url values already collected
+    const allItems = [];
+    for (let i = 0; i < langVariants.length; i++) {
+        if (i > 0)
+            await sleep(INTER_QUERY_DELAY_MS); // pause between variants only
+        const items = await searchWithChunkedLabels(octokit, labels, 0, (labelQ) => `${buildBaseQuery(langVariants[i])} ${labelQ}`
+            .replace(/  +/g, " ")
+            .trim(), perPage); // 0 is the reservedOps argument; the replace collapses double spaces left by an empty qualifier or label query
+        for (const item of items) {
+            if (!seenUrls.has(item.html_url)) { // the same issue can come back from more than one variant
+                seenUrls.add(item.html_url);
+                allItems.push(item);
+            }
+        }
+    }
+    return allItems;
+}
 /**
  * Shared pipeline: spam-filter, repo-exclusion, vetting, and star-count filter.
  * Used by Phases 2 and 3 to convert raw search results into vetted candidates.

package/dist/core/types.d.ts CHANGED Viewed

@@ -53,6 +53,19 @@ export interface IssueCandidate {
     reasonsToApprove: string[];
     viabilityScore: number;
     searchPriority: SearchPriority;
+    /**
+     * Personalization sort tier (#1244). Populated only when the caller
+     * passes `preferLanguages` / `preferRepos` to `search()` *and* the
+     * candidate matches at least one. Affects sort order between the
+     * `recommendation` tier and `viabilityScore`; never used as a filter.
+     */
+    boostScore?: number;
+    /**
+     * Human-readable reasons the candidate matched personalization bias
+     * (#1244). Mirrors `reasonsToApprove`/`reasonsToSkip` shape for
+     * symmetry with the existing surface.
+     */
+    boostReasons?: string[];
 }
 /** Subset of RepoScore fields that callers may update. */
 export interface RepoScoreUpdate {
@@ -122,6 +135,21 @@ export type ScoutConfig = {
 export interface SearchOptions {
     maxResults?: number;
     strategies?: SearchStrategy[];
+    /**
+     * Per-call personalization bias: candidates whose repo language matches
+     * one of these (case-insensitive) get a soft sort boost above
+     * equally-recommended non-matches (#1244). Does not filter results, does
+     * not change `viabilityScore`. Empty / undefined disables the boost.
+     */
+    preferLanguages?: string[];
+    /**
+     * Per-call personalization bias: candidates in one of these
+     * `owner/repo` slugs get a soft sort boost above equally-recommended
+     * non-matches (#1244). Stronger weight than language match. Does not
+     * filter results, does not change `viabilityScore`. Empty / undefined
+     * disables the boost.
+     */
+    preferRepos?: string[];
 }
 /** Result of a search operation. */
 export interface SearchResult {

package/dist/scout.js CHANGED Viewed

@@ -148,6 +148,8 @@ export class OssScout {
             maxResults: options?.maxResults,
             strategies: options?.strategies,
             skippedUrls,
+            preferLanguages: options?.preferLanguages,
+            preferRepos: options?.preferRepos,
         });
         this.state.lastSearchAt = new Date().toISOString();
         this.dirty = true;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@oss-scout/core",
-  "version": "0.9.0",
+  "version": "0.10.0",
   "description": "Personalized GitHub issue finder with multi-strategy search, deep vetting, and viability scoring — CLI, library, MCP server, and Claude Code plugin",
   "type": "module",
   "bin": {