@oss-scout/core 0.9.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -89,6 +89,9 @@ program
89
89
  .description("Search for contributable issues using multi-strategy discovery")
90
90
  .option("--json", "Output as JSON")
91
91
  .option("--strategy <strategies>", `Search strategies (${CONCRETE_STRATEGIES.join(",")},all)`, "all")
92
+ .option("--prefer-languages <list>", "Comma-separated languages to soft-boost in ranking (#1244). Candidates whose repo language matches sort above equally-recommended non-matches. Does not filter results.")
93
+ .option("--prefer-repos <list>", "Comma-separated `owner/repo` slugs to soft-boost in ranking (#1244). Stronger weight than language match. Does not filter results.")
94
+ .option("--diversity-ratio <n>", "Fraction of result slots (0-1) reserved for candidates that matched NEITHER preference list (#1244). Counterweights echo-chamber bias as boosts accumulate. Default 0 (disabled).")
92
95
  .action(async (count, options) => {
93
96
  try {
94
97
  if (!hasLocalState()) {
@@ -124,7 +127,32 @@ program
124
127
  }
125
128
  strategies.push(parsed.data);
126
129
  }
127
- const results = await runSearch({ maxResults, state, strategies });
130
+ const splitCsv = (raw) => {
131
+ if (!raw)
132
+ return undefined;
133
+ const parts = raw
134
+ .split(",")
135
+ .map((s) => s.trim())
136
+ .filter(Boolean);
137
+ return parts.length > 0 ? parts : undefined;
138
+ };
139
+ let diversityRatio;
140
+ if (options.diversityRatio !== undefined) {
141
+ const parsed = Number(options.diversityRatio);
142
+ if (!Number.isFinite(parsed) || parsed < 0 || parsed > 1) {
143
+ console.error(`Error: --diversity-ratio must be a number in [0, 1] (got "${options.diversityRatio}")`);
144
+ process.exit(1);
145
+ }
146
+ diversityRatio = parsed;
147
+ }
148
+ const results = await runSearch({
149
+ maxResults,
150
+ state,
151
+ strategies,
152
+ preferLanguages: splitCsv(options.preferLanguages),
153
+ preferRepos: splitCsv(options.preferRepos),
154
+ diversityRatio,
155
+ });
128
156
  if (options.json) {
129
157
  console.log(formatJsonSuccess(results));
130
158
  }
@@ -37,6 +37,19 @@ export interface SearchOutput {
37
37
  updatedAt?: string;
38
38
  isStalled: boolean;
39
39
  };
40
+ /**
41
+ * Personalization sort-tier signal (#1244). Present only when the
42
+ * caller passed `preferLanguages` / `preferRepos` *and* this
43
+ * candidate matched at least one of them. `boostReasons` is the
44
+ * human-readable explanation (e.g. `"repo affinity: vercel/next.js"`).
45
+ */
46
+ boostScore?: number;
47
+ boostReasons?: string[];
48
+ /**
49
+ * Marks a candidate that filled a reserved diversity slot (#1244).
50
+ * Mutually exclusive with a non-zero `boostScore`.
51
+ */
52
+ diversitySlot?: boolean;
40
53
  }>;
41
54
  excludedRepos: string[];
42
55
  aiPolicyBlocklist: string[];
@@ -47,6 +60,12 @@ interface SearchCommandOptions {
47
60
  maxResults: number;
48
61
  state?: ScoutState;
49
62
  strategies?: SearchStrategy[];
63
+ /** Soft sort boost for candidates whose repo language matches (#1244). */
64
+ preferLanguages?: string[];
65
+ /** Soft sort boost for candidates in these `owner/repo` slugs (#1244). */
66
+ preferRepos?: string[];
67
+ /** Diversity counterweight: fraction of slots reserved for unboosted candidates (#1244). */
68
+ diversityRatio?: number;
50
69
  }
51
70
  export declare function runSearch(options: SearchCommandOptions): Promise<SearchOutput>;
52
71
  export {};
@@ -17,6 +17,9 @@ export async function runSearch(options) {
17
17
  const result = await scout.search({
18
18
  maxResults: options.maxResults,
19
19
  strategies: options.strategies,
20
+ preferLanguages: options.preferLanguages,
21
+ preferRepos: options.preferRepos,
22
+ diversityRatio: options.diversityRatio,
20
23
  });
21
24
  // Persist results to local state and gist
22
25
  scout.saveResults(result.candidates);
@@ -60,6 +63,9 @@ export async function runSearch(options) {
60
63
  isStalled: isLinkedPRStalled(c.vettingResult.linkedPR),
61
64
  }
62
65
  : undefined,
66
+ boostScore: c.boostScore,
67
+ boostReasons: c.boostReasons,
68
+ diversitySlot: c.diversitySlot,
63
69
  };
64
70
  }),
65
71
  excludedRepos: result.excludedRepos,
@@ -74,6 +74,9 @@ export declare class IssueDiscovery {
74
74
  maxResults?: number;
75
75
  strategies?: SearchStrategy[];
76
76
  skippedUrls?: Set<string>;
77
+ preferLanguages?: string[];
78
+ preferRepos?: string[];
79
+ diversityRatio?: number;
77
80
  }): Promise<{
78
81
  candidates: IssueCandidate[];
79
82
  strategiesUsed: SearchStrategy[];
@@ -22,6 +22,7 @@ import { isDocOnlyIssue, applyPerRepoCap, } from "./issue-filtering.js";
22
22
  import { IssueVetter } from "./issue-vetting.js";
23
23
  import { getTopicsForCategories } from "./category-mapping.js";
24
24
  import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, fetchIssuesFromMaintainedRepos, filterVetAndScore, fetchIssuesFromKnownRepos, searchAcrossLanguagesAndLabels, } from "./search-phases.js";
25
+ import { annotateBoost, applyDiversityRatio } from "./personalization.js";
25
26
  const MODULE = "issue-discovery";
26
27
  /** If remaining search quota is below this, skip heavy phases (2, 3). */
27
28
  const LOW_BUDGET_THRESHOLD = 20;
@@ -486,7 +487,11 @@ export class IssueDiscovery {
486
487
  `Found ${allCandidates.length} candidate${allCandidates.length === 1 ? "" : "s"} but some search phases were limited. ` +
487
488
  `Try again after the rate limit resets for complete results.`;
488
489
  }
489
- // Sort by priority, recommendation, then viability score
490
+ // Personalization annotation (#1244): tag each candidate with
491
+ // boostScore + boostReasons before sorting so the new sort tier has
492
+ // values to read. No-op when neither preference list is supplied.
493
+ annotateBoost(allCandidates, options.preferLanguages, options.preferRepos);
494
+ // Sort by priority, recommendation, boost (#1244), then viability score
490
495
  allCandidates.sort((a, b) => {
491
496
  const priorityOrder = {
492
497
  merged_pr: 0,
@@ -501,11 +506,24 @@ export class IssueDiscovery {
501
506
  recommendationOrder[b.recommendation];
502
507
  if (recDiff !== 0)
503
508
  return recDiff;
509
+ // Personalization tier (#1244): higher boostScore wins. Treats
510
+ // undefined as 0 so unboosted candidates rank below boosted peers
511
+ // but stay ordered among themselves by viabilityScore. No-op when
512
+ // `preferLanguages`/`preferRepos` are absent — all candidates carry
513
+ // `boostScore: undefined` and the difference collapses to 0.
514
+ const boostDiff = (b.boostScore ?? 0) - (a.boostScore ?? 0);
515
+ if (boostDiff !== 0)
516
+ return boostDiff;
504
517
  return b.viabilityScore - a.viabilityScore;
505
518
  });
506
519
  const capped = applyPerRepoCap(allCandidates, 2);
507
- info(MODULE, `Search complete: ${tracker.getTotalCalls()} Search API calls used, ${capped.length} candidates returned`);
508
- return { candidates: capped.slice(0, maxResults), strategiesUsed };
520
+ // Diversity counterweight (#1244): when `diversityRatio > 0`, reserve
521
+ // a fraction of the final slots for candidates that matched neither
522
+ // preference list. No-op when the ratio is 0 or absent — collapses to
523
+ // the original `slice(0, maxResults)` behavior.
524
+ const finalPicks = applyDiversityRatio(capped, maxResults, options.diversityRatio ?? 0);
525
+ info(MODULE, `Search complete: ${tracker.getTotalCalls()} Search API calls used, ${finalPicks.length} candidates returned`);
526
+ return { candidates: finalPicks, strategiesUsed };
509
527
  }
510
528
  /**
511
529
  * Vet a specific issue for claimability and project health.
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Personalization signals for search ranking (#1244).
3
+ *
4
+ * Two passes:
5
+ *
6
+ * - `annotateBoost` translates `preferLanguages` / `preferRepos`
7
+ * into a soft `boostScore` consumed by issue-discovery's final
8
+ * sort tier between `recommendation` and `viabilityScore`.
9
+ * - `applyDiversityRatio` reserves a fraction of the final slot
10
+ * budget for candidates that matched no preference, counterweighting
11
+ * echo-chamber bias as recommendations accumulate over time.
12
+ *
13
+ * Still out of scope for #1244: `boostIssueTypes`, `avoidRepos`, and
14
+ * render-time annotation of `boostReasons` / `diversitySlot` in the CLI
15
+ * non-JSON output. Those follow up in separate PRs.
16
+ */
17
+ import type { IssueCandidate } from "./types.js";
18
+ /**
19
+ * Boost weights. Tuned conservatively so personalization tips equally-
20
+ * scored candidates without drowning out high-viability normal results.
21
+ *
22
+ * Rationale:
23
+ * - Repo affinity is the strongest signal — a candidate in a repo the
24
+ * user has merged PRs into has real relationship context. Worth the
25
+ * higher boost.
26
+ * - Language match is broad and easy to satisfy. Lower weight.
27
+ */
28
+ export declare const REPO_BOOST = 20;
29
+ export declare const LANGUAGE_BOOST = 10;
30
+ /**
31
+ * Annotate each candidate with `boostScore` and `boostReasons` based on
32
+ * the caller-supplied preference lists. Mutates the candidate objects in
33
+ * place (the array itself is not reordered); the caller re-sorts afterwards.
34
+ *
35
+ * Mutation (rather than returning new objects) keeps the personalization
36
+ * step a single linear pass over the array the caller already holds —
37
+ * the sort step reads back from the same objects.
38
+ *
39
+ * No-op when both preference lists are empty or undefined: candidates
40
+ * retain `boostScore: undefined` and the sort tier collapses to 0.
41
+ */
42
+ export declare function annotateBoost(candidates: IssueCandidate[], preferLanguages?: string[], preferRepos?: string[]): void;
43
+ /**
44
+ * Apply a diversity-counterweight pass over a pre-sorted candidate list
45
+ * (#1244). Returns a new array whose slots are filled in this order:
46
+ *
47
+ * 1. Main slots: `maxResults - floor(maxResults * diversityRatio)`
48
+ * top candidates from the input. Personalization-biased candidates
49
+ * win these slots when present (since the input is already sorted
50
+ * by the personalization tier).
51
+ * 2. Diversity slots: the highest-ranked candidates that carry NO
52
+ * `boostScore` — i.e. they matched neither `preferLanguages` nor
53
+ * `preferRepos`. Tagged in place with `diversitySlot: true` for caller
54
+ * transparency; appended after the main slots, not re-sorted.
55
+ * 3. Top-up: if the diversity pool was thinner than the reserve, fall
56
+ * back to the remaining sorted candidates so the user gets
57
+ * `maxResults` slots whenever the source has enough material.
58
+ *
59
+ * `diversityRatio` is clamped to [0, 1]. 0 is a no-op (just slices the
60
+ * input). 1 reserves every slot for diversity picks (top-up still applies) — useful for
61
+ * deliberately suppressing personalization without disabling it.
62
+ *
63
+ * @param candidates Pre-sorted candidate list (output of issue-discovery)
64
+ * @param maxResults Total slots to fill
65
+ * @param diversityRatio Fraction of slots reserved for unboosted candidates
66
+ */
67
+ export declare function applyDiversityRatio(candidates: IssueCandidate[], maxResults: number, diversityRatio: number): IssueCandidate[];
@@ -0,0 +1,126 @@
1
+ /**
2
+ * Personalization signals for search ranking (#1244).
3
+ *
4
+ * Two passes:
5
+ *
6
+ * - `annotateBoost` translates `preferLanguages` / `preferRepos`
7
+ * into a soft `boostScore` consumed by issue-discovery's final
8
+ * sort tier between `recommendation` and `viabilityScore`.
9
+ * - `applyDiversityRatio` reserves a fraction of the final slot
10
+ * budget for candidates that matched no preference, counterweighting
11
+ * echo-chamber bias as recommendations accumulate over time.
12
+ *
13
+ * Still out of scope for #1244: `boostIssueTypes`, `avoidRepos`, and
14
+ * render-time annotation of `boostReasons` / `diversitySlot` in the CLI
15
+ * non-JSON output. Those follow up in separate PRs.
16
+ */
/**
 * Boost weights. Tuned conservatively so personalization tips equally-
 * scored candidates without drowning out high-viability normal results.
 *
 * Rationale:
 *   - Repo affinity is the strongest signal, so it carries the higher weight.
 *   - Language match is broad and easy to satisfy, so it carries less.
 *
 * The two weights are additive: a candidate matching both lists scores
 * `REPO_BOOST + LANGUAGE_BOOST`.
 */
export const REPO_BOOST = 20;
export const LANGUAGE_BOOST = 10;

/**
 * Annotate each candidate with `boostScore` and `boostReasons` based on
 * the caller-supplied preference lists (#1244).
 *
 * Mutates the candidate objects in place; the array itself is neither
 * reordered nor resized, so the caller is responsible for sorting
 * afterwards. Candidates that match nothing are left untouched (no
 * `boostScore` / `boostReasons` property is written).
 *
 * Both lists are trimmed, lower-cased and stripped of empty entries before
 * matching. Repo slugs are compared case-insensitively, same as languages,
 * because GitHub resolves `owner/repo` names case-insensitively — a user
 * typing `Vercel/Next.js` should still match `vercel/next.js`.
 *
 * No-op when both preference lists are empty or undefined.
 *
 * @param candidates      Candidates to annotate; reads `issue.repo` and
 *                        `projectHealth.language` from each.
 * @param preferLanguages Languages to boost (case-insensitive).
 * @param preferRepos     `owner/repo` slugs to boost (case-insensitive).
 * @returns Nothing; results are written onto the candidate objects.
 */
export function annotateBoost(candidates, preferLanguages, preferRepos) {
  // One normaliser for both lists so they can never drift apart again.
  const toLookup = (list) =>
    new Set((list ?? []).map((entry) => entry.trim().toLowerCase()).filter(Boolean));

  const langSet = toLookup(preferLanguages);
  const repoSet = toLookup(preferRepos);
  if (langSet.size === 0 && repoSet.size === 0) return;

  for (const c of candidates) {
    let score = 0;
    const reasons = [];

    const repo = c.issue.repo;
    if (typeof repo === "string" && repoSet.has(repo.toLowerCase())) {
      score += REPO_BOOST;
      // Report the slug as the candidate spells it, not as the user typed it.
      reasons.push(`repo affinity: ${repo}`);
    }

    const lang = c.projectHealth.language;
    if (lang && langSet.has(lang.toLowerCase())) {
      score += LANGUAGE_BOOST;
      reasons.push(`language match: ${lang}`);
    }

    // Only write the fields on a match so unboosted candidates keep
    // `boostScore === undefined`, which the sort tier treats as 0.
    if (score > 0) {
      c.boostScore = score;
      c.boostReasons = reasons;
    }
  }
}
/**
 * Apply a diversity-counterweight pass over a pre-sorted candidate list
 * (#1244). Returns a new array of picks, filled in this order:
 *
 *   1. Main slots: the first `maxResults - floor(maxResults * ratio)`
 *      candidates from the input, whatever their boost.
 *   2. Diversity slots: the highest-ranked remaining candidates with no
 *      positive `boostScore`. Each is tagged in place with
 *      `diversitySlot: true` and appended after the main slots (the
 *      result is not re-sorted).
 *   3. Top-up: if step 2 could not fill the reserve, the remaining
 *      candidates are appended in input order, untagged.
 *
 * `diversityRatio` is clamped to [0, 1]; a `NaN` or missing ratio is
 * treated as 0 so a bad value can never disable the `maxResults` cap.
 * `maxResults` is floored, matching what `Array.prototype.slice` does on
 * the ratio-0 path. Candidates are de-duplicated by `issue.url` in steps
 * 2 and 3.
 *
 * @param candidates     Pre-sorted candidate list; not reordered.
 * @param maxResults     Total slots to fill.
 * @param diversityRatio Fraction of slots reserved for unboosted candidates.
 * @returns A new array of picks.
 */
export function applyDiversityRatio(candidates, maxResults, diversityRatio) {
  const limit = Math.floor(maxResults);
  // `!(limit > 0)` also rejects NaN, which `limit <= 0` would let through.
  if (!(limit > 0)) return [];

  // Math.min/Math.max propagate NaN, so clamp first and then neutralise it.
  const clamped = Math.max(0, Math.min(1, diversityRatio));
  const ratio = Number.isNaN(clamped) ? 0 : clamped;

  const diversityReserve = Math.floor(limit * ratio);
  if (diversityReserve === 0) return candidates.slice(0, limit);

  const mainBudget = limit - diversityReserve;
  const picks = [];
  const seen = new Set();
  const take = (c) => {
    picks.push(c);
    seen.add(c.issue.url);
  };

  // 1. Main slots: straight off the top of the sorted input.
  for (const c of candidates) {
    if (picks.length >= mainBudget) break;
    take(c);
  }

  // 2. Diversity slots: best remaining candidates that matched no preference.
  for (const c of candidates) {
    if (picks.length >= limit) break;
    if (seen.has(c.issue.url)) continue;
    if ((c.boostScore ?? 0) > 0) continue;
    c.diversitySlot = true;
    take(c);
  }

  // 3. Top-up: the unboosted pool was too thin, so fill with whatever is left.
  for (const c of candidates) {
    if (picks.length >= limit) break;
    if (seen.has(c.issue.url)) continue;
    take(c);
  }

  return picks;
}
@@ -53,6 +53,27 @@ export interface IssueCandidate {
53
53
  reasonsToApprove: string[];
54
54
  viabilityScore: number;
55
55
  searchPriority: SearchPriority;
56
+ /**
57
+ * Personalization sort tier (#1244). Populated only when the caller
58
+ * passes `preferLanguages` / `preferRepos` to `search()` *and* the
59
+ * candidate matches at least one. Affects sort order between the
60
+ * `recommendation` tier and `viabilityScore`; never used as a filter.
61
+ */
62
+ boostScore?: number;
63
+ /**
64
+ * Human-readable reasons the candidate matched personalization bias
65
+ * (#1244). Mirrors `reasonsToApprove`/`reasonsToSkip` shape for
66
+ * symmetry with the existing surface.
67
+ */
68
+ boostReasons?: string[];
69
+ /**
70
+ * Marks a candidate that filled a reserved diversity slot (#1244).
71
+ * Populated only when `diversityRatio > 0` was passed AND the
72
+ * candidate matched no personalization bias. Mutually exclusive with
73
+ * a non-zero `boostScore` (a candidate cannot be both biased-toward
74
+ * and a diversity slot in the same result set).
75
+ */
76
+ diversitySlot?: boolean;
56
77
  }
57
78
  /** Subset of RepoScore fields that callers may update. */
58
79
  export interface RepoScoreUpdate {
@@ -122,6 +143,31 @@ export type ScoutConfig = {
122
143
  export interface SearchOptions {
123
144
  maxResults?: number;
124
145
  strategies?: SearchStrategy[];
146
+ /**
147
+ * Per-call personalization bias: candidates whose repo language matches
148
+ * one of these (case-insensitive) get a soft sort boost above
149
+ * equally-recommended non-matches (#1244). Does not filter results, does
150
+ * not change `viabilityScore`. Empty / undefined disables the boost.
151
+ */
152
+ preferLanguages?: string[];
153
+ /**
154
+ * Per-call personalization bias: candidates in one of these
155
+ * `owner/repo` slugs get a soft sort boost above equally-recommended
156
+ * non-matches (#1244). Stronger weight than language match. Does not
157
+ * filter results, does not change `viabilityScore`. Empty / undefined
158
+ * disables the boost.
159
+ */
160
+ preferRepos?: string[];
161
+ /**
162
+ * Counterweight against echo-chamber bias as `preferLanguages` /
163
+ * `preferRepos` boosts accumulate over time (#1244). A value of 0.2
164
+ * means "reserve roughly 20% of the final slots for candidates that
165
+ * matched NEITHER preference list," filling them from the same sorted
166
+ * pool but skipping any candidate carrying a `boostScore`. 0 disables
167
+ * the counterweight; 1 makes every slot a diversity slot. Range
168
+ * clamped to [0, 1].
169
+ */
170
+ diversityRatio?: number;
125
171
  }
126
172
  /** Result of a search operation. */
127
173
  export interface SearchResult {
package/dist/scout.js CHANGED
@@ -148,6 +148,9 @@ export class OssScout {
148
148
  maxResults: options?.maxResults,
149
149
  strategies: options?.strategies,
150
150
  skippedUrls,
151
+ preferLanguages: options?.preferLanguages,
152
+ preferRepos: options?.preferRepos,
153
+ diversityRatio: options?.diversityRatio,
151
154
  });
152
155
  this.state.lastSearchAt = new Date().toISOString();
153
156
  this.dirty = true;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oss-scout/core",
3
- "version": "0.9.1",
3
+ "version": "0.11.0",
4
4
  "description": "Personalized GitHub issue finder with multi-strategy search, deep vetting, and viability scoring — CLI, library, MCP server, and Claude Code plugin",
5
5
  "type": "module",
6
6
  "bin": {