@oss-scout/core 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -91,6 +91,7 @@ program
91
91
  .option("--strategy <strategies>", `Search strategies (${CONCRETE_STRATEGIES.join(",")},all)`, "all")
92
92
  .option("--prefer-languages <list>", "Comma-separated languages to soft-boost in ranking (#1244). Candidates whose repo language matches sort above equally-recommended non-matches. Does not filter results.")
93
93
  .option("--prefer-repos <list>", "Comma-separated `owner/repo` slugs to soft-boost in ranking (#1244). Stronger weight than language match. Does not filter results.")
94
+ .option("--diversity-ratio <n>", "Fraction of result slots (0-1) reserved for candidates that matched NEITHER preference list (#1244). Counterweights echo-chamber bias as boosts accumulate. Default 0 (disabled).")
94
95
  .action(async (count, options) => {
95
96
  try {
96
97
  if (!hasLocalState()) {
@@ -135,12 +136,22 @@ program
135
136
  .filter(Boolean);
136
137
  return parts.length > 0 ? parts : undefined;
137
138
  };
139
+ let diversityRatio;
140
+ if (options.diversityRatio !== undefined) {
141
+ const parsed = Number(options.diversityRatio);
142
+ if (!Number.isFinite(parsed) || parsed < 0 || parsed > 1) {
143
+ console.error(`Error: --diversity-ratio must be a number in [0, 1] (got "${options.diversityRatio}")`);
144
+ process.exit(1);
145
+ }
146
+ diversityRatio = parsed;
147
+ }
138
148
  const results = await runSearch({
139
149
  maxResults,
140
150
  state,
141
151
  strategies,
142
152
  preferLanguages: splitCsv(options.preferLanguages),
143
153
  preferRepos: splitCsv(options.preferRepos),
154
+ diversityRatio,
144
155
  });
145
156
  if (options.json) {
146
157
  console.log(formatJsonSuccess(results));
@@ -45,6 +45,11 @@ export interface SearchOutput {
45
45
  */
46
46
  boostScore?: number;
47
47
  boostReasons?: string[];
48
+ /**
49
+ * Marks a candidate that filled a reserved diversity slot (#1244).
50
+ * Mutually exclusive with a non-zero `boostScore`.
51
+ */
52
+ diversitySlot?: boolean;
48
53
  }>;
49
54
  excludedRepos: string[];
50
55
  aiPolicyBlocklist: string[];
@@ -59,6 +64,8 @@ interface SearchCommandOptions {
59
64
  preferLanguages?: string[];
60
65
  /** Soft sort boost for candidates in these `owner/repo` slugs (#1244). */
61
66
  preferRepos?: string[];
67
+ /** Diversity counterweight: fraction of slots reserved for unboosted candidates (#1244). */
68
+ diversityRatio?: number;
62
69
  }
63
70
  export declare function runSearch(options: SearchCommandOptions): Promise<SearchOutput>;
64
71
  export {};
@@ -19,6 +19,7 @@ export async function runSearch(options) {
19
19
  strategies: options.strategies,
20
20
  preferLanguages: options.preferLanguages,
21
21
  preferRepos: options.preferRepos,
22
+ diversityRatio: options.diversityRatio,
22
23
  });
23
24
  // Persist results to local state and gist
24
25
  scout.saveResults(result.candidates);
@@ -64,6 +65,7 @@ export async function runSearch(options) {
64
65
  : undefined,
65
66
  boostScore: c.boostScore,
66
67
  boostReasons: c.boostReasons,
68
+ diversitySlot: c.diversitySlot,
67
69
  };
68
70
  }),
69
71
  excludedRepos: result.excludedRepos,
@@ -76,6 +76,7 @@ export declare class IssueDiscovery {
76
76
  skippedUrls?: Set<string>;
77
77
  preferLanguages?: string[];
78
78
  preferRepos?: string[];
79
+ diversityRatio?: number;
79
80
  }): Promise<{
80
81
  candidates: IssueCandidate[];
81
82
  strategiesUsed: SearchStrategy[];
@@ -22,7 +22,7 @@ import { isDocOnlyIssue, applyPerRepoCap, } from "./issue-filtering.js";
22
22
  import { IssueVetter } from "./issue-vetting.js";
23
23
  import { getTopicsForCategories } from "./category-mapping.js";
24
24
  import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, fetchIssuesFromMaintainedRepos, filterVetAndScore, fetchIssuesFromKnownRepos, searchAcrossLanguagesAndLabels, } from "./search-phases.js";
25
- import { annotateBoost } from "./personalization.js";
25
+ import { annotateBoost, applyDiversityRatio } from "./personalization.js";
26
26
  const MODULE = "issue-discovery";
27
27
  /** If remaining search quota is below this, skip heavy phases (2, 3). */
28
28
  const LOW_BUDGET_THRESHOLD = 20;
@@ -517,8 +517,13 @@ export class IssueDiscovery {
517
517
  return b.viabilityScore - a.viabilityScore;
518
518
  });
519
519
  const capped = applyPerRepoCap(allCandidates, 2);
520
- info(MODULE, `Search complete: ${tracker.getTotalCalls()} Search API calls used, ${capped.length} candidates returned`);
521
- return { candidates: capped.slice(0, maxResults), strategiesUsed };
520
+ // Diversity counterweight (#1244): when `diversityRatio > 0`, reserve
521
+ // a fraction of the final slots for candidates that matched neither
522
+ // preference list. No-op when the ratio is 0 or absent — collapses to
523
+ // the original `slice(0, maxResults)` behavior.
524
+ const finalPicks = applyDiversityRatio(capped, maxResults, options.diversityRatio ?? 0);
525
+ info(MODULE, `Search complete: ${tracker.getTotalCalls()} Search API calls used, ${finalPicks.length} candidates returned`);
526
+ return { candidates: finalPicks, strategiesUsed };
522
527
  }
523
528
  /**
524
529
  * Vet a specific issue for claimability and project health.
@@ -1,15 +1,18 @@
1
1
  /**
2
2
  * Personalization signals for search ranking (#1244).
3
3
  *
4
- * Translates caller-supplied `preferLanguages` / `preferRepos` lists
5
- * into a soft `boostScore` on each `IssueCandidate`. The final search
6
- * sort consults this score between the `recommendation` tier and the
7
- * raw `viabilityScore`, so personalization reorders ties without
8
- * changing which candidates pass vetting.
9
- *
10
- * This is the minimum-viable subset of Option A in #1244: only language
11
- * and repo bias, no `boostIssueTypes` / `avoidRepos` / `diversityRatio`
12
- * yet. Those follow up in separate PRs.
4
+ * Two passes:
5
+ *
6
+ * - `annotateBoost` translates `preferLanguages` / `preferRepos`
7
+ * into a soft `boostScore` consumed by issue-discovery's final
8
+ * sort tier between `recommendation` and `viabilityScore`.
9
+ * - `applyDiversityRatio` reserves a fraction of the final slot
10
+ * budget for candidates that matched no preference, counterweighting
11
+ * echo-chamber bias as recommendations accumulate over time.
12
+ *
13
+ * Still out of scope for #1244: `boostIssueTypes`, `avoidRepos`, and
14
+ * render-time annotation of `boostReasons` / `diversitySlot` in the CLI
15
+ * non-JSON output. Those follow up in separate PRs.
13
16
  */
14
17
  import type { IssueCandidate } from "./types.js";
15
18
  /**
@@ -37,3 +40,28 @@ export declare const LANGUAGE_BOOST = 10;
37
40
  * retain `boostScore: undefined` and the sort tier collapses to 0.
38
41
  */
39
42
  export declare function annotateBoost(candidates: IssueCandidate[], preferLanguages?: string[], preferRepos?: string[]): void;
43
+ /**
44
+ * Apply a diversity-counterweight pass over a pre-sorted candidate list
45
+ * (#1244). Returns the first `maxResults` picks in priority order:
46
+ *
47
+ * 1. Main slots: `maxResults - floor(maxResults * diversityRatio)`
48
+ * top candidates from the input. Personalization-biased candidates
49
+ * win these slots when present (since the input is already sorted
50
+ * by the personalization tier).
51
+ * 2. Diversity slots: the highest-ranked candidates that carry NO
52
+ * `boostScore` — i.e. they matched neither `preferLanguages` nor
53
+ * `preferRepos`. Tagged with `diversitySlot: true` for caller
54
+ * transparency.
55
+ * 3. Top-up: if the diversity pool was thinner than the reserve, fall
56
+ * back to the remaining sorted candidates so the user gets
57
+ * `maxResults` slots whenever the source has enough material.
58
+ *
59
+ * `diversityRatio` is clamped to [0, 1]. 0 is a no-op (just slices the
60
+ * input). 1 means every slot is a diversity slot — useful for
61
+ * deliberately suppressing personalization without disabling it.
62
+ *
63
+ * @param candidates Pre-sorted candidate list (output of issue-discovery)
64
+ * @param maxResults Total slots to fill
65
+ * @param diversityRatio Fraction of slots reserved for unboosted candidates
66
+ */
67
+ export declare function applyDiversityRatio(candidates: IssueCandidate[], maxResults: number, diversityRatio: number): IssueCandidate[];
@@ -1,15 +1,18 @@
1
1
  /**
2
2
  * Personalization signals for search ranking (#1244).
3
3
  *
4
- * Translates caller-supplied `preferLanguages` / `preferRepos` lists
5
- * into a soft `boostScore` on each `IssueCandidate`. The final search
6
- * sort consults this score between the `recommendation` tier and the
7
- * raw `viabilityScore`, so personalization reorders ties without
8
- * changing which candidates pass vetting.
9
- *
10
- * This is the minimum-viable subset of Option A in #1244: only language
11
- * and repo bias, no `boostIssueTypes` / `avoidRepos` / `diversityRatio`
12
- * yet. Those follow up in separate PRs.
4
+ * Two passes:
5
+ *
6
+ * - `annotateBoost` translates `preferLanguages` / `preferRepos`
7
+ * into a soft `boostScore` consumed by issue-discovery's final
8
+ * sort tier between `recommendation` and `viabilityScore`.
9
+ * - `applyDiversityRatio` reserves a fraction of the final slot
10
+ * budget for candidates that matched no preference, counterweighting
11
+ * echo-chamber bias as recommendations accumulate over time.
12
+ *
13
+ * Still out of scope for #1244: `boostIssueTypes`, `avoidRepos`, and
14
+ * render-time annotation of `boostReasons` / `diversitySlot` in the CLI
15
+ * non-JSON output. Those follow up in separate PRs.
13
16
  */
14
17
  /**
15
18
  * Boost weights. Tuned conservatively so personalization tips equally-
@@ -58,3 +61,66 @@ export function annotateBoost(candidates, preferLanguages, preferRepos) {
58
61
  }
59
62
  }
60
63
  }
64
/**
 * Apply a diversity-counterweight pass over a pre-sorted candidate list
 * (#1244). Returns at most `maxResults` picks, filled in three phases:
 *
 *  1. Main slots: the first `maxResults - floor(maxResults * ratio)`
 *     candidates, taken in input order. Personalization-biased candidates
 *     win these slots when the input is sorted by the personalization tier.
 *  2. Diversity slots: the highest-ranked not-yet-picked candidates that
 *     carry no positive `boostScore`. Each one is tagged
 *     `diversitySlot: true`; the tag is written onto the candidate object
 *     itself, not onto a copy.
 *  3. Top-up: if phase 2 could not fill the reserve, the remaining
 *     candidates (boosted or not) fill the gap in input order.
 *
 * Candidates are identified by `issue.url`; one that was already picked is
 * never picked again by phases 2 and 3.
 *
 * `diversityRatio` is clamped to [0, 1]. A ratio of 0, or any ratio that
 * is not a number (NaN, `undefined`), disables the pass and the result is a
 * plain `candidates.slice(0, maxResults)`. A ratio of 1 reserves every slot
 * for diversity picks, so boosted candidates only appear through top-up.
 *
 * @param candidates Pre-sorted candidate list (output of issue-discovery)
 * @param maxResults Total slots to fill
 * @param diversityRatio Fraction of slots reserved for unboosted candidates
 * @returns A new array of picks; the input array itself is not reordered
 */
export function applyDiversityRatio(candidates, maxResults, diversityRatio) {
    if (maxResults <= 0)
        return [];
    // Math.min / Math.max propagate NaN, so a NaN or missing ratio survives
    // the clamp. Left alone it turns every budget below into NaN, no
    // `picks.length >= budget` check ever fires, and the whole input is
    // returned. Treat it as "counterweight disabled" instead.
    const clamped = Math.max(0, Math.min(1, diversityRatio));
    const ratio = Number.isNaN(clamped) ? 0 : clamped;
    if (ratio === 0)
        return candidates.slice(0, maxResults);
    const diversityReserve = Math.min(Math.floor(maxResults * ratio), maxResults);
    // `!(x > 0)` instead of `x === 0`: also catches a NaN reserve (from a
    // NaN `maxResults`) so that case takes the same plain-slice path.
    if (!(diversityReserve > 0))
        return candidates.slice(0, maxResults);
    const mainBudget = maxResults - diversityReserve;
    const picks = [];
    const seen = new Set();
    // Phase 1: main slots, straight off the top of the sorted input.
    for (const c of candidates) {
        if (picks.length >= mainBudget)
            break;
        picks.push(c);
        seen.add(c.issue.url);
    }
    // Phase 2: reserved slots, only for candidates without a positive boost.
    for (const c of candidates) {
        if (picks.length >= maxResults)
            break;
        if (seen.has(c.issue.url))
            continue;
        if (c.boostScore && c.boostScore > 0)
            continue;
        c.diversitySlot = true;
        picks.push(c);
        seen.add(c.issue.url);
    }
    // Phase 3: top-up with whatever is left when the unboosted pool ran dry.
    for (const c of candidates) {
        if (picks.length >= maxResults)
            break;
        if (seen.has(c.issue.url))
            continue;
        picks.push(c);
        seen.add(c.issue.url);
    }
    return picks;
}
@@ -66,6 +66,14 @@ export interface IssueCandidate {
66
66
  * symmetry with the existing surface.
67
67
  */
68
68
  boostReasons?: string[];
69
+ /**
70
+ * Marks a candidate that filled a reserved diversity slot (#1244).
71
+ * Populated only when `diversityRatio > 0` was passed AND the
72
+ * candidate matched no personalization bias. Mutually exclusive with
73
+ * a non-zero `boostScore` (a candidate cannot be both biased-toward
74
+ * and a diversity slot in the same result set).
75
+ */
76
+ diversitySlot?: boolean;
69
77
  }
70
78
  /** Subset of RepoScore fields that callers may update. */
71
79
  export interface RepoScoreUpdate {
@@ -150,6 +158,16 @@ export interface SearchOptions {
150
158
  * disables the boost.
151
159
  */
152
160
  preferRepos?: string[];
161
+ /**
162
+ * Counterweight against echo-chamber bias as `preferLanguages` /
163
+ * `preferRepos` boosts accumulate over time (#1244). A value of 0.2
164
+ * means "reserve roughly 20% of the final slots for candidates that
165
+ * matched NEITHER preference list," filling them from the same sorted
166
+ * pool but skipping any candidate carrying a `boostScore`. 0 disables
167
+ * the counterweight; 1 makes every slot a diversity slot. Range
168
+ * clamped to [0, 1].
169
+ */
170
+ diversityRatio?: number;
153
171
  }
154
172
  /** Result of a search operation. */
155
173
  export interface SearchResult {
package/dist/scout.js CHANGED
@@ -150,6 +150,7 @@ export class OssScout {
150
150
  skippedUrls,
151
151
  preferLanguages: options?.preferLanguages,
152
152
  preferRepos: options?.preferRepos,
153
+ diversityRatio: options?.diversityRatio,
153
154
  });
154
155
  this.state.lastSearchAt = new Date().toISOString();
155
156
  this.dirty = true;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oss-scout/core",
3
- "version": "0.10.0",
3
+ "version": "0.11.0",
4
4
  "description": "Personalized GitHub issue finder with multi-strategy search, deep vetting, and viability scoring — CLI, library, MCP server, and Claude Code plugin",
5
5
  "type": "module",
6
6
  "bin": {