@oss-scout/core 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.bundle.cjs +31 -31
- package/dist/cli.js +11 -0
- package/dist/commands/search.d.ts +7 -0
- package/dist/commands/search.js +2 -0
- package/dist/core/issue-discovery.d.ts +1 -0
- package/dist/core/issue-discovery.js +8 -3
- package/dist/core/personalization.d.ts +37 -9
- package/dist/core/personalization.js +75 -9
- package/dist/core/types.d.ts +18 -0
- package/dist/scout.js +1 -0
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -91,6 +91,7 @@ program
|
|
|
91
91
|
.option("--strategy <strategies>", `Search strategies (${CONCRETE_STRATEGIES.join(",")},all)`, "all")
|
|
92
92
|
.option("--prefer-languages <list>", "Comma-separated languages to soft-boost in ranking (#1244). Candidates whose repo language matches sort above equally-recommended non-matches. Does not filter results.")
|
|
93
93
|
.option("--prefer-repos <list>", "Comma-separated `owner/repo` slugs to soft-boost in ranking (#1244). Stronger weight than language match. Does not filter results.")
|
|
94
|
+
.option("--diversity-ratio <n>", "Fraction of result slots (0-1) reserved for candidates that matched NEITHER preference list (#1244). Counterweights echo-chamber bias as boosts accumulate. Default 0 (disabled).")
|
|
94
95
|
.action(async (count, options) => {
|
|
95
96
|
try {
|
|
96
97
|
if (!hasLocalState()) {
|
|
@@ -135,12 +136,22 @@ program
|
|
|
135
136
|
.filter(Boolean);
|
|
136
137
|
return parts.length > 0 ? parts : undefined;
|
|
137
138
|
};
|
|
139
|
+
let diversityRatio;
|
|
140
|
+
if (options.diversityRatio !== undefined) {
|
|
141
|
+
const parsed = Number(options.diversityRatio);
|
|
142
|
+
if (!Number.isFinite(parsed) || parsed < 0 || parsed > 1) {
|
|
143
|
+
console.error(`Error: --diversity-ratio must be a number in [0, 1] (got "${options.diversityRatio}")`);
|
|
144
|
+
process.exit(1);
|
|
145
|
+
}
|
|
146
|
+
diversityRatio = parsed;
|
|
147
|
+
}
|
|
138
148
|
const results = await runSearch({
|
|
139
149
|
maxResults,
|
|
140
150
|
state,
|
|
141
151
|
strategies,
|
|
142
152
|
preferLanguages: splitCsv(options.preferLanguages),
|
|
143
153
|
preferRepos: splitCsv(options.preferRepos),
|
|
154
|
+
diversityRatio,
|
|
144
155
|
});
|
|
145
156
|
if (options.json) {
|
|
146
157
|
console.log(formatJsonSuccess(results));
|
|
package/dist/commands/search.d.ts
CHANGED
|
@@ -45,6 +45,11 @@ export interface SearchOutput {
|
|
|
45
45
|
*/
|
|
46
46
|
boostScore?: number;
|
|
47
47
|
boostReasons?: string[];
|
|
48
|
+
/**
|
|
49
|
+
* Marks a candidate that filled a reserved diversity slot (#1244).
|
|
50
|
+
* Mutually exclusive with a non-zero `boostScore`.
|
|
51
|
+
*/
|
|
52
|
+
diversitySlot?: boolean;
|
|
48
53
|
}>;
|
|
49
54
|
excludedRepos: string[];
|
|
50
55
|
aiPolicyBlocklist: string[];
|
|
@@ -59,6 +64,8 @@ interface SearchCommandOptions {
|
|
|
59
64
|
preferLanguages?: string[];
|
|
60
65
|
/** Soft sort boost for candidates in these `owner/repo` slugs (#1244). */
|
|
61
66
|
preferRepos?: string[];
|
|
67
|
+
/** Diversity counterweight: fraction of slots reserved for unboosted candidates (#1244). */
|
|
68
|
+
diversityRatio?: number;
|
|
62
69
|
}
|
|
63
70
|
export declare function runSearch(options: SearchCommandOptions): Promise<SearchOutput>;
|
|
64
71
|
export {};
|
package/dist/commands/search.js
CHANGED
|
@@ -19,6 +19,7 @@ export async function runSearch(options) {
|
|
|
19
19
|
strategies: options.strategies,
|
|
20
20
|
preferLanguages: options.preferLanguages,
|
|
21
21
|
preferRepos: options.preferRepos,
|
|
22
|
+
diversityRatio: options.diversityRatio,
|
|
22
23
|
});
|
|
23
24
|
// Persist results to local state and gist
|
|
24
25
|
scout.saveResults(result.candidates);
|
|
@@ -64,6 +65,7 @@ export async function runSearch(options) {
|
|
|
64
65
|
: undefined,
|
|
65
66
|
boostScore: c.boostScore,
|
|
66
67
|
boostReasons: c.boostReasons,
|
|
68
|
+
diversitySlot: c.diversitySlot,
|
|
67
69
|
};
|
|
68
70
|
}),
|
|
69
71
|
excludedRepos: result.excludedRepos,
|
|
package/dist/core/issue-discovery.js
CHANGED
|
@@ -22,7 +22,7 @@ import { isDocOnlyIssue, applyPerRepoCap, } from "./issue-filtering.js";
|
|
|
22
22
|
import { IssueVetter } from "./issue-vetting.js";
|
|
23
23
|
import { getTopicsForCategories } from "./category-mapping.js";
|
|
24
24
|
import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, fetchIssuesFromMaintainedRepos, filterVetAndScore, fetchIssuesFromKnownRepos, searchAcrossLanguagesAndLabels, } from "./search-phases.js";
|
|
25
|
-
import { annotateBoost } from "./personalization.js";
|
|
25
|
+
import { annotateBoost, applyDiversityRatio } from "./personalization.js";
|
|
26
26
|
const MODULE = "issue-discovery";
|
|
27
27
|
/** If remaining search quota is below this, skip heavy phases (2, 3). */
|
|
28
28
|
const LOW_BUDGET_THRESHOLD = 20;
|
|
@@ -517,8 +517,13 @@ export class IssueDiscovery {
|
|
|
517
517
|
return b.viabilityScore - a.viabilityScore;
|
|
518
518
|
});
|
|
519
519
|
const capped = applyPerRepoCap(allCandidates, 2);
|
|
520
|
-
|
|
521
|
-
|
|
520
|
+
// Diversity counterweight (#1244): when `diversityRatio > 0`, reserve
|
|
521
|
+
// a fraction of the final slots for candidates that matched neither
|
|
522
|
+
// preference list. No-op when the ratio is 0 or absent — collapses to
|
|
523
|
+
// the original `slice(0, maxResults)` behavior.
|
|
524
|
+
const finalPicks = applyDiversityRatio(capped, maxResults, options.diversityRatio ?? 0);
|
|
525
|
+
info(MODULE, `Search complete: ${tracker.getTotalCalls()} Search API calls used, ${finalPicks.length} candidates returned`);
|
|
526
|
+
return { candidates: finalPicks, strategiesUsed };
|
|
522
527
|
}
|
|
523
528
|
/**
|
|
524
529
|
* Vet a specific issue for claimability and project health.
|
|
package/dist/core/personalization.d.ts
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Personalization signals for search ranking (#1244).
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
4
|
+
* Two passes:
|
|
5
|
+
*
|
|
6
|
+
* - `annotateBoost` translates `preferLanguages` / `preferRepos`
|
|
7
|
+
* into a soft `boostScore` consumed by issue-discovery's final
|
|
8
|
+
* sort tier between `recommendation` and `viabilityScore`.
|
|
9
|
+
* - `applyDiversityRatio` reserves a fraction of the final slot
|
|
10
|
+
* budget for candidates that matched no preference, counterweighting
|
|
11
|
+
* echo-chamber bias as recommendations accumulate over time.
|
|
12
|
+
*
|
|
13
|
+
* Still out of scope for #1244: `boostIssueTypes`, `avoidRepos`, and
|
|
14
|
+
* render-time annotation of `boostReasons` / `diversitySlot` in the CLI
|
|
15
|
+
* non-JSON output. Those follow up in separate PRs.
|
|
13
16
|
*/
|
|
14
17
|
import type { IssueCandidate } from "./types.js";
|
|
15
18
|
/**
|
|
@@ -37,3 +40,28 @@ export declare const LANGUAGE_BOOST = 10;
|
|
|
37
40
|
* retain `boostScore: undefined` and the sort tier collapses to 0.
|
|
38
41
|
*/
|
|
39
42
|
export declare function annotateBoost(candidates: IssueCandidate[], preferLanguages?: string[], preferRepos?: string[]): void;
|
|
43
|
+
/**
|
|
44
|
+
* Apply a diversity-counterweight pass over a pre-sorted candidate list
|
|
45
|
+
* (#1244). Returns the first `maxResults` picks in priority order:
|
|
46
|
+
*
|
|
47
|
+
* 1. Main slots: `maxResults - floor(maxResults * diversityRatio)`
|
|
48
|
+
* top candidates from the input. Personalization-biased candidates
|
|
49
|
+
* win these slots when present (since the input is already sorted
|
|
50
|
+
* by the personalization tier).
|
|
51
|
+
* 2. Diversity slots: the highest-ranked candidates that carry NO
|
|
52
|
+
* `boostScore` — i.e. they matched neither `preferLanguages` nor
|
|
53
|
+
* `preferRepos`. Tagged with `diversitySlot: true` for caller
|
|
54
|
+
* transparency.
|
|
55
|
+
* 3. Top-up: if the diversity pool was thinner than the reserve, fall
|
|
56
|
+
* back to the remaining sorted candidates so the user gets
|
|
57
|
+
* `maxResults` slots whenever the source has enough material.
|
|
58
|
+
*
|
|
59
|
+
* `diversityRatio` is clamped to [0, 1]. 0 is a no-op (just slices the
|
|
60
|
+
* input). 1 means every slot is a diversity slot — useful for
|
|
61
|
+
* deliberately suppressing personalization without disabling it.
|
|
62
|
+
*
|
|
63
|
+
* @param candidates Pre-sorted candidate list (output of issue-discovery)
|
|
64
|
+
* @param maxResults Total slots to fill
|
|
65
|
+
* @param diversityRatio Fraction of slots reserved for unboosted candidates
|
|
66
|
+
*/
|
|
67
|
+
export declare function applyDiversityRatio(candidates: IssueCandidate[], maxResults: number, diversityRatio: number): IssueCandidate[];
|
|
package/dist/core/personalization.js
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Personalization signals for search ranking (#1244).
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
4
|
+
* Two passes:
|
|
5
|
+
*
|
|
6
|
+
* - `annotateBoost` translates `preferLanguages` / `preferRepos`
|
|
7
|
+
* into a soft `boostScore` consumed by issue-discovery's final
|
|
8
|
+
* sort tier between `recommendation` and `viabilityScore`.
|
|
9
|
+
* - `applyDiversityRatio` reserves a fraction of the final slot
|
|
10
|
+
* budget for candidates that matched no preference, counterweighting
|
|
11
|
+
* echo-chamber bias as recommendations accumulate over time.
|
|
12
|
+
*
|
|
13
|
+
* Still out of scope for #1244: `boostIssueTypes`, `avoidRepos`, and
|
|
14
|
+
* render-time annotation of `boostReasons` / `diversitySlot` in the CLI
|
|
15
|
+
* non-JSON output. Those follow up in separate PRs.
|
|
13
16
|
*/
|
|
14
17
|
/**
|
|
15
18
|
* Boost weights. Tuned conservatively so personalization tips equally-
|
|
@@ -58,3 +61,66 @@ export function annotateBoost(candidates, preferLanguages, preferRepos) {
|
|
|
58
61
|
}
|
|
59
62
|
}
|
|
60
63
|
}
|
|
64
|
+
/**
 * Apply a diversity-counterweight pass over a pre-sorted candidate list
 * (#1244) and return at most `maxResults` picks, filled in three passes:
 *
 *   1. Main slots: the first `maxResults - floor(maxResults * ratio)`
 *      candidates, taken in input order.
 *   2. Diversity slots: the next candidates, in input order, whose
 *      `boostScore` is absent or not positive. Each one is tagged IN PLACE
 *      with `diversitySlot: true` before being picked.
 *   3. Top-up: if fewer unboosted candidates exist than were reserved,
 *      the remaining slots are filled from whatever is left, in input
 *      order, without tagging.
 *
 * A candidate whose `issue.url` was already picked is skipped in passes 2
 * and 3. The input array itself is never reordered or truncated.
 *
 * `diversityRatio` is clamped to [0, 1]. A ratio of 0, a ratio too small to
 * reserve a whole slot, or a NaN ratio all reduce to
 * `candidates.slice(0, maxResults)`.
 *
 * @param candidates Pre-sorted candidate list; each entry must expose
 *   `issue.url` and may carry `boostScore`
 * @param maxResults Total slots to fill; zero, negative, or NaN yields `[]`
 * @param diversityRatio Fraction of slots reserved for unboosted candidates
 * @returns A new array of at most `maxResults` candidates
 */
export function applyDiversityRatio(candidates, maxResults, diversityRatio) {
    // `!(x > 0)` rejects NaN as well as zero and negatives. A plain
    // `x <= 0` test lets NaN through, after which no loop bound ever trips.
    if (!(maxResults > 0))
        return [];
    // Math.min / Math.max propagate NaN, so the clamp alone cannot sanitise
    // it. Treat NaN as "counterweight disabled" instead of letting it poison
    // the slot budget below.
    const clamped = Math.max(0, Math.min(1, diversityRatio));
    const ratio = Number.isNaN(clamped) ? 0 : clamped;
    if (ratio === 0)
        return candidates.slice(0, maxResults);
    const diversityReserve = Math.min(Math.floor(maxResults * ratio), maxResults);
    // A small ratio can floor to zero reserved slots; nothing to counterweight.
    if (diversityReserve === 0)
        return candidates.slice(0, maxResults);
    const mainBudget = maxResults - diversityReserve;
    const picks = [];
    const seen = new Set();
    const take = (candidate) => {
        picks.push(candidate);
        seen.add(candidate.issue.url);
    };
    // Pass 1: main slots, straight off the top of the sorted input.
    for (const c of candidates) {
        if (picks.length >= mainBudget)
            break;
        take(c);
    }
    // Pass 2: diversity slots, unboosted candidates only.
    for (const c of candidates) {
        if (picks.length >= maxResults)
            break;
        if (seen.has(c.issue.url))
            continue;
        if ((c.boostScore ?? 0) > 0)
            continue;
        c.diversitySlot = true;
        take(c);
    }
    // Pass 3: top-up when the unboosted pool was thinner than the reserve.
    for (const c of candidates) {
        if (picks.length >= maxResults)
            break;
        if (seen.has(c.issue.url))
            continue;
        take(c);
    }
    return picks;
}
|
package/dist/core/types.d.ts
CHANGED
|
@@ -66,6 +66,14 @@ export interface IssueCandidate {
|
|
|
66
66
|
* symmetry with the existing surface.
|
|
67
67
|
*/
|
|
68
68
|
boostReasons?: string[];
|
|
69
|
+
/**
|
|
70
|
+
* Marks a candidate that filled a reserved diversity slot (#1244).
|
|
71
|
+
* Populated only when `diversityRatio > 0` was passed AND the
|
|
72
|
+
* candidate matched no personalization bias. Mutually exclusive with
|
|
73
|
+
* a non-zero `boostScore` (a candidate cannot be both biased-toward
|
|
74
|
+
* and a diversity slot in the same result set).
|
|
75
|
+
*/
|
|
76
|
+
diversitySlot?: boolean;
|
|
69
77
|
}
|
|
70
78
|
/** Subset of RepoScore fields that callers may update. */
|
|
71
79
|
export interface RepoScoreUpdate {
|
|
@@ -150,6 +158,16 @@ export interface SearchOptions {
|
|
|
150
158
|
* disables the boost.
|
|
151
159
|
*/
|
|
152
160
|
preferRepos?: string[];
|
|
161
|
+
/**
|
|
162
|
+
* Counterweight against echo-chamber bias as `preferLanguages` /
|
|
163
|
+
* `preferRepos` boosts accumulate over time (#1244). A value of 0.2
|
|
164
|
+
* means "reserve roughly 20% of the final slots for candidates that
|
|
165
|
+
* matched NEITHER preference list," filling them from the same sorted
|
|
166
|
+
* pool but skipping any candidate carrying a `boostScore`. 0 disables
|
|
167
|
+
* the counterweight; 1 makes every slot a diversity slot. Range
|
|
168
|
+
* clamped to [0, 1].
|
|
169
|
+
*/
|
|
170
|
+
diversityRatio?: number;
|
|
153
171
|
}
|
|
154
172
|
/** Result of a search operation. */
|
|
155
173
|
export interface SearchResult {
|
package/dist/scout.js
CHANGED
|
@@ -150,6 +150,7 @@ export class OssScout {
|
|
|
150
150
|
skippedUrls,
|
|
151
151
|
preferLanguages: options?.preferLanguages,
|
|
152
152
|
preferRepos: options?.preferRepos,
|
|
153
|
+
diversityRatio: options?.diversityRatio,
|
|
153
154
|
});
|
|
154
155
|
this.state.lastSearchAt = new Date().toISOString();
|
|
155
156
|
this.dirty = true;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@oss-scout/core",
|
|
3
|
-
"version": "0.10.0",
|
|
3
|
+
"version": "0.11.0",
|
|
4
4
|
"description": "Personalized GitHub issue finder with multi-strategy search, deep vetting, and viability scoring — CLI, library, MCP server, and Claude Code plugin",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|