@oss-scout/core 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.bundle.cjs +70 -64
- package/dist/cli.js +19 -129
- package/dist/commands/search.d.ts +4 -0
- package/dist/commands/search.js +2 -0
- package/dist/core/anti-llm-policy.js +3 -30
- package/dist/core/issue-discovery.d.ts +10 -1
- package/dist/core/issue-discovery.js +83 -48
- package/dist/core/issue-eligibility.d.ts +2 -1
- package/dist/core/issue-eligibility.js +6 -3
- package/dist/core/issue-vetting.d.ts +10 -1
- package/dist/core/issue-vetting.js +12 -2
- package/dist/core/personalization.d.ts +27 -12
- package/dist/core/personalization.js +50 -18
- package/dist/core/preference-fields.js +2 -0
- package/dist/core/probe-repo-file.d.ts +47 -0
- package/dist/core/probe-repo-file.js +57 -0
- package/dist/core/repo-health.js +9 -17
- package/dist/core/roadmap.js +11 -21
- package/dist/core/schemas.d.ts +4 -0
- package/dist/core/schemas.js +9 -0
- package/dist/core/search-phases.d.ts +5 -4
- package/dist/core/search-phases.js +12 -9
- package/dist/core/types.d.ts +15 -0
- package/dist/formatters/human.d.ts +60 -0
- package/dist/formatters/human.js +199 -0
- package/dist/scout.d.ts +24 -10
- package/dist/scout.js +29 -14
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -6,6 +6,7 @@ import { Command } from "commander";
|
|
|
6
6
|
import { enableDebug } from "./core/logger.js";
|
|
7
7
|
import { getCLIVersion } from "./core/utils.js";
|
|
8
8
|
import { formatJsonSuccess, formatJsonError } from "./formatters/json.js";
|
|
9
|
+
import { renderSearch, renderFeatures, renderResults, renderVetList, renderVet, RESULTS_EMPTY_MESSAGE, VET_LIST_EMPTY_MESSAGE, } from "./formatters/human.js";
|
|
9
10
|
import { ValidationError, errorMessage, resolveErrorCode, } from "./core/errors.js";
|
|
10
11
|
import { hasLocalState, loadLocalState, saveLocalState, } from "./core/local-state.js";
|
|
11
12
|
import { CONCRETE_STRATEGIES, SearchStrategySchema } from "./core/schemas.js";
|
|
@@ -30,14 +31,6 @@ async function runAction(options, body) {
|
|
|
30
31
|
handleCommandError(err, options);
|
|
31
32
|
}
|
|
32
33
|
}
|
|
33
|
-
/** Emoji for a vetting recommendation, shared by the search and vet renderers. */
|
|
34
|
-
function recommendationIcon(recommendation) {
|
|
35
|
-
if (recommendation === "approve")
|
|
36
|
-
return "✅";
|
|
37
|
-
if (recommendation === "skip")
|
|
38
|
-
return "❌";
|
|
39
|
-
return "⚠️";
|
|
40
|
-
}
|
|
41
34
|
const program = new Command();
|
|
42
35
|
program
|
|
43
36
|
.name("oss-scout")
|
|
@@ -116,6 +109,8 @@ program
|
|
|
116
109
|
.option("--strategy <strategies>", `Search strategies (${CONCRETE_STRATEGIES.join(",")},all). Defaults to the defaultStrategy preference, or all.`)
|
|
117
110
|
.option("--prefer-languages <list>", "Comma-separated languages to soft-boost in ranking (#1244). Candidates whose repo language matches sort above equally-recommended non-matches. Does not filter results.")
|
|
118
111
|
.option("--prefer-repos <list>", "Comma-separated `owner/repo` slugs to soft-boost in ranking (#1244). Stronger weight than language match. Does not filter results.")
|
|
112
|
+
.option("--avoid-repos <list>", "Comma-separated `owner/repo` slugs to soft-penalize in ranking (#168). Milder than excludeRepos: pushes them down but does not filter them out.")
|
|
113
|
+
.option("--boost-issue-types <list>", "Comma-separated issue label types to soft-boost in ranking (#168), case-insensitive (e.g. `bug,good first issue`). Does not filter results.")
|
|
119
114
|
.option("--diversity-ratio <n>", "Fraction of result slots (0-1) reserved for candidates that matched NEITHER preference list (#1244). Counterweights echo-chamber bias as boosts accumulate. Default 0 (disabled).")
|
|
120
115
|
.action(async (count, options) => runAction(options, async () => {
|
|
121
116
|
if (!hasLocalState() && !options.json) {
|
|
@@ -175,6 +170,8 @@ program
|
|
|
175
170
|
strategies,
|
|
176
171
|
preferLanguages: splitCsv(options.preferLanguages),
|
|
177
172
|
preferRepos: splitCsv(options.preferRepos),
|
|
173
|
+
avoidRepos: splitCsv(options.avoidRepos),
|
|
174
|
+
boostIssueTypes: splitCsv(options.boostIssueTypes),
|
|
178
175
|
diversityRatio,
|
|
179
176
|
});
|
|
180
177
|
if (options.json) {
|
|
@@ -182,30 +179,10 @@ program
|
|
|
182
179
|
}
|
|
183
180
|
else {
|
|
184
181
|
// Human-readable output
|
|
185
|
-
console.log(
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
? " (stalled PR, revive opportunity)"
|
|
190
|
-
: "";
|
|
191
|
-
// Personalization tag (#1244). A candidate is either boosted
|
|
192
|
-
// (matched a preference) or a diversity slot (matched none and
|
|
193
|
-
// filled a reserved slot); never both.
|
|
194
|
-
let personalizationTag = "";
|
|
195
|
-
if (c.boostScore && c.boostReasons && c.boostReasons.length > 0) {
|
|
196
|
-
personalizationTag = ` [boosted: ${c.boostReasons.join("; ")}]`;
|
|
197
|
-
}
|
|
198
|
-
else if (c.diversitySlot) {
|
|
199
|
-
personalizationTag = " [diversity slot]";
|
|
200
|
-
}
|
|
201
|
-
console.log(` ${icon} ${c.issue.repo}#${c.issue.number} [${c.viabilityScore}/100]${personalizationTag}${stalledTag}`);
|
|
202
|
-
console.log(` ${c.issue.title}`);
|
|
203
|
-
console.log(` ${c.issue.url}`);
|
|
204
|
-
if (c.repoScore) {
|
|
205
|
-
console.log(` Repo: ${c.repoScore.score}/10, ${c.repoScore.mergedPRCount} merged PRs`);
|
|
206
|
-
}
|
|
207
|
-
console.log();
|
|
208
|
-
}
|
|
182
|
+
console.log(renderSearch(results));
|
|
183
|
+
// Rate-limit warning stays on stderr (NOT folded into the stdout
|
|
184
|
+
// render), so --json stdout purity and the stdout/stderr split are
|
|
185
|
+
// both preserved.
|
|
209
186
|
if (results.rateLimitWarning) {
|
|
210
187
|
console.error(`\n⚠️ ${results.rateLimitWarning}`);
|
|
211
188
|
}
|
|
@@ -252,41 +229,11 @@ program
|
|
|
252
229
|
console.log(formatJsonSuccess(result));
|
|
253
230
|
}
|
|
254
231
|
else {
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
return;
|
|
261
|
-
const headerScope = options.broad
|
|
262
|
-
? "across the ecosystem"
|
|
263
|
-
: "in your anchor repos";
|
|
264
|
-
console.log(`\n🎯 Feature opportunities ${headerScope} (${result.quickWins.length} quick wins + ${result.biggerBets.length} bigger bets)\n`);
|
|
265
|
-
if (!options.broad) {
|
|
266
|
-
console.log(`Anchor repos: ${result.anchorRepos.join(", ")}\n`);
|
|
267
|
-
}
|
|
268
|
-
if (result.quickWins.length) {
|
|
269
|
-
console.log("── Quick wins ─────────────────────────────────────────");
|
|
270
|
-
for (const c of result.quickWins) {
|
|
271
|
-
const stalledTag = c.linkedPR?.isStalled
|
|
272
|
-
? " (stalled PR, revive opportunity)"
|
|
273
|
-
: "";
|
|
274
|
-
console.log(` ${c.issue.repo}#${c.issue.number} [${c.viabilityScore}/100] ${c.issue.title}${stalledTag}`);
|
|
275
|
-
console.log(` ${c.issue.url}`);
|
|
276
|
-
}
|
|
277
|
-
console.log("");
|
|
278
|
-
}
|
|
279
|
-
if (result.biggerBets.length) {
|
|
280
|
-
console.log("── Bigger bets ────────────────────────────────────────");
|
|
281
|
-
for (const c of result.biggerBets) {
|
|
282
|
-
const stalledTag = c.linkedPR?.isStalled
|
|
283
|
-
? " (stalled PR, revive opportunity)"
|
|
284
|
-
: "";
|
|
285
|
-
console.log(` ${c.issue.repo}#${c.issue.number} [${c.viabilityScore}/100] ${c.issue.title}${stalledTag}`);
|
|
286
|
-
console.log(` ${c.issue.url}`);
|
|
287
|
-
}
|
|
288
|
-
console.log("");
|
|
289
|
-
}
|
|
232
|
+
// renderFeatures returns "" only when there is no message AND
|
|
233
|
+
// nothing to list; guard so the caller never logs a blank line.
|
|
234
|
+
const out = renderFeatures(result, { broad: options.broad });
|
|
235
|
+
if (out)
|
|
236
|
+
console.log(out);
|
|
290
237
|
}
|
|
291
238
|
}));
|
|
292
239
|
// ── results command ────────────────────────────────────────────────
|
|
@@ -313,21 +260,10 @@ resultsCmd
|
|
|
313
260
|
return;
|
|
314
261
|
}
|
|
315
262
|
if (results.length === 0) {
|
|
316
|
-
console.log(
|
|
263
|
+
console.log(RESULTS_EMPTY_MESSAGE);
|
|
317
264
|
return;
|
|
318
265
|
}
|
|
319
|
-
console.log(
|
|
320
|
-
console.log(" Score Repo Issue Recommendation Title");
|
|
321
|
-
console.log(" ───── ──────────────────────────────── ────── ────────────── ─────");
|
|
322
|
-
for (const r of results) {
|
|
323
|
-
const score = String(r.viabilityScore).padStart(3);
|
|
324
|
-
const repo = r.repo.padEnd(32).slice(0, 32);
|
|
325
|
-
const issue = `#${r.number}`.padEnd(6);
|
|
326
|
-
const rec = r.recommendation.padEnd(14);
|
|
327
|
-
const title = r.title.length > 50 ? r.title.slice(0, 47) + "..." : r.title;
|
|
328
|
-
console.log(` ${score} ${repo} ${issue} ${rec} ${title}`);
|
|
329
|
-
}
|
|
330
|
-
console.log();
|
|
266
|
+
console.log(renderResults(results));
|
|
331
267
|
}));
|
|
332
268
|
resultsCmd
|
|
333
269
|
.command("clear")
|
|
@@ -408,35 +344,10 @@ program
|
|
|
408
344
|
}
|
|
409
345
|
else {
|
|
410
346
|
if (result.results.length === 0) {
|
|
411
|
-
console.log(
|
|
347
|
+
console.log(VET_LIST_EMPTY_MESSAGE);
|
|
412
348
|
return;
|
|
413
349
|
}
|
|
414
|
-
console.log(
|
|
415
|
-
for (const r of result.results) {
|
|
416
|
-
const icon = r.status === "still_available"
|
|
417
|
-
? "✅"
|
|
418
|
-
: r.status === "claimed"
|
|
419
|
-
? "🔒"
|
|
420
|
-
: r.status === "has_pr"
|
|
421
|
-
? "🔀"
|
|
422
|
-
: r.status === "closed"
|
|
423
|
-
? "🚫"
|
|
424
|
-
: "❌";
|
|
425
|
-
const score = r.ok ? ` [${r.viabilityScore}/100]` : "";
|
|
426
|
-
console.log(` ${icon} ${r.repo}#${r.number} — ${r.status}${score}`);
|
|
427
|
-
console.log(` ${r.title}`);
|
|
428
|
-
}
|
|
429
|
-
if (result.transitions.length > 0) {
|
|
430
|
-
console.log(`\n🔔 Changes since last check (${result.transitions.length}):`);
|
|
431
|
-
for (const t of result.transitions) {
|
|
432
|
-
console.log(` ${t.repo}#${t.number}: ${t.from} → ${t.to}`);
|
|
433
|
-
}
|
|
434
|
-
}
|
|
435
|
-
console.log(`\nSummary: ${result.summary.stillAvailable} available, ${result.summary.claimed} claimed, ${result.summary.hasPR} has PR, ${result.summary.closed} closed, ${result.summary.errors} errors`);
|
|
436
|
-
if (result.prunedCount != null) {
|
|
437
|
-
console.log(`Pruned ${result.prunedCount} unavailable issues from saved results.`);
|
|
438
|
-
}
|
|
439
|
-
console.log();
|
|
350
|
+
console.log(renderVetList(result));
|
|
440
351
|
}
|
|
441
352
|
}));
|
|
442
353
|
// ── skip command ───────────────────────────────────────────────────
|
|
@@ -538,28 +449,7 @@ program
|
|
|
538
449
|
console.log(formatJsonSuccess(result));
|
|
539
450
|
}
|
|
540
451
|
else {
|
|
541
|
-
|
|
542
|
-
console.log(`\n${icon} ${result.issue.repo}#${result.issue.number}: ${result.recommendation.toUpperCase()}`);
|
|
543
|
-
console.log(` ${result.issue.title}`);
|
|
544
|
-
console.log(` ${result.issue.url}\n`);
|
|
545
|
-
if (result.reasonsToApprove.length > 0) {
|
|
546
|
-
console.log("Reasons to approve:");
|
|
547
|
-
for (const r of result.reasonsToApprove)
|
|
548
|
-
console.log(` + ${r}`);
|
|
549
|
-
}
|
|
550
|
-
if (result.reasonsToSkip.length > 0) {
|
|
551
|
-
console.log("Reasons to skip:");
|
|
552
|
-
for (const r of result.reasonsToSkip)
|
|
553
|
-
console.log(` - ${r}`);
|
|
554
|
-
}
|
|
555
|
-
if (result.projectHealth.checkFailed) {
|
|
556
|
-
console.log(`\nProject health: unknown (check failed: ${result.projectHealth.failureReason})`);
|
|
557
|
-
}
|
|
558
|
-
else {
|
|
559
|
-
console.log(`\nProject health: ${result.projectHealth.isActive ? "Active" : "Inactive"}`);
|
|
560
|
-
console.log(` Last commit: ${result.projectHealth.daysSinceLastCommit} days ago`);
|
|
561
|
-
console.log(` CI status: ${result.projectHealth.ciStatus}`);
|
|
562
|
-
}
|
|
452
|
+
console.log(renderVet(result));
|
|
563
453
|
}
|
|
564
454
|
}));
|
|
565
455
|
program.parse();
|
package/dist/commands/search.d.ts
CHANGED
|
@@ -64,6 +64,10 @@ interface SearchCommandOptions {
|
|
|
64
64
|
preferLanguages?: string[];
|
|
65
65
|
/** Soft sort boost for candidates in these `owner/repo` slugs (#1244). */
|
|
66
66
|
preferRepos?: string[];
|
|
67
|
+
/** Soft sort penalty for candidates in these `owner/repo` slugs (#168). */
|
|
68
|
+
avoidRepos?: string[];
|
|
69
|
+
/** Soft sort boost for candidates whose labels match these types (#168). */
|
|
70
|
+
boostIssueTypes?: string[];
|
|
67
71
|
/** Diversity counterweight: fraction of slots reserved for unboosted candidates (#1244). */
|
|
68
72
|
diversityRatio?: number;
|
|
69
73
|
}
|
package/dist/commands/search.js
CHANGED
|
@@ -10,6 +10,8 @@ export async function runSearch(options) {
|
|
|
10
10
|
strategies: options.strategies,
|
|
11
11
|
preferLanguages: options.preferLanguages,
|
|
12
12
|
preferRepos: options.preferRepos,
|
|
13
|
+
avoidRepos: options.avoidRepos,
|
|
14
|
+
boostIssueTypes: options.boostIssueTypes,
|
|
13
15
|
diversityRatio: options.diversityRatio,
|
|
14
16
|
});
|
|
15
17
|
scout.saveResults(result.candidates);
|
package/dist/core/anti-llm-policy.js
CHANGED
|
@@ -7,10 +7,9 @@
|
|
|
7
7
|
* can rely on a structured `AntiLLMPolicyResult` rather than re-implementing
|
|
8
8
|
* the scan in agent prose.
|
|
9
9
|
*/
|
|
10
|
-
import {
|
|
11
|
-
import { warn } from "./logger.js";
|
|
10
|
+
import { getHttpStatusCode, isRateLimitError } from "./errors.js";
|
|
12
11
|
import { getHttpCache, versionedCacheKey } from "./http-cache.js";
|
|
13
|
-
|
|
12
|
+
import { probeRepoFile } from "./probe-repo-file.js";
|
|
14
13
|
/** TTL for cached anti-LLM policy scan results (1 hour). Policy docs change rarely. */
|
|
15
14
|
const POLICY_SCAN_CACHE_TTL_MS = 60 * 60 * 1000;
|
|
16
15
|
/**
|
|
@@ -85,39 +84,13 @@ const SOURCE_FILE_FAMILIES = [
|
|
|
85
84
|
paths: ["README.md", "readme.md", "Readme.md"],
|
|
86
85
|
},
|
|
87
86
|
];
|
|
88
|
-
/**
|
|
89
|
-
* Fetch one path's raw text content. The `transient` flag distinguishes a
|
|
90
|
-
* clean miss (404 — file absent) from a degraded miss (5xx, network) so the
|
|
91
|
-
* caller can decide whether to cache "no policy" or retry. Throws on
|
|
92
|
-
* 401/auth and rate-limit per documented project error strategy.
|
|
93
|
-
*/
|
|
94
|
-
async function fetchFileText(octokit, owner, repo, path) {
|
|
95
|
-
try {
|
|
96
|
-
const { data } = await octokit.repos.getContent({ owner, repo, path });
|
|
97
|
-
if ("content" in data && typeof data.content === "string") {
|
|
98
|
-
return {
|
|
99
|
-
text: Buffer.from(data.content, "base64").toString("utf-8"),
|
|
100
|
-
transient: false,
|
|
101
|
-
};
|
|
102
|
-
}
|
|
103
|
-
return { text: null, transient: false };
|
|
104
|
-
}
|
|
105
|
-
catch (error) {
|
|
106
|
-
const status = getHttpStatusCode(error);
|
|
107
|
-
if (status === 404)
|
|
108
|
-
return { text: null, transient: false };
|
|
109
|
-
rethrowIfFatal(error);
|
|
110
|
-
warn(MODULE, `Unexpected error fetching ${path} from ${owner}/${repo}: ${errorMessage(error)}`);
|
|
111
|
-
return { text: null, transient: true };
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
87
|
/**
|
|
115
88
|
* Fetch the first available file from a family. Probes are issued in parallel,
|
|
116
89
|
* but auth/rate-limit rejections re-throw so the IssueVetter's existing
|
|
117
90
|
* rate-limit handling kicks in instead of silently caching a wrong answer.
|
|
118
91
|
*/
|
|
119
92
|
async function fetchFamilyText(octokit, owner, repo, paths) {
|
|
120
|
-
const results = await Promise.allSettled(paths.map((p) =>
|
|
93
|
+
const results = await Promise.allSettled(paths.map((p) => probeRepoFile(octokit, owner, repo, p)));
|
|
121
94
|
let hadTransientFailure = false;
|
|
122
95
|
for (const result of results) {
|
|
123
96
|
if (result.status === "fulfilled") {
|
package/dist/core/issue-discovery.d.ts
CHANGED
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
*
|
|
12
12
|
* All state is injected via constructor parameters (ScoutStateReader + ScoutPreferences).
|
|
13
13
|
*/
|
|
14
|
+
import { type SearchBudgetTracker } from "./search-budget.js";
|
|
14
15
|
import { type IssueCandidate } from "./types.js";
|
|
15
16
|
import type { ScoutPreferences, SearchStrategy } from "./schemas.js";
|
|
16
17
|
import { type ScoutStateReader } from "./issue-vetting.js";
|
|
@@ -31,14 +32,20 @@ export declare class IssueDiscovery {
|
|
|
31
32
|
private octokit;
|
|
32
33
|
private githubToken;
|
|
33
34
|
private vetter;
|
|
35
|
+
private budgetTracker;
|
|
34
36
|
/** Set after searchIssues() runs if rate limits affected the search (low pre-flight quota or mid-search rate limit hits). */
|
|
35
37
|
rateLimitWarning: string | null;
|
|
36
38
|
/**
|
|
37
39
|
* @param githubToken - GitHub personal access token or token from `gh auth token`
|
|
38
40
|
* @param preferences - User's search preferences (languages, labels, scopes, etc.)
|
|
39
41
|
* @param stateReader - Read-only interface for accessing scout state (merged PRs, starred repos, etc.)
|
|
42
|
+
* @param budgetTracker - Search budget tracker. Defaults to the shared
|
|
43
|
+
* singleton so existing callers behave identically. A long-lived host
|
|
44
|
+
* serving concurrent searches can inject a per-search instance so one
|
|
45
|
+
* search's init() no longer resets the budget state of another (the
|
|
46
|
+
* shared-singleton concurrency hazard, #156).
|
|
40
47
|
*/
|
|
41
|
-
constructor(githubToken: string, preferences: ScoutPreferences, stateReader: ScoutStateReader);
|
|
48
|
+
constructor(githubToken: string, preferences: ScoutPreferences, stateReader: ScoutStateReader, budgetTracker?: SearchBudgetTracker);
|
|
42
49
|
/**
|
|
43
50
|
* Get starred repos from the state reader.
|
|
44
51
|
* @returns Array of starred repo names in "owner/repo" format
|
|
@@ -76,6 +83,8 @@ export declare class IssueDiscovery {
|
|
|
76
83
|
skippedUrls?: Set<string>;
|
|
77
84
|
preferLanguages?: string[];
|
|
78
85
|
preferRepos?: string[];
|
|
86
|
+
avoidRepos?: string[];
|
|
87
|
+
boostIssueTypes?: string[];
|
|
79
88
|
diversityRatio?: number;
|
|
80
89
|
interPhaseDelayMs?: number;
|
|
81
90
|
broadPhaseDelayMs?: number;
|
package/dist/core/issue-discovery.js
CHANGED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
* All state is injected via constructor parameters (ScoutStateReader + ScoutPreferences).
|
|
13
13
|
*/
|
|
14
14
|
import { getOctokit, checkRateLimit } from "./github.js";
|
|
15
|
-
import { getSearchBudgetTracker } from "./search-budget.js";
|
|
15
|
+
import { getSearchBudgetTracker, } from "./search-budget.js";
|
|
16
16
|
import { daysBetween, extractRepoFromUrl, sleep } from "./utils.js";
|
|
17
17
|
import { SCOPE_LABELS, } from "./types.js";
|
|
18
18
|
import { CONCRETE_STRATEGIES } from "./schemas.js";
|
|
@@ -28,6 +28,20 @@ const MODULE = "issue-discovery";
|
|
|
28
28
|
const LOW_BUDGET_THRESHOLD = 20;
|
|
29
29
|
/** If remaining search quota is below this, only run Phase 0. */
|
|
30
30
|
const CRITICAL_BUDGET_THRESHOLD = 10;
|
|
31
|
+
/**
|
|
32
|
+
* Page size for Phase 0 (repos the user has contributed to). Larger than the
|
|
33
|
+
* default 5 so the backlog of open issues in known repos is reachable, not
|
|
34
|
+
* just the 5 newest-created. One `listForRepo` call regardless of page size,
|
|
35
|
+
* so this widens the candidate pool at no extra REST cost.
|
|
36
|
+
*/
|
|
37
|
+
const PHASE0_PER_PAGE = 30;
|
|
38
|
+
/**
|
|
39
|
+
* Max issue age (by last activity) for Phase 0 contributed repos. Relaxed well
|
|
40
|
+
* past the default `maxIssueAgeDays` (90) because in a repo the user already
|
|
41
|
+
* knows, an older-but-still-open issue is still worth evaluating — the vetter
|
|
42
|
+
* screens staleness, existing PRs, and claims downstream.
|
|
43
|
+
*/
|
|
44
|
+
const CONTRIBUTED_REPO_MAX_AGE_DAYS = 365;
|
|
31
45
|
/** Build a reusable filter function from config. */
|
|
32
46
|
function buildIssueFilter(config) {
|
|
33
47
|
return (items) => {
|
|
@@ -63,8 +77,8 @@ function buildIssueFilter(config) {
|
|
|
63
77
|
}
|
|
64
78
|
/** Phase 0: Search repos where user has merged PRs (highest merge probability). */
|
|
65
79
|
async function runPhase0(octokit, vetter, repos, maxResults, filterIssues) {
|
|
66
|
-
info(MODULE, `Phase 0: Searching issues in ${repos.length} merged-PR repos (no label filter)...`);
|
|
67
|
-
const { candidates, allReposFailed, rateLimitHit } = await fetchIssuesFromKnownRepos(octokit, vetter, repos, [], maxResults, "merged_pr", filterIssues);
|
|
80
|
+
info(MODULE, `Phase 0: Searching issues in ${repos.length} merged-PR repos (no label filter, ${PHASE0_PER_PAGE}/repo)...`);
|
|
81
|
+
const { candidates, allReposFailed, rateLimitHit } = await fetchIssuesFromKnownRepos(octokit, vetter, repos, [], maxResults, "merged_pr", filterIssues, PHASE0_PER_PAGE);
|
|
68
82
|
info(MODULE, `Found ${candidates.length} candidates from merged-PR repos`);
|
|
69
83
|
return {
|
|
70
84
|
candidates,
|
|
@@ -87,7 +101,7 @@ async function runPhase1(octokit, vetter, repos, labels, maxResults, filterIssue
|
|
|
87
101
|
};
|
|
88
102
|
}
|
|
89
103
|
/** Phase 2: General label-filtered search with multi-tier interleaving. */
|
|
90
|
-
async function runPhase2(octokit, vetter, scopes, labels, configLabels, languages, isAnyLanguage, maxResults, minStars, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues) {
|
|
104
|
+
async function runPhase2(octokit, vetter, scopes, labels, configLabels, languages, isAnyLanguage, maxResults, minStars, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues, tracker) {
|
|
91
105
|
info(MODULE, "Phase 2: General issue search...");
|
|
92
106
|
const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
|
|
93
107
|
// Build per-tier label groups. Multi-tier when 2+ scopes; single-tier otherwise.
|
|
@@ -116,7 +130,7 @@ async function runPhase2(octokit, vetter, scopes, labels, configLabels, language
|
|
|
116
130
|
let rateLimitHit = false;
|
|
117
131
|
for (const { tier, tierLabels } of tierLabelGroups) {
|
|
118
132
|
try {
|
|
119
|
-
const allItems = await searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, tierLabels, (langQ) => `is:issue is:open ${langQ} no:assignee`.replace(/ +/g, " ").trim(), budgetPerTier * 3);
|
|
133
|
+
const allItems = await searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, tierLabels, (langQ) => `is:issue is:open ${langQ} no:assignee`.replace(/ +/g, " ").trim(), budgetPerTier * 3, tracker);
|
|
120
134
|
info(MODULE, `Phase 2 [${tier}]: processing ${allItems.length} items...`);
|
|
121
135
|
const { candidates: tierCandidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, allItems, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], budgetPerTier, minStars, `Phase 2 [${tier}]`);
|
|
122
136
|
tierResults.push(tierCandidates);
|
|
@@ -153,7 +167,7 @@ async function runPhase2(octokit, vetter, scopes, labels, configLabels, language
|
|
|
153
167
|
};
|
|
154
168
|
}
|
|
155
169
|
/** Phase 3: Actively maintained repos (REST-first, Search API fallback). */
|
|
156
|
-
async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories, maxResults, phase0RepoSet, starredRepoSet, starredRepos, existingCandidates, filterIssues) {
|
|
170
|
+
async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories, maxResults, phase0RepoSet, starredRepoSet, starredRepos, existingCandidates, filterIssues, tracker) {
|
|
157
171
|
info(MODULE, "Phase 3: Searching actively maintained repos...");
|
|
158
172
|
const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
|
|
159
173
|
// Step 1: Try REST API with starred repos first (no Search API quota used)
|
|
@@ -196,7 +210,7 @@ async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories
|
|
|
196
210
|
sort: "updated",
|
|
197
211
|
order: "desc",
|
|
198
212
|
per_page: maxResults * 3,
|
|
199
|
-
});
|
|
213
|
+
}, tracker);
|
|
200
214
|
info(MODULE, `Found ${data.total_count} issues in maintained-repo search, processing top ${data.items.length}...`);
|
|
201
215
|
const { candidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, data.items, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], maxResults, minStars, "Phase 3");
|
|
202
216
|
info(MODULE, `Found ${candidates.length} candidates from maintained-repo search`);
|
|
@@ -236,19 +250,28 @@ export class IssueDiscovery {
|
|
|
236
250
|
octokit;
|
|
237
251
|
githubToken;
|
|
238
252
|
vetter;
|
|
253
|
+
budgetTracker;
|
|
239
254
|
/** Set after searchIssues() runs if rate limits affected the search (low pre-flight quota or mid-search rate limit hits). */
|
|
240
255
|
rateLimitWarning = null;
|
|
241
256
|
/**
|
|
242
257
|
* @param githubToken - GitHub personal access token or token from `gh auth token`
|
|
243
258
|
* @param preferences - User's search preferences (languages, labels, scopes, etc.)
|
|
244
259
|
* @param stateReader - Read-only interface for accessing scout state (merged PRs, starred repos, etc.)
|
|
260
|
+
* @param budgetTracker - Search budget tracker. Defaults to the shared
|
|
261
|
+
* singleton so existing callers behave identically. A long-lived host
|
|
262
|
+
* serving concurrent searches can inject a per-search instance so one
|
|
263
|
+
* search's init() no longer resets the budget state of another (the
|
|
264
|
+
* shared-singleton concurrency hazard, #156).
|
|
245
265
|
*/
|
|
246
|
-
constructor(githubToken, preferences, stateReader) {
|
|
266
|
+
constructor(githubToken, preferences, stateReader, budgetTracker = getSearchBudgetTracker()) {
|
|
247
267
|
this.preferences = preferences;
|
|
248
268
|
this.stateReader = stateReader;
|
|
249
269
|
this.githubToken = githubToken;
|
|
250
270
|
this.octokit = getOctokit(githubToken);
|
|
251
|
-
this.
|
|
271
|
+
this.budgetTracker = budgetTracker;
|
|
272
|
+
// Thread the same tracker into the vetter so the merged-PR Search API
|
|
273
|
+
// call (checkUserMergedPRsInRepo) pays the same budget as the search phases.
|
|
274
|
+
this.vetter = new IssueVetter(this.octokit, this.stateReader, this.budgetTracker);
|
|
252
275
|
}
|
|
253
276
|
/**
|
|
254
277
|
* Get starred repos from the state reader.
|
|
@@ -300,9 +323,27 @@ export class IssueDiscovery {
|
|
|
300
323
|
const allCandidates = [];
|
|
301
324
|
const phaseErrors = {};
|
|
302
325
|
let rateLimitHitDuringSearch = false;
|
|
326
|
+
// The standard inter-phase pause for rate-limit management. Phases 1, 2,
|
|
327
|
+
// and 3 all apply this identical delay before querying (Phase 0 is first,
|
|
328
|
+
// so it never waits). The broad phase wraps this with an extra cooldown.
|
|
329
|
+
const applyInterPhaseDelay = async () => {
|
|
330
|
+
if (interPhaseDelay > 0) {
|
|
331
|
+
info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
|
|
332
|
+
await sleep(interPhaseDelay);
|
|
333
|
+
}
|
|
334
|
+
};
|
|
335
|
+
// Fold a phase's result into the running totals. Every phase accumulates
|
|
336
|
+
// candidates, records its error under a stable key, and flips the
|
|
337
|
+
// rate-limit flag the same way; only the key and the result differ.
|
|
338
|
+
const recordPhaseResult = (key, result) => {
|
|
339
|
+
allCandidates.push(...result.candidates);
|
|
340
|
+
phaseErrors[key] = result.error;
|
|
341
|
+
if (result.rateLimitHit)
|
|
342
|
+
rateLimitHitDuringSearch = true;
|
|
343
|
+
};
|
|
303
344
|
// Pre-flight rate limit check
|
|
304
345
|
this.rateLimitWarning = null;
|
|
305
|
-
const tracker =
|
|
346
|
+
const tracker = this.budgetTracker;
|
|
306
347
|
let searchBudget = LOW_BUDGET_THRESHOLD - 1;
|
|
307
348
|
try {
|
|
308
349
|
const rateLimit = await checkRateLimit(this.githubToken);
|
|
@@ -347,15 +388,24 @@ export class IssueDiscovery {
|
|
|
347
388
|
if (aiBlocklisted.size > 0) {
|
|
348
389
|
debug(MODULE, `[AI_POLICY_FILTER] Filtering issues from ${aiBlocklisted.size} blocklisted repo(s): ${[...aiBlocklisted].join(", ")}`);
|
|
349
390
|
}
|
|
350
|
-
const
|
|
391
|
+
const baseFilterConfig = {
|
|
351
392
|
excludedRepos: new Set(config.excludeRepos.map((r) => r.toLowerCase())),
|
|
352
393
|
excludeOrgs: new Set((config.excludeOrgs ?? []).map((o) => o.toLowerCase())),
|
|
353
394
|
aiBlocklisted,
|
|
354
395
|
lowScoringRepos,
|
|
355
396
|
skippedUrls: options.skippedUrls ?? new Set(),
|
|
356
|
-
maxAgeDays: config.maxIssueAgeDays || 90,
|
|
357
397
|
now: new Date(),
|
|
358
398
|
includeDocIssues: config.includeDocIssues ?? true,
|
|
399
|
+
};
|
|
400
|
+
const filterIssues = buildIssueFilter({
|
|
401
|
+
...baseFilterConfig,
|
|
402
|
+
maxAgeDays: config.maxIssueAgeDays || 90,
|
|
403
|
+
});
|
|
404
|
+
// Phase 0 (contributed repos) gets a relaxed age window so the existing
|
|
405
|
+
// backlog surfaces, not just issues active in the last 90 days.
|
|
406
|
+
const filterIssuesPhase0 = buildIssueFilter({
|
|
407
|
+
...baseFilterConfig,
|
|
408
|
+
maxAgeDays: CONTRIBUTED_REPO_MAX_AGE_DAYS,
|
|
359
409
|
});
|
|
360
410
|
// Phase 0: Repos the user has engaged with — merged PRs first (strongest
|
|
361
411
|
// signal), then open PRs (active engagement even without a merge yet).
|
|
@@ -374,11 +424,8 @@ export class IssueDiscovery {
|
|
|
374
424
|
if (phase0Repos.length > 0 && enabledStrategies.has("merged")) {
|
|
375
425
|
const remaining = maxResults - allCandidates.length;
|
|
376
426
|
if (remaining > 0) {
|
|
377
|
-
const result = await runPhase0(this.octokit, this.vetter, phase0Repos, remaining,
|
|
378
|
-
|
|
379
|
-
phaseErrors["0"] = result.error;
|
|
380
|
-
if (result.rateLimitHit)
|
|
381
|
-
rateLimitHitDuringSearch = true;
|
|
427
|
+
const result = await runPhase0(this.octokit, this.vetter, phase0Repos, remaining, filterIssuesPhase0);
|
|
428
|
+
recordPhaseResult("0", result);
|
|
382
429
|
}
|
|
383
430
|
strategiesUsed.push("merged");
|
|
384
431
|
}
|
|
@@ -387,19 +434,13 @@ export class IssueDiscovery {
|
|
|
387
434
|
starredRepos.length > 0 &&
|
|
388
435
|
searchBudget >= CRITICAL_BUDGET_THRESHOLD &&
|
|
389
436
|
enabledStrategies.has("starred")) {
|
|
390
|
-
|
|
391
|
-
info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
|
|
392
|
-
await sleep(interPhaseDelay);
|
|
393
|
-
}
|
|
437
|
+
await applyInterPhaseDelay();
|
|
394
438
|
const reposToSearch = starredRepos.filter((r) => !phase0RepoSet.has(r));
|
|
395
439
|
if (reposToSearch.length > 0) {
|
|
396
440
|
const remaining = maxResults - allCandidates.length;
|
|
397
441
|
if (remaining > 0) {
|
|
398
442
|
const result = await runPhase1(this.octokit, this.vetter, reposToSearch, labels, remaining, filterIssues);
|
|
399
|
-
|
|
400
|
-
phaseErrors["1"] = result.error;
|
|
401
|
-
if (result.rateLimitHit)
|
|
402
|
-
rateLimitHitDuringSearch = true;
|
|
443
|
+
recordPhaseResult("1", result);
|
|
403
444
|
// Recorded only when the phase actually queried (#130)
|
|
404
445
|
strategiesUsed.push("starred");
|
|
405
446
|
}
|
|
@@ -424,10 +465,7 @@ export class IssueDiscovery {
|
|
|
424
465
|
}
|
|
425
466
|
else {
|
|
426
467
|
// Always apply baseline inter-phase delay
|
|
427
|
-
|
|
428
|
-
info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
|
|
429
|
-
await sleep(interPhaseDelay);
|
|
430
|
-
}
|
|
468
|
+
await applyInterPhaseDelay();
|
|
431
469
|
// Apply additional broad-phase cooldown, but skip if previous phases found nothing
|
|
432
470
|
if (allCandidates.length > 0 && broadDelay > 0) {
|
|
433
471
|
info(MODULE, `Waiting ${(broadDelay / 1000).toFixed(0)}s for rate limit cooldown before broad search...`);
|
|
@@ -437,11 +475,8 @@ export class IssueDiscovery {
|
|
|
437
475
|
info(MODULE, `Skipping broad phase delay: no results from previous phases, proceeding immediately`);
|
|
438
476
|
}
|
|
439
477
|
const remaining = maxResults - allCandidates.length;
|
|
440
|
-
const result = await runPhase2(this.octokit, this.vetter, scopes, labels, config.labels, languages, isAnyLanguage, remaining, minStars, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
|
|
441
|
-
|
|
442
|
-
phaseErrors["2"] = result.error;
|
|
443
|
-
if (result.rateLimitHit)
|
|
444
|
-
rateLimitHitDuringSearch = true;
|
|
478
|
+
const result = await runPhase2(this.octokit, this.vetter, scopes, labels, config.labels, languages, isAnyLanguage, remaining, minStars, phase0RepoSet, starredRepoSet, allCandidates, filterIssues, tracker);
|
|
479
|
+
recordPhaseResult("2", result);
|
|
445
480
|
// Recorded only when the phase actually queried, not when the
|
|
446
481
|
// skip-threshold branch short-circuited it (#130)
|
|
447
482
|
strategiesUsed.push("broad");
|
|
@@ -451,16 +486,10 @@ export class IssueDiscovery {
|
|
|
451
486
|
if (allCandidates.length < maxResults &&
|
|
452
487
|
searchBudget >= LOW_BUDGET_THRESHOLD &&
|
|
453
488
|
enabledStrategies.has("maintained")) {
|
|
454
|
-
|
|
455
|
-
info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
|
|
456
|
-
await sleep(interPhaseDelay);
|
|
457
|
-
}
|
|
489
|
+
await applyInterPhaseDelay();
|
|
458
490
|
const remaining = maxResults - allCandidates.length;
|
|
459
|
-
const result = await runPhase3(this.octokit, this.vetter, langQuery, minStars, config.projectCategories ?? [], remaining, phase0RepoSet, starredRepoSet, starredRepos, allCandidates, filterIssues);
|
|
460
|
-
|
|
461
|
-
phaseErrors["3"] = result.error;
|
|
462
|
-
if (result.rateLimitHit)
|
|
463
|
-
rateLimitHitDuringSearch = true;
|
|
491
|
+
const result = await runPhase3(this.octokit, this.vetter, langQuery, minStars, config.projectCategories ?? [], remaining, phase0RepoSet, starredRepoSet, starredRepos, allCandidates, filterIssues, tracker);
|
|
492
|
+
recordPhaseResult("3", result);
|
|
464
493
|
strategiesUsed.push("maintained");
|
|
465
494
|
}
|
|
466
495
|
// Build result / error summary
|
|
@@ -501,11 +530,17 @@ export class IssueDiscovery {
|
|
|
501
530
|
`Found ${allCandidates.length} candidate${allCandidates.length === 1 ? "" : "s"} but some search phases were limited. ` +
|
|
502
531
|
`Try again after the rate limit resets for complete results.`;
|
|
503
532
|
}
|
|
504
|
-
// Personalization annotation (#1244): tag
|
|
505
|
-
// `personalization` marker
|
|
506
|
-
//
|
|
507
|
-
// a no-
|
|
508
|
-
|
|
533
|
+
// Personalization annotation (#1244, extended #168): tag candidates with a
|
|
534
|
+
// net `personalization` marker (preferRepos/preferLanguages/boostIssueTypes
|
|
535
|
+
// add, avoidRepos subtracts) before sorting so the sort tier has values to
|
|
536
|
+
// read. Returns a new array (no in-place candidate mutation, #158); a no-op
|
|
537
|
+
// when none of the bias lists are supplied.
|
|
538
|
+
const ranked = annotateBoost(allCandidates, {
|
|
539
|
+
preferLanguages: options.preferLanguages,
|
|
540
|
+
preferRepos: options.preferRepos,
|
|
541
|
+
avoidRepos: options.avoidRepos,
|
|
542
|
+
boostIssueTypes: options.boostIssueTypes,
|
|
543
|
+
});
|
|
509
544
|
// Sort by priority, recommendation, boost (#1244), then viability score
|
|
510
545
|
ranked.sort((a, b) => {
|
|
511
546
|
const priorityOrder = {
|
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
* Extracted from issue-vetting.ts to isolate eligibility logic.
|
|
7
7
|
*/
|
|
8
8
|
import { Octokit } from "@octokit/rest";
|
|
9
|
+
import { type SearchBudgetTracker } from "./search-budget.js";
|
|
9
10
|
import type { CheckResult, LinkedPR } from "./types.js";
|
|
10
11
|
/**
|
|
11
12
|
* Result of the existing-PR check, including metadata for the first linked PR
|
|
@@ -29,7 +30,7 @@ export declare function checkNoExistingPR(octokit: Octokit, owner: string, repo:
|
|
|
29
30
|
* Results are cached per-repo for 15 minutes to avoid redundant Search API
|
|
30
31
|
* calls when multiple issues from the same repo are vetted.
|
|
31
32
|
*/
|
|
32
|
-
export declare function checkUserMergedPRsInRepo(octokit: Octokit, owner: string, repo: string): Promise<number | null>;
|
|
33
|
+
export declare function checkUserMergedPRsInRepo(octokit: Octokit, owner: string, repo: string, tracker?: SearchBudgetTracker): Promise<number | null>;
|
|
33
34
|
/**
|
|
34
35
|
* Check whether an issue has been claimed by another contributor
|
|
35
36
|
* by scanning recent comments for claim phrases.
|
|
@@ -9,7 +9,7 @@ import { paginateAll } from "./pagination.js";
|
|
|
9
9
|
import { errorMessage, rethrowIfFatal } from "./errors.js";
|
|
10
10
|
import { warn } from "./logger.js";
|
|
11
11
|
import { getHttpCache, withInflightDedup, versionedCacheKey, } from "./http-cache.js";
|
|
12
|
-
import { getSearchBudgetTracker } from "./search-budget.js";
|
|
12
|
+
import { getSearchBudgetTracker, } from "./search-budget.js";
|
|
13
13
|
function isLinkedPREvent(e) {
|
|
14
14
|
return e.event === "cross-referenced" && !!e.source?.issue?.pull_request;
|
|
15
15
|
}
|
|
@@ -161,7 +161,11 @@ const MERGED_PR_CACHE_TTL_MS = 15 * 60 * 1000;
|
|
|
161
161
|
* Results are cached per-repo for 15 minutes to avoid redundant Search API
|
|
162
162
|
* calls when multiple issues from the same repo are vetted.
|
|
163
163
|
*/
|
|
164
|
-
export async function checkUserMergedPRsInRepo(octokit, owner, repo
|
|
164
|
+
export async function checkUserMergedPRsInRepo(octokit, owner, repo,
|
|
165
|
+
// Optional injected budget tracker. Defaults to the shared singleton so
|
|
166
|
+
// existing callers keep the same global budget accounting; a host wanting
|
|
167
|
+
// per-search isolation threads its own tracker down from IssueVetter.
|
|
168
|
+
tracker = getSearchBudgetTracker()) {
|
|
165
169
|
const cache = getHttpCache();
|
|
166
170
|
const cacheKey = versionedCacheKey(`merged-prs:${owner}/${repo}`);
|
|
167
171
|
// In-flight dedup: parallel vetting frequently hits several issues from
|
|
@@ -177,7 +181,6 @@ export async function checkUserMergedPRsInRepo(octokit, owner, repo) {
|
|
|
177
181
|
return cached;
|
|
178
182
|
}
|
|
179
183
|
try {
|
|
180
|
-
const tracker = getSearchBudgetTracker();
|
|
181
184
|
await tracker.waitForBudget();
|
|
182
185
|
try {
|
|
183
186
|
// Use @me to search as the authenticated user
|