@oss-scout/core 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.bundle.cjs +70 -64
- package/dist/cli.js +19 -129
- package/dist/commands/search.d.ts +4 -0
- package/dist/commands/search.js +2 -0
- package/dist/core/anti-llm-policy.js +3 -30
- package/dist/core/issue-discovery.d.ts +10 -1
- package/dist/core/issue-discovery.js +83 -48
- package/dist/core/issue-eligibility.d.ts +2 -1
- package/dist/core/issue-eligibility.js +6 -3
- package/dist/core/issue-vetting.d.ts +10 -1
- package/dist/core/issue-vetting.js +12 -2
- package/dist/core/personalization.d.ts +27 -12
- package/dist/core/personalization.js +50 -18
- package/dist/core/preference-fields.js +2 -0
- package/dist/core/probe-repo-file.d.ts +47 -0
- package/dist/core/probe-repo-file.js +57 -0
- package/dist/core/repo-health.js +9 -17
- package/dist/core/roadmap.js +11 -21
- package/dist/core/schemas.d.ts +4 -0
- package/dist/core/schemas.js +9 -0
- package/dist/core/search-phases.d.ts +5 -4
- package/dist/core/search-phases.js +12 -9
- package/dist/core/types.d.ts +15 -0
- package/dist/formatters/human.d.ts +60 -0
- package/dist/formatters/human.js +199 -0
- package/dist/scout.d.ts +24 -10
- package/dist/scout.js +29 -14
- package/package.json +1 -1
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
import { Octokit } from "@octokit/rest";
|
|
10
10
|
import { type SearchPriority, type IssueCandidate, type ProjectCategory, type ScoutPreferences, type ScoutState, type MergedPRRecord, type ClosedPRRecord, type OpenPRRecord } from "./types.js";
|
|
11
11
|
import { type PrefetchedIssueCore } from "./issue-graphql.js";
|
|
12
|
+
import { type SearchBudgetTracker } from "./search-budget.js";
|
|
12
13
|
/**
|
|
13
14
|
* Feature-mode signals supplied by the caller (orchestrator) — the vetter
|
|
14
15
|
* does NOT extract these from the GitHub issue itself. When passed, they
|
|
@@ -142,7 +143,15 @@ export declare function deriveRecommendation(input: RecommendationInput): Recomm
|
|
|
142
143
|
export declare class IssueVetter {
|
|
143
144
|
private octokit;
|
|
144
145
|
private stateReader;
|
|
145
|
-
|
|
146
|
+
private budgetTracker;
|
|
147
|
+
/**
|
|
148
|
+
* @param octokit - Authenticated Octokit instance
|
|
149
|
+
* @param stateReader - Read-only scout state interface
|
|
150
|
+
* @param budgetTracker - Search budget tracker. Defaults to the shared
|
|
151
|
+
* singleton so existing callers behave identically; inject a per-search
|
|
152
|
+
* instance to isolate budget accounting in a long-lived concurrent host.
|
|
153
|
+
*/
|
|
154
|
+
constructor(octokit: Octokit, stateReader: ScoutStateReader, budgetTracker?: SearchBudgetTracker);
|
|
146
155
|
/**
|
|
147
156
|
* Vet a specific issue — runs all checks and computes recommendation + viability score.
|
|
148
157
|
* Results are cached for 15 minutes to avoid redundant API calls on repeated searches.
|
|
@@ -16,6 +16,7 @@ import { checkProjectHealth, fetchContributionGuidelines, } from "./repo-health.
|
|
|
16
16
|
import { fetchAndScanAntiLLMPolicy } from "./anti-llm-policy.js";
|
|
17
17
|
import { prefetchIssueCores, issueCoreKey, } from "./issue-graphql.js";
|
|
18
18
|
import { getHttpCache, versionedCacheKey } from "./http-cache.js";
|
|
19
|
+
import { getSearchBudgetTracker, } from "./search-budget.js";
|
|
19
20
|
import { triageWithSLM, buildTriageInput, } from "./slm-triage.js";
|
|
20
21
|
const MODULE = "issue-vetting";
|
|
21
22
|
/** Vetting concurrency: kept low to reduce burst pressure on GitHub's secondary rate limit. */
|
|
@@ -119,9 +120,18 @@ export function deriveRecommendation(input) {
|
|
|
119
120
|
export class IssueVetter {
|
|
120
121
|
octokit;
|
|
121
122
|
stateReader;
|
|
122
|
-
|
|
123
|
+
budgetTracker;
|
|
124
|
+
/**
|
|
125
|
+
* @param octokit - Authenticated Octokit instance
|
|
126
|
+
* @param stateReader - Read-only scout state interface
|
|
127
|
+
* @param budgetTracker - Search budget tracker. Defaults to the shared
|
|
128
|
+
* singleton so existing callers behave identically; inject a per-search
|
|
129
|
+
* instance to isolate budget accounting in a long-lived concurrent host.
|
|
130
|
+
*/
|
|
131
|
+
constructor(octokit, stateReader, budgetTracker = getSearchBudgetTracker()) {
|
|
123
132
|
this.octokit = octokit;
|
|
124
133
|
this.stateReader = stateReader;
|
|
134
|
+
this.budgetTracker = budgetTracker;
|
|
125
135
|
}
|
|
126
136
|
/**
|
|
127
137
|
* Vet a specific issue — runs all checks and computes recommendation + viability score.
|
|
@@ -170,7 +180,7 @@ export class IssueVetter {
|
|
|
170
180
|
fetchContributionGuidelines(this.octokit, owner, repo),
|
|
171
181
|
hasMergedPRsInRepo
|
|
172
182
|
? Promise.resolve(0)
|
|
173
|
-
: checkUserMergedPRsInRepo(this.octokit, owner, repo),
|
|
183
|
+
: checkUserMergedPRsInRepo(this.octokit, owner, repo, this.budgetTracker),
|
|
174
184
|
]);
|
|
175
185
|
// Anti-LLM scan reuses the CONTRIBUTING text just fetched above —
|
|
176
186
|
// dedup'd to avoid 4 redundant getContent calls on cold-cache repos.
|
|
@@ -27,24 +27,39 @@ import type { IssueCandidate } from "./types.js";
|
|
|
27
27
|
*/
|
|
28
28
|
export declare const REPO_BOOST = 20;
|
|
29
29
|
export declare const LANGUAGE_BOOST = 10;
|
|
30
|
+
/** Soft boost for an issue-label ("issue type") match (#168). Language-tier. */
|
|
31
|
+
export declare const ISSUE_TYPE_BOOST = 10;
|
|
30
32
|
/**
|
|
31
|
-
*
|
|
32
|
-
*
|
|
33
|
-
*
|
|
34
|
-
|
|
33
|
+
* Soft penalty for an avoidRepos match (#168). Milder than the hard
|
|
34
|
+
* excludeRepos filter: it pushes the candidate down but a strong boost (e.g. a
|
|
35
|
+
* preferRepos affinity, +20) can still outweigh it.
|
|
36
|
+
*/
|
|
37
|
+
export declare const AVOID_PENALTY = 15;
|
|
38
|
+
/** Per-call personalization bias lists (#168). All optional; empty = no effect. */
|
|
39
|
+
export interface PersonalizationBias {
|
|
40
|
+
preferLanguages?: string[];
|
|
41
|
+
preferRepos?: string[];
|
|
42
|
+
avoidRepos?: string[];
|
|
43
|
+
boostIssueTypes?: string[];
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* The personalization sort weight of a candidate: its net score, or 0 when it
|
|
47
|
+
* carries no personalization marker. Reads the structural `personalization`
|
|
48
|
+
* field (#158). The score can be negative when avoidRepos applied (#168).
|
|
35
49
|
*/
|
|
36
50
|
export declare function boostScoreOf(candidate: IssueCandidate): number;
|
|
37
51
|
/**
|
|
38
|
-
* Return a new candidate list where each candidate
|
|
39
|
-
*
|
|
40
|
-
*
|
|
41
|
-
*
|
|
42
|
-
*
|
|
52
|
+
* Return a new candidate list where each candidate matching a caller-supplied
|
|
53
|
+
* bias carries a `personalization` marker with a NET score (#168): preferRepos,
|
|
54
|
+
* preferLanguages and boostIssueTypes add; avoidRepos subtracts. The score may
|
|
55
|
+
* be negative (avoid-only) — boostScoreOf sorts those below neutral candidates.
|
|
56
|
+
* Does NOT mutate the input (#158): matched candidates are shallow copies,
|
|
57
|
+
* unmatched ones pass through unchanged.
|
|
43
58
|
*
|
|
44
|
-
* No-op when
|
|
45
|
-
*
|
|
59
|
+
* No-op when every bias list is empty/undefined: the input array is returned
|
|
60
|
+
* as-is and the sort tier collapses to 0 for every candidate.
|
|
46
61
|
*/
|
|
47
|
-
export declare function annotateBoost(candidates: IssueCandidate[],
|
|
62
|
+
export declare function annotateBoost(candidates: IssueCandidate[], bias?: PersonalizationBias): IssueCandidate[];
|
|
48
63
|
/**
|
|
49
64
|
* Apply a diversity-counterweight pass over a pre-sorted candidate list
|
|
50
65
|
* (#1244). Returns the first `maxResults` picks in priority order:
|
|
@@ -26,36 +26,54 @@
|
|
|
26
26
|
*/
|
|
27
27
|
export const REPO_BOOST = 20;
|
|
28
28
|
export const LANGUAGE_BOOST = 10;
|
|
29
|
+
/** Soft boost for an issue-label ("issue type") match (#168). Language-tier. */
|
|
30
|
+
export const ISSUE_TYPE_BOOST = 10;
|
|
29
31
|
/**
|
|
30
|
-
*
|
|
31
|
-
*
|
|
32
|
-
*
|
|
33
|
-
|
|
32
|
+
* Soft penalty for an avoidRepos match (#168). Milder than the hard
|
|
33
|
+
* excludeRepos filter: it pushes the candidate down but a strong boost (e.g. a
|
|
34
|
+
* preferRepos affinity, +20) can still outweigh it.
|
|
35
|
+
*/
|
|
36
|
+
export const AVOID_PENALTY = 15;
|
|
37
|
+
/**
|
|
38
|
+
* The personalization sort weight of a candidate: its net score, or 0 when it
|
|
39
|
+
* carries no personalization marker. Reads the structural `personalization`
|
|
40
|
+
* field (#158). The score can be negative when avoidRepos applied (#168).
|
|
34
41
|
*/
|
|
35
42
|
export function boostScoreOf(candidate) {
|
|
36
43
|
return candidate.personalization?.kind === "boosted"
|
|
37
44
|
? candidate.personalization.score
|
|
38
45
|
: 0;
|
|
39
46
|
}
|
|
47
|
+
function normalizeSet(values) {
|
|
48
|
+
return new Set((values ?? []).map((v) => v.trim().toLowerCase()).filter(Boolean));
|
|
49
|
+
}
|
|
40
50
|
/**
|
|
41
|
-
* Return a new candidate list where each candidate
|
|
42
|
-
*
|
|
43
|
-
*
|
|
44
|
-
*
|
|
45
|
-
*
|
|
51
|
+
* Return a new candidate list where each candidate matching a caller-supplied
|
|
52
|
+
* bias carries a `personalization` marker with a NET score (#168): preferRepos,
|
|
53
|
+
* preferLanguages and boostIssueTypes add; avoidRepos subtracts. The score may
|
|
54
|
+
* be negative (avoid-only) — boostScoreOf sorts those below neutral candidates.
|
|
55
|
+
* Does NOT mutate the input (#158): matched candidates are shallow copies,
|
|
56
|
+
* unmatched ones pass through unchanged.
|
|
46
57
|
*
|
|
47
|
-
* No-op when
|
|
48
|
-
*
|
|
58
|
+
* No-op when every bias list is empty/undefined: the input array is returned
|
|
59
|
+
* as-is and the sort tier collapses to 0 for every candidate.
|
|
49
60
|
*/
|
|
50
|
-
export function annotateBoost(candidates, preferLanguages, preferRepos) {
|
|
51
|
-
const langSet =
|
|
52
|
-
const repoSet =
|
|
53
|
-
|
|
61
|
+
export function annotateBoost(candidates, bias = {}) {
|
|
62
|
+
const langSet = normalizeSet(bias.preferLanguages);
|
|
63
|
+
const repoSet = normalizeSet(bias.preferRepos);
|
|
64
|
+
const avoidSet = normalizeSet(bias.avoidRepos);
|
|
65
|
+
const typeSet = normalizeSet(bias.boostIssueTypes);
|
|
66
|
+
if (langSet.size === 0 &&
|
|
67
|
+
repoSet.size === 0 &&
|
|
68
|
+
avoidSet.size === 0 &&
|
|
69
|
+
typeSet.size === 0) {
|
|
54
70
|
return candidates;
|
|
71
|
+
}
|
|
55
72
|
return candidates.map((c) => {
|
|
56
73
|
let score = 0;
|
|
57
74
|
const reasons = [];
|
|
58
|
-
|
|
75
|
+
const repoLower = c.issue.repo.toLowerCase();
|
|
76
|
+
if (repoSet.size > 0 && repoSet.has(repoLower)) {
|
|
59
77
|
score += REPO_BOOST;
|
|
60
78
|
reasons.push(`repo affinity: ${c.issue.repo}`);
|
|
61
79
|
}
|
|
@@ -64,7 +82,18 @@ export function annotateBoost(candidates, preferLanguages, preferRepos) {
|
|
|
64
82
|
score += LANGUAGE_BOOST;
|
|
65
83
|
reasons.push(`language match: ${lang}`);
|
|
66
84
|
}
|
|
67
|
-
if (
|
|
85
|
+
if (typeSet.size > 0) {
|
|
86
|
+
const matched = c.issue.labels.find((l) => typeSet.has(l.toLowerCase()));
|
|
87
|
+
if (matched) {
|
|
88
|
+
score += ISSUE_TYPE_BOOST;
|
|
89
|
+
reasons.push(`issue type: ${matched}`);
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
if (avoidSet.size > 0 && avoidSet.has(repoLower)) {
|
|
93
|
+
score -= AVOID_PENALTY;
|
|
94
|
+
reasons.push(`avoided repo: ${c.issue.repo}`);
|
|
95
|
+
}
|
|
96
|
+
if (reasons.length === 0)
|
|
68
97
|
return c;
|
|
69
98
|
return { ...c, personalization: { kind: "boosted", score, reasons } };
|
|
70
99
|
});
|
|
@@ -116,7 +145,10 @@ export function applyDiversityRatio(candidates, maxResults, diversityRatio) {
|
|
|
116
145
|
break;
|
|
117
146
|
if (seen.has(c.issue.url))
|
|
118
147
|
continue;
|
|
119
|
-
|
|
148
|
+
// Diversity slots are for candidates that matched NO personalization bias.
|
|
149
|
+
// Exclude both boosted (>0) and avoided (<0) candidates — resurfacing an
|
|
150
|
+
// avoided repo via a diversity slot would defeat the avoid (#168).
|
|
151
|
+
if (boostScoreOf(c) !== 0)
|
|
120
152
|
continue;
|
|
121
153
|
// Tag a shallow copy rather than mutating the shared candidate (#158).
|
|
122
154
|
picks.push({ ...c, personalization: { kind: "diversity" } });
|
|
@@ -36,6 +36,8 @@ export const FIELD_CONFIGS = {
|
|
|
36
36
|
preferLanguages: { type: "array" },
|
|
37
37
|
preferRepos: { type: "array" },
|
|
38
38
|
diversityRatio: { type: "float" },
|
|
39
|
+
avoidRepos: { type: "array" },
|
|
40
|
+
boostIssueTypes: { type: "array" },
|
|
39
41
|
slmTriageModel: { type: "string" },
|
|
40
42
|
slmTriageHost: { type: "string" },
|
|
41
43
|
featuresAnchorThreshold: { type: "number" },
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Single-path repo-file probe (#156).
|
|
3
|
+
*
|
|
4
|
+
* Three modules (repo-health, roadmap, anti-llm-policy) independently fetch a
|
|
5
|
+
* repo doc by trying a list of candidate paths and stopping at the first hit.
|
|
6
|
+
* The per-path fetch was copy-pasted three times, each re-deriving the same
|
|
7
|
+
* 404-continue / fatal-propagate / base64-decode logic. This is the one
|
|
8
|
+
* genuinely-shared primitive.
|
|
9
|
+
*
|
|
10
|
+
* The orchestration around it stays per-caller (parallel 4-path probe,
|
|
11
|
+
* sequential 5-path probe, sequential family probe) and so do the return shapes
|
|
12
|
+
* (parsed guidelines, issue-ref set, policy scan). Only the single GET is
|
|
13
|
+
* shared.
|
|
14
|
+
*
|
|
15
|
+
* The `transient` flag is load-bearing: it distinguishes a clean miss (404 —
|
|
16
|
+
* file absent) from a degraded miss (5xx, network) so callers can decide
|
|
17
|
+
* whether to cache a negative result or leave it open to retry. Collapsing the
|
|
18
|
+
* two would bypass anti-llm-policy's transient-failure cache safeguard, so the
|
|
19
|
+
* primitive must keep them separate.
|
|
20
|
+
*/
|
|
21
|
+
import type { Octokit } from "@octokit/rest";
|
|
22
|
+
/**
|
|
23
|
+
* Result of probing one repo file path.
|
|
24
|
+
*
|
|
25
|
+
* - `text` — decoded UTF-8 content on a 200 with a file payload, else `null`
|
|
26
|
+
* (404, a non-content payload such as a directory listing, or a soft error).
|
|
27
|
+
* - `transient` — `true` only when the miss was a degraded failure (5xx,
|
|
28
|
+
* network) rather than a clean 404 / missing file. A `true` value means the
|
|
29
|
+
* `null` may be incomplete and the caller should avoid caching it as a known
|
|
30
|
+
* absence.
|
|
31
|
+
*/
|
|
32
|
+
export interface ProbeRepoFileResult {
|
|
33
|
+
text: string | null;
|
|
34
|
+
transient: boolean;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* GET one repo file path. Returns decoded content on a 200 file payload, a
|
|
38
|
+
* clean `null` on 404 or a non-content payload, and a transient `null` on a
|
|
39
|
+
* soft error (5xx, network) after logging it. Rethrows fatal errors (401 auth,
|
|
40
|
+
* rate limit) so the caller's existing rate-limit handling sees them.
|
|
41
|
+
*
|
|
42
|
+
* Callers that need 401/rate-limit to surface across a *parallel* batch (where
|
|
43
|
+
* a faster path may have already resolved) must inspect the rejected reasons
|
|
44
|
+
* themselves; this primitive only rethrows for the single path it owns. See
|
|
45
|
+
* repo-health and anti-llm-policy for that pre-scan.
|
|
46
|
+
*/
|
|
47
|
+
export declare function probeRepoFile(octokit: Octokit, owner: string, repo: string, path: string): Promise<ProbeRepoFileResult>;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Single-path repo-file probe (#156).
|
|
3
|
+
*
|
|
4
|
+
* Three modules (repo-health, roadmap, anti-llm-policy) independently fetch a
|
|
5
|
+
* repo doc by trying a list of candidate paths and stopping at the first hit.
|
|
6
|
+
* The per-path fetch was copy-pasted three times, each re-deriving the same
|
|
7
|
+
* 404-continue / fatal-propagate / base64-decode logic. This is the one
|
|
8
|
+
* genuinely-shared primitive.
|
|
9
|
+
*
|
|
10
|
+
* The orchestration around it stays per-caller (parallel 4-path probe,
|
|
11
|
+
* sequential 5-path probe, sequential family probe) and so do the return shapes
|
|
12
|
+
* (parsed guidelines, issue-ref set, policy scan). Only the single GET is
|
|
13
|
+
* shared.
|
|
14
|
+
*
|
|
15
|
+
* The `transient` flag is load-bearing: it distinguishes a clean miss (404 —
|
|
16
|
+
* file absent) from a degraded miss (5xx, network) so callers can decide
|
|
17
|
+
* whether to cache a negative result or leave it open to retry. Collapsing the
|
|
18
|
+
* two would bypass anti-llm-policy's transient-failure cache safeguard, so the
|
|
19
|
+
* primitive must keep them separate.
|
|
20
|
+
*/
|
|
21
|
+
import { errorMessage, getHttpStatusCode, rethrowIfFatal } from "./errors.js";
|
|
22
|
+
import { warn } from "./logger.js";
|
|
23
|
+
const MODULE = "probe-repo-file";
|
|
24
|
+
/**
|
|
25
|
+
* GET one repo file path. Returns decoded content on a 200 file payload, a
|
|
26
|
+
* clean `null` on 404 or a non-content payload, and a transient `null` on a
|
|
27
|
+
* soft error (5xx, network) after logging it. Rethrows fatal errors (401 auth,
|
|
28
|
+
* rate limit) so the caller's existing rate-limit handling sees them.
|
|
29
|
+
*
|
|
30
|
+
* Callers that need 401/rate-limit to surface across a *parallel* batch (where
|
|
31
|
+
* a faster path may have already resolved) must inspect the rejected reasons
|
|
32
|
+
* themselves; this primitive only rethrows for the single path it owns. See
|
|
33
|
+
* repo-health and anti-llm-policy for that pre-scan.
|
|
34
|
+
*/
|
|
35
|
+
export async function probeRepoFile(octokit, owner, repo, path) {
|
|
36
|
+
try {
|
|
37
|
+
const { data } = await octokit.repos.getContent({ owner, repo, path });
|
|
38
|
+
if (data &&
|
|
39
|
+
typeof data === "object" &&
|
|
40
|
+
"content" in data &&
|
|
41
|
+
typeof data.content === "string") {
|
|
42
|
+
return {
|
|
43
|
+
text: Buffer.from(data.content, "base64").toString("utf-8"),
|
|
44
|
+
transient: false,
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
return { text: null, transient: false };
|
|
48
|
+
}
|
|
49
|
+
catch (error) {
|
|
50
|
+
const status = getHttpStatusCode(error);
|
|
51
|
+
if (status === 404)
|
|
52
|
+
return { text: null, transient: false };
|
|
53
|
+
rethrowIfFatal(error);
|
|
54
|
+
warn(MODULE, `Unexpected error fetching ${path} from ${owner}/${repo}: ${errorMessage(error)}`);
|
|
55
|
+
return { text: null, transient: true };
|
|
56
|
+
}
|
|
57
|
+
}
|
package/dist/core/repo-health.js
CHANGED
|
@@ -8,6 +8,7 @@ import { daysBetween } from "./utils.js";
|
|
|
8
8
|
import { errorMessage, getHttpStatusCode, isRateLimitError, rethrowIfFatal, } from "./errors.js";
|
|
9
9
|
import { warn } from "./logger.js";
|
|
10
10
|
import { getHttpCache, cachedRequest, cachedTimeBased } from "./http-cache.js";
|
|
11
|
+
import { probeRepoFile } from "./probe-repo-file.js";
|
|
11
12
|
const MODULE = "repo-health";
|
|
12
13
|
// ── Cache for contribution guidelines ──
|
|
13
14
|
const guidelinesCache = new Map();
|
|
@@ -121,13 +122,11 @@ async function fetchContributionGuidelinesUncached(octokit, owner, repo) {
|
|
|
121
122
|
"docs/CONTRIBUTING.md",
|
|
122
123
|
"contributing.md",
|
|
123
124
|
];
|
|
124
|
-
// Probe all paths in parallel — take the first success in priority order
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
return null;
|
|
130
|
-
})));
|
|
125
|
+
// Probe all paths in parallel — take the first success in priority order.
|
|
126
|
+
// probeRepoFile rethrows 401/rate-limit, so those still surface here as
|
|
127
|
+
// rejected results for the pre-scan below; 404s and 5xx come back as a null
|
|
128
|
+
// text (the primitive warns on 5xx, so no extra warn is needed here).
|
|
129
|
+
const results = await Promise.allSettled(filesToCheck.map((file) => probeRepoFile(octokit, owner, repo, file)));
|
|
131
130
|
// Pre-scan: auth/rate-limit must propagate even if a faster probe succeeded —
|
|
132
131
|
// otherwise a path-restricted token that 401s on .github/CONTRIBUTING.md but
|
|
133
132
|
// wins on CONTRIBUTING.md would silently hide the auth misconfiguration.
|
|
@@ -139,20 +138,13 @@ async function fetchContributionGuidelinesUncached(octokit, owner, repo) {
|
|
|
139
138
|
throw result.reason;
|
|
140
139
|
}
|
|
141
140
|
}
|
|
142
|
-
for (
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
const guidelines = parseContributionGuidelines(result.value);
|
|
141
|
+
for (const result of results) {
|
|
142
|
+
if (result.status === "fulfilled" && result.value.text) {
|
|
143
|
+
const guidelines = parseContributionGuidelines(result.value.text);
|
|
146
144
|
guidelinesCache.set(cacheKey, { guidelines, fetchedAt: Date.now() });
|
|
147
145
|
pruneCache();
|
|
148
146
|
return guidelines;
|
|
149
147
|
}
|
|
150
|
-
if (result.status === "rejected") {
|
|
151
|
-
const status = getHttpStatusCode(result.reason);
|
|
152
|
-
if (status !== 404) {
|
|
153
|
-
warn(MODULE, `Unexpected error fetching ${filesToCheck[i]} from ${owner}/${repo}: ${errorMessage(result.reason)}`);
|
|
154
|
-
}
|
|
155
|
-
}
|
|
156
148
|
}
|
|
157
149
|
// Cache the negative result too and prune if needed
|
|
158
150
|
guidelinesCache.set(cacheKey, {
|
package/dist/core/roadmap.js
CHANGED
|
@@ -10,9 +10,7 @@
|
|
|
10
10
|
* Auth (401) and rate-limit errors propagate, matching the rest of the
|
|
11
11
|
* codebase's error strategy. Other errors degrade gracefully (warn + empty).
|
|
12
12
|
*/
|
|
13
|
-
import { errorMessage, getHttpStatusCode, rethrowIfFatal } from "./errors.js";
|
|
14
|
-
import { warn } from "./logger.js";
|
|
15
|
-
const MODULE = "roadmap";
|
|
13
|
+
import { probeRepoFile } from "./probe-repo-file.js";
|
|
16
14
|
/** TTL for roadmap fetch results (1 hour). */
|
|
17
15
|
const CACHE_TTL_MS = 60 * 60 * 1000;
|
|
18
16
|
/** Paths probed in priority order. First success wins. */
|
|
@@ -113,24 +111,16 @@ export async function fetchRoadmapIssueRefs(octokit, owner, repo) {
|
|
|
113
111
|
const roadmapInflight = new Map();
|
|
114
112
|
async function fetchRoadmapIssueRefsUncached(octokit, owner, repo, cacheKey) {
|
|
115
113
|
for (const path of ROADMAP_PATHS) {
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
catch (err) {
|
|
127
|
-
rethrowIfFatal(err);
|
|
128
|
-
const status = getHttpStatusCode(err);
|
|
129
|
-
if (status === 404)
|
|
130
|
-
continue; // path missing — try next
|
|
131
|
-
warn(MODULE, `Unexpected error fetching ${path} from ${owner}/${repo}: ${errorMessage(err)}`);
|
|
132
|
-
// Fall through and try next path.
|
|
133
|
-
}
|
|
114
|
+
// probeRepoFile rethrows 401/rate-limit, treats 404 and non-content
|
|
115
|
+
// payloads as a null text, and warns on 5xx — all of which we degrade past
|
|
116
|
+
// by trying the next path.
|
|
117
|
+
const { text } = await probeRepoFile(octokit, owner, repo, path);
|
|
118
|
+
if (!text)
|
|
119
|
+
continue;
|
|
120
|
+
const refs = parseRoadmapIssueRefs(text, owner, repo);
|
|
121
|
+
roadmapCache.set(cacheKey, { refs, fetchedAt: Date.now() });
|
|
122
|
+
pruneCache();
|
|
123
|
+
return refs;
|
|
134
124
|
}
|
|
135
125
|
// No roadmap found (or all probes errored softly). Cache the empty result
|
|
136
126
|
// so we don't re-probe every run.
|
package/dist/core/schemas.d.ts
CHANGED
|
@@ -279,6 +279,8 @@ export declare const ScoutPreferencesSchema: z.ZodObject<{
|
|
|
279
279
|
preferLanguages: z.ZodDefault<z.ZodArray<z.ZodString>>;
|
|
280
280
|
preferRepos: z.ZodDefault<z.ZodArray<z.ZodString>>;
|
|
281
281
|
diversityRatio: z.ZodDefault<z.ZodNumber>;
|
|
282
|
+
avoidRepos: z.ZodDefault<z.ZodArray<z.ZodString>>;
|
|
283
|
+
boostIssueTypes: z.ZodDefault<z.ZodArray<z.ZodString>>;
|
|
282
284
|
broadPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
|
|
283
285
|
/**
|
|
284
286
|
* Skip the expensive broad phase once this many candidates were found by
|
|
@@ -359,6 +361,8 @@ export declare const ScoutStateSchema: z.ZodObject<{
|
|
|
359
361
|
preferLanguages: z.ZodDefault<z.ZodArray<z.ZodString>>;
|
|
360
362
|
preferRepos: z.ZodDefault<z.ZodArray<z.ZodString>>;
|
|
361
363
|
diversityRatio: z.ZodDefault<z.ZodNumber>;
|
|
364
|
+
avoidRepos: z.ZodDefault<z.ZodArray<z.ZodString>>;
|
|
365
|
+
boostIssueTypes: z.ZodDefault<z.ZodArray<z.ZodString>>;
|
|
362
366
|
broadPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
|
|
363
367
|
/**
|
|
364
368
|
* Skip the expensive broad phase once this many candidates were found by
|
package/dist/core/schemas.js
CHANGED
|
@@ -42,6 +42,9 @@ export const CONCRETE_STRATEGIES = [
|
|
|
42
42
|
// ── Leaf schemas ────────────────────────────────────────────────────
|
|
43
43
|
export const RepoSignalsSchema = z.looseObject({
|
|
44
44
|
hasActiveMaintainers: z.boolean(),
|
|
45
|
+
// Retained for backward compatibility but no longer affects the repo score
|
|
46
|
+
// (#167): nothing computes it, and hasActiveMaintainers is the live activity
|
|
47
|
+
// proxy. Kept so old persisted state and the search JSON output still parse.
|
|
45
48
|
isResponsive: z.boolean(),
|
|
46
49
|
hasHostileComments: z.boolean(),
|
|
47
50
|
});
|
|
@@ -190,6 +193,12 @@ export const ScoutPreferencesSchema = z.looseObject({
|
|
|
190
193
|
preferLanguages: z.array(z.string()).default([]),
|
|
191
194
|
preferRepos: z.array(z.string()).default([]),
|
|
192
195
|
diversityRatio: z.number().min(0).max(1).default(0),
|
|
196
|
+
// Soft penalty (milder than the hard excludeRepos filter): candidates in
|
|
197
|
+
// these `owner/repo` slugs are pushed down the ranking but not removed (#168).
|
|
198
|
+
avoidRepos: z.array(z.string()).default([]),
|
|
199
|
+
// Soft boost for candidates whose issue labels match one of these types,
|
|
200
|
+
// case-insensitive (e.g. "bug", "good first issue") (#168).
|
|
201
|
+
boostIssueTypes: z.array(z.string()).default([]),
|
|
193
202
|
broadPhaseDelayMs: z.number().min(0).max(300000).default(90000),
|
|
194
203
|
/**
|
|
195
204
|
* Skip the expensive broad phase once this many candidates were found by
|
|
@@ -8,6 +8,7 @@ import { Octokit } from "@octokit/rest";
|
|
|
8
8
|
import { type SearchPriority, type IssueCandidate, type IssueScope } from "./types.js";
|
|
9
9
|
import { type GitHubSearchItem } from "./issue-filtering.js";
|
|
10
10
|
import { IssueVetter } from "./issue-vetting.js";
|
|
11
|
+
import { type SearchBudgetTracker } from "./search-budget.js";
|
|
11
12
|
/** Resolve scope tiers into a flat label list, merged with custom labels. */
|
|
12
13
|
export declare function buildEffectiveLabels(scopes: IssueScope[], customLabels: string[]): string[];
|
|
13
14
|
/** Round-robin interleave multiple arrays. */
|
|
@@ -22,7 +23,7 @@ export declare function cachedSearchIssues(octokit: Octokit, params: {
|
|
|
22
23
|
sort: "created" | "updated" | "comments" | "reactions" | "interactions";
|
|
23
24
|
order: "asc" | "desc";
|
|
24
25
|
per_page: number;
|
|
25
|
-
}): Promise<{
|
|
26
|
+
}, tracker?: SearchBudgetTracker): Promise<{
|
|
26
27
|
total_count: number;
|
|
27
28
|
items: GitHubSearchItem[];
|
|
28
29
|
}>;
|
|
@@ -42,7 +43,7 @@ export declare function fetchIssuesFromMaintainedRepos(octokit: Octokit, repos:
|
|
|
42
43
|
* calls `GET /repos/{owner}/{repo}/issues` which counts against the much
|
|
43
44
|
* larger Core API rate limit and avoids consuming the scarce Search quota.
|
|
44
45
|
*/
|
|
45
|
-
export declare function fetchIssuesFromKnownRepos(octokit: Octokit, vetter: IssueVetter, repos: string[], labels: string[], maxResults: number, priority: SearchPriority, filterFn: (items: GitHubSearchItem[]) => GitHubSearchItem[]): Promise<{
|
|
46
|
+
export declare function fetchIssuesFromKnownRepos(octokit: Octokit, vetter: IssueVetter, repos: string[], labels: string[], maxResults: number, priority: SearchPriority, filterFn: (items: GitHubSearchItem[]) => GitHubSearchItem[], perPage?: number): Promise<{
|
|
46
47
|
candidates: IssueCandidate[];
|
|
47
48
|
allReposFailed: boolean;
|
|
48
49
|
rateLimitHit: boolean;
|
|
@@ -60,7 +61,7 @@ export declare function fetchIssuesFromKnownRepos(octokit: Octokit, vetter: Issu
|
|
|
60
61
|
* @param buildQuery Callback that receives a label query string and returns the full search query
|
|
61
62
|
* @param perPage Number of results per API call
|
|
62
63
|
*/
|
|
63
|
-
export declare function searchWithChunkedLabels(octokit: Octokit, labels: string[], reservedOps: number, buildQuery: (labelQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
|
|
64
|
+
export declare function searchWithChunkedLabels(octokit: Octokit, labels: string[], reservedOps: number, buildQuery: (labelQuery: string) => string, perPage: number, tracker?: SearchBudgetTracker): Promise<GitHubSearchItem[]>;
|
|
64
65
|
/**
|
|
65
66
|
* Build per-call language qualifier strings, fanning out across languages
|
|
66
67
|
* when a multi-language + labels combination would trip GitHub Search's
|
|
@@ -84,7 +85,7 @@ export declare function buildLanguageVariants(languages: string[], isAnyLanguage
|
|
|
84
85
|
* e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
|
|
85
86
|
* @param perPage Results per API call
|
|
86
87
|
*/
|
|
87
|
-
export declare function searchAcrossLanguagesAndLabels(octokit: Octokit, languages: string[], isAnyLanguage: boolean, labels: string[], buildBaseQuery: (langQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
|
|
88
|
+
export declare function searchAcrossLanguagesAndLabels(octokit: Octokit, languages: string[], isAnyLanguage: boolean, labels: string[], buildBaseQuery: (langQuery: string) => string, perPage: number, tracker?: SearchBudgetTracker): Promise<GitHubSearchItem[]>;
|
|
88
89
|
/**
|
|
89
90
|
* Shared pipeline: spam-filter, repo-exclusion, vetting, and star-count filter.
|
|
90
91
|
* Used by Phases 2 and 3 to convert raw search results into vetted candidates.
|
|
@@ -10,7 +10,7 @@ import { debug, warn } from "./logger.js";
|
|
|
10
10
|
import { getHttpCache, versionedCacheKey } from "./http-cache.js";
|
|
11
11
|
import { detectLabelFarmingRepos, } from "./issue-filtering.js";
|
|
12
12
|
import { extractRepoFromUrl, sleep } from "./utils.js";
|
|
13
|
-
import { getSearchBudgetTracker } from "./search-budget.js";
|
|
13
|
+
import { getSearchBudgetTracker, } from "./search-budget.js";
|
|
14
14
|
const MODULE = "search-phases";
|
|
15
15
|
/** GitHub Search API enforces a max of 5 AND/OR/NOT operators per query. */
|
|
16
16
|
const GITHUB_MAX_BOOLEAN_OPS = 5;
|
|
@@ -83,7 +83,11 @@ const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
|
|
|
83
83
|
* Repeated identical queries within SEARCH_CACHE_TTL_MS return cached results
|
|
84
84
|
* without consuming GitHub API rate limit points.
|
|
85
85
|
*/
|
|
86
|
-
export async function cachedSearchIssues(octokit, params) {
|
|
86
|
+
export async function cachedSearchIssues(octokit, params,
|
|
87
|
+
// Optional injected budget tracker. Defaults to the shared singleton so
|
|
88
|
+
// existing callers keep the exact same global budget accounting; a host
|
|
89
|
+
// serving concurrent searches can inject a per-search tracker for isolation.
|
|
90
|
+
tracker = getSearchBudgetTracker()) {
|
|
87
91
|
const cacheKey = versionedCacheKey(`search:${params.q}:${params.sort}:${params.order}:${params.per_page}`);
|
|
88
92
|
const cache = getHttpCache();
|
|
89
93
|
// Check cache first
|
|
@@ -93,7 +97,6 @@ export async function cachedSearchIssues(octokit, params) {
|
|
|
93
97
|
return cached;
|
|
94
98
|
}
|
|
95
99
|
// Fetch from API
|
|
96
|
-
const tracker = getSearchBudgetTracker();
|
|
97
100
|
await tracker.waitForBudget();
|
|
98
101
|
let data;
|
|
99
102
|
try {
|
|
@@ -183,7 +186,7 @@ export async function fetchIssuesFromMaintainedRepos(octokit, repos, minStars, m
|
|
|
183
186
|
* calls `GET /repos/{owner}/{repo}/issues` which counts against the much
|
|
184
187
|
* larger Core API rate limit and avoids consuming the scarce Search quota.
|
|
185
188
|
*/
|
|
186
|
-
export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels, maxResults, priority, filterFn) {
|
|
189
|
+
export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels, maxResults, priority, filterFn, perPage = 5) {
|
|
187
190
|
const candidates = [];
|
|
188
191
|
let failedRepos = 0;
|
|
189
192
|
let rateLimitFailures = 0;
|
|
@@ -210,7 +213,7 @@ export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels,
|
|
|
210
213
|
state: "open",
|
|
211
214
|
sort: "created",
|
|
212
215
|
direction: "desc",
|
|
213
|
-
per_page:
|
|
216
|
+
per_page: perPage,
|
|
214
217
|
...(label !== undefined ? { labels: label } : {}),
|
|
215
218
|
});
|
|
216
219
|
for (const issue of response.data) {
|
|
@@ -273,7 +276,7 @@ export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels,
|
|
|
273
276
|
* @param buildQuery Callback that receives a label query string and returns the full search query
|
|
274
277
|
* @param perPage Number of results per API call
|
|
275
278
|
*/
|
|
276
|
-
export async function searchWithChunkedLabels(octokit, labels, reservedOps, buildQuery, perPage) {
|
|
279
|
+
export async function searchWithChunkedLabels(octokit, labels, reservedOps, buildQuery, perPage, tracker = getSearchBudgetTracker()) {
|
|
277
280
|
const labelChunks = chunkLabels(labels, reservedOps);
|
|
278
281
|
const seenUrls = new Set();
|
|
279
282
|
const allItems = [];
|
|
@@ -286,7 +289,7 @@ export async function searchWithChunkedLabels(octokit, labels, reservedOps, buil
|
|
|
286
289
|
sort: "created",
|
|
287
290
|
order: "desc",
|
|
288
291
|
per_page: perPage,
|
|
289
|
-
});
|
|
292
|
+
}, tracker);
|
|
290
293
|
for (const item of data.items) {
|
|
291
294
|
if (!seenUrls.has(item.html_url)) {
|
|
292
295
|
seenUrls.add(item.html_url);
|
|
@@ -327,7 +330,7 @@ export function buildLanguageVariants(languages, isAnyLanguage, hasLabels) {
|
|
|
327
330
|
* e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
|
|
328
331
|
* @param perPage Results per API call
|
|
329
332
|
*/
|
|
330
|
-
export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, labels, buildBaseQuery, perPage) {
|
|
333
|
+
export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, labels, buildBaseQuery, perPage, tracker = getSearchBudgetTracker()) {
|
|
331
334
|
const langVariants = buildLanguageVariants(languages, isAnyLanguage, labels.length > 0);
|
|
332
335
|
const seenUrls = new Set();
|
|
333
336
|
const allItems = [];
|
|
@@ -336,7 +339,7 @@ export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLa
|
|
|
336
339
|
await sleep(INTER_QUERY_DELAY_MS);
|
|
337
340
|
const items = await searchWithChunkedLabels(octokit, labels, 0, (labelQ) => `${buildBaseQuery(langVariants[i])} ${labelQ}`
|
|
338
341
|
.replace(/ +/g, " ")
|
|
339
|
-
.trim(), perPage);
|
|
342
|
+
.trim(), perPage, tracker);
|
|
340
343
|
for (const item of items) {
|
|
341
344
|
if (!seenUrls.has(item.html_url)) {
|
|
342
345
|
seenUrls.add(item.html_url);
|
package/dist/core/types.d.ts
CHANGED
|
@@ -243,6 +243,21 @@ export interface SearchOptions {
|
|
|
243
243
|
* disables the boost.
|
|
244
244
|
*/
|
|
245
245
|
preferRepos?: string[];
|
|
246
|
+
/**
|
|
247
|
+
* Per-call personalization bias: a SOFT penalty (milder than the hard
|
|
248
|
+
* `excludeRepos` filter) for candidates in one of these `owner/repo` slugs
|
|
249
|
+
* (#168). They are pushed below equally-recommended non-matches but not
|
|
250
|
+
* removed; a strong boost can still outweigh the penalty. Empty / undefined
|
|
251
|
+
* disables it.
|
|
252
|
+
*/
|
|
253
|
+
avoidRepos?: string[];
|
|
254
|
+
/**
|
|
255
|
+
* Per-call personalization bias: a soft boost for candidates whose issue
|
|
256
|
+
* labels match one of these types, case-insensitive (e.g. "bug",
|
|
257
|
+
* "good first issue") (#168). Same tier as a language match. Does not filter
|
|
258
|
+
* results, does not change `viabilityScore`. Empty / undefined disables it.
|
|
259
|
+
*/
|
|
260
|
+
boostIssueTypes?: string[];
|
|
246
261
|
/**
|
|
247
262
|
* Counterweight against echo-chamber bias as `preferLanguages` /
|
|
248
263
|
* `preferRepos` boosts accumulate over time (#1244). A value of 0.2
|