@oss-scout/core 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@
9
9
  import { Octokit } from "@octokit/rest";
10
10
  import { type SearchPriority, type IssueCandidate, type ProjectCategory, type ScoutPreferences, type ScoutState, type MergedPRRecord, type ClosedPRRecord, type OpenPRRecord } from "./types.js";
11
11
  import { type PrefetchedIssueCore } from "./issue-graphql.js";
12
+ import { type SearchBudgetTracker } from "./search-budget.js";
12
13
  /**
13
14
  * Feature-mode signals supplied by the caller (orchestrator) — the vetter
14
15
  * does NOT extract these from the GitHub issue itself. When passed, they
@@ -142,7 +143,15 @@ export declare function deriveRecommendation(input: RecommendationInput): Recomm
142
143
  export declare class IssueVetter {
143
144
  private octokit;
144
145
  private stateReader;
145
- constructor(octokit: Octokit, stateReader: ScoutStateReader);
146
+ private budgetTracker;
147
+ /**
148
+ * @param octokit - Authenticated Octokit instance
149
+ * @param stateReader - Read-only scout state interface
150
+ * @param budgetTracker - Search budget tracker. Defaults to the shared
151
+ * singleton so existing callers behave identically; inject a per-search
152
+ * instance to isolate budget accounting in a long-lived concurrent host.
153
+ */
154
+ constructor(octokit: Octokit, stateReader: ScoutStateReader, budgetTracker?: SearchBudgetTracker);
146
155
  /**
147
156
  * Vet a specific issue — runs all checks and computes recommendation + viability score.
148
157
  * Results are cached for 15 minutes to avoid redundant API calls on repeated searches.
@@ -16,6 +16,7 @@ import { checkProjectHealth, fetchContributionGuidelines, } from "./repo-health.
16
16
  import { fetchAndScanAntiLLMPolicy } from "./anti-llm-policy.js";
17
17
  import { prefetchIssueCores, issueCoreKey, } from "./issue-graphql.js";
18
18
  import { getHttpCache, versionedCacheKey } from "./http-cache.js";
19
+ import { getSearchBudgetTracker, } from "./search-budget.js";
19
20
  import { triageWithSLM, buildTriageInput, } from "./slm-triage.js";
20
21
  const MODULE = "issue-vetting";
21
22
  /** Vetting concurrency: kept low to reduce burst pressure on GitHub's secondary rate limit. */
@@ -119,9 +120,18 @@ export function deriveRecommendation(input) {
119
120
  export class IssueVetter {
120
121
  octokit;
121
122
  stateReader;
122
- constructor(octokit, stateReader) {
123
+ budgetTracker;
124
+ /**
125
+ * @param octokit - Authenticated Octokit instance
126
+ * @param stateReader - Read-only scout state interface
127
+ * @param budgetTracker - Search budget tracker. Defaults to the shared
128
+ * singleton so existing callers behave identically; inject a per-search
129
+ * instance to isolate budget accounting in a long-lived concurrent host.
130
+ */
131
+ constructor(octokit, stateReader, budgetTracker = getSearchBudgetTracker()) {
123
132
  this.octokit = octokit;
124
133
  this.stateReader = stateReader;
134
+ this.budgetTracker = budgetTracker;
125
135
  }
126
136
  /**
127
137
  * Vet a specific issue — runs all checks and computes recommendation + viability score.
@@ -170,7 +180,7 @@ export class IssueVetter {
170
180
  fetchContributionGuidelines(this.octokit, owner, repo),
171
181
  hasMergedPRsInRepo
172
182
  ? Promise.resolve(0)
173
- : checkUserMergedPRsInRepo(this.octokit, owner, repo),
183
+ : checkUserMergedPRsInRepo(this.octokit, owner, repo, this.budgetTracker),
174
184
  ]);
175
185
  // Anti-LLM scan reuses the CONTRIBUTING text just fetched above —
176
186
  // dedup'd to avoid 4 redundant getContent calls on cold-cache repos.
@@ -27,24 +27,39 @@ import type { IssueCandidate } from "./types.js";
27
27
  */
28
28
  export declare const REPO_BOOST = 20;
29
29
  export declare const LANGUAGE_BOOST = 10;
30
+ /** Soft boost for an issue-label ("issue type") match (#168). Language-tier. */
31
+ export declare const ISSUE_TYPE_BOOST = 10;
30
32
  /**
31
- * The personalization sort weight of a candidate: its boost score, or 0 when it
32
- * is not boosted (unboosted or a diversity slot). Reads the structural
33
- * `personalization` field (#158) so callers never poke at the old loose
34
- * `boostScore` field.
33
+ * Soft penalty for an avoidRepos match (#168). Milder than the hard
34
+ * excludeRepos filter: it pushes the candidate down but a strong boost (e.g. a
35
+ * preferRepos affinity, +20) can still outweigh it.
36
+ */
37
+ export declare const AVOID_PENALTY = 15;
38
+ /** Per-call personalization bias lists (#168). All optional; empty = no effect. */
39
+ export interface PersonalizationBias {
40
+ preferLanguages?: string[];
41
+ preferRepos?: string[];
42
+ avoidRepos?: string[];
43
+ boostIssueTypes?: string[];
44
+ }
45
+ /**
46
+ * The personalization sort weight of a candidate: its net score, or 0 when it
47
+ * carries no personalization marker. Reads the structural `personalization`
48
+ * field (#158). The score can be negative when avoidRepos applied (#168).
35
49
  */
36
50
  export declare function boostScoreOf(candidate: IssueCandidate): number;
37
51
  /**
38
- * Return a new candidate list where each candidate that matches a
39
- * caller-supplied preference carries `personalization: { kind: "boosted", ... }`.
40
- * Does NOT mutate the input candidates (#158) — matched candidates are shallow
41
- * copies with the field set; unmatched candidates are passed through unchanged.
42
- * The caller re-sorts the returned array.
52
+ * Return a new candidate list where each candidate matching a caller-supplied
53
+ * bias carries a `personalization` marker with a NET score (#168): preferRepos,
54
+ * preferLanguages and boostIssueTypes add; avoidRepos subtracts. The score may
55
+ * be negative (avoid-only) — boostScoreOf sorts those below neutral candidates.
56
+ * Does NOT mutate the input (#158): matched candidates are shallow copies,
57
+ * unmatched ones pass through unchanged.
43
58
  *
44
- * No-op when both preference lists are empty or undefined: the input array is
45
- * returned as-is and the sort tier collapses to 0 for every candidate.
59
+ * No-op when every bias list is empty/undefined: the input array is returned
60
+ * as-is and the sort tier collapses to 0 for every candidate.
46
61
  */
47
- export declare function annotateBoost(candidates: IssueCandidate[], preferLanguages?: string[], preferRepos?: string[]): IssueCandidate[];
62
+ export declare function annotateBoost(candidates: IssueCandidate[], bias?: PersonalizationBias): IssueCandidate[];
48
63
  /**
49
64
  * Apply a diversity-counterweight pass over a pre-sorted candidate list
50
65
  * (#1244). Returns the first `maxResults` picks in priority order:
@@ -26,36 +26,54 @@
26
26
  */
27
27
  export const REPO_BOOST = 20;
28
28
  export const LANGUAGE_BOOST = 10;
29
+ /** Soft boost for an issue-label ("issue type") match (#168). Language-tier. */
30
+ export const ISSUE_TYPE_BOOST = 10;
29
31
  /**
30
- * The personalization sort weight of a candidate: its boost score, or 0 when it
31
- * is not boosted (unboosted or a diversity slot). Reads the structural
32
- * `personalization` field (#158) so callers never poke at the old loose
33
- * `boostScore` field.
32
+ * Soft penalty for an avoidRepos match (#168). Milder than the hard
33
+ * excludeRepos filter: it pushes the candidate down but a strong boost (e.g. a
34
+ * preferRepos affinity, +20) can still outweigh it.
35
+ */
36
+ export const AVOID_PENALTY = 15;
37
+ /**
38
+ * The personalization sort weight of a candidate: its net score, or 0 when it
39
+ * carries no personalization marker. Reads the structural `personalization`
40
+ * field (#158). The score can be negative when avoidRepos applied (#168).
34
41
  */
35
42
  export function boostScoreOf(candidate) {
36
43
  return candidate.personalization?.kind === "boosted"
37
44
  ? candidate.personalization.score
38
45
  : 0;
39
46
  }
47
+ function normalizeSet(values) {
48
+ return new Set((values ?? []).map((v) => v.trim().toLowerCase()).filter(Boolean));
49
+ }
40
50
  /**
41
- * Return a new candidate list where each candidate that matches a
42
- * caller-supplied preference carries `personalization: { kind: "boosted", ... }`.
43
- * Does NOT mutate the input candidates (#158) — matched candidates are shallow
44
- * copies with the field set; unmatched candidates are passed through unchanged.
45
- * The caller re-sorts the returned array.
51
+ * Return a new candidate list where each candidate matching a caller-supplied
52
+ * bias carries a `personalization` marker with a NET score (#168): preferRepos,
53
+ * preferLanguages and boostIssueTypes add; avoidRepos subtracts. The score may
54
+ * be negative (avoid-only) — boostScoreOf sorts those below neutral candidates.
55
+ * Does NOT mutate the input (#158): matched candidates are shallow copies,
56
+ * unmatched ones pass through unchanged.
46
57
  *
47
- * No-op when both preference lists are empty or undefined: the input array is
48
- * returned as-is and the sort tier collapses to 0 for every candidate.
58
+ * No-op when every bias list is empty/undefined: the input array is returned
59
+ * as-is and the sort tier collapses to 0 for every candidate.
49
60
  */
50
- export function annotateBoost(candidates, preferLanguages, preferRepos) {
51
- const langSet = new Set((preferLanguages ?? []).map((l) => l.trim().toLowerCase()).filter(Boolean));
52
- const repoSet = new Set((preferRepos ?? []).map((r) => r.trim().toLowerCase()).filter(Boolean));
53
- if (langSet.size === 0 && repoSet.size === 0)
61
+ export function annotateBoost(candidates, bias = {}) {
62
+ const langSet = normalizeSet(bias.preferLanguages);
63
+ const repoSet = normalizeSet(bias.preferRepos);
64
+ const avoidSet = normalizeSet(bias.avoidRepos);
65
+ const typeSet = normalizeSet(bias.boostIssueTypes);
66
+ if (langSet.size === 0 &&
67
+ repoSet.size === 0 &&
68
+ avoidSet.size === 0 &&
69
+ typeSet.size === 0) {
54
70
  return candidates;
71
+ }
55
72
  return candidates.map((c) => {
56
73
  let score = 0;
57
74
  const reasons = [];
58
- if (repoSet.size > 0 && repoSet.has(c.issue.repo.toLowerCase())) {
75
+ const repoLower = c.issue.repo.toLowerCase();
76
+ if (repoSet.size > 0 && repoSet.has(repoLower)) {
59
77
  score += REPO_BOOST;
60
78
  reasons.push(`repo affinity: ${c.issue.repo}`);
61
79
  }
@@ -64,7 +82,18 @@ export function annotateBoost(candidates, preferLanguages, preferRepos) {
64
82
  score += LANGUAGE_BOOST;
65
83
  reasons.push(`language match: ${lang}`);
66
84
  }
67
- if (score === 0)
85
+ if (typeSet.size > 0) {
86
+ const matched = c.issue.labels.find((l) => typeSet.has(l.toLowerCase()));
87
+ if (matched) {
88
+ score += ISSUE_TYPE_BOOST;
89
+ reasons.push(`issue type: ${matched}`);
90
+ }
91
+ }
92
+ if (avoidSet.size > 0 && avoidSet.has(repoLower)) {
93
+ score -= AVOID_PENALTY;
94
+ reasons.push(`avoided repo: ${c.issue.repo}`);
95
+ }
96
+ if (reasons.length === 0)
68
97
  return c;
69
98
  return { ...c, personalization: { kind: "boosted", score, reasons } };
70
99
  });
@@ -116,7 +145,10 @@ export function applyDiversityRatio(candidates, maxResults, diversityRatio) {
116
145
  break;
117
146
  if (seen.has(c.issue.url))
118
147
  continue;
119
- if (boostScoreOf(c) > 0)
148
+ // Diversity slots are for candidates that matched NO personalization bias.
149
+ // Exclude both boosted (>0) and avoided (<0) candidates — resurfacing an
150
+ // avoided repo via a diversity slot would defeat the avoid (#168).
151
+ if (boostScoreOf(c) !== 0)
120
152
  continue;
121
153
  // Tag a shallow copy rather than mutating the shared candidate (#158).
122
154
  picks.push({ ...c, personalization: { kind: "diversity" } });
@@ -36,6 +36,8 @@ export const FIELD_CONFIGS = {
36
36
  preferLanguages: { type: "array" },
37
37
  preferRepos: { type: "array" },
38
38
  diversityRatio: { type: "float" },
39
+ avoidRepos: { type: "array" },
40
+ boostIssueTypes: { type: "array" },
39
41
  slmTriageModel: { type: "string" },
40
42
  slmTriageHost: { type: "string" },
41
43
  featuresAnchorThreshold: { type: "number" },
@@ -0,0 +1,47 @@
1
+ /**
2
+ * Single-path repo-file probe (#156).
3
+ *
4
+ * Three modules (repo-health, roadmap, anti-llm-policy) independently fetch a
5
+ * repo doc by trying a list of candidate paths and stopping at the first hit.
6
+ * The per-path fetch was copy-pasted three times, each re-deriving the same
7
+ * 404-continue / fatal-propagate / base64-decode logic. This is the one
8
+ * genuinely-shared primitive.
9
+ *
10
+ * The orchestration around it stays per-caller (parallel 4-path probe,
11
+ * sequential 5-path probe, sequential family probe) and so do the return shapes
12
+ * (parsed guidelines, issue-ref set, policy scan). Only the single GET is
13
+ * shared.
14
+ *
15
+ * The `transient` flag is load-bearing: it distinguishes a clean miss (404 —
16
+ * file absent) from a degraded miss (5xx, network) so callers can decide
17
+ * whether to cache a negative result or leave it open to retry. Collapsing the
18
+ * two would bypass anti-llm-policy's transient-failure cache safeguard, so the
19
+ * primitive must keep them separate.
20
+ */
21
+ import type { Octokit } from "@octokit/rest";
22
+ /**
23
+ * Result of probing one repo file path.
24
+ *
25
+ * - `text` — decoded UTF-8 content on a 200 with a file payload, else `null`
26
+ * (404, a non-content payload such as a directory listing, or a soft error).
27
+ * - `transient` — `true` only when the miss was a degraded failure (5xx,
28
+ * network) rather than a clean 404 / missing file. A `true` value means the
29
+ * `null` may be incomplete and the caller should avoid caching it as a known
30
+ * absence.
31
+ */
32
+ export interface ProbeRepoFileResult {
33
+ text: string | null;
34
+ transient: boolean;
35
+ }
36
+ /**
37
+ * GET one repo file path. Returns decoded content on a 200 file payload, a
38
+ * clean `null` on 404 or a non-content payload, and a transient `null` on a
39
+ * soft error (5xx, network) after logging it. Rethrows fatal errors (401 auth,
40
+ * rate limit) so the caller's existing rate-limit handling sees them.
41
+ *
42
+ * Callers that need 401/rate-limit to surface across a *parallel* batch (where
43
+ * a faster path may have already resolved) must inspect the rejected reasons
44
+ * themselves; this primitive only rethrows for the single path it owns. See
45
+ * repo-health and anti-llm-policy for that pre-scan.
46
+ */
47
+ export declare function probeRepoFile(octokit: Octokit, owner: string, repo: string, path: string): Promise<ProbeRepoFileResult>;
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Single-path repo-file probe (#156).
3
+ *
4
+ * Three modules (repo-health, roadmap, anti-llm-policy) independently fetch a
5
+ * repo doc by trying a list of candidate paths and stopping at the first hit.
6
+ * The per-path fetch was copy-pasted three times, each re-deriving the same
7
+ * 404-continue / fatal-propagate / base64-decode logic. This is the one
8
+ * genuinely-shared primitive.
9
+ *
10
+ * The orchestration around it stays per-caller (parallel 4-path probe,
11
+ * sequential 5-path probe, sequential family probe) and so do the return shapes
12
+ * (parsed guidelines, issue-ref set, policy scan). Only the single GET is
13
+ * shared.
14
+ *
15
+ * The `transient` flag is load-bearing: it distinguishes a clean miss (404 —
16
+ * file absent) from a degraded miss (5xx, network) so callers can decide
17
+ * whether to cache a negative result or leave it open to retry. Collapsing the
18
+ * two would bypass anti-llm-policy's transient-failure cache safeguard, so the
19
+ * primitive must keep them separate.
20
+ */
21
+ import { errorMessage, getHttpStatusCode, rethrowIfFatal } from "./errors.js";
22
+ import { warn } from "./logger.js";
23
+ const MODULE = "probe-repo-file";
24
+ /**
25
+ * GET one repo file path. Returns decoded content on a 200 file payload, a
26
+ * clean `null` on 404 or a non-content payload, and a transient `null` on a
27
+ * soft error (5xx, network) after logging it. Rethrows fatal errors (401 auth,
28
+ * rate limit) so the caller's existing rate-limit handling sees them.
29
+ *
30
+ * Callers that need 401/rate-limit to surface across a *parallel* batch (where
31
+ * a faster path may have already resolved) must inspect the rejected reasons
32
+ * themselves; this primitive only rethrows for the single path it owns. See
33
+ * repo-health and anti-llm-policy for that pre-scan.
34
+ */
35
+ export async function probeRepoFile(octokit, owner, repo, path) {
36
+ try {
37
+ const { data } = await octokit.repos.getContent({ owner, repo, path });
38
+ if (data &&
39
+ typeof data === "object" &&
40
+ "content" in data &&
41
+ typeof data.content === "string") {
42
+ return {
43
+ text: Buffer.from(data.content, "base64").toString("utf-8"),
44
+ transient: false,
45
+ };
46
+ }
47
+ return { text: null, transient: false };
48
+ }
49
+ catch (error) {
50
+ const status = getHttpStatusCode(error);
51
+ if (status === 404)
52
+ return { text: null, transient: false };
53
+ rethrowIfFatal(error);
54
+ warn(MODULE, `Unexpected error fetching ${path} from ${owner}/${repo}: ${errorMessage(error)}`);
55
+ return { text: null, transient: true };
56
+ }
57
+ }
@@ -8,6 +8,7 @@ import { daysBetween } from "./utils.js";
8
8
  import { errorMessage, getHttpStatusCode, isRateLimitError, rethrowIfFatal, } from "./errors.js";
9
9
  import { warn } from "./logger.js";
10
10
  import { getHttpCache, cachedRequest, cachedTimeBased } from "./http-cache.js";
11
+ import { probeRepoFile } from "./probe-repo-file.js";
11
12
  const MODULE = "repo-health";
12
13
  // ── Cache for contribution guidelines ──
13
14
  const guidelinesCache = new Map();
@@ -121,13 +122,11 @@ async function fetchContributionGuidelinesUncached(octokit, owner, repo) {
121
122
  "docs/CONTRIBUTING.md",
122
123
  "contributing.md",
123
124
  ];
124
- // Probe all paths in parallel — take the first success in priority order
125
- const results = await Promise.allSettled(filesToCheck.map((file) => octokit.repos.getContent({ owner, repo, path: file }).then(({ data }) => {
126
- if ("content" in data) {
127
- return Buffer.from(data.content, "base64").toString("utf-8");
128
- }
129
- return null;
130
- })));
125
+ // Probe all paths in parallel — take the first success in priority order.
126
+ // probeRepoFile rethrows 401/rate-limit, so those still surface here as
127
+ // rejected results for the pre-scan below; 404s and 5xx come back as a null
128
+ // text (the primitive warns on 5xx, so no extra warn is needed here).
129
+ const results = await Promise.allSettled(filesToCheck.map((file) => probeRepoFile(octokit, owner, repo, file)));
131
130
  // Pre-scan: auth/rate-limit must propagate even if a faster probe succeeded —
132
131
  // otherwise a path-restricted token that 401s on .github/CONTRIBUTING.md but
133
132
  // wins on CONTRIBUTING.md would silently hide the auth misconfiguration.
@@ -139,20 +138,13 @@ async function fetchContributionGuidelinesUncached(octokit, owner, repo) {
139
138
  throw result.reason;
140
139
  }
141
140
  }
142
- for (let i = 0; i < results.length; i++) {
143
- const result = results[i];
144
- if (result.status === "fulfilled" && result.value) {
145
- const guidelines = parseContributionGuidelines(result.value);
141
+ for (const result of results) {
142
+ if (result.status === "fulfilled" && result.value.text) {
143
+ const guidelines = parseContributionGuidelines(result.value.text);
146
144
  guidelinesCache.set(cacheKey, { guidelines, fetchedAt: Date.now() });
147
145
  pruneCache();
148
146
  return guidelines;
149
147
  }
150
- if (result.status === "rejected") {
151
- const status = getHttpStatusCode(result.reason);
152
- if (status !== 404) {
153
- warn(MODULE, `Unexpected error fetching ${filesToCheck[i]} from ${owner}/${repo}: ${errorMessage(result.reason)}`);
154
- }
155
- }
156
148
  }
157
149
  // Cache the negative result too and prune if needed
158
150
  guidelinesCache.set(cacheKey, {
@@ -10,9 +10,7 @@
10
10
  * Auth (401) and rate-limit errors propagate, matching the rest of the
11
11
  * codebase's error strategy. Other errors degrade gracefully (warn + empty).
12
12
  */
13
- import { errorMessage, getHttpStatusCode, rethrowIfFatal } from "./errors.js";
14
- import { warn } from "./logger.js";
15
- const MODULE = "roadmap";
13
+ import { probeRepoFile } from "./probe-repo-file.js";
16
14
  /** TTL for roadmap fetch results (1 hour). */
17
15
  const CACHE_TTL_MS = 60 * 60 * 1000;
18
16
  /** Paths probed in priority order. First success wins. */
@@ -113,24 +111,16 @@ export async function fetchRoadmapIssueRefs(octokit, owner, repo) {
113
111
  const roadmapInflight = new Map();
114
112
  async function fetchRoadmapIssueRefsUncached(octokit, owner, repo, cacheKey) {
115
113
  for (const path of ROADMAP_PATHS) {
116
- try {
117
- const { data } = await octokit.repos.getContent({ owner, repo, path });
118
- if (!("content" in data))
119
- continue;
120
- const content = Buffer.from(data.content, "base64").toString("utf-8");
121
- const refs = parseRoadmapIssueRefs(content, owner, repo);
122
- roadmapCache.set(cacheKey, { refs, fetchedAt: Date.now() });
123
- pruneCache();
124
- return refs;
125
- }
126
- catch (err) {
127
- rethrowIfFatal(err);
128
- const status = getHttpStatusCode(err);
129
- if (status === 404)
130
- continue; // path missing — try next
131
- warn(MODULE, `Unexpected error fetching ${path} from ${owner}/${repo}: ${errorMessage(err)}`);
132
- // Fall through and try next path.
133
- }
114
+ // probeRepoFile rethrows 401/rate-limit, treats 404 and non-content
115
+ // payloads as a null text, and warns on 5xx — all of which we degrade past
116
+ // by trying the next path.
117
+ const { text } = await probeRepoFile(octokit, owner, repo, path);
118
+ if (!text)
119
+ continue;
120
+ const refs = parseRoadmapIssueRefs(text, owner, repo);
121
+ roadmapCache.set(cacheKey, { refs, fetchedAt: Date.now() });
122
+ pruneCache();
123
+ return refs;
134
124
  }
135
125
  // No roadmap found (or all probes errored softly). Cache the empty result
136
126
  // so we don't re-probe every run.
@@ -279,6 +279,8 @@ export declare const ScoutPreferencesSchema: z.ZodObject<{
279
279
  preferLanguages: z.ZodDefault<z.ZodArray<z.ZodString>>;
280
280
  preferRepos: z.ZodDefault<z.ZodArray<z.ZodString>>;
281
281
  diversityRatio: z.ZodDefault<z.ZodNumber>;
282
+ avoidRepos: z.ZodDefault<z.ZodArray<z.ZodString>>;
283
+ boostIssueTypes: z.ZodDefault<z.ZodArray<z.ZodString>>;
282
284
  broadPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
283
285
  /**
284
286
  * Skip the expensive broad phase once this many candidates were found by
@@ -359,6 +361,8 @@ export declare const ScoutStateSchema: z.ZodObject<{
359
361
  preferLanguages: z.ZodDefault<z.ZodArray<z.ZodString>>;
360
362
  preferRepos: z.ZodDefault<z.ZodArray<z.ZodString>>;
361
363
  diversityRatio: z.ZodDefault<z.ZodNumber>;
364
+ avoidRepos: z.ZodDefault<z.ZodArray<z.ZodString>>;
365
+ boostIssueTypes: z.ZodDefault<z.ZodArray<z.ZodString>>;
362
366
  broadPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
363
367
  /**
364
368
  * Skip the expensive broad phase once this many candidates were found by
@@ -42,6 +42,9 @@ export const CONCRETE_STRATEGIES = [
42
42
  // ── Leaf schemas ────────────────────────────────────────────────────
43
43
  export const RepoSignalsSchema = z.looseObject({
44
44
  hasActiveMaintainers: z.boolean(),
45
+ // Retained for backward compatibility but no longer affects the repo score
46
+ // (#167): nothing computes it, and hasActiveMaintainers is the live activity
47
+ // proxy. Kept so old persisted state and the search JSON output still parse.
45
48
  isResponsive: z.boolean(),
46
49
  hasHostileComments: z.boolean(),
47
50
  });
@@ -190,6 +193,12 @@ export const ScoutPreferencesSchema = z.looseObject({
190
193
  preferLanguages: z.array(z.string()).default([]),
191
194
  preferRepos: z.array(z.string()).default([]),
192
195
  diversityRatio: z.number().min(0).max(1).default(0),
196
+ // Soft penalty (milder than the hard excludeRepos filter): candidates in
197
+ // these `owner/repo` slugs are pushed down the ranking but not removed (#168).
198
+ avoidRepos: z.array(z.string()).default([]),
199
+ // Soft boost for candidates whose issue labels match one of these types,
200
+ // case-insensitive (e.g. "bug", "good first issue") (#168).
201
+ boostIssueTypes: z.array(z.string()).default([]),
193
202
  broadPhaseDelayMs: z.number().min(0).max(300000).default(90000),
194
203
  /**
195
204
  * Skip the expensive broad phase once this many candidates were found by
@@ -8,6 +8,7 @@ import { Octokit } from "@octokit/rest";
8
8
  import { type SearchPriority, type IssueCandidate, type IssueScope } from "./types.js";
9
9
  import { type GitHubSearchItem } from "./issue-filtering.js";
10
10
  import { IssueVetter } from "./issue-vetting.js";
11
+ import { type SearchBudgetTracker } from "./search-budget.js";
11
12
  /** Resolve scope tiers into a flat label list, merged with custom labels. */
12
13
  export declare function buildEffectiveLabels(scopes: IssueScope[], customLabels: string[]): string[];
13
14
  /** Round-robin interleave multiple arrays. */
@@ -22,7 +23,7 @@ export declare function cachedSearchIssues(octokit: Octokit, params: {
22
23
  sort: "created" | "updated" | "comments" | "reactions" | "interactions";
23
24
  order: "asc" | "desc";
24
25
  per_page: number;
25
- }): Promise<{
26
+ }, tracker?: SearchBudgetTracker): Promise<{
26
27
  total_count: number;
27
28
  items: GitHubSearchItem[];
28
29
  }>;
@@ -42,7 +43,7 @@ export declare function fetchIssuesFromMaintainedRepos(octokit: Octokit, repos:
42
43
  * calls `GET /repos/{owner}/{repo}/issues` which counts against the much
43
44
  * larger Core API rate limit and avoids consuming the scarce Search quota.
44
45
  */
45
- export declare function fetchIssuesFromKnownRepos(octokit: Octokit, vetter: IssueVetter, repos: string[], labels: string[], maxResults: number, priority: SearchPriority, filterFn: (items: GitHubSearchItem[]) => GitHubSearchItem[]): Promise<{
46
+ export declare function fetchIssuesFromKnownRepos(octokit: Octokit, vetter: IssueVetter, repos: string[], labels: string[], maxResults: number, priority: SearchPriority, filterFn: (items: GitHubSearchItem[]) => GitHubSearchItem[], perPage?: number): Promise<{
46
47
  candidates: IssueCandidate[];
47
48
  allReposFailed: boolean;
48
49
  rateLimitHit: boolean;
@@ -60,7 +61,7 @@ export declare function fetchIssuesFromKnownRepos(octokit: Octokit, vetter: Issu
60
61
  * @param buildQuery Callback that receives a label query string and returns the full search query
61
62
  * @param perPage Number of results per API call
62
63
  */
63
- export declare function searchWithChunkedLabels(octokit: Octokit, labels: string[], reservedOps: number, buildQuery: (labelQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
64
+ export declare function searchWithChunkedLabels(octokit: Octokit, labels: string[], reservedOps: number, buildQuery: (labelQuery: string) => string, perPage: number, tracker?: SearchBudgetTracker): Promise<GitHubSearchItem[]>;
64
65
  /**
65
66
  * Build per-call language qualifier strings, fanning out across languages
66
67
  * when a multi-language + labels combination would trip GitHub Search's
@@ -84,7 +85,7 @@ export declare function buildLanguageVariants(languages: string[], isAnyLanguage
84
85
  * e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
85
86
  * @param perPage Results per API call
86
87
  */
87
- export declare function searchAcrossLanguagesAndLabels(octokit: Octokit, languages: string[], isAnyLanguage: boolean, labels: string[], buildBaseQuery: (langQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
88
+ export declare function searchAcrossLanguagesAndLabels(octokit: Octokit, languages: string[], isAnyLanguage: boolean, labels: string[], buildBaseQuery: (langQuery: string) => string, perPage: number, tracker?: SearchBudgetTracker): Promise<GitHubSearchItem[]>;
88
89
  /**
89
90
  * Shared pipeline: spam-filter, repo-exclusion, vetting, and star-count filter.
90
91
  * Used by Phases 2 and 3 to convert raw search results into vetted candidates.
@@ -10,7 +10,7 @@ import { debug, warn } from "./logger.js";
10
10
  import { getHttpCache, versionedCacheKey } from "./http-cache.js";
11
11
  import { detectLabelFarmingRepos, } from "./issue-filtering.js";
12
12
  import { extractRepoFromUrl, sleep } from "./utils.js";
13
- import { getSearchBudgetTracker } from "./search-budget.js";
13
+ import { getSearchBudgetTracker, } from "./search-budget.js";
14
14
  const MODULE = "search-phases";
15
15
  /** GitHub Search API enforces a max of 5 AND/OR/NOT operators per query. */
16
16
  const GITHUB_MAX_BOOLEAN_OPS = 5;
@@ -83,7 +83,11 @@ const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
83
83
  * Repeated identical queries within SEARCH_CACHE_TTL_MS return cached results
84
84
  * without consuming GitHub API rate limit points.
85
85
  */
86
- export async function cachedSearchIssues(octokit, params) {
86
+ export async function cachedSearchIssues(octokit, params,
87
+ // Optional injected budget tracker. Defaults to the shared singleton so
88
+ // existing callers keep the exact same global budget accounting; a host
89
+ // serving concurrent searches can inject a per-search tracker for isolation.
90
+ tracker = getSearchBudgetTracker()) {
87
91
  const cacheKey = versionedCacheKey(`search:${params.q}:${params.sort}:${params.order}:${params.per_page}`);
88
92
  const cache = getHttpCache();
89
93
  // Check cache first
@@ -93,7 +97,6 @@ export async function cachedSearchIssues(octokit, params) {
93
97
  return cached;
94
98
  }
95
99
  // Fetch from API
96
- const tracker = getSearchBudgetTracker();
97
100
  await tracker.waitForBudget();
98
101
  let data;
99
102
  try {
@@ -183,7 +186,7 @@ export async function fetchIssuesFromMaintainedRepos(octokit, repos, minStars, m
183
186
  * calls `GET /repos/{owner}/{repo}/issues` which counts against the much
184
187
  * larger Core API rate limit and avoids consuming the scarce Search quota.
185
188
  */
186
- export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels, maxResults, priority, filterFn) {
189
+ export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels, maxResults, priority, filterFn, perPage = 5) {
187
190
  const candidates = [];
188
191
  let failedRepos = 0;
189
192
  let rateLimitFailures = 0;
@@ -210,7 +213,7 @@ export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels,
210
213
  state: "open",
211
214
  sort: "created",
212
215
  direction: "desc",
213
- per_page: 5,
216
+ per_page: perPage,
214
217
  ...(label !== undefined ? { labels: label } : {}),
215
218
  });
216
219
  for (const issue of response.data) {
@@ -273,7 +276,7 @@ export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels,
273
276
  * @param buildQuery Callback that receives a label query string and returns the full search query
274
277
  * @param perPage Number of results per API call
275
278
  */
276
- export async function searchWithChunkedLabels(octokit, labels, reservedOps, buildQuery, perPage) {
279
+ export async function searchWithChunkedLabels(octokit, labels, reservedOps, buildQuery, perPage, tracker = getSearchBudgetTracker()) {
277
280
  const labelChunks = chunkLabels(labels, reservedOps);
278
281
  const seenUrls = new Set();
279
282
  const allItems = [];
@@ -286,7 +289,7 @@ export async function searchWithChunkedLabels(octokit, labels, reservedOps, buil
286
289
  sort: "created",
287
290
  order: "desc",
288
291
  per_page: perPage,
289
- });
292
+ }, tracker);
290
293
  for (const item of data.items) {
291
294
  if (!seenUrls.has(item.html_url)) {
292
295
  seenUrls.add(item.html_url);
@@ -327,7 +330,7 @@ export function buildLanguageVariants(languages, isAnyLanguage, hasLabels) {
327
330
  * e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
328
331
  * @param perPage Results per API call
329
332
  */
330
- export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, labels, buildBaseQuery, perPage) {
333
+ export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, labels, buildBaseQuery, perPage, tracker = getSearchBudgetTracker()) {
331
334
  const langVariants = buildLanguageVariants(languages, isAnyLanguage, labels.length > 0);
332
335
  const seenUrls = new Set();
333
336
  const allItems = [];
@@ -336,7 +339,7 @@ export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLa
336
339
  await sleep(INTER_QUERY_DELAY_MS);
337
340
  const items = await searchWithChunkedLabels(octokit, labels, 0, (labelQ) => `${buildBaseQuery(langVariants[i])} ${labelQ}`
338
341
  .replace(/ +/g, " ")
339
- .trim(), perPage);
342
+ .trim(), perPage, tracker);
340
343
  for (const item of items) {
341
344
  if (!seenUrls.has(item.html_url)) {
342
345
  seenUrls.add(item.html_url);
@@ -243,6 +243,21 @@ export interface SearchOptions {
243
243
  * disables the boost.
244
244
  */
245
245
  preferRepos?: string[];
246
+ /**
247
+ * Per-call personalization bias: a SOFT penalty (milder than the hard
248
+ * `excludeRepos` filter) for candidates in one of these `owner/repo` slugs
249
+ * (#168). They are pushed below equally-recommended non-matches but not
250
+ * removed; a strong boost can still outweigh the penalty. Empty / undefined
251
+ * disables it.
252
+ */
253
+ avoidRepos?: string[];
254
+ /**
255
+ * Per-call personalization bias: a soft boost for candidates whose issue
256
+ * labels match one of these types, case-insensitive (e.g. "bug",
257
+ * "good first issue") (#168). Same tier as a language match. Does not filter
258
+ * results, does not change `viabilityScore`. Empty / undefined disables it.
259
+ */
260
+ boostIssueTypes?: string[];
246
261
  /**
247
262
  * Counterweight against echo-chamber bias as `preferLanguages` /
248
263
  * `preferRepos` boosts accumulate over time (#1244). A value of 0.2