@oss-scout/core 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27,24 +27,39 @@ import type { IssueCandidate } from "./types.js";
27
27
  */
28
28
  export declare const REPO_BOOST = 20;
29
29
  export declare const LANGUAGE_BOOST = 10;
30
+ /** Soft boost for an issue-label ("issue type") match (#168). Same tier as LANGUAGE_BOOST. */
31
+ export declare const ISSUE_TYPE_BOOST = 10;
30
32
  /**
31
- * The personalization sort weight of a candidate: its boost score, or 0 when it
32
- * is not boosted (unboosted or a diversity slot). Reads the structural
33
- * `personalization` field (#158) so callers never poke at the old loose
34
- * `boostScore` field.
33
+ * Soft penalty for an avoidRepos match (#168). Milder than the hard
34
+ * excludeRepos filter: it pushes the candidate down but a strong boost (e.g. a
35
+ * preferRepos affinity, +20) can still outweigh it.
36
+ */
37
+ export declare const AVOID_PENALTY = 15;
38
+ /** Per-call personalization bias lists (#168). All optional; empty = no effect. */
39
+ export interface PersonalizationBias {
40
+ preferLanguages?: string[];
41
+ preferRepos?: string[];
42
+ avoidRepos?: string[];
43
+ boostIssueTypes?: string[];
44
+ }
45
+ /**
46
+ * The personalization sort weight of a candidate: its net score, or 0 when it
47
+ * carries no personalization marker. Reads the structural `personalization`
48
+ * field (#158). The score can be negative when avoidRepos applied (#168).
35
49
  */
36
50
  export declare function boostScoreOf(candidate: IssueCandidate): number;
37
51
  /**
38
- * Return a new candidate list where each candidate that matches a
39
- * caller-supplied preference carries `personalization: { kind: "boosted", ... }`.
40
- * Does NOT mutate the input candidates (#158) matched candidates are shallow
41
- * copies with the field set; unmatched candidates are passed through unchanged.
42
- * The caller re-sorts the returned array.
52
+ * Return a new candidate list where each candidate matching a caller-supplied
53
+ * bias carries a `personalization` marker with a NET score (#168): preferRepos,
54
+ * preferLanguages and boostIssueTypes add; avoidRepos subtracts. The score may
55
+ * be negative (avoid-only) — boostScoreOf sorts those below neutral candidates.
56
+ * Does NOT mutate the input (#158): matched candidates are shallow copies,
57
+ * unmatched ones pass through unchanged.
43
58
  *
44
- * No-op when both preference lists are empty or undefined: the input array is
45
- * returned as-is and the sort tier collapses to 0 for every candidate.
59
+ * No-op when every bias list is empty/undefined: the input array is returned
60
+ * as-is and the sort tier collapses to 0 for every candidate.
46
61
  */
47
- export declare function annotateBoost(candidates: IssueCandidate[], preferLanguages?: string[], preferRepos?: string[]): IssueCandidate[];
62
+ export declare function annotateBoost(candidates: IssueCandidate[], bias?: PersonalizationBias): IssueCandidate[];
48
63
  /**
49
64
  * Apply a diversity-counterweight pass over a pre-sorted candidate list
50
65
  * (#1244). Returns the first `maxResults` picks in priority order:
@@ -26,36 +26,54 @@
26
26
  */
27
27
  export const REPO_BOOST = 20;
28
28
  export const LANGUAGE_BOOST = 10;
29
+ /** Soft boost for an issue-label ("issue type") match (#168). Same tier as LANGUAGE_BOOST. */
30
+ export const ISSUE_TYPE_BOOST = 10;
29
31
  /**
30
- * The personalization sort weight of a candidate: its boost score, or 0 when it
31
- * is not boosted (unboosted or a diversity slot). Reads the structural
32
- * `personalization` field (#158) so callers never poke at the old loose
33
- * `boostScore` field.
32
+ * Soft penalty for an avoidRepos match (#168). Milder than the hard
33
+ * excludeRepos filter: it pushes the candidate down but a strong boost (e.g. a
34
+ * preferRepos affinity, +20) can still outweigh it.
35
+ */
36
+ export const AVOID_PENALTY = 15;
37
+ /**
38
+ * The personalization sort weight of a candidate: its net score, or 0 when it
39
+ * carries no personalization marker. Reads the structural `personalization`
40
+ * field (#158). The score can be negative when avoidRepos applied (#168).
34
41
  */
35
42
  export function boostScoreOf(candidate) {
36
43
  return candidate.personalization?.kind === "boosted"
37
44
  ? candidate.personalization.score
38
45
  : 0;
39
46
  }
47
+ function normalizeSet(values) {
48
+ return new Set((values ?? []).map((v) => v.trim().toLowerCase()).filter(Boolean));
49
+ }
40
50
  /**
41
- * Return a new candidate list where each candidate that matches a
42
- * caller-supplied preference carries `personalization: { kind: "boosted", ... }`.
43
- * Does NOT mutate the input candidates (#158) matched candidates are shallow
44
- * copies with the field set; unmatched candidates are passed through unchanged.
45
- * The caller re-sorts the returned array.
51
+ * Return a new candidate list where each candidate matching a caller-supplied
52
+ * bias carries a `personalization` marker with a NET score (#168): preferRepos,
53
+ * preferLanguages and boostIssueTypes add; avoidRepos subtracts. The score may
54
+ * be negative (avoid-only) — boostScoreOf sorts those below neutral candidates.
55
+ * Does NOT mutate the input (#158): matched candidates are shallow copies,
56
+ * unmatched ones pass through unchanged.
46
57
  *
47
- * No-op when both preference lists are empty or undefined: the input array is
48
- * returned as-is and the sort tier collapses to 0 for every candidate.
58
+ * No-op when every bias list is empty/undefined: the input array is returned
59
+ * as-is and the sort tier collapses to 0 for every candidate.
49
60
  */
50
- export function annotateBoost(candidates, preferLanguages, preferRepos) {
51
- const langSet = new Set((preferLanguages ?? []).map((l) => l.trim().toLowerCase()).filter(Boolean));
52
- const repoSet = new Set((preferRepos ?? []).map((r) => r.trim().toLowerCase()).filter(Boolean));
53
- if (langSet.size === 0 && repoSet.size === 0)
61
+ export function annotateBoost(candidates, bias = {}) {
62
+ const langSet = normalizeSet(bias.preferLanguages);
63
+ const repoSet = normalizeSet(bias.preferRepos);
64
+ const avoidSet = normalizeSet(bias.avoidRepos);
65
+ const typeSet = normalizeSet(bias.boostIssueTypes);
66
+ if (langSet.size === 0 &&
67
+ repoSet.size === 0 &&
68
+ avoidSet.size === 0 &&
69
+ typeSet.size === 0) {
54
70
  return candidates;
71
+ }
55
72
  return candidates.map((c) => {
56
73
  let score = 0;
57
74
  const reasons = [];
58
- if (repoSet.size > 0 && repoSet.has(c.issue.repo.toLowerCase())) {
75
+ const repoLower = c.issue.repo.toLowerCase();
76
+ if (repoSet.size > 0 && repoSet.has(repoLower)) {
59
77
  score += REPO_BOOST;
60
78
  reasons.push(`repo affinity: ${c.issue.repo}`);
61
79
  }
@@ -64,7 +82,18 @@ export function annotateBoost(candidates, preferLanguages, preferRepos) {
64
82
  score += LANGUAGE_BOOST;
65
83
  reasons.push(`language match: ${lang}`);
66
84
  }
67
- if (score === 0)
85
+ if (typeSet.size > 0) {
86
+ const matched = c.issue.labels.find((l) => typeSet.has(l.toLowerCase()));
87
+ if (matched) {
88
+ score += ISSUE_TYPE_BOOST;
89
+ reasons.push(`issue type: ${matched}`);
90
+ }
91
+ }
92
+ if (avoidSet.size > 0 && avoidSet.has(repoLower)) {
93
+ score -= AVOID_PENALTY;
94
+ reasons.push(`avoided repo: ${c.issue.repo}`);
95
+ }
96
+ if (reasons.length === 0)
68
97
  return c;
69
98
  return { ...c, personalization: { kind: "boosted", score, reasons } };
70
99
  });
@@ -116,7 +145,10 @@ export function applyDiversityRatio(candidates, maxResults, diversityRatio) {
116
145
  break;
117
146
  if (seen.has(c.issue.url))
118
147
  continue;
119
- if (boostScoreOf(c) > 0)
148
+ // Diversity slots are for candidates that matched NO personalization bias.
149
+ // Exclude both boosted (>0) and avoided (<0) candidates — resurfacing an
150
+ // avoided repo via a diversity slot would defeat the avoid (#168).
151
+ if (boostScoreOf(c) !== 0)
120
152
  continue;
121
153
  // Tag a shallow copy rather than mutating the shared candidate (#158).
122
154
  picks.push({ ...c, personalization: { kind: "diversity" } });
@@ -36,6 +36,8 @@ export const FIELD_CONFIGS = {
36
36
  preferLanguages: { type: "array" },
37
37
  preferRepos: { type: "array" },
38
38
  diversityRatio: { type: "float" },
39
+ avoidRepos: { type: "array" },
40
+ boostIssueTypes: { type: "array" },
39
41
  slmTriageModel: { type: "string" },
40
42
  slmTriageHost: { type: "string" },
41
43
  featuresAnchorThreshold: { type: "number" },
@@ -0,0 +1,47 @@
1
+ /**
2
+ * Single-path repo-file probe (#156).
3
+ *
4
+ * Three modules (repo-health, roadmap, anti-llm-policy) independently fetch a
5
+ * repo doc by trying a list of candidate paths and stopping at the first hit.
6
+ * The per-path fetch was copy-pasted three times, each re-deriving the same
7
+ * 404-continue / fatal-propagate / base64-decode logic. This is the one
8
+ * genuinely-shared primitive.
9
+ *
10
+ * The orchestration around it stays per-caller (parallel 4-path probe,
11
+ * sequential 5-path probe, sequential family probe) and so do the return shapes
12
+ * (parsed guidelines, issue-ref set, policy scan). Only the single GET is
13
+ * shared.
14
+ *
15
+ * The `transient` flag is load-bearing: it distinguishes a clean miss (404 —
16
+ * file absent) from a degraded miss (5xx, network) so callers can decide
17
+ * whether to cache a negative result or leave it open to retry. Collapsing the
18
+ * two would bypass anti-llm-policy's transient-failure cache safeguard, so the
19
+ * primitive must keep them separate.
20
+ */
21
+ import type { Octokit } from "@octokit/rest";
22
+ /**
23
+ * Result of probing one repo file path.
24
+ *
25
+ * - `text` — decoded UTF-8 content on a 200 with a file payload, else `null`
26
+ * (404, a non-content payload such as a directory listing, or a soft error).
27
+ * - `transient` — `true` only when the miss was a degraded failure (5xx,
28
+ * network) rather than a clean 404 / missing file. A `true` value means the
29
+ * `null` may be incomplete and the caller should avoid caching it as a known
30
+ * absence.
31
+ */
32
+ export interface ProbeRepoFileResult {
33
+ text: string | null;
34
+ transient: boolean;
35
+ }
36
+ /**
37
+ * GET one repo file path. Returns decoded content on a 200 file payload, a
38
+ * clean `null` on 404 or a non-content payload, and a transient `null` on a
39
+ * soft error (5xx, network) after logging it. Rethrows fatal errors (401 auth,
40
+ * rate limit) so the caller's existing rate-limit handling sees them.
41
+ *
42
+ * Callers that need 401/rate-limit to surface across a *parallel* batch (where
43
+ * a faster path may have already resolved) must inspect the rejected reasons
44
+ * themselves; this primitive only rethrows for the single path it owns. See
45
+ * repo-health and anti-llm-policy for that pre-scan.
46
+ */
47
+ export declare function probeRepoFile(octokit: Octokit, owner: string, repo: string, path: string): Promise<ProbeRepoFileResult>;
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Single-path repo-file probe (#156).
3
+ *
4
+ * Three modules (repo-health, roadmap, anti-llm-policy) independently fetch a
5
+ * repo doc by trying a list of candidate paths and stopping at the first hit.
6
+ * The per-path fetch was copy-pasted three times, each re-deriving the same
7
+ * 404-continue / fatal-propagate / base64-decode logic. This is the one
8
+ * genuinely-shared primitive.
9
+ *
10
+ * The orchestration around it stays per-caller (parallel 4-path probe,
11
+ * sequential 5-path probe, sequential family probe) and so do the return shapes
12
+ * (parsed guidelines, issue-ref set, policy scan). Only the single GET is
13
+ * shared.
14
+ *
15
+ * The `transient` flag is load-bearing: it distinguishes a clean miss (404 —
16
+ * file absent) from a degraded miss (5xx, network) so callers can decide
17
+ * whether to cache a negative result or leave it open to retry. Collapsing the
18
+ * two would bypass anti-llm-policy's transient-failure cache safeguard, so the
19
+ * primitive must keep them separate.
20
+ */
21
+ import { errorMessage, getHttpStatusCode, rethrowIfFatal } from "./errors.js";
22
+ import { warn } from "./logger.js";
23
+ const MODULE = "probe-repo-file";
24
+ /**
25
+ * GET one repo file path. Returns decoded content on a 200 file payload, a
26
+ * clean `null` on 404 or a non-content payload, and a transient `null` on a
27
+ * soft error (5xx, network) after logging it. Rethrows fatal errors (401 auth,
28
+ * rate limit) so the caller's existing rate-limit handling sees them.
29
+ *
30
+ * Callers that need 401/rate-limit to surface across a *parallel* batch (where
31
+ * a faster path may have already resolved) must inspect the rejected reasons
32
+ * themselves; this primitive only rethrows for the single path it owns. See
33
+ * repo-health and anti-llm-policy for that pre-scan.
34
+ */
35
+ export async function probeRepoFile(octokit, owner, repo, path) {
36
+ try {
37
+ const { data } = await octokit.repos.getContent({ owner, repo, path });
38
+ if (data &&
39
+ typeof data === "object" &&
40
+ "content" in data &&
41
+ typeof data.content === "string") {
42
+ return {
43
+ text: Buffer.from(data.content, "base64").toString("utf-8"),
44
+ transient: false,
45
+ };
46
+ }
47
+ return { text: null, transient: false };
48
+ }
49
+ catch (error) {
50
+ const status = getHttpStatusCode(error);
51
+ if (status === 404)
52
+ return { text: null, transient: false };
53
+ rethrowIfFatal(error);
54
+ warn(MODULE, `Unexpected error fetching ${path} from ${owner}/${repo}: ${errorMessage(error)}`);
55
+ return { text: null, transient: true };
56
+ }
57
+ }
@@ -8,6 +8,7 @@ import { daysBetween } from "./utils.js";
8
8
  import { errorMessage, getHttpStatusCode, isRateLimitError, rethrowIfFatal, } from "./errors.js";
9
9
  import { warn } from "./logger.js";
10
10
  import { getHttpCache, cachedRequest, cachedTimeBased } from "./http-cache.js";
11
+ import { probeRepoFile } from "./probe-repo-file.js";
11
12
  const MODULE = "repo-health";
12
13
  // ── Cache for contribution guidelines ──
13
14
  const guidelinesCache = new Map();
@@ -121,13 +122,11 @@ async function fetchContributionGuidelinesUncached(octokit, owner, repo) {
121
122
  "docs/CONTRIBUTING.md",
122
123
  "contributing.md",
123
124
  ];
124
- // Probe all paths in parallel — take the first success in priority order
125
- const results = await Promise.allSettled(filesToCheck.map((file) => octokit.repos.getContent({ owner, repo, path: file }).then(({ data }) => {
126
- if ("content" in data) {
127
- return Buffer.from(data.content, "base64").toString("utf-8");
128
- }
129
- return null;
130
- })));
125
+ // Probe all paths in parallel — take the first success in priority order.
126
+ // probeRepoFile rethrows 401/rate-limit, so those still surface here as
127
+ // rejected results for the pre-scan below; 404s and 5xx come back as a null
128
+ // text (the primitive warns on 5xx, so no extra warn is needed here).
129
+ const results = await Promise.allSettled(filesToCheck.map((file) => probeRepoFile(octokit, owner, repo, file)));
131
130
  // Pre-scan: auth/rate-limit must propagate even if a faster probe succeeded —
132
131
  // otherwise a path-restricted token that 401s on .github/CONTRIBUTING.md but
133
132
  // wins on CONTRIBUTING.md would silently hide the auth misconfiguration.
@@ -139,20 +138,13 @@ async function fetchContributionGuidelinesUncached(octokit, owner, repo) {
139
138
  throw result.reason;
140
139
  }
141
140
  }
142
- for (let i = 0; i < results.length; i++) {
143
- const result = results[i];
144
- if (result.status === "fulfilled" && result.value) {
145
- const guidelines = parseContributionGuidelines(result.value);
141
+ for (const result of results) {
142
+ if (result.status === "fulfilled" && result.value.text) {
143
+ const guidelines = parseContributionGuidelines(result.value.text);
146
144
  guidelinesCache.set(cacheKey, { guidelines, fetchedAt: Date.now() });
147
145
  pruneCache();
148
146
  return guidelines;
149
147
  }
150
- if (result.status === "rejected") {
151
- const status = getHttpStatusCode(result.reason);
152
- if (status !== 404) {
153
- warn(MODULE, `Unexpected error fetching ${filesToCheck[i]} from ${owner}/${repo}: ${errorMessage(result.reason)}`);
154
- }
155
- }
156
148
  }
157
149
  // Cache the negative result too and prune if needed
158
150
  guidelinesCache.set(cacheKey, {
@@ -10,9 +10,7 @@
10
10
  * Auth (401) and rate-limit errors propagate, matching the rest of the
11
11
  * codebase's error strategy. Other errors degrade gracefully (warn + empty).
12
12
  */
13
- import { errorMessage, getHttpStatusCode, rethrowIfFatal } from "./errors.js";
14
- import { warn } from "./logger.js";
15
- const MODULE = "roadmap";
13
+ import { probeRepoFile } from "./probe-repo-file.js";
16
14
  /** TTL for roadmap fetch results (1 hour). */
17
15
  const CACHE_TTL_MS = 60 * 60 * 1000;
18
16
  /** Paths probed in priority order. First success wins. */
@@ -113,24 +111,16 @@ export async function fetchRoadmapIssueRefs(octokit, owner, repo) {
113
111
  const roadmapInflight = new Map();
114
112
  async function fetchRoadmapIssueRefsUncached(octokit, owner, repo, cacheKey) {
115
113
  for (const path of ROADMAP_PATHS) {
116
- try {
117
- const { data } = await octokit.repos.getContent({ owner, repo, path });
118
- if (!("content" in data))
119
- continue;
120
- const content = Buffer.from(data.content, "base64").toString("utf-8");
121
- const refs = parseRoadmapIssueRefs(content, owner, repo);
122
- roadmapCache.set(cacheKey, { refs, fetchedAt: Date.now() });
123
- pruneCache();
124
- return refs;
125
- }
126
- catch (err) {
127
- rethrowIfFatal(err);
128
- const status = getHttpStatusCode(err);
129
- if (status === 404)
130
- continue; // path missing — try next
131
- warn(MODULE, `Unexpected error fetching ${path} from ${owner}/${repo}: ${errorMessage(err)}`);
132
- // Fall through and try next path.
133
- }
114
+ // probeRepoFile rethrows 401/rate-limit, treats 404 and non-content
115
+ // payloads as a null text, and warns on 5xx — all of which we degrade past
116
+ // by trying the next path.
117
+ const { text } = await probeRepoFile(octokit, owner, repo, path);
118
+ if (!text)
119
+ continue;
120
+ const refs = parseRoadmapIssueRefs(text, owner, repo);
121
+ roadmapCache.set(cacheKey, { refs, fetchedAt: Date.now() });
122
+ pruneCache();
123
+ return refs;
134
124
  }
135
125
  // No roadmap found (or all probes errored softly). Cache the empty result
136
126
  // so we don't re-probe every run.
@@ -279,6 +279,8 @@ export declare const ScoutPreferencesSchema: z.ZodObject<{
279
279
  preferLanguages: z.ZodDefault<z.ZodArray<z.ZodString>>;
280
280
  preferRepos: z.ZodDefault<z.ZodArray<z.ZodString>>;
281
281
  diversityRatio: z.ZodDefault<z.ZodNumber>;
282
+ avoidRepos: z.ZodDefault<z.ZodArray<z.ZodString>>;
283
+ boostIssueTypes: z.ZodDefault<z.ZodArray<z.ZodString>>;
282
284
  broadPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
283
285
  /**
284
286
  * Skip the expensive broad phase once this many candidates were found by
@@ -359,6 +361,8 @@ export declare const ScoutStateSchema: z.ZodObject<{
359
361
  preferLanguages: z.ZodDefault<z.ZodArray<z.ZodString>>;
360
362
  preferRepos: z.ZodDefault<z.ZodArray<z.ZodString>>;
361
363
  diversityRatio: z.ZodDefault<z.ZodNumber>;
364
+ avoidRepos: z.ZodDefault<z.ZodArray<z.ZodString>>;
365
+ boostIssueTypes: z.ZodDefault<z.ZodArray<z.ZodString>>;
362
366
  broadPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
363
367
  /**
364
368
  * Skip the expensive broad phase once this many candidates were found by
@@ -42,6 +42,9 @@ export const CONCRETE_STRATEGIES = [
42
42
  // ── Leaf schemas ────────────────────────────────────────────────────
43
43
  export const RepoSignalsSchema = z.looseObject({
44
44
  hasActiveMaintainers: z.boolean(),
45
+ // Retained for backward compatibility but no longer affects the repo score
46
+ // (#167): nothing computes it, and hasActiveMaintainers is the live activity
47
+ // proxy. Kept so old persisted state and the search JSON output still parse.
45
48
  isResponsive: z.boolean(),
46
49
  hasHostileComments: z.boolean(),
47
50
  });
@@ -190,6 +193,12 @@ export const ScoutPreferencesSchema = z.looseObject({
190
193
  preferLanguages: z.array(z.string()).default([]),
191
194
  preferRepos: z.array(z.string()).default([]),
192
195
  diversityRatio: z.number().min(0).max(1).default(0),
196
+ // Soft penalty (milder than the hard excludeRepos filter): candidates in
197
+ // these `owner/repo` slugs are pushed down the ranking but not removed (#168).
198
+ avoidRepos: z.array(z.string()).default([]),
199
+ // Soft boost for candidates whose issue labels match one of these types,
200
+ // case-insensitive (e.g. "bug", "good first issue") (#168).
201
+ boostIssueTypes: z.array(z.string()).default([]),
193
202
  broadPhaseDelayMs: z.number().min(0).max(300000).default(90000),
194
203
  /**
195
204
  * Skip the expensive broad phase once this many candidates were found by
@@ -8,6 +8,7 @@ import { Octokit } from "@octokit/rest";
8
8
  import { type SearchPriority, type IssueCandidate, type IssueScope } from "./types.js";
9
9
  import { type GitHubSearchItem } from "./issue-filtering.js";
10
10
  import { IssueVetter } from "./issue-vetting.js";
11
+ import { type SearchBudgetTracker } from "./search-budget.js";
11
12
  /** Resolve scope tiers into a flat label list, merged with custom labels. */
12
13
  export declare function buildEffectiveLabels(scopes: IssueScope[], customLabels: string[]): string[];
13
14
  /** Round-robin interleave multiple arrays. */
@@ -22,7 +23,7 @@ export declare function cachedSearchIssues(octokit: Octokit, params: {
22
23
  sort: "created" | "updated" | "comments" | "reactions" | "interactions";
23
24
  order: "asc" | "desc";
24
25
  per_page: number;
25
- }): Promise<{
26
+ }, tracker?: SearchBudgetTracker): Promise<{
26
27
  total_count: number;
27
28
  items: GitHubSearchItem[];
28
29
  }>;
@@ -60,7 +61,7 @@ export declare function fetchIssuesFromKnownRepos(octokit: Octokit, vetter: Issu
60
61
  * @param buildQuery Callback that receives a label query string and returns the full search query
61
62
  * @param perPage Number of results per API call
62
63
  */
63
- export declare function searchWithChunkedLabels(octokit: Octokit, labels: string[], reservedOps: number, buildQuery: (labelQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
64
+ export declare function searchWithChunkedLabels(octokit: Octokit, labels: string[], reservedOps: number, buildQuery: (labelQuery: string) => string, perPage: number, tracker?: SearchBudgetTracker): Promise<GitHubSearchItem[]>;
64
65
  /**
65
66
  * Build per-call language qualifier strings, fanning out across languages
66
67
  * when a multi-language + labels combination would trip GitHub Search's
@@ -84,7 +85,7 @@ export declare function buildLanguageVariants(languages: string[], isAnyLanguage
84
85
  * e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
85
86
  * @param perPage Results per API call
86
87
  */
87
- export declare function searchAcrossLanguagesAndLabels(octokit: Octokit, languages: string[], isAnyLanguage: boolean, labels: string[], buildBaseQuery: (langQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
88
+ export declare function searchAcrossLanguagesAndLabels(octokit: Octokit, languages: string[], isAnyLanguage: boolean, labels: string[], buildBaseQuery: (langQuery: string) => string, perPage: number, tracker?: SearchBudgetTracker): Promise<GitHubSearchItem[]>;
88
89
  /**
89
90
  * Shared pipeline: spam-filter, repo-exclusion, vetting, and star-count filter.
90
91
  * Used by Phases 2 and 3 to convert raw search results into vetted candidates.
@@ -10,7 +10,7 @@ import { debug, warn } from "./logger.js";
10
10
  import { getHttpCache, versionedCacheKey } from "./http-cache.js";
11
11
  import { detectLabelFarmingRepos, } from "./issue-filtering.js";
12
12
  import { extractRepoFromUrl, sleep } from "./utils.js";
13
- import { getSearchBudgetTracker } from "./search-budget.js";
13
+ import { getSearchBudgetTracker, } from "./search-budget.js";
14
14
  const MODULE = "search-phases";
15
15
  /** GitHub Search API enforces a max of 5 AND/OR/NOT operators per query. */
16
16
  const GITHUB_MAX_BOOLEAN_OPS = 5;
@@ -83,7 +83,11 @@ const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
83
83
  * Repeated identical queries within SEARCH_CACHE_TTL_MS return cached results
84
84
  * without consuming GitHub API rate limit points.
85
85
  */
86
- export async function cachedSearchIssues(octokit, params) {
86
+ export async function cachedSearchIssues(octokit, params,
87
+ // Optional injected budget tracker. Defaults to the shared singleton so
88
+ // existing callers keep the exact same global budget accounting; a host
89
+ // serving concurrent searches can inject a per-search tracker for isolation.
90
+ tracker = getSearchBudgetTracker()) {
87
91
  const cacheKey = versionedCacheKey(`search:${params.q}:${params.sort}:${params.order}:${params.per_page}`);
88
92
  const cache = getHttpCache();
89
93
  // Check cache first
@@ -93,7 +97,6 @@ export async function cachedSearchIssues(octokit, params) {
93
97
  return cached;
94
98
  }
95
99
  // Fetch from API
96
- const tracker = getSearchBudgetTracker();
97
100
  await tracker.waitForBudget();
98
101
  let data;
99
102
  try {
@@ -273,7 +276,7 @@ export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels,
273
276
  * @param buildQuery Callback that receives a label query string and returns the full search query
274
277
  * @param perPage Number of results per API call
275
278
  */
276
- export async function searchWithChunkedLabels(octokit, labels, reservedOps, buildQuery, perPage) {
279
+ export async function searchWithChunkedLabels(octokit, labels, reservedOps, buildQuery, perPage, tracker = getSearchBudgetTracker()) {
277
280
  const labelChunks = chunkLabels(labels, reservedOps);
278
281
  const seenUrls = new Set();
279
282
  const allItems = [];
@@ -286,7 +289,7 @@ export async function searchWithChunkedLabels(octokit, labels, reservedOps, buil
286
289
  sort: "created",
287
290
  order: "desc",
288
291
  per_page: perPage,
289
- });
292
+ }, tracker);
290
293
  for (const item of data.items) {
291
294
  if (!seenUrls.has(item.html_url)) {
292
295
  seenUrls.add(item.html_url);
@@ -327,7 +330,7 @@ export function buildLanguageVariants(languages, isAnyLanguage, hasLabels) {
327
330
  * e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
328
331
  * @param perPage Results per API call
329
332
  */
330
- export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, labels, buildBaseQuery, perPage) {
333
+ export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, labels, buildBaseQuery, perPage, tracker = getSearchBudgetTracker()) {
331
334
  const langVariants = buildLanguageVariants(languages, isAnyLanguage, labels.length > 0);
332
335
  const seenUrls = new Set();
333
336
  const allItems = [];
@@ -336,7 +339,7 @@ export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLa
336
339
  await sleep(INTER_QUERY_DELAY_MS);
337
340
  const items = await searchWithChunkedLabels(octokit, labels, 0, (labelQ) => `${buildBaseQuery(langVariants[i])} ${labelQ}`
338
341
  .replace(/ +/g, " ")
339
- .trim(), perPage);
342
+ .trim(), perPage, tracker);
340
343
  for (const item of items) {
341
344
  if (!seenUrls.has(item.html_url)) {
342
345
  seenUrls.add(item.html_url);
@@ -243,6 +243,21 @@ export interface SearchOptions {
243
243
  * disables the boost.
244
244
  */
245
245
  preferRepos?: string[];
246
+ /**
247
+ * Per-call personalization bias: a SOFT penalty (milder than the hard
248
+ * `excludeRepos` filter) for candidates in one of these `owner/repo` slugs
249
+ * (#168). They are pushed below equally-recommended non-matches but not
250
+ * removed; a strong boost can still outweigh the penalty. Empty / undefined
251
+ * disables it.
252
+ */
253
+ avoidRepos?: string[];
254
+ /**
255
+ * Per-call personalization bias: a soft boost for candidates whose issue
256
+ * labels match one of these types, case-insensitive (e.g. "bug",
257
+ * "good first issue") (#168). Same tier as a language match. Does not filter
258
+ * results, does not change `viabilityScore`. Empty / undefined disables it.
259
+ */
260
+ boostIssueTypes?: string[];
246
261
  /**
247
262
  * Counterweight against echo-chamber bias as `preferLanguages` /
248
263
  * `preferRepos` boosts accumulate over time (#1244). A value of 0.2
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Human-readable (non-JSON) output formatters for the oss-scout CLI.
3
+ *
4
+ * Each renderer is a pure function that returns the exact multi-line string the
5
+ * CLI used to emit via a sequence of `console.log` calls. The caller does a
6
+ * single `console.log(renderX(...))`, which appends the one trailing newline
7
+ * that the final `console.log` in the old inline block produced.
8
+ *
9
+ * To stay byte-identical: every old `console.log(line)` becomes one entry in a
10
+ * lines array, a bare `console.log()` (blank line) becomes an empty entry, and
11
+ * the array is joined with "\n". The caller's own `console.log` supplies the
12
+ * last newline. STDERR output (the search rate-limit warning) is deliberately
13
+ * NOT folded in here — it stays a `console.error` in the caller.
14
+ */
15
+ import type { SearchOutput } from "../commands/search.js";
16
+ import type { FeaturesOutput } from "../commands/features.js";
17
+ import type { SavedCandidate } from "../core/schemas.js";
18
+ import type { VetListResult } from "../core/types.js";
19
+ import type { VetOutput } from "../commands/vet.js";
20
+ /** Emoji for a vetting recommendation, shared by the search and vet renderers. */
21
+ export declare function recommendationIcon(recommendation: "approve" | "skip" | "needs_review"): string;
22
+ /**
23
+ * Render the human-readable `search` output: the "Found N issue candidates"
24
+ * block with per-candidate icon, personalization and stalled tags, and the
25
+ * optional repoScore line. The trailing rate-limit warning is NOT included
26
+ * here; it goes to stderr in the caller.
27
+ */
28
+ export declare function renderSearch(results: SearchOutput): string;
29
+ /**
30
+ * Render the human-readable `features` output: the optional message, the
31
+ * "Feature opportunities" header, the anchor repos line, and the Quick wins /
32
+ * Bigger bets sections. Returns "" when there is nothing to print beyond an
33
+ * absent message (caller guards against logging a blank line).
34
+ */
35
+ export declare function renderFeatures(result: FeaturesOutput, options: {
36
+ broad?: boolean;
37
+ }): string;
38
+ /** The empty-state message printed by `results` when nothing is saved. */
39
+ export declare const RESULTS_EMPTY_MESSAGE = "\nNo saved results. Run `oss-scout search` to find issues.\n";
40
+ /**
41
+ * Render the human-readable `results` table: the "Saved results" header and a
42
+ * Score / Repo / Issue / Recommendation / Title row per saved candidate.
43
+ * Callers handle the empty state (RESULTS_EMPTY_MESSAGE) separately.
44
+ */
45
+ export declare function renderResults(results: SavedCandidate[]): string;
46
+ /** The empty-state message printed by `vet-list` when there is nothing to vet. */
47
+ export declare const VET_LIST_EMPTY_MESSAGE = "\nNo saved results to vet. Run `oss-scout search` first.\n";
48
+ /**
49
+ * Render the human-readable `vet-list` output: the "Vet-list results (N)"
50
+ * block with a per-row status icon, the "Changes since last check"
51
+ * transitions block, the summary line, and the optional pruned-count line.
52
+ * Callers handle the empty state (VET_LIST_EMPTY_MESSAGE) separately.
53
+ */
54
+ export declare function renderVetList(result: VetListResult): string;
55
+ /**
56
+ * Render the human-readable single-issue `vet` output: the recommendation
57
+ * header, the reasons to approve / skip, and the project-health block. The
58
+ * checkFailed branch (#158) is preserved exactly.
59
+ */
60
+ export declare function renderVet(result: VetOutput): string;