@oss-scout/core 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -89,6 +89,8 @@ program
89
89
  .description("Search for contributable issues using multi-strategy discovery")
90
90
  .option("--json", "Output as JSON")
91
91
  .option("--strategy <strategies>", `Search strategies (${CONCRETE_STRATEGIES.join(",")},all)`, "all")
92
+ .option("--prefer-languages <list>", "Comma-separated languages to soft-boost in ranking (#1244). Candidates whose repo language matches sort above equally-recommended non-matches. Does not filter results.")
93
+ .option("--prefer-repos <list>", "Comma-separated `owner/repo` slugs to soft-boost in ranking (#1244). Stronger weight than language match. Does not filter results.")
92
94
  .action(async (count, options) => {
93
95
  try {
94
96
  if (!hasLocalState()) {
@@ -124,7 +126,22 @@ program
124
126
  }
125
127
  strategies.push(parsed.data);
126
128
  }
127
- const results = await runSearch({ maxResults, state, strategies });
129
// Turn a comma-separated CLI value into a list of trimmed, non-empty
// entries. Yields `undefined` (rather than an empty list) when the flag
// was omitted or contained nothing but separators/whitespace, so the
// preference is passed downstream as "not supplied".
const splitCsv = (raw) => {
    if (!raw) {
        return undefined;
    }
    const entries = [];
    for (const piece of raw.split(",")) {
        const cleaned = piece.trim();
        if (cleaned) {
            entries.push(cleaned);
        }
    }
    return entries.length === 0 ? undefined : entries;
};
138
+ const results = await runSearch({
139
+ maxResults,
140
+ state,
141
+ strategies,
142
+ preferLanguages: splitCsv(options.preferLanguages),
143
+ preferRepos: splitCsv(options.preferRepos),
144
+ });
128
145
  if (options.json) {
129
146
  console.log(formatJsonSuccess(results));
130
147
  }
@@ -37,6 +37,14 @@ export interface SearchOutput {
37
37
  updatedAt?: string;
38
38
  isStalled: boolean;
39
39
  };
40
+ /**
41
+ * Personalization sort-tier signal (#1244). Present only when the
42
+ * caller passed `preferLanguages` / `preferRepos` *and* this
43
+ * candidate matched at least one of them. `boostReasons` is the
44
+ * human-readable explanation (e.g. `"repo affinity: vercel/next.js"`).
45
+ */
46
+ boostScore?: number;
47
+ boostReasons?: string[];
40
48
  }>;
41
49
  excludedRepos: string[];
42
50
  aiPolicyBlocklist: string[];
@@ -47,6 +55,10 @@ interface SearchCommandOptions {
47
55
  maxResults: number;
48
56
  state?: ScoutState;
49
57
  strategies?: SearchStrategy[];
58
+ /** Soft sort boost for candidates whose repo language matches (#1244). */
59
+ preferLanguages?: string[];
60
+ /** Soft sort boost for candidates in these `owner/repo` slugs (#1244). */
61
+ preferRepos?: string[];
50
62
  }
51
63
  export declare function runSearch(options: SearchCommandOptions): Promise<SearchOutput>;
52
64
  export {};
@@ -17,6 +17,8 @@ export async function runSearch(options) {
17
17
  const result = await scout.search({
18
18
  maxResults: options.maxResults,
19
19
  strategies: options.strategies,
20
+ preferLanguages: options.preferLanguages,
21
+ preferRepos: options.preferRepos,
20
22
  });
21
23
  // Persist results to local state and gist
22
24
  scout.saveResults(result.candidates);
@@ -60,6 +62,8 @@ export async function runSearch(options) {
60
62
  isStalled: isLinkedPRStalled(c.vettingResult.linkedPR),
61
63
  }
62
64
  : undefined,
65
+ boostScore: c.boostScore,
66
+ boostReasons: c.boostReasons,
63
67
  };
64
68
  }),
65
69
  excludedRepos: result.excludedRepos,
@@ -74,6 +74,8 @@ export declare class IssueDiscovery {
74
74
  maxResults?: number;
75
75
  strategies?: SearchStrategy[];
76
76
  skippedUrls?: Set<string>;
77
+ preferLanguages?: string[];
78
+ preferRepos?: string[];
77
79
  }): Promise<{
78
80
  candidates: IssueCandidate[];
79
81
  strategiesUsed: SearchStrategy[];
@@ -21,7 +21,8 @@ import { debug, info, warn } from "./logger.js";
21
21
  import { isDocOnlyIssue, applyPerRepoCap, } from "./issue-filtering.js";
22
22
  import { IssueVetter } from "./issue-vetting.js";
23
23
  import { getTopicsForCategories } from "./category-mapping.js";
24
- import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, fetchIssuesFromMaintainedRepos, filterVetAndScore, fetchIssuesFromKnownRepos, searchWithChunkedLabels, } from "./search-phases.js";
24
+ import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, fetchIssuesFromMaintainedRepos, filterVetAndScore, fetchIssuesFromKnownRepos, searchAcrossLanguagesAndLabels, } from "./search-phases.js";
25
+ import { annotateBoost } from "./personalization.js";
25
26
  const MODULE = "issue-discovery";
26
27
  /** If remaining search quota is below this, skip heavy phases (2, 3). */
27
28
  const LOW_BUDGET_THRESHOLD = 20;
@@ -83,7 +84,7 @@ async function runPhase1(octokit, vetter, repos, labels, maxResults, filterIssue
83
84
  };
84
85
  }
85
86
  /** Phase 2: General label-filtered search with multi-tier interleaving. */
86
- async function runPhase2(octokit, vetter, scopes, labels, configLabels, baseQualifiers, maxResults, minStars, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues) {
87
+ async function runPhase2(octokit, vetter, scopes, labels, configLabels, languages, isAnyLanguage, maxResults, minStars, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues) {
87
88
  info(MODULE, "Phase 2: General issue search...");
88
89
  const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
89
90
  // Build per-tier label groups. Multi-tier when 2+ scopes; single-tier otherwise.
@@ -112,7 +113,7 @@ async function runPhase2(octokit, vetter, scopes, labels, configLabels, baseQual
112
113
  let rateLimitHit = false;
113
114
  for (const { tier, tierLabels } of tierLabelGroups) {
114
115
  try {
115
- const allItems = await searchWithChunkedLabels(octokit, tierLabels, 0, (labelQ) => `${baseQualifiers} ${labelQ}`.replace(/ +/g, " ").trim(), budgetPerTier * 3);
116
+ const allItems = await searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, tierLabels, (langQ) => `is:issue is:open ${langQ} no:assignee`.replace(/ +/g, " ").trim(), budgetPerTier * 3);
116
117
  info(MODULE, `Phase 2 [${tier}]: processing ${allItems.length} items...`);
117
118
  const { candidates: tierCandidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, allItems, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], budgetPerTier, minStars, `Phase 2 [${tier}]`);
118
119
  tierResults.push(tierCandidates);
@@ -337,9 +338,6 @@ export class IssueDiscovery {
337
338
  const langQuery = isAnyLanguage
338
339
  ? ""
339
340
  : languages.map((l) => `language:${l}`).join(" ");
340
- const baseQualifiers = `is:issue is:open ${langQuery} no:assignee`
341
- .replace(/ +/g, " ")
342
- .trim();
343
341
  // Build reusable filter
344
342
  const aiBlocklisted = new Set(config.aiPolicyBlocklist);
345
343
  if (aiBlocklisted.size > 0) {
@@ -427,7 +425,7 @@ export class IssueDiscovery {
427
425
  info(MODULE, `Skipping broad phase delay: no results from previous phases, proceeding immediately`);
428
426
  }
429
427
  const remaining = maxResults - allCandidates.length;
430
- const result = await runPhase2(this.octokit, this.vetter, scopes, labels, config.labels, baseQualifiers, remaining, minStars, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
428
+ const result = await runPhase2(this.octokit, this.vetter, scopes, labels, config.labels, languages, isAnyLanguage, remaining, minStars, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
431
429
  allCandidates.push(...result.candidates);
432
430
  phaseErrors["2"] = result.error;
433
431
  if (result.rateLimitHit)
@@ -489,7 +487,11 @@ export class IssueDiscovery {
489
487
  `Found ${allCandidates.length} candidate${allCandidates.length === 1 ? "" : "s"} but some search phases were limited. ` +
490
488
  `Try again after the rate limit resets for complete results.`;
491
489
  }
492
- // Sort by priority, recommendation, then viability score
490
+ // Personalization annotation (#1244): tag each candidate with
491
+ // boostScore + boostReasons before sorting so the new sort tier has
492
+ // values to read. No-op when neither preference list is supplied.
493
+ annotateBoost(allCandidates, options.preferLanguages, options.preferRepos);
494
+ // Sort by priority, recommendation, boost (#1244), then viability score
493
495
  allCandidates.sort((a, b) => {
494
496
  const priorityOrder = {
495
497
  merged_pr: 0,
@@ -504,6 +506,14 @@ export class IssueDiscovery {
504
506
  recommendationOrder[b.recommendation];
505
507
  if (recDiff !== 0)
506
508
  return recDiff;
509
+ // Personalization tier (#1244): higher boostScore wins. Treats
510
+ // undefined as 0 so unboosted candidates rank below boosted peers
511
+ // but stay ordered among themselves by viabilityScore. No-op when
512
+ // `preferLanguages`/`preferRepos` are absent — all candidates carry
513
+ // `boostScore: undefined` and the difference collapses to 0.
514
+ const boostDiff = (b.boostScore ?? 0) - (a.boostScore ?? 0);
515
+ if (boostDiff !== 0)
516
+ return boostDiff;
507
517
  return b.viabilityScore - a.viabilityScore;
508
518
  });
509
519
  const capped = applyPerRepoCap(allCandidates, 2);
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Personalization signals for search ranking (#1244).
3
+ *
4
+ * Translates caller-supplied `preferLanguages` / `preferRepos` lists
5
+ * into a soft `boostScore` on each `IssueCandidate`. The final search
6
+ * sort consults this score between the `recommendation` tier and the
7
+ * raw `viabilityScore`, so personalization reorders ties without
8
+ * changing which candidates pass vetting.
9
+ *
10
+ * This is the minimum-viable subset of Option A in #1244: only language
11
+ * and repo bias, no `boostIssueTypes` / `avoidRepos` / `diversityRatio`
12
+ * yet. Those follow up in separate PRs.
13
+ */
14
+ import type { IssueCandidate } from "./types.js";
15
+ /**
16
+ * Boost weights. Tuned conservatively so personalization tips equally-
17
+ * scored candidates without drowning out high-viability normal results.
18
+ *
19
+ * Rationale:
20
+ * - Repo affinity is the strongest signal — a candidate in a repo the
21
+ * user has merged PRs into has real relationship context. Worth the
22
+ * higher boost.
23
+ * - Language match is broad and easy to satisfy. Lower weight.
24
+ */
25
+ export declare const REPO_BOOST = 20;
26
+ export declare const LANGUAGE_BOOST = 10;
27
+ /**
28
+ * Annotate each candidate with `boostScore` and `boostReasons` based on
29
+ * the caller-supplied preference lists. Mutates the array in place; the
30
+ * caller is responsible for re-sorting afterwards.
31
+ *
32
+ * Mutation (rather than returning new objects) keeps the personalization
33
+ * step a single linear pass over the array the caller already holds —
34
+ * the sort step reads back from the same objects.
35
+ *
36
+ * No-op when both preference lists are empty or undefined: candidates
37
+ * retain `boostScore: undefined` and the sort tier collapses to 0.
38
+ */
39
+ export declare function annotateBoost(candidates: IssueCandidate[], preferLanguages?: string[], preferRepos?: string[]): void;
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Personalization signals for search ranking (#1244).
3
+ *
4
+ * Translates caller-supplied `preferLanguages` / `preferRepos` lists
5
+ * into a soft `boostScore` on each `IssueCandidate`. The final search
6
+ * sort consults this score between the `recommendation` tier and the
7
+ * raw `viabilityScore`, so personalization reorders ties without
8
+ * changing which candidates pass vetting.
9
+ *
10
+ * This is the minimum-viable subset of Option A in #1244: only language
11
+ * and repo bias, no `boostIssueTypes` / `avoidRepos` / `diversityRatio`
12
+ * yet. Those follow up in separate PRs.
13
+ */
14
+ /**
15
+ * Boost weights. Tuned conservatively so personalization tips equally-
16
+ * scored candidates without drowning out high-viability normal results.
17
+ *
18
+ * Rationale:
19
+ * - Repo affinity is the strongest signal — a candidate in a repo the
20
+ * user has merged PRs into has real relationship context. Worth the
21
+ * higher boost.
22
+ * - Language match is broad and easy to satisfy. Lower weight.
23
+ */
24
+ export const REPO_BOOST = 20;
25
+ export const LANGUAGE_BOOST = 10;
26
+ /**
27
+ * Annotate each candidate with `boostScore` and `boostReasons` based on
28
+ * the caller-supplied preference lists. Mutates the array in place; the
29
+ * caller is responsible for re-sorting afterwards.
30
+ *
31
+ * Mutation (rather than returning new objects) keeps the personalization
32
+ * step a single linear pass over the array the caller already holds —
33
+ * the sort step reads back from the same objects.
34
+ *
35
+ * No-op when both preference lists are empty or undefined: candidates
36
+ * retain `boostScore: undefined` and the sort tier collapses to 0.
37
+ */
38
+ export function annotateBoost(candidates, preferLanguages, preferRepos) {
39
+ const langSet = new Set((preferLanguages ?? []).map((l) => l.trim().toLowerCase()).filter(Boolean));
40
+ const repoSet = new Set((preferRepos ?? []).map((r) => r.trim()).filter(Boolean));
41
+ if (langSet.size === 0 && repoSet.size === 0)
42
+ return;
43
+ for (const c of candidates) {
44
+ let score = 0;
45
+ const reasons = [];
46
+ if (repoSet.size > 0 && repoSet.has(c.issue.repo)) {
47
+ score += REPO_BOOST;
48
+ reasons.push(`repo affinity: ${c.issue.repo}`);
49
+ }
50
+ const lang = c.projectHealth.language;
51
+ if (langSet.size > 0 && lang && langSet.has(lang.toLowerCase())) {
52
+ score += LANGUAGE_BOOST;
53
+ reasons.push(`language match: ${lang}`);
54
+ }
55
+ if (score > 0) {
56
+ c.boostScore = score;
57
+ c.boostReasons = reasons;
58
+ }
59
+ }
60
+ }
@@ -61,6 +61,30 @@ export declare function fetchIssuesFromKnownRepos(octokit: Octokit, vetter: Issu
61
61
  * @param perPage Number of results per API call
62
62
  */
63
63
  export declare function searchWithChunkedLabels(octokit: Octokit, labels: string[], reservedOps: number, buildQuery: (labelQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
64
+ /**
65
+ * Build per-call language qualifier strings, fanning out across languages
66
+ * when a multi-language + labels combination would trip GitHub Search's
67
+ * empty-result edge case (multi-`language:` AND with a label OR-group
68
+ * silently returns 0 — see https://github.com/costajohnt/oss-autopilot/issues/1331).
69
+ */
70
+ export declare function buildLanguageVariants(languages: string[], isAnyLanguage: boolean, hasLabels: boolean): string[];
71
+ /**
72
+ * Search across languages with label chunking, deduplicating results.
73
+ *
74
+ * Fans out one query per language when 2+ languages are paired with labels
75
+ * (works around a GitHub Search backend edge case where the multi-language
76
+ * AND combined with a label OR-group returns 0). For each language variant,
77
+ * delegates to searchWithChunkedLabels to keep within GitHub's 5-operator limit.
78
+ *
79
+ * @param octokit Authenticated Octokit instance
80
+ * @param languages Configured languages (used as `language:X` qualifiers)
81
+ * @param isAnyLanguage When true, skip language qualifiers entirely
82
+ * @param labels Label list passed to searchWithChunkedLabels
83
+ * @param buildBaseQuery Builds the query prefix from a language qualifier string;
84
+ * e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
85
+ * @param perPage Results per API call
86
+ */
87
+ export declare function searchAcrossLanguagesAndLabels(octokit: Octokit, languages: string[], isAnyLanguage: boolean, labels: string[], buildBaseQuery: (langQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
64
88
  /**
65
89
  * Shared pipeline: spam-filter, repo-exclusion, vetting, and star-count filter.
66
90
  * Used by Phases 2 and 3 to convert raw search results into vetted candidates.
@@ -291,6 +291,56 @@ export async function searchWithChunkedLabels(octokit, labels, reservedOps, buil
291
291
  }
292
292
  return allItems;
293
293
  }
294
/**
 * Build the language qualifier string(s) to use for search calls.
 *
 * Returns one string per search call that should be issued:
 * - any-language mode or an empty language list: `[""]` (no qualifier);
 * - exactly one language: a single `language:X` qualifier;
 * - 2+ languages without labels: one combined string with every
 *   `language:X` qualifier separated by spaces;
 * - 2+ languages with labels: one qualifier per language, so the caller
 *   fans out one query per language. This works around the GitHub Search
 *   edge case where multiple `language:` qualifiers combined with a label
 *   OR-group silently return 0 results — see
 *   https://github.com/costajohnt/oss-autopilot/issues/1331.
 *
 * Language names containing whitespace (e.g. `Jupyter Notebook`) are
 * wrapped in double quotes; unquoted, the search syntax would treat the
 * trailing word as a separate free-text term. Names that are already
 * quoted are passed through unchanged.
 *
 * @param languages Configured language names.
 * @param isAnyLanguage When true, no language qualifier is emitted.
 * @param hasLabels Whether the query will also carry label qualifiers.
 * @returns Qualifier strings, one per search call.
 */
export function buildLanguageVariants(languages, isAnyLanguage, hasLabels) {
    if (isAnyLanguage || languages.length === 0)
        return [""];
    const qualifiers = languages.map((l) => {
        const needsQuotes = /\s/.test(l) && !/^".*"$/.test(l);
        return `language:${needsQuotes ? `"${l}"` : l}`;
    });
    // Only the multi-language, no-label case is safe to send as one query.
    if (qualifiers.length > 1 && !hasLabels)
        return [qualifiers.join(" ")];
    return qualifiers;
}
309
/**
 * Run the label-chunked search once per language variant and merge the
 * results, keeping the first occurrence of each `html_url`.
 *
 * The variants come from `buildLanguageVariants`, which fans out to one
 * query per language when 2+ languages are combined with labels (the
 * workaround for the GitHub Search case where that combination returns
 * 0 results). Variants are queried sequentially, with a pause of
 * `INTER_QUERY_DELAY_MS` before every variant after the first. Label
 * chunking itself is delegated to `searchWithChunkedLabels`.
 *
 * @param octokit Authenticated Octokit instance
 * @param languages Configured languages (used as `language:X` qualifiers)
 * @param isAnyLanguage When true, skip language qualifiers entirely
 * @param labels Label list passed to searchWithChunkedLabels
 * @param buildBaseQuery Builds the query prefix from a language qualifier string;
 *   e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
 * @param perPage Results per API call
 * @returns De-duplicated search items in first-seen order.
 */
export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, labels, buildBaseQuery, perPage) {
    const hasLabels = labels.length > 0;
    const variants = buildLanguageVariants(languages, isAnyLanguage, hasLabels);
    // Map keeps insertion order, so values() yields first-seen ordering.
    const uniqueByUrl = new Map();
    let isFirstVariant = true;
    for (const variant of variants) {
        if (!isFirstVariant) {
            await sleep(INTER_QUERY_DELAY_MS);
        }
        isFirstVariant = false;
        // Collapse the double spaces left behind by an empty qualifier.
        const toQuery = (labelQ) => `${buildBaseQuery(variant)} ${labelQ}`.replace(/ +/g, " ").trim();
        const batch = await searchWithChunkedLabels(octokit, labels, 0, toQuery, perPage);
        for (const item of batch) {
            if (uniqueByUrl.has(item.html_url)) {
                continue;
            }
            uniqueByUrl.set(item.html_url, item);
        }
    }
    return [...uniqueByUrl.values()];
}
294
344
  /**
295
345
  * Shared pipeline: spam-filter, repo-exclusion, vetting, and star-count filter.
296
346
  * Used by Phases 2 and 3 to convert raw search results into vetted candidates.
@@ -53,6 +53,19 @@ export interface IssueCandidate {
53
53
  reasonsToApprove: string[];
54
54
  viabilityScore: number;
55
55
  searchPriority: SearchPriority;
56
+ /**
57
+ * Personalization sort tier (#1244). Populated only when the caller
58
+ * passes `preferLanguages` / `preferRepos` to `search()` *and* the
59
+ * candidate matches at least one. Affects sort order between the
60
+ * `recommendation` tier and `viabilityScore`; never used as a filter.
61
+ */
62
+ boostScore?: number;
63
+ /**
64
+ * Human-readable reasons the candidate matched personalization bias
65
+ * (#1244). Mirrors `reasonsToApprove`/`reasonsToSkip` shape for
66
+ * symmetry with the existing surface.
67
+ */
68
+ boostReasons?: string[];
56
69
  }
57
70
  /** Subset of RepoScore fields that callers may update. */
58
71
  export interface RepoScoreUpdate {
@@ -122,6 +135,21 @@ export type ScoutConfig = {
122
135
  export interface SearchOptions {
123
136
  maxResults?: number;
124
137
  strategies?: SearchStrategy[];
138
+ /**
139
+ * Per-call personalization bias: candidates whose repo language matches
140
+ * one of these (case-insensitive) get a soft sort boost above
141
+ * equally-recommended non-matches (#1244). Does not filter results, does
142
+ * not change `viabilityScore`. Empty / undefined disables the boost.
143
+ */
144
+ preferLanguages?: string[];
145
+ /**
146
+ * Per-call personalization bias: candidates in one of these
147
+ * `owner/repo` slugs get a soft sort boost above equally-recommended
148
+ * non-matches (#1244). Stronger weight than language match. Does not
149
+ * filter results, does not change `viabilityScore`. Empty / undefined
150
+ * disables the boost.
151
+ */
152
+ preferRepos?: string[];
125
153
  }
126
154
  /** Result of a search operation. */
127
155
  export interface SearchResult {
package/dist/scout.js CHANGED
@@ -148,6 +148,8 @@ export class OssScout {
148
148
  maxResults: options?.maxResults,
149
149
  strategies: options?.strategies,
150
150
  skippedUrls,
151
+ preferLanguages: options?.preferLanguages,
152
+ preferRepos: options?.preferRepos,
151
153
  });
152
154
  this.state.lastSearchAt = new Date().toISOString();
153
155
  this.dirty = true;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oss-scout/core",
3
- "version": "0.9.0",
3
+ "version": "0.10.0",
4
4
  "description": "Personalized GitHub issue finder with multi-strategy search, deep vetting, and viability scoring — CLI, library, MCP server, and Claude Code plugin",
5
5
  "type": "module",
6
6
  "bin": {