@oss-scout/core 0.11.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/dist/cli.bundle.cjs +89 -66
  2. package/dist/cli.js +302 -436
  3. package/dist/commands/command-scout.d.ts +21 -0
  4. package/dist/commands/command-scout.js +21 -0
  5. package/dist/commands/config.js +10 -128
  6. package/dist/commands/features.js +15 -28
  7. package/dist/commands/results.d.ts +13 -2
  8. package/dist/commands/results.js +29 -2
  9. package/dist/commands/search.d.ts +4 -0
  10. package/dist/commands/search.js +65 -70
  11. package/dist/commands/setup.d.ts +2 -0
  12. package/dist/commands/setup.js +35 -6
  13. package/dist/commands/skip.d.ts +4 -0
  14. package/dist/commands/skip.js +45 -55
  15. package/dist/commands/sync.d.ts +10 -0
  16. package/dist/commands/sync.js +10 -0
  17. package/dist/commands/vet-list.js +3 -19
  18. package/dist/commands/vet.js +18 -25
  19. package/dist/commands/with-scout.d.ts +32 -0
  20. package/dist/commands/with-scout.js +41 -0
  21. package/dist/core/anti-llm-policy.js +5 -33
  22. package/dist/core/bootstrap.d.ts +2 -2
  23. package/dist/core/bootstrap.js +5 -9
  24. package/dist/core/errors.d.ts +10 -0
  25. package/dist/core/errors.js +20 -5
  26. package/dist/core/feature-discovery.d.ts +13 -1
  27. package/dist/core/feature-discovery.js +104 -81
  28. package/dist/core/gist-state-store.d.ts +13 -12
  29. package/dist/core/gist-state-store.js +128 -53
  30. package/dist/core/http-cache.d.ts +32 -2
  31. package/dist/core/http-cache.js +74 -19
  32. package/dist/core/issue-discovery.d.ts +12 -1
  33. package/dist/core/issue-discovery.js +94 -67
  34. package/dist/core/issue-eligibility.d.ts +11 -4
  35. package/dist/core/issue-eligibility.js +124 -69
  36. package/dist/core/issue-graphql.d.ts +58 -0
  37. package/dist/core/issue-graphql.js +108 -0
  38. package/dist/core/issue-vetting.d.ts +115 -9
  39. package/dist/core/issue-vetting.js +246 -109
  40. package/dist/core/local-state.d.ts +6 -2
  41. package/dist/core/local-state.js +23 -5
  42. package/dist/core/logger.d.ts +12 -4
  43. package/dist/core/logger.js +33 -7
  44. package/dist/core/personalization.d.ts +30 -10
  45. package/dist/core/personalization.js +64 -24
  46. package/dist/core/preference-fields.d.ts +47 -0
  47. package/dist/core/preference-fields.js +180 -0
  48. package/dist/core/probe-repo-file.d.ts +47 -0
  49. package/dist/core/probe-repo-file.js +57 -0
  50. package/dist/core/repo-health.js +40 -32
  51. package/dist/core/roadmap.js +26 -22
  52. package/dist/core/schemas.d.ts +148 -26
  53. package/dist/core/schemas.js +83 -17
  54. package/dist/core/search-budget.d.ts +9 -0
  55. package/dist/core/search-budget.js +36 -3
  56. package/dist/core/search-phases.d.ts +4 -21
  57. package/dist/core/search-phases.js +37 -89
  58. package/dist/core/types.d.ts +151 -38
  59. package/dist/core/utils.js +60 -26
  60. package/dist/formatters/human.d.ts +60 -0
  61. package/dist/formatters/human.js +199 -0
  62. package/dist/formatters/markdown.d.ts +10 -0
  63. package/dist/formatters/markdown.js +31 -0
  64. package/dist/index.d.ts +6 -2
  65. package/dist/index.js +8 -0
  66. package/dist/scout.d.ts +75 -12
  67. package/dist/scout.js +265 -26
  68. package/package.json +1 -1
@@ -8,6 +8,7 @@ import { Octokit } from "@octokit/rest";
8
8
  import { type SearchPriority, type IssueCandidate, type IssueScope } from "./types.js";
9
9
  import { type GitHubSearchItem } from "./issue-filtering.js";
10
10
  import { IssueVetter } from "./issue-vetting.js";
11
+ import { type SearchBudgetTracker } from "./search-budget.js";
11
12
  /** Resolve scope tiers into a flat label list, merged with custom labels. */
12
13
  export declare function buildEffectiveLabels(scopes: IssueScope[], customLabels: string[]): string[];
13
14
  /** Round-robin interleave multiple arrays. */
@@ -22,7 +23,7 @@ export declare function cachedSearchIssues(octokit: Octokit, params: {
22
23
  sort: "created" | "updated" | "comments" | "reactions" | "interactions";
23
24
  order: "asc" | "desc";
24
25
  per_page: number;
25
- }): Promise<{
26
+ }, tracker?: SearchBudgetTracker): Promise<{
26
27
  total_count: number;
27
28
  items: GitHubSearchItem[];
28
29
  }>;
@@ -60,7 +61,7 @@ export declare function fetchIssuesFromKnownRepos(octokit: Octokit, vetter: Issu
60
61
  * @param buildQuery Callback that receives a label query string and returns the full search query
61
62
  * @param perPage Number of results per API call
62
63
  */
63
- export declare function searchWithChunkedLabels(octokit: Octokit, labels: string[], reservedOps: number, buildQuery: (labelQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
64
+ export declare function searchWithChunkedLabels(octokit: Octokit, labels: string[], reservedOps: number, buildQuery: (labelQuery: string) => string, perPage: number, tracker?: SearchBudgetTracker): Promise<GitHubSearchItem[]>;
64
65
  /**
65
66
  * Build per-call language qualifier strings, fanning out across languages
66
67
  * when a multi-language + labels combination would trip GitHub Search's
@@ -84,7 +85,7 @@ export declare function buildLanguageVariants(languages: string[], isAnyLanguage
84
85
  * e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
85
86
  * @param perPage Results per API call
86
87
  */
87
- export declare function searchAcrossLanguagesAndLabels(octokit: Octokit, languages: string[], isAnyLanguage: boolean, labels: string[], buildBaseQuery: (langQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
88
+ export declare function searchAcrossLanguagesAndLabels(octokit: Octokit, languages: string[], isAnyLanguage: boolean, labels: string[], buildBaseQuery: (langQuery: string) => string, perPage: number, tracker?: SearchBudgetTracker): Promise<GitHubSearchItem[]>;
88
89
  /**
89
90
  * Shared pipeline: spam-filter, repo-exclusion, vetting, and star-count filter.
90
91
  * Used by Phases 2 and 3 to convert raw search results into vetted candidates.
@@ -94,21 +95,3 @@ export declare function filterVetAndScore(vetter: IssueVetter, items: GitHubSear
94
95
  allVetFailed: boolean;
95
96
  rateLimitHit: boolean;
96
97
  }>;
97
- /**
98
- * Search for issues within specific repos using batched queries.
99
- *
100
- * To avoid GitHub's secondary rate limit (30 requests/minute), we batch
101
- * multiple repos into a single search query using OR syntax:
102
- * repo:owner1/repo1 OR repo:owner2/repo2 OR repo:owner3/repo3
103
- *
104
- * Labels are chunked separately to stay within GitHub's 5 boolean operator limit.
105
- * Each batch of repos consumes (batch.length - 1) OR operators, and the remaining
106
- * budget is used for label OR operators.
107
- *
108
- * This reduces API calls from N (one per repo) to ceil(N/BATCH_SIZE) * label_chunks.
109
- */
110
- export declare function searchInRepos(octokit: Octokit, vetter: IssueVetter, repos: string[], baseQualifiers: string, labels: string[], maxResults: number, priority: SearchPriority, filterFn: (items: GitHubSearchItem[]) => GitHubSearchItem[]): Promise<{
111
- candidates: IssueCandidate[];
112
- allBatchesFailed: boolean;
113
- rateLimitHit: boolean;
114
- }>;
@@ -7,10 +7,10 @@
7
7
  import { SCOPE_LABELS, } from "./types.js";
8
8
  import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
9
9
  import { debug, warn } from "./logger.js";
10
- import { getHttpCache } from "./http-cache.js";
10
+ import { getHttpCache, versionedCacheKey } from "./http-cache.js";
11
11
  import { detectLabelFarmingRepos, } from "./issue-filtering.js";
12
12
  import { extractRepoFromUrl, sleep } from "./utils.js";
13
- import { getSearchBudgetTracker } from "./search-budget.js";
13
+ import { getSearchBudgetTracker, } from "./search-budget.js";
14
14
  const MODULE = "search-phases";
15
15
  /** GitHub Search API enforces a max of 5 AND/OR/NOT operators per query. */
16
16
  const GITHUB_MAX_BOOLEAN_OPS = 5;
@@ -18,8 +18,6 @@ const GITHUB_MAX_BOOLEAN_OPS = 5;
18
18
  * Set to 2000ms as a safety floor (max 30/min at the limit). The SearchBudgetTracker
19
19
  * adds additional adaptive delays when needed. */
20
20
  const INTER_QUERY_DELAY_MS = 2000;
21
- /** Batch size for repo queries. 3 repos = 2 OR operators, leaving room for labels. */
22
- const BATCH_SIZE = 3;
23
21
  /**
24
22
  * Chunk labels into groups that fit within the operator budget.
25
23
  * N labels require N-1 OR operators, so maxPerChunk = budget + 1.
@@ -77,14 +75,6 @@ export function interleaveArrays(arrays) {
77
75
  }
78
76
  return result;
79
77
  }
80
- /** Split repos into batches of the specified size. */
81
- function batchRepos(repos, batchSize) {
82
- const batches = [];
83
- for (let i = 0; i < repos.length; i += batchSize) {
84
- batches.push(repos.slice(i, i + batchSize));
85
- }
86
- return batches;
87
- }
88
78
  // ── Search caching ──
89
79
  /** TTL for cached search API results (15 minutes). */
90
80
  const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
@@ -93,8 +83,12 @@ const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
93
83
  * Repeated identical queries within SEARCH_CACHE_TTL_MS return cached results
94
84
  * without consuming GitHub API rate limit points.
95
85
  */
96
- export async function cachedSearchIssues(octokit, params) {
97
- const cacheKey = `search:${params.q}:${params.sort}:${params.order}:${params.per_page}`;
86
+ export async function cachedSearchIssues(octokit, params,
87
+ // Optional injected budget tracker. Defaults to the shared singleton so
88
+ // existing callers keep the exact same global budget accounting; a host
89
+ // serving concurrent searches can inject a per-search tracker for isolation.
90
+ tracker = getSearchBudgetTracker()) {
91
+ const cacheKey = versionedCacheKey(`search:${params.q}:${params.sort}:${params.order}:${params.per_page}`);
98
92
  const cache = getHttpCache();
99
93
  // Check cache first
100
94
  const cached = cache.getIfFresh(cacheKey, SEARCH_CACHE_TTL_MS);
@@ -103,7 +97,6 @@ export async function cachedSearchIssues(octokit, params) {
103
97
  return cached;
104
98
  }
105
99
  // Fetch from API
106
- const tracker = getSearchBudgetTracker();
107
100
  await tracker.waitForBudget();
108
101
  let data;
109
102
  try {
@@ -206,17 +199,32 @@ export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels,
206
199
  const repoFullName = repos[i];
207
200
  const [owner, repo] = repoFullName.split("/");
208
201
  try {
209
- const response = await octokit.issues.listForRepo({
210
- owner,
211
- repo,
212
- state: "open",
213
- sort: "created",
214
- direction: "desc",
215
- per_page: 5,
216
- ...(labels.length > 0 ? { labels: labels.join(",") } : {}),
217
- });
202
+ // One query per label: the REST `labels` parameter is AND semantics
203
+ // (issues carrying ALL listed labels), so a comma-joined list like
204
+ // "good first issue,help wanted" returned ~nothing (#118). Querying
205
+ // per label and merging restores the intended any-of behavior.
206
+ const labelFilters = labels.length > 0 ? labels : [undefined];
207
+ const seenUrls = new Set();
208
+ const rawIssues = [];
209
+ for (const label of labelFilters) {
210
+ const response = await octokit.issues.listForRepo({
211
+ owner,
212
+ repo,
213
+ state: "open",
214
+ sort: "created",
215
+ direction: "desc",
216
+ per_page: 5,
217
+ ...(label !== undefined ? { labels: label } : {}),
218
+ });
219
+ for (const issue of response.data) {
220
+ if (seenUrls.has(issue.html_url))
221
+ continue;
222
+ seenUrls.add(issue.html_url);
223
+ rawIssues.push(issue);
224
+ }
225
+ }
218
226
  // Filter out pull requests (REST issues endpoint returns both) and assigned issues
219
- const issuesOnly = response.data.filter((item) => !("pull_request" in item) && !item.assignee);
227
+ const issuesOnly = rawIssues.filter((item) => !("pull_request" in item) && !item.assignee);
220
228
  const mapped = issuesOnly.map((issue) => ({
221
229
  html_url: issue.html_url,
222
230
  repository_url: `https://api.github.com/repos/${repoFullName}`,
@@ -268,7 +276,7 @@ export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels,
268
276
  * @param buildQuery Callback that receives a label query string and returns the full search query
269
277
  * @param perPage Number of results per API call
270
278
  */
271
- export async function searchWithChunkedLabels(octokit, labels, reservedOps, buildQuery, perPage) {
279
+ export async function searchWithChunkedLabels(octokit, labels, reservedOps, buildQuery, perPage, tracker = getSearchBudgetTracker()) {
272
280
  const labelChunks = chunkLabels(labels, reservedOps);
273
281
  const seenUrls = new Set();
274
282
  const allItems = [];
@@ -281,7 +289,7 @@ export async function searchWithChunkedLabels(octokit, labels, reservedOps, buil
281
289
  sort: "created",
282
290
  order: "desc",
283
291
  per_page: perPage,
284
- });
292
+ }, tracker);
285
293
  for (const item of data.items) {
286
294
  if (!seenUrls.has(item.html_url)) {
287
295
  seenUrls.add(item.html_url);
@@ -322,7 +330,7 @@ export function buildLanguageVariants(languages, isAnyLanguage, hasLabels) {
322
330
  * e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
323
331
  * @param perPage Results per API call
324
332
  */
325
- export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, labels, buildBaseQuery, perPage) {
333
+ export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, labels, buildBaseQuery, perPage, tracker = getSearchBudgetTracker()) {
326
334
  const langVariants = buildLanguageVariants(languages, isAnyLanguage, labels.length > 0);
327
335
  const seenUrls = new Set();
328
336
  const allItems = [];
@@ -331,7 +339,7 @@ export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLa
331
339
  await sleep(INTER_QUERY_DELAY_MS);
332
340
  const items = await searchWithChunkedLabels(octokit, labels, 0, (labelQ) => `${buildBaseQuery(langVariants[i])} ${labelQ}`
333
341
  .replace(/ +/g, " ")
334
- .trim(), perPage);
342
+ .trim(), perPage, tracker);
335
343
  for (const item of items) {
336
344
  if (!seenUrls.has(item.html_url)) {
337
345
  seenUrls.add(item.html_url);
@@ -378,63 +386,3 @@ export async function filterVetAndScore(vetter, items, filterIssues, excludedRep
378
386
  }
379
387
  return { candidates: starFiltered, allVetFailed, rateLimitHit };
380
388
  }
381
- /**
382
- * Search for issues within specific repos using batched queries.
383
- *
384
- * To avoid GitHub's secondary rate limit (30 requests/minute), we batch
385
- * multiple repos into a single search query using OR syntax:
386
- * repo:owner1/repo1 OR repo:owner2/repo2 OR repo:owner3/repo3
387
- *
388
- * Labels are chunked separately to stay within GitHub's 5 boolean operator limit.
389
- * Each batch of repos consumes (batch.length - 1) OR operators, and the remaining
390
- * budget is used for label OR operators.
391
- *
392
- * This reduces API calls from N (one per repo) to ceil(N/BATCH_SIZE) * label_chunks.
393
- */
394
- export async function searchInRepos(octokit, vetter, repos, baseQualifiers, labels, maxResults, priority, filterFn) {
395
- const candidates = [];
396
- const batches = batchRepos(repos, BATCH_SIZE);
397
- let failedBatches = 0;
398
- let rateLimitFailures = 0;
399
- for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) {
400
- const batch = batches[batchIdx];
401
- if (candidates.length >= maxResults)
402
- break;
403
- // Delay between batches to avoid secondary rate limits
404
- if (batchIdx > 0)
405
- await sleep(INTER_QUERY_DELAY_MS);
406
- try {
407
- const repoFilter = batch.map((r) => `repo:${r}`).join(" OR ");
408
- const repoOps = batch.length - 1;
409
- const perPage = Math.min(30, (maxResults - candidates.length) * 3);
410
- const allItems = await searchWithChunkedLabels(octokit, labels, repoOps, (labelQ) => `${baseQualifiers} ${labelQ} (${repoFilter})`
411
- .replace(/ +/g, " ")
412
- .trim(), perPage);
413
- if (allItems.length > 0) {
414
- const filtered = filterFn(allItems);
415
- const remainingNeeded = maxResults - candidates.length;
416
- const { candidates: vetted, rateLimitHit: vetRateLimitHit } = await vetter.vetIssuesParallel(filtered.slice(0, remainingNeeded * 2).map((i) => i.html_url), remainingNeeded, priority);
417
- candidates.push(...vetted);
418
- if (vetRateLimitHit)
419
- rateLimitFailures++;
420
- }
421
- }
422
- catch (error) {
423
- if (getHttpStatusCode(error) === 401)
424
- throw error;
425
- failedBatches++;
426
- if (isRateLimitError(error)) {
427
- rateLimitFailures++;
428
- }
429
- const batchReposStr = batch.join(", ");
430
- warn(MODULE, `Error searching issues in batch [${batchReposStr}]:`, errorMessage(error));
431
- }
432
- }
433
- const allBatchesFailed = failedBatches === batches.length && batches.length > 0;
434
- const rateLimitHit = rateLimitFailures > 0;
435
- if (allBatchesFailed) {
436
- warn(MODULE, `All ${batches.length} batch(es) failed for ${priority} phase. ` +
437
- `This may indicate a systemic issue (rate limit, auth, network).`);
438
- }
439
- return { candidates, allBatchesFailed, rateLimitHit };
440
- }
@@ -2,9 +2,10 @@
2
2
  * Core types for oss-scout — ephemeral types that are never persisted.
3
3
  */
4
4
  import type { RepoSignals, TrackedIssue, IssueVettingResult, IssueScope, ScoutState, SearchStrategy } from "./schemas.js";
5
+ import type { LogLevel } from "./logger.js";
5
6
  export type { ProjectCategory, IssueScope, RepoSignals, RepoScore, StoredMergedPR, StoredClosedPR, ContributionGuidelines, IssueVettingResult, LinkedPR, TrackedIssue, ScoutPreferences, SavedCandidate, ScoutState, SearchStrategy, } from "./schemas.js";
6
- /** Health snapshot of a GitHub repository. */
7
- export interface ProjectHealth {
7
+ /** A successful health snapshot of a GitHub repository. */
8
+ export interface ProjectHealthData {
8
9
  repo: string;
9
10
  lastCommitAt: string;
10
11
  daysSinceLastCommit: number;
@@ -15,9 +16,26 @@ export interface ProjectHealth {
15
16
  stargazersCount?: number;
16
17
  forksCount?: number;
17
18
  language?: string | null;
18
- checkFailed?: boolean;
19
- failureReason?: string;
19
+ /** Discriminant: a real snapshot is never `checkFailed`. */
20
+ checkFailed?: false;
21
+ failureReason?: undefined;
20
22
  }
23
+ /**
24
+ * The health check itself failed (transient API error). Only the repo and the
25
+ * failure reason are known — none of the snapshot fields are meaningful, so the
26
+ * type does not carry them. Narrow on `checkFailed` to reach a real snapshot.
27
+ */
28
+ export interface ProjectHealthFailure {
29
+ repo: string;
30
+ checkFailed: true;
31
+ failureReason: string;
32
+ }
33
+ /**
34
+ * Health snapshot of a GitHub repository, or a marker that the check failed.
35
+ * A discriminated union (on `checkFailed`) so the "failure" shape can't be read
36
+ * as if it carried real snapshot data. Narrow before reading snapshot fields.
37
+ */
38
+ export type ProjectHealth = ProjectHealthData | ProjectHealthFailure;
21
39
  /** Priority tier for issue search results. */
22
40
  export type SearchPriority = "merged_pr" | "starred" | "normal";
23
41
  /** Source file the anti-LLM policy match came from, or null when no file matched. */
@@ -43,6 +61,13 @@ export interface SLMTriageSummary {
43
61
  /** A fully vetted issue candidate with scoring. */
44
62
  export interface IssueCandidate {
45
63
  issue: TrackedIssue;
64
+ /**
65
+ * GitHub issue state at vet time (#120). GitHub answers 200 for closed
66
+ * issues, so without this vet-list classified them still_available and
67
+ * --prune kept them. Optional: cached candidates from older versions
68
+ * lack it and read as open.
69
+ */
70
+ issueState?: "open" | "closed";
46
71
  vettingResult: IssueVettingResult;
47
72
  projectHealth: ProjectHealth;
48
73
  antiLLMPolicy: AntiLLMPolicyResult;
@@ -54,26 +79,22 @@ export interface IssueCandidate {
54
79
  viabilityScore: number;
55
80
  searchPriority: SearchPriority;
56
81
  /**
57
- * Personalization sort tier (#1244). Populated only when the caller
58
- * passes `preferLanguages` / `preferRepos` to `search()` *and* the
59
- * candidate matches at least one. Affects sort order between the
60
- * `recommendation` tier and `viabilityScore`; never used as a filter.
61
- */
62
- boostScore?: number;
63
- /**
64
- * Human-readable reasons the candidate matched personalization bias
65
- * (#1244). Mirrors `reasonsToApprove`/`reasonsToSkip` shape for
66
- * symmetry with the existing surface.
67
- */
68
- boostReasons?: string[];
69
- /**
70
- * Marks a candidate that filled a reserved diversity slot (#1244).
71
- * Populated only when `diversityRatio > 0` was passed AND the
72
- * candidate matched no personalization bias. Mutually exclusive with
73
- * a non-zero `boostScore` (a candidate cannot be both biased-toward
74
- * and a diversity slot in the same result set).
82
+ * Personalization marker (#1244). A candidate is EITHER boosted (it matched
83
+ * a `preferLanguages` / `preferRepos` bias and gets a soft sort boost between
84
+ * the `recommendation` tier and `viabilityScore`) OR a diversity slot (it
85
+ * matched no bias and filled a slot reserved by `diversityRatio`) — never
86
+ * both. Modelling it as a single discriminated field makes that mutual
87
+ * exclusivity structural instead of prose across three optional fields.
88
+ * Absent when no personalization was requested or the candidate matched
89
+ * nothing.
75
90
  */
76
- diversitySlot?: boolean;
91
+ personalization?: {
92
+ kind: "boosted";
93
+ score: number;
94
+ reasons: string[];
95
+ } | {
96
+ kind: "diversity";
97
+ };
77
98
  }
78
99
  /** Subset of RepoScore fields that callers may update. */
79
100
  export interface RepoScoreUpdate {
@@ -85,29 +106,51 @@ export interface RepoScoreUpdate {
85
106
  stargazersCount?: number;
86
107
  language?: string | null;
87
108
  }
88
- /** Result of a check (e.g., no existing PR, not claimed). */
89
- export interface CheckResult {
109
+ /**
110
+ * Result of a check (e.g., no existing PR, not claimed). Discriminated on
111
+ * `inconclusive`: a `reason` exists only when the check could not be completed
112
+ * (a transient API error), and an inconclusive check always reports `passed:
113
+ * true` because the caller assumes the issue is still eligible. A conclusive
114
+ * result carries no `reason`.
115
+ */
116
+ export type CheckResult = {
90
117
  passed: boolean;
91
- inconclusive?: boolean;
92
- reason?: string;
93
- }
118
+ inconclusive?: false;
119
+ reason?: undefined;
120
+ } | {
121
+ passed: true;
122
+ inconclusive: true;
123
+ reason: string;
124
+ };
94
125
  export declare const SCOPE_LABELS: Record<IssueScope, string[]>;
95
126
  /** Options for batch vetting saved results. */
96
127
  export interface VetListOptions {
97
128
  concurrency?: number;
98
129
  prune?: boolean;
99
130
  }
100
- /** A single entry in the vet-list result. */
101
- export interface VetListEntry {
131
+ /** Identity fields shared by every vet-list entry, regardless of outcome. */
132
+ export interface VetListEntryBase {
102
133
  issueUrl: string;
103
134
  repo: string;
104
135
  number: number;
105
136
  title: string;
106
137
  status: "still_available" | "claimed" | "closed" | "has_pr" | "error";
107
- recommendation?: "approve" | "skip" | "needs_review";
108
- viabilityScore?: number;
109
- errorMessage?: string;
110
138
  }
139
+ /**
140
+ * A single entry in the vet-list result. Discriminated on `ok`: a completed vet
141
+ * (`ok: true`) carries `recommendation` + `viabilityScore` and never an
142
+ * `errorMessage`; a vet that threw (`ok: false`, including a 404/410 that
143
+ * classifies the issue as `closed`) carries only the `errorMessage`. This makes
144
+ * the "score xor error" invariant structural instead of prose.
145
+ */
146
+ export type VetListEntry = (VetListEntryBase & {
147
+ ok: true;
148
+ recommendation: "approve" | "skip" | "needs_review";
149
+ viabilityScore: number;
150
+ }) | (VetListEntryBase & {
151
+ ok: false;
152
+ errorMessage: string;
153
+ });
111
154
  /** Summary counts for a vet-list run. */
112
155
  export interface VetListSummary {
113
156
  total: number;
@@ -117,27 +160,69 @@ export interface VetListSummary {
117
160
  hasPR: number;
118
161
  errors: number;
119
162
  }
163
+ /** Result of reconciling tracked open PRs against their current GitHub state (#164). */
164
+ export interface SyncResult {
165
+ /** Open PRs checked. */
166
+ checked: number;
167
+ /** Transitioned to merged. */
168
+ merged: number;
169
+ /** Transitioned to closed-without-merge. */
170
+ closed: number;
171
+ /** Still open (kept). */
172
+ stillOpen: number;
173
+ /** Could not be checked (parse failure or transient API error). */
174
+ errors: number;
175
+ }
176
+ /** A saved result whose availability status changed since the last vet-list (#165). */
177
+ export interface VetStatusTransition {
178
+ issueUrl: string;
179
+ repo: string;
180
+ number: number;
181
+ from: VetListEntry["status"];
182
+ to: VetListEntry["status"];
183
+ }
120
184
  /** Result of a batch vet-list operation. */
121
185
  export interface VetListResult {
122
186
  results: VetListEntry[];
123
187
  summary: VetListSummary;
124
188
  prunedCount?: number;
189
+ /**
190
+ * Status changes since the previous vet-list run, computed from each saved
191
+ * result's `lastStatus`. Empty on a first run (no prior status to compare).
192
+ */
193
+ transitions: VetStatusTransition[];
125
194
  }
126
195
  /** Configuration for creating an OssScout instance. */
127
196
  export type ScoutConfig = {
128
- /** GitHub token with `repo` read scope. Add `gist` scope for persistence. */
197
+ /** GitHub token with `repo` read scope. Add `gist` scope for gist persistence. */
129
198
  githubToken: string;
130
- /** Use gist-backed persistence (default for standalone CLI). */
131
- persistence?: "gist";
132
- /** Gist ID override. Skips gist discovery/creation if provided. */
199
+ /**
200
+ * State storage. Omitted defaults to `"local"`: load and persist
201
+ * `~/.oss-scout/state.json`, no network on construct. `"gist"` syncs
202
+ * via a private GitHub gist (needs the `gist` token scope).
203
+ */
204
+ persistence?: "local" | "gist";
205
+ /** Gist ID override (gist mode). Skips gist discovery/creation if provided. */
133
206
  gistId?: string;
207
+ /**
208
+ * Minimum log level emitted to stderr. Omitted leaves the global level
209
+ * (default "info"). Hosts that don't want the "[INFO] Phase 0..."
210
+ * chatter can pass "warn" or "silent" (#156).
211
+ */
212
+ logLevel?: LogLevel;
134
213
  } | {
135
214
  /** GitHub token with `repo` read scope. */
136
215
  githubToken: string;
137
- /** Caller provides state directly. */
216
+ /** Caller provides and owns state directly (embedding hosts). */
138
217
  persistence: "provided";
139
218
  /** Pre-loaded state. Required when persistence is 'provided'. */
140
219
  initialState: ScoutState;
220
+ /**
221
+ * Minimum log level emitted to stderr. Omitted leaves the global level
222
+ * (default "info"). Hosts that don't want the "[INFO] Phase 0..."
223
+ * chatter can pass "warn" or "silent" (#156).
224
+ */
225
+ logLevel?: LogLevel;
141
226
  };
142
227
  /** Options for the search method. */
143
228
  export interface SearchOptions {
@@ -158,6 +243,21 @@ export interface SearchOptions {
158
243
  * disables the boost.
159
244
  */
160
245
  preferRepos?: string[];
246
+ /**
247
+ * Per-call personalization bias: a SOFT penalty (milder than the hard
248
+ * `excludeRepos` filter) for candidates in one of these `owner/repo` slugs
249
+ * (#168). They are pushed below equally-recommended non-matches but not
250
+ * removed; a strong boost can still outweigh the penalty. Empty / undefined
251
+ * disables it.
252
+ */
253
+ avoidRepos?: string[];
254
+ /**
255
+ * Per-call personalization bias: a soft boost for candidates whose issue
256
+ * labels match one of these types, case-insensitive (e.g. "bug",
257
+ * "good first issue") (#168). Same tier as a language match. Does not filter
258
+ * results, does not change `viabilityScore`. Empty / undefined disables it.
259
+ */
260
+ boostIssueTypes?: string[];
161
261
  /**
162
262
  * Counterweight against echo-chamber bias as `preferLanguages` /
163
263
  * `preferRepos` boosts accumulate over time (#1244). A value of 0.2
@@ -168,6 +268,19 @@ export interface SearchOptions {
168
268
  * clamped to [0, 1].
169
269
  */
170
270
  diversityRatio?: number;
271
+ /**
272
+ * Per-call override for the delay between search phases (ms). Defaults to
273
+ * the `interPhaseDelayMs` preference (30s). Latency-sensitive callers like
274
+ * the MCP server pass 0; the sliding-window budget tracker still paces the
275
+ * actual API calls, so the fixed sleep is the only thing removed (#143).
276
+ */
277
+ interPhaseDelayMs?: number;
278
+ /**
279
+ * Per-call override for the extra cooldown before the broad phase (ms).
280
+ * Defaults to the `broadPhaseDelayMs` preference (90s). See
281
+ * `interPhaseDelayMs` for the rationale (#143).
282
+ */
283
+ broadPhaseDelayMs?: number;
171
284
  }
172
285
  /** Result of a search operation. */
173
286
  export interface SearchResult {
@@ -36,14 +36,29 @@ export function getCacheDir() {
36
36
  * - https://api.github.com/repos/owner/repo/...
37
37
  */
38
38
  export function extractRepoFromUrl(url) {
39
+ // Real URL parsing: the previous regexes were unanchored (any host
40
+ // containing "github.com" matched) and leaked query/fragment text into
41
+ // the repo segment ("repo?tab=readme").
42
+ let parsed;
43
+ try {
44
+ parsed = new URL(url);
45
+ }
46
+ catch {
47
+ return null;
48
+ }
49
+ const host = parsed.hostname.toLowerCase().replace(/^www\./, "");
50
+ const segments = parsed.pathname.split("/").filter(Boolean);
39
51
  // API URLs: https://api.github.com/repos/owner/repo[/...]
40
- const apiMatch = url.match(/api\.github\.com\/repos\/([^/]+\/[^/]+)/);
41
- if (apiMatch)
42
- return apiMatch[1];
52
+ if (host === "api.github.com") {
53
+ if (segments[0] === "repos" && segments.length >= 3) {
54
+ return `${segments[1]}/${segments[2]}`;
55
+ }
56
+ return null;
57
+ }
43
58
  // Web URLs: https://github.com/owner/repo[/...]
44
- const webMatch = url.match(/github\.com\/([^/]+\/[^/]+)/);
45
- if (webMatch)
46
- return webMatch[1];
59
+ if (host === "github.com" && segments.length >= 2) {
60
+ return `${segments[0]}/${segments[1]}`;
61
+ }
47
62
  return null;
48
63
  }
49
64
  const OWNER_PATTERN = /^[a-zA-Z0-9_-]+$/;
@@ -52,25 +67,38 @@ function isValidOwnerRepo(owner, repo) {
52
67
  return OWNER_PATTERN.test(owner) && REPO_PATTERN.test(repo);
53
68
  }
54
69
  export function parseGitHubUrl(url) {
55
- if (!url.startsWith("https://github.com/"))
56
- return null;
57
- const prMatch = url.match(/github\.com\/([^/]+)\/([^/]+)\/pull\/(\d+)/);
58
- if (prMatch) {
59
- const owner = prMatch[1];
60
- const repo = prMatch[2];
61
- if (!isValidOwnerRepo(owner, repo))
62
- return null;
63
- return { owner, repo, number: parseInt(prMatch[3], 10), type: "pull" };
70
+ // Accept pasteable variants: http://, www., and bare github.com/... forms
71
+ // normalize to a parseable URL. Strict canonical-form validation for
72
+ // command input lives in commands/validation.ts; this parser is lenient.
73
+ const normalized = /^(?:www\.)?github\.com\//i.test(url)
74
+ ? `https://${url}`
75
+ : url;
76
+ let parsed;
77
+ try {
78
+ parsed = new URL(normalized);
64
79
  }
65
- const issueMatch = url.match(/github\.com\/([^/]+)\/([^/]+)\/issues\/(\d+)/);
66
- if (issueMatch) {
67
- const owner = issueMatch[1];
68
- const repo = issueMatch[2];
69
- if (!isValidOwnerRepo(owner, repo))
70
- return null;
71
- return { owner, repo, number: parseInt(issueMatch[3], 10), type: "issues" };
80
+ catch {
81
+ return null;
72
82
  }
73
- return null;
83
+ if (parsed.protocol !== "https:" && parsed.protocol !== "http:")
84
+ return null;
85
+ const host = parsed.hostname.toLowerCase().replace(/^www\./, "");
86
+ if (host !== "github.com")
87
+ return null;
88
+ // Exactly owner/repo/(pull|issues)/<digits>; trailing slash tolerated via
89
+ // filter(Boolean), query/fragment excluded by pathname. A malformed number
90
+ // segment ("123abc") no longer half-parses to 123.
91
+ const segments = parsed.pathname.split("/").filter(Boolean);
92
+ if (segments.length !== 4)
93
+ return null;
94
+ const [owner, repo, type, num] = segments;
95
+ if (type !== "pull" && type !== "issues")
96
+ return null;
97
+ if (!isValidOwnerRepo(owner, repo))
98
+ return null;
99
+ if (!/^\d+$/.test(num))
100
+ return null;
101
+ return { owner, repo, number: parseInt(num, 10), type };
74
102
  }
75
103
  export function daysBetween(from, to = new Date()) {
76
104
  return Math.max(0, Math.floor((to.getTime() - from.getTime()) / (1000 * 60 * 60 * 24)));
@@ -91,8 +119,12 @@ export function getGitHubToken() {
91
119
  if (tokenFetchAttempted)
92
120
  return null;
93
121
  tokenFetchAttempted = true;
94
- if (process.env.GITHUB_TOKEN) {
95
- cachedGitHubToken = process.env.GITHUB_TOKEN;
122
+ // Trim: a trailing newline (e.g. GITHUB_TOKEN=$(cat file)) produces a
123
+ // malformed Authorization header with confusing 401s. A whitespace-only
124
+ // value falls through to the gh CLI.
125
+ const envToken = process.env.GITHUB_TOKEN?.trim();
126
+ if (envToken) {
127
+ cachedGitHubToken = envToken;
96
128
  return cachedGitHubToken;
97
129
  }
98
130
  try {
@@ -108,7 +140,9 @@ export function getGitHubToken() {
108
140
  }
109
141
  }
110
142
  catch (err) {
111
- debug(MODULE, "gh auth token failed", err);
143
+ // Log only the message: the raw execFileSync error carries stdout/stderr
144
+ // buffers that could include a token if gh half-succeeded.
145
+ debug(MODULE, `gh auth token failed: ${errorMessage(err)}`);
112
146
  }
113
147
  return null;
114
148
  }