@oss-scout/core 0.11.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/dist/cli.bundle.cjs +78 -61
  2. package/dist/cli.js +401 -425
  3. package/dist/commands/command-scout.d.ts +21 -0
  4. package/dist/commands/command-scout.js +21 -0
  5. package/dist/commands/config.js +10 -128
  6. package/dist/commands/features.js +15 -28
  7. package/dist/commands/results.d.ts +13 -2
  8. package/dist/commands/results.js +29 -2
  9. package/dist/commands/search.js +63 -70
  10. package/dist/commands/setup.d.ts +2 -0
  11. package/dist/commands/setup.js +35 -6
  12. package/dist/commands/skip.d.ts +4 -0
  13. package/dist/commands/skip.js +45 -55
  14. package/dist/commands/sync.d.ts +10 -0
  15. package/dist/commands/sync.js +10 -0
  16. package/dist/commands/vet-list.js +3 -19
  17. package/dist/commands/vet.js +18 -25
  18. package/dist/commands/with-scout.d.ts +32 -0
  19. package/dist/commands/with-scout.js +41 -0
  20. package/dist/core/anti-llm-policy.js +4 -5
  21. package/dist/core/bootstrap.d.ts +2 -2
  22. package/dist/core/bootstrap.js +5 -9
  23. package/dist/core/errors.d.ts +10 -0
  24. package/dist/core/errors.js +20 -5
  25. package/dist/core/feature-discovery.d.ts +13 -1
  26. package/dist/core/feature-discovery.js +104 -81
  27. package/dist/core/gist-state-store.d.ts +13 -12
  28. package/dist/core/gist-state-store.js +128 -53
  29. package/dist/core/http-cache.d.ts +32 -2
  30. package/dist/core/http-cache.js +74 -19
  31. package/dist/core/issue-discovery.d.ts +2 -0
  32. package/dist/core/issue-discovery.js +44 -29
  33. package/dist/core/issue-eligibility.d.ts +10 -4
  34. package/dist/core/issue-eligibility.js +119 -67
  35. package/dist/core/issue-graphql.d.ts +58 -0
  36. package/dist/core/issue-graphql.js +108 -0
  37. package/dist/core/issue-vetting.d.ts +105 -8
  38. package/dist/core/issue-vetting.js +234 -107
  39. package/dist/core/local-state.d.ts +6 -2
  40. package/dist/core/local-state.js +23 -5
  41. package/dist/core/logger.d.ts +12 -4
  42. package/dist/core/logger.js +33 -7
  43. package/dist/core/personalization.d.ts +15 -10
  44. package/dist/core/personalization.js +30 -22
  45. package/dist/core/preference-fields.d.ts +47 -0
  46. package/dist/core/preference-fields.js +178 -0
  47. package/dist/core/repo-health.js +31 -15
  48. package/dist/core/roadmap.js +17 -3
  49. package/dist/core/schemas.d.ts +144 -26
  50. package/dist/core/schemas.js +74 -17
  51. package/dist/core/search-budget.d.ts +9 -0
  52. package/dist/core/search-budget.js +36 -3
  53. package/dist/core/search-phases.d.ts +0 -18
  54. package/dist/core/search-phases.js +27 -82
  55. package/dist/core/types.d.ts +136 -38
  56. package/dist/core/utils.js +60 -26
  57. package/dist/formatters/markdown.d.ts +10 -0
  58. package/dist/formatters/markdown.js +31 -0
  59. package/dist/index.d.ts +6 -2
  60. package/dist/index.js +8 -0
  61. package/dist/scout.d.ts +59 -10
  62. package/dist/scout.js +244 -20
  63. package/package.json +1 -1
@@ -7,7 +7,7 @@
7
7
  import { SCOPE_LABELS, } from "./types.js";
8
8
  import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
9
9
  import { debug, warn } from "./logger.js";
10
- import { getHttpCache } from "./http-cache.js";
10
+ import { getHttpCache, versionedCacheKey } from "./http-cache.js";
11
11
  import { detectLabelFarmingRepos, } from "./issue-filtering.js";
12
12
  import { extractRepoFromUrl, sleep } from "./utils.js";
13
13
  import { getSearchBudgetTracker } from "./search-budget.js";
@@ -18,8 +18,6 @@ const GITHUB_MAX_BOOLEAN_OPS = 5;
18
18
  * Set to 2000ms as a safety floor (max 30/min at the limit). The SearchBudgetTracker
19
19
  * adds additional adaptive delays when needed. */
20
20
  const INTER_QUERY_DELAY_MS = 2000;
21
- /** Batch size for repo queries. 3 repos = 2 OR operators, leaving room for labels. */
22
- const BATCH_SIZE = 3;
23
21
  /**
24
22
  * Chunk labels into groups that fit within the operator budget.
25
23
  * N labels require N-1 OR operators, so maxPerChunk = budget + 1.
@@ -77,14 +75,6 @@ export function interleaveArrays(arrays) {
77
75
  }
78
76
  return result;
79
77
  }
80
- /** Split repos into batches of the specified size. */
81
- function batchRepos(repos, batchSize) {
82
- const batches = [];
83
- for (let i = 0; i < repos.length; i += batchSize) {
84
- batches.push(repos.slice(i, i + batchSize));
85
- }
86
- return batches;
87
- }
88
78
  // ── Search caching ──
89
79
  /** TTL for cached search API results (15 minutes). */
90
80
  const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
@@ -94,7 +84,7 @@ const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
94
84
  * without consuming GitHub API rate limit points.
95
85
  */
96
86
  export async function cachedSearchIssues(octokit, params) {
97
- const cacheKey = `search:${params.q}:${params.sort}:${params.order}:${params.per_page}`;
87
+ const cacheKey = versionedCacheKey(`search:${params.q}:${params.sort}:${params.order}:${params.per_page}`);
98
88
  const cache = getHttpCache();
99
89
  // Check cache first
100
90
  const cached = cache.getIfFresh(cacheKey, SEARCH_CACHE_TTL_MS);
@@ -206,17 +196,32 @@ export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels,
206
196
  const repoFullName = repos[i];
207
197
  const [owner, repo] = repoFullName.split("/");
208
198
  try {
209
- const response = await octokit.issues.listForRepo({
210
- owner,
211
- repo,
212
- state: "open",
213
- sort: "created",
214
- direction: "desc",
215
- per_page: 5,
216
- ...(labels.length > 0 ? { labels: labels.join(",") } : {}),
217
- });
199
+ // One query per label: the REST `labels` parameter is AND semantics
200
+ // (issues carrying ALL listed labels), so a comma-joined list like
201
+ // "good first issue,help wanted" returned ~nothing (#118). Querying
202
+ // per label and merging restores the intended any-of behavior.
203
+ const labelFilters = labels.length > 0 ? labels : [undefined];
204
+ const seenUrls = new Set();
205
+ const rawIssues = [];
206
+ for (const label of labelFilters) {
207
+ const response = await octokit.issues.listForRepo({
208
+ owner,
209
+ repo,
210
+ state: "open",
211
+ sort: "created",
212
+ direction: "desc",
213
+ per_page: 5,
214
+ ...(label !== undefined ? { labels: label } : {}),
215
+ });
216
+ for (const issue of response.data) {
217
+ if (seenUrls.has(issue.html_url))
218
+ continue;
219
+ seenUrls.add(issue.html_url);
220
+ rawIssues.push(issue);
221
+ }
222
+ }
218
223
  // Filter out pull requests (REST issues endpoint returns both) and assigned issues
219
- const issuesOnly = response.data.filter((item) => !("pull_request" in item) && !item.assignee);
224
+ const issuesOnly = rawIssues.filter((item) => !("pull_request" in item) && !item.assignee);
220
225
  const mapped = issuesOnly.map((issue) => ({
221
226
  html_url: issue.html_url,
222
227
  repository_url: `https://api.github.com/repos/${repoFullName}`,
@@ -378,63 +383,3 @@ export async function filterVetAndScore(vetter, items, filterIssues, excludedRep
378
383
  }
379
384
  return { candidates: starFiltered, allVetFailed, rateLimitHit };
380
385
  }
381
- /**
382
- * Search for issues within specific repos using batched queries.
383
- *
384
- * To avoid GitHub's secondary rate limit (30 requests/minute), we batch
385
- * multiple repos into a single search query using OR syntax:
386
- * repo:owner1/repo1 OR repo:owner2/repo2 OR repo:owner3/repo3
387
- *
388
- * Labels are chunked separately to stay within GitHub's 5 boolean operator limit.
389
- * Each batch of repos consumes (batch.length - 1) OR operators, and the remaining
390
- * budget is used for label OR operators.
391
- *
392
- * This reduces API calls from N (one per repo) to ceil(N/BATCH_SIZE) * label_chunks.
393
- */
394
- export async function searchInRepos(octokit, vetter, repos, baseQualifiers, labels, maxResults, priority, filterFn) {
395
- const candidates = [];
396
- const batches = batchRepos(repos, BATCH_SIZE);
397
- let failedBatches = 0;
398
- let rateLimitFailures = 0;
399
- for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) {
400
- const batch = batches[batchIdx];
401
- if (candidates.length >= maxResults)
402
- break;
403
- // Delay between batches to avoid secondary rate limits
404
- if (batchIdx > 0)
405
- await sleep(INTER_QUERY_DELAY_MS);
406
- try {
407
- const repoFilter = batch.map((r) => `repo:${r}`).join(" OR ");
408
- const repoOps = batch.length - 1;
409
- const perPage = Math.min(30, (maxResults - candidates.length) * 3);
410
- const allItems = await searchWithChunkedLabels(octokit, labels, repoOps, (labelQ) => `${baseQualifiers} ${labelQ} (${repoFilter})`
411
- .replace(/ +/g, " ")
412
- .trim(), perPage);
413
- if (allItems.length > 0) {
414
- const filtered = filterFn(allItems);
415
- const remainingNeeded = maxResults - candidates.length;
416
- const { candidates: vetted, rateLimitHit: vetRateLimitHit } = await vetter.vetIssuesParallel(filtered.slice(0, remainingNeeded * 2).map((i) => i.html_url), remainingNeeded, priority);
417
- candidates.push(...vetted);
418
- if (vetRateLimitHit)
419
- rateLimitFailures++;
420
- }
421
- }
422
- catch (error) {
423
- if (getHttpStatusCode(error) === 401)
424
- throw error;
425
- failedBatches++;
426
- if (isRateLimitError(error)) {
427
- rateLimitFailures++;
428
- }
429
- const batchReposStr = batch.join(", ");
430
- warn(MODULE, `Error searching issues in batch [${batchReposStr}]:`, errorMessage(error));
431
- }
432
- }
433
- const allBatchesFailed = failedBatches === batches.length && batches.length > 0;
434
- const rateLimitHit = rateLimitFailures > 0;
435
- if (allBatchesFailed) {
436
- warn(MODULE, `All ${batches.length} batch(es) failed for ${priority} phase. ` +
437
- `This may indicate a systemic issue (rate limit, auth, network).`);
438
- }
439
- return { candidates, allBatchesFailed, rateLimitHit };
440
- }
@@ -2,9 +2,10 @@
2
2
  * Core types for oss-scout — ephemeral types that are never persisted.
3
3
  */
4
4
  import type { RepoSignals, TrackedIssue, IssueVettingResult, IssueScope, ScoutState, SearchStrategy } from "./schemas.js";
5
+ import type { LogLevel } from "./logger.js";
5
6
  export type { ProjectCategory, IssueScope, RepoSignals, RepoScore, StoredMergedPR, StoredClosedPR, ContributionGuidelines, IssueVettingResult, LinkedPR, TrackedIssue, ScoutPreferences, SavedCandidate, ScoutState, SearchStrategy, } from "./schemas.js";
6
- /** Health snapshot of a GitHub repository. */
7
- export interface ProjectHealth {
7
+ /** A successful health snapshot of a GitHub repository. */
8
+ export interface ProjectHealthData {
8
9
  repo: string;
9
10
  lastCommitAt: string;
10
11
  daysSinceLastCommit: number;
@@ -15,9 +16,26 @@ export interface ProjectHealth {
15
16
  stargazersCount?: number;
16
17
  forksCount?: number;
17
18
  language?: string | null;
18
- checkFailed?: boolean;
19
- failureReason?: string;
19
+ /** Discriminant: a real snapshot is never `checkFailed`. */
20
+ checkFailed?: false;
21
+ failureReason?: undefined;
20
22
  }
23
+ /**
24
+ * The health check itself failed (transient API error). Only the repo and the
25
+ * failure reason are known — none of the snapshot fields are meaningful, so the
26
+ * type does not carry them. Narrow on `checkFailed` to reach a real snapshot.
27
+ */
28
+ export interface ProjectHealthFailure {
29
+ repo: string;
30
+ checkFailed: true;
31
+ failureReason: string;
32
+ }
33
+ /**
34
+ * Health snapshot of a GitHub repository, or a marker that the check failed.
35
+ * A discriminated union (on `checkFailed`) so the "failure" shape can't be read
36
+ * as if it carried real snapshot data. Narrow before reading snapshot fields.
37
+ */
38
+ export type ProjectHealth = ProjectHealthData | ProjectHealthFailure;
21
39
  /** Priority tier for issue search results. */
22
40
  export type SearchPriority = "merged_pr" | "starred" | "normal";
23
41
  /** Source file the anti-LLM policy match came from, or null when no file matched. */
@@ -43,6 +61,13 @@ export interface SLMTriageSummary {
43
61
  /** A fully vetted issue candidate with scoring. */
44
62
  export interface IssueCandidate {
45
63
  issue: TrackedIssue;
64
+ /**
65
+ * GitHub issue state at vet time (#120). GitHub answers 200 for closed
66
+ * issues, so without this vet-list classified them still_available and
67
+ * --prune kept them. Optional: cached candidates from older versions
68
+ * lack it and read as open.
69
+ */
70
+ issueState?: "open" | "closed";
46
71
  vettingResult: IssueVettingResult;
47
72
  projectHealth: ProjectHealth;
48
73
  antiLLMPolicy: AntiLLMPolicyResult;
@@ -54,26 +79,22 @@ export interface IssueCandidate {
54
79
  viabilityScore: number;
55
80
  searchPriority: SearchPriority;
56
81
  /**
57
- * Personalization sort tier (#1244). Populated only when the caller
58
- * passes `preferLanguages` / `preferRepos` to `search()` *and* the
59
- * candidate matches at least one. Affects sort order between the
60
- * `recommendation` tier and `viabilityScore`; never used as a filter.
61
- */
62
- boostScore?: number;
63
- /**
64
- * Human-readable reasons the candidate matched personalization bias
65
- * (#1244). Mirrors `reasonsToApprove`/`reasonsToSkip` shape for
66
- * symmetry with the existing surface.
82
+ * Personalization marker (#1244). A candidate is EITHER boosted (it matched
83
+ * a `preferLanguages` / `preferRepos` bias and gets a soft sort boost between
84
+ * the `recommendation` tier and `viabilityScore`) OR a diversity slot (it
85
+ * matched no bias and filled a slot reserved by `diversityRatio`) — never
86
+ * both. Modelling it as a single discriminated field makes that mutual
87
+ * exclusivity structural instead of prose across three optional fields.
88
+ * Absent when no personalization was requested or the candidate matched
89
+ * nothing.
67
90
  */
68
- boostReasons?: string[];
69
- /**
70
- * Marks a candidate that filled a reserved diversity slot (#1244).
71
- * Populated only when `diversityRatio > 0` was passed AND the
72
- * candidate matched no personalization bias. Mutually exclusive with
73
- * a non-zero `boostScore` (a candidate cannot be both biased-toward
74
- * and a diversity slot in the same result set).
75
- */
76
- diversitySlot?: boolean;
91
+ personalization?: {
92
+ kind: "boosted";
93
+ score: number;
94
+ reasons: string[];
95
+ } | {
96
+ kind: "diversity";
97
+ };
77
98
  }
78
99
  /** Subset of RepoScore fields that callers may update. */
79
100
  export interface RepoScoreUpdate {
@@ -85,29 +106,51 @@ export interface RepoScoreUpdate {
85
106
  stargazersCount?: number;
86
107
  language?: string | null;
87
108
  }
88
- /** Result of a check (e.g., no existing PR, not claimed). */
89
- export interface CheckResult {
109
+ /**
110
+ * Result of a check (e.g., no existing PR, not claimed). Discriminated on
111
+ * `inconclusive`: a `reason` exists only when the check could not be completed
112
+ * (a transient API error), and an inconclusive check always reports `passed:
113
+ * true` because the caller assumes the issue is still eligible. A conclusive
114
+ * result carries no `reason`.
115
+ */
116
+ export type CheckResult = {
90
117
  passed: boolean;
91
- inconclusive?: boolean;
92
- reason?: string;
93
- }
118
+ inconclusive?: false;
119
+ reason?: undefined;
120
+ } | {
121
+ passed: true;
122
+ inconclusive: true;
123
+ reason: string;
124
+ };
94
125
  export declare const SCOPE_LABELS: Record<IssueScope, string[]>;
95
126
  /** Options for batch vetting saved results. */
96
127
  export interface VetListOptions {
97
128
  concurrency?: number;
98
129
  prune?: boolean;
99
130
  }
100
- /** A single entry in the vet-list result. */
101
- export interface VetListEntry {
131
+ /** Identity fields shared by every vet-list entry, regardless of outcome. */
132
+ export interface VetListEntryBase {
102
133
  issueUrl: string;
103
134
  repo: string;
104
135
  number: number;
105
136
  title: string;
106
137
  status: "still_available" | "claimed" | "closed" | "has_pr" | "error";
107
- recommendation?: "approve" | "skip" | "needs_review";
108
- viabilityScore?: number;
109
- errorMessage?: string;
110
138
  }
139
+ /**
140
+ * A single entry in the vet-list result. Discriminated on `ok`: a completed vet
141
+ * (`ok: true`) carries `recommendation` + `viabilityScore` and never an
142
+ * `errorMessage`; a vet that threw (`ok: false`, including a 404/410 that
143
+ * classifies the issue as `closed`) carries only the `errorMessage`. This makes
144
+ * the "score xor error" invariant structural instead of prose.
145
+ */
146
+ export type VetListEntry = (VetListEntryBase & {
147
+ ok: true;
148
+ recommendation: "approve" | "skip" | "needs_review";
149
+ viabilityScore: number;
150
+ }) | (VetListEntryBase & {
151
+ ok: false;
152
+ errorMessage: string;
153
+ });
111
154
  /** Summary counts for a vet-list run. */
112
155
  export interface VetListSummary {
113
156
  total: number;
@@ -117,27 +160,69 @@ export interface VetListSummary {
117
160
  hasPR: number;
118
161
  errors: number;
119
162
  }
163
+ /** Result of reconciling tracked open PRs against their current GitHub state (#164). */
164
+ export interface SyncResult {
165
+ /** Open PRs checked. */
166
+ checked: number;
167
+ /** Transitioned to merged. */
168
+ merged: number;
169
+ /** Transitioned to closed-without-merge. */
170
+ closed: number;
171
+ /** Still open (kept). */
172
+ stillOpen: number;
173
+ /** Could not be checked (parse failure or transient API error). */
174
+ errors: number;
175
+ }
176
+ /** A saved result whose availability status changed since the last vet-list (#165). */
177
+ export interface VetStatusTransition {
178
+ issueUrl: string;
179
+ repo: string;
180
+ number: number;
181
+ from: VetListEntry["status"];
182
+ to: VetListEntry["status"];
183
+ }
120
184
  /** Result of a batch vet-list operation. */
121
185
  export interface VetListResult {
122
186
  results: VetListEntry[];
123
187
  summary: VetListSummary;
124
188
  prunedCount?: number;
189
+ /**
190
+ * Status changes since the previous vet-list run, computed from each saved
191
+ * result's `lastStatus`. Empty on a first run (no prior status to compare).
192
+ */
193
+ transitions: VetStatusTransition[];
125
194
  }
126
195
  /** Configuration for creating an OssScout instance. */
127
196
  export type ScoutConfig = {
128
- /** GitHub token with `repo` read scope. Add `gist` scope for persistence. */
197
+ /** GitHub token with `repo` read scope. Add `gist` scope for gist persistence. */
129
198
  githubToken: string;
130
- /** Use gist-backed persistence (default for standalone CLI). */
131
- persistence?: "gist";
132
- /** Gist ID override. Skips gist discovery/creation if provided. */
199
+ /**
200
+ * State storage. Omitted defaults to `"local"`: load and persist
201
+ * `~/.oss-scout/state.json`, no network on construct. `"gist"` syncs
202
+ * via a private GitHub gist (needs the `gist` token scope).
203
+ */
204
+ persistence?: "local" | "gist";
205
+ /** Gist ID override (gist mode). Skips gist discovery/creation if provided. */
133
206
  gistId?: string;
207
+ /**
208
+ * Minimum log level emitted to stderr. Omitted leaves the global level
209
+ * (default "info"). Hosts that don't want the "[INFO] Phase 0..."
210
+ * chatter can pass "warn" or "silent" (#156).
211
+ */
212
+ logLevel?: LogLevel;
134
213
  } | {
135
214
  /** GitHub token with `repo` read scope. */
136
215
  githubToken: string;
137
- /** Caller provides state directly. */
216
+ /** Caller provides and owns state directly (embedding hosts). */
138
217
  persistence: "provided";
139
218
  /** Pre-loaded state. Required when persistence is 'provided'. */
140
219
  initialState: ScoutState;
220
+ /**
221
+ * Minimum log level emitted to stderr. Omitted leaves the global level
222
+ * (default "info"). Hosts that don't want the "[INFO] Phase 0..."
223
+ * chatter can pass "warn" or "silent" (#156).
224
+ */
225
+ logLevel?: LogLevel;
141
226
  };
142
227
  /** Options for the search method. */
143
228
  export interface SearchOptions {
@@ -168,6 +253,19 @@ export interface SearchOptions {
168
253
  * clamped to [0, 1].
169
254
  */
170
255
  diversityRatio?: number;
256
+ /**
257
+ * Per-call override for the delay between search phases (ms). Defaults to
258
+ * the `interPhaseDelayMs` preference (30s). Latency-sensitive callers like
259
+ * the MCP server pass 0; the sliding-window budget tracker still paces the
260
+ * actual API calls, so the fixed sleep is the only thing removed (#143).
261
+ */
262
+ interPhaseDelayMs?: number;
263
+ /**
264
+ * Per-call override for the extra cooldown before the broad phase (ms).
265
+ * Defaults to the `broadPhaseDelayMs` preference (90s). See
266
+ * `interPhaseDelayMs` for the rationale (#143).
267
+ */
268
+ broadPhaseDelayMs?: number;
171
269
  }
172
270
  /** Result of a search operation. */
173
271
  export interface SearchResult {
@@ -36,14 +36,29 @@ export function getCacheDir() {
36
36
  * - https://api.github.com/repos/owner/repo/...
37
37
  */
38
38
  export function extractRepoFromUrl(url) {
39
+ // Real URL parsing: the previous regexes were unanchored (any host
40
+ // containing "github.com" matched) and leaked query/fragment text into
41
+ // the repo segment ("repo?tab=readme").
42
+ let parsed;
43
+ try {
44
+ parsed = new URL(url);
45
+ }
46
+ catch {
47
+ return null;
48
+ }
49
+ const host = parsed.hostname.toLowerCase().replace(/^www\./, "");
50
+ const segments = parsed.pathname.split("/").filter(Boolean);
39
51
  // API URLs: https://api.github.com/repos/owner/repo[/...]
40
- const apiMatch = url.match(/api\.github\.com\/repos\/([^/]+\/[^/]+)/);
41
- if (apiMatch)
42
- return apiMatch[1];
52
+ if (host === "api.github.com") {
53
+ if (segments[0] === "repos" && segments.length >= 3) {
54
+ return `${segments[1]}/${segments[2]}`;
55
+ }
56
+ return null;
57
+ }
43
58
  // Web URLs: https://github.com/owner/repo[/...]
44
- const webMatch = url.match(/github\.com\/([^/]+\/[^/]+)/);
45
- if (webMatch)
46
- return webMatch[1];
59
+ if (host === "github.com" && segments.length >= 2) {
60
+ return `${segments[0]}/${segments[1]}`;
61
+ }
47
62
  return null;
48
63
  }
49
64
  const OWNER_PATTERN = /^[a-zA-Z0-9_-]+$/;
@@ -52,25 +67,38 @@ function isValidOwnerRepo(owner, repo) {
52
67
  return OWNER_PATTERN.test(owner) && REPO_PATTERN.test(repo);
53
68
  }
54
69
  export function parseGitHubUrl(url) {
55
- if (!url.startsWith("https://github.com/"))
56
- return null;
57
- const prMatch = url.match(/github\.com\/([^/]+)\/([^/]+)\/pull\/(\d+)/);
58
- if (prMatch) {
59
- const owner = prMatch[1];
60
- const repo = prMatch[2];
61
- if (!isValidOwnerRepo(owner, repo))
62
- return null;
63
- return { owner, repo, number: parseInt(prMatch[3], 10), type: "pull" };
70
+ // Accept pasteable variants: http://, www., and bare github.com/... forms
71
+ // normalize to a parseable URL. Strict canonical-form validation for
72
+ // command input lives in commands/validation.ts; this parser is lenient.
73
+ const normalized = /^(?:www\.)?github\.com\//i.test(url)
74
+ ? `https://${url}`
75
+ : url;
76
+ let parsed;
77
+ try {
78
+ parsed = new URL(normalized);
64
79
  }
65
- const issueMatch = url.match(/github\.com\/([^/]+)\/([^/]+)\/issues\/(\d+)/);
66
- if (issueMatch) {
67
- const owner = issueMatch[1];
68
- const repo = issueMatch[2];
69
- if (!isValidOwnerRepo(owner, repo))
70
- return null;
71
- return { owner, repo, number: parseInt(issueMatch[3], 10), type: "issues" };
80
+ catch {
81
+ return null;
72
82
  }
73
- return null;
83
+ if (parsed.protocol !== "https:" && parsed.protocol !== "http:")
84
+ return null;
85
+ const host = parsed.hostname.toLowerCase().replace(/^www\./, "");
86
+ if (host !== "github.com")
87
+ return null;
88
+ // Exactly owner/repo/(pull|issues)/<digits>; trailing slash tolerated via
89
+ // filter(Boolean), query/fragment excluded by pathname. A malformed number
90
+ // segment ("123abc") no longer half-parses to 123.
91
+ const segments = parsed.pathname.split("/").filter(Boolean);
92
+ if (segments.length !== 4)
93
+ return null;
94
+ const [owner, repo, type, num] = segments;
95
+ if (type !== "pull" && type !== "issues")
96
+ return null;
97
+ if (!isValidOwnerRepo(owner, repo))
98
+ return null;
99
+ if (!/^\d+$/.test(num))
100
+ return null;
101
+ return { owner, repo, number: parseInt(num, 10), type };
74
102
  }
75
103
  export function daysBetween(from, to = new Date()) {
76
104
  return Math.max(0, Math.floor((to.getTime() - from.getTime()) / (1000 * 60 * 60 * 24)));
@@ -91,8 +119,12 @@ export function getGitHubToken() {
91
119
  if (tokenFetchAttempted)
92
120
  return null;
93
121
  tokenFetchAttempted = true;
94
- if (process.env.GITHUB_TOKEN) {
95
- cachedGitHubToken = process.env.GITHUB_TOKEN;
122
+ // Trim: a trailing newline (e.g. GITHUB_TOKEN=$(cat file)) produces a
123
+ // malformed Authorization header with confusing 401s. A whitespace-only
124
+ // value falls through to the gh CLI.
125
+ const envToken = process.env.GITHUB_TOKEN?.trim();
126
+ if (envToken) {
127
+ cachedGitHubToken = envToken;
96
128
  return cachedGitHubToken;
97
129
  }
98
130
  try {
@@ -108,7 +140,9 @@ export function getGitHubToken() {
108
140
  }
109
141
  }
110
142
  catch (err) {
111
- debug(MODULE, "gh auth token failed", err);
143
+ // Log only the message: the raw execFileSync error carries stdout/stderr
144
+ // buffers that could include a token if gh half-succeeded.
145
+ debug(MODULE, `gh auth token failed: ${errorMessage(err)}`);
112
146
  }
113
147
  return null;
114
148
  }
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Markdown output formatter (#170) — renders saved results as a table for
3
+ * digests, notes export, and scheduled GitHub-issue summaries.
4
+ */
5
+ import type { SavedCandidate } from "../core/schemas.js";
6
+ /**
7
+ * Render saved results as a GitHub-flavored markdown table, sorted by
8
+ * viability score descending. Returns a friendly message when empty.
9
+ */
10
+ export declare function formatResultsMarkdown(results: SavedCandidate[]): string;
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Markdown output formatter (#170) — renders saved results as a table for
3
+ * digests, notes export, and scheduled GitHub-issue summaries.
4
+ */
5
+ /** Escape pipe and newline so a title can't break the markdown table. */
6
+ function cell(value) {
7
+ return value.replace(/\r?\n/g, " ").replace(/\|/g, "\\|").trim();
8
+ }
9
+ /**
10
+ * Render saved results as a GitHub-flavored markdown table, sorted by
11
+ * viability score descending. Returns a friendly message when empty.
12
+ */
13
+ export function formatResultsMarkdown(results) {
14
+ if (results.length === 0) {
15
+ return "_No saved results._";
16
+ }
17
+ const sorted = [...results].sort((a, b) => b.viabilityScore - a.viabilityScore);
18
+ const header = "| Score | Repo | Issue | Recommendation | Title |";
19
+ const divider = "| ----- | ---- | ----- | -------------- | ----- |";
20
+ const rows = sorted.map((r) => {
21
+ const issueLink = `[#${r.number}](${r.issueUrl})`;
22
+ return `| ${r.viabilityScore} | ${cell(r.repo)} | ${issueLink} | ${cell(r.recommendation)} | ${cell(r.title)} |`;
23
+ });
24
+ return [
25
+ `## oss-scout results (${results.length})`,
26
+ "",
27
+ header,
28
+ divider,
29
+ ...rows,
30
+ ].join("\n");
31
+ }
package/dist/index.d.ts CHANGED
@@ -15,13 +15,17 @@
15
15
  * @packageDocumentation
16
16
  */
17
17
  export { createScout, OssScout } from "./scout.js";
18
- export type { ScoutConfig, SearchOptions, SearchResult, IssueCandidate, MergedPRRecord, ClosedPRRecord, OpenPRRecord, RepoScoreUpdate, ProjectHealth, SearchPriority, CheckResult, AntiLLMPolicyResult, AntiLLMPolicySourceFile, VetListOptions, VetListResult, VetListEntry, VetListSummary, } from "./core/types.js";
18
+ export type { ScoutConfig, SearchOptions, SearchResult, IssueCandidate, MergedPRRecord, ClosedPRRecord, OpenPRRecord, RepoScoreUpdate, ProjectHealth, ProjectHealthData, ProjectHealthFailure, SearchPriority, CheckResult, AntiLLMPolicyResult, AntiLLMPolicySourceFile, VetListOptions, VetListResult, VetListEntry, VetListEntryBase, VetListSummary, SyncResult, } from "./core/types.js";
19
19
  export type { ScoutState, ScoutPreferences, RepoScore, RepoSignals, IssueVettingResult, LinkedPR, ContributionGuidelines, TrackedIssue, IssueScope, ProjectCategory, StoredMergedPR, StoredClosedPR, StoredOpenPR, SearchStrategy, SkippedIssue, Horizon, } from "./core/schemas.js";
20
20
  export { ScoutStateSchema, ScoutPreferencesSchema, RepoScoreSchema, IssueScopeSchema, ProjectCategorySchema, SearchStrategySchema, SkippedIssueSchema, HorizonSchema, } from "./core/schemas.js";
21
+ export { applyPreferenceField, FIELD_CONFIGS, PREFERENCE_KEYS, SORTED_PREFERENCE_KEYS, assertFieldConfigsCover, updateArray, type FieldConfig, } from "./core/preference-fields.js";
21
22
  export { requireGitHubToken, getGitHubToken } from "./core/utils.js";
22
23
  export { IssueDiscovery } from "./core/issue-discovery.js";
23
- export { IssueVetter, type ScoutStateReader, type FeatureSignals, } from "./core/issue-vetting.js";
24
+ export { IssueVetter, type ScoutStateReader, type ScoutStateWriter, type SLMConfig, type FeatureSignals, } from "./core/issue-vetting.js";
24
25
  export { scanForAntiLLMPolicy, ANTI_LLM_KEYWORDS, } from "./core/anti-llm-policy.js";
26
+ export { bootstrapScout, type BootstrapResult } from "./core/bootstrap.js";
27
+ export { setLogLevel, getLogLevel, enableDebug, type LogLevel, } from "./core/logger.js";
25
28
  export { discoverFeatures, resolveAnchorRepos, classifyHorizon, splitByHorizon, ANCHOR_THRESHOLD, FEATURE_LABELS, NO_ANCHORS_MESSAGE, NO_RESULTS_MESSAGE, type FeatureCandidate, type FeatureSearchResult, type DiscoverFeaturesOptions, } from "./core/feature-discovery.js";
26
29
  export { isLinkedPRStalled, STALLED_PR_THRESHOLD_DAYS, } from "./core/linked-pr.js";
27
30
  export { fetchRoadmapIssueRefs, parseRoadmapIssueRefs, } from "./core/roadmap.js";
31
+ export { ISSUE_URL_PATTERN, validateGitHubUrl, validateUrl, } from "./commands/validation.js";
package/dist/index.js CHANGED
@@ -18,15 +18,23 @@
18
18
  export { createScout, OssScout } from "./scout.js";
19
19
  // Schemas (for consumers who need runtime validation)
20
20
  export { ScoutStateSchema, ScoutPreferencesSchema, RepoScoreSchema, IssueScopeSchema, ProjectCategorySchema, SearchStrategySchema, SkippedIssueSchema, HorizonSchema, } from "./core/schemas.js";
21
+ // Preference-field metadata + parsing (shared by the CLI and the MCP server)
22
+ export { applyPreferenceField, FIELD_CONFIGS, PREFERENCE_KEYS, SORTED_PREFERENCE_KEYS, assertFieldConfigsCover, updateArray, } from "./core/preference-fields.js";
21
23
  // Utilities
22
24
  export { requireGitHubToken, getGitHubToken } from "./core/utils.js";
23
25
  // Internal classes (for advanced use)
24
26
  export { IssueDiscovery } from "./core/issue-discovery.js";
25
27
  export { IssueVetter, } from "./core/issue-vetting.js";
26
28
  export { scanForAntiLLMPolicy, ANTI_LLM_KEYWORDS, } from "./core/anti-llm-policy.js";
29
+ // Bootstrap (seed state from GitHub) — usable by library/MCP hosts (#156)
30
+ export { bootstrapScout } from "./core/bootstrap.js";
31
+ // Log-level control for library hosts (#156)
32
+ export { setLogLevel, getLogLevel, enableDebug, } from "./core/logger.js";
27
33
  // Feature discovery API
28
34
  export { discoverFeatures, resolveAnchorRepos, classifyHorizon, splitByHorizon, ANCHOR_THRESHOLD, FEATURE_LABELS, NO_ANCHORS_MESSAGE, NO_RESULTS_MESSAGE, } from "./core/feature-discovery.js";
29
35
  // Linked-PR helpers (#97)
30
36
  export { isLinkedPRStalled, STALLED_PR_THRESHOLD_DAYS, } from "./core/linked-pr.js";
31
37
  // Roadmap scraping (#95)
32
38
  export { fetchRoadmapIssueRefs, parseRoadmapIssueRefs, } from "./core/roadmap.js";
39
+ // Issue-URL validation (shared by the CLI and the MCP server)
40
+ export { ISSUE_URL_PATTERN, validateGitHubUrl, validateUrl, } from "./commands/validation.js";