@oss-scout/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/dist/cli.bundle.cjs +114 -0
  2. package/dist/cli.d.ts +5 -0
  3. package/dist/cli.js +341 -0
  4. package/dist/commands/config.d.ts +22 -0
  5. package/dist/commands/config.js +169 -0
  6. package/dist/commands/results.d.ts +8 -0
  7. package/dist/commands/results.js +13 -0
  8. package/dist/commands/search.d.ts +39 -0
  9. package/dist/commands/search.js +50 -0
  10. package/dist/commands/setup.d.ts +17 -0
  11. package/dist/commands/setup.js +104 -0
  12. package/dist/commands/validation.d.ts +6 -0
  13. package/dist/commands/validation.js +17 -0
  14. package/dist/commands/vet-list.d.ts +9 -0
  15. package/dist/commands/vet-list.js +16 -0
  16. package/dist/commands/vet.d.ts +25 -0
  17. package/dist/commands/vet.js +29 -0
  18. package/dist/core/bootstrap.d.ts +14 -0
  19. package/dist/core/bootstrap.js +122 -0
  20. package/dist/core/category-mapping.d.ts +19 -0
  21. package/dist/core/category-mapping.js +58 -0
  22. package/dist/core/concurrency.d.ts +6 -0
  23. package/dist/core/concurrency.js +25 -0
  24. package/dist/core/errors.d.ts +22 -0
  25. package/dist/core/errors.js +69 -0
  26. package/dist/core/gist-state-store.d.ts +96 -0
  27. package/dist/core/gist-state-store.js +302 -0
  28. package/dist/core/github.d.ts +16 -0
  29. package/dist/core/github.js +58 -0
  30. package/dist/core/http-cache.d.ts +108 -0
  31. package/dist/core/http-cache.js +314 -0
  32. package/dist/core/issue-discovery.d.ts +93 -0
  33. package/dist/core/issue-discovery.js +475 -0
  34. package/dist/core/issue-eligibility.d.ts +33 -0
  35. package/dist/core/issue-eligibility.js +151 -0
  36. package/dist/core/issue-filtering.d.ts +51 -0
  37. package/dist/core/issue-filtering.js +103 -0
  38. package/dist/core/issue-scoring.d.ts +43 -0
  39. package/dist/core/issue-scoring.js +97 -0
  40. package/dist/core/issue-vetting.d.ts +44 -0
  41. package/dist/core/issue-vetting.js +270 -0
  42. package/dist/core/local-state.d.ts +16 -0
  43. package/dist/core/local-state.js +56 -0
  44. package/dist/core/logger.d.ts +11 -0
  45. package/dist/core/logger.js +25 -0
  46. package/dist/core/pagination.d.ts +7 -0
  47. package/dist/core/pagination.js +16 -0
  48. package/dist/core/repo-health.d.ts +19 -0
  49. package/dist/core/repo-health.js +179 -0
  50. package/dist/core/schemas.d.ts +315 -0
  51. package/dist/core/schemas.js +137 -0
  52. package/dist/core/search-budget.d.ts +62 -0
  53. package/dist/core/search-budget.js +129 -0
  54. package/dist/core/search-phases.d.ts +69 -0
  55. package/dist/core/search-phases.js +238 -0
  56. package/dist/core/types.d.ts +124 -0
  57. package/dist/core/types.js +9 -0
  58. package/dist/core/utils.d.ts +18 -0
  59. package/dist/core/utils.js +106 -0
  60. package/dist/formatters/json.d.ts +6 -0
  61. package/dist/formatters/json.js +20 -0
  62. package/dist/index.d.ts +23 -0
  63. package/dist/index.js +25 -0
  64. package/dist/scout.d.ts +125 -0
  65. package/dist/scout.js +391 -0
  66. package/package.json +70 -0
@@ -0,0 +1,51 @@
1
+ /**
2
+ * Issue Filtering — pure functions for filtering and spam detection on search results.
3
+ *
4
+ * Extracted from issue-discovery.ts to isolate filtering logic:
5
+ * label farming detection, doc-only filtering, per-repo caps, templated title detection.
6
+ */
7
/**
 * Minimal shape of a GitHub search result item (from octokit.search.issuesAndPullRequests).
 *
 * Only a handful of fields are spelled out; the index signature lets the rest of the
 * search response pass through untyped.
 */
export interface GitHubSearchItem {
    html_url: string;
    /** Repository URL; its last two `/`-separated segments are read as `owner/repo`. */
    repository_url: string;
    updated_at: string;
    /** Issue title. Optional — items without a title are never treated as templated. */
    title?: string;
    /** Labels, each either a plain string or an object with an optional `name`. */
    labels?: Array<{
        name?: string;
    } | string>;
    /** Any other property of the search response. */
    [key: string]: unknown;
}
18
/**
 * Labels that indicate documentation-only issues.
 * Entries are lowercase; label names are lowercased before being looked up here.
 */
export declare const DOC_ONLY_LABELS: Set<string>;
/**
 * Check if an issue's labels are ALL documentation-related.
 * Issues with mixed labels (e.g., "good first issue" + "documentation") pass through.
 * Issues with no labels are not considered doc-only.
 *
 * @param item - Search result whose `labels` are inspected (case-insensitively).
 * @returns `true` only when there is at least one non-empty label name and every
 *   non-empty label name is in {@link DOC_ONLY_LABELS}.
 */
export declare function isDocOnlyIssue(item: GitHubSearchItem): boolean;
/**
 * Known beginner-type label names used to detect label-farming repos.
 * Entries are lowercase; label names are lowercased before being looked up here.
 */
export declare const BEGINNER_LABELS: Set<string>;
/**
 * Check if a single issue has an excessive number of beginner labels (>= 5).
 *
 * @param item - Search result whose `labels` are counted against {@link BEGINNER_LABELS}.
 * @returns `true` when five or more of the item's labels are beginner labels;
 *   `false` when `labels` is missing or not an array.
 */
export declare function isLabelFarming(item: GitHubSearchItem): boolean;
/**
 * Detect mass-created issue titles like "Add Trivia Question 61" or "Create Entry #5".
 *
 * @param title - Issue title to test.
 * @returns `true` when the title ends with a category noun (question, task, entry, ...)
 *   followed by an optionally `#`-prefixed number, with other text before the noun;
 *   `false` for an empty title.
 */
export declare function hasTemplatedTitle(title: string): boolean;
/**
 * Batch-analyze search items to detect label-farming repositories.
 * Returns a Set of repo full names (owner/repo) that appear to be spam.
 *
 * A repo is flagged if:
 * - ANY single issue has >= 5 beginner labels (strong individual signal), OR
 * - It has >= 3 issues with templated titles (batch signal)
 *
 * @param items - Search results; the repo name is taken from each item's `repository_url`.
 * @returns The set of flagged `owner/repo` names (empty when nothing is flagged).
 */
export declare function detectLabelFarmingRepos(items: GitHubSearchItem[]): Set<string>;
/**
 * Apply per-repo cap to candidates.
 * Keeps at most `maxPerRepo` issues from any single repo.
 * Maintains the existing sort order — first N from each repo are kept,
 * excess issues from over-represented repos are dropped.
 *
 * @param candidates - Candidates in priority order; grouped by `issue.repo`.
 * @param maxPerRepo - Maximum number of candidates to keep per repo.
 * @returns A new array; the input array is not modified.
 */
export declare function applyPerRepoCap<T extends {
    issue: {
        repo: string;
    };
}>(candidates: T[], maxPerRepo: number): T[];
@@ -0,0 +1,103 @@
1
+ /**
2
+ * Issue Filtering — pure functions for filtering and spam detection on search results.
3
+ *
4
+ * Extracted from issue-discovery.ts to isolate filtering logic:
5
+ * label farming detection, doc-only filtering, per-repo caps, templated title detection.
6
+ */
7
/** Lowercase label names that mark an issue as documentation-only. */
export const DOC_ONLY_LABELS = new Set(['documentation', 'docs', 'typo', 'spelling']);

/**
 * Decide whether every label on an issue is documentation-related.
 *
 * Label names are compared case-insensitively and empty names are ignored.
 * An issue that mixes doc labels with anything else is not doc-only, and
 * neither is an issue with no (named) labels at all.
 *
 * @param item - Search result item whose `labels` are inspected.
 * @returns `true` when at least one named label exists and all named labels
 *   are in `DOC_ONLY_LABELS`.
 */
export function isDocOnlyIssue(item) {
    const { labels } = item;
    if (!Array.isArray(labels) || labels.length === 0) {
        return false;
    }
    // Normalize every label up front, dropping the ones that have no usable name.
    const namedLabels = [];
    for (const label of labels) {
        const rawName = typeof label === 'string' ? label : label.name || '';
        const lowered = rawName.toLowerCase();
        if (lowered.length > 0) {
            namedLabels.push(lowered);
        }
    }
    if (namedLabels.length === 0) {
        return false;
    }
    // Doc-only means no named label falls outside the doc set.
    return !namedLabels.some((name) => !DOC_ONLY_LABELS.has(name));
}
24
/** Lowercase beginner-type label names; many of them on one issue suggests label farming. */
export const BEGINNER_LABELS = new Set([
    'good first issue',
    'hacktoberfest',
    'easy',
    'up-for-grabs',
    'first-timers-only',
    'beginner-friendly',
    'beginner',
    'starter',
    'newbie',
    'low-hanging-fruit',
    'community',
]);

/**
 * Report whether a single issue carries an excessive number (>= 5) of beginner labels.
 *
 * @param item - Search result item whose `labels` are counted (case-insensitively).
 * @returns `true` when five or more labels are in `BEGINNER_LABELS`; `false` when
 *   `labels` is missing or not an array.
 */
export function isLabelFarming(item) {
    const { labels } = item;
    if (!Array.isArray(labels)) {
        return false;
    }
    let beginnerHits = 0;
    for (const label of labels) {
        const rawName = typeof label === 'string' ? label : label.name || '';
        if (BEGINNER_LABELS.has(rawName.toLowerCase())) {
            beginnerHits += 1;
        }
    }
    return beginnerHits >= 5;
}
46
/**
 * Pattern for "<anything> <category-noun> <number>" titles. The category nouns are
 * typical of mass-created templated issues; requiring one avoids false positives on
 * legitimate titles such as "Add support for Python 3" or "Implement RFC 7231".
 */
const TEMPLATED_TITLE_PATTERN = /^.+\s+(question|fact|point|item|task|entry|post|challenge|exercise|example|problem|tip|recipe|snippet)\s+#?\d+$/i;

/**
 * Detect mass-created issue titles like "Add Trivia Question 61" or "Create Entry #5".
 *
 * @param title - Issue title to test.
 * @returns `true` when the title matches the templated pattern; `false` for an
 *   empty or missing title.
 */
export function hasTemplatedTitle(title) {
    return title ? TEMPLATED_TITLE_PATTERN.test(title) : false;
}
55
/**
 * Scan a batch of search items and collect the repositories that look like
 * label-farming spam.
 *
 * A repo (identified as `owner/repo`, taken from the last two segments of
 * `repository_url`) is flagged when either:
 * - any one of its issues has >= 5 beginner labels (strong individual signal), or
 * - at least 3 of its issues have templated titles (batch signal).
 *
 * @param items - Search result items to analyze.
 * @returns Set of flagged repo full names.
 */
export function detectLabelFarmingRepos(items) {
    const flaggedRepos = new Set();
    const templatedTitleTally = new Map();
    for (const item of items) {
        const repoName = item.repository_url.split('/').slice(-2).join('/');
        if (isLabelFarming(item)) {
            // Strong signal: one heavily beginner-labelled issue is enough on its own.
            flaggedRepos.add(repoName);
        }
        else if (item.title && hasTemplatedTitle(item.title)) {
            // Weaker signal: only counts once it accumulates across several issues.
            const seenSoFar = templatedTitleTally.get(repoName) ?? 0;
            templatedTitleTally.set(repoName, seenSoFar + 1);
        }
    }
    // Promote repos whose templated-title tally reached the batch threshold.
    templatedTitleTally.forEach((tally, repoName) => {
        if (tally >= 3) {
            flaggedRepos.add(repoName);
        }
    });
    return flaggedRepos;
}
86
/**
 * Limit how many candidates any single repo may contribute.
 *
 * Candidates are visited in their existing order; the first `maxPerRepo` from
 * each repo are kept and later ones from the same repo are dropped, so the
 * relative order of the survivors is unchanged.
 *
 * @param candidates - Candidates in priority order, grouped by `issue.repo`.
 * @param maxPerRepo - Maximum number of candidates to keep per repo.
 * @returns A new array with the capped candidates.
 */
export function applyPerRepoCap(candidates, maxPerRepo) {
    const keptPerRepo = new Map();
    return candidates.filter((candidate) => {
        const repo = candidate.issue.repo;
        const alreadyKept = keptPerRepo.get(repo) || 0;
        const underCap = alreadyKept < maxPerRepo;
        if (underCap) {
            keptPerRepo.set(repo, alreadyKept + 1);
        }
        return underCap;
    });
}
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Issue Scoring — pure functions for computing viability scores and quality bonuses.
3
+ *
4
+ * Extracted from issue-discovery.ts to isolate scoring logic.
5
+ */
6
/**
 * Calculate a quality bonus based on repo star and fork counts.
 * Stars: <50 -> 0, 50-499 -> +3, 500-4999 -> +5, 5000+ -> +8
 * Forks: <50 -> 0, 50-499 -> +2, 500+ -> +4
 * Natural max is 12 (8 stars + 4 forks).
 *
 * @param stargazersCount - Number of stars on the repo.
 * @param forksCount - Number of forks of the repo.
 * @returns The sum of the star-tier and fork-tier bonuses (0-12).
 */
export declare function calculateRepoQualityBonus(stargazersCount: number, forksCount: number): number;
13
/** Inputs to `calculateViabilityScore`. */
export interface ViabilityScoreParams {
    /** Repo score; contributes `repoScore * 2` points. `null` contributes nothing. */
    repoScore: number | null;
    /** An existing PR already addresses the issue (-30). */
    hasExistingPR: boolean;
    /** The issue appears to be claimed (-20). */
    isClaimed: boolean;
    /** The issue's requirements are clear (+15). */
    clearRequirements: boolean;
    /** Contribution guidelines were found for the repo (+10). */
    hasContributionGuidelines: boolean;
    /** Timestamp string of the issue's last update; drives the freshness bonus. */
    issueUpdatedAt: string;
    /** Penalized (-15) only when greater than 0 while `mergedPRCount` is 0. */
    closedWithoutMergeCount: number;
    /** Any value greater than 0 earns the merged-PR bonus (+15). */
    mergedPRCount: number;
    /** User has merged PRs in another repo under the same org (+5). */
    orgHasMergedPRs: boolean;
    /** Bonus from star/fork counts; treated as 0 when omitted. */
    repoQualityBonus?: number;
    /** True when the repo matches one of the user's preferred project categories. */
    matchesPreferredCategory?: boolean;
}
27
/**
 * Calculate viability score for an issue (0-100 scale)
 * Scoring:
 * - Base: 50 points
 * - +repoScore*2 (up to +20 for score of 10)
 * - +repoQualityBonus (up to +12 for established repos, from star/fork counts)
 * - +15 for merged PR in this repo (direct proven relationship)
 * - +15 for clear requirements (clarity)
 * - +15 for freshness (updated within the last 14 days; a partial bonus that
 *   shrinks to 0 applies between 15 and 30 days)
 * - +10 for contribution guidelines
 * - +5 for org affinity (merged PRs in same org)
 * - +5 for category preference (matches user's project categories)
 * - -30 if existing PR
 * - -20 if claimed
 * - -15 if closed-without-merge history with no merges
 *
 * @param params - Signals gathered while vetting the issue.
 * @returns The total, clamped to the 0-100 range.
 */
export declare function calculateViabilityScore(params: ViabilityScoreParams): number;
@@ -0,0 +1,97 @@
1
+ /**
2
+ * Issue Scoring — pure functions for computing viability scores and quality bonuses.
3
+ *
4
+ * Extracted from issue-discovery.ts to isolate scoring logic.
5
+ */
6
+ import { daysBetween } from './utils.js';
7
/**
 * Derive a quality bonus from a repo's popularity.
 *
 * Star tiers: below 50 -> 0, 50-499 -> +3, 500-4999 -> +5, 5000 and up -> +8.
 * Fork tiers: below 50 -> 0, 50-499 -> +2, 500 and up -> +4.
 * The two tiers are independent, so the largest possible bonus is 12.
 *
 * @param stargazersCount - Number of stars on the repo.
 * @param forksCount - Number of forks of the repo.
 * @returns Star bonus plus fork bonus.
 */
export function calculateRepoQualityBonus(stargazersCount, forksCount) {
    const starBonus = stargazersCount >= 5000 ? 8
        : stargazersCount >= 500 ? 5
            : stargazersCount >= 50 ? 3
                : 0;
    const forkBonus = forksCount >= 500 ? 4
        : forksCount >= 50 ? 2
            : 0;
    return starBonus + forkBonus;
}
29
/**
 * Compute an issue's viability score on a 0-100 scale.
 *
 * Starting from a base of 50, the following adjustments are summed:
 * - repoScore * 2 (skipped when repoScore is null; +20 for a score of 10)
 * - repoQualityBonus (0 when omitted; up to +12 from star/fork counts)
 * - +15 when the user has a merged PR in this repo
 * - +15 when the requirements are clear
 * - +15 when updated within the last 14 days, tapering to 0 between 15 and 30 days
 * - +10 when contribution guidelines exist
 * - +5 for org affinity (merged PRs elsewhere in the same org)
 * - +5 when the repo matches a preferred project category
 * - -30 when a PR already exists
 * - -20 when the issue is claimed
 * - -15 when there is closed-without-merge history and no merged PR
 *
 * @param params - Signals gathered while vetting the issue.
 * @returns The summed score, clamped to the range 0-100.
 */
export function calculateViabilityScore(params) {
    // Freshness: full bonus up to 14 days, then a linear taper over the next 16 days.
    const daysSinceUpdate = daysBetween(new Date(params.issueUpdatedAt));
    let freshnessBonus = 0;
    if (daysSinceUpdate <= 14) {
        freshnessBonus = 15;
    }
    else if (daysSinceUpdate <= 30) {
        freshnessBonus = Math.round(15 * (1 - (daysSinceUpdate - 14) / 16));
    }
    const rejectedWithoutAnyMerge = params.closedWithoutMergeCount > 0 && params.mergedPRCount === 0;
    // Listed in the order they are applied; bonuses first, penalties last.
    const adjustments = [
        params.repoScore !== null ? params.repoScore * 2 : 0,
        params.repoQualityBonus ?? 0,
        params.mergedPRCount > 0 ? 15 : 0,
        params.clearRequirements ? 15 : 0,
        freshnessBonus,
        params.hasContributionGuidelines ? 10 : 0,
        params.orgHasMergedPRs ? 5 : 0,
        params.matchesPreferredCategory ? 5 : 0,
        params.hasExistingPR ? -30 : 0,
        params.isClaimed ? -20 : 0,
        rejectedWithoutAnyMerge ? -15 : 0,
    ];
    let score = 50; // Base score
    for (const delta of adjustments) {
        score += delta;
    }
    // Clamp to 0-100
    return Math.max(0, Math.min(100, score));
}
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Issue Vetting — orchestrates individual issue checks and computes
3
+ * recommendation + viability score.
4
+ *
5
+ * Delegates to focused modules:
6
+ * - issue-eligibility.ts — PR existence, claim detection, requirements analysis
7
+ * - repo-health.ts — project health, contribution guidelines
8
+ */
9
+ import { Octokit } from '@octokit/rest';
10
+ import { type SearchPriority, type IssueCandidate, type ProjectCategory } from './types.js';
11
/**
 * Read-only interface for accessing scout state during issue vetting.
 * Implementations may be backed by gist persistence, in-memory state, etc.
 */
export interface ScoutStateReader {
    /**
     * Repos where the user has at least one merged PR.
     * Entries are compared against `owner/repo` names during vetting.
     */
    getReposWithMergedPRs(): string[];
    /**
     * User's starred repos (from GitHub).
     * Entries are compared against `owner/repo` names during vetting.
     */
    getStarredRepos(): string[];
    /**
     * Preferred GitHub orgs from user preferences.
     * Matched case-insensitively against the owner part of the repo name.
     */
    getPreferredOrgs(): string[];
    /** Preferred project categories from user preferences. */
    getProjectCategories(): ProjectCategory[];
    /**
     * Numeric quality score for a repo, or null if not evaluated.
     * @param repo - Repo full name in `owner/repo` form.
     */
    getRepoScore(repo: string): number | null;
}
27
/**
 * Vets GitHub issues as contribution candidates using an Octokit client for
 * live data and a {@link ScoutStateReader} for the user's stored state.
 */
export declare class IssueVetter {
    private octokit;
    private stateReader;
    /**
     * @param octokit - Octokit client used for all GitHub API calls.
     * @param stateReader - Read-only view of the user's scout state.
     */
    constructor(octokit: Octokit, stateReader: ScoutStateReader);
    /**
     * Vet a specific issue — runs all checks and computes recommendation + viability score.
     * Results are cached for 15 minutes to avoid redundant API calls on repeated searches.
     *
     * @param issueUrl - URL of the GitHub issue to vet.
     * @returns The vetted candidate, including check results, recommendation,
     *   viability score and search priority.
     * @throws ValidationError when `issueUrl` does not parse as a GitHub issue URL.
     */
    vetIssue(issueUrl: string): Promise<IssueCandidate>;
    /**
     * Vet multiple issues in parallel with concurrency limit
     *
     * Individual vetting failures are logged and counted rather than thrown.
     *
     * @param urls - Issue URLs to vet, in order.
     * @param maxResults - Maximum number of candidates to return; no further
     *   URLs are started once this many candidates have been collected.
     * @param priority - When provided, overrides each candidate's `searchPriority`.
     * @returns `candidates` (at most `maxResults`), `allFailed` (every attempted
     *   URL failed), and `rateLimitHit` (at least one failure was a rate-limit error).
     */
    vetIssuesParallel(urls: string[], maxResults: number, priority?: SearchPriority): Promise<{
        candidates: IssueCandidate[];
        allFailed: boolean;
        rateLimitHit: boolean;
    }>;
}
@@ -0,0 +1,270 @@
1
+ /**
2
+ * Issue Vetting — orchestrates individual issue checks and computes
3
+ * recommendation + viability score.
4
+ *
5
+ * Delegates to focused modules:
6
+ * - issue-eligibility.ts — PR existence, claim detection, requirements analysis
7
+ * - repo-health.ts — project health, contribution guidelines
8
+ */
9
+ import { parseGitHubUrl } from './utils.js';
10
+ import { ValidationError, errorMessage, isRateLimitError } from './errors.js';
11
+ import { debug, warn } from './logger.js';
12
+ import { calculateRepoQualityBonus, calculateViabilityScore } from './issue-scoring.js';
13
+ import { repoBelongsToCategory } from './category-mapping.js';
14
+ import { checkNoExistingPR, checkNotClaimed, checkUserMergedPRsInRepo, analyzeRequirements, } from './issue-eligibility.js';
15
+ import { checkProjectHealth, fetchContributionGuidelines } from './repo-health.js';
16
+ import { getHttpCache } from './http-cache.js';
17
const MODULE = 'issue-vetting';
/** Vetting concurrency: kept low to reduce burst pressure on GitHub's secondary rate limit. */
const MAX_CONCURRENT_VETTING = 3;
/** TTL for cached vetting results (15 minutes). Kept short so config changes take effect quickly. */
const VETTING_CACHE_TTL_MS = 15 * 60 * 1000;
/**
 * Vets GitHub issues as contribution candidates.
 *
 * Combines live GitHub data (fetched through the Octokit client) with the
 * user's stored state (merged-PR repos, starred repos, preferred orgs and
 * categories, repo scores) to produce a recommendation and a viability score.
 */
export class IssueVetter {
    octokit;
    stateReader;
    /**
     * @param octokit - Octokit client used for all GitHub API calls.
     * @param stateReader - Read-only view of the user's scout state.
     */
    constructor(octokit, stateReader) {
        this.octokit = octokit;
        this.stateReader = stateReader;
    }
    /**
     * Vet a specific issue — runs all checks and computes recommendation + viability score.
     * Results are cached for 15 minutes to avoid redundant API calls on repeated searches.
     *
     * @param issueUrl - URL of the GitHub issue to vet.
     * @returns The vetted candidate: tracked issue, vetting result, project health,
     *   recommendation, skip/approve reasons, viability score and search priority.
     * @throws ValidationError when `issueUrl` does not parse as a GitHub issue URL.
     *   A failure of the initial issue fetch is not caught here and propagates.
     */
    async vetIssue(issueUrl) {
        // Check vetting cache first — avoids ~6+ API calls per issue
        const cache = getHttpCache();
        const cacheKey = `vet:${issueUrl}`;
        const cached = cache.getIfFresh(cacheKey, VETTING_CACHE_TTL_MS);
        // Only trust a cache entry that has the shape of a vetting result.
        if (cached && typeof cached === 'object' && 'issue' in cached && 'viabilityScore' in cached) {
            debug(MODULE, `Vetting cache hit for ${issueUrl}`);
            return cached;
        }
        // Parse URL
        const parsed = parseGitHubUrl(issueUrl);
        if (!parsed || parsed.type !== 'issues') {
            throw new ValidationError(`Invalid issue URL: ${issueUrl}`);
        }
        const { owner, repo, number } = parsed;
        const repoFullName = `${owner}/${repo}`;
        // Fetch issue data
        const { data: ghIssue } = await this.octokit.issues.get({
            owner,
            repo,
            issue_number: number,
        });
        // Check if the user already has merged PRs in this repo (skip the Search API call)
        const reposWithMergedPRs = this.stateReader.getReposWithMergedPRs();
        const hasMergedPRsInRepo = reposWithMergedPRs.includes(repoFullName);
        // Run all vetting checks in parallel — delegates to standalone functions
        const [existingPRCheck, claimCheck, projectHealth, contributionGuidelines, userMergedPRCount] = await Promise.all([
            checkNoExistingPR(this.octokit, owner, repo, number),
            checkNotClaimed(this.octokit, owner, repo, number, ghIssue.comments),
            checkProjectHealth(this.octokit, owner, repo),
            fetchContributionGuidelines(this.octokit, owner, repo),
            // The live count is only needed when local state has no record for this repo.
            hasMergedPRsInRepo ? Promise.resolve(0) : checkUserMergedPRsInRepo(this.octokit, owner, repo),
        ]);
        const noExistingPR = existingPRCheck.passed;
        const notClaimed = claimCheck.passed;
        // Analyze issue quality
        const clearRequirements = analyzeRequirements(ghIssue.body || '');
        // When the health check itself failed (API error), do not fail this check:
        // the repo is not penalized as inactive. It is not credited as active either —
        // the "Active project" reason is withheld and an "approve" is downgraded below.
        const projectActive = projectHealth.checkFailed ? true : projectHealth.isActive;
        const vettingResult = {
            passedAllChecks: noExistingPR && notClaimed && projectActive && clearRequirements,
            checks: {
                noExistingPR,
                notClaimed,
                projectActive,
                clearRequirements,
                contributionGuidelinesFound: !!contributionGuidelines,
            },
            contributionGuidelines,
            notes: [],
        };
        // Build notes
        if (!noExistingPR)
            vettingResult.notes.push('Existing PR found for this issue');
        if (!notClaimed)
            vettingResult.notes.push('Issue appears to be claimed by someone');
        if (existingPRCheck.inconclusive) {
            vettingResult.notes.push(`Could not verify absence of existing PRs: ${existingPRCheck.reason || 'API error'}`);
        }
        if (claimCheck.inconclusive) {
            vettingResult.notes.push(`Could not verify claim status: ${claimCheck.reason || 'API error'}`);
        }
        if (projectHealth.checkFailed) {
            vettingResult.notes.push(`Could not verify project activity: ${projectHealth.failureReason || 'API error'}`);
        }
        else if (!projectHealth.isActive) {
            vettingResult.notes.push('Project may be inactive');
        }
        if (!clearRequirements)
            vettingResult.notes.push('Issue requirements are unclear');
        if (!contributionGuidelines)
            vettingResult.notes.push('No CONTRIBUTING.md found');
        // Create tracked issue
        const trackedIssue = {
            id: ghIssue.id,
            url: issueUrl,
            repo: repoFullName,
            number,
            title: ghIssue.title,
            status: 'candidate',
            labels: ghIssue.labels.map((l) => (typeof l === 'string' ? l : l.name || '')),
            createdAt: ghIssue.created_at,
            updatedAt: ghIssue.updated_at,
            vetted: true,
            vettingResult,
        };
        // Determine recommendation
        const reasonsToSkip = [];
        const reasonsToApprove = [];
        if (!noExistingPR)
            reasonsToSkip.push('Has existing PR');
        if (!notClaimed)
            reasonsToSkip.push('Already claimed');
        if (!projectHealth.isActive && !projectHealth.checkFailed)
            reasonsToSkip.push('Inactive project');
        if (!clearRequirements)
            reasonsToSkip.push('Unclear requirements');
        if (noExistingPR)
            reasonsToApprove.push('No existing PR');
        if (notClaimed)
            reasonsToApprove.push('Not claimed');
        if (projectHealth.isActive && !projectHealth.checkFailed)
            reasonsToApprove.push('Active project');
        if (clearRequirements)
            reasonsToApprove.push('Clear requirements');
        if (contributionGuidelines)
            reasonsToApprove.push('Has contribution guidelines');
        // Determine effective merged PR count: prefer local state (authoritative if present),
        // fall back to live GitHub API count to detect contributions made before using oss-scout
        const effectiveMergedCount = hasMergedPRsInRepo ? 1 : userMergedPRCount;
        if (effectiveMergedCount > 0) {
            reasonsToApprove.push(`Trusted project (${effectiveMergedCount} PR${effectiveMergedCount > 1 ? 's' : ''} merged)`);
        }
        // Check for org-level affinity (user has merged PRs in another repo under same org)
        const orgName = repoFullName.split('/')[0];
        let orgHasMergedPRs = false;
        if (orgName && repoFullName.includes('/')) {
            orgHasMergedPRs = reposWithMergedPRs.some((r) => r.startsWith(orgName + '/') && r !== repoFullName);
        }
        if (orgHasMergedPRs) {
            reasonsToApprove.push(`Org affinity (merged PRs in other ${orgName} repos)`);
        }
        // Check for category preference match
        const projectCategories = this.stateReader.getProjectCategories();
        const matchesCategory = repoBelongsToCategory(repoFullName, projectCategories);
        if (matchesCategory) {
            reasonsToApprove.push('Matches preferred project category');
        }
        // approve: every check passed; skip: more than two reasons to skip; otherwise needs_review.
        let recommendation;
        if (vettingResult.passedAllChecks) {
            recommendation = 'approve';
        }
        else if (reasonsToSkip.length > 2) {
            recommendation = 'skip';
        }
        else {
            recommendation = 'needs_review';
        }
        // Downgrade to needs_review if any check was inconclusive —
        // "approve" should only be given when all checks actually passed, not when they were skipped.
        const hasInconclusiveChecks = projectHealth.checkFailed || existingPRCheck.inconclusive || claimCheck.inconclusive;
        if (recommendation === 'approve' && hasInconclusiveChecks) {
            recommendation = 'needs_review';
            vettingResult.notes.push('Recommendation downgraded: one or more checks were inconclusive');
        }
        // Calculate repo quality bonus from star/fork counts
        const repoQualityBonus = calculateRepoQualityBonus(projectHealth.stargazersCount ?? 0, projectHealth.forksCount ?? 0);
        if (projectHealth.checkFailed && repoQualityBonus === 0) {
            vettingResult.notes.push('Repo quality bonus unavailable: could not fetch star/fork counts due to API error');
        }
        const repoScore = this.stateReader.getRepoScore(repoFullName);
        const viabilityScore = calculateViabilityScore({
            repoScore,
            hasExistingPR: !noExistingPR,
            isClaimed: !notClaimed,
            clearRequirements,
            hasContributionGuidelines: !!contributionGuidelines,
            issueUpdatedAt: ghIssue.updated_at,
            // No closed-without-merge count is gathered here, so that penalty never applies.
            closedWithoutMergeCount: 0,
            mergedPRCount: effectiveMergedCount,
            orgHasMergedPRs,
            repoQualityBonus,
            matchesPreferredCategory: matchesCategory,
        });
        // Priority tiers, highest first: merged PR in this repo, preferred org, starred repo.
        const starredRepos = this.stateReader.getStarredRepos();
        const preferredOrgs = this.stateReader.getPreferredOrgs();
        let searchPriority = 'normal';
        if (effectiveMergedCount > 0) {
            searchPriority = 'merged_pr';
        }
        else if (preferredOrgs.some((o) => o.toLowerCase() === orgName?.toLowerCase())) {
            searchPriority = 'preferred_org';
        }
        else if (starredRepos.includes(repoFullName)) {
            searchPriority = 'starred';
        }
        const result = {
            issue: trackedIssue,
            vettingResult,
            projectHealth,
            recommendation,
            reasonsToSkip,
            reasonsToApprove,
            viabilityScore,
            searchPriority,
        };
        // Cache the vetting result to avoid redundant API calls on repeated searches
        cache.set(cacheKey, '', result);
        return result;
    }
    /**
     * Vet multiple issues in parallel with concurrency limit
     *
     * URLs are started in order, at most MAX_CONCURRENT_VETTING at a time, and no
     * new URL is started once `maxResults` candidates have been collected.
     * Individual failures are logged and counted instead of being thrown.
     *
     * @param urls - Issue URLs to vet. Duplicate URLs are each vetted and awaited.
     * @param maxResults - Maximum number of candidates to return.
     * @param priority - When provided, overrides each returned candidate's `searchPriority`.
     * @returns `candidates` (at most `maxResults`), `allFailed` (every attempted URL
     *   failed), and `rateLimitHit` (at least one failure was a rate-limit error).
     */
    async vetIssuesParallel(urls, maxResults, priority) {
        const candidates = [];
        // In-flight tasks are keyed by a unique sequence number rather than by URL.
        // Keying by URL let a duplicate URL overwrite the earlier task's entry, after
        // which the earlier task's cleanup removed the later one — so the final wait
        // could finish while a task was still running.
        const pending = new Map();
        let failedVettingCount = 0;
        let rateLimitFailures = 0;
        let attemptedCount = 0;
        for (const url of urls) {
            if (candidates.length >= maxResults)
                break;
            attemptedCount++;
            const taskId = attemptedCount;
            const task = this.vetIssue(url)
                .then((candidate) => {
                if (candidates.length < maxResults) {
                    // Override the priority if provided. A shallow copy is used so the object
                    // returned by vetIssue (which it also hands to the vetting cache) is left
                    // untouched.
                    candidates.push(priority ? { ...candidate, searchPriority: priority } : candidate);
                }
            })
                .catch((error) => {
                failedVettingCount++;
                if (isRateLimitError(error)) {
                    rateLimitFailures++;
                }
                warn(MODULE, `Error vetting issue ${url}:`, errorMessage(error));
            })
                .finally(() => pending.delete(taskId));
            pending.set(taskId, task);
            // Limit concurrency — wait for at least one to complete before launching more
            if (pending.size >= MAX_CONCURRENT_VETTING) {
                await Promise.race(pending.values());
            }
        }
        // Wait for remaining
        await Promise.allSettled(pending.values());
        const allFailed = failedVettingCount === attemptedCount && attemptedCount > 0;
        if (allFailed) {
            warn(MODULE, `All ${attemptedCount} issue(s) failed vetting. ` +
                `This may indicate a systemic issue (rate limit, auth, network).`);
        }
        return { candidates: candidates.slice(0, maxResults), allFailed, rateLimitHit: rateLimitFailures > 0 };
    }
}
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Local state persistence — reads/writes ScoutState to ~/.oss-scout/state.json.
3
+ */
4
+ import type { ScoutState } from './schemas.js';
5
/**
 * Check if a local state file exists.
 *
 * @returns `true` when `state.json` is present in the data directory.
 */
export declare function hasLocalState(): boolean;
/**
 * Load state from local file. Returns fresh default state if file doesn't exist or is corrupt.
 *
 * A missing file yields defaults silently. Any other failure (unreadable file,
 * invalid JSON, schema mismatch) logs a warning, makes a best-effort backup copy
 * named `<state path>.corrupt.<timestamp>`, and then yields defaults.
 *
 * @returns The parsed state, or schema defaults for version 1.
 */
export declare function loadLocalState(): ScoutState;
/**
 * Save state to local file using atomic write (write to .tmp, then rename).
 *
 * The state is written as 2-space-indented JSON with a trailing newline, and the
 * temporary file is created with mode 0o600.
 *
 * @param state - State to persist.
 */
export declare function saveLocalState(state: ScoutState): void;
@@ -0,0 +1,56 @@
1
+ /**
2
+ * Local state persistence — reads/writes ScoutState to ~/.oss-scout/state.json.
3
+ */
4
+ import * as fs from 'fs';
5
+ import * as path from 'path';
6
+ import { ScoutStateSchema } from './schemas.js';
7
+ import { getDataDir } from './utils.js';
8
+ import { debug, warn } from './logger.js';
9
+ import { errorMessage } from './errors.js';
10
/** Module tag passed to the logger. */
const MODULE = 'local-state';
/**
 * Resolve the location of the state file.
 *
 * @returns Path of `state.json` inside the directory reported by `getDataDir()`.
 */
function getStatePath() {
    const dataDir = getDataDir();
    return path.join(dataDir, 'state.json');
}
14
/**
 * Report whether a local state file is present.
 *
 * @returns `true` when the state file exists on disk.
 */
export function hasLocalState() {
    const statePath = getStatePath();
    return fs.existsSync(statePath);
}
20
/**
 * Read and validate the local state file.
 *
 * A missing file silently yields default state. Any other failure — unreadable
 * file, invalid JSON, or a schema mismatch — is logged, the offending file is
 * copied to a timestamped `.corrupt.` backup on a best-effort basis, and default
 * state is returned.
 *
 * @returns The parsed state, or schema defaults for version 1.
 */
export function loadLocalState() {
    const statePath = getStatePath();
    const freshDefaults = () => ScoutStateSchema.parse({ version: 1 });
    try {
        const parsedJson = JSON.parse(fs.readFileSync(statePath, 'utf-8'));
        return ScoutStateSchema.parse(parsedJson);
    }
    catch (err) {
        // No file yet: not an error, just start from defaults.
        if (err?.code === 'ENOENT') {
            return freshDefaults();
        }
        // State file exists but is corrupt or unreadable
        warn(MODULE, `Failed to load state from ${statePath}: ${errorMessage(err)}. Using defaults.`);
        // Keep a copy of the bad file so it is not lost when state is next saved.
        try {
            const backupPath = `${statePath}.corrupt.${Date.now()}`;
            fs.copyFileSync(statePath, backupPath);
            warn(MODULE, `Corrupt state backed up to ${backupPath}`);
        }
        catch { /* best effort backup */ }
        return freshDefaults();
    }
}
46
/**
 * Save state to local file using atomic write (write to .tmp, then rename).
 *
 * The state is serialized as 2-space-indented JSON with a trailing newline and
 * written to `<state path>.tmp` (created with mode 0o600), which is then renamed
 * over the state file. If the write or the rename fails, the temporary file is
 * removed on a best-effort basis so it is not left behind, and the original
 * error is rethrown.
 *
 * @param state - State to persist.
 * @throws Whatever `fs.writeFileSync` / `fs.renameSync` throw on failure.
 */
export function saveLocalState(state) {
    const statePath = getStatePath();
    const tmpPath = statePath + '.tmp';
    const data = JSON.stringify(state, null, 2) + '\n';
    try {
        fs.writeFileSync(tmpPath, data, { mode: 0o600 });
        fs.renameSync(tmpPath, statePath);
    }
    catch (err) {
        // Do not leave a stale or partial temp file next to the real state file.
        try {
            fs.unlinkSync(tmpPath);
        }
        catch { /* temp file was never created or is already gone */ }
        throw err;
    }
    debug(MODULE, 'State saved');
}