@oss-autopilot/core 0.54.0 → 0.56.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.bundle.cjs +63 -63
- package/dist/commands/comments.js +0 -1
- package/dist/commands/config.js +45 -5
- package/dist/commands/daily.js +190 -157
- package/dist/commands/dashboard-data.js +37 -30
- package/dist/commands/dashboard-server.js +0 -1
- package/dist/commands/dismiss.js +0 -6
- package/dist/commands/init.js +0 -1
- package/dist/commands/local-repos.js +1 -2
- package/dist/commands/move.js +12 -11
- package/dist/commands/setup.d.ts +2 -1
- package/dist/commands/setup.js +166 -130
- package/dist/commands/shelve.js +10 -10
- package/dist/commands/startup.js +30 -14
- package/dist/core/ci-analysis.d.ts +6 -0
- package/dist/core/ci-analysis.js +89 -12
- package/dist/core/daily-logic.js +24 -33
- package/dist/core/index.d.ts +2 -1
- package/dist/core/index.js +2 -1
- package/dist/core/issue-discovery.d.ts +7 -44
- package/dist/core/issue-discovery.js +83 -188
- package/dist/core/issue-eligibility.d.ts +35 -0
- package/dist/core/issue-eligibility.js +126 -0
- package/dist/core/issue-vetting.d.ts +6 -21
- package/dist/core/issue-vetting.js +15 -279
- package/dist/core/pr-monitor.d.ts +7 -12
- package/dist/core/pr-monitor.js +14 -80
- package/dist/core/repo-health.d.ts +24 -0
- package/dist/core/repo-health.js +193 -0
- package/dist/core/search-phases.d.ts +55 -0
- package/dist/core/search-phases.js +155 -0
- package/dist/core/state.d.ts +11 -0
- package/dist/core/state.js +63 -4
- package/dist/core/types.d.ts +8 -1
- package/dist/core/types.js +7 -0
- package/dist/formatters/json.d.ts +1 -1
- package/package.json +1 -1
package/dist/core/pr-monitor.js
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Score methods still write to state.
|
|
5
5
|
*
|
|
6
6
|
* Decomposed into focused modules (#263):
|
|
7
|
-
* - ci-analysis.ts: CI check classification and analysis
|
|
7
|
+
* - ci-analysis.ts: CI status fetching, check classification and analysis
|
|
8
8
|
* - review-analysis.ts: Review decision and comment detection
|
|
9
9
|
* - checklist-analysis.ts: PR body checklist analysis
|
|
10
10
|
* - maintainer-analysis.ts: Maintainer action hint extraction
|
|
@@ -21,8 +21,7 @@ import { ConfigurationError, ValidationError, errorMessage, getHttpStatusCode }
|
|
|
21
21
|
import { paginateAll } from './pagination.js';
|
|
22
22
|
import { debug, warn, timed } from './logger.js';
|
|
23
23
|
import { getHttpCache, cachedRequest } from './http-cache.js';
|
|
24
|
-
|
|
25
|
-
import { classifyFailingChecks, analyzeCheckRuns, analyzeCombinedStatus, mergeStatuses } from './ci-analysis.js';
|
|
24
|
+
import { classifyFailingChecks, getCIStatus } from './ci-analysis.js';
|
|
26
25
|
import { determineReviewDecision, getLatestChangesRequestedDate, checkUnrespondedComments, } from './review-analysis.js';
|
|
27
26
|
import { analyzeChecklist } from './checklist-analysis.js';
|
|
28
27
|
import { extractMaintainerActionHints } from './maintainer-analysis.js';
|
|
@@ -30,9 +29,16 @@ import { computeDisplayLabel } from './display-utils.js';
|
|
|
30
29
|
import { fetchUserMergedPRCounts as fetchUserMergedPRCountsImpl, fetchUserClosedPRCounts as fetchUserClosedPRCountsImpl, fetchRecentlyClosedPRs as fetchRecentlyClosedPRsImpl, fetchRecentlyMergedPRs as fetchRecentlyMergedPRsImpl, } from './github-stats.js';
|
|
31
30
|
// Re-export so existing consumers can still import from pr-monitor
|
|
32
31
|
export { computeDisplayLabel } from './display-utils.js';
|
|
33
|
-
export { classifyCICheck, classifyFailingChecks } from './ci-analysis.js';
|
|
32
|
+
export { classifyCICheck, classifyFailingChecks, getCIStatus } from './ci-analysis.js';
|
|
34
33
|
export { isConditionalChecklistItem } from './checklist-analysis.js';
|
|
35
34
|
export { determineStatus } from './status-determination.js';
|
|
35
|
+
/**
 * Decide whether a pull request is currently in conflict with its base branch.
 *
 * @param mergeable - The PR's `mergeable` flag. Only the literal `false` counts
 *   as a conflict; any other value (including `null`/`undefined`) does not.
 * @param mergeableState - The PR's `mergeable_state` string. Only `'dirty'`
 *   counts as a conflict.
 * @returns `true` when either signal reports a conflict, otherwise `false`.
 */
export function hasMergeConflict(mergeable, mergeableState) {
    if (mergeable === false) {
        return true;
    }
    return mergeableState === 'dirty';
}
|
|
36
42
|
const MODULE = 'pr-monitor';
|
|
37
43
|
const MAX_CONCURRENT_REQUESTS = DEFAULT_CONCURRENCY;
|
|
38
44
|
export class PRMonitor {
|
|
@@ -167,14 +173,14 @@ export class PRMonitor {
|
|
|
167
173
|
// Determine review decision (delegated to review-analysis module)
|
|
168
174
|
const reviewDecision = determineReviewDecision(reviews);
|
|
169
175
|
// Check for merge conflict
|
|
170
|
-
const
|
|
176
|
+
const mergeConflict = hasMergeConflict(ghPR.mergeable, ghPR.mergeable_state);
|
|
171
177
|
// Check if there's an unresponded maintainer comment (delegated to review-analysis module)
|
|
172
178
|
const { hasUnrespondedComment, lastMaintainerComment } = checkUnrespondedComments(comments, reviews, reviewComments, config.githubUsername);
|
|
173
179
|
// Fetch CI status and (conditionally) latest commit date in parallel
|
|
174
180
|
// We need the commit date when hasUnrespondedComment is true (to distinguish
|
|
175
181
|
// "needs_response" from "waiting_on_maintainer") OR when reviewDecision is "changes_requested"
|
|
176
182
|
// (to detect needs_changes: review requested changes but no new commits pushed)
|
|
177
|
-
const ciPromise = this.
|
|
183
|
+
const ciPromise = getCIStatus(this.octokit, owner, repo, ghPR.head.sha);
|
|
178
184
|
const needCommitDate = hasUnrespondedComment || reviewDecision === 'changes_requested';
|
|
179
185
|
const commitInfoPromise = needCommitDate
|
|
180
186
|
? this.octokit.repos
|
|
@@ -222,7 +228,7 @@ export class PRMonitor {
|
|
|
222
228
|
const hasActionableCIFailure = ciStatus === 'failing' && classifiedChecks.some((c) => c.category === 'actionable');
|
|
223
229
|
const { status, actionReason, waitReason, stalenessTier, actionReasons } = determineStatus({
|
|
224
230
|
ciStatus,
|
|
225
|
-
hasMergeConflict,
|
|
231
|
+
hasMergeConflict: mergeConflict,
|
|
226
232
|
hasUnrespondedComment,
|
|
227
233
|
hasIncompleteChecklist,
|
|
228
234
|
reviewDecision,
|
|
@@ -253,7 +259,7 @@ export class PRMonitor {
|
|
|
253
259
|
ciStatus,
|
|
254
260
|
failingCheckNames,
|
|
255
261
|
classifiedChecks,
|
|
256
|
-
hasMergeConflict,
|
|
262
|
+
hasMergeConflict: mergeConflict,
|
|
257
263
|
reviewDecision,
|
|
258
264
|
hasUnrespondedComment,
|
|
259
265
|
lastMaintainerComment,
|
|
@@ -279,78 +285,6 @@ export class PRMonitor {
|
|
|
279
285
|
pr.displayDescription = displayDescription;
|
|
280
286
|
return pr;
|
|
281
287
|
}
|
|
282
|
-
/**
|
|
283
|
-
* Check if PR has merge conflict
|
|
284
|
-
*/
|
|
285
|
-
hasMergeConflict(mergeable, mergeableState) {
|
|
286
|
-
return mergeable === false || mergeableState === 'dirty';
|
|
287
|
-
}
|
|
288
|
-
/**
|
|
289
|
-
* Get CI status from combined status API and check runs.
|
|
290
|
-
* Returns status and names of failing checks for diagnostics.
|
|
291
|
-
* Delegates analysis to ci-analysis module.
|
|
292
|
-
*/
|
|
293
|
-
async getCIStatus(owner, repo, sha) {
|
|
294
|
-
if (!sha)
|
|
295
|
-
return { status: 'unknown', failingCheckNames: [], failingCheckConclusions: new Map() };
|
|
296
|
-
try {
|
|
297
|
-
// Fetch both combined status and check runs in parallel
|
|
298
|
-
const [statusResponse, checksResponse] = await Promise.all([
|
|
299
|
-
this.octokit.repos.getCombinedStatusForRef({ owner, repo, ref: sha }),
|
|
300
|
-
// 404 is expected for repos without check runs configured; log other errors for debugging
|
|
301
|
-
this.octokit.checks.listForRef({ owner, repo, ref: sha }).catch((err) => {
|
|
302
|
-
const status = getHttpStatusCode(err);
|
|
303
|
-
// Rate limit errors must propagate — matches listReviewComments pattern (#481)
|
|
304
|
-
if (status === 429)
|
|
305
|
-
throw err;
|
|
306
|
-
if (status === 403) {
|
|
307
|
-
const msg = errorMessage(err).toLowerCase();
|
|
308
|
-
if (msg.includes('rate limit') || msg.includes('abuse detection'))
|
|
309
|
-
throw err;
|
|
310
|
-
}
|
|
311
|
-
if (status === 404) {
|
|
312
|
-
debug('pr-monitor', `Check runs 404 for ${owner}/${repo}@${sha.slice(0, 7)} (no checks configured)`);
|
|
313
|
-
}
|
|
314
|
-
else {
|
|
315
|
-
warn('pr-monitor', `Non-404 error fetching check runs for ${owner}/${repo}@${sha.slice(0, 7)}: ${status ?? err}`);
|
|
316
|
-
}
|
|
317
|
-
return null;
|
|
318
|
-
}),
|
|
319
|
-
]);
|
|
320
|
-
const combinedStatus = statusResponse.data;
|
|
321
|
-
const allCheckRuns = checksResponse?.data?.check_runs || [];
|
|
322
|
-
// Deduplicate check runs by name, keeping only the most recent run per unique name.
|
|
323
|
-
// GitHub returns all historical runs (including re-runs), so without deduplication
|
|
324
|
-
// a superseded failure will incorrectly flag the PR as failing even after a re-run passes.
|
|
325
|
-
const latestCheckRunsByName = new Map();
|
|
326
|
-
for (const check of allCheckRuns) {
|
|
327
|
-
const existing = latestCheckRunsByName.get(check.name);
|
|
328
|
-
if (!existing || new Date(check.started_at ?? 0) > new Date(existing.started_at ?? 0)) {
|
|
329
|
-
latestCheckRunsByName.set(check.name, check);
|
|
330
|
-
}
|
|
331
|
-
}
|
|
332
|
-
const checkRuns = [...latestCheckRunsByName.values()];
|
|
333
|
-
// Delegate analysis to ci-analysis module
|
|
334
|
-
const checkRunAnalysis = analyzeCheckRuns(checkRuns);
|
|
335
|
-
const combinedAnalysis = analyzeCombinedStatus(combinedStatus);
|
|
336
|
-
return mergeStatuses(checkRunAnalysis, combinedAnalysis, checkRuns.length);
|
|
337
|
-
}
|
|
338
|
-
catch (error) {
|
|
339
|
-
const statusCode = getHttpStatusCode(error);
|
|
340
|
-
if (statusCode === 401 || statusCode === 403 || statusCode === 429) {
|
|
341
|
-
throw error;
|
|
342
|
-
}
|
|
343
|
-
else if (statusCode === 404) {
|
|
344
|
-
// Repo might not have CI configured, this is normal
|
|
345
|
-
debug('pr-monitor', `CI check 404 for ${owner}/${repo} (no CI configured)`);
|
|
346
|
-
return { status: 'unknown', failingCheckNames: [], failingCheckConclusions: new Map() };
|
|
347
|
-
}
|
|
348
|
-
else {
|
|
349
|
-
warn('pr-monitor', `Failed to check CI for ${owner}/${repo}@${sha.slice(0, 7)}: ${errorMessage(error)}`);
|
|
350
|
-
}
|
|
351
|
-
return { status: 'unknown', failingCheckNames: [], failingCheckConclusions: new Map() };
|
|
352
|
-
}
|
|
353
|
-
}
|
|
354
288
|
/**
|
|
355
289
|
* Fetch merged PR counts and latest merge dates per repository for the configured user.
|
|
356
290
|
* Delegates to github-stats module.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Repo Health — project health checks and contribution guidelines fetching.
|
|
3
|
+
*
|
|
4
|
+
* Extracted from issue-vetting.ts (#621) to isolate repo-level checks
|
|
5
|
+
* from issue-level eligibility logic.
|
|
6
|
+
*/
|
|
7
|
+
import { Octokit } from '@octokit/rest';
|
|
8
|
+
import { type ContributionGuidelines, type ProjectHealth } from './types.js';
|
|
9
|
+
/**
|
|
10
|
+
* Check the health of a GitHub project: recent commits, CI status, star/fork counts.
|
|
11
|
+
* Results are cached for HEALTH_CACHE_TTL_MS (4 hours).
|
|
12
|
+
*/
|
|
13
|
+
export declare function checkProjectHealth(octokit: Octokit, owner: string, repo: string): Promise<ProjectHealth>;
|
|
14
|
+
/**
|
|
15
|
+
* Fetch and parse CONTRIBUTING.md (or variants) from a GitHub repo.
|
|
16
|
+
* Probes multiple paths in parallel: CONTRIBUTING.md, .github/CONTRIBUTING.md,
|
|
17
|
+
* docs/CONTRIBUTING.md, contributing.md. Results are cached for CACHE_TTL_MS (1 hour).
|
|
18
|
+
*/
|
|
19
|
+
export declare function fetchContributionGuidelines(octokit: Octokit, owner: string, repo: string): Promise<ContributionGuidelines | undefined>;
|
|
20
|
+
/**
|
|
21
|
+
* Parse the raw content of a CONTRIBUTING.md file to extract structured guidelines:
|
|
22
|
+
* branch naming, commit format, test framework, linter, formatter, CLA requirement.
|
|
23
|
+
*/
|
|
24
|
+
export declare function parseContributionGuidelines(content: string): ContributionGuidelines;
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Repo Health — project health checks and contribution guidelines fetching.
|
|
3
|
+
*
|
|
4
|
+
* Extracted from issue-vetting.ts (#621) to isolate repo-level checks
|
|
5
|
+
* from issue-level eligibility logic.
|
|
6
|
+
*/
|
|
7
|
+
import { daysBetween } from './utils.js';
|
|
8
|
+
import { errorMessage } from './errors.js';
|
|
9
|
+
import { warn } from './logger.js';
|
|
10
|
+
import { getHttpCache, cachedRequest, cachedTimeBased } from './http-cache.js';
|
|
11
|
+
const MODULE = 'repo-health';
|
|
12
|
+
// ── Cache for contribution guidelines ──
|
|
13
|
+
const guidelinesCache = new Map();
|
|
14
|
+
/** TTL for cached contribution guidelines (1 hour). */
|
|
15
|
+
const CACHE_TTL_MS = 60 * 60 * 1000;
|
|
16
|
+
/** TTL for cached project health results (4 hours). Health data (stars, commits, CI) changes slowly. */
|
|
17
|
+
const HEALTH_CACHE_TTL_MS = 4 * 60 * 60 * 1000;
|
|
18
|
+
/** Max entries in the guidelines cache before pruning. */
|
|
19
|
+
const CACHE_MAX_SIZE = 100;
|
|
20
|
+
/**
 * Keep the in-memory guidelines cache bounded.
 *
 * Pass 1 drops every entry whose age exceeds CACHE_TTL_MS. Pass 2 runs only if
 * the cache is still larger than CACHE_MAX_SIZE, and evicts the entries with
 * the smallest `fetchedAt` until the size limit is met.
 */
function pruneCache() {
    const expiryCutoff = Date.now() - CACHE_TTL_MS;
    for (const [repoKey, entry] of guidelinesCache) {
        if (entry.fetchedAt < expiryCutoff) {
            guidelinesCache.delete(repoKey);
        }
    }
    const overflow = guidelinesCache.size - CACHE_MAX_SIZE;
    if (overflow <= 0) {
        return;
    }
    // Oldest first, so the leading `overflow` entries are the ones to evict.
    const oldestFirst = [...guidelinesCache.entries()].sort((left, right) => left[1].fetchedAt - right[1].fetchedAt);
    for (const [repoKey] of oldestFirst.slice(0, overflow)) {
        guidelinesCache.delete(repoKey);
    }
}
|
|
38
|
+
// ── Project health ──
|
|
39
|
+
/**
 * Collect a health snapshot for `owner/repo`: last commit date, days since
 * that commit, open issue count, a coarse CI indicator, and star/fork counts.
 *
 * The snapshot is produced through `cachedTimeBased` under the key
 * `health:<owner>/<repo>` with a TTL of HEALTH_CACHE_TTL_MS (4 hours).
 *
 * @param octokit - Octokit client used for the repo, commit and workflow calls.
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @returns The health snapshot. If anything in the main lookup throws, a
 *   fallback object with `checkFailed: true` and `failureReason` is returned
 *   instead of propagating the error.
 */
export async function checkProjectHealth(octokit, owner, repo) {
    const httpCache = getHttpCache();
    // Best-effort CI probe: a failure here is logged and reported as 'unknown'
    // rather than failing the whole health check.
    const probeCiStatus = async () => {
        try {
            const { data: workflows } = await octokit.actions.listRepoWorkflows({ owner, repo, per_page: 1 });
            // Simplified check: the presence of any workflow is treated as passing.
            return workflows.total_count > 0 ? 'passing' : 'unknown';
        }
        catch (error) {
            const errMsg = errorMessage(error);
            warn(MODULE, `Failed to check CI status for ${owner}/${repo}: ${errMsg}. Defaulting to unknown.`);
            return 'unknown';
        }
    };
    const loadHealth = async () => {
        // Repo metadata goes through cachedRequest, which supplies the request headers.
        const repoData = await cachedRequest(httpCache, `/repos/${owner}/${repo}`, (headers) => octokit.repos.get({ owner, repo, headers }));
        const { data: commits } = await octokit.repos.listCommits({ owner, repo, per_page: 1 });
        // Prefer the newest commit's author date; fall back to the repo's pushed_at.
        const lastCommitAt = commits[0]?.commit?.author?.date || repoData.pushed_at;
        const daysSinceLastCommit = daysBetween(new Date(lastCommitAt));
        const ciStatus = await probeCiStatus();
        return {
            repo: `${owner}/${repo}`,
            lastCommitAt,
            daysSinceLastCommit,
            openIssuesCount: repoData.open_issues_count,
            avgIssueResponseDays: 0, // Placeholder: not computed here
            ciStatus,
            isActive: daysSinceLastCommit < 30,
            stargazersCount: repoData.stargazers_count,
            forksCount: repoData.forks_count,
        };
    };
    try {
        return await cachedTimeBased(httpCache, `health:${owner}/${repo}`, HEALTH_CACHE_TTL_MS, loadHealth);
    }
    catch (error) {
        const errMsg = errorMessage(error);
        warn(MODULE, `Error checking project health for ${owner}/${repo}: ${errMsg}`);
        return {
            repo: `${owner}/${repo}`,
            lastCommitAt: '',
            daysSinceLastCommit: 999,
            openIssuesCount: 0,
            avgIssueResponseDays: 0,
            ciStatus: 'unknown',
            isActive: false,
            checkFailed: true,
            failureReason: errMsg,
        };
    }
}
|
|
105
|
+
// ── Contribution guidelines ──
|
|
106
|
+
/**
 * Load and parse a repository's contributing guide.
 *
 * Four candidate paths are requested concurrently (CONTRIBUTING.md,
 * .github/CONTRIBUTING.md, docs/CONTRIBUTING.md, contributing.md) and the
 * first one, in that priority order, that yields non-empty content is used.
 * Both hits and misses are stored in the in-memory cache and reused while
 * younger than CACHE_TTL_MS.
 *
 * @param octokit - Octokit client used for the content requests.
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @returns Parsed guidelines, or `undefined` when no candidate path produced content.
 */
export async function fetchContributionGuidelines(octokit, owner, repo) {
    const cacheKey = `${owner}/${repo}`;
    const hit = guidelinesCache.get(cacheKey);
    if (hit && Date.now() - hit.fetchedAt < CACHE_TTL_MS) {
        return hit.guidelines;
    }
    const candidatePaths = ['CONTRIBUTING.md', '.github/CONTRIBUTING.md', 'docs/CONTRIBUTING.md', 'contributing.md'];
    // Decode the file body when the response carries one; anything else counts as "no content".
    const decode = ({ data }) => ('content' in data ? Buffer.from(data.content, 'base64').toString('utf-8') : null);
    const outcomes = await Promise.allSettled(candidatePaths.map((path) => octokit.repos.getContent({ owner, repo, path }).then(decode)));
    // Store the result (positive or negative), trim the cache, and hand the value back.
    const remember = (guidelines) => {
        guidelinesCache.set(cacheKey, { guidelines, fetchedAt: Date.now() });
        pruneCache();
        return guidelines;
    };
    for (const [index, outcome] of outcomes.entries()) {
        if (outcome.status === 'rejected') {
            const reason = outcome.reason;
            const msg = reason instanceof Error ? reason.message : String(reason);
            // A missing file is the expected case; only other failures are worth a warning.
            const isMissingFile = msg.includes('404') || msg.includes('Not Found');
            if (!isMissingFile) {
                warn(MODULE, `Unexpected error fetching ${candidatePaths[index]} from ${owner}/${repo}: ${msg}`);
            }
            continue;
        }
        if (outcome.value) {
            return remember(parseContributionGuidelines(outcome.value));
        }
    }
    return remember(undefined);
}
|
|
146
|
+
/**
 * Extract structured hints from the raw text of a contributing guide.
 *
 * Detection is keyword based and case-insensitive:
 * - branch naming: a quoted/backticked value on a line mentioning "branch"
 *   together with "name(d)", "format" or "convention";
 * - commit format: "conventional commit" wins, otherwise a quoted value on a
 *   line mentioning "commit message";
 * - test framework: first of Jest, RSpec, pytest, Mocha that is mentioned;
 * - linter: first of ESLint, RuboCop that is mentioned;
 * - formatter: Prettier, detected independently of the linter so that a guide
 *   mentioning both ESLint and Prettier reports both;
 * - CLA: the standalone word "CLA"/"CLAs" or the phrase "contributor license
 *   agreement". A word-boundary match is used so that words such as "class" or
 *   "declare" do not trigger a false positive.
 *
 * @param content - Raw file content.
 * @returns An object that always carries `rawContent`; every other field is
 *   present only when the corresponding hint was detected.
 */
export function parseContributionGuidelines(content) {
    const lowerContent = content.toLowerCase();
    // Return the display name of the first keyword (in table order) found in the text.
    const firstMentioned = (table) => {
        const found = table.find(([keyword]) => lowerContent.includes(keyword));
        return found ? found[1] : undefined;
    };
    let branchNamingConvention;
    if (lowerContent.includes('branch')) {
        const branchMatch = content.match(/branch[^\n]*(?:named?|format|convention)[^\n]*[`"]([^`"]+)[`"]/i);
        if (branchMatch) {
            branchNamingConvention = branchMatch[1];
        }
    }
    let commitMessageFormat;
    if (lowerContent.includes('conventional commit')) {
        commitMessageFormat = 'conventional commits';
    }
    else if (lowerContent.includes('commit message')) {
        const commitMatch = content.match(/commit message[^\n]*[`"]([^`"]+)[`"]/i);
        if (commitMatch) {
            commitMessageFormat = commitMatch[1];
        }
    }
    const testFramework = firstMentioned([
        ['jest', 'Jest'],
        ['rspec', 'RSpec'],
        ['pytest', 'pytest'],
        ['mocha', 'Mocha'],
    ]);
    const linter = firstMentioned([
        ['eslint', 'ESLint'],
        ['rubocop', 'RuboCop'],
    ]);
    // Formatter is its own category: do not chain it behind the linter checks.
    const formatter = lowerContent.includes('prettier') ? 'Prettier' : undefined;
    const claRequired = /\bclas?\b/.test(lowerContent) || lowerContent.includes('contributor license agreement');
    // Only attach the fields that were actually detected, so absent hints stay absent.
    return {
        rawContent: content,
        ...(branchNamingConvention !== undefined ? { branchNamingConvention } : {}),
        ...(commitMessageFormat !== undefined ? { commitMessageFormat } : {}),
        ...(testFramework !== undefined ? { testFramework } : {}),
        ...(linter !== undefined ? { linter } : {}),
        ...(formatter !== undefined ? { formatter } : {}),
        ...(claRequired ? { claRequired: true } : {}),
    };
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search Phases — utilities and infrastructure for multi-phase issue search.
|
|
3
|
+
*
|
|
4
|
+
* Extracted from issue-discovery.ts (#621) to isolate search helpers,
|
|
5
|
+
* caching, spam-filtering, and batched repo search logic.
|
|
6
|
+
*/
|
|
7
|
+
import { Octokit } from '@octokit/rest';
|
|
8
|
+
import { type SearchPriority, type IssueCandidate, type IssueScope } from './types.js';
|
|
9
|
+
import { type GitHubSearchItem } from './issue-filtering.js';
|
|
10
|
+
import { IssueVetter } from './issue-vetting.js';
|
|
11
|
+
/** Build a GitHub Search API label filter from a list of labels. */
|
|
12
|
+
export declare function buildLabelQuery(labels: string[]): string;
|
|
13
|
+
/** Resolve scope tiers into a flat label list, merged with custom labels. */
|
|
14
|
+
export declare function buildEffectiveLabels(scopes: IssueScope[], customLabels: string[]): string[];
|
|
15
|
+
/** Round-robin interleave multiple arrays. */
|
|
16
|
+
export declare function interleaveArrays<T>(arrays: T[][]): T[];
|
|
17
|
+
/** Split repos into batches of the specified size. */
|
|
18
|
+
export declare function batchRepos(repos: string[], batchSize: number): string[][];
|
|
19
|
+
/**
|
|
20
|
+
* Wrap octokit.search.issuesAndPullRequests with time-based caching.
|
|
21
|
+
* Repeated identical queries within SEARCH_CACHE_TTL_MS return cached results
|
|
22
|
+
* without consuming GitHub API rate limit points.
|
|
23
|
+
*/
|
|
24
|
+
export declare function cachedSearchIssues(octokit: Octokit, params: {
|
|
25
|
+
q: string;
|
|
26
|
+
sort: 'created' | 'updated' | 'comments' | 'reactions' | 'interactions';
|
|
27
|
+
order: 'asc' | 'desc';
|
|
28
|
+
per_page: number;
|
|
29
|
+
}): Promise<{
|
|
30
|
+
total_count: number;
|
|
31
|
+
items: GitHubSearchItem[];
|
|
32
|
+
}>;
|
|
33
|
+
/**
|
|
34
|
+
* Shared pipeline: spam-filter, repo-exclusion, vetting, and star-count filter.
|
|
35
|
+
* Used by Phases 2 and 3 to convert raw search results into vetted candidates.
|
|
36
|
+
*/
|
|
37
|
+
export declare function filterVetAndScore(vetter: IssueVetter, items: GitHubSearchItem[], filterIssues: (items: GitHubSearchItem[]) => GitHubSearchItem[], excludedRepoSets: Set<string>[], remainingNeeded: number, minStars: number, phaseLabel: string): Promise<{
|
|
38
|
+
candidates: IssueCandidate[];
|
|
39
|
+
allVetFailed: boolean;
|
|
40
|
+
rateLimitHit: boolean;
|
|
41
|
+
}>;
|
|
42
|
+
/**
|
|
43
|
+
* Search for issues within specific repos using batched queries.
|
|
44
|
+
*
|
|
45
|
+
* To stay within GitHub's Search API rate limit (30 requests/minute), we batch
|
|
46
|
+
* multiple repos into a single search query using OR syntax:
|
|
47
|
+
* repo:owner1/repo1 OR repo:owner2/repo2 OR repo:owner3/repo3
|
|
48
|
+
*
|
|
49
|
+
* This reduces API calls from N (one per repo) to ceil(N/BATCH_SIZE).
|
|
50
|
+
*/
|
|
51
|
+
export declare function searchInRepos(octokit: Octokit, vetter: IssueVetter, repos: string[], baseQuery: string, maxResults: number, priority: SearchPriority, filterFn: (items: GitHubSearchItem[]) => GitHubSearchItem[]): Promise<{
|
|
52
|
+
candidates: IssueCandidate[];
|
|
53
|
+
allBatchesFailed: boolean;
|
|
54
|
+
rateLimitHit: boolean;
|
|
55
|
+
}>;
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search Phases — utilities and infrastructure for multi-phase issue search.
|
|
3
|
+
*
|
|
4
|
+
* Extracted from issue-discovery.ts (#621) to isolate search helpers,
|
|
5
|
+
* caching, spam-filtering, and batched repo search logic.
|
|
6
|
+
*/
|
|
7
|
+
import { SCOPE_LABELS } from './types.js';
|
|
8
|
+
import { errorMessage, isRateLimitError } from './errors.js';
|
|
9
|
+
import { debug, warn } from './logger.js';
|
|
10
|
+
import { getHttpCache, cachedTimeBased } from './http-cache.js';
|
|
11
|
+
import { detectLabelFarmingRepos } from './issue-filtering.js';
|
|
12
|
+
const MODULE = 'search-phases';
|
|
13
|
+
// ── Pure utilities ──
|
|
14
|
+
/**
 * Turn a list of label names into a search-query fragment.
 *
 * @param labels - Label names; each is wrapped in double quotes as `label:"…"`.
 * @returns `''` for no labels, a single `label:"…"` clause for one label, or a
 *   parenthesised group of clauses joined with ` OR ` for several.
 */
export function buildLabelQuery(labels) {
    const clauses = labels.map((name) => `label:"${name}"`);
    switch (clauses.length) {
        case 0:
            return '';
        case 1:
            return clauses[0];
        default:
            return `(${clauses.join(' OR ')})`;
    }
}
|
|
22
|
+
/**
 * Expand scope tiers into their labels and merge them with user-supplied labels.
 *
 * @param scopes - Scope keys looked up in SCOPE_LABELS; a key with no entry contributes nothing.
 * @param customLabels - Extra labels appended after the scope-derived ones.
 * @returns De-duplicated labels in first-seen order (scope labels first, then custom labels).
 */
export function buildEffectiveLabels(scopes, customLabels) {
    const scopeLabels = scopes.flatMap((scope) => [...(SCOPE_LABELS[scope] ?? [])]);
    // A Set keeps insertion order, so spreading it back preserves first-seen order.
    return [...new Set([...scopeLabels, ...customLabels])];
}
|
|
33
|
+
/**
 * Merge several arrays by taking one element from each in turn.
 *
 * Round `i` contributes element `i` of every input array that is long enough,
 * in input order; shorter arrays simply drop out of later rounds.
 *
 * @param arrays - The arrays to interleave.
 * @returns A new flat array containing every element of every input.
 */
export function interleaveArrays(arrays) {
    let longest = 0;
    for (const list of arrays) {
        longest = Math.max(longest, list.length);
    }
    // Build each round separately, then concatenate the rounds one level deep.
    const rounds = Array.from({ length: longest }, (_unused, round) => arrays.filter((list) => round < list.length).map((list) => list[round]));
    return rounds.flat();
}
|
|
45
|
+
/**
 * Split a list of repos into consecutive batches.
 *
 * @param repos - Repo identifiers to split; not modified.
 * @param batchSize - Maximum number of repos per batch. Must be greater than 0.
 * @returns Batches in input order; the last batch may be shorter. An empty
 *   input yields an empty array.
 * @throws RangeError when `batchSize` is not a positive number. Without this
 *   guard a size of 0 or below never advances the loop index and the function
 *   would never return.
 */
export function batchRepos(repos, batchSize) {
    if (!(batchSize > 0)) {
        throw new RangeError(`batchSize must be a positive number, got ${batchSize}`);
    }
    const batches = [];
    for (let start = 0; start < repos.length; start += batchSize) {
        batches.push(repos.slice(start, start + batchSize));
    }
    return batches;
}
|
|
53
|
+
// ── Search caching ──
|
|
54
|
+
/** TTL for cached search API results (15 minutes). */
|
|
55
|
+
const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
|
|
56
|
+
/**
 * Run `octokit.search.issuesAndPullRequests` behind a time-based cache.
 *
 * The cache key is built from the query string, sort field, order and page
 * size, so a repeated identical search within SEARCH_CACHE_TTL_MS is served
 * through `cachedTimeBased` instead of issuing another request.
 *
 * @param octokit - Octokit client used when the search has to be executed.
 * @param params - Search parameters (`q`, `sort`, `order`, `per_page`), passed through unchanged.
 * @returns The `data` payload of the search response.
 */
export async function cachedSearchIssues(octokit, params) {
    const { q, sort, order, per_page: perPage } = params;
    const key = `search:${q}:${sort}:${order}:${perPage}`;
    const runSearch = async () => {
        const response = await octokit.search.issuesAndPullRequests(params);
        return response.data;
    };
    return cachedTimeBased(getHttpCache(), key, SEARCH_CACHE_TTL_MS, runSearch);
}
|
|
68
|
+
// ── Search infrastructure ──
|
|
69
|
+
/**
 * Common post-search pipeline: drop label-farming repos and excluded repos,
 * vet what is left, then apply the minimum-star filter.
 *
 * @param vetter - Provides `vetIssuesParallel`, which vets issue URLs.
 * @param items - Raw search results.
 * @param filterIssues - Caller-supplied pre-filter applied to `items` first.
 * @param excludedRepoSets - Sets of `owner/repo` names; an item whose repo is in any set is dropped.
 * @param remainingNeeded - Number of candidates still wanted; at most twice this many items are vetted.
 * @param minStars - Minimum star count a candidate's repo must have.
 * @param phaseLabel - Label used in debug output.
 * @returns The surviving candidates plus the `allFailed` (as `allVetFailed`)
 *   and `rateLimitHit` flags reported by the vetter. When nothing survives the
 *   pre-vetting filters, both flags are `false` and no vetting happens.
 */
export async function filterVetAndScore(vetter, items, filterIssues, excludedRepoSets, remainingNeeded, minStars, phaseLabel) {
    // `owner/repo` is the last two path segments of the item's repository_url.
    const repoNameOf = (item) => item.repository_url.split('/').slice(-2).join('/');
    const spamRepos = detectLabelFarmingRepos(items);
    if (spamRepos.size > 0) {
        const spamCount = items.filter((item) => spamRepos.has(repoNameOf(item))).length;
        debug(MODULE, `[SPAM_FILTER] Filtered ${spamCount} issues from ${spamRepos.size} label-farming repos: ${[...spamRepos].join(', ')}`);
    }
    const isAllowed = (item) => {
        const repoFullName = repoNameOf(item);
        return !spamRepos.has(repoFullName) && !excludedRepoSets.some((excluded) => excluded.has(repoFullName));
    };
    const vetLimit = remainingNeeded * 2;
    const itemsToVet = filterIssues(items)
        .filter((item) => isAllowed(item))
        .slice(0, vetLimit);
    if (itemsToVet.length === 0) {
        debug(MODULE, `[${phaseLabel}] All ${items.length} items filtered before vetting`);
        return { candidates: [], allVetFailed: false, rateLimitHit: false };
    }
    const urls = itemsToVet.map((item) => item.html_url);
    const vetOutcome = await vetter.vetIssuesParallel(urls, remainingNeeded, 'normal');
    const vetted = vetOutcome.candidates;
    // Candidates whose health check failed carry no star data, so they bypass the star filter.
    const meetsStarBar = (candidate) => {
        const health = candidate.projectHealth;
        return Boolean(health.checkFailed) || (health.stargazersCount ?? 0) >= minStars;
    };
    const starFiltered = vetted.filter(meetsStarBar);
    const starFilteredCount = vetted.length - starFiltered.length;
    if (starFilteredCount > 0) {
        debug(MODULE, `[STAR_FILTER] Filtered ${starFilteredCount} ${phaseLabel} candidates below ${minStars} stars`);
    }
    return { candidates: starFiltered, allVetFailed: vetOutcome.allFailed, rateLimitHit: vetOutcome.rateLimitHit };
}
|
|
104
|
+
/**
 * Search for issues within specific repos using batched queries.
 *
 * To stay within GitHub's Search API rate limit (30 requests/minute), several
 * repos are combined into one search query using OR syntax:
 *   repo:owner1/repo1 OR repo:owner2/repo2 OR repo:owner3/repo3
 *
 * This reduces API calls from N (one per repo) to ceil(N/BATCH_SIZE).
 *
 * @param octokit - Octokit client used for the searches.
 * @param vetter - Provides `vetIssuesParallel`, which vets issue URLs.
 * @param repos - `owner/repo` names to search in.
 * @param baseQuery - Query text placed in front of the repo filter.
 * @param maxResults - Stop searching once this many candidates are collected.
 * @param priority - Passed through to the vetter and used in log output.
 * @param filterFn - Caller-supplied filter applied to each batch's search results before vetting.
 * @returns The collected candidates, `allBatchesFailed` (every batch threw and
 *   there was at least one batch), and `rateLimitHit`, which is `true` when a
 *   batch failed with a rate-limit error OR the vetter reported hitting a rate
 *   limit while vetting a batch.
 */
export async function searchInRepos(octokit, vetter, repos, baseQuery, maxResults, priority, filterFn) {
    const candidates = [];
    const BATCH_SIZE = 5;
    const batches = batchRepos(repos, BATCH_SIZE);
    let failedBatches = 0;
    let rateLimitHit = false;
    for (const batch of batches) {
        if (candidates.length >= maxResults)
            break;
        try {
            // Build repo filter: (repo:a OR repo:b OR repo:c)
            const repoFilter = batch.map((r) => `repo:${r}`).join(' OR ');
            const batchQuery = `${baseQuery} (${repoFilter})`;
            const data = await cachedSearchIssues(octokit, {
                q: batchQuery,
                sort: 'created',
                order: 'desc',
                // Over-fetch (3x what is still needed, capped at 30) because filtering and vetting discard items.
                per_page: Math.min(30, (maxResults - candidates.length) * 3),
            });
            if (data.items.length > 0) {
                const filtered = filterFn(data.items);
                const remainingNeeded = maxResults - candidates.length;
                const { candidates: vetted, rateLimitHit: vetRateLimitHit } = await vetter.vetIssuesParallel(filtered.slice(0, remainingNeeded * 2).map((i) => i.html_url), remainingNeeded, priority);
                candidates.push(...vetted);
                // Surface rate limiting seen during vetting, not only during the search call.
                if (vetRateLimitHit) {
                    rateLimitHit = true;
                }
            }
        }
        catch (error) {
            // One failing batch must not abort the remaining batches.
            failedBatches++;
            if (isRateLimitError(error)) {
                rateLimitHit = true;
            }
            const batchReposStr = batch.join(', ');
            warn(MODULE, `Error searching issues in batch [${batchReposStr}]:`, errorMessage(error));
        }
    }
    const allBatchesFailed = failedBatches === batches.length && batches.length > 0;
    if (allBatchesFailed) {
        warn(MODULE, `All ${batches.length} batch(es) failed for ${priority} phase. ` +
            `This may indicate a systemic issue (rate limit, auth, network).`);
    }
    return { candidates, allBatchesFailed, rateLimitHit };
}
|
package/dist/core/state.d.ts
CHANGED
|
@@ -18,6 +18,8 @@ export declare class StateManager {
|
|
|
18
18
|
private state;
|
|
19
19
|
private readonly inMemoryOnly;
|
|
20
20
|
private lastLoadedMtimeMs;
|
|
21
|
+
private _batching;
|
|
22
|
+
private _batchDirty;
|
|
21
23
|
/**
|
|
22
24
|
* Create a new StateManager instance.
|
|
23
25
|
* @param inMemoryOnly - When true, state is held only in memory and never read from or
|
|
@@ -25,6 +27,15 @@ export declare class StateManager {
|
|
|
25
27
|
* Defaults to false (normal persistent mode).
|
|
26
28
|
*/
|
|
27
29
|
constructor(inMemoryOnly?: boolean);
|
|
30
|
+
/**
|
|
31
|
+
* Execute multiple mutations as a single batch, deferring disk I/O until the
|
|
32
|
+
* batch completes. Nested `batch()` calls are flattened — only the outermost saves.
|
|
33
|
+
*/
|
|
34
|
+
batch(fn: () => void): void;
|
|
35
|
+
/**
|
|
36
|
+
* Auto-persist after a mutation. Inside a `batch()`, defers to the batch boundary.
|
|
37
|
+
*/
|
|
38
|
+
private autoSave;
|
|
28
39
|
/**
|
|
29
40
|
* Check if initial setup has been completed.
|
|
30
41
|
*/
|