@oss-autopilot/core 0.54.0 → 0.55.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/dist/cli.bundle.cjs +63 -63
  2. package/dist/commands/comments.js +0 -1
  3. package/dist/commands/config.js +45 -5
  4. package/dist/commands/daily.js +190 -157
  5. package/dist/commands/dashboard-data.js +37 -30
  6. package/dist/commands/dashboard-server.js +0 -1
  7. package/dist/commands/dismiss.js +0 -6
  8. package/dist/commands/init.js +0 -1
  9. package/dist/commands/local-repos.js +1 -2
  10. package/dist/commands/move.js +12 -11
  11. package/dist/commands/setup.d.ts +2 -1
  12. package/dist/commands/setup.js +166 -130
  13. package/dist/commands/shelve.js +10 -10
  14. package/dist/commands/startup.js +30 -14
  15. package/dist/core/ci-analysis.d.ts +6 -0
  16. package/dist/core/ci-analysis.js +89 -12
  17. package/dist/core/daily-logic.js +24 -33
  18. package/dist/core/index.d.ts +2 -1
  19. package/dist/core/index.js +2 -1
  20. package/dist/core/issue-discovery.d.ts +7 -44
  21. package/dist/core/issue-discovery.js +83 -188
  22. package/dist/core/issue-eligibility.d.ts +35 -0
  23. package/dist/core/issue-eligibility.js +126 -0
  24. package/dist/core/issue-vetting.d.ts +6 -21
  25. package/dist/core/issue-vetting.js +15 -279
  26. package/dist/core/pr-monitor.d.ts +7 -12
  27. package/dist/core/pr-monitor.js +14 -80
  28. package/dist/core/repo-health.d.ts +24 -0
  29. package/dist/core/repo-health.js +193 -0
  30. package/dist/core/search-phases.d.ts +55 -0
  31. package/dist/core/search-phases.js +155 -0
  32. package/dist/core/state.d.ts +11 -0
  33. package/dist/core/state.js +63 -4
  34. package/dist/core/types.d.ts +8 -1
  35. package/dist/core/types.js +7 -0
  36. package/dist/formatters/json.d.ts +1 -1
  37. package/package.json +1 -1
@@ -4,7 +4,7 @@
4
4
  * Score methods still write to state.
5
5
  *
6
6
  * Decomposed into focused modules (#263):
7
- * - ci-analysis.ts: CI check classification and analysis
7
+ * - ci-analysis.ts: CI status fetching, check classification and analysis
8
8
  * - review-analysis.ts: Review decision and comment detection
9
9
  * - checklist-analysis.ts: PR body checklist analysis
10
10
  * - maintainer-analysis.ts: Maintainer action hint extraction
@@ -21,8 +21,7 @@ import { ConfigurationError, ValidationError, errorMessage, getHttpStatusCode }
21
21
  import { paginateAll } from './pagination.js';
22
22
  import { debug, warn, timed } from './logger.js';
23
23
  import { getHttpCache, cachedRequest } from './http-cache.js';
24
- // Extracted modules
25
- import { classifyFailingChecks, analyzeCheckRuns, analyzeCombinedStatus, mergeStatuses } from './ci-analysis.js';
24
+ import { classifyFailingChecks, getCIStatus } from './ci-analysis.js';
26
25
  import { determineReviewDecision, getLatestChangesRequestedDate, checkUnrespondedComments, } from './review-analysis.js';
27
26
  import { analyzeChecklist } from './checklist-analysis.js';
28
27
  import { extractMaintainerActionHints } from './maintainer-analysis.js';
@@ -30,9 +29,16 @@ import { computeDisplayLabel } from './display-utils.js';
30
29
  import { fetchUserMergedPRCounts as fetchUserMergedPRCountsImpl, fetchUserClosedPRCounts as fetchUserClosedPRCountsImpl, fetchRecentlyClosedPRs as fetchRecentlyClosedPRsImpl, fetchRecentlyMergedPRs as fetchRecentlyMergedPRsImpl, } from './github-stats.js';
31
30
  // Re-export so existing consumers can still import from pr-monitor
32
31
  export { computeDisplayLabel } from './display-utils.js';
33
- export { classifyCICheck, classifyFailingChecks } from './ci-analysis.js';
32
+ export { classifyCICheck, classifyFailingChecks, getCIStatus } from './ci-analysis.js';
34
33
  export { isConditionalChecklistItem } from './checklist-analysis.js';
35
34
  export { determineStatus } from './status-determination.js';
35
/**
 * Decide whether a pull request is currently in conflict with its base branch.
 *
 * @param mergeable - GitHub's `mergeable` flag for the PR.
 * @param mergeableState - GitHub's `mergeable_state` string for the PR.
 * @returns true when `mergeable` is exactly `false`, or when `mergeableState`
 *          is `'dirty'`; false otherwise (including null/undefined inputs).
 */
export function hasMergeConflict(mergeable, mergeableState) {
    // An explicit `false` is conclusive on its own; null/undefined are not.
    if (mergeable === false) {
        return true;
    }
    return mergeableState === 'dirty';
}
36
42
  const MODULE = 'pr-monitor';
37
43
  const MAX_CONCURRENT_REQUESTS = DEFAULT_CONCURRENCY;
38
44
  export class PRMonitor {
@@ -167,14 +173,14 @@ export class PRMonitor {
167
173
  // Determine review decision (delegated to review-analysis module)
168
174
  const reviewDecision = determineReviewDecision(reviews);
169
175
  // Check for merge conflict
170
- const hasMergeConflict = this.hasMergeConflict(ghPR.mergeable, ghPR.mergeable_state);
176
+ const mergeConflict = hasMergeConflict(ghPR.mergeable, ghPR.mergeable_state);
171
177
  // Check if there's an unresponded maintainer comment (delegated to review-analysis module)
172
178
  const { hasUnrespondedComment, lastMaintainerComment } = checkUnrespondedComments(comments, reviews, reviewComments, config.githubUsername);
173
179
  // Fetch CI status and (conditionally) latest commit date in parallel
174
180
  // We need the commit date when hasUnrespondedComment is true (to distinguish
175
181
  // "needs_response" from "waiting_on_maintainer") OR when reviewDecision is "changes_requested"
176
182
  // (to detect needs_changes: review requested changes but no new commits pushed)
177
- const ciPromise = this.getCIStatus(owner, repo, ghPR.head.sha);
183
+ const ciPromise = getCIStatus(this.octokit, owner, repo, ghPR.head.sha);
178
184
  const needCommitDate = hasUnrespondedComment || reviewDecision === 'changes_requested';
179
185
  const commitInfoPromise = needCommitDate
180
186
  ? this.octokit.repos
@@ -222,7 +228,7 @@ export class PRMonitor {
222
228
  const hasActionableCIFailure = ciStatus === 'failing' && classifiedChecks.some((c) => c.category === 'actionable');
223
229
  const { status, actionReason, waitReason, stalenessTier, actionReasons } = determineStatus({
224
230
  ciStatus,
225
- hasMergeConflict,
231
+ hasMergeConflict: mergeConflict,
226
232
  hasUnrespondedComment,
227
233
  hasIncompleteChecklist,
228
234
  reviewDecision,
@@ -253,7 +259,7 @@ export class PRMonitor {
253
259
  ciStatus,
254
260
  failingCheckNames,
255
261
  classifiedChecks,
256
- hasMergeConflict,
262
+ hasMergeConflict: mergeConflict,
257
263
  reviewDecision,
258
264
  hasUnrespondedComment,
259
265
  lastMaintainerComment,
@@ -279,78 +285,6 @@ export class PRMonitor {
279
285
  pr.displayDescription = displayDescription;
280
286
  return pr;
281
287
  }
282
- /**
283
- * Check if PR has merge conflict
284
- */
285
- hasMergeConflict(mergeable, mergeableState) {
286
- return mergeable === false || mergeableState === 'dirty';
287
- }
288
- /**
289
- * Get CI status from combined status API and check runs.
290
- * Returns status and names of failing checks for diagnostics.
291
- * Delegates analysis to ci-analysis module.
292
- */
293
- async getCIStatus(owner, repo, sha) {
294
- if (!sha)
295
- return { status: 'unknown', failingCheckNames: [], failingCheckConclusions: new Map() };
296
- try {
297
- // Fetch both combined status and check runs in parallel
298
- const [statusResponse, checksResponse] = await Promise.all([
299
- this.octokit.repos.getCombinedStatusForRef({ owner, repo, ref: sha }),
300
- // 404 is expected for repos without check runs configured; log other errors for debugging
301
- this.octokit.checks.listForRef({ owner, repo, ref: sha }).catch((err) => {
302
- const status = getHttpStatusCode(err);
303
- // Rate limit errors must propagate — matches listReviewComments pattern (#481)
304
- if (status === 429)
305
- throw err;
306
- if (status === 403) {
307
- const msg = errorMessage(err).toLowerCase();
308
- if (msg.includes('rate limit') || msg.includes('abuse detection'))
309
- throw err;
310
- }
311
- if (status === 404) {
312
- debug('pr-monitor', `Check runs 404 for ${owner}/${repo}@${sha.slice(0, 7)} (no checks configured)`);
313
- }
314
- else {
315
- warn('pr-monitor', `Non-404 error fetching check runs for ${owner}/${repo}@${sha.slice(0, 7)}: ${status ?? err}`);
316
- }
317
- return null;
318
- }),
319
- ]);
320
- const combinedStatus = statusResponse.data;
321
- const allCheckRuns = checksResponse?.data?.check_runs || [];
322
- // Deduplicate check runs by name, keeping only the most recent run per unique name.
323
- // GitHub returns all historical runs (including re-runs), so without deduplication
324
- // a superseded failure will incorrectly flag the PR as failing even after a re-run passes.
325
- const latestCheckRunsByName = new Map();
326
- for (const check of allCheckRuns) {
327
- const existing = latestCheckRunsByName.get(check.name);
328
- if (!existing || new Date(check.started_at ?? 0) > new Date(existing.started_at ?? 0)) {
329
- latestCheckRunsByName.set(check.name, check);
330
- }
331
- }
332
- const checkRuns = [...latestCheckRunsByName.values()];
333
- // Delegate analysis to ci-analysis module
334
- const checkRunAnalysis = analyzeCheckRuns(checkRuns);
335
- const combinedAnalysis = analyzeCombinedStatus(combinedStatus);
336
- return mergeStatuses(checkRunAnalysis, combinedAnalysis, checkRuns.length);
337
- }
338
- catch (error) {
339
- const statusCode = getHttpStatusCode(error);
340
- if (statusCode === 401 || statusCode === 403 || statusCode === 429) {
341
- throw error;
342
- }
343
- else if (statusCode === 404) {
344
- // Repo might not have CI configured, this is normal
345
- debug('pr-monitor', `CI check 404 for ${owner}/${repo} (no CI configured)`);
346
- return { status: 'unknown', failingCheckNames: [], failingCheckConclusions: new Map() };
347
- }
348
- else {
349
- warn('pr-monitor', `Failed to check CI for ${owner}/${repo}@${sha.slice(0, 7)}: ${errorMessage(error)}`);
350
- }
351
- return { status: 'unknown', failingCheckNames: [], failingCheckConclusions: new Map() };
352
- }
353
- }
354
288
  /**
355
289
  * Fetch merged PR counts and latest merge dates per repository for the configured user.
356
290
  * Delegates to github-stats module.
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Repo Health — project health checks and contribution guidelines fetching.
3
+ *
4
+ * Extracted from issue-vetting.ts (#621) to isolate repo-level checks
5
+ * from issue-level eligibility logic.
6
+ */
7
+ import { Octokit } from '@octokit/rest';
8
+ import { type ContributionGuidelines, type ProjectHealth } from './types.js';
9
+ /**
10
+ * Check the health of a GitHub project: recent commits, CI status, star/fork counts.
11
+ * Results are cached for HEALTH_CACHE_TTL_MS (4 hours).
12
+ */
13
+ export declare function checkProjectHealth(octokit: Octokit, owner: string, repo: string): Promise<ProjectHealth>;
14
+ /**
15
+ * Fetch and parse CONTRIBUTING.md (or variants) from a GitHub repo.
16
+ * Probes multiple paths in parallel: CONTRIBUTING.md, .github/CONTRIBUTING.md,
17
+ * docs/CONTRIBUTING.md, contributing.md. Results are cached for CACHE_TTL_MS.
18
+ */
19
+ export declare function fetchContributionGuidelines(octokit: Octokit, owner: string, repo: string): Promise<ContributionGuidelines | undefined>;
20
+ /**
21
+ * Parse the raw content of a CONTRIBUTING.md file to extract structured guidelines:
22
+ * branch naming, commit format, test framework, linter, formatter, CLA requirement.
23
+ */
24
+ export declare function parseContributionGuidelines(content: string): ContributionGuidelines;
@@ -0,0 +1,193 @@
1
+ /**
2
+ * Repo Health — project health checks and contribution guidelines fetching.
3
+ *
4
+ * Extracted from issue-vetting.ts (#621) to isolate repo-level checks
5
+ * from issue-level eligibility logic.
6
+ */
7
+ import { daysBetween } from './utils.js';
8
+ import { errorMessage } from './errors.js';
9
+ import { warn } from './logger.js';
10
+ import { getHttpCache, cachedRequest, cachedTimeBased } from './http-cache.js';
11
+ const MODULE = 'repo-health';
12
+ // ── Cache for contribution guidelines ──
13
+ const guidelinesCache = new Map();
14
+ /** TTL for cached contribution guidelines (1 hour). */
15
+ const CACHE_TTL_MS = 60 * 60 * 1000;
16
+ /** TTL for cached project health results (4 hours). Health data (stars, commits, CI) changes slowly. */
17
+ const HEALTH_CACHE_TTL_MS = 4 * 60 * 60 * 1000;
18
+ /** Max entries in the guidelines cache before pruning. */
19
+ const CACHE_MAX_SIZE = 100;
20
/**
 * Trim the guidelines cache in two passes: drop every entry whose age exceeds
 * CACHE_TTL_MS, then, if more than CACHE_MAX_SIZE entries remain, evict the
 * ones with the smallest `fetchedAt` until the cache is back at the limit.
 */
function pruneCache() {
    const currentTime = Date.now();
    // Pass 1: expire by age.
    guidelinesCache.forEach((entry, repoKey) => {
        if (currentTime - entry.fetchedAt > CACHE_TTL_MS) {
            guidelinesCache.delete(repoKey);
        }
    });
    // Pass 2: enforce the size cap, oldest entries first.
    const overflow = guidelinesCache.size - CACHE_MAX_SIZE;
    if (overflow > 0) {
        const oldestFirst = [...guidelinesCache.entries()].sort(([, left], [, right]) => left.fetchedAt - right.fetchedAt);
        oldestFirst.slice(0, overflow).forEach(([repoKey]) => {
            guidelinesCache.delete(repoKey);
        });
    }
}
38
+ // ── Project health ──
39
/**
 * Build a health snapshot for a GitHub repository.
 *
 * The snapshot is produced through `cachedTimeBased` under the key
 * `health:<owner>/<repo>` with HEALTH_CACHE_TTL_MS as the TTL. It combines
 * repository metadata (fetched via `cachedRequest`), the date of the newest
 * commit, and a coarse CI indicator.
 *
 * @param octokit - Octokit client used for all API calls.
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @returns The health record. If anything other than the workflow lookup
 *          fails, a placeholder record with `checkFailed: true` and
 *          `failureReason` is returned instead of throwing.
 */
export async function checkProjectHealth(octokit, owner, repo) {
    const httpCache = getHttpCache();
    const collectHealth = async () => {
        // Repo metadata goes through cachedRequest, which is handed request headers by the helper.
        const repoData = await cachedRequest(httpCache, `/repos/${owner}/${repo}`, (headers) => octokit.repos.get({ owner, repo, headers }));
        // Only the newest commit is needed.
        const commitsResponse = await octokit.repos.listCommits({
            owner,
            repo,
            per_page: 1,
        });
        const newestCommit = commitsResponse.data[0];
        // Fall back to the repo's pushed_at when no commit author date is available.
        const lastCommitAt = newestCommit?.commit?.author?.date || repoData.pushed_at;
        const daysSinceLastCommit = daysBetween(new Date(lastCommitAt));
        // Coarse CI signal: the presence of any workflow is reported as 'passing'.
        let ciStatus = 'unknown';
        try {
            const workflowsResponse = await octokit.actions.listRepoWorkflows({
                owner,
                repo,
                per_page: 1,
            });
            if (workflowsResponse.data.total_count > 0) {
                ciStatus = 'passing';
            }
        }
        catch (ciError) {
            // A failed workflow lookup is non-fatal; ciStatus stays 'unknown'.
            const ciErrMsg = errorMessage(ciError);
            warn(MODULE, `Failed to check CI status for ${owner}/${repo}: ${ciErrMsg}. Defaulting to unknown.`);
        }
        return {
            repo: `${owner}/${repo}`,
            lastCommitAt,
            daysSinceLastCommit,
            openIssuesCount: repoData.open_issues_count,
            avgIssueResponseDays: 0, // Not computed here; always reported as 0
            ciStatus,
            isActive: daysSinceLastCommit < 30,
            stargazersCount: repoData.stargazers_count,
            forksCount: repoData.forks_count,
        };
    };
    try {
        return await cachedTimeBased(httpCache, `health:${owner}/${repo}`, HEALTH_CACHE_TTL_MS, collectHealth);
    }
    catch (failure) {
        const reason = errorMessage(failure);
        warn(MODULE, `Error checking project health for ${owner}/${repo}: ${reason}`);
        // Placeholder record so callers can detect the failure via checkFailed.
        return {
            repo: `${owner}/${repo}`,
            lastCommitAt: '',
            daysSinceLastCommit: 999,
            openIssuesCount: 0,
            avgIssueResponseDays: 0,
            ciStatus: 'unknown',
            isActive: false,
            checkFailed: true,
            failureReason: reason,
        };
    }
}
105
+ // ── Contribution guidelines ──
106
/**
 * Fetch and parse a repository's contributing guide.
 *
 * Probes CONTRIBUTING.md, .github/CONTRIBUTING.md, docs/CONTRIBUTING.md and
 * contributing.md in parallel and uses the first one (in that priority order)
 * that returns non-empty file content. Parsed results are kept in the
 * in-memory guidelines cache for CACHE_TTL_MS.
 *
 * A "no guidelines" result is cached only when every failed probe was a
 * genuine 404. If any probe failed for another reason (rate limit, network,
 * 5xx), the negative result is NOT cached, so a transient failure cannot hide
 * an existing CONTRIBUTING file for the whole TTL.
 *
 * @param octokit - Octokit client used for the content requests.
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @returns The parsed guidelines, or undefined when none could be fetched.
 */
export async function fetchContributionGuidelines(octokit, owner, repo) {
    const cacheKey = `${owner}/${repo}`;
    // Serve from cache while the entry is still fresh (this includes cached negatives).
    const cached = guidelinesCache.get(cacheKey);
    if (cached && Date.now() - cached.fetchedAt < CACHE_TTL_MS) {
        return cached.guidelines;
    }
    const filesToCheck = ['CONTRIBUTING.md', '.github/CONTRIBUTING.md', 'docs/CONTRIBUTING.md', 'contributing.md'];
    // Probe all paths in parallel; allSettled keeps results aligned with filesToCheck.
    const results = await Promise.allSettled(filesToCheck.map(async (file) => {
        const { data } = await octokit.repos.getContent({ owner, repo, path: file });
        // Only responses carrying a `content` field are decoded; anything else counts as a miss.
        return 'content' in data ? Buffer.from(data.content, 'base64').toString('utf-8') : null;
    }));
    let sawUnexpectedError = false;
    for (let i = 0; i < results.length; i++) {
        const result = results[i];
        if (result.status === 'fulfilled') {
            if (result.value) {
                const guidelines = parseContributionGuidelines(result.value);
                guidelinesCache.set(cacheKey, { guidelines, fetchedAt: Date.now() });
                pruneCache();
                return guidelines;
            }
            continue;
        }
        const reason = result.reason;
        const msg = reason instanceof Error ? reason.message : String(reason);
        // Prefer the HTTP status carried by the error; keep the message match as a fallback.
        const isNotFound = reason?.status === 404 || msg.includes('404') || msg.includes('Not Found');
        if (!isNotFound) {
            sawUnexpectedError = true;
            warn(MODULE, `Unexpected error fetching ${filesToCheck[i]} from ${owner}/${repo}: ${msg}`);
        }
    }
    // Cache the negative result only when it is trustworthy (every failure was a 404).
    if (!sawUnexpectedError) {
        guidelinesCache.set(cacheKey, { guidelines: undefined, fetchedAt: Date.now() });
        pruneCache();
    }
    return undefined;
}
146
/**
 * Extract structured hints from the raw text of a contributing guide.
 *
 * Detection is keyword based and case-insensitive:
 * - branch naming: first quoted/backticked token on a line mentioning "branch"
 *   together with "name(d)", "format" or "convention"
 * - commit format: "conventional commits", else the first quoted token on a
 *   line mentioning "commit message"
 * - test framework: first of Jest, RSpec, pytest, Mocha that is mentioned
 * - linter: first of ESLint, RuboCop that is mentioned
 * - formatter: Prettier, detected independently of the linter
 * - CLA: the standalone word "CLA"/"CLAs" or the phrase
 *   "contributor license agreement"
 *
 * @param content - Raw file content.
 * @returns An object that always carries `rawContent`, plus only the fields
 *          that were detected.
 */
export function parseContributionGuidelines(content) {
    const lowerContent = content.toLowerCase();
    // Returns the display name of the first keyword in `table` found in the text.
    const firstMentioned = (table) => table.find(([keyword]) => lowerContent.includes(keyword))?.[1];
    // Branch naming convention
    let branchNamingConvention;
    if (lowerContent.includes('branch')) {
        const branchMatch = content.match(/branch[^\n]*(?:named?|format|convention)[^\n]*[`"]([^`"]+)[`"]/i);
        if (branchMatch) {
            branchNamingConvention = branchMatch[1];
        }
    }
    // Commit message format
    let commitMessageFormat;
    if (lowerContent.includes('conventional commit')) {
        commitMessageFormat = 'conventional commits';
    }
    else if (lowerContent.includes('commit message')) {
        const commitMatch = content.match(/commit message[^\n]*[`"]([^`"]+)[`"]/i);
        if (commitMatch) {
            commitMessageFormat = commitMatch[1];
        }
    }
    const testFramework = firstMentioned([
        ['jest', 'Jest'],
        ['rspec', 'RSpec'],
        ['pytest', 'pytest'],
        ['mocha', 'Mocha'],
    ]);
    const linter = firstMentioned([
        ['eslint', 'ESLint'],
        ['rubocop', 'RuboCop'],
    ]);
    // The formatter is a separate field and must not be skipped just because a linter was found.
    const formatter = lowerContent.includes('prettier') ? 'Prettier' : undefined;
    // Word boundaries keep "class", "declare", "clarify", ... from counting as a CLA mention.
    const claRequired = /\bclas?\b/.test(lowerContent) || lowerContent.includes('contributor license agreement');
    // Only detected fields are included, in the same order the fields were historically assigned.
    return {
        rawContent: content,
        ...(branchNamingConvention !== undefined && { branchNamingConvention }),
        ...(commitMessageFormat !== undefined && { commitMessageFormat }),
        ...(testFramework !== undefined && { testFramework }),
        ...(linter !== undefined && { linter }),
        ...(formatter !== undefined && { formatter }),
        ...(claRequired && { claRequired: true }),
    };
}
@@ -0,0 +1,55 @@
1
+ /**
2
+ * Search Phases — utilities and infrastructure for multi-phase issue search.
3
+ *
4
+ * Extracted from issue-discovery.ts (#621) to isolate search helpers,
5
+ * caching, spam-filtering, and batched repo search logic.
6
+ */
7
+ import { Octokit } from '@octokit/rest';
8
+ import { type SearchPriority, type IssueCandidate, type IssueScope } from './types.js';
9
+ import { type GitHubSearchItem } from './issue-filtering.js';
10
+ import { IssueVetter } from './issue-vetting.js';
11
+ /** Build a GitHub Search API label filter from a list of labels. */
12
+ export declare function buildLabelQuery(labels: string[]): string;
13
+ /** Resolve scope tiers into a flat label list, merged with custom labels. */
14
+ export declare function buildEffectiveLabels(scopes: IssueScope[], customLabels: string[]): string[];
15
+ /** Round-robin interleave multiple arrays. */
16
+ export declare function interleaveArrays<T>(arrays: T[][]): T[];
17
+ /** Split repos into batches of the specified size. */
18
+ export declare function batchRepos(repos: string[], batchSize: number): string[][];
19
+ /**
20
+ * Wrap octokit.search.issuesAndPullRequests with time-based caching.
21
+ * Repeated identical queries within SEARCH_CACHE_TTL_MS return cached results
22
+ * without consuming GitHub API rate limit points.
23
+ */
24
+ export declare function cachedSearchIssues(octokit: Octokit, params: {
25
+ q: string;
26
+ sort: 'created' | 'updated' | 'comments' | 'reactions' | 'interactions';
27
+ order: 'asc' | 'desc';
28
+ per_page: number;
29
+ }): Promise<{
30
+ total_count: number;
31
+ items: GitHubSearchItem[];
32
+ }>;
33
+ /**
34
+ * Shared pipeline: spam-filter, repo-exclusion, vetting, and star-count filter.
35
+ * Used by Phases 2 and 3 to convert raw search results into vetted candidates.
36
+ */
37
+ export declare function filterVetAndScore(vetter: IssueVetter, items: GitHubSearchItem[], filterIssues: (items: GitHubSearchItem[]) => GitHubSearchItem[], excludedRepoSets: Set<string>[], remainingNeeded: number, minStars: number, phaseLabel: string): Promise<{
38
+ candidates: IssueCandidate[];
39
+ allVetFailed: boolean;
40
+ rateLimitHit: boolean;
41
+ }>;
42
+ /**
43
+ * Search for issues within specific repos using batched queries.
44
+ *
45
+ * To avoid GitHub's secondary rate limit (30 requests/minute), we batch
46
+ * multiple repos into a single search query using OR syntax:
47
+ * repo:owner1/repo1 OR repo:owner2/repo2 OR repo:owner3/repo3
48
+ *
49
+ * This reduces API calls from N (one per repo) to ceil(N/BATCH_SIZE).
50
+ */
51
+ export declare function searchInRepos(octokit: Octokit, vetter: IssueVetter, repos: string[], baseQuery: string, maxResults: number, priority: SearchPriority, filterFn: (items: GitHubSearchItem[]) => GitHubSearchItem[]): Promise<{
52
+ candidates: IssueCandidate[];
53
+ allBatchesFailed: boolean;
54
+ rateLimitHit: boolean;
55
+ }>;
@@ -0,0 +1,155 @@
1
+ /**
2
+ * Search Phases — utilities and infrastructure for multi-phase issue search.
3
+ *
4
+ * Extracted from issue-discovery.ts (#621) to isolate search helpers,
5
+ * caching, spam-filtering, and batched repo search logic.
6
+ */
7
+ import { SCOPE_LABELS } from './types.js';
8
+ import { errorMessage, isRateLimitError } from './errors.js';
9
+ import { debug, warn } from './logger.js';
10
+ import { getHttpCache, cachedTimeBased } from './http-cache.js';
11
+ import { detectLabelFarmingRepos } from './issue-filtering.js';
12
+ const MODULE = 'search-phases';
13
+ // ── Pure utilities ──
14
/**
 * Turn a list of label names into a GitHub search qualifier.
 *
 * @param labels - Label names; each is wrapped in double quotes as-is.
 * @returns '' for no labels, `label:"x"` for one label, and a parenthesised
 *          `label:"a" OR label:"b"` group for several.
 */
export function buildLabelQuery(labels) {
    const clauses = labels.map((name) => `label:"${name}"`);
    switch (clauses.length) {
        case 0:
            return '';
        case 1:
            return clauses[0];
        default:
            return `(${clauses.join(' OR ')})`;
    }
}
22
/**
 * Flatten the labels configured for each scope (via SCOPE_LABELS) and append
 * the caller's custom labels, dropping duplicates while keeping first-seen order.
 *
 * @param scopes - Scope keys to look up in SCOPE_LABELS; unknown keys contribute nothing.
 * @param customLabels - Extra labels appended after the scope labels.
 * @returns De-duplicated label list.
 */
export function buildEffectiveLabels(scopes, customLabels) {
    const scopeLabels = scopes.flatMap((scope) => [...(SCOPE_LABELS[scope] ?? [])]);
    // A Set keeps insertion order, so scope labels stay ahead of custom ones.
    return Array.from(new Set([...scopeLabels, ...customLabels]));
}
33
/**
 * Merge several arrays round-robin: the first element of each array, then the
 * second of each, and so on. Arrays that run out are skipped for the
 * remaining rounds.
 *
 * @param arrays - The arrays to interleave.
 * @returns A new array containing every element of every input array.
 */
export function interleaveArrays(arrays) {
    const rounds = arrays.reduce((longest, current) => Math.max(longest, current.length), 0);
    const merged = [];
    for (let round = 0; round < rounds; round++) {
        const stillActive = arrays.filter((current) => round < current.length);
        merged.push(...stillActive.map((current) => current[round]));
    }
    return merged;
}
45
/**
 * Split a list of repos into consecutive batches.
 *
 * @param repos - Repo names to split.
 * @param batchSize - Maximum number of repos per batch; must be greater than 0.
 * @returns Batches in the original order; the last one may be shorter.
 *          An empty input yields an empty list.
 * @throws {RangeError} When `repos` is non-empty and `batchSize` is not a
 *         positive number. The loop index would otherwise never advance and
 *         the function would not terminate.
 */
export function batchRepos(repos, batchSize) {
    if (repos.length === 0) {
        return [];
    }
    // `!(x > 0)` also rejects NaN, which a plain `x <= 0` check would let through.
    if (!(batchSize > 0)) {
        throw new RangeError(`batchSize must be a positive number, got ${batchSize}`);
    }
    const batches = [];
    for (let i = 0; i < repos.length; i += batchSize) {
        batches.push(repos.slice(i, i + batchSize));
    }
    return batches;
}
53
+ // ── Search caching ──
54
+ /** TTL for cached search API results (15 minutes). */
55
+ const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
56
/**
 * Run `octokit.search.issuesAndPullRequests` through the time-based cache.
 *
 * The cache key is built from the query, sort, order and page size, and the
 * lookup is delegated to `cachedTimeBased` with SEARCH_CACHE_TTL_MS as the TTL.
 *
 * @param octokit - Octokit client used when the fetcher is invoked.
 * @param params - Search parameters (`q`, `sort`, `order`, `per_page`), passed to the API unchanged.
 * @returns The `data` payload of the search response.
 */
export async function cachedSearchIssues(octokit, params) {
    const { q, sort, order, per_page: perPage } = params;
    const cacheKey = `search:${q}:${sort}:${order}:${perPage}`;
    // Fetcher handed to cachedTimeBased; it hits the API when invoked.
    const runSearch = async () => {
        const response = await octokit.search.issuesAndPullRequests(params);
        return response.data;
    };
    return cachedTimeBased(getHttpCache(), cacheKey, SEARCH_CACHE_TTL_MS, runSearch);
}
68
+ // ── Search infrastructure ──
69
/**
 * Turn raw search results into vetted candidates.
 *
 * Steps, in order: detect label-farming repos, apply the caller's issue
 * filter, drop items from spam or excluded repos, cap the list at twice
 * `remainingNeeded`, vet the survivors, and finally drop candidates whose
 * repo has fewer than `minStars` stars (candidates whose health check failed
 * are kept, since their star count is unknown).
 *
 * @param vetter - Object exposing `vetIssuesParallel(urls, limit, priority)`.
 * @param items - Raw search items; each needs `repository_url` and `html_url`.
 * @param filterIssues - Caller-supplied pre-filter applied to `items`.
 * @param excludedRepoSets - Sets of `owner/repo` names to exclude.
 * @param remainingNeeded - How many candidates are still wanted.
 * @param minStars - Minimum star count for a candidate's repo.
 * @param phaseLabel - Label used in debug output.
 * @returns `{ candidates, allVetFailed, rateLimitHit }`.
 */
export async function filterVetAndScore(vetter, items, filterIssues, excludedRepoSets, remainingNeeded, minStars, phaseLabel) {
    // `owner/repo` is the last two path segments of the repository API URL.
    const repoNameOf = (item) => item.repository_url.split('/').slice(-2).join('/');
    const spamRepos = detectLabelFarmingRepos(items);
    if (spamRepos.size > 0) {
        let spamCount = 0;
        for (const item of items) {
            if (spamRepos.has(repoNameOf(item))) {
                spamCount++;
            }
        }
        debug(MODULE, `[SPAM_FILTER] Filtered ${spamCount} issues from ${spamRepos.size} label-farming repos: ${[...spamRepos].join(', ')}`);
    }
    const eligible = [];
    for (const item of filterIssues(items)) {
        const repoFullName = repoNameOf(item);
        if (spamRepos.has(repoFullName)) {
            continue;
        }
        if (excludedRepoSets.some((excluded) => excluded.has(repoFullName))) {
            continue;
        }
        eligible.push(item);
    }
    // Vet up to twice as many as needed to leave headroom for vetting rejections.
    const itemsToVet = eligible.slice(0, remainingNeeded * 2);
    if (itemsToVet.length === 0) {
        debug(MODULE, `[${phaseLabel}] All ${items.length} items filtered before vetting`);
        return { candidates: [], allVetFailed: false, rateLimitHit: false };
    }
    const issueUrls = itemsToVet.map((item) => item.html_url);
    const vetOutcome = await vetter.vetIssuesParallel(issueUrls, remainingNeeded, 'normal');
    const vetted = vetOutcome.candidates;
    const allVetFailed = vetOutcome.allFailed;
    const rateLimitHit = vetOutcome.rateLimitHit;
    const starFiltered = vetted.filter(({ projectHealth }) => {
        // Unknown star count (failed health check) is not grounds for rejection.
        if (projectHealth.checkFailed) {
            return true;
        }
        return (projectHealth.stargazersCount ?? 0) >= minStars;
    });
    const starFilteredCount = vetted.length - starFiltered.length;
    if (starFilteredCount > 0) {
        debug(MODULE, `[STAR_FILTER] Filtered ${starFilteredCount} ${phaseLabel} candidates below ${minStars} stars`);
    }
    return { candidates: starFiltered, allVetFailed, rateLimitHit };
}
104
/**
 * Search for issues within specific repos using batched queries.
 *
 * Repos are grouped into batches of BATCH_SIZE and each batch is searched
 * with a single query of the form
 *   <baseQuery> (repo:owner1/repo1 OR repo:owner2/repo2 ...)
 * which reduces search calls from one per repo to ceil(N / BATCH_SIZE).
 * Results of each batch are filtered with `filterFn` and vetted; the loop
 * stops as soon as `maxResults` candidates have been collected.
 *
 * A failing batch is logged and skipped rather than aborting the search.
 *
 * @param octokit - Octokit client passed to `cachedSearchIssues`.
 * @param vetter - Object exposing `vetIssuesParallel(urls, limit, priority)`.
 * @param repos - `owner/repo` names to search in.
 * @param baseQuery - Query text placed before the repo filter.
 * @param maxResults - Maximum number of candidates to collect.
 * @param priority - Priority passed to the vetter and used in log output.
 * @param filterFn - Filter applied to raw search items before vetting.
 * @returns `{ candidates, allBatchesFailed, rateLimitHit }`. `rateLimitHit`
 *          is true when a batch failed with a rate-limit error OR the vetter
 *          reported a rate limit while vetting a batch.
 */
export async function searchInRepos(octokit, vetter, repos, baseQuery, maxResults, priority, filterFn) {
    const candidates = [];
    const BATCH_SIZE = 5;
    const batches = batchRepos(repos, BATCH_SIZE);
    let failedBatches = 0;
    let rateLimitFailures = 0;
    // Tracks rate limits reported by the vetter (as opposed to thrown by the search call).
    let vetRateLimitHit = false;
    for (const batch of batches) {
        if (candidates.length >= maxResults)
            break;
        try {
            // Build repo filter: (repo:a OR repo:b OR repo:c)
            const repoFilter = batch.map((r) => `repo:${r}`).join(' OR ');
            const batchQuery = `${baseQuery} (${repoFilter})`;
            const data = await cachedSearchIssues(octokit, {
                q: batchQuery,
                sort: 'created',
                order: 'desc',
                // Over-fetch 3x what is still needed (capped at 30) to absorb filtering losses.
                per_page: Math.min(30, (maxResults - candidates.length) * 3),
            });
            if (data.items.length > 0) {
                const filtered = filterFn(data.items);
                const remainingNeeded = maxResults - candidates.length;
                const { candidates: vetted, rateLimitHit: batchVetRateLimited } = await vetter.vetIssuesParallel(filtered.slice(0, remainingNeeded * 2).map((i) => i.html_url), remainingNeeded, priority);
                candidates.push(...vetted);
                // Surface vetting rate limits to the caller, as filterVetAndScore does.
                if (batchVetRateLimited) {
                    vetRateLimitHit = true;
                }
            }
        }
        catch (error) {
            failedBatches++;
            if (isRateLimitError(error)) {
                rateLimitFailures++;
            }
            const batchReposStr = batch.join(', ');
            // Error text is folded into the message, matching the two-argument warn() calls used elsewhere.
            warn(MODULE, `Error searching issues in batch [${batchReposStr}]: ${errorMessage(error)}`);
        }
    }
    const allBatchesFailed = failedBatches === batches.length && batches.length > 0;
    const rateLimitHit = rateLimitFailures > 0 || vetRateLimitHit;
    if (allBatchesFailed) {
        warn(MODULE, `All ${batches.length} batch(es) failed for ${priority} phase. ` +
            `This may indicate a systemic issue (rate limit, auth, network).`);
    }
    return { candidates, allBatchesFailed, rateLimitHit };
}
@@ -18,6 +18,8 @@ export declare class StateManager {
18
18
  private state;
19
19
  private readonly inMemoryOnly;
20
20
  private lastLoadedMtimeMs;
21
+ private _batching;
22
+ private _batchDirty;
21
23
  /**
22
24
  * Create a new StateManager instance.
23
25
  * @param inMemoryOnly - When true, state is held only in memory and never read from or
@@ -25,6 +27,15 @@ export declare class StateManager {
25
27
  * Defaults to false (normal persistent mode).
26
28
  */
27
29
  constructor(inMemoryOnly?: boolean);
30
+ /**
31
+ * Execute multiple mutations as a single batch, deferring disk I/O until the
32
+ * batch completes. Nested `batch()` calls are flattened — only the outermost saves.
33
+ */
34
+ batch(fn: () => void): void;
35
+ /**
36
+ * Auto-persist after a mutation. Inside a `batch()`, defers to the batch boundary.
37
+ */
38
+ private autoSave;
28
39
  /**
29
40
  * Check if initial setup has been completed.
30
41
  */