@oss-autopilot/core 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,7 @@ import * as fs from 'fs';
13
13
  import * as path from 'path';
14
14
  import { getOctokit, checkRateLimit } from './github.js';
15
15
  import { getStateManager } from './state.js';
16
+ import { getSearchBudgetTracker } from './search-budget.js';
16
17
  import { daysBetween, getDataDir, sleep } from './utils.js';
17
18
  import { DEFAULT_CONFIG, SCOPE_LABELS } from './types.js';
18
19
  import { ValidationError, errorMessage, getHttpStatusCode, isRateLimitError } from './errors.js';
@@ -159,10 +160,12 @@ export class IssueDiscovery {
159
160
  let rateLimitHitDuringSearch = false;
160
161
  // Pre-flight rate limit check (#100) — also determines adaptive phase budget
161
162
  this.rateLimitWarning = null;
163
+ const tracker = getSearchBudgetTracker();
162
164
  let searchBudget = LOW_BUDGET_THRESHOLD - 1; // conservative: below threshold to skip heavy phases
163
165
  try {
164
166
  const rateLimit = await checkRateLimit(this.githubToken);
165
167
  searchBudget = rateLimit.remaining;
168
+ tracker.init(rateLimit.remaining, rateLimit.resetAt);
166
169
  if (rateLimit.remaining < 5) {
167
170
  const resetTime = new Date(rateLimit.resetAt).toLocaleTimeString('en-US', { hour12: false });
168
171
  this.rateLimitWarning = `GitHub search API quota low (${rateLimit.remaining}/${rateLimit.limit} remaining, resets at ${resetTime}). Search may be slow.`;
@@ -180,7 +183,9 @@ export class IssueDiscovery {
180
183
  if (getHttpStatusCode(error) === 401) {
181
184
  throw error;
182
185
  }
183
- // Non-fatal: proceed with conservative budget for transient/network errors
186
+ // Non-fatal: proceed with conservative budget for transient/network errors.
187
+ // Initialize tracker with conservative defaults so it doesn't fly blind.
188
+ tracker.init(CRITICAL_BUDGET_THRESHOLD, new Date(Date.now() + 60000).toISOString());
184
189
  warn(MODULE, 'Could not check rate limit — using conservative budget, skipping heavy phases:', errorMessage(error));
185
190
  }
186
191
  // Get merged-PR repos (highest merge probability)
@@ -329,7 +334,12 @@ export class IssueDiscovery {
329
334
  info(MODULE, `Phase 1: Searching issues in ${reposToSearch.length} starred repos...`);
330
335
  const remainingNeeded = maxResults - allCandidates.length;
331
336
  if (remainingNeeded > 0) {
332
- const { candidates: starredCandidates, allBatchesFailed, rateLimitHit, } = await searchInRepos(this.octokit, this.vetter, reposToSearch.slice(0, 10), baseQualifiers, labels, remainingNeeded, 'starred', filterIssues);
337
+ // Cap labels to reduce Search API calls: starred repos already signal user
338
+ // interest, so fewer labels suffice. With 3 labels and batch size 3 (2 repo ORs),
339
+ // each batch fits in a single label chunk instead of 3+, cutting Phase 1 calls
340
+ // from ~12 to ~4.
341
+ const phase1Labels = labels.slice(0, 3);
342
+ const { candidates: starredCandidates, allBatchesFailed, rateLimitHit, } = await searchInRepos(this.octokit, this.vetter, reposToSearch.slice(0, 10), baseQualifiers, phase1Labels, remainingNeeded, 'starred', filterIssues);
333
343
  allCandidates.push(...starredCandidates);
334
344
  if (allBatchesFailed) {
335
345
  phase1Error = 'All starred repo batches failed';
@@ -502,6 +512,7 @@ export class IssueDiscovery {
502
512
  });
503
513
  // Apply per-repo cap: max 2 issues from any single repo (#105)
504
514
  const capped = applyPerRepoCap(allCandidates, 2);
515
+ info(MODULE, `Search complete: ${tracker.getTotalCalls()} Search API calls used, ${capped.length} candidates returned`);
505
516
  return capped.slice(0, maxResults);
506
517
  }
507
518
  /**
@@ -14,12 +14,15 @@ export interface CheckResult {
14
14
  }
15
15
  /**
16
16
  * Check whether an open PR already exists for the given issue.
17
- * Searches both the PR search index and the issue timeline for linked PRs.
17
+ * Uses the timeline API (REST) to detect cross-referenced PRs, avoiding
18
+ * the Search API's strict 30 req/min rate limit.
18
19
  */
19
20
  export declare function checkNoExistingPR(octokit: Octokit, owner: string, repo: string, issueNumber: number): Promise<CheckResult>;
20
21
  /**
21
22
  * Check how many merged PRs the authenticated user has in a repo.
22
23
  * Uses GitHub Search API. Returns 0 on error (non-fatal).
24
+ * Results are cached per-repo for 15 minutes to avoid redundant Search API
25
+ * calls when multiple issues from the same repo are vetted.
23
26
  */
24
27
  export declare function checkUserMergedPRsInRepo(octokit: Octokit, owner: string, repo: string): Promise<number>;
25
28
  /**
@@ -8,6 +8,8 @@
8
8
  import { paginateAll } from './pagination.js';
9
9
  import { errorMessage } from './errors.js';
10
10
  import { warn } from './logger.js';
11
+ import { getHttpCache } from './http-cache.js';
12
+ import { getSearchBudgetTracker } from './search-budget.js';
11
13
  const MODULE = 'issue-eligibility';
12
14
  /** Phrases that indicate someone has already claimed an issue. */
13
15
  const CLAIM_PHRASES = [
@@ -29,16 +31,16 @@ const CLAIM_PHRASES = [
29
31
  ];
30
32
  /**
31
33
  * Check whether an open PR already exists for the given issue.
32
- * Searches both the PR search index and the issue timeline for linked PRs.
34
+ * Uses the timeline API (REST) to detect cross-referenced PRs, avoiding
35
+ * the Search API's strict 30 req/min rate limit.
33
36
  */
34
37
  export async function checkNoExistingPR(octokit, owner, repo, issueNumber) {
35
38
  try {
36
- // Search for PRs that mention this issue
37
- const { data } = await octokit.search.issuesAndPullRequests({
38
- q: `repo:${owner}/${repo} is:pr ${issueNumber}`,
39
- per_page: 5,
40
- });
41
- // Also check timeline for linked PRs
39
+ // Use the timeline API (REST, not Search) to detect linked PRs.
40
+ // This avoids consuming GitHub Search API quota (30 req/min limit).
41
+ // Timeline captures formally linked PRs via cross-referenced events
42
+ // but may miss PRs that only mention the issue number without a formal
43
+ // link — an acceptable trade-off since most PRs use "Fixes #N" syntax.
42
44
  const timeline = await paginateAll((page) => octokit.issues.listEventsForTimeline({
43
45
  owner,
44
46
  repo,
@@ -50,7 +52,7 @@ export async function checkNoExistingPR(octokit, owner, repo, issueNumber) {
50
52
  const e = event;
51
53
  return e.event === 'cross-referenced' && e.source?.issue?.pull_request;
52
54
  });
53
- return { passed: data.total_count === 0 && linkedPRs.length === 0 };
55
+ return { passed: linkedPRs.length === 0 };
54
56
  }
55
57
  catch (error) {
56
58
  const errMsg = errorMessage(error);
@@ -58,23 +60,46 @@ export async function checkNoExistingPR(octokit, owner, repo, issueNumber) {
58
60
  return { passed: true, inconclusive: true, reason: errMsg };
59
61
  }
60
62
  }
63
+ /** TTL for cached merged-PR counts per repo (15 minutes). */
64
+ const MERGED_PR_CACHE_TTL_MS = 15 * 60 * 1000;
61
65
  /**
62
66
  * Check how many merged PRs the authenticated user has in a repo.
63
67
  * Uses GitHub Search API. Returns 0 on error (non-fatal).
68
+ * Results are cached per-repo for 15 minutes to avoid redundant Search API
69
+ * calls when multiple issues from the same repo are vetted.
64
70
  */
65
71
  export async function checkUserMergedPRsInRepo(octokit, owner, repo) {
72
+ const cache = getHttpCache();
73
+ const cacheKey = `merged-prs:${owner}/${repo}`;
74
+ // Manual cache check — do not use cachedTimeBased because we must NOT cache
75
+ // error-path fallback values (a transient failure returning 0 would poison the
76
+ // cache for 15 minutes, hiding that the user has merged PRs in the repo).
77
+ const cached = cache.getIfFresh(cacheKey, MERGED_PR_CACHE_TTL_MS);
78
+ if (cached != null && typeof cached === 'number') {
79
+ return cached;
80
+ }
66
81
  try {
67
- // Use @me to search as the authenticated user
68
- const { data } = await octokit.search.issuesAndPullRequests({
69
- q: `repo:${owner}/${repo} is:pr is:merged author:@me`,
70
- per_page: 1, // We only need total_count
71
- });
72
- return data.total_count;
82
+ const tracker = getSearchBudgetTracker();
83
+ await tracker.waitForBudget();
84
+ try {
85
+ // Use @me to search as the authenticated user
86
+ const { data } = await octokit.search.issuesAndPullRequests({
87
+ q: `repo:${owner}/${repo} is:pr is:merged author:@me`,
88
+ per_page: 1, // We only need total_count
89
+ });
90
+ // Only cache successful results
91
+ cache.set(cacheKey, '', data.total_count);
92
+ return data.total_count;
93
+ }
94
+ finally {
95
+ // Always record the call — failed requests still consume GitHub rate limit points
96
+ tracker.recordCall();
97
+ }
73
98
  }
74
99
  catch (error) {
75
100
  const errMsg = errorMessage(error);
76
101
  warn(MODULE, `Could not check merged PRs in ${owner}/${repo}: ${errMsg}. Defaulting to 0.`);
77
- return 0;
102
+ return 0; // Not cached — next call will retry
78
103
  }
79
104
  }
80
105
  /**
@@ -52,13 +52,17 @@ export class IssueVetter {
52
52
  repo,
53
53
  issue_number: number,
54
54
  });
55
+ // Check local state first to skip the merged-PR Search API call when
56
+ // the repo already has authoritative data (saves 1 Search call per issue).
57
+ const repoScoreRecord = this.stateManager.getRepoScore(repoFullName);
58
+ const skipMergedPRCheck = repoScoreRecord != null && repoScoreRecord.mergedPRCount > 0;
55
59
  // Run all vetting checks in parallel — delegates to standalone functions
56
60
  const [existingPRCheck, claimCheck, projectHealth, contributionGuidelines, userMergedPRCount] = await Promise.all([
57
61
  checkNoExistingPR(this.octokit, owner, repo, number),
58
62
  checkNotClaimed(this.octokit, owner, repo, number, ghIssue.comments),
59
63
  checkProjectHealth(this.octokit, owner, repo),
60
64
  fetchContributionGuidelines(this.octokit, owner, repo),
61
- checkUserMergedPRsInRepo(this.octokit, owner, repo),
65
+ skipMergedPRCheck ? Promise.resolve(0) : checkUserMergedPRsInRepo(this.octokit, owner, repo),
62
66
  ]);
63
67
  const noExistingPR = existingPRCheck.passed;
64
68
  const notClaimed = claimCheck.passed;
@@ -138,7 +142,6 @@ export class IssueVetter {
138
142
  // Determine effective merged PR count: prefer local state (authoritative if present),
139
143
  // fall back to live GitHub API count to detect contributions made before using oss-autopilot (#373)
140
144
  const config = this.stateManager.getState().config;
141
- const repoScoreRecord = this.stateManager.getRepoScore(repoFullName);
142
145
  const effectiveMergedCount = repoScoreRecord && repoScoreRecord.mergedPRCount > 0 ? repoScoreRecord.mergedPRCount : userMergedPRCount;
143
146
  if (effectiveMergedCount > 0) {
144
147
  reasonsToApprove.push(`Trusted project (${effectiveMergedCount} PR${effectiveMergedCount > 1 ? 's' : ''} merged)`);
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Search Budget Tracker — centralized rate limit management for GitHub Search API.
3
+ *
4
+ * The GitHub Search API enforces a strict 30 requests/minute limit for
5
+ * authenticated users. This module tracks actual consumption via a sliding
6
+ * window and provides adaptive delays to stay within budget.
7
+ *
8
+ * Usage:
9
+ * - Initialize once per search run with pre-flight rate limit data
10
+ * - Call recordCall() after every Search API call
11
+ * - Call waitForBudget() before making a Search API call to pace requests
12
+ * - Call canAfford(n) to check if n more calls fit in the remaining budget
13
+ */
14
export declare class SearchBudgetTracker {
    /** Recent Search API call times that still fall inside the sliding window. */
    private callTimestamps;
    /** Remaining quota most recently reported by GitHub's rate limit endpoint. */
    private knownRemaining;
    /** Moment (epoch ms) at which GitHub's rate limit window resets. */
    private resetAt;
    /** Number of calls recorded since the last init; kept for diagnostics. */
    private totalCalls;
    /**
     * Seed the tracker from a pre-flight rate limit check.
     *
     * @param remaining - Search API calls GitHub reports as still available.
     * @param resetAt - Date string for when the rate limit window resets.
     */
    init(remaining: number, resetAt: string): void;
    /**
     * Note that one Search API call has just been issued.
     */
    recordCall(): void;
    /**
     * Drop recorded call times that have aged out of the sliding window.
     */
    private pruneOldTimestamps;
    /**
     * Report how many recorded calls are inside the current sliding window.
     *
     * @returns The in-window call count.
     */
    getCallsInWindow(): number;
    /**
     * Compute how many calls may still be made, taking both the sliding
     * window limit and the pre-flight remaining quota into account.
     */
    private getEffectiveBudget;
    /**
     * Tell whether `n` further Search API calls fit in the current budget.
     *
     * @param n - Number of calls the caller would like to make.
     * @returns `true` when the budget covers `n` calls.
     */
    canAfford(n: number): boolean;
    /**
     * Pace the caller: resolves once a Search API call may be attempted,
     * sleeping first when the tracker considers the budget used up.
     */
    waitForBudget(): Promise<void>;
    /**
     * Report the number of calls recorded since init (for diagnostics).
     *
     * @returns The running call total.
     */
    getTotalCalls(): number;
}
59
/**
 * Access the process-wide SearchBudgetTracker, creating it on first use.
 *
 * @returns The shared tracker instance.
 */
export declare function getSearchBudgetTracker(): SearchBudgetTracker;
@@ -0,0 +1,129 @@
1
+ /**
2
+ * Search Budget Tracker — centralized rate limit management for GitHub Search API.
3
+ *
4
+ * The GitHub Search API enforces a strict 30 requests/minute limit for
5
+ * authenticated users. This module tracks actual consumption via a sliding
6
+ * window and provides adaptive delays to stay within budget.
7
+ *
8
+ * Usage:
9
+ * - Initialize once per search run with pre-flight rate limit data
10
+ * - Call recordCall() after every Search API call
11
+ * - Call waitForBudget() before making a Search API call to pace requests
12
+ * - Call canAfford(n) to check if n more calls fit in the remaining budget
13
+ */
14
+ import { debug } from './logger.js';
15
+ import { sleep } from './utils.js';
16
+ const MODULE = 'search-budget';
17
+ /** GitHub Search API rate limit: 30 requests per 60-second rolling window. */
18
+ const SEARCH_RATE_LIMIT = 30;
19
+ const SEARCH_WINDOW_MS = 60 * 1000;
20
+ /** Safety margin: reserve a few calls for retries and cross-process usage. */
21
+ const SAFETY_MARGIN = 4;
22
+ /** Effective budget per window after safety margin. */
23
+ const EFFECTIVE_BUDGET = SEARCH_RATE_LIMIT - SAFETY_MARGIN;
24
export class SearchBudgetTracker {
    /** Timestamps (epoch ms) of recent Search API calls within the sliding window, oldest first. */
    callTimestamps = [];
    /** Last known remaining quota from GitHub's rate limit endpoint. */
    knownRemaining = SEARCH_RATE_LIMIT;
    /** Epoch ms when the rate limit window resets (from GitHub API); 0 until init() runs. */
    resetAt = 0;
    /** Total calls recorded since init (for diagnostics). */
    totalCalls = 0;
    /**
     * Initialize with pre-flight rate limit data from GitHub.
     * Clears the sliding window and the running call total.
     *
     * @param remaining - Search API calls reported as still available.
     * @param resetAt - Date string for the rate limit reset; parsed with `new Date()`.
     */
    init(remaining, resetAt) {
        this.knownRemaining = remaining;
        this.resetAt = new Date(resetAt).getTime();
        this.callTimestamps = [];
        this.totalCalls = 0;
        debug(MODULE, `Initialized: ${remaining} remaining, resets at ${new Date(this.resetAt).toLocaleTimeString()}`);
    }
    /**
     * Record that a Search API call was just made.
     */
    recordCall() {
        this.callTimestamps.push(Date.now());
        this.totalCalls++;
        this.pruneOldTimestamps();
    }
    /**
     * Remove timestamps older than the sliding window.
     */
    pruneOldTimestamps() {
        const cutoff = Date.now() - SEARCH_WINDOW_MS;
        while (this.callTimestamps.length > 0 && this.callTimestamps[0] < cutoff) {
            this.callTimestamps.shift();
        }
    }
    /**
     * Get the number of calls made in the current sliding window.
     *
     * @returns Count of recorded calls that have not yet aged out.
     */
    getCallsInWindow() {
        this.pruneOldTimestamps();
        return this.callTimestamps.length;
    }
    /**
     * Get the effective budget, accounting for both the sliding window limit
     * and the pre-flight remaining quota from GitHub.
     *
     * The pre-flight quota only describes the rate limit window that ends at
     * `resetAt`. Once that moment has passed (or was never supplied / failed
     * to parse), the figure is stale and only the local sliding window
     * applies. Without this, the tracker would report a budget of 0 forever
     * after `knownRemaining` calls and throttle every later call by a full
     * window.
     *
     * @returns Number of calls that may still be made right now (never negative).
     */
    getEffectiveBudget() {
        const localBudget = EFFECTIVE_BUDGET - this.callTimestamps.length;
        // `!(a < b)` rather than `a >= b` so a NaN resetAt counts as "already reset".
        const quotaWindowHasReset = !(Date.now() < this.resetAt);
        if (quotaWindowHasReset) {
            return Math.max(0, localBudget);
        }
        // Use the stricter of: local window limit vs. pre-flight remaining minus calls made
        const externalBudget = this.knownRemaining - this.totalCalls;
        return Math.max(0, Math.min(localBudget, externalBudget));
    }
    /**
     * Check if we can afford N more Search API calls without exceeding the budget.
     *
     * @param n - Number of additional calls being considered.
     * @returns `true` when the effective budget is at least `n`.
     */
    canAfford(n) {
        this.pruneOldTimestamps();
        return this.getEffectiveBudget() >= n;
    }
    /**
     * Wait if necessary to stay within the Search API rate limit.
     *
     * If the sliding window is at capacity, sleeps until the oldest call ages
     * out of the window. If instead the pre-flight quota is the limiting
     * factor, sleeps no longer than until that quota's reset time. Returns
     * immediately when the budget is exhausted but no calls are in the window,
     * because that is external consumption this tracker cannot wait out.
     */
    async waitForBudget() {
        // Loop to handle edge cases where a single sleep isn't enough
        // (e.g., concurrent callers, clock skew, or external budget depletion)
        while (true) {
            this.pruneOldTimestamps();
            if (this.getEffectiveBudget() > 0) {
                return; // Budget available, no wait needed
            }
            const oldestInWindow = this.callTimestamps[0];
            if (oldestInWindow === undefined) {
                return; // No calls in window — budget exhausted by external consumption, can't wait it out
            }
            const windowFreesAt = oldestInWindow + SEARCH_WINDOW_MS;
            const localBudget = EFFECTIVE_BUDGET - this.callTimestamps.length;
            // With room left in the local window, the pre-flight quota is what blocks us;
            // it lifts at resetAt (known to be in the future here), so never wait past that.
            const waitUntil = localBudget > 0 ? Math.min(windowFreesAt, this.resetAt) : windowFreesAt;
            const waitMs = Math.max(0, waitUntil - Date.now());
            debug(MODULE, `Budget full (${this.callTimestamps.length}/${EFFECTIVE_BUDGET} in window), waiting ${waitMs}ms`);
            // Always yield for at least the safety buffer so the loop never spins
            // synchronously while a boundary timestamp is about to age out.
            await sleep(waitMs + 100); // +100ms safety buffer
        }
    }
    /**
     * Get total calls recorded since init (for diagnostics).
     *
     * @returns The running call total.
     */
    getTotalCalls() {
        return this.totalCalls;
    }
}
117
+ // ---------------------------------------------------------------------------
118
+ // Singleton
119
+ // ---------------------------------------------------------------------------
120
+ let _tracker = null;
121
/**
 * Return the module-wide SearchBudgetTracker, constructing it lazily the
 * first time it is requested so every caller shares one instance.
 */
export function getSearchBudgetTracker() {
    _tracker ??= new SearchBudgetTracker();
    return _tracker;
}
@@ -10,11 +10,14 @@ import { debug, warn } from './logger.js';
10
10
  import { getHttpCache, cachedTimeBased } from './http-cache.js';
11
11
  import { detectLabelFarmingRepos } from './issue-filtering.js';
12
12
  import { sleep } from './utils.js';
13
+ import { getSearchBudgetTracker } from './search-budget.js';
13
14
  const MODULE = 'search-phases';
14
15
  /** GitHub Search API enforces a max of 5 AND/OR/NOT operators per query. */
15
16
  export const GITHUB_MAX_BOOLEAN_OPS = 5;
16
- /** Delay between search API calls to avoid GitHub's secondary rate limit (~30 req/min). */
17
- const INTER_QUERY_DELAY_MS = 1500;
17
+ /** Delay between search API calls to avoid GitHub's secondary rate limit (~30 req/min).
18
+ * Set to 2000ms as a safety floor (max 30/min at the limit). The SearchBudgetTracker
19
+ * adds additional adaptive delays when needed. */
20
+ const INTER_QUERY_DELAY_MS = 2000;
18
21
  /** Batch size for repo queries. 3 repos = 2 OR operators, leaving room for labels. */
19
22
  const BATCH_SIZE = 3;
20
23
  /**
@@ -93,8 +96,16 @@ const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
93
96
  export async function cachedSearchIssues(octokit, params) {
94
97
  const cacheKey = `search:${params.q}:${params.sort}:${params.order}:${params.per_page}`;
95
98
  return cachedTimeBased(getHttpCache(), cacheKey, SEARCH_CACHE_TTL_MS, async () => {
96
- const { data } = await octokit.search.issuesAndPullRequests(params);
97
- return data;
99
+ const tracker = getSearchBudgetTracker();
100
+ await tracker.waitForBudget();
101
+ try {
102
+ const { data } = await octokit.search.issuesAndPullRequests(params);
103
+ return data;
104
+ }
105
+ finally {
106
+ // Always record the call — failed requests still consume GitHub rate limit points
107
+ tracker.recordCall();
108
+ }
98
109
  });
99
110
  }
100
111
  // ── Search infrastructure ──
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oss-autopilot/core",
3
- "version": "1.7.0",
3
+ "version": "1.8.0",
4
4
  "description": "CLI and core library for managing open source contributions",
5
5
  "type": "module",
6
6
  "bin": {