@oss-autopilot/core 1.11.0 → 1.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -5
- package/dist/cli.bundle.cjs +67 -108
- package/dist/commands/daily.js +17 -0
- package/dist/commands/index.d.ts +3 -1
- package/dist/commands/index.js +2 -0
- package/dist/commands/scout-bridge.d.ts +15 -0
- package/dist/commands/scout-bridge.js +63 -0
- package/dist/commands/search.d.ts +1 -1
- package/dist/commands/search.js +10 -13
- package/dist/commands/vet-list.d.ts +1 -1
- package/dist/commands/vet-list.js +4 -5
- package/dist/commands/vet.d.ts +1 -1
- package/dist/commands/vet.js +4 -5
- package/dist/core/index.d.ts +0 -2
- package/dist/core/index.js +1 -2
- package/package.json +2 -1
- package/dist/core/category-mapping.d.ts +0 -19
- package/dist/core/category-mapping.js +0 -58
- package/dist/core/issue-discovery.d.ts +0 -94
- package/dist/core/issue-discovery.js +0 -591
- package/dist/core/issue-eligibility.d.ts +0 -38
- package/dist/core/issue-eligibility.js +0 -151
- package/dist/core/issue-filtering.d.ts +0 -51
- package/dist/core/issue-filtering.js +0 -103
- package/dist/core/issue-scoring.d.ts +0 -43
- package/dist/core/issue-scoring.js +0 -97
- package/dist/core/issue-vetting.d.ts +0 -33
- package/dist/core/issue-vetting.js +0 -306
- package/dist/core/repo-health.d.ts +0 -24
- package/dist/core/repo-health.js +0 -194
- package/dist/core/search-budget.d.ts +0 -62
- package/dist/core/search-budget.js +0 -129
- package/dist/core/search-phases.d.ts +0 -83
- package/dist/core/search-phases.js +0 -238
|
@@ -1,129 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Search Budget Tracker — centralized rate limit management for GitHub Search API.
|
|
3
|
-
*
|
|
4
|
-
* The GitHub Search API enforces a strict 30 requests/minute limit for
|
|
5
|
-
* authenticated users. This module tracks actual consumption via a sliding
|
|
6
|
-
* window and provides adaptive delays to stay within budget.
|
|
7
|
-
*
|
|
8
|
-
* Usage:
|
|
9
|
-
* - Initialize once per search run with pre-flight rate limit data
|
|
10
|
-
* - Call recordCall() after every Search API call
|
|
11
|
-
* - Call waitForBudget() before making a Search API call to pace requests
|
|
12
|
-
* - Call canAfford(n) to check if n more calls fit in the remaining budget
|
|
13
|
-
*/
|
|
14
|
-
import { debug } from './logger.js';
|
|
15
|
-
import { sleep } from './utils.js';
|
|
16
|
-
const MODULE = 'search-budget';
/** GitHub Search API rate limit: 30 requests per 60-second rolling window. */
const SEARCH_RATE_LIMIT = 30;
const SEARCH_WINDOW_MS = 60 * 1000;
/** Safety margin: reserve a few calls for retries and cross-process usage. */
const SAFETY_MARGIN = 4;
/** Effective budget per window after safety margin. */
const EFFECTIVE_BUDGET = SEARCH_RATE_LIMIT - SAFETY_MARGIN;
/**
 * Tracks Search API consumption with a local sliding window and a pre-flight
 * quota snapshot, and paces callers so they stay inside both.
 *
 * The snapshot (`knownRemaining` / `resetAt`) only constrains the budget until
 * `resetAt`; once that moment has passed, only the local sliding window applies.
 */
export class SearchBudgetTracker {
    /** Timestamps of recent Search API calls within the sliding window. */
    callTimestamps = [];
    /** Last known remaining quota from GitHub's rate limit endpoint. */
    knownRemaining = SEARCH_RATE_LIMIT;
    /** Epoch ms when the rate limit window resets (from GitHub API). */
    resetAt = 0;
    /** Total calls recorded since init (for diagnostics). */
    totalCalls = 0;
    /**
     * Initialize with pre-flight rate limit data from GitHub.
     * Clears the sliding window and the call counter.
     *
     * @param remaining Remaining Search API quota reported by the pre-flight check
     * @param resetAt When that quota resets; passed through `new Date(...)`, so any
     *                value the Date constructor accepts works
     *                (NOTE(review): callers are assumed to pass a date string, Date, or
     *                epoch milliseconds, not epoch seconds; confirm at the call site)
     */
    init(remaining, resetAt) {
        this.knownRemaining = remaining;
        this.resetAt = new Date(resetAt).getTime();
        this.callTimestamps = [];
        this.totalCalls = 0;
        debug(MODULE, `Initialized: ${remaining} remaining, resets at ${new Date(this.resetAt).toLocaleTimeString()}`);
    }
    /**
     * Record that a Search API call was just made.
     */
    recordCall() {
        this.callTimestamps.push(Date.now());
        this.totalCalls++;
        this.pruneOldTimestamps();
    }
    /**
     * Remove timestamps older than the sliding window.
     */
    pruneOldTimestamps() {
        const cutoff = Date.now() - SEARCH_WINDOW_MS;
        while (this.callTimestamps.length > 0 && this.callTimestamps[0] < cutoff) {
            this.callTimestamps.shift();
        }
    }
    /**
     * Get the number of calls made in the current sliding window.
     */
    getCallsInWindow() {
        this.pruneOldTimestamps();
        return this.callTimestamps.length;
    }
    /**
     * Get the effective budget, accounting for both the sliding window limit
     * and the pre-flight remaining quota from GitHub.
     *
     * Does not prune the window itself; callers prune first.
     *
     * @returns Number of calls that may still be made right now (never negative)
     */
    getEffectiveBudget() {
        const localBudget = EFFECTIVE_BUDGET - this.callTimestamps.length;
        // The pre-flight snapshot describes the quota window that ends at `resetAt`.
        // After that moment it no longer says anything about the current quota, so
        // it must stop capping the budget; otherwise the tracker locks up for good
        // once `totalCalls` reaches `knownRemaining`. An unparseable reset time (NaN)
        // never compares as elapsed, so the snapshot then stays in force.
        if (Date.now() >= this.resetAt) {
            return Math.max(0, localBudget);
        }
        // Use the stricter of: local window limit vs. pre-flight remaining minus calls made
        const externalBudget = this.knownRemaining - this.totalCalls;
        return Math.max(0, Math.min(localBudget, externalBudget));
    }
    /**
     * Check if we can afford N more Search API calls without exceeding the budget.
     *
     * @param n Number of additional calls being considered
     * @returns true when the effective budget is at least `n`
     */
    canAfford(n) {
        this.pruneOldTimestamps();
        return this.getEffectiveBudget() >= n;
    }
    /**
     * Wait if necessary to stay within the Search API rate limit.
     * If the sliding window is at capacity, sleeps until the oldest
     * call ages out of the window.
     */
    async waitForBudget() {
        // Loop to handle edge cases where a single sleep isn't enough
        // (e.g., concurrent callers, clock skew, or external budget depletion)
        while (true) {
            this.pruneOldTimestamps();
            if (this.getEffectiveBudget() > 0) {
                return; // Budget available, no wait needed
            }
            // Wait until the oldest call in the window ages out
            const oldestInWindow = this.callTimestamps[0];
            if (oldestInWindow === undefined) {
                return; // No calls in window — budget exhausted by external consumption, can't wait it out
            }
            const waitUntil = oldestInWindow + SEARCH_WINDOW_MS;
            const waitMs = waitUntil - Date.now();
            if (waitMs > 0) {
                debug(MODULE, `Budget full (${this.callTimestamps.length}/${EFFECTIVE_BUDGET} in window), waiting ${waitMs}ms`);
                await sleep(waitMs + 100); // +100ms safety buffer
            }
        }
    }
    /**
     * Get total calls recorded since init (for diagnostics).
     */
    getTotalCalls() {
        return this.totalCalls;
    }
}
|
|
117
|
-
// ---------------------------------------------------------------------------
|
|
118
|
-
// Singleton
|
|
119
|
-
// ---------------------------------------------------------------------------
|
|
120
|
-
/** Module-wide tracker instance; stays null until the first request for it. */
let _tracker = null;
/**
 * Return the shared SearchBudgetTracker, constructing it on first use.
 * Every later call hands back that same instance.
 */
export function getSearchBudgetTracker() {
    if (_tracker) {
        return _tracker;
    }
    _tracker = new SearchBudgetTracker();
    return _tracker;
}
|
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Search Phases — utilities and infrastructure for multi-phase issue search.
|
|
3
|
-
*
|
|
4
|
-
* Extracted from issue-discovery.ts (#621) to isolate search helpers,
|
|
5
|
-
* caching, spam-filtering, and batched repo search logic.
|
|
6
|
-
*/
|
|
7
|
-
import { Octokit } from '@octokit/rest';
|
|
8
|
-
import { type SearchPriority, type IssueCandidate, type IssueScope } from './types.js';
|
|
9
|
-
import { type GitHubSearchItem } from './issue-filtering.js';
|
|
10
|
-
import { IssueVetter } from './issue-vetting.js';
|
|
11
|
-
/** GitHub Search API enforces a max of 5 AND/OR/NOT operators per query. */
|
|
12
|
-
export declare const GITHUB_MAX_BOOLEAN_OPS = 5;
|
|
13
|
-
/**
|
|
14
|
-
* Chunk labels into groups that fit within the operator budget.
|
|
15
|
-
* N labels require N-1 OR operators, so maxPerChunk = budget + 1.
|
|
16
|
-
*
|
|
17
|
-
* @param labels Full label list
|
|
18
|
-
* @param reservedOps OR operators already consumed by repo/org filters
|
|
19
|
-
*/
|
|
20
|
-
export declare function chunkLabels(labels: string[], reservedOps?: number): string[][];
|
|
21
|
-
/**
 * Build a GitHub Search API label filter from a list of labels.
 *
 * @param labels Label names; each one is wrapped as `label:"<name>"`
 * @returns An empty string for no labels, the bare qualifier for a single label,
 *          or the qualifiers joined with ` OR ` inside parentheses for several
 */
|
|
22
|
-
export declare function buildLabelQuery(labels: string[]): string;
|
|
23
|
-
/**
 * Resolve scope tiers into a flat label list, merged with custom labels.
 *
 * @param scopes Scope tiers to expand via SCOPE_LABELS; a scope with no entry adds nothing
 * @param customLabels Extra labels added after the scope labels
 * @returns De-duplicated labels in first-seen order (scope labels, then custom labels)
 */
|
|
24
|
-
export declare function buildEffectiveLabels(scopes: IssueScope[], customLabels: string[]): string[];
|
|
25
|
-
/**
 * Round-robin interleave multiple arrays.
 *
 * Takes the first element of every array in order, then the second of every
 * array, and so on; arrays that have run out are skipped on later rounds.
 *
 * @param arrays Arrays to merge
 * @returns A new array holding every element of every input array
 */
|
|
26
|
-
export declare function interleaveArrays<T>(arrays: T[][]): T[];
|
|
27
|
-
/**
 * Split repos into batches of the specified size.
 *
 * @param repos Repository names, kept in their original order
 * @param batchSize Maximum number of repos per batch
 * @returns Consecutive slices of `repos`; only the last one may be shorter than `batchSize`
 */
|
|
28
|
-
export declare function batchRepos(repos: string[], batchSize: number): string[][];
|
|
29
|
-
/**
|
|
30
|
-
* Wrap octokit.search.issuesAndPullRequests with time-based caching.
|
|
31
|
-
* Repeated identical queries within SEARCH_CACHE_TTL_MS return cached results
|
|
32
|
-
* without consuming GitHub API rate limit points.
|
|
33
|
-
*/
|
|
34
|
-
export declare function cachedSearchIssues(octokit: Octokit, params: {
|
|
35
|
-
q: string;
|
|
36
|
-
sort: 'created' | 'updated' | 'comments' | 'reactions' | 'interactions';
|
|
37
|
-
order: 'asc' | 'desc';
|
|
38
|
-
per_page: number;
|
|
39
|
-
}): Promise<{
|
|
40
|
-
total_count: number;
|
|
41
|
-
items: GitHubSearchItem[];
|
|
42
|
-
}>;
|
|
43
|
-
/**
|
|
44
|
-
* Search across chunked labels with deduplication.
|
|
45
|
-
*
|
|
46
|
-
* Splits labels into chunks that fit within GitHub's boolean operator budget,
|
|
47
|
-
* issues one search query per chunk, deduplicates results by URL, and returns
|
|
48
|
-
* the merged item list.
|
|
49
|
-
*
|
|
50
|
-
* @param octokit Authenticated Octokit instance
|
|
51
|
-
* @param labels Full label list to chunk
|
|
52
|
-
* @param reservedOps OR operators already consumed by repo/org filters in the query
|
|
53
|
-
* @param buildQuery Callback that receives a label query string and returns the full search query
|
|
54
|
-
* @param perPage Number of results per API call
|
|
55
|
-
*/
|
|
56
|
-
export declare function searchWithChunkedLabels(octokit: Octokit, labels: string[], reservedOps: number, buildQuery: (labelQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
|
|
57
|
-
/**
|
|
58
|
-
* Shared pipeline: spam-filter, repo-exclusion, vetting, and star-count filter.
|
|
59
|
-
* Used by Phases 2 and 3 to convert raw search results into vetted candidates.
|
|
60
|
-
*/
|
|
61
|
-
export declare function filterVetAndScore(vetter: IssueVetter, items: GitHubSearchItem[], filterIssues: (items: GitHubSearchItem[]) => GitHubSearchItem[], excludedRepoSets: Set<string>[], remainingNeeded: number, minStars: number, phaseLabel: string): Promise<{
|
|
62
|
-
candidates: IssueCandidate[];
|
|
63
|
-
allVetFailed: boolean;
|
|
64
|
-
rateLimitHit: boolean;
|
|
65
|
-
}>;
|
|
66
|
-
/**
|
|
67
|
-
* Search for issues within specific repos using batched queries.
|
|
68
|
-
*
|
|
69
|
-
* To stay within the GitHub Search API rate limit (30 requests/minute), we batch
|
|
70
|
-
* multiple repos into a single search query using OR syntax:
|
|
71
|
-
* repo:owner1/repo1 OR repo:owner2/repo2 OR repo:owner3/repo3
|
|
72
|
-
*
|
|
73
|
-
* Labels are chunked separately to stay within GitHub's 5 boolean operator limit.
|
|
74
|
-
* Each batch of repos consumes (batch.length - 1) OR operators, and the remaining
|
|
75
|
-
* budget is used for label OR operators.
|
|
76
|
-
*
|
|
77
|
-
* This reduces API calls from N (one per repo) to ceil(N/BATCH_SIZE) * label_chunks.
|
|
78
|
-
*/
|
|
79
|
-
export declare function searchInRepos(octokit: Octokit, vetter: IssueVetter, repos: string[], baseQualifiers: string, labels: string[], maxResults: number, priority: SearchPriority, filterFn: (items: GitHubSearchItem[]) => GitHubSearchItem[]): Promise<{
|
|
80
|
-
candidates: IssueCandidate[];
|
|
81
|
-
allBatchesFailed: boolean;
|
|
82
|
-
rateLimitHit: boolean;
|
|
83
|
-
}>;
|
|
@@ -1,238 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Search Phases — utilities and infrastructure for multi-phase issue search.
|
|
3
|
-
*
|
|
4
|
-
* Extracted from issue-discovery.ts (#621) to isolate search helpers,
|
|
5
|
-
* caching, spam-filtering, and batched repo search logic.
|
|
6
|
-
*/
|
|
7
|
-
import { SCOPE_LABELS } from './types.js';
|
|
8
|
-
import { errorMessage, isRateLimitError } from './errors.js';
|
|
9
|
-
import { debug, warn } from './logger.js';
|
|
10
|
-
import { getHttpCache, cachedTimeBased } from './http-cache.js';
|
|
11
|
-
import { detectLabelFarmingRepos } from './issue-filtering.js';
|
|
12
|
-
import { sleep } from './utils.js';
|
|
13
|
-
import { getSearchBudgetTracker } from './search-budget.js';
|
|
14
|
-
const MODULE = 'search-phases';
|
|
15
|
-
/** GitHub Search API enforces a max of 5 AND/OR/NOT operators per query. */
|
|
16
|
-
export const GITHUB_MAX_BOOLEAN_OPS = 5;
|
|
17
|
-
/** Delay between search API calls to stay under GitHub's Search API rate limit (30 req/min).
|
|
18
|
-
* Set to 2000ms as a safety floor (max 30/min at the limit). The SearchBudgetTracker
|
|
19
|
-
* adds additional adaptive delays when needed. */
|
|
20
|
-
const INTER_QUERY_DELAY_MS = 2000;
|
|
21
|
-
/** Batch size for repo queries. 3 repos = 2 OR operators, leaving room for labels. */
|
|
22
|
-
const BATCH_SIZE = 3;
|
|
23
|
-
/**
 * Partition a label list so that each partition fits the boolean-operator
 * allowance left over for labels.
 *
 * Joining N labels with OR costs N-1 operators, which is why a chunk may hold
 * one label more than the operators still available.
 *
 * @param labels Full label list
 * @param reservedOps OR operators already consumed by repo/org filters
 * @returns The label chunks; `[labels]` when everything fits in one chunk, and
 *          `[[]]` (one label-less query) when no operator budget remains
 */
export function chunkLabels(labels, reservedOps = 0) {
    const chunkSize = GITHUB_MAX_BOOLEAN_OPS - reservedOps + 1;
    if (chunkSize < 1) {
        // The repo/org filter alone already exceeds the limit: search without labels.
        if (labels.length > 0) {
            warn(MODULE, `Label filtering disabled: ${reservedOps} repo/org ORs exceed GitHub's ${GITHUB_MAX_BOOLEAN_OPS} operator limit. ` +
                `All ${labels.length} label(s) dropped from query.`);
        }
        return [[]];
    }
    if (labels.length <= chunkSize) {
        return [labels];
    }
    const chunks = [];
    let offset = 0;
    while (offset < labels.length) {
        chunks.push(labels.slice(offset, offset + chunkSize));
        offset += chunkSize;
    }
    debug(MODULE, `Split ${labels.length} labels into ${chunks.length} chunks (${reservedOps} ops reserved, max ${chunkSize} per chunk)`);
    return chunks;
}
|
|
48
|
-
// ── Pure utilities ──
|
|
49
|
-
/**
 * Render a list of labels as a GitHub search label filter.
 *
 * @param labels Label names to match
 * @returns '' for no labels, `label:"x"` for one label, and a parenthesized
 *          `label:"a" OR label:"b"` group for several
 */
export function buildLabelQuery(labels) {
    const toQualifier = (name) => `label:"${name}"`;
    const count = labels.length;
    if (count !== 0 && count !== 1) {
        const joined = labels.map((name) => toQualifier(name)).join(' OR ');
        return `(${joined})`;
    }
    return count === 0 ? '' : toQualifier(labels[0]);
}
|
|
57
|
-
/**
 * Expand scope tiers through SCOPE_LABELS and merge in the custom labels.
 *
 * @param scopes Scope tiers to expand; a scope without a SCOPE_LABELS entry adds nothing
 * @param customLabels Extra labels appended after the scope labels
 * @returns De-duplicated labels in first-seen order
 */
export function buildEffectiveLabels(scopes, customLabels) {
    const unique = new Set();
    const addAll = (source) => {
        for (const entry of source) {
            unique.add(entry);
        }
    };
    for (const scope of scopes) {
        addAll(SCOPE_LABELS[scope] ?? []);
    }
    addAll(customLabels);
    return Array.from(unique);
}
|
|
68
|
-
/**
 * Round-robin interleave multiple arrays.
 *
 * Emits element 0 of every array in order, then element 1 of every array, and
 * so on; arrays that have run out are skipped on later rounds.
 *
 * @param arrays Arrays to merge
 * @returns A new array holding every element of every input array
 */
export function interleaveArrays(arrays) {
    // Find the longest input with a plain loop. Spreading the lengths into
    // Math.max() passes one argument per array and throws a RangeError once
    // the engine's argument limit is exceeded.
    let maxLen = 0;
    for (const arr of arrays) {
        if (arr.length > maxLen) {
            maxLen = arr.length;
        }
    }
    const result = [];
    for (let i = 0; i < maxLen; i++) {
        for (const arr of arrays) {
            if (i < arr.length) {
                result.push(arr[i]);
            }
        }
    }
    return result;
}
|
|
80
|
-
/**
 * Split repos into batches of the specified size.
 *
 * @param repos Repository names, kept in their original order
 * @param batchSize Maximum number of repos per batch; must be greater than zero
 * @returns Consecutive slices of `repos`; only the last one may be shorter
 * @throws {RangeError} If `batchSize` is zero, negative, or NaN. Zero or negative
 *         would never advance the loop index, and NaN would yield a bogus empty batch.
 */
export function batchRepos(repos, batchSize) {
    if (!(batchSize > 0)) {
        throw new RangeError(`batchSize must be greater than 0, received ${batchSize}`);
    }
    const batches = [];
    for (let i = 0; i < repos.length; i += batchSize) {
        batches.push(repos.slice(i, i + batchSize));
    }
    return batches;
}
|
|
88
|
-
// ── Search caching ──
|
|
89
|
-
/** TTL for cached search API results (15 minutes). */
|
|
90
|
-
const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
|
|
91
|
-
/**
 * Run octokit.search.issuesAndPullRequests behind a time-based cache.
 *
 * The cache key is built from the query, sort, order and page size, and entries
 * are requested with a TTL of SEARCH_CACHE_TTL_MS. The fetch callback handed to
 * the cache waits for search budget before calling GitHub and records the call
 * afterwards.
 *
 * @param octokit Octokit instance used for the request
 * @param params Search parameters (`q`, `sort`, `order`, `per_page`)
 * @returns The `data` payload of the search response
 */
export async function cachedSearchIssues(octokit, params) {
    const { q, sort, order, per_page: perPage } = params;
    const cacheKey = `search:${q}:${sort}:${order}:${perPage}`;
    const fetchFresh = async () => {
        const budget = getSearchBudgetTracker();
        await budget.waitForBudget();
        try {
            const response = await octokit.search.issuesAndPullRequests(params);
            return response.data;
        }
        finally {
            // Recorded on success and failure alike: a failed request still
            // consumes GitHub rate limit points.
            budget.recordCall();
        }
    };
    return cachedTimeBased(getHttpCache(), cacheKey, SEARCH_CACHE_TTL_MS, fetchFresh);
}
|
|
111
|
-
// ── Search infrastructure ──
|
|
112
|
-
/**
 * Run one search per label chunk and merge the results without duplicates.
 *
 * The labels are chunked to fit GitHub's boolean operator budget. Each chunk is
 * turned into a label filter, expanded into a full query by `buildQuery`, and
 * searched newest-first. Queries after the first are preceded by a pause of
 * INTER_QUERY_DELAY_MS. Items are de-duplicated by `html_url`, keeping the
 * first occurrence.
 *
 * @param octokit Authenticated Octokit instance
 * @param labels Full label list to chunk
 * @param reservedOps OR operators already consumed by repo/org filters in the query
 * @param buildQuery Callback that receives a label query string and returns the full search query
 * @param perPage Number of results per API call
 * @returns Merged items in the order they were first seen
 */
export async function searchWithChunkedLabels(octokit, labels, reservedOps, buildQuery, perPage) {
    const firstSeenByUrl = new Map();
    const labelChunks = chunkLabels(labels, reservedOps);
    for (const [chunkIdx, chunk] of labelChunks.entries()) {
        if (chunkIdx > 0) {
            await sleep(INTER_QUERY_DELAY_MS);
        }
        const { items } = await cachedSearchIssues(octokit, {
            q: buildQuery(buildLabelQuery(chunk)),
            sort: 'created',
            order: 'desc',
            per_page: perPage,
        });
        for (const item of items) {
            if (firstSeenByUrl.has(item.html_url)) {
                continue;
            }
            firstSeenByUrl.set(item.html_url, item);
        }
    }
    return [...firstSeenByUrl.values()];
}
|
|
148
|
-
/**
 * Turn raw search items into vetted candidates.
 *
 * Steps, in order: detect label-farming repos, apply the caller's
 * `filterIssues`, drop items from spam or excluded repos, cap the list at
 * twice `remainingNeeded`, vet the survivors in parallel at 'normal' priority,
 * then drop candidates whose repo has fewer than `minStars` stars. Candidates
 * whose health check failed are kept regardless of stars.
 *
 * @param vetter Vetter used to vet the surviving issue URLs
 * @param items Raw search results
 * @param filterIssues Caller-supplied pre-filter applied to `items`
 * @param excludedRepoSets Sets of `owner/repo` names to skip
 * @param remainingNeeded Number of candidates still wanted
 * @param minStars Minimum stargazer count for a candidate's repo
 * @param phaseLabel Label used in debug output
 * @returns The surviving candidates plus the vetter's all-failed and rate-limit flags
 */
export async function filterVetAndScore(vetter, items, filterIssues, excludedRepoSets, remainingNeeded, minStars, phaseLabel) {
    // "owner/repo" is the last two path segments of the repository API URL.
    const repoOf = (item) => item.repository_url.split('/').slice(-2).join('/');
    const spamRepos = detectLabelFarmingRepos(items);
    if (spamRepos.size > 0) {
        let spamCount = 0;
        for (const item of items) {
            if (spamRepos.has(repoOf(item))) {
                spamCount += 1;
            }
        }
        debug(MODULE, `[SPAM_FILTER] Filtered ${spamCount} issues from ${spamRepos.size} label-farming repos: ${[...spamRepos].join(', ')}`);
    }
    const isAllowed = (item) => {
        const repoFullName = repoOf(item);
        if (spamRepos.has(repoFullName)) {
            return false;
        }
        return !excludedRepoSets.some((excluded) => excluded.has(repoFullName));
    };
    const itemsToVet = filterIssues(items).filter(isAllowed).slice(0, remainingNeeded * 2);
    if (itemsToVet.length === 0) {
        debug(MODULE, `[${phaseLabel}] All ${items.length} items filtered before vetting`);
        return { candidates: [], allVetFailed: false, rateLimitHit: false };
    }
    const urls = itemsToVet.map((item) => item.html_url);
    const vetOutcome = await vetter.vetIssuesParallel(urls, remainingNeeded, 'normal');
    const { candidates: vetted, allFailed: allVetFailed, rateLimitHit } = vetOutcome;
    // Candidates whose health check failed have no reliable star count, so keep them.
    const hasEnoughStars = (candidate) => candidate.projectHealth.checkFailed
        ? true
        : (candidate.projectHealth.stargazersCount ?? 0) >= minStars;
    const starFiltered = vetted.filter(hasEnoughStars);
    const droppedForStars = vetted.length - starFiltered.length;
    if (droppedForStars > 0) {
        debug(MODULE, `[STAR_FILTER] Filtered ${droppedForStars} ${phaseLabel} candidates below ${minStars} stars`);
    }
    return { candidates: starFiltered, allVetFailed, rateLimitHit };
}
|
|
183
|
-
/**
 * Look for issues inside a fixed list of repositories, several repos per query.
 *
 * Repos are grouped into batches of BATCH_SIZE and each batch becomes a single
 * filter of the form
 *   repo:owner1/repo1 OR repo:owner2/repo2 OR repo:owner3/repo3
 * so the number of Search API calls is ceil(N/BATCH_SIZE) * label_chunks
 * instead of one per repo, which helps stay within the Search API rate limit
 * (30 requests/minute).
 *
 * The (batch.length - 1) OR operators spent on the repo filter are passed on as
 * reserved, and labels are chunked into what is left of GitHub's 5 boolean
 * operator limit.
 *
 * A batch that throws is logged and counted; the remaining batches still run.
 *
 * @param octokit Octokit instance used for searching
 * @param vetter Vetter used to vet the found issue URLs
 * @param repos Repositories (`owner/repo`) to search in
 * @param baseQualifiers Qualifiers placed at the start of every query
 * @param labels Labels to filter by
 * @param maxResults Stop once this many candidates have been collected
 * @param priority Priority passed to the vetter and used in the failure warning
 * @param filterFn Filter applied to the raw search items before vetting
 * @returns The candidates, whether every batch failed, and whether any rate limit was hit
 */
export async function searchInRepos(octokit, vetter, repos, baseQualifiers, labels, maxResults, priority, filterFn) {
    const batches = batchRepos(repos, BATCH_SIZE);
    const candidates = [];
    let failedBatches = 0;
    let rateLimitFailures = 0;
    for (const [batchIdx, batch] of batches.entries()) {
        if (candidates.length >= maxResults) {
            break;
        }
        // Pause between batches to pace Search API requests.
        if (batchIdx > 0) {
            await sleep(INTER_QUERY_DELAY_MS);
        }
        try {
            const stillNeeded = maxResults - candidates.length;
            const repoFilter = batch.map((repo) => `repo:${repo}`).join(' OR ');
            const toFullQuery = (labelQ) => `${baseQualifiers} ${labelQ} (${repoFilter})`.replace(/ +/g, ' ').trim();
            const allItems = await searchWithChunkedLabels(octokit, labels, batch.length - 1, toFullQuery, Math.min(30, stillNeeded * 3));
            if (allItems.length === 0) {
                continue;
            }
            const urlsToVet = filterFn(allItems)
                .slice(0, stillNeeded * 2)
                .map((item) => item.html_url);
            const { candidates: vetted, rateLimitHit: vetRateLimitHit } = await vetter.vetIssuesParallel(urlsToVet, stillNeeded, priority);
            candidates.push(...vetted);
            if (vetRateLimitHit) {
                rateLimitFailures += 1;
            }
        }
        catch (error) {
            failedBatches += 1;
            if (isRateLimitError(error)) {
                rateLimitFailures += 1;
            }
            warn(MODULE, `Error searching issues in batch [${batch.join(', ')}]:`, errorMessage(error));
        }
    }
    const allBatchesFailed = batches.length > 0 && failedBatches === batches.length;
    if (allBatchesFailed) {
        warn(MODULE, `All ${batches.length} batch(es) failed for ${priority} phase. ` +
            `This may indicate a systemic issue (rate limit, auth, network).`);
    }
    return { candidates, allBatchesFailed, rateLimitHit: rateLimitFailures > 0 };
}
|