@oss-scout/core 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/cli.bundle.cjs +30 -30
- package/dist/commands/config.js +6 -0
- package/dist/core/issue-discovery.js +78 -27
- package/dist/core/schemas.d.ts +6 -0
- package/dist/core/schemas.js +3 -0
- package/dist/core/search-phases.d.ts +21 -0
- package/dist/core/search-phases.js +157 -11
- package/package.json +11 -12
- package/dist/core/concurrency.d.ts +0 -6
- package/dist/core/concurrency.js +0 -25
package/dist/commands/config.js
CHANGED
|
@@ -13,6 +13,7 @@ const FIELD_CONFIGS = {
|
|
|
13
13
|
minStars: { type: "number" },
|
|
14
14
|
maxIssueAgeDays: { type: "number" },
|
|
15
15
|
minRepoScoreThreshold: { type: "number" },
|
|
16
|
+
interPhaseDelayMs: { type: "number" },
|
|
16
17
|
includeDocIssues: { type: "boolean" },
|
|
17
18
|
scope: { type: "enum-array", validValues: IssueScopeSchema.options },
|
|
18
19
|
projectCategories: {
|
|
@@ -25,6 +26,8 @@ const FIELD_CONFIGS = {
|
|
|
25
26
|
validValues: SearchStrategySchema.options,
|
|
26
27
|
},
|
|
27
28
|
githubUsername: { type: "string" },
|
|
29
|
+
broadPhaseDelayMs: { type: "number" },
|
|
30
|
+
skipBroadWhenSufficientResults: { type: "number" },
|
|
28
31
|
};
|
|
29
32
|
function parseBoolean(value) {
|
|
30
33
|
const lower = value.toLowerCase();
|
|
@@ -83,6 +86,7 @@ export function runConfigShow() {
|
|
|
83
86
|
console.log(` minStars: ${prefs.minStars}`);
|
|
84
87
|
console.log(` maxIssueAgeDays: ${prefs.maxIssueAgeDays}`);
|
|
85
88
|
console.log(` minRepoScoreThreshold: ${prefs.minRepoScoreThreshold}`);
|
|
89
|
+
console.log(` interPhaseDelayMs: ${prefs.interPhaseDelayMs}ms (${(prefs.interPhaseDelayMs / 1000).toFixed(0)}s)`);
|
|
86
90
|
console.log(` includeDocIssues: ${prefs.includeDocIssues}`);
|
|
87
91
|
console.log(` projectCategories: ${formatArray(prefs.projectCategories)}`);
|
|
88
92
|
console.log(` excludeRepos: ${formatArray(prefs.excludeRepos)}`);
|
|
@@ -90,6 +94,8 @@ export function runConfigShow() {
|
|
|
90
94
|
console.log(` aiPolicyBlocklist: ${formatArray(prefs.aiPolicyBlocklist)}`);
|
|
91
95
|
console.log(` defaultStrategy: ${prefs.defaultStrategy ? formatArray(prefs.defaultStrategy) : "(all)"}`);
|
|
92
96
|
console.log(` persistence: ${prefs.persistence}`);
|
|
97
|
+
console.log(` broadPhaseDelayMs: ${prefs.broadPhaseDelayMs}ms (${(prefs.broadPhaseDelayMs / 1000).toFixed(0)}s)`);
|
|
98
|
+
console.log(` skipBroadWhenSufficientResults: ${prefs.skipBroadWhenSufficientResults}`);
|
|
93
99
|
console.log();
|
|
94
100
|
}
|
|
95
101
|
/**
|
package/dist/core/issue-discovery.js
CHANGED
|
@@ -21,10 +21,8 @@ import { debug, info, warn } from "./logger.js";
|
|
|
21
21
|
import { isDocOnlyIssue, applyPerRepoCap, } from "./issue-filtering.js";
|
|
22
22
|
import { IssueVetter } from "./issue-vetting.js";
|
|
23
23
|
import { getTopicsForCategories } from "./category-mapping.js";
|
|
24
|
-
import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, filterVetAndScore,
|
|
24
|
+
import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, fetchIssuesFromMaintainedRepos, filterVetAndScore, fetchIssuesFromKnownRepos, searchWithChunkedLabels, } from "./search-phases.js";
|
|
25
25
|
const MODULE = "issue-discovery";
|
|
26
|
-
/** Delay between major search phases to let GitHub's rate limit window cool down. */
|
|
27
|
-
const INTER_PHASE_DELAY_MS = 2000;
|
|
28
26
|
/** If remaining search quota is below this, skip heavy phases (2, 3). */
|
|
29
27
|
const LOW_BUDGET_THRESHOLD = 20;
|
|
30
28
|
/** If remaining search quota is below this, only run Phase 0. */
|
|
@@ -60,27 +58,27 @@ function buildIssueFilter(config) {
|
|
|
60
58
|
};
|
|
61
59
|
}
|
|
62
60
|
/** Phase 0: Search repos where user has merged PRs (highest merge probability). */
|
|
63
|
-
async function runPhase0(octokit, vetter, repos,
|
|
61
|
+
async function runPhase0(octokit, vetter, repos, maxResults, filterIssues) {
|
|
64
62
|
info(MODULE, `Phase 0: Searching issues in ${repos.length} merged-PR repos (no label filter)...`);
|
|
65
|
-
const { candidates,
|
|
63
|
+
const { candidates, allReposFailed, rateLimitHit } = await fetchIssuesFromKnownRepos(octokit, vetter, repos, [], maxResults, "merged_pr", filterIssues);
|
|
66
64
|
info(MODULE, `Found ${candidates.length} candidates from merged-PR repos`);
|
|
67
65
|
return {
|
|
68
66
|
candidates,
|
|
69
|
-
error:
|
|
67
|
+
error: allReposFailed ? "All merged-PR repo fetches failed" : null,
|
|
70
68
|
rateLimitHit,
|
|
71
69
|
};
|
|
72
70
|
}
|
|
73
71
|
/** Phase 1: Search starred repos. */
|
|
74
|
-
async function runPhase1(octokit, vetter, repos,
|
|
72
|
+
async function runPhase1(octokit, vetter, repos, labels, maxResults, filterIssues) {
|
|
75
73
|
info(MODULE, `Phase 1: Searching issues in ${repos.length} starred repos...`);
|
|
76
|
-
// Cap labels
|
|
77
|
-
// interest, so fewer labels suffice.
|
|
74
|
+
// Cap labels: starred repos already signal user interest, so fewer labels suffice.
|
|
78
75
|
const phase1Labels = labels.slice(0, 3);
|
|
79
|
-
const
|
|
76
|
+
const reposToSearch = repos.slice(0, 10);
|
|
77
|
+
const { candidates, allReposFailed, rateLimitHit } = await fetchIssuesFromKnownRepos(octokit, vetter, reposToSearch, phase1Labels, maxResults, "starred", filterIssues);
|
|
80
78
|
info(MODULE, `Found ${candidates.length} candidates from starred repos`);
|
|
81
79
|
return {
|
|
82
80
|
candidates,
|
|
83
|
-
error:
|
|
81
|
+
error: allReposFailed ? "All starred repo fetches failed" : null,
|
|
84
82
|
rateLimitHit,
|
|
85
83
|
};
|
|
86
84
|
}
|
|
@@ -150,9 +148,36 @@ async function runPhase2(octokit, vetter, scopes, labels, configLabels, baseQual
|
|
|
150
148
|
rateLimitHit,
|
|
151
149
|
};
|
|
152
150
|
}
|
|
153
|
-
/** Phase 3: Actively maintained repos. */
|
|
154
|
-
async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories, maxResults, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues) {
|
|
151
|
+
/** Phase 3: Actively maintained repos (REST-first, Search API fallback). */
|
|
152
|
+
async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories, maxResults, phase0RepoSet, starredRepoSet, starredRepos, existingCandidates, filterIssues) {
|
|
155
153
|
info(MODULE, "Phase 3: Searching actively maintained repos...");
|
|
154
|
+
const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
|
|
155
|
+
// Step 1: Try REST API with starred repos first (no Search API quota used)
|
|
156
|
+
const eligibleStarred = starredRepos.filter((r) => !phase0RepoSet.has(r) && !seenRepos.has(r));
|
|
157
|
+
if (eligibleStarred.length > 0) {
|
|
158
|
+
info(MODULE, `Phase 3: Checking ${eligibleStarred.length} starred repos via REST API...`);
|
|
159
|
+
const restItems = await fetchIssuesFromMaintainedRepos(octokit, eligibleStarred.slice(0, 15), minStars, maxResults);
|
|
160
|
+
if (restItems.length > 0) {
|
|
161
|
+
try {
|
|
162
|
+
const { candidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, restItems, filterIssues, [phase0RepoSet, seenRepos], maxResults, minStars, "Phase 3 (REST)");
|
|
163
|
+
if (candidates.length > 0) {
|
|
164
|
+
info(MODULE, `Found ${candidates.length} candidates from maintained-repo REST search`);
|
|
165
|
+
return {
|
|
166
|
+
candidates,
|
|
167
|
+
error: allVetFailed ? "all vetting failed" : null,
|
|
168
|
+
rateLimitHit: vetRateLimitHit,
|
|
169
|
+
};
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
catch (error) {
|
|
173
|
+
if (getHttpStatusCode(error) === 401)
|
|
174
|
+
throw error;
|
|
175
|
+
warn(MODULE, `Phase 3 REST vetting failed, falling back to Search API:`, errorMessage(error));
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
// Step 2: Fall back to Search API if REST didn't yield results
|
|
180
|
+
info(MODULE, "Phase 3: Falling back to Search API...");
|
|
156
181
|
const thirtyDaysAgo = new Date();
|
|
157
182
|
thirtyDaysAgo.setDate(thirtyDaysAgo.getDate() - 30);
|
|
158
183
|
const pushedSince = thirtyDaysAgo.toISOString().split("T")[0];
|
|
@@ -169,7 +194,6 @@ async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories
|
|
|
169
194
|
per_page: maxResults * 3,
|
|
170
195
|
});
|
|
171
196
|
info(MODULE, `Found ${data.total_count} issues in maintained-repo search, processing top ${data.items.length}...`);
|
|
172
|
-
const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
|
|
173
197
|
const { candidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, data.items, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], maxResults, minStars, "Phase 3");
|
|
174
198
|
info(MODULE, `Found ${candidates.length} candidates from maintained-repo search`);
|
|
175
199
|
return {
|
|
@@ -259,6 +283,7 @@ export class IssueDiscovery {
|
|
|
259
283
|
(scopes ? buildEffectiveLabels(scopes, config.labels) : config.labels);
|
|
260
284
|
const maxResults = options.maxResults || 10;
|
|
261
285
|
const minStars = config.minStars ?? 50;
|
|
286
|
+
const interPhaseDelay = config.interPhaseDelayMs ?? 30000;
|
|
262
287
|
// Strategy selection
|
|
263
288
|
const ALL_STRATEGIES = CONCRETE_STRATEGIES;
|
|
264
289
|
const rawStrategies = options.strategies ??
|
|
@@ -333,7 +358,7 @@ export class IssueDiscovery {
|
|
|
333
358
|
if (phase0Repos.length > 0 && enabledStrategies.has("merged")) {
|
|
334
359
|
const remaining = maxResults - allCandidates.length;
|
|
335
360
|
if (remaining > 0) {
|
|
336
|
-
const result = await runPhase0(this.octokit, this.vetter, phase0Repos,
|
|
361
|
+
const result = await runPhase0(this.octokit, this.vetter, phase0Repos, remaining, filterIssues);
|
|
337
362
|
allCandidates.push(...result.candidates);
|
|
338
363
|
phaseErrors["0"] = result.error;
|
|
339
364
|
if (result.rateLimitHit)
|
|
@@ -346,12 +371,15 @@ export class IssueDiscovery {
|
|
|
346
371
|
starredRepos.length > 0 &&
|
|
347
372
|
searchBudget >= CRITICAL_BUDGET_THRESHOLD &&
|
|
348
373
|
enabledStrategies.has("starred")) {
|
|
349
|
-
|
|
374
|
+
if (interPhaseDelay > 0) {
|
|
375
|
+
info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
|
|
376
|
+
await sleep(interPhaseDelay);
|
|
377
|
+
}
|
|
350
378
|
const reposToSearch = starredRepos.filter((r) => !phase0RepoSet.has(r));
|
|
351
379
|
if (reposToSearch.length > 0) {
|
|
352
380
|
const remaining = maxResults - allCandidates.length;
|
|
353
381
|
if (remaining > 0) {
|
|
354
|
-
const result = await runPhase1(this.octokit, this.vetter, reposToSearch,
|
|
382
|
+
const result = await runPhase1(this.octokit, this.vetter, reposToSearch, labels, remaining, filterIssues);
|
|
355
383
|
allCandidates.push(...result.candidates);
|
|
356
384
|
phaseErrors["1"] = result.error;
|
|
357
385
|
if (result.rateLimitHit)
|
|
@@ -360,26 +388,49 @@ export class IssueDiscovery {
|
|
|
360
388
|
}
|
|
361
389
|
strategiesUsed.push("starred");
|
|
362
390
|
}
|
|
363
|
-
// Phase 2: General search
|
|
391
|
+
// Phase 2: General search (with rate limit mitigation)
|
|
392
|
+
const broadDelay = config.broadPhaseDelayMs ?? 90000;
|
|
393
|
+
const skipThreshold = config.skipBroadWhenSufficientResults ?? 15;
|
|
364
394
|
if (allCandidates.length < maxResults &&
|
|
365
395
|
searchBudget >= LOW_BUDGET_THRESHOLD &&
|
|
366
396
|
enabledStrategies.has("broad")) {
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
397
|
+
// Skip broad search if we already have enough candidates
|
|
398
|
+
if (skipThreshold > 0 && allCandidates.length >= skipThreshold) {
|
|
399
|
+
info(MODULE, `Skipping broad search: already found ${allCandidates.length} candidates (threshold: ${skipThreshold})`);
|
|
400
|
+
}
|
|
401
|
+
else {
|
|
402
|
+
// Always apply baseline inter-phase delay
|
|
403
|
+
if (interPhaseDelay > 0) {
|
|
404
|
+
info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
|
|
405
|
+
await sleep(interPhaseDelay);
|
|
406
|
+
}
|
|
407
|
+
// Apply additional broad-phase cooldown, but skip if previous phases found nothing
|
|
408
|
+
if (allCandidates.length > 0 && broadDelay > 0) {
|
|
409
|
+
info(MODULE, `Waiting ${(broadDelay / 1000).toFixed(0)}s for rate limit cooldown before broad search...`);
|
|
410
|
+
await sleep(broadDelay);
|
|
411
|
+
}
|
|
412
|
+
else if (allCandidates.length === 0) {
|
|
413
|
+
info(MODULE, `Skipping broad phase delay: no results from previous phases, proceeding immediately`);
|
|
414
|
+
}
|
|
415
|
+
const remaining = maxResults - allCandidates.length;
|
|
416
|
+
const result = await runPhase2(this.octokit, this.vetter, scopes, labels, config.labels, baseQualifiers, remaining, minStars, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
|
|
417
|
+
allCandidates.push(...result.candidates);
|
|
418
|
+
phaseErrors["2"] = result.error;
|
|
419
|
+
if (result.rateLimitHit)
|
|
420
|
+
rateLimitHitDuringSearch = true;
|
|
421
|
+
}
|
|
374
422
|
strategiesUsed.push("broad");
|
|
375
423
|
}
|
|
376
424
|
// Phase 3: Actively maintained repos
|
|
377
425
|
if (allCandidates.length < maxResults &&
|
|
378
426
|
searchBudget >= LOW_BUDGET_THRESHOLD &&
|
|
379
427
|
enabledStrategies.has("maintained")) {
|
|
380
|
-
|
|
428
|
+
if (interPhaseDelay > 0) {
|
|
429
|
+
info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
|
|
430
|
+
await sleep(interPhaseDelay);
|
|
431
|
+
}
|
|
381
432
|
const remaining = maxResults - allCandidates.length;
|
|
382
|
-
const result = await runPhase3(this.octokit, this.vetter, langQuery, minStars, config.projectCategories ?? [], remaining, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
|
|
433
|
+
const result = await runPhase3(this.octokit, this.vetter, langQuery, minStars, config.projectCategories ?? [], remaining, phase0RepoSet, starredRepoSet, starredRepos, allCandidates, filterIssues);
|
|
383
434
|
allCandidates.push(...result.candidates);
|
|
384
435
|
phaseErrors["3"] = result.error;
|
|
385
436
|
if (result.rateLimitHit)
|
package/dist/core/schemas.d.ts
CHANGED
|
@@ -203,6 +203,7 @@ export declare const ScoutPreferencesSchema: z.ZodObject<{
|
|
|
203
203
|
maxIssueAgeDays: z.ZodDefault<z.ZodNumber>;
|
|
204
204
|
includeDocIssues: z.ZodDefault<z.ZodBoolean>;
|
|
205
205
|
minRepoScoreThreshold: z.ZodDefault<z.ZodNumber>;
|
|
206
|
+
interPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
|
|
206
207
|
persistence: z.ZodDefault<z.ZodEnum<{
|
|
207
208
|
local: "local";
|
|
208
209
|
gist: "gist";
|
|
@@ -214,6 +215,8 @@ export declare const ScoutPreferencesSchema: z.ZodObject<{
|
|
|
214
215
|
broad: "broad";
|
|
215
216
|
maintained: "maintained";
|
|
216
217
|
}>>>;
|
|
218
|
+
broadPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
|
|
219
|
+
skipBroadWhenSufficientResults: z.ZodDefault<z.ZodNumber>;
|
|
217
220
|
}, z.core.$strip>;
|
|
218
221
|
export declare const ScoutStateSchema: z.ZodObject<{
|
|
219
222
|
version: z.ZodLiteral<1>;
|
|
@@ -241,6 +244,7 @@ export declare const ScoutStateSchema: z.ZodObject<{
|
|
|
241
244
|
maxIssueAgeDays: z.ZodDefault<z.ZodNumber>;
|
|
242
245
|
includeDocIssues: z.ZodDefault<z.ZodBoolean>;
|
|
243
246
|
minRepoScoreThreshold: z.ZodDefault<z.ZodNumber>;
|
|
247
|
+
interPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
|
|
244
248
|
persistence: z.ZodDefault<z.ZodEnum<{
|
|
245
249
|
local: "local";
|
|
246
250
|
gist: "gist";
|
|
@@ -252,6 +256,8 @@ export declare const ScoutStateSchema: z.ZodObject<{
|
|
|
252
256
|
broad: "broad";
|
|
253
257
|
maintained: "maintained";
|
|
254
258
|
}>>>;
|
|
259
|
+
broadPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
|
|
260
|
+
skipBroadWhenSufficientResults: z.ZodDefault<z.ZodNumber>;
|
|
255
261
|
}, z.core.$strip>>;
|
|
256
262
|
repoScores: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodObject<{
|
|
257
263
|
repo: z.ZodString;
|
package/dist/core/schemas.js
CHANGED
|
@@ -146,8 +146,11 @@ export const ScoutPreferencesSchema = z.object({
|
|
|
146
146
|
maxIssueAgeDays: z.number().default(90),
|
|
147
147
|
includeDocIssues: z.boolean().default(true),
|
|
148
148
|
minRepoScoreThreshold: z.number().default(4),
|
|
149
|
+
interPhaseDelayMs: z.number().min(0).max(120000).default(30000),
|
|
149
150
|
persistence: PersistenceModeSchema.default("local"),
|
|
150
151
|
defaultStrategy: z.array(SearchStrategySchema).optional(),
|
|
152
|
+
broadPhaseDelayMs: z.number().min(0).max(300000).default(90000),
|
|
153
|
+
skipBroadWhenSufficientResults: z.number().int().min(0).max(100).default(15),
|
|
151
154
|
});
|
|
152
155
|
// ── Root state schema ───────────────────────────────────────────────
|
|
153
156
|
export const ScoutStateSchema = z.object({
|
package/dist/core/search-phases.d.ts
CHANGED
|
@@ -26,6 +26,27 @@ export declare function cachedSearchIssues(octokit: Octokit, params: {
|
|
|
26
26
|
total_count: number;
|
|
27
27
|
items: GitHubSearchItem[];
|
|
28
28
|
}>;
|
|
29
|
+
/**
|
|
30
|
+
* Fetch issues from maintained repos using REST API (no Search API quota).
|
|
31
|
+
*
|
|
32
|
+
* Checks each repo for recent push activity and star threshold,
|
|
33
|
+
* then fetches open issues via `GET /repos/{owner}/{repo}/issues`.
|
|
34
|
+
* Leaves it to the caller to fall back to the Search API if this doesn't yield enough results.
|
|
35
|
+
*/
|
|
36
|
+
export declare function fetchIssuesFromMaintainedRepos(octokit: Octokit, repos: string[], minStars: number, maxResults: number): Promise<GitHubSearchItem[]>;
|
|
37
|
+
/**
|
|
38
|
+
* Fetch open issues from known repos using REST API (no Search API quota).
|
|
39
|
+
* Used by Phase 0 (merged-PR repos) and Phase 1 (starred repos).
|
|
40
|
+
*
|
|
41
|
+
* Instead of the Search API (`octokit.search.issuesAndPullRequests`), this
|
|
42
|
+
* calls `GET /repos/{owner}/{repo}/issues` which counts against the much
|
|
43
|
+
* larger Core API rate limit and avoids consuming the scarce Search quota.
|
|
44
|
+
*/
|
|
45
|
+
export declare function fetchIssuesFromKnownRepos(octokit: Octokit, vetter: IssueVetter, repos: string[], labels: string[], maxResults: number, priority: SearchPriority, filterFn: (items: GitHubSearchItem[]) => GitHubSearchItem[]): Promise<{
|
|
46
|
+
candidates: IssueCandidate[];
|
|
47
|
+
allReposFailed: boolean;
|
|
48
|
+
rateLimitHit: boolean;
|
|
49
|
+
}>;
|
|
29
50
|
/**
|
|
30
51
|
* Search across chunked labels with deduplication.
|
|
31
52
|
*
|
package/dist/core/search-phases.js
CHANGED
|
@@ -5,9 +5,9 @@
|
|
|
5
5
|
* caching, spam-filtering, and batched repo search logic.
|
|
6
6
|
*/
|
|
7
7
|
import { SCOPE_LABELS, } from "./types.js";
|
|
8
|
-
import { errorMessage, isRateLimitError } from "./errors.js";
|
|
8
|
+
import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
|
|
9
9
|
import { debug, warn } from "./logger.js";
|
|
10
|
-
import { getHttpCache
|
|
10
|
+
import { getHttpCache } from "./http-cache.js";
|
|
11
11
|
import { detectLabelFarmingRepos, } from "./issue-filtering.js";
|
|
12
12
|
import { extractRepoFromUrl, sleep } from "./utils.js";
|
|
13
13
|
import { getSearchBudgetTracker } from "./search-budget.js";
|
|
@@ -95,20 +95,166 @@ const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
|
|
|
95
95
|
*/
|
|
96
96
|
export async function cachedSearchIssues(octokit, params) {
|
|
97
97
|
const cacheKey = `search:${params.q}:${params.sort}:${params.order}:${params.per_page}`;
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
98
|
+
const cache = getHttpCache();
|
|
99
|
+
// Check cache first
|
|
100
|
+
const cached = cache.getIfFresh(cacheKey, SEARCH_CACHE_TTL_MS);
|
|
101
|
+
if (cached) {
|
|
102
|
+
debug(MODULE, `Search cache hit for query`);
|
|
103
|
+
return cached;
|
|
104
|
+
}
|
|
105
|
+
// Fetch from API
|
|
106
|
+
const tracker = getSearchBudgetTracker();
|
|
107
|
+
await tracker.waitForBudget();
|
|
108
|
+
let data;
|
|
109
|
+
try {
|
|
110
|
+
const response = await octokit.search.issuesAndPullRequests(params);
|
|
111
|
+
data = response.data;
|
|
112
|
+
}
|
|
113
|
+
finally {
|
|
114
|
+
tracker.recordCall();
|
|
115
|
+
}
|
|
116
|
+
// Only cache non-empty results to prevent poisoning from rate-limited responses
|
|
117
|
+
if (data.items.length > 0) {
|
|
118
|
+
cache.set(cacheKey, "", data);
|
|
119
|
+
}
|
|
120
|
+
else {
|
|
121
|
+
debug(MODULE, `Skipping cache for empty search result (possible rate limit artifact)`);
|
|
122
|
+
}
|
|
123
|
+
return data;
|
|
124
|
+
}
|
|
125
|
+
// ── REST-based search functions ──
|
|
126
|
+
/**
|
|
127
|
+
* Fetch issues from maintained repos using REST API (no Search API quota).
|
|
128
|
+
*
|
|
129
|
+
* Checks each repo for recent push activity and star threshold,
|
|
130
|
+
* then fetches open issues via `GET /repos/{owner}/{repo}/issues`.
|
|
131
|
+
* Leaves it to the caller to fall back to the Search API if this doesn't yield enough results.
|
|
132
|
+
*/
|
|
133
|
+
export async function fetchIssuesFromMaintainedRepos(octokit, repos, minStars, maxResults) {
|
|
134
|
+
const items = [];
|
|
135
|
+
for (const repoFullName of repos) {
|
|
136
|
+
if (items.length >= maxResults * 3)
|
|
137
|
+
break;
|
|
138
|
+
const [owner, repo] = repoFullName.split("/");
|
|
139
|
+
if (!owner || !repo)
|
|
140
|
+
continue;
|
|
101
141
|
try {
|
|
102
|
-
const { data } = await octokit.
|
|
103
|
-
|
|
142
|
+
const { data: repoData } = await octokit.repos.get({ owner, repo });
|
|
143
|
+
if (!repoData.pushed_at)
|
|
144
|
+
continue;
|
|
145
|
+
const pushedAt = new Date(repoData.pushed_at);
|
|
146
|
+
const thirtyDaysAgo = new Date();
|
|
147
|
+
thirtyDaysAgo.setDate(thirtyDaysAgo.getDate() - 30);
|
|
148
|
+
if (pushedAt < thirtyDaysAgo)
|
|
149
|
+
continue;
|
|
150
|
+
if ((repoData.stargazers_count ?? 0) < minStars)
|
|
151
|
+
continue;
|
|
152
|
+
if (repoData.archived)
|
|
153
|
+
continue;
|
|
154
|
+
const { data: issues } = await octokit.issues.listForRepo({
|
|
155
|
+
owner,
|
|
156
|
+
repo,
|
|
157
|
+
state: "open",
|
|
158
|
+
sort: "created",
|
|
159
|
+
direction: "desc",
|
|
160
|
+
per_page: 5,
|
|
161
|
+
});
|
|
162
|
+
// Filter out pull requests and assigned issues (REST endpoint returns both)
|
|
163
|
+
const realIssues = issues.filter((i) => !i.pull_request && !i.assignee);
|
|
164
|
+
for (const issue of realIssues) {
|
|
165
|
+
items.push({
|
|
166
|
+
html_url: issue.html_url,
|
|
167
|
+
repository_url: `https://api.github.com/repos/${repoFullName}`,
|
|
168
|
+
updated_at: issue.updated_at ?? "",
|
|
169
|
+
title: issue.title,
|
|
170
|
+
labels: issue.labels,
|
|
171
|
+
});
|
|
172
|
+
}
|
|
173
|
+
await sleep(INTER_QUERY_DELAY_MS);
|
|
104
174
|
}
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
175
|
+
catch (error) {
|
|
176
|
+
if (getHttpStatusCode(error) === 401)
|
|
177
|
+
throw error;
|
|
178
|
+
if (isRateLimitError(error)) {
|
|
179
|
+
warn(MODULE, `Rate limit hit fetching issues from ${repoFullName}:`, errorMessage(error));
|
|
180
|
+
break;
|
|
181
|
+
}
|
|
182
|
+
warn(MODULE, `Error fetching issues from ${repoFullName}:`, errorMessage(error));
|
|
108
183
|
}
|
|
109
|
-
}
|
|
184
|
+
}
|
|
185
|
+
return items;
|
|
110
186
|
}
|
|
111
187
|
// ── Search infrastructure ──
|
|
188
|
+
/**
|
|
189
|
+
* Fetch open issues from known repos using REST API (no Search API quota).
|
|
190
|
+
* Used by Phase 0 (merged-PR repos) and Phase 1 (starred repos).
|
|
191
|
+
*
|
|
192
|
+
* Instead of the Search API (`octokit.search.issuesAndPullRequests`), this
|
|
193
|
+
* calls `GET /repos/{owner}/{repo}/issues` which counts against the much
|
|
194
|
+
* larger Core API rate limit and avoids consuming the scarce Search quota.
|
|
195
|
+
*/
|
|
196
|
+
export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels, maxResults, priority, filterFn) {
|
|
197
|
+
const candidates = [];
|
|
198
|
+
let failedRepos = 0;
|
|
199
|
+
let rateLimitFailures = 0;
|
|
200
|
+
for (let i = 0; i < repos.length; i++) {
|
|
201
|
+
if (candidates.length >= maxResults)
|
|
202
|
+
break;
|
|
203
|
+
// Delay between repos to avoid REST secondary rate limits
|
|
204
|
+
if (i > 0)
|
|
205
|
+
await sleep(INTER_QUERY_DELAY_MS);
|
|
206
|
+
const repoFullName = repos[i];
|
|
207
|
+
const [owner, repo] = repoFullName.split("/");
|
|
208
|
+
try {
|
|
209
|
+
const response = await octokit.issues.listForRepo({
|
|
210
|
+
owner,
|
|
211
|
+
repo,
|
|
212
|
+
state: "open",
|
|
213
|
+
sort: "created",
|
|
214
|
+
direction: "desc",
|
|
215
|
+
per_page: 5,
|
|
216
|
+
...(labels.length > 0 ? { labels: labels.join(",") } : {}),
|
|
217
|
+
});
|
|
218
|
+
// Filter out pull requests (REST issues endpoint returns both) and assigned issues
|
|
219
|
+
const issuesOnly = response.data.filter((item) => !("pull_request" in item) && !item.assignee);
|
|
220
|
+
const mapped = issuesOnly.map((issue) => ({
|
|
221
|
+
html_url: issue.html_url,
|
|
222
|
+
repository_url: `https://api.github.com/repos/${repoFullName}`,
|
|
223
|
+
updated_at: issue.updated_at ?? "",
|
|
224
|
+
title: issue.title,
|
|
225
|
+
labels: issue.labels,
|
|
226
|
+
}));
|
|
227
|
+
if (mapped.length > 0) {
|
|
228
|
+
const filtered = filterFn(mapped);
|
|
229
|
+
if (filtered.length > 0) {
|
|
230
|
+
const remainingNeeded = maxResults - candidates.length;
|
|
231
|
+
const { candidates: vetted, rateLimitHit: vetRateLimitHit } = await vetter.vetIssuesParallel(filtered
|
|
232
|
+
.slice(0, remainingNeeded * 2)
|
|
233
|
+
.map((item) => item.html_url), remainingNeeded, priority);
|
|
234
|
+
candidates.push(...vetted);
|
|
235
|
+
if (vetRateLimitHit)
|
|
236
|
+
rateLimitFailures++;
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
catch (error) {
|
|
241
|
+
if (getHttpStatusCode(error) === 401)
|
|
242
|
+
throw error;
|
|
243
|
+
failedRepos++;
|
|
244
|
+
if (isRateLimitError(error)) {
|
|
245
|
+
rateLimitFailures++;
|
|
246
|
+
}
|
|
247
|
+
warn(MODULE, `Error fetching issues from ${repoFullName}:`, errorMessage(error));
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
const allReposFailed = failedRepos === repos.length && repos.length > 0;
|
|
251
|
+
const rateLimitHit = rateLimitFailures > 0;
|
|
252
|
+
if (allReposFailed) {
|
|
253
|
+
warn(MODULE, `All ${repos.length} repo(s) failed for ${priority} phase. ` +
|
|
254
|
+
`This may indicate a systemic issue (rate limit, auth, network).`);
|
|
255
|
+
}
|
|
256
|
+
return { candidates, allReposFailed, rateLimitHit };
|
|
257
|
+
}
|
|
112
258
|
/**
|
|
113
259
|
* Search across chunked labels with deduplication.
|
|
114
260
|
*
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@oss-scout/core",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "Personalized GitHub issue finder with multi-strategy search, deep vetting, and viability scoring — CLI, library, MCP server, and Claude Code plugin",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -21,16 +21,6 @@
|
|
|
21
21
|
"!dist/**/*.map",
|
|
22
22
|
"!dist/core/test-utils.*"
|
|
23
23
|
],
|
|
24
|
-
"scripts": {
|
|
25
|
-
"build": "tsc",
|
|
26
|
-
"bundle": "esbuild src/cli.ts --bundle --platform=node --target=node20 --format=cjs --minify --sourcemap --outfile=dist/cli.bundle.cjs",
|
|
27
|
-
"start": "tsx src/cli.ts",
|
|
28
|
-
"typecheck": "tsc --noEmit",
|
|
29
|
-
"test": "vitest run",
|
|
30
|
-
"test:coverage": "vitest run --coverage",
|
|
31
|
-
"test:watch": "vitest",
|
|
32
|
-
"prepublishOnly": "pnpm run build && pnpm run bundle"
|
|
33
|
-
},
|
|
34
24
|
"keywords": [
|
|
35
25
|
"open-source",
|
|
36
26
|
"github",
|
|
@@ -70,5 +60,14 @@
|
|
|
70
60
|
"tsx": "^4.21.0",
|
|
71
61
|
"typescript": "^5.9.3",
|
|
72
62
|
"vitest": "^4.1.0"
|
|
63
|
+
},
|
|
64
|
+
"scripts": {
|
|
65
|
+
"build": "tsc",
|
|
66
|
+
"bundle": "esbuild src/cli.ts --bundle --platform=node --target=node20 --format=cjs --minify --sourcemap --outfile=dist/cli.bundle.cjs",
|
|
67
|
+
"start": "tsx src/cli.ts",
|
|
68
|
+
"typecheck": "tsc --noEmit",
|
|
69
|
+
"test": "vitest run",
|
|
70
|
+
"test:coverage": "vitest run --coverage",
|
|
71
|
+
"test:watch": "vitest"
|
|
73
72
|
}
|
|
74
|
-
}
|
|
73
|
+
}
|
package/dist/core/concurrency.d.ts
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Runs a worker pool that processes items with bounded concurrency.
|
|
3
|
-
* N workers consume from a shared index. On any worker error, remaining
|
|
4
|
-
* workers are aborted via a shared flag and the error is propagated.
|
|
5
|
-
*/
|
|
6
|
-
export declare function runWorkerPool<T>(items: T[], worker: (item: T) => Promise<void>, concurrency: number): Promise<void>;
|
package/dist/core/concurrency.js
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Runs a worker pool that processes items with bounded concurrency.
|
|
3
|
-
* N workers consume from a shared index. On any worker error, remaining
|
|
4
|
-
* workers are aborted via a shared flag and the error is propagated.
|
|
5
|
-
*/
|
|
6
|
-
export async function runWorkerPool(items, worker, concurrency) {
|
|
7
|
-
let index = 0;
|
|
8
|
-
let aborted = false;
|
|
9
|
-
const poolWorker = async () => {
|
|
10
|
-
while (index < items.length) {
|
|
11
|
-
if (aborted)
|
|
12
|
-
break;
|
|
13
|
-
const item = items[index++];
|
|
14
|
-
try {
|
|
15
|
-
await worker(item);
|
|
16
|
-
}
|
|
17
|
-
catch (err) {
|
|
18
|
-
aborted = true;
|
|
19
|
-
throw err;
|
|
20
|
-
}
|
|
21
|
-
}
|
|
22
|
-
};
|
|
23
|
-
const workerCount = Math.min(concurrency, items.length);
|
|
24
|
-
await Promise.all(Array.from({ length: workerCount }, () => poolWorker()));
|
|
25
|
-
}
|