@oss-scout/core 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,7 @@ const FIELD_CONFIGS = {
13
13
  minStars: { type: "number" },
14
14
  maxIssueAgeDays: { type: "number" },
15
15
  minRepoScoreThreshold: { type: "number" },
16
+ interPhaseDelayMs: { type: "number" },
16
17
  includeDocIssues: { type: "boolean" },
17
18
  scope: { type: "enum-array", validValues: IssueScopeSchema.options },
18
19
  projectCategories: {
@@ -25,6 +26,8 @@ const FIELD_CONFIGS = {
25
26
  validValues: SearchStrategySchema.options,
26
27
  },
27
28
  githubUsername: { type: "string" },
29
+ broadPhaseDelayMs: { type: "number" },
30
+ skipBroadWhenSufficientResults: { type: "number" },
28
31
  };
29
32
  function parseBoolean(value) {
30
33
  const lower = value.toLowerCase();
@@ -83,6 +86,7 @@ export function runConfigShow() {
83
86
  console.log(` minStars: ${prefs.minStars}`);
84
87
  console.log(` maxIssueAgeDays: ${prefs.maxIssueAgeDays}`);
85
88
  console.log(` minRepoScoreThreshold: ${prefs.minRepoScoreThreshold}`);
89
+ console.log(` interPhaseDelayMs: ${prefs.interPhaseDelayMs}ms (${(prefs.interPhaseDelayMs / 1000).toFixed(0)}s)`);
86
90
  console.log(` includeDocIssues: ${prefs.includeDocIssues}`);
87
91
  console.log(` projectCategories: ${formatArray(prefs.projectCategories)}`);
88
92
  console.log(` excludeRepos: ${formatArray(prefs.excludeRepos)}`);
@@ -90,6 +94,8 @@ export function runConfigShow() {
90
94
  console.log(` aiPolicyBlocklist: ${formatArray(prefs.aiPolicyBlocklist)}`);
91
95
  console.log(` defaultStrategy: ${prefs.defaultStrategy ? formatArray(prefs.defaultStrategy) : "(all)"}`);
92
96
  console.log(` persistence: ${prefs.persistence}`);
97
+ console.log(` broadPhaseDelayMs: ${prefs.broadPhaseDelayMs}ms (${(prefs.broadPhaseDelayMs / 1000).toFixed(0)}s)`);
98
+ console.log(` skipBroadWhenSufficientResults: ${prefs.skipBroadWhenSufficientResults}`);
93
99
  console.log();
94
100
  }
95
101
  /**
@@ -21,10 +21,8 @@ import { debug, info, warn } from "./logger.js";
21
21
  import { isDocOnlyIssue, applyPerRepoCap, } from "./issue-filtering.js";
22
22
  import { IssueVetter } from "./issue-vetting.js";
23
23
  import { getTopicsForCategories } from "./category-mapping.js";
24
- import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, filterVetAndScore, searchInRepos, searchWithChunkedLabels, } from "./search-phases.js";
24
+ import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, fetchIssuesFromMaintainedRepos, filterVetAndScore, fetchIssuesFromKnownRepos, searchWithChunkedLabels, } from "./search-phases.js";
25
25
  const MODULE = "issue-discovery";
26
- /** Delay between major search phases to let GitHub's rate limit window cool down. */
27
- const INTER_PHASE_DELAY_MS = 2000;
28
26
  /** If remaining search quota is below this, skip heavy phases (2, 3). */
29
27
  const LOW_BUDGET_THRESHOLD = 20;
30
28
  /** If remaining search quota is below this, only run Phase 0. */
@@ -60,27 +58,27 @@ function buildIssueFilter(config) {
60
58
  };
61
59
  }
62
60
  /** Phase 0: Search repos where user has merged PRs (highest merge probability). */
63
- async function runPhase0(octokit, vetter, repos, baseQualifiers, maxResults, filterIssues) {
61
+ async function runPhase0(octokit, vetter, repos, maxResults, filterIssues) {
64
62
  info(MODULE, `Phase 0: Searching issues in ${repos.length} merged-PR repos (no label filter)...`);
65
- const { candidates, allBatchesFailed, rateLimitHit } = await searchInRepos(octokit, vetter, repos, baseQualifiers, [], maxResults, "merged_pr", filterIssues);
63
+ const { candidates, allReposFailed, rateLimitHit } = await fetchIssuesFromKnownRepos(octokit, vetter, repos, [], maxResults, "merged_pr", filterIssues);
66
64
  info(MODULE, `Found ${candidates.length} candidates from merged-PR repos`);
67
65
  return {
68
66
  candidates,
69
- error: allBatchesFailed ? "All merged-PR repo batches failed" : null,
67
+ error: allReposFailed ? "All merged-PR repo fetches failed" : null,
70
68
  rateLimitHit,
71
69
  };
72
70
  }
73
71
  /** Phase 1: Search starred repos. */
74
- async function runPhase1(octokit, vetter, repos, baseQualifiers, labels, maxResults, filterIssues) {
72
+ async function runPhase1(octokit, vetter, repos, labels, maxResults, filterIssues) {
75
73
  info(MODULE, `Phase 1: Searching issues in ${repos.length} starred repos...`);
76
- // Cap labels to reduce Search API calls: starred repos already signal user
77
- // interest, so fewer labels suffice.
74
+ // Cap labels: starred repos already signal user interest, so fewer labels suffice.
78
75
  const phase1Labels = labels.slice(0, 3);
79
- const { candidates, allBatchesFailed, rateLimitHit } = await searchInRepos(octokit, vetter, repos.slice(0, 10), baseQualifiers, phase1Labels, maxResults, "starred", filterIssues);
76
+ const reposToSearch = repos.slice(0, 10);
77
+ const { candidates, allReposFailed, rateLimitHit } = await fetchIssuesFromKnownRepos(octokit, vetter, reposToSearch, phase1Labels, maxResults, "starred", filterIssues);
80
78
  info(MODULE, `Found ${candidates.length} candidates from starred repos`);
81
79
  return {
82
80
  candidates,
83
- error: allBatchesFailed ? "All starred repo batches failed" : null,
81
+ error: allReposFailed ? "All starred repo fetches failed" : null,
84
82
  rateLimitHit,
85
83
  };
86
84
  }
@@ -150,9 +148,36 @@ async function runPhase2(octokit, vetter, scopes, labels, configLabels, baseQual
150
148
  rateLimitHit,
151
149
  };
152
150
  }
153
- /** Phase 3: Actively maintained repos. */
154
- async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories, maxResults, phase0RepoSet, starredRepoSet, existingCandidates, filterIssues) {
151
+ /** Phase 3: Actively maintained repos (REST-first, Search API fallback). */
152
+ async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories, maxResults, phase0RepoSet, starredRepoSet, starredRepos, existingCandidates, filterIssues) {
155
153
  info(MODULE, "Phase 3: Searching actively maintained repos...");
154
+ const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
155
+ // Step 1: Try REST API with starred repos first (no Search API quota used)
156
+ const eligibleStarred = starredRepos.filter((r) => !phase0RepoSet.has(r) && !seenRepos.has(r));
157
+ if (eligibleStarred.length > 0) {
158
+ info(MODULE, `Phase 3: Checking ${eligibleStarred.length} starred repos via REST API...`);
159
+ const restItems = await fetchIssuesFromMaintainedRepos(octokit, eligibleStarred.slice(0, 15), minStars, maxResults);
160
+ if (restItems.length > 0) {
161
+ try {
162
+ const { candidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, restItems, filterIssues, [phase0RepoSet, seenRepos], maxResults, minStars, "Phase 3 (REST)");
163
+ if (candidates.length > 0) {
164
+ info(MODULE, `Found ${candidates.length} candidates from maintained-repo REST search`);
165
+ return {
166
+ candidates,
167
+ error: allVetFailed ? "all vetting failed" : null,
168
+ rateLimitHit: vetRateLimitHit,
169
+ };
170
+ }
171
+ }
172
+ catch (error) {
173
+ if (getHttpStatusCode(error) === 401)
174
+ throw error;
175
+ warn(MODULE, `Phase 3 REST vetting failed, falling back to Search API:`, errorMessage(error));
176
+ }
177
+ }
178
+ }
179
+ // Step 2: Fall back to Search API if REST didn't yield results
180
+ info(MODULE, "Phase 3: Falling back to Search API...");
156
181
  const thirtyDaysAgo = new Date();
157
182
  thirtyDaysAgo.setDate(thirtyDaysAgo.getDate() - 30);
158
183
  const pushedSince = thirtyDaysAgo.toISOString().split("T")[0];
@@ -169,7 +194,6 @@ async function runPhase3(octokit, vetter, langQuery, minStars, projectCategories
169
194
  per_page: maxResults * 3,
170
195
  });
171
196
  info(MODULE, `Found ${data.total_count} issues in maintained-repo search, processing top ${data.items.length}...`);
172
- const seenRepos = new Set(existingCandidates.map((c) => c.issue.repo));
173
197
  const { candidates, allVetFailed, rateLimitHit: vetRateLimitHit, } = await filterVetAndScore(vetter, data.items, filterIssues, [phase0RepoSet, starredRepoSet, seenRepos], maxResults, minStars, "Phase 3");
174
198
  info(MODULE, `Found ${candidates.length} candidates from maintained-repo search`);
175
199
  return {
@@ -259,6 +283,7 @@ export class IssueDiscovery {
259
283
  (scopes ? buildEffectiveLabels(scopes, config.labels) : config.labels);
260
284
  const maxResults = options.maxResults || 10;
261
285
  const minStars = config.minStars ?? 50;
286
+ const interPhaseDelay = config.interPhaseDelayMs ?? 30000;
262
287
  // Strategy selection
263
288
  const ALL_STRATEGIES = CONCRETE_STRATEGIES;
264
289
  const rawStrategies = options.strategies ??
@@ -333,7 +358,7 @@ export class IssueDiscovery {
333
358
  if (phase0Repos.length > 0 && enabledStrategies.has("merged")) {
334
359
  const remaining = maxResults - allCandidates.length;
335
360
  if (remaining > 0) {
336
- const result = await runPhase0(this.octokit, this.vetter, phase0Repos, baseQualifiers, remaining, filterIssues);
361
+ const result = await runPhase0(this.octokit, this.vetter, phase0Repos, remaining, filterIssues);
337
362
  allCandidates.push(...result.candidates);
338
363
  phaseErrors["0"] = result.error;
339
364
  if (result.rateLimitHit)
@@ -346,12 +371,15 @@ export class IssueDiscovery {
346
371
  starredRepos.length > 0 &&
347
372
  searchBudget >= CRITICAL_BUDGET_THRESHOLD &&
348
373
  enabledStrategies.has("starred")) {
349
- await sleep(INTER_PHASE_DELAY_MS);
374
+ if (interPhaseDelay > 0) {
375
+ info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
376
+ await sleep(interPhaseDelay);
377
+ }
350
378
  const reposToSearch = starredRepos.filter((r) => !phase0RepoSet.has(r));
351
379
  if (reposToSearch.length > 0) {
352
380
  const remaining = maxResults - allCandidates.length;
353
381
  if (remaining > 0) {
354
- const result = await runPhase1(this.octokit, this.vetter, reposToSearch, baseQualifiers, labels, remaining, filterIssues);
382
+ const result = await runPhase1(this.octokit, this.vetter, reposToSearch, labels, remaining, filterIssues);
355
383
  allCandidates.push(...result.candidates);
356
384
  phaseErrors["1"] = result.error;
357
385
  if (result.rateLimitHit)
@@ -360,26 +388,49 @@ export class IssueDiscovery {
360
388
  }
361
389
  strategiesUsed.push("starred");
362
390
  }
363
- // Phase 2: General search
391
+ // Phase 2: General search (with rate limit mitigation)
392
+ const broadDelay = config.broadPhaseDelayMs ?? 90000;
393
+ const skipThreshold = config.skipBroadWhenSufficientResults ?? 15;
364
394
  if (allCandidates.length < maxResults &&
365
395
  searchBudget >= LOW_BUDGET_THRESHOLD &&
366
396
  enabledStrategies.has("broad")) {
367
- await sleep(INTER_PHASE_DELAY_MS);
368
- const remaining = maxResults - allCandidates.length;
369
- const result = await runPhase2(this.octokit, this.vetter, scopes, labels, config.labels, baseQualifiers, remaining, minStars, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
370
- allCandidates.push(...result.candidates);
371
- phaseErrors["2"] = result.error;
372
- if (result.rateLimitHit)
373
- rateLimitHitDuringSearch = true;
397
+ // Skip broad search if we already have enough candidates
398
+ if (skipThreshold > 0 && allCandidates.length >= skipThreshold) {
399
+ info(MODULE, `Skipping broad search: already found ${allCandidates.length} candidates (threshold: ${skipThreshold})`);
400
+ }
401
+ else {
402
+ // Always apply baseline inter-phase delay
403
+ if (interPhaseDelay > 0) {
404
+ info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
405
+ await sleep(interPhaseDelay);
406
+ }
407
+ // Apply additional broad-phase cooldown, but skip if previous phases found nothing
408
+ if (allCandidates.length > 0 && broadDelay > 0) {
409
+ info(MODULE, `Waiting ${(broadDelay / 1000).toFixed(0)}s for rate limit cooldown before broad search...`);
410
+ await sleep(broadDelay);
411
+ }
412
+ else if (allCandidates.length === 0) {
413
+ info(MODULE, `Skipping broad phase delay: no results from previous phases, proceeding immediately`);
414
+ }
415
+ const remaining = maxResults - allCandidates.length;
416
+ const result = await runPhase2(this.octokit, this.vetter, scopes, labels, config.labels, baseQualifiers, remaining, minStars, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
417
+ allCandidates.push(...result.candidates);
418
+ phaseErrors["2"] = result.error;
419
+ if (result.rateLimitHit)
420
+ rateLimitHitDuringSearch = true;
421
+ }
374
422
  strategiesUsed.push("broad");
375
423
  }
376
424
  // Phase 3: Actively maintained repos
377
425
  if (allCandidates.length < maxResults &&
378
426
  searchBudget >= LOW_BUDGET_THRESHOLD &&
379
427
  enabledStrategies.has("maintained")) {
380
- await sleep(INTER_PHASE_DELAY_MS);
428
+ if (interPhaseDelay > 0) {
429
+ info(MODULE, `Waiting ${(interPhaseDelay / 1000).toFixed(0)}s between phases for rate limit management...`);
430
+ await sleep(interPhaseDelay);
431
+ }
381
432
  const remaining = maxResults - allCandidates.length;
382
- const result = await runPhase3(this.octokit, this.vetter, langQuery, minStars, config.projectCategories ?? [], remaining, phase0RepoSet, starredRepoSet, allCandidates, filterIssues);
433
+ const result = await runPhase3(this.octokit, this.vetter, langQuery, minStars, config.projectCategories ?? [], remaining, phase0RepoSet, starredRepoSet, starredRepos, allCandidates, filterIssues);
383
434
  allCandidates.push(...result.candidates);
384
435
  phaseErrors["3"] = result.error;
385
436
  if (result.rateLimitHit)
@@ -203,6 +203,7 @@ export declare const ScoutPreferencesSchema: z.ZodObject<{
203
203
  maxIssueAgeDays: z.ZodDefault<z.ZodNumber>;
204
204
  includeDocIssues: z.ZodDefault<z.ZodBoolean>;
205
205
  minRepoScoreThreshold: z.ZodDefault<z.ZodNumber>;
206
+ interPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
206
207
  persistence: z.ZodDefault<z.ZodEnum<{
207
208
  local: "local";
208
209
  gist: "gist";
@@ -214,6 +215,8 @@ export declare const ScoutPreferencesSchema: z.ZodObject<{
214
215
  broad: "broad";
215
216
  maintained: "maintained";
216
217
  }>>>;
218
+ broadPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
219
+ skipBroadWhenSufficientResults: z.ZodDefault<z.ZodNumber>;
217
220
  }, z.core.$strip>;
218
221
  export declare const ScoutStateSchema: z.ZodObject<{
219
222
  version: z.ZodLiteral<1>;
@@ -241,6 +244,7 @@ export declare const ScoutStateSchema: z.ZodObject<{
241
244
  maxIssueAgeDays: z.ZodDefault<z.ZodNumber>;
242
245
  includeDocIssues: z.ZodDefault<z.ZodBoolean>;
243
246
  minRepoScoreThreshold: z.ZodDefault<z.ZodNumber>;
247
+ interPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
244
248
  persistence: z.ZodDefault<z.ZodEnum<{
245
249
  local: "local";
246
250
  gist: "gist";
@@ -252,6 +256,8 @@ export declare const ScoutStateSchema: z.ZodObject<{
252
256
  broad: "broad";
253
257
  maintained: "maintained";
254
258
  }>>>;
259
+ broadPhaseDelayMs: z.ZodDefault<z.ZodNumber>;
260
+ skipBroadWhenSufficientResults: z.ZodDefault<z.ZodNumber>;
255
261
  }, z.core.$strip>>;
256
262
  repoScores: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodObject<{
257
263
  repo: z.ZodString;
@@ -146,8 +146,11 @@ export const ScoutPreferencesSchema = z.object({
146
146
  maxIssueAgeDays: z.number().default(90),
147
147
  includeDocIssues: z.boolean().default(true),
148
148
  minRepoScoreThreshold: z.number().default(4),
149
+ interPhaseDelayMs: z.number().min(0).max(120000).default(30000),
149
150
  persistence: PersistenceModeSchema.default("local"),
150
151
  defaultStrategy: z.array(SearchStrategySchema).optional(),
152
+ broadPhaseDelayMs: z.number().min(0).max(300000).default(90000),
153
+ skipBroadWhenSufficientResults: z.number().int().min(0).max(100).default(15),
151
154
  });
152
155
  // ── Root state schema ───────────────────────────────────────────────
153
156
  export const ScoutStateSchema = z.object({
@@ -26,6 +26,27 @@ export declare function cachedSearchIssues(octokit: Octokit, params: {
26
26
  total_count: number;
27
27
  items: GitHubSearchItem[];
28
28
  }>;
29
+ /**
30
+ * Fetch issues from maintained repos using REST API (no Search API quota).
31
+ *
32
+ * Checks each repo for recent push activity and star threshold,
33
+ * then fetches open issues via `GET /repos/{owner}/{repo}/issues`.
34
+ * Does not query the Search API itself: if too few issues are returned, the caller is expected to fall back to it.
35
+ */
36
+ export declare function fetchIssuesFromMaintainedRepos(octokit: Octokit, repos: string[], minStars: number, maxResults: number): Promise<GitHubSearchItem[]>;
37
+ /**
38
+ * Fetch open issues from known repos using REST API (no Search API quota).
39
+ * Used by Phase 0 (merged-PR repos) and Phase 1 (starred repos).
40
+ *
41
+ * Instead of the Search API (`octokit.search.issuesAndPullRequests`), this
42
+ * calls `GET /repos/{owner}/{repo}/issues` which counts against the much
43
+ * larger Core API rate limit and avoids consuming the scarce Search quota.
44
+ */
45
+ export declare function fetchIssuesFromKnownRepos(octokit: Octokit, vetter: IssueVetter, repos: string[], labels: string[], maxResults: number, priority: SearchPriority, filterFn: (items: GitHubSearchItem[]) => GitHubSearchItem[]): Promise<{
46
+ candidates: IssueCandidate[];
47
+ allReposFailed: boolean;
48
+ rateLimitHit: boolean;
49
+ }>;
29
50
  /**
30
51
  * Search across chunked labels with deduplication.
31
52
  *
@@ -5,9 +5,9 @@
5
5
  * caching, spam-filtering, and batched repo search logic.
6
6
  */
7
7
  import { SCOPE_LABELS, } from "./types.js";
8
- import { errorMessage, isRateLimitError } from "./errors.js";
8
+ import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
9
9
  import { debug, warn } from "./logger.js";
10
- import { getHttpCache, cachedTimeBased } from "./http-cache.js";
10
+ import { getHttpCache } from "./http-cache.js";
11
11
  import { detectLabelFarmingRepos, } from "./issue-filtering.js";
12
12
  import { extractRepoFromUrl, sleep } from "./utils.js";
13
13
  import { getSearchBudgetTracker } from "./search-budget.js";
@@ -95,20 +95,166 @@ const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
95
95
  */
96
96
  export async function cachedSearchIssues(octokit, params) {
97
97
  const cacheKey = `search:${params.q}:${params.sort}:${params.order}:${params.per_page}`;
98
- return cachedTimeBased(getHttpCache(), cacheKey, SEARCH_CACHE_TTL_MS, async () => {
99
- const tracker = getSearchBudgetTracker();
100
- await tracker.waitForBudget();
98
+ const cache = getHttpCache();
99
+ // Check cache first
100
+ const cached = cache.getIfFresh(cacheKey, SEARCH_CACHE_TTL_MS);
101
+ if (cached) {
102
+ debug(MODULE, `Search cache hit for query`);
103
+ return cached;
104
+ }
105
+ // Fetch from API
106
+ const tracker = getSearchBudgetTracker();
107
+ await tracker.waitForBudget();
108
+ let data;
109
+ try {
110
+ const response = await octokit.search.issuesAndPullRequests(params);
111
+ data = response.data;
112
+ }
113
+ finally {
114
+ tracker.recordCall();
115
+ }
116
+ // Only cache non-empty results to prevent poisoning from rate-limited responses
117
+ if (data.items.length > 0) {
118
+ cache.set(cacheKey, "", data);
119
+ }
120
+ else {
121
+ debug(MODULE, `Skipping cache for empty search result (possible rate limit artifact)`);
122
+ }
123
+ return data;
124
+ }
125
+ // ── REST-based search functions ──
126
/**
 * Collect open, unassigned issues from actively maintained repos through the
 * REST API, so no Search API quota is spent.
 *
 * For every `owner/repo` name the repo metadata is loaded with `repos.get`.
 * A repo is skipped when it has no `pushed_at`, was last pushed more than
 * 30 days ago, has fewer than `minStars` stars, or is archived. For the
 * remaining repos the 5 most recently created open issues are listed, and
 * pull requests and assigned issues are dropped.
 *
 * Collection stops once `maxResults * 3` items have been gathered (checked
 * before each repo) or as soon as a rate-limit error is seen. Other per-repo
 * errors are logged and the repo is skipped. The result may be empty; the
 * caller is expected to fall back to the Search API in that case.
 *
 * @param octokit Octokit client used for `repos.get` and `issues.listForRepo`.
 * @param repos Repo full names in `owner/repo` form; malformed names are ignored.
 * @param minStars Minimum `stargazers_count` a repo needs to be considered.
 * @param maxResults Number of results the caller wants; up to three times as many raw items are collected.
 * @returns Issue items reduced to `html_url`, `repository_url`, `updated_at`, `title` and `labels`.
 * @throws Rethrows any error whose HTTP status is 401.
 */
export async function fetchIssuesFromMaintainedRepos(octokit, repos, minStars, maxResults) {
    const items = [];
    const itemBudget = maxResults * 3;
    // The activity cutoff does not change while the loop runs, so compute it once.
    const activityCutoff = new Date();
    activityCutoff.setDate(activityCutoff.getDate() - 30);
    let requestIssued = false;
    for (const repoFullName of repos) {
        if (items.length >= itemBudget)
            break;
        const [owner, repo] = repoFullName.split("/");
        if (!owner || !repo)
            continue;
        // Throttle before every repo after the first one that hit the API. This
        // also covers repos that were rejected or failed after `repos.get`, and
        // avoids a pointless sleep after the final repo.
        if (requestIssued)
            await sleep(INTER_QUERY_DELAY_MS);
        requestIssued = true;
        try {
            const { data: repoData } = await octokit.repos.get({ owner, repo });
            if (!repoData.pushed_at)
                continue;
            if (new Date(repoData.pushed_at) < activityCutoff)
                continue;
            if ((repoData.stargazers_count ?? 0) < minStars)
                continue;
            if (repoData.archived)
                continue;
            const { data: issues } = await octokit.issues.listForRepo({
                owner,
                repo,
                state: "open",
                sort: "created",
                direction: "desc",
                per_page: 5,
            });
            for (const issue of issues) {
                // The REST issues endpoint also returns pull requests; keep only
                // real issues that nobody is assigned to.
                if (issue.pull_request || issue.assignee)
                    continue;
                items.push({
                    html_url: issue.html_url,
                    repository_url: `https://api.github.com/repos/${repoFullName}`,
                    updated_at: issue.updated_at ?? "",
                    title: issue.title,
                    labels: issue.labels,
                });
            }
        }
        catch (error) {
            // Bad credentials will fail for every repo, so surface them immediately.
            if (getHttpStatusCode(error) === 401)
                throw error;
            if (isRateLimitError(error)) {
                warn(MODULE, `Rate limit hit fetching issues from ${repoFullName}:`, errorMessage(error));
                break;
            }
            warn(MODULE, `Error fetching issues from ${repoFullName}:`, errorMessage(error));
        }
    }
    return items;
}
111
187
  // ── Search infrastructure ──
188
/**
 * Fetch open issues from known repos using the REST API (no Search API quota).
 * Used by Phase 0 (merged-PR repos) and Phase 1 (starred repos).
 *
 * Instead of the Search API (`octokit.search.issuesAndPullRequests`), this
 * calls `GET /repos/{owner}/{repo}/issues`, which counts against the much
 * larger Core API rate limit and avoids consuming the scarce Search quota.
 *
 * For each repo the 5 most recently created open issues are listed, and pull
 * requests and assigned issues are dropped. The rest are passed through
 * `filterFn` and then vetted with `vetter.vetIssuesParallel`. The loop ends
 * when `maxResults` candidates have been collected or when a fetch hits a
 * rate limit.
 *
 * @param octokit Octokit client used for `issues.listForRepo`.
 * @param vetter Vetter whose `vetIssuesParallel` turns issue URLs into candidates.
 * @param repos Repo full names in `owner/repo` form.
 * @param labels Label names sent as one comma-joined `labels` parameter; empty means no label filter.
 * @param maxResults Maximum number of candidates to collect.
 * @param priority Priority tag forwarded to the vetter and used in log messages.
 * @param filterFn Filter applied to the mapped issue items before vetting.
 * @returns `candidates`, `allReposFailed` (every attempted repo failed) and
 *   `rateLimitHit` (a fetch or a vetting run reported a rate limit).
 * @throws Rethrows any error whose HTTP status is 401.
 */
export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels, maxResults, priority, filterFn) {
    const candidates = [];
    let attemptedRepos = 0;
    let failedRepos = 0;
    let rateLimitFailures = 0;
    // NOTE(review): GitHub's list-issues endpoint appears to treat comma-separated
    // labels as "must have all of them" — confirm that is intended for Phase 1.
    const labelFilter = labels.length > 0 ? { labels: labels.join(",") } : {};
    for (let i = 0; i < repos.length; i++) {
        if (candidates.length >= maxResults)
            break;
        // Delay between repos to avoid REST secondary rate limits
        if (i > 0)
            await sleep(INTER_QUERY_DELAY_MS);
        const repoFullName = repos[i];
        attemptedRepos++;
        const [owner, repo] = repoFullName.split("/");
        if (!owner || !repo) {
            // A malformed name can only fail; count it without spending a request.
            failedRepos++;
            warn(MODULE, `Skipping malformed repo name "${repoFullName}" (expected "owner/repo")`);
            continue;
        }
        try {
            const response = await octokit.issues.listForRepo({
                owner,
                repo,
                state: "open",
                sort: "created",
                direction: "desc",
                per_page: 5,
                ...labelFilter,
            });
            // Filter out pull requests (REST issues endpoint returns both) and assigned issues
            const issuesOnly = response.data.filter((item) => !("pull_request" in item) && !item.assignee);
            if (issuesOnly.length === 0)
                continue;
            const mapped = issuesOnly.map((issue) => ({
                html_url: issue.html_url,
                repository_url: `https://api.github.com/repos/${repoFullName}`,
                updated_at: issue.updated_at ?? "",
                title: issue.title,
                labels: issue.labels,
            }));
            const filtered = filterFn(mapped);
            if (filtered.length === 0)
                continue;
            const remainingNeeded = maxResults - candidates.length;
            // Vet up to twice as many URLs as still needed.
            const urls = filtered.slice(0, remainingNeeded * 2).map((item) => item.html_url);
            const { candidates: vetted, rateLimitHit: vetRateLimitHit } = await vetter.vetIssuesParallel(urls, remainingNeeded, priority);
            candidates.push(...vetted);
            if (vetRateLimitHit)
                rateLimitFailures++;
        }
        catch (error) {
            // Bad credentials will fail for every repo, so surface them immediately.
            if (getHttpStatusCode(error) === 401)
                throw error;
            failedRepos++;
            if (isRateLimitError(error)) {
                rateLimitFailures++;
                warn(MODULE, `Rate limit hit fetching issues from ${repoFullName}:`, errorMessage(error));
                // Stop instead of sending further requests while rate limited;
                // this matches fetchIssuesFromMaintainedRepos.
                break;
            }
            warn(MODULE, `Error fetching issues from ${repoFullName}:`, errorMessage(error));
        }
    }
    // Compare against attempted repos so an early stop on a rate limit is still
    // reported as a total failure when nothing succeeded before it.
    const allReposFailed = attemptedRepos > 0 && failedRepos === attemptedRepos;
    const rateLimitHit = rateLimitFailures > 0;
    if (allReposFailed) {
        warn(MODULE, `All ${attemptedRepos} attempted repo(s) failed for ${priority} phase. ` +
            `This may indicate a systemic issue (rate limit, auth, network).`);
    }
    return { candidates, allReposFailed, rateLimitHit };
}
112
258
  /**
113
259
  * Search across chunked labels with deduplication.
114
260
  *
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oss-scout/core",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "description": "Personalized GitHub issue finder with multi-strategy search, deep vetting, and viability scoring — CLI, library, MCP server, and Claude Code plugin",
5
5
  "type": "module",
6
6
  "bin": {
@@ -21,16 +21,6 @@
21
21
  "!dist/**/*.map",
22
22
  "!dist/core/test-utils.*"
23
23
  ],
24
- "scripts": {
25
- "build": "tsc",
26
- "bundle": "esbuild src/cli.ts --bundle --platform=node --target=node20 --format=cjs --minify --sourcemap --outfile=dist/cli.bundle.cjs",
27
- "start": "tsx src/cli.ts",
28
- "typecheck": "tsc --noEmit",
29
- "test": "vitest run",
30
- "test:coverage": "vitest run --coverage",
31
- "test:watch": "vitest",
32
- "prepublishOnly": "pnpm run build && pnpm run bundle"
33
- },
34
24
  "keywords": [
35
25
  "open-source",
36
26
  "github",
@@ -70,5 +60,14 @@
70
60
  "tsx": "^4.21.0",
71
61
  "typescript": "^5.9.3",
72
62
  "vitest": "^4.1.0"
63
+ },
64
+ "scripts": {
65
+ "build": "tsc",
66
+ "bundle": "esbuild src/cli.ts --bundle --platform=node --target=node20 --format=cjs --minify --sourcemap --outfile=dist/cli.bundle.cjs",
67
+ "start": "tsx src/cli.ts",
68
+ "typecheck": "tsc --noEmit",
69
+ "test": "vitest run",
70
+ "test:coverage": "vitest run --coverage",
71
+ "test:watch": "vitest"
73
72
  }
74
- }
73
+ }
@@ -1,6 +0,0 @@
1
- /**
2
- * Runs a worker pool that processes items with bounded concurrency.
3
- * N workers consume from a shared index. On any worker error, remaining
4
- * workers are aborted via a shared flag and the error is propagated.
5
- */
6
- export declare function runWorkerPool<T>(items: T[], worker: (item: T) => Promise<void>, concurrency: number): Promise<void>;
@@ -1,25 +0,0 @@
1
/**
 * Runs a worker pool that processes items with bounded concurrency.
 * N workers consume from a shared index. On any worker error, remaining
 * workers stop picking up new items via a shared flag and the error is
 * propagated to the caller.
 *
 * @param items Items to process; an empty array resolves immediately.
 * @param worker Async callback invoked once per item.
 * @param concurrency Maximum number of items processed at the same time.
 *   Values below 1 (including fractions) and NaN are treated as 1, so a bad
 *   setting can never cause items to be skipped silently.
 * @returns A promise that resolves when every item has been processed.
 * @throws Rejects with the first error thrown by `worker`.
 */
export async function runWorkerPool(items, worker, concurrency) {
    if (items.length === 0)
        return;
    // Clamp to at least one worker. Infinity is left alone and is capped by the
    // item count below.
    const requested = Number.isNaN(concurrency) ? 1 : Math.floor(concurrency);
    const workerCount = Math.min(Math.max(1, requested), items.length);
    let nextIndex = 0;
    let aborted = false;
    const poolWorker = async () => {
        while (!aborted && nextIndex < items.length) {
            // Claiming the index is synchronous, so no two workers get the same item.
            const item = items[nextIndex++];
            try {
                await worker(item);
            }
            catch (err) {
                aborted = true;
                throw err;
            }
        }
    };
    await Promise.all(Array.from({ length: workerCount }, () => poolWorker()));
}