@oss-scout/core 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,9 +5,9 @@
5
5
  * caching, spam-filtering, and batched repo search logic.
6
6
  */
7
7
  import { SCOPE_LABELS, } from "./types.js";
8
- import { errorMessage, isRateLimitError } from "./errors.js";
8
+ import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
9
9
  import { debug, warn } from "./logger.js";
10
- import { getHttpCache, cachedTimeBased } from "./http-cache.js";
10
+ import { getHttpCache } from "./http-cache.js";
11
11
  import { detectLabelFarmingRepos, } from "./issue-filtering.js";
12
12
  import { extractRepoFromUrl, sleep } from "./utils.js";
13
13
  import { getSearchBudgetTracker } from "./search-budget.js";
@@ -95,20 +95,166 @@ const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
95
95
  */
96
96
  export async function cachedSearchIssues(octokit, params) {
97
97
  const cacheKey = `search:${params.q}:${params.sort}:${params.order}:${params.per_page}`;
98
- return cachedTimeBased(getHttpCache(), cacheKey, SEARCH_CACHE_TTL_MS, async () => {
99
- const tracker = getSearchBudgetTracker();
100
- await tracker.waitForBudget();
98
+ const cache = getHttpCache();
99
+ // Check cache first
100
+ const cached = cache.getIfFresh(cacheKey, SEARCH_CACHE_TTL_MS);
101
+ if (cached) {
102
+ debug(MODULE, `Search cache hit for query`);
103
+ return cached;
104
+ }
105
+ // Fetch from API
106
+ const tracker = getSearchBudgetTracker();
107
+ await tracker.waitForBudget();
108
+ let data;
109
+ try {
110
+ const response = await octokit.search.issuesAndPullRequests(params);
111
+ data = response.data;
112
+ }
113
+ finally {
114
+ tracker.recordCall();
115
+ }
116
+ // Only cache non-empty results to prevent poisoning from rate-limited responses
117
+ if (data.items.length > 0) {
118
+ cache.set(cacheKey, "", data);
119
+ }
120
+ else {
121
+ debug(MODULE, `Skipping cache for empty search result (possible rate limit artifact)`);
122
+ }
123
+ return data;
124
+ }
125
+ // ── REST-based search functions ──
126
+ /**
127
+ * Fetch issues from maintained repos using REST API (no Search API quota).
128
+ *
129
+ * Checks each repo for recent push activity and star threshold,
130
+ * then fetches open issues via `GET /repos/{owner}/{repo}/issues`.
131
+ * Falls back to the caller to use Search API if this doesn't yield enough.
132
+ */
133
+ export async function fetchIssuesFromMaintainedRepos(octokit, repos, minStars, maxResults) {
134
+ const items = [];
135
+ for (const repoFullName of repos) {
136
+ if (items.length >= maxResults * 3)
137
+ break;
138
+ const [owner, repo] = repoFullName.split("/");
139
+ if (!owner || !repo)
140
+ continue;
101
141
  try {
102
- const { data } = await octokit.search.issuesAndPullRequests(params);
103
- return data;
142
+ const { data: repoData } = await octokit.repos.get({ owner, repo });
143
+ if (!repoData.pushed_at)
144
+ continue;
145
+ const pushedAt = new Date(repoData.pushed_at);
146
+ const thirtyDaysAgo = new Date();
147
+ thirtyDaysAgo.setDate(thirtyDaysAgo.getDate() - 30);
148
+ if (pushedAt < thirtyDaysAgo)
149
+ continue;
150
+ if ((repoData.stargazers_count ?? 0) < minStars)
151
+ continue;
152
+ if (repoData.archived)
153
+ continue;
154
+ const { data: issues } = await octokit.issues.listForRepo({
155
+ owner,
156
+ repo,
157
+ state: "open",
158
+ sort: "created",
159
+ direction: "desc",
160
+ per_page: 5,
161
+ });
162
+ // Filter out pull requests and assigned issues (REST endpoint returns both)
163
+ const realIssues = issues.filter((i) => !i.pull_request && !i.assignee);
164
+ for (const issue of realIssues) {
165
+ items.push({
166
+ html_url: issue.html_url,
167
+ repository_url: `https://api.github.com/repos/${repoFullName}`,
168
+ updated_at: issue.updated_at ?? "",
169
+ title: issue.title,
170
+ labels: issue.labels,
171
+ });
172
+ }
173
+ await sleep(INTER_QUERY_DELAY_MS);
104
174
  }
105
- finally {
106
- // Always record the call — failed requests still consume GitHub rate limit points
107
- tracker.recordCall();
175
+ catch (error) {
176
+ if (getHttpStatusCode(error) === 401)
177
+ throw error;
178
+ if (isRateLimitError(error)) {
179
+ warn(MODULE, `Rate limit hit fetching issues from ${repoFullName}:`, errorMessage(error));
180
+ break;
181
+ }
182
+ warn(MODULE, `Error fetching issues from ${repoFullName}:`, errorMessage(error));
108
183
  }
109
- });
184
+ }
185
+ return items;
110
186
  }
111
187
  // ── Search infrastructure ──
188
+ /**
189
+ * Fetch open issues from known repos using REST API (no Search API quota).
190
+ * Used by Phase 0 (merged-PR repos) and Phase 1 (starred repos).
191
+ *
192
+ * Instead of the Search API (`octokit.search.issuesAndPullRequests`), this
193
+ * calls `GET /repos/{owner}/{repo}/issues` which counts against the much
194
+ * larger Core API rate limit and avoids consuming the scarce Search quota.
195
+ */
196
+ export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels, maxResults, priority, filterFn) {
197
+ const candidates = [];
198
+ let failedRepos = 0;
199
+ let rateLimitFailures = 0;
200
+ for (let i = 0; i < repos.length; i++) {
201
+ if (candidates.length >= maxResults)
202
+ break;
203
+ // Delay between repos to avoid REST secondary rate limits
204
+ if (i > 0)
205
+ await sleep(INTER_QUERY_DELAY_MS);
206
+ const repoFullName = repos[i];
207
+ const [owner, repo] = repoFullName.split("/");
208
+ try {
209
+ const response = await octokit.issues.listForRepo({
210
+ owner,
211
+ repo,
212
+ state: "open",
213
+ sort: "created",
214
+ direction: "desc",
215
+ per_page: 5,
216
+ ...(labels.length > 0 ? { labels: labels.join(",") } : {}),
217
+ });
218
+ // Filter out pull requests (REST issues endpoint returns both) and assigned issues
219
+ const issuesOnly = response.data.filter((item) => !("pull_request" in item) && !item.assignee);
220
+ const mapped = issuesOnly.map((issue) => ({
221
+ html_url: issue.html_url,
222
+ repository_url: `https://api.github.com/repos/${repoFullName}`,
223
+ updated_at: issue.updated_at ?? "",
224
+ title: issue.title,
225
+ labels: issue.labels,
226
+ }));
227
+ if (mapped.length > 0) {
228
+ const filtered = filterFn(mapped);
229
+ if (filtered.length > 0) {
230
+ const remainingNeeded = maxResults - candidates.length;
231
+ const { candidates: vetted, rateLimitHit: vetRateLimitHit } = await vetter.vetIssuesParallel(filtered
232
+ .slice(0, remainingNeeded * 2)
233
+ .map((item) => item.html_url), remainingNeeded, priority);
234
+ candidates.push(...vetted);
235
+ if (vetRateLimitHit)
236
+ rateLimitFailures++;
237
+ }
238
+ }
239
+ }
240
+ catch (error) {
241
+ if (getHttpStatusCode(error) === 401)
242
+ throw error;
243
+ failedRepos++;
244
+ if (isRateLimitError(error)) {
245
+ rateLimitFailures++;
246
+ }
247
+ warn(MODULE, `Error fetching issues from ${repoFullName}:`, errorMessage(error));
248
+ }
249
+ }
250
+ const allReposFailed = failedRepos === repos.length && repos.length > 0;
251
+ const rateLimitHit = rateLimitFailures > 0;
252
+ if (allReposFailed) {
253
+ warn(MODULE, `All ${repos.length} repo(s) failed for ${priority} phase. ` +
254
+ `This may indicate a systemic issue (rate limit, auth, network).`);
255
+ }
256
+ return { candidates, allReposFailed, rateLimitHit };
257
+ }
112
258
  /**
113
259
  * Search across chunked labels with deduplication.
114
260
  *
@@ -19,7 +19,7 @@ export interface ProjectHealth {
19
19
  failureReason?: string;
20
20
  }
21
21
  /** Priority tier for issue search results. */
22
- export type SearchPriority = "merged_pr" | "preferred_org" | "starred" | "normal";
22
+ export type SearchPriority = "merged_pr" | "starred" | "normal";
23
23
  /** A fully vetted issue candidate with scoring. */
24
24
  export interface IssueCandidate {
25
25
  issue: TrackedIssue;
package/dist/index.d.ts CHANGED
@@ -16,8 +16,8 @@
16
16
  */
17
17
  export { createScout, OssScout } from "./scout.js";
18
18
  export type { ScoutConfig, SearchOptions, SearchResult, IssueCandidate, MergedPRRecord, ClosedPRRecord, RepoScoreUpdate, ProjectHealth, SearchPriority, CheckResult, VetListOptions, VetListResult, VetListEntry, VetListSummary, } from "./core/types.js";
19
- export type { ScoutState, ScoutPreferences, RepoScore, RepoSignals, IssueVettingResult, ContributionGuidelines, TrackedIssue, IssueScope, ProjectCategory, StoredMergedPR, StoredClosedPR, SearchStrategy, } from "./core/schemas.js";
20
- export { ScoutStateSchema, ScoutPreferencesSchema, RepoScoreSchema, IssueScopeSchema, ProjectCategorySchema, SearchStrategySchema, } from "./core/schemas.js";
19
+ export type { ScoutState, ScoutPreferences, RepoScore, RepoSignals, IssueVettingResult, ContributionGuidelines, TrackedIssue, IssueScope, ProjectCategory, StoredMergedPR, StoredClosedPR, SearchStrategy, SkippedIssue, } from "./core/schemas.js";
20
+ export { ScoutStateSchema, ScoutPreferencesSchema, RepoScoreSchema, IssueScopeSchema, ProjectCategorySchema, SearchStrategySchema, SkippedIssueSchema, } from "./core/schemas.js";
21
21
  export { requireGitHubToken, getGitHubToken } from "./core/utils.js";
22
22
  export { IssueDiscovery } from "./core/issue-discovery.js";
23
23
  export { IssueVetter, type ScoutStateReader } from "./core/issue-vetting.js";
package/dist/index.js CHANGED
@@ -17,7 +17,7 @@
17
17
  // Main API
18
18
  export { createScout, OssScout } from "./scout.js";
19
19
  // Schemas (for consumers who need runtime validation)
20
- export { ScoutStateSchema, ScoutPreferencesSchema, RepoScoreSchema, IssueScopeSchema, ProjectCategorySchema, SearchStrategySchema, } from "./core/schemas.js";
20
+ export { ScoutStateSchema, ScoutPreferencesSchema, RepoScoreSchema, IssueScopeSchema, ProjectCategorySchema, SearchStrategySchema, SkippedIssueSchema, } from "./core/schemas.js";
21
21
  // Utilities
22
22
  export { requireGitHubToken, getGitHubToken } from "./core/utils.js";
23
23
  // Internal classes (for advanced use)
package/dist/scout.d.ts CHANGED
@@ -5,7 +5,7 @@
5
5
  * Implements ScoutStateReader to bridge state with the search engine.
6
6
  */
7
7
  import type { ScoutStateReader } from "./core/issue-vetting.js";
8
- import type { ScoutState, ScoutPreferences, RepoScore, SavedCandidate } from "./core/schemas.js";
8
+ import type { ScoutState, ScoutPreferences, RepoScore, SavedCandidate, SkippedIssue } from "./core/schemas.js";
9
9
  import type { ScoutConfig, SearchOptions, SearchResult, IssueCandidate, MergedPRRecord, ClosedPRRecord, RepoScoreUpdate, ProjectCategory, VetListOptions, VetListResult } from "./core/types.js";
10
10
  import { GistStateStore } from "./core/gist-state-store.js";
11
11
  /**
@@ -44,6 +44,7 @@ export declare class OssScout implements ScoutStateReader {
44
44
  constructor(githubToken: string, initialState: ScoutState, gistStore?: GistStateStore | null);
45
45
  /**
46
46
  * Multi-strategy issue search. Returns scored, sorted candidates.
47
+ * Automatically culls expired skip entries and filters skipped issues.
47
48
  */
48
49
  search(options?: SearchOptions): Promise<SearchResult>;
49
50
  /**
@@ -59,7 +60,6 @@ export declare class OssScout implements ScoutStateReader {
59
60
  private classifyVetResult;
60
61
  getReposWithMergedPRs(): string[];
61
62
  getStarredRepos(): string[];
62
- getPreferredOrgs(): string[];
63
63
  getProjectCategories(): ProjectCategory[];
64
64
  getRepoScore(repo: string): number | null;
65
65
  /** Get current preferences (read-only). */
@@ -101,6 +101,31 @@ export declare class OssScout implements ScoutStateReader {
101
101
  * Clear all saved results.
102
102
  */
103
103
  clearResults(): void;
104
+ /**
105
+ * Skip an issue — excludes it from future searches. Auto-culled after 90 days.
106
+ */
107
+ skipIssue(url: string, metadata?: {
108
+ repo?: string;
109
+ number?: number;
110
+ title?: string;
111
+ }): void;
112
+ /**
113
+ * Get all skipped issues.
114
+ */
115
+ getSkippedIssues(): SkippedIssue[];
116
+ /**
117
+ * Remove a specific issue from the skip list.
118
+ */
119
+ unskipIssue(url: string): void;
120
+ /**
121
+ * Clear all skipped issues.
122
+ */
123
+ clearSkippedIssues(): void;
124
+ /**
125
+ * Remove skipped issues older than maxDays (default 90). Called automatically during search.
126
+ * @returns The number of expired entries that were removed.
127
+ */
128
+ cullExpiredSkips(maxDays?: number): number;
104
129
  /**
105
130
  * Check if state has uncommitted changes.
106
131
  */
package/dist/scout.js CHANGED
@@ -119,12 +119,17 @@ export class OssScout {
119
119
  // ── Search ──────────────────────────────────────────────────────────
120
120
  /**
121
121
  * Multi-strategy issue search. Returns scored, sorted candidates.
122
+ * Automatically culls expired skip entries and filters skipped issues.
122
123
  */
123
124
  async search(options) {
125
+ // Auto-cull expired skips before searching
126
+ this.cullExpiredSkips();
127
+ const skippedUrls = new Set((this.state.skippedIssues ?? []).map((s) => s.url));
124
128
  const discovery = new IssueDiscovery(this.githubToken, this.state.preferences, this);
125
129
  const { candidates, strategiesUsed } = await discovery.searchIssues({
126
130
  maxResults: options?.maxResults,
127
131
  strategies: options?.strategies,
132
+ skippedUrls,
128
133
  });
129
134
  this.state.lastSearchAt = new Date().toISOString();
130
135
  this.dirty = true;
@@ -234,9 +239,6 @@ export class OssScout {
234
239
  getStarredRepos() {
235
240
  return this.state.starredRepos;
236
241
  }
237
- getPreferredOrgs() {
238
- return this.state.preferences.preferredOrgs;
239
- }
240
242
  getProjectCategories() {
241
243
  return this.state.preferences.projectCategories;
242
244
  }
@@ -369,6 +371,70 @@ export class OssScout {
369
371
  this.state.savedResults = [];
370
372
  this.dirty = true;
371
373
  }
374
+ // ── Skip List ───────────────────────────────────────────────────────
375
+ /**
376
+ * Skip an issue — excludes it from future searches. Auto-culled after 90 days.
377
+ */
378
+ skipIssue(url, metadata) {
379
+ const existing = this.state.skippedIssues ?? [];
380
+ if (existing.some((s) => s.url === url))
381
+ return; // already skipped
382
+ this.state.skippedIssues = [
383
+ ...existing,
384
+ {
385
+ url,
386
+ repo: metadata?.repo ?? "",
387
+ number: metadata?.number ?? 0,
388
+ title: metadata?.title ?? "",
389
+ skippedAt: new Date().toISOString(),
390
+ },
391
+ ];
392
+ // Also remove from saved results if present
393
+ if (this.state.savedResults) {
394
+ this.state.savedResults = this.state.savedResults.filter((r) => r.issueUrl !== url);
395
+ }
396
+ this.dirty = true;
397
+ }
398
+ /**
399
+ * Get all skipped issues.
400
+ */
401
+ getSkippedIssues() {
402
+ return this.state.skippedIssues ?? [];
403
+ }
404
+ /**
405
+ * Remove a specific issue from the skip list.
406
+ */
407
+ unskipIssue(url) {
408
+ this.state.skippedIssues = (this.state.skippedIssues ?? []).filter((s) => s.url !== url);
409
+ this.dirty = true;
410
+ }
411
+ /**
412
+ * Clear all skipped issues.
413
+ */
414
+ clearSkippedIssues() {
415
+ this.state.skippedIssues = [];
416
+ this.dirty = true;
417
+ }
418
+ /**
419
+ * Remove skipped issues older than maxDays (default 90). Called automatically during search.
420
+ * @returns The number of expired entries that were removed.
421
+ */
422
+ cullExpiredSkips(maxDays = 90) {
423
+ const cutoff = new Date();
424
+ cutoff.setDate(cutoff.getDate() - maxDays);
425
+ const before = (this.state.skippedIssues ?? []).length;
426
+ this.state.skippedIssues = (this.state.skippedIssues ?? []).filter((s) => {
427
+ const d = new Date(s.skippedAt);
428
+ if (isNaN(d.getTime())) {
429
+ return true; // keep entries with invalid dates rather than silently dropping
430
+ }
431
+ return d >= cutoff;
432
+ });
433
+ const culled = before - this.state.skippedIssues.length;
434
+ if (culled > 0)
435
+ this.dirty = true;
436
+ return culled;
437
+ }
372
438
  // ── Persistence ─────────────────────────────────────────────────────
373
439
  /**
374
440
  * Check if state has uncommitted changes.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@oss-scout/core",
3
- "version": "0.2.1",
4
- "description": "Find open source issues personalized to your contribution history",
3
+ "version": "0.4.0",
4
+ "description": "Personalized GitHub issue finder with multi-strategy search, deep vetting, and viability scoring — CLI, library, MCP server, and Claude Code plugin",
5
5
  "type": "module",
6
6
  "bin": {
7
7
  "oss-scout": "./dist/cli.bundle.cjs"
@@ -21,23 +21,17 @@
21
21
  "!dist/**/*.map",
22
22
  "!dist/core/test-utils.*"
23
23
  ],
24
- "scripts": {
25
- "build": "tsc",
26
- "bundle": "esbuild src/cli.ts --bundle --platform=node --target=node20 --format=cjs --minify --sourcemap --outfile=dist/cli.bundle.cjs",
27
- "start": "tsx src/cli.ts",
28
- "typecheck": "tsc --noEmit",
29
- "test": "vitest run",
30
- "test:coverage": "vitest run --coverage",
31
- "test:watch": "vitest",
32
- "prepublishOnly": "pnpm run build && pnpm run bundle"
33
- },
34
24
  "keywords": [
35
25
  "open-source",
36
26
  "github",
37
27
  "issue-discovery",
38
28
  "cli",
39
29
  "vetting",
40
- "contributions"
30
+ "contributions",
31
+ "claude",
32
+ "mcp-server",
33
+ "contribution-finder",
34
+ "personalized"
41
35
  ],
42
36
  "author": "John Costa",
43
37
  "license": "MIT",
@@ -66,5 +60,14 @@
66
60
  "tsx": "^4.21.0",
67
61
  "typescript": "^5.9.3",
68
62
  "vitest": "^4.1.0"
63
+ },
64
+ "scripts": {
65
+ "build": "tsc",
66
+ "bundle": "esbuild src/cli.ts --bundle --platform=node --target=node20 --format=cjs --minify --sourcemap --outfile=dist/cli.bundle.cjs",
67
+ "start": "tsx src/cli.ts",
68
+ "typecheck": "tsc --noEmit",
69
+ "test": "vitest run",
70
+ "test:coverage": "vitest run --coverage",
71
+ "test:watch": "vitest"
69
72
  }
70
- }
73
+ }
@@ -1,6 +0,0 @@
1
- /**
2
- * Runs a worker pool that processes items with bounded concurrency.
3
- * N workers consume from a shared index. On any worker error, remaining
4
- * workers are aborted via a shared flag and the error is propagated.
5
- */
6
- export declare function runWorkerPool<T>(items: T[], worker: (item: T) => Promise<void>, concurrency: number): Promise<void>;
@@ -1,25 +0,0 @@
1
- /**
2
- * Runs a worker pool that processes items with bounded concurrency.
3
- * N workers consume from a shared index. On any worker error, remaining
4
- * workers are aborted via a shared flag and the error is propagated.
5
- */
6
- export async function runWorkerPool(items, worker, concurrency) {
7
- let index = 0;
8
- let aborted = false;
9
- const poolWorker = async () => {
10
- while (index < items.length) {
11
- if (aborted)
12
- break;
13
- const item = items[index++];
14
- try {
15
- await worker(item);
16
- }
17
- catch (err) {
18
- aborted = true;
19
- throw err;
20
- }
21
- }
22
- };
23
- const workerCount = Math.min(concurrency, items.length);
24
- await Promise.all(Array.from({ length: workerCount }, () => poolWorker()));
25
- }