@oss-scout/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/dist/cli.bundle.cjs +114 -0
  2. package/dist/cli.d.ts +5 -0
  3. package/dist/cli.js +341 -0
  4. package/dist/commands/config.d.ts +22 -0
  5. package/dist/commands/config.js +169 -0
  6. package/dist/commands/results.d.ts +8 -0
  7. package/dist/commands/results.js +13 -0
  8. package/dist/commands/search.d.ts +39 -0
  9. package/dist/commands/search.js +50 -0
  10. package/dist/commands/setup.d.ts +17 -0
  11. package/dist/commands/setup.js +104 -0
  12. package/dist/commands/validation.d.ts +6 -0
  13. package/dist/commands/validation.js +17 -0
  14. package/dist/commands/vet-list.d.ts +9 -0
  15. package/dist/commands/vet-list.js +16 -0
  16. package/dist/commands/vet.d.ts +25 -0
  17. package/dist/commands/vet.js +29 -0
  18. package/dist/core/bootstrap.d.ts +14 -0
  19. package/dist/core/bootstrap.js +122 -0
  20. package/dist/core/category-mapping.d.ts +19 -0
  21. package/dist/core/category-mapping.js +58 -0
  22. package/dist/core/concurrency.d.ts +6 -0
  23. package/dist/core/concurrency.js +25 -0
  24. package/dist/core/errors.d.ts +22 -0
  25. package/dist/core/errors.js +69 -0
  26. package/dist/core/gist-state-store.d.ts +96 -0
  27. package/dist/core/gist-state-store.js +302 -0
  28. package/dist/core/github.d.ts +16 -0
  29. package/dist/core/github.js +58 -0
  30. package/dist/core/http-cache.d.ts +108 -0
  31. package/dist/core/http-cache.js +314 -0
  32. package/dist/core/issue-discovery.d.ts +93 -0
  33. package/dist/core/issue-discovery.js +475 -0
  34. package/dist/core/issue-eligibility.d.ts +33 -0
  35. package/dist/core/issue-eligibility.js +151 -0
  36. package/dist/core/issue-filtering.d.ts +51 -0
  37. package/dist/core/issue-filtering.js +103 -0
  38. package/dist/core/issue-scoring.d.ts +43 -0
  39. package/dist/core/issue-scoring.js +97 -0
  40. package/dist/core/issue-vetting.d.ts +44 -0
  41. package/dist/core/issue-vetting.js +270 -0
  42. package/dist/core/local-state.d.ts +16 -0
  43. package/dist/core/local-state.js +56 -0
  44. package/dist/core/logger.d.ts +11 -0
  45. package/dist/core/logger.js +25 -0
  46. package/dist/core/pagination.d.ts +7 -0
  47. package/dist/core/pagination.js +16 -0
  48. package/dist/core/repo-health.d.ts +19 -0
  49. package/dist/core/repo-health.js +179 -0
  50. package/dist/core/schemas.d.ts +315 -0
  51. package/dist/core/schemas.js +137 -0
  52. package/dist/core/search-budget.d.ts +62 -0
  53. package/dist/core/search-budget.js +129 -0
  54. package/dist/core/search-phases.d.ts +69 -0
  55. package/dist/core/search-phases.js +238 -0
  56. package/dist/core/types.d.ts +124 -0
  57. package/dist/core/types.js +9 -0
  58. package/dist/core/utils.d.ts +18 -0
  59. package/dist/core/utils.js +106 -0
  60. package/dist/formatters/json.d.ts +6 -0
  61. package/dist/formatters/json.js +20 -0
  62. package/dist/index.d.ts +23 -0
  63. package/dist/index.js +25 -0
  64. package/dist/scout.d.ts +125 -0
  65. package/dist/scout.js +391 -0
  66. package/package.json +70 -0
@@ -0,0 +1,314 @@
1
+ /**
2
+ * HTTP caching with ETags for GitHub API responses.
3
+ *
4
+ * Stores ETags and response bodies for cacheable GET endpoints in
5
+ * `~/.oss-scout/cache/`. On subsequent requests, sends `If-None-Match`
6
+ * headers — 304 responses don't count against GitHub rate limits.
7
+ *
8
+ * Also provides in-flight request deduplication so that concurrent calls
9
+ * for the same endpoint (e.g., star counts for two PRs in the same repo)
10
+ * share a single HTTP round-trip.
11
+ */
12
+ import * as fs from 'fs';
13
+ import * as path from 'path';
14
+ import * as crypto from 'crypto';
15
+ import { getCacheDir } from './utils.js';
16
+ import { debug, warn } from './logger.js';
17
+ import { errorMessage, getHttpStatusCode } from './errors.js';
18
/** Label passed as the first argument to every `debug`/`warn` call in this file. */
const MODULE = 'http-cache';
/**
 * Default eviction threshold, in milliseconds (one day).
 *
 * `evictStale()` uses this when no explicit `maxAgeMs` is given. The threshold
 * only drives cleanup: an older entry can still be read and used for a
 * conditional request, because its ETag may remain valid.
 */
const DEFAULT_MAX_AGE_MS = 1000 * 60 * 60 * 24;
26
/**
 * File-based HTTP cache.
 *
 * Each entry is stored as its own JSON file inside `cacheDir`, named after the
 * SHA-256 hash of the request URL (or arbitrary cache key). Hashing avoids
 * filesystem problems with URL-based filenames and makes a lookup a single
 * file read.
 *
 * An entry has the shape `{ etag, url, body, cachedAt }`, where `cachedAt` is
 * an ISO-8601 timestamp written by `set()`.
 */
export class HttpCache {
    /** Directory that holds the `<sha256>.json` entry files. */
    cacheDir;
    /** In-flight request deduplication map: URL -> Promise<response>. */
    inflightRequests = new Map();
    /**
     * @param cacheDir - Directory for cache files. When omitted (`null` or
     *   `undefined`), the value returned by `getCacheDir()` is used.
     */
    constructor(cacheDir) {
        this.cacheDir = cacheDir ?? getCacheDir();
    }
    /** Derive a filesystem-safe cache key (hex SHA-256 digest) from a URL. */
    keyFor(url) {
        return crypto.createHash('sha256').update(url).digest('hex');
    }
    /** Full path to the cache file for a given URL. */
    pathFor(url) {
        return path.join(this.cacheDir, `${this.keyFor(url)}.json`);
    }
    /**
     * Return the cached body if the entry exists and is younger than `maxAgeMs`.
     * Useful for time-based caching where ETag validation isn't applicable
     * (e.g., caching aggregated results from paginated API calls).
     *
     * @param key - Cache key (URL or any other string identifier).
     * @param maxAgeMs - Maximum accepted age in milliseconds.
     * @returns The stored body, or `null` when there is no entry, its timestamp
     *   is unparsable, it lies in the future, or it is older than `maxAgeMs`.
     */
    getIfFresh(key, maxAgeMs) {
        const entry = this.get(key);
        if (!entry)
            return null;
        const age = Date.now() - new Date(entry.cachedAt).getTime();
        if (!Number.isFinite(age) || age < 0 || age > maxAgeMs)
            return null;
        return entry.body;
    }
    /**
     * Look up a cached response.
     *
     * @param url - URL (or key) the entry was stored under.
     * @returns The stored entry, or `null` if no usable entry exists. Files that
     *   do not contain a JSON object are deleted as corrupt.
     */
    get(url) {
        const filePath = this.pathFor(url);
        try {
            const raw = fs.readFileSync(filePath, 'utf-8');
            const entry = JSON.parse(raw);
            // Valid JSON that is not an object (e.g. `null`, `42`) cannot be an
            // entry; route it through the corrupt-entry cleanup below instead of
            // failing on property access on every read.
            if (entry === null || typeof entry !== 'object') {
                throw new SyntaxError('cache entry is not a JSON object');
            }
            // Sanity-check: the file should contain the URL we asked for
            if (entry.url !== url) {
                debug(MODULE, `Cache collision detected for ${url}, ignoring`);
                return null;
            }
            return entry;
        }
        catch (err) {
            const code = err?.code;
            if (code === 'ENOENT')
                return null;
            if (err instanceof SyntaxError) {
                debug(MODULE, `Corrupt cache entry, deleting: ${url}`);
                try {
                    fs.unlinkSync(filePath);
                }
                catch (unlinkErr) {
                    debug(MODULE, `Failed to delete corrupt cache entry: ${errorMessage(unlinkErr)}`);
                }
                return null;
            }
            warn(MODULE, `Cache read failed for ${url}: ${errorMessage(err)}`);
            return null;
        }
    }
    /**
     * Store a response with its ETag. The file is written with mode 0o600.
     * Write failures are logged and swallowed so that caching never breaks the
     * request that produced the data.
     *
     * @param url - URL (or key) to store the entry under.
     * @param etag - ETag of the response (may be empty for time-based entries).
     * @param body - JSON-serializable response body.
     */
    set(url, etag, body) {
        const entry = {
            etag,
            url,
            body,
            cachedAt: new Date().toISOString(),
        };
        try {
            fs.writeFileSync(this.pathFor(url), JSON.stringify(entry), { encoding: 'utf-8', mode: 0o600 });
            debug(MODULE, `Cached response for ${url}`);
        }
        catch (err) {
            // Non-fatal: cache write failure should not break the request
            warn(MODULE, `Failed to write cache for ${url}: ${errorMessage(err)}`);
        }
    }
    /**
     * Get the in-flight promise for a URL (for deduplication).
     * @returns The registered promise, or `undefined` if none is in flight.
     */
    getInflight(url) {
        return this.inflightRequests.get(url);
    }
    /**
     * Register an in-flight request for deduplication.
     * @returns A cleanup function to call when the request completes; it
     *   removes the registration for `url`.
     */
    setInflight(url, promise) {
        this.inflightRequests.set(url, promise);
        return () => {
            this.inflightRequests.delete(url);
        };
    }
    /**
     * Remove stale entries from the cache directory.
     * Intended to be called periodically (e.g., once per search invocation).
     *
     * An entry is removed when it is older than `maxAgeMs`, when its file cannot
     * be read or parsed, or when its `cachedAt` timestamp is missing/unparsable
     * (such an entry can never be served by `getIfFresh` and would otherwise
     * never age out).
     *
     * @param maxAgeMs - Age threshold in milliseconds (defaults to 24 hours).
     * @returns Number of files removed.
     */
    evictStale(maxAgeMs = DEFAULT_MAX_AGE_MS) {
        let evicted = 0;
        try {
            const files = fs.readdirSync(this.cacheDir);
            const now = Date.now();
            for (const file of files) {
                if (!file.endsWith('.json'))
                    continue;
                const filePath = path.join(this.cacheDir, file);
                let stale;
                try {
                    const entry = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
                    const age = now - new Date(entry.cachedAt).getTime();
                    // NaN age (bad timestamp) compares false against any
                    // threshold, so it has to be checked explicitly.
                    stale = !Number.isFinite(age) || age > maxAgeMs;
                }
                catch {
                    debug(MODULE, `Removing unreadable cache entry ${file}`);
                    stale = true;
                }
                if (!stale)
                    continue;
                try {
                    fs.unlinkSync(filePath);
                    evicted++;
                }
                catch (unlinkErr) {
                    debug(MODULE, `Failed to remove stale cache entry ${file}: ${errorMessage(unlinkErr)}`);
                }
            }
        }
        catch (err) {
            const code = err?.code;
            // A missing cache directory simply means there is nothing to evict.
            if (code !== 'ENOENT') {
                warn(MODULE, `Failed to evict stale cache entries: ${errorMessage(err)}`);
            }
        }
        if (evicted > 0) {
            debug(MODULE, `Evicted ${evicted} stale cache entries`);
        }
        return evicted;
    }
    /**
     * Remove all entries from the cache.
     *
     * A failure to delete one file is logged and does not stop the remaining
     * files from being removed.
     */
    clear() {
        let files;
        try {
            files = fs.readdirSync(this.cacheDir);
        }
        catch (err) {
            const code = err?.code;
            if (code !== 'ENOENT') {
                warn(MODULE, `Failed to clear cache: ${errorMessage(err)}`);
            }
            return;
        }
        let failed = false;
        for (const file of files) {
            if (!file.endsWith('.json'))
                continue;
            try {
                fs.unlinkSync(path.join(this.cacheDir, file));
            }
            catch (err) {
                const code = err?.code;
                // ENOENT: the file vanished between readdir and unlink, which is
                // the outcome we wanted anyway.
                if (code !== 'ENOENT') {
                    failed = true;
                    warn(MODULE, `Failed to clear cache: ${errorMessage(err)}`);
                }
            }
        }
        if (!failed) {
            debug(MODULE, 'Cache cleared');
        }
    }
    /**
     * Return the number of entries (`.json` files) currently in the cache.
     * Returns 0 when the directory cannot be read.
     */
    size() {
        try {
            return fs.readdirSync(this.cacheDir).filter((f) => f.endsWith('.json')).length;
        }
        catch (err) {
            const code = err?.code;
            if (code !== 'ENOENT') {
                debug(MODULE, `Failed to read cache size: ${errorMessage(err)}`);
            }
            return 0;
        }
    }
}
211
+ // ---------------------------------------------------------------------------
212
+ // Singleton
213
+ // ---------------------------------------------------------------------------
214
/** Module-wide cache instance; stays `null` until `getHttpCache()` is first called. */
let _httpCache = null;
/**
 * Return the shared `HttpCache`, constructing it with default arguments on
 * the first call and reusing that same instance afterwards.
 */
export function getHttpCache() {
    return _httpCache ?? (_httpCache = new HttpCache());
}
225
+ // ---------------------------------------------------------------------------
226
+ // Octokit integration helpers
227
+ // ---------------------------------------------------------------------------
228
/**
 * Wraps an Octokit `repos.get`-style call with ETag caching and request
 * deduplication.
 *
 * Usage:
 * ```ts
 * const data = await cachedRequest(cache, '/repos/owner/repo', (headers) =>
 *   octokit.repos.get({ owner, repo: name, headers }),
 * );
 * ```
 *
 * 1. If an identical request is already in-flight, returns the existing promise
 *    (request deduplication).
 * 2. If a cached entry with a non-empty ETag exists, passes `if-none-match` to
 *    the fetcher. On 304, returns the cached body without consuming a
 *    rate-limit point.
 * 3. On a fresh response carrying an ETag, caches the ETag + body for next time.
 *
 * @param cache - Cache providing `get`, `set`, `getInflight` and `setInflight`.
 * @param url - Cache / deduplication key for the request.
 * @param fetcher - Performs the request. Receives the extra headers to send and
 *   must resolve to an object with `data` and (optionally) `headers`.
 * @returns The response `data`, or the cached body on a 304.
 * @throws Whatever `fetcher` throws, except a 304 error for which a cache
 *   entry is available.
 */
export async function cachedRequest(cache, url, fetcher) {
    // --- Deduplication ---
    const existing = cache.getInflight(url);
    if (existing) {
        debug(MODULE, `Dedup hit for ${url}`);
        return (await existing);
    }
    const doFetch = async () => {
        const extraHeaders = {};
        const cached = cache.get(url);
        // Entries written by `cachedTimeBased` carry an empty ETag; sending an
        // empty `if-none-match` value is meaningless, so only send real ETags.
        if (cached?.etag) {
            extraHeaders['if-none-match'] = cached.etag;
        }
        try {
            const response = await fetcher(extraHeaders);
            // Store ETag if present (headers may be absent in test mocks)
            const etag = response.headers?.['etag'];
            if (etag) {
                cache.set(url, etag, response.data);
            }
            return response.data;
        }
        catch (err) {
            // Check for 304 Not Modified — re-read cache to avoid stale closure snapshot
            if (isNotModifiedError(err)) {
                const freshCached = cache.get(url);
                if (freshCached) {
                    debug(MODULE, `304 cache hit for ${url}`);
                    return freshCached.body;
                }
            }
            throw err;
        }
    };
    const promise = doFetch();
    const cleanup = cache.setInflight(url, promise);
    try {
        const result = await promise;
        return result;
    }
    finally {
        // Always drop the registration, on success and on failure alike.
        cleanup();
    }
}
289
/**
 * Time-based cache wrapper (no ETag / conditional requests).
 *
 * If a cached result exists and is younger than `maxAgeMs`, returns it.
 * Otherwise calls `fetcher`, caches the result, and returns it.
 *
 * Use this for expensive operations whose results change slowly
 * (e.g. search queries, project health checks).
 *
 * @param cache - Cache providing `getIfFresh` and `set`.
 * @param key - Cache key for the result.
 * @param maxAgeMs - Maximum age in milliseconds for a cached result to be reused.
 * @param fetcher - Produces the result on a cache miss.
 * @returns The cached or freshly fetched result. A result of `null` or
 *   `undefined` cannot be told apart from a miss and is fetched again.
 */
export async function cachedTimeBased(cache, key, maxAgeMs, fetcher) {
    const cached = cache.getIfFresh(key, maxAgeMs);
    // `getIfFresh` reports a miss as `null`. Compare explicitly rather than by
    // truthiness so that cached falsy results (0, '', false) count as hits.
    if (cached !== null && cached !== undefined) {
        debug(MODULE, `Time-based cache hit for ${key}`);
        return cached;
    }
    const result = await fetcher();
    // Time-based entries have no ETag, hence the empty string.
    cache.set(key, '', result);
    return result;
}
308
/**
 * Tell whether a thrown value represents an HTTP 304 Not Modified response.
 * Octokit throws a RequestError with status 304 for conditional requests.
 *
 * @param err - Value caught from a failed request.
 * @returns `true` when `getHttpStatusCode(err)` is exactly 304.
 */
function isNotModifiedError(err) {
    const status = getHttpStatusCode(err);
    return status === 304;
}
@@ -0,0 +1,93 @@
1
+ /**
2
+ * Issue Discovery — orchestrates multi-phase issue search across GitHub.
3
+ *
4
+ * Delegates filtering, scoring, vetting, and search infrastructure to focused modules:
5
+ * - issue-filtering.ts — spam detection, doc-only filtering, per-repo caps
6
+ * - issue-scoring.ts — viability scores, repo quality bonuses
7
+ * - issue-vetting.ts — vetting orchestration, recommendation + viability scoring
8
+ * - issue-eligibility.ts — PR existence, claim detection, requirements analysis
9
+ * - repo-health.ts — project health checks, contribution guidelines
10
+ * - search-phases.ts — search helpers, caching, batched repo search
11
+ *
12
+ * All state is injected via constructor parameters (ScoutStateReader + ScoutPreferences).
13
+ */
14
+ import { type IssueCandidate } from './types.js';
15
+ import type { ScoutPreferences, SearchStrategy } from './schemas.js';
16
+ import { type ScoutStateReader } from './issue-vetting.js';
17
+ /**
18
+ * Multi-phase issue discovery engine that searches GitHub for contributable issues.
19
+ *
20
+ * Search phases (in priority order):
21
+ * 0. Repos where user has merged PRs (highest merge probability)
22
+ * 0.5. Preferred organizations
23
+ * 1. Starred repos
24
+ * 2. General label-filtered search
25
+ * 3. Actively maintained repos
26
+ *
27
+ * Each candidate is vetted for claimability and scored 0-100 for viability.
28
+ */
29
+ export declare class IssueDiscovery {
30
+ private preferences;
31
+ private stateReader;
32
+ private octokit;
33
+ private githubToken;
34
+ private vetter;
35
+ /** Set after searchIssues() runs if rate limits affected the search (low pre-flight quota or mid-search rate limit hits). */
36
+ rateLimitWarning: string | null;
37
+ /**
38
+ * @param githubToken - GitHub personal access token or token from `gh auth token`
39
+ * @param preferences - User's search preferences (languages, labels, scopes, etc.)
40
+ * @param stateReader - Read-only interface for accessing scout state (merged PRs, starred repos, etc.)
41
+ */
42
+ constructor(githubToken: string, preferences: ScoutPreferences, stateReader: ScoutStateReader);
43
+ /**
44
+ * Get starred repos from the state reader.
45
+ * @returns Array of starred repo names in "owner/repo" format
46
+ */
47
+ getStarredRepos(): string[];
48
+ /**
49
+ * Search for issues matching our criteria.
50
+ * Searches in priority order: merged-PR repos first (no label filter), then preferred
51
+ * organizations, then starred repos, then general search, then actively maintained repos.
52
+ * Filters out issues from low-scoring and excluded repos.
53
+ *
54
+ * @param options - Search configuration (every field is optional)
55
+ * @param options.languages - Programming languages to filter by
56
+ * @param options.labels - Issue labels to search for
57
+ * @param options.maxResults - Maximum candidates to return (default: 10)
+ * @param options.strategies - Search strategies for this call (NOTE(review): how they select or order the phases is not visible in this declaration; confirm against the implementation)
58
+ * @returns An object with the issue `candidates` and the `strategiesUsed` for this search
59
+ * @throws {ValidationError} If no candidates found and no rate limits prevented the search
60
+ *
61
+ * @example
62
+ * ```typescript
63
+ * import { IssueDiscovery } from '@oss-scout/core';
64
+ *
65
+ * const discovery = new IssueDiscovery(token, preferences, stateReader);
66
+ * const { candidates } = await discovery.searchIssues({ maxResults: 5 });
67
+ * for (const c of candidates) {
68
+ * console.log(`${c.issue.repo}#${c.issue.number}: ${c.viabilityScore}/100`);
69
+ * }
70
+ * ```
71
+ */
72
+ searchIssues(options?: {
73
+ languages?: string[];
74
+ labels?: string[];
75
+ maxResults?: number;
76
+ strategies?: SearchStrategy[];
77
+ }): Promise<{
78
+ candidates: IssueCandidate[];
79
+ strategiesUsed: SearchStrategy[];
80
+ }>;
81
+ /**
82
+ * Vet a specific issue for claimability and project health.
83
+ * @param issueUrl - Full GitHub issue URL
84
+ * @returns Promise resolving to the vetted issue candidate with recommendation and scores
85
+ * @throws {ValidationError} If the URL is invalid or the issue cannot be fetched
86
+ */
87
+ vetIssue(issueUrl: string): Promise<IssueCandidate>;
88
+ /**
89
+ * Derive low-scoring repos from the state reader.
90
+ * A repo is considered "low-scoring" if its score is at or below the threshold.
91
+ */
92
+ private deriveLowScoringRepos;
93
+ }