@oss-scout/core 0.11.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.bundle.cjs +78 -61
- package/dist/cli.js +401 -425
- package/dist/commands/command-scout.d.ts +21 -0
- package/dist/commands/command-scout.js +21 -0
- package/dist/commands/config.js +10 -128
- package/dist/commands/features.js +15 -28
- package/dist/commands/results.d.ts +13 -2
- package/dist/commands/results.js +29 -2
- package/dist/commands/search.js +63 -70
- package/dist/commands/setup.d.ts +2 -0
- package/dist/commands/setup.js +35 -6
- package/dist/commands/skip.d.ts +4 -0
- package/dist/commands/skip.js +45 -55
- package/dist/commands/sync.d.ts +10 -0
- package/dist/commands/sync.js +10 -0
- package/dist/commands/vet-list.js +3 -19
- package/dist/commands/vet.js +18 -25
- package/dist/commands/with-scout.d.ts +32 -0
- package/dist/commands/with-scout.js +41 -0
- package/dist/core/anti-llm-policy.js +4 -5
- package/dist/core/bootstrap.d.ts +2 -2
- package/dist/core/bootstrap.js +5 -9
- package/dist/core/errors.d.ts +10 -0
- package/dist/core/errors.js +20 -5
- package/dist/core/feature-discovery.d.ts +13 -1
- package/dist/core/feature-discovery.js +104 -81
- package/dist/core/gist-state-store.d.ts +13 -12
- package/dist/core/gist-state-store.js +128 -53
- package/dist/core/http-cache.d.ts +32 -2
- package/dist/core/http-cache.js +74 -19
- package/dist/core/issue-discovery.d.ts +2 -0
- package/dist/core/issue-discovery.js +44 -29
- package/dist/core/issue-eligibility.d.ts +10 -4
- package/dist/core/issue-eligibility.js +119 -67
- package/dist/core/issue-graphql.d.ts +58 -0
- package/dist/core/issue-graphql.js +108 -0
- package/dist/core/issue-vetting.d.ts +105 -8
- package/dist/core/issue-vetting.js +234 -107
- package/dist/core/local-state.d.ts +6 -2
- package/dist/core/local-state.js +23 -5
- package/dist/core/logger.d.ts +12 -4
- package/dist/core/logger.js +33 -7
- package/dist/core/personalization.d.ts +15 -10
- package/dist/core/personalization.js +30 -22
- package/dist/core/preference-fields.d.ts +47 -0
- package/dist/core/preference-fields.js +178 -0
- package/dist/core/repo-health.js +31 -15
- package/dist/core/roadmap.js +17 -3
- package/dist/core/schemas.d.ts +144 -26
- package/dist/core/schemas.js +74 -17
- package/dist/core/search-budget.d.ts +9 -0
- package/dist/core/search-budget.js +36 -3
- package/dist/core/search-phases.d.ts +0 -18
- package/dist/core/search-phases.js +27 -82
- package/dist/core/types.d.ts +136 -38
- package/dist/core/utils.js +60 -26
- package/dist/formatters/markdown.d.ts +10 -0
- package/dist/formatters/markdown.js +31 -0
- package/dist/index.d.ts +6 -2
- package/dist/index.js +8 -0
- package/dist/scout.d.ts +59 -10
- package/dist/scout.js +244 -20
- package/package.json +1 -1
package/dist/core/http-cache.js
CHANGED
|
@@ -16,6 +16,25 @@ import { getCacheDir } from "./utils.js";
|
|
|
16
16
|
import { debug, warn } from "./logger.js";
|
|
17
17
|
import { errorMessage, getHttpStatusCode } from "./errors.js";
|
|
18
18
|
const MODULE = "http-cache";
|
|
19
|
+
/**
|
|
20
|
+
* Schema version for cache entries whose body is an oss-scout-defined shape
|
|
21
|
+
* (vetting results, search payloads, policy scans, merged-PR counts) rather
|
|
22
|
+
* than a raw GitHub API response. These are deserialized with an unchecked
|
|
23
|
+
* cast, so a shape change between releases would otherwise let a new build read
|
|
24
|
+
* a stale-shaped entry. Bump this whenever one of those cached shapes changes:
|
|
25
|
+
* old entries then miss the version-prefixed key and are refetched instead of
|
|
26
|
+
* misread (#158). Raw ETag-keyed GitHub responses are not versioned — their
|
|
27
|
+
* shape is owned by GitHub, not us.
|
|
28
|
+
*/
|
|
29
|
+
export const CACHE_SCHEMA_VERSION = "v1";
|
|
30
|
+
/**
|
|
31
|
+
* Prefix a synthetic (non-URL) cache key with the schema version so a shape
|
|
32
|
+
* change invalidates old entries. Use for every key whose body is read back
|
|
33
|
+
* with an unchecked cast.
|
|
34
|
+
*/
|
|
35
|
+
export function versionedCacheKey(key) {
|
|
36
|
+
return `${CACHE_SCHEMA_VERSION}:${key}`;
|
|
37
|
+
}
|
|
19
38
|
/**
|
|
20
39
|
* Maximum age (in ms) before a cache entry is considered stale and eligible for
|
|
21
40
|
* eviction during cleanup. Defaults to 24 hours. Entries older than this are
|
|
@@ -51,13 +70,21 @@ export class HttpCache {
|
|
|
51
70
|
* (e.g., caching aggregated results from paginated API calls).
|
|
52
71
|
*/
|
|
53
72
|
getIfFresh(key, maxAgeMs) {
|
|
73
|
+
return this.getEntryIfFresh(key, maxAgeMs)?.body ?? null;
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Like {@link getIfFresh}, but returns the whole entry so callers can
|
|
77
|
+
* distinguish "no fresh entry" (null) from a legitimately cached falsy
|
|
78
|
+
* body (`0`, `""`, `false`, `null`).
|
|
79
|
+
*/
|
|
80
|
+
getEntryIfFresh(key, maxAgeMs) {
|
|
54
81
|
const entry = this.get(key);
|
|
55
82
|
if (!entry)
|
|
56
83
|
return null;
|
|
57
84
|
const age = Date.now() - new Date(entry.cachedAt).getTime();
|
|
58
85
|
if (!Number.isFinite(age) || age < 0 || age > maxAgeMs)
|
|
59
86
|
return null;
|
|
60
|
-
return entry
|
|
87
|
+
return entry;
|
|
61
88
|
}
|
|
62
89
|
/**
|
|
63
90
|
* Look up a cached response. Returns `null` if no cache entry exists.
|
|
@@ -246,13 +273,29 @@ export function getHttpCache() {
|
|
|
246
273
|
* cached body without consuming a rate-limit point.
|
|
247
274
|
* 3. On a fresh 200, caches the ETag + body for next time.
|
|
248
275
|
*/
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
276
|
+
/**
|
|
277
|
+
* Share one in-flight computation per key: concurrent callers for the same
|
|
278
|
+
* key await the same promise instead of paying duplicate API calls (#124).
|
|
279
|
+
* The check-then-register pair runs without an intervening await, so two
|
|
280
|
+
* concurrent callers cannot both miss. Rejections propagate to every waiter
|
|
281
|
+
* and are never cached.
|
|
282
|
+
*/
|
|
283
|
+
export async function withInflightDedup(cache, key, fn) {
|
|
284
|
+
const existing = cache.getInflight(key);
|
|
252
285
|
if (existing) {
|
|
253
|
-
debug(MODULE, `Dedup hit for ${
|
|
286
|
+
debug(MODULE, `Dedup hit for ${key}`);
|
|
254
287
|
return (await existing);
|
|
255
288
|
}
|
|
289
|
+
const promise = fn();
|
|
290
|
+
const cleanup = cache.setInflight(key, promise);
|
|
291
|
+
try {
|
|
292
|
+
return await promise;
|
|
293
|
+
}
|
|
294
|
+
finally {
|
|
295
|
+
cleanup();
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
export async function cachedRequest(cache, url, fetcher) {
|
|
256
299
|
const doFetch = async () => {
|
|
257
300
|
const extraHeaders = {};
|
|
258
301
|
const cached = cache.get(url);
|
|
@@ -276,19 +319,21 @@ export async function cachedRequest(cache, url, fetcher) {
|
|
|
276
319
|
debug(MODULE, `304 cache hit for ${url}`);
|
|
277
320
|
return freshCached.body;
|
|
278
321
|
}
|
|
322
|
+
// The entry that supplied If-None-Match vanished mid-flight (e.g. a
|
|
323
|
+
// concurrent process deleted it). Refetch unconditionally; without
|
|
324
|
+
// the conditional header the server cannot answer 304 again.
|
|
325
|
+
debug(MODULE, `304 but cache entry vanished for ${url}, refetching`);
|
|
326
|
+
const response = await fetcher({});
|
|
327
|
+
const etag = response.headers?.["etag"];
|
|
328
|
+
if (etag) {
|
|
329
|
+
cache.set(url, etag, response.data);
|
|
330
|
+
}
|
|
331
|
+
return response.data;
|
|
279
332
|
}
|
|
280
333
|
throw err;
|
|
281
334
|
}
|
|
282
335
|
};
|
|
283
|
-
|
|
284
|
-
const cleanup = cache.setInflight(url, promise);
|
|
285
|
-
try {
|
|
286
|
-
const result = await promise;
|
|
287
|
-
return result;
|
|
288
|
-
}
|
|
289
|
-
finally {
|
|
290
|
-
cleanup();
|
|
291
|
-
}
|
|
336
|
+
return withInflightDedup(cache, url, doFetch);
|
|
292
337
|
}
|
|
293
338
|
/**
|
|
294
339
|
* Time-based cache wrapper (no ETag / conditional requests).
|
|
@@ -300,14 +345,24 @@ export async function cachedRequest(cache, url, fetcher) {
|
|
|
300
345
|
* (e.g. search queries, project health checks).
|
|
301
346
|
*/
|
|
302
347
|
export async function cachedTimeBased(cache, key, maxAgeMs, fetcher) {
|
|
303
|
-
const cached = cache.
|
|
348
|
+
const cached = cache.getEntryIfFresh(key, maxAgeMs);
|
|
304
349
|
if (cached) {
|
|
305
350
|
debug(MODULE, `Time-based cache hit for ${key}`);
|
|
306
|
-
return cached;
|
|
351
|
+
return cached.body;
|
|
307
352
|
}
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
return
|
|
353
|
+
// Concurrent same-key callers (parallel vetting hitting one repo) share
|
|
354
|
+
// a single fetch instead of stampeding the API (#124)
|
|
355
|
+
return withInflightDedup(cache, key, async () => {
|
|
356
|
+
// Re-check inside the dedup window: a caller that finished while we
|
|
357
|
+
// queued may have populated the cache
|
|
358
|
+
const fresh = cache.getEntryIfFresh(key, maxAgeMs);
|
|
359
|
+
if (fresh) {
|
|
360
|
+
return fresh.body;
|
|
361
|
+
}
|
|
362
|
+
const result = await fetcher();
|
|
363
|
+
cache.set(key, "", result);
|
|
364
|
+
return result;
|
|
365
|
+
});
|
|
311
366
|
}
|
|
312
367
|
/**
|
|
313
368
|
* Detect whether an error is a 304 Not Modified response.
|
|
@@ -77,6 +77,8 @@ export declare class IssueDiscovery {
|
|
|
77
77
|
preferLanguages?: string[];
|
|
78
78
|
preferRepos?: string[];
|
|
79
79
|
diversityRatio?: number;
|
|
80
|
+
interPhaseDelayMs?: number;
|
|
81
|
+
broadPhaseDelayMs?: number;
|
|
80
82
|
}): Promise<{
|
|
81
83
|
candidates: IssueCandidate[];
|
|
82
84
|
strategiesUsed: SearchStrategy[];
|
|
@@ -22,7 +22,7 @@ import { isDocOnlyIssue, applyPerRepoCap, } from "./issue-filtering.js";
|
|
|
22
22
|
import { IssueVetter } from "./issue-vetting.js";
|
|
23
23
|
import { getTopicsForCategories } from "./category-mapping.js";
|
|
24
24
|
import { buildEffectiveLabels, interleaveArrays, cachedSearchIssues, fetchIssuesFromMaintainedRepos, filterVetAndScore, fetchIssuesFromKnownRepos, searchAcrossLanguagesAndLabels, } from "./search-phases.js";
|
|
25
|
-
import { annotateBoost, applyDiversityRatio } from "./personalization.js";
|
|
25
|
+
import { annotateBoost, applyDiversityRatio, boostScoreOf, } from "./personalization.js";
|
|
26
26
|
const MODULE = "issue-discovery";
|
|
27
27
|
/** If remaining search quota is below this, skip heavy phases (2, 3). */
|
|
28
28
|
const LOW_BUDGET_THRESHOLD = 20;
|
|
@@ -35,16 +35,19 @@ function buildIssueFilter(config) {
|
|
|
35
35
|
const repoFullName = extractRepoFromUrl(item.repository_url);
|
|
36
36
|
if (!repoFullName)
|
|
37
37
|
return false;
|
|
38
|
-
|
|
38
|
+
// Repo-name sets are lowercased at construction; compare lowercased so
|
|
39
|
+
// user-typed casing (Microsoft/TypeScript) still matches API casing.
|
|
40
|
+
const repoLower = repoFullName.toLowerCase();
|
|
41
|
+
if (config.excludedRepos.has(repoLower))
|
|
39
42
|
return false;
|
|
40
43
|
if (config.excludeOrgs.size > 0) {
|
|
41
|
-
const orgName =
|
|
44
|
+
const orgName = repoLower.split("/")[0];
|
|
42
45
|
if (orgName && config.excludeOrgs.has(orgName))
|
|
43
46
|
return false;
|
|
44
47
|
}
|
|
45
|
-
if (config.aiBlocklisted.has(
|
|
48
|
+
if (config.aiBlocklisted.has(repoLower))
|
|
46
49
|
return false;
|
|
47
|
-
if (config.lowScoringRepos.has(
|
|
50
|
+
if (config.lowScoringRepos.has(repoLower))
|
|
48
51
|
return false;
|
|
49
52
|
if (config.skippedUrls.has(item.html_url))
|
|
50
53
|
return false;
|
|
@@ -286,11 +289,12 @@ export class IssueDiscovery {
|
|
|
286
289
|
(scopes ? buildEffectiveLabels(scopes, config.labels) : config.labels);
|
|
287
290
|
const maxResults = options.maxResults || 10;
|
|
288
291
|
const minStars = config.minStars ?? 50;
|
|
289
|
-
const interPhaseDelay = config.interPhaseDelayMs ?? 30000;
|
|
290
|
-
// Strategy selection
|
|
292
|
+
const interPhaseDelay = options.interPhaseDelayMs ?? config.interPhaseDelayMs ?? 30000;
|
|
293
|
+
// Strategy selection. Empty arrays count as "unset" so a stored
|
|
294
|
+
// defaultStrategy of [] can't silently produce zero-strategy searches.
|
|
291
295
|
const ALL_STRATEGIES = CONCRETE_STRATEGIES;
|
|
292
|
-
const
|
|
293
|
-
|
|
296
|
+
const pickStrategies = (...candidates) => candidates.find((c) => c && c.length > 0) ?? ["all"];
|
|
297
|
+
const rawStrategies = pickStrategies(options.strategies, config.defaultStrategy);
|
|
294
298
|
const enabledStrategies = new Set(rawStrategies.includes("all") ? ALL_STRATEGIES : rawStrategies);
|
|
295
299
|
const strategiesUsed = [];
|
|
296
300
|
const allCandidates = [];
|
|
@@ -332,19 +336,19 @@ export class IssueDiscovery {
|
|
|
332
336
|
const openPRRepos = this.stateReader.getReposWithOpenPRs();
|
|
333
337
|
const starredRepos = this.getStarredRepos();
|
|
334
338
|
const starredRepoSet = new Set(starredRepos);
|
|
335
|
-
const lowScoringRepos = new Set(this.deriveLowScoringRepos(config.minRepoScoreThreshold));
|
|
339
|
+
const lowScoringRepos = new Set(this.deriveLowScoringRepos(config.minRepoScoreThreshold).map((r) => r.toLowerCase()));
|
|
336
340
|
// Build query parts
|
|
337
341
|
const isAnyLanguage = languages.some((l) => l.toLowerCase() === "any");
|
|
338
342
|
const langQuery = isAnyLanguage
|
|
339
343
|
? ""
|
|
340
344
|
: languages.map((l) => `language:${l}`).join(" ");
|
|
341
|
-
// Build reusable filter
|
|
342
|
-
const aiBlocklisted = new Set(config.aiPolicyBlocklist);
|
|
345
|
+
// Build reusable filter (repo-name sets lowercased; see buildIssueFilter)
|
|
346
|
+
const aiBlocklisted = new Set(config.aiPolicyBlocklist.map((r) => r.toLowerCase()));
|
|
343
347
|
if (aiBlocklisted.size > 0) {
|
|
344
348
|
debug(MODULE, `[AI_POLICY_FILTER] Filtering issues from ${aiBlocklisted.size} blocklisted repo(s): ${[...aiBlocklisted].join(", ")}`);
|
|
345
349
|
}
|
|
346
350
|
const filterIssues = buildIssueFilter({
|
|
347
|
-
excludedRepos: new Set(config.excludeRepos),
|
|
351
|
+
excludedRepos: new Set(config.excludeRepos.map((r) => r.toLowerCase())),
|
|
348
352
|
excludeOrgs: new Set((config.excludeOrgs ?? []).map((o) => o.toLowerCase())),
|
|
349
353
|
aiBlocklisted,
|
|
350
354
|
lowScoringRepos,
|
|
@@ -396,13 +400,21 @@ export class IssueDiscovery {
|
|
|
396
400
|
phaseErrors["1"] = result.error;
|
|
397
401
|
if (result.rateLimitHit)
|
|
398
402
|
rateLimitHitDuringSearch = true;
|
|
403
|
+
// Recorded only when the phase actually queried (#130)
|
|
404
|
+
strategiesUsed.push("starred");
|
|
399
405
|
}
|
|
400
406
|
}
|
|
401
|
-
strategiesUsed.push("starred");
|
|
402
407
|
}
|
|
403
408
|
// Phase 2: General search (with rate limit mitigation)
|
|
404
|
-
const broadDelay = config.broadPhaseDelayMs ?? 90000;
|
|
405
|
-
|
|
409
|
+
const broadDelay = options.broadPhaseDelayMs ?? config.broadPhaseDelayMs ?? 90000;
|
|
410
|
+
// Clamp to maxResults - 1: the phase gate below already skips the whole
|
|
411
|
+
// phase at >= maxResults, so any larger threshold would be unsatisfiable
|
|
412
|
+
// (the default 15 vs default maxResults 10 made this dead config). 0
|
|
413
|
+
// stays "never skip".
|
|
414
|
+
const configuredSkipThreshold = config.skipBroadWhenSufficientResults ?? 8;
|
|
415
|
+
const skipThreshold = configuredSkipThreshold > 0
|
|
416
|
+
? Math.min(configuredSkipThreshold, maxResults - 1)
|
|
417
|
+
: 0;
|
|
406
418
|
if (allCandidates.length < maxResults &&
|
|
407
419
|
searchBudget >= LOW_BUDGET_THRESHOLD &&
|
|
408
420
|
enabledStrategies.has("broad")) {
|
|
@@ -430,8 +442,10 @@ export class IssueDiscovery {
|
|
|
430
442
|
phaseErrors["2"] = result.error;
|
|
431
443
|
if (result.rateLimitHit)
|
|
432
444
|
rateLimitHitDuringSearch = true;
|
|
445
|
+
// Recorded only when the phase actually queried, not when the
|
|
446
|
+
// skip-threshold branch short-circuited it (#130)
|
|
447
|
+
strategiesUsed.push("broad");
|
|
433
448
|
}
|
|
434
|
-
strategiesUsed.push("broad");
|
|
435
449
|
}
|
|
436
450
|
// Phase 3: Actively maintained repos
|
|
437
451
|
if (allCandidates.length < maxResults &&
|
|
@@ -487,12 +501,13 @@ export class IssueDiscovery {
|
|
|
487
501
|
`Found ${allCandidates.length} candidate${allCandidates.length === 1 ? "" : "s"} but some search phases were limited. ` +
|
|
488
502
|
`Try again after the rate limit resets for complete results.`;
|
|
489
503
|
}
|
|
490
|
-
// Personalization annotation (#1244): tag
|
|
491
|
-
//
|
|
492
|
-
//
|
|
493
|
-
|
|
504
|
+
// Personalization annotation (#1244): tag matched candidates with a
|
|
505
|
+
// `personalization` marker before sorting so the new sort tier has values
|
|
506
|
+
// to read. Returns a new array (no in-place candidate mutation, #158);
|
|
507
|
+
// a no-op when neither preference list is supplied.
|
|
508
|
+
const ranked = annotateBoost(allCandidates, options.preferLanguages, options.preferRepos);
|
|
494
509
|
// Sort by priority, recommendation, boost (#1244), then viability score
|
|
495
|
-
|
|
510
|
+
ranked.sort((a, b) => {
|
|
496
511
|
const priorityOrder = {
|
|
497
512
|
merged_pr: 0,
|
|
498
513
|
starred: 1,
|
|
@@ -506,17 +521,17 @@ export class IssueDiscovery {
|
|
|
506
521
|
recommendationOrder[b.recommendation];
|
|
507
522
|
if (recDiff !== 0)
|
|
508
523
|
return recDiff;
|
|
509
|
-
// Personalization tier (#1244): higher
|
|
510
|
-
//
|
|
511
|
-
//
|
|
512
|
-
// `preferLanguages`/`preferRepos` are absent —
|
|
513
|
-
//
|
|
514
|
-
const boostDiff = (b
|
|
524
|
+
// Personalization tier (#1244): higher boost wins. boostScoreOf treats
|
|
525
|
+
// an unboosted candidate as 0 so they rank below boosted peers but stay
|
|
526
|
+
// ordered among themselves by viabilityScore. No-op when
|
|
527
|
+
// `preferLanguages`/`preferRepos` are absent — every candidate scores 0
|
|
528
|
+
// and the difference collapses.
|
|
529
|
+
const boostDiff = boostScoreOf(b) - boostScoreOf(a);
|
|
515
530
|
if (boostDiff !== 0)
|
|
516
531
|
return boostDiff;
|
|
517
532
|
return b.viabilityScore - a.viabilityScore;
|
|
518
533
|
});
|
|
519
|
-
const capped = applyPerRepoCap(
|
|
534
|
+
const capped = applyPerRepoCap(ranked, 2);
|
|
520
535
|
// Diversity counterweight (#1244): when `diversityRatio > 0`, reserve
|
|
521
536
|
// a fraction of the final slots for candidates that matched neither
|
|
522
537
|
// preference list. No-op when the ratio is 0 or absent — collapses to
|
|
@@ -7,10 +7,16 @@
|
|
|
7
7
|
*/
|
|
8
8
|
import { Octokit } from "@octokit/rest";
|
|
9
9
|
import type { CheckResult, LinkedPR } from "./types.js";
|
|
10
|
-
/**
|
|
11
|
-
|
|
10
|
+
/**
|
|
11
|
+
* Result of the existing-PR check, including metadata for the first linked PR
|
|
12
|
+
* (if any). An intersection (not `extends`) because CheckResult is now a
|
|
13
|
+
* discriminated union (#158); the `& { linkedPR }` distributes over both arms.
|
|
14
|
+
*/
|
|
15
|
+
export type ExistingPRCheckResult = CheckResult & {
|
|
12
16
|
linkedPR: LinkedPR | null;
|
|
13
|
-
}
|
|
17
|
+
};
|
|
18
|
+
/** True when a single comment body claims the issue. */
|
|
19
|
+
export declare function commentClaimsIssue(body: string): boolean;
|
|
14
20
|
/**
|
|
15
21
|
* Check whether an open PR already exists for the given issue.
|
|
16
22
|
* Uses the timeline API (REST) to detect cross-referenced PRs, avoiding
|
|
@@ -23,7 +29,7 @@ export declare function checkNoExistingPR(octokit: Octokit, owner: string, repo:
|
|
|
23
29
|
* Results are cached per-repo for 15 minutes to avoid redundant Search API
|
|
24
30
|
* calls when multiple issues from the same repo are vetted.
|
|
25
31
|
*/
|
|
26
|
-
export declare function checkUserMergedPRsInRepo(octokit: Octokit, owner: string, repo: string): Promise<number>;
|
|
32
|
+
export declare function checkUserMergedPRsInRepo(octokit: Octokit, owner: string, repo: string): Promise<number | null>;
|
|
27
33
|
/**
|
|
28
34
|
* Check whether an issue has been claimed by another contributor
|
|
29
35
|
* by scanning recent comments for claim phrases.
|
|
@@ -6,9 +6,9 @@
|
|
|
6
6
|
* Extracted from issue-vetting.ts to isolate eligibility logic.
|
|
7
7
|
*/
|
|
8
8
|
import { paginateAll } from "./pagination.js";
|
|
9
|
-
import { errorMessage,
|
|
9
|
+
import { errorMessage, rethrowIfFatal } from "./errors.js";
|
|
10
10
|
import { warn } from "./logger.js";
|
|
11
|
-
import { getHttpCache } from "./http-cache.js";
|
|
11
|
+
import { getHttpCache, withInflightDedup, versionedCacheKey, } from "./http-cache.js";
|
|
12
12
|
import { getSearchBudgetTracker } from "./search-budget.js";
|
|
13
13
|
function isLinkedPREvent(e) {
|
|
14
14
|
return e.event === "cross-referenced" && !!e.source?.issue?.pull_request;
|
|
@@ -50,24 +50,65 @@ function buildLinkedPRFromTimelineEvent(e, context) {
|
|
|
50
50
|
};
|
|
51
51
|
}
|
|
52
52
|
const MODULE = "issue-eligibility";
|
|
53
|
-
/**
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
53
|
+
/**
|
|
54
|
+
* Claim detection, applied per clause (sentence). Plain substring matching
|
|
55
|
+
* flagged questions ("is anyone working on it?") and negations ("no one is
|
|
56
|
+
* working on this") as claims. Rules:
|
|
57
|
+
*
|
|
58
|
+
* - A clause ending in "?" is never a claim, EXCEPT a permission request
|
|
59
|
+
* ("can I work on this?"), which is the author asking to take the issue.
|
|
60
|
+
* - A declarative clause with an indefinite or negated subject (anyone,
|
|
61
|
+
* someone, nobody, not, ...) is never a claim.
|
|
62
|
+
* - Otherwise declarative claim patterns match, including third-person
|
|
63
|
+
* ("Bob is working on it" means the issue is taken).
|
|
64
|
+
*/
|
|
65
|
+
/**
|
|
66
|
+
* Object that refers to the issue at hand: this/it/that, "the <thing>",
|
|
67
|
+
* "#123", "issue ...". Deliberately excludes "a <thing>" ("can I work on a
|
|
68
|
+
* repro?" introduces new work, it does not claim the issue). The numeric
|
|
69
|
+
* branch requires the # prefix: a bare number collides with quantity idioms
|
|
70
|
+
* ("can I take 5 minutes"). Residual misses: gerund objects ("work on
|
|
71
|
+
* fixing the bug") and bare numbers ("work on 126").
|
|
72
|
+
*/
|
|
73
|
+
const ISSUE_OBJECT = String.raw `(?:this\b|it\b|that\b|the\b|#\d+|issue\b)`;
|
|
74
|
+
/** Explicit first-person claims; not subject to the subject guard. */
|
|
75
|
+
const FIRST_PERSON_CLAIM_PATTERNS = [
|
|
76
|
+
new RegExp(String.raw `\bi\s*(?:'ll|will) take ${ISSUE_OBJECT}`),
|
|
77
|
+
new RegExp(String.raw `\bi\s*(?:'d|would) (?:like|love) to work on ${ISSUE_OBJECT}`),
|
|
78
|
+
/\bi\s*(?:'m|am) on it\b/,
|
|
79
|
+
/\bi\s*(?:'ll|will) submit a pr\b/,
|
|
80
|
+
/\bassigned to me\b/,
|
|
81
|
+
];
|
|
82
|
+
/**
|
|
83
|
+
* Generic "working on ..." phrasings. These also match third-person claims
|
|
84
|
+
* ("Bob is working on it"), so they need the subject guard below to avoid
|
|
85
|
+
* flagging indefinite or negated subjects.
|
|
86
|
+
*/
|
|
87
|
+
const GENERIC_WORKING_PATTERNS = [
|
|
88
|
+
/\bworking on (?:this|it)\b/,
|
|
89
|
+
/\bworking on a (?:fix|pr)\b/,
|
|
70
90
|
];
|
|
91
|
+
/** Asking to take the issue counts as a claim even phrased as a question. */
|
|
92
|
+
const PERMISSION_CLAIM_PATTERN = new RegExp(String.raw `\b(?:can|may|could) i (?:work on|take) ${ISSUE_OBJECT}`);
|
|
93
|
+
/** Subjects/negations that make a "working on ..." clause a non-claim. */
|
|
94
|
+
const NON_CLAIM_SUBJECTS = /\b(?:anyone|anybody|someone|somebody|who|whoever|nobody|no[- ]?one|not)\b/;
|
|
95
|
+
/** True when a single comment body claims the issue. */
|
|
96
|
+
export function commentClaimsIssue(body) {
|
|
97
|
+
const clauses = body.toLowerCase().split(/(?<=[.!?])|\n+/);
|
|
98
|
+
for (const clause of clauses) {
|
|
99
|
+
if (PERMISSION_CLAIM_PATTERN.test(clause))
|
|
100
|
+
return true;
|
|
101
|
+
if (clause.trimEnd().endsWith("?"))
|
|
102
|
+
continue;
|
|
103
|
+
if (FIRST_PERSON_CLAIM_PATTERNS.some((p) => p.test(clause)))
|
|
104
|
+
return true;
|
|
105
|
+
if (NON_CLAIM_SUBJECTS.test(clause))
|
|
106
|
+
continue;
|
|
107
|
+
if (GENERIC_WORKING_PATTERNS.some((p) => p.test(clause)))
|
|
108
|
+
return true;
|
|
109
|
+
}
|
|
110
|
+
return false;
|
|
111
|
+
}
|
|
71
112
|
/**
|
|
72
113
|
* Check whether an open PR already exists for the given issue.
|
|
73
114
|
* Uses the timeline API (REST) to detect cross-referenced PRs, avoiding
|
|
@@ -106,9 +147,7 @@ export async function checkNoExistingPR(octokit, owner, repo, issueNumber) {
|
|
|
106
147
|
return { passed: linkedPRCount === 0, linkedPR };
|
|
107
148
|
}
|
|
108
149
|
catch (error) {
|
|
109
|
-
|
|
110
|
-
throw error;
|
|
111
|
-
}
|
|
150
|
+
rethrowIfFatal(error);
|
|
112
151
|
const errMsg = errorMessage(error);
|
|
113
152
|
warn(MODULE, `Failed to check for existing PRs on ${owner}/${repo}#${issueNumber}: ${errMsg}. Assuming no existing PR.`);
|
|
114
153
|
return { passed: true, inconclusive: true, reason: errMsg, linkedPR: null };
|
|
@@ -124,40 +163,46 @@ const MERGED_PR_CACHE_TTL_MS = 15 * 60 * 1000;
|
|
|
124
163
|
*/
|
|
125
164
|
export async function checkUserMergedPRsInRepo(octokit, owner, repo) {
|
|
126
165
|
const cache = getHttpCache();
|
|
127
|
-
const cacheKey = `merged-prs:${owner}/${repo}
|
|
128
|
-
//
|
|
129
|
-
//
|
|
130
|
-
//
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
const
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
// Use @me to search as the authenticated user
|
|
140
|
-
const { data } = await octokit.search.issuesAndPullRequests({
|
|
141
|
-
q: `repo:${owner}/${repo} is:pr is:merged author:@me`,
|
|
142
|
-
per_page: 1, // We only need total_count
|
|
143
|
-
});
|
|
144
|
-
// Only cache successful results
|
|
145
|
-
cache.set(cacheKey, "", data.total_count);
|
|
146
|
-
return data.total_count;
|
|
166
|
+
const cacheKey = versionedCacheKey(`merged-prs:${owner}/${repo}`);
|
|
167
|
+
// In-flight dedup: parallel vetting frequently hits several issues from
|
|
168
|
+
// one repo at once, and each used to pay a separate Search API call
|
|
169
|
+
// before the first populated the cache (#124).
|
|
170
|
+
return withInflightDedup(cache, cacheKey, async () => {
|
|
171
|
+
// Manual cache check — do not use cachedTimeBased because we must NOT
|
|
172
|
+
// cache error-path fallback values (a transient failure returning 0
|
|
173
|
+
// would poison the cache for 15 minutes, hiding that the user has
|
|
174
|
+
// merged PRs in the repo).
|
|
175
|
+
const cached = cache.getIfFresh(cacheKey, MERGED_PR_CACHE_TTL_MS);
|
|
176
|
+
if (cached != null && typeof cached === "number") {
|
|
177
|
+
return cached;
|
|
147
178
|
}
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
tracker.
|
|
179
|
+
try {
|
|
180
|
+
const tracker = getSearchBudgetTracker();
|
|
181
|
+
await tracker.waitForBudget();
|
|
182
|
+
try {
|
|
183
|
+
// Use @me to search as the authenticated user
|
|
184
|
+
const { data } = await octokit.search.issuesAndPullRequests({
|
|
185
|
+
q: `repo:${owner}/${repo} is:pr is:merged author:@me`,
|
|
186
|
+
per_page: 1, // We only need total_count
|
|
187
|
+
});
|
|
188
|
+
// Only cache successful results
|
|
189
|
+
cache.set(cacheKey, "", data.total_count);
|
|
190
|
+
return data.total_count;
|
|
191
|
+
}
|
|
192
|
+
finally {
|
|
193
|
+
// Always record the call — failed requests still consume GitHub rate limit points
|
|
194
|
+
tracker.recordCall();
|
|
195
|
+
}
|
|
151
196
|
}
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
197
|
+
catch (error) {
|
|
198
|
+
rethrowIfFatal(error);
|
|
199
|
+
const errMsg = errorMessage(error);
|
|
200
|
+
warn(MODULE, `Could not check merged PRs in ${owner}/${repo}: ${errMsg}. Treating as unknown.`);
|
|
201
|
+
// null (not 0) so callers can tell a transient failure from a real zero
|
|
202
|
+
// and avoid caching verdicts built on it. Not cached — next call retries.
|
|
203
|
+
return null;
|
|
156
204
|
}
|
|
157
|
-
|
|
158
|
-
warn(MODULE, `Could not check merged PRs in ${owner}/${repo}: ${errMsg}. Defaulting to 0.`);
|
|
159
|
-
return 0; // Not cached — next call will retry
|
|
160
|
-
}
|
|
205
|
+
});
|
|
161
206
|
}
|
|
162
207
|
/**
|
|
163
208
|
* Check whether an issue has been claimed by another contributor
|
|
@@ -167,27 +212,34 @@ export async function checkNotClaimed(octokit, owner, repo, issueNumber, comment
|
|
|
167
212
|
if (commentCount === 0)
|
|
168
213
|
return { passed: true };
|
|
169
214
|
try {
|
|
170
|
-
//
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
const recentComments =
|
|
215
|
+
// Fetch only the newest comments. Walking every page cost a
|
|
216
|
+
// 2,000-comment issue 20 list calls per vet, then discarded all but the
|
|
217
|
+
// tail anyway. Claims live in recent activity, so fetch the last page
|
|
218
|
+
// (plus its predecessor so a short last page still yields ~100+
|
|
219
|
+
// comments): at most 2 calls.
|
|
220
|
+
const PER_PAGE = 100;
|
|
221
|
+
const lastPage = Math.max(1, Math.ceil(commentCount / PER_PAGE));
|
|
222
|
+
const pagesToFetch = lastPage > 1 ? [lastPage - 1, lastPage] : [1];
|
|
223
|
+
const recentComments = [];
|
|
224
|
+
for (const page of pagesToFetch) {
|
|
225
|
+
const response = await octokit.issues.listComments({
|
|
226
|
+
owner,
|
|
227
|
+
repo,
|
|
228
|
+
issue_number: issueNumber,
|
|
229
|
+
per_page: PER_PAGE,
|
|
230
|
+
page,
|
|
231
|
+
});
|
|
232
|
+
recentComments.push(...response.data);
|
|
233
|
+
}
|
|
179
234
|
for (const comment of recentComments) {
|
|
180
|
-
|
|
181
|
-
if (CLAIM_PHRASES.some((phrase) => body.includes(phrase))) {
|
|
235
|
+
if (commentClaimsIssue(comment.body || "")) {
|
|
182
236
|
return { passed: false };
|
|
183
237
|
}
|
|
184
238
|
}
|
|
185
239
|
return { passed: true };
|
|
186
240
|
}
|
|
187
241
|
catch (error) {
|
|
188
|
-
|
|
189
|
-
throw error;
|
|
190
|
-
}
|
|
242
|
+
rethrowIfFatal(error);
|
|
191
243
|
const errMsg = errorMessage(error);
|
|
192
244
|
warn(MODULE, `Failed to check claim status on ${owner}/${repo}#${issueNumber}: ${errMsg}. Assuming not claimed.`);
|
|
193
245
|
return { passed: true, inconclusive: true, reason: errMsg };
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Batched GraphQL prefetch of issue "core" data (#169).
|
|
3
|
+
*
|
|
4
|
+
* `vetIssue` re-fetches each issue's basic fields (title, body, state, labels,
|
|
5
|
+
* timestamps, comment count) via a per-issue REST `issues.get`. When a search
|
|
6
|
+
* surfaces N issues that all need vetting, that is N separate REST calls before
|
|
7
|
+
* any of the deeper checks even start.
|
|
8
|
+
*
|
|
9
|
+
* `prefetchIssueCores` collapses those N calls into ONE aliased GraphQL query.
|
|
10
|
+
* The result is a map keyed by `owner/repo#number`; `vetIssue` consumes a hit
|
|
11
|
+
* instead of calling `issues.get`, and falls back to REST for any miss (a
|
|
12
|
+
* deleted issue, a permission error on one repo, or a non-fatal GraphQL blip).
|
|
13
|
+
*
|
|
14
|
+
* Scope is deliberately limited to the `issues.get` fields. The other vetting
|
|
15
|
+
* calls (timeline-based PR detection, claim scanning, project health,
|
|
16
|
+
* contribution guidelines) stay REST — batching those has pagination-semantics
|
|
17
|
+
* divergence risk and is left as a follow-up.
|
|
18
|
+
*/
|
|
19
|
+
import type { Octokit } from "@octokit/rest";
|
|
20
|
+
/**
|
|
21
|
+
* Normalized issue fields equivalent to the subset of a REST `issues.get`
|
|
22
|
+
* response that `vetIssue` reads. Produced from either GraphQL (prefetch) or
|
|
23
|
+
* REST (fallback) so the two paths are interchangeable.
|
|
24
|
+
*/
|
|
25
|
+
export interface PrefetchedIssueCore {
|
|
26
|
+
/** GitHub numeric database id (REST `id` / GraphQL `databaseId`). */
|
|
27
|
+
id: number;
|
|
28
|
+
title: string;
|
|
29
|
+
/** Empty string when the issue has no body (matches REST `body || ""`). */
|
|
30
|
+
body: string;
|
|
31
|
+
state: "open" | "closed";
|
|
32
|
+
/** Label names, in declared order. */
|
|
33
|
+
labels: string[];
|
|
34
|
+
/** Total comment count (REST `comments` / GraphQL `comments.totalCount`). */
|
|
35
|
+
commentCount: number;
|
|
36
|
+
createdAt: string;
|
|
37
|
+
updatedAt: string;
|
|
38
|
+
}
|
|
39
|
+
/** A single issue to prefetch. */
|
|
40
|
+
export interface IssueRef {
|
|
41
|
+
owner: string;
|
|
42
|
+
repo: string;
|
|
43
|
+
number: number;
|
|
44
|
+
}
|
|
45
|
+
/** Map key for a prefetched core, also used by callers to look one up. */
|
|
46
|
+
export declare function issueCoreKey(owner: string, repo: string, number: number): string;
|
|
47
|
+
/**
|
|
48
|
+
* Batch-fetch issue core data with one aliased GraphQL query. Returns a map of
|
|
49
|
+
* `owner/repo#number` to the normalized core. Issues that the query could not
|
|
50
|
+
* resolve are simply absent — the caller is expected to fall back to REST for
|
|
51
|
+
* any key not in the map.
|
|
52
|
+
*
|
|
53
|
+
* Failure handling mirrors the rest of the vetter: fatal errors (401 / rate
|
|
54
|
+
* limit) propagate via `rethrowIfFatal`; a partial-data GraphQL error (one bad
|
|
55
|
+
* issue in the batch) keeps the aliases that did resolve; any other non-fatal
|
|
56
|
+
* error returns whatever resolved so the caller degrades to all-REST.
|
|
57
|
+
*/
|
|
58
|
+
export declare function prefetchIssueCores(octokit: Octokit, issues: IssueRef[]): Promise<Map<string, PrefetchedIssueCore>>;
|