@oss-scout/core 0.11.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.bundle.cjs +89 -66
- package/dist/cli.js +302 -436
- package/dist/commands/command-scout.d.ts +21 -0
- package/dist/commands/command-scout.js +21 -0
- package/dist/commands/config.js +10 -128
- package/dist/commands/features.js +15 -28
- package/dist/commands/results.d.ts +13 -2
- package/dist/commands/results.js +29 -2
- package/dist/commands/search.d.ts +4 -0
- package/dist/commands/search.js +65 -70
- package/dist/commands/setup.d.ts +2 -0
- package/dist/commands/setup.js +35 -6
- package/dist/commands/skip.d.ts +4 -0
- package/dist/commands/skip.js +45 -55
- package/dist/commands/sync.d.ts +10 -0
- package/dist/commands/sync.js +10 -0
- package/dist/commands/vet-list.js +3 -19
- package/dist/commands/vet.js +18 -25
- package/dist/commands/with-scout.d.ts +32 -0
- package/dist/commands/with-scout.js +41 -0
- package/dist/core/anti-llm-policy.js +5 -33
- package/dist/core/bootstrap.d.ts +2 -2
- package/dist/core/bootstrap.js +5 -9
- package/dist/core/errors.d.ts +10 -0
- package/dist/core/errors.js +20 -5
- package/dist/core/feature-discovery.d.ts +13 -1
- package/dist/core/feature-discovery.js +104 -81
- package/dist/core/gist-state-store.d.ts +13 -12
- package/dist/core/gist-state-store.js +128 -53
- package/dist/core/http-cache.d.ts +32 -2
- package/dist/core/http-cache.js +74 -19
- package/dist/core/issue-discovery.d.ts +12 -1
- package/dist/core/issue-discovery.js +94 -67
- package/dist/core/issue-eligibility.d.ts +11 -4
- package/dist/core/issue-eligibility.js +124 -69
- package/dist/core/issue-graphql.d.ts +58 -0
- package/dist/core/issue-graphql.js +108 -0
- package/dist/core/issue-vetting.d.ts +115 -9
- package/dist/core/issue-vetting.js +246 -109
- package/dist/core/local-state.d.ts +6 -2
- package/dist/core/local-state.js +23 -5
- package/dist/core/logger.d.ts +12 -4
- package/dist/core/logger.js +33 -7
- package/dist/core/personalization.d.ts +30 -10
- package/dist/core/personalization.js +64 -24
- package/dist/core/preference-fields.d.ts +47 -0
- package/dist/core/preference-fields.js +180 -0
- package/dist/core/probe-repo-file.d.ts +47 -0
- package/dist/core/probe-repo-file.js +57 -0
- package/dist/core/repo-health.js +40 -32
- package/dist/core/roadmap.js +26 -22
- package/dist/core/schemas.d.ts +148 -26
- package/dist/core/schemas.js +83 -17
- package/dist/core/search-budget.d.ts +9 -0
- package/dist/core/search-budget.js +36 -3
- package/dist/core/search-phases.d.ts +4 -21
- package/dist/core/search-phases.js +37 -89
- package/dist/core/types.d.ts +151 -38
- package/dist/core/utils.js +60 -26
- package/dist/formatters/human.d.ts +60 -0
- package/dist/formatters/human.js +199 -0
- package/dist/formatters/markdown.d.ts +10 -0
- package/dist/formatters/markdown.js +31 -0
- package/dist/index.d.ts +6 -2
- package/dist/index.js +8 -0
- package/dist/scout.d.ts +75 -12
- package/dist/scout.js +265 -26
- package/package.json +1 -1
|
@@ -8,6 +8,7 @@ import { Octokit } from "@octokit/rest";
|
|
|
8
8
|
import { type SearchPriority, type IssueCandidate, type IssueScope } from "./types.js";
|
|
9
9
|
import { type GitHubSearchItem } from "./issue-filtering.js";
|
|
10
10
|
import { IssueVetter } from "./issue-vetting.js";
|
|
11
|
+
import { type SearchBudgetTracker } from "./search-budget.js";
|
|
11
12
|
/** Resolve scope tiers into a flat label list, merged with custom labels. */
|
|
12
13
|
export declare function buildEffectiveLabels(scopes: IssueScope[], customLabels: string[]): string[];
|
|
13
14
|
/** Round-robin interleave multiple arrays. */
|
|
@@ -22,7 +23,7 @@ export declare function cachedSearchIssues(octokit: Octokit, params: {
|
|
|
22
23
|
sort: "created" | "updated" | "comments" | "reactions" | "interactions";
|
|
23
24
|
order: "asc" | "desc";
|
|
24
25
|
per_page: number;
|
|
25
|
-
}): Promise<{
|
|
26
|
+
}, tracker?: SearchBudgetTracker): Promise<{
|
|
26
27
|
total_count: number;
|
|
27
28
|
items: GitHubSearchItem[];
|
|
28
29
|
}>;
|
|
@@ -60,7 +61,7 @@ export declare function fetchIssuesFromKnownRepos(octokit: Octokit, vetter: Issu
|
|
|
60
61
|
* @param buildQuery Callback that receives a label query string and returns the full search query
|
|
61
62
|
* @param perPage Number of results per API call
|
|
62
63
|
*/
|
|
63
|
-
export declare function searchWithChunkedLabels(octokit: Octokit, labels: string[], reservedOps: number, buildQuery: (labelQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
|
|
64
|
+
export declare function searchWithChunkedLabels(octokit: Octokit, labels: string[], reservedOps: number, buildQuery: (labelQuery: string) => string, perPage: number, tracker?: SearchBudgetTracker): Promise<GitHubSearchItem[]>;
|
|
64
65
|
/**
|
|
65
66
|
* Build per-call language qualifier strings, fanning out across languages
|
|
66
67
|
* when a multi-language + labels combination would trip GitHub Search's
|
|
@@ -84,7 +85,7 @@ export declare function buildLanguageVariants(languages: string[], isAnyLanguage
|
|
|
84
85
|
* e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
|
|
85
86
|
* @param perPage Results per API call
|
|
86
87
|
*/
|
|
87
|
-
export declare function searchAcrossLanguagesAndLabels(octokit: Octokit, languages: string[], isAnyLanguage: boolean, labels: string[], buildBaseQuery: (langQuery: string) => string, perPage: number): Promise<GitHubSearchItem[]>;
|
|
88
|
+
export declare function searchAcrossLanguagesAndLabels(octokit: Octokit, languages: string[], isAnyLanguage: boolean, labels: string[], buildBaseQuery: (langQuery: string) => string, perPage: number, tracker?: SearchBudgetTracker): Promise<GitHubSearchItem[]>;
|
|
88
89
|
/**
|
|
89
90
|
* Shared pipeline: spam-filter, repo-exclusion, vetting, and star-count filter.
|
|
90
91
|
* Used by Phases 2 and 3 to convert raw search results into vetted candidates.
|
|
@@ -94,21 +95,3 @@ export declare function filterVetAndScore(vetter: IssueVetter, items: GitHubSear
|
|
|
94
95
|
allVetFailed: boolean;
|
|
95
96
|
rateLimitHit: boolean;
|
|
96
97
|
}>;
|
|
97
|
-
/**
|
|
98
|
-
* Search for issues within specific repos using batched queries.
|
|
99
|
-
*
|
|
100
|
-
* To avoid GitHub's secondary rate limit (30 requests/minute), we batch
|
|
101
|
-
* multiple repos into a single search query using OR syntax:
|
|
102
|
-
* repo:owner1/repo1 OR repo:owner2/repo2 OR repo:owner3/repo3
|
|
103
|
-
*
|
|
104
|
-
* Labels are chunked separately to stay within GitHub's 5 boolean operator limit.
|
|
105
|
-
* Each batch of repos consumes (batch.length - 1) OR operators, and the remaining
|
|
106
|
-
* budget is used for label OR operators.
|
|
107
|
-
*
|
|
108
|
-
* This reduces API calls from N (one per repo) to ceil(N/BATCH_SIZE) * label_chunks.
|
|
109
|
-
*/
|
|
110
|
-
export declare function searchInRepos(octokit: Octokit, vetter: IssueVetter, repos: string[], baseQualifiers: string, labels: string[], maxResults: number, priority: SearchPriority, filterFn: (items: GitHubSearchItem[]) => GitHubSearchItem[]): Promise<{
|
|
111
|
-
candidates: IssueCandidate[];
|
|
112
|
-
allBatchesFailed: boolean;
|
|
113
|
-
rateLimitHit: boolean;
|
|
114
|
-
}>;
|
|
@@ -7,10 +7,10 @@
|
|
|
7
7
|
import { SCOPE_LABELS, } from "./types.js";
|
|
8
8
|
import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
|
|
9
9
|
import { debug, warn } from "./logger.js";
|
|
10
|
-
import { getHttpCache } from "./http-cache.js";
|
|
10
|
+
import { getHttpCache, versionedCacheKey } from "./http-cache.js";
|
|
11
11
|
import { detectLabelFarmingRepos, } from "./issue-filtering.js";
|
|
12
12
|
import { extractRepoFromUrl, sleep } from "./utils.js";
|
|
13
|
-
import { getSearchBudgetTracker } from "./search-budget.js";
|
|
13
|
+
import { getSearchBudgetTracker, } from "./search-budget.js";
|
|
14
14
|
const MODULE = "search-phases";
|
|
15
15
|
/** GitHub Search API enforces a max of 5 AND/OR/NOT operators per query. */
|
|
16
16
|
const GITHUB_MAX_BOOLEAN_OPS = 5;
|
|
@@ -18,8 +18,6 @@ const GITHUB_MAX_BOOLEAN_OPS = 5;
|
|
|
18
18
|
* Set to 2000ms as a safety floor (max 30/min at the limit). The SearchBudgetTracker
|
|
19
19
|
* adds additional adaptive delays when needed. */
|
|
20
20
|
const INTER_QUERY_DELAY_MS = 2000;
|
|
21
|
-
/** Batch size for repo queries. 3 repos = 2 OR operators, leaving room for labels. */
|
|
22
|
-
const BATCH_SIZE = 3;
|
|
23
21
|
/**
|
|
24
22
|
* Chunk labels into groups that fit within the operator budget.
|
|
25
23
|
* N labels require N-1 OR operators, so maxPerChunk = budget + 1.
|
|
@@ -77,14 +75,6 @@ export function interleaveArrays(arrays) {
|
|
|
77
75
|
}
|
|
78
76
|
return result;
|
|
79
77
|
}
|
|
80
|
-
/** Split repos into batches of the specified size. */
|
|
81
|
-
function batchRepos(repos, batchSize) {
|
|
82
|
-
const batches = [];
|
|
83
|
-
for (let i = 0; i < repos.length; i += batchSize) {
|
|
84
|
-
batches.push(repos.slice(i, i + batchSize));
|
|
85
|
-
}
|
|
86
|
-
return batches;
|
|
87
|
-
}
|
|
88
78
|
// ── Search caching ──
|
|
89
79
|
/** TTL for cached search API results (15 minutes). */
|
|
90
80
|
const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
|
|
@@ -93,8 +83,12 @@ const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
|
|
|
93
83
|
* Repeated identical queries within SEARCH_CACHE_TTL_MS return cached results
|
|
94
84
|
* without consuming GitHub API rate limit points.
|
|
95
85
|
*/
|
|
96
|
-
export async function cachedSearchIssues(octokit, params
|
|
97
|
-
|
|
86
|
+
export async function cachedSearchIssues(octokit, params,
|
|
87
|
+
// Optional injected budget tracker. Defaults to the shared singleton so
|
|
88
|
+
// existing callers keep the exact same global budget accounting; a host
|
|
89
|
+
// serving concurrent searches can inject a per-search tracker for isolation.
|
|
90
|
+
tracker = getSearchBudgetTracker()) {
|
|
91
|
+
const cacheKey = versionedCacheKey(`search:${params.q}:${params.sort}:${params.order}:${params.per_page}`);
|
|
98
92
|
const cache = getHttpCache();
|
|
99
93
|
// Check cache first
|
|
100
94
|
const cached = cache.getIfFresh(cacheKey, SEARCH_CACHE_TTL_MS);
|
|
@@ -103,7 +97,6 @@ export async function cachedSearchIssues(octokit, params) {
|
|
|
103
97
|
return cached;
|
|
104
98
|
}
|
|
105
99
|
// Fetch from API
|
|
106
|
-
const tracker = getSearchBudgetTracker();
|
|
107
100
|
await tracker.waitForBudget();
|
|
108
101
|
let data;
|
|
109
102
|
try {
|
|
@@ -206,17 +199,32 @@ export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels,
|
|
|
206
199
|
const repoFullName = repos[i];
|
|
207
200
|
const [owner, repo] = repoFullName.split("/");
|
|
208
201
|
try {
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
202
|
+
// One query per label: the REST `labels` parameter is AND semantics
|
|
203
|
+
// (issues carrying ALL listed labels), so a comma-joined list like
|
|
204
|
+
// "good first issue,help wanted" returned ~nothing (#118). Querying
|
|
205
|
+
// per label and merging restores the intended any-of behavior.
|
|
206
|
+
const labelFilters = labels.length > 0 ? labels : [undefined];
|
|
207
|
+
const seenUrls = new Set();
|
|
208
|
+
const rawIssues = [];
|
|
209
|
+
for (const label of labelFilters) {
|
|
210
|
+
const response = await octokit.issues.listForRepo({
|
|
211
|
+
owner,
|
|
212
|
+
repo,
|
|
213
|
+
state: "open",
|
|
214
|
+
sort: "created",
|
|
215
|
+
direction: "desc",
|
|
216
|
+
per_page: 5,
|
|
217
|
+
...(label !== undefined ? { labels: label } : {}),
|
|
218
|
+
});
|
|
219
|
+
for (const issue of response.data) {
|
|
220
|
+
if (seenUrls.has(issue.html_url))
|
|
221
|
+
continue;
|
|
222
|
+
seenUrls.add(issue.html_url);
|
|
223
|
+
rawIssues.push(issue);
|
|
224
|
+
}
|
|
225
|
+
}
|
|
218
226
|
// Filter out pull requests (REST issues endpoint returns both) and assigned issues
|
|
219
|
-
const issuesOnly =
|
|
227
|
+
const issuesOnly = rawIssues.filter((item) => !("pull_request" in item) && !item.assignee);
|
|
220
228
|
const mapped = issuesOnly.map((issue) => ({
|
|
221
229
|
html_url: issue.html_url,
|
|
222
230
|
repository_url: `https://api.github.com/repos/${repoFullName}`,
|
|
@@ -268,7 +276,7 @@ export async function fetchIssuesFromKnownRepos(octokit, vetter, repos, labels,
|
|
|
268
276
|
* @param buildQuery Callback that receives a label query string and returns the full search query
|
|
269
277
|
* @param perPage Number of results per API call
|
|
270
278
|
*/
|
|
271
|
-
export async function searchWithChunkedLabels(octokit, labels, reservedOps, buildQuery, perPage) {
|
|
279
|
+
export async function searchWithChunkedLabels(octokit, labels, reservedOps, buildQuery, perPage, tracker = getSearchBudgetTracker()) {
|
|
272
280
|
const labelChunks = chunkLabels(labels, reservedOps);
|
|
273
281
|
const seenUrls = new Set();
|
|
274
282
|
const allItems = [];
|
|
@@ -281,7 +289,7 @@ export async function searchWithChunkedLabels(octokit, labels, reservedOps, buil
|
|
|
281
289
|
sort: "created",
|
|
282
290
|
order: "desc",
|
|
283
291
|
per_page: perPage,
|
|
284
|
-
});
|
|
292
|
+
}, tracker);
|
|
285
293
|
for (const item of data.items) {
|
|
286
294
|
if (!seenUrls.has(item.html_url)) {
|
|
287
295
|
seenUrls.add(item.html_url);
|
|
@@ -322,7 +330,7 @@ export function buildLanguageVariants(languages, isAnyLanguage, hasLabels) {
|
|
|
322
330
|
* e.g. `(langQ) => `is:issue is:open ${langQ} no:assignee`.trim()`
|
|
323
331
|
* @param perPage Results per API call
|
|
324
332
|
*/
|
|
325
|
-
export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, labels, buildBaseQuery, perPage) {
|
|
333
|
+
export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLanguage, labels, buildBaseQuery, perPage, tracker = getSearchBudgetTracker()) {
|
|
326
334
|
const langVariants = buildLanguageVariants(languages, isAnyLanguage, labels.length > 0);
|
|
327
335
|
const seenUrls = new Set();
|
|
328
336
|
const allItems = [];
|
|
@@ -331,7 +339,7 @@ export async function searchAcrossLanguagesAndLabels(octokit, languages, isAnyLa
|
|
|
331
339
|
await sleep(INTER_QUERY_DELAY_MS);
|
|
332
340
|
const items = await searchWithChunkedLabels(octokit, labels, 0, (labelQ) => `${buildBaseQuery(langVariants[i])} ${labelQ}`
|
|
333
341
|
.replace(/ +/g, " ")
|
|
334
|
-
.trim(), perPage);
|
|
342
|
+
.trim(), perPage, tracker);
|
|
335
343
|
for (const item of items) {
|
|
336
344
|
if (!seenUrls.has(item.html_url)) {
|
|
337
345
|
seenUrls.add(item.html_url);
|
|
@@ -378,63 +386,3 @@ export async function filterVetAndScore(vetter, items, filterIssues, excludedRep
|
|
|
378
386
|
}
|
|
379
387
|
return { candidates: starFiltered, allVetFailed, rateLimitHit };
|
|
380
388
|
}
|
|
381
|
-
/**
|
|
382
|
-
* Search for issues within specific repos using batched queries.
|
|
383
|
-
*
|
|
384
|
-
* To avoid GitHub's secondary rate limit (30 requests/minute), we batch
|
|
385
|
-
* multiple repos into a single search query using OR syntax:
|
|
386
|
-
* repo:owner1/repo1 OR repo:owner2/repo2 OR repo:owner3/repo3
|
|
387
|
-
*
|
|
388
|
-
* Labels are chunked separately to stay within GitHub's 5 boolean operator limit.
|
|
389
|
-
* Each batch of repos consumes (batch.length - 1) OR operators, and the remaining
|
|
390
|
-
* budget is used for label OR operators.
|
|
391
|
-
*
|
|
392
|
-
* This reduces API calls from N (one per repo) to ceil(N/BATCH_SIZE) * label_chunks.
|
|
393
|
-
*/
|
|
394
|
-
export async function searchInRepos(octokit, vetter, repos, baseQualifiers, labels, maxResults, priority, filterFn) {
|
|
395
|
-
const candidates = [];
|
|
396
|
-
const batches = batchRepos(repos, BATCH_SIZE);
|
|
397
|
-
let failedBatches = 0;
|
|
398
|
-
let rateLimitFailures = 0;
|
|
399
|
-
for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) {
|
|
400
|
-
const batch = batches[batchIdx];
|
|
401
|
-
if (candidates.length >= maxResults)
|
|
402
|
-
break;
|
|
403
|
-
// Delay between batches to avoid secondary rate limits
|
|
404
|
-
if (batchIdx > 0)
|
|
405
|
-
await sleep(INTER_QUERY_DELAY_MS);
|
|
406
|
-
try {
|
|
407
|
-
const repoFilter = batch.map((r) => `repo:${r}`).join(" OR ");
|
|
408
|
-
const repoOps = batch.length - 1;
|
|
409
|
-
const perPage = Math.min(30, (maxResults - candidates.length) * 3);
|
|
410
|
-
const allItems = await searchWithChunkedLabels(octokit, labels, repoOps, (labelQ) => `${baseQualifiers} ${labelQ} (${repoFilter})`
|
|
411
|
-
.replace(/ +/g, " ")
|
|
412
|
-
.trim(), perPage);
|
|
413
|
-
if (allItems.length > 0) {
|
|
414
|
-
const filtered = filterFn(allItems);
|
|
415
|
-
const remainingNeeded = maxResults - candidates.length;
|
|
416
|
-
const { candidates: vetted, rateLimitHit: vetRateLimitHit } = await vetter.vetIssuesParallel(filtered.slice(0, remainingNeeded * 2).map((i) => i.html_url), remainingNeeded, priority);
|
|
417
|
-
candidates.push(...vetted);
|
|
418
|
-
if (vetRateLimitHit)
|
|
419
|
-
rateLimitFailures++;
|
|
420
|
-
}
|
|
421
|
-
}
|
|
422
|
-
catch (error) {
|
|
423
|
-
if (getHttpStatusCode(error) === 401)
|
|
424
|
-
throw error;
|
|
425
|
-
failedBatches++;
|
|
426
|
-
if (isRateLimitError(error)) {
|
|
427
|
-
rateLimitFailures++;
|
|
428
|
-
}
|
|
429
|
-
const batchReposStr = batch.join(", ");
|
|
430
|
-
warn(MODULE, `Error searching issues in batch [${batchReposStr}]:`, errorMessage(error));
|
|
431
|
-
}
|
|
432
|
-
}
|
|
433
|
-
const allBatchesFailed = failedBatches === batches.length && batches.length > 0;
|
|
434
|
-
const rateLimitHit = rateLimitFailures > 0;
|
|
435
|
-
if (allBatchesFailed) {
|
|
436
|
-
warn(MODULE, `All ${batches.length} batch(es) failed for ${priority} phase. ` +
|
|
437
|
-
`This may indicate a systemic issue (rate limit, auth, network).`);
|
|
438
|
-
}
|
|
439
|
-
return { candidates, allBatchesFailed, rateLimitHit };
|
|
440
|
-
}
|
package/dist/core/types.d.ts
CHANGED
|
@@ -2,9 +2,10 @@
|
|
|
2
2
|
* Core types for oss-scout — ephemeral types that are never persisted.
|
|
3
3
|
*/
|
|
4
4
|
import type { RepoSignals, TrackedIssue, IssueVettingResult, IssueScope, ScoutState, SearchStrategy } from "./schemas.js";
|
|
5
|
+
import type { LogLevel } from "./logger.js";
|
|
5
6
|
export type { ProjectCategory, IssueScope, RepoSignals, RepoScore, StoredMergedPR, StoredClosedPR, ContributionGuidelines, IssueVettingResult, LinkedPR, TrackedIssue, ScoutPreferences, SavedCandidate, ScoutState, SearchStrategy, } from "./schemas.js";
|
|
6
|
-
/**
|
|
7
|
-
export interface
|
|
7
|
+
/** A successful health snapshot of a GitHub repository. */
|
|
8
|
+
export interface ProjectHealthData {
|
|
8
9
|
repo: string;
|
|
9
10
|
lastCommitAt: string;
|
|
10
11
|
daysSinceLastCommit: number;
|
|
@@ -15,9 +16,26 @@ export interface ProjectHealth {
|
|
|
15
16
|
stargazersCount?: number;
|
|
16
17
|
forksCount?: number;
|
|
17
18
|
language?: string | null;
|
|
18
|
-
checkFailed
|
|
19
|
-
|
|
19
|
+
/** Discriminant: a real snapshot is never `checkFailed`. */
|
|
20
|
+
checkFailed?: false;
|
|
21
|
+
failureReason?: undefined;
|
|
20
22
|
}
|
|
23
|
+
/**
|
|
24
|
+
* The health check itself failed (transient API error). Only the repo and the
|
|
25
|
+
* failure reason are known — none of the snapshot fields are meaningful, so the
|
|
26
|
+
* type does not carry them. Narrow on `checkFailed` to reach a real snapshot.
|
|
27
|
+
*/
|
|
28
|
+
export interface ProjectHealthFailure {
|
|
29
|
+
repo: string;
|
|
30
|
+
checkFailed: true;
|
|
31
|
+
failureReason: string;
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Health snapshot of a GitHub repository, or a marker that the check failed.
|
|
35
|
+
* A discriminated union (on `checkFailed`) so the "failure" shape can't be read
|
|
36
|
+
* as if it carried real snapshot data. Narrow before reading snapshot fields.
|
|
37
|
+
*/
|
|
38
|
+
export type ProjectHealth = ProjectHealthData | ProjectHealthFailure;
|
|
21
39
|
/** Priority tier for issue search results. */
|
|
22
40
|
export type SearchPriority = "merged_pr" | "starred" | "normal";
|
|
23
41
|
/** Source file the anti-LLM policy match came from, or null when no file matched. */
|
|
@@ -43,6 +61,13 @@ export interface SLMTriageSummary {
|
|
|
43
61
|
/** A fully vetted issue candidate with scoring. */
|
|
44
62
|
export interface IssueCandidate {
|
|
45
63
|
issue: TrackedIssue;
|
|
64
|
+
/**
|
|
65
|
+
* GitHub issue state at vet time (#120). GitHub answers 200 for closed
|
|
66
|
+
* issues, so without this vet-list classified them still_available and
|
|
67
|
+
* --prune kept them. Optional: cached candidates from older versions
|
|
68
|
+
* lack it and read as open.
|
|
69
|
+
*/
|
|
70
|
+
issueState?: "open" | "closed";
|
|
46
71
|
vettingResult: IssueVettingResult;
|
|
47
72
|
projectHealth: ProjectHealth;
|
|
48
73
|
antiLLMPolicy: AntiLLMPolicyResult;
|
|
@@ -54,26 +79,22 @@ export interface IssueCandidate {
|
|
|
54
79
|
viabilityScore: number;
|
|
55
80
|
searchPriority: SearchPriority;
|
|
56
81
|
/**
|
|
57
|
-
* Personalization
|
|
58
|
-
*
|
|
59
|
-
*
|
|
60
|
-
*
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
*
|
|
65
|
-
* (#1244). Mirrors `reasonsToApprove`/`reasonsToSkip` shape for
|
|
66
|
-
* symmetry with the existing surface.
|
|
67
|
-
*/
|
|
68
|
-
boostReasons?: string[];
|
|
69
|
-
/**
|
|
70
|
-
* Marks a candidate that filled a reserved diversity slot (#1244).
|
|
71
|
-
* Populated only when `diversityRatio > 0` was passed AND the
|
|
72
|
-
* candidate matched no personalization bias. Mutually exclusive with
|
|
73
|
-
* a non-zero `boostScore` (a candidate cannot be both biased-toward
|
|
74
|
-
* and a diversity slot in the same result set).
|
|
82
|
+
* Personalization marker (#1244). A candidate is EITHER boosted (it matched
|
|
83
|
+
* a `preferLanguages` / `preferRepos` bias and gets a soft sort boost between
|
|
84
|
+
* the `recommendation` tier and `viabilityScore`) OR a diversity slot (it
|
|
85
|
+
* matched no bias and filled a slot reserved by `diversityRatio`) — never
|
|
86
|
+
* both. Modelling it as a single discriminated field makes that mutual
|
|
87
|
+
* exclusivity structural instead of prose across three optional fields.
|
|
88
|
+
* Absent when no personalization was requested or the candidate matched
|
|
89
|
+
* nothing.
|
|
75
90
|
*/
|
|
76
|
-
|
|
91
|
+
personalization?: {
|
|
92
|
+
kind: "boosted";
|
|
93
|
+
score: number;
|
|
94
|
+
reasons: string[];
|
|
95
|
+
} | {
|
|
96
|
+
kind: "diversity";
|
|
97
|
+
};
|
|
77
98
|
}
|
|
78
99
|
/** Subset of RepoScore fields that callers may update. */
|
|
79
100
|
export interface RepoScoreUpdate {
|
|
@@ -85,29 +106,51 @@ export interface RepoScoreUpdate {
|
|
|
85
106
|
stargazersCount?: number;
|
|
86
107
|
language?: string | null;
|
|
87
108
|
}
|
|
88
|
-
/**
|
|
89
|
-
|
|
109
|
+
/**
|
|
110
|
+
* Result of a check (e.g., no existing PR, not claimed). Discriminated on
|
|
111
|
+
* `inconclusive`: a `reason` exists only when the check could not be completed
|
|
112
|
+
* (a transient API error), and an inconclusive check always reports `passed:
|
|
113
|
+
* true` because the caller assumes the issue is still eligible. A conclusive
|
|
114
|
+
* result carries no `reason`.
|
|
115
|
+
*/
|
|
116
|
+
export type CheckResult = {
|
|
90
117
|
passed: boolean;
|
|
91
|
-
inconclusive?:
|
|
92
|
-
reason?:
|
|
93
|
-
}
|
|
118
|
+
inconclusive?: false;
|
|
119
|
+
reason?: undefined;
|
|
120
|
+
} | {
|
|
121
|
+
passed: true;
|
|
122
|
+
inconclusive: true;
|
|
123
|
+
reason: string;
|
|
124
|
+
};
|
|
94
125
|
export declare const SCOPE_LABELS: Record<IssueScope, string[]>;
|
|
95
126
|
/** Options for batch vetting saved results. */
|
|
96
127
|
export interface VetListOptions {
|
|
97
128
|
concurrency?: number;
|
|
98
129
|
prune?: boolean;
|
|
99
130
|
}
|
|
100
|
-
/**
|
|
101
|
-
export interface
|
|
131
|
+
/** Identity fields shared by every vet-list entry, regardless of outcome. */
|
|
132
|
+
export interface VetListEntryBase {
|
|
102
133
|
issueUrl: string;
|
|
103
134
|
repo: string;
|
|
104
135
|
number: number;
|
|
105
136
|
title: string;
|
|
106
137
|
status: "still_available" | "claimed" | "closed" | "has_pr" | "error";
|
|
107
|
-
recommendation?: "approve" | "skip" | "needs_review";
|
|
108
|
-
viabilityScore?: number;
|
|
109
|
-
errorMessage?: string;
|
|
110
138
|
}
|
|
139
|
+
/**
|
|
140
|
+
* A single entry in the vet-list result. Discriminated on `ok`: a completed vet
|
|
141
|
+
* (`ok: true`) carries `recommendation` + `viabilityScore` and never an
|
|
142
|
+
* `errorMessage`; a vet that threw (`ok: false`, including a 404/410 that
|
|
143
|
+
* classifies the issue as `closed`) carries only the `errorMessage`. This makes
|
|
144
|
+
* the "score xor error" invariant structural instead of prose.
|
|
145
|
+
*/
|
|
146
|
+
export type VetListEntry = (VetListEntryBase & {
|
|
147
|
+
ok: true;
|
|
148
|
+
recommendation: "approve" | "skip" | "needs_review";
|
|
149
|
+
viabilityScore: number;
|
|
150
|
+
}) | (VetListEntryBase & {
|
|
151
|
+
ok: false;
|
|
152
|
+
errorMessage: string;
|
|
153
|
+
});
|
|
111
154
|
/** Summary counts for a vet-list run. */
|
|
112
155
|
export interface VetListSummary {
|
|
113
156
|
total: number;
|
|
@@ -117,27 +160,69 @@ export interface VetListSummary {
|
|
|
117
160
|
hasPR: number;
|
|
118
161
|
errors: number;
|
|
119
162
|
}
|
|
163
|
+
/** Result of reconciling tracked open PRs against their current GitHub state (#164). */
|
|
164
|
+
export interface SyncResult {
|
|
165
|
+
/** Open PRs checked. */
|
|
166
|
+
checked: number;
|
|
167
|
+
/** Transitioned to merged. */
|
|
168
|
+
merged: number;
|
|
169
|
+
/** Transitioned to closed-without-merge. */
|
|
170
|
+
closed: number;
|
|
171
|
+
/** Still open (kept). */
|
|
172
|
+
stillOpen: number;
|
|
173
|
+
/** Could not be checked (parse failure or transient API error). */
|
|
174
|
+
errors: number;
|
|
175
|
+
}
|
|
176
|
+
/** A saved result whose availability status changed since the last vet-list (#165). */
|
|
177
|
+
export interface VetStatusTransition {
|
|
178
|
+
issueUrl: string;
|
|
179
|
+
repo: string;
|
|
180
|
+
number: number;
|
|
181
|
+
from: VetListEntry["status"];
|
|
182
|
+
to: VetListEntry["status"];
|
|
183
|
+
}
|
|
120
184
|
/** Result of a batch vet-list operation. */
|
|
121
185
|
export interface VetListResult {
|
|
122
186
|
results: VetListEntry[];
|
|
123
187
|
summary: VetListSummary;
|
|
124
188
|
prunedCount?: number;
|
|
189
|
+
/**
|
|
190
|
+
* Status changes since the previous vet-list run, computed from each saved
|
|
191
|
+
* result's `lastStatus`. Empty on a first run (no prior status to compare).
|
|
192
|
+
*/
|
|
193
|
+
transitions: VetStatusTransition[];
|
|
125
194
|
}
|
|
126
195
|
/** Configuration for creating an OssScout instance. */
|
|
127
196
|
export type ScoutConfig = {
|
|
128
|
-
/** GitHub token with `repo` read scope. Add `gist` scope for persistence. */
|
|
197
|
+
/** GitHub token with `repo` read scope. Add `gist` scope for gist persistence. */
|
|
129
198
|
githubToken: string;
|
|
130
|
-
/**
|
|
131
|
-
|
|
132
|
-
|
|
199
|
+
/**
|
|
200
|
+
* State storage. Omitted defaults to `"local"`: load and persist
|
|
201
|
+
* `~/.oss-scout/state.json`, no network on construct. `"gist"` syncs
|
|
202
|
+
* via a private GitHub gist (needs the `gist` token scope).
|
|
203
|
+
*/
|
|
204
|
+
persistence?: "local" | "gist";
|
|
205
|
+
/** Gist ID override (gist mode). Skips gist discovery/creation if provided. */
|
|
133
206
|
gistId?: string;
|
|
207
|
+
/**
|
|
208
|
+
* Minimum log level emitted to stderr. Omitted leaves the global level
|
|
209
|
+
* (default "info"). Hosts that don't want the "[INFO] Phase 0..."
|
|
210
|
+
* chatter can pass "warn" or "silent" (#156).
|
|
211
|
+
*/
|
|
212
|
+
logLevel?: LogLevel;
|
|
134
213
|
} | {
|
|
135
214
|
/** GitHub token with `repo` read scope. */
|
|
136
215
|
githubToken: string;
|
|
137
|
-
/** Caller provides state directly. */
|
|
216
|
+
/** Caller provides and owns state directly (embedding hosts). */
|
|
138
217
|
persistence: "provided";
|
|
139
218
|
/** Pre-loaded state. Required when persistence is 'provided'. */
|
|
140
219
|
initialState: ScoutState;
|
|
220
|
+
/**
|
|
221
|
+
* Minimum log level emitted to stderr. Omitted leaves the global level
|
|
222
|
+
* (default "info"). Hosts that don't want the "[INFO] Phase 0..."
|
|
223
|
+
* chatter can pass "warn" or "silent" (#156).
|
|
224
|
+
*/
|
|
225
|
+
logLevel?: LogLevel;
|
|
141
226
|
};
|
|
142
227
|
/** Options for the search method. */
|
|
143
228
|
export interface SearchOptions {
|
|
@@ -158,6 +243,21 @@ export interface SearchOptions {
|
|
|
158
243
|
* disables the boost.
|
|
159
244
|
*/
|
|
160
245
|
preferRepos?: string[];
|
|
246
|
+
/**
|
|
247
|
+
* Per-call personalization bias: a SOFT penalty (milder than the hard
|
|
248
|
+
* `excludeRepos` filter) for candidates in one of these `owner/repo` slugs
|
|
249
|
+
* (#168). They are pushed below equally-recommended non-matches but not
|
|
250
|
+
* removed; a strong boost can still outweigh the penalty. Empty / undefined
|
|
251
|
+
* disables it.
|
|
252
|
+
*/
|
|
253
|
+
avoidRepos?: string[];
|
|
254
|
+
/**
|
|
255
|
+
* Per-call personalization bias: a soft boost for candidates whose issue
|
|
256
|
+
* labels match one of these types, case-insensitive (e.g. "bug",
|
|
257
|
+
* "good first issue") (#168). Same tier as a language match. Does not filter
|
|
258
|
+
* results, does not change `viabilityScore`. Empty / undefined disables it.
|
|
259
|
+
*/
|
|
260
|
+
boostIssueTypes?: string[];
|
|
161
261
|
/**
|
|
162
262
|
* Counterweight against echo-chamber bias as `preferLanguages` /
|
|
163
263
|
* `preferRepos` boosts accumulate over time (#1244). A value of 0.2
|
|
@@ -168,6 +268,19 @@ export interface SearchOptions {
|
|
|
168
268
|
* clamped to [0, 1].
|
|
169
269
|
*/
|
|
170
270
|
diversityRatio?: number;
|
|
271
|
+
/**
|
|
272
|
+
* Per-call override for the delay between search phases (ms). Defaults to
|
|
273
|
+
* the `interPhaseDelayMs` preference (30s). Latency-sensitive callers like
|
|
274
|
+
* the MCP server pass 0; the sliding-window budget tracker still paces the
|
|
275
|
+
* actual API calls, so the fixed sleep is the only thing removed (#143).
|
|
276
|
+
*/
|
|
277
|
+
interPhaseDelayMs?: number;
|
|
278
|
+
/**
|
|
279
|
+
* Per-call override for the extra cooldown before the broad phase (ms).
|
|
280
|
+
* Defaults to the `broadPhaseDelayMs` preference (90s). See
|
|
281
|
+
* `interPhaseDelayMs` for the rationale (#143).
|
|
282
|
+
*/
|
|
283
|
+
broadPhaseDelayMs?: number;
|
|
171
284
|
}
|
|
172
285
|
/** Result of a search operation. */
|
|
173
286
|
export interface SearchResult {
|
package/dist/core/utils.js
CHANGED
|
@@ -36,14 +36,29 @@ export function getCacheDir() {
|
|
|
36
36
|
* - https://api.github.com/repos/owner/repo/...
|
|
37
37
|
*/
|
|
38
38
|
export function extractRepoFromUrl(url) {
|
|
39
|
+
// Real URL parsing: the previous regexes were unanchored (any host
|
|
40
|
+
// containing "github.com" matched) and leaked query/fragment text into
|
|
41
|
+
// the repo segment ("repo?tab=readme").
|
|
42
|
+
let parsed;
|
|
43
|
+
try {
|
|
44
|
+
parsed = new URL(url);
|
|
45
|
+
}
|
|
46
|
+
catch {
|
|
47
|
+
return null;
|
|
48
|
+
}
|
|
49
|
+
const host = parsed.hostname.toLowerCase().replace(/^www\./, "");
|
|
50
|
+
const segments = parsed.pathname.split("/").filter(Boolean);
|
|
39
51
|
// API URLs: https://api.github.com/repos/owner/repo[/...]
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
52
|
+
if (host === "api.github.com") {
|
|
53
|
+
if (segments[0] === "repos" && segments.length >= 3) {
|
|
54
|
+
return `${segments[1]}/${segments[2]}`;
|
|
55
|
+
}
|
|
56
|
+
return null;
|
|
57
|
+
}
|
|
43
58
|
// Web URLs: https://github.com/owner/repo[/...]
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
59
|
+
if (host === "github.com" && segments.length >= 2) {
|
|
60
|
+
return `${segments[0]}/${segments[1]}`;
|
|
61
|
+
}
|
|
47
62
|
return null;
|
|
48
63
|
}
|
|
49
64
|
const OWNER_PATTERN = /^[a-zA-Z0-9_-]+$/;
|
|
@@ -52,25 +67,38 @@ function isValidOwnerRepo(owner, repo) {
|
|
|
52
67
|
return OWNER_PATTERN.test(owner) && REPO_PATTERN.test(repo);
|
|
53
68
|
}
|
|
54
69
|
export function parseGitHubUrl(url) {
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
70
|
+
// Accept pasteable variants: http://, www., and bare github.com/... forms
|
|
71
|
+
// normalize to a parseable URL. Strict canonical-form validation for
|
|
72
|
+
// command input lives in commands/validation.ts; this parser is lenient.
|
|
73
|
+
const normalized = /^(?:www\.)?github\.com\//i.test(url)
|
|
74
|
+
? `https://${url}`
|
|
75
|
+
: url;
|
|
76
|
+
let parsed;
|
|
77
|
+
try {
|
|
78
|
+
parsed = new URL(normalized);
|
|
64
79
|
}
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
const owner = issueMatch[1];
|
|
68
|
-
const repo = issueMatch[2];
|
|
69
|
-
if (!isValidOwnerRepo(owner, repo))
|
|
70
|
-
return null;
|
|
71
|
-
return { owner, repo, number: parseInt(issueMatch[3], 10), type: "issues" };
|
|
80
|
+
catch {
|
|
81
|
+
return null;
|
|
72
82
|
}
|
|
73
|
-
|
|
83
|
+
if (parsed.protocol !== "https:" && parsed.protocol !== "http:")
|
|
84
|
+
return null;
|
|
85
|
+
const host = parsed.hostname.toLowerCase().replace(/^www\./, "");
|
|
86
|
+
if (host !== "github.com")
|
|
87
|
+
return null;
|
|
88
|
+
// Exactly owner/repo/(pull|issues)/<digits>; trailing slash tolerated via
|
|
89
|
+
// filter(Boolean), query/fragment excluded by pathname. A malformed number
|
|
90
|
+
// segment ("123abc") no longer half-parses to 123.
|
|
91
|
+
const segments = parsed.pathname.split("/").filter(Boolean);
|
|
92
|
+
if (segments.length !== 4)
|
|
93
|
+
return null;
|
|
94
|
+
const [owner, repo, type, num] = segments;
|
|
95
|
+
if (type !== "pull" && type !== "issues")
|
|
96
|
+
return null;
|
|
97
|
+
if (!isValidOwnerRepo(owner, repo))
|
|
98
|
+
return null;
|
|
99
|
+
if (!/^\d+$/.test(num))
|
|
100
|
+
return null;
|
|
101
|
+
return { owner, repo, number: parseInt(num, 10), type };
|
|
74
102
|
}
|
|
75
103
|
export function daysBetween(from, to = new Date()) {
|
|
76
104
|
return Math.max(0, Math.floor((to.getTime() - from.getTime()) / (1000 * 60 * 60 * 24)));
|
|
@@ -91,8 +119,12 @@ export function getGitHubToken() {
|
|
|
91
119
|
if (tokenFetchAttempted)
|
|
92
120
|
return null;
|
|
93
121
|
tokenFetchAttempted = true;
|
|
94
|
-
|
|
95
|
-
|
|
122
|
+
// Trim: a trailing newline (e.g. GITHUB_TOKEN=$(cat file)) produces a
|
|
123
|
+
// malformed Authorization header with confusing 401s. A whitespace-only
|
|
124
|
+
// value falls through to the gh CLI.
|
|
125
|
+
const envToken = process.env.GITHUB_TOKEN?.trim();
|
|
126
|
+
if (envToken) {
|
|
127
|
+
cachedGitHubToken = envToken;
|
|
96
128
|
return cachedGitHubToken;
|
|
97
129
|
}
|
|
98
130
|
try {
|
|
@@ -108,7 +140,9 @@ export function getGitHubToken() {
|
|
|
108
140
|
}
|
|
109
141
|
}
|
|
110
142
|
catch (err) {
|
|
111
|
-
|
|
143
|
+
// Log only the message: the raw execFileSync error carries stdout/stderr
|
|
144
|
+
// buffers that could include a token if gh half-succeeded.
|
|
145
|
+
debug(MODULE, `gh auth token failed: ${errorMessage(err)}`);
|
|
112
146
|
}
|
|
113
147
|
return null;
|
|
114
148
|
}
|