@oss-scout/core 0.11.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.bundle.cjs +89 -66
- package/dist/cli.js +302 -436
- package/dist/commands/command-scout.d.ts +21 -0
- package/dist/commands/command-scout.js +21 -0
- package/dist/commands/config.js +10 -128
- package/dist/commands/features.js +15 -28
- package/dist/commands/results.d.ts +13 -2
- package/dist/commands/results.js +29 -2
- package/dist/commands/search.d.ts +4 -0
- package/dist/commands/search.js +65 -70
- package/dist/commands/setup.d.ts +2 -0
- package/dist/commands/setup.js +35 -6
- package/dist/commands/skip.d.ts +4 -0
- package/dist/commands/skip.js +45 -55
- package/dist/commands/sync.d.ts +10 -0
- package/dist/commands/sync.js +10 -0
- package/dist/commands/vet-list.js +3 -19
- package/dist/commands/vet.js +18 -25
- package/dist/commands/with-scout.d.ts +32 -0
- package/dist/commands/with-scout.js +41 -0
- package/dist/core/anti-llm-policy.js +5 -33
- package/dist/core/bootstrap.d.ts +2 -2
- package/dist/core/bootstrap.js +5 -9
- package/dist/core/errors.d.ts +10 -0
- package/dist/core/errors.js +20 -5
- package/dist/core/feature-discovery.d.ts +13 -1
- package/dist/core/feature-discovery.js +104 -81
- package/dist/core/gist-state-store.d.ts +13 -12
- package/dist/core/gist-state-store.js +128 -53
- package/dist/core/http-cache.d.ts +32 -2
- package/dist/core/http-cache.js +74 -19
- package/dist/core/issue-discovery.d.ts +12 -1
- package/dist/core/issue-discovery.js +94 -67
- package/dist/core/issue-eligibility.d.ts +11 -4
- package/dist/core/issue-eligibility.js +124 -69
- package/dist/core/issue-graphql.d.ts +58 -0
- package/dist/core/issue-graphql.js +108 -0
- package/dist/core/issue-vetting.d.ts +115 -9
- package/dist/core/issue-vetting.js +246 -109
- package/dist/core/local-state.d.ts +6 -2
- package/dist/core/local-state.js +23 -5
- package/dist/core/logger.d.ts +12 -4
- package/dist/core/logger.js +33 -7
- package/dist/core/personalization.d.ts +30 -10
- package/dist/core/personalization.js +64 -24
- package/dist/core/preference-fields.d.ts +47 -0
- package/dist/core/preference-fields.js +180 -0
- package/dist/core/probe-repo-file.d.ts +47 -0
- package/dist/core/probe-repo-file.js +57 -0
- package/dist/core/repo-health.js +40 -32
- package/dist/core/roadmap.js +26 -22
- package/dist/core/schemas.d.ts +148 -26
- package/dist/core/schemas.js +83 -17
- package/dist/core/search-budget.d.ts +9 -0
- package/dist/core/search-budget.js +36 -3
- package/dist/core/search-phases.d.ts +4 -21
- package/dist/core/search-phases.js +37 -89
- package/dist/core/types.d.ts +151 -38
- package/dist/core/utils.js +60 -26
- package/dist/formatters/human.d.ts +60 -0
- package/dist/formatters/human.js +199 -0
- package/dist/formatters/markdown.d.ts +10 -0
- package/dist/formatters/markdown.js +31 -0
- package/dist/index.d.ts +6 -2
- package/dist/index.js +8 -0
- package/dist/scout.d.ts +75 -12
- package/dist/scout.js +265 -26
- package/package.json +1 -1
|
@@ -11,10 +11,11 @@
|
|
|
11
11
|
*
|
|
12
12
|
* No state singletons — anchor repos are resolved from RepoScore[] passed in.
|
|
13
13
|
*/
|
|
14
|
-
import { errorMessage,
|
|
14
|
+
import { errorMessage, rethrowIfFatal } from "./errors.js";
|
|
15
15
|
import { warn } from "./logger.js";
|
|
16
16
|
import { sleep } from "./utils.js";
|
|
17
17
|
import { fetchRoadmapIssueRefs } from "./roadmap.js";
|
|
18
|
+
import { cachedSearchIssues } from "./search-phases.js";
|
|
18
19
|
const MODULE = "feature-discovery";
|
|
19
20
|
/** Delay between per-repo issue lists, mirroring search-phases.INTER_QUERY_DELAY_MS. */
|
|
20
21
|
const INTER_REPO_DELAY_MS = 2000;
|
|
@@ -164,6 +165,53 @@ function isFeatureIssue(item) {
|
|
|
164
165
|
return false;
|
|
165
166
|
return labels.some((l) => FEATURE_LABELS.includes(l));
|
|
166
167
|
}
|
|
168
|
+
/**
|
|
169
|
+
* Extract feature signals from a raw issue, vet it, and classify its horizon.
|
|
170
|
+
* Shared by the anchor (discoverFeatures) and broad (discoverFeaturesBroad)
|
|
171
|
+
* paths (#157). Returns null when vetting fails for this item so the caller
|
|
172
|
+
* can skip it; fatal errors (auth/rate-limit) propagate.
|
|
173
|
+
*
|
|
174
|
+
* `roadmapRefs` is only supplied by the anchor path: when present, the roadmap
|
|
175
|
+
* signal is threaded into both the vet call and the horizon classifier exactly
|
|
176
|
+
* as before; the broad path omits it (roadmap scraping is per-repo and kept out
|
|
177
|
+
* of the cheap broad search).
|
|
178
|
+
*/
|
|
179
|
+
async function vetAndClassify(item, vetter, roadmapRefs) {
|
|
180
|
+
const labels = extractLabels(item);
|
|
181
|
+
const hasMilestone = !!item.milestone;
|
|
182
|
+
const reactions = item.reactions?.total_count ?? 0;
|
|
183
|
+
const comments = item.comments ?? 0;
|
|
184
|
+
const wontfixNoContributor = item.created_at
|
|
185
|
+
? detectWontfixNoContributor({ labels, createdAt: item.created_at })
|
|
186
|
+
: false;
|
|
187
|
+
const useRoadmap = roadmapRefs !== undefined;
|
|
188
|
+
const onRoadmap = useRoadmap &&
|
|
189
|
+
typeof item.number === "number" &&
|
|
190
|
+
roadmapRefs.has(item.number);
|
|
191
|
+
let candidate;
|
|
192
|
+
try {
|
|
193
|
+
candidate = await vetter.vetIssue(item.html_url, {
|
|
194
|
+
featureSignals: {
|
|
195
|
+
reactions,
|
|
196
|
+
comments,
|
|
197
|
+
hasMilestone,
|
|
198
|
+
wontfixNoContributor,
|
|
199
|
+
...(useRoadmap ? { onRoadmap } : {}),
|
|
200
|
+
},
|
|
201
|
+
});
|
|
202
|
+
}
|
|
203
|
+
catch (err) {
|
|
204
|
+
rethrowIfFatal(err);
|
|
205
|
+
warn(MODULE, `vet failed for ${item.html_url}: ${errorMessage(err)}`);
|
|
206
|
+
return null;
|
|
207
|
+
}
|
|
208
|
+
const horizon = classifyHorizon({
|
|
209
|
+
hasMilestone,
|
|
210
|
+
labels,
|
|
211
|
+
...(useRoadmap ? { isOnRoadmap: onRoadmap } : {}),
|
|
212
|
+
});
|
|
213
|
+
return { ...candidate, horizon };
|
|
214
|
+
}
|
|
167
215
|
/**
|
|
168
216
|
* Orchestrate `scout features`: anchor resolution → per-repo issue listing
|
|
169
217
|
* → feature-signal extraction → vetting → horizon classification → bucket split.
|
|
@@ -211,45 +259,15 @@ export async function discoverFeatures(opts) {
|
|
|
211
259
|
roadmapRefs = refs;
|
|
212
260
|
}
|
|
213
261
|
catch (err) {
|
|
214
|
-
|
|
215
|
-
throw err;
|
|
262
|
+
rethrowIfFatal(err);
|
|
216
263
|
warn(MODULE, `failed to list issues for ${anchorRepos[i]}: ${errorMessage(err)}`);
|
|
217
264
|
continue;
|
|
218
265
|
}
|
|
219
266
|
const items = response.data.filter((it) => !it.pull_request && !it.assignee && isFeatureIssue(it));
|
|
220
267
|
for (const item of items) {
|
|
221
|
-
const
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
const comments = item.comments ?? 0;
|
|
225
|
-
const wontfixNoContributor = item.created_at
|
|
226
|
-
? detectWontfixNoContributor({ labels, createdAt: item.created_at })
|
|
227
|
-
: false;
|
|
228
|
-
const onRoadmap = typeof item.number === "number" && roadmapRefs.has(item.number);
|
|
229
|
-
let candidate;
|
|
230
|
-
try {
|
|
231
|
-
candidate = await opts.vetter.vetIssue(item.html_url, {
|
|
232
|
-
featureSignals: {
|
|
233
|
-
reactions,
|
|
234
|
-
comments,
|
|
235
|
-
hasMilestone,
|
|
236
|
-
wontfixNoContributor,
|
|
237
|
-
onRoadmap,
|
|
238
|
-
},
|
|
239
|
-
});
|
|
240
|
-
}
|
|
241
|
-
catch (err) {
|
|
242
|
-
if (getHttpStatusCode(err) === 401 || isRateLimitError(err))
|
|
243
|
-
throw err;
|
|
244
|
-
warn(MODULE, `vet failed for ${item.html_url}: ${errorMessage(err)}`);
|
|
245
|
-
continue;
|
|
246
|
-
}
|
|
247
|
-
const horizon = classifyHorizon({
|
|
248
|
-
hasMilestone,
|
|
249
|
-
labels,
|
|
250
|
-
isOnRoadmap: onRoadmap,
|
|
251
|
-
});
|
|
252
|
-
candidates.push({ ...candidate, horizon });
|
|
268
|
+
const candidate = await vetAndClassify(item, opts.vetter, roadmapRefs);
|
|
269
|
+
if (candidate)
|
|
270
|
+
candidates.push(candidate);
|
|
253
271
|
}
|
|
254
272
|
}
|
|
255
273
|
// Drop low-viability results — same threshold as scout search.
|
|
@@ -273,19 +291,16 @@ const DEFAULT_BROAD_MAX_TO_VET = 30;
|
|
|
273
291
|
*/
|
|
274
292
|
export function buildBroadFeatureSearchQuery(opts) {
|
|
275
293
|
const parts = ["is:issue", "is:open", "no:assignee"];
|
|
276
|
-
// Feature labels — any-of via parenthesized OR.
|
|
294
|
+
// Feature labels — any-of via parenthesized OR. The six labels spend
|
|
295
|
+
// exactly five OR operators, GitHub's entire per-query allowance.
|
|
277
296
|
const labelClause = FEATURE_LABELS.map((l) => `label:"${l}"`).join(" OR ");
|
|
278
297
|
parts.push(`(${labelClause})`);
|
|
279
298
|
// Exclude labels that overlap with `scout` territory.
|
|
280
299
|
for (const excl of FEATURE_EXCLUSION_LABELS) {
|
|
281
300
|
parts.push(`-label:"${excl}"`);
|
|
282
301
|
}
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
const languages = (opts.languages ?? []).filter((l) => l && l.toLowerCase() !== "any");
|
|
286
|
-
if (languages.length > 0) {
|
|
287
|
-
const langClause = languages.map((l) => `language:${l}`).join(" OR ");
|
|
288
|
-
parts.push(`(${langClause})`);
|
|
302
|
+
if (opts.language) {
|
|
303
|
+
parts.push(`language:${opts.language}`);
|
|
289
304
|
}
|
|
290
305
|
// User exclusions.
|
|
291
306
|
for (const repo of opts.excludeRepos ?? []) {
|
|
@@ -296,6 +311,23 @@ export function buildBroadFeatureSearchQuery(opts) {
|
|
|
296
311
|
}
|
|
297
312
|
return parts.join(" ");
|
|
298
313
|
}
|
|
314
|
+
/**
|
|
315
|
+
* One query per language. A combined `(language:a OR language:b)` clause
|
|
316
|
+
* pushed the query past GitHub's 5-operator limit (the label ORs already
|
|
317
|
+
* spend all five), so every 2+ language config drew a 422 that the caller
|
|
318
|
+
* swallowed into "no results" (#121). "any" disables the filter.
|
|
319
|
+
*/
|
|
320
|
+
export function buildBroadFeatureSearchQueries(opts) {
|
|
321
|
+
const base = {
|
|
322
|
+
excludeRepos: opts.excludeRepos,
|
|
323
|
+
excludeOrgs: opts.excludeOrgs,
|
|
324
|
+
};
|
|
325
|
+
const languages = (opts.languages ?? []).filter((l) => l && l.toLowerCase() !== "any");
|
|
326
|
+
if (languages.length === 0) {
|
|
327
|
+
return [buildBroadFeatureSearchQuery(base)];
|
|
328
|
+
}
|
|
329
|
+
return languages.map((language) => buildBroadFeatureSearchQuery({ ...base, language }));
|
|
330
|
+
}
|
|
299
331
|
/**
|
|
300
332
|
* Orchestrate broad / cross-repo feature discovery (#100). Bypasses anchor
|
|
301
333
|
* resolution; runs a single GitHub Search API query for feature-labeled
|
|
@@ -310,7 +342,7 @@ export function buildBroadFeatureSearchQuery(opts) {
|
|
|
310
342
|
* degrade gracefully.
|
|
311
343
|
*/
|
|
312
344
|
export async function discoverFeaturesBroad(opts) {
|
|
313
|
-
const
|
|
345
|
+
const queries = buildBroadFeatureSearchQueries({
|
|
314
346
|
languages: opts.languages,
|
|
315
347
|
excludeRepos: opts.excludeRepos,
|
|
316
348
|
excludeOrgs: opts.excludeOrgs,
|
|
@@ -318,17 +350,32 @@ export async function discoverFeaturesBroad(opts) {
|
|
|
318
350
|
const maxToVet = opts.maxToVet ?? DEFAULT_BROAD_MAX_TO_VET;
|
|
319
351
|
let items;
|
|
320
352
|
try {
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
353
|
+
// cachedSearchIssues pays the budget tracker and the 15-minute search
|
|
354
|
+
// cache; this was previously the only Search API call in the pipeline
|
|
355
|
+
// outside that infrastructure (#121). Runtime items keep the rich
|
|
356
|
+
// fields (milestone, reactions, ...) the narrow cache type omits.
|
|
357
|
+
const merged = [];
|
|
358
|
+
const seenUrls = new Set();
|
|
359
|
+
for (const query of queries) {
|
|
360
|
+
const data = await cachedSearchIssues(opts.octokit, {
|
|
361
|
+
q: query,
|
|
362
|
+
sort: "interactions",
|
|
363
|
+
order: "desc",
|
|
364
|
+
per_page: maxToVet,
|
|
365
|
+
});
|
|
366
|
+
for (const raw of data.items) {
|
|
367
|
+
if (seenUrls.has(raw.html_url))
|
|
368
|
+
continue;
|
|
369
|
+
seenUrls.add(raw.html_url);
|
|
370
|
+
merged.push(raw);
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
items = merged
|
|
374
|
+
.filter((it) => !it.pull_request && !it.assignee && isFeatureIssue(it))
|
|
375
|
+
.slice(0, maxToVet);
|
|
328
376
|
}
|
|
329
377
|
catch (err) {
|
|
330
|
-
|
|
331
|
-
throw err;
|
|
378
|
+
rethrowIfFatal(err);
|
|
332
379
|
warn(MODULE, `broad feature search failed: ${errorMessage(err)}`);
|
|
333
380
|
return {
|
|
334
381
|
quickWins: [],
|
|
@@ -339,35 +386,11 @@ export async function discoverFeaturesBroad(opts) {
|
|
|
339
386
|
}
|
|
340
387
|
const candidates = [];
|
|
341
388
|
for (const item of items) {
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
const
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
? detectWontfixNoContributor({ labels, createdAt: item.created_at })
|
|
348
|
-
: false;
|
|
349
|
-
let candidate;
|
|
350
|
-
try {
|
|
351
|
-
candidate = await opts.vetter.vetIssue(item.html_url, {
|
|
352
|
-
featureSignals: {
|
|
353
|
-
reactions,
|
|
354
|
-
comments,
|
|
355
|
-
hasMilestone,
|
|
356
|
-
wontfixNoContributor,
|
|
357
|
-
// Roadmap scraping is per-repo and would require an extra fetch
|
|
358
|
-
// per unique repo in the broad result set — deliberately skipped
|
|
359
|
-
// here to keep the broad path cheap. Anchor mode keeps the bonus.
|
|
360
|
-
},
|
|
361
|
-
});
|
|
362
|
-
}
|
|
363
|
-
catch (err) {
|
|
364
|
-
if (getHttpStatusCode(err) === 401 || isRateLimitError(err))
|
|
365
|
-
throw err;
|
|
366
|
-
warn(MODULE, `vet failed for ${item.html_url}: ${errorMessage(err)}`);
|
|
367
|
-
continue;
|
|
368
|
-
}
|
|
369
|
-
const horizon = classifyHorizon({ hasMilestone, labels });
|
|
370
|
-
candidates.push({ ...candidate, horizon });
|
|
389
|
+
// No roadmapRefs: roadmap scraping is per-repo and deliberately skipped
|
|
390
|
+
// on the broad path to keep it cheap. Anchor mode keeps the bonus.
|
|
391
|
+
const candidate = await vetAndClassify(item, opts.vetter);
|
|
392
|
+
if (candidate)
|
|
393
|
+
candidates.push(candidate);
|
|
371
394
|
}
|
|
372
395
|
const passing = candidates.filter((c) => c.viabilityScore >= MIN_VIABILITY_SCORE);
|
|
373
396
|
const split = splitByHorizon(passing, opts.count, opts.splitRatio);
|
|
@@ -72,28 +72,29 @@ export declare class GistStateStore {
|
|
|
72
72
|
* Push state to the gist. Also writes to local cache as fallback.
|
|
73
73
|
*/
|
|
74
74
|
push(state: ScoutState): Promise<boolean>;
|
|
75
|
-
/**
|
|
76
|
-
* Pull state from the gist and merge with local state.
|
|
77
|
-
*/
|
|
78
|
-
pull(): Promise<ScoutState | null>;
|
|
79
75
|
/** Get the current gist ID (if known). */
|
|
80
76
|
getGistId(): string | null;
|
|
81
77
|
private bootstrapFromApi;
|
|
82
78
|
private bootstrapFromCache;
|
|
83
79
|
private fetchGistState;
|
|
80
|
+
/**
|
|
81
|
+
* Scan the user's gists for the state gist. `exhaustive: false` means the
|
|
82
|
+
* page cap was hit while pages were still full, so the account may hold
|
|
83
|
+
* the state gist beyond the scan window.
|
|
84
|
+
*/
|
|
84
85
|
private searchForGist;
|
|
85
86
|
private createGist;
|
|
86
87
|
private readCachedGistId;
|
|
87
88
|
private saveGistId;
|
|
89
|
+
/**
|
|
90
|
+
* Merge the local state-cache into a freshly fetched gist state before it
|
|
91
|
+
* overwrites the cache (#117). Without this, a prior failed push left its
|
|
92
|
+
* only copy of the user's changes in state-cache.json, which the next
|
|
93
|
+
* successful bootstrap silently clobbered. Tombstone-aware mergeStates
|
|
94
|
+
* keeps both sides' real changes.
|
|
95
|
+
*/
|
|
96
|
+
private mergeCacheInto;
|
|
88
97
|
private readCache;
|
|
89
98
|
private writeCache;
|
|
90
99
|
}
|
|
91
|
-
/**
|
|
92
|
-
* Merge two ScoutState objects with conflict resolution:
|
|
93
|
-
* - repoScores: per-repo, keep the one with more total PR activity
|
|
94
|
-
* - mergedPRs/closedPRs/openPRs: union by URL
|
|
95
|
-
* - preferences: remote wins
|
|
96
|
-
* - starredRepos: keep the list with the fresher timestamp
|
|
97
|
-
* - savedResults: union by issueUrl, keep newer lastSeenAt
|
|
98
|
-
*/
|
|
99
100
|
export declare function mergeStates(local: ScoutState, remote: ScoutState): ScoutState;
|
|
@@ -6,28 +6,23 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import * as fs from "fs";
|
|
8
8
|
import * as path from "path";
|
|
9
|
-
import { ScoutStateSchema } from "./schemas.js";
|
|
9
|
+
import { ScoutStateSchema, parseScoutState } from "./schemas.js";
|
|
10
10
|
import { getDataDir } from "./utils.js";
|
|
11
11
|
import { debug, warn } from "./logger.js";
|
|
12
|
-
import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
|
|
12
|
+
import { errorMessage, getHttpStatusCode, isRateLimitError, rethrowIfFatal, } from "./errors.js";
|
|
13
13
|
const MODULE = "gist-state";
|
|
14
14
|
const GIST_DESCRIPTION = "oss-scout-state";
|
|
15
15
|
const GIST_FILENAME = "state.json";
|
|
16
16
|
const GIST_ID_FILE = "gist-id";
|
|
17
17
|
const CACHE_FILE = "state-cache.json";
|
|
18
|
-
const SEARCH_MAX_PAGES =
|
|
18
|
+
const SEARCH_MAX_PAGES = 10;
|
|
19
19
|
/** Classify an unknown error into a DegradedReason for user-facing messaging. */
|
|
20
20
|
function classifyDegradedReason(err) {
|
|
21
|
+
// isRateLimitError covers 429, 403 + "rate limit" (including secondary),
|
|
22
|
+
// and 403 + "abuse detection" (#138) — single source of truth.
|
|
21
23
|
if (isRateLimitError(err))
|
|
22
24
|
return "rate_limit";
|
|
23
25
|
const status = getHttpStatusCode(err);
|
|
24
|
-
// GitHub's abuse-detection responses arrive as 403 with "abuse detection"
|
|
25
|
-
// in the message but no "rate limit" substring — match resolveErrorCode's
|
|
26
|
-
// logic in errors.ts so we don't misclassify as 'unknown'.
|
|
27
|
-
if (status === 403 &&
|
|
28
|
-
errorMessage(err).toLowerCase().includes("abuse detection")) {
|
|
29
|
-
return "rate_limit";
|
|
30
|
-
}
|
|
31
26
|
if (status !== undefined && status >= 500 && status < 600)
|
|
32
27
|
return "server";
|
|
33
28
|
if (err && typeof err === "object" && "code" in err) {
|
|
@@ -81,12 +76,29 @@ export class GistStateStore {
|
|
|
81
76
|
* Push state to the gist. Also writes to local cache as fallback.
|
|
82
77
|
*/
|
|
83
78
|
async push(state) {
|
|
84
|
-
this.writeCache(state);
|
|
85
79
|
if (!this.gistId) {
|
|
86
80
|
warn(MODULE, "No gist ID — cannot push");
|
|
81
|
+
this.writeCache(state);
|
|
87
82
|
return false;
|
|
88
83
|
}
|
|
89
|
-
|
|
84
|
+
// Fetch the current gist and merge before writing, so a concurrent push
|
|
85
|
+
// from another machine is not blindly clobbered (#117). The deletion
|
|
86
|
+
// tombstones in mergeStates keep removals from resurfacing. A fetch
|
|
87
|
+
// failure (not auth/rate-limit, which propagate) degrades to writing the
|
|
88
|
+
// local snapshot, the prior best-effort behavior.
|
|
89
|
+
let toWrite = state;
|
|
90
|
+
try {
|
|
91
|
+
const remote = await this.fetchGistState(this.gistId);
|
|
92
|
+
if (remote) {
|
|
93
|
+
toWrite = mergeStates(state, remote);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
catch (err) {
|
|
97
|
+
rethrowIfFatal(err);
|
|
98
|
+
warn(MODULE, `Could not fetch gist before push, writing local snapshot: ${errorMessage(err)}`);
|
|
99
|
+
}
|
|
100
|
+
this.writeCache(toWrite);
|
|
101
|
+
const json = JSON.stringify(toWrite, null, 2);
|
|
90
102
|
if (json.length > 900000) {
|
|
91
103
|
warn(MODULE, `State too large for gist (${Math.round(json.length / 1024)}KB). Consider clearing old results with 'oss-scout results clear'.`);
|
|
92
104
|
return false;
|
|
@@ -105,32 +117,11 @@ export class GistStateStore {
|
|
|
105
117
|
// Both auth and rate-limit propagate per documented strategy.
|
|
106
118
|
// Local cache write already happened above, so the user's work isn't
|
|
107
119
|
// lost — but they need clear feedback that the sync failed.
|
|
108
|
-
|
|
109
|
-
throw err;
|
|
120
|
+
rethrowIfFatal(err);
|
|
110
121
|
warn(MODULE, `Failed to push: ${errorMessage(err)}`);
|
|
111
122
|
return false;
|
|
112
123
|
}
|
|
113
124
|
}
|
|
114
|
-
/**
|
|
115
|
-
* Pull state from the gist and merge with local state.
|
|
116
|
-
*/
|
|
117
|
-
async pull() {
|
|
118
|
-
if (!this.gistId)
|
|
119
|
-
return null;
|
|
120
|
-
try {
|
|
121
|
-
const state = await this.fetchGistState(this.gistId);
|
|
122
|
-
if (state) {
|
|
123
|
-
this.writeCache(state);
|
|
124
|
-
}
|
|
125
|
-
return state;
|
|
126
|
-
}
|
|
127
|
-
catch (err) {
|
|
128
|
-
if (getHttpStatusCode(err) === 401 || isRateLimitError(err))
|
|
129
|
-
throw err;
|
|
130
|
-
warn(MODULE, `Failed to pull: ${errorMessage(err)}`);
|
|
131
|
-
return null;
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
125
|
/** Get the current gist ID (if known). */
|
|
135
126
|
getGistId() {
|
|
136
127
|
return this.gistId;
|
|
@@ -142,9 +133,10 @@ export class GistStateStore {
|
|
|
142
133
|
if (cachedId) {
|
|
143
134
|
debug(MODULE, `Trying cached gist ID: ${cachedId}`);
|
|
144
135
|
try {
|
|
145
|
-
const
|
|
146
|
-
if (
|
|
136
|
+
const fetched = await this.fetchGistState(cachedId);
|
|
137
|
+
if (fetched) {
|
|
147
138
|
this.gistId = cachedId;
|
|
139
|
+
const state = this.mergeCacheInto(fetched);
|
|
148
140
|
this.writeCache(state);
|
|
149
141
|
return { gistId: cachedId, state, created: false };
|
|
150
142
|
}
|
|
@@ -162,21 +154,28 @@ export class GistStateStore {
|
|
|
162
154
|
debug(MODULE, "Cached gist ID invalid, searching...");
|
|
163
155
|
}
|
|
164
156
|
// 2. Search user's gists
|
|
165
|
-
const
|
|
166
|
-
if (
|
|
167
|
-
debug(MODULE, `Found gist via search: ${
|
|
168
|
-
this.saveGistId(
|
|
169
|
-
this.gistId =
|
|
170
|
-
const
|
|
171
|
-
if (
|
|
157
|
+
const search = await this.searchForGist();
|
|
158
|
+
if (search.id) {
|
|
159
|
+
debug(MODULE, `Found gist via search: ${search.id}`);
|
|
160
|
+
this.saveGistId(search.id);
|
|
161
|
+
this.gistId = search.id;
|
|
162
|
+
const fetched = await this.fetchGistState(search.id);
|
|
163
|
+
if (fetched) {
|
|
164
|
+
const state = this.mergeCacheInto(fetched);
|
|
172
165
|
this.writeCache(state);
|
|
173
|
-
return { gistId:
|
|
166
|
+
return { gistId: search.id, state, created: false };
|
|
174
167
|
}
|
|
175
168
|
// Gist exists but content failed validation — fall back to cache
|
|
176
169
|
// to avoid overwriting the user's data by creating a new gist.
|
|
177
|
-
warn(MODULE, `Found existing gist ${
|
|
170
|
+
warn(MODULE, `Found existing gist ${search.id} but content failed validation. Using local cache to avoid data loss.`);
|
|
178
171
|
return this.bootstrapFromCache("unknown");
|
|
179
172
|
}
|
|
173
|
+
if (!search.exhaustive) {
|
|
174
|
+
// The account has more gists than we scanned; an existing state gist
|
|
175
|
+
// may sit beyond the scan window, and creating a new one would fork
|
|
176
|
+
// state across machines.
|
|
177
|
+
warn(MODULE, `Scanned the first ${SEARCH_MAX_PAGES * 100} gists without finding an oss-scout state gist, but the account has more. Creating a new state gist; if one already exists, copy its id into the gist-id file in the oss-scout data directory to avoid a duplicate.`);
|
|
178
|
+
}
|
|
180
179
|
// 3. Create new gist
|
|
181
180
|
debug(MODULE, "No existing gist found, creating new one");
|
|
182
181
|
const freshState = ScoutStateSchema.parse({ version: 1 });
|
|
@@ -219,13 +218,18 @@ export class GistStateStore {
|
|
|
219
218
|
return null;
|
|
220
219
|
try {
|
|
221
220
|
const parsed = JSON.parse(file.content);
|
|
222
|
-
return
|
|
221
|
+
return parseScoutState(parsed);
|
|
223
222
|
}
|
|
224
223
|
catch (err) {
|
|
225
224
|
warn(MODULE, `Gist content failed validation: ${errorMessage(err)}`);
|
|
226
225
|
return null;
|
|
227
226
|
}
|
|
228
227
|
}
|
|
228
|
+
/**
|
|
229
|
+
* Scan the user's gists for the state gist. `exhaustive: false` means the
|
|
230
|
+
* page cap was hit while pages were still full, so the account may hold
|
|
231
|
+
* the state gist beyond the scan window.
|
|
232
|
+
*/
|
|
229
233
|
async searchForGist() {
|
|
230
234
|
for (let page = 1; page <= SEARCH_MAX_PAGES; page++) {
|
|
231
235
|
const { data: gists } = await this.octokit.gists.list({
|
|
@@ -233,12 +237,15 @@ export class GistStateStore {
|
|
|
233
237
|
page,
|
|
234
238
|
});
|
|
235
239
|
if (gists.length === 0)
|
|
236
|
-
|
|
240
|
+
return { id: null, exhaustive: true };
|
|
237
241
|
const match = gists.find((g) => g.description === GIST_DESCRIPTION);
|
|
238
242
|
if (match)
|
|
239
|
-
return match.id;
|
|
243
|
+
return { id: match.id, exhaustive: true };
|
|
244
|
+
// A short page means we have seen every gist
|
|
245
|
+
if (gists.length < 100)
|
|
246
|
+
return { id: null, exhaustive: true };
|
|
240
247
|
}
|
|
241
|
-
return null;
|
|
248
|
+
return { id: null, exhaustive: false };
|
|
242
249
|
}
|
|
243
250
|
async createGist(state) {
|
|
244
251
|
const { data } = await this.octokit.gists.create({
|
|
@@ -267,10 +274,21 @@ export class GistStateStore {
|
|
|
267
274
|
saveGistId(id) {
|
|
268
275
|
fs.writeFileSync(getGistIdPath(), id + "\n", { mode: 0o600 });
|
|
269
276
|
}
|
|
277
|
+
/**
|
|
278
|
+
* Merge the local state-cache into a freshly fetched gist state before it
|
|
279
|
+
* overwrites the cache (#117). Without this, a prior failed push left its
|
|
280
|
+
* only copy of the user's changes in state-cache.json, which the next
|
|
281
|
+
* successful bootstrap silently clobbered. Tombstone-aware mergeStates
|
|
282
|
+
* keeps both sides' real changes.
|
|
283
|
+
*/
|
|
284
|
+
mergeCacheInto(fetched) {
|
|
285
|
+
const cached = this.readCache();
|
|
286
|
+
return cached ? mergeStates(cached, fetched) : fetched;
|
|
287
|
+
}
|
|
270
288
|
readCache() {
|
|
271
289
|
try {
|
|
272
290
|
const raw = fs.readFileSync(getCachePath(), "utf-8");
|
|
273
|
-
return
|
|
291
|
+
return parseScoutState(JSON.parse(raw));
|
|
274
292
|
}
|
|
275
293
|
catch (err) {
|
|
276
294
|
const code = err?.code;
|
|
@@ -299,19 +317,76 @@ export class GistStateStore {
|
|
|
299
317
|
* - preferences: remote wins
|
|
300
318
|
* - starredRepos: keep the list with the fresher timestamp
|
|
301
319
|
* - savedResults: union by issueUrl, keep newer lastSeenAt
|
|
320
|
+
* - unknown top-level keys (from a newer binary, #137): carried over via
|
|
321
|
+
* spreads, remote wins on conflicts to mirror the preferences rule
|
|
302
322
|
*/
|
|
323
|
+
/** Retain tombstones this long so a slow-to-sync machine still honors them. */
|
|
324
|
+
const TOMBSTONE_TTL_MS = 90 * 24 * 60 * 60 * 1000;
|
|
325
|
+
/**
|
|
326
|
+
* Merge tombstones from both sides (newest removedAt per URL wins) and drop
|
|
327
|
+
* any older than the TTL so the list cannot grow without bound (#117).
|
|
328
|
+
*/
|
|
329
|
+
function mergeTombstones(local, remote) {
|
|
330
|
+
const cutoff = Date.now() - TOMBSTONE_TTL_MS;
|
|
331
|
+
const byUrl = new Map();
|
|
332
|
+
for (const t of [...local, ...remote]) {
|
|
333
|
+
const existing = byUrl.get(t.url);
|
|
334
|
+
if (!existing || t.removedAt > existing.removedAt)
|
|
335
|
+
byUrl.set(t.url, t);
|
|
336
|
+
}
|
|
337
|
+
return [...byUrl.values()].filter((t) => {
|
|
338
|
+
const ts = new Date(t.removedAt).getTime();
|
|
339
|
+
return !Number.isFinite(ts) || ts >= cutoff;
|
|
340
|
+
});
|
|
341
|
+
}
|
|
342
|
+
/**
|
|
343
|
+
* Drop merged items that a tombstone deleted, unless the item was re-added
|
|
344
|
+
* after the deletion (item timestamp newer than the tombstone) (#117).
|
|
345
|
+
*/
|
|
346
|
+
function applyTombstones(items, tombstones, urlOf, touchedAtOf) {
|
|
347
|
+
if (tombstones.length === 0)
|
|
348
|
+
return items;
|
|
349
|
+
const byUrl = new Map(tombstones.map((t) => [t.url, t.removedAt]));
|
|
350
|
+
return items.filter((item) => {
|
|
351
|
+
const removedAt = byUrl.get(urlOf(item));
|
|
352
|
+
if (removedAt === undefined)
|
|
353
|
+
return true;
|
|
354
|
+
// Keep only if re-added strictly after the deletion
|
|
355
|
+
return touchedAtOf(item) > removedAt;
|
|
356
|
+
});
|
|
357
|
+
}
|
|
303
358
|
export function mergeStates(local, remote) {
|
|
359
|
+
const tombstones = mergeTombstones(local.tombstones ?? [], remote.tombstones ?? []);
|
|
360
|
+
const savedResults = applyTombstones(mergeSavedResults(local.savedResults ?? [], remote.savedResults ?? []), tombstones, (r) => r.issueUrl, (r) => r.lastSeenAt);
|
|
361
|
+
const skippedIssues = applyTombstones(mergeSkippedIssues(local.skippedIssues ?? [], remote.skippedIssues ?? []), tombstones, (s) => s.url, (s) => s.skippedAt);
|
|
362
|
+
// A skipped URL must never linger in saved results (skipIssue's own
|
|
363
|
+
// invariant). Enforce it after the merge so a remote copy can't resurrect
|
|
364
|
+
// a now-skipped result into the saved list (#117).
|
|
365
|
+
const skippedUrls = new Set(skippedIssues.map((s) => s.url));
|
|
366
|
+
const savedResultsFinal = savedResults.filter((r) => !skippedUrls.has(r.issueUrl));
|
|
367
|
+
// Preferences: keep the side with the fresher preferencesUpdatedAt instead
|
|
368
|
+
// of always taking remote, which silently reverted a local edit (#117).
|
|
369
|
+
const localPrefsTs = local.preferencesUpdatedAt;
|
|
370
|
+
const remotePrefsTs = remote.preferencesUpdatedAt;
|
|
371
|
+
const localPrefsWin = localPrefsTs !== undefined &&
|
|
372
|
+
(remotePrefsTs === undefined || localPrefsTs > remotePrefsTs);
|
|
373
|
+
const preferences = localPrefsWin ? local.preferences : remote.preferences;
|
|
374
|
+
const preferencesUpdatedAt = pickFresherTimestamp(localPrefsTs, remotePrefsTs);
|
|
304
375
|
return {
|
|
376
|
+
...local,
|
|
377
|
+
...remote,
|
|
305
378
|
version: 1,
|
|
306
|
-
preferences
|
|
379
|
+
preferences,
|
|
380
|
+
preferencesUpdatedAt,
|
|
381
|
+
tombstones,
|
|
307
382
|
repoScores: mergeRepoScores(local.repoScores, remote.repoScores),
|
|
308
383
|
starredRepos: mergeStarredRepos(local, remote),
|
|
309
384
|
starredReposLastFetched: pickFresherTimestamp(local.starredReposLastFetched, remote.starredReposLastFetched),
|
|
310
385
|
mergedPRs: unionByUrl(local.mergedPRs, remote.mergedPRs),
|
|
311
386
|
closedPRs: unionByUrl(local.closedPRs, remote.closedPRs),
|
|
312
387
|
openPRs: unionByUrl(local.openPRs ?? [], remote.openPRs ?? []),
|
|
313
|
-
savedResults:
|
|
314
|
-
skippedIssues
|
|
388
|
+
savedResults: savedResultsFinal,
|
|
389
|
+
skippedIssues,
|
|
315
390
|
lastSearchAt: pickFresherTimestamp(local.lastSearchAt, remote.lastSearchAt),
|
|
316
391
|
lastRunAt: pickFresherTimestamp(local.lastRunAt, remote.lastRunAt) ??
|
|
317
392
|
new Date().toISOString(),
|
|
@@ -9,8 +9,25 @@
|
|
|
9
9
|
* for the same endpoint (e.g., star counts for two PRs in the same repo)
|
|
10
10
|
* share a single HTTP round-trip.
|
|
11
11
|
*/
|
|
12
|
+
/**
|
|
13
|
+
* Schema version for cache entries whose body is an oss-scout-defined shape
|
|
14
|
+
* (vetting results, search payloads, policy scans, merged-PR counts) rather
|
|
15
|
+
* than a raw GitHub API response. These are deserialized with an unchecked
|
|
16
|
+
* cast, so a shape change between releases would otherwise let a new build read
|
|
17
|
+
* a stale-shaped entry. Bump this whenever one of those cached shapes changes:
|
|
18
|
+
* old entries then miss the version-prefixed key and are refetched instead of
|
|
19
|
+
* misread (#158). Raw ETag-keyed GitHub responses are not versioned — their
|
|
20
|
+
* shape is owned by GitHub, not us.
|
|
21
|
+
*/
|
|
22
|
+
export declare const CACHE_SCHEMA_VERSION = "v1";
|
|
23
|
+
/**
|
|
24
|
+
* Prefix a synthetic (non-URL) cache key with the schema version so a shape
|
|
25
|
+
* change invalidates old entries. Use for every key whose body is read back
|
|
26
|
+
* with an unchecked cast.
|
|
27
|
+
*/
|
|
28
|
+
export declare function versionedCacheKey(key: string): string;
|
|
12
29
|
/** Shape of a single cache entry on disk. */
|
|
13
|
-
interface CacheEntry {
|
|
30
|
+
export interface CacheEntry {
|
|
14
31
|
etag: string;
|
|
15
32
|
url: string;
|
|
16
33
|
body: unknown;
|
|
@@ -38,6 +55,12 @@ export declare class HttpCache {
|
|
|
38
55
|
* (e.g., caching aggregated results from paginated API calls).
|
|
39
56
|
*/
|
|
40
57
|
getIfFresh(key: string, maxAgeMs: number): unknown | null;
|
|
58
|
+
/**
|
|
59
|
+
* Like {@link getIfFresh}, but returns the whole entry so callers can
|
|
60
|
+
* distinguish "no fresh entry" (null) from a legitimately cached falsy
|
|
61
|
+
* body (`0`, `""`, `false`, `null`).
|
|
62
|
+
*/
|
|
63
|
+
getEntryIfFresh(key: string, maxAgeMs: number): CacheEntry | null;
|
|
41
64
|
/**
|
|
42
65
|
* Look up a cached response. Returns `null` if no cache entry exists.
|
|
43
66
|
*/
|
|
@@ -91,6 +114,14 @@ export declare function getHttpCache(): HttpCache;
|
|
|
91
114
|
* cached body without consuming a rate-limit point.
|
|
92
115
|
* 3. On a fresh 200, caches the ETag + body for next time.
|
|
93
116
|
*/
|
|
117
|
+
/**
|
|
118
|
+
* Share one in-flight computation per key: concurrent callers for the same
|
|
119
|
+
* key await the same promise instead of paying duplicate API calls (#124).
|
|
120
|
+
* The check-then-register pair runs without an intervening await, so two
|
|
121
|
+
* concurrent callers cannot both miss. Rejections propagate to every waiter
|
|
122
|
+
* and are never cached.
|
|
123
|
+
*/
|
|
124
|
+
export declare function withInflightDedup<T>(cache: HttpCache, key: string, fn: () => Promise<T>): Promise<T>;
|
|
94
125
|
export declare function cachedRequest<T>(cache: HttpCache, url: string, fetcher: (headers: Record<string, string>) => Promise<{
|
|
95
126
|
data: T;
|
|
96
127
|
headers?: Record<string, string>;
|
|
@@ -105,4 +136,3 @@ export declare function cachedRequest<T>(cache: HttpCache, url: string, fetcher:
|
|
|
105
136
|
* (e.g. search queries, project health checks).
|
|
106
137
|
*/
|
|
107
138
|
export declare function cachedTimeBased<T>(cache: HttpCache, key: string, maxAgeMs: number, fetcher: () => Promise<T>): Promise<T>;
|
|
108
|
-
export {};
|