@oss-scout/core 0.11.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/dist/cli.bundle.cjs +89 -66
  2. package/dist/cli.js +302 -436
  3. package/dist/commands/command-scout.d.ts +21 -0
  4. package/dist/commands/command-scout.js +21 -0
  5. package/dist/commands/config.js +10 -128
  6. package/dist/commands/features.js +15 -28
  7. package/dist/commands/results.d.ts +13 -2
  8. package/dist/commands/results.js +29 -2
  9. package/dist/commands/search.d.ts +4 -0
  10. package/dist/commands/search.js +65 -70
  11. package/dist/commands/setup.d.ts +2 -0
  12. package/dist/commands/setup.js +35 -6
  13. package/dist/commands/skip.d.ts +4 -0
  14. package/dist/commands/skip.js +45 -55
  15. package/dist/commands/sync.d.ts +10 -0
  16. package/dist/commands/sync.js +10 -0
  17. package/dist/commands/vet-list.js +3 -19
  18. package/dist/commands/vet.js +18 -25
  19. package/dist/commands/with-scout.d.ts +32 -0
  20. package/dist/commands/with-scout.js +41 -0
  21. package/dist/core/anti-llm-policy.js +5 -33
  22. package/dist/core/bootstrap.d.ts +2 -2
  23. package/dist/core/bootstrap.js +5 -9
  24. package/dist/core/errors.d.ts +10 -0
  25. package/dist/core/errors.js +20 -5
  26. package/dist/core/feature-discovery.d.ts +13 -1
  27. package/dist/core/feature-discovery.js +104 -81
  28. package/dist/core/gist-state-store.d.ts +13 -12
  29. package/dist/core/gist-state-store.js +128 -53
  30. package/dist/core/http-cache.d.ts +32 -2
  31. package/dist/core/http-cache.js +74 -19
  32. package/dist/core/issue-discovery.d.ts +12 -1
  33. package/dist/core/issue-discovery.js +94 -67
  34. package/dist/core/issue-eligibility.d.ts +11 -4
  35. package/dist/core/issue-eligibility.js +124 -69
  36. package/dist/core/issue-graphql.d.ts +58 -0
  37. package/dist/core/issue-graphql.js +108 -0
  38. package/dist/core/issue-vetting.d.ts +115 -9
  39. package/dist/core/issue-vetting.js +246 -109
  40. package/dist/core/local-state.d.ts +6 -2
  41. package/dist/core/local-state.js +23 -5
  42. package/dist/core/logger.d.ts +12 -4
  43. package/dist/core/logger.js +33 -7
  44. package/dist/core/personalization.d.ts +30 -10
  45. package/dist/core/personalization.js +64 -24
  46. package/dist/core/preference-fields.d.ts +47 -0
  47. package/dist/core/preference-fields.js +180 -0
  48. package/dist/core/probe-repo-file.d.ts +47 -0
  49. package/dist/core/probe-repo-file.js +57 -0
  50. package/dist/core/repo-health.js +40 -32
  51. package/dist/core/roadmap.js +26 -22
  52. package/dist/core/schemas.d.ts +148 -26
  53. package/dist/core/schemas.js +83 -17
  54. package/dist/core/search-budget.d.ts +9 -0
  55. package/dist/core/search-budget.js +36 -3
  56. package/dist/core/search-phases.d.ts +4 -21
  57. package/dist/core/search-phases.js +37 -89
  58. package/dist/core/types.d.ts +151 -38
  59. package/dist/core/utils.js +60 -26
  60. package/dist/formatters/human.d.ts +60 -0
  61. package/dist/formatters/human.js +199 -0
  62. package/dist/formatters/markdown.d.ts +10 -0
  63. package/dist/formatters/markdown.js +31 -0
  64. package/dist/index.d.ts +6 -2
  65. package/dist/index.js +8 -0
  66. package/dist/scout.d.ts +75 -12
  67. package/dist/scout.js +265 -26
  68. package/package.json +1 -1
@@ -11,10 +11,11 @@
11
11
  *
12
12
  * No state singletons — anchor repos are resolved from RepoScore[] passed in.
13
13
  */
14
- import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
14
+ import { errorMessage, rethrowIfFatal } from "./errors.js";
15
15
  import { warn } from "./logger.js";
16
16
  import { sleep } from "./utils.js";
17
17
  import { fetchRoadmapIssueRefs } from "./roadmap.js";
18
+ import { cachedSearchIssues } from "./search-phases.js";
18
19
  const MODULE = "feature-discovery";
19
20
  /** Delay between per-repo issue lists, mirroring search-phases.INTER_QUERY_DELAY_MS. */
20
21
  const INTER_REPO_DELAY_MS = 2000;
@@ -164,6 +165,53 @@ function isFeatureIssue(item) {
164
165
  return false;
165
166
  return labels.some((l) => FEATURE_LABELS.includes(l));
166
167
  }
168
+ /**
169
+ * Extract feature signals from a raw issue, vet it, and classify its horizon.
170
+ * Shared by the anchor (discoverFeatures) and broad (discoverFeaturesBroad)
171
+ * paths (#157). Returns null when vetting fails for this item so the caller
172
+ * can skip it; fatal errors (auth/rate-limit) propagate.
173
+ *
174
+ * `roadmapRefs` is only supplied by the anchor path: when present, the roadmap
175
+ * signal is threaded into both the vet call and the horizon classifier exactly
176
+ * as before; the broad path omits it (roadmap scraping is per-repo and kept out
177
+ * of the cheap broad search).
178
+ */
179
+ async function vetAndClassify(item, vetter, roadmapRefs) {
180
+ const labels = extractLabels(item);
181
+ const hasMilestone = !!item.milestone;
182
+ const reactions = item.reactions?.total_count ?? 0;
183
+ const comments = item.comments ?? 0;
184
+ const wontfixNoContributor = item.created_at
185
+ ? detectWontfixNoContributor({ labels, createdAt: item.created_at })
186
+ : false;
187
+ const useRoadmap = roadmapRefs !== undefined;
188
+ const onRoadmap = useRoadmap &&
189
+ typeof item.number === "number" &&
190
+ roadmapRefs.has(item.number);
191
+ let candidate;
192
+ try {
193
+ candidate = await vetter.vetIssue(item.html_url, {
194
+ featureSignals: {
195
+ reactions,
196
+ comments,
197
+ hasMilestone,
198
+ wontfixNoContributor,
199
+ ...(useRoadmap ? { onRoadmap } : {}),
200
+ },
201
+ });
202
+ }
203
+ catch (err) {
204
+ rethrowIfFatal(err);
205
+ warn(MODULE, `vet failed for ${item.html_url}: ${errorMessage(err)}`);
206
+ return null;
207
+ }
208
+ const horizon = classifyHorizon({
209
+ hasMilestone,
210
+ labels,
211
+ ...(useRoadmap ? { isOnRoadmap: onRoadmap } : {}),
212
+ });
213
+ return { ...candidate, horizon };
214
+ }
167
215
  /**
168
216
  * Orchestrate `scout features`: anchor resolution → per-repo issue listing
169
217
  * → feature-signal extraction → vetting → horizon classification → bucket split.
@@ -211,45 +259,15 @@ export async function discoverFeatures(opts) {
211
259
  roadmapRefs = refs;
212
260
  }
213
261
  catch (err) {
214
- if (getHttpStatusCode(err) === 401 || isRateLimitError(err))
215
- throw err;
262
+ rethrowIfFatal(err);
216
263
  warn(MODULE, `failed to list issues for ${anchorRepos[i]}: ${errorMessage(err)}`);
217
264
  continue;
218
265
  }
219
266
  const items = response.data.filter((it) => !it.pull_request && !it.assignee && isFeatureIssue(it));
220
267
  for (const item of items) {
221
- const labels = extractLabels(item);
222
- const hasMilestone = !!item.milestone;
223
- const reactions = item.reactions?.total_count ?? 0;
224
- const comments = item.comments ?? 0;
225
- const wontfixNoContributor = item.created_at
226
- ? detectWontfixNoContributor({ labels, createdAt: item.created_at })
227
- : false;
228
- const onRoadmap = typeof item.number === "number" && roadmapRefs.has(item.number);
229
- let candidate;
230
- try {
231
- candidate = await opts.vetter.vetIssue(item.html_url, {
232
- featureSignals: {
233
- reactions,
234
- comments,
235
- hasMilestone,
236
- wontfixNoContributor,
237
- onRoadmap,
238
- },
239
- });
240
- }
241
- catch (err) {
242
- if (getHttpStatusCode(err) === 401 || isRateLimitError(err))
243
- throw err;
244
- warn(MODULE, `vet failed for ${item.html_url}: ${errorMessage(err)}`);
245
- continue;
246
- }
247
- const horizon = classifyHorizon({
248
- hasMilestone,
249
- labels,
250
- isOnRoadmap: onRoadmap,
251
- });
252
- candidates.push({ ...candidate, horizon });
268
+ const candidate = await vetAndClassify(item, opts.vetter, roadmapRefs);
269
+ if (candidate)
270
+ candidates.push(candidate);
253
271
  }
254
272
  }
255
273
  // Drop low-viability results — same threshold as scout search.
@@ -273,19 +291,16 @@ const DEFAULT_BROAD_MAX_TO_VET = 30;
273
291
  */
274
292
  export function buildBroadFeatureSearchQuery(opts) {
275
293
  const parts = ["is:issue", "is:open", "no:assignee"];
276
- // Feature labels — any-of via parenthesized OR.
294
+ // Feature labels — any-of via parenthesized OR. The six labels spend
295
+ // exactly five OR operators, GitHub's entire per-query allowance.
277
296
  const labelClause = FEATURE_LABELS.map((l) => `label:"${l}"`).join(" OR ");
278
297
  parts.push(`(${labelClause})`);
279
298
  // Exclude labels that overlap with `scout` territory.
280
299
  for (const excl of FEATURE_EXCLUSION_LABELS) {
281
300
  parts.push(`-label:"${excl}"`);
282
301
  }
283
- // Languages — skip the filter when "any" is the only preference, since
284
- // GitHub Search has no `language:any` operator.
285
- const languages = (opts.languages ?? []).filter((l) => l && l.toLowerCase() !== "any");
286
- if (languages.length > 0) {
287
- const langClause = languages.map((l) => `language:${l}`).join(" OR ");
288
- parts.push(`(${langClause})`);
302
+ if (opts.language) {
303
+ parts.push(`language:${opts.language}`);
289
304
  }
290
305
  // User exclusions.
291
306
  for (const repo of opts.excludeRepos ?? []) {
@@ -296,6 +311,23 @@ export function buildBroadFeatureSearchQuery(opts) {
296
311
  }
297
312
  return parts.join(" ");
298
313
  }
314
+ /**
315
+ * One query per language. A combined `(language:a OR language:b)` clause
316
+ * pushed the query past GitHub's 5-operator limit (the label ORs already
317
+ * spend all five), so every 2+ language config drew a 422 that the caller
318
+ * swallowed into "no results" (#121). "any" disables the filter.
319
+ */
320
+ export function buildBroadFeatureSearchQueries(opts) {
321
+ const base = {
322
+ excludeRepos: opts.excludeRepos,
323
+ excludeOrgs: opts.excludeOrgs,
324
+ };
325
+ const languages = (opts.languages ?? []).filter((l) => l && l.toLowerCase() !== "any");
326
+ if (languages.length === 0) {
327
+ return [buildBroadFeatureSearchQuery(base)];
328
+ }
329
+ return languages.map((language) => buildBroadFeatureSearchQuery({ ...base, language }));
330
+ }
299
331
  /**
300
332
  * Orchestrate broad / cross-repo feature discovery (#100). Bypasses anchor
301
333
  * resolution; runs one GitHub Search API query per language (one unfiltered query if none) for feature-labeled
@@ -310,7 +342,7 @@ export function buildBroadFeatureSearchQuery(opts) {
310
342
  * degrade gracefully.
311
343
  */
312
344
  export async function discoverFeaturesBroad(opts) {
313
- const query = buildBroadFeatureSearchQuery({
345
+ const queries = buildBroadFeatureSearchQueries({
314
346
  languages: opts.languages,
315
347
  excludeRepos: opts.excludeRepos,
316
348
  excludeOrgs: opts.excludeOrgs,
@@ -318,17 +350,32 @@ export async function discoverFeaturesBroad(opts) {
318
350
  const maxToVet = opts.maxToVet ?? DEFAULT_BROAD_MAX_TO_VET;
319
351
  let items;
320
352
  try {
321
- const response = await opts.octokit.search.issuesAndPullRequests({
322
- q: query,
323
- sort: "interactions",
324
- order: "desc",
325
- per_page: maxToVet,
326
- });
327
- items = response.data.items.filter((it) => !it.pull_request && !it.assignee && isFeatureIssue(it));
353
+ // cachedSearchIssues pays the budget tracker and the 15-minute search
354
+ // cache; this was previously the only Search API call in the pipeline
355
+ // outside that infrastructure (#121). Runtime items keep the rich
356
+ // fields (milestone, reactions, ...) the narrow cache type omits.
357
+ const merged = [];
358
+ const seenUrls = new Set();
359
+ for (const query of queries) {
360
+ const data = await cachedSearchIssues(opts.octokit, {
361
+ q: query,
362
+ sort: "interactions",
363
+ order: "desc",
364
+ per_page: maxToVet,
365
+ });
366
+ for (const raw of data.items) {
367
+ if (seenUrls.has(raw.html_url))
368
+ continue;
369
+ seenUrls.add(raw.html_url);
370
+ merged.push(raw);
371
+ }
372
+ }
373
+ items = merged
374
+ .filter((it) => !it.pull_request && !it.assignee && isFeatureIssue(it))
375
+ .slice(0, maxToVet);
328
376
  }
329
377
  catch (err) {
330
- if (getHttpStatusCode(err) === 401 || isRateLimitError(err))
331
- throw err;
378
+ rethrowIfFatal(err);
332
379
  warn(MODULE, `broad feature search failed: ${errorMessage(err)}`);
333
380
  return {
334
381
  quickWins: [],
@@ -339,35 +386,11 @@ export async function discoverFeaturesBroad(opts) {
339
386
  }
340
387
  const candidates = [];
341
388
  for (const item of items) {
342
- const labels = extractLabels(item);
343
- const hasMilestone = !!item.milestone;
344
- const reactions = item.reactions?.total_count ?? 0;
345
- const comments = item.comments ?? 0;
346
- const wontfixNoContributor = item.created_at
347
- ? detectWontfixNoContributor({ labels, createdAt: item.created_at })
348
- : false;
349
- let candidate;
350
- try {
351
- candidate = await opts.vetter.vetIssue(item.html_url, {
352
- featureSignals: {
353
- reactions,
354
- comments,
355
- hasMilestone,
356
- wontfixNoContributor,
357
- // Roadmap scraping is per-repo and would require an extra fetch
358
- // per unique repo in the broad result set — deliberately skipped
359
- // here to keep the broad path cheap. Anchor mode keeps the bonus.
360
- },
361
- });
362
- }
363
- catch (err) {
364
- if (getHttpStatusCode(err) === 401 || isRateLimitError(err))
365
- throw err;
366
- warn(MODULE, `vet failed for ${item.html_url}: ${errorMessage(err)}`);
367
- continue;
368
- }
369
- const horizon = classifyHorizon({ hasMilestone, labels });
370
- candidates.push({ ...candidate, horizon });
389
+ // No roadmapRefs: roadmap scraping is per-repo and deliberately skipped
390
+ // on the broad path to keep it cheap. Anchor mode keeps the bonus.
391
+ const candidate = await vetAndClassify(item, opts.vetter);
392
+ if (candidate)
393
+ candidates.push(candidate);
371
394
  }
372
395
  const passing = candidates.filter((c) => c.viabilityScore >= MIN_VIABILITY_SCORE);
373
396
  const split = splitByHorizon(passing, opts.count, opts.splitRatio);
@@ -72,28 +72,29 @@ export declare class GistStateStore {
72
72
  * Push state to the gist. Also writes to local cache as fallback.
73
73
  */
74
74
  push(state: ScoutState): Promise<boolean>;
75
- /**
76
- * Pull state from the gist and merge with local state.
77
- */
78
- pull(): Promise<ScoutState | null>;
79
75
  /** Get the current gist ID (if known). */
80
76
  getGistId(): string | null;
81
77
  private bootstrapFromApi;
82
78
  private bootstrapFromCache;
83
79
  private fetchGistState;
80
+ /**
81
+ * Scan the user's gists for the state gist. `exhaustive: false` means the
82
+ * page cap was hit while pages were still full, so the account may hold
83
+ * the state gist beyond the scan window.
84
+ */
84
85
  private searchForGist;
85
86
  private createGist;
86
87
  private readCachedGistId;
87
88
  private saveGistId;
89
+ /**
90
+ * Merge the local state-cache into a freshly fetched gist state before it
91
+ * overwrites the cache (#117). Without this, a prior failed push left its
92
+ * only copy of the user's changes in state-cache.json, which the next
93
+ * successful bootstrap silently clobbered. Tombstone-aware mergeStates
94
+ * keeps both sides' real changes.
95
+ */
96
+ private mergeCacheInto;
88
97
  private readCache;
89
98
  private writeCache;
90
99
  }
91
- /**
92
- * Merge two ScoutState objects with conflict resolution:
93
- * - repoScores: per-repo, keep the one with more total PR activity
94
- * - mergedPRs/closedPRs/openPRs: union by URL
95
- * - preferences: remote wins
96
- * - starredRepos: keep the list with the fresher timestamp
97
- * - savedResults: union by issueUrl, keep newer lastSeenAt
98
- */
99
100
  export declare function mergeStates(local: ScoutState, remote: ScoutState): ScoutState;
@@ -6,28 +6,23 @@
6
6
  */
7
7
  import * as fs from "fs";
8
8
  import * as path from "path";
9
- import { ScoutStateSchema } from "./schemas.js";
9
+ import { ScoutStateSchema, parseScoutState } from "./schemas.js";
10
10
  import { getDataDir } from "./utils.js";
11
11
  import { debug, warn } from "./logger.js";
12
- import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
12
+ import { errorMessage, getHttpStatusCode, isRateLimitError, rethrowIfFatal, } from "./errors.js";
13
13
  const MODULE = "gist-state";
14
14
  const GIST_DESCRIPTION = "oss-scout-state";
15
15
  const GIST_FILENAME = "state.json";
16
16
  const GIST_ID_FILE = "gist-id";
17
17
  const CACHE_FILE = "state-cache.json";
18
- const SEARCH_MAX_PAGES = 5;
18
+ const SEARCH_MAX_PAGES = 10;
19
19
  /** Classify an unknown error into a DegradedReason for user-facing messaging. */
20
20
  function classifyDegradedReason(err) {
21
+ // isRateLimitError covers 429, 403 + "rate limit" (including secondary),
22
+ // and 403 + "abuse detection" (#138) — single source of truth.
21
23
  if (isRateLimitError(err))
22
24
  return "rate_limit";
23
25
  const status = getHttpStatusCode(err);
24
- // GitHub's abuse-detection responses arrive as 403 with "abuse detection"
25
- // in the message but no "rate limit" substring — match resolveErrorCode's
26
- // logic in errors.ts so we don't misclassify as 'unknown'.
27
- if (status === 403 &&
28
- errorMessage(err).toLowerCase().includes("abuse detection")) {
29
- return "rate_limit";
30
- }
31
26
  if (status !== undefined && status >= 500 && status < 600)
32
27
  return "server";
33
28
  if (err && typeof err === "object" && "code" in err) {
@@ -81,12 +76,29 @@ export class GistStateStore {
81
76
  * Push state to the gist. Also writes to local cache as fallback.
82
77
  */
83
78
  async push(state) {
84
- this.writeCache(state);
85
79
  if (!this.gistId) {
86
80
  warn(MODULE, "No gist ID — cannot push");
81
+ this.writeCache(state);
87
82
  return false;
88
83
  }
89
- const json = JSON.stringify(state, null, 2);
84
+ // Fetch the current gist and merge before writing, so a concurrent push
85
+ // from another machine is not blindly clobbered (#117). The deletion
86
+ // tombstones in mergeStates keep removals from resurfacing. A fetch
87
+ // failure (not auth/rate-limit, which propagate) degrades to writing the
88
+ // local snapshot, the prior best-effort behavior.
89
+ let toWrite = state;
90
+ try {
91
+ const remote = await this.fetchGistState(this.gistId);
92
+ if (remote) {
93
+ toWrite = mergeStates(state, remote);
94
+ }
95
+ }
96
+ catch (err) {
97
+ rethrowIfFatal(err);
98
+ warn(MODULE, `Could not fetch gist before push, writing local snapshot: ${errorMessage(err)}`);
99
+ }
100
+ this.writeCache(toWrite);
101
+ const json = JSON.stringify(toWrite, null, 2);
90
102
  if (json.length > 900000) {
91
103
  warn(MODULE, `State too large for gist (${Math.round(json.length / 1024)}KB). Consider clearing old results with 'oss-scout results clear'.`);
92
104
  return false;
@@ -105,32 +117,11 @@ export class GistStateStore {
105
117
  // Both auth and rate-limit propagate per documented strategy.
106
118
  // Local cache write already happened above, so the user's work isn't
107
119
  // lost — but they need clear feedback that the sync failed.
108
- if (getHttpStatusCode(err) === 401 || isRateLimitError(err))
109
- throw err;
120
+ rethrowIfFatal(err);
110
121
  warn(MODULE, `Failed to push: ${errorMessage(err)}`);
111
122
  return false;
112
123
  }
113
124
  }
114
- /**
115
- * Pull state from the gist and merge with local state.
116
- */
117
- async pull() {
118
- if (!this.gistId)
119
- return null;
120
- try {
121
- const state = await this.fetchGistState(this.gistId);
122
- if (state) {
123
- this.writeCache(state);
124
- }
125
- return state;
126
- }
127
- catch (err) {
128
- if (getHttpStatusCode(err) === 401 || isRateLimitError(err))
129
- throw err;
130
- warn(MODULE, `Failed to pull: ${errorMessage(err)}`);
131
- return null;
132
- }
133
- }
134
125
  /** Get the current gist ID (if known). */
135
126
  getGistId() {
136
127
  return this.gistId;
@@ -142,9 +133,10 @@ export class GistStateStore {
142
133
  if (cachedId) {
143
134
  debug(MODULE, `Trying cached gist ID: ${cachedId}`);
144
135
  try {
145
- const state = await this.fetchGistState(cachedId);
146
- if (state) {
136
+ const fetched = await this.fetchGistState(cachedId);
137
+ if (fetched) {
147
138
  this.gistId = cachedId;
139
+ const state = this.mergeCacheInto(fetched);
148
140
  this.writeCache(state);
149
141
  return { gistId: cachedId, state, created: false };
150
142
  }
@@ -162,21 +154,28 @@ export class GistStateStore {
162
154
  debug(MODULE, "Cached gist ID invalid, searching...");
163
155
  }
164
156
  // 2. Search user's gists
165
- const foundId = await this.searchForGist();
166
- if (foundId) {
167
- debug(MODULE, `Found gist via search: ${foundId}`);
168
- this.saveGistId(foundId);
169
- this.gistId = foundId;
170
- const state = await this.fetchGistState(foundId);
171
- if (state) {
157
+ const search = await this.searchForGist();
158
+ if (search.id) {
159
+ debug(MODULE, `Found gist via search: ${search.id}`);
160
+ this.saveGistId(search.id);
161
+ this.gistId = search.id;
162
+ const fetched = await this.fetchGistState(search.id);
163
+ if (fetched) {
164
+ const state = this.mergeCacheInto(fetched);
172
165
  this.writeCache(state);
173
- return { gistId: foundId, state, created: false };
166
+ return { gistId: search.id, state, created: false };
174
167
  }
175
168
  // Gist exists but content failed validation — fall back to cache
176
169
  // to avoid overwriting the user's data by creating a new gist.
177
- warn(MODULE, `Found existing gist ${foundId} but content failed validation. Using local cache to avoid data loss.`);
170
+ warn(MODULE, `Found existing gist ${search.id} but content failed validation. Using local cache to avoid data loss.`);
178
171
  return this.bootstrapFromCache("unknown");
179
172
  }
173
+ if (!search.exhaustive) {
174
+ // The account has more gists than we scanned; an existing state gist
175
+ // may sit beyond the scan window, and creating a new one would fork
176
+ // state across machines.
177
+ warn(MODULE, `Scanned the first ${SEARCH_MAX_PAGES * 100} gists without finding an oss-scout state gist, but the account has more. Creating a new state gist; if one already exists, copy its id into the gist-id file in the oss-scout data directory to avoid a duplicate.`);
178
+ }
180
179
  // 3. Create new gist
181
180
  debug(MODULE, "No existing gist found, creating new one");
182
181
  const freshState = ScoutStateSchema.parse({ version: 1 });
@@ -219,13 +218,18 @@ export class GistStateStore {
219
218
  return null;
220
219
  try {
221
220
  const parsed = JSON.parse(file.content);
222
- return ScoutStateSchema.parse(parsed);
221
+ return parseScoutState(parsed);
223
222
  }
224
223
  catch (err) {
225
224
  warn(MODULE, `Gist content failed validation: ${errorMessage(err)}`);
226
225
  return null;
227
226
  }
228
227
  }
228
+ /**
229
+ * Scan the user's gists for the state gist. `exhaustive: false` means the
230
+ * page cap was hit while pages were still full, so the account may hold
231
+ * the state gist beyond the scan window.
232
+ */
229
233
  async searchForGist() {
230
234
  for (let page = 1; page <= SEARCH_MAX_PAGES; page++) {
231
235
  const { data: gists } = await this.octokit.gists.list({
@@ -233,12 +237,15 @@ export class GistStateStore {
233
237
  page,
234
238
  });
235
239
  if (gists.length === 0)
236
- break;
240
+ return { id: null, exhaustive: true };
237
241
  const match = gists.find((g) => g.description === GIST_DESCRIPTION);
238
242
  if (match)
239
- return match.id;
243
+ return { id: match.id, exhaustive: true };
244
+ // A short page means we have seen every gist
245
+ if (gists.length < 100)
246
+ return { id: null, exhaustive: true };
240
247
  }
241
- return null;
248
+ return { id: null, exhaustive: false };
242
249
  }
243
250
  async createGist(state) {
244
251
  const { data } = await this.octokit.gists.create({
@@ -267,10 +274,21 @@ export class GistStateStore {
267
274
  saveGistId(id) {
268
275
  fs.writeFileSync(getGistIdPath(), id + "\n", { mode: 0o600 });
269
276
  }
277
+ /**
278
+ * Merge the local state-cache into a freshly fetched gist state before it
279
+ * overwrites the cache (#117). Without this, a prior failed push left its
280
+ * only copy of the user's changes in state-cache.json, which the next
281
+ * successful bootstrap silently clobbered. Tombstone-aware mergeStates
282
+ * keeps both sides' real changes.
283
+ */
284
+ mergeCacheInto(fetched) {
285
+ const cached = this.readCache();
286
+ return cached ? mergeStates(cached, fetched) : fetched;
287
+ }
270
288
  readCache() {
271
289
  try {
272
290
  const raw = fs.readFileSync(getCachePath(), "utf-8");
273
- return ScoutStateSchema.parse(JSON.parse(raw));
291
+ return parseScoutState(JSON.parse(raw));
274
292
  }
275
293
  catch (err) {
276
294
  const code = err?.code;
@@ -299,19 +317,76 @@ export class GistStateStore {
299
317
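  * - preferences: keep the side with the fresher preferencesUpdatedAt; remote wins on a tie or when local has no timestamp
300
318
  * - starredRepos: keep the list with the fresher timestamp
301
319
  * - savedResults: union by issueUrl, keep newer lastSeenAt; drop entries removed by a tombstone (unless re-added after it) or present in skippedIssues
320
+ * - unknown top-level keys (from a newer binary, #137): carried over via
321
+ * spreads, remote wins on conflicts (remote is spread last)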
302
322
  */
323
+ /** Retain tombstones this long so a slow-to-sync machine still honors them. */
324
+ const TOMBSTONE_TTL_MS = 90 * 24 * 60 * 60 * 1000;
325
+ /**
326
+ * Merge tombstones from both sides (newest removedAt per URL wins) and drop
327
+ * any older than the TTL so the list cannot grow without bound (#117).
328
+ */
329
+ function mergeTombstones(local, remote) {
330
+ const cutoff = Date.now() - TOMBSTONE_TTL_MS;
331
+ const byUrl = new Map();
332
+ for (const t of [...local, ...remote]) {
333
+ const existing = byUrl.get(t.url);
334
+ if (!existing || t.removedAt > existing.removedAt)
335
+ byUrl.set(t.url, t);
336
+ }
337
+ return [...byUrl.values()].filter((t) => {
338
+ const ts = new Date(t.removedAt).getTime();
339
+ return !Number.isFinite(ts) || ts >= cutoff;
340
+ });
341
+ }
342
+ /**
343
+ * Drop merged items that a tombstone deleted, unless the item was re-added
344
+ * after the deletion (item timestamp newer than the tombstone) (#117).
345
+ */
346
+ function applyTombstones(items, tombstones, urlOf, touchedAtOf) {
347
+ if (tombstones.length === 0)
348
+ return items;
349
+ const byUrl = new Map(tombstones.map((t) => [t.url, t.removedAt]));
350
+ return items.filter((item) => {
351
+ const removedAt = byUrl.get(urlOf(item));
352
+ if (removedAt === undefined)
353
+ return true;
354
+ // Keep only if re-added strictly after the deletion
355
+ return touchedAtOf(item) > removedAt;
356
+ });
357
+ }
303
358
  export function mergeStates(local, remote) {
359
+ const tombstones = mergeTombstones(local.tombstones ?? [], remote.tombstones ?? []);
360
+ const savedResults = applyTombstones(mergeSavedResults(local.savedResults ?? [], remote.savedResults ?? []), tombstones, (r) => r.issueUrl, (r) => r.lastSeenAt);
361
+ const skippedIssues = applyTombstones(mergeSkippedIssues(local.skippedIssues ?? [], remote.skippedIssues ?? []), tombstones, (s) => s.url, (s) => s.skippedAt);
362
+ // A skipped URL must never linger in saved results (skipIssue's own
363
+ // invariant). Enforce it after the merge so a remote copy can't resurrect
364
+ // a now-skipped result into the saved list (#117).
365
+ const skippedUrls = new Set(skippedIssues.map((s) => s.url));
366
+ const savedResultsFinal = savedResults.filter((r) => !skippedUrls.has(r.issueUrl));
367
+ // Preferences: keep the side with the fresher preferencesUpdatedAt instead
368
+ // of always taking remote, which silently reverted a local edit (#117).
369
+ const localPrefsTs = local.preferencesUpdatedAt;
370
+ const remotePrefsTs = remote.preferencesUpdatedAt;
371
+ const localPrefsWin = localPrefsTs !== undefined &&
372
+ (remotePrefsTs === undefined || localPrefsTs > remotePrefsTs);
373
+ const preferences = localPrefsWin ? local.preferences : remote.preferences;
374
+ const preferencesUpdatedAt = pickFresherTimestamp(localPrefsTs, remotePrefsTs);
304
375
  return {
376
+ ...local,
377
+ ...remote,
305
378
  version: 1,
306
- preferences: remote.preferences,
379
+ preferences,
380
+ preferencesUpdatedAt,
381
+ tombstones,
307
382
  repoScores: mergeRepoScores(local.repoScores, remote.repoScores),
308
383
  starredRepos: mergeStarredRepos(local, remote),
309
384
  starredReposLastFetched: pickFresherTimestamp(local.starredReposLastFetched, remote.starredReposLastFetched),
310
385
  mergedPRs: unionByUrl(local.mergedPRs, remote.mergedPRs),
311
386
  closedPRs: unionByUrl(local.closedPRs, remote.closedPRs),
312
387
  openPRs: unionByUrl(local.openPRs ?? [], remote.openPRs ?? []),
313
- savedResults: mergeSavedResults(local.savedResults ?? [], remote.savedResults ?? []),
314
- skippedIssues: mergeSkippedIssues(local.skippedIssues ?? [], remote.skippedIssues ?? []),
388
+ savedResults: savedResultsFinal,
389
+ skippedIssues,
315
390
  lastSearchAt: pickFresherTimestamp(local.lastSearchAt, remote.lastSearchAt),
316
391
  lastRunAt: pickFresherTimestamp(local.lastRunAt, remote.lastRunAt) ??
317
392
  new Date().toISOString(),
@@ -9,8 +9,25 @@
9
9
  * for the same endpoint (e.g., star counts for two PRs in the same repo)
10
10
  * share a single HTTP round-trip.
11
11
  */
12
+ /**
13
+ * Schema version for cache entries whose body is an oss-scout-defined shape
14
+ * (vetting results, search payloads, policy scans, merged-PR counts) rather
15
+ * than a raw GitHub API response. These are deserialized with an unchecked
16
+ * cast, so a shape change between releases would otherwise let a new build read
17
+ * a stale-shaped entry. Bump this whenever one of those cached shapes changes:
18
+ * old entries then miss the version-prefixed key and are refetched instead of
19
+ * misread (#158). Raw ETag-keyed GitHub responses are not versioned — their
20
+ * shape is owned by GitHub, not us.
21
+ */
22
+ export declare const CACHE_SCHEMA_VERSION = "v1";
23
+ /**
24
+ * Prefix a synthetic (non-URL) cache key with the schema version so a shape
25
+ * change invalidates old entries. Use for every key whose body is read back
26
+ * with an unchecked cast.
27
+ */
28
+ export declare function versionedCacheKey(key: string): string;
12
29
  /** Shape of a single cache entry on disk. */
13
- interface CacheEntry {
30
+ export interface CacheEntry {
14
31
  etag: string;
15
32
  url: string;
16
33
  body: unknown;
@@ -38,6 +55,12 @@ export declare class HttpCache {
38
55
  * (e.g., caching aggregated results from paginated API calls).
39
56
  */
40
57
  getIfFresh(key: string, maxAgeMs: number): unknown | null;
58
+ /**
59
+ * Like {@link getIfFresh}, but returns the whole entry so callers can
60
+ * distinguish "no fresh entry" (null) from a legitimately cached falsy
61
+ * body (`0`, `""`, `false`, `null`).
62
+ */
63
+ getEntryIfFresh(key: string, maxAgeMs: number): CacheEntry | null;
41
64
  /**
42
65
  * Look up a cached response. Returns `null` if no cache entry exists.
43
66
  */
@@ -91,6 +114,14 @@ export declare function getHttpCache(): HttpCache;
91
114
  * cached body without consuming a rate-limit point.
92
115
  * 3. On a fresh 200, caches the ETag + body for next time.
93
116
  */
117
+ /**
118
+ * Share one in-flight computation per key: concurrent callers for the same
119
+ * key await the same promise instead of paying duplicate API calls (#124).
120
+ * The check-then-register pair runs without an intervening await, so two
121
+ * concurrent callers cannot both miss. Rejections propagate to every waiter
122
+ * and are never cached.
123
+ */
124
+ export declare function withInflightDedup<T>(cache: HttpCache, key: string, fn: () => Promise<T>): Promise<T>;
94
125
  export declare function cachedRequest<T>(cache: HttpCache, url: string, fetcher: (headers: Record<string, string>) => Promise<{
95
126
  data: T;
96
127
  headers?: Record<string, string>;
@@ -105,4 +136,3 @@ export declare function cachedRequest<T>(cache: HttpCache, url: string, fetcher:
105
136
  * (e.g. search queries, project health checks).
106
137
  */
107
138
  export declare function cachedTimeBased<T>(cache: HttpCache, key: string, maxAgeMs: number, fetcher: () => Promise<T>): Promise<T>;
108
- export {};