@oss-scout/core 0.11.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/dist/cli.bundle.cjs +89 -66
  2. package/dist/cli.js +302 -436
  3. package/dist/commands/command-scout.d.ts +21 -0
  4. package/dist/commands/command-scout.js +21 -0
  5. package/dist/commands/config.js +10 -128
  6. package/dist/commands/features.js +15 -28
  7. package/dist/commands/results.d.ts +13 -2
  8. package/dist/commands/results.js +29 -2
  9. package/dist/commands/search.d.ts +4 -0
  10. package/dist/commands/search.js +65 -70
  11. package/dist/commands/setup.d.ts +2 -0
  12. package/dist/commands/setup.js +35 -6
  13. package/dist/commands/skip.d.ts +4 -0
  14. package/dist/commands/skip.js +45 -55
  15. package/dist/commands/sync.d.ts +10 -0
  16. package/dist/commands/sync.js +10 -0
  17. package/dist/commands/vet-list.js +3 -19
  18. package/dist/commands/vet.js +18 -25
  19. package/dist/commands/with-scout.d.ts +32 -0
  20. package/dist/commands/with-scout.js +41 -0
  21. package/dist/core/anti-llm-policy.js +5 -33
  22. package/dist/core/bootstrap.d.ts +2 -2
  23. package/dist/core/bootstrap.js +5 -9
  24. package/dist/core/errors.d.ts +10 -0
  25. package/dist/core/errors.js +20 -5
  26. package/dist/core/feature-discovery.d.ts +13 -1
  27. package/dist/core/feature-discovery.js +104 -81
  28. package/dist/core/gist-state-store.d.ts +13 -12
  29. package/dist/core/gist-state-store.js +128 -53
  30. package/dist/core/http-cache.d.ts +32 -2
  31. package/dist/core/http-cache.js +74 -19
  32. package/dist/core/issue-discovery.d.ts +12 -1
  33. package/dist/core/issue-discovery.js +94 -67
  34. package/dist/core/issue-eligibility.d.ts +11 -4
  35. package/dist/core/issue-eligibility.js +124 -69
  36. package/dist/core/issue-graphql.d.ts +58 -0
  37. package/dist/core/issue-graphql.js +108 -0
  38. package/dist/core/issue-vetting.d.ts +115 -9
  39. package/dist/core/issue-vetting.js +246 -109
  40. package/dist/core/local-state.d.ts +6 -2
  41. package/dist/core/local-state.js +23 -5
  42. package/dist/core/logger.d.ts +12 -4
  43. package/dist/core/logger.js +33 -7
  44. package/dist/core/personalization.d.ts +30 -10
  45. package/dist/core/personalization.js +64 -24
  46. package/dist/core/preference-fields.d.ts +47 -0
  47. package/dist/core/preference-fields.js +180 -0
  48. package/dist/core/probe-repo-file.d.ts +47 -0
  49. package/dist/core/probe-repo-file.js +57 -0
  50. package/dist/core/repo-health.js +40 -32
  51. package/dist/core/roadmap.js +26 -22
  52. package/dist/core/schemas.d.ts +148 -26
  53. package/dist/core/schemas.js +83 -17
  54. package/dist/core/search-budget.d.ts +9 -0
  55. package/dist/core/search-budget.js +36 -3
  56. package/dist/core/search-phases.d.ts +4 -21
  57. package/dist/core/search-phases.js +37 -89
  58. package/dist/core/types.d.ts +151 -38
  59. package/dist/core/utils.js +60 -26
  60. package/dist/formatters/human.d.ts +60 -0
  61. package/dist/formatters/human.js +199 -0
  62. package/dist/formatters/markdown.d.ts +10 -0
  63. package/dist/formatters/markdown.js +31 -0
  64. package/dist/index.d.ts +6 -2
  65. package/dist/index.js +8 -0
  66. package/dist/scout.d.ts +75 -12
  67. package/dist/scout.js +265 -26
  68. package/package.json +1 -1
@@ -27,19 +27,39 @@ import type { IssueCandidate } from "./types.js";
27
27
  */
28
28
  export declare const REPO_BOOST = 20;
29
29
  export declare const LANGUAGE_BOOST = 10;
30
+ /** Soft boost for an issue-label ("issue type") match (#168). Language-tier. */
31
+ export declare const ISSUE_TYPE_BOOST = 10;
30
32
  /**
31
- * Annotate each candidate with `boostScore` and `boostReasons` based on
32
- * the caller-supplied preference lists. Mutates the array in place; the
33
- * caller is responsible for re-sorting afterwards.
34
- *
35
- * Mutation (rather than returning new objects) keeps the personalization
36
- * step a single linear pass over the array the caller already holds —
37
- * the sort step reads back from the same objects.
33
+ * Soft penalty for an avoidRepos match (#168). Milder than the hard
34
+ * excludeRepos filter: it pushes the candidate down but a strong boost (e.g. a
35
+ * preferRepos affinity, +20) can still outweigh it.
36
+ */
37
+ export declare const AVOID_PENALTY = 15;
38
+ /** Per-call personalization bias lists (#168). All optional; empty = no effect. */
39
+ export interface PersonalizationBias {
40
+ preferLanguages?: string[];
41
+ preferRepos?: string[];
42
+ avoidRepos?: string[];
43
+ boostIssueTypes?: string[];
44
+ }
45
+ /**
46
+ * The personalization sort weight of a candidate: its net score, or 0 when it
47
+ * carries no personalization marker. Reads the structural `personalization`
48
+ * field (#158). The score can be negative when avoidRepos applied (#168).
49
+ */
50
+ export declare function boostScoreOf(candidate: IssueCandidate): number;
51
+ /**
52
+ * Return a new candidate list where each candidate matching a caller-supplied
53
+ * bias carries a `personalization` marker with a NET score (#168): preferRepos,
54
+ * preferLanguages and boostIssueTypes add; avoidRepos subtracts. The score may
55
+ * be negative (avoid-only) — boostScoreOf sorts those below neutral candidates.
56
+ * Does NOT mutate the input (#158): matched candidates are shallow copies,
57
+ * unmatched ones pass through unchanged.
38
58
  *
39
- * No-op when both preference lists are empty or undefined: candidates
40
- * retain `boostScore: undefined` and the sort tier collapses to 0.
59
+ * No-op when every bias list is empty/undefined: the input array is returned
60
+ * as-is and the sort tier collapses to 0 for every candidate.
41
61
  */
42
- export declare function annotateBoost(candidates: IssueCandidate[], preferLanguages?: string[], preferRepos?: string[]): void;
62
+ export declare function annotateBoost(candidates: IssueCandidate[], bias?: PersonalizationBias): IssueCandidate[];
43
63
  /**
44
64
  * Apply a diversity-counterweight pass over a pre-sorted candidate list
45
65
  * (#1244). Returns the first `maxResults` picks in priority order:
@@ -26,40 +26,77 @@
26
26
  */
27
27
  export const REPO_BOOST = 20;
28
28
  export const LANGUAGE_BOOST = 10;
29
+ /** Soft boost for an issue-label ("issue type") match (#168). Language-tier. */
30
+ export const ISSUE_TYPE_BOOST = 10;
29
31
  /**
30
- * Annotate each candidate with `boostScore` and `boostReasons` based on
31
- * the caller-supplied preference lists. Mutates the array in place; the
32
- * caller is responsible for re-sorting afterwards.
33
- *
34
- * Mutation (rather than returning new objects) keeps the personalization
35
- * step a single linear pass over the array the caller already holds —
36
- * the sort step reads back from the same objects.
32
+ * Soft penalty for an avoidRepos match (#168). Milder than the hard
33
+ * excludeRepos filter: it pushes the candidate down but a strong boost (e.g. a
34
+ * preferRepos affinity, +20) can still outweigh it.
35
+ */
36
+ export const AVOID_PENALTY = 15;
37
/**
 * Personalization sort weight of a single candidate.
 *
 * Only a `personalization` marker whose kind is "boosted" carries a score; a
 * candidate with any other marker, or with no marker at all, weighs 0. The
 * marker's score is returned as-is, so the result is negative when the net
 * score is negative (e.g. an avoidRepos penalty, #168).
 */
export function boostScoreOf(candidate) {
    const marker = candidate.personalization;
    if (marker?.kind !== "boosted") {
        return 0;
    }
    return marker.score;
}
47
/**
 * Build a lookup set from an optional list of strings. Each entry is trimmed
 * and lower-cased; entries that are empty after trimming are dropped. A
 * null/undefined list yields an empty set.
 */
function normalizeSet(values) {
    const normalized = new Set();
    for (const raw of values ?? []) {
        const entry = raw.trim().toLowerCase();
        if (entry) {
            normalized.add(entry);
        }
    }
    return normalized;
}
50
+ /**
51
+ * Return a new candidate list where each candidate matching a caller-supplied
52
+ * bias carries a `personalization` marker with a NET score (#168): preferRepos,
53
+ * preferLanguages and boostIssueTypes add; avoidRepos subtracts. The score may
54
+ * be negative (avoid-only) — boostScoreOf sorts those below neutral candidates.
55
+ * Does NOT mutate the input (#158): matched candidates are shallow copies,
56
+ * unmatched ones pass through unchanged.
37
57
  *
38
- * No-op when both preference lists are empty or undefined: candidates
39
- * retain `boostScore: undefined` and the sort tier collapses to 0.
58
+ * No-op when every bias list is empty/undefined: the input array is returned
59
+ * as-is and the sort tier collapses to 0 for every candidate.
40
60
  */
41
- export function annotateBoost(candidates, preferLanguages, preferRepos) {
42
- const langSet = new Set((preferLanguages ?? []).map((l) => l.trim().toLowerCase()).filter(Boolean));
43
- const repoSet = new Set((preferRepos ?? []).map((r) => r.trim()).filter(Boolean));
44
- if (langSet.size === 0 && repoSet.size === 0)
45
- return;
46
- for (const c of candidates) {
61
+ export function annotateBoost(candidates, bias = {}) {
62
+ const langSet = normalizeSet(bias.preferLanguages);
63
+ const repoSet = normalizeSet(bias.preferRepos);
64
+ const avoidSet = normalizeSet(bias.avoidRepos);
65
+ const typeSet = normalizeSet(bias.boostIssueTypes);
66
+ if (langSet.size === 0 &&
67
+ repoSet.size === 0 &&
68
+ avoidSet.size === 0 &&
69
+ typeSet.size === 0) {
70
+ return candidates;
71
+ }
72
+ return candidates.map((c) => {
47
73
  let score = 0;
48
74
  const reasons = [];
49
- if (repoSet.size > 0 && repoSet.has(c.issue.repo)) {
75
+ const repoLower = c.issue.repo.toLowerCase();
76
+ if (repoSet.size > 0 && repoSet.has(repoLower)) {
50
77
  score += REPO_BOOST;
51
78
  reasons.push(`repo affinity: ${c.issue.repo}`);
52
79
  }
53
- const lang = c.projectHealth.language;
80
+ const lang = c.projectHealth.checkFailed ? null : c.projectHealth.language;
54
81
  if (langSet.size > 0 && lang && langSet.has(lang.toLowerCase())) {
55
82
  score += LANGUAGE_BOOST;
56
83
  reasons.push(`language match: ${lang}`);
57
84
  }
58
- if (score > 0) {
59
- c.boostScore = score;
60
- c.boostReasons = reasons;
85
+ if (typeSet.size > 0) {
86
+ const matched = c.issue.labels.find((l) => typeSet.has(l.toLowerCase()));
87
+ if (matched) {
88
+ score += ISSUE_TYPE_BOOST;
89
+ reasons.push(`issue type: ${matched}`);
90
+ }
61
91
  }
62
- }
92
+ if (avoidSet.size > 0 && avoidSet.has(repoLower)) {
93
+ score -= AVOID_PENALTY;
94
+ reasons.push(`avoided repo: ${c.issue.repo}`);
95
+ }
96
+ if (reasons.length === 0)
97
+ return c;
98
+ return { ...c, personalization: { kind: "boosted", score, reasons } };
99
+ });
63
100
  }
64
101
  /**
65
102
  * Apply a diversity-counterweight pass over a pre-sorted candidate list
@@ -108,10 +145,13 @@ export function applyDiversityRatio(candidates, maxResults, diversityRatio) {
108
145
  break;
109
146
  if (seen.has(c.issue.url))
110
147
  continue;
111
- if (c.boostScore && c.boostScore > 0)
148
+ // Diversity slots are for candidates that matched NO personalization bias.
149
+ // Exclude both boosted (>0) and avoided (<0) candidates — resurfacing an
150
+ // avoided repo via a diversity slot would defeat the avoid (#168).
151
+ if (boostScoreOf(c) !== 0)
112
152
  continue;
113
- c.diversitySlot = true;
114
- picks.push(c);
153
+ // Tag a shallow copy rather than mutating the shared candidate (#158).
154
+ picks.push({ ...c, personalization: { kind: "diversity" } });
115
155
  seen.add(c.issue.url);
116
156
  }
117
157
  for (const c of candidates) {
@@ -0,0 +1,47 @@
1
+ /**
2
+ * Shared preference-field metadata and value parsing.
3
+ *
4
+ * The CLI (`commands/config.ts`) and the MCP `config-set` tool both update a
5
+ * single preference from a raw string. They used to carry separate, drifting
6
+ * copies of the key tables and parse logic — the CLI was missing the SLM
7
+ * triage keys, the MCP side lacked the `scope` special case and the +/- array
8
+ * syntax. This module is the single source of truth both drive (#153).
9
+ */
10
+ import type { ScoutPreferences } from "./schemas.js";
11
+ export type FieldConfig = {
12
+ type: "array" | "number" | "float" | "boolean" | "string";
13
+ } | {
14
+ type: "enum" | "enum-array";
15
+ validValues: readonly string[];
16
+ };
17
+ export declare const FIELD_CONFIGS: Record<string, FieldConfig>;
18
+ /**
19
+ * Every configurable preference key, derived from the schema so a new
20
+ * preference can't be silently left unconfigurable. `assertFieldConfigsCover`
21
+ * (exercised by a unit test) fails loudly if FIELD_CONFIGS drifts from this.
22
+ */
23
+ export declare const PREFERENCE_KEYS: readonly string[];
24
+ /** Sorted key list for "unknown key" error messages and help text. */
25
+ export declare const SORTED_PREFERENCE_KEYS: readonly string[];
26
+ /**
27
+ * Throw if any schema preference lacks a FIELD_CONFIG entry. Called from a
28
+ * test so adding a preference to the schema without teaching config-set how to
29
+ * parse it is caught in CI rather than at a user's first `config set newKey`.
30
+ */
31
+ export declare function assertFieldConfigsCover(): void;
32
+ /**
33
+ * Apply an array update: plain set, +append, or -remove.
34
+ *
35
+ * The -remove form starts with a dash, which commander rejects as an unknown
36
+ * option unless escaped: `config set excludeRepos -- "-spam/repo"`. The MCP
37
+ * tool has no commander layer so it can pass `-spam/repo` directly. Documented
38
+ * in the CLI help and README (#132).
39
+ */
40
+ export declare function updateArray(current: string[], value: string): string[];
41
+ /**
42
+ * Apply a single key/value update to a preferences object and return the
43
+ * fully validated result. The raw string `value` is the form both the CLI and
44
+ * the MCP tool receive; arrays accept comma-separated values and the +add /
45
+ * -remove syntax. Throws ValidationError on an unknown key or a bad value.
46
+ */
47
+ export declare function applyPreferenceField(preferences: ScoutPreferences, key: string, value: string): ScoutPreferences;
@@ -0,0 +1,180 @@
1
+ /**
2
+ * Shared preference-field metadata and value parsing.
3
+ *
4
+ * The CLI (`commands/config.ts`) and the MCP `config-set` tool both update a
5
+ * single preference from a raw string. They used to carry separate, drifting
6
+ * copies of the key tables and parse logic — the CLI was missing the SLM
7
+ * triage keys, the MCP side lacked the `scope` special case and the +/- array
8
+ * syntax. This module is the single source of truth both drive (#153).
9
+ */
10
+ import { ScoutPreferencesSchema, IssueScopeSchema, ProjectCategorySchema, PersistenceModeSchema, SearchStrategySchema, } from "./schemas.js";
11
+ import { ValidationError } from "./errors.js";
12
+ export const FIELD_CONFIGS = {
13
+ githubUsername: { type: "string" },
14
+ languages: { type: "array" },
15
+ labels: { type: "array" },
16
+ scope: { type: "enum-array", validValues: IssueScopeSchema.options },
17
+ excludeRepos: { type: "array" },
18
+ excludeOrgs: { type: "array" },
19
+ aiPolicyBlocklist: { type: "array" },
20
+ projectCategories: {
21
+ type: "enum-array",
22
+ validValues: ProjectCategorySchema.options,
23
+ },
24
+ minStars: { type: "number" },
25
+ maxIssueAgeDays: { type: "number" },
26
+ includeDocIssues: { type: "boolean" },
27
+ minRepoScoreThreshold: { type: "number" },
28
+ interPhaseDelayMs: { type: "number" },
29
+ persistence: { type: "enum", validValues: PersistenceModeSchema.options },
30
+ defaultStrategy: {
31
+ type: "enum-array",
32
+ validValues: SearchStrategySchema.options,
33
+ },
34
+ broadPhaseDelayMs: { type: "number" },
35
+ skipBroadWhenSufficientResults: { type: "number" },
36
+ preferLanguages: { type: "array" },
37
+ preferRepos: { type: "array" },
38
+ diversityRatio: { type: "float" },
39
+ avoidRepos: { type: "array" },
40
+ boostIssueTypes: { type: "array" },
41
+ slmTriageModel: { type: "string" },
42
+ slmTriageHost: { type: "string" },
43
+ featuresAnchorThreshold: { type: "number" },
44
+ featuresSplitRatio: { type: "float" },
45
+ };
46
+ /**
47
+ * Every configurable preference key, derived from the schema so a new
48
+ * preference can't be silently left unconfigurable. `assertFieldConfigsCover`
49
+ * (exercised by a unit test) fails loudly if FIELD_CONFIGS drifts from this.
50
+ */
51
+ export const PREFERENCE_KEYS = Object.keys(ScoutPreferencesSchema.shape);
52
+ /** Sorted key list for "unknown key" error messages and help text. */
53
+ export const SORTED_PREFERENCE_KEYS = [
54
+ ...PREFERENCE_KEYS,
55
+ ].sort();
56
/**
 * Verify that FIELD_CONFIGS and the schema-derived PREFERENCE_KEYS describe
 * the same set of keys.
 *
 * Throws an Error listing the preference keys that have no FIELD_CONFIGS
 * entry; if there are none, throws an Error listing FIELD_CONFIGS entries that
 * are not preference keys. Returns normally when the two agree. Intended to be
 * called from a test so drift is caught in CI rather than at a user's first
 * `config set newKey`.
 */
export function assertFieldConfigsCover() {
    // Schema keys that config-set does not know how to parse.
    const unconfigured = [];
    for (const prefKey of PREFERENCE_KEYS) {
        if (!(prefKey in FIELD_CONFIGS)) {
            unconfigured.push(prefKey);
        }
    }
    if (unconfigured.length > 0) {
        throw new Error(`FIELD_CONFIGS is missing entries for preference keys: ${unconfigured.join(", ")}`);
    }
    // Config entries that no longer correspond to a schema key.
    const unknown = [];
    for (const configKey of Object.keys(FIELD_CONFIGS)) {
        if (!PREFERENCE_KEYS.includes(configKey)) {
            unknown.push(configKey);
        }
    }
    if (unknown.length > 0) {
        throw new Error(`FIELD_CONFIGS has entries for unknown preference keys: ${unknown.join(", ")}`);
    }
}
71
/**
 * Parse a raw config string into a boolean.
 *
 * Accepts "true"/"yes" and "false"/"no", case-insensitively. Surrounding
 * whitespace is ignored so a padded value such as " true " is not rejected.
 *
 * @param value Raw string as received from the caller.
 * @returns The parsed boolean.
 * @throws ValidationError when the value is none of the accepted words.
 */
function parseBoolean(value) {
    const normalized = value.trim().toLowerCase();
    if (normalized === "true" || normalized === "yes")
        return true;
    if (normalized === "false" || normalized === "no")
        return false;
    // Report the value exactly as supplied so the user can see what was rejected.
    throw new ValidationError(`Invalid boolean value: "${value}". Use true/false or yes/no.`);
}
79
/**
 * Parse a raw config string into a base-10 integer.
 *
 * The whole (trimmed) string must be an optionally signed run of digits.
 * `parseInt` on its own stops at the first non-digit, so it would silently
 * turn "12abc" into 12 and "1.9" into 1; those inputs are rejected here
 * instead of being stored as a value the user did not type.
 *
 * @param value Raw string as received from the caller.
 * @param key Preference key, used only in the error message.
 * @returns The parsed integer.
 * @throws ValidationError when the value is not an integer literal.
 */
function parseIntValue(value, key) {
    const trimmed = value.trim();
    if (!/^[+-]?\d+$/.test(trimmed)) {
        throw new ValidationError(`Invalid number for "${key}": "${value}"`);
    }
    return parseInt(trimmed, 10);
}
86
/**
 * Parse a raw config string into a finite decimal number.
 *
 * The whole (trimmed) string must be a decimal literal, optionally signed and
 * optionally with an exponent. `Number.parseFloat` on its own ignores trailing
 * garbage ("0.5abc" becomes 0.5) and returns Infinity for "Infinity", which an
 * `isNaN` check does not catch; both are rejected here.
 *
 * @param value Raw string as received from the caller.
 * @param key Preference key, used only in the error message.
 * @returns The parsed finite number.
 * @throws ValidationError when the value is not a finite decimal literal.
 */
function parseFloatValue(value, key) {
    const trimmed = value.trim();
    const isDecimalLiteral = /^[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/i.test(trimmed);
    const num = isDecimalLiteral ? Number.parseFloat(trimmed) : Number.NaN;
    // isFinite also rejects literals that overflow to Infinity, e.g. "1e999".
    if (!Number.isFinite(num)) {
        throw new ValidationError(`Invalid number for "${key}": "${value}"`);
    }
    return num;
}
93
/**
 * Split a comma-separated string into its items. Each item is trimmed and
 * empty items are dropped; order and duplicates are preserved.
 */
function parseArrayValue(value) {
    const items = [];
    for (const piece of value.split(",")) {
        const item = piece.trim();
        if (item.length > 0) {
            items.push(item);
        }
    }
    return items;
}
99
/**
 * Compute the new value of an array preference from a raw update string.
 *
 * The first character of `value` selects the mode:
 * - `+a,b` appends each listed item that is not already present,
 * - `-a,b` removes every listed item,
 * - anything else replaces the list with the parsed items.
 *
 * `current` is never modified; a new array is returned in every mode.
 *
 * The remove form starts with a dash, which commander rejects as an unknown
 * option unless escaped: `config set excludeRepos -- "-spam/repo"`. The MCP
 * tool has no commander layer so it can pass `-spam/repo` directly. Documented
 * in the CLI help and README (#132).
 */
export function updateArray(current, value) {
    const mode = value.charAt(0);
    if (mode === "+") {
        const result = [...current];
        parseArrayValue(value.slice(1)).forEach((candidate) => {
            if (!result.includes(candidate)) {
                result.push(candidate);
            }
        });
        return result;
    }
    if (mode === "-") {
        const removals = new Set(parseArrayValue(value.slice(1)));
        return current.filter((entry) => !removals.has(entry));
    }
    return parseArrayValue(value);
}
123
/**
 * Apply a single key/value update to a preferences object and return the
 * result of validating it with ScoutPreferencesSchema.
 *
 * The raw string `value` is the form both the CLI and the MCP tool receive;
 * it is parsed according to the FIELD_CONFIGS entry for `key`. Array-typed
 * fields accept comma-separated values and the +add / -remove syntax. The
 * input `preferences` object is not modified; the update is applied to a
 * shallow copy.
 *
 * @param preferences Current preferences.
 * @param key Preference key to update; must be an own key of FIELD_CONFIGS.
 * @param value Raw string value.
 * @returns The updated preferences as returned by ScoutPreferencesSchema.parse.
 * @throws ValidationError on an unknown key or a value that cannot be parsed
 *   for the field's type. Schema-level rejections surface as whatever
 *   ScoutPreferencesSchema.parse throws.
 */
export function applyPreferenceField(preferences, key, value) {
    // Own-property lookup only. A bare FIELD_CONFIGS[key] also resolves members
    // inherited from Object.prototype ("constructor", "toString", "__proto__");
    // those are truthy, match no case below, and would turn the call into a
    // silent no-op instead of an "unknown key" error.
    const field = Object.prototype.hasOwnProperty.call(FIELD_CONFIGS, key)
        ? FIELD_CONFIGS[key]
        : undefined;
    if (!field) {
        throw new ValidationError(`Unknown config key: "${key}". Valid keys: ${SORTED_PREFERENCE_KEYS.join(", ")}`);
    }
    const prefs = { ...preferences };
    switch (field.type) {
        case "string":
            prefs[key] = value;
            break;
        case "boolean":
            prefs[key] = parseBoolean(value);
            break;
        case "number":
            prefs[key] = parseIntValue(value, key);
            break;
        case "float":
            prefs[key] = parseFloatValue(value, key);
            break;
        case "array": {
            const current = prefs[key] ?? [];
            prefs[key] = updateArray(current, value);
            break;
        }
        case "enum": {
            const validValues = field.validValues;
            if (!validValues.includes(value)) {
                throw new ValidationError(`Invalid value for "${key}": "${value}". Valid: ${validValues.join(", ")}`);
            }
            prefs[key] = value;
            break;
        }
        case "enum-array": {
            const current = prefs[key] ?? [];
            const updated = updateArray(current, value);
            const validValues = field.validValues;
            // Validate the full resulting list, not just the newly supplied items.
            const invalid = updated.filter((s) => !validValues.includes(s));
            if (invalid.length > 0) {
                throw new ValidationError(`Invalid value(s) for "${key}": ${invalid.join(", ")}. Valid: ${validValues.join(", ")}`);
            }
            // For 'scope', an empty array means undefined (all scopes).
            if (key === "scope") {
                prefs[key] = updated.length > 0 ? updated : undefined;
            }
            else {
                prefs[key] = updated;
            }
            break;
        }
        default:
            // A FIELD_CONFIGS entry with a type this switch does not handle is a
            // programming error; fail loudly rather than returning unchanged prefs.
            throw new Error(`Unhandled field type for preference key "${key}"`);
    }
    return ScoutPreferencesSchema.parse(prefs);
}
@@ -0,0 +1,47 @@
1
+ /**
2
+ * Single-path repo-file probe (#156).
3
+ *
4
+ * Three modules (repo-health, roadmap, anti-llm-policy) independently fetch a
5
+ * repo doc by trying a list of candidate paths and stopping at the first hit.
6
+ * The per-path fetch was copy-pasted three times, each re-deriving the same
7
+ * 404-continue / fatal-propagate / base64-decode logic. This is the one
8
+ * genuinely-shared primitive.
9
+ *
10
+ * The orchestration around it stays per-caller (parallel 4-path probe,
11
+ * sequential 5-path probe, sequential family probe) and so do the return shapes
12
+ * (parsed guidelines, issue-ref set, policy scan). Only the single GET is
13
+ * shared.
14
+ *
15
+ * The `transient` flag is load-bearing: it distinguishes a clean miss (404 —
16
+ * file absent) from a degraded miss (5xx, network) so callers can decide
17
+ * whether to cache a negative result or leave it open to retry. Collapsing the
18
+ * two would bypass anti-llm-policy's transient-failure cache safeguard, so the
19
+ * primitive must keep them separate.
20
+ */
21
+ import type { Octokit } from "@octokit/rest";
22
+ /**
23
+ * Result of probing one repo file path.
24
+ *
25
+ * - `text` — decoded UTF-8 content on a 200 with a file payload, else `null`
26
+ * (404, a non-content payload such as a directory listing, or a soft error).
27
+ * - `transient` — `true` only when the miss was a degraded failure (5xx,
28
+ * network) rather than a clean 404 / missing file. A `true` value means the
29
+ * `null` may be incomplete and the caller should avoid caching it as a known
30
+ * absence.
31
+ */
32
+ export interface ProbeRepoFileResult {
33
+ text: string | null;
34
+ transient: boolean;
35
+ }
36
+ /**
37
+ * GET one repo file path. Returns decoded content on a 200 file payload, a
38
+ * clean `null` on 404 or a non-content payload, and a transient `null` on a
39
+ * soft error (5xx, network) after logging it. Rethrows fatal errors (401 auth,
40
+ * rate limit) so the caller's existing rate-limit handling sees them.
41
+ *
42
+ * Callers that need 401/rate-limit to surface across a *parallel* batch (where
43
+ * a faster path may have already resolved) must inspect the rejected reasons
44
+ * themselves; this primitive only rethrows for the single path it owns. See
45
+ * repo-health and anti-llm-policy for that pre-scan.
46
+ */
47
+ export declare function probeRepoFile(octokit: Octokit, owner: string, repo: string, path: string): Promise<ProbeRepoFileResult>;
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Single-path repo-file probe (#156).
3
+ *
4
+ * Three modules (repo-health, roadmap, anti-llm-policy) independently fetch a
5
+ * repo doc by trying a list of candidate paths and stopping at the first hit.
6
+ * The per-path fetch was copy-pasted three times, each re-deriving the same
7
+ * 404-continue / fatal-propagate / base64-decode logic. This is the one
8
+ * genuinely-shared primitive.
9
+ *
10
+ * The orchestration around it stays per-caller (parallel 4-path probe,
11
+ * sequential 5-path probe, sequential family probe) and so do the return shapes
12
+ * (parsed guidelines, issue-ref set, policy scan). Only the single GET is
13
+ * shared.
14
+ *
15
+ * The `transient` flag is load-bearing: it distinguishes a clean miss (404 —
16
+ * file absent) from a degraded miss (5xx, network) so callers can decide
17
+ * whether to cache a negative result or leave it open to retry. Collapsing the
18
+ * two would bypass anti-llm-policy's transient-failure cache safeguard, so the
19
+ * primitive must keep them separate.
20
+ */
21
+ import { errorMessage, getHttpStatusCode, rethrowIfFatal } from "./errors.js";
22
+ import { warn } from "./logger.js";
23
+ const MODULE = "probe-repo-file";
24
+ /**
25
+ * GET one repo file path. Returns decoded content on a 200 file payload, a
26
+ * clean `null` on 404 or a non-content payload, and a transient `null` on a
27
+ * soft error (5xx, network) after logging it. Rethrows fatal errors (401 auth,
28
+ * rate limit) so the caller's existing rate-limit handling sees them.
29
+ *
30
+ * Callers that need 401/rate-limit to surface across a *parallel* batch (where
31
+ * a faster path may have already resolved) must inspect the rejected reasons
32
+ * themselves; this primitive only rethrows for the single path it owns. See
33
+ * repo-health and anti-llm-policy for that pre-scan.
34
+ */
35
+ export async function probeRepoFile(octokit, owner, repo, path) {
36
+ try {
37
+ const { data } = await octokit.repos.getContent({ owner, repo, path });
38
+ if (data &&
39
+ typeof data === "object" &&
40
+ "content" in data &&
41
+ typeof data.content === "string") {
42
+ return {
43
+ text: Buffer.from(data.content, "base64").toString("utf-8"),
44
+ transient: false,
45
+ };
46
+ }
47
+ return { text: null, transient: false };
48
+ }
49
+ catch (error) {
50
+ const status = getHttpStatusCode(error);
51
+ if (status === 404)
52
+ return { text: null, transient: false };
53
+ rethrowIfFatal(error);
54
+ warn(MODULE, `Unexpected error fetching ${path} from ${owner}/${repo}: ${errorMessage(error)}`);
55
+ return { text: null, transient: true };
56
+ }
57
+ }
@@ -5,9 +5,10 @@
5
5
  * from issue-level eligibility logic.
6
6
  */
7
7
  import { daysBetween } from "./utils.js";
8
- import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
8
+ import { errorMessage, getHttpStatusCode, isRateLimitError, rethrowIfFatal, } from "./errors.js";
9
9
  import { warn } from "./logger.js";
10
10
  import { getHttpCache, cachedRequest, cachedTimeBased } from "./http-cache.js";
11
+ import { probeRepoFile } from "./probe-repo-file.js";
11
12
  const MODULE = "repo-health";
12
13
  // ── Cache for contribution guidelines ──
13
14
  const guidelinesCache = new Map();
@@ -73,19 +74,14 @@ export async function checkProjectHealth(octokit, owner, repo) {
73
74
  });
74
75
  }
75
76
  catch (error) {
76
- if (getHttpStatusCode(error) === 401 || isRateLimitError(error)) {
77
- throw error;
78
- }
77
+ rethrowIfFatal(error);
79
78
  const errMsg = errorMessage(error);
80
79
  warn(MODULE, `Error checking project health for ${owner}/${repo}: ${errMsg}`);
80
+ // The check failed: only the repo and the reason are known. The
81
+ // discriminated ProjectHealth type intentionally has no place for the
82
+ // neutral-default snapshot fields this used to fabricate (#158).
81
83
  return {
82
84
  repo: `${owner}/${repo}`,
83
- lastCommitAt: "",
84
- daysSinceLastCommit: 999,
85
- openIssuesCount: 0,
86
- avgIssueResponseDays: 0,
87
- ciStatus: "unknown",
88
- isActive: false,
89
85
  checkFailed: true,
90
86
  failureReason: errMsg,
91
87
  };
@@ -104,19 +100,33 @@ export async function fetchContributionGuidelines(octokit, owner, repo) {
104
100
  if (cached && Date.now() - cached.fetchedAt < CACHE_TTL_MS) {
105
101
  return cached.guidelines;
106
102
  }
103
+ // Concurrent vets of issues from one repo share a single probe (#124)
104
+ const inflight = guidelinesInflight.get(cacheKey);
105
+ if (inflight)
106
+ return inflight;
107
+ const promise = fetchContributionGuidelinesUncached(octokit, owner, repo);
108
+ guidelinesInflight.set(cacheKey, promise);
109
+ try {
110
+ return await promise;
111
+ }
112
+ finally {
113
+ guidelinesInflight.delete(cacheKey);
114
+ }
115
+ }
116
+ const guidelinesInflight = new Map();
117
+ async function fetchContributionGuidelinesUncached(octokit, owner, repo) {
118
+ const cacheKey = `${owner}/${repo}`;
107
119
  const filesToCheck = [
108
120
  "CONTRIBUTING.md",
109
121
  ".github/CONTRIBUTING.md",
110
122
  "docs/CONTRIBUTING.md",
111
123
  "contributing.md",
112
124
  ];
113
- // Probe all paths in parallel — take the first success in priority order
114
- const results = await Promise.allSettled(filesToCheck.map((file) => octokit.repos.getContent({ owner, repo, path: file }).then(({ data }) => {
115
- if ("content" in data) {
116
- return Buffer.from(data.content, "base64").toString("utf-8");
117
- }
118
- return null;
119
- })));
125
+ // Probe all paths in parallel — take the first success in priority order.
126
+ // probeRepoFile rethrows 401/rate-limit, so those still surface here as
127
+ // rejected results for the pre-scan below; 404s and 5xx come back as a null
128
+ // text (the primitive warns on 5xx, so no extra warn is needed here).
129
+ const results = await Promise.allSettled(filesToCheck.map((file) => probeRepoFile(octokit, owner, repo, file)));
120
130
  // Pre-scan: auth/rate-limit must propagate even if a faster probe succeeded —
121
131
  // otherwise a path-restricted token that 401s on .github/CONTRIBUTING.md but
122
132
  // wins on CONTRIBUTING.md would silently hide the auth misconfiguration.
@@ -128,20 +138,13 @@ export async function fetchContributionGuidelines(octokit, owner, repo) {
128
138
  throw result.reason;
129
139
  }
130
140
  }
131
- for (let i = 0; i < results.length; i++) {
132
- const result = results[i];
133
- if (result.status === "fulfilled" && result.value) {
134
- const guidelines = parseContributionGuidelines(result.value);
141
+ for (const result of results) {
142
+ if (result.status === "fulfilled" && result.value.text) {
143
+ const guidelines = parseContributionGuidelines(result.value.text);
135
144
  guidelinesCache.set(cacheKey, { guidelines, fetchedAt: Date.now() });
136
145
  pruneCache();
137
146
  return guidelines;
138
147
  }
139
- if (result.status === "rejected") {
140
- const status = getHttpStatusCode(result.reason);
141
- if (status !== 404) {
142
- warn(MODULE, `Unexpected error fetching ${filesToCheck[i]} from ${owner}/${repo}: ${errorMessage(result.reason)}`);
143
- }
144
- }
145
148
  }
146
149
  // Cache the negative result too and prune if needed
147
150
  guidelinesCache.set(cacheKey, {
@@ -160,9 +163,13 @@ function parseContributionGuidelines(content) {
160
163
  rawContent: content,
161
164
  };
162
165
  const lowerContent = content.toLowerCase();
163
- // Detect branch naming conventions
166
+ // Detect branch naming conventions. CONTRIBUTING.md is attacker-controlled
167
+ // (it belongs to the repo being vetted): the unbounded [^\n]* pair forced
168
+ // quadratic backtracking on a long quote-less line, stalling the vet
169
+ // (#152). Bounded quantifiers keep the scan linear-ish; real conventions
170
+ // sit well inside 200 chars of their keyword.
164
171
  if (lowerContent.includes("branch")) {
165
- const branchMatch = content.match(/branch[^\n]*(?:named?|format|convention)[^\n]*[`"]([^`"]+)[`"]/i);
172
+ const branchMatch = content.match(/branch[^\n]{0,200}?(?:named?|format|convention)[^\n]{0,200}?[`"]([^`"\n]{1,100})[`"]/i);
166
173
  if (branchMatch) {
167
174
  guidelines.branchNamingConvention = branchMatch[1];
168
175
  }
@@ -172,7 +179,7 @@ function parseContributionGuidelines(content) {
172
179
  guidelines.commitMessageFormat = "conventional commits";
173
180
  }
174
181
  else if (lowerContent.includes("commit message")) {
175
- const commitMatch = content.match(/commit message[^\n]*[`"]([^`"]+)[`"]/i);
182
+ const commitMatch = content.match(/commit message[^\n]{0,200}?[`"]([^`"\n]{1,100})[`"]/i);
176
183
  if (commitMatch) {
177
184
  guidelines.commitMessageFormat = commitMatch[1];
178
185
  }
@@ -193,8 +200,9 @@ function parseContributionGuidelines(content) {
193
200
  guidelines.linter = "RuboCop";
194
201
  else if (lowerContent.includes("prettier"))
195
202
  guidelines.formatter = "Prettier";
196
- // Detect CLA requirement
197
- if (lowerContent.includes("cla") ||
203
+ // Detect CLA requirement. Word boundary matters: a bare substring check
204
+ // matches "class", "clang", "clarify", etc. and flags nearly every doc.
205
+ if (/\bcla\b/.test(lowerContent) ||
198
206
  lowerContent.includes("contributor license agreement")) {
199
207
  guidelines.claRequired = true;
200
208
  }