npm - @oss-scout/core - Versions diffs - 0.11.0 → 1.1.0 - Mend

@oss-scout/core 0.11.0 → 1.1.0

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (68) hide show

package/dist/cli.bundle.cjs +89 -66
package/dist/cli.js +302 -436
package/dist/commands/command-scout.d.ts +21 -0
package/dist/commands/command-scout.js +21 -0
package/dist/commands/config.js +10 -128
package/dist/commands/features.js +15 -28
package/dist/commands/results.d.ts +13 -2
package/dist/commands/results.js +29 -2
package/dist/commands/search.d.ts +4 -0
package/dist/commands/search.js +65 -70
package/dist/commands/setup.d.ts +2 -0
package/dist/commands/setup.js +35 -6
package/dist/commands/skip.d.ts +4 -0
package/dist/commands/skip.js +45 -55
package/dist/commands/sync.d.ts +10 -0
package/dist/commands/sync.js +10 -0
package/dist/commands/vet-list.js +3 -19
package/dist/commands/vet.js +18 -25
package/dist/commands/with-scout.d.ts +32 -0
package/dist/commands/with-scout.js +41 -0
package/dist/core/anti-llm-policy.js +5 -33
package/dist/core/bootstrap.d.ts +2 -2
package/dist/core/bootstrap.js +5 -9
package/dist/core/errors.d.ts +10 -0
package/dist/core/errors.js +20 -5
package/dist/core/feature-discovery.d.ts +13 -1
package/dist/core/feature-discovery.js +104 -81
package/dist/core/gist-state-store.d.ts +13 -12
package/dist/core/gist-state-store.js +128 -53
package/dist/core/http-cache.d.ts +32 -2
package/dist/core/http-cache.js +74 -19
package/dist/core/issue-discovery.d.ts +12 -1
package/dist/core/issue-discovery.js +94 -67
package/dist/core/issue-eligibility.d.ts +11 -4
package/dist/core/issue-eligibility.js +124 -69
package/dist/core/issue-graphql.d.ts +58 -0
package/dist/core/issue-graphql.js +108 -0
package/dist/core/issue-vetting.d.ts +115 -9
package/dist/core/issue-vetting.js +246 -109
package/dist/core/local-state.d.ts +6 -2
package/dist/core/local-state.js +23 -5
package/dist/core/logger.d.ts +12 -4
package/dist/core/logger.js +33 -7
package/dist/core/personalization.d.ts +30 -10
package/dist/core/personalization.js +64 -24
package/dist/core/preference-fields.d.ts +47 -0
package/dist/core/preference-fields.js +180 -0
package/dist/core/probe-repo-file.d.ts +47 -0
package/dist/core/probe-repo-file.js +57 -0
package/dist/core/repo-health.js +40 -32
package/dist/core/roadmap.js +26 -22
package/dist/core/schemas.d.ts +148 -26
package/dist/core/schemas.js +83 -17
package/dist/core/search-budget.d.ts +9 -0
package/dist/core/search-budget.js +36 -3
package/dist/core/search-phases.d.ts +4 -21
package/dist/core/search-phases.js +37 -89
package/dist/core/types.d.ts +151 -38
package/dist/core/utils.js +60 -26
package/dist/formatters/human.d.ts +60 -0
package/dist/formatters/human.js +199 -0
package/dist/formatters/markdown.d.ts +10 -0
package/dist/formatters/markdown.js +31 -0
package/dist/index.d.ts +6 -2
package/dist/index.js +8 -0
package/dist/scout.d.ts +75 -12
package/dist/scout.js +265 -26
package/package.json +1 -1

package/dist/core/personalization.d.ts CHANGED Viewed

@@ -27,19 +27,39 @@ import type { IssueCandidate } from "./types.js";
  */
 export declare const REPO_BOOST = 20;
 export declare const LANGUAGE_BOOST = 10;
+/** Soft boost for an issue-label ("issue type") match (#168). Language-tier. */
+export declare const ISSUE_TYPE_BOOST = 10;
 /**
- * Annotate each candidate with `boostScore` and `boostReasons` based on
- * the caller-supplied preference lists. Mutates the array in place; the
- * caller is responsible for re-sorting afterwards.
- *
- * Mutation (rather than returning new objects) keeps the personalization
- * step a single linear pass over the array the caller already holds —
- * the sort step reads back from the same objects.
+ * Soft penalty for an avoidRepos match (#168). Milder than the hard
+ * excludeRepos filter: it pushes the candidate down but a strong boost (e.g. a
+ * preferRepos affinity, +20) can still outweigh it.
+ */
+export declare const AVOID_PENALTY = 15;
+/** Per-call personalization bias lists (#168). All optional; empty = no effect. */
+export interface PersonalizationBias {
+    preferLanguages?: string[];
+    preferRepos?: string[];
+    avoidRepos?: string[];
+    boostIssueTypes?: string[];
+}
+/**
+ * The personalization sort weight of a candidate: its net score, or 0 when it
+ * carries no personalization marker. Reads the structural `personalization`
+ * field (#158). The score can be negative when avoidRepos applied (#168).
+ */
+export declare function boostScoreOf(candidate: IssueCandidate): number;
+/**
+ * Return a new candidate list where each candidate matching a caller-supplied
+ * bias carries a `personalization` marker with a NET score (#168): preferRepos,
+ * preferLanguages and boostIssueTypes add; avoidRepos subtracts. The score may
+ * be negative (avoid-only) — boostScoreOf sorts those below neutral candidates.
+ * Does NOT mutate the input (#158): matched candidates are shallow copies,
+ * unmatched ones pass through unchanged.
  *
- * No-op when both preference lists are empty or undefined: candidates
- * retain `boostScore: undefined` and the sort tier collapses to 0.
+ * No-op when every bias list is empty/undefined: the input array is returned
+ * as-is and the sort tier collapses to 0 for every candidate.
  */
-export declare function annotateBoost(candidates: IssueCandidate[], preferLanguages?: string[], preferRepos?: string[]): void;
+export declare function annotateBoost(candidates: IssueCandidate[], bias?: PersonalizationBias): IssueCandidate[];
 /**
  * Apply a diversity-counterweight pass over a pre-sorted candidate list
  * (#1244). Returns the first `maxResults` picks in priority order:

package/dist/core/personalization.js CHANGED Viewed

@@ -26,40 +26,77 @@
  */
 export const REPO_BOOST = 20;
 export const LANGUAGE_BOOST = 10;
+/** Soft boost for an issue-label ("issue type") match (#168). Language-tier. */
+export const ISSUE_TYPE_BOOST = 10;
 /**
- * Annotate each candidate with `boostScore` and `boostReasons` based on
- * the caller-supplied preference lists. Mutates the array in place; the
- * caller is responsible for re-sorting afterwards.
- *
- * Mutation (rather than returning new objects) keeps the personalization
- * step a single linear pass over the array the caller already holds —
- * the sort step reads back from the same objects.
+ * Soft penalty for an avoidRepos match (#168). Milder than the hard
+ * excludeRepos filter: it pushes the candidate down but a strong boost (e.g. a
+ * preferRepos affinity, +20) can still outweigh it.
+ */
+export const AVOID_PENALTY = 15;
+/**
+ * The personalization sort weight of a candidate: its net score, or 0 when it
+ * carries no personalization marker. Reads the structural `personalization`
+ * field (#158). The score can be negative when avoidRepos applied (#168).
+ */
+export function boostScoreOf(candidate) {
+    return candidate.personalization?.kind === "boosted"
+        ? candidate.personalization.score
+        : 0;
+}
+function normalizeSet(values) {
+    return new Set((values ?? []).map((v) => v.trim().toLowerCase()).filter(Boolean));
+}
+/**
+ * Return a new candidate list where each candidate matching a caller-supplied
+ * bias carries a `personalization` marker with a NET score (#168): preferRepos,
+ * preferLanguages and boostIssueTypes add; avoidRepos subtracts. The score may
+ * be negative (avoid-only) — boostScoreOf sorts those below neutral candidates.
+ * Does NOT mutate the input (#158): matched candidates are shallow copies,
+ * unmatched ones pass through unchanged.
  *
- * No-op when both preference lists are empty or undefined: candidates
- * retain `boostScore: undefined` and the sort tier collapses to 0.
+ * No-op when every bias list is empty/undefined: the input array is returned
+ * as-is and the sort tier collapses to 0 for every candidate.
  */
-export function annotateBoost(candidates, preferLanguages, preferRepos) {
-    const langSet = new Set((preferLanguages ?? []).map((l) => l.trim().toLowerCase()).filter(Boolean));
-    const repoSet = new Set((preferRepos ?? []).map((r) => r.trim()).filter(Boolean));
-    if (langSet.size === 0 && repoSet.size === 0)
-        return;
-    for (const c of candidates) {
+export function annotateBoost(candidates, bias = {}) {
+    const langSet = normalizeSet(bias.preferLanguages);
+    const repoSet = normalizeSet(bias.preferRepos);
+    const avoidSet = normalizeSet(bias.avoidRepos);
+    const typeSet = normalizeSet(bias.boostIssueTypes);
+    if (langSet.size === 0 &&
+        repoSet.size === 0 &&
+        avoidSet.size === 0 &&
+        typeSet.size === 0) {
+        return candidates;
+    }
+    return candidates.map((c) => {
         let score = 0;
         const reasons = [];
-        if (repoSet.size > 0 && repoSet.has(c.issue.repo)) {
+        const repoLower = c.issue.repo.toLowerCase();
+        if (repoSet.size > 0 && repoSet.has(repoLower)) {
             score += REPO_BOOST;
             reasons.push(`repo affinity: ${c.issue.repo}`);
         }
-        const lang = c.projectHealth.language;
+        const lang = c.projectHealth.checkFailed ? null : c.projectHealth.language;
         if (langSet.size > 0 && lang && langSet.has(lang.toLowerCase())) {
             score += LANGUAGE_BOOST;
             reasons.push(`language match: ${lang}`);
         }
-        if (score > 0) {
-            c.boostScore = score;
-            c.boostReasons = reasons;
+        if (typeSet.size > 0) {
+            const matched = c.issue.labels.find((l) => typeSet.has(l.toLowerCase()));
+            if (matched) {
+                score += ISSUE_TYPE_BOOST;
+                reasons.push(`issue type: ${matched}`);
+            }
         }
-    }
+        if (avoidSet.size > 0 && avoidSet.has(repoLower)) {
+            score -= AVOID_PENALTY;
+            reasons.push(`avoided repo: ${c.issue.repo}`);
+        }
+        if (reasons.length === 0)
+            return c;
+        return { ...c, personalization: { kind: "boosted", score, reasons } };
+    });
 }
 /**
  * Apply a diversity-counterweight pass over a pre-sorted candidate list
@@ -108,10 +145,13 @@ export function applyDiversityRatio(candidates, maxResults, diversityRatio) {
             break;
         if (seen.has(c.issue.url))
             continue;
-        if (c.boostScore && c.boostScore > 0)
+        // Diversity slots are for candidates that matched NO personalization bias.
+        // Exclude both boosted (>0) and avoided (<0) candidates — resurfacing an
+        // avoided repo via a diversity slot would defeat the avoid (#168).
+        if (boostScoreOf(c) !== 0)
             continue;
-        c.diversitySlot = true;
-        picks.push(c);
+        // Tag a shallow copy rather than mutating the shared candidate (#158).
+        picks.push({ ...c, personalization: { kind: "diversity" } });
         seen.add(c.issue.url);
     }
     for (const c of candidates) {

package/dist/core/preference-fields.d.ts ADDED Viewed

@@ -0,0 +1,47 @@
+/**
+ * Shared preference-field metadata and value parsing.
+ *
+ * The CLI (`commands/config.ts`) and the MCP `config-set` tool both update a
+ * single preference from a raw string. They used to carry separate, drifting
+ * copies of the key tables and parse logic — the CLI was missing the SLM
+ * triage keys, the MCP side lacked the `scope` special case and the +/- array
+ * syntax. This module is the single source of truth both drive (#153).
+ */
+import type { ScoutPreferences } from "./schemas.js";
+export type FieldConfig = {
+    type: "array" | "number" | "float" | "boolean" | "string";
+} | {
+    type: "enum" | "enum-array";
+    validValues: readonly string[];
+};
+export declare const FIELD_CONFIGS: Record<string, FieldConfig>;
+/**
+ * Every configurable preference key, derived from the schema so a new
+ * preference can't be silently left unconfigurable. `assertFieldConfigsCover`
+ * (exercised by a unit test) fails loudly if FIELD_CONFIGS drifts from this.
+ */
+export declare const PREFERENCE_KEYS: readonly string[];
+/** Sorted key list for "unknown key" error messages and help text. */
+export declare const SORTED_PREFERENCE_KEYS: readonly string[];
+/**
+ * Throw if any schema preference lacks a FIELD_CONFIG entry. Called from a
+ * test so adding a preference to the schema without teaching config-set how to
+ * parse it is caught in CI rather than at a user's first `config set newKey`.
+ */
+export declare function assertFieldConfigsCover(): void;
+/**
+ * Apply an array update: plain set, +append, or -remove.
+ *
+ * The -remove form starts with a dash, which commander rejects as an unknown
+ * option unless escaped: `config set excludeRepos -- "-spam/repo"`. The MCP
+ * tool has no commander layer so it can pass `-spam/repo` directly. Documented
+ * in the CLI help and README (#132).
+ */
+export declare function updateArray(current: string[], value: string): string[];
+/**
+ * Apply a single key/value update to a preferences object and return the
+ * fully validated result. The raw string `value` is the form both the CLI and
+ * the MCP tool receive; arrays accept comma-separated values and the +add /
+ * -remove syntax. Throws ValidationError on an unknown key or a bad value.
+ */
+export declare function applyPreferenceField(preferences: ScoutPreferences, key: string, value: string): ScoutPreferences;

package/dist/core/preference-fields.js ADDED Viewed

@@ -0,0 +1,180 @@
+/**
+ * Shared preference-field metadata and value parsing.
+ *
+ * The CLI (`commands/config.ts`) and the MCP `config-set` tool both update a
+ * single preference from a raw string. They used to carry separate, drifting
+ * copies of the key tables and parse logic — the CLI was missing the SLM
+ * triage keys, the MCP side lacked the `scope` special case and the +/- array
+ * syntax. This module is the single source of truth both drive (#153).
+ */
+import { ScoutPreferencesSchema, IssueScopeSchema, ProjectCategorySchema, PersistenceModeSchema, SearchStrategySchema, } from "./schemas.js";
+import { ValidationError } from "./errors.js";
+export const FIELD_CONFIGS = {
+    githubUsername: { type: "string" },
+    languages: { type: "array" },
+    labels: { type: "array" },
+    scope: { type: "enum-array", validValues: IssueScopeSchema.options },
+    excludeRepos: { type: "array" },
+    excludeOrgs: { type: "array" },
+    aiPolicyBlocklist: { type: "array" },
+    projectCategories: {
+        type: "enum-array",
+        validValues: ProjectCategorySchema.options,
+    },
+    minStars: { type: "number" },
+    maxIssueAgeDays: { type: "number" },
+    includeDocIssues: { type: "boolean" },
+    minRepoScoreThreshold: { type: "number" },
+    interPhaseDelayMs: { type: "number" },
+    persistence: { type: "enum", validValues: PersistenceModeSchema.options },
+    defaultStrategy: {
+        type: "enum-array",
+        validValues: SearchStrategySchema.options,
+    },
+    broadPhaseDelayMs: { type: "number" },
+    skipBroadWhenSufficientResults: { type: "number" },
+    preferLanguages: { type: "array" },
+    preferRepos: { type: "array" },
+    diversityRatio: { type: "float" },
+    avoidRepos: { type: "array" },
+    boostIssueTypes: { type: "array" },
+    slmTriageModel: { type: "string" },
+    slmTriageHost: { type: "string" },
+    featuresAnchorThreshold: { type: "number" },
+    featuresSplitRatio: { type: "float" },
+};
+/**
+ * Every configurable preference key, derived from the schema so a new
+ * preference can't be silently left unconfigurable. `assertFieldConfigsCover`
+ * (exercised by a unit test) fails loudly if FIELD_CONFIGS drifts from this.
+ */
+export const PREFERENCE_KEYS = Object.keys(ScoutPreferencesSchema.shape);
+/** Sorted key list for "unknown key" error messages and help text. */
+export const SORTED_PREFERENCE_KEYS = [
+    ...PREFERENCE_KEYS,
+].sort();
+/**
+ * Throw if any schema preference lacks a FIELD_CONFIG entry. Called from a
+ * test so adding a preference to the schema without teaching config-set how to
+ * parse it is caught in CI rather than at a user's first `config set newKey`.
+ */
+export function assertFieldConfigsCover() {
+    const missing = PREFERENCE_KEYS.filter((k) => !(k in FIELD_CONFIGS));
+    if (missing.length > 0) {
+        throw new Error(`FIELD_CONFIGS is missing entries for preference keys: ${missing.join(", ")}`);
+    }
+    const extra = Object.keys(FIELD_CONFIGS).filter((k) => !PREFERENCE_KEYS.includes(k));
+    if (extra.length > 0) {
+        throw new Error(`FIELD_CONFIGS has entries for unknown preference keys: ${extra.join(", ")}`);
+    }
+}
+function parseBoolean(value) {
+    const lower = value.toLowerCase();
+    if (lower === "true" || lower === "yes")
+        return true;
+    if (lower === "false" || lower === "no")
+        return false;
+    throw new ValidationError(`Invalid boolean value: "${value}". Use true/false or yes/no.`);
+}
+function parseIntValue(value, key) {
+    const num = parseInt(value, 10);
+    if (isNaN(num)) {
+        throw new ValidationError(`Invalid number for "${key}": "${value}"`);
+    }
+    return num;
+}
+function parseFloatValue(value, key) {
+    const num = Number.parseFloat(value);
+    if (isNaN(num)) {
+        throw new ValidationError(`Invalid number for "${key}": "${value}"`);
+    }
+    return num;
+}
+function parseArrayValue(value) {
+    return value
+        .split(",")
+        .map((s) => s.trim())
+        .filter((s) => s.length > 0);
+}
+/**
+ * Apply an array update: plain set, +append, or -remove.
+ *
+ * The -remove form starts with a dash, which commander rejects as an unknown
+ * option unless escaped: `config set excludeRepos -- "-spam/repo"`. The MCP
+ * tool has no commander layer so it can pass `-spam/repo` directly. Documented
+ * in the CLI help and README (#132).
+ */
+export function updateArray(current, value) {
+    if (value.startsWith("+")) {
+        const toAdd = parseArrayValue(value.slice(1));
+        const merged = [...current];
+        for (const item of toAdd) {
+            if (!merged.includes(item))
+                merged.push(item);
+        }
+        return merged;
+    }
+    if (value.startsWith("-")) {
+        const toRemove = new Set(parseArrayValue(value.slice(1)));
+        return current.filter((item) => !toRemove.has(item));
+    }
+    return parseArrayValue(value);
+}
+/**
+ * Apply a single key/value update to a preferences object and return the
+ * fully validated result. The raw string `value` is the form both the CLI and
+ * the MCP tool receive; arrays accept comma-separated values and the +add /
+ * -remove syntax. Throws ValidationError on an unknown key or a bad value.
+ */
+export function applyPreferenceField(preferences, key, value) {
+    const field = FIELD_CONFIGS[key];
+    if (!field) {
+        throw new ValidationError(`Unknown config key: "${key}". Valid keys: ${SORTED_PREFERENCE_KEYS.join(", ")}`);
+    }
+    const prefs = { ...preferences };
+    switch (field.type) {
+        case "string":
+            prefs[key] = value;
+            break;
+        case "boolean":
+            prefs[key] = parseBoolean(value);
+            break;
+        case "number":
+            prefs[key] = parseIntValue(value, key);
+            break;
+        case "float":
+            prefs[key] = parseFloatValue(value, key);
+            break;
+        case "array": {
+            const current = prefs[key] ?? [];
+            prefs[key] = updateArray(current, value);
+            break;
+        }
+        case "enum": {
+            const validValues = field.validValues;
+            if (!validValues.includes(value)) {
+                throw new ValidationError(`Invalid value for "${key}": "${value}". Valid: ${validValues.join(", ")}`);
+            }
+            prefs[key] = value;
+            break;
+        }
+        case "enum-array": {
+            const current = prefs[key] ?? [];
+            const updated = updateArray(current, value);
+            const validValues = field.validValues;
+            const invalid = updated.filter((s) => !validValues.includes(s));
+            if (invalid.length > 0) {
+                throw new ValidationError(`Invalid value(s) for "${key}": ${invalid.join(", ")}. Valid: ${validValues.join(", ")}`);
+            }
+            // For 'scope', an empty array means undefined (all scopes).
+            if (key === "scope") {
+                prefs[key] = updated.length > 0 ? updated : undefined;
+            }
+            else {
+                prefs[key] = updated;
+            }
+            break;
+        }
+    }
+    return ScoutPreferencesSchema.parse(prefs);
+}

package/dist/core/probe-repo-file.d.ts ADDED Viewed

@@ -0,0 +1,47 @@
+/**
+ * Single-path repo-file probe (#156).
+ *
+ * Three modules (repo-health, roadmap, anti-llm-policy) independently fetch a
+ * repo doc by trying a list of candidate paths and stopping at the first hit.
+ * The per-path fetch was copy-pasted three times, each re-deriving the same
+ * 404-continue / fatal-propagate / base64-decode logic. This is the one
+ * genuinely-shared primitive.
+ *
+ * The orchestration around it stays per-caller (parallel 4-path probe,
+ * sequential 5-path probe, sequential family probe) and so do the return shapes
+ * (parsed guidelines, issue-ref set, policy scan). Only the single GET is
+ * shared.
+ *
+ * The `transient` flag is load-bearing: it distinguishes a clean miss (404 —
+ * file absent) from a degraded miss (5xx, network) so callers can decide
+ * whether to cache a negative result or leave it open to retry. Collapsing the
+ * two would bypass anti-llm-policy's transient-failure cache safeguard, so the
+ * primitive must keep them separate.
+ */
+import type { Octokit } from "@octokit/rest";
+/**
+ * Result of probing one repo file path.
+ *
+ * - `text` — decoded UTF-8 content on a 200 with a file payload, else `null`
+ *   (404, a non-content payload such as a directory listing, or a soft error).
+ * - `transient` — `true` only when the miss was a degraded failure (5xx,
+ *   network) rather than a clean 404 / missing file. A `true` value means the
+ *   `null` may be incomplete and the caller should avoid caching it as a known
+ *   absence.
+ */
+export interface ProbeRepoFileResult {
+    text: string | null;
+    transient: boolean;
+}
+/**
+ * GET one repo file path. Returns decoded content on a 200 file payload, a
+ * clean `null` on 404 or a non-content payload, and a transient `null` on a
+ * soft error (5xx, network) after logging it. Rethrows fatal errors (401 auth,
+ * rate limit) so the caller's existing rate-limit handling sees them.
+ *
+ * Callers that need 401/rate-limit to surface across a *parallel* batch (where
+ * a faster path may have already resolved) must inspect the rejected reasons
+ * themselves; this primitive only rethrows for the single path it owns. See
+ * repo-health and anti-llm-policy for that pre-scan.
+ */
+export declare function probeRepoFile(octokit: Octokit, owner: string, repo: string, path: string): Promise<ProbeRepoFileResult>;

package/dist/core/probe-repo-file.js ADDED Viewed

@@ -0,0 +1,57 @@
+/**
+ * Single-path repo-file probe (#156).
+ *
+ * Three modules (repo-health, roadmap, anti-llm-policy) independently fetch a
+ * repo doc by trying a list of candidate paths and stopping at the first hit.
+ * The per-path fetch was copy-pasted three times, each re-deriving the same
+ * 404-continue / fatal-propagate / base64-decode logic. This is the one
+ * genuinely-shared primitive.
+ *
+ * The orchestration around it stays per-caller (parallel 4-path probe,
+ * sequential 5-path probe, sequential family probe) and so do the return shapes
+ * (parsed guidelines, issue-ref set, policy scan). Only the single GET is
+ * shared.
+ *
+ * The `transient` flag is load-bearing: it distinguishes a clean miss (404 —
+ * file absent) from a degraded miss (5xx, network) so callers can decide
+ * whether to cache a negative result or leave it open to retry. Collapsing the
+ * two would bypass anti-llm-policy's transient-failure cache safeguard, so the
+ * primitive must keep them separate.
+ */
+import { errorMessage, getHttpStatusCode, rethrowIfFatal } from "./errors.js";
+import { warn } from "./logger.js";
+const MODULE = "probe-repo-file";
+/**
+ * GET one repo file path. Returns decoded content on a 200 file payload, a
+ * clean `null` on 404 or a non-content payload, and a transient `null` on a
+ * soft error (5xx, network) after logging it. Rethrows fatal errors (401 auth,
+ * rate limit) so the caller's existing rate-limit handling sees them.
+ *
+ * Callers that need 401/rate-limit to surface across a *parallel* batch (where
+ * a faster path may have already resolved) must inspect the rejected reasons
+ * themselves; this primitive only rethrows for the single path it owns. See
+ * repo-health and anti-llm-policy for that pre-scan.
+ */
+export async function probeRepoFile(octokit, owner, repo, path) {
+    try {
+        const { data } = await octokit.repos.getContent({ owner, repo, path });
+        if (data &&
+            typeof data === "object" &&
+            "content" in data &&
+            typeof data.content === "string") {
+            return {
+                text: Buffer.from(data.content, "base64").toString("utf-8"),
+                transient: false,
+            };
+        }
+        return { text: null, transient: false };
+    }
+    catch (error) {
+        const status = getHttpStatusCode(error);
+        if (status === 404)
+            return { text: null, transient: false };
+        rethrowIfFatal(error);
+        warn(MODULE, `Unexpected error fetching ${path} from ${owner}/${repo}: ${errorMessage(error)}`);
+        return { text: null, transient: true };
+    }
+}

package/dist/core/repo-health.js CHANGED Viewed

@@ -5,9 +5,10 @@
  * from issue-level eligibility logic.
  */
 import { daysBetween } from "./utils.js";
-import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
+import { errorMessage, getHttpStatusCode, isRateLimitError, rethrowIfFatal, } from "./errors.js";
 import { warn } from "./logger.js";
 import { getHttpCache, cachedRequest, cachedTimeBased } from "./http-cache.js";
+import { probeRepoFile } from "./probe-repo-file.js";
 const MODULE = "repo-health";
 // ── Cache for contribution guidelines ──
 const guidelinesCache = new Map();
@@ -73,19 +74,14 @@ export async function checkProjectHealth(octokit, owner, repo) {
         });
     }
     catch (error) {
-        if (getHttpStatusCode(error) === 401 || isRateLimitError(error)) {
-            throw error;
-        }
+        rethrowIfFatal(error);
         const errMsg = errorMessage(error);
         warn(MODULE, `Error checking project health for ${owner}/${repo}: ${errMsg}`);
+        // The check failed: only the repo and the reason are known. The
+        // discriminated ProjectHealth type intentionally has no place for the
+        // neutral-default snapshot fields this used to fabricate (#158).
         return {
             repo: `${owner}/${repo}`,
-            lastCommitAt: "",
-            daysSinceLastCommit: 999,
-            openIssuesCount: 0,
-            avgIssueResponseDays: 0,
-            ciStatus: "unknown",
-            isActive: false,
             checkFailed: true,
             failureReason: errMsg,
         };
@@ -104,19 +100,33 @@ export async function fetchContributionGuidelines(octokit, owner, repo) {
     if (cached && Date.now() - cached.fetchedAt < CACHE_TTL_MS) {
         return cached.guidelines;
     }
+    // Concurrent vets of issues from one repo share a single probe (#124)
+    const inflight = guidelinesInflight.get(cacheKey);
+    if (inflight)
+        return inflight;
+    const promise = fetchContributionGuidelinesUncached(octokit, owner, repo);
+    guidelinesInflight.set(cacheKey, promise);
+    try {
+        return await promise;
+    }
+    finally {
+        guidelinesInflight.delete(cacheKey);
+    }
+}
+const guidelinesInflight = new Map();
+async function fetchContributionGuidelinesUncached(octokit, owner, repo) {
+    const cacheKey = `${owner}/${repo}`;
     const filesToCheck = [
         "CONTRIBUTING.md",
         ".github/CONTRIBUTING.md",
         "docs/CONTRIBUTING.md",
         "contributing.md",
     ];
-    // Probe all paths in parallel — take the first success in priority order
-    const results = await Promise.allSettled(filesToCheck.map((file) => octokit.repos.getContent({ owner, repo, path: file }).then(({ data }) => {
-        if ("content" in data) {
-            return Buffer.from(data.content, "base64").toString("utf-8");
-        }
-        return null;
-    })));
+    // Probe all paths in parallel — take the first success in priority order.
+    // probeRepoFile rethrows 401/rate-limit, so those still surface here as
+    // rejected results for the pre-scan below; 404s and 5xx come back as a null
+    // text (the primitive warns on 5xx, so no extra warn is needed here).
+    const results = await Promise.allSettled(filesToCheck.map((file) => probeRepoFile(octokit, owner, repo, file)));
     // Pre-scan: auth/rate-limit must propagate even if a faster probe succeeded —
     // otherwise a path-restricted token that 401s on .github/CONTRIBUTING.md but
     // wins on CONTRIBUTING.md would silently hide the auth misconfiguration.
@@ -128,20 +138,13 @@ export async function fetchContributionGuidelines(octokit, owner, repo) {
             throw result.reason;
         }
     }
-    for (let i = 0; i < results.length; i++) {
-        const result = results[i];
-        if (result.status === "fulfilled" && result.value) {
-            const guidelines = parseContributionGuidelines(result.value);
+    for (const result of results) {
+        if (result.status === "fulfilled" && result.value.text) {
+            const guidelines = parseContributionGuidelines(result.value.text);
             guidelinesCache.set(cacheKey, { guidelines, fetchedAt: Date.now() });
             pruneCache();
             return guidelines;
         }
-        if (result.status === "rejected") {
-            const status = getHttpStatusCode(result.reason);
-            if (status !== 404) {
-                warn(MODULE, `Unexpected error fetching ${filesToCheck[i]} from ${owner}/${repo}: ${errorMessage(result.reason)}`);
-            }
-        }
     }
     // Cache the negative result too and prune if needed
     guidelinesCache.set(cacheKey, {
@@ -160,9 +163,13 @@ function parseContributionGuidelines(content) {
         rawContent: content,
     };
     const lowerContent = content.toLowerCase();
-    // Detect branch naming conventions
+    // Detect branch naming conventions. CONTRIBUTING.md is attacker-controlled
+    // (it belongs to the repo being vetted): the unbounded [^\n]* pair forced
+    // quadratic backtracking on a long quote-less line, stalling the vet
+    // (#152). Bounded quantifiers keep the scan linear-ish; real conventions
+    // sit well inside 200 chars of their keyword.
     if (lowerContent.includes("branch")) {
-        const branchMatch = content.match(/branch[^\n]*(?:named?|format|convention)[^\n]*[`"]([^`"]+)[`"]/i);
+        const branchMatch = content.match(/branch[^\n]{0,200}?(?:named?|format|convention)[^\n]{0,200}?[`"]([^`"\n]{1,100})[`"]/i);
         if (branchMatch) {
             guidelines.branchNamingConvention = branchMatch[1];
         }
@@ -172,7 +179,7 @@ function parseContributionGuidelines(content) {
         guidelines.commitMessageFormat = "conventional commits";
     }
     else if (lowerContent.includes("commit message")) {
-        const commitMatch = content.match(/commit message[^\n]*[`"]([^`"]+)[`"]/i);
+        const commitMatch = content.match(/commit message[^\n]{0,200}?[`"]([^`"\n]{1,100})[`"]/i);
         if (commitMatch) {
             guidelines.commitMessageFormat = commitMatch[1];
         }
@@ -193,8 +200,9 @@ function parseContributionGuidelines(content) {
         guidelines.linter = "RuboCop";
     else if (lowerContent.includes("prettier"))
         guidelines.formatter = "Prettier";
-    // Detect CLA requirement
-    if (lowerContent.includes("cla") ||
+    // Detect CLA requirement. Word boundary matters: a bare substring check
+    // matches "class", "clang", "clarify", etc. and flags nearly every doc.
+    if (/\bcla\b/.test(lowerContent) ||
         lowerContent.includes("contributor license agreement")) {
         guidelines.claRequired = true;
     }