@oss-scout/core 0.11.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.bundle.cjs +89 -66
- package/dist/cli.js +302 -436
- package/dist/commands/command-scout.d.ts +21 -0
- package/dist/commands/command-scout.js +21 -0
- package/dist/commands/config.js +10 -128
- package/dist/commands/features.js +15 -28
- package/dist/commands/results.d.ts +13 -2
- package/dist/commands/results.js +29 -2
- package/dist/commands/search.d.ts +4 -0
- package/dist/commands/search.js +65 -70
- package/dist/commands/setup.d.ts +2 -0
- package/dist/commands/setup.js +35 -6
- package/dist/commands/skip.d.ts +4 -0
- package/dist/commands/skip.js +45 -55
- package/dist/commands/sync.d.ts +10 -0
- package/dist/commands/sync.js +10 -0
- package/dist/commands/vet-list.js +3 -19
- package/dist/commands/vet.js +18 -25
- package/dist/commands/with-scout.d.ts +32 -0
- package/dist/commands/with-scout.js +41 -0
- package/dist/core/anti-llm-policy.js +5 -33
- package/dist/core/bootstrap.d.ts +2 -2
- package/dist/core/bootstrap.js +5 -9
- package/dist/core/errors.d.ts +10 -0
- package/dist/core/errors.js +20 -5
- package/dist/core/feature-discovery.d.ts +13 -1
- package/dist/core/feature-discovery.js +104 -81
- package/dist/core/gist-state-store.d.ts +13 -12
- package/dist/core/gist-state-store.js +128 -53
- package/dist/core/http-cache.d.ts +32 -2
- package/dist/core/http-cache.js +74 -19
- package/dist/core/issue-discovery.d.ts +12 -1
- package/dist/core/issue-discovery.js +94 -67
- package/dist/core/issue-eligibility.d.ts +11 -4
- package/dist/core/issue-eligibility.js +124 -69
- package/dist/core/issue-graphql.d.ts +58 -0
- package/dist/core/issue-graphql.js +108 -0
- package/dist/core/issue-vetting.d.ts +115 -9
- package/dist/core/issue-vetting.js +246 -109
- package/dist/core/local-state.d.ts +6 -2
- package/dist/core/local-state.js +23 -5
- package/dist/core/logger.d.ts +12 -4
- package/dist/core/logger.js +33 -7
- package/dist/core/personalization.d.ts +30 -10
- package/dist/core/personalization.js +64 -24
- package/dist/core/preference-fields.d.ts +47 -0
- package/dist/core/preference-fields.js +180 -0
- package/dist/core/probe-repo-file.d.ts +47 -0
- package/dist/core/probe-repo-file.js +57 -0
- package/dist/core/repo-health.js +40 -32
- package/dist/core/roadmap.js +26 -22
- package/dist/core/schemas.d.ts +148 -26
- package/dist/core/schemas.js +83 -17
- package/dist/core/search-budget.d.ts +9 -0
- package/dist/core/search-budget.js +36 -3
- package/dist/core/search-phases.d.ts +4 -21
- package/dist/core/search-phases.js +37 -89
- package/dist/core/types.d.ts +151 -38
- package/dist/core/utils.js +60 -26
- package/dist/formatters/human.d.ts +60 -0
- package/dist/formatters/human.js +199 -0
- package/dist/formatters/markdown.d.ts +10 -0
- package/dist/formatters/markdown.js +31 -0
- package/dist/index.d.ts +6 -2
- package/dist/index.js +8 -0
- package/dist/scout.d.ts +75 -12
- package/dist/scout.js +265 -26
- package/package.json +1 -1
|
@@ -27,19 +27,39 @@ import type { IssueCandidate } from "./types.js";
|
|
|
27
27
|
*/
|
|
28
28
|
export declare const REPO_BOOST = 20;
|
|
29
29
|
export declare const LANGUAGE_BOOST = 10;
|
|
30
|
+
/** Soft boost for an issue-label ("issue type") match (#168). Language-tier. */
|
|
31
|
+
export declare const ISSUE_TYPE_BOOST = 10;
|
|
30
32
|
/**
|
|
31
|
-
*
|
|
32
|
-
* the
|
|
33
|
-
*
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
33
|
+
* Soft penalty for an avoidRepos match (#168). Milder than the hard
|
|
34
|
+
* excludeRepos filter: it pushes the candidate down but a strong boost (e.g. a
|
|
35
|
+
* preferRepos affinity, +20) can still outweigh it.
|
|
36
|
+
*/
|
|
37
|
+
export declare const AVOID_PENALTY = 15;
|
|
38
|
+
/** Per-call personalization bias lists (#168). All optional; empty = no effect. */
|
|
39
|
+
export interface PersonalizationBias {
|
|
40
|
+
preferLanguages?: string[];
|
|
41
|
+
preferRepos?: string[];
|
|
42
|
+
avoidRepos?: string[];
|
|
43
|
+
boostIssueTypes?: string[];
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* The personalization sort weight of a candidate: its net score, or 0 when it
|
|
47
|
+
* carries no personalization marker. Reads the structural `personalization`
|
|
48
|
+
* field (#158). The score can be negative when avoidRepos applied (#168).
|
|
49
|
+
*/
|
|
50
|
+
export declare function boostScoreOf(candidate: IssueCandidate): number;
|
|
51
|
+
/**
|
|
52
|
+
* Return a new candidate list where each candidate matching a caller-supplied
|
|
53
|
+
* bias carries a `personalization` marker with a NET score (#168): preferRepos,
|
|
54
|
+
* preferLanguages and boostIssueTypes add; avoidRepos subtracts. The score may
|
|
55
|
+
* be negative (avoid-only) — boostScoreOf sorts those below neutral candidates.
|
|
56
|
+
* Does NOT mutate the input (#158): matched candidates are shallow copies,
|
|
57
|
+
* unmatched ones pass through unchanged.
|
|
38
58
|
*
|
|
39
|
-
* No-op when
|
|
40
|
-
*
|
|
59
|
+
* No-op when every bias list is empty/undefined: the input array is returned
|
|
60
|
+
* as-is and the sort tier collapses to 0 for every candidate.
|
|
41
61
|
*/
|
|
42
|
-
export declare function annotateBoost(candidates: IssueCandidate[],
|
|
62
|
+
export declare function annotateBoost(candidates: IssueCandidate[], bias?: PersonalizationBias): IssueCandidate[];
|
|
43
63
|
/**
|
|
44
64
|
* Apply a diversity-counterweight pass over a pre-sorted candidate list
|
|
45
65
|
* (#1244). Returns the first `maxResults` picks in priority order:
|
|
@@ -26,40 +26,77 @@
|
|
|
26
26
|
*/
|
|
27
27
|
export const REPO_BOOST = 20;
|
|
28
28
|
export const LANGUAGE_BOOST = 10;
|
|
29
|
+
/** Soft boost for an issue-label ("issue type") match (#168). Language-tier. */
|
|
30
|
+
export const ISSUE_TYPE_BOOST = 10;
|
|
29
31
|
/**
|
|
30
|
-
*
|
|
31
|
-
* the
|
|
32
|
-
*
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
*
|
|
32
|
+
* Soft penalty for an avoidRepos match (#168). Milder than the hard
|
|
33
|
+
* excludeRepos filter: it pushes the candidate down but a strong boost (e.g. a
|
|
34
|
+
* preferRepos affinity, +20) can still outweigh it.
|
|
35
|
+
*/
|
|
36
|
+
export const AVOID_PENALTY = 15;
|
|
37
|
+
/**
 * Personalization sort weight of a candidate.
 *
 * Returns the `score` stored on the candidate's `personalization` marker when
 * that marker has kind "boosted". A candidate with any other marker kind, or
 * with no marker at all, weighs 0. The stored score is returned unchanged, so
 * a negative value (e.g. after an avoidRepos penalty, #168) is passed through.
 */
export function boostScoreOf(candidate) {
    const marker = candidate.personalization;
    if (marker?.kind !== "boosted") {
        return 0;
    }
    return marker.score;
}
|
|
47
|
+
/**
 * Build a lookup set from an optional list of strings. Every entry is trimmed
 * and lower-cased; entries that end up empty are dropped. A null/undefined
 * list yields an empty set.
 */
function normalizeSet(values) {
    const normalized = new Set();
    for (const raw of values ?? []) {
        const key = raw.trim().toLowerCase();
        if (key) {
            normalized.add(key);
        }
    }
    return normalized;
}
|
|
50
|
+
/**
 * Annotate candidates with a NET personalization score (#168).
 *
 * For each candidate the score is assembled from the caller-supplied bias
 * lists (all compared case-insensitively after trimming):
 *   - preferRepos match on `issue.repo`            → + REPO_BOOST
 *   - preferLanguages match on the repo language   → + LANGUAGE_BOOST
 *     (skipped when `projectHealth.checkFailed` is set)
 *   - boostIssueTypes match on any issue label     → + ISSUE_TYPE_BOOST
 *     (only the first matching label is counted)
 *   - avoidRepos match on `issue.repo`             → - AVOID_PENALTY
 *
 * The input is never mutated (#158): a candidate that matched at least one
 * bias is returned as a shallow copy carrying
 * `personalization: { kind: "boosted", score, reasons }`; a candidate that
 * matched nothing is passed through as the same object. The score may be
 * negative when only the avoid penalty applied.
 *
 * When every bias list is empty or undefined the input array itself is
 * returned untouched.
 */
export function annotateBoost(candidates, bias = {}) {
    const preferredLanguages = normalizeSet(bias.preferLanguages);
    const preferredRepos = normalizeSet(bias.preferRepos);
    const avoidedRepos = normalizeSet(bias.avoidRepos);
    const boostedLabels = normalizeSet(bias.boostIssueTypes);
    const anyBiasActive = [
        preferredLanguages,
        preferredRepos,
        avoidedRepos,
        boostedLabels,
    ].some((set) => set.size > 0);
    if (!anyBiasActive) {
        return candidates;
    }
    return candidates.map((candidate) => {
        const { issue, projectHealth } = candidate;
        const repoKey = issue.repo.toLowerCase();
        const reasons = [];
        let score = 0;
        // Record one adjustment: shift the net score and remember why.
        const apply = (delta, reason) => {
            score += delta;
            reasons.push(reason);
        };
        if (preferredRepos.has(repoKey)) {
            apply(REPO_BOOST, `repo affinity: ${issue.repo}`);
        }
        // A failed health check carries no language to match against.
        const language = projectHealth.checkFailed ? null : projectHealth.language;
        if (preferredLanguages.size > 0 &&
            language &&
            preferredLanguages.has(language.toLowerCase())) {
            apply(LANGUAGE_BOOST, `language match: ${language}`);
        }
        // Labels are only inspected when an issue-type bias was supplied.
        if (boostedLabels.size > 0) {
            const hit = issue.labels.find((label) => boostedLabels.has(label.toLowerCase()));
            if (hit) {
                apply(ISSUE_TYPE_BOOST, `issue type: ${hit}`);
            }
        }
        if (avoidedRepos.has(repoKey)) {
            apply(-AVOID_PENALTY, `avoided repo: ${issue.repo}`);
        }
        return reasons.length === 0
            ? candidate
            : { ...candidate, personalization: { kind: "boosted", score, reasons } };
    });
}
|
|
64
101
|
/**
|
|
65
102
|
* Apply a diversity-counterweight pass over a pre-sorted candidate list
|
|
@@ -108,10 +145,13 @@ export function applyDiversityRatio(candidates, maxResults, diversityRatio) {
|
|
|
108
145
|
break;
|
|
109
146
|
if (seen.has(c.issue.url))
|
|
110
147
|
continue;
|
|
111
|
-
|
|
148
|
+
// Diversity slots are for candidates that matched NO personalization bias.
|
|
149
|
+
// Exclude both boosted (>0) and avoided (<0) candidates — resurfacing an
|
|
150
|
+
// avoided repo via a diversity slot would defeat the avoid (#168).
|
|
151
|
+
if (boostScoreOf(c) !== 0)
|
|
112
152
|
continue;
|
|
113
|
-
|
|
114
|
-
picks.push(c);
|
|
153
|
+
// Tag a shallow copy rather than mutating the shared candidate (#158).
|
|
154
|
+
picks.push({ ...c, personalization: { kind: "diversity" } });
|
|
115
155
|
seen.add(c.issue.url);
|
|
116
156
|
}
|
|
117
157
|
for (const c of candidates) {
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared preference-field metadata and value parsing.
|
|
3
|
+
*
|
|
4
|
+
* The CLI (`commands/config.ts`) and the MCP `config-set` tool both update a
|
|
5
|
+
* single preference from a raw string. They used to carry separate, drifting
|
|
6
|
+
* copies of the key tables and parse logic — the CLI was missing the SLM
|
|
7
|
+
* triage keys, the MCP side lacked the `scope` special case and the +/- array
|
|
8
|
+
* syntax. This module is the single source of truth both drive (#153).
|
|
9
|
+
*/
|
|
10
|
+
import type { ScoutPreferences } from "./schemas.js";
|
|
11
|
+
export type FieldConfig = {
|
|
12
|
+
type: "array" | "number" | "float" | "boolean" | "string";
|
|
13
|
+
} | {
|
|
14
|
+
type: "enum" | "enum-array";
|
|
15
|
+
validValues: readonly string[];
|
|
16
|
+
};
|
|
17
|
+
export declare const FIELD_CONFIGS: Record<string, FieldConfig>;
|
|
18
|
+
/**
|
|
19
|
+
* Every configurable preference key, derived from the schema so a new
|
|
20
|
+
* preference can't be silently left unconfigurable. `assertFieldConfigsCover`
|
|
21
|
+
* (exercised by a unit test) fails loudly if FIELD_CONFIGS drifts from this.
|
|
22
|
+
*/
|
|
23
|
+
export declare const PREFERENCE_KEYS: readonly string[];
|
|
24
|
+
/** Sorted key list for "unknown key" error messages and help text. */
|
|
25
|
+
export declare const SORTED_PREFERENCE_KEYS: readonly string[];
|
|
26
|
+
/**
|
|
27
|
+
* Throw if any schema preference lacks a FIELD_CONFIG entry. Called from a
|
|
28
|
+
* test so adding a preference to the schema without teaching config-set how to
|
|
29
|
+
* parse it is caught in CI rather than at a user's first `config set newKey`.
|
|
30
|
+
*/
|
|
31
|
+
export declare function assertFieldConfigsCover(): void;
|
|
32
|
+
/**
|
|
33
|
+
* Apply an array update: plain set, +append, or -remove.
|
|
34
|
+
*
|
|
35
|
+
* The -remove form starts with a dash, which commander rejects as an unknown
|
|
36
|
+
* option unless escaped: `config set excludeRepos -- "-spam/repo"`. The MCP
|
|
37
|
+
* tool has no commander layer so it can pass `-spam/repo` directly. Documented
|
|
38
|
+
* in the CLI help and README (#132).
|
|
39
|
+
*/
|
|
40
|
+
export declare function updateArray(current: string[], value: string): string[];
|
|
41
|
+
/**
|
|
42
|
+
* Apply a single key/value update to a preferences object and return the
|
|
43
|
+
* fully validated result. The raw string `value` is the form both the CLI and
|
|
44
|
+
* the MCP tool receive; arrays accept comma-separated values and the +add /
|
|
45
|
+
* -remove syntax. Throws ValidationError on an unknown key or a bad value.
|
|
46
|
+
*/
|
|
47
|
+
export declare function applyPreferenceField(preferences: ScoutPreferences, key: string, value: string): ScoutPreferences;
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared preference-field metadata and value parsing.
|
|
3
|
+
*
|
|
4
|
+
* The CLI (`commands/config.ts`) and the MCP `config-set` tool both update a
|
|
5
|
+
* single preference from a raw string. They used to carry separate, drifting
|
|
6
|
+
* copies of the key tables and parse logic — the CLI was missing the SLM
|
|
7
|
+
* triage keys, the MCP side lacked the `scope` special case and the +/- array
|
|
8
|
+
* syntax. This module is the single source of truth both drive (#153).
|
|
9
|
+
*/
|
|
10
|
+
import { ScoutPreferencesSchema, IssueScopeSchema, ProjectCategorySchema, PersistenceModeSchema, SearchStrategySchema, } from "./schemas.js";
|
|
11
|
+
import { ValidationError } from "./errors.js";
|
|
12
|
+
export const FIELD_CONFIGS = {
|
|
13
|
+
githubUsername: { type: "string" },
|
|
14
|
+
languages: { type: "array" },
|
|
15
|
+
labels: { type: "array" },
|
|
16
|
+
scope: { type: "enum-array", validValues: IssueScopeSchema.options },
|
|
17
|
+
excludeRepos: { type: "array" },
|
|
18
|
+
excludeOrgs: { type: "array" },
|
|
19
|
+
aiPolicyBlocklist: { type: "array" },
|
|
20
|
+
projectCategories: {
|
|
21
|
+
type: "enum-array",
|
|
22
|
+
validValues: ProjectCategorySchema.options,
|
|
23
|
+
},
|
|
24
|
+
minStars: { type: "number" },
|
|
25
|
+
maxIssueAgeDays: { type: "number" },
|
|
26
|
+
includeDocIssues: { type: "boolean" },
|
|
27
|
+
minRepoScoreThreshold: { type: "number" },
|
|
28
|
+
interPhaseDelayMs: { type: "number" },
|
|
29
|
+
persistence: { type: "enum", validValues: PersistenceModeSchema.options },
|
|
30
|
+
defaultStrategy: {
|
|
31
|
+
type: "enum-array",
|
|
32
|
+
validValues: SearchStrategySchema.options,
|
|
33
|
+
},
|
|
34
|
+
broadPhaseDelayMs: { type: "number" },
|
|
35
|
+
skipBroadWhenSufficientResults: { type: "number" },
|
|
36
|
+
preferLanguages: { type: "array" },
|
|
37
|
+
preferRepos: { type: "array" },
|
|
38
|
+
diversityRatio: { type: "float" },
|
|
39
|
+
avoidRepos: { type: "array" },
|
|
40
|
+
boostIssueTypes: { type: "array" },
|
|
41
|
+
slmTriageModel: { type: "string" },
|
|
42
|
+
slmTriageHost: { type: "string" },
|
|
43
|
+
featuresAnchorThreshold: { type: "number" },
|
|
44
|
+
featuresSplitRatio: { type: "float" },
|
|
45
|
+
};
|
|
46
|
+
/**
|
|
47
|
+
* Every configurable preference key, derived from the schema so a new
|
|
48
|
+
* preference can't be silently left unconfigurable. `assertFieldConfigsCover`
|
|
49
|
+
* (exercised by a unit test) fails loudly if FIELD_CONFIGS drifts from this.
|
|
50
|
+
*/
|
|
51
|
+
export const PREFERENCE_KEYS = Object.keys(ScoutPreferencesSchema.shape);
|
|
52
|
+
/** Sorted key list for "unknown key" error messages and help text. */
|
|
53
|
+
export const SORTED_PREFERENCE_KEYS = [
|
|
54
|
+
...PREFERENCE_KEYS,
|
|
55
|
+
].sort();
|
|
56
|
+
/**
 * Verify that FIELD_CONFIGS and PREFERENCE_KEYS describe the same key set.
 *
 * Throws an Error naming every preference key that has no FIELD_CONFIGS
 * entry; if none are missing, throws an Error naming every FIELD_CONFIGS
 * entry that is not a known preference key. Returns nothing when the two
 * agree. Intended to be called from a test so drift is caught in CI rather
 * than at a user's first `config set newKey`.
 */
export function assertFieldConfigsCover() {
    const uncovered = [];
    for (const key of PREFERENCE_KEYS) {
        if (!(key in FIELD_CONFIGS)) {
            uncovered.push(key);
        }
    }
    if (uncovered.length !== 0) {
        throw new Error(`FIELD_CONFIGS is missing entries for preference keys: ${uncovered.join(", ")}`);
    }
    const stray = [];
    for (const key of Object.keys(FIELD_CONFIGS)) {
        if (!PREFERENCE_KEYS.includes(key)) {
            stray.push(key);
        }
    }
    if (stray.length !== 0) {
        throw new Error(`FIELD_CONFIGS has entries for unknown preference keys: ${stray.join(", ")}`);
    }
}
|
|
71
|
+
function parseBoolean(value) {
|
|
72
|
+
const lower = value.toLowerCase();
|
|
73
|
+
if (lower === "true" || lower === "yes")
|
|
74
|
+
return true;
|
|
75
|
+
if (lower === "false" || lower === "no")
|
|
76
|
+
return false;
|
|
77
|
+
throw new ValidationError(`Invalid boolean value: "${value}". Use true/false or yes/no.`);
|
|
78
|
+
}
|
|
79
|
+
function parseIntValue(value, key) {
|
|
80
|
+
const num = parseInt(value, 10);
|
|
81
|
+
if (isNaN(num)) {
|
|
82
|
+
throw new ValidationError(`Invalid number for "${key}": "${value}"`);
|
|
83
|
+
}
|
|
84
|
+
return num;
|
|
85
|
+
}
|
|
86
|
+
function parseFloatValue(value, key) {
|
|
87
|
+
const num = Number.parseFloat(value);
|
|
88
|
+
if (isNaN(num)) {
|
|
89
|
+
throw new ValidationError(`Invalid number for "${key}": "${value}"`);
|
|
90
|
+
}
|
|
91
|
+
return num;
|
|
92
|
+
}
|
|
93
|
+
/**
 * Split a comma-separated string into its items. Each item is trimmed and
 * items that are empty after trimming are dropped, so "a, ,b," gives
 * ["a", "b"] and an empty string gives [].
 */
function parseArrayValue(value) {
    const items = [];
    for (const piece of value.split(",")) {
        const trimmed = piece.trim();
        if (trimmed.length > 0) {
            items.push(trimmed);
        }
    }
    return items;
}
|
|
99
|
+
/**
 * Compute the next value of an array preference from a raw update string.
 *
 * Three forms are recognised by the first character of `value`:
 *   - "+a,b"  append: items not already present are added to the end, in
 *             the order given; existing items keep their position.
 *   - "-a,b"  remove: every occurrence of the listed items is dropped.
 *   - "a,b"   replace: the parsed list becomes the new value as-is.
 *
 * `current` is never modified; a new array is returned in every case.
 *
 * The remove form starts with a dash, which commander rejects as an unknown
 * option unless escaped: `config set excludeRepos -- "-spam/repo"`. The MCP
 * tool has no commander layer so it can pass `-spam/repo` directly (#132).
 */
export function updateArray(current, value) {
    if (value.startsWith("-")) {
        const doomed = new Set(parseArrayValue(value.slice(1)));
        return current.filter((entry) => !doomed.has(entry));
    }
    if (!value.startsWith("+")) {
        return parseArrayValue(value);
    }
    const result = current.slice();
    const present = new Set(result);
    for (const addition of parseArrayValue(value.slice(1))) {
        if (!present.has(addition)) {
            present.add(addition);
            result.push(addition);
        }
    }
    return result;
}
|
|
123
|
+
/**
 * Apply a single key/value update to a preferences object and return the
 * validated result.
 *
 * `value` is the raw string both the CLI and the MCP tool receive. How it is
 * interpreted depends on the key's entry in FIELD_CONFIGS:
 *   - string      stored as-is
 *   - boolean     parsed by parseBoolean
 *   - number      parsed by parseIntValue
 *   - float       parsed by parseFloatValue
 *   - array       updateArray against the current value (set / +add / -remove)
 *   - enum        must be one of the field's validValues
 *   - enum-array  updateArray, then every item must be one of validValues;
 *                 for `scope`, an empty result is stored as undefined
 *
 * The input object is not modified: the update is applied to a shallow copy,
 * which is then passed through `ScoutPreferencesSchema.parse`.
 *
 * @param preferences current preferences
 * @param key preference key to update
 * @param value raw string value
 * @returns the updated preferences as returned by the schema parse
 * @throws ValidationError on an unknown key or a value rejected by the
 *   per-type parsing above. NOTE(review): a value that passes that parsing
 *   but violates the schema surfaces as whatever ScoutPreferencesSchema.parse
 *   throws — confirm callers handle that error type as well.
 */
export function applyPreferenceField(preferences, key, value) {
    // Own-property lookup only. A bare FIELD_CONFIGS[key] also resolves names
    // inherited from Object.prototype ("constructor", "toString", "__proto__",
    // ...) to truthy values, which would skip the unknown-key error below and
    // fall through the switch as a silent no-op.
    const field = Object.prototype.hasOwnProperty.call(FIELD_CONFIGS, key)
        ? FIELD_CONFIGS[key]
        : undefined;
    if (!field) {
        throw new ValidationError(`Unknown config key: "${key}". Valid keys: ${SORTED_PREFERENCE_KEYS.join(", ")}`);
    }
    const prefs = { ...preferences };
    switch (field.type) {
        case "string":
            prefs[key] = value;
            break;
        case "boolean":
            prefs[key] = parseBoolean(value);
            break;
        case "number":
            prefs[key] = parseIntValue(value, key);
            break;
        case "float":
            prefs[key] = parseFloatValue(value, key);
            break;
        case "array": {
            const current = prefs[key] ?? [];
            prefs[key] = updateArray(current, value);
            break;
        }
        case "enum": {
            const validValues = field.validValues;
            if (!validValues.includes(value)) {
                throw new ValidationError(`Invalid value for "${key}": "${value}". Valid: ${validValues.join(", ")}`);
            }
            prefs[key] = value;
            break;
        }
        case "enum-array": {
            const current = prefs[key] ?? [];
            const updated = updateArray(current, value);
            const validValues = field.validValues;
            const invalid = updated.filter((s) => !validValues.includes(s));
            if (invalid.length > 0) {
                throw new ValidationError(`Invalid value(s) for "${key}": ${invalid.join(", ")}. Valid: ${validValues.join(", ")}`);
            }
            // For 'scope', an empty array means undefined (all scopes).
            if (key === "scope") {
                prefs[key] = updated.length > 0 ? updated : undefined;
            }
            else {
                prefs[key] = updated;
            }
            break;
        }
        default:
            // Unreachable with the current FIELD_CONFIGS table; guards against
            // a new field type being added without a matching case, which
            // would otherwise report success without changing anything.
            throw new Error(`Unhandled field type for config key "${key}"`);
    }
    return ScoutPreferencesSchema.parse(prefs);
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Single-path repo-file probe (#156).
|
|
3
|
+
*
|
|
4
|
+
* Three modules (repo-health, roadmap, anti-llm-policy) independently fetch a
|
|
5
|
+
* repo doc by trying a list of candidate paths and stopping at the first hit.
|
|
6
|
+
* The per-path fetch was copy-pasted three times, each re-deriving the same
|
|
7
|
+
* 404-continue / fatal-propagate / base64-decode logic. This is the one
|
|
8
|
+
* genuinely-shared primitive.
|
|
9
|
+
*
|
|
10
|
+
* The orchestration around it stays per-caller (parallel 4-path probe,
|
|
11
|
+
* sequential 5-path probe, sequential family probe) and so do the return shapes
|
|
12
|
+
* (parsed guidelines, issue-ref set, policy scan). Only the single GET is
|
|
13
|
+
* shared.
|
|
14
|
+
*
|
|
15
|
+
* The `transient` flag is load-bearing: it distinguishes a clean miss (404 —
|
|
16
|
+
* file absent) from a degraded miss (5xx, network) so callers can decide
|
|
17
|
+
* whether to cache a negative result or leave it open to retry. Collapsing the
|
|
18
|
+
* two would bypass anti-llm-policy's transient-failure cache safeguard, so the
|
|
19
|
+
* primitive must keep them separate.
|
|
20
|
+
*/
|
|
21
|
+
import type { Octokit } from "@octokit/rest";
|
|
22
|
+
/**
|
|
23
|
+
* Result of probing one repo file path.
|
|
24
|
+
*
|
|
25
|
+
* - `text` — decoded UTF-8 content on a 200 with a file payload, else `null`
|
|
26
|
+
* (404, a non-content payload such as a directory listing, or a soft error).
|
|
27
|
+
* - `transient` — `true` only when the miss was a degraded failure (5xx,
|
|
28
|
+
* network) rather than a clean 404 / missing file. A `true` value means the
|
|
29
|
+
* `null` may be incomplete and the caller should avoid caching it as a known
|
|
30
|
+
* absence.
|
|
31
|
+
*/
|
|
32
|
+
export interface ProbeRepoFileResult {
|
|
33
|
+
text: string | null;
|
|
34
|
+
transient: boolean;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* GET one repo file path. Returns decoded content on a 200 file payload, a
|
|
38
|
+
* clean `null` on 404 or a non-content payload, and a transient `null` on a
|
|
39
|
+
* soft error (5xx, network) after logging it. Rethrows fatal errors (401 auth,
|
|
40
|
+
* rate limit) so the caller's existing rate-limit handling sees them.
|
|
41
|
+
*
|
|
42
|
+
* Callers that need 401/rate-limit to surface across a *parallel* batch (where
|
|
43
|
+
* a faster path may have already resolved) must inspect the rejected reasons
|
|
44
|
+
* themselves; this primitive only rethrows for the single path it owns. See
|
|
45
|
+
* repo-health and anti-llm-policy for that pre-scan.
|
|
46
|
+
*/
|
|
47
|
+
export declare function probeRepoFile(octokit: Octokit, owner: string, repo: string, path: string): Promise<ProbeRepoFileResult>;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Single-path repo-file probe (#156).
|
|
3
|
+
*
|
|
4
|
+
* Three modules (repo-health, roadmap, anti-llm-policy) independently fetch a
|
|
5
|
+
* repo doc by trying a list of candidate paths and stopping at the first hit.
|
|
6
|
+
* The per-path fetch was copy-pasted three times, each re-deriving the same
|
|
7
|
+
* 404-continue / fatal-propagate / base64-decode logic. This is the one
|
|
8
|
+
* genuinely-shared primitive.
|
|
9
|
+
*
|
|
10
|
+
* The orchestration around it stays per-caller (parallel 4-path probe,
|
|
11
|
+
* sequential 5-path probe, sequential family probe) and so do the return shapes
|
|
12
|
+
* (parsed guidelines, issue-ref set, policy scan). Only the single GET is
|
|
13
|
+
* shared.
|
|
14
|
+
*
|
|
15
|
+
* The `transient` flag is load-bearing: it distinguishes a clean miss (404 —
|
|
16
|
+
* file absent) from a degraded miss (5xx, network) so callers can decide
|
|
17
|
+
* whether to cache a negative result or leave it open to retry. Collapsing the
|
|
18
|
+
* two would bypass anti-llm-policy's transient-failure cache safeguard, so the
|
|
19
|
+
* primitive must keep them separate.
|
|
20
|
+
*/
|
|
21
|
+
import { errorMessage, getHttpStatusCode, rethrowIfFatal } from "./errors.js";
|
|
22
|
+
import { warn } from "./logger.js";
|
|
23
|
+
const MODULE = "probe-repo-file";
|
|
24
|
+
/**
|
|
25
|
+
* GET one repo file path. Returns decoded content on a 200 file payload, a
|
|
26
|
+
* clean `null` on 404 or a non-content payload, and a transient `null` on a
|
|
27
|
+
* soft error (5xx, network) after logging it. Rethrows fatal errors (401 auth,
|
|
28
|
+
* rate limit) so the caller's existing rate-limit handling sees them.
|
|
29
|
+
*
|
|
30
|
+
* Callers that need 401/rate-limit to surface across a *parallel* batch (where
|
|
31
|
+
* a faster path may have already resolved) must inspect the rejected reasons
|
|
32
|
+
* themselves; this primitive only rethrows for the single path it owns. See
|
|
33
|
+
* repo-health and anti-llm-policy for that pre-scan.
|
|
34
|
+
*/
|
|
35
|
+
export async function probeRepoFile(octokit, owner, repo, path) {
|
|
36
|
+
try {
|
|
37
|
+
const { data } = await octokit.repos.getContent({ owner, repo, path });
|
|
38
|
+
if (data &&
|
|
39
|
+
typeof data === "object" &&
|
|
40
|
+
"content" in data &&
|
|
41
|
+
typeof data.content === "string") {
|
|
42
|
+
return {
|
|
43
|
+
text: Buffer.from(data.content, "base64").toString("utf-8"),
|
|
44
|
+
transient: false,
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
return { text: null, transient: false };
|
|
48
|
+
}
|
|
49
|
+
catch (error) {
|
|
50
|
+
const status = getHttpStatusCode(error);
|
|
51
|
+
if (status === 404)
|
|
52
|
+
return { text: null, transient: false };
|
|
53
|
+
rethrowIfFatal(error);
|
|
54
|
+
warn(MODULE, `Unexpected error fetching ${path} from ${owner}/${repo}: ${errorMessage(error)}`);
|
|
55
|
+
return { text: null, transient: true };
|
|
56
|
+
}
|
|
57
|
+
}
|
package/dist/core/repo-health.js
CHANGED
|
@@ -5,9 +5,10 @@
|
|
|
5
5
|
* from issue-level eligibility logic.
|
|
6
6
|
*/
|
|
7
7
|
import { daysBetween } from "./utils.js";
|
|
8
|
-
import { errorMessage, getHttpStatusCode, isRateLimitError } from "./errors.js";
|
|
8
|
+
import { errorMessage, getHttpStatusCode, isRateLimitError, rethrowIfFatal, } from "./errors.js";
|
|
9
9
|
import { warn } from "./logger.js";
|
|
10
10
|
import { getHttpCache, cachedRequest, cachedTimeBased } from "./http-cache.js";
|
|
11
|
+
import { probeRepoFile } from "./probe-repo-file.js";
|
|
11
12
|
const MODULE = "repo-health";
|
|
12
13
|
// ── Cache for contribution guidelines ──
|
|
13
14
|
const guidelinesCache = new Map();
|
|
@@ -73,19 +74,14 @@ export async function checkProjectHealth(octokit, owner, repo) {
|
|
|
73
74
|
});
|
|
74
75
|
}
|
|
75
76
|
catch (error) {
|
|
76
|
-
|
|
77
|
-
throw error;
|
|
78
|
-
}
|
|
77
|
+
rethrowIfFatal(error);
|
|
79
78
|
const errMsg = errorMessage(error);
|
|
80
79
|
warn(MODULE, `Error checking project health for ${owner}/${repo}: ${errMsg}`);
|
|
80
|
+
// The check failed: only the repo and the reason are known. The
|
|
81
|
+
// discriminated ProjectHealth type intentionally has no place for the
|
|
82
|
+
// neutral-default snapshot fields this used to fabricate (#158).
|
|
81
83
|
return {
|
|
82
84
|
repo: `${owner}/${repo}`,
|
|
83
|
-
lastCommitAt: "",
|
|
84
|
-
daysSinceLastCommit: 999,
|
|
85
|
-
openIssuesCount: 0,
|
|
86
|
-
avgIssueResponseDays: 0,
|
|
87
|
-
ciStatus: "unknown",
|
|
88
|
-
isActive: false,
|
|
89
85
|
checkFailed: true,
|
|
90
86
|
failureReason: errMsg,
|
|
91
87
|
};
|
|
@@ -104,19 +100,33 @@ export async function fetchContributionGuidelines(octokit, owner, repo) {
|
|
|
104
100
|
if (cached && Date.now() - cached.fetchedAt < CACHE_TTL_MS) {
|
|
105
101
|
return cached.guidelines;
|
|
106
102
|
}
|
|
103
|
+
// Concurrent vets of issues from one repo share a single probe (#124)
|
|
104
|
+
const inflight = guidelinesInflight.get(cacheKey);
|
|
105
|
+
if (inflight)
|
|
106
|
+
return inflight;
|
|
107
|
+
const promise = fetchContributionGuidelinesUncached(octokit, owner, repo);
|
|
108
|
+
guidelinesInflight.set(cacheKey, promise);
|
|
109
|
+
try {
|
|
110
|
+
return await promise;
|
|
111
|
+
}
|
|
112
|
+
finally {
|
|
113
|
+
guidelinesInflight.delete(cacheKey);
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
const guidelinesInflight = new Map();
|
|
117
|
+
async function fetchContributionGuidelinesUncached(octokit, owner, repo) {
|
|
118
|
+
const cacheKey = `${owner}/${repo}`;
|
|
107
119
|
const filesToCheck = [
|
|
108
120
|
"CONTRIBUTING.md",
|
|
109
121
|
".github/CONTRIBUTING.md",
|
|
110
122
|
"docs/CONTRIBUTING.md",
|
|
111
123
|
"contributing.md",
|
|
112
124
|
];
|
|
113
|
-
// Probe all paths in parallel — take the first success in priority order
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
return null;
|
|
119
|
-
})));
|
|
125
|
+
// Probe all paths in parallel — take the first success in priority order.
|
|
126
|
+
// probeRepoFile rethrows 401/rate-limit, so those still surface here as
|
|
127
|
+
// rejected results for the pre-scan below; 404s and 5xx come back as a null
|
|
128
|
+
// text (the primitive warns on 5xx, so no extra warn is needed here).
|
|
129
|
+
const results = await Promise.allSettled(filesToCheck.map((file) => probeRepoFile(octokit, owner, repo, file)));
|
|
120
130
|
// Pre-scan: auth/rate-limit must propagate even if a faster probe succeeded —
|
|
121
131
|
// otherwise a path-restricted token that 401s on .github/CONTRIBUTING.md but
|
|
122
132
|
// wins on CONTRIBUTING.md would silently hide the auth misconfiguration.
|
|
@@ -128,20 +138,13 @@ export async function fetchContributionGuidelines(octokit, owner, repo) {
|
|
|
128
138
|
throw result.reason;
|
|
129
139
|
}
|
|
130
140
|
}
|
|
131
|
-
for (
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
const guidelines = parseContributionGuidelines(result.value);
|
|
141
|
+
for (const result of results) {
|
|
142
|
+
if (result.status === "fulfilled" && result.value.text) {
|
|
143
|
+
const guidelines = parseContributionGuidelines(result.value.text);
|
|
135
144
|
guidelinesCache.set(cacheKey, { guidelines, fetchedAt: Date.now() });
|
|
136
145
|
pruneCache();
|
|
137
146
|
return guidelines;
|
|
138
147
|
}
|
|
139
|
-
if (result.status === "rejected") {
|
|
140
|
-
const status = getHttpStatusCode(result.reason);
|
|
141
|
-
if (status !== 404) {
|
|
142
|
-
warn(MODULE, `Unexpected error fetching ${filesToCheck[i]} from ${owner}/${repo}: ${errorMessage(result.reason)}`);
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
148
|
}
|
|
146
149
|
// Cache the negative result too and prune if needed
|
|
147
150
|
guidelinesCache.set(cacheKey, {
|
|
@@ -160,9 +163,13 @@ function parseContributionGuidelines(content) {
|
|
|
160
163
|
rawContent: content,
|
|
161
164
|
};
|
|
162
165
|
const lowerContent = content.toLowerCase();
|
|
163
|
-
// Detect branch naming conventions
|
|
166
|
+
// Detect branch naming conventions. CONTRIBUTING.md is attacker-controlled
|
|
167
|
+
// (it belongs to the repo being vetted): the unbounded [^\n]* pair forced
|
|
168
|
+
// quadratic backtracking on a long quote-less line, stalling the vet
|
|
169
|
+
// (#152). Bounded quantifiers keep the scan linear-ish; real conventions
|
|
170
|
+
// sit well inside 200 chars of their keyword.
|
|
164
171
|
if (lowerContent.includes("branch")) {
|
|
165
|
-
const branchMatch = content.match(/branch[^\n]
|
|
172
|
+
const branchMatch = content.match(/branch[^\n]{0,200}?(?:named?|format|convention)[^\n]{0,200}?[`"]([^`"\n]{1,100})[`"]/i);
|
|
166
173
|
if (branchMatch) {
|
|
167
174
|
guidelines.branchNamingConvention = branchMatch[1];
|
|
168
175
|
}
|
|
@@ -172,7 +179,7 @@ function parseContributionGuidelines(content) {
|
|
|
172
179
|
guidelines.commitMessageFormat = "conventional commits";
|
|
173
180
|
}
|
|
174
181
|
else if (lowerContent.includes("commit message")) {
|
|
175
|
-
const commitMatch = content.match(/commit message[^\n]
|
|
182
|
+
const commitMatch = content.match(/commit message[^\n]{0,200}?[`"]([^`"\n]{1,100})[`"]/i);
|
|
176
183
|
if (commitMatch) {
|
|
177
184
|
guidelines.commitMessageFormat = commitMatch[1];
|
|
178
185
|
}
|
|
@@ -193,8 +200,9 @@ function parseContributionGuidelines(content) {
|
|
|
193
200
|
guidelines.linter = "RuboCop";
|
|
194
201
|
else if (lowerContent.includes("prettier"))
|
|
195
202
|
guidelines.formatter = "Prettier";
|
|
196
|
-
// Detect CLA requirement
|
|
197
|
-
|
|
203
|
+
// Detect CLA requirement. Word boundary matters: a bare substring check
|
|
204
|
+
// matches "class", "clang", "clarify", etc. and flags nearly every doc.
|
|
205
|
+
if (/\bcla\b/.test(lowerContent) ||
|
|
198
206
|
lowerContent.includes("contributor license agreement")) {
|
|
199
207
|
guidelines.claRequired = true;
|
|
200
208
|
}
|