@oss-autopilot/core 3.4.1 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/dist/cli-registry.js +50 -0
  2. package/dist/cli.bundle.cjs +81 -78
  3. package/dist/commands/compliance-score.d.ts +21 -0
  4. package/dist/commands/compliance-score.js +156 -0
  5. package/dist/commands/index.d.ts +4 -0
  6. package/dist/commands/index.js +4 -0
  7. package/dist/commands/list-mark-done.d.ts +48 -0
  8. package/dist/commands/list-mark-done.js +213 -0
  9. package/dist/commands/parse-list.js +86 -9
  10. package/dist/commands/repo-vet.d.ts +21 -0
  11. package/dist/commands/repo-vet.js +215 -0
  12. package/dist/commands/startup.js +18 -0
  13. package/dist/core/ci-enforced-tools.d.ts +35 -0
  14. package/dist/core/ci-enforced-tools.js +109 -0
  15. package/dist/core/comment-decision.d.ts +72 -0
  16. package/dist/core/comment-decision.js +74 -0
  17. package/dist/core/compliance-score.d.ts +127 -0
  18. package/dist/core/compliance-score.js +277 -0
  19. package/dist/core/config-registry.js +12 -0
  20. package/dist/core/contributing.d.ts +52 -0
  21. package/dist/core/contributing.js +139 -0
  22. package/dist/core/extraction-categories.d.ts +55 -0
  23. package/dist/core/extraction-categories.js +108 -0
  24. package/dist/core/follow-up-history.d.ts +41 -0
  25. package/dist/core/follow-up-history.js +71 -0
  26. package/dist/core/gist-state-store.d.ts +30 -7
  27. package/dist/core/gist-state-store.js +87 -11
  28. package/dist/core/issue-conversation.js +1 -0
  29. package/dist/core/issue-effort.d.ts +29 -0
  30. package/dist/core/issue-effort.js +41 -0
  31. package/dist/core/maintainer-hints.d.ts +23 -0
  32. package/dist/core/maintainer-hints.js +36 -0
  33. package/dist/core/pr-quality-rubric.d.ts +70 -0
  34. package/dist/core/pr-quality-rubric.js +121 -0
  35. package/dist/core/repo-vet.d.ts +90 -0
  36. package/dist/core/repo-vet.js +178 -0
  37. package/dist/core/state-schema.d.ts +76 -0
  38. package/dist/core/state-schema.js +75 -0
  39. package/dist/core/strategy.d.ts +75 -0
  40. package/dist/core/strategy.js +226 -0
  41. package/dist/core/types.d.ts +2 -0
  42. package/dist/core/workflow-state.d.ts +56 -0
  43. package/dist/core/workflow-state.js +101 -0
  44. package/dist/formatters/json.d.ts +147 -0
  45. package/dist/formatters/json.js +79 -0
  46. package/package.json +1 -1
@@ -0,0 +1,277 @@
1
+ /**
2
+ * PR compliance scoring (#1245).
3
+ *
4
+ * Extracted from `agents/pr-compliance-checker.md`'s in-prompt scoring
5
+ * tables so the weights, thresholds, and per-check rules are
6
+ * deterministic, unit-testable, and tunable without editing markdown.
7
+ * Same architectural shape as success-grade (#858), linked-PR
8
+ * classifier (#910), and anti-AI scan (#911).
9
+ *
10
+ * The function intentionally does not fetch PR data — callers (the MCP
11
+ * tool, the CLI command, the agent) supply pre-fetched metadata so the
12
+ * score is reproducible against fixture data and the same input shape
13
+ * works for both live PRs and historical replay.
14
+ */
15
/**
 * Age threshold, in days, for a closed linked issue. A reference closed
 * longer ago than this is scored "fail" (probably stale); a more recent
 * one is scored "warn". Exported so callers can document the cutoff (#1246).
 */
export const CLOSED_ISSUE_RECENT_DAYS = 30;

/** Points each check contributes to the total; the six weights sum to 100. */
const WEIGHTS = {
    issueReference: 25,
    description: 25,
    focusedChanges: 20,
    tests: 15,
    title: 10,
    branch: 5,
};

/** Share of a check's weight that is awarded for each status. */
const STATUS_TO_FRACTION = {
    pass: 1,
    warn: 0.5,
    fail: 0,
};
34
+ // Pull canonical rubric thresholds from the single source of truth
35
+ // (#1252). Re-exported so existing consumers of compliance-score
36
+ // (tests, agent prompts) keep working without touching their imports.
37
+ import { TITLE_LENGTH_BUDGET, FOCUSED_CHANGES_THRESHOLDS } from './pr-quality-rubric.js';
38
+ /** Title byte budget — Conventional Commits style fits comfortably under 72. */
39
+ export { TITLE_LENGTH_BUDGET } from './pr-quality-rubric.js';
40
+ /** "Focused changes" thresholds. Source of truth lives in pr-quality-rubric.ts. */
41
+ export const FOCUSED_CHANGES = FOCUSED_CHANGES_THRESHOLDS;
42
/**
 * Minimum score required for each rating tier. Scores below `fixFirst`
 * fall into the lowest tier.
 */
export const RATING_CUTOFFS = {
    ready: 90,
    minor: 75,
    fixFirst: 60,
};
48
/**
 * Detect a closing or referencing keyword in the PR body. GitHub's own
 * auto-close keyword set: close, closes, closed, fix, fixes, fixed,
 * resolve, resolves, resolved.
 *
 * The keyword may be followed by an optional colon (`Fixes: #12`), which
 * GitHub also honours; without it a colon-style reference was not
 * detected at all. Only bare same-repo `#N` references are matched here.
 */
const CLOSING_KEYWORDS = /\b(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?):?\s+#\d+/i;
/** Non-closing mentions such as `Refs #12`, `Refs: #12`, `See #12`, `Relates to #12`. */
const REFERENCE_KEYWORDS = /\b(?:relates?\s+to|see|refs?|references?):?\s+#\d+/i;
/** A full github.com issue URL anywhere in the text. */
const ISSUE_URL = /https?:\/\/github\.com\/[^/]+\/[^/]+\/issues\/\d+/i;
56
/**
 * Derive the issue-reference check result from pre-verified linked-issue
 * state (#1246 Improvement B). The worst single reference decides the
 * outcome, checked in this order:
 *   1. any `not_found` entry                                  → fail
 *   2. every entry `unverifiable`                              → warn
 *   3. `closed` more than {@link CLOSED_ISSUE_RECENT_DAYS} days ago → fail
 *   4. any other `closed` entry (recent, or age unknown)       → warn
 *   5. any remaining cross-repo entry                          → warn
 *   6. otherwise                                               → pass
 *
 * In a mixed list, `unverifiable` entries are dropped from steps 3–6 so
 * that a rate-limited lookup on one reference does not drag down a PR
 * whose other references verified fine.
 *
 * Every message names the issue as `owner/repo#N` when the entry is
 * flagged `crossRepo`, and as `#N` otherwise, so a closed cross-repo
 * reference is not mistaken for a same-repo issue number.
 *
 * @param {number} weight - Weight echoed back on the returned check.
 * @param {Array<object>} linkedIssues - Entries read through their
 *   `state`, `number`, `repo`, `crossRepo` and `closedDaysAgo` fields.
 * @returns {{status: string, weight: number, detail: string} | null}
 *   `null` when the list is empty, so the caller falls back to the
 *   regex-only result.
 */
function evaluateLinkedIssues(weight, linkedIssues) {
    if (linkedIssues.length === 0) {
        return null;
    }
    // Single place that decides how an issue is named in messages.
    const tagFor = (li) => (li.crossRepo ? `${li.repo}#${li.number}` : `#${li.number}`);

    const notFound = linkedIssues.find((li) => li.state === 'not_found');
    if (notFound) {
        return {
            status: 'fail',
            weight,
            detail: `linked issue ${tagFor(notFound)} does not exist — typo or wrong repo?`,
        };
    }

    const verifiable = linkedIssues.filter((li) => li.state !== 'unverifiable');
    if (verifiable.length === 0) {
        // Nothing could be checked: surface the gap without failing the PR.
        return {
            status: 'warn',
            weight,
            detail: `linked issue${linkedIssues.length > 1 ? 's' : ''} could not be verified (rate limit or network) — confirm manually`,
        };
    }

    // A closed issue with no known age counts as 0 days, i.e. "recent".
    const staleClosed = verifiable.find((li) => li.state === 'closed' && (li.closedDaysAgo ?? 0) > CLOSED_ISSUE_RECENT_DAYS);
    if (staleClosed) {
        return {
            status: 'fail',
            weight,
            detail: `linked issue ${tagFor(staleClosed)} has been closed for ` +
                `${staleClosed.closedDaysAgo} days — reference is probably stale`,
        };
    }

    const recentClosed = verifiable.find((li) => li.state === 'closed');
    if (recentClosed) {
        return {
            status: 'warn',
            weight,
            detail: `linked issue ${tagFor(recentClosed)} was closed ` +
                `${recentClosed.closedDaysAgo ?? '?'} days ago — confirm this PR is still relevant`,
        };
    }

    const crossRepo = verifiable.find((li) => li.crossRepo);
    if (crossRepo) {
        return {
            status: 'warn',
            weight,
            detail: `cross-repo reference ${crossRepo.repo}#${crossRepo.number} — ` +
                `verify the linked issue applies to changes in this repo`,
        };
    }

    return {
        status: 'pass',
        weight,
        detail: `linked issue${verifiable.length > 1 ? 's' : ''} verified open`,
    };
}
128
/**
 * Score whether the PR points at an issue.
 *
 * A reference counts if the body contains a closing keyword, a
 * referencing keyword, or an issue URL, or if the caller supplied at
 * least one parsed linked issue (which also covers cross-repo links that
 * the body regexes do not match). When linked-issue state was supplied
 * and yields a verdict, that verdict replaces the regex-only one, so a
 * `Closes #999` aimed at a missing issue cannot pass.
 *
 * @param {object} meta - PR metadata; only `body` is read.
 * @param {object} [repoContext] - Optional context; only `linkedIssues` is read.
 * @returns {{status: string, weight: number, detail: string}}
 */
function checkIssueReference(meta, repoContext) {
    const weight = WEIGHTS.issueReference;
    const { body } = meta;
    const linked = repoContext?.linkedIssues;

    const hasClosing = CLOSING_KEYWORDS.test(body);
    const isReferenced = hasClosing ||
        REFERENCE_KEYWORDS.test(body) ||
        ISSUE_URL.test(body) ||
        (linked?.length ?? 0) > 0;
    if (!isReferenced) {
        return { status: 'fail', weight, detail: 'no issue reference' };
    }

    // Verified state, when available, outranks the text-only signal.
    const fromVerification = linked ? evaluateLinkedIssues(weight, linked) : null;
    if (fromVerification) {
        return fromVerification;
    }

    return hasClosing
        ? { status: 'pass', weight, detail: 'closing keyword present' }
        : { status: 'warn', weight, detail: 'issue referenced without a closing keyword' };
}
157
// Markdown headings (levels 1–3) that mark the "what", "why" and "test"
// sections of a PR description.
const SECTION_WHAT = /(?:^|\n)#{1,3}\s*(?:summary|overview|what(?:\s+changed)?)\b/i;
const SECTION_WHY = /(?:^|\n)#{1,3}\s*(?:why|motivation|context|background|rationale)\b/i;
const SECTION_TEST = /(?:^|\n)#{1,3}\s*(?:test\s*plan|how\s+to\s+test|testing|tests?)\b/i;
/**
 * Score the PR description by how many of the three expected sections
 * it contains.
 *
 * - empty after trimming                       → fail
 * - all three sections                         → pass
 * - no section and fewer than 80 trimmed chars → fail
 * - anything else                              → warn
 *
 * @param {object} meta - PR metadata; only `body` is read.
 * @returns {{status: string, weight: number, detail: string}}
 */
function checkDescription(meta) {
    const weight = WEIGHTS.description;
    const { body } = meta;
    const bodyLength = body.trim().length;
    if (bodyLength === 0) {
        return { status: 'fail', weight, detail: 'description is empty' };
    }

    const sectionCount = [SECTION_WHAT, SECTION_WHY, SECTION_TEST]
        .filter((heading) => heading.test(body))
        .length;
    if (sectionCount === 3) {
        return { status: 'pass', weight, detail: 'what / why / test sections present' };
    }
    if (sectionCount === 0 && bodyLength < 80) {
        return { status: 'fail', weight, detail: 'minimal description, no recognizable sections' };
    }
    return {
        status: 'warn',
        weight,
        detail: `${sectionCount} of 3 sections present (what/why/test)`,
    };
}
182
/**
 * Score how focused the change set is, from the file count and the total
 * of added plus deleted lines.
 *
 * Both numbers below the pass thresholds → pass. Either number above
 * the warn thresholds → fail. Anything in between → warn.
 *
 * @param {object} meta - PR metadata; reads `filesChangedCount`,
 *   `additions` and `deletions`.
 * @returns {{status: string, weight: number, detail: string}}
 */
function checkFocusedChanges(meta) {
    const weight = WEIGHTS.focusedChanges;
    const fileCount = meta.filesChangedCount;
    const lineCount = meta.additions + meta.deletions;
    const summary = `${fileCount} files, ${lineCount} lines`;
    const { passFiles, passLines, warnFiles, warnLines } = FOCUSED_CHANGES;

    const isSmall = fileCount < passFiles && lineCount < passLines;
    if (isSmall) {
        return { status: 'pass', weight, detail: summary };
    }
    const isOversized = fileCount > warnFiles || lineCount > warnLines;
    return isOversized
        ? { status: 'fail', weight, detail: `${summary} — needs splitting` }
        : { status: 'warn', weight, detail: summary };
}
194
/**
 * Paths that count as test files. Alternatives, in order:
 *   - anything under a `test/`, `tests/`, `__tests__/` or `spec/` directory
 *   - `*.test.*` / `*.spec.*` with a js/jsx/ts/tsx extension, optionally
 *     prefixed by `c` or `m` (`.mjs`, `.cts`, …)
 *   - any path containing `.test_`
 *   - Python `test_*.py` files
 *   - Go / Python `*_test.go` / `*_test.py` files
 *   - Ruby `*_spec.rb` files
 *
 * The last three cover ecosystems where tests live next to the code
 * rather than in a dedicated directory; without them such PRs were
 * scored as having no tests.
 */
const TEST_FILE_PATTERN = /(?:^|\/)(?:tests?|__tests__|spec)\/|\.(?:test|spec)\.[cm]?[jt]sx?$|\.test_|(?:^|\/)test_[^\/]*\.py$|_test\.(?:go|py)$|_spec\.rb$/i;
/**
 * Score whether the PR touches any test file.
 *
 * @param {object} meta - PR metadata; `files` is read as a list of paths.
 * @param {object} [repoContext] - Optional context. Only an explicit
 *   `hasTestInfrastructure === false` softens a missing test to a warn;
 *   an absent or `true` value leaves it a fail.
 * @returns {{status: string, weight: number, detail: string}}
 */
function checkTests(meta, repoContext) {
    const weight = WEIGHTS.tests;
    const hasTestFile = meta.files.some((f) => TEST_FILE_PATTERN.test(f));
    if (hasTestFile) {
        return { status: 'pass', weight, detail: 'test file(s) touched' };
    }
    if (repoContext?.hasTestInfrastructure === false) {
        return {
            status: 'warn',
            weight,
            detail: 'no tests, but project has no visible test infrastructure',
        };
    }
    return { status: 'fail', weight, detail: 'no test files in a test-requiring project' };
}
210
/** Conventional Commits prefix: `type(scope)!: subject`, with scope and `!` optional. */
const CONVENTIONAL_TITLE = /^(?:feat|fix|chore|docs|refactor|test|perf|build|ci|style|revert)(?:\([^)]+\))?!?:\s+\S/i;
/** Whole-title placeholders, compared case-insensitively. */
const VAGUE_EXACT = new Set(['wip', 'test', 'hello', 'tmp', 'temp', 'untitled']);
/** Titles made only of the letters a, s, d, f, q, w, e, r (keyboard mashing such as "asdf"). */
const ASDF_ONLY = /^[asdfqwer]+$/i;
/** "update <one token>" with nothing else, e.g. "Update README.md". */
const NON_DESCRIPTIVE_UPDATE = /^update\s+\S+\s*$/i;
/**
 * Decide whether a PR title is a placeholder rather than a description.
 *
 * An empty or whitespace-only title counts as vague; previously it
 * slipped through and was reported downstream as "descriptive".
 *
 * @param {string} title - Raw PR title; surrounding whitespace is ignored.
 * @returns {boolean} `true` when the title is empty, a known placeholder,
 *   keyboard mashing, or a bare "update X".
 */
function isVagueTitle(title) {
    const trimmed = title.trim();
    if (trimmed.length === 0) {
        return true;
    }
    return (
        VAGUE_EXACT.has(trimmed.toLowerCase()) ||
        ASDF_ONLY.test(trimmed) ||
        NON_DESCRIPTIVE_UPDATE.test(trimmed)
    );
}
224
/**
 * Score the PR title. Checks run in a fixed order and the first match
 * decides:
 *   1. vague or placeholder title              → fail
 *   2. longer than TITLE_LENGTH_BUDGET         → warn
 *   3. matches the Conventional Commits prefix → pass
 *   4. anything else                           → warn
 *
 * Length is measured on the untrimmed title via `String#length`.
 *
 * @param {object} meta - PR metadata; only `title` is read.
 * @returns {{status: string, weight: number, detail: string}}
 */
function checkTitle(meta) {
    const weight = WEIGHTS.title;
    const { title } = meta;
    const charCount = title.length;

    if (isVagueTitle(title)) {
        return { status: 'fail', weight, detail: 'vague or placeholder title' };
    }
    if (charCount > TITLE_LENGTH_BUDGET) {
        return { status: 'warn', weight, detail: `title is ${charCount} chars (budget: ${TITLE_LENGTH_BUDGET})` };
    }
    return CONVENTIONAL_TITLE.test(title)
        ? { status: 'pass', weight, detail: 'descriptive, conventional, within budget' }
        : { status: 'warn', weight, detail: 'descriptive but not conventional commit format' };
}
238
/** Auto-generated names such as `patch-1`. */
const PATCH_NUM_BRANCH = /^patch-\d+$/i;
/** `main` or `master` used as the branch name. */
const ROOT_BRANCH = /^(?:main|master)$/i;
/**
 * Score the branch name.
 *
 * `main` / `master` / `patch-N` → fail. A name containing `/`, `-` or
 * `_` is treated as descriptive → pass. Any other name → warn.
 *
 * @param {object} meta - PR metadata; only `branch` is read.
 * @returns {{status: string, weight: number, detail: string}}
 */
function checkBranch(meta) {
    const weight = WEIGHTS.branch;
    const { branch } = meta;

    const isNonDescriptive = [ROOT_BRANCH, PATCH_NUM_BRANCH].some((re) => re.test(branch));
    if (isNonDescriptive) {
        return { status: 'fail', weight, detail: `non-descriptive branch name "${branch}"` };
    }
    const hasSeparator = /[/_-]/.test(branch);
    return hasSeparator
        ? { status: 'pass', weight, detail: branch }
        : { status: 'warn', weight, detail: `branch "${branch}" lacks a clear separator` };
}
251
/**
 * Map a numeric score to its rating label and emoji, using the highest
 * RATING_CUTOFFS tier the score reaches.
 *
 * @param {number} score - Rounded compliance score.
 * @returns {{rating: string, emoji: string}} Lowest tier when no cutoff is met.
 */
function ratingFor(score) {
    // Ordered from the highest cutoff down; the first tier reached wins.
    const tiers = [
        [RATING_CUTOFFS.ready, 'ready', '🌟'],
        [RATING_CUTOFFS.minor, 'minor', '✅'],
        [RATING_CUTOFFS.fixFirst, 'fix_first', '⚠️'],
    ];
    for (const [cutoff, rating, emoji] of tiers) {
        if (score >= cutoff) {
            return { rating, emoji };
        }
    }
    return { rating: 'significant_work', emoji: '❌' };
}
260
/**
 * Compute a PR compliance score from pre-fetched metadata, optionally
 * refined by repo context (#1245). Runs the six checks, sums each
 * check's weight scaled by its status fraction, rounds the total, and
 * attaches the matching rating.
 *
 * @param {object} meta - PR metadata passed to every check.
 * @param {object} [repoContext] - Optional context, passed to the
 *   issue-reference and tests checks only.
 * @returns {{score: number, rating: string, emoji: string, checks: object}}
 */
export function computeComplianceScore(meta, repoContext) {
    const checks = {
        issueReference: checkIssueReference(meta, repoContext),
        description: checkDescription(meta),
        focusedChanges: checkFocusedChanges(meta),
        tests: checkTests(meta, repoContext),
        title: checkTitle(meta),
        branch: checkBranch(meta),
    };

    let weighted = 0;
    for (const { status, weight } of Object.values(checks)) {
        weighted += STATUS_TO_FRACTION[status] * weight;
    }
    const score = Math.round(weighted);

    return { score, ...ratingFor(score), checks };
}
@@ -191,6 +191,18 @@ export const CONFIG_KEY_REGISTRY = [
191
191
  settableVia: 'setup',
192
192
  valueHint: 'true|false',
193
193
  },
194
+ {
195
+ key: 'healthCheckFreshnessMinutes',
196
+ description: 'Suppress the SessionStart PR health one-liner when the cached digest is older than this many minutes. The line silently disappears between /oss runs, so what remains is always current. Defaults to 30 minutes (#1255).',
197
+ settableVia: 'setup',
198
+ valueHint: 'positive integer',
199
+ },
200
+ {
201
+ key: 'reviewMaxPasses',
202
+ description: 'Convergence cap for the multi-agent review loop in workflows/dispatch-review.md. Optional; falls back to per-mode defaults (5 for diff, 3 for plan) when unset (#1275).',
203
+ settableVia: 'setup',
204
+ valueHint: 'positive integer',
205
+ },
194
206
  // ── Setup-only completion flag ──────────────────────────────────────
195
207
  {
196
208
  key: 'complete',
@@ -0,0 +1,52 @@
1
+ /**
2
+ * CONTRIBUTING.md requirement extraction (#1279).
3
+ *
4
+ * Extracted from `workflows/draft-first-workflow.md` Step 1d so the
5
+ * heuristic that pulls actionable requirements out of a project's
6
+ * CONTRIBUTING file lives in typed code instead of workflow prose.
7
+ * Same architectural shape as compliance-score (#1245), repo-vet
8
+ * (#1242), strategy (#1243), and the recent #1252 / #1264 / #1286
9
+ * extractions.
10
+ *
11
+ * Pure typed helper — no I/O. Callers (the workflow runner, the
12
+ * `pr-compliance-checker` agent) read the file themselves and pass
13
+ * the contents in. The extraction step is heuristic regex matching
14
+ * over headings + bullet phrases; it intentionally over-recalls
15
+ * (some false positives) rather than under-recalling.
16
+ *
17
+ * Out of scope (deferred per #1279):
18
+ * - `findContributingFile(repoPath)` — file-system search for the
19
+ * guidelines file at one of seven well-known locations.
20
+ * - `verifyRequirements(...)` — diff-aware satisfaction check per
21
+ * requirement.
22
+ * - `checkContributingCompliance(...)` — convenience wrapper that
23
+ * calls all three.
24
+ *
25
+ * The remaining pieces plumb `extractRequirements()` into specific
26
+ * surfaces; each ships independently.
27
+ */
28
/** Buckets a contributing-guideline requirement can be filed under. */
export type ContributingCategory =
    | 'tests'
    | 'documentation'
    | 'changelog'
    | 'code_style'
    | 'commit_format'
    | 'cla_dco'
    | 'branch_target'
    | 'scope';
/** One requirement surfaced from a project's contributing guidelines. */
export interface ContributingRequirement {
    /** Bucket of the rule that matched. */
    category: ContributingCategory;
    /** One-line description of what the project asks for. */
    description: string;
    /**
     * The line that triggered the rule, trimmed and cut to 200
     * characters plus an ellipsis when longer. Lets agent output quote
     * the source ("the project's CONTRIBUTING says: …").
     */
    evidence: string;
}
/**
 * Pull structured requirements out of CONTRIBUTING.md-style text.
 * Each rule contributes at most one entry per document, however many
 * lines it matches. A category backed by several rules can still
 * appear more than once.
 */
export declare function extractRequirements(content: string): ContributingRequirement[];
/**
 * Reduce a requirement list to the first entry seen for each category.
 * Intended for top-line summaries that do not need every matched rule.
 */
export declare function dedupeByCategory(requirements: readonly ContributingRequirement[]): ContributingRequirement[];
@@ -0,0 +1,139 @@
1
+ /**
2
+ * CONTRIBUTING.md requirement extraction (#1279).
3
+ *
4
+ * Extracted from `workflows/draft-first-workflow.md` Step 1d so the
5
+ * heuristic that pulls actionable requirements out of a project's
6
+ * CONTRIBUTING file lives in typed code instead of workflow prose.
7
+ * Same architectural shape as compliance-score (#1245), repo-vet
8
+ * (#1242), strategy (#1243), and the recent #1252 / #1264 / #1286
9
+ * extractions.
10
+ *
11
+ * Pure typed helper — no I/O. Callers (the workflow runner, the
12
+ * `pr-compliance-checker` agent) read the file themselves and pass
13
+ * the contents in. The extraction step is heuristic regex matching
14
+ * over headings + bullet phrases; it intentionally over-recalls
15
+ * (some false positives) rather than under-recalling.
16
+ *
17
+ * Out of scope (deferred per #1279):
18
+ * - `findContributingFile(repoPath)` — file-system search for the
19
+ * guidelines file at one of seven well-known locations.
20
+ * - `verifyRequirements(...)` — diff-aware satisfaction check per
21
+ * requirement.
22
+ * - `checkContributingCompliance(...)` — convenience wrapper that
23
+ * calls all three.
24
+ *
25
+ * The remaining pieces plumb `extractRequirements()` into specific
26
+ * surfaces; each ships independently.
27
+ */
28
/**
 * Heuristic patterns. Every rule is tested independently against each
 * non-blank line and fires at most once per document, so the order of
 * this table does not let one rule shadow another. Several rules may
 * share a category (e.g. the two `tests` rules).
 *
 * Each entry has:
 *   - `category`: bucket the requirement is reported under
 *   - `pattern`: case-insensitive regex tested against a trimmed line
 *   - `description`: one-line summary copied onto the result
 */
const RULES = [
    {
        category: 'tests',
        pattern: /\b(?:add|include|write|provide|cover\s+with)\s+(?:unit\s+)?tests?\b/i,
        description: 'Add tests covering the change',
    },
    {
        category: 'tests',
        pattern: /\btest(?:s|ing)?\s+(?:are|is)\s+required\b/i,
        description: 'Tests are required',
    },
    {
        category: 'documentation',
        pattern: /\b(?:update|add|provide)\s+(?:the\s+)?(?:docs?|documentation)\b/i,
        description: 'Update documentation when behavior changes',
    },
    {
        category: 'changelog',
        pattern: /\b(?:add|include|update)\s+(?:an?\s+)?(?:entry\s+(?:to|in)\s+)?(?:the\s+)?(?:changelog|CHANGELOG\.md|changeset)\b/i,
        description: 'Add a changelog entry / changeset',
    },
    {
        category: 'changelog',
        pattern: /\bchange(?:log|set)\s+(?:entry|file)\s+(?:is\s+)?required\b/i,
        description: 'Changelog entry / changeset is required',
    },
    {
        category: 'code_style',
        pattern: /\b(?:run|use)\s+(?:the\s+)?(?:linter|formatter|prettier|eslint|biome|black|ruff|gofmt|rustfmt|clang-format)\b/i,
        description: 'Run the project formatter / linter before submitting',
    },
    {
        category: 'commit_format',
        pattern: /\bconventional\s+commits?\b/i,
        description: 'Use Conventional Commits format',
    },
    {
        category: 'commit_format',
        // `needs?` so the singular phrasing "commit message needs to …"
        // matches as well as "commit messages need …".
        pattern: /\bcommit\s+messages?\s+(?:must|should|needs?)\b/i,
        description: 'Project enforces a commit-message convention',
    },
    {
        category: 'cla_dco',
        pattern: /\b(?:CLA|contributor\s+license\s+agreement|DCO|sign(?:ed)?-off-by|signoff)\b/i,
        description: 'Contributor license / DCO sign-off required',
    },
    {
        category: 'branch_target',
        pattern: /\b(?:open|submit|target)\s+(?:a\s+)?(?:PR|pull\s+request).*?(?:against|to|targeting)\s+(?:the\s+)?(\S+)\s+branch\b/i,
        description: 'PR must target a specific branch',
    },
    {
        category: 'scope',
        pattern: /\b(?:one\s+(?:logical\s+)?change|focused\s+PR|atomic\s+commits?)\b/i,
        description: 'Keep PRs focused / atomic',
    },
];
92
/**
 * Pull structured requirements out of CONTRIBUTING.md-style text.
 *
 * The text is split into lines; every non-blank, trimmed line is tested
 * against each rule in RULES. A rule contributes at most one entry per
 * document (the first line it matches), so results are ordered by line
 * first and by rule order within a line.
 *
 * @param {string} content - Raw file contents; empty, blank or falsy input yields `[]`.
 * @returns {Array<{category: string, description: string, evidence: string}>}
 *   `evidence` is the matched line, cut to 200 characters plus `…` when longer.
 */
export function extractRequirements(content) {
    if (!content || content.trim().length === 0) {
        return [];
    }
    const found = [];
    const firedRules = new Set();
    const candidateLines = content
        .split(/\r?\n/)
        .map((rawLine) => rawLine.trim())
        .filter((text) => text.length > 0);

    for (const text of candidateLines) {
        for (const rule of RULES) {
            const ruleKey = `${rule.category}::${rule.pattern.source}`;
            if (firedRules.has(ruleKey) || !rule.pattern.test(text)) {
                continue;
            }
            firedRules.add(ruleKey);
            found.push({
                category: rule.category,
                description: rule.description,
                evidence: text.length > 200 ? `${text.slice(0, 200)}…` : text,
            });
        }
    }
    return found;
}
124
/**
 * Reduce a requirement list to the first entry seen for each category,
 * keeping the order in which categories first appear. Intended for
 * top-line summaries that do not need every matched rule.
 *
 * @param {ReadonlyArray<{category: string}>} requirements - Input list; not modified.
 * @returns {Array<object>} New array holding the original entry objects.
 */
export function dedupeByCategory(requirements) {
    const firstPerCategory = new Map();
    for (const requirement of requirements) {
        if (!firstPerCategory.has(requirement.category)) {
            firstPerCategory.set(requirement.category, requirement);
        }
    }
    return [...firstPerCategory.values()];
}
@@ -0,0 +1,55 @@
1
+ /**
2
+ * Per-repo extraction category configuration (#1284).
3
+ *
4
+ * The `extract-learnings` MCP prompt produces a structured markdown
5
+ * document organized into category sections. The default category
6
+ * set is sensible for typical web/library OSS work; specialized
7
+ * repos (security-focused, performance-critical, accessibility-
8
+ * forward) benefit from a tailored taxonomy.
9
+ *
10
+ * This module is the single source of truth for:
11
+ * - The default category list.
12
+ * - Validation of custom category lists (non-empty, no duplicates,
13
+ * reasonable string lengths).
14
+ * - Resolution: given a repo's optional override, produce the list
15
+ * of categories the prompt and the storage layer should use.
16
+ *
17
+ * Pure typed helper — no I/O. Same architectural shape as the recent
18
+ * #1252 / #1264 / #1286 / #1279 / #1277 extractions.
19
+ *
20
+ * Out of scope (deferred per #1284):
21
+ * - Wiring `categories` into the `guidelines store` / `guidelines view`
22
+ * shape so the override persists alongside the markdown.
23
+ * - Updating the `extract-learnings` MCP prompt to consume the
24
+ * resolved category list at run time.
25
+ * - Detecting repo type from signals (SECURITY.md presence, repo
26
+ * topics, etc.) to suggest categories.
27
+ */
28
/**
 * Category list `extract-learnings` falls back to when a repo has no
 * override. Order is significant: the prompt renders sections in list
 * order, with `Code Style` first and `Other` last.
 */
export declare const DEFAULT_EXTRACTION_CATEGORIES: readonly string[];
/** Outcome of validating a user-supplied category list. */
export interface CategoryValidationResult {
    /** `true` when the list passed every check. */
    ok: boolean;
    /** One message per detected problem; non-empty when `ok === false`. */
    errors: string[];
    /**
     * The list as a caller should persist it: names trimmed, with
     * `Other` appended when the user left it out (the prompt always
     * needs an "everything else" bucket). Only set when `ok === true`.
     */
    normalized?: readonly string[];
}
/**
 * Check a user-supplied category list. Problems are reported through
 * the returned structure instead of being thrown, so CLI and
 * slash-command callers can show the messages inline.
 */
export declare function validateCategories(input: readonly string[]): CategoryValidationResult;
/**
 * Pick the categories the prompt and storage layer should use for one
 * extraction. The default list is used when no override is given or
 * when the override fails validation.
 */
export declare function resolveCategories(override: readonly string[] | undefined | null): readonly string[];
@@ -0,0 +1,108 @@
1
+ /**
2
+ * Per-repo extraction category configuration (#1284).
3
+ *
4
+ * The `extract-learnings` MCP prompt produces a structured markdown
5
+ * document organized into category sections. The default category
6
+ * set is sensible for typical web/library OSS work; specialized
7
+ * repos (security-focused, performance-critical, accessibility-
8
+ * forward) benefit from a tailored taxonomy.
9
+ *
10
+ * This module is the single source of truth for:
11
+ * - The default category list.
12
+ * - Validation of custom category lists (non-empty, no duplicates,
13
+ * reasonable string lengths).
14
+ * - Resolution: given a repo's optional override, produce the list
15
+ * of categories the prompt and the storage layer should use.
16
+ *
17
+ * Pure typed helper — no I/O. Same architectural shape as the recent
18
+ * #1252 / #1264 / #1286 / #1279 / #1277 extractions.
19
+ *
20
+ * Out of scope (deferred per #1284):
21
+ * - Wiring `categories` into the `guidelines store` / `guidelines view`
22
+ * shape so the override persists alongside the markdown.
23
+ * - Updating the `extract-learnings` MCP prompt to consume the
24
+ * resolved category list at run time.
25
+ * - Detecting repo type from signals (SECURITY.md presence, repo
26
+ * topics, etc.) to suggest categories.
27
+ */
28
/**
 * The default category list used by `extract-learnings` when no
 * per-repo override is configured. Order matters: the prompt
 * renders sections in this order, so `Code Style` first /
 * `Other` last is the established convention.
 *
 * Frozen because this same array instance is handed out to callers as
 * the fallback; a caller that mutated it would silently change the
 * default for every later resolution.
 */
export const DEFAULT_EXTRACTION_CATEGORIES = Object.freeze([
    'Code Style',
    'Process',
    'Architecture',
    'Testing',
    'Other',
]);
41
/**
 * Bounds on an override list. The prompt emits one heading per
 * category: very long lists become unreadable and very long names blow
 * out heading width.
 */
const CATEGORY_NAME_MAX_LENGTH = 40;
const MAX_CATEGORIES = 12;
/**
 * Validate a user-supplied category list. Returns a structured result
 * rather than throwing — the CLI / slash-command callers surface the
 * error strings inline.
 *
 * Checks performed:
 *   - the list is not empty (reported alone, nothing else is checked)
 *   - the list has at most MAX_CATEGORIES entries
 *   - every entry is a string; a non-string entry is reported as an
 *     error instead of raising a TypeError, which keeps the
 *     no-throw contract for lists loaded from untyped config
 *   - no entry is empty after trimming
 *   - no entry is longer than CATEGORY_NAME_MAX_LENGTH after trimming
 *   - no two entries are equal ignoring case
 *
 * @param {ReadonlyArray<string>} input - Category names as entered.
 * @returns {{ok: boolean, errors: string[], normalized?: string[]}}
 *   On success `normalized` holds the trimmed names in input order, with
 *   `Other` appended when no entry equals "other" ignoring case.
 */
export function validateCategories(input) {
    const errors = [];
    if (input.length === 0) {
        errors.push('At least one category is required.');
        return { ok: false, errors };
    }
    if (input.length > MAX_CATEGORIES) {
        errors.push(`At most ${MAX_CATEGORIES} categories are supported (received ${input.length}).`);
    }
    const seen = new Set();
    const cleaned = [];
    for (const raw of input) {
        if (typeof raw !== 'string') {
            const received = raw === null ? 'null' : typeof raw;
            errors.push(`Category names must be strings (received ${received}).`);
            continue;
        }
        const trimmed = raw.trim();
        if (trimmed.length === 0) {
            errors.push('Category names cannot be empty or whitespace-only.');
            continue;
        }
        if (trimmed.length > CATEGORY_NAME_MAX_LENGTH) {
            errors.push(`Category "${trimmed.slice(0, 30)}…" is longer than ${CATEGORY_NAME_MAX_LENGTH} characters.`);
            continue;
        }
        const lowerKey = trimmed.toLowerCase();
        if (seen.has(lowerKey)) {
            errors.push(`Duplicate category "${trimmed}" (case-insensitive).`);
            continue;
        }
        seen.add(lowerKey);
        cleaned.push(trimmed);
    }
    if (errors.length > 0) {
        return { ok: false, errors };
    }
    // The prompt needs a bucket for feedback that fits no chosen
    // category. A missing "Other" is appended silently rather than
    // reported — most users won't think to add it.
    if (!seen.has('other')) {
        cleaned.push('Other');
    }
    return { ok: true, errors: [], normalized: cleaned };
}
94
/**
 * Pick the categories the prompt and storage layer should use for one
 * extraction.
 *
 * @param {ReadonlyArray<string> | undefined | null} override - Optional
 *   per-repo category list.
 * @returns {ReadonlyArray<string>} The validated, normalized override,
 *   or DEFAULT_EXTRACTION_CATEGORIES when the override is missing,
 *   empty, or fails validation.
 */
export function resolveCategories(override) {
    const isMissing = !override || override.length === 0;
    if (isMissing) {
        return DEFAULT_EXTRACTION_CATEGORIES;
    }
    const { ok, normalized } = validateCategories(override);
    return ok && normalized ? normalized : DEFAULT_EXTRACTION_CATEGORIES;
}