@ryuenn3123/agentic-senior-core 3.0.50 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/.agent-context/review-checklists/pr-checklist.md +1 -0
  2. package/.agent-context/rules/api-docs.md +63 -47
  3. package/.agent-context/rules/architecture.md +133 -120
  4. package/.agent-context/rules/database-design.md +36 -18
  5. package/.agent-context/rules/docker-runtime.md +66 -43
  6. package/.agent-context/rules/efficiency-vs-hype.md +38 -17
  7. package/.agent-context/rules/error-handling.md +35 -16
  8. package/.agent-context/rules/event-driven.md +35 -18
  9. package/.agent-context/rules/frontend-architecture.md +103 -76
  10. package/.agent-context/rules/git-workflow.md +81 -197
  11. package/.agent-context/rules/microservices.md +42 -41
  12. package/.agent-context/rules/naming-conv.md +27 -8
  13. package/.agent-context/rules/performance.md +32 -12
  14. package/.agent-context/rules/realtime.md +26 -9
  15. package/.agent-context/rules/security.md +39 -20
  16. package/.agent-context/rules/testing.md +36 -16
  17. package/AGENTS.md +9 -9
  18. package/README.md +10 -1
  19. package/lib/cli/commands/init.mjs +1 -0
  20. package/lib/cli/compiler.mjs +1 -0
  21. package/lib/cli/detector/constants.mjs +135 -0
  22. package/lib/cli/detector/design-evidence/collector.mjs +256 -0
  23. package/lib/cli/detector/design-evidence/constants.mjs +39 -0
  24. package/lib/cli/detector/design-evidence/file-traversal.mjs +83 -0
  25. package/lib/cli/detector/design-evidence/structured-attribute-evidence.mjs +117 -0
  26. package/lib/cli/detector/design-evidence/summary.mjs +109 -0
  27. package/lib/cli/detector/design-evidence/utility-helpers.mjs +122 -0
  28. package/lib/cli/detector/design-evidence.mjs +25 -610
  29. package/lib/cli/detector/stack-detection.mjs +243 -0
  30. package/lib/cli/detector/ui-signals.mjs +150 -0
  31. package/lib/cli/detector/workspace-scan.mjs +177 -0
  32. package/lib/cli/detector.mjs +20 -688
  33. package/lib/cli/memory-continuity.mjs +1 -0
  34. package/lib/cli/project-scaffolder/design-contract/sections/audits.mjs +96 -0
  35. package/lib/cli/project-scaffolder/design-contract/sections/conceptual-anchor.mjs +116 -0
  36. package/lib/cli/project-scaffolder/design-contract/sections/execution-handoff.mjs +211 -0
  37. package/lib/cli/project-scaffolder/design-contract/seed-signals.mjs +79 -0
  38. package/lib/cli/project-scaffolder/design-contract/signal-vocab.mjs +64 -0
  39. package/lib/cli/project-scaffolder/design-contract/validation/anchor-validators.mjs +222 -0
  40. package/lib/cli/project-scaffolder/design-contract/validation/audit-validators.mjs +117 -0
  41. package/lib/cli/project-scaffolder/design-contract/validation/completeness.mjs +83 -0
  42. package/lib/cli/project-scaffolder/design-contract/validation/execution-validators.mjs +328 -0
  43. package/lib/cli/project-scaffolder/design-contract/validation/helpers.mjs +8 -0
  44. package/lib/cli/project-scaffolder/design-contract/validation/structural-validators.mjs +79 -0
  45. package/lib/cli/project-scaffolder/design-contract/validation/system-validators.mjs +256 -0
  46. package/lib/cli/project-scaffolder/design-contract/validation.mjs +59 -896
  47. package/lib/cli/project-scaffolder/design-contract.mjs +147 -557
  48. package/mcp.json +30 -9
  49. package/package.json +17 -2
  50. package/scripts/audit-cache-layer-contract.mjs +258 -0
  51. package/scripts/audit-caching-scope-hygiene.mjs +263 -0
  52. package/scripts/audit-file-size.mjs +219 -0
  53. package/scripts/audit-reflection-citations.mjs +163 -0
  54. package/scripts/audit-release-bundle.mjs +170 -0
  55. package/scripts/audit-rule-id-uniqueness.mjs +313 -0
  56. package/scripts/benchmark-evidence-bundle.mjs +1 -0
  57. package/scripts/build-release-benchmark-bundle.mjs +204 -0
  58. package/scripts/context-triggered-audit.mjs +1 -0
  59. package/scripts/documentation-boundary-audit.mjs +1 -0
  60. package/scripts/explain-on-demand-audit.mjs +2 -1
  61. package/scripts/frontend-usability-audit.mjs +10 -10
  62. package/scripts/llm-judge/checklist-loader.mjs +45 -0
  63. package/scripts/llm-judge/constants.mjs +66 -0
  64. package/scripts/llm-judge/diff-collection.mjs +74 -0
  65. package/scripts/llm-judge/prompting.mjs +78 -0
  66. package/scripts/llm-judge/providers.mjs +111 -0
  67. package/scripts/llm-judge/verdict.mjs +134 -0
  68. package/scripts/llm-judge.mjs +21 -482
  69. package/scripts/mcp-server/tool-registry.mjs +55 -0
  70. package/scripts/mcp-server/tools.mjs +137 -1
  71. package/scripts/migrate-rule-format/id-prefix-table.mjs +37 -0
  72. package/scripts/migrate-rule-format/parse-legacy.mjs +180 -0
  73. package/scripts/migrate-rule-format/render-new.mjs +169 -0
  74. package/scripts/migrate-rule-format/roundtrip-validate.mjs +89 -0
  75. package/scripts/migrate-rule-format.mjs +192 -0
  76. package/scripts/release-gate/constants.mjs +1 -1
  77. package/scripts/release-gate/static-checks.mjs +1 -1
  78. package/scripts/rules-guardian-audit.mjs +5 -2
  79. package/scripts/single-source-lazy-loading-audit.mjs +2 -1
  80. package/scripts/ui-design-judge/git-input.mjs +3 -0
  81. package/scripts/validate/config.mjs +3 -2
  82. package/scripts/validate/coverage-checks.mjs +1 -1
  83. package/scripts/validate.mjs +93 -1
@@ -1,7 +1,8 @@
1
1
  // @ts-check
2
+ // @file-size-exception: Standalone MCP dispatcher intentionally keeps copied tool handlers together; Phase 3 adds bounded rule-validation handlers here.
2
3
 
3
4
  import { existsSync } from 'node:fs';
4
- import { mkdir, readFile, writeFile } from 'node:fs/promises';
5
+ import { mkdir, readFile, readdir, writeFile } from 'node:fs/promises';
5
6
  import { spawn } from 'node:child_process';
6
7
  import { dirname, resolve, sep } from 'node:path';
7
8
  import {
@@ -18,6 +19,10 @@ import {
18
19
  TEST_SUITE_ARGS,
19
20
  } from './constants.mjs';
20
21
 
22
+ const RULES_DIRECTORY = resolve(REPOSITORY_ROOT, '.agent-context', 'rules');
23
+ const RULE_SECTION_HEADING_PATTERN = /^##\s+([A-Z]+-\d{3,4}(?:-[A-Z])?):\s+(.+)$/gm;
24
+ const RULE_ID_INPUT_PATTERN = /^[A-Z]+-\d{3,4}(?:-[A-Z])?$/;
25
+
21
26
  function buildCommandOutput(commandLabel, commandArguments, exitCode, stdoutContent, stderrContent) {
22
27
  const outputSections = [
23
28
  `Command: node ${commandArguments.join(' ')}`,
@@ -50,6 +55,125 @@ function buildJsonResult(payload, isError = false) {
50
55
  };
51
56
  }
52
57
 
58
/**
 * Canonicalizes a caller-supplied rule ID.
 *
 * @param {unknown} rawRuleId - Value received from the tool arguments.
 * @returns {string} The trimmed, upper-cased ID, or an empty string when the
 *   input is not a string.
 */
function normalizeRuleId(rawRuleId) {
  if (typeof rawRuleId !== 'string') {
    return '';
  }

  return rawRuleId.trim().toUpperCase();
}
61
+
62
/**
 * Normalizes a list of rule IDs, dropping entries that normalize to an empty
 * string and removing duplicates while keeping first-seen order.
 *
 * @param {unknown} rawRuleIds - Expected to be an array; anything else yields
 *   an empty list.
 * @returns {string[]} Unique normalized rule IDs.
 */
function normalizeRuleIdList(rawRuleIds) {
  if (!Array.isArray(rawRuleIds)) {
    return [];
  }

  const uniqueRuleIds = new Set();
  for (const rawRuleId of rawRuleIds) {
    const ruleId = normalizeRuleId(rawRuleId);
    if (ruleId) {
      uniqueRuleIds.add(ruleId);
    }
  }

  return [...uniqueRuleIds];
}
69
+
70
/**
 * Reads every rule markdown file in RULES_DIRECTORY (skipping
 * `*.candidate.md`) and indexes each `## <ID>: <title>` section by rule ID.
 *
 * Files are processed in sorted filename order, one at a time. A section runs
 * from its heading up to the next matching heading (or the end of the file).
 * When the same ID appears more than once, the later section replaces the
 * earlier one because entries are stored with `Map#set`.
 *
 * @returns {Promise<Map<string, { ruleId: string, title: string, path: string, content: string }>>}
 */
async function buildRuleSectionIndex() {
  const directoryEntries = await readdir(RULES_DIRECTORY);
  const ruleFilenames = directoryEntries
    .filter((name) => name.endsWith('.md') && !name.endsWith('.candidate.md'))
    .sort();

  const sectionsByRuleId = new Map();

  for (const ruleFilename of ruleFilenames) {
    const sourceText = await readFile(resolve(RULES_DIRECTORY, ruleFilename), 'utf8');
    const headings = Array.from(sourceText.matchAll(RULE_SECTION_HEADING_PATTERN));

    headings.forEach((heading, position) => {
      const [, ruleId, rawTitle] = heading;
      const following = headings[position + 1];
      const start = heading.index || 0;
      const end = following?.index ?? sourceText.length;

      sectionsByRuleId.set(ruleId, {
        ruleId,
        title: rawTitle.trim(),
        path: `.agent-context/rules/${ruleFilename}`,
        content: sourceText.slice(start, end).trim(),
      });
    });
  }

  return sectionsByRuleId;
}
98
+
99
/**
 * MCP handler for `lookup_rule`: resolves one rule ID to its indexed section.
 *
 * The ID is normalized first and rejected before any file is read when it
 * does not match RULE_ID_INPUT_PATTERN. Both failure paths return an error
 * result (second argument of buildJsonResult set to `true`).
 *
 * @param {{ ruleId?: unknown }} [toolArguments]
 * @returns {Promise<unknown>} Whatever buildJsonResult produces for the payload.
 */
async function runLookupRuleTool(toolArguments = {}) {
  const ruleId = normalizeRuleId(toolArguments.ruleId);
  const hasValidFormat = RULE_ID_INPUT_PATTERN.test(ruleId);

  if (!hasValidFormat) {
    const formatError = {
      error: 'ruleId must use the stable <PREFIX>-NNN format.',
      input: toolArguments.ruleId || null,
    };
    return buildJsonResult(formatError, true);
  }

  const ruleIndex = await buildRuleSectionIndex();
  const ruleEntry = ruleIndex.get(ruleId);

  if (ruleEntry) {
    return buildJsonResult({ found: true, ...ruleEntry });
  }

  const unknownError = {
    error: `Unknown rule ID: ${ruleId}`,
    ruleId,
    knownRuleCount: ruleIndex.size,
  };
  return buildJsonResult(unknownError, true);
}
123
+
124
/**
 * MCP handler for `validate_against_rules`: checks that every cited rule ID
 * is well-formed and present in the rule index.
 *
 * Validation passes only when at least one ID was supplied and none of them
 * is malformed or unknown. The result is flagged as an error otherwise.
 *
 * @param {{ ruleIds?: unknown, summary?: unknown }} [toolArguments]
 * @returns {Promise<unknown>} Whatever buildJsonResult produces for the payload.
 */
async function runValidateAgainstRulesTool(toolArguments = {}) {
  const ruleIds = normalizeRuleIdList(toolArguments.ruleIds);
  const ruleIndex = await buildRuleSectionIndex();

  const invalidFormatIds = [];
  const unknownRuleIds = [];
  const resolvedRules = [];

  for (const ruleId of ruleIds) {
    const isIndexed = ruleIndex.has(ruleId);

    if (!RULE_ID_INPUT_PATTERN.test(ruleId)) {
      invalidFormatIds.push(ruleId);
    } else if (!isIndexed) {
      unknownRuleIds.push(ruleId);
    }

    if (isIndexed) {
      const { title, path } = ruleIndex.get(ruleId);
      resolvedRules.push({ ruleId, title, path });
    }
  }

  const hasFailures = invalidFormatIds.length > 0 || unknownRuleIds.length > 0;
  const passed = ruleIds.length > 0 && !hasFailures;

  const rawSummary = toolArguments.summary;
  // A blank or non-string summary is reported as null.
  const summary = typeof rawSummary === 'string' ? rawSummary.trim() || null : null;

  return buildJsonResult({
    passed,
    checkedAt: new Date().toISOString(),
    summary,
    ruleCount: ruleIds.length,
    resolvedRules,
    invalidFormatIds,
    unknownRuleIds,
  }, !passed);
}
151
+
152
/**
 * MCP handler for `audit_compliance`: wraps the rule-ID validation result in
 * an audit report and adds a warning when no scope was provided.
 *
 * `failureCount` is the number of malformed IDs plus the number of unknown
 * IDs, plus one when no rule IDs were supplied at all. The last case also
 * fails validation, and counting it keeps `passed: false` from being reported
 * alongside `failureCount: 0`.
 *
 * @param {{ ruleIds?: unknown, summary?: unknown, scope?: unknown }} [toolArguments]
 * @returns {Promise<unknown>} Whatever buildJsonResult produces for the report.
 */
async function runAuditComplianceTool(toolArguments = {}) {
  const validationResult = await runValidateAgainstRulesTool(toolArguments);
  // The validation handler returns its payload serialized as JSON text.
  const validationPayload = JSON.parse(validationResult.content[0].text);
  const scope = typeof toolArguments.scope === 'string' ? toolArguments.scope.trim().toLowerCase() : '';
  const warnings = [];

  if (!scope) {
    warnings.push({
      kind: 'scope.missing',
      detail: 'Provide scope when checking whether cited rules match a changed boundary.',
    });
  }

  const emptyRuleListFailures = validationPayload.ruleCount === 0 ? 1 : 0;
  const failureCount = validationPayload.passed
    ? 0
    : validationPayload.invalidFormatIds.length
      + validationPayload.unknownRuleIds.length
      + emptyRuleListFailures;

  return buildJsonResult({
    auditName: 'mcp-audit-compliance',
    reportVersion: '1.0.0',
    generatedAt: new Date().toISOString(),
    scope: scope || null,
    passed: validationPayload.passed,
    failureCount,
    warnings,
    ruleValidation: validationPayload,
  }, !validationPayload.passed);
}
176
+
53
177
  function normalizePlainText(rawText) {
54
178
  return rawText
55
179
  .replace(/<script[\s\S]*?<\/script>/gi, ' ')
@@ -418,6 +542,18 @@ export async function executeToolCall(toolName, toolArguments = {}) {
418
542
  return runNodeCommand('forbidden_content_check', ['./scripts/forbidden-content-check.mjs']);
419
543
  }
420
544
 
545
+ if (toolName === 'lookup_rule') {
546
+ return runLookupRuleTool(toolArguments);
547
+ }
548
+
549
+ if (toolName === 'validate_against_rules') {
550
+ return runValidateAgainstRulesTool(toolArguments);
551
+ }
552
+
553
+ if (toolName === 'audit_compliance') {
554
+ return runAuditComplianceTool(toolArguments);
555
+ }
556
+
421
557
  if (toolName === 'research_fetch') {
422
558
  return runResearchFetchTool(toolArguments);
423
559
  }
@@ -0,0 +1,37 @@
1
+ // @ts-check
2
+
3
+ /**
4
+ * Locked ID prefix table per `docs/plan/format-spec.md` section 3.
5
+ * The migration helper reads this map to assign frontmatter and section IDs.
6
+ * Lock new entries here when adding a new rule file; never invent prefixes inline.
7
+ */
8
+
9
export const ID_PREFIX_TABLE = Object.freeze({
  'api-docs.md': { prefix: 'API', domain: 'api-docs', priority: 'high', scope: 'backend', appliesTo: ['backend', 'fullstack'] },
  'architecture.md': { prefix: 'ARCH', domain: 'architecture', priority: 'critical', scope: 'all-tasks', appliesTo: ['backend', 'frontend', 'fullstack'] },
  'database-design.md': { prefix: 'DATA', domain: 'database-design', priority: 'high', scope: 'data', appliesTo: ['backend', 'fullstack'] },
  'docker-runtime.md': { prefix: 'DOCK', domain: 'docker-runtime', priority: 'high', scope: 'infra', appliesTo: ['backend', 'frontend', 'fullstack'] },
  'efficiency-vs-hype.md': { prefix: 'DEP', domain: 'efficiency-vs-hype', priority: 'medium', scope: 'all-tasks', appliesTo: ['backend', 'frontend', 'fullstack'] },
  'error-handling.md': { prefix: 'ERR', domain: 'error-handling', priority: 'high', scope: 'all-tasks', appliesTo: ['backend', 'frontend', 'fullstack'] },
  'event-driven.md': { prefix: 'EVT', domain: 'event-driven', priority: 'medium', scope: 'backend', appliesTo: ['backend', 'fullstack'] },
  'frontend-architecture.md': { prefix: 'FE', domain: 'frontend-architecture', priority: 'high', scope: 'ui', appliesTo: ['frontend', 'fullstack'] },
  'git-workflow.md': { prefix: 'GIT', domain: 'git-workflow', priority: 'medium', scope: 'governance', appliesTo: ['backend', 'frontend', 'fullstack'] },
  'microservices.md': { prefix: 'SVC', domain: 'microservices', priority: 'medium', scope: 'backend', appliesTo: ['backend', 'fullstack'] },
  'naming-conv.md': { prefix: 'NAME', domain: 'naming-conv', priority: 'medium', scope: 'all-tasks', appliesTo: ['backend', 'frontend', 'fullstack'] },
  'performance.md': { prefix: 'PERF', domain: 'performance', priority: 'medium', scope: 'all-tasks', appliesTo: ['backend', 'frontend', 'fullstack'] },
  'realtime.md': { prefix: 'RT', domain: 'realtime', priority: 'medium', scope: 'backend', appliesTo: ['backend', 'fullstack'] },
  'security.md': { prefix: 'SEC', domain: 'security', priority: 'critical', scope: 'all-tasks', appliesTo: ['backend', 'frontend', 'fullstack'] },
  'testing.md': { prefix: 'TEST', domain: 'testing', priority: 'high', scope: 'all-tasks', appliesTo: ['backend', 'frontend', 'fullstack'] },
});

/**
 * Returns the locked prefix entry for a rule file.
 *
 * Only the table's own keys count as known files. A plain property read would
 * also resolve names inherited from `Object.prototype` (for example
 * `toString` or `constructor`) and hand back a non-entry value instead of
 * failing, so the lookup checks ownership explicitly.
 *
 * @param {string} filename
 * @returns {{ prefix: string, domain: string, priority: string, scope: string, appliesTo: string[] }}
 * @throws {Error} When `filename` is not a key of ID_PREFIX_TABLE.
 */
export function getPrefixEntry(filename) {
  const isKnownRuleFile = typeof filename === 'string' && Object.hasOwn(ID_PREFIX_TABLE, filename);
  if (!isKnownRuleFile) {
    throw new Error(`Unknown rule file '${filename}'. Add it to ID_PREFIX_TABLE before migrating.`);
  }
  return ID_PREFIX_TABLE[filename];
}
@@ -0,0 +1,180 @@
1
+ // @ts-check
2
+
3
+ /**
4
+ * Best-effort parser for the legacy v3 rule file format. Extracts:
5
+ * - the H1 title
6
+ * - an optional intro paragraph (1-3 sentences before the first H2)
7
+ * - a list of sections, each with H2 title + ordered content blocks
8
+ *
9
+ * Each content block is one of:
10
+ * { kind: 'paragraph', text }
11
+ * { kind: 'bullet-list', items: string[] }
12
+ * Legacy nested bullets are not a separate block kind: they are appended to the parent bullet's text as indented `- ` lines.
13
+ *
14
+ * The parser intentionally throws on shapes it cannot represent in the new
15
+ * format. This forces the human migrator to review unusual sections instead of
16
+ * silently losing content.
17
+ */
18
+
19
+ /**
20
+ * @typedef {{ kind: 'paragraph', text: string }} ParagraphBlock
21
+ * @typedef {{ kind: 'bullet-list', items: string[] }} BulletListBlock
22
+ * @typedef {ParagraphBlock | BulletListBlock} ContentBlock
23
+ *
24
+ * @typedef {{
25
+ * title: string,
26
+ * blocks: ContentBlock[],
27
+ * }} ParsedSection
28
+ *
29
+ * @typedef {{
30
+ * h1Title: string,
31
+ * introParagraph: string | null,
32
+ * sections: ParsedSection[],
33
+ * warnings: string[],
34
+ * }} ParsedRuleFile
35
+ */
36
+
37
/**
 * Returns the index of the first non-blank line at or after `startIndex`.
 *
 * @param {string[]} lines
 * @param {number} startIndex
 * @returns {number}
 */
function skipLegacyBlankLines(lines, startIndex) {
  let cursor = startIndex;
  while (cursor < lines.length && lines[cursor].trim() === '') {
    cursor += 1;
  }
  return cursor;
}

/**
 * Reads one legacy bullet list starting at `startIndex`.
 *
 * A bullet indented by two or more whitespace characters is nested under the
 * preceding item. Any other bullet, including an indented bullet that has no
 * preceding item, becomes a top-level item so its text is never dropped.
 * Indented non-bullet lines continue the preceding item.
 *
 * @param {string[]} lines
 * @param {number} startIndex
 * @returns {{ items: string[], nextCursor: number }} `items` is empty only
 *   when the first line could not be read as a bullet; `nextCursor` then
 *   equals `startIndex`.
 */
function readLegacyBulletList(lines, startIndex) {
  const bulletLinePattern = /^\s*-\s+/;
  const items = [];
  let nestedItems = [];

  const attachNestedItems = () => {
    if (nestedItems.length > 0 && items.length > 0) {
      // NOTE(review): nested-bullet indent reproduced exactly as shown in the
      // reviewed source (newline + one space) - confirm the intended width.
      items[items.length - 1] += `\n ${nestedItems.map((nested) => `- ${nested}`).join('\n ')}`;
    }
    nestedItems = [];
  };

  let cursor = startIndex;
  while (cursor < lines.length) {
    const line = lines[cursor];

    if (line.trim() === '') {
      // A blank line only stays inside the list when another bullet follows.
      cursor += 1;
      if (cursor < lines.length && !bulletLinePattern.test(lines[cursor])) {
        break;
      }
      continue;
    }

    if (!bulletLinePattern.test(line) && !/^\s{2,}\S/.test(line)) {
      break;
    }

    const bulletMatch = line.match(/^(\s*)-\s+(.+)$/);
    const continuationMatch = line.match(/^\s{2,}(\S.+)$/);

    if (bulletMatch) {
      const bulletText = bulletMatch[2].trim();
      const isNested = bulletMatch[1].length >= 2 && items.length > 0;
      if (isNested) {
        nestedItems.push(bulletText);
      } else {
        attachNestedItems();
        items.push(bulletText);
      }
    } else if (continuationMatch && items.length > 0) {
      items[items.length - 1] += ` ${continuationMatch[1].trim()}`;
    } else {
      break;
    }
    cursor += 1;
  }

  attachNestedItems();
  return { items, nextCursor: cursor };
}

/**
 * Parses a legacy v3 rule file into its H1 title, an optional intro
 * paragraph, and an ordered list of sections made of paragraph and
 * bullet-list blocks.
 *
 * Sections start at an H2 heading or at a capitalised `Label:` line. Content
 * that appears before any such marker is filed under a synthetic
 * "General Guidance" (first section) or "Boundary Summary" (later sections)
 * title. H1 lines after the first are skipped.
 *
 * The parser always advances: a bullet-like line that carries no text is
 * skipped with a warning instead of being re-read forever.
 *
 * @param {string} sourceText
 * @returns {ParsedRuleFile}
 * @throws {Error} When the first non-empty line is not an H1 heading.
 */
export function parseLegacyRuleFile(sourceText) {
  const lines = sourceText.replace(/\r\n/g, '\n').split('\n');
  /** @type {string[]} */
  const warnings = [];
  const isH2 = (line) => line.startsWith('## ');
  const isH1 = (line) => line.startsWith('# ');
  const isColonSectionLabel = (line) => /^[A-Z][^:\n]+:$/.test(line.trim());
  const isBulletLine = (line) => /^\s*-\s+/.test(line);
  const isSectionBoundary = (line) => isH2(line) || isH1(line) || isColonSectionLabel(line);

  let cursor = skipLegacyBlankLines(lines, 0);

  const h1Match = (lines[cursor] || '').match(/^#\s+(.+)$/);
  if (!h1Match) {
    throw new Error('Legacy file missing top-level H1 heading at the first non-empty line.');
  }
  const h1Title = h1Match[1].trim();
  cursor = skipLegacyBlankLines(lines, cursor + 1);

  // The intro is the first run of non-blank lines before any heading.
  let introParagraph = null;
  if (cursor < lines.length && !isH2(lines[cursor]) && !isH1(lines[cursor])) {
    const introLines = [];
    while (cursor < lines.length && !isH2(lines[cursor]) && !isH1(lines[cursor])) {
      const line = lines[cursor];
      if (line.trim() === '' && introLines.length > 0) {
        break;
      }
      if (line.trim() !== '') {
        introLines.push(line.trim());
      }
      cursor += 1;
    }
    if (introLines.length > 0) {
      introParagraph = introLines.join(' ').trim();
      const sentenceCount = (introParagraph.match(/[.!?](?=\s|$)/g) || []).length;
      if (sentenceCount > 3) {
        warnings.push(`Intro paragraph has ${sentenceCount} sentences (max 3 per format spec). Trim or split during manual review.`);
      }
    }
  }

  /** @type {ParsedSection[]} */
  const sections = [];
  while (cursor < lines.length) {
    cursor = skipLegacyBlankLines(lines, cursor);
    if (cursor >= lines.length) {
      break;
    }

    let sectionTitle = '';
    if (isH2(lines[cursor])) {
      sectionTitle = lines[cursor].slice(3).trim();
      cursor += 1;
    } else if (isColonSectionLabel(lines[cursor])) {
      sectionTitle = lines[cursor].trim().replace(/:$/, '');
      cursor += 1;
    } else if (!isH1(lines[cursor])) {
      // Untitled content: keep the cursor in place so the block loop reads it.
      sectionTitle = sections.length === 0 ? 'General Guidance' : 'Boundary Summary';
    } else {
      cursor += 1;
      continue;
    }

    /** @type {ContentBlock[]} */
    const blocks = [];
    while (cursor < lines.length && !isSectionBoundary(lines[cursor])) {
      const line = lines[cursor];

      if (line.trim() === '') {
        cursor += 1;
        continue;
      }

      if (isBulletLine(line)) {
        const { items, nextCursor } = readLegacyBulletList(lines, cursor);
        if (items.length === 0) {
          // Nothing was consumed (bullet marker without text): skip the line
          // explicitly, otherwise this loop would see it again forever.
          warnings.push(`Skipped bullet marker without text at line ${cursor + 1}. Manual review required.`);
          cursor += 1;
          continue;
        }
        blocks.push({ kind: 'bullet-list', items });
        cursor = nextCursor;
        continue;
      }

      const paragraphLines = [];
      while (
        cursor < lines.length
        && lines[cursor].trim() !== ''
        && !isSectionBoundary(lines[cursor])
        && !isBulletLine(lines[cursor])
      ) {
        paragraphLines.push(lines[cursor].trim());
        cursor += 1;
      }
      blocks.push({ kind: 'paragraph', text: paragraphLines.join(' ').trim() });
    }

    sections.push({ title: sectionTitle, blocks });
  }

  return { h1Title, introParagraph, sections, warnings };
}
@@ -0,0 +1,169 @@
1
+ // @ts-check
2
+
3
+ /**
4
+ * Renders a parsed legacy rule file plus a prefix-table entry into the v4
5
+ * canonical format defined in `docs/plan/format-spec.md`.
6
+ *
7
+ * Section IDs auto-assign sequentially starting at 001. The renderer never
8
+ * skips integers; humans introduce gaps manually during review by editing
9
+ * the produced file (e.g. when expecting later splits).
10
+ *
11
+ * Each parsed paragraph is split into sentences, and every sentence and every
12
+ * bullet becomes its own numbered item. Bullet lists are never compressed into
13
+ * a single numbered item with sub-bullets.
14
+ */
15
+
16
+ import { stringify as stringifyYaml } from 'yaml';
17
+
18
/**
 * Builds the frontmatter keyword list for a rule file.
 *
 * The domain and the lower-cased ID prefix always come first. Remaining slots
 * are filled, in reading order, with kebab-case tokens of 4 to 32 characters
 * taken from the H1 title and the section titles, until six keywords exist.
 *
 * @param {{ h1Title: string, sections: Array<{ title: string }> }} parsedRuleFile
 * @param {{ domain: string, prefix: string }} prefixEntry
 * @returns {string[]} At most six unique keywords.
 */
function pickKeywords(parsedRuleFile, prefixEntry) {
  const keywordCap = 6;
  const keywords = new Set([prefixEntry.domain, prefixEntry.prefix.toLowerCase()]);

  const sectionTitles = parsedRuleFile.sections.map(({ title }) => title).join(' ');
  const titleTokens = `${parsedRuleFile.h1Title} ${sectionTitles}`
    .toLowerCase()
    .match(/[a-z][a-z0-9]+(?:-[a-z0-9]+)*/g) ?? [];

  for (const token of titleTokens) {
    if (keywords.size >= keywordCap) {
      break;
    }
    if (token.length >= 4 && token.length <= 32) {
      keywords.add(token);
    }
  }

  return Array.from(keywords);
}
34
+
35
/**
 * Renders the YAML frontmatter block for a v4 rule file.
 *
 * Emitted keys, in order: `id_prefix`, `domain`, `priority`, `scope`,
 * `applies_to`, `keywords`. `version` and `last_migrated` are deliberately
 * not emitted, and keywords are limited to the list pickKeywords returns.
 *
 * @param {{ prefix: string, domain: string, priority: string, scope: string, appliesTo: string[] }} prefixEntry
 * @param {{ h1Title: string, sections: Array<{ title: string }> }} parsedRuleFile
 * @returns {string} The frontmatter wrapped in `---` fences, ending with a newline.
 */
function renderFrontmatter(prefixEntry, parsedRuleFile) {
  const { prefix, domain, priority, scope, appliesTo } = prefixEntry;

  const yamlBody = stringifyYaml(
    {
      id_prefix: prefix,
      domain,
      priority,
      scope,
      // Copy so the serializer never sees the shared table array itself.
      applies_to: Array.from(appliesTo),
      keywords: pickKeywords(parsedRuleFile, prefixEntry),
    },
    { lineWidth: 0 },
  ).trimEnd();

  return ['---', yamlBody, '---', ''].join('\n');
}
51
+
52
/**
 * Renders the optional intro paragraph followed by a blank line.
 *
 * @param {{ introParagraph: string | null }} parsedRuleFile
 * @returns {string} The paragraph plus two newlines, or an empty string when
 *   there is no intro.
 */
function renderIntroParagraph(parsedRuleFile) {
  const { introParagraph } = parsedRuleFile;
  return introParagraph ? `${introParagraph}\n\n` : '';
}
56
+
57
+ // Common abbreviations that end with a period but are not sentence endings.
58
+ // Mid-sentence occurrences like "etc. The next..." would otherwise be split
59
+ // at the abbreviation. Pre-masking is the cheapest fix and is easy to extend.
60
const NON_SENTENCE_ENDING_ABBREVIATIONS = Object.freeze(['e.g', 'i.e', 'etc', 'vs', 'cf', 'Mr', 'Dr', 'Mrs', 'Inc', 'Ltd']);
const ABBREVIATION_MASK_TOKEN = '\u0001';

/**
 * Replaces the trailing period of every listed abbreviation with the mask
 * token so the sentence splitter does not treat it as a boundary.
 *
 * @param {string} paragraphText
 * @returns {string}
 */
function maskAbbreviationPeriods(paragraphText) {
  return NON_SENTENCE_ENDING_ABBREVIATIONS.reduce((maskedSoFar, abbreviation) => {
    // Periods inside the abbreviation itself ("e.g") must match literally.
    const literalAbbreviation = abbreviation.replace(/\./g, '\\.');
    const trailingPeriodPattern = new RegExp(`\\b${literalAbbreviation}\\.`, 'g');
    return maskedSoFar.replace(trailingPeriodPattern, `${abbreviation}${ABBREVIATION_MASK_TOKEN}`);
  }, paragraphText);
}

/**
 * Restores every mask token back to a period.
 *
 * @param {string} text
 * @returns {string}
 */
function unmaskAbbreviationPeriods(text) {
  return text.split(ABBREVIATION_MASK_TOKEN).join('.');
}

/**
 * Splits a paragraph into sentence-sized directives.
 *
 * A `.`, `!` or `?` ends a sentence only when whitespace follows and the next
 * character is an uppercase letter, a backtick, or an opening parenthesis.
 * File paths, dotted versions, and domain literals therefore stay intact, and
 * listed abbreviations are masked beforehand so they never split.
 *
 * @param {string} paragraphText
 * @returns {string[]} Trimmed, non-empty sentences in their original order.
 */
export function paragraphSplitsIntoDirectives(paragraphText) {
  const masked = maskAbbreviationPeriods(paragraphText);
  const rawSegments = [];
  let segmentStart = 0;

  for (const boundary of masked.matchAll(/([.!?])\s+(?=[A-Z`(])/g)) {
    // Keep the punctuation with the sentence; drop the whitespace after it.
    const punctuationEnd = boundary.index + boundary[1].length;
    rawSegments.push(masked.slice(segmentStart, punctuationEnd));
    segmentStart = boundary.index + boundary[0].length;
  }
  rawSegments.push(masked.slice(segmentStart));

  return rawSegments
    .map((segment) => unmaskAbbreviationPeriods(segment).trim())
    .filter((sentence) => sentence.length > 0);
}
100
+
101
/**
 * Converts one parsed content block into the directives it contributes.
 *
 * Paragraphs are split into sentences. For bullet lists every bullet stays a
 * separate directive rather than being folded under one parent item, so each
 * one remains individually citable.
 *
 * @param {{ kind: 'paragraph', text: string } | { kind: 'bullet-list', items: string[] }} block
 * @returns {string[]} A new array of directive strings.
 */
function renderBlockAsNumberedItem(block) {
  return block.kind === 'paragraph'
    ? paragraphSplitsIntoDirectives(block.text)
    : Array.from(block.items);
}
113
+
114
/**
 * Flattens a section's content blocks into one ordered list of directives.
 *
 * @param {Array<{ kind: string }>} blocks
 * @returns {string[]} Directives in block order, then in-block order.
 */
function buildSectionBody(blocks) {
  return blocks.flatMap((block) => renderBlockAsNumberedItem(block));
}
124
+
125
/**
 * Renders a parsed legacy rule file into the v4 document text.
 *
 * Output layout: frontmatter, H1 title, optional intro, then one
 * `## <PREFIX>-NNN: <title>` section per parsed section with its directives
 * as a numbered list. Section IDs are assigned sequentially from 001. Runs of
 * three or more newlines are collapsed to two and the document ends with
 * exactly one newline.
 *
 * Warnings from the parser are carried over. A section adds a warning when it
 * yields more than 12 numbered items or none at all.
 *
 * @param {{ prefix: string, domain: string, priority: string, scope: string, appliesTo: string[] }} prefixEntry
 * @param {ReturnType<typeof import('./parse-legacy.mjs').parseLegacyRuleFile>} parsedRuleFile
 * @returns {{ rendered: string, sectionAssignments: Array<{ sectionTitle: string, sectionId: string, itemCount: number }>, warnings: string[] }}
 */
export function renderNewFormat(prefixEntry, parsedRuleFile) {
  const warnings = Array.from(parsedRuleFile.warnings);
  const sectionAssignments = [];

  let documentText = `${renderFrontmatter(prefixEntry, parsedRuleFile)}\n`;
  documentText += `# ${parsedRuleFile.h1Title}\n\n`;
  documentText += renderIntroParagraph(parsedRuleFile);

  for (const [sectionIndex, section] of parsedRuleFile.sections.entries()) {
    const sequenceNumber = String(sectionIndex + 1).padStart(3, '0');
    const sectionId = `${prefixEntry.prefix}-${sequenceNumber}`;
    const numberedItems = buildSectionBody(section.blocks);
    const itemCount = numberedItems.length;

    if (itemCount > 12) {
      warnings.push(
        `Section "${section.title}" has ${itemCount} numbered items. Format spec caps at 12; split into two sections during manual review.`,
      );
    }
    if (itemCount === 0) {
      warnings.push(`Section "${section.title}" produced no numbered items. Manual review required.`);
    }

    const numberedLines = numberedItems
      .map((directive, itemIndex) => `${itemIndex + 1}. ${directive}\n`)
      .join('');
    documentText += `## ${sectionId}: ${section.title}\n\n${numberedLines}\n`;

    sectionAssignments.push({ sectionTitle: section.title, sectionId, itemCount });
  }

  const rendered = `${documentText.replace(/\n{3,}/g, '\n\n').trimEnd()}\n`;
  return { rendered, sectionAssignments, warnings };
}
@@ -0,0 +1,89 @@
1
+ // @ts-check
2
+
3
+ /**
4
+ * Roundtrip substance validator.
5
+ *
6
+ * After rendering the new format, we extract the substantial-word set from
7
+ * both the original v3 file and the rendered v4 file, then compute set overlap.
8
+ * Drop in overlap below the threshold means the migration almost certainly
9
+ * lost real content; the helper surfaces the lost words so the human migrator
10
+ * can decide whether the loss is intentional (renamed terms) or a bug.
11
+ */
12
+
13
+ const STOPWORD_SET = new Set([
14
+ 'the', 'and', 'for', 'with', 'that', 'this', 'from', 'into', 'are', 'was',
15
+ 'were', 'has', 'have', 'had', 'not', 'but', 'can', 'will', 'must', 'use',
16
+ 'used', 'using', 'when', 'then', 'than', 'they', 'their', 'them', 'who',
17
+ 'what', 'why', 'how', 'all', 'any', 'one', 'two', 'three', 'four', 'five',
18
+ 'six', 'seven', 'eight', 'nine', 'ten', 'each', 'such', 'some', 'most',
19
+ 'more', 'less', 'only', 'also', 'just', 'over', 'under', 'between', 'across',
20
+ 'before', 'after', 'because', 'while', 'until', 'unless', 'within',
21
+ 'without', 'inside', 'outside', 'about', 'around', 'against', 'through',
22
+ 'throughout', 'during', 'including', 'include', 'includes', 'see', 'note',
23
+ 'rule', 'rules', 'agent', 'agents', 'project', 'repo', 'code', 'file',
24
+ 'files', 'item', 'items', 'list', 'lists', 'thing', 'things', 'value',
25
+ 'values', 'should', 'shall', 'may', 'might', 'could', 'would', 'does',
26
+ 'doing', 'done', 'make', 'makes', 'making', 'made', 'set', 'sets',
27
+ ]);
28
+
29
+ function tokenize(text) {
30
+ const lowered = text.toLowerCase();
31
+ // Strip fenced code blocks first (multi-line ``` ... ``` spans).
32
+ const noFenced = lowered.replace(/```[\s\S]*?```/g, ' ');
33
+ // Strip inline code spans, but only within a single line so that an
34
+ // unmatched backtick on a code-heavy line cannot eat the rest of the file.
35
+ const noInline = noFenced.replace(/`[^`\n]+`/g, ' ');
36
+ const words = noInline.match(/[a-z][a-z0-9]+(?:-[a-z0-9]+)*/g) ?? [];
37
+ return words.filter((word) => word.length >= 4 && !STOPWORD_SET.has(word));
38
+ }
39
+
40
/**
 * Compares the distinct substantial words of the original file with those of
 * the rendered file and reports how many survived.
 *
 * `overlapPercent` is the share of distinct original words that also appear
 * in the rendered text (100 when the original has none), rounded to two
 * decimals. `lostWords` entries carry their original occurrence count as
 * `word (xN)`. Both word lists are sorted and limited to 50 entries.
 *
 * @param {string} originalSourceText
 * @param {string} renderedSourceText
 * @param {{ minimumOverlapPercent?: number }} [options] - Threshold defaults to 95.
 * @returns {{
 *   passed: boolean,
 *   originalSubstantialWordCount: number,
 *   renderedSubstantialWordCount: number,
 *   overlapPercent: number,
 *   lostWords: string[],
 *   newWords: string[],
 *   minimumRequired: number,
 * }}
 */
export function roundtripSubstanceCheck(originalSourceText, renderedSourceText, options = {}) {
  const minimumRequired = options.minimumOverlapPercent ?? 95;

  const originalFrequencies = new Map();
  tokenize(originalSourceText).forEach((word) => {
    originalFrequencies.set(word, (originalFrequencies.get(word) ?? 0) + 1);
  });
  const renderedWords = new Set(tokenize(renderedSourceText));

  const lostWords = [...originalFrequencies]
    .filter(([word]) => !renderedWords.has(word))
    .map(([word, occurrences]) => `${word} (x${occurrences})`);
  const newWords = [...renderedWords].filter((word) => !originalFrequencies.has(word));

  const originalDistinctCount = originalFrequencies.size;
  const preservedDistinctCount = originalDistinctCount - lostWords.length;
  const overlapPercent = originalDistinctCount > 0
    ? (preservedDistinctCount / originalDistinctCount) * 100
    : 100;

  return {
    passed: overlapPercent >= minimumRequired,
    originalSubstantialWordCount: originalDistinctCount,
    renderedSubstantialWordCount: renderedWords.size,
    overlapPercent: Math.round(overlapPercent * 100) / 100,
    lostWords: lostWords.sort().slice(0, 50),
    newWords: newWords.sort().slice(0, 50),
    minimumRequired,
  };
}