npm - @ryuenn3123/agentic-senior-core - Versions diffs - 3.0.50 → 4.0.0 - Mend

@ryuenn3123/agentic-senior-core 3.0.50 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (83) hide show

package/.agent-context/review-checklists/pr-checklist.md +1 -0
package/.agent-context/rules/api-docs.md +63 -47
package/.agent-context/rules/architecture.md +133 -120
package/.agent-context/rules/database-design.md +36 -18
package/.agent-context/rules/docker-runtime.md +66 -43
package/.agent-context/rules/efficiency-vs-hype.md +38 -17
package/.agent-context/rules/error-handling.md +35 -16
package/.agent-context/rules/event-driven.md +35 -18
package/.agent-context/rules/frontend-architecture.md +103 -76
package/.agent-context/rules/git-workflow.md +81 -197
package/.agent-context/rules/microservices.md +42 -41
package/.agent-context/rules/naming-conv.md +27 -8
package/.agent-context/rules/performance.md +32 -12
package/.agent-context/rules/realtime.md +26 -9
package/.agent-context/rules/security.md +39 -20
package/.agent-context/rules/testing.md +36 -16
package/AGENTS.md +9 -9
package/README.md +10 -1
package/lib/cli/commands/init.mjs +1 -0
package/lib/cli/compiler.mjs +1 -0
package/lib/cli/detector/constants.mjs +135 -0
package/lib/cli/detector/design-evidence/collector.mjs +256 -0
package/lib/cli/detector/design-evidence/constants.mjs +39 -0
package/lib/cli/detector/design-evidence/file-traversal.mjs +83 -0
package/lib/cli/detector/design-evidence/structured-attribute-evidence.mjs +117 -0
package/lib/cli/detector/design-evidence/summary.mjs +109 -0
package/lib/cli/detector/design-evidence/utility-helpers.mjs +122 -0
package/lib/cli/detector/design-evidence.mjs +25 -610
package/lib/cli/detector/stack-detection.mjs +243 -0
package/lib/cli/detector/ui-signals.mjs +150 -0
package/lib/cli/detector/workspace-scan.mjs +177 -0
package/lib/cli/detector.mjs +20 -688
package/lib/cli/memory-continuity.mjs +1 -0
package/lib/cli/project-scaffolder/design-contract/sections/audits.mjs +96 -0
package/lib/cli/project-scaffolder/design-contract/sections/conceptual-anchor.mjs +116 -0
package/lib/cli/project-scaffolder/design-contract/sections/execution-handoff.mjs +211 -0
package/lib/cli/project-scaffolder/design-contract/seed-signals.mjs +79 -0
package/lib/cli/project-scaffolder/design-contract/signal-vocab.mjs +64 -0
package/lib/cli/project-scaffolder/design-contract/validation/anchor-validators.mjs +222 -0
package/lib/cli/project-scaffolder/design-contract/validation/audit-validators.mjs +117 -0
package/lib/cli/project-scaffolder/design-contract/validation/completeness.mjs +83 -0
package/lib/cli/project-scaffolder/design-contract/validation/execution-validators.mjs +328 -0
package/lib/cli/project-scaffolder/design-contract/validation/helpers.mjs +8 -0
package/lib/cli/project-scaffolder/design-contract/validation/structural-validators.mjs +79 -0
package/lib/cli/project-scaffolder/design-contract/validation/system-validators.mjs +256 -0
package/lib/cli/project-scaffolder/design-contract/validation.mjs +59 -896
package/lib/cli/project-scaffolder/design-contract.mjs +147 -557
package/mcp.json +30 -9
package/package.json +17 -2
package/scripts/audit-cache-layer-contract.mjs +258 -0
package/scripts/audit-caching-scope-hygiene.mjs +263 -0
package/scripts/audit-file-size.mjs +219 -0
package/scripts/audit-reflection-citations.mjs +163 -0
package/scripts/audit-release-bundle.mjs +170 -0
package/scripts/audit-rule-id-uniqueness.mjs +313 -0
package/scripts/benchmark-evidence-bundle.mjs +1 -0
package/scripts/build-release-benchmark-bundle.mjs +204 -0
package/scripts/context-triggered-audit.mjs +1 -0
package/scripts/documentation-boundary-audit.mjs +1 -0
package/scripts/explain-on-demand-audit.mjs +2 -1
package/scripts/frontend-usability-audit.mjs +10 -10
package/scripts/llm-judge/checklist-loader.mjs +45 -0
package/scripts/llm-judge/constants.mjs +66 -0
package/scripts/llm-judge/diff-collection.mjs +74 -0
package/scripts/llm-judge/prompting.mjs +78 -0
package/scripts/llm-judge/providers.mjs +111 -0
package/scripts/llm-judge/verdict.mjs +134 -0
package/scripts/llm-judge.mjs +21 -482
package/scripts/mcp-server/tool-registry.mjs +55 -0
package/scripts/mcp-server/tools.mjs +137 -1
package/scripts/migrate-rule-format/id-prefix-table.mjs +37 -0
package/scripts/migrate-rule-format/parse-legacy.mjs +180 -0
package/scripts/migrate-rule-format/render-new.mjs +169 -0
package/scripts/migrate-rule-format/roundtrip-validate.mjs +89 -0
package/scripts/migrate-rule-format.mjs +192 -0
package/scripts/release-gate/constants.mjs +1 -1
package/scripts/release-gate/static-checks.mjs +1 -1
package/scripts/rules-guardian-audit.mjs +5 -2
package/scripts/single-source-lazy-loading-audit.mjs +2 -1
package/scripts/ui-design-judge/git-input.mjs +3 -0
package/scripts/validate/config.mjs +3 -2
package/scripts/validate/coverage-checks.mjs +1 -1
package/scripts/validate.mjs +93 -1

package/scripts/mcp-server/tools.mjs CHANGED Viewed

@@ -1,7 +1,8 @@
 // @ts-check
+// @file-size-exception: Standalone MCP dispatcher intentionally keeps copied tool handlers together; Phase 3 adds bounded rule-validation handlers here.
 import { existsSync } from 'node:fs';
-import { mkdir, readFile, writeFile } from 'node:fs/promises';
+import { mkdir, readFile, readdir, writeFile } from 'node:fs/promises';
 import { spawn } from 'node:child_process';
 import { dirname, resolve, sep } from 'node:path';
 import {
@@ -18,6 +19,10 @@ import {
   TEST_SUITE_ARGS,
 } from './constants.mjs';
+// Absolute path of the directory whose rule markdown files are indexed by the rule tools.
+const RULES_DIRECTORY = resolve(REPOSITORY_ROOT, '.agent-context', 'rules');
+// Matches a rule section heading line such as "## SEC-001: Title": capture 1 is the
+// rule ID, capture 2 the title. Global + multiline so matchAll() can scan a whole file.
+const RULE_SECTION_HEADING_PATTERN = /^##\s+([A-Z]+-\d{3,4}(?:-[A-Z])?):\s+(.+)$/gm;
+// Whole-string check for a caller-supplied rule ID; same ID shape as the heading pattern.
+const RULE_ID_INPUT_PATTERN = /^[A-Z]+-\d{3,4}(?:-[A-Z])?$/;
 function buildCommandOutput(commandLabel, commandArguments, exitCode, stdoutContent, stderrContent) {
   const outputSections = [
     `Command: node ${commandArguments.join(' ')}`,
@@ -50,6 +55,125 @@ function buildJsonResult(payload, isError = false) {
   };
 }
+/**
+ * Canonicalizes a caller-supplied rule ID by trimming surrounding whitespace
+ * and upper-casing it. Any non-string input yields an empty string.
+ */
+function normalizeRuleId(rawRuleId) {
+  if (typeof rawRuleId !== 'string') {
+    return '';
+  }
+  return rawRuleId.trim().toUpperCase();
+}
+/**
+ * Normalizes a list of rule IDs: each entry goes through normalizeRuleId,
+ * empty results are dropped, and duplicates are removed while keeping the
+ * first-seen order. Non-array input yields an empty list.
+ */
+function normalizeRuleIdList(rawRuleIds) {
+  if (!Array.isArray(rawRuleIds)) {
+    return [];
+  }
+  const uniqueRuleIds = new Set();
+  for (const rawRuleId of rawRuleIds) {
+    const normalizedRuleId = normalizeRuleId(rawRuleId);
+    if (normalizedRuleId) {
+      uniqueRuleIds.add(normalizedRuleId);
+    }
+  }
+  return [...uniqueRuleIds];
+}
+/**
+ * Reads every rule markdown file in RULES_DIRECTORY (skipping `*.candidate.md`)
+ * in sorted filename order and indexes each rule section by its rule ID.
+ *
+ * A section runs from its heading to the next rule heading (or end of file).
+ * When the same rule ID appears more than once, the later occurrence replaces
+ * the earlier one in the returned Map.
+ *
+ * @returns {Promise<Map<string, { ruleId: string, title: string, path: string, content: string }>>}
+ */
+async function buildRuleSectionIndex() {
+  const directoryEntries = await readdir(RULES_DIRECTORY);
+  const ruleFilenames = directoryEntries
+    .filter((entryName) => entryName.endsWith('.md') && !entryName.endsWith('.candidate.md'))
+    .sort();
+  const sectionsByRuleId = new Map();
+  for (const ruleFilename of ruleFilenames) {
+    const fileText = await readFile(resolve(RULES_DIRECTORY, ruleFilename), 'utf8');
+    const headings = Array.from(fileText.matchAll(RULE_SECTION_HEADING_PATTERN));
+    headings.forEach((heading, position) => {
+      const [, ruleId, rawTitle] = heading;
+      const sectionStart = heading.index ?? 0;
+      // The section ends where the next rule heading begins.
+      const sectionEnd = headings[position + 1]?.index ?? fileText.length;
+      sectionsByRuleId.set(ruleId, {
+        ruleId,
+        title: rawTitle.trim(),
+        path: `.agent-context/rules/${ruleFilename}`,
+        content: fileText.slice(sectionStart, sectionEnd).trim(),
+      });
+    });
+  }
+  return sectionsByRuleId;
+}
+/**
+ * MCP tool handler: returns the indexed section for a single rule ID.
+ *
+ * Responds with an error result when the normalized ID fails
+ * RULE_ID_INPUT_PATTERN or is absent from the rule index; otherwise the
+ * result carries `found: true` plus the indexed section fields.
+ */
+async function runLookupRuleTool(toolArguments = {}) {
+  const requestedRuleId = normalizeRuleId(toolArguments.ruleId);
+  const hasValidFormat = RULE_ID_INPUT_PATTERN.test(requestedRuleId);
+  if (!hasValidFormat) {
+    const formatError = {
+      error: 'ruleId must use the stable <PREFIX>-NNN format.',
+      input: toolArguments.ruleId || null,
+    };
+    return buildJsonResult(formatError, true);
+  }
+  const sectionIndex = await buildRuleSectionIndex();
+  const matchedSection = sectionIndex.get(requestedRuleId);
+  if (matchedSection) {
+    return buildJsonResult({ found: true, ...matchedSection });
+  }
+  const unknownRuleError = {
+    error: `Unknown rule ID: ${requestedRuleId}`,
+    ruleId: requestedRuleId,
+    knownRuleCount: sectionIndex.size,
+  };
+  return buildJsonResult(unknownRuleError, true);
+}
+/**
+ * MCP tool handler: checks that every cited rule ID is well-formed and exists
+ * in the rule index.
+ *
+ * The result passes only when at least one rule ID was supplied and none is
+ * malformed or unknown. The payload lists the resolved rules (ID, title,
+ * path) alongside the malformed and unknown IDs.
+ */
+async function runValidateAgainstRulesTool(toolArguments = {}) {
+  const ruleIds = normalizeRuleIdList(toolArguments.ruleIds);
+  const ruleIndex = await buildRuleSectionIndex();
+  const invalidFormatIds = [];
+  const unknownRuleIds = [];
+  const resolvedRules = [];
+  for (const ruleId of ruleIds) {
+    const isIndexed = ruleIndex.has(ruleId);
+    if (!RULE_ID_INPUT_PATTERN.test(ruleId)) {
+      invalidFormatIds.push(ruleId);
+    } else if (!isIndexed) {
+      unknownRuleIds.push(ruleId);
+    }
+    if (isIndexed) {
+      const { title, path } = ruleIndex.get(ruleId);
+      resolvedRules.push({ ruleId, title, path });
+    }
+  }
+  let summary = null;
+  if (typeof toolArguments.summary === 'string') {
+    // A blank summary is reported as null rather than an empty string.
+    summary = toolArguments.summary.trim() || null;
+  }
+  const passed = ruleIds.length > 0 && invalidFormatIds.length === 0 && unknownRuleIds.length === 0;
+  const payload = {
+    passed,
+    checkedAt: new Date().toISOString(),
+    summary,
+    ruleCount: ruleIds.length,
+    resolvedRules,
+    invalidFormatIds,
+    unknownRuleIds,
+  };
+  return buildJsonResult(payload, !passed);
+}
+/**
+ * MCP tool handler: wraps rule validation in an audit report.
+ *
+ * Runs runValidateAgainstRulesTool on the same arguments, then reports its
+ * payload together with the normalized `scope`, a failure count, and
+ * warnings. A missing scope only produces a warning. A request that cites no
+ * rule IDs fails validation, so it is counted as one failure and explained
+ * with a `ruleIds.missing` warning; previously such a report said
+ * `passed: false` with `failureCount: 0` and gave no reason.
+ *
+ * @param {{ ruleIds?: unknown, summary?: unknown, scope?: unknown }} [toolArguments]
+ * @returns {Promise<unknown>} The buildJsonResult wrapper, flagged as an error when validation failed.
+ */
+async function runAuditComplianceTool(toolArguments = {}) {
+  const validationResult = await runValidateAgainstRulesTool(toolArguments);
+  // The validation handler returns its payload as JSON text; decode it to reuse the fields.
+  const validationPayload = JSON.parse(validationResult.content[0].text);
+  const scope = typeof toolArguments.scope === 'string' ? toolArguments.scope.trim().toLowerCase() : '';
+  const warnings = [];
+  if (!scope) {
+    warnings.push({
+      kind: 'scope.missing',
+      detail: 'Provide scope when checking whether cited rules match a changed boundary.',
+    });
+  }
+  const hasNoRuleIds = validationPayload.ruleCount === 0;
+  if (hasNoRuleIds) {
+    warnings.push({
+      kind: 'ruleIds.missing',
+      detail: 'Provide at least one rule ID; an audit that cites no rules cannot pass.',
+    });
+  }
+  // A passing validation has no malformed or unknown IDs and a non-empty list, so this is 0.
+  const failureCount = validationPayload.invalidFormatIds.length
+    + validationPayload.unknownRuleIds.length
+    + (hasNoRuleIds ? 1 : 0);
+  return buildJsonResult({
+    auditName: 'mcp-audit-compliance',
+    reportVersion: '1.0.0',
+    generatedAt: new Date().toISOString(),
+    scope: scope || null,
+    passed: validationPayload.passed,
+    failureCount,
+    warnings,
+    ruleValidation: validationPayload,
+  }, !validationPayload.passed);
+}
 function normalizePlainText(rawText) {
   return rawText
     .replace(/<script[\s\S]*?<\/script>/gi, ' ')
@@ -418,6 +542,18 @@ export async function executeToolCall(toolName, toolArguments = {}) {
     return runNodeCommand('forbidden_content_check', ['./scripts/forbidden-content-check.mjs']);
   }
+  if (toolName === 'lookup_rule') {
+    return runLookupRuleTool(toolArguments);
+  }
+  if (toolName === 'validate_against_rules') {
+    return runValidateAgainstRulesTool(toolArguments);
+  }
+  if (toolName === 'audit_compliance') {
+    return runAuditComplianceTool(toolArguments);
+  }
   if (toolName === 'research_fetch') {
     return runResearchFetchTool(toolArguments);
   }

package/scripts/migrate-rule-format/id-prefix-table.mjs ADDED Viewed

@@ -0,0 +1,37 @@
+// @ts-check
+/**
+ * Locked ID prefix table per `docs/plan/format-spec.md` section 3.
+ * The migration helper reads this map to assign frontmatter and section IDs.
+ * Lock new entries here when adding a new rule file; never invent prefixes inline.
+ *
+ * Keys are rule file names. Each entry carries the section-ID `prefix`, the
+ * `domain`, a `priority`, a `scope`, and the `appliesTo` list for that file.
+ *
+ * Note: Object.freeze is shallow, so only the top-level map is frozen. The
+ * per-file entries and their `appliesTo` arrays remain mutable; callers must
+ * treat them as read-only and copy before modifying.
+ */
+export const ID_PREFIX_TABLE = Object.freeze({
+  'api-docs.md': { prefix: 'API', domain: 'api-docs', priority: 'high', scope: 'backend', appliesTo: ['backend', 'fullstack'] },
+  'architecture.md': { prefix: 'ARCH', domain: 'architecture', priority: 'critical', scope: 'all-tasks', appliesTo: ['backend', 'frontend', 'fullstack'] },
+  'database-design.md': { prefix: 'DATA', domain: 'database-design', priority: 'high', scope: 'data', appliesTo: ['backend', 'fullstack'] },
+  'docker-runtime.md': { prefix: 'DOCK', domain: 'docker-runtime', priority: 'high', scope: 'infra', appliesTo: ['backend', 'frontend', 'fullstack'] },
+  'efficiency-vs-hype.md': { prefix: 'DEP', domain: 'efficiency-vs-hype', priority: 'medium', scope: 'all-tasks', appliesTo: ['backend', 'frontend', 'fullstack'] },
+  'error-handling.md': { prefix: 'ERR', domain: 'error-handling', priority: 'high', scope: 'all-tasks', appliesTo: ['backend', 'frontend', 'fullstack'] },
+  'event-driven.md': { prefix: 'EVT', domain: 'event-driven', priority: 'medium', scope: 'backend', appliesTo: ['backend', 'fullstack'] },
+  'frontend-architecture.md': { prefix: 'FE', domain: 'frontend-architecture', priority: 'high', scope: 'ui', appliesTo: ['frontend', 'fullstack'] },
+  'git-workflow.md': { prefix: 'GIT', domain: 'git-workflow', priority: 'medium', scope: 'governance', appliesTo: ['backend', 'frontend', 'fullstack'] },
+  'microservices.md': { prefix: 'SVC', domain: 'microservices', priority: 'medium', scope: 'backend', appliesTo: ['backend', 'fullstack'] },
+  'naming-conv.md': { prefix: 'NAME', domain: 'naming-conv', priority: 'medium', scope: 'all-tasks', appliesTo: ['backend', 'frontend', 'fullstack'] },
+  'performance.md': { prefix: 'PERF', domain: 'performance', priority: 'medium', scope: 'all-tasks', appliesTo: ['backend', 'frontend', 'fullstack'] },
+  'realtime.md': { prefix: 'RT', domain: 'realtime', priority: 'medium', scope: 'backend', appliesTo: ['backend', 'fullstack'] },
+  'security.md': { prefix: 'SEC', domain: 'security', priority: 'critical', scope: 'all-tasks', appliesTo: ['backend', 'frontend', 'fullstack'] },
+  'testing.md': { prefix: 'TEST', domain: 'testing', priority: 'high', scope: 'all-tasks', appliesTo: ['backend', 'frontend', 'fullstack'] },
+});
+/**
+ * Looks up the locked prefix-table entry for a rule file.
+ *
+ * Only the table's own keys are honoured. A plain property read would also
+ * return members inherited from Object.prototype (for example 'constructor'
+ * or 'toString'), letting an unknown file name slip through as if it had an
+ * entry; those names are now rejected like any other unknown file.
+ *
+ * @param {string} filename Rule file name, e.g. 'security.md'.
+ * @returns {{ prefix: string, domain: string, priority: string, scope: string, appliesTo: string[] }}
+ * @throws {Error} When the file name has no entry in ID_PREFIX_TABLE.
+ */
+export function getPrefixEntry(filename) {
+  const isKnownFile = typeof filename === 'string'
+    && Object.prototype.hasOwnProperty.call(ID_PREFIX_TABLE, filename);
+  if (!isKnownFile) {
+    throw new Error(`Unknown rule file '${filename}'. Add it to ID_PREFIX_TABLE before migrating.`);
+  }
+  return ID_PREFIX_TABLE[filename];
+}

package/scripts/migrate-rule-format/parse-legacy.mjs ADDED Viewed

@@ -0,0 +1,180 @@
+// @ts-check
+/**
+ * Best-effort parser for the legacy v3 rule file format. Extracts:
+ *   - the H1 title
+ *   - an optional intro paragraph (1-3 sentences before the first H2)
+ *   - a list of sections, each with H2 title + ordered content blocks
+ *
+ * Each content block is one of:
+ *   { kind: 'paragraph', text }
+ *   { kind: 'bullet-list', items: string[] }
+ * Legacy nested bullets do not get a block kind of their own: they are folded
+ * into the text of their parent bullet-list item as indented "- " lines.
+ *
+ * The parser throws only when the top-level H1 heading is missing. Every other
+ * shape is parsed best-effort, so the human migrator must still review unusual
+ * sections in the output to make sure no content was lost.
+ */
+/**
+ * @typedef {{ kind: 'paragraph', text: string }} ParagraphBlock
+ * @typedef {{ kind: 'bullet-list', items: string[] }} BulletListBlock
+ * @typedef {ParagraphBlock | BulletListBlock} ContentBlock
+ *
+ * @typedef {{
+ *   title: string,
+ *   blocks: ContentBlock[],
+ * }} ParsedSection
+ *
+ * @typedef {{
+ *   h1Title: string,
+ *   introParagraph: string | null,
+ *   sections: ParsedSection[],
+ *   warnings: string[],
+ * }} ParsedRuleFile
+ */
+/**
+ * Parses a legacy v3 rule file into its H1 title, an optional intro
+ * paragraph, and an ordered list of sections made of paragraph and
+ * bullet-list blocks.
+ *
+ * Sections start at an H2 heading or at a capitalized "Label:" line. Content
+ * that appears without either gets the title "General Guidance" (first
+ * section) or "Boundary Summary" (later sections). Additional H1 lines are
+ * skipped.
+ *
+ * Bullet handling: a bullet indented by two or more whitespace characters, or
+ * by a tab, is nested and is folded into the preceding top-level item as an
+ * indented "- " line; every other bullet is top-level. Bullets with no text
+ * are skipped, and nested bullets that appear before any top-level bullet are
+ * promoted to top-level items; both cases add a warning. Every bullet line is
+ * consumed, so a bullet indented by a single space or tab (or a bare "- ")
+ * can no longer leave the cursor stuck and loop forever.
+ *
+ * @param {string} sourceText
+ * @returns {ParsedRuleFile}
+ * @throws {Error} When the first non-empty line is not an H1 heading.
+ */
+export function parseLegacyRuleFile(sourceText) {
+  const lines = sourceText.replace(/\r\n/g, '\n').split('\n');
+  const warnings = [];
+  const isH2 = (line) => line.startsWith('## ');
+  const isH1 = (line) => line.startsWith('# ');
+  const isColonSectionLabel = (line) => /^[A-Z][^:\n]+:$/.test(line.trim());
+  const isBulletLine = (line) => /^\s*-\s+/.test(line);
+  let cursor = 0;
+  while (cursor < lines.length && lines[cursor].trim() === '') {
+    cursor += 1;
+  }
+  const h1Match = (lines[cursor] || '').match(/^#\s+(.+)$/);
+  if (!h1Match) {
+    throw new Error('Legacy file missing top-level H1 heading at the first non-empty line.');
+  }
+  const h1Title = h1Match[1].trim();
+  cursor += 1;
+  while (cursor < lines.length && lines[cursor].trim() === '') {
+    cursor += 1;
+  }
+  let introParagraph = null;
+  if (cursor < lines.length && !isH2(lines[cursor]) && !isH1(lines[cursor])) {
+    const introLines = [];
+    while (cursor < lines.length && !isH2(lines[cursor]) && !isH1(lines[cursor])) {
+      const line = lines[cursor];
+      // The intro is a single paragraph: stop at the first blank line after it started.
+      if (line.trim() === '' && introLines.length > 0) {
+        break;
+      }
+      if (line.trim() !== '') {
+        introLines.push(line.trim());
+      }
+      cursor += 1;
+    }
+    if (introLines.length > 0) {
+      introParagraph = introLines.join(' ').trim();
+      const sentenceCount = (introParagraph.match(/[.!?](?=\s|$)/g) || []).length;
+      if (sentenceCount > 3) {
+        warnings.push(`Intro paragraph has ${sentenceCount} sentences (max 3 per format spec). Trim or split during manual review.`);
+      }
+    }
+  }
+  while (cursor < lines.length && lines[cursor].trim() === '') {
+    cursor += 1;
+  }
+  /** @type {ParsedSection[]} */
+  const sections = [];
+  while (cursor < lines.length) {
+    while (cursor < lines.length && lines[cursor].trim() === '') {
+      cursor += 1;
+    }
+    if (cursor >= lines.length) {
+      break;
+    }
+    let sectionTitle = '';
+    if (isH2(lines[cursor])) {
+      sectionTitle = lines[cursor].slice(3).trim();
+      cursor += 1;
+    } else if (isColonSectionLabel(lines[cursor])) {
+      sectionTitle = lines[cursor].trim().replace(/:$/, '');
+      cursor += 1;
+    } else if (!isH1(lines[cursor])) {
+      // Untitled content: keep it under a synthetic title instead of dropping it.
+      sectionTitle = sections.length === 0 ? 'General Guidance' : 'Boundary Summary';
+    } else {
+      cursor += 1;
+      continue;
+    }
+    /** @type {ContentBlock[]} */
+    const blocks = [];
+    while (cursor < lines.length && !isH2(lines[cursor]) && !isH1(lines[cursor]) && !isColonSectionLabel(lines[cursor])) {
+      const line = lines[cursor];
+      if (line.trim() === '') {
+        cursor += 1;
+        continue;
+      }
+      if (isBulletLine(line)) {
+        const items = [];
+        let nestedItems = [];
+        // Attach pending nested bullets to the last top-level item, or promote
+        // them when no parent exists so their text is never lost.
+        const flushNestedItems = () => {
+          if (nestedItems.length === 0) {
+            return;
+          }
+          if (items.length > 0) {
+            items[items.length - 1] += `\n  ${nestedItems.map((nested) => `- ${nested}`).join('\n  ')}`;
+          } else {
+            warnings.push(`Section "${sectionTitle}" has nested bullets without a parent bullet; promoted them to top-level items.`);
+            items.push(...nestedItems);
+          }
+          nestedItems = [];
+        };
+        while (cursor < lines.length && (isBulletLine(lines[cursor]) || lines[cursor].trim() === '' || /^\s{2,}\S/.test(lines[cursor]))) {
+          const bulletLine = lines[cursor];
+          if (bulletLine.trim() === '') {
+            cursor += 1;
+            // A blank line only continues the list when another bullet follows it.
+            if (cursor < lines.length && !isBulletLine(lines[cursor])) {
+              break;
+            }
+            continue;
+          }
+          // [\s\S]* (not .*) so this matches every line isBulletLine accepts,
+          // which guarantees the cursor advances for each bullet line.
+          const bulletMatch = bulletLine.match(/^(\s*)-\s+([\s\S]*)$/);
+          const continuationMatch = bulletLine.match(/^\s{2,}(\S.+)$/);
+          if (bulletMatch) {
+            const indent = bulletMatch[1];
+            const bulletText = bulletMatch[2].trim();
+            const isNested = indent.length >= 2 || indent.includes('\t');
+            if (bulletText === '') {
+              warnings.push(`Section "${sectionTitle}" contains an empty bullet; skipped it.`);
+            } else if (isNested) {
+              nestedItems.push(bulletText);
+            } else {
+              flushNestedItems();
+              items.push(bulletText);
+            }
+          } else if (continuationMatch && items.length > 0) {
+            items[items.length - 1] += ` ${continuationMatch[1].trim()}`;
+          } else {
+            break;
+          }
+          cursor += 1;
+        }
+        flushNestedItems();
+        if (items.length > 0) {
+          blocks.push({ kind: 'bullet-list', items });
+        }
+        continue;
+      }
+      const paragraphLines = [];
+      while (
+        cursor < lines.length
+        && lines[cursor].trim() !== ''
+        && !isH2(lines[cursor])
+        && !isH1(lines[cursor])
+        && !isColonSectionLabel(lines[cursor])
+        && !isBulletLine(lines[cursor])
+      ) {
+        paragraphLines.push(lines[cursor].trim());
+        cursor += 1;
+      }
+      blocks.push({ kind: 'paragraph', text: paragraphLines.join(' ').trim() });
+    }
+    sections.push({ title: sectionTitle, blocks });
+  }
+  return { h1Title, introParagraph, sections, warnings };
+}

package/scripts/migrate-rule-format/render-new.mjs ADDED Viewed

@@ -0,0 +1,169 @@
+// @ts-check
+/**
+ * Renders a parsed legacy rule file plus a prefix-table entry into the v4
+ * canonical format defined in `docs/plan/format-spec.md`.
+ *
+ * Section IDs auto-assign sequentially starting at 001. The renderer never
+ * skips integers; humans introduce gaps manually during review by editing
+ * the produced file (e.g. when expecting later splits).
+ *
+ * Each bullet of a parsed bullet-list becomes its own numbered item, and each
+ * paragraph is split into one numbered item per sentence. Lists are never
+ * compressed into a single numbered item with sub-bullets.
+ */
+import { stringify as stringifyYaml } from 'yaml';
+/**
+ * Builds the frontmatter keyword list, capped at six entries.
+ *
+ * The domain and the lower-cased ID prefix always come first. Remaining slots
+ * are filled, in order of appearance, with lower-cased tokens of 4 to 32
+ * characters taken from the H1 title and then the section titles.
+ */
+function pickKeywords(parsedRuleFile, prefixEntry) {
+  const KEYWORD_LIMIT = 6;
+  const TITLE_TOKEN_PATTERN = /[a-z][a-z0-9]+(?:-[a-z0-9]+)*/g;
+  const keywords = new Set([prefixEntry.domain, prefixEntry.prefix.toLowerCase()]);
+  const sectionTitles = parsedRuleFile.sections.map((section) => section.title);
+  const titleText = [parsedRuleFile.h1Title, ...sectionTitles].join(' ').toLowerCase();
+  const candidateTokens = titleText.match(TITLE_TOKEN_PATTERN) ?? [];
+  for (const token of candidateTokens) {
+    if (keywords.size >= KEYWORD_LIMIT) {
+      break;
+    }
+    const hasUsableLength = token.length >= 4 && token.length <= 32;
+    if (hasUsableLength) {
+      keywords.add(token);
+    }
+  }
+  return Array.from(keywords);
+}
+/**
+ * Renders the v4 YAML frontmatter block, delimited by `---` lines.
+ *
+ * Trimmed v4 frontmatter (per phase-1-format.md GATE B revision): there is no
+ * `version` field for first-time-v1 files, no `last_migrated` field (git
+ * history is the audit trail), and `keywords` holds at most 6 entries.
+ */
+function renderFrontmatter(prefixEntry, parsedRuleFile) {
+  const { prefix, domain, priority, scope, appliesTo } = prefixEntry;
+  const keywords = pickKeywords(parsedRuleFile, prefixEntry);
+  const yamlText = stringifyYaml(
+    {
+      id_prefix: prefix,
+      domain,
+      priority,
+      scope,
+      // Copy so the serializer never sees the shared table array itself.
+      applies_to: Array.from(appliesTo),
+      keywords,
+    },
+    { lineWidth: 0 },
+  );
+  return ['---', yamlText.trimEnd(), '---', ''].join('\n');
+}
+/**
+ * Returns the intro paragraph followed by a blank line, or an empty string
+ * when the parsed file has no intro paragraph.
+ */
+function renderIntroParagraph(parsedRuleFile) {
+  const { introParagraph } = parsedRuleFile;
+  return introParagraph ? `${introParagraph}\n\n` : '';
+}
+// Abbreviations whose trailing period must not be read as the end of a
+// sentence. Without masking, text such as "etc. The next..." would be split
+// right after the abbreviation. Extend this list to cover more cases.
+const NON_SENTENCE_ENDING_ABBREVIATIONS = Object.freeze(['e.g', 'i.e', 'etc', 'vs', 'cf', 'Mr', 'Dr', 'Mrs', 'Inc', 'Ltd']);
+// Placeholder that stands in for a masked abbreviation period.
+const ABBREVIATION_MASK_TOKEN = '\u0001';
+/**
+ * Replaces the trailing period of every listed abbreviation with the mask
+ * token so that sentence splitting ignores it.
+ */
+function maskAbbreviationPeriods(paragraphText) {
+  return NON_SENTENCE_ENDING_ABBREVIATIONS.reduce((textSoFar, abbreviation) => {
+    // Escape the abbreviation's own periods so they match literally.
+    const literalAbbreviation = abbreviation.split('.').join('\\.');
+    const abbreviationWithPeriod = new RegExp(`\\b${literalAbbreviation}\\.`, 'g');
+    return textSoFar.replace(abbreviationWithPeriod, () => abbreviation + ABBREVIATION_MASK_TOKEN);
+  }, paragraphText);
+}
+/** Restores every mask token to a literal period. */
+function unmaskAbbreviationPeriods(text) {
+  return text.split(ABBREVIATION_MASK_TOKEN).join('.');
+}
+/**
+ * Splits a paragraph into trimmed sentences, one directive per sentence.
+ *
+ * A `.`, `!` or `?` counts as a sentence end only when whitespace follows and
+ * the next character is an uppercase letter, a backtick, or an opening
+ * parenthesis. That keeps these intact:
+ *   - file paths      "docs/DESIGN.md"   (period + lowercase, no whitespace)
+ *   - dotted versions "v1.5", "2.0.0"    (period + digit)
+ *   - domain literals "example.com"      (period + lowercase)
+ *   - abbreviations   "e.g.", "etc."     (their trailing period is masked first)
+ */
+export function paragraphSplitsIntoDirectives(paragraphText) {
+  const maskedText = maskAbbreviationPeriods(paragraphText);
+  const boundaryPattern = /([.!?])\s+(?=[A-Z`(])/g;
+  const rawPieces = [];
+  let pieceStart = 0;
+  let boundary = boundaryPattern.exec(maskedText);
+  while (boundary !== null) {
+    // Keep the punctuation with the sentence it closes; drop the whitespace after it.
+    const punctuationEnd = boundary.index + boundary[1].length;
+    rawPieces.push(maskedText.slice(pieceStart, punctuationEnd));
+    pieceStart = boundary.index + boundary[0].length;
+    boundary = boundaryPattern.exec(maskedText);
+  }
+  rawPieces.push(maskedText.slice(pieceStart));
+  return rawPieces
+    .map((piece) => unmaskAbbreviationPeriods(piece).trim())
+    .filter((sentence) => sentence.length > 0);
+}
+/**
+ * Turns one parsed block into the directives it contributes to a section.
+ *
+ * A paragraph is split into sentences. For a bullet list, every bullet is its
+ * own directive: the format spec allows sub-bullets only as supporting detail
+ * under one parent directive, never to compress an enumerative list into a
+ * single item. Separate items stay individually citable (each is a sub-ID
+ * candidate during manual review), matching the worked example in section 6.2.
+ */
+function renderBlockAsNumberedItem(block) {
+  const isBulletList = block.kind !== 'paragraph';
+  if (isBulletList) {
+    // Return a copy so callers cannot alter the parsed block's items.
+    return [...block.items];
+  }
+  return paragraphSplitsIntoDirectives(block.text);
+}
+/**
+ * Flattens a section's blocks, in order, into a single list of directives.
+ */
+function buildSectionBody(blocks) {
+  return blocks.flatMap((block) => renderBlockAsNumberedItem(block));
+}
+/**
+ * Renders a parsed legacy rule file in the v4 format: frontmatter, H1 title,
+ * optional intro paragraph, then one `## <PREFIX>-NNN: <title>` section per
+ * parsed section with its directives as a numbered list.
+ *
+ * Section IDs are assigned sequentially from 001. A warning is added for any
+ * section with more than 12 numbered items or with none at all. Runs of three
+ * or more newlines are collapsed and the output ends with a single newline.
+ *
+ * @param {{ prefix: string, domain: string, priority: string, scope: string, appliesTo: string[] }} prefixEntry
+ * @param {ReturnType<typeof import('./parse-legacy.mjs').parseLegacyRuleFile>} parsedRuleFile
+ * @returns {{ rendered: string, sectionAssignments: Array<{ sectionTitle: string, sectionId: string, itemCount: number }>, warnings: string[] }}
+ */
+export function renderNewFormat(prefixEntry, parsedRuleFile) {
+  const warnings = Array.from(parsedRuleFile.warnings);
+  const sectionAssignments = [];
+  const frontmatter = renderFrontmatter(prefixEntry, parsedRuleFile);
+  const intro = renderIntroParagraph(parsedRuleFile);
+  let documentText = `${frontmatter}\n# ${parsedRuleFile.h1Title}\n\n${intro}`;
+  let sectionNumber = 0;
+  for (const section of parsedRuleFile.sections) {
+    sectionNumber += 1;
+    const sectionId = `${prefixEntry.prefix}-${String(sectionNumber).padStart(3, '0')}`;
+    const directives = buildSectionBody(section.blocks);
+    const itemCount = directives.length;
+    if (itemCount > 12) {
+      warnings.push(
+        `Section "${section.title}" has ${itemCount} numbered items. Format spec caps at 12; split into two sections during manual review.`,
+      );
+    }
+    if (itemCount === 0) {
+      warnings.push(`Section "${section.title}" produced no numbered items. Manual review required.`);
+    }
+    const numberedLines = directives
+      .map((directive, position) => `${position + 1}. ${directive}\n`)
+      .join('');
+    documentText += `## ${sectionId}: ${section.title}\n\n${numberedLines}\n`;
+    sectionAssignments.push({ sectionTitle: section.title, sectionId, itemCount });
+  }
+  const rendered = `${documentText.replace(/\n{3,}/g, '\n\n').trimEnd()}\n`;
+  return { rendered, sectionAssignments, warnings };
+}

package/scripts/migrate-rule-format/roundtrip-validate.mjs ADDED Viewed

@@ -0,0 +1,89 @@
+// @ts-check
+/**
+ * Roundtrip substance validator.
+ *
+ * After rendering the new format, we extract the substantial-word set from
+ * both the original v3 file and the rendered v4 file, then compute set overlap.
+ * Drop in overlap below the threshold means the migration almost certainly
+ * lost real content; the helper surfaces the lost words so the human migrator
+ * can decide whether the loss is intentional (renamed terms) or a bug.
+ */
+// Words that tokenize() discards so they do not count as substantial content
+// in the roundtrip comparison. Entries shorter than four characters are
+// already removed by tokenize()'s length filter; they are harmless here.
+const STOPWORD_SET = new Set([
+  'the', 'and', 'for', 'with', 'that', 'this', 'from', 'into', 'are', 'was',
+  'were', 'has', 'have', 'had', 'not', 'but', 'can', 'will', 'must', 'use',
+  'used', 'using', 'when', 'then', 'than', 'they', 'their', 'them', 'who',
+  'what', 'why', 'how', 'all', 'any', 'one', 'two', 'three', 'four', 'five',
+  'six', 'seven', 'eight', 'nine', 'ten', 'each', 'such', 'some', 'most',
+  'more', 'less', 'only', 'also', 'just', 'over', 'under', 'between', 'across',
+  'before', 'after', 'because', 'while', 'until', 'unless', 'within',
+  'without', 'inside', 'outside', 'about', 'around', 'against', 'through',
+  'throughout', 'during', 'including', 'include', 'includes', 'see', 'note',
+  'rule', 'rules', 'agent', 'agents', 'project', 'repo', 'code', 'file',
+  'files', 'item', 'items', 'list', 'lists', 'thing', 'things', 'value',
+  'values', 'should', 'shall', 'may', 'might', 'could', 'would', 'does',
+  'doing', 'done', 'make', 'makes', 'making', 'made', 'set', 'sets',
+]);
+/**
+ * Extracts the substantial words of a text: lower-cased word tokens of at
+ * least four characters that are not stopwords. Code is ignored.
+ */
+function tokenize(text) {
+  const withoutCode = text
+    .toLowerCase()
+    // Fenced blocks go first (multi-line ``` ... ``` spans).
+    .replace(/```[\s\S]*?```/g, ' ')
+    // Inline spans are matched within one line only, so an unmatched backtick
+    // on a code-heavy line cannot swallow the rest of the file.
+    .replace(/`[^`\n]+`/g, ' ');
+  const candidateWords = withoutCode.match(/[a-z][a-z0-9]+(?:-[a-z0-9]+)*/g) ?? [];
+  const substantialWords = [];
+  for (const candidateWord of candidateWords) {
+    if (candidateWord.length < 4 || STOPWORD_SET.has(candidateWord)) {
+      continue;
+    }
+    substantialWords.push(candidateWord);
+  }
+  return substantialWords;
+}
+/**
+ * Compares the distinct substantial words of the original file with those of
+ * the rendered file.
+ *
+ * The overlap is the percentage of distinct original words that also appear
+ * in the rendered text (100 when the original has none). The check passes
+ * when the unrounded overlap reaches `minimumOverlapPercent` (default 95).
+ * `lostWords` entries carry the original occurrence count as "word (xN)";
+ * `lostWords` and `newWords` are sorted and capped at 50 entries each.
+ *
+ * @param {string} originalSourceText
+ * @param {string} renderedSourceText
+ * @param {{ minimumOverlapPercent?: number }} [options]
+ * @returns {{
+ *   passed: boolean,
+ *   originalSubstantialWordCount: number,
+ *   renderedSubstantialWordCount: number,
+ *   overlapPercent: number,
+ *   lostWords: string[],
+ *   newWords: string[],
+ *   minimumRequired: number,
+ * }}
+ */
+export function roundtripSubstanceCheck(originalSourceText, renderedSourceText, options = {}) {
+  const REPORT_LIMIT = 50;
+  const minimumRequired = options.minimumOverlapPercent ?? 95;
+  const occurrencesByWord = new Map();
+  tokenize(originalSourceText).forEach((word) => {
+    const seenSoFar = occurrencesByWord.get(word) || 0;
+    occurrencesByWord.set(word, seenSoFar + 1);
+  });
+  const renderedWords = new Set(tokenize(renderedSourceText));
+  const lostWords = [];
+  occurrencesByWord.forEach((occurrences, word) => {
+    if (!renderedWords.has(word)) {
+      lostWords.push(`${word} (x${occurrences})`);
+    }
+  });
+  const originalDistinctCount = occurrencesByWord.size;
+  const preservedDistinctCount = originalDistinctCount - lostWords.length;
+  let overlapPercent = 100;
+  if (originalDistinctCount > 0) {
+    overlapPercent = (preservedDistinctCount / originalDistinctCount) * 100;
+  }
+  const newWords = Array.from(renderedWords).filter((word) => !occurrencesByWord.has(word));
+  return {
+    passed: overlapPercent >= minimumRequired,
+    originalSubstantialWordCount: originalDistinctCount,
+    renderedSubstantialWordCount: renderedWords.size,
+    overlapPercent: Math.round(overlapPercent * 100) / 100,
+    lostWords: lostWords.sort().slice(0, REPORT_LIMIT),
+    newWords: newWords.sort().slice(0, REPORT_LIMIT),
+    minimumRequired,
+  };
+}