npm - dravoice - Versions diffs - 0.1.3 → 0.1.4 - Mend

dravoice 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/README.md +32 -9
package/package.json +1 -1
package/src/index.js +106 -13
package/src/v2/analyzers/discourse.js +7 -1
package/src/v2/analyzers/evidence.js +3 -3
package/src/v2/analyzers/register.js +28 -4
package/src/v2/analyzers/rhetorical-shape.js +7 -1
package/src/v2/analyzers/structure.js +18 -1
package/src/v2/benchmark.js +83 -0
package/src/v2/doctor.js +308 -0
package/src/v2/document-model.js +77 -6
package/src/v2/inspect.js +2 -2
package/src/v2/profile.js +126 -11
package/src/v2/review.js +142 -16
package/src/v2/revise-plan.js +111 -8
package/src/v2/stylometry.js +11 -7

package/README.md CHANGED Viewed

@@ -38,18 +38,22 @@ files before it learns from them.
 If your writing is already somewhere else, point Dravoice at that directory:
 ```bash
+npx dravoice doctor --examples ~/writing
 npx dravoice init --examples ~/writing
 ```
-### 1. Initialize and Inspect
+### 1. Check, Initialize, and Inspect
 ```bash
+npx dravoice doctor
 npx dravoice init
 npx dravoice inspect
 ```
-Inspect the profile before trusting it. If the feature-family summaries do not
-look recognizable, improve the source corpus first.
+Run `doctor` before learning so missing folders, unsupported files, weak
+corpora, duplicate-looking files, and length-imbalanced source sets get clear
+next steps. Inspect the profile before trusting it. If the feature-family
+summaries do not look recognizable, improve the source corpus first.
 ### 2. Generate Guidance and a Brief
@@ -68,12 +72,13 @@ npx dravoice revise-plan draft.md
 npx dravoice review draft.md
 ```
-`init` reads your source pieces, writes a local profile in `./dravoice-voice`,
-and writes `.dravoice.yml` project defaults. `inspect` makes the learned
-feature families visible, `prompt` turns high-confidence observations into
-drafting guidance, `brief` creates an evidence-first article plan,
-`revise-plan` ranks calibrated stylometric revision actions, and `review`
-reports family-level drift.
+`doctor` checks corpus readiness, `init` reads your source pieces, writes a
+local profile in `./dravoice-voice`, and writes `.dravoice.yml` project
+defaults. `inspect` makes the learned feature families visible, `prompt` turns
+high-confidence observations into drafting guidance, `brief` creates an
+evidence-first article plan, `revise-plan` ranks calibrated stylometric
+revision actions, and `review` reports family-level drift with calibration
+confidence.
 Run `drav help init` or `drav help review` for command-specific help.
@@ -90,9 +95,27 @@ workflow:
 ```bash
 npx dravoice benchmark prepare --examples ./articles --topic "A new article topic" --out ./bench-run --seed 42
+npx dravoice benchmark prepare-many --examples ./articles --topic "A new article topic" --out ./bench-runs --runs 3 --seed 42
 npx dravoice benchmark score --run ./bench-run --judge ./bench-run/judge/judgment.json
 ```
+Benchmark reports include deterministic margins and repeat-run cautions. A
+single run is directional evidence only. Score output includes suggested
+follow-up seeds for repeated validation runs.
+## Trust Boundaries
+Dravoice profiles include per-family calibration diagnostics: threshold
+observations, stability, minimum-evidence checks, and whether a family is usable
+for findings. Weak corpora produce cautious guidance. Strict review can surface
+document-level discourse, lexical, register, and structure drift, but the
+findings remain revision guidance, not authorship proof.
+V2 profiles expose marker-set register metadata, mixed-register warnings,
+heading-depth and section-order structure signals, paragraph-localized
+revision actions, expanded MDX scaffold filtering before analysis, and
+library-level custom register marker sets for project-specific scoring.
 ## Fresh Install Smoke Test
 From a packed tarball:

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "dravoice",
-  "version": "0.1.3",
+  "version": "0.1.4",
   "description": "Compile article voice profiles into reusable LLM writing context, evidence-first briefs, and deterministic draft review notes.",
   "type": "module",
   "bin": {

package/src/index.js CHANGED Viewed

@@ -3,11 +3,13 @@ import path from "node:path";
 import readline from "node:readline";
 import {
   prepareVoiceBenchmark,
+  prepareVoiceBenchmarkRuns,
   renderBenchmarkReport,
   scoreVoiceBenchmark,
 } from "./v2/benchmark.js";
 import { renderVoiceBriefV2, voiceArticleBriefV2 } from "./v2/brief.js";
 import { renderInspectV2 } from "./v2/inspect.js";
+import { diagnoseVoiceCorpusV2, renderCorpusDoctorV2 } from "./v2/doctor.js";
 import { learnVoicePackV2, loadVoicePackV2 } from "./v2/profile.js";
 import { voicePromptPackV2 } from "./v2/prompt.js";
 import { renderVoiceReviewV2, reviewVoiceDraftV2 } from "./v2/review.js";
@@ -27,13 +29,15 @@ import {
 export {
   learnVoicePackV2 as learnVoicePack,
   loadVoicePackV2 as loadVoicePack,
+  diagnoseVoiceCorpusV2 as diagnoseVoiceCorpus,
   revisePlanDraftV2 as revisePlanDraft,
   reviewVoiceDraftV2 as reviewVoiceDraft,
   voicePromptPackV2 as voicePromptPack,
 };
 export { renderInspectV2, renderRevisePlanV2 as renderRevisePlan, renderVoiceReviewV2 as renderVoiceReview };
 export { renderVoiceBriefV2 as renderVoiceBrief, voiceArticleBriefV2 as voiceArticleBrief };
-export { prepareVoiceBenchmark, renderBenchmarkReport, scoreVoiceBenchmark };
+export { renderCorpusDoctorV2 as renderCorpusDoctor };
+export { prepareVoiceBenchmark, prepareVoiceBenchmarkRuns, renderBenchmarkReport, scoreVoiceBenchmark };
 const INIT_DISCOVERY_DIRS = [
   "./articles",
@@ -128,6 +132,24 @@ export async function runCli(args, io) {
       return 0;
     }
+    if (command === "doctor") {
+      const { options, positional } = parseArgs(rest, ["examples", "format"], "doctor");
+      rejectPositionals(positional, "doctor");
+      const config = loadProjectConfig(io.cwd);
+      const examples = options.examples ?? config.examples ?? DEFAULT_EXAMPLES_DIR;
+      const format = formatOption(options.format, ["text", "json"], "doctor");
+      const result = diagnoseVoiceCorpusV2({
+        examplesDir: examples,
+        cwd: io.cwd,
+      });
+      if (format === "json") {
+        io.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
+      } else {
+        io.stdout.write(renderCorpusDoctorV2(result));
+      }
+      return result.exitCode;
+    }
     if (command === "review") {
       const { options, positional } = parseArgs(rest, ["voice", "mode", "format"], "review");
       rejectUnexpectedPositionals(positional, 1, "review positional");
@@ -139,7 +161,7 @@ export async function runCli(args, io) {
       }
       const result = reviewVoiceDraftV2({
         file: resolvePath(io.cwd, file),
-        voice: loadVoicePackV2(resolveVoicePath(io.cwd, options.voice)),
+        voice: loadVoiceForCommand(io.cwd, options.voice, "review"),
         cwd: io.cwd,
         mode,
       });
@@ -162,7 +184,7 @@ export async function runCli(args, io) {
       }
       const result = revisePlanDraftV2({
         file: resolvePath(io.cwd, file),
-        voice: loadVoicePackV2(resolveVoicePath(io.cwd, options.voice)),
+        voice: loadVoiceForCommand(io.cwd, options.voice, "revise-plan"),
         cwd: io.cwd,
       });
       if (format === "json") {
@@ -180,7 +202,7 @@ export async function runCli(args, io) {
       const promptOptions = resolvePromptOptions(io.cwd, options);
       const format = formatOption(promptOptions.format, ["agents", "claude", "system"], "prompt");
       const rendered = voicePromptPackV2({
-        voice: loadVoicePackV2(resolveVoicePath(io.cwd, options.voice)),
+        voice: loadVoiceForCommand(io.cwd, options.voice, "prompt"),
         format,
         outPath: promptOptions.out ? resolvePath(io.cwd, promptOptions.out) : undefined,
       });
@@ -197,7 +219,7 @@ export async function runCli(args, io) {
       const { options, positional } = parseArgs(rest, ["voice", "topic", "evidence", "format", "out"], "brief");
       const format = formatOption(options.format, ["text", "json"], "brief");
       const result = voiceArticleBriefV2({
-        voice: loadVoicePackV2(resolveVoicePath(io.cwd, options.voice)),
+        voice: loadVoiceForCommand(io.cwd, options.voice, "brief"),
         topic: topicOption(options, positional, "brief"),
         evidence: options.evidence ? resolvePath(io.cwd, options.evidence) : undefined,
         cwd: io.cwd,
@@ -220,7 +242,7 @@ export async function runCli(args, io) {
     if (command === "inspect") {
       const { options, positional } = parseArgs(rest, ["voice"], "inspect");
       rejectPositionals(positional, "inspect");
-      const profile = loadVoicePackV2(resolveVoicePath(io.cwd, options.voice));
+      const profile = loadVoiceForCommand(io.cwd, options.voice, "inspect");
       io.stdout.write(renderInspectV2(profile));
       io.stdout.write("Next: drav prompt --out AGENTS.md\n");
       return 0;
@@ -263,6 +285,20 @@ function runBenchmarkCli(args, io) {
     return 0;
   }
+  if (benchmarkCommand === "prepare-many") {
+    const { options, positional } = parseArgs(rest, ["examples", "topic", "out", "seed", "runs"], "benchmark prepare-many");
+    rejectPositionals(positional, "benchmark prepare-many");
+    const result = prepareVoiceBenchmarkRuns({
+      examplesDir: resolvePath(io.cwd, requiredOption(options, "examples", "benchmark prepare-many")),
+      topic: requiredOption(options, "topic", "benchmark prepare-many"),
+      outDir: resolvePath(io.cwd, requiredOption(options, "out", "benchmark prepare-many")),
+      seed: options.seed ?? "1",
+      runs: options.runs ?? "3",
+    });
+    io.stdout.write(`Prepared ${result.runs.length} benchmark run(s) at ${resolvePath(io.cwd, requiredOption(options, "out", "benchmark prepare-many"))}.\n`);
+    return 0;
+  }
   if (benchmarkCommand === "score") {
     const { options, positional } = parseArgs(rest, ["run", "judge", "format"], "benchmark score");
     rejectPositionals(positional, "benchmark score");
@@ -665,6 +701,33 @@ function resolveVoicePath(cwd, optionValue) {
   return cwd;
 }
+function loadVoiceForCommand(cwd, optionValue, command) { // Load the V2 voice profile for a CLI command; a missing profile is rethrown through usageError with first-run guidance.
+  const voicePath = resolveVoicePath(cwd, optionValue);
+  try {
+    return loadVoicePackV2(voicePath);
+  } catch (error) {
+    if (/^No Dravoice V2 profile found/.test(error.message)) { // detection is by message prefix, so it depends on the exact wording thrown by the loader
+      throw usageError(missingVoiceProfileMessage(command, voicePath), command);
+    }
+    throw error; // every other failure propagates unchanged
+  }
+}
+function missingVoiceProfileMessage(command, voicePath) { // Build the multi-line guidance text shown when no profile exists at voicePath.
+  return [
+    "No Dravoice voice profile found.",
+    `Looked for: ${displayPath(voicePath)}/profile.json`,
+    "",
+    "Run `drav doctor` first to check whether your writing folder is ready.",
+    "Then run `drav init` to create ./dravoice-voice/profile.json and .dravoice.yml.",
+    command === "inspect" // only the inspect command gets a command-specific retry hint
+      ? "After that, run `drav inspect` again."
+      : "After that, rerun this command.",
+    "",
+    "Next: drav init",
+  ].join("\n"); // lines are joined with newlines; no trailing newline is appended here
+}
 function resolvePromptOptions(cwd, options) {
   const config = loadProjectConfig(cwd);
   return {
@@ -694,20 +757,23 @@ function helpText() {
     "Dravoice - local-first voice guidance for writers",
     "",
     "First run:",
-    "1. Initialize your voice profile",
+    "1. Check your writing folder",
+    "   drav doctor",
+    "2. Initialize your voice profile",
     "   drav init",
-    "2. Inspect the profile before trusting it",
+    "3. Inspect the profile before trusting it",
     "   drav inspect",
-    "3. Generate reusable drafting guidance",
+    "4. Generate reusable drafting guidance",
     "   drav prompt --out AGENTS.md",
-    "4. Plan a grounded draft from evidence",
+    "5. Plan a grounded draft from evidence",
     "   drav brief \"New topic\" --evidence notes.md --out brief.md",
-    "5. Revise, then review",
+    "6. Revise, then review",
     "   drav revise-plan draft.md",
     "   drav review draft.md",
     "",
     "Commands:",
     "  init           Learn a profile and save project defaults in one first-run command.",
+    "  doctor         Check whether a writing corpus is ready to learn from.",
     "  learn          Build a local voice profile from Markdown, MDX, or text examples.",
     "  inspect        Show the learned profile in plain language.",
     "  prompt         Render reusable LLM drafting guidance.",
@@ -719,7 +785,7 @@ function helpText() {
     "  drav review draft.md --mode strict --format json",
     "  drav benchmark prepare --examples ./articles --topic \"New topic\" --out ./bench-run --seed 42",
     "",
-    "Run `drav help init` for command-specific help.",
+    "Run `drav help doctor` for command-specific help.",
     "",
   ].join("\n");
 }
@@ -736,6 +802,7 @@ function helpForTopic(topic) {
 const EXAMPLES = {
   help: "drav help init",
   init: "drav init",
+  doctor: "drav doctor --examples ./articles",
   learn: "drav learn --examples ./articles --out ./dravoice-voice",
   inspect: "drav inspect",
   prompt: "drav prompt --out AGENTS.md",
@@ -744,6 +811,7 @@ const EXAMPLES = {
   review: "drav review draft.md --format json",
   benchmark: "drav benchmark prepare --examples ./articles --topic \"New topic\" --out ./bench-run --seed 42",
   "benchmark prepare": "drav benchmark prepare --examples ./articles --topic \"New topic\" --out ./bench-run --seed 42",
+  "benchmark prepare-many": "drav benchmark prepare-many --examples ./articles --topic \"New topic\" --out ./bench-runs --runs 3 --seed 42",
   "benchmark score": "drav benchmark score --run ./bench-run --judge ./bench-run/judge/judgment.json",
 };
@@ -785,6 +853,20 @@ const HELP_TOPICS = {
     "Next: drav inspect",
     "",
   ].join("\n"),
+  doctor: [
+    "Usage: drav doctor [--examples ./articles] [--format text]",
+    "",
+    "What it does:",
+    "Checks whether a Markdown, MDX, or plain-text writing corpus is ready to learn from before you trust a profile.",
+    "",
+    "Options:",
+    "  --examples <dir>    Directory with representative long-form pieces. Defaults to .dravoice.yml, then ./articles.",
+    "  --format <format>   text or json. Defaults to text.",
+    "",
+    `Example: ${EXAMPLES.doctor}`,
+    "Next: drav init",
+    "",
+  ].join("\n"),
   inspect: [
     "Usage: drav inspect [--voice ./dravoice-voice]",
     "",
@@ -871,13 +953,14 @@ const HELP_TOPICS = {
     "",
   ].join("\n"),
   benchmark: [
-    "Usage: drav benchmark <prepare|score> ...",
+    "Usage: drav benchmark <prepare|prepare-many|score> ...",
     "",
     "What it does:",
     "Runs validation workflows for Dravoice development. Most writers do not need this for first use.",
     "",
     "Examples:",
     `  ${EXAMPLES["benchmark prepare"]}`,
+    `  ${EXAMPLES["benchmark prepare-many"]}`,
     `  ${EXAMPLES["benchmark score"]}`,
     "",
   ].join("\n"),
@@ -891,6 +974,16 @@ const HELP_TOPICS = {
     "Next: fill benchmark drafts, then run drav benchmark score --run ./bench-run --judge ./bench-run/judge/judgment.json",
     "",
   ].join("\n"),
+  "benchmark prepare-many": [
+    "Usage: drav benchmark prepare-many --examples ./articles --topic \"New topic\" --out ./bench-runs --runs 3 --seed 42",
+    "",
+    "What it does:",
+    "Creates multiple seeded benchmark run directories for repeated validation.",
+    "",
+    `Example: ${EXAMPLES["benchmark prepare-many"]}`,
+    "Next: fill each run's drafts, then score each run.",
+    "",
+  ].join("\n"),
   "benchmark score": [
     "Usage: drav benchmark score --run ./bench-run --judge ./bench-run/judge/judgment.json",
     "",

package/src/v2/analyzers/discourse.js CHANGED Viewed

@@ -35,14 +35,20 @@ export function analyzeDiscourse(documents) {
 }
 export function transitionLabel(text) {
+  const normalized = String(text ?? "");
   for (const [label, pattern] of Object.entries(TRANSITIONS)) {
-    if (pattern.test(text)) {
+    if (pattern.test(normalized) || embeddedTransitionPattern(pattern).test(normalized)) {
       return label;
     }
   }
   return "plain";
 }
+function embeddedTransitionPattern(pattern) { // Derive a variant of a transition regex that matches after punctuation inside the text.
+  const source = pattern.source.replace(/^\^\(\?:/, "(?:"); // strips the leading caret anchor, but only when the source starts with a caret followed by a non-capturing group
+  return new RegExp(`[.;:,]\\s+${source}`, pattern.flags); // requires one of . ; : , plus whitespace before the transition; flags are copied and a new RegExp is built on every call
+}
 function callbackRate(sentences) {
   let callbacks = 0;
   for (let index = 1; index < sentences.length; index += 1) {

package/src/v2/analyzers/evidence.js CHANGED Viewed

@@ -1,12 +1,12 @@
 import { rate, topItems } from "../text-utils.js";
 const EVIDENCE_PATTERNS = {
-  date: /\b\d{1,2}:\d{2}\s?(?:am|pm)?\b|\b20\d{2}-\d{2}-\d{2}\b|\b(?:monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b/i,
+  date: /\b\d{1,2}:\d{2}\s?(?:am|pm)?\b|\b20\d{2}-\d{2}-\d{2}\b|\b(?:monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b|\b(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|sep(?:tember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\s+\d{1,2},?\s+\d{4}\b/i,
   number: /\b\d+(?:\.\d+)?\b/,
   quote: /"[^"]+"|'[^']+'|^>/,
   url: /https?:\/\/\S+/i,
-  citation: /\[[^\]]+\]\([^)]+\)|\([A-Z][A-Za-z]+,\s*\d{4}\)/,
-  sourceAttribution: /\b(according to|reported|observed|noted|recorded|quoted|interviewed|surveyed|field notes said|data shows|study found|the memo|the log|the report)\b/i,
+  citation: /\[[^\]]+\]\([^)]+\)|\[\^[^\]]+\]|\([A-Z][A-Za-z]+,\s*\d{4}\)/,
+  sourceAttribution: /\b(according to|reported|observed|noted|recorded|quoted|interviewed|surveyed|field notes said|data shows|study found|the memo|the log|the report|source:|internal memo)\b/i,
   sensory: /\b(cold|warm|hot|cool|quiet|loud|bright|dark|red|blue|green|rough|smooth|sharp|soft|smelled|smell|scent|tasted|heard|sound|noise|flashed|visible|physical|rain|metal|smoke)\b/i,
   specificExample: /\b(for example|for instance|such as|including|included|includes|sample|case in point|specifically|in one case)\b/i,
 };

package/src/v2/analyzers/register.js CHANGED Viewed

@@ -11,13 +11,23 @@ const REGISTER_MARKERS = {
   formal: ["requires", "outcomes", "process", "alignment", "therefore"],
 };
-export function analyzeRegister(documents) {
+export function analyzeRegister(documents, { markers = REGISTER_MARKERS } = {}) {
   const text = documents.map((document) => document.text.toLowerCase()).join("\n\n");
   const words = new Set(contentWords(text));
-  const scores = Object.entries(REGISTER_MARKERS).map(([value, markers]) => ({
+  const markerSets = Object.entries(markers).map(([value, markerList]) => {
+    const matchedMarkers = markerList.filter((marker) => markerAppears(text, words, marker));
+    return {
+      value,
+      markers: markerList,
+      matchedMarkers,
+      score: rate(matchedMarkers.length, markerList.length, 2),
+    };
+  });
+  const scores = markerSets.map(({ value, score }) => ({
     value,
-    score: rate(markers.filter((marker) => markerAppears(text, words, marker)).length, markers.length, 2),
+    score,
   })).sort((left, right) => right.score - left.score || left.value.localeCompare(right.value));
+  const mixedRegister = isMixedRegister(scores);
   return {
     family: "register",
@@ -25,14 +35,28 @@ export function analyzeRegister(documents) {
     features: {
       primary: scores[0] ?? { value: "unknown", score: 0 },
       scores,
+      markerSets,
+      mixedRegister,
       topContentWords: topItems(contentWords(text), 12),
     },
     examples: scores.slice(0, 3).map((item) => `${item.value}: ${item.score}`),
-    warnings: documents.length < 3 ? ["Register confidence is limited because the corpus has fewer than 3 documents."] : [],
+    warnings: [
+      ...(documents.length < 3 ? ["Register confidence is limited because the corpus has fewer than 3 documents."] : []),
+      ...(mixedRegister ? ["Mixed register signals detected; treat the primary register as a weak summary of the genre mix."] : []),
+    ],
     revisionHandles: ["Check whether the draft uses the same broad register and genre mix as the corpus."],
   };
 }
+function isMixedRegister(scores) { // Report whether the second active register scores close enough to the first to count as mixed.
+  const active = scores.filter((item) => item.score > 0); // registers with a zero score are ignored
+  if (active.length < 2) {
+    return false;
+  }
+  const [first, second] = active; // takes the first two entries in the given order; analyzeRegister sorts scores by descending score before calling
+  return second.score >= Math.max(0.2, first.score * 0.6); // second must reach 60 percent of the first score, and never less than 0.2
+}
 function markerAppears(text, words, marker) {
   const normalized = marker.toLowerCase();
   if (/^[a-z0-9'-]+$/.test(normalized)) {

package/src/v2/analyzers/rhetorical-shape.js CHANGED Viewed

@@ -1,11 +1,16 @@
 import { evidenceTypes } from "./evidence.js";
 import { transitionLabel } from "./discourse.js";
-import { topItems } from "../text-utils.js";
+import { splitSentences, topItems } from "../text-utils.js";
 export function analyzeRhetoricalShape(documents) {
   const documentMoves = documents.map((document) => document.sentences.map((sentence) => moveFor(sentence.text)));
   const sentenceMoves = documentMoves.flat();
   const openingMoves = documents.flatMap((document) => document.sentences.slice(0, 3).map((sentence) => moveFor(sentence.text)));
+  const paragraphMovePatterns = documents.flatMap((document) =>
+    document.paragraphs
+      .map((paragraph) => splitSentences(paragraph.text).map((sentence) => moveFor(sentence)).join(" -> "))
+      .filter(Boolean)
+  );
   const bigrams = [];
   const trigrams = [];
   const openingMovePatterns = [];
@@ -28,6 +33,7 @@ export function analyzeRhetoricalShape(documents) {
       moveRates: topItems(sentenceMoves, 12),
       openingMoves: openingMoves.slice(0, 9),
       openingMovePatterns: topItems(openingMovePatterns, 8),
+      paragraphMovePatterns: topItems(paragraphMovePatterns, 12),
       moveBigrams: topItems(bigrams, 12),
       moveTrigrams: topItems(trigrams, 12),
       commonSequences: topItems(bigrams, 12),

package/src/v2/analyzers/structure.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { distribution, rate } from "../text-utils.js";
+import { distribution, rate, topItems } from "../text-utils.js";
 import { moveFor } from "./rhetorical-shape.js";
 export function analyzeStructure(documents) {
@@ -6,6 +6,19 @@ export function analyzeStructure(documents) {
     document.sections.map((section) => section.blocks.reduce((sum, block) => sum + block.lines.join(" ").split(/\s+/).filter(Boolean).length, 0))
   );
   const openingMoves = documents.flatMap((document) => document.sentences.slice(0, 2).map((sentence) => moveFor(sentence.text)));
+  const sectionOrderPatterns = documents
+    .map((document) => document.headings.map((heading) => `h${heading.depth}`).join(" -> "))
+    .filter(Boolean);
+  const listPlacementPatterns = documents.flatMap((document) =>
+    document.blocks
+      .filter((block) => block.type === "list")
+      .map((block) => `h${block.headingDepth || 0}:list`)
+  );
+  const quotePlacementPatterns = documents.flatMap((document) =>
+    document.blocks
+      .filter((block) => block.type === "quote")
+      .map((block) => `h${block.headingDepth || 0}:quote`)
+  );
   return {
     family: "structure",
@@ -13,6 +26,10 @@ export function analyzeStructure(documents) {
     features: {
       sectionWords: distribution(sectionLengths),
       headingCount: distribution(documents.map((document) => document.headings.length)),
+      maxHeadingDepth: distribution(documents.map((document) => Math.max(0, ...document.headings.map((heading) => heading.depth)))),
+      sectionOrderPatterns: topItems(sectionOrderPatterns, 12),
+      listPlacementPatterns: topItems(listPlacementPatterns, 12),
+      quotePlacementPatterns: topItems(quotePlacementPatterns, 12),
       openingMoves,
       listDocumentRate: rate(documents.filter((document) => document.blocks.some((block) => block.type === "list")).length, documents.length, 2),
       quoteDocumentRate: rate(documents.filter((document) => document.blocks.some((block) => block.type === "quote")).length, documents.length, 2),

package/src/v2/benchmark.js CHANGED Viewed

@@ -81,6 +81,40 @@ export function prepareVoiceBenchmark({ examplesDir, topic, outDir, seed = 1, cw
   return benchmark;
 }
+export function prepareVoiceBenchmarkRuns({ examplesDir, topic, outDir, seed = 1, runs = 3, cwd = process.cwd() }) { // Prepare several seeded benchmark runs under outDir and write a benchmark-runs.json manifest there.
+  const runCount = normalizeRunCount(runs); // throws unless runs is a whole number from 1 to 50
+  const normalizedSeed = normalizeSeed(seed);
+  const root = path.resolve(resolvePath(cwd, outDir));
+  const preparedRuns = [];
+  for (let index = 0; index < runCount; index += 1) {
+    const runSeed = (normalizedSeed + index) >>> 0; // consecutive seeds, wrapped to an unsigned 32-bit integer
+    const runName = `run-${String(index + 1).padStart(3, "0")}`; // one-based, zero-padded to three digits
+    const benchmark = prepareVoiceBenchmark({
+      examplesDir,
+      topic,
+      outDir: path.join(root, runName), // each run gets its own subdirectory of root
+      seed: runSeed,
+      cwd,
+    });
+    preparedRuns.push({
+      index: index + 1,
+      name: runName,
+      seed: runSeed,
+      path: runName, // recorded relative to the directory that holds the manifest
+      corpusFileCount: benchmark.corpus.fileCount,
+    });
+  }
+  const manifest = {
+    schemaVersion: BENCHMARK_SCHEMA_VERSION,
+    generatedBy: `${GENERATED_BY}-runs`,
+    topic,
+    runs: preparedRuns,
+    minimumRunsRecommended: Math.max(3, runCount), // never recommends fewer than 3 runs
+  };
+  writeUtf8FileSafely(path.join(root, "benchmark-runs.json"), `${JSON.stringify(manifest, null, 2)}\n`);
+  return manifest; // the same object that was serialized to disk
+}
 export function scoreVoiceBenchmark({ runDir, judgePath, judgeFile, judge, cwd = process.cwd() }) {
   const root = path.resolve(resolvePath(cwd, runDir));
   const resolvedJudgePath = judgePath ?? judgeFile ?? judge;
@@ -145,6 +179,15 @@ export function renderBenchmarkReport(scores) {
     lines.push(`Deterministic provisional leader: ${scores.deterministicWinner.draft} (${scores.deterministicWinner.label})`);
   }
   lines.push("Single benchmark run is directional, not proof; compare repeated runs and family diagnostics before deciding.");
+  if (scores.comparison) {
+    lines.push("");
+    lines.push(`Deterministic comparison: ${scores.comparison.deterministicLeader} leads by ${scores.comparison.deterministicMargin} point(s).`);
+    lines.push(`Repeated runs recommended: ${scores.comparison.repeatedRunsRecommended ? "yes" : "no"}.`);
+  }
+  if (scores.repeatSummary) {
+    lines.push(`Minimum repeat runs recommended: ${scores.repeatSummary.minimumRunsRecommended}.`);
+    lines.push(`Suggested next seeds: ${scores.repeatSummary.nextSeeds.join(", ")}.`);
+  }
   for (const key of ["baseline", "voiceAssisted"]) {
     const draft = scores.drafts[key];
@@ -217,12 +260,52 @@ function benchmarkScores({ benchmark, baselineReview, voiceReview, judge, judgeP
       baseline,
       voiceAssisted,
     },
+    comparison: benchmarkComparison({ baseline, voiceAssisted }),
+    repeatSummary: repeatSummaryFor(benchmark.seed),
     deterministicWinner,
     winner,
     exitCode: 0,
   };
 }
+function repeatSummaryFor(seed) { // Build the repeat-run advice attached to a benchmark score, including suggested follow-up seeds.
+  const normalized = normalizeSeed(seed);
+  return {
+    minimumRunsRecommended: 3, // fixed value, not derived from the seed
+    nextSeeds: [1, 2, 3].map((offset) => (normalized + offset) >>> 0), // the three seeds following this one, wrapped to an unsigned 32-bit integer
+    reason: "Compare multiple topics, blind mappings, and draft pairs before treating benchmark results as product evidence.",
+  };
+}
+function normalizeRunCount(runs) { // Validate a run count given as a number or a string and return it as an integer.
+  const value = String(runs);
+  if (!/^\d+$/.test(value)) { // digits only: signs, decimals, whitespace and the empty string are rejected
+    throw new Error(`Invalid runs value: ${runs}`);
+  }
+  const parsed = Number(value);
+  if (!Number.isSafeInteger(parsed) || parsed < 1 || parsed > 50) { // accepted range is 1 through 50 inclusive
+    throw new Error("Benchmark runs must be an integer between 1 and 50.");
+  }
+  return parsed;
+}
+function benchmarkComparison({ baseline, voiceAssisted }) { // Compare the deterministic voiceFit scores of the baseline and voice-assisted drafts.
+  const margin = roundHalfUp(Math.abs( // absolute gap between the two scores; the 2 passed below is presumably a decimal-place count - confirm against roundHalfUp
+    voiceAssisted.deterministic.voiceFit - baseline.deterministic.voiceFit,
+  ), 2);
+  const deterministicLeader = voiceAssisted.deterministic.voiceFit === baseline.deterministic.voiceFit
+    ? "tie" // NOTE(review): renderBenchmarkReport interpolates this value before the words leads by, so equal scores print as a tie that leads by 0 points
+    : voiceAssisted.deterministic.voiceFit > baseline.deterministic.voiceFit
+      ? "voice-assisted"
+      : "baseline";
+  return {
+    deterministicLeader,
+    deterministicMargin: margin,
+    repeatedRunsRecommended: true, // always true; not derived from the margin
+    caution: "Single benchmark runs are directional; repeat with more topics and draft pairs before making product claims.",
+  };
+}
 function draftScore({ key, name, label, review, judge }) {
   const deterministic = deterministicScore(review);
   const judgeDraft = judge ? normalizeJudgeDraft(judge.drafts?.[label], label) : null;