npm - dravoice - Versions diffs - 0.1.4 → 0.2.0 - Mend

dravoice 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/README.md +7 -0
package/package.json +1 -1
package/src/index.js +7 -0
package/src/v2/analyzers/structure.js +92 -1
package/src/v2/brief.js +41 -7
package/src/v2/document-model.js +1 -0
package/src/v2/profile.js +112 -8
package/src/v2/prompt.js +10 -3
package/src/v2/text-utils.js +5 -2

package/README.md CHANGED Viewed

@@ -15,6 +15,13 @@ Start with your own writing, not a blank prompt. Create an `articles`
 directory in the folder where you want to run Dravoice, then copy in at least
 3 representative long-form Markdown, MDX, or plain-text pieces.
+## Why Not Just Paste Examples Into AI?
+That can be fine for casual one-off drafts. Dravoice is for repeatable
+long-form workflows where you want inspection, evidence grounding, and revision diagnostics
+from local, inspectable guidance instead of asking a model to infer your style
+from scratch every time.
 ```bash
 mkdir -p articles
 ```

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "dravoice",
-  "version": "0.1.4",
+  "version": "0.2.0",
   "description": "Compile article voice profiles into reusable LLM writing context, evidence-first briefs, and deterministic draft review notes.",
   "type": "module",
   "bin": {

package/src/index.js CHANGED Viewed

@@ -756,6 +756,9 @@ function helpText() {
   return [
     "Dravoice - local-first voice guidance for writers",
     "",
+    "Why not just paste examples into AI?",
+    "Dravoice complements AI by turning your own corpus into inspectable, repeatable, and reviewable guidance.",
+    "",
     "First run:",
     "1. Check your writing folder",
     "   drav doctor",
@@ -872,6 +875,7 @@ const HELP_TOPICS = {
     "",
     "What it does:",
     "Shows corpus confidence, feature families, revision handles, and drafting guidance in plain language.",
+    "Check what Dravoice learned before trusting it.",
     "",
     "Options:",
     "  --voice <dir>   Voice profile directory. Defaults to .dravoice.yml,",
@@ -886,6 +890,7 @@ const HELP_TOPICS = {
     "",
     "What it does:",
     "Turns high-confidence profile observations into reusable drafting guidance for an LLM or writing agent.",
+    "Use this to give AI stable guidance without re-pasting source writing.",
     "",
     "Options:",
     "  --voice <dir>       Voice profile directory. Defaults to .dravoice.yml,",
@@ -923,6 +928,7 @@ const HELP_TOPICS = {
     "",
     "What it does:",
     "Ranks calibrated, human-editable revision actions. It does not rewrite the draft or claim AI detection.",
+    "Use deterministic diagnostics after drafting; this is the part a plain prompt cannot reliably provide.",
     "",
     "Options:",
     "  --voice <dir>       Voice profile directory. Defaults to .dravoice.yml,",
@@ -938,6 +944,7 @@ const HELP_TOPICS = {
     "",
     "What it does:",
     "Compares a draft with the profile and reports family-level drift. It is revision guidance, not AI detection.",
+    "Use deterministic diagnostics after drafting; this is the part a plain prompt cannot reliably provide.",
     "",
     "Options:",
     "  --voice <dir>       Voice profile directory. Defaults to .dravoice.yml,",

package/src/v2/analyzers/structure.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { distribution, rate, topItems } from "../text-utils.js";
+import { distribution, rate, splitSentences, topItems } from "../text-utils.js";
 import { moveFor } from "./rhetorical-shape.js";
 export function analyzeStructure(documents) {
@@ -33,9 +33,100 @@ export function analyzeStructure(documents) {
       openingMoves,
       listDocumentRate: rate(documents.filter((document) => document.blocks.some((block) => block.type === "list")).length, documents.length, 2),
       quoteDocumentRate: rate(documents.filter((document) => document.blocks.some((block) => block.type === "quote")).length, documents.length, 2),
+      templateTics: templateTics(documents),
+      formattingPalette: formattingPalette(documents),
     },
     examples: openingMoves.slice(0, 5),
     warnings: documents.length < 3 ? ["Structure confidence is limited because the corpus has fewer than 3 documents."] : [],
     revisionHandles: ["Compare headings, list/quote use, section size, and opening structure."],
   };
 }
+// Detects recurring document-template "tics" — the formatting habits that make a
+// writer's pieces look copy-pasted regardless of topic. These are the visible
+// signature: single-sentence paragraphs, Title-Case headings, a pull-quote in the
+// lede, and a bullet list in nearly every section. Reported as corpus-wide rates so
+// guidance can tell the model to vary them deliberately rather than reproduce the same
+// skeleton every time.
+function templateTics(documents) {
+  const proseParagraphs = documents.flatMap((document) =>
+    document.blocks.filter((block) => block.type === "paragraph"));
+  const singleSentenceParagraphs = proseParagraphs.filter((block) =>
+    splitSentences(block.lines.join(" ")).length <= 1).length;
+  const allHeadings = documents.flatMap((document) => document.headings);
+  const titleCaseHeadings = allHeadings.filter((heading) => isTitleCase(heading.text)).length;
+  // A blockquote in the lede = a quote block before the first heading.
+  const ledeBlockquoteDocuments = documents.filter((document) =>
+    document.blocks.some((block) => block.type === "quote" && block.headingId == null)).length;
+  // Sections (heading + its blocks) that contain at least one list.
+  const headedSections = documents.flatMap((document) =>
+    document.sections.filter((section) => section.heading));
+  const sectionsWithList = headedSections.filter((section) =>
+    section.blocks.some((block) => block.type === "list")).length;
+  return {
+    singleSentenceParagraphRate: rate(singleSentenceParagraphs, proseParagraphs.length, 2),
+    titleCaseHeadingRate: rate(titleCaseHeadings, allHeadings.length, 2),
+    ledeBlockquoteRate: rate(ledeBlockquoteDocuments, documents.length, 2),
+    sectionListRate: rate(sectionsWithList, headedSections.length, 2),
+  };
+}
+// The set of Markdown formatting devices a corpus actually reaches for. Reported as
+// per-document usage rates so guidance can name what the corpus leans on and which
+// devices it underuses — Markdown offers far more range than blockquote + bullet list.
+const FORMATTING_DEVICES = {
+  blockquote: /^>\s+/m,
+  bulletList: /^[ \t]*[-*+]\s+/m,
+  orderedList: /^[ \t]*\d+[.)]\s+/m,
+  nestedList: /^[ \t]+[-*+\d]/m,
+  subHeading: /^#{3,6}\s+/m,
+  table: /^\|.*\|\s*$/m,
+  codeBlock: /^(```|~~~)/m,
+  inlineCode: /(^|[^`])`[^`\n]+`/m,
+  boldInline: /\*\*[^*\n]+\*\*|__[^_\n]+__/m,
+  italicInline: /(^|[^*_])[*_][^*_\n]+[*_]/m,
+  link: /\[[^\]]+\]\([^)]+\)/m,
+  horizontalRule: /^(\s*([-*_])\s*){3,}$/m,
+};
+function formattingPalette(documents) {
+  const palette = {};
+  for (const [device, pattern] of Object.entries(FORMATTING_DEVICES)) {
+    const used = documents.filter((document) => {
+      const raw = stripFrontmatter(document.raw || "");
+      // codeBlock is the fence itself; every other device is checked outside fences.
+      return pattern.test(device === "codeBlock" ? raw : stripCodeFences(raw));
+    }).length;
+    palette[device] = rate(used, documents.length, 2);
+  }
+  return palette;
+}
+// Ignore device markers that appear only inside fenced code so a corpus of code-heavy
+// posts is not credited with using tables/lists it merely quoted.
+function stripCodeFences(raw) {
+  return raw.replace(/```[\s\S]*?```/g, "\n").replace(/~~~[\s\S]*?~~~/g, "\n");
+}
+// Drop leading YAML front matter so its `---` fence and indented `  - tag` lines are not
+// counted as section dividers or nested lists.
+function stripFrontmatter(raw) {
+  return raw.replace(/^\uFEFF?\s*---\r?\n[\s\S]*?\r?\n---\r?\n?/, "");
+}
+function isTitleCase(text) {
+  const words = String(text ?? "").split(/\s+/).filter((word) => /[a-z]/i.test(word));
+  if (words.length < 2) {
+    return false;
+  }
+  const significant = words.filter((word) => word.length > 3);
+  if (significant.length < 2) {
+    return false;
+  }
+  const capitalized = significant.filter((word) => /^[A-Z]/.test(word)).length;
+  return capitalized / significant.length >= 0.8;
+}

package/src/v2/brief.js CHANGED Viewed

@@ -27,7 +27,7 @@ export function voiceArticleBriefV2({ voice, topic, evidence, cwd = process.cwd(
     workingThesis: `Draft a grounded article about ${topic}. Let the supplied evidence set the size of each claim before broadening the lesson.`,
     evidence: evidenceResult,
     missingEvidence: missingEvidenceFor({ topic, evidenceAnchors: evidenceResult.anchors }),
-    outline: outlineFor(profile),
+    outline: outlineFor(profile, topic),
     voiceCautions: [
       ...profile.guidance.avoid,
       "Mark unsupported claims as [specific evidence needed] instead of inventing proof.",
@@ -116,19 +116,53 @@ function missingEvidenceFor({ topic, evidenceAnchors }) {
   return items;
 }
-function outlineFor(profile) {
-  const opening = profile.families.rhetoricalShape.features.openingMoves.slice(0, 3).join(" -> ");
-  const sentenceMedian = profile.families.rhythm.features.sentenceWords.median;
+function outlineFor(profile, topic = "") {
+  const rhythm = profile.families.rhythm.features.sentenceWords;
+  const seed = topicSeed(topic);
+  const openingPatterns = (profile.families.rhetoricalShape.features.openingMovePatterns || [])
+    .map((item) => item.value)
+    .filter(Boolean);
+  const opening = pickSeeded(openingPatterns, seed);
+  const sectionShapes = (profile.families.structure.features.sectionOrderPatterns || [])
+    .map((item) => item.value)
+    .filter(Boolean);
+  const sectionShape = pickSeeded(sectionShapes, seed + 1);
+  const headingCount = profile.families.structure.features.headingCount || {};
+  const rhythmRange = rhythm.count > 0
+    ? `${rhythm.p25}-${rhythm.p75} words (median ${rhythm.median}, variation ~${rhythm.stdev})`
+    : "a varied range of sentence lengths";
   return [
     opening
-      ? `Start from a concrete artifact or observation, keeping the opening shape compatible with: ${opening}.`
-      : "Start from a concrete artifact or observation before making the larger claim.",
+      ? `Start from a concrete artifact or observation; for this piece try the opening shape "${opening}", but do not reuse one opening across articles.`
+      : "Start from a concrete artifact or observation before making the larger claim, varying the opening across pieces.",
     "Name the pressure, question, or practical stakes that make the evidence matter.",
-    `Develop the article in the learned register with sentence pacing near the ${sentenceMedian}-word median where it fits.`,
+    sectionShape
+      ? `Shape the body using a section pattern from the corpus such as "${sectionShape}" (heading count usually ${headingCount.min ?? 0}-${headingCount.max ?? 0}); pick what fits this topic rather than a fixed skeleton.`
+      : "Shape the body to fit this topic; vary sectioning across pieces instead of reusing one skeleton.",
+    `Develop the article in the learned register, mixing short and long sentences across ${rhythmRange} rather than holding a constant cadence.`,
     "Close by returning to the evidence and leaving the reader with a practical handle, not a generic conclusion.",
   ];
 }
+function topicSeed(topic) {
+  let hash = 0;
+  for (const char of String(topic ?? "")) {
+    hash = (hash * 31 + char.charCodeAt(0)) >>> 0;
+  }
+  return hash;
+}
+function pickSeeded(values, seed) {
+  if (!values.length) {
+    return "";
+  }
+  return values[seed % values.length];
+}
 function resolvePath(cwd, value) {
   return path.isAbsolute(value) ? value : path.join(cwd, value);
 }

package/src/v2/document-model.js CHANGED Viewed

@@ -191,6 +191,7 @@ export function parseDocument({ filePath, rootDir = process.cwd(), contents }) {
     sentences,
     wordCount: sentences.reduce((sum, sentence) => sum + sentence.tokens.length, 0),
     text: paragraphs.map((paragraph) => paragraph.text).join("\n\n"),
+    raw: String(contents ?? ""),
   };
 }

package/src/v2/profile.js CHANGED Viewed

@@ -302,35 +302,139 @@ function confidenceFor(documentCount, sentenceCount, wordCount) {
 function guidanceFor({ source, families }) {
   const primaryRegister = families.register.features.primary.value;
   const evidenceRate = families.evidence.features.evidenceSentenceRate;
-  const sentenceMedian = families.rhythm.features.sentenceWords.median;
-  const opening = families.rhetoricalShape.features.openingMoves.slice(0, 3).join(" -> ");
+  const sentenceWords = families.rhythm.features.sentenceWords;
+  const registerMix = registerMixLabel(families.register.features.scores);
+  const openingPatterns = (families.rhetoricalShape.features.openingMovePatterns || [])
+    .slice(0, 3)
+    .map((item) => item.value)
+    .filter(Boolean);
   const draftingRules = [
-    `Keep sentence pacing near the learned median of ${sentenceMedian} words when it fits the draft.`,
-    `Use ${primaryRegister} register as the default genre signal unless the piece intentionally changes genre.`,
+    sentenceWords.count > 0
+      ? `Vary sentence length across the corpus band of ${sentenceWords.p25}-${sentenceWords.p75} words (median ${sentenceWords.median}). Mix short and long sentences; do not hold a constant length.`
+      : "Vary sentence length; mix short and long sentences rather than holding a constant cadence.",
+    sentenceWords.stdev > 0
+      ? `Keep sentence-length variation (burstiness) near the corpus standard deviation of about ${sentenceWords.stdev} words; flattening every sentence to the median reads as machine-made.`
+      : "Preserve natural variation in sentence length rather than flattening it to one value.",
+    registerMix
+      ? `Default to the learned register mix (${registerMix}) instead of one fixed genre; let each piece lean differently within it.`
+      : `Use ${primaryRegister} register as the default genre signal unless the piece intentionally changes genre.`,
     evidenceRate > 0.35
       ? "Anchor broad claims with concrete scenes, numbers, quotes, citations, or specific examples."
       : "Do not force evidence density higher than the source corpus supports.",
-    opening
-      ? `Prefer opening moves compatible with: ${opening}.`
-      : "Start from the article's real subject rather than generic positioning.",
+    openingPatterns.length
+      ? `Rotate among the corpus's observed opening shapes (e.g. ${openingPatterns.join("; ")}) rather than reusing one opening every time.`
+      : "Start from the article's real subject rather than generic positioning, and vary openings across pieces.",
   ];
   return {
     summary: [
       `Local Dravoice V2 profile from ${source.documentCount} document(s), ${source.wordCount} words, confidence ${source.confidence.band}.`,
-      `Primary register signal: ${primaryRegister}.`,
+      `Primary register signal: ${primaryRegister}${registerMix ? ` (mix: ${registerMix})` : ""}.`,
     ],
     draftingRules,
+    formatting: formattingGuidance(families.structure.features),
     avoid: [
       "Do not treat topic vocabulary as proof of voice fit.",
       "Do not claim a draft is or is not the writer's true voice.",
       "Do not invent concrete evidence to satisfy a style finding.",
+      "Do not reuse the same outline, opening, or formatting on every piece; stay inside the learned ranges and vary within them.",
     ],
     examples: [],
   };
 }
+function registerMixLabel(scores) {
+  const active = (scores || []).filter((item) => item.score > 0).slice(0, 3);
+  if (active.length < 2) {
+    return "";
+  }
+  return active.map((item) => `${item.value} ${item.score}`).join(", ");
+}
+function formattingGuidance(structure) {
+  const rules = [];
+  const headingCount = structure.headingCount || {};
+  if (headingCount.count > 0 && (headingCount.min !== headingCount.max)) {
+    rules.push(`Heading count varies across the corpus (${headingCount.min}-${headingCount.max}); pick a level of sectioning that fits the piece rather than a fixed template.`);
+  }
+  const patterns = (structure.sectionOrderPatterns || []).slice(0, 4).map((item) => item.value).filter(Boolean);
+  if (patterns.length) {
+    rules.push(`Observed section shapes to choose among (do not reuse one skeleton): ${patterns.join(" | ")}.`);
+  }
+  if (typeof structure.listDocumentRate === "number") {
+    rules.push(`Lists appear in about ${Math.round(structure.listDocumentRate * 100)}% of pieces and quotes in about ${Math.round((structure.quoteDocumentRate ?? 0) * 100)}%; use them where they fit, not on every draft.`);
+  }
+  return [
+    "Formatting and document structure are NOT the writer's voice and must not be reproduced as if they were. Voice lives in the Drafting Rules above (rhythm, diction, register, evidence). The items below are formatting habits to deliberately vary: do not justify keeping a repeated tic as \"authentic voice.\"",
+    ...rules,
+    ...antiTemplateGuidance(structure.templateTics),
+    ...paletteGuidance(structure.formattingPalette),
+  ];
+}
+const PALETTE_LABELS = {
+  blockquote: "blockquotes",
+  bulletList: "bullet lists",
+  orderedList: "numbered lists",
+  nestedList: "nested lists",
+  subHeading: "sub-headings (h3+)",
+  table: "tables",
+  codeBlock: "code blocks",
+  inlineCode: "inline code",
+  boldInline: "bold emphasis",
+  italicInline: "italic emphasis",
+  link: "links",
+  horizontalRule: "section dividers",
+};
+// Markdown offers far more range than the few devices a homogeneous corpus reaches for.
+// Name what the corpus leans on and which devices it underuses, and push the model to
+// broaden the palette where the content genuinely calls for it.
+function paletteGuidance(palette) {
+  if (!palette) {
+    return [];
+  }
+  const leanedOn = Object.keys(palette).filter((device) => palette[device] >= 0.6);
+  const underused = Object.keys(palette).filter((device) => palette[device] <= 0.2);
+  const rules = [];
+  if (leanedOn.length) {
+    rules.push(`The corpus's formatting palette is narrow: it leans on ${labelList(leanedOn)}. Treat that as a habit to widen, not a target to hit.`);
+  }
+  if (underused.length) {
+    rules.push(`Markdown devices the corpus rarely or never uses: ${labelList(underused)}. Reach for these where the content fits (a comparison wants a table, steps want numbered items, a definition wants inline code, an aside wants italics) so pieces do not all share one formatting shape.`);
+  }
+  return rules;
+}
+function labelList(devices) {
+  return devices.map((device) => PALETTE_LABELS[device] ?? device).join(", ");
+}
+// When the corpus over-relies on a formatting template, faithfully reproducing it is
+// what makes generated pieces look copy-pasted. This guidance is prescriptive (injected
+// to break the habit), not a learned pattern to match — surfaced only when a tic
+// dominates the corpus, so varied corpora are left alone.
+function antiTemplateGuidance(tics) {
+  if (!tics) {
+    return [];
+  }
+  const rules = [];
+  if (tics.singleSentenceParagraphRate >= 0.35) {
+    rules.push(`Watch a strong template tic: about ${Math.round(tics.singleSentenceParagraphRate * 100)}% of corpus paragraphs are a single sentence. This staccato one-line-paragraph cadence is the loudest "copy-paste" tell. Deliberately group related sentences into multi-sentence paragraphs; reserve one-line paragraphs for genuine emphasis.`);
+  }
+  if (tics.titleCaseHeadingRate >= 0.6) {
+    rules.push(`Watch a template tic: about ${Math.round(tics.titleCaseHeadingRate * 100)}% of headings are Title Case. Vary heading style (sentence case, questions, or no headings at all) instead of the same Title-Case section labels every time.`);
+  }
+  if (tics.ledeBlockquoteRate >= 0.5) {
+    rules.push(`Watch a template tic: about ${Math.round(tics.ledeBlockquoteRate * 100)}% of pieces drop a pull-quote (blockquote) into the intro before the first heading. Do not open with an aphoristic blockquote by default; let most pieces earn a quote later or skip it.`);
+  }
+  if (tics.sectionListRate >= 0.45) {
+    rules.push(`Watch a template tic: about ${Math.round(tics.sectionListRate * 100)}% of sections contain a bullet list. Do not put a list in section after section; carry most points in connected prose and reserve lists for genuinely enumerable material.`);
+  }
+  return rules;
+}
 function toleranceFor(band, deep, strong, weak) {
   if (band === "deep") {
     return deep;

package/src/v2/prompt.js CHANGED Viewed

@@ -23,9 +23,9 @@ function renderPrompt(profile, format) {
     system: "System writing guidance: Dravoice V2",
   }[format];
   const preface = {
-    agents: "Use this as local, inspectable drafting guidance from the writer's own corpus. It is not an AI detector or a license to imitate a third party.",
-    claude: "Use these project-local voice notes when drafting or reviewing prose for this repository. Treat them as guidance, not identity proof.",
-    system: "Follow these local voice constraints when writing prose. Do not expose private source text or claim authorship identity from them.",
+    agents: "Use this as local guidance that complements AI by preserving reusable, inspectable constraints from the writer's own corpus. It is not an AI detector or a license to imitate a third party.",
+    claude: "Use these project-local voice notes as guidance that complements AI by preserving reusable, inspectable constraints for drafting or reviewing prose. Treat them as guidance, not identity proof.",
+    system: "Follow these local voice constraints as reusable, inspectable guidance for AI-assisted prose. Do not expose private source text or claim authorship identity from them.",
   }[format];
   const lines = [
     header,
@@ -49,6 +49,13 @@ function renderPrompt(profile, format) {
     lines.push(`- ${rule}`);
   }
+  if (profile.guidance.formatting && profile.guidance.formatting.length) {
+    lines.push("", "## Formatting: Vary Deliberately (Not Voice)", "");
+    for (const rule of profile.guidance.formatting) {
+      lines.push(`- ${rule}`);
+    }
+  }
   lines.push("", "## Avoid", "");
   for (const item of profile.guidance.avoid) {
     lines.push(`- ${item}`);

package/src/v2/text-utils.js CHANGED Viewed

@@ -75,17 +75,20 @@ export function characterNgrams(text, size = 3) {
 export function distribution(values) {
   if (!values.length) {
-    return { count: 0, min: 0, max: 0, mean: 0, median: 0, p25: 0, p75: 0 };
+    return { count: 0, min: 0, max: 0, mean: 0, median: 0, p25: 0, p75: 0, stdev: 0 };
   }
   const sorted = [...values].sort((a, b) => a - b);
+  const mean = sorted.reduce((sum, value) => sum + value, 0) / sorted.length;
+  const variance = sorted.reduce((sum, value) => sum + (value - mean) ** 2, 0) / sorted.length;
   return {
     count: sorted.length,
     min: sorted[0],
     max: sorted[sorted.length - 1],
-    mean: round(sorted.reduce((sum, value) => sum + value, 0) / sorted.length, 2),
+    mean: round(mean, 2),
     median: percentile(sorted, 0.5),
     p25: percentile(sorted, 0.25),
     p75: percentile(sorted, 0.75),
+    stdev: round(Math.sqrt(variance), 2),
   };
 }