npm - dravoice - Versions diffs - 0.1.1 → 0.1.3 - Mend

dravoice 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

package/LICENSE +21 -21
package/README.md +102 -36
package/bin/dravoice.js +11 -10
package/package.json +47 -45
package/src/index.js +874 -197
package/src/v2/analyzers/discourse.js +63 -52
package/src/v2/analyzers/evidence.js +73 -38
package/src/v2/analyzers/lexical.js +114 -58
package/src/v2/analyzers/register.js +46 -34
package/src/v2/analyzers/rhetorical-shape.js +59 -48
package/src/v2/analyzers/rhythm.js +39 -47
package/src/v2/analyzers/structure.js +24 -24
package/src/v2/benchmark.js +574 -568
package/src/v2/brief.js +154 -146
package/src/v2/config.js +78 -0
package/src/v2/document-model.js +351 -260
package/src/v2/inspect.js +67 -67
package/src/v2/io-utils.js +51 -0
package/src/v2/profile.js +155 -129
package/src/v2/prompt.js +65 -64
package/src/v2/review.js +177 -219
package/src/v2/revise-plan.js +130 -33
package/src/v2/stylometry.js +123 -17
package/src/v2/text-utils.js +123 -123

package/src/v2/revise-plan.js CHANGED Viewed

@@ -1,18 +1,16 @@
-import fs from "node:fs";
 import path from "node:path";
 import { evidenceTypes, isAbstractClaim } from "./analyzers/evidence.js";
 import { transitionLabel } from "./analyzers/discourse.js";
 import { moveFor } from "./analyzers/rhetorical-shape.js";
 import { parseDocument } from "./document-model.js";
+import { readUtf8FileBounded } from "./io-utils.js";
 import { buildVoiceProfileV2, loadVoicePackV2 } from "./profile.js";
 import { clampScore, round } from "./text-utils.js";
 import {
   STYLOMETRIC_REFERENCES,
-  calibratedFamilyDrift,
-  defaultStyleThresholds,
-  distanceByFamily,
-  familyScoreFromDistance,
+  familyDiagnosticsFor,
   familyWeight,
+  styleDistanceFromDiagnostics,
 } from "./stylometry.js";
 const MAX_ACTIONS = 8;
@@ -29,18 +27,20 @@ const EDITABILITY = {
 export function revisePlanDraftV2({ file, voice, cwd = process.cwd(), maxActions = MAX_ACTIONS }) {
   const sourceProfile = typeof voice === "string" ? loadVoicePackV2(voice) : voice;
-  const filePath = path.resolve(file);
+  const filePath = resolvePath(cwd, file);
   const draftDocument = parseDocument({
     filePath,
     rootDir: cwd,
-    contents: fs.readFileSync(filePath, "utf8"),
+    contents: readUtf8FileBounded(filePath, { label: "Draft file", maxBytes: 2 * 1024 * 1024 }),
   });
   const draftProfile = buildVoiceProfileV2({ documents: [draftDocument] });
   const familyDiagnostics = familyDiagnosticsFor(sourceProfile, draftProfile);
+  const rollingWindows = rollingWindowsFor({ sourceProfile, draftDocument });
   const actions = rankedActions({
     sourceProfile,
     draftDocument,
     familyDiagnostics,
+    rollingWindows,
     maxActions,
   });
@@ -55,11 +55,12 @@ export function revisePlanDraftV2({ file, voice, cwd = process.cwd(), maxActions
     },
     summary: {
       corpusConfidence: sourceProfile.source.confidence,
-      distance: Math.round(Object.values(familyDiagnostics).reduce((sum, item) => sum + (100 - item.score), 0) / Object.keys(familyDiagnostics).length),
+      distance: styleDistanceFromDiagnostics(familyDiagnostics),
       familyScores: Object.fromEntries(Object.entries(familyDiagnostics).map(([family, item]) => [family, item.score])),
       familyDistances: Object.fromEntries(Object.entries(familyDiagnostics).map(([family, item]) => [family, item.distance])),
       familyDrift: Object.fromEntries(Object.entries(familyDiagnostics).map(([family, item]) => [family, item.drift])),
       thresholds: Object.fromEntries(Object.entries(familyDiagnostics).map(([family, item]) => [family, item.threshold])),
+      rollingWindows,
     },
     actions,
   };
@@ -102,35 +103,14 @@ export function renderRevisePlanV2(plan) {
   return lines.join("\n");
 }
-function familyDiagnosticsFor(sourceProfile, draftProfile) {
-  const thresholds = thresholdMap(sourceProfile);
-  return Object.fromEntries(Object.keys(sourceProfile.families).map((family) => {
-    const distance = distanceByFamily(
-      family,
-      sourceProfile.families[family].features,
-      draftProfile.families[family].features,
-    );
-    const threshold = thresholds[family]?.threshold ?? defaultStyleThresholds()[family]?.threshold ?? 0.4;
-    return [family, {
-      distance,
-      threshold,
-      drift: calibratedFamilyDrift(distance, threshold),
-      score: familyScoreFromDistance(distance, threshold),
-    }];
-  }));
-}
-function thresholdMap(sourceProfile) {
-  return sourceProfile.calibration?.styleThresholds?.families ?? defaultStyleThresholds();
-}
-function rankedActions({ sourceProfile, draftDocument, familyDiagnostics, maxActions }) {
+function rankedActions({ sourceProfile, draftDocument, familyDiagnostics, rollingWindows, maxActions }) {
   const confidence = confidenceWeight(sourceProfile.source.confidence.band);
   const actions = [
     ...evidenceActions({ sourceProfile, draftDocument, familyDiagnostics, confidence }),
     ...rhythmActions({ sourceProfile, draftDocument, familyDiagnostics, confidence }),
     ...shapeActions({ sourceProfile, draftDocument, familyDiagnostics, confidence }),
     ...discourseActions({ sourceProfile, draftDocument, familyDiagnostics, confidence }),
+    ...rollingWindowActions({ rollingWindows, confidence }),
     ...documentLevelActions({ sourceProfile, draftDocument, familyDiagnostics, confidence }),
   ].filter((action) => action.actionScore > 0);
@@ -165,6 +145,7 @@ function evidenceActions({ sourceProfile, draftDocument, familyDiagnostics, conf
       unit: { type: "sentence", line: sentence.line },
       confidence,
       drift,
+      stability: familyDiagnostics[family]?.stability,
       localMismatch,
       why: "This sentence carries a broad claim pattern without the concrete support rate learned from the source corpus.",
       reviseBy: "Add concrete support: a scene, quote, number, date, citation, URL, sensory detail, or specific example the writer can verify.",
@@ -194,6 +175,7 @@ function rhythmActions({ sourceProfile, draftDocument, familyDiagnostics, confid
       unit: { type: "sentence", line: sentence.line },
       confidence,
       drift,
+      stability: familyDiagnostics[family]?.stability,
       localMismatch,
       why: `This sentence is ${direction} than the learned sentence-length band (${source.p25}-${source.p75} words).`,
       reviseBy: "Adjust sentence pacing toward the learned range by splitting, tightening, or pairing it with a deliberately shorter sentence.",
@@ -221,6 +203,7 @@ function shapeActions({ sourceProfile, draftDocument, familyDiagnostics, confide
     unit: { type: "opening", line: draftDocument.sentences[0].line },
     confidence,
     drift,
+    stability: familyDiagnostics[family]?.stability,
     localMismatch,
     why: `Draft opening moves (${draftOpening.join(" -> ")}) drift from the learned opening pattern (${sourceOpening.join(" -> ")}).`,
     reviseBy: "Rework the opening toward a compatible scene, claim, contrast, reflection, or evidence sequence without inventing new facts.",
@@ -252,6 +235,7 @@ function discourseActions({ sourceProfile, draftDocument, familyDiagnostics, con
       unit: { type: "sentence", line: sentence.line },
       confidence,
       drift,
+      stability: familyDiagnostics[family]?.stability,
       localMismatch: Math.min(1, localMismatch),
       why: `The draft overuses ${label} transitions compared with the source corpus.`,
       reviseBy: "Vary the sentence turn: replace a repeated transition with a callback, concrete example, or direct continuation where it fits.",
@@ -273,6 +257,7 @@ function documentLevelActions({ sourceProfile, draftDocument, familyDiagnostics,
       unit: { type: "document", line: draftDocument.sentences[0]?.line ?? 1 },
       confidence,
       drift,
+      stability: familyDiagnostics[family]?.stability,
       localMismatch: 0.7,
       why: documentLevelWhy(sourceProfile, family),
       reviseBy: documentLevelReviseBy(family),
@@ -301,13 +286,121 @@ function documentLevelReviseBy(family) {
   return "Reorder the first section so the piece starts with a structure the source corpus actually uses.";
 }
-function makeAction({ family, ordinal, priority, unit, confidence, drift, localMismatch, why, reviseBy }) {
+function rollingWindowsFor({ sourceProfile, draftDocument }) { // Profiles overlapping sentence windows of the draft against the source profile; returns up to four windows whose top-ranked family has positive drift.
+  const sentences = draftDocument.sentences;
+  if (sentences.length < 5) { // Drafts with fewer than five sentences produce no windows.
+    return [];
+  }
+  const windowSize = sentences.length < 8 ? 3 : 4; // 3-sentence windows for 5-7 sentences, otherwise 4.
+  const windowStarts = rollingWindowStarts(sentences.length, windowSize, 2); // Window starts advance two sentences at a time.
+  const result = [];
+  for (const start of windowStarts) {
+    const windowSentences = sentences.slice(start, start + windowSize);
+    const windowProfile = buildVoiceProfileV2({ documents: [documentForSentences(draftDocument, windowSentences, start)] }); // Each window is profiled as if it were a standalone document.
+    const diagnostics = familyDiagnosticsFor(sourceProfile, windowProfile);
+    const ranked = ["evidence", "rhythm", "discourse", "rhetoricalShape", "lexical"]
+      .map((family) => ({ family, ...diagnostics[family] })) // NOTE(review): a family absent from diagnostics spreads as nothing, leaving drift/score undefined and the comparator below NaN - confirm all five are always present.
+      .sort((left, right) => right.drift - left.drift || (100 - right.score) - (100 - left.score)); // Highest drift first; ties go to the lower score.
+    const best = ranked[0];
+    if (best?.drift > 0) { // Only the leading family is reported, and only when it actually drifts.
+      result.push({
+        family: best.family,
+        startSentence: start + 1, // 1-based sentence positions.
+        endSentence: start + windowSentences.length,
+        startLine: windowSentences[0].line,
+        endLine: windowSentences.at(-1).line,
+        distance: best.distance,
+        drift: best.drift,
+        score: best.score,
+        threshold: best.threshold,
+        stability: best.stability,
+      });
+    }
+  }
+  return result
+    .sort((left, right) => right.drift - left.drift || left.startLine - right.startLine) // Strongest drift first; earlier line wins ties.
+    .slice(0, 4); // Keep at most four windows.
+}
+function rollingWindowStarts(sentenceCount, windowSize, stride) { // Lists window start indices: 0, stride, 2*stride, ... for as long as a full window still fits.
+  const starts = [];
+  for (let start = 0; start <= sentenceCount - windowSize; start += stride) {
+    starts.push(start);
+  }
+  const finalStart = Math.max(0, sentenceCount - windowSize); // Start of the window that ends on the last sentence (0 when sentenceCount < windowSize).
+  if (!starts.includes(finalStart)) { // Appended only when the stride did not already land on it, so the tail is always covered.
+    starts.push(finalStart);
+  }
+  return starts;
+}
+function documentForSentences(draftDocument, sentences, windowIndex) { // Wraps a run of sentences in a single-paragraph, heading-less document object.
+  const text = sentences.map((sentence) => sentence.text).join(" "); // Sentence texts joined with single spaces.
+  const block = {
+    type: "paragraph",
+    line: sentences[0]?.line ?? 1, // Falls back to line 1 when the sentence list is empty.
+    heading: null,
+    headingId: null,
+    headingDepth: 0,
+    lines: [text],
+  };
+  return {
+    file: `${draftDocument.file ?? "draft"}#window-${windowIndex + 1}`, // File label gets a "#window-N" suffix, N being windowIndex + 1.
+    path: draftDocument.path,
+    headings: [],
+    sections: [{ heading: null, blocks: [block] }],
+    blocks: [block], // Same block object as in sections.
+    paragraphs: [{
+      type: "paragraph",
+      line: block.line,
+      heading: null,
+      headingId: null,
+      text,
+    }],
+    sentences, // The caller's sentence objects are reused, not copied.
+    wordCount: sentences.reduce((sum, sentence) => sum + sentence.tokens.length, 0), // Total of the per-sentence token counts.
+    text,
+  };
+}
+function rollingWindowActions({ rollingWindows, confidence }) { // Turns each rolling window into one action object via makeAction.
+  return rollingWindows.map((window, index) => makeAction({
+    family: window.family,
+    ordinal: `window-${index + 1}`, // 1-based, following the order of rollingWindows.
+    priority: window.family === "evidence" ? "review" : "consider", // Evidence windows are "review"; every other family is "consider".
+    unit: { type: "window", line: window.startLine, endLine: window.endLine },
+    confidence,
+    drift: window.drift,
+    stability: window.stability,
+    localMismatch: Math.min(1, window.drift / Math.max(1, window.drift + 0.5)), // Equals drift while drift <= 0.5 (the denominator floors at 1); above that it is drift / (drift + 0.5). Capped at 1.
+    why: `Sentences ${window.startSentence}-${window.endSentence} show localized ${window.family} drift beyond the writer's calibrated range.`,
+    reviseBy: rollingWindowReviseBy(window.family),
+  }));
+}
+function rollingWindowReviseBy(family) { // Maps a family name to the revision hint used for a window-level action.
+  if (family === "evidence") {
+    return "Add or move concrete support into this local passage, or narrow the unsupported claims in the same window.";
+  }
+  if (family === "rhythm") {
+    return "Revise this passage's sentence and paragraph pacing before changing the whole draft.";
+  }
+  if (family === "discourse") {
+    return "Vary the local sentence turns, callbacks, and transitions in this passage.";
+  }
+  if (family === "rhetoricalShape") {
+    return "Adjust this passage's move sequence so the local claim, turn, evidence, and implication pattern is less abrupt.";
+  }
+  return "Revise this local passage for style fit before making document-wide lexical changes."; // Fallback for any family not matched above.
+}
+function makeAction({ family, ordinal, priority, unit, confidence, drift, stability = 0.7, localMismatch, why, reviseBy }) {
   return {
     id: `v2.revise-plan.${family}.${ordinal}`,
     family,
     priority,
     unit,
-    actionScore: clampScore(100 * confidence * familyWeight(family) * (EDITABILITY[family] ?? 0.6) * drift * localMismatch),
+    actionScore: clampScore(100 * confidence * familyWeight(family) * (EDITABILITY[family] ?? 0.6) * Math.max(0.35, stability) * drift * localMismatch),
     localMismatch: round(localMismatch, 3),
     why,
     reviseBy,
@@ -335,6 +428,10 @@ function displayPath(filePath, cwd) {
   return filePath.split(path.sep).join("/");
 }
+function resolvePath(cwd, value) { // Returns value untouched when it is already absolute; otherwise resolves it against cwd.
+  return path.isAbsolute(value) ? value : path.resolve(cwd, value);
+}
 function capitalize(value) { // Upper-cases the first character and leaves the rest unchanged.
   return value.charAt(0).toUpperCase() + value.slice(1);
 }

package/src/v2/stylometry.js CHANGED Viewed

@@ -31,7 +31,7 @@ const FAMILY_WEIGHTS = {
 export function defaultStyleThresholds() { // Builds one default threshold entry per family listed in DEFAULT_THRESHOLDS.
   return Object.fromEntries(Object.entries(DEFAULT_THRESHOLDS).map(([family, threshold]) => [
     family,
-    { threshold, observations: 0 },
+    { threshold, observations: 0, stability: 0.45 }, // Added side of the diff: default entries carry an explicit stability of 0.45.
   ]));
 }
@@ -59,14 +59,14 @@ export function distanceByFamily(family, sourceFeatures, draftFeatures) {
     return evidenceDistance(sourceFeatures, draftFeatures);
   }
   if (family === "structure") {
-    return shapeDistance(sourceFeatures, draftFeatures);
+    return structureDistance(sourceFeatures, draftFeatures);
   }
   return 0;
 }
 export function familyScoreFromDistance(distance, threshold) { // Converts a family distance into a score relative to that family's threshold.
-  const basis = Math.max(0.01, threshold * 2);
-  return clampScore(100 - (distance / basis) * 100);
+  const ratio = distance / Math.max(0.01, threshold); // Threshold is floored at 0.01 so the division cannot hit zero.
+  return clampScore(100 / (1 + Math.exp(3 * (ratio - 1.35)))); // Logistic falloff: the value passed to clampScore is 50 when distance is 1.35x the threshold.
 }
 export function calibratedFamilyDrift(distance, threshold) {
@@ -82,6 +82,46 @@ export function percentile(values, ratio) {
   return sorted[index];
 }
+export function familyDiagnosticsFor(sourceProfile, draftProfile) { // Returns { distance, threshold, stability, drift, score } for every family key in sourceProfile.families.
+  const thresholds = sourceProfile.calibration?.styleThresholds?.families ?? defaultStyleThresholds(); // Calibrated thresholds from the profile when present, else the defaults.
+  return Object.fromEntries(Object.keys(sourceProfile.families).map((family) => {
+    const distance = distanceByFamily(
+      family,
+      sourceProfile.families[family].features,
+      draftProfile.families[family].features, // NOTE(review): a family missing from draftProfile.families would throw here - confirm both profiles always share the same keys.
+    );
+    const thresholdData = thresholds[family] ?? defaultStyleThresholds()[family] ?? { threshold: 0.4, stability: 0.45 }; // Fallback chain: calibrated entry, default entry, then a hard-coded pair.
+    const threshold = thresholdData.threshold ?? 0.4;
+    return [family, {
+      distance,
+      threshold,
+      stability: thresholdData.stability ?? stabilityFromObservationCount(thresholdData.observations ?? 0), // An explicit stability wins; otherwise it is derived from the observation count.
+      drift: calibratedFamilyDrift(distance, threshold),
+      score: familyScoreFromDistance(distance, threshold),
+    }];
+  }));
+}
+export function styleDistanceFromDiagnostics(familyDiagnostics) { // Collapses per-family diagnostics into one rounded number: the weighted mean of (100 - score).
+  const entries = Object.entries(familyDiagnostics);
+  const weighted = entries.map(([family, item]) => [
+    100 - item.score,
+    familyWeight(family) * Math.max(0.35, item.stability ?? 0.45), // Weight = family weight times stability; stability defaults to 0.45 and is floored at 0.35.
+  ]);
+  return Math.round(weightedMean(weighted));
+}
+export function stabilityFromDistances(values) { // Stability from the spread of distance samples: 1 minus the coefficient of variation, clamped to [0.35, 1].
+  const finite = values.filter((value) => Number.isFinite(value)); // Non-finite entries are ignored.
+  if (finite.length < 2) { // Too few samples to measure spread: fall back to the count-based value.
+    return stabilityFromObservationCount(finite.length);
+  }
+  const mean = finite.reduce((sum, value) => sum + value, 0) / finite.length;
+  const variance = finite.reduce((sum, value) => sum + (value - mean) ** 2, 0) / finite.length; // Population variance (divides by the sample count).
+  const coefficient = Math.sqrt(variance) / Math.max(0.01, mean); // Mean is floored at 0.01 so the division cannot hit zero.
+  return round(Math.max(0.35, Math.min(1, 1 - coefficient)), 3);
+}
 function rhythmDistance(source, draft) {
   return weightedMean([
     [distributionDelta(source.sentenceWords, draft.sentenceWords), 0.40],
@@ -95,7 +135,12 @@ function rhythmDistance(source, draft) {
 function lexicalDistance(source, draft) {
   return weightedMean([
     [topItemDistance(source.functionWords, draft.functionWords), 1.00],
-    [topItemDistance(source.characterTrigrams, draft.characterTrigrams), 0.85],
+    [topItemDistance(source.functionWordBigrams, draft.functionWordBigrams), 0.75],
+    [topItemDistance(source.maskedCharacterFourgrams, draft.maskedCharacterFourgrams), 1.00],
+    [topItemDistance(source.characterTrigrams, draft.characterTrigrams), 0.45],
+    [topItemDistance(source.sentenceInitialTokens, draft.sentenceInitialTokens), 0.35],
+    [topItemDistance(source.sentenceFinalTokens, draft.sentenceFinalTokens), 0.35],
+    [topItemDistance(source.punctuationNgrams, draft.punctuationNgrams), 0.45],
     [punctuationDistance(source.punctuation, draft.punctuation), 0.85],
     [Math.abs((source.vocabularyRichness?.contentTypeTokenRatio ?? 0) - (draft.vocabularyRichness?.contentTypeTokenRatio ?? 0)), 0.25],
     [distributionDelta(source.wordLength, draft.wordLength), 0.40],
@@ -114,15 +159,18 @@ function discourseDistance(source, draft) {
   const transitionDelta = rateMapDistance(source.transitionRates, draft.transitionRates);
   const callbackDelta = Math.abs((source.sentenceCallbacks ?? 0) - (draft.sentenceCallbacks ?? 0));
   return weightedMean([
-    [transitionDelta, 0.75],
-    [callbackDelta, 0.25],
+    [transitionDelta, 0.55],
+    [topItemDistance(source.transitionBigrams, draft.transitionBigrams), 0.25],
+    [topItemDistance(source.transitionTrigrams, draft.transitionTrigrams), 0.10],
+    [callbackDelta, 0.20],
   ]);
 }
 function evidenceDistance(source, draft) {
   return weightedMean([
-    [Math.abs((source.evidenceSentenceRate ?? 0) - (draft.evidenceSentenceRate ?? 0)), 0.36],
-    [Math.abs((source.claimSentenceRate ?? 0) - (draft.claimSentenceRate ?? 0)), 0.18],
+    [Math.abs((source.evidenceSentenceRate ?? 0) - (draft.evidenceSentenceRate ?? 0)), 0.30],
+    [Math.abs((source.claimSentenceRate ?? 0) - (draft.claimSentenceRate ?? 0)), 0.14],
+    [Math.max(0, (source.supportedClaimRate ?? 0) - (draft.supportedClaimRate ?? 0)), 0.20],
     [Math.max(0, (draft.unsupportedClaimRate ?? 0) - (source.unsupportedClaimRate ?? 0)), 0.26],
     [topItemDistance(source.evidenceTypes, draft.evidenceTypes), 0.20],
   ]);
@@ -130,9 +178,21 @@ function evidenceDistance(source, draft) {
 function shapeDistance(source, draft) { // Weighted mean of opening-move and move-pattern distances between source and draft features.
   return weightedMean([
-    [sequenceDistance(source.openingMoves, draft.openingMoves), 0.55],
-    [topItemDistance(source.moveRates, draft.moveRates), 0.25],
-    [topItemDistance(source.commonSequences, draft.commonSequences), 0.20],
+    [sequenceDistance(source.openingMoves, draft.openingMoves), 0.35],
+    [topItemDistance(source.openingMovePatterns, draft.openingMovePatterns), 0.25],
+    [topItemDistance(source.moveRates, draft.moveRates), 0.20],
+    [topItemDistance(source.moveBigrams ?? source.commonSequences, draft.moveBigrams ?? draft.commonSequences), 0.25], // Uses moveBigrams when present, otherwise falls back to commonSequences.
+    [topItemDistance(source.moveTrigrams, draft.moveTrigrams), 0.15],
+  ]);
+}
+function structureDistance(source, draft) { // Weighted mean of structural deltas between source and draft features.
+  return weightedMean([
+    [distributionDelta(source.sectionWords, draft.sectionWords), 0.35],
+    [distributionDelta(source.headingCount, draft.headingCount), 0.20],
+    [Math.abs((source.listDocumentRate ?? 0) - (draft.listDocumentRate ?? 0)), 0.18], // Missing rates count as 0.
+    [Math.abs((source.quoteDocumentRate ?? 0) - (draft.quoteDocumentRate ?? 0)), 0.12],
+    [sequenceDistance(source.openingMoves, draft.openingMoves), 0.15],
   ]);
 }
@@ -160,14 +220,22 @@ function rateMapDistance(source = {}, draft = {}) {
   return keys.reduce((sum, key) => sum + Math.abs((source[key] ?? 0) - (draft[key] ?? 0)), 0) / keys.length;
 }
-function topItemDistance(sourceItems = [], draftItems = []) {
+function topItemDistance(sourceItems, draftItems) { // Blends rate delta, cosine distance, and Jensen-Shannon distance between two item lists; null when there is nothing to compare.
+  if (!Array.isArray(sourceItems) || !Array.isArray(draftItems)) { // Either side missing or not an array: no measurement.
+    return null; // weightedMean filters out non-finite values, so a null result is skipped rather than counted as 0.
+  }
+  if (sourceItems.length === 0 && draftItems.length === 0) { // Both lists empty: no measurement.
+    return null;
+  }
   const source = normalizedItemMap(sourceItems);
   const draft = normalizedItemMap(draftItems);
   const delta = rateMapDistance(source, draft);
   const cosine = cosineDistance(source, draft);
+  const jsd = jensenShannonDistance(source, draft);
   return weightedMean([
-    [delta, 0.45],
-    [cosine, 0.55],
+    [delta, 0.30],
+    [cosine, 0.45],
+    [jsd, 0.25],
   ]);
 }
@@ -204,6 +272,30 @@ function cosineDistance(left, right) {
   return Math.max(0, Math.min(1, 1 - dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm))));
 }
+function jensenShannonDistance(left, right) { // Square root of the averaged KL divergences of left and right, each measured against their per-key midpoint.
+  const keys = Array.from(new Set([...Object.keys(left), ...Object.keys(right)])); // Union of keys from both maps.
+  if (keys.length === 0) { // No keys on either side: zero distance.
+    return 0;
+  }
+  const midpoint = {};
+  for (const key of keys) {
+    midpoint[key] = ((left[key] ?? 0) + (right[key] ?? 0)) / 2; // Missing keys count as 0.
+  }
+  return Math.sqrt((klDivergence(left, midpoint, keys) + klDivergence(right, midpoint, keys)) / 2); // NOTE(review): if rounding ever pushed the sum below zero this would be NaN, which weightedMean silently drops - confirm that is acceptable.
+}
+function klDivergence(source, target, keys) { // Sums source * log2(source / target) over the given keys.
+  let sum = 0;
+  for (const key of keys) {
+    const sourceValue = source[key] ?? 0;
+    const targetValue = target[key] ?? 0;
+    if (sourceValue > 0 && targetValue > 0) { // Terms where either value is zero or negative contribute nothing.
+      sum += sourceValue * Math.log2(sourceValue / targetValue);
+    }
+  }
+  return sum;
+}
 function sequenceDistance(source = [], draft = []) {
   if (source.length === 0 && draft.length === 0) {
     return 0;
@@ -228,9 +320,23 @@ function relativeDelta(sourceValue, draftValue) {
 }
 function weightedMean(weightedValues) { // Weighted average of [value, weight] pairs.
-  const totalWeight = weightedValues.reduce((sum, [, weight]) => sum + weight, 0);
+  const usableValues = weightedValues.filter(([value, weight]) => Number.isFinite(value) && weight > 0); // Pairs with a non-finite value (null, undefined, NaN) or a non-positive weight are excluded.
+  const totalWeight = usableValues.reduce((sum, [, weight]) => sum + weight, 0);
   if (totalWeight === 0) { // Nothing usable: report 0.
     return 0;
   }
-  return round(weightedValues.reduce((sum, [value, weight]) => sum + value * weight, 0) / totalWeight, 3);
+  return round(usableValues.reduce((sum, [value, weight]) => sum + value * weight, 0) / totalWeight, 3);
+}
+function stabilityFromObservationCount(observations) { // Step table from observation count to stability: >=5 -> 0.85, >=3 -> 0.7, >=2 -> 0.6, otherwise 0.45.
+  if (observations >= 5) {
+    return 0.85;
+  }
+  if (observations >= 3) {
+    return 0.7;
+  }
+  if (observations >= 2) {
+    return 0.6;
+  }
+  return 0.45; // Zero or one observation.
 }