dravoice 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dravoice",
3
- "version": "0.1.1",
3
+ "version": "0.1.2",
4
4
  "description": "Compile article voice profiles into reusable LLM writing context, evidence-first briefs, and deterministic draft review notes.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -12,6 +12,7 @@ const TRANSITIONS = {
12
12
  export function analyzeDiscourse(documents) {
13
13
  const sentences = documents.flatMap((document) => document.sentences);
14
14
  const labels = sentences.map((sentence) => transitionLabel(sentence.text));
15
+ const nonPlainLabels = labels.filter((label) => label !== "plain");
15
16
  const transitionRates = {};
16
17
  for (const key of Object.keys(TRANSITIONS)) {
17
18
  transitionRates[key] = rate(labels.filter((label) => label === key).length, sentences.length, 2);
@@ -22,10 +23,12 @@ export function analyzeDiscourse(documents) {
22
23
  confidence: sentences.length >= 12 ? "medium" : "low",
23
24
  features: {
24
25
  transitionRates,
25
- transitionSequence: labels.filter(Boolean).slice(0, 12),
26
+ transitionSequence: labels.slice(0, 12),
27
+ transitionBigrams: topItems(sequenceNgrams(labels, 2), 12),
28
+ transitionTrigrams: topItems(sequenceNgrams(labels, 3), 12),
26
29
  sentenceCallbacks: callbackRate(sentences),
27
30
  },
28
- examples: topItems(labels.filter(Boolean), 5).map((item) => item.value),
31
+ examples: topItems(nonPlainLabels, 5).map((item) => item.value),
29
32
  warnings: sentences.length < 12 ? ["Discourse confidence is limited because the corpus has fewer than 12 sentences."] : [],
30
33
  revisionHandles: ["Compare how sentences turn, contrast, explain, and return to earlier ideas."],
31
34
  };
@@ -50,3 +53,11 @@ function callbackRate(sentences) {
50
53
  }
51
54
  return rate(callbacks, Math.max(1, sentences.length - 1), 2);
52
55
  }
56
+
57
/**
 * Builds the sliding-window n-grams of an ordered label sequence.
 *
 * Every run of `size` consecutive values is joined with " -> ", so
 * ["a", "b", "c"] with size 2 yields ["a -> b", "b -> c"].
 *
 * @param {string[]} values - Ordered labels to window over.
 * @param {number} size - Window length; expected to be a positive integer.
 * @returns {string[]} One joined string per window, in order. Empty when
 *   `size` is not a positive integer or exceeds `values.length`.
 */
function sequenceNgrams(values, size) {
  // A zero or negative window would otherwise emit empty or wrongly sized
  // "grams" that pollute the frequency counts built from this list.
  if (!Number.isInteger(size) || size < 1) {
    return [];
  }
  const result = [];
  const lastStart = values.length - size;
  for (let start = 0; start <= lastStart; start += 1) {
    result.push(values.slice(start, start + size).join(" -> "));
  }
  return result;
}
@@ -14,34 +14,69 @@ const EVIDENCE_PATTERNS = {
14
14
  const ABSTRACT_CLAIM_RE = /\b(always|never|everyone|everything|nothing|best|better|worse|important|obvious|clearly|should|must|need to|have to|all|none|every)\b/i;
15
15
 
16
16
  export function analyzeEvidence(documents) {
17
- const sentences = documents.flatMap((document) => document.sentences);
18
- const evidenceSentences = sentences.filter((sentence) => evidenceTypes(sentence.text).length > 0);
19
- const claimSentences = sentences.filter((sentence) => ABSTRACT_CLAIM_RE.test(sentence.text));
20
- const typeValues = evidenceSentences.flatMap((sentence) => evidenceTypes(sentence.text));
17
+ const documentResults = documents.map(documentEvidence);
18
+ const sentenceCount = documentResults.reduce((sum, item) => sum + item.sentenceCount, 0);
19
+ const evidenceSentenceCount = documentResults.reduce((sum, item) => sum + item.evidenceSentenceCount, 0);
20
+ const claimSentenceCount = documentResults.reduce((sum, item) => sum + item.claimSentenceCount, 0);
21
+ const supportedClaimCount = documentResults.reduce((sum, item) => sum + item.supportedClaimCount, 0);
22
+ const unsupportedClaimCount = documentResults.reduce((sum, item) => sum + item.unsupportedClaimCount, 0);
23
+ const typeValues = documentResults.flatMap((item) => item.typeValues);
21
24
 
22
25
  return {
23
26
  family: "evidence",
24
- confidence: sentences.length >= 12 ? "medium" : "low",
27
+ confidence: sentenceCount >= 12 ? "medium" : "low",
25
28
  features: {
26
- sentenceCount: sentences.length,
27
- evidenceSentenceCount: evidenceSentences.length,
28
- evidenceSentenceRate: rate(evidenceSentences.length, sentences.length, 2),
29
- claimSentenceRate: rate(claimSentences.length, sentences.length, 2),
30
- unsupportedClaimRate: rate(Math.max(0, claimSentences.length - evidenceSentences.length), sentences.length, 2),
29
+ sentenceCount,
30
+ evidenceSentenceCount,
31
+ evidenceSentenceRate: rate(evidenceSentenceCount, sentenceCount, 2),
32
+ claimSentenceCount,
33
+ claimSentenceRate: rate(claimSentenceCount, sentenceCount, 2),
34
+ supportedClaimRate: rate(supportedClaimCount, Math.max(1, claimSentenceCount), 2),
35
+ unsupportedClaimRate: rate(unsupportedClaimCount, Math.max(1, claimSentenceCount), 2),
31
36
  evidenceTypes: topItems(typeValues, 8),
32
37
  },
33
38
  examples: topItems(typeValues, 4).map((item) => `${item.value}: ${item.count}`),
34
- warnings: sentences.length < 12 ? ["Evidence confidence is limited because the corpus has fewer than 12 sentences."] : [],
39
+ warnings: sentenceCount < 12 ? ["Evidence confidence is limited because the corpus has fewer than 12 sentences."] : [],
35
40
  revisionHandles: ["Compare how broad claims are supported by concrete scenes, numbers, quotes, citations, or examples."],
36
41
  };
37
42
  }
38
43
 
39
- export function evidenceTypes(text) {
40
- return Object.entries(EVIDENCE_PATTERNS)
41
- .filter(([, pattern]) => pattern.test(text))
42
- .map(([type]) => type);
43
- }
44
-
45
- export function isAbstractClaim(text) {
46
- return ABSTRACT_CLAIM_RE.test(text);
47
- }
44
/**
 * Computes the evidence and claim tallies for a single document.
 *
 * Evidence types are detected once per sentence and that cached result is
 * reused for the evidence-sentence count, the nearby-evidence check and the
 * flattened `typeValues` list, so the three can never disagree.
 *
 * @param {{ sentences: Array<{ text: string }> }} document - Document whose
 *   `sentences` each expose a `text` string.
 * @returns {{
 *   sentenceCount: number,
 *   evidenceSentenceCount: number,
 *   claimSentenceCount: number,
 *   supportedClaimCount: number,
 *   unsupportedClaimCount: number,
 *   typeValues: string[]
 * }} Per-document counts plus every detected evidence type, in sentence order.
 */
function documentEvidence(document) {
  const { sentences } = document;
  const sentenceEvidenceTypes = sentences.map((sentence) => evidenceTypes(sentence.text));
  const evidenceSentenceCount = sentenceEvidenceTypes.filter((types) => types.length > 0).length;

  // Indexes (not sentences) are kept so each claim can be checked against
  // the evidence found in its neighbouring sentences.
  const claimIndexes = [];
  sentences.forEach((sentence, index) => {
    if (ABSTRACT_CLAIM_RE.test(sentence.text)) {
      claimIndexes.push(index);
    }
  });
  const supportedClaimCount = claimIndexes
    .filter((index) => hasNearbyEvidence(sentenceEvidenceTypes, index))
    .length;

  return {
    sentenceCount: sentences.length,
    evidenceSentenceCount,
    claimSentenceCount: claimIndexes.length,
    supportedClaimCount,
    unsupportedClaimCount: claimIndexes.length - supportedClaimCount,
    // Sentences without evidence contribute empty arrays, so flattening the
    // cache equals re-running detection on the evidence sentences only.
    typeValues: sentenceEvidenceTypes.flat(),
  };
}
62
+
63
/**
 * Reports whether any sentence close to a claim carries evidence.
 *
 * The inspected window is inclusive and centred on the claim, so evidence in
 * the claim sentence itself also counts. The window is clipped to the bounds
 * of the list.
 *
 * @param {string[][]} sentenceEvidenceTypes - Evidence types per sentence, in
 *   document order; an empty array means "no evidence in that sentence".
 * @param {number} claimIndex - Index of the claim sentence.
 * @param {number} [radius=2] - How many sentences on each side to inspect.
 * @returns {boolean} True when at least one sentence in the window has a
 *   non-empty evidence-type list.
 */
function hasNearbyEvidence(sentenceEvidenceTypes, claimIndex, radius = 2) {
  const first = Math.max(0, claimIndex - radius);
  const last = Math.min(sentenceEvidenceTypes.length - 1, claimIndex + radius);
  for (let index = first; index <= last; index += 1) {
    if (sentenceEvidenceTypes[index].length > 0) {
      return true;
    }
  }
  return false;
}
73
+
74
/**
 * Lists the evidence types whose pattern matches the given text.
 *
 * @param {string} text - Sentence text to test.
 * @returns {string[]} Keys of EVIDENCE_PATTERNS whose pattern matched, in the
 *   table's own key order.
 */
export function evidenceTypes(text) {
  const matchedTypes = [];
  for (const [type, pattern] of Object.entries(EVIDENCE_PATTERNS)) {
    if (pattern.test(text)) {
      matchedTypes.push(type);
    }
  }
  return matchedTypes;
}
79
+
80
/**
 * Tells whether the text contains wording matched by ABSTRACT_CLAIM_RE.
 *
 * @param {string} text - Sentence text to test.
 * @returns {boolean} True when the abstract-claim pattern matches.
 */
export function isAbstractClaim(text) {
  const matchesClaimWording = ABSTRACT_CLAIM_RE.test(text);
  return matchesClaimWording;
}
@@ -3,6 +3,7 @@ import {
3
3
  characterNgrams,
4
4
  contentWords,
5
5
  distribution,
6
+ normalizeText,
6
7
  rate,
7
8
  tokenizeWords,
8
9
  topItems,
@@ -27,8 +28,13 @@ export function analyzeLexical(documents) {
27
28
  },
28
29
  wordLength: distribution(words.map((word) => word.length)),
29
30
  functionWords: topItems(words.filter((word) => functionWordSet.has(word)), 24),
31
+ functionWordBigrams: topItems(tokenNgrams(words.filter((word) => functionWordSet.has(word)), 2), 36),
30
32
  characterTrigrams: topItems(characterNgrams(text, 3), 24),
33
+ maskedCharacterFourgrams: topItems(maskedCharacterNgrams(text, 4), 48),
31
34
  repeatedMotifs: topItems(content, 16).filter((item) => item.count > 1),
35
+ sentenceInitialTokens: topItems(sentences.map((sentence) => boundaryToken(sentence.tokens[0], functionWordSet)), 16),
36
+ sentenceFinalTokens: topItems(sentences.map((sentence) => boundaryToken(sentence.tokens.at(-1), functionWordSet)), 16),
37
+ punctuationNgrams: topItems(punctuationNgrams(text, 3), 16),
32
38
  punctuation: {
33
39
  commaRate: rate(count(text, /,/g), sentences.length, 2),
34
40
  semicolonRate: rate(count(text, /;/g), sentences.length, 2),
@@ -43,6 +49,56 @@ export function analyzeLexical(documents) {
43
49
  };
44
50
  }
45
51
 
52
/**
 * Maps a sentence-boundary word onto a small, topic-independent vocabulary.
 *
 * Function words are kept verbatim; everything else collapses to a
 * placeholder so the resulting counts describe style rather than subject.
 *
 * @param {string | undefined} word - First or last token of a sentence; may
 *   be missing when the sentence has no tokens.
 * @param {Set<string>} functionWordSet - Words to keep as-is.
 * @returns {string | null} The word itself for function words, "<number>"
 *   for all-numeric tokens, "<content>" otherwise, or null when no word was
 *   given.
 */
function boundaryToken(word, functionWordSet) {
  if (!word) {
    return null;
  }
  if (functionWordSet.has(word)) {
    return word;
  }
  // Unicode-aware, matching the \p{N} test used by maskedCharacterNgrams, so
  // non-ASCII numerals are classified as numbers in both features.
  if (/^\p{N}+$/u.test(word)) {
    return "<number>";
  }
  return "<content>";
}
64
+
65
/**
 * Builds the sliding-window n-grams of a token list.
 *
 * Every run of `size` consecutive tokens is joined with a single space, so
 * ["of", "the", "and"] with size 2 yields ["of the", "the and"].
 *
 * @param {string[]} tokens - Ordered tokens to window over.
 * @param {number} size - Window length; expected to be a positive integer.
 * @returns {string[]} One joined string per window, in order. Empty when
 *   `size` is not a positive integer or exceeds `tokens.length`.
 */
function tokenNgrams(tokens, size) {
  // A zero or negative window would otherwise emit empty or wrongly sized
  // "grams" that pollute the frequency counts built from this list.
  if (!Number.isInteger(size) || size < 1) {
    return [];
  }
  const grams = [];
  const lastStart = tokens.length - size;
  for (let start = 0; start <= lastStart; start += 1) {
    grams.push(tokens.slice(start, start + size).join(" "));
  }
  return grams;
}
72
+
73
/**
 * Produces character n-grams over a topic-masked copy of the text.
 *
 * Each word-like run is replaced before windowing: function words are kept
 * (lower-cased, trailing "'s" removed), all-numeric runs become "@" and any
 * other word becomes "#". Whitespace runs are then collapsed to one space.
 * Windows are taken per Unicode code point so characters outside the Basic
 * Multilingual Plane are never split into lone surrogates.
 *
 * @param {string} text - Raw text; passed through normalizeText first
 *   (NOTE(review): its exact normalisation is defined elsewhere — confirm).
 * @param {number} size - Window length in code points.
 * @returns {string[]} Every window that is not whitespace-only, in order.
 */
function maskedCharacterNgrams(text, size) {
  const functionWordSet = new Set(FUNCTION_WORDS);
  const masked = normalizeText(text)
    .replace(/\p{L}[\p{L}\p{N}'-]*|\p{N}+/gu, (word) => {
      const normalized = word.toLowerCase().replace(/'s$/, "");
      if (functionWordSet.has(normalized)) {
        return normalized;
      }
      return /^\p{N}+$/u.test(normalized) ? "@" : "#";
    })
    .replace(/\s+/g, " ");

  // Array.from iterates by code point; slicing the raw string would count
  // UTF-16 code units and could cut a surrogate pair in half.
  const characters = Array.from(masked);
  const grams = [];
  for (let index = 0; index <= characters.length - size; index += 1) {
    const gram = characters.slice(index, index + size).join("");
    if (gram.trim()) {
      grams.push(gram);
    }
  }
  return grams;
}
96
+
97
/**
 * Builds n-grams over the sequence of punctuation marks found in the text.
 *
 * Only the characters . , ; : ! ? and - are considered; everything else is
 * ignored. A null or undefined text is treated as the empty string.
 *
 * @param {unknown} text - Text to scan; coerced with String().
 * @param {number} size - Number of consecutive marks per gram.
 * @returns {string[]} Space-joined runs of `size` marks, as produced by
 *   tokenNgrams.
 */
function punctuationNgrams(text, size) {
  const source = String(text ?? "");
  // With the g flag, match() returns every matched mark, or null when none.
  const marks = source.match(/[.,;:!?-]/g) ?? [];
  return tokenNgrams(marks, size);
}
101
+
46
102
  function confidenceFor(wordCount) {
47
103
  if (wordCount >= 2000) {
48
104
  return "high";
@@ -6,10 +6,18 @@ export function analyzeRhetoricalShape(documents) {
6
6
  const documentMoves = documents.map((document) => document.sentences.map((sentence) => moveFor(sentence.text)));
7
7
  const sentenceMoves = documentMoves.flat();
8
8
  const openingMoves = documents.flatMap((document) => document.sentences.slice(0, 3).map((sentence) => moveFor(sentence.text)));
9
- const sequences = [];
9
+ const bigrams = [];
10
+ const trigrams = [];
11
+ const openingMovePatterns = [];
10
12
  for (const moves of documentMoves) {
13
+ if (moves.length > 0) {
14
+ openingMovePatterns.push(moves.slice(0, 3).join(" -> "));
15
+ }
11
16
  for (let index = 0; index < moves.length - 1; index += 1) {
12
- sequences.push(`${moves[index]} -> ${moves[index + 1]}`);
17
+ bigrams.push(`${moves[index]} -> ${moves[index + 1]}`);
18
+ }
19
+ for (let index = 0; index < moves.length - 2; index += 1) {
20
+ trigrams.push(`${moves[index]} -> ${moves[index + 1]} -> ${moves[index + 2]}`);
13
21
  }
14
22
  }
15
23
 
@@ -19,7 +27,10 @@ export function analyzeRhetoricalShape(documents) {
19
27
  features: {
20
28
  moveRates: topItems(sentenceMoves, 12),
21
29
  openingMoves: openingMoves.slice(0, 9),
22
- commonSequences: topItems(sequences, 12),
30
+ openingMovePatterns: topItems(openingMovePatterns, 8),
31
+ moveBigrams: topItems(bigrams, 12),
32
+ moveTrigrams: topItems(trigrams, 12),
33
+ commonSequences: topItems(bigrams, 12),
23
34
  },
24
35
  examples: openingMoves.slice(0, 5),
25
36
  warnings: sentenceMoves.length < 12 ? ["Rhetorical-shape confidence is limited because the corpus has fewer than 12 sentences."] : [],
package/src/v2/inspect.js CHANGED
@@ -46,7 +46,7 @@ function featureSummary(name, features) {
46
46
  return `sentenceWords.median=${features.sentenceWords.median}; paragraphWords.median=${features.paragraphWords.median}; listDensity=${features.listDensity}; quoteDensity=${features.quoteDensity}`;
47
47
  }
48
48
  if (name === "lexical") {
49
- return `wordCount=${features.wordCount}; contentTypeTokenRatio=${features.vocabularyRichness.contentTypeTokenRatio}; wordLength.median=${features.wordLength.median}`;
49
+ return `wordCount=${features.wordCount}; contentTypeTokenRatio=${features.vocabularyRichness.contentTypeTokenRatio}; wordLength.median=${features.wordLength.median}; maskedCharacterFourgrams=${features.maskedCharacterFourgrams?.length ?? 0}; functionWordBigrams=${features.functionWordBigrams?.length ?? 0}`;
50
50
  }
51
51
  if (name === "register") {
52
52
  return `primary=${features.primary.value} (${features.primary.score}); alternates=${features.scores.slice(1, 4).map((score) => `${score.value}:${score.score}`).join(", ")}`;
@@ -55,10 +55,10 @@ function featureSummary(name, features) {
55
55
  return `transitionRates=${Object.entries(features.transitionRates).map(([key, value]) => `${key}:${value}`).join(", ")}; sentenceCallbacks=${features.sentenceCallbacks}`;
56
56
  }
57
57
  if (name === "rhetoricalShape") {
58
- return `openingMoves=${features.openingMoves.slice(0, 5).join(" -> ") || "none"}; commonSequences=${features.commonSequences.slice(0, 3).map((item) => `${item.value}:${item.count}`).join(", ") || "none"}`;
58
+ return `openingMoves=${features.openingMoves.slice(0, 5).join(" -> ") || "none"}; moveTrigrams=${features.moveTrigrams?.slice(0, 2).map((item) => `${item.value}:${item.count}`).join(", ") || "none"}; commonSequences=${features.commonSequences.slice(0, 3).map((item) => `${item.value}:${item.count}`).join(", ") || "none"}`;
59
59
  }
60
60
  if (name === "evidence") {
61
- return `evidenceSentenceRate=${features.evidenceSentenceRate}; claimSentenceRate=${features.claimSentenceRate}; unsupportedClaimRate=${features.unsupportedClaimRate}; evidenceTypes=${features.evidenceTypes.map((item) => `${item.value}:${item.count}`).join(", ") || "none"}`;
61
+ return `evidenceSentenceRate=${features.evidenceSentenceRate}; claimSentenceRate=${features.claimSentenceRate}; supportedClaimRate=${features.supportedClaimRate}; unsupportedClaimRate=${features.unsupportedClaimRate}; evidenceTypes=${features.evidenceTypes.map((item) => `${item.value}:${item.count}`).join(", ") || "none"}`;
62
62
  }
63
63
  if (name === "structure") {
64
64
  return `sectionWords.median=${features.sectionWords.median}; headingCount.median=${features.headingCount.median}; listDocumentRate=${features.listDocumentRate}; quoteDocumentRate=${features.quoteDocumentRate}`;
package/src/v2/profile.js CHANGED
@@ -6,14 +6,15 @@ import { analyzeLexical } from "./analyzers/lexical.js";
6
6
  import { analyzeRegister } from "./analyzers/register.js";
7
7
  import { analyzeRhetoricalShape } from "./analyzers/rhetorical-shape.js";
8
8
  import { analyzeRhythm } from "./analyzers/rhythm.js";
9
- import { analyzeStructure } from "./analyzers/structure.js";
10
- import { loadDocuments } from "./document-model.js";
11
- import {
12
- STYLOMETRIC_REFERENCES,
13
- defaultStyleThresholds,
14
- distanceByFamily,
15
- percentile,
16
- } from "./stylometry.js";
9
+ import { analyzeStructure } from "./analyzers/structure.js";
10
+ import { loadDocuments } from "./document-model.js";
11
+ import {
12
+ STYLOMETRIC_REFERENCES,
13
+ defaultStyleThresholds,
14
+ distanceByFamily,
15
+ percentile,
16
+ stabilityFromDistances,
17
+ } from "./stylometry.js";
17
18
 
18
19
  export function learnVoicePackV2({ examplesDir, outDir }) {
19
20
  const documents = loadDocuments({ examplesDir });
@@ -36,11 +37,11 @@ export function loadVoicePackV2(voiceDir) {
36
37
  return profile;
37
38
  }
38
39
 
39
- export function buildVoiceProfileV2({ documents }) {
40
- const source = sourceSummary(documents);
41
- const families = analyzeFeatureFamilies(documents);
42
-
43
- return {
40
+ export function buildVoiceProfileV2({ documents }) {
41
+ const source = sourceSummary(documents);
42
+ const families = analyzeFeatureFamilies(documents);
43
+
44
+ return {
44
45
  schemaVersion: 2,
45
46
  generatedBy: "dravoice-v2",
46
47
  tool: { name: "Dravoice", cli: "drav" },
@@ -49,67 +50,68 @@ export function buildVoiceProfileV2({ documents }) {
49
50
  guidance: guidanceFor({ source, families }),
50
51
  calibration: {
51
52
  featureStability: Object.fromEntries(Object.entries(families).map(([name, family]) => [name, family.confidence])),
52
- tolerances: {
53
- rhythmMedianWords: toleranceFor(source.confidence.band, 5, 8, 12),
54
- evidenceRate: toleranceFor(source.confidence.band, 0.12, 0.18, 0.25),
55
- },
56
- styleThresholds: styleThresholdsFor(documents, families),
57
- minimumDraftSize: {
58
- words: source.confidence.band === "weak" ? 25 : 35,
59
- sentences: source.confidence.band === "weak" ? 3 : 4,
53
+ tolerances: {
54
+ rhythmMedianWords: toleranceFor(source.confidence.band, 5, 8, 12),
55
+ evidenceRate: toleranceFor(source.confidence.band, 0.12, 0.18, 0.25),
56
+ },
57
+ styleThresholds: styleThresholdsFor(documents, families),
58
+ minimumDraftSize: {
59
+ words: source.confidence.band === "weak" ? 25 : 35,
60
+ sentences: source.confidence.band === "weak" ? 3 : 4,
60
61
  },
61
62
  },
62
- };
63
- }
64
-
65
- function analyzeFeatureFamilies(documents) {
66
- return {
67
- rhythm: analyzeRhythm(documents),
68
- lexical: analyzeLexical(documents),
69
- register: analyzeRegister(documents),
70
- discourse: analyzeDiscourse(documents),
71
- rhetoricalShape: analyzeRhetoricalShape(documents),
72
- evidence: analyzeEvidence(documents),
73
- structure: analyzeStructure(documents),
74
- };
75
- }
76
-
77
- function styleThresholdsFor(documents, fallbackFamilies) {
78
- const fallbackThresholds = defaultStyleThresholds();
79
- const distancesByFamily = Object.fromEntries(Object.keys(fallbackFamilies).map((family) => [family, []]));
80
-
81
- if (documents.length >= 2) {
82
- for (let index = 0; index < documents.length; index += 1) {
83
- const referenceDocuments = documents.filter((_, candidateIndex) => candidateIndex !== index);
84
- const referenceFamilies = analyzeFeatureFamilies(referenceDocuments);
85
- const heldoutFamilies = analyzeFeatureFamilies([documents[index]]);
86
- for (const family of Object.keys(fallbackFamilies)) {
87
- distancesByFamily[family].push(distanceByFamily(
88
- family,
89
- referenceFamilies[family].features,
90
- heldoutFamilies[family].features,
91
- ));
92
- }
93
- }
94
- }
95
-
96
- const families = {};
97
- for (const family of Object.keys(fallbackFamilies)) {
98
- const observations = distancesByFamily[family];
99
- const observedThreshold = observations.length > 0 ? percentile(observations, 0.9) : 0;
100
- const fallback = fallbackThresholds[family].threshold;
101
- families[family] = {
102
- threshold: Math.max(0.01, Math.min(0.95, observedThreshold || fallback)),
103
- observations: observations.length,
104
- };
105
- }
106
-
107
- return {
108
- method: "leave-one-out-cosine-delta",
109
- references: STYLOMETRIC_REFERENCES,
110
- families,
111
- };
112
- }
63
+ };
64
+ }
65
+
66
/**
 * Runs every feature-family analyzer over the same set of documents.
 *
 * @param {Array<object>} documents - Parsed documents handed unchanged to
 *   each analyzer.
 * @returns {object} One analyzer result per family, keyed rhythm, lexical,
 *   register, discourse, rhetoricalShape, evidence and structure (in that
 *   order).
 */
function analyzeFeatureFamilies(documents) {
  // Listed in the order the analyzers run and the keys appear in the result.
  const analyzers = [
    ["rhythm", analyzeRhythm],
    ["lexical", analyzeLexical],
    ["register", analyzeRegister],
    ["discourse", analyzeDiscourse],
    ["rhetoricalShape", analyzeRhetoricalShape],
    ["evidence", analyzeEvidence],
    ["structure", analyzeStructure],
  ];
  const families = {};
  for (const [name, analyze] of analyzers) {
    families[name] = analyze(documents);
  }
  return families;
}
77
+
78
/**
 * Derives per-family style thresholds by holding out one document at a time.
 *
 * With two or more documents, each document in turn is analysed on its own
 * and compared (via distanceByFamily) with the analysis of all the others;
 * the collected distances per family feed the threshold and stability.
 * With fewer documents no distances are collected and every family falls
 * back to its default threshold.
 *
 * @param {Array<object>} documents - Corpus documents.
 * @param {object} fallbackFamilies - Object whose keys name the families to
 *   calibrate (only the keys are read here).
 * @returns {{ method: string, references: unknown, families: object }}
 *   Calibration record; each family entry holds `threshold` (clamped to
 *   0.01–0.95), `observations` (number of distances) and `stability`.
 */
function styleThresholdsFor(documents, fallbackFamilies) {
  const fallbackThresholds = defaultStyleThresholds();
  const familyNames = Object.keys(fallbackFamilies);
  const distancesByFamily = Object.fromEntries(familyNames.map((name) => [name, []]));

  if (documents.length >= 2) {
    for (const [heldoutIndex, heldout] of documents.entries()) {
      const referenceDocuments = documents.filter((_, position) => position !== heldoutIndex);
      const referenceFamilies = analyzeFeatureFamilies(referenceDocuments);
      const heldoutFamilies = analyzeFeatureFamilies([heldout]);
      for (const name of familyNames) {
        const distance = distanceByFamily(
          name,
          referenceFamilies[name].features,
          heldoutFamilies[name].features,
        );
        distancesByFamily[name].push(distance);
      }
    }
  }

  const families = Object.fromEntries(familyNames.map((name) => {
    const observations = distancesByFamily[name];
    const observedThreshold = observations.length === 0 ? 0 : percentile(observations, 0.9);
    const fallback = fallbackThresholds[name].threshold;
    // A falsy observed value (no observations, or a zero result) uses the
    // default threshold instead; the outcome is then clamped.
    const chosen = observedThreshold || fallback;
    const entry = {
      threshold: Math.max(0.01, Math.min(0.95, chosen)),
      observations: observations.length,
      stability: stabilityFromDistances(observations),
    };
    return [name, entry];
  }));

  return {
    method: "leave-one-out-cosine-delta",
    references: STYLOMETRIC_REFERENCES,
    families,
  };
}
113
115
 
114
116
  function writeVoicePackV2(outDir, profile) {
115
117
  fs.mkdirSync(outDir, { recursive: true });
package/src/v2/review.js CHANGED
@@ -2,7 +2,7 @@ import fs from "node:fs";
2
2
  import path from "node:path";
3
3
  import { parseDocument } from "./document-model.js";
4
4
  import { buildVoiceProfileV2, loadVoicePackV2 } from "./profile.js";
5
- import { clampScore } from "./text-utils.js";
5
+ import { familyDiagnosticsFor, styleDistanceFromDiagnostics } from "./stylometry.js";
6
6
 
7
7
  const REVIEW_MODES = {
8
8
  loose: {
@@ -48,11 +48,12 @@ export function reviewVoiceDraftV2({ file, voice, cwd = process.cwd(), mode = "b
48
48
  };
49
49
  }
50
50
 
51
- const familyScores = scoreFamilies(sourceProfile, draftProfile);
52
- const distance = Math.round(Object.values(familyScores).reduce((sum, score) => sum + (100 - score), 0) / Object.keys(familyScores).length);
51
+ const familyDiagnostics = familyDiagnosticsFor(sourceProfile, draftProfile);
52
+ const familyScores = Object.fromEntries(Object.entries(familyDiagnostics).map(([family, item]) => [family, item.score]));
53
+ const distance = styleDistanceFromDiagnostics(familyDiagnostics);
53
54
  const findings = reviewFindings(sourceProfile, draftProfile, familyScores, reviewMode);
54
55
  const fit = {
55
- band: fitBand(distance, findings),
56
+ band: fitBand(distance, findings, familyDiagnostics),
56
57
  distance,
57
58
  };
58
59
 
@@ -63,6 +64,9 @@ export function reviewVoiceDraftV2({ file, voice, cwd = process.cwd(), mode = "b
63
64
  mode: reviewMode,
64
65
  fit,
65
66
  familyScores,
67
+ familyDistances: Object.fromEntries(Object.entries(familyDiagnostics).map(([family, item]) => [family, item.distance])),
68
+ familyDrift: Object.fromEntries(Object.entries(familyDiagnostics).map(([family, item]) => [family, item.drift])),
69
+ thresholds: Object.fromEntries(Object.entries(familyDiagnostics).map(([family, item]) => [family, item.threshold])),
66
70
  corpusConfidence: sourceProfile.source.confidence,
67
71
  },
68
72
  findings,
@@ -101,57 +105,6 @@ export function renderVoiceReviewV2(result) {
101
105
  return lines.join("\n");
102
106
  }
103
107
 
104
- function scoreFamilies(source, draft) {
105
- return {
106
- rhythm: rhythmScore(source.families.rhythm.features, draft.families.rhythm.features),
107
- lexical: lexicalScore(source.families.lexical.features, draft.families.lexical.features),
108
- register: source.families.register.features.primary.value === draft.families.register.features.primary.value ? 90 : 58,
109
- discourse: transitionScore(source.families.discourse.features.transitionRates, draft.families.discourse.features.transitionRates),
110
- rhetoricalShape: sequenceScore(source.families.rhetoricalShape.features.openingMoves, draft.families.rhetoricalShape.features.openingMoves),
111
- evidence: evidenceScore(source.families.evidence.features, draft.families.evidence.features),
112
- structure: sequenceScore(source.families.structure.features.openingMoves, draft.families.structure.features.openingMoves),
113
- };
114
- }
115
-
116
- function rhythmScore(source, draft) {
117
- const sentenceDelta = Math.abs(source.sentenceWords.median - draft.sentenceWords.median);
118
- const paragraphDelta = Math.abs(source.paragraphWords.median - draft.paragraphWords.median);
119
- return clampScore(100 - sentenceDelta * 4 - paragraphDelta * 0.6);
120
- }
121
-
122
- function lexicalScore(source, draft) {
123
- const ratioDelta = Math.abs(source.vocabularyRichness.contentTypeTokenRatio - draft.vocabularyRichness.contentTypeTokenRatio);
124
- const wordLengthDelta = Math.abs(source.wordLength.median - draft.wordLength.median);
125
- return clampScore(100 - ratioDelta * 100 - wordLengthDelta * 8);
126
- }
127
-
128
- function transitionScore(source, draft) {
129
- const keys = Array.from(new Set([...Object.keys(source), ...Object.keys(draft)]));
130
- const delta = keys.reduce((sum, key) => sum + Math.abs((source[key] ?? 0) - (draft[key] ?? 0)), 0);
131
- return clampScore(100 - delta * 150);
132
- }
133
-
134
- function sequenceScore(source, draft) {
135
- if (source.length === 0 || draft.length === 0) {
136
- return 100;
137
- }
138
- let matches = 0;
139
- for (let index = 0; index < Math.min(source.length, draft.length); index += 1) {
140
- if (source[index] === draft[index]) {
141
- matches += 1;
142
- } else if (source.includes(draft[index]) || draft.includes(source[index])) {
143
- matches += 0.5;
144
- }
145
- }
146
- return clampScore(100 * matches / Math.min(source.length, draft.length));
147
- }
148
-
149
- function evidenceScore(source, draft) {
150
- const rateDelta = Math.max(0, source.evidenceSentenceRate - draft.evidenceSentenceRate);
151
- const unsupportedDelta = Math.max(0, draft.unsupportedClaimRate - source.unsupportedClaimRate);
152
- return clampScore(100 - rateDelta * 180 - unsupportedDelta * 180);
153
- }
154
-
155
108
  function reviewFindings(source, draft, scores, mode) {
156
109
  const modeConfig = REVIEW_MODES[mode];
157
110
  if (source.source?.confidence?.band === "weak" || !modeConfig.findingThresholds) {
@@ -196,11 +149,12 @@ function normalizeReviewMode(mode) {
196
149
  return normalized;
197
150
  }
198
151
 
199
- function fitBand(distance, findings) {
200
- if (findings.some((finding) => finding.priority === "review") || distance >= 35) {
152
+ function fitBand(distance, findings, familyDiagnostics) {
153
+ const maxDrift = Math.max(0, ...Object.values(familyDiagnostics).map((item) => item.drift));
154
+ if (findings.some((finding) => finding.priority === "review") || distance >= 35 || maxDrift >= 1.25) {
201
155
  return "drift";
202
156
  }
203
- if (findings.length > 0 || distance >= 20) {
157
+ if (findings.length > 0 || distance >= 20 || maxDrift > 0) {
204
158
  return "watch";
205
159
  }
206
160
  return "close";