dravoice 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,219 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { parseDocument } from "./document-model.js";
4
+ import { buildVoiceProfileV2, loadVoicePackV2 } from "./profile.js";
5
+ import { clampScore } from "./text-utils.js";
6
+
7
/**
 * Per-mode review configuration, keyed by the lower-case mode name.
 *
 * - `findingThresholds`: per-family score cut-offs; a finding is raised when a
 *   family score falls below its cut-off. `null` disables findings for the mode.
 * - `exitOnDrift`: when true, a "drift" fit band produces exit code 1.
 *
 * The table and every nested object are frozen so this shared configuration
 * cannot be mutated by accident at runtime.
 */
const REVIEW_MODES = Object.freeze({
  loose: Object.freeze({
    findingThresholds: null,
    exitOnDrift: false,
  }),
  balanced: Object.freeze({
    findingThresholds: Object.freeze({ evidence: 65, rhythm: 55, rhetoricalShape: 50 }),
    exitOnDrift: false,
  }),
  strict: Object.freeze({
    findingThresholds: Object.freeze({ evidence: 75, rhythm: 70, rhetoricalShape: 65 }),
    exitOnDrift: true,
  }),
});
21
+
22
/**
 * Review a draft file against a voice profile and report fit, family scores,
 * and findings.
 *
 * @param {object} options
 * @param {string} options.file - Draft path. It is resolved with
 *   `path.resolve(file)`, i.e. against the process working directory, not `cwd`.
 * @param {string|object} options.voice - A string is handed to
 *   `loadVoicePackV2`; any other value is used directly as the source profile.
 * @param {string} [options.cwd] - Passed to the parser as `rootDir` and used to
 *   shorten the reported file path. Defaults to `process.cwd()`.
 * @param {string} [options.mode] - "loose", "balanced", or "strict".
 * @returns {object} Result with `schemaVersion`, `file`, `summary`, `findings`,
 *   and `exitCode`.
 * @throws {Error} When `mode` is not a supported review mode.
 */
export function reviewVoiceDraftV2({ file, voice, cwd = process.cwd(), mode = "balanced" }) {
  const reviewMode = normalizeReviewMode(mode);
  const sourceProfile = typeof voice === "string" ? loadVoicePackV2(voice) : voice;
  const filePath = path.resolve(file);
  const contents = fs.readFileSync(filePath, "utf8");
  const draftDocument = parseDocument({ filePath, rootDir: cwd, contents });
  const draftProfile = buildVoiceProfileV2({ documents: [draftDocument] });

  // Both outcomes share one result shape; only these four pieces vary.
  const buildResult = (fit, familyScores, findings, exitCode) => ({
    schemaVersion: 2,
    file: displayPath(filePath, cwd),
    summary: {
      mode: reviewMode,
      fit,
      familyScores,
      corpusConfidence: sourceProfile.source.confidence,
    },
    findings,
    exitCode,
  });

  // Drafts below the calibrated minimum size are reported, not scored.
  const minimumSize = sourceProfile.calibration.minimumDraftSize;
  const hasEnoughWords = draftDocument.wordCount >= minimumSize.words;
  const hasEnoughSentences = hasEnoughWords && draftDocument.sentences.length >= minimumSize.sentences;
  if (!hasEnoughSentences) {
    return buildResult({ band: "insufficient-evidence", distance: 0 }, {}, [], 0);
  }

  const familyScores = scoreFamilies(sourceProfile, draftProfile);

  // Distance is the rounded mean shortfall from a perfect score of 100.
  let shortfall = 0;
  for (const score of Object.values(familyScores)) {
    shortfall += 100 - score;
  }
  const distance = Math.round(shortfall / Object.keys(familyScores).length);

  const findings = reviewFindings(sourceProfile, draftProfile, familyScores, reviewMode);
  const fit = { band: fitBand(distance, findings), distance };
  const failsOnDrift = REVIEW_MODES[reviewMode].exitOnDrift && fit.band === "drift";

  return buildResult(fit, familyScores, findings, failsOnDrift ? 1 : 0);
}
72
+
73
/**
 * Render a review result as plain text.
 *
 * The output is a fixed header, one line per family score, and then either a
 * "no findings" notice or a "Start here:" list limited to the first six
 * findings.
 *
 * @param {object} result - A result object as returned by reviewVoiceDraftV2.
 * @returns {string} Newline-joined report text.
 */
export function renderVoiceReviewV2(result) {
  const { fit, corpusConfidence, familyScores } = result.summary;

  const header = [
    "Voice rewrite notes, not AI detection.",
    "",
    result.file,
    `Voice fit: ${capitalize(fit.band)} (${fit.distance} distance)`,
    `Corpus confidence: ${capitalize(corpusConfidence.band)} - ${corpusConfidence.message}`,
    "Family scores:",
  ];
  const scoreLines = Object.entries(familyScores).map(([family, score]) => `- ${family}: ${score}`);

  let body;
  if (result.findings.length === 0) {
    body = ["No high-confidence V2 voice drift findings.", ""];
  } else {
    // Each finding becomes three lines followed by a blank separator line.
    const findingLines = result.findings.slice(0, 6).flatMap((finding) => [
      `${finding.priority} ${finding.family} ${finding.id}`,
      `Why flagged: ${finding.why}`,
      `Revise by: ${finding.action}`,
      "",
    ]);
    body = ["Start here:", ...findingLines];
  }

  return [...header, ...scoreLines, "", ...body].join("\n");
}
103
+
104
/**
 * Score the draft against the source profile, one score per feature family.
 *
 * @param {object} source - Source voice profile (reads `families.<name>.features`).
 * @param {object} draft - Draft voice profile with the same layout.
 * @returns {object} Scores keyed rhythm, lexical, register, discourse,
 *   rhetoricalShape, evidence, structure (in that order).
 */
function scoreFamilies(source, draft) {
  // Fetch the [source, draft] feature pair for one family.
  const featurePair = (family) => [source.families[family].features, draft.families[family].features];

  const [sourceRhythm, draftRhythm] = featurePair("rhythm");
  const [sourceLexical, draftLexical] = featurePair("lexical");
  const [sourceRegister, draftRegister] = featurePair("register");
  const [sourceDiscourse, draftDiscourse] = featurePair("discourse");
  const [sourceShape, draftShape] = featurePair("rhetoricalShape");
  const [sourceEvidence, draftEvidence] = featurePair("evidence");
  const [sourceStructure, draftStructure] = featurePair("structure");

  // Register is all-or-nothing: same primary register scores 90, otherwise 58.
  const sameRegister = sourceRegister.primary.value === draftRegister.primary.value;

  return {
    rhythm: rhythmScore(sourceRhythm, draftRhythm),
    lexical: lexicalScore(sourceLexical, draftLexical),
    register: sameRegister ? 90 : 58,
    discourse: transitionScore(sourceDiscourse.transitionRates, draftDiscourse.transitionRates),
    rhetoricalShape: sequenceScore(sourceShape.openingMoves, draftShape.openingMoves),
    evidence: evidenceScore(sourceEvidence, draftEvidence),
    structure: sequenceScore(sourceStructure.openingMoves, draftStructure.openingMoves),
  };
}
115
+
116
/**
 * Score rhythm similarity from median sentence and paragraph word counts.
 *
 * Each word of difference in median sentence length costs 4 points; each word
 * of difference in median paragraph length costs 0.6 points.
 *
 * @param {object} source - Source rhythm features (`sentenceWords`, `paragraphWords`).
 * @param {object} draft - Draft rhythm features with the same layout.
 * @returns {number} Result of passing the penalized score through clampScore.
 */
function rhythmScore(source, draft) {
  const sentenceGap = Math.abs(source.sentenceWords.median - draft.sentenceWords.median);
  const paragraphGap = Math.abs(source.paragraphWords.median - draft.paragraphWords.median);
  const penalized = 100 - sentenceGap * 4 - paragraphGap * 0.6;
  return clampScore(penalized);
}
121
+
122
/**
 * Score lexical similarity from vocabulary richness and median word length.
 *
 * The absolute difference in content type/token ratio is weighted by 100 and
 * the absolute difference in median word length by 8.
 *
 * @param {object} source - Source lexical features (`vocabularyRichness`, `wordLength`).
 * @param {object} draft - Draft lexical features with the same layout.
 * @returns {number} Result of passing the penalized score through clampScore.
 */
function lexicalScore(source, draft) {
  const sourceRatio = source.vocabularyRichness.contentTypeTokenRatio;
  const draftRatio = draft.vocabularyRichness.contentTypeTokenRatio;
  const ratioGap = Math.abs(sourceRatio - draftRatio);
  const lengthGap = Math.abs(source.wordLength.median - draft.wordLength.median);
  const penalized = 100 - ratioGap * 100 - lengthGap * 8;
  return clampScore(penalized);
}
127
+
128
/**
 * Score how closely two transition-rate tables agree.
 *
 * Sums the absolute rate difference over every key present in either table
 * (a missing key counts as 0) and weights the total by 150.
 *
 * @param {object} source - Source rates keyed by transition name.
 * @param {object} draft - Draft rates keyed by transition name.
 * @returns {number} Result of passing the penalized score through clampScore.
 */
function transitionScore(source, draft) {
  const transitions = new Set(Object.keys(source).concat(Object.keys(draft)));
  let totalGap = 0;
  for (const transition of transitions) {
    const sourceRate = source[transition] ?? 0;
    const draftRate = draft[transition] ?? 0;
    totalGap += Math.abs(sourceRate - draftRate);
  }
  return clampScore(100 - totalGap * 150);
}
133
+
134
/**
 * Score positional agreement between two move sequences.
 *
 * Only the overlapping prefix is compared. A position earns 1 when both
 * sequences hold the same item, 0.5 when either item appears anywhere in the
 * other sequence, and 0 otherwise. An empty sequence on either side yields 100.
 *
 * @param {Array} source - Source sequence.
 * @param {Array} draft - Draft sequence.
 * @returns {number} 100 for an empty side; otherwise the credit percentage
 *   passed through clampScore.
 */
function sequenceScore(source, draft) {
  const compared = Math.min(source.length, draft.length);
  if (compared === 0) {
    return 100;
  }

  let credit = 0;
  for (let position = 0; position < compared; position += 1) {
    const expected = source[position];
    const actual = draft[position];
    if (expected === actual) {
      credit += 1;
      continue;
    }
    const appearsElsewhere = source.includes(actual) || draft.includes(expected);
    if (appearsElsewhere) {
      credit += 0.5;
    }
  }
  return clampScore(100 * credit / compared);
}
148
+
149
/**
 * Score the draft's evidence use against the source.
 *
 * Penalties are one-sided: only a lower evidence sentence rate or a higher
 * unsupported claim rate in the draft costs points, each weighted by 180.
 *
 * @param {object} source - Source evidence features
 *   (`evidenceSentenceRate`, `unsupportedClaimRate`).
 * @param {object} draft - Draft evidence features with the same layout.
 * @returns {number} Result of passing the penalized score through clampScore.
 */
function evidenceScore(source, draft) {
  const evidenceShortfall = Math.max(0, source.evidenceSentenceRate - draft.evidenceSentenceRate);
  const unsupportedExcess = Math.max(0, draft.unsupportedClaimRate - source.unsupportedClaimRate);
  const penalized = 100 - evidenceShortfall * 180 - unsupportedExcess * 180;
  return clampScore(penalized);
}
154
+
155
/**
 * Build the list of drift findings for a scored draft.
 *
 * No findings are produced when the source corpus confidence band is "weak"
 * or when the mode has no finding thresholds. Otherwise a finding is emitted
 * for each of evidence, rhythm, and rhetoricalShape (in that order) whose
 * score is below the mode's threshold.
 *
 * @param {object} source - Source voice profile.
 * @param {object} draft - Draft voice profile.
 * @param {object} scores - Family scores keyed by family name.
 * @param {string} mode - Normalized review mode (a key of REVIEW_MODES).
 * @returns {Array<object>} Findings with `id`, `family`, `priority`, `why`, `action`.
 */
function reviewFindings(source, draft, scores, mode) {
  const thresholds = REVIEW_MODES[mode].findingThresholds;
  const corpusIsWeak = source.source?.confidence?.band === "weak";
  if (corpusIsWeak || !thresholds) {
    return [];
  }

  // Finding bodies are built lazily so profile features are only read for
  // families that actually fall below their threshold.
  const candidates = [
    {
      family: "evidence",
      build: () => ({
        id: "v2.evidence-drift",
        family: "evidence",
        priority: "review",
        why: `Source evidence sentence rate is ${source.families.evidence.features.evidenceSentenceRate}; draft rate is ${draft.families.evidence.features.evidenceSentenceRate}.`,
        action: "Add concrete support before broad claims: a scene, quote, number, citation, URL, sensory detail, or specific example.",
      }),
    },
    {
      family: "rhythm",
      build: () => ({
        id: "v2.rhythm-drift",
        family: "rhythm",
        priority: "consider",
        why: `Source median sentence/paragraph length is ${source.families.rhythm.features.sentenceWords.median}/${source.families.rhythm.features.paragraphWords.median}; draft is ${draft.families.rhythm.features.sentenceWords.median}/${draft.families.rhythm.features.paragraphWords.median}.`,
        action: "Revise sentence and paragraph pacing toward the learned range.",
      }),
    },
    {
      family: "rhetoricalShape",
      build: () => ({
        id: "v2.shape-drift",
        family: "rhetoricalShape",
        priority: "consider",
        why: `Source opening shape is ${source.families.rhetoricalShape.features.openingMoves.slice(0, 3).join(" -> ")}; draft opening shape is ${draft.families.rhetoricalShape.features.openingMoves.slice(0, 3).join(" -> ")}.`,
        action: "Rework the opening so it uses a compatible scene, claim, contrast, reflection, or example sequence.",
      }),
    },
  ];

  return candidates
    .filter(({ family }) => scores[family] < thresholds[family])
    .map(({ build }) => build());
}
190
+
191
/**
 * Validate a review mode and return its lower-case form.
 *
 * @param {*} mode - Requested mode; null or undefined falls back to "balanced".
 * @returns {string} A key of REVIEW_MODES.
 * @throws {Error} When the lower-cased mode is not a key of REVIEW_MODES.
 */
function normalizeReviewMode(mode) {
  const candidate = String(mode ?? "balanced").toLowerCase();
  if (Object.hasOwn(REVIEW_MODES, candidate)) {
    return candidate;
  }
  throw new Error(`Unsupported review mode: ${mode}. Expected loose, balanced, or strict.`);
}
198
+
199
/**
 * Classify overall fit as "drift", "watch", or "close".
 *
 * - "drift": any finding has priority "review", or distance is at least 35.
 * - "watch": any finding exists, or distance is at least 20.
 * - "close": everything else.
 *
 * @param {number} distance - Overall distance from the source voice.
 * @param {Array<object>} findings - Findings, each with a `priority`.
 * @returns {string} The fit band.
 */
function fitBand(distance, findings) {
  const needsReview = findings.some(({ priority }) => priority === "review");
  if (needsReview || distance >= 35) {
    return "drift";
  }
  const hasFindings = findings.length > 0;
  return hasFindings || distance >= 20 ? "watch" : "close";
}
208
+
209
/**
 * Format a file path for display using forward slashes.
 *
 * Returns the path relative to `cwd` when the file lies inside `cwd`;
 * otherwise (outside `cwd`, or identical to it) returns `filePath` itself.
 *
 * @param {string} filePath - Path of the file to display.
 * @param {string} cwd - Directory that relative output is based on.
 * @returns {string} Display path with "/" separators.
 */
function displayPath(filePath, cwd) {
  const relative = path.relative(cwd, filePath);
  // Compare whole segments: a plain startsWith("..") would also reject entries
  // inside cwd whose name merely begins with two dots (e.g. "..notes/draft.md").
  const escapesCwd = relative === ".." || relative.startsWith(`..${path.sep}`);
  const isInsideCwd = relative !== "" && !escapesCwd && !path.isAbsolute(relative);
  const shown = isInsideCwd ? relative : filePath;
  return shown.split(path.sep).join("/");
}
216
+
217
/**
 * Upper-case the first character of a string and leave the rest unchanged.
 *
 * @param {string} value - Text to capitalize.
 * @returns {string} The capitalized text ("" stays "").
 */
function capitalize(value) {
  const head = value.charAt(0);
  const tail = value.slice(1);
  return `${head.toUpperCase()}${tail}`;
}
@@ -0,0 +1,123 @@
1
// A word is an ASCII letter followed by any run of ASCII letters, digits,
// apostrophes, or hyphens (case-insensitive, global).
const WORD_RE = /[a-z][a-z0-9'-]*/gi;

// Abbreviations whose periods must not be treated as sentence endings.
const ABBREVIATIONS = "Dr. Mr. Mrs. Ms. Prof. Sr. Jr. e.g. i.e.".split(" ");

// Common function words, in a fixed order.
export const FUNCTION_WORDS = [
  "a an and as at because but by for from",
  "if in into is it its of on or that",
  "the their then there this to was we with",
  "you your i my not so before after while",
].flatMap((row) => row.split(" "));

// Words excluded from content-word analysis: every function word plus a set
// of additional common words.
export const STOP_WORDS = new Set(
  FUNCTION_WORDS.concat(
    "about also between can could each had has".split(" "),
    "have more one only our out over should".split(" "),
    "than they were when where which who will".split(" "),
    ["would"],
  ),
);
18
+
19
/**
 * Collapse whitespace runs to single spaces, trim, and lower-case.
 *
 * @param {*} text - Input text; null or undefined is treated as "".
 * @returns {string} The normalized text.
 */
export function normalizeText(text) {
  const raw = String(text ?? "");
  const singleSpaced = raw.replace(/\s+/g, " ");
  const trimmed = singleSpaced.trim();
  return trimmed.toLowerCase();
}
22
+
23
/**
 * Extract lower-cased word tokens from text.
 *
 * Tokens are the matches of WORD_RE; a trailing "'s" is removed from each
 * token after lower-casing.
 *
 * @param {*} text - Input text; null or undefined is treated as "".
 * @returns {string[]} Tokens in order of appearance (empty when none match).
 */
export function tokenizeWords(text) {
  const matches = String(text ?? "").match(WORD_RE);
  if (matches === null) {
    return [];
  }
  const tokens = [];
  for (const raw of matches) {
    const lowered = raw.toLowerCase();
    tokens.push(lowered.replace(/'s$/, ""));
  }
  return tokens;
}
27
+
28
/**
 * Extract content words: tokens longer than two characters that are not in
 * STOP_WORDS.
 *
 * @param {*} text - Input text, tokenized with tokenizeWords.
 * @returns {string[]} Content words in order of appearance.
 */
export function contentWords(text) {
  const kept = [];
  for (const word of tokenizeWords(text)) {
    if (word.length <= 2 || STOP_WORDS.has(word)) {
      continue;
    }
    kept.push(word);
  }
  return kept;
}
31
+
32
/**
 * Split text into sentences.
 *
 * A boundary is whitespace that follows ".", "!" or "?" and precedes an
 * upper-case ASCII letter or digit (optionally behind an opening quote,
 * parenthesis, or bracket). Before splitting, periods that do not end a
 * sentence are hidden: periods in known abbreviations are masked, and URLs,
 * file paths with a known extension, and dotted version numbers are swapped
 * for placeholders. Everything is restored verbatim afterwards.
 *
 * @param {*} text - Input text; null or undefined is treated as "".
 * @returns {string[]} Trimmed, non-empty sentences in order.
 */
export function splitSentences(text) {
  let working = String(text ?? "");

  // Mask abbreviation periods with a private-use character that does not occur
  // in the input, so unmasking can never rewrite a character the author typed
  // (a fixed visible mask such as "§" would be turned into "." on restore).
  let dotMask = "\uE000";
  for (let code = 0xe000; code <= 0xf8ff; code += 1) {
    dotMask = String.fromCharCode(code);
    if (!working.includes(dotMask)) {
      break;
    }
  }
  for (const abbreviation of ABBREVIATIONS) {
    working = working.replaceAll(abbreviation, abbreviation.replaceAll(".", dotMask));
  }

  const placeholders = new Map();
  const protect = (pattern, prefix) => {
    working = working.replace(pattern, (match) => {
      const key = `__${prefix}_${placeholders.size}__`;
      placeholders.set(key, match);
      return key;
    });
  };
  protect(/https?:\/\/\S+/g, "URL");
  protect(/\b[\w/-]+\.(?:js|ts|py|md|mdx|txt|html|json|yml|yaml|toml)\b/g, "PATH");
  protect(/\b\d+(?:\.\d+)+\b/g, "VERSION");

  const parts = working
    .split(/(?<=[.!?])\s+(?=["'(\[]?[A-Z0-9])/)
    .map((part) => part.trim())
    .filter(Boolean);

  return parts.map((part) =>
    part
      // A replacer function inserts the stored text literally; a replacement
      // string would expand "$$", "$&" and similar sequences found in URLs.
      .replace(/__[A-Z]+_\d+__/g, (key) => (placeholders.has(key) ? placeholders.get(key) : key))
      // Unmask last so abbreviations captured inside a placeholder (e.g. a URL
      // containing "Mr.") get their periods back as well.
      .replaceAll(dotMask, "."),
  );
}
63
+
64
/**
 * List the character n-grams of normalized text that do not span a space.
 *
 * The text is normalized with normalizeText, and every run of characters
 * other than a-z and 0-9 is replaced by a single space before windowing.
 *
 * @param {*} text - Input text.
 * @param {number} [size=3] - Window length in characters.
 * @returns {string[]} N-grams in order of appearance.
 */
export function characterNgrams(text, size = 3) {
  const compact = normalizeText(text).replace(/[^a-z0-9]+/g, " ").trim();
  const grams = [];
  let start = 0;
  while (start <= compact.length - size) {
    const window = compact.slice(start, start + size);
    start += 1;
    // Windows containing a space straddle a word boundary and are skipped.
    if (window.includes(" ")) {
      continue;
    }
    grams.push(window);
  }
  return grams;
}
75
+
76
/**
 * Summarize a list of numbers.
 *
 * @param {number[]} values - Values to summarize; the input is not modified.
 * @returns {{count: number, min: number, max: number, mean: number,
 *   median: number, p25: number, p75: number}} Summary with the mean rounded
 *   to two decimals; every field is 0 when `values` has no elements.
 */
export function distribution(values) {
  if (!values.length) {
    return { count: 0, min: 0, max: 0, mean: 0, median: 0, p25: 0, p75: 0 };
  }

  // Sort a copy numerically so the caller's array keeps its order.
  const ascending = [...values].sort((left, right) => left - right);
  const count = ascending.length;

  let total = 0;
  for (const value of ascending) {
    total += value;
  }

  return {
    count,
    min: ascending[0],
    max: ascending[count - 1],
    mean: round(total / count, 2),
    median: percentile(ascending, 0.5),
    p25: percentile(ascending, 0.25),
    p75: percentile(ascending, 0.75),
  };
}
91
+
92
/**
 * Count occurrences and return the most frequent values.
 *
 * Falsy values are ignored. Results are ordered by descending count, with
 * ties broken by `localeCompare` on the value.
 *
 * @param {Array} values - Values to count.
 * @param {number} [limit=10] - Maximum number of entries to return.
 * @returns {Array<{value: *, count: number}>} The top entries.
 */
export function topItems(values, limit = 10) {
  const tally = new Map();
  for (const value of values) {
    if (!value) {
      continue;
    }
    tally.set(value, (tally.get(value) ?? 0) + 1);
  }

  const ranked = [...tally].sort(([leftValue, leftCount], [rightValue, rightCount]) => {
    if (leftCount !== rightCount) {
      return rightCount - leftCount;
    }
    return leftValue.localeCompare(rightValue);
  });

  return ranked.slice(0, limit).map(([value, count]) => ({ value, count }));
}
102
+
103
/**
 * Compute `count / total` rounded to a fixed number of decimals.
 *
 * The divisor is never allowed below 1, so a zero total yields `count`.
 *
 * @param {number} count - Numerator.
 * @param {number} total - Denominator before the lower bound of 1 is applied.
 * @param {number} [digits=3] - Decimal places passed to round.
 * @returns {number} The rounded rate.
 */
export function rate(count, total, digits = 3) {
  const divisor = Math.max(1, total);
  return round(count / divisor, digits);
}
106
+
107
/**
 * Round a number to a given number of decimal places, with halves going
 * toward positive infinity (floor of the scaled value plus 0.5).
 *
 * @param {number} value - Number to round.
 * @param {number} [digits=0] - Decimal places to keep.
 * @returns {number} The rounded value.
 */
export function round(value, digits = 0) {
  const scale = 10 ** digits;
  const result = Math.floor(value * scale + 0.5) / scale;
  if (digits === 0) {
    return Math.trunc(result);
  }
  return result;
}
112
+
113
/**
 * Round a value to the nearest integer and limit it to the range 0..100.
 *
 * @param {number} value - Raw score.
 * @returns {number} Integer score between 0 and 100 (NaN stays NaN).
 */
export function clampScore(value) {
  const rounded = Math.round(value);
  if (rounded <= 0) {
    return 0;
  }
  if (rounded >= 100) {
    return 100;
  }
  return rounded;
}
116
+
117
/**
 * Pick the element of an ascending list closest to the given ratio.
 *
 * The position is `(length - 1) * ratio` rounded to the nearest index and
 * kept within the bounds of the list; no interpolation is done.
 *
 * @param {number[]} sortedValues - Values already sorted ascending.
 * @param {number} ratio - Position as a fraction, e.g. 0.5 for the median.
 * @returns {number|undefined} The selected element (undefined for an empty list).
 */
function percentile(sortedValues, ratio) {
  const lastIndex = sortedValues.length - 1;
  const nearest = Math.round(lastIndex * ratio);
  const bounded = Math.min(lastIndex, Math.max(0, nearest));
  return sortedValues[bounded];
}