dravoice 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/v2/inspect.js CHANGED
@@ -1,67 +1,67 @@
1
- export function renderInspectV2(profile) {
2
- const lines = [
3
- "Dravoice V2 Profile",
4
- "",
5
- `Source: ${profile.source.documentCount} document(s), ${profile.source.wordCount} words, ${profile.source.sentenceCount} sentences`,
6
- `Corpus confidence: ${capitalize(profile.source.confidence.band)} - ${profile.source.confidence.message}`,
7
- "",
8
- "Feature families:",
9
- ];
10
-
11
- for (const [name, family] of Object.entries(profile.families)) {
12
- lines.push(`- ${name}: ${family.confidence}`);
13
- if (family.warnings.length) {
14
- for (const warning of family.warnings) {
15
- lines.push(` warning: ${warning}`);
16
- }
17
- }
18
- }
19
-
20
- lines.push("", "Feature details:");
21
- for (const [name, family] of Object.entries(profile.families)) {
22
- lines.push(`- ${name}: ${featureSummary(name, family.features)}`);
23
- if (family.revisionHandles?.length) {
24
- lines.push(" Revision handles:");
25
- for (const handle of family.revisionHandles) {
26
- lines.push(` - ${handle}`);
27
- }
28
- }
29
- }
30
-
31
- lines.push("", "Guidance:");
32
- for (const rule of profile.guidance.draftingRules) {
33
- lines.push(`- ${rule}`);
34
- }
35
-
36
- lines.push("");
37
- return `${lines.join("\n")}`;
38
- }
39
-
40
- function capitalize(value) {
41
- return value.charAt(0).toUpperCase() + value.slice(1);
42
- }
43
-
44
- function featureSummary(name, features) {
45
- if (name === "rhythm") {
46
- return `sentenceWords.median=${features.sentenceWords.median}; paragraphWords.median=${features.paragraphWords.median}; listDensity=${features.listDensity}; quoteDensity=${features.quoteDensity}`;
47
- }
48
- if (name === "lexical") {
49
- return `wordCount=${features.wordCount}; contentTypeTokenRatio=${features.vocabularyRichness.contentTypeTokenRatio}; wordLength.median=${features.wordLength.median}; maskedCharacterFourgrams=${features.maskedCharacterFourgrams?.length ?? 0}; functionWordBigrams=${features.functionWordBigrams?.length ?? 0}`;
50
- }
51
- if (name === "register") {
52
- return `primary=${features.primary.value} (${features.primary.score}); alternates=${features.scores.slice(1, 4).map((score) => `${score.value}:${score.score}`).join(", ")}`;
53
- }
54
- if (name === "discourse") {
55
- return `transitionRates=${Object.entries(features.transitionRates).map(([key, value]) => `${key}:${value}`).join(", ")}; sentenceCallbacks=${features.sentenceCallbacks}`;
56
- }
57
- if (name === "rhetoricalShape") {
58
- return `openingMoves=${features.openingMoves.slice(0, 5).join(" -> ") || "none"}; moveTrigrams=${features.moveTrigrams?.slice(0, 2).map((item) => `${item.value}:${item.count}`).join(", ") || "none"}; commonSequences=${features.commonSequences.slice(0, 3).map((item) => `${item.value}:${item.count}`).join(", ") || "none"}`;
59
- }
60
- if (name === "evidence") {
61
- return `evidenceSentenceRate=${features.evidenceSentenceRate}; claimSentenceRate=${features.claimSentenceRate}; supportedClaimRate=${features.supportedClaimRate}; unsupportedClaimRate=${features.unsupportedClaimRate}; evidenceTypes=${features.evidenceTypes.map((item) => `${item.value}:${item.count}`).join(", ") || "none"}`;
62
- }
63
- if (name === "structure") {
64
- return `sectionWords.median=${features.sectionWords.median}; headingCount.median=${features.headingCount.median}; listDocumentRate=${features.listDocumentRate}; quoteDocumentRate=${features.quoteDocumentRate}`;
65
- }
66
- return JSON.stringify(features);
67
- }
1
+ export function renderInspectV2(profile) {
2
+ const lines = [
3
+ "Dravoice V2 Profile",
4
+ "",
5
+ `Source: ${profile.source.documentCount} document(s), ${profile.source.wordCount} words, ${profile.source.sentenceCount} sentences`,
6
+ `Corpus confidence: ${capitalize(profile.source.confidence.band)} - ${profile.source.confidence.message}`,
7
+ "",
8
+ "Feature families:",
9
+ ];
10
+
11
+ for (const [name, family] of Object.entries(profile.families)) {
12
+ lines.push(`- ${name}: ${family.confidence}`);
13
+ if (family.warnings.length) {
14
+ for (const warning of family.warnings) {
15
+ lines.push(` warning: ${warning}`);
16
+ }
17
+ }
18
+ }
19
+
20
+ lines.push("", "Feature details:");
21
+ for (const [name, family] of Object.entries(profile.families)) {
22
+ lines.push(`- ${name}: ${featureSummary(name, family.features)}`);
23
+ if (family.revisionHandles?.length) {
24
+ lines.push(" Revision handles:");
25
+ for (const handle of family.revisionHandles) {
26
+ lines.push(` - ${handle}`);
27
+ }
28
+ }
29
+ }
30
+
31
+ lines.push("", "Guidance:");
32
+ for (const rule of profile.guidance.draftingRules) {
33
+ lines.push(`- ${rule}`);
34
+ }
35
+
36
+ lines.push("");
37
+ return `${lines.join("\n")}`;
38
+ }
39
+
40
+ function capitalize(value) {
41
+ return value.charAt(0).toUpperCase() + value.slice(1);
42
+ }
43
+
44
+ function featureSummary(name, features) {
45
+ if (name === "rhythm") {
46
+ return `sentenceWords.median=${features.sentenceWords.median}; paragraphWords.median=${features.paragraphWords.median}; listDensity=${features.listDensity}; quoteDensity=${features.quoteDensity}`;
47
+ }
48
+ if (name === "lexical") {
49
+ return `wordCount=${features.wordCount}; contentTypeTokenRatio=${features.vocabularyRichness.contentTypeTokenRatio}; wordLength.median=${features.wordLength.median}; maskedCharacterFourgrams=${features.maskedCharacterFourgrams?.length ?? 0}; functionWordBigrams=${features.functionWordBigrams?.length ?? 0}`;
50
+ }
51
+ if (name === "register") {
52
+ return `primary=${features.primary.value} (${features.primary.score}); alternates=${features.scores.slice(1, 4).map((score) => `${score.value}:${score.score}`).join(", ")}`;
53
+ }
54
+ if (name === "discourse") {
55
+ return `transitionRates=${Object.entries(features.transitionRates).map(([key, value]) => `${key}:${value}`).join(", ")}; sentenceCallbacks=${features.sentenceCallbacks}`;
56
+ }
57
+ if (name === "rhetoricalShape") {
58
+ return `openingMoves=${features.openingMoves.slice(0, 5).join(" -> ") || "none"}; moveTrigrams=${features.moveTrigrams?.slice(0, 2).map((item) => `${item.value}:${item.count}`).join(", ") || "none"}; commonSequences=${features.commonSequences.slice(0, 3).map((item) => `${item.value}:${item.count}`).join(", ") || "none"}`;
59
+ }
60
+ if (name === "evidence") {
61
+ return `evidenceSentenceRate=${features.evidenceSentenceRate}; claimSentenceRate=${features.claimSentenceRate}; supportedClaimRate=${features.supportedClaimRate}; unsupportedClaimRate=${features.unsupportedClaimRate}; evidenceTypes=${features.evidenceTypes.map((item) => `${item.value}:${item.count}`).join(", ") || "none"}`;
62
+ }
63
+ if (name === "structure") {
64
+ return `sectionWords.median=${features.sectionWords.median}; headingCount.median=${features.headingCount.median}; listDocumentRate=${features.listDocumentRate}; quoteDocumentRate=${features.quoteDocumentRate}`;
65
+ }
66
+ return JSON.stringify(features);
67
+ }
package/src/v2/io-utils.js ADDED
@@ -0,0 +1,51 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+
4
+ const DEFAULT_MAX_READ_BYTES = 5 * 1024 * 1024;
5
+
6
+ export function readUtf8FileBounded(filePath, {
7
+ label = "File",
8
+ maxBytes = DEFAULT_MAX_READ_BYTES,
9
+ rejectBinary = true,
10
+ } = {}) {
11
+ const resolved = path.resolve(filePath);
12
+ const stats = fs.statSync(resolved);
13
+ if (!stats.isFile()) {
14
+ throw new Error(`${label} is not a regular file: ${filePath}`);
15
+ }
16
+ if (stats.size > maxBytes) {
17
+ throw new Error(`${label} ${displayPath(filePath)} exceeds the ${maxBytes} byte limit.`);
18
+ }
19
+ const contents = fs.readFileSync(resolved, "utf8");
20
+ if (rejectBinary && contents.includes("\0")) {
21
+ throw new Error(`${label} ${displayPath(filePath)} looks like binary-looking text and cannot be analyzed.`);
22
+ }
23
+ return contents;
24
+ }
25
+
26
+ export function readJsonFileBounded(filePath, options = {}) {
27
+ const label = options.label ?? "JSON file";
28
+ const contents = readUtf8FileBounded(filePath, { ...options, label });
29
+ try {
30
+ return JSON.parse(contents);
31
+ } catch (error) {
32
+ throw new Error(`${label} ${displayPath(filePath)} is not valid JSON: ${error.message}`);
33
+ }
34
+ }
35
+
36
+ export function writeUtf8FileSafely(filePath, contents) {
37
+ rejectSymlink(filePath);
38
+ fs.mkdirSync(path.dirname(filePath), { recursive: true });
39
+ rejectSymlink(filePath);
40
+ fs.writeFileSync(filePath, contents, "utf8");
41
+ }
42
+
43
+ function rejectSymlink(filePath) {
44
+ if (fs.existsSync(filePath) && fs.lstatSync(filePath).isSymbolicLink()) {
45
+ throw new Error(`Refusing to write through symlink: ${filePath}`);
46
+ }
47
+ }
48
+
49
+ function displayPath(filePath) {
50
+ return String(filePath).split(path.sep).join("/");
51
+ }
package/src/v2/profile.js CHANGED
@@ -1,203 +1,227 @@
1
- import fs from "node:fs";
2
- import path from "node:path";
3
- import { analyzeDiscourse } from "./analyzers/discourse.js";
4
- import { analyzeEvidence } from "./analyzers/evidence.js";
5
- import { analyzeLexical } from "./analyzers/lexical.js";
6
- import { analyzeRegister } from "./analyzers/register.js";
7
- import { analyzeRhetoricalShape } from "./analyzers/rhetorical-shape.js";
8
- import { analyzeRhythm } from "./analyzers/rhythm.js";
9
- import { analyzeStructure } from "./analyzers/structure.js";
10
- import { loadDocuments } from "./document-model.js";
11
- import {
12
- STYLOMETRIC_REFERENCES,
13
- defaultStyleThresholds,
14
- distanceByFamily,
15
- percentile,
16
- stabilityFromDistances,
17
- } from "./stylometry.js";
18
-
19
- export function learnVoicePackV2({ examplesDir, outDir }) {
20
- const documents = loadDocuments({ examplesDir });
21
- const profile = buildVoiceProfileV2({ documents });
22
- if (outDir) {
23
- writeVoicePackV2(outDir, profile);
24
- }
25
- return profile;
26
- }
27
-
28
- export function loadVoicePackV2(voiceDir) {
29
- const profilePath = path.join(voiceDir, "profile.json");
30
- if (!fs.existsSync(profilePath)) {
31
- throw new Error(`No Dravoice V2 profile found at ${voiceDir}`);
32
- }
33
- const profile = JSON.parse(fs.readFileSync(profilePath, "utf8"));
34
- if (profile.schemaVersion !== 2) {
35
- throw new Error(`Expected a Dravoice V2 profile at ${voiceDir}; found schemaVersion ${profile.schemaVersion ?? "unknown"}.`);
36
- }
37
- return profile;
38
- }
39
-
40
- export function buildVoiceProfileV2({ documents }) {
41
- const source = sourceSummary(documents);
42
- const families = analyzeFeatureFamilies(documents);
43
-
44
- return {
45
- schemaVersion: 2,
46
- generatedBy: "dravoice-v2",
47
- tool: { name: "Dravoice", cli: "drav" },
48
- source,
49
- families,
50
- guidance: guidanceFor({ source, families }),
51
- calibration: {
52
- featureStability: Object.fromEntries(Object.entries(families).map(([name, family]) => [name, family.confidence])),
53
- tolerances: {
54
- rhythmMedianWords: toleranceFor(source.confidence.band, 5, 8, 12),
55
- evidenceRate: toleranceFor(source.confidence.band, 0.12, 0.18, 0.25),
56
- },
57
- styleThresholds: styleThresholdsFor(documents, families),
58
- minimumDraftSize: {
59
- words: source.confidence.band === "weak" ? 25 : 35,
60
- sentences: source.confidence.band === "weak" ? 3 : 4,
61
- },
62
- },
63
- };
64
- }
65
-
66
- function analyzeFeatureFamilies(documents) {
67
- return {
68
- rhythm: analyzeRhythm(documents),
69
- lexical: analyzeLexical(documents),
70
- register: analyzeRegister(documents),
71
- discourse: analyzeDiscourse(documents),
72
- rhetoricalShape: analyzeRhetoricalShape(documents),
73
- evidence: analyzeEvidence(documents),
74
- structure: analyzeStructure(documents),
75
- };
76
- }
77
-
78
- function styleThresholdsFor(documents, fallbackFamilies) {
79
- const fallbackThresholds = defaultStyleThresholds();
80
- const distancesByFamily = Object.fromEntries(Object.keys(fallbackFamilies).map((family) => [family, []]));
81
-
82
- if (documents.length >= 2) {
83
- for (let index = 0; index < documents.length; index += 1) {
84
- const referenceDocuments = documents.filter((_, candidateIndex) => candidateIndex !== index);
85
- const referenceFamilies = analyzeFeatureFamilies(referenceDocuments);
86
- const heldoutFamilies = analyzeFeatureFamilies([documents[index]]);
87
- for (const family of Object.keys(fallbackFamilies)) {
88
- distancesByFamily[family].push(distanceByFamily(
89
- family,
90
- referenceFamilies[family].features,
91
- heldoutFamilies[family].features,
92
- ));
93
- }
94
- }
95
- }
96
-
97
- const families = {};
98
- for (const family of Object.keys(fallbackFamilies)) {
99
- const observations = distancesByFamily[family];
100
- const observedThreshold = observations.length > 0 ? percentile(observations, 0.9) : 0;
101
- const fallback = fallbackThresholds[family].threshold;
102
- families[family] = {
103
- threshold: Math.max(0.01, Math.min(0.95, observedThreshold || fallback)),
104
- observations: observations.length,
105
- stability: stabilityFromDistances(observations),
106
- };
107
- }
108
-
109
- return {
110
- method: "leave-one-out-cosine-delta",
111
- references: STYLOMETRIC_REFERENCES,
112
- families,
113
- };
114
- }
115
-
116
- function writeVoicePackV2(outDir, profile) {
117
- fs.mkdirSync(outDir, { recursive: true });
118
- fs.writeFileSync(path.join(outDir, "profile.json"), `${JSON.stringify(profile, null, 2)}\n`, "utf8");
119
- fs.writeFileSync(
120
- path.join(outDir, ".dravoice.yml"),
121
- [
122
- "schemaVersion: 2",
123
- "generatedBy: dravoice-v2",
124
- `confidence: ${profile.source.confidence.band}`,
125
- "",
126
- ].join("\n"),
127
- "utf8",
128
- );
129
- }
130
-
131
- function sourceSummary(documents) {
132
- const wordCount = documents.reduce((sum, document) => sum + document.wordCount, 0);
133
- const sentenceCount = documents.reduce((sum, document) => sum + document.sentences.length, 0);
134
- const confidence = confidenceFor(documents.length, sentenceCount, wordCount);
135
- return {
136
- files: documents.map((document, index) => ({
137
- id: `source-${index + 1}`,
138
- extension: path.extname(document.path || document.file).toLowerCase() || ".txt",
139
- wordCount: document.wordCount,
140
- sentenceCount: document.sentences.length,
141
- })),
142
- documentCount: documents.length,
143
- wordCount,
144
- sentenceCount,
145
- genres: [],
146
- confidence,
147
- };
148
- }
149
-
150
- function confidenceFor(documentCount, sentenceCount, wordCount) {
151
- if (documentCount >= 10 && sentenceCount >= 80 && wordCount >= 5000) {
152
- return { band: "deep", message: "Enough material for stricter family-level review." };
153
- }
154
- if (documentCount >= 5 && sentenceCount >= 30) {
155
- return { band: "strong", message: "Enough examples for stable family-level review." };
156
- }
157
- if (documentCount >= 3 && sentenceCount >= 10) {
158
- return { band: "usable", message: "Enough text for prompt guidance and cautious review." };
159
- }
160
- return { band: "weak", message: "Limited corpus; use findings as weak signals." };
161
- }
162
-
163
- function guidanceFor({ source, families }) {
164
- const primaryRegister = families.register.features.primary.value;
165
- const evidenceRate = families.evidence.features.evidenceSentenceRate;
166
- const sentenceMedian = families.rhythm.features.sentenceWords.median;
167
- const opening = families.rhetoricalShape.features.openingMoves.slice(0, 3).join(" -> ");
168
-
169
- const draftingRules = [
170
- `Keep sentence pacing near the learned median of ${sentenceMedian} words when it fits the draft.`,
171
- `Use ${primaryRegister} register as the default genre signal unless the piece intentionally changes genre.`,
172
- evidenceRate > 0.35
173
- ? "Anchor broad claims with concrete scenes, numbers, quotes, citations, or specific examples."
174
- : "Do not force evidence density higher than the source corpus supports.",
175
- opening
176
- ? `Prefer opening moves compatible with: ${opening}.`
177
- : "Start from the article's real subject rather than generic positioning.",
178
- ];
179
-
180
- return {
181
- summary: [
182
- `Local Dravoice V2 profile from ${source.documentCount} document(s), ${source.wordCount} words, confidence ${source.confidence.band}.`,
183
- `Primary register signal: ${primaryRegister}.`,
184
- ],
185
- draftingRules,
186
- avoid: [
187
- "Do not treat topic vocabulary as proof of voice fit.",
188
- "Do not claim a draft is or is not the writer's true voice.",
189
- "Do not invent concrete evidence to satisfy a style finding.",
190
- ],
191
- examples: [],
192
- };
193
- }
194
-
195
- function toleranceFor(band, deep, strong, weak) {
196
- if (band === "deep") {
197
- return deep;
198
- }
199
- if (band === "strong") {
200
- return strong;
201
- }
202
- return weak;
203
- }
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { analyzeDiscourse } from "./analyzers/discourse.js";
4
+ import { analyzeEvidence } from "./analyzers/evidence.js";
5
+ import { analyzeLexical } from "./analyzers/lexical.js";
6
+ import { analyzeRegister } from "./analyzers/register.js";
7
+ import { analyzeRhetoricalShape } from "./analyzers/rhetorical-shape.js";
8
+ import { analyzeRhythm } from "./analyzers/rhythm.js";
9
+ import { analyzeStructure } from "./analyzers/structure.js";
10
+ import { loadDocuments } from "./document-model.js";
11
+ import { readJsonFileBounded, writeUtf8FileSafely } from "./io-utils.js";
12
+ import {
13
+ STYLOMETRIC_REFERENCES,
14
+ defaultStyleThresholds,
15
+ distanceByFamily,
16
+ percentile,
17
+ stabilityFromDistances,
18
+ } from "./stylometry.js";
19
+
20
+ export function learnVoicePackV2({ examplesDir, outDir, excludePaths = [] }) {
21
+ const documents = loadDocuments({ examplesDir, excludePaths });
22
+ const profile = buildVoiceProfileV2({ documents });
23
+ if (outDir) {
24
+ writeVoicePackV2(outDir, profile);
25
+ }
26
+ return profile;
27
+ }
28
+
29
+ export function loadVoicePackV2(voiceDir) {
30
+ const profilePath = path.join(voiceDir, "profile.json");
31
+ if (!fs.existsSync(profilePath)) {
32
+ throw new Error(`No Dravoice V2 profile found at ${voiceDir}`);
33
+ }
34
+ const profile = readJsonFileBounded(profilePath, { label: "Dravoice profile", maxBytes: 2 * 1024 * 1024 });
35
+ if (profile.schemaVersion !== 2) {
36
+ throw new Error(`Expected a Dravoice V2 profile at ${voiceDir}; found schemaVersion ${profile.schemaVersion ?? "unknown"}.`);
37
+ }
38
+ validateVoiceProfileV2(profile, voiceDir);
39
+ return profile;
40
+ }
41
+
42
+ export function buildVoiceProfileV2({ documents }) {
43
+ const source = sourceSummary(documents);
44
+ const families = analyzeFeatureFamilies(documents);
45
+
46
+ return {
47
+ schemaVersion: 2,
48
+ generatedBy: "dravoice-v2",
49
+ tool: { name: "Dravoice", cli: "drav" },
50
+ source,
51
+ families,
52
+ guidance: guidanceFor({ source, families }),
53
+ calibration: {
54
+ featureStability: Object.fromEntries(Object.entries(families).map(([name, family]) => [name, family.confidence])),
55
+ tolerances: {
56
+ rhythmMedianWords: toleranceFor(source.confidence.band, 5, 8, 12),
57
+ evidenceRate: toleranceFor(source.confidence.band, 0.12, 0.18, 0.25),
58
+ },
59
+ styleThresholds: styleThresholdsFor(documents, families),
60
+ minimumDraftSize: {
61
+ words: source.confidence.band === "weak" ? 25 : 35,
62
+ sentences: source.confidence.band === "weak" ? 3 : 4,
63
+ },
64
+ },
65
+ };
66
+ }
67
+
68
+ function analyzeFeatureFamilies(documents) {
69
+ return {
70
+ rhythm: analyzeRhythm(documents),
71
+ lexical: analyzeLexical(documents),
72
+ register: analyzeRegister(documents),
73
+ discourse: analyzeDiscourse(documents),
74
+ rhetoricalShape: analyzeRhetoricalShape(documents),
75
+ evidence: analyzeEvidence(documents),
76
+ structure: analyzeStructure(documents),
77
+ };
78
+ }
79
+
80
+ function styleThresholdsFor(documents, fallbackFamilies) {
81
+ const fallbackThresholds = defaultStyleThresholds();
82
+ const distancesByFamily = Object.fromEntries(Object.keys(fallbackFamilies).map((family) => [family, []]));
83
+
84
+ if (documents.length >= 2) {
85
+ for (let index = 0; index < documents.length; index += 1) {
86
+ const referenceDocuments = documents.filter((_, candidateIndex) => candidateIndex !== index);
87
+ const referenceFamilies = analyzeFeatureFamilies(referenceDocuments);
88
+ const heldoutFamilies = analyzeFeatureFamilies([documents[index]]);
89
+ for (const family of Object.keys(fallbackFamilies)) {
90
+ distancesByFamily[family].push(distanceByFamily(
91
+ family,
92
+ referenceFamilies[family].features,
93
+ heldoutFamilies[family].features,
94
+ ));
95
+ }
96
+ }
97
+ }
98
+
99
+ const families = {};
100
+ for (const family of Object.keys(fallbackFamilies)) {
101
+ const observations = distancesByFamily[family];
102
+ const observedThreshold = observations.length > 0 ? percentile(observations, 0.9) : 0;
103
+ const fallback = fallbackThresholds[family].threshold;
104
+ families[family] = {
105
+ threshold: Math.max(0.01, Math.min(0.95, observedThreshold || fallback)),
106
+ observations: observations.length,
107
+ stability: stabilityFromDistances(observations),
108
+ };
109
+ }
110
+
111
+ return {
112
+ method: "leave-one-out-cosine-delta",
113
+ references: STYLOMETRIC_REFERENCES,
114
+ families,
115
+ };
116
+ }
117
+
118
+ function writeVoicePackV2(outDir, profile) {
119
+ fs.mkdirSync(outDir, { recursive: true });
120
+ writeUtf8FileSafely(path.join(outDir, "profile.json"), `${JSON.stringify(profile, null, 2)}\n`);
121
+ writeUtf8FileSafely(
122
+ path.join(outDir, ".dravoice.yml"),
123
+ [
124
+ "schemaVersion: 2",
125
+ "generatedBy: dravoice-v2",
126
+ `confidence: ${profile.source.confidence.band}`,
127
+ "",
128
+ ].join("\n"),
129
+ );
130
+ }
131
+
132
+ function validateVoiceProfileV2(profile, voiceDir) {
133
+ assertObject(profile.source, "profile.source", voiceDir);
134
+ assertObject(profile.source.confidence, "profile.source.confidence", voiceDir);
135
+ assertObject(profile.families, "profile.families", voiceDir);
136
+ for (const family of ["rhythm", "lexical", "register", "discourse", "rhetoricalShape", "evidence", "structure"]) {
137
+ assertObject(profile.families[family], `profile.families.${family}`, voiceDir);
138
+ assertObject(profile.families[family].features, `profile.families.${family}.features`, voiceDir);
139
+ }
140
+ assertObject(profile.guidance, "profile.guidance", voiceDir);
141
+ if (!Array.isArray(profile.guidance.summary) || !Array.isArray(profile.guidance.draftingRules) || !Array.isArray(profile.guidance.avoid)) {
142
+ throw new Error(`Invalid Dravoice V2 profile at ${voiceDir}: profile.guidance must include summary, draftingRules, and avoid arrays.`);
143
+ }
144
+ assertObject(profile.calibration, "profile.calibration", voiceDir);
145
+ assertObject(profile.calibration.minimumDraftSize, "profile.calibration.minimumDraftSize", voiceDir);
146
+ assertObject(profile.calibration.styleThresholds, "profile.calibration.styleThresholds", voiceDir);
147
+ }
148
+
149
+ function assertObject(value, field, voiceDir) {
150
+ if (!value || typeof value !== "object" || Array.isArray(value)) {
151
+ throw new Error(`Invalid Dravoice V2 profile at ${voiceDir}: missing ${field}.`);
152
+ }
153
+ }
154
+
155
+ function sourceSummary(documents) {
156
+ const wordCount = documents.reduce((sum, document) => sum + document.wordCount, 0);
157
+ const sentenceCount = documents.reduce((sum, document) => sum + document.sentences.length, 0);
158
+ const confidence = confidenceFor(documents.length, sentenceCount, wordCount);
159
+ return {
160
+ files: documents.map((document, index) => ({
161
+ id: `source-${index + 1}`,
162
+ extension: path.extname(document.path || document.file).toLowerCase() || ".txt",
163
+ wordCount: document.wordCount,
164
+ sentenceCount: document.sentences.length,
165
+ })),
166
+ documentCount: documents.length,
167
+ wordCount,
168
+ sentenceCount,
169
+ genres: [],
170
+ confidence,
171
+ };
172
+ }
173
+
174
+ function confidenceFor(documentCount, sentenceCount, wordCount) {
175
+ if (documentCount >= 10 && sentenceCount >= 80 && wordCount >= 5000) {
176
+ return { band: "deep", message: "Enough material for stricter family-level review." };
177
+ }
178
+ if (documentCount >= 5 && sentenceCount >= 30) {
179
+ return { band: "strong", message: "Enough examples for stable family-level review." };
180
+ }
181
+ if (documentCount >= 3 && sentenceCount >= 10) {
182
+ return { band: "usable", message: "Enough text for prompt guidance and cautious review." };
183
+ }
184
+ return { band: "weak", message: "Limited corpus; use findings as weak signals." };
185
+ }
186
+
187
+ function guidanceFor({ source, families }) {
188
+ const primaryRegister = families.register.features.primary.value;
189
+ const evidenceRate = families.evidence.features.evidenceSentenceRate;
190
+ const sentenceMedian = families.rhythm.features.sentenceWords.median;
191
+ const opening = families.rhetoricalShape.features.openingMoves.slice(0, 3).join(" -> ");
192
+
193
+ const draftingRules = [
194
+ `Keep sentence pacing near the learned median of ${sentenceMedian} words when it fits the draft.`,
195
+ `Use ${primaryRegister} register as the default genre signal unless the piece intentionally changes genre.`,
196
+ evidenceRate > 0.35
197
+ ? "Anchor broad claims with concrete scenes, numbers, quotes, citations, or specific examples."
198
+ : "Do not force evidence density higher than the source corpus supports.",
199
+ opening
200
+ ? `Prefer opening moves compatible with: ${opening}.`
201
+ : "Start from the article's real subject rather than generic positioning.",
202
+ ];
203
+
204
+ return {
205
+ summary: [
206
+ `Local Dravoice V2 profile from ${source.documentCount} document(s), ${source.wordCount} words, confidence ${source.confidence.band}.`,
207
+ `Primary register signal: ${primaryRegister}.`,
208
+ ],
209
+ draftingRules,
210
+ avoid: [
211
+ "Do not treat topic vocabulary as proof of voice fit.",
212
+ "Do not claim a draft is or is not the writer's true voice.",
213
+ "Do not invent concrete evidence to satisfy a style finding.",
214
+ ],
215
+ examples: [],
216
+ };
217
+ }
218
+
219
+ function toleranceFor(band, deep, strong, weak) {
220
+ if (band === "deep") {
221
+ return deep;
222
+ }
223
+ if (band === "strong") {
224
+ return strong;
225
+ }
226
+ return weak;
227
+ }