dravoice 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,30 +5,22 @@ import { voicePromptPackV2 } from "./prompt.js";
5
5
  import { reviewVoiceDraftV2 } from "./review.js";
6
6
  import { clampScore } from "./text-utils.js";
7
7
 
8
- const BENCHMARK_SCHEMA_VERSION = 2;
9
- const GENERATED_BY = "dravoice-v2-benchmark";
10
- const VOICE_EXTENSIONS = new Set([".md", ".mdx", ".txt"]);
11
- const SKIP_DIRS = new Set([".git", "dist", "build", "node_modules", ".venv", "venv", "__pycache__"]);
12
- const SOURCE_SNAPSHOT_MARKER = ".dravoice-benchmark-sources";
13
- const BENCHMARK_ARTIFACT_DIRS = new Set(["prompts", "drafts", "judge"]);
14
-
15
- export function prepareVoiceBenchmark({ examplesDir, topic, outDir, seed = 1, cwd = process.cwd() }) {
16
- const examplesRoot = path.resolve(resolvePath(cwd, examplesDir));
17
- const outputRoot = path.resolve(resolvePath(cwd, outDir));
18
- const normalizedSeed = normalizeSeed(seed);
19
- const sourceFiles = walkVoiceFiles(examplesRoot);
20
- if (sourceFiles.length === 0) {
21
- throw new Error(`No Markdown, MDX, or text examples found at ${examplesDir}`);
22
- }
23
-
24
- const voicePackDir = path.join(outputRoot, "voice-pack");
25
- const promptsDir = path.join(outputRoot, "prompts");
26
- const draftsDir = path.join(outputRoot, "drafts");
27
- fs.mkdirSync(promptsDir, { recursive: true });
28
- fs.mkdirSync(draftsDir, { recursive: true });
29
-
30
- const profile = learnVoicePackV2({ examplesDir: examplesRoot, outDir: voicePackDir });
31
- const sources = sourceFiles.map((filePath, index) => sourceArticle(filePath, examplesRoot, index));
8
+ const BENCHMARK_SCHEMA_VERSION = 2;
9
+ const GENERATED_BY = "dravoice-v2-benchmark";
10
+
11
+ export function prepareVoiceBenchmark({ examplesDir, topic, outDir, seed = 1, cwd = process.cwd() }) {
12
+ const examplesRoot = path.resolve(resolvePath(cwd, examplesDir));
13
+ const outputRoot = path.resolve(resolvePath(cwd, outDir));
14
+ const normalizedSeed = normalizeSeed(seed);
15
+
16
+ const voicePackDir = path.join(outputRoot, "voice-pack");
17
+ const promptsDir = path.join(outputRoot, "prompts");
18
+ const draftsDir = path.join(outputRoot, "drafts");
19
+ fs.mkdirSync(promptsDir, { recursive: true });
20
+ fs.mkdirSync(draftsDir, { recursive: true });
21
+
22
+ const profile = learnVoicePackV2({ examplesDir: examplesRoot, outDir: voicePackDir });
23
+ const sources = benchmarkSourcesFromProfile(profile);
32
24
  const blind = blindMapping(normalizedSeed);
33
25
  const benchmark = {
34
26
  schemaVersion: BENCHMARK_SCHEMA_VERSION,
@@ -538,58 +530,56 @@ function assertObject(value, field) {
538
530
  }
539
531
  }
540
532
 
541
- function sourceArticle(filePath, rootDir, index = 0) {
542
- const contents = fs.readFileSync(filePath, "utf8");
543
- const extension = path.extname(filePath).toLowerCase() || ".txt";
544
- return {
545
- id: `source-${index + 1}`,
546
- path: `source-${index + 1}${extension}`,
547
- title: `Source ${index + 1}`,
548
- words: (contents.match(/[a-z][a-z0-9'-]*/gi) ?? []).length,
549
- };
550
- }
551
-
552
- function benchmarkSources(benchmark, resolved) {
553
- return resolved.corpus.files.map((file, index) => ({
554
- id: file.id ?? `source-${index + 1}`,
555
- path: file.path,
556
- title: file.title ?? `Source ${index + 1}`,
557
- words: file.words,
558
- }));
559
- }
560
-
561
- function benchmarkDrafts(benchmark, resolved) {
562
- return ["A", "B"].map((label) => {
563
- const draftPath = benchmark.blind.draftFiles[label];
564
- const resolvedDraftPath = resolved.blind.draftFiles[label];
565
- return {
566
- label,
567
- path: draftPath,
568
- words: fs.existsSync(resolvedDraftPath)
569
- ? (fs.readFileSync(resolvedDraftPath, "utf8").match(/[a-z][a-z0-9'-]*/gi) ?? []).length
570
- : 0,
571
- };
572
- });
573
- }
574
-
575
- function blindMapping(seed) {
576
- const baselineIsA = seededRandom(seed) < 0.5;
577
- const labels = {
578
- baseline: baselineIsA ? "A" : "B",
579
- voiceAssisted: baselineIsA ? "B" : "A",
580
- };
581
- return {
582
- labels,
583
- drafts: {
584
- A: baselineIsA ? "baseline" : "voice-assisted",
585
- B: baselineIsA ? "voice-assisted" : "baseline",
586
- },
587
- draftFiles: {
588
- A: baselineIsA ? "drafts/baseline.md" : "drafts/voice-assisted.md",
589
- B: baselineIsA ? "drafts/voice-assisted.md" : "drafts/baseline.md",
590
- },
591
- };
592
- }
533
/**
 * Derives the anonymized benchmark source list from a learned voice profile.
 *
 * Each entry gets a positional path (`source-N<ext>`) and title (`Source N`);
 * the original file name is never copied into the result.
 *
 * @param {object} profile - Voice profile exposing `source.files`.
 * @returns {Array<{id: string, path: string, title: string, words: *}>}
 *   One entry per profile file, in profile order. `words` is the file's
 *   `wordCount` passed through unchanged.
 */
function benchmarkSourcesFromProfile(profile) {
  return profile.source.files.map((file, index) => {
    const ordinal = index + 1;
    // Any falsy extension (missing or empty) is treated as plain text.
    const extension = file.extension || ".txt";
    return {
      id: file.id ?? `source-${ordinal}`,
      path: `source-${ordinal}${extension}`,
      title: `Source ${ordinal}`,
      words: file.wordCount,
    };
  });
}
541
+
542
/**
 * Lists the corpus files recorded in a resolved benchmark, filling in
 * positional defaults for a missing id or title.
 *
 * @param {object} benchmark - Not read by this function; kept so the
 *   signature stays parallel with the other benchmark helpers.
 * @param {object} resolved - Resolved benchmark exposing `corpus.files`.
 * @returns {Array<{id: *, path: *, title: *, words: *}>} One entry per corpus file.
 */
function benchmarkSources(benchmark, resolved) {
  return resolved.corpus.files.map((file, index) => {
    const ordinal = index + 1;
    const fallbackId = `source-${ordinal}`;
    const fallbackTitle = `Source ${ordinal}`;
    return {
      // Only null/undefined trigger the positional fallbacks.
      id: file.id ?? fallbackId,
      path: file.path,
      title: file.title ?? fallbackTitle,
      words: file.words,
    };
  });
}
550
+
551
/**
 * Describes the two blind drafts ("A" and "B") of a benchmark run.
 *
 * @param {object} benchmark - Benchmark whose `blind.draftFiles[label]` is
 *   reported as each draft's `path`.
 * @param {object} resolved - Benchmark whose `blind.draftFiles[label]` is the
 *   location actually read from disk.
 * @returns {Array<{label: string, path: *, words: number}>} Entries for "A"
 *   then "B". `words` is 0 when the draft file does not exist.
 */
function benchmarkDrafts(benchmark, resolved) {
  const drafts = [];
  for (const label of ["A", "B"]) {
    const reportedPath = benchmark.blind.draftFiles[label];
    const diskPath = resolved.blind.draftFiles[label];

    let words = 0;
    if (fs.existsSync(diskPath)) {
      // A word is an ASCII letter followed by letters, digits, apostrophes, or hyphens.
      const matches = fs.readFileSync(diskPath, "utf8").match(/[a-z][a-z0-9'-]*/gi);
      words = (matches ?? []).length;
    }

    drafts.push({ label, path: reportedPath, words });
  }
  return drafts;
}
564
+
565
/**
 * Decides which blind label ("A" or "B") hides the baseline draft and which
 * hides the voice-assisted draft.
 *
 * The baseline becomes "A" when `seededRandom(seed)` is below 0.5, otherwise "B".
 *
 * @param {*} seed - Seed forwarded to `seededRandom`.
 * @returns {{labels: object, drafts: object, draftFiles: object}} The mapping
 *   in both directions (arm to label, label to arm) plus each label's draft file.
 */
function blindMapping(seed) {
  const baselineIsA = seededRandom(seed) < 0.5;

  if (baselineIsA) {
    return {
      labels: { baseline: "A", voiceAssisted: "B" },
      drafts: { A: "baseline", B: "voice-assisted" },
      draftFiles: { A: "drafts/baseline.md", B: "drafts/voice-assisted.md" },
    };
  }

  return {
    labels: { baseline: "B", voiceAssisted: "A" },
    drafts: { A: "voice-assisted", B: "baseline" },
    draftFiles: { A: "drafts/voice-assisted.md", B: "drafts/baseline.md" },
  };
}
593
583
 
594
584
  function seededRandom(seed) {
595
585
  const next = (normalizeSeed(seed) * 1664525 + 1013904223) >>> 0;
@@ -608,66 +598,7 @@ function normalizeSeed(seed) {
608
598
  return Number(parsed);
609
599
  }
610
600
 
611
- function walkVoiceFiles(rootDir) {
612
- if (!fs.existsSync(rootDir)) {
613
- return [];
614
- }
615
- const result = [];
616
- for (const entry of fs.readdirSync(rootDir, { withFileTypes: true }).sort((a, b) => a.name.localeCompare(b.name))) {
617
- const fullPath = path.join(rootDir, entry.name);
618
- if (entry.isDirectory()) {
619
- if (!isSkippedVoiceDir(fullPath, entry.name)) {
620
- result.push(...walkVoiceFiles(fullPath));
621
- }
622
- } else if (VOICE_EXTENSIONS.has(path.extname(entry.name).toLowerCase()) && !isGeneratedBenchmarkMarkdown(fullPath)) {
623
- result.push(fullPath);
624
- }
625
- }
626
- return result.sort((a, b) => toPosix(path.relative(rootDir, a)).localeCompare(toPosix(path.relative(rootDir, b))));
627
- }
628
-
629
- function isSkippedVoiceDir(dirPath, name) {
630
- if (SKIP_DIRS.has(name)) {
631
- return true;
632
- }
633
- if (name === "voice-pack" || name === "dravoice-voice" || name === ".dravoice") {
634
- return true;
635
- }
636
- if (fs.existsSync(path.join(dirPath, SOURCE_SNAPSHOT_MARKER))) {
637
- return true;
638
- }
639
- if (isBenchmarkRunDir(dirPath)) {
640
- return true;
641
- }
642
- if (BENCHMARK_ARTIFACT_DIRS.has(name) && isBenchmarkRunDir(path.dirname(dirPath))) {
643
- return true;
644
- }
645
- return fs.existsSync(path.join(dirPath, "profile.json"));
646
- }
647
-
648
- function isBenchmarkRunDir(dirPath) {
649
- const benchmarkPath = path.join(dirPath, "benchmark.json");
650
- if (!fs.existsSync(benchmarkPath)) {
651
- return false;
652
- }
653
- try {
654
- const benchmark = JSON.parse(fs.readFileSync(benchmarkPath, "utf8"));
655
- return benchmark?.generatedBy === GENERATED_BY;
656
- } catch {
657
- return false;
658
- }
659
- }
660
-
661
- function isGeneratedBenchmarkMarkdown(filePath) {
662
- const basename = path.basename(filePath).toLowerCase();
663
- if (!["baseline-writer.md", "voice-writer.md", "judge.md", "report.md"].includes(basename)) {
664
- return false;
665
- }
666
- const contents = fs.readFileSync(filePath, "utf8");
667
- return /^# (?:Baseline Writer Prompt|Voice-Assisted Writer Prompt|Blind V2 Voice Benchmark Judge Prompt|Dravoice V2 Voice Benchmark Report)/m.test(contents);
668
- }
669
-
670
- function writeIfMissing(filePath, contents) {
601
+ function writeIfMissing(filePath, contents) {
671
602
  if (!fs.existsSync(filePath)) {
672
603
  fs.writeFileSync(filePath, contents, "utf8");
673
604
  }
package/src/v2/brief.js CHANGED
@@ -1,146 +1,146 @@
1
- import fs from "node:fs";
2
- import path from "node:path";
3
- import { evidenceTypes } from "./analyzers/evidence.js";
4
- import { parseDocument } from "./document-model.js";
5
- import { loadVoicePackV2 } from "./profile.js";
6
-
7
- export function voiceArticleBriefV2({ voice, topic, evidence, cwd = process.cwd() }) {
8
- const profile = typeof voice === "string" ? loadVoicePackV2(resolvePath(cwd, voice)) : voice;
9
- const evidenceResult = evidence ? evidenceAnchorsFromFile({ evidence, cwd }) : {
10
- source: null,
11
- anchors: [],
12
- };
13
-
14
- return {
15
- schemaVersion: 2,
16
- generatedBy: "dravoice-v2-brief",
17
- topic,
18
- voice: {
19
- corpusConfidence: profile.source.confidence,
20
- sourceFileCount: profile.source.documentCount,
21
- sourceWordCount: profile.source.wordCount,
22
- primaryRegister: profile.families.register.features.primary.value,
23
- evidenceSentenceRate: profile.families.evidence.features.evidenceSentenceRate,
24
- featureFamilies: Object.keys(profile.families),
25
- draftingRules: profile.guidance.draftingRules.slice(0, 5),
26
- },
27
- workingThesis: `Draft a grounded article about ${topic}. Let the supplied evidence set the size of each claim before broadening the lesson.`,
28
- evidence: evidenceResult,
29
- missingEvidence: missingEvidenceFor({ topic, evidenceAnchors: evidenceResult.anchors }),
30
- outline: outlineFor(profile),
31
- voiceCautions: [
32
- ...profile.guidance.avoid,
33
- "Mark unsupported claims as [specific evidence needed] instead of inventing proof.",
34
- ].slice(0, 5),
35
- };
36
- }
37
-
38
- export function renderVoiceBriefV2(brief) {
39
- const lines = [
40
- `# Article Brief: ${brief.topic}`,
41
- "",
42
- "## Voice Source",
43
- "",
44
- `- Corpus confidence: ${capitalize(brief.voice.corpusConfidence.band)} - ${brief.voice.corpusConfidence.message}`,
45
- `- Source files: ${brief.voice.sourceFileCount}`,
46
- `- Primary register: ${brief.voice.primaryRegister}`,
47
- `- Evidence sentence rate: ${brief.voice.evidenceSentenceRate}`,
48
- "",
49
- "## Working Thesis",
50
- "",
51
- `- ${brief.workingThesis}`,
52
- "",
53
- "## Evidence Anchors",
54
- "",
55
- ];
56
-
57
- if (brief.evidence.anchors.length) {
58
- for (const item of brief.evidence.anchors) {
59
- const typeList = item.types.length ? ` (${item.types.join(", ")})` : "";
60
- lines.push(`- ${brief.evidence.source}:${item.line}${typeList} - ${item.text}`);
61
- }
62
- } else {
63
- lines.push("- [specific evidence needed] Add notes, dates, quotes, examples, or source links before drafting broad claims.");
64
- }
65
-
66
- lines.push("", "## Missing Evidence", "");
67
- lines.push(...brief.missingEvidence.map((item) => `- ${item}`));
68
-
69
- lines.push("", "## Outline", "");
70
- lines.push(...brief.outline.map((item, index) => `${index + 1}. ${item}`));
71
-
72
- lines.push("", "## Voice Cautions", "");
73
- lines.push(...brief.voiceCautions.map((item) => `- ${item}`));
74
-
75
- lines.push(
76
- "",
77
- "## Drafting Prompt",
78
- "",
79
- `Write the article about ${brief.topic} using the evidence anchors above. Keep claims close to concrete support, follow the voice cautions, and write [specific evidence needed] anywhere the brief does not supply enough ground.`,
80
- "",
81
- );
82
-
83
- return lines.join("\n");
84
- }
85
-
86
- function evidenceAnchorsFromFile({ evidence, cwd }) {
87
- const evidencePath = resolvePath(cwd, evidence);
88
- const contents = fs.readFileSync(evidencePath, "utf8");
89
- const document = parseDocument({
90
- filePath: evidencePath,
91
- rootDir: cwd,
92
- contents,
93
- });
94
-
95
- return {
96
- source: displayPath(evidencePath, cwd),
97
- anchors: document.sentences
98
- .map((sentence) => ({
99
- line: sentence.line,
100
- text: sentence.text,
101
- types: evidenceTypes(sentence.text),
102
- }))
103
- .filter((sentence) => sentence.types.length > 0)
104
- .slice(0, 8),
105
- };
106
- }
107
-
108
- function missingEvidenceFor({ topic, evidenceAnchors }) {
109
- const items = [
110
- `Add [specific evidence needed] for the central claim about ${topic}.`,
111
- "Add [specific evidence needed] for any number, date, quote, source, or example the article depends on.",
112
- ];
113
- if (evidenceAnchors.length === 0) {
114
- items.unshift("No evidence anchors were detected; collect concrete notes before asking for a full draft.");
115
- }
116
- return items;
117
- }
118
-
119
- function outlineFor(profile) {
120
- const opening = profile.families.rhetoricalShape.features.openingMoves.slice(0, 3).join(" -> ");
121
- const sentenceMedian = profile.families.rhythm.features.sentenceWords.median;
122
- return [
123
- opening
124
- ? `Start from a concrete artifact or observation, keeping the opening shape compatible with: ${opening}.`
125
- : "Start from a concrete artifact or observation before making the larger claim.",
126
- "Name the pressure, question, or practical stakes that make the evidence matter.",
127
- `Develop the article in the learned register with sentence pacing near the ${sentenceMedian}-word median where it fits.`,
128
- "Close by returning to the evidence and leaving the reader with a practical handle, not a generic conclusion.",
129
- ];
130
- }
131
-
132
- function resolvePath(cwd, value) {
133
- return path.isAbsolute(value) ? value : path.join(cwd, value);
134
- }
135
-
136
- function displayPath(filePath, rootDir) {
137
- const relative = path.relative(rootDir, filePath);
138
- if (relative && !relative.startsWith("..") && !path.isAbsolute(relative)) {
139
- return relative.split(path.sep).join("/");
140
- }
141
- return filePath.split(path.sep).join("/");
142
- }
143
-
144
- function capitalize(value) {
145
- return String(value ?? "").charAt(0).toUpperCase() + String(value ?? "").slice(1);
146
- }
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { evidenceTypes } from "./analyzers/evidence.js";
4
+ import { parseDocument } from "./document-model.js";
5
+ import { loadVoicePackV2 } from "./profile.js";
6
+
7
/**
 * Builds the structured (schemaVersion 2) article brief for a topic from a
 * voice profile and an optional evidence file.
 *
 * @param {object} options
 * @param {string|object} options.voice - Voice pack directory (resolved against
 *   `cwd` and loaded with `loadVoicePackV2`) or an already loaded profile object.
 * @param {*} options.topic - Topic interpolated into the working thesis and
 *   the missing-evidence items.
 * @param {string} [options.evidence] - Evidence file path; when falsy the brief
 *   carries no source and no anchors.
 * @param {string} [options.cwd=process.cwd()] - Base directory for relative paths.
 * @returns {object} The brief: topic, voice summary, working thesis, evidence
 *   anchors, missing-evidence notes, outline, and at most five voice cautions.
 */
export function voiceArticleBriefV2({ voice, topic, evidence, cwd = process.cwd() }) {
  const profile = typeof voice === "string" ? loadVoicePackV2(resolvePath(cwd, voice)) : voice;
  const evidenceResult = evidence
    ? evidenceAnchorsFromFile({ evidence, cwd })
    : { source: null, anchors: [] };

  return {
    schemaVersion: 2,
    generatedBy: "dravoice-v2-brief",
    topic,
    voice: {
      corpusConfidence: profile.source.confidence,
      sourceFileCount: profile.source.documentCount,
      sourceWordCount: profile.source.wordCount,
      primaryRegister: profile.families.register.features.primary.value,
      evidenceSentenceRate: profile.families.evidence.features.evidenceSentenceRate,
      featureFamilies: Object.keys(profile.families),
      draftingRules: profile.guidance.draftingRules.slice(0, 5),
    },
    workingThesis: `Draft a grounded article about ${topic}. Let the supplied evidence set the size of each claim before broadening the lesson.`,
    evidence: evidenceResult,
    missingEvidence: missingEvidenceFor({ topic, evidenceAnchors: evidenceResult.anchors }),
    outline: outlineFor(profile),
    voiceCautions: [
      // Cap the profile's own cautions at four so the evidence guardrail below
      // always fits within the five-item limit. Slicing the combined list
      // dropped the guardrail whenever the profile supplied five or more.
      ...[...profile.guidance.avoid].slice(0, 4),
      "Mark unsupported claims as [specific evidence needed] instead of inventing proof.",
    ],
  };
}
37
+
38
/**
 * Renders a brief object as a Markdown document.
 *
 * Sections appear in a fixed order: Voice Source, Working Thesis, Evidence
 * Anchors, Missing Evidence, Outline, Voice Cautions, Drafting Prompt.
 *
 * @param {object} brief - Brief exposing `topic`, `voice`, `workingThesis`,
 *   `evidence`, `missingEvidence`, `outline`, and `voiceCautions`.
 * @returns {string} Markdown text, newline-joined and ending with a newline.
 */
export function renderVoiceBriefV2(brief) {
  const header = [
    `# Article Brief: ${brief.topic}`,
    "",
    "## Voice Source",
    "",
    `- Corpus confidence: ${capitalize(brief.voice.corpusConfidence.band)} - ${brief.voice.corpusConfidence.message}`,
    `- Source files: ${brief.voice.sourceFileCount}`,
    `- Primary register: ${brief.voice.primaryRegister}`,
    `- Evidence sentence rate: ${brief.voice.evidenceSentenceRate}`,
    "",
    "## Working Thesis",
    "",
    `- ${brief.workingThesis}`,
    "",
    "## Evidence Anchors",
    "",
  ];

  // Without anchors, emit a single placeholder bullet instead of an empty section.
  const anchorLines = brief.evidence.anchors.length
    ? brief.evidence.anchors.map((anchor) => {
        const typeSuffix = anchor.types.length ? ` (${anchor.types.join(", ")})` : "";
        return `- ${brief.evidence.source}:${anchor.line}${typeSuffix} - ${anchor.text}`;
      })
    : ["- [specific evidence needed] Add notes, dates, quotes, examples, or source links before drafting broad claims."];

  const missingLines = brief.missingEvidence.map((item) => `- ${item}`);
  const outlineLines = brief.outline.map((item, index) => `${index + 1}. ${item}`);
  const cautionLines = brief.voiceCautions.map((item) => `- ${item}`);

  return [
    ...header,
    ...anchorLines,
    "",
    "## Missing Evidence",
    "",
    ...missingLines,
    "",
    "## Outline",
    "",
    ...outlineLines,
    "",
    "## Voice Cautions",
    "",
    ...cautionLines,
    "",
    "## Drafting Prompt",
    "",
    `Write the article about ${brief.topic} using the evidence anchors above. Keep claims close to concrete support, follow the voice cautions, and write [specific evidence needed] anywhere the brief does not supply enough ground.`,
    "",
  ].join("\n");
}
85
+
86
/**
 * Reads an evidence file and returns up to eight sentences that carry at
 * least one detected evidence type.
 *
 * @param {object} options
 * @param {string} options.evidence - Evidence file path, resolved against `cwd`.
 * @param {string} options.cwd - Base directory; also passed to `parseDocument`
 *   as `rootDir` and used to shorten the reported source path.
 * @returns {{source: string, anchors: Array<{line: *, text: *, types: *}>}}
 *   Display path of the file plus its first eight evidence-bearing sentences.
 */
function evidenceAnchorsFromFile({ evidence, cwd }) {
  const evidencePath = resolvePath(cwd, evidence);
  const contents = fs.readFileSync(evidencePath, "utf8");
  const { sentences } = parseDocument({ filePath: evidencePath, rootDir: cwd, contents });

  // Classify every sentence first, then keep the first eight with evidence.
  const classified = sentences.map(({ line, text }) => ({
    line,
    text,
    types: evidenceTypes(text),
  }));
  const anchors = classified.filter(({ types }) => types.length > 0).slice(0, 8);

  return {
    source: displayPath(evidencePath, cwd),
    anchors,
  };
}
107
+
108
/**
 * Lists the evidence gaps a writer should fill before drafting.
 *
 * @param {object} options
 * @param {*} options.topic - Topic interpolated into the central-claim reminder.
 * @param {Array} options.evidenceAnchors - Detected anchors; only the length is read.
 * @returns {string[]} The two standing reminders, preceded by a
 *   "no evidence anchors" notice when the anchor list is empty.
 */
function missingEvidenceFor({ topic, evidenceAnchors }) {
  const standingReminders = [
    `Add [specific evidence needed] for the central claim about ${topic}.`,
    "Add [specific evidence needed] for any number, date, quote, source, or example the article depends on.",
  ];

  if (evidenceAnchors.length !== 0) {
    return standingReminders;
  }

  return [
    "No evidence anchors were detected; collect concrete notes before asking for a full draft.",
    ...standingReminders,
  ];
}
118
+
119
/**
 * Produces the four-step article outline for a voice profile.
 *
 * @param {object} profile - Profile exposing
 *   `families.rhetoricalShape.features.openingMoves` and
 *   `families.rhythm.features.sentenceWords.median`.
 * @returns {string[]} Four outline steps. The first names up to three opening
 *   moves when any exist; the third quotes the sentence-length median.
 */
function outlineFor(profile) {
  const openingShape = profile.families.rhetoricalShape.features.openingMoves.slice(0, 3).join(" -> ");
  const medianSentenceWords = profile.families.rhythm.features.sentenceWords.median;

  // With no opening moves the joined string is empty, so use the generic opener.
  let openingStep = "Start from a concrete artifact or observation before making the larger claim.";
  if (openingShape) {
    openingStep = `Start from a concrete artifact or observation, keeping the opening shape compatible with: ${openingShape}.`;
  }

  return [
    openingStep,
    "Name the pressure, question, or practical stakes that make the evidence matter.",
    `Develop the article in the learned register with sentence pacing near the ${medianSentenceWords}-word median where it fits.`,
    "Close by returning to the evidence and leaving the reader with a practical handle, not a generic conclusion.",
  ];
}
131
+
132
/**
 * Resolves a possibly relative path against a base directory.
 *
 * @param {string} cwd - Base directory for relative values.
 * @param {string} value - Path to resolve.
 * @returns {string} `value` unchanged when it is absolute, otherwise
 *   `path.join(cwd, value)`.
 */
function resolvePath(cwd, value) {
  if (path.isAbsolute(value)) {
    return value;
  }
  return path.join(cwd, value);
}
135
+
136
/**
 * Formats a file path for display, using forward slashes on every platform.
 *
 * @param {string} filePath - Path of the file to display.
 * @param {string} rootDir - Directory the path should be shown relative to.
 * @returns {string} The path relative to `rootDir` when the file lies inside
 *   it; otherwise `filePath` itself. Separators are always "/".
 */
function displayPath(filePath, rootDir) {
  const relative = path.relative(rootDir, filePath);
  // Only a leading ".." path segment means the file is outside rootDir.
  // A bare startsWith("..") would also reject in-root names such as "..notes/e.md".
  const escapesRoot = relative === ".." || relative.startsWith(`..${path.sep}`);
  // An empty relative path means filePath is rootDir itself. An absolute one
  // can occur on Windows when the two paths are on different drives.
  if (relative && !escapesRoot && !path.isAbsolute(relative)) {
    return relative.split(path.sep).join("/");
  }
  return filePath.split(path.sep).join("/");
}
143
+
144
/**
 * Upper-cases the first character of a value's string form.
 *
 * @param {*} value - Value to format; null and undefined become "".
 * @returns {string} The stringified value with its first character upper-cased.
 */
function capitalize(value) {
  const text = String(value ?? "");
  const head = text.charAt(0).toUpperCase();
  const tail = text.slice(1);
  return head + tail;
}
package/src/v2/inspect.js CHANGED
@@ -46,7 +46,7 @@ function featureSummary(name, features) {
46
46
  return `sentenceWords.median=${features.sentenceWords.median}; paragraphWords.median=${features.paragraphWords.median}; listDensity=${features.listDensity}; quoteDensity=${features.quoteDensity}`;
47
47
  }
48
48
  if (name === "lexical") {
49
- return `wordCount=${features.wordCount}; contentTypeTokenRatio=${features.vocabularyRichness.contentTypeTokenRatio}; wordLength.median=${features.wordLength.median}`;
49
+ return `wordCount=${features.wordCount}; contentTypeTokenRatio=${features.vocabularyRichness.contentTypeTokenRatio}; wordLength.median=${features.wordLength.median}; maskedCharacterFourgrams=${features.maskedCharacterFourgrams?.length ?? 0}; functionWordBigrams=${features.functionWordBigrams?.length ?? 0}`;
50
50
  }
51
51
  if (name === "register") {
52
52
  return `primary=${features.primary.value} (${features.primary.score}); alternates=${features.scores.slice(1, 4).map((score) => `${score.value}:${score.score}`).join(", ")}`;
@@ -55,10 +55,10 @@ function featureSummary(name, features) {
55
55
  return `transitionRates=${Object.entries(features.transitionRates).map(([key, value]) => `${key}:${value}`).join(", ")}; sentenceCallbacks=${features.sentenceCallbacks}`;
56
56
  }
57
57
  if (name === "rhetoricalShape") {
58
- return `openingMoves=${features.openingMoves.slice(0, 5).join(" -> ") || "none"}; commonSequences=${features.commonSequences.slice(0, 3).map((item) => `${item.value}:${item.count}`).join(", ") || "none"}`;
58
+ return `openingMoves=${features.openingMoves.slice(0, 5).join(" -> ") || "none"}; moveTrigrams=${features.moveTrigrams?.slice(0, 2).map((item) => `${item.value}:${item.count}`).join(", ") || "none"}; commonSequences=${features.commonSequences.slice(0, 3).map((item) => `${item.value}:${item.count}`).join(", ") || "none"}`;
59
59
  }
60
60
  if (name === "evidence") {
61
- return `evidenceSentenceRate=${features.evidenceSentenceRate}; claimSentenceRate=${features.claimSentenceRate}; unsupportedClaimRate=${features.unsupportedClaimRate}; evidenceTypes=${features.evidenceTypes.map((item) => `${item.value}:${item.count}`).join(", ") || "none"}`;
61
+ return `evidenceSentenceRate=${features.evidenceSentenceRate}; claimSentenceRate=${features.claimSentenceRate}; supportedClaimRate=${features.supportedClaimRate}; unsupportedClaimRate=${features.unsupportedClaimRate}; evidenceTypes=${features.evidenceTypes.map((item) => `${item.value}:${item.count}`).join(", ") || "none"}`;
62
62
  }
63
63
  if (name === "structure") {
64
64
  return `sectionWords.median=${features.sectionWords.median}; headingCount.median=${features.headingCount.median}; listDocumentRate=${features.listDocumentRate}; quoteDocumentRate=${features.quoteDocumentRate}`;
package/src/v2/profile.js CHANGED
@@ -8,6 +8,13 @@ import { analyzeRhetoricalShape } from "./analyzers/rhetorical-shape.js";
8
8
  import { analyzeRhythm } from "./analyzers/rhythm.js";
9
9
  import { analyzeStructure } from "./analyzers/structure.js";
10
10
  import { loadDocuments } from "./document-model.js";
11
+ import {
12
+ STYLOMETRIC_REFERENCES,
13
+ defaultStyleThresholds,
14
+ distanceByFamily,
15
+ percentile,
16
+ stabilityFromDistances,
17
+ } from "./stylometry.js";
11
18
 
12
19
  export function learnVoicePackV2({ examplesDir, outDir }) {
13
20
  const documents = loadDocuments({ examplesDir });
@@ -32,20 +39,12 @@ export function loadVoicePackV2(voiceDir) {
32
39
 
33
40
  export function buildVoiceProfileV2({ documents }) {
34
41
  const source = sourceSummary(documents);
35
- const families = {
36
- rhythm: analyzeRhythm(documents),
37
- lexical: analyzeLexical(documents),
38
- register: analyzeRegister(documents),
39
- discourse: analyzeDiscourse(documents),
40
- rhetoricalShape: analyzeRhetoricalShape(documents),
41
- evidence: analyzeEvidence(documents),
42
- structure: analyzeStructure(documents),
43
- };
42
+ const families = analyzeFeatureFamilies(documents);
44
43
 
45
44
  return {
46
45
  schemaVersion: 2,
47
46
  generatedBy: "dravoice-v2",
48
- tool: { name: "Dravoice", cli: "drav" },
47
+ tool: { name: "Dravoice", cli: "drav" },
49
48
  source,
50
49
  families,
51
50
  guidance: guidanceFor({ source, families }),
@@ -55,6 +54,7 @@ export function buildVoiceProfileV2({ documents }) {
55
54
  rhythmMedianWords: toleranceFor(source.confidence.band, 5, 8, 12),
56
55
  evidenceRate: toleranceFor(source.confidence.band, 0.12, 0.18, 0.25),
57
56
  },
57
+ styleThresholds: styleThresholdsFor(documents, families),
58
58
  minimumDraftSize: {
59
59
  words: source.confidence.band === "weak" ? 25 : 35,
60
60
  sentences: source.confidence.band === "weak" ? 3 : 4,
@@ -63,6 +63,56 @@ export function buildVoiceProfileV2({ documents }) {
63
63
  };
64
64
  }
65
65
 
66
/**
 * Runs every feature-family analyzer over the same document set.
 *
 * @param {Array} documents - Documents handed unchanged to each analyzer.
 * @returns {object} Analyzer results keyed by family name, in the fixed order
 *   rhythm, lexical, register, discourse, rhetoricalShape, evidence, structure.
 */
function analyzeFeatureFamilies(documents) {
  // Pair order fixes both the call order and the key order of the result.
  const analyzers = [
    ["rhythm", analyzeRhythm],
    ["lexical", analyzeLexical],
    ["register", analyzeRegister],
    ["discourse", analyzeDiscourse],
    ["rhetoricalShape", analyzeRhetoricalShape],
    ["evidence", analyzeEvidence],
    ["structure", analyzeStructure],
  ];
  return Object.fromEntries(
    analyzers.map(([family, analyze]) => [family, analyze(documents)]),
  );
}
77
+
78
/**
 * Calibrates a per-family style-distance threshold by leave-one-out
 * comparison: each document is analyzed alone and measured against the
 * features of all the other documents.
 *
 * @param {Array} documents - Corpus documents; fewer than two yields no
 *   observations, so every family uses its default threshold.
 * @param {object} fallbackFamilies - Only its keys are read; they name the
 *   families to calibrate.
 * @returns {{method: string, references: *, families: object}} For each
 *   family: `threshold` clamped to [0.01, 0.95], the number of
 *   `observations`, and a `stability` value from `stabilityFromDistances`.
 */
function styleThresholdsFor(documents, fallbackFamilies) {
  const defaults = defaultStyleThresholds();
  const familyNames = Object.keys(fallbackFamilies);
  const heldoutDistances = Object.fromEntries(familyNames.map((name) => [name, []]));

  if (documents.length >= 2) {
    for (const [heldoutIndex, heldout] of documents.entries()) {
      const others = documents.filter((_, position) => position !== heldoutIndex);
      const referenceFamilies = analyzeFeatureFamilies(others);
      const heldoutFamilies = analyzeFeatureFamilies([heldout]);
      for (const name of familyNames) {
        const distance = distanceByFamily(
          name,
          referenceFamilies[name].features,
          heldoutFamilies[name].features,
        );
        heldoutDistances[name].push(distance);
      }
    }
  }

  const families = {};
  for (const name of familyNames) {
    const observations = heldoutDistances[name];
    let observedThreshold = 0;
    if (observations.length > 0) {
      observedThreshold = percentile(observations, 0.9);
    }
    const fallback = defaults[name].threshold;
    // A falsy observed value (no observations, or exactly 0) uses the default.
    const chosen = observedThreshold || fallback;
    const threshold = Math.max(0.01, Math.min(0.95, chosen));
    families[name] = {
      threshold,
      observations: observations.length,
      stability: stabilityFromDistances(observations),
    };
  }

  return {
    method: "leave-one-out-cosine-delta",
    references: STYLOMETRIC_REFERENCES,
    families,
  };
}
115
+
66
116
  function writeVoicePackV2(outDir, profile) {
67
117
  fs.mkdirSync(outDir, { recursive: true });
68
118
  fs.writeFileSync(path.join(outDir, "profile.json"), `${JSON.stringify(profile, null, 2)}\n`, "utf8");