dravoice 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +35 -0
- package/bin/dravoice.js +10 -0
- package/package.json +45 -0
- package/src/index.js +204 -0
- package/src/v2/analyzers/discourse.js +52 -0
- package/src/v2/analyzers/evidence.js +43 -0
- package/src/v2/analyzers/lexical.js +58 -0
- package/src/v2/analyzers/register.js +34 -0
- package/src/v2/analyzers/rhetorical-shape.js +48 -0
- package/src/v2/analyzers/rhythm.js +47 -0
- package/src/v2/analyzers/structure.js +24 -0
- package/src/v2/benchmark.js +702 -0
- package/src/v2/brief.js +146 -0
- package/src/v2/document-model.js +260 -0
- package/src/v2/inspect.js +67 -0
- package/src/v2/profile.js +153 -0
- package/src/v2/prompt.js +64 -0
- package/src/v2/review.js +219 -0
- package/src/v2/text-utils.js +123 -0
package/src/v2/brief.js
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { evidenceTypes } from "./analyzers/evidence.js";
|
|
4
|
+
import { parseDocument } from "./document-model.js";
|
|
5
|
+
import { loadVoicePackV2 } from "./profile.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Build the structured article brief for a topic from a voice profile and an
 * optional evidence file.
 *
 * @param {object} options
 * @param {string|object} options.voice - Voice-pack directory (resolved against `cwd`) or an already-loaded profile object.
 * @param {string} options.topic - Topic interpolated into the thesis and missing-evidence notes.
 * @param {string} [options.evidence] - Path to an evidence file; when falsy the brief carries no anchors.
 * @param {string} [options.cwd] - Base directory for relative paths; defaults to `process.cwd()`.
 * @returns {object} Brief object with `schemaVersion: 2`.
 */
export function voiceArticleBriefV2({ voice, topic, evidence, cwd = process.cwd() }) {
  let profile = voice;
  if (typeof voice === "string") {
    profile = loadVoicePackV2(resolvePath(cwd, voice));
  }

  let evidenceResult = { source: null, anchors: [] };
  if (evidence) {
    evidenceResult = evidenceAnchorsFromFile({ evidence, cwd });
  }

  const { source, families, guidance } = profile;

  // Condensed view of the profile; rules are capped at five entries.
  const voiceSummary = {
    corpusConfidence: source.confidence,
    sourceFileCount: source.documentCount,
    sourceWordCount: source.wordCount,
    primaryRegister: families.register.features.primary.value,
    evidenceSentenceRate: families.evidence.features.evidenceSentenceRate,
    featureFamilies: Object.keys(families),
    draftingRules: guidance.draftingRules.slice(0, 5),
  };

  const workingThesis = `Draft a grounded article about ${topic}. Let the supplied evidence set the size of each claim before broadening the lesson.`;
  const missingEvidence = missingEvidenceFor({ topic, evidenceAnchors: evidenceResult.anchors });
  const outline = outlineFor(profile);

  // The fixed caution is appended last, so it is dropped when the profile
  // already supplies five or more cautions.
  const voiceCautions = [
    ...guidance.avoid,
    "Mark unsupported claims as [specific evidence needed] instead of inventing proof.",
  ].slice(0, 5);

  return {
    schemaVersion: 2,
    generatedBy: "dravoice-v2-brief",
    topic,
    voice: voiceSummary,
    workingThesis,
    evidence: evidenceResult,
    missingEvidence,
    outline,
    voiceCautions,
  };
}
|
|
37
|
+
|
|
38
|
+
/**
 * Render a brief object (as produced by `voiceArticleBriefV2`) to Markdown.
 *
 * @param {object} brief - Brief with `topic`, `voice`, `workingThesis`, `evidence`, `missingEvidence`, `outline` and `voiceCautions`.
 * @returns {string} Markdown text; the last line is empty, so the output ends with a newline.
 */
export function renderVoiceBriefV2(brief) {
  const asBullet = (item) => `- ${item}`;
  const { voice } = brief;

  const header = [
    `# Article Brief: ${brief.topic}`,
    "",
    "## Voice Source",
    "",
    `- Corpus confidence: ${capitalize(voice.corpusConfidence.band)} - ${voice.corpusConfidence.message}`,
    `- Source files: ${voice.sourceFileCount}`,
    `- Primary register: ${voice.primaryRegister}`,
    `- Evidence sentence rate: ${voice.evidenceSentenceRate}`,
    "",
    "## Working Thesis",
    "",
    `- ${brief.workingThesis}`,
    "",
    "## Evidence Anchors",
    "",
  ];

  // One bullet per anchor, or a single placeholder when there are none.
  const { anchors, source } = brief.evidence;
  const anchorLines = anchors.length
    ? anchors.map((anchor) => {
        const typeList = anchor.types.length ? ` (${anchor.types.join(", ")})` : "";
        return `- ${source}:${anchor.line}${typeList} - ${anchor.text}`;
      })
    : ["- [specific evidence needed] Add notes, dates, quotes, examples, or source links before drafting broad claims."];

  return [
    ...header,
    ...anchorLines,
    "",
    "## Missing Evidence",
    "",
    ...brief.missingEvidence.map(asBullet),
    "",
    "## Outline",
    "",
    ...brief.outline.map((step, position) => `${position + 1}. ${step}`),
    "",
    "## Voice Cautions",
    "",
    ...brief.voiceCautions.map(asBullet),
    "",
    "## Drafting Prompt",
    "",
    `Write the article about ${brief.topic} using the evidence anchors above. Keep claims close to concrete support, follow the voice cautions, and write [specific evidence needed] anywhere the brief does not supply enough ground.`,
    "",
  ].join("\n");
}
|
|
85
|
+
|
|
86
|
+
/**
 * Read an evidence file and collect the sentences that `evidenceTypes`
 * tags with at least one type.
 *
 * @param {object} options
 * @param {string} options.evidence - Evidence file path, absolute or relative to `cwd`.
 * @param {string} options.cwd - Base directory for resolving and displaying the path.
 * @returns {{source: string, anchors: Array<{line: number, text: string, types: Array}>}}
 *   Display path of the file plus at most eight anchors in document order.
 */
function evidenceAnchorsFromFile({ evidence, cwd }) {
  const evidencePath = resolvePath(cwd, evidence);
  const document = parseDocument({
    filePath: evidencePath,
    rootDir: cwd,
    contents: fs.readFileSync(evidencePath, "utf8"),
  });

  const source = displayPath(evidencePath, cwd);

  const tagged = [];
  for (const { line, text } of document.sentences) {
    const types = evidenceTypes(text);
    if (types.length > 0) {
      tagged.push({ line, text, types });
    }
  }

  return { source, anchors: tagged.slice(0, 8) };
}
|
|
107
|
+
|
|
108
|
+
/**
 * List the evidence reminders for a brief.
 *
 * @param {object} options
 * @param {string} options.topic - Topic named in the central-claim reminder.
 * @param {Array} options.evidenceAnchors - Anchors already found; an empty list adds a leading warning.
 * @returns {string[]} Two standing reminders, preceded by a "no anchors" note when none were detected.
 */
function missingEvidenceFor({ topic, evidenceAnchors }) {
  const standing = [
    `Add [specific evidence needed] for the central claim about ${topic}.`,
    "Add [specific evidence needed] for any number, date, quote, source, or example the article depends on.",
  ];
  if (evidenceAnchors.length !== 0) {
    return standing;
  }
  return [
    "No evidence anchors were detected; collect concrete notes before asking for a full draft.",
    ...standing,
  ];
}
|
|
118
|
+
|
|
119
|
+
/**
 * Produce the four-step outline for a brief from a voice profile.
 *
 * @param {object} profile - Profile exposing `families.rhetoricalShape.features.openingMoves`
 *   and `families.rhythm.features.sentenceWords.median`.
 * @returns {string[]} Four outline steps; the first mentions up to three opening moves when any exist.
 */
function outlineFor(profile) {
  const { rhetoricalShape, rhythm } = profile.families;
  const opening = rhetoricalShape.features.openingMoves.slice(0, 3).join(" -> ");
  const sentenceMedian = rhythm.features.sentenceWords.median;

  let firstStep = "Start from a concrete artifact or observation before making the larger claim.";
  if (opening) {
    firstStep = `Start from a concrete artifact or observation, keeping the opening shape compatible with: ${opening}.`;
  }

  return [
    firstStep,
    "Name the pressure, question, or practical stakes that make the evidence matter.",
    `Develop the article in the learned register with sentence pacing near the ${sentenceMedian}-word median where it fits.`,
    "Close by returning to the evidence and leaving the reader with a practical handle, not a generic conclusion.",
  ];
}
|
|
131
|
+
|
|
132
|
+
/**
 * Resolve `value` against `cwd` unless it is already absolute.
 *
 * @param {string} cwd - Base directory for relative values.
 * @param {string} value - Absolute or relative path.
 * @returns {string} `value` unchanged when absolute, otherwise `path.join(cwd, value)`.
 */
function resolvePath(cwd, value) {
  if (path.isAbsolute(value)) {
    return value;
  }
  return path.join(cwd, value);
}
|
|
135
|
+
|
|
136
|
+
/**
 * Format a file path for display, relative to `rootDir` when the file lies
 * inside it, always with forward slashes.
 *
 * @param {string} filePath - Path of the file to display.
 * @param {string} rootDir - Directory the path should be shown relative to.
 * @returns {string} The root-relative path when `filePath` is inside `rootDir`;
 *   otherwise `filePath` itself (also when both are the same path).
 */
function displayPath(filePath, rootDir) {
  const relative = path.relative(rootDir, filePath);
  // Only a leading ".." *segment* means the path leaves the root. A plain
  // prefix test would also reject in-root names such as "..draft.md".
  const leavesRoot = relative === ".." || relative.startsWith(`..${path.sep}`);
  if (relative && !leavesRoot && !path.isAbsolute(relative)) {
    return relative.split(path.sep).join("/");
  }
  return filePath.split(path.sep).join("/");
}
|
|
143
|
+
|
|
144
|
+
/**
 * Upper-case the first character of a value's string form.
 *
 * @param {*} value - Any value; `null` and `undefined` are treated as the empty string.
 * @returns {string} The string form of `value` with its first character upper-cased.
 */
function capitalize(value) {
  const text = String(value ?? "");
  const head = text.charAt(0).toUpperCase();
  return `${head}${text.slice(1)}`;
}
|
|
package/src/v2/document-model.js
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { normalizeText, splitSentences, tokenizeWords } from "./text-utils.js";
|
|
4
|
+
|
|
5
|
+
// Lower-case file extensions (with leading dot) accepted as voice examples.
export const VOICE_EXTENSIONS = new Set([".md", ".mdx", ".txt"]);

// Default corpus limits used by loadDocuments when the caller passes none.
const DEFAULT_MAX_FILES = 500;
const DEFAULT_MAX_FILE_BYTES = 1024 ** 2; // 1 MiB per file
const DEFAULT_MAX_TOTAL_BYTES = 20 * 1024 ** 2; // 20 MiB across the corpus
|
|
9
|
+
|
|
10
|
+
/**
 * Load and parse every voice file under a directory, enforcing corpus limits.
 *
 * @param {object} options
 * @param {string} options.examplesDir - Directory to scan; resolved to an absolute path.
 * @param {number} [options.maxFiles] - Maximum number of voice files allowed.
 * @param {number} [options.maxFileBytes] - Maximum size of a single file in bytes.
 * @param {number} [options.maxTotalBytes] - Maximum combined size of all files in bytes.
 * @returns {object[]} One parsed document per file, in the order `walkVoiceFiles` returns them.
 * @throws {Error} When no files are found, a limit is exceeded, or a file contains a NUL character.
 */
export function loadDocuments({
  examplesDir,
  maxFiles = DEFAULT_MAX_FILES,
  maxFileBytes = DEFAULT_MAX_FILE_BYTES,
  maxTotalBytes = DEFAULT_MAX_TOTAL_BYTES,
}) {
  const root = path.resolve(examplesDir);
  const files = walkVoiceFiles(root);

  if (files.length === 0) {
    throw new Error(`No Markdown, MDX, or text examples found at ${examplesDir}`);
  }
  if (files.length > maxFiles) {
    throw new Error(`Corpus contains ${files.length} voice files, more than ${maxFiles} voice file(s) allowed.`);
  }

  const documents = [];
  let totalBytes = 0;
  for (const filePath of files) {
    // Sizes are checked from stat before the file is read.
    const { size } = fs.statSync(filePath);
    if (size > maxFileBytes) {
      throw new Error(`Voice file ${displayPath(filePath, root)} exceeds the ${maxFileBytes} byte limit.`);
    }
    totalBytes += size;
    if (totalBytes > maxTotalBytes) {
      throw new Error(`Voice corpus exceeds the ${maxTotalBytes} byte total limit.`);
    }

    const contents = fs.readFileSync(filePath, "utf8");
    if (looksBinary(contents)) {
      throw new Error(`Voice file ${displayPath(filePath, root)} looks like binary-looking text and cannot be analyzed.`);
    }

    documents.push(parseDocument({ filePath, rootDir: root, contents }));
  }
  return documents;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Parse Markdown/MDX/plain text into headings, blocks, paragraphs and sentences.
 *
 * Lines are scanned once, top to bottom. Skipped entirely: a leading
 * front-matter section delimited by `---` lines, HTML comments opened by a
 * line starting with `<!--`, fenced code, and lines recognised by
 * `isMdxScaffold`, `isTableLine` or `isJsxLike`. Consecutive plain lines are
 * merged into one paragraph block; each quote or list line becomes its own block.
 *
 * @param {object} options
 * @param {string} options.filePath - Path of the document, stored as `path`.
 * @param {string} [options.rootDir] - Root for the display path; defaults to `process.cwd()`.
 * @param {string} [options.contents] - Raw text; `null`/`undefined` is treated as empty.
 * @returns {object} `{ file, path, headings, sections, blocks, paragraphs, sentences, wordCount, text }`.
 */
export function parseDocument({ filePath, rootDir = process.cwd(), contents }) {
  const relative = displayPath(filePath, rootDir);
  const sourceLines = String(contents ?? "").split(/\r?\n/);
  const headings = [];
  const blocks = [];
  let activeHeading = null;
  let openParagraph = null;
  let insideFence = false;
  let insideFrontmatter = sourceLines[0]?.trim() === "---";
  let insideHtmlComment = false;

  // Commit the paragraph being accumulated, if it has any lines.
  const closeParagraph = () => {
    if (openParagraph?.lines.length) {
      blocks.push(openParagraph);
    }
    openParagraph = null;
  };

  for (let index = 0; index < sourceLines.length; index += 1) {
    const lineNumber = index + 1;
    const trimmed = sourceLines[index].trim();

    if (insideHtmlComment) {
      // The line carrying "-->" is itself discarded.
      insideHtmlComment = !trimmed.includes("-->");
      continue;
    }

    if (insideFrontmatter) {
      // Index 0 is the opening "---"; only a later "---" closes the section.
      insideFrontmatter = !(index > 0 && trimmed === "---");
      continue;
    }

    if (/^(```|~~~)/.test(trimmed)) {
      closeParagraph();
      insideFence = !insideFence;
      continue;
    }

    if (trimmed.startsWith("<!--")) {
      closeParagraph();
      insideHtmlComment = !trimmed.includes("-->");
      continue;
    }

    if (!trimmed) {
      // A blank line ends the current paragraph.
      closeParagraph();
      continue;
    }

    if (insideFence || isMdxScaffold(trimmed) || isTableLine(trimmed) || isJsxLike(trimmed)) {
      // Ignored lines do not end the current paragraph.
      continue;
    }

    const headingMatch = /^(#{1,6})\s+(.+)$/.exec(trimmed);
    if (headingMatch) {
      closeParagraph();
      activeHeading = {
        id: `h${headings.length + 1}`,
        depth: headingMatch[1].length,
        text: stripMarkdown(headingMatch[2]).trim(),
        line: lineNumber,
      };
      headings.push(activeHeading);
      continue;
    }

    const blockType = blockTypeFor(trimmed);
    const withoutMarker = trimmed
      .replace(/^[-*]\s+/, "")
      .replace(/^\d+[.)]\s+/, "")
      .replace(/^>\s+/, "");
    const text = stripMarkdown(withoutMarker);

    if (blockType === "paragraph") {
      if (!openParagraph) {
        openParagraph = makeBlock({ type: "paragraph", line: lineNumber, heading: activeHeading });
      }
      openParagraph.lines.push(text);
      continue;
    }

    // Quote and list lines each form a single-line block.
    closeParagraph();
    blocks.push({
      ...makeBlock({ type: blockType, line: lineNumber, heading: activeHeading }),
      lines: [text],
    });
  }
  closeParagraph();

  const paragraphs = blocks.map(blockToParagraph).filter((paragraph) => paragraph.text);
  const sentences = paragraphs.flatMap(paragraphToSentences);

  let wordCount = 0;
  for (const sentence of sentences) {
    wordCount += sentence.tokens.length;
  }

  return {
    file: relative,
    path: filePath,
    headings,
    sections: buildSections(headings, blocks),
    blocks,
    paragraphs,
    sentences,
    wordCount,
    text: paragraphs.map((paragraph) => paragraph.text).join("\n\n"),
  };
}
|
|
149
|
+
|
|
150
|
+
/**
 * Create an empty block record tied to the heading it appears under.
 *
 * @param {object} options
 * @param {string} options.type - Block type label.
 * @param {number} options.line - Line number where the block starts.
 * @param {?object} options.heading - Enclosing heading (`text`, `id`, `depth`) or a nullish value.
 * @returns {object} Block with heading fields defaulted to `null`/`0` and an empty `lines` array.
 */
function makeBlock({ type, line, heading }) {
  const owner = heading ?? {};
  return {
    type,
    line,
    heading: owner.text ?? null,
    headingId: owner.id ?? null,
    headingDepth: owner.depth ?? 0,
    lines: [],
  };
}
|
|
160
|
+
|
|
161
|
+
/**
 * Classify a trimmed line by its leading Markdown marker.
 *
 * @param {string} trimmed - Line with surrounding whitespace removed.
 * @returns {"quote"|"list"|"paragraph"} `quote` for `> `, `list` for `- `, `* `, `1. ` or `1) `, otherwise `paragraph`.
 */
function blockTypeFor(trimmed) {
  // Checked in order; the marker must be followed by whitespace.
  const markers = [
    [/^>\s+/, "quote"],
    [/^[-*]\s+/, "list"],
    [/^\d+[.)]\s+/, "list"],
  ];
  const matched = markers.find(([pattern]) => pattern.test(trimmed));
  return matched ? matched[1] : "paragraph";
}
|
|
173
|
+
|
|
174
|
+
/**
 * Group blocks into one section per heading.
 *
 * A section runs from its heading to the next heading of the same or
 * shallower depth, so a section also contains the blocks of its deeper
 * sub-headings. Blocks located before the first heading belong to no section.
 *
 * @param {Array<{depth: number, line: number}>} headings - Headings in document order.
 * @param {Array<{line: number}>} blocks - Blocks in document order.
 * @returns {Array<{heading: ?object, blocks: object[]}>} A single heading-less section when there are no headings.
 */
function buildSections(headings, blocks) {
  if (headings.length === 0) {
    return [{ heading: null, blocks }];
  }

  const sections = [];
  for (let index = 0; index < headings.length; index += 1) {
    const heading = headings[index];

    // First later heading that is not nested below this one.
    let boundary;
    for (let next = index + 1; next < headings.length; next += 1) {
      if (headings[next].depth <= heading.depth) {
        boundary = headings[next];
        break;
      }
    }

    const owned = blocks.filter((block) => {
      if (!(block.line > heading.line)) {
        return false;
      }
      return !boundary || block.line < boundary.line;
    });
    sections.push({ heading, blocks: owned });
  }
  return sections;
}
|
|
189
|
+
|
|
190
|
+
/**
 * Flatten a block into a paragraph record with a single normalised text.
 *
 * @param {object} block - Block with `type`, `line`, `heading`, `headingId` and `lines`.
 * @returns {object} `{ type, line, heading, headingId, text }` where `text` joins the
 *   lines with spaces, collapses whitespace runs to one space and trims the ends.
 */
function blockToParagraph(block) {
  const { type, line, heading, headingId, lines } = block;
  const text = lines.join(" ").replace(/\s+/g, " ").trim();
  return { type, line, heading, headingId, text };
}
|
|
199
|
+
|
|
200
|
+
/**
 * Split a paragraph into sentence records.
 *
 * @param {object} paragraph - Paragraph with `text`, `line`, `type`, `heading` and `headingId`.
 * @returns {object[]} One record per sentence returned by `splitSentences`, carrying
 *   the raw and normalised text, the word tokens and the paragraph's heading.
 */
function paragraphToSentences(paragraph) {
  const { line, type, heading, headingId } = paragraph;
  const records = [];
  for (const [offset, text] of splitSentences(paragraph.text).entries()) {
    records.push({
      text,
      normalized: normalizeText(text),
      // Paragraph start line plus the sentence's index within the paragraph;
      // this is not measured against the actual source line of the sentence.
      line: line + offset,
      blockType: type,
      heading,
      headingId,
      tokens: tokenizeWords(text),
    });
  }
  return records;
}
|
|
211
|
+
|
|
212
|
+
/**
 * Remove a small set of inline Markdown markers, keeping the visible text.
 *
 * Handles images (`![alt](src)` becomes `alt`), `**`, runs of two or more
 * underscores, inline code spans and links (`[label](url)` becomes `label`).
 * Single `*` and `_` emphasis markers are left untouched.
 *
 * @param {string} text - Inline Markdown text.
 * @returns {string} Text with the markers above removed.
 */
function stripMarkdown(text) {
  return text
    // Images go first: the link rule alone would turn `![alt](src)` into
    // `!alt` and leave linked images (`[![alt](img)](url)`) half-converted.
    .replace(/!\[([^\]]*)\]\([^)]+\)/g, "$1")
    .replace(/\*\*/g, "")
    .replace(/__+/g, "")
    .replace(/`([^`]+)`/g, "$1")
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1");
}
|
|
219
|
+
|
|
220
|
+
/**
 * Report whether a trimmed line begins with an HTML/JSX-style tag.
 *
 * @param {string} trimmed - Line with surrounding whitespace removed.
 * @returns {boolean} True when the line starts with an opening or closing tag
 *   whose name begins with a letter, or is a single self-closing tag.
 */
function isJsxLike(trimmed) {
  const tagPatterns = [
    /^<\/?[A-Za-z][^>]*>/, // opening or closing tag at the start of the line
    /^<[A-Za-z][^>]*\/>$/, // the whole line is one self-closing tag
  ];
  return tagPatterns.some((pattern) => pattern.test(trimmed));
}
|
|
223
|
+
|
|
224
|
+
/**
 * Report whether a trimmed line is an `import` or `export` statement line.
 *
 * @param {string} trimmed - Line with surrounding whitespace removed.
 * @returns {boolean} True when the line starts with `import` or `export` followed by whitespace.
 */
function isMdxScaffold(trimmed) {
  const scaffoldPattern = /^(?:import|export)\s/;
  return scaffoldPattern.exec(trimmed) !== null;
}
|
|
227
|
+
|
|
228
|
+
/**
 * Report whether a trimmed line looks like a Markdown table row.
 *
 * @param {string} trimmed - Line with surrounding whitespace removed.
 * @returns {boolean} True when the line both starts and ends with `|`
 *   (two separate pipe characters are required).
 */
function isTableLine(trimmed) {
  const rowPattern = /^\|.*\|$/;
  return rowPattern.exec(trimmed) !== null;
}
|
|
231
|
+
|
|
232
|
+
/**
 * Heuristic binary check: text containing a NUL character is treated as binary.
 *
 * @param {*} contents - File contents; `null`/`undefined` count as empty text.
 * @returns {boolean} True when the string form of `contents` contains `\0`.
 */
function looksBinary(contents) {
  const text = String(contents ?? "");
  return text.indexOf("\0") !== -1;
}
|
|
235
|
+
|
|
236
|
+
/**
 * Recursively collect voice files under a directory.
 *
 * Entries are visited in `localeCompare` order of their names. A fixed set of
 * directory names is never descended into, and only files whose lower-cased
 * extension is in `VOICE_EXTENSIONS` are kept.
 *
 * @param {string} rootDir - Directory to scan.
 * @returns {string[]} Full paths of the matching files; empty when `rootDir` does not exist.
 */
function walkVoiceFiles(rootDir) {
  if (!fs.existsSync(rootDir)) {
    return [];
  }

  const skippedDirectories = new Set([
    ".git",
    "node_modules",
    "dist",
    "build",
    "__pycache__",
    "prompts",
    "voice-pack",
    "dravoice-voice",
  ]);

  const entries = fs.readdirSync(rootDir, { withFileTypes: true });
  entries.sort((left, right) => left.name.localeCompare(right.name));

  const found = [];
  for (const entry of entries) {
    const fullPath = path.join(rootDir, entry.name);
    if (entry.isDirectory()) {
      if (!skippedDirectories.has(entry.name)) {
        found.push(...walkVoiceFiles(fullPath));
      }
      continue;
    }
    if (VOICE_EXTENSIONS.has(path.extname(entry.name).toLowerCase())) {
      found.push(fullPath);
    }
  }
  return found;
}
|
|
253
|
+
|
|
254
|
+
/**
 * Format a file path for display, relative to `rootDir` when the file lies
 * inside it, always with forward slashes.
 *
 * @param {string} filePath - Path of the file to display.
 * @param {string} rootDir - Directory the path should be shown relative to.
 * @returns {string} The root-relative path when `filePath` is inside `rootDir`;
 *   otherwise `filePath` itself (also when both are the same path).
 */
function displayPath(filePath, rootDir) {
  const relative = path.relative(rootDir, filePath);
  // Only a leading ".." *segment* means the path leaves the root. A plain
  // prefix test would also reject in-root names such as "..draft.md".
  const leavesRoot = relative === ".." || relative.startsWith(`..${path.sep}`);
  if (relative && !leavesRoot && !path.isAbsolute(relative)) {
    return relative.split(path.sep).join("/");
  }
  return filePath.split(path.sep).join("/");
}
|
|
package/src/v2/inspect.js
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
 * Render a plain-text inspection report for a voice profile.
 *
 * @param {object} profile - Profile with `source`, `families` and `guidance.draftingRules`.
 * @returns {string} Report listing source totals, per-family confidence and warnings,
 *   per-family feature summaries with revision handles, and drafting rules.
 *   The last line is empty, so the output ends with a newline.
 */
export function renderInspectV2(profile) {
  const header = [
    "Dravoice V2 Profile",
    "",
    `Source: ${profile.source.documentCount} document(s), ${profile.source.wordCount} words, ${profile.source.sentenceCount} sentences`,
    `Corpus confidence: ${capitalize(profile.source.confidence.band)} - ${profile.source.confidence.message}`,
    "",
    "Feature families:",
  ];

  const families = Object.entries(profile.families);

  // Confidence per family, each followed by its warnings (if any).
  const confidenceLines = families.flatMap(([name, family]) => [
    `- ${name}: ${family.confidence}`,
    ...Array.from(family.warnings, (warning) => ` warning: ${warning}`),
  ]);

  // Feature summary per family, followed by revision handles when present.
  const detailLines = families.flatMap(([name, family]) => {
    const rows = [`- ${name}: ${featureSummary(name, family.features)}`];
    if (family.revisionHandles?.length) {
      rows.push(" Revision handles:");
      rows.push(...Array.from(family.revisionHandles, (handle) => ` - ${handle}`));
    }
    return rows;
  });

  const guidanceLines = Array.from(profile.guidance.draftingRules, (rule) => `- ${rule}`);

  return [
    ...header,
    ...confidenceLines,
    "",
    "Feature details:",
    ...detailLines,
    "",
    "Guidance:",
    ...guidanceLines,
    "",
  ].join("\n");
}
|
|
39
|
+
|
|
40
|
+
/**
 * Upper-case the first character of a value's string form.
 *
 * Nullish and non-string values are tolerated (a profile read from JSON may
 * lack the field) instead of throwing on `value.charAt`.
 *
 * @param {*} value - Any value; `null` and `undefined` are treated as the empty string.
 * @returns {string} The string form of `value` with its first character upper-cased.
 */
function capitalize(value) {
  const text = String(value ?? "");
  return text.charAt(0).toUpperCase() + text.slice(1);
}
|
|
43
|
+
|
|
44
|
+
/**
 * Summarise one feature family as a single `key=value; ...` line.
 *
 * @param {string} name - Family name; known names get a tailored summary.
 * @param {object} features - Feature object of that family.
 * @returns {string} Tailored summary for known families, otherwise `JSON.stringify(features)`.
 */
function featureSummary(name, features) {
  // Formats `value:count` pairs; an empty result is left for the caller to replace.
  const countedList = (items) => items.map((item) => `${item.value}:${item.count}`).join(", ");

  switch (name) {
    case "rhythm":
      return [
        `sentenceWords.median=${features.sentenceWords.median}`,
        `paragraphWords.median=${features.paragraphWords.median}`,
        `listDensity=${features.listDensity}`,
        `quoteDensity=${features.quoteDensity}`,
      ].join("; ");

    case "lexical":
      return [
        `wordCount=${features.wordCount}`,
        `contentTypeTokenRatio=${features.vocabularyRichness.contentTypeTokenRatio}`,
        `wordLength.median=${features.wordLength.median}`,
      ].join("; ");

    case "register": {
      const primary = `primary=${features.primary.value} (${features.primary.score})`;
      // Entries 1-3 of `scores` are reported as alternates.
      const alternates = features.scores
        .slice(1, 4)
        .map((score) => `${score.value}:${score.score}`)
        .join(", ");
      return `${primary}; alternates=${alternates}`;
    }

    case "discourse": {
      const rates = Object.entries(features.transitionRates)
        .map(([key, value]) => `${key}:${value}`)
        .join(", ");
      return `transitionRates=${rates}; sentenceCallbacks=${features.sentenceCallbacks}`;
    }

    case "rhetoricalShape": {
      const moves = features.openingMoves.slice(0, 5).join(" -> ") || "none";
      const sequences = countedList(features.commonSequences.slice(0, 3)) || "none";
      return `openingMoves=${moves}; commonSequences=${sequences}`;
    }

    case "evidence":
      return [
        `evidenceSentenceRate=${features.evidenceSentenceRate}`,
        `claimSentenceRate=${features.claimSentenceRate}`,
        `unsupportedClaimRate=${features.unsupportedClaimRate}`,
        `evidenceTypes=${countedList(features.evidenceTypes) || "none"}`,
      ].join("; ");

    case "structure":
      return [
        `sectionWords.median=${features.sectionWords.median}`,
        `headingCount.median=${features.headingCount.median}`,
        `listDocumentRate=${features.listDocumentRate}`,
        `quoteDocumentRate=${features.quoteDocumentRate}`,
      ].join("; ");

    default:
      return JSON.stringify(features);
  }
}
|
|
package/src/v2/profile.js
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { analyzeDiscourse } from "./analyzers/discourse.js";
|
|
4
|
+
import { analyzeEvidence } from "./analyzers/evidence.js";
|
|
5
|
+
import { analyzeLexical } from "./analyzers/lexical.js";
|
|
6
|
+
import { analyzeRegister } from "./analyzers/register.js";
|
|
7
|
+
import { analyzeRhetoricalShape } from "./analyzers/rhetorical-shape.js";
|
|
8
|
+
import { analyzeRhythm } from "./analyzers/rhythm.js";
|
|
9
|
+
import { analyzeStructure } from "./analyzers/structure.js";
|
|
10
|
+
import { loadDocuments } from "./document-model.js";
|
|
11
|
+
|
|
12
|
+
/**
 * Learn a voice profile from a directory of examples and optionally persist it.
 *
 * @param {object} options
 * @param {string} options.examplesDir - Directory of example documents to load.
 * @param {string} [options.outDir] - When truthy, the voice pack is written to this directory.
 * @returns {object} The generated profile.
 */
export function learnVoicePackV2({ examplesDir, outDir }) {
  const profile = buildVoiceProfileV2({ documents: loadDocuments({ examplesDir }) });
  if (!outDir) {
    return profile;
  }
  writeVoicePackV2(outDir, profile);
  return profile;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Load `profile.json` from a voice-pack directory and check its schema version.
 *
 * @param {string} voiceDir - Directory expected to contain `profile.json`.
 * @returns {object} The parsed profile (its `schemaVersion` is 2).
 * @throws {Error} When the file is missing or the parsed value is not a schema-version-2 profile.
 * @throws {SyntaxError} When the file is not valid JSON; the message names the file
 *   and the original error is attached as `cause`.
 */
export function loadVoicePackV2(voiceDir) {
  const profilePath = path.join(voiceDir, "profile.json");
  if (!fs.existsSync(profilePath)) {
    throw new Error(`No Dravoice V2 profile found at ${voiceDir}`);
  }

  let profile;
  try {
    profile = JSON.parse(fs.readFileSync(profilePath, "utf8"));
  } catch (error) {
    if (error instanceof SyntaxError) {
      // Same error type as before, but now it says which file is broken.
      throw new SyntaxError(`Could not parse Dravoice V2 profile at ${profilePath}: ${error.message}`, { cause: error });
    }
    throw error;
  }

  // Optional chaining: valid JSON may still be `null`, which has no properties.
  if (profile?.schemaVersion !== 2) {
    throw new Error(`Expected a Dravoice V2 profile at ${voiceDir}; found schemaVersion ${profile?.schemaVersion ?? "unknown"}.`);
  }
  return profile;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Build a schema-version-2 voice profile from parsed documents.
 *
 * @param {object} options
 * @param {object[]} options.documents - Parsed documents passed to every analyzer.
 * @returns {object} Profile with `source`, `families`, `guidance` and `calibration`.
 */
export function buildVoiceProfileV2({ documents }) {
  const source = sourceSummary(documents);

  // Key order is kept: it is the order families are listed in downstream output.
  const families = {
    rhythm: analyzeRhythm(documents),
    lexical: analyzeLexical(documents),
    register: analyzeRegister(documents),
    discourse: analyzeDiscourse(documents),
    rhetoricalShape: analyzeRhetoricalShape(documents),
    evidence: analyzeEvidence(documents),
    structure: analyzeStructure(documents),
  };

  const guidance = guidanceFor({ source, families });

  const { band } = source.confidence;
  const weakCorpus = band === "weak";

  const featureStability = {};
  for (const [name, family] of Object.entries(families)) {
    featureStability[name] = family.confidence;
  }

  return {
    schemaVersion: 2,
    generatedBy: "dravoice-v2",
    tool: { name: "Dravoice", cli: "drav" },
    source,
    families,
    guidance,
    calibration: {
      featureStability,
      tolerances: {
        rhythmMedianWords: toleranceFor(band, 5, 8, 12),
        evidenceRate: toleranceFor(band, 0.12, 0.18, 0.25),
      },
      minimumDraftSize: {
        words: weakCorpus ? 25 : 35,
        sentences: weakCorpus ? 3 : 4,
      },
    },
  };
}
|
|
65
|
+
|
|
66
|
+
/**
 * Write a voice pack to disk: `profile.json` plus a small `.dravoice.yml` marker.
 *
 * @param {string} outDir - Target directory; created recursively if missing.
 * @param {object} profile - Profile to serialise; `source.confidence.band` is copied into the marker.
 * @returns {void}
 */
function writeVoicePackV2(outDir, profile) {
  fs.mkdirSync(outDir, { recursive: true });

  const profileJson = `${JSON.stringify(profile, null, 2)}\n`;
  fs.writeFileSync(path.join(outDir, "profile.json"), profileJson, "utf8");

  const marker = `schemaVersion: 2\ngeneratedBy: dravoice-v2\nconfidence: ${profile.source.confidence.band}\n`;
  fs.writeFileSync(path.join(outDir, ".dravoice.yml"), marker, "utf8");
}
|
|
80
|
+
|
|
81
|
+
/**
 * Summarise the corpus: totals, per-file statistics and a confidence band.
 *
 * Per-file entries carry a generated id, the extension and counts only.
 *
 * @param {object[]} documents - Parsed documents with `wordCount`, `sentences` and `path`/`file`.
 * @returns {object} `{ files, documentCount, wordCount, sentenceCount, genres, confidence }`.
 */
function sourceSummary(documents) {
  let wordCount = 0;
  let sentenceCount = 0;
  for (const document of documents) {
    wordCount += document.wordCount;
    sentenceCount += document.sentences.length;
  }

  const confidence = confidenceFor(documents.length, sentenceCount, wordCount);

  const files = documents.map((document, index) => {
    // Files without an extension are reported as ".txt".
    const extension = path.extname(document.path || document.file).toLowerCase() || ".txt";
    return {
      id: `source-${index + 1}`,
      extension,
      wordCount: document.wordCount,
      sentenceCount: document.sentences.length,
    };
  });

  return {
    files,
    documentCount: documents.length,
    wordCount,
    sentenceCount,
    genres: [],
    confidence,
  };
}
|
|
99
|
+
|
|
100
|
+
/**
 * Map corpus size to a confidence band.
 *
 * @param {number} documentCount - Number of documents.
 * @param {number} sentenceCount - Total sentences.
 * @param {number} wordCount - Total words; only the "deep" band checks it.
 * @returns {{band: string, message: string}} The first band, from strictest to
 *   loosest, whose minimums are all met; "weak" when none are.
 */
function confidenceFor(documentCount, sentenceCount, wordCount) {
  // `words: null` means the band has no word-count minimum.
  const tiers = [
    { band: "deep", documents: 10, sentences: 80, words: 5000, message: "Enough material for stricter family-level review." },
    { band: "strong", documents: 5, sentences: 30, words: null, message: "Enough examples for stable family-level review." },
    { band: "usable", documents: 3, sentences: 10, words: null, message: "Enough text for prompt guidance and cautious review." },
  ];

  for (const tier of tiers) {
    const enoughWords = tier.words === null || wordCount >= tier.words;
    if (documentCount >= tier.documents && sentenceCount >= tier.sentences && enoughWords) {
      return { band: tier.band, message: tier.message };
    }
  }
  return { band: "weak", message: "Limited corpus; use findings as weak signals." };
}
|
|
112
|
+
|
|
113
|
+
/**
 * Derive human-readable guidance from the source summary and feature families.
 *
 * @param {object} options
 * @param {object} options.source - Source summary (`documentCount`, `wordCount`, `confidence.band`).
 * @param {object} options.families - Feature families (`register`, `evidence`, `rhythm`, `rhetoricalShape`).
 * @returns {{summary: string[], draftingRules: string[], avoid: string[], examples: Array}}
 *   Guidance with four drafting rules, three fixed cautions and an empty examples list.
 */
function guidanceFor({ source, families }) {
  const primaryRegister = families.register.features.primary.value;
  const evidenceRate = families.evidence.features.evidenceSentenceRate;
  const sentenceMedian = families.rhythm.features.sentenceWords.median;
  const opening = families.rhetoricalShape.features.openingMoves.slice(0, 3).join(" -> ");

  // Corpora above a 0.35 evidence-sentence rate get the "anchor claims" rule.
  let evidenceRule = "Do not force evidence density higher than the source corpus supports.";
  if (evidenceRate > 0.35) {
    evidenceRule = "Anchor broad claims with concrete scenes, numbers, quotes, citations, or specific examples.";
  }

  let openingRule = "Start from the article's real subject rather than generic positioning.";
  if (opening) {
    openingRule = `Prefer opening moves compatible with: ${opening}.`;
  }

  return {
    summary: [
      `Local Dravoice V2 profile from ${source.documentCount} document(s), ${source.wordCount} words, confidence ${source.confidence.band}.`,
      `Primary register signal: ${primaryRegister}.`,
    ],
    draftingRules: [
      `Keep sentence pacing near the learned median of ${sentenceMedian} words when it fits the draft.`,
      `Use ${primaryRegister} register as the default genre signal unless the piece intentionally changes genre.`,
      evidenceRule,
      openingRule,
    ],
    avoid: [
      "Do not treat topic vocabulary as proof of voice fit.",
      "Do not claim a draft is or is not the writer's true voice.",
      "Do not invent concrete evidence to satisfy a style finding.",
    ],
    examples: [],
  };
}
|
|
144
|
+
|
|
145
|
+
/**
 * Pick a tolerance value for a confidence band.
 *
 * @param {string} band - Confidence band name.
 * @param {number} deep - Value for the "deep" band.
 * @param {number} strong - Value for the "strong" band.
 * @param {number} weak - Value for every other band (including "usable").
 * @returns {number} The tolerance matching `band`.
 */
function toleranceFor(band, deep, strong, weak) {
  switch (band) {
    case "deep":
      return deep;
    case "strong":
      return strong;
    default:
      return weak;
  }
}
|
package/src/v2/prompt.js
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { loadVoicePackV2 } from "./profile.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Render prompt guidance for a voice profile and optionally write it to a file.
 *
 * @param {object} options
 * @param {string|object} options.voice - Voice-pack directory or an already-loaded profile object.
 * @param {string} [options.format] - One of "agents", "claude" or "system"; defaults to "agents".
 * @param {string} [options.outPath] - When truthy, the text is written here (parent directories are created).
 * @returns {string} The rendered prompt text.
 * @throws {Error} When `format` is not one of the supported values.
 */
export function voicePromptPackV2({ voice, format = "agents", outPath }) {
  let profile = voice;
  if (typeof voice === "string") {
    profile = loadVoicePackV2(voice);
  }

  const supportedFormats = ["agents", "claude", "system"];
  if (!supportedFormats.includes(format)) {
    throw new Error(`Unsupported prompt format: ${format}`);
  }

  const rendered = renderPrompt(profile, format);
  if (!outPath) {
    return rendered;
  }

  fs.mkdirSync(path.dirname(outPath), { recursive: true });
  fs.writeFileSync(outPath, rendered, "utf8");
  return rendered;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Render the prompt text for one output format.
 *
 * @param {object} profile - Profile with `guidance` (`summary`, `draftingRules`, `avoid`,
 *   optional `examples`) and `families` (each with `confidence` and optional `revisionHandles`).
 * @param {"agents"|"claude"|"system"} format - Selects the header and preface lines.
 * @returns {string} Prompt text ending with a newline.
 */
function renderPrompt(profile, format) {
  const header = {
    agents: "# Dravoice V2 Writing Guidance",
    claude: "# CLAUDE.md guidance for Dravoice V2",
    system: "System writing guidance: Dravoice V2",
  }[format];
  const preface = {
    agents: "Use this as local, inspectable drafting guidance from the writer's own corpus. It is not an AI detector or a license to imitate a third party.",
    claude: "Use these project-local voice notes when drafting or reviewing prose for this repository. Treat them as guidance, not identity proof.",
    system: "Follow these local voice constraints when writing prose. Do not expose private source text or claim authorship identity from them.",
  }[format];
  const lines = [
    header,
    "",
    preface,
    "",
    "## Summary",
    "",
    ...profile.guidance.summary.map((item) => `- ${item}`),
    "",
    "## Feature Families",
    "",
  ];

  for (const [name, family] of Object.entries(profile.families)) {
    // A family may have no revision handles; omit the suffix in that case
    // rather than throwing or printing "undefined" into the prompt.
    const firstHandle = family.revisionHandles?.[0];
    const suffix = firstHandle ? `; ${firstHandle}` : "";
    lines.push(`- ${name}: ${family.confidence} confidence${suffix}`);
  }

  lines.push("", "## Drafting Rules", "");
  for (const rule of profile.guidance.draftingRules) {
    lines.push(`- ${rule}`);
  }

  lines.push("", "## Avoid", "");
  for (const item of profile.guidance.avoid) {
    lines.push(`- ${item}`);
  }

  // Examples are optional; a profile without the field renders no section.
  if (profile.guidance.examples?.length) {
    lines.push("", "## Source-Backed Examples", "");
    for (const example of profile.guidance.examples) {
      lines.push(`- ${example}`);
    }
  }

  return `${lines.join("\n")}\n`;
}
|