dravoice 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +126 -37
- package/bin/dravoice.js +11 -10
- package/package.json +47 -45
- package/src/index.js +967 -197
- package/src/v2/analyzers/discourse.js +69 -63
- package/src/v2/analyzers/evidence.js +82 -82
- package/src/v2/analyzers/lexical.js +114 -114
- package/src/v2/analyzers/register.js +70 -34
- package/src/v2/analyzers/rhetorical-shape.js +65 -59
- package/src/v2/analyzers/rhythm.js +39 -47
- package/src/v2/analyzers/structure.js +41 -24
- package/src/v2/benchmark.js +657 -568
- package/src/v2/brief.js +154 -146
- package/src/v2/config.js +78 -0
- package/src/v2/doctor.js +308 -0
- package/src/v2/document-model.js +422 -260
- package/src/v2/inspect.js +67 -67
- package/src/v2/io-utils.js +51 -0
- package/src/v2/profile.js +342 -203
- package/src/v2/prompt.js +65 -64
- package/src/v2/review.js +303 -173
- package/src/v2/revise-plan.js +540 -433
- package/src/v2/stylometry.js +346 -332
- package/src/v2/text-utils.js +123 -123
package/src/v2/brief.js
CHANGED
|
@@ -1,146 +1,154 @@
|
|
|
1
|
-
import
|
|
2
|
-
import
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
import { loadVoicePackV2 } from "./profile.js";
|
|
6
|
-
|
|
7
|
-
export function voiceArticleBriefV2({ voice, topic, evidence, cwd = process.cwd() }) {
|
|
8
|
-
const profile = typeof voice === "string" ? loadVoicePackV2(resolvePath(cwd, voice)) : voice;
|
|
9
|
-
const evidenceResult = evidence ? evidenceAnchorsFromFile({ evidence, cwd }) : {
|
|
10
|
-
source: null,
|
|
11
|
-
anchors: [],
|
|
12
|
-
};
|
|
13
|
-
|
|
14
|
-
return {
|
|
15
|
-
schemaVersion: 2,
|
|
16
|
-
generatedBy: "dravoice-v2-brief",
|
|
17
|
-
topic,
|
|
18
|
-
voice: {
|
|
19
|
-
corpusConfidence: profile.source.confidence,
|
|
20
|
-
sourceFileCount: profile.source.documentCount,
|
|
21
|
-
sourceWordCount: profile.source.wordCount,
|
|
22
|
-
primaryRegister: profile.families.register.features.primary.value,
|
|
23
|
-
evidenceSentenceRate: profile.families.evidence.features.evidenceSentenceRate,
|
|
24
|
-
featureFamilies: Object.keys(profile.families),
|
|
25
|
-
draftingRules: profile.guidance.draftingRules.slice(0, 5),
|
|
26
|
-
},
|
|
27
|
-
workingThesis: `Draft a grounded article about ${topic}. Let the supplied evidence set the size of each claim before broadening the lesson.`,
|
|
28
|
-
evidence: evidenceResult,
|
|
29
|
-
missingEvidence: missingEvidenceFor({ topic, evidenceAnchors: evidenceResult.anchors }),
|
|
30
|
-
outline: outlineFor(profile),
|
|
31
|
-
voiceCautions: [
|
|
32
|
-
...profile.guidance.avoid,
|
|
33
|
-
"Mark unsupported claims as [specific evidence needed] instead of inventing proof.",
|
|
34
|
-
].slice(0, 5),
|
|
35
|
-
};
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
export function renderVoiceBriefV2(brief) {
|
|
39
|
-
const lines = [
|
|
40
|
-
`# Article Brief: ${brief.topic}`,
|
|
41
|
-
"",
|
|
42
|
-
"## Voice Source",
|
|
43
|
-
"",
|
|
44
|
-
`- Corpus confidence: ${capitalize(brief.voice.corpusConfidence.band)} - ${brief.voice.corpusConfidence.message}`,
|
|
45
|
-
`- Source files: ${brief.voice.sourceFileCount}`,
|
|
46
|
-
`- Primary register: ${brief.voice.primaryRegister}`,
|
|
47
|
-
`- Evidence sentence rate: ${brief.voice.evidenceSentenceRate}`,
|
|
48
|
-
"",
|
|
49
|
-
"## Working Thesis",
|
|
50
|
-
"",
|
|
51
|
-
`- ${brief.workingThesis}`,
|
|
52
|
-
"",
|
|
53
|
-
"## Evidence Anchors",
|
|
54
|
-
"",
|
|
55
|
-
];
|
|
56
|
-
|
|
57
|
-
if (brief.evidence.anchors.length) {
|
|
58
|
-
for (const item of brief.evidence.anchors) {
|
|
59
|
-
const typeList = item.types.length ? ` (${item.types.join(", ")})` : "";
|
|
60
|
-
lines.push(`- ${brief.evidence.source}:${item.line}${typeList} - ${item.text}`);
|
|
61
|
-
}
|
|
62
|
-
} else {
|
|
63
|
-
lines.push("- [specific evidence needed] Add notes, dates, quotes, examples, or source links before drafting broad claims.");
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
lines.push("", "## Missing Evidence", "");
|
|
67
|
-
lines.push(...brief.missingEvidence.map((item) => `- ${item}`));
|
|
68
|
-
|
|
69
|
-
lines.push("", "## Outline", "");
|
|
70
|
-
lines.push(...brief.outline.map((item, index) => `${index + 1}. ${item}`));
|
|
71
|
-
|
|
72
|
-
lines.push("", "## Voice Cautions", "");
|
|
73
|
-
lines.push(...brief.voiceCautions.map((item) => `- ${item}`));
|
|
74
|
-
|
|
75
|
-
lines.push(
|
|
76
|
-
"",
|
|
77
|
-
"## Drafting Prompt",
|
|
78
|
-
"",
|
|
79
|
-
`Write the article about ${brief.topic} using the evidence anchors above. Keep claims close to concrete support, follow the voice cautions, and write [specific evidence needed] anywhere the brief does not supply enough ground.`,
|
|
80
|
-
"",
|
|
81
|
-
);
|
|
82
|
-
|
|
83
|
-
return lines.join("\n");
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
function evidenceAnchorsFromFile({ evidence, cwd }) {
|
|
87
|
-
const evidencePath = resolvePath(cwd, evidence);
|
|
88
|
-
const contents =
|
|
89
|
-
const document = parseDocument({
|
|
90
|
-
filePath: evidencePath,
|
|
91
|
-
rootDir: cwd,
|
|
92
|
-
contents,
|
|
93
|
-
});
|
|
94
|
-
|
|
95
|
-
return {
|
|
96
|
-
source: displayPath(evidencePath, cwd),
|
|
97
|
-
anchors: document.sentences
|
|
98
|
-
.map((sentence) => ({
|
|
99
|
-
line: sentence.line,
|
|
100
|
-
text: sentence.text,
|
|
101
|
-
types: evidenceTypes(sentence.text),
|
|
102
|
-
}))
|
|
103
|
-
.filter((sentence) => sentence.types.length > 0)
|
|
104
|
-
.slice(0, 8),
|
|
105
|
-
};
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
function missingEvidenceFor({ topic, evidenceAnchors }) {
|
|
109
|
-
const items = [
|
|
110
|
-
`Add [specific evidence needed] for the central claim about ${topic}.`,
|
|
111
|
-
"Add [specific evidence needed] for any number, date, quote, source, or example the article depends on.",
|
|
112
|
-
];
|
|
113
|
-
if (evidenceAnchors.length === 0) {
|
|
114
|
-
items.unshift("No evidence anchors were detected; collect concrete notes before asking for a full draft.");
|
|
115
|
-
}
|
|
116
|
-
return items;
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
function outlineFor(profile) {
|
|
120
|
-
const opening = profile.families.rhetoricalShape.features.openingMoves.slice(0, 3).join(" -> ");
|
|
121
|
-
const sentenceMedian = profile.families.rhythm.features.sentenceWords.median;
|
|
122
|
-
return [
|
|
123
|
-
opening
|
|
124
|
-
? `Start from a concrete artifact or observation, keeping the opening shape compatible with: ${opening}.`
|
|
125
|
-
: "Start from a concrete artifact or observation before making the larger claim.",
|
|
126
|
-
"Name the pressure, question, or practical stakes that make the evidence matter.",
|
|
127
|
-
`Develop the article in the learned register with sentence pacing near the ${sentenceMedian}-word median where it fits.`,
|
|
128
|
-
"Close by returning to the evidence and leaving the reader with a practical handle, not a generic conclusion.",
|
|
129
|
-
];
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
function resolvePath(cwd, value) {
|
|
133
|
-
return path.isAbsolute(value) ? value : path.join(cwd, value);
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
function displayPath(filePath, rootDir) {
|
|
137
|
-
const relative = path.relative(rootDir, filePath);
|
|
138
|
-
if (relative && !relative.startsWith("..") && !path.isAbsolute(relative)) {
|
|
139
|
-
return relative.split(path.sep).join("/");
|
|
140
|
-
}
|
|
141
|
-
return filePath.split(path.sep).join("/");
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
function capitalize(value) {
|
|
145
|
-
return String(value ?? "").charAt(0).toUpperCase() + String(value ?? "").slice(1);
|
|
146
|
-
}
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import { evidenceTypes } from "./analyzers/evidence.js";
|
|
3
|
+
import { parseDocument } from "./document-model.js";
|
|
4
|
+
import { readUtf8FileBounded } from "./io-utils.js";
|
|
5
|
+
import { loadVoicePackV2 } from "./profile.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Build the structured article brief for a topic from a voice profile.
 *
 * @param {object} options
 * @param {string|object} options.voice - Either a path (resolved against `cwd` and handed to
 *   `loadVoicePackV2`) or an already-loaded profile object, which is used as-is.
 * @param {string} options.topic - Article topic; interpolated into the working thesis.
 * @param {string} [options.evidence] - Optional path to an evidence file; when omitted the
 *   brief carries an empty anchor list and a `null` source.
 * @param {string} [options.cwd=process.cwd()] - Base directory for relative paths.
 * @returns {object} Brief object with `schemaVersion`, `generatedBy`, `topic`, `voice`,
 *   `workingThesis`, `evidence`, `missingEvidence`, `outline` and `voiceCautions`.
 */
export function voiceArticleBriefV2({ voice, topic, evidence, cwd = process.cwd() }) {
  const maxCautions = 5;
  const evidenceCaution = "Mark unsupported claims as [specific evidence needed] instead of inventing proof.";

  const profile = typeof voice === "string" ? loadVoicePackV2(resolvePath(cwd, voice)) : voice;
  const evidenceResult = evidence ? evidenceAnchorsFromFile({ evidence, cwd }) : {
    source: null,
    anchors: [],
  };

  return {
    schemaVersion: 2,
    generatedBy: "dravoice-v2-brief",
    topic,
    voice: {
      corpusConfidence: profile.source.confidence,
      sourceFileCount: profile.source.documentCount,
      sourceWordCount: profile.source.wordCount,
      primaryRegister: profile.families.register.features.primary.value,
      evidenceSentenceRate: profile.families.evidence.features.evidenceSentenceRate,
      featureFamilies: Object.keys(profile.families),
      draftingRules: profile.guidance.draftingRules.slice(0, 5),
    },
    workingThesis: `Draft a grounded article about ${topic}. Let the supplied evidence set the size of each claim before broadening the lesson.`,
    evidence: evidenceResult,
    missingEvidence: missingEvidenceFor({ topic, evidenceAnchors: evidenceResult.anchors }),
    outline: outlineFor(profile),
    // Reserve the last slot for the evidence caution: truncating after appending it
    // would drop it whenever the profile supplies five or more "avoid" entries.
    voiceCautions: [
      ...profile.guidance.avoid.slice(0, maxCautions - 1),
      evidenceCaution,
    ],
  };
}
|
|
37
|
+
|
|
38
|
+
/**
 * Render a brief object (as produced by `voiceArticleBriefV2`) as a Markdown document.
 *
 * Every free-text value is passed through `safeInline` so that a newline, control
 * character or backtick inside a value cannot break out of its list item or heading.
 * That includes the confidence band/message, the primary register and the evidence
 * type labels, which previously were interpolated unsanitized.
 *
 * @param {object} brief - Brief with `topic`, `voice`, `workingThesis`, `evidence`,
 *   `missingEvidence`, `outline` and `voiceCautions`.
 * @returns {string} Markdown text, lines joined with "\n".
 */
export function renderVoiceBriefV2(brief) {
  const confidence = brief.voice.corpusConfidence;
  const lines = [
    `# Article Brief: ${safeInline(brief.topic)}`,
    "",
    "## Voice Source",
    "",
    `- Corpus confidence: ${safeInline(capitalize(confidence.band))} - ${safeInline(confidence.message)}`,
    `- Source files: ${brief.voice.sourceFileCount}`,
    `- Primary register: ${safeInline(brief.voice.primaryRegister)}`,
    `- Evidence sentence rate: ${brief.voice.evidenceSentenceRate}`,
    "",
    "## Working Thesis",
    "",
    `- ${safeInline(brief.workingThesis)}`,
    "",
    "## Evidence Anchors",
    "",
  ];

  if (brief.evidence.anchors.length) {
    for (const item of brief.evidence.anchors) {
      const typeList = item.types.length
        ? ` (${item.types.map((type) => safeInline(type)).join(", ")})`
        : "";
      lines.push(`- ${safeInline(brief.evidence.source)}:${item.line}${typeList} - ${safeInline(item.text)}`);
    }
  } else {
    lines.push("- [specific evidence needed] Add notes, dates, quotes, examples, or source links before drafting broad claims.");
  }

  lines.push("", "## Missing Evidence", "");
  lines.push(...brief.missingEvidence.map((item) => `- ${safeInline(item)}`));

  lines.push("", "## Outline", "");
  lines.push(...brief.outline.map((item, index) => `${index + 1}. ${safeInline(item)}`));

  lines.push("", "## Voice Cautions", "");
  lines.push(...brief.voiceCautions.map((item) => `- ${safeInline(item)}`));

  lines.push(
    "",
    "## Drafting Prompt",
    "",
    `Write the article about ${safeInline(brief.topic)} using the evidence anchors above. Keep claims close to concrete support, follow the voice cautions, and write [specific evidence needed] anywhere the brief does not supply enough ground.`,
    "",
  );

  return lines.join("\n");
}
|
|
85
|
+
|
|
86
|
+
/**
 * Read an evidence file and collect the sentences that carry at least one evidence type.
 *
 * The file is read through `readUtf8FileBounded` with a `maxBytes` of 1024 * 1024 and
 * parsed with `parseDocument`; at most the first eight matching sentences are returned.
 *
 * @param {object} options
 * @param {string} options.evidence - Path to the evidence file (relative paths join `cwd`).
 * @param {string} options.cwd - Base directory, also used as the display root.
 * @returns {{source: string, anchors: Array<{line: *, text: *, types: Array}>}}
 */
function evidenceAnchorsFromFile({ evidence, cwd }) {
  const evidencePath = resolvePath(cwd, evidence);
  const document = parseDocument({
    filePath: evidencePath,
    rootDir: cwd,
    contents: readUtf8FileBounded(evidencePath, { label: "Evidence file", maxBytes: 1024 * 1024 }),
  });

  const matches = [];
  for (const { line, text } of document.sentences) {
    const types = evidenceTypes(text);
    if (types.length > 0) {
      matches.push({ line, text, types });
    }
  }

  return {
    source: displayPath(evidencePath, cwd),
    anchors: matches.slice(0, 8),
  };
}
|
|
107
|
+
|
|
108
|
+
/**
 * List the evidence gaps to call out in a brief.
 *
 * @param {object} options
 * @param {string} options.topic - Topic interpolated into the first reminder.
 * @param {Array} options.evidenceAnchors - Detected anchors; only its length is inspected.
 * @returns {string[]} The two standing reminders, preceded by a "no anchors" notice
 *   when `evidenceAnchors` is empty.
 */
function missingEvidenceFor({ topic, evidenceAnchors }) {
  const standingReminders = [
    `Add [specific evidence needed] for the central claim about ${topic}.`,
    "Add [specific evidence needed] for any number, date, quote, source, or example the article depends on.",
  ];
  if (evidenceAnchors.length !== 0) {
    return standingReminders;
  }
  return [
    "No evidence anchors were detected; collect concrete notes before asking for a full draft.",
    ...standingReminders,
  ];
}
|
|
118
|
+
|
|
119
|
+
/**
 * Produce the four-step outline for a brief from a voice profile.
 *
 * Reads the first three `openingMoves` of the rhetorical-shape family and the median
 * of `sentenceWords` from the rhythm family.
 *
 * @param {object} profile - Voice profile exposing `families.rhetoricalShape` and `families.rhythm`.
 * @returns {string[]} Four outline steps; the first falls back to a generic sentence
 *   when no opening moves are available.
 */
function outlineFor(profile) {
  const { rhetoricalShape, rhythm } = profile.families;
  const openingShape = rhetoricalShape.features.openingMoves.slice(0, 3).join(" -> ");
  const medianWords = rhythm.features.sentenceWords.median;

  let firstStep = "Start from a concrete artifact or observation before making the larger claim.";
  if (openingShape) {
    firstStep = `Start from a concrete artifact or observation, keeping the opening shape compatible with: ${openingShape}.`;
  }

  return [
    firstStep,
    "Name the pressure, question, or practical stakes that make the evidence matter.",
    `Develop the article in the learned register with sentence pacing near the ${medianWords}-word median where it fits.`,
    "Close by returning to the evidence and leaving the reader with a practical handle, not a generic conclusion.",
  ];
}
|
|
131
|
+
|
|
132
|
+
/**
 * Anchor a possibly relative path at `cwd`.
 *
 * @param {string} cwd - Base directory for relative values.
 * @param {string} value - Path to resolve.
 * @returns {string} `value` unchanged when it is absolute, otherwise `path.join(cwd, value)`.
 */
function resolvePath(cwd, value) {
  if (path.isAbsolute(value)) {
    return value;
  }
  return path.join(cwd, value);
}
|
|
135
|
+
|
|
136
|
+
/**
 * Format a file path for display, using forward slashes.
 *
 * @param {string} filePath - Path to display.
 * @param {string} rootDir - Directory the path should be shown relative to.
 * @returns {string} The path relative to `rootDir` when `filePath` lies strictly inside
 *   it; otherwise `filePath` itself. Separators are always "/".
 */
function displayPath(filePath, rootDir) {
  const relative = path.relative(rootDir, filePath);
  // Only a leading ".." *segment* means the path leaves rootDir. A plain
  // startsWith("..") would also reject in-root names such as "..drafts/a.md".
  const escapesRoot = relative === ".." || relative.startsWith(`..${path.sep}`);
  if (relative && !escapesRoot && !path.isAbsolute(relative)) {
    return relative.split(path.sep).join("/");
  }
  return filePath.split(path.sep).join("/");
}
|
|
143
|
+
|
|
144
|
+
/**
 * Upper-case the first character of a value's string form.
 *
 * @param {*} value - Value to format; `null` and `undefined` are treated as "".
 * @returns {string} The stringified value with its first character upper-cased.
 */
function capitalize(value) {
  const text = String(value ?? "");
  const head = text.charAt(0).toUpperCase();
  return `${head}${text.slice(1)}`;
}
|
|
147
|
+
|
|
148
|
+
/**
 * Flatten a value into a single trimmed line of text.
 *
 * Steps, in order: stringify (`null`/`undefined` become ""), delete the control
 * characters matched by the first pattern (tab, LF and CR are not in that set),
 * collapse every whitespace run to one space, swap backticks for apostrophes, trim.
 *
 * @param {*} value - Value to flatten.
 * @returns {string} Single-line text without backticks or the removed control characters.
 */
function safeInline(value) {
  const text = String(value ?? "");
  const withoutControls = text.replace(/[\0-\x08\x0b\x0c\x0e-\x1f\x7f]/g, "");
  const singleSpaced = withoutControls.replace(/\s+/g, " ");
  const withoutBackticks = singleSpaced.replace(/`/g, "'");
  return withoutBackticks.trim();
}
|
package/src/v2/config.js
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { writeUtf8FileSafely } from "./io-utils.js";
|
|
4
|
+
|
|
5
|
+
// Fallback values used by writeProjectConfig when neither the caller's updates
// nor the existing config file supply a setting.
export const DEFAULT_EXAMPLES_DIR = "./articles";
export const DEFAULT_VOICE_DIR = "./dravoice-voice";
export const DEFAULT_PROMPT_FORMAT = "agents";
export const DEFAULT_PROMPT_OUT = "AGENTS.md";

// File name of the project config; configPathFor joins it onto the working directory.
const CONFIG_FILE = ".dravoice.yml";
|
|
11
|
+
|
|
12
|
+
/**
 * Locate the project config file for a working directory.
 *
 * @param {string} cwd - Project directory.
 * @returns {string} `cwd` joined with the config file name.
 */
export function configPathFor(cwd) {
  const location = path.join(cwd, CONFIG_FILE);
  return location;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Read the recognized settings from the project config file.
 *
 * The file is scanned line by line for `key: value` pairs; blank lines and lines
 * whose trimmed text starts with "#" are skipped. The pattern is applied to the
 * untrimmed line, so a key preceded by whitespace does not match and is ignored.
 * Only `voice`, `examples`, `promptFormat` and `promptOut` are kept.
 *
 * @param {string} cwd - Project directory containing the config file.
 * @returns {object} Recognized settings found in the file; `{}` when the file is absent.
 */
export function loadProjectConfig(cwd) {
  const configPath = configPathFor(cwd);
  const settings = {};
  if (!fs.existsSync(configPath)) {
    return settings;
  }

  const recognizedKeys = ["voice", "examples", "promptFormat", "promptOut"];
  const entryPattern = /^([A-Za-z][A-Za-z0-9_-]*):\s*(.*)$/;

  fs.readFileSync(configPath, "utf8")
    .split(/\r?\n/)
    .forEach((rawLine) => {
      const trimmed = rawLine.trim();
      if (trimmed === "" || trimmed.startsWith("#")) {
        return;
      }
      const parsed = entryPattern.exec(rawLine);
      if (parsed === null) {
        return;
      }
      const key = parsed[1];
      const value = parsed[2];
      if (recognizedKeys.includes(key)) {
        settings[key] = unquoteYamlScalar(value.trim());
      }
    });

  return settings;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Merge updates into the project config and write the file back.
 *
 * For each setting the first non-nullish value wins, in this order: `updates`,
 * the existing file, the module default. The result is written through
 * `writeUtf8FileSafely` as four `key: value` lines under a comment header.
 *
 * @param {string} cwd - Project directory containing (or receiving) the config file.
 * @param {object} [updates={}] - Settings to override: `voice`, `examples`,
 *   `promptFormat`, `promptOut`.
 * @returns {{voice: *, examples: *, promptFormat: *, promptOut: *}} The merged settings
 *   (not slash-normalized; only the written file is).
 */
export function writeProjectConfig(cwd, updates = {}) {
  const saved = loadProjectConfig(cwd);
  const choose = (key, fallback) => updates[key] ?? saved[key] ?? fallback;

  const config = {
    voice: choose("voice", DEFAULT_VOICE_DIR),
    examples: choose("examples", DEFAULT_EXAMPLES_DIR),
    promptFormat: choose("promptFormat", DEFAULT_PROMPT_FORMAT),
    promptOut: choose("promptOut", DEFAULT_PROMPT_OUT),
  };

  const fileLines = [
    "# Dravoice project defaults",
    `voice: ${displayPath(config.voice)}`,
    `examples: ${displayPath(config.examples)}`,
    `promptFormat: ${config.promptFormat}`,
    `promptOut: ${displayPath(config.promptOut)}`,
    "",
  ];
  writeUtf8FileSafely(configPathFor(cwd), fileLines.join("\n"));

  return config;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Resolve a configured path against the working directory.
 *
 * @param {string} cwd - Base directory for relative values.
 * @param {string} [value] - Configured path; any falsy value means "not configured".
 * @returns {string|undefined} `undefined` for a falsy value, the value itself when it is
 *   absolute, otherwise `path.join(cwd, value)`.
 */
export function resolveConfiguredPath(cwd, value) {
  if (value) {
    return path.isAbsolute(value) ? value : path.join(cwd, value);
  }
  return undefined;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Normalize the separators of a path for display.
 *
 * @param {*} value - Path-like value; it is stringified first.
 * @returns {string} The text with every run of "/" and "\" replaced by a single "/".
 */
export function displayPath(value) {
  const text = String(value);
  return text.replace(/[\\/]+/g, "/");
}
|
|
72
|
+
|
|
73
|
+
/**
 * Strip one pair of matching surrounding quotes from a scalar value.
 *
 * @param {string} value - Raw scalar text (already trimmed by the caller).
 * @returns {string} The text between the quotes when `value` is at least two characters
 *   long and begins and ends with the same quote character (" or '); otherwise `value`
 *   unchanged. No escape sequences are interpreted.
 */
function unquoteYamlScalar(value) {
  // A lone quote character is both the first and the last character; without the
  // length guard it would be "unquoted" into an empty string.
  if (value.length < 2) {
    return value;
  }
  const opening = value[0];
  const closing = value[value.length - 1];
  const isQuote = opening === '"' || opening === "'";
  return isQuote && opening === closing ? value.slice(1, -1) : value;
}
|
package/src/v2/doctor.js
ADDED
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import {
|
|
4
|
+
DEFAULT_MAX_FILE_BYTES,
|
|
5
|
+
DEFAULT_MAX_FILES,
|
|
6
|
+
DEFAULT_MAX_TOTAL_BYTES,
|
|
7
|
+
VOICE_EXTENSIONS,
|
|
8
|
+
VOICE_SKIP_DIRS,
|
|
9
|
+
parseDocument,
|
|
10
|
+
} from "./document-model.js";
|
|
11
|
+
import { buildVoiceProfileV2 } from "./profile.js";
|
|
12
|
+
|
|
13
|
+
/**
 * Inspect an examples directory and report whether it can be used as a voice corpus.
 *
 * Missing paths and non-directories short-circuit with a "needs-work" result. Otherwise
 * every supported file is checked against the file-count, per-file-size and total-size
 * limits, read as UTF-8, rejected if it contains a NUL character, and parsed. A failure
 * on one file is recorded on that file's record as `issue` instead of aborting the run.
 *
 * @param {object} options
 * @param {string} [options.examplesDir] - Directory to inspect; defaults to "./articles".
 * @param {string} [options.cwd=process.cwd()] - Base directory for relative paths.
 * @returns {object} Result with `schemaVersion`, `generatedBy`, `examples`, `status`,
 *   `exitCode` (0 only when status is "ready"), `summary`, `files`, `unsupportedFiles`
 *   and `recommendations`.
 */
export function diagnoseVoiceCorpusV2({ examplesDir, cwd = process.cwd() }) {
  const requestedExamples = examplesDir ?? "./articles";
  const resolvedExamples = resolvePath(cwd, requestedExamples);
  const commandPath = displayPathForCommand(requestedExamples);
  const displayedExamples = path.isAbsolute(requestedExamples)
    ? displayPath(resolvedExamples, cwd)
    : commandPath;

  const baseResult = {
    schemaVersion: 2,
    generatedBy: "dravoice-v2-doctor",
    examples: displayedExamples,
    status: "needs-work",
    exitCode: 1,
    summary: emptySummary(),
    files: [],
    unsupportedFiles: [],
    recommendations: [],
  };
  const failWith = (recommendations) => ({ ...baseResult, recommendations });

  if (!fs.existsSync(resolvedExamples)) {
    const suggestedPath = commandPath === "./articles" ? "~/path/to/writing" : commandPath;
    return failWith([
      "Examples directory is missing.",
      "Run mkdir -p articles, then copy at least 3 representative .md, .mdx, or .txt pieces into it.",
      `If your writing lives elsewhere, run drav doctor --examples ${suggestedPath}.`,
    ]);
  }

  if (!fs.statSync(resolvedExamples).isDirectory()) {
    return failWith([
      "Examples path is not a directory.",
      "Point --examples at a folder containing representative .md, .mdx, or .txt pieces.",
    ]);
  }

  const discovered = discoverCorpusFiles(resolvedExamples);
  const validDocuments = [];
  const files = [];
  let totalBytes = 0;

  for (const [index, filePath] of discovered.supportedFiles.entries()) {
    const relative = displayPath(filePath, resolvedExamples);
    const record = {
      path: relative,
      extension: path.extname(filePath).toLowerCase(),
      wordCount: 0,
      sentenceCount: 0,
    };

    try {
      if (index >= DEFAULT_MAX_FILES) {
        throw new Error(`Corpus contains more than ${DEFAULT_MAX_FILES} voice file(s) allowed.`);
      }
      const { size } = fs.statSync(filePath);
      if (size > DEFAULT_MAX_FILE_BYTES) {
        throw new Error(`Voice file ${relative} exceeds the ${DEFAULT_MAX_FILE_BYTES} byte limit.`);
      }
      // The running total keeps growing once the cap is crossed, so every later
      // file is reported against the total limit as well.
      totalBytes += size;
      if (totalBytes > DEFAULT_MAX_TOTAL_BYTES) {
        throw new Error(`Voice corpus exceeds the ${DEFAULT_MAX_TOTAL_BYTES} byte total limit.`);
      }
      const contents = fs.readFileSync(filePath, "utf8");
      if (contents.includes("\0")) {
        throw new Error(`Voice file ${relative} looks like binary-looking text and cannot be analyzed.`);
      }
      const document = parseDocument({ filePath, rootDir: resolvedExamples, contents });
      record.wordCount = document.wordCount;
      record.sentenceCount = document.sentences.length;
      validDocuments.push(document);
    } catch (error) {
      record.issue = error.message;
    }

    files.push(record);
  }

  const summary = summaryFor(validDocuments);
  const unsupportedFiles = discovered.unsupportedFiles.map((filePath) => ({
    path: displayPath(filePath, resolvedExamples),
    extension: path.extname(filePath).toLowerCase() || null,
  }));
  const status = statusFor({ summary, files, supportedCount: discovered.supportedFiles.length });
  const recommendations = recommendationsFor({
    status,
    summary,
    files,
    unsupportedFiles,
    examples: baseResult.examples,
  });

  return {
    ...baseResult,
    status,
    exitCode: status === "ready" ? 0 : 1,
    summary,
    files,
    unsupportedFiles,
    recommendations,
  };
}
|
|
112
|
+
|
|
113
|
+
/**
 * Render a corpus doctor result as a plain-text report.
 *
 * Sections, in order: header and counters, file issues, unsupported files, corpus
 * quality warnings, recommendations, next step. The first three list sections are
 * omitted when empty; file issues and unsupported files show at most eight entries
 * followed by an "...and N more" line.
 *
 * @param {object} result - Result object as returned by `diagnoseVoiceCorpusV2`.
 * @returns {string} Report text, lines joined with "\n".
 */
export function renderCorpusDoctorV2(result) {
  const { summary } = result;
  const lines = [
    "# Corpus Doctor",
    "",
    `Examples: ${result.examples}`,
    `Status: ${result.status}`,
    `Documents: ${summary.documentCount}`,
    `Words: ${summary.wordCount}`,
    `Sentences: ${summary.sentenceCount}`,
    `Confidence: ${summary.confidence.band} - ${summary.confidence.message}`,
    `Supported files: ${result.files.length}`,
    `Unsupported files: ${result.unsupportedFiles.length}`,
    "",
  ];

  // Emits a heading, up to eight formatted entries, an overflow line and a blank line.
  const appendCappedSection = (heading, entries, format) => {
    if (entries.length === 0) {
      return;
    }
    lines.push(heading);
    for (const entry of entries.slice(0, 8)) {
      lines.push(format(entry));
    }
    if (entries.length > 8) {
      lines.push(`- ...and ${entries.length - 8} more`);
    }
    lines.push("");
  };

  appendCappedSection(
    "File issues:",
    result.files.filter((file) => file.issue),
    (file) => `- ${file.path}: ${file.issue}`,
  );
  appendCappedSection("Unsupported files:", result.unsupportedFiles, (file) => `- ${file.path}`);

  if (summary.quality?.warnings?.length) {
    lines.push("Corpus quality warnings:");
    for (const warning of summary.quality.warnings) {
      lines.push(`- ${warning}`);
    }
    lines.push("");
  }

  lines.push("Recommendations:");
  for (const item of result.recommendations) {
    lines.push(`- ${item}`);
  }
  lines.push("", `Next: ${nextStepFor(result)}`, "");

  return lines.join("\n");
}
|
|
168
|
+
|
|
169
|
+
/**
 * Walk a directory tree and split its regular files by extension support.
 *
 * Directories named in `VOICE_SKIP_DIRS` are not entered. Entries that are neither
 * directories nor regular files are ignored. A file counts as supported when its
 * lower-cased extension is in `VOICE_EXTENSIONS`.
 *
 * @param {string} rootDir - Directory to walk.
 * @returns {{supportedFiles: string[], unsupportedFiles: string[]}} Full paths, each
 *   list sorted with `localeCompare`.
 */
function discoverCorpusFiles(rootDir) {
  const supportedFiles = [];
  const unsupportedFiles = [];
  const byName = (left, right) => left.name.localeCompare(right.name);
  const byPath = (left, right) => left.localeCompare(right);

  const pending = [rootDir];
  while (pending.length > 0) {
    const current = pending.pop();
    const entries = fs.readdirSync(current, { withFileTypes: true }).sort(byName);
    const subdirectories = [];

    for (const entry of entries) {
      const fullPath = path.join(current, entry.name);
      if (entry.isDirectory()) {
        if (!VOICE_SKIP_DIRS.has(entry.name)) {
          subdirectories.push(fullPath);
        }
        continue;
      }
      if (!entry.isFile()) {
        continue;
      }
      const extension = path.extname(entry.name).toLowerCase();
      const bucket = VOICE_EXTENSIONS.has(extension) ? supportedFiles : unsupportedFiles;
      bucket.push(fullPath);
    }

    // Reversed so that the stack pops sub-directories in name order.
    subdirectories.reverse();
    pending.push(...subdirectories);
  }

  return {
    supportedFiles: supportedFiles.sort(byPath),
    unsupportedFiles: unsupportedFiles.sort(byPath),
  };
}
|
|
200
|
+
|
|
201
|
+
/**
 * Summarize the successfully parsed documents of a corpus.
 *
 * @param {Array} documents - Parsed documents; an empty list yields `emptySummary()`.
 * @returns {object} `documentCount`, `wordCount`, `sentenceCount`, `confidence` and
 *   `quality`, copied from the `source` of the profile built by `buildVoiceProfileV2`.
 */
function summaryFor(documents) {
  if (documents.length === 0) {
    return emptySummary();
  }
  const {
    documentCount,
    wordCount,
    sentenceCount,
    confidence,
    quality,
  } = buildVoiceProfileV2({ documents }).source;
  return { documentCount, wordCount, sentenceCount, confidence, quality };
}
|
|
214
|
+
|
|
215
|
+
/**
 * Summary used when no usable documents exist.
 *
 * @returns {object} A freshly built summary with zero counts, "weak" confidence and
 *   an empty quality section.
 */
function emptySummary() {
  const confidence = {
    band: "weak",
    message: "No usable source documents were found.",
  };
  const quality = {
    lengthSpread: { minWords: 0, maxWords: 0, minSentences: 0, maxSentences: 0 },
    duplicateGroups: 0,
    warnings: [],
  };
  return { documentCount: 0, wordCount: 0, sentenceCount: 0, confidence, quality };
}
|
|
231
|
+
|
|
232
|
+
/**
 * Classify a corpus as "ready", "weak" or "needs-work".
 *
 * @param {object} options
 * @param {object} options.summary - Summary exposing `documentCount` and `sentenceCount`.
 * @param {Array} options.files - Per-file records; a truthy `issue` marks a failed file.
 * @param {number} options.supportedCount - Number of supported files discovered.
 * @returns {string} "needs-work" when nothing usable was found or any file has an issue;
 *   "ready" with at least 3 documents and 10 sentences; otherwise "weak".
 */
function statusFor({ summary, files, supportedCount }) {
  const hasIssues = files.some((file) => file.issue);
  const nothingUsable = supportedCount === 0 || summary.documentCount === 0;
  if (nothingUsable || hasIssues) {
    return "needs-work";
  }
  const largeEnough = summary.documentCount >= 3 && summary.sentenceCount >= 10;
  return largeEnough ? "ready" : "weak";
}
|
|
245
|
+
|
|
246
|
+
/**
 * Build the list of recommendations shown for a diagnosed corpus.
 *
 * @param {object} options
 * @param {string} options.status - Corpus status; "ready" produces the short success list.
 * @param {object} options.summary - Summary with `documentCount`, `sentenceCount` and an
 *   optional `quality.warnings` list.
 * @param {Array} options.files - Per-file records; a truthy `issue` marks a failed file.
 * @param {Array} options.unsupportedFiles - Files with unsupported extensions.
 * @param {string} options.examples - Examples path interpolated into the init command.
 * @returns {string[]} At least one recommendation.
 */
function recommendationsFor({ status, summary, files, unsupportedFiles, examples }) {
  if (status === "ready") {
    const warningNote = summary.quality?.warnings?.length
      ? ["Review corpus quality warnings before trusting strict calibration."]
      : [];
    return [
      "Corpus is ready for cautious profile learning.",
      ...warningNote,
      `Run drav init --examples ${examples}.`,
    ];
  }

  const hasIssues = files.some((file) => file.issue);
  // Each rule pairs a condition with the advice emitted when it holds; order is significant.
  const rules = [
    [files.length === 0, "Convert or copy your writing into Markdown, MDX, or plain text."],
    [
      summary.documentCount < 3,
      `Add at least ${3 - summary.documentCount} more representative long-form piece(s).`,
    ],
    [
      summary.sentenceCount < 10,
      "Use longer source pieces; Dravoice needs at least 10 total sentences for a ready corpus.",
    ],
    [hasIssues, "Fix or remove supported files with issues, then rerun drav doctor."],
    [unsupportedFiles.length > 0, "Supported extensions: .md, .mdx, .txt."],
  ];

  const advice = rules.filter(([applies]) => applies).map(([, text]) => text);
  if (advice.length > 0) {
    return advice;
  }
  return ["Add more representative long-form writing, then rerun drav doctor."];
}
|
|
286
|
+
|
|
287
|
+
/**
 * Suggest the next action for a diagnosed corpus.
 *
 * @param {object} result - Doctor result exposing `status` and `examples`.
 * @returns {string} The init command for a "ready" corpus, otherwise a prompt to add
 *   more writing and rerun the doctor.
 */
function nextStepFor(result) {
  const isReady = result.status === "ready";
  return isReady
    ? `drav init --examples ${result.examples}`
    : "add more writing, then run drav doctor";
}
|
|
293
|
+
|
|
294
|
+
/**
 * Anchor a possibly relative path at `cwd`.
 *
 * @param {string} cwd - Base directory for relative values.
 * @param {string} value - Path to resolve.
 * @returns {string} `value` unchanged when it is absolute, otherwise `path.join(cwd, value)`.
 */
function resolvePath(cwd, value) {
  if (path.isAbsolute(value)) {
    return value;
  }
  return path.join(cwd, value);
}
|
|
297
|
+
|
|
298
|
+
/**
 * Format a file path for display, using forward slashes.
 *
 * @param {string} filePath - Path to display.
 * @param {string} rootDir - Directory the path should be shown relative to.
 * @returns {string} The path relative to `rootDir` when `filePath` lies strictly inside
 *   it; otherwise `filePath` itself. Separators are always "/".
 */
function displayPath(filePath, rootDir) {
  const relative = path.relative(rootDir, filePath);
  // Only a leading ".." *segment* means the path leaves rootDir. A plain
  // startsWith("..") would also reject in-root names such as "..drafts/a.md".
  const escapesRoot = relative === ".." || relative.startsWith(`..${path.sep}`);
  if (relative && !escapesRoot && !path.isAbsolute(relative)) {
    return relative.split(path.sep).join("/");
  }
  return filePath.split(path.sep).join("/");
}
|
|
305
|
+
|
|
306
|
+
/**
 * Normalize the separators of a user-supplied path for use in a suggested command.
 *
 * @param {*} value - Path-like value; it is stringified first.
 * @returns {string} The text with every run of "/" and "\" replaced by a single "/".
 */
function displayPathForCommand(value) {
  const text = String(value);
  return text.replace(/[\\/]+/g, "/");
}
|