dravoice 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +102 -36
- package/bin/dravoice.js +11 -10
- package/package.json +47 -45
- package/src/index.js +874 -197
- package/src/v2/analyzers/discourse.js +63 -52
- package/src/v2/analyzers/evidence.js +73 -38
- package/src/v2/analyzers/lexical.js +114 -58
- package/src/v2/analyzers/register.js +46 -34
- package/src/v2/analyzers/rhetorical-shape.js +59 -48
- package/src/v2/analyzers/rhythm.js +39 -47
- package/src/v2/analyzers/structure.js +24 -24
- package/src/v2/benchmark.js +574 -568
- package/src/v2/brief.js +154 -146
- package/src/v2/config.js +78 -0
- package/src/v2/document-model.js +351 -260
- package/src/v2/inspect.js +67 -67
- package/src/v2/io-utils.js +51 -0
- package/src/v2/profile.js +155 -129
- package/src/v2/prompt.js +65 -64
- package/src/v2/review.js +177 -219
- package/src/v2/revise-plan.js +130 -33
- package/src/v2/stylometry.js +123 -17
- package/src/v2/text-utils.js +123 -123
package/src/v2/inspect.js
CHANGED
|
@@ -1,67 +1,67 @@
|
|
|
1
|
-
export function renderInspectV2(profile) {
|
|
2
|
-
const lines = [
|
|
3
|
-
"Dravoice V2 Profile",
|
|
4
|
-
"",
|
|
5
|
-
`Source: ${profile.source.documentCount} document(s), ${profile.source.wordCount} words, ${profile.source.sentenceCount} sentences`,
|
|
6
|
-
`Corpus confidence: ${capitalize(profile.source.confidence.band)} - ${profile.source.confidence.message}`,
|
|
7
|
-
"",
|
|
8
|
-
"Feature families:",
|
|
9
|
-
];
|
|
10
|
-
|
|
11
|
-
for (const [name, family] of Object.entries(profile.families)) {
|
|
12
|
-
lines.push(`- ${name}: ${family.confidence}`);
|
|
13
|
-
if (family.warnings.length) {
|
|
14
|
-
for (const warning of family.warnings) {
|
|
15
|
-
lines.push(` warning: ${warning}`);
|
|
16
|
-
}
|
|
17
|
-
}
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
lines.push("", "Feature details:");
|
|
21
|
-
for (const [name, family] of Object.entries(profile.families)) {
|
|
22
|
-
lines.push(`- ${name}: ${featureSummary(name, family.features)}`);
|
|
23
|
-
if (family.revisionHandles?.length) {
|
|
24
|
-
lines.push(" Revision handles:");
|
|
25
|
-
for (const handle of family.revisionHandles) {
|
|
26
|
-
lines.push(` - ${handle}`);
|
|
27
|
-
}
|
|
28
|
-
}
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
lines.push("", "Guidance:");
|
|
32
|
-
for (const rule of profile.guidance.draftingRules) {
|
|
33
|
-
lines.push(`- ${rule}`);
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
lines.push("");
|
|
37
|
-
return `${lines.join("\n")}`;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
function capitalize(value) {
|
|
41
|
-
return value.charAt(0).toUpperCase() + value.slice(1);
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
function featureSummary(name, features) {
|
|
45
|
-
if (name === "rhythm") {
|
|
46
|
-
return `sentenceWords.median=${features.sentenceWords.median}; paragraphWords.median=${features.paragraphWords.median}; listDensity=${features.listDensity}; quoteDensity=${features.quoteDensity}`;
|
|
47
|
-
}
|
|
48
|
-
if (name === "lexical") {
|
|
49
|
-
return `wordCount=${features.wordCount}; contentTypeTokenRatio=${features.vocabularyRichness.contentTypeTokenRatio}; wordLength.median=${features.wordLength.median}`;
|
|
50
|
-
}
|
|
51
|
-
if (name === "register") {
|
|
52
|
-
return `primary=${features.primary.value} (${features.primary.score}); alternates=${features.scores.slice(1, 4).map((score) => `${score.value}:${score.score}`).join(", ")}`;
|
|
53
|
-
}
|
|
54
|
-
if (name === "discourse") {
|
|
55
|
-
return `transitionRates=${Object.entries(features.transitionRates).map(([key, value]) => `${key}:${value}`).join(", ")}; sentenceCallbacks=${features.sentenceCallbacks}`;
|
|
56
|
-
}
|
|
57
|
-
if (name === "rhetoricalShape") {
|
|
58
|
-
return `openingMoves=${features.openingMoves.slice(0, 5).join(" -> ") || "none"}; commonSequences=${features.commonSequences.slice(0, 3).map((item) => `${item.value}:${item.count}`).join(", ") || "none"}`;
|
|
59
|
-
}
|
|
60
|
-
if (name === "evidence") {
|
|
61
|
-
return `evidenceSentenceRate=${features.evidenceSentenceRate}; claimSentenceRate=${features.claimSentenceRate}; unsupportedClaimRate=${features.unsupportedClaimRate}; evidenceTypes=${features.evidenceTypes.map((item) => `${item.value}:${item.count}`).join(", ") || "none"}`;
|
|
62
|
-
}
|
|
63
|
-
if (name === "structure") {
|
|
64
|
-
return `sectionWords.median=${features.sectionWords.median}; headingCount.median=${features.headingCount.median}; listDocumentRate=${features.listDocumentRate}; quoteDocumentRate=${features.quoteDocumentRate}`;
|
|
65
|
-
}
|
|
66
|
-
return JSON.stringify(features);
|
|
67
|
-
}
|
|
1
|
+
/**
 * Render a plain-text inspection report for a Dravoice V2 profile.
 *
 * The report has four parts: a source/confidence header, one line per feature
 * family (followed by that family's warnings), a per-family feature summary
 * (followed by its revision handles), and the drafting rules from the
 * profile's guidance.
 *
 * @param {object} profile - Profile with `source`, `families`, and `guidance`.
 * @returns {string} Report lines joined with "\n"; the final line is empty, so
 *   the text ends with a newline.
 */
export function renderInspectV2(profile) {
  const { source } = profile;
  const familyEntries = Object.entries(profile.families);

  const lines = [
    "Dravoice V2 Profile",
    "",
    `Source: ${source.documentCount} document(s), ${source.wordCount} words, ${source.sentenceCount} sentences`,
    `Corpus confidence: ${capitalize(source.confidence.band)} - ${source.confidence.message}`,
    "",
    "Feature families:",
  ];

  for (const [name, family] of familyEntries) {
    lines.push(`- ${name}: ${family.confidence}`);
    // `warnings` is treated as optional, exactly like `revisionHandles` below,
    // so a family that lacks the list does not abort the whole report.
    for (const warning of family.warnings ?? []) {
      lines.push(` warning: ${warning}`);
    }
  }

  lines.push("", "Feature details:");
  for (const [name, family] of familyEntries) {
    lines.push(`- ${name}: ${featureSummary(name, family.features)}`);
    if (family.revisionHandles?.length) {
      lines.push(" Revision handles:");
      for (const handle of family.revisionHandles) {
        lines.push(` - ${handle}`);
      }
    }
  }

  lines.push("", "Guidance:");
  for (const rule of profile.guidance.draftingRules) {
    lines.push(`- ${rule}`);
  }

  // Trailing empty entry so the joined text ends with a newline.
  lines.push("");
  return lines.join("\n");
}
|
|
39
|
+
|
|
40
|
+
/**
 * Upper-case the first character of a value for display.
 *
 * The value is coerced to a string first; `null` and `undefined` become the
 * empty string, so a missing label renders as blank instead of throwing.
 *
 * @param {unknown} value - Label to capitalize.
 * @returns {string} The text with its first character upper-cased.
 */
function capitalize(value) {
  const text = String(value ?? "");
  return text.charAt(0).toUpperCase() + text.slice(1);
}
|
|
43
|
+
|
|
44
|
+
/**
 * Build the one-line summary shown for a feature family in the inspect report.
 *
 * Each known family name selects a fixed set of fields from `features`; any
 * other name falls back to the JSON serialization of `features`.
 *
 * Fields read through `?.`/`??` (`maskedCharacterFourgrams`,
 * `functionWordBigrams`, `moveTrigrams`, `supportedClaimRate`) are treated as
 * optional so that a profile lacking them renders a placeholder instead of
 * "undefined" or a crash.
 *
 * @param {string} name - Feature family name (e.g. "rhythm", "lexical").
 * @param {object} features - The family's `features` object.
 * @returns {string} Semicolon-separated `key=value` summary.
 */
function featureSummary(name, features) {
  // Renders `[{ value, count }]` lists as "value:count, value:count".
  const formatCounts = (items) => items.map((item) => `${item.value}:${item.count}`).join(", ");

  switch (name) {
    case "rhythm":
      return `sentenceWords.median=${features.sentenceWords.median}; paragraphWords.median=${features.paragraphWords.median}; listDensity=${features.listDensity}; quoteDensity=${features.quoteDensity}`;

    case "lexical":
      return `wordCount=${features.wordCount}; contentTypeTokenRatio=${features.vocabularyRichness.contentTypeTokenRatio}; wordLength.median=${features.wordLength.median}; maskedCharacterFourgrams=${features.maskedCharacterFourgrams?.length ?? 0}; functionWordBigrams=${features.functionWordBigrams?.length ?? 0}`;

    case "register": {
      // Entries 1..3 of `scores` are listed as alternates to `primary`.
      const alternates = features.scores
        .slice(1, 4)
        .map((score) => `${score.value}:${score.score}`)
        .join(", ");
      return `primary=${features.primary.value} (${features.primary.score}); alternates=${alternates}`;
    }

    case "discourse": {
      const transitionRates = Object.entries(features.transitionRates)
        .map(([key, value]) => `${key}:${value}`)
        .join(", ");
      return `transitionRates=${transitionRates}; sentenceCallbacks=${features.sentenceCallbacks}`;
    }

    case "rhetoricalShape": {
      const openingMoves = features.openingMoves.slice(0, 5).join(" -> ") || "none";
      const moveTrigrams = formatCounts(features.moveTrigrams?.slice(0, 2) ?? []) || "none";
      const commonSequences = formatCounts(features.commonSequences.slice(0, 3)) || "none";
      return `openingMoves=${openingMoves}; moveTrigrams=${moveTrigrams}; commonSequences=${commonSequences}`;
    }

    case "evidence": {
      // A profile without `supportedClaimRate` shows "n/a" rather than the
      // literal text "undefined".
      const supportedClaimRate = features.supportedClaimRate ?? "n/a";
      const evidenceTypes = formatCounts(features.evidenceTypes) || "none";
      return `evidenceSentenceRate=${features.evidenceSentenceRate}; claimSentenceRate=${features.claimSentenceRate}; supportedClaimRate=${supportedClaimRate}; unsupportedClaimRate=${features.unsupportedClaimRate}; evidenceTypes=${evidenceTypes}`;
    }

    case "structure":
      return `sectionWords.median=${features.sectionWords.median}; headingCount.median=${features.headingCount.median}; listDocumentRate=${features.listDocumentRate}; quoteDocumentRate=${features.quoteDocumentRate}`;

    default:
      return JSON.stringify(features);
  }
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
|
|
4
|
+
// Default size limit (5 * 1024 * 1024 bytes) used by readUtf8FileBounded when the caller passes no `maxBytes`.
const DEFAULT_MAX_READ_BYTES = 5 * 1024 * 1024;
|
|
5
|
+
|
|
6
|
+
/**
 * Read a regular file as UTF-8 text, refusing files over a size limit.
 *
 * @param {string} filePath - Path to read; resolved with `path.resolve`.
 * @param {object} [options]
 * @param {string} [options.label="File"] - Noun used at the start of error messages.
 * @param {number} [options.maxBytes=DEFAULT_MAX_READ_BYTES] - Largest accepted size in bytes.
 * @param {boolean} [options.rejectBinary=true] - When true, text containing a NUL
 *   character is rejected.
 * @returns {string} The decoded file contents.
 * @throws {Error} If the path is not a regular file, exceeds `maxBytes`, or
 *   (with `rejectBinary`) contains a NUL character. Errors from `fs.statSync`
 *   and `fs.readFileSync` propagate unchanged.
 */
export function readUtf8FileBounded(filePath, {
  label = "File",
  maxBytes = DEFAULT_MAX_READ_BYTES,
  rejectBinary = true,
} = {}) {
  const resolved = path.resolve(filePath);

  // Cheap early rejection before any bytes are read.
  const stats = fs.statSync(resolved);
  if (!stats.isFile()) {
    throw new Error(`${label} is not a regular file: ${displayPath(filePath)}`);
  }
  if (stats.size > maxBytes) {
    throw new Error(`${label} ${displayPath(filePath)} exceeds the ${maxBytes} byte limit.`);
  }

  // The file may have grown between the stat call and the read, so the limit
  // is enforced again on the bytes that were actually read.
  const buffer = fs.readFileSync(resolved);
  if (buffer.length > maxBytes) {
    throw new Error(`${label} ${displayPath(filePath)} exceeds the ${maxBytes} byte limit.`);
  }

  const contents = buffer.toString("utf8");
  if (rejectBinary && contents.includes("\0")) {
    throw new Error(`${label} ${displayPath(filePath)} looks like binary-looking text and cannot be analyzed.`);
  }
  return contents;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Read a size-bounded UTF-8 file and parse it as JSON.
 *
 * @param {string} filePath - Path of the JSON file.
 * @param {object} [options] - Forwarded to `readUtf8FileBounded`; `label`
 *   defaults to "JSON file" and is also used in the parse-error message.
 * @returns {unknown} The parsed JSON value.
 * @throws {Error} If reading fails (see `readUtf8FileBounded`) or the text is
 *   not valid JSON. For a parse failure the original error is attached as
 *   `cause`.
 */
export function readJsonFileBounded(filePath, options = {}) {
  const label = options.label ?? "JSON file";
  const contents = readUtf8FileBounded(filePath, { ...options, label });
  try {
    return JSON.parse(contents);
  } catch (error) {
    // Keep the underlying SyntaxError reachable for debugging.
    throw new Error(`${label} ${displayPath(filePath)} is not valid JSON: ${error.message}`, { cause: error });
  }
}
|
|
35
|
+
|
|
36
|
+
/**
 * Write UTF-8 text to a file, creating parent directories and refusing to
 * write through a symbolic link at the destination.
 *
 * @param {string} filePath - Destination path.
 * @param {string} contents - Text to write.
 * @throws {Error} If the destination is a symbolic link, or if creating the
 *   directory, opening, or writing the file fails.
 */
export function writeUtf8FileSafely(filePath, contents) {
  // Checked both before and after directory creation.
  rejectSymlink(filePath);
  fs.mkdirSync(path.dirname(filePath), { recursive: true });
  rejectSymlink(filePath);

  // The checks above can be raced: a link could be created after them and
  // before the write. Where the platform exposes O_NOFOLLOW, the open itself
  // refuses a symlink in the final path component. Where it does not, the
  // flag is 0 and the flags match the default "w" mode.
  const noFollow = fs.constants.O_NOFOLLOW ?? 0;
  const flags = fs.constants.O_WRONLY | fs.constants.O_CREAT | fs.constants.O_TRUNC | noFollow;
  const fd = fs.openSync(filePath, flags, 0o666);
  try {
    fs.writeFileSync(fd, contents, "utf8");
  } finally {
    fs.closeSync(fd);
  }
}
|
|
42
|
+
|
|
43
|
+
/**
 * Throw if `filePath` itself is a symbolic link.
 *
 * The path is inspected with `lstat` only. An existence check that follows
 * links reports a dangling symlink as missing, which would let a later write
 * create the link's target.
 *
 * @param {string} filePath - Path about to be written.
 * @throws {Error} If the path is a symbolic link (dangling or not), or if
 *   `lstat` fails for a reason other than the path not existing.
 */
function rejectSymlink(filePath) {
  let stats;
  try {
    stats = fs.lstatSync(filePath);
  } catch (error) {
    // Nothing at this path (or a parent is missing / not a directory): there
    // is no link to refuse.
    if (error.code === "ENOENT" || error.code === "ENOTDIR") {
      return;
    }
    throw error;
  }
  if (stats.isSymbolicLink()) {
    throw new Error(`Refusing to write through symlink: ${filePath}`);
  }
}
|
|
48
|
+
|
|
49
|
+
/**
 * Format a path for messages by replacing every platform path separator
 * (`path.sep`) with a forward slash.
 *
 * @param {unknown} filePath - Path to format; coerced with `String`.
 * @returns {string} The path text using "/" separators.
 */
function displayPath(filePath) {
  const text = String(filePath);
  return text.replaceAll(path.sep, "/");
}
|
package/src/v2/profile.js
CHANGED
|
@@ -1,54 +1,57 @@
|
|
|
1
|
-
import fs from "node:fs";
|
|
2
|
-
import path from "node:path";
|
|
3
|
-
import { analyzeDiscourse } from "./analyzers/discourse.js";
|
|
4
|
-
import { analyzeEvidence } from "./analyzers/evidence.js";
|
|
5
|
-
import { analyzeLexical } from "./analyzers/lexical.js";
|
|
6
|
-
import { analyzeRegister } from "./analyzers/register.js";
|
|
7
|
-
import { analyzeRhetoricalShape } from "./analyzers/rhetorical-shape.js";
|
|
8
|
-
import { analyzeRhythm } from "./analyzers/rhythm.js";
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { analyzeDiscourse } from "./analyzers/discourse.js";
|
|
4
|
+
import { analyzeEvidence } from "./analyzers/evidence.js";
|
|
5
|
+
import { analyzeLexical } from "./analyzers/lexical.js";
|
|
6
|
+
import { analyzeRegister } from "./analyzers/register.js";
|
|
7
|
+
import { analyzeRhetoricalShape } from "./analyzers/rhetorical-shape.js";
|
|
8
|
+
import { analyzeRhythm } from "./analyzers/rhythm.js";
|
|
9
9
|
import { analyzeStructure } from "./analyzers/structure.js";
|
|
10
10
|
import { loadDocuments } from "./document-model.js";
|
|
11
|
+
import { readJsonFileBounded, writeUtf8FileSafely } from "./io-utils.js";
|
|
11
12
|
import {
|
|
12
13
|
STYLOMETRIC_REFERENCES,
|
|
13
14
|
defaultStyleThresholds,
|
|
14
15
|
distanceByFamily,
|
|
15
16
|
percentile,
|
|
17
|
+
stabilityFromDistances,
|
|
16
18
|
} from "./stylometry.js";
|
|
17
|
-
|
|
18
|
-
export function learnVoicePackV2({ examplesDir, outDir }) {
|
|
19
|
-
const documents = loadDocuments({ examplesDir });
|
|
20
|
-
const profile = buildVoiceProfileV2({ documents });
|
|
21
|
-
if (outDir) {
|
|
22
|
-
writeVoicePackV2(outDir, profile);
|
|
23
|
-
}
|
|
24
|
-
return profile;
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
export function loadVoicePackV2(voiceDir) {
|
|
28
|
-
const profilePath = path.join(voiceDir, "profile.json");
|
|
29
|
-
if (!fs.existsSync(profilePath)) {
|
|
30
|
-
throw new Error(`No Dravoice V2 profile found at ${voiceDir}`);
|
|
31
|
-
}
|
|
32
|
-
const profile =
|
|
33
|
-
if (profile.schemaVersion !== 2) {
|
|
34
|
-
throw new Error(`Expected a Dravoice V2 profile at ${voiceDir}; found schemaVersion ${profile.schemaVersion ?? "unknown"}.`);
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
19
|
+
|
|
20
|
+
/**
 * Load example documents, build a V2 voice profile from them, and optionally
 * write the profile to disk.
 *
 * @param {object} params
 * @param {string} params.examplesDir - Passed to `loadDocuments`.
 * @param {string} [params.outDir] - When truthy, the profile is written there
 *   via `writeVoicePackV2`.
 * @param {Array} [params.excludePaths=[]] - Passed to `loadDocuments`.
 * @returns {object} The profile returned by `buildVoiceProfileV2`.
 */
export function learnVoicePackV2({ examplesDir, outDir, excludePaths = [] }) {
  const profile = buildVoiceProfileV2({
    documents: loadDocuments({ examplesDir, excludePaths }),
  });

  if (!outDir) {
    return profile;
  }

  writeVoicePackV2(outDir, profile);
  return profile;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Load and validate the V2 profile stored as `profile.json` in a voice
 * directory.
 *
 * @param {string} voiceDir - Directory expected to contain `profile.json`.
 * @returns {object} The parsed profile, after `validateVoiceProfileV2` passes.
 * @throws {Error} If `profile.json` is missing, cannot be read or parsed
 *   (limit: 2 * 1024 * 1024 bytes), does not declare `schemaVersion` 2, or
 *   fails validation.
 */
export function loadVoicePackV2(voiceDir) {
  const profilePath = path.join(voiceDir, "profile.json");
  if (!fs.existsSync(profilePath)) {
    throw new Error(`No Dravoice V2 profile found at ${voiceDir}`);
  }

  const profile = readJsonFileBounded(profilePath, { label: "Dravoice profile", maxBytes: 2 * 1024 * 1024 });

  // `?.` so that a file containing JSON `null` produces the schema error
  // below instead of a raw TypeError.
  const schemaVersion = profile?.schemaVersion;
  if (schemaVersion !== 2) {
    throw new Error(`Expected a Dravoice V2 profile at ${voiceDir}; found schemaVersion ${schemaVersion ?? "unknown"}.`);
  }

  validateVoiceProfileV2(profile, voiceDir);
  return profile;
}
|
|
41
|
+
|
|
39
42
|
export function buildVoiceProfileV2({ documents }) {
|
|
40
43
|
const source = sourceSummary(documents);
|
|
41
44
|
const families = analyzeFeatureFamilies(documents);
|
|
42
45
|
|
|
43
46
|
return {
|
|
44
|
-
schemaVersion: 2,
|
|
45
|
-
generatedBy: "dravoice-v2",
|
|
46
|
-
tool: { name: "Dravoice", cli: "drav" },
|
|
47
|
-
source,
|
|
48
|
-
families,
|
|
49
|
-
guidance: guidanceFor({ source, families }),
|
|
50
|
-
calibration: {
|
|
51
|
-
featureStability: Object.fromEntries(Object.entries(families).map(([name, family]) => [name, family.confidence])),
|
|
47
|
+
schemaVersion: 2,
|
|
48
|
+
generatedBy: "dravoice-v2",
|
|
49
|
+
tool: { name: "Dravoice", cli: "drav" },
|
|
50
|
+
source,
|
|
51
|
+
families,
|
|
52
|
+
guidance: guidanceFor({ source, families }),
|
|
53
|
+
calibration: {
|
|
54
|
+
featureStability: Object.fromEntries(Object.entries(families).map(([name, family]) => [name, family.confidence])),
|
|
52
55
|
tolerances: {
|
|
53
56
|
rhythmMedianWords: toleranceFor(source.confidence.band, 5, 8, 12),
|
|
54
57
|
evidenceRate: toleranceFor(source.confidence.band, 0.12, 0.18, 0.25),
|
|
@@ -57,8 +60,8 @@ export function buildVoiceProfileV2({ documents }) {
|
|
|
57
60
|
minimumDraftSize: {
|
|
58
61
|
words: source.confidence.band === "weak" ? 25 : 35,
|
|
59
62
|
sentences: source.confidence.band === "weak" ? 3 : 4,
|
|
60
|
-
},
|
|
61
|
-
},
|
|
63
|
+
},
|
|
64
|
+
},
|
|
62
65
|
};
|
|
63
66
|
}
|
|
64
67
|
|
|
@@ -101,6 +104,7 @@ function styleThresholdsFor(documents, fallbackFamilies) {
|
|
|
101
104
|
families[family] = {
|
|
102
105
|
threshold: Math.max(0.01, Math.min(0.95, observedThreshold || fallback)),
|
|
103
106
|
observations: observations.length,
|
|
107
|
+
stability: stabilityFromDistances(observations),
|
|
104
108
|
};
|
|
105
109
|
}
|
|
106
110
|
|
|
@@ -110,92 +114,114 @@ function styleThresholdsFor(documents, fallbackFamilies) {
|
|
|
110
114
|
families,
|
|
111
115
|
};
|
|
112
116
|
}
|
|
113
|
-
|
|
114
|
-
function writeVoicePackV2(outDir, profile) {
|
|
115
|
-
fs.mkdirSync(outDir, { recursive: true });
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
path.join(outDir, ".dravoice.yml"),
|
|
119
|
-
[
|
|
120
|
-
"schemaVersion: 2",
|
|
121
|
-
"generatedBy: dravoice-v2",
|
|
122
|
-
`confidence: ${profile.source.confidence.band}`,
|
|
123
|
-
"",
|
|
124
|
-
].join("\n"),
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
const
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
return
|
|
201
|
-
|
|
117
|
+
|
|
118
|
+
/**
 * Write a voice pack into `outDir`: the profile as pretty-printed JSON
 * (`profile.json`) and a small `.dravoice.yml` manifest recording the schema
 * version, generator, and corpus confidence band.
 *
 * @param {string} outDir - Destination directory; created if missing.
 * @param {object} profile - Profile to serialize; `profile.source.confidence.band`
 *   is written into the manifest.
 */
function writeVoicePackV2(outDir, profile) {
  fs.mkdirSync(outDir, { recursive: true });

  const profileJson = `${JSON.stringify(profile, null, 2)}\n`;
  writeUtf8FileSafely(path.join(outDir, "profile.json"), profileJson);

  const manifestLines = [
    "schemaVersion: 2",
    "generatedBy: dravoice-v2",
    `confidence: ${profile.source.confidence.band}`,
    "",
  ];
  writeUtf8FileSafely(path.join(outDir, ".dravoice.yml"), manifestLines.join("\n"));
}
|
|
131
|
+
|
|
132
|
+
/**
 * Check that a loaded profile has the object/array structure listed below,
 * throwing on the first missing piece.
 *
 * Checked, in order: `source`, `source.confidence`, `families`, each of the
 * seven named families and its `features`, `guidance` (with `summary`,
 * `draftingRules`, and `avoid` arrays), `calibration`,
 * `calibration.minimumDraftSize`, and `calibration.styleThresholds`.
 *
 * @param {object} profile - Parsed profile to validate.
 * @param {string} voiceDir - Directory named in error messages.
 * @throws {Error} If any required object or array is missing.
 */
function validateVoiceProfileV2(profile, voiceDir) {
  const requireObject = (value, field) => assertObject(value, field, voiceDir);

  requireObject(profile.source, "profile.source");
  requireObject(profile.source.confidence, "profile.source.confidence");
  requireObject(profile.families, "profile.families");

  const requiredFamilies = ["rhythm", "lexical", "register", "discourse", "rhetoricalShape", "evidence", "structure"];
  requiredFamilies.forEach((family) => {
    requireObject(profile.families[family], `profile.families.${family}`);
    requireObject(profile.families[family].features, `profile.families.${family}.features`);
  });

  requireObject(profile.guidance, "profile.guidance");
  const { summary, draftingRules, avoid } = profile.guidance;
  const hasGuidanceLists = [summary, draftingRules, avoid].every((list) => Array.isArray(list));
  if (!hasGuidanceLists) {
    throw new Error(`Invalid Dravoice V2 profile at ${voiceDir}: profile.guidance must include summary, draftingRules, and avoid arrays.`);
  }

  requireObject(profile.calibration, "profile.calibration");
  requireObject(profile.calibration.minimumDraftSize, "profile.calibration.minimumDraftSize");
  requireObject(profile.calibration.styleThresholds, "profile.calibration.styleThresholds");
}
|
|
148
|
+
|
|
149
|
+
/**
 * Require `value` to be a non-null, non-array object.
 *
 * @param {unknown} value - Value to check.
 * @param {string} field - Field path reported as missing in the error.
 * @param {string} voiceDir - Profile directory named in the error.
 * @throws {Error} If `value` is falsy, not of type "object", or an array.
 */
function assertObject(value, field, voiceDir) {
  const isObject = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (isObject) {
    return;
  }
  throw new Error(`Invalid Dravoice V2 profile at ${voiceDir}: missing ${field}.`);
}
|
|
154
|
+
|
|
155
|
+
/**
 * Summarize the corpus a profile was built from.
 *
 * Per-file entries carry a positional id (`source-1`, `source-2`, ...), the
 * lower-cased file extension, and word/sentence counts. No path or file name
 * is copied into the summary.
 *
 * @param {Array<object>} documents - Documents exposing `wordCount`,
 *   `sentences`, and optionally `path` or `file`.
 * @returns {object} `{ files, documentCount, wordCount, sentenceCount, genres, confidence }`.
 */
function sourceSummary(documents) {
  const wordCount = documents.reduce((sum, document) => sum + document.wordCount, 0);
  const sentenceCount = documents.reduce((sum, document) => sum + document.sentences.length, 0);
  const confidence = confidenceFor(documents.length, sentenceCount, wordCount);

  const files = documents.map((document, index) => {
    // A document with neither `path` nor `file` falls back to "", because
    // `path.extname(undefined)` throws; the extension then defaults to ".txt".
    const location = document.path || document.file || "";
    return {
      id: `source-${index + 1}`,
      extension: path.extname(location).toLowerCase() || ".txt",
      wordCount: document.wordCount,
      sentenceCount: document.sentences.length,
    };
  });

  return {
    files,
    documentCount: documents.length,
    wordCount,
    sentenceCount,
    genres: [],
    confidence,
  };
}
|
|
173
|
+
|
|
174
|
+
/**
 * Classify corpus size into a confidence band with an explanatory message.
 *
 * Bands are tried from strictest to loosest; only "deep" has a word-count
 * requirement. Anything below the "usable" thresholds is "weak".
 *
 * @param {number} documentCount - Number of documents.
 * @param {number} sentenceCount - Total sentences.
 * @param {number} wordCount - Total words.
 * @returns {{band: string, message: string}} The matching band.
 */
function confidenceFor(documentCount, sentenceCount, wordCount) {
  // `minWords: null` means the band has no word-count requirement.
  const tiers = [
    { band: "deep", message: "Enough material for stricter family-level review.", minDocuments: 10, minSentences: 80, minWords: 5000 },
    { band: "strong", message: "Enough examples for stable family-level review.", minDocuments: 5, minSentences: 30, minWords: null },
    { band: "usable", message: "Enough text for prompt guidance and cautious review.", minDocuments: 3, minSentences: 10, minWords: null },
  ];

  const match = tiers.find((tier) => (
    documentCount >= tier.minDocuments
    && sentenceCount >= tier.minSentences
    && (tier.minWords === null || wordCount >= tier.minWords)
  ));

  if (match) {
    return { band: match.band, message: match.message };
  }
  return { band: "weak", message: "Limited corpus; use findings as weak signals." };
}
|
|
186
|
+
|
|
187
|
+
/**
 * Derive the guidance section of a profile from the source summary and the
 * analyzed feature families.
 *
 * @param {object} params
 * @param {object} params.source - Source summary; reads `documentCount`,
 *   `wordCount`, and `confidence.band`.
 * @param {object} params.families - Feature families; reads the primary
 *   register, evidence sentence rate, median sentence length, and opening moves.
 * @returns {{summary: string[], draftingRules: string[], avoid: string[], examples: Array}}
 *   Guidance text; `examples` is always an empty array here.
 */
function guidanceFor({ source, families }) {
  const { value: primaryRegister } = families.register.features.primary;
  const { evidenceSentenceRate } = families.evidence.features;
  const { median: sentenceMedian } = families.rhythm.features.sentenceWords;
  // Only the first three opening moves are quoted in the rule.
  const opening = families.rhetoricalShape.features.openingMoves.slice(0, 3).join(" -> ");

  let evidenceRule = "Do not force evidence density higher than the source corpus supports.";
  if (evidenceSentenceRate > 0.35) {
    evidenceRule = "Anchor broad claims with concrete scenes, numbers, quotes, citations, or specific examples.";
  }

  let openingRule = "Start from the article's real subject rather than generic positioning.";
  if (opening) {
    openingRule = `Prefer opening moves compatible with: ${opening}.`;
  }

  const summary = [
    `Local Dravoice V2 profile from ${source.documentCount} document(s), ${source.wordCount} words, confidence ${source.confidence.band}.`,
    `Primary register signal: ${primaryRegister}.`,
  ];

  const draftingRules = [
    `Keep sentence pacing near the learned median of ${sentenceMedian} words when it fits the draft.`,
    `Use ${primaryRegister} register as the default genre signal unless the piece intentionally changes genre.`,
    evidenceRule,
    openingRule,
  ];

  const avoid = [
    "Do not treat topic vocabulary as proof of voice fit.",
    "Do not claim a draft is or is not the writer's true voice.",
    "Do not invent concrete evidence to satisfy a style finding.",
  ];

  return { summary, draftingRules, avoid, examples: [] };
}
|
|
218
|
+
|
|
219
|
+
/**
 * Pick the tolerance value that corresponds to a confidence band.
 *
 * @param {string} band - Confidence band.
 * @param {*} deep - Value returned for the "deep" band.
 * @param {*} strong - Value returned for the "strong" band.
 * @param {*} weak - Value returned for every other band.
 * @returns {*} The selected tolerance.
 */
function toleranceFor(band, deep, strong, weak) {
  switch (band) {
    case "deep":
      return deep;
    case "strong":
      return strong;
    default:
      return weak;
  }
}
|
package/src/v2/prompt.js
CHANGED
|
@@ -1,64 +1,65 @@
|
|
|
1
|
-
import fs from "node:fs";
|
|
2
|
-
import path from "node:path";
|
|
3
|
-
import {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
fs.
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
"
|
|
35
|
-
"",
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
"
|
|
39
|
-
"",
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
}
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { writeUtf8FileSafely } from "./io-utils.js";
|
|
4
|
+
import { loadVoicePackV2 } from "./profile.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Render prompt guidance for a voice profile and optionally write it to disk.
 *
 * @param {object} params
 * @param {string|object} params.voice - A voice directory (loaded with
 *   `loadVoicePackV2`) or an already-loaded profile object.
 * @param {string} [params.format="agents"] - One of "agents", "claude", "system".
 * @param {string} [params.outPath] - When truthy, the rendered text is written here.
 * @returns {string} The rendered prompt text.
 * @throws {Error} If `format` is not one of the supported values.
 */
export function voicePromptPackV2({ voice, format = "agents", outPath }) {
  // The profile is resolved before the format is validated.
  let profile = voice;
  if (typeof voice === "string") {
    profile = loadVoicePackV2(voice);
  }

  const supportedFormats = ["agents", "claude", "system"];
  if (!supportedFormats.includes(format)) {
    throw new Error(`Unsupported prompt format: ${format}`);
  }

  const rendered = renderPrompt(profile, format);
  if (!outPath) {
    return rendered;
  }

  fs.mkdirSync(path.dirname(outPath), { recursive: true });
  writeUtf8FileSafely(outPath, rendered);
  return rendered;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Render a profile's guidance as prompt text in one of three flavors.
 *
 * Sections, in order: header, preface, "## Summary", "## Feature Families"
 * (one line per family with its confidence and first revision handle),
 * "## Drafting Rules", "## Avoid", and, only when there are examples,
 * "## Source-Backed Examples".
 *
 * `family.revisionHandles` and `guidance.examples` are treated as optional: a
 * family without a first handle is listed without the trailing handle text,
 * and missing examples simply omit that section.
 *
 * @param {object} profile - Profile with `guidance` and `families`.
 * @param {string} format - "agents", "claude", or "system"; selects the header
 *   and preface.
 * @returns {string} The rendered text, ending with a newline.
 */
function renderPrompt(profile, format) {
  const header = {
    agents: "# Dravoice V2 Writing Guidance",
    claude: "# CLAUDE.md guidance for Dravoice V2",
    system: "System writing guidance: Dravoice V2",
  }[format];
  const preface = {
    agents: "Use this as local, inspectable drafting guidance from the writer's own corpus. It is not an AI detector or a license to imitate a third party.",
    claude: "Use these project-local voice notes when drafting or reviewing prose for this repository. Treat them as guidance, not identity proof.",
    system: "Follow these local voice constraints when writing prose. Do not expose private source text or claim authorship identity from them.",
  }[format];

  const { guidance } = profile;
  const lines = [
    header,
    "",
    preface,
    "",
    "## Summary",
    "",
    ...guidance.summary.map((item) => `- ${item}`),
    "",
    "## Feature Families",
    "",
  ];

  for (const [name, family] of Object.entries(profile.families)) {
    const firstHandle = family.revisionHandles?.[0];
    // Without a handle, omit the "; ..." suffix rather than print "undefined".
    lines.push(
      firstHandle == null
        ? `- ${name}: ${family.confidence} confidence`
        : `- ${name}: ${family.confidence} confidence; ${firstHandle}`,
    );
  }

  lines.push("", "## Drafting Rules", "");
  for (const rule of guidance.draftingRules) {
    lines.push(`- ${rule}`);
  }

  lines.push("", "## Avoid", "");
  for (const item of guidance.avoid) {
    lines.push(`- ${item}`);
  }

  const examples = guidance.examples ?? [];
  if (examples.length) {
    lines.push("", "## Source-Backed Examples", "");
    for (const example of examples) {
      lines.push(`- ${example}`);
    }
  }

  return `${lines.join("\n")}\n`;
}
|