dravoice 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +126 -37
- package/bin/dravoice.js +11 -10
- package/package.json +47 -45
- package/src/index.js +967 -197
- package/src/v2/analyzers/discourse.js +69 -63
- package/src/v2/analyzers/evidence.js +82 -82
- package/src/v2/analyzers/lexical.js +114 -114
- package/src/v2/analyzers/register.js +70 -34
- package/src/v2/analyzers/rhetorical-shape.js +65 -59
- package/src/v2/analyzers/rhythm.js +39 -47
- package/src/v2/analyzers/structure.js +41 -24
- package/src/v2/benchmark.js +657 -568
- package/src/v2/brief.js +154 -146
- package/src/v2/config.js +78 -0
- package/src/v2/doctor.js +308 -0
- package/src/v2/document-model.js +422 -260
- package/src/v2/inspect.js +67 -67
- package/src/v2/io-utils.js +51 -0
- package/src/v2/profile.js +342 -203
- package/src/v2/prompt.js +65 -64
- package/src/v2/review.js +303 -173
- package/src/v2/revise-plan.js +540 -433
- package/src/v2/stylometry.js +346 -332
- package/src/v2/text-utils.js +123 -123
package/src/v2/profile.js
CHANGED
|
@@ -1,203 +1,342 @@
|
|
|
1
|
-
import fs from "node:fs";
|
|
2
|
-
import path from "node:path";
|
|
3
|
-
import { analyzeDiscourse } from "./analyzers/discourse.js";
|
|
4
|
-
import { analyzeEvidence } from "./analyzers/evidence.js";
|
|
5
|
-
import { analyzeLexical } from "./analyzers/lexical.js";
|
|
6
|
-
import { analyzeRegister } from "./analyzers/register.js";
|
|
7
|
-
import { analyzeRhetoricalShape } from "./analyzers/rhetorical-shape.js";
|
|
8
|
-
import { analyzeRhythm } from "./analyzers/rhythm.js";
|
|
9
|
-
import { analyzeStructure } from "./analyzers/structure.js";
|
|
10
|
-
import { loadDocuments } from "./document-model.js";
|
|
11
|
-
import {
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
const
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
},
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
return {
|
|
181
|
-
|
|
182
|
-
`
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
],
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { analyzeDiscourse } from "./analyzers/discourse.js";
|
|
4
|
+
import { analyzeEvidence } from "./analyzers/evidence.js";
|
|
5
|
+
import { analyzeLexical } from "./analyzers/lexical.js";
|
|
6
|
+
import { analyzeRegister } from "./analyzers/register.js";
|
|
7
|
+
import { analyzeRhetoricalShape } from "./analyzers/rhetorical-shape.js";
|
|
8
|
+
import { analyzeRhythm } from "./analyzers/rhythm.js";
|
|
9
|
+
import { analyzeStructure } from "./analyzers/structure.js";
|
|
10
|
+
import { loadDocuments } from "./document-model.js";
|
|
11
|
+
import { readJsonFileBounded, writeUtf8FileSafely } from "./io-utils.js";
|
|
12
|
+
import {
|
|
13
|
+
STYLOMETRIC_REFERENCES,
|
|
14
|
+
defaultStyleThresholds,
|
|
15
|
+
distanceByFamily,
|
|
16
|
+
percentile,
|
|
17
|
+
stabilityFromDistances,
|
|
18
|
+
} from "./stylometry.js";
|
|
19
|
+
|
|
20
|
+
/**
 * Learn a V2 voice profile from a directory of example documents.
 *
 * Documents are loaded with `loadDocuments`, turned into a profile with
 * `buildVoiceProfileV2`, and — only when `outDir` is truthy — written to disk
 * with `writeVoicePackV2`.
 *
 * @param {object} options
 * @param {*} options.examplesDir - Forwarded unchanged to `loadDocuments`.
 * @param {string} [options.outDir] - Output directory; nothing is written when omitted.
 * @param {Array} [options.excludePaths=[]] - Forwarded unchanged to `loadDocuments`.
 * @param {*} [options.registerMarkers] - Forwarded unchanged to `buildVoiceProfileV2`.
 * @returns {object} The profile object produced by `buildVoiceProfileV2`.
 */
export function learnVoicePackV2({ examplesDir, outDir, excludePaths = [], registerMarkers }) {
  const profile = buildVoiceProfileV2({
    documents: loadDocuments({ examplesDir, excludePaths }),
    registerMarkers,
  });

  if (!outDir) {
    return profile;
  }

  writeVoicePackV2(outDir, profile);
  return profile;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Load and validate a V2 voice profile from `<voiceDir>/profile.json`.
 *
 * @param {string} voiceDir - Directory expected to contain `profile.json`.
 * @returns {object} The parsed profile, after `validateVoiceProfileV2` has accepted it.
 * @throws {Error} When `profile.json` does not exist, when its `schemaVersion`
 *   is not exactly `2`, or when `validateVoiceProfileV2` rejects its shape.
 */
export function loadVoicePackV2(voiceDir) {
  const profilePath = path.join(voiceDir, "profile.json");
  if (!fs.existsSync(profilePath)) {
    throw new Error(`No Dravoice V2 profile found at ${voiceDir}`);
  }

  // The read is capped at 2 MiB via the maxBytes option.
  const profile = readJsonFileBounded(profilePath, { label: "Dravoice profile", maxBytes: 2 * 1024 * 1024 });

  // Optional chaining keeps a file whose whole content is JSON `null` on the
  // descriptive error path below instead of crashing with a raw TypeError.
  // NOTE(review): assumes readJsonFileBounded returns the parsed value as-is — confirm.
  const schemaVersion = profile?.schemaVersion;
  if (schemaVersion !== 2) {
    throw new Error(`Expected a Dravoice V2 profile at ${voiceDir}; found schemaVersion ${schemaVersion ?? "unknown"}.`);
  }

  validateVoiceProfileV2(profile, voiceDir);
  return profile;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Assemble a complete V2 voice profile object from already-loaded documents.
 *
 * The result carries the corpus summary, per-family analyzer output, drafting
 * guidance, and a `calibration` section (per-family confidence, tolerances
 * chosen by confidence band, style thresholds, per-family diagnostics, and a
 * minimum draft size that is smaller for a "weak" corpus).
 *
 * @param {object} options
 * @param {Array<object>} options.documents - Documents to analyze.
 * @param {*} [options.registerMarkers] - Forwarded to the family analyzers.
 * @returns {object} Profile with `schemaVersion: 2`.
 */
export function buildVoiceProfileV2({ documents, registerMarkers }) {
  const source = sourceSummary(documents);
  const families = analyzeFeatureFamilies(documents, { registerMarkers });
  const styleThresholds = styleThresholdsFor(documents, families, { registerMarkers });

  const guidance = guidanceFor({ source, families });

  // Per-family confidence, keyed by family name.
  const featureStability = {};
  for (const [name, family] of Object.entries(families)) {
    featureStability[name] = family.confidence;
  }

  const { band } = source.confidence;
  const tolerances = {
    rhythmMedianWords: toleranceFor(band, 5, 8, 12),
    evidenceRate: toleranceFor(band, 0.12, 0.18, 0.25),
  };
  const familyDiagnostics = familyCalibrationDiagnostics({ source, families, styleThresholds });

  const isWeak = band === "weak";
  const minimumDraftSize = {
    words: isWeak ? 25 : 35,
    sentences: isWeak ? 3 : 4,
  };

  return {
    schemaVersion: 2,
    generatedBy: "dravoice-v2",
    tool: { name: "Dravoice", cli: "drav" },
    source,
    families,
    guidance,
    calibration: {
      featureStability,
      tolerances,
      styleThresholds,
      familyDiagnostics,
      minimumDraftSize,
    },
  };
}
|
|
69
|
+
|
|
70
|
+
/**
 * Run every feature-family analyzer over the same set of documents.
 *
 * @param {Array<object>} documents - Documents handed to each analyzer.
 * @param {object} [options]
 * @param {*} [options.registerMarkers] - When truthy, passed to the register
 *   analyzer as `{ markers }`; otherwise that analyzer receives `undefined`.
 * @returns {object} Analyzer results keyed by `rhythm`, `lexical`, `register`,
 *   `discourse`, `rhetoricalShape`, `evidence`, and `structure`.
 */
function analyzeFeatureFamilies(documents, { registerMarkers } = {}) {
  const registerOptions = registerMarkers ? { markers: registerMarkers } : undefined;

  const rhythm = analyzeRhythm(documents);
  const lexical = analyzeLexical(documents);
  const register = analyzeRegister(documents, registerOptions);
  const discourse = analyzeDiscourse(documents);
  const rhetoricalShape = analyzeRhetoricalShape(documents);
  const evidence = analyzeEvidence(documents);
  const structure = analyzeStructure(documents);

  return { rhythm, lexical, register, discourse, rhetoricalShape, evidence, structure };
}
|
|
81
|
+
|
|
82
|
+
/**
 * Derive per-family style thresholds by holding documents out of the corpus
 * and measuring how far each held-out part sits from the remainder.
 *
 * Two passes contribute distance observations:
 *  - with at least 2 documents, each document is held out on its own;
 *  - with at least 5 documents, each adjacent pair (wrapping at the end) is
 *    additionally held out together.
 *
 * A family's threshold is the 0.9 percentile of its observed distances,
 * clamped to [0.01, 0.95]. When there are no observations, or the observed
 * value is falsy (for example 0), the default threshold for that family is
 * used instead.
 *
 * @param {Array<object>} documents - Corpus documents.
 * @param {object} fallbackFamilies - Object whose keys name the families to calibrate.
 * @param {object} [options]
 * @param {*} [options.registerMarkers] - Forwarded to `analyzeFeatureFamilies`.
 * @returns {{method: string, references: *, families: object}} Calibration
 *   summary with `threshold`, `observations`, and `stability` per family.
 */
function styleThresholdsFor(documents, fallbackFamilies, { registerMarkers } = {}) {
  const defaults = defaultStyleThresholds();
  const distancesByFamily = Object.fromEntries(Object.keys(fallbackFamilies).map((name) => [name, []]));

  // Analyze both sides of one split and record one distance per family.
  const recordSplit = (referenceDocuments, heldoutDocuments) => {
    const referenceFamilies = analyzeFeatureFamilies(referenceDocuments, { registerMarkers });
    const heldoutFamilies = analyzeFeatureFamilies(heldoutDocuments, { registerMarkers });
    for (const name of Object.keys(fallbackFamilies)) {
      const distance = distanceByFamily(name, referenceFamilies[name].features, heldoutFamilies[name].features);
      distancesByFamily[name].push(distance);
    }
  };

  // Pass 1: leave one document out at a time.
  if (documents.length >= 2) {
    for (let heldout = 0; heldout < documents.length; heldout += 1) {
      const rest = documents.filter((_, position) => position !== heldout);
      recordSplit(rest, [documents[heldout]]);
    }
  }

  // Pass 2: rotate a two-document hold-out window around the corpus.
  if (documents.length >= 5) {
    for (let first = 0; first < documents.length; first += 1) {
      const windowIndexes = new Set([first, (first + 1) % documents.length]);
      const rest = documents.filter((_, position) => !windowIndexes.has(position));
      const held = documents.filter((_, position) => windowIndexes.has(position));
      recordSplit(rest, held);
    }
  }

  const families = {};
  for (const name of Object.keys(fallbackFamilies)) {
    const observed = distancesByFamily[name];
    let observedThreshold = 0;
    if (observed.length > 0) {
      observedThreshold = percentile(observed, 0.9);
    }
    const fallback = defaults[name].threshold;
    const capped = Math.min(0.95, observedThreshold || fallback);
    families[name] = {
      threshold: Math.max(0.01, capped),
      observations: observed.length,
      stability: stabilityFromDistances(observed),
    };
  }

  let method = "leave-one-out-cosine-delta";
  if (documents.length >= 5) {
    method = "leave-one-out-and-rotating-holdout-cosine-delta";
  }

  return {
    method,
    references: STYLOMETRIC_REFERENCES,
    families,
  };
}
|
|
138
|
+
|
|
139
|
+
/**
 * Write a voice pack to `outDir`, creating the directory if needed.
 *
 * Two files are written, in this order: `profile.json` (the profile
 * pretty-printed with two-space indentation and a trailing newline) and
 * `.dravoice.yml` (schema version, generator name, and confidence band).
 *
 * @param {string} outDir - Target directory.
 * @param {object} profile - Profile to persist; `profile.source.confidence.band` is read.
 * @returns {void}
 */
function writeVoicePackV2(outDir, profile) {
  fs.mkdirSync(outDir, { recursive: true });

  const profilePath = path.join(outDir, "profile.json");
  const profileJson = `${JSON.stringify(profile, null, 2)}\n`;
  writeUtf8FileSafely(profilePath, profileJson);

  const manifestPath = path.join(outDir, ".dravoice.yml");
  const manifest = `schemaVersion: 2\ngeneratedBy: dravoice-v2\nconfidence: ${profile.source.confidence.band}\n`;
  writeUtf8FileSafely(manifestPath, manifest);
}
|
|
152
|
+
|
|
153
|
+
/**
 * Check that a loaded profile has the object and array fields this module
 * relies on, throwing on the first one that is missing or of the wrong kind.
 *
 * @param {object} profile - Parsed profile to check.
 * @param {string} voiceDir - Directory the profile came from; used only in error messages.
 * @returns {void}
 * @throws {Error} On the first failed check.
 */
function validateVoiceProfileV2(profile, voiceDir) {
  const requireObject = (value, field) => assertObject(value, field, voiceDir);

  requireObject(profile.source, "profile.source");
  requireObject(profile.source.confidence, "profile.source.confidence");
  requireObject(profile.families, "profile.families");

  const requiredFamilies = ["rhythm", "lexical", "register", "discourse", "rhetoricalShape", "evidence", "structure"];
  for (const name of requiredFamilies) {
    const familyField = `profile.families.${name}`;
    requireObject(profile.families[name], familyField);
    requireObject(profile.families[name].features, `${familyField}.features`);
  }

  requireObject(profile.guidance, "profile.guidance");
  const guidanceListsPresent = ["summary", "draftingRules", "avoid"]
    .every((key) => Array.isArray(profile.guidance[key]));
  if (!guidanceListsPresent) {
    throw new Error(`Invalid Dravoice V2 profile at ${voiceDir}: profile.guidance must include summary, draftingRules, and avoid arrays.`);
  }

  requireObject(profile.calibration, "profile.calibration");
  requireObject(profile.calibration.minimumDraftSize, "profile.calibration.minimumDraftSize");
  requireObject(profile.calibration.styleThresholds, "profile.calibration.styleThresholds");
}
|
|
169
|
+
|
|
170
|
+
/**
 * Throw unless `value` is a non-null, non-array object.
 *
 * @param {*} value - Value to check.
 * @param {string} field - Dotted field name reported in the error message.
 * @param {string} voiceDir - Profile directory reported in the error message.
 * @returns {void}
 * @throws {Error} When `value` is falsy, not of type "object", or an array.
 */
function assertObject(value, field, voiceDir) {
  const isNonArrayObject = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (isNonArrayObject) {
    return;
  }
  throw new Error(`Invalid Dravoice V2 profile at ${voiceDir}: missing ${field}.`);
}
|
|
175
|
+
|
|
176
|
+
/**
 * Summarize the corpus a profile is built from.
 *
 * Individual files are reported under anonymized ids (`source-1`,
 * `source-2`, …) with only their extension and counts, not their paths.
 *
 * @param {Array<object>} documents - Documents exposing `wordCount`,
 *   `sentences`, and optionally `path` or `file`.
 * @returns {object} Summary with `files`, `documentCount`, `wordCount`,
 *   `sentenceCount`, `genres` (always an empty array here), `confidence`, and `quality`.
 */
function sourceSummary(documents) {
  const wordCount = documents.reduce((sum, document) => sum + document.wordCount, 0);
  const sentenceCount = documents.reduce((sum, document) => sum + document.sentences.length, 0);
  const confidence = confidenceFor(documents.length, sentenceCount, wordCount);
  return {
    files: documents.map((document, index) => ({
      id: `source-${index + 1}`,
      // path.extname throws on a non-string argument, so a document that has
      // neither `path` nor `file` falls back to "" and is reported as ".txt".
      extension: path.extname(document.path || document.file || "").toLowerCase() || ".txt",
      wordCount: document.wordCount,
      sentenceCount: document.sentences.length,
    })),
    documentCount: documents.length,
    wordCount,
    sentenceCount,
    genres: [],
    confidence,
    quality: sourceQualityFor(documents),
  };
}
|
|
195
|
+
|
|
196
|
+
/**
 * Produce corpus-quality diagnostics: length spread, the number of groups of
 * documents sharing a fingerprint, and human-readable warnings.
 *
 * Warnings are emitted, in this order, for duplicate-looking groups, for a
 * longest/shortest word-count ratio of 5 or more (only with at least 3
 * documents and a non-zero minimum), and for any document under 80 words.
 *
 * @param {Array<object>} documents - Documents exposing `wordCount`, `sentences`, and `text`.
 * @returns {{lengthSpread: object, duplicateGroups: number, warnings: string[]}}
 */
function sourceQualityFor(documents) {
  const wordCounts = documents.map((doc) => doc.wordCount);
  const sentenceCounts = documents.map((doc) => doc.sentences.length);

  // Count how many documents share each non-empty fingerprint.
  const occurrences = new Map();
  documents.forEach((doc) => {
    const key = documentFingerprint(doc);
    if (key) {
      occurrences.set(key, (occurrences.get(key) ?? 0) + 1);
    }
  });

  let duplicateGroups = 0;
  for (const count of occurrences.values()) {
    if (count > 1) {
      duplicateGroups += 1;
    }
  }

  const minWords = minValue(wordCounts);
  const maxWords = maxValue(wordCounts);

  const warnings = [];
  if (duplicateGroups > 0) {
    warnings.push(`${duplicateGroups} duplicate-looking source group(s) detected; remove repeated drafts before trusting calibration.`);
  }
  const isImbalanced = documents.length >= 3 && minWords > 0 && maxWords / minWords >= 5;
  if (isImbalanced) {
    warnings.push(`Document length imbalance detected (${minWords}-${maxWords} words); long pieces may dominate the learned profile.`);
  }
  const hasVeryShortDocument = documents.some((doc) => doc.wordCount < 80);
  if (hasVeryShortDocument) {
    warnings.push("One or more source files are very short; prefer representative long-form pieces.");
  }

  const lengthSpread = {
    minWords,
    maxWords,
    minSentences: minValue(sentenceCounts),
    maxSentences: maxValue(sentenceCounts),
  };

  return { lengthSpread, duplicateGroups, warnings };
}
|
|
231
|
+
|
|
232
|
+
/**
 * Smallest value in `values`, or 0 for an empty array.
 *
 * Folds pairwise instead of spreading the array into `Math.min`, because
 * spreading a very large array can exceed the engine's argument limit and
 * throw a RangeError.
 *
 * @param {number[]} values - Values to scan.
 * @returns {number} The minimum, or 0 when `values` is empty.
 */
function minValue(values) {
  if (!values.length) {
    return 0;
  }
  let smallest = Infinity;
  for (const value of values) {
    smallest = Math.min(smallest, value);
  }
  return smallest;
}
|
|
235
|
+
|
|
236
|
+
/**
 * Largest value in `values`, or 0 for an empty array.
 *
 * Folds pairwise instead of spreading the array into `Math.max`, because
 * spreading a very large array can exceed the engine's argument limit and
 * throw a RangeError.
 *
 * @param {number[]} values - Values to scan.
 * @returns {number} The maximum, or 0 when `values` is empty.
 */
function maxValue(values) {
  if (!values.length) {
    return 0;
  }
  let largest = -Infinity;
  for (const value of values) {
    largest = Math.max(largest, value);
  }
  return largest;
}
|
|
239
|
+
|
|
240
|
+
/**
 * Reduce a document's text to a normalized string used to spot
 * duplicate-looking sources: case, punctuation, and whitespace differences
 * are ignored.
 *
 * Letters, combining marks, and digits from any script are kept. An
 * ASCII-only filter would erase non-Latin text entirely, letting unrelated
 * documents collide on whatever digits they happen to share. Whitespace is
 * collapsed after punctuation is stripped, so "a - b" and "a b" produce the
 * same fingerprint.
 *
 * @param {{text?: string}} document - Document whose `text` is fingerprinted.
 * @returns {string} Normalized text; empty when nothing but punctuation or whitespace remains.
 */
function documentFingerprint(document) {
  return String(document.text ?? "")
    // Make composed and decomposed spellings of the same character compare equal.
    .normalize("NFKC")
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s]+/gu, "")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
247
|
+
|
|
248
|
+
/**
 * Build per-family calibration diagnostics.
 *
 * A family is marked `usableForFindings` only when the corpus confidence band
 * is not "weak", all three minimum-evidence checks pass, and the family's
 * stability is at least 0.35. A family without a stability value is treated
 * as 0.45.
 *
 * @param {object} options
 * @param {object} options.source - Corpus summary; `confidence.band` is read.
 * @param {object} options.families - Analyzer results keyed by family name.
 * @param {object} options.styleThresholds - Thresholds; `families[name]` is read per family.
 * @returns {object} Diagnostics keyed by family name.
 */
function familyCalibrationDiagnostics({ source, families, styleThresholds }) {
  const entries = [];
  for (const name of Object.keys(families)) {
    const minimumEvidence = minimumEvidenceFor(name, source, families[name]);
    const calibrated = styleThresholds.families[name] ?? {};
    const stability = calibrated.stability ?? 0.45;

    const corpusIsTrusted = source.confidence.band !== "weak";
    const evidenceIsSufficient = minimumEvidence.documentsMet
      && minimumEvidence.sentencesMet
      && minimumEvidence.wordsMet;

    entries.push([name, {
      confidence: families[name].confidence,
      threshold: calibrated.threshold ?? 0,
      observations: calibrated.observations ?? 0,
      stability,
      minimumEvidence,
      usableForFindings: corpusIsTrusted && evidenceIsSufficient && stability >= 0.35,
    }]);
  }
  return Object.fromEntries(entries);
}
|
|
266
|
+
|
|
267
|
+
/**
 * Report the minimum corpus size a family needs and whether it is met.
 *
 * Word and sentence counts come from the family's own `features` when it
 * reports them, otherwise from the corpus totals in `source`.
 *
 * @param {string} family - Family name; unknown names require 1 document, 1 sentence, 1 word.
 * @param {object} source - Corpus summary with `documentCount`, `wordCount`, `sentenceCount`.
 * @param {object} familyData - Analyzer result for the family; `features` is optional.
 * @returns {{requiredDocuments: number, requiredSentences: number, requiredWords: number,
 *   documentsMet: boolean, sentencesMet: boolean, wordsMet: boolean}}
 */
function minimumEvidenceFor(family, source, familyData) {
  // A Map avoids resolving inherited Object.prototype members (for example
  // "constructor" or "toString") as if they were requirement entries.
  const requirementsByFamily = new Map([
    ["rhythm", { documents: 1, sentences: 8, words: 80 }],
    ["lexical", { documents: 1, sentences: 4, words: 120 }],
    ["register", { documents: 3, sentences: 8, words: 120 }],
    ["discourse", { documents: 1, sentences: 12, words: 120 }],
    ["rhetoricalShape", { documents: 1, sentences: 12, words: 120 }],
    ["evidence", { documents: 1, sentences: 12, words: 120 }],
    ["structure", { documents: 3, sentences: 8, words: 120 }],
  ]);
  const requirements = requirementsByFamily.get(family) ?? { documents: 1, sentences: 1, words: 1 };

  const wordCount = familyData.features?.wordCount ?? source.wordCount;
  const sentenceCount = familyData.features?.sentenceCount ?? source.sentenceCount;
  return {
    requiredDocuments: requirements.documents,
    requiredSentences: requirements.sentences,
    requiredWords: requirements.words,
    documentsMet: source.documentCount >= requirements.documents,
    sentencesMet: sentenceCount >= requirements.sentences,
    wordsMet: wordCount >= requirements.words,
  };
}
|
|
288
|
+
|
|
289
|
+
/**
 * Classify corpus size into a confidence band with an explanatory message.
 *
 * Tiers are tried from strictest to loosest:
 *  - "deep":   at least 10 documents, 80 sentences, and 5000 words;
 *  - "strong": at least 5 documents and 30 sentences;
 *  - "usable": at least 3 documents and 10 sentences;
 *  - "weak":   anything else.
 *
 * @param {number} documentCount - Number of documents in the corpus.
 * @param {number} sentenceCount - Total sentences across the corpus.
 * @param {number} wordCount - Total words across the corpus.
 * @returns {{band: string, message: string}}
 */
function confidenceFor(documentCount, sentenceCount, wordCount) {
  const tiers = [
    {
      qualifies: documentCount >= 10 && sentenceCount >= 80 && wordCount >= 5000,
      band: "deep",
      message: "Enough material for stricter family-level review.",
    },
    {
      qualifies: documentCount >= 5 && sentenceCount >= 30,
      band: "strong",
      message: "Enough examples for stable family-level review.",
    },
    {
      qualifies: documentCount >= 3 && sentenceCount >= 10,
      band: "usable",
      message: "Enough text for prompt guidance and cautious review.",
    },
  ];

  const matched = tiers.find((tier) => tier.qualifies);
  if (matched) {
    return { band: matched.band, message: matched.message };
  }
  return { band: "weak", message: "Limited corpus; use findings as weak signals." };
}
|
|
301
|
+
|
|
302
|
+
/**
 * Turn learned features into human-readable drafting guidance.
 *
 * @param {object} options
 * @param {object} options.source - Corpus summary; `documentCount`, `wordCount`,
 *   and `confidence.band` are read.
 * @param {object} options.families - Analyzer results; the register primary
 *   value, evidence sentence rate, rhythm sentence-word median, and the first
 *   three rhetorical opening moves are read.
 * @returns {{summary: string[], draftingRules: string[], avoid: string[], examples: Array}}
 */
function guidanceFor({ source, families }) {
  const primaryRegister = families.register.features.primary.value;
  const evidenceRate = families.evidence.features.evidenceSentenceRate;
  const sentenceMedian = families.rhythm.features.sentenceWords.median;
  const leadingMoves = families.rhetoricalShape.features.openingMoves.slice(0, 3);
  const opening = leadingMoves.join(" -> ");

  // Only push for concrete evidence when the corpus itself is evidence-dense.
  let evidenceRule = "Do not force evidence density higher than the source corpus supports.";
  if (evidenceRate > 0.35) {
    evidenceRule = "Anchor broad claims with concrete scenes, numbers, quotes, citations, or specific examples.";
  }

  let openingRule = "Start from the article's real subject rather than generic positioning.";
  if (opening) {
    openingRule = `Prefer opening moves compatible with: ${opening}.`;
  }

  const draftingRules = [
    `Keep sentence pacing near the learned median of ${sentenceMedian} words when it fits the draft.`,
    `Use ${primaryRegister} register as the default genre signal unless the piece intentionally changes genre.`,
    evidenceRule,
    openingRule,
  ];

  const summary = [
    `Local Dravoice V2 profile from ${source.documentCount} document(s), ${source.wordCount} words, confidence ${source.confidence.band}.`,
    `Primary register signal: ${primaryRegister}.`,
  ];

  const avoid = [
    "Do not treat topic vocabulary as proof of voice fit.",
    "Do not claim a draft is or is not the writer's true voice.",
    "Do not invent concrete evidence to satisfy a style finding.",
  ];

  return { summary, draftingRules, avoid, examples: [] };
}
|
|
333
|
+
|
|
334
|
+
/**
 * Pick a tolerance value by confidence band.
 *
 * @param {string} band - Confidence band name.
 * @param {number} deep - Value returned for the "deep" band.
 * @param {number} strong - Value returned for the "strong" band.
 * @param {number} weak - Value returned for every other band.
 * @returns {number} The tolerance matching `band`.
 */
function toleranceFor(band, deep, strong, weak) {
  switch (band) {
    case "deep":
      return deep;
    case "strong":
      return strong;
    default:
      return weak;
  }
}
|