dravoice 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -9
- package/package.json +1 -1
- package/src/index.js +113 -13
- package/src/v2/analyzers/discourse.js +7 -1
- package/src/v2/analyzers/evidence.js +3 -3
- package/src/v2/analyzers/register.js +28 -4
- package/src/v2/analyzers/rhetorical-shape.js +7 -1
- package/src/v2/analyzers/structure.js +109 -1
- package/src/v2/benchmark.js +83 -0
- package/src/v2/brief.js +41 -7
- package/src/v2/doctor.js +308 -0
- package/src/v2/document-model.js +78 -6
- package/src/v2/inspect.js +2 -2
- package/src/v2/profile.js +238 -19
- package/src/v2/prompt.js +10 -3
- package/src/v2/review.js +142 -16
- package/src/v2/revise-plan.js +111 -8
- package/src/v2/stylometry.js +11 -7
- package/src/v2/text-utils.js +5 -2
package/src/v2/profile.js
CHANGED
|
@@ -17,9 +17,9 @@ import {
|
|
|
17
17
|
stabilityFromDistances,
|
|
18
18
|
} from "./stylometry.js";
|
|
19
19
|
|
|
20
|
-
export function learnVoicePackV2({ examplesDir, outDir, excludePaths = [] }) {
|
|
20
|
+
export function learnVoicePackV2({ examplesDir, outDir, excludePaths = [], registerMarkers }) {
|
|
21
21
|
const documents = loadDocuments({ examplesDir, excludePaths });
|
|
22
|
-
const profile = buildVoiceProfileV2({ documents });
|
|
22
|
+
const profile = buildVoiceProfileV2({ documents, registerMarkers });
|
|
23
23
|
if (outDir) {
|
|
24
24
|
writeVoicePackV2(outDir, profile);
|
|
25
25
|
}
|
|
@@ -39,9 +39,10 @@ export function loadVoicePackV2(voiceDir) {
|
|
|
39
39
|
return profile;
|
|
40
40
|
}
|
|
41
41
|
|
|
42
|
-
export function buildVoiceProfileV2({ documents }) {
|
|
42
|
+
export function buildVoiceProfileV2({ documents, registerMarkers }) {
|
|
43
43
|
const source = sourceSummary(documents);
|
|
44
|
-
const families = analyzeFeatureFamilies(documents);
|
|
44
|
+
const families = analyzeFeatureFamilies(documents, { registerMarkers });
|
|
45
|
+
const styleThresholds = styleThresholdsFor(documents, families, { registerMarkers });
|
|
45
46
|
|
|
46
47
|
return {
|
|
47
48
|
schemaVersion: 2,
|
|
@@ -56,7 +57,8 @@ export function buildVoiceProfileV2({ documents }) {
|
|
|
56
57
|
rhythmMedianWords: toleranceFor(source.confidence.band, 5, 8, 12),
|
|
57
58
|
evidenceRate: toleranceFor(source.confidence.band, 0.12, 0.18, 0.25),
|
|
58
59
|
},
|
|
59
|
-
styleThresholds
|
|
60
|
+
styleThresholds,
|
|
61
|
+
familyDiagnostics: familyCalibrationDiagnostics({ source, families, styleThresholds }),
|
|
60
62
|
minimumDraftSize: {
|
|
61
63
|
words: source.confidence.band === "weak" ? 25 : 35,
|
|
62
64
|
sentences: source.confidence.band === "weak" ? 3 : 4,
|
|
@@ -65,11 +67,11 @@ export function buildVoiceProfileV2({ documents }) {
|
|
|
65
67
|
};
|
|
66
68
|
}
|
|
67
69
|
|
|
68
|
-
function analyzeFeatureFamilies(documents) {
|
|
70
|
+
function analyzeFeatureFamilies(documents, { registerMarkers } = {}) {
|
|
69
71
|
return {
|
|
70
72
|
rhythm: analyzeRhythm(documents),
|
|
71
73
|
lexical: analyzeLexical(documents),
|
|
72
|
-
register: analyzeRegister(documents),
|
|
74
|
+
register: analyzeRegister(documents, registerMarkers ? { markers: registerMarkers } : undefined),
|
|
73
75
|
discourse: analyzeDiscourse(documents),
|
|
74
76
|
rhetoricalShape: analyzeRhetoricalShape(documents),
|
|
75
77
|
evidence: analyzeEvidence(documents),
|
|
@@ -77,15 +79,32 @@ function analyzeFeatureFamilies(documents) {
|
|
|
77
79
|
};
|
|
78
80
|
}
|
|
79
81
|
|
|
80
|
-
function styleThresholdsFor(documents, fallbackFamilies) {
|
|
82
|
+
function styleThresholdsFor(documents, fallbackFamilies, { registerMarkers } = {}) {
|
|
81
83
|
const fallbackThresholds = defaultStyleThresholds();
|
|
82
84
|
const distancesByFamily = Object.fromEntries(Object.keys(fallbackFamilies).map((family) => [family, []]));
|
|
83
85
|
|
|
84
86
|
if (documents.length >= 2) {
|
|
85
87
|
for (let index = 0; index < documents.length; index += 1) {
|
|
86
88
|
const referenceDocuments = documents.filter((_, candidateIndex) => candidateIndex !== index);
|
|
87
|
-
const referenceFamilies = analyzeFeatureFamilies(referenceDocuments);
|
|
88
|
-
const heldoutFamilies = analyzeFeatureFamilies([documents[index]]);
|
|
89
|
+
const referenceFamilies = analyzeFeatureFamilies(referenceDocuments, { registerMarkers });
|
|
90
|
+
const heldoutFamilies = analyzeFeatureFamilies([documents[index]], { registerMarkers });
|
|
91
|
+
for (const family of Object.keys(fallbackFamilies)) {
|
|
92
|
+
distancesByFamily[family].push(distanceByFamily(
|
|
93
|
+
family,
|
|
94
|
+
referenceFamilies[family].features,
|
|
95
|
+
heldoutFamilies[family].features,
|
|
96
|
+
));
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
if (documents.length >= 5) {
|
|
102
|
+
for (let start = 0; start < documents.length; start += 1) {
|
|
103
|
+
const heldoutIndexes = new Set([start, (start + 1) % documents.length]);
|
|
104
|
+
const referenceDocuments = documents.filter((_, index) => !heldoutIndexes.has(index));
|
|
105
|
+
const heldoutDocuments = documents.filter((_, index) => heldoutIndexes.has(index));
|
|
106
|
+
const referenceFamilies = analyzeFeatureFamilies(referenceDocuments, { registerMarkers });
|
|
107
|
+
const heldoutFamilies = analyzeFeatureFamilies(heldoutDocuments, { registerMarkers });
|
|
89
108
|
for (const family of Object.keys(fallbackFamilies)) {
|
|
90
109
|
distancesByFamily[family].push(distanceByFamily(
|
|
91
110
|
family,
|
|
@@ -109,7 +128,9 @@ function styleThresholdsFor(documents, fallbackFamilies) {
|
|
|
109
128
|
}
|
|
110
129
|
|
|
111
130
|
return {
|
|
112
|
-
method:
|
|
131
|
+
method: documents.length >= 5
|
|
132
|
+
? "leave-one-out-and-rotating-holdout-cosine-delta"
|
|
133
|
+
: "leave-one-out-cosine-delta",
|
|
113
134
|
references: STYLOMETRIC_REFERENCES,
|
|
114
135
|
families,
|
|
115
136
|
};
|
|
@@ -168,6 +189,100 @@ function sourceSummary(documents) {
|
|
|
168
189
|
sentenceCount,
|
|
169
190
|
genres: [],
|
|
170
191
|
confidence,
|
|
192
|
+
quality: sourceQualityFor(documents),
|
|
193
|
+
};
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
/**
 * Summarizes how trustworthy the source corpus looks before calibration.
 *
 * Reports the spread of document lengths, how many groups of documents share
 * the same normalized fingerprint, and human-readable warnings for duplicate
 * sources, heavily imbalanced lengths, and very short files.
 *
 * @param {Array<{text?: string, wordCount: number, sentences: Array}>} documents
 * @returns {{lengthSpread: {minWords: number, maxWords: number, minSentences: number, maxSentences: number}, duplicateGroups: number, warnings: string[]}}
 */
function sourceQualityFor(documents) {
  const wordsPerDocument = documents.map((doc) => doc.wordCount);
  const sentencesPerDocument = documents.map((doc) => doc.sentences.length);

  // Tally how many documents share each non-empty fingerprint.
  const tally = new Map();
  documents.forEach((doc) => {
    const key = documentFingerprint(doc);
    if (key) {
      tally.set(key, (tally.get(key) ?? 0) + 1);
    }
  });

  // A "group" is one fingerprint seen more than once, however many copies exist.
  let duplicateGroups = 0;
  for (const occurrences of tally.values()) {
    if (occurrences > 1) {
      duplicateGroups += 1;
    }
  }

  const minWords = minValue(wordsPerDocument);
  const maxWords = maxValue(wordsPerDocument);

  const hasDuplicates = duplicateGroups > 0;
  // Only meaningful with at least three documents and a non-zero shortest one.
  const isImbalanced = documents.length >= 3 && minWords > 0 && maxWords / minWords >= 5;
  const hasVeryShortFile = documents.some((doc) => doc.wordCount < 80);

  const warnings = [];
  if (hasDuplicates) {
    warnings.push(`${duplicateGroups} duplicate-looking source group(s) detected; remove repeated drafts before trusting calibration.`);
  }
  if (isImbalanced) {
    warnings.push(`Document length imbalance detected (${minWords}-${maxWords} words); long pieces may dominate the learned profile.`);
  }
  if (hasVeryShortFile) {
    warnings.push("One or more source files are very short; prefer representative long-form pieces.");
  }

  const lengthSpread = {
    minWords,
    maxWords,
    minSentences: minValue(sentencesPerDocument),
    maxSentences: maxValue(sentencesPerDocument),
  };
  return { lengthSpread, duplicateGroups, warnings };
}
|
|
231
|
+
|
|
232
|
+
/**
 * Returns the smallest number in `values`, or 0 for an empty list.
 *
 * Folds pairwise instead of spreading the array into `Math.min(...values)`:
 * spreading passes every element as a call argument and throws a RangeError
 * once the array exceeds the engine's argument limit.
 *
 * @param {number[]} values
 * @returns {number} The minimum (NaN if any entry is not numeric), or 0 when empty.
 */
function minValue(values) {
  if (values.length === 0) {
    return 0;
  }
  let smallest = Infinity;
  for (const value of values) {
    // Math.min keeps the same coercion and NaN propagation as the spread form.
    smallest = Math.min(smallest, value);
  }
  return smallest;
}
|
|
235
|
+
|
|
236
|
+
/**
 * Returns the largest number in `values`, or 0 for an empty list.
 *
 * Folds pairwise instead of spreading the array into `Math.max(...values)`:
 * spreading passes every element as a call argument and throws a RangeError
 * once the array exceeds the engine's argument limit.
 *
 * @param {number[]} values
 * @returns {number} The maximum (NaN if any entry is not numeric), or 0 when empty.
 */
function maxValue(values) {
  if (values.length === 0) {
    return 0;
  }
  let largest = -Infinity;
  for (const value of values) {
    // Math.max keeps the same coercion and NaN propagation as the spread form.
    largest = Math.max(largest, value);
  }
  return largest;
}
|
|
239
|
+
|
|
240
|
+
/**
 * Builds a normalized fingerprint of a document's text for duplicate detection.
 *
 * The text is lower-cased, everything except letters, combining marks, digits
 * and whitespace is removed, and whitespace runs are collapsed to one space.
 *
 * Uses Unicode property escapes rather than an ASCII-only `[a-z0-9]` filter so
 * non-Latin text keeps its content; an ASCII filter reduces such documents to
 * their stray digits and makes unrelated documents look identical. Punctuation
 * is stripped before whitespace is collapsed so that "a - b" and "a b" produce
 * the same fingerprint.
 *
 * @param {{text?: string}} document
 * @returns {string} The fingerprint; empty when the text has no letters or digits.
 */
function documentFingerprint(document) {
  return String(document.text ?? "")
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s]+/gu, "")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
247
|
+
|
|
248
|
+
/**
 * Builds one calibration diagnostic record per feature family.
 *
 * Each record carries the family's confidence, its calibrated threshold data
 * (with defaults when the family has none), the minimum-evidence check, and a
 * `usableForFindings` flag. The flag is true only when the corpus band is not
 * "weak", all three minimum-evidence checks pass, and stability is >= 0.35.
 *
 * @param {{source: object, families: object, styleThresholds: {families: object}}} input
 * @returns {Object<string, object>} Diagnostics keyed by family name.
 */
function familyCalibrationDiagnostics({ source, families, styleThresholds }) {
  const entries = [];
  for (const family of Object.keys(families)) {
    const minimumEvidence = minimumEvidenceFor(family, source, families[family]);
    const calibrated = styleThresholds.families[family] ?? {};
    // 0.45 is the fallback stability for a family with no calibrated value.
    const stability = calibrated.stability ?? 0.45;
    const usableForFindings = source.confidence.band !== "weak" &&
      minimumEvidence.documentsMet &&
      minimumEvidence.sentencesMet &&
      minimumEvidence.wordsMet &&
      stability >= 0.35;

    entries.push([family, {
      confidence: families[family].confidence,
      threshold: calibrated.threshold ?? 0,
      observations: calibrated.observations ?? 0,
      stability,
      minimumEvidence,
      usableForFindings,
    }]);
  }
  return Object.fromEntries(entries);
}
|
|
266
|
+
|
|
267
|
+
/**
 * Checks whether the corpus meets a family's minimum evidence requirements.
 *
 * Each known family has a required number of documents, sentences and words;
 * unknown families require one of each. Sentence and word counts come from the
 * family's own features when present, otherwise from the corpus totals.
 *
 * @param {string} family Feature family name.
 * @param {{documentCount: number, sentenceCount: number, wordCount: number}} source
 * @param {{features?: {wordCount?: number, sentenceCount?: number}}} familyData
 * @returns {{requiredDocuments: number, requiredSentences: number, requiredWords: number, documentsMet: boolean, sentencesMet: boolean, wordsMet: boolean}}
 */
function minimumEvidenceFor(family, source, familyData) {
  const floor = (documents, sentences, words) => ({ documents, sentences, words });
  const floorsByFamily = {
    rhythm: floor(1, 8, 80),
    lexical: floor(1, 4, 120),
    register: floor(3, 8, 120),
    discourse: floor(1, 12, 120),
    rhetoricalShape: floor(1, 12, 120),
    evidence: floor(1, 12, 120),
    structure: floor(3, 8, 120),
  };
  const {
    documents: requiredDocuments,
    sentences: requiredSentences,
    words: requiredWords,
  } = floorsByFamily[family] ?? floor(1, 1, 1);

  const { features } = familyData;
  const observedWords = features?.wordCount ?? source.wordCount;
  const observedSentences = features?.sentenceCount ?? source.sentenceCount;

  return {
    requiredDocuments,
    requiredSentences,
    requiredWords,
    documentsMet: source.documentCount >= requiredDocuments,
    sentencesMet: observedSentences >= requiredSentences,
    wordsMet: observedWords >= requiredWords,
  };
}
|
|
173
288
|
|
|
@@ -187,35 +302,139 @@ function confidenceFor(documentCount, sentenceCount, wordCount) {
|
|
|
187
302
|
function guidanceFor({ source, families }) {
|
|
188
303
|
const primaryRegister = families.register.features.primary.value;
|
|
189
304
|
const evidenceRate = families.evidence.features.evidenceSentenceRate;
|
|
190
|
-
const
|
|
191
|
-
const
|
|
305
|
+
const sentenceWords = families.rhythm.features.sentenceWords;
|
|
306
|
+
const registerMix = registerMixLabel(families.register.features.scores);
|
|
307
|
+
const openingPatterns = (families.rhetoricalShape.features.openingMovePatterns || [])
|
|
308
|
+
.slice(0, 3)
|
|
309
|
+
.map((item) => item.value)
|
|
310
|
+
.filter(Boolean);
|
|
192
311
|
|
|
193
312
|
const draftingRules = [
|
|
194
|
-
|
|
195
|
-
|
|
313
|
+
sentenceWords.count > 0
|
|
314
|
+
? `Vary sentence length across the corpus band of ${sentenceWords.p25}-${sentenceWords.p75} words (median ${sentenceWords.median}). Mix short and long sentences; do not hold a constant length.`
|
|
315
|
+
: "Vary sentence length; mix short and long sentences rather than holding a constant cadence.",
|
|
316
|
+
sentenceWords.stdev > 0
|
|
317
|
+
? `Keep sentence-length variation (burstiness) near the corpus standard deviation of about ${sentenceWords.stdev} words; flattening every sentence to the median reads as machine-made.`
|
|
318
|
+
: "Preserve natural variation in sentence length rather than flattening it to one value.",
|
|
319
|
+
registerMix
|
|
320
|
+
? `Default to the learned register mix (${registerMix}) instead of one fixed genre; let each piece lean differently within it.`
|
|
321
|
+
: `Use ${primaryRegister} register as the default genre signal unless the piece intentionally changes genre.`,
|
|
196
322
|
evidenceRate > 0.35
|
|
197
323
|
? "Anchor broad claims with concrete scenes, numbers, quotes, citations, or specific examples."
|
|
198
324
|
: "Do not force evidence density higher than the source corpus supports.",
|
|
199
|
-
|
|
200
|
-
? `
|
|
201
|
-
: "Start from the article's real subject rather than generic positioning.",
|
|
325
|
+
openingPatterns.length
|
|
326
|
+
? `Rotate among the corpus's observed opening shapes (e.g. ${openingPatterns.join("; ")}) rather than reusing one opening every time.`
|
|
327
|
+
: "Start from the article's real subject rather than generic positioning, and vary openings across pieces.",
|
|
202
328
|
];
|
|
203
329
|
|
|
204
330
|
return {
|
|
205
331
|
summary: [
|
|
206
332
|
`Local Dravoice V2 profile from ${source.documentCount} document(s), ${source.wordCount} words, confidence ${source.confidence.band}.`,
|
|
207
|
-
`Primary register signal: ${primaryRegister}.`,
|
|
333
|
+
`Primary register signal: ${primaryRegister}${registerMix ? ` (mix: ${registerMix})` : ""}.`,
|
|
208
334
|
],
|
|
209
335
|
draftingRules,
|
|
336
|
+
formatting: formattingGuidance(families.structure.features),
|
|
210
337
|
avoid: [
|
|
211
338
|
"Do not treat topic vocabulary as proof of voice fit.",
|
|
212
339
|
"Do not claim a draft is or is not the writer's true voice.",
|
|
213
340
|
"Do not invent concrete evidence to satisfy a style finding.",
|
|
341
|
+
"Do not reuse the same outline, opening, or formatting on every piece; stay inside the learned ranges and vary within them.",
|
|
214
342
|
],
|
|
215
343
|
examples: [],
|
|
216
344
|
};
|
|
217
345
|
}
|
|
218
346
|
|
|
347
|
+
/**
 * Formats the leading register scores as a "value score, value score" label.
 *
 * Only entries with a positive score count, and at most the first three of
 * them are shown. Returns an empty string when fewer than two registers are
 * active, since a single register is not a mix.
 *
 * @param {Array<{value: string, score: number}>} [scores]
 * @returns {string}
 */
function registerMixLabel(scores) {
  const positive = (scores || []).filter((entry) => entry.score > 0);
  if (positive.length < 2) {
    return "";
  }
  const parts = [];
  for (const { value, score } of positive.slice(0, 3)) {
    parts.push(`${value} ${score}`);
  }
  return parts.join(", ");
}
|
|
354
|
+
|
|
355
|
+
/**
 * Builds the formatting section of the drafting guidance.
 *
 * The list always starts with a fixed preamble stating that formatting is not
 * voice. It is followed by rules derived from the structure features (heading
 * count range, up to four section-order patterns, list/quote rates), then the
 * anti-template rules and the palette rules.
 *
 * @param {object} structure Features of the structure family.
 * @returns {string[]}
 */
function formattingGuidance(structure) {
  const preamble = "Formatting and document structure are NOT the writer's voice and must not be reproduced as if they were. Voice lives in the Drafting Rules above (rhythm, diction, register, evidence). The items below are formatting habits to deliberately vary: do not justify keeping a repeated tic as \"authentic voice.\"";
  const learned = [];

  // Heading advice only applies when headings exist and their count varies.
  const { count, min, max } = structure.headingCount || {};
  if (count > 0 && min !== max) {
    learned.push(`Heading count varies across the corpus (${min}-${max}); pick a level of sectioning that fits the piece rather than a fixed template.`);
  }

  // Keep the first four patterns, dropping any with an empty value.
  const shapes = (structure.sectionOrderPatterns || [])
    .slice(0, 4)
    .flatMap((item) => (item.value ? [item.value] : []));
  if (shapes.length) {
    learned.push(`Observed section shapes to choose among (do not reuse one skeleton): ${shapes.join(" | ")}.`);
  }

  const { listDocumentRate, quoteDocumentRate } = structure;
  if (typeof listDocumentRate === "number") {
    const listPercent = Math.round(listDocumentRate * 100);
    const quotePercent = Math.round((quoteDocumentRate ?? 0) * 100);
    learned.push(`Lists appear in about ${listPercent}% of pieces and quotes in about ${quotePercent}%; use them where they fit, not on every draft.`);
  }

  return [
    preamble,
    ...learned,
    ...antiTemplateGuidance(structure.templateTics),
    ...paletteGuidance(structure.formattingPalette),
  ];
}
|
|
375
|
+
|
|
376
|
+
// Human-readable names for the formatting devices tracked in the palette.
const PALETTE_LABELS = {
  blockquote: "blockquotes",
  bulletList: "bullet lists",
  orderedList: "numbered lists",
  nestedList: "nested lists",
  subHeading: "sub-headings (h3+)",
  table: "tables",
  codeBlock: "code blocks",
  inlineCode: "inline code",
  boldInline: "bold emphasis",
  italicInline: "italic emphasis",
  link: "links",
  horizontalRule: "section dividers",
};

/**
 * Turns the corpus's formatting palette into guidance for widening it.
 *
 * Markdown offers far more range than the few devices a homogeneous corpus
 * reaches for. Devices with a rate of 0.6 or more are reported as leaned on,
 * devices with a rate of 0.2 or less as underused, and the model is pushed to
 * broaden the palette where the content genuinely calls for it.
 *
 * @param {Object<string, number>} [palette] Rate per formatting device.
 * @returns {string[]} Zero, one or two guidance rules.
 */
function paletteGuidance(palette) {
  if (!palette) {
    return [];
  }

  // One pass over the devices, keeping their original key order in each bucket.
  const leanedOn = [];
  const underused = [];
  for (const device of Object.keys(palette)) {
    const rate = palette[device];
    if (rate >= 0.6) {
      leanedOn.push(device);
    }
    if (rate <= 0.2) {
      underused.push(device);
    }
  }

  const rules = [];
  if (leanedOn.length > 0) {
    rules.push(`The corpus's formatting palette is narrow: it leans on ${labelList(leanedOn)}. Treat that as a habit to widen, not a target to hit.`);
  }
  if (underused.length > 0) {
    rules.push(`Markdown devices the corpus rarely or never uses: ${labelList(underused)}. Reach for these where the content fits (a comparison wants a table, steps want numbered items, a definition wants inline code, an aside wants italics) so pieces do not all share one formatting shape.`);
  }
  return rules;
}

/**
 * Joins device names into a comma-separated list of readable labels.
 * Devices without an entry in PALETTE_LABELS are shown by their raw name.
 *
 * @param {string[]} devices
 * @returns {string}
 */
function labelList(devices) {
  const labels = [];
  for (const device of devices) {
    labels.push(PALETTE_LABELS[device] ?? device);
  }
  return labels.join(", ");
}
|
|
413
|
+
|
|
414
|
+
// When the corpus over-relies on a formatting template, faithfully reproducing it is
|
|
415
|
+
// what makes generated pieces look copy-pasted. This guidance is prescriptive (injected
|
|
416
|
+
// to break the habit), not a learned pattern to match — surfaced only when a tic
|
|
417
|
+
// dominates the corpus, so varied corpora are left alone.
|
|
418
|
+
/**
 * Produces warnings for formatting tics that dominate the corpus.
 *
 * Each tic has its own rate and trigger level; a rule is emitted only when the
 * rate reaches that level, so corpora without a dominant tic get no rules.
 * Rules appear in a fixed order: single-sentence paragraphs, Title Case
 * headings, lede blockquotes, then per-section lists.
 *
 * @param {{singleSentenceParagraphRate?: number, titleCaseHeadingRate?: number, ledeBlockquoteRate?: number, sectionListRate?: number}} [tics]
 * @returns {string[]}
 */
function antiTemplateGuidance(tics) {
  if (!tics) {
    return [];
  }

  const checks = [
    {
      rate: tics.singleSentenceParagraphRate,
      floor: 0.35,
      describe: (percent) => `Watch a strong template tic: about ${percent}% of corpus paragraphs are a single sentence. This staccato one-line-paragraph cadence is the loudest "copy-paste" tell. Deliberately group related sentences into multi-sentence paragraphs; reserve one-line paragraphs for genuine emphasis.`,
    },
    {
      rate: tics.titleCaseHeadingRate,
      floor: 0.6,
      describe: (percent) => `Watch a template tic: about ${percent}% of headings are Title Case. Vary heading style (sentence case, questions, or no headings at all) instead of the same Title-Case section labels every time.`,
    },
    {
      rate: tics.ledeBlockquoteRate,
      floor: 0.5,
      describe: (percent) => `Watch a template tic: about ${percent}% of pieces drop a pull-quote (blockquote) into the intro before the first heading. Do not open with an aphoristic blockquote by default; let most pieces earn a quote later or skip it.`,
    },
    {
      rate: tics.sectionListRate,
      floor: 0.45,
      describe: (percent) => `Watch a template tic: about ${percent}% of sections contain a bullet list. Do not put a list in section after section; carry most points in connected prose and reserve lists for genuinely enumerable material.`,
    },
  ];

  return checks
    .filter(({ rate, floor }) => rate >= floor)
    .map(({ rate, describe }) => describe(Math.round(rate * 100)));
}
|
|
437
|
+
|
|
219
438
|
function toleranceFor(band, deep, strong, weak) {
|
|
220
439
|
if (band === "deep") {
|
|
221
440
|
return deep;
|
package/src/v2/prompt.js
CHANGED
|
@@ -23,9 +23,9 @@ function renderPrompt(profile, format) {
|
|
|
23
23
|
system: "System writing guidance: Dravoice V2",
|
|
24
24
|
}[format];
|
|
25
25
|
const preface = {
|
|
26
|
-
agents: "Use this as local, inspectable
|
|
27
|
-
claude: "Use these project-local voice notes
|
|
28
|
-
system: "Follow these local voice constraints
|
|
26
|
+
agents: "Use this as local guidance that complements AI by preserving reusable, inspectable constraints from the writer's own corpus. It is not an AI detector or a license to imitate a third party.",
|
|
27
|
+
claude: "Use these project-local voice notes as guidance that complements AI by preserving reusable, inspectable constraints for drafting or reviewing prose. Treat them as guidance, not identity proof.",
|
|
28
|
+
system: "Follow these local voice constraints as reusable, inspectable guidance for AI-assisted prose. Do not expose private source text or claim authorship identity from them.",
|
|
29
29
|
}[format];
|
|
30
30
|
const lines = [
|
|
31
31
|
header,
|
|
@@ -49,6 +49,13 @@ function renderPrompt(profile, format) {
|
|
|
49
49
|
lines.push(`- ${rule}`);
|
|
50
50
|
}
|
|
51
51
|
|
|
52
|
+
if (profile.guidance.formatting && profile.guidance.formatting.length) {
|
|
53
|
+
lines.push("", "## Formatting: Vary Deliberately (Not Voice)", "");
|
|
54
|
+
for (const rule of profile.guidance.formatting) {
|
|
55
|
+
lines.push(`- ${rule}`);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
52
59
|
lines.push("", "## Avoid", "");
|
|
53
60
|
for (const item of profile.guidance.avoid) {
|
|
54
61
|
lines.push(`- ${item}`);
|
package/src/v2/review.js
CHANGED
|
@@ -10,11 +10,11 @@ const REVIEW_MODES = {
|
|
|
10
10
|
exitOnDrift: false,
|
|
11
11
|
},
|
|
12
12
|
balanced: {
|
|
13
|
-
findingThresholds: { evidence: 65, rhythm: 55, rhetoricalShape: 50 },
|
|
13
|
+
findingThresholds: { evidence: 65, rhythm: 55, rhetoricalShape: 50, discourse: 55, lexical: 55, register: 55, structure: 55 },
|
|
14
14
|
exitOnDrift: false,
|
|
15
15
|
},
|
|
16
16
|
strict: {
|
|
17
|
-
findingThresholds: { evidence: 75, rhythm: 70, rhetoricalShape: 65 },
|
|
17
|
+
findingThresholds: { evidence: 75, rhythm: 70, rhetoricalShape: 65, discourse: 65, lexical: 65, register: 70, structure: 91 },
|
|
18
18
|
exitOnDrift: true,
|
|
19
19
|
},
|
|
20
20
|
};
|
|
@@ -34,6 +34,7 @@ export function reviewVoiceDraftV2({ file, voice, cwd = process.cwd(), mode = "b
|
|
|
34
34
|
draftDocument.wordCount < sourceProfile.calibration.minimumDraftSize.words ||
|
|
35
35
|
draftDocument.sentences.length < sourceProfile.calibration.minimumDraftSize.sentences
|
|
36
36
|
) {
|
|
37
|
+
const familyConfidence = familyConfidenceFor(sourceProfile, {});
|
|
37
38
|
return {
|
|
38
39
|
schemaVersion: 2,
|
|
39
40
|
file: displayPath(filePath, cwd),
|
|
@@ -41,6 +42,8 @@ export function reviewVoiceDraftV2({ file, voice, cwd = process.cwd(), mode = "b
|
|
|
41
42
|
mode: reviewMode,
|
|
42
43
|
fit: { band: "insufficient-evidence", distance: 0 },
|
|
43
44
|
familyScores: {},
|
|
45
|
+
familyConfidence,
|
|
46
|
+
suppressedFindings: suppressedFindingsFor(sourceProfile, familyConfidence),
|
|
44
47
|
corpusConfidence: sourceProfile.source.confidence,
|
|
45
48
|
},
|
|
46
49
|
findings: [],
|
|
@@ -51,7 +54,8 @@ export function reviewVoiceDraftV2({ file, voice, cwd = process.cwd(), mode = "b
|
|
|
51
54
|
const familyDiagnostics = familyDiagnosticsFor(sourceProfile, draftProfile);
|
|
52
55
|
const familyScores = Object.fromEntries(Object.entries(familyDiagnostics).map(([family, item]) => [family, item.score]));
|
|
53
56
|
const distance = styleDistanceFromDiagnostics(familyDiagnostics);
|
|
54
|
-
const
|
|
57
|
+
const familyConfidence = familyConfidenceFor(sourceProfile, familyDiagnostics);
|
|
58
|
+
const findings = reviewFindings(sourceProfile, draftProfile, familyScores, reviewMode, familyConfidence);
|
|
55
59
|
const fit = {
|
|
56
60
|
band: fitBand(distance, findings, familyDiagnostics),
|
|
57
61
|
distance,
|
|
@@ -64,6 +68,8 @@ export function reviewVoiceDraftV2({ file, voice, cwd = process.cwd(), mode = "b
|
|
|
64
68
|
mode: reviewMode,
|
|
65
69
|
fit,
|
|
66
70
|
familyScores,
|
|
71
|
+
familyConfidence,
|
|
72
|
+
suppressedFindings: suppressedFindingsFor(sourceProfile, familyConfidence),
|
|
67
73
|
familyDistances: Object.fromEntries(Object.entries(familyDiagnostics).map(([family, item]) => [family, item.distance])),
|
|
68
74
|
familyDrift: Object.fromEntries(Object.entries(familyDiagnostics).map(([family, item]) => [family, item.drift])),
|
|
69
75
|
thresholds: Object.fromEntries(Object.entries(familyDiagnostics).map(([family, item]) => [family, item.threshold])),
|
|
@@ -76,71 +82,175 @@ export function reviewVoiceDraftV2({ file, voice, cwd = process.cwd(), mode = "b
|
|
|
76
82
|
|
|
77
83
|
export function renderVoiceReviewV2(result) {
|
|
78
84
|
const lines = [
|
|
79
|
-
"Voice
|
|
85
|
+
"Voice review notes, not AI detection.",
|
|
80
86
|
"",
|
|
81
87
|
result.file,
|
|
82
|
-
`
|
|
88
|
+
`Fit: ${capitalize(result.summary.fit.band)} (${result.summary.fit.distance} style distance)`,
|
|
83
89
|
`Corpus confidence: ${capitalize(result.summary.corpusConfidence.band)} - ${result.summary.corpusConfidence.message}`,
|
|
84
90
|
"Family scores:",
|
|
85
91
|
];
|
|
86
92
|
|
|
87
93
|
for (const [family, score] of Object.entries(result.summary.familyScores)) {
|
|
88
|
-
lines.push(`- ${family}: ${score}`);
|
|
94
|
+
lines.push(`- ${familyLabel(family)}: ${score}`);
|
|
89
95
|
}
|
|
90
96
|
|
|
91
97
|
lines.push("");
|
|
92
98
|
if (result.findings.length === 0) {
|
|
93
|
-
lines.push("No high-confidence
|
|
99
|
+
lines.push("No high-confidence voice drift findings.");
|
|
100
|
+
appendSuppressedFindings(lines, result.summary.suppressedFindings);
|
|
94
101
|
lines.push("");
|
|
95
102
|
return lines.join("\n");
|
|
96
103
|
}
|
|
97
104
|
|
|
98
105
|
lines.push("Start here:");
|
|
99
|
-
|
|
100
|
-
lines.push(`${
|
|
101
|
-
|
|
102
|
-
|
|
106
|
+
result.findings.slice(0, 6).forEach((finding, index) => {
|
|
107
|
+
lines.push(`${index + 1}. ${priorityLabel(finding.priority)} ${familyLabel(finding.family)}`);
|
|
108
|
+
if (finding.confidence) {
|
|
109
|
+
lines.push(`Confidence: ${capitalize(finding.confidence.band)}; stability ${finding.confidence.stability}`);
|
|
110
|
+
}
|
|
111
|
+
lines.push(`Why: ${finding.why}`);
|
|
112
|
+
lines.push(`Do this: ${finding.action}`);
|
|
103
113
|
lines.push("");
|
|
104
|
-
}
|
|
114
|
+
});
|
|
115
|
+
appendSuppressedFindings(lines, result.summary.suppressedFindings);
|
|
105
116
|
return lines.join("\n");
|
|
106
117
|
}
|
|
107
118
|
|
|
108
|
-
function reviewFindings(source, draft, scores, mode) {
|
|
119
|
+
function reviewFindings(source, draft, scores, mode, familyConfidence) {
|
|
109
120
|
const modeConfig = REVIEW_MODES[mode];
|
|
110
121
|
if (source.source?.confidence?.band === "weak" || !modeConfig.findingThresholds) {
|
|
111
122
|
return [];
|
|
112
123
|
}
|
|
113
124
|
const findings = [];
|
|
114
|
-
if (
|
|
125
|
+
if (shouldFlag("evidence", scores, modeConfig, familyConfidence)) {
|
|
115
126
|
findings.push({
|
|
116
127
|
id: "v2.evidence-drift",
|
|
117
128
|
family: "evidence",
|
|
118
129
|
priority: "review",
|
|
130
|
+
confidence: familyConfidence.evidence,
|
|
119
131
|
why: `Source evidence sentence rate is ${source.families.evidence.features.evidenceSentenceRate}; draft rate is ${draft.families.evidence.features.evidenceSentenceRate}.`,
|
|
120
132
|
action: "Add concrete support before broad claims: a scene, quote, number, citation, URL, sensory detail, or specific example.",
|
|
121
133
|
});
|
|
122
134
|
}
|
|
123
|
-
if (
|
|
135
|
+
if (shouldFlag("rhythm", scores, modeConfig, familyConfidence)) {
|
|
124
136
|
findings.push({
|
|
125
137
|
id: "v2.rhythm-drift",
|
|
126
138
|
family: "rhythm",
|
|
127
139
|
priority: "consider",
|
|
140
|
+
confidence: familyConfidence.rhythm,
|
|
128
141
|
why: `Source median sentence/paragraph length is ${source.families.rhythm.features.sentenceWords.median}/${source.families.rhythm.features.paragraphWords.median}; draft is ${draft.families.rhythm.features.sentenceWords.median}/${draft.families.rhythm.features.paragraphWords.median}.`,
|
|
129
142
|
action: "Revise sentence and paragraph pacing toward the learned range.",
|
|
130
143
|
});
|
|
131
144
|
}
|
|
132
|
-
if (
|
|
145
|
+
if (shouldFlag("rhetoricalShape", scores, modeConfig, familyConfidence)) {
|
|
133
146
|
findings.push({
|
|
134
147
|
id: "v2.shape-drift",
|
|
135
148
|
family: "rhetoricalShape",
|
|
136
149
|
priority: "consider",
|
|
150
|
+
confidence: familyConfidence.rhetoricalShape,
|
|
137
151
|
why: `Source opening shape is ${source.families.rhetoricalShape.features.openingMoves.slice(0, 3).join(" -> ")}; draft opening shape is ${draft.families.rhetoricalShape.features.openingMoves.slice(0, 3).join(" -> ")}.`,
|
|
138
152
|
action: "Rework the opening so it uses a compatible scene, claim, contrast, reflection, or example sequence.",
|
|
139
153
|
});
|
|
140
154
|
}
|
|
155
|
+
if (shouldFlag("discourse", scores, modeConfig, familyConfidence)) {
|
|
156
|
+
findings.push({
|
|
157
|
+
id: "v2.discourse-drift",
|
|
158
|
+
family: "discourse",
|
|
159
|
+
priority: "consider",
|
|
160
|
+
confidence: familyConfidence.discourse,
|
|
161
|
+
why: `Source transition rates are ${rateMapLabel(source.families.discourse.features.transitionRates)}; draft rates are ${rateMapLabel(draft.families.discourse.features.transitionRates)}.`,
|
|
162
|
+
action: "Revise repeated sentence turns, callbacks, and transitions so the draft does not lean on a different discourse pattern.",
|
|
163
|
+
});
|
|
164
|
+
}
|
|
165
|
+
if (shouldFlag("lexical", scores, modeConfig, familyConfidence)) {
|
|
166
|
+
findings.push({
|
|
167
|
+
id: "v2.lexical-drift",
|
|
168
|
+
family: "lexical",
|
|
169
|
+
priority: "consider",
|
|
170
|
+
confidence: familyConfidence.lexical,
|
|
171
|
+
why: `Function-word, masked character, punctuation, or boundary-token habits drift from the calibrated source profile.`,
|
|
172
|
+
action: "Revise diction and punctuation where it improves the article; do not stuff source topic words or add artificial imperfections.",
|
|
173
|
+
});
|
|
174
|
+
}
|
|
175
|
+
if (shouldFlag("register", scores, modeConfig, familyConfidence)) {
|
|
176
|
+
findings.push({
|
|
177
|
+
id: "v2.register-drift",
|
|
178
|
+
family: "register",
|
|
179
|
+
priority: "consider",
|
|
180
|
+
confidence: familyConfidence.register,
|
|
181
|
+
why: `Source primary register is ${source.families.register.features.primary.value}; draft primary register is ${draft.families.register.features.primary.value}.`,
|
|
182
|
+
action: "Bring the stance closer to the learned genre mix while preserving the draft's real subject and audience.",
|
|
183
|
+
});
|
|
184
|
+
}
|
|
185
|
+
if (shouldFlag("structure", scores, modeConfig, familyConfidence)) {
|
|
186
|
+
findings.push({
|
|
187
|
+
id: "v2.structure-drift",
|
|
188
|
+
family: "structure",
|
|
189
|
+
priority: "consider",
|
|
190
|
+
confidence: familyConfidence.structure,
|
|
191
|
+
why: `Source heading/list/quote and section-size patterns differ from the draft's document structure.`,
|
|
192
|
+
action: "Adjust section shape, opening order, list use, or quote placement only where the article benefits from that structure.",
|
|
193
|
+
});
|
|
194
|
+
}
|
|
141
195
|
return findings;
|
|
142
196
|
}
|
|
143
197
|
|
|
198
|
+
/**
 * Decides whether a family's score should produce a finding.
 *
 * A family is flagged only when it is usable for findings, its score is a
 * finite number, and that score is below the mode's threshold for the family.
 * When the family is not usable, the falsy `usableForFindings` value itself is
 * returned (possibly `undefined`), so callers should treat the result as a
 * truthy/falsy value.
 *
 * @param {string} family
 * @param {Object<string, number>} scores
 * @param {{findingThresholds: Object<string, number>}} modeConfig
 * @param {Object<string, {usableForFindings?: boolean}>} familyConfidence
 * @returns {boolean|undefined}
 */
function shouldFlag(family, scores, modeConfig, familyConfidence) {
  const usable = familyConfidence[family]?.usableForFindings;
  if (!usable) {
    return usable;
  }
  const score = scores[family];
  if (!Number.isFinite(score)) {
    return false;
  }
  return score < modeConfig.findingThresholds[family];
}
|
|
203
|
+
|
|
204
|
+
/**
 * Builds the per-family confidence record attached to a review.
 *
 * Combines the diagnostics stored in the profile's calibration with the
 * diagnostics computed for this review. Stability prefers the review value,
 * observations prefer the stored value. When the profile has no stored
 * `usableForFindings`, a family is usable unless the corpus band is "weak".
 *
 * @param {object} sourceProfile Voice profile; its `families` keys define the output.
 * @param {Object<string, {stability?: number, observations?: number}>} familyDiagnostics
 * @returns {Object<string, {band: string, stability: number, observations: number, usableForFindings: boolean, minimumEvidence: object|null}>}
 */
function familyConfidenceFor(sourceProfile, familyDiagnostics) {
  const stored = sourceProfile.calibration?.familyDiagnostics ?? {};
  const entries = [];
  for (const family of Object.keys(sourceProfile.families ?? {})) {
    const fromProfile = stored[family] ?? {};
    const fromReview = familyDiagnostics[family] ?? {};
    const corpusUsable = sourceProfile.source?.confidence?.band !== "weak";
    entries.push([family, {
      band: sourceProfile.families[family]?.confidence ?? "low",
      stability: fromReview.stability ?? fromProfile.stability ?? 0.45,
      observations: fromProfile.observations ?? fromReview.observations ?? 0,
      usableForFindings: fromProfile.usableForFindings ?? corpusUsable,
      minimumEvidence: fromProfile.minimumEvidence ?? null,
    }]);
  }
  return Object.fromEntries(entries);
}
|
|
218
|
+
|
|
219
|
+
/**
 * Lists the families whose findings are withheld, each with the reason why.
 *
 * A family is included when its confidence record is not usable for findings.
 *
 * @param {object} sourceProfile Voice profile, passed through to the reason lookup.
 * @param {Object<string, {usableForFindings?: boolean}>} familyConfidence
 * @returns {Array<{family: string, reason: string}>}
 */
function suppressedFindingsFor(sourceProfile, familyConfidence) {
  const suppressed = [];
  for (const [family, confidence] of Object.entries(familyConfidence)) {
    if (confidence.usableForFindings) {
      continue;
    }
    suppressed.push({
      family,
      reason: suppressedReasonFor(sourceProfile, confidence),
    });
  }
  return suppressed;
}
|
|
227
|
+
|
|
228
|
+
/**
 * Explains why a family's findings are withheld.
 *
 * Reasons are checked in order: a weak corpus band, then any unmet
 * minimum-evidence requirement, and otherwise low calibration stability.
 *
 * @param {object} sourceProfile Voice profile; only the corpus confidence band is read.
 * @param {{minimumEvidence?: object|null}} confidence Confidence record for the family.
 * @returns {string}
 */
function suppressedReasonFor(sourceProfile, confidence) {
  const corpusBand = sourceProfile.source?.confidence?.band;
  if (corpusBand === "weak") {
    return "weak corpus confidence; add more representative source documents before trusting this family.";
  }

  const minimum = confidence.minimumEvidence;
  const allRequirementsMet = minimum &&
    minimum.documentsMet && minimum.sentencesMet && minimum.wordsMet;
  if (minimum && !allRequirementsMet) {
    const { requiredDocuments, requiredSentences, requiredWords } = minimum;
    return `minimum evidence not met; requires ${requiredDocuments} document(s), ${requiredSentences} sentence(s), and ${requiredWords} word(s).`;
  }

  return "calibration stability is too low for a high-confidence finding.";
}
|
|
238
|
+
|
|
239
|
+
/**
 * Appends a "Suppressed findings:" section to the rendered review lines.
 *
 * Does nothing when the list is empty. Otherwise adds a blank line, the
 * heading, and one bullet per suppressed family, capped at six entries.
 * Mutates `lines` in place.
 *
 * @param {string[]} lines Output lines being built.
 * @param {Array<{family: string, reason: string}>} [suppressedFindings]
 * @returns {void}
 */
function appendSuppressedFindings(lines, suppressedFindings = []) {
  if (suppressedFindings.length === 0) {
    return;
  }
  lines.push("", "Suppressed findings:");
  for (const { family, reason } of suppressedFindings.slice(0, 6)) {
    lines.push(`- ${familyLabel(family)}: ${reason}`);
  }
}
|
|
249
|
+
|
|
250
|
+
/**
 * Renders a map of rates as "key:rate, key:rate", or "none" when it is empty.
 *
 * @param {Object<string, number>} [value]
 * @returns {string}
 */
function rateMapLabel(value = {}) {
  const parts = [];
  for (const [key, rate] of Object.entries(value)) {
    parts.push(`${key}:${rate}`);
  }
  return parts.length > 0 ? parts.join(", ") : "none";
}
|
|
253
|
+
|
|
144
254
|
function normalizeReviewMode(mode) {
|
|
145
255
|
const normalized = String(mode ?? "balanced").toLowerCase();
|
|
146
256
|
if (!Object.hasOwn(REVIEW_MODES, normalized)) {
|
|
@@ -175,3 +285,19 @@ function resolvePath(cwd, value) {
|
|
|
175
285
|
/**
 * Upper-cases the first character of a string and leaves the rest unchanged.
 *
 * @param {string} value
 * @returns {string}
 */
function capitalize(value) {
  const head = value.charAt(0);
  const tail = value.slice(1);
  return head.toUpperCase() + tail;
}
|
|
288
|
+
|
|
289
|
+
/**
 * Returns the display label for a feature family.
 *
 * Known families map to a fixed label; any other name is shown capitalized.
 * The lookup checks own properties only. A plain `labels[family]` lookup also
 * resolves inherited keys such as "toString" or "constructor" and would return
 * a function instead of a label for a family with one of those names.
 *
 * @param {string} family Feature family name.
 * @returns {string}
 */
function familyLabel(family) {
  const labels = {
    evidence: "Evidence",
    rhythm: "Rhythm",
    rhetoricalShape: "Rhetorical shape",
    discourse: "Discourse",
    lexical: "Lexical style",
    register: "Register",
    structure: "Structure",
  };
  return Object.hasOwn(labels, family) ? labels[family] : capitalize(family);
}
|
|
300
|
+
|
|
301
|
+
/**
 * Returns the display label for a finding priority.
 * Only "review" maps to "Review"; every other value is shown as "Consider".
 *
 * @param {string} priority
 * @returns {string}
 */
function priorityLabel(priority) {
  if (priority === "review") {
    return "Review";
  }
  return "Consider";
}
|