dravoice 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/package.json +1 -1
- package/src/index.js +7 -0
- package/src/v2/analyzers/structure.js +92 -1
- package/src/v2/brief.js +41 -7
- package/src/v2/document-model.js +1 -0
- package/src/v2/profile.js +112 -8
- package/src/v2/prompt.js +10 -3
- package/src/v2/text-utils.js +5 -2
package/README.md
CHANGED
|
@@ -15,6 +15,13 @@ Start with your own writing, not a blank prompt. Create an `articles`
|
|
|
15
15
|
directory in the folder where you want to run Dravoice, then copy in at least
|
|
16
16
|
3 representative long-form Markdown, MDX, or plain-text pieces.
|
|
17
17
|
|
|
18
|
+
## Why Not Just Paste Examples Into AI?
|
|
19
|
+
|
|
20
|
+
That can be fine for casual one-off drafts. Dravoice is for repeatable
|
|
21
|
+
long-form workflows where you want inspection, evidence grounding, and revision diagnostics
|
|
22
|
+
from local, inspectable guidance instead of asking a model to infer your style
|
|
23
|
+
from scratch every time.
|
|
24
|
+
|
|
18
25
|
```bash
|
|
19
26
|
mkdir -p articles
|
|
20
27
|
```
|
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -756,6 +756,9 @@ function helpText() {
|
|
|
756
756
|
return [
|
|
757
757
|
"Dravoice - local-first voice guidance for writers",
|
|
758
758
|
"",
|
|
759
|
+
"Why not just paste examples into AI?",
|
|
760
|
+
"Dravoice complements AI by turning your own corpus into inspectable, repeatable, and reviewable guidance.",
|
|
761
|
+
"",
|
|
759
762
|
"First run:",
|
|
760
763
|
"1. Check your writing folder",
|
|
761
764
|
" drav doctor",
|
|
@@ -872,6 +875,7 @@ const HELP_TOPICS = {
|
|
|
872
875
|
"",
|
|
873
876
|
"What it does:",
|
|
874
877
|
"Shows corpus confidence, feature families, revision handles, and drafting guidance in plain language.",
|
|
878
|
+
"Check what Dravoice learned before trusting it.",
|
|
875
879
|
"",
|
|
876
880
|
"Options:",
|
|
877
881
|
" --voice <dir> Voice profile directory. Defaults to .dravoice.yml,",
|
|
@@ -886,6 +890,7 @@ const HELP_TOPICS = {
|
|
|
886
890
|
"",
|
|
887
891
|
"What it does:",
|
|
888
892
|
"Turns high-confidence profile observations into reusable drafting guidance for an LLM or writing agent.",
|
|
893
|
+
"Use this to give AI stable guidance without re-pasting source writing.",
|
|
889
894
|
"",
|
|
890
895
|
"Options:",
|
|
891
896
|
" --voice <dir> Voice profile directory. Defaults to .dravoice.yml,",
|
|
@@ -923,6 +928,7 @@ const HELP_TOPICS = {
|
|
|
923
928
|
"",
|
|
924
929
|
"What it does:",
|
|
925
930
|
"Ranks calibrated, human-editable revision actions. It does not rewrite the draft or claim AI detection.",
|
|
931
|
+
"Use deterministic diagnostics after drafting; this is the part a plain prompt cannot reliably provide.",
|
|
926
932
|
"",
|
|
927
933
|
"Options:",
|
|
928
934
|
" --voice <dir> Voice profile directory. Defaults to .dravoice.yml,",
|
|
@@ -938,6 +944,7 @@ const HELP_TOPICS = {
|
|
|
938
944
|
"",
|
|
939
945
|
"What it does:",
|
|
940
946
|
"Compares a draft with the profile and reports family-level drift. It is revision guidance, not AI detection.",
|
|
947
|
+
"Use deterministic diagnostics after drafting; this is the part a plain prompt cannot reliably provide.",
|
|
941
948
|
"",
|
|
942
949
|
"Options:",
|
|
943
950
|
" --voice <dir> Voice profile directory. Defaults to .dravoice.yml,",
|
package/src/v2/analyzers/structure.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { distribution, rate, topItems } from "../text-utils.js";
|
|
1
|
+
import { distribution, rate, splitSentences, topItems } from "../text-utils.js";
|
|
2
2
|
import { moveFor } from "./rhetorical-shape.js";
|
|
3
3
|
|
|
4
4
|
export function analyzeStructure(documents) {
|
|
@@ -33,9 +33,100 @@ export function analyzeStructure(documents) {
|
|
|
33
33
|
openingMoves,
|
|
34
34
|
listDocumentRate: rate(documents.filter((document) => document.blocks.some((block) => block.type === "list")).length, documents.length, 2),
|
|
35
35
|
quoteDocumentRate: rate(documents.filter((document) => document.blocks.some((block) => block.type === "quote")).length, documents.length, 2),
|
|
36
|
+
templateTics: templateTics(documents),
|
|
37
|
+
formattingPalette: formattingPalette(documents),
|
|
36
38
|
},
|
|
37
39
|
examples: openingMoves.slice(0, 5),
|
|
38
40
|
warnings: documents.length < 3 ? ["Structure confidence is limited because the corpus has fewer than 3 documents."] : [],
|
|
39
41
|
revisionHandles: ["Compare headings, list/quote use, section size, and opening structure."],
|
|
40
42
|
};
|
|
41
43
|
}
|
|
44
|
+
|
|
45
|
+
// Measures how often the corpus repeats four formatting habits that make pieces
// look templated: one-sentence paragraphs, Title-Case headings, a quote block
// placed before the first heading, and a list inside a headed section. Every
// value is a corpus-wide rate produced by `rate(count, total, 2)`, so guidance
// can ask the model to vary these habits instead of copying one skeleton.
function templateTics(documents) {
  const ofType = (type) => (block) => block.type === type;
  const tally = (items, predicate) =>
    items.reduce((total, item) => (predicate(item) ? total + 1 : total), 0);

  // Paragraph blocks across the whole corpus, and how many hold at most one sentence.
  const paragraphs = documents.flatMap(({ blocks }) => blocks.filter(ofType("paragraph")));
  const oneSentenceCount = tally(
    paragraphs,
    (paragraph) => splitSentences(paragraph.lines.join(" ")).length <= 1,
  );

  // Every heading in the corpus, and how many read as Title Case.
  const headings = documents.flatMap(({ headings: own }) => own);
  const titleCaseCount = tally(headings, ({ text }) => isTitleCase(text));

  // A lede blockquote is a quote block that has no owning heading (headingId is null/undefined).
  const ledeQuoteDocs = tally(documents, ({ blocks }) =>
    blocks.some((block) => block.type === "quote" && block.headingId == null));

  // Only sections that carry a heading are considered; count those holding a list block.
  const headed = documents.flatMap(({ sections }) => sections.filter(({ heading }) => heading));
  const listSectionCount = tally(headed, ({ blocks }) => blocks.some(ofType("list")));

  return {
    singleSentenceParagraphRate: rate(oneSentenceCount, paragraphs.length, 2),
    titleCaseHeadingRate: rate(titleCaseCount, headings.length, 2),
    ledeBlockquoteRate: rate(ledeQuoteDocs, documents.length, 2),
    sectionListRate: rate(listSectionCount, headed.length, 2),
  };
}
|
|
77
|
+
|
|
78
|
+
// The set of Markdown formatting devices a corpus actually reaches for. Each entry maps
// a device name to a multiline pattern that is tested once per document; results are
// reported as per-document usage rates so guidance can name what the corpus leans on
// and which devices it underuses.
//
// None of the patterns carries the `g` flag, so `.test()` keeps no `lastIndex` state and
// the same literal can safely be reused across documents.
const FORMATTING_DEVICES = {
  blockquote: /^>\s+/m,
  bulletList: /^[ \t]*[-*+]\s+/m,
  orderedList: /^[ \t]*\d+[.)]\s+/m,
  // An indented line only counts as a nested item when it starts with a real list
  // marker (bullet, or number followed by `.`/`)`) and a space. This keeps indented
  // continuation text such as "  2020 was ..." or "  **bold** ..." from being counted.
  nestedList: /^[ \t]+(?:[-*+]|\d+[.)])[ \t]+/m,
  subHeading: /^#{3,6}\s+/m,
  table: /^\|.*\|\s*$/m,
  codeBlock: /^(```|~~~)/m,
  inlineCode: /(^|[^`])`[^`\n]+`/m,
  boldInline: /\*\*[^*\n]+\*\*|__[^_\n]+__/m,
  italicInline: /(^|[^*_])[*_][^*_\n]+[*_]/m,
  link: /\[[^\]]+\]\([^)]+\)/m,
  // Thematic break: three or more of the SAME marker on ONE line, optionally separated
  // by spaces/tabs. The back-reference stops mixed markers ("-*_") from matching, the
  // [ \t] classes stop the match from spanning several lines, and the single-sided
  // separator removes the ambiguous `\s*...\s*` repetition that backtracked
  // exponentially on long "- - - - x" lines. `\r?` keeps CRLF documents matching.
  horizontalRule: /^[ \t]{0,3}([-*_])(?:[ \t]*\1){2,}[ \t]*\r?$/m,
};
|
|
95
|
+
|
|
96
|
+
// Builds the corpus formatting palette: for every device in FORMATTING_DEVICES, the
// share of documents (via `rate(used, total, 2)`) whose raw text matches that device.
//
// documents: parsed documents; each may carry the original file text in `raw`
//            (a missing/empty `raw` is treated as an empty string).
// Returns an object keyed by device name with the per-document usage rate.
function formattingPalette(documents) {
  // Front-matter and fence stripping depend only on the document, not on the device,
  // so compute both views once per document instead of once per device.
  const views = documents.map((document) => {
    const raw = stripFrontmatter(document.raw || "");
    return { raw, outsideFences: stripCodeFences(raw) };
  });

  const palette = {};
  for (const [device, pattern] of Object.entries(FORMATTING_DEVICES)) {
    // codeBlock is the fence itself; every other device is checked outside fences.
    const used = views.filter((view) =>
      pattern.test(device === "codeBlock" ? view.raw : view.outsideFences)).length;
    palette[device] = rate(used, documents.length, 2);
  }
  return palette;
}
|
|
108
|
+
|
|
109
|
+
// Removes fenced code blocks so device markers that appear only inside fenced code are
// ignored: a corpus of code-heavy posts should not be credited with tables or lists it
// merely quoted.
//
// Works line by line, following the CommonMark fence rules, instead of pairing raw
// "```" occurrences:
//   - a fence opens on a line that starts (after at most 3 spaces) with 3+ backticks or
//     3+ tildes; a backtick fence's info string may not itself contain a backtick, so
//     an inline "```code```" span at the start of a line is left alone;
//   - it closes only on a line made of the same marker, at least as long as the opener;
//   - an unterminated fence runs to the end of the document.
//
// raw: document text (null/undefined are treated as empty).
// Returns the text with every fenced block replaced by a single empty line, joined
// with "\n".
function stripCodeFences(raw) {
  const kept = [];
  let open = null; // { marker, length } while inside a fenced block, otherwise null

  for (const line of String(raw ?? "").split(/\r?\n/)) {
    const fence = /^[ \t]{0,3}(`{3,}|~{3,})(.*)$/.exec(line);

    if (open === null) {
      const opensFence = fence !== null
        && !(fence[1][0] === "`" && fence[2].includes("`"));
      if (opensFence) {
        open = { marker: fence[1][0], length: fence[1].length };
        // Keep one blank line so the text before and after the block stays separated.
        kept.push("");
      } else {
        kept.push(line);
      }
      continue;
    }

    const closesFence = fence !== null
      && fence[1][0] === open.marker
      && fence[1].length >= open.length
      && fence[2].trim() === "";
    if (closesFence) {
      open = null;
    }
    // Lines inside the fence (and the closing fence itself) are dropped.
  }

  return kept.join("\n");
}
|
|
114
|
+
|
|
115
|
+
// Drops leading YAML front matter so its `---` fences and indented ` - tag` lines are
// not counted as section dividers or nested lists.
//
// Only a block at the very start of the text is removed: an optional byte-order mark
// and leading whitespace, an opening `---` line, the (possibly empty) body, and a
// closing `---` that sits on its own line. Text without leading front matter is
// returned unchanged.
//
// raw: document text as a string.
// Returns the text with the leading front-matter block removed.
function stripFrontmatter(raw) {
  // The BOM is written as an explicit \uFEFF escape: an invisible literal character is
  // easily lost in transit, which leaves a bare `^?` and an invalid regular expression.
  return raw.replace(/^\uFEFF?\s*---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/, "");
}
|
|
120
|
+
|
|
121
|
+
// Reports whether a heading reads as Title Case.
//
// Words are split on whitespace and stripped of surrounding punctuation/markup, so
// `"Great"`, `(Optional)` or `**Bold**` are judged by their first letter rather than by
// the wrapper character. Only "significant" words (longer than 3 characters after
// stripping) are considered, which leaves short articles and prepositions out of the
// ratio. The heading counts as Title Case when there are at least two significant words
// and at least 80% of them start with an uppercase letter. Uppercase is checked with
// the Unicode `Lu` property, so capitals such as "Ü" or "É" are recognized.
//
// text: heading text; null/undefined are treated as an empty string.
// Returns true when the heading is Title Case by the rule above, otherwise false.
function isTitleCase(text) {
  const words = String(text ?? "")
    .split(/\s+/)
    .map((word) => word.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ""))
    .filter((word) => /\p{L}/u.test(word));
  if (words.length < 2) {
    return false;
  }

  const significant = words.filter((word) => word.length > 3);
  if (significant.length < 2) {
    return false;
  }

  const capitalized = significant.filter((word) => /^\p{Lu}/u.test(word)).length;
  return capitalized / significant.length >= 0.8;
}
|
package/src/v2/brief.js
CHANGED
|
@@ -27,7 +27,7 @@ export function voiceArticleBriefV2({ voice, topic, evidence, cwd = process.cwd(
|
|
|
27
27
|
workingThesis: `Draft a grounded article about ${topic}. Let the supplied evidence set the size of each claim before broadening the lesson.`,
|
|
28
28
|
evidence: evidenceResult,
|
|
29
29
|
missingEvidence: missingEvidenceFor({ topic, evidenceAnchors: evidenceResult.anchors }),
|
|
30
|
-
outline: outlineFor(profile),
|
|
30
|
+
outline: outlineFor(profile, topic),
|
|
31
31
|
voiceCautions: [
|
|
32
32
|
...profile.guidance.avoid,
|
|
33
33
|
"Mark unsupported claims as [specific evidence needed] instead of inventing proof.",
|
|
@@ -116,19 +116,53 @@ function missingEvidenceFor({ topic, evidenceAnchors }) {
|
|
|
116
116
|
return items;
|
|
117
117
|
}
|
|
118
118
|
|
|
119
|
-
function outlineFor(profile) {
|
|
120
|
-
const
|
|
121
|
-
const
|
|
119
|
+
function outlineFor(profile, topic = "") {
|
|
120
|
+
const rhythm = profile.families.rhythm.features.sentenceWords;
|
|
121
|
+
const seed = topicSeed(topic);
|
|
122
|
+
|
|
123
|
+
const openingPatterns = (profile.families.rhetoricalShape.features.openingMovePatterns || [])
|
|
124
|
+
.map((item) => item.value)
|
|
125
|
+
.filter(Boolean);
|
|
126
|
+
const opening = pickSeeded(openingPatterns, seed);
|
|
127
|
+
|
|
128
|
+
const sectionShapes = (profile.families.structure.features.sectionOrderPatterns || [])
|
|
129
|
+
.map((item) => item.value)
|
|
130
|
+
.filter(Boolean);
|
|
131
|
+
const sectionShape = pickSeeded(sectionShapes, seed + 1);
|
|
132
|
+
const headingCount = profile.families.structure.features.headingCount || {};
|
|
133
|
+
|
|
134
|
+
const rhythmRange = rhythm.count > 0
|
|
135
|
+
? `${rhythm.p25}-${rhythm.p75} words (median ${rhythm.median}, variation ~${rhythm.stdev})`
|
|
136
|
+
: "a varied range of sentence lengths";
|
|
137
|
+
|
|
122
138
|
return [
|
|
123
139
|
opening
|
|
124
|
-
? `Start from a concrete artifact or observation
|
|
125
|
-
: "Start from a concrete artifact or observation before making the larger claim.",
|
|
140
|
+
? `Start from a concrete artifact or observation; for this piece try the opening shape "${opening}", but do not reuse one opening across articles.`
|
|
141
|
+
: "Start from a concrete artifact or observation before making the larger claim, varying the opening across pieces.",
|
|
126
142
|
"Name the pressure, question, or practical stakes that make the evidence matter.",
|
|
127
|
-
|
|
143
|
+
sectionShape
|
|
144
|
+
? `Shape the body using a section pattern from the corpus such as "${sectionShape}" (heading count usually ${headingCount.min ?? 0}-${headingCount.max ?? 0}); pick what fits this topic rather than a fixed skeleton.`
|
|
145
|
+
: "Shape the body to fit this topic; vary sectioning across pieces instead of reusing one skeleton.",
|
|
146
|
+
`Develop the article in the learned register, mixing short and long sentences across ${rhythmRange} rather than holding a constant cadence.`,
|
|
128
147
|
"Close by returning to the evidence and leaving the reader with a practical handle, not a generic conclusion.",
|
|
129
148
|
];
|
|
130
149
|
}
|
|
131
150
|
|
|
151
|
+
// Derives a deterministic, non-negative 32-bit integer from a topic string so the same
// topic always selects the same outline variants.
//
// The hash is a base-31 polynomial over the topic's Unicode code points, wrapped to an
// unsigned 32-bit value after every step. The string is iterated by code point, so the
// full code point is hashed (`codePointAt`); `charCodeAt(0)` would return only the high
// surrogate for characters outside the BMP and make many emoji hash identically.
//
// topic: any value; null/undefined are treated as an empty string.
// Returns an integer in [0, 2^32 - 1]; 0 for an empty topic.
function topicSeed(topic) {
  let hash = 0;
  for (const char of String(topic ?? "")) {
    hash = (hash * 31 + char.codePointAt(0)) >>> 0;
  }
  return hash;
}
|
|
158
|
+
|
|
159
|
+
// Chooses one entry of `values` deterministically from `seed` by wrapping the seed
// around the list length. An empty list yields the empty string.
function pickSeeded(values, seed) {
  const total = values.length;
  return total ? values[seed % total] : "";
}
|
|
165
|
+
|
|
132
166
|
function resolvePath(cwd, value) {
|
|
133
167
|
return path.isAbsolute(value) ? value : path.join(cwd, value);
|
|
134
168
|
}
|
package/src/v2/document-model.js
CHANGED
|
@@ -191,6 +191,7 @@ export function parseDocument({ filePath, rootDir = process.cwd(), contents }) {
|
|
|
191
191
|
sentences,
|
|
192
192
|
wordCount: sentences.reduce((sum, sentence) => sum + sentence.tokens.length, 0),
|
|
193
193
|
text: paragraphs.map((paragraph) => paragraph.text).join("\n\n"),
|
|
194
|
+
raw: String(contents ?? ""),
|
|
194
195
|
};
|
|
195
196
|
}
|
|
196
197
|
|
package/src/v2/profile.js
CHANGED
|
@@ -302,35 +302,139 @@ function confidenceFor(documentCount, sentenceCount, wordCount) {
|
|
|
302
302
|
function guidanceFor({ source, families }) {
|
|
303
303
|
const primaryRegister = families.register.features.primary.value;
|
|
304
304
|
const evidenceRate = families.evidence.features.evidenceSentenceRate;
|
|
305
|
-
const
|
|
306
|
-
const
|
|
305
|
+
const sentenceWords = families.rhythm.features.sentenceWords;
|
|
306
|
+
const registerMix = registerMixLabel(families.register.features.scores);
|
|
307
|
+
const openingPatterns = (families.rhetoricalShape.features.openingMovePatterns || [])
|
|
308
|
+
.slice(0, 3)
|
|
309
|
+
.map((item) => item.value)
|
|
310
|
+
.filter(Boolean);
|
|
307
311
|
|
|
308
312
|
const draftingRules = [
|
|
309
|
-
|
|
310
|
-
|
|
313
|
+
sentenceWords.count > 0
|
|
314
|
+
? `Vary sentence length across the corpus band of ${sentenceWords.p25}-${sentenceWords.p75} words (median ${sentenceWords.median}). Mix short and long sentences; do not hold a constant length.`
|
|
315
|
+
: "Vary sentence length; mix short and long sentences rather than holding a constant cadence.",
|
|
316
|
+
sentenceWords.stdev > 0
|
|
317
|
+
? `Keep sentence-length variation (burstiness) near the corpus standard deviation of about ${sentenceWords.stdev} words; flattening every sentence to the median reads as machine-made.`
|
|
318
|
+
: "Preserve natural variation in sentence length rather than flattening it to one value.",
|
|
319
|
+
registerMix
|
|
320
|
+
? `Default to the learned register mix (${registerMix}) instead of one fixed genre; let each piece lean differently within it.`
|
|
321
|
+
: `Use ${primaryRegister} register as the default genre signal unless the piece intentionally changes genre.`,
|
|
311
322
|
evidenceRate > 0.35
|
|
312
323
|
? "Anchor broad claims with concrete scenes, numbers, quotes, citations, or specific examples."
|
|
313
324
|
: "Do not force evidence density higher than the source corpus supports.",
|
|
314
|
-
|
|
315
|
-
? `
|
|
316
|
-
: "Start from the article's real subject rather than generic positioning.",
|
|
325
|
+
openingPatterns.length
|
|
326
|
+
? `Rotate among the corpus's observed opening shapes (e.g. ${openingPatterns.join("; ")}) rather than reusing one opening every time.`
|
|
327
|
+
: "Start from the article's real subject rather than generic positioning, and vary openings across pieces.",
|
|
317
328
|
];
|
|
318
329
|
|
|
319
330
|
return {
|
|
320
331
|
summary: [
|
|
321
332
|
`Local Dravoice V2 profile from ${source.documentCount} document(s), ${source.wordCount} words, confidence ${source.confidence.band}.`,
|
|
322
|
-
`Primary register signal: ${primaryRegister}.`,
|
|
333
|
+
`Primary register signal: ${primaryRegister}${registerMix ? ` (mix: ${registerMix})` : ""}.`,
|
|
323
334
|
],
|
|
324
335
|
draftingRules,
|
|
336
|
+
formatting: formattingGuidance(families.structure.features),
|
|
325
337
|
avoid: [
|
|
326
338
|
"Do not treat topic vocabulary as proof of voice fit.",
|
|
327
339
|
"Do not claim a draft is or is not the writer's true voice.",
|
|
328
340
|
"Do not invent concrete evidence to satisfy a style finding.",
|
|
341
|
+
"Do not reuse the same outline, opening, or formatting on every piece; stay inside the learned ranges and vary within them.",
|
|
329
342
|
],
|
|
330
343
|
examples: [],
|
|
331
344
|
};
|
|
332
345
|
}
|
|
333
346
|
|
|
347
|
+
// Formats the register mix as "value score, value score, ..." using the first three
// entries whose score is above zero, in the order given. Returns an empty string when
// fewer than two entries qualify (a single register is not a "mix") or when `scores`
// is missing.
function registerMixLabel(scores) {
  const positive = [];
  for (const entry of scores || []) {
    if (entry.score > 0) {
      positive.push(entry);
    }
  }

  if (positive.length < 2) {
    return "";
  }

  const parts = [];
  for (const { value, score } of positive.slice(0, 3)) {
    parts.push(`${value} ${score}`);
  }
  return parts.join(", ");
}
|
|
354
|
+
|
|
355
|
+
// Assembles the "formatting" guidance list from the structure feature family.
// The list always opens with a fixed disclaimer that formatting is not voice, followed
// by: a heading-count note (only when the corpus range is not a single value), the
// first four observed section-order patterns, list/quote document shares (only when
// listDocumentRate is a number), then whatever antiTemplateGuidance and paletteGuidance
// return for the template tics and the formatting palette.
function formattingGuidance(structure) {
  const asPercent = (share) => Math.round(share * 100);
  const observed = [];

  const headings = structure.headingCount || {};
  const headingsVary = headings.count > 0 && headings.min !== headings.max;
  if (headingsVary) {
    observed.push(`Heading count varies across the corpus (${headings.min}-${headings.max}); pick a level of sectioning that fits the piece rather than a fixed template.`);
  }

  const shapes = (structure.sectionOrderPatterns || [])
    .slice(0, 4)
    .map(({ value }) => value)
    .filter(Boolean);
  if (shapes.length > 0) {
    observed.push(`Observed section shapes to choose among (do not reuse one skeleton): ${shapes.join(" | ")}.`);
  }

  if (typeof structure.listDocumentRate === "number") {
    const listShare = asPercent(structure.listDocumentRate);
    const quoteShare = asPercent(structure.quoteDocumentRate ?? 0);
    observed.push(`Lists appear in about ${listShare}% of pieces and quotes in about ${quoteShare}%; use them where they fit, not on every draft.`);
  }

  const disclaimer = "Formatting and document structure are NOT the writer's voice and must not be reproduced as if they were. Voice lives in the Drafting Rules above (rhythm, diction, register, evidence). The items below are formatting habits to deliberately vary: do not justify keeping a repeated tic as \"authentic voice.\"";

  return [
    disclaimer,
    ...observed,
    ...antiTemplateGuidance(structure.templateTics),
    ...paletteGuidance(structure.formattingPalette),
  ];
}
|
|
375
|
+
|
|
376
|
+
// Human-readable names for the formatting-palette device keys.
const PALETTE_LABELS = {
  blockquote: "blockquotes",
  bulletList: "bullet lists",
  orderedList: "numbered lists",
  nestedList: "nested lists",
  subHeading: "sub-headings (h3+)",
  table: "tables",
  codeBlock: "code blocks",
  inlineCode: "inline code",
  boldInline: "bold emphasis",
  italicInline: "italic emphasis",
  link: "links",
  horizontalRule: "section dividers",
};

// Turns a formatting palette (device -> per-document usage rate) into guidance lines.
// Devices used in at least 60% of documents are named as habits to widen; devices used
// in at most 20% are named as options to reach for where the content fits. A missing
// palette yields no guidance, and each line is emitted only when its group is
// non-empty.
function paletteGuidance(palette) {
  if (!palette) {
    return [];
  }

  const heavy = [];
  const rare = [];
  for (const [device, share] of Object.entries(palette)) {
    if (share >= 0.6) {
      heavy.push(device);
    }
    if (share <= 0.2) {
      rare.push(device);
    }
  }

  const guidance = [];
  if (heavy.length > 0) {
    guidance.push(`The corpus's formatting palette is narrow: it leans on ${labelList(heavy)}. Treat that as a habit to widen, not a target to hit.`);
  }
  if (rare.length > 0) {
    guidance.push(`Markdown devices the corpus rarely or never uses: ${labelList(rare)}. Reach for these where the content fits (a comparison wants a table, steps want numbered items, a definition wants inline code, an aside wants italics) so pieces do not all share one formatting shape.`);
  }
  return guidance;
}

// Joins device keys into a comma-separated list of their display labels; a key with no
// label is shown as-is.
function labelList(devices) {
  const labels = [];
  for (const device of devices) {
    labels.push(PALETTE_LABELS[device] ?? device);
  }
  return labels.join(", ");
}
|
|
413
|
+
|
|
414
|
+
// Produces prescriptive warnings for formatting tics that dominate the corpus.
// Reproducing an over-used template faithfully is what makes generated pieces look
// copy-pasted, so these lines are injected to break the habit rather than to describe
// a pattern worth matching. Each tic has its own threshold and is reported only when
// its rate reaches it, so a varied corpus produces no warnings; missing tics yield an
// empty list.
function antiTemplateGuidance(tics) {
  if (!tics) {
    return [];
  }

  const asPercent = (share) => Math.round(share * 100);

  // One entry per tic, in output order: observed rate, minimum rate, message builder.
  const checks = [
    {
      share: tics.singleSentenceParagraphRate,
      floor: 0.35,
      warn: (pct) => `Watch a strong template tic: about ${pct}% of corpus paragraphs are a single sentence. This staccato one-line-paragraph cadence is the loudest "copy-paste" tell. Deliberately group related sentences into multi-sentence paragraphs; reserve one-line paragraphs for genuine emphasis.`,
    },
    {
      share: tics.titleCaseHeadingRate,
      floor: 0.6,
      warn: (pct) => `Watch a template tic: about ${pct}% of headings are Title Case. Vary heading style (sentence case, questions, or no headings at all) instead of the same Title-Case section labels every time.`,
    },
    {
      share: tics.ledeBlockquoteRate,
      floor: 0.5,
      warn: (pct) => `Watch a template tic: about ${pct}% of pieces drop a pull-quote (blockquote) into the intro before the first heading. Do not open with an aphoristic blockquote by default; let most pieces earn a quote later or skip it.`,
    },
    {
      share: tics.sectionListRate,
      floor: 0.45,
      warn: (pct) => `Watch a template tic: about ${pct}% of sections contain a bullet list. Do not put a list in section after section; carry most points in connected prose and reserve lists for genuinely enumerable material.`,
    },
  ];

  return checks
    .filter(({ share, floor }) => share >= floor)
    .map(({ share, warn }) => warn(asPercent(share)));
}
|
|
437
|
+
|
|
334
438
|
function toleranceFor(band, deep, strong, weak) {
|
|
335
439
|
if (band === "deep") {
|
|
336
440
|
return deep;
|
package/src/v2/prompt.js
CHANGED
|
@@ -23,9 +23,9 @@ function renderPrompt(profile, format) {
|
|
|
23
23
|
system: "System writing guidance: Dravoice V2",
|
|
24
24
|
}[format];
|
|
25
25
|
const preface = {
|
|
26
|
-
agents: "Use this as local, inspectable
|
|
27
|
-
claude: "Use these project-local voice notes
|
|
28
|
-
system: "Follow these local voice constraints
|
|
26
|
+
agents: "Use this as local guidance that complements AI by preserving reusable, inspectable constraints from the writer's own corpus. It is not an AI detector or a license to imitate a third party.",
|
|
27
|
+
claude: "Use these project-local voice notes as guidance that complements AI by preserving reusable, inspectable constraints for drafting or reviewing prose. Treat them as guidance, not identity proof.",
|
|
28
|
+
system: "Follow these local voice constraints as reusable, inspectable guidance for AI-assisted prose. Do not expose private source text or claim authorship identity from them.",
|
|
29
29
|
}[format];
|
|
30
30
|
const lines = [
|
|
31
31
|
header,
|
|
@@ -49,6 +49,13 @@ function renderPrompt(profile, format) {
|
|
|
49
49
|
lines.push(`- ${rule}`);
|
|
50
50
|
}
|
|
51
51
|
|
|
52
|
+
if (profile.guidance.formatting && profile.guidance.formatting.length) {
|
|
53
|
+
lines.push("", "## Formatting: Vary Deliberately (Not Voice)", "");
|
|
54
|
+
for (const rule of profile.guidance.formatting) {
|
|
55
|
+
lines.push(`- ${rule}`);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
52
59
|
lines.push("", "## Avoid", "");
|
|
53
60
|
for (const item of profile.guidance.avoid) {
|
|
54
61
|
lines.push(`- ${item}`);
|
package/src/v2/text-utils.js
CHANGED
|
@@ -75,17 +75,20 @@ export function characterNgrams(text, size = 3) {
|
|
|
75
75
|
|
|
76
76
|
export function distribution(values) {
|
|
77
77
|
if (!values.length) {
|
|
78
|
-
return { count: 0, min: 0, max: 0, mean: 0, median: 0, p25: 0, p75: 0 };
|
|
78
|
+
return { count: 0, min: 0, max: 0, mean: 0, median: 0, p25: 0, p75: 0, stdev: 0 };
|
|
79
79
|
}
|
|
80
80
|
const sorted = [...values].sort((a, b) => a - b);
|
|
81
|
+
const mean = sorted.reduce((sum, value) => sum + value, 0) / sorted.length;
|
|
82
|
+
const variance = sorted.reduce((sum, value) => sum + (value - mean) ** 2, 0) / sorted.length;
|
|
81
83
|
return {
|
|
82
84
|
count: sorted.length,
|
|
83
85
|
min: sorted[0],
|
|
84
86
|
max: sorted[sorted.length - 1],
|
|
85
|
-
mean: round(
|
|
87
|
+
mean: round(mean, 2),
|
|
86
88
|
median: percentile(sorted, 0.5),
|
|
87
89
|
p25: percentile(sorted, 0.25),
|
|
88
90
|
p75: percentile(sorted, 0.75),
|
|
91
|
+
stdev: round(Math.sqrt(variance), 2),
|
|
89
92
|
};
|
|
90
93
|
}
|
|
91
94
|
|