dravoice 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -9
- package/package.json +1 -1
- package/src/index.js +113 -13
- package/src/v2/analyzers/discourse.js +7 -1
- package/src/v2/analyzers/evidence.js +3 -3
- package/src/v2/analyzers/register.js +28 -4
- package/src/v2/analyzers/rhetorical-shape.js +7 -1
- package/src/v2/analyzers/structure.js +109 -1
- package/src/v2/benchmark.js +83 -0
- package/src/v2/brief.js +41 -7
- package/src/v2/doctor.js +308 -0
- package/src/v2/document-model.js +78 -6
- package/src/v2/inspect.js +2 -2
- package/src/v2/profile.js +238 -19
- package/src/v2/prompt.js +10 -3
- package/src/v2/review.js +142 -16
- package/src/v2/revise-plan.js +111 -8
- package/src/v2/stylometry.js +11 -7
- package/src/v2/text-utils.js +5 -2
package/README.md
CHANGED
|
@@ -15,6 +15,13 @@ Start with your own writing, not a blank prompt. Create an `articles`
|
|
|
15
15
|
directory in the folder where you want to run Dravoice, then copy in at least
|
|
16
16
|
3 representative long-form Markdown, MDX, or plain-text pieces.
|
|
17
17
|
|
|
18
|
+
## Why Not Just Paste Examples Into AI?
|
|
19
|
+
|
|
20
|
+
That can be fine for casual one-off drafts. Dravoice is for repeatable
|
|
21
|
+
long-form workflows where you want inspection, evidence grounding, and revision diagnostics
|
|
22
|
+
from local, inspectable guidance instead of asking a model to infer your style
|
|
23
|
+
from scratch every time.
|
|
24
|
+
|
|
18
25
|
```bash
|
|
19
26
|
mkdir -p articles
|
|
20
27
|
```
|
|
@@ -38,18 +45,22 @@ files before it learns from them.
|
|
|
38
45
|
If your writing is already somewhere else, point Dravoice at that directory:
|
|
39
46
|
|
|
40
47
|
```bash
|
|
48
|
+
npx dravoice doctor --examples ~/writing
|
|
41
49
|
npx dravoice init --examples ~/writing
|
|
42
50
|
```
|
|
43
51
|
|
|
44
|
-
### 1. Initialize and Inspect
|
|
52
|
+
### 1. Check, Initialize, and Inspect
|
|
45
53
|
|
|
46
54
|
```bash
|
|
55
|
+
npx dravoice doctor
|
|
47
56
|
npx dravoice init
|
|
48
57
|
npx dravoice inspect
|
|
49
58
|
```
|
|
50
59
|
|
|
51
|
-
|
|
52
|
-
|
|
60
|
+
Run `doctor` before learning so missing folders, unsupported files, weak
|
|
61
|
+
corpora, duplicate-looking files, and length-imbalanced source sets get clear
|
|
62
|
+
next steps. Inspect the profile before trusting it. If the feature-family
|
|
63
|
+
summaries do not look recognizable, improve the source corpus first.
|
|
53
64
|
|
|
54
65
|
### 2. Generate Guidance and a Brief
|
|
55
66
|
|
|
@@ -68,12 +79,13 @@ npx dravoice revise-plan draft.md
|
|
|
68
79
|
npx dravoice review draft.md
|
|
69
80
|
```
|
|
70
81
|
|
|
71
|
-
`init` reads your source pieces, writes a
|
|
72
|
-
and writes `.dravoice.yml` project
|
|
73
|
-
feature families visible, `prompt` turns
|
|
74
|
-
drafting guidance, `brief` creates an
|
|
75
|
-
`revise-plan` ranks calibrated stylometric
|
|
76
|
-
reports family-level drift
|
|
82
|
+
`doctor` checks corpus readiness, `init` reads your source pieces, writes a
|
|
83
|
+
local profile in `./dravoice-voice`, and writes `.dravoice.yml` project
|
|
84
|
+
defaults. `inspect` makes the learned feature families visible, `prompt` turns
|
|
85
|
+
high-confidence observations into drafting guidance, `brief` creates an
|
|
86
|
+
evidence-first article plan, `revise-plan` ranks calibrated stylometric
|
|
87
|
+
revision actions, and `review` reports family-level drift with calibration
|
|
88
|
+
confidence.
|
|
77
89
|
|
|
78
90
|
Run `drav help init` or `drav help review` for command-specific help.
|
|
79
91
|
|
|
@@ -90,9 +102,27 @@ workflow:
|
|
|
90
102
|
|
|
91
103
|
```bash
|
|
92
104
|
npx dravoice benchmark prepare --examples ./articles --topic "A new article topic" --out ./bench-run --seed 42
|
|
105
|
+
npx dravoice benchmark prepare-many --examples ./articles --topic "A new article topic" --out ./bench-runs --runs 3 --seed 42
|
|
93
106
|
npx dravoice benchmark score --run ./bench-run --judge ./bench-run/judge/judgment.json
|
|
94
107
|
```
|
|
95
108
|
|
|
109
|
+
Benchmark reports include deterministic margins and repeat-run cautions. A
|
|
110
|
+
single run is directional evidence only. Score output includes suggested
|
|
111
|
+
follow-up seeds for repeated validation runs.
|
|
112
|
+
|
|
113
|
+
## Trust Boundaries
|
|
114
|
+
|
|
115
|
+
Dravoice profiles include per-family calibration diagnostics: threshold
|
|
116
|
+
observations, stability, minimum-evidence checks, and whether a family is usable
|
|
117
|
+
for findings. Weak corpora produce cautious guidance. Strict review can surface
|
|
118
|
+
document-level discourse, lexical, register, and structure drift, but the
|
|
119
|
+
findings remain revision guidance, not authorship proof.
|
|
120
|
+
|
|
121
|
+
V2 profiles expose marker-set register metadata, mixed-register warnings,
|
|
122
|
+
heading-depth and section-order structure signals, paragraph-localized
|
|
123
|
+
revision actions, expanded MDX scaffold filtering before analysis, and
|
|
124
|
+
library-level custom register marker sets for project-specific scoring.
|
|
125
|
+
|
|
96
126
|
## Fresh Install Smoke Test
|
|
97
127
|
|
|
98
128
|
From a packed tarball:
|
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -3,11 +3,13 @@ import path from "node:path";
|
|
|
3
3
|
import readline from "node:readline";
|
|
4
4
|
import {
|
|
5
5
|
prepareVoiceBenchmark,
|
|
6
|
+
prepareVoiceBenchmarkRuns,
|
|
6
7
|
renderBenchmarkReport,
|
|
7
8
|
scoreVoiceBenchmark,
|
|
8
9
|
} from "./v2/benchmark.js";
|
|
9
10
|
import { renderVoiceBriefV2, voiceArticleBriefV2 } from "./v2/brief.js";
|
|
10
11
|
import { renderInspectV2 } from "./v2/inspect.js";
|
|
12
|
+
import { diagnoseVoiceCorpusV2, renderCorpusDoctorV2 } from "./v2/doctor.js";
|
|
11
13
|
import { learnVoicePackV2, loadVoicePackV2 } from "./v2/profile.js";
|
|
12
14
|
import { voicePromptPackV2 } from "./v2/prompt.js";
|
|
13
15
|
import { renderVoiceReviewV2, reviewVoiceDraftV2 } from "./v2/review.js";
|
|
@@ -27,13 +29,15 @@ import {
|
|
|
27
29
|
export {
|
|
28
30
|
learnVoicePackV2 as learnVoicePack,
|
|
29
31
|
loadVoicePackV2 as loadVoicePack,
|
|
32
|
+
diagnoseVoiceCorpusV2 as diagnoseVoiceCorpus,
|
|
30
33
|
revisePlanDraftV2 as revisePlanDraft,
|
|
31
34
|
reviewVoiceDraftV2 as reviewVoiceDraft,
|
|
32
35
|
voicePromptPackV2 as voicePromptPack,
|
|
33
36
|
};
|
|
34
37
|
export { renderInspectV2, renderRevisePlanV2 as renderRevisePlan, renderVoiceReviewV2 as renderVoiceReview };
|
|
35
38
|
export { renderVoiceBriefV2 as renderVoiceBrief, voiceArticleBriefV2 as voiceArticleBrief };
|
|
36
|
-
export {
|
|
39
|
+
export { renderCorpusDoctorV2 as renderCorpusDoctor };
|
|
40
|
+
export { prepareVoiceBenchmark, prepareVoiceBenchmarkRuns, renderBenchmarkReport, scoreVoiceBenchmark };
|
|
37
41
|
|
|
38
42
|
const INIT_DISCOVERY_DIRS = [
|
|
39
43
|
"./articles",
|
|
@@ -128,6 +132,24 @@ export async function runCli(args, io) {
|
|
|
128
132
|
return 0;
|
|
129
133
|
}
|
|
130
134
|
|
|
135
|
+
if (command === "doctor") {
|
|
136
|
+
const { options, positional } = parseArgs(rest, ["examples", "format"], "doctor");
|
|
137
|
+
rejectPositionals(positional, "doctor");
|
|
138
|
+
const config = loadProjectConfig(io.cwd);
|
|
139
|
+
const examples = options.examples ?? config.examples ?? DEFAULT_EXAMPLES_DIR;
|
|
140
|
+
const format = formatOption(options.format, ["text", "json"], "doctor");
|
|
141
|
+
const result = diagnoseVoiceCorpusV2({
|
|
142
|
+
examplesDir: examples,
|
|
143
|
+
cwd: io.cwd,
|
|
144
|
+
});
|
|
145
|
+
if (format === "json") {
|
|
146
|
+
io.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
|
|
147
|
+
} else {
|
|
148
|
+
io.stdout.write(renderCorpusDoctorV2(result));
|
|
149
|
+
}
|
|
150
|
+
return result.exitCode;
|
|
151
|
+
}
|
|
152
|
+
|
|
131
153
|
if (command === "review") {
|
|
132
154
|
const { options, positional } = parseArgs(rest, ["voice", "mode", "format"], "review");
|
|
133
155
|
rejectUnexpectedPositionals(positional, 1, "review positional");
|
|
@@ -139,7 +161,7 @@ export async function runCli(args, io) {
|
|
|
139
161
|
}
|
|
140
162
|
const result = reviewVoiceDraftV2({
|
|
141
163
|
file: resolvePath(io.cwd, file),
|
|
142
|
-
voice:
|
|
164
|
+
voice: loadVoiceForCommand(io.cwd, options.voice, "review"),
|
|
143
165
|
cwd: io.cwd,
|
|
144
166
|
mode,
|
|
145
167
|
});
|
|
@@ -162,7 +184,7 @@ export async function runCli(args, io) {
|
|
|
162
184
|
}
|
|
163
185
|
const result = revisePlanDraftV2({
|
|
164
186
|
file: resolvePath(io.cwd, file),
|
|
165
|
-
voice:
|
|
187
|
+
voice: loadVoiceForCommand(io.cwd, options.voice, "revise-plan"),
|
|
166
188
|
cwd: io.cwd,
|
|
167
189
|
});
|
|
168
190
|
if (format === "json") {
|
|
@@ -180,7 +202,7 @@ export async function runCli(args, io) {
|
|
|
180
202
|
const promptOptions = resolvePromptOptions(io.cwd, options);
|
|
181
203
|
const format = formatOption(promptOptions.format, ["agents", "claude", "system"], "prompt");
|
|
182
204
|
const rendered = voicePromptPackV2({
|
|
183
|
-
voice:
|
|
205
|
+
voice: loadVoiceForCommand(io.cwd, options.voice, "prompt"),
|
|
184
206
|
format,
|
|
185
207
|
outPath: promptOptions.out ? resolvePath(io.cwd, promptOptions.out) : undefined,
|
|
186
208
|
});
|
|
@@ -197,7 +219,7 @@ export async function runCli(args, io) {
|
|
|
197
219
|
const { options, positional } = parseArgs(rest, ["voice", "topic", "evidence", "format", "out"], "brief");
|
|
198
220
|
const format = formatOption(options.format, ["text", "json"], "brief");
|
|
199
221
|
const result = voiceArticleBriefV2({
|
|
200
|
-
voice:
|
|
222
|
+
voice: loadVoiceForCommand(io.cwd, options.voice, "brief"),
|
|
201
223
|
topic: topicOption(options, positional, "brief"),
|
|
202
224
|
evidence: options.evidence ? resolvePath(io.cwd, options.evidence) : undefined,
|
|
203
225
|
cwd: io.cwd,
|
|
@@ -220,7 +242,7 @@ export async function runCli(args, io) {
|
|
|
220
242
|
if (command === "inspect") {
|
|
221
243
|
const { options, positional } = parseArgs(rest, ["voice"], "inspect");
|
|
222
244
|
rejectPositionals(positional, "inspect");
|
|
223
|
-
const profile =
|
|
245
|
+
const profile = loadVoiceForCommand(io.cwd, options.voice, "inspect");
|
|
224
246
|
io.stdout.write(renderInspectV2(profile));
|
|
225
247
|
io.stdout.write("Next: drav prompt --out AGENTS.md\n");
|
|
226
248
|
return 0;
|
|
@@ -263,6 +285,20 @@ function runBenchmarkCli(args, io) {
|
|
|
263
285
|
return 0;
|
|
264
286
|
}
|
|
265
287
|
|
|
288
|
+
if (benchmarkCommand === "prepare-many") {
|
|
289
|
+
const { options, positional } = parseArgs(rest, ["examples", "topic", "out", "seed", "runs"], "benchmark prepare-many");
|
|
290
|
+
rejectPositionals(positional, "benchmark prepare-many");
|
|
291
|
+
const result = prepareVoiceBenchmarkRuns({
|
|
292
|
+
examplesDir: resolvePath(io.cwd, requiredOption(options, "examples", "benchmark prepare-many")),
|
|
293
|
+
topic: requiredOption(options, "topic", "benchmark prepare-many"),
|
|
294
|
+
outDir: resolvePath(io.cwd, requiredOption(options, "out", "benchmark prepare-many")),
|
|
295
|
+
seed: options.seed ?? "1",
|
|
296
|
+
runs: options.runs ?? "3",
|
|
297
|
+
});
|
|
298
|
+
io.stdout.write(`Prepared ${result.runs.length} benchmark run(s) at ${resolvePath(io.cwd, requiredOption(options, "out", "benchmark prepare-many"))}.\n`);
|
|
299
|
+
return 0;
|
|
300
|
+
}
|
|
301
|
+
|
|
266
302
|
if (benchmarkCommand === "score") {
|
|
267
303
|
const { options, positional } = parseArgs(rest, ["run", "judge", "format"], "benchmark score");
|
|
268
304
|
rejectPositionals(positional, "benchmark score");
|
|
@@ -665,6 +701,33 @@ function resolveVoicePath(cwd, optionValue) {
|
|
|
665
701
|
return cwd;
|
|
666
702
|
}
|
|
667
703
|
|
|
704
|
+
// Loads the voice profile a CLI command should use.
//
// The profile directory comes from resolveVoicePath(cwd, optionValue). When
// loading fails with a message that starts with "No Dravoice V2 profile found",
// the failure is replaced by a usage error carrying first-run instructions for
// `command`. Every other failure is rethrown untouched.
function loadVoiceForCommand(cwd, optionValue, command) {
  const profileDir = resolveVoicePath(cwd, optionValue);
  try {
    return loadVoicePackV2(profileDir);
  } catch (failure) {
    // The loader is recognised by its message prefix; keep this in sync with
    // the text thrown by loadVoicePackV2.
    const profileIsMissing = /^No Dravoice V2 profile found/.test(failure.message);
    if (!profileIsMissing) {
      throw failure;
    }
    throw usageError(missingVoiceProfileMessage(command, profileDir), command);
  }
}
|
|
715
|
+
|
|
716
|
+
// Builds the multi-line message shown when a command cannot find a voice
// profile: where it looked, the doctor/init steps to run, and a closing
// "Next:" hint. Only the follow-up sentence varies by command (`inspect` names
// itself; every other command is told to rerun).
function missingVoiceProfileMessage(command, voicePath) {
  const followUp = command === "inspect"
    ? "After that, run `drav inspect` again."
    : "After that, rerun this command.";
  const lookedFor = `Looked for: ${displayPath(voicePath)}/profile.json`;

  const lines = [];
  lines.push("No Dravoice voice profile found.");
  lines.push(lookedFor);
  lines.push("");
  lines.push("Run `drav doctor` first to check whether your writing folder is ready.");
  lines.push("Then run `drav init` to create ./dravoice-voice/profile.json and .dravoice.yml.");
  lines.push(followUp);
  lines.push("");
  lines.push("Next: drav init");
  return lines.join("\n");
}
|
|
730
|
+
|
|
668
731
|
function resolvePromptOptions(cwd, options) {
|
|
669
732
|
const config = loadProjectConfig(cwd);
|
|
670
733
|
return {
|
|
@@ -693,21 +756,27 @@ function helpText() {
|
|
|
693
756
|
return [
|
|
694
757
|
"Dravoice - local-first voice guidance for writers",
|
|
695
758
|
"",
|
|
759
|
+
"Why not just paste examples into AI?",
|
|
760
|
+
"Dravoice complements AI by turning your own corpus into inspectable, repeatable, and reviewable guidance.",
|
|
761
|
+
"",
|
|
696
762
|
"First run:",
|
|
697
|
-
"1.
|
|
763
|
+
"1. Check your writing folder",
|
|
764
|
+
" drav doctor",
|
|
765
|
+
"2. Initialize your voice profile",
|
|
698
766
|
" drav init",
|
|
699
|
-
"
|
|
767
|
+
"3. Inspect the profile before trusting it",
|
|
700
768
|
" drav inspect",
|
|
701
|
-
"
|
|
769
|
+
"4. Generate reusable drafting guidance",
|
|
702
770
|
" drav prompt --out AGENTS.md",
|
|
703
|
-
"
|
|
771
|
+
"5. Plan a grounded draft from evidence",
|
|
704
772
|
" drav brief \"New topic\" --evidence notes.md --out brief.md",
|
|
705
|
-
"
|
|
773
|
+
"6. Revise, then review",
|
|
706
774
|
" drav revise-plan draft.md",
|
|
707
775
|
" drav review draft.md",
|
|
708
776
|
"",
|
|
709
777
|
"Commands:",
|
|
710
778
|
" init Learn a profile and save project defaults in one first-run command.",
|
|
779
|
+
" doctor Check whether a writing corpus is ready to learn from.",
|
|
711
780
|
" learn Build a local voice profile from Markdown, MDX, or text examples.",
|
|
712
781
|
" inspect Show the learned profile in plain language.",
|
|
713
782
|
" prompt Render reusable LLM drafting guidance.",
|
|
@@ -719,7 +788,7 @@ function helpText() {
|
|
|
719
788
|
" drav review draft.md --mode strict --format json",
|
|
720
789
|
" drav benchmark prepare --examples ./articles --topic \"New topic\" --out ./bench-run --seed 42",
|
|
721
790
|
"",
|
|
722
|
-
"Run `drav help
|
|
791
|
+
"Run `drav help doctor` for command-specific help.",
|
|
723
792
|
"",
|
|
724
793
|
].join("\n");
|
|
725
794
|
}
|
|
@@ -736,6 +805,7 @@ function helpForTopic(topic) {
|
|
|
736
805
|
const EXAMPLES = {
|
|
737
806
|
help: "drav help init",
|
|
738
807
|
init: "drav init",
|
|
808
|
+
doctor: "drav doctor --examples ./articles",
|
|
739
809
|
learn: "drav learn --examples ./articles --out ./dravoice-voice",
|
|
740
810
|
inspect: "drav inspect",
|
|
741
811
|
prompt: "drav prompt --out AGENTS.md",
|
|
@@ -744,6 +814,7 @@ const EXAMPLES = {
|
|
|
744
814
|
review: "drav review draft.md --format json",
|
|
745
815
|
benchmark: "drav benchmark prepare --examples ./articles --topic \"New topic\" --out ./bench-run --seed 42",
|
|
746
816
|
"benchmark prepare": "drav benchmark prepare --examples ./articles --topic \"New topic\" --out ./bench-run --seed 42",
|
|
817
|
+
"benchmark prepare-many": "drav benchmark prepare-many --examples ./articles --topic \"New topic\" --out ./bench-runs --runs 3 --seed 42",
|
|
747
818
|
"benchmark score": "drav benchmark score --run ./bench-run --judge ./bench-run/judge/judgment.json",
|
|
748
819
|
};
|
|
749
820
|
|
|
@@ -785,11 +856,26 @@ const HELP_TOPICS = {
|
|
|
785
856
|
"Next: drav inspect",
|
|
786
857
|
"",
|
|
787
858
|
].join("\n"),
|
|
859
|
+
doctor: [
|
|
860
|
+
"Usage: drav doctor [--examples ./articles] [--format text]",
|
|
861
|
+
"",
|
|
862
|
+
"What it does:",
|
|
863
|
+
"Checks whether a Markdown, MDX, or plain-text writing corpus is ready to learn from before you trust a profile.",
|
|
864
|
+
"",
|
|
865
|
+
"Options:",
|
|
866
|
+
" --examples <dir> Directory with representative long-form pieces. Defaults to .dravoice.yml, then ./articles.",
|
|
867
|
+
" --format <format> text or json. Defaults to text.",
|
|
868
|
+
"",
|
|
869
|
+
`Example: ${EXAMPLES.doctor}`,
|
|
870
|
+
"Next: drav init",
|
|
871
|
+
"",
|
|
872
|
+
].join("\n"),
|
|
788
873
|
inspect: [
|
|
789
874
|
"Usage: drav inspect [--voice ./dravoice-voice]",
|
|
790
875
|
"",
|
|
791
876
|
"What it does:",
|
|
792
877
|
"Shows corpus confidence, feature families, revision handles, and drafting guidance in plain language.",
|
|
878
|
+
"Check what Dravoice learned before trusting it.",
|
|
793
879
|
"",
|
|
794
880
|
"Options:",
|
|
795
881
|
" --voice <dir> Voice profile directory. Defaults to .dravoice.yml,",
|
|
@@ -804,6 +890,7 @@ const HELP_TOPICS = {
|
|
|
804
890
|
"",
|
|
805
891
|
"What it does:",
|
|
806
892
|
"Turns high-confidence profile observations into reusable drafting guidance for an LLM or writing agent.",
|
|
893
|
+
"Use this to give AI stable guidance without re-pasting source writing.",
|
|
807
894
|
"",
|
|
808
895
|
"Options:",
|
|
809
896
|
" --voice <dir> Voice profile directory. Defaults to .dravoice.yml,",
|
|
@@ -841,6 +928,7 @@ const HELP_TOPICS = {
|
|
|
841
928
|
"",
|
|
842
929
|
"What it does:",
|
|
843
930
|
"Ranks calibrated, human-editable revision actions. It does not rewrite the draft or claim AI detection.",
|
|
931
|
+
"Use deterministic diagnostics after drafting; this is the part a plain prompt cannot reliably provide.",
|
|
844
932
|
"",
|
|
845
933
|
"Options:",
|
|
846
934
|
" --voice <dir> Voice profile directory. Defaults to .dravoice.yml,",
|
|
@@ -856,6 +944,7 @@ const HELP_TOPICS = {
|
|
|
856
944
|
"",
|
|
857
945
|
"What it does:",
|
|
858
946
|
"Compares a draft with the profile and reports family-level drift. It is revision guidance, not AI detection.",
|
|
947
|
+
"Use deterministic diagnostics after drafting; this is the part a plain prompt cannot reliably provide.",
|
|
859
948
|
"",
|
|
860
949
|
"Options:",
|
|
861
950
|
" --voice <dir> Voice profile directory. Defaults to .dravoice.yml,",
|
|
@@ -871,13 +960,14 @@ const HELP_TOPICS = {
|
|
|
871
960
|
"",
|
|
872
961
|
].join("\n"),
|
|
873
962
|
benchmark: [
|
|
874
|
-
"Usage: drav benchmark <prepare|score> ...",
|
|
963
|
+
"Usage: drav benchmark <prepare|prepare-many|score> ...",
|
|
875
964
|
"",
|
|
876
965
|
"What it does:",
|
|
877
966
|
"Runs validation workflows for Dravoice development. Most writers do not need this for first use.",
|
|
878
967
|
"",
|
|
879
968
|
"Examples:",
|
|
880
969
|
` ${EXAMPLES["benchmark prepare"]}`,
|
|
970
|
+
` ${EXAMPLES["benchmark prepare-many"]}`,
|
|
881
971
|
` ${EXAMPLES["benchmark score"]}`,
|
|
882
972
|
"",
|
|
883
973
|
].join("\n"),
|
|
@@ -891,6 +981,16 @@ const HELP_TOPICS = {
|
|
|
891
981
|
"Next: fill benchmark drafts, then run drav benchmark score --run ./bench-run --judge ./bench-run/judge/judgment.json",
|
|
892
982
|
"",
|
|
893
983
|
].join("\n"),
|
|
984
|
+
"benchmark prepare-many": [
|
|
985
|
+
"Usage: drav benchmark prepare-many --examples ./articles --topic \"New topic\" --out ./bench-runs --runs 3 --seed 42",
|
|
986
|
+
"",
|
|
987
|
+
"What it does:",
|
|
988
|
+
"Creates multiple seeded benchmark run directories for repeated validation.",
|
|
989
|
+
"",
|
|
990
|
+
`Example: ${EXAMPLES["benchmark prepare-many"]}`,
|
|
991
|
+
"Next: fill each run's drafts, then score each run.",
|
|
992
|
+
"",
|
|
993
|
+
].join("\n"),
|
|
894
994
|
"benchmark score": [
|
|
895
995
|
"Usage: drav benchmark score --run ./bench-run --judge ./bench-run/judge/judgment.json",
|
|
896
996
|
"",
|
|
@@ -35,14 +35,20 @@ export function analyzeDiscourse(documents) {
|
|
|
35
35
|
}
|
|
36
36
|
|
|
37
37
|
// Returns the label of the first TRANSITIONS entry whose pattern matches the
// text, either directly or in its "embedded" form (the same pattern preceded
// by clause punctuation). Nullish input is treated as an empty string. Falls
// back to "plain" when nothing matches.
export function transitionLabel(text) {
  const sentence = String(text ?? "");
  const firstHit = Object.entries(TRANSITIONS).find(
    ([, matcher]) => matcher.test(sentence) || embeddedTransitionPattern(matcher).test(sentence),
  );
  return firstHit ? firstHit[0] : "plain";
}
|
|
45
46
|
|
|
47
|
+
// Derives a pattern that finds a sentence-opening transition when it instead
// opens a clause mid-sentence, i.e. right after `.`, `;`, `:` or `,` plus
// whitespace.
//
// @param {RegExp} pattern - transition pattern, normally anchored with `^`.
// @returns {RegExp} a fresh, non-global, non-sticky pattern with the same
//   remaining flags.
function embeddedTransitionPattern(pattern) {
  // Drop the leading start anchor whatever follows it. Stripping it only in
  // front of `(?:` left patterns such as /^however\b/ with a `^` after the
  // punctuation prefix, which can never match.
  const unanchored = pattern.source.replace(/^\^/, "");
  // `y` would pin the match to index 0 and `g` makes test() stateful; neither
  // is wanted for a one-shot containment check.
  const flags = pattern.flags.replace(/[gy]/g, "");
  // Group the source so a top-level alternation stays behind the prefix.
  return new RegExp(`[.;:,]\\s+(?:${unanchored})`, flags);
}
|
|
51
|
+
|
|
46
52
|
function callbackRate(sentences) {
|
|
47
53
|
let callbacks = 0;
|
|
48
54
|
for (let index = 1; index < sentences.length; index += 1) {
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import { rate, topItems } from "../text-utils.js";
|
|
2
2
|
|
|
3
3
|
const EVIDENCE_PATTERNS = {
|
|
4
|
-
date: /\b\d{1,2}:\d{2}\s?(?:am|pm)?\b|\b20\d{2}-\d{2}-\d{2}\b|\b(?:monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b/i,
|
|
4
|
+
date: /\b\d{1,2}:\d{2}\s?(?:am|pm)?\b|\b20\d{2}-\d{2}-\d{2}\b|\b(?:monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b|\b(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|sep(?:tember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\s+\d{1,2},?\s+\d{4}\b/i,
|
|
5
5
|
number: /\b\d+(?:\.\d+)?\b/,
|
|
6
6
|
quote: /"[^"]+"|'[^']+'|^>/,
|
|
7
7
|
url: /https?:\/\/\S+/i,
|
|
8
|
-
citation: /\[[^\]]+\]\([^)]+\)|\([A-Z][A-Za-z]+,\s*\d{4}\)/,
|
|
9
|
-
sourceAttribution: /\b(according to|reported|observed|noted|recorded|quoted|interviewed|surveyed|field notes said|data shows|study found|the memo|the log|the report)\b/i,
|
|
8
|
+
citation: /\[[^\]]+\]\([^)]+\)|\[\^[^\]]+\]|\([A-Z][A-Za-z]+,\s*\d{4}\)/,
|
|
9
|
+
sourceAttribution: /\b(according to|reported|observed|noted|recorded|quoted|interviewed|surveyed|field notes said|data shows|study found|the memo|the log|the report|source:|internal memo)\b/i,
|
|
10
10
|
sensory: /\b(cold|warm|hot|cool|quiet|loud|bright|dark|red|blue|green|rough|smooth|sharp|soft|smelled|smell|scent|tasted|heard|sound|noise|flashed|visible|physical|rain|metal|smoke)\b/i,
|
|
11
11
|
specificExample: /\b(for example|for instance|such as|including|included|includes|sample|case in point|specifically|in one case)\b/i,
|
|
12
12
|
};
|
|
@@ -11,13 +11,23 @@ const REGISTER_MARKERS = {
|
|
|
11
11
|
formal: ["requires", "outcomes", "process", "alignment", "therefore"],
|
|
12
12
|
};
|
|
13
13
|
|
|
14
|
-
export function analyzeRegister(documents) {
|
|
14
|
+
export function analyzeRegister(documents, { markers = REGISTER_MARKERS } = {}) {
|
|
15
15
|
const text = documents.map((document) => document.text.toLowerCase()).join("\n\n");
|
|
16
16
|
const words = new Set(contentWords(text));
|
|
17
|
-
const
|
|
17
|
+
const markerSets = Object.entries(markers).map(([value, markerList]) => {
|
|
18
|
+
const matchedMarkers = markerList.filter((marker) => markerAppears(text, words, marker));
|
|
19
|
+
return {
|
|
20
|
+
value,
|
|
21
|
+
markers: markerList,
|
|
22
|
+
matchedMarkers,
|
|
23
|
+
score: rate(matchedMarkers.length, markerList.length, 2),
|
|
24
|
+
};
|
|
25
|
+
});
|
|
26
|
+
const scores = markerSets.map(({ value, score }) => ({
|
|
18
27
|
value,
|
|
19
|
-
score
|
|
28
|
+
score,
|
|
20
29
|
})).sort((left, right) => right.score - left.score || left.value.localeCompare(right.value));
|
|
30
|
+
const mixedRegister = isMixedRegister(scores);
|
|
21
31
|
|
|
22
32
|
return {
|
|
23
33
|
family: "register",
|
|
@@ -25,14 +35,28 @@ export function analyzeRegister(documents) {
|
|
|
25
35
|
features: {
|
|
26
36
|
primary: scores[0] ?? { value: "unknown", score: 0 },
|
|
27
37
|
scores,
|
|
38
|
+
markerSets,
|
|
39
|
+
mixedRegister,
|
|
28
40
|
topContentWords: topItems(contentWords(text), 12),
|
|
29
41
|
},
|
|
30
42
|
examples: scores.slice(0, 3).map((item) => `${item.value}: ${item.score}`),
|
|
31
|
-
warnings:
|
|
43
|
+
warnings: [
|
|
44
|
+
...(documents.length < 3 ? ["Register confidence is limited because the corpus has fewer than 3 documents."] : []),
|
|
45
|
+
...(mixedRegister ? ["Mixed register signals detected; treat the primary register as a weak summary of the genre mix."] : []),
|
|
46
|
+
],
|
|
32
47
|
revisionHandles: ["Check whether the draft uses the same broad register and genre mix as the corpus."],
|
|
33
48
|
};
|
|
34
49
|
}
|
|
35
50
|
|
|
51
|
+
// Decides whether the register scores describe a mixed register.
//
// Looks only at entries with a positive score, in the order given (callers
// pass them sorted highest first). The register is mixed when the second of
// those scores is at least 0.2 and at least 60% of the first.
function isMixedRegister(scores) {
  const nonZero = scores.filter(({ score }) => score > 0);
  const runnerUp = nonZero[1];
  if (runnerUp === undefined) {
    return false;
  }
  const leader = nonZero[0];
  const threshold = Math.max(0.2, leader.score * 0.6);
  return runnerUp.score >= threshold;
}
|
|
59
|
+
|
|
36
60
|
function markerAppears(text, words, marker) {
|
|
37
61
|
const normalized = marker.toLowerCase();
|
|
38
62
|
if (/^[a-z0-9'-]+$/.test(normalized)) {
|
|
@@ -1,11 +1,16 @@
|
|
|
1
1
|
import { evidenceTypes } from "./evidence.js";
|
|
2
2
|
import { transitionLabel } from "./discourse.js";
|
|
3
|
-
import { topItems } from "../text-utils.js";
|
|
3
|
+
import { splitSentences, topItems } from "../text-utils.js";
|
|
4
4
|
|
|
5
5
|
export function analyzeRhetoricalShape(documents) {
|
|
6
6
|
const documentMoves = documents.map((document) => document.sentences.map((sentence) => moveFor(sentence.text)));
|
|
7
7
|
const sentenceMoves = documentMoves.flat();
|
|
8
8
|
const openingMoves = documents.flatMap((document) => document.sentences.slice(0, 3).map((sentence) => moveFor(sentence.text)));
|
|
9
|
+
const paragraphMovePatterns = documents.flatMap((document) =>
|
|
10
|
+
document.paragraphs
|
|
11
|
+
.map((paragraph) => splitSentences(paragraph.text).map((sentence) => moveFor(sentence)).join(" -> "))
|
|
12
|
+
.filter(Boolean)
|
|
13
|
+
);
|
|
9
14
|
const bigrams = [];
|
|
10
15
|
const trigrams = [];
|
|
11
16
|
const openingMovePatterns = [];
|
|
@@ -28,6 +33,7 @@ export function analyzeRhetoricalShape(documents) {
|
|
|
28
33
|
moveRates: topItems(sentenceMoves, 12),
|
|
29
34
|
openingMoves: openingMoves.slice(0, 9),
|
|
30
35
|
openingMovePatterns: topItems(openingMovePatterns, 8),
|
|
36
|
+
paragraphMovePatterns: topItems(paragraphMovePatterns, 12),
|
|
31
37
|
moveBigrams: topItems(bigrams, 12),
|
|
32
38
|
moveTrigrams: topItems(trigrams, 12),
|
|
33
39
|
commonSequences: topItems(bigrams, 12),
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { distribution, rate } from "../text-utils.js";
|
|
1
|
+
import { distribution, rate, splitSentences, topItems } from "../text-utils.js";
|
|
2
2
|
import { moveFor } from "./rhetorical-shape.js";
|
|
3
3
|
|
|
4
4
|
export function analyzeStructure(documents) {
|
|
@@ -6,6 +6,19 @@ export function analyzeStructure(documents) {
|
|
|
6
6
|
document.sections.map((section) => section.blocks.reduce((sum, block) => sum + block.lines.join(" ").split(/\s+/).filter(Boolean).length, 0))
|
|
7
7
|
);
|
|
8
8
|
const openingMoves = documents.flatMap((document) => document.sentences.slice(0, 2).map((sentence) => moveFor(sentence.text)));
|
|
9
|
+
const sectionOrderPatterns = documents
|
|
10
|
+
.map((document) => document.headings.map((heading) => `h${heading.depth}`).join(" -> "))
|
|
11
|
+
.filter(Boolean);
|
|
12
|
+
const listPlacementPatterns = documents.flatMap((document) =>
|
|
13
|
+
document.blocks
|
|
14
|
+
.filter((block) => block.type === "list")
|
|
15
|
+
.map((block) => `h${block.headingDepth || 0}:list`)
|
|
16
|
+
);
|
|
17
|
+
const quotePlacementPatterns = documents.flatMap((document) =>
|
|
18
|
+
document.blocks
|
|
19
|
+
.filter((block) => block.type === "quote")
|
|
20
|
+
.map((block) => `h${block.headingDepth || 0}:quote`)
|
|
21
|
+
);
|
|
9
22
|
|
|
10
23
|
return {
|
|
11
24
|
family: "structure",
|
|
@@ -13,12 +26,107 @@ export function analyzeStructure(documents) {
|
|
|
13
26
|
features: {
|
|
14
27
|
sectionWords: distribution(sectionLengths),
|
|
15
28
|
headingCount: distribution(documents.map((document) => document.headings.length)),
|
|
29
|
+
maxHeadingDepth: distribution(documents.map((document) => Math.max(0, ...document.headings.map((heading) => heading.depth)))),
|
|
30
|
+
sectionOrderPatterns: topItems(sectionOrderPatterns, 12),
|
|
31
|
+
listPlacementPatterns: topItems(listPlacementPatterns, 12),
|
|
32
|
+
quotePlacementPatterns: topItems(quotePlacementPatterns, 12),
|
|
16
33
|
openingMoves,
|
|
17
34
|
listDocumentRate: rate(documents.filter((document) => document.blocks.some((block) => block.type === "list")).length, documents.length, 2),
|
|
18
35
|
quoteDocumentRate: rate(documents.filter((document) => document.blocks.some((block) => block.type === "quote")).length, documents.length, 2),
|
|
36
|
+
templateTics: templateTics(documents),
|
|
37
|
+
formattingPalette: formattingPalette(documents),
|
|
19
38
|
},
|
|
20
39
|
examples: openingMoves.slice(0, 5),
|
|
21
40
|
warnings: documents.length < 3 ? ["Structure confidence is limited because the corpus has fewer than 3 documents."] : [],
|
|
22
41
|
revisionHandles: ["Compare headings, list/quote use, section size, and opening structure."],
|
|
23
42
|
};
|
|
24
43
|
}
|
|
44
|
+
|
|
45
|
+
// Measures how often a corpus repeats the same layout habits, as corpus-wide
// rates: paragraphs of at most one sentence, Title-Case headings, documents
// with a quote block before the first heading, and headed sections containing
// a list. Guidance can use these rates to ask for deliberate variation instead
// of reproducing one skeleton every time.
function templateTics(documents) {
  const tally = (items, matches) => items.reduce((count, item) => count + (matches(item) ? 1 : 0), 0);
  const containsType = (blocks, type) => blocks.some((block) => block.type === type);

  const paragraphs = documents.flatMap(({ blocks }) => blocks.filter((block) => block.type === "paragraph"));
  const headings = documents.flatMap((document) => document.headings);
  // Only sections that actually sit under a heading take part in the list rate.
  const sectionsUnderHeading = documents.flatMap(({ sections }) => sections.filter((section) => section.heading));

  const oneSentenceParagraphs = tally(paragraphs, (block) => splitSentences(block.lines.join(" ")).length <= 1);
  const titleCased = tally(headings, (heading) => isTitleCase(heading.text));
  // A quote block with no heading id is one that appears before the first heading.
  const documentsWithLedeQuote = tally(documents, (document) =>
    document.blocks.some((block) => block.type === "quote" && block.headingId == null));
  const sectionsContainingList = tally(sectionsUnderHeading, (section) => containsType(section.blocks, "list"));

  return {
    singleSentenceParagraphRate: rate(oneSentenceParagraphs, paragraphs.length, 2),
    titleCaseHeadingRate: rate(titleCased, headings.length, 2),
    ledeBlockquoteRate: rate(documentsWithLedeQuote, documents.length, 2),
    sectionListRate: rate(sectionsContainingList, sectionsUnderHeading.length, 2),
  };
}
|
|
77
|
+
|
|
78
|
+
// The set of Markdown formatting devices a corpus actually reaches for. Each
// entry is a multiline pattern that is tested once per document, so guidance
// can name what the corpus leans on and which devices it underuses.
//
// None of the patterns carries the `g` flag, so `.test()` stays stateless.
const FORMATTING_DEVICES = {
  blockquote: /^>\s+/m,
  bulletList: /^[ \t]*[-*+]\s+/m,
  orderedList: /^[ \t]*\d+[.)]\s+/m,
  // Indented line that starts with a real list marker (`-`, `*`, `+`, `1.`,
  // `1)`) followed by whitespace. Requiring the full marker keeps indented
  // prose such as "  2024 was..." or "  -v" from counting as a nested list.
  nestedList: /^[ \t]+(?:[-*+]|\d+[.)])\s+/m,
  subHeading: /^#{3,6}\s+/m,
  table: /^\|.*\|\s*$/m,
  codeBlock: /^(```|~~~)/m,
  inlineCode: /(^|[^`])`[^`\n]+`/m,
  boldInline: /\*\*[^*\n]+\*\*|__[^_\n]+__/m,
  italicInline: /(^|[^*_])[*_][^*_\n]+[*_]/m,
  link: /\[[^\]]+\]\([^)]+\)/m,
  // Three or more of the SAME marker on ONE line, optionally spaced. The
  // backreference rejects mixed markers and `[ \t]` (not `\s`) stops the
  // match from spilling across lines.
  horizontalRule: /^[ \t]*([-*_])(?:[ \t]*\1){2,}[ \t]*$/m,
};
|
|
95
|
+
|
|
96
|
+
// Reports, for every entry in FORMATTING_DEVICES, the share of documents whose
// raw text uses that device (via rate(used, documents.length, 2)).
//
// Front matter is removed before any check. The `codeBlock` device is tested
// against text that still contains fences, since the fence is what it detects;
// every other device is tested with fenced code removed.
function formattingPalette(documents) {
  // Both text variants depend only on the document, so build them once per
  // document instead of once per device.
  const sources = documents.map((document) => {
    const withFences = stripFrontmatter(document.raw || "");
    return { withFences, withoutFences: stripCodeFences(withFences) };
  });

  const palette = {};
  for (const [device, pattern] of Object.entries(FORMATTING_DEVICES)) {
    const used = sources.filter(({ withFences, withoutFences }) =>
      pattern.test(device === "codeBlock" ? withFences : withoutFences)).length;
    palette[device] = rate(used, documents.length, 2);
  }
  return palette;
}
|
|
108
|
+
|
|
109
|
+
// Replaces every fenced code span with a single newline so formatting markers
// that were only quoted inside code are not credited to the corpus. Backtick
// fences are removed first, then tilde fences; a fence with no closing marker
// is left as it is.
function stripCodeFences(raw) {
  const fencePatterns = [/```[\s\S]*?```/g, /~~~[\s\S]*?~~~/g];
  return fencePatterns.reduce((text, fence) => text.replace(fence, "\n"), raw);
}
|
|
114
|
+
|
|
115
|
+
// Drops leading YAML front matter so its `---` fences and indented `  - tag`
// lines are not counted as section dividers or nested lists.
//
// Only a block at the very start of the text is removed (an optional byte-order
// mark and leading whitespace are tolerated). The opening and closing `---`
// must each sit on their own line; an empty block (`---` directly followed by
// `---`) is removed too. Text without such a block is returned unchanged.
function stripFrontmatter(raw) {
  // The BOM is written as an explicit escape: a literal U+FEFF in the pattern
  // is invisible in editors and is easily lost, which leaves an invalid `^?`.
  return raw.replace(/^\uFEFF?\s*---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/, "");
}
|
|
120
|
+
|
|
121
|
+
// Heuristic check for a Title-Case heading.
//
// Considers only whitespace-separated tokens that contain an ASCII letter, and
// among those only tokens longer than three characters (so short words such
// as "and" or "the" do not count either way). Returns true when there are at
// least two such tokens overall, at least two long ones, and at least 80% of
// the long ones begin with an uppercase ASCII letter.
function isTitleCase(text) {
  const lettered = [];
  for (const token of String(text ?? "").split(/\s+/)) {
    if (/[a-z]/i.test(token)) {
      lettered.push(token);
    }
  }
  if (lettered.length < 2) {
    return false;
  }

  let longTokens = 0;
  let capitalizedLongTokens = 0;
  for (const token of lettered) {
    if (token.length <= 3) {
      continue;
    }
    longTokens += 1;
    if (/^[A-Z]/.test(token)) {
      capitalizedLongTokens += 1;
    }
  }
  return longTokens >= 2 && capitalizedLongTokens / longTokens >= 0.8;
}
|