dravoice 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -9
- package/package.json +1 -1
- package/src/index.js +106 -13
- package/src/v2/analyzers/discourse.js +7 -1
- package/src/v2/analyzers/evidence.js +3 -3
- package/src/v2/analyzers/register.js +28 -4
- package/src/v2/analyzers/rhetorical-shape.js +7 -1
- package/src/v2/analyzers/structure.js +18 -1
- package/src/v2/benchmark.js +83 -0
- package/src/v2/doctor.js +308 -0
- package/src/v2/document-model.js +77 -6
- package/src/v2/inspect.js +2 -2
- package/src/v2/profile.js +126 -11
- package/src/v2/review.js +142 -16
- package/src/v2/revise-plan.js +111 -8
- package/src/v2/stylometry.js +11 -7
package/README.md
CHANGED
|
@@ -38,18 +38,22 @@ files before it learns from them.
|
|
|
38
38
|
If your writing is already somewhere else, point Dravoice at that directory:
|
|
39
39
|
|
|
40
40
|
```bash
|
|
41
|
+
npx dravoice doctor --examples ~/writing
|
|
41
42
|
npx dravoice init --examples ~/writing
|
|
42
43
|
```
|
|
43
44
|
|
|
44
|
-
### 1. Initialize and Inspect
|
|
45
|
+
### 1. Check, Initialize, and Inspect
|
|
45
46
|
|
|
46
47
|
```bash
|
|
48
|
+
npx dravoice doctor
|
|
47
49
|
npx dravoice init
|
|
48
50
|
npx dravoice inspect
|
|
49
51
|
```
|
|
50
52
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
+
Run `doctor` before learning so missing folders, unsupported files, weak
|
|
54
|
+
corpora, duplicate-looking files, and length-imbalanced source sets get clear
|
|
55
|
+
next steps. Inspect the profile before trusting it. If the feature-family
|
|
56
|
+
summaries do not look recognizable, improve the source corpus first.
|
|
53
57
|
|
|
54
58
|
### 2. Generate Guidance and a Brief
|
|
55
59
|
|
|
@@ -68,12 +72,13 @@ npx dravoice revise-plan draft.md
|
|
|
68
72
|
npx dravoice review draft.md
|
|
69
73
|
```
|
|
70
74
|
|
|
71
|
-
`init` reads your source pieces, writes a
|
|
72
|
-
and writes `.dravoice.yml` project
|
|
73
|
-
feature families visible, `prompt` turns
|
|
74
|
-
drafting guidance, `brief` creates an
|
|
75
|
-
`revise-plan` ranks calibrated stylometric
|
|
76
|
-
reports family-level drift
|
|
75
|
+
`doctor` checks corpus readiness, `init` reads your source pieces, writes a
|
|
76
|
+
local profile in `./dravoice-voice`, and writes `.dravoice.yml` project
|
|
77
|
+
defaults. `inspect` makes the learned feature families visible, `prompt` turns
|
|
78
|
+
high-confidence observations into drafting guidance, `brief` creates an
|
|
79
|
+
evidence-first article plan, `revise-plan` ranks calibrated stylometric
|
|
80
|
+
revision actions, and `review` reports family-level drift with calibration
|
|
81
|
+
confidence.
|
|
77
82
|
|
|
78
83
|
Run `drav help init` or `drav help review` for command-specific help.
|
|
79
84
|
|
|
@@ -90,9 +95,27 @@ workflow:
|
|
|
90
95
|
|
|
91
96
|
```bash
|
|
92
97
|
npx dravoice benchmark prepare --examples ./articles --topic "A new article topic" --out ./bench-run --seed 42
|
|
98
|
+
npx dravoice benchmark prepare-many --examples ./articles --topic "A new article topic" --out ./bench-runs --runs 3 --seed 42
|
|
93
99
|
npx dravoice benchmark score --run ./bench-run --judge ./bench-run/judge/judgment.json
|
|
94
100
|
```
|
|
95
101
|
|
|
102
|
+
Benchmark reports include deterministic margins and repeat-run cautions. A
|
|
103
|
+
single run is directional evidence only. Score output includes suggested
|
|
104
|
+
follow-up seeds for repeated validation runs.
|
|
105
|
+
|
|
106
|
+
## Trust Boundaries
|
|
107
|
+
|
|
108
|
+
Dravoice profiles include per-family calibration diagnostics: threshold
|
|
109
|
+
observations, stability, minimum-evidence checks, and whether a family is usable
|
|
110
|
+
for findings. Weak corpora produce cautious guidance. Strict review can surface
|
|
111
|
+
document-level discourse, lexical, register, and structure drift, but the
|
|
112
|
+
findings remain revision guidance, not authorship proof.
|
|
113
|
+
|
|
114
|
+
V2 profiles expose marker-set register metadata, mixed-register warnings,
|
|
115
|
+
heading-depth and section-order structure signals, paragraph-localized
|
|
116
|
+
revision actions, expanded MDX scaffold filtering before analysis, and
|
|
117
|
+
library-level custom register marker sets for project-specific scoring.
|
|
118
|
+
|
|
96
119
|
## Fresh Install Smoke Test
|
|
97
120
|
|
|
98
121
|
From a packed tarball:
|
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -3,11 +3,13 @@ import path from "node:path";
|
|
|
3
3
|
import readline from "node:readline";
|
|
4
4
|
import {
|
|
5
5
|
prepareVoiceBenchmark,
|
|
6
|
+
prepareVoiceBenchmarkRuns,
|
|
6
7
|
renderBenchmarkReport,
|
|
7
8
|
scoreVoiceBenchmark,
|
|
8
9
|
} from "./v2/benchmark.js";
|
|
9
10
|
import { renderVoiceBriefV2, voiceArticleBriefV2 } from "./v2/brief.js";
|
|
10
11
|
import { renderInspectV2 } from "./v2/inspect.js";
|
|
12
|
+
import { diagnoseVoiceCorpusV2, renderCorpusDoctorV2 } from "./v2/doctor.js";
|
|
11
13
|
import { learnVoicePackV2, loadVoicePackV2 } from "./v2/profile.js";
|
|
12
14
|
import { voicePromptPackV2 } from "./v2/prompt.js";
|
|
13
15
|
import { renderVoiceReviewV2, reviewVoiceDraftV2 } from "./v2/review.js";
|
|
@@ -27,13 +29,15 @@ import {
|
|
|
27
29
|
export {
|
|
28
30
|
learnVoicePackV2 as learnVoicePack,
|
|
29
31
|
loadVoicePackV2 as loadVoicePack,
|
|
32
|
+
diagnoseVoiceCorpusV2 as diagnoseVoiceCorpus,
|
|
30
33
|
revisePlanDraftV2 as revisePlanDraft,
|
|
31
34
|
reviewVoiceDraftV2 as reviewVoiceDraft,
|
|
32
35
|
voicePromptPackV2 as voicePromptPack,
|
|
33
36
|
};
|
|
34
37
|
export { renderInspectV2, renderRevisePlanV2 as renderRevisePlan, renderVoiceReviewV2 as renderVoiceReview };
|
|
35
38
|
export { renderVoiceBriefV2 as renderVoiceBrief, voiceArticleBriefV2 as voiceArticleBrief };
|
|
36
|
-
export {
|
|
39
|
+
export { renderCorpusDoctorV2 as renderCorpusDoctor };
|
|
40
|
+
export { prepareVoiceBenchmark, prepareVoiceBenchmarkRuns, renderBenchmarkReport, scoreVoiceBenchmark };
|
|
37
41
|
|
|
38
42
|
const INIT_DISCOVERY_DIRS = [
|
|
39
43
|
"./articles",
|
|
@@ -128,6 +132,24 @@ export async function runCli(args, io) {
|
|
|
128
132
|
return 0;
|
|
129
133
|
}
|
|
130
134
|
|
|
135
|
+
if (command === "doctor") {
|
|
136
|
+
const { options, positional } = parseArgs(rest, ["examples", "format"], "doctor");
|
|
137
|
+
rejectPositionals(positional, "doctor");
|
|
138
|
+
const config = loadProjectConfig(io.cwd);
|
|
139
|
+
const examples = options.examples ?? config.examples ?? DEFAULT_EXAMPLES_DIR;
|
|
140
|
+
const format = formatOption(options.format, ["text", "json"], "doctor");
|
|
141
|
+
const result = diagnoseVoiceCorpusV2({
|
|
142
|
+
examplesDir: examples,
|
|
143
|
+
cwd: io.cwd,
|
|
144
|
+
});
|
|
145
|
+
if (format === "json") {
|
|
146
|
+
io.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
|
|
147
|
+
} else {
|
|
148
|
+
io.stdout.write(renderCorpusDoctorV2(result));
|
|
149
|
+
}
|
|
150
|
+
return result.exitCode;
|
|
151
|
+
}
|
|
152
|
+
|
|
131
153
|
if (command === "review") {
|
|
132
154
|
const { options, positional } = parseArgs(rest, ["voice", "mode", "format"], "review");
|
|
133
155
|
rejectUnexpectedPositionals(positional, 1, "review positional");
|
|
@@ -139,7 +161,7 @@ export async function runCli(args, io) {
|
|
|
139
161
|
}
|
|
140
162
|
const result = reviewVoiceDraftV2({
|
|
141
163
|
file: resolvePath(io.cwd, file),
|
|
142
|
-
voice:
|
|
164
|
+
voice: loadVoiceForCommand(io.cwd, options.voice, "review"),
|
|
143
165
|
cwd: io.cwd,
|
|
144
166
|
mode,
|
|
145
167
|
});
|
|
@@ -162,7 +184,7 @@ export async function runCli(args, io) {
|
|
|
162
184
|
}
|
|
163
185
|
const result = revisePlanDraftV2({
|
|
164
186
|
file: resolvePath(io.cwd, file),
|
|
165
|
-
voice:
|
|
187
|
+
voice: loadVoiceForCommand(io.cwd, options.voice, "revise-plan"),
|
|
166
188
|
cwd: io.cwd,
|
|
167
189
|
});
|
|
168
190
|
if (format === "json") {
|
|
@@ -180,7 +202,7 @@ export async function runCli(args, io) {
|
|
|
180
202
|
const promptOptions = resolvePromptOptions(io.cwd, options);
|
|
181
203
|
const format = formatOption(promptOptions.format, ["agents", "claude", "system"], "prompt");
|
|
182
204
|
const rendered = voicePromptPackV2({
|
|
183
|
-
voice:
|
|
205
|
+
voice: loadVoiceForCommand(io.cwd, options.voice, "prompt"),
|
|
184
206
|
format,
|
|
185
207
|
outPath: promptOptions.out ? resolvePath(io.cwd, promptOptions.out) : undefined,
|
|
186
208
|
});
|
|
@@ -197,7 +219,7 @@ export async function runCli(args, io) {
|
|
|
197
219
|
const { options, positional } = parseArgs(rest, ["voice", "topic", "evidence", "format", "out"], "brief");
|
|
198
220
|
const format = formatOption(options.format, ["text", "json"], "brief");
|
|
199
221
|
const result = voiceArticleBriefV2({
|
|
200
|
-
voice:
|
|
222
|
+
voice: loadVoiceForCommand(io.cwd, options.voice, "brief"),
|
|
201
223
|
topic: topicOption(options, positional, "brief"),
|
|
202
224
|
evidence: options.evidence ? resolvePath(io.cwd, options.evidence) : undefined,
|
|
203
225
|
cwd: io.cwd,
|
|
@@ -220,7 +242,7 @@ export async function runCli(args, io) {
|
|
|
220
242
|
if (command === "inspect") {
|
|
221
243
|
const { options, positional } = parseArgs(rest, ["voice"], "inspect");
|
|
222
244
|
rejectPositionals(positional, "inspect");
|
|
223
|
-
const profile =
|
|
245
|
+
const profile = loadVoiceForCommand(io.cwd, options.voice, "inspect");
|
|
224
246
|
io.stdout.write(renderInspectV2(profile));
|
|
225
247
|
io.stdout.write("Next: drav prompt --out AGENTS.md\n");
|
|
226
248
|
return 0;
|
|
@@ -263,6 +285,20 @@ function runBenchmarkCli(args, io) {
|
|
|
263
285
|
return 0;
|
|
264
286
|
}
|
|
265
287
|
|
|
288
|
+
if (benchmarkCommand === "prepare-many") {
|
|
289
|
+
const { options, positional } = parseArgs(rest, ["examples", "topic", "out", "seed", "runs"], "benchmark prepare-many");
|
|
290
|
+
rejectPositionals(positional, "benchmark prepare-many");
|
|
291
|
+
const result = prepareVoiceBenchmarkRuns({
|
|
292
|
+
examplesDir: resolvePath(io.cwd, requiredOption(options, "examples", "benchmark prepare-many")),
|
|
293
|
+
topic: requiredOption(options, "topic", "benchmark prepare-many"),
|
|
294
|
+
outDir: resolvePath(io.cwd, requiredOption(options, "out", "benchmark prepare-many")),
|
|
295
|
+
seed: options.seed ?? "1",
|
|
296
|
+
runs: options.runs ?? "3",
|
|
297
|
+
});
|
|
298
|
+
io.stdout.write(`Prepared ${result.runs.length} benchmark run(s) at ${resolvePath(io.cwd, requiredOption(options, "out", "benchmark prepare-many"))}.\n`);
|
|
299
|
+
return 0;
|
|
300
|
+
}
|
|
301
|
+
|
|
266
302
|
if (benchmarkCommand === "score") {
|
|
267
303
|
const { options, positional } = parseArgs(rest, ["run", "judge", "format"], "benchmark score");
|
|
268
304
|
rejectPositionals(positional, "benchmark score");
|
|
@@ -665,6 +701,33 @@ function resolveVoicePath(cwd, optionValue) {
|
|
|
665
701
|
return cwd;
|
|
666
702
|
}
|
|
667
703
|
|
|
704
|
+
function loadVoiceForCommand(cwd, optionValue, command) {
|
|
705
|
+
const voicePath = resolveVoicePath(cwd, optionValue);
|
|
706
|
+
try {
|
|
707
|
+
return loadVoicePackV2(voicePath);
|
|
708
|
+
} catch (error) {
|
|
709
|
+
if (/^No Dravoice V2 profile found/.test(error.message)) {
|
|
710
|
+
throw usageError(missingVoiceProfileMessage(command, voicePath), command);
|
|
711
|
+
}
|
|
712
|
+
throw error;
|
|
713
|
+
}
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
function missingVoiceProfileMessage(command, voicePath) {
|
|
717
|
+
return [
|
|
718
|
+
"No Dravoice voice profile found.",
|
|
719
|
+
`Looked for: ${displayPath(voicePath)}/profile.json`,
|
|
720
|
+
"",
|
|
721
|
+
"Run `drav doctor` first to check whether your writing folder is ready.",
|
|
722
|
+
"Then run `drav init` to create ./dravoice-voice/profile.json and .dravoice.yml.",
|
|
723
|
+
command === "inspect"
|
|
724
|
+
? "After that, run `drav inspect` again."
|
|
725
|
+
: "After that, rerun this command.",
|
|
726
|
+
"",
|
|
727
|
+
"Next: drav init",
|
|
728
|
+
].join("\n");
|
|
729
|
+
}
|
|
730
|
+
|
|
668
731
|
function resolvePromptOptions(cwd, options) {
|
|
669
732
|
const config = loadProjectConfig(cwd);
|
|
670
733
|
return {
|
|
@@ -694,20 +757,23 @@ function helpText() {
|
|
|
694
757
|
"Dravoice - local-first voice guidance for writers",
|
|
695
758
|
"",
|
|
696
759
|
"First run:",
|
|
697
|
-
"1.
|
|
760
|
+
"1. Check your writing folder",
|
|
761
|
+
" drav doctor",
|
|
762
|
+
"2. Initialize your voice profile",
|
|
698
763
|
" drav init",
|
|
699
|
-
"
|
|
764
|
+
"3. Inspect the profile before trusting it",
|
|
700
765
|
" drav inspect",
|
|
701
|
-
"
|
|
766
|
+
"4. Generate reusable drafting guidance",
|
|
702
767
|
" drav prompt --out AGENTS.md",
|
|
703
|
-
"
|
|
768
|
+
"5. Plan a grounded draft from evidence",
|
|
704
769
|
" drav brief \"New topic\" --evidence notes.md --out brief.md",
|
|
705
|
-
"
|
|
770
|
+
"6. Revise, then review",
|
|
706
771
|
" drav revise-plan draft.md",
|
|
707
772
|
" drav review draft.md",
|
|
708
773
|
"",
|
|
709
774
|
"Commands:",
|
|
710
775
|
" init Learn a profile and save project defaults in one first-run command.",
|
|
776
|
+
" doctor Check whether a writing corpus is ready to learn from.",
|
|
711
777
|
" learn Build a local voice profile from Markdown, MDX, or text examples.",
|
|
712
778
|
" inspect Show the learned profile in plain language.",
|
|
713
779
|
" prompt Render reusable LLM drafting guidance.",
|
|
@@ -719,7 +785,7 @@ function helpText() {
|
|
|
719
785
|
" drav review draft.md --mode strict --format json",
|
|
720
786
|
" drav benchmark prepare --examples ./articles --topic \"New topic\" --out ./bench-run --seed 42",
|
|
721
787
|
"",
|
|
722
|
-
"Run `drav help
|
|
788
|
+
"Run `drav help doctor` for command-specific help.",
|
|
723
789
|
"",
|
|
724
790
|
].join("\n");
|
|
725
791
|
}
|
|
@@ -736,6 +802,7 @@ function helpForTopic(topic) {
|
|
|
736
802
|
const EXAMPLES = {
|
|
737
803
|
help: "drav help init",
|
|
738
804
|
init: "drav init",
|
|
805
|
+
doctor: "drav doctor --examples ./articles",
|
|
739
806
|
learn: "drav learn --examples ./articles --out ./dravoice-voice",
|
|
740
807
|
inspect: "drav inspect",
|
|
741
808
|
prompt: "drav prompt --out AGENTS.md",
|
|
@@ -744,6 +811,7 @@ const EXAMPLES = {
|
|
|
744
811
|
review: "drav review draft.md --format json",
|
|
745
812
|
benchmark: "drav benchmark prepare --examples ./articles --topic \"New topic\" --out ./bench-run --seed 42",
|
|
746
813
|
"benchmark prepare": "drav benchmark prepare --examples ./articles --topic \"New topic\" --out ./bench-run --seed 42",
|
|
814
|
+
"benchmark prepare-many": "drav benchmark prepare-many --examples ./articles --topic \"New topic\" --out ./bench-runs --runs 3 --seed 42",
|
|
747
815
|
"benchmark score": "drav benchmark score --run ./bench-run --judge ./bench-run/judge/judgment.json",
|
|
748
816
|
};
|
|
749
817
|
|
|
@@ -785,6 +853,20 @@ const HELP_TOPICS = {
|
|
|
785
853
|
"Next: drav inspect",
|
|
786
854
|
"",
|
|
787
855
|
].join("\n"),
|
|
856
|
+
doctor: [
|
|
857
|
+
"Usage: drav doctor [--examples ./articles] [--format text]",
|
|
858
|
+
"",
|
|
859
|
+
"What it does:",
|
|
860
|
+
"Checks whether a Markdown, MDX, or plain-text writing corpus is ready to learn from before you trust a profile.",
|
|
861
|
+
"",
|
|
862
|
+
"Options:",
|
|
863
|
+
" --examples <dir> Directory with representative long-form pieces. Defaults to .dravoice.yml, then ./articles.",
|
|
864
|
+
" --format <format> text or json. Defaults to text.",
|
|
865
|
+
"",
|
|
866
|
+
`Example: ${EXAMPLES.doctor}`,
|
|
867
|
+
"Next: drav init",
|
|
868
|
+
"",
|
|
869
|
+
].join("\n"),
|
|
788
870
|
inspect: [
|
|
789
871
|
"Usage: drav inspect [--voice ./dravoice-voice]",
|
|
790
872
|
"",
|
|
@@ -871,13 +953,14 @@ const HELP_TOPICS = {
|
|
|
871
953
|
"",
|
|
872
954
|
].join("\n"),
|
|
873
955
|
benchmark: [
|
|
874
|
-
"Usage: drav benchmark <prepare|score> ...",
|
|
956
|
+
"Usage: drav benchmark <prepare|prepare-many|score> ...",
|
|
875
957
|
"",
|
|
876
958
|
"What it does:",
|
|
877
959
|
"Runs validation workflows for Dravoice development. Most writers do not need this for first use.",
|
|
878
960
|
"",
|
|
879
961
|
"Examples:",
|
|
880
962
|
` ${EXAMPLES["benchmark prepare"]}`,
|
|
963
|
+
` ${EXAMPLES["benchmark prepare-many"]}`,
|
|
881
964
|
` ${EXAMPLES["benchmark score"]}`,
|
|
882
965
|
"",
|
|
883
966
|
].join("\n"),
|
|
@@ -891,6 +974,16 @@ const HELP_TOPICS = {
|
|
|
891
974
|
"Next: fill benchmark drafts, then run drav benchmark score --run ./bench-run --judge ./bench-run/judge/judgment.json",
|
|
892
975
|
"",
|
|
893
976
|
].join("\n"),
|
|
977
|
+
"benchmark prepare-many": [
|
|
978
|
+
"Usage: drav benchmark prepare-many --examples ./articles --topic \"New topic\" --out ./bench-runs --runs 3 --seed 42",
|
|
979
|
+
"",
|
|
980
|
+
"What it does:",
|
|
981
|
+
"Creates multiple seeded benchmark run directories for repeated validation.",
|
|
982
|
+
"",
|
|
983
|
+
`Example: ${EXAMPLES["benchmark prepare-many"]}`,
|
|
984
|
+
"Next: fill each run's drafts, then score each run.",
|
|
985
|
+
"",
|
|
986
|
+
].join("\n"),
|
|
894
987
|
"benchmark score": [
|
|
895
988
|
"Usage: drav benchmark score --run ./bench-run --judge ./bench-run/judge/judgment.json",
|
|
896
989
|
"",
|
|
@@ -35,14 +35,20 @@ export function analyzeDiscourse(documents) {
|
|
|
35
35
|
}
|
|
36
36
|
|
|
37
37
|
export function transitionLabel(text) {
|
|
38
|
+
const normalized = String(text ?? "");
|
|
38
39
|
for (const [label, pattern] of Object.entries(TRANSITIONS)) {
|
|
39
|
-
if (pattern.test(
|
|
40
|
+
if (pattern.test(normalized) || embeddedTransitionPattern(pattern).test(normalized)) {
|
|
40
41
|
return label;
|
|
41
42
|
}
|
|
42
43
|
}
|
|
43
44
|
return "plain";
|
|
44
45
|
}
|
|
45
46
|
|
|
47
|
+
function embeddedTransitionPattern(pattern) {
|
|
48
|
+
const source = pattern.source.replace(/^\^\(\?:/, "(?:");
|
|
49
|
+
return new RegExp(`[.;:,]\\s+${source}`, pattern.flags);
|
|
50
|
+
}
|
|
51
|
+
|
|
46
52
|
function callbackRate(sentences) {
|
|
47
53
|
let callbacks = 0;
|
|
48
54
|
for (let index = 1; index < sentences.length; index += 1) {
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import { rate, topItems } from "../text-utils.js";
|
|
2
2
|
|
|
3
3
|
const EVIDENCE_PATTERNS = {
|
|
4
|
-
date: /\b\d{1,2}:\d{2}\s?(?:am|pm)?\b|\b20\d{2}-\d{2}-\d{2}\b|\b(?:monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b/i,
|
|
4
|
+
date: /\b\d{1,2}:\d{2}\s?(?:am|pm)?\b|\b20\d{2}-\d{2}-\d{2}\b|\b(?:monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b|\b(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|sep(?:tember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\s+\d{1,2},?\s+\d{4}\b/i,
|
|
5
5
|
number: /\b\d+(?:\.\d+)?\b/,
|
|
6
6
|
quote: /"[^"]+"|'[^']+'|^>/,
|
|
7
7
|
url: /https?:\/\/\S+/i,
|
|
8
|
-
citation: /\[[^\]]+\]\([^)]+\)|\([A-Z][A-Za-z]+,\s*\d{4}\)/,
|
|
9
|
-
sourceAttribution: /\b(according to|reported|observed|noted|recorded|quoted|interviewed|surveyed|field notes said|data shows|study found|the memo|the log|the report)\b/i,
|
|
8
|
+
citation: /\[[^\]]+\]\([^)]+\)|\[\^[^\]]+\]|\([A-Z][A-Za-z]+,\s*\d{4}\)/,
|
|
9
|
+
sourceAttribution: /\b(according to|reported|observed|noted|recorded|quoted|interviewed|surveyed|field notes said|data shows|study found|the memo|the log|the report|source:|internal memo)\b/i,
|
|
10
10
|
sensory: /\b(cold|warm|hot|cool|quiet|loud|bright|dark|red|blue|green|rough|smooth|sharp|soft|smelled|smell|scent|tasted|heard|sound|noise|flashed|visible|physical|rain|metal|smoke)\b/i,
|
|
11
11
|
specificExample: /\b(for example|for instance|such as|including|included|includes|sample|case in point|specifically|in one case)\b/i,
|
|
12
12
|
};
|
|
@@ -11,13 +11,23 @@ const REGISTER_MARKERS = {
|
|
|
11
11
|
formal: ["requires", "outcomes", "process", "alignment", "therefore"],
|
|
12
12
|
};
|
|
13
13
|
|
|
14
|
-
export function analyzeRegister(documents) {
|
|
14
|
+
export function analyzeRegister(documents, { markers = REGISTER_MARKERS } = {}) {
|
|
15
15
|
const text = documents.map((document) => document.text.toLowerCase()).join("\n\n");
|
|
16
16
|
const words = new Set(contentWords(text));
|
|
17
|
-
const
|
|
17
|
+
const markerSets = Object.entries(markers).map(([value, markerList]) => {
|
|
18
|
+
const matchedMarkers = markerList.filter((marker) => markerAppears(text, words, marker));
|
|
19
|
+
return {
|
|
20
|
+
value,
|
|
21
|
+
markers: markerList,
|
|
22
|
+
matchedMarkers,
|
|
23
|
+
score: rate(matchedMarkers.length, markerList.length, 2),
|
|
24
|
+
};
|
|
25
|
+
});
|
|
26
|
+
const scores = markerSets.map(({ value, score }) => ({
|
|
18
27
|
value,
|
|
19
|
-
score
|
|
28
|
+
score,
|
|
20
29
|
})).sort((left, right) => right.score - left.score || left.value.localeCompare(right.value));
|
|
30
|
+
const mixedRegister = isMixedRegister(scores);
|
|
21
31
|
|
|
22
32
|
return {
|
|
23
33
|
family: "register",
|
|
@@ -25,14 +35,28 @@ export function analyzeRegister(documents) {
|
|
|
25
35
|
features: {
|
|
26
36
|
primary: scores[0] ?? { value: "unknown", score: 0 },
|
|
27
37
|
scores,
|
|
38
|
+
markerSets,
|
|
39
|
+
mixedRegister,
|
|
28
40
|
topContentWords: topItems(contentWords(text), 12),
|
|
29
41
|
},
|
|
30
42
|
examples: scores.slice(0, 3).map((item) => `${item.value}: ${item.score}`),
|
|
31
|
-
warnings:
|
|
43
|
+
warnings: [
|
|
44
|
+
...(documents.length < 3 ? ["Register confidence is limited because the corpus has fewer than 3 documents."] : []),
|
|
45
|
+
...(mixedRegister ? ["Mixed register signals detected; treat the primary register as a weak summary of the genre mix."] : []),
|
|
46
|
+
],
|
|
32
47
|
revisionHandles: ["Check whether the draft uses the same broad register and genre mix as the corpus."],
|
|
33
48
|
};
|
|
34
49
|
}
|
|
35
50
|
|
|
51
|
+
function isMixedRegister(scores) {
|
|
52
|
+
const active = scores.filter((item) => item.score > 0);
|
|
53
|
+
if (active.length < 2) {
|
|
54
|
+
return false;
|
|
55
|
+
}
|
|
56
|
+
const [first, second] = active;
|
|
57
|
+
return second.score >= Math.max(0.2, first.score * 0.6);
|
|
58
|
+
}
|
|
59
|
+
|
|
36
60
|
function markerAppears(text, words, marker) {
|
|
37
61
|
const normalized = marker.toLowerCase();
|
|
38
62
|
if (/^[a-z0-9'-]+$/.test(normalized)) {
|
|
@@ -1,11 +1,16 @@
|
|
|
1
1
|
import { evidenceTypes } from "./evidence.js";
|
|
2
2
|
import { transitionLabel } from "./discourse.js";
|
|
3
|
-
import { topItems } from "../text-utils.js";
|
|
3
|
+
import { splitSentences, topItems } from "../text-utils.js";
|
|
4
4
|
|
|
5
5
|
export function analyzeRhetoricalShape(documents) {
|
|
6
6
|
const documentMoves = documents.map((document) => document.sentences.map((sentence) => moveFor(sentence.text)));
|
|
7
7
|
const sentenceMoves = documentMoves.flat();
|
|
8
8
|
const openingMoves = documents.flatMap((document) => document.sentences.slice(0, 3).map((sentence) => moveFor(sentence.text)));
|
|
9
|
+
const paragraphMovePatterns = documents.flatMap((document) =>
|
|
10
|
+
document.paragraphs
|
|
11
|
+
.map((paragraph) => splitSentences(paragraph.text).map((sentence) => moveFor(sentence)).join(" -> "))
|
|
12
|
+
.filter(Boolean)
|
|
13
|
+
);
|
|
9
14
|
const bigrams = [];
|
|
10
15
|
const trigrams = [];
|
|
11
16
|
const openingMovePatterns = [];
|
|
@@ -28,6 +33,7 @@ export function analyzeRhetoricalShape(documents) {
|
|
|
28
33
|
moveRates: topItems(sentenceMoves, 12),
|
|
29
34
|
openingMoves: openingMoves.slice(0, 9),
|
|
30
35
|
openingMovePatterns: topItems(openingMovePatterns, 8),
|
|
36
|
+
paragraphMovePatterns: topItems(paragraphMovePatterns, 12),
|
|
31
37
|
moveBigrams: topItems(bigrams, 12),
|
|
32
38
|
moveTrigrams: topItems(trigrams, 12),
|
|
33
39
|
commonSequences: topItems(bigrams, 12),
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { distribution, rate } from "../text-utils.js";
|
|
1
|
+
import { distribution, rate, topItems } from "../text-utils.js";
|
|
2
2
|
import { moveFor } from "./rhetorical-shape.js";
|
|
3
3
|
|
|
4
4
|
export function analyzeStructure(documents) {
|
|
@@ -6,6 +6,19 @@ export function analyzeStructure(documents) {
|
|
|
6
6
|
document.sections.map((section) => section.blocks.reduce((sum, block) => sum + block.lines.join(" ").split(/\s+/).filter(Boolean).length, 0))
|
|
7
7
|
);
|
|
8
8
|
const openingMoves = documents.flatMap((document) => document.sentences.slice(0, 2).map((sentence) => moveFor(sentence.text)));
|
|
9
|
+
const sectionOrderPatterns = documents
|
|
10
|
+
.map((document) => document.headings.map((heading) => `h${heading.depth}`).join(" -> "))
|
|
11
|
+
.filter(Boolean);
|
|
12
|
+
const listPlacementPatterns = documents.flatMap((document) =>
|
|
13
|
+
document.blocks
|
|
14
|
+
.filter((block) => block.type === "list")
|
|
15
|
+
.map((block) => `h${block.headingDepth || 0}:list`)
|
|
16
|
+
);
|
|
17
|
+
const quotePlacementPatterns = documents.flatMap((document) =>
|
|
18
|
+
document.blocks
|
|
19
|
+
.filter((block) => block.type === "quote")
|
|
20
|
+
.map((block) => `h${block.headingDepth || 0}:quote`)
|
|
21
|
+
);
|
|
9
22
|
|
|
10
23
|
return {
|
|
11
24
|
family: "structure",
|
|
@@ -13,6 +26,10 @@ export function analyzeStructure(documents) {
|
|
|
13
26
|
features: {
|
|
14
27
|
sectionWords: distribution(sectionLengths),
|
|
15
28
|
headingCount: distribution(documents.map((document) => document.headings.length)),
|
|
29
|
+
maxHeadingDepth: distribution(documents.map((document) => Math.max(0, ...document.headings.map((heading) => heading.depth)))),
|
|
30
|
+
sectionOrderPatterns: topItems(sectionOrderPatterns, 12),
|
|
31
|
+
listPlacementPatterns: topItems(listPlacementPatterns, 12),
|
|
32
|
+
quotePlacementPatterns: topItems(quotePlacementPatterns, 12),
|
|
16
33
|
openingMoves,
|
|
17
34
|
listDocumentRate: rate(documents.filter((document) => document.blocks.some((block) => block.type === "list")).length, documents.length, 2),
|
|
18
35
|
quoteDocumentRate: rate(documents.filter((document) => document.blocks.some((block) => block.type === "quote")).length, documents.length, 2),
|
package/src/v2/benchmark.js
CHANGED
|
@@ -81,6 +81,40 @@ export function prepareVoiceBenchmark({ examplesDir, topic, outDir, seed = 1, cw
|
|
|
81
81
|
return benchmark;
|
|
82
82
|
}
|
|
83
83
|
|
|
84
|
+
export function prepareVoiceBenchmarkRuns({ examplesDir, topic, outDir, seed = 1, runs = 3, cwd = process.cwd() }) {
|
|
85
|
+
const runCount = normalizeRunCount(runs);
|
|
86
|
+
const normalizedSeed = normalizeSeed(seed);
|
|
87
|
+
const root = path.resolve(resolvePath(cwd, outDir));
|
|
88
|
+
const preparedRuns = [];
|
|
89
|
+
for (let index = 0; index < runCount; index += 1) {
|
|
90
|
+
const runSeed = (normalizedSeed + index) >>> 0;
|
|
91
|
+
const runName = `run-${String(index + 1).padStart(3, "0")}`;
|
|
92
|
+
const benchmark = prepareVoiceBenchmark({
|
|
93
|
+
examplesDir,
|
|
94
|
+
topic,
|
|
95
|
+
outDir: path.join(root, runName),
|
|
96
|
+
seed: runSeed,
|
|
97
|
+
cwd,
|
|
98
|
+
});
|
|
99
|
+
preparedRuns.push({
|
|
100
|
+
index: index + 1,
|
|
101
|
+
name: runName,
|
|
102
|
+
seed: runSeed,
|
|
103
|
+
path: runName,
|
|
104
|
+
corpusFileCount: benchmark.corpus.fileCount,
|
|
105
|
+
});
|
|
106
|
+
}
|
|
107
|
+
const manifest = {
|
|
108
|
+
schemaVersion: BENCHMARK_SCHEMA_VERSION,
|
|
109
|
+
generatedBy: `${GENERATED_BY}-runs`,
|
|
110
|
+
topic,
|
|
111
|
+
runs: preparedRuns,
|
|
112
|
+
minimumRunsRecommended: Math.max(3, runCount),
|
|
113
|
+
};
|
|
114
|
+
writeUtf8FileSafely(path.join(root, "benchmark-runs.json"), `${JSON.stringify(manifest, null, 2)}\n`);
|
|
115
|
+
return manifest;
|
|
116
|
+
}
|
|
117
|
+
|
|
84
118
|
export function scoreVoiceBenchmark({ runDir, judgePath, judgeFile, judge, cwd = process.cwd() }) {
|
|
85
119
|
const root = path.resolve(resolvePath(cwd, runDir));
|
|
86
120
|
const resolvedJudgePath = judgePath ?? judgeFile ?? judge;
|
|
@@ -145,6 +179,15 @@ export function renderBenchmarkReport(scores) {
|
|
|
145
179
|
lines.push(`Deterministic provisional leader: ${scores.deterministicWinner.draft} (${scores.deterministicWinner.label})`);
|
|
146
180
|
}
|
|
147
181
|
lines.push("Single benchmark run is directional, not proof; compare repeated runs and family diagnostics before deciding.");
|
|
182
|
+
if (scores.comparison) {
|
|
183
|
+
lines.push("");
|
|
184
|
+
lines.push(`Deterministic comparison: ${scores.comparison.deterministicLeader} leads by ${scores.comparison.deterministicMargin} point(s).`);
|
|
185
|
+
lines.push(`Repeated runs recommended: ${scores.comparison.repeatedRunsRecommended ? "yes" : "no"}.`);
|
|
186
|
+
}
|
|
187
|
+
if (scores.repeatSummary) {
|
|
188
|
+
lines.push(`Minimum repeat runs recommended: ${scores.repeatSummary.minimumRunsRecommended}.`);
|
|
189
|
+
lines.push(`Suggested next seeds: ${scores.repeatSummary.nextSeeds.join(", ")}.`);
|
|
190
|
+
}
|
|
148
191
|
|
|
149
192
|
for (const key of ["baseline", "voiceAssisted"]) {
|
|
150
193
|
const draft = scores.drafts[key];
|
|
@@ -217,12 +260,52 @@ function benchmarkScores({ benchmark, baselineReview, voiceReview, judge, judgeP
|
|
|
217
260
|
baseline,
|
|
218
261
|
voiceAssisted,
|
|
219
262
|
},
|
|
263
|
+
comparison: benchmarkComparison({ baseline, voiceAssisted }),
|
|
264
|
+
repeatSummary: repeatSummaryFor(benchmark.seed),
|
|
220
265
|
deterministicWinner,
|
|
221
266
|
winner,
|
|
222
267
|
exitCode: 0,
|
|
223
268
|
};
|
|
224
269
|
}
|
|
225
270
|
|
|
271
|
+
function repeatSummaryFor(seed) {
|
|
272
|
+
const normalized = normalizeSeed(seed);
|
|
273
|
+
return {
|
|
274
|
+
minimumRunsRecommended: 3,
|
|
275
|
+
nextSeeds: [1, 2, 3].map((offset) => (normalized + offset) >>> 0),
|
|
276
|
+
reason: "Compare multiple topics, blind mappings, and draft pairs before treating benchmark results as product evidence.",
|
|
277
|
+
};
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
function normalizeRunCount(runs) {
|
|
281
|
+
const value = String(runs);
|
|
282
|
+
if (!/^\d+$/.test(value)) {
|
|
283
|
+
throw new Error(`Invalid runs value: ${runs}`);
|
|
284
|
+
}
|
|
285
|
+
const parsed = Number(value);
|
|
286
|
+
if (!Number.isSafeInteger(parsed) || parsed < 1 || parsed > 50) {
|
|
287
|
+
throw new Error("Benchmark runs must be an integer between 1 and 50.");
|
|
288
|
+
}
|
|
289
|
+
return parsed;
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
function benchmarkComparison({ baseline, voiceAssisted }) {
|
|
293
|
+
const margin = roundHalfUp(Math.abs(
|
|
294
|
+
voiceAssisted.deterministic.voiceFit - baseline.deterministic.voiceFit,
|
|
295
|
+
), 2);
|
|
296
|
+
const deterministicLeader = voiceAssisted.deterministic.voiceFit === baseline.deterministic.voiceFit
|
|
297
|
+
? "tie"
|
|
298
|
+
: voiceAssisted.deterministic.voiceFit > baseline.deterministic.voiceFit
|
|
299
|
+
? "voice-assisted"
|
|
300
|
+
: "baseline";
|
|
301
|
+
return {
|
|
302
|
+
deterministicLeader,
|
|
303
|
+
deterministicMargin: margin,
|
|
304
|
+
repeatedRunsRecommended: true,
|
|
305
|
+
caution: "Single benchmark runs are directional; repeat with more topics and draft pairs before making product claims.",
|
|
306
|
+
};
|
|
307
|
+
}
|
|
308
|
+
|
|
226
309
|
function draftScore({ key, name, label, review, judge }) {
|
|
227
310
|
const deterministic = deterministicScore(review);
|
|
228
311
|
const judgeDraft = judge ? normalizeJudgeDraft(judge.drafts?.[label], label) : null;
|