dravoice 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +102 -36
- package/bin/dravoice.js +11 -10
- package/package.json +47 -45
- package/src/index.js +874 -197
- package/src/v2/analyzers/discourse.js +63 -52
- package/src/v2/analyzers/evidence.js +73 -38
- package/src/v2/analyzers/lexical.js +114 -58
- package/src/v2/analyzers/register.js +46 -34
- package/src/v2/analyzers/rhetorical-shape.js +59 -48
- package/src/v2/analyzers/rhythm.js +39 -47
- package/src/v2/analyzers/structure.js +24 -24
- package/src/v2/benchmark.js +574 -568
- package/src/v2/brief.js +154 -146
- package/src/v2/config.js +78 -0
- package/src/v2/document-model.js +351 -260
- package/src/v2/inspect.js +67 -67
- package/src/v2/io-utils.js +51 -0
- package/src/v2/profile.js +155 -129
- package/src/v2/prompt.js +65 -64
- package/src/v2/review.js +177 -219
- package/src/v2/revise-plan.js +130 -33
- package/src/v2/stylometry.js +123 -17
- package/src/v2/text-utils.js +123 -123
package/src/v2/revise-plan.js
CHANGED
|
@@ -1,18 +1,16 @@
|
|
|
1
|
-
import fs from "node:fs";
|
|
2
1
|
import path from "node:path";
|
|
3
2
|
import { evidenceTypes, isAbstractClaim } from "./analyzers/evidence.js";
|
|
4
3
|
import { transitionLabel } from "./analyzers/discourse.js";
|
|
5
4
|
import { moveFor } from "./analyzers/rhetorical-shape.js";
|
|
6
5
|
import { parseDocument } from "./document-model.js";
|
|
6
|
+
import { readUtf8FileBounded } from "./io-utils.js";
|
|
7
7
|
import { buildVoiceProfileV2, loadVoicePackV2 } from "./profile.js";
|
|
8
8
|
import { clampScore, round } from "./text-utils.js";
|
|
9
9
|
import {
|
|
10
10
|
STYLOMETRIC_REFERENCES,
|
|
11
|
-
|
|
12
|
-
defaultStyleThresholds,
|
|
13
|
-
distanceByFamily,
|
|
14
|
-
familyScoreFromDistance,
|
|
11
|
+
familyDiagnosticsFor,
|
|
15
12
|
familyWeight,
|
|
13
|
+
styleDistanceFromDiagnostics,
|
|
16
14
|
} from "./stylometry.js";
|
|
17
15
|
|
|
18
16
|
const MAX_ACTIONS = 8;
|
|
@@ -29,18 +27,20 @@ const EDITABILITY = {
|
|
|
29
27
|
|
|
30
28
|
export function revisePlanDraftV2({ file, voice, cwd = process.cwd(), maxActions = MAX_ACTIONS }) {
|
|
31
29
|
const sourceProfile = typeof voice === "string" ? loadVoicePackV2(voice) : voice;
|
|
32
|
-
const filePath =
|
|
30
|
+
const filePath = resolvePath(cwd, file);
|
|
33
31
|
const draftDocument = parseDocument({
|
|
34
32
|
filePath,
|
|
35
33
|
rootDir: cwd,
|
|
36
|
-
contents:
|
|
34
|
+
contents: readUtf8FileBounded(filePath, { label: "Draft file", maxBytes: 2 * 1024 * 1024 }),
|
|
37
35
|
});
|
|
38
36
|
const draftProfile = buildVoiceProfileV2({ documents: [draftDocument] });
|
|
39
37
|
const familyDiagnostics = familyDiagnosticsFor(sourceProfile, draftProfile);
|
|
38
|
+
const rollingWindows = rollingWindowsFor({ sourceProfile, draftDocument });
|
|
40
39
|
const actions = rankedActions({
|
|
41
40
|
sourceProfile,
|
|
42
41
|
draftDocument,
|
|
43
42
|
familyDiagnostics,
|
|
43
|
+
rollingWindows,
|
|
44
44
|
maxActions,
|
|
45
45
|
});
|
|
46
46
|
|
|
@@ -55,11 +55,12 @@ export function revisePlanDraftV2({ file, voice, cwd = process.cwd(), maxActions
|
|
|
55
55
|
},
|
|
56
56
|
summary: {
|
|
57
57
|
corpusConfidence: sourceProfile.source.confidence,
|
|
58
|
-
distance:
|
|
58
|
+
distance: styleDistanceFromDiagnostics(familyDiagnostics),
|
|
59
59
|
familyScores: Object.fromEntries(Object.entries(familyDiagnostics).map(([family, item]) => [family, item.score])),
|
|
60
60
|
familyDistances: Object.fromEntries(Object.entries(familyDiagnostics).map(([family, item]) => [family, item.distance])),
|
|
61
61
|
familyDrift: Object.fromEntries(Object.entries(familyDiagnostics).map(([family, item]) => [family, item.drift])),
|
|
62
62
|
thresholds: Object.fromEntries(Object.entries(familyDiagnostics).map(([family, item]) => [family, item.threshold])),
|
|
63
|
+
rollingWindows,
|
|
63
64
|
},
|
|
64
65
|
actions,
|
|
65
66
|
};
|
|
@@ -102,35 +103,14 @@ export function renderRevisePlanV2(plan) {
|
|
|
102
103
|
return lines.join("\n");
|
|
103
104
|
}
|
|
104
105
|
|
|
105
|
-
function
|
|
106
|
-
const thresholds = thresholdMap(sourceProfile);
|
|
107
|
-
return Object.fromEntries(Object.keys(sourceProfile.families).map((family) => {
|
|
108
|
-
const distance = distanceByFamily(
|
|
109
|
-
family,
|
|
110
|
-
sourceProfile.families[family].features,
|
|
111
|
-
draftProfile.families[family].features,
|
|
112
|
-
);
|
|
113
|
-
const threshold = thresholds[family]?.threshold ?? defaultStyleThresholds()[family]?.threshold ?? 0.4;
|
|
114
|
-
return [family, {
|
|
115
|
-
distance,
|
|
116
|
-
threshold,
|
|
117
|
-
drift: calibratedFamilyDrift(distance, threshold),
|
|
118
|
-
score: familyScoreFromDistance(distance, threshold),
|
|
119
|
-
}];
|
|
120
|
-
}));
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
function thresholdMap(sourceProfile) {
|
|
124
|
-
return sourceProfile.calibration?.styleThresholds?.families ?? defaultStyleThresholds();
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
function rankedActions({ sourceProfile, draftDocument, familyDiagnostics, maxActions }) {
|
|
106
|
+
function rankedActions({ sourceProfile, draftDocument, familyDiagnostics, rollingWindows, maxActions }) {
|
|
128
107
|
const confidence = confidenceWeight(sourceProfile.source.confidence.band);
|
|
129
108
|
const actions = [
|
|
130
109
|
...evidenceActions({ sourceProfile, draftDocument, familyDiagnostics, confidence }),
|
|
131
110
|
...rhythmActions({ sourceProfile, draftDocument, familyDiagnostics, confidence }),
|
|
132
111
|
...shapeActions({ sourceProfile, draftDocument, familyDiagnostics, confidence }),
|
|
133
112
|
...discourseActions({ sourceProfile, draftDocument, familyDiagnostics, confidence }),
|
|
113
|
+
...rollingWindowActions({ rollingWindows, confidence }),
|
|
134
114
|
...documentLevelActions({ sourceProfile, draftDocument, familyDiagnostics, confidence }),
|
|
135
115
|
].filter((action) => action.actionScore > 0);
|
|
136
116
|
|
|
@@ -165,6 +145,7 @@ function evidenceActions({ sourceProfile, draftDocument, familyDiagnostics, conf
|
|
|
165
145
|
unit: { type: "sentence", line: sentence.line },
|
|
166
146
|
confidence,
|
|
167
147
|
drift,
|
|
148
|
+
stability: familyDiagnostics[family]?.stability,
|
|
168
149
|
localMismatch,
|
|
169
150
|
why: "This sentence carries a broad claim pattern without the concrete support rate learned from the source corpus.",
|
|
170
151
|
reviseBy: "Add concrete support: a scene, quote, number, date, citation, URL, sensory detail, or specific example the writer can verify.",
|
|
@@ -194,6 +175,7 @@ function rhythmActions({ sourceProfile, draftDocument, familyDiagnostics, confid
|
|
|
194
175
|
unit: { type: "sentence", line: sentence.line },
|
|
195
176
|
confidence,
|
|
196
177
|
drift,
|
|
178
|
+
stability: familyDiagnostics[family]?.stability,
|
|
197
179
|
localMismatch,
|
|
198
180
|
why: `This sentence is ${direction} than the learned sentence-length band (${source.p25}-${source.p75} words).`,
|
|
199
181
|
reviseBy: "Adjust sentence pacing toward the learned range by splitting, tightening, or pairing it with a deliberately shorter sentence.",
|
|
@@ -221,6 +203,7 @@ function shapeActions({ sourceProfile, draftDocument, familyDiagnostics, confide
|
|
|
221
203
|
unit: { type: "opening", line: draftDocument.sentences[0].line },
|
|
222
204
|
confidence,
|
|
223
205
|
drift,
|
|
206
|
+
stability: familyDiagnostics[family]?.stability,
|
|
224
207
|
localMismatch,
|
|
225
208
|
why: `Draft opening moves (${draftOpening.join(" -> ")}) drift from the learned opening pattern (${sourceOpening.join(" -> ")}).`,
|
|
226
209
|
reviseBy: "Rework the opening toward a compatible scene, claim, contrast, reflection, or evidence sequence without inventing new facts.",
|
|
@@ -252,6 +235,7 @@ function discourseActions({ sourceProfile, draftDocument, familyDiagnostics, con
|
|
|
252
235
|
unit: { type: "sentence", line: sentence.line },
|
|
253
236
|
confidence,
|
|
254
237
|
drift,
|
|
238
|
+
stability: familyDiagnostics[family]?.stability,
|
|
255
239
|
localMismatch: Math.min(1, localMismatch),
|
|
256
240
|
why: `The draft overuses ${label} transitions compared with the source corpus.`,
|
|
257
241
|
reviseBy: "Vary the sentence turn: replace a repeated transition with a callback, concrete example, or direct continuation where it fits.",
|
|
@@ -273,6 +257,7 @@ function documentLevelActions({ sourceProfile, draftDocument, familyDiagnostics,
|
|
|
273
257
|
unit: { type: "document", line: draftDocument.sentences[0]?.line ?? 1 },
|
|
274
258
|
confidence,
|
|
275
259
|
drift,
|
|
260
|
+
stability: familyDiagnostics[family]?.stability,
|
|
276
261
|
localMismatch: 0.7,
|
|
277
262
|
why: documentLevelWhy(sourceProfile, family),
|
|
278
263
|
reviseBy: documentLevelReviseBy(family),
|
|
@@ -301,13 +286,121 @@ function documentLevelReviseBy(family) {
|
|
|
301
286
|
return "Reorder the first section so the piece starts with a structure the source corpus actually uses.";
|
|
302
287
|
}
|
|
303
288
|
|
|
304
|
-
function
|
|
289
|
+
/**
 * Scores overlapping sentence windows of the draft against the source profile
 * and reports the windows whose worst-drifting family drifts the most.
 *
 * Drafts with fewer than 5 sentences yield no windows. Windows are 3 sentences
 * wide when the draft has fewer than 8 sentences and 4 otherwise, and start
 * positions advance by 2 sentences.
 *
 * @param {object} options
 * @param {object} options.sourceProfile - Profile handed to familyDiagnosticsFor.
 * @param {object} options.draftDocument - Parsed draft; `sentences` is read here.
 * @returns {object[]} Up to 4 window summaries, highest drift first (ties by start line).
 */
function rollingWindowsFor({ sourceProfile, draftDocument }) {
  const sentences = draftDocument.sentences;
  if (sentences.length < 5) {
    return [];
  }
  const windowSize = sentences.length < 8 ? 3 : 4;
  const candidateFamilies = ["evidence", "rhythm", "discourse", "rhetoricalShape", "lexical"];
  const result = [];
  for (const start of rollingWindowStarts(sentences.length, windowSize, 2)) {
    const windowSentences = sentences.slice(start, start + windowSize);
    const windowProfile = buildVoiceProfileV2({ documents: [documentForSentences(draftDocument, windowSentences, start)] });
    const diagnostics = familyDiagnosticsFor(sourceProfile, windowProfile);
    const ranked = candidateFamilies
      .map((family) => ({ family, ...diagnostics[family] }))
      // A family missing from the diagnostics has no drift value; leaving it in
      // makes the comparator return NaN, so the winner could be an entry
      // without drift and the whole window would be dropped.
      .filter((item) => Number.isFinite(item.drift))
      .sort((left, right) => right.drift - left.drift || (100 - right.score) - (100 - left.score));
    const best = ranked[0];
    if (best?.drift > 0) {
      result.push({
        family: best.family,
        // Sentence positions are reported 1-based.
        startSentence: start + 1,
        endSentence: start + windowSentences.length,
        startLine: windowSentences[0].line,
        endLine: windowSentences.at(-1).line,
        distance: best.distance,
        drift: best.drift,
        score: best.score,
        threshold: best.threshold,
        stability: best.stability,
      });
    }
  }
  return result
    .sort((left, right) => right.drift - left.drift || left.startLine - right.startLine)
    .slice(0, 4);
}
|
|
324
|
+
|
|
325
|
+
/**
 * Lists the start indexes of fixed-size windows over a run of sentences.
 *
 * Starts advance by `stride` from 0 while a full window still fits. The last
 * possible start (`sentenceCount - windowSize`, floored at 0) is appended when
 * the stride skipped it, so the tail of the draft is always covered.
 *
 * @param {number} sentenceCount - Number of sentences available.
 * @param {number} windowSize - Sentences per window.
 * @param {number} stride - Positive integer step between window starts.
 * @returns {number[]} Ascending start indexes; `[0]` when no full window fits.
 * @throws {RangeError} If `stride` is not a positive integer (it would never advance).
 */
function rollingWindowStarts(sentenceCount, windowSize, stride) {
  if (!Number.isInteger(stride) || stride < 1) {
    throw new RangeError(`stride must be a positive integer, received ${stride}`);
  }
  const lastStart = Math.max(0, sentenceCount - windowSize);
  const starts = [];
  for (let start = 0; start <= sentenceCount - windowSize; start += stride) {
    starts.push(start);
  }
  // Starts are ascending and never exceed lastStart, so checking the final
  // element is enough to know whether the tail window is already present.
  if (starts.at(-1) !== lastStart) {
    starts.push(lastStart);
  }
  return starts;
}
|
|
336
|
+
|
|
337
|
+
/**
 * Wraps a slice of sentences in a minimal single-paragraph document object so
 * the slice can be profiled like a whole draft.
 *
 * @param {object} draftDocument - Original draft; only `file` and `path` are read.
 * @param {object[]} sentences - Sentences for the window; each needs `text`, `line` and `tokens`.
 * @param {number} windowIndex - Zero-based index used to label the synthetic file name.
 * @returns {object} Document with one section, one block and one paragraph.
 */
function documentForSentences(draftDocument, sentences, windowIndex) {
  const joinedText = sentences.map(({ text }) => text).join(" ");

  let wordCount = 0;
  for (const { tokens } of sentences) {
    wordCount += tokens.length;
  }

  // The same block object is shared between `sections` and `blocks`.
  const block = {
    type: "paragraph",
    line: sentences[0]?.line ?? 1,
    heading: null,
    headingId: null,
    headingDepth: 0,
    lines: [joinedText],
  };
  const paragraph = {
    type: "paragraph",
    line: block.line,
    heading: null,
    headingId: null,
    text: joinedText,
  };
  const label = `${draftDocument.file ?? "draft"}#window-${windowIndex + 1}`;

  return {
    file: label,
    path: draftDocument.path,
    headings: [],
    sections: [{ heading: null, blocks: [block] }],
    blocks: [block],
    paragraphs: [paragraph],
    sentences,
    wordCount,
    text: joinedText,
  };
}
|
|
365
|
+
|
|
366
|
+
/**
 * Turns each rolling-window summary into a revise-plan action via makeAction.
 *
 * Evidence windows get priority "review"; every other family gets "consider".
 * The local mismatch is `drift / max(1, drift + 0.5)`, capped at 1.
 *
 * @param {object} options
 * @param {object[]} options.rollingWindows - Window summaries to convert.
 * @param {number} options.confidence - Confidence weight forwarded to makeAction.
 * @returns {object[]} One action per window, in input order.
 */
function rollingWindowActions({ rollingWindows, confidence }) {
  const actions = [];
  for (const [position, span] of rollingWindows.entries()) {
    const { family, drift, stability, startLine, endLine, startSentence, endSentence } = span;
    const denominator = Math.max(1, drift + 0.5);
    actions.push(makeAction({
      family,
      ordinal: `window-${position + 1}`,
      priority: family === "evidence" ? "review" : "consider",
      unit: { type: "window", line: startLine, endLine },
      confidence,
      drift,
      stability,
      localMismatch: Math.min(1, drift / denominator),
      why: `Sentences ${startSentence}-${endSentence} show localized ${family} drift beyond the writer's calibrated range.`,
      reviseBy: rollingWindowReviseBy(family),
    }));
  }
  return actions;
}
|
|
380
|
+
|
|
381
|
+
/**
 * Picks the revision advice shown for a drifting window of the given family.
 *
 * @param {string} family - Style family name.
 * @returns {string} Advice text; families without a dedicated message get the generic one.
 */
function rollingWindowReviseBy(family) {
  switch (family) {
    case "evidence":
      return "Add or move concrete support into this local passage, or narrow the unsupported claims in the same window.";
    case "rhythm":
      return "Revise this passage's sentence and paragraph pacing before changing the whole draft.";
    case "discourse":
      return "Vary the local sentence turns, callbacks, and transitions in this passage.";
    case "rhetoricalShape":
      return "Adjust this passage's move sequence so the local claim, turn, evidence, and implication pattern is less abrupt.";
    default:
      return "Revise this local passage for style fit before making document-wide lexical changes.";
  }
}
|
|
396
|
+
|
|
397
|
+
function makeAction({ family, ordinal, priority, unit, confidence, drift, stability = 0.7, localMismatch, why, reviseBy }) {
|
|
305
398
|
return {
|
|
306
399
|
id: `v2.revise-plan.${family}.${ordinal}`,
|
|
307
400
|
family,
|
|
308
401
|
priority,
|
|
309
402
|
unit,
|
|
310
|
-
actionScore: clampScore(100 * confidence * familyWeight(family) * (EDITABILITY[family] ?? 0.6) * drift * localMismatch),
|
|
403
|
+
actionScore: clampScore(100 * confidence * familyWeight(family) * (EDITABILITY[family] ?? 0.6) * Math.max(0.35, stability) * drift * localMismatch),
|
|
311
404
|
localMismatch: round(localMismatch, 3),
|
|
312
405
|
why,
|
|
313
406
|
reviseBy,
|
|
@@ -335,6 +428,10 @@ function displayPath(filePath, cwd) {
|
|
|
335
428
|
return filePath.split(path.sep).join("/");
|
|
336
429
|
}
|
|
337
430
|
|
|
431
|
+
/**
 * Resolves a possibly-relative path against a working directory.
 *
 * @param {string} cwd - Directory that relative paths are resolved from.
 * @param {string} value - Path to resolve.
 * @returns {string} `value` unchanged when it is already absolute, otherwise the resolved path.
 */
function resolvePath(cwd, value) {
  if (path.isAbsolute(value)) {
    return value;
  }
  return path.resolve(cwd, value);
}
|
|
434
|
+
|
|
338
435
|
/**
 * Upper-cases the first character of a string and leaves the rest untouched.
 *
 * @param {string} value - Text to capitalize.
 * @returns {string} The capitalized text; an empty string stays empty.
 */
function capitalize(value) {
  const head = value.charAt(0).toUpperCase();
  const tail = value.slice(1);
  return `${head}${tail}`;
}
|
package/src/v2/stylometry.js
CHANGED
|
@@ -31,7 +31,7 @@ const FAMILY_WEIGHTS = {
|
|
|
31
31
|
/**
 * Builds the fallback threshold table, one entry per family in DEFAULT_THRESHOLDS.
 *
 * @returns {object} Map of family name to `{ threshold, observations: 0, stability: 0.45 }`.
 */
export function defaultStyleThresholds() {
  const pairs = Object.entries(DEFAULT_THRESHOLDS).map(([family, threshold]) => {
    const entry = { threshold, observations: 0, stability: 0.45 };
    return [family, entry];
  });
  return Object.fromEntries(pairs);
}
|
|
37
37
|
|
|
@@ -59,14 +59,14 @@ export function distanceByFamily(family, sourceFeatures, draftFeatures) {
|
|
|
59
59
|
return evidenceDistance(sourceFeatures, draftFeatures);
|
|
60
60
|
}
|
|
61
61
|
if (family === "structure") {
|
|
62
|
-
return
|
|
62
|
+
return structureDistance(sourceFeatures, draftFeatures);
|
|
63
63
|
}
|
|
64
64
|
return 0;
|
|
65
65
|
}
|
|
66
66
|
|
|
67
67
|
/**
 * Maps a family distance to a score with a logistic curve over the
 * distance-to-threshold ratio; the curve's midpoint sits at a ratio of 1.35.
 *
 * @param {number} distance - Measured family distance.
 * @param {number} threshold - Calibrated threshold; values below 0.01 are treated as 0.01.
 * @returns {number} The logistic value (0-100 before clamping) passed through clampScore.
 */
export function familyScoreFromDistance(distance, threshold) {
  // Floor the threshold so a zero or tiny value cannot blow up the ratio.
  const safeThreshold = Math.max(0.01, threshold);
  const ratio = distance / safeThreshold;
  const denominator = 1 + Math.exp(3 * (ratio - 1.35));
  return clampScore(100 / denominator);
}
|
|
71
71
|
|
|
72
72
|
export function calibratedFamilyDrift(distance, threshold) {
|
|
@@ -82,6 +82,46 @@ export function percentile(values, ratio) {
|
|
|
82
82
|
return sorted[index];
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
+
/**
 * Computes per-family diagnostics for a draft profile against a source profile.
 *
 * Thresholds come from the source profile's calibration when present, then the
 * built-in defaults, then a hard-coded `{ threshold: 0.4, stability: 0.45 }`.
 *
 * @param {object} sourceProfile - Provides `families` and optional `calibration.styleThresholds.families`.
 * @param {object} draftProfile - Must expose `families[family].features` for each source family.
 * @returns {object} Map of family name to `{ distance, threshold, stability, drift, score }`.
 */
export function familyDiagnosticsFor(sourceProfile, draftProfile) {
  const calibrated = sourceProfile.calibration?.styleThresholds?.families ?? defaultStyleThresholds();
  const entries = [];
  for (const family of Object.keys(sourceProfile.families)) {
    const sourceFeatures = sourceProfile.families[family].features;
    const draftFeatures = draftProfile.families[family].features;
    const distance = distanceByFamily(family, sourceFeatures, draftFeatures);

    const thresholdData = calibrated[family] ?? defaultStyleThresholds()[family] ?? { threshold: 0.4, stability: 0.45 };
    const threshold = thresholdData.threshold ?? 0.4;
    // Without a recorded stability, derive one from the observation count.
    const stability = thresholdData.stability ?? stabilityFromObservationCount(thresholdData.observations ?? 0);

    entries.push([family, {
      distance,
      threshold,
      stability,
      drift: calibratedFamilyDrift(distance, threshold),
      score: familyScoreFromDistance(distance, threshold),
    }]);
  }
  return Object.fromEntries(entries);
}
|
|
104
|
+
|
|
105
|
+
/**
 * Collapses per-family diagnostics into one rounded distance figure.
 *
 * Each family contributes `100 - score`, weighted by its family weight times
 * its stability (floored at 0.35, defaulting to 0.45 when absent).
 *
 * @param {object} familyDiagnostics - Map of family name to a diagnostic with `score` and optional `stability`.
 * @returns {number} Weighted mean of the per-family shortfalls, rounded to an integer.
 */
export function styleDistanceFromDiagnostics(familyDiagnostics) {
  const weightedShortfalls = [];
  for (const [family, item] of Object.entries(familyDiagnostics)) {
    const shortfall = 100 - item.score;
    const weight = familyWeight(family) * Math.max(0.35, item.stability ?? 0.45);
    weightedShortfalls.push([shortfall, weight]);
  }
  return Math.round(weightedMean(weightedShortfalls));
}
|
|
113
|
+
|
|
114
|
+
/**
 * Estimates stability from a set of distances using their coefficient of
 * variation (population standard deviation over the mean, mean floored at 0.01).
 *
 * With fewer than two finite values the estimate falls back to
 * stabilityFromObservationCount.
 *
 * @param {number[]} values - Observed distances; non-finite entries are ignored.
 * @returns {number} `1 - coefficient` limited to the range 0.35-1 and rounded to 3 decimals, or the count-based fallback.
 */
export function stabilityFromDistances(values) {
  const usable = values.filter((value) => Number.isFinite(value));
  const count = usable.length;
  if (count < 2) {
    return stabilityFromObservationCount(count);
  }

  let total = 0;
  for (const value of usable) {
    total += value;
  }
  const mean = total / count;

  let squaredDeviation = 0;
  for (const value of usable) {
    squaredDeviation += (value - mean) ** 2;
  }
  const variance = squaredDeviation / count;

  const coefficient = Math.sqrt(variance) / Math.max(0.01, mean);
  const bounded = Math.max(0.35, Math.min(1, 1 - coefficient));
  return round(bounded, 3);
}
|
|
124
|
+
|
|
85
125
|
function rhythmDistance(source, draft) {
|
|
86
126
|
return weightedMean([
|
|
87
127
|
[distributionDelta(source.sentenceWords, draft.sentenceWords), 0.40],
|
|
@@ -95,7 +135,12 @@ function rhythmDistance(source, draft) {
|
|
|
95
135
|
function lexicalDistance(source, draft) {
|
|
96
136
|
return weightedMean([
|
|
97
137
|
[topItemDistance(source.functionWords, draft.functionWords), 1.00],
|
|
98
|
-
[topItemDistance(source.
|
|
138
|
+
[topItemDistance(source.functionWordBigrams, draft.functionWordBigrams), 0.75],
|
|
139
|
+
[topItemDistance(source.maskedCharacterFourgrams, draft.maskedCharacterFourgrams), 1.00],
|
|
140
|
+
[topItemDistance(source.characterTrigrams, draft.characterTrigrams), 0.45],
|
|
141
|
+
[topItemDistance(source.sentenceInitialTokens, draft.sentenceInitialTokens), 0.35],
|
|
142
|
+
[topItemDistance(source.sentenceFinalTokens, draft.sentenceFinalTokens), 0.35],
|
|
143
|
+
[topItemDistance(source.punctuationNgrams, draft.punctuationNgrams), 0.45],
|
|
99
144
|
[punctuationDistance(source.punctuation, draft.punctuation), 0.85],
|
|
100
145
|
[Math.abs((source.vocabularyRichness?.contentTypeTokenRatio ?? 0) - (draft.vocabularyRichness?.contentTypeTokenRatio ?? 0)), 0.25],
|
|
101
146
|
[distributionDelta(source.wordLength, draft.wordLength), 0.40],
|
|
@@ -114,15 +159,18 @@ function discourseDistance(source, draft) {
|
|
|
114
159
|
const transitionDelta = rateMapDistance(source.transitionRates, draft.transitionRates);
|
|
115
160
|
const callbackDelta = Math.abs((source.sentenceCallbacks ?? 0) - (draft.sentenceCallbacks ?? 0));
|
|
116
161
|
return weightedMean([
|
|
117
|
-
[transitionDelta, 0.
|
|
118
|
-
[
|
|
162
|
+
[transitionDelta, 0.55],
|
|
163
|
+
[topItemDistance(source.transitionBigrams, draft.transitionBigrams), 0.25],
|
|
164
|
+
[topItemDistance(source.transitionTrigrams, draft.transitionTrigrams), 0.10],
|
|
165
|
+
[callbackDelta, 0.20],
|
|
119
166
|
]);
|
|
120
167
|
}
|
|
121
168
|
|
|
122
169
|
function evidenceDistance(source, draft) {
|
|
123
170
|
return weightedMean([
|
|
124
|
-
[Math.abs((source.evidenceSentenceRate ?? 0) - (draft.evidenceSentenceRate ?? 0)), 0.
|
|
125
|
-
[Math.abs((source.claimSentenceRate ?? 0) - (draft.claimSentenceRate ?? 0)), 0.
|
|
171
|
+
[Math.abs((source.evidenceSentenceRate ?? 0) - (draft.evidenceSentenceRate ?? 0)), 0.30],
|
|
172
|
+
[Math.abs((source.claimSentenceRate ?? 0) - (draft.claimSentenceRate ?? 0)), 0.14],
|
|
173
|
+
[Math.max(0, (source.supportedClaimRate ?? 0) - (draft.supportedClaimRate ?? 0)), 0.20],
|
|
126
174
|
[Math.max(0, (draft.unsupportedClaimRate ?? 0) - (source.unsupportedClaimRate ?? 0)), 0.26],
|
|
127
175
|
[topItemDistance(source.evidenceTypes, draft.evidenceTypes), 0.20],
|
|
128
176
|
]);
|
|
@@ -130,9 +178,21 @@ function evidenceDistance(source, draft) {
|
|
|
130
178
|
|
|
131
179
|
/**
 * Distance between two rhetorical-shape feature sets, as a weighted mean of
 * opening-move, opening-pattern, move-rate, bigram and trigram distances.
 *
 * `commonSequences` stands in for `moveBigrams` when the latter is nullish.
 *
 * @param {object} source - Source shape features.
 * @param {object} draft - Draft shape features.
 * @returns {number} Result of weightedMean over the component distances.
 */
function shapeDistance(source, draft) {
  const openingSequence = sequenceDistance(source.openingMoves, draft.openingMoves);
  const openingPatterns = topItemDistance(source.openingMovePatterns, draft.openingMovePatterns);
  const moveRates = topItemDistance(source.moveRates, draft.moveRates);
  const bigrams = topItemDistance(
    source.moveBigrams ?? source.commonSequences,
    draft.moveBigrams ?? draft.commonSequences,
  );
  const trigrams = topItemDistance(source.moveTrigrams, draft.moveTrigrams);

  return weightedMean([
    [openingSequence, 0.35],
    [openingPatterns, 0.25],
    [moveRates, 0.20],
    [bigrams, 0.25],
    [trigrams, 0.15],
  ]);
}
|
|
188
|
+
|
|
189
|
+
/**
 * Distance between two structure feature sets, as a weighted mean of
 * section-length, heading-count, list-rate, quote-rate and opening-move deltas.
 *
 * Missing list and quote rates are treated as 0.
 *
 * @param {object} source - Source structure features.
 * @param {object} draft - Draft structure features.
 * @returns {number} Result of weightedMean over the component distances.
 */
function structureDistance(source, draft) {
  const sectionWords = distributionDelta(source.sectionWords, draft.sectionWords);
  const headingCount = distributionDelta(source.headingCount, draft.headingCount);
  const listRate = Math.abs((source.listDocumentRate ?? 0) - (draft.listDocumentRate ?? 0));
  const quoteRate = Math.abs((source.quoteDocumentRate ?? 0) - (draft.quoteDocumentRate ?? 0));
  const openingMoves = sequenceDistance(source.openingMoves, draft.openingMoves);

  return weightedMean([
    [sectionWords, 0.35],
    [headingCount, 0.20],
    [listRate, 0.18],
    [quoteRate, 0.12],
    [openingMoves, 0.15],
  ]);
}
|
|
138
198
|
|
|
@@ -160,14 +220,22 @@ function rateMapDistance(source = {}, draft = {}) {
|
|
|
160
220
|
return keys.reduce((sum, key) => sum + Math.abs((source[key] ?? 0) - (draft[key] ?? 0)), 0) / keys.length;
|
|
161
221
|
}
|
|
162
222
|
|
|
163
|
-
function topItemDistance(sourceItems
|
|
223
|
+
/**
 * Distance between two top-item lists, blending rate delta, cosine distance
 * and Jensen-Shannon distance of their normalized item maps.
 *
 * @param {Array} sourceItems - Source item list.
 * @param {Array} draftItems - Draft item list.
 * @returns {number|null} Blended distance, or `null` when either argument is
 *   not an array or both lists are empty (no signal to compare).
 */
function topItemDistance(sourceItems, draftItems) {
  const bothArrays = Array.isArray(sourceItems) && Array.isArray(draftItems);
  if (!bothArrays) {
    return null;
  }
  const bothEmpty = sourceItems.length === 0 && draftItems.length === 0;
  if (bothEmpty) {
    return null;
  }

  const sourceMap = normalizedItemMap(sourceItems);
  const draftMap = normalizedItemMap(draftItems);
  const components = [
    [rateMapDistance(sourceMap, draftMap), 0.30],
    [cosineDistance(sourceMap, draftMap), 0.45],
    [jensenShannonDistance(sourceMap, draftMap), 0.25],
  ];
  return weightedMean(components);
}
|
|
173
241
|
|
|
@@ -204,6 +272,30 @@ function cosineDistance(left, right) {
|
|
|
204
272
|
return Math.max(0, Math.min(1, 1 - dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm))));
|
|
205
273
|
}
|
|
206
274
|
|
|
275
|
+
/**
 * Jensen-Shannon distance between two key-to-weight maps: the square root of
 * the mean of each map's divergence (via klDivergence) from their midpoint.
 *
 * @param {object} left - First map of key to numeric weight.
 * @param {object} right - Second map of key to numeric weight.
 * @returns {number} Non-negative distance; 0 when neither map has any keys.
 */
function jensenShannonDistance(left, right) {
  const keys = [...new Set([...Object.keys(left), ...Object.keys(right)])];
  if (keys.length === 0) {
    return 0;
  }
  const midpoint = {};
  for (const key of keys) {
    midpoint[key] = ((left[key] ?? 0) + (right[key] ?? 0)) / 2;
  }
  const divergence = (klDivergence(left, midpoint, keys) + klDivergence(right, midpoint, keys)) / 2;
  // For near-identical maps, rounding error can push the sum a hair below
  // zero; without the clamp Math.sqrt would return NaN.
  return Math.sqrt(Math.max(0, divergence));
}
|
|
286
|
+
|
|
287
|
+
/**
 * Sums `p * log2(p / q)` over the given keys, where `p` comes from `source`
 * and `q` from `target`. Keys where either value is missing or not positive
 * contribute nothing.
 *
 * @param {object} source - Map supplying `p` for each key.
 * @param {object} target - Map supplying `q` for each key.
 * @param {string[]} keys - Keys to include in the sum.
 * @returns {number} The accumulated divergence in bits.
 */
function klDivergence(source, target, keys) {
  return keys.reduce((total, key) => {
    const p = source[key] ?? 0;
    const q = target[key] ?? 0;
    if (p <= 0 || q <= 0 || Number.isNaN(p) || Number.isNaN(q)) {
      return total;
    }
    return total + p * Math.log2(p / q);
  }, 0);
}
|
|
298
|
+
|
|
207
299
|
function sequenceDistance(source = [], draft = []) {
|
|
208
300
|
if (source.length === 0 && draft.length === 0) {
|
|
209
301
|
return 0;
|
|
@@ -228,9 +320,23 @@ function relativeDelta(sourceValue, draftValue) {
|
|
|
228
320
|
}
|
|
229
321
|
|
|
230
322
|
/**
 * Weighted average of `[value, weight]` pairs, rounded to 3 decimals.
 *
 * Pairs with a non-finite value (including `null`) or a non-positive weight
 * are ignored, so an unavailable component does not drag the mean toward 0.
 *
 * @param {Array<[number, number]>} weightedValues - Pairs of value and weight.
 * @returns {number} The rounded mean, or 0 when no pair is usable.
 */
function weightedMean(weightedValues) {
  let weightTotal = 0;
  let weightedTotal = 0;
  for (const [value, weight] of weightedValues) {
    if (!Number.isFinite(value) || !(weight > 0)) {
      continue;
    }
    weightTotal += weight;
    weightedTotal += value * weight;
  }
  if (weightTotal === 0) {
    return 0;
  }
  return round(weightedTotal / weightTotal, 3);
}
|
|
330
|
+
|
|
331
|
+
/**
 * Maps an observation count to a fixed stability value: 0.85 for 5 or more,
 * 0.7 for 3-4, 0.6 for 2, and 0.45 otherwise.
 *
 * @param {number} observations - Number of observations behind a threshold.
 * @returns {number} The stability value for that count.
 */
function stabilityFromObservationCount(observations) {
  // Ordered from the highest minimum down; the first tier reached wins.
  const tiers = [
    [5, 0.85],
    [3, 0.7],
    [2, 0.6],
  ];
  for (const [minimum, stability] of tiers) {
    if (observations >= minimum) {
      return stability;
    }
  }
  return 0.45;
}
|