dravoice 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +28 -26
- package/package.json +2 -2
- package/src/index.js +78 -56
- package/src/v2/analyzers/evidence.js +9 -5
- package/src/v2/benchmark.js +67 -136
- package/src/v2/brief.js +146 -146
- package/src/v2/profile.js +73 -25
- package/src/v2/revise-plan.js +340 -0
- package/src/v2/stylometry.js +236 -0
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { evidenceTypes, isAbstractClaim } from "./analyzers/evidence.js";
|
|
4
|
+
import { transitionLabel } from "./analyzers/discourse.js";
|
|
5
|
+
import { moveFor } from "./analyzers/rhetorical-shape.js";
|
|
6
|
+
import { parseDocument } from "./document-model.js";
|
|
7
|
+
import { buildVoiceProfileV2, loadVoicePackV2 } from "./profile.js";
|
|
8
|
+
import { clampScore, round } from "./text-utils.js";
|
|
9
|
+
import {
|
|
10
|
+
STYLOMETRIC_REFERENCES,
|
|
11
|
+
calibratedFamilyDrift,
|
|
12
|
+
defaultStyleThresholds,
|
|
13
|
+
distanceByFamily,
|
|
14
|
+
familyScoreFromDistance,
|
|
15
|
+
familyWeight,
|
|
16
|
+
} from "./stylometry.js";
|
|
17
|
+
|
|
18
|
+
/** Default cap on the number of ranked actions returned in a revision plan. */
const MAX_ACTIONS = 8;

/**
 * Per-family multiplier used in the action-score product built by makeAction.
 * Families missing from this table fall back to 0.6 at the lookup site.
 */
const EDITABILITY = {
  evidence: 1,
  rhetoricalShape: 0.9,
  rhythm: 0.8,
  discourse: 0.75,
  lexical: 0.55,
  register: 0.5,
  structure: 0.6,
};
|
|
29
|
+
|
|
30
|
+
/**
 * Build a revision plan for a single draft file measured against a voice profile.
 *
 * Reads the draft from disk, parses it, builds a one-document profile for it,
 * computes per-family diagnostics against the source profile and ranks the
 * resulting revision actions.
 *
 * @param {object} options
 * @param {string} options.file - Draft path; resolved with `path.resolve`.
 * @param {string|object} options.voice - A string is handed to `loadVoicePackV2`;
 *   any other value is used directly as the source profile.
 * @param {string} [options.cwd=process.cwd()] - Passed to `parseDocument` as
 *   `rootDir` and used to shorten the reported file path.
 * @param {number} [options.maxActions=MAX_ACTIONS] - Upper bound on returned actions.
 * @returns {object} Plan with `schemaVersion`, `generatedBy`, `file`, `method`,
 *   `summary` and `actions`.
 * @throws Whatever `fs.readFileSync` throws when the draft cannot be read.
 */
export function revisePlanDraftV2({ file, voice, cwd = process.cwd(), maxActions = MAX_ACTIONS }) {
  const sourceProfile = typeof voice === "string" ? loadVoicePackV2(voice) : voice;
  const filePath = path.resolve(file);
  const draftDocument = parseDocument({
    filePath,
    rootDir: cwd,
    contents: fs.readFileSync(filePath, "utf8"),
  });
  const draftProfile = buildVoiceProfileV2({ documents: [draftDocument] });
  const familyDiagnostics = familyDiagnosticsFor(sourceProfile, draftProfile);
  const actions = rankedActions({
    sourceProfile,
    draftDocument,
    familyDiagnostics,
    maxActions,
  });

  const diagnostics = Object.values(familyDiagnostics);
  const totalShortfall = diagnostics.reduce((sum, item) => sum + (100 - item.score), 0);
  // With no families the mean would be 0 / 0 = NaN, which JSON.stringify
  // serializes as null; report a zero distance instead.
  const distance = diagnostics.length === 0 ? 0 : Math.round(totalShortfall / diagnostics.length);

  return {
    schemaVersion: 2,
    generatedBy: "dravoice-v2-revise-plan",
    file: displayPath(filePath, cwd),
    method: {
      name: "calibrated-stylometric-revision-plan",
      references: STYLOMETRIC_REFERENCES,
      caution: "Revision planning only; not AI detection and not proof of authorship.",
    },
    summary: {
      corpusConfidence: sourceProfile.source.confidence,
      distance,
      familyScores: diagnosticFieldByFamily(familyDiagnostics, "score"),
      familyDistances: diagnosticFieldByFamily(familyDiagnostics, "distance"),
      familyDrift: diagnosticFieldByFamily(familyDiagnostics, "drift"),
      thresholds: diagnosticFieldByFamily(familyDiagnostics, "threshold"),
    },
    actions,
  };
}

/** Project one field of every family's diagnostic record into a `{ family: value }` map. */
function diagnosticFieldByFamily(familyDiagnostics, field) {
  return Object.fromEntries(
    Object.entries(familyDiagnostics).map(([family, item]) => [family, item[field]]),
  );
}
|
|
67
|
+
|
|
68
|
+
/**
 * Render a revision plan as plain text, one entry per line, joined with "\n".
 *
 * Layout: fixed header, one line per family score, a blank line, then either
 * a "nothing to do" sentence or a numbered list of actions, and a final
 * empty entry so the text ends with a newline.
 *
 * @param {object} plan - Object shaped like the return value of revisePlanDraftV2.
 * @returns {string} The rendered report.
 */
export function renderRevisePlanV2(plan) {
  const header = [
    "# Revision Plan",
    "",
    "Voice revision guidance, not AI detection.",
    `Method: calibrated stylometric distance using Burrows Delta, Cosine Delta, function-word stylometry, and discourse features.`,
    "",
    plan.file,
    `Corpus confidence: ${capitalize(plan.summary.corpusConfidence.band)} - ${plan.summary.corpusConfidence.message}`,
    `Style distance: ${plan.summary.distance}`,
    "Family scores:",
  ];

  const familyLines = Object.entries(plan.summary.familyScores).map(
    ([family, score]) => `- ${family}: ${score} (drift ${plan.summary.familyDrift[family]})`,
  );

  let body;
  if (plan.actions.length === 0) {
    body = ["No calibrated revision actions exceeded the writer's normal style variance."];
  } else {
    // Five lines per action: a numbered headline followed by four detail lines.
    const actionLines = plan.actions.flatMap((action, position) => [
      `${position + 1}. ${action.priority} ${action.family} ${action.id}`,
      ` Unit: ${action.unit.type} at line ${action.unit.line}`,
      ` Score: ${action.actionScore}`,
      ` Why flagged: ${action.why}`,
      ` Revise by: ${action.reviseBy}`,
    ]);
    body = ["Start here:", ...actionLines];
  }

  return [...header, ...familyLines, "", ...body, ""].join("\n");
}
|
|
104
|
+
|
|
105
|
+
/**
 * Compute distance, threshold, drift and score for every family listed on the
 * source profile.
 *
 * The threshold comes from the profile's threshold map, then from the default
 * thresholds, and finally falls back to 0.4.
 *
 * @param {object} sourceProfile - Profile whose `families` keys drive the result.
 * @param {object} draftProfile - Profile of the draft; must have the same families.
 * @returns {object} Map of family name to `{ distance, threshold, drift, score }`.
 */
function familyDiagnosticsFor(sourceProfile, draftProfile) {
  const calibrated = thresholdMap(sourceProfile);
  const entries = [];
  for (const family of Object.keys(sourceProfile.families)) {
    const distance = distanceByFamily(
      family,
      sourceProfile.families[family].features,
      draftProfile.families[family].features,
    );
    const threshold = calibrated[family]?.threshold
      ?? defaultStyleThresholds()[family]?.threshold
      ?? 0.4;
    const drift = calibratedFamilyDrift(distance, threshold);
    const score = familyScoreFromDistance(distance, threshold);
    entries.push([family, { distance, threshold, drift, score }]);
  }
  return Object.fromEntries(entries);
}
|
|
122
|
+
|
|
123
|
+
/**
 * Return the per-family thresholds stored on the profile under
 * `calibration.styleThresholds.families`, or the defaults when that path is
 * null or undefined.
 *
 * @param {object} sourceProfile
 * @returns {object} Map of family name to threshold record.
 */
function thresholdMap(sourceProfile) {
  const calibrated = sourceProfile.calibration?.styleThresholds?.families;
  if (calibrated === undefined || calibrated === null) {
    return defaultStyleThresholds();
  }
  return calibrated;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Collect candidate actions from every generator, drop those with a
 * non-positive score, order them by descending score (ties broken by id via
 * `localeCompare`), keep the first `maxActions`, and stamp a 1-based `rank`.
 *
 * @param {object} options
 * @param {object} options.sourceProfile
 * @param {object} options.draftDocument
 * @param {object} options.familyDiagnostics
 * @param {number} options.maxActions
 * @returns {object[]} Ranked actions.
 */
function rankedActions({ sourceProfile, draftDocument, familyDiagnostics, maxActions }) {
  const context = {
    sourceProfile,
    draftDocument,
    familyDiagnostics,
    confidence: confidenceWeight(sourceProfile.source.confidence.band),
  };
  // Generator order matches the order in which candidates are concatenated.
  const generators = [
    evidenceActions,
    rhythmActions,
    shapeActions,
    discourseActions,
    documentLevelActions,
  ];

  const candidates = generators
    .flatMap((generate) => generate(context))
    .filter((candidate) => candidate.actionScore > 0);

  candidates.sort((left, right) => {
    const byScore = right.actionScore - left.actionScore;
    return byScore || left.id.localeCompare(right.id);
  });

  return candidates
    .slice(0, maxActions)
    .map((candidate, position) => ({ ...candidate, rank: position + 1 }));
}
|
|
142
|
+
|
|
143
|
+
/**
 * Propose sentence-level actions for the "evidence" family.
 *
 * Nothing is proposed unless the family's drift is positive. A sentence is
 * flagged when `evidenceTypes` finds nothing in it and either
 * `isAbstractClaim` is truthy (mismatch 1, priority "review") or the source
 * evidence-sentence rate is at least 0.25 (mismatch 0.65, priority "consider").
 *
 * @returns {object[]} Actions built with makeAction; `ordinal` is the 1-based sentence index.
 */
function evidenceActions({ sourceProfile, draftDocument, familyDiagnostics, confidence }) {
  const family = "evidence";
  const drift = familyDiagnostics[family]?.drift ?? 0;
  if (drift <= 0) {
    return [];
  }

  const sourceRate = sourceProfile.families.evidence.features.evidenceSentenceRate;
  const actions = [];

  draftDocument.sentences.forEach((sentence, position) => {
    const types = evidenceTypes(sentence.text);
    const claim = isAbstractClaim(sentence.text);

    let localMismatch = 0;
    if (types.length === 0) {
      if (claim) {
        localMismatch = 1;
      } else if (sourceRate >= 0.25) {
        localMismatch = 0.65;
      }
    }
    if (localMismatch <= 0) {
      return;
    }

    actions.push(makeAction({
      family,
      ordinal: position + 1,
      priority: claim ? "review" : "consider",
      unit: { type: "sentence", line: sentence.line },
      confidence,
      drift,
      localMismatch,
      why: "This sentence carries a broad claim pattern without the concrete support rate learned from the source corpus.",
      reviseBy: "Add concrete support: a scene, quote, number, date, citation, URL, sensory detail, or specific example the writer can verify.",
    }));
  });

  return actions;
}
|
|
174
|
+
|
|
175
|
+
/**
 * Propose sentence-level actions for the "rhythm" family.
 *
 * Nothing is proposed unless the family's drift is positive. A sentence is
 * flagged when its token count falls outside the source p25-p75 band; the
 * mismatch is the distance to the nearest band edge divided by the source
 * median (floored at 1), capped at 1.
 *
 * @returns {object[]} Actions built with makeAction; `ordinal` is the 1-based sentence index.
 */
function rhythmActions({ sourceProfile, draftDocument, familyDiagnostics, confidence }) {
  const family = "rhythm";
  const drift = familyDiagnostics[family]?.drift ?? 0;
  if (drift <= 0) {
    return [];
  }

  const band = sourceProfile.families.rhythm.features.sentenceWords;
  const actions = [];

  draftDocument.sentences.forEach((sentence, position) => {
    const wordCount = sentence.tokens.length;

    let nearestBound = wordCount;
    if (wordCount < band.p25) {
      nearestBound = band.p25;
    } else if (wordCount > band.p75) {
      nearestBound = band.p75;
    }

    const overshoot = Math.abs(wordCount - nearestBound);
    const localMismatch = Math.min(1, overshoot / Math.max(1, band.median));
    if (localMismatch <= 0) {
      return;
    }

    // A positive mismatch means the count is outside the band, so anything
    // that is not above p75 is below p25.
    const direction = wordCount > band.p75 ? "longer" : "shorter";
    actions.push(makeAction({
      family,
      ordinal: position + 1,
      priority: "consider",
      unit: { type: "sentence", line: sentence.line },
      confidence,
      drift,
      localMismatch,
      why: `This sentence is ${direction} than the learned sentence-length band (${band.p25}-${band.p75} words).`,
      reviseBy: "Adjust sentence pacing toward the learned range by splitting, tightening, or pairing it with a deliberately shorter sentence.",
    }));
  });

  return actions;
}
|
|
203
|
+
|
|
204
|
+
/**
 * Propose at most one action for the "rhetoricalShape" family, comparing the
 * moves of the draft's first three sentences (via `moveFor`) with the first
 * three learned opening moves, position by position.
 *
 * Nothing is proposed when drift is not positive, the draft has no sentences,
 * or every compared position matches.
 *
 * @returns {object[]} Empty, or a single action anchored at the first sentence's line.
 */
function shapeActions({ sourceProfile, draftDocument, familyDiagnostics, confidence }) {
  const family = "rhetoricalShape";
  const drift = familyDiagnostics[family]?.drift ?? 0;
  if (drift <= 0 || draftDocument.sentences.length === 0) {
    return [];
  }

  const sourceOpening = sourceProfile.families.rhetoricalShape.features.openingMoves.slice(0, 3);
  const draftOpening = draftDocument.sentences
    .slice(0, 3)
    .map((sentence) => moveFor(sentence.text));

  let mismatches = 0;
  draftOpening.forEach((move, position) => {
    if (move !== sourceOpening[position]) {
      mismatches += 1;
    }
  });

  const compared = Math.max(1, Math.min(3, draftOpening.length));
  const localMismatch = mismatches / compared;
  if (localMismatch <= 0) {
    return [];
  }

  const action = makeAction({
    family,
    ordinal: 1,
    priority: "consider",
    unit: { type: "opening", line: draftDocument.sentences[0].line },
    confidence,
    drift,
    localMismatch,
    why: `Draft opening moves (${draftOpening.join(" -> ")}) drift from the learned opening pattern (${sourceOpening.join(" -> ")}).`,
    reviseBy: "Rework the opening toward a compatible scene, claim, contrast, reflection, or evidence sequence without inventing new facts.",
  });
  return [action];
}
|
|
229
|
+
|
|
230
|
+
/**
 * Propose sentence-level actions for the "discourse" family.
 *
 * Nothing is proposed unless the family's drift is positive. Each sentence is
 * labelled with `transitionLabel`; a sentence whose label is not "plain" is
 * flagged when the draft's rate for that label exceeds the source rate by
 * more than 0.1. The mismatch passed to makeAction is capped at 1.
 *
 * Labels and per-label counts are computed once up front, so the draft is
 * labelled a single time instead of once per flagged candidate.
 *
 * @returns {object[]} Actions built with makeAction; `ordinal` is the 1-based sentence index.
 */
function discourseActions({ sourceProfile, draftDocument, familyDiagnostics, confidence }) {
  const family = "discourse";
  const drift = familyDiagnostics[family]?.drift ?? 0;
  if (drift <= 0) {
    return [];
  }

  const sourceTransitions = sourceProfile.families.discourse.features.transitionRates;
  const { sentences } = draftDocument;

  const labels = sentences.map((sentence) => transitionLabel(sentence.text));
  const labelCounts = new Map();
  for (const label of labels) {
    labelCounts.set(label, (labelCounts.get(label) ?? 0) + 1);
  }
  const sentenceTotal = Math.max(1, sentences.length);

  return sentences.flatMap((sentence, index) => {
    const label = labels[index];
    if (label === "plain") {
      return [];
    }
    const sourceRate = sourceTransitions[label] ?? 0;
    const draftRate = labelCounts.get(label) / sentenceTotal;
    const localMismatch = Math.max(0, draftRate - sourceRate);
    if (localMismatch <= 0.1) {
      return [];
    }
    return [makeAction({
      family,
      ordinal: index + 1,
      priority: "consider",
      unit: { type: "sentence", line: sentence.line },
      confidence,
      drift,
      localMismatch: Math.min(1, localMismatch),
      why: `The draft overuses ${label} transitions compared with the source corpus.`,
      reviseBy: "Vary the sentence turn: replace a repeated transition with a callback, concrete example, or direct continuation where it fits.",
    })];
  });
}
|
|
261
|
+
|
|
262
|
+
/**
 * Propose one document-wide action for each of the "lexical", "register" and
 * "structure" families whose drift is positive.
 *
 * Each action uses a fixed local mismatch of 0.7 and is anchored at the first
 * sentence's line, or line 1 when the draft has no sentences.
 *
 * @returns {object[]} Zero to three actions, in the family order listed above.
 */
function documentLevelActions({ sourceProfile, draftDocument, familyDiagnostics, confidence }) {
  const documentFamilies = ["lexical", "register", "structure"];
  return documentFamilies.flatMap((family) => {
    const drift = familyDiagnostics[family]?.drift ?? 0;
    if (drift <= 0) {
      return [];
    }
    const action = makeAction({
      family,
      ordinal: 1,
      priority: "consider",
      unit: { type: "document", line: draftDocument.sentences[0]?.line ?? 1 },
      confidence,
      drift,
      localMismatch: 0.7,
      why: documentLevelWhy(sourceProfile, family),
      reviseBy: documentLevelReviseBy(family),
    });
    return [action];
  });
}
|
|
283
|
+
|
|
284
|
+
/**
 * Explain why a document-level family was flagged.
 *
 * @param {object} sourceProfile - Only read for the "register" family, where
 *   `families.register.features.primary.value` is quoted in the message.
 * @param {string} family - "lexical", "register", or anything else (treated
 *   as the structure explanation).
 * @returns {string} Human-readable explanation.
 */
function documentLevelWhy(sourceProfile, family) {
  if (family === "lexical") {
    return "Function words, character trigrams, punctuation, or word-length habits drift from the calibrated source profile.";
  }
  if (family === "register") {
    // The register distance tolerates a profile without a primary register
    // (it reads `primary?.value`), so the explanation must not throw on one.
    const primary = sourceProfile.families.register.features.primary?.value ?? "unknown";
    return `The draft register differs from the learned primary register (${primary}).`;
  }
  return "The document-level opening structure drifts from the source corpus.";
}
|
|
293
|
+
|
|
294
|
+
/**
 * Return the revision advice for a document-level family.
 *
 * @param {string} family - "lexical", "register", or anything else (treated
 *   as the structure advice).
 * @returns {string} Human-readable advice.
 */
function documentLevelReviseBy(family) {
  switch (family) {
    case "lexical":
      return "Revise diction and punctuation only where it improves the article; avoid topic-word stuffing or random imperfections.";
    case "register":
      return "Bring the paragraph stance closer to the learned genre mix without pretending to be someone else.";
    default:
      return "Reorder the first section so the piece starts with a structure the source corpus actually uses.";
  }
}
|
|
303
|
+
|
|
304
|
+
/**
 * Assemble one revision action record.
 *
 * The score is the product 100 x confidence x family weight x editability x
 * drift x local mismatch, passed through `clampScore`; `localMismatch` is
 * stored after `round(..., 3)`. (Both helpers come from text-utils; their
 * exact bounds and rounding are presumably 0-100 and three decimals, to be
 * confirmed there.)
 *
 * @returns {{id: string, family: string, priority: string, unit: object,
 *   actionScore: number, localMismatch: number, why: string, reviseBy: string}}
 */
function makeAction({ family, ordinal, priority, unit, confidence, drift, localMismatch, why, reviseBy }) {
  const editability = EDITABILITY[family] ?? 0.6;
  // Keep the factors in this order so floating-point results stay stable.
  const rawScore = 100 * confidence * familyWeight(family) * editability * drift * localMismatch;
  const id = `v2.revise-plan.${family}.${ordinal}`;

  return {
    id,
    family,
    priority,
    unit,
    actionScore: clampScore(rawScore),
    localMismatch: round(localMismatch, 3),
    why,
    reviseBy,
  };
}
|
|
316
|
+
|
|
317
|
+
/**
 * Translate a corpus-confidence band into a score multiplier.
 *
 * @param {string} band - "deep", "strong", "usable", or anything else.
 * @returns {number} 1, 0.9, 0.75, or 0.45 for any unrecognized band.
 */
function confidenceWeight(band) {
  switch (band) {
    case "deep":
      return 1;
    case "strong":
      return 0.9;
    case "usable":
      return 0.75;
    default:
      return 0.45;
  }
}
|
|
329
|
+
|
|
330
|
+
/**
 * Format a file path for display with forward slashes.
 *
 * Returns the path relative to `cwd` when the file lies strictly inside it,
 * otherwise the given `filePath`.
 *
 * @param {string} filePath - Path to display (callers pass an absolute path).
 * @param {string} cwd - Directory the path is shown relative to.
 * @returns {string} Slash-separated path.
 */
function displayPath(filePath, cwd) {
  const relative = path.relative(cwd, filePath);
  // Only a leading ".." *segment* means the file is outside cwd; a plain
  // prefix test would also reject in-tree names such as "..drafts/a.md".
  const leavesCwd = relative === ".." || relative.startsWith(`..${path.sep}`);
  if (relative && !leavesCwd && !path.isAbsolute(relative)) {
    return relative.split(path.sep).join("/");
  }
  return filePath.split(path.sep).join("/");
}
|
|
337
|
+
|
|
338
|
+
/**
 * Upper-case the first character of a string and leave the rest untouched.
 *
 * @param {string} value
 * @returns {string}
 */
function capitalize(value) {
  const head = value.charAt(0);
  const tail = value.slice(1);
  return `${head.toUpperCase()}${tail}`;
}
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
import { clampScore, round } from "./text-utils.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Citation strings attached to generated plans to name the stylometric
 * methods the distance measures are based on.
 */
export const STYLOMETRIC_REFERENCES = [
  "Burrows Delta (Burrows, 2002)",
  "Cosine Delta (Smith & Aldridge, 2011)",
  "Function-word stylometry (Kestemont, 2014)",
  "Authorship-attribution feature families (Stamatatos, 2009)",
  "Cohesion and discourse measures inspired by Coh-Metrix (Graesser et al., 2004)",
];
|
|
10
|
+
|
|
11
|
+
/**
 * Fallback per-family distance thresholds, served by defaultStyleThresholds
 * when a profile carries no calibrated values.
 */
const DEFAULT_THRESHOLDS = {
  rhythm: 0.36,
  lexical: 0.32,
  register: 0.45,
  discourse: 0.28,
  rhetoricalShape: 0.42,
  evidence: 0.3,
  structure: 0.42,
};

/**
 * Per-family weights returned by familyWeight; families not listed here get
 * 0.6 at the lookup site.
 */
const FAMILY_WEIGHTS = {
  rhythm: 0.75,
  lexical: 0.85,
  register: 0.5,
  discourse: 0.7,
  rhetoricalShape: 0.7,
  evidence: 0.85,
  structure: 0.65,
};
|
|
30
|
+
|
|
31
|
+
/**
 * Build a fresh threshold map from DEFAULT_THRESHOLDS.
 *
 * @returns {object} Map of family name to `{ threshold, observations: 0 }`;
 *   a new object (with new records) on every call.
 */
export function defaultStyleThresholds() {
  const entries = [];
  for (const [family, threshold] of Object.entries(DEFAULT_THRESHOLDS)) {
    entries.push([family, { threshold, observations: 0 }]);
  }
  return Object.fromEntries(entries);
}
|
|
37
|
+
|
|
38
|
+
/**
 * Look up the weight of a feature family.
 *
 * @param {string} family
 * @returns {number} The FAMILY_WEIGHTS entry, or 0.6 when the entry is null or undefined.
 */
export function familyWeight(family) {
  const configured = FAMILY_WEIGHTS[family];
  if (configured === undefined || configured === null) {
    return 0.6;
  }
  return configured;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Dispatch to the distance function for a feature family.
 *
 * @param {string} family - Family name.
 * @param {object} sourceFeatures - Features of the source profile for that family.
 * @param {object} draftFeatures - Features of the draft profile for that family.
 * @returns {number} The family distance, or 0 for an unrecognized family.
 */
export function distanceByFamily(family, sourceFeatures, draftFeatures) {
  switch (family) {
    case "rhythm":
      return rhythmDistance(sourceFeatures, draftFeatures);
    case "lexical":
      return lexicalDistance(sourceFeatures, draftFeatures);
    case "register":
      return registerDistance(sourceFeatures, draftFeatures);
    case "discourse":
      return discourseDistance(sourceFeatures, draftFeatures);
    case "evidence":
      return evidenceDistance(sourceFeatures, draftFeatures);
    // NOTE(review): "structure" is measured with the rhetorical-shape distance;
    // confirm that structure features expose the same fields it reads.
    case "rhetoricalShape":
    case "structure":
      return shapeDistance(sourceFeatures, draftFeatures);
    default:
      return 0;
  }
}
|
|
66
|
+
|
|
67
|
+
/**
 * Convert a family distance into a score: 100 minus the distance expressed as
 * a percentage of twice the threshold (basis floored at 0.01), passed through
 * `clampScore` (presumably bounding it to the score range; see text-utils).
 *
 * @param {number} distance
 * @param {number} threshold
 * @returns {number}
 */
export function familyScoreFromDistance(distance, threshold) {
  const basis = Math.max(0.01, threshold * 2);
  const fractionOfBasis = distance / basis;
  return clampScore(100 - fractionOfBasis * 100);
}
|
|
71
|
+
|
|
72
|
+
/**
 * Express how far a distance exceeds its threshold, in units of the threshold
 * (divisor floored at 0.01). Distances at or below the threshold give 0 before
 * rounding. The result goes through `round(..., 3)` from text-utils.
 *
 * @param {number} distance
 * @param {number} threshold
 * @returns {number}
 */
export function calibratedFamilyDrift(distance, threshold) {
  const excess = Math.max(0, distance - threshold);
  const unit = Math.max(0.01, threshold);
  return round(excess / unit, 3);
}
|
|
75
|
+
|
|
76
|
+
/**
 * Pick the element at a given ratio of the sorted finite values.
 *
 * Non-finite entries are ignored. The index is `ceil((count - 1) * ratio)`,
 * clamped to the valid range; the input array is not modified.
 *
 * @param {number[]} values
 * @param {number} ratio - Position in the sorted list, 0 for the smallest and 1 for the largest.
 * @returns {number} The selected value, or 0 when no finite value exists.
 */
export function percentile(values, ratio) {
  const finite = [];
  for (const candidate of values) {
    if (Number.isFinite(candidate)) {
      finite.push(candidate);
    }
  }
  if (finite.length === 0) {
    return 0;
  }
  finite.sort((left, right) => left - right);

  const lastIndex = finite.length - 1;
  const rank = Math.ceil(lastIndex * ratio);
  const index = Math.min(lastIndex, Math.max(0, rank));
  return finite[index];
}
|
|
84
|
+
|
|
85
|
+
/**
 * Rhythm-family distance: weighted mean of the sentence-word,
 * paragraph-word and paragraph-sentence distribution deltas plus the absolute
 * differences in list density and quote density (missing densities count as 0).
 *
 * @param {object} source - Source rhythm features.
 * @param {object} draft - Draft rhythm features.
 * @returns {number}
 */
function rhythmDistance(source, draft) {
  const sentenceWords = distributionDelta(source.sentenceWords, draft.sentenceWords);
  const paragraphWords = distributionDelta(source.paragraphWords, draft.paragraphWords);
  const paragraphSentences = distributionDelta(source.paragraphSentences, draft.paragraphSentences);
  const listDensity = Math.abs((source.listDensity ?? 0) - (draft.listDensity ?? 0));
  const quoteDensity = Math.abs((source.quoteDensity ?? 0) - (draft.quoteDensity ?? 0));

  return weightedMean([
    [sentenceWords, 0.40],
    [paragraphWords, 0.32],
    [paragraphSentences, 0.16],
    [listDensity, 0.06],
    [quoteDensity, 0.06],
  ]);
}
|
|
94
|
+
|
|
95
|
+
/**
 * Lexical-family distance: weighted mean of function-word and
 * character-trigram item distances, punctuation rate distance, the absolute
 * difference in content type-token ratio (missing values count as 0), and the
 * word-length distribution delta.
 *
 * @param {object} source - Source lexical features.
 * @param {object} draft - Draft lexical features.
 * @returns {number}
 */
function lexicalDistance(source, draft) {
  const functionWords = topItemDistance(source.functionWords, draft.functionWords);
  const trigrams = topItemDistance(source.characterTrigrams, draft.characterTrigrams);
  const punctuation = punctuationDistance(source.punctuation, draft.punctuation);
  const sourceRichness = source.vocabularyRichness?.contentTypeTokenRatio ?? 0;
  const draftRichness = draft.vocabularyRichness?.contentTypeTokenRatio ?? 0;
  const richness = Math.abs(sourceRichness - draftRichness);
  const wordLength = distributionDelta(source.wordLength, draft.wordLength);

  return weightedMean([
    [functionWords, 1.00],
    [trigrams, 0.85],
    [punctuation, 0.85],
    [richness, 0.25],
    [wordLength, 0.40],
  ]);
}
|
|
104
|
+
|
|
105
|
+
/**
 * Register-family distance: a fixed 0.65 penalty when the primary register
 * values differ (0 when they match, including both missing), blended 0.8 / 0.2
 * with the item distance between the two score lists.
 *
 * @param {object} source - Source register features.
 * @param {object} draft - Draft register features.
 * @returns {number}
 */
function registerDistance(source, draft) {
  const samePrimary = source.primary?.value === draft.primary?.value;
  const primaryDelta = samePrimary ? 0 : 0.65;
  const scoreDelta = topItemDistance(source.scores, draft.scores);

  return weightedMean([
    [primaryDelta, 0.8],
    [scoreDelta, 0.2],
  ]);
}
|
|
112
|
+
|
|
113
|
+
/**
 * Discourse-family distance: transition-rate map distance (weight 0.75)
 * blended with the absolute difference in sentence callbacks (weight 0.25,
 * missing values count as 0).
 *
 * @param {object} source - Source discourse features.
 * @param {object} draft - Draft discourse features.
 * @returns {number}
 */
function discourseDistance(source, draft) {
  const transitions = rateMapDistance(source.transitionRates, draft.transitionRates);
  const sourceCallbacks = source.sentenceCallbacks ?? 0;
  const draftCallbacks = draft.sentenceCallbacks ?? 0;
  const callbacks = Math.abs(sourceCallbacks - draftCallbacks);

  return weightedMean([
    [transitions, 0.75],
    [callbacks, 0.25],
  ]);
}
|
|
121
|
+
|
|
122
|
+
/**
 * Evidence-family distance: weighted mean of the absolute differences in
 * evidence-sentence rate and claim-sentence rate, the amount by which the
 * draft's unsupported-claim rate exceeds the source's (never negative), and
 * the item distance between evidence-type lists. Missing rates count as 0.
 *
 * @param {object} source - Source evidence features.
 * @param {object} draft - Draft evidence features.
 * @returns {number}
 */
function evidenceDistance(source, draft) {
  const evidenceRate = Math.abs((source.evidenceSentenceRate ?? 0) - (draft.evidenceSentenceRate ?? 0));
  const claimRate = Math.abs((source.claimSentenceRate ?? 0) - (draft.claimSentenceRate ?? 0));
  // One-sided: a draft with fewer unsupported claims than the source is not penalized.
  const unsupportedExcess = Math.max(0, (draft.unsupportedClaimRate ?? 0) - (source.unsupportedClaimRate ?? 0));
  const typeMix = topItemDistance(source.evidenceTypes, draft.evidenceTypes);

  return weightedMean([
    [evidenceRate, 0.36],
    [claimRate, 0.18],
    [unsupportedExcess, 0.26],
    [typeMix, 0.20],
  ]);
}
|
|
130
|
+
|
|
131
|
+
/**
 * Shape distance: weighted mean of the opening-move sequence distance and the
 * item distances between move rates and between common sequences.
 *
 * @param {object} source - Source features with `openingMoves`, `moveRates`, `commonSequences`.
 * @param {object} draft - Draft features with the same fields.
 * @returns {number}
 */
function shapeDistance(source, draft) {
  const opening = sequenceDistance(source.openingMoves, draft.openingMoves);
  const moveRates = topItemDistance(source.moveRates, draft.moveRates);
  const sequences = topItemDistance(source.commonSequences, draft.commonSequences);

  return weightedMean([
    [opening, 0.55],
    [moveRates, 0.25],
    [sequences, 0.20],
  ]);
}
|
|
138
|
+
|
|
139
|
+
/**
 * Compare two distribution summaries by the relative deltas of their median,
 * p25, p75 and mean, combined as a weighted mean.
 *
 * @param {object} [source] - Summary with `median`, `p25`, `p75`, `mean`.
 * @param {object} [draft] - Summary with the same fields.
 * @returns {number} 0 when either summary is missing.
 */
function distributionDelta(source, draft) {
  if (source && draft) {
    const median = relativeDelta(source.median, draft.median);
    const lowerQuartile = relativeDelta(source.p25, draft.p25);
    const upperQuartile = relativeDelta(source.p75, draft.p75);
    const mean = relativeDelta(source.mean, draft.mean);
    return weightedMean([
      [median, 0.45],
      [lowerQuartile, 0.20],
      [upperQuartile, 0.20],
      [mean, 0.15],
    ]);
  }
  return 0;
}
|
|
150
|
+
|
|
151
|
+
/**
 * Distance between two punctuation rate maps; a thin wrapper over
 * rateMapDistance.
 *
 * @param {object} [source={}] - Source punctuation rates by mark.
 * @param {object} [draft={}] - Draft punctuation rates by mark.
 * @returns {number}
 */
function punctuationDistance(source = {}, draft = {}) {
  const distance = rateMapDistance(source, draft);
  return distance;
}
|
|
154
|
+
|
|
155
|
+
/**
 * Mean absolute difference between two rate maps over the union of their
 * keys; a key missing from one side counts as 0 there.
 *
 * @param {object} [source={}]
 * @param {object} [draft={}]
 * @returns {number} 0 when both maps are empty.
 */
function rateMapDistance(source = {}, draft = {}) {
  const keys = [...new Set([...Object.keys(source), ...Object.keys(draft)])];
  if (keys.length === 0) {
    return 0;
  }
  let totalGap = 0;
  for (const key of keys) {
    totalGap += Math.abs((source[key] ?? 0) - (draft[key] ?? 0));
  }
  return totalGap / keys.length;
}
|
|
162
|
+
|
|
163
|
+
/**
 * Distance between two weighted item lists: both lists are turned into
 * share maps, then the mean absolute difference (weight 0.45) is blended with
 * the cosine distance (weight 0.55).
 *
 * @param {object[]} [sourceItems=[]] - Items with `value` and `count` or `score`.
 * @param {object[]} [draftItems=[]] - Items with the same shape.
 * @returns {number}
 */
function topItemDistance(sourceItems = [], draftItems = []) {
  const sourceShares = normalizedItemMap(sourceItems);
  const draftShares = normalizedItemMap(draftItems);

  return weightedMean([
    [rateMapDistance(sourceShares, draftShares), 0.45],
    [cosineDistance(sourceShares, draftShares), 0.55],
  ]);
}
|
|
173
|
+
|
|
174
|
+
/**
 * Turn a list of weighted items into a map of `value` to share of the total
 * weight.
 *
 * Each item's weight is its `count`, else its `score`, else 0. Shares are the
 * weight divided by the total, so they sum to 1 whenever the total is
 * positive. This includes fractional `score` weights whose total is below 1,
 * which a `Math.max(1, total)` divisor would leave un-normalized. A
 * non-positive total yields 0 for every item.
 *
 * @param {object[]} items - Items with `value` and optionally `count` / `score`.
 * @returns {object} Map of item value to share; later duplicates overwrite earlier ones.
 */
function normalizedItemMap(items) {
  const weightedItems = items.map((item) => ({
    value: item.value,
    weight: item.count ?? item.score ?? 0,
  }));
  const total = weightedItems.reduce((sum, item) => sum + item.weight, 0);
  return Object.fromEntries(
    weightedItems.map(({ value, weight }) => [value, total > 0 ? weight / total : 0]),
  );
}
|
|
182
|
+
|
|
183
|
+
/**
 * Cosine distance (1 minus cosine similarity, clamped to [0, 1]) between two
 * sparse vectors given as key-to-number maps; missing keys count as 0.
 *
 * @param {object} left
 * @param {object} right
 * @returns {number} 0 when both vectors are empty or all-zero, 1 when exactly
 *   one of them is all-zero.
 */
function cosineDistance(left, right) {
  const keys = new Set([...Object.keys(left), ...Object.keys(right)]);
  if (keys.size === 0) {
    return 0;
  }

  let dot = 0;
  let leftNorm = 0;
  let rightNorm = 0;
  keys.forEach((key) => {
    const leftValue = left[key] ?? 0;
    const rightValue = right[key] ?? 0;
    dot += leftValue * rightValue;
    leftNorm += leftValue ** 2;
    rightNorm += rightValue ** 2;
  });

  const leftIsZero = leftNorm === 0;
  const rightIsZero = rightNorm === 0;
  if (leftIsZero && rightIsZero) {
    return 0;
  }
  if (leftIsZero || rightIsZero) {
    return 1;
  }

  const similarity = dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm));
  // Clamp away tiny floating-point excursions outside [0, 1].
  return Math.max(0, Math.min(1, 1 - similarity));
}
|
|
206
|
+
|
|
207
|
+
function sequenceDistance(source = [], draft = []) {
|
|
208
|
+
if (source.length === 0 && draft.length === 0) {
|
|
209
|
+
return 0;
|
|
210
|
+
}
|
|
211
|
+
if (source.length === 0 || draft.length === 0) {
|
|
212
|
+
return 1;
|
|
213
|
+
}
|
|
214
|
+
const limit = Math.min(source.length, draft.length);
|
|
215
|
+
let matches = 0;
|
|
216
|
+
for (let index = 0; index < limit; index += 1) {
|
|
217
|
+
if (source[index] === draft[index]) {
|
|
218
|
+
matches += 1;
|
|
219
|
+
} else if (source.includes(draft[index]) || draft.includes(source[index])) {
|
|
220
|
+
matches += 0.5;
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
return 1 - matches / limit;
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
/**
 * Absolute difference between two values divided by the source value, with
 * the divisor floored at 1. Null or undefined inputs count as 0.
 *
 * @param {number} [sourceValue]
 * @param {number} [draftValue]
 * @returns {number}
 */
function relativeDelta(sourceValue, draftValue) {
  const reference = sourceValue ?? 0;
  const observed = draftValue ?? 0;
  const gap = Math.abs(reference - observed);
  return gap / Math.max(1, reference);
}
|
|
229
|
+
|
|
230
|
+
/**
 * Weighted mean of `[value, weight]` pairs, passed through `round(..., 3)`
 * from text-utils.
 *
 * @param {Array<[number, number]>} weightedValues
 * @returns {number} 0 when the weights sum to exactly 0 (including an empty list).
 */
function weightedMean(weightedValues) {
  let totalWeight = 0;
  for (const [, weight] of weightedValues) {
    totalWeight += weight;
  }
  if (totalWeight === 0) {
    return 0;
  }

  let weightedSum = 0;
  for (const [value, weight] of weightedValues) {
    weightedSum += value * weight;
  }
  return round(weightedSum / totalWeight, 3);
}
|