dravoice 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +102 -36
- package/bin/dravoice.js +11 -10
- package/package.json +47 -45
- package/src/index.js +874 -197
- package/src/v2/analyzers/discourse.js +63 -63
- package/src/v2/analyzers/evidence.js +82 -82
- package/src/v2/analyzers/lexical.js +114 -114
- package/src/v2/analyzers/register.js +46 -34
- package/src/v2/analyzers/rhetorical-shape.js +59 -59
- package/src/v2/analyzers/rhythm.js +39 -47
- package/src/v2/analyzers/structure.js +24 -24
- package/src/v2/benchmark.js +574 -568
- package/src/v2/brief.js +154 -146
- package/src/v2/config.js +78 -0
- package/src/v2/document-model.js +351 -260
- package/src/v2/inspect.js +67 -67
- package/src/v2/io-utils.js +51 -0
- package/src/v2/profile.js +227 -203
- package/src/v2/prompt.js +65 -64
- package/src/v2/review.js +177 -173
- package/src/v2/revise-plan.js +437 -433
- package/src/v2/stylometry.js +342 -332
- package/src/v2/text-utils.js +123 -123
package/src/v2/stylometry.js
CHANGED
|
@@ -1,332 +1,342 @@
|
|
|
1
|
-
import { clampScore, round } from "./text-utils.js";
|
|
2
|
-
|
|
3
|
-
export const STYLOMETRIC_REFERENCES = [
|
|
4
|
-
"Burrows Delta (Burrows, 2002)",
|
|
5
|
-
"Cosine Delta (Smith & Aldridge, 2011)",
|
|
6
|
-
"Function-word stylometry (Kestemont, 2014)",
|
|
7
|
-
"Authorship-attribution feature families (Stamatatos, 2009)",
|
|
8
|
-
"Cohesion and discourse measures inspired by Coh-Metrix (Graesser et al., 2004)",
|
|
9
|
-
];
|
|
10
|
-
|
|
11
|
-
const DEFAULT_THRESHOLDS = {
|
|
12
|
-
rhythm: 0.36,
|
|
13
|
-
lexical: 0.32,
|
|
14
|
-
register: 0.45,
|
|
15
|
-
discourse: 0.28,
|
|
16
|
-
rhetoricalShape: 0.42,
|
|
17
|
-
evidence: 0.30,
|
|
18
|
-
structure: 0.42,
|
|
19
|
-
};
|
|
20
|
-
|
|
21
|
-
const FAMILY_WEIGHTS = {
|
|
22
|
-
rhythm: 0.75,
|
|
23
|
-
lexical: 0.85,
|
|
24
|
-
register: 0.50,
|
|
25
|
-
discourse: 0.70,
|
|
26
|
-
rhetoricalShape: 0.70,
|
|
27
|
-
evidence: 0.85,
|
|
28
|
-
structure: 0.65,
|
|
29
|
-
};
|
|
30
|
-
|
|
31
|
-
export function defaultStyleThresholds() {
|
|
32
|
-
return Object.fromEntries(Object.entries(DEFAULT_THRESHOLDS).map(([family, threshold]) => [
|
|
33
|
-
family,
|
|
34
|
-
{ threshold, observations: 0, stability: 0.45 },
|
|
35
|
-
]));
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
export function familyWeight(family) {
|
|
39
|
-
return FAMILY_WEIGHTS[family] ?? 0.6;
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
export function distanceByFamily(family, sourceFeatures, draftFeatures) {
|
|
43
|
-
if (family === "rhythm") {
|
|
44
|
-
return rhythmDistance(sourceFeatures, draftFeatures);
|
|
45
|
-
}
|
|
46
|
-
if (family === "lexical") {
|
|
47
|
-
return lexicalDistance(sourceFeatures, draftFeatures);
|
|
48
|
-
}
|
|
49
|
-
if (family === "register") {
|
|
50
|
-
return registerDistance(sourceFeatures, draftFeatures);
|
|
51
|
-
}
|
|
52
|
-
if (family === "discourse") {
|
|
53
|
-
return discourseDistance(sourceFeatures, draftFeatures);
|
|
54
|
-
}
|
|
55
|
-
if (family === "rhetoricalShape") {
|
|
56
|
-
return shapeDistance(sourceFeatures, draftFeatures);
|
|
57
|
-
}
|
|
58
|
-
if (family === "evidence") {
|
|
59
|
-
return evidenceDistance(sourceFeatures, draftFeatures);
|
|
60
|
-
}
|
|
61
|
-
if (family === "structure") {
|
|
62
|
-
return
|
|
63
|
-
}
|
|
64
|
-
return 0;
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
export function familyScoreFromDistance(distance, threshold) {
|
|
68
|
-
const ratio = distance / Math.max(0.01, threshold);
|
|
69
|
-
return clampScore(100 / (1 + Math.exp(3 * (ratio - 1.35))));
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
export function calibratedFamilyDrift(distance, threshold) {
|
|
73
|
-
return round(Math.max(0, distance - threshold) / Math.max(0.01, threshold), 3);
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
export function percentile(values, ratio) {
|
|
77
|
-
const sorted = values.filter((value) => Number.isFinite(value)).sort((left, right) => left - right);
|
|
78
|
-
if (sorted.length === 0) {
|
|
79
|
-
return 0;
|
|
80
|
-
}
|
|
81
|
-
const index = Math.min(sorted.length - 1, Math.max(0, Math.ceil((sorted.length - 1) * ratio)));
|
|
82
|
-
return sorted[index];
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
export function familyDiagnosticsFor(sourceProfile, draftProfile) {
|
|
86
|
-
const thresholds = sourceProfile.calibration?.styleThresholds?.families ?? defaultStyleThresholds();
|
|
87
|
-
return Object.fromEntries(Object.keys(sourceProfile.families).map((family) => {
|
|
88
|
-
const distance = distanceByFamily(
|
|
89
|
-
family,
|
|
90
|
-
sourceProfile.families[family].features,
|
|
91
|
-
draftProfile.families[family].features,
|
|
92
|
-
);
|
|
93
|
-
const thresholdData = thresholds[family] ?? defaultStyleThresholds()[family] ?? { threshold: 0.4, stability: 0.45 };
|
|
94
|
-
const threshold = thresholdData.threshold ?? 0.4;
|
|
95
|
-
return [family, {
|
|
96
|
-
distance,
|
|
97
|
-
threshold,
|
|
98
|
-
stability: thresholdData.stability ?? stabilityFromObservationCount(thresholdData.observations ?? 0),
|
|
99
|
-
drift: calibratedFamilyDrift(distance, threshold),
|
|
100
|
-
score: familyScoreFromDistance(distance, threshold),
|
|
101
|
-
}];
|
|
102
|
-
}));
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
export function styleDistanceFromDiagnostics(familyDiagnostics) {
|
|
106
|
-
const entries = Object.entries(familyDiagnostics);
|
|
107
|
-
const weighted = entries.map(([family, item]) => [
|
|
108
|
-
100 - item.score,
|
|
109
|
-
familyWeight(family) * Math.max(0.35, item.stability ?? 0.45),
|
|
110
|
-
]);
|
|
111
|
-
return Math.round(weightedMean(weighted));
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
export function stabilityFromDistances(values) {
|
|
115
|
-
const finite = values.filter((value) => Number.isFinite(value));
|
|
116
|
-
if (finite.length < 2) {
|
|
117
|
-
return stabilityFromObservationCount(finite.length);
|
|
118
|
-
}
|
|
119
|
-
const mean = finite.reduce((sum, value) => sum + value, 0) / finite.length;
|
|
120
|
-
const variance = finite.reduce((sum, value) => sum + (value - mean) ** 2, 0) / finite.length;
|
|
121
|
-
const coefficient = Math.sqrt(variance) / Math.max(0.01, mean);
|
|
122
|
-
return round(Math.max(0.35, Math.min(1, 1 - coefficient)), 3);
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
function rhythmDistance(source, draft) {
|
|
126
|
-
return weightedMean([
|
|
127
|
-
[distributionDelta(source.sentenceWords, draft.sentenceWords), 0.40],
|
|
128
|
-
[distributionDelta(source.paragraphWords, draft.paragraphWords), 0.32],
|
|
129
|
-
[distributionDelta(source.paragraphSentences, draft.paragraphSentences), 0.16],
|
|
130
|
-
[Math.abs((source.listDensity ?? 0) - (draft.listDensity ?? 0)), 0.06],
|
|
131
|
-
[Math.abs((source.quoteDensity ?? 0) - (draft.quoteDensity ?? 0)), 0.06],
|
|
132
|
-
]);
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
function lexicalDistance(source, draft) {
|
|
136
|
-
return weightedMean([
|
|
137
|
-
[topItemDistance(source.functionWords, draft.functionWords), 1.00],
|
|
138
|
-
[topItemDistance(source.functionWordBigrams, draft.functionWordBigrams), 0.75],
|
|
139
|
-
[topItemDistance(source.maskedCharacterFourgrams, draft.maskedCharacterFourgrams), 1.00],
|
|
140
|
-
[topItemDistance(source.characterTrigrams, draft.characterTrigrams), 0.45],
|
|
141
|
-
[topItemDistance(source.sentenceInitialTokens, draft.sentenceInitialTokens), 0.35],
|
|
142
|
-
[topItemDistance(source.sentenceFinalTokens, draft.sentenceFinalTokens), 0.35],
|
|
143
|
-
[topItemDistance(source.punctuationNgrams, draft.punctuationNgrams), 0.45],
|
|
144
|
-
[punctuationDistance(source.punctuation, draft.punctuation), 0.85],
|
|
145
|
-
[Math.abs((source.vocabularyRichness?.contentTypeTokenRatio ?? 0) - (draft.vocabularyRichness?.contentTypeTokenRatio ?? 0)), 0.25],
|
|
146
|
-
[distributionDelta(source.wordLength, draft.wordLength), 0.40],
|
|
147
|
-
]);
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
function registerDistance(source, draft) {
|
|
151
|
-
const primaryDelta = source.primary?.value === draft.primary?.value ? 0 : 0.65;
|
|
152
|
-
return weightedMean([
|
|
153
|
-
[primaryDelta, 0.8],
|
|
154
|
-
[topItemDistance(source.scores, draft.scores), 0.2],
|
|
155
|
-
]);
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
function discourseDistance(source, draft) {
|
|
159
|
-
const transitionDelta = rateMapDistance(source.transitionRates, draft.transitionRates);
|
|
160
|
-
const callbackDelta = Math.abs((source.sentenceCallbacks ?? 0) - (draft.sentenceCallbacks ?? 0));
|
|
161
|
-
return weightedMean([
|
|
162
|
-
[transitionDelta, 0.55],
|
|
163
|
-
[topItemDistance(source.transitionBigrams, draft.transitionBigrams), 0.25],
|
|
164
|
-
[topItemDistance(source.transitionTrigrams, draft.transitionTrigrams), 0.10],
|
|
165
|
-
[callbackDelta, 0.20],
|
|
166
|
-
]);
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
function evidenceDistance(source, draft) {
|
|
170
|
-
return weightedMean([
|
|
171
|
-
[Math.abs((source.evidenceSentenceRate ?? 0) - (draft.evidenceSentenceRate ?? 0)), 0.30],
|
|
172
|
-
[Math.abs((source.claimSentenceRate ?? 0) - (draft.claimSentenceRate ?? 0)), 0.14],
|
|
173
|
-
[Math.max(0, (source.supportedClaimRate ?? 0) - (draft.supportedClaimRate ?? 0)), 0.20],
|
|
174
|
-
[Math.max(0, (draft.unsupportedClaimRate ?? 0) - (source.unsupportedClaimRate ?? 0)), 0.26],
|
|
175
|
-
[topItemDistance(source.evidenceTypes, draft.evidenceTypes), 0.20],
|
|
176
|
-
]);
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
function shapeDistance(source, draft) {
|
|
180
|
-
return weightedMean([
|
|
181
|
-
[sequenceDistance(source.openingMoves, draft.openingMoves), 0.35],
|
|
182
|
-
[topItemDistance(source.openingMovePatterns, draft.openingMovePatterns), 0.25],
|
|
183
|
-
[topItemDistance(source.moveRates, draft.moveRates), 0.20],
|
|
184
|
-
[topItemDistance(source.moveBigrams ?? source.commonSequences, draft.moveBigrams ?? draft.commonSequences), 0.25],
|
|
185
|
-
[topItemDistance(source.moveTrigrams, draft.moveTrigrams), 0.15],
|
|
186
|
-
]);
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
function
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
[
|
|
195
|
-
[
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
if (
|
|
218
|
-
return
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
const
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
}
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
if (
|
|
326
|
-
return 0
|
|
327
|
-
}
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
1
|
+
import { clampScore, round } from "./text-utils.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Human-readable citations for the stylometric techniques this module draws on.
 * The list is exported as plain strings; nothing in this file reads it.
 */
export const STYLOMETRIC_REFERENCES = [
  "Burrows Delta (Burrows, 2002)",
  "Cosine Delta (Smith & Aldridge, 2011)",
  "Function-word stylometry (Kestemont, 2014)",
  "Authorship-attribution feature families (Stamatatos, 2009)",
  "Cohesion and discourse measures inspired by Coh-Metrix (Graesser et al., 2004)",
];
|
|
10
|
+
|
|
11
|
+
/**
 * Fallback distance threshold for each feature family, used when a profile
 * carries no calibrated thresholds (see defaultStyleThresholds).
 *
 * Frozen because it is module-private shared state: every caller receives
 * fresh objects derived from it, so nothing should ever write to it.
 */
const DEFAULT_THRESHOLDS = Object.freeze({
  rhythm: 0.36,
  lexical: 0.32,
  register: 0.45,
  discourse: 0.28,
  rhetoricalShape: 0.42,
  evidence: 0.30,
  structure: 0.42,
});
|
|
20
|
+
|
|
21
|
+
/**
 * Relative importance of each feature family when per-family results are
 * combined into one style distance. familyWeight() reads this table and
 * falls back to 0.6 for families that are not listed.
 *
 * Frozen because it is module-private shared state that is only ever read.
 */
const FAMILY_WEIGHTS = Object.freeze({
  rhythm: 0.75,
  lexical: 0.85,
  register: 0.50,
  discourse: 0.70,
  rhetoricalShape: 0.70,
  evidence: 0.85,
  structure: 0.65,
});
|
|
30
|
+
|
|
31
|
+
/**
 * Builds a fresh, uncalibrated threshold table.
 *
 * @returns {Object<string, {threshold: number, observations: number, stability: number}>}
 *   One entry per family in DEFAULT_THRESHOLDS, each with zero observations
 *   and a stability of 0.45. A new object graph is created on every call.
 */
export function defaultStyleThresholds() {
  const table = {};
  for (const [family, threshold] of Object.entries(DEFAULT_THRESHOLDS)) {
    table[family] = { threshold, observations: 0, stability: 0.45 };
  }
  return table;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Looks up the aggregation weight for a feature family.
 *
 * @param {string} family - Feature family name.
 * @returns {number} The entry from FAMILY_WEIGHTS, or 0.6 when the lookup is nullish.
 */
export function familyWeight(family) {
  const configured = FAMILY_WEIGHTS[family];
  return configured ?? 0.6;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Routes a pair of feature objects to the distance function of their family.
 *
 * @param {string} family - Feature family name.
 * @param {object} sourceFeatures - Features of the reference (source) profile.
 * @param {object} draftFeatures - Features of the draft being compared.
 * @returns {number} The family-specific distance, or 0 for an unrecognized family.
 */
export function distanceByFamily(family, sourceFeatures, draftFeatures) {
  switch (family) {
    case "rhythm":
      return rhythmDistance(sourceFeatures, draftFeatures);
    case "lexical":
      return lexicalDistance(sourceFeatures, draftFeatures);
    case "register":
      return registerDistance(sourceFeatures, draftFeatures);
    case "discourse":
      return discourseDistance(sourceFeatures, draftFeatures);
    case "rhetoricalShape":
      return shapeDistance(sourceFeatures, draftFeatures);
    case "evidence":
      return evidenceDistance(sourceFeatures, draftFeatures);
    case "structure":
      return structureDistance(sourceFeatures, draftFeatures);
    default:
      // Unknown families contribute no distance.
      return 0;
  }
}
|
|
66
|
+
|
|
67
|
+
/**
 * Converts a family distance into a score via a falling logistic curve.
 *
 * The distance is expressed as a multiple of the threshold (floored at 0.01
 * to avoid dividing by zero). The curve has steepness 3 and its midpoint
 * (value 50 before clamping) at a ratio of 1.35.
 *
 * @param {number} distance - Measured family distance.
 * @param {number} threshold - Calibrated threshold for the family.
 * @returns {number} The logistic value after passing through clampScore
 *   (imported from text-utils; its exact clamping is not visible here).
 */
export function familyScoreFromDistance(distance, threshold) {
  const safeThreshold = Math.max(0.01, threshold);
  const exponent = 3 * (distance / safeThreshold - 1.35);
  const logistic = 100 / (1 + Math.exp(exponent));
  return clampScore(logistic);
}
|
|
71
|
+
|
|
72
|
+
/**
 * Measures how far a distance exceeds its threshold, relative to the threshold.
 *
 * @param {number} distance - Measured family distance.
 * @param {number} threshold - Calibrated threshold for the family.
 * @returns {number} 0 when the distance is at or below the threshold; otherwise
 *   the excess divided by the threshold (floored at 0.01), passed through
 *   round(..., 3).
 */
export function calibratedFamilyDrift(distance, threshold) {
  const excess = Math.max(0, distance - threshold);
  const scale = Math.max(0.01, threshold);
  return round(excess / scale, 3);
}
|
|
75
|
+
|
|
76
|
+
/**
 * Picks a percentile from the finite numbers in `values`.
 *
 * Non-finite entries are ignored and the input array is not modified. The
 * chosen position is the ceiling of `ratio` times the last index, clamped
 * into the valid index range, so no interpolation takes place.
 *
 * @param {Array<*>} values - Candidate values; only finite numbers are used.
 * @param {number} ratio - Fraction of the way through the sorted values.
 * @returns {number} The selected value, or 0 when no finite values exist.
 */
export function percentile(values, ratio) {
  const ascending = values.filter(Number.isFinite).sort((a, b) => a - b);
  const lastIndex = ascending.length - 1;
  if (lastIndex < 0) {
    return 0;
  }
  const rank = Math.ceil(lastIndex * ratio);
  const position = Math.min(lastIndex, Math.max(0, rank));
  return ascending[position];
}
|
|
84
|
+
|
|
85
|
+
/**
 * Compares a draft profile with a source profile, one feature family at a time.
 *
 * Thresholds come from the source profile's calibration when present, then
 * from the built-in defaults, then from a hard-coded 0.4 / 0.45 fallback.
 * Families are taken from the source profile; the draft profile must contain
 * the same families.
 *
 * @param {object} sourceProfile - Reference profile with `families` and optional `calibration`.
 * @param {object} draftProfile - Profile of the draft, with matching `families`.
 * @returns {Object<string, {distance: number, threshold: number, stability: number, drift: number, score: number}>}
 *   Diagnostics keyed by family name.
 */
export function familyDiagnosticsFor(sourceProfile, draftProfile) {
  const calibrated = sourceProfile.calibration?.styleThresholds?.families;
  const thresholds = calibrated ?? defaultStyleThresholds();
  const diagnostics = [];
  for (const family of Object.keys(sourceProfile.families)) {
    const sourceFeatures = sourceProfile.families[family].features;
    const draftFeatures = draftProfile.families[family].features;
    const distance = distanceByFamily(family, sourceFeatures, draftFeatures);

    const lastResort = { threshold: 0.4, stability: 0.45 };
    const thresholdData = thresholds[family] ?? defaultStyleThresholds()[family] ?? lastResort;
    const threshold = thresholdData.threshold ?? 0.4;
    // Without a recorded stability, estimate one from how many observations fed the threshold.
    const stability = thresholdData.stability
      ?? stabilityFromObservationCount(thresholdData.observations ?? 0);

    diagnostics.push([family, {
      distance,
      threshold,
      stability,
      drift: calibratedFamilyDrift(distance, threshold),
      score: familyScoreFromDistance(distance, threshold),
    }]);
  }
  return Object.fromEntries(diagnostics);
}
|
|
104
|
+
|
|
105
|
+
/**
 * Collapses per-family diagnostics into one overall style distance.
 *
 * Each family contributes `100 - score`, weighted by its family weight times
 * its stability (stability defaults to 0.45 and is floored at 0.35).
 *
 * @param {Object<string, {score: number, stability?: number}>} familyDiagnostics
 * @returns {number} The weighted mean of the contributions, rounded to an integer.
 */
export function styleDistanceFromDiagnostics(familyDiagnostics) {
  const contributions = [];
  for (const [family, item] of Object.entries(familyDiagnostics)) {
    const shortfall = 100 - item.score;
    const confidence = Math.max(0.35, item.stability ?? 0.45);
    contributions.push([shortfall, familyWeight(family) * confidence]);
  }
  return Math.round(weightedMean(contributions));
}
|
|
113
|
+
|
|
114
|
+
/**
 * Estimates how stable a set of observed distances is.
 *
 * With fewer than two finite values the result depends only on the count.
 * Otherwise it is one minus the coefficient of variation (population standard
 * deviation over the mean, with the mean floored at 0.01), clamped to
 * [0.35, 1] and passed through round(..., 3).
 *
 * @param {Array<*>} values - Observed distances; non-finite entries are ignored.
 * @returns {number} Stability estimate.
 */
export function stabilityFromDistances(values) {
  const samples = values.filter((value) => Number.isFinite(value));
  const count = samples.length;
  if (count < 2) {
    return stabilityFromObservationCount(count);
  }

  let total = 0;
  for (const sample of samples) {
    total += sample;
  }
  const mean = total / count;

  let squaredDeviation = 0;
  for (const sample of samples) {
    squaredDeviation += (sample - mean) ** 2;
  }
  const variance = squaredDeviation / count;

  const coefficient = Math.sqrt(variance) / Math.max(0.01, mean);
  const clamped = Math.max(0.35, Math.min(1, 1 - coefficient));
  return round(clamped, 3);
}
|
|
124
|
+
|
|
125
|
+
/**
 * Distance between two rhythm feature sets.
 *
 * Combines length-distribution differences (sentence words, paragraph words,
 * paragraph sentences) with absolute differences in list and quote density.
 *
 * @param {object} source - Rhythm features of the reference profile.
 * @param {object} draft - Rhythm features of the draft.
 * @returns {number} Weighted mean of the component distances.
 */
function rhythmDistance(source, draft) {
  const spreadGap = (feature) => distributionDelta(source[feature], draft[feature]);
  const densityGap = (feature) => Math.abs((source[feature] ?? 0) - (draft[feature] ?? 0));
  return weightedMean([
    [spreadGap("sentenceWords"), 0.40],
    [spreadGap("paragraphWords"), 0.32],
    [spreadGap("paragraphSentences"), 0.16],
    [densityGap("listDensity"), 0.06],
    [densityGap("quoteDensity"), 0.06],
  ]);
}
|
|
134
|
+
|
|
135
|
+
/**
 * Distance between two lexical feature sets.
 *
 * Combines ranked-item distances over several n-gram and token inventories,
 * a punctuation rate distance, the absolute difference in content
 * type-token ratio, and a word-length distribution difference.
 *
 * @param {object} source - Lexical features of the reference profile.
 * @param {object} draft - Lexical features of the draft.
 * @returns {number} Weighted mean of the component distances.
 */
function lexicalDistance(source, draft) {
  const inventoryGap = (feature) => topItemDistance(source[feature], draft[feature]);
  const sourceRichness = source.vocabularyRichness?.contentTypeTokenRatio ?? 0;
  const draftRichness = draft.vocabularyRichness?.contentTypeTokenRatio ?? 0;
  return weightedMean([
    [inventoryGap("functionWords"), 1.00],
    [inventoryGap("functionWordBigrams"), 0.75],
    [inventoryGap("maskedCharacterFourgrams"), 1.00],
    [inventoryGap("characterTrigrams"), 0.45],
    [inventoryGap("sentenceInitialTokens"), 0.35],
    [inventoryGap("sentenceFinalTokens"), 0.35],
    [inventoryGap("punctuationNgrams"), 0.45],
    [punctuationDistance(source.punctuation, draft.punctuation), 0.85],
    [Math.abs(sourceRichness - draftRichness), 0.25],
    [distributionDelta(source.wordLength, draft.wordLength), 0.40],
  ]);
}
|
|
149
|
+
|
|
150
|
+
/**
 * Distance between two register feature sets.
 *
 * A mismatch in the primary register value costs a fixed 0.65 and dominates
 * the result (weight 0.8); the rest comes from the register score inventories.
 *
 * @param {object} source - Register features of the reference profile.
 * @param {object} draft - Register features of the draft.
 * @returns {number} Weighted mean of the component distances.
 */
function registerDistance(source, draft) {
  const samePrimary = source.primary?.value === draft.primary?.value;
  const primaryDelta = samePrimary ? 0 : 0.65;
  const scoreDelta = topItemDistance(source.scores, draft.scores);
  return weightedMean([
    [primaryDelta, 0.8],
    [scoreDelta, 0.2],
  ]);
}
|
|
157
|
+
|
|
158
|
+
/**
 * Distance between two discourse feature sets.
 *
 * Combines the transition-rate map distance, ranked-item distances over
 * transition bigrams and trigrams, and the absolute difference in sentence
 * callbacks.
 *
 * @param {object} source - Discourse features of the reference profile.
 * @param {object} draft - Discourse features of the draft.
 * @returns {number} Weighted mean of the component distances.
 */
function discourseDistance(source, draft) {
  const rateGap = rateMapDistance(source.transitionRates, draft.transitionRates);
  const bigramGap = topItemDistance(source.transitionBigrams, draft.transitionBigrams);
  const trigramGap = topItemDistance(source.transitionTrigrams, draft.transitionTrigrams);
  const callbackGap = Math.abs((source.sentenceCallbacks ?? 0) - (draft.sentenceCallbacks ?? 0));
  return weightedMean([
    [rateGap, 0.55],
    [bigramGap, 0.25],
    [trigramGap, 0.10],
    [callbackGap, 0.20],
  ]);
}
|
|
168
|
+
|
|
169
|
+
/**
 * Distance between two evidence feature sets.
 *
 * Evidence and claim sentence rates are compared symmetrically. The claim
 * support terms are one-sided: only a draft with a lower supported-claim rate
 * or a higher unsupported-claim rate than the source adds distance.
 *
 * @param {object} source - Evidence features of the reference profile.
 * @param {object} draft - Evidence features of the draft.
 * @returns {number} Weighted mean of the component distances.
 */
function evidenceDistance(source, draft) {
  const rate = (features, key) => features[key] ?? 0;
  const evidenceGap = Math.abs(rate(source, "evidenceSentenceRate") - rate(draft, "evidenceSentenceRate"));
  const claimGap = Math.abs(rate(source, "claimSentenceRate") - rate(draft, "claimSentenceRate"));
  const lostSupport = Math.max(0, rate(source, "supportedClaimRate") - rate(draft, "supportedClaimRate"));
  const addedUnsupported = Math.max(0, rate(draft, "unsupportedClaimRate") - rate(source, "unsupportedClaimRate"));
  const typeGap = topItemDistance(source.evidenceTypes, draft.evidenceTypes);
  return weightedMean([
    [evidenceGap, 0.30],
    [claimGap, 0.14],
    [lostSupport, 0.20],
    [addedUnsupported, 0.26],
    [typeGap, 0.20],
  ]);
}
|
|
178
|
+
|
|
179
|
+
/**
 * Distance between two rhetorical-shape feature sets.
 *
 * Combines a positional comparison of opening moves with ranked-item
 * distances over opening-move patterns, move rates, move bigrams and move
 * trigrams. When `moveBigrams` is nullish, `commonSequences` is used instead.
 *
 * @param {object} source - Rhetorical-shape features of the reference profile.
 * @param {object} draft - Rhetorical-shape features of the draft.
 * @returns {number} Weighted mean of the component distances.
 */
function shapeDistance(source, draft) {
  const sourceBigrams = source.moveBigrams ?? source.commonSequences;
  const draftBigrams = draft.moveBigrams ?? draft.commonSequences;
  const openingGap = sequenceDistance(source.openingMoves, draft.openingMoves);
  const patternGap = topItemDistance(source.openingMovePatterns, draft.openingMovePatterns);
  const rateGap = topItemDistance(source.moveRates, draft.moveRates);
  const bigramGap = topItemDistance(sourceBigrams, draftBigrams);
  const trigramGap = topItemDistance(source.moveTrigrams, draft.moveTrigrams);
  return weightedMean([
    [openingGap, 0.35],
    [patternGap, 0.25],
    [rateGap, 0.20],
    [bigramGap, 0.25],
    [trigramGap, 0.15],
  ]);
}
|
|
188
|
+
|
|
189
|
+
/**
 * Distance between two structure feature sets.
 *
 * Combines distribution differences for section words and heading count,
 * absolute differences in list and quote document rates, and a positional
 * comparison of opening moves.
 *
 * @param {object} source - Structure features of the reference profile.
 * @param {object} draft - Structure features of the draft.
 * @returns {number} Weighted mean of the component distances.
 */
function structureDistance(source, draft) {
  const spreadGap = (feature) => distributionDelta(source[feature], draft[feature]);
  const rateGap = (feature) => Math.abs((source[feature] ?? 0) - (draft[feature] ?? 0));
  return weightedMean([
    [spreadGap("sectionWords"), 0.35],
    [spreadGap("headingCount"), 0.20],
    [rateGap("listDocumentRate"), 0.18],
    [rateGap("quoteDocumentRate"), 0.12],
    [sequenceDistance(source.openingMoves, draft.openingMoves), 0.15],
  ]);
}
|
|
198
|
+
|
|
199
|
+
/**
 * Compares two summary distributions through their median, quartiles and mean.
 *
 * @param {{median?: number, p25?: number, p75?: number, mean?: number}} [source]
 * @param {{median?: number, p25?: number, p75?: number, mean?: number}} [draft]
 * @returns {number} Weighted mean of the relative differences, or 0 when
 *   either summary is missing (a missing summary counts as no difference).
 */
function distributionDelta(source, draft) {
  if (source && draft) {
    const statistics = [
      ["median", 0.45],
      ["p25", 0.20],
      ["p75", 0.20],
      ["mean", 0.15],
    ];
    return weightedMean(
      statistics.map(([name, weight]) => [relativeDelta(source[name], draft[name]), weight]),
    );
  }
  return 0;
}
|
|
210
|
+
|
|
211
|
+
/**
 * Distance between two punctuation rate maps.
 *
 * A named alias for rateMapDistance so call sites read by intent.
 *
 * @param {Object<string, number>} [source={}] - Punctuation rates of the reference profile.
 * @param {Object<string, number>} [draft={}] - Punctuation rates of the draft.
 * @returns {number} Mean absolute rate difference over the union of keys.
 */
function punctuationDistance(source = {}, draft = {}) {
  const distance = rateMapDistance(source, draft);
  return distance;
}
|
|
214
|
+
|
|
215
|
+
/**
 * Mean absolute difference between two rate maps over the union of their keys.
 *
 * Values are read as own properties only. A plain `map[key]` lookup would
 * find inherited members such as `constructor` or `toString` whenever a key
 * with that name exists on just one side, turning the whole result into NaN.
 *
 * @param {Object<string, number>|null} [source={}] - Rates of the reference profile.
 * @param {Object<string, number>|null} [draft={}] - Rates of the draft.
 * @returns {number} 0 when both maps are empty or missing; otherwise the mean
 *   of |source[key] - draft[key]|, with absent keys counted as 0.
 */
function rateMapDistance(source = {}, draft = {}) {
  // Default parameters only cover `undefined`; treat `null` as an empty map too.
  const left = source ?? {};
  const right = draft ?? {};
  const ownRate = (map, key) => (
    Object.prototype.hasOwnProperty.call(map, key) ? (map[key] ?? 0) : 0
  );
  const keys = Array.from(new Set([...Object.keys(left), ...Object.keys(right)]));
  if (keys.length === 0) {
    return 0;
  }
  const totalGap = keys.reduce(
    (sum, key) => sum + Math.abs(ownRate(left, key) - ownRate(right, key)),
    0,
  );
  return totalGap / keys.length;
}
|
|
222
|
+
|
|
223
|
+
/**
 * Distance between two ranked item lists (entries with `value` plus `count` or `score`).
 *
 * Both lists are converted to share maps and compared three ways: mean
 * absolute difference, cosine distance and Jensen-Shannon distance.
 *
 * @param {Array<object>} sourceItems - Items of the reference profile.
 * @param {Array<object>} draftItems - Items of the draft.
 * @returns {number|null} Weighted mean of the three measures, or null when
 *   either argument is not an array or both are empty. weightedMean drops
 *   non-finite values, so null removes this component from a caller's mean.
 */
function topItemDistance(sourceItems, draftItems) {
  const comparable = Array.isArray(sourceItems) && Array.isArray(draftItems);
  if (!comparable) {
    return null;
  }
  const nothingToCompare = sourceItems.length === 0 && draftItems.length === 0;
  if (nothingToCompare) {
    return null;
  }
  const sourceShares = normalizedItemMap(sourceItems);
  const draftShares = normalizedItemMap(draftItems);
  return weightedMean([
    [rateMapDistance(sourceShares, draftShares), 0.30],
    [cosineDistance(sourceShares, draftShares), 0.45],
    [jensenShannonDistance(sourceShares, draftShares), 0.25],
  ]);
}
|
|
241
|
+
|
|
242
|
+
/**
 * Turns a list of weighted items into a `value -> share` map.
 *
 * An item's weight is its `count`, else its `score`, else 0. Weights are
 * divided by their total, but the divisor is never less than 1, so weights
 * that sum to less than 1 are returned unscaled.
 *
 * @param {Array<{value: string, count?: number, score?: number}>} items
 * @returns {Object<string, number>} Share per item value; for duplicate
 *   values the last entry wins.
 */
function normalizedItemMap(items) {
  const pairs = items.map((item) => [item.value, item.count ?? item.score ?? 0]);
  let total = 0;
  for (const [, weight] of pairs) {
    total += weight;
  }
  const divisor = Math.max(1, total);
  return Object.fromEntries(pairs.map(([value, weight]) => [value, weight / divisor]));
}
|
|
250
|
+
|
|
251
|
+
/**
 * Cosine distance (1 - cosine similarity) between two sparse vectors stored
 * as `key -> number` maps.
 *
 * Values are read as own properties only. A plain `map[key]` lookup would
 * find inherited members such as `constructor` whenever a key with that name
 * exists on just one side, turning the result into NaN.
 *
 * @param {Object<string, number>} left - First vector.
 * @param {Object<string, number>} right - Second vector.
 * @returns {number} A value in [0, 1]: 0 when both vectors are empty or all
 *   zero, 1 when exactly one of them is all zero.
 */
function cosineDistance(left, right) {
  const component = (vector, key) => (
    Object.prototype.hasOwnProperty.call(vector, key) ? (vector[key] ?? 0) : 0
  );
  const keys = Array.from(new Set([...Object.keys(left), ...Object.keys(right)]));
  if (keys.length === 0) {
    return 0;
  }
  let dot = 0;
  let leftNorm = 0;
  let rightNorm = 0;
  for (const key of keys) {
    const leftValue = component(left, key);
    const rightValue = component(right, key);
    dot += leftValue * rightValue;
    leftNorm += leftValue ** 2;
    rightNorm += rightValue ** 2;
  }
  if (leftNorm === 0 && rightNorm === 0) {
    return 0;
  }
  if (leftNorm === 0 || rightNorm === 0) {
    return 1;
  }
  // Clamp because rounding can push the similarity slightly outside [0, 1].
  const similarity = dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm));
  return Math.max(0, Math.min(1, 1 - similarity));
}
|
|
274
|
+
|
|
275
|
+
/**
 * Jensen-Shannon distance (square root of the base-2 Jensen-Shannon
 * divergence) between two `key -> probability` maps.
 *
 * Values are read as own properties only, and the midpoint distribution is a
 * prototype-less object. A plain `map[key]` lookup would find inherited
 * members such as `constructor` when a key with that name exists on just one
 * side, and a key named `__proto__` could not be stored on an ordinary object.
 *
 * @param {Object<string, number>} left - First distribution.
 * @param {Object<string, number>} right - Second distribution.
 * @returns {number} 0 when both maps are empty or identical; 1 for two
 *   disjoint distributions that each sum to 1.
 */
function jensenShannonDistance(left, right) {
  const probability = (distribution, key) => (
    Object.prototype.hasOwnProperty.call(distribution, key) ? (distribution[key] ?? 0) : 0
  );
  const keys = Array.from(new Set([...Object.keys(left), ...Object.keys(right)]));
  if (keys.length === 0) {
    return 0;
  }
  const midpoint = Object.create(null);
  for (const key of keys) {
    midpoint[key] = (probability(left, key) + probability(right, key)) / 2;
  }
  const divergence = (klDivergence(left, midpoint, keys) + klDivergence(right, midpoint, keys)) / 2;
  // Rounding can leave a tiny negative divergence for near-identical inputs;
  // without the clamp Math.sqrt would return NaN.
  return Math.sqrt(Math.max(0, divergence));
}

/**
 * Base-2 Kullback-Leibler divergence of `source` from `target`, restricted to `keys`.
 *
 * Terms where either probability is not strictly positive are skipped.
 * Values are read as own properties only, so inherited members are never
 * mistaken for probabilities.
 *
 * @param {Object<string, number>} source - Distribution P.
 * @param {Object<string, number>} target - Distribution Q.
 * @param {Array<string>} keys - Keys to sum over.
 * @returns {number} Sum of P(k) * log2(P(k) / Q(k)) over the usable keys.
 */
function klDivergence(source, target, keys) {
  const probability = (distribution, key) => (
    Object.prototype.hasOwnProperty.call(distribution, key) ? (distribution[key] ?? 0) : 0
  );
  let sum = 0;
  for (const key of keys) {
    const sourceValue = probability(source, key);
    const targetValue = probability(target, key);
    if (sourceValue > 0 && targetValue > 0) {
      sum += sourceValue * Math.log2(sourceValue / targetValue);
    }
  }
  return sum;
}
|
|
298
|
+
|
|
299
|
+
/**
 * Positional distance between two sequences.
 *
 * Only the overlapping prefix (the shorter length) is compared. A position
 * earns full credit when both entries are identical, and half credit when
 * either entry appears anywhere in the other sequence.
 *
 * @param {Array<*>} [source=[]] - Sequence from the reference profile.
 * @param {Array<*>} [draft=[]] - Sequence from the draft.
 * @returns {number} 0 when both are empty, 1 when exactly one is empty,
 *   otherwise one minus the average credit per compared position.
 */
function sequenceDistance(source = [], draft = []) {
  const sourceEmpty = source.length === 0;
  const draftEmpty = draft.length === 0;
  if (sourceEmpty || draftEmpty) {
    return sourceEmpty && draftEmpty ? 0 : 1;
  }
  const compared = Math.min(source.length, draft.length);
  let credit = 0;
  for (let position = 0; position < compared; position += 1) {
    const expected = source[position];
    const actual = draft[position];
    if (expected === actual) {
      credit += 1;
      continue;
    }
    const presentElsewhere = source.includes(actual) || draft.includes(expected);
    if (presentElsewhere) {
      credit += 0.5;
    }
  }
  return 1 - credit / compared;
}
|
|
317
|
+
|
|
318
|
+
/**
 * Absolute difference between two values, relative to the source value.
 *
 * Nullish inputs count as 0. The divisor is never less than 1, so source
 * values below 1 yield the plain absolute difference.
 *
 * @param {number} [sourceValue] - Value from the reference profile.
 * @param {number} [draftValue] - Value from the draft.
 * @returns {number} |source - draft| / max(1, source).
 */
function relativeDelta(sourceValue, draftValue) {
  const baseline = sourceValue ?? 0;
  const observed = draftValue ?? 0;
  const scale = Math.max(1, baseline);
  return Math.abs(baseline - observed) / scale;
}
|
|
321
|
+
|
|
322
|
+
/**
 * Weighted mean of `[value, weight]` pairs.
 *
 * Pairs whose value is not a finite number (including null) or whose weight
 * is not greater than 0 are ignored, so a component can be left out by
 * supplying a non-finite value.
 *
 * @param {Array<[*, number]>} weightedValues - Pairs of value and weight.
 * @returns {number} 0 when no pair is usable; otherwise the weighted mean
 *   passed through round(..., 3).
 */
function weightedMean(weightedValues) {
  let weightedSum = 0;
  let totalWeight = 0;
  for (const [value, weight] of weightedValues) {
    const usable = Number.isFinite(value) && weight > 0;
    if (!usable) {
      continue;
    }
    weightedSum += value * weight;
    totalWeight += weight;
  }
  if (totalWeight === 0) {
    return 0;
  }
  return round(weightedSum / totalWeight, 3);
}
|
|
330
|
+
|
|
331
|
+
/**
 * Maps a number of observations to a stability estimate.
 *
 * @param {number} observations - How many observations are available.
 * @returns {number} 0.85 for five or more, 0.7 for three or four, 0.6 for
 *   two, and 0.45 otherwise.
 */
function stabilityFromObservationCount(observations) {
  // Tiers are ordered from the highest minimum down, so the first match wins.
  const tiers = [
    [5, 0.85],
    [3, 0.7],
    [2, 0.6],
  ];
  for (const [minimum, stability] of tiers) {
    if (observations >= minimum) {
      return stability;
    }
  }
  return 0.45;
}
|