dravoice 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +102 -36
- package/bin/dravoice.js +11 -10
- package/package.json +47 -45
- package/src/index.js +874 -197
- package/src/v2/analyzers/discourse.js +63 -52
- package/src/v2/analyzers/evidence.js +73 -38
- package/src/v2/analyzers/lexical.js +114 -58
- package/src/v2/analyzers/register.js +46 -34
- package/src/v2/analyzers/rhetorical-shape.js +59 -48
- package/src/v2/analyzers/rhythm.js +39 -47
- package/src/v2/analyzers/structure.js +24 -24
- package/src/v2/benchmark.js +574 -568
- package/src/v2/brief.js +154 -146
- package/src/v2/config.js +78 -0
- package/src/v2/document-model.js +351 -260
- package/src/v2/inspect.js +67 -67
- package/src/v2/io-utils.js +51 -0
- package/src/v2/profile.js +155 -129
- package/src/v2/prompt.js +65 -64
- package/src/v2/review.js +177 -219
- package/src/v2/revise-plan.js +130 -33
- package/src/v2/stylometry.js +123 -17
- package/src/v2/text-utils.js +123 -123
package/src/v2/text-utils.js
CHANGED
|
@@ -1,123 +1,123 @@
|
|
|
1
|
-
// Matches one word token: an ASCII letter followed by any run of ASCII letters,
// digits, apostrophes or hyphens. Global and case-insensitive.
const WORD_RE = /[a-z][a-z0-9'-]*/gi;
|
|
2
|
-
// Period-bearing abbreviations. splitSentences masks the dots in these so they
// are not treated as sentence terminators. Matching is exact and case-sensitive.
const ABBREVIATIONS = ["Dr.", "Mr.", "Mrs.", "Ms.", "Prof.", "Sr.", "Jr.", "e.g.", "i.e."];
|
|
3
|
-
|
|
4
|
-
// English function words, kept as space-separated rows and expanded into one
// flat array in row order. The list is also spread into STOP_WORDS.
export const FUNCTION_WORDS = [
  "a an and as at because but by for from",
  "if in into is it its of on or that",
  "the their then there this to was we with",
  "you your i my not so before after while",
].flatMap((row) => row.split(" "));
|
|
10
|
-
|
|
11
|
-
// Lookup set used by contentWords to discard non-content tokens: every
// FUNCTION_WORDS entry followed by the additional words listed here.
export const STOP_WORDS = new Set(
  FUNCTION_WORDS.concat([
    "about",
    "also",
    "between",
    "can",
    "could",
    "each",
    "had",
    "has",
    "have",
    "more",
    "one",
    "only",
    "our",
    "out",
    "over",
    "should",
    "than",
    "they",
    "were",
    "when",
    "where",
    "which",
    "who",
    "will",
    "would",
  ]),
);
|
|
18
|
-
|
|
19
|
-
/**
 * Produces a canonical form of `text` for comparison.
 *
 * @param {*} text - Value to normalize; null and undefined become "".
 * @returns {string} The string form with every whitespace run collapsed to a
 *   single space, outer whitespace removed, and all characters lower-cased.
 */
export function normalizeText(text) {
  const raw = String(text ?? "");
  const singleSpaced = raw.replace(/\s+/g, " ");
  return singleSpaced.trim().toLowerCase();
}
|
|
22
|
-
|
|
23
|
-
/**
 * Extracts lower-cased word tokens from `text`.
 *
 * Tokens are whatever WORD_RE matches. A trailing possessive "'s" is removed
 * from each token; other apostrophes and hyphens are kept.
 *
 * @param {*} text - Value to tokenize; null and undefined yield no tokens.
 * @returns {string[]} Tokens in order of appearance.
 */
export function tokenizeWords(text) {
  const source = String(text ?? "");
  const tokens = [];
  for (const raw of source.match(WORD_RE) ?? []) {
    tokens.push(raw.toLowerCase().replace(/'s$/, ""));
  }
  return tokens;
}
|
|
27
|
-
|
|
28
|
-
/**
 * Returns the tokens of `text` that carry content.
 *
 * A token is kept only when it is longer than two characters and is not a
 * member of STOP_WORDS.
 *
 * @param {*} text - Value passed to tokenizeWords.
 * @returns {string[]} Kept tokens in their original order.
 */
export function contentWords(text) {
  const kept = [];
  for (const token of tokenizeWords(text)) {
    if (token.length <= 2) {
      continue;
    }
    if (STOP_WORDS.has(token)) {
      continue;
    }
    kept.push(token);
  }
  return kept;
}
|
|
31
|
-
|
|
32
|
-
/**
 * Splits `text` into sentences.
 *
 * A boundary is whitespace that follows ".", "!" or "?" and precedes an
 * upper-case ASCII letter or digit (optionally after an opening quote, "(" or
 * "["). Before splitting, spans whose punctuation must not end a sentence are
 * masked: the dots of every ABBREVIATIONS entry, URLs, file paths with a known
 * extension, and dotted version numbers. Masks are undone afterwards, so each
 * returned sentence contains the original characters.
 *
 * @param {*} text - Value to split; null and undefined yield an empty array.
 * @returns {string[]} Trimmed, non-empty sentences in order.
 */
export function splitSentences(text) {
  const source = String(text ?? "");

  // Abbreviation dots are swapped for a stand-in character and swapped back at
  // the end. "§" is used unless the input already contains it, in which case
  // the first unused Private Use Area code point is taken instead, so a literal
  // "§" in the input is never rewritten to ".".
  let sentinel = "§";
  if (source.includes(sentinel)) {
    for (let code = 0xe000; code <= 0xf8ff; code += 1) {
      const candidate = String.fromCharCode(code);
      if (!source.includes(candidate)) {
        sentinel = candidate;
        break;
      }
    }
  }

  let protectedText = source;
  const replacements = new Map();

  // Replaces each match of `pattern` with a unique placeholder key. When
  // `keepTail` is set, punctuation at the very end of the match stays in the
  // text after the key, so it can still terminate the sentence.
  const protect = (pattern, prefix, keepTail = false) => {
    protectedText = protectedText.replace(pattern, (match) => {
      const tail = keepTail ? (match.match(/[.,;:!?)\]"']+$/)?.[0] ?? "") : "";
      const core = tail ? match.slice(0, -tail.length) : match;
      const key = `__${prefix}_${replacements.size}__`;
      replacements.set(key, core);
      return key + tail;
    });
  };

  for (const abbreviation of ABBREVIATIONS) {
    protectedText = protectedText.replaceAll(abbreviation, abbreviation.replaceAll(".", sentinel));
  }
  // \S+ is greedy and would otherwise swallow the "." that ends a sentence
  // finishing with a URL, hiding that boundary from the splitter.
  protect(/https?:\/\/\S+/g, "URL", true);
  // NOTE(review): a sentence that begins with a masked path or version starts
  // with "_" while masked, so the boundary lookahead does not fire in front of
  // it - confirm whether that is intended.
  protect(/\b[\w/-]+\.(?:js|ts|py|md|mdx|txt|html|json|yml|yaml|toml)\b/g, "PATH");
  protect(/\b\d+(?:\.\d+)+\b/g, "VERSION");

  const parts = protectedText
    .split(/(?<=[.!?])\s+(?=["'(\[]?[A-Z0-9])/)
    .map((part) => part.trim())
    .filter(Boolean);

  return parts.map((part) => {
    let restored = part.replaceAll(sentinel, ".");
    for (const [key, value] of replacements.entries()) {
      // A function replacer inserts `value` verbatim; a string replacement
      // would expand "$&", "$$" and similar sequences found in URLs or paths.
      restored = restored.replaceAll(key, () => value);
    }
    return restored;
  });
}
|
|
63
|
-
|
|
64
|
-
/**
 * Lists the character n-grams of `text` that lie entirely inside one word.
 *
 * The text is normalized with normalizeText, every run of characters outside
 * a-z and 0-9 becomes a single space, and a window of `size` characters is
 * slid across the result. Windows that contain a space are dropped.
 *
 * @param {*} text - Value to analyse.
 * @param {number} [size=3] - Window length in characters.
 * @returns {string[]} N-grams in left-to-right order, duplicates included.
 */
export function characterNgrams(text, size = 3) {
  const cleaned = normalizeText(text).replace(/[^a-z0-9]+/g, " ").trim();
  const lastStart = cleaned.length - size;
  const result = [];
  let start = 0;
  while (start <= lastStart) {
    const window = cleaned.slice(start, start + size);
    if (window.indexOf(" ") === -1) {
      result.push(window);
    }
    start += 1;
  }
  return result;
}
|
|
75
|
-
|
|
76
|
-
/**
 * Summarises a list of numbers.
 *
 * @param {number[]} values - Numbers to summarise; the input is not modified.
 * @returns {{count: number, min: number, max: number, mean: number,
 *   median: number, p25: number, p75: number}} Summary statistics. The mean is
 *   rounded to two decimals; median, p25 and p75 come from `percentile`. Every
 *   field is 0 when `values` has no length.
 */
export function distribution(values) {
  if (!values.length) {
    return { count: 0, min: 0, max: 0, mean: 0, median: 0, p25: 0, p75: 0 };
  }

  // Sort a copy numerically; the default sort would compare as strings.
  const ascending = [...values];
  ascending.sort((lower, higher) => lower - higher);

  const size = ascending.length;
  let total = 0;
  for (const entry of ascending) {
    total += entry;
  }

  const mean = round(total / size, 2);
  const [median, p25, p75] = [0.5, 0.25, 0.75].map((ratio) => percentile(ascending, ratio));

  return {
    count: size,
    min: ascending[0],
    max: ascending[size - 1],
    mean,
    median,
    p25,
    p75,
  };
}
|
|
91
|
-
|
|
92
|
-
/**
 * Counts occurrences and returns the most frequent items.
 *
 * Falsy entries ("", 0, null, undefined, ...) are ignored. Items are ordered
 * by descending count; ties are ordered by comparing the string form of each
 * value with localeCompare, so non-string values such as numbers do not throw.
 *
 * @param {Iterable<*>} values - Items to count; any iterable is accepted.
 * @param {number} [limit=10] - Maximum number of entries to return.
 * @returns {{value: *, count: number}[]} The top entries, most frequent first.
 */
export function topItems(values, limit = 10) {
  const counts = new Map();
  for (const value of values) {
    if (!value) {
      continue;
    }
    counts.set(value, (counts.get(value) ?? 0) + 1);
  }

  return Array.from(counts.entries())
    .sort(
      ([leftValue, leftCount], [rightValue, rightCount]) =>
        rightCount - leftCount || String(leftValue).localeCompare(String(rightValue)),
    )
    .slice(0, limit)
    .map(([value, count]) => ({ value, count }));
}
|
|
102
|
-
|
|
103
|
-
/**
 * Computes `count` divided by `total`, rounded with `round`.
 *
 * @param {number} count - Numerator.
 * @param {number} total - Denominator; values below 1 are treated as 1, which
 *   also prevents division by zero.
 * @param {number} [digits=3] - Decimal places kept.
 * @returns {number} The rounded ratio.
 */
export function rate(count, total, digits = 3) {
  const denominator = Math.max(1, total);
  const ratio = count / denominator;
  return round(ratio, digits);
}
|
|
106
|
-
|
|
107
|
-
/**
 * Rounds `value` to `digits` decimal places, with halves going towards
 * positive infinity (2.5 -> 3, -2.5 -> -2).
 *
 * @param {number} value - Number to round.
 * @param {number} [digits=0] - Decimal places kept.
 * @returns {number} The rounded number.
 */
export function round(value, digits = 0) {
  const scale = 10 ** digits;
  const halfUp = Math.floor(value * scale + 0.5) / scale;
  if (digits === 0) {
    return Math.trunc(halfUp);
  }
  return halfUp;
}
|
|
112
|
-
|
|
113
|
-
/**
 * Rounds `value` to the nearest integer and limits it to the range 0-100.
 *
 * @param {number} value - Raw score.
 * @returns {number} An integer between 0 and 100 inclusive (NaN stays NaN).
 */
export function clampScore(value) {
  const nearest = Math.round(value);
  const capped = Math.min(100, nearest);
  return Math.max(0, capped);
}
|
|
116
|
-
|
|
117
|
-
/**
 * Picks the element of an ascending array closest to the given rank ratio.
 *
 * The position is `(length - 1) * ratio` rounded to the nearest index and then
 * limited to the valid index range; no interpolation is done.
 *
 * @param {number[]} sortedValues - Values already sorted ascending.
 * @param {number} ratio - Rank as a fraction, e.g. 0.5 for the median.
 * @returns {number} The selected element (undefined for an empty array).
 */
function percentile(sortedValues, ratio) {
  const lastIndex = sortedValues.length - 1;
  const nearest = Math.round(lastIndex * ratio);
  const bounded = Math.min(lastIndex, Math.max(0, nearest));
  return sortedValues[bounded];
}
|
|
1
|
+
// Matches one word token: an ASCII letter followed by any run of ASCII letters,
// digits, apostrophes or hyphens. Global and case-insensitive.
const WORD_RE = /[a-z][a-z0-9'-]*/gi;
|
|
2
|
+
// Period-bearing abbreviations. splitSentences masks the dots in these so they
// are not treated as sentence terminators. Matching is exact and case-sensitive.
const ABBREVIATIONS = ["Dr.", "Mr.", "Mrs.", "Ms.", "Prof.", "Sr.", "Jr.", "e.g.", "i.e."];
|
|
3
|
+
|
|
4
|
+
// English function words, kept as space-separated rows and expanded into one
// flat array in row order. The list is also spread into STOP_WORDS.
export const FUNCTION_WORDS = [
  "a an and as at because but by for from",
  "if in into is it its of on or that",
  "the their then there this to was we with",
  "you your i my not so before after while",
].flatMap((row) => row.split(" "));
|
|
10
|
+
|
|
11
|
+
// Lookup set used by contentWords to discard non-content tokens: every
// FUNCTION_WORDS entry followed by the additional words listed here.
export const STOP_WORDS = new Set(
  FUNCTION_WORDS.concat([
    "about",
    "also",
    "between",
    "can",
    "could",
    "each",
    "had",
    "has",
    "have",
    "more",
    "one",
    "only",
    "our",
    "out",
    "over",
    "should",
    "than",
    "they",
    "were",
    "when",
    "where",
    "which",
    "who",
    "will",
    "would",
  ]),
);
|
|
18
|
+
|
|
19
|
+
/**
 * Produces a canonical form of `text` for comparison.
 *
 * @param {*} text - Value to normalize; null and undefined become "".
 * @returns {string} The string form with every whitespace run collapsed to a
 *   single space, outer whitespace removed, and all characters lower-cased.
 */
export function normalizeText(text) {
  const raw = String(text ?? "");
  const singleSpaced = raw.replace(/\s+/g, " ");
  return singleSpaced.trim().toLowerCase();
}
|
|
22
|
+
|
|
23
|
+
/**
 * Extracts lower-cased word tokens from `text`.
 *
 * Tokens are whatever WORD_RE matches. A trailing possessive "'s" is removed
 * from each token; other apostrophes and hyphens are kept.
 *
 * @param {*} text - Value to tokenize; null and undefined yield no tokens.
 * @returns {string[]} Tokens in order of appearance.
 */
export function tokenizeWords(text) {
  const source = String(text ?? "");
  const tokens = [];
  for (const raw of source.match(WORD_RE) ?? []) {
    tokens.push(raw.toLowerCase().replace(/'s$/, ""));
  }
  return tokens;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Returns the tokens of `text` that carry content.
 *
 * A token is kept only when it is longer than two characters and is not a
 * member of STOP_WORDS.
 *
 * @param {*} text - Value passed to tokenizeWords.
 * @returns {string[]} Kept tokens in their original order.
 */
export function contentWords(text) {
  const kept = [];
  for (const token of tokenizeWords(text)) {
    if (token.length <= 2) {
      continue;
    }
    if (STOP_WORDS.has(token)) {
      continue;
    }
    kept.push(token);
  }
  return kept;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Splits `text` into sentences.
 *
 * A boundary is whitespace that follows ".", "!" or "?" and precedes an
 * upper-case ASCII letter or digit (optionally after an opening quote, "(" or
 * "["). Before splitting, spans whose punctuation must not end a sentence are
 * masked: the dots of every ABBREVIATIONS entry, URLs, file paths with a known
 * extension, and dotted version numbers. Masks are undone afterwards, so each
 * returned sentence contains the original characters.
 *
 * @param {*} text - Value to split; null and undefined yield an empty array.
 * @returns {string[]} Trimmed, non-empty sentences in order.
 */
export function splitSentences(text) {
  const source = String(text ?? "");

  // Abbreviation dots are swapped for a stand-in character and swapped back at
  // the end. "§" is used unless the input already contains it, in which case
  // the first unused Private Use Area code point is taken instead, so a literal
  // "§" in the input is never rewritten to ".".
  let sentinel = "§";
  if (source.includes(sentinel)) {
    for (let code = 0xe000; code <= 0xf8ff; code += 1) {
      const candidate = String.fromCharCode(code);
      if (!source.includes(candidate)) {
        sentinel = candidate;
        break;
      }
    }
  }

  let protectedText = source;
  const replacements = new Map();

  // Replaces each match of `pattern` with a unique placeholder key. When
  // `keepTail` is set, punctuation at the very end of the match stays in the
  // text after the key, so it can still terminate the sentence.
  const protect = (pattern, prefix, keepTail = false) => {
    protectedText = protectedText.replace(pattern, (match) => {
      const tail = keepTail ? (match.match(/[.,;:!?)\]"']+$/)?.[0] ?? "") : "";
      const core = tail ? match.slice(0, -tail.length) : match;
      const key = `__${prefix}_${replacements.size}__`;
      replacements.set(key, core);
      return key + tail;
    });
  };

  for (const abbreviation of ABBREVIATIONS) {
    protectedText = protectedText.replaceAll(abbreviation, abbreviation.replaceAll(".", sentinel));
  }
  // \S+ is greedy and would otherwise swallow the "." that ends a sentence
  // finishing with a URL, hiding that boundary from the splitter.
  protect(/https?:\/\/\S+/g, "URL", true);
  // NOTE(review): a sentence that begins with a masked path or version starts
  // with "_" while masked, so the boundary lookahead does not fire in front of
  // it - confirm whether that is intended.
  protect(/\b[\w/-]+\.(?:js|ts|py|md|mdx|txt|html|json|yml|yaml|toml)\b/g, "PATH");
  protect(/\b\d+(?:\.\d+)+\b/g, "VERSION");

  const parts = protectedText
    .split(/(?<=[.!?])\s+(?=["'(\[]?[A-Z0-9])/)
    .map((part) => part.trim())
    .filter(Boolean);

  return parts.map((part) => {
    let restored = part.replaceAll(sentinel, ".");
    for (const [key, value] of replacements.entries()) {
      // A function replacer inserts `value` verbatim; a string replacement
      // would expand "$&", "$$" and similar sequences found in URLs or paths.
      restored = restored.replaceAll(key, () => value);
    }
    return restored;
  });
}
|
|
63
|
+
|
|
64
|
+
/**
 * Lists the character n-grams of `text` that lie entirely inside one word.
 *
 * The text is normalized with normalizeText, every run of characters outside
 * a-z and 0-9 becomes a single space, and a window of `size` characters is
 * slid across the result. Windows that contain a space are dropped.
 *
 * @param {*} text - Value to analyse.
 * @param {number} [size=3] - Window length in characters.
 * @returns {string[]} N-grams in left-to-right order, duplicates included.
 */
export function characterNgrams(text, size = 3) {
  const cleaned = normalizeText(text).replace(/[^a-z0-9]+/g, " ").trim();
  const lastStart = cleaned.length - size;
  const result = [];
  let start = 0;
  while (start <= lastStart) {
    const window = cleaned.slice(start, start + size);
    if (window.indexOf(" ") === -1) {
      result.push(window);
    }
    start += 1;
  }
  return result;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Summarises a list of numbers.
 *
 * @param {number[]} values - Numbers to summarise; the input is not modified.
 * @returns {{count: number, min: number, max: number, mean: number,
 *   median: number, p25: number, p75: number}} Summary statistics. The mean is
 *   rounded to two decimals; median, p25 and p75 come from `percentile`. Every
 *   field is 0 when `values` has no length.
 */
export function distribution(values) {
  if (!values.length) {
    return { count: 0, min: 0, max: 0, mean: 0, median: 0, p25: 0, p75: 0 };
  }

  // Sort a copy numerically; the default sort would compare as strings.
  const ascending = [...values];
  ascending.sort((lower, higher) => lower - higher);

  const size = ascending.length;
  let total = 0;
  for (const entry of ascending) {
    total += entry;
  }

  const mean = round(total / size, 2);
  const [median, p25, p75] = [0.5, 0.25, 0.75].map((ratio) => percentile(ascending, ratio));

  return {
    count: size,
    min: ascending[0],
    max: ascending[size - 1],
    mean,
    median,
    p25,
    p75,
  };
}
|
|
91
|
+
|
|
92
|
+
/**
 * Counts occurrences and returns the most frequent items.
 *
 * Falsy entries ("", 0, null, undefined, ...) are ignored. Items are ordered
 * by descending count; ties are ordered by comparing the string form of each
 * value with localeCompare, so non-string values such as numbers do not throw.
 *
 * @param {Iterable<*>} values - Items to count; any iterable is accepted.
 * @param {number} [limit=10] - Maximum number of entries to return.
 * @returns {{value: *, count: number}[]} The top entries, most frequent first.
 */
export function topItems(values, limit = 10) {
  const counts = new Map();
  for (const value of values) {
    if (!value) {
      continue;
    }
    counts.set(value, (counts.get(value) ?? 0) + 1);
  }

  return Array.from(counts.entries())
    .sort(
      ([leftValue, leftCount], [rightValue, rightCount]) =>
        rightCount - leftCount || String(leftValue).localeCompare(String(rightValue)),
    )
    .slice(0, limit)
    .map(([value, count]) => ({ value, count }));
}
|
|
102
|
+
|
|
103
|
+
/**
 * Computes `count` divided by `total`, rounded with `round`.
 *
 * @param {number} count - Numerator.
 * @param {number} total - Denominator; values below 1 are treated as 1, which
 *   also prevents division by zero.
 * @param {number} [digits=3] - Decimal places kept.
 * @returns {number} The rounded ratio.
 */
export function rate(count, total, digits = 3) {
  const denominator = Math.max(1, total);
  const ratio = count / denominator;
  return round(ratio, digits);
}
|
|
106
|
+
|
|
107
|
+
/**
 * Rounds `value` to `digits` decimal places, with halves going towards
 * positive infinity (2.5 -> 3, -2.5 -> -2).
 *
 * @param {number} value - Number to round.
 * @param {number} [digits=0] - Decimal places kept.
 * @returns {number} The rounded number.
 */
export function round(value, digits = 0) {
  const scale = 10 ** digits;
  const halfUp = Math.floor(value * scale + 0.5) / scale;
  if (digits === 0) {
    return Math.trunc(halfUp);
  }
  return halfUp;
}
|
|
112
|
+
|
|
113
|
+
/**
 * Rounds `value` to the nearest integer and limits it to the range 0-100.
 *
 * @param {number} value - Raw score.
 * @returns {number} An integer between 0 and 100 inclusive (NaN stays NaN).
 */
export function clampScore(value) {
  const nearest = Math.round(value);
  const capped = Math.min(100, nearest);
  return Math.max(0, capped);
}
|
|
116
|
+
|
|
117
|
+
/**
 * Picks the element of an ascending array closest to the given rank ratio.
 *
 * The position is `(length - 1) * ratio` rounded to the nearest index and then
 * limited to the valid index range; no interpolation is done.
 *
 * @param {number[]} sortedValues - Values already sorted ascending.
 * @param {number} ratio - Rank as a fraction, e.g. 0.5 for the median.
 * @returns {number} The selected element (undefined for an empty array).
 */
function percentile(sortedValues, ratio) {
  const lastIndex = sortedValues.length - 1;
  const nearest = Math.round(lastIndex * ratio);
  const bounded = Math.min(lastIndex, Math.max(0, nearest));
  return sortedValues[bounded];
}
|