@mrclrchtr/supi-flow 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -20
- package/{src → extensions}/cli.ts +38 -27
- package/{src → extensions}/index.ts +29 -54
- package/{src → extensions}/tools/flow-tools.ts +59 -33
- package/{src → extensions}/tools/tndm-cli.ts +1 -44
- package/package.json +4 -9
- package/prompts/supi-coding-retro.md +52 -7
- package/skills/supi-flow-archive/SKILL.md +21 -24
- package/skills/supi-flow-plan/SKILL.md +2 -2
- package/skills/supi-flow-slop-detect/SKILL.md +0 -393
- package/skills/supi-flow-slop-detect/references/vocabulary.json +0 -161
- package/skills/supi-flow-slop-detect/scripts/slop-helpers.ts +0 -301
- package/skills/supi-flow-slop-detect/scripts/slop-scan-structural.ts +0 -269
- package/skills/supi-flow-slop-detect/scripts/slop-scan-vocab.ts +0 -161
- package/skills/supi-flow-slop-detect/scripts/slop-scan.ts +0 -209
|
@@ -1,301 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Shared utilities for slop detection scripts.
|
|
3
|
-
*
|
|
4
|
-
* Cross-platform Node.js/TypeScript — runs wherever pi runs.
|
|
5
|
-
* Use via: pnpm exec jiti <script>.ts <file>
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import { readFileSync } from "node:fs";
|
|
9
|
-
|
|
10
|
-
export type DocProfile = "skill" | "technical" | "prose";
|
|
11
|
-
|
|
12
|
-
/** Tally of arrow connectors (`->` / `→`) found outside code in a document. */
interface ArrowConnectorStats {
  /** Every arrow connector found. */
  total: number;
  /** Arrows whose neighbouring tokens look like technical notation. */
  technical: number;
  /** Remaining arrows, i.e. `total - technical`, never negative. */
  prose: number;
}
|
|
17
|
-
|
|
18
|
-
/** Matches an ASCII (`->`) or Unicode (`→`) arrow connector. */
const ARROW_CONNECTOR_PATTERN = /->|→/g;

/** Matches a run of letters, digits, `_`, `.`, `/` or `-` (identifier/path-like tokens). */
const TECHNICAL_TOKEN_PATTERN = /[A-Za-z0-9_./-]+/g;

/**
 * Lower-case words that, when they are the first token next to an arrow,
 * mark the surrounding text as ordinary prose rather than technical notation.
 */
const ARROW_PROSE_START_WORDS = new Set([
  // articles, pronouns and determiners
  "a", "an", "it", "that", "the", "their", "there", "these", "this", "those", "we", "you",
  // common verbs
  "are", "can", "does", "helps", "improves", "is", "lets", "makes", "means", "shows",
]);
|
|
44
|
-
|
|
45
|
-
/**
 * Synchronously load a file and decode it as UTF-8.
 *
 * @param path Location of the file to read.
 * @returns The decoded file contents.
 */
export function readFile(path: string): string {
  const text = readFileSync(path, { encoding: "utf-8" });
  return text;
}
|
|
49
|
-
|
|
50
|
-
/**
 * Remove every fenced code block (from an opening triple backtick to the
 * next triple backtick, fences included) from markdown text.
 *
 * @param content Markdown source.
 * @returns The text with fenced blocks deleted; an unclosed fence is left untouched.
 */
export function stripCodeBlocks(content: string): string {
  // Splitting on the fence pattern and re-joining drops each matched block.
  const outsideFences = content.split(/```[\s\S]*?```/);
  return outsideFences.join("");
}
|
|
54
|
-
|
|
55
|
-
/**
 * Remove single-backtick inline code spans from markdown text.
 *
 * @param content Markdown source.
 * @returns The text with each `` `...` `` span (backticks included) deleted.
 */
export function stripInlineCode(content: string): string {
  const inlineSpan = /`[^`]+`/g;
  return content.replace(inlineSpan, () => "");
}
|
|
59
|
-
|
|
60
|
-
/**
 * Detect the document profile used for structural scoring.
 *
 * - `"skill"`: a `SKILL.md` nested at least one directory below a `skills` directory.
 * - `"technical"`: any `README.md`, or any `.md` file below a `docs` directory.
 * - `"prose"`: everything else.
 *
 * Directory names are matched at the start of the path as well as after a
 * separator, so relative paths such as `skills/foo/SKILL.md` or `docs/guide.md`
 * are classified the same way as their absolute forms. Both `/` and `\` are
 * accepted as separators and matching is case-insensitive.
 *
 * @param filePath Absolute or relative path of the markdown file.
 * @returns The profile to apply to the file.
 */
export function detectDocProfile(filePath: string): DocProfile {
  if (/(?:^|[\\/])skills[\\/].*[\\/]SKILL\.md$/i.test(filePath)) return "skill";
  if (
    /(?:^|[\\/])README\.md$/i.test(filePath) ||
    /(?:^|[\\/])docs[\\/].*\.md$/i.test(filePath)
  ) {
    return "technical";
  }
  return "prose";
}
|
|
68
|
-
|
|
69
|
-
/**
 * Count the lines that contain something other than whitespace.
 *
 * @param content Text split on `\n`.
 * @returns Number of lines that are non-blank after trimming.
 */
export function countNonEmpty(content: string): number {
  let nonEmpty = 0;
  for (const line of content.split("\n")) {
    if (line.trim() !== "") nonEmpty += 1;
  }
  return nonEmpty;
}
|
|
73
|
-
|
|
74
|
-
/**
 * Count whitespace-delimited words.
 *
 * @param text Text to measure.
 * @returns Number of maximal runs of non-whitespace characters.
 */
export function countWords(text: string): number {
  const words = text.match(/\S+/g);
  return words === null ? 0 : words.length;
}
|
|
78
|
-
|
|
79
|
-
/**
 * Naive sentence count: split on runs of `.`, `!` or `?` and count the
 * fragments that are not blank.
 *
 * @param text Text to measure.
 * @returns The fragment count, or 1 when no fragment is found (never 0).
 */
export function countSentences(text: string): number {
  let sentences = 0;
  for (const fragment of text.split(/[.!?]+/)) {
    if (fragment.trim().length > 0) sentences += 1;
  }
  return sentences === 0 ? 1 : sentences;
}
|
|
83
|
-
|
|
84
|
-
/**
 * Count paragraphs, where a paragraph is a non-blank block delimited by
 * blank (whitespace-only) lines.
 *
 * @param text Text to measure.
 * @returns The paragraph count, or 1 when no paragraph is found (never 0).
 */
export function countParagraphs(text: string): number {
  let paragraphs = 0;
  for (const block of text.split(/\n\s*\n/)) {
    if (block.trim().length > 0) paragraphs += 1;
  }
  return paragraphs === 0 ? 1 : paragraphs;
}
|
|
88
|
-
|
|
89
|
-
/**
 * Count em dash characters (`—`, U+2014).
 *
 * @param text Text to scan.
 * @returns Number of em dashes found.
 */
export function countEmDashes(text: string): number {
  // N occurrences of a separator always yield N + 1 pieces.
  return text.split("—").length - 1;
}
|
|
93
|
-
|
|
94
|
-
/**
 * Count semicolons.
 *
 * @param text Text to scan.
 * @returns Number of `;` characters found.
 */
export function countSemicolons(text: string): number {
  let found = 0;
  for (const ch of text) {
    if (ch === ";") found += 1;
  }
  return found;
}
|
|
98
|
-
|
|
99
|
-
/**
 * Count colons.
 *
 * @param text Text to scan.
 * @returns Number of `:` characters found.
 */
export function countColons(text: string): number {
  const hits = text.match(/:/g);
  return hits?.length ?? 0;
}
|
|
103
|
-
|
|
104
|
-
/**
 * Decide whether the tokens around an arrow look like technical notation.
 *
 * The context is technical only when both sides have at least one token,
 * neither side's first token (case-insensitive) is a known prose start word,
 * and neither side has more than three tokens.
 *
 * @param leftTokens Tokens that precede the arrow.
 * @param rightTokens Tokens that follow the arrow.
 * @returns `true` when the arrow should be treated as technical notation.
 */
function isTechnicalArrowContext(leftTokens: string[], rightTokens: string[]): boolean {
  const bothSidesPresent = leftTokens.length > 0 && rightTokens.length > 0;
  if (!bothSidesPresent) return false;

  const startsLikeProse = [leftTokens[0], rightTokens[0]].some((token) =>
    ARROW_PROSE_START_WORDS.has(token?.toLowerCase() ?? ""),
  );
  if (startsLikeProse) return false;

  return !(leftTokens.length > 3 || rightTokens.length > 3);
}
|
|
116
|
-
|
|
117
|
-
/**
 * Count arrow connectors outside fenced and inline code, separating those in
 * a technical context from those used as prose shorthand.
 *
 * Each arrow is judged line by line from up to three tokens immediately
 * before it and up to three tokens immediately after it.
 *
 * @param content Markdown source.
 * @returns Total, technical and prose arrow counts.
 */
export function analyzeArrowConnectors(content: string): ArrowConnectorStats {
  const tokensOf = (fragment: string): string[] =>
    Array.from(fragment.matchAll(TECHNICAL_TOKEN_PATTERN), (hit) => hit[0]);

  let arrowCount = 0;
  let technicalCount = 0;

  for (const line of stripInlineCode(stripCodeBlocks(content)).split("\n")) {
    for (const arrow of line.matchAll(ARROW_CONNECTOR_PATTERN)) {
      arrowCount += 1;

      const at = arrow.index;
      if (at === undefined || at < 0) continue;

      const before = tokensOf(line.slice(0, at)).slice(-3);
      const after = tokensOf(line.slice(at + arrow[0].length)).slice(0, 3);
      if (isTechnicalArrowContext(before, after)) technicalCount += 1;
    }
  }

  return {
    total: arrowCount,
    technical: technicalCount,
    prose: Math.max(0, arrowCount - technicalCount),
  };
}
|
|
152
|
-
|
|
153
|
-
/**
 * Count `+` signs used as a conjunction, i.e. a plus with whitespace on both
 * sides, ignoring fenced code blocks and inline code spans.
 *
 * @param content Markdown source.
 * @returns Number of whitespace-delimited plus signs in the prose.
 */
export function countPlusSigns(content: string): number {
  const withoutFences = stripCodeBlocks(content);
  const proseOnly = stripInlineCode(withoutFences);
  const hits = proseOnly.match(/\s\+\s/g);
  return hits === null ? 0 : hits.length;
}
|
|
158
|
-
|
|
159
|
-
/**
 * Count bullet list items: lines whose first non-blank character is `-`, `*`
 * or `+` followed by whitespace. Leading spaces and tabs are allowed.
 *
 * @param content Markdown source.
 * @returns Number of bullet lines.
 */
export function countBulletLines(content: string): number {
  const bulletStart = /^[ \t]*[-*+]\s/gm;
  const hits = content.match(bulletStart);
  return hits?.length ?? 0;
}
|
|
163
|
-
|
|
164
|
-
/**
 * Count participial-phrase tails: a comma followed by one of a fixed list of
 * present participles ("enabling", "making", ...) and at least one more word.
 * Matching is case-insensitive.
 *
 * @param text Prose to scan.
 * @returns Number of matching tails.
 */
export function countParticipialTails(text: string): number {
  // Shape being matched: "<main clause>, <present participle> <detail>".
  const tail =
    /,\s*(enabling|making|creating|providing|leading|marking|contributing|resulting|allowing|using|bringing|taking|giving|setting)\s+\w+/gi;
  const hits = text.match(tail);
  return hits === null ? 0 : hits.length;
}
|
|
171
|
-
|
|
172
|
-
/**
 * Count correlative conjunction constructions ("not only X but also",
 * "whether X or Y", "not just X but", "both X and Y", "either X or Y",
 * "neither X nor Y"), where each X/Y is a single word. Case-insensitive.
 *
 * @param text Prose to scan.
 * @returns Combined number of matches across all constructions.
 */
export function countCorrelativePairs(text: string): number {
  const constructions = [
    /not\s+only\s+\w+\s+but\s+also/gi,
    /whether\s+\w+\s+or\s+\w+/gi,
    /not\s+just\s+\w+\s+but/gi,
    /both\s+\w+\s+and\s+\w+/gi,
    /either\s+\w+\s+or\s+\w+/gi,
    /neither\s+\w+\s+nor\s+\w+/gi,
  ];

  let pairs = 0;
  for (const construction of constructions) {
    const hits = text.match(construction);
    if (hits !== null) pairs += hits.length;
  }
  return pairs;
}
|
|
184
|
-
|
|
185
|
-
/**
 * Count "from X ... to Y" range constructions (case-insensitive). Each match
 * has to start and end on the same line.
 *
 * @param text Prose to scan.
 * @returns Number of range constructions found.
 */
export function countFromToRanges(text: string): number {
  const range = /\bfrom\s+\w+.*?\bto\s+\w+/gi;
  return [...text.matchAll(range)].length;
}
|
|
189
|
-
|
|
190
|
-
/**
 * Pick the opening and closing paragraphs of a markdown document.
 *
 * Paragraphs are blank-line-delimited blocks; blocks that start with `---`
 * or with a code fence are skipped.
 *
 * @param content Markdown source.
 * @returns `[first, last]`. `last` is `""` when fewer than two paragraphs
 *   remain, and `first` is `""` when none remain.
 */
export function getFirstAndLastParagraph(content: string): [string, string] {
  const kept: string[] = [];
  for (const raw of content.split(/\n\s*\n/)) {
    const block = raw.trim();
    if (block.length === 0) continue;
    if (block.startsWith("---") || block.startsWith("```")) continue;
    kept.push(block);
  }

  const opening = kept[0] || "";
  const closing = kept.length < 2 ? "" : kept[kept.length - 1] || "";
  return [opening, closing];
}
|
|
203
|
-
|
|
204
|
-
/**
 * Word-overlap heuristic for deciding whether two paragraphs say roughly the
 * same thing.
 *
 * Each text is lower-cased and split on whitespace and `, . ; : ! ? ( )`;
 * only distinct words longer than three characters are compared. The score is
 * the shared-word count divided by the smaller of the two vocabularies.
 *
 * @param a First paragraph.
 * @param b Second paragraph.
 * @param threshold Score that must be strictly exceeded (default 0.6).
 * @returns `true` when the score exceeds `threshold`; `false` when either
 *   text has no qualifying words.
 */
export function isNearParaphrase(a: string, b: string, threshold = 0.6): boolean {
  const significantWords = (text: string): Set<string> => {
    const words = new Set<string>();
    for (const word of text.toLowerCase().split(/[\s,.;:!?()]+/)) {
      if (word.length > 3) words.add(word);
    }
    return words;
  };

  const left = significantWords(a);
  const right = significantWords(b);
  if (left.size === 0 || right.size === 0) return false;

  const shared = [...left].filter((word) => right.has(word)).length;
  return Math.max(shared / left.size, shared / right.size) > threshold;
}
|
|
229
|
-
|
|
230
|
-
/**
 * Fraction of non-empty lines that are bullet list items.
 *
 * @param content Markdown source.
 * @returns Bullet lines divided by non-empty lines, or 0 when there are no
 *   non-empty lines.
 */
export function computeBulletRatio(content: string): number {
  const nonEmpty = countNonEmpty(content);
  return nonEmpty === 0 ? 0 : countBulletLines(content) / nonEmpty;
}
|
|
237
|
-
|
|
238
|
-
/**
 * Heuristic check for an intro / body / conclusion layout (the
 * "five-paragraph essay" pattern common in AI-generated prose).
 *
 * Only paragraph counts and word counts are examined; the wording of the
 * first and last paragraphs is not compared. The layout is reported when:
 * - at least five paragraphs remain after dropping blank blocks and blocks
 *   that start with a code fence (so there are always three or more body
 *   paragraphs),
 * - the first paragraph has more than 20 words, and
 * - the last paragraph has fewer than 80% of the first paragraph's words.
 *
 * @param content Markdown source.
 * @returns `true` when all of the conditions above hold.
 */
export function detectIntroBodyConclusion(content: string): boolean {
  const blocks = content
    .split(/\n\s*\n/)
    .map((block) => block.trim())
    .filter((block) => block.length > 0 && !block.startsWith("```"));

  // Intro + at least three body blocks + conclusion.
  if (blocks.length < 5) return false;

  const introWords = countWords(blocks[0]);
  const closingWords = countWords(blocks[blocks.length - 1]);

  return introWords > 20 && closingWords < introWords * 0.8;
}
|
|
259
|
-
|
|
260
|
-
/**
 * Paragraph word-count uniformity score in the range 0-1.
 *
 * Computed as `1 - min(1, stddev / mean)` over the word counts of all
 * paragraphs (blank blocks and blocks starting with a code fence are
 * ignored), rounded to two decimals. Higher means more uniform paragraph
 * lengths, which is treated as a strong AI signal; above 0.7 is considered
 * suspiciously uniform.
 *
 * @param content Markdown source.
 * @returns The score, or 0 when there are fewer than three paragraphs or the
 *   mean word count is 0.
 */
export function paragraphUniformity(content: string): number {
  const lengths: number[] = [];
  for (const chunk of content.split(/\n\s*\n/)) {
    const block = chunk.trim();
    if (block.length > 0 && !block.startsWith("```")) {
      lengths.push(countWords(block));
    }
  }

  if (lengths.length < 3) return 0;

  let sum = 0;
  for (const length of lengths) sum += length;
  const mean = sum / lengths.length;
  if (mean === 0) return 0;

  let squaredDeviation = 0;
  for (const length of lengths) squaredDeviation += (length - mean) ** 2;

  // Population standard deviation relative to the mean.
  const cv = Math.sqrt(squaredDeviation / lengths.length) / mean;
  return Math.round((1 - Math.min(1, cv)) * 100) / 100;
}
|
|
279
|
-
|
|
280
|
-
/**
 * Sentence-length clustering score in the range 0-1: the share of sentences
 * that are between 15 and 25 words long (inclusive).
 *
 * Sentences are the non-blank fragments obtained by splitting on runs of
 * `.`, `!` or `?`.
 *
 * @param text Prose to measure.
 * @returns The share of mid-length sentences, or 0 for fewer than three sentences.
 */
export function sentenceLengthClustering(text: string): number {
  const sentences = text.split(/[.!?]+/).filter((fragment) => fragment.trim().length > 0);
  if (sentences.length < 3) return 0;

  let inBand = 0;
  for (const sentence of sentences) {
    const words = countWords(sentence);
    if (words >= 15 && words <= 25) inBand += 1;
  }
  return inBand / sentences.length;
}
|
|
289
|
-
|
|
290
|
-
/**
 * Count emoji-led bullet lines.
 *
 * A line counts when, after optional leading spaces or tabs, it starts with
 * one of the listed emoji, either directly (`✅ done`) or right after a
 * markdown list marker (`- ✅ done`, `* 🚀 launch`, `+ ❌ failed`). Emoji that
 * appear later in a line are ignored.
 *
 * @param content Markdown source.
 * @returns Number of lines led by one of the listed emoji.
 */
export function countEmojiBullets(content: string): number {
  // Use alternation instead of a character class to avoid the biome lint
  // error about a character plus combining character in the same class.
  // The optional `[-*+]` group accepts an ordinary list marker before the emoji.
  const emojiPattern =
    /^[ \t]*(?:[-*+][ \t]+)?(?:✅|❌|🔴|🟢|🟡|⭐|🎯|💡|📌|🔹|🔸|✔️|✏️|📝|🚀|💪|🔧|⚡|🔥|💎)/gm;
  return (content.match(emojiPattern) || []).length;
}
|
|
297
|
-
|
|
298
|
-
/**
 * Write a value to stdout as 2-space-indented JSON followed by a newline.
 *
 * @param data Value to serialize with `JSON.stringify`.
 */
export function outputJSON(data: unknown): void {
  const serialized = JSON.stringify(data, null, 2);
  process.stdout.write(`${serialized}\n`);
}
|
|
@@ -1,269 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env -S pnpm exec jiti
|
|
2
|
-
/**
|
|
3
|
-
* Structural pattern scanner — analyzes markdown for AI-prose structural tells.
|
|
4
|
-
*
|
|
5
|
-
* Usage:
|
|
6
|
-
* pnpm exec jiti scripts/slop-scan-structural.ts <file> [<file>...]
|
|
7
|
-
*
|
|
8
|
-
* Cross-platform Node.js/TypeScript — runs wherever pi runs.
|
|
9
|
-
* Output: JSON array with one result per file.
|
|
10
|
-
*/
|
|
11
|
-
|
|
12
|
-
import {
|
|
13
|
-
analyzeArrowConnectors,
|
|
14
|
-
computeBulletRatio,
|
|
15
|
-
countColons,
|
|
16
|
-
countCorrelativePairs,
|
|
17
|
-
countEmDashes,
|
|
18
|
-
countEmojiBullets,
|
|
19
|
-
countFromToRanges,
|
|
20
|
-
countParticipialTails,
|
|
21
|
-
countPlusSigns,
|
|
22
|
-
countSemicolons,
|
|
23
|
-
countWords,
|
|
24
|
-
type DocProfile,
|
|
25
|
-
detectDocProfile,
|
|
26
|
-
detectIntroBodyConclusion,
|
|
27
|
-
getFirstAndLastParagraph,
|
|
28
|
-
isNearParaphrase,
|
|
29
|
-
outputJSON,
|
|
30
|
-
paragraphUniformity,
|
|
31
|
-
readFile,
|
|
32
|
-
sentenceLengthClustering,
|
|
33
|
-
stripCodeBlocks,
|
|
34
|
-
} from "./slop-helpers.ts";
|
|
35
|
-
|
|
36
|
-
/** Raw structural measurements collected for one scanned file. */
interface StructuralMetrics {
  /** Em dashes per 1000 words. */
  emDashDensity: number;
  /** Fraction (0-1) of non-empty lines that are bullet items. */
  bulletRatio: number;
  /** Raw count of participial phrase tails. */
  participialTails: number;
  /** Normalized participial tails per 500 words. */
  participialTailsPer500: number;
  /** All arrow connectors found outside code. */
  arrowConnectors: number;
  /** Arrow connectors classified as technical notation. */
  technicalArrowConnectors: number;
  /** Arrow connectors classified as prose shorthand. */
  proseArrowConnectors: number;
  /** Count of correlative conjunction constructions. */
  correlativePairs: number;
  /** Count of plus signs used as conjunctions. */
  plusSigns: number;
  /** Count of colons. */
  colons: number;
  /** Count of semicolons. */
  semicolons: number;
  /** Share (0-1) of sentences that are 15-25 words long. */
  sentenceClusterRatio: number;
  /** Count of "from X to Y" constructions. */
  fromToRanges: number;
  /** Count of emoji-led bullet lines. */
  emojiBullets: number;
  /** Whether the intro/body/conclusion heuristic fired. */
  introBodyConclusion: boolean;
  /** Whether the first and last paragraphs are near-paraphrases. */
  conclusionMirroring: boolean;
  /** Paragraph uniformity score (0-1), higher = more uniform (more AI-like). */
  paragraphUniformity: number;
}
|
|
57
|
-
|
|
58
|
-
/** Scan report emitted for a single file. */
interface StructuralResult {
  /** Path of the scanned file, as passed on the command line. */
  file: string;
  /** Document profile that selected the scoring thresholds. */
  profile: DocProfile;
  /** Human-readable notes on how the profile changes the scoring. */
  adjustments: string[];
  /** Word count of the file. */
  wordCount: number;
  /** Measurements the score and flags are derived from. */
  metrics: StructuralMetrics;
  /** Sum of the penalty points for every signal that fired. */
  structuralScore: number;
  /** One message per signal that fired. */
  flags: string[];
}
|
|
67
|
-
|
|
68
|
-
/** Profile-specific limits applied when scoring and flagging a document. */
interface ProfileThresholds {
  /** Em dash density above which a spot-check warning is flagged. */
  emDashWarn: number;
  /** Em dash density above which the score is penalized. */
  emDashPenalty: number;
  /** Bullet ratio above which the score is penalized. */
  bulletPenalty: number;
  /** Plus-sign conjunction count above which the score is penalized. */
  plusSignPenalty: number;
  /** Whether the intro/body/conclusion signal counts for this profile. */
  scoreIntroBodyConclusion: boolean;
}
|
|
75
|
-
|
|
76
|
-
/**
 * Look up the scoring thresholds for a document profile.
 *
 * Skill docs get the most lenient limits and are exempt from the
 * intro/body/conclusion signal; technical docs differ from plain prose only
 * in a slightly higher bullet-ratio limit.
 *
 * @param profile Profile of the document being scored.
 * @returns A fresh thresholds object for that profile.
 */
function getThresholds(profile: DocProfile): ProfileThresholds {
  switch (profile) {
    case "skill":
      return {
        emDashWarn: 6,
        emDashPenalty: 8,
        bulletPenalty: 0.65,
        plusSignPenalty: 2,
        scoreIntroBodyConclusion: false,
      };
    case "technical":
      return {
        emDashWarn: 3,
        emDashPenalty: 5,
        bulletPenalty: 0.5,
        plusSignPenalty: 1,
        scoreIntroBodyConclusion: true,
      };
    default:
      return {
        emDashWarn: 3,
        emDashPenalty: 5,
        bulletPenalty: 0.45,
        plusSignPenalty: 1,
        scoreIntroBodyConclusion: true,
      };
  }
}
|
|
105
|
-
|
|
106
|
-
/**
 * Describe, in human-readable form, how a profile changes the scoring.
 *
 * @param profile Profile of the document being scored.
 * @returns A fresh list of adjustment notes for the report.
 */
function getProfileAdjustments(profile: DocProfile): string[] {
  switch (profile) {
    case "skill":
      return [
        "workflow arrow chains relaxed",
        "higher bullet-ratio threshold",
        "higher em-dash threshold",
        "intro-body-conclusion penalty disabled",
      ];
    case "technical":
      return ["technical-doc thresholds", "workflow arrow chains relaxed"];
    default:
      return ["default prose thresholds"];
  }
}
|
|
122
|
-
|
|
123
|
-
/**
 * Turn the collected metrics into a single penalty score.
 *
 * Each signal that fires adds its weight (1 or 2 points). Em-dash, bullet and
 * plus-sign limits, and whether the intro/body/conclusion signal counts at
 * all, depend on the document profile.
 *
 * @param metrics Measurements for the file.
 * @param profile Profile that selects the thresholds.
 * @returns Sum of the weights of all signals that fired.
 */
function computeStructuralScore(metrics: StructuralMetrics, profile: DocProfile): number {
  const limits = getThresholds(profile);
  const emDashHeavy = metrics.emDashDensity > limits.emDashPenalty;

  // [signal fired, points]
  const signals: Array<[boolean, number]> = [
    [emDashHeavy, 2],
    [metrics.sentenceClusterRatio > 0.7, 2],
    [metrics.bulletRatio > limits.bulletPenalty, 2],
    [metrics.paragraphUniformity > 0.7, 2],
    [metrics.emojiBullets > 0, 1],
    [metrics.participialTailsPer500 > 3, 2],
    [limits.scoreIntroBodyConclusion && metrics.introBodyConclusion, 2],
    [metrics.correlativePairs > 2, 1],
    [metrics.proseArrowConnectors > 0, 1],
    [metrics.plusSigns > limits.plusSignPenalty, 1],
    // Extra point when heavy em-dash use comes with no semicolons at all.
    [emDashHeavy && metrics.semicolons === 0, 1],
    [metrics.conclusionMirroring, 1],
  ];

  let score = 0;
  for (const [fired, points] of signals) {
    if (fired) score += points;
  }
  return score;
}
|
|
140
|
-
|
|
141
|
-
/**
 * Build the human-readable messages for every signal that fired.
 *
 * The conditions are the same ones that add points to the structural score,
 * plus one warning-only case: em dash density above the warn limit but not
 * above the penalty limit.
 *
 * @param metrics Measurements for the file.
 * @param profile Profile that selects the thresholds.
 * @returns Flag messages in a fixed order; empty when nothing fired.
 */
function genFlags(metrics: StructuralMetrics, profile: DocProfile): string[] {
  const thresholds = getThresholds(profile);
  const flags: string[] = [];

  /** Format a 0-1 ratio as a whole-number percentage (without the sign). */
  const percent = (ratio: number): string => (ratio * 100).toFixed(0);

  const emDashOverPenalty = metrics.emDashDensity > thresholds.emDashPenalty;
  const emDashText = metrics.emDashDensity.toFixed(1);

  if (emDashOverPenalty) {
    flags.push(
      `Em dash density ${emDashText}/1000 words (threshold: ${thresholds.emDashPenalty}) — review usage`,
    );
  } else if (metrics.emDashDensity > thresholds.emDashWarn) {
    flags.push(
      `Em dash density ${emDashText}/1000 words — elevated for ${profile} docs, spot-check`,
    );
  }

  if (metrics.sentenceClusterRatio > 0.7) {
    flags.push(
      `Sentence length clustering ${percent(metrics.sentenceClusterRatio)}% (threshold: 70%) — vary rhythm`,
    );
  }

  if (metrics.bulletRatio > thresholds.bulletPenalty) {
    flags.push(
      `Bullet ratio ${percent(metrics.bulletRatio)}% (threshold: ${percent(thresholds.bulletPenalty)}%) — convert some to prose`,
    );
  }

  if (metrics.paragraphUniformity > 0.7) {
    flags.push(
      `Paragraph uniformity ${percent(metrics.paragraphUniformity)}% (threshold: 70%) — vary paragraph length`,
    );
  }

  if (metrics.emojiBullets > 0) {
    flags.push(`Emoji-led bullets: ${metrics.emojiBullets} — strong AI tell in technical docs`);
  }

  if (metrics.participialTailsPer500 > 3) {
    flags.push(
      `Participial phrase tails: ${metrics.participialTailsPer500.toFixed(1)}/500 words (threshold: 3) — split or restructure`,
    );
  }

  if (thresholds.scoreIntroBodyConclusion && metrics.introBodyConclusion) {
    flags.push("Intro-body-conclusion structure — cut the intro and start with content");
  }

  if (metrics.correlativePairs > 2) {
    flags.push(
      `Correlative pairs: ${metrics.correlativePairs} (threshold: 2) — reduce "not only...but also" etc.`,
    );
  }

  if (metrics.proseArrowConnectors > 0) {
    flags.push(
      `Arrow connectors used as prose shorthand: ${metrics.proseArrowConnectors} — keep arrows for technical chains and use words in normal sentences`,
    );
  }

  if (metrics.plusSigns > thresholds.plusSignPenalty) {
    flags.push(
      `Plus-sign conjunctions: ${metrics.plusSigns} (threshold: ${thresholds.plusSignPenalty}) — use "and" instead`,
    );
  }

  if (emDashOverPenalty && metrics.semicolons === 0) {
    flags.push("Em dashes above threshold with zero semicolons — strong AI signal");
  }

  if (metrics.conclusionMirroring) {
    flags.push("Conclusion mirrors intro — cut or replace with specifics");
  }

  return flags;
}
|
|
215
|
-
|
|
216
|
-
/**
 * Scan one markdown file and produce its structural report.
 *
 * Em dashes and participial tails are counted in the text with fenced code
 * blocks removed, so their densities are normalized by the word count of that
 * same code-stripped text. Dividing by the full word count would dilute the
 * densities for documents that contain a lot of code. The reported
 * `wordCount` is still the word count of the whole file.
 *
 * @param filePath Path of the file to scan.
 * @returns Profile, metrics, score and flags for the file.
 * @throws Whatever `readFile` throws when the file cannot be read.
 */
function scanFile(filePath: string): StructuralResult {
  const content = readFile(filePath);
  const prose = stripCodeBlocks(content);
  const wordCount = countWords(content);
  // Denominator for per-word densities: must cover the same text as the counts.
  const proseWordCount = countWords(prose);
  const profile = detectDocProfile(filePath);

  const emDashCount = countEmDashes(prose);
  const emDashDensity = proseWordCount > 0 ? (emDashCount / proseWordCount) * 1000 : 0;
  const rawTails = countParticipialTails(prose);
  const tailsPer500 = proseWordCount > 0 ? (rawTails / proseWordCount) * 500 : 0;
  const arrows = analyzeArrowConnectors(content);

  const metrics: StructuralMetrics = {
    emDashDensity: Math.round(emDashDensity * 100) / 100,
    bulletRatio: Math.round(computeBulletRatio(content) * 100) / 100,
    participialTails: rawTails,
    participialTailsPer500: Math.round(tailsPer500 * 10) / 10,
    arrowConnectors: arrows.total,
    technicalArrowConnectors: arrows.technical,
    proseArrowConnectors: arrows.prose,
    correlativePairs: countCorrelativePairs(prose),
    plusSigns: countPlusSigns(content),
    colons: countColons(prose),
    semicolons: countSemicolons(prose),
    sentenceClusterRatio: Math.round(sentenceLengthClustering(prose) * 100) / 100,
    fromToRanges: countFromToRanges(prose),
    emojiBullets: countEmojiBullets(content),
    introBodyConclusion: detectIntroBodyConclusion(content),
    conclusionMirroring: isNearParaphrase(...getFirstAndLastParagraph(content)),
    paragraphUniformity: paragraphUniformity(content),
  };

  return {
    file: filePath,
    profile,
    adjustments: getProfileAdjustments(profile),
    wordCount,
    metrics,
    structuralScore: computeStructuralScore(metrics, profile),
    flags: genFlags(metrics, profile),
  };
}
|
|
258
|
-
|
|
259
|
-
// --- CLI ---
// Every argument after the script path is a file to scan. With no files,
// print usage to stderr and exit with status 1; otherwise print one JSON
// array holding a result per file, in argument order.
const files = process.argv.slice(2);
if (!files.length) {
  const usage = "Usage: pnpm exec jiti scripts/slop-scan-structural.ts <file> [<file>...]\n";
  process.stderr.write(usage);
  process.exit(1);
}

const results = files.map((file) => scanFile(file));
outputJSON(results);
|