@oh-my-pi/omp-stats 14.9.3 → 14.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,486 @@
1
+ /**
2
+ * Behavioral metrics extracted from a single user message.
3
+ *
4
+ * Pure, side-effect free. Designed for batch use during session ingestion
5
+ * and standalone testing.
6
+ */
7
+
8
export interface UserMessageMetrics {
  /** Length of the analyzed (trimmed) text, measured as `String.prototype.length` (UTF-16 code units). */
  chars: number;
  /** How many maximal runs of non-whitespace characters the text contains. */
  words: number;
  /**
   * How many sentences read as "yelling": strictly more than half of the
   * sentence's letters are uppercase, and the sentence has enough letters
   * for that ratio to mean something (a lone "OK" is too short to count).
   */
  yellingSentences: number;
  /** How many whole-word, case-insensitive profanity matches were found. */
  profanity: number;
  /**
   * How many punctuation bursts were found: a `!` or `?` followed by at
   * least two more of `!`, `?`, or `1` (the `1` covers a slipped shift key,
   * as in "!!!111").
   */
  dramaRuns: number;
}
24
+
25
/**
 * Vocabulary counted as profane or aggressive.
 *
 * Entries are plain lowercase words grouped by family below; they are
 * matched as whole words, ignoring case. Coverage is broad English: the
 * f-/s-word families with censored spellings, mild swears, insults aimed at
 * intelligence, body-part epithets, British/Australian/Irish slang,
 * religious exclamations, chat acronyms, and frustration interjections.
 * Racial, homophobic, and other identity slurs are intentionally left out.
 *
 * NOTE(review): a few entries ("git", "crud", "dummy", "garbage", "trash")
 * are also ordinary technical vocabulary and will be counted as hits when
 * they appear in that sense — confirm this is acceptable for the messages
 * being analyzed.
 */
const PROFANITY: readonly string[] = [
  // f-word family
  "fuck", "fucks", "fucked", "fucking", "fuckin", "fucker", "fuckers",
  "fuckup", "fuckups", "fuckhead", "fuckheads", "fuckface", "fuckwit", "fuckwits",
  "fucktard", "fuckery", "fuckoff",
  "motherfucker", "motherfuckers", "motherfucking",
  "clusterfuck", "ratfuck", "unfuck",

  // censored / euphemistic f-word
  "fk", "fks", "fking", "fkin", "fker",
  "fck", "fcks", "fcking", "fckin", "fcker",
  "fuk", "fuking", "fukin",
  "eff", "effs", "effed", "effing",
  "frick", "fricks", "fricked", "fricking", "frickin",
  "freaking", "freakin", "freaked",

  // s-word family
  "shit", "shits", "shat", "shitty", "shittier", "shittiest",
  "shite", "shites", "shited", "shitting", "shitter", "shitters",
  "shithead", "shitheads", "shitshow", "shitstorm", "shitstain", "shitfaced",
  "shitload", "shitbag", "shitcan", "shitcanned", "shitpost", "shitposting",
  "bullshit", "bullshits", "bullshitting", "bullshitter",
  "horseshit", "batshit", "dogshit", "dipshit", "jackshit", "dumbshit", "holyshit",

  // mild swears
  "damn", "damns", "damned", "damning", "dammit",
  "goddamn", "goddamned", "goddamnit", "goddammit",
  "darn", "darns", "darned", "darnit",
  "dang", "danged", "dangit",
  "hell", "hells",
  "heck", "hecks", "heckin",
  "gosh",
  "blast", "blasted",
  "bloody",
  "bollocks", "bollox",

  // crap family
  "crap", "craps", "crappy", "crappier", "crappiest",
  "crapped", "crapping", "crapload", "crapshoot", "crapola",

  // piss family
  "piss", "pisses", "pissed", "pissing", "pisser",
  "pisspoor", "pisstake", "pisshead",

  // ass family
  "ass", "asses", "asshole", "assholes", "asshat", "asshats",
  "asswipe", "asswipes", "assclown", "assbag", "asskisser",
  "dumbass", "dumbasses", "jackass", "jackasses",
  "smartass", "smartasses", "badass", "badasses",
  "lazyass", "fatass", "hardass", "halfass", "halfassed",
  "arse", "arsed", "arsehole", "arseholes", "arsewipe",

  // bitch family
  "bitch", "bitches", "bitched", "bitching",
  "bitchy", "bitchier", "bitchiest",
  "sonofabitch", "biatch", "biotch",

  // strong vulgarity
  "cunt", "cunts", "cunty", "cuntish",
  "twat", "twats", "twatty",
  "bastard", "bastards",

  // body-part insults
  "dick", "dicks", "dickhead", "dickheads", "dickish",
  "dickwad", "dickwads", "dickface", "dickbag",
  "prick", "pricks", "prickish",
  "cock", "cocks", "cocky", "cockier", "cockiest",
  "cockhead", "cockblock", "cocksucker", "cocksuckers",
  "knob", "knobhead", "knobheads", "knobend",
  "wanker", "wankers", "wankery",
  "tosser", "tossers",
  "jerkoff", "jerkoffs",
  "douche", "douches", "douchebag", "douchebags", "douchey",
  "scumbag", "scumbags", "scum",
  "sleazebag", "sleazeball", "slimeball",
  "lowlife", "lowlifes", "deadbeat",

  // intelligence-based insults
  "idiot", "idiots", "idiotic", "idiocy",
  "stupid", "stupider", "stupidest", "stupidity",
  "moron", "morons", "moronic",
  "imbecile", "imbeciles",
  "retard", "retards", "retarded",
  "dumb", "dumber", "dumbest", "dumbo", "dummy", "dummies",
  "fool", "fools", "foolish", "foolery",
  "clown", "clowns", "clownish",
  "buffoon", "buffoons",
  "simpleton",
  "halfwit", "halfwits", "nitwit", "nitwits", "dimwit", "dimwits",
  "dolt", "dolts", "doltish",
  "knucklehead", "knuckleheads", "blockhead", "blockheads",
  "lamebrain", "airhead", "airheads", "scatterbrain",
  "numbnuts", "numbskull", "numpty", "numpties",
  "muppet", "muppets", "pillock", "pillocks", "plonker", "plonkers",
  "prat", "prats", "berk", "berks",
  "ninny", "ninnies", "dingbat", "dingbats",
  "putz", "putzes", "schmuck", "schmucks",
  "jerk", "jerks", "jerkface",
  "git", "gits",
  "sod", "sodding",
  "bugger", "buggered",

  // generic aggression / dismissal
  "hate", "hated", "hates", "hating", "hateful",
  "suck", "sucks", "sucked", "sucking", "sucky", "suckage",
  "trash", "trashy", "trashed",
  "garbage",
  "crud", "crudded",

  // religious exclamations
  "jesus", "christ", "jeez", "jeezus", "sheesh",
  "holymoly", "holyfuck", "holysmokes", "godsake",

  // chat acronyms
  "wtf", "wth", "wtaf", "stfu", "gtfo", "omfg", "omg", "ffs", "jfc", "kys",
  "fml", "smh", "smdh", "smfh", "idgaf", "idfc", "lmfao", "fubar", "snafu",

  // frustration interjections
  "ugh", "ughh", "ughhh", "urgh",
  "argh", "arghh", "arghhh", "arrgh",
  "blah", "bleh", "meh", "yikes", "yeesh", "oof",
  "gah", "gahh",
  "grr", "grrr", "grrrr",
];
417
+
418
/** Global, case-insensitive matcher for any entry of `PROFANITY` standing as a whole word. */
const PROFANITY_RE = new RegExp("\\b(?:" + PROFANITY.join("|") + ")\\b", "gi");

/** A "sentence" is a maximal run of characters containing no `.`, `!`, `?`, or newline. */
const SENTENCE_RE = /[^.!?\n]+/g;

/** Any Unicode letter. */
const LETTER_RE = /\p{L}/gu;

/** Any uppercase Unicode letter. */
const UPPER_LETTER_RE = /\p{Lu}/gu;

/** Fewest letters a sentence needs before its uppercase ratio is evaluated. */
const YELLING_MIN_LETTERS = 4;

/** Uppercase share of letters that a sentence must strictly exceed to count as yelling. */
const YELLING_THRESHOLD = 0.5;

/**
 * Punctuation burst: one `!` or `?`, then two or more of `!`, `?`, or `1`.
 * The `1` is what a slipped shift key leaves behind ("!!!111", "!?!??111"),
 * so those digits are folded into the same burst.
 */
const DRAMA_RE = /[!?][!?1]{2,}/g;

/** A word is a maximal run of non-whitespace characters. */
const WORD_RE = /\S+/g;
429
+
430
/**
 * Count how many times `re` matches in `text` without materializing the
 * match array.
 *
 * A global or sticky regex is scanned in place: its `lastIndex` is reset to
 * 0 before the scan, and the failed `exec` that ends the scan resets it to 0
 * again. A regex with neither flag would make `exec` return the same first
 * match forever, so it is scanned through a global copy instead and the
 * caller's regex is left untouched.
 *
 * @param text - String to scan.
 * @param re - Pattern whose matches are counted.
 * @returns Number of matches, counted the way a global scan finds them.
 */
function countMatches(text: string, re: RegExp): number {
  const scanner = re.global || re.sticky ? re : new RegExp(re.source, `${re.flags}g`);
  scanner.lastIndex = 0;
  let count = 0;
  for (let match = scanner.exec(text); match !== null; match = scanner.exec(text)) {
    count++;
    if (match[0].length === 0) {
      // An empty match leaves `lastIndex` where it was; step past it by hand
      // (a whole surrogate pair in unicode mode) or the loop never ends.
      const codePoint = scanner.unicode ? text.codePointAt(scanner.lastIndex) : undefined;
      scanner.lastIndex += codePoint !== undefined && codePoint > 0xffff ? 2 : 1;
    }
  }
  return count;
}
437
+
438
/**
 * Count the "yelling" sentences in `text`.
 *
 * The text is cut into pieces with {@link SENTENCE_RE}. A piece counts as
 * yelling when it contains at least {@link YELLING_MIN_LETTERS} letters and
 * the uppercase share of those letters is strictly greater than
 * {@link YELLING_THRESHOLD}. The letter minimum keeps very short all-caps
 * fragments ("OK", "WIP") from registering. A fragment with exactly
 * {@link YELLING_MIN_LETTERS} letters — a bare "TODO", for instance — is long
 * enough to be evaluated and does count.
 *
 * @param text - Message text to inspect.
 * @returns Number of pieces classified as yelling.
 */
function countYellingSentences(text: string): number {
  let yelling = 0;
  SENTENCE_RE.lastIndex = 0;
  for (let hit = SENTENCE_RE.exec(text); hit !== null; hit = SENTENCE_RE.exec(text)) {
    const fragment = hit[0];
    const letterCount = countMatches(fragment, LETTER_RE);
    // Too few letters for the uppercase ratio to be meaningful.
    if (letterCount < YELLING_MIN_LETTERS) continue;
    const upperCount = countMatches(fragment, UPPER_LETTER_RE);
    if (upperCount / letterCount > YELLING_THRESHOLD) yelling++;
  }
  return yelling;
}
459
+
460
/**
 * Derive the behavioral metrics of one user message.
 *
 * The message is trimmed first and every metric is measured on the trimmed
 * text. Empty or whitespace-only input yields a fresh all-zero result.
 *
 * @param text - Raw message text.
 * @returns Metrics for the trimmed text.
 */
export function computeUserMessageMetrics(text: string): UserMessageMetrics {
  const body = text.trim();
  if (body.length === 0) {
    return { chars: 0, words: 0, yellingSentences: 0, profanity: 0, dramaRuns: 0 };
  }

  const words = countMatches(body, WORD_RE);
  const yellingSentences = countYellingSentences(body);
  const profanity = countMatches(body, PROFANITY_RE);
  const dramaRuns = countMatches(body, DRAMA_RE);
  return { chars: body.length, words, yellingSentences, profanity, dramaRuns };
}
478
+
479
/**
 * All-zero metrics for callers that need a default value.
 *
 * The object is frozen, so it is typed `Readonly`: a write to one of its
 * fields is rejected by the compiler instead of throwing at runtime. It can
 * still be passed wherever a {@link UserMessageMetrics} is expected; copy it
 * (`{ ...EMPTY_USER_METRICS }`) to get a mutable starting point.
 */
export const EMPTY_USER_METRICS: Readonly<UserMessageMetrics> = Object.freeze({
  chars: 0,
  words: 0,
  yellingSentences: 0,
  profanity: 0,
  dramaRuns: 0,
});