@oh-my-pi/omp-stats 14.9.2 → 14.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -6
- package/src/aggregator.ts +138 -11
- package/src/client/App.tsx +125 -30
- package/src/client/api.ts +35 -3
- package/src/client/components/BehaviorChart.tsx +367 -0
- package/src/client/components/BehaviorModelsTable.tsx +422 -0
- package/src/client/components/BehaviorSummary.tsx +75 -0
- package/src/client/components/CostChart.tsx +5 -38
- package/src/client/components/CostSummary.tsx +8 -47
- package/src/client/components/Header.tsx +28 -4
- package/src/client/components/StatsGrid.tsx +10 -1
- package/src/client/types.ts +54 -0
- package/src/db.ts +307 -26
- package/src/parser.ts +75 -4
- package/src/server.ts +30 -6
- package/src/types.ts +81 -0
- package/src/user-metrics.ts +486 -0
|
@@ -0,0 +1,486 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Behavioral metrics extracted from a single user message.
|
|
3
|
+
*
|
|
4
|
+
* Pure, side-effect free. Designed for batch use during session ingestion
|
|
5
|
+
* and standalone testing.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
export interface UserMessageMetrics {
	/**
	 * Length of the analyzed text as reported by `String.length` (UTF-16 code
	 * units). `computeUserMessageMetrics` measures the message after trimming.
	 */
	chars: number;
	/** How many whitespace-separated tokens the text contains. */
	words: number;
	/**
	 * How many sentences read as "yelling": strictly more than half of the
	 * sentence's letters are uppercase. Sentences with too few letters are
	 * skipped so that a short acronym such as "OK" is not treated as shouting.
	 */
	yellingSentences: number;
	/** Count of profane/aggressive words, matched on word boundaries regardless of case. */
	profanity: number;
	/**
	 * Count of punctuation bursts: a `!` or `?` followed by at least two more
	 * of `!`, `?`, or `1` (the `1` covers the shift-key mishit in "!!!111").
	 */
	dramaRuns: number;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Lexicon of profane or aggressive words, all lowercase. Entries are matched
 * as whole words and without regard to case (see `PROFANITY_RE`).
 *
 * Coverage is broad English: the f-/s-word families and their censored
 * spellings, mild swears, intelligence-based insults, body-part epithets,
 * British/Australian/Irish slang, religious exclamations, chat acronyms, and
 * frustration interjections. The list is curated to leave out racial,
 * homophobic, and other identity slurs.
 *
 * Every entry consists of plain ASCII letters, so the words are joined into
 * the regex without escaping.
 *
 * NOTE(review): several entries are also everyday technical vocabulary —
 * "git"/"gits" (the version-control tool), "dummy", "garbage", "trash". If the
 * analyzed messages are developer chat these will inflate the profanity
 * count; confirm whether that is intended.
 */
const PROFANITY: readonly string[] = [
	// f-word family
	"fuck", "fucks", "fucked", "fucking", "fuckin", "fucker", "fuckers",
	"fuckup", "fuckups", "fuckhead", "fuckheads", "fuckface", "fuckwit", "fuckwits",
	"fucktard", "fuckery", "fuckoff", "motherfucker", "motherfuckers", "motherfucking",
	"clusterfuck", "ratfuck", "unfuck",
	// censored / euphemistic f-word
	"fk", "fks", "fking", "fkin", "fker",
	"fck", "fcks", "fcking", "fckin", "fcker",
	"fuk", "fuking", "fukin",
	"eff", "effs", "effed", "effing",
	"frick", "fricks", "fricked", "fricking", "frickin",
	"freaking", "freakin", "freaked",
	// s-word family
	"shit", "shits", "shat", "shitty", "shittier", "shittiest",
	"shite", "shites", "shited", "shitting", "shitter", "shitters",
	"shithead", "shitheads", "shitshow", "shitstorm", "shitstain", "shitfaced",
	"shitload", "shitbag", "shitcan", "shitcanned", "shitpost", "shitposting",
	"bullshit", "bullshits", "bullshitting", "bullshitter",
	"horseshit", "batshit", "dogshit", "dipshit", "jackshit", "dumbshit", "holyshit",
	// mild swears
	"damn", "damns", "damned", "damning", "dammit",
	"goddamn", "goddamned", "goddamnit", "goddammit",
	"darn", "darns", "darned", "darnit",
	"dang", "danged", "dangit",
	"hell", "hells",
	"heck", "hecks", "heckin",
	"gosh",
	"blast", "blasted",
	"bloody",
	"bollocks", "bollox",
	// crap family
	"crap", "craps", "crappy", "crappier", "crappiest",
	"crapped", "crapping", "crapload", "crapshoot", "crapola",
	// piss family
	"piss", "pisses", "pissed", "pissing", "pisser", "pisspoor", "pisstake", "pisshead",
	// ass family
	"ass", "asses", "asshole", "assholes", "asshat", "asshats",
	"asswipe", "asswipes", "assclown", "assbag", "asskisser",
	"dumbass", "dumbasses", "jackass", "jackasses", "smartass", "smartasses",
	"badass", "badasses", "lazyass", "fatass", "hardass", "halfass", "halfassed",
	"arse", "arsed", "arsehole", "arseholes", "arsewipe",
	// bitch family
	"bitch", "bitches", "bitched", "bitching", "bitchy", "bitchier", "bitchiest",
	"sonofabitch", "biatch", "biotch",
	// strong vulgarity
	"cunt", "cunts", "cunty", "cuntish",
	"twat", "twats", "twatty",
	"bastard", "bastards",
	// body-part insults
	"dick", "dicks", "dickhead", "dickheads", "dickish",
	"dickwad", "dickwads", "dickface", "dickbag",
	"prick", "pricks", "prickish",
	"cock", "cocks", "cocky", "cockier", "cockiest",
	"cockhead", "cockblock", "cocksucker", "cocksuckers",
	"knob", "knobhead", "knobheads", "knobend",
	"wanker", "wankers", "wankery",
	"tosser", "tossers",
	"jerkoff", "jerkoffs",
	"douche", "douches", "douchebag", "douchebags", "douchey",
	"scumbag", "scumbags", "scum",
	"sleazebag", "sleazeball", "slimeball",
	"lowlife", "lowlifes",
	"deadbeat",
	// intelligence-based insults
	"idiot", "idiots", "idiotic", "idiocy",
	"stupid", "stupider", "stupidest", "stupidity",
	"moron", "morons", "moronic",
	"imbecile", "imbeciles",
	"retard", "retards", "retarded",
	"dumb", "dumber", "dumbest", "dumbo",
	"dummy", "dummies",
	"fool", "fools", "foolish", "foolery",
	"clown", "clowns", "clownish",
	"buffoon", "buffoons",
	"simpleton",
	"halfwit", "halfwits",
	"nitwit", "nitwits",
	"dimwit", "dimwits",
	"dolt", "dolts", "doltish",
	"knucklehead", "knuckleheads",
	"blockhead", "blockheads",
	"lamebrain",
	"airhead", "airheads",
	"scatterbrain",
	"numbnuts",
	"numbskull",
	"numpty", "numpties",
	"muppet", "muppets",
	"pillock", "pillocks",
	"plonker", "plonkers",
	"prat", "prats",
	"berk", "berks",
	"ninny", "ninnies",
	"dingbat", "dingbats",
	"putz", "putzes",
	"schmuck", "schmucks",
	"jerk", "jerks", "jerkface",
	"git", "gits",
	"sod", "sodding",
	"bugger", "buggered",
	// generic aggression / dismissal
	"hate", "hated", "hates", "hating", "hateful",
	"suck", "sucks", "sucked", "sucking", "sucky", "suckage",
	"trash", "trashy", "trashed",
	"garbage",
	"crud", "crudded",
	// religious exclamations
	"jesus", "christ", "jeez", "jeezus", "sheesh",
	"holymoly", "holyfuck", "holysmokes", "godsake",
	// chat acronyms
	"wtf", "wth", "wtaf", "stfu", "gtfo", "omfg", "omg", "ffs", "jfc", "kys",
	"fml", "smh", "smdh", "smfh", "idgaf", "idfc", "lmfao", "fubar", "snafu",
	// frustration interjections
	"ugh", "ughh", "ughhh", "urgh",
	"argh", "arghh", "arghhh", "arrgh",
	"blah", "bleh", "meh", "yikes", "yeesh", "oof",
	"gah", "gahh",
	"grr", "grrr", "grrrr",
];

/**
 * Matcher for any {@link PROFANITY} entry standing alone as a word
 * (`\b…\b`), case-insensitive. It is global, so it carries `lastIndex`
 * state between `exec` calls.
 */
const PROFANITY_RE = new RegExp("\\b(?:" + PROFANITY.join("|") + ")\\b", "gi");
|
|
419
|
+
/** Sentence splitter: each maximal run of characters that contains no `.`, `!`, `?`, or newline. */
const SENTENCE_RE = /[^.!?\n]+/g;

/** A single letter of any script (Unicode general category `L`). */
const LETTER_RE = /\p{L}/gu;

/** A single uppercase letter (Unicode general category `Lu`). */
const UPPER_LETTER_RE = /\p{Lu}/gu;

/** Fewest letters a sentence must contain before its uppercase ratio is evaluated. */
const YELLING_MIN_LETTERS = 4;

/** Uppercase share of a sentence's letters that must be exceeded for it to count as yelling. */
const YELLING_THRESHOLD = 0.5;

// A "drama" burst opens with `!` or `?` and continues with two or more
// characters drawn from `!`, `?`, and `1`. The `1` is accepted because it is
// what a slipped shift key produces ("!!!111", "!?!??111"); such tails belong
// to the same burst rather than starting a new one.
const DRAMA_RE = /[!?][!?1]{2,}/g;

/** A word: one maximal run of non-whitespace characters. */
const WORD_RE = /\S+/g;
|
|
429
|
+
|
|
430
|
+
/**
 * Count the matches of `re` in `text` without materializing a match array.
 *
 * Scanning always starts at the beginning of `text`; any `lastIndex` left on
 * `re` by an earlier use is discarded.
 *
 * @param text - String to scan.
 * @param re - Pattern to count. A global or sticky regex is used directly
 *   (its `lastIndex` is reset and advanced). A regex with neither flag is
 *   left untouched and a global copy is scanned instead.
 * @returns Number of successive matches found.
 */
function countMatches(text: string, re: RegExp): number {
	// Without `g` or `y`, `exec` never advances `lastIndex`, so the loop below
	// would find the first hit forever. Scan with a global clone in that case.
	const scanner = re.global || re.sticky ? re : new RegExp(re.source, `${re.flags}g`);

	let count = 0;
	scanner.lastIndex = 0;
	let match = scanner.exec(text);
	while (match !== null) {
		count++;
		if (match[0] === "") {
			// An empty match leaves `lastIndex` in place; step past it by hand,
			// skipping a whole surrogate pair when the regex is in Unicode mode.
			const codePoint = scanner.unicode ? text.codePointAt(scanner.lastIndex) : undefined;
			scanner.lastIndex += codePoint !== undefined && codePoint > 0xffff ? 2 : 1;
		}
		match = scanner.exec(text);
	}
	return count;
}
|
|
437
|
+
|
|
438
|
+
/**
 * Tally the sentences of `text` that look shouted.
 *
 * `text` is split into sentences with {@link SENTENCE_RE}. A sentence is
 * counted when it contains at least {@link YELLING_MIN_LETTERS} letters and
 * the uppercase share of those letters is strictly greater than
 * {@link YELLING_THRESHOLD}. The letter minimum keeps very short acronyms
 * ("OK", "WIP") from registering; an all-caps sentence that reaches the
 * minimum (for example "TODO" on its own, with the minimum at 4) is counted.
 *
 * @param text - Message text to inspect.
 * @returns Number of sentences classified as yelling.
 */
function countYellingSentences(text: string): number {
	let yelling = 0;
	SENTENCE_RE.lastIndex = 0;
	for (let hit = SENTENCE_RE.exec(text); hit !== null; hit = SENTENCE_RE.exec(text)) {
		const letterTotal = countMatches(hit[0], LETTER_RE);
		// Too few letters for the ratio to mean anything.
		if (letterTotal < YELLING_MIN_LETTERS) continue;
		const upperTotal = countMatches(hit[0], UPPER_LETTER_RE);
		if (upperTotal / letterTotal > YELLING_THRESHOLD) yelling += 1;
	}
	return yelling;
}
|
|
459
|
+
|
|
460
|
+
/**
 * Derive the behavioral metrics of one user message.
 *
 * The message is trimmed first and every count is taken on the trimmed text.
 * When `text` is empty or whitespace-only, a fresh all-zero result is
 * returned without running any of the matchers.
 *
 * @param text - Raw message text.
 * @returns Character, word, yelling-sentence, profanity, and drama-run counts.
 */
export function computeUserMessageMetrics(text: string): UserMessageMetrics {
	const body = text.trim();
	if (body.length === 0) {
		return { chars: 0, words: 0, yellingSentences: 0, profanity: 0, dramaRuns: 0 };
	}

	const words = countMatches(body, WORD_RE);
	const yellingSentences = countYellingSentences(body);
	const profanity = countMatches(body, PROFANITY_RE);
	const dramaRuns = countMatches(body, DRAMA_RE);
	return { chars: body.length, words, yellingSentences, profanity, dramaRuns };
}
|
|
478
|
+
|
|
479
|
+
/**
 * Shared all-zero metrics for callers that need a default value.
 *
 * The object is frozen, so it is typed `Readonly` to make the compiler reject
 * writes that would otherwise fail at runtime (a `TypeError` in strict-mode
 * code such as ES modules). It remains assignable wherever a
 * `UserMessageMetrics` is expected; spread it (`{ ...EMPTY_USER_METRICS }`)
 * to obtain a mutable copy.
 */
export const EMPTY_USER_METRICS: Readonly<UserMessageMetrics> = Object.freeze({
	chars: 0,
	words: 0,
	yellingSentences: 0,
	profanity: 0,
	dramaRuns: 0,
});
|