@gajae-code/stats 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/README.md +82 -0
  2. package/build.ts +84 -0
  3. package/dist/client/index.css +1 -0
  4. package/dist/client/index.html +13 -0
  5. package/dist/client/index.js +257 -0
  6. package/dist/client/styles.css +1159 -0
  7. package/dist/types/aggregator.d.ts +65 -0
  8. package/dist/types/client/App.d.ts +1 -0
  9. package/dist/types/client/api.d.ts +10 -0
  10. package/dist/types/client/components/BehaviorChart.d.ts +6 -0
  11. package/dist/types/client/components/BehaviorModelsTable.d.ts +7 -0
  12. package/dist/types/client/components/BehaviorSummary.d.ts +7 -0
  13. package/dist/types/client/components/ChartsContainer.d.ts +7 -0
  14. package/dist/types/client/components/CostChart.d.ts +6 -0
  15. package/dist/types/client/components/CostSummary.d.ts +6 -0
  16. package/dist/types/client/components/Header.d.ts +12 -0
  17. package/dist/types/client/components/ModelsTable.d.ts +8 -0
  18. package/dist/types/client/components/RequestDetail.d.ts +6 -0
  19. package/dist/types/client/components/RequestList.d.ts +8 -0
  20. package/dist/types/client/components/StatsGrid.d.ts +6 -0
  21. package/dist/types/client/components/chart-shared.d.ts +187 -0
  22. package/dist/types/client/components/models-table-shared.d.ts +195 -0
  23. package/dist/types/client/components/range-meta.d.ts +21 -0
  24. package/dist/types/client/index.d.ts +1 -0
  25. package/dist/types/client/types.d.ts +62 -0
  26. package/dist/types/client/useSystemTheme.d.ts +2 -0
  27. package/dist/types/db.d.ts +93 -0
  28. package/dist/types/index.d.ts +5 -0
  29. package/dist/types/parser.d.ts +40 -0
  30. package/dist/types/server.d.ts +7 -0
  31. package/dist/types/shared-types.d.ts +192 -0
  32. package/dist/types/sync-worker.d.ts +31 -0
  33. package/dist/types/types.d.ts +120 -0
  34. package/dist/types/user-metrics.d.ts +72 -0
  35. package/package.json +91 -0
  36. package/src/aggregator.ts +454 -0
  37. package/src/client/App.tsx +221 -0
  38. package/src/client/api.ts +65 -0
  39. package/src/client/components/BehaviorChart.tsx +189 -0
  40. package/src/client/components/BehaviorModelsTable.tsx +342 -0
  41. package/src/client/components/BehaviorSummary.tsx +95 -0
  42. package/src/client/components/ChartsContainer.tsx +221 -0
  43. package/src/client/components/CostChart.tsx +171 -0
  44. package/src/client/components/CostSummary.tsx +53 -0
  45. package/src/client/components/Header.tsx +72 -0
  46. package/src/client/components/ModelsTable.tsx +265 -0
  47. package/src/client/components/RequestDetail.tsx +172 -0
  48. package/src/client/components/RequestList.tsx +73 -0
  49. package/src/client/components/StatsGrid.tsx +135 -0
  50. package/src/client/components/chart-shared.tsx +320 -0
  51. package/src/client/components/models-table-shared.tsx +275 -0
  52. package/src/client/components/range-meta.ts +72 -0
  53. package/src/client/css.d.ts +1 -0
  54. package/src/client/index.tsx +6 -0
  55. package/src/client/styles.css +306 -0
  56. package/src/client/types.ts +78 -0
  57. package/src/client/useSystemTheme.ts +31 -0
  58. package/src/db.ts +1100 -0
  59. package/src/embedded-client.generated.txt +7 -0
  60. package/src/index.ts +182 -0
  61. package/src/parser.ts +334 -0
  62. package/src/server.ts +325 -0
  63. package/src/shared-types.ts +204 -0
  64. package/src/sync-worker.ts +40 -0
  65. package/src/types.ts +125 -0
  66. package/src/user-metrics.ts +686 -0
  67. package/tailwind.config.js +40 -0
@@ -0,0 +1,686 @@
1
+ /**
2
+ * Behavioral metrics extracted from a single user message.
3
+ *
4
+ * Pure, side-effect free. Designed for batch use during session ingestion
5
+ * and standalone testing.
6
+ */
7
+
8
/**
 * Per-message behavioral counters produced by `computeUserMessageMetrics`.
 *
 * `chars` and `words` describe the trimmed message as typed. Every other
 * field is counted on a *prose-only* body (fenced code, XML/HTML tags,
 * inline code, URLs, `@file` mentions, quoted lines, image markers and ANSI
 * escapes are stripped first) and is 0 when that prose is empty or spans
 * 3 or more non-empty lines, because formatted prompts aren't tantrums.
 */
export interface UserMessageMetrics {
  /** Length of the trimmed message in UTF-16 code units, measured before stripping. */
  chars: number;
  /** Whitespace-delimited word count of the trimmed message. */
  words: number;
  /**
   * Number of "yelling" sentences: sentences (runs between `.`, `!`, `?`
   * and newlines) where more than half of the alphabetic characters are
   * uppercase and there are at least 4 letters, so very short acronyms
   * like "OK" don't count.
   */
  yelling: number;
  /** Profanity hits (word-boundary, case-insensitive). */
  profanity: number;
  /**
   * Catch-all "obviously upset" signal.
   *
   * Sum of:
   * - drama runs: `!` / `?` followed by 2+ of `!` / `?` / `1` (the `1` is
   *   the shift-key mishit, as in `!!!111`)
   * - elongated interjections: `nooo`, `ahh`, `ughh`, `argh`, `stooop`,
   *   `whyyy`, `fuuu(ck)`, `wtfff`, `omgg`, `yesss`, `goddd`, `bruhh`
   * - standalone `dude`
   * - dot runs: `..`, `...`, `....+`
   */
  anguish: number;
  /**
   * Corrective negation: the user is telling us we got it wrong.
   *
   * Counted on the same prose-only body as {@link anguish}.
   *
   * - message-leading `no` / `nope` / `nah` / `nvm` / `wrong` / `incorrect`
   *   (word-bounded, so `now`, `nobody`, `north` don't match; only the very
   *   start of the prose is checked, not the start of every line)
   * - `that's not (what|right|it)` (apostrophe optional) and
   *   `not what i (meant|asked|said|wanted)`
   */
  negation: number;
  /**
   * The user is repeating themselves - strong signal the previous turn
   * missed the ask. Counts hits for:
   *
   * - `i (meant|said|told you|asked you|already (said|told|did|asked|wrote))`
   * - `(like|as) i (said|told you|asked)`
   * - `still (doesn't|isn't|not|broken|wrong|fails|failing|the same|same)`
   *
   * Bare `i asked`, `still` and `again` are too ambiguous to count alone
   * (they show up in normal speech like "try again" or "still works").
   */
  repetition: number;
  /**
   * Direct second-person reproach pinned on the agent:
   *
   * - `you (didn't|did not|broke|missed|forgot|keep|always|never|still|ignored)`
   * - sentence-leading `stop <verb>ing` imperatives
   */
  blame: number;
}
66
+
67
/**
 * Words considered profane/aggressive. Word-boundary, case-insensitive.
 *
 * Broad English coverage: f-/s-word families and their censored variants,
 * mild swears, intelligence-based insults, body-part epithets, British/
 * Australian/Irish slang, religious exclamations, chat acronyms, and
 * frustration interjections. Curated to exclude racial, homophobic, and
 * other identity slurs.
 *
 * Every entry must be lowercase ASCII letters only: the list is joined
 * verbatim into a regex alternation, so metacharacters would change the
 * pattern.
 *
 * `git` and `crud` are deliberately NOT listed: in coding prompts they are
 * the version-control tool and the CRUD acronym, not insults, and counting
 * them would flag ordinary requests such as "git commit" as profanity.
 */
const PROFANITY: readonly string[] = [
  // f-word family
  "fuck",
  "fucks",
  "fucked",
  "fucking",
  "fuckin",
  "fucker",
  "fuckers",
  "fuckup",
  "fuckups",
  "fuckhead",
  "fuckheads",
  "fuckface",
  "fuckwit",
  "fuckwits",
  "fucktard",
  "fuckery",
  "fuckoff",
  "motherfucker",
  "motherfuckers",
  "motherfucking",
  "clusterfuck",
  "ratfuck",
  "unfuck",
  // censored / euphemistic f-word
  "fk",
  "fks",
  "fking",
  "fkin",
  "fker",
  "fck",
  "fcks",
  "fcking",
  "fckin",
  "fcker",
  "fuk",
  "fuking",
  "fukin",
  "eff",
  "effs",
  "effed",
  "effing",
  "frick",
  "fricks",
  "fricked",
  "fricking",
  "frickin",
  "freaking",
  "freakin",
  "freaked",
  // s-word family
  "shit",
  "shits",
  "shat",
  "shitty",
  "shittier",
  "shittiest",
  "shite",
  "shites",
  "shited",
  "shitting",
  "shitter",
  "shitters",
  "shithead",
  "shitheads",
  "shitshow",
  "shitstorm",
  "shitstain",
  "shitfaced",
  "shitload",
  "shitbag",
  "shitcan",
  "shitcanned",
  "shitpost",
  "shitposting",
  "bullshit",
  "bullshits",
  "bullshitting",
  "bullshitter",
  "horseshit",
  "batshit",
  "dogshit",
  "dipshit",
  "jackshit",
  "dumbshit",
  "holyshit",
  // mild swears
  "damn",
  "damns",
  "damned",
  "damning",
  "dammit",
  "goddamn",
  "goddamned",
  "goddamnit",
  "goddammit",
  "darn",
  "darns",
  "darned",
  "darnit",
  "dang",
  "danged",
  "dangit",
  "hell",
  "hells",
  "heck",
  "hecks",
  "heckin",
  "gosh",
  "blast",
  "blasted",
  "bloody",
  "bollocks",
  "bollox",
  // crap family
  "crap",
  "craps",
  "crappy",
  "crappier",
  "crappiest",
  "crapped",
  "crapping",
  "crapload",
  "crapshoot",
  "crapola",
  // piss family
  "piss",
  "pisses",
  "pissed",
  "pissing",
  "pisser",
  "pisspoor",
  "pisstake",
  "pisshead",
  // ass family
  "ass",
  "asses",
  "asshole",
  "assholes",
  "asshat",
  "asshats",
  "asswipe",
  "asswipes",
  "assclown",
  "assbag",
  "asskisser",
  "dumbass",
  "dumbasses",
  "jackass",
  "jackasses",
  "smartass",
  "smartasses",
  "badass",
  "badasses",
  "lazyass",
  "fatass",
  "hardass",
  "halfass",
  "halfassed",
  "arse",
  "arsed",
  "arsehole",
  "arseholes",
  "arsewipe",
  // bitch family
  "bitch",
  "bitches",
  "bitched",
  "bitching",
  "bitchy",
  "bitchier",
  "bitchiest",
  "sonofabitch",
  "biatch",
  "biotch",
  // strong vulgarity
  "cunt",
  "cunts",
  "cunty",
  "cuntish",
  "twat",
  "twats",
  "twatty",
  "bastard",
  "bastards",
  // body-part insults
  "dick",
  "dicks",
  "dickhead",
  "dickheads",
  "dickish",
  "dickwad",
  "dickwads",
  "dickface",
  "dickbag",
  "prick",
  "pricks",
  "prickish",
  "cock",
  "cocks",
  "cocky",
  "cockier",
  "cockiest",
  "cockhead",
  "cockblock",
  "cocksucker",
  "cocksuckers",
  "knob",
  "knobhead",
  "knobheads",
  "knobend",
  "wanker",
  "wankers",
  "wankery",
  "tosser",
  "tossers",
  "jerkoff",
  "jerkoffs",
  "douche",
  "douches",
  "douchebag",
  "douchebags",
  "douchey",
  "scumbag",
  "scumbags",
  "scum",
  "sleazebag",
  "sleazeball",
  "slimeball",
  "lowlife",
  "lowlifes",
  "deadbeat",
  // intelligence-based insults
  "idiot",
  "idiots",
  "idiotic",
  "idiocy",
  "stupid",
  "stupider",
  "stupidest",
  "stupidity",
  "moron",
  "morons",
  "moronic",
  "imbecile",
  "imbeciles",
  "retard",
  "retards",
  "retarded",
  "dumb",
  "dumber",
  "dumbest",
  "dumbo",
  "dummy",
  "dummies",
  "fool",
  "fools",
  "foolish",
  "foolery",
  "clown",
  "clowns",
  "clownish",
  "buffoon",
  "buffoons",
  "simpleton",
  "halfwit",
  "halfwits",
  "nitwit",
  "nitwits",
  "dimwit",
  "dimwits",
  "dolt",
  "dolts",
  "doltish",
  "knucklehead",
  "knuckleheads",
  "blockhead",
  "blockheads",
  "lamebrain",
  "airhead",
  "airheads",
  "scatterbrain",
  "numbnuts",
  "numbskull",
  "numpty",
  "numpties",
  "muppet",
  "muppets",
  "pillock",
  "pillocks",
  "plonker",
  "plonkers",
  "prat",
  "prats",
  "berk",
  "berks",
  "ninny",
  "ninnies",
  "dingbat",
  "dingbats",
  "putz",
  "putzes",
  "schmuck",
  "schmucks",
  "jerk",
  "jerks",
  "jerkface",
  "gits",
  "sod",
  "sodding",
  "bugger",
  "buggered",
  // generic aggression / dismissal
  "hate",
  "hated",
  "hates",
  "hating",
  "hateful",
  "suck",
  "sucks",
  "sucked",
  "sucking",
  "sucky",
  "suckage",
  "trash",
  "trashy",
  "trashed",
  "garbage",
  "crudded",
  // quality-dismissal ("this is garbage / pointless")
  "useless",
  "pointless",
  "horrible",
  "awful",
  "worthless",
  "ridiculous",
  "nonsense",
  // religious exclamations
  "jesus",
  "christ",
  "jeez",
  "jeezus",
  "sheesh",
  "godsake",
  // chat acronyms
  "wtf",
  "wth",
  "wtaf",
  "stfu",
  "gtfo",
  "omfg",
  "omg",
  "ffs",
  "jfc",
  "kys",
  "fml",
  "smh",
  "smdh",
  "smfh",
  "idgaf",
  "idfc",
  "lmfao",
  "fubar",
  "snafu",
  // frustration interjections
  "ugh",
  "ughh",
  "ughhh",
  "urgh",
  "argh",
  "arghh",
  "arghhh",
  "arrgh",
  "blah",
  "bleh",
  "meh",
  "yikes",
  "yeesh",
  "oof",
  "gah",
  "gahh",
  "grr",
  "grrr",
  "grrrr",
];

// One whole-word, case-insensitive alternation over the list above. The
// surrounding `\b` keeps substrings ("shell", "class") from matching.
const PROFANITY_RE = new RegExp(String.raw`\b(?:${PROFANITY.join("|")})\b`, "gi");
466
// A "sentence" for the yelling check is any run of characters that contains
// no `.`, `!`, `?` or newline.
const SENTENCE_RE = /[^.!?\n]+/g;
// Any Unicode letter / any uppercase Unicode letter; the ratio of the two
// counts is the uppercase share of a sentence.
const LETTER_RE = /\p{L}/gu;
const UPPER_LETTER_RE = /\p{Lu}/gu;
// A sentence needs at least this many letters before its uppercase share is
// considered at all.
const YELLING_MIN_LETTERS = 4;
// Uppercase share that must be exceeded for a sentence to count as yelling.
const YELLING_THRESHOLD = 0.5;
// A drama burst: one `!` or `?` followed by at least two more of `!`, `?`
// or `1`. The `1` is the shift-key mishit ("!!!111" / "!?!??111"), so it is
// folded into the same burst instead of ending it.
const DRAMA_RE = /[!?][!?1]{2,}/g;
// Whitespace-delimited tokens, used for the word count.
const WORD_RE = /\S+/g;
476
+
477
// Anguish/exasperation interjections. Each alternative is a case-insensitive,
// word-bounded, letters-only pattern; the surrounding `\b` keeps hex / base64
// blobs from contaminating the count. Most alternatives require real
// elongation, so plain "no", "ugh", "stop", "why", "wtf", "omg", "yes",
// "god" and "bruh" do not fire. Two are looser: "ahh" (two h's) and plain
// "argh" already count.
const ANGUISH_PATTERNS: readonly string[] = [
  "no{3,}", // nooo, noooooo
  "a+h{2,}", // ahh, aaaahhh
  "u+g+h{2,}", // ughh, uuughh
  "a+r+g+h+", // argh, aaargh, arrgghhh
  "st+o{3,}p+", // stooop, sttooopp
  "w+h+y{3,}", // whyyy, whyyyyy
  "f+u{3,}c*k*", // fuuu, fuuuck
  "wtf{3,}", // wtfff
  "o+m+g{2,}", // omgg, omggg
  "ye+s{3,}", // yesss, yeessss
  "g+o+d{3,}", // goddd, goddddd
  "br+u+h{2,}", // bruhh, bruuuhh
];
const ANGUISH_RE = new RegExp(String.raw`\b(?:${ANGUISH_PATTERNS.join("|")})\b`, "gi");
// Standalone "dude" (whole word, any case).
const DUDE_RE = /\bdude\b/gi;
// Runs of 2+ dots. Captures `..` (lazy trail-off), `...` (tentative
// ellipsis), and `....+` (exasperation) in a single signal.
const ELLIPSIS_RE = /\.{2,}/g;
501
+
502
// --- Frustration signals ----------------------------------------------------
// The pattern sets in this section were tuned against ~42k real user prompts
// so that, on short prose, hits are mostly genuine frustration rather than
// ordinary technical talk.

// Corrective negation, opening form. There is intentionally no `m` flag:
// only the very start of the trimmed prose body can match. Lines further
// down that begin with `no` / `Wrong` / `No JSDoc warning` are in practice
// list items, pasted error text or descriptions, whereas real corrective
// negation overwhelmingly opens the message.
const NEGATION_LEAD_RE = /^[ \t]*(?:no|nope|nah|nvm|wrong|incorrect)\b/gi;
// Corrective negation, phrase form ("that's not what ...", "not what i meant").
// Accepts a straight or typographic apostrophe, or none.
const NEGATION_PHRASE_RE =
  /\b(?:that['\u2019]?s\s+not\s+(?:what|right|it)|not\s+what\s+i\s+(?:meant|asked|said|wanted))\b/gi;

// The user repeating themselves. The `like ` / `as ` prefixed form is its
// own alternative so "like i said" is consumed as one hit instead of also
// counting the bare "i said" inside it. Bare `i asked` is excluded because
// in this corpus it is overwhelmingly "i asked <some third party>"
// (committee, experts, weaker LLM, ...); that variant needs `i asked you`.
const REPETITION_RECALL_RE =
  /\b(?:(?:like|as)\s+i\s+(?:said|told\s+you|asked)|i\s+(?:meant|said|told\s+you|asked\s+you|already\s+(?:said|told|did|asked|wrote)))\b/gi;
// Bare `still` / `again` are ambiguous, so `still` only counts when a
// negative or sameness marker follows it.
const REPETITION_STILL_RE =
  /\bstill\s+(?:doesn['\u2019]?t|doesnt|isn['\u2019]?t|isnt|not|broken|wrong|fails|failing|the\s+same|same)\b/gi;

// Direct second-person reproach. `you` by itself is far too generic (>7k
// hits in short prose), so it must be followed by one of a small set of
// accusatory words.
const BLAME_YOU_RE = /\byou\s+(?:didn['\u2019]?t|did\s+not|broke|missed|forgot|keep|always|never|still|ignored)\b/gi;
// `stop <verb>ing` counts only as an imperative: it has to open a sentence,
// i.e. sit at a line start or right after `.`, `!`, `?` or a newline.
const BLAME_STOP_RE = /(?:^|(?<=[.!?\n]))\s*stop\s+\w+ing\b/gim;
533
+
534
// Stripped from the analyzed body before scoring so that structured
// content (code, XML/HTML, URLs, file mentions, quoted blocks) doesn't
// pollute behavior signals. The replacement text is chosen by
// stripStructuredContent: fenced code blocks and open/close tag pairs become
// a newline (so the neighbors they separated stay on separate lines), the
// other matches become a space or are removed outright.
const FENCED_CODE_RE = /```[\s\S]*?```/g;
// An opening tag, everything up to its matching closing tag, and that tag.
const XML_TAG_PAIR_RE = /<([A-Za-z][\w-]*)\b[^>]*>[\s\S]*?<\/\1>/g;
// Any single leftover opening, closing or self-closing tag.
const XML_TAG_BARE_RE = /<\/?[A-Za-z][\w-]*\b[^>]*\/?>/g;
// Single-line backtick spans.
const INLINE_CODE_RE = /`[^`\n]*`/g;
const URL_RE = /\bhttps?:\/\/\S+/gi;
// `@path` mentions at the start of the text or after whitespace; group 1 is
// that leading whitespace so the caller can keep it.
const FILE_MENTION_RE = /(^|\s)@[\w./-]+/g;
// Whole lines whose first non-blank character is `>`.
const QUOTE_LINE_RE = /^[ \t]*>.*$/gm;
// Harness placeholders the TUI substitutes for binary/non-text user input.
// Strip them so real frustration signals on later lines aren't masked off
// by `[Image #1]` etc. consuming line 1.
const IMAGE_MARKER_RE = /\[Image #\d+\]/g;
// ANSI escape sequences sometimes leak in from terminal copy-paste
// (e.g. when the user pastes a bash transcript). Strip them.
const ANSI_ESCAPE_RE = /\x1b\[[0-9;]*[A-Za-z]/g;

// Users don't really get angry with super detailed and formatted prompts
// - if the remaining prose is this many lines or more, score zero.
const MAX_PROSE_LINES = 3;
556
+
557
/**
 * Count regex hits without materializing the match array.
 *
 * Scanning always starts at index 0, whatever `re.lastIndex` was on entry.
 *
 * @param text - String to scan.
 * @param re - Pattern to count. A pattern with neither the `g` nor the `y`
 *   flag is counted as if it were global; the caller's regex object is left
 *   untouched in that case.
 * @returns Number of matches, counted left to right without overlap.
 */
function countMatches(text: string, re: RegExp): number {
  // Without `g`/`y`, exec() ignores lastIndex and would return the same
  // first match forever, so scan with a global copy instead.
  const scanner = re.global || re.sticky ? re : new RegExp(re.source, `${re.flags}g`);
  let count = 0;
  scanner.lastIndex = 0;
  let match = scanner.exec(text);
  while (match !== null) {
    count++;
    if (match[0].length === 0) {
      // A zero-length match leaves lastIndex where it was; step past it
      // (a whole code point in unicode mode) so the loop always advances.
      const codePoint = scanner.unicode ? text.codePointAt(scanner.lastIndex) : undefined;
      scanner.lastIndex += codePoint !== undefined && codePoint > 0xffff ? 2 : 1;
    }
    match = scanner.exec(text);
  }
  return count;
}
564
+
565
/**
 * Count sentences where the share of uppercase letters exceeds
 * {@link YELLING_THRESHOLD}. Sentences with fewer than
 * {@link YELLING_MIN_LETTERS} alphabetic characters are ignored so that
 * very short acronyms ("OK", "WIP") don't register as yelling. A sentence
 * with exactly {@link YELLING_MIN_LETTERS} letters is still evaluated, so a
 * lone four-letter all-caps word does count.
 *
 * @param text - Prose to scan; sentences are the runs matched by `SENTENCE_RE`.
 * @returns Number of sentences classified as yelling.
 */
function countYellingSentences(text: string): number {
  let yelled = 0;
  SENTENCE_RE.lastIndex = 0;
  for (let hit = SENTENCE_RE.exec(text); hit !== null; hit = SENTENCE_RE.exec(text)) {
    const sentence = hit[0];
    const letterCount = countMatches(sentence, LETTER_RE);
    // Too few letters for the uppercase ratio to mean anything.
    if (letterCount < YELLING_MIN_LETTERS) continue;
    const upperCount = countMatches(sentence, UPPER_LETTER_RE);
    if (upperCount / letterCount > YELLING_THRESHOLD) yelled++;
  }
  return yelled;
}
586
+
587
/**
 * Strip structured content so that pasted code, harness wrappers, file
 * mentions and quoted blocks don't dilute or fake behavior signals.
 *
 * Replacements, applied in this order:
 * - fenced code blocks and open/close tag pairs become a newline, so the
 *   text on either side stays on separate lines for later line counting
 * - leftover single tags, inline code, URLs and image markers become a space
 * - `@file` mentions become a space, keeping the whitespace that preceded them
 * - quoted (`>`) lines are emptied, leaving their line break in place
 * - ANSI escape sequences are removed outright
 *
 * @param text - Raw message text.
 * @returns The text with structured content removed; not trimmed.
 */
function stripStructuredContent(text: string): string {
  return text
    .replace(FENCED_CODE_RE, "\n")
    .replace(XML_TAG_PAIR_RE, "\n")
    .replace(XML_TAG_BARE_RE, " ")
    .replace(INLINE_CODE_RE, " ")
    .replace(URL_RE, " ")
    .replace(FILE_MENTION_RE, "$1 ")
    .replace(QUOTE_LINE_RE, "")
    .replace(IMAGE_MARKER_RE, " ")
    .replace(ANSI_ESCAPE_RE, "");
}
605
+
606
/**
 * Number of `\n`-separated lines in `text` that contain at least one
 * non-whitespace character.
 */
function countNonEmptyLines(text: string): number {
  return text.split("\n").filter((line) => line.trim().length > 0).length;
}
613
+
614
/**
 * Derive the behavioral metrics for one user message.
 *
 * `chars` and `words` are measured on the trimmed message. All other
 * counters are measured on the prose left after structured content is
 * stripped, and are reported as 0 when that prose is empty or has
 * `MAX_PROSE_LINES` or more non-empty lines.
 *
 * @param text - Raw message text; may be empty or whitespace-only, in which
 *   case every metric (including `chars` and `words`) is 0.
 * @returns A freshly allocated metrics object.
 */
export function computeUserMessageMetrics(text: string): UserMessageMetrics {
  // Metrics for a message that carries no behavior signals at all.
  const quiet = (chars: number, words: number): UserMessageMetrics => ({
    chars,
    words,
    yelling: 0,
    profanity: 0,
    anguish: 0,
    negation: 0,
    repetition: 0,
    blame: 0,
  });

  const message = text.trim();
  if (message.length === 0) return quiet(0, 0);

  const chars = message.length;
  const words = countMatches(message, WORD_RE);

  // Long or heavily formatted messages are deliberate rather than emotional
  // outbursts, so only short prose is scored.
  const prose = stripStructuredContent(message).trim();
  const isScorable = prose.length > 0 && countNonEmptyLines(prose) < MAX_PROSE_LINES;
  if (!isScorable) return quiet(chars, words);

  // Total hits in the prose across one or more patterns.
  const tally = (...patterns: RegExp[]): number =>
    patterns.reduce((sum, pattern) => sum + countMatches(prose, pattern), 0);

  return {
    chars,
    words,
    yelling: countYellingSentences(prose),
    profanity: tally(PROFANITY_RE),
    anguish: tally(DRAMA_RE, ANGUISH_RE, DUDE_RE, ELLIPSIS_RE),
    negation: tally(NEGATION_LEAD_RE, NEGATION_PHRASE_RE),
    repetition: tally(REPETITION_RECALL_RE, REPETITION_STILL_RE),
    blame: tally(BLAME_YOU_RE, BLAME_STOP_RE),
  };
}
675
+
676
/**
 * Empty metrics constant for callers that need a default.
 *
 * The object is frozen, so it is typed `Readonly`: writing to a field would
 * throw at runtime in strict-mode code, and the type now rejects that at
 * compile time. It remains assignable wherever a `UserMessageMetrics` is
 * expected; copy it (`{ ...EMPTY_USER_METRICS }`) before accumulating into it.
 */
export const EMPTY_USER_METRICS: Readonly<UserMessageMetrics> = Object.freeze({
  chars: 0,
  words: 0,
  yelling: 0,
  profanity: 0,
  anguish: 0,
  negation: 0,
  repetition: 0,
  blame: 0,
});