@oh-my-pi/omp-stats 14.9.5 → 14.9.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -3
- package/src/aggregator.ts +146 -36
- package/src/client/components/BehaviorChart.tsx +11 -4
- package/src/client/components/BehaviorModelsTable.tsx +62 -19
- package/src/client/components/BehaviorSummary.tsx +30 -10
- package/src/client/types.ts +15 -6
- package/src/db.ts +151 -38
- package/src/index.ts +29 -3
- package/src/parser.ts +31 -14
- package/src/sync-worker.ts +31 -0
- package/src/types.ts +42 -10
- package/src/user-metrics.ts +217 -17
package/src/user-metrics.ts
CHANGED
|
@@ -13,13 +13,55 @@ export interface UserMessageMetrics {
|
|
|
13
13
|
/**
|
|
14
14
|
* Number of "yelling" sentences: sentences where more than half of the
|
|
15
15
|
* alphabetic characters are uppercase (and there are enough letters to
|
|
16
|
-
* make the ratio meaningful
|
|
16
|
+
* make the ratio meaningful - short acronyms like "OK" don't count).
|
|
17
17
|
*/
|
|
18
|
-
|
|
18
|
+
yelling: number;
|
|
19
19
|
/** Profanity hits (word-boundary, case-insensitive). */
|
|
20
20
|
profanity: number;
|
|
21
|
-
/**
|
|
22
|
-
|
|
21
|
+
/**
|
|
22
|
+
* Catch-all "obviously upset" signal computed on a *prose-only* body
|
|
23
|
+
* (code fences, XML/HTML tags, URLs, file mentions, and quoted lines
|
|
24
|
+
* are stripped first; messages whose remaining prose is >=3 lines score
|
|
25
|
+
* zero because formatted prompts aren't tantrums).
|
|
26
|
+
*
|
|
27
|
+
* Sum of:
|
|
28
|
+
* - drama runs: 3+ `!` / `?` (with `1`-mishit fallout)
|
|
29
|
+
* - elongated interjections: `noooo`, `ahhhh`, `ughhh`, `argh`, `stooop`,
|
|
30
|
+
* `whyyy`, `fuuu(ck)`, `wtfff`, `omggg`, `yessss`,
|
|
31
|
+
* `goddd`, `bruhh`
|
|
32
|
+
* - standalone `dude`
|
|
33
|
+
* - dot runs: `..`, `...`, `....+`
|
|
34
|
+
*/
|
|
35
|
+
anguish: number;
|
|
36
|
+
/**
|
|
37
|
+
* Corrective negation: the user is telling us we got it wrong.
|
|
38
|
+
*
|
|
39
|
+
* Counted on the same prose-only body as {@link anguish}.
|
|
40
|
+
*
|
|
41
|
+
* - message-leading `no` / `nope` / `nah` / `nvm` / `wrong` / `incorrect`
|
|
42
|
+
* (word-bounded, so `now`, `nobody`, `north` don't match)
|
|
43
|
+
* - `that(?:'s)? not (what|right|it)` and `not what i (meant|asked|said|wanted)`
|
|
44
|
+
*/
|
|
45
|
+
negation: number;
|
|
46
|
+
/**
|
|
47
|
+
* The user is repeating themselves - strong signal the previous turn
|
|
48
|
+
* missed the ask. Counts hits for:
|
|
49
|
+
*
|
|
50
|
+
* - `i (meant|said|told you|asked you|already (said|told|did|asked|wrote))`
|
|
51
|
+
* - `(like|as) i (said|told you|asked)`
|
|
52
|
+
* - `still (doesn't|isn't|not|broken|wrong|fails|failing|the same|same)`
|
|
53
|
+
*
|
|
54
|
+
* Bare `still` / `again` are too ambiguous to count alone (they show up
|
|
55
|
+
* in normal speech like "try again" or "still works").
|
|
56
|
+
*/
|
|
57
|
+
repetition: number;
|
|
58
|
+
/**
|
|
59
|
+
* Direct second-person reproach pinned on the agent:
|
|
60
|
+
*
|
|
61
|
+
* - `you (didn't|did not|broke|missed|forgot|keep|always|never|still|ignored)`
|
|
62
|
+
* - sentence-leading `stop <verb>ing` imperatives
|
|
63
|
+
*/
|
|
64
|
+
blame: number;
|
|
23
65
|
}
|
|
24
66
|
|
|
25
67
|
/**
|
|
@@ -363,15 +405,20 @@ const PROFANITY: readonly string[] = [
|
|
|
363
405
|
"garbage",
|
|
364
406
|
"crud",
|
|
365
407
|
"crudded",
|
|
408
|
+
// quality-dismissal ("this is garbage / pointless")
|
|
409
|
+
"useless",
|
|
410
|
+
"pointless",
|
|
411
|
+
"horrible",
|
|
412
|
+
"awful",
|
|
413
|
+
"worthless",
|
|
414
|
+
"ridiculous",
|
|
415
|
+
"nonsense",
|
|
366
416
|
// religious exclamations
|
|
367
417
|
"jesus",
|
|
368
418
|
"christ",
|
|
369
419
|
"jeez",
|
|
370
420
|
"jeezus",
|
|
371
421
|
"sheesh",
|
|
372
|
-
"holymoly",
|
|
373
|
-
"holyfuck",
|
|
374
|
-
"holysmokes",
|
|
375
422
|
"godsake",
|
|
376
423
|
// chat acronyms
|
|
377
424
|
"wtf",
|
|
@@ -415,18 +462,98 @@ const PROFANITY: readonly string[] = [
|
|
|
415
462
|
"grrrr",
|
|
416
463
|
];
|
|
417
464
|
|
|
418
|
-
const PROFANITY_RE = new RegExp(
|
|
465
|
+
const PROFANITY_RE = new RegExp(String.raw`\b(?:${PROFANITY.join("|")})\b`, "gi");
|
|
419
466
|
const SENTENCE_RE = /[^.!?\n]+/g;
|
|
420
467
|
const LETTER_RE = /\p{L}/gu;
|
|
421
468
|
const UPPER_LETTER_RE = /\p{Lu}/gu;
|
|
422
469
|
const YELLING_MIN_LETTERS = 4;
|
|
423
470
|
const YELLING_THRESHOLD = 0.5;
|
|
424
|
-
// Runs starting with `!` or `?` followed by
|
|
471
|
+
// Runs starting with `!` or `?` followed by 2+ of `!?1`. The `1` is the
|
|
425
472
|
// classic shift-key mishit ("!!!111" / "!?!??111") so we count those as
|
|
426
473
|
// part of the same drama burst.
|
|
427
474
|
const DRAMA_RE = /[!?][!?1]{2,}/g;
|
|
428
475
|
const WORD_RE = /\S+/g;
|
|
429
476
|
|
|
477
|
+
// Elongated anguish/exasperation interjections. Each alternative is a
|
|
478
|
+
// case-insensitive word-bounded pattern that requires *real* elongation
|
|
479
|
+
// (so plain "no" / "why" / "god" don't fire; "argh" and "ahh" match as-is). Picked to avoid
|
|
480
|
+
// hex / base64 contamination via the surrounding `\b` plus letter-only
|
|
481
|
+
// alternatives.
|
|
482
|
+
const ANGUISH_PATTERNS: readonly string[] = [
|
|
483
|
+
"no{3,}", // nooo, noooooo
|
|
484
|
+
"a+h{2,}", // ahh, aaaahhh
|
|
485
|
+
"u+g+h{2,}", // ughh, uuughh
|
|
486
|
+
"a+r+g+h+", // argh, aaargh, arrgghhh
|
|
487
|
+
"st+o{3,}p+", // stooop, sttooopp
|
|
488
|
+
"w+h+y{3,}", // whyyy, whyyyyy
|
|
489
|
+
"f+u{3,}c*k*", // fuuu, fuuuck
|
|
490
|
+
"wtf{3,}", // wtfff
|
|
491
|
+
"o+m+g{2,}", // omgg, omggg
|
|
492
|
+
"ye+s{3,}", // yesss, yeessss
|
|
493
|
+
"g+o+d{3,}", // goddd, goddddd
|
|
494
|
+
"br+u+h{2,}", // bruhh, bruuuhh
|
|
495
|
+
];
|
|
496
|
+
const ANGUISH_RE = new RegExp(String.raw`\b(?:${ANGUISH_PATTERNS.join("|")})\b`, "gi");
|
|
497
|
+
const DUDE_RE = /\bdude\b/gi;
|
|
498
|
+
// Runs of 2+ dots. Captures `..` (lazy trail-off), `...` (tentative
|
|
499
|
+
// ellipsis), and `....+` (exasperation) in a single signal.
|
|
500
|
+
const ELLIPSIS_RE = /\.{2,}/g;
|
|
501
|
+
|
|
502
|
+
// --- Frustration signals ----------------------------------------------------
|
|
503
|
+
// Each set of patterns below is tuned against ~42k real user prompts so the
|
|
504
|
+
// short-prose hits are dominated by genuine frustration, not technical talk.
|
|
505
|
+
|
|
506
|
+
// Corrective negation. We deliberately anchor to the very start of the
|
|
507
|
+
// trimmed prose body (no `m` flag) - in practice mid-message lines that
|
|
508
|
+
// start with `no`/`Wrong`/`No JSDoc warning` are list items, pasted error
|
|
509
|
+
// text or descriptive statements, not actual corrections. Real frustration
|
|
510
|
+
// negation overwhelmingly opens the message.
|
|
511
|
+
const NEGATION_LEAD_RE = /^[ \t]*(?:no|nope|nah|nvm|wrong|incorrect)\b/gi;
|
|
512
|
+
const NEGATION_PHRASE_RE =
|
|
513
|
+
/\b(?:that['\u2019]?s\s+not\s+(?:what|right|it)|not\s+what\s+i\s+(?:meant|asked|said|wanted))\b/gi;
|
|
514
|
+
|
|
515
|
+
// User repeating themselves. The recall pattern accepts an optional
|
|
516
|
+
// `like ` / `as ` prefix so "like i said" doesn't double-count with bare
|
|
517
|
+
// "i said". Bare `i asked` is too noisy - it's overwhelmingly "i asked
|
|
518
|
+
// <some third party>" in this corpus (committee, experts, weaker LLM, ...) -
|
|
519
|
+
// so we require `i asked you` for that variant. Bare `still` / `again` are
|
|
520
|
+
// ambiguous so we only count `still` when followed by a negative or
|
|
521
|
+
// sameness marker.
|
|
522
|
+
const REPETITION_RECALL_RE =
|
|
523
|
+
/\b(?:(?:like|as)\s+i\s+(?:said|told\s+you|asked)|i\s+(?:meant|said|told\s+you|asked\s+you|already\s+(?:said|told|did|asked|wrote)))\b/gi;
|
|
524
|
+
const REPETITION_STILL_RE =
|
|
525
|
+
/\bstill\s+(?:doesn['\u2019]?t|doesnt|isn['\u2019]?t|isnt|not|broken|wrong|fails|failing|the\s+same|same)\b/gi;
|
|
526
|
+
|
|
527
|
+
// Direct second-person reproach. `you` alone is too generic (>7k hits in
|
|
528
|
+
// short prose), so we anchor it to a small set of accusatory verbs.
|
|
529
|
+
const BLAME_YOU_RE = /\byou\s+(?:didn['\u2019]?t|did\s+not|broke|missed|forgot|keep|always|never|still|ignored)\b/gi;
|
|
530
|
+
// `stop <verb>ing` is only frustration when it's an imperative - require it
|
|
531
|
+
// to start a sentence (line start or after a sentence-terminating punctuator).
|
|
532
|
+
const BLAME_STOP_RE = /(?:^|(?<=[.!?\n]))\s*stop\s+\w+ing\b/gim;
|
|
533
|
+
|
|
534
|
+
// Stripped from the analyzed body before scoring so that structured
|
|
535
|
+
// content (code, XML/HTML, URLs, file mentions, quoted blocks) doesn't
|
|
536
|
+
// pollute behavior signals. Fenced code and tag pairs become a newline so line counts
|
|
537
|
+
// reflect what was removed instead of merging neighbors.
|
|
538
|
+
const FENCED_CODE_RE = /```[\s\S]*?```/g;
|
|
539
|
+
const XML_TAG_PAIR_RE = /<([A-Za-z][\w-]*)\b[^>]*>[\s\S]*?<\/\1>/g;
|
|
540
|
+
const XML_TAG_BARE_RE = /<\/?[A-Za-z][\w-]*\b[^>]*\/?>/g;
|
|
541
|
+
const INLINE_CODE_RE = /`[^`\n]*`/g;
|
|
542
|
+
const URL_RE = /\bhttps?:\/\/\S+/gi;
|
|
543
|
+
const FILE_MENTION_RE = /(^|\s)@[\w./-]+/g;
|
|
544
|
+
const QUOTE_LINE_RE = /^[ \t]*>.*$/gm;
|
|
545
|
+
// Harness placeholders the TUI substitutes for binary/non-text user input.
|
|
546
|
+
// Strip them so real frustration signals on later lines aren't masked off
|
|
547
|
+
// by `[Image #1]` etc. consuming line 1.
|
|
548
|
+
const IMAGE_MARKER_RE = /\[Image #\d+\]/g;
|
|
549
|
+
// ANSI escape sequences sometimes leak in from terminal copy-paste
|
|
550
|
+
// (e.g. when the user pastes a bash transcript). Strip them.
|
|
551
|
+
const ANSI_ESCAPE_RE = /\x1b\[[0-9;]*[A-Za-z]/g;
|
|
552
|
+
|
|
553
|
+
// Users don't really get angry with super detailed and formatted prompts
|
|
554
|
+
// - if the remaining prose is this many lines or more, score zero.
|
|
555
|
+
const MAX_PROSE_LINES = 3;
|
|
556
|
+
|
|
430
557
|
/** Count regex hits without materializing the match array. */
|
|
431
558
|
function countMatches(text: string, re: RegExp): number {
|
|
432
559
|
let count = 0;
|
|
@@ -457,6 +584,33 @@ function countYellingSentences(text: string): number {
|
|
|
457
584
|
return count;
|
|
458
585
|
}
|
|
459
586
|
|
|
587
|
+
/**
|
|
588
|
+
* Strip structured content so that pasted code, harness wrappers, file
|
|
589
|
+
* mentions and quoted blocks don't dilute or fake behavior signals.
|
|
590
|
+
* Fenced code and paired tags are replaced with a newline (other strips with a space or nothing) so line counting
|
|
591
|
+
* reflects what was removed instead of merging neighbors.
|
|
592
|
+
*/
|
|
593
|
+
function stripStructuredContent(text: string): string {
|
|
594
|
+
return text
|
|
595
|
+
.replace(FENCED_CODE_RE, "\n")
|
|
596
|
+
.replace(XML_TAG_PAIR_RE, "\n")
|
|
597
|
+
.replace(XML_TAG_BARE_RE, " ")
|
|
598
|
+
.replace(INLINE_CODE_RE, " ")
|
|
599
|
+
.replace(URL_RE, " ")
|
|
600
|
+
.replace(FILE_MENTION_RE, "$1 ")
|
|
601
|
+
.replace(QUOTE_LINE_RE, "")
|
|
602
|
+
.replace(IMAGE_MARKER_RE, " ")
|
|
603
|
+
.replace(ANSI_ESCAPE_RE, "");
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
function countNonEmptyLines(text: string): number {
|
|
607
|
+
let count = 0;
|
|
608
|
+
for (const line of text.split("\n")) {
|
|
609
|
+
if (line.trim().length > 0) count++;
|
|
610
|
+
}
|
|
611
|
+
return count;
|
|
612
|
+
}
|
|
613
|
+
|
|
460
614
|
/**
|
|
461
615
|
* Compute behavioral metrics for a user message.
|
|
462
616
|
*
|
|
@@ -465,14 +619,57 @@ function countYellingSentences(text: string): number {
|
|
|
465
619
|
export function computeUserMessageMetrics(text: string): UserMessageMetrics {
|
|
466
620
|
const trimmed = text.trim();
|
|
467
621
|
if (!trimmed) {
|
|
468
|
-
return {
|
|
622
|
+
return {
|
|
623
|
+
chars: 0,
|
|
624
|
+
words: 0,
|
|
625
|
+
yelling: 0,
|
|
626
|
+
profanity: 0,
|
|
627
|
+
anguish: 0,
|
|
628
|
+
negation: 0,
|
|
629
|
+
repetition: 0,
|
|
630
|
+
blame: 0,
|
|
631
|
+
};
|
|
632
|
+
}
|
|
633
|
+
|
|
634
|
+
const chars = trimmed.length;
|
|
635
|
+
const words = countMatches(trimmed, WORD_RE);
|
|
636
|
+
|
|
637
|
+
// Behavior signals are computed on a stripped prose body; long /
|
|
638
|
+
// well-formatted messages score zero because they are deliberate, not
|
|
639
|
+
// emotional outbursts.
|
|
640
|
+
const prose = stripStructuredContent(trimmed).trim();
|
|
641
|
+
if (!prose || countNonEmptyLines(prose) >= MAX_PROSE_LINES) {
|
|
642
|
+
return {
|
|
643
|
+
chars,
|
|
644
|
+
words,
|
|
645
|
+
yelling: 0,
|
|
646
|
+
profanity: 0,
|
|
647
|
+
anguish: 0,
|
|
648
|
+
negation: 0,
|
|
649
|
+
repetition: 0,
|
|
650
|
+
blame: 0,
|
|
651
|
+
};
|
|
469
652
|
}
|
|
653
|
+
|
|
654
|
+
const anguish =
|
|
655
|
+
countMatches(prose, DRAMA_RE) +
|
|
656
|
+
countMatches(prose, ANGUISH_RE) +
|
|
657
|
+
countMatches(prose, DUDE_RE) +
|
|
658
|
+
countMatches(prose, ELLIPSIS_RE);
|
|
659
|
+
|
|
660
|
+
const negation = countMatches(prose, NEGATION_LEAD_RE) + countMatches(prose, NEGATION_PHRASE_RE);
|
|
661
|
+
const repetition = countMatches(prose, REPETITION_RECALL_RE) + countMatches(prose, REPETITION_STILL_RE);
|
|
662
|
+
const blame = countMatches(prose, BLAME_YOU_RE) + countMatches(prose, BLAME_STOP_RE);
|
|
663
|
+
|
|
470
664
|
return {
|
|
471
|
-
chars
|
|
472
|
-
words
|
|
473
|
-
|
|
474
|
-
profanity: countMatches(
|
|
475
|
-
|
|
665
|
+
chars,
|
|
666
|
+
words,
|
|
667
|
+
yelling: countYellingSentences(prose),
|
|
668
|
+
profanity: countMatches(prose, PROFANITY_RE),
|
|
669
|
+
anguish,
|
|
670
|
+
negation,
|
|
671
|
+
repetition,
|
|
672
|
+
blame,
|
|
476
673
|
};
|
|
477
674
|
}
|
|
478
675
|
|
|
@@ -480,7 +677,10 @@ export function computeUserMessageMetrics(text: string): UserMessageMetrics {
|
|
|
480
677
|
export const EMPTY_USER_METRICS: UserMessageMetrics = Object.freeze({
|
|
481
678
|
chars: 0,
|
|
482
679
|
words: 0,
|
|
483
|
-
|
|
680
|
+
yelling: 0,
|
|
484
681
|
profanity: 0,
|
|
485
|
-
|
|
682
|
+
anguish: 0,
|
|
683
|
+
negation: 0,
|
|
684
|
+
repetition: 0,
|
|
685
|
+
blame: 0,
|
|
486
686
|
});
|