npm - @oh-my-pi/omp-stats - Versions diffs - 14.9.5 → 14.9.8 - Mend

@oh-my-pi/omp-stats 14.9.5 → 14.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/package.json +3 -3
package/src/aggregator.ts +146 -36
package/src/client/components/BehaviorChart.tsx +11 -4
package/src/client/components/BehaviorModelsTable.tsx +62 -19
package/src/client/components/BehaviorSummary.tsx +30 -10
package/src/client/types.ts +15 -6
package/src/db.ts +151 -38
package/src/index.ts +29 -3
package/src/parser.ts +31 -14
package/src/sync-worker.ts +31 -0
package/src/types.ts +42 -10
package/src/user-metrics.ts +217 -17

package/src/user-metrics.ts CHANGED Viewed

@@ -13,13 +13,55 @@ export interface UserMessageMetrics {
 	/**
 	 * Number of "yelling" sentences: sentences where more than half of the
 	 * alphabetic characters are uppercase (and there are enough letters to
-	 * make the ratio meaningful — short acronyms like "OK" don't count).
+	 * make the ratio meaningful - short acronyms like "OK" don't count).
 	 */
-	yellingSentences: number;
+	yelling: number;
 	/** Profanity hits (word-boundary, case-insensitive). */
 	profanity: number;
-	/** Runs of 3+ `!` / `?` characters (including `1`-mishit fallout). */
-	dramaRuns: number;
+	/**
+	 * Catch-all "obviously upset" signal computed on a *prose-only* body
+	 * (code fences, XML/HTML tags, URLs, file mentions, and quoted lines
+	 * are stripped first; messages whose remaining prose is >=3 lines score
+	 * zero because formatted prompts aren't tantrums).
+	 *
+	 * Sum of:
+	 * - drama runs: 3+ `!` / `?` (with `1`-mishit fallout)
+	 * - elongated interjections: `noooo`, `ahhhh`, `ughhh`, `argh`, `stooop`,
+	 *   `whyyy`, `fuuu(ck)`, `wtfff`, `omggg`, `yessss`, `goddd`, `bruhh`
+	 *   (NOTE: `shiiit`, `helpp`, `dammm` are not matched by the current
+	 *   ANGUISH_PATTERNS list)
+	 * - standalone `dude`
+	 * - dot runs: `..`, `...`, `....+`
+	 */
+	anguish: number;
+	/**
+	 * Corrective negation: the user is telling us we got it wrong.
+	 *
+	 * Counted on the same prose-only body as {@link anguish}.
+	 *
+	 * - message-leading `no` / `nope` / `nah` / `nvm` / `wrong` / `incorrect`
+	 *   (matched only at the very start of the prose body, not on later
+	 *   lines; word-bounded, so `now`, `nobody`, `north` don't match)
+	 * - `that'?s not (what|right|it)` and `not what i (meant|asked|said|wanted)`
+	 */
+	negation: number;
+	/**
+	 * The user is repeating themselves - strong signal the previous turn
+	 * missed the ask. Counts hits for:
+	 *
+	 * - `i (meant|said|told you|asked you|already (said|told|did|asked|wrote))`
+	 * - `(like|as) i (said|told you|asked)`
+	 * - `still (doesn't|isn't|not|broken|wrong|fails|failing|the same|same)`
+	 *
+	 * Bare `still` / `again` are too ambiguous to count alone (they show up
+	 * in normal speech like "try again" or "still works").
+	 */
+	repetition: number;
+	/**
+	 * Direct second-person reproach pinned on the agent:
+	 *
+	 * - `you (didn't|did not|broke|missed|forgot|keep|always|never|still|ignored)`
+	 * - sentence-leading `stop <verb>ing` imperatives
+	 */
+	blame: number;
 }
 /**
@@ -363,15 +405,20 @@ const PROFANITY: readonly string[] = [
 	"garbage",
 	"crud",
 	"crudded",
+	// quality-dismissal ("this is garbage / pointless")
+	"useless",
+	"pointless",
+	"horrible",
+	"awful",
+	"worthless",
+	"ridiculous",
+	"nonsense",
 	// religious exclamations
 	"jesus",
 	"christ",
 	"jeez",
 	"jeezus",
 	"sheesh",
-	"holymoly",
-	"holyfuck",
-	"holysmokes",
 	"godsake",
 	// chat acronyms
 	"wtf",
@@ -415,18 +462,98 @@ const PROFANITY: readonly string[] = [
 	"grrrr",
 ];
-const PROFANITY_RE = new RegExp(`\\b(?:${PROFANITY.join("|")})\\b`, "gi");
+const PROFANITY_RE = new RegExp(String.raw`\b(?:${PROFANITY.join("|")})\b`, "gi");
 const SENTENCE_RE = /[^.!?\n]+/g;
 const LETTER_RE = /\p{L}/gu;
 const UPPER_LETTER_RE = /\p{Lu}/gu;
 const YELLING_MIN_LETTERS = 4;
 const YELLING_THRESHOLD = 0.5;
-// Runs starting with `!` or `?` followed by ≥2 of `!?1`. The `1` is the
+// Runs starting with `!` or `?` followed by 2+ of `!?1`. The `1` is the
 // classic shift-key mishit ("!!!111" / "!?!??111") so we count those as
 // part of the same drama burst.
 const DRAMA_RE = /[!?][!?1]{2,}/g;
 const WORD_RE = /\S+/g;
+// Elongated anguish/exasperation interjections. Each alternative is a
+// case-insensitive word-bounded pattern; most require *real* elongation
+// (so plain "no" / "why" / "yes" / "god" don't fire), while "argh" and
+// "ahh" count even in their shortest form. Picked to avoid hex / base64
+// contamination via the surrounding `\b` plus letter-only alternatives.
+const ANGUISH_PATTERNS: readonly string[] = [
+	"no{3,}", //          nooo, noooooo
+	"a+h{2,}", //         ahh, aaaahhh
+	"u+g+h{2,}", //       ughh, uuughh
+	"a+r+g+h+", //        argh, aaargh, arrgghhh
+	"st+o{3,}p+", //      stooop, sttooopp
+	"w+h+y{3,}", //       whyyy, whyyyyy
+	"f+u{3,}c*k*", //     fuuu, fuuuck
+	"wtf{3,}", //         wtfff
+	"o+m+g{2,}", //       omgg, omggg
+	"ye+s{3,}", //        yesss, yeessss
+	"g+o+d{3,}", //       goddd, goddddd
+	"br+u+h{2,}", //      bruhh, bruuuhh
+];
+const ANGUISH_RE = new RegExp(String.raw`\b(?:${ANGUISH_PATTERNS.join("|")})\b`, "gi");
+const DUDE_RE = /\bdude\b/gi;
+// Runs of 2+ dots. Captures `..` (lazy trail-off), `...` (tentative
+// ellipsis), and `....+` (exasperation) in a single signal.
+const ELLIPSIS_RE = /\.{2,}/g;
+// --- Frustration signals ----------------------------------------------------
+// Each set of patterns below is tuned against ~42k real user prompts so the
+// short-prose hits are dominated by genuine frustration, not technical talk.
+// Corrective negation. We deliberately anchor to the very start of the
+// trimmed prose body (no `m` flag) - in practice mid-message lines that
+// start with `no`/`Wrong`/`No JSDoc warning` are list items, pasted error
+// text or descriptive statements, not actual corrections. Real frustration
+// negation overwhelmingly opens the message.
+const NEGATION_LEAD_RE = /^[ \t]*(?:no|nope|nah|nvm|wrong|incorrect)\b/gi;
+const NEGATION_PHRASE_RE =
+	/\b(?:that['\u2019]?s\s+not\s+(?:what|right|it)|not\s+what\s+i\s+(?:meant|asked|said|wanted))\b/gi;
+// User repeating themselves. The recall pattern accepts an optional
+// `like ` / `as ` prefix so "like i said" doesn't double-count with bare
+// "i said". Bare `i asked` is too noisy - it's overwhelmingly "i asked
+// <some third party>" in this corpus (committee, experts, weaker LLM, ...) -
+// so we require `i asked you` for that variant. Bare `still` / `again` are
+// ambiguous so we only count `still` when followed by a negative or
+// sameness marker.
+const REPETITION_RECALL_RE =
+	/\b(?:(?:like|as)\s+i\s+(?:said|told\s+you|asked)|i\s+(?:meant|said|told\s+you|asked\s+you|already\s+(?:said|told|did|asked|wrote)))\b/gi;
+const REPETITION_STILL_RE =
+	/\bstill\s+(?:doesn['\u2019]?t|doesnt|isn['\u2019]?t|isnt|not|broken|wrong|fails|failing|the\s+same|same)\b/gi;
+// Direct second-person reproach. `you` alone is too generic (>7k hits in
+// short prose), so we anchor it to a small set of accusatory verbs.
+const BLAME_YOU_RE = /\byou\s+(?:didn['\u2019]?t|did\s+not|broke|missed|forgot|keep|always|never|still|ignored)\b/gi;
+// `stop <verb>ing` is only frustration when it's an imperative - require it
+// to start a sentence (line start or after a sentence-terminating punctuator).
+const BLAME_STOP_RE = /(?:^|(?<=[.!?\n]))\s*stop\s+\w+ing\b/gim;
+// Stripped from the analyzed body before scoring so that structured
+// content (code, XML/HTML, URLs, file mentions, quoted blocks) doesn't
+// pollute behavior signals. Block-level constructs (fenced code, paired
+// tags) are replaced with a newline so their neighbors don't merge into
+// one line; inline constructs are replaced with a space or removed.
+const FENCED_CODE_RE = /```[\s\S]*?```/g;
+const XML_TAG_PAIR_RE = /<([A-Za-z][\w-]*)\b[^>]*>[\s\S]*?<\/\1>/g;
+const XML_TAG_BARE_RE = /<\/?[A-Za-z][\w-]*\b[^>]*\/?>/g;
+const INLINE_CODE_RE = /`[^`\n]*`/g;
+const URL_RE = /\bhttps?:\/\/\S+/gi;
+const FILE_MENTION_RE = /(^|\s)@[\w./-]+/g;
+const QUOTE_LINE_RE = /^[ \t]*>.*$/gm;
+// Harness placeholders the TUI substitutes for binary/non-text user input.
+// Strip them so real frustration signals on later lines aren't masked off
+// by `[Image #1]` etc. consuming line 1.
+const IMAGE_MARKER_RE = /\[Image #\d+\]/g;
+// ANSI escape sequences sometimes leak in from terminal copy-paste
+// (e.g. when the user pastes a bash transcript). Strip them.
+const ANSI_ESCAPE_RE = /\x1b\[[0-9;]*[A-Za-z]/g;
+// Users don't really get angry with super detailed and formatted prompts
+// - if the remaining prose is this many lines or more, score zero.
+const MAX_PROSE_LINES = 3;
 /** Count regex hits without materializing the match array. */
 function countMatches(text: string, re: RegExp): number {
 	let count = 0;
@@ -457,6 +584,33 @@ function countYellingSentences(text: string): number {
 	return count;
 }
+/**
+ * Strip structured content so that pasted code, harness wrappers, file
+ * mentions and quoted blocks don't dilute or fake behavior signals.
+ * Fenced code and paired tags are replaced with a newline so subsequent
+ * line counting doesn't merge their neighbors; the remaining strips
+ * collapse to a single space or to nothing.
+ */
+function stripStructuredContent(text: string): string {
+	return text
+		.replace(FENCED_CODE_RE, "\n")
+		.replace(XML_TAG_PAIR_RE, "\n")
+		.replace(XML_TAG_BARE_RE, " ")
+		.replace(INLINE_CODE_RE, " ")
+		.replace(URL_RE, " ")
+		.replace(FILE_MENTION_RE, "$1 ")
+		.replace(QUOTE_LINE_RE, "")
+		.replace(IMAGE_MARKER_RE, " ")
+		.replace(ANSI_ESCAPE_RE, "");
+}
+function countNonEmptyLines(text: string): number {
+	let count = 0;
+	for (const line of text.split("\n")) {
+		if (line.trim().length > 0) count++;
+	}
+	return count;
+}
 /**
  * Compute behavioral metrics for a user message.
  *
@@ -465,14 +619,57 @@ function countYellingSentences(text: string): number {
 export function computeUserMessageMetrics(text: string): UserMessageMetrics {
 	const trimmed = text.trim();
 	if (!trimmed) {
-		return { chars: 0, words: 0, yellingSentences: 0, profanity: 0, dramaRuns: 0 };
+		return {
+			chars: 0,
+			words: 0,
+			yelling: 0,
+			profanity: 0,
+			anguish: 0,
+			negation: 0,
+			repetition: 0,
+			blame: 0,
+		};
+	}
+	const chars = trimmed.length;
+	const words = countMatches(trimmed, WORD_RE);
+	// Behavior signals are computed on a stripped prose body; long /
+	// well-formatted messages score zero because they are deliberate, not
+	// emotional outbursts.
+	const prose = stripStructuredContent(trimmed).trim();
+	if (!prose || countNonEmptyLines(prose) >= MAX_PROSE_LINES) {
+		return {
+			chars,
+			words,
+			yelling: 0,
+			profanity: 0,
+			anguish: 0,
+			negation: 0,
+			repetition: 0,
+			blame: 0,
+		};
 	}
+	const anguish =
+		countMatches(prose, DRAMA_RE) +
+		countMatches(prose, ANGUISH_RE) +
+		countMatches(prose, DUDE_RE) +
+		countMatches(prose, ELLIPSIS_RE);
+	const negation = countMatches(prose, NEGATION_LEAD_RE) + countMatches(prose, NEGATION_PHRASE_RE);
+	const repetition = countMatches(prose, REPETITION_RECALL_RE) + countMatches(prose, REPETITION_STILL_RE);
+	const blame = countMatches(prose, BLAME_YOU_RE) + countMatches(prose, BLAME_STOP_RE);
 	return {
-		chars: trimmed.length,
-		words: countMatches(trimmed, WORD_RE),
-		yellingSentences: countYellingSentences(trimmed),
-		profanity: countMatches(trimmed, PROFANITY_RE),
-		dramaRuns: countMatches(trimmed, DRAMA_RE),
+		chars,
+		words,
+		yelling: countYellingSentences(prose),
+		profanity: countMatches(prose, PROFANITY_RE),
+		anguish,
+		negation,
+		repetition,
+		blame,
 	};
 }
@@ -480,7 +677,10 @@ export function computeUserMessageMetrics(text: string): UserMessageMetrics {
 export const EMPTY_USER_METRICS: UserMessageMetrics = Object.freeze({
 	chars: 0,
 	words: 0,
-	yellingSentences: 0,
+	yelling: 0,
 	profanity: 0,
-	dramaRuns: 0,
+	anguish: 0,
+	negation: 0,
+	repetition: 0,
+	blame: 0,
 });