psyche-ai 5.0.0 → 7.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/classify.js CHANGED
@@ -3,7 +3,159 @@
3
3
  //
4
4
  // Closes the loop: instead of asking the LLM to self-classify,
5
5
  // we pre-classify the user's message and pre-compute chemistry.
6
+ //
7
+ // v2: Enhanced multi-signal scoring — weighted sentiment words,
8
+ // emoji analysis, structural features, and contextual priming.
9
+ // Pure computation, no LLM calls.
6
10
  // ============================================================
11
// ── Sentiment lexicons ───────────────────────────────────────
// Each Set is built a single time when the module is evaluated and is
// only ever queried with `.has(token)` afterwards. Entries are grouped
// by language; English entries are stored in lowercase.

/** Words that express a pleasant or approving feeling. */
const POSITIVE_WORDS = new Set([
    // Chinese
    ...["开心", "快乐", "幸福", "满意", "期待", "兴奋", "感动", "温暖", "喜欢", "棒", "厉害", "佩服", "优秀", "了不起"],
    // English
    ...["happy", "glad", "love", "wonderful", "enjoy", "grateful", "excited", "awesome", "great", "amazing", "beautiful"],
]);

/** Words that express distress, anger or fatigue. */
const NEGATIVE_WORDS = new Set([
    // Chinese
    ...["难过", "痛苦", "失望", "沮丧", "愤怒", "烦", "讨厌", "害怕", "无奈", "累", "焦虑", "压力", "崩溃", "绝望"],
    // English
    ...["sad", "angry", "frustrated", "disappointed", "hurt", "afraid", "worried", "tired", "stressed", "anxious"],
]);

/** Words that signal closeness, attachment or trust. */
const INTIMATE_WORDS = new Set([
    // Chinese
    ...["想你", "陪", "在乎", "珍惜", "温柔", "拥抱", "信任", "安全感", "依赖"],
    // English
    ...["miss", "care", "feel", "heart", "close", "together", "trust", "comfort"],
]);

/** Short replies whose tone is ambiguous and depends on the surrounding context. */
const AMBIGUOUS_SARCASM_WORDS = new Set([
    // Chinese
    ...["呵呵", "嗯嗯", "哦", "好吧", "随便", "都行", "行吧"],
    // English
    ...["ok", "fine", "whatever", "sure"],
]);
29
// ── Emoji sets ───────────────────────────────────────────────
// Alternations of literal emoji. Neither pattern carries the `g` flag, so
// `.test()` on them is stateless; callers that need every occurrence build
// a global copy from `.source`.
//
// The heart is written with escapes and an optional U+FE0F (variation
// selector 16): some inputs carry the bare U+2764 code point without the
// selector, and both spellings must count as the same positive emoji.
const POSITIVE_EMOJI = /😊|😄|\u2764\uFE0F?|👍|🎉|😃|🥰|💕|✨|🌟|💪|😁|🤗|💖|😍/;
const NEGATIVE_EMOJI = /😢|😭|😡|💔|😰|😞|😔|🥺|😩|😣|😤|😨|😱|🤮|💀/;
32
+ // ── Helpers ──────────────────────────────────────────────────
33
/**
 * Break a message into lowercase lookup tokens.
 *
 * - Each run of ASCII letters becomes one token.
 * - Each run of CJK ideographs (U+4E00–U+9FFF) is expanded, position by
 *   position, into its 1-, 2- and 3-character n-grams. Three-character
 *   n-grams are required because lexicon entries such as "了不起" or
 *   "安全感" are three characters long and could never be produced from
 *   single characters and bigrams alone.
 * - Everything else (digits, punctuation, emoji, whitespace) only acts as
 *   a separator and produces no token.
 *
 * @param {string} text Raw message text.
 * @returns {string[]} Tokens in reading order; empty for empty or
 *   non-string input.
 */
function tokenize(text) {
    if (typeof text !== "string" || text.length === 0) {
        return [];
    }
    // Longest CJK n-gram emitted per position.
    const MAX_CJK_NGRAM = 3;
    const tokens = [];
    // Whitespace belongs to neither character class, so matching on the
    // whole string yields the same runs as splitting on whitespace first.
    const runs = text.toLowerCase().match(/[\u4e00-\u9fff]+|[a-z]+/g) ?? [];
    for (const run of runs) {
        if (!/[\u4e00-\u9fff]/.test(run)) {
            // Latin word: used as-is.
            tokens.push(run);
            continue;
        }
        for (let start = 0; start < run.length; start++) {
            for (let size = 1; size <= MAX_CJK_NGRAM && start + size <= run.length; size++) {
                tokens.push(run.slice(start, start + size));
            }
        }
    }
    return tokens;
}
63
/**
 * Tally how many tokens of a message appear in the positive, negative and
 * intimate lexicons, and squash each tally into the 0–1 range.
 *
 * A tally of n maps to min(1, n / 3): one hit gives about 0.33 and three or
 * more hits saturate at 1. A message that produces no tokens scores 0 in
 * every category.
 *
 * @param text The message text to score
 * @returns An object `{ positive, negative, intimate }`, each between 0 and 1
 */
export function scoreSentiment(text) {
    // Number of hits at which a category reaches its maximum score.
    const SATURATION_HITS = 3;
    const hits = { positive: 0, negative: 0, intimate: 0 };
    for (const word of tokenize(text)) {
        if (POSITIVE_WORDS.has(word)) {
            hits.positive += 1;
        }
        if (NEGATIVE_WORDS.has(word)) {
            hits.negative += 1;
        }
        if (INTIMATE_WORDS.has(word)) {
            hits.intimate += 1;
        }
    }
    const squash = (tally) => Math.min(1, tally / SATURATION_HITS);
    return {
        positive: squash(hits.positive),
        negative: squash(hits.negative),
        intimate: squash(hits.intimate),
    };
}
86
/**
 * Measure the emoji polarity of a message.
 *
 * Counts every occurrence of the positive and negative emoji patterns and
 * returns (positive − negative) / (positive + negative): +1 when only
 * positive emoji are present, −1 when only negative ones are, and 0 when
 * the two balance out or no listed emoji occurs at all.
 *
 * @param text The message text to inspect
 * @returns A number between -1 and +1
 */
export function scoreEmoji(text) {
    // The shared patterns are non-global; a fresh global copy is needed to
    // collect every occurrence rather than just the first.
    const occurrences = (pattern) => (text.match(new RegExp(pattern.source, "g")) ?? []).length;
    const upbeat = occurrences(POSITIVE_EMOJI);
    const downbeat = occurrences(NEGATIVE_EMOJI);
    const seen = upbeat + downbeat;
    return seen === 0 ? 0 : (upbeat - downbeat) / seen;
}
98
/**
 * Heuristically estimate how likely a message is sarcastic.
 *
 * The score is a capped sum of independent signals:
 *   - +0.3 for each matching Chinese sarcasm pattern
 *   - +0.3 for each matching English sarcasm phrase
 *   - +0.15 when the message is shorter than 15 characters and contains a
 *     praise word (curt praise)
 *   - +0.2 when at least one recent stimulus is criticism, conflict,
 *     sarcasm or authority
 *
 * Surrounding whitespace is ignored: several patterns are anchored to the
 * end of the message, and a trailing space or newline would otherwise stop
 * them from matching.
 *
 * @param text The user's message text
 * @param recentStimuli Optional recent stimulus types (entries may be null)
 * @returns A score between 0 and 1; 0 for empty, whitespace-only or
 *   non-string input
 */
export function detectSarcasmSignals(text, recentStimuli) {
    if (typeof text !== "string") {
        return 0;
    }
    const message = text.trim();
    if (message.length === 0) {
        return 0;
    }
    // Chinese sarcasm patterns: surface praise + particles, dismissive
    // repetition, and curt replies anchored to the end of the message.
    const zhSarcasmPatterns = [
        /你真(行|棒|厉害|了不起|牛|强|能|可以)(啊|呀|哦|嘛|吧|呢)?/,
        /厉害了/,
        /好好好/,
        /行行行/,
        /是是是/,
        /对对对/,
        /了不起/,
        /牛[啊逼]?$/,
        /可以[的啊]?$/,
        /哦[。?]?$/,
        /呵呵/,
        /嗯嗯[。]?$/,
        /随[你便]/,
        /爱[咋怎]咋[地的]?/,
    ];
    // English sarcasm phrases (case-insensitive).
    const enSarcasmPatterns = [
        /oh really/i, /sure thing/i, /yeah right/i, /wow.{0,5}amazing/i,
        /good for you/i, /how nice/i, /whatever you say/i,
    ];
    const negativeTypes = ["criticism", "conflict", "sarcasm", "authority"];
    const lower = message.toLowerCase();
    let score = 0;
    for (const pattern of zhSarcasmPatterns) {
        if (pattern.test(lower)) {
            score += 0.3;
        }
    }
    for (const pattern of enSarcasmPatterns) {
        if (pattern.test(message)) {
            score += 0.3;
        }
    }
    // Short message + praise word = likely sarcasm
    if (message.length < 15 && /棒|厉害|了不起|牛|great|amazing|wonderful|brilliant/i.test(message)) {
        score += 0.15;
    }
    // Context: after a negative exchange, surface praise is more suspicious.
    // The `s &&` check skips null/undefined history entries.
    if (recentStimuli?.some((s) => s && negativeTypes.includes(s))) {
        score += 0.2;
    }
    return Math.min(1, score);
}
155
/**
 * Stimulus labels treated as "negative" when recent conversation history
 * is used for contextual priming.
 */
const NEGATIVE_TYPES = new Set([
    "criticism",
    "conflict",
    "neglect",
    "vulnerability",
    "sarcasm",
]);
7
159
  const RULES = [
8
160
  {
9
161
  type: "praise",
@@ -154,9 +306,17 @@ const RULES = [
154
306
  * Classify the stimulus type(s) of a user message.
155
307
  * Returns all detected types sorted by confidence, highest first.
156
308
  * Falls back to "casual" if nothing matches.
309
+ *
310
+ * v2: When keyword rules miss (confidence < 0.5), a weighted multi-signal
311
+ * scoring system combines sentiment words, emoji, structural features,
312
+ * and optional contextual priming to produce better classifications for
313
+ * everyday messages.
314
+ *
315
+ * @param text The user's message text
316
+ * @param recentStimuli Optional recent stimulus history for contextual priming
157
317
  */
158
- export function classifyStimulus(text) {
159
- const results = [];
318
+ export function classifyStimulus(text, recentStimuli, recentMessages) {
319
+ let results = [];
160
320
  for (const rule of RULES) {
161
321
  let matchCount = 0;
162
322
  for (const pattern of rule.patterns) {
@@ -169,8 +329,9 @@ export function classifyStimulus(text) {
169
329
  results.push({ type: rule.type, confidence });
170
330
  }
171
331
  }
332
+ // If keyword rules produced a high-confidence match, boost with structural signals and return
333
+ const bestKeywordConfidence = results.length > 0 ? Math.max(...results.map(r => r.confidence)) : 0;
172
334
  // ── Structural signals (message-level features) ──
173
- // When keywords miss, message shape still carries meaning.
174
335
  const len = text.length;
175
336
  const hasI = /我/.test(text) || /\bI\b/i.test(text);
176
337
  const hasYou = /你/.test(text) || /\byou\b/i.test(text);
@@ -180,70 +341,195 @@ export function classifyStimulus(text) {
180
341
  const hasLaughter = /[2]{3,}|hhh|www|哈{2,}/i.test(text);
181
342
  const hasSharing = /我[今昨前]天|我刚[才刚]|我最近/.test(text);
182
343
  const sentenceCount = text.split(/[。!?!?.…]+/).filter(Boolean).length;
183
- if (results.length === 0) {
184
- // No keyword matched — use structural fallback
185
- if (len === 0) {
186
- // Empty input neutral
187
- results.push({ type: "casual", confidence: 0.3 });
188
- }
189
- else if (hasLaughter) {
190
- // Internet laughter not caught by keywords (e.g. 233333)
191
- results.push({ type: "humor", confidence: 0.65 });
192
- }
193
- else if (exclamationCount >= 2) {
194
- // Emphatic expression → surprise/excitement
195
- results.push({ type: "surprise", confidence: 0.55 });
196
- }
197
- else if (len <= 4 && !hasQuestion) {
198
- // Ultra-short non-question: "好" "行" "哦" — neglect-like
199
- results.push({ type: "neglect", confidence: 0.45 });
344
+ if (bestKeywordConfidence >= 0.5) {
345
+ // Keywords matched with good confidence — structural features can boost
346
+ if (hasI && len > 30 && results[0].confidence < 0.8) {
347
+ results[0].confidence = Math.min(0.9, results[0].confidence + 0.1);
200
348
  }
201
- else if (hasI && hasEllipsis) {
202
- // Personal + trailing off: "我觉得...有点难" — vulnerability
203
- results.push({ type: "vulnerability", confidence: 0.55 });
349
+ if (exclamationCount >= 2 && results[0].confidence < 0.85) {
350
+ results[0].confidence = Math.min(0.9, results[0].confidence + 0.05);
204
351
  }
205
- else if (hasSharing && len > 20) {
206
- // Sharing personal experience higher engagement signal
207
- results.push({ type: "casual", confidence: 0.65 });
352
+ results.sort((a, b) => b.confidence - a.confidence);
353
+ // Sarcasm reclassification: if primary looks like praise but sarcasm signals are strong
354
+ if (results.length > 0 && results[0].type === "praise") {
355
+ const sarcasmScore = detectSarcasmSignals(text, recentStimuli);
356
+ if (sarcasmScore >= 0.4) {
357
+ // Reclassify: replace praise with sarcasm
358
+ results = results.filter((r) => r.type !== "praise");
359
+ results.unshift({ type: "sarcasm", confidence: Math.min(0.9, sarcasmScore) });
360
+ }
208
361
  }
209
- else if (hasI && len > 8) {
210
- // Personal sharing (any meaningful length) — engagement signal
211
- results.push({ type: "casual", confidence: 0.55 });
362
+ // Ambiguous words: default to sarcasm unless context is positive
363
+ const lowerText = text.trim().toLowerCase();
364
+ if (AMBIGUOUS_SARCASM_WORDS.has(lowerText) || AMBIGUOUS_SARCASM_WORDS.has(text.trim())) {
365
+ const hasPositiveContext = recentStimuli?.some((s) => s && ["praise", "validation", "humor", "intimacy", "casual"].includes(s));
366
+ if (!hasPositiveContext) {
367
+ // Default ambiguous to sarcasm/cold
368
+ results = [{ type: "sarcasm", confidence: 0.6 }];
369
+ }
212
370
  }
213
- else if (hasQuestion && hasYou) {
214
- // Asking about the agent specifically intellectual curiosity
215
- results.push({ type: "intellectual", confidence: 0.5 });
371
+ // Contextual contrast: if previous messages were negative and this one is surface-positive
372
+ if (recentMessages && recentMessages.length > 0 && results.length > 0 && results[0].type === "praise") {
373
+ const lastMsg = recentMessages[recentMessages.length - 1];
374
+ const lastSentiment = scoreSentiment(lastMsg);
375
+ if (lastSentiment.negative > 0.3) {
376
+ // Previous message was negative, current is praise → likely sarcasm
377
+ const sarcasmBoost = detectSarcasmSignals(text, recentStimuli);
378
+ if (sarcasmBoost > 0.2) {
379
+ results = results.filter((r) => r.type !== "praise");
380
+ results.unshift({ type: "sarcasm", confidence: 0.7 });
381
+ }
382
+ }
216
383
  }
217
- else if (hasQuestion) {
218
- // Any question — intellectual curiosity or casual
219
- results.push({ type: "casual", confidence: 0.55 });
384
+ return results;
385
+ }
386
+ // ── Enhanced multi-signal scoring (fallback path) ─────────
387
+ // No keyword rule matched with confidence >= 0.5.
388
+ // Build a score map across all stimulus types using weighted signals.
389
+ if (len === 0) {
390
+ return [{ type: "casual", confidence: 0.3 }];
391
+ }
392
+ const scores = {};
393
+ const addScore = (type, delta) => {
394
+ scores[type] = (scores[type] ?? 0) + delta;
395
+ };
396
+ // ── Signal 1: Sentiment words (weight: up to ~0.65) ──
397
+ // A single word hit gives normalized ~0.33; multiplier must be high enough
398
+ // so one word + structural signals can cross the 0.35 threshold.
399
+ // Short messages get a density boost — when there are few words, each
400
+ // sentiment word carries proportionally more meaning.
401
+ const sentiment = scoreSentiment(text);
402
+ const densityBoost = len <= 15 ? 1.4 : 1.0;
403
+ if (sentiment.positive > 0) {
404
+ addScore("praise", sentiment.positive * 0.55 * densityBoost);
405
+ addScore("validation", sentiment.positive * 0.35 * densityBoost);
406
+ }
407
+ if (sentiment.negative > 0) {
408
+ addScore("vulnerability", sentiment.negative * 0.55 * densityBoost);
409
+ addScore("criticism", sentiment.negative * 0.25 * densityBoost);
410
+ }
411
+ if (sentiment.intimate > 0) {
412
+ addScore("intimacy", sentiment.intimate * 0.55 * densityBoost);
413
+ addScore("validation", sentiment.intimate * 0.15 * densityBoost);
414
+ }
415
+ // Personal pronoun + sentiment = stronger emotional expression
416
+ if (hasI && (sentiment.positive > 0 || sentiment.negative > 0 || sentiment.intimate > 0)) {
417
+ const maxSentiment = Math.max(sentiment.positive, sentiment.negative, sentiment.intimate);
418
+ if (sentiment.positive === maxSentiment)
419
+ addScore("praise", 0.10);
420
+ if (sentiment.negative === maxSentiment)
421
+ addScore("vulnerability", 0.10);
422
+ if (sentiment.intimate === maxSentiment)
423
+ addScore("intimacy", 0.10);
424
+ }
425
+ // ── Signal 2: Emoji sentiment (weight: up to 0.25) ──
426
+ const emojiScore = scoreEmoji(text);
427
+ if (emojiScore > 0) {
428
+ addScore("praise", emojiScore * 0.20);
429
+ addScore("humor", emojiScore * 0.15);
430
+ }
431
+ else if (emojiScore < 0) {
432
+ addScore("vulnerability", Math.abs(emojiScore) * 0.25);
433
+ addScore("neglect", Math.abs(emojiScore) * 0.15);
434
+ }
435
+ // Emoji-only messages: if text is entirely emoji (no alphanumeric/CJK), boost
436
+ const strippedText = text.replace(/[\s\p{Emoji_Presentation}\p{Emoji}\uFE0F\u200D]/gu, "").trim();
437
+ if (strippedText.length === 0 && len > 0) {
438
+ // Pure emoji message — amplify emoji signal
439
+ if (emojiScore < 0) {
440
+ addScore("vulnerability", 0.30);
441
+ addScore("neglect", 0.20);
220
442
  }
221
- else if (len > 50 && sentenceCount >= 3) {
222
- // Long multi-sentence without keywords → engaged storytelling
223
- results.push({ type: "casual", confidence: 0.6 });
443
+ else if (emojiScore > 0) {
444
+ addScore("praise", 0.25);
445
+ addScore("humor", 0.20);
224
446
  }
225
- else {
226
- results.push({ type: "casual", confidence: 0.3 });
447
+ }
448
+ // ── Signal 3: Structural features (additive, weight: 0.05-0.20 each) ──
449
+ if (hasLaughter) {
450
+ addScore("humor", 0.35);
451
+ }
452
+ if (exclamationCount >= 2) {
453
+ addScore("surprise", 0.25);
454
+ }
455
+ else if (exclamationCount === 1) {
456
+ addScore("surprise", 0.08);
457
+ }
458
+ if (hasEllipsis) {
459
+ addScore("vulnerability", 0.12);
460
+ addScore("neglect", 0.05);
461
+ }
462
+ if (hasI && hasEllipsis) {
463
+ addScore("vulnerability", 0.15);
464
+ }
465
+ if (hasQuestion && hasYou) {
466
+ addScore("intellectual", 0.20);
467
+ }
468
+ else if (hasQuestion) {
469
+ addScore("intellectual", 0.12);
470
+ addScore("casual", 0.10);
471
+ }
472
+ if (hasSharing && len > 20) {
473
+ addScore("casual", 0.20);
474
+ }
475
+ if (hasI && len > 8) {
476
+ addScore("casual", 0.10);
477
+ }
478
+ if (len > 50 && sentenceCount >= 3) {
479
+ addScore("casual", 0.15);
480
+ }
481
+ // Ultra-short non-question messages (e.g. "嗯", "好", "行")
482
+ if (len <= 4 && !hasQuestion) {
483
+ addScore("neglect", 0.20);
484
+ addScore("casual", 0.10);
485
+ }
486
+ // ── Signal 4: Low-confidence keyword matches contribute to scores ──
487
+ // If keyword rules matched but below 0.5, fold their signal in
488
+ for (const r of results) {
489
+ addScore(r.type, r.confidence * 0.5);
490
+ }
491
+ // ── Signal 5: Contextual priming from recent stimuli ──
492
+ if (recentStimuli && recentStimuli.length > 0) {
493
+ const recentNonNull = recentStimuli.filter((s) => s !== null);
494
+ if (recentNonNull.length > 0) {
495
+ const negCount = recentNonNull.filter(s => NEGATIVE_TYPES.has(s)).length;
496
+ const negRatio = negCount / recentNonNull.length;
497
+ // If recent context is mostly negative (>= 50%), give a small bonus to negative types
498
+ if (negRatio >= 0.5) {
499
+ const bonus = 0.05 + negRatio * 0.05; // 0.075-0.1
500
+ addScore("vulnerability", bonus);
501
+ addScore("criticism", bonus * 0.6);
502
+ addScore("neglect", bonus * 0.5);
503
+ }
227
504
  }
228
505
  }
229
- else {
230
- // Keywords matched — structural features can boost confidence
231
- if (hasI && len > 30 && results[0].confidence < 0.8) {
232
- // Long personal message boosts the primary match slightly
233
- results[0].confidence = Math.min(0.9, results[0].confidence + 0.1);
506
+ // ── Pick the best scoring type ──
507
+ const THRESHOLD = 0.35;
508
+ const scoredResults = [];
509
+ for (const [type, score] of Object.entries(scores)) {
510
+ if (score >= THRESHOLD) {
511
+ scoredResults.push({ type, confidence: Math.min(0.85, score) });
234
512
  }
235
- if (exclamationCount >= 2 && results[0].confidence < 0.85) {
236
- // Emphasis boosts conviction
237
- results[0].confidence = Math.min(0.9, results[0].confidence + 0.05);
513
+ }
514
+ if (scoredResults.length > 0) {
515
+ scoredResults.sort((a, b) => b.confidence - a.confidence);
516
+ // Sarcasm reclassification: if primary looks like praise but sarcasm signals are strong
517
+ if (scoredResults[0].type === "praise") {
518
+ const sarcasmScore = detectSarcasmSignals(text, recentStimuli);
519
+ if (sarcasmScore >= 0.4) {
520
+ const filtered = scoredResults.filter((r) => r.type !== "praise");
521
+ filtered.unshift({ type: "sarcasm", confidence: Math.min(0.85, sarcasmScore) });
522
+ return filtered;
523
+ }
238
524
  }
525
+ return scoredResults;
239
526
  }
240
- // Sort by confidence descending
241
- results.sort((a, b) => b.confidence - a.confidence);
242
- return results;
527
+ // Nothing scored above threshold — fall back to casual with 0.3
528
+ return [{ type: "casual", confidence: 0.3 }];
243
529
  }
244
530
  /**
245
531
  * Get the primary (highest confidence) stimulus type.
246
532
  */
247
- export function getPrimaryStimulus(text) {
248
- return classifyStimulus(text)[0].type;
533
+ export function getPrimaryStimulus(text, recentStimuli) {
534
+ return classifyStimulus(text, recentStimuli)[0].type;
249
535
  }