@jungjaehoon/mama-server 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,349 @@
1
+ /**
2
+ * MAMA (Memory-Augmented MCP Architecture) - Outcome Tracker
3
+ *
4
+ * Track decision outcomes from user feedback
5
+ * Tasks: 4.1-4.8 (Failure/success indicators, UserPromptSubmit analysis, outcome marking)
6
+ * AC #3: Failure tracking (user feedback → outcome marked, failure_reason extracted, duration calculated)
7
+ *
8
+ * @module outcome-tracker
9
+ * @version 1.0
10
+ * @date 2025-11-14
11
+ */
12
+
13
+ const { info, error: logError } = require('./debug-logger');
14
+ const { getDB, updateDecisionOutcome } = require('./memory-store');
15
+ const { updateConfidence } = require('./decision-tracker');
16
+
17
/**
 * Failure indicators
 * Task 4.2: Define failure indicators
 *
 * Case-insensitive patterns; a message that matches any one of them is
 * treated as negative feedback by matchesFailureIndicators().
 */
const FAILURE_INDICATORS = [
  /doesn['’]t\s*work/i, // accept both the ASCII and the typographic apostrophe
  /failed/i,
  /error/i,
  /slow/i,
  /broken/i,
  /\bbug/i, // anchored at a word start so "debug" / "debugging" is not a failure report
  /wrong/i,
  /not\s*working/i,
];
31
+
32
/**
 * Success indicators
 * Task 4.3: Define success indicators
 *
 * Case-insensitive patterns; a message that matches any one of them is
 * treated as positive feedback by matchesSuccessIndicators().
 * Word boundaries are used where the bare substring commonly occurs inside
 * unrelated or opposite-meaning words.
 */
const SUCCESS_INDICATORS = [
  /\bworks\b/i, // not "networks", "frameworks", "workspace"
  /\bperfect/i, // "perfectly" still matches, "imperfect" does not
  /\bgreat\b/i, // not "greater than"
  /\bsuccess/i, // "successful(ly)" still matches, "unsuccessful" does not
  /excellent/i,
  /fast/i,
  /good/i,
];
45
+
46
/**
 * Partial success indicators
 * Task 4.3: Define partial success indicators
 *
 * Case-insensitive patterns; a message that matches any one of them is
 * treated as lukewarm feedback by matchesPartialIndicators().
 */
const PARTIAL_INDICATORS = [
  /okay/i,
  /\bacceptable/i, // anchored so "unacceptable" is not counted as partial success
  /improved/i,
  /better/i,
];
56
+
57
/**
 * Length of the "recent decision" window, expressed in milliseconds.
 * Task 4.5: Only mark outcome if decision is recent (< 1 hour)
 */
const RECENT_WINDOW_MS = 1000 * 60 * 60; // ms/s * s/min * min/h = one hour
62
+
63
/**
 * Report whether a message contains any failure indicator.
 *
 * Task 4.2: Match failure patterns
 *
 * @param {string} message - User message
 * @returns {boolean} True as soon as one FAILURE_INDICATORS pattern matches
 */
function matchesFailureIndicators(message) {
  for (const indicator of FAILURE_INDICATORS) {
    if (indicator.test(message)) {
      return true;
    }
  }
  return false;
}
74
+
75
/**
 * Report whether a message contains any success indicator.
 *
 * Task 4.3: Match success patterns
 *
 * @param {string} message - User message
 * @returns {boolean} True as soon as one SUCCESS_INDICATORS pattern matches
 */
function matchesSuccessIndicators(message) {
  for (const indicator of SUCCESS_INDICATORS) {
    if (indicator.test(message)) {
      return true;
    }
  }
  return false;
}
86
+
87
/**
 * Report whether a message contains any partial-success indicator.
 *
 * Task 4.3: Match partial success patterns
 *
 * @param {string} message - User message
 * @returns {boolean} True as soon as one PARTIAL_INDICATORS pattern matches
 */
function matchesPartialIndicators(message) {
  for (const indicator of PARTIAL_INDICATORS) {
    if (indicator.test(message)) {
      return true;
    }
  }
  return false;
}
98
+
99
/**
 * Classify a user message as a decision outcome.
 *
 * Task 4.4, 4.5: Analyze user message for indicators
 * AC #3: Failure tracking from user feedback
 *
 * The matchers are consulted in a fixed priority order (failure, then
 * success, then partial); the first one that matches decides the result.
 *
 * @param {string} message - User message
 * @returns {string|null} Outcome type ('FAILED', 'SUCCESS', 'PARTIAL') or null
 */
function analyzeOutcome(message) {
  // Ordered [label, matcher] pairs: earlier entries win over later ones.
  const classifiers = [
    ['FAILED', matchesFailureIndicators],
    ['SUCCESS', matchesSuccessIndicators],
    ['PARTIAL', matchesPartialIndicators],
  ];

  for (const [label, matcher] of classifiers) {
    if (matcher(message)) {
      return label;
    }
  }

  // No indicator matched: no clear outcome
  return null;
}
123
+
124
/**
 * Extract failure reason from user message
 *
 * Task 4.6: Extract failure_reason from user message
 * AC #3: failure_reason extracted
 *
 * Simple extraction: first non-empty sentence, capped at 200 characters.
 * Future: Use LLM for better extraction
 *
 * A sentence ends at a run of '.', '!' or '?' that is followed by whitespace
 * or the end of the message, so dots inside tokens such as "index.js" or
 * "v1.2" do not cut the reason short.
 *
 * @param {string} message - User message
 * @param {string} outcome - Outcome type
 * @returns {string|null} Failure reason, or null when outcome is not 'FAILED'
 */
function extractFailureReason(message, outcome) {
  if (outcome !== 'FAILED') {
    return null;
  }

  // A missing or non-string message falls through to the generic reason
  // instead of throwing on .split().
  const text = typeof message === 'string' ? message : '';

  // Skip empty leading fragments (e.g. a message that starts with "...").
  const firstSentence =
    text
      .split(/[.!?]+(?=\s|$)/)
      .map((sentence) => sentence.trim())
      .find((sentence) => sentence.length > 0) || '';

  // Limit to 200 characters
  const reason = firstSentence.substring(0, 200);

  return reason || 'User indicated failure';
}
150
+
151
/**
 * Get recent decision (within 1 hour)
 *
 * Task 4.5: Find recent decision (< 1 hour)
 * AC #3: Recent decision (< 1 hour) marked
 *
 * Looks up the newest decision of the session that has no outcome yet and
 * whose created_at is later than (now - RECENT_WINDOW_MS).
 *
 * @param {string} sessionId - Session ID
 * @returns {Object|null} Recent decision row, or null when none qualifies
 * @throws {Error} "Failed to query recent decision: ..." when obtaining the
 *   database handle or running the query fails; the original error is
 *   attached as `cause`
 */
function getRecentDecision(sessionId) {
  try {
    // Acquired inside the try so connection failures are wrapped the same
    // way as query failures.
    const db = getDB();

    const cutoffTime = Date.now() - RECENT_WINDOW_MS;

    const recent = db
      .prepare(
        `
        SELECT * FROM decisions
        WHERE session_id = ?
        AND outcome IS NULL
        AND created_at > ?
        ORDER BY created_at DESC
        LIMIT 1
        `
      )
      .get(sessionId, cutoffTime);

    return recent || null;
  } catch (error) {
    throw new Error(`Failed to query recent decision: ${error.message}`, { cause: error });
  }
}
185
+
186
/**
 * Calculate duration in days
 *
 * Task 4.7: Calculate duration_days
 * AC #3: duration_days calculated
 *
 * @param {number} createdAt - Decision created timestamp (milliseconds)
 * @param {number} [now=Date.now()] - Reference time in milliseconds; callers
 *   normally omit it, tests can pass a fixed value
 * @returns {number} Elapsed days rounded to 2 decimal places; 0 when
 *   createdAt lies in the future relative to `now`
 */
function calculateDurationDays(createdAt, now = Date.now()) {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;

  // A timestamp ahead of `now` (e.g. clock skew) would otherwise produce a
  // negative duration.
  const elapsedMs = Math.max(0, now - createdAt);

  // Round to 2 decimal places
  return Math.round((elapsedMs / MS_PER_DAY) * 100) / 100;
}
203
+
204
/**
 * Translate an outcome into its effect on decision confidence.
 *
 * Task 6: Confidence evolution - Calculate impact based on outcome
 * AC #5: Confidence score calculated based on history
 *
 * @param {string} outcome - Outcome type ('FAILED', 'SUCCESS', 'PARTIAL')
 * @param {number} durationDays - Duration in days
 * @returns {number} Impact on confidence (0 for an unrecognised outcome)
 */
function getEvidenceImpact(outcome, durationDays) {
  // Task 6.3: Define evidence impacts
  const baseImpacts = new Map([
    ['SUCCESS', 0.2], // +0.2 for success
    ['FAILED', -0.3], // -0.3 for failure
    ['PARTIAL', 0.1], // +0.1 for partial success
  ]);

  const base = baseImpacts.has(outcome) ? baseImpacts.get(outcome) : 0;

  // Task 6.3: Temporal stability bonus (30+ days), granted to successes only
  const earnsStabilityBonus = outcome === 'SUCCESS' && durationDays >= 30;

  return earnsStabilityBonus ? base + 0.1 : base;
}
237
+
238
/**
 * Mark decision outcome
 *
 * Task 4.8: Update decision row with outcome, failure_reason, duration_days
 * Task 6.5: Update confidence when outcome is marked
 * AC #3: Outcome marked with failure_reason and duration_days
 * AC #5: Confidence evolution
 *
 * Reads the decision row, derives a new confidence from the outcome's
 * evidence impact via updateConfidence(), and persists outcome,
 * failure_reason, duration_days and confidence via updateDecisionOutcome().
 *
 * @param {string} decisionId - Decision ID
 * @param {string} outcome - Outcome type ('FAILED', 'SUCCESS', 'PARTIAL')
 * @param {string} failureReason - Failure reason (if outcome=FAILED)
 * @param {number} durationDays - Duration in days
 * @throws {Error} "Failed to mark outcome: ..." when the decision does not
 *   exist or any step fails; the original error is attached as `cause`
 */
function markOutcome(decisionId, outcome, failureReason, durationDays) {
  // Formats a confidence for the log line without assuming it is a number:
  // a null/non-numeric value must not throw after the row has already been
  // updated.
  const formatConfidence = (value) =>
    typeof value === 'number' && Number.isFinite(value) ? value.toFixed(2) : String(value);

  try {
    // Get current decision to read confidence
    const db = getDB();
    const decision = db.prepare('SELECT * FROM decisions WHERE id = ?').get(decisionId);

    if (!decision) {
      throw new Error(`Decision not found: ${decisionId}`);
    }

    // Task 6.5: Calculate new confidence
    const evidenceImpact = getEvidenceImpact(outcome, durationDays);
    const evidence = [{ type: outcome, impact: evidenceImpact }];
    const newConfidence = updateConfidence(decision.confidence, evidence);

    // Update decision with outcome and new confidence
    updateDecisionOutcome(decisionId, {
      outcome,
      failure_reason: failureReason,
      duration_days: durationDays,
      confidence: newConfidence,
    });

    info(
      `[MAMA] Confidence updated: ${formatConfidence(decision.confidence)} → ${formatConfidence(newConfidence)} (${outcome})`
    );
  } catch (error) {
    throw new Error(`Failed to mark outcome: ${error.message}`, { cause: error });
  }
}
281
+
282
/**
 * UserPromptSubmit Handler
 *
 * Task 4.4: On UserPromptSubmit, analyze user message for indicators
 * Task 4.5: If matches + recent decision (< 1 hour), mark outcome
 * Task 4.6: Extract failure_reason from user message
 * Task 4.7: Calculate duration_days
 * Task 4.8: Update decision row
 *
 * AC #3: Failure tracking (user feedback → outcome marked)
 *
 * Any error raised along the way is logged and swallowed so that a tracking
 * problem never breaks the hook.
 *
 * @param {Object} hookContext - Hook context from Claude Code
 * @param {string} hookContext.user_message - User's message
 * @param {string} hookContext.session_id - Session ID
 */
function onUserPromptSubmit(hookContext) {
  try {
    const text = hookContext.user_message || '';
    const session = hookContext.session_id || '';

    // Task 4.4: classify the message; the decision lookup (Task 4.5) only
    // happens when an outcome was detected
    const detected = analyzeOutcome(text);
    const target = detected ? getRecentDecision(session) : null;

    if (detected && target) {
      // Task 4.6 / 4.7: derive the fields stored alongside the outcome
      const reason = extractFailureReason(text, detected);
      const days = calculateDurationDays(target.created_at);

      // Task 4.8: persist
      markOutcome(target.id, detected, reason, days);

      info(`[MAMA] Outcome marked: ${target.id} → ${detected} (${days} days)`);
    }
  } catch (err) {
    // Log error but don't crash hook
    logError(`[MAMA] Outcome tracking failed: ${err.message}`);
  }
}
333
+
334
// Export API
const outcomeTrackerApi = {
  // Hook entry point
  onUserPromptSubmit,

  // Analysis and persistence steps
  analyzeOutcome,
  extractFailureReason,
  getRecentDecision,
  calculateDurationDays,
  markOutcome,

  // Indicator matchers
  matchesFailureIndicators,
  matchesSuccessIndicators,
  matchesPartialIndicators,

  // Pattern tables and tuning constants
  FAILURE_INDICATORS,
  SUCCESS_INDICATORS,
  PARTIAL_INDICATORS,
  RECENT_WINDOW_MS,
};

module.exports = outcomeTrackerApi;
@@ -0,0 +1,236 @@
1
+ /**
2
+ * MAMA (Memory-Augmented MCP Architecture) - Query Intent Analysis
3
+ *
4
+ * Analyzes user queries to detect decision-related intent using EXAONE 3.5
5
+ * Tasks: 2.1-2.8 (LLM intent analysis with fallback chain)
6
+ * AC #1: Query intent analysis within 100ms
7
+ * AC #5: LLM fallback (EXAONE → Gemma → Qwen)
8
+ *
9
+ * @module query-intent
10
+ * @version 1.0
11
+ * @date 2025-11-14
12
+ */
13
+
14
+ const { info, error: logError } = require('./debug-logger');
15
+ const { generate, DEFAULT_MODEL, FALLBACK_MODEL } = require('./ollama-client');
16
+
17
/**
 * Analyze user message for decision-related intent
 *
 * Task 2.1-2.5: LLM intent analysis
 * AC #1: Detect if query involves decisions
 * AC #5: Fallback chain implemented
 *
 * Builds a classification prompt, sends it through generateWithFallback(),
 * parses the JSON reply and applies the confidence threshold.
 *
 * @param {string} userMessage - User's message to analyze
 * @param {Object} options - Analysis options
 * @param {number} options.timeout - Timeout in ms passed to the LLM call (default: 5000)
 * @param {number} options.threshold - Minimum confidence (default: 0.6)
 * @returns {Promise<Object>} The parsed LLM result when its confidence meets
 *   the threshold; otherwise { involves_decision: false, topic: null,
 *   confidence, reasoning: 'Confidence below threshold' }
 * @throws {Error} "Intent analysis failed: ..." when generation fails or the
 *   reply is not a JSON object; the original error is attached as `cause`
 */
async function analyzeIntent(userMessage, options = {}) {
  const {
    timeout = 5000, // Increased: LLM needs time, user accepts longer thinking
    threshold = 0.6,
  } = options;

  try {
    // Task 2.2: Build prompt for decision-making analysis
    const prompt = `
Analyze if this query involves decision-making or past choices:

User Message: "${userMessage}"

Decision Indicators:
1. References to past decisions ("we chose X", "last time we did Y")
2. Questions about previous approaches ("why did we use X?")
3. Decision evolution queries ("should we change from X to Y?")
4. Architecture/strategy questions
5. Method/approach questions ("how do I...", "what's the way to...")
6. Best practice questions ("what should I use for...", "which one should I use...")

Return JSON with "topic" as a short snake_case identifier (e.g., "mesh_structure", "database_choice", "auth_strategy", "coding_style", "error_handling"):
{
"involves_decision": boolean,
"topic": string or null (extract main technical topic in snake_case),
"confidence": 0.0-1.0,
"reasoning": "brief explanation"
}

IMPORTANT: Generate "topic" freely based on the message content. Do NOT limit to predefined values.

Examples:
- "Why did we choose COMPLEX mesh structure?" → {"involves_decision": true, "topic": "mesh_structure", "confidence": 0.9}
- "Let's use PostgreSQL for database" → {"involves_decision": true, "topic": "database_choice", "confidence": 0.9}
- "How should we store workflow data?" → {"involves_decision": true, "topic": "workflow_storage", "confidence": 0.85}
- "Read the file please" → {"involves_decision": false, "topic": null, "confidence": 0.1}
`.trim();

    // Task 2.3: Call EXAONE 3.5 with Tier 1 fallback
    const result = await generateWithFallback(prompt, {
      format: 'json',
      temperature: 0.3,
      max_tokens: 200,
      timeout,
    });

    // Task 2.4: Parse response
    const parsed = typeof result === 'string' ? JSON.parse(result) : result;

    // Valid JSON is not necessarily an object ("null", "42", ...); reject it
    // with an explicit message instead of failing on a property read.
    if (parsed === null || typeof parsed !== 'object') {
      const received = parsed === null ? 'null' : typeof parsed;
      throw new Error(`LLM returned a non-object response (${received})`);
    }

    // Task 2.5: Threshold check (a missing confidence counts as below threshold)
    const meetsThreshold = parsed.confidence >= threshold;

    if (!meetsThreshold) {
      info(`[MAMA] Intent confidence ${parsed.confidence} below threshold ${threshold}`);
      return {
        involves_decision: false,
        topic: null,
        confidence: parsed.confidence,
        reasoning: 'Confidence below threshold',
      };
    }

    return parsed;
  } catch (error) {
    // CLAUDE.md Rule #1: NO FALLBACK
    // Errors must be thrown for debugging
    logError(`[MAMA] Intent analysis FAILED: ${error.message}`);
    throw new Error(`Intent analysis failed: ${error.message}`, { cause: error });
  }
}
104
+
105
/**
 * Generate with tiered fallback chain
 *
 * Task 2.6-2.7: Implement fallback to Gemma 2B and Qwen 3B
 * AC #5: LLM fallback works
 *
 * Tries each model in order and returns the first successful result. A
 * failing tier is reported with console.warn and the next tier is tried.
 *
 * @param {string} prompt - LLM prompt
 * @param {Object} options - Generation options (forwarded to generate(); `model` is overridden per tier)
 * @returns {Promise<Object|string>} LLM response
 * @throws {Error} "All LLM tiers failed. Last error: ..." when every tier
 *   fails; the last tier's error is attached as `cause`
 */
async function generateWithFallback(prompt, options = {}) {
  const models = [
    DEFAULT_MODEL, // Tier 1: EXAONE 3.5 (2.4B)
    FALLBACK_MODEL, // Tier 2: Gemma 2B
    'qwen:3b', // Tier 3: Qwen 3B
  ];

  let lastError;

  for (const model of models) {
    try {
      info(`[MAMA] Trying ${model}...`);

      const result = await generate(prompt, {
        ...options,
        model,
      });

      info(`[MAMA] ${model} succeeded`);
      return result;
    } catch (error) {
      console.warn(`[MAMA] ${model} failed: ${error.message}`);

      // Remember the failure and continue to the next tier
      lastError = error;
    }
  }

  // All tiers failed: surface the last failure and keep it reachable via `cause`
  throw new Error(`All LLM tiers failed. Last error: ${lastError.message}`, { cause: lastError });
}
146
+
147
/**
 * Guess a topic from keywords in the user message (fallback method)
 *
 * Task 2.8: Keyword-based fallback when all LLMs fail
 * Simple regex matching for common topics
 *
 * Rules are checked in the order listed; the first pattern that matches
 * determines the topic.
 *
 * @param {string} userMessage - User's message
 * @returns {Object} Topic detection result
 */
function extractTopicKeywords(userMessage) {
  // Ordered [topic, pattern] rules
  const rules = [
    ['workflow_storage', /workflow|save|persist/i],
    ['mesh_structure', /mesh|structure/i],
    ['authentication', /auth|jwt|oauth|login/i],
    ['testing', /test|jest|spec/i],
    ['architecture', /architecture|design/i],
    ['coding_style', /style|format|coding/i],
  ];

  const matched = rules.find(([, keywords]) => keywords.test(userMessage));

  if (matched === undefined) {
    return {
      involves_decision: false,
      topic: null,
      confidence: 0.0,
      reasoning: 'No topic keywords found',
    };
  }

  const [topic] = matched;
  return {
    involves_decision: true,
    topic,
    confidence: 0.5, // Lower confidence for keyword matching
    reasoning: 'Keyword-based detection (LLM fallback)',
  };
}
184
+
185
// Export API
const queryIntentApi = {
  // LLM-backed intent analysis
  analyzeIntent,
  // Keyword-only topic detection
  extractTopicKeywords,
};

module.exports = queryIntentApi;
190
+
191
// CLI execution for testing: runs only when this file is the entry script
if (require.main === module) {
  info('🧠 MAMA Query Intent Analysis - Test\n');

  // Task 2.8: Test intent detection accuracy
  const sampleQueries = [
    {
      message: 'Why did we choose COMPLEX mesh structure?',
      expected: { involves_decision: true, topic: 'mesh_structure' },
    },
    {
      message: 'Read the file please',
      expected: { involves_decision: false },
    },
    {
      message: 'We chose JWT for authentication, remember?',
      expected: { involves_decision: true, topic: 'authentication' },
    },
  ];

  // Analyzes one sample and reports whether the decision flag matches the
  // expectation; a failure of the analysis itself is logged, not rethrown.
  const runSample = async (sample) => {
    info(`📋 Testing: "${sample.message}"`);

    try {
      const outcome = await analyzeIntent(sample.message);
      info('✅ Result:', outcome);

      // Verify expectations (only the involves_decision flag is compared)
      const flagMatches = outcome.involves_decision === sample.expected.involves_decision;
      info(flagMatches ? ' ✓ Decision detection matches' : ' ✗ Decision detection MISMATCH');

      info('');
    } catch (err) {
      logError(`❌ Error: ${err.message}\n`);
    }
  };

  (async () => {
    // Samples run one after another so their log output stays grouped
    for (const sample of sampleQueries) {
      await runSample(sample);
    }

    info('═══════════════════════════');
    info('✅ Intent analysis tests complete');
    info('═══════════════════════════');
  })();
}