@equilateral_ai/mindmeld 3.4.0 → 3.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1103 @@
1
+ /**
2
+ * MindMeld MCP Core — Shared business logic for MCP handlers
3
+ *
4
+ * Extracted from mindmeldMcpHandler.js so both the API Gateway handler
5
+ * and the Lambda Function URL streaming handler can share the same
6
+ * JSON-RPC router, auth, tool implementations, and scoring.
7
+ *
8
+ * Auth: X-MindMeld-Token header OR Authorization: Bearer token
9
+ */
10
+
11
+ const { executeQuery } = require('./dbOperations');
12
+ const crypto = require('crypto');
13
+ const { BedrockRuntimeClient, InvokeModelCommand } = require('@aws-sdk/client-bedrock-runtime');
14
+
15
// Module-wide Bedrock runtime client; stays null until the first request for it.
let _bedrockClient = null;

/**
 * Return the shared Bedrock runtime client, constructing it on first use.
 *
 * The client is created for the us-east-2 region and then reused by every
 * later call.
 *
 * @returns {BedrockRuntimeClient} The cached client instance.
 */
function getBedrockClient() {
  if (_bedrockClient) {
    return _bedrockClient;
  }
  _bedrockClient = new BedrockRuntimeClient({ region: 'us-east-2' });
  return _bedrockClient;
}
22
+
23
// Identity of this MCP server.
const SERVER_INFO = {
  name: 'mindmeld',
  version: '0.2.0',
  description: 'Standards injection and governance for AI-assisted development',
};

// MCP protocol revision string.
const PROTOCOL_VERSION = '2025-03-26';

// CORS headers: any origin, the listed HTTP methods, and the request headers
// used for auth and MCP session tracking.
const CORS_HEADERS = {
  'Access-Control-Allow-Origin': '*',
  'Access-Control-Allow-Methods': ['POST', 'GET', 'DELETE', 'OPTIONS'].join(', '),
  'Access-Control-Allow-Headers': [
    'Content-Type',
    'Accept',
    'Mcp-Session-Id',
    'X-MindMeld-Token',
    'Authorization',
  ].join(', '),
};
36
+
37
+ // ============================================================
38
+ // Category Weights (same as standardsRelevantPost.js)
39
+ // ============================================================
40
+
41
// Per-category multiplier used by relevance scoring. Categories that are not
// listed here fall back to a default chosen by the scoring code.
const CATEGORY_WEIGHTS = Object.fromEntries([
  // Code standard categories
  ['serverless-saas-aws', 1.0],
  ['frontend-development', 1.0],
  ['database', 0.9],
  ['backend', 0.9],
  ['compliance-security', 0.9],
  ['deployment', 0.8],
  ['testing', 0.7],
  ['real-time-systems', 0.7],
  ['well-architected', 0.7],
  ['cost-optimization', 0.7],
  ['multi-agent-orchestration', 0.1],
  // Business domains
  ['ip-strategy', 0.6],
  ['architecture-decisions', 0.8],
  ['go-to-market', 0.6],
  ['operations', 0.5],
  ['legal-process', 0.5],
  ['finance', 0.5],
  ['communication', 0.4],
  ['product-strategy', 0.6],
  ['investor-relations', 0.4],
]);
65
+
66
+ // ============================================================
67
+ // Tool Definitions
68
+ // ============================================================
69
+
70
/**
 * MCP tool catalogue: each entry has a name, a description, and a JSON Schema
 * (`inputSchema`) describing its arguments.
 *
 * Built inside an IIFE so the small schema helpers stay private to this
 * declaration.
 */
const TOOLS = (() => {
  // String-typed property; keys from `extra` (e.g. `enum`) sit between `type` and `description`.
  const text = (description, extra = {}) => ({ type: 'string', ...extra, description });

  // Object schema; `required` is emitted only when a list is supplied.
  const objectOf = (properties, required) => (
    required
      ? { type: 'object', properties, required }
      : { type: 'object', properties }
  );

  const initSession = {
    name: 'mindmeld_init_session',
    description: 'Initialize a MindMeld standards injection session. Scans project context, identifies relevant standards, returns injected rules and session token.',
    inputSchema: objectOf(
      {
        project_path: text('Absolute path to the project root'),
        task_description: text('Optional: what the developer intends to work on this session'),
        team_id: text('Team identifier for corpus lookup. Uses personal corpus if omitted.'),
      },
      ['project_path']
    ),
  };

  const recordCorrection = {
    name: 'mindmeld_record_correction',
    description: 'Record a correction to AI output. Feeds the standards maturity pipeline. Corrections drive pattern detection and eventually promote to Provisional standards.',
    inputSchema: objectOf(
      {
        session_id: text('Session token from mindmeld_init_session'),
        original_output: text('What the AI generated'),
        corrected_output: text('What the developer changed it to'),
        correction_note: text('Optional: developer explanation of why the correction was made'),
        file_context: text('Optional: filename or path where correction occurred'),
      },
      ['session_id', 'original_output', 'corrected_output']
    ),
  };

  const getStandards = {
    name: 'mindmeld_get_standards',
    description: 'On-demand lookup for specific standards or maturity status. Use for UI display, not injection — mindmeld_init_session handles injection.',
    inputSchema: objectOf({
      team_id: text('Team identifier (optional, resolved from token)'),
      filter: objectOf({
        maturity: {
          type: 'array',
          items: { type: 'string' },
          description: 'Filter by maturity: provisional, solidified, reinforced',
        },
        content_type: text('Filter by content type. Omit for all.', {
          enum: ['code_standard', 'business_invariant'],
        }),
        domain: text('Filter by domain (e.g., "ip-strategy", "architecture-decisions"). Omit for all.'),
        source: text('Filter by corpus source (e.g., "equilateral-standards", "mcp-extraction", "nist-800-53"). Omit for all.'),
        standard_name: text('Filter by standard name (partial match)'),
        limit: { type: 'integer', description: 'Max results (default 20)' },
      }),
    }),
  };

  const ingestRawSession = {
    name: 'mindmeld_ingest_raw_session',
    description: 'Extract business invariants from a raw conversation transcript using LLM analysis. Returns invariant candidates in BUSINESS-SCHEMA shape with confidence scores. Use dry_run=true to preview before committing.',
    inputSchema: objectOf(
      {
        session_text: text('Raw conversation transcript to extract invariants from'),
        source_label: text('Label for provenance tracking (e.g., "claude-chat-2026-02-24-patent-filing")'),
        domain_hint: text('Optional domain hint to guide classification (e.g., "ip-strategy", "architecture-decisions")'),
        auto_maturity: text(
          'Maturity level for extracted invariants. Default: "provisional". Only use "solidified" when multiple independent sessions have validated the same decision.',
          { enum: ['provisional', 'solidified'] }
        ),
        dry_run: {
          type: 'boolean',
          description: 'If true, return extracted candidates without committing to corpus. Default: true',
        },
        model: text(
          'Claude model for extraction. haiku=fast/cheap, sonnet=balanced (default), opus=deepest extraction',
          { enum: ['haiku', 'sonnet', 'opus'] }
        ),
      },
      ['session_text', 'source_label']
    ),
  };

  return [initSession, recordCorrection, getStandards, ingestRawSession];
})();
157
+
158
+ // ============================================================
159
+ // Auth: API Token Validation
160
+ // ============================================================
161
+
162
/**
 * Cognito JWT verification settings and the module-level JWKS cache.
 *
 * The key set is downloaded from the issuer's well-known JWKS endpoint and
 * kept in memory; `_jwksCacheTime` records when it was last stored so it can
 * be refreshed once it is older than `JWKS_CACHE_TTL`.
 */
let _jwksCache = null; // last key set stored, or null if none yet
let _jwksCacheTime = 0; // Date.now() value recorded when _jwksCache was stored
const COGNITO_ISSUER = 'https://cognito-idp.us-east-2.amazonaws.com/us-east-2_638OhwuV1';
const JWKS_URL = `${COGNITO_ISSUER}/.well-known/jwks.json`;
const JWKS_CACHE_TTL = 60 * 60 * 1000; // 1 hour, in milliseconds
172
+
173
/**
 * Return the JSON Web Key Set from JWKS_URL, using the in-memory cache while
 * it is younger than JWKS_CACHE_TTL.
 *
 * A response is cached only when it is HTTP 200 and parses to an object with
 * a `keys` array. Otherwise an error body that happens to be valid JSON would
 * be stored and would make every key lookup miss until the cache expired.
 *
 * @returns {Promise<{keys: object[]}>} The parsed key set.
 * @throws {Error} On network failure, timeout, non-200 status, invalid JSON,
 *   or a body without a `keys` array (delivered as a promise rejection).
 */
async function fetchJwks() {
  if (_jwksCache && (Date.now() - _jwksCacheTime) < JWKS_CACHE_TTL) {
    return _jwksCache;
  }

  const JWKS_FETCH_TIMEOUT_MS = 5000;
  const https = require('https');

  return new Promise((resolve, reject) => {
    const request = https.get(JWKS_URL, (res) => {
      if (res.statusCode !== 200) {
        // Drain the body so the socket is released, then fail without caching.
        res.resume();
        reject(new Error(`JWKS fetch failed with HTTP ${res.statusCode}`));
        return;
      }

      let data = '';
      res.on('data', (chunk) => { data += chunk; });
      res.on('end', () => {
        try {
          const parsed = JSON.parse(data);
          if (!parsed || !Array.isArray(parsed.keys)) {
            throw new Error('JWKS response does not contain a "keys" array');
          }
          _jwksCache = parsed;
          _jwksCacheTime = Date.now();
          resolve(parsed);
        } catch (e) {
          reject(e);
        }
      });
    });

    request.on('error', reject);
    // Without a timeout a stalled connection would leave the caller waiting indefinitely.
    request.setTimeout(JWKS_FETCH_TIMEOUT_MS, () => {
      request.destroy(new Error('JWKS fetch timed out'));
    });
  });
}
192
+
193
/**
 * Decode a base64url string (the alphabet used by JWT segments) into bytes.
 *
 * @param {string} str - Base64url text, with or without `=` padding.
 * @returns {Buffer} The decoded bytes.
 */
function base64UrlDecode(str) {
  // Map the URL-safe alphabet back to standard base64.
  const standard = str.replace(/-/g, '+').replace(/_/g, '/');
  // Pad with '=' up to the next multiple of four characters.
  const missing = (4 - (standard.length % 4)) % 4;
  const padded = standard.padEnd(standard.length + missing, '=');
  return Buffer.from(padded, 'base64');
}
198
+
199
/**
 * Validate a Cognito JWT access token.
 *
 * Checks, in order: the token has three segments that decode to JSON objects;
 * the header declares RS256; the issuer matches COGNITO_ISSUER; a numeric
 * `exp` claim is present and not in the past; `token_use` is "access"; and the
 * RSA-SHA256 signature verifies against the JWKS key whose `kid` matches the
 * header.
 *
 * @param {string} token - Compact-serialized JWT.
 * @returns {Promise<{sub: *, username: *, email: *, scope: *}|null>} Claims
 *   taken from the payload, or null when any check fails.
 */
async function validateCognitoJwt(token) {
  if (typeof token !== 'string') return null;

  const parts = token.split('.');
  if (parts.length !== 3) return null;
  const [encodedHeader, encodedPayload, encodedSignature] = parts;

  // Decode header and payload without verification first
  let header;
  let payload;
  try {
    header = JSON.parse(base64UrlDecode(encodedHeader).toString());
    payload = JSON.parse(base64UrlDecode(encodedPayload).toString());
  } catch (e) {
    return null;
  }

  // JSON such as `null` or `5` parses fine but is not a usable header/payload.
  if (!header || typeof header !== 'object' || !payload || typeof payload !== 'object') {
    return null;
  }

  // The signature below is always verified as RSA-SHA256; reject tokens that
  // declare any other algorithm rather than silently ignoring the header.
  if (header.alg !== 'RS256') return null;

  // Check issuer, expiry and token type. `exp` is mandatory: a token without
  // it would otherwise never expire.
  if (payload.iss !== COGNITO_ISSUER) return null;
  const nowSeconds = Math.floor(Date.now() / 1000);
  if (typeof payload.exp !== 'number' || payload.exp < nowSeconds) return null;
  if (payload.token_use !== 'access') return null;

  // Verify signature using JWKS
  try {
    const jwks = await fetchJwks();
    const key = jwks.keys?.find((k) => k.kid === header.kid);
    if (!key) return null;

    // Build RSA public key from JWK
    const keyObject = crypto.createPublicKey({ key, format: 'jwk' });
    const verify = crypto.createVerify('RSA-SHA256');
    verify.update(`${encodedHeader}.${encodedPayload}`);
    if (!verify.verify(keyObject, base64UrlDecode(encodedSignature))) return null;
  } catch (e) {
    console.error('[MCP] JWT signature verification failed:', e.message);
    return null;
  }

  // Extract user info from the payload's `username` and `sub` claims.
  return {
    sub: payload.sub,
    username: payload.username,
    // NOTE(review): `email` is just the username claim; this is only an email
    // address if the user pool uses email as the username — confirm.
    email: payload.username,
    scope: payload.scope,
  };
}
239
+
240
/**
 * Look up a header value without depending on the casing of its name.
 *
 * @param {object|null|undefined} headers - Header map from the request.
 * @param {string} name - Header name to find.
 * @returns {string|undefined} The first non-empty string value, if any.
 */
function findHeaderValue(headers, name) {
  if (!headers || typeof headers !== 'object') return undefined;
  const wanted = name.toLowerCase();

  const direct = headers[wanted];
  if (typeof direct === 'string' && direct) return direct;

  for (const [key, value] of Object.entries(headers)) {
    if (key.toLowerCase() === wanted && typeof value === 'string' && value) {
      return value;
    }
  }
  return undefined;
}

/**
 * Turn a joined user/client row into the auth result, enforcing the paid-tier gate.
 *
 * @param {object} row - Row with email_address, client_id, company_id, subscription_tier.
 * @returns {{user: object}|{error: string, message: string}}
 */
function buildAuthResult(row) {
  // Require active subscription (no free tier)
  if (!row.subscription_tier || row.subscription_tier === 'free') {
    return { error: 'auth_invalid', message: 'Active MindMeld subscription required. Subscribe at app.mindmeld.dev' };
  }
  return {
    user: {
      email: row.email_address,
      client_id: row.client_id,
      company_id: row.company_id,
      subscription_tier: row.subscription_tier
    }
  };
}

/**
 * Authenticate a request from its headers.
 *
 * Supported credentials:
 *   1. X-MindMeld-Token: mm_live_xxx      (existing clients, stdio bridge)
 *   2. Authorization: Bearer mm_live_xxx  (API token)
 *   3. Authorization: Bearer <jwt>        (OAuth via Cognito)
 *
 * Header names and the "Bearer" scheme are matched case-insensitively, so the
 * result does not depend on how the transport normalizes header casing.
 *
 * @param {object|null|undefined} headers - Request header map.
 * @returns {Promise<{user: {email: *, client_id: *, company_id: *, subscription_tier: *}}
 *   |{error: string, message: string}>} The authenticated user, or an error
 *   descriptor with `error` set to "auth_required" or "auth_invalid".
 */
async function validateApiToken(headers) {
  let token = findHeaderValue(headers, 'x-mindmeld-token');

  if (!token) {
    const authHeader = findHeaderValue(headers, 'authorization');
    const bearer = authHeader ? /^Bearer\s+(.*)$/i.exec(authHeader) : null;
    if (bearer) {
      token = bearer[1].trim();
    }
  }

  if (!token) {
    return { error: 'auth_required', message: 'Authentication required.' };
  }

  // Check if token looks like a JWT (has 3 dot-separated parts, starts with eyJ)
  if (token.startsWith('eyJ') && token.split('.').length === 3) {
    const jwtUser = await validateCognitoJwt(token);
    if (!jwtUser) {
      return { error: 'auth_invalid', message: 'Invalid or expired OAuth token' };
    }

    // Look up user by email/username in our database.
    // NOTE(review): matching on the token's username as an email address is
    // only safe if usernames cannot be chosen to equal another user's email —
    // confirm against the user pool configuration.
    const jwtResult = await executeQuery(`
      SELECT u.email_address, c.client_id, c.subscription_tier, c.subscription_status,
             ue.company_id
      FROM rapport.users u
      JOIN rapport.user_entitlements ue ON u.email_address = ue.email_address
      JOIN rapport.clients c ON ue.client_id = c.client_id
      WHERE u.email_address = $1 OR u.cognito_sub = $2
      LIMIT 1
    `, [jwtUser.email, jwtUser.sub]);

    if (jwtResult.rows.length === 0) {
      return { error: 'auth_invalid', message: 'User not found. Sign up at mindmeld.dev' };
    }

    return buildAuthResult(jwtResult.rows[0]);
  }

  // API token path (mm_live_xxx): only the SHA-256 hash of the token is looked up.
  const tokenHash = crypto.createHash('sha256').update(token).digest('hex');

  const result = await executeQuery(`
    SELECT t.token_id, t.email_address, t.client_id, t.company_id,
           c.subscription_tier, c.subscription_status
    FROM rapport.api_tokens t
    JOIN rapport.clients c ON t.client_id = c.client_id
    WHERE t.token_hash = $1
      AND t.status = 'active'
  `, [tokenHash]);

  if (result.rows.length === 0) {
    return { error: 'auth_invalid', message: 'Invalid or expired API token' };
  }

  const row = result.rows[0];
  const authResult = buildAuthResult(row);
  if (authResult.error) {
    return authResult;
  }

  // Fire-and-forget: update usage stats. A failure must not block the request,
  // but it is logged so it does not go unnoticed.
  executeQuery(
    'UPDATE rapport.api_tokens SET last_used_at = NOW(), request_count = request_count + 1 WHERE token_id = $1',
    [row.token_id]
  ).catch((err) => {
    console.error('[MCP] Token usage update failed:', err.message);
  });

  return authResult;
}
333
+
334
+ // ============================================================
335
+ // Relevance Scoring (same algorithm as standardsRelevantPost.js)
336
+ // ============================================================
337
+
338
/**
 * Score each standard for relevance and return them ordered best-first.
 *
 * Every returned item is a shallow copy of the input standard with a
 * `relevance_score` property (rounded to one decimal place) added.
 *
 * @param {object[]} standards - Standards to score.
 * @param {Object<string, number>} [recentCategories] - Recent usage count per category.
 * @returns {object[]} Scored copies, sorted by descending `relevance_score`.
 */
function rankStandards(standards, recentCategories) {
  const maturityScores = { enforced: 30, reinforced: 25, validated: 20, solidified: 15, recommended: 10, provisional: 5 };

  const scoreOf = (standard) => {
    const categoryWeight = CATEGORY_WEIGHTS[standard.category] || 0.5;

    // Contributions are summed in this order: correlation, maturity, category
    // weight, then each bonus that applies.
    const contributions = [
      (standard.correlation || 1.0) * 40,
      maturityScores[standard.maturity] || 0,
      categoryWeight * 20,
    ];

    if (standard.applicable_files && standard.applicable_files.length > 0) {
      contributions.push(5);
    }
    if (standard.cost_impact && standard.cost_impact.severity === 'critical') {
      contributions.push(10);
    }

    // anti_patterns may be an array or a keyed object; either counts if non-empty.
    if (standard.anti_patterns) {
      const antiPatternCount = Array.isArray(standard.anti_patterns)
        ? standard.anti_patterns.length
        : Object.keys(standard.anti_patterns).length;
      if (antiPatternCount > 0) contributions.push(5);
    }

    const flaggedByRule = standard.rule && standard.rule.startsWith('WORKFLOW:');
    const flaggedByKeyword = Array.isArray(standard.keywords) && standard.keywords.includes('workflow');
    if (flaggedByRule || flaggedByKeyword) {
      contributions.push(10);
    }

    if (standard.rationale) {
      contributions.push(5);
    }

    // Recency bonus: tiered by usage count, scaled by the category weight.
    if (recentCategories && recentCategories[standard.category]) {
      const usageCount = recentCategories[standard.category];
      const rawBonus = usageCount >= 8 ? 25 : (usageCount >= 4 ? 18 : 10);
      contributions.push(rawBonus * categoryWeight);
    }

    const total = contributions.reduce((sum, part) => sum + part, 0);
    return Math.round(total * 10) / 10;
  };

  const scored = standards.map((standard) => ({ ...standard, relevance_score: scoreOf(standard) }));
  scored.sort((left, right) => right.relevance_score - left.relevance_score);
  return scored;
}
377
+
378
+ // ============================================================
379
+ // Formatted Injection (same format as hooks/session-start.js)
380
+ // ============================================================
381
+
382
/**
 * Render the selected standards as the markdown document injected into a session.
 *
 * The output has a fixed header (title, session marker, copyright notice), a
 * "Relevant Standards" section for everything that is not a business
 * invariant, a "Business Invariants" section for the rest, and a fixed footer.
 * Either section is omitted when it has no entries.
 *
 * @param {string} sessionId - Session identifier embedded in an HTML comment.
 * @param {object[]} standards - Standards to render.
 * @returns {string} Markdown text with lines joined by "\n".
 */
function formatInjection(sessionId, standards) {
  // Lines for the first example of a standard, or nothing if it has no usable example.
  const exampleLines = (standard) => {
    if (!(standard.examples && standard.examples.length > 0)) return [];
    const first = standard.examples[0];
    const code = typeof first === 'string' ? first : (first?.code || first?.description || '');
    return code ? ['', '**Example**:', '```javascript', code, '```'] : [];
  };

  // Lines for the anti-pattern list; entries without a description are skipped.
  const antiPatternLines = (standard) => {
    if (!(standard.anti_patterns && standard.anti_patterns.length > 0)) return [];
    const lines = ['', '**Anti-patterns**:'];
    for (const entry of standard.anti_patterns) {
      const text = typeof entry === 'string' ? entry : (entry?.description || '');
      if (text) lines.push(`- \u274C ${text}`);
    }
    return lines;
  };

  const codeStandardLines = (standard) => [
    `### ${standard.element}`,
    `**Category**: ${standard.category}`,
    `**Rule**: ${standard.rule}`,
    ...exampleLines(standard),
    ...antiPatternLines(standard),
    '',
  ];

  const invariantLines = (invariant) => {
    const lines = [
      `### ${invariant.element}`,
      `**Domain**: ${invariant.category}`,
      `**Invariant**: ${invariant.rule}`,
    ];
    if (invariant.rationale) lines.push(`**Rationale**: ${invariant.rationale}`);
    if (invariant.consequences) lines.push(`**If violated**: ${invariant.consequences}`);
    if (invariant.exceptions && Array.isArray(invariant.exceptions) && invariant.exceptions.length > 0) {
      lines.push('**Exceptions**:', ...invariant.exceptions.map((ex) => `- ${ex}`));
    }
    lines.push('');
    return lines;
  };

  const codeStandards = standards.filter((s) => s.content_type !== 'business_invariant');
  const businessInvariants = standards.filter((s) => s.content_type === 'business_invariant');

  const document = [
    '# MindMeld Standards Injection',
    `<!-- session:${sessionId} -->`,
    '',
    '\u00A9 2025 Equilateral AI (Pareidolia LLC). All rights reserved.',
    'Licensed for use within MindMeld platform only. Redistribution prohibited.',
    '',
  ];

  if (codeStandards.length > 0) {
    document.push('## Relevant Standards', '', ...codeStandards.flatMap(codeStandardLines));
  }

  if (businessInvariants.length > 0) {
    document.push('## Business Invariants', '', ...businessInvariants.flatMap(invariantLines));
  }

  document.push('---', '*Context provided by MindMeld - mindmeld.dev*');

  return document.join('\n');
}
458
+
459
+ // ============================================================
460
+ // Tool Implementations
461
+ // ============================================================
462
+
463
/**
 * Dispatch an MCP tool call to the implementation registered for `name`.
 *
 * @param {string} name - Tool name as listed in the tool catalogue.
 * @param {object} args - Arguments supplied by the caller.
 * @param {object} user - Authenticated user passed through to the tool.
 * @returns {Promise<object>} Whatever the selected tool resolves to.
 * @throws {Error} "Unknown tool: <name>" when no tool has that name.
 */
async function callTool(name, args, user) {
  // Thunks keep each implementation unresolved until its entry is selected.
  const dispatch = new Map([
    ['mindmeld_init_session', () => toolInitSession(args, user)],
    ['mindmeld_record_correction', () => toolRecordCorrection(args, user)],
    ['mindmeld_get_standards', () => toolGetStandards(args, user)],
    ['mindmeld_ingest_raw_session', () => toolIngestRawSession(args, user)],
  ]);

  const invoke = dispatch.get(name);
  if (!invoke) {
    throw new Error(`Unknown tool: ${name}`);
  }
  return await invoke();
}
477
+
478
/**
 * Derive a project name from a filesystem path: its last non-empty segment.
 * Both "/" and "\" are treated as separators; anything without a usable
 * segment yields "default".
 */
function deriveProjectName(projectPath) {
  if (typeof projectPath !== 'string' || projectPath === '') return 'default';
  const segments = projectPath.split(/[\\/]+/).filter(Boolean);
  return segments.length > 0 ? segments[segments.length - 1] : 'default';
}

/**
 * Walk best-first standards and keep up to `maxTotal`, at most `maxPerCategory`
 * per category and `maxPerTitle` per title. Only accepted standards count
 * against the caps.
 */
function pickDiverseStandards(ranked, { maxPerCategory = 2, maxPerTitle = 1, maxTotal = 10 } = {}) {
  const picked = [];
  const categoryCounts = new Map();
  const titleCounts = new Map();

  for (const standard of ranked) {
    if (picked.length >= maxTotal) break;
    const category = standard.category;
    const title = standard.title || standard.element;
    const categoryCount = categoryCounts.get(category) ?? 0;
    const titleCount = titleCounts.get(title) ?? 0;
    if (categoryCount >= maxPerCategory || titleCount >= maxPerTitle) continue;

    picked.push(standard);
    categoryCounts.set(category, categoryCount + 1);
    titleCounts.set(title, titleCount + 1);
  }
  return picked;
}

/**
 * Tool: mindmeld_init_session.
 *
 * Resolves (or auto-creates) the project for the caller's company, loads the
 * effective standards, ranks and trims them, records the session and the
 * injected standards, and returns the injection payload.
 *
 * Project resolution, the recency lookup, session recording and the corpus
 * count are best-effort: failures are logged and the call continues. A failure
 * of the main standards query propagates to the caller.
 *
 * @param {{project_path: string, task_description?: string}} args - Tool arguments.
 *   (`team_id` from the tool schema is not read here.)
 * @param {{company_id: *, email: *}} user - Authenticated user.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   MCP tool result whose text is JSON: the session payload, or a
 *   `corpus_empty` error when no standards are available.
 */
async function toolInitSession(args, user) {
  const { project_path, task_description } = args;
  const sessionId = crypto.randomUUID();

  // Try to match project by name for the user's company, auto-create if not found
  let projectId = null;
  const projectName = deriveProjectName(project_path);
  try {
    const projectResult = await executeQuery(`
      SELECT project_id FROM rapport.projects
      WHERE company_id = $1 AND LOWER(project_name) = LOWER($2)
      LIMIT 1
    `, [user.company_id, projectName]);
    if (projectResult.rows.length > 0) {
      projectId = projectResult.rows[0].project_id;
    } else {
      // Auto-create project so session INSERT never fails on NOT NULL
      const newId = crypto.randomUUID();
      await executeQuery(`
        INSERT INTO rapport.projects (project_id, company_id, project_name, description, created_at)
        VALUES ($1, $2, $3, $4, NOW())
        ON CONFLICT (project_id) DO NOTHING
      `, [newId, user.company_id, projectName, `Auto-created from MCP session (${project_path || 'no path'})`]);
      projectId = newId;
    }
  } catch (err) {
    console.error('[MCP] Project lookup/create failed:', err.message);
  }

  // Get recency data for scoring boost
  const recentCategories = {};
  try {
    const recencyResult = await executeQuery(`
      SELECT sp.category, COUNT(*) as usage_count
      FROM rapport.session_standards ss
      JOIN rapport.sessions s ON s.session_id = ss.session_id
      JOIN rapport.standards_patterns sp ON sp.pattern_id = ss.standard_id
      WHERE s.email_address = $1
        AND s.started_at >= NOW() - INTERVAL '7 days'
      GROUP BY sp.category
      ORDER BY usage_count DESC LIMIT 5
    `, [user.email]);
    for (const row of recencyResult.rows) {
      recentCategories[row.category] = Number.parseInt(row.usage_count, 10);
    }
  } catch (err) {
    console.error('[MCP] Recency query failed:', err.message);
  }

  // Default to broad categories (no filesystem scanning in Lambda)
  // Includes business domains so invariants can surface via recency boost
  const categories = [
    'serverless-saas-aws', 'frontend-development', 'database', 'backend',
    'compliance-security', 'well-architected', 'cost-optimization', 'deployment', 'testing',
    'ip-strategy', 'architecture-decisions', 'go-to-market', 'operations',
    'legal-process', 'finance', 'communication', 'product-strategy', 'investor-relations'
  ];

  // Merge recency categories
  for (const cat of Object.keys(recentCategories)) {
    if (!categories.includes(cat)) categories.push(cat);
  }

  // Query standards — tenant-isolated via get_effective_standards()
  const maturityList = ['enforced', 'validated', 'recommended', 'provisional', 'solidified', 'reinforced'];
  const result = await executeQuery(`
    SELECT * FROM rapport.get_effective_standards($1, $2::varchar[], $3::varchar[])
    ORDER BY CASE WHEN maturity = 'enforced' THEN 1 WHEN maturity = 'reinforced' THEN 2
                  WHEN maturity = 'validated' THEN 3 WHEN maturity = 'solidified' THEN 4 ELSE 5 END,
             correlation DESC
  `, [user.company_id, categories, maturityList]);

  if (result.rows.length === 0) {
    return {
      content: [{ type: 'text', text: JSON.stringify({ error: 'corpus_empty', message: 'No standards found in corpus' }) }],
      isError: true
    };
  }

  // Rank, deduplicate by element (keeping the best-ranked), apply diversity caps
  const seenElements = new Set();
  const ranked = rankStandards(result.rows, recentCategories).filter((standard) => {
    if (seenElements.has(standard.element)) return false;
    seenElements.add(standard.element);
    return true;
  });

  const top = pickDiverseStandards(ranked, { maxPerCategory: 2, maxPerTitle: 1, maxTotal: 10 });

  // Format injection markdown
  const formattedInjection = formatInjection(sessionId, top);

  // Record session first (must complete before session_standards FK references it)
  try {
    await executeQuery(`
      INSERT INTO rapport.sessions (session_id, project_id, email_address, started_at, session_data)
      VALUES ($1, $2, $3, NOW(), $4)
      ON CONFLICT (session_id) DO NOTHING
    `, [sessionId, projectId, user.email, JSON.stringify({ source: 'mcp', task_description: task_description || null })]);

    // Now safe to insert session_standards — session row exists
    for (const standard of top) {
      await executeQuery(`
        INSERT INTO rapport.session_standards (session_id, standard_id, standard_name, relevance_score, created_at)
        VALUES ($1, $2, $3, $4, NOW())
        ON CONFLICT (session_id, standard_id) DO UPDATE SET relevance_score = EXCLUDED.relevance_score
      `, [sessionId, standard.pattern_id, standard.element, standard.relevance_score]);
    }
  } catch (err) {
    console.error('[MCP] Session/standards record failed:', err.message);
  }

  // Get corpus size for summary. Scoped to shared rows plus the caller's own
  // company so the number does not reveal the size of other tenants' corpora.
  let corpusSize = result.rows.length;
  try {
    const countResult = await executeQuery(
      'SELECT COUNT(*) as cnt FROM rapport.standards_patterns WHERE (company_id IS NULL OR company_id = $1)',
      [user.company_id]
    );
    corpusSize = Number.parseInt(countResult.rows[0].cnt, 10);
  } catch (err) {
    // Use result count as fallback
    console.error('[MCP] Corpus count failed:', err.message);
  }

  const response = {
    session_id: sessionId,
    injected_rules: top.map((s) => ({
      rule_id: s.pattern_id,
      standard: s.element,
      maturity: s.maturity,
      text: s.rule,
      relevance_score: s.relevance_score
    })),
    injection_summary: {
      rule_count: top.length,
      // Rough estimate: about four characters per token.
      token_estimate: Math.ceil(formattedInjection.length / 4),
      standards_matched: ranked.length,
      corpus_size: corpusSize
    },
    formatted_injection: formattedInjection
  };

  return { content: [{ type: 'text', text: JSON.stringify(response, null, 2) }] };
}
635
+
636
/**
 * Tool: mindmeld_record_correction.
 *
 * Stores a developer's correction and makes a best-effort attempt to link it
 * to an existing standard via full-text search over standard rules. The match
 * is optional: if it fails, the correction is still recorded with no match.
 *
 * @param {{session_id: string, original_output: string, corrected_output: string,
 *   correction_note?: string, file_context?: string}} args - Tool arguments.
 * @param {{email: *, company_id: *}} user - Authenticated user.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} MCP tool
 *   result whose text is JSON with correction_id, pattern_detected,
 *   matched_standard and status.
 * @throws Propagates any error from the INSERT that stores the correction.
 */
async function toolRecordCorrection(args, user) {
  const { session_id, original_output, corrected_output, correction_note, file_context } = args;
  const correctionId = crypto.randomUUID();

  // Simple pattern match: check if correction keywords match any existing standard rules
  let matchedStandardId = null;
  let patternDetected = false;
  try {
    // Extract significant words from the correction. Splitting on everything
    // that is not a letter or digit keeps to_tsquery operator characters
    // (: & | ! ( ) ' and friends) out of the query text; corrections are often
    // code, and a single such character would make the whole query a syntax error.
    const words = String(corrected_output)
      .toLowerCase()
      .split(/[^\p{L}\p{N}]+/u)
      .filter((word) => word.length > 4);
    const searchWords = [...new Set(words)].slice(0, 5);

    if (searchWords.length > 0) {
      const searchTerms = searchWords.join(' | ');
      // Only shared standards and the caller's own company's standards may match.
      const matchResult = await executeQuery(`
        SELECT pattern_id, element FROM rapport.standards_patterns
        WHERE to_tsvector('english', rule) @@ to_tsquery('english', $1)
          AND (company_id IS NULL OR company_id = $2)
        LIMIT 1
      `, [searchTerms, user.company_id]);
      if (matchResult.rows.length > 0) {
        matchedStandardId = matchResult.rows[0].pattern_id;
        patternDetected = true;
      }
    }
  } catch (err) {
    console.error('[MCP] Pattern match failed:', err.message);
  }

  // Store correction
  await executeQuery(`
    INSERT INTO rapport.mcp_corrections
      (correction_id, session_id, email_address, company_id, original_output,
       corrected_output, correction_note, file_context, matched_standard_id, status)
    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, 'recorded')
  `, [correctionId, session_id, user.email, user.company_id,
      original_output, corrected_output, correction_note || null,
      file_context || null, matchedStandardId]);

  const response = {
    correction_id: correctionId,
    pattern_detected: patternDetected,
    matched_standard: matchedStandardId,
    status: 'recorded'
  };

  return { content: [{ type: 'text', text: JSON.stringify(response, null, 2) }] };
}
681
+
682
/**
 * Tool: mindmeld_get_standards.
 *
 * Lists standards visible to the caller (rows with no company plus rows owned
 * by the caller's company), optionally filtered, together with a corpus
 * summary. The summary and per-source breakdown use the same visibility rule
 * as the listing, so they do not expose counts or source labels belonging to
 * other companies.
 *
 * @param {{filter?: {maturity?: string[], content_type?: string, domain?: string,
 *   source?: string, standard_name?: string, limit?: number}}} args - Tool arguments.
 *   `limit` defaults to 20 and is capped at 100; values that are missing,
 *   non-numeric or below 1 fall back to 20.
 * @param {{company_id: *}} user - Authenticated user.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} MCP tool
 *   result whose text is JSON with `standards` and `corpus_summary`.
 * @throws Propagates any database error.
 */
async function toolGetStandards(args, user) {
  const filter = (args && args.filter) || {};

  // A negative limit would otherwise reach SQL and be rejected by the database.
  const requestedLimit = Number.parseInt(filter.limit, 10);
  const limit = requestedLimit > 0 ? Math.min(requestedLimit, 100) : 20;

  const maturityFilter = filter.maturity;
  const nameFilter = filter.standard_name;
  const contentTypeFilter = filter.content_type;
  const domainFilter = filter.domain;
  const sourceFilter = filter.source;

  let query = `
    SELECT pattern_id as standard_id, element as name, maturity, source,
           content_type, domain, rule, rationale, consequences, exceptions, source_context,
           COUNT(*) OVER() as total_count,
           (SELECT COUNT(*) FROM rapport.session_standards WHERE standard_id = sp.pattern_id) as session_count
    FROM rapport.standards_patterns sp
    WHERE (company_id IS NULL OR company_id = $1)
  `;
  const params = [user.company_id];

  // Each optional filter appends one bound parameter; values never enter the SQL text.
  if (maturityFilter && Array.isArray(maturityFilter) && maturityFilter.length > 0) {
    params.push(maturityFilter);
    query += ` AND maturity = ANY($${params.length}::varchar[])`;
  }

  if (contentTypeFilter) {
    params.push(contentTypeFilter);
    query += ` AND content_type = $${params.length}`;
  }

  if (domainFilter) {
    params.push(domainFilter);
    query += ` AND domain = $${params.length}`;
  }

  if (sourceFilter) {
    params.push(sourceFilter);
    query += ` AND source = $${params.length}`;
  }

  if (nameFilter) {
    params.push(`%${nameFilter}%`);
    query += ` AND (element ILIKE $${params.length} OR title ILIKE $${params.length})`;
  }

  query += ` ORDER BY maturity DESC, element ASC`;
  params.push(limit);
  query += ` LIMIT $${params.length}`;

  const result = await executeQuery(query, params);

  // Corpus summary, restricted to the rows this caller is allowed to see.
  const tenantParams = [user.company_id];
  const summaryResult = await executeQuery(`
    SELECT
      COUNT(*) as total_standards,
      COUNT(*) FILTER (WHERE content_type = 'code_standard' OR content_type IS NULL) as code_standards_count,
      COUNT(*) FILTER (WHERE content_type = 'business_invariant') as business_invariants_count,
      COUNT(*) FILTER (WHERE maturity IN ('enforced', 'reinforced')) as reinforced_count,
      COUNT(*) FILTER (WHERE maturity IN ('validated', 'solidified')) as solidified_count,
      COUNT(*) FILTER (WHERE maturity IN ('recommended', 'provisional')) as provisional_count
    FROM rapport.standards_patterns
    WHERE (company_id IS NULL OR company_id = $1)
  `, tenantParams);
  const sourcesResult = await executeQuery(`
    SELECT source, COUNT(*) as count
    FROM rapport.standards_patterns
    WHERE (company_id IS NULL OR company_id = $1)
    GROUP BY source
    ORDER BY count DESC
  `, tenantParams);

  const summary = summaryResult.rows[0] || {};
  const sourceBreakdown = {};
  for (const row of sourcesResult.rows) {
    sourceBreakdown[row.source || 'unknown'] = Number.parseInt(row.count, 10);
  }

  // Counts arrive as strings; anything unparseable is reported as 0.
  const toCount = (value) => Number.parseInt(value, 10) || 0;

  const response = {
    standards: result.rows.map((r) => {
      const entry = {
        standard_id: r.standard_id,
        name: r.name,
        content_type: r.content_type || 'code_standard',
        source: r.source,
        maturity: r.maturity,
        session_count: toCount(r.session_count),
      };
      // Business invariants carry extra descriptive fields.
      if (r.content_type === 'business_invariant') {
        entry.domain = r.domain;
        entry.invariant = r.rule;
        entry.rationale = r.rationale;
        entry.consequences = r.consequences;
        if (r.exceptions && Array.isArray(r.exceptions) && r.exceptions.length > 0) {
          entry.exceptions = r.exceptions;
        }
        if (r.source_context) entry.source_label = r.source_context;
      }
      return entry;
    }),
    corpus_summary: {
      total_standards: toCount(summary.total_standards),
      code_standards: toCount(summary.code_standards_count),
      business_invariants: toCount(summary.business_invariants_count),
      reinforced_count: toCount(summary.reinforced_count),
      solidified_count: toCount(summary.solidified_count),
      provisional_count: toCount(summary.provisional_count),
      by_source: sourceBreakdown,
    }
  };

  return { content: [{ type: 'text', text: JSON.stringify(response, null, 2) }] };
}
790
+
791
+ // ============================================================
792
+ // Tool: Ingest Raw Session (LLM Extraction)
793
+ // ============================================================
794
+
795
/**
 * Builds an MCP tool error result in the shape used throughout this tool:
 * a single text content item holding a JSON `{ error, message }` payload.
 */
function ingestErrorResult(error, message) {
  return {
    content: [{ type: 'text', text: JSON.stringify({ error, message }) }],
    isError: true
  };
}

/**
 * Returns the set of lower-cased words longer than 3 characters in `text`.
 * Non-string input (e.g. a NULL `rule` column) yields an empty set instead of throwing.
 */
function ingestSignificantWords(text) {
  if (typeof text !== 'string') return new Set();
  return new Set(text.toLowerCase().split(/\s+/).filter(w => w.length > 3));
}

/**
 * Tool: extract business invariants from a raw conversation transcript with an
 * LLM (via Bedrock), flag likely duplicates against the stored corpus, and
 * optionally persist the new ones.
 *
 * @param {object} args
 * @param {string} args.session_text - Transcript text; must be 100..200,000 characters.
 * @param {string} [args.source_label] - Label stored as source_context and used in file_name.
 * @param {string} [args.domain_hint] - Optional hint prepended to the user prompt.
 * @param {string} [args.auto_maturity='provisional'] - Maturity assigned to committed rows.
 * @param {boolean} [args.dry_run=true] - When truthy, nothing is written to the database.
 * @param {string} [args.model='sonnet'] - One of 'haiku' | 'sonnet' | 'opus'; anything else falls back to sonnet.
 * @param {object} user - Authenticated user; `user.company_id` is stored on committed rows.
 * @returns {Promise<object>} MCP tool result: `{ content: [{ type: 'text', text }], isError? }`.
 */
async function toolIngestRawSession(args, user) {
  const {
    session_text,
    source_label,
    domain_hint,
    auto_maturity = 'provisional',
    dry_run = true,
    model = 'sonnet'
  } = args;

  const MODEL_IDS = {
    haiku: 'us.anthropic.claude-haiku-4-5-20251001-v1:0',
    sonnet: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
    opus: 'us.anthropic.claude-opus-4-5-20251101-v1:0'
  };
  // Own-property lookup only: a caller-supplied key such as "constructor" must not
  // resolve to something inherited from Object.prototype.
  const modelId = Object.prototype.hasOwnProperty.call(MODEL_IDS, model)
    ? MODEL_IDS[model]
    : MODEL_IDS.sonnet;

  // Validate input type and size. A non-string value has no meaningful length and
  // would otherwise slip past both size checks.
  if (typeof session_text !== 'string' || session_text.length < 100) {
    return ingestErrorResult(
      'session_text_too_short',
      'Conversation text must be at least 100 characters'
    );
  }
  if (session_text.length > 200000) {
    return ingestErrorResult(
      'session_text_too_long',
      'Conversation text must be under 200,000 characters. Split into smaller segments.'
    );
  }

  // --- LLM Extraction ---
  const systemPrompt = `You are an expert at identifying organizational knowledge, business rules, and architectural decisions from conversation transcripts.

Your task: Extract business invariants — rules, constraints, decisions, and standards that an organization should follow consistently.

A business invariant is NOT:
- A one-time task or action item
- A personal preference without organizational impact
- A fact or observation without a prescriptive element
- Code-level implementation details (those are code standards, not business invariants)

A business invariant IS:
- A decision that constrains future behavior ("always do X", "never do Y")
- A rule with organizational reasoning behind it
- A constraint that has consequences if violated
- A standard that should be consistently applied across similar situations

For each invariant found, output:
- id: kebab-case unique identifier
- domain: one of [ip-strategy, architecture-decisions, go-to-market, operations, legal-process, finance, communication, product-strategy, investor-relations]
- priority: 10 (critical), 20 (important), or 30 (advisory)
- invariant: the rule as a single, complete statement
- rationale: WHY this is the standard (the reasoning, not just the rule)
- consequences: what goes wrong if this is violated
- applies_to: array of activities this constrains
- exceptions: array of when this doesn't apply (empty array if universal)
- confidence: 0.0-1.0 how confident you are this is a real organizational invariant vs a one-time comment

Output valid JSON only. No markdown, no explanation. Format:
{ "invariants": [...] }

If no business invariants are found, return { "invariants": [] }.`;

  const userPrompt = domain_hint
    ? `Extract business invariants from this conversation. Domain hint: ${domain_hint}\n\n---\n\n${session_text}`
    : `Extract business invariants from this conversation.\n\n---\n\n${session_text}`;

  let candidates;
  try {
    const client = getBedrockClient();
    const command = new InvokeModelCommand({
      modelId: modelId,
      contentType: 'application/json',
      accept: 'application/json',
      body: JSON.stringify({
        anthropic_version: 'bedrock-2023-05-31',
        max_tokens: 16384,
        system: systemPrompt,
        messages: [{ role: 'user', content: userPrompt }]
      })
    });
    const response = await client.send(command);
    const responseBody = JSON.parse(new TextDecoder().decode(response.body));
    const text = responseBody.content?.[0]?.text || '{"invariants":[]}';

    // Parse JSON (handle markdown code blocks if present)
    const cleaned = text.replace(/```json\n?/g, '').replace(/```\n?/g, '').trim();
    candidates = JSON.parse(cleaned).invariants || [];
    if (!Array.isArray(candidates)) {
      // Surface a malformed model reply as an extraction failure rather than
      // crashing later when the value is iterated.
      throw new Error('Model response did not contain an "invariants" array');
    }
  } catch (err) {
    console.error('[MCP] Bedrock extraction failed:', err.message);
    return ingestErrorResult('extraction_failed', `LLM extraction failed: ${err.message}`);
  }

  // Model output is untrusted: keep only entries that carry the two fields every
  // later step dereferences (id and invariant text).
  const wellFormed = candidates.filter(c =>
    c !== null &&
    typeof c === 'object' &&
    typeof c.id === 'string' &&
    c.id !== '' &&
    typeof c.invariant === 'string'
  );
  if (wellFormed.length !== candidates.length) {
    console.warn(`[MCP] Discarded ${candidates.length - wellFormed.length} malformed candidate(s) from extraction`);
  }
  candidates = wellFormed;

  if (candidates.length === 0) {
    return { content: [{ type: 'text', text: JSON.stringify({
      candidates: [],
      extraction_summary: { total_extracted: 0, message: 'No business invariants found in this conversation' }
    }) }] };
  }

  // --- Deduplication against existing corpus ---
  // NOTE(review): this query is not filtered by company_id, so candidates are compared
  // against (and may report existing_id values from) every tenant's invariants, and the
  // INSERT below resolves conflicts on pattern_id alone. Confirm whether the corpus is
  // meant to be shared across companies.
  let existingInvariants = [];
  try {
    const existing = await executeQuery(`
      SELECT pattern_id, element, rule, domain
      FROM rapport.standards_patterns
      WHERE content_type = 'business_invariant'
    `);
    existingInvariants = existing.rows;
  } catch (err) {
    // Best effort: without the corpus every candidate is simply reported as new.
    console.error('[MCP] Dedup query failed:', err.message);
  }

  // Tokenize each stored rule once instead of once per candidate.
  const existingIndex = existingInvariants.map(existing => ({
    id: existing.pattern_id,
    words: ingestSignificantWords(existing.rule)
  }));

  // Simple dedup: check if candidate ID or invariant text closely matches existing
  for (const candidate of candidates) {
    candidate.dedup_status = 'new';
    const candidateWords = ingestSignificantWords(candidate.invariant);
    for (const existing of existingIndex) {
      if (candidate.id === existing.id) {
        candidate.dedup_status = 'duplicate_id';
        candidate.existing_id = existing.id;
        break;
      }
      // Fuzzy text match: if >60% of words overlap, flag as potential duplicate
      const larger = Math.max(candidateWords.size, existing.words.size);
      if (larger === 0) continue; // nothing to compare; avoids a 0/0 similarity
      const overlap = [...candidateWords].filter(w => existing.words.has(w)).length;
      const similarity = overlap / larger;
      if (similarity > 0.6) {
        candidate.dedup_status = 'potential_duplicate';
        candidate.existing_id = existing.id;
        candidate.similarity = Math.round(similarity * 100);
        break;
      }
    }
  }

  // --- Commit (if not dry_run) ---
  let committed = 0;
  if (!dry_run) {
    for (const candidate of candidates) {
      if (candidate.dedup_status === 'duplicate_id') continue;
      if (candidate.dedup_status === 'potential_duplicate') continue;
      // A missing or non-numeric confidence counts as 0 (as it does in the summary
      // average), so it is not committed.
      if (!(Number(candidate.confidence) >= 0.5)) continue;

      const patternId = candidate.id;
      const title = patternId.replace(/-/g, ' ').replace(/\b\w/g, c => c.toUpperCase());
      // Tolerate a model reply where applies_to is absent or not an array of strings.
      const appliesTo = Array.isArray(candidate.applies_to)
        ? candidate.applies_to.filter(a => typeof a === 'string')
        : [];
      const keywords = appliesTo
        .flatMap(a => a.split(/\s+/))
        .filter(w => w.length > 2)
        .slice(0, 10);

      try {
        // Avoid writing the literal string "undefined" into file_name when no label is given.
        const fileName = `mcp-extraction/${source_label || 'unlabeled'}/${patternId}.yaml`;
        await executeQuery(`
          INSERT INTO rapport.standards_patterns (
            pattern_id, file_name, element, title, rule, category, domain,
            content_type, maturity, correlation, source, scope,
            rationale, consequences, exceptions, source_context,
            applicable_files, keywords, priority, active,
            company_id,
            created_at, last_updated
          ) VALUES (
            $1, $2, $3, $3, $4, $5, $5,
            'business_invariant', $6, 1.00, 'mcp-extraction', 'organization',
            $7, $8, $9, $10,
            $11, $12, $13, TRUE,
            $14,
            NOW(), NOW()
          )
          ON CONFLICT (pattern_id) DO UPDATE SET
            rule = EXCLUDED.rule,
            rationale = EXCLUDED.rationale,
            consequences = EXCLUDED.consequences,
            exceptions = EXCLUDED.exceptions,
            source_context = EXCLUDED.source_context,
            last_updated = NOW(),
            last_seen_at = NOW(),
            occurrence_count = COALESCE(rapport.standards_patterns.occurrence_count, 0) + 1
        `, [
          patternId, fileName, title, candidate.invariant,
          candidate.domain, auto_maturity,
          candidate.rationale, candidate.consequences,
          JSON.stringify(candidate.exceptions || []),
          source_label,
          appliesTo,
          keywords,
          candidate.priority || 20,
          user.company_id
        ]);
        candidate.committed = true;
        committed++;
      } catch (err) {
        // One failed insert must not abort the rest; the failure is recorded on the candidate.
        console.error(`[MCP] Failed to commit ${patternId}:`, err.message);
        candidate.committed = false;
        candidate.commit_error = err.message;
      }
    }
  }

  // --- Response ---
  const result = {
    candidates: candidates.map(c => ({
      id: c.id,
      domain: c.domain,
      priority: c.priority,
      invariant: c.invariant,
      rationale: c.rationale,
      consequences: c.consequences,
      applies_to: c.applies_to,
      exceptions: c.exceptions,
      confidence: c.confidence,
      dedup_status: c.dedup_status,
      existing_id: c.existing_id,
      similarity: c.similarity,
      committed: c.committed
    })),
    extraction_summary: {
      total_extracted: candidates.length,
      new_invariants: candidates.filter(c => c.dedup_status === 'new').length,
      duplicates: candidates.filter(c => c.dedup_status !== 'new').length,
      committed: committed,
      dry_run: dry_run,
      model: model,
      source_label: source_label,
      domains_covered: [...new Set(candidates.map(c => c.domain))],
      average_confidence: Math.round(
        candidates.reduce((sum, c) => sum + (c.confidence || 0), 0) / candidates.length * 100
      ) / 100
    }
  };

  return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
}
1032
+
1033
+ // ============================================================
1034
+ // JSON-RPC Message Router
1035
+ // ============================================================
1036
+
1037
/**
 * Routes a single JSON-RPC 2.0 message to the matching MCP handler.
 *
 * @param {object} message - Parsed JSON-RPC message (`method`, optional `params`, optional `id`).
 * @param {object} [user] - Authenticated user; only `email` is read here (for logging),
 *   the object itself is passed through to `callTool`.
 * @returns {Promise<object|null>} A JSON-RPC response object, or `null` when the message
 *   is a notification and therefore must not be answered.
 */
async function handleJsonRpc(message, user) {
  const { method, params, id } = message;
  const toolName = method === 'tools/call' ? params?.name : null;
  console.log(`[MCP] JSON-RPC method=${method}${toolName ? ` tool=${toolName}` : ''} id=${id} user=${user?.email}`);

  switch (method) {
    case 'initialize':
      return {
        jsonrpc: '2.0',
        id,
        result: {
          protocolVersion: PROTOCOL_VERSION,
          capabilities: {
            tools: { listChanged: false }
          },
          serverInfo: SERVER_INFO
        }
      };

    case 'notifications/initialized':
      return null;

    case 'ping':
      return { jsonrpc: '2.0', id, result: {} };

    case 'tools/list':
      return { jsonrpc: '2.0', id, result: { tools: TOOLS } };

    case 'tools/call': {
      // Validate before destructuring: a request without params (or without a tool
      // name) is a protocol error, not an unhandled exception.
      if (params === null || typeof params !== 'object' || typeof params.name !== 'string') {
        return {
          jsonrpc: '2.0',
          id,
          error: { code: -32602, message: 'Invalid params: tools/call requires a string "name"' }
        };
      }
      const { name, arguments: args } = params;
      try {
        const result = await callTool(name, args || {}, user);
        return { jsonrpc: '2.0', id, result };
      } catch (error) {
        // Tool failures are reported inside a successful JSON-RPC result with isError set.
        console.error(`[MCP] Tool ${name} error:`, error.message);
        return {
          jsonrpc: '2.0',
          id,
          result: {
            content: [{ type: 'text', text: JSON.stringify({ error: 'tool_error', message: error.message }) }],
            isError: true
          }
        };
      }
    }

    default:
      // A message without an id is a notification; JSON-RPC 2.0 forbids replying to
      // it, even to report an unknown method.
      if (id === undefined) {
        return null;
      }
      return {
        jsonrpc: '2.0',
        id,
        error: { code: -32601, message: `Method not found: ${method}` }
      };
  }
}
1091
+
1092
// Public surface of the MCP core, grouped as static protocol data followed by the
// functions the handlers call. Key order matches the original export list.
const mcpCoreExports = {
  // Static protocol metadata and configuration
  SERVER_INFO: SERVER_INFO,
  PROTOCOL_VERSION: PROTOCOL_VERSION,
  CORS_HEADERS: CORS_HEADERS,
  TOOLS: TOOLS,
  CATEGORY_WEIGHTS: CATEGORY_WEIGHTS,

  // Auth, JSON-RPC routing, tool dispatch, and ranking/formatting helpers
  validateApiToken: validateApiToken,
  handleJsonRpc: handleJsonRpc,
  callTool: callTool,
  rankStandards: rankStandards,
  formatInjection: formatInjection,
};

module.exports = mcpCoreExports;