@equilateral_ai/mindmeld 3.4.0 → 3.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,6 +18,56 @@ const path = require('path');
18
18
  const fs = require('fs').promises;
19
19
  const crypto = require('crypto');
20
20
 
21
/**
 * Scrub sensitive data from text before sending to MindMeld API.
 *
 * Replaces AWS access key IDs, AWS secret keys, HTTP bearer credentials,
 * API tokens, passwords, database connection strings, PEM private keys and
 * generic `secret=` / `token=` assignments with the literal `[REDACTED]`.
 * For key/value style matches the key name is redacted together with the
 * value (the whole match is replaced).
 *
 * This is best-effort pattern matching: it reduces accidental leakage but is
 * not a guarantee that no secret survives.
 *
 * @param {string} text - Text that may contain secrets
 * @returns {string} Text with secrets replaced by [REDACTED]; non-string
 *   input is returned unchanged
 */
function scrubSecrets(text) {
  if (typeof text !== 'string') return text;

  const patterns = [
    // AWS access key IDs: long-term (AKIA) and temporary STS (ASIA) keys
    /A[KS]IA[0-9A-Z]{16}/g,
    // AWS Secret Keys
    /(?:aws_secret_access_key|secret_key|secretAccessKey)\s*[=:]\s*['"]?[A-Za-z0-9/+=]{40}['"]?/gi,
    // HTTP bearer credentials ("Authorization: Bearer <token>") — the token
    // follows whitespace, not '=' or ':', so it needs its own pattern
    /\bbearer\s+[A-Za-z0-9_\-.~+\/]{20,}=*/gi,
    // Generic API tokens
    /(?:api[_-]?key|api[_-]?token|auth[_-]?token|bearer)\s*[=:]\s*['"]?[A-Za-z0-9_\-\.]{20,}['"]?/gi,
    // Passwords
    /(?:password|passwd|pwd)\s*[=:]\s*['"]?[^\s'"]{4,}['"]?/gi,
    // Connection strings (including postgresql://, mongodb+srv://, rediss://)
    /(?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|rediss?):\/\/[^\s'"]+/gi,
    // PEM private keys: unlabelled (PKCS#8) plus RSA / EC / DSA / OPENSSH / ENCRYPTED
    /-----BEGIN\s+(?:(?:RSA|EC|DSA|OPENSSH|ENCRYPTED)\s+)?PRIVATE\s+KEY-----[\s\S]*?-----END\s+(?:(?:RSA|EC|DSA|OPENSSH|ENCRYPTED)\s+)?PRIVATE\s+KEY-----/g,
    // Generic secrets
    /(?:secret|token|credential)\s*[=:]\s*['"]?[A-Za-z0-9_\-\.\/+=]{16,}['"]?/gi,
  ];

  // Patterns are applied in order; each pass works on the output of the previous one.
  return patterns.reduce(
    (scrubbed, pattern) => scrubbed.replace(pattern, '[REDACTED]'),
    text
  );
}
54
+
55
/**
 * Deep-scrub secrets from a value by traversing all string values.
 *
 * Strings are passed through scrubSecrets; arrays and objects are copied
 * recursively (own enumerable string keys only); other primitives are
 * returned as-is. The input is never mutated.
 *
 * Cyclic references are supported: a structure that refers back to itself is
 * copied once and the copy refers back to itself in the same way. Date
 * values are cloned rather than flattened into an empty object.
 *
 * @param {*} obj - Object, array, or primitive to scrub
 * @returns {*} Scrubbed copy (original is not mutated)
 */
function scrubSecretsDeep(obj) {
  // Maps each visited source object to its copy so cycles terminate.
  // Kept internal (not a parameter) so `arr.map(scrubSecretsDeep)` stays safe.
  const copies = new WeakMap();

  const walk = (value) => {
    if (typeof value === 'string') return scrubSecrets(value);
    if (value === null || value === undefined || typeof value !== 'object') return value;
    if (copies.has(value)) return copies.get(value);

    // Object.entries(date) is empty, so a Date would otherwise become {}.
    if (value instanceof Date) return new Date(value.getTime());

    if (Array.isArray(value)) {
      const arrayCopy = [];
      copies.set(value, arrayCopy);
      // forEach skips holes; restoring length keeps sparse arrays sparse.
      value.forEach((item, index) => {
        arrayCopy[index] = walk(item);
      });
      arrayCopy.length = value.length;
      return arrayCopy;
    }

    const objectCopy = {};
    copies.set(value, objectCopy);
    for (const [key, child] of Object.entries(value)) {
      // defineProperty (not assignment) so an own "__proto__" key is copied
      // as data instead of replacing the copy's prototype.
      Object.defineProperty(objectCopy, key, {
        value: walk(child),
        enumerable: true,
        writable: true,
        configurable: true,
      });
    }
    return objectCopy;
  };

  return walk(obj);
}
70
+
21
71
  // LLM Pattern Detection (optional - requires ANTHROPIC_API_KEY)
22
72
  let LLMPatternDetector = null;
23
73
  try {
@@ -200,7 +250,7 @@ async function harvestPatterns(sessionTranscript) {
200
250
  ? sessionTranscript
201
251
  : sessionTranscript.transcript || JSON.stringify(sessionTranscript);
202
252
 
203
- llmAnalysis = await detector.analyzeSessionTranscript(transcriptText, {
253
+ llmAnalysis = await detector.analyzeSessionTranscript(scrubSecrets(transcriptText), {
204
254
  projectName: path.basename(process.cwd()),
205
255
  filesChanged: sessionTranscript.filesChanged || []
206
256
  });
@@ -238,23 +288,23 @@ async function harvestPatterns(sessionTranscript) {
238
288
  // 2. Validate against standards
239
289
  const validationResults = await validatePatterns(mindmeld, patterns);
240
290
 
241
- // 3. Record violations
291
+ // 3. Record violations (scrub secrets before sending to API)
242
292
  for (const result of validationResults.violations) {
243
- await mindmeld.recordViolation({
293
+ await mindmeld.recordViolation(scrubSecretsDeep({
244
294
  pattern: result.pattern,
245
295
  violations: result.violations,
246
296
  sessionId: sessionId,
247
297
  userId: userId
248
- });
298
+ }));
249
299
  }
250
300
 
251
- // 4. Reinforce valid patterns
301
+ // 4. Reinforce valid patterns (scrub secrets before sending to API)
252
302
  for (const result of validationResults.valid) {
253
- await mindmeld.reinforcePattern({
303
+ await mindmeld.reinforcePattern(scrubSecretsDeep({
254
304
  pattern: result.pattern,
255
305
  sessionId: sessionId,
256
306
  userId: userId
257
- });
307
+ }));
258
308
  }
259
309
 
260
310
  // 5. Check for promotion candidates
@@ -274,7 +324,26 @@ async function harvestPatterns(sessionTranscript) {
274
324
  console.error('[MindMeld] Plan harvesting failed (non-fatal):', error.message);
275
325
  }
276
326
 
277
- // 7. Log results
327
+ // 7. Detect and send corrections from conversation text
328
+ let correctionsDetected = 0;
329
+ try {
330
+ const transcriptText = typeof sessionTranscript === 'string'
331
+ ? sessionTranscript
332
+ : sessionTranscript.transcript || JSON.stringify(sessionTranscript);
333
+
334
+ const corrections = detectCorrections(transcriptText);
335
+ correctionsDetected = corrections.length;
336
+
337
+ if (corrections.length > 0) {
338
+ console.error(`[MindMeld] Detected ${corrections.length} correction(s) in session`);
339
+ await sendCorrections(corrections, authToken, apiConfig);
340
+ console.error(`[MindMeld] Sent ${corrections.length} correction(s) to API`);
341
+ }
342
+ } catch (error) {
343
+ console.error('[MindMeld] Correction harvesting failed (non-fatal):', error.message);
344
+ }
345
+
346
+ // 8. Log results
278
347
  const elapsed = Date.now() - startTime;
279
348
  const summary = {
280
349
  patternsDetected: patterns.length,
@@ -282,6 +351,7 @@ async function harvestPatterns(sessionTranscript) {
282
351
  reinforced: validationResults.valid.length,
283
352
  promotionCandidates: candidates.length,
284
353
  plansHarvested: harvestedPlans.length,
354
+ correctionsDetected: correctionsDetected,
285
355
  plans: harvestedPlans,
286
356
  readmeStale: null,
287
357
  readmeUpdateRecommended: false,
@@ -436,10 +506,10 @@ function parsePlanFile(filename, content, stat) {
436
506
  sizeBytes: stat.size,
437
507
  lineCount: lines.length,
438
508
  sections: Object.keys(sections),
439
- context: sections['context'] || null,
509
+ context: scrubSecrets(sections['context'] || null),
440
510
  filesReferenced: fileRefs.slice(0, 20),
441
511
  projectHints: projectHints,
442
- content: content
512
+ content: scrubSecrets(content)
443
513
  };
444
514
  }
445
515
 
@@ -502,6 +572,109 @@ async function checkPromotionCandidates(mindmeld, validPatterns) {
502
572
  return candidates;
503
573
  }
504
574
 
575
/**
 * Detect correction language patterns in conversation text.
 *
 * Scans for phrases indicating the user corrected the AI's approach
 * ("that's wrong", "don't use", "revert that", ...). Matching is
 * case-insensitive and treats typographic apostrophes (‘ ’) the same as the
 * ASCII apostrophe. Every match of every pattern is reported, so overlapping
 * phrases (e.g. "no, don't use") yield one entry per pattern. Results are
 * grouped by pattern in declaration order, not sorted by position.
 *
 * @param {string} conversationText - Raw conversation text to scan
 * @returns {Array<{correction_text: string, context_before: string, context_after: string, pattern_matched: string}>}
 *   One entry per match, with up to 100 characters of context on either
 *   side taken from the original text; empty array for empty or non-string
 *   input
 */
function detectCorrections(conversationText) {
  if (typeof conversationText !== 'string' || conversationText.length === 0) return [];

  const CONTEXT_CHARS = 100;

  const correctionPatterns = [
    /no,? don'?t/gi,
    /that'?s wrong/gi,
    /instead,? do/gi,
    /not like that/gi,
    /revert that/gi,
    /undo that/gi,
    /shouldn'?t have/gi,
    /wrong approach/gi,
    /bad pattern/gi,
    /don'?t use/gi,
    /never do that/gi,
    /stop doing/gi,
  ];

  // Match against a copy with curly apostrophes normalized to "'". The
  // replacement is one UTF-16 unit for one, so match indices are valid in the
  // original text and the reported snippets keep the user's exact characters.
  const searchable = conversationText.replace(/[\u2018\u2019]/g, "'");

  const corrections = [];

  for (const pattern of correctionPatterns) {
    for (const match of searchable.matchAll(pattern)) {
      const matchStart = match.index;
      const matchEnd = matchStart + match[0].length;

      corrections.push({
        correction_text: conversationText.slice(matchStart, matchEnd),
        context_before: conversationText.slice(Math.max(0, matchStart - CONTEXT_CHARS), matchStart),
        // slice() clamps the end index to the string length
        context_after: conversationText.slice(matchEnd, matchEnd + CONTEXT_CHARS),
        pattern_matched: pattern.source,
      });
    }
  }

  return corrections;
}
621
+
622
/**
 * Send detected corrections to the MindMeld API.
 *
 * POSTs `{ corrections, source: 'hook-harvest' }` as JSON to
 * `<apiUrl>/corrections`, after passing the corrections through
 * scrubSecretsDeep. Uses https or http depending on the URL protocol.
 *
 * The request is bounded by a timeout so a stalled endpoint cannot hang the
 * hook; on timeout the request is destroyed and the promise rejects.
 *
 * @param {Array} corrections - Array of correction objects from detectCorrections
 * @param {string} authToken - Auth token for API calls; when falsy no
 *   Authorization header is sent
 * @param {{apiUrl: string, timeoutMs?: number}} apiConfig - API configuration;
 *   `timeoutMs` is optional and defaults to 5000
 * @returns {Promise<string|undefined>} Resolves with the raw response body on a
 *   2xx status, or undefined when there is nothing to send. Rejects on
 *   non-2xx status, network error, or timeout.
 */
async function sendCorrections(corrections, authToken, apiConfig) {
  if (!corrections || corrections.length === 0) return;

  const https = require('https');
  const http = require('http');

  const DEFAULT_TIMEOUT_MS = 5000;
  const requestedTimeout = Number(apiConfig.timeoutMs);
  const timeoutMs = requestedTimeout > 0 ? requestedTimeout : DEFAULT_TIMEOUT_MS;

  // Tolerate a trailing slash on apiUrl so the path is not "//corrections".
  const baseUrl = String(apiConfig.apiUrl).replace(/\/+$/, '');
  const parsedUrl = new URL(`${baseUrl}/corrections`);
  const transport = parsedUrl.protocol === 'https:' ? https : http;

  const body = JSON.stringify({
    corrections: scrubSecretsDeep(corrections),
    source: 'hook-harvest',
  });

  const headers = {
    'Content-Type': 'application/json',
    'Content-Length': Buffer.byteLength(body),
  };
  if (authToken) {
    headers['Authorization'] = `Bearer ${authToken}`;
  }

  return new Promise((resolve, reject) => {
    const req = transport.request(
      {
        hostname: parsedUrl.hostname,
        port: parsedUrl.port,
        // Keep any query string that was part of the configured URL.
        path: `${parsedUrl.pathname}${parsedUrl.search}`,
        method: 'POST',
        headers,
        timeout: timeoutMs,
      },
      (res) => {
        let data = '';
        // Decode as a stream so multi-byte characters split across chunks survive.
        res.setEncoding('utf8');
        res.on('data', (chunk) => {
          data += chunk;
        });
        res.on('error', reject);
        res.on('end', () => {
          if (res.statusCode >= 200 && res.statusCode < 300) {
            resolve(data);
          } else {
            reject(new Error(`Corrections API returned ${res.statusCode}: ${data}`));
          }
        });
      }
    );

    // The 'timeout' event only signals inactivity; the request must be torn
    // down explicitly, which surfaces as an 'error' and rejects the promise.
    req.on('timeout', () => {
      req.destroy(new Error(`Corrections API request timed out after ${timeoutMs}ms`));
    });
    req.on('error', reject);
    req.write(body);
    req.end();
  });
}
677
+
505
678
  /**
506
679
  * Generate session ID using crypto for consistency
507
680
  */
@@ -650,20 +823,95 @@ async function generatePostCompactContext(summary, llmAnalysis) {
650
823
  return sections.join('\n');
651
824
  }
652
825
 
653
- // Execute if called directly
654
- if (require.main === module) {
655
- // Read session transcript from stdin or args
656
- const input = process.argv[2];
826
/**
 * Read stdin with timeout.
 * Claude Code hooks receive JSON input via stdin, not command-line arguments.
 *
 * Resolves with everything read once stdin ends, or with whatever has been
 * read so far when the timeout elapses. Resolves with an empty string when
 * stdin is a TTY or emits an error. Never rejects.
 *
 * Once settled, the data/end listeners are removed and stdin is paused so a
 * still-open pipe does not keep the event loop alive after a timeout.
 *
 * @param {number} [timeoutMs=2000] - Maximum time to wait for stdin to end
 * @returns {Promise<string>} stdin content or empty string
 */
function readStdin(timeoutMs = 2000) {
  return new Promise((resolve) => {
    const stdin = process.stdin;

    // Interactive terminal: nothing is being piped in, do not wait.
    if (stdin.isTTY) {
      resolve('');
      return;
    }

    let data = '';
    let settled = false;
    let timer = null;

    const onData = (chunk) => {
      data += chunk;
    };
    const onEnd = () => finish(data);
    const onError = () => finish('');

    function finish(value) {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      stdin.removeListener('data', onData);
      stdin.removeListener('end', onEnd);
      // The error listener is intentionally left attached so a late stream
      // error is still absorbed instead of becoming an uncaught exception.
      stdin.pause();
      resolve(value);
    }

    timer = setTimeout(() => finish(data), timeoutMs);

    stdin.setEncoding('utf-8');
    stdin.on('data', onData);
    stdin.on('end', onEnd);
    stdin.on('error', onError);
    stdin.resume();
  });
}
855
+
856
/**
 * Resolve the session transcript from whichever input source is available.
 *
 * Sources, in order of preference:
 *   1. JSON on stdin (Claude Code hooks protocol). If it carries a
 *      `transcript_path`, that file is read and parsed; if it carries
 *      `transcript`, `messages` or `sessionId` it is treated as an inline
 *      transcript; otherwise the raw stdin text is handed to the parser.
 *   2. process.argv[2] (legacy / manual testing, limited by OS ARG_MAX).
 *
 * Any failure while handling stdin is logged and falls through to argv.
 *
 * @returns {Promise<Object>} Parsed session transcript, or null when neither
 *   source provided input
 */
async function readTranscriptInput() {
  // Source 1: stdin, as delivered by Claude Code hooks
  try {
    const rawStdin = await readStdin();
    if (rawStdin) {
      const payload = JSON.parse(rawStdin);

      if (!payload.transcript_path) {
        // No file reference: either the payload is itself the transcript
        // (inline JSON) or we let the parser look at the raw stdin text.
        const isInlineTranscript = payload.transcript || payload.messages || payload.sessionId;
        return parseSessionTranscript(isInlineTranscript ? JSON.stringify(payload) : rawStdin);
      }

      // File reference: read from disk, which sidesteps the ARG_MAX limit
      console.error(`[MindMeld] Reading transcript from file: ${payload.transcript_path}`);
      const fileText = await fs.readFile(payload.transcript_path, 'utf-8');
      const parsed = parseSessionTranscript(fileText);

      // Carry the hook's session_id over when the transcript has none
      if (payload.session_id && !parsed.sessionId) {
        parsed.sessionId = payload.session_id;
      }
      return parsed;
    }
  } catch (e) {
    console.error(`[MindMeld] stdin parse failed (falling back to argv): ${e.message}`);
  }

  // Source 2: process.argv[2] (legacy, limited to ~1MB on macOS)
  const [, , legacyArg] = process.argv;
  if (!legacyArg) {
    return null;
  }

  console.error('[MindMeld] Reading transcript from argv (legacy mode)');
  return parseSessionTranscript(legacyArg);
}
903
+
904
+ // Execute if called directly
905
+ if (require.main === module) {
906
+ readTranscriptInput()
907
+ .then(async (sessionTranscript) => {
908
+ if (!sessionTranscript) {
909
+ console.error('[MindMeld] No transcript input received (stdin or argv)');
910
+ process.exit(0);
911
+ }
912
+
913
+ const result = await harvestPatterns(sessionTranscript);
664
914
 
665
- harvestPatterns(sessionTranscript)
666
- .then(async (result) => {
667
915
  // Generate and output context summary for post-compaction injection
668
916
  const postCompactContext = await generatePostCompactContext(
669
917
  result,
@@ -680,4 +928,4 @@ if (require.main === module) {
680
928
  });
681
929
  }
682
930
 
683
- module.exports = { harvestPatterns, harvestPlans, parseSessionTranscript, generatePostCompactContext };
931
+ module.exports = { harvestPatterns, harvestPlans, parseSessionTranscript, generatePostCompactContext, scrubSecrets, scrubSecretsDeep, detectCorrections, sendCorrections, readStdin, readTranscriptInput };
@@ -5,16 +5,29 @@
5
5
  * Records session completion and outcomes when a Claude Code session ends.
6
6
  * Calls POST /api/sessions/end with session metadata.
7
7
  *
8
+ * When reason === "clear" (user cleared context to continue), also harvests
9
+ * patterns from the transcript before context is lost. This covers the gap
10
+ * where PreCompact doesn't fire on /clear — only on compaction.
11
+ *
8
12
  * Input (stdin JSON from Claude Code):
9
13
  * { session_id, transcript_path, cwd, reason, hook_event_name }
10
14
  *
11
- * @equilateral_ai/mindmeld v3.3.0
15
+ * @equilateral_ai/mindmeld v3.5.0
12
16
  */
13
17
 
14
18
  const path = require('path');
15
19
  const fs = require('fs').promises;
16
20
  const { execSync } = require('child_process');
17
21
 
22
// Pattern harvesting is implemented by the pre-compact hook. Load it
// opportunistically: harvestPatterns stays null when the module cannot be loaded.
let harvestPatterns = null;
try {
  ({ harvestPatterns } = require('./pre-compact'));
} catch (error) {
  // pre-compact module not available — pattern harvesting on clear will be skipped
}
30
+
18
31
  /**
19
32
  * Load auth token for API calls
20
33
  * Priority: env var → project credentials.json → global ~/.mindmeld/auth.json
@@ -166,9 +179,94 @@ function readStdin() {
166
179
  });
167
180
  }
168
181
 
182
/**
 * Harvest patterns from transcript on clear events.
 *
 * Reads the JSONL transcript (only the trailing 200KB when the file is
 * larger), collects the text of every entry — string content, or the `text`
 * of `type: 'text'` content blocks, from `entry.message.content` or
 * `entry.content` — and delegates to pre-compact's harvestPatterns.
 * Unparseable lines are skipped.
 *
 * Never throws: every failure is logged as non-fatal and yields null.
 *
 * @param {string} transcriptPath - Path to the JSONL transcript file
 * @param {string} sessionId - Current session ID
 * @returns {Promise<Object|null>} Harvest results, or null when harvesting is
 *   unavailable, the transcript text is shorter than 100 characters, or an
 *   error occurred
 */
async function harvestPatternsOnClear(transcriptPath, sessionId) {
  const MAX_TRANSCRIPT_BYTES = 200 * 1024;
  const MIN_TRANSCRIPT_CHARS = 100;

  // The pre-compact module is loaded optionally; without it there is nothing to do.
  if (typeof harvestPatterns !== 'function') {
    console.error('[MindMeld] Clear-event harvest failed (non-fatal): pattern harvesting is unavailable');
    return null;
  }

  try {
    console.error('[MindMeld] Clear detected — harvesting patterns before context is lost');

    // Read transcript JSONL (cap at 200KB to keep processing fast)
    const stat = await fs.stat(transcriptPath);
    let transcriptContent;

    if (stat.size > MAX_TRANSCRIPT_BYTES) {
      // Read last 200KB — most recent context is most valuable
      const fd = await fs.open(transcriptPath, 'r');
      try {
        const buffer = Buffer.alloc(MAX_TRANSCRIPT_BYTES);
        const { bytesRead } = await fd.read(buffer, 0, buffer.length, stat.size - buffer.length);
        // Decode only what was actually read (the file may have shrunk since stat).
        transcriptContent = buffer.toString('utf-8', 0, bytesRead);
      } finally {
        // Always release the handle, even when the read fails.
        await fd.close();
      }
      // The window almost certainly starts mid-line: drop the partial first line.
      const firstNewline = transcriptContent.indexOf('\n');
      if (firstNewline !== -1) {
        transcriptContent = transcriptContent.substring(firstNewline + 1);
      }
    } else {
      transcriptContent = await fs.readFile(transcriptPath, 'utf-8');
    }

    // Parse JSONL — extract message text for pattern detection
    const lines = transcriptContent.split('\n').filter((line) => line.trim());
    const textParts = [];

    for (const line of lines) {
      try {
        const entry = JSON.parse(line);
        // Claude Code transcript entries have varied formats
        const content = entry.message?.content || entry.content;
        if (!content) continue;

        if (typeof content === 'string') {
          textParts.push(content);
        } else if (Array.isArray(content)) {
          // Content blocks — keep text blocks only; tolerate null/odd blocks
          for (const block of content) {
            if (block?.type === 'text' && block.text) {
              textParts.push(block.text);
            }
          }
        }
      } catch (e) {
        // Skip unparseable lines (including a truncated line at the window edge)
      }
    }

    const transcriptText = textParts.join('\n\n');

    if (transcriptText.length < MIN_TRANSCRIPT_CHARS) {
      console.error('[MindMeld] Transcript too short for pattern detection, skipping');
      return null;
    }

    // Delegate to pre-compact's full harvesting pipeline
    const result = await harvestPatterns({
      sessionId: sessionId,
      userId: process.env.USER || 'unknown',
      transcript: transcriptText
    });

    console.error(`[MindMeld] Clear-event harvest: ${result.patternsDetected || 0} patterns, ` +
      `${result.violations || 0} violations, ${result.reinforced || 0} reinforced` +
      (result.plansHarvested ? `, ${result.plansHarvested} plans` : ''));

    return result;
  } catch (error) {
    console.error(`[MindMeld] Clear-event harvest failed (non-fatal): ${error.message}`);
    return null;
  }
}
265
+
169
266
  /**
170
267
  * Main hook execution
171
268
  * Records session end via API call (fire-and-forget)
269
+ * On clear events, also harvests patterns from the transcript
172
270
  */
173
271
  async function recordSessionEnd() {
174
272
  const startTime = Date.now();
@@ -217,7 +315,7 @@ async function recordSessionEnd() {
217
315
  git_branch: gitBranch,
218
316
  session_data: {
219
317
  end_reason: reason,
220
- hook_version: '3.3.0'
318
+ hook_version: '3.5.0'
221
319
  }
222
320
  };
223
321
 
@@ -238,7 +336,7 @@ async function recordSessionEnd() {
238
336
  timeout: 3000
239
337
  };
240
338
 
241
- await new Promise((resolve) => {
339
+ const metadataPromise = new Promise((resolve) => {
242
340
  const req = http.request(options, (res) => {
243
341
  let body = '';
244
342
  res.on('data', (chunk) => { body += chunk; });
@@ -268,10 +366,21 @@ async function recordSessionEnd() {
268
366
  req.end();
269
367
  });
270
368
 
369
+ // Pattern harvesting on clear — PreCompact doesn't fire on /clear,
370
+ // so we harvest here before context is lost
371
+ let harvestResult = null;
372
+ const harvestPromise = (reason === 'clear' && transcriptPath && harvestPatterns)
373
+ ? harvestPatternsOnClear(transcriptPath, sessionId)
374
+ : Promise.resolve(null);
375
+
376
+ // Run metadata recording and pattern harvesting in parallel
377
+ [, harvestResult] = await Promise.all([metadataPromise, harvestPromise]);
378
+
271
379
  return {
272
380
  sessionId,
273
381
  reason,
274
382
  duration,
383
+ harvest: harvestResult,
275
384
  elapsed: Date.now() - startTime
276
385
  };
277
386