grov 0.5.8 → 0.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,7 +44,7 @@ ${firstPrompt.substring(0, 2000)}
44
44
 
45
45
  Extract as JSON:
46
46
  {
47
- "goal": "The main objective in 1-2 sentences",
47
+ "goal": "A single, high-density sentence describing the technical intent. RULES: 1. No bullet points, no newlines. 2. Must include the main Technology Name (e.g. 'Prometheus', 'React', 'AWS') if inferred. 3. If the user provided a list, synthesize it into one summary statement. Example: 'Implement Prometheus metrics collection with counter and gauge primitives' instead of 'Add metrics: - counters - gauges'.",
48
48
  "expected_scope": ["list", "of", "files/folders", "likely", "to", "be", "modified"],
49
49
  "constraints": ["EXPLICIT restrictions from the user - see examples below"],
50
50
  "success_criteria": ["How to know when the task is complete"],
@@ -103,7 +103,7 @@ RESPONSE RULES:
103
103
  }
104
104
  const parsed = JSON.parse(jsonMatch[0]);
105
105
  return {
106
- goal: typeof parsed.goal === 'string' ? parsed.goal : firstPrompt.substring(0, 200),
106
+ goal: typeof parsed.goal === 'string' ? parsed.goal : '', // Don't fall back to the user prompt
107
107
  expected_scope: Array.isArray(parsed.expected_scope)
108
108
  ? parsed.expected_scope.filter((s) => typeof s === 'string')
109
109
  : [],
@@ -139,7 +139,7 @@ function createFallbackIntent(prompt) {
139
139
  // Extract file patterns
140
140
  const filePatterns = prompt.match(/[\w\/.-]+\.(ts|js|tsx|jsx|py|go|rs|java|css|html|md)/g) || [];
141
141
  return {
142
- goal: prompt.substring(0, 200),
142
+ goal: '', // Empty - don't copy user prompt as goal; goal should be synthesized only
143
143
  expected_scope: [...new Set(filePatterns)].slice(0, 5),
144
144
  constraints: [],
145
145
  success_criteria: [],
@@ -311,6 +311,7 @@ Return a JSON object with these fields:
311
311
  - task_type: one of "information", "planning", or "implementation"
312
312
  - action: one of "continue", "task_complete", "new_task", or "subtask_complete"
313
313
  - task_id: existing session_id "${currentSession?.session_id || 'NEW'}" or "NEW" for new task
314
+ - current_goal: "SYNTHESIZE a concise goal (max 150 chars). RULES: 1. If original_goal is empty, SYNTHESIZE from user messages. 2. DO NOT copy the user's request verbatim - summarize it. 3. Start with Technology/Component name. 4. One sentence, no newlines. Example: 'TypeScript Logger with level filtering and JSON output' NOT 'Create a structured logger in /home/... with debug, info...'"
314
315
  - reasoning: brief explanation of why you made this decision${compressionInstruction}
315
316
  </output>
316
317
 
@@ -320,28 +321,49 @@ First, analyze the original_goal to understand what kind of task this is. Do not
320
321
  TYPE A - Information Request
321
322
  The user wants to learn or understand something. They are seeking knowledge, not asking for any changes or decisions to be made. The answer itself is what they need.
322
323
 
323
- Think about whether the user is curious about how something works, wants an explanation of a concept, or is asking for clarification about existing behavior.
324
+ This INCLUDES clarifying questions about what the assistant already explained:
325
+ - Asking for confirmation: "Are you sure about X?"
326
+ - Asking for clarification: "Did you mean Y?"
327
+ - Checking understanding: "Does this also apply to Z?"
324
328
 
325
- Examples of information requests in different phrasings:
329
+ These questions REFERENCE the previous response and seek clarification, not new decisions.
330
+
331
+ Think about whether the user is curious about how something works, wants an explanation of a concept, or is asking about something the assistant already said.
332
+
333
+ Examples of information requests:
326
334
  - "How does the authentication system work?"
327
335
  - "Explica-mi cum functioneaza cache-ul"
328
336
  - "What is the difference between Redis and Memcached?"
329
337
  - "Can you walk me through the payment flow?"
330
338
  - "I don't understand why this function returns null"
331
339
  - "Ce face acest cod?"
340
+ - "Are you sure this method works for async calls?" (asking about previous explanation)
341
+ - "When you said RAM storage, did you mean on the user's machine?" (clarifying what was said)
342
+ - "Does this approach also handle edge cases?" (checking understanding)
332
343
 
333
344
  TYPE B - Planning or Decision Request
334
- The user wants to figure out the best approach before taking action. They need to make a decision or create a plan. The conversation may involve exploring options, discussing tradeoffs, or clarifying requirements.
345
+ The user is asking the assistant to HELP THEM CHOOSE between options. The decision does NOT exist yet - they are deciding now. The user introduces alternatives and wants a recommendation or to weigh tradeoffs together.
346
+
347
+ Think about whether the user is introducing new options to choose between, wants recommendations for how to build something, or is working toward a plan they will implement later.
348
+
349
+ KEY DISTINCTION from Information:
350
+ - Planning: User introduces options to choose between → "Should we use X or Y?"
351
+ - Information: User asks about what assistant already said → "You mentioned X, are you sure?"
352
+
353
+ If the assistant ALREADY explained or decided something, and the user is asking about THAT explanation, it is Information, not Planning.
335
354
 
336
- Think about whether the user is trying to decide between approaches, wants recommendations for how to build something, or is working toward a plan they will implement later.
355
+ Examples of planning requests:
356
+ - "How should we implement user authentication?" (no decision made yet)
357
+ - "What's the best way to handle caching for this API?" (asking for recommendation)
358
+ - "Cum ar trebui sa structuram baza de date?" (exploring options)
359
+ - "I'm thinking about using Redis vs Memcached, what do you recommend?" (user introduces options)
360
+ - "Let's figure out the architecture before we start coding" (planning session)
361
+ - "We need to decide on the approach for handling errors" (decision needed)
337
362
 
338
- Examples of planning requests in different phrasings:
339
- - "How should we implement user authentication?"
340
- - "What's the best way to handle caching for this API?"
341
- - "Cum ar trebui sa structuram baza de date?"
342
- - "I'm thinking about using Redis vs Memcached, what do you recommend?"
343
- - "Let's figure out the architecture before we start coding"
344
- - "We need to decide on the approach for handling errors"
363
+ NOT planning (these are Information):
364
+ - "Are you sure Redis is the right choice?" (asking about previous recommendation)
365
+ - "Did you mean async or sync?" (clarifying what was said)
366
+ - "Will this also work for the edge cases we discussed?" (checking understanding)
345
367
 
346
368
  TYPE C - Implementation Request
347
369
  The user wants actual changes made. They want code written, files edited, commands run, or something built. The task involves using tools to modify the codebase.
@@ -438,6 +460,15 @@ Reason: The question is about how to implement the original request.
438
460
  Example situation: Original goal was "explain how auth works", user asks "and how does the session storage work?"
439
461
  Decision: new_task
440
462
  Reason: This is a new information request, separate from the first.
463
+
464
+ CRITICAL - NEW TASK COMPLETED IN SAME TURN:
465
+ If the user's message starts a NEW task (different topic from original_goal) AND the assistant's response COMPLETES that new task in the same turn, use task_complete (NOT new_task).
466
+
467
+ Example: Original goal was "implement cache service", user now asks "build an EventEmitter class", assistant writes the complete EventEmitter code.
468
+ Decision: task_complete
469
+ Reason: The new task was requested AND completed. Use task_complete so it gets saved with the new goal.
470
+
471
+ The key insight: task_complete saves the memory. If you return new_task, the work won't be saved until a FUTURE completion. If Claude already finished the work, use task_complete.
441
472
  </step_3_detect_new_task>
442
473
 
443
474
  <important_notes>
@@ -480,16 +511,21 @@ RESPONSE RULES:
480
511
  if (!needsCompression && assistantResponse.length > 0) {
481
512
  analysis.step_reasoning = assistantResponse.substring(0, 1000);
482
513
  }
483
- debugLLM('analyzeTaskContext', `Result: task_type=${analysis.task_type}, action=${analysis.action}, reasoning="${analysis.reasoning?.substring(0, 150) || 'none'}"`);
514
+ debugLLM('analyzeTaskContext', `Result: task_type=${analysis.task_type}, action=${analysis.action}, goal="${analysis.current_goal?.substring(0, 50) || 'N/A'}" reasoning="${analysis.reasoning?.substring(0, 150) || 'none'}"`);
484
515
  return analysis;
485
516
  }
486
517
  catch (parseError) {
487
518
  debugLLM('analyzeTaskContext', `Parse error: ${String(parseError)}, using fallback`);
488
519
  // Fallback: continue existing session or create new
520
+ // Use existing goal if available, otherwise leave empty (don't copy user prompt)
521
+ const fallbackGoal = currentSession?.original_goal && currentSession.original_goal.length > 0
522
+ ? currentSession.original_goal
523
+ : ''; // Don't synthesize from user message - leave empty
489
524
  return {
490
525
  task_type: 'implementation',
491
526
  action: currentSession ? 'continue' : 'new_task',
492
527
  task_id: currentSession?.session_id || 'NEW',
528
+ current_goal: fallbackGoal,
493
529
  reasoning: 'Fallback due to parse error',
494
530
  step_reasoning: assistantResponse.substring(0, 1000),
495
531
  };
@@ -511,7 +547,7 @@ export function isReasoningExtractionAvailable() {
511
547
  export async function extractReasoningAndDecisions(formattedSteps, originalGoal) {
512
548
  const client = getAnthropicClient();
513
549
  if (formattedSteps.length < 50) {
514
- return { reasoning_trace: [], decisions: [] };
550
+ return { system_name: null, summary: null, reasoning_trace: [], decisions: [] };
515
551
  }
516
552
  const prompt = `<role>
517
553
  You are a Knowledge Engineer specialized in extracting reusable team knowledge from coding sessions.
@@ -533,114 +569,333 @@ ${formattedSteps.substring(0, 8000)}
533
569
 
534
570
  <instructions>
535
571
 
536
- We need TWO types of knowledge extracted:
572
+ *** GLOBAL STANDARDS FOR CODE REFERENCES ***
537
573
 
538
- TYPE A: CONCLUSIONS (Factual findings from the session)
574
+ We want "Code Anchors" (Searchable Names), NOT "Implementation Logic" (Syntax).
539
575
 
540
- What this means:
541
- These are FACTS discovered during the session. Things that were explicitly found, read, or confirmed in the code. A new developer reading these should immediately know WHERE to find things and WHAT values/patterns exist.
576
+ 1. NO SYNTAX / NO LOGIC:
577
+ - STRICTLY FORBIDDEN: \`if\`, \`for\`, \`while\`, \`=>\`, \`return\`, \`{ }\`, \`;\`.
578
+ - NEVER write snippet-style logic.
579
+ - BAD: "Uses \`user.id ? save() : null\` to persist." (This is logic)
580
+ - GOOD: "Uses \`save()\` method on \`User\` entity." (This is a named reference)
581
+
582
+ 2. USE "NAMED ENTITIES" ONLY:
583
+ - Treat code references as Proper Nouns (Substantive Proprii).
584
+ - Only reference Names of: Functions, Classes, File Paths, Constants, Env Vars, Config Keys.
585
+ - Format: Wrap them in single backticks (e.g., \`auth.ts\`, \`MAX_RETRIES\`).
586
+
587
+ 3. BE CONCISE:
588
+ - Do not paste long paths if not necessary. Use relative paths.
589
+ - BAD: \`src/features/users/controllers/auth.controller.ts\` (Too noisy)
590
+ - GOOD: \`auth.controller.ts\` (Sufficient anchor)
591
+
592
+ 4. FACTUAL EXTRACTION (CRITICAL FOR Q&A SESSIONS):
593
+
594
+ PURPOSE: All extracted knowledge must be FACTUAL STATEMENTS about the codebase,
595
+ NOT descriptions of the conversation or session.
596
+
597
+ WHY THIS MATTERS:
598
+ - Chunks are stored as embeddings for semantic search
599
+ - "User asked about caching" has DIFFERENT embedding than "Cache uses LRU eviction"
600
+ - If we store meta-descriptions, future searches will NOT match
601
+ - We need UNIFORM factual statements regardless of session type (Q&A or implementation)
602
+
603
+ FORBIDDEN PATTERNS (NEVER USE THESE):
604
+
605
+ BANNED PHRASES - do NOT start sentences with:
606
+ - "User asked...", "Explained that...", "Discussed..."
607
+ - "The conversation...", "This session...", "It was determined..."
608
+ - "We talked about...", "Question about...", "Answered...", "Covered...", "Explored..."
609
+
610
+ BANNED META-WORDS anywhere in text:
611
+ - "session", "conversation", "discussion", "chat"
612
+ - "user", "developer", "team" (when used as actors doing things)
613
+ - "asked", "explained", "answered", "clarified"
614
+ - "this memory", "this task", "this query"
615
+
616
+ TRANSFORMATION EXAMPLES:
617
+
618
+ Example 1 - Authentication Q&A:
619
+ Session context: User asked "How does auth work?" then explained JWT flow
620
+ BAD: "User asked about authentication. Explained it uses JWT tokens."
621
+ BAD: "Discussion covered the auth mechanism and its JWT implementation."
622
+ GOOD: "Authentication uses JWT tokens with 24h expiry for stateless verification."
542
623
 
543
- Must include:
544
- - Specific file paths (not just "auth files" but "src/lib/jwt.ts")
545
- - Specific values (not just "short expiry" but "1 hour access, 7 day refresh")
546
- - Specific patterns (not just "uses JWT" but "JWT with sub, email, type, teams payload")
547
- - Specific functions/classes (not just "middleware" but "requireAuth, optionalAuth preHandlers")
624
+ Example 2 - Caching explanation:
625
+ Session context: User asked "Why use Map for LRU?" then explained insertion order
626
+ BAD: "Explained why JavaScript Map was chosen for the LRU cache."
627
+ BAD: "Answered question about Map's insertion order property."
628
+ GOOD: "LRU Cache uses JavaScript Map because Map guarantees insertion order."
548
629
 
549
- Format: Start with "CONCLUSION: " prefix
630
+ Example 3 - Architecture discussion:
631
+ Session context: User asked "What pattern does Circuit Breaker use?"
632
+ BAD: "Discussed the Circuit Breaker pattern and its three states."
633
+ GOOD: "Circuit Breaker implements finite state machine with CLOSED, OPEN, HALF_OPEN states."
550
634
 
551
- Good examples:
552
- - "CONCLUSION: JWT tokens stored in ~/.grov/credentials.json with 1hr access/7d refresh expiry"
553
- - "CONCLUSION: Auth middleware in src/routes/auth.ts exports requireAuth and optionalAuth preHandlers"
554
- - "CONCLUSION: Device flow polling interval is 5 seconds, endpoint /auth/device/poll"
635
+ MENTAL MODEL FOR EXTRACTION:
555
636
 
556
- Bad examples:
557
- - "CONCLUSION: Found authentication files" (too vague, no paths)
558
- - "CONCLUSION: JWT is used for auth" (too generic, no specifics)
559
- - "CONCLUSION: Explored the codebase" (process description, not finding)
637
+ Ask yourself: "If I remove all context about WHO asked and WHEN, what FACTUAL
638
+ KNOWLEDGE about the CODE remains?"
560
639
 
640
+ Transform pattern:
641
+ 1. Identify the SUBJECT (component, function, pattern)
642
+ 2. Identify the BEHAVIOR (what it does, how it works)
643
+ 3. Identify the IMPLEMENTATION (specific details, values, files)
644
+ 4. Write: "[SUBJECT] [BEHAVIOR] [IMPLEMENTATION]"
561
645
 
562
- TYPE B: INSIGHTS (Your analysis and inferences)
646
+ Result pattern: "[Component] uses/implements/provides [mechanism] for/via/with [details]"
647
+
648
+ Examples of correct factual statements:
649
+ - "Retry Queue uses exponential backoff with 1s base delay"
650
+ - "EventEmitter implements copy-before-iterate for safe listener removal"
651
+ - "LRU Cache provides O(1) eviction via Map insertion order property"
652
+
653
+ SELF-CHECK BEFORE OUTPUT:
654
+
655
+ Before returning, verify EACH knowledge_pair and decision:
656
+ - Does it start with a code component/system, NOT "User/Explained/Discussed"?
657
+ - Is it a factual statement about code, NOT about the conversation?
658
+ - Could this exact sentence appear in technical documentation?
659
+ - If someone searches for this topic, would this sentence match?
660
+
661
+ If ANY answer is NO, rewrite it as a factual statement.
662
+
663
+ TYPE A: CONCLUSIONS (Factual Findings)
563
664
 
564
665
  What this means:
565
- These are YOUR observations that go BEYOND what was explicitly stated. Connections between different parts, patterns you identified, implications for future work. This is where YOU add value beyond just summarizing.
666
+ Facts explicitly found in the code.
667
+ CRITICAL GOAL: Eliminate vagueness. Replace generic descriptions with specific "Named Entities" defined above.
566
668
 
567
- Types of insights we value:
669
+ Rules for High-Quality Conclusions:
568
670
 
569
- 1. CONNECTIONS - How do different files/modules relate?
570
- Example: "jwt.ts handles token creation, credentials.ts handles storage - separation of crypto operations from I/O"
671
+ 1. The "Who" Rule:
672
+ - NEVER start with "The system", "The function", or "We found".
673
+ - Start with the specific Component/Class/File Name.
674
+ - BAD: "The function calculates the hash."
675
+ - GOOD: "\`FileScanner\` calculates \`SHA-256\` hash."
571
676
 
572
- 2. INFERENCES - What decisions were made implicitly?
573
- Example: "File storage in ~/.grov/ instead of env vars - implies single-user CLI design, not multi-tenant"
677
+ 2. The "Value" Rule:
678
+ - Do not use adjectives like "short", "large", "standard". Use the actual values found.
679
+ - BAD: "Sets a short timeout."
680
+ - GOOD: "Sets \`connectionTimeout\` to \`500ms\`."
574
681
 
575
- 3. PATTERNS - What architectural patterns emerge?
576
- Example: "All config files use 0600 permissions - security-conscious design for sensitive data"
682
+ 3. The "Location" Rule:
683
+ - Always mention WHERE this happens (File or Module).
684
+ - BAD: "Validates the token."
685
+ - GOOD: "Validates \`jwt_token\` inside \`auth.middleware.ts\`."
577
686
 
578
- 4. IMPLICATIONS - What does this mean for future development?
579
- Example: "1hr token expiry requires background refresh mechanism for long operations to avoid mid-task auth failures"
687
+ Format Pattern:
688
+ "CONCLUSION: [Code Anchor Subject] performs [Action] using [Code Anchor Object/Value]"
580
689
 
581
- Format: Start with "INSIGHT: " prefix
690
+ Examples:
691
+ - "CONCLUSION: \`JwtService\` in \`jwt.ts\` signs tokens with \`RS256\` algorithm, \`1hr\` expiry"
692
+ - "CONCLUSION: \`requireAuth\` preHandler in \`auth.ts\` validates \`Authorization\` header"
693
+ - "CONCLUSION: \`CredentialStore\` writes to \`~/.grov/credentials.json\` with \`0600\` permissions"
694
+
695
+ ═══════════════════════════════════════════════════════════════
696
+
697
+ TYPE B: INSIGHTS (Architectural Analysis)
698
+
699
+ What this means:
700
+ The architectural "Why" behind the code.
701
+ CRITICAL GOAL: Connect the code to a Computer Science Concept or Business Outcome.
582
702
 
583
- Good examples:
584
- - "INSIGHT: Dual-file pattern (jwt.ts + credentials.ts) separates crypto from I/O, reducing attack surface"
585
- - "INSIGHT: Device Authorization Flow chosen over password flow - enables OAuth providers without storing secrets in CLI"
586
- - "INSIGHT: Teams array cached in JWT payload - avoids DB query per request but requires token refresh on team changes"
703
+ Rules for High-Quality Insights:
587
704
 
588
- Bad examples:
589
- - "INSIGHT: The code is well organized" (subjective, not actionable)
590
- - "INSIGHT: Authentication is important" (obvious, no value)
591
- - "INSIGHT: Files were read" (process description, not insight)
705
+ 1. Name the Pattern/Trade-off:
706
+ - Use standard terminology: "Singleton", "Lazy Loading", "Race Condition", "O(N) Complexity", "Dependency Injection", "Circuit Breaker".
707
+ - BAD: "This is good for organizing code."
708
+ - GOOD: "Implements \`Dependency Injection\` to decouple storage logic."
709
+
710
+ 2. Explain the "Hard" Consequence:
711
+ - Focus on: Memory, CPU, Latency, Security, Consistency, Disk I/O.
712
+ - BAD: "It makes it faster."
713
+ - GOOD: "Reduces I/O operations by caching \`scan_result\` in memory."
714
+
715
+ Format Pattern:
716
+ "INSIGHT: Implements [Pattern Name] to optimize [Resource/Outcome] by [Specific Mechanism]"
717
+
718
+ Examples:
719
+ - "INSIGHT: \`timingSafeEqual\` prevents timing attacks - constant-time comparison regardless of input"
720
+ - "INSIGHT: Lazy expiration pattern in \`MemoryCache\` - trades read latency for no timer overhead"
721
+ - "INSIGHT: JWT payload caches \`teams[]\` - avoids DB query per request, requires refresh on team change"
592
722
 
593
723
  </instructions>
594
724
 
725
+ <summary_rules>
726
+ ═══════════════════════════════════════════════════════════════
727
+ SUMMARY GENERATION - CRITICAL FOR SEMANTIC SEARCH
728
+ ═══════════════════════════════════════════════════════════════
729
+
730
+ FRONT-LOADING RULE:
731
+ First 7-8 words determine 80% of search match quality.
732
+ Start DIRECTLY with the main technology or system name, then immediately follow with what was done in a few key words.
733
+
734
+ WRONG: "In this session we implemented a metrics system..."
735
+ WRONG: "This memory contains information about..."
736
+ WRONG: "Discussion about implementing..."
737
+ RIGHT: "Prometheus Metrics System with Counter, Gauge, Histogram primitives..."
738
+ RIGHT: "Event Bus pub/sub with wildcard subscriptions and circular buffer..."
739
+ RIGHT: "Redis caching layer with TTL expiration and LRU eviction..."
740
+
741
+ CONTENT RULES:
742
+ 1. Lead with technology/system name (Prometheus, Redis, Event Bus, AWS S3)
743
+ 2. Include 2-3 key technical terms that users would search for
744
+ 3. NO meta-language: ban "this memory", "discussion about", "implemented", "session"
745
+ 4. NO file paths (save those for conclusions)
746
+ 5. Describe WHAT it is, not WHAT was done
747
+
748
+ LENGTH: 150-200 characters MAXIMUM. Dense, not verbose.
749
+ </summary_rules>
750
+
595
751
  <output_format>
596
752
  Return a JSON object with this structure:
597
753
 
598
754
  {
755
+ "system_name": "[MANDATORY - see SYSTEM_NAME RULES below]",
756
+ "summary": "[150-200 chars MAX - MUST follow SUMMARY RULES above]",
599
757
  "knowledge_pairs": [
600
758
  {
759
+ "aspect": "[Specific component within system - see ASPECT RULES below]",
601
760
  "conclusion": "CONCLUSION: [specific factual finding with file paths and values]",
602
761
  "insight": "INSIGHT: [inference or implication RELATED to this conclusion]"
603
- },
604
- {
605
- "conclusion": "CONCLUSION: [another specific finding]",
606
- "insight": "INSIGHT: [what this means for future development]"
607
762
  }
608
763
  ],
609
764
  "decisions": [
610
765
  {
611
- "choice": "[What was chosen - be specific]",
612
- "reason": "[Why - include whether this is factual or inferred]"
766
+ "aspect": "[Specific component this decision is about]",
767
+ "choice": "[What was chosen - be specific. Max 100 chars]",
768
+ "reason": "[Why - include whether this is factual or inferred. Max 150 chars]"
613
769
  }
614
770
  ]
615
771
  }
616
772
 
773
+ ═══════════════════════════════════════════════════════════════
774
+ SYSTEM_NAME RULES (MANDATORY - TOP LEVEL FIELD)
775
+ ═══════════════════════════════════════════════════════════════
776
+
777
+ PURPOSE: This is the "parent anchor" that connects all knowledge and decisions to the same system. It will be prepended to EVERY chunk for semantic search.
778
+
779
+ WHAT TO PUT: The main system, component, or feature being discussed in this task. Extract it from the PROJECT GOAL - ask yourself "What is being built/analyzed/debugged?"
780
+
781
+ HOW TO IDENTIFY:
782
+ - Look at the goal/query - what noun represents the main subject?
783
+ - It should be a PROPER NOUN (specific name), not a generic term
784
+ - If goal is "Build a retry queue with exponential backoff" then system_name is "Retry Queue"
785
+ - If goal is "Fix authentication bug in login flow" then system_name is "Auth Module" or "Login Flow"
786
+ - If goal is "Optimize database queries for user search" then system_name is "User Search" or "Search Query Optimizer"
787
+
788
+ GOOD EXAMPLES:
789
+ - "Retry Queue" (specific component)
790
+ - "Webhook Delivery System" (specific feature)
791
+ - "Rate Limiter" (specific utility)
792
+ - "JWT Authentication" (specific mechanism)
793
+ - "Memory Cache" (specific component)
794
+ - "File Scanner" (specific service)
795
+
796
+ BAD EXAMPLES:
797
+ - "System" (too generic)
798
+ - "Code" (meaningless)
799
+ - "Implementation" (not a noun/component)
800
+ - "Backend" (too broad)
801
+ - "Feature" (not specific)
802
+ - "The function" (not a name)
803
+
804
+ RULE: If a user searches "How does [system_name] work?", this field should make that search find ALL chunks from this memory.
805
+
806
+ ═══════════════════════════════════════════════════════════════
807
+ ASPECT RULES (per knowledge_pair and decision)
808
+ ═══════════════════════════════════════════════════════════════
809
+
810
+ PURPOSE: The specific component, pattern, or topic WITHIN the system_name that THIS PARTICULAR entry discusses. More granular than system_name.
811
+
812
+ WHAT TO PUT: The specific part of the system this knowledge/decision is about. Ask yourself "What specific aspect of [system_name] does this entry cover?"
813
+
814
+ RELATIONSHIP TO system_name:
815
+ - system_name = "Retry Queue" (the whole system)
816
+ - aspect = "Job State Model" (one specific part)
817
+ - aspect = "Backoff Strategy" (another specific part)
818
+ - aspect = "Failed Job Recovery" (another specific part)
819
+
820
+ HOW TO IDENTIFY:
821
+ - What sub-component or pattern does this entry describe?
822
+ - What would you title this paragraph if it were a section header?
823
+ - It should be MORE SPECIFIC than system_name
824
+
825
+ GOOD EXAMPLES (for system_name = "Retry Queue"):
826
+ - "Job State Model" (how jobs are stored)
827
+ - "Backoff Strategy" (how delays work)
828
+ - "Failed Job Recovery" (how failures are handled)
829
+ - "Queue Statistics" (how stats are exposed)
830
+
831
+ GOOD EXAMPLES (for system_name = "Webhook Delivery"):
832
+ - "Signature Verification" (security aspect)
833
+ - "Retry Logic" (reliability aspect)
834
+ - "Payload Serialization" (data format aspect)
835
+ - "Timeout Handling" (error handling aspect)
836
+
837
+ BAD EXAMPLES:
838
+ - Same as system_name (redundant - don't repeat parent)
839
+ - "Implementation" (not specific)
840
+ - "Code" (meaningless)
841
+ - "Logic" (too vague)
842
+
843
+ ═══════════════════════════════════════════════════════════════
844
+
617
845
  IMPORTANT: Generate knowledge as PAIRS where each INSIGHT is directly related to its CONCLUSION.
618
846
 
619
- Example pair:
847
+ Example with system_name and aspect:
620
848
  {
621
- "conclusion": "CONCLUSION: MemoryCache uses lazy expiration - entries checked/deleted on get(), not via timers",
622
- "insight": "INSIGHT: Lazy expiration avoids timer overhead that would accumulate with large caches - trades CPU on read for memory efficiency"
849
+ "system_name": "Memory Cache",
850
+ "knowledge_pairs": [
851
+ {
852
+ "aspect": "Expiration Strategy",
853
+ "conclusion": "CONCLUSION: Uses lazy expiration - entries checked/deleted on get(), not via timers",
854
+ "insight": "INSIGHT: Lazy expiration avoids timer overhead - trades CPU on read for memory efficiency"
855
+ }
856
+ ]
623
857
  }
624
858
 
625
859
  Rules:
626
- 1. Each pair MUST have a conclusion AND a related insight
627
- 2. The insight MUST add value beyond the conclusion (inference, implication, pattern)
628
- 3. Max 5 pairs (10 entries total) - prioritize most valuable
629
- 4. Max 5 decisions - only significant architectural choices
630
- 5. If you cannot find a meaningful insight for a conclusion, still include the conclusion with insight: null
631
- 6. NEVER include process descriptions ("explored", "searched", "looked at")
632
- 7. English only, no emojis
633
- 8. Use prefixes "CONCLUSION: " and "INSIGHT: " in the strings
860
+ 1. system_name is MANDATORY - identifies the parent system for ALL entries
861
+ 2. Each pair MUST have aspect, conclusion AND a related insight
862
+ 3. aspect should be MORE SPECIFIC than system_name (not the same)
863
+ 4. The insight MUST add value beyond the conclusion (inference, implication, pattern)
864
+ 5. DO NOT repeat system_name in conclusion/insight
865
+ 6. Max 5 pairs - prioritize most valuable
866
+ 7. Max 5 decisions - only significant architectural choices
867
+ 8. If you cannot find a meaningful insight for a conclusion, still include with insight: null
868
+ 9. NEVER include process descriptions ("explored", "searched", "looked at")
869
+ 10. English only, no emojis
870
+ 11. Use prefixes "CONCLUSION: " and "INSIGHT: " in the strings
871
+
872
+ CHARACTER LIMITS (strict - for embedding optimization):
873
+ - system_name: 2-5 words (e.g. "Retry Queue", "Webhook Delivery System")
874
+ - summary: 150-200 characters MAX (front-loaded with tech name, NO meta-language)
875
+ - Each aspect: 2-4 words (e.g. "Job State Model", "Backoff Strategy")
876
+ - Each conclusion: max 150 characters (including "CONCLUSION: " prefix)
877
+ - Each insight: max 150 characters (including "INSIGHT: " prefix)
878
+ - Each decision aspect: 2-4 words
879
+ - Each decision choice: max 100 characters
880
+ - Each decision reason: max 150 characters
881
+ If content exceeds limit, prioritize SPECIFICITY over completeness.
882
+ Truncate gracefully - never cut mid-word or mid-path.
634
883
  </output_format>
635
884
 
636
885
  <validation>
637
886
  Before responding, verify:
887
+ - Did I include a system_name that identifies the PARENT system?
888
+ - Is system_name a specific proper noun, NOT generic like "System" or "Code"?
889
+ - Is the summary 150-200 chars, front-loaded with technology name, NO meta-language?
890
+ - Does each knowledge_pair include an 'aspect' field MORE SPECIFIC than system_name?
638
891
  - Does each CONCLUSION contain a specific file path or value?
639
892
  - Is each INSIGHT directly related to its paired CONCLUSION?
640
893
  - Does each INSIGHT add something NOT explicitly in the input?
641
894
  - Would a new developer find the pairs useful without seeing the original session?
642
895
  - Did I avoid process descriptions?
643
896
  - Are the decisions about significant architectural choices?
897
+ - Does each decision include a specific 'aspect' field?
898
+ - Are ALL entries within character limits?
644
899
  </validation>
645
900
 
646
901
  Return ONLY valid JSON, no markdown code blocks, no explanation.`;
@@ -654,8 +909,8 @@ Return ONLY valid JSON, no markdown code blocks, no explanation.`;
654
909
  const text = response.content[0].type === 'text' ? response.content[0].text : '';
655
910
  const jsonMatch = text.match(/\{[\s\S]*\}/);
656
911
  if (!jsonMatch) {
657
- debugLLM('extractReasoningAndDecisions', 'No JSON found in response');
658
- return { reasoning_trace: [], decisions: [] };
912
+ console.error('[LLM-EXTRACTOR] No JSON in response');
913
+ return { system_name: null, summary: null, reasoning_trace: [], decisions: [] };
659
914
  }
660
915
  // Try to parse JSON, with repair attempts for common Haiku formatting issues
661
916
  let result;
@@ -673,49 +928,584 @@ Return ONLY valid JSON, no markdown code blocks, no explanation.`;
673
928
  try {
674
929
  result = JSON.parse(repaired);
675
930
  }
676
- catch {
677
- // Last resort: try to extract just knowledge_pairs array
931
+ catch (repairError) {
932
+ // Last resort: try to extract individual fields
678
933
  const pairsMatch = jsonMatch[0].match(/"knowledge_pairs"\s*:\s*\[([\s\S]*?)\]/);
679
934
  if (pairsMatch) {
680
935
  try {
681
936
  const pairs = JSON.parse(`[${pairsMatch[1].replace(/,\s*$/, '')}]`);
682
- result = { knowledge_pairs: pairs, decisions: [] };
937
+ const systemMatch = jsonMatch[0].match(/"system_name"\s*:\s*"([^"]+)"/);
938
+ const extractedSystemName = systemMatch ? systemMatch[1] : undefined;
939
+ result = { system_name: extractedSystemName, knowledge_pairs: pairs, decisions: [] };
683
940
  }
684
- catch {
685
- throw parseError; // Re-throw original error
941
+ catch (fallbackError) {
942
+ console.error('[LLM-EXTRACTOR] JSON parse failed');
943
+ throw parseError;
686
944
  }
687
945
  }
688
946
  else {
947
+ console.error('[LLM-EXTRACTOR] JSON parse failed');
689
948
  throw parseError;
690
949
  }
691
950
  }
692
951
  }
693
- // Flatten knowledge_pairs into reasoning_trace (interleaved: conclusion, insight, conclusion, insight...)
952
+ // Extract system_name (parent anchor for all chunks)
953
+ const systemName = result.system_name || null;
954
+ // Keep knowledge_pairs as objects (with aspect for semantic search)
694
955
  let reasoningTrace = [];
695
956
  if (result.knowledge_pairs && result.knowledge_pairs.length > 0) {
696
- // New format: flatten pairs into interleaved array
697
- for (const pair of result.knowledge_pairs) {
698
- if (pair.conclusion) {
699
- reasoningTrace.push(pair.conclusion);
700
- }
701
- if (pair.insight) {
702
- reasoningTrace.push(pair.insight);
703
- }
704
- }
705
- debugLLM('extractReasoningAndDecisions', `Extracted ${result.knowledge_pairs.length} pairs (${reasoningTrace.length} entries), ${result.decisions?.length || 0} decisions`);
957
+ // New format: keep as objects with aspect (fall back to tags for backwards compat)
958
+ reasoningTrace = result.knowledge_pairs.map(pair => ({
959
+ aspect: pair.aspect || pair.tags || '', // Prefer aspect, fallback to tags
960
+ tags: pair.tags, // Keep for backwards compat
961
+ conclusion: pair.conclusion || '',
962
+ insight: pair.insight || null,
963
+ }));
964
+ debugLLM('extractReasoningAndDecisions', `Extracted system_name="${systemName}", ${result.knowledge_pairs.length} pairs, ${result.decisions?.length || 0} decisions`);
706
965
  }
707
966
  else if (result.reasoning_trace) {
708
- // Backwards compatibility: old format with flat array
709
- reasoningTrace = result.reasoning_trace;
967
+ // Backwards compatibility: old format with flat string array - wrap in objects
968
+ reasoningTrace = result.reasoning_trace.map(entry => ({
969
+ aspect: '', // No aspect in old format
970
+ conclusion: entry,
971
+ insight: null,
972
+ }));
710
973
  debugLLM('extractReasoningAndDecisions', `Extracted ${reasoningTrace.length} traces (old format), ${result.decisions?.length || 0} decisions`);
711
974
  }
712
975
  return {
976
+ system_name: systemName,
977
+ summary: result.summary || null,
713
978
  reasoning_trace: reasoningTrace,
714
979
  decisions: result.decisions || [],
715
980
  };
716
981
  }
717
982
  catch (error) {
718
983
  debugLLM('extractReasoningAndDecisions', `Error: ${String(error)}`);
719
- return { reasoning_trace: [], decisions: [] };
984
+ return { system_name: null, summary: null, reasoning_trace: [], decisions: [] };
720
985
  }
721
986
  }
/**
 * Report whether shouldUpdateMemory can be used.
 *
 * @returns {boolean} true when ANTHROPIC_API_KEY or GROV_API_KEY is set to a
 *   non-empty value in the environment, false otherwise.
 */
export function isShouldUpdateAvailable() {
    const { ANTHROPIC_API_KEY: anthropicKey, GROV_API_KEY: grovKey } = process.env;
    if (anthropicKey) {
        return true;
    }
    return Boolean(grovKey);
}
/*
 * Documentation for shouldUpdateMemory(), which is defined further below,
 * after buildShouldUpdatePrompt().
 *
 * Decide if a memory should be updated based on new session data.
 * Called when a match is found before sync.
 *
 * Handles up to 5 tasks in one Haiku call:
 * 1. should_update decision (boolean + reason)
 * 2. superseded_mapping (which old decisions are replaced by which new ones)
 * 3. condensed_old_reasoning (max 200 chars for reasoning_evolution)
 * 4. evolution_summary (200-250 chars for evolution_steps)
 * 5. consolidated_evolution_steps (conditional: only when evolution_steps has more than 10 entries)
 *
 * @param existingMemory - The memory that was matched
 * @param newData - Extracted reasoning and decisions from current session
 * @param sessionContext - Task type, original query, files touched
 * @returns Decision result with all transformation data
 */
/**
 * Build the prompt for shouldUpdateMemory.
 * Structured with XML tags for clear task separation.
 *
 * reasoning_trace entries may be plain strings (old format) or objects with
 * aspect / conclusion / insight fields (the shape extractReasoningAndDecisions
 * returns). Objects are rendered to text explicitly; joining them directly
 * would put "[object Object]" into the prompt. Missing arrays on any input are
 * treated as empty so that an incomplete memory still yields a usable prompt.
 *
 * @param existingMemory - Matched memory; reads goal, decisions, reasoning_trace,
 *   evolution_steps and files_touched.
 * @param newData - Extracted data from the current session; reads decisions and
 *   reasoning_trace.
 * @param sessionContext - Reads task_type, original_query and files_touched.
 * @param needsConsolidation - When true, the Task 5 consolidation section and
 *   the consolidated_evolution_steps output field are included.
 * @param evolutionCount - Number of evolution_steps entries, quoted in Task 5.
 * @returns {string} The complete prompt text.
 */
function buildShouldUpdatePrompt(existingMemory, newData, sessionContext, needsConsolidation, evolutionCount) {
    // Render one reasoning_trace entry as a single line of text.
    const formatReasoningEntry = (entry) => {
        if (entry === null || entry === undefined) {
            return ''; // same as Array.prototype.join does for null/undefined
        }
        if (typeof entry !== 'object' || Array.isArray(entry)) {
            return String(entry); // old format: plain string entries stay unchanged
        }
        const aspect = entry.aspect ? `[${entry.aspect}] ` : '';
        const conclusion = entry.conclusion ?? '';
        const insight = entry.insight ? ` -> ${entry.insight}` : '';
        return `${aspect}${conclusion}${insight}`;
    };
    // Treat absent collections as empty instead of throwing on .map/.slice/.join
    const existingDecisions = existingMemory.decisions ?? [];
    const existingReasoning = existingMemory.reasoning_trace ?? [];
    const existingEvolution = existingMemory.evolution_steps ?? [];
    const existingFiles = existingMemory.files_touched ?? [];
    const newDecisions = newData.decisions ?? [];
    const newReasoning = newData.reasoning_trace ?? [];
    const sessionFiles = sessionContext.files_touched ?? [];
    // Format existing decisions with indices (Task 2 refers to them by old_index)
    const formattedDecisions = existingDecisions
        .map((d, i) => `[${i}] ${d.choice} (${d.active !== false ? 'active' : 'inactive'}): ${d.reason}`)
        .join('\n') || 'None';
    // Format existing reasoning trace (limit to 10)
    const formattedReasoning = existingReasoning
        .slice(0, 10)
        .map(formatReasoningEntry)
        .join('\n') || 'None';
    // Format evolution steps
    const formattedEvolution = existingEvolution
        .map(e => `- ${e.date}: ${e.summary}`)
        .join('\n') || 'No evolution history yet';
    // Format new decisions
    const formattedNewDecisions = newDecisions
        .map(d => `- ${d.choice}: ${d.reason}`)
        .join('\n') || 'None extracted';
    // Format new reasoning (limit to 5)
    const formattedNewReasoning = newReasoning
        .slice(0, 5)
        .map(formatReasoningEntry)
        .join('\n') || 'None extracted';
    // Build output format based on whether consolidation is needed
    const outputFormat = needsConsolidation
        ? `{
"should_update": boolean,
"reason": "1-2 sentence explanation of your decision",
"superseded_mapping": [{"old_index": number, "replaced_by_choice": "string", "replaced_by_reason": "string"}] or [],
"condensed_old_reasoning": "string max 200 chars" or null,
"evolution_summary": "string 200-250 chars" or null,
"consolidated_evolution_steps": [{"summary": "...", "date": "YYYY-MM-DD"}, ...]
}`
        : `{
"should_update": boolean,
"reason": "1-2 sentence explanation of your decision",
"superseded_mapping": [{"old_index": number, "replaced_by_choice": "string", "replaced_by_reason": "string"}] or [],
"condensed_old_reasoning": "string max 200 chars" or null,
"evolution_summary": "string 200-250 chars" or null
}`;
    // Build consolidation section (Task 5) - only if needed
    // Defined here to maintain task order in code: 1, 2, 3, 4, then 5
    const consolidationSection = needsConsolidation ? `
<task_5_consolidation>
═══════════════════════════════════════════════════════════════
TASK 5: CONSOLIDATION REQUIRED
═══════════════════════════════════════════════════════════════

Current evolution_steps has ${evolutionCount} entries (maximum is 10).

You MUST consolidate the OLDEST 3-5 entries into 1-2 summary entries.
Keep the NEWEST entries unchanged.

Current evolution_steps:
${existingEvolution.map((e, i) => `[${i}] ${e.date}: ${e.summary}`).join('\n')}

Rules:
1. Merge entries [0] to [3] or [4] into 1-2 summary entries
2. Keep entries [5] onwards unchanged
3. Each summary should capture the key transitions, not every detail
4. Preserve dates - use the earliest date for consolidated entries

Return the FULL consolidated array in consolidated_evolution_steps.
</task_5_consolidation>
` : '';
    return `<role>
You are a Memory Update Analyst for Grov, a coding assistant that maintains team knowledge.

Your job is to analyze whether an existing memory should be UPDATED based on new session data, or if the new session is just a query about existing knowledge (SKIP).

You have ${needsConsolidation ? '5' : '4'} tasks to complete. Read all instructions carefully before responding.
</role>

<context>
WHAT IS A MEMORY?
A memory stores knowledge from past coding sessions: decisions made, reasoning discovered, and how the project evolved over time.

WHY UPDATE MATTERS:
- UPDATE when: user made real changes, switched approaches, or discovered new information
- SKIP when: user just asked questions about existing knowledge without changing anything

THE PROBLEM WE SOLVE:
User asks "Why did we choose JWT?" then says "Ok I understand" = SKIP (just a question)
User says "Let's switch from JWT to sessions" then confirms "Ok let's do it" = UPDATE (real change)
</context>

<existing_memory>
<goal>${existingMemory.goal || 'Not specified'}</goal>

<decisions>
${formattedDecisions}
</decisions>

<reasoning_trace>
${formattedReasoning}
</reasoning_trace>

<evolution_steps>
${formattedEvolution}
</evolution_steps>

<files_in_memory>
${existingFiles.slice(0, 10).join(', ') || 'None'}
</files_in_memory>
</existing_memory>

<new_session_data>
<task_type>${sessionContext.task_type}</task_type>
<original_query>${sessionContext.original_query}</original_query>

<files_touched_in_session>
${sessionFiles.join(', ') || 'None'}
</files_touched_in_session>

<extracted_decisions>
${formattedNewDecisions}
</extracted_decisions>

<extracted_reasoning>
${formattedNewReasoning}
</extracted_reasoning>
</new_session_data>

<task_1_should_update>
═══════════════════════════════════════════════════════════════
TASK 1: Decide should_update (boolean) and provide reason
═══════════════════════════════════════════════════════════════

<strong_signals>
RETURN should_update: true IF ANY of these STRONG signals are present:

STRONG SIGNAL A - Files were modified:
If files_touched_in_session is NOT empty, code was changed. Real changes must be recorded.

STRONG SIGNAL B - Decisions are OPPOSITE or ALTERNATIVE:
Compare extracted_decisions with existing decisions.
OPPOSITE/ALTERNATIVE means:
- They solve the SAME problem (e.g., both about authentication)
- But use DIFFERENT approach (e.g., JWT vs sessions)
- Choosing one means NOT using the other

Example OPPOSITE: "Use JWT" vs "Use sessions" = OPPOSITE (both auth, different approach)
Example NOT OPPOSITE: "Use JWT" vs "JWT with refresh tokens" = REFINEMENT (same approach, more detail)
</strong_signals>

<weak_signals>
WEAK SIGNALS (require combination):
- task_type is "planning" AND decisions cover a NEW topic not in existing memory
- User confirmed a proposed change (patterns: "ok let's do", "yes", "da", "hai", "mergem cu")
BUT confirmation must be IN CONTEXT of a change proposal, not just acknowledgment.
</weak_signals>

<false_criteria>
RETURN should_update: false IF:

KEY DISTINCTION - Who introduced the topic?

If the assistant ALREADY explained or decided something, and the user is
asking ABOUT that explanation, this is clarifying Q&A, not new work.

CLEAR FALSE SIGNAL:
task_type is "information" AND files_touched_in_session is empty:
- The user was asking questions or seeking clarification
- No code was modified (only edit/write counts, not read)
- User is asking about what was ALREADY explained:
- Confirmation: "Are you sure about X?"
- Clarification: "Did you mean Y?"
- Understanding: "Does this also work for Z?"
- These are NOT new decisions - just questions about existing explanations
- should_update: false

ALSO FALSE:
- extracted_decisions are REFORMULATIONS of existing (same meaning, different words)
- No NEW options introduced by user - just explaining existing decisions
</false_criteria>
</task_1_should_update>

<task_2_superseded_decisions>
═══════════════════════════════════════════════════════════════
TASK 2: Identify superseded decisions with replacement mapping
═══════════════════════════════════════════════════════════════

ONLY IF should_update = true:

For each existing decision, check if ANY new decision SUPERSEDES it.
Return a MAPPING that includes the replacement details.

<definition>
WHAT DOES "SUPERSEDED" MEAN?

A decision is SUPERSEDED only when ALL these conditions are true:

1. SAME DOMAIN: Both decisions address the SAME technical area:
- Authentication: JWT, sessions, OAuth, API keys
- Database: PostgreSQL, MySQL, MongoDB, SQLite
- Caching: Redis, Memcached, in-memory
- Storage: local files, S3, cloud storage
- Framework: React, Vue, Angular

2. MUTUALLY EXCLUSIVE: Choosing one means NOT using the other.
You cannot use both solutions simultaneously for the same purpose.

3. EXPLICIT REPLACEMENT: The new decision clearly replaces the old approach,
not just adds to it or refines it.
</definition>

<protections>
WHAT IS NOT SUPERSEDED (IMPORTANT)

DO NOT mark as superseded if:

1. DIFFERENT DOMAINS:
- "Use PostgreSQL" and "Use Redis for caching" = DIFFERENT domains
- Database storage ≠ caching layer. Both can coexist.

2. REFINEMENT, not replacement:
- "Use JWT" → "Use JWT with refresh tokens" = REFINEMENT
- Same approach, more detail. NOT superseded.

3. ADDITION, not replacement:
- "Add rate limiting" does NOT supersede "Use JWT"
- Different concerns, both remain active.

4. UNCERTAIN CONNECTION:
- If you're not 100% sure they're the same domain → DO NOT SUPERSEDE
- Missing a supersede = minor issue
- Wrong supersede = corrupts history (worse!)

DEFAULT: If uncertain, return empty mapping. Be conservative.
</protections>

<output_format_task2>
Return superseded_mapping as array of objects:

superseded_mapping: [
{
"old_index": 0,
"replaced_by_choice": "Use sessions",
"replaced_by_reason": "Better for long-running operations"
}
]

If no decisions are superseded: return empty array []
If should_update = false: return empty array []
</output_format_task2>

<examples_task2>
EXAMPLE A - SUPERSEDED (same domain, opposite approach):
existing: [0] "Use JWT for authentication"
new: "Use session-based auth with Redis"
→ SUPERSEDED: same domain (auth), mutually exclusive
→ superseded_mapping: [{"old_index": 0, "replaced_by_choice": "Use session-based auth with Redis", "replaced_by_reason": "Better for long-running operations"}]

EXAMPLE B - NOT SUPERSEDED (different domains):
existing: [0] "Use PostgreSQL for main database"
new: "Add Redis for caching"
→ NOT SUPERSEDED: different domains (database vs caching)
→ superseded_mapping: []

EXAMPLE C - NOT SUPERSEDED (refinement):
existing: [0] "Use JWT tokens"
new: "Use JWT with 1hr access and 7day refresh tokens"
→ NOT SUPERSEDED: refinement of same approach
→ superseded_mapping: []

EXAMPLE D - NOT SUPERSEDED (addition):
existing: [0] "Use PostgreSQL", [1] "Use Express.js"
new: "Add input validation with Zod"
→ NOT SUPERSEDED: new concern, doesn't replace existing
→ superseded_mapping: []

EXAMPLE E - MULTIPLE SUPERSEDED:
existing: [0] "Use JWT", [1] "Store tokens in localStorage"
new: "Use session cookies", "Store session ID in httpOnly cookie"
→ superseded_mapping: [
{"old_index": 0, "replaced_by_choice": "Use session cookies", "replaced_by_reason": "Server-side session management"},
{"old_index": 1, "replaced_by_choice": "Store session ID in httpOnly cookie", "replaced_by_reason": "More secure than localStorage"}
]

EXAMPLE F - UNCERTAIN (be conservative):
existing: [0] "Use MongoDB"
new: "Consider PostgreSQL for better relational queries"
→ UNCERTAIN: "consider" suggests exploration, not decision
→ superseded_mapping: []
</examples_task2>

</task_2_superseded_decisions>

<task_3_condense_reasoning>
═══════════════════════════════════════════════════════════════
TASK 3: Condense old reasoning (max 200 characters)
═══════════════════════════════════════════════════════════════

ONLY IF should_update = true:

<purpose_task3>
The existing reasoning_trace will be OVERWRITTEN with new reasoning.
Before it's lost forever, you must preserve the most valuable insights
in a condensed form (max 200 chars) for historical context.

This condensed version will be stored in reasoning_evolution array
so users can understand past thinking even after updates.
</purpose_task3>

<what_to_include>
Prioritize in this order:

1. KEY TECHNICAL DECISIONS and their rationale
- "JWT chosen for stateless auth"
- "PostgreSQL for ACID compliance"

2. CONSTRAINTS or LIMITATIONS discovered
- "API rate limit 100req/min"
- "Browser storage max 5MB"

3. TRADE-OFFS that were considered
- "Chose simplicity over performance"

4. NON-OBVIOUS INSIGHTS that would be hard to rediscover
- "Edge case: empty arrays cause crash"
- "Must call init() before connect()"
</what_to_include>

<what_to_exclude>
- Generic statements ("Implemented feature")
- Process descriptions ("User asked about X")
- Obvious facts that anyone could infer from code
- Temporary debugging notes
</what_to_exclude>

<format_guidelines_task3>
- Use concise phrases, not full sentences
- Separate distinct insights with periods
- Abbreviate common terms (auth, config, impl)
- Focus on WHAT and WHY, not HOW
</format_guidelines_task3>

<examples_task3>
GOOD: "JWT with 1hr/7d expiry for offline CLI. Device flow for OAuth. No secrets in localStorage."
GOOD: "PostgreSQL chosen over MongoDB for relational queries. Indexes on user_id, created_at."
BAD: "We implemented authentication" (too vague, no insight)
BAD: "The user wanted to know about JWT" (process, not knowledge)
</examples_task3>

IF should_update = false: return null
</task_3_condense_reasoning>

<task_4_evolution_summary>
═══════════════════════════════════════════════════════════════
TASK 4: Generate evolution summary (200-250 characters)
═══════════════════════════════════════════════════════════════

ONLY IF should_update = true:

<purpose_task4>
This summary describes WHAT CHANGED in this update.
It will be added to evolution_steps to create a timeline of how
the memory evolved over time.

Future readers will scan these summaries to understand the journey
from initial implementation to current state.
</purpose_task4>

<good_summary_criteria>
1. DESCRIBES THE CHANGE, not the session
- YES: "Switched from JWT to sessions"
- NO: "User discussed authentication options"

2. INCLUDES THE REASON when relevant
- YES: "Added Redis caching for API performance"
- NO: "Added Redis" (why?)

3. MENTIONS KEY COMPONENTS affected
- YES: "Updated auth middleware and token validation"
- NO: "Made some changes to auth"

4. CAPTURES THE SCOPE (what areas were touched)
- YES: "Refactored database layer: connection pool, query caching, error handling"
- NO: "Database changes"
</good_summary_criteria>

<structure_template>
[ACTION] [WHAT] [FOR/BECAUSE] [REASON]. [ADDITIONAL DETAILS IF SPACE].

Examples:
- "Switched from [X] to [Y] for [reason]. Updated [components]."
- "Added [feature] to [achieve goal]. Implemented [details]."
- "Fixed [problem] in [component]. Root cause was [X]."
</structure_template>

<examples_task4>
GOOD (switching): "Switched from JWT to session-based auth for long-running operations. Added Redis for session storage and updated middleware."
GOOD (adding): "Added caching layer with Redis for API optimization. Implemented 5min TTL for reads and cache invalidation on writes."
GOOD (fixing): "Fixed memory leak in WebSocket connections. Root cause was missing cleanup on disconnect. Added connection pool."
GOOD (refactoring): "Refactored user service to repository pattern. Separated data access from business logic. Added unit tests."

BAD: "Updated stuff" (too vague)
BAD: "User asked about JWT" (describes session, not change)
BAD: "Changes to authentication" (no specifics)
</examples_task4>

<length_guide>
Target: 200-250 characters
- Under 150: probably missing important details
- Over 300: probably too verbose, condense
</length_guide>

IF should_update = false: return null
</task_4_evolution_summary>
${consolidationSection}
<output_format>
Return ONLY valid JSON. No markdown, no explanation, no extra text.

IMPORTANT RULES:
- English only (translate Romanian/other languages to English in all fields)
- No emojis
- All string values in English

${outputFormat}
</output_format>

<examples>
EXAMPLE 1 - Should UPDATE (files modified):
Input: task_type=implementation, files_touched=["src/auth.ts"], query="Fix auth bug"
Output: {"should_update": true, "reason": "Files were modified in implementation session", "superseded_mapping": [], "condensed_old_reasoning": "Initial JWT implementation", "evolution_summary": "Fixed authentication bug in token validation"}

EXAMPLE 2 - Should UPDATE (opposite decision):
Input: task_type=planning, query="Let's switch to sessions instead of JWT", existing=[{choice:"Use JWT"}], new=[{choice:"Use sessions"}]
Output: {"should_update": true, "reason": "User switched from JWT to sessions - opposite approaches", "superseded_mapping": [{"old_index": 0, "replaced_by_choice": "Use sessions", "replaced_by_reason": "Better session management for long operations"}], "condensed_old_reasoning": "JWT for stateless CLI auth with refresh tokens", "evolution_summary": "Switched from JWT to session-based authentication"}

EXAMPLE 3 - Should SKIP (pure question):
Input: task_type=information, query="Why did we choose JWT?", files_touched=[]
Output: {"should_update": false, "reason": "Pure information query about existing decision - no changes", "superseded_mapping": [], "condensed_old_reasoning": null, "evolution_summary": null}

EXAMPLE 4 - Should SKIP (acknowledgment):
Input: task_type=information, query="Ok I understand now", files_touched=[]
Output: {"should_update": false, "reason": "User acknowledged explanation but did not confirm any change", "superseded_mapping": [], "condensed_old_reasoning": null, "evolution_summary": null}
</examples>`;
}
/**
 * Decide whether a matched memory should be updated with data from the
 * current session, using a single Haiku call.
 *
 * The model is asked for should_update + reason, superseded_mapping,
 * condensed_old_reasoning, evolution_summary and, only when the memory has
 * more than 10 evolution_steps, consolidated_evolution_steps.
 *
 * Any failure after the client is obtained (prompt construction, API error,
 * no JSON in the reply, unparseable JSON) returns createFallbackResult(sessionContext)
 * instead of throwing. Fields of the parsed reply are type-checked so callers
 * never receive a non-boolean should_update or a non-array superseded_mapping.
 *
 * @param existingMemory - The memory that was matched
 * @param newData - Extracted reasoning and decisions from current session
 * @param sessionContext - Task type, original query, files touched
 * @returns Decision result with all transformation data
 */
export async function shouldUpdateMemory(existingMemory, newData, sessionContext) {
    const client = getAnthropicClient();
    // Check if evolution_steps consolidation is needed
    const evolutionCount = existingMemory.evolution_steps?.length || 0;
    const needsConsolidation = evolutionCount > 10;
    debugLLM('shouldUpdateMemory', `Analyzing memory update (needsConsolidation=${needsConsolidation})`);
    try {
        // Built inside the try block: a malformed memory should produce the
        // fallback result, not an unhandled rejection.
        const prompt = buildShouldUpdatePrompt(existingMemory, newData, sessionContext, needsConsolidation, evolutionCount);
        const response = await client.messages.create({
            model: 'claude-haiku-4-5-20251001',
            // Consolidation returns the full evolution_steps array, so allow a longer reply
            max_tokens: needsConsolidation ? 1500 : 800,
            messages: [{ role: 'user', content: prompt }],
        });
        const firstBlock = response.content?.[0];
        const text = firstBlock?.type === 'text' ? firstBlock.text : '';
        // Try to parse JSON from response (outermost {...} span)
        const jsonMatch = text.match(/\{[\s\S]*\}/);
        if (!jsonMatch) {
            console.error('[HAIKU] No JSON in response');
            return createFallbackResult(sessionContext);
        }
        // Parse and validate response
        let result;
        try {
            result = JSON.parse(jsonMatch[0]);
        }
        catch (parseErr) {
            console.error('[HAIKU] JSON parse failed');
            return createFallbackResult(sessionContext);
        }
        // Normalize required fields: wrong types are replaced with safe defaults.
        // Anything other than true / "true" counts as "do not update".
        result.should_update = result.should_update === true || result.should_update === 'true';
        result.reason = typeof result.reason === 'string' ? result.reason : 'No reason provided';
        result.superseded_mapping = Array.isArray(result.superseded_mapping) ? result.superseded_mapping : [];
        result.condensed_old_reasoning = typeof result.condensed_old_reasoning === 'string'
            ? result.condensed_old_reasoning
            : null;
        result.evolution_summary = typeof result.evolution_summary === 'string'
            ? result.evolution_summary
            : null;
        // A malformed consolidation payload is dropped rather than passed on
        if ('consolidated_evolution_steps' in result && !Array.isArray(result.consolidated_evolution_steps)) {
            delete result.consolidated_evolution_steps;
        }
        // Decision stored in result - logged by cloud-sync.ts
        debugLLM('shouldUpdateMemory', `Result: should_update=${result.should_update}, reason="${result.reason.substring(0, 50)}"`);
        return result;
    }
    catch (error) {
        console.error('[HAIKU] Error:', String(error));
        return createFallbackResult(sessionContext);
    }
}
/**
 * Create the fallback result used when the Haiku call fails or its reply
 * cannot be parsed.
 *
 * Heuristic: if the session touched files, a real change likely happened, so
 * lean toward updating; otherwise skip the update. A missing sessionContext or
 * files_touched is treated as "no files touched" so that this recovery path
 * cannot itself throw.
 *
 * @param sessionContext - Session info; only files_touched is read.
 * @returns Result object with should_update, reason, an empty
 *   superseded_mapping, a null condensed_old_reasoning, and an
 *   evolution_summary that is a generic string when updating and null otherwise.
 */
function createFallbackResult(sessionContext) {
    const touchedCount = sessionContext?.files_touched?.length ?? 0;
    const hasFiles = touchedCount > 0;
    return {
        should_update: hasFiles,
        reason: hasFiles
            ? 'Fallback: files modified, assuming update needed'
            : 'Fallback: no files modified, skipping update',
        superseded_mapping: [],
        condensed_old_reasoning: null,
        evolution_summary: hasFiles ? 'Session with file modifications' : null,
    };
}