grov 0.5.8 → 0.5.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +4 -1
- package/dist/lib/api-client.d.ts +42 -1
- package/dist/lib/api-client.js +33 -6
- package/dist/lib/cloud-sync.d.ts +57 -3
- package/dist/lib/cloud-sync.js +176 -6
- package/dist/lib/llm-extractor.d.ts +63 -1
- package/dist/lib/llm-extractor.js +882 -92
- package/dist/lib/store/database.js +14 -0
- package/dist/lib/store/index.d.ts +1 -1
- package/dist/lib/store/index.js +1 -1
- package/dist/lib/store/sessions.js +9 -3
- package/dist/lib/store/tasks.d.ts +12 -0
- package/dist/lib/store/tasks.js +43 -3
- package/dist/lib/store/types.d.ts +13 -2
- package/dist/lib/store/types.js +0 -1
- package/dist/proxy/action-parser.d.ts +0 -4
- package/dist/proxy/action-parser.js +0 -29
- package/dist/proxy/cache.d.ts +0 -4
- package/dist/proxy/cache.js +4 -8
- package/dist/proxy/extended-cache.js +6 -16
- package/dist/proxy/handlers/preprocess.js +29 -12
- package/dist/proxy/injection/delta-tracking.js +1 -0
- package/dist/proxy/request-processor.js +44 -54
- package/dist/proxy/response-processor.d.ts +6 -2
- package/dist/proxy/response-processor.js +27 -9
- package/dist/proxy/server.js +72 -38
- package/dist/proxy/utils/logging.d.ts +2 -1
- package/dist/proxy/utils/logging.js +11 -5
- package/package.json +1 -1
|
@@ -44,7 +44,7 @@ ${firstPrompt.substring(0, 2000)}
|
|
|
44
44
|
|
|
45
45
|
Extract as JSON:
|
|
46
46
|
{
|
|
47
|
-
"goal": "
|
|
47
|
+
"goal": "A single, high-density sentence describing the technical intent. RULES: 1. No bullet points, no newlines. 2. Must include the main Technology Name (e.g. 'Prometheus', 'React', 'AWS') if inferred. 3. If the user provided a list, synthesize it into one summary statement. Example: 'Implement Prometheus metrics collection with counter and gauge primitives' instead of 'Add metrics: - counters - gauges'.",
|
|
48
48
|
"expected_scope": ["list", "of", "files/folders", "likely", "to", "be", "modified"],
|
|
49
49
|
"constraints": ["EXPLICIT restrictions from the user - see examples below"],
|
|
50
50
|
"success_criteria": ["How to know when the task is complete"],
|
|
@@ -103,7 +103,7 @@ RESPONSE RULES:
|
|
|
103
103
|
}
|
|
104
104
|
const parsed = JSON.parse(jsonMatch[0]);
|
|
105
105
|
return {
|
|
106
|
-
goal: typeof parsed.goal === 'string' ? parsed.goal :
|
|
106
|
+
goal: typeof parsed.goal === 'string' ? parsed.goal : '', // Don't fallback to prompt
|
|
107
107
|
expected_scope: Array.isArray(parsed.expected_scope)
|
|
108
108
|
? parsed.expected_scope.filter((s) => typeof s === 'string')
|
|
109
109
|
: [],
|
|
@@ -139,7 +139,7 @@ function createFallbackIntent(prompt) {
|
|
|
139
139
|
// Extract file patterns
|
|
140
140
|
const filePatterns = prompt.match(/[\w\/.-]+\.(ts|js|tsx|jsx|py|go|rs|java|css|html|md)/g) || [];
|
|
141
141
|
return {
|
|
142
|
-
goal:
|
|
142
|
+
goal: '', // Empty - don't copy user prompt as goal; goal should be synthesized only
|
|
143
143
|
expected_scope: [...new Set(filePatterns)].slice(0, 5),
|
|
144
144
|
constraints: [],
|
|
145
145
|
success_criteria: [],
|
|
@@ -311,6 +311,7 @@ Return a JSON object with these fields:
|
|
|
311
311
|
- task_type: one of "information", "planning", or "implementation"
|
|
312
312
|
- action: one of "continue", "task_complete", "new_task", or "subtask_complete"
|
|
313
313
|
- task_id: existing session_id "${currentSession?.session_id || 'NEW'}" or "NEW" for new task
|
|
314
|
+
- current_goal: "SYNTHESIZE a concise goal (max 150 chars). RULES: 1. If original_goal is empty, SYNTHESIZE from user messages. 2. DO NOT copy the user's request verbatim - summarize it. 3. Start with Technology/Component name. 4. One sentence, no newlines. Example: 'TypeScript Logger with level filtering and JSON output' NOT 'Create a structured logger in /home/... with debug, info...'"
|
|
314
315
|
- reasoning: brief explanation of why you made this decision${compressionInstruction}
|
|
315
316
|
</output>
|
|
316
317
|
|
|
@@ -320,28 +321,49 @@ First, analyze the original_goal to understand what kind of task this is. Do not
|
|
|
320
321
|
TYPE A - Information Request
|
|
321
322
|
The user wants to learn or understand something. They are seeking knowledge, not asking for any changes or decisions to be made. The answer itself is what they need.
|
|
322
323
|
|
|
323
|
-
|
|
324
|
+
This INCLUDES clarifying questions about what the assistant already explained:
|
|
325
|
+
- Asking for confirmation: "Are you sure about X?"
|
|
326
|
+
- Asking for clarification: "Did you mean Y?"
|
|
327
|
+
- Checking understanding: "Does this also apply to Z?"
|
|
324
328
|
|
|
325
|
-
|
|
329
|
+
These questions REFERENCE the previous response and seek clarification, not new decisions.
|
|
330
|
+
|
|
331
|
+
Think about whether the user is curious about how something works, wants an explanation of a concept, or is asking about something the assistant already said.
|
|
332
|
+
|
|
333
|
+
Examples of information requests:
|
|
326
334
|
- "How does the authentication system work?"
|
|
327
335
|
- "Explica-mi cum functioneaza cache-ul"
|
|
328
336
|
- "What is the difference between Redis and Memcached?"
|
|
329
337
|
- "Can you walk me through the payment flow?"
|
|
330
338
|
- "I don't understand why this function returns null"
|
|
331
339
|
- "Ce face acest cod?"
|
|
340
|
+
- "Are you sure this method works for async calls?" (asking about previous explanation)
|
|
341
|
+
- "When you said RAM storage, did you mean on the user's machine?" (clarifying what was said)
|
|
342
|
+
- "Does this approach also handle edge cases?" (checking understanding)
|
|
332
343
|
|
|
333
344
|
TYPE B - Planning or Decision Request
|
|
334
|
-
The user
|
|
345
|
+
The user is asking the assistant to HELP THEM CHOOSE between options. The decision does NOT exist yet - they are deciding now. The user introduces alternatives and wants a recommendation or to weigh tradeoffs together.
|
|
346
|
+
|
|
347
|
+
Think about whether the user is introducing new options to choose between, wants recommendations for how to build something, or is working toward a plan they will implement later.
|
|
348
|
+
|
|
349
|
+
KEY DISTINCTION from Information:
|
|
350
|
+
- Planning: User introduces options to choose between → "Should we use X or Y?"
|
|
351
|
+
- Information: User asks about what assistant already said → "You mentioned X, are you sure?"
|
|
352
|
+
|
|
353
|
+
If the assistant ALREADY explained or decided something, and the user is asking about THAT explanation, it is Information, not Planning.
|
|
335
354
|
|
|
336
|
-
|
|
355
|
+
Examples of planning requests:
|
|
356
|
+
- "How should we implement user authentication?" (no decision made yet)
|
|
357
|
+
- "What's the best way to handle caching for this API?" (asking for recommendation)
|
|
358
|
+
- "Cum ar trebui sa structuram baza de date?" (exploring options)
|
|
359
|
+
- "I'm thinking about using Redis vs Memcached, what do you recommend?" (user introduces options)
|
|
360
|
+
- "Let's figure out the architecture before we start coding" (planning session)
|
|
361
|
+
- "We need to decide on the approach for handling errors" (decision needed)
|
|
337
362
|
|
|
338
|
-
|
|
339
|
-
- "
|
|
340
|
-
- "
|
|
341
|
-
- "
|
|
342
|
-
- "I'm thinking about using Redis vs Memcached, what do you recommend?"
|
|
343
|
-
- "Let's figure out the architecture before we start coding"
|
|
344
|
-
- "We need to decide on the approach for handling errors"
|
|
363
|
+
NOT planning (these are Information):
|
|
364
|
+
- "Are you sure Redis is the right choice?" (asking about previous recommendation)
|
|
365
|
+
- "Did you mean async or sync?" (clarifying what was said)
|
|
366
|
+
- "Will this also work for the edge cases we discussed?" (checking understanding)
|
|
345
367
|
|
|
346
368
|
TYPE C - Implementation Request
|
|
347
369
|
The user wants actual changes made. They want code written, files edited, commands run, or something built. The task involves using tools to modify the codebase.
|
|
@@ -438,6 +460,15 @@ Reason: The question is about how to implement the original request.
|
|
|
438
460
|
Example situation: Original goal was "explain how auth works", user asks "and how does the session storage work?"
|
|
439
461
|
Decision: new_task
|
|
440
462
|
Reason: This is a new information request, separate from the first.
|
|
463
|
+
|
|
464
|
+
CRITICAL - NEW TASK COMPLETED IN SAME TURN:
|
|
465
|
+
If the user's message starts a NEW task (different topic from original_goal) AND the assistant's response COMPLETES that new task in the same turn, use task_complete (NOT new_task).
|
|
466
|
+
|
|
467
|
+
Example: Original goal was "implement cache service", user now asks "build an EventEmitter class", assistant writes the complete EventEmitter code.
|
|
468
|
+
Decision: task_complete
|
|
469
|
+
Reason: The new task was requested AND completed. Use task_complete so it gets saved with the new goal.
|
|
470
|
+
|
|
471
|
+
The key insight: task_complete saves the memory. If you return new_task, the work won't be saved until a FUTURE completion. If Claude already finished the work, use task_complete.
|
|
441
472
|
</step_3_detect_new_task>
|
|
442
473
|
|
|
443
474
|
<important_notes>
|
|
@@ -480,16 +511,21 @@ RESPONSE RULES:
|
|
|
480
511
|
if (!needsCompression && assistantResponse.length > 0) {
|
|
481
512
|
analysis.step_reasoning = assistantResponse.substring(0, 1000);
|
|
482
513
|
}
|
|
483
|
-
debugLLM('analyzeTaskContext', `Result: task_type=${analysis.task_type}, action=${analysis.action}, reasoning="${analysis.reasoning?.substring(0, 150) || 'none'}"`);
|
|
514
|
+
debugLLM('analyzeTaskContext', `Result: task_type=${analysis.task_type}, action=${analysis.action}, goal="${analysis.current_goal?.substring(0, 50) || 'N/A'}" reasoning="${analysis.reasoning?.substring(0, 150) || 'none'}"`);
|
|
484
515
|
return analysis;
|
|
485
516
|
}
|
|
486
517
|
catch (parseError) {
|
|
487
518
|
debugLLM('analyzeTaskContext', `Parse error: ${String(parseError)}, using fallback`);
|
|
488
519
|
// Fallback: continue existing session or create new
|
|
520
|
+
// Use existing goal if available, otherwise leave empty (don't copy user prompt)
|
|
521
|
+
const fallbackGoal = currentSession?.original_goal && currentSession.original_goal.length > 0
|
|
522
|
+
? currentSession.original_goal
|
|
523
|
+
: ''; // Don't synthesize from user message - leave empty
|
|
489
524
|
return {
|
|
490
525
|
task_type: 'implementation',
|
|
491
526
|
action: currentSession ? 'continue' : 'new_task',
|
|
492
527
|
task_id: currentSession?.session_id || 'NEW',
|
|
528
|
+
current_goal: fallbackGoal,
|
|
493
529
|
reasoning: 'Fallback due to parse error',
|
|
494
530
|
step_reasoning: assistantResponse.substring(0, 1000),
|
|
495
531
|
};
|
|
@@ -511,7 +547,7 @@ export function isReasoningExtractionAvailable() {
|
|
|
511
547
|
export async function extractReasoningAndDecisions(formattedSteps, originalGoal) {
|
|
512
548
|
const client = getAnthropicClient();
|
|
513
549
|
if (formattedSteps.length < 50) {
|
|
514
|
-
return { reasoning_trace: [], decisions: [] };
|
|
550
|
+
return { system_name: null, summary: null, reasoning_trace: [], decisions: [] };
|
|
515
551
|
}
|
|
516
552
|
const prompt = `<role>
|
|
517
553
|
You are a Knowledge Engineer specialized in extracting reusable team knowledge from coding sessions.
|
|
@@ -533,114 +569,333 @@ ${formattedSteps.substring(0, 8000)}
|
|
|
533
569
|
|
|
534
570
|
<instructions>
|
|
535
571
|
|
|
536
|
-
|
|
572
|
+
*** GLOBAL STANDARDS FOR CODE REFERENCES ***
|
|
537
573
|
|
|
538
|
-
|
|
574
|
+
We want "Code Anchors" (Searchable Names), NOT "Implementation Logic" (Syntax).
|
|
539
575
|
|
|
540
|
-
|
|
541
|
-
|
|
576
|
+
1. NO SYNTAX / NO LOGIC:
|
|
577
|
+
- STRICTLY FORBIDDEN: \`if\`, \`for\`, \`while\`, \`=>\`, \`return\`, \`{ }\`, \`;\`.
|
|
578
|
+
- NEVER write snippet-style logic.
|
|
579
|
+
- BAD: "Uses \`user.id ? save() : null\` to persist." (This is logic)
|
|
580
|
+
- GOOD: "Uses \`save()\` method on \`User\` entity." (This is a named reference)
|
|
581
|
+
|
|
582
|
+
2. USE "NAMED ENTITIES" ONLY:
|
|
583
|
+
- Treat code references as Proper Nouns (Substantive Proprii).
|
|
584
|
+
- Only reference Names of: Functions, Classes, File Paths, Constants, Env Vars, Config Keys.
|
|
585
|
+
- Format: Wrap them in single backticks (e.g., \`auth.ts\`, \`MAX_RETRIES\`).
|
|
586
|
+
|
|
587
|
+
3. BE CONCISE:
|
|
588
|
+
- Do not paste long paths if not necessary. Use relative paths.
|
|
589
|
+
- BAD: \`src/features/users/controllers/auth.controller.ts\` (Too noisy)
|
|
590
|
+
- GOOD: \`auth.controller.ts\` (Sufficient anchor)
|
|
591
|
+
|
|
592
|
+
4. FACTUAL EXTRACTION (CRITICAL FOR Q&A SESSIONS):
|
|
593
|
+
|
|
594
|
+
PURPOSE: All extracted knowledge must be FACTUAL STATEMENTS about the codebase,
|
|
595
|
+
NOT descriptions of the conversation or session.
|
|
596
|
+
|
|
597
|
+
WHY THIS MATTERS:
|
|
598
|
+
- Chunks are stored as embeddings for semantic search
|
|
599
|
+
- "User asked about caching" has DIFFERENT embedding than "Cache uses LRU eviction"
|
|
600
|
+
- If we store meta-descriptions, future searches will NOT match
|
|
601
|
+
- We need UNIFORM factual statements regardless of session type (Q&A or implementation)
|
|
602
|
+
|
|
603
|
+
FORBIDDEN PATTERNS (NEVER USE THESE):
|
|
604
|
+
|
|
605
|
+
BANNED PHRASES - do NOT start sentences with:
|
|
606
|
+
- "User asked...", "Explained that...", "Discussed..."
|
|
607
|
+
- "The conversation...", "This session...", "It was determined..."
|
|
608
|
+
- "We talked about...", "Question about...", "Answered...", "Covered...", "Explored..."
|
|
609
|
+
|
|
610
|
+
BANNED META-WORDS anywhere in text:
|
|
611
|
+
- "session", "conversation", "discussion", "chat"
|
|
612
|
+
- "user", "developer", "team" (when used as actors doing things)
|
|
613
|
+
- "asked", "explained", "answered", "clarified"
|
|
614
|
+
- "this memory", "this task", "this query"
|
|
615
|
+
|
|
616
|
+
TRANSFORMATION EXAMPLES:
|
|
617
|
+
|
|
618
|
+
Example 1 - Authentication Q&A:
|
|
619
|
+
Session context: User asked "How does auth work?" then explained JWT flow
|
|
620
|
+
BAD: "User asked about authentication. Explained it uses JWT tokens."
|
|
621
|
+
BAD: "Discussion covered the auth mechanism and its JWT implementation."
|
|
622
|
+
GOOD: "Authentication uses JWT tokens with 24h expiry for stateless verification."
|
|
542
623
|
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
624
|
+
Example 2 - Caching explanation:
|
|
625
|
+
Session context: User asked "Why use Map for LRU?" then explained insertion order
|
|
626
|
+
BAD: "Explained why JavaScript Map was chosen for the LRU cache."
|
|
627
|
+
BAD: "Answered question about Map's insertion order property."
|
|
628
|
+
GOOD: "LRU Cache uses JavaScript Map because Map guarantees insertion order."
|
|
548
629
|
|
|
549
|
-
|
|
630
|
+
Example 3 - Architecture discussion:
|
|
631
|
+
Session context: User asked "What pattern does Circuit Breaker use?"
|
|
632
|
+
BAD: "Discussed the Circuit Breaker pattern and its three states."
|
|
633
|
+
GOOD: "Circuit Breaker implements finite state machine with CLOSED, OPEN, HALF_OPEN states."
|
|
550
634
|
|
|
551
|
-
|
|
552
|
-
- "CONCLUSION: JWT tokens stored in ~/.grov/credentials.json with 1hr access/7d refresh expiry"
|
|
553
|
-
- "CONCLUSION: Auth middleware in src/routes/auth.ts exports requireAuth and optionalAuth preHandlers"
|
|
554
|
-
- "CONCLUSION: Device flow polling interval is 5 seconds, endpoint /auth/device/poll"
|
|
635
|
+
MENTAL MODEL FOR EXTRACTION:
|
|
555
636
|
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
- "CONCLUSION: JWT is used for auth" (too generic, no specifics)
|
|
559
|
-
- "CONCLUSION: Explored the codebase" (process description, not finding)
|
|
637
|
+
Ask yourself: "If I remove all context about WHO asked and WHEN, what FACTUAL
|
|
638
|
+
KNOWLEDGE about the CODE remains?"
|
|
560
639
|
|
|
640
|
+
Transform pattern:
|
|
641
|
+
1. Identify the SUBJECT (component, function, pattern)
|
|
642
|
+
2. Identify the BEHAVIOR (what it does, how it works)
|
|
643
|
+
3. Identify the IMPLEMENTATION (specific details, values, files)
|
|
644
|
+
4. Write: "[SUBJECT] [BEHAVIOR] [IMPLEMENTATION]"
|
|
561
645
|
|
|
562
|
-
|
|
646
|
+
Result pattern: "[Component] uses/implements/provides [mechanism] for/via/with [details]"
|
|
647
|
+
|
|
648
|
+
Examples of correct factual statements:
|
|
649
|
+
- "Retry Queue uses exponential backoff with 1s base delay"
|
|
650
|
+
- "EventEmitter implements copy-before-iterate for safe listener removal"
|
|
651
|
+
- "LRU Cache provides O(1) eviction via Map insertion order property"
|
|
652
|
+
|
|
653
|
+
SELF-CHECK BEFORE OUTPUT:
|
|
654
|
+
|
|
655
|
+
Before returning, verify EACH knowledge_pair and decision:
|
|
656
|
+
- Does it start with a code component/system, NOT "User/Explained/Discussed"?
|
|
657
|
+
- Is it a factual statement about code, NOT about the conversation?
|
|
658
|
+
- Could this exact sentence appear in technical documentation?
|
|
659
|
+
- If someone searches for this topic, would this sentence match?
|
|
660
|
+
|
|
661
|
+
If ANY answer is NO, rewrite it as a factual statement.
|
|
662
|
+
|
|
663
|
+
TYPE A: CONCLUSIONS (Factual Findings)
|
|
563
664
|
|
|
564
665
|
What this means:
|
|
565
|
-
|
|
666
|
+
Facts explicitly found in the code.
|
|
667
|
+
CRITICAL GOAL: Eliminate vagueness. Replace generic descriptions with specific "Named Entities" defined above.
|
|
566
668
|
|
|
567
|
-
|
|
669
|
+
Rules for High-Quality Conclusions:
|
|
568
670
|
|
|
569
|
-
1.
|
|
570
|
-
|
|
671
|
+
1. The "Who" Rule:
|
|
672
|
+
- NEVER start with "The system", "The function", or "We found".
|
|
673
|
+
- Start with the specific Component/Class/File Name.
|
|
674
|
+
- BAD: "The function calculates the hash."
|
|
675
|
+
- GOOD: "\`FileScanner\` calculates \`SHA-256\` hash."
|
|
571
676
|
|
|
572
|
-
2.
|
|
573
|
-
|
|
677
|
+
2. The "Value" Rule:
|
|
678
|
+
- Do not use adjectives like "short", "large", "standard". Use the actual values found.
|
|
679
|
+
- BAD: "Sets a short timeout."
|
|
680
|
+
- GOOD: "Sets \`connectionTimeout\` to \`500ms\`."
|
|
574
681
|
|
|
575
|
-
3.
|
|
576
|
-
|
|
682
|
+
3. The "Location" Rule:
|
|
683
|
+
- Always mention WHERE this happens (File or Module).
|
|
684
|
+
- BAD: "Validates the token."
|
|
685
|
+
- GOOD: "Validates \`jwt_token\` inside \`auth.middleware.ts\`."
|
|
577
686
|
|
|
578
|
-
|
|
579
|
-
|
|
687
|
+
Format Pattern:
|
|
688
|
+
"CONCLUSION: [Code Anchor Subject] performs [Action] using [Code Anchor Object/Value]"
|
|
580
689
|
|
|
581
|
-
|
|
690
|
+
Examples:
|
|
691
|
+
- "CONCLUSION: \`JwtService\` in \`jwt.ts\` signs tokens with \`RS256\` algorithm, \`1hr\` expiry"
|
|
692
|
+
- "CONCLUSION: \`requireAuth\` preHandler in \`auth.ts\` validates \`Authorization\` header"
|
|
693
|
+
- "CONCLUSION: \`CredentialStore\` writes to \`~/.grov/credentials.json\` with \`0600\` permissions"
|
|
694
|
+
|
|
695
|
+
═══════════════════════════════════════════════════════════════
|
|
696
|
+
|
|
697
|
+
TYPE B: INSIGHTS (Architectural Analysis)
|
|
698
|
+
|
|
699
|
+
What this means:
|
|
700
|
+
The architectural "Why" behind the code.
|
|
701
|
+
CRITICAL GOAL: Connect the code to a Computer Science Concept or Business Outcome.
|
|
582
702
|
|
|
583
|
-
|
|
584
|
-
- "INSIGHT: Dual-file pattern (jwt.ts + credentials.ts) separates crypto from I/O, reducing attack surface"
|
|
585
|
-
- "INSIGHT: Device Authorization Flow chosen over password flow - enables OAuth providers without storing secrets in CLI"
|
|
586
|
-
- "INSIGHT: Teams array cached in JWT payload - avoids DB query per request but requires token refresh on team changes"
|
|
703
|
+
Rules for High-Quality Insights:
|
|
587
704
|
|
|
588
|
-
|
|
589
|
-
-
|
|
590
|
-
-
|
|
591
|
-
-
|
|
705
|
+
1. Name the Pattern/Trade-off:
|
|
706
|
+
- Use standard terminology: "Singleton", "Lazy Loading", "Race Condition", "O(N) Complexity", "Dependency Injection", "Circuit Breaker".
|
|
707
|
+
- BAD: "This is good for organizing code."
|
|
708
|
+
- GOOD: "Implements \`Dependency Injection\` to decouple storage logic."
|
|
709
|
+
|
|
710
|
+
2. Explain the "Hard" Consequence:
|
|
711
|
+
- Focus on: Memory, CPU, Latency, Security, Consistency, Disk I/O.
|
|
712
|
+
- BAD: "It makes it faster."
|
|
713
|
+
- GOOD: "Reduces I/O operations by caching \`scan_result\` in memory."
|
|
714
|
+
|
|
715
|
+
Format Pattern:
|
|
716
|
+
"INSIGHT: Implements [Pattern Name] to optimize [Resource/Outcome] by [Specific Mechanism]"
|
|
717
|
+
|
|
718
|
+
Examples:
|
|
719
|
+
- "INSIGHT: \`timingSafeEqual\` prevents timing attacks - constant-time comparison regardless of input"
|
|
720
|
+
- "INSIGHT: Lazy expiration pattern in \`MemoryCache\` - trades read latency for no timer overhead"
|
|
721
|
+
- "INSIGHT: JWT payload caches \`teams[]\` - avoids DB query per request, requires refresh on team change"
|
|
592
722
|
|
|
593
723
|
</instructions>
|
|
594
724
|
|
|
725
|
+
<summary_rules>
|
|
726
|
+
═══════════════════════════════════════════════════════════════
|
|
727
|
+
SUMMARY GENERATION - CRITICAL FOR SEMANTIC SEARCH
|
|
728
|
+
═══════════════════════════════════════════════════════════════
|
|
729
|
+
|
|
730
|
+
FRONT-LOADING RULE:
|
|
731
|
+
First 7-8 words determine 80% of search match quality.
|
|
732
|
+
Start DIRECTLY with the main technology or system name, then immediately follow with what was done in a few key words.
|
|
733
|
+
|
|
734
|
+
WRONG: "In this session we implemented a metrics system..."
|
|
735
|
+
WRONG: "This memory contains information about..."
|
|
736
|
+
WRONG: "Discussion about implementing..."
|
|
737
|
+
RIGHT: "Prometheus Metrics System with Counter, Gauge, Histogram primitives..."
|
|
738
|
+
RIGHT: "Event Bus pub/sub with wildcard subscriptions and circular buffer..."
|
|
739
|
+
RIGHT: "Redis caching layer with TTL expiration and LRU eviction..."
|
|
740
|
+
|
|
741
|
+
CONTENT RULES:
|
|
742
|
+
1. Lead with technology/system name (Prometheus, Redis, Event Bus, AWS S3)
|
|
743
|
+
2. Include 2-3 key technical terms that users would search for
|
|
744
|
+
3. NO meta-language: ban "this memory", "discussion about", "implemented", "session"
|
|
745
|
+
4. NO file paths (save those for conclusions)
|
|
746
|
+
5. Describe WHAT it is, not WHAT was done
|
|
747
|
+
|
|
748
|
+
LENGTH: 150-200 characters MAXIMUM. Dense, not verbose.
|
|
749
|
+
</summary_rules>
|
|
750
|
+
|
|
595
751
|
<output_format>
|
|
596
752
|
Return a JSON object with this structure:
|
|
597
753
|
|
|
598
754
|
{
|
|
755
|
+
"system_name": "[MANDATORY - see SYSTEM_NAME RULES below]",
|
|
756
|
+
"summary": "[150-200 chars MAX - MUST follow SUMMARY RULES above]",
|
|
599
757
|
"knowledge_pairs": [
|
|
600
758
|
{
|
|
759
|
+
"aspect": "[Specific component within system - see ASPECT RULES below]",
|
|
601
760
|
"conclusion": "CONCLUSION: [specific factual finding with file paths and values]",
|
|
602
761
|
"insight": "INSIGHT: [inference or implication RELATED to this conclusion]"
|
|
603
|
-
},
|
|
604
|
-
{
|
|
605
|
-
"conclusion": "CONCLUSION: [another specific finding]",
|
|
606
|
-
"insight": "INSIGHT: [what this means for future development]"
|
|
607
762
|
}
|
|
608
763
|
],
|
|
609
764
|
"decisions": [
|
|
610
765
|
{
|
|
611
|
-
"
|
|
612
|
-
"
|
|
766
|
+
"aspect": "[Specific component this decision is about]",
|
|
767
|
+
"choice": "[What was chosen - be specific. Max 100 chars]",
|
|
768
|
+
"reason": "[Why - include whether this is factual or inferred. Max 150 chars]"
|
|
613
769
|
}
|
|
614
770
|
]
|
|
615
771
|
}
|
|
616
772
|
|
|
773
|
+
═══════════════════════════════════════════════════════════════
|
|
774
|
+
SYSTEM_NAME RULES (MANDATORY - TOP LEVEL FIELD)
|
|
775
|
+
═══════════════════════════════════════════════════════════════
|
|
776
|
+
|
|
777
|
+
PURPOSE: This is the "parent anchor" that connects all knowledge and decisions to the same system. It will be prepended to EVERY chunk for semantic search.
|
|
778
|
+
|
|
779
|
+
WHAT TO PUT: The main system, component, or feature being discussed in this task. Extract it from the PROJECT GOAL - ask yourself "What is being built/analyzed/debugged?"
|
|
780
|
+
|
|
781
|
+
HOW TO IDENTIFY:
|
|
782
|
+
- Look at the goal/query - what noun represents the main subject?
|
|
783
|
+
- It should be a PROPER NOUN (specific name), not a generic term
|
|
784
|
+
- If goal is "Build a retry queue with exponential backoff" then system_name is "Retry Queue"
|
|
785
|
+
- If goal is "Fix authentication bug in login flow" then system_name is "Auth Module" or "Login Flow"
|
|
786
|
+
- If goal is "Optimize database queries for user search" then system_name is "User Search" or "Search Query Optimizer"
|
|
787
|
+
|
|
788
|
+
GOOD EXAMPLES:
|
|
789
|
+
- "Retry Queue" (specific component)
|
|
790
|
+
- "Webhook Delivery System" (specific feature)
|
|
791
|
+
- "Rate Limiter" (specific utility)
|
|
792
|
+
- "JWT Authentication" (specific mechanism)
|
|
793
|
+
- "Memory Cache" (specific component)
|
|
794
|
+
- "File Scanner" (specific service)
|
|
795
|
+
|
|
796
|
+
BAD EXAMPLES:
|
|
797
|
+
- "System" (too generic)
|
|
798
|
+
- "Code" (meaningless)
|
|
799
|
+
- "Implementation" (not a noun/component)
|
|
800
|
+
- "Backend" (too broad)
|
|
801
|
+
- "Feature" (not specific)
|
|
802
|
+
- "The function" (not a name)
|
|
803
|
+
|
|
804
|
+
RULE: If a user searches "How does [system_name] work?", this field should make that search find ALL chunks from this memory.
|
|
805
|
+
|
|
806
|
+
═══════════════════════════════════════════════════════════════
|
|
807
|
+
ASPECT RULES (per knowledge_pair and decision)
|
|
808
|
+
═══════════════════════════════════════════════════════════════
|
|
809
|
+
|
|
810
|
+
PURPOSE: The specific component, pattern, or topic WITHIN the system_name that THIS PARTICULAR entry discusses. More granular than system_name.
|
|
811
|
+
|
|
812
|
+
WHAT TO PUT: The specific part of the system this knowledge/decision is about. Ask yourself "What specific aspect of [system_name] does this entry cover?"
|
|
813
|
+
|
|
814
|
+
RELATIONSHIP TO system_name:
|
|
815
|
+
- system_name = "Retry Queue" (the whole system)
|
|
816
|
+
- aspect = "Job State Model" (one specific part)
|
|
817
|
+
- aspect = "Backoff Strategy" (another specific part)
|
|
818
|
+
- aspect = "Failed Job Recovery" (another specific part)
|
|
819
|
+
|
|
820
|
+
HOW TO IDENTIFY:
|
|
821
|
+
- What sub-component or pattern does this entry describe?
|
|
822
|
+
- What would you title this paragraph if it were a section header?
|
|
823
|
+
- It should be MORE SPECIFIC than system_name
|
|
824
|
+
|
|
825
|
+
GOOD EXAMPLES (for system_name = "Retry Queue"):
|
|
826
|
+
- "Job State Model" (how jobs are stored)
|
|
827
|
+
- "Backoff Strategy" (how delays work)
|
|
828
|
+
- "Failed Job Recovery" (how failures are handled)
|
|
829
|
+
- "Queue Statistics" (how stats are exposed)
|
|
830
|
+
|
|
831
|
+
GOOD EXAMPLES (for system_name = "Webhook Delivery"):
|
|
832
|
+
- "Signature Verification" (security aspect)
|
|
833
|
+
- "Retry Logic" (reliability aspect)
|
|
834
|
+
- "Payload Serialization" (data format aspect)
|
|
835
|
+
- "Timeout Handling" (error handling aspect)
|
|
836
|
+
|
|
837
|
+
BAD EXAMPLES:
|
|
838
|
+
- Same as system_name (redundant - don't repeat parent)
|
|
839
|
+
- "Implementation" (not specific)
|
|
840
|
+
- "Code" (meaningless)
|
|
841
|
+
- "Logic" (too vague)
|
|
842
|
+
|
|
843
|
+
═══════════════════════════════════════════════════════════════
|
|
844
|
+
|
|
617
845
|
IMPORTANT: Generate knowledge as PAIRS where each INSIGHT is directly related to its CONCLUSION.
|
|
618
846
|
|
|
619
|
-
Example
|
|
847
|
+
Example with system_name and aspect:
|
|
620
848
|
{
|
|
621
|
-
"
|
|
622
|
-
"
|
|
849
|
+
"system_name": "Memory Cache",
|
|
850
|
+
"knowledge_pairs": [
|
|
851
|
+
{
|
|
852
|
+
"aspect": "Expiration Strategy",
|
|
853
|
+
"conclusion": "CONCLUSION: Uses lazy expiration - entries checked/deleted on get(), not via timers",
|
|
854
|
+
"insight": "INSIGHT: Lazy expiration avoids timer overhead - trades CPU on read for memory efficiency"
|
|
855
|
+
}
|
|
856
|
+
]
|
|
623
857
|
}
|
|
624
858
|
|
|
625
859
|
Rules:
|
|
626
|
-
1.
|
|
627
|
-
2.
|
|
628
|
-
3.
|
|
629
|
-
4.
|
|
630
|
-
5.
|
|
631
|
-
6.
|
|
632
|
-
7.
|
|
633
|
-
8.
|
|
860
|
+
1. system_name is MANDATORY - identifies the parent system for ALL entries
|
|
861
|
+
2. Each pair MUST have aspect, conclusion AND a related insight
|
|
862
|
+
3. aspect should be MORE SPECIFIC than system_name (not the same)
|
|
863
|
+
4. The insight MUST add value beyond the conclusion (inference, implication, pattern)
|
|
864
|
+
5. DO NOT repeat system_name in conclusion/insight
|
|
865
|
+
6. Max 5 pairs - prioritize most valuable
|
|
866
|
+
7. Max 5 decisions - only significant architectural choices
|
|
867
|
+
8. If you cannot find a meaningful insight for a conclusion, still include with insight: null
|
|
868
|
+
9. NEVER include process descriptions ("explored", "searched", "looked at")
|
|
869
|
+
10. English only, no emojis
|
|
870
|
+
11. Use prefixes "CONCLUSION: " and "INSIGHT: " in the strings
|
|
871
|
+
|
|
872
|
+
CHARACTER LIMITS (strict - for embedding optimization):
|
|
873
|
+
- system_name: 2-5 words (e.g. "Retry Queue", "Webhook Delivery System")
|
|
874
|
+
- summary: 150-200 characters MAX (front-loaded with tech name, NO meta-language)
|
|
875
|
+
- Each aspect: 2-4 words (e.g. "Job State Model", "Backoff Strategy")
|
|
876
|
+
- Each conclusion: max 150 characters (including "CONCLUSION: " prefix)
|
|
877
|
+
- Each insight: max 150 characters (including "INSIGHT: " prefix)
|
|
878
|
+
- Each decision aspect: 2-4 words
|
|
879
|
+
- Each decision choice: max 100 characters
|
|
880
|
+
- Each decision reason: max 150 characters
|
|
881
|
+
If content exceeds limit, prioritize SPECIFICITY over completeness.
|
|
882
|
+
Truncate gracefully - never cut mid-word or mid-path.
|
|
634
883
|
</output_format>
|
|
635
884
|
|
|
636
885
|
<validation>
|
|
637
886
|
Before responding, verify:
|
|
887
|
+
- Did I include a system_name that identifies the PARENT system?
|
|
888
|
+
- Is system_name a specific proper noun, NOT generic like "System" or "Code"?
|
|
889
|
+
- Is the summary 150-200 chars, front-loaded with technology name, NO meta-language?
|
|
890
|
+
- Does each knowledge_pair include an 'aspect' field MORE SPECIFIC than system_name?
|
|
638
891
|
- Does each CONCLUSION contain a specific file path or value?
|
|
639
892
|
- Is each INSIGHT directly related to its paired CONCLUSION?
|
|
640
893
|
- Does each INSIGHT add something NOT explicitly in the input?
|
|
641
894
|
- Would a new developer find the pairs useful without seeing the original session?
|
|
642
895
|
- Did I avoid process descriptions?
|
|
643
896
|
- Are the decisions about significant architectural choices?
|
|
897
|
+
- Does each decision include a specific 'aspect' field?
|
|
898
|
+
- Are ALL entries within character limits?
|
|
644
899
|
</validation>
|
|
645
900
|
|
|
646
901
|
Return ONLY valid JSON, no markdown code blocks, no explanation.`;
|
|
@@ -654,8 +909,8 @@ Return ONLY valid JSON, no markdown code blocks, no explanation.`;
|
|
|
654
909
|
const text = response.content[0].type === 'text' ? response.content[0].text : '';
|
|
655
910
|
const jsonMatch = text.match(/\{[\s\S]*\}/);
|
|
656
911
|
if (!jsonMatch) {
|
|
657
|
-
|
|
658
|
-
return { reasoning_trace: [], decisions: [] };
|
|
912
|
+
console.error('[LLM-EXTRACTOR] No JSON in response');
|
|
913
|
+
return { system_name: null, summary: null, reasoning_trace: [], decisions: [] };
|
|
659
914
|
}
|
|
660
915
|
// Try to parse JSON, with repair attempts for common Haiku formatting issues
|
|
661
916
|
let result;
|
|
@@ -673,49 +928,584 @@ Return ONLY valid JSON, no markdown code blocks, no explanation.`;
|
|
|
673
928
|
try {
|
|
674
929
|
result = JSON.parse(repaired);
|
|
675
930
|
}
|
|
676
|
-
catch {
|
|
677
|
-
// Last resort: try to extract
|
|
931
|
+
catch (repairError) {
|
|
932
|
+
// Last resort: try to extract individual fields
|
|
678
933
|
const pairsMatch = jsonMatch[0].match(/"knowledge_pairs"\s*:\s*\[([\s\S]*?)\]/);
|
|
679
934
|
if (pairsMatch) {
|
|
680
935
|
try {
|
|
681
936
|
const pairs = JSON.parse(`[${pairsMatch[1].replace(/,\s*$/, '')}]`);
|
|
682
|
-
|
|
937
|
+
const systemMatch = jsonMatch[0].match(/"system_name"\s*:\s*"([^"]+)"/);
|
|
938
|
+
const extractedSystemName = systemMatch ? systemMatch[1] : undefined;
|
|
939
|
+
result = { system_name: extractedSystemName, knowledge_pairs: pairs, decisions: [] };
|
|
683
940
|
}
|
|
684
|
-
catch {
|
|
685
|
-
|
|
941
|
+
catch (fallbackError) {
|
|
942
|
+
console.error('[LLM-EXTRACTOR] JSON parse failed');
|
|
943
|
+
throw parseError;
|
|
686
944
|
}
|
|
687
945
|
}
|
|
688
946
|
else {
|
|
947
|
+
console.error('[LLM-EXTRACTOR] JSON parse failed');
|
|
689
948
|
throw parseError;
|
|
690
949
|
}
|
|
691
950
|
}
|
|
692
951
|
}
|
|
693
|
-
//
|
|
952
|
+
// Extract system_name (parent anchor for all chunks)
|
|
953
|
+
const systemName = result.system_name || null;
|
|
954
|
+
// Keep knowledge_pairs as objects (with aspect for semantic search)
|
|
694
955
|
let reasoningTrace = [];
|
|
695
956
|
if (result.knowledge_pairs && result.knowledge_pairs.length > 0) {
|
|
696
|
-
// New format:
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
}
|
|
705
|
-
debugLLM('extractReasoningAndDecisions', `Extracted ${result.knowledge_pairs.length} pairs (${reasoningTrace.length} entries), ${result.decisions?.length || 0} decisions`);
|
|
957
|
+
// New format: keep as objects with aspect (fall back to tags for backwards compat)
|
|
958
|
+
reasoningTrace = result.knowledge_pairs.map(pair => ({
|
|
959
|
+
aspect: pair.aspect || pair.tags || '', // Prefer aspect, fallback to tags
|
|
960
|
+
tags: pair.tags, // Keep for backwards compat
|
|
961
|
+
conclusion: pair.conclusion || '',
|
|
962
|
+
insight: pair.insight || null,
|
|
963
|
+
}));
|
|
964
|
+
debugLLM('extractReasoningAndDecisions', `Extracted system_name="${systemName}", ${result.knowledge_pairs.length} pairs, ${result.decisions?.length || 0} decisions`);
|
|
706
965
|
}
|
|
707
966
|
else if (result.reasoning_trace) {
|
|
708
|
-
// Backwards compatibility: old format with flat array
|
|
709
|
-
reasoningTrace = result.reasoning_trace
|
|
967
|
+
// Backwards compatibility: old format with flat string array - wrap in objects
|
|
968
|
+
reasoningTrace = result.reasoning_trace.map(entry => ({
|
|
969
|
+
aspect: '', // No aspect in old format
|
|
970
|
+
conclusion: entry,
|
|
971
|
+
insight: null,
|
|
972
|
+
}));
|
|
710
973
|
debugLLM('extractReasoningAndDecisions', `Extracted ${reasoningTrace.length} traces (old format), ${result.decisions?.length || 0} decisions`);
|
|
711
974
|
}
|
|
712
975
|
return {
|
|
976
|
+
system_name: systemName,
|
|
977
|
+
summary: result.summary || null,
|
|
713
978
|
reasoning_trace: reasoningTrace,
|
|
714
979
|
decisions: result.decisions || [],
|
|
715
980
|
};
|
|
716
981
|
}
|
|
717
982
|
catch (error) {
|
|
718
983
|
debugLLM('extractReasoningAndDecisions', `Error: ${String(error)}`);
|
|
719
|
-
return { reasoning_trace: [], decisions: [] };
|
|
984
|
+
return { system_name: null, summary: null, reasoning_trace: [], decisions: [] };
|
|
720
985
|
}
|
|
721
986
|
}
|
|
987
|
+
/**
 * Check if shouldUpdateMemory is available.
 *
 * @returns {boolean} true when ANTHROPIC_API_KEY or GROV_API_KEY is set to a
 *   non-empty value in the process environment, false otherwise.
 */
export function isShouldUpdateAvailable() {
    const { ANTHROPIC_API_KEY: anthropicKey, GROV_API_KEY: grovKey } = process.env;
    // Empty strings count as "not configured", same as unset variables.
    return Boolean(anthropicKey) || Boolean(grovKey);
}
|
|
993
|
+
/**
|
|
994
|
+
* Decide if a memory should be updated based on new session data
|
|
995
|
+
* Called when a match is found before sync
|
|
996
|
+
*
|
|
997
|
+
* Handles 5 tasks in one Haiku call:
|
|
998
|
+
* 1. should_update decision (boolean + reason)
|
|
999
|
+
* 2. superseded_mapping (which old decisions are replaced by which new ones)
|
|
1000
|
+
* 3. condensed_old_reasoning (max 200 chars for reasoning_evolution)
|
|
1001
|
+
* 4. evolution_summary (200-250 chars for evolution_steps)
|
|
1002
|
+
* 5. consolidated_evolution_steps (only if > 10 entries, CONDITIONAL)
|
|
1003
|
+
*
|
|
1004
|
+
* @param existingMemory - The memory that was matched
|
|
1005
|
+
* @param newData - Extracted reasoning and decisions from current session
|
|
1006
|
+
* @param sessionContext - Task type, original query, files touched
|
|
1007
|
+
* @returns Decision result with all transformation data
|
|
1008
|
+
*/
|
|
1009
|
+
/**
 * Build the prompt for shouldUpdateMemory.
 * Structured with XML tags for clear task separation.
 *
 * Missing or non-array list fields (decisions, reasoning_trace,
 * evolution_steps, files_touched) are treated as empty lists so a memory
 * without history still produces a prompt instead of throwing.
 *
 * Reasoning entries may be plain strings (old format) or objects with
 * `aspect` / `conclusion` / `insight` fields (current extractor output);
 * objects are rendered as text rather than being stringified to
 * "[object Object]".
 *
 * @param {object} existingMemory - Matched memory: goal, decisions, reasoning_trace, evolution_steps, files_touched.
 * @param {object} newData - Extracted data from the session: decisions, reasoning_trace.
 * @param {object} sessionContext - task_type, original_query, files_touched.
 * @param {boolean} needsConsolidation - When true, the Task 5 section and its output field are included.
 * @param {number} evolutionCount - Number of evolution steps, quoted in the Task 5 section.
 * @returns {string} The full prompt text.
 */
function buildShouldUpdatePrompt(existingMemory, newData, sessionContext, needsConsolidation, evolutionCount) {
    // Anything that is not an array is handled as "no entries".
    const toList = (value) => (Array.isArray(value) ? value : []);
    // Render one reasoning entry; strings pass through unchanged.
    const describeReasoning = (entry) => {
        if (entry == null) {
            return '';
        }
        if (typeof entry !== 'object') {
            return String(entry);
        }
        const label = entry.aspect ? `[${entry.aspect}] ` : '';
        const insight = entry.insight ? ` (insight: ${entry.insight})` : '';
        return `${label}${entry.conclusion ?? ''}${insight}`;
    };
    const evolutionSteps = toList(existingMemory.evolution_steps);
    const memoryFiles = toList(existingMemory.files_touched);
    const sessionFiles = toList(sessionContext.files_touched);
    // Format existing decisions with indices (indices are referenced by superseded_mapping.old_index)
    const formattedDecisions = toList(existingMemory.decisions)
        .map((d, i) => `[${i}] ${d.choice} (${d.active !== false ? 'active' : 'inactive'}): ${d.reason}`)
        .join('\n') || 'None';
    // Format existing reasoning trace (limit to 10)
    const formattedReasoning = toList(existingMemory.reasoning_trace)
        .slice(0, 10)
        .map(describeReasoning)
        .join('\n') || 'None';
    // Format evolution steps
    const formattedEvolution = evolutionSteps
        .map(e => `- ${e.date}: ${e.summary}`)
        .join('\n') || 'No evolution history yet';
    // Format new decisions
    const formattedNewDecisions = toList(newData.decisions)
        .map(d => `- ${d.choice}: ${d.reason}`)
        .join('\n') || 'None extracted';
    // Format new reasoning (limit to 5)
    const formattedNewReasoning = toList(newData.reasoning_trace)
        .slice(0, 5)
        .map(describeReasoning)
        .join('\n') || 'None extracted';
    // Build output format based on whether consolidation is needed
    const outputFormat = needsConsolidation
        ? `{
"should_update": boolean,
"reason": "1-2 sentence explanation of your decision",
"superseded_mapping": [{"old_index": number, "replaced_by_choice": "string", "replaced_by_reason": "string"}] or [],
"condensed_old_reasoning": "string max 200 chars" or null,
"evolution_summary": "string 200-250 chars" or null,
"consolidated_evolution_steps": [{"summary": "...", "date": "YYYY-MM-DD"}, ...]
}`
        : `{
"should_update": boolean,
"reason": "1-2 sentence explanation of your decision",
"superseded_mapping": [{"old_index": number, "replaced_by_choice": "string", "replaced_by_reason": "string"}] or [],
"condensed_old_reasoning": "string max 200 chars" or null,
"evolution_summary": "string 200-250 chars" or null
}`;
    // Build consolidation section (Task 5) - only if needed
    // Defined here to maintain task order in code: 1, 2, 3, 4, then 5
    const consolidationSection = needsConsolidation ? `
<task_5_consolidation>
═══════════════════════════════════════════════════════════════
TASK 5: CONSOLIDATION REQUIRED
═══════════════════════════════════════════════════════════════

Current evolution_steps has ${evolutionCount} entries (maximum is 10).

You MUST consolidate the OLDEST 3-5 entries into 1-2 summary entries.
Keep the NEWEST entries unchanged.

Current evolution_steps:
${evolutionSteps.map((e, i) => `[${i}] ${e.date}: ${e.summary}`).join('\n')}

Rules:
1. Merge entries [0] to [3] or [4] into 1-2 summary entries
2. Keep entries [5] onwards unchanged
3. Each summary should capture the key transitions, not every detail
4. Preserve dates - use the earliest date for consolidated entries

Return the FULL consolidated array in consolidated_evolution_steps.
</task_5_consolidation>
` : '';
    return `<role>
You are a Memory Update Analyst for Grov, a coding assistant that maintains team knowledge.

Your job is to analyze whether an existing memory should be UPDATED based on new session data, or if the new session is just a query about existing knowledge (SKIP).

You have ${needsConsolidation ? '5' : '4'} tasks to complete. Read all instructions carefully before responding.
</role>

<context>
WHAT IS A MEMORY?
A memory stores knowledge from past coding sessions: decisions made, reasoning discovered, and how the project evolved over time.

WHY UPDATE MATTERS:
- UPDATE when: user made real changes, switched approaches, or discovered new information
- SKIP when: user just asked questions about existing knowledge without changing anything

THE PROBLEM WE SOLVE:
User asks "Why did we choose JWT?" then says "Ok I understand" = SKIP (just a question)
User says "Let's switch from JWT to sessions" then confirms "Ok let's do it" = UPDATE (real change)
</context>

<existing_memory>
<goal>${existingMemory.goal || 'Not specified'}</goal>

<decisions>
${formattedDecisions}
</decisions>

<reasoning_trace>
${formattedReasoning}
</reasoning_trace>

<evolution_steps>
${formattedEvolution}
</evolution_steps>

<files_in_memory>
${memoryFiles.slice(0, 10).join(', ') || 'None'}
</files_in_memory>
</existing_memory>

<new_session_data>
<task_type>${sessionContext.task_type}</task_type>
<original_query>${sessionContext.original_query}</original_query>

<files_touched_in_session>
${sessionFiles.join(', ') || 'None'}
</files_touched_in_session>

<extracted_decisions>
${formattedNewDecisions}
</extracted_decisions>

<extracted_reasoning>
${formattedNewReasoning}
</extracted_reasoning>
</new_session_data>

<task_1_should_update>
═══════════════════════════════════════════════════════════════
TASK 1: Decide should_update (boolean) and provide reason
═══════════════════════════════════════════════════════════════

<strong_signals>
RETURN should_update: true IF ANY of these STRONG signals are present:

STRONG SIGNAL A - Files were modified:
If files_touched_in_session is NOT empty, code was changed. Real changes must be recorded.

STRONG SIGNAL B - Decisions are OPPOSITE or ALTERNATIVE:
Compare extracted_decisions with existing decisions.
OPPOSITE/ALTERNATIVE means:
- They solve the SAME problem (e.g., both about authentication)
- But use DIFFERENT approach (e.g., JWT vs sessions)
- Choosing one means NOT using the other

Example OPPOSITE: "Use JWT" vs "Use sessions" = OPPOSITE (both auth, different approach)
Example NOT OPPOSITE: "Use JWT" vs "JWT with refresh tokens" = REFINEMENT (same approach, more detail)
</strong_signals>

<weak_signals>
WEAK SIGNALS (require combination):
- task_type is "planning" AND decisions cover a NEW topic not in existing memory
- User confirmed a proposed change (patterns: "ok let's do", "yes", "da", "hai", "mergem cu")
BUT confirmation must be IN CONTEXT of a change proposal, not just acknowledgment.
</weak_signals>

<false_criteria>
RETURN should_update: false IF:

KEY DISTINCTION - Who introduced the topic?

If the assistant ALREADY explained or decided something, and the user is
asking ABOUT that explanation, this is clarifying Q&A, not new work.

CLEAR FALSE SIGNAL:
task_type is "information" AND files_touched_in_session is empty:
- The user was asking questions or seeking clarification
- No code was modified (only edit/write counts, not read)
- User is asking about what was ALREADY explained:
- Confirmation: "Are you sure about X?"
- Clarification: "Did you mean Y?"
- Understanding: "Does this also work for Z?"
- These are NOT new decisions - just questions about existing explanations
- should_update: false

ALSO FALSE:
- extracted_decisions are REFORMULATIONS of existing (same meaning, different words)
- No NEW options introduced by user - just explaining existing decisions
</false_criteria>
</task_1_should_update>

<task_2_superseded_decisions>
═══════════════════════════════════════════════════════════════
TASK 2: Identify superseded decisions with replacement mapping
═══════════════════════════════════════════════════════════════

ONLY IF should_update = true:

For each existing decision, check if ANY new decision SUPERSEDES it.
Return a MAPPING that includes the replacement details.

<definition>
WHAT DOES "SUPERSEDED" MEAN?

A decision is SUPERSEDED only when ALL these conditions are true:

1. SAME DOMAIN: Both decisions address the SAME technical area:
- Authentication: JWT, sessions, OAuth, API keys
- Database: PostgreSQL, MySQL, MongoDB, SQLite
- Caching: Redis, Memcached, in-memory
- Storage: local files, S3, cloud storage
- Framework: React, Vue, Angular

2. MUTUALLY EXCLUSIVE: Choosing one means NOT using the other.
You cannot use both solutions simultaneously for the same purpose.

3. EXPLICIT REPLACEMENT: The new decision clearly replaces the old approach,
not just adds to it or refines it.
</definition>

<protections>
WHAT IS NOT SUPERSEDED (IMPORTANT)

DO NOT mark as superseded if:

1. DIFFERENT DOMAINS:
- "Use PostgreSQL" and "Use Redis for caching" = DIFFERENT domains
- Database storage ≠ caching layer. Both can coexist.

2. REFINEMENT, not replacement:
- "Use JWT" → "Use JWT with refresh tokens" = REFINEMENT
- Same approach, more detail. NOT superseded.

3. ADDITION, not replacement:
- "Add rate limiting" does NOT supersede "Use JWT"
- Different concerns, both remain active.

4. UNCERTAIN CONNECTION:
- If you're not 100% sure they're the same domain → DO NOT SUPERSEDE
- Missing a supersede = minor issue
- Wrong supersede = corrupts history (worse!)

DEFAULT: If uncertain, return empty mapping. Be conservative.
</protections>

<output_format_task2>
Return superseded_mapping as array of objects:

superseded_mapping: [
{
"old_index": 0,
"replaced_by_choice": "Use sessions",
"replaced_by_reason": "Better for long-running operations"
}
]

If no decisions are superseded: return empty array []
If should_update = false: return empty array []
</output_format_task2>

<examples_task2>
EXAMPLE A - SUPERSEDED (same domain, opposite approach):
existing: [0] "Use JWT for authentication"
new: "Use session-based auth with Redis"
→ SUPERSEDED: same domain (auth), mutually exclusive
→ superseded_mapping: [{"old_index": 0, "replaced_by_choice": "Use session-based auth with Redis", "replaced_by_reason": "Better for long-running operations"}]

EXAMPLE B - NOT SUPERSEDED (different domains):
existing: [0] "Use PostgreSQL for main database"
new: "Add Redis for caching"
→ NOT SUPERSEDED: different domains (database vs caching)
→ superseded_mapping: []

EXAMPLE C - NOT SUPERSEDED (refinement):
existing: [0] "Use JWT tokens"
new: "Use JWT with 1hr access and 7day refresh tokens"
→ NOT SUPERSEDED: refinement of same approach
→ superseded_mapping: []

EXAMPLE D - NOT SUPERSEDED (addition):
existing: [0] "Use PostgreSQL", [1] "Use Express.js"
new: "Add input validation with Zod"
→ NOT SUPERSEDED: new concern, doesn't replace existing
→ superseded_mapping: []

EXAMPLE E - MULTIPLE SUPERSEDED:
existing: [0] "Use JWT", [1] "Store tokens in localStorage"
new: "Use session cookies", "Store session ID in httpOnly cookie"
→ superseded_mapping: [
{"old_index": 0, "replaced_by_choice": "Use session cookies", "replaced_by_reason": "Server-side session management"},
{"old_index": 1, "replaced_by_choice": "Store session ID in httpOnly cookie", "replaced_by_reason": "More secure than localStorage"}
]

EXAMPLE F - UNCERTAIN (be conservative):
existing: [0] "Use MongoDB"
new: "Consider PostgreSQL for better relational queries"
→ UNCERTAIN: "consider" suggests exploration, not decision
→ superseded_mapping: []
</examples_task2>

</task_2_superseded_decisions>

<task_3_condense_reasoning>
═══════════════════════════════════════════════════════════════
TASK 3: Condense old reasoning (max 200 characters)
═══════════════════════════════════════════════════════════════

ONLY IF should_update = true:

<purpose_task3>
The existing reasoning_trace will be OVERWRITTEN with new reasoning.
Before it's lost forever, you must preserve the most valuable insights
in a condensed form (max 200 chars) for historical context.

This condensed version will be stored in reasoning_evolution array
so users can understand past thinking even after updates.
</purpose_task3>

<what_to_include>
Prioritize in this order:

1. KEY TECHNICAL DECISIONS and their rationale
- "JWT chosen for stateless auth"
- "PostgreSQL for ACID compliance"

2. CONSTRAINTS or LIMITATIONS discovered
- "API rate limit 100req/min"
- "Browser storage max 5MB"

3. TRADE-OFFS that were considered
- "Chose simplicity over performance"

4. NON-OBVIOUS INSIGHTS that would be hard to rediscover
- "Edge case: empty arrays cause crash"
- "Must call init() before connect()"
</what_to_include>

<what_to_exclude>
- Generic statements ("Implemented feature")
- Process descriptions ("User asked about X")
- Obvious facts that anyone could infer from code
- Temporary debugging notes
</what_to_exclude>

<format_guidelines_task3>
- Use concise phrases, not full sentences
- Separate distinct insights with periods
- Abbreviate common terms (auth, config, impl)
- Focus on WHAT and WHY, not HOW
</format_guidelines_task3>

<examples_task3>
GOOD: "JWT with 1hr/7d expiry for offline CLI. Device flow for OAuth. No secrets in localStorage."
GOOD: "PostgreSQL chosen over MongoDB for relational queries. Indexes on user_id, created_at."
BAD: "We implemented authentication" (too vague, no insight)
BAD: "The user wanted to know about JWT" (process, not knowledge)
</examples_task3>

IF should_update = false: return null
</task_3_condense_reasoning>

<task_4_evolution_summary>
═══════════════════════════════════════════════════════════════
TASK 4: Generate evolution summary (200-250 characters)
═══════════════════════════════════════════════════════════════

ONLY IF should_update = true:

<purpose_task4>
This summary describes WHAT CHANGED in this update.
It will be added to evolution_steps to create a timeline of how
the memory evolved over time.

Future readers will scan these summaries to understand the journey
from initial implementation to current state.
</purpose_task4>

<good_summary_criteria>
1. DESCRIBES THE CHANGE, not the session
- YES: "Switched from JWT to sessions"
- NO: "User discussed authentication options"

2. INCLUDES THE REASON when relevant
- YES: "Added Redis caching for API performance"
- NO: "Added Redis" (why?)

3. MENTIONS KEY COMPONENTS affected
- YES: "Updated auth middleware and token validation"
- NO: "Made some changes to auth"

4. CAPTURES THE SCOPE (what areas were touched)
- YES: "Refactored database layer: connection pool, query caching, error handling"
- NO: "Database changes"
</good_summary_criteria>

<structure_template>
[ACTION] [WHAT] [FOR/BECAUSE] [REASON]. [ADDITIONAL DETAILS IF SPACE].

Examples:
- "Switched from [X] to [Y] for [reason]. Updated [components]."
- "Added [feature] to [achieve goal]. Implemented [details]."
- "Fixed [problem] in [component]. Root cause was [X]."
</structure_template>

<examples_task4>
GOOD (switching): "Switched from JWT to session-based auth for long-running operations. Added Redis for session storage and updated middleware."
GOOD (adding): "Added caching layer with Redis for API optimization. Implemented 5min TTL for reads and cache invalidation on writes."
GOOD (fixing): "Fixed memory leak in WebSocket connections. Root cause was missing cleanup on disconnect. Added connection pool."
GOOD (refactoring): "Refactored user service to repository pattern. Separated data access from business logic. Added unit tests."

BAD: "Updated stuff" (too vague)
BAD: "User asked about JWT" (describes session, not change)
BAD: "Changes to authentication" (no specifics)
</examples_task4>

<length_guide>
Target: 200-250 characters
- Under 150: probably missing important details
- Over 300: probably too verbose, condense
</length_guide>

IF should_update = false: return null
</task_4_evolution_summary>
${consolidationSection}
<output_format>
Return ONLY valid JSON. No markdown, no explanation, no extra text.

IMPORTANT RULES:
- English only (translate Romanian/other languages to English in all fields)
- No emojis
- All string values in English

${outputFormat}
</output_format>

<examples>
EXAMPLE 1 - Should UPDATE (files modified):
Input: task_type=implementation, files_touched=["src/auth.ts"], query="Fix auth bug"
Output: {"should_update": true, "reason": "Files were modified in implementation session", "superseded_mapping": [], "condensed_old_reasoning": "Initial JWT implementation", "evolution_summary": "Fixed authentication bug in token validation"}

EXAMPLE 2 - Should UPDATE (opposite decision):
Input: task_type=planning, query="Let's switch to sessions instead of JWT", existing=[{choice:"Use JWT"}], new=[{choice:"Use sessions"}]
Output: {"should_update": true, "reason": "User switched from JWT to sessions - opposite approaches", "superseded_mapping": [{"old_index": 0, "replaced_by_choice": "Use sessions", "replaced_by_reason": "Better session management for long operations"}], "condensed_old_reasoning": "JWT for stateless CLI auth with refresh tokens", "evolution_summary": "Switched from JWT to session-based authentication"}

EXAMPLE 3 - Should SKIP (pure question):
Input: task_type=information, query="Why did we choose JWT?", files_touched=[]
Output: {"should_update": false, "reason": "Pure information query about existing decision - no changes", "superseded_mapping": [], "condensed_old_reasoning": null, "evolution_summary": null}

EXAMPLE 4 - Should SKIP (acknowledgment):
Input: task_type=information, query="Ok I understand now", files_touched=[]
Output: {"should_update": false, "reason": "User acknowledged explanation but did not confirm any change", "superseded_mapping": [], "condensed_old_reasoning": null, "evolution_summary": null}
</examples>`;
}
|
|
1450
|
+
/**
 * Coerce a parsed model response into the shape callers rely on.
 *
 * Unknown extra fields (for example consolidated_evolution_steps) are kept.
 *
 * @param {object} raw - Object parsed from the model's JSON output.
 * @param {object} existingMemory - Memory whose decisions bound valid old_index values.
 * @returns {object} Copy of raw with should_update as a boolean, reason as a
 *   string, superseded_mapping as an array of well-formed entries, and the two
 *   summary fields as string-or-null.
 */
function normalizeShouldUpdateResult(raw, existingMemory) {
    // A JSON string "false" is truthy, so only accept a real true (or the string "true").
    const shouldUpdate = raw.should_update === true
        || (typeof raw.should_update === 'string' && raw.should_update.trim().toLowerCase() === 'true');
    const decisionCount = Array.isArray(existingMemory?.decisions)
        ? existingMemory.decisions.length
        : null;
    // Drop entries that cannot point at an existing decision; a bad index would
    // mark the wrong decision as superseded.
    const supersededMapping = (Array.isArray(raw.superseded_mapping) ? raw.superseded_mapping : [])
        .filter((entry) => entry !== null
            && typeof entry === 'object'
            && Number.isInteger(entry.old_index)
            && entry.old_index >= 0
            && (decisionCount === null || entry.old_index < decisionCount));
    return {
        ...raw,
        should_update: shouldUpdate,
        reason: raw.reason == null ? 'No reason provided' : String(raw.reason),
        superseded_mapping: supersededMapping,
        condensed_old_reasoning: typeof raw.condensed_old_reasoning === 'string'
            ? raw.condensed_old_reasoning
            : null,
        evolution_summary: typeof raw.evolution_summary === 'string'
            ? raw.evolution_summary
            : null,
    };
}
/**
 * Decide if a memory should be updated based on new session data.
 *
 * Sends one prompt (built by buildShouldUpdatePrompt) to the model and parses
 * the JSON object found in the reply. Any failure after the client is
 * obtained - prompt construction, the API call, a reply without JSON, or
 * unparseable JSON - resolves to createFallbackResult(sessionContext) rather
 * than rejecting.
 *
 * @param {object} existingMemory - The memory that was matched.
 * @param {object} newData - Extracted reasoning and decisions from the current session.
 * @param {object} sessionContext - Task type, original query, files touched.
 * @returns {Promise<object>} Decision with should_update, reason,
 *   superseded_mapping, condensed_old_reasoning and evolution_summary.
 */
export async function shouldUpdateMemory(existingMemory, newData, sessionContext) {
    const client = getAnthropicClient();
    // Check if evolution_steps consolidation is needed
    const evolutionCount = existingMemory.evolution_steps?.length || 0;
    const needsConsolidation = evolutionCount > 10;
    debugLLM('shouldUpdateMemory', `Analyzing memory update (needsConsolidation=${needsConsolidation})`);
    try {
        // Built inside the try so a malformed memory falls back instead of rejecting.
        const prompt = buildShouldUpdatePrompt(existingMemory, newData, sessionContext, needsConsolidation, evolutionCount);
        const response = await client.messages.create({
            model: 'claude-haiku-4-5-20251001',
            // The consolidated steps array needs room in the reply.
            max_tokens: needsConsolidation ? 1500 : 800,
            messages: [{ role: 'user', content: prompt }],
        });
        const firstBlock = response.content?.[0];
        const text = firstBlock?.type === 'text' ? firstBlock.text : '';
        // Grab everything from the first "{" to the last "}" in the reply.
        const jsonMatch = text.match(/\{[\s\S]*\}/);
        if (!jsonMatch) {
            console.error('[HAIKU] No JSON in response');
            return createFallbackResult(sessionContext);
        }
        let parsed;
        try {
            parsed = JSON.parse(jsonMatch[0]);
        }
        catch (parseErr) {
            console.error('[HAIKU] JSON parse failed', String(parseErr));
            return createFallbackResult(sessionContext);
        }
        const result = normalizeShouldUpdateResult(parsed, existingMemory);
        // Decision stored in result - logged by cloud-sync.ts
        debugLLM('shouldUpdateMemory', `Result: should_update=${result.should_update}, reason="${result.reason.substring(0, 50)}"`);
        return result;
    }
    catch (error) {
        console.error('[HAIKU] Error:', String(error));
        return createFallbackResult(sessionContext);
    }
}
|
|
1495
|
+
/**
 * Create fallback result when the Haiku call fails or returns unusable output.
 *
 * The fallback updates only when the session touched at least one file, on the
 * assumption that touched files indicate a real change; otherwise it skips the
 * update. A missing context or a missing/non-array files_touched is treated as
 * "no files touched" so this last-resort path cannot itself throw.
 *
 * @param {object} [sessionContext] - Session context; only files_touched is read.
 * @returns {{should_update: boolean, reason: string, superseded_mapping: Array,
 *   condensed_old_reasoning: null, evolution_summary: (string|null)}}
 */
function createFallbackResult(sessionContext) {
    const touched = sessionContext?.files_touched;
    // If files were touched, likely a real change - lean toward update
    const hasFiles = Array.isArray(touched) && touched.length > 0;
    return {
        should_update: hasFiles,
        reason: hasFiles
            ? 'Fallback: files modified, assuming update needed'
            : 'Fallback: no files modified, skipping update',
        superseded_mapping: [],
        condensed_old_reasoning: null,
        evolution_summary: hasFiles ? 'Session with file modifications' : null,
    };
}
|