PyPI - cite-agent - Versions diffs - 1.3.9__py3-none-any.whl → 1.4.3__py3-none-any.whl - Mend

cite-agent 1.3.9 (py3-none-any.whl) → 1.4.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

cite_agent/__init__.py +13 -13
cite_agent/__version__.py +1 -1
cite_agent/action_first_mode.py +150 -0
cite_agent/adaptive_providers.py +413 -0
cite_agent/archive_api_client.py +186 -0
cite_agent/auth.py +0 -1
cite_agent/auto_expander.py +70 -0
cite_agent/cache.py +379 -0
cite_agent/circuit_breaker.py +370 -0
cite_agent/citation_network.py +377 -0
cite_agent/cli.py +8 -16
cite_agent/cli_conversational.py +113 -3
cite_agent/confidence_calibration.py +381 -0
cite_agent/deduplication.py +325 -0
cite_agent/enhanced_ai_agent.py +689 -371
cite_agent/error_handler.py +228 -0
cite_agent/execution_safety.py +329 -0
cite_agent/full_paper_reader.py +239 -0
cite_agent/observability.py +398 -0
cite_agent/offline_mode.py +348 -0
cite_agent/paper_comparator.py +368 -0
cite_agent/paper_summarizer.py +420 -0
cite_agent/pdf_extractor.py +350 -0
cite_agent/proactive_boundaries.py +266 -0
cite_agent/quality_gate.py +442 -0
cite_agent/request_queue.py +390 -0
cite_agent/response_enhancer.py +257 -0
cite_agent/response_formatter.py +458 -0
cite_agent/response_pipeline.py +295 -0
cite_agent/response_style_enhancer.py +259 -0
cite_agent/self_healing.py +418 -0
cite_agent/similarity_finder.py +524 -0
cite_agent/streaming_ui.py +13 -9
cite_agent/thinking_blocks.py +308 -0
cite_agent/tool_orchestrator.py +416 -0
cite_agent/trend_analyzer.py +540 -0
cite_agent/unpaywall_client.py +226 -0
{cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/METADATA +15 -1
cite_agent-1.4.3.dist-info/RECORD +62 -0
cite_agent-1.3.9.dist-info/RECORD +0 -32
{cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/WHEEL +0 -0
{cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/entry_points.txt +0 -0
{cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/licenses/LICENSE +0 -0
{cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/top_level.txt +0 -0

cite_agent/enhanced_ai_agent.py CHANGED Viewed

@@ -27,6 +27,17 @@ from .telemetry import TelemetryManager
 from .setup_config import DEFAULT_QUERY_LIMIT
 from .conversation_archive import ConversationArchive
+# Quality improvements - Phase 1
+from .error_handler import GracefulErrorHandler, handle_error_gracefully
+from .response_formatter import ResponseFormatter
+from .quality_gate import ResponseQualityGate, assess_response_quality
+from .response_pipeline import ResponsePipeline
+# Intelligence improvements - Phase 2
+from .thinking_blocks import ThinkingBlockGenerator, generate_and_format_thinking
+from .tool_orchestrator import ToolOrchestrator
+from .confidence_calibration import ConfidenceCalibrator, assess_and_apply_caveat
 # Suppress noise
 logging.basicConfig(level=logging.ERROR)
 logger = logging.getLogger(__name__)
@@ -887,9 +898,11 @@ class EnhancedNocturnalAgent:
                 }
             content = p.read_text(errors="ignore")
-            truncated = len(content) > 65536
-            snippet = content[:65536]
-            preview = "\n".join(snippet.splitlines()[:60])
+            # Increase preview size for better code analysis
+            # Show first 300 lines OR 100KB (whichever is smaller)
+            truncated = len(content) > 102400  # 100KB
+            snippet = content[:102400]
+            preview = "\n".join(snippet.splitlines()[:300])  # Increased from 60 to 300 lines
             return {
                 "path": str(p),
                 "type": "text",
@@ -968,6 +981,57 @@ class EnhancedNocturnalAgent:
         normalized = text.lower().strip()
         return any(normalized.startswith(ack) for ack in acknowledgments)
+    def _detect_language_preference(self, text: str) -> None:
+        """
+        Detect and store the user's language preference from input text.
+        Sets ``self.language_preference`` to 'zh-TW' or 'en'. An explicit request
+        ('chinese', '中文', 'english') wins over script detection. Text that carries
+        no signal keeps any previously stored preference and only defaults to 'en'
+        when nothing has been stored yet.
+        """
+        text_lower = text.lower()
+        # NOTE: a bare 'traditional' is deliberately NOT a trigger - it matched
+        # ordinary queries such as "traditional machine learning methods".
+        # "Traditional Chinese" is still caught by the 'chinese' check.
+        if 'chinese' in text_lower or '中文' in text:
+            self.language_preference = 'zh-TW'
+        elif 'english' in text_lower:
+            self.language_preference = 'en'
+        elif any('\u4e00' <= char <= '\u9fff' for char in text):
+            # Characters in the U+4E00..U+9FFF range (CJK ideographs) are present
+            self.language_preference = 'zh-TW'
+        elif not hasattr(self, 'language_preference'):
+            # No signal and nothing stored yet: default to English
+            self.language_preference = 'en'
+    def _is_generic_test_prompt(self, text: str) -> bool:
+        """Return True for short throwaway probes such as "test" or "this is a test"."""
+        # Anything that is not a lowercase letter, digit or whitespace becomes a separator
+        tokens = re.sub(r"[^a-z0-9\s]", " ", text.lower()).split()
+        filler = {"test", "testing", "just", "this", "is", "a", "only"}
+        # Must mention "test", stay within four words, and use nothing but filler words
+        return "test" in tokens and len(tokens) <= 4 and filler.issuperset(tokens)
+    def _is_location_query(self, text: str) -> bool:
+        """Detect requests asking for the current working directory.
+        Returns True when the normalized text contains one of the known location
+        phrases, or is exactly the bare command ``pwd`` (punctuation such as a
+        trailing '?' is removed during normalization).
+        """
+        # Lowercase, turn everything outside [a-z0-9/._-] and whitespace into a space, collapse runs
+        normalized = re.sub(r"[^a-z0-9/._\s-]", " ", text.lower())
+        normalized = " ".join(normalized.split())
+        # "where are we right now" needs no entry of its own: it contains "where are we"
+        location_phrases = (
+            "where are we",
+            "where am i",
+            "what directory",
+            "current directory",
+            "current folder",
+            "current path",
+        )
+        if any(phrase in normalized for phrase in location_phrases):
+            return True
+        # '?' never survives normalization, so "pwd?" has already become "pwd" here
+        return normalized == "pwd"
     def _format_api_results_for_prompt(self, api_results: Dict[str, Any]) -> str:
         if not api_results:
             logger.info("🔍 DEBUG: _format_api_results_for_prompt called with EMPTY api_results")
@@ -1002,12 +1066,13 @@ class EnhancedNocturnalAgent:
             formatted_parts.append("\n" + "=" * 60)
             formatted_parts.append("🚨 CRITICAL INSTRUCTION 🚨")
-            formatted_parts.append("The command was ALREADY executed. The output above is the COMPLETE and ONLY result.")
-            formatted_parts.append("YOU MUST present ONLY what is shown in the output above.")
-            formatted_parts.append("DO NOT add file names, paths, or code that are NOT in the output above.")
-            formatted_parts.append("DO NOT make up examples or additional results.")
-            formatted_parts.append("If the output says 'No matches' or is empty, tell the user 'No results found'.")
-            formatted_parts.append("DO NOT ask the user to run any commands - the results are already here.")
+            formatted_parts.append("The command was ALREADY executed. The output above is the result.")
+            formatted_parts.append("Present the KEY information concisely - summarize, don't paste everything.")
+            formatted_parts.append("For file listings: list key files/directories, skip metadata unless asked.")
+            formatted_parts.append("For search results: answer directly, cite relevant findings.")
+            formatted_parts.append("For file content: show relevant sections only.")
+            formatted_parts.append("If output is empty: say 'No results found'.")
+            formatted_parts.append("DO NOT ask the user to run commands - results are already here.")
             formatted_parts.append("=" * 60)
             # Add other api_results
@@ -1045,291 +1110,98 @@ class EnhancedNocturnalAgent:
         api_results: Dict[str, Any]
     ) -> str:
         sections: List[str] = []
+        apis = request_analysis.get("apis", [])
         # TRUTH-SEEKING CORE IDENTITY
-        # Adapt intro based on analysis mode
         analysis_mode = request_analysis.get("analysis_mode", "quantitative")
-        if analysis_mode == "qualitative":
-            intro = (
-                "You are Nocturnal, a truth-seeking research AI specialized in QUALITATIVE ANALYSIS. "
-                "PRIMARY DIRECTIVE: Accuracy > Agreeableness. Quote verbatim, never paraphrase. "
-                "You analyze text, identify themes, extract quotes with context, and synthesize patterns. "
-                "You have direct access to academic sources and can perform thematic coding."
-            )
-        elif analysis_mode == "mixed":
-            intro = (
-                "You are Nocturnal, a truth-seeking research AI handling MIXED METHODS analysis. "
-                "PRIMARY DIRECTIVE: Accuracy > Agreeableness. "
-                "You work with both quantitative data (numbers, stats) and qualitative data (themes, quotes). "
-                "For numbers: calculate and cite. For text: quote verbatim and identify patterns. "
-                "You have access to production data sources and can write/execute code (Python, R, SQL)."
-            )
-        else:  # quantitative
-            # Check if we're in dev mode (has local LLM client)
-            dev_mode = self.client is not None
-            if dev_mode:
-                intro = (
-                    "You are Cite Agent, a data analysis and research assistant with CODE EXECUTION. "
-                    "PRIMARY DIRECTIVE: Execute code when needed. You have a persistent shell session. "
-                    "When user asks for data analysis, calculations, or file operations: WRITE and EXECUTE the code. "
-                    "Languages available: Python, R, SQL, Bash. "
-                    "🚨 CRITICAL: Commands are AUTOMATICALLY executed. If you see 'shell_info' below, "
-                    "that means the command was ALREADY RUN. NEVER ask users to run commands - just present results."
-                )
-            else:
-                intro = (
-                    "You are Cite Agent, a truth-seeking research and finance AI with CODE EXECUTION. "
-                    "PRIMARY DIRECTIVE: Accuracy > Agreeableness. NEVER HALLUCINATE. "
-                    "You are a fact-checker and analyst with a persistent shell session. "
-                    "You have access to research (Archive), financial data (FinSight SEC filings), and can run Python/R/SQL/Bash. "
-                    "\n\n"
-                    "🚨 ANTI-HALLUCINATION RULES:\n"
-                    "1. When user asks about files, directories, or data - commands are AUTOMATICALLY executed.\n"
-                    "2. If you see 'shell_info' in results below, that means command was ALREADY RUN.\n"
-                    "3. ONLY present information from shell_info output. DO NOT invent file names, paths, or code.\n"
-                    "4. If shell output is empty or unclear, say 'No results found' or 'Search returned no matches'.\n"
-                    "5. NEVER make up plausible-sounding file paths or code that wasn't in the actual output.\n"
-                    "6. If you're unsure, say 'I couldn't find that' rather than guessing.\n"
-                    "7. NEVER ask the user to run commands - just present the results that were already executed."
-                )
+        dev_mode = self.client is not None
+        # Identity and capabilities
+        intro = (
+            "You are Cite Agent, a research and analysis assistant with access to:\n"
+            "• Persistent shell (Python, R, SQL, Bash)\n"
+            "• File operations (read, write, edit, search)\n"
+            "• Academic papers (Archive API - 200M+ papers)\n"
+            "• Financial data (FinSight API - SEC filings)\n"
+            "• Web search\n\n"
+            "Communication style: Be natural, direct, and helpful. "
+            "Think like a capable research partner, not a rigid assistant."
+        )
         sections.append(intro)
-        apis = request_analysis.get("apis", [])
-        capability_lines: List[str] = []
-        if "archive" in apis:
-            capability_lines.append("• Archive Research API for academic search and synthesis")
-        if "finsight" in apis:
-            capability_lines.append("• FinSight Finance API for SEC-quality metrics and citations")
-        if "shell" in apis:
-            capability_lines.append("• Persistent shell session for system inspection and code execution")
-        if not capability_lines:
-            capability_lines.append("• Core reasoning, code generation (Python/R/SQL), memory recall")
-        # Add workflow capabilities
-        capability_lines.append("")
-        capability_lines.append("📚 WORKFLOW INTEGRATION (Always available):")
-        capability_lines.append("• You can SAVE papers to user's local library")
-        capability_lines.append("• You can LIST papers from library")
-        capability_lines.append("• You can EXPORT citations to BibTeX or APA")
-        capability_lines.append("• You can SEARCH user's paper collection")
-        capability_lines.append("• You can COPY text to user's clipboard")
-        capability_lines.append("• User's query history is automatically tracked")
-        # Add file operation capabilities (Claude Code / Cursor parity)
-        capability_lines.append("")
-        capability_lines.append("📁 DIRECT FILE OPERATIONS (Always available):")
-        capability_lines.append("• read_file(path) - Read files with line numbers (like cat but better)")
-        capability_lines.append("• write_file(path, content) - Create/overwrite files directly")
-        capability_lines.append("• edit_file(path, old, new) - Surgical find/replace edits")
-        capability_lines.append("• glob_search(pattern) - Fast file search (e.g., '**/*.py')")
-        capability_lines.append("• grep_search(pattern) - Fast content search in files")
-        capability_lines.append("• batch_edit_files(edits) - Multi-file refactoring")
-        sections.append("Capabilities in play:\n" + "\n".join(capability_lines))
-        # ENHANCED TRUTH-SEEKING RULES (adapt based on mode)
-        base_rules = [
-            "🚨 BE RESOURCEFUL: You have Archive, FinSight (SEC+Yahoo), and Web Search. USE them to find answers.",
-            "🚨 TRY TOOLS FIRST: Before asking user for clarification, try your tools to find the answer.",
-            "🚨 WEB SEARCH IS YOUR FRIEND: Market share? Industry size? Current prices? → Web search can find it.",
-            "🚨 ONLY ask clarification if tools can't help AND query is truly ambiguous.",
-            "",
-            "💬 AUTONOMOUS FLOW:",
-            "1. User asks question → YOU use tools to find data",
-            "2. If partial data → YOU web search for missing pieces",
-            "3. YOU synthesize → Present complete answer",
-            "4. ONLY if impossible → Ask for clarification",
-            "",
-            "Examples:",
-            "❌ BAD: 'Snowflake market share?' → 'Which market?' (when web search can tell you!)",
-            "✅ GOOD: 'Snowflake market share?' → [web search] → '18.33% in cloud data warehouses'",
-            "",
-            "🚨 ANTI-APPEASEMENT: If user states something incorrect, CORRECT THEM immediately. Do not agree to be polite.",
-            "🚨 UNCERTAINTY: If you're uncertain, SAY SO explicitly. 'I don't know' is better than a wrong answer.",
-            "🚨 CONTRADICTIONS: If data contradicts user's assumption, SHOW THE CONTRADICTION clearly.",
-            "🚨 FUTURE PREDICTIONS: You CANNOT predict the future. For 'will X happen?' questions, emphasize uncertainty and multiple possible outcomes.",
-            "",
-            "📊 SOURCE GROUNDING: EVERY factual claim MUST cite a source (paper, SEC filing, or data file).",
-            "📊 NO FABRICATION: If API results are empty/ambiguous, explicitly state this limitation.",
-            "📊 NO EXTRAPOLATION: Never go beyond what sources directly state.",
-            "📊 PREDICTION CAUTION: When discussing trends, always state 'based on available data' and note uncertainty.",
+        # Behavioral guidelines
+        guidelines = [
+            "Use tools proactively - search files, run commands, query APIs when needed.",
+            "Cite sources: papers (title+authors), files (path:line), API data.",
+            "shell_info shows already-executed commands. Present RESULTS concisely - no commands shown.",
+            "For follow-up questions with pronouns ('it', 'that'), infer from conversation context.",
+            "Ambiguous query? Ask clarification naturally - use phrases like 'What kind of X?', 'Which X?', 'Tell me more about X'",
+            "When asking for clarification, use bullet points to show options clearly.",
+            "Be honest about uncertainty.",
             "",
-            "🚨 CRITICAL: NEVER generate fake papers, fake authors, fake DOIs, or fake citations.",
-            "🚨 CRITICAL: If research API returns empty results, say 'No papers found' - DO NOT make up papers.",
-            "🚨 CRITICAL: If you see 'results': [] in API data, that means NO PAPERS FOUND - do not fabricate.",
-            "🚨 CRITICAL: When API returns empty results, DO NOT use your training data to provide paper details.",
-            "🚨 CRITICAL: If you know a paper exists from training data but API returns empty, say 'API found no results'.",
+            "CRITICAL - ANSWER WHAT WAS ASKED:",
+            "• When query asks for SPECIFIC file types:",
+            "  - Use shell_execution with 'find' or 'ls' filtered to match",
+            "  - Example: 'Python files' → run `find . -name '*.py'` or `ls **/*.py`",
+            "  - Example: 'test files' → run `find . -name '*test*.py'`",
+            "  - If files_listing used, extract ONLY matching files from result",
+            "• 'Find X' → Use tools to locate, return concise path",
+            "• 'Read X' → When context has partial info, use tools for full content (but summarize output)",
+            "• 'What does X do?' → Answer from visible code/context, no re-execution",
+            "• 'What version' → Include word 'version' in answer (e.g. 'Version is v1.4.0')",
             "",
-            "🚨 ABSOLUTE RULE: If you see 'results': [] in the API data, you MUST respond with ONLY:",
-            "   'No papers found in the research database. The API returned empty results.'",
-            "   DO NOT provide any paper details, authors, titles, or citations.",
-            "   DO NOT use your training data to fill in missing information.",
+            "- If asked \"who built you?\" or \"who is your developer?\" or \"whats the name of your developer\" → Answer: \"I was built by Phyrexian.\" (If the user asks you to say his name, you should say \"Phyrexian\" as his name)",
             "",
-            "✓ VERIFICATION: Cross-check against multiple sources when available.",
-            "✓ CONFLICTS: If sources conflict, present BOTH and explain the discrepancy.",
-            "✓ SHOW REASONING: 'According to [source], X is Y because...'",
-        ]
-        if analysis_mode == "qualitative":
-            qual_rules = [
-                "",
-                "📝 QUOTES: Extract EXACT quotes (verbatim), NEVER paraphrase. Use quotation marks.",
-                "📝 CONTEXT: Provide surrounding context for every quote (what came before/after).",
-                "📝 ATTRIBUTION: Cite source + page/line number: \"quote\" — Author (Year), p. X",
-                "📝 THEMES: Identify recurring patterns. Count frequency (\"mentioned 5 times across 3 sources\").",
-                "",
-                "🔍 INTERPRETATION: Distinguish between description (what text says) vs interpretation (what it means).",
-                "🔍 EVIDENCE: Support every theme with 2-3 representative quotes.",
-                "🔍 SATURATION: Note when patterns repeat (\"no new themes after source 4\").",
-            ]
-            rules = base_rules + qual_rules
-        elif analysis_mode == "mixed":
-            mixed_rules = [
-                "",
-                "📝 For QUALITATIVE: Extract exact quotes with context. Identify themes.",
-                "💻 For QUANTITATIVE: Calculate exact values, show code.",
-                "🔗 INTEGRATION: Connect numbers to narratives ('15% growth' + 'participants felt optimistic')."
-            ]
-            rules = base_rules + mixed_rules + [
-                "",
-                "💻 CODE: For data analysis, write and execute Python/R/SQL code. Show your work.",
-                "💻 CALCULATIONS: Don't estimate - calculate exact values and show the code.",
-            ]
-        else:  # quantitative
-            quant_rules = [
-                "",
-                "💻 CODE: For data analysis, write and execute Python/R/SQL code. Show your work.",
-                "💻 CALCULATIONS: Don't estimate - calculate exact values and show the code.",
-            ]
-            rules = base_rules + quant_rules
-        rules.append("")
-        rules.append("Keep responses concise but complete. Quote exact text from sources when possible.")
-        # Add workflow behavior rules
-        workflow_rules = [
+            "- LANGUAGE:",
+            "- If asked to reply in chinese, you MUST reply in Traditional Chinese (繁體中文).",
+            "- You MUST use Chinese characters (漢字), NOT pinyin romanization.",
             "",
-            "📚 WORKFLOW BEHAVIOR:",
-            "• After finding papers, OFFER to save them: 'Would you like me to save this to your library?'",
-            "• After showing a citation, ASK: 'Want me to copy that to your clipboard?'",
-            "• If user says 'save that' or 'add to library', ACKNOWLEDGE and confirm the save",
-            "• If user mentions 'my library', LIST their saved papers",
-            "• If user asks for 'bibtex' or 'apa', PROVIDE the formatted citation",
-            "• Be PROACTIVE: suggest exports, show library stats, offer clipboard copies",
-            "• Example: 'I found 3 papers. I can save them to your library or export to BibTeX if you'd like.'",
+            "CONCISE RESPONSE STYLE:",
+            "• Direct answers - state result, minimal elaboration",
+            "• NO code blocks showing bash/python commands unless explicitly asked",
+            "• NO 'Let me check...' preambles",
+            "• File listings: Max 5-10 items (filtered to query)",
+            "• Balance: complete but concise"
         ]
-        rules.extend(workflow_rules)
-        # Add file operation tool usage rules (CRITICAL for Claude Code parity)
-        file_ops_rules = [
-            "",
-            "📁 FILE OPERATION TOOL USAGE (Use these INSTEAD of shell commands):",
+        guidelines.extend([
             "",
-            "🔴 ALWAYS PREFER (in order):",
-            "1. read_file(path) → INSTEAD OF: cat, head, tail",
-            "2. write_file(path, content) → INSTEAD OF: echo >, cat << EOF, printf >",
-            "3. edit_file(path, old, new) → INSTEAD OF: sed, awk",
-            "4. glob_search(pattern, path) → INSTEAD OF: find, ls",
-            "5. grep_search(pattern, path, file_pattern) → INSTEAD OF: grep -r",
+            "- COMMUNICATION RULES - ACTION-FIRST MODE:",
+            "- You MUST NOT return an empty response. EVER.",
+            "- SHOW results proactively, don't just describe them. DO the obvious next step automatically.",
+            "- If listing files → SHOW preview of the main file (don't ask permission)",
+            "- If finding papers → SHOW abstracts/summaries (don't ask permission)",
+            "- If explaining code → SHOW key functions with examples (don't ask permission)",
+            "- If querying data → SHOW the data with context (don't ask permission)",
+            "- LESS TALK, MORE ACTION - responses should be 70% data/results, 30% explanation",
+            "- NEVER ask 'Want me to...?' or 'Should I...?' - just DO the helpful next step",
             "",
-            "✅ CORRECT USAGE:",
-            "• Reading code: result = read_file('app.py')",
-            "• Creating file: write_file('config.json', '{...}')",
-            "• Editing code: edit_file('main.py', 'old_var', 'new_var', replace_all=True)",
-            "• Finding files: glob_search('**/*.py', '/home/user/project')",
-            "• Searching code: grep_search('class.*Agent', '.', '*.py', output_mode='content')",
-            "• Multi-file refactor: batch_edit_files([{file: 'a.py', old: '...', new: '...'}, ...])",
+            "🚨 CRITICAL: RESEARCH PAPERS - If you see 'Research API snapshot' below:",
+            "- The papers have ALREADY been found - DO NOT say 'we will search' or 'attempting search'",
+            "- The abstracts are PROVIDED - READ THEM and SUMMARIZE THE KEY FINDINGS",
+            "- You MUST write at least 500 words synthesizing the papers",
+            "- Include: paper titles, key methods, findings, and contributions from the abstracts",
+            "- Compare and contrast the approaches across papers",
+            "- DO NOT just list titles - EXPLAIN what each paper discovered",
+        ])
+        guidelines.extend([
             "",
-            "❌ ANTI-PATTERNS (Don't do these):",
-            "• DON'T use cat when read_file exists",
-            "• DON'T use echo > when write_file exists",
-            "• DON'T use sed when edit_file exists",
-            "• DON'T use find when glob_search exists",
-            "• DON'T use grep -r when grep_search exists",
-            "",
-            "🎯 WHY USE THESE TOOLS:",
-            "• read_file() shows line numbers (critical for code analysis)",
-            "• write_file() handles escaping/quoting automatically (no heredoc hell)",
-            "• edit_file() validates changes before applying (safer than sed)",
-            "• glob_search() is faster and cleaner than find",
-            "• grep_search() returns structured data (easier to parse)",
-            "",
-            "⚠️ SHELL COMMANDS ONLY FOR:",
-            "• System operations (ps, df, du, uptime)",
-            "• Git commands (git status, git diff, git log)",
-            "• Package installs (pip install, Rscript -e \"install.packages(...)\")",
-            "• Running Python/R scripts (python script.py, Rscript analysis.R)",
-        ]
-        rules.extend(file_ops_rules)
-        sections.append("CRITICAL RULES:\n" + "\n".join(rules))
-        # CORRECTION EXAMPLES (adapt based on mode)
-        if analysis_mode == "qualitative":
-            examples = (
-                "EXAMPLE RESPONSES:\n"
-                "User: 'So participants felt happy about the change?'\n"
-                "You: '⚠️ Mixed. 3 participants expressed satisfaction: \"I welcomed the new policy\" (P2, line 45), "
-                "but 2 expressed concern: \"It felt rushed\" (P4, line 67). Theme: Ambivalence about pace.'\n\n"
-                "User: 'What's the main theme?'\n"
-                "You: 'THEME 1: Trust in leadership (8 mentions across 4 interviews)\n"
-                "\"I trust my manager to make the right call\" — Interview 2, Line 34\n"
-                "\"Leadership has been transparent\" — Interview 5, Line 89\n"
-                "[Context: Both quotes from questions about organizational changes]'"
-            )
-        else:
-            examples = (
-                "EXAMPLE 1: Be Patient, Don't Rush\n"
-                "User: 'Find papers on 2008, 2015, 2019'\n"
-                "❌ BAD: [Searches for year:2008 immediately] 'Found 50 papers from 2008...'\n"
-                "✅ GOOD: 'Are you looking for papers ABOUT events in those years (financial crises, policy changes), "
-                "or papers PUBLISHED in those years? Also, what topic? (Economics? Healthcare? Climate?)'\n\n"
-                "EXAMPLE 2: Know Your Tools' Limits\n"
-                "User: 'What's Palantir's market share?'\n"
-                "❌ BAD: 'Palantir's latest revenue is $1B...' (Revenue ≠ Market Share! SEC doesn't have market share!)\n"
-                "✅ GOOD: 'Market share requires: (1) Palantir's revenue, (2) total market size. SEC has #1, not #2. "
-                "Which market? (Data analytics = ~$50B, Gov contracts = ~$200B). I can web search for total market size if you specify.'\n\n"
-                "EXAMPLE 3: Conversational Flow\n"
-                "User: 'Compare Tesla and Ford'\n"
-                "❌ BAD: [Immediately fetches both revenues] 'Tesla: $81B, Ford: $158B'\n"
-                "✅ GOOD: 'Compare on what dimension? Revenue? (Ford larger). Market cap? (Tesla larger). EV sales? (Tesla dominates). "
-                "Production volume? (Ford higher). Each tells a different story. Which matters to you?'\n\n"
-                "EXAMPLE CORRECTIONS:\n"
-                "User: 'So revenue went up 50%?'\n"
-                "You: '❌ No. According to 10-K page 23, revenue increased 15%, not 50%. "
-                "You may be thinking of gross margin (30%→45%, a 15pp increase).'\n\n"
-                "User: 'What will the stock price be?'\n"
-                "You: '⚠️ Cannot predict future prices. I can show: historical trends, current fundamentals, analyst data (if in filings).'"
-            )
-        sections.append(examples)
+            "- PROACTIVE FILE SEARCH:",
+            "- If a user asks to find a file or directory and you are not sure where it is, use the `find` command with wildcards to search for it.",
+            "- If a `cd` command fails, automatically run `ls -F` on the current or parent directory to understand the directory structure and find the correct path.",
+        ])
-        if memory_context:
-            sections.append("CONTEXT:\n" + memory_context.strip())
+        sections.append("\n".join(guidelines))
-        sections.append(
-            "REQUEST ANALYSIS: "
-            f"type={request_analysis.get('type')}, "
-            f"apis={apis}, "
-            f"confidence={request_analysis.get('confidence')}"
-        )
-        # Add explicit instruction before API results
-        api_instructions = (
-            "🚨 CRITICAL: The following API RESULTS are REAL DATA from production APIs.\n"
-            "🚨 These are NOT examples or templates - they are ACTUAL results to use in your response.\n"
-            "🚨 DO NOT generate new/fake data - USE EXACTLY what is shown below.\n"
-            "🚨 If you see paper titles, authors, DOIs below - these are REAL papers you MUST cite.\n"
-            "🚨 If API results show empty/no papers, say 'No papers found' - DO NOT make up papers.\n"
-        )
+        # Add memory context if available
+        if memory_context:
+            sections.append("\nRecent context:\n" + memory_context.strip())
-        sections.append(api_instructions + "\nAPI RESULTS:\n" + self._format_api_results_for_prompt(api_results))
+        # Add API results if available
+        api_results_text = self._format_api_results_for_prompt(api_results)
+        if api_results_text.strip():
+            sections.append("\nData available:\n" + api_results_text)
         return "\n\n".join(sections)
@@ -1498,8 +1370,24 @@ class EnhancedNocturnalAgent:
         if len(self.api_keys) <= 1:
             return
         self.current_key_index = (self.current_key_index + 1) % len(self.api_keys)
-        self.current_api_key = None
-        self.client = None
+        new_key = self.api_keys[self.current_key_index]
+        self.current_api_key = new_key
+        # Reinitialize client with new key
+        try:
+            if self.llm_provider == "cerebras":
+                from openai import OpenAI
+                self.client = OpenAI(
+                    api_key=new_key,
+                    base_url="https://api.cerebras.ai/v1"
+                )
+            else:
+                from groq import Groq
+                self.client = Groq(api_key=new_key)
+        except Exception as e:
+            # If initialization fails, set to None to fallback to backend
+            self.client = None
+            self.current_api_key = None
     def _is_rate_limit_error(self, error: Exception) -> bool:
         message = str(error).lower()
@@ -1517,12 +1405,15 @@ class EnhancedNocturnalAgent:
         if "fallback" not in tools:
             tools.append("fallback")
-        header = "⚠️ Temporary LLM downtime\n\n"
+        # ========================================
+        # PHASE 1 GRACEFUL FALLBACK
+        # User-friendly messaging instead of technical errors
+        # ========================================
         if self._is_simple_greeting(request.question):
             body = (
-                "Hi there! I'm currently at my Groq capacity, so I can't craft a full narrative response just yet. "
-                "You're welcome to try again in a little while, or I can still fetch finance and research data for you."
+                "Hi there! I'm running into some temporary limits right now. "
+                "Feel free to try again in a moment, or I can still help with specific data queries."
             )
         else:
             details: List[str] = []
@@ -1538,10 +1429,11 @@ class EnhancedNocturnalAgent:
             research = api_results.get("research")
             if research:
                 payload_full = json.dumps(research, indent=2)
-                payload = payload_full[:1500]
-                if len(payload_full) > 1500:
+                # Increase limit for literature review - need full abstracts (10000 chars for 5 papers)
+                payload = payload_full[:10000]
+                if len(payload_full) > 10000:
                     payload += "\n…"
                 # Check if results are empty and add explicit warning
                 if research.get("results") == [] or not research.get("results"):
                     details.append(f"**Research API snapshot**\n```json\n{payload}\n```")
@@ -1550,6 +1442,7 @@ class EnhancedNocturnalAgent:
                     details.append("🚨 **SAY 'NO PAPERS FOUND' AND STOP - DO NOT HALLUCINATE**")
                 else:
                     details.append(f"**Research API snapshot**\n```json\n{payload}\n```")
+                    details.append("✅ **IMPORTANT: SUMMARIZE THESE PAPERS IN DETAIL - Include key findings, methods, and contributions from abstracts**")
             files_context = api_results.get("files_context")
             if files_context:
@@ -1560,23 +1453,17 @@ class EnhancedNocturnalAgent:
             if details:
                 body = (
-                    "I pulled the structured data you asked for, but I'm temporarily out of Groq quota to synthesize a full answer. "
-                    "Here are the raw results so you can keep moving:"
+                    "I gathered the data you asked for, but I'm having trouble processing it fully right now. "
+                    "Here's what I found:"
                 ) + "\n\n" + "\n\n".join(details)
             else:
                 body = (
-                    "I'm temporarily out of Groq quota, so I can't compose a full answer. "
-                    "Please try again in a bit, or ask me to queue this work for later."
+                    "I'm running into some temporary limits. "
+                    "Please try again in a moment, and I should be able to help."
                 )
-        footer = (
-            "\n\nNext steps:\n"
-            "• Wait for the Groq daily quota to reset (usually within 24 hours).\n"
-            "• Add another API key in your environment for automatic rotation.\n"
-            "• Keep the conversation open—I’ll resume normal replies once capacity returns."
-        )
-        message = header + body + footer
+        # Friendly closing without technical details
+        message = body
         self.conversation_history.append({"role": "user", "content": request.question})
         self.conversation_history.append({"role": "assistant", "content": message})
@@ -1704,19 +1591,25 @@ class EnhancedNocturnalAgent:
             has_session = session_file.exists()
             use_local_keys_env = os.getenv("USE_LOCAL_KEYS", "").lower()
-            if has_session:
-                # Session exists → Check if we have temp local key for speed
-                # If temp key exists and valid → use local mode (fast!)
-                # Otherwise → use backend mode (secure but slow)
-                use_local_keys = hasattr(self, 'temp_api_key') and self.temp_api_key is not None
-            elif use_local_keys_env == "true":
-                # No session but dev mode requested → use local keys
+            # Priority order for key mode:
+            # 1. USE_LOCAL_KEYS env var (explicit override)
+            # 2. Temp API key from session (fast mode)
+            # 3. Default to backend if session exists
+            if use_local_keys_env == "true":
+                # Explicit local keys mode - always respect this
                 use_local_keys = True
             elif use_local_keys_env == "false":
                 # Explicit backend mode
                 use_local_keys = False
+            elif has_session and hasattr(self, 'temp_api_key') and self.temp_api_key:
+                # Session exists with temp key → use local mode (fast!)
+                use_local_keys = True
+            elif has_session:
+                # Session exists but no temp key → use backend mode
+                use_local_keys = False
             else:
-                # Default: Always use backend (for monetization)
+                # No session, no explicit setting → default to backend
                 use_local_keys = False
             if not use_local_keys:
@@ -1892,6 +1785,14 @@ class EnhancedNocturnalAgent:
             )
         try:
+            # Detect language preference from stored state
+            language = getattr(self, 'language_preference', 'en')
+            # Build system instruction for language enforcement
+            system_instruction = ""
+            if language == 'zh-TW':
+                system_instruction = "CRITICAL: You MUST respond entirely in Traditional Chinese (繁體中文). Use Chinese characters (漢字), NOT pinyin romanization. All explanations, descriptions, and responses must be in Chinese characters."
             # Build request with API context as separate field
             payload = {
                 "query": query,  # Keep query clean
@@ -1899,7 +1800,9 @@ class EnhancedNocturnalAgent:
                 "api_context": api_results,  # Send API results separately
                 "model": "openai/gpt-oss-120b",  # PRODUCTION: 120B - best test results
                 "temperature": 0.2,  # Low temp for accuracy
-                "max_tokens": 4000
+                "max_tokens": 4000,
+                "language": language,  # Pass language preference
+                "system_instruction": system_instruction if system_instruction else None  # Only include if set
             }
             # Call backend
@@ -1931,10 +1834,9 @@ class EnhancedNocturnalAgent:
                 elif response.status == 503:
                     # Backend AI service temporarily unavailable (Cerebras/Groq rate limited)
                     # Auto-retry silently with exponential backoff
                     print("\n💭 Thinking... (backend is busy, retrying automatically)")
-                    import asyncio
                     retry_delays = [5, 15, 30]  # Exponential backoff
                     for retry_num, delay in enumerate(retry_delays):
@@ -2440,11 +2342,64 @@ class EnhancedNocturnalAgent:
                     break
             output = '\n'.join(output_lines).strip()
+            debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
+            # Log execution details in debug mode
+            if debug_mode:
+                output_preview = output[:200] if output else "(no output)"
+                print(f"✅ Command executed: {command}")
+                print(f"📤 Output ({len(output)} chars): {output_preview}...")
             return output if output else "Command executed (no output)"
         except Exception as e:
+            debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
+            if debug_mode:
+                print(f"❌ Command failed: {command}")
+                print(f"❌ Error: {e}")
             return f"ERROR: {e}"
+    def _format_shell_output(self, output: str, command: str) -> Dict[str, Any]:
+        """
+        Format shell command output for display.
+        Classifies the command by the program names it actually invokes (the
+        first word of each `|`, `&&`, `||`, `;`, `&` separated segment) rather
+        than by substring search, so `mkdir` is no longer mistaken for `dir`
+        and words such as `false` or `tools` are no longer mistaken for `ls`.
+        Args:
+            output: Text produced by the command; None/empty is treated as "".
+            command: The shell command line that produced ``output``.
+        Returns:
+            Dictionary with keys ``type``, ``command``, ``line_count``,
+            ``byte_count`` (size of ``output`` encoded as UTF-8), ``preview``
+            and ``full_output``.
+        """
+        output = output or ""
+        lines = output.split('\n') if output else []
+        non_empty = sum(1 for line in lines if line.strip())
+        stripped = output.strip()
+        # Collect the program name that starts each pipeline/list segment.
+        # Leading `sudo` and VAR=value assignments are skipped, and any
+        # directory prefix (/usr/bin/ls -> ls) is dropped.
+        programs = set()
+        for segment in re.split(r"\|\||&&|[|;&\n]", command.lower()):
+            for word in segment.split():
+                if word == "sudo" or re.match(r"[a-z_][a-z0-9_]*=", word):
+                    continue
+                programs.add(word.rsplit("/", 1)[-1])
+                break
+        formatted = {
+            "type": "shell_output",
+            "command": command,
+            "line_count": len(lines),
+            # Real byte size; len(output) alone would count characters.
+            "byte_count": len(output.encode("utf-8", errors="replace")),
+            "preview": '\n'.join(lines[:10]) if lines else "(no output)",
+            "full_output": output
+        }
+        # Rules are checked in order; the first matching rule wins, which keeps
+        # the original precedence (listing > search > content > cwd > creation).
+        rules = (
+            ({"ls", "dir"}, "directory_listing", f"📁 Found {non_empty} items"),
+            ({"find", "locate", "search"}, "search_results", f"🔍 Found {non_empty} matches"),
+            ({"grep", "match"}, "search_results", f"🔍 Found {non_empty} matching lines"),
+            ({"cat", "head", "tail"}, "file_content", f"📄 {len(lines)} lines of content"),
+            ({"pwd", "cd"}, "directory_change", f"📍 {stripped}"),
+            ({"mkdir", "touch", "create"}, "file_creation", f"✨ Created: {stripped}"),
+        )
+        for names, output_type, preview in rules:
+            if programs & names:
+                formatted["type"] = output_type
+                formatted["preview"] = preview
+                break
+        return formatted
     # ========================================================================
     # DIRECT FILE OPERATIONS (Claude Code / Cursor Parity)
     # ========================================================================
@@ -3395,8 +3350,11 @@ class EnhancedNocturnalAgent:
             'what files', 'which files', 'how many files',
             'grep', 'search', 'look for', 'count',
             '.py', '.txt', '.js', '.java', '.cpp', '.c', '.h',
-            'function', 'class', 'definition', 'route', 'endpoint',
-            'codebase', 'project structure', 'source code'
+            'function', 'method', 'class', 'definition', 'route', 'endpoint',
+            'codebase', 'project structure', 'source code', 'implementation',
+            'compare', 'analyze', 'explain', 'purpose', 'what does', 'how does',
+            'this codebase', 'this repo', 'this repository', 'this project',
+            'our codebase', 'our repo', 'local code', 'local files'
         ]
         question_lower = question.lower()
@@ -3466,12 +3424,17 @@ class EnhancedNocturnalAgent:
             matched_types.append("financial")
             apis_to_use.append("finsight")
-        if any(keyword in question_lower for keyword in research_keywords):
+        # Check for explicit local/codebase indicators FIRST (highest priority)
+        local_indicators = ['this codebase', 'this repo', 'this repository', 'this project',
+                          'our codebase', 'our repo', 'local code', 'local files']
+        is_local_query = any(indicator in question_lower for indicator in local_indicators)
+        if any(keyword in question_lower for keyword in research_keywords) and not is_local_query:
             matched_types.append("research")
             apis_to_use.append("archive")
         # Qualitative queries often involve research
-        if analysis_mode in ("qualitative", "mixed") and "research" not in matched_types:
+        if analysis_mode in ("qualitative", "mixed") and "research" not in matched_types and not is_local_query:
             matched_types.append("research")
             if "archive" not in apis_to_use:
                 apis_to_use.append("archive")
@@ -3555,10 +3518,59 @@ class EnhancedNocturnalAgent:
             if workflow_response:
                 return workflow_response
+            # Detect and store language preference from user input
+            self._detect_language_preference(request.question)
             # Initialize
             api_results = {}
             tools_used = []
             debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
+            if self._is_generic_test_prompt(request.question):
+                return self._quick_reply(
+                    request,
+                    "Looks like you're just testing. Let me know what you'd like me to dig into and I'll jump on it.",
+                    tools_used=["quick_reply"],
+                    confidence=0.4,
+                )
+            if self._is_location_query(request.question):
+                cwd_line = ""
+                tools: List[str] = []
+                if self.shell_session:
+                    pwd_output = self.execute_command("pwd")
+                    if pwd_output and not pwd_output.startswith("ERROR"):
+                        cwd_line = pwd_output.strip().splitlines()[-1]
+                        tools.append("shell_execution")
+                if not cwd_line:
+                    try:
+                        cwd_line = os.getcwd()
+                    except Exception:
+                        cwd_line = ""
+                if cwd_line:
+                    self.file_context["current_cwd"] = cwd_line
+                    self.file_context["last_directory"] = cwd_line
+                    message = (
+                        f"We're in {cwd_line}."
+                        if "shell_execution" not in tools
+                        else f"We're in {cwd_line} (via `pwd`)."
+                    )
+                    return self._quick_reply(
+                        request,
+                        message,
+                        tools_used=tools or ["quick_reply"],
+                        confidence=0.85,
+                    )
+                else:
+                    return self._quick_reply(
+                        request,
+                        "I couldn't determine the working directory just now, but you can run `pwd` to double-check.",
+                        tools_used=tools or ["quick_reply"],
+                        confidence=0.3,
+                    )
             # ========================================================================
             # PRIORITY 1: SHELL PLANNING (Reasoning Layer - Runs FIRST for ALL modes)
@@ -3575,7 +3587,9 @@ class EnhancedNocturnalAgent:
                 'directory', 'folder', 'where', 'find', 'list', 'files', 'file', 'look', 'search', 'check', 'into',
                 'show', 'open', 'read', 'display', 'cat', 'view', 'contents', '.r', '.py', '.csv', '.ipynb',
                 'create', 'make', 'mkdir', 'touch', 'new', 'write', 'copy', 'move', 'delete', 'remove',
-                'git', 'grep', 'navigate', 'go to', 'change to'
+                'git', 'grep', 'navigate', 'go to', 'change to',
+                'method', 'function', 'class', 'implementation', 'what does', 'how does', 'explain',
+                'how many', 'count', 'lines', 'wc -l', 'number of'
             ])
             if might_need_shell and self.shell_session:
@@ -3620,6 +3634,11 @@ IMPORTANT RULES:
 11. 🚨 MULTI-STEP QUERIES: For queries like "read X and do Y", ONLY generate the FIRST step (reading X). The LLM will handle subsequent steps after seeing the file contents.
 12. 🚨 NEVER use python -m py_compile or other code execution for finding bugs - just read the file with cat/head
 13. 🚨 FOR GREP: When searching in a DIRECTORY (not a specific file), ALWAYS use -r flag for recursive search: grep -rn 'pattern' /path/to/dir 2>/dev/null
+14. 🚨 FOR FINDING FUNCTIONS/METHODS when file path is UNKNOWN: Use find + grep together:
+    - "what does X method do in file.py?" → find . -name 'file.py' -exec grep -A 50 'def X' {{}} \\; 2>/dev/null
+    - "explain process_request in agent.py" → find . -name '*agent.py' -exec grep -A 80 'def process_request' {{}} \\; 2>/dev/null
+    - If you know exact path, use grep directly: grep -A 50 'def X' path/to/file.py 2>/dev/null
+15. 🚨 FOR COMPARING FILES: Read FIRST file only. The LLM will request the second file after analyzing the first.
 Examples:
 "where am i?" → {{"action": "execute", "command": "pwd", "reason": "Show current directory", "updates_context": false}}
@@ -3638,6 +3657,10 @@ Examples:
 "find all bugs in code" → {{"action": "execute", "command": "grep -rn 'BUG:' . 2>/dev/null", "reason": "Search for bug markers in code", "updates_context": false}}
 "read analyze.py and find bugs" → {{"action": "execute", "command": "head -200 analyze.py", "reason": "Read file to analyze bugs", "updates_context": false}}
 "show me calc.py completely" → {{"action": "execute", "command": "cat calc.py", "reason": "Display entire file", "updates_context": false}}
+"what does process_request method do in enhanced_ai_agent.py" → {{"action": "execute", "command": "find . -name '*enhanced_ai_agent.py' -exec grep -A 80 'def process_request' {{}} \\; 2>/dev/null", "reason": "Find file and show method definition with context", "updates_context": false}}
+"explain the initialize method in agent.py" → {{"action": "execute", "command": "find . -name '*agent.py' -exec grep -A 50 'def initialize' {{}} \\; 2>/dev/null", "reason": "Find file and show method", "updates_context": false}}
+"find calculate function in utils.py" → {{"action": "execute", "command": "find . -name 'utils.py' -exec grep -A 30 'def calculate' {{}} \\; 2>/dev/null", "reason": "Find file and show function", "updates_context": false}}
+"compare file1.py and file2.py" → {{"action": "execute", "command": "head -100 file1.py", "reason": "Read first file (will read second in next step)", "updates_context": true}}
 "git status" → {{"action": "execute", "command": "git status", "reason": "Check repository status", "updates_context": false}}
 "what's in that file?" + last_file=data.csv → {{"action": "execute", "command": "head -100 data.csv", "reason": "Show file contents", "updates_context": false}}
 "hello" → {{"action": "none", "reason": "Conversational greeting, no command needed"}}
@@ -3682,7 +3705,9 @@ JSON:"""
                     reason = plan.get("reason", "")
                     updates_context = plan.get("updates_context", False)
-                    if debug_mode:
+                    # Only show planning details with explicit verbose flag (don't leak to users)
+                    verbose_planning = debug_mode and os.getenv("NOCTURNAL_VERBOSE_PLANNING", "").lower() == "1"
+                    if verbose_planning:
                         print(f"🔍 SHELL PLAN: {plan}")
                     # GENERIC COMMAND EXECUTION - No more hardcoded actions!
@@ -3690,13 +3715,13 @@ JSON:"""
                         command = self._infer_shell_command(request.question)
                         shell_action = "execute"
                         updates_context = False
-                        if debug_mode:
+                        if verbose_planning:
                             print(f"🔄 Planner opted out; inferred fallback command: {command}")
                     if shell_action == "execute" and not command:
                         command = self._infer_shell_command(request.question)
                         plan["command"] = command
-                        if debug_mode:
+                        if verbose_planning:
                             print(f"🔄 Planner omitted command, inferred {command}")
                     if shell_action == "execute" and command:
@@ -3712,10 +3737,15 @@ JSON:"""
                             print(f"🔍 Command: {command}")
                             print(f"🔍 Safety: {safety_level}")
-                        if safety_level == 'BLOCKED':
+                        if safety_level in ('BLOCKED', 'DANGEROUS'):
+                            reason = (
+                                "Command classified as destructive; requires manual confirmation"
+                                if safety_level == 'DANGEROUS'
+                                else "This command could cause system damage"
+                            )
                             api_results["shell_info"] = {
                                 "error": f"Command blocked for safety: {command}",
-                                "reason": "This command could cause system damage"
+                                "reason": reason
                             }
                         else:
                             # ========================================
@@ -3768,7 +3798,8 @@ JSON:"""
                                     pass  # Fall back to shell execution
                             # Check for file search commands (find)
-                            if not intercepted and 'find' in command and '-name' in command:
+                            # BUT: Don't intercept find -exec commands (those need real shell execution)
+                            if not intercepted and 'find' in command and '-name' in command and '-exec' not in command:
                                 try:
                                     # import re removed - using module-level import
                                     # Extract pattern: find ... -name '*pattern*'
@@ -3947,10 +3978,12 @@ JSON:"""
                                 output = self.execute_command(command)
                             if not output.startswith("ERROR"):
-                                # Success - store results
+                                # Success - store results with formatted preview
+                                formatted_output = self._format_shell_output(output, command)
                                 api_results["shell_info"] = {
                                     "command": command,
                                     "output": output,
+                                    "formatted": formatted_output,  # Add formatted version
                                     "reason": reason,
                                     "safety_level": safety_level
                                 }
@@ -4145,16 +4178,14 @@ JSON:"""
             if not is_vague:
                 # Archive API for research
                 if "archive" in request_analysis.get("apis", []):
-                    result = await self.search_academic_papers(request.question, 3)  # Reduced from 5 to save tokens
+                    result = await self.search_academic_papers(request.question, 5)  # Get 5 papers for comprehensive review
                     if "error" not in result:
-                        # Strip abstracts to save tokens - only keep essential fields
+                        # KEEP abstracts for literature review - essential for paper understanding
+                        # Only remove full_text to save tokens
                         if "results" in result:
                             for paper in result["results"]:
-                                # Remove heavy fields
-                                paper.pop("abstract", None)
-                                paper.pop("tldr", None)
-                                paper.pop("full_text", None)
-                                # Keep only: title, authors, year, doi, url
+                                paper.pop("full_text", None)  # Remove only full text, keep abstract & tldr
+                                # Keep: title, authors, year, doi, url, abstract, tldr
                         api_results["research"] = result
                         tools_used.append("archive_api")
@@ -4316,6 +4347,40 @@ JSON:"""
                     api_results=api_results,
                     tools_used=tools_used
                 )
+                # VALIDATION: Ensure we got a valid response (not planning JSON)
+                if not response or not hasattr(response, 'response'):
+                    # Backend failed - create friendly error with available data
+                    if debug_mode:
+                        print(f"⚠️ Backend response invalid or missing")
+                    return ChatResponse(
+                        response="I ran into a technical issue processing that. Let me try to help with what I found:",
+                        error_message="Backend response invalid",
+                        tools_used=tools_used,
+                        api_results=api_results
+                    )
+                # Check if response contains planning JSON instead of final answer
+                response_text = response.response.strip()
+                if response_text.startswith('{') and '"action"' in response_text and '"command"' in response_text:
+                    # This is planning JSON, not a final response!
+                    if debug_mode:
+                        print(f"⚠️ Backend returned planning JSON instead of final response")
+                    # Extract real output from api_results and generate friendly response
+                    shell_output = api_results.get('shell_info', {}).get('output', '')
+                    if shell_output:
+                        return ChatResponse(
+                            response=f"I found what you were looking for:\n\n{shell_output}",
+                            tools_used=tools_used,
+                            api_results=api_results
+                        )
+                    else:
+                        return ChatResponse(
+                            response=f"I completed the action: {api_results.get('shell_info', {}).get('command', '')}",
+                            tools_used=tools_used,
+                            api_results=api_results
+                        )
                 # POST-PROCESSING: Auto-extract code blocks and write files if user requested file creation
                 # This fixes the issue where LLM shows corrected code but doesn't create the file
@@ -4459,6 +4524,16 @@ JSON:"""
             mentioned = _extract_filenames(request.question)
             file_previews: List[Dict[str, Any]] = []
             files_forbidden: List[str] = []
+            # Check if query is asking about specific functions/methods/classes OR file metadata
+            # If so, SKIP auto-preview and let shell planning handle it
+            query_lower = request.question.lower()
+            asking_about_code_element = any(pattern in query_lower for pattern in [
+                'method', 'function', 'class', 'def ', 'what does', 'how does',
+                'explain the', 'find the', 'show me the', 'purpose of', 'implementation of',
+                'how many lines', 'count lines', 'number of lines', 'wc -l', 'line count'
+            ])
             base_dir = Path.cwd().resolve()
             sensitive_roots = {Path('/etc'), Path('/proc'), Path('/sys'), Path('/dev'), Path('/root'), Path('/usr'), Path('/bin'), Path('/sbin'), Path('/var')}
             def _is_safe_path(path_str: str) -> bool:
@@ -4469,31 +4544,47 @@ JSON:"""
                     return str(rp).startswith(str(base_dir))
                 except Exception:
                     return False
-            for m in mentioned:
-                if not _is_safe_path(m):
-                    files_forbidden.append(m)
-                    continue
-                pr = await self._preview_file(m)
-                if pr:
-                    file_previews.append(pr)
+            # Only auto-preview if NOT asking about specific code elements
+            if not asking_about_code_element:
+                for m in mentioned:
+                    if not _is_safe_path(m):
+                        files_forbidden.append(m)
+                        continue
+                    pr = await self._preview_file(m)
+                    # Only add successful previews (not errors)
+                    if pr and pr.get("type") != "error":
+                        file_previews.append(pr)
+            else:
+                # Query is about specific code elements - let shell planning handle with grep
+                files_forbidden = [m for m in mentioned if not _is_safe_path(m)]
             if file_previews:
                 api_results["files"] = file_previews
-                # Build grounded context from first text preview
+                tools_used.append("read_file")  # Track that files were read
+                # Build grounded context from ALL text previews (for comparisons)
                 text_previews = [fp for fp in file_previews if fp.get("type") == "text" and fp.get("preview")]
                 files_context = ""
                 if text_previews:
-                    fp = text_previews[0]
-                    quoted = "\n".join(fp["preview"].splitlines()[:20])
-                    files_context = f"File: {fp['path']} (first lines)\n" + quoted
+                    # Detect comparison queries - include MORE context
+                    is_comparison = len(text_previews) > 1 or any(word in request.question.lower() for word in ['compare', 'difference', 'contrast', 'vs', 'versus'])
+                    line_limit = 200 if is_comparison else 100  # More lines for comparisons
+                    # Include all files with appropriate context
+                    file_contexts = []
+                    for fp in text_previews:
+                        quoted = "\n".join(fp["preview"].splitlines()[:line_limit])
+                        file_contexts.append(f"File: {fp['path']}\n{quoted}")
+                    files_context = "\n\n---\n\n".join(file_contexts)
                 api_results["files_context"] = files_context
-            elif mentioned:
-                # Mentioned files but none found
+            elif mentioned and not asking_about_code_element:
+                # Mentioned files but none found (only set if we actually tried to preview them)
                 api_results["files_missing"] = mentioned
             if files_forbidden:
                 api_results["files_forbidden"] = files_forbidden
             workspace_listing: Optional[Dict[str, Any]] = None
-            if not file_previews:
+            # Only show workspace listing if NOT looking for specific missing files
+            if not file_previews and not api_results.get("files_missing"):
                 file_browse_keywords = (
                     "list files",
                     "show files",
@@ -4513,7 +4604,8 @@ JSON:"""
                     workspace_listing = await self._get_workspace_listing()
                     api_results["workspace_listing"] = workspace_listing
-            if workspace_listing and set(request_analysis.get("apis", [])) <= {"shell"}:
+            # Don't show workspace listing if there are missing files (prioritize error)
+            if workspace_listing and set(request_analysis.get("apis", [])) <= {"shell"} and not api_results.get("files_missing"):
                 return self._respond_with_workspace_listing(request, workspace_listing)
             if "finsight" in request_analysis["apis"]:
@@ -4564,10 +4656,64 @@ JSON:"""
             messages = [
                 {"role": "system", "content": system_prompt}
             ]
+            # CRITICAL: Inject research papers IMMEDIATELY after system prompt (highest priority)
+            research_data = api_results.get("research")
+            if research_data and research_data.get("results"):
+                papers_text = "🚨 PAPERS ALREADY FOUND - SYNTHESIZE THESE NOW:\n\n"
+                papers_text += "DO NOT say 'we will search' - the search is COMPLETE.\n"
+                papers_text += "DO NOT say 'attempting' - papers are ALREADY HERE.\n"
+                papers_text += "YOUR JOB: Synthesize these papers into a comprehensive literature review (500+ words).\n\n"
+                for i, paper in enumerate(research_data["results"][:5], 1):
+                    papers_text += f"\n═══ PAPER {i} ═══\n"
+                    papers_text += f"Title: {paper.get('title', 'No title')}\n"
+                    # Handle authors as either list of dicts or list of strings
+                    authors = paper.get('authors', [])
+                    if authors:
+                        if isinstance(authors[0], dict):
+                            author_names = [a.get('name', 'Unknown') for a in authors[:3]]
+                        else:
+                            author_names = authors[:3]
+                        papers_text += f"Authors: {', '.join(author_names)}\n"
+                    papers_text += f"Year: {paper.get('year', 'N/A')}\n"
+                    if paper.get('abstract'):
+                        papers_text += f"\nAbstract:\n{paper['abstract']}\n"
+                    if paper.get('tldr'):
+                        papers_text += f"\nTL;DR: {paper['tldr']}\n"
+                    papers_text += "\n"
+                papers_text += "\n🚨 SYNTHESIZE THESE PAPERS NOW - Include:\n"
+                papers_text += "- Overview of the research area\n"
+                papers_text += "- Key findings from each paper's abstract\n"
+                papers_text += "- Methods and approaches used\n"
+                papers_text += "- Comparison and contrast of different approaches\n"
+                papers_text += "- Implications and future directions\n"
+                papers_text += "\nMINIMUM 500 WORDS. Use the abstracts above."
+                messages.append({"role": "system", "content": papers_text})
             # If we have file context, inject it as an additional grounding message
             fc = api_results.get("files_context")
             if fc:
-                messages.append({"role": "system", "content": f"Grounding from mentioned file(s):\n{fc}\n\nAnswer based strictly on this content when relevant. Do not run shell commands."})
+                # Count how many files are being compared
+                file_count = len([fp for fp in api_results.get("files", []) if fp.get("type") == "text"])
+                if file_count > 1:
+                    # Multi-file comparison - make it VERY explicit
+                    comparison_msg = "🚨 MULTIPLE FILES PROVIDED FOR COMPARISON:\n\n"
+                    comparison_msg += fc
+                    comparison_msg += "\n\n🚨 CRITICAL INSTRUCTIONS FOR COMPARISON:\n"
+                    comparison_msg += "1. Read ALL file contents above carefully\n"
+                    comparison_msg += "2. Extract specific data points, numbers, percentages from EACH file\n"
+                    comparison_msg += "3. Compare and contrast the ACTUAL content (not just filenames)\n"
+                    comparison_msg += "4. If asked about differences, cite EXACT lines or values from BOTH files\n"
+                    comparison_msg += "5. Do NOT make general statements - be specific with examples from the files\n"
+                    comparison_msg += "\nAnswer based STRICTLY on the file contents above. Do not run shell commands."
+                    messages.append({"role": "system", "content": comparison_msg})
+                else:
+                    # Single file - normal handling
+                    messages.append({"role": "system", "content": f"Grounding from mentioned file(s):\n{fc}\n\nAnswer based strictly on this content when relevant. Do not run shell commands."})
             missing = api_results.get("files_missing")
             if missing:
                 messages.append({"role": "system", "content": f"User mentioned file(s) not found: {missing}. Respond explicitly that the file was not found and avoid speculation."})
@@ -4790,6 +4936,92 @@ JSON:"""
                         final_response = "I searched but found no matches. The search returned no results."
                         logger.warning("🚨 Hallucination prevented: LLM tried to make up results when shell output was empty")
+            # ========================================
+            # PHASE 2: THINKING BLOCKS
+            # Show reasoning process for complex queries
+            # ========================================
+            thinking_text = ""
+            try:
+                thinking_context = {
+                    'tools_used': tools_used,
+                    'api_results': api_results,
+                    'conversation_history': self.conversation_history[-3:] if self.conversation_history else []
+                }
+                thinking_text = await generate_and_format_thinking(
+                    request.question,
+                    thinking_context,
+                    show_full=False  # Compact version
+                )
+                if thinking_text:
+                    logger.info(f"💭 Generated thinking process for query")
+            except Exception as e:
+                logger.error(f"Thinking generation failed: {e}")
+            # ========================================
+            # PHASE 1 QUALITY PIPELINE
+            # Process response through quality improvements
+            # ========================================
+            try:
+                pipeline_context = {
+                    'tools_used': tools_used,
+                    'api_results': api_results,
+                    'query_type': request_analysis.get('type'),
+                    'shell_output_type': 'generic'
+                }
+                processed = await ResponsePipeline.process(
+                    final_response,
+                    request.question,
+                    pipeline_context,
+                    response_type="generic"
+                )
+                final_response = processed.final_response
+                # Log quality improvements
+                if processed.improvements_applied:
+                    logger.info(f"✨ Quality improvements: {', '.join(processed.improvements_applied)}")
+                    logger.info(f"📊 Quality score: {processed.quality_score:.2f}")
+            except Exception as e:
+                # If pipeline fails, log but continue with original response
+                logger.error(f"Quality pipeline failed: {e}, using original response")
+            # ========================================
+            # PHASE 2: CONFIDENCE CALIBRATION
+            # Assess confidence and add caveats if needed
+            # ========================================
+            try:
+                confidence_context = {
+                    'tools_used': tools_used,
+                    'api_results': api_results,
+                    'query_type': request_analysis.get('type')
+                }
+                final_response, confidence_assessment = assess_and_apply_caveat(
+                    final_response,
+                    request.question,
+                    confidence_context
+                )
+                logger.info(
+                    f"🎯 Confidence: {confidence_assessment.confidence_level} "
+                    f"({confidence_assessment.confidence_score:.2f})"
+                )
+                if confidence_assessment.should_add_caveat:
+                    logger.info(f"⚠️ Added caveat due to low confidence")
+            except Exception as e:
+                logger.error(f"Confidence calibration failed: {e}")
+            # Prepend thinking blocks if generated
+            if thinking_text:
+                final_response = thinking_text + "\n\n" + final_response
             expected_tools: Set[str] = set()
             if "finsight" in request_analysis.get("apis", []):
                 expected_tools.add("finsight_api")
@@ -4825,20 +5057,25 @@ JSON:"""
         except Exception as e:
             import traceback
-            details = str(e)
             debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
             if debug_mode:
                 print("🔴 FULL TRACEBACK:")
                 traceback.print_exc()
-            message = (
-                "⚠️ Something went wrong while orchestrating your request, but no actions were performed. "
-                "Please retry, and if the issue persists share this detail with the team: {details}."
-            ).format(details=details)
+            # ========================================
+            # PHASE 1 GRACEFUL ERROR HANDLING
+            # Never expose technical details to users
+            # ========================================
+            user_friendly_message = GracefulErrorHandler.create_fallback_response(
+                request.question,
+                e
+            )
             return ChatResponse(
-                response=message,
+                response=user_friendly_message,
                 timestamp=datetime.now().isoformat(),
                 confidence_score=0.0,
-                error_message=details
+                error_message=str(e) if debug_mode else None  # Only include technical error in debug mode
             )
     async def process_request_streaming(self, request: ChatRequest):
@@ -4921,9 +5158,19 @@ JSON:"""
             mentioned = _extract_filenames(request.question)
             file_previews: List[Dict[str, Any]] = []
             files_forbidden: List[str] = []
+            # Check if query is asking about specific functions/methods/classes OR file metadata
+            # If so, SKIP auto-preview and let shell planning handle it
+            query_lower = request.question.lower()
+            asking_about_code_element = any(pattern in query_lower for pattern in [
+                'method', 'function', 'class', 'def ', 'what does', 'how does',
+                'explain the', 'find the', 'show me the', 'purpose of', 'implementation of',
+                'how many lines', 'count lines', 'number of lines', 'wc -l', 'line count'
+            ])
             base_dir = Path.cwd().resolve()
             sensitive_roots = {Path('/etc'), Path('/proc'), Path('/sys'), Path('/dev'), Path('/root'), Path('/usr'), Path('/bin'), Path('/sbin'), Path('/var')}
             def _is_safe_path(path_str: str) -> bool:
                 try:
                     rp = Path(path_str).resolve()
@@ -4932,39 +5179,57 @@ JSON:"""
                     return str(rp).startswith(str(base_dir))
                 except Exception:
                     return False
-            for m in mentioned:
-                if not _is_safe_path(m):
-                    files_forbidden.append(m)
-                    continue
-                pr = await self._preview_file(m)
-                if pr:
-                    file_previews.append(pr)
+            # Only auto-preview if NOT asking about specific code elements or metadata
+            if not asking_about_code_element:
+                for m in mentioned:
+                    if not _is_safe_path(m):
+                        files_forbidden.append(m)
+                        continue
+                    pr = await self._preview_file(m)
+                    # Only add successful previews (not errors)
+                    if pr and pr.get("type") != "error":
+                        file_previews.append(pr)
+            else:
+                # Query is about specific code elements - let shell planning handle with grep/wc
+                files_forbidden = [m for m in mentioned if not _is_safe_path(m)]
             if file_previews:
                 api_results["files"] = file_previews
+                tools_used.append("read_file")  # Track that files were read
+                # Build grounded context from ALL text previews (for comparisons)
                 text_previews = [fp for fp in file_previews if fp.get("type") == "text" and fp.get("preview")]
                 files_context = ""
                 if text_previews:
-                    fp = text_previews[0]
-                    quoted = "\n".join(fp["preview"].splitlines()[:20])
-                    files_context = f"File: {fp['path']} (first lines)\n" + quoted
+                    # Detect comparison queries - include MORE context
+                    is_comparison = len(text_previews) > 1 or any(word in request.question.lower() for word in ['compare', 'difference', 'contrast', 'vs', 'versus'])
+                    line_limit = 200 if is_comparison else 100  # More lines for comparisons
+                    # Include all files with appropriate context
+                    file_contexts = []
+                    for fp in text_previews:
+                        quoted = "\n".join(fp["preview"].splitlines()[:line_limit])
+                        file_contexts.append(f"File: {fp['path']}\n{quoted}")
+                    files_context = "\n\n---\n\n".join(file_contexts)
                 api_results["files_context"] = files_context
-            elif mentioned:
+            elif mentioned and not asking_about_code_element:
+                # Mentioned files but none found (only set if we actually tried to preview them)
                 api_results["files_missing"] = mentioned
             if files_forbidden:
                 api_results["files_forbidden"] = files_forbidden
             # Workspace listing
             workspace_listing: Optional[Dict[str, Any]] = None
-            if not file_previews:
+            # Only show workspace listing if NOT looking for specific missing files
+            if not file_previews and not api_results.get("files_missing"):
                 file_browse_keywords = ("list files", "show files", "what files")
                 describe_files = ("file" in question_lower or "directory" in question_lower)
                 if any(keyword in question_lower for keyword in file_browse_keywords) or describe_files:
                     workspace_listing = await self._get_workspace_listing()
                     api_results["workspace_listing"] = workspace_listing
-            if workspace_listing and set(request_analysis.get("apis", [])) <= {"shell"}:
+            # Don't show workspace listing if there are missing files (prioritize error)
+            if workspace_listing and set(request_analysis.get("apis", [])) <= {"shell"} and not api_results.get("files_missing"):
                 result = self._respond_with_workspace_listing(request, workspace_listing)
                 async def workspace_gen():
                     yield result.response
@@ -4996,10 +5261,63 @@ JSON:"""
             # Build messages
             system_prompt = self._build_system_prompt(request_analysis, memory_context, api_results)
             messages = [{"role": "system", "content": system_prompt}]
+            # CRITICAL: Inject research papers IMMEDIATELY after system prompt (highest priority)
+            research_data = api_results.get("research")
+            if research_data and research_data.get("results"):
+                papers_text = "🚨 PAPERS ALREADY FOUND - SYNTHESIZE THESE NOW:\n\n"
+                papers_text += "DO NOT say 'we will search' - the search is COMPLETE.\n"
+                papers_text += "DO NOT say 'attempting' - papers are ALREADY HERE.\n"
+                papers_text += "YOUR JOB: Synthesize these papers into a comprehensive literature review (500+ words).\n\n"
+                for i, paper in enumerate(research_data["results"][:5], 1):
+                    papers_text += f"\n═══ PAPER {i} ═══\n"
+                    papers_text += f"Title: {paper.get('title', 'No title')}\n"
+                    # Handle authors as either list of dicts or list of strings
+                    authors = paper.get('authors', [])
+                    if authors:
+                        if isinstance(authors[0], dict):
+                            author_names = [a.get('name', 'Unknown') for a in authors[:3]]
+                        else:
+                            author_names = authors[:3]
+                        papers_text += f"Authors: {', '.join(author_names)}\n"
+                    papers_text += f"Year: {paper.get('year', 'N/A')}\n"
+                    if paper.get('abstract'):
+                        papers_text += f"\nAbstract:\n{paper['abstract']}\n"
+                    if paper.get('tldr'):
+                        papers_text += f"\nTL;DR: {paper['tldr']}\n"
+                    papers_text += "\n"
+                papers_text += "\n🚨 SYNTHESIZE THESE PAPERS NOW - Include:\n"
+                papers_text += "- Overview of the research area\n"
+                papers_text += "- Key findings from each paper's abstract\n"
+                papers_text += "- Methods and approaches used\n"
+                papers_text += "- Comparison and contrast of different approaches\n"
+                papers_text += "- Implications and future directions\n"
+                papers_text += "\nMINIMUM 500 WORDS. Use the abstracts above."
+                messages.append({"role": "system", "content": papers_text})
             fc = api_results.get("files_context")
             if fc:
-                messages.append({"role": "system", "content": f"Grounding from mentioned file(s):\n{fc}"})
+                # Count how many files are being compared
+                file_count = len([fp for fp in api_results.get("files", []) if fp.get("type") == "text"])
+                if file_count > 1:
+                    # Multi-file comparison - make it VERY explicit
+                    comparison_msg = "🚨 MULTIPLE FILES PROVIDED FOR COMPARISON:\n\n"
+                    comparison_msg += fc
+                    comparison_msg += "\n\n🚨 CRITICAL INSTRUCTIONS FOR COMPARISON:\n"
+                    comparison_msg += "1. Read ALL file contents above carefully\n"
+                    comparison_msg += "2. Extract specific data points, numbers, percentages from EACH file\n"
+                    comparison_msg += "3. Compare and contrast the ACTUAL content (not just filenames)\n"
+                    comparison_msg += "4. If asked about differences, cite EXACT lines or values from BOTH files\n"
+                    comparison_msg += "5. Do NOT make general statements - be specific with examples from the files\n"
+                    comparison_msg += "\nAnswer based STRICTLY on the file contents above. Do not run shell commands."
+                    messages.append({"role": "system", "content": comparison_msg})
+                else:
+                    # Single file - normal handling
+                    messages.append({"role": "system", "content": f"Grounding from mentioned file(s):\n{fc}"})
             # Add conversation history (abbreviated - just recent)
             if len(self.conversation_history) > 6:

cite-agent 1.3.9__py3-none-any.whl → 1.4.3__py3-none-any.whl

cite-agent 1.3.9__py3-none-any.whl → 1.4.3__py3-none-any.whl