cite-agent 1.3.9__py3-none-any.whl → 1.4.3__py3-none-any.whl

This diff shows the differences in content between two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (44)
  1. cite_agent/__init__.py +13 -13
  2. cite_agent/__version__.py +1 -1
  3. cite_agent/action_first_mode.py +150 -0
  4. cite_agent/adaptive_providers.py +413 -0
  5. cite_agent/archive_api_client.py +186 -0
  6. cite_agent/auth.py +0 -1
  7. cite_agent/auto_expander.py +70 -0
  8. cite_agent/cache.py +379 -0
  9. cite_agent/circuit_breaker.py +370 -0
  10. cite_agent/citation_network.py +377 -0
  11. cite_agent/cli.py +8 -16
  12. cite_agent/cli_conversational.py +113 -3
  13. cite_agent/confidence_calibration.py +381 -0
  14. cite_agent/deduplication.py +325 -0
  15. cite_agent/enhanced_ai_agent.py +689 -371
  16. cite_agent/error_handler.py +228 -0
  17. cite_agent/execution_safety.py +329 -0
  18. cite_agent/full_paper_reader.py +239 -0
  19. cite_agent/observability.py +398 -0
  20. cite_agent/offline_mode.py +348 -0
  21. cite_agent/paper_comparator.py +368 -0
  22. cite_agent/paper_summarizer.py +420 -0
  23. cite_agent/pdf_extractor.py +350 -0
  24. cite_agent/proactive_boundaries.py +266 -0
  25. cite_agent/quality_gate.py +442 -0
  26. cite_agent/request_queue.py +390 -0
  27. cite_agent/response_enhancer.py +257 -0
  28. cite_agent/response_formatter.py +458 -0
  29. cite_agent/response_pipeline.py +295 -0
  30. cite_agent/response_style_enhancer.py +259 -0
  31. cite_agent/self_healing.py +418 -0
  32. cite_agent/similarity_finder.py +524 -0
  33. cite_agent/streaming_ui.py +13 -9
  34. cite_agent/thinking_blocks.py +308 -0
  35. cite_agent/tool_orchestrator.py +416 -0
  36. cite_agent/trend_analyzer.py +540 -0
  37. cite_agent/unpaywall_client.py +226 -0
  38. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/METADATA +15 -1
  39. cite_agent-1.4.3.dist-info/RECORD +62 -0
  40. cite_agent-1.3.9.dist-info/RECORD +0 -32
  41. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/WHEEL +0 -0
  42. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/entry_points.txt +0 -0
  43. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/licenses/LICENSE +0 -0
  44. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/top_level.txt +0 -0
@@ -27,6 +27,17 @@ from .telemetry import TelemetryManager
27
27
  from .setup_config import DEFAULT_QUERY_LIMIT
28
28
  from .conversation_archive import ConversationArchive
29
29
 
30
+ # Quality improvements - Phase 1
31
+ from .error_handler import GracefulErrorHandler, handle_error_gracefully
32
+ from .response_formatter import ResponseFormatter
33
+ from .quality_gate import ResponseQualityGate, assess_response_quality
34
+ from .response_pipeline import ResponsePipeline
35
+
36
+ # Intelligence improvements - Phase 2
37
+ from .thinking_blocks import ThinkingBlockGenerator, generate_and_format_thinking
38
+ from .tool_orchestrator import ToolOrchestrator
39
+ from .confidence_calibration import ConfidenceCalibrator, assess_and_apply_caveat
40
+
30
41
  # Suppress noise
31
42
  logging.basicConfig(level=logging.ERROR)
32
43
  logger = logging.getLogger(__name__)
@@ -887,9 +898,11 @@ class EnhancedNocturnalAgent:
887
898
  }
888
899
 
889
900
  content = p.read_text(errors="ignore")
890
- truncated = len(content) > 65536
891
- snippet = content[:65536]
892
- preview = "\n".join(snippet.splitlines()[:60])
901
+ # Increase preview size for better code analysis
902
+ # Show first 300 lines OR 100KB (whichever is smaller)
903
+ truncated = len(content) > 102400 # 100KB
904
+ snippet = content[:102400]
905
+ preview = "\n".join(snippet.splitlines()[:300]) # Increased from 60 to 300 lines
893
906
  return {
894
907
  "path": str(p),
895
908
  "type": "text",
@@ -968,6 +981,57 @@ class EnhancedNocturnalAgent:
968
981
  normalized = text.lower().strip()
969
982
  return any(normalized.startswith(ack) for ack in acknowledgments)
970
983
 
984
+ def _detect_language_preference(self, text: str) -> None:
985
+ """
986
+ Detect and store user's language preference from input text.
987
+ Supports Traditional Chinese (繁體中文), English, and other languages.
988
+ """
989
+ text_lower = text.lower()
990
+
991
+ # Check for Chinese characters (CJK)
992
+ has_chinese = any('\u4e00' <= char <= '\u9fff' for char in text)
993
+
994
+ # Explicit language requests
995
+ if 'chinese' in text_lower or '中文' in text or 'traditional' in text_lower:
996
+ self.language_preference = 'zh-TW'
997
+ elif 'english' in text_lower:
998
+ self.language_preference = 'en'
999
+ elif has_chinese:
1000
+ # Detected Chinese characters
1001
+ self.language_preference = 'zh-TW'
1002
+ else:
1003
+ # Default to English if not specified
1004
+ if not hasattr(self, 'language_preference'):
1005
+ self.language_preference = 'en'
1006
+
1007
+ def _is_generic_test_prompt(self, text: str) -> bool:
1008
+ """Detect simple 'test' style probes that don't need full analysis."""
1009
+ normalized = re.sub(r"[^a-z0-9\s]", " ", text.lower())
1010
+ words = [w for w in normalized.split() if w]
1011
+ if not words or "test" not in words:
1012
+ return False
1013
+ if len(words) > 4:
1014
+ return False
1015
+ allowed = {"test", "testing", "just", "this", "is", "a", "only"}
1016
+ return all(w in allowed for w in words)
1017
+
1018
+ def _is_location_query(self, text: str) -> bool:
1019
+ """Detect requests asking for the current working directory."""
1020
+ normalized = re.sub(r"[^a-z0-9/._\s-]", " ", text.lower())
1021
+ normalized = " ".join(normalized.split())
1022
+ location_phrases = [
1023
+ "where are we",
1024
+ "where am i",
1025
+ "where are we right now",
1026
+ "what directory",
1027
+ "current directory",
1028
+ "current folder",
1029
+ "current path",
1030
+ ]
1031
+ if any(phrase in normalized for phrase in location_phrases):
1032
+ return True
1033
+ return normalized in {"pwd", "pwd?"}
1034
+
971
1035
  def _format_api_results_for_prompt(self, api_results: Dict[str, Any]) -> str:
972
1036
  if not api_results:
973
1037
  logger.info("🔍 DEBUG: _format_api_results_for_prompt called with EMPTY api_results")
@@ -1002,12 +1066,13 @@ class EnhancedNocturnalAgent:
1002
1066
 
1003
1067
  formatted_parts.append("\n" + "=" * 60)
1004
1068
  formatted_parts.append("🚨 CRITICAL INSTRUCTION 🚨")
1005
- formatted_parts.append("The command was ALREADY executed. The output above is the COMPLETE and ONLY result.")
1006
- formatted_parts.append("YOU MUST present ONLY what is shown in the output above.")
1007
- formatted_parts.append("DO NOT add file names, paths, or code that are NOT in the output above.")
1008
- formatted_parts.append("DO NOT make up examples or additional results.")
1009
- formatted_parts.append("If the output says 'No matches' or is empty, tell the user 'No results found'.")
1010
- formatted_parts.append("DO NOT ask the user to run any commands - the results are already here.")
1069
+ formatted_parts.append("The command was ALREADY executed. The output above is the result.")
1070
+ formatted_parts.append("Present the KEY information concisely - summarize, don't paste everything.")
1071
+ formatted_parts.append("For file listings: list key files/directories, skip metadata unless asked.")
1072
+ formatted_parts.append("For search results: answer directly, cite relevant findings.")
1073
+ formatted_parts.append("For file content: show relevant sections only.")
1074
+ formatted_parts.append("If output is empty: say 'No results found'.")
1075
+ formatted_parts.append("DO NOT ask the user to run commands - results are already here.")
1011
1076
  formatted_parts.append("=" * 60)
1012
1077
 
1013
1078
  # Add other api_results
@@ -1045,291 +1110,98 @@ class EnhancedNocturnalAgent:
1045
1110
  api_results: Dict[str, Any]
1046
1111
  ) -> str:
1047
1112
  sections: List[str] = []
1048
-
1113
+ apis = request_analysis.get("apis", [])
1114
+
1049
1115
  # TRUTH-SEEKING CORE IDENTITY
1050
- # Adapt intro based on analysis mode
1051
1116
  analysis_mode = request_analysis.get("analysis_mode", "quantitative")
1052
-
1053
- if analysis_mode == "qualitative":
1054
- intro = (
1055
- "You are Nocturnal, a truth-seeking research AI specialized in QUALITATIVE ANALYSIS. "
1056
- "PRIMARY DIRECTIVE: Accuracy > Agreeableness. Quote verbatim, never paraphrase. "
1057
- "You analyze text, identify themes, extract quotes with context, and synthesize patterns. "
1058
- "You have direct access to academic sources and can perform thematic coding."
1059
- )
1060
- elif analysis_mode == "mixed":
1061
- intro = (
1062
- "You are Nocturnal, a truth-seeking research AI handling MIXED METHODS analysis. "
1063
- "PRIMARY DIRECTIVE: Accuracy > Agreeableness. "
1064
- "You work with both quantitative data (numbers, stats) and qualitative data (themes, quotes). "
1065
- "For numbers: calculate and cite. For text: quote verbatim and identify patterns. "
1066
- "You have access to production data sources and can write/execute code (Python, R, SQL)."
1067
- )
1068
- else: # quantitative
1069
- # Check if we're in dev mode (has local LLM client)
1070
- dev_mode = self.client is not None
1071
-
1072
- if dev_mode:
1073
- intro = (
1074
- "You are Cite Agent, a data analysis and research assistant with CODE EXECUTION. "
1075
- "PRIMARY DIRECTIVE: Execute code when needed. You have a persistent shell session. "
1076
- "When user asks for data analysis, calculations, or file operations: WRITE and EXECUTE the code. "
1077
- "Languages available: Python, R, SQL, Bash. "
1078
- "🚨 CRITICAL: Commands are AUTOMATICALLY executed. If you see 'shell_info' below, "
1079
- "that means the command was ALREADY RUN. NEVER ask users to run commands - just present results."
1080
- )
1081
- else:
1082
- intro = (
1083
- "You are Cite Agent, a truth-seeking research and finance AI with CODE EXECUTION. "
1084
- "PRIMARY DIRECTIVE: Accuracy > Agreeableness. NEVER HALLUCINATE. "
1085
- "You are a fact-checker and analyst with a persistent shell session. "
1086
- "You have access to research (Archive), financial data (FinSight SEC filings), and can run Python/R/SQL/Bash. "
1087
- "\n\n"
1088
- "🚨 ANTI-HALLUCINATION RULES:\n"
1089
- "1. When user asks about files, directories, or data - commands are AUTOMATICALLY executed.\n"
1090
- "2. If you see 'shell_info' in results below, that means command was ALREADY RUN.\n"
1091
- "3. ONLY present information from shell_info output. DO NOT invent file names, paths, or code.\n"
1092
- "4. If shell output is empty or unclear, say 'No results found' or 'Search returned no matches'.\n"
1093
- "5. NEVER make up plausible-sounding file paths or code that wasn't in the actual output.\n"
1094
- "6. If you're unsure, say 'I couldn't find that' rather than guessing.\n"
1095
- "7. NEVER ask the user to run commands - just present the results that were already executed."
1096
- )
1097
-
1117
+ dev_mode = self.client is not None
1118
+
1119
+ # Identity and capabilities
1120
+ intro = (
1121
+ "You are Cite Agent, a research and analysis assistant with access to:\n"
1122
+ " Persistent shell (Python, R, SQL, Bash)\n"
1123
+ " File operations (read, write, edit, search)\n"
1124
+ "• Academic papers (Archive API - 200M+ papers)\n"
1125
+ "• Financial data (FinSight API - SEC filings)\n"
1126
+ "• Web search\n\n"
1127
+ "Communication style: Be natural, direct, and helpful. "
1128
+ "Think like a capable research partner, not a rigid assistant."
1129
+ )
1098
1130
  sections.append(intro)
1099
1131
 
1100
- apis = request_analysis.get("apis", [])
1101
- capability_lines: List[str] = []
1102
- if "archive" in apis:
1103
- capability_lines.append(" Archive Research API for academic search and synthesis")
1104
- if "finsight" in apis:
1105
- capability_lines.append(" FinSight Finance API for SEC-quality metrics and citations")
1106
- if "shell" in apis:
1107
- capability_lines.append(" Persistent shell session for system inspection and code execution")
1108
- if not capability_lines:
1109
- capability_lines.append("• Core reasoning, code generation (Python/R/SQL), memory recall")
1110
-
1111
- # Add workflow capabilities
1112
- capability_lines.append("")
1113
- capability_lines.append("📚 WORKFLOW INTEGRATION (Always available):")
1114
- capability_lines.append("• You can SAVE papers to user's local library")
1115
- capability_lines.append("• You can LIST papers from library")
1116
- capability_lines.append("• You can EXPORT citations to BibTeX or APA")
1117
- capability_lines.append("• You can SEARCH user's paper collection")
1118
- capability_lines.append("• You can COPY text to user's clipboard")
1119
- capability_lines.append("• User's query history is automatically tracked")
1120
-
1121
- # Add file operation capabilities (Claude Code / Cursor parity)
1122
- capability_lines.append("")
1123
- capability_lines.append("📁 DIRECT FILE OPERATIONS (Always available):")
1124
- capability_lines.append("• read_file(path) - Read files with line numbers (like cat but better)")
1125
- capability_lines.append("• write_file(path, content) - Create/overwrite files directly")
1126
- capability_lines.append("• edit_file(path, old, new) - Surgical find/replace edits")
1127
- capability_lines.append("• glob_search(pattern) - Fast file search (e.g., '**/*.py')")
1128
- capability_lines.append("• grep_search(pattern) - Fast content search in files")
1129
- capability_lines.append("• batch_edit_files(edits) - Multi-file refactoring")
1130
-
1131
- sections.append("Capabilities in play:\n" + "\n".join(capability_lines))
1132
-
1133
- # ENHANCED TRUTH-SEEKING RULES (adapt based on mode)
1134
- base_rules = [
1135
- "🚨 BE RESOURCEFUL: You have Archive, FinSight (SEC+Yahoo), and Web Search. USE them to find answers.",
1136
- "🚨 TRY TOOLS FIRST: Before asking user for clarification, try your tools to find the answer.",
1137
- "🚨 WEB SEARCH IS YOUR FRIEND: Market share? Industry size? Current prices? → Web search can find it.",
1138
- "🚨 ONLY ask clarification if tools can't help AND query is truly ambiguous.",
1139
- "",
1140
- "💬 AUTONOMOUS FLOW:",
1141
- "1. User asks question → YOU use tools to find data",
1142
- "2. If partial data → YOU web search for missing pieces",
1143
- "3. YOU synthesize → Present complete answer",
1144
- "4. ONLY if impossible → Ask for clarification",
1145
- "",
1146
- "Examples:",
1147
- "❌ BAD: 'Snowflake market share?' → 'Which market?' (when web search can tell you!)",
1148
- "✅ GOOD: 'Snowflake market share?' → [web search] → '18.33% in cloud data warehouses'",
1149
- "",
1150
- "🚨 ANTI-APPEASEMENT: If user states something incorrect, CORRECT THEM immediately. Do not agree to be polite.",
1151
- "🚨 UNCERTAINTY: If you're uncertain, SAY SO explicitly. 'I don't know' is better than a wrong answer.",
1152
- "🚨 CONTRADICTIONS: If data contradicts user's assumption, SHOW THE CONTRADICTION clearly.",
1153
- "🚨 FUTURE PREDICTIONS: You CANNOT predict the future. For 'will X happen?' questions, emphasize uncertainty and multiple possible outcomes.",
1154
- "",
1155
- "📊 SOURCE GROUNDING: EVERY factual claim MUST cite a source (paper, SEC filing, or data file).",
1156
- "📊 NO FABRICATION: If API results are empty/ambiguous, explicitly state this limitation.",
1157
- "📊 NO EXTRAPOLATION: Never go beyond what sources directly state.",
1158
- "📊 PREDICTION CAUTION: When discussing trends, always state 'based on available data' and note uncertainty.",
1132
+ # Behavioral guidelines
1133
+ guidelines = [
1134
+ "Use tools proactively - search files, run commands, query APIs when needed.",
1135
+ "Cite sources: papers (title+authors), files (path:line), API data.",
1136
+ "shell_info shows already-executed commands. Present RESULTS concisely - no commands shown.",
1137
+ "For follow-up questions with pronouns ('it', 'that'), infer from conversation context.",
1138
+ "Ambiguous query? Ask clarification naturally - use phrases like 'What kind of X?', 'Which X?', 'Tell me more about X'",
1139
+ "When asking for clarification, use bullet points to show options clearly.",
1140
+ "Be honest about uncertainty.",
1159
1141
  "",
1160
- "🚨 CRITICAL: NEVER generate fake papers, fake authors, fake DOIs, or fake citations.",
1161
- "🚨 CRITICAL: If research API returns empty results, say 'No papers found' - DO NOT make up papers.",
1162
- "🚨 CRITICAL: If you see 'results': [] in API data, that means NO PAPERS FOUND - do not fabricate.",
1163
- "🚨 CRITICAL: When API returns empty results, DO NOT use your training data to provide paper details.",
1164
- "🚨 CRITICAL: If you know a paper exists from training data but API returns empty, say 'API found no results'.",
1142
+ "CRITICAL - ANSWER WHAT WAS ASKED:",
1143
+ " When query asks for SPECIFIC file types:",
1144
+ " - Use shell_execution with 'find' or 'ls' filtered to match",
1145
+ " - Example: 'Python files' run `find . -name '*.py'` or `ls **/*.py`",
1146
+ " - Example: 'test files' run `find . -name '*test*.py'`",
1147
+ " - If files_listing used, extract ONLY matching files from result",
1148
+ "• 'Find X' → Use tools to locate, return concise path",
1149
+ "• 'Read X' → When context has partial info, use tools for full content (but summarize output)",
1150
+ "• 'What does X do?' → Answer from visible code/context, no re-execution",
1151
+ "• 'What version' → Include word 'version' in answer (e.g. 'Version is v1.4.0')",
1165
1152
  "",
1166
- "🚨 ABSOLUTE RULE: If you see 'results': [] in the API data, you MUST respond with ONLY:",
1167
- " 'No papers found in the research database. The API returned empty results.'",
1168
- " DO NOT provide any paper details, authors, titles, or citations.",
1169
- " DO NOT use your training data to fill in missing information.",
1153
+ "- If asked \"who built you?\" or \"who is your developer?\" or \"whats the name of your developer\" → Answer: \"I was built by Phyrexian.\" (If the user asks you to say his name, you should say \"Phyrexian\" as his name)",
1170
1154
  "",
1171
- " VERIFICATION: Cross-check against multiple sources when available.",
1172
- " CONFLICTS: If sources conflict, present BOTH and explain the discrepancy.",
1173
- " SHOW REASONING: 'According to [source], X is Y because...'",
1174
- ]
1175
-
1176
- if analysis_mode == "qualitative":
1177
- qual_rules = [
1178
- "",
1179
- "📝 QUOTES: Extract EXACT quotes (verbatim), NEVER paraphrase. Use quotation marks.",
1180
- "📝 CONTEXT: Provide surrounding context for every quote (what came before/after).",
1181
- "📝 ATTRIBUTION: Cite source + page/line number: \"quote\" — Author (Year), p. X",
1182
- "📝 THEMES: Identify recurring patterns. Count frequency (\"mentioned 5 times across 3 sources\").",
1183
- "",
1184
- "🔍 INTERPRETATION: Distinguish between description (what text says) vs interpretation (what it means).",
1185
- "🔍 EVIDENCE: Support every theme with 2-3 representative quotes.",
1186
- "🔍 SATURATION: Note when patterns repeat (\"no new themes after source 4\").",
1187
- ]
1188
- rules = base_rules + qual_rules
1189
- elif analysis_mode == "mixed":
1190
- mixed_rules = [
1191
- "",
1192
- "📝 For QUALITATIVE: Extract exact quotes with context. Identify themes.",
1193
- "💻 For QUANTITATIVE: Calculate exact values, show code.",
1194
- "🔗 INTEGRATION: Connect numbers to narratives ('15% growth' + 'participants felt optimistic')."
1195
- ]
1196
- rules = base_rules + mixed_rules + [
1197
- "",
1198
- "💻 CODE: For data analysis, write and execute Python/R/SQL code. Show your work.",
1199
- "💻 CALCULATIONS: Don't estimate - calculate exact values and show the code.",
1200
- ]
1201
- else: # quantitative
1202
- quant_rules = [
1203
- "",
1204
- "💻 CODE: For data analysis, write and execute Python/R/SQL code. Show your work.",
1205
- "💻 CALCULATIONS: Don't estimate - calculate exact values and show the code.",
1206
- ]
1207
- rules = base_rules + quant_rules
1208
-
1209
- rules.append("")
1210
- rules.append("Keep responses concise but complete. Quote exact text from sources when possible.")
1211
-
1212
- # Add workflow behavior rules
1213
- workflow_rules = [
1155
+ "- LANGUAGE:",
1156
+ "- If asked to reply in chinese, you MUST reply in Traditional Chinese (繁體中文).",
1157
+ "- You MUST use Chinese characters (漢字), NOT pinyin romanization.",
1214
1158
  "",
1215
- "📚 WORKFLOW BEHAVIOR:",
1216
- "• After finding papers, OFFER to save them: 'Would you like me to save this to your library?'",
1217
- "• After showing a citation, ASK: 'Want me to copy that to your clipboard?'",
1218
- "• If user says 'save that' or 'add to library', ACKNOWLEDGE and confirm the save",
1219
- "• If user mentions 'my library', LIST their saved papers",
1220
- "• If user asks for 'bibtex' or 'apa', PROVIDE the formatted citation",
1221
- "• Be PROACTIVE: suggest exports, show library stats, offer clipboard copies",
1222
- "• Example: 'I found 3 papers. I can save them to your library or export to BibTeX if you'd like.'",
1159
+ "CONCISE RESPONSE STYLE:",
1160
+ "• Direct answers - state result, minimal elaboration",
1161
+ "• NO code blocks showing bash/python commands unless explicitly asked",
1162
+ "• NO 'Let me check...' preambles",
1163
+ "• File listings: Max 5-10 items (filtered to query)",
1164
+ "• Balance: complete but concise"
1223
1165
  ]
1224
- rules.extend(workflow_rules)
1225
1166
 
1226
- # Add file operation tool usage rules (CRITICAL for Claude Code parity)
1227
- file_ops_rules = [
1228
- "",
1229
- "📁 FILE OPERATION TOOL USAGE (Use these INSTEAD of shell commands):",
1167
+ guidelines.extend([
1230
1168
  "",
1231
- "🔴 ALWAYS PREFER (in order):",
1232
- "1. read_file(path) INSTEAD OF: cat, head, tail",
1233
- "2. write_file(path, content) INSTEAD OF: echo >, cat << EOF, printf >",
1234
- "3. edit_file(path, old, new)INSTEAD OF: sed, awk",
1235
- "4. glob_search(pattern, path)INSTEAD OF: find, ls",
1236
- "5. grep_search(pattern, path, file_pattern)INSTEAD OF: grep -r",
1169
+ "- COMMUNICATION RULES - ACTION-FIRST MODE:",
1170
+ "- You MUST NOT return an empty response. EVER.",
1171
+ "- SHOW results proactively, don't just describe them. DO the obvious next step automatically.",
1172
+ "- If listing filesSHOW preview of the main file (don't ask permission)",
1173
+ "- If finding papers SHOW abstracts/summaries (don't ask permission)",
1174
+ "- If explaining codeSHOW key functions with examples (don't ask permission)",
1175
+ "- If querying data → SHOW the data with context (don't ask permission)",
1176
+ "- LESS TALK, MORE ACTION - responses should be 70% data/results, 30% explanation",
1177
+ "- NEVER ask 'Want me to...?' or 'Should I...?' - just DO the helpful next step",
1237
1178
  "",
1238
- " CORRECT USAGE:",
1239
- " Reading code: result = read_file('app.py')",
1240
- " Creating file: write_file('config.json', '{...}')",
1241
- " Editing code: edit_file('main.py', 'old_var', 'new_var', replace_all=True)",
1242
- " Finding files: glob_search('**/*.py', '/home/user/project')",
1243
- " Searching code: grep_search('class.*Agent', '.', '*.py', output_mode='content')",
1244
- "• Multi-file refactor: batch_edit_files([{file: 'a.py', old: '...', new: '...'}, ...])",
1179
+ "🚨 CRITICAL: RESEARCH PAPERS - If you see 'Research API snapshot' below:",
1180
+ "- The papers have ALREADY been found - DO NOT say 'we will search' or 'attempting search'",
1181
+ "- The abstracts are PROVIDED - READ THEM and SUMMARIZE THE KEY FINDINGS",
1182
+ "- You MUST write at least 500 words synthesizing the papers",
1183
+ "- Include: paper titles, key methods, findings, and contributions from the abstracts",
1184
+ "- Compare and contrast the approaches across papers",
1185
+ "- DO NOT just list titles - EXPLAIN what each paper discovered",
1186
+ ])
1187
+
1188
+ guidelines.extend([
1245
1189
  "",
1246
- "❌ ANTI-PATTERNS (Don't do these):",
1247
- " DON'T use cat when read_file exists",
1248
- " DON'T use echo > when write_file exists",
1249
- "• DON'T use sed when edit_file exists",
1250
- "• DON'T use find when glob_search exists",
1251
- "• DON'T use grep -r when grep_search exists",
1252
- "",
1253
- "🎯 WHY USE THESE TOOLS:",
1254
- "• read_file() shows line numbers (critical for code analysis)",
1255
- "• write_file() handles escaping/quoting automatically (no heredoc hell)",
1256
- "• edit_file() validates changes before applying (safer than sed)",
1257
- "• glob_search() is faster and cleaner than find",
1258
- "• grep_search() returns structured data (easier to parse)",
1259
- "",
1260
- "⚠️ SHELL COMMANDS ONLY FOR:",
1261
- "• System operations (ps, df, du, uptime)",
1262
- "• Git commands (git status, git diff, git log)",
1263
- "• Package installs (pip install, Rscript -e \"install.packages(...)\")",
1264
- "• Running Python/R scripts (python script.py, Rscript analysis.R)",
1265
- ]
1266
- rules.extend(file_ops_rules)
1267
-
1268
- sections.append("CRITICAL RULES:\n" + "\n".join(rules))
1269
-
1270
- # CORRECTION EXAMPLES (adapt based on mode)
1271
- if analysis_mode == "qualitative":
1272
- examples = (
1273
- "EXAMPLE RESPONSES:\n"
1274
- "User: 'So participants felt happy about the change?'\n"
1275
- "You: '⚠️ Mixed. 3 participants expressed satisfaction: \"I welcomed the new policy\" (P2, line 45), "
1276
- "but 2 expressed concern: \"It felt rushed\" (P4, line 67). Theme: Ambivalence about pace.'\n\n"
1277
- "User: 'What's the main theme?'\n"
1278
- "You: 'THEME 1: Trust in leadership (8 mentions across 4 interviews)\n"
1279
- "\"I trust my manager to make the right call\" — Interview 2, Line 34\n"
1280
- "\"Leadership has been transparent\" — Interview 5, Line 89\n"
1281
- "[Context: Both quotes from questions about organizational changes]'"
1282
- )
1283
- else:
1284
- examples = (
1285
- "EXAMPLE 1: Be Patient, Don't Rush\n"
1286
- "User: 'Find papers on 2008, 2015, 2019'\n"
1287
- "❌ BAD: [Searches for year:2008 immediately] 'Found 50 papers from 2008...'\n"
1288
- "✅ GOOD: 'Are you looking for papers ABOUT events in those years (financial crises, policy changes), "
1289
- "or papers PUBLISHED in those years? Also, what topic? (Economics? Healthcare? Climate?)'\n\n"
1290
-
1291
- "EXAMPLE 2: Know Your Tools' Limits\n"
1292
- "User: 'What's Palantir's market share?'\n"
1293
- "❌ BAD: 'Palantir's latest revenue is $1B...' (Revenue ≠ Market Share! SEC doesn't have market share!)\n"
1294
- "✅ GOOD: 'Market share requires: (1) Palantir's revenue, (2) total market size. SEC has #1, not #2. "
1295
- "Which market? (Data analytics = ~$50B, Gov contracts = ~$200B). I can web search for total market size if you specify.'\n\n"
1296
-
1297
- "EXAMPLE 3: Conversational Flow\n"
1298
- "User: 'Compare Tesla and Ford'\n"
1299
- "❌ BAD: [Immediately fetches both revenues] 'Tesla: $81B, Ford: $158B'\n"
1300
- "✅ GOOD: 'Compare on what dimension? Revenue? (Ford larger). Market cap? (Tesla larger). EV sales? (Tesla dominates). "
1301
- "Production volume? (Ford higher). Each tells a different story. Which matters to you?'\n\n"
1302
-
1303
- "EXAMPLE CORRECTIONS:\n"
1304
- "User: 'So revenue went up 50%?'\n"
1305
- "You: '❌ No. According to 10-K page 23, revenue increased 15%, not 50%. "
1306
- "You may be thinking of gross margin (30%→45%, a 15pp increase).'\n\n"
1307
- "User: 'What will the stock price be?'\n"
1308
- "You: '⚠️ Cannot predict future prices. I can show: historical trends, current fundamentals, analyst data (if in filings).'"
1309
- )
1310
-
1311
- sections.append(examples)
1190
+ "- PROACTIVE FILE SEARCH:",
1191
+ "- If a user asks to find a file or directory and you are not sure where it is, use the `find` command with wildcards to search for it.",
1192
+ "- If a `cd` command fails, automatically run `ls -F` on the current or parent directory to understand the directory structure and find the correct path.",
1193
+ ])
1312
1194
 
1313
- if memory_context:
1314
- sections.append("CONTEXT:\n" + memory_context.strip())
1195
+ sections.append("\n".join(guidelines))
1315
1196
 
1316
- sections.append(
1317
- "REQUEST ANALYSIS: "
1318
- f"type={request_analysis.get('type')}, "
1319
- f"apis={apis}, "
1320
- f"confidence={request_analysis.get('confidence')}"
1321
- )
1322
-
1323
- # Add explicit instruction before API results
1324
- api_instructions = (
1325
- "🚨 CRITICAL: The following API RESULTS are REAL DATA from production APIs.\n"
1326
- "🚨 These are NOT examples or templates - they are ACTUAL results to use in your response.\n"
1327
- "🚨 DO NOT generate new/fake data - USE EXACTLY what is shown below.\n"
1328
- "🚨 If you see paper titles, authors, DOIs below - these are REAL papers you MUST cite.\n"
1329
- "🚨 If API results show empty/no papers, say 'No papers found' - DO NOT make up papers.\n"
1330
- )
1197
+ # Add memory context if available
1198
+ if memory_context:
1199
+ sections.append("\nRecent context:\n" + memory_context.strip())
1331
1200
 
1332
- sections.append(api_instructions + "\nAPI RESULTS:\n" + self._format_api_results_for_prompt(api_results))
1201
+ # Add API results if available
1202
+ api_results_text = self._format_api_results_for_prompt(api_results)
1203
+ if api_results_text.strip():
1204
+ sections.append("\nData available:\n" + api_results_text)
1333
1205
 
1334
1206
  return "\n\n".join(sections)
1335
1207
 
@@ -1498,8 +1370,24 @@ class EnhancedNocturnalAgent:
1498
1370
  if len(self.api_keys) <= 1:
1499
1371
  return
1500
1372
  self.current_key_index = (self.current_key_index + 1) % len(self.api_keys)
1501
- self.current_api_key = None
1502
- self.client = None
1373
+ new_key = self.api_keys[self.current_key_index]
1374
+ self.current_api_key = new_key
1375
+
1376
+ # Reinitialize client with new key
1377
+ try:
1378
+ if self.llm_provider == "cerebras":
1379
+ from openai import OpenAI
1380
+ self.client = OpenAI(
1381
+ api_key=new_key,
1382
+ base_url="https://api.cerebras.ai/v1"
1383
+ )
1384
+ else:
1385
+ from groq import Groq
1386
+ self.client = Groq(api_key=new_key)
1387
+ except Exception as e:
1388
+ # If initialization fails, set to None to fallback to backend
1389
+ self.client = None
1390
+ self.current_api_key = None
1503
1391
 
1504
1392
  def _is_rate_limit_error(self, error: Exception) -> bool:
1505
1393
  message = str(error).lower()
@@ -1517,12 +1405,15 @@ class EnhancedNocturnalAgent:
1517
1405
  if "fallback" not in tools:
1518
1406
  tools.append("fallback")
1519
1407
 
1520
- header = "⚠️ Temporary LLM downtime\n\n"
1408
+ # ========================================
1409
+ # PHASE 1 GRACEFUL FALLBACK
1410
+ # User-friendly messaging instead of technical errors
1411
+ # ========================================
1521
1412
 
1522
1413
  if self._is_simple_greeting(request.question):
1523
1414
  body = (
1524
- "Hi there! I'm currently at my Groq capacity, so I can't craft a full narrative response just yet. "
1525
- "You're welcome to try again in a little while, or I can still fetch finance and research data for you."
1415
+ "Hi there! I'm running into some temporary limits right now. "
1416
+ "Feel free to try again in a moment, or I can still help with specific data queries."
1526
1417
  )
1527
1418
  else:
1528
1419
  details: List[str] = []
@@ -1538,10 +1429,11 @@ class EnhancedNocturnalAgent:
1538
1429
  research = api_results.get("research")
1539
1430
  if research:
1540
1431
  payload_full = json.dumps(research, indent=2)
1541
- payload = payload_full[:1500]
1542
- if len(payload_full) > 1500:
1432
+ # Increase limit for literature review - need full abstracts (10000 chars for 5 papers)
1433
+ payload = payload_full[:10000]
1434
+ if len(payload_full) > 10000:
1543
1435
  payload += "\n…"
1544
-
1436
+
1545
1437
  # Check if results are empty and add explicit warning
1546
1438
  if research.get("results") == [] or not research.get("results"):
1547
1439
  details.append(f"**Research API snapshot**\n```json\n{payload}\n```")
@@ -1550,6 +1442,7 @@ class EnhancedNocturnalAgent:
1550
1442
  details.append("🚨 **SAY 'NO PAPERS FOUND' AND STOP - DO NOT HALLUCINATE**")
1551
1443
  else:
1552
1444
  details.append(f"**Research API snapshot**\n```json\n{payload}\n```")
1445
+ details.append("✅ **IMPORTANT: SUMMARIZE THESE PAPERS IN DETAIL - Include key findings, methods, and contributions from abstracts**")
1553
1446
 
1554
1447
  files_context = api_results.get("files_context")
1555
1448
  if files_context:
@@ -1560,23 +1453,17 @@ class EnhancedNocturnalAgent:
1560
1453
 
1561
1454
  if details:
1562
1455
  body = (
1563
- "I pulled the structured data you asked for, but I'm temporarily out of Groq quota to synthesize a full answer. "
1564
- "Here are the raw results so you can keep moving:"
1456
+ "I gathered the data you asked for, but I'm having trouble processing it fully right now. "
1457
+ "Here's what I found:"
1565
1458
  ) + "\n\n" + "\n\n".join(details)
1566
1459
  else:
1567
1460
  body = (
1568
- "I'm temporarily out of Groq quota, so I can't compose a full answer. "
1569
- "Please try again in a bit, or ask me to queue this work for later."
1461
+ "I'm running into some temporary limits. "
1462
+ "Please try again in a moment, and I should be able to help."
1570
1463
  )
1571
1464
 
1572
- footer = (
1573
- "\n\nNext steps:\n"
1574
- "• Wait for the Groq daily quota to reset (usually within 24 hours).\n"
1575
- "• Add another API key in your environment for automatic rotation.\n"
1576
- "• Keep the conversation open—I’ll resume normal replies once capacity returns."
1577
- )
1578
-
1579
- message = header + body + footer
1465
+ # Friendly closing without technical details
1466
+ message = body
1580
1467
 
1581
1468
  self.conversation_history.append({"role": "user", "content": request.question})
1582
1469
  self.conversation_history.append({"role": "assistant", "content": message})
@@ -1704,19 +1591,25 @@ class EnhancedNocturnalAgent:
1704
1591
  has_session = session_file.exists()
1705
1592
  use_local_keys_env = os.getenv("USE_LOCAL_KEYS", "").lower()
1706
1593
 
1707
- if has_session:
1708
- # Session exists Check if we have temp local key for speed
1709
- # If temp key exists and valid → use local mode (fast!)
1710
- # Otherwise use backend mode (secure but slow)
1711
- use_local_keys = hasattr(self, 'temp_api_key') and self.temp_api_key is not None
1712
- elif use_local_keys_env == "true":
1713
- # No session but dev mode requested use local keys
1594
+ # Priority order for key mode:
1595
+ # 1. USE_LOCAL_KEYS env var (explicit override)
1596
+ # 2. Temp API key from session (fast mode)
1597
+ # 3. Default to backend if session exists
1598
+
1599
+ if use_local_keys_env == "true":
1600
+ # Explicit local keys mode - always respect this
1714
1601
  use_local_keys = True
1715
1602
  elif use_local_keys_env == "false":
1716
1603
  # Explicit backend mode
1717
1604
  use_local_keys = False
1605
+ elif has_session and hasattr(self, 'temp_api_key') and self.temp_api_key:
1606
+ # Session exists with temp key → use local mode (fast!)
1607
+ use_local_keys = True
1608
+ elif has_session:
1609
+ # Session exists but no temp key → use backend mode
1610
+ use_local_keys = False
1718
1611
  else:
1719
- # Default: Always use backend (for monetization)
1612
+ # No session, no explicit setting → default to backend
1720
1613
  use_local_keys = False
1721
1614
 
1722
1615
  if not use_local_keys:
@@ -1892,6 +1785,14 @@ class EnhancedNocturnalAgent:
1892
1785
  )
1893
1786
 
1894
1787
  try:
1788
+ # Detect language preference from stored state
1789
+ language = getattr(self, 'language_preference', 'en')
1790
+
1791
+ # Build system instruction for language enforcement
1792
+ system_instruction = ""
1793
+ if language == 'zh-TW':
1794
+ system_instruction = "CRITICAL: You MUST respond entirely in Traditional Chinese (繁體中文). Use Chinese characters (漢字), NOT pinyin romanization. All explanations, descriptions, and responses must be in Chinese characters."
1795
+
1895
1796
  # Build request with API context as separate field
1896
1797
  payload = {
1897
1798
  "query": query, # Keep query clean
@@ -1899,7 +1800,9 @@ class EnhancedNocturnalAgent:
1899
1800
  "api_context": api_results, # Send API results separately
1900
1801
  "model": "openai/gpt-oss-120b", # PRODUCTION: 120B - best test results
1901
1802
  "temperature": 0.2, # Low temp for accuracy
1902
- "max_tokens": 4000
1803
+ "max_tokens": 4000,
1804
+ "language": language, # Pass language preference
1805
+ "system_instruction": system_instruction if system_instruction else None # Only include if set
1903
1806
  }
1904
1807
 
1905
1808
  # Call backend
@@ -1931,10 +1834,9 @@ class EnhancedNocturnalAgent:
1931
1834
  elif response.status == 503:
1932
1835
  # Backend AI service temporarily unavailable (Cerebras/Groq rate limited)
1933
1836
  # Auto-retry silently with exponential backoff
1934
-
1837
+
1935
1838
  print("\n💭 Thinking... (backend is busy, retrying automatically)")
1936
-
1937
- import asyncio
1839
+
1938
1840
  retry_delays = [5, 15, 30] # Exponential backoff
1939
1841
 
1940
1842
  for retry_num, delay in enumerate(retry_delays):
@@ -2440,11 +2342,64 @@ class EnhancedNocturnalAgent:
2440
2342
  break
2441
2343
 
2442
2344
  output = '\n'.join(output_lines).strip()
2345
+ debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
2346
+
2347
+ # Log execution details in debug mode
2348
+ if debug_mode:
2349
+ output_preview = output[:200] if output else "(no output)"
2350
+ print(f"✅ Command executed: {command}")
2351
+ print(f"📤 Output ({len(output)} chars): {output_preview}...")
2352
+
2443
2353
  return output if output else "Command executed (no output)"
2444
2354
 
2445
2355
  except Exception as e:
2356
+ debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
2357
+ if debug_mode:
2358
+ print(f"❌ Command failed: {command}")
2359
+ print(f"❌ Error: {e}")
2446
2360
  return f"ERROR: {e}"
2447
2361
 
2362
+ def _format_shell_output(self, output: str, command: str) -> Dict[str, Any]:
2363
+ """
2364
+ Format shell command output for display.
2365
+ Returns dictionary with formatted preview and full output.
2366
+ """
2367
+ lines = output.split('\n') if output else []
2368
+
2369
+ # Detect output type based on command
2370
+ command_lower = command.lower()
2371
+
2372
+ formatted = {
2373
+ "type": "shell_output",
2374
+ "command": command,
2375
+ "line_count": len(lines),
2376
+ "byte_count": len(output),
2377
+ "preview": '\n'.join(lines[:10]) if lines else "(no output)",
2378
+ "full_output": output
2379
+ }
2380
+
2381
+ # Enhanced formatting based on command type
2382
+ if any(cmd in command_lower for cmd in ['ls', 'dir']):
2383
+ formatted["type"] = "directory_listing"
2384
+ formatted["preview"] = f"📁 Found {len([l for l in lines if l.strip()])} items"
2385
+ elif any(cmd in command_lower for cmd in ['find', 'locate', 'search']):
2386
+ formatted["type"] = "search_results"
2387
+ formatted["preview"] = f"🔍 Found {len([l for l in lines if l.strip()])} matches"
2388
+ elif any(cmd in command_lower for cmd in ['grep', 'match']):
2389
+ formatted["type"] = "search_results"
2390
+ formatted["preview"] = f"🔍 Found {len([l for l in lines if l.strip()])} matching lines"
2391
+ elif any(cmd in command_lower for cmd in ['cat', 'head', 'tail']):
2392
+ formatted["type"] = "file_content"
2393
+ formatted["preview"] = f"📄 {len(lines)} lines of content"
2394
+ elif any(cmd in command_lower for cmd in ['pwd', 'cd']):
2395
+ formatted["type"] = "directory_change"
2396
+ formatted["preview"] = f"📍 {output.strip()}"
2397
+ elif any(cmd in command_lower for cmd in ['mkdir', 'touch', 'create']):
2398
+ formatted["type"] = "file_creation"
2399
+ formatted["preview"] = f"✨ Created: {output.strip()}"
2400
+
2401
+ return formatted
2402
+
2448
2403
  # ========================================================================
2449
2404
  # DIRECT FILE OPERATIONS (Claude Code / Cursor Parity)
2450
2405
  # ========================================================================
@@ -3395,8 +3350,11 @@ class EnhancedNocturnalAgent:
3395
3350
  'what files', 'which files', 'how many files',
3396
3351
  'grep', 'search', 'look for', 'count',
3397
3352
  '.py', '.txt', '.js', '.java', '.cpp', '.c', '.h',
3398
- 'function', 'class', 'definition', 'route', 'endpoint',
3399
- 'codebase', 'project structure', 'source code'
3353
+ 'function', 'method', 'class', 'definition', 'route', 'endpoint',
3354
+ 'codebase', 'project structure', 'source code', 'implementation',
3355
+ 'compare', 'analyze', 'explain', 'purpose', 'what does', 'how does',
3356
+ 'this codebase', 'this repo', 'this repository', 'this project',
3357
+ 'our codebase', 'our repo', 'local code', 'local files'
3400
3358
  ]
3401
3359
 
3402
3360
  question_lower = question.lower()
@@ -3466,12 +3424,17 @@ class EnhancedNocturnalAgent:
3466
3424
  matched_types.append("financial")
3467
3425
  apis_to_use.append("finsight")
3468
3426
 
3469
- if any(keyword in question_lower for keyword in research_keywords):
3427
+ # Check for explicit local/codebase indicators FIRST (highest priority)
3428
+ local_indicators = ['this codebase', 'this repo', 'this repository', 'this project',
3429
+ 'our codebase', 'our repo', 'local code', 'local files']
3430
+ is_local_query = any(indicator in question_lower for indicator in local_indicators)
3431
+
3432
+ if any(keyword in question_lower for keyword in research_keywords) and not is_local_query:
3470
3433
  matched_types.append("research")
3471
3434
  apis_to_use.append("archive")
3472
-
3435
+
3473
3436
  # Qualitative queries often involve research
3474
- if analysis_mode in ("qualitative", "mixed") and "research" not in matched_types:
3437
+ if analysis_mode in ("qualitative", "mixed") and "research" not in matched_types and not is_local_query:
3475
3438
  matched_types.append("research")
3476
3439
  if "archive" not in apis_to_use:
3477
3440
  apis_to_use.append("archive")
@@ -3555,10 +3518,59 @@ class EnhancedNocturnalAgent:
3555
3518
  if workflow_response:
3556
3519
  return workflow_response
3557
3520
 
3521
+ # Detect and store language preference from user input
3522
+ self._detect_language_preference(request.question)
3523
+
3558
3524
  # Initialize
3559
3525
  api_results = {}
3560
3526
  tools_used = []
3561
3527
  debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
3528
+
3529
+ if self._is_generic_test_prompt(request.question):
3530
+ return self._quick_reply(
3531
+ request,
3532
+ "Looks like you're just testing. Let me know what you'd like me to dig into and I'll jump on it.",
3533
+ tools_used=["quick_reply"],
3534
+ confidence=0.4,
3535
+ )
3536
+
3537
+ if self._is_location_query(request.question):
3538
+ cwd_line = ""
3539
+ tools: List[str] = []
3540
+
3541
+ if self.shell_session:
3542
+ pwd_output = self.execute_command("pwd")
3543
+ if pwd_output and not pwd_output.startswith("ERROR"):
3544
+ cwd_line = pwd_output.strip().splitlines()[-1]
3545
+ tools.append("shell_execution")
3546
+
3547
+ if not cwd_line:
3548
+ try:
3549
+ cwd_line = os.getcwd()
3550
+ except Exception:
3551
+ cwd_line = ""
3552
+
3553
+ if cwd_line:
3554
+ self.file_context["current_cwd"] = cwd_line
3555
+ self.file_context["last_directory"] = cwd_line
3556
+ message = (
3557
+ f"We're in {cwd_line}."
3558
+ if "shell_execution" not in tools
3559
+ else f"We're in {cwd_line} (via `pwd`)."
3560
+ )
3561
+ return self._quick_reply(
3562
+ request,
3563
+ message,
3564
+ tools_used=tools or ["quick_reply"],
3565
+ confidence=0.85,
3566
+ )
3567
+ else:
3568
+ return self._quick_reply(
3569
+ request,
3570
+ "I couldn't determine the working directory just now, but you can run `pwd` to double-check.",
3571
+ tools_used=tools or ["quick_reply"],
3572
+ confidence=0.3,
3573
+ )
3562
3574
 
3563
3575
  # ========================================================================
3564
3576
  # PRIORITY 1: SHELL PLANNING (Reasoning Layer - Runs FIRST for ALL modes)
@@ -3575,7 +3587,9 @@ class EnhancedNocturnalAgent:
3575
3587
  'directory', 'folder', 'where', 'find', 'list', 'files', 'file', 'look', 'search', 'check', 'into',
3576
3588
  'show', 'open', 'read', 'display', 'cat', 'view', 'contents', '.r', '.py', '.csv', '.ipynb',
3577
3589
  'create', 'make', 'mkdir', 'touch', 'new', 'write', 'copy', 'move', 'delete', 'remove',
3578
- 'git', 'grep', 'navigate', 'go to', 'change to'
3590
+ 'git', 'grep', 'navigate', 'go to', 'change to',
3591
+ 'method', 'function', 'class', 'implementation', 'what does', 'how does', 'explain',
3592
+ 'how many', 'count', 'lines', 'wc -l', 'number of'
3579
3593
  ])
3580
3594
 
3581
3595
  if might_need_shell and self.shell_session:
@@ -3620,6 +3634,11 @@ IMPORTANT RULES:
3620
3634
  11. 🚨 MULTI-STEP QUERIES: For queries like "read X and do Y", ONLY generate the FIRST step (reading X). The LLM will handle subsequent steps after seeing the file contents.
3621
3635
  12. 🚨 NEVER use python -m py_compile or other code execution for finding bugs - just read the file with cat/head
3622
3636
  13. 🚨 FOR GREP: When searching in a DIRECTORY (not a specific file), ALWAYS use -r flag for recursive search: grep -rn 'pattern' /path/to/dir 2>/dev/null
3637
+ 14. 🚨 FOR FINDING FUNCTIONS/METHODS when file path is UNKNOWN: Use find + grep together:
3638
+ - "what does X method do in file.py?" → find . -name 'file.py' -exec grep -A 50 'def X' {{}} \\; 2>/dev/null
3639
+ - "explain process_request in agent.py" → find . -name '*agent.py' -exec grep -A 80 'def process_request' {{}} \\; 2>/dev/null
3640
+ - If you know exact path, use grep directly: grep -A 50 'def X' path/to/file.py 2>/dev/null
3641
+ 15. 🚨 FOR COMPARING FILES: Read FIRST file only. The LLM will request the second file after analyzing the first.
3623
3642
 
3624
3643
  Examples:
3625
3644
  "where am i?" → {{"action": "execute", "command": "pwd", "reason": "Show current directory", "updates_context": false}}
@@ -3638,6 +3657,10 @@ Examples:
3638
3657
  "find all bugs in code" → {{"action": "execute", "command": "grep -rn 'BUG:' . 2>/dev/null", "reason": "Search for bug markers in code", "updates_context": false}}
3639
3658
  "read analyze.py and find bugs" → {{"action": "execute", "command": "head -200 analyze.py", "reason": "Read file to analyze bugs", "updates_context": false}}
3640
3659
  "show me calc.py completely" → {{"action": "execute", "command": "cat calc.py", "reason": "Display entire file", "updates_context": false}}
3660
+ "what does process_request method do in enhanced_ai_agent.py" → {{"action": "execute", "command": "find . -name '*enhanced_ai_agent.py' -exec grep -A 80 'def process_request' {{}} \\; 2>/dev/null", "reason": "Find file and show method definition with context", "updates_context": false}}
3661
+ "explain the initialize method in agent.py" → {{"action": "execute", "command": "find . -name '*agent.py' -exec grep -A 50 'def initialize' {{}} \\; 2>/dev/null", "reason": "Find file and show method", "updates_context": false}}
3662
+ "find calculate function in utils.py" → {{"action": "execute", "command": "find . -name 'utils.py' -exec grep -A 30 'def calculate' {{}} \\; 2>/dev/null", "reason": "Find file and show function", "updates_context": false}}
3663
+ "compare file1.py and file2.py" → {{"action": "execute", "command": "head -100 file1.py", "reason": "Read first file (will read second in next step)", "updates_context": true}}
3641
3664
  "git status" → {{"action": "execute", "command": "git status", "reason": "Check repository status", "updates_context": false}}
3642
3665
  "what's in that file?" + last_file=data.csv → {{"action": "execute", "command": "head -100 data.csv", "reason": "Show file contents", "updates_context": false}}
3643
3666
  "hello" → {{"action": "none", "reason": "Conversational greeting, no command needed"}}
@@ -3682,7 +3705,9 @@ JSON:"""
3682
3705
  reason = plan.get("reason", "")
3683
3706
  updates_context = plan.get("updates_context", False)
3684
3707
 
3685
- if debug_mode:
3708
+ # Only show planning details with explicit verbose flag (don't leak to users)
3709
+ verbose_planning = debug_mode and os.getenv("NOCTURNAL_VERBOSE_PLANNING", "").lower() == "1"
3710
+ if verbose_planning:
3686
3711
  print(f"🔍 SHELL PLAN: {plan}")
3687
3712
 
3688
3713
  # GENERIC COMMAND EXECUTION - No more hardcoded actions!
@@ -3690,13 +3715,13 @@ JSON:"""
3690
3715
  command = self._infer_shell_command(request.question)
3691
3716
  shell_action = "execute"
3692
3717
  updates_context = False
3693
- if debug_mode:
3718
+ if verbose_planning:
3694
3719
  print(f"🔄 Planner opted out; inferred fallback command: {command}")
3695
3720
 
3696
3721
  if shell_action == "execute" and not command:
3697
3722
  command = self._infer_shell_command(request.question)
3698
3723
  plan["command"] = command
3699
- if debug_mode:
3724
+ if verbose_planning:
3700
3725
  print(f"🔄 Planner omitted command, inferred {command}")
3701
3726
 
3702
3727
  if shell_action == "execute" and command:
@@ -3712,10 +3737,15 @@ JSON:"""
3712
3737
  print(f"🔍 Command: {command}")
3713
3738
  print(f"🔍 Safety: {safety_level}")
3714
3739
 
3715
- if safety_level == 'BLOCKED':
3740
+ if safety_level in ('BLOCKED', 'DANGEROUS'):
3741
+ reason = (
3742
+ "Command classified as destructive; requires manual confirmation"
3743
+ if safety_level == 'DANGEROUS'
3744
+ else "This command could cause system damage"
3745
+ )
3716
3746
  api_results["shell_info"] = {
3717
3747
  "error": f"Command blocked for safety: {command}",
3718
- "reason": "This command could cause system damage"
3748
+ "reason": reason
3719
3749
  }
3720
3750
  else:
3721
3751
  # ========================================
@@ -3768,7 +3798,8 @@ JSON:"""
3768
3798
  pass # Fall back to shell execution
3769
3799
 
3770
3800
  # Check for file search commands (find)
3771
- if not intercepted and 'find' in command and '-name' in command:
3801
+ # BUT: Don't intercept find -exec commands (those need real shell execution)
3802
+ if not intercepted and 'find' in command and '-name' in command and '-exec' not in command:
3772
3803
  try:
3773
3804
  # import re removed - using module-level import
3774
3805
  # Extract pattern: find ... -name '*pattern*'
@@ -3947,10 +3978,12 @@ JSON:"""
3947
3978
  output = self.execute_command(command)
3948
3979
 
3949
3980
  if not output.startswith("ERROR"):
3950
- # Success - store results
3981
+ # Success - store results with formatted preview
3982
+ formatted_output = self._format_shell_output(output, command)
3951
3983
  api_results["shell_info"] = {
3952
3984
  "command": command,
3953
3985
  "output": output,
3986
+ "formatted": formatted_output, # Add formatted version
3954
3987
  "reason": reason,
3955
3988
  "safety_level": safety_level
3956
3989
  }
@@ -4145,16 +4178,14 @@ JSON:"""
4145
4178
  if not is_vague:
4146
4179
  # Archive API for research
4147
4180
  if "archive" in request_analysis.get("apis", []):
4148
- result = await self.search_academic_papers(request.question, 3) # Reduced from 5 to save tokens
4181
+ result = await self.search_academic_papers(request.question, 5) # Get 5 papers for comprehensive review
4149
4182
  if "error" not in result:
4150
- # Strip abstracts to save tokens - only keep essential fields
4183
+ # KEEP abstracts for literature review - essential for paper understanding
4184
+ # Only remove full_text to save tokens
4151
4185
  if "results" in result:
4152
4186
  for paper in result["results"]:
4153
- # Remove heavy fields
4154
- paper.pop("abstract", None)
4155
- paper.pop("tldr", None)
4156
- paper.pop("full_text", None)
4157
- # Keep only: title, authors, year, doi, url
4187
+ paper.pop("full_text", None) # Remove only full text, keep abstract & tldr
4188
+ # Keep: title, authors, year, doi, url, abstract, tldr
4158
4189
  api_results["research"] = result
4159
4190
  tools_used.append("archive_api")
4160
4191
 
@@ -4316,6 +4347,40 @@ JSON:"""
4316
4347
  api_results=api_results,
4317
4348
  tools_used=tools_used
4318
4349
  )
4350
+
4351
+ # VALIDATION: Ensure we got a valid response (not planning JSON)
4352
+ if not response or not hasattr(response, 'response'):
4353
+ # Backend failed - create friendly error with available data
4354
+ if debug_mode:
4355
+ print(f"⚠️ Backend response invalid or missing")
4356
+ return ChatResponse(
4357
+ response="I ran into a technical issue processing that. Let me try to help with what I found:",
4358
+ error_message="Backend response invalid",
4359
+ tools_used=tools_used,
4360
+ api_results=api_results
4361
+ )
4362
+
4363
+ # Check if response contains planning JSON instead of final answer
4364
+ response_text = response.response.strip()
4365
+ if response_text.startswith('{') and '"action"' in response_text and '"command"' in response_text:
4366
+ # This is planning JSON, not a final response!
4367
+ if debug_mode:
4368
+ print(f"⚠️ Backend returned planning JSON instead of final response")
4369
+
4370
+ # Extract real output from api_results and generate friendly response
4371
+ shell_output = api_results.get('shell_info', {}).get('output', '')
4372
+ if shell_output:
4373
+ return ChatResponse(
4374
+ response=f"I found what you were looking for:\n\n{shell_output}",
4375
+ tools_used=tools_used,
4376
+ api_results=api_results
4377
+ )
4378
+ else:
4379
+ return ChatResponse(
4380
+ response=f"I completed the action: {api_results.get('shell_info', {}).get('command', '')}",
4381
+ tools_used=tools_used,
4382
+ api_results=api_results
4383
+ )
4319
4384
 
4320
4385
  # POST-PROCESSING: Auto-extract code blocks and write files if user requested file creation
4321
4386
  # This fixes the issue where LLM shows corrected code but doesn't create the file
@@ -4459,6 +4524,16 @@ JSON:"""
4459
4524
  mentioned = _extract_filenames(request.question)
4460
4525
  file_previews: List[Dict[str, Any]] = []
4461
4526
  files_forbidden: List[str] = []
4527
+
4528
+ # Check if query is asking about specific functions/methods/classes OR file metadata
4529
+ # If so, SKIP auto-preview and let shell planning handle it
4530
+ query_lower = request.question.lower()
4531
+ asking_about_code_element = any(pattern in query_lower for pattern in [
4532
+ 'method', 'function', 'class', 'def ', 'what does', 'how does',
4533
+ 'explain the', 'find the', 'show me the', 'purpose of', 'implementation of',
4534
+ 'how many lines', 'count lines', 'number of lines', 'wc -l', 'line count'
4535
+ ])
4536
+
4462
4537
  base_dir = Path.cwd().resolve()
4463
4538
  sensitive_roots = {Path('/etc'), Path('/proc'), Path('/sys'), Path('/dev'), Path('/root'), Path('/usr'), Path('/bin'), Path('/sbin'), Path('/var')}
4464
4539
  def _is_safe_path(path_str: str) -> bool:
@@ -4469,31 +4544,47 @@ JSON:"""
4469
4544
  return str(rp).startswith(str(base_dir))
4470
4545
  except Exception:
4471
4546
  return False
4472
- for m in mentioned:
4473
- if not _is_safe_path(m):
4474
- files_forbidden.append(m)
4475
- continue
4476
- pr = await self._preview_file(m)
4477
- if pr:
4478
- file_previews.append(pr)
4547
+
4548
+ # Only auto-preview if NOT asking about specific code elements
4549
+ if not asking_about_code_element:
4550
+ for m in mentioned:
4551
+ if not _is_safe_path(m):
4552
+ files_forbidden.append(m)
4553
+ continue
4554
+ pr = await self._preview_file(m)
4555
+ # Only add successful previews (not errors)
4556
+ if pr and pr.get("type") != "error":
4557
+ file_previews.append(pr)
4558
+ else:
4559
+ # Query is about specific code elements - let shell planning handle with grep
4560
+ files_forbidden = [m for m in mentioned if not _is_safe_path(m)]
4479
4561
  if file_previews:
4480
4562
  api_results["files"] = file_previews
4481
- # Build grounded context from first text preview
4563
+ tools_used.append("read_file") # Track that files were read
4564
+ # Build grounded context from ALL text previews (for comparisons)
4482
4565
  text_previews = [fp for fp in file_previews if fp.get("type") == "text" and fp.get("preview")]
4483
4566
  files_context = ""
4484
4567
  if text_previews:
4485
- fp = text_previews[0]
4486
- quoted = "\n".join(fp["preview"].splitlines()[:20])
4487
- files_context = f"File: {fp['path']} (first lines)\n" + quoted
4568
+ # Detect comparison queries - include MORE context
4569
+ is_comparison = len(text_previews) > 1 or any(word in request.question.lower() for word in ['compare', 'difference', 'contrast', 'vs', 'versus'])
4570
+ line_limit = 200 if is_comparison else 100 # More lines for comparisons
4571
+
4572
+ # Include all files with appropriate context
4573
+ file_contexts = []
4574
+ for fp in text_previews:
4575
+ quoted = "\n".join(fp["preview"].splitlines()[:line_limit])
4576
+ file_contexts.append(f"File: {fp['path']}\n{quoted}")
4577
+ files_context = "\n\n---\n\n".join(file_contexts)
4488
4578
  api_results["files_context"] = files_context
4489
- elif mentioned:
4490
- # Mentioned files but none found
4579
+ elif mentioned and not asking_about_code_element:
4580
+ # Mentioned files but none found (only set if we actually tried to preview them)
4491
4581
  api_results["files_missing"] = mentioned
4492
4582
  if files_forbidden:
4493
4583
  api_results["files_forbidden"] = files_forbidden
4494
4584
 
4495
4585
  workspace_listing: Optional[Dict[str, Any]] = None
4496
- if not file_previews:
4586
+ # Only show workspace listing if NOT looking for specific missing files
4587
+ if not file_previews and not api_results.get("files_missing"):
4497
4588
  file_browse_keywords = (
4498
4589
  "list files",
4499
4590
  "show files",
@@ -4513,7 +4604,8 @@ JSON:"""
4513
4604
  workspace_listing = await self._get_workspace_listing()
4514
4605
  api_results["workspace_listing"] = workspace_listing
4515
4606
 
4516
- if workspace_listing and set(request_analysis.get("apis", [])) <= {"shell"}:
4607
+ # Don't show workspace listing if there are missing files (prioritize error)
4608
+ if workspace_listing and set(request_analysis.get("apis", [])) <= {"shell"} and not api_results.get("files_missing"):
4517
4609
  return self._respond_with_workspace_listing(request, workspace_listing)
4518
4610
 
4519
4611
  if "finsight" in request_analysis["apis"]:
@@ -4564,10 +4656,64 @@ JSON:"""
4564
4656
  messages = [
4565
4657
  {"role": "system", "content": system_prompt}
4566
4658
  ]
4659
+
4660
+ # CRITICAL: Inject research papers IMMEDIATELY after system prompt (highest priority)
4661
+ research_data = api_results.get("research")
4662
+ if research_data and research_data.get("results"):
4663
+ papers_text = "🚨 PAPERS ALREADY FOUND - SYNTHESIZE THESE NOW:\n\n"
4664
+ papers_text += "DO NOT say 'we will search' - the search is COMPLETE.\n"
4665
+ papers_text += "DO NOT say 'attempting' - papers are ALREADY HERE.\n"
4666
+ papers_text += "YOUR JOB: Synthesize these papers into a comprehensive literature review (500+ words).\n\n"
4667
+
4668
+ for i, paper in enumerate(research_data["results"][:5], 1):
4669
+ papers_text += f"\n═══ PAPER {i} ═══\n"
4670
+ papers_text += f"Title: {paper.get('title', 'No title')}\n"
4671
+ # Handle authors as either list of dicts or list of strings
4672
+ authors = paper.get('authors', [])
4673
+ if authors:
4674
+ if isinstance(authors[0], dict):
4675
+ author_names = [a.get('name', 'Unknown') for a in authors[:3]]
4676
+ else:
4677
+ author_names = authors[:3]
4678
+ papers_text += f"Authors: {', '.join(author_names)}\n"
4679
+ papers_text += f"Year: {paper.get('year', 'N/A')}\n"
4680
+ if paper.get('abstract'):
4681
+ papers_text += f"\nAbstract:\n{paper['abstract']}\n"
4682
+ if paper.get('tldr'):
4683
+ papers_text += f"\nTL;DR: {paper['tldr']}\n"
4684
+ papers_text += "\n"
4685
+
4686
+ papers_text += "\n🚨 SYNTHESIZE THESE PAPERS NOW - Include:\n"
4687
+ papers_text += "- Overview of the research area\n"
4688
+ papers_text += "- Key findings from each paper's abstract\n"
4689
+ papers_text += "- Methods and approaches used\n"
4690
+ papers_text += "- Comparison and contrast of different approaches\n"
4691
+ papers_text += "- Implications and future directions\n"
4692
+ papers_text += "\nMINIMUM 500 WORDS. Use the abstracts above."
4693
+
4694
+ messages.append({"role": "system", "content": papers_text})
4695
+
4567
4696
  # If we have file context, inject it as an additional grounding message
4568
4697
  fc = api_results.get("files_context")
4569
4698
  if fc:
4570
- messages.append({"role": "system", "content": f"Grounding from mentioned file(s):\n{fc}\n\nAnswer based strictly on this content when relevant. Do not run shell commands."})
4699
+ # Count how many files are being compared
4700
+ file_count = len([fp for fp in api_results.get("files", []) if fp.get("type") == "text"])
4701
+
4702
+ if file_count > 1:
4703
+ # Multi-file comparison - make it VERY explicit
4704
+ comparison_msg = "🚨 MULTIPLE FILES PROVIDED FOR COMPARISON:\n\n"
4705
+ comparison_msg += fc
4706
+ comparison_msg += "\n\n🚨 CRITICAL INSTRUCTIONS FOR COMPARISON:\n"
4707
+ comparison_msg += "1. Read ALL file contents above carefully\n"
4708
+ comparison_msg += "2. Extract specific data points, numbers, percentages from EACH file\n"
4709
+ comparison_msg += "3. Compare and contrast the ACTUAL content (not just filenames)\n"
4710
+ comparison_msg += "4. If asked about differences, cite EXACT lines or values from BOTH files\n"
4711
+ comparison_msg += "5. Do NOT make general statements - be specific with examples from the files\n"
4712
+ comparison_msg += "\nAnswer based STRICTLY on the file contents above. Do not run shell commands."
4713
+ messages.append({"role": "system", "content": comparison_msg})
4714
+ else:
4715
+ # Single file - normal handling
4716
+ messages.append({"role": "system", "content": f"Grounding from mentioned file(s):\n{fc}\n\nAnswer based strictly on this content when relevant. Do not run shell commands."})
4571
4717
  missing = api_results.get("files_missing")
4572
4718
  if missing:
4573
4719
  messages.append({"role": "system", "content": f"User mentioned file(s) not found: {missing}. Respond explicitly that the file was not found and avoid speculation."})
@@ -4790,6 +4936,92 @@ JSON:"""
4790
4936
  final_response = "I searched but found no matches. The search returned no results."
4791
4937
  logger.warning("🚨 Hallucination prevented: LLM tried to make up results when shell output was empty")
4792
4938
 
4939
+ # ========================================
4940
+ # PHASE 2: THINKING BLOCKS
4941
+ # Show reasoning process for complex queries
4942
+ # ========================================
4943
+ thinking_text = ""
4944
+ try:
4945
+ thinking_context = {
4946
+ 'tools_used': tools_used,
4947
+ 'api_results': api_results,
4948
+ 'conversation_history': self.conversation_history[-3:] if self.conversation_history else []
4949
+ }
4950
+
4951
+ thinking_text = await generate_and_format_thinking(
4952
+ request.question,
4953
+ thinking_context,
4954
+ show_full=False # Compact version
4955
+ )
4956
+
4957
+ if thinking_text:
4958
+ logger.info(f"💭 Generated thinking process for query")
4959
+
4960
+ except Exception as e:
4961
+ logger.error(f"Thinking generation failed: {e}")
4962
+
4963
+ # ========================================
4964
+ # PHASE 1 QUALITY PIPELINE
4965
+ # Process response through quality improvements
4966
+ # ========================================
4967
+ try:
4968
+ pipeline_context = {
4969
+ 'tools_used': tools_used,
4970
+ 'api_results': api_results,
4971
+ 'query_type': request_analysis.get('type'),
4972
+ 'shell_output_type': 'generic'
4973
+ }
4974
+
4975
+ processed = await ResponsePipeline.process(
4976
+ final_response,
4977
+ request.question,
4978
+ pipeline_context,
4979
+ response_type="generic"
4980
+ )
4981
+
4982
+ final_response = processed.final_response
4983
+
4984
+ # Log quality improvements
4985
+ if processed.improvements_applied:
4986
+ logger.info(f"✨ Quality improvements: {', '.join(processed.improvements_applied)}")
4987
+ logger.info(f"📊 Quality score: {processed.quality_score:.2f}")
4988
+
4989
+ except Exception as e:
4990
+ # If pipeline fails, log but continue with original response
4991
+ logger.error(f"Quality pipeline failed: {e}, using original response")
4992
+
4993
+ # ========================================
4994
+ # PHASE 2: CONFIDENCE CALIBRATION
4995
+ # Assess confidence and add caveats if needed
4996
+ # ========================================
4997
+ try:
4998
+ confidence_context = {
4999
+ 'tools_used': tools_used,
5000
+ 'api_results': api_results,
5001
+ 'query_type': request_analysis.get('type')
5002
+ }
5003
+
5004
+ final_response, confidence_assessment = assess_and_apply_caveat(
5005
+ final_response,
5006
+ request.question,
5007
+ confidence_context
5008
+ )
5009
+
5010
+ logger.info(
5011
+ f"🎯 Confidence: {confidence_assessment.confidence_level} "
5012
+ f"({confidence_assessment.confidence_score:.2f})"
5013
+ )
5014
+
5015
+ if confidence_assessment.should_add_caveat:
5016
+ logger.info(f"⚠️ Added caveat due to low confidence")
5017
+
5018
+ except Exception as e:
5019
+ logger.error(f"Confidence calibration failed: {e}")
5020
+
5021
+ # Prepend thinking blocks if generated
5022
+ if thinking_text:
5023
+ final_response = thinking_text + "\n\n" + final_response
5024
+
4793
5025
  expected_tools: Set[str] = set()
4794
5026
  if "finsight" in request_analysis.get("apis", []):
4795
5027
  expected_tools.add("finsight_api")
@@ -4825,20 +5057,25 @@ JSON:"""
4825
5057
 
4826
5058
  except Exception as e:
4827
5059
  import traceback
4828
- details = str(e)
4829
5060
  debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
4830
5061
  if debug_mode:
4831
5062
  print("🔴 FULL TRACEBACK:")
4832
5063
  traceback.print_exc()
4833
- message = (
4834
- "⚠️ Something went wrong while orchestrating your request, but no actions were performed. "
4835
- "Please retry, and if the issue persists share this detail with the team: {details}."
4836
- ).format(details=details)
5064
+
5065
+ # ========================================
5066
+ # PHASE 1 GRACEFUL ERROR HANDLING
5067
+ # Never expose technical details to users
5068
+ # ========================================
5069
+ user_friendly_message = GracefulErrorHandler.create_fallback_response(
5070
+ request.question,
5071
+ e
5072
+ )
5073
+
4837
5074
  return ChatResponse(
4838
- response=message,
5075
+ response=user_friendly_message,
4839
5076
  timestamp=datetime.now().isoformat(),
4840
5077
  confidence_score=0.0,
4841
- error_message=details
5078
+ error_message=str(e) if debug_mode else None # Only include technical error in debug mode
4842
5079
  )
4843
5080
 
4844
5081
  async def process_request_streaming(self, request: ChatRequest):
@@ -4921,9 +5158,19 @@ JSON:"""
4921
5158
  mentioned = _extract_filenames(request.question)
4922
5159
  file_previews: List[Dict[str, Any]] = []
4923
5160
  files_forbidden: List[str] = []
5161
+
5162
+ # Check if query is asking about specific functions/methods/classes OR file metadata
5163
+ # If so, SKIP auto-preview and let shell planning handle it
5164
+ query_lower = request.question.lower()
5165
+ asking_about_code_element = any(pattern in query_lower for pattern in [
5166
+ 'method', 'function', 'class', 'def ', 'what does', 'how does',
5167
+ 'explain the', 'find the', 'show me the', 'purpose of', 'implementation of',
5168
+ 'how many lines', 'count lines', 'number of lines', 'wc -l', 'line count'
5169
+ ])
5170
+
4924
5171
  base_dir = Path.cwd().resolve()
4925
5172
  sensitive_roots = {Path('/etc'), Path('/proc'), Path('/sys'), Path('/dev'), Path('/root'), Path('/usr'), Path('/bin'), Path('/sbin'), Path('/var')}
4926
-
5173
+
4927
5174
  def _is_safe_path(path_str: str) -> bool:
4928
5175
  try:
4929
5176
  rp = Path(path_str).resolve()
@@ -4932,39 +5179,57 @@ JSON:"""
4932
5179
  return str(rp).startswith(str(base_dir))
4933
5180
  except Exception:
4934
5181
  return False
4935
-
4936
- for m in mentioned:
4937
- if not _is_safe_path(m):
4938
- files_forbidden.append(m)
4939
- continue
4940
- pr = await self._preview_file(m)
4941
- if pr:
4942
- file_previews.append(pr)
4943
-
5182
+
5183
+ # Only auto-preview if NOT asking about specific code elements or metadata
5184
+ if not asking_about_code_element:
5185
+ for m in mentioned:
5186
+ if not _is_safe_path(m):
5187
+ files_forbidden.append(m)
5188
+ continue
5189
+ pr = await self._preview_file(m)
5190
+ # Only add successful previews (not errors)
5191
+ if pr and pr.get("type") != "error":
5192
+ file_previews.append(pr)
5193
+ else:
5194
+ # Query is about specific code elements - let shell planning handle with grep/wc
5195
+ files_forbidden = [m for m in mentioned if not _is_safe_path(m)]
5196
+
4944
5197
  if file_previews:
4945
5198
  api_results["files"] = file_previews
5199
+ tools_used.append("read_file") # Track that files were read
5200
+ # Build grounded context from ALL text previews (for comparisons)
4946
5201
  text_previews = [fp for fp in file_previews if fp.get("type") == "text" and fp.get("preview")]
4947
5202
  files_context = ""
4948
5203
  if text_previews:
4949
- fp = text_previews[0]
4950
- quoted = "\n".join(fp["preview"].splitlines()[:20])
4951
- files_context = f"File: {fp['path']} (first lines)\n" + quoted
5204
+ # Detect comparison queries - include MORE context
5205
+ is_comparison = len(text_previews) > 1 or any(word in request.question.lower() for word in ['compare', 'difference', 'contrast', 'vs', 'versus'])
5206
+ line_limit = 200 if is_comparison else 100 # More lines for comparisons
5207
+
5208
+ # Include all files with appropriate context
5209
+ file_contexts = []
5210
+ for fp in text_previews:
5211
+ quoted = "\n".join(fp["preview"].splitlines()[:line_limit])
5212
+ file_contexts.append(f"File: {fp['path']}\n{quoted}")
5213
+ files_context = "\n\n---\n\n".join(file_contexts)
4952
5214
  api_results["files_context"] = files_context
4953
- elif mentioned:
5215
+ elif mentioned and not asking_about_code_element:
5216
+ # Mentioned files but none found (only set if we actually tried to preview them)
4954
5217
  api_results["files_missing"] = mentioned
4955
5218
  if files_forbidden:
4956
5219
  api_results["files_forbidden"] = files_forbidden
4957
5220
 
4958
5221
  # Workspace listing
4959
5222
  workspace_listing: Optional[Dict[str, Any]] = None
4960
- if not file_previews:
5223
+ # Only show workspace listing if NOT looking for specific missing files
5224
+ if not file_previews and not api_results.get("files_missing"):
4961
5225
  file_browse_keywords = ("list files", "show files", "what files")
4962
5226
  describe_files = ("file" in question_lower or "directory" in question_lower)
4963
5227
  if any(keyword in question_lower for keyword in file_browse_keywords) or describe_files:
4964
5228
  workspace_listing = await self._get_workspace_listing()
4965
5229
  api_results["workspace_listing"] = workspace_listing
4966
5230
 
4967
- if workspace_listing and set(request_analysis.get("apis", [])) <= {"shell"}:
5231
+ # Don't show workspace listing if there are missing files (prioritize error)
5232
+ if workspace_listing and set(request_analysis.get("apis", [])) <= {"shell"} and not api_results.get("files_missing"):
4968
5233
  result = self._respond_with_workspace_listing(request, workspace_listing)
4969
5234
  async def workspace_gen():
4970
5235
  yield result.response
@@ -4996,10 +5261,63 @@ JSON:"""
4996
5261
  # Build messages
4997
5262
  system_prompt = self._build_system_prompt(request_analysis, memory_context, api_results)
4998
5263
  messages = [{"role": "system", "content": system_prompt}]
4999
-
5264
+
5265
+ # CRITICAL: Inject research papers IMMEDIATELY after system prompt (highest priority)
5266
+ research_data = api_results.get("research")
5267
+ if research_data and research_data.get("results"):
5268
+ papers_text = "🚨 PAPERS ALREADY FOUND - SYNTHESIZE THESE NOW:\n\n"
5269
+ papers_text += "DO NOT say 'we will search' - the search is COMPLETE.\n"
5270
+ papers_text += "DO NOT say 'attempting' - papers are ALREADY HERE.\n"
5271
+ papers_text += "YOUR JOB: Synthesize these papers into a comprehensive literature review (500+ words).\n\n"
5272
+
5273
+ for i, paper in enumerate(research_data["results"][:5], 1):
5274
+ papers_text += f"\n═══ PAPER {i} ═══\n"
5275
+ papers_text += f"Title: {paper.get('title', 'No title')}\n"
5276
+ # Handle authors as either list of dicts or list of strings
5277
+ authors = paper.get('authors', [])
5278
+ if authors:
5279
+ if isinstance(authors[0], dict):
5280
+ author_names = [a.get('name', 'Unknown') for a in authors[:3]]
5281
+ else:
5282
+ author_names = authors[:3]
5283
+ papers_text += f"Authors: {', '.join(author_names)}\n"
5284
+ papers_text += f"Year: {paper.get('year', 'N/A')}\n"
5285
+ if paper.get('abstract'):
5286
+ papers_text += f"\nAbstract:\n{paper['abstract']}\n"
5287
+ if paper.get('tldr'):
5288
+ papers_text += f"\nTL;DR: {paper['tldr']}\n"
5289
+ papers_text += "\n"
5290
+
5291
+ papers_text += "\n🚨 SYNTHESIZE THESE PAPERS NOW - Include:\n"
5292
+ papers_text += "- Overview of the research area\n"
5293
+ papers_text += "- Key findings from each paper's abstract\n"
5294
+ papers_text += "- Methods and approaches used\n"
5295
+ papers_text += "- Comparison and contrast of different approaches\n"
5296
+ papers_text += "- Implications and future directions\n"
5297
+ papers_text += "\nMINIMUM 500 WORDS. Use the abstracts above."
5298
+
5299
+ messages.append({"role": "system", "content": papers_text})
5300
+
5000
5301
  fc = api_results.get("files_context")
5001
5302
  if fc:
5002
- messages.append({"role": "system", "content": f"Grounding from mentioned file(s):\n{fc}"})
5303
+ # Count how many files are being compared
5304
+ file_count = len([fp for fp in api_results.get("files", []) if fp.get("type") == "text"])
5305
+
5306
+ if file_count > 1:
5307
+ # Multi-file comparison - make it VERY explicit
5308
+ comparison_msg = "🚨 MULTIPLE FILES PROVIDED FOR COMPARISON:\n\n"
5309
+ comparison_msg += fc
5310
+ comparison_msg += "\n\n🚨 CRITICAL INSTRUCTIONS FOR COMPARISON:\n"
5311
+ comparison_msg += "1. Read ALL file contents above carefully\n"
5312
+ comparison_msg += "2. Extract specific data points, numbers, percentages from EACH file\n"
5313
+ comparison_msg += "3. Compare and contrast the ACTUAL content (not just filenames)\n"
5314
+ comparison_msg += "4. If asked about differences, cite EXACT lines or values from BOTH files\n"
5315
+ comparison_msg += "5. Do NOT make general statements - be specific with examples from the files\n"
5316
+ comparison_msg += "\nAnswer based STRICTLY on the file contents above. Do not run shell commands."
5317
+ messages.append({"role": "system", "content": comparison_msg})
5318
+ else:
5319
+ # Single file - normal handling
5320
+ messages.append({"role": "system", "content": f"Grounding from mentioned file(s):\n{fc}"})
5003
5321
 
5004
5322
  # Add conversation history (abbreviated - just recent)
5005
5323
  if len(self.conversation_history) > 6: