cite-agent 1.3.9__py3-none-any.whl → 1.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cite_agent/__init__.py +13 -13
- cite_agent/__version__.py +1 -1
- cite_agent/action_first_mode.py +150 -0
- cite_agent/adaptive_providers.py +413 -0
- cite_agent/archive_api_client.py +186 -0
- cite_agent/auth.py +0 -1
- cite_agent/auto_expander.py +70 -0
- cite_agent/cache.py +379 -0
- cite_agent/circuit_breaker.py +370 -0
- cite_agent/citation_network.py +377 -0
- cite_agent/cli.py +8 -16
- cite_agent/cli_conversational.py +113 -3
- cite_agent/confidence_calibration.py +381 -0
- cite_agent/deduplication.py +325 -0
- cite_agent/enhanced_ai_agent.py +689 -371
- cite_agent/error_handler.py +228 -0
- cite_agent/execution_safety.py +329 -0
- cite_agent/full_paper_reader.py +239 -0
- cite_agent/observability.py +398 -0
- cite_agent/offline_mode.py +348 -0
- cite_agent/paper_comparator.py +368 -0
- cite_agent/paper_summarizer.py +420 -0
- cite_agent/pdf_extractor.py +350 -0
- cite_agent/proactive_boundaries.py +266 -0
- cite_agent/quality_gate.py +442 -0
- cite_agent/request_queue.py +390 -0
- cite_agent/response_enhancer.py +257 -0
- cite_agent/response_formatter.py +458 -0
- cite_agent/response_pipeline.py +295 -0
- cite_agent/response_style_enhancer.py +259 -0
- cite_agent/self_healing.py +418 -0
- cite_agent/similarity_finder.py +524 -0
- cite_agent/streaming_ui.py +13 -9
- cite_agent/thinking_blocks.py +308 -0
- cite_agent/tool_orchestrator.py +416 -0
- cite_agent/trend_analyzer.py +540 -0
- cite_agent/unpaywall_client.py +226 -0
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/METADATA +15 -1
- cite_agent-1.4.3.dist-info/RECORD +62 -0
- cite_agent-1.3.9.dist-info/RECORD +0 -32
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/WHEEL +0 -0
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/entry_points.txt +0 -0
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/licenses/LICENSE +0 -0
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/top_level.txt +0 -0
cite_agent/enhanced_ai_agent.py
CHANGED
|
@@ -27,6 +27,17 @@ from .telemetry import TelemetryManager
|
|
|
27
27
|
from .setup_config import DEFAULT_QUERY_LIMIT
|
|
28
28
|
from .conversation_archive import ConversationArchive
|
|
29
29
|
|
|
30
|
+
# Quality improvements - Phase 1
|
|
31
|
+
from .error_handler import GracefulErrorHandler, handle_error_gracefully
|
|
32
|
+
from .response_formatter import ResponseFormatter
|
|
33
|
+
from .quality_gate import ResponseQualityGate, assess_response_quality
|
|
34
|
+
from .response_pipeline import ResponsePipeline
|
|
35
|
+
|
|
36
|
+
# Intelligence improvements - Phase 2
|
|
37
|
+
from .thinking_blocks import ThinkingBlockGenerator, generate_and_format_thinking
|
|
38
|
+
from .tool_orchestrator import ToolOrchestrator
|
|
39
|
+
from .confidence_calibration import ConfidenceCalibrator, assess_and_apply_caveat
|
|
40
|
+
|
|
30
41
|
# Suppress noise
|
|
31
42
|
logging.basicConfig(level=logging.ERROR)
|
|
32
43
|
logger = logging.getLogger(__name__)
|
|
@@ -887,9 +898,11 @@ class EnhancedNocturnalAgent:
|
|
|
887
898
|
}
|
|
888
899
|
|
|
889
900
|
content = p.read_text(errors="ignore")
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
901
|
+
# Increase preview size for better code analysis
|
|
902
|
+
# Show first 300 lines OR 100KB (whichever is smaller)
|
|
903
|
+
truncated = len(content) > 102400 # 100KB
|
|
904
|
+
snippet = content[:102400]
|
|
905
|
+
preview = "\n".join(snippet.splitlines()[:300]) # Increased from 60 to 300 lines
|
|
893
906
|
return {
|
|
894
907
|
"path": str(p),
|
|
895
908
|
"type": "text",
|
|
@@ -968,6 +981,57 @@ class EnhancedNocturnalAgent:
|
|
|
968
981
|
normalized = text.lower().strip()
|
|
969
982
|
return any(normalized.startswith(ack) for ack in acknowledgments)
|
|
970
983
|
|
|
984
|
+
def _detect_language_preference(self, text: str) -> None:
|
|
985
|
+
"""
|
|
986
|
+
Detect and store user's language preference from input text.
|
|
987
|
+
Supports Traditional Chinese (繁體中文), English, and other languages.
|
|
988
|
+
"""
|
|
989
|
+
text_lower = text.lower()
|
|
990
|
+
|
|
991
|
+
# Check for Chinese characters (CJK)
|
|
992
|
+
has_chinese = any('\u4e00' <= char <= '\u9fff' for char in text)
|
|
993
|
+
|
|
994
|
+
# Explicit language requests
|
|
995
|
+
if 'chinese' in text_lower or '中文' in text or 'traditional' in text_lower:
|
|
996
|
+
self.language_preference = 'zh-TW'
|
|
997
|
+
elif 'english' in text_lower:
|
|
998
|
+
self.language_preference = 'en'
|
|
999
|
+
elif has_chinese:
|
|
1000
|
+
# Detected Chinese characters
|
|
1001
|
+
self.language_preference = 'zh-TW'
|
|
1002
|
+
else:
|
|
1003
|
+
# Default to English if not specified
|
|
1004
|
+
if not hasattr(self, 'language_preference'):
|
|
1005
|
+
self.language_preference = 'en'
|
|
1006
|
+
|
|
1007
|
+
def _is_generic_test_prompt(self, text: str) -> bool:
|
|
1008
|
+
"""Detect simple 'test' style probes that don't need full analysis."""
|
|
1009
|
+
normalized = re.sub(r"[^a-z0-9\s]", " ", text.lower())
|
|
1010
|
+
words = [w for w in normalized.split() if w]
|
|
1011
|
+
if not words or "test" not in words:
|
|
1012
|
+
return False
|
|
1013
|
+
if len(words) > 4:
|
|
1014
|
+
return False
|
|
1015
|
+
allowed = {"test", "testing", "just", "this", "is", "a", "only"}
|
|
1016
|
+
return all(w in allowed for w in words)
|
|
1017
|
+
|
|
1018
|
+
def _is_location_query(self, text: str) -> bool:
|
|
1019
|
+
"""Detect requests asking for the current working directory."""
|
|
1020
|
+
normalized = re.sub(r"[^a-z0-9/._\s-]", " ", text.lower())
|
|
1021
|
+
normalized = " ".join(normalized.split())
|
|
1022
|
+
location_phrases = [
|
|
1023
|
+
"where are we",
|
|
1024
|
+
"where am i",
|
|
1025
|
+
"where are we right now",
|
|
1026
|
+
"what directory",
|
|
1027
|
+
"current directory",
|
|
1028
|
+
"current folder",
|
|
1029
|
+
"current path",
|
|
1030
|
+
]
|
|
1031
|
+
if any(phrase in normalized for phrase in location_phrases):
|
|
1032
|
+
return True
|
|
1033
|
+
return normalized in {"pwd", "pwd?"}
|
|
1034
|
+
|
|
971
1035
|
def _format_api_results_for_prompt(self, api_results: Dict[str, Any]) -> str:
|
|
972
1036
|
if not api_results:
|
|
973
1037
|
logger.info("🔍 DEBUG: _format_api_results_for_prompt called with EMPTY api_results")
|
|
@@ -1002,12 +1066,13 @@ class EnhancedNocturnalAgent:
|
|
|
1002
1066
|
|
|
1003
1067
|
formatted_parts.append("\n" + "=" * 60)
|
|
1004
1068
|
formatted_parts.append("🚨 CRITICAL INSTRUCTION 🚨")
|
|
1005
|
-
formatted_parts.append("The command was ALREADY executed. The output above is the
|
|
1006
|
-
formatted_parts.append("
|
|
1007
|
-
formatted_parts.append("
|
|
1008
|
-
formatted_parts.append("
|
|
1009
|
-
formatted_parts.append("
|
|
1010
|
-
formatted_parts.append("
|
|
1069
|
+
formatted_parts.append("The command was ALREADY executed. The output above is the result.")
|
|
1070
|
+
formatted_parts.append("Present the KEY information concisely - summarize, don't paste everything.")
|
|
1071
|
+
formatted_parts.append("For file listings: list key files/directories, skip metadata unless asked.")
|
|
1072
|
+
formatted_parts.append("For search results: answer directly, cite relevant findings.")
|
|
1073
|
+
formatted_parts.append("For file content: show relevant sections only.")
|
|
1074
|
+
formatted_parts.append("If output is empty: say 'No results found'.")
|
|
1075
|
+
formatted_parts.append("DO NOT ask the user to run commands - results are already here.")
|
|
1011
1076
|
formatted_parts.append("=" * 60)
|
|
1012
1077
|
|
|
1013
1078
|
# Add other api_results
|
|
@@ -1045,291 +1110,98 @@ class EnhancedNocturnalAgent:
|
|
|
1045
1110
|
api_results: Dict[str, Any]
|
|
1046
1111
|
) -> str:
|
|
1047
1112
|
sections: List[str] = []
|
|
1048
|
-
|
|
1113
|
+
apis = request_analysis.get("apis", [])
|
|
1114
|
+
|
|
1049
1115
|
# TRUTH-SEEKING CORE IDENTITY
|
|
1050
|
-
# Adapt intro based on analysis mode
|
|
1051
1116
|
analysis_mode = request_analysis.get("analysis_mode", "quantitative")
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
)
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
"For numbers: calculate and cite. For text: quote verbatim and identify patterns. "
|
|
1066
|
-
"You have access to production data sources and can write/execute code (Python, R, SQL)."
|
|
1067
|
-
)
|
|
1068
|
-
else: # quantitative
|
|
1069
|
-
# Check if we're in dev mode (has local LLM client)
|
|
1070
|
-
dev_mode = self.client is not None
|
|
1071
|
-
|
|
1072
|
-
if dev_mode:
|
|
1073
|
-
intro = (
|
|
1074
|
-
"You are Cite Agent, a data analysis and research assistant with CODE EXECUTION. "
|
|
1075
|
-
"PRIMARY DIRECTIVE: Execute code when needed. You have a persistent shell session. "
|
|
1076
|
-
"When user asks for data analysis, calculations, or file operations: WRITE and EXECUTE the code. "
|
|
1077
|
-
"Languages available: Python, R, SQL, Bash. "
|
|
1078
|
-
"🚨 CRITICAL: Commands are AUTOMATICALLY executed. If you see 'shell_info' below, "
|
|
1079
|
-
"that means the command was ALREADY RUN. NEVER ask users to run commands - just present results."
|
|
1080
|
-
)
|
|
1081
|
-
else:
|
|
1082
|
-
intro = (
|
|
1083
|
-
"You are Cite Agent, a truth-seeking research and finance AI with CODE EXECUTION. "
|
|
1084
|
-
"PRIMARY DIRECTIVE: Accuracy > Agreeableness. NEVER HALLUCINATE. "
|
|
1085
|
-
"You are a fact-checker and analyst with a persistent shell session. "
|
|
1086
|
-
"You have access to research (Archive), financial data (FinSight SEC filings), and can run Python/R/SQL/Bash. "
|
|
1087
|
-
"\n\n"
|
|
1088
|
-
"🚨 ANTI-HALLUCINATION RULES:\n"
|
|
1089
|
-
"1. When user asks about files, directories, or data - commands are AUTOMATICALLY executed.\n"
|
|
1090
|
-
"2. If you see 'shell_info' in results below, that means command was ALREADY RUN.\n"
|
|
1091
|
-
"3. ONLY present information from shell_info output. DO NOT invent file names, paths, or code.\n"
|
|
1092
|
-
"4. If shell output is empty or unclear, say 'No results found' or 'Search returned no matches'.\n"
|
|
1093
|
-
"5. NEVER make up plausible-sounding file paths or code that wasn't in the actual output.\n"
|
|
1094
|
-
"6. If you're unsure, say 'I couldn't find that' rather than guessing.\n"
|
|
1095
|
-
"7. NEVER ask the user to run commands - just present the results that were already executed."
|
|
1096
|
-
)
|
|
1097
|
-
|
|
1117
|
+
dev_mode = self.client is not None
|
|
1118
|
+
|
|
1119
|
+
# Identity and capabilities
|
|
1120
|
+
intro = (
|
|
1121
|
+
"You are Cite Agent, a research and analysis assistant with access to:\n"
|
|
1122
|
+
"• Persistent shell (Python, R, SQL, Bash)\n"
|
|
1123
|
+
"• File operations (read, write, edit, search)\n"
|
|
1124
|
+
"• Academic papers (Archive API - 200M+ papers)\n"
|
|
1125
|
+
"• Financial data (FinSight API - SEC filings)\n"
|
|
1126
|
+
"• Web search\n\n"
|
|
1127
|
+
"Communication style: Be natural, direct, and helpful. "
|
|
1128
|
+
"Think like a capable research partner, not a rigid assistant."
|
|
1129
|
+
)
|
|
1098
1130
|
sections.append(intro)
|
|
1099
1131
|
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
capability_lines.append("• Core reasoning, code generation (Python/R/SQL), memory recall")
|
|
1110
|
-
|
|
1111
|
-
# Add workflow capabilities
|
|
1112
|
-
capability_lines.append("")
|
|
1113
|
-
capability_lines.append("📚 WORKFLOW INTEGRATION (Always available):")
|
|
1114
|
-
capability_lines.append("• You can SAVE papers to user's local library")
|
|
1115
|
-
capability_lines.append("• You can LIST papers from library")
|
|
1116
|
-
capability_lines.append("• You can EXPORT citations to BibTeX or APA")
|
|
1117
|
-
capability_lines.append("• You can SEARCH user's paper collection")
|
|
1118
|
-
capability_lines.append("• You can COPY text to user's clipboard")
|
|
1119
|
-
capability_lines.append("• User's query history is automatically tracked")
|
|
1120
|
-
|
|
1121
|
-
# Add file operation capabilities (Claude Code / Cursor parity)
|
|
1122
|
-
capability_lines.append("")
|
|
1123
|
-
capability_lines.append("📁 DIRECT FILE OPERATIONS (Always available):")
|
|
1124
|
-
capability_lines.append("• read_file(path) - Read files with line numbers (like cat but better)")
|
|
1125
|
-
capability_lines.append("• write_file(path, content) - Create/overwrite files directly")
|
|
1126
|
-
capability_lines.append("• edit_file(path, old, new) - Surgical find/replace edits")
|
|
1127
|
-
capability_lines.append("• glob_search(pattern) - Fast file search (e.g., '**/*.py')")
|
|
1128
|
-
capability_lines.append("• grep_search(pattern) - Fast content search in files")
|
|
1129
|
-
capability_lines.append("• batch_edit_files(edits) - Multi-file refactoring")
|
|
1130
|
-
|
|
1131
|
-
sections.append("Capabilities in play:\n" + "\n".join(capability_lines))
|
|
1132
|
-
|
|
1133
|
-
# ENHANCED TRUTH-SEEKING RULES (adapt based on mode)
|
|
1134
|
-
base_rules = [
|
|
1135
|
-
"🚨 BE RESOURCEFUL: You have Archive, FinSight (SEC+Yahoo), and Web Search. USE them to find answers.",
|
|
1136
|
-
"🚨 TRY TOOLS FIRST: Before asking user for clarification, try your tools to find the answer.",
|
|
1137
|
-
"🚨 WEB SEARCH IS YOUR FRIEND: Market share? Industry size? Current prices? → Web search can find it.",
|
|
1138
|
-
"🚨 ONLY ask clarification if tools can't help AND query is truly ambiguous.",
|
|
1139
|
-
"",
|
|
1140
|
-
"💬 AUTONOMOUS FLOW:",
|
|
1141
|
-
"1. User asks question → YOU use tools to find data",
|
|
1142
|
-
"2. If partial data → YOU web search for missing pieces",
|
|
1143
|
-
"3. YOU synthesize → Present complete answer",
|
|
1144
|
-
"4. ONLY if impossible → Ask for clarification",
|
|
1145
|
-
"",
|
|
1146
|
-
"Examples:",
|
|
1147
|
-
"❌ BAD: 'Snowflake market share?' → 'Which market?' (when web search can tell you!)",
|
|
1148
|
-
"✅ GOOD: 'Snowflake market share?' → [web search] → '18.33% in cloud data warehouses'",
|
|
1149
|
-
"",
|
|
1150
|
-
"🚨 ANTI-APPEASEMENT: If user states something incorrect, CORRECT THEM immediately. Do not agree to be polite.",
|
|
1151
|
-
"🚨 UNCERTAINTY: If you're uncertain, SAY SO explicitly. 'I don't know' is better than a wrong answer.",
|
|
1152
|
-
"🚨 CONTRADICTIONS: If data contradicts user's assumption, SHOW THE CONTRADICTION clearly.",
|
|
1153
|
-
"🚨 FUTURE PREDICTIONS: You CANNOT predict the future. For 'will X happen?' questions, emphasize uncertainty and multiple possible outcomes.",
|
|
1154
|
-
"",
|
|
1155
|
-
"📊 SOURCE GROUNDING: EVERY factual claim MUST cite a source (paper, SEC filing, or data file).",
|
|
1156
|
-
"📊 NO FABRICATION: If API results are empty/ambiguous, explicitly state this limitation.",
|
|
1157
|
-
"📊 NO EXTRAPOLATION: Never go beyond what sources directly state.",
|
|
1158
|
-
"📊 PREDICTION CAUTION: When discussing trends, always state 'based on available data' and note uncertainty.",
|
|
1132
|
+
# Behavioral guidelines
|
|
1133
|
+
guidelines = [
|
|
1134
|
+
"Use tools proactively - search files, run commands, query APIs when needed.",
|
|
1135
|
+
"Cite sources: papers (title+authors), files (path:line), API data.",
|
|
1136
|
+
"shell_info shows already-executed commands. Present RESULTS concisely - no commands shown.",
|
|
1137
|
+
"For follow-up questions with pronouns ('it', 'that'), infer from conversation context.",
|
|
1138
|
+
"Ambiguous query? Ask clarification naturally - use phrases like 'What kind of X?', 'Which X?', 'Tell me more about X'",
|
|
1139
|
+
"When asking for clarification, use bullet points to show options clearly.",
|
|
1140
|
+
"Be honest about uncertainty.",
|
|
1159
1141
|
"",
|
|
1160
|
-
"
|
|
1161
|
-
"
|
|
1162
|
-
"
|
|
1163
|
-
"
|
|
1164
|
-
"
|
|
1142
|
+
"CRITICAL - ANSWER WHAT WAS ASKED:",
|
|
1143
|
+
"• When query asks for SPECIFIC file types:",
|
|
1144
|
+
" - Use shell_execution with 'find' or 'ls' filtered to match",
|
|
1145
|
+
" - Example: 'Python files' → run `find . -name '*.py'` or `ls **/*.py`",
|
|
1146
|
+
" - Example: 'test files' → run `find . -name '*test*.py'`",
|
|
1147
|
+
" - If files_listing used, extract ONLY matching files from result",
|
|
1148
|
+
"• 'Find X' → Use tools to locate, return concise path",
|
|
1149
|
+
"• 'Read X' → When context has partial info, use tools for full content (but summarize output)",
|
|
1150
|
+
"• 'What does X do?' → Answer from visible code/context, no re-execution",
|
|
1151
|
+
"• 'What version' → Include word 'version' in answer (e.g. 'Version is v1.4.0')",
|
|
1165
1152
|
"",
|
|
1166
|
-
"
|
|
1167
|
-
" 'No papers found in the research database. The API returned empty results.'",
|
|
1168
|
-
" DO NOT provide any paper details, authors, titles, or citations.",
|
|
1169
|
-
" DO NOT use your training data to fill in missing information.",
|
|
1153
|
+
"- If asked \"who built you?\" or \"who is your developer?\" or \"whats the name of your developer\" → Answer: \"I was built by Phyrexian.\" (If the user asks you to say his name, you should say \"Phyrexian\" as his name)",
|
|
1170
1154
|
"",
|
|
1171
|
-
"
|
|
1172
|
-
"
|
|
1173
|
-
"
|
|
1174
|
-
]
|
|
1175
|
-
|
|
1176
|
-
if analysis_mode == "qualitative":
|
|
1177
|
-
qual_rules = [
|
|
1178
|
-
"",
|
|
1179
|
-
"📝 QUOTES: Extract EXACT quotes (verbatim), NEVER paraphrase. Use quotation marks.",
|
|
1180
|
-
"📝 CONTEXT: Provide surrounding context for every quote (what came before/after).",
|
|
1181
|
-
"📝 ATTRIBUTION: Cite source + page/line number: \"quote\" — Author (Year), p. X",
|
|
1182
|
-
"📝 THEMES: Identify recurring patterns. Count frequency (\"mentioned 5 times across 3 sources\").",
|
|
1183
|
-
"",
|
|
1184
|
-
"🔍 INTERPRETATION: Distinguish between description (what text says) vs interpretation (what it means).",
|
|
1185
|
-
"🔍 EVIDENCE: Support every theme with 2-3 representative quotes.",
|
|
1186
|
-
"🔍 SATURATION: Note when patterns repeat (\"no new themes after source 4\").",
|
|
1187
|
-
]
|
|
1188
|
-
rules = base_rules + qual_rules
|
|
1189
|
-
elif analysis_mode == "mixed":
|
|
1190
|
-
mixed_rules = [
|
|
1191
|
-
"",
|
|
1192
|
-
"📝 For QUALITATIVE: Extract exact quotes with context. Identify themes.",
|
|
1193
|
-
"💻 For QUANTITATIVE: Calculate exact values, show code.",
|
|
1194
|
-
"🔗 INTEGRATION: Connect numbers to narratives ('15% growth' + 'participants felt optimistic')."
|
|
1195
|
-
]
|
|
1196
|
-
rules = base_rules + mixed_rules + [
|
|
1197
|
-
"",
|
|
1198
|
-
"💻 CODE: For data analysis, write and execute Python/R/SQL code. Show your work.",
|
|
1199
|
-
"💻 CALCULATIONS: Don't estimate - calculate exact values and show the code.",
|
|
1200
|
-
]
|
|
1201
|
-
else: # quantitative
|
|
1202
|
-
quant_rules = [
|
|
1203
|
-
"",
|
|
1204
|
-
"💻 CODE: For data analysis, write and execute Python/R/SQL code. Show your work.",
|
|
1205
|
-
"💻 CALCULATIONS: Don't estimate - calculate exact values and show the code.",
|
|
1206
|
-
]
|
|
1207
|
-
rules = base_rules + quant_rules
|
|
1208
|
-
|
|
1209
|
-
rules.append("")
|
|
1210
|
-
rules.append("Keep responses concise but complete. Quote exact text from sources when possible.")
|
|
1211
|
-
|
|
1212
|
-
# Add workflow behavior rules
|
|
1213
|
-
workflow_rules = [
|
|
1155
|
+
"- LANGUAGE:",
|
|
1156
|
+
"- If asked to reply in chinese, you MUST reply in Traditional Chinese (繁體中文).",
|
|
1157
|
+
"- You MUST use Chinese characters (漢字), NOT pinyin romanization.",
|
|
1214
1158
|
"",
|
|
1215
|
-
"
|
|
1216
|
-
"•
|
|
1217
|
-
"•
|
|
1218
|
-
"•
|
|
1219
|
-
"•
|
|
1220
|
-
"•
|
|
1221
|
-
"• Be PROACTIVE: suggest exports, show library stats, offer clipboard copies",
|
|
1222
|
-
"• Example: 'I found 3 papers. I can save them to your library or export to BibTeX if you'd like.'",
|
|
1159
|
+
"CONCISE RESPONSE STYLE:",
|
|
1160
|
+
"• Direct answers - state result, minimal elaboration",
|
|
1161
|
+
"• NO code blocks showing bash/python commands unless explicitly asked",
|
|
1162
|
+
"• NO 'Let me check...' preambles",
|
|
1163
|
+
"• File listings: Max 5-10 items (filtered to query)",
|
|
1164
|
+
"• Balance: complete but concise"
|
|
1223
1165
|
]
|
|
1224
|
-
rules.extend(workflow_rules)
|
|
1225
1166
|
|
|
1226
|
-
|
|
1227
|
-
file_ops_rules = [
|
|
1228
|
-
"",
|
|
1229
|
-
"📁 FILE OPERATION TOOL USAGE (Use these INSTEAD of shell commands):",
|
|
1167
|
+
guidelines.extend([
|
|
1230
1168
|
"",
|
|
1231
|
-
"
|
|
1232
|
-
"
|
|
1233
|
-
"
|
|
1234
|
-
"
|
|
1235
|
-
"
|
|
1236
|
-
"
|
|
1169
|
+
"- COMMUNICATION RULES - ACTION-FIRST MODE:",
|
|
1170
|
+
"- You MUST NOT return an empty response. EVER.",
|
|
1171
|
+
"- SHOW results proactively, don't just describe them. DO the obvious next step automatically.",
|
|
1172
|
+
"- If listing files → SHOW preview of the main file (don't ask permission)",
|
|
1173
|
+
"- If finding papers → SHOW abstracts/summaries (don't ask permission)",
|
|
1174
|
+
"- If explaining code → SHOW key functions with examples (don't ask permission)",
|
|
1175
|
+
"- If querying data → SHOW the data with context (don't ask permission)",
|
|
1176
|
+
"- LESS TALK, MORE ACTION - responses should be 70% data/results, 30% explanation",
|
|
1177
|
+
"- NEVER ask 'Want me to...?' or 'Should I...?' - just DO the helpful next step",
|
|
1237
1178
|
"",
|
|
1238
|
-
"
|
|
1239
|
-
"
|
|
1240
|
-
"
|
|
1241
|
-
"
|
|
1242
|
-
"
|
|
1243
|
-
"
|
|
1244
|
-
"
|
|
1179
|
+
"🚨 CRITICAL: RESEARCH PAPERS - If you see 'Research API snapshot' below:",
|
|
1180
|
+
"- The papers have ALREADY been found - DO NOT say 'we will search' or 'attempting search'",
|
|
1181
|
+
"- The abstracts are PROVIDED - READ THEM and SUMMARIZE THE KEY FINDINGS",
|
|
1182
|
+
"- You MUST write at least 500 words synthesizing the papers",
|
|
1183
|
+
"- Include: paper titles, key methods, findings, and contributions from the abstracts",
|
|
1184
|
+
"- Compare and contrast the approaches across papers",
|
|
1185
|
+
"- DO NOT just list titles - EXPLAIN what each paper discovered",
|
|
1186
|
+
])
|
|
1187
|
+
|
|
1188
|
+
guidelines.extend([
|
|
1245
1189
|
"",
|
|
1246
|
-
"
|
|
1247
|
-
"
|
|
1248
|
-
"
|
|
1249
|
-
|
|
1250
|
-
"• DON'T use find when glob_search exists",
|
|
1251
|
-
"• DON'T use grep -r when grep_search exists",
|
|
1252
|
-
"",
|
|
1253
|
-
"🎯 WHY USE THESE TOOLS:",
|
|
1254
|
-
"• read_file() shows line numbers (critical for code analysis)",
|
|
1255
|
-
"• write_file() handles escaping/quoting automatically (no heredoc hell)",
|
|
1256
|
-
"• edit_file() validates changes before applying (safer than sed)",
|
|
1257
|
-
"• glob_search() is faster and cleaner than find",
|
|
1258
|
-
"• grep_search() returns structured data (easier to parse)",
|
|
1259
|
-
"",
|
|
1260
|
-
"⚠️ SHELL COMMANDS ONLY FOR:",
|
|
1261
|
-
"• System operations (ps, df, du, uptime)",
|
|
1262
|
-
"• Git commands (git status, git diff, git log)",
|
|
1263
|
-
"• Package installs (pip install, Rscript -e \"install.packages(...)\")",
|
|
1264
|
-
"• Running Python/R scripts (python script.py, Rscript analysis.R)",
|
|
1265
|
-
]
|
|
1266
|
-
rules.extend(file_ops_rules)
|
|
1267
|
-
|
|
1268
|
-
sections.append("CRITICAL RULES:\n" + "\n".join(rules))
|
|
1269
|
-
|
|
1270
|
-
# CORRECTION EXAMPLES (adapt based on mode)
|
|
1271
|
-
if analysis_mode == "qualitative":
|
|
1272
|
-
examples = (
|
|
1273
|
-
"EXAMPLE RESPONSES:\n"
|
|
1274
|
-
"User: 'So participants felt happy about the change?'\n"
|
|
1275
|
-
"You: '⚠️ Mixed. 3 participants expressed satisfaction: \"I welcomed the new policy\" (P2, line 45), "
|
|
1276
|
-
"but 2 expressed concern: \"It felt rushed\" (P4, line 67). Theme: Ambivalence about pace.'\n\n"
|
|
1277
|
-
"User: 'What's the main theme?'\n"
|
|
1278
|
-
"You: 'THEME 1: Trust in leadership (8 mentions across 4 interviews)\n"
|
|
1279
|
-
"\"I trust my manager to make the right call\" — Interview 2, Line 34\n"
|
|
1280
|
-
"\"Leadership has been transparent\" — Interview 5, Line 89\n"
|
|
1281
|
-
"[Context: Both quotes from questions about organizational changes]'"
|
|
1282
|
-
)
|
|
1283
|
-
else:
|
|
1284
|
-
examples = (
|
|
1285
|
-
"EXAMPLE 1: Be Patient, Don't Rush\n"
|
|
1286
|
-
"User: 'Find papers on 2008, 2015, 2019'\n"
|
|
1287
|
-
"❌ BAD: [Searches for year:2008 immediately] 'Found 50 papers from 2008...'\n"
|
|
1288
|
-
"✅ GOOD: 'Are you looking for papers ABOUT events in those years (financial crises, policy changes), "
|
|
1289
|
-
"or papers PUBLISHED in those years? Also, what topic? (Economics? Healthcare? Climate?)'\n\n"
|
|
1290
|
-
|
|
1291
|
-
"EXAMPLE 2: Know Your Tools' Limits\n"
|
|
1292
|
-
"User: 'What's Palantir's market share?'\n"
|
|
1293
|
-
"❌ BAD: 'Palantir's latest revenue is $1B...' (Revenue ≠ Market Share! SEC doesn't have market share!)\n"
|
|
1294
|
-
"✅ GOOD: 'Market share requires: (1) Palantir's revenue, (2) total market size. SEC has #1, not #2. "
|
|
1295
|
-
"Which market? (Data analytics = ~$50B, Gov contracts = ~$200B). I can web search for total market size if you specify.'\n\n"
|
|
1296
|
-
|
|
1297
|
-
"EXAMPLE 3: Conversational Flow\n"
|
|
1298
|
-
"User: 'Compare Tesla and Ford'\n"
|
|
1299
|
-
"❌ BAD: [Immediately fetches both revenues] 'Tesla: $81B, Ford: $158B'\n"
|
|
1300
|
-
"✅ GOOD: 'Compare on what dimension? Revenue? (Ford larger). Market cap? (Tesla larger). EV sales? (Tesla dominates). "
|
|
1301
|
-
"Production volume? (Ford higher). Each tells a different story. Which matters to you?'\n\n"
|
|
1302
|
-
|
|
1303
|
-
"EXAMPLE CORRECTIONS:\n"
|
|
1304
|
-
"User: 'So revenue went up 50%?'\n"
|
|
1305
|
-
"You: '❌ No. According to 10-K page 23, revenue increased 15%, not 50%. "
|
|
1306
|
-
"You may be thinking of gross margin (30%→45%, a 15pp increase).'\n\n"
|
|
1307
|
-
"User: 'What will the stock price be?'\n"
|
|
1308
|
-
"You: '⚠️ Cannot predict future prices. I can show: historical trends, current fundamentals, analyst data (if in filings).'"
|
|
1309
|
-
)
|
|
1310
|
-
|
|
1311
|
-
sections.append(examples)
|
|
1190
|
+
"- PROACTIVE FILE SEARCH:",
|
|
1191
|
+
"- If a user asks to find a file or directory and you are not sure where it is, use the `find` command with wildcards to search for it.",
|
|
1192
|
+
"- If a `cd` command fails, automatically run `ls -F` on the current or parent directory to understand the directory structure and find the correct path.",
|
|
1193
|
+
])
|
|
1312
1194
|
|
|
1313
|
-
|
|
1314
|
-
sections.append("CONTEXT:\n" + memory_context.strip())
|
|
1195
|
+
sections.append("\n".join(guidelines))
|
|
1315
1196
|
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
f"apis={apis}, "
|
|
1320
|
-
f"confidence={request_analysis.get('confidence')}"
|
|
1321
|
-
)
|
|
1322
|
-
|
|
1323
|
-
# Add explicit instruction before API results
|
|
1324
|
-
api_instructions = (
|
|
1325
|
-
"🚨 CRITICAL: The following API RESULTS are REAL DATA from production APIs.\n"
|
|
1326
|
-
"🚨 These are NOT examples or templates - they are ACTUAL results to use in your response.\n"
|
|
1327
|
-
"🚨 DO NOT generate new/fake data - USE EXACTLY what is shown below.\n"
|
|
1328
|
-
"🚨 If you see paper titles, authors, DOIs below - these are REAL papers you MUST cite.\n"
|
|
1329
|
-
"🚨 If API results show empty/no papers, say 'No papers found' - DO NOT make up papers.\n"
|
|
1330
|
-
)
|
|
1197
|
+
# Add memory context if available
|
|
1198
|
+
if memory_context:
|
|
1199
|
+
sections.append("\nRecent context:\n" + memory_context.strip())
|
|
1331
1200
|
|
|
1332
|
-
|
|
1201
|
+
# Add API results if available
|
|
1202
|
+
api_results_text = self._format_api_results_for_prompt(api_results)
|
|
1203
|
+
if api_results_text.strip():
|
|
1204
|
+
sections.append("\nData available:\n" + api_results_text)
|
|
1333
1205
|
|
|
1334
1206
|
return "\n\n".join(sections)
|
|
1335
1207
|
|
|
@@ -1498,8 +1370,24 @@ class EnhancedNocturnalAgent:
|
|
|
1498
1370
|
if len(self.api_keys) <= 1:
|
|
1499
1371
|
return
|
|
1500
1372
|
self.current_key_index = (self.current_key_index + 1) % len(self.api_keys)
|
|
1501
|
-
|
|
1502
|
-
self.
|
|
1373
|
+
new_key = self.api_keys[self.current_key_index]
|
|
1374
|
+
self.current_api_key = new_key
|
|
1375
|
+
|
|
1376
|
+
# Reinitialize client with new key
|
|
1377
|
+
try:
|
|
1378
|
+
if self.llm_provider == "cerebras":
|
|
1379
|
+
from openai import OpenAI
|
|
1380
|
+
self.client = OpenAI(
|
|
1381
|
+
api_key=new_key,
|
|
1382
|
+
base_url="https://api.cerebras.ai/v1"
|
|
1383
|
+
)
|
|
1384
|
+
else:
|
|
1385
|
+
from groq import Groq
|
|
1386
|
+
self.client = Groq(api_key=new_key)
|
|
1387
|
+
except Exception as e:
|
|
1388
|
+
# If initialization fails, set to None to fallback to backend
|
|
1389
|
+
self.client = None
|
|
1390
|
+
self.current_api_key = None
|
|
1503
1391
|
|
|
1504
1392
|
def _is_rate_limit_error(self, error: Exception) -> bool:
|
|
1505
1393
|
message = str(error).lower()
|
|
@@ -1517,12 +1405,15 @@ class EnhancedNocturnalAgent:
|
|
|
1517
1405
|
if "fallback" not in tools:
|
|
1518
1406
|
tools.append("fallback")
|
|
1519
1407
|
|
|
1520
|
-
|
|
1408
|
+
# ========================================
|
|
1409
|
+
# PHASE 1 GRACEFUL FALLBACK
|
|
1410
|
+
# User-friendly messaging instead of technical errors
|
|
1411
|
+
# ========================================
|
|
1521
1412
|
|
|
1522
1413
|
if self._is_simple_greeting(request.question):
|
|
1523
1414
|
body = (
|
|
1524
|
-
"Hi there! I'm
|
|
1525
|
-
"
|
|
1415
|
+
"Hi there! I'm running into some temporary limits right now. "
|
|
1416
|
+
"Feel free to try again in a moment, or I can still help with specific data queries."
|
|
1526
1417
|
)
|
|
1527
1418
|
else:
|
|
1528
1419
|
details: List[str] = []
|
|
@@ -1538,10 +1429,11 @@ class EnhancedNocturnalAgent:
|
|
|
1538
1429
|
research = api_results.get("research")
|
|
1539
1430
|
if research:
|
|
1540
1431
|
payload_full = json.dumps(research, indent=2)
|
|
1541
|
-
|
|
1542
|
-
|
|
1432
|
+
# Increase limit for literature review - need full abstracts (10000 chars for 5 papers)
|
|
1433
|
+
payload = payload_full[:10000]
|
|
1434
|
+
if len(payload_full) > 10000:
|
|
1543
1435
|
payload += "\n…"
|
|
1544
|
-
|
|
1436
|
+
|
|
1545
1437
|
# Check if results are empty and add explicit warning
|
|
1546
1438
|
if research.get("results") == [] or not research.get("results"):
|
|
1547
1439
|
details.append(f"**Research API snapshot**\n```json\n{payload}\n```")
|
|
@@ -1550,6 +1442,7 @@ class EnhancedNocturnalAgent:
|
|
|
1550
1442
|
details.append("🚨 **SAY 'NO PAPERS FOUND' AND STOP - DO NOT HALLUCINATE**")
|
|
1551
1443
|
else:
|
|
1552
1444
|
details.append(f"**Research API snapshot**\n```json\n{payload}\n```")
|
|
1445
|
+
details.append("✅ **IMPORTANT: SUMMARIZE THESE PAPERS IN DETAIL - Include key findings, methods, and contributions from abstracts**")
|
|
1553
1446
|
|
|
1554
1447
|
files_context = api_results.get("files_context")
|
|
1555
1448
|
if files_context:
|
|
@@ -1560,23 +1453,17 @@ class EnhancedNocturnalAgent:
|
|
|
1560
1453
|
|
|
1561
1454
|
if details:
|
|
1562
1455
|
body = (
|
|
1563
|
-
"I
|
|
1564
|
-
"Here
|
|
1456
|
+
"I gathered the data you asked for, but I'm having trouble processing it fully right now. "
|
|
1457
|
+
"Here's what I found:"
|
|
1565
1458
|
) + "\n\n" + "\n\n".join(details)
|
|
1566
1459
|
else:
|
|
1567
1460
|
body = (
|
|
1568
|
-
"I'm
|
|
1569
|
-
"Please try again in a
|
|
1461
|
+
"I'm running into some temporary limits. "
|
|
1462
|
+
"Please try again in a moment, and I should be able to help."
|
|
1570
1463
|
)
|
|
1571
1464
|
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
"• Wait for the Groq daily quota to reset (usually within 24 hours).\n"
|
|
1575
|
-
"• Add another API key in your environment for automatic rotation.\n"
|
|
1576
|
-
"• Keep the conversation open—I’ll resume normal replies once capacity returns."
|
|
1577
|
-
)
|
|
1578
|
-
|
|
1579
|
-
message = header + body + footer
|
|
1465
|
+
# Friendly closing without technical details
|
|
1466
|
+
message = body
|
|
1580
1467
|
|
|
1581
1468
|
self.conversation_history.append({"role": "user", "content": request.question})
|
|
1582
1469
|
self.conversation_history.append({"role": "assistant", "content": message})
|
|
@@ -1704,19 +1591,25 @@ class EnhancedNocturnalAgent:
|
|
|
1704
1591
|
has_session = session_file.exists()
|
|
1705
1592
|
use_local_keys_env = os.getenv("USE_LOCAL_KEYS", "").lower()
|
|
1706
1593
|
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1710
|
-
|
|
1711
|
-
|
|
1712
|
-
|
|
1713
|
-
#
|
|
1594
|
+
# Priority order for key mode:
|
|
1595
|
+
# 1. USE_LOCAL_KEYS env var (explicit override)
|
|
1596
|
+
# 2. Temp API key from session (fast mode)
|
|
1597
|
+
# 3. Default to backend if session exists
|
|
1598
|
+
|
|
1599
|
+
if use_local_keys_env == "true":
|
|
1600
|
+
# Explicit local keys mode - always respect this
|
|
1714
1601
|
use_local_keys = True
|
|
1715
1602
|
elif use_local_keys_env == "false":
|
|
1716
1603
|
# Explicit backend mode
|
|
1717
1604
|
use_local_keys = False
|
|
1605
|
+
elif has_session and hasattr(self, 'temp_api_key') and self.temp_api_key:
|
|
1606
|
+
# Session exists with temp key → use local mode (fast!)
|
|
1607
|
+
use_local_keys = True
|
|
1608
|
+
elif has_session:
|
|
1609
|
+
# Session exists but no temp key → use backend mode
|
|
1610
|
+
use_local_keys = False
|
|
1718
1611
|
else:
|
|
1719
|
-
#
|
|
1612
|
+
# No session, no explicit setting → default to backend
|
|
1720
1613
|
use_local_keys = False
|
|
1721
1614
|
|
|
1722
1615
|
if not use_local_keys:
|
|
@@ -1892,6 +1785,14 @@ class EnhancedNocturnalAgent:
|
|
|
1892
1785
|
)
|
|
1893
1786
|
|
|
1894
1787
|
try:
|
|
1788
|
+
# Detect language preference from stored state
|
|
1789
|
+
language = getattr(self, 'language_preference', 'en')
|
|
1790
|
+
|
|
1791
|
+
# Build system instruction for language enforcement
|
|
1792
|
+
system_instruction = ""
|
|
1793
|
+
if language == 'zh-TW':
|
|
1794
|
+
system_instruction = "CRITICAL: You MUST respond entirely in Traditional Chinese (繁體中文). Use Chinese characters (漢字), NOT pinyin romanization. All explanations, descriptions, and responses must be in Chinese characters."
|
|
1795
|
+
|
|
1895
1796
|
# Build request with API context as separate field
|
|
1896
1797
|
payload = {
|
|
1897
1798
|
"query": query, # Keep query clean
|
|
@@ -1899,7 +1800,9 @@ class EnhancedNocturnalAgent:
|
|
|
1899
1800
|
"api_context": api_results, # Send API results separately
|
|
1900
1801
|
"model": "openai/gpt-oss-120b", # PRODUCTION: 120B - best test results
|
|
1901
1802
|
"temperature": 0.2, # Low temp for accuracy
|
|
1902
|
-
"max_tokens": 4000
|
|
1803
|
+
"max_tokens": 4000,
|
|
1804
|
+
"language": language, # Pass language preference
|
|
1805
|
+
"system_instruction": system_instruction if system_instruction else None # Only include if set
|
|
1903
1806
|
}
|
|
1904
1807
|
|
|
1905
1808
|
# Call backend
|
|
@@ -1931,10 +1834,9 @@ class EnhancedNocturnalAgent:
|
|
|
1931
1834
|
elif response.status == 503:
|
|
1932
1835
|
# Backend AI service temporarily unavailable (Cerebras/Groq rate limited)
|
|
1933
1836
|
# Auto-retry silently with exponential backoff
|
|
1934
|
-
|
|
1837
|
+
|
|
1935
1838
|
print("\n💭 Thinking... (backend is busy, retrying automatically)")
|
|
1936
|
-
|
|
1937
|
-
import asyncio
|
|
1839
|
+
|
|
1938
1840
|
retry_delays = [5, 15, 30] # Exponential backoff
|
|
1939
1841
|
|
|
1940
1842
|
for retry_num, delay in enumerate(retry_delays):
|
|
@@ -2440,11 +2342,64 @@ class EnhancedNocturnalAgent:
|
|
|
2440
2342
|
break
|
|
2441
2343
|
|
|
2442
2344
|
output = '\n'.join(output_lines).strip()
|
|
2345
|
+
debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
|
|
2346
|
+
|
|
2347
|
+
# Log execution details in debug mode
|
|
2348
|
+
if debug_mode:
|
|
2349
|
+
output_preview = output[:200] if output else "(no output)"
|
|
2350
|
+
print(f"✅ Command executed: {command}")
|
|
2351
|
+
print(f"📤 Output ({len(output)} chars): {output_preview}...")
|
|
2352
|
+
|
|
2443
2353
|
return output if output else "Command executed (no output)"
|
|
2444
2354
|
|
|
2445
2355
|
except Exception as e:
|
|
2356
|
+
debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
|
|
2357
|
+
if debug_mode:
|
|
2358
|
+
print(f"❌ Command failed: {command}")
|
|
2359
|
+
print(f"❌ Error: {e}")
|
|
2446
2360
|
return f"ERROR: {e}"
|
|
2447
2361
|
|
|
2362
|
+
def _format_shell_output(self, output: str, command: str) -> Dict[str, Any]:
|
|
2363
|
+
"""
|
|
2364
|
+
Format shell command output for display.
|
|
2365
|
+
Returns dictionary with formatted preview and full output.
|
|
2366
|
+
"""
|
|
2367
|
+
lines = output.split('\n') if output else []
|
|
2368
|
+
|
|
2369
|
+
# Detect output type based on command
|
|
2370
|
+
command_lower = command.lower()
|
|
2371
|
+
|
|
2372
|
+
formatted = {
|
|
2373
|
+
"type": "shell_output",
|
|
2374
|
+
"command": command,
|
|
2375
|
+
"line_count": len(lines),
|
|
2376
|
+
"byte_count": len(output),
|
|
2377
|
+
"preview": '\n'.join(lines[:10]) if lines else "(no output)",
|
|
2378
|
+
"full_output": output
|
|
2379
|
+
}
|
|
2380
|
+
|
|
2381
|
+
# Enhanced formatting based on command type
|
|
2382
|
+
if any(cmd in command_lower for cmd in ['ls', 'dir']):
|
|
2383
|
+
formatted["type"] = "directory_listing"
|
|
2384
|
+
formatted["preview"] = f"📁 Found {len([l for l in lines if l.strip()])} items"
|
|
2385
|
+
elif any(cmd in command_lower for cmd in ['find', 'locate', 'search']):
|
|
2386
|
+
formatted["type"] = "search_results"
|
|
2387
|
+
formatted["preview"] = f"🔍 Found {len([l for l in lines if l.strip()])} matches"
|
|
2388
|
+
elif any(cmd in command_lower for cmd in ['grep', 'match']):
|
|
2389
|
+
formatted["type"] = "search_results"
|
|
2390
|
+
formatted["preview"] = f"🔍 Found {len([l for l in lines if l.strip()])} matching lines"
|
|
2391
|
+
elif any(cmd in command_lower for cmd in ['cat', 'head', 'tail']):
|
|
2392
|
+
formatted["type"] = "file_content"
|
|
2393
|
+
formatted["preview"] = f"📄 {len(lines)} lines of content"
|
|
2394
|
+
elif any(cmd in command_lower for cmd in ['pwd', 'cd']):
|
|
2395
|
+
formatted["type"] = "directory_change"
|
|
2396
|
+
formatted["preview"] = f"📍 {output.strip()}"
|
|
2397
|
+
elif any(cmd in command_lower for cmd in ['mkdir', 'touch', 'create']):
|
|
2398
|
+
formatted["type"] = "file_creation"
|
|
2399
|
+
formatted["preview"] = f"✨ Created: {output.strip()}"
|
|
2400
|
+
|
|
2401
|
+
return formatted
|
|
2402
|
+
|
|
2448
2403
|
# ========================================================================
|
|
2449
2404
|
# DIRECT FILE OPERATIONS (Claude Code / Cursor Parity)
|
|
2450
2405
|
# ========================================================================
|
|
@@ -3395,8 +3350,11 @@ class EnhancedNocturnalAgent:
|
|
|
3395
3350
|
'what files', 'which files', 'how many files',
|
|
3396
3351
|
'grep', 'search', 'look for', 'count',
|
|
3397
3352
|
'.py', '.txt', '.js', '.java', '.cpp', '.c', '.h',
|
|
3398
|
-
'function', 'class', 'definition', 'route', 'endpoint',
|
|
3399
|
-
'codebase', 'project structure', 'source code'
|
|
3353
|
+
'function', 'method', 'class', 'definition', 'route', 'endpoint',
|
|
3354
|
+
'codebase', 'project structure', 'source code', 'implementation',
|
|
3355
|
+
'compare', 'analyze', 'explain', 'purpose', 'what does', 'how does',
|
|
3356
|
+
'this codebase', 'this repo', 'this repository', 'this project',
|
|
3357
|
+
'our codebase', 'our repo', 'local code', 'local files'
|
|
3400
3358
|
]
|
|
3401
3359
|
|
|
3402
3360
|
question_lower = question.lower()
|
|
@@ -3466,12 +3424,17 @@ class EnhancedNocturnalAgent:
|
|
|
3466
3424
|
matched_types.append("financial")
|
|
3467
3425
|
apis_to_use.append("finsight")
|
|
3468
3426
|
|
|
3469
|
-
|
|
3427
|
+
# Check for explicit local/codebase indicators FIRST (highest priority)
|
|
3428
|
+
local_indicators = ['this codebase', 'this repo', 'this repository', 'this project',
|
|
3429
|
+
'our codebase', 'our repo', 'local code', 'local files']
|
|
3430
|
+
is_local_query = any(indicator in question_lower for indicator in local_indicators)
|
|
3431
|
+
|
|
3432
|
+
if any(keyword in question_lower for keyword in research_keywords) and not is_local_query:
|
|
3470
3433
|
matched_types.append("research")
|
|
3471
3434
|
apis_to_use.append("archive")
|
|
3472
|
-
|
|
3435
|
+
|
|
3473
3436
|
# Qualitative queries often involve research
|
|
3474
|
-
if analysis_mode in ("qualitative", "mixed") and "research" not in matched_types:
|
|
3437
|
+
if analysis_mode in ("qualitative", "mixed") and "research" not in matched_types and not is_local_query:
|
|
3475
3438
|
matched_types.append("research")
|
|
3476
3439
|
if "archive" not in apis_to_use:
|
|
3477
3440
|
apis_to_use.append("archive")
|
|
@@ -3555,10 +3518,59 @@ class EnhancedNocturnalAgent:
|
|
|
3555
3518
|
if workflow_response:
|
|
3556
3519
|
return workflow_response
|
|
3557
3520
|
|
|
3521
|
+
# Detect and store language preference from user input
|
|
3522
|
+
self._detect_language_preference(request.question)
|
|
3523
|
+
|
|
3558
3524
|
# Initialize
|
|
3559
3525
|
api_results = {}
|
|
3560
3526
|
tools_used = []
|
|
3561
3527
|
debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
|
|
3528
|
+
|
|
3529
|
+
if self._is_generic_test_prompt(request.question):
|
|
3530
|
+
return self._quick_reply(
|
|
3531
|
+
request,
|
|
3532
|
+
"Looks like you're just testing. Let me know what you'd like me to dig into and I'll jump on it.",
|
|
3533
|
+
tools_used=["quick_reply"],
|
|
3534
|
+
confidence=0.4,
|
|
3535
|
+
)
|
|
3536
|
+
|
|
3537
|
+
if self._is_location_query(request.question):
|
|
3538
|
+
cwd_line = ""
|
|
3539
|
+
tools: List[str] = []
|
|
3540
|
+
|
|
3541
|
+
if self.shell_session:
|
|
3542
|
+
pwd_output = self.execute_command("pwd")
|
|
3543
|
+
if pwd_output and not pwd_output.startswith("ERROR"):
|
|
3544
|
+
cwd_line = pwd_output.strip().splitlines()[-1]
|
|
3545
|
+
tools.append("shell_execution")
|
|
3546
|
+
|
|
3547
|
+
if not cwd_line:
|
|
3548
|
+
try:
|
|
3549
|
+
cwd_line = os.getcwd()
|
|
3550
|
+
except Exception:
|
|
3551
|
+
cwd_line = ""
|
|
3552
|
+
|
|
3553
|
+
if cwd_line:
|
|
3554
|
+
self.file_context["current_cwd"] = cwd_line
|
|
3555
|
+
self.file_context["last_directory"] = cwd_line
|
|
3556
|
+
message = (
|
|
3557
|
+
f"We're in {cwd_line}."
|
|
3558
|
+
if "shell_execution" not in tools
|
|
3559
|
+
else f"We're in {cwd_line} (via `pwd`)."
|
|
3560
|
+
)
|
|
3561
|
+
return self._quick_reply(
|
|
3562
|
+
request,
|
|
3563
|
+
message,
|
|
3564
|
+
tools_used=tools or ["quick_reply"],
|
|
3565
|
+
confidence=0.85,
|
|
3566
|
+
)
|
|
3567
|
+
else:
|
|
3568
|
+
return self._quick_reply(
|
|
3569
|
+
request,
|
|
3570
|
+
"I couldn't determine the working directory just now, but you can run `pwd` to double-check.",
|
|
3571
|
+
tools_used=tools or ["quick_reply"],
|
|
3572
|
+
confidence=0.3,
|
|
3573
|
+
)
|
|
3562
3574
|
|
|
3563
3575
|
# ========================================================================
|
|
3564
3576
|
# PRIORITY 1: SHELL PLANNING (Reasoning Layer - Runs FIRST for ALL modes)
|
|
@@ -3575,7 +3587,9 @@ class EnhancedNocturnalAgent:
|
|
|
3575
3587
|
'directory', 'folder', 'where', 'find', 'list', 'files', 'file', 'look', 'search', 'check', 'into',
|
|
3576
3588
|
'show', 'open', 'read', 'display', 'cat', 'view', 'contents', '.r', '.py', '.csv', '.ipynb',
|
|
3577
3589
|
'create', 'make', 'mkdir', 'touch', 'new', 'write', 'copy', 'move', 'delete', 'remove',
|
|
3578
|
-
'git', 'grep', 'navigate', 'go to', 'change to'
|
|
3590
|
+
'git', 'grep', 'navigate', 'go to', 'change to',
|
|
3591
|
+
'method', 'function', 'class', 'implementation', 'what does', 'how does', 'explain',
|
|
3592
|
+
'how many', 'count', 'lines', 'wc -l', 'number of'
|
|
3579
3593
|
])
|
|
3580
3594
|
|
|
3581
3595
|
if might_need_shell and self.shell_session:
|
|
@@ -3620,6 +3634,11 @@ IMPORTANT RULES:
|
|
|
3620
3634
|
11. 🚨 MULTI-STEP QUERIES: For queries like "read X and do Y", ONLY generate the FIRST step (reading X). The LLM will handle subsequent steps after seeing the file contents.
|
|
3621
3635
|
12. 🚨 NEVER use python -m py_compile or other code execution for finding bugs - just read the file with cat/head
|
|
3622
3636
|
13. 🚨 FOR GREP: When searching in a DIRECTORY (not a specific file), ALWAYS use -r flag for recursive search: grep -rn 'pattern' /path/to/dir 2>/dev/null
|
|
3637
|
+
14. 🚨 FOR FINDING FUNCTIONS/METHODS when file path is UNKNOWN: Use find + grep together:
|
|
3638
|
+
- "what does X method do in file.py?" → find . -name 'file.py' -exec grep -A 50 'def X' {{}} \\; 2>/dev/null
|
|
3639
|
+
- "explain process_request in agent.py" → find . -name '*agent.py' -exec grep -A 80 'def process_request' {{}} \\; 2>/dev/null
|
|
3640
|
+
- If you know exact path, use grep directly: grep -A 50 'def X' path/to/file.py 2>/dev/null
|
|
3641
|
+
15. 🚨 FOR COMPARING FILES: Read FIRST file only. The LLM will request the second file after analyzing the first.
|
|
3623
3642
|
|
|
3624
3643
|
Examples:
|
|
3625
3644
|
"where am i?" → {{"action": "execute", "command": "pwd", "reason": "Show current directory", "updates_context": false}}
|
|
@@ -3638,6 +3657,10 @@ Examples:
|
|
|
3638
3657
|
"find all bugs in code" → {{"action": "execute", "command": "grep -rn 'BUG:' . 2>/dev/null", "reason": "Search for bug markers in code", "updates_context": false}}
|
|
3639
3658
|
"read analyze.py and find bugs" → {{"action": "execute", "command": "head -200 analyze.py", "reason": "Read file to analyze bugs", "updates_context": false}}
|
|
3640
3659
|
"show me calc.py completely" → {{"action": "execute", "command": "cat calc.py", "reason": "Display entire file", "updates_context": false}}
|
|
3660
|
+
"what does process_request method do in enhanced_ai_agent.py" → {{"action": "execute", "command": "find . -name '*enhanced_ai_agent.py' -exec grep -A 80 'def process_request' {{}} \\; 2>/dev/null", "reason": "Find file and show method definition with context", "updates_context": false}}
|
|
3661
|
+
"explain the initialize method in agent.py" → {{"action": "execute", "command": "find . -name '*agent.py' -exec grep -A 50 'def initialize' {{}} \\; 2>/dev/null", "reason": "Find file and show method", "updates_context": false}}
|
|
3662
|
+
"find calculate function in utils.py" → {{"action": "execute", "command": "find . -name 'utils.py' -exec grep -A 30 'def calculate' {{}} \\; 2>/dev/null", "reason": "Find file and show function", "updates_context": false}}
|
|
3663
|
+
"compare file1.py and file2.py" → {{"action": "execute", "command": "head -100 file1.py", "reason": "Read first file (will read second in next step)", "updates_context": true}}
|
|
3641
3664
|
"git status" → {{"action": "execute", "command": "git status", "reason": "Check repository status", "updates_context": false}}
|
|
3642
3665
|
"what's in that file?" + last_file=data.csv → {{"action": "execute", "command": "head -100 data.csv", "reason": "Show file contents", "updates_context": false}}
|
|
3643
3666
|
"hello" → {{"action": "none", "reason": "Conversational greeting, no command needed"}}
|
|
@@ -3682,7 +3705,9 @@ JSON:"""
|
|
|
3682
3705
|
reason = plan.get("reason", "")
|
|
3683
3706
|
updates_context = plan.get("updates_context", False)
|
|
3684
3707
|
|
|
3685
|
-
|
|
3708
|
+
# Only show planning details with explicit verbose flag (don't leak to users)
|
|
3709
|
+
verbose_planning = debug_mode and os.getenv("NOCTURNAL_VERBOSE_PLANNING", "").lower() == "1"
|
|
3710
|
+
if verbose_planning:
|
|
3686
3711
|
print(f"🔍 SHELL PLAN: {plan}")
|
|
3687
3712
|
|
|
3688
3713
|
# GENERIC COMMAND EXECUTION - No more hardcoded actions!
|
|
@@ -3690,13 +3715,13 @@ JSON:"""
|
|
|
3690
3715
|
command = self._infer_shell_command(request.question)
|
|
3691
3716
|
shell_action = "execute"
|
|
3692
3717
|
updates_context = False
|
|
3693
|
-
if
|
|
3718
|
+
if verbose_planning:
|
|
3694
3719
|
print(f"🔄 Planner opted out; inferred fallback command: {command}")
|
|
3695
3720
|
|
|
3696
3721
|
if shell_action == "execute" and not command:
|
|
3697
3722
|
command = self._infer_shell_command(request.question)
|
|
3698
3723
|
plan["command"] = command
|
|
3699
|
-
if
|
|
3724
|
+
if verbose_planning:
|
|
3700
3725
|
print(f"🔄 Planner omitted command, inferred {command}")
|
|
3701
3726
|
|
|
3702
3727
|
if shell_action == "execute" and command:
|
|
@@ -3712,10 +3737,15 @@ JSON:"""
|
|
|
3712
3737
|
print(f"🔍 Command: {command}")
|
|
3713
3738
|
print(f"🔍 Safety: {safety_level}")
|
|
3714
3739
|
|
|
3715
|
-
if safety_level
|
|
3740
|
+
if safety_level in ('BLOCKED', 'DANGEROUS'):
|
|
3741
|
+
reason = (
|
|
3742
|
+
"Command classified as destructive; requires manual confirmation"
|
|
3743
|
+
if safety_level == 'DANGEROUS'
|
|
3744
|
+
else "This command could cause system damage"
|
|
3745
|
+
)
|
|
3716
3746
|
api_results["shell_info"] = {
|
|
3717
3747
|
"error": f"Command blocked for safety: {command}",
|
|
3718
|
-
"reason":
|
|
3748
|
+
"reason": reason
|
|
3719
3749
|
}
|
|
3720
3750
|
else:
|
|
3721
3751
|
# ========================================
|
|
@@ -3768,7 +3798,8 @@ JSON:"""
|
|
|
3768
3798
|
pass # Fall back to shell execution
|
|
3769
3799
|
|
|
3770
3800
|
# Check for file search commands (find)
|
|
3771
|
-
|
|
3801
|
+
# BUT: Don't intercept find -exec commands (those need real shell execution)
|
|
3802
|
+
if not intercepted and 'find' in command and '-name' in command and '-exec' not in command:
|
|
3772
3803
|
try:
|
|
3773
3804
|
# import re removed - using module-level import
|
|
3774
3805
|
# Extract pattern: find ... -name '*pattern*'
|
|
@@ -3947,10 +3978,12 @@ JSON:"""
|
|
|
3947
3978
|
output = self.execute_command(command)
|
|
3948
3979
|
|
|
3949
3980
|
if not output.startswith("ERROR"):
|
|
3950
|
-
# Success - store results
|
|
3981
|
+
# Success - store results with formatted preview
|
|
3982
|
+
formatted_output = self._format_shell_output(output, command)
|
|
3951
3983
|
api_results["shell_info"] = {
|
|
3952
3984
|
"command": command,
|
|
3953
3985
|
"output": output,
|
|
3986
|
+
"formatted": formatted_output, # Add formatted version
|
|
3954
3987
|
"reason": reason,
|
|
3955
3988
|
"safety_level": safety_level
|
|
3956
3989
|
}
|
|
@@ -4145,16 +4178,14 @@ JSON:"""
|
|
|
4145
4178
|
if not is_vague:
|
|
4146
4179
|
# Archive API for research
|
|
4147
4180
|
if "archive" in request_analysis.get("apis", []):
|
|
4148
|
-
result = await self.search_academic_papers(request.question,
|
|
4181
|
+
result = await self.search_academic_papers(request.question, 5) # Get 5 papers for comprehensive review
|
|
4149
4182
|
if "error" not in result:
|
|
4150
|
-
#
|
|
4183
|
+
# KEEP abstracts for literature review - essential for paper understanding
|
|
4184
|
+
# Only remove full_text to save tokens
|
|
4151
4185
|
if "results" in result:
|
|
4152
4186
|
for paper in result["results"]:
|
|
4153
|
-
# Remove
|
|
4154
|
-
|
|
4155
|
-
paper.pop("tldr", None)
|
|
4156
|
-
paper.pop("full_text", None)
|
|
4157
|
-
# Keep only: title, authors, year, doi, url
|
|
4187
|
+
paper.pop("full_text", None) # Remove only full text, keep abstract & tldr
|
|
4188
|
+
# Keep: title, authors, year, doi, url, abstract, tldr
|
|
4158
4189
|
api_results["research"] = result
|
|
4159
4190
|
tools_used.append("archive_api")
|
|
4160
4191
|
|
|
@@ -4316,6 +4347,40 @@ JSON:"""
|
|
|
4316
4347
|
api_results=api_results,
|
|
4317
4348
|
tools_used=tools_used
|
|
4318
4349
|
)
|
|
4350
|
+
|
|
4351
|
+
# VALIDATION: Ensure we got a valid response (not planning JSON)
|
|
4352
|
+
if not response or not hasattr(response, 'response'):
|
|
4353
|
+
# Backend failed - create friendly error with available data
|
|
4354
|
+
if debug_mode:
|
|
4355
|
+
print(f"⚠️ Backend response invalid or missing")
|
|
4356
|
+
return ChatResponse(
|
|
4357
|
+
response="I ran into a technical issue processing that. Let me try to help with what I found:",
|
|
4358
|
+
error_message="Backend response invalid",
|
|
4359
|
+
tools_used=tools_used,
|
|
4360
|
+
api_results=api_results
|
|
4361
|
+
)
|
|
4362
|
+
|
|
4363
|
+
# Check if response contains planning JSON instead of final answer
|
|
4364
|
+
response_text = response.response.strip()
|
|
4365
|
+
if response_text.startswith('{') and '"action"' in response_text and '"command"' in response_text:
|
|
4366
|
+
# This is planning JSON, not a final response!
|
|
4367
|
+
if debug_mode:
|
|
4368
|
+
print(f"⚠️ Backend returned planning JSON instead of final response")
|
|
4369
|
+
|
|
4370
|
+
# Extract real output from api_results and generate friendly response
|
|
4371
|
+
shell_output = api_results.get('shell_info', {}).get('output', '')
|
|
4372
|
+
if shell_output:
|
|
4373
|
+
return ChatResponse(
|
|
4374
|
+
response=f"I found what you were looking for:\n\n{shell_output}",
|
|
4375
|
+
tools_used=tools_used,
|
|
4376
|
+
api_results=api_results
|
|
4377
|
+
)
|
|
4378
|
+
else:
|
|
4379
|
+
return ChatResponse(
|
|
4380
|
+
response=f"I completed the action: {api_results.get('shell_info', {}).get('command', '')}",
|
|
4381
|
+
tools_used=tools_used,
|
|
4382
|
+
api_results=api_results
|
|
4383
|
+
)
|
|
4319
4384
|
|
|
4320
4385
|
# POST-PROCESSING: Auto-extract code blocks and write files if user requested file creation
|
|
4321
4386
|
# This fixes the issue where LLM shows corrected code but doesn't create the file
|
|
@@ -4459,6 +4524,16 @@ JSON:"""
|
|
|
4459
4524
|
mentioned = _extract_filenames(request.question)
|
|
4460
4525
|
file_previews: List[Dict[str, Any]] = []
|
|
4461
4526
|
files_forbidden: List[str] = []
|
|
4527
|
+
|
|
4528
|
+
# Check if query is asking about specific functions/methods/classes OR file metadata
|
|
4529
|
+
# If so, SKIP auto-preview and let shell planning handle it
|
|
4530
|
+
query_lower = request.question.lower()
|
|
4531
|
+
asking_about_code_element = any(pattern in query_lower for pattern in [
|
|
4532
|
+
'method', 'function', 'class', 'def ', 'what does', 'how does',
|
|
4533
|
+
'explain the', 'find the', 'show me the', 'purpose of', 'implementation of',
|
|
4534
|
+
'how many lines', 'count lines', 'number of lines', 'wc -l', 'line count'
|
|
4535
|
+
])
|
|
4536
|
+
|
|
4462
4537
|
base_dir = Path.cwd().resolve()
|
|
4463
4538
|
sensitive_roots = {Path('/etc'), Path('/proc'), Path('/sys'), Path('/dev'), Path('/root'), Path('/usr'), Path('/bin'), Path('/sbin'), Path('/var')}
|
|
4464
4539
|
def _is_safe_path(path_str: str) -> bool:
|
|
@@ -4469,31 +4544,47 @@ JSON:"""
|
|
|
4469
4544
|
return str(rp).startswith(str(base_dir))
|
|
4470
4545
|
except Exception:
|
|
4471
4546
|
return False
|
|
4472
|
-
|
|
4473
|
-
|
|
4474
|
-
|
|
4475
|
-
|
|
4476
|
-
|
|
4477
|
-
|
|
4478
|
-
|
|
4547
|
+
|
|
4548
|
+
# Only auto-preview if NOT asking about specific code elements
|
|
4549
|
+
if not asking_about_code_element:
|
|
4550
|
+
for m in mentioned:
|
|
4551
|
+
if not _is_safe_path(m):
|
|
4552
|
+
files_forbidden.append(m)
|
|
4553
|
+
continue
|
|
4554
|
+
pr = await self._preview_file(m)
|
|
4555
|
+
# Only add successful previews (not errors)
|
|
4556
|
+
if pr and pr.get("type") != "error":
|
|
4557
|
+
file_previews.append(pr)
|
|
4558
|
+
else:
|
|
4559
|
+
# Query is about specific code elements - let shell planning handle with grep
|
|
4560
|
+
files_forbidden = [m for m in mentioned if not _is_safe_path(m)]
|
|
4479
4561
|
if file_previews:
|
|
4480
4562
|
api_results["files"] = file_previews
|
|
4481
|
-
#
|
|
4563
|
+
tools_used.append("read_file") # Track that files were read
|
|
4564
|
+
# Build grounded context from ALL text previews (for comparisons)
|
|
4482
4565
|
text_previews = [fp for fp in file_previews if fp.get("type") == "text" and fp.get("preview")]
|
|
4483
4566
|
files_context = ""
|
|
4484
4567
|
if text_previews:
|
|
4485
|
-
|
|
4486
|
-
|
|
4487
|
-
|
|
4568
|
+
# Detect comparison queries - include MORE context
|
|
4569
|
+
is_comparison = len(text_previews) > 1 or any(word in request.question.lower() for word in ['compare', 'difference', 'contrast', 'vs', 'versus'])
|
|
4570
|
+
line_limit = 200 if is_comparison else 100 # More lines for comparisons
|
|
4571
|
+
|
|
4572
|
+
# Include all files with appropriate context
|
|
4573
|
+
file_contexts = []
|
|
4574
|
+
for fp in text_previews:
|
|
4575
|
+
quoted = "\n".join(fp["preview"].splitlines()[:line_limit])
|
|
4576
|
+
file_contexts.append(f"File: {fp['path']}\n{quoted}")
|
|
4577
|
+
files_context = "\n\n---\n\n".join(file_contexts)
|
|
4488
4578
|
api_results["files_context"] = files_context
|
|
4489
|
-
elif mentioned:
|
|
4490
|
-
# Mentioned files but none found
|
|
4579
|
+
elif mentioned and not asking_about_code_element:
|
|
4580
|
+
# Mentioned files but none found (only set if we actually tried to preview them)
|
|
4491
4581
|
api_results["files_missing"] = mentioned
|
|
4492
4582
|
if files_forbidden:
|
|
4493
4583
|
api_results["files_forbidden"] = files_forbidden
|
|
4494
4584
|
|
|
4495
4585
|
workspace_listing: Optional[Dict[str, Any]] = None
|
|
4496
|
-
if
|
|
4586
|
+
# Only show workspace listing if NOT looking for specific missing files
|
|
4587
|
+
if not file_previews and not api_results.get("files_missing"):
|
|
4497
4588
|
file_browse_keywords = (
|
|
4498
4589
|
"list files",
|
|
4499
4590
|
"show files",
|
|
@@ -4513,7 +4604,8 @@ JSON:"""
|
|
|
4513
4604
|
workspace_listing = await self._get_workspace_listing()
|
|
4514
4605
|
api_results["workspace_listing"] = workspace_listing
|
|
4515
4606
|
|
|
4516
|
-
if
|
|
4607
|
+
# Don't show workspace listing if there are missing files (prioritize error)
|
|
4608
|
+
if workspace_listing and set(request_analysis.get("apis", [])) <= {"shell"} and not api_results.get("files_missing"):
|
|
4517
4609
|
return self._respond_with_workspace_listing(request, workspace_listing)
|
|
4518
4610
|
|
|
4519
4611
|
if "finsight" in request_analysis["apis"]:
|
|
@@ -4564,10 +4656,64 @@ JSON:"""
|
|
|
4564
4656
|
messages = [
|
|
4565
4657
|
{"role": "system", "content": system_prompt}
|
|
4566
4658
|
]
|
|
4659
|
+
|
|
4660
|
+
# CRITICAL: Inject research papers IMMEDIATELY after system prompt (highest priority)
|
|
4661
|
+
research_data = api_results.get("research")
|
|
4662
|
+
if research_data and research_data.get("results"):
|
|
4663
|
+
papers_text = "🚨 PAPERS ALREADY FOUND - SYNTHESIZE THESE NOW:\n\n"
|
|
4664
|
+
papers_text += "DO NOT say 'we will search' - the search is COMPLETE.\n"
|
|
4665
|
+
papers_text += "DO NOT say 'attempting' - papers are ALREADY HERE.\n"
|
|
4666
|
+
papers_text += "YOUR JOB: Synthesize these papers into a comprehensive literature review (500+ words).\n\n"
|
|
4667
|
+
|
|
4668
|
+
for i, paper in enumerate(research_data["results"][:5], 1):
|
|
4669
|
+
papers_text += f"\n═══ PAPER {i} ═══\n"
|
|
4670
|
+
papers_text += f"Title: {paper.get('title', 'No title')}\n"
|
|
4671
|
+
# Handle authors as either list of dicts or list of strings
|
|
4672
|
+
authors = paper.get('authors', [])
|
|
4673
|
+
if authors:
|
|
4674
|
+
if isinstance(authors[0], dict):
|
|
4675
|
+
author_names = [a.get('name', 'Unknown') for a in authors[:3]]
|
|
4676
|
+
else:
|
|
4677
|
+
author_names = authors[:3]
|
|
4678
|
+
papers_text += f"Authors: {', '.join(author_names)}\n"
|
|
4679
|
+
papers_text += f"Year: {paper.get('year', 'N/A')}\n"
|
|
4680
|
+
if paper.get('abstract'):
|
|
4681
|
+
papers_text += f"\nAbstract:\n{paper['abstract']}\n"
|
|
4682
|
+
if paper.get('tldr'):
|
|
4683
|
+
papers_text += f"\nTL;DR: {paper['tldr']}\n"
|
|
4684
|
+
papers_text += "\n"
|
|
4685
|
+
|
|
4686
|
+
papers_text += "\n🚨 SYNTHESIZE THESE PAPERS NOW - Include:\n"
|
|
4687
|
+
papers_text += "- Overview of the research area\n"
|
|
4688
|
+
papers_text += "- Key findings from each paper's abstract\n"
|
|
4689
|
+
papers_text += "- Methods and approaches used\n"
|
|
4690
|
+
papers_text += "- Comparison and contrast of different approaches\n"
|
|
4691
|
+
papers_text += "- Implications and future directions\n"
|
|
4692
|
+
papers_text += "\nMINIMUM 500 WORDS. Use the abstracts above."
|
|
4693
|
+
|
|
4694
|
+
messages.append({"role": "system", "content": papers_text})
|
|
4695
|
+
|
|
4567
4696
|
# If we have file context, inject it as an additional grounding message
|
|
4568
4697
|
fc = api_results.get("files_context")
|
|
4569
4698
|
if fc:
|
|
4570
|
-
|
|
4699
|
+
# Count how many files are being compared
|
|
4700
|
+
file_count = len([fp for fp in api_results.get("files", []) if fp.get("type") == "text"])
|
|
4701
|
+
|
|
4702
|
+
if file_count > 1:
|
|
4703
|
+
# Multi-file comparison - make it VERY explicit
|
|
4704
|
+
comparison_msg = "🚨 MULTIPLE FILES PROVIDED FOR COMPARISON:\n\n"
|
|
4705
|
+
comparison_msg += fc
|
|
4706
|
+
comparison_msg += "\n\n🚨 CRITICAL INSTRUCTIONS FOR COMPARISON:\n"
|
|
4707
|
+
comparison_msg += "1. Read ALL file contents above carefully\n"
|
|
4708
|
+
comparison_msg += "2. Extract specific data points, numbers, percentages from EACH file\n"
|
|
4709
|
+
comparison_msg += "3. Compare and contrast the ACTUAL content (not just filenames)\n"
|
|
4710
|
+
comparison_msg += "4. If asked about differences, cite EXACT lines or values from BOTH files\n"
|
|
4711
|
+
comparison_msg += "5. Do NOT make general statements - be specific with examples from the files\n"
|
|
4712
|
+
comparison_msg += "\nAnswer based STRICTLY on the file contents above. Do not run shell commands."
|
|
4713
|
+
messages.append({"role": "system", "content": comparison_msg})
|
|
4714
|
+
else:
|
|
4715
|
+
# Single file - normal handling
|
|
4716
|
+
messages.append({"role": "system", "content": f"Grounding from mentioned file(s):\n{fc}\n\nAnswer based strictly on this content when relevant. Do not run shell commands."})
|
|
4571
4717
|
missing = api_results.get("files_missing")
|
|
4572
4718
|
if missing:
|
|
4573
4719
|
messages.append({"role": "system", "content": f"User mentioned file(s) not found: {missing}. Respond explicitly that the file was not found and avoid speculation."})
|
|
@@ -4790,6 +4936,92 @@ JSON:"""
|
|
|
4790
4936
|
final_response = "I searched but found no matches. The search returned no results."
|
|
4791
4937
|
logger.warning("🚨 Hallucination prevented: LLM tried to make up results when shell output was empty")
|
|
4792
4938
|
|
|
4939
|
+
# ========================================
|
|
4940
|
+
# PHASE 2: THINKING BLOCKS
|
|
4941
|
+
# Show reasoning process for complex queries
|
|
4942
|
+
# ========================================
|
|
4943
|
+
thinking_text = ""
|
|
4944
|
+
try:
|
|
4945
|
+
thinking_context = {
|
|
4946
|
+
'tools_used': tools_used,
|
|
4947
|
+
'api_results': api_results,
|
|
4948
|
+
'conversation_history': self.conversation_history[-3:] if self.conversation_history else []
|
|
4949
|
+
}
|
|
4950
|
+
|
|
4951
|
+
thinking_text = await generate_and_format_thinking(
|
|
4952
|
+
request.question,
|
|
4953
|
+
thinking_context,
|
|
4954
|
+
show_full=False # Compact version
|
|
4955
|
+
)
|
|
4956
|
+
|
|
4957
|
+
if thinking_text:
|
|
4958
|
+
logger.info(f"💭 Generated thinking process for query")
|
|
4959
|
+
|
|
4960
|
+
except Exception as e:
|
|
4961
|
+
logger.error(f"Thinking generation failed: {e}")
|
|
4962
|
+
|
|
4963
|
+
# ========================================
|
|
4964
|
+
# PHASE 1 QUALITY PIPELINE
|
|
4965
|
+
# Process response through quality improvements
|
|
4966
|
+
# ========================================
|
|
4967
|
+
try:
|
|
4968
|
+
pipeline_context = {
|
|
4969
|
+
'tools_used': tools_used,
|
|
4970
|
+
'api_results': api_results,
|
|
4971
|
+
'query_type': request_analysis.get('type'),
|
|
4972
|
+
'shell_output_type': 'generic'
|
|
4973
|
+
}
|
|
4974
|
+
|
|
4975
|
+
processed = await ResponsePipeline.process(
|
|
4976
|
+
final_response,
|
|
4977
|
+
request.question,
|
|
4978
|
+
pipeline_context,
|
|
4979
|
+
response_type="generic"
|
|
4980
|
+
)
|
|
4981
|
+
|
|
4982
|
+
final_response = processed.final_response
|
|
4983
|
+
|
|
4984
|
+
# Log quality improvements
|
|
4985
|
+
if processed.improvements_applied:
|
|
4986
|
+
logger.info(f"✨ Quality improvements: {', '.join(processed.improvements_applied)}")
|
|
4987
|
+
logger.info(f"📊 Quality score: {processed.quality_score:.2f}")
|
|
4988
|
+
|
|
4989
|
+
except Exception as e:
|
|
4990
|
+
# If pipeline fails, log but continue with original response
|
|
4991
|
+
logger.error(f"Quality pipeline failed: {e}, using original response")
|
|
4992
|
+
|
|
4993
|
+
# ========================================
|
|
4994
|
+
# PHASE 2: CONFIDENCE CALIBRATION
|
|
4995
|
+
# Assess confidence and add caveats if needed
|
|
4996
|
+
# ========================================
|
|
4997
|
+
try:
|
|
4998
|
+
confidence_context = {
|
|
4999
|
+
'tools_used': tools_used,
|
|
5000
|
+
'api_results': api_results,
|
|
5001
|
+
'query_type': request_analysis.get('type')
|
|
5002
|
+
}
|
|
5003
|
+
|
|
5004
|
+
final_response, confidence_assessment = assess_and_apply_caveat(
|
|
5005
|
+
final_response,
|
|
5006
|
+
request.question,
|
|
5007
|
+
confidence_context
|
|
5008
|
+
)
|
|
5009
|
+
|
|
5010
|
+
logger.info(
|
|
5011
|
+
f"🎯 Confidence: {confidence_assessment.confidence_level} "
|
|
5012
|
+
f"({confidence_assessment.confidence_score:.2f})"
|
|
5013
|
+
)
|
|
5014
|
+
|
|
5015
|
+
if confidence_assessment.should_add_caveat:
|
|
5016
|
+
logger.info(f"⚠️ Added caveat due to low confidence")
|
|
5017
|
+
|
|
5018
|
+
except Exception as e:
|
|
5019
|
+
logger.error(f"Confidence calibration failed: {e}")
|
|
5020
|
+
|
|
5021
|
+
# Prepend thinking blocks if generated
|
|
5022
|
+
if thinking_text:
|
|
5023
|
+
final_response = thinking_text + "\n\n" + final_response
|
|
5024
|
+
|
|
4793
5025
|
expected_tools: Set[str] = set()
|
|
4794
5026
|
if "finsight" in request_analysis.get("apis", []):
|
|
4795
5027
|
expected_tools.add("finsight_api")
|
|
@@ -4825,20 +5057,25 @@ JSON:"""
|
|
|
4825
5057
|
|
|
4826
5058
|
except Exception as e:
|
|
4827
5059
|
import traceback
|
|
4828
|
-
details = str(e)
|
|
4829
5060
|
debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
|
|
4830
5061
|
if debug_mode:
|
|
4831
5062
|
print("🔴 FULL TRACEBACK:")
|
|
4832
5063
|
traceback.print_exc()
|
|
4833
|
-
|
|
4834
|
-
|
|
4835
|
-
|
|
4836
|
-
|
|
5064
|
+
|
|
5065
|
+
# ========================================
|
|
5066
|
+
# PHASE 1 GRACEFUL ERROR HANDLING
|
|
5067
|
+
# Never expose technical details to users
|
|
5068
|
+
# ========================================
|
|
5069
|
+
user_friendly_message = GracefulErrorHandler.create_fallback_response(
|
|
5070
|
+
request.question,
|
|
5071
|
+
e
|
|
5072
|
+
)
|
|
5073
|
+
|
|
4837
5074
|
return ChatResponse(
|
|
4838
|
-
response=
|
|
5075
|
+
response=user_friendly_message,
|
|
4839
5076
|
timestamp=datetime.now().isoformat(),
|
|
4840
5077
|
confidence_score=0.0,
|
|
4841
|
-
error_message=
|
|
5078
|
+
error_message=str(e) if debug_mode else None # Only include technical error in debug mode
|
|
4842
5079
|
)
|
|
4843
5080
|
|
|
4844
5081
|
async def process_request_streaming(self, request: ChatRequest):
|
|
@@ -4921,9 +5158,19 @@ JSON:"""
|
|
|
4921
5158
|
mentioned = _extract_filenames(request.question)
|
|
4922
5159
|
file_previews: List[Dict[str, Any]] = []
|
|
4923
5160
|
files_forbidden: List[str] = []
|
|
5161
|
+
|
|
5162
|
+
# Check if query is asking about specific functions/methods/classes OR file metadata
|
|
5163
|
+
# If so, SKIP auto-preview and let shell planning handle it
|
|
5164
|
+
query_lower = request.question.lower()
|
|
5165
|
+
asking_about_code_element = any(pattern in query_lower for pattern in [
|
|
5166
|
+
'method', 'function', 'class', 'def ', 'what does', 'how does',
|
|
5167
|
+
'explain the', 'find the', 'show me the', 'purpose of', 'implementation of',
|
|
5168
|
+
'how many lines', 'count lines', 'number of lines', 'wc -l', 'line count'
|
|
5169
|
+
])
|
|
5170
|
+
|
|
4924
5171
|
base_dir = Path.cwd().resolve()
|
|
4925
5172
|
sensitive_roots = {Path('/etc'), Path('/proc'), Path('/sys'), Path('/dev'), Path('/root'), Path('/usr'), Path('/bin'), Path('/sbin'), Path('/var')}
|
|
4926
|
-
|
|
5173
|
+
|
|
4927
5174
|
def _is_safe_path(path_str: str) -> bool:
|
|
4928
5175
|
try:
|
|
4929
5176
|
rp = Path(path_str).resolve()
|
|
@@ -4932,39 +5179,57 @@ JSON:"""
|
|
|
4932
5179
|
return str(rp).startswith(str(base_dir))
|
|
4933
5180
|
except Exception:
|
|
4934
5181
|
return False
|
|
4935
|
-
|
|
4936
|
-
|
|
4937
|
-
|
|
4938
|
-
|
|
4939
|
-
|
|
4940
|
-
|
|
4941
|
-
|
|
4942
|
-
|
|
4943
|
-
|
|
5182
|
+
|
|
5183
|
+
# Only auto-preview if NOT asking about specific code elements or metadata
|
|
5184
|
+
if not asking_about_code_element:
|
|
5185
|
+
for m in mentioned:
|
|
5186
|
+
if not _is_safe_path(m):
|
|
5187
|
+
files_forbidden.append(m)
|
|
5188
|
+
continue
|
|
5189
|
+
pr = await self._preview_file(m)
|
|
5190
|
+
# Only add successful previews (not errors)
|
|
5191
|
+
if pr and pr.get("type") != "error":
|
|
5192
|
+
file_previews.append(pr)
|
|
5193
|
+
else:
|
|
5194
|
+
# Query is about specific code elements - let shell planning handle with grep/wc
|
|
5195
|
+
files_forbidden = [m for m in mentioned if not _is_safe_path(m)]
|
|
5196
|
+
|
|
4944
5197
|
if file_previews:
|
|
4945
5198
|
api_results["files"] = file_previews
|
|
5199
|
+
tools_used.append("read_file") # Track that files were read
|
|
5200
|
+
# Build grounded context from ALL text previews (for comparisons)
|
|
4946
5201
|
text_previews = [fp for fp in file_previews if fp.get("type") == "text" and fp.get("preview")]
|
|
4947
5202
|
files_context = ""
|
|
4948
5203
|
if text_previews:
|
|
4949
|
-
|
|
4950
|
-
|
|
4951
|
-
|
|
5204
|
+
# Detect comparison queries - include MORE context
|
|
5205
|
+
is_comparison = len(text_previews) > 1 or any(word in request.question.lower() for word in ['compare', 'difference', 'contrast', 'vs', 'versus'])
|
|
5206
|
+
line_limit = 200 if is_comparison else 100 # More lines for comparisons
|
|
5207
|
+
|
|
5208
|
+
# Include all files with appropriate context
|
|
5209
|
+
file_contexts = []
|
|
5210
|
+
for fp in text_previews:
|
|
5211
|
+
quoted = "\n".join(fp["preview"].splitlines()[:line_limit])
|
|
5212
|
+
file_contexts.append(f"File: {fp['path']}\n{quoted}")
|
|
5213
|
+
files_context = "\n\n---\n\n".join(file_contexts)
|
|
4952
5214
|
api_results["files_context"] = files_context
|
|
4953
|
-
elif mentioned:
|
|
5215
|
+
elif mentioned and not asking_about_code_element:
|
|
5216
|
+
# Mentioned files but none found (only set if we actually tried to preview them)
|
|
4954
5217
|
api_results["files_missing"] = mentioned
|
|
4955
5218
|
if files_forbidden:
|
|
4956
5219
|
api_results["files_forbidden"] = files_forbidden
|
|
4957
5220
|
|
|
4958
5221
|
# Workspace listing
|
|
4959
5222
|
workspace_listing: Optional[Dict[str, Any]] = None
|
|
4960
|
-
if
|
|
5223
|
+
# Only show workspace listing if NOT looking for specific missing files
|
|
5224
|
+
if not file_previews and not api_results.get("files_missing"):
|
|
4961
5225
|
file_browse_keywords = ("list files", "show files", "what files")
|
|
4962
5226
|
describe_files = ("file" in question_lower or "directory" in question_lower)
|
|
4963
5227
|
if any(keyword in question_lower for keyword in file_browse_keywords) or describe_files:
|
|
4964
5228
|
workspace_listing = await self._get_workspace_listing()
|
|
4965
5229
|
api_results["workspace_listing"] = workspace_listing
|
|
4966
5230
|
|
|
4967
|
-
if
|
|
5231
|
+
# Don't show workspace listing if there are missing files (prioritize error)
|
|
5232
|
+
if workspace_listing and set(request_analysis.get("apis", [])) <= {"shell"} and not api_results.get("files_missing"):
|
|
4968
5233
|
result = self._respond_with_workspace_listing(request, workspace_listing)
|
|
4969
5234
|
async def workspace_gen():
|
|
4970
5235
|
yield result.response
|
|
@@ -4996,10 +5261,63 @@ JSON:"""
|
|
|
4996
5261
|
# Build messages
|
|
4997
5262
|
system_prompt = self._build_system_prompt(request_analysis, memory_context, api_results)
|
|
4998
5263
|
messages = [{"role": "system", "content": system_prompt}]
|
|
4999
|
-
|
|
5264
|
+
|
|
5265
|
+
# CRITICAL: Inject research papers IMMEDIATELY after system prompt (highest priority)
|
|
5266
|
+
research_data = api_results.get("research")
|
|
5267
|
+
if research_data and research_data.get("results"):
|
|
5268
|
+
papers_text = "🚨 PAPERS ALREADY FOUND - SYNTHESIZE THESE NOW:\n\n"
|
|
5269
|
+
papers_text += "DO NOT say 'we will search' - the search is COMPLETE.\n"
|
|
5270
|
+
papers_text += "DO NOT say 'attempting' - papers are ALREADY HERE.\n"
|
|
5271
|
+
papers_text += "YOUR JOB: Synthesize these papers into a comprehensive literature review (500+ words).\n\n"
|
|
5272
|
+
|
|
5273
|
+
for i, paper in enumerate(research_data["results"][:5], 1):
|
|
5274
|
+
papers_text += f"\n═══ PAPER {i} ═══\n"
|
|
5275
|
+
papers_text += f"Title: {paper.get('title', 'No title')}\n"
|
|
5276
|
+
# Handle authors as either list of dicts or list of strings
|
|
5277
|
+
authors = paper.get('authors', [])
|
|
5278
|
+
if authors:
|
|
5279
|
+
if isinstance(authors[0], dict):
|
|
5280
|
+
author_names = [a.get('name', 'Unknown') for a in authors[:3]]
|
|
5281
|
+
else:
|
|
5282
|
+
author_names = authors[:3]
|
|
5283
|
+
papers_text += f"Authors: {', '.join(author_names)}\n"
|
|
5284
|
+
papers_text += f"Year: {paper.get('year', 'N/A')}\n"
|
|
5285
|
+
if paper.get('abstract'):
|
|
5286
|
+
papers_text += f"\nAbstract:\n{paper['abstract']}\n"
|
|
5287
|
+
if paper.get('tldr'):
|
|
5288
|
+
papers_text += f"\nTL;DR: {paper['tldr']}\n"
|
|
5289
|
+
papers_text += "\n"
|
|
5290
|
+
|
|
5291
|
+
papers_text += "\n🚨 SYNTHESIZE THESE PAPERS NOW - Include:\n"
|
|
5292
|
+
papers_text += "- Overview of the research area\n"
|
|
5293
|
+
papers_text += "- Key findings from each paper's abstract\n"
|
|
5294
|
+
papers_text += "- Methods and approaches used\n"
|
|
5295
|
+
papers_text += "- Comparison and contrast of different approaches\n"
|
|
5296
|
+
papers_text += "- Implications and future directions\n"
|
|
5297
|
+
papers_text += "\nMINIMUM 500 WORDS. Use the abstracts above."
|
|
5298
|
+
|
|
5299
|
+
messages.append({"role": "system", "content": papers_text})
|
|
5300
|
+
|
|
5000
5301
|
fc = api_results.get("files_context")
|
|
5001
5302
|
if fc:
|
|
5002
|
-
|
|
5303
|
+
# Count how many files are being compared
|
|
5304
|
+
file_count = len([fp for fp in api_results.get("files", []) if fp.get("type") == "text"])
|
|
5305
|
+
|
|
5306
|
+
if file_count > 1:
|
|
5307
|
+
# Multi-file comparison - make it VERY explicit
|
|
5308
|
+
comparison_msg = "🚨 MULTIPLE FILES PROVIDED FOR COMPARISON:\n\n"
|
|
5309
|
+
comparison_msg += fc
|
|
5310
|
+
comparison_msg += "\n\n🚨 CRITICAL INSTRUCTIONS FOR COMPARISON:\n"
|
|
5311
|
+
comparison_msg += "1. Read ALL file contents above carefully\n"
|
|
5312
|
+
comparison_msg += "2. Extract specific data points, numbers, percentages from EACH file\n"
|
|
5313
|
+
comparison_msg += "3. Compare and contrast the ACTUAL content (not just filenames)\n"
|
|
5314
|
+
comparison_msg += "4. If asked about differences, cite EXACT lines or values from BOTH files\n"
|
|
5315
|
+
comparison_msg += "5. Do NOT make general statements - be specific with examples from the files\n"
|
|
5316
|
+
comparison_msg += "\nAnswer based STRICTLY on the file contents above. Do not run shell commands."
|
|
5317
|
+
messages.append({"role": "system", "content": comparison_msg})
|
|
5318
|
+
else:
|
|
5319
|
+
# Single file - normal handling
|
|
5320
|
+
messages.append({"role": "system", "content": f"Grounding from mentioned file(s):\n{fc}"})
|
|
5003
5321
|
|
|
5004
5322
|
# Add conversation history (abbreviated - just recent)
|
|
5005
5323
|
if len(self.conversation_history) > 6:
|