cite-agent 1.3.8__py3-none-any.whl → 1.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,13 +17,15 @@ from importlib import resources
17
17
 
18
18
  import aiohttp
19
19
  from datetime import datetime, timezone
20
- from typing import Dict, Any, List, Optional, Tuple
20
+ from typing import Dict, Any, List, Optional, Tuple, Set
21
21
  from urllib.parse import urlparse
22
22
  from dataclasses import dataclass, field
23
23
  from pathlib import Path
24
+ import platform
24
25
 
25
26
  from .telemetry import TelemetryManager
26
27
  from .setup_config import DEFAULT_QUERY_LIMIT
28
+ from .conversation_archive import ConversationArchive
27
29
 
28
30
  # Suppress noise
29
31
  logging.basicConfig(level=logging.ERROR)
@@ -89,6 +91,7 @@ class EnhancedNocturnalAgent:
89
91
  from .workflow import WorkflowManager
90
92
  self.workflow = WorkflowManager()
91
93
  self.last_paper_result = None # Track last paper mentioned for "save that"
94
+ self.archive = ConversationArchive()
92
95
 
93
96
  # File context tracking (for pronoun resolution and multi-turn)
94
97
  self.file_context = {
@@ -98,6 +101,7 @@ class EnhancedNocturnalAgent:
98
101
  'recent_dirs': [], # Last 5 directories
99
102
  'current_cwd': None, # Track shell's current directory
100
103
  }
104
+ self._is_windows = os.name == "nt"
101
105
  try:
102
106
  self.per_user_token_limit = int(os.getenv("GROQ_PER_USER_TOKENS", 50000))
103
107
  except (TypeError, ValueError):
@@ -141,6 +145,24 @@ class EnhancedNocturnalAgent:
141
145
  self._health_ttl = 30.0
142
146
  self._recent_sources: List[Dict[str, Any]] = []
143
147
 
148
+ def _remove_expired_temp_key(self, session_file):
149
+ """Remove expired temporary API key from session file"""
150
+ try:
151
+ import json
152
+ with open(session_file, 'r') as f:
153
+ session_data = json.load(f)
154
+
155
+ # Remove temp key fields
156
+ session_data.pop('temp_api_key', None)
157
+ session_data.pop('temp_key_expires', None)
158
+ session_data.pop('temp_key_provider', None)
159
+
160
+ # Write back
161
+ with open(session_file, 'w') as f:
162
+ json.dump(session_data, f, indent=2)
163
+ except Exception as e:
164
+ logger.warning(f"Failed to remove expired temp key: {e}")
165
+
144
166
  def _load_authentication(self):
145
167
  """Load authentication from session file"""
146
168
  use_local_keys = os.getenv("USE_LOCAL_KEYS", "false").lower() == "true"
@@ -162,6 +184,38 @@ class EnhancedNocturnalAgent:
162
184
  session_data = json.load(f)
163
185
  self.auth_token = session_data.get('auth_token')
164
186
  self.user_id = session_data.get('account_id')
187
+
188
+ # NEW: Check for temporary local API key with expiration
189
+ temp_key = session_data.get('temp_api_key')
190
+ temp_key_expires = session_data.get('temp_key_expires')
191
+
192
+ if temp_key and temp_key_expires:
193
+ # Check if key is still valid
194
+ from datetime import datetime, timezone
195
+ try:
196
+ expires_at = datetime.fromisoformat(temp_key_expires.replace('Z', '+00:00'))
197
+ now = datetime.now(timezone.utc)
198
+
199
+ if now < expires_at:
200
+ # Key is still valid - use local mode for speed!
201
+ self.temp_api_key = temp_key
202
+ self.temp_key_provider = session_data.get('temp_key_provider', 'cerebras')
203
+ if debug_mode:
204
+ time_left = (expires_at - now).total_seconds() / 3600
205
+ print(f"✅ Using temporary local key (expires in {time_left:.1f}h)")
206
+ else:
207
+ # Key expired - remove it and fall back to backend
208
+ if debug_mode:
209
+ print(f"⏰ Temporary key expired, using backend mode")
210
+ self._remove_expired_temp_key(session_file)
211
+ self.temp_api_key = None
212
+ except Exception as e:
213
+ if debug_mode:
214
+ print(f"⚠️ Error parsing temp key expiration: {e}")
215
+ self.temp_api_key = None
216
+ else:
217
+ self.temp_api_key = None
218
+
165
219
  if debug_mode:
166
220
  print(f"🔍 _load_authentication: loaded auth_token={self.auth_token}, user_id={self.user_id}")
167
221
  except Exception as e:
@@ -169,6 +223,7 @@ class EnhancedNocturnalAgent:
169
223
  print(f"🔍 _load_authentication: ERROR loading session: {e}")
170
224
  self.auth_token = None
171
225
  self.user_id = None
226
+ self.temp_api_key = None
172
227
  else:
173
228
  # FALLBACK: Check if config.env has credentials but session.json is missing
174
229
  # This handles cases where old setup didn't create session.json
@@ -917,6 +972,56 @@ class EnhancedNocturnalAgent:
917
972
  if not api_results:
918
973
  logger.info("🔍 DEBUG: _format_api_results_for_prompt called with EMPTY api_results")
919
974
  return "No API results yet."
975
+
976
+ # Special formatting for shell results to make them VERY clear
977
+ if "shell_info" in api_results:
978
+ shell_info = api_results["shell_info"]
979
+ formatted_parts = ["=" * 60]
980
+ formatted_parts.append("🔧 SHELL COMMAND EXECUTION RESULTS (ALREADY EXECUTED)")
981
+ formatted_parts.append("=" * 60)
982
+
983
+ if "command" in shell_info:
984
+ formatted_parts.append(f"\n📝 Command that was executed:")
985
+ formatted_parts.append(f" $ {shell_info['command']}")
986
+
987
+ if "output" in shell_info:
988
+ formatted_parts.append(f"\n📤 Command output (THIS IS THE RESULT):")
989
+ formatted_parts.append(f"{shell_info['output']}")
990
+
991
+ if "error" in shell_info:
992
+ formatted_parts.append(f"\n❌ Error occurred:")
993
+ formatted_parts.append(f"{shell_info['error']}")
994
+
995
+ if "directory_contents" in shell_info:
996
+ formatted_parts.append(f"\n📂 Directory listing (THIS IS THE RESULT):")
997
+ formatted_parts.append(f"{shell_info['directory_contents']}")
998
+
999
+ if "search_results" in shell_info:
1000
+ formatted_parts.append(f"\n🔍 Search results (THIS IS THE RESULT):")
1001
+ formatted_parts.append(f"{shell_info['search_results']}")
1002
+
1003
+ formatted_parts.append("\n" + "=" * 60)
1004
+ formatted_parts.append("🚨 CRITICAL INSTRUCTION 🚨")
1005
+ formatted_parts.append("The command was ALREADY executed. The output above is the COMPLETE and ONLY result.")
1006
+ formatted_parts.append("YOU MUST present ONLY what is shown in the output above.")
1007
+ formatted_parts.append("DO NOT add file names, paths, or code that are NOT in the output above.")
1008
+ formatted_parts.append("DO NOT make up examples or additional results.")
1009
+ formatted_parts.append("If the output says 'No matches' or is empty, tell the user 'No results found'.")
1010
+ formatted_parts.append("DO NOT ask the user to run any commands - the results are already here.")
1011
+ formatted_parts.append("=" * 60)
1012
+
1013
+ # Add other api_results
1014
+ other_results = {k: v for k, v in api_results.items() if k != "shell_info"}
1015
+ if other_results:
1016
+ try:
1017
+ serialized = json.dumps(other_results, indent=2)
1018
+ except Exception:
1019
+ serialized = str(other_results)
1020
+ formatted_parts.append(f"\nOther data:\n{serialized}")
1021
+
1022
+ return "\n".join(formatted_parts)
1023
+
1024
+ # Normal formatting for non-shell results
920
1025
  try:
921
1026
  serialized = json.dumps(api_results, indent=2)
922
1027
  except Exception:
@@ -970,15 +1075,24 @@ class EnhancedNocturnalAgent:
970
1075
  "PRIMARY DIRECTIVE: Execute code when needed. You have a persistent shell session. "
971
1076
  "When user asks for data analysis, calculations, or file operations: WRITE and EXECUTE the code. "
972
1077
  "Languages available: Python, R, SQL, Bash. "
973
- "You can read files, run scripts, perform calculations, and show results."
1078
+ "🚨 CRITICAL: Commands are AUTOMATICALLY executed. If you see 'shell_info' below, "
1079
+ "that means the command was ALREADY RUN. NEVER ask users to run commands - just present results."
974
1080
  )
975
1081
  else:
976
1082
  intro = (
977
1083
  "You are Cite Agent, a truth-seeking research and finance AI with CODE EXECUTION. "
978
- "PRIMARY DIRECTIVE: Accuracy > Agreeableness. Execute code for analysis, calculations, and file operations. "
1084
+ "PRIMARY DIRECTIVE: Accuracy > Agreeableness. NEVER HALLUCINATE. "
979
1085
  "You are a fact-checker and analyst with a persistent shell session. "
980
1086
  "You have access to research (Archive), financial data (FinSight SEC filings), and can run Python/R/SQL/Bash. "
981
- "When user asks about files, directories, or data: EXECUTE commands to find answers."
1087
+ "\n\n"
1088
+ "🚨 ANTI-HALLUCINATION RULES:\n"
1089
+ "1. When user asks about files, directories, or data - commands are AUTOMATICALLY executed.\n"
1090
+ "2. If you see 'shell_info' in results below, that means command was ALREADY RUN.\n"
1091
+ "3. ONLY present information from shell_info output. DO NOT invent file names, paths, or code.\n"
1092
+ "4. If shell output is empty or unclear, say 'No results found' or 'Search returned no matches'.\n"
1093
+ "5. NEVER make up plausible-sounding file paths or code that wasn't in the actual output.\n"
1094
+ "6. If you're unsure, say 'I couldn't find that' rather than guessing.\n"
1095
+ "7. NEVER ask the user to run commands - just present the results that were already executed."
982
1096
  )
983
1097
 
984
1098
  sections.append(intro)
@@ -1279,7 +1393,7 @@ class EnhancedNocturnalAgent:
1279
1393
  "temperature": 0.2
1280
1394
  }
1281
1395
  return {
1282
- "model": "llama-3.3-70b", # Cerebras 70B model
1396
+ "model": "gpt-oss-120b", # PRODUCTION: Cerebras gpt-oss-120b - 100% test pass, 60K TPM
1283
1397
  "max_tokens": 900,
1284
1398
  "temperature": 0.3
1285
1399
  }
@@ -1292,7 +1406,7 @@ class EnhancedNocturnalAgent:
1292
1406
  "temperature": 0.2
1293
1407
  }
1294
1408
  return {
1295
- "model": "llama-3.3-70b-versatile",
1409
+ "model": "openai/gpt-oss-120b", # PRODUCTION: 120B model - 100% test pass rate
1296
1410
  "max_tokens": 900,
1297
1411
  "temperature": 0.3
1298
1412
  }
@@ -1514,6 +1628,49 @@ class EnhancedNocturnalAgent:
1514
1628
  seen.add(t)
1515
1629
  ordered.append(t)
1516
1630
  return ordered[:4]
1631
+
1632
+ def _plan_financial_request(self, question: str, session_key: Optional[str] = None) -> Tuple[List[str], List[str]]:
1633
+ """Derive ticker and metric targets for a financial query."""
1634
+ tickers = list(self._extract_tickers_from_text(question))
1635
+ question_lower = question.lower()
1636
+
1637
+ if not tickers:
1638
+ if "apple" in question_lower:
1639
+ tickers.append("AAPL")
1640
+ if "microsoft" in question_lower:
1641
+ tickers.append("MSFT" if "AAPL" not in tickers else "MSFT")
1642
+
1643
+ metrics_to_fetch: List[str] = []
1644
+ keyword_map = [
1645
+ ("revenue", ["revenue", "sales", "top line"]),
1646
+ ("grossProfit", ["gross profit", "gross margin", "margin"]),
1647
+ ("operatingIncome", ["operating income", "operating profit", "ebit"]),
1648
+ ("netIncome", ["net income", "profit", "earnings", "bottom line"]),
1649
+ ]
1650
+
1651
+ for metric, keywords in keyword_map:
1652
+ if any(kw in question_lower for kw in keywords):
1653
+ metrics_to_fetch.append(metric)
1654
+
1655
+ if session_key:
1656
+ last_topic = self._session_topics.get(session_key)
1657
+ else:
1658
+ last_topic = None
1659
+
1660
+ if not metrics_to_fetch and last_topic and last_topic.get("metrics"):
1661
+ metrics_to_fetch = list(last_topic["metrics"])
1662
+
1663
+ if not metrics_to_fetch:
1664
+ metrics_to_fetch = ["revenue", "grossProfit"]
1665
+
1666
+ deduped: List[str] = []
1667
+ seen: Set[str] = set()
1668
+ for symbol in tickers:
1669
+ if symbol and symbol not in seen:
1670
+ seen.add(symbol)
1671
+ deduped.append(symbol)
1672
+
1673
+ return deduped[:2], metrics_to_fetch
1517
1674
 
1518
1675
  async def initialize(self, force_reload: bool = False):
1519
1676
  """Initialize the agent with API keys and shell session."""
@@ -1548,8 +1705,10 @@ class EnhancedNocturnalAgent:
1548
1705
  use_local_keys_env = os.getenv("USE_LOCAL_KEYS", "").lower()
1549
1706
 
1550
1707
  if has_session:
1551
- # Session exists → ALWAYS use backend mode (ignore USE_LOCAL_KEYS)
1552
- use_local_keys = False
1708
+ # Session exists → Check if we have temp local key for speed
1709
+ # If temp key exists and valid → use local mode (fast!)
1710
+ # Otherwise → use backend mode (secure but slow)
1711
+ use_local_keys = hasattr(self, 'temp_api_key') and self.temp_api_key is not None
1553
1712
  elif use_local_keys_env == "true":
1554
1713
  # No session but dev mode requested → use local keys
1555
1714
  use_local_keys = True
@@ -1597,16 +1756,24 @@ class EnhancedNocturnalAgent:
1597
1756
  else:
1598
1757
  print("⚠️ Not authenticated. Please log in to use the agent.")
1599
1758
  else:
1600
- # Local keys mode - load Cerebras API keys (primary) with Groq fallback
1601
- self.auth_token = None
1602
- self.user_id = None
1759
+ # Local keys mode - use temporary key if available, otherwise load from env
1760
+
1761
+ # Check if we have a temporary key (for speed + security)
1762
+ if hasattr(self, 'temp_api_key') and self.temp_api_key:
1763
+ # Use temporary key provided by backend
1764
+ self.api_keys = [self.temp_api_key]
1765
+ self.llm_provider = getattr(self, 'temp_key_provider', 'cerebras')
1766
+ else:
1767
+ # Fallback: Load permanent keys from environment (dev mode only)
1768
+ self.auth_token = None
1769
+ self.user_id = None
1603
1770
 
1604
- # Load Cerebras keys from environment (PRIMARY)
1605
- self.api_keys = []
1606
- for i in range(1, 10): # Check CEREBRAS_API_KEY_1 through CEREBRAS_API_KEY_9
1607
- key = os.getenv(f"CEREBRAS_API_KEY_{i}") or os.getenv(f"CEREBRAS_API_KEY")
1608
- if key and key not in self.api_keys:
1609
- self.api_keys.append(key)
1771
+ # Load Cerebras keys from environment (PRIMARY)
1772
+ self.api_keys = []
1773
+ for i in range(1, 10): # Check CEREBRAS_API_KEY_1 through CEREBRAS_API_KEY_9
1774
+ key = os.getenv(f"CEREBRAS_API_KEY_{i}") or os.getenv(f"CEREBRAS_API_KEY")
1775
+ if key and key not in self.api_keys:
1776
+ self.api_keys.append(key)
1610
1777
 
1611
1778
  # Fallback to Groq keys if no Cerebras keys found
1612
1779
  if not self.api_keys:
@@ -1650,8 +1817,12 @@ class EnhancedNocturnalAgent:
1650
1817
 
1651
1818
  if self.shell_session is None:
1652
1819
  try:
1820
+ if self._is_windows:
1821
+ command = ['powershell', '-NoLogo', '-NoProfile']
1822
+ else:
1823
+ command = ['bash']
1653
1824
  self.shell_session = subprocess.Popen(
1654
- ['bash'],
1825
+ command,
1655
1826
  stdin=subprocess.PIPE,
1656
1827
  stdout=subprocess.PIPE,
1657
1828
  stderr=subprocess.STDOUT,
@@ -1726,7 +1897,7 @@ class EnhancedNocturnalAgent:
1726
1897
  "query": query, # Keep query clean
1727
1898
  "conversation_history": conversation_history or [],
1728
1899
  "api_context": api_results, # Send API results separately
1729
- "model": "llama-3.3-70b", # Compatible with Cerebras (priority) and Groq
1900
+ "model": "openai/gpt-oss-120b", # PRODUCTION: 120B - best test results
1730
1901
  "temperature": 0.2, # Low temp for accuracy
1731
1902
  "max_tokens": 4000
1732
1903
  }
@@ -1796,7 +1967,7 @@ class EnhancedNocturnalAgent:
1796
1967
  response=response_text,
1797
1968
  tokens_used=tokens,
1798
1969
  tools_used=all_tools,
1799
- model=data.get('model', 'llama-3.3-70b'),
1970
+ model=data.get('model', 'openai/gpt-oss-120b'),
1800
1971
  timestamp=data.get('timestamp', datetime.now(timezone.utc).isoformat()),
1801
1972
  api_results=api_results
1802
1973
  )
@@ -1835,7 +2006,7 @@ class EnhancedNocturnalAgent:
1835
2006
  response=response_text,
1836
2007
  tokens_used=tokens,
1837
2008
  tools_used=all_tools,
1838
- model=data.get('model', 'llama-3.3-70b-versatile'),
2009
+ model=data.get('model', 'openai/gpt-oss-120b'),
1839
2010
  timestamp=data.get('timestamp', datetime.now(timezone.utc).isoformat()),
1840
2011
  api_results=api_results
1841
2012
  )
@@ -2186,7 +2357,30 @@ class EnhancedNocturnalAgent:
2186
2357
  results.update(payload)
2187
2358
 
2188
2359
  return results
2189
-
2360
+
2361
+ def _looks_like_user_prompt(self, command: str) -> bool:
2362
+ command_lower = command.strip().lower()
2363
+ if not command_lower:
2364
+ return True
2365
+ phrases = [
2366
+ "ask the user",
2367
+ "can you run",
2368
+ "please run",
2369
+ "tell the user",
2370
+ "ask them",
2371
+ ]
2372
+ return any(phrase in command_lower for phrase in phrases)
2373
+
2374
+ def _infer_shell_command(self, question: str) -> str:
2375
+ question_lower = question.lower()
2376
+ if any(word in question_lower for word in ["list", "show", "files", "directory", "folder", "ls"]):
2377
+ return "ls -lah"
2378
+ if any(word in question_lower for word in ["where", "pwd", "current directory", "location"]):
2379
+ return "pwd"
2380
+ if "read" in question_lower and any(ext in question_lower for ext in [".py", ".txt", ".csv", "file"]):
2381
+ return "ls -lah"
2382
+ return "pwd"
2383
+
2190
2384
  def execute_command(self, command: str) -> str:
2191
2385
  """Execute command and return output - improved with echo markers"""
2192
2386
  try:
@@ -2216,10 +2410,14 @@ class EnhancedNocturnalAgent:
2216
2410
  marker = f"CMD_DONE_{uuid.uuid4().hex[:8]}"
2217
2411
 
2218
2412
  # Send command with marker
2219
- full_command = f"{command}; echo '{marker}'\n"
2413
+ terminator = "\r\n" if self._is_windows else "\n"
2414
+ if self._is_windows:
2415
+ full_command = f"{command}; echo '{marker}'{terminator}"
2416
+ else:
2417
+ full_command = f"{command}; echo '{marker}'{terminator}"
2220
2418
  self.shell_session.stdin.write(full_command)
2221
2419
  self.shell_session.stdin.flush()
2222
-
2420
+
2223
2421
  # Read until we see the marker
2224
2422
  output_lines = []
2225
2423
  start_time = time.time()
@@ -2510,7 +2708,7 @@ class EnhancedNocturnalAgent:
2510
2708
  - count: {"counts": {file: match_count}}
2511
2709
  """
2512
2710
  try:
2513
- import re
2711
+ # import re removed - using module-level import
2514
2712
 
2515
2713
  # Expand ~ to home directory
2516
2714
  path = os.path.expanduser(path)
@@ -2781,7 +2979,39 @@ class EnhancedNocturnalAgent:
2781
2979
 
2782
2980
  # Default: Treat unknown commands as requiring user awareness
2783
2981
  return 'WRITE'
2784
-
2982
+
2983
+ def _format_archive_summary(
2984
+ self,
2985
+ question: str,
2986
+ response: str,
2987
+ api_results: Dict[str, Any],
2988
+ ) -> Dict[str, Any]:
2989
+ """Prepare compact summary payload for the conversation archive."""
2990
+ clean_question = question.strip().replace("\n", " ")
2991
+ summary_text = response.strip().replace("\n", " ")
2992
+ if len(summary_text) > 320:
2993
+ summary_text = summary_text[:317].rstrip() + "..."
2994
+
2995
+ citations: List[str] = []
2996
+ research = api_results.get("research")
2997
+ if isinstance(research, dict):
2998
+ for item in research.get("results", [])[:3]:
2999
+ title = item.get("title") or item.get("paperTitle")
3000
+ if title:
3001
+ citations.append(title)
3002
+
3003
+ financial = api_results.get("financial")
3004
+ if isinstance(financial, dict):
3005
+ tickers = ", ".join(sorted(financial.keys()))
3006
+ if tickers:
3007
+ citations.append(f"Financial data: {tickers}")
3008
+
3009
+ return {
3010
+ "question": clean_question,
3011
+ "summary": summary_text,
3012
+ "citations": citations,
3013
+ }
3014
+
2785
3015
  def _is_safe_shell_command(self, cmd: str) -> bool:
2786
3016
  """
2787
3017
  Compatibility wrapper for old safety check.
@@ -2840,6 +3070,71 @@ class EnhancedNocturnalAgent:
2840
3070
  self.daily_token_usage += tokens
2841
3071
  if user_id:
2842
3072
  self.user_token_usage[user_id] = self.user_token_usage.get(user_id, 0) + tokens
3073
+
3074
def _finalize_interaction(
    self,
    request: ChatRequest,
    response: ChatResponse,
    tools_used: Optional[List[str]],
    api_results: Optional[Dict[str, Any]],
    request_analysis: Optional[Dict[str, Any]],
    *,
    log_workflow: bool = True,
) -> ChatResponse:
    """Common tail logic: history, memory, workflow logging, archive save.

    Args:
        request: The request being answered; its question, user id and
            conversation id are recorded.
        response: The response to finalize; ``tools_used`` and, when unset,
            ``confidence_score`` are updated in place.
        tools_used: Tools collected by the caller, merged with
            ``response.tools_used``.
        api_results: Tool results used to build the archive citations.
        request_analysis: Optional analysis dict; its ``"confidence"`` value
            is used when the response has no confidence score.
        log_workflow: When True, the exchange is also saved through
            ``self.workflow``.

    Returns:
        The same ``response`` object, after mutation.
    """
    # Merge both tool lists, keeping first-seen order and dropping
    # duplicates and empty entries.
    response.tools_used = list(dict.fromkeys(
        tool
        for tool in (tools_used or []) + (response.tools_used or [])
        if tool
    ))

    if request_analysis and not response.confidence_score:
        response.confidence_score = request_analysis.get("confidence", response.confidence_score) or 0.0

    self.conversation_history.append({"role": "user", "content": request.question})
    self.conversation_history.append({"role": "assistant", "content": response.response})

    self._update_memory(
        request.user_id,
        request.conversation_id,
        f"Q: {request.question[:100]}... A: {response.response[:100]}...",
    )

    if log_workflow:
        try:
            self.workflow.save_query_result(
                query=request.question,
                response=response.response,
                metadata={
                    "tools_used": response.tools_used,
                    "tokens_used": response.tokens_used,
                    "confidence_score": response.confidence_score,
                },
            )
        except Exception:
            # Best-effort: workflow logging must not fail the interaction.
            logger.debug("Workflow logging failed", exc_info=True)

    if getattr(self, "archive", None):
        try:
            archive_payload = self._format_archive_summary(
                request.question,
                response.response,
                api_results or {},
            )
            self.archive.record_entry(
                request.user_id,
                request.conversation_id,
                archive_payload["question"],
                archive_payload["summary"],
                response.tools_used,
                archive_payload["citations"],
            )
        except Exception:
            # Best-effort, same pattern as the workflow logging above.
            # NOTE(review): the previous call passed error=... as a keyword;
            # a standard-library logger rejects unknown keywords with
            # TypeError once DEBUG is enabled, which would escape this
            # handler. Confirm `logger` is a stdlib logger.
            logger.debug("Archive write failed", exc_info=True)

    return response
2843
3138
 
2844
3139
  def _get_memory_context(self, user_id: str, conversation_id: str) -> str:
2845
3140
  """Get relevant memory context for the conversation"""
@@ -3093,8 +3388,15 @@ class EnhancedNocturnalAgent:
3093
3388
 
3094
3389
  # System/technical indicators
3095
3390
  system_keywords = [
3096
- 'file', 'directory', 'command', 'run', 'execute', 'install',
3097
- 'python', 'code', 'script', 'program', 'system', 'terminal'
3391
+ 'file', 'files', 'directory', 'directories', 'folder', 'folders',
3392
+ 'command', 'run', 'execute', 'install',
3393
+ 'python', 'code', 'script', 'scripts', 'program', 'system', 'terminal',
3394
+ 'find', 'search for', 'locate', 'list', 'show me', 'where is',
3395
+ 'what files', 'which files', 'how many files',
3396
+ 'grep', 'search', 'look for', 'count',
3397
+ '.py', '.txt', '.js', '.java', '.cpp', '.c', '.h',
3398
+ 'function', 'class', 'definition', 'route', 'endpoint',
3399
+ 'codebase', 'project structure', 'source code'
3098
3400
  ]
3099
3401
 
3100
3402
  question_lower = question.lower()
@@ -3212,7 +3514,7 @@ class EnhancedNocturnalAgent:
3212
3514
  question_lower = question.lower()
3213
3515
 
3214
3516
  # Pattern 1: Multiple years without SPECIFIC topic (e.g., "2008, 2015, 2019")
3215
- import re
3517
+ # import re removed - using module-level import
3216
3518
  years_pattern = r'\b(19\d{2}|20\d{2})\b'
3217
3519
  years = re.findall(years_pattern, question)
3218
3520
  if len(years) >= 2:
@@ -3314,9 +3616,10 @@ IMPORTANT RULES:
3314
3616
  7. For finding things, use: find ~ -maxdepth 4 -name '*pattern*' 2>/dev/null
3315
3617
  8. For creating files: touch filename OR echo "content" > filename
3316
3618
  9. For creating directories: mkdir dirname
3317
- 10. ALWAYS include 2>/dev/null to suppress errors from find
3619
+ 10. ALWAYS include 2>/dev/null to suppress errors from find and grep
3318
3620
  11. 🚨 MULTI-STEP QUERIES: For queries like "read X and do Y", ONLY generate the FIRST step (reading X). The LLM will handle subsequent steps after seeing the file contents.
3319
3621
  12. 🚨 NEVER use python -m py_compile or other code execution for finding bugs - just read the file with cat/head
3622
+ 13. 🚨 FOR GREP: When searching in a DIRECTORY (not a specific file), ALWAYS use -r flag for recursive search: grep -rn 'pattern' /path/to/dir 2>/dev/null
3320
3623
 
3321
3624
  Examples:
3322
3625
  "where am i?" → {{"action": "execute", "command": "pwd", "reason": "Show current directory", "updates_context": false}}
@@ -3329,7 +3632,9 @@ Examples:
3329
3632
  "write hello.txt with content Hello World" → {{"action": "execute", "command": "echo 'Hello World' > hello.txt", "reason": "Create file with content", "updates_context": true}}
3330
3633
  "create results.txt with line 1 and line 2" → {{"action": "execute", "command": "echo 'line 1' > results.txt && echo 'line 2' >> results.txt", "reason": "Create file with multiple lines", "updates_context": true}}
3331
3634
  "fix bug in script.py change OLD to NEW" → {{"action": "execute", "command": "sed -i 's/OLD/NEW/g' script.py && echo 'Fixed script.py'", "reason": "Edit file to fix bug", "updates_context": true}}
3332
- "search for TODO in py files" → {{"action": "execute", "command": "grep -n 'TODO' *.py 2>/dev/null", "reason": "Find TODO comments", "updates_context": false}}
3635
+ "search for TODO in py files here" → {{"action": "execute", "command": "grep -n 'TODO' *.py 2>/dev/null", "reason": "Find TODO in current directory py files", "updates_context": false}}
3636
+ "search for TODO in /some/directory" → {{"action": "execute", "command": "grep -rn 'TODO' /some/directory 2>/dev/null", "reason": "Recursively search directory for TODO", "updates_context": false}}
3637
+ "search for TODO comments in /tmp/test" → {{"action": "execute", "command": "grep -rn 'TODO' /tmp/test 2>/dev/null", "reason": "Recursively search directory for TODO", "updates_context": false}}
3333
3638
  "find all bugs in code" → {{"action": "execute", "command": "grep -rn 'BUG:' . 2>/dev/null", "reason": "Search for bug markers in code", "updates_context": false}}
3334
3639
  "read analyze.py and find bugs" → {{"action": "execute", "command": "head -200 analyze.py", "reason": "Read file to analyze bugs", "updates_context": false}}
3335
3640
  "show me calc.py completely" → {{"action": "execute", "command": "cat calc.py", "reason": "Display entire file", "updates_context": false}}
@@ -3344,12 +3649,28 @@ Examples:
3344
3649
  JSON:"""
3345
3650
 
3346
3651
  try:
3347
- plan_response = await self.call_backend_query(
3348
- query=planner_prompt,
3349
- conversation_history=[],
3350
- api_results={},
3351
- tools_used=[]
3352
- )
3652
+ # Use LOCAL LLM for planning (don't recurse into call_backend_query)
3653
+ # This avoids infinite recursion and uses temp key if available
3654
+ if hasattr(self, 'client') and self.client:
3655
+ # Local mode with temp key or dev keys
3656
+ # Use gpt-oss-120b for Cerebras (100% test pass, better accuracy)
3657
+ model_name = "gpt-oss-120b" if self.llm_provider == "cerebras" else "llama-3.1-70b-versatile"
3658
+ response = self.client.chat.completions.create(
3659
+ model=model_name,
3660
+ messages=[{"role": "user", "content": planner_prompt}],
3661
+ max_tokens=500,
3662
+ temperature=0.3
3663
+ )
3664
+ plan_text = response.choices[0].message.content.strip()
3665
+ plan_response = ChatResponse(response=plan_text)
3666
+ else:
3667
+ # Backend mode - make a simplified backend call
3668
+ plan_response = await self.call_backend_query(
3669
+ query=planner_prompt,
3670
+ conversation_history=[],
3671
+ api_results={},
3672
+ tools_used=[]
3673
+ )
3353
3674
 
3354
3675
  plan_text = plan_response.response.strip()
3355
3676
  if '```' in plan_text:
@@ -3363,9 +3684,27 @@ JSON:"""
3363
3684
 
3364
3685
  if debug_mode:
3365
3686
  print(f"🔍 SHELL PLAN: {plan}")
3366
-
3687
+
3367
3688
  # GENERIC COMMAND EXECUTION - No more hardcoded actions!
3689
+ if shell_action != "execute" and might_need_shell:
3690
+ command = self._infer_shell_command(request.question)
3691
+ shell_action = "execute"
3692
+ updates_context = False
3693
+ if debug_mode:
3694
+ print(f"🔄 Planner opted out; inferred fallback command: {command}")
3695
+
3696
+ if shell_action == "execute" and not command:
3697
+ command = self._infer_shell_command(request.question)
3698
+ plan["command"] = command
3699
+ if debug_mode:
3700
+ print(f"🔄 Planner omitted command, inferred {command}")
3701
+
3368
3702
  if shell_action == "execute" and command:
3703
+ if self._looks_like_user_prompt(command):
3704
+ command = self._infer_shell_command(request.question)
3705
+ plan["command"] = command
3706
+ if debug_mode:
3707
+ print(f"🔄 Replacing delegating plan with command: {command}")
3369
3708
  # Check command safety
3370
3709
  safety_level = self._classify_command_safety(command)
3371
3710
 
@@ -3431,7 +3770,7 @@ JSON:"""
3431
3770
  # Check for file search commands (find)
3432
3771
  if not intercepted and 'find' in command and '-name' in command:
3433
3772
  try:
3434
- import re
3773
+ # import re removed - using module-level import
3435
3774
  # Extract pattern: find ... -name '*pattern*'
3436
3775
  name_match = re.search(r"-name\s+['\"]?\*?([^'\"*\s]+)\*?['\"]?", command)
3437
3776
  if name_match:
@@ -3450,14 +3789,15 @@ JSON:"""
3450
3789
 
3451
3790
  # Check for file writing commands (echo > file, grep > file, etc.) - CHECK THIS FIRST!
3452
3791
  # This must come BEFORE the plain grep interceptor
3453
- if not intercepted and ('>' in command or '>>' in command):
3792
+ # BUT: Ignore 2>/dev/null which is error redirection, not file writing
3793
+ if not intercepted and ('>' in command or '>>' in command) and '2>' not in command:
3454
3794
  try:
3455
- import re
3795
+ # import re removed - using module-level import
3456
3796
 
3457
3797
  # Handle grep ... > file (intercept and execute grep, then write output)
3458
3798
  if 'grep' in command and '>' in command:
3459
3799
  # Extract: grep -rn 'pattern' path > output.txt
3460
- grep_match = re.search(r"grep\s+(.*)>\s*(\S+)", command)
3800
+ grep_match = re.search(r"grep\s+(.*)\s>\s*(\S+)", command)
3461
3801
  if grep_match:
3462
3802
  grep_part = grep_match.group(1).strip()
3463
3803
  output_file = grep_match.group(2)
@@ -3523,7 +3863,7 @@ JSON:"""
3523
3863
  # Check for sed editing commands
3524
3864
  if not intercepted and command.startswith('sed '):
3525
3865
  try:
3526
- import re
3866
+ # import re removed - using module-level import
3527
3867
  # sed 's/old/new/g' file OR sed -i 's/old/new/' file
3528
3868
  match = re.search(r"sed.*?['\"]s/([^/]+)/([^/]+)/", command)
3529
3869
  if match:
@@ -3549,7 +3889,7 @@ JSON:"""
3549
3889
  # Check for heredoc file creation (cat << EOF > file)
3550
3890
  if not intercepted and '<<' in command and ('EOF' in command or 'HEREDOC' in command):
3551
3891
  try:
3552
- import re
3892
+ # import re removed - using module-level import
3553
3893
  # Extract: cat << EOF > filename OR cat > filename << EOF
3554
3894
  # Note: We can't actually get the heredoc content from a single command line
3555
3895
  # This would need to be handled differently (multi-line input)
@@ -3561,24 +3901,45 @@ JSON:"""
3561
3901
 
3562
3902
  # Check for content search commands (grep -r) WITHOUT redirection
3563
3903
  # This comes AFTER grep > file interceptor to avoid conflicts
3564
- if not intercepted and command.startswith('grep ') and ('-r' in command or '-R' in command):
3904
+ if not intercepted and 'grep' in command and ('-r' in command or '-R' in command):
3565
3905
  try:
3566
- import re
3906
+ # import re removed - using module-level import
3567
3907
  # Extract pattern: grep -r 'pattern' path
3568
3908
  pattern_match = re.search(r"grep.*?['\"]([^'\"]+)['\"]", command)
3569
3909
  if pattern_match:
3570
3910
  pattern = pattern_match.group(1)
3571
- # Extract path (last argument usually)
3572
- parts = command.split()
3573
- search_path = parts[-1] if len(parts) > 2 else "."
3911
+ # Extract path - skip flags and options
3912
+ parts = [p for p in command.split() if not p.startswith('-') and p != 'grep' and p != '2>/dev/null']
3913
+ # Path is after pattern (skip the quoted pattern)
3914
+ search_path = parts[-1] if len(parts) >= 2 else "."
3915
+
3916
+ # Detect file pattern from command (e.g., *.py, *.txt) or use *
3917
+ file_pattern = "*"
3918
+ if '*.py' in command:
3919
+ file_pattern = "*.py"
3920
+ elif '*.txt' in command:
3921
+ file_pattern = "*.txt"
3922
+
3923
+ result = self.grep_search(pattern, search_path, file_pattern, output_mode="content")
3924
+
3925
+ # Format grep results
3926
+ if 'matches' in result and result['matches']:
3927
+ output_parts = []
3928
+ for file_path, matches in result['matches'].items():
3929
+ output_parts.append(f"{file_path}:")
3930
+ for line_num, line_content in matches[:10]: # Limit per file
3931
+ output_parts.append(f" {line_num}: {line_content}")
3932
+ output = '\n'.join(output_parts)
3933
+ else:
3934
+ output = f"No matches found for '{pattern}'"
3574
3935
 
3575
- result = self.grep_search(pattern, search_path, "*.py", output_mode="files_with_matches")
3576
- output = f"Files matching '{pattern}':\n" + '\n'.join(result['files'][:20])
3577
3936
  intercepted = True
3578
3937
  tools_used.append("grep_search")
3579
3938
  if debug_mode:
3580
- print(f"🔄 Intercepted: {command} → grep_search({pattern}, {search_path})")
3581
- except:
3939
+ print(f"🔄 Intercepted: {command} → grep_search({pattern}, {search_path}, {file_pattern})")
3940
+ except Exception as e:
3941
+ if debug_mode:
3942
+ print(f"⚠️ Grep interceptor failed: {e}")
3582
3943
  pass
3583
3944
 
3584
3945
  # If not intercepted, execute as shell command
@@ -3597,7 +3958,7 @@ JSON:"""
3597
3958
 
3598
3959
  # Update file context if needed
3599
3960
  if updates_context:
3600
- import re
3961
+ # import re removed - using module-level import
3601
3962
  # Extract file paths from command
3602
3963
  file_patterns = r'([a-zA-Z0-9_\-./]+\.(py|r|csv|txt|json|md|ipynb|rmd))'
3603
3964
  files_mentioned = re.findall(file_patterns, command, re.IGNORECASE)
@@ -3695,7 +4056,7 @@ JSON:"""
3695
4056
 
3696
4057
  elif shell_action == "read_file":
3697
4058
  # NEW: Read and inspect file (R, Python, CSV, etc.)
3698
- import re # Import at function level
4059
+ # import re removed - using module-level import
3699
4060
 
3700
4061
  file_path = plan.get("file_path", "")
3701
4062
  if not file_path and might_need_shell:
@@ -3799,58 +4160,22 @@ JSON:"""
3799
4160
 
3800
4161
  # FinSight API for financial data - Use LLM for ticker/metric extraction
3801
4162
  if "finsight" in request_analysis.get("apis", []):
3802
- # LLM extracts ticker + metric (more accurate than regex)
3803
- finance_prompt = f"""Extract financial query details from user's question.
3804
-
3805
- User query: "{request.question}"
3806
-
3807
- Respond with JSON:
3808
- {{
3809
- "tickers": ["AAPL", "TSLA"] (stock symbols - infer from company names if needed),
3810
- "metric": "revenue|marketCap|price|netIncome|eps|freeCashFlow|grossProfit"
3811
- }}
3812
-
3813
- Examples:
3814
- - "Tesla revenue" → {{"tickers": ["TSLA"], "metric": "revenue"}}
3815
- - "What's Apple worth?" → {{"tickers": ["AAPL"], "metric": "marketCap"}}
3816
- - "tsla stock price" → {{"tickers": ["TSLA"], "metric": "price"}}
3817
- - "Microsoft profit" → {{"tickers": ["MSFT"], "metric": "netIncome"}}
3818
-
3819
- JSON:"""
4163
+ session_key = f"{request.user_id}:{request.conversation_id}"
4164
+ tickers, metrics_to_fetch = self._plan_financial_request(request.question, session_key)
4165
+ financial_payload: Dict[str, Any] = {}
4166
+
4167
+ for ticker in tickers:
4168
+ result = await self.get_financial_metrics(ticker, metrics_to_fetch)
4169
+ financial_payload[ticker] = result
4170
+
4171
+ if financial_payload:
4172
+ self._session_topics[session_key] = {
4173
+ "tickers": tickers,
4174
+ "metrics": metrics_to_fetch,
4175
+ }
4176
+ api_results["financial"] = financial_payload
4177
+ tools_used.append("finsight_api")
3820
4178
 
3821
- try:
3822
- finance_response = await self.call_backend_query(
3823
- query=finance_prompt,
3824
- conversation_history=[],
3825
- api_results={},
3826
- tools_used=[]
3827
- )
3828
-
3829
- import json as json_module
3830
- finance_text = finance_response.response.strip()
3831
- if '```' in finance_text:
3832
- finance_text = finance_text.split('```')[1].replace('json', '').strip()
3833
-
3834
- finance_plan = json_module.loads(finance_text)
3835
- tickers = finance_plan.get("tickers", [])
3836
- metric = finance_plan.get("metric", "revenue")
3837
-
3838
- if debug_mode:
3839
- print(f"🔍 LLM FINANCE PLAN: tickers={tickers}, metric={metric}")
3840
-
3841
- if tickers:
3842
- # Call FinSight with extracted ticker + metric
3843
- financial_data = await self._call_finsight_api(f"calc/{tickers[0]}/{metric}")
3844
- if debug_mode:
3845
- print(f"🔍 FinSight returned: {list(financial_data.keys()) if financial_data else None}")
3846
- if financial_data and "error" not in financial_data:
3847
- api_results["financial"] = financial_data
3848
- tools_used.append("finsight_api")
3849
-
3850
- except Exception as e:
3851
- if debug_mode:
3852
- print(f"🔍 Finance LLM extraction failed: {e}")
3853
-
3854
4179
  # ========================================================================
3855
4180
  # PRIORITY 3: WEB SEARCH (Fallback - only if shell didn't handle AND no data yet)
3856
4181
  # ========================================================================
@@ -3933,12 +4258,27 @@ Respond with JSON:
3933
4258
  JSON:"""
3934
4259
 
3935
4260
  try:
3936
- web_decision_response = await self.call_backend_query(
3937
- query=web_decision_prompt,
3938
- conversation_history=[],
3939
- api_results={},
3940
- tools_used=[]
3941
- )
4261
+ # Use LOCAL LLM for web search decision (avoid recursion)
4262
+ if hasattr(self, 'client') and self.client:
4263
+ # Local mode
4264
+ # Use gpt-oss-120b for Cerebras (100% test pass, better accuracy)
4265
+ model_name = "gpt-oss-120b" if self.llm_provider == "cerebras" else "llama-3.1-70b-versatile"
4266
+ response = self.client.chat.completions.create(
4267
+ model=model_name,
4268
+ messages=[{"role": "user", "content": web_decision_prompt}],
4269
+ max_tokens=300,
4270
+ temperature=0.2
4271
+ )
4272
+ decision_text = response.choices[0].message.content.strip()
4273
+ web_decision_response = ChatResponse(response=decision_text)
4274
+ else:
4275
+ # Backend mode
4276
+ web_decision_response = await self.call_backend_query(
4277
+ query=web_decision_prompt,
4278
+ conversation_history=[],
4279
+ api_results={},
4280
+ tools_used=[]
4281
+ )
3942
4282
 
3943
4283
  import json as json_module
3944
4284
  decision_text = web_decision_response.response.strip()
@@ -3981,7 +4321,7 @@ JSON:"""
3981
4321
  # This fixes the issue where LLM shows corrected code but doesn't create the file
3982
4322
  if any(keyword in request.question.lower() for keyword in ['create', 'write', 'save', 'generate', 'fixed', 'corrected']):
3983
4323
  # Extract filename from query (e.g., "write to foo.py", "create bar_fixed.py")
3984
- import re
4324
+ # Note: re is already imported at module level (line 12)
3985
4325
  filename_match = re.search(r'(?:to|create|write|save|generate)\s+(\w+[._-]\w+\.[\w]+)', request.question, re.IGNORECASE)
3986
4326
  if not filename_match:
3987
4327
  # Try pattern: "foo_fixed.py" or "bar.py"
@@ -4010,11 +4350,14 @@ JSON:"""
4010
4350
  if debug_mode:
4011
4351
  print(f"⚠️ Auto-write failed: {e}")
4012
4352
 
4013
- # CRITICAL: Save to conversation history
4014
- self.conversation_history.append({"role": "user", "content": request.question})
4015
- self.conversation_history.append({"role": "assistant", "content": response.response})
4016
-
4017
- return response
4353
+ return self._finalize_interaction(
4354
+ request,
4355
+ response,
4356
+ tools_used,
4357
+ api_results,
4358
+ request_analysis,
4359
+ log_workflow=False,
4360
+ )
4018
4361
 
4019
4362
  # DEV MODE ONLY: Direct Groq calls (only works with local API keys)
4020
4363
  # This code path won't execute in production since self.client = None
@@ -4049,6 +4392,26 @@ JSON:"""
4049
4392
 
4050
4393
  # Get memory context
4051
4394
  memory_context = self._get_memory_context(request.user_id, request.conversation_id)
4395
+ archive_context = self.archive.get_recent_context(
4396
+ request.user_id,
4397
+ request.conversation_id,
4398
+ limit=3,
4399
+ ) if getattr(self, "archive", None) else ""
4400
+ if archive_context:
4401
+ if memory_context:
4402
+ memory_context = f"{memory_context}\n\n{archive_context}"
4403
+ else:
4404
+ memory_context = archive_context
4405
+ archive_context = self.archive.get_recent_context(
4406
+ request.user_id,
4407
+ request.conversation_id,
4408
+ limit=3,
4409
+ ) if getattr(self, "archive", None) else ""
4410
+ if archive_context:
4411
+ if memory_context:
4412
+ memory_context = f"{memory_context}\n\n{archive_context}"
4413
+ else:
4414
+ memory_context = archive_context
4052
4415
 
4053
4416
  # Ultra-light handling for small talk to save tokens entirely
4054
4417
  if self._is_simple_greeting(request.question):
@@ -4154,44 +4517,17 @@ JSON:"""
4154
4517
  return self._respond_with_workspace_listing(request, workspace_listing)
4155
4518
 
4156
4519
  if "finsight" in request_analysis["apis"]:
4157
- # Extract tickers from symbols or company names
4158
- tickers = self._extract_tickers_from_text(request.question)
4159
- financial_payload = {}
4160
4520
  session_key = f"{request.user_id}:{request.conversation_id}"
4161
- last_topic = self._session_topics.get(session_key)
4162
- if not tickers:
4163
- # Heuristic defaults for common requests
4164
- if "apple" in request.question.lower():
4165
- tickers = ["AAPL"]
4166
- if "microsoft" in request.question.lower():
4167
- tickers = tickers + ["MSFT"] if "AAPL" in tickers else ["MSFT"]
4168
-
4169
- # Determine which metrics to fetch based on query keywords
4170
- metrics_to_fetch = []
4171
- if any(kw in question_lower for kw in ["revenue", "sales", "top line"]):
4172
- metrics_to_fetch.append("revenue")
4173
- if any(kw in question_lower for kw in ["gross profit", "gross margin", "margin"]):
4174
- metrics_to_fetch.append("grossProfit")
4175
- if any(kw in question_lower for kw in ["operating income", "operating profit", "ebit"]):
4176
- metrics_to_fetch.append("operatingIncome")
4177
- if any(kw in question_lower for kw in ["net income", "profit", "earnings", "bottom line"]):
4178
- metrics_to_fetch.append("netIncome")
4179
-
4180
- # Default to key metrics if no specific request
4181
- if not metrics_to_fetch and last_topic and last_topic.get("metrics"):
4182
- metrics_to_fetch = list(last_topic["metrics"])
4183
-
4184
- if not metrics_to_fetch:
4185
- metrics_to_fetch = ["revenue", "grossProfit"]
4186
-
4187
- # Fetch metrics for each ticker (cap 2 tickers)
4188
- for t in tickers[:2]:
4189
- result = await self.get_financial_metrics(t, metrics_to_fetch)
4190
- financial_payload[t] = result
4521
+ tickers, metrics_to_fetch = self._plan_financial_request(request.question, session_key)
4522
+ financial_payload: Dict[str, Any] = {}
4523
+
4524
+ for ticker in tickers:
4525
+ result = await self.get_financial_metrics(ticker, metrics_to_fetch)
4526
+ financial_payload[ticker] = result
4191
4527
 
4192
4528
  if financial_payload:
4193
4529
  self._session_topics[session_key] = {
4194
- "tickers": tickers[:2],
4530
+ "tickers": tickers,
4195
4531
  "metrics": metrics_to_fetch,
4196
4532
  }
4197
4533
  direct_finance = (
@@ -4265,7 +4601,18 @@ JSON:"""
4265
4601
  summary_tokens = summary_response.usage.total_tokens
4266
4602
  self._charge_tokens(request.user_id, summary_tokens)
4267
4603
  self.total_cost += (summary_tokens / 1000) * self.cost_per_1k_tokens
4604
+ else:
4605
+ summary_tokens = 0
4268
4606
  messages.append({"role": "system", "content": f"Previous conversation summary: {conversation_summary}"})
4607
+ self._emit_telemetry(
4608
+ "history_summarized",
4609
+ request,
4610
+ success=True,
4611
+ extra={
4612
+ "history_length": len(self.conversation_history),
4613
+ "summary_tokens": summary_tokens,
4614
+ },
4615
+ )
4269
4616
  except:
4270
4617
  # If summary fails, just use recent history
4271
4618
  pass
@@ -4429,29 +4776,35 @@ JSON:"""
4429
4776
  if footer:
4430
4777
  final_response = f"{final_response}\n\n_{footer}_"
4431
4778
 
4432
- # Update conversation history
4433
- self.conversation_history.append({"role": "user", "content": request.question})
4434
- self.conversation_history.append({"role": "assistant", "content": final_response})
4435
-
4436
- # Update memory
4437
- self._update_memory(
4438
- request.user_id,
4439
- request.conversation_id,
4440
- f"Q: {request.question[:100]}... A: {final_response[:100]}..."
4441
- )
4442
-
4443
- # Save to workflow history automatically
4444
- self.workflow.save_query_result(
4445
- query=request.question,
4446
- response=final_response,
4447
- metadata={
4448
- "tools_used": tools_used,
4449
- "tokens_used": tokens_used,
4450
- "confidence_score": request_analysis['confidence']
4451
- }
4452
- )
4453
-
4454
- return ChatResponse(
4779
+ # TRUTH-SEEKING VERIFICATION: Check if response matches actual shell output
4780
+ if "shell_info" in api_results and api_results["shell_info"]:
4781
+ shell_output = api_results["shell_info"].get("output", "")
4782
+
4783
+ # If shell output was empty or says "no results", but response lists specific items
4784
+ # This indicates hallucination
4785
+ if (not shell_output or "no" in shell_output.lower() and "found" in shell_output.lower()):
4786
+ # Check if response contains made-up file paths or code
4787
+ response_lower = final_response.lower()
4788
+ if any(indicator in response_lower for indicator in [".py:", "found in", "route", "@app", "@router", "file1", "file2"]):
4789
+ # Hallucination detected - replace with honest answer
4790
+ final_response = "I searched but found no matches. The search returned no results."
4791
+ logger.warning("🚨 Hallucination prevented: LLM tried to make up results when shell output was empty")
4792
+
4793
+ expected_tools: Set[str] = set()
4794
+ if "finsight" in request_analysis.get("apis", []):
4795
+ expected_tools.add("finsight_api")
4796
+ if "archive" in request_analysis.get("apis", []):
4797
+ expected_tools.add("archive_api")
4798
+ for expected in expected_tools:
4799
+ if expected not in tools_used:
4800
+ self._emit_telemetry(
4801
+ "tool_missing",
4802
+ request,
4803
+ success=False,
4804
+ extra={"expected": expected},
4805
+ )
4806
+
4807
+ response_obj = ChatResponse(
4455
4808
  response=final_response,
4456
4809
  tools_used=tools_used,
4457
4810
  reasoning_steps=[f"Request type: {request_analysis['type']}", f"APIs used: {request_analysis['apis']}"],
@@ -4461,9 +4814,22 @@ JSON:"""
4461
4814
  execution_results=execution_results,
4462
4815
  api_results=api_results
4463
4816
  )
4817
+ return self._finalize_interaction(
4818
+ request,
4819
+ response_obj,
4820
+ tools_used,
4821
+ api_results,
4822
+ request_analysis,
4823
+ log_workflow=True,
4824
+ )
4464
4825
 
4465
4826
  except Exception as e:
4827
+ import traceback
4466
4828
  details = str(e)
4829
+ debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
4830
+ if debug_mode:
4831
+ print("🔴 FULL TRACEBACK:")
4832
+ traceback.print_exc()
4467
4833
  message = (
4468
4834
  "⚠️ Something went wrong while orchestrating your request, but no actions were performed. "
4469
4835
  "Please retry, and if the issue persists share this detail with the team: {details}."
@@ -4606,24 +4972,13 @@ JSON:"""
4606
4972
 
4607
4973
  # FinSight API (abbreviated)
4608
4974
  if "finsight" in request_analysis["apis"]:
4609
- tickers = self._extract_tickers_from_text(request.question)
4975
+ session_key = f"{request.user_id}:{request.conversation_id}"
4976
+ tickers, metrics_to_fetch = self._plan_financial_request(request.question, session_key)
4610
4977
  financial_payload = {}
4611
-
4612
- if not tickers:
4613
- if "apple" in question_lower:
4614
- tickers = ["AAPL"]
4615
- if "microsoft" in question_lower:
4616
- tickers = ["MSFT"] if not tickers else tickers + ["MSFT"]
4617
-
4618
- metrics_to_fetch = ["revenue", "grossProfit"]
4619
- if any(kw in question_lower for kw in ["revenue", "sales"]):
4620
- metrics_to_fetch = ["revenue"]
4621
- if any(kw in question_lower for kw in ["profit", "margin"]):
4622
- metrics_to_fetch.append("grossProfit")
4623
-
4624
- for t in tickers[:2]:
4625
- result = await self.get_financial_metrics(t, metrics_to_fetch)
4626
- financial_payload[t] = result
4978
+
4979
+ for ticker in tickers:
4980
+ result = await self.get_financial_metrics(ticker, metrics_to_fetch)
4981
+ financial_payload[ticker] = result
4627
4982
 
4628
4983
  if financial_payload:
4629
4984
  api_results["financial"] = financial_payload