PyPI - cite-agent - Versions diffs - 1.3.8__py3-none-any.whl → 1.3.9__py3-none-any.whl - Mend

cite-agent 1.3.8__py3-none-any.whl → 1.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

cite_agent/enhanced_ai_agent.py CHANGED Viewed

@@ -17,13 +17,15 @@ from importlib import resources
 import aiohttp
 from datetime import datetime, timezone
-from typing import Dict, Any, List, Optional, Tuple
+from typing import Dict, Any, List, Optional, Tuple, Set
 from urllib.parse import urlparse
 from dataclasses import dataclass, field
 from pathlib import Path
+import platform
 from .telemetry import TelemetryManager
 from .setup_config import DEFAULT_QUERY_LIMIT
+from .conversation_archive import ConversationArchive
 # Suppress noise
 logging.basicConfig(level=logging.ERROR)
@@ -89,6 +91,7 @@ class EnhancedNocturnalAgent:
         from .workflow import WorkflowManager
         self.workflow = WorkflowManager()
         self.last_paper_result = None  # Track last paper mentioned for "save that"
+        self.archive = ConversationArchive()
         # File context tracking (for pronoun resolution and multi-turn)
         self.file_context = {
@@ -98,6 +101,7 @@ class EnhancedNocturnalAgent:
             'recent_dirs': [],           # Last 5 directories
             'current_cwd': None,         # Track shell's current directory
         }
+        self._is_windows = os.name == "nt"
         try:
             self.per_user_token_limit = int(os.getenv("GROQ_PER_USER_TOKENS", 50000))
         except (TypeError, ValueError):
@@ -141,6 +145,24 @@ class EnhancedNocturnalAgent:
             self._health_ttl = 30.0
         self._recent_sources: List[Dict[str, Any]] = []
+    def _remove_expired_temp_key(self, session_file):
+        """Remove expired temporary API key from session file"""
+        try:
+            import json
+            with open(session_file, 'r') as f:
+                session_data = json.load(f)
+            # Remove temp key fields
+            session_data.pop('temp_api_key', None)
+            session_data.pop('temp_key_expires', None)
+            session_data.pop('temp_key_provider', None)
+            # Write back
+            with open(session_file, 'w') as f:
+                json.dump(session_data, f, indent=2)
+        except Exception as e:
+            logger.warning(f"Failed to remove expired temp key: {e}")
     def _load_authentication(self):
         """Load authentication from session file"""
         use_local_keys = os.getenv("USE_LOCAL_KEYS", "false").lower() == "true"
@@ -162,6 +184,38 @@ class EnhancedNocturnalAgent:
                         session_data = json.load(f)
                         self.auth_token = session_data.get('auth_token')
                         self.user_id = session_data.get('account_id')
+                        # NEW: Check for temporary local API key with expiration
+                        temp_key = session_data.get('temp_api_key')
+                        temp_key_expires = session_data.get('temp_key_expires')
+                        if temp_key and temp_key_expires:
+                            # Check if key is still valid
+                            from datetime import datetime, timezone
+                            try:
+                                expires_at = datetime.fromisoformat(temp_key_expires.replace('Z', '+00:00'))
+                                now = datetime.now(timezone.utc)
+                                if now < expires_at:
+                                    # Key is still valid - use local mode for speed!
+                                    self.temp_api_key = temp_key
+                                    self.temp_key_provider = session_data.get('temp_key_provider', 'cerebras')
+                                    if debug_mode:
+                                        time_left = (expires_at - now).total_seconds() / 3600
+                                        print(f"✅ Using temporary local key (expires in {time_left:.1f}h)")
+                                else:
+                                    # Key expired - remove it and fall back to backend
+                                    if debug_mode:
+                                        print(f"⏰ Temporary key expired, using backend mode")
+                                    self._remove_expired_temp_key(session_file)
+                                    self.temp_api_key = None
+                            except Exception as e:
+                                if debug_mode:
+                                    print(f"⚠️ Error parsing temp key expiration: {e}")
+                                self.temp_api_key = None
+                        else:
+                            self.temp_api_key = None
                         if debug_mode:
                             print(f"🔍 _load_authentication: loaded auth_token={self.auth_token}, user_id={self.user_id}")
                 except Exception as e:
@@ -169,6 +223,7 @@ class EnhancedNocturnalAgent:
                         print(f"🔍 _load_authentication: ERROR loading session: {e}")
                     self.auth_token = None
                     self.user_id = None
+                    self.temp_api_key = None
             else:
                 # FALLBACK: Check if config.env has credentials but session.json is missing
                 # This handles cases where old setup didn't create session.json
@@ -917,6 +972,56 @@ class EnhancedNocturnalAgent:
         if not api_results:
             logger.info("🔍 DEBUG: _format_api_results_for_prompt called with EMPTY api_results")
             return "No API results yet."
+        # Special formatting for shell results to make them VERY clear
+        if "shell_info" in api_results:
+            shell_info = api_results["shell_info"]
+            formatted_parts = ["=" * 60]
+            formatted_parts.append("🔧 SHELL COMMAND EXECUTION RESULTS (ALREADY EXECUTED)")
+            formatted_parts.append("=" * 60)
+            if "command" in shell_info:
+                formatted_parts.append(f"\n📝 Command that was executed:")
+                formatted_parts.append(f"   $ {shell_info['command']}")
+            if "output" in shell_info:
+                formatted_parts.append(f"\n📤 Command output (THIS IS THE RESULT):")
+                formatted_parts.append(f"{shell_info['output']}")
+            if "error" in shell_info:
+                formatted_parts.append(f"\n❌ Error occurred:")
+                formatted_parts.append(f"{shell_info['error']}")
+            if "directory_contents" in shell_info:
+                formatted_parts.append(f"\n📂 Directory listing (THIS IS THE RESULT):")
+                formatted_parts.append(f"{shell_info['directory_contents']}")
+            if "search_results" in shell_info:
+                formatted_parts.append(f"\n🔍 Search results (THIS IS THE RESULT):")
+                formatted_parts.append(f"{shell_info['search_results']}")
+            formatted_parts.append("\n" + "=" * 60)
+            formatted_parts.append("🚨 CRITICAL INSTRUCTION 🚨")
+            formatted_parts.append("The command was ALREADY executed. The output above is the COMPLETE and ONLY result.")
+            formatted_parts.append("YOU MUST present ONLY what is shown in the output above.")
+            formatted_parts.append("DO NOT add file names, paths, or code that are NOT in the output above.")
+            formatted_parts.append("DO NOT make up examples or additional results.")
+            formatted_parts.append("If the output says 'No matches' or is empty, tell the user 'No results found'.")
+            formatted_parts.append("DO NOT ask the user to run any commands - the results are already here.")
+            formatted_parts.append("=" * 60)
+            # Add other api_results
+            other_results = {k: v for k, v in api_results.items() if k != "shell_info"}
+            if other_results:
+                try:
+                    serialized = json.dumps(other_results, indent=2)
+                except Exception:
+                    serialized = str(other_results)
+                formatted_parts.append(f"\nOther data:\n{serialized}")
+            return "\n".join(formatted_parts)
+        # Normal formatting for non-shell results
         try:
             serialized = json.dumps(api_results, indent=2)
         except Exception:
@@ -970,15 +1075,24 @@ class EnhancedNocturnalAgent:
                     "PRIMARY DIRECTIVE: Execute code when needed. You have a persistent shell session. "
                     "When user asks for data analysis, calculations, or file operations: WRITE and EXECUTE the code. "
                     "Languages available: Python, R, SQL, Bash. "
-                    "You can read files, run scripts, perform calculations, and show results."
+                    "🚨 CRITICAL: Commands are AUTOMATICALLY executed. If you see 'shell_info' below, "
+                    "that means the command was ALREADY RUN. NEVER ask users to run commands - just present results."
                 )
             else:
                 intro = (
                     "You are Cite Agent, a truth-seeking research and finance AI with CODE EXECUTION. "
-                    "PRIMARY DIRECTIVE: Accuracy > Agreeableness. Execute code for analysis, calculations, and file operations. "
+                    "PRIMARY DIRECTIVE: Accuracy > Agreeableness. NEVER HALLUCINATE. "
                     "You are a fact-checker and analyst with a persistent shell session. "
                     "You have access to research (Archive), financial data (FinSight SEC filings), and can run Python/R/SQL/Bash. "
-                    "When user asks about files, directories, or data: EXECUTE commands to find answers."
+                    "\n\n"
+                    "🚨 ANTI-HALLUCINATION RULES:\n"
+                    "1. When user asks about files, directories, or data - commands are AUTOMATICALLY executed.\n"
+                    "2. If you see 'shell_info' in results below, that means command was ALREADY RUN.\n"
+                    "3. ONLY present information from shell_info output. DO NOT invent file names, paths, or code.\n"
+                    "4. If shell output is empty or unclear, say 'No results found' or 'Search returned no matches'.\n"
+                    "5. NEVER make up plausible-sounding file paths or code that wasn't in the actual output.\n"
+                    "6. If you're unsure, say 'I couldn't find that' rather than guessing.\n"
+                    "7. NEVER ask the user to run commands - just present the results that were already executed."
                 )
         sections.append(intro)
@@ -1279,7 +1393,7 @@ class EnhancedNocturnalAgent:
                     "temperature": 0.2
                 }
             return {
-                "model": "llama-3.3-70b",  # Cerebras 70B model
+                "model": "gpt-oss-120b",  # PRODUCTION: Cerebras gpt-oss-120b - 100% test pass, 60K TPM
                 "max_tokens": 900,
                 "temperature": 0.3
             }
@@ -1292,7 +1406,7 @@ class EnhancedNocturnalAgent:
                     "temperature": 0.2
                 }
             return {
-                "model": "llama-3.3-70b-versatile",
+                "model": "openai/gpt-oss-120b",  # PRODUCTION: 120B model - 100% test pass rate
                 "max_tokens": 900,
                 "temperature": 0.3
             }
@@ -1514,6 +1628,49 @@ class EnhancedNocturnalAgent:
                 seen.add(t)
                 ordered.append(t)
         return ordered[:4]
+    def _plan_financial_request(self, question: str, session_key: Optional[str] = None) -> Tuple[List[str], List[str]]:
+        """Derive ticker and metric targets for a financial query."""
+        tickers = list(self._extract_tickers_from_text(question))
+        question_lower = question.lower()
+        if not tickers:
+            if "apple" in question_lower:
+                tickers.append("AAPL")
+            if "microsoft" in question_lower:
+                tickers.append("MSFT" if "AAPL" not in tickers else "MSFT")
+        metrics_to_fetch: List[str] = []
+        keyword_map = [
+            ("revenue", ["revenue", "sales", "top line"]),
+            ("grossProfit", ["gross profit", "gross margin", "margin"]),
+            ("operatingIncome", ["operating income", "operating profit", "ebit"]),
+            ("netIncome", ["net income", "profit", "earnings", "bottom line"]),
+        ]
+        for metric, keywords in keyword_map:
+            if any(kw in question_lower for kw in keywords):
+                metrics_to_fetch.append(metric)
+        if session_key:
+            last_topic = self._session_topics.get(session_key)
+        else:
+            last_topic = None
+        if not metrics_to_fetch and last_topic and last_topic.get("metrics"):
+            metrics_to_fetch = list(last_topic["metrics"])
+        if not metrics_to_fetch:
+            metrics_to_fetch = ["revenue", "grossProfit"]
+        deduped: List[str] = []
+        seen: Set[str] = set()
+        for symbol in tickers:
+            if symbol and symbol not in seen:
+                seen.add(symbol)
+                deduped.append(symbol)
+        return deduped[:2], metrics_to_fetch
     async def initialize(self, force_reload: bool = False):
         """Initialize the agent with API keys and shell session."""
@@ -1548,8 +1705,10 @@ class EnhancedNocturnalAgent:
             use_local_keys_env = os.getenv("USE_LOCAL_KEYS", "").lower()
             if has_session:
-                # Session exists → ALWAYS use backend mode (ignore USE_LOCAL_KEYS)
-                use_local_keys = False
+                # Session exists → Check if we have temp local key for speed
+                # If temp key exists and valid → use local mode (fast!)
+                # Otherwise → use backend mode (secure but slow)
+                use_local_keys = hasattr(self, 'temp_api_key') and self.temp_api_key is not None
             elif use_local_keys_env == "true":
                 # No session but dev mode requested → use local keys
                 use_local_keys = True
@@ -1597,16 +1756,24 @@ class EnhancedNocturnalAgent:
                     else:
                         print("⚠️ Not authenticated. Please log in to use the agent.")
             else:
-                # Local keys mode - load Cerebras API keys (primary) with Groq fallback
-                self.auth_token = None
-                self.user_id = None
+                # Local keys mode - use temporary key if available, otherwise load from env
+                # Check if we have a temporary key (for speed + security)
+                if hasattr(self, 'temp_api_key') and self.temp_api_key:
+                    # Use temporary key provided by backend
+                    self.api_keys = [self.temp_api_key]
+                    self.llm_provider = getattr(self, 'temp_key_provider', 'cerebras')
+                else:
+                    # Fallback: Load permanent keys from environment (dev mode only)
+                    self.auth_token = None
+                    self.user_id = None
-                # Load Cerebras keys from environment (PRIMARY)
-                self.api_keys = []
-                for i in range(1, 10):  # Check CEREBRAS_API_KEY_1 through CEREBRAS_API_KEY_9
-                    key = os.getenv(f"CEREBRAS_API_KEY_{i}") or os.getenv(f"CEREBRAS_API_KEY")
-                    if key and key not in self.api_keys:
-                        self.api_keys.append(key)
+                    # Load Cerebras keys from environment (PRIMARY)
+                    self.api_keys = []
+                    for i in range(1, 10):  # Check CEREBRAS_API_KEY_1 through CEREBRAS_API_KEY_9
+                        key = os.getenv(f"CEREBRAS_API_KEY_{i}") or os.getenv(f"CEREBRAS_API_KEY")
+                        if key and key not in self.api_keys:
+                            self.api_keys.append(key)
                 # Fallback to Groq keys if no Cerebras keys found
                 if not self.api_keys:
@@ -1650,8 +1817,12 @@ class EnhancedNocturnalAgent:
             if self.shell_session is None:
                 try:
+                    if self._is_windows:
+                        command = ['powershell', '-NoLogo', '-NoProfile']
+                    else:
+                        command = ['bash']
                     self.shell_session = subprocess.Popen(
-                        ['bash'],
+                        command,
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,
@@ -1726,7 +1897,7 @@ class EnhancedNocturnalAgent:
                 "query": query,  # Keep query clean
                 "conversation_history": conversation_history or [],
                 "api_context": api_results,  # Send API results separately
-                "model": "llama-3.3-70b",  # Compatible with Cerebras (priority) and Groq
+                "model": "openai/gpt-oss-120b",  # PRODUCTION: 120B - best test results
                 "temperature": 0.2,  # Low temp for accuracy
                 "max_tokens": 4000
             }
@@ -1796,7 +1967,7 @@ class EnhancedNocturnalAgent:
                                     response=response_text,
                                     tokens_used=tokens,
                                     tools_used=all_tools,
-                                    model=data.get('model', 'llama-3.3-70b'),
+                                    model=data.get('model', 'openai/gpt-oss-120b'),
                                     timestamp=data.get('timestamp', datetime.now(timezone.utc).isoformat()),
                                     api_results=api_results
                                 )
@@ -1835,7 +2006,7 @@ class EnhancedNocturnalAgent:
                         response=response_text,
                         tokens_used=tokens,
                         tools_used=all_tools,
-                        model=data.get('model', 'llama-3.3-70b-versatile'),
+                        model=data.get('model', 'openai/gpt-oss-120b'),
                         timestamp=data.get('timestamp', datetime.now(timezone.utc).isoformat()),
                         api_results=api_results
                     )
@@ -2186,7 +2357,30 @@ class EnhancedNocturnalAgent:
             results.update(payload)
         return results
+    def _looks_like_user_prompt(self, command: str) -> bool:
+        command_lower = command.strip().lower()
+        if not command_lower:
+            return True
+        phrases = [
+            "ask the user",
+            "can you run",
+            "please run",
+            "tell the user",
+            "ask them",
+        ]
+        return any(phrase in command_lower for phrase in phrases)
+    def _infer_shell_command(self, question: str) -> str:
+        question_lower = question.lower()
+        if any(word in question_lower for word in ["list", "show", "files", "directory", "folder", "ls"]):
+            return "ls -lah"
+        if any(word in question_lower for word in ["where", "pwd", "current directory", "location"]):
+            return "pwd"
+        if "read" in question_lower and any(ext in question_lower for ext in [".py", ".txt", ".csv", "file"]):
+            return "ls -lah"
+        return "pwd"
     def execute_command(self, command: str) -> str:
         """Execute command and return output - improved with echo markers"""
         try:
@@ -2216,10 +2410,14 @@ class EnhancedNocturnalAgent:
             marker = f"CMD_DONE_{uuid.uuid4().hex[:8]}"
             # Send command with marker
-            full_command = f"{command}; echo '{marker}'\n"
+            terminator = "\r\n" if self._is_windows else "\n"
+            if self._is_windows:
+                full_command = f"{command}; echo '{marker}'{terminator}"
+            else:
+                full_command = f"{command}; echo '{marker}'{terminator}"
             self.shell_session.stdin.write(full_command)
             self.shell_session.stdin.flush()
             # Read until we see the marker
             output_lines = []
             start_time = time.time()
@@ -2510,7 +2708,7 @@ class EnhancedNocturnalAgent:
             - count: {"counts": {file: match_count}}
         """
         try:
-            import re
+            # import re removed - using module-level import
             # Expand ~ to home directory
             path = os.path.expanduser(path)
@@ -2781,7 +2979,39 @@ class EnhancedNocturnalAgent:
         # Default: Treat unknown commands as requiring user awareness
         return 'WRITE'
+    def _format_archive_summary(
+        self,
+        question: str,
+        response: str,
+        api_results: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """Prepare compact summary payload for the conversation archive."""
+        clean_question = question.strip().replace("\n", " ")
+        summary_text = response.strip().replace("\n", " ")
+        if len(summary_text) > 320:
+            summary_text = summary_text[:317].rstrip() + "..."
+        citations: List[str] = []
+        research = api_results.get("research")
+        if isinstance(research, dict):
+            for item in research.get("results", [])[:3]:
+                title = item.get("title") or item.get("paperTitle")
+                if title:
+                    citations.append(title)
+        financial = api_results.get("financial")
+        if isinstance(financial, dict):
+            tickers = ", ".join(sorted(financial.keys()))
+            if tickers:
+                citations.append(f"Financial data: {tickers}")
+        return {
+            "question": clean_question,
+            "summary": summary_text,
+            "citations": citations,
+        }
     def _is_safe_shell_command(self, cmd: str) -> bool:
         """
         Compatibility wrapper for old safety check.
@@ -2840,6 +3070,71 @@ class EnhancedNocturnalAgent:
         self.daily_token_usage += tokens
         if user_id:
             self.user_token_usage[user_id] = self.user_token_usage.get(user_id, 0) + tokens
+    def _finalize_interaction(
+        self,
+        request: ChatRequest,
+        response: ChatResponse,
+        tools_used: Optional[List[str]],
+        api_results: Optional[Dict[str, Any]],
+        request_analysis: Optional[Dict[str, Any]],
+        *,
+        log_workflow: bool = True,
+    ) -> ChatResponse:
+        """Common tail logic: history, memory, workflow logging, archive save."""
+        merged_tools: List[str] = []
+        seen: Set[str] = set()
+        for tool in (tools_used or []) + (response.tools_used or []):
+            if tool and tool not in seen:
+                merged_tools.append(tool)
+                seen.add(tool)
+        response.tools_used = merged_tools
+        if request_analysis and not response.confidence_score:
+            response.confidence_score = request_analysis.get("confidence", response.confidence_score) or 0.0
+        self.conversation_history.append({"role": "user", "content": request.question})
+        self.conversation_history.append({"role": "assistant", "content": response.response})
+        self._update_memory(
+            request.user_id,
+            request.conversation_id,
+            f"Q: {request.question[:100]}... A: {response.response[:100]}...",
+        )
+        if log_workflow:
+            try:
+                self.workflow.save_query_result(
+                    query=request.question,
+                    response=response.response,
+                    metadata={
+                        "tools_used": response.tools_used,
+                        "tokens_used": response.tokens_used,
+                        "confidence_score": response.confidence_score,
+                    },
+                )
+            except Exception:
+                logger.debug("Workflow logging failed", exc_info=True)
+        if getattr(self, "archive", None):
+            try:
+                archive_payload = self._format_archive_summary(
+                    request.question,
+                    response.response,
+                    api_results or {},
+                )
+                self.archive.record_entry(
+                    request.user_id,
+                    request.conversation_id,
+                    archive_payload["question"],
+                    archive_payload["summary"],
+                    response.tools_used,
+                    archive_payload["citations"],
+                )
+            except Exception as archive_error:
+                logger.debug("Archive write failed", error=str(archive_error))
+        return response
     def _get_memory_context(self, user_id: str, conversation_id: str) -> str:
         """Get relevant memory context for the conversation"""
@@ -3093,8 +3388,15 @@ class EnhancedNocturnalAgent:
         # System/technical indicators
         system_keywords = [
-            'file', 'directory', 'command', 'run', 'execute', 'install',
-            'python', 'code', 'script', 'program', 'system', 'terminal'
+            'file', 'files', 'directory', 'directories', 'folder', 'folders',
+            'command', 'run', 'execute', 'install',
+            'python', 'code', 'script', 'scripts', 'program', 'system', 'terminal',
+            'find', 'search for', 'locate', 'list', 'show me', 'where is',
+            'what files', 'which files', 'how many files',
+            'grep', 'search', 'look for', 'count',
+            '.py', '.txt', '.js', '.java', '.cpp', '.c', '.h',
+            'function', 'class', 'definition', 'route', 'endpoint',
+            'codebase', 'project structure', 'source code'
         ]
         question_lower = question.lower()
@@ -3212,7 +3514,7 @@ class EnhancedNocturnalAgent:
         question_lower = question.lower()
         # Pattern 1: Multiple years without SPECIFIC topic (e.g., "2008, 2015, 2019")
-        import re
+        # import re removed - using module-level import
         years_pattern = r'\b(19\d{2}|20\d{2})\b'
         years = re.findall(years_pattern, question)
         if len(years) >= 2:
@@ -3314,9 +3616,10 @@ IMPORTANT RULES:
 7. For finding things, use: find ~ -maxdepth 4 -name '*pattern*' 2>/dev/null
 8. For creating files: touch filename OR echo "content" > filename
 9. For creating directories: mkdir dirname
-10. ALWAYS include 2>/dev/null to suppress errors from find
+10. ALWAYS include 2>/dev/null to suppress errors from find and grep
 11. 🚨 MULTI-STEP QUERIES: For queries like "read X and do Y", ONLY generate the FIRST step (reading X). The LLM will handle subsequent steps after seeing the file contents.
 12. 🚨 NEVER use python -m py_compile or other code execution for finding bugs - just read the file with cat/head
+13. 🚨 FOR GREP: When searching in a DIRECTORY (not a specific file), ALWAYS use -r flag for recursive search: grep -rn 'pattern' /path/to/dir 2>/dev/null
 Examples:
 "where am i?" → {{"action": "execute", "command": "pwd", "reason": "Show current directory", "updates_context": false}}
@@ -3329,7 +3632,9 @@ Examples:
 "write hello.txt with content Hello World" → {{"action": "execute", "command": "echo 'Hello World' > hello.txt", "reason": "Create file with content", "updates_context": true}}
 "create results.txt with line 1 and line 2" → {{"action": "execute", "command": "echo 'line 1' > results.txt && echo 'line 2' >> results.txt", "reason": "Create file with multiple lines", "updates_context": true}}
 "fix bug in script.py change OLD to NEW" → {{"action": "execute", "command": "sed -i 's/OLD/NEW/g' script.py && echo 'Fixed script.py'", "reason": "Edit file to fix bug", "updates_context": true}}
-"search for TODO in py files" → {{"action": "execute", "command": "grep -n 'TODO' *.py 2>/dev/null", "reason": "Find TODO comments", "updates_context": false}}
+"search for TODO in py files here" → {{"action": "execute", "command": "grep -n 'TODO' *.py 2>/dev/null", "reason": "Find TODO in current directory py files", "updates_context": false}}
+"search for TODO in /some/directory" → {{"action": "execute", "command": "grep -rn 'TODO' /some/directory 2>/dev/null", "reason": "Recursively search directory for TODO", "updates_context": false}}
+"search for TODO comments in /tmp/test" → {{"action": "execute", "command": "grep -rn 'TODO' /tmp/test 2>/dev/null", "reason": "Recursively search directory for TODO", "updates_context": false}}
 "find all bugs in code" → {{"action": "execute", "command": "grep -rn 'BUG:' . 2>/dev/null", "reason": "Search for bug markers in code", "updates_context": false}}
 "read analyze.py and find bugs" → {{"action": "execute", "command": "head -200 analyze.py", "reason": "Read file to analyze bugs", "updates_context": false}}
 "show me calc.py completely" → {{"action": "execute", "command": "cat calc.py", "reason": "Display entire file", "updates_context": false}}
@@ -3344,12 +3649,28 @@ Examples:
 JSON:"""
                 try:
-                    plan_response = await self.call_backend_query(
-                        query=planner_prompt,
-                        conversation_history=[],
-                        api_results={},
-                        tools_used=[]
-                    )
+                    # Use LOCAL LLM for planning (don't recurse into call_backend_query)
+                    # This avoids infinite recursion and uses temp key if available
+                    if hasattr(self, 'client') and self.client:
+                        # Local mode with temp key or dev keys
+                        # Use gpt-oss-120b for Cerebras (100% test pass, better accuracy)
+                        model_name = "gpt-oss-120b" if self.llm_provider == "cerebras" else "llama-3.1-70b-versatile"
+                        response = self.client.chat.completions.create(
+                            model=model_name,
+                            messages=[{"role": "user", "content": planner_prompt}],
+                            max_tokens=500,
+                            temperature=0.3
+                        )
+                        plan_text = response.choices[0].message.content.strip()
+                        plan_response = ChatResponse(response=plan_text)
+                    else:
+                        # Backend mode - make a simplified backend call
+                        plan_response = await self.call_backend_query(
+                            query=planner_prompt,
+                            conversation_history=[],
+                            api_results={},
+                            tools_used=[]
+                        )
                     plan_text = plan_response.response.strip()
                     if '```' in plan_text:
@@ -3363,9 +3684,27 @@ JSON:"""
                     if debug_mode:
                         print(f"🔍 SHELL PLAN: {plan}")
                     # GENERIC COMMAND EXECUTION - No more hardcoded actions!
+                    if shell_action != "execute" and might_need_shell:
+                        command = self._infer_shell_command(request.question)
+                        shell_action = "execute"
+                        updates_context = False
+                        if debug_mode:
+                            print(f"🔄 Planner opted out; inferred fallback command: {command}")
+                    if shell_action == "execute" and not command:
+                        command = self._infer_shell_command(request.question)
+                        plan["command"] = command
+                        if debug_mode:
+                            print(f"🔄 Planner omitted command, inferred {command}")
                     if shell_action == "execute" and command:
+                        if self._looks_like_user_prompt(command):
+                            command = self._infer_shell_command(request.question)
+                            plan["command"] = command
+                            if debug_mode:
+                                print(f"🔄 Replacing delegating plan with command: {command}")
                         # Check command safety
                         safety_level = self._classify_command_safety(command)
@@ -3431,7 +3770,7 @@ JSON:"""
                             # Check for file search commands (find)
                             if not intercepted and 'find' in command and '-name' in command:
                                 try:
-                                    import re
+                                    # import re removed - using module-level import
                                     # Extract pattern: find ... -name '*pattern*'
                                     name_match = re.search(r"-name\s+['\"]?\*?([^'\"*\s]+)\*?['\"]?", command)
                                     if name_match:
@@ -3450,14 +3789,15 @@ JSON:"""
                             # Check for file writing commands (echo > file, grep > file, etc.) - CHECK THIS FIRST!
                             # This must come BEFORE the plain grep interceptor
-                            if not intercepted and ('>' in command or '>>' in command):
+                            # BUT: Ignore 2>/dev/null which is error redirection, not file writing
+                            if not intercepted and ('>' in command or '>>' in command) and '2>' not in command:
                                 try:
-                                    import re
+                                    # import re removed - using module-level import
                                     # Handle grep ... > file (intercept and execute grep, then write output)
                                     if 'grep' in command and '>' in command:
                                         # Extract: grep -rn 'pattern' path > output.txt
-                                        grep_match = re.search(r"grep\s+(.*)>\s*(\S+)", command)
+                                        grep_match = re.search(r"grep\s+(.*)\s>\s*(\S+)", command)
                                         if grep_match:
                                             grep_part = grep_match.group(1).strip()
                                             output_file = grep_match.group(2)
@@ -3523,7 +3863,7 @@ JSON:"""
                             # Check for sed editing commands
                             if not intercepted and command.startswith('sed '):
                                 try:
-                                    import re
+                                    # import re removed - using module-level import
                                     # sed 's/old/new/g' file OR sed -i 's/old/new/' file
                                     match = re.search(r"sed.*?['\"]s/([^/]+)/([^/]+)/", command)
                                     if match:
@@ -3549,7 +3889,7 @@ JSON:"""
                             # Check for heredoc file creation (cat << EOF > file)
                             if not intercepted and '<<' in command and ('EOF' in command or 'HEREDOC' in command):
                                 try:
-                                    import re
+                                    # import re removed - using module-level import
                                     # Extract: cat << EOF > filename OR cat > filename << EOF
                                     # Note: We can't actually get the heredoc content from a single command line
                                     # This would need to be handled differently (multi-line input)
@@ -3561,24 +3901,45 @@ JSON:"""
                             # Check for content search commands (grep -r) WITHOUT redirection
                             # This comes AFTER grep > file interceptor to avoid conflicts
-                            if not intercepted and command.startswith('grep ') and ('-r' in command or '-R' in command):
+                            if not intercepted and 'grep' in command and ('-r' in command or '-R' in command):
                                 try:
-                                    import re
+                                    # import re removed - using module-level import
                                     # Extract pattern: grep -r 'pattern' path
                                     pattern_match = re.search(r"grep.*?['\"]([^'\"]+)['\"]", command)
                                     if pattern_match:
                                         pattern = pattern_match.group(1)
-                                        # Extract path (last argument usually)
-                                        parts = command.split()
-                                        search_path = parts[-1] if len(parts) > 2 else "."
+                                        # Extract path - skip flags and options
+                                        parts = [p for p in command.split() if not p.startswith('-') and p != 'grep' and p != '2>/dev/null']
+                                        # Path is after pattern (skip the quoted pattern)
+                                        search_path = parts[-1] if len(parts) >= 2 else "."
+                                        # Detect file pattern from command (e.g., *.py, *.txt) or use *
+                                        file_pattern = "*"
+                                        if '*.py' in command:
+                                            file_pattern = "*.py"
+                                        elif '*.txt' in command:
+                                            file_pattern = "*.txt"
+                                        result = self.grep_search(pattern, search_path, file_pattern, output_mode="content")
+                                        # Format grep results
+                                        if 'matches' in result and result['matches']:
+                                            output_parts = []
+                                            for file_path, matches in result['matches'].items():
+                                                output_parts.append(f"{file_path}:")
+                                                for line_num, line_content in matches[:10]:  # Limit per file
+                                                    output_parts.append(f"  {line_num}: {line_content}")
+                                            output = '\n'.join(output_parts)
+                                        else:
+                                            output = f"No matches found for '{pattern}'"
-                                        result = self.grep_search(pattern, search_path, "*.py", output_mode="files_with_matches")
-                                        output = f"Files matching '{pattern}':\n" + '\n'.join(result['files'][:20])
                                         intercepted = True
                                         tools_used.append("grep_search")
                                         if debug_mode:
-                                            print(f"🔄 Intercepted: {command} → grep_search({pattern}, {search_path})")
-                                except:
+                                            print(f"🔄 Intercepted: {command} → grep_search({pattern}, {search_path}, {file_pattern})")
+                                except Exception as e:
+                                    if debug_mode:
+                                        print(f"⚠️  Grep interceptor failed: {e}")
                                     pass
                             # If not intercepted, execute as shell command
@@ -3597,7 +3958,7 @@ JSON:"""
                                 # Update file context if needed
                                 if updates_context:
-                                    import re
+                                    # import re removed - using module-level import
                                     # Extract file paths from command
                                     file_patterns = r'([a-zA-Z0-9_\-./]+\.(py|r|csv|txt|json|md|ipynb|rmd))'
                                     files_mentioned = re.findall(file_patterns, command, re.IGNORECASE)
@@ -3695,7 +4056,7 @@ JSON:"""
                     elif shell_action == "read_file":
                         # NEW: Read and inspect file (R, Python, CSV, etc.)
-                        import re  # Import at function level
+                        # import re removed - using module-level import
                         file_path = plan.get("file_path", "")
                         if not file_path and might_need_shell:
@@ -3799,58 +4160,22 @@ JSON:"""
                 # FinSight API for financial data - Use LLM for ticker/metric extraction
                 if "finsight" in request_analysis.get("apis", []):
-                    # LLM extracts ticker + metric (more accurate than regex)
-                    finance_prompt = f"""Extract financial query details from user's question.
-User query: "{request.question}"
-Respond with JSON:
-{{
-  "tickers": ["AAPL", "TSLA"] (stock symbols - infer from company names if needed),
-  "metric": "revenue|marketCap|price|netIncome|eps|freeCashFlow|grossProfit"
-}}
-Examples:
-- "Tesla revenue" → {{"tickers": ["TSLA"], "metric": "revenue"}}
-- "What's Apple worth?" → {{"tickers": ["AAPL"], "metric": "marketCap"}}
-- "tsla stock price" → {{"tickers": ["TSLA"], "metric": "price"}}
-- "Microsoft profit" → {{"tickers": ["MSFT"], "metric": "netIncome"}}
-JSON:"""
+                    session_key = f"{request.user_id}:{request.conversation_id}"
+                    tickers, metrics_to_fetch = self._plan_financial_request(request.question, session_key)
+                    financial_payload: Dict[str, Any] = {}
+                    for ticker in tickers:
+                        result = await self.get_financial_metrics(ticker, metrics_to_fetch)
+                        financial_payload[ticker] = result
+                    if financial_payload:
+                        self._session_topics[session_key] = {
+                            "tickers": tickers,
+                            "metrics": metrics_to_fetch,
+                        }
+                        api_results["financial"] = financial_payload
+                        tools_used.append("finsight_api")
-                    try:
-                        finance_response = await self.call_backend_query(
-                            query=finance_prompt,
-                            conversation_history=[],
-                            api_results={},
-                            tools_used=[]
-                        )
-                        import json as json_module
-                        finance_text = finance_response.response.strip()
-                        if '```' in finance_text:
-                            finance_text = finance_text.split('```')[1].replace('json', '').strip()
-                        finance_plan = json_module.loads(finance_text)
-                        tickers = finance_plan.get("tickers", [])
-                        metric = finance_plan.get("metric", "revenue")
-                        if debug_mode:
-                            print(f"🔍 LLM FINANCE PLAN: tickers={tickers}, metric={metric}")
-                        if tickers:
-                            # Call FinSight with extracted ticker + metric
-                            financial_data = await self._call_finsight_api(f"calc/{tickers[0]}/{metric}")
-                            if debug_mode:
-                                print(f"🔍 FinSight returned: {list(financial_data.keys()) if financial_data else None}")
-                            if financial_data and "error" not in financial_data:
-                                api_results["financial"] = financial_data
-                                tools_used.append("finsight_api")
-                    except Exception as e:
-                        if debug_mode:
-                            print(f"🔍 Finance LLM extraction failed: {e}")
             # ========================================================================
             # PRIORITY 3: WEB SEARCH (Fallback - only if shell didn't handle AND no data yet)
             # ========================================================================
@@ -3933,12 +4258,27 @@ Respond with JSON:
 JSON:"""
                 try:
-                    web_decision_response = await self.call_backend_query(
-                        query=web_decision_prompt,
-                        conversation_history=[],
-                        api_results={},
-                        tools_used=[]
-                    )
+                    # Use LOCAL LLM for web search decision (avoid recursion)
+                    if hasattr(self, 'client') and self.client:
+                        # Local mode
+                        # Use gpt-oss-120b for Cerebras (100% test pass, better accuracy)
+                        model_name = "gpt-oss-120b" if self.llm_provider == "cerebras" else "llama-3.1-70b-versatile"
+                        response = self.client.chat.completions.create(
+                            model=model_name,
+                            messages=[{"role": "user", "content": web_decision_prompt}],
+                            max_tokens=300,
+                            temperature=0.2
+                        )
+                        decision_text = response.choices[0].message.content.strip()
+                        web_decision_response = ChatResponse(response=decision_text)
+                    else:
+                        # Backend mode
+                        web_decision_response = await self.call_backend_query(
+                            query=web_decision_prompt,
+                            conversation_history=[],
+                            api_results={},
+                            tools_used=[]
+                        )
                     import json as json_module
                     decision_text = web_decision_response.response.strip()
@@ -3981,7 +4321,7 @@ JSON:"""
                 # This fixes the issue where LLM shows corrected code but doesn't create the file
                 if any(keyword in request.question.lower() for keyword in ['create', 'write', 'save', 'generate', 'fixed', 'corrected']):
                     # Extract filename from query (e.g., "write to foo.py", "create bar_fixed.py")
-                    import re
+                    # Note: re is already imported at module level (line 12)
                     filename_match = re.search(r'(?:to|create|write|save|generate)\s+(\w+[._-]\w+\.[\w]+)', request.question, re.IGNORECASE)
                     if not filename_match:
                         # Try pattern: "foo_fixed.py" or "bar.py"
@@ -4010,11 +4350,14 @@ JSON:"""
                                 if debug_mode:
                                     print(f"⚠️ Auto-write failed: {e}")
-                # CRITICAL: Save to conversation history
-                self.conversation_history.append({"role": "user", "content": request.question})
-                self.conversation_history.append({"role": "assistant", "content": response.response})
-                return response
+                return self._finalize_interaction(
+                    request,
+                    response,
+                    tools_used,
+                    api_results,
+                    request_analysis,
+                    log_workflow=False,
+                )
             # DEV MODE ONLY: Direct Groq calls (only works with local API keys)
             # This code path won't execute in production since self.client = None
@@ -4049,6 +4392,26 @@ JSON:"""
             # Get memory context
             memory_context = self._get_memory_context(request.user_id, request.conversation_id)
+            archive_context = self.archive.get_recent_context(
+                request.user_id,
+                request.conversation_id,
+                limit=3,
+            ) if getattr(self, "archive", None) else ""
+            if archive_context:
+                if memory_context:
+                    memory_context = f"{memory_context}\n\n{archive_context}"
+                else:
+                    memory_context = archive_context
+            archive_context = self.archive.get_recent_context(
+                request.user_id,
+                request.conversation_id,
+                limit=3,
+            ) if getattr(self, "archive", None) else ""
+            if archive_context:
+                if memory_context:
+                    memory_context = f"{memory_context}\n\n{archive_context}"
+                else:
+                    memory_context = archive_context
             # Ultra-light handling for small talk to save tokens entirely
             if self._is_simple_greeting(request.question):
@@ -4154,44 +4517,17 @@ JSON:"""
                 return self._respond_with_workspace_listing(request, workspace_listing)
             if "finsight" in request_analysis["apis"]:
-                # Extract tickers from symbols or company names
-                tickers = self._extract_tickers_from_text(request.question)
-                financial_payload = {}
                 session_key = f"{request.user_id}:{request.conversation_id}"
-                last_topic = self._session_topics.get(session_key)
-                if not tickers:
-                    # Heuristic defaults for common requests
-                    if "apple" in request.question.lower():
-                        tickers = ["AAPL"]
-                    if "microsoft" in request.question.lower():
-                        tickers = tickers + ["MSFT"] if "AAPL" in tickers else ["MSFT"]
-                # Determine which metrics to fetch based on query keywords
-                metrics_to_fetch = []
-                if any(kw in question_lower for kw in ["revenue", "sales", "top line"]):
-                    metrics_to_fetch.append("revenue")
-                if any(kw in question_lower for kw in ["gross profit", "gross margin", "margin"]):
-                    metrics_to_fetch.append("grossProfit")
-                if any(kw in question_lower for kw in ["operating income", "operating profit", "ebit"]):
-                    metrics_to_fetch.append("operatingIncome")
-                if any(kw in question_lower for kw in ["net income", "profit", "earnings", "bottom line"]):
-                    metrics_to_fetch.append("netIncome")
-                # Default to key metrics if no specific request
-                if not metrics_to_fetch and last_topic and last_topic.get("metrics"):
-                    metrics_to_fetch = list(last_topic["metrics"])
-                if not metrics_to_fetch:
-                    metrics_to_fetch = ["revenue", "grossProfit"]
-                # Fetch metrics for each ticker (cap 2 tickers)
-                for t in tickers[:2]:
-                    result = await self.get_financial_metrics(t, metrics_to_fetch)
-                    financial_payload[t] = result
+                tickers, metrics_to_fetch = self._plan_financial_request(request.question, session_key)
+                financial_payload: Dict[str, Any] = {}
+                for ticker in tickers:
+                    result = await self.get_financial_metrics(ticker, metrics_to_fetch)
+                    financial_payload[ticker] = result
                 if financial_payload:
                     self._session_topics[session_key] = {
-                        "tickers": tickers[:2],
+                        "tickers": tickers,
                         "metrics": metrics_to_fetch,
                     }
                     direct_finance = (
@@ -4265,7 +4601,18 @@ JSON:"""
                             summary_tokens = summary_response.usage.total_tokens
                             self._charge_tokens(request.user_id, summary_tokens)
                             self.total_cost += (summary_tokens / 1000) * self.cost_per_1k_tokens
+                        else:
+                            summary_tokens = 0
                         messages.append({"role": "system", "content": f"Previous conversation summary: {conversation_summary}"})
+                        self._emit_telemetry(
+                            "history_summarized",
+                            request,
+                            success=True,
+                            extra={
+                                "history_length": len(self.conversation_history),
+                                "summary_tokens": summary_tokens,
+                            },
+                        )
                 except:
                     # If summary fails, just use recent history
                     pass
@@ -4429,29 +4776,35 @@ JSON:"""
             if footer:
                 final_response = f"{final_response}\n\n_{footer}_"
-            # Update conversation history
-            self.conversation_history.append({"role": "user", "content": request.question})
-            self.conversation_history.append({"role": "assistant", "content": final_response})
-            # Update memory
-            self._update_memory(
-                request.user_id,
-                request.conversation_id,
-                f"Q: {request.question[:100]}... A: {final_response[:100]}..."
-            )
-            # Save to workflow history automatically
-            self.workflow.save_query_result(
-                query=request.question,
-                response=final_response,
-                metadata={
-                    "tools_used": tools_used,
-                    "tokens_used": tokens_used,
-                    "confidence_score": request_analysis['confidence']
-                }
-            )
-            return ChatResponse(
+            # TRUTH-SEEKING VERIFICATION: Check if response matches actual shell output
+            if "shell_info" in api_results and api_results["shell_info"]:
+                shell_output = api_results["shell_info"].get("output", "")
+                # If shell output was empty or says "no results", but response lists specific items
+                # This indicates hallucination
+                if (not shell_output or "no" in shell_output.lower() and "found" in shell_output.lower()):
+                    # Check if response contains made-up file paths or code
+                    response_lower = final_response.lower()
+                    if any(indicator in response_lower for indicator in [".py:", "found in", "route", "@app", "@router", "file1", "file2"]):
+                        # Hallucination detected - replace with honest answer
+                        final_response = "I searched but found no matches. The search returned no results."
+                        logger.warning("🚨 Hallucination prevented: LLM tried to make up results when shell output was empty")
+            expected_tools: Set[str] = set()
+            if "finsight" in request_analysis.get("apis", []):
+                expected_tools.add("finsight_api")
+            if "archive" in request_analysis.get("apis", []):
+                expected_tools.add("archive_api")
+            for expected in expected_tools:
+                if expected not in tools_used:
+                    self._emit_telemetry(
+                        "tool_missing",
+                        request,
+                        success=False,
+                        extra={"expected": expected},
+                    )
+            response_obj = ChatResponse(
                 response=final_response,
                 tools_used=tools_used,
                 reasoning_steps=[f"Request type: {request_analysis['type']}", f"APIs used: {request_analysis['apis']}"],
@@ -4461,9 +4814,22 @@ JSON:"""
                 execution_results=execution_results,
                 api_results=api_results
             )
+            return self._finalize_interaction(
+                request,
+                response_obj,
+                tools_used,
+                api_results,
+                request_analysis,
+                log_workflow=True,
+            )
         except Exception as e:
+            import traceback
             details = str(e)
+            debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
+            if debug_mode:
+                print("🔴 FULL TRACEBACK:")
+                traceback.print_exc()
             message = (
                 "⚠️ Something went wrong while orchestrating your request, but no actions were performed. "
                 "Please retry, and if the issue persists share this detail with the team: {details}."
@@ -4606,24 +4972,13 @@ JSON:"""
             # FinSight API (abbreviated)
             if "finsight" in request_analysis["apis"]:
-                tickers = self._extract_tickers_from_text(request.question)
+                session_key = f"{request.user_id}:{request.conversation_id}"
+                tickers, metrics_to_fetch = self._plan_financial_request(request.question, session_key)
                 financial_payload = {}
-                if not tickers:
-                    if "apple" in question_lower:
-                        tickers = ["AAPL"]
-                    if "microsoft" in question_lower:
-                        tickers = ["MSFT"] if not tickers else tickers + ["MSFT"]
-                metrics_to_fetch = ["revenue", "grossProfit"]
-                if any(kw in question_lower for kw in ["revenue", "sales"]):
-                    metrics_to_fetch = ["revenue"]
-                if any(kw in question_lower for kw in ["profit", "margin"]):
-                    metrics_to_fetch.append("grossProfit")
-                for t in tickers[:2]:
-                    result = await self.get_financial_metrics(t, metrics_to_fetch)
-                    financial_payload[t] = result
+                for ticker in tickers:
+                    result = await self.get_financial_metrics(ticker, metrics_to_fetch)
+                    financial_payload[ticker] = result
                 if financial_payload:
                     api_results["financial"] = financial_payload

cite-agent 1.3.8__py3-none-any.whl → 1.3.9__py3-none-any.whl

cite-agent 1.3.8__py3-none-any.whl → 1.3.9__py3-none-any.whl