cite-agent 1.3.6__py3-none-any.whl → 1.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cite-agent might be problematic. Click here for more details.

Files changed (36)
  1. cite_agent/__version__.py +1 -1
  2. cite_agent/cli.py +9 -2
  3. cite_agent/enhanced_ai_agent.py +1100 -77
  4. {cite_agent-1.3.6.dist-info → cite_agent-1.3.8.dist-info}/METADATA +1 -1
  5. cite_agent-1.3.8.dist-info/RECORD +31 -0
  6. {cite_agent-1.3.6.dist-info → cite_agent-1.3.8.dist-info}/top_level.txt +0 -1
  7. cite_agent-1.3.6.dist-info/RECORD +0 -57
  8. src/__init__.py +0 -1
  9. src/services/__init__.py +0 -132
  10. src/services/auth_service/__init__.py +0 -3
  11. src/services/auth_service/auth_manager.py +0 -33
  12. src/services/graph/__init__.py +0 -1
  13. src/services/graph/knowledge_graph.py +0 -194
  14. src/services/llm_service/__init__.py +0 -5
  15. src/services/llm_service/llm_manager.py +0 -495
  16. src/services/paper_service/__init__.py +0 -5
  17. src/services/paper_service/openalex.py +0 -231
  18. src/services/performance_service/__init__.py +0 -1
  19. src/services/performance_service/rust_performance.py +0 -395
  20. src/services/research_service/__init__.py +0 -23
  21. src/services/research_service/chatbot.py +0 -2056
  22. src/services/research_service/citation_manager.py +0 -436
  23. src/services/research_service/context_manager.py +0 -1441
  24. src/services/research_service/conversation_manager.py +0 -597
  25. src/services/research_service/critical_paper_detector.py +0 -577
  26. src/services/research_service/enhanced_research.py +0 -121
  27. src/services/research_service/enhanced_synthesizer.py +0 -375
  28. src/services/research_service/query_generator.py +0 -777
  29. src/services/research_service/synthesizer.py +0 -1273
  30. src/services/search_service/__init__.py +0 -5
  31. src/services/search_service/indexer.py +0 -186
  32. src/services/search_service/search_engine.py +0 -342
  33. src/services/simple_enhanced_main.py +0 -287
  34. {cite_agent-1.3.6.dist-info → cite_agent-1.3.8.dist-info}/WHEEL +0 -0
  35. {cite_agent-1.3.6.dist-info → cite_agent-1.3.8.dist-info}/entry_points.txt +0 -0
  36. {cite_agent-1.3.6.dist-info → cite_agent-1.3.8.dist-info}/licenses/LICENSE +0 -0
@@ -89,6 +89,15 @@ class EnhancedNocturnalAgent:
89
89
  from .workflow import WorkflowManager
90
90
  self.workflow = WorkflowManager()
91
91
  self.last_paper_result = None # Track last paper mentioned for "save that"
92
+
93
+ # File context tracking (for pronoun resolution and multi-turn)
94
+ self.file_context = {
95
+ 'last_file': None, # Last file mentioned/read
96
+ 'last_directory': None, # Last directory mentioned/navigated
97
+ 'recent_files': [], # Last 5 files (for "those files")
98
+ 'recent_dirs': [], # Last 5 directories
99
+ 'current_cwd': None, # Track shell's current directory
100
+ }
92
101
  try:
93
102
  self.per_user_token_limit = int(os.getenv("GROQ_PER_USER_TOKENS", 50000))
94
103
  except (TypeError, ValueError):
@@ -994,7 +1003,17 @@ class EnhancedNocturnalAgent:
994
1003
  capability_lines.append("• You can SEARCH user's paper collection")
995
1004
  capability_lines.append("• You can COPY text to user's clipboard")
996
1005
  capability_lines.append("• User's query history is automatically tracked")
997
-
1006
+
1007
+ # Add file operation capabilities (Claude Code / Cursor parity)
1008
+ capability_lines.append("")
1009
+ capability_lines.append("📁 DIRECT FILE OPERATIONS (Always available):")
1010
+ capability_lines.append("• read_file(path) - Read files with line numbers (like cat but better)")
1011
+ capability_lines.append("• write_file(path, content) - Create/overwrite files directly")
1012
+ capability_lines.append("• edit_file(path, old, new) - Surgical find/replace edits")
1013
+ capability_lines.append("• glob_search(pattern) - Fast file search (e.g., '**/*.py')")
1014
+ capability_lines.append("• grep_search(pattern) - Fast content search in files")
1015
+ capability_lines.append("• batch_edit_files(edits) - Multi-file refactoring")
1016
+
998
1017
  sections.append("Capabilities in play:\n" + "\n".join(capability_lines))
999
1018
 
1000
1019
  # ENHANCED TRUTH-SEEKING RULES (adapt based on mode)
@@ -1089,6 +1108,48 @@ class EnhancedNocturnalAgent:
1089
1108
  "• Example: 'I found 3 papers. I can save them to your library or export to BibTeX if you'd like.'",
1090
1109
  ]
1091
1110
  rules.extend(workflow_rules)
1111
+
1112
+ # Add file operation tool usage rules (CRITICAL for Claude Code parity)
1113
+ file_ops_rules = [
1114
+ "",
1115
+ "📁 FILE OPERATION TOOL USAGE (Use these INSTEAD of shell commands):",
1116
+ "",
1117
+ "🔴 ALWAYS PREFER (in order):",
1118
+ "1. read_file(path) → INSTEAD OF: cat, head, tail",
1119
+ "2. write_file(path, content) → INSTEAD OF: echo >, cat << EOF, printf >",
1120
+ "3. edit_file(path, old, new) → INSTEAD OF: sed, awk",
1121
+ "4. glob_search(pattern, path) → INSTEAD OF: find, ls",
1122
+ "5. grep_search(pattern, path, file_pattern) → INSTEAD OF: grep -r",
1123
+ "",
1124
+ "✅ CORRECT USAGE:",
1125
+ "• Reading code: result = read_file('app.py')",
1126
+ "• Creating file: write_file('config.json', '{...}')",
1127
+ "• Editing code: edit_file('main.py', 'old_var', 'new_var', replace_all=True)",
1128
+ "• Finding files: glob_search('**/*.py', '/home/user/project')",
1129
+ "• Searching code: grep_search('class.*Agent', '.', '*.py', output_mode='content')",
1130
+ "• Multi-file refactor: batch_edit_files([{file: 'a.py', old: '...', new: '...'}, ...])",
1131
+ "",
1132
+ "❌ ANTI-PATTERNS (Don't do these):",
1133
+ "• DON'T use cat when read_file exists",
1134
+ "• DON'T use echo > when write_file exists",
1135
+ "• DON'T use sed when edit_file exists",
1136
+ "• DON'T use find when glob_search exists",
1137
+ "• DON'T use grep -r when grep_search exists",
1138
+ "",
1139
+ "🎯 WHY USE THESE TOOLS:",
1140
+ "• read_file() shows line numbers (critical for code analysis)",
1141
+ "• write_file() handles escaping/quoting automatically (no heredoc hell)",
1142
+ "• edit_file() validates changes before applying (safer than sed)",
1143
+ "• glob_search() is faster and cleaner than find",
1144
+ "• grep_search() returns structured data (easier to parse)",
1145
+ "",
1146
+ "⚠️ SHELL COMMANDS ONLY FOR:",
1147
+ "• System operations (ps, df, du, uptime)",
1148
+ "• Git commands (git status, git diff, git log)",
1149
+ "• Package installs (pip install, Rscript -e \"install.packages(...)\")",
1150
+ "• Running Python/R scripts (python script.py, Rscript analysis.R)",
1151
+ ]
1152
+ rules.extend(file_ops_rules)
1092
1153
 
1093
1154
  sections.append("CRITICAL RULES:\n" + "\n".join(rules))
1094
1155
 
@@ -1950,14 +2011,17 @@ class EnhancedNocturnalAgent:
1950
2011
  url = f"{self.finsight_base_url}/{endpoint}"
1951
2012
  # Start fresh with headers - don't use _default_headers which might be wrong
1952
2013
  headers = {}
1953
-
2014
+
1954
2015
  # Always use demo key for FinSight (SEC data is public)
1955
2016
  headers["X-API-Key"] = "demo-key-123"
1956
-
2017
+
2018
+ # Mark request as agent-mediated for product separation
2019
+ headers["X-Request-Source"] = "agent"
2020
+
1957
2021
  # Also add JWT if we have it
1958
2022
  if self.auth_token:
1959
2023
  headers["Authorization"] = f"Bearer {self.auth_token}"
1960
-
2024
+
1961
2025
  debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
1962
2026
  if debug_mode:
1963
2027
  print(f"🔍 FinSight headers: {list(headers.keys())}, X-API-Key={headers.get('X-API-Key')}")
@@ -2179,40 +2243,552 @@ class EnhancedNocturnalAgent:
2179
2243
 
2180
2244
  output = '\n'.join(output_lines).strip()
2181
2245
  return output if output else "Command executed (no output)"
2182
-
2246
+
2183
2247
  except Exception as e:
2184
2248
  return f"ERROR: {e}"
2185
2249
 
2186
- def _is_safe_shell_command(self, cmd: str) -> bool:
2250
+ # ========================================================================
2251
+ # DIRECT FILE OPERATIONS (Claude Code / Cursor Parity)
2252
+ # ========================================================================
2253
+
2254
+ def read_file(self, file_path: str, offset: int = 0, limit: int = 2000) -> str:
2255
+ """
2256
+ Read file with line numbers (like Claude Code's Read tool)
2257
+
2258
+ Args:
2259
+ file_path: Path to file
2260
+ offset: Starting line number (0-indexed)
2261
+ limit: Maximum number of lines to read
2262
+
2263
+ Returns:
2264
+ File contents with line numbers in format: " 123→content"
2265
+ """
2266
+ try:
2267
+ # Expand ~ to home directory
2268
+ file_path = os.path.expanduser(file_path)
2269
+
2270
+ # Make absolute if relative
2271
+ if not os.path.isabs(file_path):
2272
+ file_path = os.path.abspath(file_path)
2273
+
2274
+ with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
2275
+ lines = f.readlines()
2276
+
2277
+ # Apply offset and limit
2278
+ if offset or limit:
2279
+ lines = lines[offset:offset+limit if limit else None]
2280
+
2281
+ # Format with line numbers (1-indexed, like vim/editors)
2282
+ numbered_lines = [
2283
+ f"{offset+i+1:6d}→{line.rstrip()}\n"
2284
+ for i, line in enumerate(lines)
2285
+ ]
2286
+
2287
+ result = ''.join(numbered_lines)
2288
+
2289
+ # Update file context
2290
+ self.file_context['last_file'] = file_path
2291
+ if file_path not in self.file_context['recent_files']:
2292
+ self.file_context['recent_files'].append(file_path)
2293
+ self.file_context['recent_files'] = self.file_context['recent_files'][-5:]
2294
+
2295
+ return result if result else "(empty file)"
2296
+
2297
+ except FileNotFoundError:
2298
+ return f"ERROR: File not found: {file_path}"
2299
+ except PermissionError:
2300
+ return f"ERROR: Permission denied: {file_path}"
2301
+ except IsADirectoryError:
2302
+ return f"ERROR: {file_path} is a directory, not a file"
2303
+ except Exception as e:
2304
+ return f"ERROR: {type(e).__name__}: {e}"
2305
+
2306
+ def write_file(self, file_path: str, content: str) -> Dict[str, Any]:
2307
+ """
2308
+ Write file directly (like Claude Code's Write tool)
2309
+ Creates new file or overwrites existing one.
2310
+
2311
+ Args:
2312
+ file_path: Path to file
2313
+ content: Full file content
2314
+
2315
+ Returns:
2316
+ {"success": bool, "message": str, "bytes_written": int}
2317
+ """
2318
+ try:
2319
+ # Expand ~ to home directory
2320
+ file_path = os.path.expanduser(file_path)
2321
+
2322
+ # Make absolute if relative
2323
+ if not os.path.isabs(file_path):
2324
+ file_path = os.path.abspath(file_path)
2325
+
2326
+ # Create parent directories if needed
2327
+ parent_dir = os.path.dirname(file_path)
2328
+ if parent_dir and not os.path.exists(parent_dir):
2329
+ os.makedirs(parent_dir, exist_ok=True)
2330
+
2331
+ # Write file
2332
+ with open(file_path, 'w', encoding='utf-8') as f:
2333
+ bytes_written = f.write(content)
2334
+
2335
+ # Update file context
2336
+ self.file_context['last_file'] = file_path
2337
+ if file_path not in self.file_context['recent_files']:
2338
+ self.file_context['recent_files'].append(file_path)
2339
+ self.file_context['recent_files'] = self.file_context['recent_files'][-5:]
2340
+
2341
+ return {
2342
+ "success": True,
2343
+ "message": f"Wrote {bytes_written} bytes to {file_path}",
2344
+ "bytes_written": bytes_written
2345
+ }
2346
+
2347
+ except PermissionError:
2348
+ return {
2349
+ "success": False,
2350
+ "message": f"ERROR: Permission denied: {file_path}",
2351
+ "bytes_written": 0
2352
+ }
2353
+ except Exception as e:
2354
+ return {
2355
+ "success": False,
2356
+ "message": f"ERROR: {type(e).__name__}: {e}",
2357
+ "bytes_written": 0
2358
+ }
2359
+
2360
+ def edit_file(self, file_path: str, old_string: str, new_string: str,
2361
+ replace_all: bool = False) -> Dict[str, Any]:
2362
+ """
2363
+ Surgical file edit (like Claude Code's Edit tool)
2364
+
2365
+ Args:
2366
+ file_path: Path to file
2367
+ old_string: Exact string to replace (must be unique unless replace_all=True)
2368
+ new_string: Replacement string
2369
+ replace_all: If True, replace all occurrences. If False, old_string must be unique.
2370
+
2371
+ Returns:
2372
+ {"success": bool, "message": str, "replacements": int}
2373
+ """
2374
+ try:
2375
+ # Expand ~ to home directory
2376
+ file_path = os.path.expanduser(file_path)
2377
+
2378
+ # Make absolute if relative
2379
+ if not os.path.isabs(file_path):
2380
+ file_path = os.path.abspath(file_path)
2381
+
2382
+ # Read file
2383
+ with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
2384
+ content = f.read()
2385
+
2386
+ # Check if old_string exists
2387
+ if old_string not in content:
2388
+ return {
2389
+ "success": False,
2390
+ "message": f"ERROR: old_string not found in {file_path}",
2391
+ "replacements": 0
2392
+ }
2393
+
2394
+ # Check uniqueness if not replace_all
2395
+ occurrences = content.count(old_string)
2396
+ if not replace_all and occurrences > 1:
2397
+ return {
2398
+ "success": False,
2399
+ "message": f"ERROR: old_string appears {occurrences} times in {file_path}. Use replace_all=True or provide more context to make it unique.",
2400
+ "replacements": 0
2401
+ }
2402
+
2403
+ # Perform replacement
2404
+ if replace_all:
2405
+ new_content = content.replace(old_string, new_string)
2406
+ else:
2407
+ new_content = content.replace(old_string, new_string, 1)
2408
+
2409
+ # Write back
2410
+ with open(file_path, 'w', encoding='utf-8') as f:
2411
+ f.write(new_content)
2412
+
2413
+ # Update file context
2414
+ self.file_context['last_file'] = file_path
2415
+
2416
+ return {
2417
+ "success": True,
2418
+ "message": f"Replaced {occurrences if replace_all else 1} occurrence(s) in {file_path}",
2419
+ "replacements": occurrences if replace_all else 1
2420
+ }
2421
+
2422
+ except FileNotFoundError:
2423
+ return {
2424
+ "success": False,
2425
+ "message": f"ERROR: File not found: {file_path}",
2426
+ "replacements": 0
2427
+ }
2428
+ except PermissionError:
2429
+ return {
2430
+ "success": False,
2431
+ "message": f"ERROR: Permission denied: {file_path}",
2432
+ "replacements": 0
2433
+ }
2434
+ except Exception as e:
2435
+ return {
2436
+ "success": False,
2437
+ "message": f"ERROR: {type(e).__name__}: {e}",
2438
+ "replacements": 0
2439
+ }
2440
+
2441
+ def glob_search(self, pattern: str, path: str = ".") -> Dict[str, Any]:
2442
+ """
2443
+ Fast file pattern matching (like Claude Code's Glob tool)
2444
+
2445
+ Args:
2446
+ pattern: Glob pattern (e.g., "*.py", "**/*.md", "src/**/*.ts")
2447
+ path: Starting directory (default: current directory)
2448
+
2449
+ Returns:
2450
+ {"files": List[str], "count": int, "pattern": str}
2451
+ """
2452
+ try:
2453
+ import glob as glob_module
2454
+
2455
+ # Expand ~ to home directory
2456
+ path = os.path.expanduser(path)
2457
+
2458
+ # Make absolute if relative
2459
+ if not os.path.isabs(path):
2460
+ path = os.path.abspath(path)
2461
+
2462
+ # Combine path and pattern
2463
+ full_pattern = os.path.join(path, pattern)
2464
+
2465
+ # Find matches (recursive if ** in pattern)
2466
+ matches = glob_module.glob(full_pattern, recursive=True)
2467
+
2468
+ # Filter to files only (not directories)
2469
+ files = [f for f in matches if os.path.isfile(f)]
2470
+
2471
+ # Sort by modification time (newest first)
2472
+ files.sort(key=lambda f: os.path.getmtime(f), reverse=True)
2473
+
2474
+ return {
2475
+ "files": files,
2476
+ "count": len(files),
2477
+ "pattern": full_pattern
2478
+ }
2479
+
2480
+ except Exception as e:
2481
+ return {
2482
+ "files": [],
2483
+ "count": 0,
2484
+ "pattern": pattern,
2485
+ "error": f"{type(e).__name__}: {e}"
2486
+ }
2487
+
2488
+ def grep_search(self, pattern: str, path: str = ".",
2489
+ file_pattern: str = "*",
2490
+ output_mode: str = "files_with_matches",
2491
+ context_lines: int = 0,
2492
+ ignore_case: bool = False,
2493
+ max_results: int = 100) -> Dict[str, Any]:
2494
+ """
2495
+ Fast content search (like Claude Code's Grep tool / ripgrep)
2496
+
2497
+ Args:
2498
+ pattern: Regex pattern to search for
2499
+ path: Directory to search in
2500
+ file_pattern: Glob pattern for files to search (e.g., "*.py")
2501
+ output_mode: "files_with_matches", "content", or "count"
2502
+ context_lines: Lines of context around matches
2503
+ ignore_case: Case-insensitive search
2504
+ max_results: Maximum number of results to return
2505
+
2506
+ Returns:
2507
+ Depends on output_mode:
2508
+ - files_with_matches: {"files": List[str], "count": int}
2509
+ - content: {"matches": {file: [(line_num, line_content), ...]}}
2510
+ - count: {"counts": {file: match_count}}
2511
+ """
2512
+ try:
2513
+ import re
2514
+
2515
+ # Expand ~ to home directory
2516
+ path = os.path.expanduser(path)
2517
+
2518
+ # Make absolute if relative
2519
+ if not os.path.isabs(path):
2520
+ path = os.path.abspath(path)
2521
+
2522
+ # Compile regex
2523
+ flags = re.IGNORECASE if ignore_case else 0
2524
+ regex = re.compile(pattern, flags)
2525
+
2526
+ # Find files to search
2527
+ glob_result = self.glob_search(file_pattern, path)
2528
+ files_to_search = glob_result["files"]
2529
+
2530
+ # Search each file
2531
+ if output_mode == "files_with_matches":
2532
+ matching_files = []
2533
+ for file_path in files_to_search[:max_results]:
2534
+ try:
2535
+ with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
2536
+ content = f.read()
2537
+ if regex.search(content):
2538
+ matching_files.append(file_path)
2539
+ except:
2540
+ continue
2541
+
2542
+ return {
2543
+ "files": matching_files,
2544
+ "count": len(matching_files),
2545
+ "pattern": pattern
2546
+ }
2547
+
2548
+ elif output_mode == "content":
2549
+ matches = {}
2550
+ for file_path in files_to_search:
2551
+ try:
2552
+ with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
2553
+ lines = f.readlines()
2554
+
2555
+ file_matches = []
2556
+ for line_num, line in enumerate(lines, 1):
2557
+ if regex.search(line):
2558
+ file_matches.append((line_num, line.rstrip()))
2559
+
2560
+ if len(file_matches) >= max_results:
2561
+ break
2562
+
2563
+ if file_matches:
2564
+ matches[file_path] = file_matches
2565
+ except:
2566
+ continue
2567
+
2568
+ return {
2569
+ "matches": matches,
2570
+ "file_count": len(matches),
2571
+ "pattern": pattern
2572
+ }
2573
+
2574
+ elif output_mode == "count":
2575
+ counts = {}
2576
+ for file_path in files_to_search:
2577
+ try:
2578
+ with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
2579
+ content = f.read()
2580
+
2581
+ match_count = len(regex.findall(content))
2582
+ if match_count > 0:
2583
+ counts[file_path] = match_count
2584
+ except:
2585
+ continue
2586
+
2587
+ return {
2588
+ "counts": counts,
2589
+ "total_matches": sum(counts.values()),
2590
+ "pattern": pattern
2591
+ }
2592
+
2593
+ else:
2594
+ return {
2595
+ "error": f"Invalid output_mode: {output_mode}. Use 'files_with_matches', 'content', or 'count'."
2596
+ }
2597
+
2598
+ except re.error as e:
2599
+ return {
2600
+ "error": f"Invalid regex pattern: {e}"
2601
+ }
2602
+ except Exception as e:
2603
+ return {
2604
+ "error": f"{type(e).__name__}: {e}"
2605
+ }
2606
+
2607
+ async def batch_edit_files(self, edits: List[Dict[str, str]]) -> Dict[str, Any]:
2187
2608
  """
2188
- Minimal safety check - only block truly catastrophic commands.
2189
- Philosophy: This is the user's machine. They can do anything in terminal anyway.
2190
- We only block commands that could cause immediate, irreversible system damage.
2609
+ Apply multiple file edits atomically (all-or-nothing)
2610
+
2611
+ Args:
2612
+ edits: List of edit operations:
2613
+ [
2614
+ {"file": "path.py", "old": "...", "new": "..."},
2615
+ {"file": "other.py", "old": "...", "new": "...", "replace_all": True},
2616
+ ...
2617
+ ]
2618
+
2619
+ Returns:
2620
+ {
2621
+ "success": bool,
2622
+ "results": {file: {"success": bool, "message": str, "replacements": int}},
2623
+ "total_edits": int,
2624
+ "failed_edits": int
2625
+ }
2626
+ """
2627
+ try:
2628
+ results = {}
2629
+
2630
+ # Phase 1: Validate all edits
2631
+ for edit in edits:
2632
+ file_path = edit["file"]
2633
+ old_string = edit["old"]
2634
+ replace_all = edit.get("replace_all", False)
2635
+
2636
+ # Expand path
2637
+ file_path = os.path.expanduser(file_path)
2638
+ if not os.path.isabs(file_path):
2639
+ file_path = os.path.abspath(file_path)
2640
+
2641
+ # Check file exists
2642
+ if not os.path.exists(file_path):
2643
+ return {
2644
+ "success": False,
2645
+ "results": {},
2646
+ "total_edits": 0,
2647
+ "failed_edits": len(edits),
2648
+ "error": f"Validation failed: {file_path} not found. No edits applied."
2649
+ }
2650
+
2651
+ # Check old_string exists
2652
+ try:
2653
+ with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
2654
+ content = f.read()
2655
+
2656
+ if old_string not in content:
2657
+ return {
2658
+ "success": False,
2659
+ "results": {},
2660
+ "total_edits": 0,
2661
+ "failed_edits": len(edits),
2662
+ "error": f"Validation failed: Pattern not found in {file_path}. No edits applied."
2663
+ }
2664
+
2665
+ # Check uniqueness if not replace_all
2666
+ if not replace_all and content.count(old_string) > 1:
2667
+ return {
2668
+ "success": False,
2669
+ "results": {},
2670
+ "total_edits": 0,
2671
+ "failed_edits": len(edits),
2672
+ "error": f"Validation failed: Pattern appears {content.count(old_string)} times in {file_path}. Use replace_all or provide more context. No edits applied."
2673
+ }
2674
+ except Exception as e:
2675
+ return {
2676
+ "success": False,
2677
+ "results": {},
2678
+ "total_edits": 0,
2679
+ "failed_edits": len(edits),
2680
+ "error": f"Validation failed reading {file_path}: {e}. No edits applied."
2681
+ }
2682
+
2683
+ # Phase 2: Apply all edits (validation passed)
2684
+ for edit in edits:
2685
+ file_path = edit["file"]
2686
+ old_string = edit["old"]
2687
+ new_string = edit["new"]
2688
+ replace_all = edit.get("replace_all", False)
2689
+
2690
+ result = self.edit_file(file_path, old_string, new_string, replace_all)
2691
+ results[file_path] = result
2692
+
2693
+ # Count successes/failures
2694
+ successful_edits = sum(1 for r in results.values() if r["success"])
2695
+ failed_edits = len(edits) - successful_edits
2696
+
2697
+ return {
2698
+ "success": failed_edits == 0,
2699
+ "results": results,
2700
+ "total_edits": len(edits),
2701
+ "successful_edits": successful_edits,
2702
+ "failed_edits": failed_edits
2703
+ }
2704
+
2705
+ except Exception as e:
2706
+ return {
2707
+ "success": False,
2708
+ "results": {},
2709
+ "total_edits": 0,
2710
+ "failed_edits": len(edits),
2711
+ "error": f"Batch edit failed: {type(e).__name__}: {e}"
2712
+ }
2713
+
2714
+ # ========================================================================
2715
+ # END DIRECT FILE OPERATIONS
2716
+ # ========================================================================
2717
+
2718
+ def _classify_command_safety(self, cmd: str) -> str:
2719
+ """
2720
+ Classify command by safety level for smart execution.
2721
+ Returns: 'SAFE', 'WRITE', 'DANGEROUS', or 'BLOCKED'
2191
2722
  """
2192
2723
  cmd = cmd.strip()
2193
2724
  if not cmd:
2194
- return False
2195
-
2196
- # Block ONLY truly catastrophic commands
2725
+ return 'BLOCKED'
2726
+
2727
+ cmd_lower = cmd.lower()
2728
+ cmd_parts = cmd.split()
2729
+ cmd_base = cmd_parts[0] if cmd_parts else ''
2730
+ cmd_with_sub = ' '.join(cmd_parts[:2]) if len(cmd_parts) >= 2 else ''
2731
+
2732
+ # BLOCKED: Catastrophic commands
2197
2733
  nuclear_patterns = [
2198
- 'rm -rf /', # Wipe root filesystem
2199
- 'rm -rf ~/*', # Wipe home directory
2200
- 'dd if=/dev/zero of=/dev/sda', # Wipe disk
2201
- 'dd if=/dev/zero of=/dev/hda',
2202
- 'mkfs', # Format filesystem
2203
- 'fdisk', # Partition disk
2734
+ 'rm -rf /',
2735
+ 'rm -rf ~',
2736
+ 'rm -rf /*',
2737
+ 'dd if=/dev/zero',
2738
+ 'mkfs',
2739
+ 'fdisk',
2204
2740
  ':(){ :|:& };:', # Fork bomb
2205
- 'chmod -R 777 /', # Make everything executable
2741
+ 'chmod -r 777 /',
2742
+ '> /dev/sda',
2206
2743
  ]
2207
-
2208
- cmd_lower = cmd.lower()
2209
2744
  for pattern in nuclear_patterns:
2210
- if pattern.lower() in cmd_lower:
2211
- return False
2212
-
2213
- # Allow everything else - pip, npm, git, pipes, redirection, etc.
2214
- # User asked for it, user gets it. Just like Cursor.
2215
- return True
2745
+ if pattern in cmd_lower:
2746
+ return 'BLOCKED'
2747
+
2748
+ # SAFE: Read-only commands
2749
+ safe_commands = {
2750
+ 'pwd', 'ls', 'cd', 'cat', 'head', 'tail', 'grep', 'find', 'which', 'type',
2751
+ 'wc', 'diff', 'echo', 'ps', 'top', 'df', 'du', 'file', 'stat', 'tree',
2752
+ 'whoami', 'hostname', 'date', 'cal', 'uptime', 'printenv', 'env',
2753
+ }
2754
+ safe_git = {'git status', 'git log', 'git diff', 'git branch', 'git show', 'git remote'}
2755
+
2756
+ if cmd_base in safe_commands or cmd_with_sub in safe_git:
2757
+ return 'SAFE'
2758
+
2759
+ # WRITE: File creation/modification (allowed but tracked)
2760
+ write_commands = {'mkdir', 'touch', 'cp', 'mv', 'tee'}
2761
+ if cmd_base in write_commands:
2762
+ return 'WRITE'
2763
+
2764
+ # WRITE: Redirection operations (echo > file, cat > file)
2765
+ if '>' in cmd or '>>' in cmd:
2766
+ # Allow redirection to regular files, block to devices
2767
+ if '/dev/' not in cmd_lower:
2768
+ return 'WRITE'
2769
+ else:
2770
+ return 'BLOCKED'
2771
+
2772
+ # DANGEROUS: Deletion and permission changes
2773
+ dangerous_commands = {'rm', 'rmdir', 'chmod', 'chown', 'chgrp'}
2774
+ if cmd_base in dangerous_commands:
2775
+ return 'DANGEROUS'
2776
+
2777
+ # WRITE: Git write operations
2778
+ write_git = {'git add', 'git commit', 'git push', 'git pull', 'git checkout', 'git merge'}
2779
+ if cmd_with_sub in write_git:
2780
+ return 'WRITE'
2781
+
2782
+ # Default: Treat unknown commands as requiring user awareness
2783
+ return 'WRITE'
2784
+
2785
+ def _is_safe_shell_command(self, cmd: str) -> bool:
2786
+ """
2787
+ Compatibility wrapper for old safety check.
2788
+ Now uses tiered classification system.
2789
+ """
2790
+ classification = self._classify_command_safety(cmd)
2791
+ return classification in ['SAFE', 'WRITE'] # Allow SAFE and WRITE, block DANGEROUS and BLOCKED
2216
2792
 
2217
2793
  def _check_token_budget(self, estimated_tokens: int) -> bool:
2218
2794
  """Check if we have enough token budget"""
@@ -2450,12 +3026,42 @@ class EnhancedNocturnalAgent:
2450
3026
  async def _analyze_request_type(self, question: str) -> Dict[str, Any]:
2451
3027
  """Analyze what type of request this is and what APIs to use"""
2452
3028
 
2453
- # Financial indicators
3029
+ # Financial indicators - COMPREHENSIVE list to ensure FinSight is used
2454
3030
  financial_keywords = [
2455
- 'financial', 'revenue', 'profit', 'earnings', 'stock', 'market',
2456
- 'ticker', 'company', 'balance sheet', 'income statement', 'cash flow',
2457
- 'valuation', 'pe ratio', 'debt', 'equity', 'dividend', 'growth',
2458
- 'ceo', 'earnings call', 'quarterly', 'annual report'
3031
+ # Core metrics
3032
+ 'financial', 'revenue', 'sales', 'income', 'profit', 'earnings', 'loss',
3033
+ 'net income', 'operating income', 'gross profit', 'ebitda', 'ebit',
3034
+
3035
+ # Margins & Ratios
3036
+ 'margin', 'gross margin', 'profit margin', 'operating margin', 'net margin', 'ebitda margin',
3037
+ 'ratio', 'current ratio', 'quick ratio', 'debt ratio', 'pe ratio', 'p/e',
3038
+ 'roe', 'roa', 'roic', 'roce', 'eps',
3039
+
3040
+ # Balance Sheet
3041
+ 'assets', 'liabilities', 'equity', 'debt', 'cash', 'capital',
3042
+ 'balance sheet', 'total assets', 'current assets', 'fixed assets',
3043
+ 'shareholders equity', 'stockholders equity', 'retained earnings',
3044
+
3045
+ # Cash Flow
3046
+ 'cash flow', 'fcf', 'free cash flow', 'operating cash flow',
3047
+ 'cfo', 'cfi', 'cff', 'capex', 'capital expenditure',
3048
+
3049
+ # Market Metrics
3050
+ 'stock', 'market cap', 'market capitalization', 'enterprise value',
3051
+ 'valuation', 'price', 'share price', 'stock price', 'quote',
3052
+ 'volume', 'trading volume', 'shares outstanding',
3053
+
3054
+ # Financial Statements
3055
+ 'income statement', '10-k', '10-q', '8-k', 'filing', 'sec filing',
3056
+ 'quarterly', 'annual report', 'earnings report', 'financial statement',
3057
+
3058
+ # Company Info
3059
+ 'ticker', 'company', 'corporation', 'ceo', 'earnings call',
3060
+ 'dividend', 'dividend yield', 'payout ratio',
3061
+
3062
+ # Growth & Performance
3063
+ 'growth', 'yoy', 'year over year', 'qoq', 'quarter over quarter',
3064
+ 'cagr', 'trend', 'performance', 'returns'
2459
3065
  ]
2460
3066
 
2461
3067
  # Research indicators (quantitative)
@@ -2664,40 +3270,76 @@ class EnhancedNocturnalAgent:
2664
3270
  # Quick check if query might need shell
2665
3271
  question_lower = request.question.lower()
2666
3272
  might_need_shell = any(word in question_lower for word in [
2667
- 'directory', 'folder', 'where', 'find', 'list', 'files', 'look', 'search', 'check', 'into',
2668
- 'show', 'open', 'read', 'display', 'cat', 'view', 'contents', '.r', '.py', '.csv', '.ipynb'
3273
+ 'directory', 'folder', 'where', 'find', 'list', 'files', 'file', 'look', 'search', 'check', 'into',
3274
+ 'show', 'open', 'read', 'display', 'cat', 'view', 'contents', '.r', '.py', '.csv', '.ipynb',
3275
+ 'create', 'make', 'mkdir', 'touch', 'new', 'write', 'copy', 'move', 'delete', 'remove',
3276
+ 'git', 'grep', 'navigate', 'go to', 'change to'
2669
3277
  ])
2670
3278
 
2671
3279
  if might_need_shell and self.shell_session:
3280
+ # Get current directory and context for intelligent planning
3281
+ try:
3282
+ current_dir = self.execute_command("pwd").strip()
3283
+ self.file_context['current_cwd'] = current_dir
3284
+ except:
3285
+ current_dir = "~"
3286
+
3287
+ last_file = self.file_context.get('last_file') or 'None'
3288
+ last_dir = self.file_context.get('last_directory') or 'None'
3289
+
2672
3290
  # Ask LLM planner: What shell command should we run?
2673
- planner_prompt = f"""You are a shell command planner. Determine what shell command to run.
3291
+ planner_prompt = f"""You are a shell command planner. Determine what shell command to run, if any.
2674
3292
 
2675
3293
  User query: "{request.question}"
2676
3294
  Previous conversation: {json.dumps(self.conversation_history[-2:]) if self.conversation_history else "None"}
3295
+ Current directory: {current_dir}
3296
+ Last file mentioned: {last_file}
3297
+ Last directory mentioned: {last_dir}
2677
3298
 
2678
3299
  Respond ONLY with JSON:
2679
3300
  {{
2680
- "action": "pwd|ls|find|read_file|none",
2681
- "search_target": "cm522" (if find),
2682
- "search_path": "~/Downloads" (if find),
2683
- "target_path": "/full/path" (if ls on previous result),
2684
- "file_path": "/full/path/to/file.R" (if read_file)
3301
+ "action": "execute|none",
3302
+ "command": "pwd" (the actual shell command to run, if action=execute),
3303
+ "reason": "Show current directory" (why this command is needed),
3304
+ "updates_context": true (set to true if command changes files/directories)
2685
3305
  }}
2686
3306
 
3307
+ IMPORTANT RULES:
3308
+ 1. Return "none" for conversational queries ("hello", "test", "thanks", "how are you")
3309
+ 2. Return "none" when query is ambiguous without more context
3310
+ 3. Return "none" for questions about data that don't need shell (e.g., "Tesla revenue", "Apple stock price")
3311
+ 4. Use ACTUAL shell commands (pwd, ls, cd, mkdir, cat, grep, find, touch, etc.)
3312
+ 5. Resolve pronouns using context: "it"={last_file}, "there"/{last_dir}
3313
+ 6. For reading files, prefer: head -100 filename (shows first 100 lines)
3314
+ 7. For finding things, use: find ~ -maxdepth 4 -name '*pattern*' 2>/dev/null
3315
+ 8. For creating files: touch filename OR echo "content" > filename
3316
+ 9. For creating directories: mkdir dirname
3317
+ 10. ALWAYS include 2>/dev/null to suppress errors from find
3318
+ 11. 🚨 MULTI-STEP QUERIES: For queries like "read X and do Y", ONLY generate the FIRST step (reading X). The LLM will handle subsequent steps after seeing the file contents.
3319
+ 12. 🚨 NEVER use python -m py_compile or other code execution for finding bugs - just read the file with cat/head
3320
+
2687
3321
  Examples:
2688
- "where am i?" → {{"action": "pwd"}}
2689
- "what files here?" → {{"action": "ls"}}
2690
- "find cm522" → {{"action": "find", "search_target": "cm522"}}
2691
- "look into it" + Previous: "Found /path" {{"action": "ls", "target_path": "/path"}}
2692
- "show me calculate_betas.R" → {{"action": "read_file", "file_path": "calculate_betas.R"}}
2693
- "open regression.R" → {{"action": "read_file", "file_path": "regression.R"}}
2694
- "read that file" + Previous: "regression.R" → {{"action": "read_file", "file_path": "regression.R"}}
2695
- "display analysis.py" → {{"action": "read_file", "file_path": "analysis.py"}}
2696
- "cat data.csv" → {{"action": "read_file", "file_path": "data.csv"}}
2697
- "what columns does it have?" + Previous: file was shown → {{"action": "none"}} (LLM will parse from conversation)
2698
- "Tesla revenue" → {{"action": "none"}}
2699
-
2700
- KEY: If query mentions a specific FILENAME (*.R, *.py, *.csv), use read_file, NOT find!
3322
+ "where am i?" → {{"action": "execute", "command": "pwd", "reason": "Show current directory", "updates_context": false}}
3323
+ "list files" → {{"action": "execute", "command": "ls -lah", "reason": "List all files with details", "updates_context": false}}
3324
+ "find cm522" → {{"action": "execute", "command": "find ~ -maxdepth 4 -name '*cm522*' -type d 2>/dev/null | head -20", "reason": "Search for cm522 directory", "updates_context": false}}
3325
+ "go to Downloads" → {{"action": "execute", "command": "cd ~/Downloads && pwd", "reason": "Navigate to Downloads directory", "updates_context": true}}
3326
+ "show me calc.R" → {{"action": "execute", "command": "head -100 calc.R", "reason": "Display file contents", "updates_context": true}}
3327
+ "create test directory" → {{"action": "execute", "command": "mkdir test && echo 'Created test/'", "reason": "Create new directory", "updates_context": true}}
3328
+ "create empty config.json" → {{"action": "execute", "command": "touch config.json && echo 'Created config.json'", "reason": "Create empty file", "updates_context": true}}
3329
+ "write hello.txt with content Hello World" → {{"action": "execute", "command": "echo 'Hello World' > hello.txt", "reason": "Create file with content", "updates_context": true}}
3330
+ "create results.txt with line 1 and line 2" → {{"action": "execute", "command": "echo 'line 1' > results.txt && echo 'line 2' >> results.txt", "reason": "Create file with multiple lines", "updates_context": true}}
3331
+ "fix bug in script.py change OLD to NEW" → {{"action": "execute", "command": "sed -i 's/OLD/NEW/g' script.py && echo 'Fixed script.py'", "reason": "Edit file to fix bug", "updates_context": true}}
3332
+ "search for TODO in py files" → {{"action": "execute", "command": "grep -n 'TODO' *.py 2>/dev/null", "reason": "Find TODO comments", "updates_context": false}}
3333
+ "find all bugs in code" → {{"action": "execute", "command": "grep -rn 'BUG:' . 2>/dev/null", "reason": "Search for bug markers in code", "updates_context": false}}
3334
+ "read analyze.py and find bugs" → {{"action": "execute", "command": "head -200 analyze.py", "reason": "Read file to analyze bugs", "updates_context": false}}
3335
+ "show me calc.py completely" → {{"action": "execute", "command": "cat calc.py", "reason": "Display entire file", "updates_context": false}}
3336
+ "git status" → {{"action": "execute", "command": "git status", "reason": "Check repository status", "updates_context": false}}
3337
+ "what's in that file?" + last_file=data.csv → {{"action": "execute", "command": "head -100 data.csv", "reason": "Show file contents", "updates_context": false}}
3338
+ "hello" → {{"action": "none", "reason": "Conversational greeting, no command needed"}}
3339
+ "test" → {{"action": "none", "reason": "Ambiguous query, needs clarification"}}
3340
+ "thanks" → {{"action": "none", "reason": "Conversational acknowledgment"}}
3341
+ "Tesla revenue" → {{"action": "none", "reason": "Finance query, will use FinSight API not shell"}}
3342
+ "what does the error mean?" → {{"action": "none", "reason": "Explanation request, no command needed"}}
2701
3343
 
2702
3344
  JSON:"""
2703
3345
 
@@ -2715,17 +3357,285 @@ JSON:"""
2715
3357
 
2716
3358
  plan = json.loads(plan_text)
2717
3359
  shell_action = plan.get("action", "none")
3360
+ command = plan.get("command", "")
3361
+ reason = plan.get("reason", "")
3362
+ updates_context = plan.get("updates_context", False)
2718
3363
 
2719
3364
  if debug_mode:
2720
3365
  print(f"🔍 SHELL PLAN: {plan}")
2721
3366
 
2722
- # Execute shell command based on plan
2723
- if shell_action == "pwd":
2724
- pwd_output = self.execute_command("pwd")
2725
- api_results["shell_info"] = {"current_directory": pwd_output.strip()}
2726
- tools_used.append("shell_execution")
3367
+ # GENERIC COMMAND EXECUTION - No more hardcoded actions!
3368
+ if shell_action == "execute" and command:
3369
+ # Check command safety
3370
+ safety_level = self._classify_command_safety(command)
3371
+
3372
+ if debug_mode:
3373
+ print(f"🔍 Command: {command}")
3374
+ print(f"🔍 Safety: {safety_level}")
3375
+
3376
+ if safety_level == 'BLOCKED':
3377
+ api_results["shell_info"] = {
3378
+ "error": f"Command blocked for safety: {command}",
3379
+ "reason": "This command could cause system damage"
3380
+ }
3381
+ else:
3382
+ # ========================================
3383
+ # COMMAND INTERCEPTOR: Translate shell commands to file operations
3384
+ # (Claude Code / Cursor parity)
3385
+ # ========================================
3386
+ intercepted = False
3387
+ output = ""
3388
+
3389
+ # Check for file reading commands (cat, head, tail)
3390
+ if command.startswith(('cat ', 'head ', 'tail ')):
3391
+ import shlex
3392
+ try:
3393
+ parts = shlex.split(command)
3394
+ cmd = parts[0]
3395
+
3396
+ # Extract filename (last non-flag argument)
3397
+ filename = None
3398
+ for part in reversed(parts[1:]):
3399
+ if not part.startswith('-'):
3400
+ filename = part
3401
+ break
3402
+
3403
+ if filename:
3404
+ # Use read_file instead of cat/head/tail
3405
+ if cmd == 'head':
3406
+ # head -n 100 file OR head file
3407
+ limit = 100 # default
3408
+ if '-n' in parts or '-' in parts[0]:
3409
+ try:
3410
+ idx = parts.index('-n') if '-n' in parts else 0
3411
+ limit = int(parts[idx + 1])
3412
+ except:
3413
+ pass
3414
+ output = self.read_file(filename, offset=0, limit=limit)
3415
+ elif cmd == 'tail':
3416
+ # For tail, read last N lines (harder, so just read all and show it's tail)
3417
+ output = self.read_file(filename)
3418
+ if "ERROR" not in output:
3419
+ lines = output.split('\n')
3420
+ output = '\n'.join(lines[-100:]) # last 100 lines
3421
+ else: # cat
3422
+ output = self.read_file(filename)
3423
+
3424
+ intercepted = True
3425
+ tools_used.append("read_file")
3426
+ if debug_mode:
3427
+ print(f"🔄 Intercepted: {command} → read_file({filename})")
3428
+ except:
3429
+ pass # Fall back to shell execution
3430
+
3431
+ # Check for file search commands (find)
3432
+ if not intercepted and 'find' in command and '-name' in command:
3433
+ try:
3434
+ import re
3435
+ # Extract pattern: find ... -name '*pattern*'
3436
+ name_match = re.search(r"-name\s+['\"]?\*?([^'\"*\s]+)\*?['\"]?", command)
3437
+ if name_match:
3438
+ pattern = f"**/*{name_match.group(1)}*"
3439
+ path_match = re.search(r"find\s+([^\s]+)", command)
3440
+ search_path = path_match.group(1) if path_match else "."
3441
+
3442
+ result = self.glob_search(pattern, search_path)
3443
+ output = '\n'.join(result['files'][:20]) # Show first 20 matches
3444
+ intercepted = True
3445
+ tools_used.append("glob_search")
3446
+ if debug_mode:
3447
+ print(f"🔄 Intercepted: {command} → glob_search({pattern}, {search_path})")
3448
+ except:
3449
+ pass
3450
+
3451
+ # Check for file writing commands (echo > file, grep > file, etc.) - CHECK THIS FIRST!
3452
+ # This must come BEFORE the plain grep interceptor
3453
+ if not intercepted and ('>' in command or '>>' in command):
3454
+ try:
3455
+ import re
3456
+
3457
+ # Handle grep ... > file (intercept and execute grep, then write output)
3458
+ if 'grep' in command and '>' in command:
3459
+ # Extract: grep -rn 'pattern' path > output.txt
3460
+ grep_match = re.search(r"grep\s+(.*)>\s*(\S+)", command)
3461
+ if grep_match:
3462
+ grep_part = grep_match.group(1).strip()
3463
+ output_file = grep_match.group(2)
3464
+
3465
+ # Extract pattern and options from grep command
3466
+ pattern_match = re.search(r"['\"]([^'\"]+)['\"]", grep_part)
3467
+ if pattern_match:
3468
+ pattern = pattern_match.group(1)
3469
+ search_path = "."
3470
+ file_pattern = "*.py" if "*.py" in command else "*"
3471
+
3472
+ if debug_mode:
3473
+ print(f"🔄 Intercepted: {command} → grep_search('{pattern}', '{search_path}', '{file_pattern}') + write_file({output_file})")
3474
+
3475
+ # Execute grep_search
3476
+ try:
3477
+ grep_result = self.grep_search(
3478
+ pattern=pattern,
3479
+ path=search_path,
3480
+ file_pattern=file_pattern,
3481
+ output_mode="content"
3482
+ )
3483
+
3484
+ # Format matches as text (like grep -rn output)
3485
+ output_lines = []
3486
+ for file_path, matches in grep_result.get('matches', {}).items():
3487
+ for line_num, line_content in matches:
3488
+ output_lines.append(f"{file_path}:{line_num}:{line_content}")
3489
+
3490
+ content_to_write = '\n'.join(output_lines) if output_lines else "(no matches found)"
3491
+
3492
+ # Write grep output to file
3493
+ write_result = self.write_file(output_file, content_to_write)
3494
+ if write_result['success']:
3495
+ output = f"Found {len(output_lines)} lines with '{pattern}' → Created {output_file} ({write_result['bytes_written']} bytes)"
3496
+ intercepted = True
3497
+ tools_used.extend(["grep_search", "write_file"])
3498
+ except Exception as e:
3499
+ if debug_mode:
3500
+ print(f"⚠️ Grep > file interception error: {e}")
3501
+ # Fall back to normal execution
3502
+ pass
3503
+
3504
+ # Extract: echo 'content' > filename OR cat << EOF > filename
3505
+ if not intercepted and 'echo' in command and '>' in command:
3506
+ # echo 'content' > file OR echo "content" > file
3507
+ match = re.search(r"echo\s+['\"](.+?)['\"].*?>\s*(\S+)", command)
3508
+ if match:
3509
+ content = match.group(1)
3510
+ filename = match.group(2)
3511
+ # Unescape common sequences
3512
+ content = content.replace('\\n', '\n').replace('\\t', '\t')
3513
+ result = self.write_file(filename, content + '\n')
3514
+ if result['success']:
3515
+ output = f"Created {filename} ({result['bytes_written']} bytes)"
3516
+ intercepted = True
3517
+ tools_used.append("write_file")
3518
+ if debug_mode:
3519
+ print(f"🔄 Intercepted: {command} → write_file({filename}, ...)")
3520
+ except:
3521
+ pass
3522
+
3523
+ # Check for sed editing commands
3524
+ if not intercepted and command.startswith('sed '):
3525
+ try:
3526
+ import re
3527
+ # sed 's/old/new/g' file OR sed -i 's/old/new/' file
3528
+ match = re.search(r"sed.*?['\"]s/([^/]+)/([^/]+)/", command)
3529
+ if match:
3530
+ old_text = match.group(1)
3531
+ new_text = match.group(2)
3532
+ # Extract filename (last argument)
3533
+ parts = command.split()
3534
+ filename = parts[-1]
3535
+
3536
+ # Determine if replace_all based on /g flag
3537
+ replace_all = '/g' in command
3538
+
3539
+ result = self.edit_file(filename, old_text, new_text, replace_all=replace_all)
3540
+ if result['success']:
3541
+ output = result['message']
3542
+ intercepted = True
3543
+ tools_used.append("edit_file")
3544
+ if debug_mode:
3545
+ print(f"🔄 Intercepted: {command} → edit_file({filename}, {old_text}, {new_text})")
3546
+ except:
3547
+ pass
3548
+
3549
+ # Check for heredoc file creation (cat << EOF > file)
3550
+ if not intercepted and '<<' in command and ('EOF' in command or 'HEREDOC' in command):
3551
+ try:
3552
+ import re
3553
+ # Extract: cat << EOF > filename OR cat > filename << EOF
3554
+ # Note: We can't actually get the heredoc content from a single command line
3555
+ # This would need to be handled differently (multi-line input)
3556
+ # For now, just detect and warn
3557
+ if debug_mode:
3558
+ print(f"⚠️ Heredoc detected but not intercepted: {command[:80]}")
3559
+ except:
3560
+ pass
3561
+
3562
+ # Check for content search commands (grep -r) WITHOUT redirection
3563
+ # This comes AFTER grep > file interceptor to avoid conflicts
3564
+ if not intercepted and command.startswith('grep ') and ('-r' in command or '-R' in command):
3565
+ try:
3566
+ import re
3567
+ # Extract pattern: grep -r 'pattern' path
3568
+ pattern_match = re.search(r"grep.*?['\"]([^'\"]+)['\"]", command)
3569
+ if pattern_match:
3570
+ pattern = pattern_match.group(1)
3571
+ # Extract path (last argument usually)
3572
+ parts = command.split()
3573
+ search_path = parts[-1] if len(parts) > 2 else "."
3574
+
3575
+ result = self.grep_search(pattern, search_path, "*.py", output_mode="files_with_matches")
3576
+ output = f"Files matching '{pattern}':\n" + '\n'.join(result['files'][:20])
3577
+ intercepted = True
3578
+ tools_used.append("grep_search")
3579
+ if debug_mode:
3580
+ print(f"🔄 Intercepted: {command} → grep_search({pattern}, {search_path})")
3581
+ except:
3582
+ pass
3583
+
3584
+ # If not intercepted, execute as shell command
3585
+ if not intercepted:
3586
+ output = self.execute_command(command)
3587
+
3588
+ if not output.startswith("ERROR"):
3589
+ # Success - store results
3590
+ api_results["shell_info"] = {
3591
+ "command": command,
3592
+ "output": output,
3593
+ "reason": reason,
3594
+ "safety_level": safety_level
3595
+ }
3596
+ tools_used.append("shell_execution")
3597
+
3598
+ # Update file context if needed
3599
+ if updates_context:
3600
+ import re
3601
+ # Extract file paths from command
3602
+ file_patterns = r'([a-zA-Z0-9_\-./]+\.(py|r|csv|txt|json|md|ipynb|rmd))'
3603
+ files_mentioned = re.findall(file_patterns, command, re.IGNORECASE)
3604
+ if files_mentioned:
3605
+ file_path = files_mentioned[0][0]
3606
+ self.file_context['last_file'] = file_path
3607
+ if file_path not in self.file_context['recent_files']:
3608
+ self.file_context['recent_files'].append(file_path)
3609
+ self.file_context['recent_files'] = self.file_context['recent_files'][-5:] # Keep last 5
3610
+
3611
+ # Extract directory paths
3612
+ dir_patterns = r'cd\s+([^\s&|;]+)|mkdir\s+([^\s&|;]+)'
3613
+ dirs_mentioned = re.findall(dir_patterns, command)
3614
+ if dirs_mentioned:
3615
+ for dir_tuple in dirs_mentioned:
3616
+ dir_path = dir_tuple[0] or dir_tuple[1]
3617
+ if dir_path:
3618
+ self.file_context['last_directory'] = dir_path
3619
+ if dir_path not in self.file_context['recent_dirs']:
3620
+ self.file_context['recent_dirs'].append(dir_path)
3621
+ self.file_context['recent_dirs'] = self.file_context['recent_dirs'][-5:] # Keep last 5
3622
+
3623
+ # If cd command, update current_cwd
3624
+ if command.startswith('cd '):
3625
+ try:
3626
+ new_cwd = self.execute_command("pwd").strip()
3627
+ self.file_context['current_cwd'] = new_cwd
3628
+ except:
3629
+ pass
3630
+ else:
3631
+ # Command failed
3632
+ api_results["shell_info"] = {
3633
+ "error": output,
3634
+ "command": command
3635
+ }
2727
3636
 
2728
- elif shell_action == "ls":
3637
+ # Backwards compatibility: support old hardcoded actions if LLM still returns them
3638
+ elif shell_action == "pwd":
2729
3639
  target = plan.get("target_path")
2730
3640
  if target:
2731
3641
  ls_output = self.execute_command(f"ls -lah {target}")
@@ -2757,6 +3667,32 @@ JSON:"""
2757
3667
  }
2758
3668
  tools_used.append("shell_execution")
2759
3669
 
3670
+ elif shell_action == "cd":
3671
+ # NEW: Change directory
3672
+ target = plan.get("target_path")
3673
+ if target:
3674
+ # Expand ~ to home directory
3675
+ if target.startswith("~"):
3676
+ home = os.path.expanduser("~")
3677
+ target = target.replace("~", home, 1)
3678
+
3679
+ # Execute cd command
3680
+ cd_cmd = f"cd {target} && pwd"
3681
+ cd_output = self.execute_command(cd_cmd)
3682
+
3683
+ if not cd_output.startswith("ERROR"):
3684
+ api_results["shell_info"] = {
3685
+ "directory_changed": True,
3686
+ "new_directory": cd_output.strip(),
3687
+ "target_path": target
3688
+ }
3689
+ tools_used.append("shell_execution")
3690
+ else:
3691
+ api_results["shell_info"] = {
3692
+ "directory_changed": False,
3693
+ "error": f"Failed to change to {target}: {cd_output}"
3694
+ }
3695
+
2760
3696
  elif shell_action == "read_file":
2761
3697
  # NEW: Read and inspect file (R, Python, CSV, etc.)
2762
3698
  import re # Import at function level
@@ -2836,6 +3772,14 @@ JSON:"""
2836
3772
  if debug_mode and is_vague:
2837
3773
  print(f"🔍 Query is VAGUE - skipping expensive APIs")
2838
3774
 
3775
+ # If query is vague, hint to backend LLM to ask clarifying questions
3776
+ if is_vague:
3777
+ api_results["query_analysis"] = {
3778
+ "is_vague": True,
3779
+ "suggestion": "Ask clarifying questions instead of guessing",
3780
+ "reason": "Query needs more specificity to provide accurate answer"
3781
+ }
3782
+
2839
3783
  # Skip Archive/FinSight if query is too vague, but still allow web search later
2840
3784
  if not is_vague:
2841
3785
  # Archive API for research
@@ -2914,32 +3858,78 @@ JSON:"""
2914
3858
  # - Shell said "none" (not a directory/file operation)
2915
3859
  # - We don't have enough data from Archive/FinSight
2916
3860
 
2917
- if self.web_search and shell_action == "none":
3861
+ # First check: Is this a conversational query that doesn't need web search?
3862
+ def is_conversational_query(query: str) -> bool:
3863
+ """Detect if query is conversational (greeting, thanks, testing, etc.)"""
3864
+ query_lower = query.lower().strip()
3865
+
3866
+ # Single word queries that are conversational
3867
+ conversational_words = {
3868
+ 'hello', 'hi', 'hey', 'thanks', 'thank', 'ok', 'okay', 'yes', 'no',
3869
+ 'test', 'testing', 'cool', 'nice', 'great', 'awesome', 'perfect',
3870
+ 'bye', 'goodbye', 'quit', 'exit', 'help'
3871
+ }
3872
+
3873
+ # Short conversational phrases
3874
+ conversational_phrases = [
3875
+ 'how are you', 'thank you', 'thanks!', 'ok', 'got it', 'i see',
3876
+ 'makes sense', 'sounds good', 'that works', 'no problem'
3877
+ ]
3878
+
3879
+ words = query_lower.split()
3880
+
3881
+ # Single word check
3882
+ if len(words) == 1 and words[0] in conversational_words:
3883
+ return True
3884
+
3885
+ # Short phrase check
3886
+ if len(words) <= 3 and any(phrase in query_lower for phrase in conversational_phrases):
3887
+ return True
3888
+
3889
+ # Question marks with no content words (just pronouns)
3890
+ if '?' in query_lower and len(words) <= 2:
3891
+ return True
3892
+
3893
+ return False
3894
+
3895
+ skip_web_search = is_conversational_query(request.question)
3896
+
3897
+ if self.web_search and shell_action == "none" and not skip_web_search:
2918
3898
  # Ask LLM: Should we web search for this?
2919
- web_decision_prompt = f"""Should we use web search for this query?
3899
+ web_decision_prompt = f"""You are a tool selection expert. Decide if web search is needed.
2920
3900
 
2921
3901
  User query: "{request.question}"
2922
3902
  Data already available: {list(api_results.keys())}
2923
- Shell action: {shell_action}
3903
+ Tools already used: {tools_used}
3904
+
3905
+ AVAILABLE TOOLS YOU SHOULD KNOW:
3906
+ 1. FinSight API: Company financial data (revenue, income, margins, ratios, cash flow, balance sheet, SEC filings)
3907
+ - Covers: All US public companies (~8,000)
3908
+ - Data: SEC EDGAR + Yahoo Finance
3909
+ - Metrics: 50+ financial KPIs
3910
+
3911
+ 2. Archive API: Academic research papers
3912
+ - Covers: Semantic Scholar, OpenAlex, PubMed
3913
+ - Data: Papers, citations, abstracts
3914
+
3915
+ 3. Web Search: General information, current events
3916
+ - Covers: Anything on the internet
3917
+ - Use for: Market share, industry news, non-financial company info
3918
+
3919
+ DECISION RULES:
3920
+ - If query is about company financials (revenue, profit, margins, etc.) → Check if FinSight already provided data
3921
+ - If FinSight has data in api_results → Web search is NOT needed
3922
+ - If FinSight was called but no data → Web search as fallback is OK
3923
+ - If query is about market share, industry size, trends → Web search (FinSight doesn't have this)
3924
+ - If query is about research papers → Archive handles it, not web
3925
+ - If query is conversational → Already filtered, you won't see these
2924
3926
 
2925
3927
  Respond with JSON:
2926
3928
  {{
2927
3929
  "use_web_search": true/false,
2928
- "reason": "why or why not"
3930
+ "reason": "explain why based on tools available and data already fetched"
2929
3931
  }}
2930
3932
 
2931
- Use web search for:
2932
- - Market share/size (not in SEC filings)
2933
- - Current prices (Bitcoin, commodities, real-time data)
2934
- - Industry data, statistics
2935
- - Recent events, news
2936
- - Questions not answered by existing data
2937
-
2938
- Don't use if:
2939
- - Shell already handled it (pwd/ls/find)
2940
- - Question answered by research/financial APIs
2941
- - Pure opinion question
2942
-
2943
3933
  JSON:"""
2944
3934
 
2945
3935
  try:
@@ -2986,11 +3976,44 @@ JSON:"""
2986
3976
  api_results=api_results,
2987
3977
  tools_used=tools_used
2988
3978
  )
2989
-
3979
+
3980
+ # POST-PROCESSING: Auto-extract code blocks and write files if user requested file creation
3981
+ # This fixes the issue where LLM shows corrected code but doesn't create the file
3982
+ if any(keyword in request.question.lower() for keyword in ['create', 'write', 'save', 'generate', 'fixed', 'corrected']):
3983
+ # Extract filename from query (e.g., "write to foo.py", "create bar_fixed.py")
3984
+ import re
3985
+ filename_match = re.search(r'(?:to|create|write|save|generate)\s+(\w+[._-]\w+\.[\w]+)', request.question, re.IGNORECASE)
3986
+ if not filename_match:
3987
+ # Try pattern: "foo_fixed.py" or "bar.py"
3988
+ filename_match = re.search(r'(\w+_fixed\.[\w]+|\w+\.[\w]+)', request.question)
3989
+
3990
+ if filename_match:
3991
+ target_filename = filename_match.group(1)
3992
+
3993
+ # Extract code block from response (```python ... ``` or ``` ... ```)
3994
+ code_block_pattern = r'```(?:python|bash|sh|r|sql)?\n(.*?)```'
3995
+ code_blocks = re.findall(code_block_pattern, response.response, re.DOTALL)
3996
+
3997
+ if code_blocks:
3998
+ # Use the LARGEST code block (likely the complete file)
3999
+ largest_block = max(code_blocks, key=len)
4000
+
4001
+ # Write to file
4002
+ try:
4003
+ write_result = self.write_file(target_filename, largest_block)
4004
+ if write_result['success']:
4005
+ # Append confirmation to response
4006
+ response.response += f"\n\n✅ File created: {target_filename} ({write_result['bytes_written']} bytes)"
4007
+ if debug_mode:
4008
+ print(f"🔄 Auto-extracted code block → write_file({target_filename})")
4009
+ except Exception as e:
4010
+ if debug_mode:
4011
+ print(f"⚠️ Auto-write failed: {e}")
4012
+
2990
4013
  # CRITICAL: Save to conversation history
2991
4014
  self.conversation_history.append({"role": "user", "content": request.question})
2992
4015
  self.conversation_history.append({"role": "assistant", "content": response.response})
2993
-
4016
+
2994
4017
  return response
2995
4018
 
2996
4019
  # DEV MODE ONLY: Direct Groq calls (only works with local API keys)