PyPI - cite-agent - Versions diffs - 1.2.13__tar.gz → 1.3.1__tar.gz - Mend

cite-agent 1.2.13.tar.gz → 1.3.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (94)

{cite_agent-1.2.13/cite_agent.egg-info → cite_agent-1.3.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cite-agent
-Version: 1.2.13
+Version: 1.3.1
 Summary: Terminal AI assistant for academic research with citation verification
 Home-page: https://github.com/Spectating101/cite-agent
 Author: Cite-Agent Team

cite_agent-1.3.1/cite_agent/__version__.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "1.3.1"

{cite_agent-1.2.13 → cite_agent-1.3.1}/cite_agent/enhanced_ai_agent.py RENAMED Viewed

@@ -2594,23 +2594,125 @@ class EnhancedNocturnalAgent:
             if workflow_response:
                 return workflow_response
-            # Analyze request to determine what APIs to call
-            request_analysis = await self._analyze_request_type(request.question)
-            # Debug: Check what was detected
+            # Initialize
+            api_results = {}
+            tools_used = []
             debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
+            # ========================================================================
+            # PRIORITY 1: SHELL PLANNING (Reasoning Layer - Runs FIRST for ALL modes)
+            # ========================================================================
+            # This determines USER INTENT before fetching any data
+            # Prevents waste: "find cm522" won't trigger Archive API, "look into it" won't web search
+            # Works in BOTH production and dev modes
+            shell_action = "none"  # Will be: pwd|ls|find|none
+            # Quick check if query might need shell
+            question_lower = request.question.lower()
+            might_need_shell = any(word in question_lower for word in [
+                'directory', 'folder', 'where', 'find', 'list', 'files', 'look', 'search', 'check', 'into'
+            ])
+            if might_need_shell and self.shell_session:
+                # Ask LLM planner: What shell command should we run?
+                planner_prompt = f"""You are a shell command planner. Determine what shell command to run.
+User query: "{request.question}"
+Previous conversation: {json.dumps(self.conversation_history[-2:]) if self.conversation_history else "None"}
+Respond ONLY with JSON:
+{{
+  "action": "pwd|ls|find|none",
+  "search_target": "cm522" (if find),
+  "search_path": "~/Downloads" (if find),
+  "target_path": "/full/path" (if ls on previous result)
+}}
+Examples:
+"where am i?" → {{"action": "pwd"}}
+"what files here?" → {{"action": "ls"}}
+"find cm522 in downloads" → {{"action": "find", "search_target": "cm522", "search_path": "~/Downloads"}}
+"look into it" + Previous: "Found /path/to/dir" → {{"action": "ls", "target_path": "/path/to/dir"}}
+"Tesla revenue" → {{"action": "none"}}
+JSON:"""
+                try:
+                    plan_response = await self.call_backend_query(
+                        query=planner_prompt,
+                        conversation_history=[],
+                        api_results={},
+                        tools_used=[]
+                    )
+                    plan_text = plan_response.response.strip()
+                    if '```' in plan_text:
+                        plan_text = plan_text.split('```')[1].replace('json', '').strip()
+                    plan = json.loads(plan_text)
+                    shell_action = plan.get("action", "none")
+                    if debug_mode:
+                        print(f"🔍 SHELL PLAN: {plan}")
+                    # Execute shell command based on plan
+                    if shell_action == "pwd":
+                        pwd_output = self.execute_command("pwd")
+                        api_results["shell_info"] = {"current_directory": pwd_output.strip()}
+                        tools_used.append("shell_execution")
+                    elif shell_action == "ls":
+                        target = plan.get("target_path")
+                        if target:
+                            ls_output = self.execute_command(f"ls -lah {target}")
+                            api_results["shell_info"] = {
+                                "directory_contents": ls_output,
+                                "target_path": target
+                            }
+                        else:
+                            ls_output = self.execute_command("ls -lah")
+                            api_results["shell_info"] = {"directory_contents": ls_output}
+                        tools_used.append("shell_execution")
+                    elif shell_action == "find":
+                        search_target = plan.get("search_target", "")
+                        search_path = plan.get("search_path", "~")
+                        if search_target:
+                            find_cmd = f"find {search_path} -maxdepth 4 -type d -iname '*{search_target}*' 2>/dev/null | head -20"
+                            find_output = self.execute_command(find_cmd)
+                            if debug_mode:
+                                print(f"🔍 FIND: {find_cmd}")
+                                print(f"🔍 OUTPUT: {repr(find_output)}")
+                            if find_output.strip():
+                                api_results["shell_info"] = {
+                                    "search_results": f"Searched for '*{search_target}*' in {search_path}:\n{find_output}"
+                                }
+                            else:
+                                api_results["shell_info"] = {
+                                    "search_results": f"No directories matching '{search_target}' found in {search_path}"
+                                }
+                            tools_used.append("shell_execution")
+                except Exception as e:
+                    if debug_mode:
+                        print(f"🔍 Shell planner failed: {e}, continuing without shell")
+                    shell_action = "none"
+            # ========================================================================
+            # PRIORITY 2: DATA APIs (Only if shell didn't fully handle the query)
+            # ========================================================================
+            # If shell_action = pwd/ls/find, we might still want data APIs
+            # But we skip vague queries to save tokens
+            # Analyze what data APIs are needed (only if not pure shell command)
+            request_analysis = await self._analyze_request_type(request.question)
             if debug_mode:
                 print(f"🔍 Request analysis: {request_analysis}")
-            # Check if query is too vague - skip EXPENSIVE API calls to save tokens
-            # But still allow web search (cheap and flexible)
             is_vague = self._is_query_too_vague_for_apis(request.question)
             if debug_mode and is_vague:
-                print(f"🔍 Query detected as VAGUE - skipping Archive/FinSight, but may use web search")
-            # Call appropriate APIs (Archive, FinSight) - BOTH production and dev mode
-            api_results = {}
-            tools_used = []
+                print(f"🔍 Query is VAGUE - skipping expensive APIs")
             # Skip Archive/FinSight if query is too vague, but still allow web search later
             if not is_vague:
@@ -2629,232 +2731,141 @@ class EnhancedNocturnalAgent:
                         api_results["research"] = result
                         tools_used.append("archive_api")
-                # FinSight API for financial data
+                # FinSight API for financial data - Use LLM for ticker/metric extraction
                 if "finsight" in request_analysis.get("apis", []):
-                    tickers = self._extract_tickers_from_text(request.question)
-                    if not tickers:
-                        # Try common company name mappings
-                        question_lower = request.question.lower()
-                        if "apple" in question_lower:
-                            tickers = ["AAPL"]
-                        elif "tesla" in question_lower:
-                            tickers = ["TSLA"]
-                        elif "microsoft" in question_lower:
-                            tickers = ["MSFT"]
-                        elif "google" in question_lower or "alphabet" in question_lower:
-                            tickers = ["GOOGL"]
-                    if debug_mode:
-                        print(f"🔍 Extracted tickers: {tickers}")
-                    if tickers:
-                        # Detect what metric user is asking for
-                        question_lower = request.question.lower()
-                        metric = "revenue"  # Default
+                    # LLM extracts ticker + metric (more accurate than regex)
+                    finance_prompt = f"""Extract financial query details from user's question.
+User query: "{request.question}"
+Respond with JSON:
+{{
+  "tickers": ["AAPL", "TSLA"] (stock symbols - infer from company names if needed),
+  "metric": "revenue|marketCap|price|netIncome|eps|freeCashFlow|grossProfit"
+}}
+Examples:
+- "Tesla revenue" → {{"tickers": ["TSLA"], "metric": "revenue"}}
+- "What's Apple worth?" → {{"tickers": ["AAPL"], "metric": "marketCap"}}
+- "tsla stock price" → {{"tickers": ["TSLA"], "metric": "price"}}
+- "Microsoft profit" → {{"tickers": ["MSFT"], "metric": "netIncome"}}
+JSON:"""
+                    try:
+                        finance_response = await self.call_backend_query(
+                            query=finance_prompt,
+                            conversation_history=[],
+                            api_results={},
+                            tools_used=[]
+                        )
+                        import json as json_module
+                        finance_text = finance_response.response.strip()
+                        if '```' in finance_text:
+                            finance_text = finance_text.split('```')[1].replace('json', '').strip()
-                        if any(word in question_lower for word in ['market cap', 'marketcap', 'market value', 'valuation']):
-                            metric = "marketCap"
-                        elif any(word in question_lower for word in ['stock price', 'share price', 'current price', 'trading at']):
-                            metric = "price"
-                        elif 'profit' in question_lower and 'gross' not in question_lower:
-                            metric = "netIncome"
-                        elif 'earnings' in question_lower or 'eps' in question_lower:
-                            metric = "eps"
-                        elif any(word in question_lower for word in ['cash flow', 'cashflow']):
-                            metric = "freeCashFlow"
+                        finance_plan = json_module.loads(finance_text)
+                        tickers = finance_plan.get("tickers", [])
+                        metric = finance_plan.get("metric", "revenue")
-                        # Call FinSight with detected metric
                         if debug_mode:
-                            print(f"🔍 Calling FinSight API: calc/{tickers[0]}/{metric}")
-                        financial_data = await self._call_finsight_api(f"calc/{tickers[0]}/{metric}")
+                            print(f"🔍 LLM FINANCE PLAN: tickers={tickers}, metric={metric}")
+                        if tickers:
+                            # Call FinSight with extracted ticker + metric
+                            financial_data = await self._call_finsight_api(f"calc/{tickers[0]}/{metric}")
+                            if debug_mode:
+                                print(f"🔍 FinSight returned: {list(financial_data.keys()) if financial_data else None}")
+                            if financial_data and "error" not in financial_data:
+                                api_results["financial"] = financial_data
+                                tools_used.append("finsight_api")
+                    except Exception as e:
                         if debug_mode:
-                            print(f"🔍 FinSight returned: {list(financial_data.keys()) if financial_data else None}")
-                        if financial_data and "error" not in financial_data:
-                            api_results["financial"] = financial_data
-                            tools_used.append("finsight_api")
-                        else:
-                            if debug_mode and financial_data:
-                                print(f"🔍 FinSight error: {financial_data.get('error')}")
+                            print(f"🔍 Finance LLM extraction failed: {e}")
-            # Web Search fallback - ALWAYS available even for vague queries
-            # Use for: market share, industry data, current events, prices, anything not in APIs
-            if self.web_search:
-                question_lower = request.question.lower()
-                # Only search if query needs data and APIs didn't provide it
-                needs_web_search = (
-                    ('market share' in question_lower) or
-                    ('market size' in question_lower) or
-                    ('industry' in question_lower and not api_results.get('research')) or
-                    ('price' in question_lower and ('today' in question_lower or 'current' in question_lower or 'now' in question_lower)) or
-                    ('bitcoin' in question_lower or 'btc' in question_lower or 'crypto' in question_lower) or
-                    ('exchange rate' in question_lower or 'forex' in question_lower) or
-                    (not api_results and 'latest' in question_lower)  # Latest news/data
-                )
-                if needs_web_search or (not api_results and len(request.question.split()) > 5):
-                    try:
-                        if debug_mode:
-                            print(f"🔍 Using web search for: {request.question[:50]}...")
+            # ========================================================================
+            # PRIORITY 3: WEB SEARCH (Fallback - only if shell didn't handle AND no data yet)
+            # ========================================================================
+            # Only web search if:
+            # - Shell said "none" (not a directory/file operation)
+            # - We don't have enough data from Archive/FinSight
+            if self.web_search and shell_action == "none":
+                # Ask LLM: Should we web search for this?
+                web_decision_prompt = f"""Should we use web search for this query?
+User query: "{request.question}"
+Data already available: {list(api_results.keys())}
+Shell action: {shell_action}
+Respond with JSON:
+{{
+  "use_web_search": true/false,
+  "reason": "why or why not"
+}}
+Use web search for:
+- Market share/size (not in SEC filings)
+- Current prices (Bitcoin, commodities, real-time data)
+- Industry data, statistics
+- Recent events, news
+- Questions not answered by existing data
+Don't use if:
+- Shell already handled it (pwd/ls/find)
+- Question answered by research/financial APIs
+- Pure opinion question
+JSON:"""
+                try:
+                    web_decision_response = await self.call_backend_query(
+                        query=web_decision_prompt,
+                        conversation_history=[],
+                        api_results={},
+                        tools_used=[]
+                    )
+                    import json as json_module
+                    decision_text = web_decision_response.response.strip()
+                    if '```' in decision_text:
+                        decision_text = decision_text.split('```')[1].replace('json', '').strip()
+                    decision = json_module.loads(decision_text)
+                    needs_web_search = decision.get("use_web_search", False)
+                    if debug_mode:
+                        print(f"🔍 WEB SEARCH DECISION: {needs_web_search}, reason: {decision.get('reason')}")
+                    if needs_web_search:
                         web_results = await self.web_search.search_web(request.question, num_results=3)
                         if web_results and "results" in web_results:
                             api_results["web_search"] = web_results
                             tools_used.append("web_search")
                             if debug_mode:
                                 print(f"🔍 Web search returned: {len(web_results.get('results', []))} results")
-                    except Exception as e:
-                        if debug_mode:
-                            print(f"🔍 Web search failed: {e}")
+                except Exception as e:
+                    if debug_mode:
+                        print(f"🔍 Web search decision failed: {e}")
-            # PRODUCTION MODE: Check for shell/code execution needs FIRST
+            # PRODUCTION MODE: Call backend LLM with all gathered data
             if self.client is None:
-                # Check if query needs directory/file info or exploration
-                question_lower = request.question.lower()
-                # Basic info queries
-                needs_shell_info = any(phrase in question_lower for phrase in [
-                    'directory', 'folder', 'where am i', 'pwd', 'current location',
-                    'list files', 'what files', 'ls', 'files in', 'show files',
-                    'data files', 'csv files', 'check if file', 'file exists'
-                ])
-                # Fuzzy search queries (find similar directories/files)
-                needs_find = any(phrase in question_lower for phrase in [
-                    'looking for', 'find', 'search for', 'similar to',
-                    'go to', 'cd to', 'navigate to', 'or something', 'forgot the name',
-                    'look into', 'check what', 'what\'s in'
-                ])
-                if (needs_shell_info or needs_find) and self.shell_session:
-                    # Execute exploration commands
-                    try:
-                        api_results["shell_info"] = {}
-                        # Always include current location
-                        pwd_output = self.execute_command("pwd")
-                        api_results["shell_info"]["current_directory"] = pwd_output.strip()
-                        if needs_shell_info and not needs_find:
-                            # Just list current directory
-                            ls_output = self.execute_command("ls -lah")
-                            api_results["shell_info"]["directory_contents"] = ls_output
-                        if needs_find:
-                            # Smart search: extract directory name and location hints
-                            import re
-                            # Check if user is referring to previous context ("it", "there")
-                            has_pronoun = any(word in question_lower for word in ['it', 'there', 'that folder', 'that directory'])
-                            pronoun_resolved = False
-                            if has_pronoun and len(self.conversation_history) > 0:
-                                # Look for directory path in last assistant message
-                                last_assistant = None
-                                for msg in reversed(self.conversation_history):
-                                    if msg.get('role') == 'assistant':
-                                        last_assistant = msg.get('content', '')
-                                        break
-                                if last_assistant:
-                                    # Extract paths like /home/user/Downloads/cm522-main
-                                    paths = re.findall(r'(/[\w/.-]+)', last_assistant)
-                                    if paths:
-                                        # List contents of the first path found
-                                        target_path = paths[0]
-                                        ls_output = self.execute_command(f"ls -lah {target_path}")
-                                        api_results["shell_info"]["directory_contents"] = ls_output
-                                        api_results["shell_info"]["target_path"] = target_path
-                                        tools_used.append("shell_execution")
-                                        pronoun_resolved = True
-                            # Generic search if no pronoun or pronoun not resolved
-                            if not pronoun_resolved:
-                                # SMART EXTRACTION: Use pattern matching + common sense
-                                import re
-                                # Strategy: Look for quoted strings or alphanumeric codes
-                                # Priority 1: Quoted strings ("cm522", 'my_folder')
-                                quoted = re.findall(r'["\']([^"\']+)["\']', request.question)
-                                if quoted:
-                                    search_terms = quoted
-                                else:
-                                    # Priority 2: Alphanumeric codes/IDs (cm522, hw03, proj_2024)
-                                    # Pattern: letters + numbers mixed, or underscores/dashes
-                                    codes = re.findall(r'\b([a-zA-Z]*\d+[a-zA-Z0-9_-]*|[a-zA-Z0-9]*[_-]+[a-zA-Z0-9]+)\b', request.question)
-                                    # Priority 3: Capitalize words (likely proper nouns: GitHub, MyProject)
-                                    capitalized = re.findall(r'\b([A-Z][a-zA-Z0-9_-]+)\b', request.question)
-                                    # Priority 4: Long words (≥ 6 chars, likely meaningful)
-                                    long_words = re.findall(r'\b([a-zA-Z]{6,})\b', request.question)
-                                    # Combine and dedupe
-                                    search_terms = list(dict.fromkeys(codes + capitalized + long_words))
-                                    # Filter out common words
-                                    common = {
-                                        'looking', 'folder', 'directory', 'called', 'something',
-                                        'downloads', 'documents', 'computer', 'somewhere'
-                                    }
-                                    search_terms = [t for t in search_terms if t.lower() not in common][:2]
-                                debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
-                                if debug_mode:
-                                    print(f"🔍 EXTRACTED SEARCH TERMS: {search_terms}")
-                                if not search_terms:
-                                    search_terms = ['']  # Empty search to show "no target found"
-                                # Detect location hints
-                                search_path = "~"  # Default to home
-                                if 'downloads' in question_lower:
-                                    search_path = "~/Downloads"
-                                elif 'documents' in question_lower:
-                                    search_path = "~/Documents"
-                                search_results = []
-                                for name in search_terms:
-                                    if not name:
-                                        continue
-                                    # Search with increasing depth
-                                    find_cmd = f"find {search_path} -maxdepth 4 -type d -iname '*{name}*' 2>/dev/null | head -20"
-                                    find_output = self.execute_command(find_cmd)
-                                    debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
-                                    if debug_mode:
-                                        print(f"🔍 FIND EXECUTED: {find_cmd}")
-                                        print(f"🔍 FIND OUTPUT: {repr(find_output)}")
-                                    if find_output.strip():
-                                        search_results.append(f"Searched for '*{name}*' in {search_path}:\n{find_output}")
-                                if search_results:
-                                    api_results["shell_info"]["search_results"] = "\n\n".join(search_results)
-                                else:
-                                    api_results["shell_info"]["search_results"] = f"No directories found matching query in {search_path}"
-                        tools_used.append("shell_execution")
-                    except Exception as e:
-                        if debug_mode:
-                            print(f"🔍 Shell execution failed: {e}")
-                # DEBUG: Log exactly what we're sending to backend
-                debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
-                if debug_mode and api_results.get("shell_info", {}).get("search_results"):
-                    print(f"🔍 SENDING TO BACKEND:")
-                    print(f"🔍 shell_info.search_results = {repr(api_results['shell_info']['search_results'])}")
+                # DEBUG: Log what we're sending
+                if debug_mode and api_results.get("shell_info"):
+                    print(f"🔍 SENDING TO BACKEND: shell_info keys = {list(api_results.get('shell_info', {}).keys())}")
                 # Call backend and UPDATE CONVERSATION HISTORY
                 response = await self.call_backend_query(
                     query=request.question,
                     conversation_history=self.conversation_history[-10:],
-                    api_results=api_results,  # Include the data!
-                    tools_used=tools_used  # Pass tools list for history
+                    api_results=api_results,
+                    tools_used=tools_used
                 )
-                # CRITICAL: Save to conversation history for context
+                # CRITICAL: Save to conversation history
                 self.conversation_history.append({"role": "user", "content": request.question})
                 self.conversation_history.append({"role": "assistant", "content": response.response})

{cite_agent-1.2.13 → cite_agent-1.3.1/cite_agent.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cite-agent
-Version: 1.2.13
+Version: 1.3.1
 Summary: Terminal AI assistant for academic research with citation verification
 Home-page: https://github.com/Spectating101/cite-agent
 Author: Cite-Agent Team

{cite_agent-1.2.13 → cite_agent-1.3.1}/setup.py RENAMED Viewed

@@ -7,7 +7,7 @@ long_description = readme_path.read_text() if readme_path.exists() else "Termina
 setup(
     name="cite-agent",
-    version="1.2.13",
+    version="1.3.1",
     author="Cite-Agent Team",
     author_email="contact@citeagent.dev",
     description="Terminal AI assistant for academic research with citation verification",