PyPI - cite-agent - Versions diffs - 1.0.5__py3-none-any.whl → 1.2.4__py3-none-any.whl - Mend

cite-agent 1.0.5-py3-none-any.whl → 1.2.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cite-agent might be problematic. Click here for more details.

Files changed (14)

cite_agent/cli.py +374 -39
cite_agent/cli_workflow.py +276 -0
cite_agent/enhanced_ai_agent.py +575 -80
cite_agent/session_manager.py +215 -0
cite_agent/updater.py +50 -17
cite_agent/workflow.py +427 -0
cite_agent/workflow_integration.py +275 -0
cite_agent-1.2.4.dist-info/METADATA +442 -0
{cite_agent-1.0.5.dist-info → cite_agent-1.2.4.dist-info}/RECORD +13 -9
cite_agent-1.0.5.dist-info/METADATA +0 -235
{cite_agent-1.0.5.dist-info → cite_agent-1.2.4.dist-info}/WHEEL +0 -0
{cite_agent-1.0.5.dist-info → cite_agent-1.2.4.dist-info}/entry_points.txt +0 -0
{cite_agent-1.0.5.dist-info → cite_agent-1.2.4.dist-info}/licenses/LICENSE +0 -0
{cite_agent-1.0.5.dist-info → cite_agent-1.2.4.dist-info}/top_level.txt +0 -0

cite_agent/enhanced_ai_agent.py CHANGED Viewed

@@ -76,6 +76,11 @@ class EnhancedNocturnalAgent:
         self.total_cost = 0.0
         self.cost_per_1k_tokens = 0.0001  # Groq pricing estimate
         self._auto_update_enabled = True
+        # Workflow integration
+        from .workflow import WorkflowManager
+        self.workflow = WorkflowManager()
+        self.last_paper_result = None  # Track last paper mentioned for "save that"
         try:
             self.per_user_token_limit = int(os.getenv("GROQ_PER_USER_TOKENS", 50000))
         except (TypeError, ValueError):
@@ -108,11 +113,42 @@ class EnhancedNocturnalAgent:
         self._service_roots: List[str] = []
         self._backend_health_cache: Dict[str, Dict[str, Any]] = {}
+        # Initialize authentication
+        self.auth_token = None
+        self.user_id = None
+        self._load_authentication()
         try:
             self._health_ttl = float(os.getenv("NOCTURNAL_HEALTH_TTL", 30))
         except Exception:
             self._health_ttl = 30.0
         self._recent_sources: List[Dict[str, Any]] = []
+    def _load_authentication(self):
+        """Load authentication from session file"""
+        use_local_keys = os.getenv("USE_LOCAL_KEYS", "true").lower() == "true"
+        if not use_local_keys:
+            # Backend mode - load auth token from session
+            from pathlib import Path
+            session_file = Path.home() / ".nocturnal_archive" / "session.json"
+            if session_file.exists():
+                try:
+                    import json
+                    with open(session_file, 'r') as f:
+                        session_data = json.load(f)
+                        self.auth_token = session_data.get('access_token')
+                        self.user_id = session_data.get('user_id')
+                except Exception:
+                    self.auth_token = None
+                    self.user_id = None
+            else:
+                self.auth_token = None
+                self.user_id = None
+        else:
+            # Local keys mode
+            self.auth_token = None
+            self.user_id = None
         self._session_topics: Dict[str, Dict[str, Any]] = {}
         # Initialize API clients
@@ -184,10 +220,10 @@ class EnhancedNocturnalAgent:
             )
             # Archive API client
-            self.archive_base_url = _normalize_base(archive_env, "http://127.0.0.1:8000/api")
+            self.archive_base_url = _normalize_base(archive_env, "https://cite-agent-api-720dfadd602c.herokuapp.com/api")
             # FinSight API client
-            self.finsight_base_url = _normalize_base(finsight_env, "http://127.0.0.1:8000/v1/finance")
+            self.finsight_base_url = _normalize_base(finsight_env, "https://cite-agent-api-720dfadd602c.herokuapp.com/v1/finance")
             # Workspace Files API client
             files_env = os.getenv("FILES_API_URL")
@@ -203,13 +239,15 @@ class EnhancedNocturnalAgent:
             self._default_headers.clear()
             if self.api_key:
                 self._default_headers["X-API-Key"] = self.api_key
-                if self.api_key == "demo-key-123":
-                    print("⚠️ Using demo API key. Set NOCTURNAL_KEY for production usage.")
-            else:
-                print("⚠️ No API key configured for Nocturnal Archive API calls")
             self._update_service_roots()
-            print(f"✅ API clients initialized (Archive={self.archive_base_url}, FinSight={self.finsight_base_url})")
+            # Only show init messages in debug mode
+            debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
+            if debug_mode:
+                if self.api_key == "demo-key-123":
+                    print("⚠️ Using demo API key")
+                print(f"✅ API clients initialized (Archive={self.archive_base_url}, FinSight={self.finsight_base_url})")
         except Exception as e:
             print(f"⚠️ API client initialization warning: {e}")
@@ -812,14 +850,22 @@ class EnhancedNocturnalAgent:
     def _format_api_results_for_prompt(self, api_results: Dict[str, Any]) -> str:
         if not api_results:
+            logger.info("🔍 DEBUG: _format_api_results_for_prompt called with EMPTY api_results")
             return "No API results yet."
         try:
             serialized = json.dumps(api_results, indent=2)
         except Exception:
             serialized = str(api_results)
-        max_len = 2000
+        max_len = 8000  # Keep under 12K token limit (backend + context)
         if len(serialized) > max_len:
-            serialized = serialized[:max_len] + "\n... (truncated)"
+            serialized = serialized[:max_len] + "\n... (truncated for length)"
+        # DEBUG: Log formatted results length and preview
+        logger.info(f"🔍 DEBUG: _format_api_results_for_prompt returning {len(serialized)} chars")
+        if "research" in api_results:
+            papers_count = len(api_results.get("research", {}).get("results", []))
+            logger.info(f"🔍 DEBUG: api_results contains 'research' with {papers_count} papers")
         return serialized
     def _build_system_prompt(
@@ -851,8 +897,8 @@ class EnhancedNocturnalAgent:
             )
         else:  # quantitative
             intro = (
-                "You are Nocturnal, a truth-seeking research and finance AI. "
-                "PRIMARY DIRECTIVE: Accuracy > Agreeableness. "
+                "You are Cite Agent, a truth-seeking research and finance AI. "
+                "PRIMARY DIRECTIVE: Accuracy > Agreeableness. Ask clarifying questions when context is missing. "
                 "You are a fact-checker and analyst, NOT a people-pleaser. "
                 "You have direct access to production-grade data sources and can write/execute code (Python, R, SQL)."
             )
@@ -869,10 +915,32 @@ class EnhancedNocturnalAgent:
             capability_lines.append("• Persistent shell session for system inspection and code execution")
         if not capability_lines:
             capability_lines.append("• Core reasoning, code generation (Python/R/SQL), memory recall")
+        # Add workflow capabilities
+        capability_lines.append("")
+        capability_lines.append("📚 WORKFLOW INTEGRATION (Always available):")
+        capability_lines.append("• You can SAVE papers to user's local library")
+        capability_lines.append("• You can LIST papers from library")
+        capability_lines.append("• You can EXPORT citations to BibTeX or APA")
+        capability_lines.append("• You can SEARCH user's paper collection")
+        capability_lines.append("• You can COPY text to user's clipboard")
+        capability_lines.append("• User's query history is automatically tracked")
         sections.append("Capabilities in play:\n" + "\n".join(capability_lines))
         # ENHANCED TRUTH-SEEKING RULES (adapt based on mode)
         base_rules = [
+            "🚨 BE PATIENT: Don't rush to tools. Have a conversation to understand intent FIRST.",
+            "🚨 CLARIFY BEFORE SEARCH: If you see '2008, 2015, 2019' → ask 'Are you looking for crisis patterns? Economic events? Papers published in those years?' DON'T just search '2008'.",
+            "🚨 KNOW YOUR TOOLS' LIMITS: SEC has revenue, not market share. Archive has papers, not market data. If tool can't answer, say 'I don't have that data' or use web search.",
+            "🚨 TOOL != ANSWER: Don't use tools just because you have them. Revenue ≠ Market Share. Published year ≠ Subject matter.",
+            "",
+            "💬 CONVERSATIONAL FLOW:",
+            "1. User asks vague question → YOU ask clarifying questions",
+            "2. User provides context → YOU confirm understanding",
+            "3. YOU make tool calls → Present results",
+            "NEVER skip step 1 or 2. Be deliberate, not eager.",
+            "",
             "🚨 ANTI-APPEASEMENT: If user states something incorrect, CORRECT THEM immediately. Do not agree to be polite.",
             "🚨 UNCERTAINTY: If you're uncertain, SAY SO explicitly. 'I don't know' is better than a wrong answer.",
             "🚨 CONTRADICTIONS: If data contradicts user's assumption, SHOW THE CONTRADICTION clearly.",
@@ -883,6 +951,17 @@ class EnhancedNocturnalAgent:
             "📊 NO EXTRAPOLATION: Never go beyond what sources directly state.",
             "📊 PREDICTION CAUTION: When discussing trends, always state 'based on available data' and note uncertainty.",
             "",
+            "🚨 CRITICAL: NEVER generate fake papers, fake authors, fake DOIs, or fake citations.",
+            "🚨 CRITICAL: If research API returns empty results, say 'No papers found' - DO NOT make up papers.",
+            "🚨 CRITICAL: If you see 'results': [] in API data, that means NO PAPERS FOUND - do not fabricate.",
+            "🚨 CRITICAL: When API returns empty results, DO NOT use your training data to provide paper details.",
+            "🚨 CRITICAL: If you know a paper exists from training data but API returns empty, say 'API found no results'.",
+            "",
+            "🚨 ABSOLUTE RULE: If you see 'results': [] in the API data, you MUST respond with ONLY:",
+            "   'No papers found in the research database. The API returned empty results.'",
+            "   DO NOT provide any paper details, authors, titles, or citations.",
+            "   DO NOT use your training data to fill in missing information.",
+            "",
             "✓ VERIFICATION: Cross-check against multiple sources when available.",
             "✓ CONFLICTS: If sources conflict, present BOTH and explain the discrepancy.",
             "✓ SHOW REASONING: 'According to [source], X is Y because...'",
@@ -924,6 +1003,20 @@ class EnhancedNocturnalAgent:
         rules.append("")
         rules.append("Keep responses concise but complete. Quote exact text from sources when possible.")
+        # Add workflow behavior rules
+        workflow_rules = [
+            "",
+            "📚 WORKFLOW BEHAVIOR:",
+            "• After finding papers, OFFER to save them: 'Would you like me to save this to your library?'",
+            "• After showing a citation, ASK: 'Want me to copy that to your clipboard?'",
+            "• If user says 'save that' or 'add to library', ACKNOWLEDGE and confirm the save",
+            "• If user mentions 'my library', LIST their saved papers",
+            "• If user asks for 'bibtex' or 'apa', PROVIDE the formatted citation",
+            "• Be PROACTIVE: suggest exports, show library stats, offer clipboard copies",
+            "• Example: 'I found 3 papers. I can save them to your library or export to BibTeX if you'd like.'",
+        ]
+        rules.extend(workflow_rules)
         sections.append("CRITICAL RULES:\n" + "\n".join(rules))
         # CORRECTION EXAMPLES (adapt based on mode)
@@ -941,6 +1034,24 @@ class EnhancedNocturnalAgent:
             )
         else:
             examples = (
+                "EXAMPLE 1: Be Patient, Don't Rush\n"
+                "User: 'Find papers on 2008, 2015, 2019'\n"
+                "❌ BAD: [Searches for year:2008 immediately] 'Found 50 papers from 2008...'\n"
+                "✅ GOOD: 'Are you looking for papers ABOUT events in those years (financial crises, policy changes), "
+                "or papers PUBLISHED in those years? Also, what topic? (Economics? Healthcare? Climate?)'\n\n"
+                "EXAMPLE 2: Know Your Tools' Limits\n"
+                "User: 'What's Palantir's market share?'\n"
+                "❌ BAD: 'Palantir's latest revenue is $1B...' (Revenue ≠ Market Share! SEC doesn't have market share!)\n"
+                "✅ GOOD: 'Market share requires: (1) Palantir's revenue, (2) total market size. SEC has #1, not #2. "
+                "Which market? (Data analytics = ~$50B, Gov contracts = ~$200B). I can web search for total market size if you specify.'\n\n"
+                "EXAMPLE 3: Conversational Flow\n"
+                "User: 'Compare Tesla and Ford'\n"
+                "❌ BAD: [Immediately fetches both revenues] 'Tesla: $81B, Ford: $158B'\n"
+                "✅ GOOD: 'Compare on what dimension? Revenue? (Ford larger). Market cap? (Tesla larger). EV sales? (Tesla dominates). "
+                "Production volume? (Ford higher). Each tells a different story. Which matters to you?'\n\n"
                 "EXAMPLE CORRECTIONS:\n"
                 "User: 'So revenue went up 50%?'\n"
                 "You: '❌ No. According to 10-K page 23, revenue increased 15%, not 50%. "
@@ -961,7 +1072,16 @@ class EnhancedNocturnalAgent:
             f"confidence={request_analysis.get('confidence')}"
         )
-        sections.append("API RESULTS:\n" + self._format_api_results_for_prompt(api_results))
+        # Add explicit instruction before API results
+        api_instructions = (
+            "🚨 CRITICAL: The following API RESULTS are REAL DATA from production APIs.\n"
+            "🚨 These are NOT examples or templates - they are ACTUAL results to use in your response.\n"
+            "🚨 DO NOT generate new/fake data - USE EXACTLY what is shown below.\n"
+            "🚨 If you see paper titles, authors, DOIs below - these are REAL papers you MUST cite.\n"
+            "🚨 If API results show empty/no papers, say 'No papers found' - DO NOT make up papers.\n"
+        )
+        sections.append(api_instructions + "\nAPI RESULTS:\n" + self._format_api_results_for_prompt(api_results))
         return "\n\n".join(sections)
@@ -1016,18 +1136,32 @@ class EnhancedNocturnalAgent:
         elif len(question.split()) <= 40 and request_analysis.get("type") in {"general", "system"} and not api_results:
             use_light_model = True
-        if use_light_model:
+        # Select model based on LLM provider
+        if getattr(self, 'llm_provider', 'groq') == 'cerebras':
+            if use_light_model:
+                return {
+                    "model": "llama3.1-8b",  # Cerebras 8B model
+                    "max_tokens": 520,
+                    "temperature": 0.2
+                }
             return {
-                "model": "llama-3.1-8b-instant",
-                "max_tokens": 520,
-                "temperature": 0.2
+                "model": "llama-3.3-70b",  # Cerebras 70B model
+                "max_tokens": 900,
+                "temperature": 0.3
+            }
+        else:
+            # Groq models
+            if use_light_model:
+                return {
+                    "model": "llama-3.1-8b-instant",
+                    "max_tokens": 520,
+                    "temperature": 0.2
+                }
+            return {
+                "model": "llama-3.3-70b-versatile",
+                "max_tokens": 900,
+                "temperature": 0.3
             }
-        return {
-            "model": "llama-3.3-70b-versatile",
-            "max_tokens": 900,
-            "temperature": 0.3
-        }
     def _mark_current_key_exhausted(self, reason: str = "rate_limit"):
         if not self.api_keys:
@@ -1055,11 +1189,18 @@ class EnhancedNocturnalAgent:
                     attempts += 1
                     continue
             try:
-                self.client = Groq(api_key=key)
+                if self.llm_provider == "cerebras":
+                    from openai import OpenAI
+                    self.client = OpenAI(
+                        api_key=key,
+                        base_url="https://api.cerebras.ai/v1"
+                    )
+                else:
+                    self.client = Groq(api_key=key)
                 self.current_api_key = key
                 return True
             except Exception as e:
-                logger.error(f"Failed to initialize Groq client for rotated key: {e}")
+                logger.error(f"Failed to initialize {self.llm_provider.upper()} client for rotated key: {e}")
                 self.exhausted_keys[key] = now
                 attempts += 1
         return False
@@ -1087,11 +1228,18 @@ class EnhancedNocturnalAgent:
                 del self.exhausted_keys[key]
             try:
-                self.client = Groq(api_key=key)
+                if self.llm_provider == "cerebras":
+                    from openai import OpenAI
+                    self.client = OpenAI(
+                        api_key=key,
+                        base_url="https://api.cerebras.ai/v1"
+                    )
+                else:
+                    self.client = Groq(api_key=key)
                 self.current_api_key = key
                 return True
             except Exception as e:
-                logger.error(f"Failed to initialize Groq client for key index {self.current_key_index}: {e}")
+                logger.error(f"Failed to initialize {self.llm_provider.upper()} client for key index {self.current_key_index}: {e}")
                 self.exhausted_keys[key] = now
                 attempts += 1
                 self.current_key_index = (self.current_key_index + 1) % total
@@ -1145,7 +1293,15 @@ class EnhancedNocturnalAgent:
                 payload = payload_full[:1500]
                 if len(payload_full) > 1500:
                     payload += "\n…"
-                details.append(f"**Research API snapshot**\n```json\n{payload}\n```")
+                # Check if results are empty and add explicit warning
+                if research.get("results") == [] or not research.get("results"):
+                    details.append(f"**Research API snapshot**\n```json\n{payload}\n```")
+                    details.append("🚨 **CRITICAL: API RETURNED EMPTY RESULTS - DO NOT GENERATE ANY PAPER DETAILS**")
+                    details.append("🚨 **DO NOT PROVIDE AUTHORS, TITLES, DOIs, OR ANY PAPER INFORMATION**")
+                    details.append("🚨 **SAY 'NO PAPERS FOUND' AND STOP - DO NOT HALLUCINATE**")
+                else:
+                    details.append(f"**Research API snapshot**\n```json\n{payload}\n```")
             files_context = api_results.get("files_context")
             if files_context:
@@ -1239,13 +1395,31 @@ class EnhancedNocturnalAgent:
             self._check_updates_background()
             self._ensure_environment_loaded()
             self._init_api_clients()
+            # Suppress verbose initialization messages in production
+            import logging
+            logging.getLogger("aiohttp").setLevel(logging.ERROR)
+            logging.getLogger("asyncio").setLevel(logging.ERROR)
             # SECURITY FIX: No API keys on client!
             # All API calls go through our secure backend
             # This prevents key extraction and piracy
             # DISABLED for beta testing - set USE_LOCAL_KEYS=false to enable backend-only mode
-            use_local_keys = os.getenv("USE_LOCAL_KEYS", "true").lower() == "true"
+            # SECURITY: Production users MUST use backend for monetization
+            # Dev mode only available via undocumented env var (not in user docs)
+            use_local_keys_env = os.getenv("USE_LOCAL_KEYS", "").lower()
+            if use_local_keys_env == "true":
+                # Dev mode - use local keys
+                use_local_keys = True
+            elif use_local_keys_env == "false":
+                # Explicit backend mode
+                use_local_keys = False
+            else:
+                # Default: Always use backend (for monetization)
+                # Even if session doesn't exist, we'll prompt for login
+                use_local_keys = False
             if not use_local_keys:
                 self.api_keys = []  # Empty - keys stay on server
@@ -1256,7 +1430,7 @@ class EnhancedNocturnalAgent:
                 # Get backend API URL from config
                 self.backend_api_url = os.getenv(
                     "NOCTURNAL_API_URL",
-                    "https://api.nocturnal.dev/api"  # Production default
+                    "https://cite-agent-api-720dfadd602c.herokuapp.com/api"  # Production Heroku backend
                 )
                 # Get auth token from session (set by auth.py after login)
@@ -1276,34 +1450,59 @@ class EnhancedNocturnalAgent:
                     self.auth_token = None
                     self.user_id = None
-                if self.auth_token:
-                    print(f"✅ Enhanced Nocturnal Agent Ready! (Authenticated)")
-                else:
-                    print("⚠️ Not authenticated. Please log in to use the agent.")
+                # Suppress messages in production (only show in debug mode)
+                debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
+                if debug_mode:
+                    if self.auth_token:
+                        print(f"✅ Enhanced Nocturnal Agent Ready! (Authenticated)")
+                    else:
+                        print("⚠️ Not authenticated. Please log in to use the agent.")
             else:
-                # Local keys mode - load Groq API keys
+                # Local keys mode - load Cerebras API keys (primary) with Groq fallback
                 self.auth_token = None
                 self.user_id = None
-                # Load Groq keys from environment
+                # Load Cerebras keys from environment (PRIMARY)
                 self.api_keys = []
-                for i in range(1, 10):  # Check GROQ_API_KEY_1 through GROQ_API_KEY_9
-                    key = os.getenv(f"GROQ_API_KEY_{i}") or os.getenv(f"GROQ_API_KEY")
+                for i in range(1, 10):  # Check CEREBRAS_API_KEY_1 through CEREBRAS_API_KEY_9
+                    key = os.getenv(f"CEREBRAS_API_KEY_{i}") or os.getenv(f"CEREBRAS_API_KEY")
                     if key and key not in self.api_keys:
                         self.api_keys.append(key)
+                # Fallback to Groq keys if no Cerebras keys found
+                if not self.api_keys:
+                    for i in range(1, 10):
+                        key = os.getenv(f"GROQ_API_KEY_{i}") or os.getenv(f"GROQ_API_KEY")
+                        if key and key not in self.api_keys:
+                            self.api_keys.append(key)
+                    self.llm_provider = "groq"
+                else:
+                    self.llm_provider = "cerebras"
+                debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
                 if not self.api_keys:
-                    print("⚠️ No Groq API keys found. Set GROQ_API_KEY_1, GROQ_API_KEY_2, etc.")
+                    if debug_mode:
+                        print("⚠️ No LLM API keys found. Set CEREBRAS_API_KEY or GROQ_API_KEY")
                 else:
-                    print(f"✅ Loaded {len(self.api_keys)} Groq API key(s)")
-                    # Initialize first client
+                    if debug_mode:
+                        print(f"✅ Loaded {len(self.api_keys)} {self.llm_provider.upper()} API key(s)")
+                    # Initialize first client - Cerebras uses OpenAI-compatible API
                     try:
-                        from groq import Groq
-                        self.client = Groq(api_key=self.api_keys[0])
+                        if self.llm_provider == "cerebras":
+                            # Cerebras uses OpenAI client with custom base URL
+                            from openai import OpenAI
+                            self.client = OpenAI(
+                                api_key=self.api_keys[0],
+                                base_url="https://api.cerebras.ai/v1"
+                            )
+                        else:
+                            # Groq fallback
+                            from groq import Groq
+                            self.client = Groq(api_key=self.api_keys[0])
                         self.current_api_key = self.api_keys[0]
                         self.current_key_index = 0
                     except Exception as e:
-                        print(f"⚠️ Failed to initialize Groq client: {e}")
+                        print(f"⚠️ Failed to initialize {self.llm_provider.upper()} client: {e}")
             if self.shell_session and self.shell_session.poll() is not None:
                 self.shell_session = None
@@ -1332,33 +1531,36 @@ class EnhancedNocturnalAgent:
             return True
     def _check_updates_background(self):
-        """Check for updates in background (silent, non-blocking)"""
+        """Check for updates and auto-install if available"""
         if not self._auto_update_enabled:
             return
-        import threading
-        def update_check():
-            try:
-                from .updater import NocturnalUpdater
-                updater = NocturnalUpdater()
-                update_info = updater.check_for_updates()
+        # Check for updates (synchronous, fast)
+        try:
+            from .updater import NocturnalUpdater
+            updater = NocturnalUpdater()
+            update_info = updater.check_for_updates()
+            if update_info and update_info["available"]:
+                # Auto-update silently in background
+                import threading
+                def do_update():
+                    try:
+                        updater.update_package(silent=True)
+                    except:
+                        pass
+                threading.Thread(target=do_update, daemon=True).start()
-                if update_info and update_info["available"]:
-                    # Silent update - no interruption
-                    updater.update_package()
-            except Exception:
-                # Completely silent - don't interrupt user experience
-                pass
-        # Run in background thread
-        threading.Thread(target=update_check, daemon=True).start()
+        except Exception:
+            # Silently ignore update check failures
+            pass
-    async def call_backend_query(self, query: str, conversation_history: Optional[List[Dict]] = None) -> ChatResponse:
+    async def call_backend_query(self, query: str, conversation_history: Optional[List[Dict]] = None,
+                                 api_results: Optional[Dict[str, Any]] = None, tools_used: Optional[List[str]] = None) -> ChatResponse:
         """
         Call backend /query endpoint instead of Groq directly
         This is the SECURE method - all API keys stay on server
+        Includes API results (Archive, FinSight) in context for better responses
         """
         if not self.auth_token:
             return ChatResponse(
@@ -1373,12 +1575,13 @@ class EnhancedNocturnalAgent:
             )
         try:
-            # Build request
+            # Build request with API context as separate field
             payload = {
-                "query": query,
+                "query": query,  # Keep query clean
                 "conversation_history": conversation_history or [],
-                "model": "llama-3.3-70b-versatile",
-                "temperature": 0.7,
+                "api_context": api_results,  # Send API results separately
+                "model": "llama-3.3-70b",  # Compatible with Cerebras (priority) and Groq
+                "temperature": 0.2,  # Low temp for accuracy
                 "max_tokens": 4000
             }
@@ -1410,11 +1613,32 @@ class EnhancedNocturnalAgent:
                 elif response.status == 200:
                     data = await response.json()
+                    response_text = data.get('response', '')
+                    tokens = data.get('tokens_used', 0)
+                    # Combine tools used
+                    all_tools = tools_used or []
+                    all_tools.append("backend_llm")
+                    # Save to workflow history
+                    self.workflow.save_query_result(
+                        query=query,
+                        response=response_text,
+                        metadata={
+                            "tools_used": all_tools,
+                            "tokens_used": tokens,
+                            "model": data.get('model'),
+                            "provider": data.get('provider')
+                        }
+                    )
                     return ChatResponse(
-                        response=data.get('response', ''),
-                        tokens_used=data.get('tokens_used', 0),
+                        response=response_text,
+                        tokens_used=tokens,
+                        tools_used=all_tools,
                         model=data.get('model', 'llama-3.3-70b-versatile'),
-                        timestamp=data.get('timestamp', datetime.now(timezone.utc).isoformat())
+                        timestamp=data.get('timestamp', datetime.now(timezone.utc).isoformat()),
+                        api_results=api_results
                     )
                 else:
@@ -1498,15 +1722,48 @@ class EnhancedNocturnalAgent:
                     return {"error": "HTTP session not initialized"}
                 url = f"{self.archive_base_url}/{endpoint}"
-                headers = getattr(self, "_default_headers", None)
-                if headers:
-                    headers = dict(headers)
+                # Start fresh with headers
+                headers = {}
+                # Always use demo key for Archive (public research data)
+                headers["X-API-Key"] = "demo-key-123"
+                headers["Content-Type"] = "application/json"
+                # Also add JWT if we have it
+                if self.auth_token:
+                    headers["Authorization"] = f"Bearer {self.auth_token}"
+                debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
+                if debug_mode:
+                    print(f"🔍 Archive headers: {list(headers.keys())}, X-API-Key={headers.get('X-API-Key')}")
+                    print(f"🔍 Archive URL: {url}")
+                    print(f"🔍 Archive data: {data}")
                 async with self.session.post(url, json=data, headers=headers, timeout=30) as response:
+                    if debug_mode:
+                        print(f"🔍 Archive response status: {response.status}")
                     if response.status == 200:
                         payload = await response.json()
                         self._record_data_source("Archive", f"POST {endpoint}", True)
                         return payload
+                    elif response.status == 422:  # Validation error
+                        try:
+                            error_detail = await response.json()
+                            logger.error(f"Archive API validation error (HTTP 422): {error_detail}")
+                        except Exception:
+                            error_detail = await response.text()
+                            logger.error(f"Archive API validation error (HTTP 422): {error_detail}")
+                        if attempt < max_retries - 1:
+                            # Retry with simplified request
+                            if "sources" in data and len(data["sources"]) > 1:
+                                data["sources"] = [data["sources"][0]]  # Try single source
+                                logger.info(f"Retrying with single source: {data['sources']}")
+                            await asyncio.sleep(retry_delay)
+                            continue
+                        self._record_data_source("Archive", f"POST {endpoint}", False, "422 validation error")
+                        return {"error": f"Archive API validation error: {error_detail}"}
                     elif response.status == 429:  # Rate limited
                         if attempt < max_retries - 1:
                             await asyncio.sleep(retry_delay * (2 ** attempt))  # Exponential backoff
@@ -1517,6 +1774,8 @@ class EnhancedNocturnalAgent:
                         self._record_data_source("Archive", f"POST {endpoint}", False, "401 unauthorized")
                         return {"error": "Archive API authentication failed. Please check API key."}
                     else:
+                        error_text = await response.text()
+                        logger.error(f"Archive API error (HTTP {response.status}): {error_text}")
                         self._record_data_source("Archive", f"POST {endpoint}", False, f"HTTP {response.status}")
                         return {"error": f"Archive API error: {response.status}"}
@@ -1551,9 +1810,20 @@ class EnhancedNocturnalAgent:
                     return {"error": "HTTP session not initialized"}
                 url = f"{self.finsight_base_url}/{endpoint}"
-                headers = getattr(self, "_default_headers", None)
-                if headers:
-                    headers = dict(headers)
+                # Start fresh with headers - don't use _default_headers which might be wrong
+                headers = {}
+                # Always use demo key for FinSight (SEC data is public)
+                headers["X-API-Key"] = "demo-key-123"
+                # Also add JWT if we have it
+                if self.auth_token:
+                    headers["Authorization"] = f"Bearer {self.auth_token}"
+                debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
+                if debug_mode:
+                    print(f"🔍 FinSight headers: {list(headers.keys())}, X-API-Key={headers.get('X-API-Key')}")
+                    print(f"🔍 FinSight URL: {url}")
                 async with self.session.get(url, params=params, headers=headers, timeout=30) as response:
                     if response.status == 200:
@@ -1639,9 +1909,18 @@ class EnhancedNocturnalAgent:
                 continue
             results = result.get("results") or result.get("papers") or []
-            if results:
+            # Validate papers have minimal required fields
+            validated_results = []
+            for paper in results:
+                if isinstance(paper, dict) and paper.get("title") and paper.get("year"):
+                    validated_results.append(paper)
+                else:
+                    logger.warning(f"Skipping invalid paper: {paper}")
+            if validated_results:
                 aggregated_payload = dict(result)
-                aggregated_payload["results"] = results
+                aggregated_payload["results"] = validated_results
+                aggregated_payload["validation_note"] = f"Validated {len(validated_results)} out of {len(results)} papers"
                 break
         aggregated_payload.setdefault("results", [])
@@ -1650,11 +1929,14 @@ class EnhancedNocturnalAgent:
         if provider_errors:
             aggregated_payload["provider_errors"] = provider_errors
+        # CRITICAL: Add explicit marker for empty results to prevent hallucination
         if not aggregated_payload["results"]:
             aggregated_payload["notes"] = (
                 "No papers were returned by the research providers. This often occurs during "
                 "temporary rate limits; please retry in a minute or adjust the query scope."
             )
+            aggregated_payload["EMPTY_RESULTS"] = True
+            aggregated_payload["warning"] = "DO NOT GENERATE FAKE PAPERS - API returned zero results"
         return aggregated_payload
@@ -1939,6 +2221,92 @@ class EnhancedNocturnalAgent:
         return formatted, 0
+    async def _handle_workflow_commands(self, request: ChatRequest) -> Optional[ChatResponse]:
+        """
+        Answer library, export and history commands phrased in natural language.
+        The lower-cased question is checked, in order, for: showing the library,
+        exporting to BibTeX, exporting to Markdown, showing query history and
+        searching the library. The first match is answered through
+        ``self._quick_reply``; None is returned when nothing matches so the
+        caller can continue with its normal processing.
+        """
+        text = request.question.lower()
+        def mentions(*phrases):
+            # True when any trigger phrase occurs anywhere in the question
+            return any(phrase in text for phrase in phrases)
+        def numbered(papers, limit):
+            # One "N. Title (First Author[ et al.], Year)" line per paper, capped at `limit`
+            lines = []
+            for position, paper in enumerate(papers[:limit], 1):
+                label = paper.authors[0] if paper.authors else "Unknown"
+                if len(paper.authors) > 1:
+                    label = label + " et al."
+                lines.append(f"{position}. {paper.title} ({label}, {paper.year})")
+            return "\n".join(lines)
+        # Show library
+        if mentions("show my library", "list my papers", "what's in my library", "my saved papers"):
+            saved = self.workflow.list_papers()
+            if saved:
+                listing = numbered(saved, 10)
+                message = f"You have {len(saved)} paper(s) in your library:\n\n" + listing
+                if len(saved) > 10:
+                    message += f"\n\n...and {len(saved) - 10} more."
+            else:
+                message = "Your library is empty. As you find papers, I can save them for you."
+            return self._quick_reply(request, message, tools_used=["workflow_library"], confidence=1.0)
+        # Export to BibTeX
+        if mentions("export to bibtex", "export bibtex", "generate bibtex", "bibtex export"):
+            if self.workflow.export_to_bibtex():
+                message = f"✅ Exported {len(self.workflow.list_papers())} papers to BibTeX.\n\nFile: {self.workflow.bibtex_file}\n\nYou can import this into Zotero, Mendeley, or use it in your LaTeX project."
+            else:
+                message = "❌ Failed to export BibTeX. Make sure you have papers in your library first."
+            return self._quick_reply(request, message, tools_used=["workflow_export"], confidence=1.0)
+        # Export to Markdown
+        if mentions("export to markdown", "export markdown", "markdown export"):
+            if self.workflow.export_to_markdown():
+                message = f"✅ Exported to Markdown. Check {self.workflow.exports_dir} for the file.\n\nYou can open it in Obsidian, Notion, or any markdown editor."
+            else:
+                message = "❌ Failed to export Markdown."
+            return self._quick_reply(request, message, tools_used=["workflow_export"], confidence=1.0)
+        # Show history (ten entries at most)
+        if mentions("show history", "my history", "recent queries", "what did i search"):
+            recent = self.workflow.get_history()[:10]
+            if recent:
+                rows = []
+                for position, entry in enumerate(recent, 1):
+                    stamp = datetime.fromisoformat(entry['timestamp']).strftime("%m/%d %H:%M")
+                    asked = entry['query']
+                    if len(asked) > 60:
+                        # Keep each history row short
+                        asked = asked[:60] + "..."
+                    rows.append(f"{position}. [{stamp}] {asked}")
+                message = "Recent queries:\n\n" + "\n".join(rows)
+            else:
+                message = "No query history yet."
+            return self._quick_reply(request, message, tools_used=["workflow_history"], confidence=1.0)
+        # Search library: a quoted term takes priority, then the bare "search library [for] <term>" form
+        found = (
+            re.match(r".*(?:search|find).*(?:in|my).*library.*[\"'](.+?)[\"']", text)
+            or re.match(r".*search library (?:for )?(.+)", text)
+        )
+        if found is None:
+            # No workflow command detected
+            return None
+        query_term = found.group(1).strip()
+        hits = self.workflow.search_library(query_term)
+        if hits:
+            listing = numbered(hits, 5)
+            message = f"Found {len(hits)} paper(s) matching '{query_term}':\n\n" + listing
+            if len(hits) > 5:
+                message += f"\n\n...and {len(hits) - 5} more."
+        else:
+            message = f"No papers found matching '{query_term}' in your library."
+        return self._quick_reply(request, message, tools_used=["workflow_search"], confidence=1.0)
     async def _analyze_request_type(self, question: str) -> Dict[str, Any]:
         """Analyze what type of request this is and what APIs to use"""
@@ -2090,15 +2458,120 @@ class EnhancedNocturnalAgent:
             "analysis_mode": analysis_mode  # NEW: qualitative, quantitative, or mixed
         }
+    def _is_query_too_vague_for_apis(self, question: str) -> bool:
+        """
+        Detect whether a query is too vague to warrant API calls.
+        Four heuristics are applied; any one of them marks the query as vague:
+          1. Two or more years are mentioned, but fewer than two topic words
+             remain once years and generic filler words are removed.
+          2. "market share" is mentioned without any market/sector keyword.
+          3. A comparison is requested ("compare", "versus", "vs") without a
+             metric keyword such as revenue or growth.
+          4. The query has three words or fewer and contains a question mark.
+        Args:
+            question: Raw user query text.
+        Returns:
+            True if APIs should be skipped in favour of a clarifying question,
+            False if the query looks specific enough for API calls.
+        """
+        import re
+        question_lower = question.lower()
+        # Pattern 1: Multiple years without SPECIFIC topic (e.g., "2008, 2015, 2019")
+        years = re.findall(r'\b(?:19\d{2}|20\d{2})\b', question)
+        if len(years) >= 2:
+            # Generic terms that don't add specificity. "related to" is listed as
+            # its two component words because tokens are compared one at a time.
+            generic_terms = {'papers', 'about', 'on', 'regarding', 'concerning', 'related', 'to'}
+            # Strip surrounding punctuation so "papers," and "(2008)" are
+            # recognised as a generic term and a year respectively.
+            tokens = (w.strip(".,;:!?()[]{}\"'") for w in question_lower.split())
+            content_words = [
+                w for w in tokens
+                if w and w not in generic_terms and not re.match(r'\d{4}', w)
+            ]
+            # If fewer than 2 meaningful content words, it's too vague
+            if len(content_words) < 2:
+                return True  # Too vague: "papers on 2008, 2015, 2019" needs topic
+        # Pattern 2: Market share without market specified
+        if 'market share' in question_lower:
+            market_indicators = ['analytics', 'software', 'government', 'data', 'cloud', 'sector', 'industry']
+            if not any(indicator in question_lower for indicator in market_indicators):
+                return True  # Too vague: needs market specification
+        # Pattern 3: Comparison without metric (compare X and Y).
+        # Whole-word matching keeps "vs" from firing inside words such as
+        # "CVS" or "EVs", which a plain substring test would flag.
+        if re.search(r'\b(?:compare[sd]?|versus|vs)\b', question_lower):
+            metric_indicators = ['revenue', 'market cap', 'sales', 'growth', 'profit', 'valuation']
+            if not any(indicator in question_lower for indicator in metric_indicators):
+                return True  # Too vague: needs metric specification
+        # Pattern 4: Ultra-short queries without specifics (< 4 words)
+        if len(question.split()) <= 3 and '?' in question:
+            return True  # Too short and questioning - likely needs clarification
+        return False  # Query seems specific enough for API calls
     async def process_request(self, request: ChatRequest) -> ChatResponse:
         """Process request with full AI capabilities and API integration"""
         try:
-            # PRODUCTION MODE: Route all LLM queries through backend
-            # This ensures monetization - no local API key bypass
+            # Check workflow commands first (both modes)
+            workflow_response = await self._handle_workflow_commands(request)
+            if workflow_response:
+                return workflow_response
+            # Analyze request to determine what APIs to call
+            request_analysis = await self._analyze_request_type(request.question)
+            # Debug: Check what was detected
+            debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
+            if debug_mode:
+                print(f"🔍 Request analysis: {request_analysis}")
+            # Check if query is too vague - skip API calls to save tokens
+            is_vague = self._is_query_too_vague_for_apis(request.question)
+            if debug_mode and is_vague:
+                print(f"🔍 Query detected as VAGUE - skipping API calls, asking for clarification")
+            # Call appropriate APIs (Archive, FinSight) - BOTH production and dev mode
+            api_results = {}
+            tools_used = []
+            # Skip API calls if query is too vague
+            if not is_vague:
+                # Archive API for research
+                if "archive" in request_analysis.get("apis", []):
+                    result = await self.search_academic_papers(request.question, 5)
+                    if "error" not in result:
+                        api_results["research"] = result
+                        tools_used.append("archive_api")
+                # FinSight API for financial data
+                if "finsight" in request_analysis.get("apis", []):
+                    tickers = self._extract_tickers_from_text(request.question)
+                    if not tickers:
+                        # Try common company name mappings
+                        question_lower = request.question.lower()
+                        if "apple" in question_lower:
+                            tickers = ["AAPL"]
+                        elif "tesla" in question_lower:
+                            tickers = ["TSLA"]
+                        elif "microsoft" in question_lower:
+                            tickers = ["MSFT"]
+                        elif "google" in question_lower or "alphabet" in question_lower:
+                            tickers = ["GOOGL"]
+                    if debug_mode:
+                        print(f"🔍 Extracted tickers: {tickers}")
+                    if tickers:
+                        # Call FinSight with proper endpoint format
+                        if debug_mode:
+                            print(f"🔍 Calling FinSight API: calc/{tickers[0]}/revenue")
+                        financial_data = await self._call_finsight_api(f"calc/{tickers[0]}/revenue")
+                        if debug_mode:
+                            print(f"🔍 FinSight returned: {list(financial_data.keys()) if financial_data else None}")
+                        if financial_data and "error" not in financial_data:
+                            api_results["financial"] = financial_data
+                            tools_used.append("finsight_api")
+                        else:
+                            if debug_mode and financial_data:
+                                print(f"🔍 FinSight error: {financial_data.get('error')}")
+            # PRODUCTION MODE: Send to backend LLM with API results
             if self.client is None:
                 return await self.call_backend_query(
                     query=request.question,
-                    conversation_history=self.conversation_history[-10:]  # Last 10 messages for context
+                    conversation_history=self.conversation_history[-10:],
+                    api_results=api_results,  # Include the data!
+                    tools_used=tools_used  # Pass tools list for history
                 )
             # DEV MODE ONLY: Direct Groq calls (only works with local API keys)
@@ -2152,6 +2625,11 @@ class EnhancedNocturnalAgent:
                     confidence=0.55
                 )
+            # Check for workflow commands (natural language)
+            workflow_response = await self._handle_workflow_commands(request)
+            if workflow_response:
+                return workflow_response
             # Call appropriate APIs based on request type
             api_results = {}
             tools_used = []
@@ -2291,8 +2769,14 @@ class EnhancedNocturnalAgent:
                 result = await self.search_academic_papers(request.question, 5)
                 if "error" not in result:
                     api_results["research"] = result
+                    # DEBUG: Log what we got from the API
+                    papers_count = len(result.get("results", []))
+                    logger.info(f"🔍 DEBUG: Got {papers_count} papers from Archive API")
+                    if papers_count > 0:
+                        logger.info(f"🔍 DEBUG: First paper: {result['results'][0].get('title', 'NO TITLE')[:80]}")
                 else:
                     api_results["research"] = {"error": result["error"]}
+                    logger.warning(f"🔍 DEBUG: Archive API returned error: {result['error']}")
                 tools_used.append("archive_api")
             # Build enhanced system prompt with trimmed sections based on detected needs
@@ -2514,6 +2998,17 @@ class EnhancedNocturnalAgent:
                 f"Q: {request.question[:100]}... A: {final_response[:100]}..."
             )
+            # Save to workflow history automatically
+            self.workflow.save_query_result(
+                query=request.question,
+                response=final_response,
+                metadata={
+                    "tools_used": tools_used,
+                    "tokens_used": tokens_used,
+                    "confidence_score": request_analysis['confidence']
+                }
+            )
             return ChatResponse(
                 response=final_response,
                 tools_used=tools_used,

cite-agent 1.0.5__py3-none-any.whl → 1.2.4__py3-none-any.whl

Potentially problematic release.

cite-agent 1.0.5py3-none-any.whl → 1.2.4py3-none-any.whl