cite-agent 1.0.5__py3-none-any.whl → 1.2.3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cite-agent might be problematic. Click here for more details.

@@ -76,6 +76,11 @@ class EnhancedNocturnalAgent:
76
76
  self.total_cost = 0.0
77
77
  self.cost_per_1k_tokens = 0.0001 # Groq pricing estimate
78
78
  self._auto_update_enabled = True
79
+
80
+ # Workflow integration
81
+ from .workflow import WorkflowManager
82
+ self.workflow = WorkflowManager()
83
+ self.last_paper_result = None # Track last paper mentioned for "save that"
79
84
  try:
80
85
  self.per_user_token_limit = int(os.getenv("GROQ_PER_USER_TOKENS", 50000))
81
86
  except (TypeError, ValueError):
@@ -108,11 +113,42 @@ class EnhancedNocturnalAgent:
108
113
 
109
114
  self._service_roots: List[str] = []
110
115
  self._backend_health_cache: Dict[str, Dict[str, Any]] = {}
116
+
117
+ # Initialize authentication
118
+ self.auth_token = None
119
+ self.user_id = None
120
+ self._load_authentication()
111
121
  try:
112
122
  self._health_ttl = float(os.getenv("NOCTURNAL_HEALTH_TTL", 30))
113
123
  except Exception:
114
124
  self._health_ttl = 30.0
115
125
  self._recent_sources: List[Dict[str, Any]] = []
126
+
127
+ def _load_authentication(self):
128
+ """Load authentication from session file"""
129
+ use_local_keys = os.getenv("USE_LOCAL_KEYS", "true").lower() == "true"
130
+
131
+ if not use_local_keys:
132
+ # Backend mode - load auth token from session
133
+ from pathlib import Path
134
+ session_file = Path.home() / ".nocturnal_archive" / "session.json"
135
+ if session_file.exists():
136
+ try:
137
+ import json
138
+ with open(session_file, 'r') as f:
139
+ session_data = json.load(f)
140
+ self.auth_token = session_data.get('access_token')
141
+ self.user_id = session_data.get('user_id')
142
+ except Exception:
143
+ self.auth_token = None
144
+ self.user_id = None
145
+ else:
146
+ self.auth_token = None
147
+ self.user_id = None
148
+ else:
149
+ # Local keys mode
150
+ self.auth_token = None
151
+ self.user_id = None
116
152
  self._session_topics: Dict[str, Dict[str, Any]] = {}
117
153
 
118
154
  # Initialize API clients
@@ -184,10 +220,10 @@ class EnhancedNocturnalAgent:
184
220
  )
185
221
 
186
222
  # Archive API client
187
- self.archive_base_url = _normalize_base(archive_env, "http://127.0.0.1:8000/api")
223
+ self.archive_base_url = _normalize_base(archive_env, "https://cite-agent-api-720dfadd602c.herokuapp.com/api")
188
224
 
189
225
  # FinSight API client
190
- self.finsight_base_url = _normalize_base(finsight_env, "http://127.0.0.1:8000/v1/finance")
226
+ self.finsight_base_url = _normalize_base(finsight_env, "https://cite-agent-api-720dfadd602c.herokuapp.com/v1/finance")
191
227
 
192
228
  # Workspace Files API client
193
229
  files_env = os.getenv("FILES_API_URL")
@@ -203,13 +239,15 @@ class EnhancedNocturnalAgent:
203
239
  self._default_headers.clear()
204
240
  if self.api_key:
205
241
  self._default_headers["X-API-Key"] = self.api_key
206
- if self.api_key == "demo-key-123":
207
- print("⚠️ Using demo API key. Set NOCTURNAL_KEY for production usage.")
208
- else:
209
- print("⚠️ No API key configured for Nocturnal Archive API calls")
210
242
 
211
243
  self._update_service_roots()
212
- print(f"✅ API clients initialized (Archive={self.archive_base_url}, FinSight={self.finsight_base_url})")
244
+
245
+ # Only show init messages in debug mode
246
+ debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
247
+ if debug_mode:
248
+ if self.api_key == "demo-key-123":
249
+ print("⚠️ Using demo API key")
250
+ print(f"✅ API clients initialized (Archive={self.archive_base_url}, FinSight={self.finsight_base_url})")
213
251
 
214
252
  except Exception as e:
215
253
  print(f"⚠️ API client initialization warning: {e}")
@@ -812,14 +850,22 @@ class EnhancedNocturnalAgent:
812
850
 
813
851
  def _format_api_results_for_prompt(self, api_results: Dict[str, Any]) -> str:
814
852
  if not api_results:
853
+ logger.info("🔍 DEBUG: _format_api_results_for_prompt called with EMPTY api_results")
815
854
  return "No API results yet."
816
855
  try:
817
856
  serialized = json.dumps(api_results, indent=2)
818
857
  except Exception:
819
858
  serialized = str(api_results)
820
- max_len = 2000
859
+ max_len = 8000 # Keep under 12K token limit (backend + context)
821
860
  if len(serialized) > max_len:
822
- serialized = serialized[:max_len] + "\n... (truncated)"
861
+ serialized = serialized[:max_len] + "\n... (truncated for length)"
862
+
863
+ # DEBUG: Log formatted results length and preview
864
+ logger.info(f"🔍 DEBUG: _format_api_results_for_prompt returning {len(serialized)} chars")
865
+ if "research" in api_results:
866
+ papers_count = len(api_results.get("research", {}).get("results", []))
867
+ logger.info(f"🔍 DEBUG: api_results contains 'research' with {papers_count} papers")
868
+
823
869
  return serialized
824
870
 
825
871
  def _build_system_prompt(
@@ -851,8 +897,8 @@ class EnhancedNocturnalAgent:
851
897
  )
852
898
  else: # quantitative
853
899
  intro = (
854
- "You are Nocturnal, a truth-seeking research and finance AI. "
855
- "PRIMARY DIRECTIVE: Accuracy > Agreeableness. "
900
+ "You are Cite Agent, a truth-seeking research and finance AI. "
901
+ "PRIMARY DIRECTIVE: Accuracy > Agreeableness. Ask clarifying questions when context is missing. "
856
902
  "You are a fact-checker and analyst, NOT a people-pleaser. "
857
903
  "You have direct access to production-grade data sources and can write/execute code (Python, R, SQL)."
858
904
  )
@@ -869,10 +915,32 @@ class EnhancedNocturnalAgent:
869
915
  capability_lines.append("• Persistent shell session for system inspection and code execution")
870
916
  if not capability_lines:
871
917
  capability_lines.append("• Core reasoning, code generation (Python/R/SQL), memory recall")
918
+
919
+ # Add workflow capabilities
920
+ capability_lines.append("")
921
+ capability_lines.append("📚 WORKFLOW INTEGRATION (Always available):")
922
+ capability_lines.append("• You can SAVE papers to user's local library")
923
+ capability_lines.append("• You can LIST papers from library")
924
+ capability_lines.append("• You can EXPORT citations to BibTeX or APA")
925
+ capability_lines.append("• You can SEARCH user's paper collection")
926
+ capability_lines.append("• You can COPY text to user's clipboard")
927
+ capability_lines.append("• User's query history is automatically tracked")
928
+
872
929
  sections.append("Capabilities in play:\n" + "\n".join(capability_lines))
873
930
 
874
931
  # ENHANCED TRUTH-SEEKING RULES (adapt based on mode)
875
932
  base_rules = [
933
+ "🚨 BE PATIENT: Don't rush to tools. Have a conversation to understand intent FIRST.",
934
+ "🚨 CLARIFY BEFORE SEARCH: If you see '2008, 2015, 2019' → ask 'Are you looking for crisis patterns? Economic events? Papers published in those years?' DON'T just search '2008'.",
935
+ "🚨 KNOW YOUR TOOLS' LIMITS: SEC has revenue, not market share. Archive has papers, not market data. If tool can't answer, say 'I don't have that data' or use web search.",
936
+ "🚨 TOOL != ANSWER: Don't use tools just because you have them. Revenue ≠ Market Share. Published year ≠ Subject matter.",
937
+ "",
938
+ "💬 CONVERSATIONAL FLOW:",
939
+ "1. User asks vague question → YOU ask clarifying questions",
940
+ "2. User provides context → YOU confirm understanding",
941
+ "3. YOU make tool calls → Present results",
942
+ "NEVER skip step 1 or 2. Be deliberate, not eager.",
943
+ "",
876
944
  "🚨 ANTI-APPEASEMENT: If user states something incorrect, CORRECT THEM immediately. Do not agree to be polite.",
877
945
  "🚨 UNCERTAINTY: If you're uncertain, SAY SO explicitly. 'I don't know' is better than a wrong answer.",
878
946
  "🚨 CONTRADICTIONS: If data contradicts user's assumption, SHOW THE CONTRADICTION clearly.",
@@ -883,6 +951,17 @@ class EnhancedNocturnalAgent:
883
951
  "📊 NO EXTRAPOLATION: Never go beyond what sources directly state.",
884
952
  "📊 PREDICTION CAUTION: When discussing trends, always state 'based on available data' and note uncertainty.",
885
953
  "",
954
+ "🚨 CRITICAL: NEVER generate fake papers, fake authors, fake DOIs, or fake citations.",
955
+ "🚨 CRITICAL: If research API returns empty results, say 'No papers found' - DO NOT make up papers.",
956
+ "🚨 CRITICAL: If you see 'results': [] in API data, that means NO PAPERS FOUND - do not fabricate.",
957
+ "🚨 CRITICAL: When API returns empty results, DO NOT use your training data to provide paper details.",
958
+ "🚨 CRITICAL: If you know a paper exists from training data but API returns empty, say 'API found no results'.",
959
+ "",
960
+ "🚨 ABSOLUTE RULE: If you see 'results': [] in the API data, you MUST respond with ONLY:",
961
+ " 'No papers found in the research database. The API returned empty results.'",
962
+ " DO NOT provide any paper details, authors, titles, or citations.",
963
+ " DO NOT use your training data to fill in missing information.",
964
+ "",
886
965
  "✓ VERIFICATION: Cross-check against multiple sources when available.",
887
966
  "✓ CONFLICTS: If sources conflict, present BOTH and explain the discrepancy.",
888
967
  "✓ SHOW REASONING: 'According to [source], X is Y because...'",
@@ -924,6 +1003,20 @@ class EnhancedNocturnalAgent:
924
1003
  rules.append("")
925
1004
  rules.append("Keep responses concise but complete. Quote exact text from sources when possible.")
926
1005
 
1006
+ # Add workflow behavior rules
1007
+ workflow_rules = [
1008
+ "",
1009
+ "📚 WORKFLOW BEHAVIOR:",
1010
+ "• After finding papers, OFFER to save them: 'Would you like me to save this to your library?'",
1011
+ "• After showing a citation, ASK: 'Want me to copy that to your clipboard?'",
1012
+ "• If user says 'save that' or 'add to library', ACKNOWLEDGE and confirm the save",
1013
+ "• If user mentions 'my library', LIST their saved papers",
1014
+ "• If user asks for 'bibtex' or 'apa', PROVIDE the formatted citation",
1015
+ "• Be PROACTIVE: suggest exports, show library stats, offer clipboard copies",
1016
+ "• Example: 'I found 3 papers. I can save them to your library or export to BibTeX if you'd like.'",
1017
+ ]
1018
+ rules.extend(workflow_rules)
1019
+
927
1020
  sections.append("CRITICAL RULES:\n" + "\n".join(rules))
928
1021
 
929
1022
  # CORRECTION EXAMPLES (adapt based on mode)
@@ -941,6 +1034,24 @@ class EnhancedNocturnalAgent:
941
1034
  )
942
1035
  else:
943
1036
  examples = (
1037
+ "EXAMPLE 1: Be Patient, Don't Rush\n"
1038
+ "User: 'Find papers on 2008, 2015, 2019'\n"
1039
+ "❌ BAD: [Searches for year:2008 immediately] 'Found 50 papers from 2008...'\n"
1040
+ "✅ GOOD: 'Are you looking for papers ABOUT events in those years (financial crises, policy changes), "
1041
+ "or papers PUBLISHED in those years? Also, what topic? (Economics? Healthcare? Climate?)'\n\n"
1042
+
1043
+ "EXAMPLE 2: Know Your Tools' Limits\n"
1044
+ "User: 'What's Palantir's market share?'\n"
1045
+ "❌ BAD: 'Palantir's latest revenue is $1B...' (Revenue ≠ Market Share! SEC doesn't have market share!)\n"
1046
+ "✅ GOOD: 'Market share requires: (1) Palantir's revenue, (2) total market size. SEC has #1, not #2. "
1047
+ "Which market? (Data analytics = ~$50B, Gov contracts = ~$200B). I can web search for total market size if you specify.'\n\n"
1048
+
1049
+ "EXAMPLE 3: Conversational Flow\n"
1050
+ "User: 'Compare Tesla and Ford'\n"
1051
+ "❌ BAD: [Immediately fetches both revenues] 'Tesla: $81B, Ford: $158B'\n"
1052
+ "✅ GOOD: 'Compare on what dimension? Revenue? (Ford larger). Market cap? (Tesla larger). EV sales? (Tesla dominates). "
1053
+ "Production volume? (Ford higher). Each tells a different story. Which matters to you?'\n\n"
1054
+
944
1055
  "EXAMPLE CORRECTIONS:\n"
945
1056
  "User: 'So revenue went up 50%?'\n"
946
1057
  "You: '❌ No. According to 10-K page 23, revenue increased 15%, not 50%. "
@@ -961,7 +1072,16 @@ class EnhancedNocturnalAgent:
961
1072
  f"confidence={request_analysis.get('confidence')}"
962
1073
  )
963
1074
 
964
- sections.append("API RESULTS:\n" + self._format_api_results_for_prompt(api_results))
1075
+ # Add explicit instruction before API results
1076
+ api_instructions = (
1077
+ "🚨 CRITICAL: The following API RESULTS are REAL DATA from production APIs.\n"
1078
+ "🚨 These are NOT examples or templates - they are ACTUAL results to use in your response.\n"
1079
+ "🚨 DO NOT generate new/fake data - USE EXACTLY what is shown below.\n"
1080
+ "🚨 If you see paper titles, authors, DOIs below - these are REAL papers you MUST cite.\n"
1081
+ "🚨 If API results show empty/no papers, say 'No papers found' - DO NOT make up papers.\n"
1082
+ )
1083
+
1084
+ sections.append(api_instructions + "\nAPI RESULTS:\n" + self._format_api_results_for_prompt(api_results))
965
1085
 
966
1086
  return "\n\n".join(sections)
967
1087
 
@@ -1016,18 +1136,32 @@ class EnhancedNocturnalAgent:
1016
1136
  elif len(question.split()) <= 40 and request_analysis.get("type") in {"general", "system"} and not api_results:
1017
1137
  use_light_model = True
1018
1138
 
1019
- if use_light_model:
1139
+ # Select model based on LLM provider
1140
+ if getattr(self, 'llm_provider', 'groq') == 'cerebras':
1141
+ if use_light_model:
1142
+ return {
1143
+ "model": "llama3.1-8b", # Cerebras 8B model
1144
+ "max_tokens": 520,
1145
+ "temperature": 0.2
1146
+ }
1020
1147
  return {
1021
- "model": "llama-3.1-8b-instant",
1022
- "max_tokens": 520,
1023
- "temperature": 0.2
1148
+ "model": "llama-3.3-70b", # Cerebras 70B model
1149
+ "max_tokens": 900,
1150
+ "temperature": 0.3
1151
+ }
1152
+ else:
1153
+ # Groq models
1154
+ if use_light_model:
1155
+ return {
1156
+ "model": "llama-3.1-8b-instant",
1157
+ "max_tokens": 520,
1158
+ "temperature": 0.2
1159
+ }
1160
+ return {
1161
+ "model": "llama-3.3-70b-versatile",
1162
+ "max_tokens": 900,
1163
+ "temperature": 0.3
1024
1164
  }
1025
-
1026
- return {
1027
- "model": "llama-3.3-70b-versatile",
1028
- "max_tokens": 900,
1029
- "temperature": 0.3
1030
- }
1031
1165
 
1032
1166
  def _mark_current_key_exhausted(self, reason: str = "rate_limit"):
1033
1167
  if not self.api_keys:
@@ -1055,11 +1189,18 @@ class EnhancedNocturnalAgent:
1055
1189
  attempts += 1
1056
1190
  continue
1057
1191
  try:
1058
- self.client = Groq(api_key=key)
1192
+ if self.llm_provider == "cerebras":
1193
+ from openai import OpenAI
1194
+ self.client = OpenAI(
1195
+ api_key=key,
1196
+ base_url="https://api.cerebras.ai/v1"
1197
+ )
1198
+ else:
1199
+ self.client = Groq(api_key=key)
1059
1200
  self.current_api_key = key
1060
1201
  return True
1061
1202
  except Exception as e:
1062
- logger.error(f"Failed to initialize Groq client for rotated key: {e}")
1203
+ logger.error(f"Failed to initialize {self.llm_provider.upper()} client for rotated key: {e}")
1063
1204
  self.exhausted_keys[key] = now
1064
1205
  attempts += 1
1065
1206
  return False
@@ -1087,11 +1228,18 @@ class EnhancedNocturnalAgent:
1087
1228
  del self.exhausted_keys[key]
1088
1229
 
1089
1230
  try:
1090
- self.client = Groq(api_key=key)
1231
+ if self.llm_provider == "cerebras":
1232
+ from openai import OpenAI
1233
+ self.client = OpenAI(
1234
+ api_key=key,
1235
+ base_url="https://api.cerebras.ai/v1"
1236
+ )
1237
+ else:
1238
+ self.client = Groq(api_key=key)
1091
1239
  self.current_api_key = key
1092
1240
  return True
1093
1241
  except Exception as e:
1094
- logger.error(f"Failed to initialize Groq client for key index {self.current_key_index}: {e}")
1242
+ logger.error(f"Failed to initialize {self.llm_provider.upper()} client for key index {self.current_key_index}: {e}")
1095
1243
  self.exhausted_keys[key] = now
1096
1244
  attempts += 1
1097
1245
  self.current_key_index = (self.current_key_index + 1) % total
@@ -1145,7 +1293,15 @@ class EnhancedNocturnalAgent:
1145
1293
  payload = payload_full[:1500]
1146
1294
  if len(payload_full) > 1500:
1147
1295
  payload += "\n…"
1148
- details.append(f"**Research API snapshot**\n```json\n{payload}\n```")
1296
+
1297
+ # Check if results are empty and add explicit warning
1298
+ if research.get("results") == [] or not research.get("results"):
1299
+ details.append(f"**Research API snapshot**\n```json\n{payload}\n```")
1300
+ details.append("🚨 **CRITICAL: API RETURNED EMPTY RESULTS - DO NOT GENERATE ANY PAPER DETAILS**")
1301
+ details.append("🚨 **DO NOT PROVIDE AUTHORS, TITLES, DOIs, OR ANY PAPER INFORMATION**")
1302
+ details.append("🚨 **SAY 'NO PAPERS FOUND' AND STOP - DO NOT HALLUCINATE**")
1303
+ else:
1304
+ details.append(f"**Research API snapshot**\n```json\n{payload}\n```")
1149
1305
 
1150
1306
  files_context = api_results.get("files_context")
1151
1307
  if files_context:
@@ -1239,13 +1395,31 @@ class EnhancedNocturnalAgent:
1239
1395
  self._check_updates_background()
1240
1396
  self._ensure_environment_loaded()
1241
1397
  self._init_api_clients()
1398
+
1399
+ # Suppress verbose initialization messages in production
1400
+ import logging
1401
+ logging.getLogger("aiohttp").setLevel(logging.ERROR)
1402
+ logging.getLogger("asyncio").setLevel(logging.ERROR)
1242
1403
 
1243
1404
  # SECURITY FIX: No API keys on client!
1244
1405
  # All API calls go through our secure backend
1245
1406
  # This prevents key extraction and piracy
1246
1407
  # DISABLED for beta testing - set USE_LOCAL_KEYS=false to enable backend-only mode
1247
1408
 
1248
- use_local_keys = os.getenv("USE_LOCAL_KEYS", "true").lower() == "true"
1409
+ # SECURITY: Production users MUST use backend for monetization
1410
+ # Dev mode only available via undocumented env var (not in user docs)
1411
+ use_local_keys_env = os.getenv("USE_LOCAL_KEYS", "").lower()
1412
+
1413
+ if use_local_keys_env == "true":
1414
+ # Dev mode - use local keys
1415
+ use_local_keys = True
1416
+ elif use_local_keys_env == "false":
1417
+ # Explicit backend mode
1418
+ use_local_keys = False
1419
+ else:
1420
+ # Default: Always use backend (for monetization)
1421
+ # Even if session doesn't exist, we'll prompt for login
1422
+ use_local_keys = False
1249
1423
 
1250
1424
  if not use_local_keys:
1251
1425
  self.api_keys = [] # Empty - keys stay on server
@@ -1256,7 +1430,7 @@ class EnhancedNocturnalAgent:
1256
1430
  # Get backend API URL from config
1257
1431
  self.backend_api_url = os.getenv(
1258
1432
  "NOCTURNAL_API_URL",
1259
- "https://api.nocturnal.dev/api" # Production default
1433
+ "https://cite-agent-api-720dfadd602c.herokuapp.com/api" # Production Heroku backend
1260
1434
  )
1261
1435
 
1262
1436
  # Get auth token from session (set by auth.py after login)
@@ -1276,34 +1450,59 @@ class EnhancedNocturnalAgent:
1276
1450
  self.auth_token = None
1277
1451
  self.user_id = None
1278
1452
 
1279
- if self.auth_token:
1280
- print(f" Enhanced Nocturnal Agent Ready! (Authenticated)")
1281
- else:
1282
- print("⚠️ Not authenticated. Please log in to use the agent.")
1453
+ # Suppress messages in production (only show in debug mode)
1454
+ debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
1455
+ if debug_mode:
1456
+ if self.auth_token:
1457
+ print(f"✅ Enhanced Nocturnal Agent Ready! (Authenticated)")
1458
+ else:
1459
+ print("⚠️ Not authenticated. Please log in to use the agent.")
1283
1460
  else:
1284
- # Local keys mode - load Groq API keys
1461
+ # Local keys mode - load Cerebras API keys (primary) with Groq fallback
1285
1462
  self.auth_token = None
1286
1463
  self.user_id = None
1287
1464
 
1288
- # Load Groq keys from environment
1465
+ # Load Cerebras keys from environment (PRIMARY)
1289
1466
  self.api_keys = []
1290
- for i in range(1, 10): # Check GROQ_API_KEY_1 through GROQ_API_KEY_9
1291
- key = os.getenv(f"GROQ_API_KEY_{i}") or os.getenv(f"GROQ_API_KEY")
1467
+ for i in range(1, 10): # Check CEREBRAS_API_KEY_1 through CEREBRAS_API_KEY_9
1468
+ key = os.getenv(f"CEREBRAS_API_KEY_{i}") or os.getenv(f"CEREBRAS_API_KEY")
1292
1469
  if key and key not in self.api_keys:
1293
1470
  self.api_keys.append(key)
1294
1471
 
1472
+ # Fallback to Groq keys if no Cerebras keys found
1295
1473
  if not self.api_keys:
1296
- print("⚠️ No Groq API keys found. Set GROQ_API_KEY_1, GROQ_API_KEY_2, etc.")
1474
+ for i in range(1, 10):
1475
+ key = os.getenv(f"GROQ_API_KEY_{i}") or os.getenv(f"GROQ_API_KEY")
1476
+ if key and key not in self.api_keys:
1477
+ self.api_keys.append(key)
1478
+ self.llm_provider = "groq"
1297
1479
  else:
1298
- print(f"✅ Loaded {len(self.api_keys)} Groq API key(s)")
1299
- # Initialize first client
1480
+ self.llm_provider = "cerebras"
1481
+
1482
+ debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
1483
+ if not self.api_keys:
1484
+ if debug_mode:
1485
+ print("⚠️ No LLM API keys found. Set CEREBRAS_API_KEY or GROQ_API_KEY")
1486
+ else:
1487
+ if debug_mode:
1488
+ print(f"✅ Loaded {len(self.api_keys)} {self.llm_provider.upper()} API key(s)")
1489
+ # Initialize first client - Cerebras uses OpenAI-compatible API
1300
1490
  try:
1301
- from groq import Groq
1302
- self.client = Groq(api_key=self.api_keys[0])
1491
+ if self.llm_provider == "cerebras":
1492
+ # Cerebras uses OpenAI client with custom base URL
1493
+ from openai import OpenAI
1494
+ self.client = OpenAI(
1495
+ api_key=self.api_keys[0],
1496
+ base_url="https://api.cerebras.ai/v1"
1497
+ )
1498
+ else:
1499
+ # Groq fallback
1500
+ from groq import Groq
1501
+ self.client = Groq(api_key=self.api_keys[0])
1303
1502
  self.current_api_key = self.api_keys[0]
1304
1503
  self.current_key_index = 0
1305
1504
  except Exception as e:
1306
- print(f"⚠️ Failed to initialize Groq client: {e}")
1505
+ print(f"⚠️ Failed to initialize {self.llm_provider.upper()} client: {e}")
1307
1506
 
1308
1507
  if self.shell_session and self.shell_session.poll() is not None:
1309
1508
  self.shell_session = None
@@ -1332,33 +1531,36 @@ class EnhancedNocturnalAgent:
1332
1531
  return True
1333
1532
 
1334
1533
  def _check_updates_background(self):
1335
- """Check for updates in background (silent, non-blocking)"""
1534
+ """Check for updates and auto-install if available"""
1336
1535
  if not self._auto_update_enabled:
1337
1536
  return
1338
-
1339
- import threading
1340
1537
 
1341
- def update_check():
1342
- try:
1343
- from .updater import NocturnalUpdater
1344
- updater = NocturnalUpdater()
1345
- update_info = updater.check_for_updates()
1538
+ # Check for updates (synchronous, fast)
1539
+ try:
1540
+ from .updater import NocturnalUpdater
1541
+ updater = NocturnalUpdater()
1542
+ update_info = updater.check_for_updates()
1543
+
1544
+ if update_info and update_info["available"]:
1545
+ # Auto-update silently in background
1546
+ import threading
1547
+ def do_update():
1548
+ try:
1549
+ updater.update_package(silent=True)
1550
+ except:
1551
+ pass
1552
+ threading.Thread(target=do_update, daemon=True).start()
1346
1553
 
1347
- if update_info and update_info["available"]:
1348
- # Silent update - no interruption
1349
- updater.update_package()
1350
-
1351
- except Exception:
1352
- # Completely silent - don't interrupt user experience
1353
- pass
1354
-
1355
- # Run in background thread
1356
- threading.Thread(target=update_check, daemon=True).start()
1554
+ except Exception:
1555
+ # Silently ignore update check failures
1556
+ pass
1357
1557
 
1358
- async def call_backend_query(self, query: str, conversation_history: Optional[List[Dict]] = None) -> ChatResponse:
1558
+ async def call_backend_query(self, query: str, conversation_history: Optional[List[Dict]] = None,
1559
+ api_results: Optional[Dict[str, Any]] = None, tools_used: Optional[List[str]] = None) -> ChatResponse:
1359
1560
  """
1360
1561
  Call backend /query endpoint instead of Groq directly
1361
1562
  This is the SECURE method - all API keys stay on server
1563
+ Includes API results (Archive, FinSight) in context for better responses
1362
1564
  """
1363
1565
  if not self.auth_token:
1364
1566
  return ChatResponse(
@@ -1373,12 +1575,13 @@ class EnhancedNocturnalAgent:
1373
1575
  )
1374
1576
 
1375
1577
  try:
1376
- # Build request
1578
+ # Build request with API context as separate field
1377
1579
  payload = {
1378
- "query": query,
1580
+ "query": query, # Keep query clean
1379
1581
  "conversation_history": conversation_history or [],
1380
- "model": "llama-3.3-70b-versatile",
1381
- "temperature": 0.7,
1582
+ "api_context": api_results, # Send API results separately
1583
+ "model": "llama-3.3-70b", # Compatible with Cerebras (priority) and Groq
1584
+ "temperature": 0.2, # Low temp for accuracy
1382
1585
  "max_tokens": 4000
1383
1586
  }
1384
1587
 
@@ -1410,11 +1613,32 @@ class EnhancedNocturnalAgent:
1410
1613
 
1411
1614
  elif response.status == 200:
1412
1615
  data = await response.json()
1616
+ response_text = data.get('response', '')
1617
+ tokens = data.get('tokens_used', 0)
1618
+
1619
+ # Combine tools used
1620
+ all_tools = tools_used or []
1621
+ all_tools.append("backend_llm")
1622
+
1623
+ # Save to workflow history
1624
+ self.workflow.save_query_result(
1625
+ query=query,
1626
+ response=response_text,
1627
+ metadata={
1628
+ "tools_used": all_tools,
1629
+ "tokens_used": tokens,
1630
+ "model": data.get('model'),
1631
+ "provider": data.get('provider')
1632
+ }
1633
+ )
1634
+
1413
1635
  return ChatResponse(
1414
- response=data.get('response', ''),
1415
- tokens_used=data.get('tokens_used', 0),
1636
+ response=response_text,
1637
+ tokens_used=tokens,
1638
+ tools_used=all_tools,
1416
1639
  model=data.get('model', 'llama-3.3-70b-versatile'),
1417
- timestamp=data.get('timestamp', datetime.now(timezone.utc).isoformat())
1640
+ timestamp=data.get('timestamp', datetime.now(timezone.utc).isoformat()),
1641
+ api_results=api_results
1418
1642
  )
1419
1643
 
1420
1644
  else:
@@ -1498,15 +1722,48 @@ class EnhancedNocturnalAgent:
1498
1722
  return {"error": "HTTP session not initialized"}
1499
1723
 
1500
1724
  url = f"{self.archive_base_url}/{endpoint}"
1501
- headers = getattr(self, "_default_headers", None)
1502
- if headers:
1503
- headers = dict(headers)
1725
+ # Start fresh with headers
1726
+ headers = {}
1727
+
1728
+ # Always use demo key for Archive (public research data)
1729
+ headers["X-API-Key"] = "demo-key-123"
1730
+ headers["Content-Type"] = "application/json"
1731
+
1732
+ # Also add JWT if we have it
1733
+ if self.auth_token:
1734
+ headers["Authorization"] = f"Bearer {self.auth_token}"
1735
+
1736
+ debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
1737
+ if debug_mode:
1738
+ print(f"🔍 Archive headers: {list(headers.keys())}, X-API-Key={headers.get('X-API-Key')}")
1739
+ print(f"🔍 Archive URL: {url}")
1740
+ print(f"🔍 Archive data: {data}")
1504
1741
 
1505
1742
  async with self.session.post(url, json=data, headers=headers, timeout=30) as response:
1743
+ if debug_mode:
1744
+ print(f"🔍 Archive response status: {response.status}")
1745
+
1506
1746
  if response.status == 200:
1507
1747
  payload = await response.json()
1508
1748
  self._record_data_source("Archive", f"POST {endpoint}", True)
1509
1749
  return payload
1750
+ elif response.status == 422: # Validation error
1751
+ try:
1752
+ error_detail = await response.json()
1753
+ logger.error(f"Archive API validation error (HTTP 422): {error_detail}")
1754
+ except Exception:
1755
+ error_detail = await response.text()
1756
+ logger.error(f"Archive API validation error (HTTP 422): {error_detail}")
1757
+
1758
+ if attempt < max_retries - 1:
1759
+ # Retry with simplified request
1760
+ if "sources" in data and len(data["sources"]) > 1:
1761
+ data["sources"] = [data["sources"][0]] # Try single source
1762
+ logger.info(f"Retrying with single source: {data['sources']}")
1763
+ await asyncio.sleep(retry_delay)
1764
+ continue
1765
+ self._record_data_source("Archive", f"POST {endpoint}", False, "422 validation error")
1766
+ return {"error": f"Archive API validation error: {error_detail}"}
1510
1767
  elif response.status == 429: # Rate limited
1511
1768
  if attempt < max_retries - 1:
1512
1769
  await asyncio.sleep(retry_delay * (2 ** attempt)) # Exponential backoff
@@ -1517,6 +1774,8 @@ class EnhancedNocturnalAgent:
1517
1774
  self._record_data_source("Archive", f"POST {endpoint}", False, "401 unauthorized")
1518
1775
  return {"error": "Archive API authentication failed. Please check API key."}
1519
1776
  else:
1777
+ error_text = await response.text()
1778
+ logger.error(f"Archive API error (HTTP {response.status}): {error_text}")
1520
1779
  self._record_data_source("Archive", f"POST {endpoint}", False, f"HTTP {response.status}")
1521
1780
  return {"error": f"Archive API error: {response.status}"}
1522
1781
 
@@ -1551,9 +1810,20 @@ class EnhancedNocturnalAgent:
1551
1810
  return {"error": "HTTP session not initialized"}
1552
1811
 
1553
1812
  url = f"{self.finsight_base_url}/{endpoint}"
1554
- headers = getattr(self, "_default_headers", None)
1555
- if headers:
1556
- headers = dict(headers)
1813
+ # Start fresh with headers - don't use _default_headers which might be wrong
1814
+ headers = {}
1815
+
1816
+ # Always use demo key for FinSight (SEC data is public)
1817
+ headers["X-API-Key"] = "demo-key-123"
1818
+
1819
+ # Also add JWT if we have it
1820
+ if self.auth_token:
1821
+ headers["Authorization"] = f"Bearer {self.auth_token}"
1822
+
1823
+ debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
1824
+ if debug_mode:
1825
+ print(f"🔍 FinSight headers: {list(headers.keys())}, X-API-Key={headers.get('X-API-Key')}")
1826
+ print(f"🔍 FinSight URL: {url}")
1557
1827
 
1558
1828
  async with self.session.get(url, params=params, headers=headers, timeout=30) as response:
1559
1829
  if response.status == 200:
@@ -1639,9 +1909,18 @@ class EnhancedNocturnalAgent:
1639
1909
  continue
1640
1910
 
1641
1911
  results = result.get("results") or result.get("papers") or []
1642
- if results:
1912
+ # Validate papers have minimal required fields
1913
+ validated_results = []
1914
+ for paper in results:
1915
+ if isinstance(paper, dict) and paper.get("title") and paper.get("year"):
1916
+ validated_results.append(paper)
1917
+ else:
1918
+ logger.warning(f"Skipping invalid paper: {paper}")
1919
+
1920
+ if validated_results:
1643
1921
  aggregated_payload = dict(result)
1644
- aggregated_payload["results"] = results
1922
+ aggregated_payload["results"] = validated_results
1923
+ aggregated_payload["validation_note"] = f"Validated {len(validated_results)} out of {len(results)} papers"
1645
1924
  break
1646
1925
 
1647
1926
  aggregated_payload.setdefault("results", [])
@@ -1650,11 +1929,14 @@ class EnhancedNocturnalAgent:
1650
1929
  if provider_errors:
1651
1930
  aggregated_payload["provider_errors"] = provider_errors
1652
1931
 
1932
+ # CRITICAL: Add explicit marker for empty results to prevent hallucination
1653
1933
  if not aggregated_payload["results"]:
1654
1934
  aggregated_payload["notes"] = (
1655
1935
  "No papers were returned by the research providers. This often occurs during "
1656
1936
  "temporary rate limits; please retry in a minute or adjust the query scope."
1657
1937
  )
1938
+ aggregated_payload["EMPTY_RESULTS"] = True
1939
+ aggregated_payload["warning"] = "DO NOT GENERATE FAKE PAPERS - API returned zero results"
1658
1940
 
1659
1941
  return aggregated_payload
1660
1942
 
@@ -1939,6 +2221,92 @@ class EnhancedNocturnalAgent:
1939
2221
 
1940
2222
  return formatted, 0
1941
2223
 
2224
async def _handle_workflow_commands(self, request: ChatRequest) -> Optional[ChatResponse]:
    """Handle natural-language workflow commands directly, without the LLM.

    The lower-cased question is checked, in order, for: showing the library,
    exporting to BibTeX, exporting to Markdown, showing query history, and
    searching the library. The first command that matches is executed via
    ``self.workflow`` and answered through ``self._quick_reply``.

    Args:
        request: Incoming chat request; only ``request.question`` is read.

    Returns:
        The reply built by ``self._quick_reply`` when a workflow command was
        recognised, otherwise ``None`` so the caller continues with normal
        request processing.
    """
    # A missing question is treated as "no command" instead of crashing.
    question_lower = (request.question or "").lower()

    def mentions(*phrases):
        """Return True when any trigger phrase occurs in the question."""
        return any(phrase in question_lower for phrase in phrases)

    def paper_line(index, paper):
        """Format one numbered entry: ``N. Title (First Author et al., Year)``."""
        # ``authors`` may be None or empty; normalise once so both the
        # first-author lookup and the "et al." length check are safe.
        authors = paper.authors or []
        authors_str = authors[0] if authors else "Unknown"
        if len(authors) > 1:
            authors_str += " et al."
        return f"{index}. {paper.title} ({authors_str}, {paper.year})"

    def history_line(index, entry):
        """Format one numbered history entry, tolerating malformed records."""
        try:
            timestamp = datetime.fromisoformat(entry["timestamp"]).strftime("%m/%d %H:%M")
        except (KeyError, TypeError, ValueError):
            # One bad or missing timestamp must not hide the whole history.
            timestamp = "unknown time"
        try:
            query = str(entry["query"])
        except KeyError:
            query = ""
        if len(query) > 60:
            query = query[:60] + "..."
        return f"{index}. [{timestamp}] {query}"

    # Show library
    if mentions("show my library", "list my papers", "what's in my library", "my saved papers"):
        papers = self.workflow.list_papers()
        if not papers:
            message = "Your library is empty. As you find papers, I can save them for you."
        else:
            paper_list = [paper_line(i, paper) for i, paper in enumerate(papers[:10], 1)]
            message = f"You have {len(papers)} paper(s) in your library:\n\n" + "\n".join(paper_list)
            if len(papers) > 10:
                message += f"\n\n...and {len(papers) - 10} more."

        return self._quick_reply(request, message, tools_used=["workflow_library"], confidence=1.0)

    # Export to BibTeX
    if mentions("export to bibtex", "export bibtex", "generate bibtex", "bibtex export"):
        success = self.workflow.export_to_bibtex()
        if success:
            message = f"✅ Exported {len(self.workflow.list_papers())} papers to BibTeX.\n\nFile: {self.workflow.bibtex_file}\n\nYou can import this into Zotero, Mendeley, or use it in your LaTeX project."
        else:
            message = "❌ Failed to export BibTeX. Make sure you have papers in your library first."

        return self._quick_reply(request, message, tools_used=["workflow_export"], confidence=1.0)

    # Export to Markdown
    if mentions("export to markdown", "export markdown", "markdown export"):
        success = self.workflow.export_to_markdown()
        if success:
            message = f"✅ Exported to Markdown. Check {self.workflow.exports_dir} for the file.\n\nYou can open it in Obsidian, Notion, or any markdown editor."
        else:
            message = "❌ Failed to export Markdown."

        return self._quick_reply(request, message, tools_used=["workflow_export"], confidence=1.0)

    # Show history
    if mentions("show history", "my history", "recent queries", "what did i search"):
        history = self.workflow.get_history()[:10]
        if not history:
            message = "No query history yet."
        else:
            history_list = [history_line(i, entry) for i, entry in enumerate(history, 1)]
            message = "Recent queries:\n\n" + "\n".join(history_list)

        return self._quick_reply(request, message, tools_used=["workflow_history"], confidence=1.0)

    # Search library: first try a quoted term ('find in my library "x"'),
    # then the plain 'search library [for] x' form.
    search_match = re.match(r".*(?:search|find).*(?:in|my).*library.*[\"'](.+?)[\"']", question_lower)
    if not search_match:
        search_match = re.match(r".*search library (?:for )?(.+)", question_lower)

    if search_match:
        # The plain form captures everything up to the end of the line, so
        # drop wrapping quotes and trailing sentence punctuation; otherwise
        # 'search library for "x"' would look for the literal '"x"'.
        query_term = search_match.group(1).lstrip(" \t\"'").rstrip(" \t\"'?!.")
        if query_term:
            results = self.workflow.search_library(query_term)
            if not results:
                message = f"No papers found matching '{query_term}' in your library."
            else:
                result_list = [paper_line(i, paper) for i, paper in enumerate(results[:5], 1)]
                message = f"Found {len(results)} paper(s) matching '{query_term}':\n\n" + "\n".join(result_list)
                if len(results) > 5:
                    message += f"\n\n...and {len(results) - 5} more."

            return self._quick_reply(request, message, tools_used=["workflow_search"], confidence=1.0)

    # No workflow command detected
    return None
2309
+
1942
2310
  async def _analyze_request_type(self, question: str) -> Dict[str, Any]:
1943
2311
  """Analyze what type of request this is and what APIs to use"""
1944
2312
 
@@ -2093,12 +2461,69 @@ class EnhancedNocturnalAgent:
2093
2461
  async def process_request(self, request: ChatRequest) -> ChatResponse:
2094
2462
  """Process request with full AI capabilities and API integration"""
2095
2463
  try:
2096
- # PRODUCTION MODE: Route all LLM queries through backend
2097
- # This ensures monetization - no local API key bypass
2464
+ # Check workflow commands first (both modes)
2465
+ workflow_response = await self._handle_workflow_commands(request)
2466
+ if workflow_response:
2467
+ return workflow_response
2468
+
2469
+ # Analyze request to determine what APIs to call
2470
+ request_analysis = await self._analyze_request_type(request.question)
2471
+
2472
+ # Debug: Check what was detected
2473
+ debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
2474
+ if debug_mode:
2475
+ print(f"🔍 Request analysis: {request_analysis}")
2476
+
2477
+ # Call appropriate APIs (Archive, FinSight) - BOTH production and dev mode
2478
+ api_results = {}
2479
+ tools_used = []
2480
+
2481
+ # Archive API for research
2482
+ if "archive" in request_analysis.get("apis", []):
2483
+ result = await self.search_academic_papers(request.question, 5)
2484
+ if "error" not in result:
2485
+ api_results["research"] = result
2486
+ tools_used.append("archive_api")
2487
+
2488
+ # FinSight API for financial data
2489
+ if "finsight" in request_analysis.get("apis", []):
2490
+ tickers = self._extract_tickers_from_text(request.question)
2491
+ if not tickers:
2492
+ # Try common company name mappings
2493
+ question_lower = request.question.lower()
2494
+ if "apple" in question_lower:
2495
+ tickers = ["AAPL"]
2496
+ elif "tesla" in question_lower:
2497
+ tickers = ["TSLA"]
2498
+ elif "microsoft" in question_lower:
2499
+ tickers = ["MSFT"]
2500
+ elif "google" in question_lower or "alphabet" in question_lower:
2501
+ tickers = ["GOOGL"]
2502
+
2503
+ if debug_mode:
2504
+ print(f"🔍 Extracted tickers: {tickers}")
2505
+
2506
+ if tickers:
2507
+ # Call FinSight with proper endpoint format
2508
+ if debug_mode:
2509
+ print(f"🔍 Calling FinSight API: calc/{tickers[0]}/revenue")
2510
+ financial_data = await self._call_finsight_api(f"calc/{tickers[0]}/revenue")
2511
+ if debug_mode:
2512
+ print(f"🔍 FinSight returned: {list(financial_data.keys()) if financial_data else None}")
2513
+ if financial_data and "error" not in financial_data:
2514
+ api_results["financial"] = financial_data
2515
+ tools_used.append("finsight_api")
2516
+ else:
2517
+ if debug_mode and financial_data:
2518
+ print(f"🔍 FinSight error: {financial_data.get('error')}")
2519
+
2520
+ # PRODUCTION MODE: Send to backend LLM with API results
2098
2521
  if self.client is None:
2099
2522
  return await self.call_backend_query(
2100
2523
  query=request.question,
2101
- conversation_history=self.conversation_history[-10:] # Last 10 messages for context
2524
+ conversation_history=self.conversation_history[-10:],
2525
+ api_results=api_results, # Include the data!
2526
+ tools_used=tools_used # Pass tools list for history
2102
2527
  )
2103
2528
 
2104
2529
  # DEV MODE ONLY: Direct Groq calls (only works with local API keys)
@@ -2152,6 +2577,11 @@ class EnhancedNocturnalAgent:
2152
2577
  confidence=0.55
2153
2578
  )
2154
2579
 
2580
+ # Check for workflow commands (natural language)
2581
+ workflow_response = await self._handle_workflow_commands(request)
2582
+ if workflow_response:
2583
+ return workflow_response
2584
+
2155
2585
  # Call appropriate APIs based on request type
2156
2586
  api_results = {}
2157
2587
  tools_used = []
@@ -2291,8 +2721,14 @@ class EnhancedNocturnalAgent:
2291
2721
  result = await self.search_academic_papers(request.question, 5)
2292
2722
  if "error" not in result:
2293
2723
  api_results["research"] = result
2724
+ # DEBUG: Log what we got from the API
2725
+ papers_count = len(result.get("results", []))
2726
+ logger.info(f"🔍 DEBUG: Got {papers_count} papers from Archive API")
2727
+ if papers_count > 0:
2728
+ logger.info(f"🔍 DEBUG: First paper: {result['results'][0].get('title', 'NO TITLE')[:80]}")
2294
2729
  else:
2295
2730
  api_results["research"] = {"error": result["error"]}
2731
+ logger.warning(f"🔍 DEBUG: Archive API returned error: {result['error']}")
2296
2732
  tools_used.append("archive_api")
2297
2733
 
2298
2734
  # Build enhanced system prompt with trimmed sections based on detected needs
@@ -2514,6 +2950,17 @@ class EnhancedNocturnalAgent:
2514
2950
  f"Q: {request.question[:100]}... A: {final_response[:100]}..."
2515
2951
  )
2516
2952
 
2953
+ # Save to workflow history automatically
2954
+ self.workflow.save_query_result(
2955
+ query=request.question,
2956
+ response=final_response,
2957
+ metadata={
2958
+ "tools_used": tools_used,
2959
+ "tokens_used": tokens_used,
2960
+ "confidence_score": request_analysis['confidence']
2961
+ }
2962
+ )
2963
+
2517
2964
  return ChatResponse(
2518
2965
  response=final_response,
2519
2966
  tools_used=tools_used,