cite-agent 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cite-agent might be problematic. Click here for more details.

Files changed (42) hide show
  1. cite_agent/__init__.py +1 -1
  2. cite_agent/account_client.py +19 -46
  3. cite_agent/agent_backend_only.py +30 -4
  4. cite_agent/cli.py +24 -26
  5. cite_agent/cli_conversational.py +294 -0
  6. cite_agent/enhanced_ai_agent.py +2776 -118
  7. cite_agent/setup_config.py +5 -21
  8. cite_agent/streaming_ui.py +252 -0
  9. {cite_agent-1.0.4.dist-info → cite_agent-1.0.5.dist-info}/METADATA +4 -3
  10. cite_agent-1.0.5.dist-info/RECORD +50 -0
  11. {cite_agent-1.0.4.dist-info → cite_agent-1.0.5.dist-info}/top_level.txt +1 -0
  12. src/__init__.py +1 -0
  13. src/services/__init__.py +132 -0
  14. src/services/auth_service/__init__.py +3 -0
  15. src/services/auth_service/auth_manager.py +33 -0
  16. src/services/graph/__init__.py +1 -0
  17. src/services/graph/knowledge_graph.py +194 -0
  18. src/services/llm_service/__init__.py +5 -0
  19. src/services/llm_service/llm_manager.py +495 -0
  20. src/services/paper_service/__init__.py +5 -0
  21. src/services/paper_service/openalex.py +231 -0
  22. src/services/performance_service/__init__.py +1 -0
  23. src/services/performance_service/rust_performance.py +395 -0
  24. src/services/research_service/__init__.py +23 -0
  25. src/services/research_service/chatbot.py +2056 -0
  26. src/services/research_service/citation_manager.py +436 -0
  27. src/services/research_service/context_manager.py +1441 -0
  28. src/services/research_service/conversation_manager.py +597 -0
  29. src/services/research_service/critical_paper_detector.py +577 -0
  30. src/services/research_service/enhanced_research.py +121 -0
  31. src/services/research_service/enhanced_synthesizer.py +375 -0
  32. src/services/research_service/query_generator.py +777 -0
  33. src/services/research_service/synthesizer.py +1273 -0
  34. src/services/search_service/__init__.py +5 -0
  35. src/services/search_service/indexer.py +186 -0
  36. src/services/search_service/search_engine.py +342 -0
  37. src/services/simple_enhanced_main.py +287 -0
  38. cite_agent/__distribution__.py +0 -7
  39. cite_agent-1.0.4.dist-info/RECORD +0 -23
  40. {cite_agent-1.0.4.dist-info → cite_agent-1.0.5.dist-info}/WHEEL +0 -0
  41. {cite_agent-1.0.4.dist-info → cite_agent-1.0.5.dist-info}/entry_points.txt +0 -0
  42. {cite_agent-1.0.4.dist-info → cite_agent-1.0.5.dist-info}/licenses/LICENSE +0 -0
@@ -1,172 +1,2830 @@
1
+ #!/usr/bin/env python3
1
2
  """
2
- Backend-Only Agent (Distribution Version)
3
- All LLM queries go through centralized backend API.
4
- Local API keys are not supported.
3
+ Enhanced Nocturnal AI Agent - Production-Ready Research Assistant
4
+ Integrates with Archive API and FinSight API for comprehensive research capabilities
5
5
  """
6
6
 
7
+ import asyncio
8
+ import hashlib
9
+ import json
10
+ import logging
7
11
  import os
8
- import requests
9
- from typing import Dict, Any, Optional
10
- from dataclasses import dataclass
12
+ import re
13
+ import shlex
14
+ import subprocess
15
+ import time
16
+ from importlib import resources
17
+
18
+ import aiohttp
11
19
  from datetime import datetime, timezone
20
+ from typing import Dict, Any, List, Optional, Tuple
21
+ from urllib.parse import urlparse
22
+ from dataclasses import dataclass, field
23
+ from pathlib import Path
24
+
25
+ from .telemetry import TelemetryManager
26
+ from .setup_config import DEFAULT_QUERY_LIMIT
27
+
28
+ # Suppress noise
29
+ logging.basicConfig(level=logging.ERROR)
30
+ logger = logging.getLogger(__name__)
31
+
32
+ # Removed: No direct Groq import in production
33
+ # All LLM calls go through backend API for monetization
34
+ # Backend has the API keys, not the client
12
35
 
13
36
  @dataclass
14
37
  class ChatRequest:
15
38
  question: str
16
39
  user_id: str = "default"
17
40
  conversation_id: str = "default"
18
- context: Dict[str, Any] = None
41
+ context: Dict[str, Any] = field(default_factory=dict)
42
+
19
43
 
20
44
  @dataclass
21
45
  class ChatResponse:
22
46
  response: str
23
- citations: list = None
24
- tools_used: list = None
25
- model: str = "backend"
26
- timestamp: str = None
27
-
28
- def __post_init__(self):
29
- if self.timestamp is None:
30
- self.timestamp = datetime.now(timezone.utc).isoformat()
31
- if self.citations is None:
32
- self.citations = []
33
- if self.tools_used is None:
34
- self.tools_used = []
47
+ tools_used: List[str] = field(default_factory=list)
48
+ reasoning_steps: List[str] = field(default_factory=list)
49
+ model: str = "enhanced-nocturnal-agent"
50
+ timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
51
+ tokens_used: int = 0
52
+ confidence_score: float = 0.0
53
+ execution_results: Dict[str, Any] = field(default_factory=dict)
54
+ api_results: Dict[str, Any] = field(default_factory=dict)
55
+ error_message: Optional[str] = None
35
56
 
36
57
  class EnhancedNocturnalAgent:
37
58
  """
38
- Backend-only agent for distribution.
39
- Proxies all requests to centralized API.
59
+ Enhanced AI Agent with full API integration:
60
+ - Archive API for academic research
61
+ - FinSight API for financial data
62
+ - Shell access for system operations
63
+ - Memory system for context retention
40
64
  """
41
-
65
+
42
66
  def __init__(self):
43
- self.backend_url = (
44
- os.getenv("NOCTURNAL_CONTROL_PLANE_URL")
45
- or "https://cite-agent-api-720dfadd602c.herokuapp.com"
46
- )
47
- self.auth_token = None
48
- self._load_auth()
67
+ self.client = None
68
+ self.conversation_history = []
69
+ self.shell_session = None
70
+ self.memory = {}
71
+ self.daily_token_usage = 0
72
+ self.daily_limit = 100000
73
+ self.daily_query_limit = self._resolve_daily_query_limit()
74
+ self.per_user_query_limit = self.daily_query_limit
75
+ self.daily_query_count = 0
76
+ self.total_cost = 0.0
77
+ self.cost_per_1k_tokens = 0.0001 # Groq pricing estimate
78
+ self._auto_update_enabled = True
79
+ try:
80
+ self.per_user_token_limit = int(os.getenv("GROQ_PER_USER_TOKENS", 50000))
81
+ except (TypeError, ValueError):
82
+ self.per_user_token_limit = 50000 # 50 queries at ~1000 tokens each
83
+ self.user_token_usage: Dict[str, int] = {}
84
+ self.user_query_counts: Dict[str, int] = {}
85
+ self._usage_day = datetime.now(timezone.utc).strftime("%Y-%m-%d")
86
+ self._initialized = False
87
+ self._env_loaded = False
88
+ self._init_lock: Optional[asyncio.Lock] = None
89
+ self._default_headers: Dict[str, str] = {}
49
90
 
50
- def _load_auth(self):
51
- """Load authentication token from config"""
52
- # Try environment first
53
- self.auth_token = os.getenv("NOCTURNAL_AUTH_TOKEN")
91
+ # API clients
92
+ self.archive_client = None
93
+ self.finsight_client = None
94
+ self.session = None
95
+ self.company_name_to_ticker = {}
54
96
 
55
- # Try config file
56
- if not self.auth_token:
57
- from pathlib import Path
58
- config_file = Path.home() / ".nocturnal_archive" / "config.env"
59
- if config_file.exists():
60
- with open(config_file) as f:
61
- for line in f:
62
- if line.startswith("NOCTURNAL_AUTH_TOKEN="):
63
- self.auth_token = line.split("=", 1)[1].strip()
64
- break
97
+ # Groq key rotation state
98
+ self.api_keys: List[str] = []
99
+ self.current_key_index: int = 0
100
+ self.current_api_key: Optional[str] = None
101
+ self.exhausted_keys: Dict[str, float] = {}
102
+ try:
103
+ self.key_recheck_seconds = float(
104
+ os.getenv("GROQ_KEY_RECHECK_SECONDS", 3600)
105
+ )
106
+ except Exception:
107
+ self.key_recheck_seconds = 3600.0
108
+
109
+ self._service_roots: List[str] = []
110
+ self._backend_health_cache: Dict[str, Dict[str, Any]] = {}
111
+ try:
112
+ self._health_ttl = float(os.getenv("NOCTURNAL_HEALTH_TTL", 30))
113
+ except Exception:
114
+ self._health_ttl = 30.0
115
+ self._recent_sources: List[Dict[str, Any]] = []
116
+ self._session_topics: Dict[str, Dict[str, Any]] = {}
65
117
 
66
- async def initialize(self):
67
- """Initialize agent"""
68
- if not self.auth_token:
69
- raise RuntimeError(
70
- "Not authenticated. Please run 'cite-agent --setup' first."
118
+ # Initialize API clients
119
+ self._init_api_clients()
120
+ self._load_ticker_map()
121
+
122
+ def get_usage_stats(self) -> Dict[str, Any]:
123
+ """Get current usage statistics and cost information"""
124
+ limit = self.daily_limit if self.daily_limit > 0 else 1
125
+ remaining = max(self.daily_limit - self.daily_token_usage, 0)
126
+ usage_percentage = (self.daily_token_usage / limit) * 100 if limit else 0.0
127
+ return {
128
+ "daily_tokens_used": self.daily_token_usage,
129
+ "daily_token_limit": self.daily_limit,
130
+ "remaining_tokens": remaining,
131
+ "usage_percentage": usage_percentage,
132
+ "total_cost": self.total_cost,
133
+ "cost_per_1k_tokens": self.cost_per_1k_tokens,
134
+ "estimated_monthly_cost": self.total_cost * 30, # Rough estimate
135
+ "per_user_token_limit": self.per_user_token_limit,
136
+ "daily_queries_used": self.daily_query_count,
137
+ "daily_query_limit": self.daily_query_limit,
138
+ "per_user_query_limit": self.per_user_query_limit,
139
+ }
140
+
141
+ async def close(self):
142
+ """Cleanly close resources (HTTP session and shell)."""
143
+ lock = self._get_init_lock()
144
+ async with lock:
145
+ await self._close_resources()
146
+
147
+ async def _close_resources(self):
148
+ try:
149
+ if self.session and not self.session.closed:
150
+ await self.session.close()
151
+ except Exception:
152
+ pass
153
+ finally:
154
+ self.session = None
155
+
156
+ try:
157
+ if self.shell_session:
158
+ self.shell_session.terminate()
159
+ except Exception:
160
+ pass
161
+ finally:
162
+ self.shell_session = None
163
+
164
+ self.client = None
165
+ self.current_api_key = None
166
+ self.current_key_index = 0
167
+ self._initialized = False
168
+ self.exhausted_keys.clear()
169
+
170
+ def _init_api_clients(self):
171
+ """Initialize API clients for Archive and FinSight"""
172
+ try:
173
+ def _normalize_base(value: Optional[str], fallback: str) -> str:
174
+ candidate = (value or fallback).strip()
175
+ return candidate[:-1] if candidate.endswith('/') else candidate
176
+
177
+ archive_env = (
178
+ os.getenv("ARCHIVE_API_URL")
179
+ or os.getenv("NOCTURNAL_ARCHIVE_API_URL")
180
+ )
181
+ finsight_env = (
182
+ os.getenv("FINSIGHT_API_URL")
183
+ or os.getenv("NOCTURNAL_FINSIGHT_API_URL")
71
184
  )
72
- print(f"✅ Connected to backend: {self.backend_url}")
73
185
 
74
- async def chat(self, request: ChatRequest) -> ChatResponse:
75
- """
76
- Send chat request to backend API.
186
+ # Archive API client
187
+ self.archive_base_url = _normalize_base(archive_env, "http://127.0.0.1:8000/api")
77
188
 
78
- Args:
79
- request: Chat request with question and context
189
+ # FinSight API client
190
+ self.finsight_base_url = _normalize_base(finsight_env, "http://127.0.0.1:8000/v1/finance")
80
191
 
81
- Returns:
82
- Chat response with answer and citations
192
+ # Workspace Files API client
193
+ files_env = os.getenv("FILES_API_URL")
194
+ self.files_base_url = _normalize_base(files_env, "http://127.0.0.1:8000/v1/files")
83
195
 
84
- Raises:
85
- RuntimeError: If authentication fails or backend unavailable
86
- """
87
- if not self.auth_token:
88
- raise RuntimeError(
89
- "Not authenticated. Run 'cite-agent --setup' first."
196
+ # Shared API key handling for protected routes
197
+ self.api_key = (
198
+ os.getenv("NOCTURNAL_KEY")
199
+ or os.getenv("NOCTURNAL_API_KEY")
200
+ or os.getenv("X_API_KEY")
201
+ or "demo-key-123"
202
+ )
203
+ self._default_headers.clear()
204
+ if self.api_key:
205
+ self._default_headers["X-API-Key"] = self.api_key
206
+ if self.api_key == "demo-key-123":
207
+ print("⚠️ Using demo API key. Set NOCTURNAL_KEY for production usage.")
208
+ else:
209
+ print("⚠️ No API key configured for Nocturnal Archive API calls")
210
+
211
+ self._update_service_roots()
212
+ print(f"✅ API clients initialized (Archive={self.archive_base_url}, FinSight={self.finsight_base_url})")
213
+
214
+ except Exception as e:
215
+ print(f"⚠️ API client initialization warning: {e}")
216
+
217
+ def _update_service_roots(self) -> None:
218
+ roots = set()
219
+ for base in (getattr(self, "archive_base_url", None), getattr(self, "finsight_base_url", None), getattr(self, "files_base_url", None)):
220
+ if not base:
221
+ continue
222
+ parsed = urlparse(base)
223
+ if parsed.scheme and parsed.netloc:
224
+ roots.add(f"{parsed.scheme}://{parsed.netloc}")
225
+
226
+ if not roots:
227
+ roots.add("http://127.0.0.1:8000")
228
+
229
+ self._service_roots = sorted(roots)
230
+ # Drop caches for roots that no longer exist
231
+ for cached in list(self._backend_health_cache.keys()):
232
+ if cached not in self._service_roots:
233
+ self._backend_health_cache.pop(cached, None)
234
+
235
+ async def _probe_health_endpoint(self, root: str) -> Tuple[bool, str]:
236
+ if not self.session:
237
+ return False, "HTTP session not initialized"
238
+
239
+ if not hasattr(self.session, "get"):
240
+ # Assume healthy when using lightweight mocks that lack GET semantics
241
+ return True, ""
242
+
243
+ candidates = ["/readyz", "/health", "/api/health", "/livez"]
244
+ last_detail = ""
245
+
246
+ for endpoint in candidates:
247
+ try:
248
+ async with self.session.get(f"{root}{endpoint}", timeout=5) as response:
249
+ if response.status == 200:
250
+ return True, ""
251
+ body = await response.text()
252
+ if response.status == 404:
253
+ # Endpoint absent—record detail but keep probing
254
+ last_detail = (
255
+ f"{endpoint} missing (404)."
256
+ if not body else f"{endpoint} missing (404): {body.strip()}"
257
+ )
258
+ continue
259
+ last_detail = (
260
+ f"{endpoint} returned {response.status}"
261
+ if not body else f"{endpoint} returned {response.status}: {body.strip()}"
262
+ )
263
+ except Exception as exc:
264
+ last_detail = f"{endpoint} failed: {exc}"
265
+
266
+ # Fall back to a lightweight root probe so services without explicit
267
+ # health endpoints don't register as offline.
268
+ try:
269
+ async with self.session.get(root, timeout=5) as response:
270
+ if response.status < 500:
271
+ fallback_detail = f"fallback probe returned {response.status}"
272
+ if response.status == 200:
273
+ detail = (f"{last_detail}; {fallback_detail}" if last_detail else "")
274
+ else:
275
+ detail = (
276
+ f"{last_detail}; {fallback_detail}"
277
+ if last_detail else f"Health endpoint unavailable; {fallback_detail}"
278
+ )
279
+ return True, detail
280
+ except Exception as exc: # pragma: no cover - network failure already captured above
281
+ last_detail = last_detail or f"Fallback probe failed: {exc}"
282
+
283
+ return False, last_detail or f"Health check failed for {root}"
284
+
285
+ async def _check_backend_health(self, force: bool = False) -> Dict[str, Any]:
286
+ now = time.monotonic()
287
+ overall_ok = True
288
+ details: List[str] = []
289
+
290
+ if not self._service_roots:
291
+ self._update_service_roots()
292
+
293
+ for root in self._service_roots:
294
+ cache = self._backend_health_cache.get(root)
295
+ if cache and not force and now - cache.get("timestamp", 0.0) < self._health_ttl:
296
+ if not cache.get("ok", False) and cache.get("detail"):
297
+ details.append(cache["detail"])
298
+ overall_ok = False
299
+ overall_ok = overall_ok and cache.get("ok", False)
300
+ continue
301
+
302
+ ok, detail = await self._probe_health_endpoint(root)
303
+ self._backend_health_cache[root] = {"ok": ok, "detail": detail, "timestamp": now}
304
+ if not ok and detail:
305
+ details.append(detail)
306
+ overall_ok = overall_ok and ok
307
+
308
+ return {"ok": overall_ok, "detail": "; ".join(details) if details else ""}
309
+
310
+ async def _ensure_backend_ready(self) -> Tuple[bool, str]:
311
+ status = await self._check_backend_health()
312
+ return status["ok"], status.get("detail", "")
313
+
314
+ def _record_data_source(self, service: str, endpoint: str, success: bool, detail: str = "") -> None:
315
+ entry = {
316
+ "service": service,
317
+ "endpoint": endpoint,
318
+ "success": success,
319
+ "detail": detail,
320
+ }
321
+ self._recent_sources.append(entry)
322
+ if len(self._recent_sources) > 10:
323
+ self._recent_sources = self._recent_sources[-10:]
324
+
325
+ def _format_data_sources_footer(self) -> str:
326
+ if not self._recent_sources:
327
+ return ""
328
+
329
+ snippets: List[str] = []
330
+ for item in self._recent_sources[:4]:
331
+ status = "ok" if item.get("success") else f"error ({item.get('detail')})" if item.get("detail") else "error"
332
+ snippets.append(f"{item.get('service')} {item.get('endpoint')} – {status}")
333
+ if len(self._recent_sources) > 4:
334
+ snippets.append("…")
335
+ return "Data sources: " + "; ".join(snippets)
336
+
337
+ def _reset_data_sources(self) -> None:
338
+ self._recent_sources = []
339
+
340
+ def _load_ticker_map(self):
341
+ """Load a simple company name -> ticker map for FinSight lookups."""
342
+ # Start with common aliases
343
+ mapping = {
344
+ "apple": "AAPL",
345
+ "microsoft": "MSFT",
346
+ "alphabet": "GOOGL",
347
+ "google": "GOOGL",
348
+ "amazon": "AMZN",
349
+ "nvidia": "NVDA",
350
+ "palantir": "PLTR",
351
+ "shopify": "SHOP",
352
+ "target": "TGT",
353
+ "amd": "AMD",
354
+ "tesla": "TSLA",
355
+ "meta": "META",
356
+ "netflix": "NFLX",
357
+ "goldman sachs": "GS",
358
+ "goldman": "GS",
359
+ "exxonmobil": "XOM",
360
+ "exxon": "XOM",
361
+ "jpmorgan": "JPM",
362
+ "square": "SQ"
363
+ }
364
+
365
+ def _augment_from_records(records: List[Dict[str, Any]]) -> None:
366
+ for item in records:
367
+ name = str(item.get("name", "")).lower()
368
+ symbol = item.get("symbol")
369
+ if name and symbol:
370
+ mapping.setdefault(name, symbol)
371
+ short = (
372
+ name.replace("inc.", "")
373
+ .replace("inc", "")
374
+ .replace("corporation", "")
375
+ .replace("corp.", "")
376
+ .strip()
377
+ )
378
+ if short and short != name:
379
+ mapping.setdefault(short, symbol)
380
+
381
+ try:
382
+ supplemental: List[Dict[str, Any]] = []
383
+
384
+ try:
385
+ package_resource = resources.files("nocturnal_archive.data").joinpath("company_tickers.json")
386
+ if package_resource.is_file():
387
+ supplemental = json.loads(package_resource.read_text(encoding="utf-8"))
388
+ except (FileNotFoundError, ModuleNotFoundError, AttributeError):
389
+ supplemental = []
390
+
391
+ if not supplemental:
392
+ candidate_paths = [
393
+ Path(__file__).resolve().parent / "data" / "company_tickers.json",
394
+ Path("./data/company_tickers.json"),
395
+ ]
396
+ for data_path in candidate_paths:
397
+ if data_path.exists():
398
+ supplemental = json.loads(data_path.read_text(encoding="utf-8"))
399
+ break
400
+
401
+ if supplemental:
402
+ _augment_from_records(supplemental)
403
+
404
+ override_candidates: List[Path] = []
405
+ override_env = os.getenv("NOCTURNAL_TICKER_MAP")
406
+ if override_env:
407
+ override_candidates.append(Path(override_env).expanduser())
408
+
409
+ default_override = Path.home() / ".nocturnal_archive" / "tickers.json"
410
+ override_candidates.append(default_override)
411
+
412
+ for override_path in override_candidates:
413
+ if not override_path or not override_path.exists():
414
+ continue
415
+ try:
416
+ override_records = json.loads(override_path.read_text(encoding="utf-8"))
417
+ if isinstance(override_records, list):
418
+ _augment_from_records(override_records)
419
+ except Exception as override_exc:
420
+ logger.warning(f"Failed to load ticker override from {override_path}: {override_exc}")
421
+ except Exception:
422
+ pass
423
+
424
+ self.company_name_to_ticker = mapping
425
+
426
+ def _ensure_environment_loaded(self):
427
+ if self._env_loaded:
428
+ return
429
+
430
+ try:
431
+ from .setup_config import NocturnalConfig
432
+
433
+ config = NocturnalConfig()
434
+ config.setup_environment()
435
+ except ImportError:
436
+ pass
437
+ except Exception as exc:
438
+ print(f"⚠️ Environment setup warning: {exc}")
439
+
440
+ try:
441
+ from dotenv import load_dotenv
442
+
443
+ load_dotenv('.env.local')
444
+ except ImportError:
445
+ print("⚠️ python-dotenv not installed, using system environment variables")
446
+ except Exception as exc:
447
+ print(f"⚠️ Could not load .env.local: {exc}")
448
+ finally:
449
+ self._env_loaded = True
450
+
451
+ def _get_init_lock(self) -> asyncio.Lock:
452
+ if self._init_lock is None:
453
+ self._init_lock = asyncio.Lock()
454
+ return self._init_lock
455
+
456
+ async def _get_workspace_listing(self, limit: int = 20) -> Dict[str, Any]:
457
+ params = {"path": ".", "limit": limit, "include_hidden": "false"}
458
+ result = await self._call_files_api("GET", "/", params=params)
459
+ if "error" not in result:
460
+ return result
461
+
462
+ fallback = self._fallback_workspace_listing(limit)
463
+ fallback["error"] = result["error"]
464
+ return fallback
465
+
466
+ def _fallback_workspace_listing(self, limit: int = 20) -> Dict[str, Any]:
467
+ base = Path.cwd().resolve()
468
+ items: List[Dict[str, str]] = []
469
+ try:
470
+ for entry in sorted(base.iterdir(), key=lambda e: e.name.lower()):
471
+ if entry.name.startswith('.'):
472
+ continue
473
+ item = {
474
+ "name": entry.name,
475
+ "type": "directory" if entry.is_dir() else "file"
476
+ }
477
+ items.append(item)
478
+ if len(items) >= limit:
479
+ break
480
+ except Exception as exc:
481
+ return {
482
+ "base": str(base),
483
+ "items": [],
484
+ "error": f"Unable to list workspace: {exc}"
485
+ }
486
+
487
+ return {
488
+ "base": str(base),
489
+ "items": items,
490
+ "note": "Showing up to first {limit} non-hidden entries.".format(limit=limit)
491
+ }
492
+
493
+ def _format_workspace_listing_response(self, listing: Dict[str, Any]) -> str:
494
+ base = listing.get("base", Path.cwd().resolve())
495
+ items = listing.get("items")
496
+ if not items:
497
+ items = listing.get("entries", []) or []
498
+ note = listing.get("note")
499
+ error = listing.get("error")
500
+ truncated_flag = listing.get("truncated")
501
+
502
+ if not items:
503
+ summary_lines = ["(no visible files in the current directory)"]
504
+ else:
505
+ max_entries = min(len(items), 12)
506
+ summary_lines = [
507
+ f"- {item.get('name')} ({item.get('type', 'unknown')})"
508
+ for item in items[:max_entries]
509
+ ]
510
+ if len(items) > max_entries:
511
+ remaining = len(items) - max_entries
512
+ summary_lines.append(f"… and {remaining} more")
513
+
514
+ message_parts = [
515
+ f"Workspace root: {base}",
516
+ "Here are the first entries I can see:",
517
+ "\n".join(summary_lines)
518
+ ]
519
+
520
+ if note:
521
+ message_parts.append(note)
522
+ if error:
523
+ message_parts.append(f"Workspace API warning: {error}")
524
+ if truncated_flag:
525
+ message_parts.append("(Listing truncated by workspace service)")
526
+
527
+ footer = self._format_data_sources_footer()
528
+ if footer:
529
+ message_parts.append(f"_{footer}_")
530
+
531
+ return "\n\n".join(part for part in message_parts if part)
532
+
533
+ def _respond_with_workspace_listing(self, request: ChatRequest, listing: Dict[str, Any]) -> ChatResponse:
534
+ message = self._format_workspace_listing_response(listing)
535
+
536
+ self.conversation_history.append({"role": "user", "content": request.question})
537
+ self.conversation_history.append({"role": "assistant", "content": message})
538
+ self._update_memory(request.user_id, request.conversation_id, f"Q: {request.question[:100]}... A: {message[:100]}...")
539
+
540
+ items = listing.get("items") or listing.get("entries") or []
541
+ success = "error" not in listing
542
+ self._emit_telemetry(
543
+ "workspace_listing",
544
+ request,
545
+ success=success,
546
+ extra={
547
+ "item_count": len(items),
548
+ "truncated": bool(listing.get("truncated")),
549
+ },
550
+ )
551
+
552
+ return ChatResponse(
553
+ response=message,
554
+ tools_used=["files_listing"],
555
+ reasoning_steps=["Direct workspace listing response"],
556
+ tokens_used=0,
557
+ confidence_score=0.7,
558
+ api_results={"workspace_listing": listing}
559
+ )
560
+
561
+ def _respond_with_shell_command(self, request: ChatRequest, command: str) -> ChatResponse:
562
+ command_stub = command.split()[0] if command else ""
563
+ if not self._is_safe_shell_command(command):
564
+ message = (
565
+ "I couldn't run that command because it violates the safety policy. "
566
+ "Please try a simpler shell command (no pipes, redirection, or file writes)."
567
+ )
568
+ tools = ["shell_blocked"]
569
+ execution_results = {"command": command, "output": "Command blocked by safety policy", "success": False}
570
+ telemetry_event = "shell_blocked"
571
+ success = False
572
+ output_len = 0
573
+ else:
574
+ output = self.execute_command(command)
575
+ truncated_output = output if len(output) <= 2000 else output[:2000] + "\n… (truncated)"
576
+ message = (
577
+ f"Running the command: `{command}`\n\n"
578
+ "Output:\n```\n"
579
+ f"{truncated_output}\n"
580
+ "```"
90
581
  )
582
+ tools = ["shell_execution"]
583
+ success = not output.startswith("ERROR:")
584
+ execution_results = {"command": command, "output": truncated_output, "success": success}
585
+ telemetry_event = "shell_execution"
586
+ output_len = len(truncated_output)
91
587
 
588
+ footer = self._format_data_sources_footer()
589
+ if footer:
590
+ message = f"{message}\n\n_{footer}_"
591
+
592
+ self.conversation_history.append({"role": "user", "content": request.question})
593
+ self.conversation_history.append({"role": "assistant", "content": message})
594
+ self._update_memory(
595
+ request.user_id,
596
+ request.conversation_id,
597
+ f"Q: {request.question[:100]}... A: {message[:100]}..."
598
+ )
599
+
600
+ self._emit_telemetry(
601
+ telemetry_event,
602
+ request,
603
+ success=success,
604
+ extra={
605
+ "command": command_stub,
606
+ "output_len": output_len,
607
+ },
608
+ )
609
+
610
+ return ChatResponse(
611
+ response=message,
612
+ tools_used=tools,
613
+ reasoning_steps=["Direct shell execution"],
614
+ tokens_used=0,
615
+ confidence_score=0.75 if tools == ["shell_execution"] else 0.4,
616
+ execution_results=execution_results
617
+ )
618
+ def _format_currency_value(self, value: float) -> str:
92
619
  try:
93
- response = requests.post(
94
- f"{self.backend_url}/api/query",
95
- headers={
96
- "Authorization": f"Bearer {self.auth_token}",
97
- "Content-Type": "application/json"
98
- },
99
- json={
100
- "query": request.question,
101
- "context": request.context or {},
102
- "user_id": request.user_id,
103
- "conversation_id": request.conversation_id,
104
- },
105
- timeout=60
620
+ abs_val = abs(value)
621
+ if abs_val >= 1e12:
622
+ return f"${value / 1e12:.2f} trillion"
623
+ if abs_val >= 1e9:
624
+ return f"${value / 1e9:.2f} billion"
625
+ if abs_val >= 1e6:
626
+ return f"${value / 1e6:.2f} million"
627
+ return f"${value:,.2f}"
628
+ except Exception:
629
+ return str(value)
630
+
631
+ def _respond_with_financial_metrics(self, request: ChatRequest, payload: Dict[str, Any]) -> ChatResponse:
632
+ ticker, metrics = next(iter(payload.items()))
633
+ headline = [f"{ticker} key metrics:"]
634
+ citations: List[str] = []
635
+
636
+ for metric_name, metric_data in metrics.items():
637
+ if not isinstance(metric_data, dict):
638
+ continue
639
+ value = metric_data.get("value")
640
+ if value is None:
641
+ inner_inputs = metric_data.get("inputs", {})
642
+ entry = inner_inputs.get(metric_name) or next(iter(inner_inputs.values()), {})
643
+ value = entry.get("value")
644
+ formatted_value = self._format_currency_value(value) if value is not None else "(value unavailable)"
645
+ period = metric_data.get("period")
646
+ if not period or (isinstance(period, str) and period.lower().startswith("latest")):
647
+ inner_inputs = metric_data.get("inputs", {})
648
+ entry = inner_inputs.get(metric_name) or next(iter(inner_inputs.values()), {})
649
+ period = entry.get("period")
650
+ sources = metric_data.get("citations") or []
651
+ if sources:
652
+ source_url = sources[0].get("source_url")
653
+ if source_url:
654
+ citations.append(source_url)
655
+ label = metric_name.replace("Gross", "Gross ").replace("Income", " Income").replace("Net", "Net ")
656
+ label = label.replace("operating", "operating ").replace("Ratio", " Ratio").title()
657
+ if period:
658
+ headline.append(f"• {label}: {formatted_value} (as of {period})")
659
+ else:
660
+ headline.append(f"• {label}: {formatted_value}")
661
+
662
+ unique_citations = []
663
+ for c in citations:
664
+ if c not in unique_citations:
665
+ unique_citations.append(c)
666
+
667
+ message_parts = ["\n".join(headline)]
668
+ if unique_citations:
669
+ message_parts.append("Sources:\n" + "\n".join(unique_citations))
670
+
671
+ footer = self._format_data_sources_footer()
672
+ if footer:
673
+ message_parts.append(f"_{footer}_")
674
+
675
+ message = "\n\n".join(message_parts)
676
+
677
+ self.conversation_history.append({"role": "user", "content": request.question})
678
+ self.conversation_history.append({"role": "assistant", "content": message})
679
+ self._update_memory(
680
+ request.user_id,
681
+ request.conversation_id,
682
+ f"Q: {request.question[:100]}... A: {message[:100]}..."
683
+ )
684
+
685
+ self._emit_telemetry(
686
+ "financial_metrics",
687
+ request,
688
+ success=True,
689
+ extra={
690
+ "ticker": ticker,
691
+ "metric_count": len(metrics),
692
+ },
693
+ )
694
+
695
+ return ChatResponse(
696
+ response=message,
697
+ tools_used=["finsight_api"],
698
+ reasoning_steps=["Direct financial metrics response"],
699
+ tokens_used=0,
700
+ confidence_score=0.8,
701
+ api_results={"financial": payload}
702
+ )
703
+
704
+ def _local_file_preview(self, path_str: str) -> Optional[Dict[str, Any]]:
705
+ try:
706
+ p = Path(path_str)
707
+ if not p.exists():
708
+ return None
709
+ if p.is_dir():
710
+ entries = sorted([e.name for e in p.iterdir()][:10])
711
+ return {
712
+ "path": str(p),
713
+ "type": "directory",
714
+ "preview": "\n".join(entries),
715
+ "encoding": "utf-8",
716
+ "truncated": False,
717
+ "size": None,
718
+ }
719
+
720
+ stat_result = p.stat()
721
+ if p.suffix.lower() in {".pdf", ".png", ".jpg", ".jpeg", ".parquet", ".zip", ".gif"}:
722
+ return {
723
+ "path": str(p),
724
+ "type": "binary",
725
+ "preview": "(binary file preview skipped)",
726
+ "encoding": "binary",
727
+ "truncated": False,
728
+ "size": stat_result.st_size,
729
+ }
730
+
731
+ content = p.read_text(errors="ignore")
732
+ truncated = len(content) > 65536
733
+ snippet = content[:65536]
734
+ preview = "\n".join(snippet.splitlines()[:60])
735
+ return {
736
+ "path": str(p),
737
+ "type": "text",
738
+ "preview": preview,
739
+ "encoding": "utf-8",
740
+ "truncated": truncated,
741
+ "size": stat_result.st_size,
742
+ }
743
+ except Exception as exc:
744
+ return {
745
+ "path": path_str,
746
+ "type": "error",
747
+ "preview": f"error: {exc}",
748
+ "encoding": "utf-8",
749
+ "truncated": False,
750
+ "size": None,
751
+ }
752
+
753
async def _preview_file(self, path_str: str) -> Optional[Dict[str, Any]]:
    """Preview a file via the backend Files API, falling back to a local read.

    Returns None when the backend reports the path does not exist,
    otherwise a preview dict (path / type / preview / encoding /
    truncated / size, plus an ``error`` key on degraded results).
    """
    params = {"path": path_str}
    result = await self._call_files_api("GET", "/preview", params=params)
    if "error" not in result:
        encoding = result.get("encoding", "utf-8")
        return {
            "path": result.get("path", path_str),
            # Anything the backend did not report as utf-8 is treated as binary.
            "type": "text" if encoding == "utf-8" else "binary",
            "preview": result.get("content", ""),
            "encoding": encoding,
            "truncated": bool(result.get("truncated", False)),
            "size": result.get("size"),
        }

    message = result.get("error", "")
    # A missing path is a normal outcome, not an error condition.
    if message and "does not exist" in message.lower():
        return None

    # Backend failed for some other reason: try the local filesystem,
    # carrying the backend's error message along for diagnostics.
    fallback = self._local_file_preview(path_str)
    if fallback:
        fallback.setdefault("error", message)
        return fallback
    return {
        "path": path_str,
        "type": "error",
        "preview": "",
        "encoding": "utf-8",
        "truncated": False,
        "size": None,
        "error": message,
    }
784
+
785
async def __aenter__(self):
    """Async context-manager entry: initialize clients/session, return self."""
    await self.initialize()
    return self
788
+
789
async def __aexit__(self, exc_type, exc, tb):
    """Async context-manager exit: release resources; never suppress exceptions."""
    await self.close()
    return False
792
+
793
+ def _is_simple_greeting(self, text: str) -> bool:
794
+ greetings = {"hi", "hello", "hey", "hola", "howdy", "greetings"}
795
+ normalized = text.lower().strip()
796
+ return any(normalized.startswith(greet) for greet in greetings)
797
+
798
+ def _is_casual_acknowledgment(self, text: str) -> bool:
799
+ acknowledgments = {
800
+ "thanks",
801
+ "thank you",
802
+ "thx",
803
+ "ty",
804
+ "appreciate it",
805
+ "got it",
806
+ "cool",
807
+ "great",
808
+ "awesome"
809
+ }
810
+ normalized = text.lower().strip()
811
+ return any(normalized.startswith(ack) for ack in acknowledgments)
812
+
813
+ def _format_api_results_for_prompt(self, api_results: Dict[str, Any]) -> str:
814
+ if not api_results:
815
+ return "No API results yet."
816
+ try:
817
+ serialized = json.dumps(api_results, indent=2)
818
+ except Exception:
819
+ serialized = str(api_results)
820
+ max_len = 2000
821
+ if len(serialized) > max_len:
822
+ serialized = serialized[:max_len] + "\n... (truncated)"
823
+ return serialized
824
+
825
def _build_system_prompt(
    self,
    request_analysis: Dict[str, Any],
    memory_context: str,
    api_results: Dict[str, Any]
) -> str:
    """Assemble the full system prompt for the LLM call.

    Sections, in order: mode-specific identity intro, active capabilities,
    critical rules (base + mode-specific), worked example responses,
    optional memory context, a one-line request-analysis summary, and the
    serialized API results.  Sections are joined with blank lines.
    """
    sections: List[str] = []

    # TRUTH-SEEKING CORE IDENTITY
    # Adapt intro based on analysis mode
    analysis_mode = request_analysis.get("analysis_mode", "quantitative")

    if analysis_mode == "qualitative":
        intro = (
            "You are Nocturnal, a truth-seeking research AI specialized in QUALITATIVE ANALYSIS. "
            "PRIMARY DIRECTIVE: Accuracy > Agreeableness. Quote verbatim, never paraphrase. "
            "You analyze text, identify themes, extract quotes with context, and synthesize patterns. "
            "You have direct access to academic sources and can perform thematic coding."
        )
    elif analysis_mode == "mixed":
        intro = (
            "You are Nocturnal, a truth-seeking research AI handling MIXED METHODS analysis. "
            "PRIMARY DIRECTIVE: Accuracy > Agreeableness. "
            "You work with both quantitative data (numbers, stats) and qualitative data (themes, quotes). "
            "For numbers: calculate and cite. For text: quote verbatim and identify patterns. "
            "You have access to production data sources and can write/execute code (Python, R, SQL)."
        )
    else:  # quantitative
        intro = (
            "You are Nocturnal, a truth-seeking research and finance AI. "
            "PRIMARY DIRECTIVE: Accuracy > Agreeableness. "
            "You are a fact-checker and analyst, NOT a people-pleaser. "
            "You have direct access to production-grade data sources and can write/execute code (Python, R, SQL)."
        )

    sections.append(intro)

    # Only advertise the capabilities the request analysis selected.
    apis = request_analysis.get("apis", [])
    capability_lines: List[str] = []
    if "archive" in apis:
        capability_lines.append("• Archive Research API for academic search and synthesis")
    if "finsight" in apis:
        capability_lines.append("• FinSight Finance API for SEC-quality metrics and citations")
    if "shell" in apis:
        capability_lines.append("• Persistent shell session for system inspection and code execution")
    if not capability_lines:
        capability_lines.append("• Core reasoning, code generation (Python/R/SQL), memory recall")
    sections.append("Capabilities in play:\n" + "\n".join(capability_lines))

    # ENHANCED TRUTH-SEEKING RULES (adapt based on mode)
    base_rules = [
        "🚨 ANTI-APPEASEMENT: If user states something incorrect, CORRECT THEM immediately. Do not agree to be polite.",
        "🚨 UNCERTAINTY: If you're uncertain, SAY SO explicitly. 'I don't know' is better than a wrong answer.",
        "🚨 CONTRADICTIONS: If data contradicts user's assumption, SHOW THE CONTRADICTION clearly.",
        "🚨 FUTURE PREDICTIONS: You CANNOT predict the future. For 'will X happen?' questions, emphasize uncertainty and multiple possible outcomes.",
        "",
        "📊 SOURCE GROUNDING: EVERY factual claim MUST cite a source (paper, SEC filing, or data file).",
        "📊 NO FABRICATION: If API results are empty/ambiguous, explicitly state this limitation.",
        "📊 NO EXTRAPOLATION: Never go beyond what sources directly state.",
        "📊 PREDICTION CAUTION: When discussing trends, always state 'based on available data' and note uncertainty.",
        "",
        "✓ VERIFICATION: Cross-check against multiple sources when available.",
        "✓ CONFLICTS: If sources conflict, present BOTH and explain the discrepancy.",
        "✓ SHOW REASONING: 'According to [source], X is Y because...'",
    ]

    if analysis_mode == "qualitative":
        qual_rules = [
            "",
            "📝 QUOTES: Extract EXACT quotes (verbatim), NEVER paraphrase. Use quotation marks.",
            "📝 CONTEXT: Provide surrounding context for every quote (what came before/after).",
            "📝 ATTRIBUTION: Cite source + page/line number: \"quote\" — Author (Year), p. X",
            "📝 THEMES: Identify recurring patterns. Count frequency (\"mentioned 5 times across 3 sources\").",
            "",
            "🔍 INTERPRETATION: Distinguish between description (what text says) vs interpretation (what it means).",
            "🔍 EVIDENCE: Support every theme with 2-3 representative quotes.",
            "🔍 SATURATION: Note when patterns repeat (\"no new themes after source 4\").",
        ]
        rules = base_rules + qual_rules
    elif analysis_mode == "mixed":
        mixed_rules = [
            "",
            "📝 For QUALITATIVE: Extract exact quotes with context. Identify themes.",
            "💻 For QUANTITATIVE: Calculate exact values, show code.",
            "🔗 INTEGRATION: Connect numbers to narratives ('15% growth' + 'participants felt optimistic')."
        ]
        rules = base_rules + mixed_rules + [
            "",
            "💻 CODE: For data analysis, write and execute Python/R/SQL code. Show your work.",
            "💻 CALCULATIONS: Don't estimate - calculate exact values and show the code.",
        ]
    else:  # quantitative
        quant_rules = [
            "",
            "💻 CODE: For data analysis, write and execute Python/R/SQL code. Show your work.",
            "💻 CALCULATIONS: Don't estimate - calculate exact values and show the code.",
        ]
        rules = base_rules + quant_rules

    rules.append("")
    rules.append("Keep responses concise but complete. Quote exact text from sources when possible.")

    sections.append("CRITICAL RULES:\n" + "\n".join(rules))

    # CORRECTION EXAMPLES (adapt based on mode)
    if analysis_mode == "qualitative":
        examples = (
            "EXAMPLE RESPONSES:\n"
            "User: 'So participants felt happy about the change?'\n"
            "You: '⚠️ Mixed. 3 participants expressed satisfaction: \"I welcomed the new policy\" (P2, line 45), "
            "but 2 expressed concern: \"It felt rushed\" (P4, line 67). Theme: Ambivalence about pace.'\n\n"
            "User: 'What's the main theme?'\n"
            "You: 'THEME 1: Trust in leadership (8 mentions across 4 interviews)\n"
            "\"I trust my manager to make the right call\" — Interview 2, Line 34\n"
            "\"Leadership has been transparent\" — Interview 5, Line 89\n"
            "[Context: Both quotes from questions about organizational changes]'"
        )
    else:
        examples = (
            "EXAMPLE CORRECTIONS:\n"
            "User: 'So revenue went up 50%?'\n"
            "You: '❌ No. According to 10-K page 23, revenue increased 15%, not 50%. "
            "You may be thinking of gross margin (30%→45%, a 15pp increase).'\n\n"
            "User: 'What will the stock price be?'\n"
            "You: '⚠️ Cannot predict future prices. I can show: historical trends, current fundamentals, analyst data (if in filings).'"
        )

    sections.append(examples)

    if memory_context:
        sections.append("CONTEXT:\n" + memory_context.strip())

    sections.append(
        "REQUEST ANALYSIS: "
        f"type={request_analysis.get('type')}, "
        f"apis={apis}, "
        f"confidence={request_analysis.get('confidence')}"
    )

    sections.append("API RESULTS:\n" + self._format_api_results_for_prompt(api_results))

    return "\n\n".join(sections)
967
+
968
def _quick_reply(
    self,
    request: ChatRequest,
    message: str,
    tools_used: Optional[List[str]] = None,
    confidence: float = 0.6
) -> ChatResponse:
    """Return a canned answer without calling the LLM.

    Appends the exchange to the in-process conversation history, persists
    a truncated summary via _update_memory, and emits a "quick_reply"
    telemetry event.  tokens_used is 0 because no model was invoked.
    """
    tools = tools_used or []
    self.conversation_history.append({"role": "user", "content": request.question})
    self.conversation_history.append({"role": "assistant", "content": message})
    self._update_memory(
        request.user_id,
        request.conversation_id,
        f"Q: {request.question[:100]}... A: {message[:100]}..."
    )
    self._emit_telemetry(
        "quick_reply",
        request,
        success=True,
        extra={
            "tools_used": tools,
        },
    )
    return ChatResponse(
        response=message,
        tools_used=tools,
        reasoning_steps=["Quick reply without LLM"],
        timestamp=datetime.now().isoformat(),
        tokens_used=0,
        confidence_score=confidence,
        execution_results={},
        api_results={}
    )
1001
+
1002
def _select_model(
    self,
    request: ChatRequest,
    request_analysis: Dict[str, Any],
    api_results: Dict[str, Any]
) -> Dict[str, Any]:
    """Choose LLM parameters for this turn.

    Cheap/simple turns (short question, no API payloads, at most the
    shell tool, or a short general/system query) go to the fast 8B
    model; everything else uses the 70B model.
    """
    question = request.question.strip()
    apis = request_analysis.get("apis", [])

    short_and_toolless = len(question) <= 180 and not api_results and not apis
    shell_only = len(question) <= 220 and set(apis).issubset({"shell"})
    simple_chat = (
        len(question.split()) <= 40
        and request_analysis.get("type") in {"general", "system"}
        and not api_results
    )

    if short_and_toolless or shell_only or simple_chat:
        return {
            "model": "llama-3.1-8b-instant",
            "max_tokens": 520,
            "temperature": 0.2
        }

    return {
        "model": "llama-3.3-70b-versatile",
        "max_tokens": 900,
        "temperature": 0.3
    }
1031
+
1032
def _mark_current_key_exhausted(self, reason: str = "rate_limit"):
    """Record the currently selected Groq key as exhausted.

    Stores a timestamp so the rotation helpers can retry the key after
    key_recheck_seconds have elapsed.  No-op when no keys are loaded.
    """
    if not self.api_keys:
        return
    key = self.api_keys[self.current_key_index]
    self.exhausted_keys[key] = time.time()
    logger.warning(f"Groq key index {self.current_key_index} marked exhausted ({reason})")
1038
+
1039
def _rotate_to_next_available_key(self) -> bool:
    """Advance through the key ring until a usable Groq client is built.

    Skips keys still inside their exhaustion cool-down
    (key_recheck_seconds), clears cool-downs that have expired, and
    re-marks keys whose client construction fails.

    Returns:
        True when a client was initialized with some key, False when
        every key was exhausted or failed.
    """
    if not self.api_keys:
        return False

    attempts = 0
    total = len(self.api_keys)
    now = time.time()

    while attempts < total:
        # Always move off the current (presumed bad) key first.
        self.current_key_index = (self.current_key_index + 1) % total
        key = self.api_keys[self.current_key_index]
        exhausted_at = self.exhausted_keys.get(key)
        if exhausted_at:
            if now - exhausted_at >= self.key_recheck_seconds:
                # Cool-down elapsed: give this key another chance.
                del self.exhausted_keys[key]
            else:
                attempts += 1
                continue
        try:
            self.client = Groq(api_key=key)
            self.current_api_key = key
            return True
        except Exception as e:
            logger.error(f"Failed to initialize Groq client for rotated key: {e}")
            self.exhausted_keys[key] = now
            attempts += 1
    return False
1066
+
1067
def _ensure_client_ready(self) -> bool:
    """Lazily build a Groq client for the current key, rotating if needed.

    Fast path: an already-initialized client is reused.  Otherwise walks
    the key ring starting at current_key_index, honoring the same
    exhaustion cool-down rules as _rotate_to_next_available_key.

    Returns:
        True when a client is (or becomes) available, False otherwise.
    """
    if self.client and self.current_api_key:
        return True

    if not self.api_keys:
        return False

    total = len(self.api_keys)
    attempts = 0
    now = time.time()

    while attempts < total:
        key = self.api_keys[self.current_key_index]
        exhausted_at = self.exhausted_keys.get(key)
        # Still cooling down: skip to the next key.
        if exhausted_at and (now - exhausted_at) < self.key_recheck_seconds:
            attempts += 1
            self.current_key_index = (self.current_key_index + 1) % total
            continue

        # Cool-down elapsed: clear the mark and try the key again.
        if exhausted_at and (now - exhausted_at) >= self.key_recheck_seconds:
            del self.exhausted_keys[key]

        try:
            self.client = Groq(api_key=key)
            self.current_api_key = key
            return True
        except Exception as e:
            logger.error(f"Failed to initialize Groq client for key index {self.current_key_index}: {e}")
            self.exhausted_keys[key] = now
            attempts += 1
            self.current_key_index = (self.current_key_index + 1) % total

    return False
1100
+
1101
+ def _schedule_next_key_rotation(self):
1102
+ if len(self.api_keys) <= 1:
1103
+ return
1104
+ self.current_key_index = (self.current_key_index + 1) % len(self.api_keys)
1105
+ self.current_api_key = None
1106
+ self.client = None
1107
+
1108
+ def _is_rate_limit_error(self, error: Exception) -> bool:
1109
+ message = str(error).lower()
1110
+ return "rate limit" in message or "429" in message
1111
+
1112
def _respond_with_fallback(
    self,
    request: ChatRequest,
    tools_used: List[str],
    api_results: Dict[str, Any],
    failure_reason: str,
    error_message: Optional[str] = None
) -> ChatResponse:
    """Compose a degraded-mode reply when no Groq capacity is available.

    Surfaces any structured payloads already fetched (finance, research,
    file previews) so the user keeps the raw data, records the exchange
    in history/memory, and emits failure telemetry.

    Fix: the first "Next steps" bullet was missing its "•" marker,
    unlike the other two bullets in the list.
    """
    tools = list(tools_used) if tools_used else []
    if "fallback" not in tools:
        tools.append("fallback")

    header = "⚠️ Temporary LLM downtime\n\n"

    if self._is_simple_greeting(request.question):
        body = (
            "Hi there! I'm currently at my Groq capacity, so I can't craft a full narrative response just yet. "
            "You're welcome to try again in a little while, or I can still fetch finance and research data for you."
        )
    else:
        details: List[str] = []

        # Each payload is capped at 1500 chars with an ellipsis marker.
        financial = api_results.get("financial")
        if financial:
            payload_full = json.dumps(financial, indent=2)
            payload = payload_full[:1500]
            if len(payload_full) > 1500:
                payload += "\n…"
            details.append(f"**Finance API snapshot**\n```json\n{payload}\n```")

        research = api_results.get("research")
        if research:
            payload_full = json.dumps(research, indent=2)
            payload = payload_full[:1500]
            if len(payload_full) > 1500:
                payload += "\n…"
            details.append(f"**Research API snapshot**\n```json\n{payload}\n```")

        files_context = api_results.get("files_context")
        if files_context:
            preview = files_context[:600]
            if len(files_context) > 600:
                preview += "\n…"
            details.append(f"**File preview**\n{preview}")

        if details:
            body = (
                "I pulled the structured data you asked for, but I'm temporarily out of Groq quota to synthesize a full answer. "
                "Here are the raw results so you can keep moving:"
            ) + "\n\n" + "\n\n".join(details)
        else:
            body = (
                "I'm temporarily out of Groq quota, so I can't compose a full answer. "
                "Please try again in a bit, or ask me to queue this work for later."
            )

    footer = (
        "\n\nNext steps:\n"
        "• Wait for the Groq daily quota to reset (usually within 24 hours).\n"
        "• Add another API key in your environment for automatic rotation.\n"
        "• Keep the conversation open—I’ll resume normal replies once capacity returns."
    )

    message = header + body + footer

    self.conversation_history.append({"role": "user", "content": request.question})
    self.conversation_history.append({"role": "assistant", "content": message})
    self._update_memory(
        request.user_id,
        request.conversation_id,
        f"Q: {request.question[:100]}... A: {message[:100]}..."
    )

    self._emit_telemetry(
        "fallback_response",
        request,
        success=False,
        extra={
            "failure_reason": failure_reason,
            "has_financial_payload": bool(api_results.get("financial")),
            "has_research_payload": bool(api_results.get("research")),
        },
    )

    return ChatResponse(
        response=message,
        tools_used=tools,
        reasoning_steps=["Fallback response activated"],
        timestamp=datetime.now().isoformat(),
        tokens_used=0,
        confidence_score=0.2,
        execution_results={},
        api_results=api_results,
        error_message=error_message or failure_reason
    )
1207
+
1208
+ def _extract_tickers_from_text(self, text: str) -> List[str]:
1209
+ """Find tickers either as explicit symbols or from known company names."""
1210
+ text_lower = text.lower()
1211
+ # Explicit ticker-like symbols
1212
+ ticker_candidates: List[str] = []
1213
+ for token in re.findall(r"\b[A-Z]{1,5}(?:\d{0,2})\b", text):
1214
+ ticker_candidates.append(token)
1215
+ # Company name matches
1216
+ for name, sym in self.company_name_to_ticker.items():
1217
+ if name and name in text_lower:
1218
+ ticker_candidates.append(sym)
1219
+ # Deduplicate preserve order
1220
+ seen = set()
1221
+ ordered: List[str] = []
1222
+ for t in ticker_candidates:
1223
+ if t not in seen:
1224
+ seen.add(t)
1225
+ ordered.append(t)
1226
+ return ordered[:4]
1227
+
1228
async def initialize(self, force_reload: bool = False):
    """Initialize the agent with API keys and shell session.

    Idempotent under a per-instance async lock: repeated calls return
    immediately unless force_reload is set, in which case existing
    resources are closed and rebuilt.  Sets up (in order): background
    update check, environment/config, API clients, auth or Groq keys,
    a persistent bash subprocess, and the aiohttp session.
    """
    lock = self._get_init_lock()
    async with lock:
        if self._initialized and not force_reload:
            return True

        if self._initialized and force_reload:
            await self._close_resources()

        # Check for updates automatically (silent background check)
        self._check_updates_background()
        self._ensure_environment_loaded()
        self._init_api_clients()

        # SECURITY FIX: No API keys on client!
        # All API calls go through our secure backend
        # This prevents key extraction and piracy
        # DISABLED for beta testing - set USE_LOCAL_KEYS=false to enable backend-only mode

        use_local_keys = os.getenv("USE_LOCAL_KEYS", "true").lower() == "true"

        if not use_local_keys:
            self.api_keys = []  # Empty - keys stay on server
            self.current_key_index = 0
            self.current_api_key = None
            self.client = None  # Will use HTTP client instead

            # Get backend API URL from config
            self.backend_api_url = os.getenv(
                "NOCTURNAL_API_URL",
                "https://api.nocturnal.dev/api"  # Production default
            )

            # Get auth token from session (set by auth.py after login)
            from pathlib import Path
            session_file = Path.home() / ".nocturnal_archive" / "session.json"
            if session_file.exists():
                try:
                    import json
                    with open(session_file, 'r') as f:
                        session_data = json.load(f)
                    self.auth_token = session_data.get('access_token')
                    self.user_id = session_data.get('user_id')
                except Exception:
                    # Unreadable/corrupt session file: treat as logged out.
                    self.auth_token = None
                    self.user_id = None
            else:
                self.auth_token = None
                self.user_id = None

            if self.auth_token:
                print(f"✅ Enhanced Nocturnal Agent Ready! (Authenticated)")
            else:
                print("⚠️ Not authenticated. Please log in to use the agent.")
        else:
            # Local keys mode - load Groq API keys
            self.auth_token = None
            self.user_id = None

            # Load Groq keys from environment
            self.api_keys = []
            for i in range(1, 10):  # Check GROQ_API_KEY_1 through GROQ_API_KEY_9
                key = os.getenv(f"GROQ_API_KEY_{i}") or os.getenv(f"GROQ_API_KEY")
                if key and key not in self.api_keys:
                    self.api_keys.append(key)

            if not self.api_keys:
                print("⚠️ No Groq API keys found. Set GROQ_API_KEY_1, GROQ_API_KEY_2, etc.")
            else:
                print(f"✅ Loaded {len(self.api_keys)} Groq API key(s)")
                # Initialize first client
                try:
                    from groq import Groq
                    self.client = Groq(api_key=self.api_keys[0])
                    self.current_api_key = self.api_keys[0]
                    self.current_key_index = 0
                except Exception as e:
                    print(f"⚠️ Failed to initialize Groq client: {e}")

        # A poll() result that is not None means the shell already exited.
        if self.shell_session and self.shell_session.poll() is not None:
            self.shell_session = None

        if self.shell_session is None:
            try:
                self.shell_session = subprocess.Popen(
                    ['bash'],
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    text=True,
                    cwd=os.getcwd()
                )
            except Exception as exc:
                # Shell features degrade gracefully when bash is unavailable.
                print(f"⚠️ Unable to launch persistent shell session: {exc}")
                self.shell_session = None

        if self.session is None or getattr(self.session, "closed", False):
            if self.session and not self.session.closed:
                await self.session.close()
            default_headers = dict(getattr(self, "_default_headers", {}))
            self.session = aiohttp.ClientSession(headers=default_headers)

        self._initialized = True
        return True
1333
+
1334
def _check_updates_background(self):
    """Check for updates in background (silent, non-blocking).

    Spawns a daemon thread so the check can never block startup or
    keep the process alive; all failures are swallowed deliberately.
    """
    if not self._auto_update_enabled:
        return

    import threading

    def update_check():
        # Runs off the main thread; must not raise or print.
        try:
            from .updater import NocturnalUpdater
            updater = NocturnalUpdater()
            update_info = updater.check_for_updates()

            if update_info and update_info["available"]:
                # Silent update - no interruption
                updater.update_package()

        except Exception:
            # Completely silent - don't interrupt user experience
            pass

    # Run in background thread
    threading.Thread(target=update_check, daemon=True).start()
1357
+
1358
async def call_backend_query(self, query: str, conversation_history: Optional[List[Dict]] = None) -> ChatResponse:
    """
    Call backend /query endpoint instead of Groq directly.
    This is the SECURE method - all API keys stay on server.

    Handles 401 (expired auth), 429 (daily token quota), 200 (success)
    and any other status as distinct ChatResponse outcomes; network
    errors and timeouts are also converted to error ChatResponses
    rather than raised.
    """
    if not self.auth_token:
        return ChatResponse(
            response="❌ Not authenticated. Please log in first.",
            error_message="Authentication required"
        )

    if not self.session:
        return ChatResponse(
            response=" HTTP session not initialized",
            error_message="Session not initialized"
        )

    try:
        # Build request
        payload = {
            "query": query,
            "conversation_history": conversation_history or [],
            "model": "llama-3.3-70b-versatile",
            "temperature": 0.7,
            "max_tokens": 4000
        }

        # Call backend
        headers = {
            "Authorization": f"Bearer {self.auth_token}",
            "Content-Type": "application/json"
        }

        url = f"{self.backend_api_url}/query/"

        async with self.session.post(url, json=payload, headers=headers, timeout=60) as response:
            if response.status == 401:
                return ChatResponse(
                    response="❌ Authentication expired. Please log in again.",
                    error_message="Authentication expired"
                )

            elif response.status == 429:
                # Rate limit exceeded
                data = await response.json()
                detail = data.get('detail', {})
                tokens_remaining = detail.get('tokens_remaining', 0)
                return ChatResponse(
                    response=f"❌ Daily token limit reached. You have {tokens_remaining} tokens remaining today. The limit resets tomorrow.",
                    error_message="Rate limit exceeded",
                    tokens_used=detail.get('tokens_used_today', 0)
                )

            elif response.status == 200:
                data = await response.json()
                return ChatResponse(
                    response=data.get('response', ''),
                    tokens_used=data.get('tokens_used', 0),
                    model=data.get('model', 'llama-3.3-70b-versatile'),
                    timestamp=data.get('timestamp', datetime.now(timezone.utc).isoformat())
                )

            else:
                error_text = await response.text()
                return ChatResponse(
                    response=f"❌ Backend error (HTTP {response.status}): {error_text}",
                    error_message=f"HTTP {response.status}"
                )

    except asyncio.TimeoutError:
        return ChatResponse(
            response="❌ Request timeout. Please try again.",
            error_message="Timeout"
        )
    except Exception as e:
        return ChatResponse(
            response=f"❌ Error calling backend: {str(e)}",
            error_message=str(e)
        )
1437
+
1438
async def _call_files_api(
    self,
    method: str,
    endpoint: str,
    *,
    params: Optional[Dict[str, Any]] = None,
    json_body: Optional[Dict[str, Any]] = None,
    data: Any = None,
) -> Dict[str, Any]:
    """Dispatch an HTTP call to the workspace Files API.

    Args:
        method: HTTP verb; must match an aiohttp session method name
            (get/post/put/...).
        endpoint: Path appended to files_base_url.
        params/json_body/data: Forwarded to the aiohttp request.

    Returns:
        The decoded JSON payload on HTTP 200, otherwise a dict with an
        "error" key.  Every outcome is recorded via _record_data_source.
    """
    if not self.session:
        return {"error": "HTTP session not initialized"}

    ok, detail = await self._ensure_backend_ready()
    if not ok:
        self._record_data_source("Files", f"{method.upper()} {endpoint}", False, detail)
        return {"error": f"Workspace API unavailable: {detail or 'backend offline'}"}

    url = f"{self.files_base_url}{endpoint}"
    # Resolve the verb dynamically against the aiohttp session.
    request_method = getattr(self.session, method.lower(), None)
    if not request_method:
        return {"error": f"Unsupported HTTP method: {method}"}

    try:
        async with request_method(url, params=params, json=json_body, data=data, timeout=20) as response:
            payload: Any
            # Non-JSON responses are wrapped so callers always get a dict.
            if response.content_type and "json" in response.content_type:
                payload = await response.json()
            else:
                payload = {"raw": await response.text()}

            success = response.status == 200
            self._record_data_source(
                "Files",
                f"{method.upper()} {endpoint}",
                success,
                "" if success else f"HTTP {response.status}"
            )

            if success:
                return payload if isinstance(payload, dict) else {"data": payload}

            detail_msg = payload.get("detail") if isinstance(payload, dict) else None
            return {"error": detail_msg or f"Files API error: {response.status}"}
    except Exception as exc:
        self._record_data_source("Files", f"{method.upper()} {endpoint}", False, str(exc))
        return {"error": f"Files API call failed: {exc}"}
1484
+
1485
async def _call_archive_api(self, endpoint: str, data: Dict[str, Any]) -> Dict[str, Any]:
    """Call Archive API endpoint with retry mechanism.

    POSTs ``data`` to the endpoint with up to 3 attempts; 429 and
    timeouts/exceptions are retried with exponential backoff (1s, 2s),
    401 and other HTTP errors fail immediately.  Returns the JSON
    payload on success, otherwise a dict with an "error" key.
    """
    max_retries = 3
    retry_delay = 1

    ok, detail = await self._ensure_backend_ready()
    if not ok:
        self._record_data_source("Archive", f"POST {endpoint}", False, detail)
        return {"error": f"Archive backend unavailable: {detail or 'backend offline'}"}

    for attempt in range(max_retries):
        try:
            if not self.session:
                return {"error": "HTTP session not initialized"}

            url = f"{self.archive_base_url}/{endpoint}"
            headers = getattr(self, "_default_headers", None)
            if headers:
                # Copy so per-request mutation can't leak into the defaults.
                headers = dict(headers)

            async with self.session.post(url, json=data, headers=headers, timeout=30) as response:
                if response.status == 200:
                    payload = await response.json()
                    self._record_data_source("Archive", f"POST {endpoint}", True)
                    return payload
                elif response.status == 429:  # Rate limited
                    if attempt < max_retries - 1:
                        await asyncio.sleep(retry_delay * (2 ** attempt))  # Exponential backoff
                        continue
                    self._record_data_source("Archive", f"POST {endpoint}", False, "rate limited")
                    return {"error": "Archive API rate limited. Please try again later."}
                elif response.status == 401:
                    self._record_data_source("Archive", f"POST {endpoint}", False, "401 unauthorized")
                    return {"error": "Archive API authentication failed. Please check API key."}
                else:
                    self._record_data_source("Archive", f"POST {endpoint}", False, f"HTTP {response.status}")
                    return {"error": f"Archive API error: {response.status}"}

        except asyncio.TimeoutError:
            if attempt < max_retries - 1:
                await asyncio.sleep(retry_delay * (2 ** attempt))
                continue
            self._record_data_source("Archive", f"POST {endpoint}", False, "timeout")
            return {"error": "Archive API timeout. Please try again later."}
        except Exception as e:
            if attempt < max_retries - 1:
                await asyncio.sleep(retry_delay * (2 ** attempt))
                continue
            self._record_data_source("Archive", f"POST {endpoint}", False, str(e))
            return {"error": f"Archive API call failed: {e}"}

    return {"error": "Archive API call failed after all retries"}
1537
+
1538
async def _call_finsight_api(self, endpoint: str, params: Dict[str, Any] = None) -> Dict[str, Any]:
    """Call FinSight API endpoint with retry mechanism.

    GET counterpart of _call_archive_api: up to 3 attempts, exponential
    backoff on 429/timeout/exception, immediate failure on 401 and
    other HTTP errors.  Returns the JSON payload on success, otherwise
    a dict with an "error" key.
    """
    max_retries = 3
    retry_delay = 1

    ok, detail = await self._ensure_backend_ready()
    if not ok:
        self._record_data_source("FinSight", f"GET {endpoint}", False, detail)
        return {"error": f"FinSight backend unavailable: {detail or 'backend offline'}"}

    for attempt in range(max_retries):
        try:
            if not self.session:
                return {"error": "HTTP session not initialized"}

            url = f"{self.finsight_base_url}/{endpoint}"
            headers = getattr(self, "_default_headers", None)
            if headers:
                # Copy so per-request mutation can't leak into the defaults.
                headers = dict(headers)

            async with self.session.get(url, params=params, headers=headers, timeout=30) as response:
                if response.status == 200:
                    payload = await response.json()
                    self._record_data_source("FinSight", f"GET {endpoint}", True)
                    return payload
                elif response.status == 429:  # Rate limited
                    if attempt < max_retries - 1:
                        await asyncio.sleep(retry_delay * (2 ** attempt))  # Exponential backoff
                        continue
                    self._record_data_source("FinSight", f"GET {endpoint}", False, "rate limited")
                    return {"error": "FinSight API rate limited. Please try again later."}
                elif response.status == 401:
                    self._record_data_source("FinSight", f"GET {endpoint}", False, "401 unauthorized")
                    return {"error": "FinSight API authentication failed. Please check API key."}
                else:
                    self._record_data_source("FinSight", f"GET {endpoint}", False, f"HTTP {response.status}")
                    return {"error": f"FinSight API error: {response.status}"}

        except asyncio.TimeoutError:
            if attempt < max_retries - 1:
                await asyncio.sleep(retry_delay * (2 ** attempt))
                continue
            self._record_data_source("FinSight", f"GET {endpoint}", False, "timeout")
            return {"error": "FinSight API timeout. Please try again later."}
        except Exception as e:
            if attempt < max_retries - 1:
                await asyncio.sleep(retry_delay * (2 ** attempt))
                continue
            self._record_data_source("FinSight", f"GET {endpoint}", False, str(e))
            return {"error": f"FinSight API call failed: {e}"}

    return {"error": "FinSight API call failed after all retries"}
1590
+
1591
+ async def _call_finsight_api_post(self, endpoint: str, data: Dict[str, Any] = None) -> Dict[str, Any]:
1592
+ """Call FinSight API endpoint with POST request"""
1593
+ ok, detail = await self._ensure_backend_ready()
1594
+ if not ok:
1595
+ self._record_data_source("FinSight", f"POST {endpoint}", False, detail)
1596
+ return {"error": f"FinSight backend unavailable: {detail or 'backend offline'}"}
1597
+
1598
+ try:
1599
+ if not self.session:
1600
+ return {"error": "HTTP session not initialized"}
1601
+
1602
+ url = f"{self.finsight_base_url}/{endpoint}"
1603
+ headers = getattr(self, "_default_headers", None)
1604
+ if headers:
1605
+ headers = dict(headers)
1606
+ async with self.session.post(url, json=data, headers=headers) as response:
1607
+ if response.status == 200:
1608
+ payload = await response.json()
1609
+ self._record_data_source("FinSight", f"POST {endpoint}", True)
1610
+ return payload
1611
+ self._record_data_source("FinSight", f"POST {endpoint}", False, f"HTTP {response.status}")
1612
+ return {"error": f"FinSight API error: {response.status}"}
1613
+
1614
+ except Exception as e:
1615
+ self._record_data_source("FinSight", f"POST {endpoint}", False, str(e))
1616
+ return {"error": f"FinSight API call failed: {e}"}
1617
+
1618
+ async def search_academic_papers(self, query: str, limit: int = 10) -> Dict[str, Any]:
1619
+ """Search academic papers using Archive API with resilient fallbacks."""
1620
+ source_sets: List[List[str]] = [
1621
+ ["semantic_scholar", "openalex"],
1622
+ ["semantic_scholar"],
1623
+ ["openalex"],
1624
+ ["pubmed"],
1625
+ ["offline"],
1626
+ ]
1627
+
1628
+ tried: List[List[str]] = []
1629
+ provider_errors: List[Dict[str, Any]] = []
1630
+ aggregated_payload: Dict[str, Any] = {"results": []}
1631
+
1632
+ for sources in source_sets:
1633
+ data = {"query": query, "limit": limit, "sources": sources}
1634
+ tried.append(list(sources))
1635
+ result = await self._call_archive_api("search", data)
1636
+
1637
+ if "error" in result:
1638
+ provider_errors.append({"sources": sources, "error": result["error"]})
1639
+ continue
1640
+
1641
+ results = result.get("results") or result.get("papers") or []
1642
+ if results:
1643
+ aggregated_payload = dict(result)
1644
+ aggregated_payload["results"] = results
1645
+ break
1646
+
1647
+ aggregated_payload.setdefault("results", [])
1648
+ aggregated_payload["sources_tried"] = [",".join(s) for s in tried]
1649
+
1650
+ if provider_errors:
1651
+ aggregated_payload["provider_errors"] = provider_errors
1652
+
1653
+ if not aggregated_payload["results"]:
1654
+ aggregated_payload["notes"] = (
1655
+ "No papers were returned by the research providers. This often occurs during "
1656
+ "temporary rate limits; please retry in a minute or adjust the query scope."
146
1657
  )
147
- return response.json()
148
- except:
149
- return {"status": "unavailable"}
150
1658
 
151
- def check_quota(self) -> Dict[str, Any]:
152
- """Check remaining daily quota"""
153
- if not self.auth_token:
154
- raise RuntimeError("Not authenticated")
1659
+ return aggregated_payload
1660
+
1661
+ async def synthesize_research(self, paper_ids: List[str], max_words: int = 500) -> Dict[str, Any]:
1662
+ """Synthesize research papers using Archive API"""
1663
+ data = {
1664
+ "paper_ids": paper_ids,
1665
+ "max_words": max_words,
1666
+ "focus": "key_findings",
1667
+ "style": "academic"
1668
+ }
1669
+ return await self._call_archive_api("synthesize", data)
1670
+
1671
+ async def get_financial_data(self, ticker: str, metric: str, limit: int = 12) -> Dict[str, Any]:
1672
+ """Get financial data using FinSight API"""
1673
+ params = {
1674
+ "freq": "Q",
1675
+ "limit": limit
1676
+ }
1677
+ return await self._call_finsight_api(f"kpis/{ticker}/{metric}", params)
1678
+
1679
+ async def get_financial_metrics(self, ticker: str, metrics: List[str] = None) -> Dict[str, Any]:
1680
+ """Get financial metrics using FinSight KPI endpoints (with schema drift fixes)"""
1681
+ if metrics is None:
1682
+ metrics = ["revenue", "grossProfit", "operatingIncome", "netIncome"]
1683
+
1684
+ if not metrics:
1685
+ return {}
1686
+
1687
+ async def _fetch_metric(metric_name: str) -> Dict[str, Any]:
1688
+ params = {"period": "latest", "freq": "Q"}
1689
+ try:
1690
+ result = await self._call_finsight_api(f"calc/{ticker}/{metric_name}", params)
1691
+ except Exception as exc:
1692
+ return {metric_name: {"error": str(exc)}}
1693
+
1694
+ if "error" in result:
1695
+ return {metric_name: {"error": result["error"]}}
1696
+ return {metric_name: result}
1697
+
1698
+ tasks = [asyncio.create_task(_fetch_metric(metric)) for metric in metrics]
1699
+ results: Dict[str, Any] = {}
1700
+
1701
+ for payload in await asyncio.gather(*tasks):
1702
+ results.update(payload)
1703
+
1704
+ return results
1705
+
1706
+ def execute_command(self, command: str) -> str:
1707
+ """Execute command in persistent shell session and return output"""
1708
+ try:
1709
+ if self.shell_session is None:
1710
+ return "ERROR: Shell session not initialized"
1711
+
1712
+ # Send command to persistent shell
1713
+ self.shell_session.stdin.write(command + '\n')
1714
+ self.shell_session.stdin.flush()
1715
+
1716
+ # Read output with timeout
1717
+ try:
1718
+ import select
1719
+ use_select = True
1720
+ except ImportError:
1721
+ # Windows doesn't have select module
1722
+ use_select = False
1723
+
1724
+ output_lines = []
1725
+ start_time = time.time()
1726
+ timeout = 10 # seconds
1727
+
1728
+ if use_select:
1729
+ while time.time() - start_time < timeout:
1730
+ if select.select([self.shell_session.stdout], [], [], 0.1)[0]:
1731
+ line = self.shell_session.stdout.readline()
1732
+ if line:
1733
+ output_lines.append(line.rstrip())
1734
+ else:
1735
+ break
1736
+ else:
1737
+ # No more output available
1738
+ break
1739
+ else:
1740
+ # Fallback for Windows - simpler approach
1741
+ import threading
1742
+
1743
+ def read_output():
1744
+ try:
1745
+ while True:
1746
+ line = self.shell_session.stdout.readline()
1747
+ if line:
1748
+ output_lines.append(line.rstrip())
1749
+ else:
1750
+ break
1751
+ except:
1752
+ pass
1753
+
1754
+ reader_thread = threading.Thread(target=read_output, daemon=True)
1755
+ reader_thread.start()
1756
+ reader_thread.join(timeout=timeout)
1757
+
1758
+ output = '\n'.join(output_lines)
1759
+ return output if output else "Command executed successfully"
1760
+
1761
+ except Exception as e:
1762
+ return f"ERROR: {e}"
1763
+
1764
+ def _is_safe_shell_command(self, cmd: str) -> bool:
1765
+ """
1766
+ Minimal safety check - only block truly catastrophic commands.
1767
+ Philosophy: This is the user's machine. They can do anything in terminal anyway.
1768
+ We only block commands that could cause immediate, irreversible system damage.
1769
+ """
1770
+ cmd = cmd.strip()
1771
+ if not cmd:
1772
+ return False
1773
+
1774
+ # Block ONLY truly catastrophic commands
1775
+ nuclear_patterns = [
1776
+ 'rm -rf /', # Wipe root filesystem
1777
+ 'rm -rf ~/*', # Wipe home directory
1778
+ 'dd if=/dev/zero of=/dev/sda', # Wipe disk
1779
+ 'dd if=/dev/zero of=/dev/hda',
1780
+ 'mkfs', # Format filesystem
1781
+ 'fdisk', # Partition disk
1782
+ ':(){ :|:& };:', # Fork bomb
1783
+ 'chmod -R 777 /', # Make everything executable
1784
+ ]
1785
+
1786
+ cmd_lower = cmd.lower()
1787
+ for pattern in nuclear_patterns:
1788
+ if pattern.lower() in cmd_lower:
1789
+ return False
1790
+
1791
+ # Allow everything else - pip, npm, git, pipes, redirection, etc.
1792
+ # User asked for it, user gets it. Just like Cursor.
1793
+ return True
1794
+
1795
+ def _check_token_budget(self, estimated_tokens: int) -> bool:
1796
+ """Check if we have enough token budget"""
1797
+ self._ensure_usage_day()
1798
+ return (self.daily_token_usage + estimated_tokens) < self.daily_limit
1799
+
1800
+ def _check_user_token_budget(self, user_id: str, estimated_tokens: int) -> bool:
1801
+ self._ensure_usage_day()
1802
+ current = self.user_token_usage.get(user_id, 0)
1803
+ return (current + estimated_tokens) < self.per_user_token_limit
1804
+
1805
+ def _resolve_daily_query_limit(self) -> int:
1806
+ limit_env = os.getenv("NOCTURNAL_QUERY_LIMIT")
1807
+ if limit_env and limit_env != str(DEFAULT_QUERY_LIMIT):
1808
+ logger.warning("Ignoring attempted query-limit override (%s); enforcing default %s", limit_env, DEFAULT_QUERY_LIMIT)
1809
+ os.environ["NOCTURNAL_QUERY_LIMIT"] = str(DEFAULT_QUERY_LIMIT)
1810
+ os.environ.pop("NOCTURNAL_QUERY_LIMIT_SIG", None)
1811
+ return DEFAULT_QUERY_LIMIT
1812
+
1813
+ def _check_query_budget(self, user_id: Optional[str]) -> bool:
1814
+ self._ensure_usage_day()
1815
+ if self.daily_query_limit > 0 and self.daily_query_count >= self.daily_query_limit:
1816
+ return False
1817
+
1818
+ effective_limit = self.per_user_query_limit if self.per_user_query_limit > 0 else self.daily_query_limit
1819
+ if user_id and effective_limit > 0 and self.user_query_counts.get(user_id, 0) >= effective_limit:
1820
+ return False
1821
+
1822
+ return True
1823
+
1824
+ def _record_query_usage(self, user_id: Optional[str]):
1825
+ self._ensure_usage_day()
1826
+ self.daily_query_count += 1
1827
+ if user_id:
1828
+ self.user_query_counts[user_id] = self.user_query_counts.get(user_id, 0) + 1
1829
+
1830
+ def _ensure_usage_day(self):
1831
+ current_day = datetime.now(timezone.utc).strftime("%Y-%m-%d")
1832
+ if current_day != self._usage_day:
1833
+ self._usage_day = current_day
1834
+ self.daily_token_usage = 0
1835
+ self.user_token_usage = {}
1836
+ self.daily_query_count = 0
1837
+ self.user_query_counts = {}
1838
+
1839
+ def _charge_tokens(self, user_id: Optional[str], tokens: int):
1840
+ """Charge tokens to daily and per-user usage"""
1841
+ self._ensure_usage_day()
1842
+ self.daily_token_usage += tokens
1843
+ if user_id:
1844
+ self.user_token_usage[user_id] = self.user_token_usage.get(user_id, 0) + tokens
1845
+
1846
+ def _get_memory_context(self, user_id: str, conversation_id: str) -> str:
1847
+ """Get relevant memory context for the conversation"""
1848
+ if user_id not in self.memory:
1849
+ self.memory[user_id] = {}
1850
+
1851
+ if conversation_id not in self.memory[user_id]:
1852
+ self.memory[user_id][conversation_id] = []
1853
+
1854
+ # Get last 3 interactions for context
1855
+ recent_memory = self.memory[user_id][conversation_id][-3:]
1856
+ if not recent_memory:
1857
+ return ""
1858
+
1859
+ context = "Recent conversation context:\n"
1860
+ for mem in recent_memory:
1861
+ context += f"- {mem}\n"
1862
+ return context
1863
+
1864
+ def _update_memory(self, user_id: str, conversation_id: str, interaction: str):
1865
+ """Update memory with new interaction"""
1866
+ if user_id not in self.memory:
1867
+ self.memory[user_id] = {}
1868
+
1869
+ if conversation_id not in self.memory[user_id]:
1870
+ self.memory[user_id][conversation_id] = []
1871
+
1872
+ self.memory[user_id][conversation_id].append(interaction)
1873
+
1874
+ # Keep only last 10 interactions
1875
+ if len(self.memory[user_id][conversation_id]) > 10:
1876
+ self.memory[user_id][conversation_id] = self.memory[user_id][conversation_id][-10:]
1877
+
1878
+ @staticmethod
1879
+ def _hash_identifier(value: Optional[str]) -> Optional[str]:
1880
+ if not value:
1881
+ return None
1882
+ digest = hashlib.sha256(value.encode("utf-8")).hexdigest()
1883
+ return digest[:16]
1884
+
1885
+ def _emit_telemetry(
1886
+ self,
1887
+ event: str,
1888
+ request: Optional[ChatRequest] = None,
1889
+ *,
1890
+ success: Optional[bool] = None,
1891
+ extra: Optional[Dict[str, Any]] = None,
1892
+ ) -> None:
1893
+ manager = TelemetryManager.get()
1894
+ if not manager:
1895
+ return
1896
+
1897
+ payload: Dict[str, Any] = {}
1898
+ if request:
1899
+ payload["user"] = self._hash_identifier(request.user_id)
1900
+ payload["conversation"] = self._hash_identifier(request.conversation_id)
1901
+ if success is not None:
1902
+ payload["success"] = bool(success)
1903
+ if extra:
1904
+ for key, value in extra.items():
1905
+ if value is None:
1906
+ continue
1907
+ payload[key] = value
155
1908
 
156
- response = requests.get(
157
- f"{self.backend_url}/api/auth/me",
158
- headers={"Authorization": f"Bearer {self.auth_token}"},
159
- timeout=10
1909
+ manager.record(event, payload)
1910
+
1911
+ @staticmethod
1912
+ def _format_model_error(details: str) -> str:
1913
+ headline = "⚠️ I couldn't finish the reasoning step because the language model call failed."
1914
+ advice = "Please retry shortly or verify your Groq API keys and network connectivity."
1915
+ if details:
1916
+ return f"{headline}\n\nDetails: {details}\n\n{advice}"
1917
+ return f"{headline}\n\n{advice}"
1918
+
1919
+ def _summarize_command_output(
1920
+ self,
1921
+ request: ChatRequest,
1922
+ command: str,
1923
+ truncated_output: str,
1924
+ base_response: str
1925
+ ) -> Tuple[str, int]:
1926
+ """Attach a deterministic shell output block to the agent response."""
1927
+
1928
+ rendered_output = truncated_output.rstrip()
1929
+ if not rendered_output:
1930
+ rendered_output = "(no output)"
1931
+
1932
+ formatted = (
1933
+ f"{base_response.strip()}\n\n"
1934
+ "```shell\n"
1935
+ f"$ {command}\n"
1936
+ f"{rendered_output}\n"
1937
+ "```"
160
1938
  )
161
1939
 
162
- if response.status_code == 401:
163
- raise RuntimeError("Authentication expired")
1940
+ return formatted, 0
1941
+
1942
    async def _analyze_request_type(self, question: str) -> Dict[str, Any]:
        """Analyze what type of request this is and what APIs to use.

        Pure keyword-scoring heuristic over the lowercased question; no model
        call. Returns a dict with:
            type: "general", a single category, "comprehensive"(+system), or
                  a "+"-joined compound of matched categories.
            apis: ordered, deduplicated list of backends to query.
            confidence: 0.5 (no match), 0.8 (one category), 0.85 (several).
            analysis_mode: "quantitative" (default), "qualitative", or "mixed".
        """

        # Financial indicators
        financial_keywords = [
            'financial', 'revenue', 'profit', 'earnings', 'stock', 'market',
            'ticker', 'company', 'balance sheet', 'income statement', 'cash flow',
            'valuation', 'pe ratio', 'debt', 'equity', 'dividend', 'growth',
            'ceo', 'earnings call', 'quarterly', 'annual report'
        ]

        # Research indicators (quantitative)
        research_keywords = [
            'research', 'paper', 'study', 'academic', 'literature', 'journal',
            'synthesis', 'findings', 'methodology', 'abstract', 'citation',
            'author', 'publication', 'peer review', 'scientific'
        ]

        # Qualitative indicators (NEW)
        qualitative_keywords = [
            'theme', 'themes', 'thematic', 'code', 'coding', 'qualitative',
            'interview', 'interviews', 'transcript', 'case study', 'narrative',
            'discourse', 'content analysis', 'quote', 'quotes', 'excerpt',
            'participant', 'respondent', 'informant', 'ethnography',
            'grounded theory', 'phenomenology', 'what do people say',
            'how do participants', 'sentiment', 'perception', 'experience',
            'lived experience', 'meaning', 'interpret', 'understand',
            'focus group', 'observation', 'field notes', 'memoir', 'diary'
        ]

        # Quantitative indicators (explicit stats/math)
        quantitative_keywords = [
            'calculate', 'average', 'mean', 'median', 'percentage', 'correlation',
            'regression', 'statistical', 'significance', 'p-value', 'variance',
            'standard deviation', 'trend', 'forecast', 'model', 'predict',
            'rate of', 'ratio', 'growth rate', 'change in', 'compared to'
        ]

        # System/technical indicators
        system_keywords = [
            'file', 'directory', 'command', 'run', 'execute', 'install',
            'python', 'code', 'script', 'program', 'system', 'terminal'
        ]

        question_lower = question.lower()

        matched_types: List[str] = []
        apis_to_use: List[str] = []
        analysis_mode = "quantitative"  # default when no qualitative signal

        # Context-aware keyword detection.
        # Strong quant contexts that override everything — e.g. "theme park"
        # or "sentiment analysis algorithm" should not read as qualitative.
        strong_quant_contexts = [
            'algorithm', 'park', 'system', 'database',
            'calculate', 'predict', 'forecast', 'ratio', 'percentage'
        ]

        # Measurement words (can indicate mixed when combined with qual words)
        measurement_words = ['score', 'metric', 'rating', 'measure', 'index']

        has_strong_quant_context = any(ctx in question_lower for ctx in strong_quant_contexts)
        has_measurement = any(mw in question_lower for mw in measurement_words)

        # Special cases: certain qual words + measurement = mixed (a
        # subjective concept being quantified, e.g. "experience score").
        # BUT: only if NOT in a strong quant context (algorithm overrides).
        mixed_indicators = [
            'experience',  # user experience
            'sentiment',   # sentiment analysis
            'perception',  # perception
        ]

        is_mixed_method = False
        if not has_strong_quant_context and has_measurement:
            if any(indicator in question_lower for indicator in mixed_indicators):
                is_mixed_method = True

        # Score = number of keywords present (substring match, so e.g.
        # "themes" also matches 'theme').
        qual_score = sum(1 for kw in qualitative_keywords if kw in question_lower)
        quant_score = sum(1 for kw in quantitative_keywords if kw in question_lower)

        # Financial queries are quantitative by nature (unless explicitly
        # qualitative like "interview"): a lone qual keyword alongside
        # financial terms bumps the quant side toward "mixed".
        has_financial = any(kw in question_lower for kw in financial_keywords)
        if has_financial and qual_score == 1:
            # e.g. "Interview CEO about earnings" = interview (qual) + earnings/CEO (financial)
            quant_score += 1

        # Adjust for context: discount one qualitative hit when the phrase
        # sits in a strong quantitative context ("theme park", etc.).
        if has_strong_quant_context:
            qual_score = max(0, qual_score - 1)

        # Mode resolution, most specific rule first.
        if is_mixed_method:
            # Special case: qual word + measurement = always mixed
            analysis_mode = "mixed"
        elif qual_score >= 2 and quant_score >= 1:
            # Clear mixed: multiple qual + some quant
            analysis_mode = "mixed"
        elif qual_score > quant_score and qual_score > 0:
            # Predominantly qualitative
            analysis_mode = "qualitative"
        elif qual_score > 0 and quant_score > 0:
            # Some of both - default to mixed
            analysis_mode = "mixed"

        if any(keyword in question_lower for keyword in financial_keywords):
            matched_types.append("financial")
            apis_to_use.append("finsight")

        if any(keyword in question_lower for keyword in research_keywords):
            matched_types.append("research")
            apis_to_use.append("archive")

        # Qualitative queries often involve research even without research
        # keywords, so route them to the archive backend too.
        if analysis_mode in ("qualitative", "mixed") and "research" not in matched_types:
            matched_types.append("research")
            if "archive" not in apis_to_use:
                apis_to_use.append("archive")

        if any(keyword in question_lower for keyword in system_keywords):
            matched_types.append("system")
            apis_to_use.append("shell")

        # Deduplicate while preserving order
        apis_to_use = list(dict.fromkeys(apis_to_use))
        unique_types = list(dict.fromkeys(matched_types))

        if not unique_types:
            request_type = "general"
        elif len(unique_types) == 1:
            request_type = unique_types[0]
        elif {"financial", "research"}.issubset(set(unique_types)):
            # financial+research collapses to "comprehensive"; system tags on.
            request_type = "comprehensive"
            if "system" in unique_types:
                request_type += "+system"
        else:
            request_type = "+".join(unique_types)

        confidence = 0.8 if apis_to_use else 0.5
        if len(unique_types) > 1:
            confidence = 0.85

        return {
            "type": request_type,
            "apis": apis_to_use,
            "confidence": confidence,
            "analysis_mode": analysis_mode  # NEW: qualitative, quantitative, or mixed
        }
2092
+
2093
+ async def process_request(self, request: ChatRequest) -> ChatResponse:
2094
+ """Process request with full AI capabilities and API integration"""
2095
+ try:
2096
+ # PRODUCTION MODE: Route all LLM queries through backend
2097
+ # This ensures monetization - no local API key bypass
2098
+ if self.client is None:
2099
+ return await self.call_backend_query(
2100
+ query=request.question,
2101
+ conversation_history=self.conversation_history[-10:] # Last 10 messages for context
2102
+ )
2103
+
2104
+ # DEV MODE ONLY: Direct Groq calls (only works with local API keys)
2105
+ # This code path won't execute in production since self.client = None
2106
+
2107
+ if not self._check_query_budget(request.user_id):
2108
+ effective_limit = self.daily_query_limit if self.daily_query_limit > 0 else self.per_user_query_limit
2109
+ if effective_limit <= 0:
2110
+ effective_limit = 25
2111
+ message = (
2112
+ "Daily query limit reached. You've hit the "
2113
+ f"{effective_limit} request cap for today. "
2114
+ "Try again tomorrow or reach out if you need the limit raised."
2115
+ )
2116
+ return self._quick_reply(
2117
+ request,
2118
+ message,
2119
+ tools_used=["rate_limit"],
2120
+ confidence=0.35,
2121
+ )
2122
+
2123
+ self._record_query_usage(request.user_id)
2124
+
2125
+ # Analyze request type
2126
+ request_analysis = await self._analyze_request_type(request.question)
2127
+ question_lower = request.question.lower()
2128
+
2129
+ self._reset_data_sources()
2130
+
2131
+ direct_shell = re.match(r"^(?:run|execute)\s*:?\s*(.+)$", request.question.strip(), re.IGNORECASE)
2132
+ if direct_shell:
2133
+ return self._respond_with_shell_command(request, direct_shell.group(1).strip())
2134
+
2135
+ # Get memory context
2136
+ memory_context = self._get_memory_context(request.user_id, request.conversation_id)
2137
+
2138
+ # Ultra-light handling for small talk to save tokens entirely
2139
+ if self._is_simple_greeting(request.question):
2140
+ return self._quick_reply(
2141
+ request,
2142
+ "Hi there! I'm up and ready whenever you want to dig into finance or research.",
2143
+ tools_used=["quick_reply"],
2144
+ confidence=0.5
2145
+ )
2146
+
2147
+ if self._is_casual_acknowledgment(request.question):
2148
+ return self._quick_reply(
2149
+ request,
2150
+ "Happy to help! Feel free to fire off another question whenever you're ready.",
2151
+ tools_used=["quick_reply"],
2152
+ confidence=0.55
2153
+ )
2154
+
2155
+ # Call appropriate APIs based on request type
2156
+ api_results = {}
2157
+ tools_used = []
2158
+
2159
+ # Auto file-reading: detect filenames in the prompt and attach previews
2160
+ def _extract_filenames(text: str) -> List[str]:
2161
+ # Match common file patterns (no spaces) and simple quoted paths
2162
+ patterns = [
2163
+ r"[\w\-./]+\.(?:py|md|txt|json|csv|yml|yaml|toml|ini|ts|tsx|js|ipynb)",
2164
+ r"(?:\./|/)?[\w\-./]+/" # directories
2165
+ ]
2166
+ matches: List[str] = []
2167
+ for pat in patterns:
2168
+ matches.extend(re.findall(pat, text))
2169
+ # Deduplicate and keep reasonable length
2170
+ uniq = []
2171
+ for m in matches:
2172
+ if len(m) <= 256 and m not in uniq:
2173
+ uniq.append(m)
2174
+ return uniq[:5]
2175
+
2176
+ mentioned = _extract_filenames(request.question)
2177
+ file_previews: List[Dict[str, Any]] = []
2178
+ files_forbidden: List[str] = []
2179
+ base_dir = Path.cwd().resolve()
2180
+ sensitive_roots = {Path('/etc'), Path('/proc'), Path('/sys'), Path('/dev'), Path('/root'), Path('/usr'), Path('/bin'), Path('/sbin'), Path('/var')}
2181
+ def _is_safe_path(path_str: str) -> bool:
2182
+ try:
2183
+ rp = Path(path_str).resolve()
2184
+ if any(str(rp).startswith(str(sr)) for sr in sensitive_roots):
2185
+ return False
2186
+ return str(rp).startswith(str(base_dir))
2187
+ except Exception:
2188
+ return False
2189
+ for m in mentioned:
2190
+ if not _is_safe_path(m):
2191
+ files_forbidden.append(m)
2192
+ continue
2193
+ pr = await self._preview_file(m)
2194
+ if pr:
2195
+ file_previews.append(pr)
2196
+ if file_previews:
2197
+ api_results["files"] = file_previews
2198
+ # Build grounded context from first text preview
2199
+ text_previews = [fp for fp in file_previews if fp.get("type") == "text" and fp.get("preview")]
2200
+ files_context = ""
2201
+ if text_previews:
2202
+ fp = text_previews[0]
2203
+ quoted = "\n".join(fp["preview"].splitlines()[:20])
2204
+ files_context = f"File: {fp['path']} (first lines)\n" + quoted
2205
+ api_results["files_context"] = files_context
2206
+ elif mentioned:
2207
+ # Mentioned files but none found
2208
+ api_results["files_missing"] = mentioned
2209
+ if files_forbidden:
2210
+ api_results["files_forbidden"] = files_forbidden
2211
+
2212
+ workspace_listing: Optional[Dict[str, Any]] = None
2213
+ if not file_previews:
2214
+ file_browse_keywords = (
2215
+ "list files",
2216
+ "show files",
2217
+ "show me files",
2218
+ "file browser",
2219
+ "file upload",
2220
+ "upload file",
2221
+ "files?",
2222
+ "browse files",
2223
+ "what files",
2224
+ "available files"
2225
+ )
2226
+ describe_files = (
2227
+ "file" in question_lower or "directory" in question_lower
2228
+ ) and any(verb in question_lower for verb in ("show", "list", "what", "which", "display"))
2229
+ if any(keyword in question_lower for keyword in file_browse_keywords) or describe_files:
2230
+ workspace_listing = await self._get_workspace_listing()
2231
+ api_results["workspace_listing"] = workspace_listing
2232
+
2233
+ if workspace_listing and set(request_analysis.get("apis", [])) <= {"shell"}:
2234
+ return self._respond_with_workspace_listing(request, workspace_listing)
2235
+
2236
+ if "finsight" in request_analysis["apis"]:
2237
+ # Extract tickers from symbols or company names
2238
+ tickers = self._extract_tickers_from_text(request.question)
2239
+ financial_payload = {}
2240
+ session_key = f"{request.user_id}:{request.conversation_id}"
2241
+ last_topic = self._session_topics.get(session_key)
2242
+ if not tickers:
2243
+ # Heuristic defaults for common requests
2244
+ if "apple" in request.question.lower():
2245
+ tickers = ["AAPL"]
2246
+ if "microsoft" in request.question.lower():
2247
+ tickers = tickers + ["MSFT"] if "AAPL" in tickers else ["MSFT"]
2248
+
2249
+ # Determine which metrics to fetch based on query keywords
2250
+ metrics_to_fetch = []
2251
+ if any(kw in question_lower for kw in ["revenue", "sales", "top line"]):
2252
+ metrics_to_fetch.append("revenue")
2253
+ if any(kw in question_lower for kw in ["gross profit", "gross margin", "margin"]):
2254
+ metrics_to_fetch.append("grossProfit")
2255
+ if any(kw in question_lower for kw in ["operating income", "operating profit", "ebit"]):
2256
+ metrics_to_fetch.append("operatingIncome")
2257
+ if any(kw in question_lower for kw in ["net income", "profit", "earnings", "bottom line"]):
2258
+ metrics_to_fetch.append("netIncome")
2259
+
2260
+ # Default to key metrics if no specific request
2261
+ if not metrics_to_fetch and last_topic and last_topic.get("metrics"):
2262
+ metrics_to_fetch = list(last_topic["metrics"])
2263
+
2264
+ if not metrics_to_fetch:
2265
+ metrics_to_fetch = ["revenue", "grossProfit"]
2266
+
2267
+ # Fetch metrics for each ticker (cap 2 tickers)
2268
+ for t in tickers[:2]:
2269
+ result = await self.get_financial_metrics(t, metrics_to_fetch)
2270
+ financial_payload[t] = result
2271
+
2272
+ if financial_payload:
2273
+ self._session_topics[session_key] = {
2274
+ "tickers": tickers[:2],
2275
+ "metrics": metrics_to_fetch,
2276
+ }
2277
+ direct_finance = (
2278
+ len(financial_payload) == 1
2279
+ and set(request_analysis.get("apis", [])) == {"finsight"}
2280
+ and not api_results.get("research")
2281
+ and not file_previews
2282
+ and not workspace_listing
2283
+ )
2284
+ if direct_finance:
2285
+ return self._respond_with_financial_metrics(request, financial_payload)
2286
+ api_results["financial"] = financial_payload
2287
+ tools_used.append("finsight_api")
2288
+
2289
+ if "archive" in request_analysis["apis"]:
2290
+ # Extract research query
2291
+ result = await self.search_academic_papers(request.question, 5)
2292
+ if "error" not in result:
2293
+ api_results["research"] = result
2294
+ else:
2295
+ api_results["research"] = {"error": result["error"]}
2296
+ tools_used.append("archive_api")
2297
+
2298
+ # Build enhanced system prompt with trimmed sections based on detected needs
2299
+ system_prompt = self._build_system_prompt(request_analysis, memory_context, api_results)
2300
+
2301
+ # Build messages
2302
+ messages = [
2303
+ {"role": "system", "content": system_prompt}
2304
+ ]
2305
+ # If we have file context, inject it as an additional grounding message
2306
+ fc = api_results.get("files_context")
2307
+ if fc:
2308
+ messages.append({"role": "system", "content": f"Grounding from mentioned file(s):\n{fc}\n\nAnswer based strictly on this content when relevant. Do not run shell commands."})
2309
+ missing = api_results.get("files_missing")
2310
+ if missing:
2311
+ messages.append({"role": "system", "content": f"User mentioned file(s) not found: {missing}. Respond explicitly that the file was not found and avoid speculation."})
2312
+ forbidden = api_results.get("files_forbidden")
2313
+ if forbidden:
2314
+ messages.append({"role": "system", "content": f"User mentioned file(s) outside the allowed workspace or sensitive paths: {forbidden}. Refuse to access and explain the restriction succinctly."})
2315
+
2316
+ # Add conversation history with smart context management
2317
+ if len(self.conversation_history) > 12:
2318
+ # For long conversations, summarize early context and keep recent history
2319
+ early_history = self.conversation_history[:-6]
2320
+ recent_history = self.conversation_history[-6:]
2321
+
2322
+ # Create a summary of early conversation
2323
+ summary_prompt = "Summarize the key points from this conversation history in 2-3 sentences:"
2324
+ summary_messages = [
2325
+ {"role": "system", "content": summary_prompt},
2326
+ {"role": "user", "content": str(early_history)}
2327
+ ]
2328
+
2329
+ try:
2330
+ if self._ensure_client_ready():
2331
+ summary_response = self.client.chat.completions.create(
2332
+ model="llama-3.1-8b-instant",
2333
+ messages=summary_messages,
2334
+ max_tokens=160,
2335
+ temperature=0.2
2336
+ )
2337
+ conversation_summary = summary_response.choices[0].message.content
2338
+ if summary_response.usage and summary_response.usage.total_tokens:
2339
+ summary_tokens = summary_response.usage.total_tokens
2340
+ self._charge_tokens(request.user_id, summary_tokens)
2341
+ self.total_cost += (summary_tokens / 1000) * self.cost_per_1k_tokens
2342
+ messages.append({"role": "system", "content": f"Previous conversation summary: {conversation_summary}"})
2343
+ except:
2344
+ # If summary fails, just use recent history
2345
+ pass
2346
+
2347
+ messages.extend(recent_history)
2348
+ else:
2349
+ # For shorter conversations, use full history
2350
+ messages.extend(self.conversation_history)
2351
+
2352
+ # Add current user message
2353
+ messages.append({"role": "user", "content": request.question})
2354
+
2355
+ model_config = self._select_model(request, request_analysis, api_results)
2356
+ target_model = model_config["model"]
2357
+ max_completion_tokens = model_config["max_tokens"]
2358
+ temperature = model_config["temperature"]
2359
+
2360
+ # Check token budget
2361
+ estimated_tokens = (len(str(messages)) // 4) + max_completion_tokens # Rough estimate incl. completion budget
2362
+ if not self._check_token_budget(estimated_tokens):
2363
+ return self._respond_with_fallback(
2364
+ request,
2365
+ tools_used,
2366
+ api_results,
2367
+ failure_reason="Daily Groq token budget exhausted",
2368
+ error_message="Daily token limit reached"
2369
+ )
2370
+
2371
+ if not self._check_user_token_budget(request.user_id, estimated_tokens):
2372
+ return self._respond_with_fallback(
2373
+ request,
2374
+ tools_used,
2375
+ api_results,
2376
+ failure_reason="Per-user Groq token budget exhausted",
2377
+ error_message="Per-user token limit reached"
2378
+ )
2379
+
2380
+ if not self._ensure_client_ready():
2381
+ return self._respond_with_fallback(
2382
+ request,
2383
+ tools_used,
2384
+ api_results,
2385
+ failure_reason="No available Groq API key"
2386
+ )
2387
+
2388
+ response_text: Optional[str] = None
2389
+ tokens_used = 0
2390
+ attempts_remaining = len(self.api_keys) if self.api_keys else (1 if self.client else 0)
2391
+ last_error: Optional[Exception] = None
2392
+
2393
+ while attempts_remaining > 0:
2394
+ attempts_remaining -= 1
2395
+ try:
2396
+ response = self.client.chat.completions.create(
2397
+ model=target_model,
2398
+ messages=messages,
2399
+ max_tokens=max_completion_tokens,
2400
+ temperature=temperature
2401
+ )
2402
+
2403
+ response_text = response.choices[0].message.content
2404
+ tokens_used = response.usage.total_tokens if response.usage else estimated_tokens
2405
+ self._charge_tokens(request.user_id, tokens_used)
2406
+ cost = (tokens_used / 1000) * self.cost_per_1k_tokens
2407
+ self.total_cost += cost
2408
+ break
2409
+ except Exception as e:
2410
+ last_error = e
2411
+ if self._is_rate_limit_error(e):
2412
+ self._mark_current_key_exhausted(str(e))
2413
+ if not self._rotate_to_next_available_key():
2414
+ break
2415
+ continue
2416
+ else:
2417
+ error_str = str(e)
2418
+ friendly = self._format_model_error(error_str)
2419
+ return ChatResponse(
2420
+ response=friendly,
2421
+ timestamp=datetime.now().isoformat(),
2422
+ tools_used=tools_used,
2423
+ api_results=api_results,
2424
+ error_message=error_str
2425
+ )
2426
+
2427
+ if response_text is None:
2428
+ rate_limit_error = last_error if last_error and self._is_rate_limit_error(last_error) else None
2429
+ if rate_limit_error:
2430
+ return self._respond_with_fallback(
2431
+ request,
2432
+ tools_used,
2433
+ api_results,
2434
+ failure_reason="All Groq API keys exhausted",
2435
+ error_message=str(rate_limit_error)
2436
+ )
2437
+ error_str = str(last_error) if last_error else "Unknown error"
2438
+ friendly = self._format_model_error(error_str)
2439
+ return ChatResponse(
2440
+ response=friendly,
2441
+ timestamp=datetime.now().isoformat(),
2442
+ tools_used=tools_used,
2443
+ api_results=api_results,
2444
+ error_message=error_str
2445
+ )
2446
+
2447
+ self._schedule_next_key_rotation()
2448
+
2449
+ allow_shell_commands = "shell" in request_analysis.get("apis", []) or request_analysis.get("type") in {"system", "comprehensive+system"}
2450
+ if api_results.get("files_context") or api_results.get("files_missing") or api_results.get("files_forbidden"):
2451
+ allow_shell_commands = False
2452
+
2453
+ commands = re.findall(r'`([^`]+)`', response_text) if allow_shell_commands else []
2454
+ execution_results = {}
2455
+ final_response = response_text
2456
+
2457
+ if commands:
2458
+ command = commands[0].strip()
2459
+ if self._is_safe_shell_command(command):
2460
+ print(f"\n🔧 Executing: {command}")
2461
+ output = self.execute_command(command)
2462
+ print(f"✅ Command completed")
2463
+ execution_results = {
2464
+ "command": command,
2465
+ "output": output,
2466
+ "success": not output.startswith("ERROR:")
2467
+ }
2468
+ tools_used.append("shell_execution")
2469
+ else:
2470
+ execution_results = {
2471
+ "command": command,
2472
+ "output": "Command blocked by safety policy",
2473
+ "success": False
2474
+ }
2475
+ if "⚠️ Shell command skipped for safety." not in final_response:
2476
+ final_response = f"{final_response.strip()}\n\n⚠️ Shell command skipped for safety."
2477
+
2478
+ # Create analysis prompt only if we actually executed and have output
2479
+ if execution_results.get("success") and isinstance(execution_results.get("output"), str):
2480
+ truncated_output = execution_results["output"]
2481
+ truncated_flag = False
2482
+ if len(truncated_output) > 1000:
2483
+ truncated_output = truncated_output[:1000]
2484
+ truncated_flag = True
2485
+
2486
+ summarised_text, summary_tokens = self._summarize_command_output(
2487
+ request,
2488
+ command,
2489
+ truncated_output,
2490
+ response_text
2491
+ )
2492
+
2493
+ final_response = summarised_text
2494
+ if truncated_flag:
2495
+ final_response += "\n\n(Output truncated to first 1000 characters.)"
2496
+ if summary_tokens:
2497
+ self._charge_tokens(request.user_id, summary_tokens)
2498
+ tokens_used += summary_tokens
2499
+ else:
2500
+ final_response = response_text
2501
+
2502
+ footer = self._format_data_sources_footer()
2503
+ if footer:
2504
+ final_response = f"{final_response}\n\n_{footer}_"
2505
+
2506
+ # Update conversation history
2507
+ self.conversation_history.append({"role": "user", "content": request.question})
2508
+ self.conversation_history.append({"role": "assistant", "content": final_response})
2509
+
2510
+ # Update memory
2511
+ self._update_memory(
2512
+ request.user_id,
2513
+ request.conversation_id,
2514
+ f"Q: {request.question[:100]}... A: {final_response[:100]}..."
2515
+ )
2516
+
2517
+ return ChatResponse(
2518
+ response=final_response,
2519
+ tools_used=tools_used,
2520
+ reasoning_steps=[f"Request type: {request_analysis['type']}", f"APIs used: {request_analysis['apis']}"],
2521
+ timestamp=datetime.now().isoformat(),
2522
+ tokens_used=tokens_used,
2523
+ confidence_score=request_analysis['confidence'],
2524
+ execution_results=execution_results,
2525
+ api_results=api_results
2526
+ )
2527
+
2528
+ except Exception as e:
2529
+ details = str(e)
2530
+ message = (
2531
+ "⚠️ Something went wrong while orchestrating your request, but no actions were performed. "
2532
+ "Please retry, and if the issue persists share this detail with the team: {details}."
2533
+ ).format(details=details)
2534
+ return ChatResponse(
2535
+ response=message,
2536
+ timestamp=datetime.now().isoformat(),
2537
+ confidence_score=0.0,
2538
+ error_message=details
2539
+ )
2540
+
2541
+ async def process_request_streaming(self, request: ChatRequest):
2542
+ """
2543
+ Process request with streaming response from Groq API
2544
+ Returns a Groq stream object that yields chunks as they arrive
2545
+
2546
+ This enables real-time character-by-character streaming in the UI
2547
+ """
2548
+ # PRODUCTION MODE: Backend doesn't support streaming yet, use regular response
2549
+ if self.client is None:
2550
+ response = await self.call_backend_query(request.question, self.conversation_history[-10:])
2551
+ async def single_yield():
2552
+ yield response.response
2553
+ return single_yield()
2554
+
2555
+ # DEV MODE ONLY
2556
+ try:
2557
+ # Quick budget checks
2558
+ if not self._check_query_budget(request.user_id):
2559
+ effective_limit = self.daily_query_limit if self.daily_query_limit > 0 else self.per_user_query_limit
2560
+ if effective_limit <= 0:
2561
+ effective_limit = 25
2562
+ error_msg = (
2563
+ f"Daily query limit reached. You've hit the {effective_limit} request cap for today. "
2564
+ "Try again tomorrow or reach out if you need the limit raised."
2565
+ )
2566
+ async def error_gen():
2567
+ yield error_msg
2568
+ return error_gen()
2569
+
2570
+ self._record_query_usage(request.user_id)
2571
+
2572
+ # Analyze request
2573
+ request_analysis = await self._analyze_request_type(request.question)
2574
+ question_lower = request.question.lower()
2575
+ self._reset_data_sources()
2576
+
2577
+ # Direct shell commands (non-streaming fallback)
2578
+ direct_shell = re.match(r"^(?:run|execute)\s*:?\s*(.+)$", request.question.strip(), re.IGNORECASE)
2579
+ if direct_shell:
2580
+ result = self._respond_with_shell_command(request, direct_shell.group(1).strip())
2581
+ async def shell_gen():
2582
+ yield result.response
2583
+ return shell_gen()
2584
+
2585
+ # Memory context
2586
+ memory_context = self._get_memory_context(request.user_id, request.conversation_id)
2587
+
2588
+ # Quick greetings (non-streaming)
2589
+ if self._is_simple_greeting(request.question):
2590
+ async def greeting_gen():
2591
+ yield "Hi there! I'm up and ready whenever you want to dig into finance or research."
2592
+ return greeting_gen()
2593
+
2594
+ if self._is_casual_acknowledgment(request.question):
2595
+ async def ack_gen():
2596
+ yield "Happy to help! Feel free to fire off another question whenever you're ready."
2597
+ return ack_gen()
2598
+
2599
+ # Gather API results (same logic as process_request but abbreviated)
2600
+ api_results = {}
2601
+ tools_used = []
2602
+
2603
+ # File preview
2604
+ def _extract_filenames(text: str) -> List[str]:
2605
+ patterns = [
2606
+ r"[\w\-./]+\.(?:py|md|txt|json|csv|yml|yaml|toml|ini|ts|tsx|js|ipynb)",
2607
+ r"(?:\./|/)?[\w\-./]+/"
2608
+ ]
2609
+ matches: List[str] = []
2610
+ for pat in patterns:
2611
+ matches.extend(re.findall(pat, text))
2612
+ uniq = []
2613
+ for m in matches:
2614
+ if len(m) <= 256 and m not in uniq:
2615
+ uniq.append(m)
2616
+ return uniq[:5]
2617
+
2618
+ mentioned = _extract_filenames(request.question)
2619
+ file_previews: List[Dict[str, Any]] = []
2620
+ files_forbidden: List[str] = []
2621
+ base_dir = Path.cwd().resolve()
2622
+ sensitive_roots = {Path('/etc'), Path('/proc'), Path('/sys'), Path('/dev'), Path('/root'), Path('/usr'), Path('/bin'), Path('/sbin'), Path('/var')}
2623
+
2624
+ def _is_safe_path(path_str: str) -> bool:
2625
+ try:
2626
+ rp = Path(path_str).resolve()
2627
+ if any(str(rp).startswith(str(sr)) for sr in sensitive_roots):
2628
+ return False
2629
+ return str(rp).startswith(str(base_dir))
2630
+ except Exception:
2631
+ return False
2632
+
2633
+ for m in mentioned:
2634
+ if not _is_safe_path(m):
2635
+ files_forbidden.append(m)
2636
+ continue
2637
+ pr = await self._preview_file(m)
2638
+ if pr:
2639
+ file_previews.append(pr)
2640
+
2641
+ if file_previews:
2642
+ api_results["files"] = file_previews
2643
+ text_previews = [fp for fp in file_previews if fp.get("type") == "text" and fp.get("preview")]
2644
+ files_context = ""
2645
+ if text_previews:
2646
+ fp = text_previews[0]
2647
+ quoted = "\n".join(fp["preview"].splitlines()[:20])
2648
+ files_context = f"File: {fp['path']} (first lines)\n" + quoted
2649
+ api_results["files_context"] = files_context
2650
+ elif mentioned:
2651
+ api_results["files_missing"] = mentioned
2652
+ if files_forbidden:
2653
+ api_results["files_forbidden"] = files_forbidden
2654
+
2655
+ # Workspace listing
2656
+ workspace_listing: Optional[Dict[str, Any]] = None
2657
+ if not file_previews:
2658
+ file_browse_keywords = ("list files", "show files", "what files")
2659
+ describe_files = ("file" in question_lower or "directory" in question_lower)
2660
+ if any(keyword in question_lower for keyword in file_browse_keywords) or describe_files:
2661
+ workspace_listing = await self._get_workspace_listing()
2662
+ api_results["workspace_listing"] = workspace_listing
2663
+
2664
+ if workspace_listing and set(request_analysis.get("apis", [])) <= {"shell"}:
2665
+ result = self._respond_with_workspace_listing(request, workspace_listing)
2666
+ async def workspace_gen():
2667
+ yield result.response
2668
+ return workspace_gen()
2669
+
2670
+ # FinSight API (abbreviated)
2671
+ if "finsight" in request_analysis["apis"]:
2672
+ tickers = self._extract_tickers_from_text(request.question)
2673
+ financial_payload = {}
2674
+
2675
+ if not tickers:
2676
+ if "apple" in question_lower:
2677
+ tickers = ["AAPL"]
2678
+ if "microsoft" in question_lower:
2679
+ tickers = ["MSFT"] if not tickers else tickers + ["MSFT"]
2680
+
2681
+ metrics_to_fetch = ["revenue", "grossProfit"]
2682
+ if any(kw in question_lower for kw in ["revenue", "sales"]):
2683
+ metrics_to_fetch = ["revenue"]
2684
+ if any(kw in question_lower for kw in ["profit", "margin"]):
2685
+ metrics_to_fetch.append("grossProfit")
2686
+
2687
+ for t in tickers[:2]:
2688
+ result = await self.get_financial_metrics(t, metrics_to_fetch)
2689
+ financial_payload[t] = result
2690
+
2691
+ if financial_payload:
2692
+ api_results["financial"] = financial_payload
2693
+ tools_used.append("finsight_api")
2694
+
2695
+ # Archive API (abbreviated)
2696
+ if "archive" in request_analysis["apis"]:
2697
+ result = await self.search_academic_papers(request.question, 5)
2698
+ if "error" not in result:
2699
+ api_results["research"] = result
2700
+ else:
2701
+ api_results["research"] = {"error": result["error"]}
2702
+ tools_used.append("archive_api")
2703
+
2704
+ # Build messages
2705
+ system_prompt = self._build_system_prompt(request_analysis, memory_context, api_results)
2706
+ messages = [{"role": "system", "content": system_prompt}]
2707
+
2708
+ fc = api_results.get("files_context")
2709
+ if fc:
2710
+ messages.append({"role": "system", "content": f"Grounding from mentioned file(s):\n{fc}"})
2711
+
2712
+ # Add conversation history (abbreviated - just recent)
2713
+ if len(self.conversation_history) > 6:
2714
+ messages.extend(self.conversation_history[-6:])
2715
+ else:
2716
+ messages.extend(self.conversation_history)
2717
+
2718
+ messages.append({"role": "user", "content": request.question})
2719
+
2720
+ # Model selection
2721
+ model_config = self._select_model(request, request_analysis, api_results)
2722
+ target_model = model_config["model"]
2723
+ max_completion_tokens = model_config["max_tokens"]
2724
+ temperature = model_config["temperature"]
2725
+
2726
+ # Token budget check
2727
+ estimated_tokens = (len(str(messages)) // 4) + max_completion_tokens
2728
+ if not self._check_token_budget(estimated_tokens):
2729
+ async def budget_gen():
2730
+ yield "⚠️ Daily Groq token budget exhausted. Please try again tomorrow."
2731
+ return budget_gen()
2732
+
2733
+ if not self._ensure_client_ready():
2734
+ async def no_key_gen():
2735
+ yield "⚠️ No available Groq API key."
2736
+ return no_key_gen()
2737
+
2738
+ # **STREAMING: Call Groq with stream=True**
2739
+ try:
2740
+ stream = self.client.chat.completions.create(
2741
+ model=target_model,
2742
+ messages=messages,
2743
+ max_tokens=max_completion_tokens,
2744
+ temperature=temperature,
2745
+ stream=True # Enable streaming!
2746
+ )
2747
+
2748
+ # Update conversation history (add user message now, assistant message will be added after streaming completes)
2749
+ self.conversation_history.append({"role": "user", "content": request.question})
2750
+
2751
+ # Return the stream directly - groq_stream_to_generator() in streaming_ui.py will handle it
2752
+ return stream
2753
+
2754
+ except Exception as e:
2755
+ if self._is_rate_limit_error(e):
2756
+ self._mark_current_key_exhausted(str(e))
2757
+ if self._rotate_to_next_available_key():
2758
+ try:
2759
+ stream = self.client.chat.completions.create(
2760
+ model=target_model,
2761
+ messages=messages,
2762
+ max_tokens=max_completion_tokens,
2763
+ temperature=temperature,
2764
+ stream=True
2765
+ )
2766
+ self.conversation_history.append({"role": "user", "content": request.question})
2767
+ return stream
2768
+ except:
2769
+ pass
2770
+ async def error_gen():
2771
+ yield f"⚠️ Groq API error: {str(e)}"
2772
+ return error_gen()
2773
+
2774
+ except Exception as e:
2775
+ async def exception_gen():
2776
+ yield f"⚠️ Request failed: {str(e)}"
2777
+ return exception_gen()
2778
+
2779
+ async def run_interactive(self):
2780
+ """Run interactive chat session"""
2781
+ if not await self.initialize():
2782
+ return
2783
+
2784
+ print("\n" + "="*70)
2785
+ print("🤖 ENHANCED NOCTURNAL AI AGENT")
2786
+ print("="*70)
2787
+ print("Research Assistant with Archive API + FinSight API Integration")
2788
+ print("Type 'quit' to exit")
2789
+ print("="*70)
2790
+
2791
+ while True:
2792
+ try:
2793
+ user_input = input("\n👤 You: ").strip()
2794
+
2795
+ if user_input.lower() in ['quit', 'exit', 'bye']:
2796
+ print("👋 Goodbye!")
2797
+ await self.close()
2798
+ break
2799
+
2800
+ # Process request
2801
+ request = ChatRequest(question=user_input)
2802
+ response = await self.process_request(request)
2803
+
2804
+ print(f"\n🤖 Agent: {response.response}")
2805
+
2806
+ if response.api_results:
2807
+ print(f"📊 API Results: {len(response.api_results)} sources used")
2808
+
2809
+ if response.execution_results:
2810
+ print(f"🔧 Command: {response.execution_results['command']}")
2811
+ print(f"📊 Success: {response.execution_results['success']}")
2812
+
2813
+ print(f"📈 Tokens used: {response.tokens_used}")
2814
+ print(f"🎯 Confidence: {response.confidence_score:.2f}")
2815
+ print(f"🛠️ Tools used: {', '.join(response.tools_used) if response.tools_used else 'None'}")
2816
+
2817
+ except KeyboardInterrupt:
2818
+ print("\n👋 Goodbye!")
2819
+ await self.close()
2820
+ break
2821
+ except Exception as e:
2822
+ print(f"\n❌ Error: {e}")
2823
+
2824
async def main():
    """Main entry point: build the agent and hand control to the interactive loop."""
    agent = EnhancedNocturnalAgent()
    await agent.run_interactive()


if __name__ == "__main__":
    asyncio.run(main())