cite-agent 1.3.6__py3-none-any.whl → 1.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cite-agent might be problematic. Click here for more details.

Files changed (36)
  1. cite_agent/__version__.py +1 -1
  2. cite_agent/cli.py +9 -2
  3. cite_agent/enhanced_ai_agent.py +332 -73
  4. {cite_agent-1.3.6.dist-info → cite_agent-1.3.7.dist-info}/METADATA +1 -1
  5. cite_agent-1.3.7.dist-info/RECORD +31 -0
  6. {cite_agent-1.3.6.dist-info → cite_agent-1.3.7.dist-info}/top_level.txt +0 -1
  7. cite_agent-1.3.6.dist-info/RECORD +0 -57
  8. src/__init__.py +0 -1
  9. src/services/__init__.py +0 -132
  10. src/services/auth_service/__init__.py +0 -3
  11. src/services/auth_service/auth_manager.py +0 -33
  12. src/services/graph/__init__.py +0 -1
  13. src/services/graph/knowledge_graph.py +0 -194
  14. src/services/llm_service/__init__.py +0 -5
  15. src/services/llm_service/llm_manager.py +0 -495
  16. src/services/paper_service/__init__.py +0 -5
  17. src/services/paper_service/openalex.py +0 -231
  18. src/services/performance_service/__init__.py +0 -1
  19. src/services/performance_service/rust_performance.py +0 -395
  20. src/services/research_service/__init__.py +0 -23
  21. src/services/research_service/chatbot.py +0 -2056
  22. src/services/research_service/citation_manager.py +0 -436
  23. src/services/research_service/context_manager.py +0 -1441
  24. src/services/research_service/conversation_manager.py +0 -597
  25. src/services/research_service/critical_paper_detector.py +0 -577
  26. src/services/research_service/enhanced_research.py +0 -121
  27. src/services/research_service/enhanced_synthesizer.py +0 -375
  28. src/services/research_service/query_generator.py +0 -777
  29. src/services/research_service/synthesizer.py +0 -1273
  30. src/services/search_service/__init__.py +0 -5
  31. src/services/search_service/indexer.py +0 -186
  32. src/services/search_service/search_engine.py +0 -342
  33. src/services/simple_enhanced_main.py +0 -287
  34. {cite_agent-1.3.6.dist-info → cite_agent-1.3.7.dist-info}/WHEEL +0 -0
  35. {cite_agent-1.3.6.dist-info → cite_agent-1.3.7.dist-info}/entry_points.txt +0 -0
  36. {cite_agent-1.3.6.dist-info → cite_agent-1.3.7.dist-info}/licenses/LICENSE +0 -0
cite_agent/__version__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.3.6"
1
+ __version__ = "1.3.7"
cite_agent/cli.py CHANGED
@@ -320,9 +320,16 @@ class NocturnalCLI:
320
320
 
321
321
  response = await self.agent.process_request(request)
322
322
 
323
- # Print response with proper formatting
323
+ # Print response with typing effect for natural feel
324
324
  self.console.print("[bold violet]🤖 Agent[/]: ", end="", highlight=False)
325
- self.console.print(response.response)
325
+
326
+ # Character-by-character streaming (like ChatGPT)
327
+ import time
328
+ for char in response.response:
329
+ self.console.print(char, end="", style="white")
330
+ time.sleep(0.008) # 8ms per character (~125 chars/sec)
331
+
332
+ self.console.print() # Newline after response
326
333
 
327
334
  # Save to history automatically
328
335
  self.workflow.save_query_result(
cite_agent/enhanced_ai_agent.py CHANGED
@@ -89,6 +89,15 @@ class EnhancedNocturnalAgent:
89
89
  from .workflow import WorkflowManager
90
90
  self.workflow = WorkflowManager()
91
91
  self.last_paper_result = None # Track last paper mentioned for "save that"
92
+
93
+ # File context tracking (for pronoun resolution and multi-turn)
94
+ self.file_context = {
95
+ 'last_file': None, # Last file mentioned/read
96
+ 'last_directory': None, # Last directory mentioned/navigated
97
+ 'recent_files': [], # Last 5 files (for "those files")
98
+ 'recent_dirs': [], # Last 5 directories
99
+ 'current_cwd': None, # Track shell's current directory
100
+ }
92
101
  try:
93
102
  self.per_user_token_limit = int(os.getenv("GROQ_PER_USER_TOKENS", 50000))
94
103
  except (TypeError, ValueError):
@@ -1950,14 +1959,17 @@ class EnhancedNocturnalAgent:
1950
1959
  url = f"{self.finsight_base_url}/{endpoint}"
1951
1960
  # Start fresh with headers - don't use _default_headers which might be wrong
1952
1961
  headers = {}
1953
-
1962
+
1954
1963
  # Always use demo key for FinSight (SEC data is public)
1955
1964
  headers["X-API-Key"] = "demo-key-123"
1956
-
1965
+
1966
+ # Mark request as agent-mediated for product separation
1967
+ headers["X-Request-Source"] = "agent"
1968
+
1957
1969
  # Also add JWT if we have it
1958
1970
  if self.auth_token:
1959
1971
  headers["Authorization"] = f"Bearer {self.auth_token}"
1960
-
1972
+
1961
1973
  debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
1962
1974
  if debug_mode:
1963
1975
  print(f"🔍 FinSight headers: {list(headers.keys())}, X-API-Key={headers.get('X-API-Key')}")
@@ -2183,36 +2195,80 @@ class EnhancedNocturnalAgent:
2183
2195
  except Exception as e:
2184
2196
  return f"ERROR: {e}"
2185
2197
 
2186
- def _is_safe_shell_command(self, cmd: str) -> bool:
2198
+ def _classify_command_safety(self, cmd: str) -> str:
2187
2199
  """
2188
- Minimal safety check - only block truly catastrophic commands.
2189
- Philosophy: This is the user's machine. They can do anything in terminal anyway.
2190
- We only block commands that could cause immediate, irreversible system damage.
2200
+ Classify command by safety level for smart execution.
2201
+ Returns: 'SAFE', 'WRITE', 'DANGEROUS', or 'BLOCKED'
2191
2202
  """
2192
2203
  cmd = cmd.strip()
2193
2204
  if not cmd:
2194
- return False
2195
-
2196
- # Block ONLY truly catastrophic commands
2205
+ return 'BLOCKED'
2206
+
2207
+ cmd_lower = cmd.lower()
2208
+ cmd_parts = cmd.split()
2209
+ cmd_base = cmd_parts[0] if cmd_parts else ''
2210
+ cmd_with_sub = ' '.join(cmd_parts[:2]) if len(cmd_parts) >= 2 else ''
2211
+
2212
+ # BLOCKED: Catastrophic commands
2197
2213
  nuclear_patterns = [
2198
- 'rm -rf /', # Wipe root filesystem
2199
- 'rm -rf ~/*', # Wipe home directory
2200
- 'dd if=/dev/zero of=/dev/sda', # Wipe disk
2201
- 'dd if=/dev/zero of=/dev/hda',
2202
- 'mkfs', # Format filesystem
2203
- 'fdisk', # Partition disk
2214
+ 'rm -rf /',
2215
+ 'rm -rf ~',
2216
+ 'rm -rf /*',
2217
+ 'dd if=/dev/zero',
2218
+ 'mkfs',
2219
+ 'fdisk',
2204
2220
  ':(){ :|:& };:', # Fork bomb
2205
- 'chmod -R 777 /', # Make everything executable
2221
+ 'chmod -r 777 /',
2222
+ '> /dev/sda',
2206
2223
  ]
2207
-
2208
- cmd_lower = cmd.lower()
2209
2224
  for pattern in nuclear_patterns:
2210
- if pattern.lower() in cmd_lower:
2211
- return False
2212
-
2213
- # Allow everything else - pip, npm, git, pipes, redirection, etc.
2214
- # User asked for it, user gets it. Just like Cursor.
2215
- return True
2225
+ if pattern in cmd_lower:
2226
+ return 'BLOCKED'
2227
+
2228
+ # SAFE: Read-only commands
2229
+ safe_commands = {
2230
+ 'pwd', 'ls', 'cd', 'cat', 'head', 'tail', 'grep', 'find', 'which', 'type',
2231
+ 'wc', 'diff', 'echo', 'ps', 'top', 'df', 'du', 'file', 'stat', 'tree',
2232
+ 'whoami', 'hostname', 'date', 'cal', 'uptime', 'printenv', 'env',
2233
+ }
2234
+ safe_git = {'git status', 'git log', 'git diff', 'git branch', 'git show', 'git remote'}
2235
+
2236
+ if cmd_base in safe_commands or cmd_with_sub in safe_git:
2237
+ return 'SAFE'
2238
+
2239
+ # WRITE: File creation/modification (allowed but tracked)
2240
+ write_commands = {'mkdir', 'touch', 'cp', 'mv', 'tee'}
2241
+ if cmd_base in write_commands:
2242
+ return 'WRITE'
2243
+
2244
+ # WRITE: Redirection operations (echo > file, cat > file)
2245
+ if '>' in cmd or '>>' in cmd:
2246
+ # Allow redirection to regular files, block to devices
2247
+ if '/dev/' not in cmd_lower:
2248
+ return 'WRITE'
2249
+ else:
2250
+ return 'BLOCKED'
2251
+
2252
+ # DANGEROUS: Deletion and permission changes
2253
+ dangerous_commands = {'rm', 'rmdir', 'chmod', 'chown', 'chgrp'}
2254
+ if cmd_base in dangerous_commands:
2255
+ return 'DANGEROUS'
2256
+
2257
+ # WRITE: Git write operations
2258
+ write_git = {'git add', 'git commit', 'git push', 'git pull', 'git checkout', 'git merge'}
2259
+ if cmd_with_sub in write_git:
2260
+ return 'WRITE'
2261
+
2262
+ # Default: Treat unknown commands as requiring user awareness
2263
+ return 'WRITE'
2264
+
2265
+ def _is_safe_shell_command(self, cmd: str) -> bool:
2266
+ """
2267
+ Compatibility wrapper for old safety check.
2268
+ Now uses tiered classification system.
2269
+ """
2270
+ classification = self._classify_command_safety(cmd)
2271
+ return classification in ['SAFE', 'WRITE'] # Allow SAFE and WRITE, block DANGEROUS and BLOCKED
2216
2272
 
2217
2273
  def _check_token_budget(self, estimated_tokens: int) -> bool:
2218
2274
  """Check if we have enough token budget"""
@@ -2450,12 +2506,42 @@ class EnhancedNocturnalAgent:
2450
2506
  async def _analyze_request_type(self, question: str) -> Dict[str, Any]:
2451
2507
  """Analyze what type of request this is and what APIs to use"""
2452
2508
 
2453
- # Financial indicators
2509
+ # Financial indicators - COMPREHENSIVE list to ensure FinSight is used
2454
2510
  financial_keywords = [
2455
- 'financial', 'revenue', 'profit', 'earnings', 'stock', 'market',
2456
- 'ticker', 'company', 'balance sheet', 'income statement', 'cash flow',
2457
- 'valuation', 'pe ratio', 'debt', 'equity', 'dividend', 'growth',
2458
- 'ceo', 'earnings call', 'quarterly', 'annual report'
2511
+ # Core metrics
2512
+ 'financial', 'revenue', 'sales', 'income', 'profit', 'earnings', 'loss',
2513
+ 'net income', 'operating income', 'gross profit', 'ebitda', 'ebit',
2514
+
2515
+ # Margins & Ratios
2516
+ 'margin', 'gross margin', 'profit margin', 'operating margin', 'net margin', 'ebitda margin',
2517
+ 'ratio', 'current ratio', 'quick ratio', 'debt ratio', 'pe ratio', 'p/e',
2518
+ 'roe', 'roa', 'roic', 'roce', 'eps',
2519
+
2520
+ # Balance Sheet
2521
+ 'assets', 'liabilities', 'equity', 'debt', 'cash', 'capital',
2522
+ 'balance sheet', 'total assets', 'current assets', 'fixed assets',
2523
+ 'shareholders equity', 'stockholders equity', 'retained earnings',
2524
+
2525
+ # Cash Flow
2526
+ 'cash flow', 'fcf', 'free cash flow', 'operating cash flow',
2527
+ 'cfo', 'cfi', 'cff', 'capex', 'capital expenditure',
2528
+
2529
+ # Market Metrics
2530
+ 'stock', 'market cap', 'market capitalization', 'enterprise value',
2531
+ 'valuation', 'price', 'share price', 'stock price', 'quote',
2532
+ 'volume', 'trading volume', 'shares outstanding',
2533
+
2534
+ # Financial Statements
2535
+ 'income statement', '10-k', '10-q', '8-k', 'filing', 'sec filing',
2536
+ 'quarterly', 'annual report', 'earnings report', 'financial statement',
2537
+
2538
+ # Company Info
2539
+ 'ticker', 'company', 'corporation', 'ceo', 'earnings call',
2540
+ 'dividend', 'dividend yield', 'payout ratio',
2541
+
2542
+ # Growth & Performance
2543
+ 'growth', 'yoy', 'year over year', 'qoq', 'quarter over quarter',
2544
+ 'cagr', 'trend', 'performance', 'returns'
2459
2545
  ]
2460
2546
 
2461
2547
  # Research indicators (quantitative)
@@ -2664,40 +2750,68 @@ class EnhancedNocturnalAgent:
2664
2750
  # Quick check if query might need shell
2665
2751
  question_lower = request.question.lower()
2666
2752
  might_need_shell = any(word in question_lower for word in [
2667
- 'directory', 'folder', 'where', 'find', 'list', 'files', 'look', 'search', 'check', 'into',
2668
- 'show', 'open', 'read', 'display', 'cat', 'view', 'contents', '.r', '.py', '.csv', '.ipynb'
2753
+ 'directory', 'folder', 'where', 'find', 'list', 'files', 'file', 'look', 'search', 'check', 'into',
2754
+ 'show', 'open', 'read', 'display', 'cat', 'view', 'contents', '.r', '.py', '.csv', '.ipynb',
2755
+ 'create', 'make', 'mkdir', 'touch', 'new', 'write', 'copy', 'move', 'delete', 'remove',
2756
+ 'git', 'grep', 'navigate', 'go to', 'change to'
2669
2757
  ])
2670
2758
 
2671
2759
  if might_need_shell and self.shell_session:
2760
+ # Get current directory and context for intelligent planning
2761
+ try:
2762
+ current_dir = self.execute_command("pwd").strip()
2763
+ self.file_context['current_cwd'] = current_dir
2764
+ except:
2765
+ current_dir = "~"
2766
+
2767
+ last_file = self.file_context.get('last_file') or 'None'
2768
+ last_dir = self.file_context.get('last_directory') or 'None'
2769
+
2672
2770
  # Ask LLM planner: What shell command should we run?
2673
- planner_prompt = f"""You are a shell command planner. Determine what shell command to run.
2771
+ planner_prompt = f"""You are a shell command planner. Determine what shell command to run, if any.
2674
2772
 
2675
2773
  User query: "{request.question}"
2676
2774
  Previous conversation: {json.dumps(self.conversation_history[-2:]) if self.conversation_history else "None"}
2775
+ Current directory: {current_dir}
2776
+ Last file mentioned: {last_file}
2777
+ Last directory mentioned: {last_dir}
2677
2778
 
2678
2779
  Respond ONLY with JSON:
2679
2780
  {{
2680
- "action": "pwd|ls|find|read_file|none",
2681
- "search_target": "cm522" (if find),
2682
- "search_path": "~/Downloads" (if find),
2683
- "target_path": "/full/path" (if ls on previous result),
2684
- "file_path": "/full/path/to/file.R" (if read_file)
2781
+ "action": "execute|none",
2782
+ "command": "pwd" (the actual shell command to run, if action=execute),
2783
+ "reason": "Show current directory" (why this command is needed),
2784
+ "updates_context": true (set to true if command changes files/directories)
2685
2785
  }}
2686
2786
 
2787
+ IMPORTANT RULES:
2788
+ 1. Return "none" for conversational queries ("hello", "test", "thanks", "how are you")
2789
+ 2. Return "none" when query is ambiguous without more context
2790
+ 3. Return "none" for questions about data that don't need shell (e.g., "Tesla revenue", "Apple stock price")
2791
+ 4. Use ACTUAL shell commands (pwd, ls, cd, mkdir, cat, grep, find, touch, etc.)
2792
+ 5. Resolve pronouns using context: "it"={last_file}, "there"/{last_dir}
2793
+ 6. For reading files, prefer: head -100 filename (shows first 100 lines)
2794
+ 7. For finding things, use: find ~ -maxdepth 4 -name '*pattern*' 2>/dev/null
2795
+ 8. For creating files: touch filename OR echo "content" > filename
2796
+ 9. For creating directories: mkdir dirname
2797
+ 10. ALWAYS include 2>/dev/null to suppress errors from find
2798
+
2687
2799
  Examples:
2688
- "where am i?" → {{"action": "pwd"}}
2689
- "what files here?" → {{"action": "ls"}}
2690
- "find cm522" → {{"action": "find", "search_target": "cm522"}}
2691
- "look into it" + Previous: "Found /path" {{"action": "ls", "target_path": "/path"}}
2692
- "show me calculate_betas.R" → {{"action": "read_file", "file_path": "calculate_betas.R"}}
2693
- "open regression.R" → {{"action": "read_file", "file_path": "regression.R"}}
2694
- "read that file" + Previous: "regression.R" → {{"action": "read_file", "file_path": "regression.R"}}
2695
- "display analysis.py" → {{"action": "read_file", "file_path": "analysis.py"}}
2696
- "cat data.csv" → {{"action": "read_file", "file_path": "data.csv"}}
2697
- "what columns does it have?" + Previous: file was shown → {{"action": "none"}} (LLM will parse from conversation)
2698
- "Tesla revenue" → {{"action": "none"}}
2699
-
2700
- KEY: If query mentions a specific FILENAME (*.R, *.py, *.csv), use read_file, NOT find!
2800
+ "where am i?" → {{"action": "execute", "command": "pwd", "reason": "Show current directory", "updates_context": false}}
2801
+ "list files" → {{"action": "execute", "command": "ls -lah", "reason": "List all files with details", "updates_context": false}}
2802
+ "find cm522" → {{"action": "execute", "command": "find ~ -maxdepth 4 -name '*cm522*' -type d 2>/dev/null | head -20", "reason": "Search for cm522 directory", "updates_context": false}}
2803
+ "go to Downloads" {{"action": "execute", "command": "cd ~/Downloads && pwd", "reason": "Navigate to Downloads directory", "updates_context": true}}
2804
+ "show me calc.R" → {{"action": "execute", "command": "head -100 calc.R", "reason": "Display file contents", "updates_context": true}}
2805
+ "create test directory" → {{"action": "execute", "command": "mkdir test && echo 'Created test/'", "reason": "Create new directory", "updates_context": true}}
2806
+ "create empty config.json" → {{"action": "execute", "command": "touch config.json && echo 'Created config.json'", "reason": "Create empty file", "updates_context": true}}
2807
+ "search for TODO in py files" → {{"action": "execute", "command": "grep -n 'TODO' *.py 2>/dev/null", "reason": "Find TODO comments", "updates_context": false}}
2808
+ "git status" → {{"action": "execute", "command": "git status", "reason": "Check repository status", "updates_context": false}}
2809
+ "what's in that file?" + last_file=data.csv → {{"action": "execute", "command": "head -100 data.csv", "reason": "Show file contents", "updates_context": false}}
2810
+ "hello" → {{"action": "none", "reason": "Conversational greeting, no command needed"}}
2811
+ "test" → {{"action": "none", "reason": "Ambiguous query, needs clarification"}}
2812
+ "thanks" {{"action": "none", "reason": "Conversational acknowledgment"}}
2813
+ "Tesla revenue" → {{"action": "none", "reason": "Finance query, will use FinSight API not shell"}}
2814
+ "what does the error mean?" → {{"action": "none", "reason": "Explanation request, no command needed"}}
2701
2815
 
2702
2816
  JSON:"""
2703
2817
 
@@ -2715,17 +2829,82 @@ JSON:"""
2715
2829
 
2716
2830
  plan = json.loads(plan_text)
2717
2831
  shell_action = plan.get("action", "none")
2832
+ command = plan.get("command", "")
2833
+ reason = plan.get("reason", "")
2834
+ updates_context = plan.get("updates_context", False)
2718
2835
 
2719
2836
  if debug_mode:
2720
2837
  print(f"🔍 SHELL PLAN: {plan}")
2721
2838
 
2722
- # Execute shell command based on plan
2723
- if shell_action == "pwd":
2724
- pwd_output = self.execute_command("pwd")
2725
- api_results["shell_info"] = {"current_directory": pwd_output.strip()}
2726
- tools_used.append("shell_execution")
2839
+ # GENERIC COMMAND EXECUTION - No more hardcoded actions!
2840
+ if shell_action == "execute" and command:
2841
+ # Check command safety
2842
+ safety_level = self._classify_command_safety(command)
2843
+
2844
+ if debug_mode:
2845
+ print(f"🔍 Command: {command}")
2846
+ print(f"🔍 Safety: {safety_level}")
2847
+
2848
+ if safety_level == 'BLOCKED':
2849
+ api_results["shell_info"] = {
2850
+ "error": f"Command blocked for safety: {command}",
2851
+ "reason": "This command could cause system damage"
2852
+ }
2853
+ else:
2854
+ # Execute the command
2855
+ output = self.execute_command(command)
2856
+
2857
+ if not output.startswith("ERROR"):
2858
+ # Success - store results
2859
+ api_results["shell_info"] = {
2860
+ "command": command,
2861
+ "output": output,
2862
+ "reason": reason,
2863
+ "safety_level": safety_level
2864
+ }
2865
+ tools_used.append("shell_execution")
2866
+
2867
+ # Update file context if needed
2868
+ if updates_context:
2869
+ import re
2870
+ # Extract file paths from command
2871
+ file_patterns = r'([a-zA-Z0-9_\-./]+\.(py|r|csv|txt|json|md|ipynb|rmd))'
2872
+ files_mentioned = re.findall(file_patterns, command, re.IGNORECASE)
2873
+ if files_mentioned:
2874
+ file_path = files_mentioned[0][0]
2875
+ self.file_context['last_file'] = file_path
2876
+ if file_path not in self.file_context['recent_files']:
2877
+ self.file_context['recent_files'].append(file_path)
2878
+ self.file_context['recent_files'] = self.file_context['recent_files'][-5:] # Keep last 5
2879
+
2880
+ # Extract directory paths
2881
+ dir_patterns = r'cd\s+([^\s&|;]+)|mkdir\s+([^\s&|;]+)'
2882
+ dirs_mentioned = re.findall(dir_patterns, command)
2883
+ if dirs_mentioned:
2884
+ for dir_tuple in dirs_mentioned:
2885
+ dir_path = dir_tuple[0] or dir_tuple[1]
2886
+ if dir_path:
2887
+ self.file_context['last_directory'] = dir_path
2888
+ if dir_path not in self.file_context['recent_dirs']:
2889
+ self.file_context['recent_dirs'].append(dir_path)
2890
+ self.file_context['recent_dirs'] = self.file_context['recent_dirs'][-5:] # Keep last 5
2891
+
2892
+ # If cd command, update current_cwd
2893
+ if command.startswith('cd '):
2894
+ try:
2895
+ new_cwd = self.execute_command("pwd").strip()
2896
+ self.file_context['current_cwd'] = new_cwd
2897
+ except:
2898
+ pass
2899
+ else:
2900
+ # Command failed
2901
+ api_results["shell_info"] = {
2902
+ "error": output,
2903
+ "command": command
2904
+ }
2727
2905
 
2728
- elif shell_action == "ls":
2906
+ # Backwards compatibility: support old hardcoded actions if LLM still returns them
2907
+ elif shell_action == "pwd":
2729
2908
  target = plan.get("target_path")
2730
2909
  if target:
2731
2910
  ls_output = self.execute_command(f"ls -lah {target}")
@@ -2757,6 +2936,32 @@ JSON:"""
2757
2936
  }
2758
2937
  tools_used.append("shell_execution")
2759
2938
 
2939
+ elif shell_action == "cd":
2940
+ # NEW: Change directory
2941
+ target = plan.get("target_path")
2942
+ if target:
2943
+ # Expand ~ to home directory
2944
+ if target.startswith("~"):
2945
+ home = os.path.expanduser("~")
2946
+ target = target.replace("~", home, 1)
2947
+
2948
+ # Execute cd command
2949
+ cd_cmd = f"cd {target} && pwd"
2950
+ cd_output = self.execute_command(cd_cmd)
2951
+
2952
+ if not cd_output.startswith("ERROR"):
2953
+ api_results["shell_info"] = {
2954
+ "directory_changed": True,
2955
+ "new_directory": cd_output.strip(),
2956
+ "target_path": target
2957
+ }
2958
+ tools_used.append("shell_execution")
2959
+ else:
2960
+ api_results["shell_info"] = {
2961
+ "directory_changed": False,
2962
+ "error": f"Failed to change to {target}: {cd_output}"
2963
+ }
2964
+
2760
2965
  elif shell_action == "read_file":
2761
2966
  # NEW: Read and inspect file (R, Python, CSV, etc.)
2762
2967
  import re # Import at function level
@@ -2836,6 +3041,14 @@ JSON:"""
2836
3041
  if debug_mode and is_vague:
2837
3042
  print(f"🔍 Query is VAGUE - skipping expensive APIs")
2838
3043
 
3044
+ # If query is vague, hint to backend LLM to ask clarifying questions
3045
+ if is_vague:
3046
+ api_results["query_analysis"] = {
3047
+ "is_vague": True,
3048
+ "suggestion": "Ask clarifying questions instead of guessing",
3049
+ "reason": "Query needs more specificity to provide accurate answer"
3050
+ }
3051
+
2839
3052
  # Skip Archive/FinSight if query is too vague, but still allow web search later
2840
3053
  if not is_vague:
2841
3054
  # Archive API for research
@@ -2914,32 +3127,78 @@ JSON:"""
2914
3127
  # - Shell said "none" (not a directory/file operation)
2915
3128
  # - We don't have enough data from Archive/FinSight
2916
3129
 
2917
- if self.web_search and shell_action == "none":
3130
+ # First check: Is this a conversational query that doesn't need web search?
3131
+ def is_conversational_query(query: str) -> bool:
3132
+ """Detect if query is conversational (greeting, thanks, testing, etc.)"""
3133
+ query_lower = query.lower().strip()
3134
+
3135
+ # Single word queries that are conversational
3136
+ conversational_words = {
3137
+ 'hello', 'hi', 'hey', 'thanks', 'thank', 'ok', 'okay', 'yes', 'no',
3138
+ 'test', 'testing', 'cool', 'nice', 'great', 'awesome', 'perfect',
3139
+ 'bye', 'goodbye', 'quit', 'exit', 'help'
3140
+ }
3141
+
3142
+ # Short conversational phrases
3143
+ conversational_phrases = [
3144
+ 'how are you', 'thank you', 'thanks!', 'ok', 'got it', 'i see',
3145
+ 'makes sense', 'sounds good', 'that works', 'no problem'
3146
+ ]
3147
+
3148
+ words = query_lower.split()
3149
+
3150
+ # Single word check
3151
+ if len(words) == 1 and words[0] in conversational_words:
3152
+ return True
3153
+
3154
+ # Short phrase check
3155
+ if len(words) <= 3 and any(phrase in query_lower for phrase in conversational_phrases):
3156
+ return True
3157
+
3158
+ # Question marks with no content words (just pronouns)
3159
+ if '?' in query_lower and len(words) <= 2:
3160
+ return True
3161
+
3162
+ return False
3163
+
3164
+ skip_web_search = is_conversational_query(request.question)
3165
+
3166
+ if self.web_search and shell_action == "none" and not skip_web_search:
2918
3167
  # Ask LLM: Should we web search for this?
2919
- web_decision_prompt = f"""Should we use web search for this query?
3168
+ web_decision_prompt = f"""You are a tool selection expert. Decide if web search is needed.
2920
3169
 
2921
3170
  User query: "{request.question}"
2922
3171
  Data already available: {list(api_results.keys())}
2923
- Shell action: {shell_action}
3172
+ Tools already used: {tools_used}
3173
+
3174
+ AVAILABLE TOOLS YOU SHOULD KNOW:
3175
+ 1. FinSight API: Company financial data (revenue, income, margins, ratios, cash flow, balance sheet, SEC filings)
3176
+ - Covers: All US public companies (~8,000)
3177
+ - Data: SEC EDGAR + Yahoo Finance
3178
+ - Metrics: 50+ financial KPIs
3179
+
3180
+ 2. Archive API: Academic research papers
3181
+ - Covers: Semantic Scholar, OpenAlex, PubMed
3182
+ - Data: Papers, citations, abstracts
3183
+
3184
+ 3. Web Search: General information, current events
3185
+ - Covers: Anything on the internet
3186
+ - Use for: Market share, industry news, non-financial company info
3187
+
3188
+ DECISION RULES:
3189
+ - If query is about company financials (revenue, profit, margins, etc.) → Check if FinSight already provided data
3190
+ - If FinSight has data in api_results → Web search is NOT needed
3191
+ - If FinSight was called but no data → Web search as fallback is OK
3192
+ - If query is about market share, industry size, trends → Web search (FinSight doesn't have this)
3193
+ - If query is about research papers → Archive handles it, not web
3194
+ - If query is conversational → Already filtered, you won't see these
2924
3195
 
2925
3196
  Respond with JSON:
2926
3197
  {{
2927
3198
  "use_web_search": true/false,
2928
- "reason": "why or why not"
3199
+ "reason": "explain why based on tools available and data already fetched"
2929
3200
  }}
2930
3201
 
2931
- Use web search for:
2932
- - Market share/size (not in SEC filings)
2933
- - Current prices (Bitcoin, commodities, real-time data)
2934
- - Industry data, statistics
2935
- - Recent events, news
2936
- - Questions not answered by existing data
2937
-
2938
- Don't use if:
2939
- - Shell already handled it (pwd/ls/find)
2940
- - Question answered by research/financial APIs
2941
- - Pure opinion question
2942
-
2943
3202
  JSON:"""
2944
3203
 
2945
3204
  try:
{cite_agent-1.3.6.dist-info → cite_agent-1.3.7.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cite-agent
3
- Version: 1.3.6
3
+ Version: 1.3.7
4
4
  Summary: Terminal AI assistant for academic research with citation verification
5
5
  Home-page: https://github.com/Spectating101/cite-agent
6
6
  Author: Cite-Agent Team
cite_agent-1.3.7.dist-info/RECORD ADDED
@@ -0,0 +1,31 @@
1
+ cite_agent/__init__.py,sha256=wAXV2v8nNOmIAd0rh8196ItBl9hHWBVOBl5Re4VB77I,1645
2
+ cite_agent/__main__.py,sha256=6x3lltwG-iZHeQbN12rwvdkPDfd2Rmdk71tOOaC89Mw,179
3
+ cite_agent/__version__.py,sha256=9peaXOar2qezOPJEKG6cD_A0aaXrzdVN8h-v6fBoBEk,22
4
+ cite_agent/account_client.py,sha256=yLuzhIJoIZuXHXGbaVMzDxRATQwcy-wiaLnUrDuwUhI,5725
5
+ cite_agent/agent_backend_only.py,sha256=H4DH4hmKhT0T3rQLAb2xnnJVjxl3pOZaljL9r6JndFY,6314
6
+ cite_agent/ascii_plotting.py,sha256=lk8BaECs6fmjtp4iH12G09-frlRehAN7HLhHt2crers,8570
7
+ cite_agent/auth.py,sha256=YtoGXKwcLkZQbop37iYYL9BzRWBRPlt_D9p71VGViS4,9833
8
+ cite_agent/backend_only_client.py,sha256=WqLF8x7aXTro2Q3ehqKMsdCg53s6fNk9Hy86bGxqmmw,2561
9
+ cite_agent/cli.py,sha256=QO4hmHOeiW_8gxCjos1zk7NV4-joQiLc9LNsv7zCr70,35931
10
+ cite_agent/cli_conversational.py,sha256=RAmgRNRyB8gQ8QLvWU-Tt23j2lmA34rQNT5F3_7SOq0,11141
11
+ cite_agent/cli_enhanced.py,sha256=EAaSw9qtiYRWUXF6_05T19GCXlz9cCSz6n41ASnXIPc,7407
12
+ cite_agent/cli_workflow.py,sha256=4oS_jW9D8ylovXbEFdsyLQONt4o0xxR4Xatfcc4tnBs,11641
13
+ cite_agent/dashboard.py,sha256=VGV5XQU1PnqvTsxfKMcue3j2ri_nvm9Be6O5aVays_w,10502
14
+ cite_agent/enhanced_ai_agent.py,sha256=wyuQu50mZo6jMYZqFD8Bqjk55dFFOBBR28AixchftXY,187083
15
+ cite_agent/project_detector.py,sha256=fPl5cLTy_oyufqrQ7RJ5IRVdofZoPqDRaQXW6tRtBJc,6086
16
+ cite_agent/rate_limiter.py,sha256=-0fXx8Tl4zVB4O28n9ojU2weRo-FBF1cJo9Z5jC2LxQ,10908
17
+ cite_agent/session_manager.py,sha256=B0MXSOsXdhO3DlvTG7S8x6pmGlYEDvIZ-o8TZM23niQ,9444
18
+ cite_agent/setup_config.py,sha256=3m2e3gw0srEWA0OygdRo64r-8HK5ohyXfct0c__CF3s,16817
19
+ cite_agent/streaming_ui.py,sha256=N6TWOo7GVQ_Ynfw73JCfrdGcLIU-PwbS3GbsHQHegmg,7810
20
+ cite_agent/telemetry.py,sha256=55kXdHvI24ZsEkbFtihcjIfJt2oiSXcEpLzTxQ3KCdQ,2916
21
+ cite_agent/ui.py,sha256=r1OAeY3NSeqhAjJYmEBH9CaennBuibFAz1Mur6YF80E,6134
22
+ cite_agent/updater.py,sha256=udoAAN4gBKAvKDV7JTh2FJO_jIhNk9bby4x6n188MEY,8458
23
+ cite_agent/web_search.py,sha256=FZCuNO7MAITiOIbpPbJyt2bzbXPzQla-9amJpnMpW_4,6520
24
+ cite_agent/workflow.py,sha256=a0YC0Mzz4or1C5t2gZcuJBQ0uMOZrooaI8eLu2kkI0k,15086
25
+ cite_agent/workflow_integration.py,sha256=A9ua0DN5pRtuU0cAwrUTGvqt2SXKhEHQbrHx16EGnDM,10910
26
+ cite_agent-1.3.7.dist-info/licenses/LICENSE,sha256=XJkyO4IymhSUniN1ENY6lLrL2729gn_rbRlFK6_Hi9M,1074
27
+ cite_agent-1.3.7.dist-info/METADATA,sha256=YRov18tZiDcm3tnPS7nIPoD8Ruq1wFghllHcqy6mTKc,12231
28
+ cite_agent-1.3.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
+ cite_agent-1.3.7.dist-info/entry_points.txt,sha256=bJ0u28nFIxQKH1PWQ2ak4PV-FAjhoxTC7YADEdDenFw,83
30
+ cite_agent-1.3.7.dist-info/top_level.txt,sha256=NNfD8pxDZzBK8tjDIpCs2BW9Va-OQ5qUFbEx0SgmyIE,11
31
+ cite_agent-1.3.7.dist-info/RECORD,,