cite-agent 1.3.6__py3-none-any.whl → 1.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cite-agent might be problematic. Click here for more details.

Files changed (36)
  1. cite_agent/__version__.py +1 -1
  2. cite_agent/cli.py +9 -2
  3. cite_agent/enhanced_ai_agent.py +1100 -77
  4. {cite_agent-1.3.6.dist-info → cite_agent-1.3.8.dist-info}/METADATA +1 -1
  5. cite_agent-1.3.8.dist-info/RECORD +31 -0
  6. {cite_agent-1.3.6.dist-info → cite_agent-1.3.8.dist-info}/top_level.txt +0 -1
  7. cite_agent-1.3.6.dist-info/RECORD +0 -57
  8. src/__init__.py +0 -1
  9. src/services/__init__.py +0 -132
  10. src/services/auth_service/__init__.py +0 -3
  11. src/services/auth_service/auth_manager.py +0 -33
  12. src/services/graph/__init__.py +0 -1
  13. src/services/graph/knowledge_graph.py +0 -194
  14. src/services/llm_service/__init__.py +0 -5
  15. src/services/llm_service/llm_manager.py +0 -495
  16. src/services/paper_service/__init__.py +0 -5
  17. src/services/paper_service/openalex.py +0 -231
  18. src/services/performance_service/__init__.py +0 -1
  19. src/services/performance_service/rust_performance.py +0 -395
  20. src/services/research_service/__init__.py +0 -23
  21. src/services/research_service/chatbot.py +0 -2056
  22. src/services/research_service/citation_manager.py +0 -436
  23. src/services/research_service/context_manager.py +0 -1441
  24. src/services/research_service/conversation_manager.py +0 -597
  25. src/services/research_service/critical_paper_detector.py +0 -577
  26. src/services/research_service/enhanced_research.py +0 -121
  27. src/services/research_service/enhanced_synthesizer.py +0 -375
  28. src/services/research_service/query_generator.py +0 -777
  29. src/services/research_service/synthesizer.py +0 -1273
  30. src/services/search_service/__init__.py +0 -5
  31. src/services/search_service/indexer.py +0 -186
  32. src/services/search_service/search_engine.py +0 -342
  33. src/services/simple_enhanced_main.py +0 -287
  34. {cite_agent-1.3.6.dist-info → cite_agent-1.3.8.dist-info}/WHEEL +0 -0
  35. {cite_agent-1.3.6.dist-info → cite_agent-1.3.8.dist-info}/entry_points.txt +0 -0
  36. {cite_agent-1.3.6.dist-info → cite_agent-1.3.8.dist-info}/licenses/LICENSE +0 -0
@@ -89,6 +89,15 @@ class EnhancedNocturnalAgent:
89
89
  from .workflow import WorkflowManager
90
90
  self.workflow = WorkflowManager()
91
91
  self.last_paper_result = None # Track last paper mentioned for "save that"
92
+
93
+ # File context tracking (for pronoun resolution and multi-turn)
94
+ self.file_context = {
95
+ 'last_file': None, # Last file mentioned/read
96
+ 'last_directory': None, # Last directory mentioned/navigated
97
+ 'recent_files': [], # Last 5 files (for "those files")
98
+ 'recent_dirs': [], # Last 5 directories
99
+ 'current_cwd': None, # Track shell's current directory
100
+ }
92
101
  try:
93
102
  self.per_user_token_limit = int(os.getenv("GROQ_PER_USER_TOKENS", 50000))
94
103
  except (TypeError, ValueError):
@@ -994,7 +1003,17 @@ class EnhancedNocturnalAgent:
994
1003
  capability_lines.append("• You can SEARCH user's paper collection")
995
1004
  capability_lines.append("• You can COPY text to user's clipboard")
996
1005
  capability_lines.append("• User's query history is automatically tracked")
997
-
1006
+
1007
+ # Add file operation capabilities (Claude Code / Cursor parity)
1008
+ capability_lines.append("")
1009
+ capability_lines.append("📁 DIRECT FILE OPERATIONS (Always available):")
1010
+ capability_lines.append("• read_file(path) - Read files with line numbers (like cat but better)")
1011
+ capability_lines.append("• write_file(path, content) - Create/overwrite files directly")
1012
+ capability_lines.append("• edit_file(path, old, new) - Surgical find/replace edits")
1013
+ capability_lines.append("• glob_search(pattern) - Fast file search (e.g., '**/*.py')")
1014
+ capability_lines.append("• grep_search(pattern) - Fast content search in files")
1015
+ capability_lines.append("• batch_edit_files(edits) - Multi-file refactoring")
1016
+
998
1017
  sections.append("Capabilities in play:\n" + "\n".join(capability_lines))
999
1018
 
1000
1019
  # ENHANCED TRUTH-SEEKING RULES (adapt based on mode)
@@ -1089,6 +1108,48 @@ class EnhancedNocturnalAgent:
1089
1108
  "• Example: 'I found 3 papers. I can save them to your library or export to BibTeX if you'd like.'",
1090
1109
  ]
1091
1110
  rules.extend(workflow_rules)
1111
+
1112
+ # Add file operation tool usage rules (CRITICAL for Claude Code parity)
1113
+ file_ops_rules = [
1114
+ "",
1115
+ "📁 FILE OPERATION TOOL USAGE (Use these INSTEAD of shell commands):",
1116
+ "",
1117
+ "🔴 ALWAYS PREFER (in order):",
1118
+ "1. read_file(path) → INSTEAD OF: cat, head, tail",
1119
+ "2. write_file(path, content) → INSTEAD OF: echo >, cat << EOF, printf >",
1120
+ "3. edit_file(path, old, new) → INSTEAD OF: sed, awk",
1121
+ "4. glob_search(pattern, path) → INSTEAD OF: find, ls",
1122
+ "5. grep_search(pattern, path, file_pattern) → INSTEAD OF: grep -r",
1123
+ "",
1124
+ "✅ CORRECT USAGE:",
1125
+ "• Reading code: result = read_file('app.py')",
1126
+ "• Creating file: write_file('config.json', '{...}')",
1127
+ "• Editing code: edit_file('main.py', 'old_var', 'new_var', replace_all=True)",
1128
+ "• Finding files: glob_search('**/*.py', '/home/user/project')",
1129
+ "• Searching code: grep_search('class.*Agent', '.', '*.py', output_mode='content')",
1130
+ "• Multi-file refactor: batch_edit_files([{file: 'a.py', old: '...', new: '...'}, ...])",
1131
+ "",
1132
+ "❌ ANTI-PATTERNS (Don't do these):",
1133
+ "• DON'T use cat when read_file exists",
1134
+ "• DON'T use echo > when write_file exists",
1135
+ "• DON'T use sed when edit_file exists",
1136
+ "• DON'T use find when glob_search exists",
1137
+ "• DON'T use grep -r when grep_search exists",
1138
+ "",
1139
+ "🎯 WHY USE THESE TOOLS:",
1140
+ "• read_file() shows line numbers (critical for code analysis)",
1141
+ "• write_file() handles escaping/quoting automatically (no heredoc hell)",
1142
+ "• edit_file() validates changes before applying (safer than sed)",
1143
+ "• glob_search() is faster and cleaner than find",
1144
+ "• grep_search() returns structured data (easier to parse)",
1145
+ "",
1146
+ "⚠️ SHELL COMMANDS ONLY FOR:",
1147
+ "• System operations (ps, df, du, uptime)",
1148
+ "• Git commands (git status, git diff, git log)",
1149
+ "• Package installs (pip install, Rscript -e \"install.packages(...)\")",
1150
+ "• Running Python/R scripts (python script.py, Rscript analysis.R)",
1151
+ ]
1152
+ rules.extend(file_ops_rules)
1092
1153
 
1093
1154
  sections.append("CRITICAL RULES:\n" + "\n".join(rules))
1094
1155
 
@@ -1950,14 +2011,17 @@ class EnhancedNocturnalAgent:
1950
2011
  url = f"{self.finsight_base_url}/{endpoint}"
1951
2012
  # Start fresh with headers - don't use _default_headers which might be wrong
1952
2013
  headers = {}
1953
-
2014
+
1954
2015
  # Always use demo key for FinSight (SEC data is public)
1955
2016
  headers["X-API-Key"] = "demo-key-123"
1956
-
2017
+
2018
+ # Mark request as agent-mediated for product separation
2019
+ headers["X-Request-Source"] = "agent"
2020
+
1957
2021
  # Also add JWT if we have it
1958
2022
  if self.auth_token:
1959
2023
  headers["Authorization"] = f"Bearer {self.auth_token}"
1960
-
2024
+
1961
2025
  debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
1962
2026
  if debug_mode:
1963
2027
  print(f"🔍 FinSight headers: {list(headers.keys())}, X-API-Key={headers.get('X-API-Key')}")
@@ -2179,40 +2243,552 @@ class EnhancedNocturnalAgent:
2179
2243
 
2180
2244
  output = '\n'.join(output_lines).strip()
2181
2245
  return output if output else "Command executed (no output)"
2182
-
2246
+
2183
2247
  except Exception as e:
2184
2248
  return f"ERROR: {e}"
2185
2249
 
2186
- def _is_safe_shell_command(self, cmd: str) -> bool:
2250
+ # ========================================================================
2251
+ # DIRECT FILE OPERATIONS (Claude Code / Cursor Parity)
2252
+ # ========================================================================
2253
+
2254
+ def read_file(self, file_path: str, offset: int = 0, limit: int = 2000) -> str:
2255
+ """
2256
+ Read file with line numbers (like Claude Code's Read tool)
2257
+
2258
+ Args:
2259
+ file_path: Path to file
2260
+ offset: Starting line number (0-indexed)
2261
+ limit: Maximum number of lines to read
2262
+
2263
+ Returns:
2264
+ File contents with line numbers in format: " 123→content"
2265
+ """
2266
+ try:
2267
+ # Expand ~ to home directory
2268
+ file_path = os.path.expanduser(file_path)
2269
+
2270
+ # Make absolute if relative
2271
+ if not os.path.isabs(file_path):
2272
+ file_path = os.path.abspath(file_path)
2273
+
2274
+ with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
2275
+ lines = f.readlines()
2276
+
2277
+ # Apply offset and limit
2278
+ if offset or limit:
2279
+ lines = lines[offset:offset+limit if limit else None]
2280
+
2281
+ # Format with line numbers (1-indexed, like vim/editors)
2282
+ numbered_lines = [
2283
+ f"{offset+i+1:6d}→{line.rstrip()}\n"
2284
+ for i, line in enumerate(lines)
2285
+ ]
2286
+
2287
+ result = ''.join(numbered_lines)
2288
+
2289
+ # Update file context
2290
+ self.file_context['last_file'] = file_path
2291
+ if file_path not in self.file_context['recent_files']:
2292
+ self.file_context['recent_files'].append(file_path)
2293
+ self.file_context['recent_files'] = self.file_context['recent_files'][-5:]
2294
+
2295
+ return result if result else "(empty file)"
2296
+
2297
+ except FileNotFoundError:
2298
+ return f"ERROR: File not found: {file_path}"
2299
+ except PermissionError:
2300
+ return f"ERROR: Permission denied: {file_path}"
2301
+ except IsADirectoryError:
2302
+ return f"ERROR: {file_path} is a directory, not a file"
2303
+ except Exception as e:
2304
+ return f"ERROR: {type(e).__name__}: {e}"
2305
+
2306
+ def write_file(self, file_path: str, content: str) -> Dict[str, Any]:
2307
+ """
2308
+ Write file directly (like Claude Code's Write tool)
2309
+ Creates new file or overwrites existing one.
2310
+
2311
+ Args:
2312
+ file_path: Path to file
2313
+ content: Full file content
2314
+
2315
+ Returns:
2316
+ {"success": bool, "message": str, "bytes_written": int}
2317
+ """
2318
+ try:
2319
+ # Expand ~ to home directory
2320
+ file_path = os.path.expanduser(file_path)
2321
+
2322
+ # Make absolute if relative
2323
+ if not os.path.isabs(file_path):
2324
+ file_path = os.path.abspath(file_path)
2325
+
2326
+ # Create parent directories if needed
2327
+ parent_dir = os.path.dirname(file_path)
2328
+ if parent_dir and not os.path.exists(parent_dir):
2329
+ os.makedirs(parent_dir, exist_ok=True)
2330
+
2331
+ # Write file
2332
+ with open(file_path, 'w', encoding='utf-8') as f:
2333
+ bytes_written = f.write(content)
2334
+
2335
+ # Update file context
2336
+ self.file_context['last_file'] = file_path
2337
+ if file_path not in self.file_context['recent_files']:
2338
+ self.file_context['recent_files'].append(file_path)
2339
+ self.file_context['recent_files'] = self.file_context['recent_files'][-5:]
2340
+
2341
+ return {
2342
+ "success": True,
2343
+ "message": f"Wrote {bytes_written} bytes to {file_path}",
2344
+ "bytes_written": bytes_written
2345
+ }
2346
+
2347
+ except PermissionError:
2348
+ return {
2349
+ "success": False,
2350
+ "message": f"ERROR: Permission denied: {file_path}",
2351
+ "bytes_written": 0
2352
+ }
2353
+ except Exception as e:
2354
+ return {
2355
+ "success": False,
2356
+ "message": f"ERROR: {type(e).__name__}: {e}",
2357
+ "bytes_written": 0
2358
+ }
2359
+
2360
+ def edit_file(self, file_path: str, old_string: str, new_string: str,
2361
+ replace_all: bool = False) -> Dict[str, Any]:
2362
+ """
2363
+ Surgical file edit (like Claude Code's Edit tool)
2364
+
2365
+ Args:
2366
+ file_path: Path to file
2367
+ old_string: Exact string to replace (must be unique unless replace_all=True)
2368
+ new_string: Replacement string
2369
+ replace_all: If True, replace all occurrences. If False, old_string must be unique.
2370
+
2371
+ Returns:
2372
+ {"success": bool, "message": str, "replacements": int}
2373
+ """
2374
+ try:
2375
+ # Expand ~ to home directory
2376
+ file_path = os.path.expanduser(file_path)
2377
+
2378
+ # Make absolute if relative
2379
+ if not os.path.isabs(file_path):
2380
+ file_path = os.path.abspath(file_path)
2381
+
2382
+ # Read file
2383
+ with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
2384
+ content = f.read()
2385
+
2386
+ # Check if old_string exists
2387
+ if old_string not in content:
2388
+ return {
2389
+ "success": False,
2390
+ "message": f"ERROR: old_string not found in {file_path}",
2391
+ "replacements": 0
2392
+ }
2393
+
2394
+ # Check uniqueness if not replace_all
2395
+ occurrences = content.count(old_string)
2396
+ if not replace_all and occurrences > 1:
2397
+ return {
2398
+ "success": False,
2399
+ "message": f"ERROR: old_string appears {occurrences} times in {file_path}. Use replace_all=True or provide more context to make it unique.",
2400
+ "replacements": 0
2401
+ }
2402
+
2403
+ # Perform replacement
2404
+ if replace_all:
2405
+ new_content = content.replace(old_string, new_string)
2406
+ else:
2407
+ new_content = content.replace(old_string, new_string, 1)
2408
+
2409
+ # Write back
2410
+ with open(file_path, 'w', encoding='utf-8') as f:
2411
+ f.write(new_content)
2412
+
2413
+ # Update file context
2414
+ self.file_context['last_file'] = file_path
2415
+
2416
+ return {
2417
+ "success": True,
2418
+ "message": f"Replaced {occurrences if replace_all else 1} occurrence(s) in {file_path}",
2419
+ "replacements": occurrences if replace_all else 1
2420
+ }
2421
+
2422
+ except FileNotFoundError:
2423
+ return {
2424
+ "success": False,
2425
+ "message": f"ERROR: File not found: {file_path}",
2426
+ "replacements": 0
2427
+ }
2428
+ except PermissionError:
2429
+ return {
2430
+ "success": False,
2431
+ "message": f"ERROR: Permission denied: {file_path}",
2432
+ "replacements": 0
2433
+ }
2434
+ except Exception as e:
2435
+ return {
2436
+ "success": False,
2437
+ "message": f"ERROR: {type(e).__name__}: {e}",
2438
+ "replacements": 0
2439
+ }
2440
+
2441
+ def glob_search(self, pattern: str, path: str = ".") -> Dict[str, Any]:
2442
+ """
2443
+ Fast file pattern matching (like Claude Code's Glob tool)
2444
+
2445
+ Args:
2446
+ pattern: Glob pattern (e.g., "*.py", "**/*.md", "src/**/*.ts")
2447
+ path: Starting directory (default: current directory)
2448
+
2449
+ Returns:
2450
+ {"files": List[str], "count": int, "pattern": str}
2451
+ """
2452
+ try:
2453
+ import glob as glob_module
2454
+
2455
+ # Expand ~ to home directory
2456
+ path = os.path.expanduser(path)
2457
+
2458
+ # Make absolute if relative
2459
+ if not os.path.isabs(path):
2460
+ path = os.path.abspath(path)
2461
+
2462
+ # Combine path and pattern
2463
+ full_pattern = os.path.join(path, pattern)
2464
+
2465
+ # Find matches (recursive if ** in pattern)
2466
+ matches = glob_module.glob(full_pattern, recursive=True)
2467
+
2468
+ # Filter to files only (not directories)
2469
+ files = [f for f in matches if os.path.isfile(f)]
2470
+
2471
+ # Sort by modification time (newest first)
2472
+ files.sort(key=lambda f: os.path.getmtime(f), reverse=True)
2473
+
2474
+ return {
2475
+ "files": files,
2476
+ "count": len(files),
2477
+ "pattern": full_pattern
2478
+ }
2479
+
2480
+ except Exception as e:
2481
+ return {
2482
+ "files": [],
2483
+ "count": 0,
2484
+ "pattern": pattern,
2485
+ "error": f"{type(e).__name__}: {e}"
2486
+ }
2487
+
2488
+ def grep_search(self, pattern: str, path: str = ".",
2489
+ file_pattern: str = "*",
2490
+ output_mode: str = "files_with_matches",
2491
+ context_lines: int = 0,
2492
+ ignore_case: bool = False,
2493
+ max_results: int = 100) -> Dict[str, Any]:
2494
+ """
2495
+ Fast content search (like Claude Code's Grep tool / ripgrep)
2496
+
2497
+ Args:
2498
+ pattern: Regex pattern to search for
2499
+ path: Directory to search in
2500
+ file_pattern: Glob pattern for files to search (e.g., "*.py")
2501
+ output_mode: "files_with_matches", "content", or "count"
2502
+ context_lines: Lines of context around matches
2503
+ ignore_case: Case-insensitive search
2504
+ max_results: Maximum number of results to return
2505
+
2506
+ Returns:
2507
+ Depends on output_mode:
2508
+ - files_with_matches: {"files": List[str], "count": int}
2509
+ - content: {"matches": {file: [(line_num, line_content), ...]}}
2510
+ - count: {"counts": {file: match_count}}
2511
+ """
2512
+ try:
2513
+ import re
2514
+
2515
+ # Expand ~ to home directory
2516
+ path = os.path.expanduser(path)
2517
+
2518
+ # Make absolute if relative
2519
+ if not os.path.isabs(path):
2520
+ path = os.path.abspath(path)
2521
+
2522
+ # Compile regex
2523
+ flags = re.IGNORECASE if ignore_case else 0
2524
+ regex = re.compile(pattern, flags)
2525
+
2526
+ # Find files to search
2527
+ glob_result = self.glob_search(file_pattern, path)
2528
+ files_to_search = glob_result["files"]
2529
+
2530
+ # Search each file
2531
+ if output_mode == "files_with_matches":
2532
+ matching_files = []
2533
+ for file_path in files_to_search[:max_results]:
2534
+ try:
2535
+ with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
2536
+ content = f.read()
2537
+ if regex.search(content):
2538
+ matching_files.append(file_path)
2539
+ except:
2540
+ continue
2541
+
2542
+ return {
2543
+ "files": matching_files,
2544
+ "count": len(matching_files),
2545
+ "pattern": pattern
2546
+ }
2547
+
2548
+ elif output_mode == "content":
2549
+ matches = {}
2550
+ for file_path in files_to_search:
2551
+ try:
2552
+ with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
2553
+ lines = f.readlines()
2554
+
2555
+ file_matches = []
2556
+ for line_num, line in enumerate(lines, 1):
2557
+ if regex.search(line):
2558
+ file_matches.append((line_num, line.rstrip()))
2559
+
2560
+ if len(file_matches) >= max_results:
2561
+ break
2562
+
2563
+ if file_matches:
2564
+ matches[file_path] = file_matches
2565
+ except:
2566
+ continue
2567
+
2568
+ return {
2569
+ "matches": matches,
2570
+ "file_count": len(matches),
2571
+ "pattern": pattern
2572
+ }
2573
+
2574
+ elif output_mode == "count":
2575
+ counts = {}
2576
+ for file_path in files_to_search:
2577
+ try:
2578
+ with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
2579
+ content = f.read()
2580
+
2581
+ match_count = len(regex.findall(content))
2582
+ if match_count > 0:
2583
+ counts[file_path] = match_count
2584
+ except:
2585
+ continue
2586
+
2587
+ return {
2588
+ "counts": counts,
2589
+ "total_matches": sum(counts.values()),
2590
+ "pattern": pattern
2591
+ }
2592
+
2593
+ else:
2594
+ return {
2595
+ "error": f"Invalid output_mode: {output_mode}. Use 'files_with_matches', 'content', or 'count'."
2596
+ }
2597
+
2598
+ except re.error as e:
2599
+ return {
2600
+ "error": f"Invalid regex pattern: {e}"
2601
+ }
2602
+ except Exception as e:
2603
+ return {
2604
+ "error": f"{type(e).__name__}: {e}"
2605
+ }
2606
+
2607
+ async def batch_edit_files(self, edits: List[Dict[str, str]]) -> Dict[str, Any]:
2187
2608
  """
2188
- Minimal safety check - only block truly catastrophic commands.
2189
- Philosophy: This is the user's machine. They can do anything in terminal anyway.
2190
- We only block commands that could cause immediate, irreversible system damage.
2609
+ Apply multiple file edits atomically (all-or-nothing)
2610
+
2611
+ Args:
2612
+ edits: List of edit operations:
2613
+ [
2614
+ {"file": "path.py", "old": "...", "new": "..."},
2615
+ {"file": "other.py", "old": "...", "new": "...", "replace_all": True},
2616
+ ...
2617
+ ]
2618
+
2619
+ Returns:
2620
+ {
2621
+ "success": bool,
2622
+ "results": {file: {"success": bool, "message": str, "replacements": int}},
2623
+ "total_edits": int,
2624
+ "failed_edits": int
2625
+ }
2626
+ """
2627
+ try:
2628
+ results = {}
2629
+
2630
+ # Phase 1: Validate all edits
2631
+ for edit in edits:
2632
+ file_path = edit["file"]
2633
+ old_string = edit["old"]
2634
+ replace_all = edit.get("replace_all", False)
2635
+
2636
+ # Expand path
2637
+ file_path = os.path.expanduser(file_path)
2638
+ if not os.path.isabs(file_path):
2639
+ file_path = os.path.abspath(file_path)
2640
+
2641
+ # Check file exists
2642
+ if not os.path.exists(file_path):
2643
+ return {
2644
+ "success": False,
2645
+ "results": {},
2646
+ "total_edits": 0,
2647
+ "failed_edits": len(edits),
2648
+ "error": f"Validation failed: {file_path} not found. No edits applied."
2649
+ }
2650
+
2651
+ # Check old_string exists
2652
+ try:
2653
+ with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
2654
+ content = f.read()
2655
+
2656
+ if old_string not in content:
2657
+ return {
2658
+ "success": False,
2659
+ "results": {},
2660
+ "total_edits": 0,
2661
+ "failed_edits": len(edits),
2662
+ "error": f"Validation failed: Pattern not found in {file_path}. No edits applied."
2663
+ }
2664
+
2665
+ # Check uniqueness if not replace_all
2666
+ if not replace_all and content.count(old_string) > 1:
2667
+ return {
2668
+ "success": False,
2669
+ "results": {},
2670
+ "total_edits": 0,
2671
+ "failed_edits": len(edits),
2672
+ "error": f"Validation failed: Pattern appears {content.count(old_string)} times in {file_path}. Use replace_all or provide more context. No edits applied."
2673
+ }
2674
+ except Exception as e:
2675
+ return {
2676
+ "success": False,
2677
+ "results": {},
2678
+ "total_edits": 0,
2679
+ "failed_edits": len(edits),
2680
+ "error": f"Validation failed reading {file_path}: {e}. No edits applied."
2681
+ }
2682
+
2683
+ # Phase 2: Apply all edits (validation passed)
2684
+ for edit in edits:
2685
+ file_path = edit["file"]
2686
+ old_string = edit["old"]
2687
+ new_string = edit["new"]
2688
+ replace_all = edit.get("replace_all", False)
2689
+
2690
+ result = self.edit_file(file_path, old_string, new_string, replace_all)
2691
+ results[file_path] = result
2692
+
2693
+ # Count successes/failures
2694
+ successful_edits = sum(1 for r in results.values() if r["success"])
2695
+ failed_edits = len(edits) - successful_edits
2696
+
2697
+ return {
2698
+ "success": failed_edits == 0,
2699
+ "results": results,
2700
+ "total_edits": len(edits),
2701
+ "successful_edits": successful_edits,
2702
+ "failed_edits": failed_edits
2703
+ }
2704
+
2705
+ except Exception as e:
2706
+ return {
2707
+ "success": False,
2708
+ "results": {},
2709
+ "total_edits": 0,
2710
+ "failed_edits": len(edits),
2711
+ "error": f"Batch edit failed: {type(e).__name__}: {e}"
2712
+ }
2713
+
2714
+ # ========================================================================
2715
+ # END DIRECT FILE OPERATIONS
2716
+ # ========================================================================
2717
+
2718
+ def _classify_command_safety(self, cmd: str) -> str:
2719
+ """
2720
+ Classify command by safety level for smart execution.
2721
+ Returns: 'SAFE', 'WRITE', 'DANGEROUS', or 'BLOCKED'
2191
2722
  """
2192
2723
  cmd = cmd.strip()
2193
2724
  if not cmd:
2194
- return False
2195
-
2196
- # Block ONLY truly catastrophic commands
2725
+ return 'BLOCKED'
2726
+
2727
+ cmd_lower = cmd.lower()
2728
+ cmd_parts = cmd.split()
2729
+ cmd_base = cmd_parts[0] if cmd_parts else ''
2730
+ cmd_with_sub = ' '.join(cmd_parts[:2]) if len(cmd_parts) >= 2 else ''
2731
+
2732
+ # BLOCKED: Catastrophic commands
2197
2733
  nuclear_patterns = [
2198
- 'rm -rf /', # Wipe root filesystem
2199
- 'rm -rf ~/*', # Wipe home directory
2200
- 'dd if=/dev/zero of=/dev/sda', # Wipe disk
2201
- 'dd if=/dev/zero of=/dev/hda',
2202
- 'mkfs', # Format filesystem
2203
- 'fdisk', # Partition disk
2734
+ 'rm -rf /',
2735
+ 'rm -rf ~',
2736
+ 'rm -rf /*',
2737
+ 'dd if=/dev/zero',
2738
+ 'mkfs',
2739
+ 'fdisk',
2204
2740
  ':(){ :|:& };:', # Fork bomb
2205
- 'chmod -R 777 /', # Make everything executable
2741
+ 'chmod -r 777 /',
2742
+ '> /dev/sda',
2206
2743
  ]
2207
-
2208
- cmd_lower = cmd.lower()
2209
2744
  for pattern in nuclear_patterns:
2210
- if pattern.lower() in cmd_lower:
2211
- return False
2212
-
2213
- # Allow everything else - pip, npm, git, pipes, redirection, etc.
2214
- # User asked for it, user gets it. Just like Cursor.
2215
- return True
2745
+ if pattern in cmd_lower:
2746
+ return 'BLOCKED'
2747
+
2748
+ # SAFE: Read-only commands
2749
+ safe_commands = {
2750
+ 'pwd', 'ls', 'cd', 'cat', 'head', 'tail', 'grep', 'find', 'which', 'type',
2751
+ 'wc', 'diff', 'echo', 'ps', 'top', 'df', 'du', 'file', 'stat', 'tree',
2752
+ 'whoami', 'hostname', 'date', 'cal', 'uptime', 'printenv', 'env',
2753
+ }
2754
+ safe_git = {'git status', 'git log', 'git diff', 'git branch', 'git show', 'git remote'}
2755
+
2756
+ if cmd_base in safe_commands or cmd_with_sub in safe_git:
2757
+ return 'SAFE'
2758
+
2759
+ # WRITE: File creation/modification (allowed but tracked)
2760
+ write_commands = {'mkdir', 'touch', 'cp', 'mv', 'tee'}
2761
+ if cmd_base in write_commands:
2762
+ return 'WRITE'
2763
+
2764
+ # WRITE: Redirection operations (echo > file, cat > file)
2765
+ if '>' in cmd or '>>' in cmd:
2766
+ # Allow redirection to regular files, block to devices
2767
+ if '/dev/' not in cmd_lower:
2768
+ return 'WRITE'
2769
+ else:
2770
+ return 'BLOCKED'
2771
+
2772
+ # DANGEROUS: Deletion and permission changes
2773
+ dangerous_commands = {'rm', 'rmdir', 'chmod', 'chown', 'chgrp'}
2774
+ if cmd_base in dangerous_commands:
2775
+ return 'DANGEROUS'
2776
+
2777
+ # WRITE: Git write operations
2778
+ write_git = {'git add', 'git commit', 'git push', 'git pull', 'git checkout', 'git merge'}
2779
+ if cmd_with_sub in write_git:
2780
+ return 'WRITE'
2781
+
2782
+ # Default: Treat unknown commands as requiring user awareness
2783
+ return 'WRITE'
2784
+
2785
+ def _is_safe_shell_command(self, cmd: str) -> bool:
2786
+ """
2787
+ Compatibility wrapper for old safety check.
2788
+ Now uses tiered classification system.
2789
+ """
2790
+ classification = self._classify_command_safety(cmd)
2791
+ return classification in ['SAFE', 'WRITE'] # Allow SAFE and WRITE, block DANGEROUS and BLOCKED
2216
2792
 
2217
2793
  def _check_token_budget(self, estimated_tokens: int) -> bool:
2218
2794
  """Check if we have enough token budget"""
@@ -2450,12 +3026,42 @@ class EnhancedNocturnalAgent:
2450
3026
  async def _analyze_request_type(self, question: str) -> Dict[str, Any]:
2451
3027
  """Analyze what type of request this is and what APIs to use"""
2452
3028
 
2453
- # Financial indicators
3029
+ # Financial indicators - COMPREHENSIVE list to ensure FinSight is used
2454
3030
  financial_keywords = [
2455
- 'financial', 'revenue', 'profit', 'earnings', 'stock', 'market',
2456
- 'ticker', 'company', 'balance sheet', 'income statement', 'cash flow',
2457
- 'valuation', 'pe ratio', 'debt', 'equity', 'dividend', 'growth',
2458
- 'ceo', 'earnings call', 'quarterly', 'annual report'
3031
+ # Core metrics
3032
+ 'financial', 'revenue', 'sales', 'income', 'profit', 'earnings', 'loss',
3033
+ 'net income', 'operating income', 'gross profit', 'ebitda', 'ebit',
3034
+
3035
+ # Margins & Ratios
3036
+ 'margin', 'gross margin', 'profit margin', 'operating margin', 'net margin', 'ebitda margin',
3037
+ 'ratio', 'current ratio', 'quick ratio', 'debt ratio', 'pe ratio', 'p/e',
3038
+ 'roe', 'roa', 'roic', 'roce', 'eps',
3039
+
3040
+ # Balance Sheet
3041
+ 'assets', 'liabilities', 'equity', 'debt', 'cash', 'capital',
3042
+ 'balance sheet', 'total assets', 'current assets', 'fixed assets',
3043
+ 'shareholders equity', 'stockholders equity', 'retained earnings',
3044
+
3045
+ # Cash Flow
3046
+ 'cash flow', 'fcf', 'free cash flow', 'operating cash flow',
3047
+ 'cfo', 'cfi', 'cff', 'capex', 'capital expenditure',
3048
+
3049
+ # Market Metrics
3050
+ 'stock', 'market cap', 'market capitalization', 'enterprise value',
3051
+ 'valuation', 'price', 'share price', 'stock price', 'quote',
3052
+ 'volume', 'trading volume', 'shares outstanding',
3053
+
3054
+ # Financial Statements
3055
+ 'income statement', '10-k', '10-q', '8-k', 'filing', 'sec filing',
3056
+ 'quarterly', 'annual report', 'earnings report', 'financial statement',
3057
+
3058
+ # Company Info
3059
+ 'ticker', 'company', 'corporation', 'ceo', 'earnings call',
3060
+ 'dividend', 'dividend yield', 'payout ratio',
3061
+
3062
+ # Growth & Performance
3063
+ 'growth', 'yoy', 'year over year', 'qoq', 'quarter over quarter',
3064
+ 'cagr', 'trend', 'performance', 'returns'
2459
3065
  ]
2460
3066
 
2461
3067
  # Research indicators (quantitative)
@@ -2664,40 +3270,76 @@ class EnhancedNocturnalAgent:
2664
3270
  # Quick check if query might need shell
2665
3271
  question_lower = request.question.lower()
2666
3272
  might_need_shell = any(word in question_lower for word in [
2667
- 'directory', 'folder', 'where', 'find', 'list', 'files', 'look', 'search', 'check', 'into',
2668
- 'show', 'open', 'read', 'display', 'cat', 'view', 'contents', '.r', '.py', '.csv', '.ipynb'
3273
+ 'directory', 'folder', 'where', 'find', 'list', 'files', 'file', 'look', 'search', 'check', 'into',
3274
+ 'show', 'open', 'read', 'display', 'cat', 'view', 'contents', '.r', '.py', '.csv', '.ipynb',
3275
+ 'create', 'make', 'mkdir', 'touch', 'new', 'write', 'copy', 'move', 'delete', 'remove',
3276
+ 'git', 'grep', 'navigate', 'go to', 'change to'
2669
3277
  ])
2670
3278
 
2671
3279
  if might_need_shell and self.shell_session:
3280
+ # Get current directory and context for intelligent planning
3281
+ try:
3282
+ current_dir = self.execute_command("pwd").strip()
3283
+ self.file_context['current_cwd'] = current_dir
3284
+ except:
3285
+ current_dir = "~"
3286
+
3287
+ last_file = self.file_context.get('last_file') or 'None'
3288
+ last_dir = self.file_context.get('last_directory') or 'None'
3289
+
2672
3290
  # Ask LLM planner: What shell command should we run?
2673
- planner_prompt = f"""You are a shell command planner. Determine what shell command to run.
3291
+ planner_prompt = f"""You are a shell command planner. Determine what shell command to run, if any.
2674
3292
 
2675
3293
  User query: "{request.question}"
2676
3294
  Previous conversation: {json.dumps(self.conversation_history[-2:]) if self.conversation_history else "None"}
3295
+ Current directory: {current_dir}
3296
+ Last file mentioned: {last_file}
3297
+ Last directory mentioned: {last_dir}
2677
3298
 
2678
3299
  Respond ONLY with JSON:
2679
3300
  {{
2680
- "action": "pwd|ls|find|read_file|none",
2681
- "search_target": "cm522" (if find),
2682
- "search_path": "~/Downloads" (if find),
2683
- "target_path": "/full/path" (if ls on previous result),
2684
- "file_path": "/full/path/to/file.R" (if read_file)
3301
+ "action": "execute|none",
3302
+ "command": "pwd" (the actual shell command to run, if action=execute),
3303
+ "reason": "Show current directory" (why this command is needed),
3304
+ "updates_context": true (set to true if command changes files/directories)
2685
3305
  }}
2686
3306
 
3307
+ IMPORTANT RULES:
3308
+ 1. Return "none" for conversational queries ("hello", "test", "thanks", "how are you")
3309
+ 2. Return "none" when query is ambiguous without more context
3310
+ 3. Return "none" for questions about data that don't need shell (e.g., "Tesla revenue", "Apple stock price")
3311
+ 4. Use ACTUAL shell commands (pwd, ls, cd, mkdir, cat, grep, find, touch, etc.)
3312
+ 5. Resolve pronouns using context: "it"={last_file}, "there"/{last_dir}
3313
+ 6. For reading files, prefer: head -100 filename (shows first 100 lines)
3314
+ 7. For finding things, use: find ~ -maxdepth 4 -name '*pattern*' 2>/dev/null
3315
+ 8. For creating files: touch filename OR echo "content" > filename
3316
+ 9. For creating directories: mkdir dirname
3317
+ 10. ALWAYS include 2>/dev/null to suppress errors from find
3318
+ 11. 🚨 MULTI-STEP QUERIES: For queries like "read X and do Y", ONLY generate the FIRST step (reading X). The LLM will handle subsequent steps after seeing the file contents.
3319
+ 12. 🚨 NEVER use python -m py_compile or other code execution for finding bugs - just read the file with cat/head
3320
+
2687
3321
  Examples:
2688
- "where am i?" → {{"action": "pwd"}}
2689
- "what files here?" → {{"action": "ls"}}
2690
- "find cm522" → {{"action": "find", "search_target": "cm522"}}
2691
- "look into it" + Previous: "Found /path" {{"action": "ls", "target_path": "/path"}}
2692
- "show me calculate_betas.R" → {{"action": "read_file", "file_path": "calculate_betas.R"}}
2693
- "open regression.R" → {{"action": "read_file", "file_path": "regression.R"}}
2694
- "read that file" + Previous: "regression.R" → {{"action": "read_file", "file_path": "regression.R"}}
2695
- "display analysis.py" → {{"action": "read_file", "file_path": "analysis.py"}}
2696
- "cat data.csv" → {{"action": "read_file", "file_path": "data.csv"}}
2697
- "what columns does it have?" + Previous: file was shown → {{"action": "none"}} (LLM will parse from conversation)
2698
- "Tesla revenue" → {{"action": "none"}}
2699
-
2700
- KEY: If query mentions a specific FILENAME (*.R, *.py, *.csv), use read_file, NOT find!
3322
+ "where am i?" → {{"action": "execute", "command": "pwd", "reason": "Show current directory", "updates_context": false}}
3323
+ "list files" → {{"action": "execute", "command": "ls -lah", "reason": "List all files with details", "updates_context": false}}
3324
+ "find cm522" → {{"action": "execute", "command": "find ~ -maxdepth 4 -name '*cm522*' -type d 2>/dev/null | head -20", "reason": "Search for cm522 directory", "updates_context": false}}
3325
+ "go to Downloads" → {{"action": "execute", "command": "cd ~/Downloads && pwd", "reason": "Navigate to Downloads directory", "updates_context": true}}
3326
+ "show me calc.R" → {{"action": "execute", "command": "head -100 calc.R", "reason": "Display file contents", "updates_context": true}}
3327
+ "create test directory" → {{"action": "execute", "command": "mkdir test && echo 'Created test/'", "reason": "Create new directory", "updates_context": true}}
3328
+ "create empty config.json" → {{"action": "execute", "command": "touch config.json && echo 'Created config.json'", "reason": "Create empty file", "updates_context": true}}
3329
+ "write hello.txt with content Hello World" → {{"action": "execute", "command": "echo 'Hello World' > hello.txt", "reason": "Create file with content", "updates_context": true}}
3330
+ "create results.txt with line 1 and line 2" → {{"action": "execute", "command": "echo 'line 1' > results.txt && echo 'line 2' >> results.txt", "reason": "Create file with multiple lines", "updates_context": true}}
3331
+ "fix bug in script.py change OLD to NEW" → {{"action": "execute", "command": "sed -i 's/OLD/NEW/g' script.py && echo 'Fixed script.py'", "reason": "Edit file to fix bug", "updates_context": true}}
3332
+ "search for TODO in py files" → {{"action": "execute", "command": "grep -n 'TODO' *.py 2>/dev/null", "reason": "Find TODO comments", "updates_context": false}}
3333
+ "find all bugs in code" → {{"action": "execute", "command": "grep -rn 'BUG:' . 2>/dev/null", "reason": "Search for bug markers in code", "updates_context": false}}
3334
+ "read analyze.py and find bugs" → {{"action": "execute", "command": "head -200 analyze.py", "reason": "Read file to analyze bugs", "updates_context": false}}
3335
+ "show me calc.py completely" → {{"action": "execute", "command": "cat calc.py", "reason": "Display entire file", "updates_context": false}}
3336
+ "git status" → {{"action": "execute", "command": "git status", "reason": "Check repository status", "updates_context": false}}
3337
+ "what's in that file?" + last_file=data.csv → {{"action": "execute", "command": "head -100 data.csv", "reason": "Show file contents", "updates_context": false}}
3338
+ "hello" → {{"action": "none", "reason": "Conversational greeting, no command needed"}}
3339
+ "test" → {{"action": "none", "reason": "Ambiguous query, needs clarification"}}
3340
+ "thanks" → {{"action": "none", "reason": "Conversational acknowledgment"}}
3341
+ "Tesla revenue" → {{"action": "none", "reason": "Finance query, will use FinSight API not shell"}}
3342
+ "what does the error mean?" → {{"action": "none", "reason": "Explanation request, no command needed"}}
2701
3343
 
2702
3344
  JSON:"""
2703
3345
 
@@ -2715,17 +3357,285 @@ JSON:"""
2715
3357
 
2716
3358
  plan = json.loads(plan_text)
2717
3359
  shell_action = plan.get("action", "none")
3360
+ command = plan.get("command", "")
3361
+ reason = plan.get("reason", "")
3362
+ updates_context = plan.get("updates_context", False)
2718
3363
 
2719
3364
  if debug_mode:
2720
3365
  print(f"🔍 SHELL PLAN: {plan}")
2721
3366
 
2722
- # Execute shell command based on plan
2723
- if shell_action == "pwd":
2724
- pwd_output = self.execute_command("pwd")
2725
- api_results["shell_info"] = {"current_directory": pwd_output.strip()}
2726
- tools_used.append("shell_execution")
3367
+ # GENERIC COMMAND EXECUTION - No more hardcoded actions!
3368
+ if shell_action == "execute" and command:
3369
+ # Check command safety
3370
+ safety_level = self._classify_command_safety(command)
3371
+
3372
+ if debug_mode:
3373
+ print(f"🔍 Command: {command}")
3374
+ print(f"🔍 Safety: {safety_level}")
3375
+
3376
+ if safety_level == 'BLOCKED':
3377
+ api_results["shell_info"] = {
3378
+ "error": f"Command blocked for safety: {command}",
3379
+ "reason": "This command could cause system damage"
3380
+ }
3381
+ else:
3382
+ # ========================================
3383
+ # COMMAND INTERCEPTOR: Translate shell commands to file operations
3384
+ # (Claude Code / Cursor parity)
3385
+ # ========================================
3386
+ intercepted = False
3387
+ output = ""
3388
+
3389
+ # Check for file reading commands (cat, head, tail)
3390
+ if command.startswith(('cat ', 'head ', 'tail ')):
3391
+ import shlex
3392
+ try:
3393
+ parts = shlex.split(command)
3394
+ cmd = parts[0]
3395
+
3396
+ # Extract filename (last non-flag argument)
3397
+ filename = None
3398
+ for part in reversed(parts[1:]):
3399
+ if not part.startswith('-'):
3400
+ filename = part
3401
+ break
3402
+
3403
+ if filename:
3404
+ # Use read_file instead of cat/head/tail
3405
+ if cmd == 'head':
3406
+ # head -n 100 file OR head file
3407
+ limit = 100 # default
3408
+ if '-n' in parts or '-' in parts[0]:
3409
+ try:
3410
+ idx = parts.index('-n') if '-n' in parts else 0
3411
+ limit = int(parts[idx + 1])
3412
+ except:
3413
+ pass
3414
+ output = self.read_file(filename, offset=0, limit=limit)
3415
+ elif cmd == 'tail':
3416
+ # For tail, read last N lines (harder, so just read all and show it's tail)
3417
+ output = self.read_file(filename)
3418
+ if "ERROR" not in output:
3419
+ lines = output.split('\n')
3420
+ output = '\n'.join(lines[-100:]) # last 100 lines
3421
+ else: # cat
3422
+ output = self.read_file(filename)
3423
+
3424
+ intercepted = True
3425
+ tools_used.append("read_file")
3426
+ if debug_mode:
3427
+ print(f"🔄 Intercepted: {command} → read_file({filename})")
3428
+ except:
3429
+ pass # Fall back to shell execution
3430
+
3431
+ # Check for file search commands (find)
3432
+ if not intercepted and 'find' in command and '-name' in command:
3433
+ try:
3434
+ import re
3435
+ # Extract pattern: find ... -name '*pattern*'
3436
+ name_match = re.search(r"-name\s+['\"]?\*?([^'\"*\s]+)\*?['\"]?", command)
3437
+ if name_match:
3438
+ pattern = f"**/*{name_match.group(1)}*"
3439
+ path_match = re.search(r"find\s+([^\s]+)", command)
3440
+ search_path = path_match.group(1) if path_match else "."
3441
+
3442
+ result = self.glob_search(pattern, search_path)
3443
+ output = '\n'.join(result['files'][:20]) # Show first 20 matches
3444
+ intercepted = True
3445
+ tools_used.append("glob_search")
3446
+ if debug_mode:
3447
+ print(f"🔄 Intercepted: {command} → glob_search({pattern}, {search_path})")
3448
+ except:
3449
+ pass
3450
+
3451
+ # Check for file writing commands (echo > file, grep > file, etc.) - CHECK THIS FIRST!
3452
+ # This must come BEFORE the plain grep interceptor
3453
+ if not intercepted and ('>' in command or '>>' in command):
3454
+ try:
3455
+ import re
3456
+
3457
+ # Handle grep ... > file (intercept and execute grep, then write output)
3458
+ if 'grep' in command and '>' in command:
3459
+ # Extract: grep -rn 'pattern' path > output.txt
3460
+ grep_match = re.search(r"grep\s+(.*)>\s*(\S+)", command)
3461
+ if grep_match:
3462
+ grep_part = grep_match.group(1).strip()
3463
+ output_file = grep_match.group(2)
3464
+
3465
+ # Extract pattern and options from grep command
3466
+ pattern_match = re.search(r"['\"]([^'\"]+)['\"]", grep_part)
3467
+ if pattern_match:
3468
+ pattern = pattern_match.group(1)
3469
+ search_path = "."
3470
+ file_pattern = "*.py" if "*.py" in command else "*"
3471
+
3472
+ if debug_mode:
3473
+ print(f"🔄 Intercepted: {command} → grep_search('{pattern}', '{search_path}', '{file_pattern}') + write_file({output_file})")
3474
+
3475
+ # Execute grep_search
3476
+ try:
3477
+ grep_result = self.grep_search(
3478
+ pattern=pattern,
3479
+ path=search_path,
3480
+ file_pattern=file_pattern,
3481
+ output_mode="content"
3482
+ )
3483
+
3484
+ # Format matches as text (like grep -rn output)
3485
+ output_lines = []
3486
+ for file_path, matches in grep_result.get('matches', {}).items():
3487
+ for line_num, line_content in matches:
3488
+ output_lines.append(f"{file_path}:{line_num}:{line_content}")
3489
+
3490
+ content_to_write = '\n'.join(output_lines) if output_lines else "(no matches found)"
3491
+
3492
+ # Write grep output to file
3493
+ write_result = self.write_file(output_file, content_to_write)
3494
+ if write_result['success']:
3495
+ output = f"Found {len(output_lines)} lines with '{pattern}' → Created {output_file} ({write_result['bytes_written']} bytes)"
3496
+ intercepted = True
3497
+ tools_used.extend(["grep_search", "write_file"])
3498
+ except Exception as e:
3499
+ if debug_mode:
3500
+ print(f"⚠️ Grep > file interception error: {e}")
3501
+ # Fall back to normal execution
3502
+ pass
3503
+
3504
+ # Extract: echo 'content' > filename OR cat << EOF > filename
3505
+ if not intercepted and 'echo' in command and '>' in command:
3506
+ # echo 'content' > file OR echo "content" > file
3507
+ match = re.search(r"echo\s+['\"](.+?)['\"].*?>\s*(\S+)", command)
3508
+ if match:
3509
+ content = match.group(1)
3510
+ filename = match.group(2)
3511
+ # Unescape common sequences
3512
+ content = content.replace('\\n', '\n').replace('\\t', '\t')
3513
+ result = self.write_file(filename, content + '\n')
3514
+ if result['success']:
3515
+ output = f"Created {filename} ({result['bytes_written']} bytes)"
3516
+ intercepted = True
3517
+ tools_used.append("write_file")
3518
+ if debug_mode:
3519
+ print(f"🔄 Intercepted: {command} → write_file({filename}, ...)")
3520
+ except:
3521
+ pass
3522
+
3523
+ # Check for sed editing commands
3524
+ if not intercepted and command.startswith('sed '):
3525
+ try:
3526
+ import re
3527
+ # sed 's/old/new/g' file OR sed -i 's/old/new/' file
3528
+ match = re.search(r"sed.*?['\"]s/([^/]+)/([^/]+)/", command)
3529
+ if match:
3530
+ old_text = match.group(1)
3531
+ new_text = match.group(2)
3532
+ # Extract filename (last argument)
3533
+ parts = command.split()
3534
+ filename = parts[-1]
3535
+
3536
+ # Determine if replace_all based on /g flag
3537
+ replace_all = '/g' in command
3538
+
3539
+ result = self.edit_file(filename, old_text, new_text, replace_all=replace_all)
3540
+ if result['success']:
3541
+ output = result['message']
3542
+ intercepted = True
3543
+ tools_used.append("edit_file")
3544
+ if debug_mode:
3545
+ print(f"🔄 Intercepted: {command} → edit_file({filename}, {old_text}, {new_text})")
3546
+ except:
3547
+ pass
3548
+
3549
+ # Check for heredoc file creation (cat << EOF > file)
3550
+ if not intercepted and '<<' in command and ('EOF' in command or 'HEREDOC' in command):
3551
+ try:
3552
+ import re
3553
+ # Extract: cat << EOF > filename OR cat > filename << EOF
3554
+ # Note: We can't actually get the heredoc content from a single command line
3555
+ # This would need to be handled differently (multi-line input)
3556
+ # For now, just detect and warn
3557
+ if debug_mode:
3558
+ print(f"⚠️ Heredoc detected but not intercepted: {command[:80]}")
3559
+ except:
3560
+ pass
3561
+
3562
+ # Check for content search commands (grep -r) WITHOUT redirection
3563
+ # This comes AFTER grep > file interceptor to avoid conflicts
3564
+ if not intercepted and command.startswith('grep ') and ('-r' in command or '-R' in command):
3565
+ try:
3566
+ import re
3567
+ # Extract pattern: grep -r 'pattern' path
3568
+ pattern_match = re.search(r"grep.*?['\"]([^'\"]+)['\"]", command)
3569
+ if pattern_match:
3570
+ pattern = pattern_match.group(1)
3571
+ # Extract path (last argument usually)
3572
+ parts = command.split()
3573
+ search_path = parts[-1] if len(parts) > 2 else "."
3574
+
3575
+ result = self.grep_search(pattern, search_path, "*.py", output_mode="files_with_matches")
3576
+ output = f"Files matching '{pattern}':\n" + '\n'.join(result['files'][:20])
3577
+ intercepted = True
3578
+ tools_used.append("grep_search")
3579
+ if debug_mode:
3580
+ print(f"🔄 Intercepted: {command} → grep_search({pattern}, {search_path})")
3581
+ except:
3582
+ pass
3583
+
3584
+ # If not intercepted, execute as shell command
3585
+ if not intercepted:
3586
+ output = self.execute_command(command)
3587
+
3588
+ if not output.startswith("ERROR"):
3589
+ # Success - store results
3590
+ api_results["shell_info"] = {
3591
+ "command": command,
3592
+ "output": output,
3593
+ "reason": reason,
3594
+ "safety_level": safety_level
3595
+ }
3596
+ tools_used.append("shell_execution")
3597
+
3598
+ # Update file context if needed
3599
+ if updates_context:
3600
+ import re
3601
+ # Extract file paths from command
3602
+ file_patterns = r'([a-zA-Z0-9_\-./]+\.(py|r|csv|txt|json|md|ipynb|rmd))'
3603
+ files_mentioned = re.findall(file_patterns, command, re.IGNORECASE)
3604
+ if files_mentioned:
3605
+ file_path = files_mentioned[0][0]
3606
+ self.file_context['last_file'] = file_path
3607
+ if file_path not in self.file_context['recent_files']:
3608
+ self.file_context['recent_files'].append(file_path)
3609
+ self.file_context['recent_files'] = self.file_context['recent_files'][-5:] # Keep last 5
3610
+
3611
+ # Extract directory paths
3612
+ dir_patterns = r'cd\s+([^\s&|;]+)|mkdir\s+([^\s&|;]+)'
3613
+ dirs_mentioned = re.findall(dir_patterns, command)
3614
+ if dirs_mentioned:
3615
+ for dir_tuple in dirs_mentioned:
3616
+ dir_path = dir_tuple[0] or dir_tuple[1]
3617
+ if dir_path:
3618
+ self.file_context['last_directory'] = dir_path
3619
+ if dir_path not in self.file_context['recent_dirs']:
3620
+ self.file_context['recent_dirs'].append(dir_path)
3621
+ self.file_context['recent_dirs'] = self.file_context['recent_dirs'][-5:] # Keep last 5
3622
+
3623
+ # If cd command, update current_cwd
3624
+ if command.startswith('cd '):
3625
+ try:
3626
+ new_cwd = self.execute_command("pwd").strip()
3627
+ self.file_context['current_cwd'] = new_cwd
3628
+ except:
3629
+ pass
3630
+ else:
3631
+ # Command failed
3632
+ api_results["shell_info"] = {
3633
+ "error": output,
3634
+ "command": command
3635
+ }
2727
3636
 
2728
- elif shell_action == "ls":
3637
+ # Backwards compatibility: support old hardcoded actions if LLM still returns them
3638
+ elif shell_action == "pwd":
2729
3639
  target = plan.get("target_path")
2730
3640
  if target:
2731
3641
  ls_output = self.execute_command(f"ls -lah {target}")
@@ -2757,6 +3667,32 @@ JSON:"""
2757
3667
  }
2758
3668
  tools_used.append("shell_execution")
2759
3669
 
3670
+ elif shell_action == "cd":
3671
+ # NEW: Change directory
3672
+ target = plan.get("target_path")
3673
+ if target:
3674
+ # Expand ~ to home directory
3675
+ if target.startswith("~"):
3676
+ home = os.path.expanduser("~")
3677
+ target = target.replace("~", home, 1)
3678
+
3679
+ # Execute cd command
3680
+ cd_cmd = f"cd {target} && pwd"
3681
+ cd_output = self.execute_command(cd_cmd)
3682
+
3683
+ if not cd_output.startswith("ERROR"):
3684
+ api_results["shell_info"] = {
3685
+ "directory_changed": True,
3686
+ "new_directory": cd_output.strip(),
3687
+ "target_path": target
3688
+ }
3689
+ tools_used.append("shell_execution")
3690
+ else:
3691
+ api_results["shell_info"] = {
3692
+ "directory_changed": False,
3693
+ "error": f"Failed to change to {target}: {cd_output}"
3694
+ }
3695
+
2760
3696
  elif shell_action == "read_file":
2761
3697
  # NEW: Read and inspect file (R, Python, CSV, etc.)
2762
3698
  import re # Import at function level
@@ -2836,6 +3772,14 @@ JSON:"""
2836
3772
  if debug_mode and is_vague:
2837
3773
  print(f"🔍 Query is VAGUE - skipping expensive APIs")
2838
3774
 
3775
+ # If query is vague, hint to backend LLM to ask clarifying questions
3776
+ if is_vague:
3777
+ api_results["query_analysis"] = {
3778
+ "is_vague": True,
3779
+ "suggestion": "Ask clarifying questions instead of guessing",
3780
+ "reason": "Query needs more specificity to provide accurate answer"
3781
+ }
3782
+
2839
3783
  # Skip Archive/FinSight if query is too vague, but still allow web search later
2840
3784
  if not is_vague:
2841
3785
  # Archive API for research
@@ -2914,32 +3858,78 @@ JSON:"""
2914
3858
  # - Shell said "none" (not a directory/file operation)
2915
3859
  # - We don't have enough data from Archive/FinSight
2916
3860
 
2917
- if self.web_search and shell_action == "none":
3861
+ # First check: Is this a conversational query that doesn't need web search?
3862
+ def is_conversational_query(query: str) -> bool:
3863
+ """Detect if query is conversational (greeting, thanks, testing, etc.)"""
3864
+ query_lower = query.lower().strip()
3865
+
3866
+ # Single word queries that are conversational
3867
+ conversational_words = {
3868
+ 'hello', 'hi', 'hey', 'thanks', 'thank', 'ok', 'okay', 'yes', 'no',
3869
+ 'test', 'testing', 'cool', 'nice', 'great', 'awesome', 'perfect',
3870
+ 'bye', 'goodbye', 'quit', 'exit', 'help'
3871
+ }
3872
+
3873
+ # Short conversational phrases
3874
+ conversational_phrases = [
3875
+ 'how are you', 'thank you', 'thanks!', 'ok', 'got it', 'i see',
3876
+ 'makes sense', 'sounds good', 'that works', 'no problem'
3877
+ ]
3878
+
3879
+ words = query_lower.split()
3880
+
3881
+ # Single word check
3882
+ if len(words) == 1 and words[0] in conversational_words:
3883
+ return True
3884
+
3885
+ # Short phrase check
3886
+ if len(words) <= 3 and any(phrase in query_lower for phrase in conversational_phrases):
3887
+ return True
3888
+
3889
+ # Question marks with no content words (just pronouns)
3890
+ if '?' in query_lower and len(words) <= 2:
3891
+ return True
3892
+
3893
+ return False
3894
+
3895
+ skip_web_search = is_conversational_query(request.question)
3896
+
3897
+ if self.web_search and shell_action == "none" and not skip_web_search:
2918
3898
  # Ask LLM: Should we web search for this?
2919
- web_decision_prompt = f"""Should we use web search for this query?
3899
+ web_decision_prompt = f"""You are a tool selection expert. Decide if web search is needed.
2920
3900
 
2921
3901
  User query: "{request.question}"
2922
3902
  Data already available: {list(api_results.keys())}
2923
- Shell action: {shell_action}
3903
+ Tools already used: {tools_used}
3904
+
3905
+ AVAILABLE TOOLS YOU SHOULD KNOW:
3906
+ 1. FinSight API: Company financial data (revenue, income, margins, ratios, cash flow, balance sheet, SEC filings)
3907
+ - Covers: All US public companies (~8,000)
3908
+ - Data: SEC EDGAR + Yahoo Finance
3909
+ - Metrics: 50+ financial KPIs
3910
+
3911
+ 2. Archive API: Academic research papers
3912
+ - Covers: Semantic Scholar, OpenAlex, PubMed
3913
+ - Data: Papers, citations, abstracts
3914
+
3915
+ 3. Web Search: General information, current events
3916
+ - Covers: Anything on the internet
3917
+ - Use for: Market share, industry news, non-financial company info
3918
+
3919
+ DECISION RULES:
3920
+ - If query is about company financials (revenue, profit, margins, etc.) → Check if FinSight already provided data
3921
+ - If FinSight has data in api_results → Web search is NOT needed
3922
+ - If FinSight was called but no data → Web search as fallback is OK
3923
+ - If query is about market share, industry size, trends → Web search (FinSight doesn't have this)
3924
+ - If query is about research papers → Archive handles it, not web
3925
+ - If query is conversational → Already filtered, you won't see these
2924
3926
 
2925
3927
  Respond with JSON:
2926
3928
  {{
2927
3929
  "use_web_search": true/false,
2928
- "reason": "why or why not"
3930
+ "reason": "explain why based on tools available and data already fetched"
2929
3931
  }}
2930
3932
 
2931
- Use web search for:
2932
- - Market share/size (not in SEC filings)
2933
- - Current prices (Bitcoin, commodities, real-time data)
2934
- - Industry data, statistics
2935
- - Recent events, news
2936
- - Questions not answered by existing data
2937
-
2938
- Don't use if:
2939
- - Shell already handled it (pwd/ls/find)
2940
- - Question answered by research/financial APIs
2941
- - Pure opinion question
2942
-
2943
3933
  JSON:"""
2944
3934
 
2945
3935
  try:
@@ -2986,11 +3976,44 @@ JSON:"""
2986
3976
  api_results=api_results,
2987
3977
  tools_used=tools_used
2988
3978
  )
2989
-
3979
+
3980
+ # POST-PROCESSING: Auto-extract code blocks and write files if user requested file creation
3981
+ # This fixes the issue where LLM shows corrected code but doesn't create the file
3982
+ if any(keyword in request.question.lower() for keyword in ['create', 'write', 'save', 'generate', 'fixed', 'corrected']):
3983
+ # Extract filename from query (e.g., "write to foo.py", "create bar_fixed.py")
3984
+ import re
3985
+ filename_match = re.search(r'(?:to|create|write|save|generate)\s+(\w+[._-]\w+\.[\w]+)', request.question, re.IGNORECASE)
3986
+ if not filename_match:
3987
+ # Try pattern: "foo_fixed.py" or "bar.py"
3988
+ filename_match = re.search(r'(\w+_fixed\.[\w]+|\w+\.[\w]+)', request.question)
3989
+
3990
+ if filename_match:
3991
+ target_filename = filename_match.group(1)
3992
+
3993
+ # Extract code block from response (```python ... ``` or ``` ... ```)
3994
+ code_block_pattern = r'```(?:python|bash|sh|r|sql)?\n(.*?)```'
3995
+ code_blocks = re.findall(code_block_pattern, response.response, re.DOTALL)
3996
+
3997
+ if code_blocks:
3998
+ # Use the LARGEST code block (likely the complete file)
3999
+ largest_block = max(code_blocks, key=len)
4000
+
4001
+ # Write to file
4002
+ try:
4003
+ write_result = self.write_file(target_filename, largest_block)
4004
+ if write_result['success']:
4005
+ # Append confirmation to response
4006
+ response.response += f"\n\n✅ File created: {target_filename} ({write_result['bytes_written']} bytes)"
4007
+ if debug_mode:
4008
+ print(f"🔄 Auto-extracted code block → write_file({target_filename})")
4009
+ except Exception as e:
4010
+ if debug_mode:
4011
+ print(f"⚠️ Auto-write failed: {e}")
4012
+
2990
4013
  # CRITICAL: Save to conversation history
2991
4014
  self.conversation_history.append({"role": "user", "content": request.question})
2992
4015
  self.conversation_history.append({"role": "assistant", "content": response.response})
2993
-
4016
+
2994
4017
  return response
2995
4018
 
2996
4019
  # DEV MODE ONLY: Direct Groq calls (only works with local API keys)