cite-agent 1.3.6__py3-none-any.whl → 1.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cite-agent might be problematic. Click here for more details.
- cite_agent/__version__.py +1 -1
- cite_agent/cli.py +9 -2
- cite_agent/enhanced_ai_agent.py +1100 -77
- {cite_agent-1.3.6.dist-info → cite_agent-1.3.8.dist-info}/METADATA +1 -1
- cite_agent-1.3.8.dist-info/RECORD +31 -0
- {cite_agent-1.3.6.dist-info → cite_agent-1.3.8.dist-info}/top_level.txt +0 -1
- cite_agent-1.3.6.dist-info/RECORD +0 -57
- src/__init__.py +0 -1
- src/services/__init__.py +0 -132
- src/services/auth_service/__init__.py +0 -3
- src/services/auth_service/auth_manager.py +0 -33
- src/services/graph/__init__.py +0 -1
- src/services/graph/knowledge_graph.py +0 -194
- src/services/llm_service/__init__.py +0 -5
- src/services/llm_service/llm_manager.py +0 -495
- src/services/paper_service/__init__.py +0 -5
- src/services/paper_service/openalex.py +0 -231
- src/services/performance_service/__init__.py +0 -1
- src/services/performance_service/rust_performance.py +0 -395
- src/services/research_service/__init__.py +0 -23
- src/services/research_service/chatbot.py +0 -2056
- src/services/research_service/citation_manager.py +0 -436
- src/services/research_service/context_manager.py +0 -1441
- src/services/research_service/conversation_manager.py +0 -597
- src/services/research_service/critical_paper_detector.py +0 -577
- src/services/research_service/enhanced_research.py +0 -121
- src/services/research_service/enhanced_synthesizer.py +0 -375
- src/services/research_service/query_generator.py +0 -777
- src/services/research_service/synthesizer.py +0 -1273
- src/services/search_service/__init__.py +0 -5
- src/services/search_service/indexer.py +0 -186
- src/services/search_service/search_engine.py +0 -342
- src/services/simple_enhanced_main.py +0 -287
- {cite_agent-1.3.6.dist-info → cite_agent-1.3.8.dist-info}/WHEEL +0 -0
- {cite_agent-1.3.6.dist-info → cite_agent-1.3.8.dist-info}/entry_points.txt +0 -0
- {cite_agent-1.3.6.dist-info → cite_agent-1.3.8.dist-info}/licenses/LICENSE +0 -0
cite_agent/enhanced_ai_agent.py
CHANGED
|
@@ -89,6 +89,15 @@ class EnhancedNocturnalAgent:
|
|
|
89
89
|
from .workflow import WorkflowManager
|
|
90
90
|
self.workflow = WorkflowManager()
|
|
91
91
|
self.last_paper_result = None # Track last paper mentioned for "save that"
|
|
92
|
+
|
|
93
|
+
# File context tracking (for pronoun resolution and multi-turn)
|
|
94
|
+
self.file_context = {
|
|
95
|
+
'last_file': None, # Last file mentioned/read
|
|
96
|
+
'last_directory': None, # Last directory mentioned/navigated
|
|
97
|
+
'recent_files': [], # Last 5 files (for "those files")
|
|
98
|
+
'recent_dirs': [], # Last 5 directories
|
|
99
|
+
'current_cwd': None, # Track shell's current directory
|
|
100
|
+
}
|
|
92
101
|
try:
|
|
93
102
|
self.per_user_token_limit = int(os.getenv("GROQ_PER_USER_TOKENS", 50000))
|
|
94
103
|
except (TypeError, ValueError):
|
|
@@ -994,7 +1003,17 @@ class EnhancedNocturnalAgent:
|
|
|
994
1003
|
capability_lines.append("• You can SEARCH user's paper collection")
|
|
995
1004
|
capability_lines.append("• You can COPY text to user's clipboard")
|
|
996
1005
|
capability_lines.append("• User's query history is automatically tracked")
|
|
997
|
-
|
|
1006
|
+
|
|
1007
|
+
# Add file operation capabilities (Claude Code / Cursor parity)
|
|
1008
|
+
capability_lines.append("")
|
|
1009
|
+
capability_lines.append("📁 DIRECT FILE OPERATIONS (Always available):")
|
|
1010
|
+
capability_lines.append("• read_file(path) - Read files with line numbers (like cat but better)")
|
|
1011
|
+
capability_lines.append("• write_file(path, content) - Create/overwrite files directly")
|
|
1012
|
+
capability_lines.append("• edit_file(path, old, new) - Surgical find/replace edits")
|
|
1013
|
+
capability_lines.append("• glob_search(pattern) - Fast file search (e.g., '**/*.py')")
|
|
1014
|
+
capability_lines.append("• grep_search(pattern) - Fast content search in files")
|
|
1015
|
+
capability_lines.append("• batch_edit_files(edits) - Multi-file refactoring")
|
|
1016
|
+
|
|
998
1017
|
sections.append("Capabilities in play:\n" + "\n".join(capability_lines))
|
|
999
1018
|
|
|
1000
1019
|
# ENHANCED TRUTH-SEEKING RULES (adapt based on mode)
|
|
@@ -1089,6 +1108,48 @@ class EnhancedNocturnalAgent:
|
|
|
1089
1108
|
"• Example: 'I found 3 papers. I can save them to your library or export to BibTeX if you'd like.'",
|
|
1090
1109
|
]
|
|
1091
1110
|
rules.extend(workflow_rules)
|
|
1111
|
+
|
|
1112
|
+
# Add file operation tool usage rules (CRITICAL for Claude Code parity)
|
|
1113
|
+
file_ops_rules = [
|
|
1114
|
+
"",
|
|
1115
|
+
"📁 FILE OPERATION TOOL USAGE (Use these INSTEAD of shell commands):",
|
|
1116
|
+
"",
|
|
1117
|
+
"🔴 ALWAYS PREFER (in order):",
|
|
1118
|
+
"1. read_file(path) → INSTEAD OF: cat, head, tail",
|
|
1119
|
+
"2. write_file(path, content) → INSTEAD OF: echo >, cat << EOF, printf >",
|
|
1120
|
+
"3. edit_file(path, old, new) → INSTEAD OF: sed, awk",
|
|
1121
|
+
"4. glob_search(pattern, path) → INSTEAD OF: find, ls",
|
|
1122
|
+
"5. grep_search(pattern, path, file_pattern) → INSTEAD OF: grep -r",
|
|
1123
|
+
"",
|
|
1124
|
+
"✅ CORRECT USAGE:",
|
|
1125
|
+
"• Reading code: result = read_file('app.py')",
|
|
1126
|
+
"• Creating file: write_file('config.json', '{...}')",
|
|
1127
|
+
"• Editing code: edit_file('main.py', 'old_var', 'new_var', replace_all=True)",
|
|
1128
|
+
"• Finding files: glob_search('**/*.py', '/home/user/project')",
|
|
1129
|
+
"• Searching code: grep_search('class.*Agent', '.', '*.py', output_mode='content')",
|
|
1130
|
+
"• Multi-file refactor: batch_edit_files([{file: 'a.py', old: '...', new: '...'}, ...])",
|
|
1131
|
+
"",
|
|
1132
|
+
"❌ ANTI-PATTERNS (Don't do these):",
|
|
1133
|
+
"• DON'T use cat when read_file exists",
|
|
1134
|
+
"• DON'T use echo > when write_file exists",
|
|
1135
|
+
"• DON'T use sed when edit_file exists",
|
|
1136
|
+
"• DON'T use find when glob_search exists",
|
|
1137
|
+
"• DON'T use grep -r when grep_search exists",
|
|
1138
|
+
"",
|
|
1139
|
+
"🎯 WHY USE THESE TOOLS:",
|
|
1140
|
+
"• read_file() shows line numbers (critical for code analysis)",
|
|
1141
|
+
"• write_file() handles escaping/quoting automatically (no heredoc hell)",
|
|
1142
|
+
"• edit_file() validates changes before applying (safer than sed)",
|
|
1143
|
+
"• glob_search() is faster and cleaner than find",
|
|
1144
|
+
"• grep_search() returns structured data (easier to parse)",
|
|
1145
|
+
"",
|
|
1146
|
+
"⚠️ SHELL COMMANDS ONLY FOR:",
|
|
1147
|
+
"• System operations (ps, df, du, uptime)",
|
|
1148
|
+
"• Git commands (git status, git diff, git log)",
|
|
1149
|
+
"• Package installs (pip install, Rscript -e \"install.packages(...)\")",
|
|
1150
|
+
"• Running Python/R scripts (python script.py, Rscript analysis.R)",
|
|
1151
|
+
]
|
|
1152
|
+
rules.extend(file_ops_rules)
|
|
1092
1153
|
|
|
1093
1154
|
sections.append("CRITICAL RULES:\n" + "\n".join(rules))
|
|
1094
1155
|
|
|
@@ -1950,14 +2011,17 @@ class EnhancedNocturnalAgent:
|
|
|
1950
2011
|
url = f"{self.finsight_base_url}/{endpoint}"
|
|
1951
2012
|
# Start fresh with headers - don't use _default_headers which might be wrong
|
|
1952
2013
|
headers = {}
|
|
1953
|
-
|
|
2014
|
+
|
|
1954
2015
|
# Always use demo key for FinSight (SEC data is public)
|
|
1955
2016
|
headers["X-API-Key"] = "demo-key-123"
|
|
1956
|
-
|
|
2017
|
+
|
|
2018
|
+
# Mark request as agent-mediated for product separation
|
|
2019
|
+
headers["X-Request-Source"] = "agent"
|
|
2020
|
+
|
|
1957
2021
|
# Also add JWT if we have it
|
|
1958
2022
|
if self.auth_token:
|
|
1959
2023
|
headers["Authorization"] = f"Bearer {self.auth_token}"
|
|
1960
|
-
|
|
2024
|
+
|
|
1961
2025
|
debug_mode = os.getenv("NOCTURNAL_DEBUG", "").lower() == "1"
|
|
1962
2026
|
if debug_mode:
|
|
1963
2027
|
print(f"🔍 FinSight headers: {list(headers.keys())}, X-API-Key={headers.get('X-API-Key')}")
|
|
@@ -2179,40 +2243,552 @@ class EnhancedNocturnalAgent:
|
|
|
2179
2243
|
|
|
2180
2244
|
output = '\n'.join(output_lines).strip()
|
|
2181
2245
|
return output if output else "Command executed (no output)"
|
|
2182
|
-
|
|
2246
|
+
|
|
2183
2247
|
except Exception as e:
|
|
2184
2248
|
return f"ERROR: {e}"
|
|
2185
2249
|
|
|
2186
|
-
|
|
2250
|
+
# ========================================================================
|
|
2251
|
+
# DIRECT FILE OPERATIONS (Claude Code / Cursor Parity)
|
|
2252
|
+
# ========================================================================
|
|
2253
|
+
|
|
2254
|
+
def read_file(self, file_path: str, offset: int = 0, limit: int = 2000) -> str:
    """
    Read a text file and return the selected lines prefixed with line numbers.

    Args:
        file_path: Path to the file. ``~`` is expanded and a relative path is
            made absolute against the process working directory.
        offset: Number of leading lines to skip (0-indexed). Negative or
            missing values are treated as 0 so numbering always matches the
            real position in the file.
        limit: Maximum number of lines to return. ``0``, ``None`` or a
            negative value means "no limit".

    Returns:
        The selected lines, one per output line, each formatted as a
        right-aligned 6-wide 1-based line number, an arrow, and the line text
        with trailing whitespace removed. Returns "(empty file)" when the file
        has no lines, a short note when ``offset`` is past the last line, or a
        string starting with "ERROR:" when the file cannot be read.

    Side effects:
        On success, records the absolute path in ``self.file_context``
        (``last_file`` and the 5-entry ``recent_files`` list).
    """
    from itertools import islice

    try:
        # Expand ~ to home directory
        file_path = os.path.expanduser(file_path)

        # Make absolute if relative
        if not os.path.isabs(file_path):
            file_path = os.path.abspath(file_path)

        # Clamp the window: a negative offset used to slice from the END of
        # the file and produced line numbers that did not match the content.
        start = offset if offset and offset > 0 else 0
        stop = start + limit if limit and limit > 0 else None

        # islice streams the file, so only the requested window is held in
        # memory instead of every line of a potentially large file.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            selected = list(islice(f, start, stop))

        # Format with line numbers (1-indexed, like vim/editors)
        result = ''.join(
            f"{start + index + 1:6d}→{line.rstrip()}\n"
            for index, line in enumerate(selected)
        )

        # Update file context
        self.file_context['last_file'] = file_path
        if file_path not in self.file_context['recent_files']:
            self.file_context['recent_files'].append(file_path)
            self.file_context['recent_files'] = self.file_context['recent_files'][-5:]

        if result:
            return result
        if start == 0:
            return "(empty file)"
        # Distinguish "paged past the end" from a genuinely empty file.
        return f"(no lines at or beyond offset {start})"

    except FileNotFoundError:
        return f"ERROR: File not found: {file_path}"
    except PermissionError:
        return f"ERROR: Permission denied: {file_path}"
    except IsADirectoryError:
        return f"ERROR: {file_path} is a directory, not a file"
    except Exception as e:
        return f"ERROR: {type(e).__name__}: {e}"
|
|
2305
|
+
|
|
2306
|
+
def write_file(self, file_path: str, content: str) -> Dict[str, Any]:
    """
    Create a file, or overwrite an existing one, with the given text.

    Args:
        file_path: Path to the file. ``~`` is expanded and a relative path is
            made absolute against the process working directory. Missing
            parent directories are created.
        content: Full text of the file; it is written as UTF-8.

    Returns:
        {"success": bool, "message": str, "bytes_written": int}
        ``bytes_written`` is the size of ``content`` encoded as UTF-8. On
        failure ``success`` is False, ``bytes_written`` is 0 and ``message``
        starts with "ERROR:".

    Side effects:
        On success, records the absolute path in ``self.file_context``
        (``last_file`` and the 5-entry ``recent_files`` list).
    """
    try:
        # Expand ~ to home directory
        file_path = os.path.expanduser(file_path)

        # Make absolute if relative
        if not os.path.isabs(file_path):
            file_path = os.path.abspath(file_path)

        # Encode BEFORE opening: open(..., 'w') truncates the target, so text
        # that cannot be encoded must be rejected while the old file is still
        # intact. The length is also the real byte count; f.write() returns
        # the number of characters, which differs for non-ASCII text.
        # NOTE(review): on platforms that translate newlines in text mode the
        # on-disk size may differ from this count.
        byte_count = len(content.encode('utf-8'))

        # Create parent directories if needed
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        # Write file
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)

        # Update file context
        self.file_context['last_file'] = file_path
        if file_path not in self.file_context['recent_files']:
            self.file_context['recent_files'].append(file_path)
            self.file_context['recent_files'] = self.file_context['recent_files'][-5:]

        return {
            "success": True,
            "message": f"Wrote {byte_count} bytes to {file_path}",
            "bytes_written": byte_count
        }

    except PermissionError:
        return {
            "success": False,
            "message": f"ERROR: Permission denied: {file_path}",
            "bytes_written": 0
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"ERROR: {type(e).__name__}: {e}",
            "bytes_written": 0
        }
|
|
2359
|
+
|
|
2360
|
+
def edit_file(self, file_path: str, old_string: str, new_string: str,
              replace_all: bool = False) -> Dict[str, Any]:
    """
    Replace an exact substring inside a UTF-8 text file.

    Args:
        file_path: Path to the file. ``~`` is expanded and a relative path is
            made absolute against the process working directory.
        old_string: Exact, non-empty text to replace. It must occur exactly
            once unless ``replace_all`` is True.
        new_string: Replacement text.
        replace_all: If True, replace every occurrence of ``old_string``.

    Returns:
        {"success": bool, "message": str, "replacements": int}
        On failure ``success`` is False, ``replacements`` is 0, ``message``
        starts with "ERROR:" and the file is left untouched.

    Side effects:
        On success, rewrites the file and records the absolute path in
        ``self.file_context`` (``last_file`` and ``recent_files``).
    """
    try:
        # Expand ~ to home directory
        file_path = os.path.expanduser(file_path)

        # Make absolute if relative
        if not os.path.isabs(file_path):
            file_path = os.path.abspath(file_path)

        # "" is a substring of every string: it would pass the existence
        # check and, with replace_all, insert new_string between every
        # character of the file.
        if not old_string:
            return {
                "success": False,
                "message": "ERROR: old_string must not be empty",
                "replacements": 0
            }

        # Decode strictly. With errors='replace' undecodable bytes turn into
        # U+FFFD and the write-back below would make that corruption
        # permanent for files that are not valid UTF-8.
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        occurrences = content.count(old_string)

        # Check if old_string exists
        if occurrences == 0:
            return {
                "success": False,
                "message": f"ERROR: old_string not found in {file_path}",
                "replacements": 0
            }

        # Check uniqueness if not replace_all
        if not replace_all and occurrences > 1:
            return {
                "success": False,
                "message": f"ERROR: old_string appears {occurrences} times in {file_path}. Use replace_all=True or provide more context to make it unique.",
                "replacements": 0
            }

        # Either replace_all is set or there is exactly one occurrence, so an
        # unbounded replace changes exactly `occurrences` places.
        new_content = content.replace(old_string, new_string)

        # Write back
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(new_content)

        # Update file context (same bookkeeping as read_file / write_file)
        self.file_context['last_file'] = file_path
        if file_path not in self.file_context['recent_files']:
            self.file_context['recent_files'].append(file_path)
            self.file_context['recent_files'] = self.file_context['recent_files'][-5:]

        return {
            "success": True,
            "message": f"Replaced {occurrences} occurrence(s) in {file_path}",
            "replacements": occurrences
        }

    except FileNotFoundError:
        return {
            "success": False,
            "message": f"ERROR: File not found: {file_path}",
            "replacements": 0
        }
    except PermissionError:
        return {
            "success": False,
            "message": f"ERROR: Permission denied: {file_path}",
            "replacements": 0
        }
    except UnicodeDecodeError:
        return {
            "success": False,
            "message": f"ERROR: {file_path} is not valid UTF-8; refusing to edit it to avoid corrupting its contents",
            "replacements": 0
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"ERROR: {type(e).__name__}: {e}",
            "replacements": 0
        }
|
|
2440
|
+
|
|
2441
|
+
def glob_search(self, pattern: str, path: str = ".") -> Dict[str, Any]:
    """
    Find regular files matching a glob pattern, newest first.

    Args:
        pattern: Glob pattern (e.g., "*.py", "**/*.md", "src/**/*.ts").
            ``**`` matches across directories.
        path: Starting directory (default: current directory). ``~`` is
            expanded and a relative path is made absolute.

    Returns:
        {"files": List[str], "count": int, "pattern": str}
        ``files`` holds matching regular files (directories are skipped)
        sorted by modification time, newest first. ``pattern`` is the
        combined path + pattern that was searched. On an unexpected error
        ``files`` is empty, ``pattern`` is the pattern as passed in, and an
        extra "error" key describes the failure.
    """
    import glob as glob_module

    try:
        # Expand ~ to home directory
        path = os.path.expanduser(path)

        # Make absolute if relative
        if not os.path.isabs(path):
            path = os.path.abspath(path)

        # Combine path and pattern
        full_pattern = os.path.join(path, pattern)

        # Capture each file's mtime once while collecting. Calling getmtime
        # from inside the sort key would raise, and fail the whole search,
        # if any file were deleted between matching and sorting.
        dated_files = []
        for candidate in glob_module.iglob(full_pattern, recursive=True):
            if not os.path.isfile(candidate):
                continue  # directories and dangling links are not results
            try:
                dated_files.append((os.path.getmtime(candidate), candidate))
            except OSError:
                continue  # vanished after matching; skip it

        # Sort by modification time (newest first); ties keep glob order.
        dated_files.sort(key=lambda entry: entry[0], reverse=True)
        files = [candidate for _, candidate in dated_files]

        return {
            "files": files,
            "count": len(files),
            "pattern": full_pattern
        }

    except Exception as e:
        return {
            "files": [],
            "count": 0,
            "pattern": pattern,
            "error": f"{type(e).__name__}: {e}"
        }
|
|
2487
|
+
|
|
2488
|
+
def grep_search(self, pattern: str, path: str = ".",
                file_pattern: str = "*",
                output_mode: str = "files_with_matches",
                context_lines: int = 0,
                ignore_case: bool = False,
                max_results: int = 100) -> Dict[str, Any]:
    """
    Search file contents with a regular expression.

    Candidate files come from ``self.glob_search(file_pattern, path)``.
    Files that cannot be opened are skipped.

    Args:
        pattern: Regex pattern to search for.
        path: Directory to search in (``~`` expanded, made absolute).
        file_pattern: Glob pattern for files to search (e.g., "*.py").
        output_mode: "files_with_matches", "content", or "count".
        context_lines: In "content" mode, number of neighbouring lines to
            include before and after each matching line. They appear in the
            same (line_num, line_content) list as the matches.
        ignore_case: Case-insensitive search.
        max_results: In "files_with_matches" mode, the maximum number of
            matching files returned. In "content" mode, the search of a file
            stops once it has this many matching lines.

    Returns:
        Depends on output_mode:
        - files_with_matches: {"files": List[str], "count": int, "pattern": str}
        - content: {"matches": {file: [(line_num, line_content), ...]},
                    "file_count": int, "pattern": str}
        - count: {"counts": {file: match_count}, "total_matches": int,
                  "pattern": str}
        On an invalid regex, an invalid output_mode, a failed file listing or
        any other error: {"error": str}.
    """
    import re

    def load(target, as_lines=False):
        """Return the file's text (or its list of lines), or None if unreadable."""
        try:
            with open(target, 'r', encoding='utf-8', errors='replace') as f:
                return f.readlines() if as_lines else f.read()
        except OSError:
            # Best-effort search: skip unreadable files, but do not swallow
            # KeyboardInterrupt/SystemExit as a bare except would.
            return None

    try:
        if output_mode not in ("files_with_matches", "content", "count"):
            return {
                "error": f"Invalid output_mode: {output_mode}. Use 'files_with_matches', 'content', or 'count'."
            }

        # Expand ~ to home directory
        path = os.path.expanduser(path)

        # Make absolute if relative
        if not os.path.isabs(path):
            path = os.path.abspath(path)

        # Compile regex
        regex = re.compile(pattern, re.IGNORECASE if ignore_case else 0)

        # Find files to search; surface a listing failure instead of
        # reporting it as "no matches".
        glob_result = self.glob_search(file_pattern, path)
        if glob_result.get("error"):
            return {"error": glob_result["error"]}
        files_to_search = glob_result["files"]

        if output_mode == "files_with_matches":
            matching_files = []
            for file_path in files_to_search:
                # Cap the number of RESULTS. Truncating the candidate list
                # would miss every match located after the first
                # `max_results` candidates.
                if len(matching_files) >= max_results:
                    break
                content = load(file_path)
                if content is not None and regex.search(content):
                    matching_files.append(file_path)

            return {
                "files": matching_files,
                "count": len(matching_files),
                "pattern": pattern
            }

        if output_mode == "content":
            matches = {}
            radius = context_lines if context_lines and context_lines > 0 else 0
            for file_path in files_to_search:
                lines = load(file_path, as_lines=True)
                if lines is None:
                    continue

                hit_indexes = []
                for index, line in enumerate(lines):
                    if regex.search(line):
                        hit_indexes.append(index)
                        if len(hit_indexes) >= max_results:
                            break

                if not hit_indexes:
                    continue

                if radius:
                    # Union of the windows around each hit, so overlapping
                    # context is reported once and stays in file order.
                    wanted = sorted({
                        neighbour
                        for hit in hit_indexes
                        for neighbour in range(max(0, hit - radius),
                                               min(len(lines), hit + radius + 1))
                    })
                else:
                    wanted = hit_indexes

                matches[file_path] = [
                    (index + 1, lines[index].rstrip()) for index in wanted
                ]

            return {
                "matches": matches,
                "file_count": len(matches),
                "pattern": pattern
            }

        # output_mode == "count"
        counts = {}
        for file_path in files_to_search:
            content = load(file_path)
            if content is None:
                continue
            match_count = len(regex.findall(content))
            if match_count > 0:
                counts[file_path] = match_count

        return {
            "counts": counts,
            "total_matches": sum(counts.values()),
            "pattern": pattern
        }

    except re.error as e:
        return {
            "error": f"Invalid regex pattern: {e}"
        }
    except Exception as e:
        return {
            "error": f"{type(e).__name__}: {e}"
        }
|
|
2606
|
+
|
|
2607
|
+
async def batch_edit_files(self, edits: List[Dict[str, str]]) -> Dict[str, Any]:
    """
    Apply multiple file edits as a unit (all-or-nothing).

    Every edit is validated and applied to an in-memory copy first; files are
    only rewritten once the whole batch is known to be valid. Several edits
    may target the same file; they are applied in order, each one seeing the
    text produced by the previous one. If writing fails part-way, the files
    already rewritten are restored from their original text (best effort).

    The coroutine performs no awaits; it is declared ``async`` so existing
    callers can keep awaiting it.

    Args:
        edits: List of edit operations:
            [
                {"file": "path.py", "old": "...", "new": "..."},
                {"file": "other.py", "old": "...", "new": "...", "replace_all": True},
                ...
            ]
            ``old`` must be non-empty and must occur exactly once unless
            ``replace_all`` is true.

    Returns:
        On success:
        {
            "success": True,
            "results": {file: {"success": bool, "message": str, "replacements": int}},
            "total_edits": int,
            "successful_edits": int,
            "failed_edits": 0
        }
        ``results`` is keyed by the ``file`` value as given; replacements of
        several edits to one file are summed.

        On failure:
        {"success": False, "results": {}, "total_edits": 0,
         "failed_edits": len(edits), "error": str}
    """
    def failure(reason: str) -> Dict[str, Any]:
        """Build the uniform 'nothing was applied' response."""
        return {
            "success": False,
            "results": {},
            "total_edits": 0,
            "failed_edits": len(edits) if hasattr(edits, "__len__") else 0,
            "error": reason
        }

    try:
        originals = {}      # absolute path -> text currently on disk
        staged = {}         # absolute path -> text after the edits so far
        results = {}        # edit["file"] as given -> per-file result
        visited = []        # absolute paths in edit order (context tracking)

        # Phase 1: validate every edit and stage it in memory
        for edit in edits:
            # Check required keys up front; discovering a missing "new"
            # while writing would leave earlier files already modified.
            missing_keys = [key for key in ("file", "old", "new") if key not in edit]
            if missing_keys:
                return failure(
                    f"Validation failed: edit is missing required key(s) {missing_keys}. No edits applied."
                )

            file_key = edit["file"]
            old_string = edit["old"]
            new_string = edit["new"]
            replace_all = edit.get("replace_all", False)

            # Expand path
            file_path = os.path.expanduser(file_key)
            if not os.path.isabs(file_path):
                file_path = os.path.abspath(file_path)

            if file_path not in staged:
                # Check file exists
                if not os.path.exists(file_path):
                    return failure(f"Validation failed: {file_path} not found. No edits applied.")
                try:
                    # Strict decoding: rewriting a file that was decoded with
                    # replacement characters would corrupt non-UTF-8 content.
                    with open(file_path, 'r', encoding='utf-8') as f:
                        originals[file_path] = f.read()
                except Exception as e:
                    return failure(f"Validation failed reading {file_path}: {e}. No edits applied.")
                staged[file_path] = originals[file_path]

            if not old_string:
                # "" matches everywhere and would insert text between every
                # character when replace_all is set.
                return failure(f"Validation failed: empty pattern for {file_path}. No edits applied.")

            content = staged[file_path]
            occurrences = content.count(old_string)

            # Check old_string exists
            if occurrences == 0:
                return failure(f"Validation failed: Pattern not found in {file_path}. No edits applied.")

            # Check uniqueness if not replace_all
            if not replace_all and occurrences > 1:
                return failure(
                    f"Validation failed: Pattern appears {occurrences} times in {file_path}. Use replace_all or provide more context. No edits applied."
                )

            # replace_all, or exactly one occurrence: replace everything.
            staged[file_path] = content.replace(old_string, new_string)
            visited.append(file_path)

            total = occurrences + results.get(file_key, {}).get("replacements", 0)
            results[file_key] = {
                "success": True,
                "message": f"Replaced {total} occurrence(s) in {file_path}",
                "replacements": total
            }

        # Phase 2: write the staged text (validation passed)
        touched = []
        try:
            for file_path, text in staged.items():
                # Record before opening: open(..., 'w') truncates the file
                # even if the write itself then fails.
                touched.append(file_path)
                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(text)
        except Exception as e:
            unrestored = []
            for file_path in touched:
                try:
                    with open(file_path, 'w', encoding='utf-8') as f:
                        f.write(originals[file_path])
                except OSError:
                    unrestored.append(file_path)
            reason = f"Batch edit failed writing {touched[-1]}: {type(e).__name__}: {e}."
            if unrestored:
                reason += f" Could not restore: {', '.join(unrestored)}."
            else:
                reason += " Original contents restored. No edits applied."
            return failure(reason)

        # Update file context in edit order, so last_file is the last edit's file
        for file_path in visited:
            self.file_context['last_file'] = file_path
            if file_path not in self.file_context['recent_files']:
                self.file_context['recent_files'].append(file_path)
                self.file_context['recent_files'] = self.file_context['recent_files'][-5:]

        # Every edit passed validation and every write succeeded. Count the
        # edits, not the files, so several edits to one file are all counted.
        return {
            "success": True,
            "results": results,
            "total_edits": len(edits),
            "successful_edits": len(edits),
            "failed_edits": 0
        }

    except Exception as e:
        return failure(f"Batch edit failed: {type(e).__name__}: {e}")
|
|
2713
|
+
|
|
2714
|
+
# ========================================================================
|
|
2715
|
+
# END DIRECT FILE OPERATIONS
|
|
2716
|
+
# ========================================================================
|
|
2717
|
+
|
|
2718
|
+
def _classify_command_safety(self, cmd: str) -> str:
|
|
2719
|
+
"""
|
|
2720
|
+
Classify command by safety level for smart execution.
|
|
2721
|
+
Returns: 'SAFE', 'WRITE', 'DANGEROUS', or 'BLOCKED'
|
|
2191
2722
|
"""
|
|
2192
2723
|
cmd = cmd.strip()
|
|
2193
2724
|
if not cmd:
|
|
2194
|
-
return
|
|
2195
|
-
|
|
2196
|
-
|
|
2725
|
+
return 'BLOCKED'
|
|
2726
|
+
|
|
2727
|
+
cmd_lower = cmd.lower()
|
|
2728
|
+
cmd_parts = cmd.split()
|
|
2729
|
+
cmd_base = cmd_parts[0] if cmd_parts else ''
|
|
2730
|
+
cmd_with_sub = ' '.join(cmd_parts[:2]) if len(cmd_parts) >= 2 else ''
|
|
2731
|
+
|
|
2732
|
+
# BLOCKED: Catastrophic commands
|
|
2197
2733
|
nuclear_patterns = [
|
|
2198
|
-
'rm -rf /',
|
|
2199
|
-
'rm -rf
|
|
2200
|
-
'
|
|
2201
|
-
'dd if=/dev/zero
|
|
2202
|
-
'mkfs',
|
|
2203
|
-
'fdisk',
|
|
2734
|
+
'rm -rf /',
|
|
2735
|
+
'rm -rf ~',
|
|
2736
|
+
'rm -rf /*',
|
|
2737
|
+
'dd if=/dev/zero',
|
|
2738
|
+
'mkfs',
|
|
2739
|
+
'fdisk',
|
|
2204
2740
|
':(){ :|:& };:', # Fork bomb
|
|
2205
|
-
'chmod -
|
|
2741
|
+
'chmod -r 777 /',
|
|
2742
|
+
'> /dev/sda',
|
|
2206
2743
|
]
|
|
2207
|
-
|
|
2208
|
-
cmd_lower = cmd.lower()
|
|
2209
2744
|
for pattern in nuclear_patterns:
|
|
2210
|
-
if pattern
|
|
2211
|
-
return
|
|
2212
|
-
|
|
2213
|
-
#
|
|
2214
|
-
|
|
2215
|
-
|
|
2745
|
+
if pattern in cmd_lower:
|
|
2746
|
+
return 'BLOCKED'
|
|
2747
|
+
|
|
2748
|
+
# SAFE: Read-only commands
|
|
2749
|
+
safe_commands = {
|
|
2750
|
+
'pwd', 'ls', 'cd', 'cat', 'head', 'tail', 'grep', 'find', 'which', 'type',
|
|
2751
|
+
'wc', 'diff', 'echo', 'ps', 'top', 'df', 'du', 'file', 'stat', 'tree',
|
|
2752
|
+
'whoami', 'hostname', 'date', 'cal', 'uptime', 'printenv', 'env',
|
|
2753
|
+
}
|
|
2754
|
+
safe_git = {'git status', 'git log', 'git diff', 'git branch', 'git show', 'git remote'}
|
|
2755
|
+
|
|
2756
|
+
if cmd_base in safe_commands or cmd_with_sub in safe_git:
|
|
2757
|
+
return 'SAFE'
|
|
2758
|
+
|
|
2759
|
+
# WRITE: File creation/modification (allowed but tracked)
|
|
2760
|
+
write_commands = {'mkdir', 'touch', 'cp', 'mv', 'tee'}
|
|
2761
|
+
if cmd_base in write_commands:
|
|
2762
|
+
return 'WRITE'
|
|
2763
|
+
|
|
2764
|
+
# WRITE: Redirection operations (echo > file, cat > file)
|
|
2765
|
+
if '>' in cmd or '>>' in cmd:
|
|
2766
|
+
# Allow redirection to regular files, block to devices
|
|
2767
|
+
if '/dev/' not in cmd_lower:
|
|
2768
|
+
return 'WRITE'
|
|
2769
|
+
else:
|
|
2770
|
+
return 'BLOCKED'
|
|
2771
|
+
|
|
2772
|
+
# DANGEROUS: Deletion and permission changes
|
|
2773
|
+
dangerous_commands = {'rm', 'rmdir', 'chmod', 'chown', 'chgrp'}
|
|
2774
|
+
if cmd_base in dangerous_commands:
|
|
2775
|
+
return 'DANGEROUS'
|
|
2776
|
+
|
|
2777
|
+
# WRITE: Git write operations
|
|
2778
|
+
write_git = {'git add', 'git commit', 'git push', 'git pull', 'git checkout', 'git merge'}
|
|
2779
|
+
if cmd_with_sub in write_git:
|
|
2780
|
+
return 'WRITE'
|
|
2781
|
+
|
|
2782
|
+
# Default: Treat unknown commands as requiring user awareness
|
|
2783
|
+
return 'WRITE'
|
|
2784
|
+
|
|
2785
|
+
def _is_safe_shell_command(self, cmd: str) -> bool:
|
|
2786
|
+
"""
|
|
2787
|
+
Compatibility wrapper for old safety check.
|
|
2788
|
+
Now uses tiered classification system.
|
|
2789
|
+
"""
|
|
2790
|
+
classification = self._classify_command_safety(cmd)
|
|
2791
|
+
return classification in ['SAFE', 'WRITE'] # Allow SAFE and WRITE, block DANGEROUS and BLOCKED
|
|
2216
2792
|
|
|
2217
2793
|
def _check_token_budget(self, estimated_tokens: int) -> bool:
|
|
2218
2794
|
"""Check if we have enough token budget"""
|
|
@@ -2450,12 +3026,42 @@ class EnhancedNocturnalAgent:
|
|
|
2450
3026
|
async def _analyze_request_type(self, question: str) -> Dict[str, Any]:
|
|
2451
3027
|
"""Analyze what type of request this is and what APIs to use"""
|
|
2452
3028
|
|
|
2453
|
-
# Financial indicators
|
|
3029
|
+
# Financial indicators - COMPREHENSIVE list to ensure FinSight is used
|
|
2454
3030
|
financial_keywords = [
|
|
2455
|
-
|
|
2456
|
-
'
|
|
2457
|
-
'
|
|
2458
|
-
|
|
3031
|
+
# Core metrics
|
|
3032
|
+
'financial', 'revenue', 'sales', 'income', 'profit', 'earnings', 'loss',
|
|
3033
|
+
'net income', 'operating income', 'gross profit', 'ebitda', 'ebit',
|
|
3034
|
+
|
|
3035
|
+
# Margins & Ratios
|
|
3036
|
+
'margin', 'gross margin', 'profit margin', 'operating margin', 'net margin', 'ebitda margin',
|
|
3037
|
+
'ratio', 'current ratio', 'quick ratio', 'debt ratio', 'pe ratio', 'p/e',
|
|
3038
|
+
'roe', 'roa', 'roic', 'roce', 'eps',
|
|
3039
|
+
|
|
3040
|
+
# Balance Sheet
|
|
3041
|
+
'assets', 'liabilities', 'equity', 'debt', 'cash', 'capital',
|
|
3042
|
+
'balance sheet', 'total assets', 'current assets', 'fixed assets',
|
|
3043
|
+
'shareholders equity', 'stockholders equity', 'retained earnings',
|
|
3044
|
+
|
|
3045
|
+
# Cash Flow
|
|
3046
|
+
'cash flow', 'fcf', 'free cash flow', 'operating cash flow',
|
|
3047
|
+
'cfo', 'cfi', 'cff', 'capex', 'capital expenditure',
|
|
3048
|
+
|
|
3049
|
+
# Market Metrics
|
|
3050
|
+
'stock', 'market cap', 'market capitalization', 'enterprise value',
|
|
3051
|
+
'valuation', 'price', 'share price', 'stock price', 'quote',
|
|
3052
|
+
'volume', 'trading volume', 'shares outstanding',
|
|
3053
|
+
|
|
3054
|
+
# Financial Statements
|
|
3055
|
+
'income statement', '10-k', '10-q', '8-k', 'filing', 'sec filing',
|
|
3056
|
+
'quarterly', 'annual report', 'earnings report', 'financial statement',
|
|
3057
|
+
|
|
3058
|
+
# Company Info
|
|
3059
|
+
'ticker', 'company', 'corporation', 'ceo', 'earnings call',
|
|
3060
|
+
'dividend', 'dividend yield', 'payout ratio',
|
|
3061
|
+
|
|
3062
|
+
# Growth & Performance
|
|
3063
|
+
'growth', 'yoy', 'year over year', 'qoq', 'quarter over quarter',
|
|
3064
|
+
'cagr', 'trend', 'performance', 'returns'
|
|
2459
3065
|
]
|
|
2460
3066
|
|
|
2461
3067
|
# Research indicators (quantitative)
|
|
@@ -2664,40 +3270,76 @@ class EnhancedNocturnalAgent:
|
|
|
2664
3270
|
# Quick check if query might need shell
|
|
2665
3271
|
question_lower = request.question.lower()
|
|
2666
3272
|
might_need_shell = any(word in question_lower for word in [
|
|
2667
|
-
'directory', 'folder', 'where', 'find', 'list', 'files', 'look', 'search', 'check', 'into',
|
|
2668
|
-
'show', 'open', 'read', 'display', 'cat', 'view', 'contents', '.r', '.py', '.csv', '.ipynb'
|
|
3273
|
+
'directory', 'folder', 'where', 'find', 'list', 'files', 'file', 'look', 'search', 'check', 'into',
|
|
3274
|
+
'show', 'open', 'read', 'display', 'cat', 'view', 'contents', '.r', '.py', '.csv', '.ipynb',
|
|
3275
|
+
'create', 'make', 'mkdir', 'touch', 'new', 'write', 'copy', 'move', 'delete', 'remove',
|
|
3276
|
+
'git', 'grep', 'navigate', 'go to', 'change to'
|
|
2669
3277
|
])
|
|
2670
3278
|
|
|
2671
3279
|
if might_need_shell and self.shell_session:
|
|
3280
|
+
# Get current directory and context for intelligent planning
|
|
3281
|
+
try:
|
|
3282
|
+
current_dir = self.execute_command("pwd").strip()
|
|
3283
|
+
self.file_context['current_cwd'] = current_dir
|
|
3284
|
+
except:
|
|
3285
|
+
current_dir = "~"
|
|
3286
|
+
|
|
3287
|
+
last_file = self.file_context.get('last_file') or 'None'
|
|
3288
|
+
last_dir = self.file_context.get('last_directory') or 'None'
|
|
3289
|
+
|
|
2672
3290
|
# Ask LLM planner: What shell command should we run?
|
|
2673
|
-
planner_prompt = f"""You are a shell command planner. Determine what shell command to run.
|
|
3291
|
+
planner_prompt = f"""You are a shell command planner. Determine what shell command to run, if any.
|
|
2674
3292
|
|
|
2675
3293
|
User query: "{request.question}"
|
|
2676
3294
|
Previous conversation: {json.dumps(self.conversation_history[-2:]) if self.conversation_history else "None"}
|
|
3295
|
+
Current directory: {current_dir}
|
|
3296
|
+
Last file mentioned: {last_file}
|
|
3297
|
+
Last directory mentioned: {last_dir}
|
|
2677
3298
|
|
|
2678
3299
|
Respond ONLY with JSON:
|
|
2679
3300
|
{{
|
|
2680
|
-
"action": "
|
|
2681
|
-
"
|
|
2682
|
-
"
|
|
2683
|
-
"
|
|
2684
|
-
"file_path": "/full/path/to/file.R" (if read_file)
|
|
3301
|
+
"action": "execute|none",
|
|
3302
|
+
"command": "pwd" (the actual shell command to run, if action=execute),
|
|
3303
|
+
"reason": "Show current directory" (why this command is needed),
|
|
3304
|
+
"updates_context": true (set to true if command changes files/directories)
|
|
2685
3305
|
}}
|
|
2686
3306
|
|
|
3307
|
+
IMPORTANT RULES:
|
|
3308
|
+
1. Return "none" for conversational queries ("hello", "test", "thanks", "how are you")
|
|
3309
|
+
2. Return "none" when query is ambiguous without more context
|
|
3310
|
+
3. Return "none" for questions about data that don't need shell (e.g., "Tesla revenue", "Apple stock price")
|
|
3311
|
+
4. Use ACTUAL shell commands (pwd, ls, cd, mkdir, cat, grep, find, touch, etc.)
|
|
3312
|
+
5. Resolve pronouns using context: "it"={last_file}, "there"/{last_dir}
|
|
3313
|
+
6. For reading files, prefer: head -100 filename (shows first 100 lines)
|
|
3314
|
+
7. For finding things, use: find ~ -maxdepth 4 -name '*pattern*' 2>/dev/null
|
|
3315
|
+
8. For creating files: touch filename OR echo "content" > filename
|
|
3316
|
+
9. For creating directories: mkdir dirname
|
|
3317
|
+
10. ALWAYS include 2>/dev/null to suppress errors from find
|
|
3318
|
+
11. 🚨 MULTI-STEP QUERIES: For queries like "read X and do Y", ONLY generate the FIRST step (reading X). The LLM will handle subsequent steps after seeing the file contents.
|
|
3319
|
+
12. 🚨 NEVER use python -m py_compile or other code execution for finding bugs - just read the file with cat/head
|
|
3320
|
+
|
|
2687
3321
|
Examples:
|
|
2688
|
-
"where am i?" → {{"action": "pwd"}}
|
|
2689
|
-
"
|
|
2690
|
-
"find cm522" → {{"action": "find", "
|
|
2691
|
-
"
|
|
2692
|
-
"show me
|
|
2693
|
-
"
|
|
2694
|
-
"
|
|
2695
|
-
"
|
|
2696
|
-
"
|
|
2697
|
-
"
|
|
2698
|
-
"
|
|
2699
|
-
|
|
2700
|
-
|
|
3322
|
+
"where am i?" → {{"action": "execute", "command": "pwd", "reason": "Show current directory", "updates_context": false}}
|
|
3323
|
+
"list files" → {{"action": "execute", "command": "ls -lah", "reason": "List all files with details", "updates_context": false}}
|
|
3324
|
+
"find cm522" → {{"action": "execute", "command": "find ~ -maxdepth 4 -name '*cm522*' -type d 2>/dev/null | head -20", "reason": "Search for cm522 directory", "updates_context": false}}
|
|
3325
|
+
"go to Downloads" → {{"action": "execute", "command": "cd ~/Downloads && pwd", "reason": "Navigate to Downloads directory", "updates_context": true}}
|
|
3326
|
+
"show me calc.R" → {{"action": "execute", "command": "head -100 calc.R", "reason": "Display file contents", "updates_context": true}}
|
|
3327
|
+
"create test directory" → {{"action": "execute", "command": "mkdir test && echo 'Created test/'", "reason": "Create new directory", "updates_context": true}}
|
|
3328
|
+
"create empty config.json" → {{"action": "execute", "command": "touch config.json && echo 'Created config.json'", "reason": "Create empty file", "updates_context": true}}
|
|
3329
|
+
"write hello.txt with content Hello World" → {{"action": "execute", "command": "echo 'Hello World' > hello.txt", "reason": "Create file with content", "updates_context": true}}
|
|
3330
|
+
"create results.txt with line 1 and line 2" → {{"action": "execute", "command": "echo 'line 1' > results.txt && echo 'line 2' >> results.txt", "reason": "Create file with multiple lines", "updates_context": true}}
|
|
3331
|
+
"fix bug in script.py change OLD to NEW" → {{"action": "execute", "command": "sed -i 's/OLD/NEW/g' script.py && echo 'Fixed script.py'", "reason": "Edit file to fix bug", "updates_context": true}}
|
|
3332
|
+
"search for TODO in py files" → {{"action": "execute", "command": "grep -n 'TODO' *.py 2>/dev/null", "reason": "Find TODO comments", "updates_context": false}}
|
|
3333
|
+
"find all bugs in code" → {{"action": "execute", "command": "grep -rn 'BUG:' . 2>/dev/null", "reason": "Search for bug markers in code", "updates_context": false}}
|
|
3334
|
+
"read analyze.py and find bugs" → {{"action": "execute", "command": "head -200 analyze.py", "reason": "Read file to analyze bugs", "updates_context": false}}
|
|
3335
|
+
"show me calc.py completely" → {{"action": "execute", "command": "cat calc.py", "reason": "Display entire file", "updates_context": false}}
|
|
3336
|
+
"git status" → {{"action": "execute", "command": "git status", "reason": "Check repository status", "updates_context": false}}
|
|
3337
|
+
"what's in that file?" + last_file=data.csv → {{"action": "execute", "command": "head -100 data.csv", "reason": "Show file contents", "updates_context": false}}
|
|
3338
|
+
"hello" → {{"action": "none", "reason": "Conversational greeting, no command needed"}}
|
|
3339
|
+
"test" → {{"action": "none", "reason": "Ambiguous query, needs clarification"}}
|
|
3340
|
+
"thanks" → {{"action": "none", "reason": "Conversational acknowledgment"}}
|
|
3341
|
+
"Tesla revenue" → {{"action": "none", "reason": "Finance query, will use FinSight API not shell"}}
|
|
3342
|
+
"what does the error mean?" → {{"action": "none", "reason": "Explanation request, no command needed"}}
|
|
2701
3343
|
|
|
2702
3344
|
JSON:"""
|
|
2703
3345
|
|
|
@@ -2715,17 +3357,285 @@ JSON:"""
|
|
|
2715
3357
|
|
|
2716
3358
|
plan = json.loads(plan_text)
|
|
2717
3359
|
shell_action = plan.get("action", "none")
|
|
3360
|
+
command = plan.get("command", "")
|
|
3361
|
+
reason = plan.get("reason", "")
|
|
3362
|
+
updates_context = plan.get("updates_context", False)
|
|
2718
3363
|
|
|
2719
3364
|
if debug_mode:
|
|
2720
3365
|
print(f"🔍 SHELL PLAN: {plan}")
|
|
2721
3366
|
|
|
2722
|
-
#
|
|
2723
|
-
if shell_action == "
|
|
2724
|
-
|
|
2725
|
-
|
|
2726
|
-
|
|
3367
|
+
# GENERIC COMMAND EXECUTION - No more hardcoded actions!
|
|
3368
|
+
if shell_action == "execute" and command:
|
|
3369
|
+
# Check command safety
|
|
3370
|
+
safety_level = self._classify_command_safety(command)
|
|
3371
|
+
|
|
3372
|
+
if debug_mode:
|
|
3373
|
+
print(f"🔍 Command: {command}")
|
|
3374
|
+
print(f"🔍 Safety: {safety_level}")
|
|
3375
|
+
|
|
3376
|
+
if safety_level == 'BLOCKED':
|
|
3377
|
+
api_results["shell_info"] = {
|
|
3378
|
+
"error": f"Command blocked for safety: {command}",
|
|
3379
|
+
"reason": "This command could cause system damage"
|
|
3380
|
+
}
|
|
3381
|
+
else:
|
|
3382
|
+
# ========================================
|
|
3383
|
+
# COMMAND INTERCEPTOR: Translate shell commands to file operations
|
|
3384
|
+
# (Claude Code / Cursor parity)
|
|
3385
|
+
# ========================================
|
|
3386
|
+
intercepted = False
|
|
3387
|
+
output = ""
|
|
3388
|
+
|
|
3389
|
+
# Check for file reading commands (cat, head, tail)
|
|
3390
|
+
if command.startswith(('cat ', 'head ', 'tail ')):
|
|
3391
|
+
import shlex
|
|
3392
|
+
try:
|
|
3393
|
+
parts = shlex.split(command)
|
|
3394
|
+
cmd = parts[0]
|
|
3395
|
+
|
|
3396
|
+
# Extract filename (last non-flag argument)
|
|
3397
|
+
filename = None
|
|
3398
|
+
for part in reversed(parts[1:]):
|
|
3399
|
+
if not part.startswith('-'):
|
|
3400
|
+
filename = part
|
|
3401
|
+
break
|
|
3402
|
+
|
|
3403
|
+
if filename:
|
|
3404
|
+
# Use read_file instead of cat/head/tail
|
|
3405
|
+
if cmd == 'head':
|
|
3406
|
+
# head -n 100 file OR head file
|
|
3407
|
+
limit = 100 # default
|
|
3408
|
+
if '-n' in parts or '-' in parts[0]:
|
|
3409
|
+
try:
|
|
3410
|
+
idx = parts.index('-n') if '-n' in parts else 0
|
|
3411
|
+
limit = int(parts[idx + 1])
|
|
3412
|
+
except:
|
|
3413
|
+
pass
|
|
3414
|
+
output = self.read_file(filename, offset=0, limit=limit)
|
|
3415
|
+
elif cmd == 'tail':
|
|
3416
|
+
# For tail, read last N lines (harder, so just read all and show it's tail)
|
|
3417
|
+
output = self.read_file(filename)
|
|
3418
|
+
if "ERROR" not in output:
|
|
3419
|
+
lines = output.split('\n')
|
|
3420
|
+
output = '\n'.join(lines[-100:]) # last 100 lines
|
|
3421
|
+
else: # cat
|
|
3422
|
+
output = self.read_file(filename)
|
|
3423
|
+
|
|
3424
|
+
intercepted = True
|
|
3425
|
+
tools_used.append("read_file")
|
|
3426
|
+
if debug_mode:
|
|
3427
|
+
print(f"🔄 Intercepted: {command} → read_file({filename})")
|
|
3428
|
+
except:
|
|
3429
|
+
pass # Fall back to shell execution
|
|
3430
|
+
|
|
3431
|
+
# Check for file search commands (find)
|
|
3432
|
+
if not intercepted and 'find' in command and '-name' in command:
|
|
3433
|
+
try:
|
|
3434
|
+
import re
|
|
3435
|
+
# Extract pattern: find ... -name '*pattern*'
|
|
3436
|
+
name_match = re.search(r"-name\s+['\"]?\*?([^'\"*\s]+)\*?['\"]?", command)
|
|
3437
|
+
if name_match:
|
|
3438
|
+
pattern = f"**/*{name_match.group(1)}*"
|
|
3439
|
+
path_match = re.search(r"find\s+([^\s]+)", command)
|
|
3440
|
+
search_path = path_match.group(1) if path_match else "."
|
|
3441
|
+
|
|
3442
|
+
result = self.glob_search(pattern, search_path)
|
|
3443
|
+
output = '\n'.join(result['files'][:20]) # Show first 20 matches
|
|
3444
|
+
intercepted = True
|
|
3445
|
+
tools_used.append("glob_search")
|
|
3446
|
+
if debug_mode:
|
|
3447
|
+
print(f"🔄 Intercepted: {command} → glob_search({pattern}, {search_path})")
|
|
3448
|
+
except:
|
|
3449
|
+
pass
|
|
3450
|
+
|
|
3451
|
+
# Check for file writing commands (echo > file, grep > file, etc.) - CHECK THIS FIRST!
|
|
3452
|
+
# This must come BEFORE the plain grep interceptor
|
|
3453
|
+
if not intercepted and ('>' in command or '>>' in command):
|
|
3454
|
+
try:
|
|
3455
|
+
import re
|
|
3456
|
+
|
|
3457
|
+
# Handle grep ... > file (intercept and execute grep, then write output)
|
|
3458
|
+
if 'grep' in command and '>' in command:
|
|
3459
|
+
# Extract: grep -rn 'pattern' path > output.txt
|
|
3460
|
+
grep_match = re.search(r"grep\s+(.*)>\s*(\S+)", command)
|
|
3461
|
+
if grep_match:
|
|
3462
|
+
grep_part = grep_match.group(1).strip()
|
|
3463
|
+
output_file = grep_match.group(2)
|
|
3464
|
+
|
|
3465
|
+
# Extract pattern and options from grep command
|
|
3466
|
+
pattern_match = re.search(r"['\"]([^'\"]+)['\"]", grep_part)
|
|
3467
|
+
if pattern_match:
|
|
3468
|
+
pattern = pattern_match.group(1)
|
|
3469
|
+
search_path = "."
|
|
3470
|
+
file_pattern = "*.py" if "*.py" in command else "*"
|
|
3471
|
+
|
|
3472
|
+
if debug_mode:
|
|
3473
|
+
print(f"🔄 Intercepted: {command} → grep_search('{pattern}', '{search_path}', '{file_pattern}') + write_file({output_file})")
|
|
3474
|
+
|
|
3475
|
+
# Execute grep_search
|
|
3476
|
+
try:
|
|
3477
|
+
grep_result = self.grep_search(
|
|
3478
|
+
pattern=pattern,
|
|
3479
|
+
path=search_path,
|
|
3480
|
+
file_pattern=file_pattern,
|
|
3481
|
+
output_mode="content"
|
|
3482
|
+
)
|
|
3483
|
+
|
|
3484
|
+
# Format matches as text (like grep -rn output)
|
|
3485
|
+
output_lines = []
|
|
3486
|
+
for file_path, matches in grep_result.get('matches', {}).items():
|
|
3487
|
+
for line_num, line_content in matches:
|
|
3488
|
+
output_lines.append(f"{file_path}:{line_num}:{line_content}")
|
|
3489
|
+
|
|
3490
|
+
content_to_write = '\n'.join(output_lines) if output_lines else "(no matches found)"
|
|
3491
|
+
|
|
3492
|
+
# Write grep output to file
|
|
3493
|
+
write_result = self.write_file(output_file, content_to_write)
|
|
3494
|
+
if write_result['success']:
|
|
3495
|
+
output = f"Found {len(output_lines)} lines with '{pattern}' → Created {output_file} ({write_result['bytes_written']} bytes)"
|
|
3496
|
+
intercepted = True
|
|
3497
|
+
tools_used.extend(["grep_search", "write_file"])
|
|
3498
|
+
except Exception as e:
|
|
3499
|
+
if debug_mode:
|
|
3500
|
+
print(f"⚠️ Grep > file interception error: {e}")
|
|
3501
|
+
# Fall back to normal execution
|
|
3502
|
+
pass
|
|
3503
|
+
|
|
3504
|
+
# Extract: echo 'content' > filename OR cat << EOF > filename
|
|
3505
|
+
if not intercepted and 'echo' in command and '>' in command:
|
|
3506
|
+
# echo 'content' > file OR echo "content" > file
|
|
3507
|
+
match = re.search(r"echo\s+['\"](.+?)['\"].*?>\s*(\S+)", command)
|
|
3508
|
+
if match:
|
|
3509
|
+
content = match.group(1)
|
|
3510
|
+
filename = match.group(2)
|
|
3511
|
+
# Unescape common sequences
|
|
3512
|
+
content = content.replace('\\n', '\n').replace('\\t', '\t')
|
|
3513
|
+
result = self.write_file(filename, content + '\n')
|
|
3514
|
+
if result['success']:
|
|
3515
|
+
output = f"Created {filename} ({result['bytes_written']} bytes)"
|
|
3516
|
+
intercepted = True
|
|
3517
|
+
tools_used.append("write_file")
|
|
3518
|
+
if debug_mode:
|
|
3519
|
+
print(f"🔄 Intercepted: {command} → write_file({filename}, ...)")
|
|
3520
|
+
except:
|
|
3521
|
+
pass
|
|
3522
|
+
|
|
3523
|
+
# Check for sed editing commands
|
|
3524
|
+
if not intercepted and command.startswith('sed '):
|
|
3525
|
+
try:
|
|
3526
|
+
import re
|
|
3527
|
+
# sed 's/old/new/g' file OR sed -i 's/old/new/' file
|
|
3528
|
+
match = re.search(r"sed.*?['\"]s/([^/]+)/([^/]+)/", command)
|
|
3529
|
+
if match:
|
|
3530
|
+
old_text = match.group(1)
|
|
3531
|
+
new_text = match.group(2)
|
|
3532
|
+
# Extract filename (last argument)
|
|
3533
|
+
parts = command.split()
|
|
3534
|
+
filename = parts[-1]
|
|
3535
|
+
|
|
3536
|
+
# Determine if replace_all based on /g flag
|
|
3537
|
+
replace_all = '/g' in command
|
|
3538
|
+
|
|
3539
|
+
result = self.edit_file(filename, old_text, new_text, replace_all=replace_all)
|
|
3540
|
+
if result['success']:
|
|
3541
|
+
output = result['message']
|
|
3542
|
+
intercepted = True
|
|
3543
|
+
tools_used.append("edit_file")
|
|
3544
|
+
if debug_mode:
|
|
3545
|
+
print(f"🔄 Intercepted: {command} → edit_file({filename}, {old_text}, {new_text})")
|
|
3546
|
+
except:
|
|
3547
|
+
pass
|
|
3548
|
+
|
|
3549
|
+
# Check for heredoc file creation (cat << EOF > file)
|
|
3550
|
+
if not intercepted and '<<' in command and ('EOF' in command or 'HEREDOC' in command):
|
|
3551
|
+
try:
|
|
3552
|
+
import re
|
|
3553
|
+
# Extract: cat << EOF > filename OR cat > filename << EOF
|
|
3554
|
+
# Note: We can't actually get the heredoc content from a single command line
|
|
3555
|
+
# This would need to be handled differently (multi-line input)
|
|
3556
|
+
# For now, just detect and warn
|
|
3557
|
+
if debug_mode:
|
|
3558
|
+
print(f"⚠️ Heredoc detected but not intercepted: {command[:80]}")
|
|
3559
|
+
except:
|
|
3560
|
+
pass
|
|
3561
|
+
|
|
3562
|
+
# Check for content search commands (grep -r) WITHOUT redirection
|
|
3563
|
+
# This comes AFTER grep > file interceptor to avoid conflicts
|
|
3564
|
+
if not intercepted and command.startswith('grep ') and ('-r' in command or '-R' in command):
|
|
3565
|
+
try:
|
|
3566
|
+
import re
|
|
3567
|
+
# Extract pattern: grep -r 'pattern' path
|
|
3568
|
+
pattern_match = re.search(r"grep.*?['\"]([^'\"]+)['\"]", command)
|
|
3569
|
+
if pattern_match:
|
|
3570
|
+
pattern = pattern_match.group(1)
|
|
3571
|
+
# Extract path (last argument usually)
|
|
3572
|
+
parts = command.split()
|
|
3573
|
+
search_path = parts[-1] if len(parts) > 2 else "."
|
|
3574
|
+
|
|
3575
|
+
result = self.grep_search(pattern, search_path, "*.py", output_mode="files_with_matches")
|
|
3576
|
+
output = f"Files matching '{pattern}':\n" + '\n'.join(result['files'][:20])
|
|
3577
|
+
intercepted = True
|
|
3578
|
+
tools_used.append("grep_search")
|
|
3579
|
+
if debug_mode:
|
|
3580
|
+
print(f"🔄 Intercepted: {command} → grep_search({pattern}, {search_path})")
|
|
3581
|
+
except:
|
|
3582
|
+
pass
|
|
3583
|
+
|
|
3584
|
+
# If not intercepted, execute as shell command
|
|
3585
|
+
if not intercepted:
|
|
3586
|
+
output = self.execute_command(command)
|
|
3587
|
+
|
|
3588
|
+
if not output.startswith("ERROR"):
|
|
3589
|
+
# Success - store results
|
|
3590
|
+
api_results["shell_info"] = {
|
|
3591
|
+
"command": command,
|
|
3592
|
+
"output": output,
|
|
3593
|
+
"reason": reason,
|
|
3594
|
+
"safety_level": safety_level
|
|
3595
|
+
}
|
|
3596
|
+
tools_used.append("shell_execution")
|
|
3597
|
+
|
|
3598
|
+
# Update file context if needed
|
|
3599
|
+
if updates_context:
|
|
3600
|
+
import re
|
|
3601
|
+
# Extract file paths from command
|
|
3602
|
+
file_patterns = r'([a-zA-Z0-9_\-./]+\.(py|r|csv|txt|json|md|ipynb|rmd))'
|
|
3603
|
+
files_mentioned = re.findall(file_patterns, command, re.IGNORECASE)
|
|
3604
|
+
if files_mentioned:
|
|
3605
|
+
file_path = files_mentioned[0][0]
|
|
3606
|
+
self.file_context['last_file'] = file_path
|
|
3607
|
+
if file_path not in self.file_context['recent_files']:
|
|
3608
|
+
self.file_context['recent_files'].append(file_path)
|
|
3609
|
+
self.file_context['recent_files'] = self.file_context['recent_files'][-5:] # Keep last 5
|
|
3610
|
+
|
|
3611
|
+
# Extract directory paths
|
|
3612
|
+
dir_patterns = r'cd\s+([^\s&|;]+)|mkdir\s+([^\s&|;]+)'
|
|
3613
|
+
dirs_mentioned = re.findall(dir_patterns, command)
|
|
3614
|
+
if dirs_mentioned:
|
|
3615
|
+
for dir_tuple in dirs_mentioned:
|
|
3616
|
+
dir_path = dir_tuple[0] or dir_tuple[1]
|
|
3617
|
+
if dir_path:
|
|
3618
|
+
self.file_context['last_directory'] = dir_path
|
|
3619
|
+
if dir_path not in self.file_context['recent_dirs']:
|
|
3620
|
+
self.file_context['recent_dirs'].append(dir_path)
|
|
3621
|
+
self.file_context['recent_dirs'] = self.file_context['recent_dirs'][-5:] # Keep last 5
|
|
3622
|
+
|
|
3623
|
+
# If cd command, update current_cwd
|
|
3624
|
+
if command.startswith('cd '):
|
|
3625
|
+
try:
|
|
3626
|
+
new_cwd = self.execute_command("pwd").strip()
|
|
3627
|
+
self.file_context['current_cwd'] = new_cwd
|
|
3628
|
+
except:
|
|
3629
|
+
pass
|
|
3630
|
+
else:
|
|
3631
|
+
# Command failed
|
|
3632
|
+
api_results["shell_info"] = {
|
|
3633
|
+
"error": output,
|
|
3634
|
+
"command": command
|
|
3635
|
+
}
|
|
2727
3636
|
|
|
2728
|
-
|
|
3637
|
+
# Backwards compatibility: support old hardcoded actions if LLM still returns them
|
|
3638
|
+
elif shell_action == "pwd":
|
|
2729
3639
|
target = plan.get("target_path")
|
|
2730
3640
|
if target:
|
|
2731
3641
|
ls_output = self.execute_command(f"ls -lah {target}")
|
|
@@ -2757,6 +3667,32 @@ JSON:"""
|
|
|
2757
3667
|
}
|
|
2758
3668
|
tools_used.append("shell_execution")
|
|
2759
3669
|
|
|
3670
|
+
elif shell_action == "cd":
|
|
3671
|
+
# NEW: Change directory
|
|
3672
|
+
target = plan.get("target_path")
|
|
3673
|
+
if target:
|
|
3674
|
+
# Expand ~ to home directory
|
|
3675
|
+
if target.startswith("~"):
|
|
3676
|
+
home = os.path.expanduser("~")
|
|
3677
|
+
target = target.replace("~", home, 1)
|
|
3678
|
+
|
|
3679
|
+
# Execute cd command
|
|
3680
|
+
cd_cmd = f"cd {target} && pwd"
|
|
3681
|
+
cd_output = self.execute_command(cd_cmd)
|
|
3682
|
+
|
|
3683
|
+
if not cd_output.startswith("ERROR"):
|
|
3684
|
+
api_results["shell_info"] = {
|
|
3685
|
+
"directory_changed": True,
|
|
3686
|
+
"new_directory": cd_output.strip(),
|
|
3687
|
+
"target_path": target
|
|
3688
|
+
}
|
|
3689
|
+
tools_used.append("shell_execution")
|
|
3690
|
+
else:
|
|
3691
|
+
api_results["shell_info"] = {
|
|
3692
|
+
"directory_changed": False,
|
|
3693
|
+
"error": f"Failed to change to {target}: {cd_output}"
|
|
3694
|
+
}
|
|
3695
|
+
|
|
2760
3696
|
elif shell_action == "read_file":
|
|
2761
3697
|
# NEW: Read and inspect file (R, Python, CSV, etc.)
|
|
2762
3698
|
import re # Import at function level
|
|
@@ -2836,6 +3772,14 @@ JSON:"""
|
|
|
2836
3772
|
if debug_mode and is_vague:
|
|
2837
3773
|
print(f"🔍 Query is VAGUE - skipping expensive APIs")
|
|
2838
3774
|
|
|
3775
|
+
# If query is vague, hint to backend LLM to ask clarifying questions
|
|
3776
|
+
if is_vague:
|
|
3777
|
+
api_results["query_analysis"] = {
|
|
3778
|
+
"is_vague": True,
|
|
3779
|
+
"suggestion": "Ask clarifying questions instead of guessing",
|
|
3780
|
+
"reason": "Query needs more specificity to provide accurate answer"
|
|
3781
|
+
}
|
|
3782
|
+
|
|
2839
3783
|
# Skip Archive/FinSight if query is too vague, but still allow web search later
|
|
2840
3784
|
if not is_vague:
|
|
2841
3785
|
# Archive API for research
|
|
@@ -2914,32 +3858,78 @@ JSON:"""
|
|
|
2914
3858
|
# - Shell said "none" (not a directory/file operation)
|
|
2915
3859
|
# - We don't have enough data from Archive/FinSight
|
|
2916
3860
|
|
|
2917
|
-
|
|
3861
|
+
# First check: Is this a conversational query that doesn't need web search?
|
|
3862
|
+
def is_conversational_query(query: str) -> bool:
|
|
3863
|
+
"""Detect if query is conversational (greeting, thanks, testing, etc.)"""
|
|
3864
|
+
query_lower = query.lower().strip()
|
|
3865
|
+
|
|
3866
|
+
# Single word queries that are conversational
|
|
3867
|
+
conversational_words = {
|
|
3868
|
+
'hello', 'hi', 'hey', 'thanks', 'thank', 'ok', 'okay', 'yes', 'no',
|
|
3869
|
+
'test', 'testing', 'cool', 'nice', 'great', 'awesome', 'perfect',
|
|
3870
|
+
'bye', 'goodbye', 'quit', 'exit', 'help'
|
|
3871
|
+
}
|
|
3872
|
+
|
|
3873
|
+
# Short conversational phrases
|
|
3874
|
+
conversational_phrases = [
|
|
3875
|
+
'how are you', 'thank you', 'thanks!', 'ok', 'got it', 'i see',
|
|
3876
|
+
'makes sense', 'sounds good', 'that works', 'no problem'
|
|
3877
|
+
]
|
|
3878
|
+
|
|
3879
|
+
words = query_lower.split()
|
|
3880
|
+
|
|
3881
|
+
# Single word check
|
|
3882
|
+
if len(words) == 1 and words[0] in conversational_words:
|
|
3883
|
+
return True
|
|
3884
|
+
|
|
3885
|
+
# Short phrase check
|
|
3886
|
+
if len(words) <= 3 and any(phrase in query_lower for phrase in conversational_phrases):
|
|
3887
|
+
return True
|
|
3888
|
+
|
|
3889
|
+
# Question marks with no content words (just pronouns)
|
|
3890
|
+
if '?' in query_lower and len(words) <= 2:
|
|
3891
|
+
return True
|
|
3892
|
+
|
|
3893
|
+
return False
|
|
3894
|
+
|
|
3895
|
+
skip_web_search = is_conversational_query(request.question)
|
|
3896
|
+
|
|
3897
|
+
if self.web_search and shell_action == "none" and not skip_web_search:
|
|
2918
3898
|
# Ask LLM: Should we web search for this?
|
|
2919
|
-
web_decision_prompt = f"""
|
|
3899
|
+
web_decision_prompt = f"""You are a tool selection expert. Decide if web search is needed.
|
|
2920
3900
|
|
|
2921
3901
|
User query: "{request.question}"
|
|
2922
3902
|
Data already available: {list(api_results.keys())}
|
|
2923
|
-
|
|
3903
|
+
Tools already used: {tools_used}
|
|
3904
|
+
|
|
3905
|
+
AVAILABLE TOOLS YOU SHOULD KNOW:
|
|
3906
|
+
1. FinSight API: Company financial data (revenue, income, margins, ratios, cash flow, balance sheet, SEC filings)
|
|
3907
|
+
- Covers: All US public companies (~8,000)
|
|
3908
|
+
- Data: SEC EDGAR + Yahoo Finance
|
|
3909
|
+
- Metrics: 50+ financial KPIs
|
|
3910
|
+
|
|
3911
|
+
2. Archive API: Academic research papers
|
|
3912
|
+
- Covers: Semantic Scholar, OpenAlex, PubMed
|
|
3913
|
+
- Data: Papers, citations, abstracts
|
|
3914
|
+
|
|
3915
|
+
3. Web Search: General information, current events
|
|
3916
|
+
- Covers: Anything on the internet
|
|
3917
|
+
- Use for: Market share, industry news, non-financial company info
|
|
3918
|
+
|
|
3919
|
+
DECISION RULES:
|
|
3920
|
+
- If query is about company financials (revenue, profit, margins, etc.) → Check if FinSight already provided data
|
|
3921
|
+
- If FinSight has data in api_results → Web search is NOT needed
|
|
3922
|
+
- If FinSight was called but no data → Web search as fallback is OK
|
|
3923
|
+
- If query is about market share, industry size, trends → Web search (FinSight doesn't have this)
|
|
3924
|
+
- If query is about research papers → Archive handles it, not web
|
|
3925
|
+
- If query is conversational → Already filtered, you won't see these
|
|
2924
3926
|
|
|
2925
3927
|
Respond with JSON:
|
|
2926
3928
|
{{
|
|
2927
3929
|
"use_web_search": true/false,
|
|
2928
|
-
"reason": "why
|
|
3930
|
+
"reason": "explain why based on tools available and data already fetched"
|
|
2929
3931
|
}}
|
|
2930
3932
|
|
|
2931
|
-
Use web search for:
|
|
2932
|
-
- Market share/size (not in SEC filings)
|
|
2933
|
-
- Current prices (Bitcoin, commodities, real-time data)
|
|
2934
|
-
- Industry data, statistics
|
|
2935
|
-
- Recent events, news
|
|
2936
|
-
- Questions not answered by existing data
|
|
2937
|
-
|
|
2938
|
-
Don't use if:
|
|
2939
|
-
- Shell already handled it (pwd/ls/find)
|
|
2940
|
-
- Question answered by research/financial APIs
|
|
2941
|
-
- Pure opinion question
|
|
2942
|
-
|
|
2943
3933
|
JSON:"""
|
|
2944
3934
|
|
|
2945
3935
|
try:
|
|
@@ -2986,11 +3976,44 @@ JSON:"""
|
|
|
2986
3976
|
api_results=api_results,
|
|
2987
3977
|
tools_used=tools_used
|
|
2988
3978
|
)
|
|
2989
|
-
|
|
3979
|
+
|
|
3980
|
+
# POST-PROCESSING: Auto-extract code blocks and write files if user requested file creation
|
|
3981
|
+
# This fixes the issue where LLM shows corrected code but doesn't create the file
|
|
3982
|
+
if any(keyword in request.question.lower() for keyword in ['create', 'write', 'save', 'generate', 'fixed', 'corrected']):
|
|
3983
|
+
# Extract filename from query (e.g., "write to foo.py", "create bar_fixed.py")
|
|
3984
|
+
import re
|
|
3985
|
+
filename_match = re.search(r'(?:to|create|write|save|generate)\s+(\w+[._-]\w+\.[\w]+)', request.question, re.IGNORECASE)
|
|
3986
|
+
if not filename_match:
|
|
3987
|
+
# Try pattern: "foo_fixed.py" or "bar.py"
|
|
3988
|
+
filename_match = re.search(r'(\w+_fixed\.[\w]+|\w+\.[\w]+)', request.question)
|
|
3989
|
+
|
|
3990
|
+
if filename_match:
|
|
3991
|
+
target_filename = filename_match.group(1)
|
|
3992
|
+
|
|
3993
|
+
# Extract code block from response (```python ... ``` or ``` ... ```)
|
|
3994
|
+
code_block_pattern = r'```(?:python|bash|sh|r|sql)?\n(.*?)```'
|
|
3995
|
+
code_blocks = re.findall(code_block_pattern, response.response, re.DOTALL)
|
|
3996
|
+
|
|
3997
|
+
if code_blocks:
|
|
3998
|
+
# Use the LARGEST code block (likely the complete file)
|
|
3999
|
+
largest_block = max(code_blocks, key=len)
|
|
4000
|
+
|
|
4001
|
+
# Write to file
|
|
4002
|
+
try:
|
|
4003
|
+
write_result = self.write_file(target_filename, largest_block)
|
|
4004
|
+
if write_result['success']:
|
|
4005
|
+
# Append confirmation to response
|
|
4006
|
+
response.response += f"\n\n✅ File created: {target_filename} ({write_result['bytes_written']} bytes)"
|
|
4007
|
+
if debug_mode:
|
|
4008
|
+
print(f"🔄 Auto-extracted code block → write_file({target_filename})")
|
|
4009
|
+
except Exception as e:
|
|
4010
|
+
if debug_mode:
|
|
4011
|
+
print(f"⚠️ Auto-write failed: {e}")
|
|
4012
|
+
|
|
2990
4013
|
# CRITICAL: Save to conversation history
|
|
2991
4014
|
self.conversation_history.append({"role": "user", "content": request.question})
|
|
2992
4015
|
self.conversation_history.append({"role": "assistant", "content": response.response})
|
|
2993
|
-
|
|
4016
|
+
|
|
2994
4017
|
return response
|
|
2995
4018
|
|
|
2996
4019
|
# DEV MODE ONLY: Direct Groq calls (only works with local API keys)
|