abstractcore 2.4.9__py3-none-any.whl → 2.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/apps/__main__.py +8 -1
- abstractcore/apps/deepsearch.py +644 -0
- abstractcore/apps/intent.py +614 -0
- abstractcore/assets/model_capabilities.json +50 -34
- abstractcore/config/__init__.py +10 -0
- abstractcore/{cli → config}/main.py +13 -1
- abstractcore/config/manager.py +355 -0
- abstractcore/core/session.py +46 -1
- abstractcore/processing/__init__.py +5 -1
- abstractcore/processing/basic_deepsearch.py +2173 -0
- abstractcore/processing/basic_intent.py +690 -0
- abstractcore/providers/anthropic_provider.py +1 -0
- abstractcore/providers/base.py +1 -0
- abstractcore/providers/huggingface_provider.py +95 -4
- abstractcore/providers/lmstudio_provider.py +14 -0
- abstractcore/providers/mlx_provider.py +76 -2
- abstractcore/providers/ollama_provider.py +6 -2
- abstractcore/providers/openai_provider.py +1 -0
- abstractcore/providers/registry.py +6 -6
- abstractcore/structured/handler.py +161 -1
- abstractcore/tools/common_tools.py +98 -3
- abstractcore/utils/cli.py +114 -1
- abstractcore/utils/version.py +1 -1
- {abstractcore-2.4.9.dist-info → abstractcore-2.5.2.dist-info}/METADATA +38 -18
- {abstractcore-2.4.9.dist-info → abstractcore-2.5.2.dist-info}/RECORD +30 -25
- {abstractcore-2.4.9.dist-info → abstractcore-2.5.2.dist-info}/entry_points.txt +6 -2
- abstractcore/cli/__init__.py +0 -9
- /abstractcore/{cli → config}/vision_config.py +0 -0
- {abstractcore-2.4.9.dist-info → abstractcore-2.5.2.dist-info}/WHEEL +0 -0
- {abstractcore-2.4.9.dist-info → abstractcore-2.5.2.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.4.9.dist-info → abstractcore-2.5.2.dist-info}/top_level.txt +0 -0
abstractcore/apps/__main__.py
CHANGED
|
@@ -9,11 +9,13 @@ Available apps:
|
|
|
9
9
|
summarizer - Document summarization tool
|
|
10
10
|
extractor - Entity and relationship extraction tool
|
|
11
11
|
judge - Text evaluation and scoring tool
|
|
12
|
+
intent - Intent analysis and motivation identification tool
|
|
12
13
|
|
|
13
14
|
Examples:
|
|
14
15
|
python -m abstractcore.apps summarizer document.txt
|
|
15
16
|
python -m abstractcore.apps extractor report.txt --format json-ld
|
|
16
17
|
python -m abstractcore.apps judge essay.txt --criteria clarity,accuracy
|
|
18
|
+
python -m abstractcore.apps intent "I need help with this problem" --depth comprehensive
|
|
17
19
|
python -m abstractcore.apps <app> --help
|
|
18
20
|
"""
|
|
19
21
|
|
|
@@ -43,9 +45,14 @@ def main():
|
|
|
43
45
|
sys.argv = [sys.argv[0]] + sys.argv[2:]
|
|
44
46
|
from .judge import main as judge_main
|
|
45
47
|
judge_main()
|
|
48
|
+
elif app_name == "intent":
|
|
49
|
+
# Remove the app name from sys.argv and run intent analyzer
|
|
50
|
+
sys.argv = [sys.argv[0]] + sys.argv[2:]
|
|
51
|
+
from .intent import main as intent_main
|
|
52
|
+
intent_main()
|
|
46
53
|
else:
|
|
47
54
|
print(f"Unknown app: {app_name}")
|
|
48
|
-
print("\nAvailable apps: summarizer, extractor, judge")
|
|
55
|
+
print("\nAvailable apps: summarizer, extractor, judge, intent")
|
|
49
56
|
sys.exit(1)
|
|
50
57
|
|
|
51
58
|
if __name__ == "__main__":
|
|
@@ -0,0 +1,644 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
AbstractCore Deep Search CLI Application
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
python -m abstractcore.apps.deepsearch "<research_query>" [options]
|
|
7
|
+
|
|
8
|
+
Options:
|
|
9
|
+
--focus <areas> Comma-separated focus areas (e.g., "technology,business,impact")
|
|
10
|
+
--depth <depth> Research depth (brief, standard, comprehensive, default: standard)
|
|
11
|
+
--max-sources <number> Maximum number of sources to gather (default: 15)
|
|
12
|
+
--format <format> Output format (structured, narrative, executive, default: structured)
|
|
13
|
+
--output <output> Output file path (optional, prints to console if not provided)
|
|
14
|
+
--provider <provider> LLM provider (requires --model)
|
|
15
|
+
--model <model> LLM model (requires --provider)
|
|
16
|
+
--no-verification Skip fact-checking and verification stage
|
|
17
|
+
--parallel-searches <num> Maximum parallel web searches (default: 5)
|
|
18
|
+
--verbose Show detailed progress information
|
|
19
|
+
--timeout <seconds> HTTP timeout for LLM providers (default: 300)
|
|
20
|
+
--max-tokens <tokens> Maximum total tokens for LLM context (default: 32000)
|
|
21
|
+
--max-output-tokens <tokens> Maximum tokens for LLM output generation (default: 8000)
|
|
22
|
+
--help Show this help message
|
|
23
|
+
|
|
24
|
+
Examples:
|
|
25
|
+
python -m abstractcore.apps.deepsearch "What are the latest developments in quantum computing?"
|
|
26
|
+
python -m abstractcore.apps.deepsearch "AI impact on healthcare" --focus "diagnosis,treatment,ethics" --depth comprehensive
|
|
27
|
+
python -m abstractcore.apps.deepsearch "sustainable energy 2025" --format executive --output report.json
|
|
28
|
+
python -m abstractcore.apps.deepsearch "blockchain technology trends" --max-sources 25 --verbose
|
|
29
|
+
python -m abstractcore.apps.deepsearch "climate change solutions" --provider openai --model gpt-4o-mini --depth comprehensive
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
import argparse
|
|
33
|
+
import sys
|
|
34
|
+
import time
|
|
35
|
+
import json
|
|
36
|
+
from pathlib import Path
|
|
37
|
+
from typing import Optional, List, Dict, Any
|
|
38
|
+
|
|
39
|
+
from ..processing import BasicDeepSearch
|
|
40
|
+
from ..core.factory import create_llm
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def timeout_type(value):
    """Argparse type converter for --timeout.

    Accepts None or the string 'none' (any case) to mean "no timeout",
    otherwise parses the value as a float number of seconds.

    Raises:
        argparse.ArgumentTypeError: if the value is neither 'none' nor numeric.
    """
    is_none_string = isinstance(value, str) and value.lower() == 'none'
    if value is None or is_none_string:
        return None
    try:
        return float(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"Invalid timeout value: {value}. Use 'none' for unlimited or a number in seconds.")
def save_report(report, output_path: str, format_type: str) -> None:
    """
    Persist a research report to disk.

    The on-disk serialization is chosen by the extension of *output_path*
    (.json / .md / .html; anything else falls back to JSON).  Parent
    directories are created as needed.  On any failure the error is
    printed and the process exits with status 1.

    Args:
        report: ResearchReport object (pydantic) or plain dictionary
        output_path: Destination file path
        format_type: Output format type (not used by the file writer)
    """
    destination = Path(output_path)

    # Make sure the target directory exists before writing.
    destination.parent.mkdir(parents=True, exist_ok=True)

    try:
        # Normalize pydantic models (v2 first, then v1) down to a plain dict.
        if hasattr(report, 'model_dump'):
            payload = report.model_dump()
        elif hasattr(report, 'dict'):
            payload = report.dict()
        else:
            payload = report

        lowered = output_path.lower()
        if lowered.endswith('.md'):
            destination.write_text(format_report_as_markdown(payload), encoding='utf-8')
        elif lowered.endswith('.html'):
            destination.write_text(format_report_as_html(payload), encoding='utf-8')
        else:
            # Covers both explicit .json and unknown extensions (JSON default).
            with open(destination, 'w', encoding='utf-8') as fh:
                json.dump(payload, fh, indent=2, ensure_ascii=False)

        print(f"✅ Report saved to: {destination}")

    except Exception as e:
        print(f"❌ Failed to save report: {e}")
        sys.exit(1)
def format_report_as_markdown(report_data: Dict[str, Any]) -> str:
    """Convert report data to a Markdown document.

    Sections appear in a fixed order (summary, findings, analysis,
    conclusions, sources, methodology, limitations) and are omitted when
    their key is missing or falsy.  The original implementation repeated
    the same heading/body/blank-line pattern for every prose section;
    that pattern is factored into a small helper here.

    Args:
        report_data: Report fields keyed by section name.

    Returns:
        The assembled Markdown string.
    """
    md_lines = [f"# {report_data.get('title', 'Research Report')}", ""]

    def _text_section(heading: str, key: str) -> None:
        # Prose sections all share the "## Heading", blank, body, blank layout.
        if report_data.get(key):
            md_lines.extend([f"## {heading}", "", report_data[key], ""])

    _text_section("Executive Summary", "executive_summary")

    # Key findings render as a numbered list.
    if report_data.get('key_findings'):
        md_lines.extend(["## Key Findings", ""])
        for i, finding in enumerate(report_data['key_findings'], 1):
            md_lines.append(f"{i}. {finding}")
        md_lines.append("")

    _text_section("Detailed Analysis", "detailed_analysis")
    _text_section("Conclusions", "conclusions")

    # Sources render as a numbered link list with a relevance score.
    if report_data.get('sources'):
        md_lines.extend(["## Sources", ""])
        for i, source in enumerate(report_data['sources'], 1):
            title = source.get('title', 'Untitled')
            url = source.get('url', '')
            relevance = source.get('relevance', 0)
            md_lines.append(f"{i}. [{title}]({url}) (Relevance: {relevance:.2f})")
        md_lines.append("")

    _text_section("Methodology", "methodology")
    _text_section("Limitations", "limitations")

    return "\n".join(md_lines)
def format_report_as_html(report_data: Dict[str, Any]) -> str:
    """Convert report data to a standalone HTML document.

    Fix over the previous version: every report-derived value is
    HTML-escaped before interpolation, so characters such as '<', '>',
    '&' or quotes in titles, URLs, findings, etc. can no longer break
    the generated markup or inject content into the page.

    Args:
        report_data: Report fields keyed by section name.

    Returns:
        A complete HTML page as a string.
    """
    import html  # stdlib; local import keeps module-level imports unchanged

    def esc(value) -> str:
        # Safe for both element content and quoted attribute values.
        return html.escape(str(value), quote=True)

    html_parts = []

    # Static header/CSS: contains no report data, nothing to escape.
    html_parts.append("""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Deep Search Report</title>
    <style>
        body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; line-height: 1.6; max-width: 1200px; margin: 0 auto; padding: 20px; }
        h1 { color: #2c3e50; border-bottom: 3px solid #3498db; padding-bottom: 10px; }
        h2 { color: #34495e; margin-top: 30px; }
        .section { margin-bottom: 30px; padding: 20px; background: #f8f9fa; border-radius: 8px; }
        .finding { margin-bottom: 15px; padding: 10px; background: white; border-left: 4px solid #3498db; }
        .source { margin-bottom: 10px; padding: 10px; background: white; border-radius: 4px; }
        .source a { color: #3498db; text-decoration: none; }
        .source a:hover { text-decoration: underline; }
        .relevance { color: #7f8c8d; font-size: 0.9em; }
        .metadata { color: #7f8c8d; font-size: 0.9em; margin-top: 20px; }
    </style>
</head>
<body>""")

    # Title
    html_parts.append(f"<h1>{esc(report_data.get('title', 'Research Report'))}</h1>")

    # Executive Summary
    if report_data.get('executive_summary'):
        html_parts.append('<div class="section">')
        html_parts.append('<h2>📊 Executive Summary</h2>')
        html_parts.append(f"<p>{esc(report_data['executive_summary'])}</p>")
        html_parts.append('</div>')

    # Key Findings
    if report_data.get('key_findings'):
        html_parts.append('<div class="section">')
        html_parts.append('<h2>🎯 Key Findings</h2>')
        for i, finding in enumerate(report_data['key_findings'], 1):
            html_parts.append(f'<div class="finding">{i}. {esc(finding)}</div>')
        html_parts.append('</div>')

    # Detailed Analysis
    if report_data.get('detailed_analysis'):
        html_parts.append('<div class="section">')
        html_parts.append('<h2>📝 Detailed Analysis</h2>')
        # Escape FIRST, then convert newlines into paragraph/line breaks so
        # the inserted tags survive.
        analysis = esc(report_data['detailed_analysis']).replace('\n\n', '</p><p>').replace('\n', '<br>')
        html_parts.append(f"<p>{analysis}</p>")
        html_parts.append('</div>')

    # Conclusions
    if report_data.get('conclusions'):
        html_parts.append('<div class="section">')
        html_parts.append('<h2>💡 Conclusions</h2>')
        conclusions = esc(report_data['conclusions']).replace('\n\n', '</p><p>').replace('\n', '<br>')
        html_parts.append(f"<p>{conclusions}</p>")
        html_parts.append('</div>')

    # Sources
    if report_data.get('sources'):
        html_parts.append('<div class="section">')
        html_parts.append(f'<h2>📚 Sources ({len(report_data["sources"])} total)</h2>')
        for i, source in enumerate(report_data['sources'], 1):
            title = esc(source.get('title', 'Untitled'))
            url = esc(source.get('url', ''))
            relevance = source.get('relevance', 0)
            html_parts.append(f'''<div class="source">
                {i}. <a href="{url}" target="_blank">{title}</a>
                <div class="relevance">Relevance: {relevance:.2f}</div>
            </div>''')
        html_parts.append('</div>')

    # Methodology and Limitations
    if report_data.get('methodology') or report_data.get('limitations'):
        html_parts.append('<div class="section">')
        html_parts.append('<h2>📋 Methodology & Limitations</h2>')
        if report_data.get('methodology'):
            html_parts.append(f"<p><strong>Methodology:</strong> {esc(report_data['methodology'])}</p>")
        if report_data.get('limitations'):
            html_parts.append(f"<p><strong>Limitations:</strong> {esc(report_data['limitations'])}</p>")
        html_parts.append('</div>')

    # Footer
    html_parts.append('<div class="metadata">')
    html_parts.append('<p>Generated by AbstractCore Deep Search</p>')
    html_parts.append('</div>')
    html_parts.append('</body></html>')

    return '\n'.join(html_parts)
def print_report(report, format_type: str) -> None:
    """
    Pretty-print a research report to the console.

    Fixes over the previous version: the pydantic-to-dict conversion
    chain (duplicated twice) is factored into a ``_to_dict`` helper, and
    the bare ``except:`` in the fallback — which would also swallow
    SystemExit / KeyboardInterrupt — is narrowed to ``except Exception``.

    Args:
        report: ResearchReport object (pydantic) or dictionary
        format_type: Output format type (currently informational only)
    """
    def _to_dict(obj):
        # Normalize pydantic v2 (model_dump) / v1 (dict) models; pass
        # plain dictionaries through unchanged.
        if hasattr(obj, 'model_dump'):
            return obj.model_dump()
        if hasattr(obj, 'dict'):
            return obj.dict()
        return obj

    try:
        report_data = _to_dict(report)

        print("\n" + "="*80)
        print(f"🔍 DEEP SEARCH REPORT")
        print("="*80)

        # Title
        print(f"\n📋 {report_data.get('title', 'Research Report')}")
        print("-" * 60)

        # Executive Summary
        if report_data.get('executive_summary'):
            print(f"\n📊 EXECUTIVE SUMMARY")
            print(f"{report_data['executive_summary']}")

        # Key Findings
        if report_data.get('key_findings'):
            print(f"\n🎯 KEY FINDINGS")
            for i, finding in enumerate(report_data['key_findings'], 1):
                print(f"{i}. {finding}")

        # Detailed Analysis (show full content)
        if report_data.get('detailed_analysis'):
            print(f"\n📝 DETAILED ANALYSIS")
            print(report_data['detailed_analysis'])

        # Conclusions
        if report_data.get('conclusions'):
            print(f"\n💡 CONCLUSIONS")
            print(report_data['conclusions'])

        # Sources (all of them, title plus URL)
        if report_data.get('sources'):
            print(f"\n📚 SOURCES ({len(report_data['sources'])} total)")
            for i, source in enumerate(report_data['sources'], 1):
                print(f"{i}. {source.get('title', 'Untitled')}")
                print(f"   🔗 {source.get('url', '')}")

        # Methodology and Limitations
        if report_data.get('methodology') or report_data.get('limitations'):
            print(f"\n📋 METHODOLOGY & LIMITATIONS")
            if report_data.get('methodology'):
                print(f"Methodology: {report_data['methodology']}")
            if report_data.get('limitations'):
                print(f"Limitations: {report_data['limitations']}")

        print("\n" + "="*80)

    except Exception as e:
        print(f"❌ Error displaying report: {e}")
        # Fallback: dump whatever we can as JSON rather than crash.
        try:
            print(json.dumps(_to_dict(report), indent=2, ensure_ascii=False))
        except Exception:
            print(f"Report object: {report}")
def main():
    """CLI entry point for the deep search app.

    Parses command-line arguments, configures the LLM and the
    BasicDeepSearch engine, runs the research, then writes and/or
    prints the resulting report.  Exits with status 1 on invalid
    arguments, user interrupt, or research failure.

    Fix over the previous version: the pydantic-to-dict conversion chain
    was duplicated verbatim in each console-output branch; it is now a
    single ``_to_dict`` helper.
    """
    parser = argparse.ArgumentParser(
        description="AbstractCore Deep Search - Autonomous research agent",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s "What are the latest developments in quantum computing?"
  %(prog)s "AI impact on healthcare" --focus "diagnosis,treatment,ethics" --depth comprehensive
  %(prog)s "sustainable energy 2025" --format executive --output report.json
  %(prog)s "blockchain technology trends" --max-sources 25 --verbose
"""
    )

    # Required argument
    parser.add_argument(
        'query',
        help='Research query or question to investigate'
    )

    # Research configuration
    parser.add_argument(
        '--focus',
        type=str,
        help='Comma-separated focus areas (e.g., "technology,business,impact")'
    )

    parser.add_argument(
        '--depth',
        choices=['brief', 'standard', 'comprehensive'],
        default='standard',
        help='Research depth (default: standard)'
    )

    parser.add_argument(
        '--max-sources',
        type=int,
        default=15,
        help='Maximum number of sources to gather (default: 15)'
    )

    parser.add_argument(
        '--format',
        choices=['structured', 'narrative', 'executive'],
        default='structured',
        help='Output format (default: structured)'
    )

    # Output options
    parser.add_argument(
        '--output',
        type=str,
        help='Output file path (supports .json, .md, .html, .txt formats)'
    )

    parser.add_argument(
        '--output-format',
        choices=['text', 'json', 'markdown', 'html'],
        default='text',
        help='Console output format (default: text)'
    )

    # LLM configuration
    parser.add_argument(
        '--provider',
        type=str,
        help='LLM provider (requires --model)'
    )

    parser.add_argument(
        '--model',
        type=str,
        help='LLM model (requires --provider)'
    )

    parser.add_argument(
        '--max-tokens',
        type=int,
        default=32000,
        help='Maximum total tokens for LLM context (default: 32000)'
    )

    parser.add_argument(
        '--max-output-tokens',
        type=int,
        default=8000,
        help='Maximum tokens for LLM output generation (default: 8000)'
    )

    parser.add_argument(
        '--timeout',
        type=timeout_type,
        default=300,
        help='HTTP timeout for LLM providers in seconds (default: 300, "none" for unlimited)'
    )

    # Research options
    parser.add_argument(
        '--no-verification',
        action='store_true',
        help='Skip fact-checking and verification stage'
    )

    parser.add_argument(
        '--parallel-searches',
        type=int,
        default=5,
        help='Maximum parallel web searches (default: 5)'
    )

    parser.add_argument(
        '--full-text',
        action='store_true',
        help='Extract full text content from web pages (slower but more comprehensive)'
    )

    parser.add_argument(
        '--reflexive',
        action='store_true',
        help='Enable reflexive mode - analyzes limitations and performs targeted refinement searches'
    )

    parser.add_argument(
        '--max-reflexive-iterations',
        type=int,
        default=2,
        help='Maximum number of reflexive refinement cycles (default: 2)'
    )

    parser.add_argument(
        '--temperature',
        type=float,
        default=0.1,
        help='LLM temperature for consistency (default: 0.1, range: 0.0-1.0)'
    )

    # Utility options
    parser.add_argument(
        '--verbose',
        action='store_true',
        help='Show detailed progress information'
    )

    parser.add_argument(
        '--debug',
        action='store_true',
        help='Show comprehensive debug information: all queries, URLs, relevance assessments, and processing decisions'
    )

    args = parser.parse_args()

    # Validate arguments: provider and model must be given as a pair.
    if (args.provider and not args.model) or (args.model and not args.provider):
        print("❌ Error: Both --provider and --model must be specified together")
        sys.exit(1)

    if args.max_sources < 1 or args.max_sources > 100:
        print("❌ Error: --max-sources must be between 1 and 100")
        sys.exit(1)

    if args.parallel_searches < 1 or args.parallel_searches > 20:
        print("❌ Error: --parallel-searches must be between 1 and 20")
        sys.exit(1)

    # Configure logging level
    if args.verbose:
        import logging
        logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    def _to_dict(obj):
        # Normalize pydantic v2 (model_dump) / v1 (dict) models; pass
        # plain dictionaries through.  Replaces three inline copies of
        # the same hasattr chain.
        if hasattr(obj, 'model_dump'):
            return obj.model_dump()
        if hasattr(obj, 'dict'):
            return obj.dict()
        return obj

    try:
        # Initialize LLM (None lets BasicDeepSearch fall back to its default).
        llm = None
        if args.provider and args.model:
            llm = create_llm(
                args.provider,
                model=args.model,
                max_tokens=args.max_tokens,
                max_output_tokens=args.max_output_tokens,
                timeout=args.timeout
            )

        # Initialize Deep Search engine.
        searcher = BasicDeepSearch(
            llm=llm,
            max_tokens=args.max_tokens,
            max_output_tokens=args.max_output_tokens,
            timeout=args.timeout,
            max_parallel_searches=args.parallel_searches,
            full_text_extraction=args.full_text,
            reflexive_mode=args.reflexive,
            max_reflexive_iterations=args.max_reflexive_iterations,
            temperature=args.temperature,
            debug_mode=args.debug
        )

        # Parse focus areas
        focus_areas = None
        if args.focus:
            focus_areas = [area.strip() for area in args.focus.split(',')]
            print(f"🎯 Focus areas: {', '.join(focus_areas)}")

        # Display research configuration
        print(f"🔍 Research Query: {args.query}")
        print(f"📊 Depth: {args.depth}")
        print(f"📚 Max Sources: {args.max_sources}")
        print(f"📝 Format: {args.format}")
        print(f"✅ Verification: {'Disabled' if args.no_verification else 'Enabled'}")
        print(f"⚡ Parallel Searches: {args.parallel_searches}")
        print(f"📄 Text Extraction: {'Full Text' if args.full_text else 'Preview (1000 chars)'}")
        print(f"🔄 Reflexive Mode: {'Enabled' if args.reflexive else 'Disabled'}")
        if args.reflexive:
            print(f"🔁 Max Reflexive Iterations: {args.max_reflexive_iterations}")

        # Start research
        start_time = time.time()
        print(f"\n🚀 Starting deep search research...")

        report = searcher.research(
            query=args.query,
            focus_areas=focus_areas,
            max_sources=args.max_sources,
            search_depth=args.depth,
            include_verification=not args.no_verification,
            output_format=args.format
        )

        elapsed_time = time.time() - start_time
        print(f"\n✨ Research completed in {elapsed_time:.1f} seconds")

        # Persist to file if requested (format chosen by file extension).
        if args.output:
            save_report(report, args.output, args.format)

        # Console output based on the requested format.
        if args.output_format == 'json':
            print(json.dumps(_to_dict(report), indent=2, ensure_ascii=False))
        elif args.output_format == 'markdown':
            print(format_report_as_markdown(_to_dict(report)))
        elif args.output_format == 'html':
            print(format_report_as_html(_to_dict(report)))
        else:
            # Default human-readable text output.
            print_report(report, args.format)

        # Summary statistics
        if hasattr(report, 'sources'):
            source_count = len(report.sources) if report.sources else 0
        elif isinstance(report, dict) and 'sources' in report:
            source_count = len(report['sources']) if report['sources'] else 0
        else:
            source_count = 0

        print(f"\n📊 Research Summary:")
        print(f"   • Sources analyzed: {source_count}")
        print(f"   • Research depth: {args.depth}")
        print(f"   • Time taken: {elapsed_time:.1f} seconds")
        print(f"   • Format: {args.format}")

        if args.output:
            print(f"   • Saved to: {args.output}")

    except KeyboardInterrupt:
        print("\n⚠️ Research interrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"\n❌ Deep search failed: {e}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)
# Allow invoking this module directly: python -m abstractcore.apps.deepsearch "<query>"
if __name__ == "__main__":
    main()