local-deep-research 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +24 -0
- local_deep_research/citation_handler.py +113 -0
- local_deep_research/config.py +166 -0
- local_deep_research/defaults/__init__.py +44 -0
- local_deep_research/defaults/llm_config.py +269 -0
- local_deep_research/defaults/local_collections.toml +47 -0
- local_deep_research/defaults/main.toml +57 -0
- local_deep_research/defaults/search_engines.toml +244 -0
- local_deep_research/local_collections.py +141 -0
- local_deep_research/main.py +113 -0
- local_deep_research/report_generator.py +206 -0
- local_deep_research/search_system.py +241 -0
- local_deep_research/utilties/__init__.py +0 -0
- local_deep_research/utilties/enums.py +9 -0
- local_deep_research/utilties/llm_utils.py +116 -0
- local_deep_research/utilties/search_utilities.py +115 -0
- local_deep_research/utilties/setup_utils.py +6 -0
- local_deep_research/web/__init__.py +2 -0
- local_deep_research/web/app.py +1209 -0
- local_deep_research/web/static/css/styles.css +1008 -0
- local_deep_research/web/static/js/app.js +2078 -0
- local_deep_research/web/templates/api_keys_config.html +82 -0
- local_deep_research/web/templates/collections_config.html +90 -0
- local_deep_research/web/templates/index.html +312 -0
- local_deep_research/web/templates/llm_config.html +120 -0
- local_deep_research/web/templates/main_config.html +89 -0
- local_deep_research/web/templates/search_engines_config.html +154 -0
- local_deep_research/web/templates/settings.html +519 -0
- local_deep_research/web/templates/settings_dashboard.html +207 -0
- local_deep_research/web_search_engines/__init__.py +0 -0
- local_deep_research/web_search_engines/engines/__init__.py +0 -0
- local_deep_research/web_search_engines/engines/full_search.py +128 -0
- local_deep_research/web_search_engines/engines/meta_search_engine.py +274 -0
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +367 -0
- local_deep_research/web_search_engines/engines/search_engine_brave.py +245 -0
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +123 -0
- local_deep_research/web_search_engines/engines/search_engine_github.py +663 -0
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +283 -0
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +337 -0
- local_deep_research/web_search_engines/engines/search_engine_local.py +901 -0
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +153 -0
- local_deep_research/web_search_engines/engines/search_engine_medrxiv.py +623 -0
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +992 -0
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +230 -0
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +474 -0
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +242 -0
- local_deep_research/web_search_engines/full_search.py +254 -0
- local_deep_research/web_search_engines/search_engine_base.py +197 -0
- local_deep_research/web_search_engines/search_engine_factory.py +233 -0
- local_deep_research/web_search_engines/search_engines_config.py +54 -0
- local_deep_research-0.1.0.dist-info/LICENSE +21 -0
- local_deep_research-0.1.0.dist-info/METADATA +328 -0
- local_deep_research-0.1.0.dist-info/RECORD +56 -0
- local_deep_research-0.1.0.dist-info/WHEEL +5 -0
- local_deep_research-0.1.0.dist-info/entry_points.txt +3 -0
- local_deep_research-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,115 @@
|
|
1
|
+
import re
|
2
|
+
|
3
|
+
|
4
|
+
def remove_think_tags(text: str) -> str:
    """Return *text* with every ``<think>...</think>`` span removed.

    The match is non-greedy and uses ``re.DOTALL``, so each span may cover
    several lines and multiple spans are removed independently. Leading and
    trailing whitespace is stripped from the result.

    Args:
        text: The string to clean.

    Returns:
        The cleaned, stripped string.
    """
    # No debug print here: this is a pure text helper and must not write
    # every cleaned response to stdout as a side effect.
    return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
|
8
|
+
|
9
|
+
|
10
|
+
def extract_links_from_search_results(search_results: list) -> list:
    """Extract title, URL and index from a list of search result dicts.

    Each result is read through ``.get`` for the keys ``"title"``,
    ``"link"`` and ``"index"``. Values are converted to ``str`` and stripped;
    a missing or ``None`` value becomes ``""``. A result is kept only when
    both its title and its link are non-empty after stripping.

    Args:
        search_results: Iterable of mapping-like results. ``None`` is
            treated as an empty list.

    Returns:
        A list of dicts with the keys ``"title"``, ``"url"`` and ``"index"``
        (all strings), in input order.
    """
    links = []
    for result in search_results or []:
        try:
            raw_fields = (
                result.get("title"),
                result.get("link"),
                result.get("index"),
            )
        except AttributeError:
            # Entry has no ``.get`` (not mapping-like); skip just this one.
            continue

        # Coerce before stripping so a numeric index or a None field does not
        # raise and cause an otherwise valid result to be dropped.
        title, url, index = (
            "" if value is None else str(value).strip() for value in raw_fields
        )
        if title and url:
            links.append({"title": title, "url": url, "index": index})
    return links
|
32
|
+
|
33
|
+
def format_links(links):
    """Render *links* as a numbered ``SOURCES:`` text block.

    Each entry is labelled with its ``"index"`` value. When that value is
    missing or empty, the entry's 1-based position in *links* is used
    instead, so the label is never blank.

    Args:
        links: Iterable of dicts with ``"title"`` and ``"url"`` keys and an
            optional ``"index"`` key. ``None`` is treated as empty.

    Returns:
        The formatted block: a ``SOURCES:`` header, one two-line entry per
        link, and a trailing blank line.
    """
    pieces = ["SOURCES:\n"]
    for position, link in enumerate(links or [], 1):
        label = link.get("index")
        if label is None or label == "":
            label = position
        pieces.append(f"{label}. {link['title']}\n URL: {link['url']}\n")
    pieces.append("\n")
    return "".join(pieces)
|
40
|
+
|
41
|
+
|
42
|
+
def format_findings_to_text(findings_list, current_knowledge, questions_by_iteration):
|
43
|
+
formatted_text = "COMPLETE RESEARCH OUTPUT \n\n"
|
44
|
+
|
45
|
+
# Store the full current knowledge
|
46
|
+
|
47
|
+
formatted_text += f"{current_knowledge}\n\n"
|
48
|
+
formatted_text += "=" * 80 + "\n\n"
|
49
|
+
|
50
|
+
# Store questions by iteration
|
51
|
+
formatted_text += "SEARCH QUESTIONS BY ITERATION:\n"
|
52
|
+
for iter_num, questions in questions_by_iteration.items():
|
53
|
+
formatted_text += f"\nIteration {iter_num}:\n"
|
54
|
+
for i, q in enumerate(questions, 1):
|
55
|
+
formatted_text += f"{i}. {q}\n"
|
56
|
+
formatted_text += "\n" + "=" * 80 + "\n\n"
|
57
|
+
|
58
|
+
# Store detailed findings
|
59
|
+
formatted_text += "DETAILED FINDINGS:\n\n"
|
60
|
+
all_links = [] # To collect all sources
|
61
|
+
|
62
|
+
for finding in findings_list:
|
63
|
+
# Phase header
|
64
|
+
formatted_text += f"{'='*80}\n"
|
65
|
+
formatted_text += f"PHASE: {finding['phase']}\n"
|
66
|
+
formatted_text += f"{'='*80}\n\n"
|
67
|
+
|
68
|
+
# If this is a follow-up phase, show the corresponding question
|
69
|
+
if finding["phase"].startswith("Follow-up"):
|
70
|
+
iteration = int(finding["phase"].split(".")[0].split()[-1])
|
71
|
+
question_index = int(finding["phase"].split(".")[-1]) - 1
|
72
|
+
if iteration in questions_by_iteration and question_index < len(
|
73
|
+
questions_by_iteration[iteration]
|
74
|
+
):
|
75
|
+
formatted_text += f"SEARCH QUESTION:\n{questions_by_iteration[iteration][question_index]}\n\n"
|
76
|
+
|
77
|
+
# Content
|
78
|
+
formatted_text += f"CONTENT:\n{finding['content']}\n\n"
|
79
|
+
|
80
|
+
# Search results if they exist
|
81
|
+
if "search_results" in finding:
|
82
|
+
# formatted_text += "SEARCH RESULTS:\n"
|
83
|
+
# formatted_text += f"{finding['search_results']}\n\n"
|
84
|
+
|
85
|
+
# Extract and format links for this finding
|
86
|
+
links = extract_links_from_search_results(finding["search_results"])
|
87
|
+
if links:
|
88
|
+
formatted_text += "SOURCES USED IN THIS SECTION:\n"
|
89
|
+
for i, link in enumerate(links, 1):
|
90
|
+
formatted_text += f"{i}. {link['title']}\n URL: {link['url']}\n"
|
91
|
+
formatted_text += "\n"
|
92
|
+
all_links.extend(links)
|
93
|
+
|
94
|
+
formatted_text += f"{'_'*80}\n\n"
|
95
|
+
|
96
|
+
# Add summary of all sources at the end
|
97
|
+
if all_links:
|
98
|
+
formatted_text += "\nALL SOURCES USED IN RESEARCH:\n"
|
99
|
+
formatted_text += "=" * 80 + "\n\n"
|
100
|
+
seen_urls = set() # To prevent duplicates
|
101
|
+
for i, link in enumerate(all_links, 1):
|
102
|
+
if link["url"] not in seen_urls:
|
103
|
+
formatted_text += f"{i}. {link['title']}\n URL: {link['url']}\n"
|
104
|
+
seen_urls.add(link["url"])
|
105
|
+
formatted_text += "\n" + "=" * 80 + "\n"
|
106
|
+
|
107
|
+
return formatted_text
|
108
|
+
|
109
|
+
def print_search_results(search_results):
    """Print the sources found in *search_results* to stdout.

    Links are pulled out with ``extract_links_from_search_results`` and
    rendered with ``format_links``. When no links are extracted, an empty
    string is printed instead.
    """
    extracted = extract_links_from_search_results(search_results)
    output = format_links(links=extracted) if extracted else ""
    print(output)
|
115
|
+
|