local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +23 -22
- local_deep_research/__main__.py +16 -0
- local_deep_research/advanced_search_system/__init__.py +7 -0
- local_deep_research/advanced_search_system/filters/__init__.py +8 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
- local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
- local_deep_research/advanced_search_system/findings/repository.py +452 -0
- local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
- local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
- local_deep_research/advanced_search_system/questions/__init__.py +1 -0
- local_deep_research/advanced_search_system/questions/base_question.py +64 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
- local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
- local_deep_research/advanced_search_system/tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
- local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
- local_deep_research/api/__init__.py +5 -5
- local_deep_research/api/research_functions.py +96 -84
- local_deep_research/app.py +8 -0
- local_deep_research/citation_handler.py +25 -16
- local_deep_research/{config.py → config/config_files.py} +102 -110
- local_deep_research/config/llm_config.py +472 -0
- local_deep_research/config/search_config.py +77 -0
- local_deep_research/defaults/__init__.py +10 -5
- local_deep_research/defaults/main.toml +2 -2
- local_deep_research/defaults/search_engines.toml +60 -34
- local_deep_research/main.py +121 -19
- local_deep_research/migrate_db.py +147 -0
- local_deep_research/report_generator.py +72 -44
- local_deep_research/search_system.py +147 -283
- local_deep_research/setup_data_dir.py +35 -0
- local_deep_research/test_migration.py +178 -0
- local_deep_research/utilities/__init__.py +0 -0
- local_deep_research/utilities/db_utils.py +49 -0
- local_deep_research/{utilties → utilities}/enums.py +2 -2
- local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
- local_deep_research/utilities/search_utilities.py +242 -0
- local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
- local_deep_research/web/__init__.py +0 -1
- local_deep_research/web/app.py +86 -1709
- local_deep_research/web/app_factory.py +289 -0
- local_deep_research/web/database/README.md +70 -0
- local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
- local_deep_research/web/database/migrations.py +447 -0
- local_deep_research/web/database/models.py +117 -0
- local_deep_research/web/database/schema_upgrade.py +107 -0
- local_deep_research/web/models/database.py +294 -0
- local_deep_research/web/models/settings.py +94 -0
- local_deep_research/web/routes/api_routes.py +559 -0
- local_deep_research/web/routes/history_routes.py +354 -0
- local_deep_research/web/routes/research_routes.py +715 -0
- local_deep_research/web/routes/settings_routes.py +1592 -0
- local_deep_research/web/services/research_service.py +947 -0
- local_deep_research/web/services/resource_service.py +149 -0
- local_deep_research/web/services/settings_manager.py +669 -0
- local_deep_research/web/services/settings_service.py +187 -0
- local_deep_research/web/services/socket_service.py +210 -0
- local_deep_research/web/static/css/custom_dropdown.css +277 -0
- local_deep_research/web/static/css/settings.css +1223 -0
- local_deep_research/web/static/css/styles.css +525 -48
- local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
- local_deep_research/web/static/js/components/detail.js +348 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
- local_deep_research/web/static/js/components/fallback/ui.js +215 -0
- local_deep_research/web/static/js/components/history.js +487 -0
- local_deep_research/web/static/js/components/logpanel.js +949 -0
- local_deep_research/web/static/js/components/progress.js +1107 -0
- local_deep_research/web/static/js/components/research.js +1865 -0
- local_deep_research/web/static/js/components/results.js +766 -0
- local_deep_research/web/static/js/components/settings.js +3981 -0
- local_deep_research/web/static/js/components/settings_sync.js +106 -0
- local_deep_research/web/static/js/main.js +226 -0
- local_deep_research/web/static/js/services/api.js +253 -0
- local_deep_research/web/static/js/services/audio.js +31 -0
- local_deep_research/web/static/js/services/formatting.js +119 -0
- local_deep_research/web/static/js/services/pdf.js +622 -0
- local_deep_research/web/static/js/services/socket.js +882 -0
- local_deep_research/web/static/js/services/ui.js +546 -0
- local_deep_research/web/templates/base.html +72 -0
- local_deep_research/web/templates/components/custom_dropdown.html +47 -0
- local_deep_research/web/templates/components/log_panel.html +32 -0
- local_deep_research/web/templates/components/mobile_nav.html +22 -0
- local_deep_research/web/templates/components/settings_form.html +299 -0
- local_deep_research/web/templates/components/sidebar.html +21 -0
- local_deep_research/web/templates/pages/details.html +73 -0
- local_deep_research/web/templates/pages/history.html +51 -0
- local_deep_research/web/templates/pages/progress.html +57 -0
- local_deep_research/web/templates/pages/research.html +139 -0
- local_deep_research/web/templates/pages/results.html +59 -0
- local_deep_research/web/templates/settings_dashboard.html +78 -192
- local_deep_research/web/utils/__init__.py +0 -0
- local_deep_research/web/utils/formatters.py +76 -0
- local_deep_research/web_search_engines/engines/full_search.py +18 -16
- local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
- local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
- local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
- local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +211 -159
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
- local_deep_research/web_search_engines/search_engine_base.py +174 -99
- local_deep_research/web_search_engines/search_engine_factory.py +192 -102
- local_deep_research/web_search_engines/search_engines_config.py +22 -15
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/METADATA +177 -97
- local_deep_research-0.2.0.dist-info/RECORD +135 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/WHEEL +1 -2
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/entry_points.txt +3 -0
- local_deep_research/defaults/llm_config.py +0 -338
- local_deep_research/utilties/search_utilities.py +0 -114
- local_deep_research/web/static/js/app.js +0 -3763
- local_deep_research/web/templates/api_keys_config.html +0 -82
- local_deep_research/web/templates/collections_config.html +0 -90
- local_deep_research/web/templates/index.html +0 -348
- local_deep_research/web/templates/llm_config.html +0 -120
- local_deep_research/web/templates/main_config.html +0 -89
- local_deep_research/web/templates/search_engines_config.html +0 -154
- local_deep_research/web/templates/settings.html +0 -519
- local_deep_research-0.1.26.dist-info/RECORD +0 -61
- local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
- /local_deep_research/{utilties → config}/__init__.py +0 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,242 @@
|
|
1
|
+
import logging
|
2
|
+
import re
|
3
|
+
from typing import Dict, List
|
4
|
+
|
5
|
+
logger = logging.getLogger(__name__)
|
6
|
+
|
7
|
+
|
8
|
+
def remove_think_tags(text: str) -> str:
    """Drop every ``<think>...</think>`` span from *text* and trim the result.

    Args:
        text: Raw text that may contain ``<think>`` reasoning sections.

    Returns:
        The text without any closed ``<think>`` sections, with leading and
        trailing whitespace removed. An opening tag that is never closed is
        left in place.
    """
    # DOTALL lets a single think-section span several lines; the lazy
    # quantifier keeps separate sections from being merged into one match.
    think_span = re.compile(r"<think>.*?</think>", re.DOTALL)
    return think_span.sub("", text).strip()
|
11
|
+
|
12
|
+
|
13
|
+
def extract_links_from_search_results(search_results: List[Dict]) -> List[Dict]:
    """Extract title, URL and index from a list of search result dictionaries.

    Each result is read through its "title", "link" and (optional) "index"
    keys. Values are converted to text and stripped, so a numeric index such
    as ``3`` is kept as ``"3"`` instead of causing the result to be dropped.

    Args:
        search_results: Search result dictionaries; may be empty or None.

    Returns:
        A list of dictionaries with 'title', 'url' and 'index' keys, one per
        result that has both a non-empty title and a non-empty link.
    """
    links = []
    if not search_results:
        return links

    for result in search_results:
        try:
            # None (or a missing key) becomes ""; anything else is coerced to
            # str first so non-string values cannot break on .strip().
            title, url, index = (
                "" if value is None else str(value).strip()
                for value in (
                    result.get("title"),
                    result.get("link"),
                    result.get("index"),
                )
            )
        except Exception as e:
            # Deliberate best-effort: a malformed entry (e.g. not a dict) is
            # logged and skipped so the remaining results are still returned.
            logger.error(f"Error extracting link from result: {str(e)}")
            continue

        if title and url:
            links.append({"title": title, "url": url, "index": index})

    return links
|
44
|
+
|
45
|
+
|
46
|
+
def format_links(links: List[Dict]) -> str:
    """Render *links* as text by delegating to ``format_links_to_markdown``.

    Args:
        links: Link dictionaries, passed through unchanged.

    Returns:
        Whatever ``format_links_to_markdown`` produces for *links*.
    """
    return format_links_to_markdown(links)
|
49
|
+
|
50
|
+
|
51
|
+
def format_links_to_markdown(all_links: List[Dict]) -> str:
    """Render links as a source list, one entry per unique URL.

    Links sharing a URL are merged into a single entry that carries all of
    their indices, e.g. ``[1, 3] Title``. Entries appear in the order their
    URL is first seen, and indices within an entry keep first-seen order
    (duplicates removed), so the output is deterministic across runs.

    Args:
        all_links: Dictionaries read through their 'url', 'title' and 'index'
            keys. Links without a truthy 'url' are ignored.

    Returns:
        The formatted entries followed by one extra newline, or an empty
        string when *all_links* is empty or None.
    """
    if not all_links:
        return ""

    # Plain dicts preserve insertion order, so they double as ordered sets:
    # url -> {index: None, ...} de-duplicates indices without reordering them
    # (a set would order string indices differently from run to run).
    indices_by_url = {}
    title_by_url = {}
    for link in all_links:
        url = link.get("url")
        if not url:
            continue
        indices_by_url.setdefault(url, {})[link.get("index", "")] = None
        # The first link seen for a URL supplies the displayed title.
        title_by_url.setdefault(url, link.get("title", "Untitled"))

    entries = [
        f"[{', '.join(map(str, indices))}] {title_by_url[url]}\n URL: {url}\n\n"
        for url, indices in indices_by_url.items()
    ]
    return "".join(entries) + "\n"
|
81
|
+
|
82
|
+
|
83
|
+
def format_findings(
    findings_list: List[Dict],
    synthesized_content: str,
    questions_by_iteration: Dict[int, List[str]],
) -> str:
    """Format findings into a detailed text output.

    The output is laid out as: the synthesized content, the merged source
    list, the search questions grouped by iteration, one section per finding
    (phase header, matching question, content, that section's sources) and
    finally the merged source list again under "ALL SOURCES".

    Args:
        findings_list: List of finding dictionaries. Each is read through its
            "phase", "content", "search_results" and "question" keys. None is
            treated like an empty list.
        synthesized_content: The synthesized content from the LLM.
        questions_by_iteration: Dictionary mapping iteration numbers to lists
            of questions. None is treated like an empty dictionary.

    Returns:
        str: Formatted text output
    """
    # Later code already treats both collections as optional ("if ...:"), so
    # normalise None up front instead of crashing in the first log call.
    findings_list = findings_list or []
    questions_by_iteration = questions_by_iteration or {}

    logger.info(
        f"Inside format_findings utility. Findings count: {len(findings_list)}, Questions iterations: {len(questions_by_iteration)}"
    )

    # Extract all sources from findings
    all_links = []
    for finding in findings_list:
        search_results = finding.get("search_results", [])
        if search_results:
            try:
                all_links.extend(extract_links_from_search_results(search_results))
            except Exception as link_err:
                logger.error(f"Error processing search results/links: {link_err}")

    # Pieces are collected in a list and joined once at the end.
    parts = []

    # Start with the synthesized content, then the sources (if any exist)
    parts.append(f"{synthesized_content}\n\n")
    parts.append(format_links_to_markdown(all_links))
    parts.append("\n\n")  # Separator after synthesized content

    # Search Questions by Iteration section
    if questions_by_iteration:
        parts.append("## SEARCH QUESTIONS BY ITERATION\n")
        parts.append("\n")
        for iter_num, questions in questions_by_iteration.items():
            parts.append(f"\n #### Iteration {iter_num}:\n")
            parts.extend(f"{i}. {q}\n" for i, q in enumerate(questions, 1))
        parts.append("\n" + "\n\n")
    else:
        logger.warning("No questions by iteration found to format.")

    # Detailed Findings section
    if findings_list:
        parts.append("## DETAILED FINDINGS\n\n")
        logger.info(f"Formatting {len(findings_list)} detailed finding items.")

        for idx, finding in enumerate(findings_list):
            logger.debug(f"Formatting finding item {idx}. Keys: {list(finding.keys())}")
            # Use .get() for safety
            phase = finding.get("phase", "Unknown Phase")
            content = finding.get("content", "No content available.")
            search_results = finding.get("search_results", [])

            # Phase header
            parts.append("\n")
            parts.append(f"### {phase}\n")
            parts.append("\n\n")

            # Prefer the question recorded for this phase; otherwise fall back
            # to a question stored on the finding itself.
            heading = _question_heading_for_phase(phase, questions_by_iteration)
            if heading is not None:
                parts.append(heading)
            elif "question" in finding and finding["question"]:
                parts.append(f"### SEARCH QUESTION:\n{finding['question']}\n\n")

            # Content
            parts.append(f"\n\n{content}\n\n")

            # Search results if they exist
            if search_results:
                try:
                    links = extract_links_from_search_results(search_results)
                    if links:
                        parts.append("### SOURCES USED IN THIS SECTION:\n")
                        parts.append(format_links(links) + "\n\n")
                except Exception as link_err:
                    logger.error(
                        f"Error processing search results/links for finding {idx}: {link_err}"
                    )
            else:
                logger.debug(f"No search_results found for finding item {idx}.")

            # Horizontal rule between findings
            parts.append(f"{'_' * 80}\n\n")
    else:
        logger.warning("No detailed findings found to format.")

    # Summary of all sources at the end
    if all_links:
        parts.append("## ALL SOURCES:\n")
        parts.append(format_links_to_markdown(all_links))
    else:
        logger.info("No unique sources found across all findings to list.")

    logger.info("Finished format_findings utility.")
    return "".join(parts)


def _question_heading_for_phase(phase, questions_by_iteration):
    """Return the ``#### <question>`` heading matching a phase label, or None.

    Two label shapes are recognised: "Follow-up Iteration <iter>.<n>" and
    "Sub-query <n>" (looked up in iteration 0). ``<n>`` is 1-based. A warning
    is logged when a recognised label cannot be parsed or matched.
    """
    if not isinstance(phase, str):
        return None

    if phase.startswith("Follow-up"):
        iteration = question_index = None
        fields = phase.replace("Follow-up Iteration ", "").split(".")
        if len(fields) == 2:
            try:
                iteration, question_index = int(fields[0]), int(fields[1]) - 1
            except ValueError:
                iteration = question_index = None
        if iteration is None:
            logger.warning(f"Could not parse iteration/index from phase: {phase}")
            return None
    elif phase.startswith("Sub-query"):
        try:
            # Extract the index number from "Sub-query X"
            question_index = int(phase.replace("Sub-query ", "")) - 1
        except ValueError:
            logger.warning(f"Could not parse question index from phase: {phase}")
            return None
        # Sub-query questions are looked up under iteration 0
        iteration = 0
    else:
        return None

    questions = (
        questions_by_iteration[iteration] if iteration in questions_by_iteration else ()
    )
    # The lower bound matters: without it "Sub-query 0" would yield index -1
    # and silently pick the last question.
    if 0 <= question_index < len(questions):
        return f"#### {questions[question_index]}\n\n"

    logger.warning(f"Could not find matching question for phase: {phase}")
    return None
|
235
|
+
|
236
|
+
|
237
|
+
def print_search_results(search_results):
    """Log the links found in *search_results* at INFO level.

    The links are extracted with ``extract_links_from_search_results`` and
    rendered with ``format_links``. When no links are extracted, an empty
    string is logged. Nothing is returned.
    """
    extracted = extract_links_from_search_results(search_results)
    rendered = format_links(links=extracted) if extracted else ""
    logger.info(rendered)
|
@@ -1,6 +1,8 @@
|
|
1
1
|
"""Setup utilities (legacy wrapper)."""
|
2
2
|
|
3
|
+
|
3
4
|
def setup_user_directories():
|
4
5
|
"""Set up directories and ensure config files exist."""
|
5
|
-
from
|
6
|
-
|
6
|
+
from ..config.config_files import init_config_files
|
7
|
+
|
8
|
+
init_config_files()
|