local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +96 -84
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +72 -44
  41. local_deep_research/search_system.py +147 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1592 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +211 -159
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.0.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,242 @@
1
+ import logging
2
+ import re
3
+ from typing import Dict, List
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
def remove_think_tags(text: str) -> str:
    """Strip ``<think>`` reasoning markup from *text*.

    Complete ``<think>...</think>`` sections are removed together with their
    contents. Any unpaired ``<think>`` or ``</think>`` tag that remains
    (for example when the output was truncated mid-section) is removed as
    well, but the text around it is kept.

    Args:
        text: The raw text, possibly containing think tags.

    Returns:
        The text without think tags, with leading and trailing whitespace
        stripped.
    """
    # DOTALL lets a single think section span several lines; the non-greedy
    # match keeps separate sections from being merged into one removal.
    text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    # Whatever tag is left at this point has no partner; drop only the tag.
    text = re.sub(r"</?think>", "", text)
    return text.strip()
11
+
12
+
13
def extract_links_from_search_results(search_results: List[Dict]) -> List[Dict]:
    """Extract title, URL and index from a list of search result dictionaries.

    Each result is read through its ``"title"``, ``"link"`` and ``"index"``
    keys. Missing or ``None`` values become empty strings; any other value is
    converted to ``str`` and stripped, so a non-string value (such as an
    integer index) no longer causes the whole result to be discarded.

    Args:
        search_results: Search result dictionaries; may be empty or ``None``.

    Returns:
        A list of dictionaries with ``"title"``, ``"url"`` and ``"index"``
        keys, one per result that has both a non-empty title and a non-empty
        link.
    """
    links = []
    if not search_results:
        return links

    for result in search_results:
        try:
            # Normalise every field to a stripped string; None/missing -> "".
            title, url, index = (
                "" if value is None else str(value).strip()
                for value in (
                    result.get("title"),
                    result.get("link"),
                    result.get("index"),
                )
            )
        except Exception as e:
            # Best effort: a malformed entry (e.g. not a mapping) is logged
            # and skipped instead of aborting the whole extraction.
            logger.error(f"Error extracting link from result: {str(e)}")
            continue

        if title and url:
            links.append({"title": title, "url": url, "index": index})
    return links
44
+
45
+
46
def format_links(links: List[Dict]) -> str:
    """Render *links* as text by delegating to ``format_links_to_markdown``.

    Args:
        links: Link dictionaries, passed through unchanged.

    Returns:
        Whatever ``format_links_to_markdown`` returns for *links*.
    """
    return format_links_to_markdown(links)
49
+
50
+
51
def format_links_to_markdown(all_links: List[Dict]) -> str:
    """Format links as a text list with one entry per unique URL.

    Links sharing a URL are merged into a single entry that lists every
    distinct index seen for that URL, in first-seen order, e.g.
    ``[1, 3] Title``. The title of the first link with that URL is used
    (``"Untitled"`` when that link has no ``"title"`` key). Links without a
    URL are ignored.

    Args:
        all_links: Dictionaries read through their ``"url"``, ``"title"``
            and ``"index"`` keys; may be empty or ``None``.

    Returns:
        The formatted entries followed by one extra newline, or an empty
        string when *all_links* is empty.
    """
    if not all_links:
        return ""

    entries = []  # (url, title, indices) in order of first appearance
    indices_by_url = {}
    for link in all_links:
        url = link.get("url")
        if not url:
            continue
        indices = indices_by_url.get(url)
        if indices is None:
            indices = indices_by_url[url] = []
            entries.append((url, link.get("title", "Untitled"), indices))
        index = link.get("index", "")
        # De-duplicate while keeping first-seen order so that the rendered
        # index list is stable from run to run (a set has arbitrary order).
        if index not in indices:
            indices.append(index)

    parts = [
        f"[{', '.join(map(str, indices))}] {title}\n URL: {url}\n\n"
        for url, title, indices in entries
    ]
    # A non-empty input always ends with one extra newline, even when no
    # link carried a URL.
    parts.append("\n")
    return "".join(parts)
81
+
82
+
83
def _question_for_phase(phase, questions_by_iteration):
    """Return the search question that a phase label refers to, or ``None``.

    Two label shapes are recognised:

    * ``"Follow-up Iteration <iteration>.<n>"`` - the n-th (1-based) question
      stored under ``<iteration>``.
    * ``"Sub-query <n>"`` - the n-th (1-based) question stored under
      iteration ``0``.

    A warning is logged when a recognised label cannot be parsed or does not
    point at an existing question. Any other phase yields ``None`` silently.
    """
    if not isinstance(phase, str):
        return None

    if phase.startswith("Follow-up"):
        unparsable = f"Could not parse iteration/index from phase: {phase}"
        parts = phase.replace("Follow-up Iteration ", "").split(".")
        if len(parts) != 2:
            logger.warning(unparsable)
            return None
        try:
            iteration = int(parts[0])
            question_index = int(parts[1]) - 1
        except ValueError:
            logger.warning(unparsable)
            return None
    elif phase.startswith("Sub-query"):
        try:
            question_index = int(phase.replace("Sub-query ", "")) - 1
        except ValueError:
            logger.warning(f"Could not parse question index from phase: {phase}")
            return None
        # Sub-queries are looked up under iteration 0.
        iteration = 0
    else:
        return None

    # The lower bound matters: without it "Sub-query 0" would become index -1
    # and silently select the last question of the iteration.
    if iteration in questions_by_iteration and 0 <= question_index < len(
        questions_by_iteration[iteration]
    ):
        return questions_by_iteration[iteration][question_index]

    logger.warning(f"Could not find matching question for phase: {phase}")
    return None


def format_findings(
    findings_list: List[Dict],
    synthesized_content: str,
    questions_by_iteration: Dict[int, List[str]],
) -> str:
    """Format findings into a detailed text output.

    The output consists of, in order: the synthesized content, the merged
    list of sources, the search questions grouped by iteration, one detailed
    section per finding, and a final list of all sources.

    Args:
        findings_list: List of finding dictionaries, read through their
            ``"phase"``, ``"content"``, ``"search_results"`` and
            ``"question"`` keys. ``None`` is treated as an empty list.
        synthesized_content: The synthesized content from the LLM.
        questions_by_iteration: Dictionary mapping iteration numbers to lists
            of questions. ``None`` is treated as an empty dictionary.

    Returns:
        str: Formatted text output
    """
    # Tolerate missing inputs instead of failing on len(None) below.
    findings_list = findings_list or []
    questions_by_iteration = questions_by_iteration or {}

    logger.info(
        f"Inside format_findings utility. Findings count: {len(findings_list)}, Questions iterations: {len(questions_by_iteration)}"
    )
    parts = []

    # Collect the sources of every finding up front; they are listed both
    # right after the synthesized content and again at the very end.
    all_links = []
    for finding in findings_list:
        search_results = finding.get("search_results", [])
        if search_results:
            try:
                all_links.extend(extract_links_from_search_results(search_results))
            except Exception as link_err:
                logger.error(f"Error processing search results/links: {link_err}")

    # Synthesized content, then its sources, then a separator.
    parts.append(f"{synthesized_content}\n\n")
    parts.append(format_links_to_markdown(all_links))
    parts.append("\n\n")

    # Search questions grouped by iteration.
    if questions_by_iteration:
        parts.append("## SEARCH QUESTIONS BY ITERATION\n")
        parts.append("\n")
        for iter_num, questions in questions_by_iteration.items():
            parts.append(f"\n #### Iteration {iter_num}:\n")
            parts.extend(f"{i}. {q}\n" for i, q in enumerate(questions, 1))
        # NOTE(review): indentation is not visible in the reviewed diff; this
        # section terminator is assumed to be appended once, after all
        # iterations - confirm against the original file.
        parts.append("\n" + "\n\n")
    else:
        logger.warning("No questions by iteration found to format.")

    # Detailed findings, one section per finding.
    if findings_list:
        parts.append("## DETAILED FINDINGS\n\n")
        logger.info(f"Formatting {len(findings_list)} detailed finding items.")

        for idx, finding in enumerate(findings_list):
            logger.debug(f"Formatting finding item {idx}. Keys: {list(finding.keys())}")
            phase = finding.get("phase", "Unknown Phase")
            content = finding.get("content", "No content available.")
            search_results = finding.get("search_results", [])

            # Phase header
            parts.append("\n")
            parts.append(f"### {phase}\n")
            parts.append("\n\n")

            # Prefer the question the phase label points at; otherwise fall
            # back to a question stored on the finding itself.
            question = _question_for_phase(phase, questions_by_iteration)
            if question is not None:
                parts.append(f"#### {question}\n\n")
            elif finding.get("question"):
                parts.append(f"### SEARCH QUESTION:\n{finding['question']}\n\n")

            # Content
            parts.append(f"\n\n{content}\n\n")

            # Sources used by this finding, if any.
            if search_results:
                try:
                    links = extract_links_from_search_results(search_results)
                    if links:
                        parts.append("### SOURCES USED IN THIS SECTION:\n")
                        parts.append(format_links(links) + "\n\n")
                except Exception as link_err:
                    logger.error(
                        f"Error processing search results/links for finding {idx}: {link_err}"
                    )
            else:
                logger.debug(f"No search_results found for finding item {idx}.")

            # NOTE(review): assumed to be a per-finding divider (inside the
            # loop); indentation is not visible in the reviewed diff - confirm.
            parts.append(f"{'_' * 80}\n\n")
    else:
        logger.warning("No detailed findings found to format.")

    # Summary of all sources at the end.
    if all_links:
        parts.append("## ALL SOURCES:\n")
        parts.append(format_links_to_markdown(all_links))
    else:
        logger.info("No unique sources found across all findings to list.")

    logger.info("Finished format_findings utility.")
    return "".join(parts)
235
+
236
+
237
def print_search_results(search_results):
    """Log the links extracted from *search_results* at INFO level.

    The links are extracted with ``extract_links_from_search_results`` and
    rendered with ``format_links``. When no links are found, an empty string
    is logged.
    """
    extracted = extract_links_from_search_results(search_results)
    rendered = format_links(links=extracted) if extracted else ""
    logger.info(rendered)
@@ -1,6 +1,8 @@
1
1
  """Setup utilities (legacy wrapper)."""
2
2
 
3
+
3
4
  def setup_user_directories():
4
5
  """Set up directories and ensure config files exist."""
5
- from local_deep_research.config import init_config_files
6
- init_config_files()
6
+ from ..config.config_files import init_config_files
7
+
8
+ init_config_files()
@@ -1,2 +1 @@
1
1
  """Web interface for Local Deep Research"""
2
- from . import app