local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +96 -84
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +72 -44
  41. local_deep_research/search_system.py +147 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1592 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +211 -159
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.0.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,16 +1,28 @@
1
- from typing import Dict, List, Optional
2
- from .config import get_llm
3
- import re
4
- from datetime import datetime
1
+ import importlib
2
+ from typing import Dict, List
3
+
4
+ # Fix circular import by importing directly from source modules
5
+ from .config.llm_config import get_llm
5
6
  from .search_system import AdvancedSearchSystem
6
- from local_deep_research import config
7
- from . import utilties
8
- from .utilties import search_utilities
7
+
8
+ # from . import utilities
9
+ from .utilities import search_utilities
10
+
11
+
12
+ def get_report_generator(search_system=None):
13
+ """Return an instance of the report generator with default settings.
14
+
15
+ Args:
16
+ search_system: Optional existing AdvancedSearchSystem to use
17
+ """
18
+ return IntegratedReportGenerator(search_system=search_system)
19
+
9
20
 
10
21
  class IntegratedReportGenerator:
11
- def __init__(self, searches_per_section: int = 2):
22
+ def __init__(self, searches_per_section: int = 2, search_system=None):
12
23
  self.model = get_llm()
13
- self.search_system = AdvancedSearchSystem()
24
+ # Use provided search_system or create a new one
25
+ self.search_system = search_system or AdvancedSearchSystem()
14
26
  self.searches_per_section = (
15
27
  searches_per_section # Control search depth per section
16
28
  )
@@ -22,17 +34,16 @@ class IntegratedReportGenerator:
22
34
  structure = self._determine_report_structure(initial_findings, query)
23
35
 
24
36
  # Step 2: Research and generate content for each section in one step
25
- sections = self._research_and_generate_sections(initial_findings, structure, query)
37
+ sections = self._research_and_generate_sections(
38
+ initial_findings, structure, query
39
+ )
26
40
 
27
41
  # Step 3: Format final report
28
42
  report = self._format_final_report(sections, structure, query)
29
43
 
30
44
  return report
31
45
 
32
- def _determine_report_structure(
33
- self, findings: Dict, query: str
34
- ) -> List[Dict]:
35
-
46
+ def _determine_report_structure(self, findings: Dict, query: str) -> List[Dict]:
36
47
  """Analyze content and determine optimal report structure."""
37
48
  combined_content = findings["current_knowledge"]
38
49
  prompt = f"""
@@ -92,44 +103,51 @@ class IntegratedReportGenerator:
92
103
  ) -> Dict[str, str]:
93
104
  """Research and generate content for each section in one step."""
94
105
  sections = {}
95
-
106
+
96
107
  for section in structure:
97
108
  print(f"Processing section: {section['name']}")
98
109
  section_content = []
99
110
  section_content.append(f"# {section['name']}\n")
100
-
111
+
101
112
  # Process each subsection by directly researching it
102
113
  for subsection in section["subsections"]:
103
114
  # Add subsection header
104
115
  section_content.append(f"## {subsection['name']}\n")
105
116
  section_content.append(f"_{subsection['purpose']}_\n\n")
106
-
117
+
107
118
  # Generate a specific search query for this subsection
108
119
  subsection_query = f"{query} {section['name']} {subsection['name']} {subsection['purpose']}"
109
-
110
- print(f"Researching subsection: {subsection['name']} with query: {subsection_query}")
111
-
120
+
121
+ print(
122
+ f"Researching subsection: {subsection['name']} with query: {subsection_query}"
123
+ )
124
+
112
125
  # Configure search system for focused search
113
126
  original_max_iterations = self.search_system.max_iterations
114
127
  self.search_system.max_iterations = 1 # Keep search focused
115
-
128
+
116
129
  # Perform search for this subsection
117
130
  subsection_results = self.search_system.analyze_topic(subsection_query)
118
-
131
+
119
132
  # Restore original iterations setting
120
133
  self.search_system.max_iterations = original_max_iterations
121
-
134
+
122
135
  # Add the researched content for this subsection
123
- if "current_knowledge" in subsection_results and subsection_results["current_knowledge"]:
136
+ if (
137
+ "current_knowledge" in subsection_results
138
+ and subsection_results["current_knowledge"]
139
+ ):
124
140
  section_content.append(subsection_results["current_knowledge"])
125
141
  else:
126
- section_content.append("*Limited information was found for this subsection.*\n")
127
-
142
+ section_content.append(
143
+ "*Limited information was found for this subsection.*\n"
144
+ )
145
+
128
146
  section_content.append("\n\n")
129
-
147
+
130
148
  # Combine all content for this section
131
149
  sections[section["name"]] = "\n".join(section_content)
132
-
150
+
133
151
  return sections
134
152
 
135
153
  def _generate_sections(
@@ -157,15 +175,21 @@ class IntegratedReportGenerator:
157
175
  for i, section in enumerate(structure, 1):
158
176
  toc.append(f"{i}. **{section['name']}**")
159
177
  for j, subsection in enumerate(section["subsections"], 1):
160
- toc.append(f" {i}.{j} {subsection['name']} | _{subsection['purpose']}_")
178
+ toc.append(
179
+ f" {i}.{j} {subsection['name']} | _{subsection['purpose']}_"
180
+ )
161
181
 
162
182
  # Combine TOC and sections
163
183
  report_parts = ["\n".join(toc), ""]
164
-
184
+
165
185
  # Add a summary of the research
166
186
  report_parts.append("# Research Summary")
167
- report_parts.append(f"This report was researched using an advanced search system.")
168
- report_parts.append(f"Research included targeted searches for each section and subsection.")
187
+ report_parts.append(
188
+ "This report was researched using an advanced search system."
189
+ )
190
+ report_parts.append(
191
+ "Research included targeted searches for each section and subsection."
192
+ )
169
193
  report_parts.append("\n---\n")
170
194
 
171
195
  # Add each section's content
@@ -173,30 +197,34 @@ class IntegratedReportGenerator:
173
197
  if section["name"] in sections:
174
198
  report_parts.append(sections[section["name"]])
175
199
  report_parts.append("")
176
-
200
+
177
201
  # Format links from search system
178
- formatted_all_links = utilties.search_utilities.format_links(links=self.search_system.all_links_of_system)
179
-
202
+ # Get utilities module dynamically to avoid circular imports
203
+ utilities = importlib.import_module("local_deep_research.utilities")
204
+ formatted_all_links = utilities.search_utilities.format_links_to_markdown(
205
+ all_links=self.search_system.all_links_of_system
206
+ )
207
+
180
208
  # Create final report with all parts
181
209
  final_report_content = "\n\n".join(report_parts)
182
- final_report_content = final_report_content + "\n\n## Sources\n\n" + formatted_all_links
183
-
210
+ final_report_content = (
211
+ final_report_content + "\n\n## Sources\n\n" + formatted_all_links
212
+ )
213
+
184
214
  # Create metadata dictionary
185
215
  from datetime import datetime
216
+
186
217
  metadata = {
187
218
  "generated_at": datetime.utcnow().isoformat(),
188
219
  "initial_sources": len(self.search_system.all_links_of_system),
189
220
  "sections_researched": len(structure),
190
221
  "searches_per_section": self.searches_per_section,
191
- "query": query
222
+ "query": query,
192
223
  }
193
-
224
+
194
225
  # Return both content and metadata
195
- return {
196
- "content": final_report_content,
197
- "metadata": metadata
198
- }
226
+ return {"content": final_report_content, "metadata": metadata}
199
227
 
200
228
  def _generate_error_report(self, query: str, error_msg: str) -> str:
201
229
  error_report = f"=== ERROR REPORT ===\nQuery: {query}\nError: {error_msg}"
202
- return error_report
230
+ return error_report
@@ -1,306 +1,170 @@
1
- from typing import Dict, List, Optional, Callable
2
- from datetime import datetime
3
- from .utilties.search_utilities import remove_think_tags, format_findings_to_text, format_links
4
- import os
5
- from .utilties.enums import KnowledgeAccumulationApproach
6
- from .config import settings, get_llm, get_search
7
- from .citation_handler import CitationHandler
8
- from datetime import datetime
9
- from .utilties.search_utilities import extract_links_from_search_results
1
+ # src/local_deep_research/search_system/search_system.py
10
2
  import logging
11
- logger = logging.getLogger(__name__)
12
- class AdvancedSearchSystem:
13
- def __init__(self):
3
+ from typing import Callable, Dict
4
+
5
+ from langchain_core.language_models import BaseChatModel
6
+
7
+ from .advanced_search_system.findings.repository import FindingsRepository
8
+ from .advanced_search_system.questions.standard_question import (
9
+ StandardQuestionGenerator,
10
+ )
11
+ from .advanced_search_system.strategies.iterdrag_strategy import IterDRAGStrategy
12
+ from .advanced_search_system.strategies.parallel_search_strategy import (
13
+ ParallelSearchStrategy,
14
+ )
15
+ from .advanced_search_system.strategies.rapid_search_strategy import RapidSearchStrategy
16
+ from .advanced_search_system.strategies.standard_strategy import StandardSearchStrategy
17
+ from .citation_handler import CitationHandler
18
+ from .config.config_files import settings
19
+ from .config.llm_config import get_llm
20
+ from .config.search_config import get_search
21
+ from .utilities.db_utils import get_db_setting
14
22
 
15
-
16
- # Get fresh configuration
23
+ logger = logging.getLogger(__name__)
17
24
 
18
- self.search = get_search()
19
- self.model = get_llm()
20
- self.max_iterations = settings.search.iterations
21
- self.questions_per_iteration = settings.search.questions_per_iteration
22
-
23
- self.context_limit = settings.general.knowledge_accumulation_context_limit
24
- self.questions_by_iteration = {}
25
- self.citation_handler = CitationHandler(self.model)
26
- self.progress_callback = None
27
- self.all_links_of_system = list()
28
-
29
- # Check if search is available, log warning if not
30
- if self.search is None:
31
- logger.info("WARNING: Search system initialized with no search engine! Research will not be effective.")
32
- self._update_progress("WARNING: No search engine available", None, {"error": "No search engine configured properly"})
33
25
 
34
-
26
+ class AdvancedSearchSystem:
27
+ """
28
+ Advanced search system that coordinates different search strategies.
29
+ """
30
+
31
+ def __init__(
32
+ self,
33
+ strategy_name: str = "parallel",
34
+ include_text_content: bool = True,
35
+ use_cross_engine_filter: bool = True,
36
+ llm: BaseChatModel | None = None,
37
+ ):
38
+ """Initialize the advanced search system.
35
39
 
36
- def set_progress_callback(self, callback: Callable[[str, int, dict], None]) -> None:
37
- """Set a callback function to receive progress updates.
38
-
39
40
  Args:
40
- callback: Function that takes (message, progress_percent, metadata)
41
+ strategy_name: The name of the search strategy to use ("standard" or "iterdrag")
42
+ include_text_content: If False, only includes metadata and links in search results
43
+ use_cross_engine_filter: Whether to filter results across search
44
+ engines.
45
+ llm: LLM to use. If not provided, it will use the default one.
41
46
  """
42
- self.progress_callback = callback
47
+ # Get configuration
48
+ self.search = get_search()
49
+ self.model = llm
50
+ if llm is None:
51
+ self.model = get_llm()
52
+ self.max_iterations = get_db_setting(
53
+ "search.iterations", settings.search.iterations
54
+ )
55
+ self.questions_per_iteration = get_db_setting(
56
+ "search.questions_per_iteration", settings.search.questions_per_iteration
57
+ )
43
58
 
44
- def _update_progress(self, message: str, progress_percent: int = None, metadata: dict = None) -> None:
45
- """Send a progress update via the callback if available.
46
-
47
- Args:
48
- message: Description of the current progress state
49
- progress_percent: Progress percentage (0-100), if applicable
50
- metadata: Additional data about the progress state
51
- """
52
- if self.progress_callback:
53
- self.progress_callback(message, progress_percent, metadata or {})
59
+ # Log the strategy name that's being used
60
+ logger.info(
61
+ f"Initializing AdvancedSearchSystem with strategy_name='{strategy_name}'"
62
+ )
54
63
 
55
- def _get_follow_up_questions(self, current_knowledge: str, query: str) -> List[str]:
56
- now = datetime.now()
57
- current_time = now.strftime("%Y-%m-%d")
58
-
59
- self._update_progress("Generating follow-up questions...", None, {"iteration": len(self.questions_by_iteration)})
60
-
61
- if self.questions_by_iteration:
62
- prompt = f"""Critically reflect current knowledge (e.g., timeliness), what {self.questions_per_iteration} high-quality internet search questions remain unanswered to exactly answer the query?
63
- Query: {query}
64
- Today: {current_time}
65
- Past questions: {str(self.questions_by_iteration)}
66
- Knowledge: {current_knowledge}
67
- Include questions that critically reflect current knowledge.
68
- \n\n\nFormat: One question per line, e.g. \n Q: question1 \n Q: question2\n\n"""
64
+ # Initialize components
65
+ self.citation_handler = CitationHandler(self.model)
66
+ self.question_generator = StandardQuestionGenerator(self.model)
67
+ self.findings_repository = FindingsRepository(self.model)
68
+
69
+ # Initialize strategy based on name
70
+ if strategy_name.lower() == "iterdrag":
71
+ logger.info("Creating IterDRAGStrategy instance")
72
+ self.strategy = IterDRAGStrategy(model=self.model, search=self.search)
73
+ elif strategy_name.lower() == "parallel":
74
+ logger.info("Creating ParallelSearchStrategy instance")
75
+ self.strategy = ParallelSearchStrategy(
76
+ model=self.model,
77
+ search=self.search,
78
+ include_text_content=include_text_content,
79
+ use_cross_engine_filter=use_cross_engine_filter,
80
+ )
81
+ elif strategy_name.lower() == "rapid":
82
+ logger.info("Creating RapidSearchStrategy instance")
83
+ self.strategy = RapidSearchStrategy(model=self.model, search=self.search)
69
84
  else:
70
- prompt = f" You will have follow up questions. First, identify if your knowledge is outdated (high chance). Today: {current_time}. Generate {self.questions_per_iteration} high-quality internet search questions to exactly answer: {query}\n\n\nFormat: One question per line, e.g. \n Q: question1 \n Q: question2\n\n"
85
+ logger.info("Creating StandardSearchStrategy instance")
86
+ self.strategy = StandardSearchStrategy(model=self.model, search=self.search)
71
87
 
72
- response = self.model.invoke(prompt)
73
- questions = [
74
- q.replace("Q:", "").strip()
75
- for q in remove_think_tags(response.content).split("\n")
76
- if q.strip().startswith("Q:")
77
- ][: self.questions_per_iteration]
78
-
79
- self._update_progress(
80
- f"Generated {len(questions)} follow-up questions",
81
- None,
82
- {"questions": questions}
83
- )
84
-
85
- return questions
88
+ # Log the actual strategy class
89
+ logger.info(f"Created strategy of type: {type(self.strategy).__name__}")
86
90
 
87
- def _compress_knowledge(self, current_knowledge: str, query: str, section_links) -> List[str]:
88
- self._update_progress("Compressing and summarizing knowledge...", None)
91
+ # For backward compatibility
92
+ self.questions_by_iteration = {}
93
+ self.progress_callback = None
94
+ self.all_links_of_system = list()
89
95
 
90
- now = datetime.now()
91
- current_time = now.strftime("%Y-%m-%d")
92
- formatted_links = format_links(links=section_links)
93
- if self.questions_by_iteration:
94
- prompt = f"""First provide a high-quality 1 page explanation with IEEE Referencing Style e.g. [1,2]. Never make up sources. Than provide a exact high-quality one sentence-long answer to the query.
96
+ # Configure the strategy with our attributes
97
+ if hasattr(self, "progress_callback") and self.progress_callback:
98
+ self.strategy.set_progress_callback(self.progress_callback)
95
99
 
96
- Knowledge: {current_knowledge}
97
- Query: {query}
98
- I will append following text to your output for the sources (dont repeat it):\n\n {formatted_links}"""
99
- response = self.model.invoke(prompt)
100
-
101
- self._update_progress("Knowledge compression complete", None)
102
- response = remove_think_tags(response.content)
103
- response = str(response) #+ "\n\n" + str(formatted_links)
100
+ def _progress_callback(self, message: str, progress: int, metadata: dict) -> None:
101
+ """Handle progress updates from the strategy."""
102
+ logger.info(f"Progress: {progress}% - {message}")
103
+ if hasattr(self, "progress_callback"):
104
+ self.progress_callback(message, progress, metadata)
104
105
 
105
- return response
106
+ def set_progress_callback(self, callback: Callable[[str, int, dict], None]) -> None:
107
+ """Set a callback function to receive progress updates."""
108
+ self.progress_callback = callback
109
+ if hasattr(self, "strategy"):
110
+ self.strategy.set_progress_callback(callback)
106
111
 
107
112
  def analyze_topic(self, query: str) -> Dict:
108
- logger.info(f"Starting research on topic: {query}")
109
-
110
-
111
-
112
- findings = []
113
- current_knowledge = ""
114
- iteration = 0
115
- total_iterations = self.max_iterations
116
- section_links = list()
117
-
118
- self._update_progress("Initializing research system", 5, {
119
- "phase": "init",
120
- "iterations_planned": total_iterations
121
- })
122
-
123
- # Check if search engine is available
124
- if self.search is None:
125
- error_msg = "Error: No search engine available. Please check your configuration."
126
- self._update_progress(error_msg, 100, {
127
- "phase": "error",
128
- "error": "No search engine available",
129
- "status": "failed"
130
- })
131
- return {
132
- "findings": [],
133
- "iterations": 0,
134
- "questions": {},
135
- "formatted_findings": "Error: Unable to conduct research without a search engine.",
136
- "current_knowledge": "",
137
- "error": error_msg
138
- }
139
-
140
- while iteration < self.max_iterations:
141
- iteration_progress_base = (iteration / total_iterations) * 100
142
- self._update_progress(f"Starting iteration {iteration + 1} of {total_iterations}",
143
- int(iteration_progress_base),
144
- {"phase": "iteration_start", "iteration": iteration + 1})
145
-
146
- # Generate questions for this iteration
147
- questions = self._get_follow_up_questions(current_knowledge, query)
148
- self.questions_by_iteration[iteration] = questions
149
- logger.info(f"Generated questions: {questions}")
150
- question_count = len(questions)
151
- for q_idx, question in enumerate(questions):
152
- question_progress_base = iteration_progress_base + (((q_idx+1) / question_count) * (100/total_iterations) * 0.5)
153
-
154
- self._update_progress(f"Searching for: {question}",
155
- int(question_progress_base),
156
- {"phase": "search", "iteration": iteration + 1, "question_index": q_idx + 1})
157
-
158
- try:
159
- if self.search is None:
160
- self._update_progress(f"Search engine unavailable, skipping search for: {question}",
161
- int(question_progress_base + 2),
162
- {"phase": "search_error", "error": "No search engine available"})
163
- search_results = []
164
- else:
165
- search_results = self.search.run(question)
166
- except Exception as e:
167
- error_msg = f"Error during search: {str(e)}"
168
- logger.info(f"SEARCH ERROR: {error_msg}")
169
- self._update_progress(error_msg,
170
- int(question_progress_base + 2),
171
- {"phase": "search_error", "error": str(e)})
172
- search_results = []
173
-
174
- if search_results is None:
175
- self._update_progress(f"No search results found for question: {question}",
176
- int(question_progress_base + 2),
177
- {"phase": "search_complete", "result_count": 0})
178
- search_results = [] # Initialize to empty list instead of None
179
- continue
180
-
181
- self._update_progress(f"Found {len(search_results)} results for question: {question}",
182
- int(question_progress_base + 2),
183
- {"phase": "search_complete", "result_count": len(search_results)})
184
-
185
- logger.info(f"len search: {len(search_results)}")
186
-
187
- if len(search_results) == 0:
188
- continue
113
+ """Analyze a topic using the current strategy.
189
114
 
190
- self._update_progress(f"Analyzing results for: {question}",
191
- int(question_progress_base + 5),
192
- {"phase": "analysis"})
115
+ Args:
116
+ query: The research query to analyze
117
+ """
193
118
 
119
+ # Send progress message with LLM info
120
+ self.progress_callback(
121
+ f"Using {get_db_setting('llm.provider')} model: {get_db_setting('llm.model')}",
122
+ 1, # Low percentage to show this as an early step
123
+ {
124
+ "phase": "setup",
125
+ "llm_info": {
126
+ "name": get_db_setting("llm.model"),
127
+ "provider": get_db_setting("llm.provider"),
128
+ },
129
+ },
130
+ )
131
+ # Send progress message with search strategy info
132
+ search_tool = get_db_setting("search.tool")
133
+
134
+ self.progress_callback(
135
+ f"Using search tool: {search_tool}",
136
+ 1.5, # Between setup and processing steps
137
+ {
138
+ "phase": "setup",
139
+ "search_info": {
140
+ "tool": search_tool,
141
+ },
142
+ },
143
+ )
194
144
 
195
- try:
196
- result = self.citation_handler.analyze_followup(
197
- question, search_results, current_knowledge, nr_of_links=len(self.all_links_of_system)
198
- )
199
- links = extract_links_from_search_results(search_results)
200
- self.all_links_of_system.extend(links)
201
- section_links.extend(links)
202
- formatted_links = ""
203
- if links:
204
- formatted_links=format_links(links=links)
205
-
206
- logger.info(f"Generated questions: {formatted_links}")
207
- if result is not None:
208
- results_with_links = str(result["content"])
209
- findings.append(
210
- {
211
- "phase": f"Follow-up {iteration}.{questions.index(question) + 1}",
212
- "content": results_with_links,
213
- "question": question,
214
- "search_results": search_results,
215
- "documents": result["documents"],
216
- }
145
+ # Use the strategy to analyze the topic
146
+ result = self.strategy.analyze_topic(query)
147
+
148
+ # Update our attributes for backward compatibility
149
+ if hasattr(self.strategy, "questions_by_iteration"):
150
+ self.questions_by_iteration = self.strategy.questions_by_iteration
151
+ # Send progress message with search info
152
+ self.progress_callback(
153
+ f"Processed questions: {self.strategy.questions_by_iteration}",
154
+ 2, # Low percentage to show this as an early step
155
+ {
156
+ "phase": "setup",
157
+ "search_info": {
158
+ "questions_by_iteration": len(
159
+ self.strategy.questions_by_iteration
217
160
  )
161
+ },
162
+ },
163
+ )
164
+ if hasattr(self.strategy, "all_links_of_system"):
165
+ self.all_links_of_system = self.strategy.all_links_of_system
218
166
 
219
- if settings.general.knowledge_accumulation != str(KnowledgeAccumulationApproach.NO_KNOWLEDGE.value):
220
- current_knowledge = current_knowledge + "\n\n\n New: \n" + results_with_links
221
-
222
- if settings.general.knowledge_accumulation == str(KnowledgeAccumulationApproach.QUESTION.value):
223
- logger.info("Compressing knowledge")
224
- self._update_progress(f"Compress Knowledge for: {question}",
225
- int(question_progress_base + 0),
226
- {"phase": "analysis"})
227
- current_knowledge = self._compress_knowledge(current_knowledge , query, section_links)
228
-
229
- self._update_progress(f"Analysis complete for question: {question}",
230
- int(question_progress_base + 10),
231
- {"phase": "analysis_complete"})
232
- except Exception as e:
233
- error_msg = f"Error analyzing results: {str(e)}"
234
- logger.info(f"ANALYSIS ERROR: {error_msg}")
235
- self._update_progress(error_msg,
236
- int(question_progress_base + 10),
237
- {"phase": "analysis_error", "error": str(e)})
238
- iteration += 1
239
-
240
- self._update_progress(f"Compressing knowledge after iteration {iteration}",
241
- int((iteration / total_iterations) * 100 - 5),
242
- {"phase": "knowledge_compression"})
243
- logger.info(str(iteration))
244
- logger.info(settings.general.knowledge_accumulation)
245
- logger.info(str(KnowledgeAccumulationApproach.ITERATION.value))
246
- if settings.general.knowledge_accumulation == KnowledgeAccumulationApproach.ITERATION.value:
247
- try:
248
- logger.info("ITERATION - Compressing Knowledge")
249
- current_knowledge = self._compress_knowledge(current_knowledge , query, section_links)
250
- logger.info("FINISHED ITERATION - Compressing Knowledge")
251
- except Exception as e:
252
- error_msg = f"Error compressing knowledge: {str(e)}"
253
- logger.info(f"COMPRESSION ERROR: {error_msg}")
254
- self._update_progress(error_msg,
255
- int((iteration / total_iterations) * 100 - 3),
256
- {"phase": "compression_error", "error": str(e)})
257
-
258
-
259
-
260
- self._update_progress(f"Iteration {iteration} complete",
261
- int((iteration / total_iterations) * 100),
262
- {"phase": "iteration_complete", "iteration": iteration})
263
-
264
- try:
265
- formatted_findings = self._save_findings(findings, current_knowledge, query)
266
- except Exception as e:
267
- error_msg = f"Error saving findings: {str(e)}"
268
- logger.info(f"SAVE ERROR: {error_msg}")
269
- self._update_progress(error_msg,
270
- int((iteration / total_iterations) * 100),
271
- {"phase": "save_error", "error": str(e)})
272
- formatted_findings = "Error: Could not format findings due to an error."
273
-
274
- self._update_progress("Research complete", 95, {"phase": "complete"})
275
-
276
- return {
277
- "findings": findings,
278
- "iterations": iteration,
279
- "questions": self.questions_by_iteration,
280
- "formatted_findings": formatted_findings,
281
- "current_knowledge": current_knowledge
282
- }
283
-
284
- def _save_findings(self, findings: List[Dict], current_knowledge: str, query: str):
285
- logger.info("Saving findings ...")
286
- self._update_progress("Saving research findings...", None)
287
-
288
- formatted_findings = format_findings_to_text(
289
- findings, current_knowledge, self.questions_by_iteration
290
- )
291
- safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[
292
- :50
293
- ]
294
- safe_query = safe_query.replace(" ", "_").lower()
295
- import local_deep_research.config as conf
296
- output_dir = f"{conf.get_config_dir()}/research_outputs"
297
- if not os.path.exists(output_dir):
298
- os.makedirs(output_dir)
299
-
300
- filename = os.path.join(output_dir, f"formatted_output_{safe_query}.txt")
167
+ # Include the search system instance for access to citations
168
+ result["search_system"] = self
301
169
 
302
- with open(filename, "w", encoding="utf-8") as text_file:
303
- text_file.write(formatted_findings)
304
- logger.info("Saved findings")
305
- self._update_progress("Research findings saved", None, {"filename": filename})
306
- return formatted_findings
170
+ return result