local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +5 -3
  149. local_deep_research/web/database/models.py +51 -2
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +51 -61
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +227 -41
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +310 -103
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.0.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -23,7 +23,11 @@ class RapidSearchStrategy(BaseSearchStrategy):
23
23
  """
24
24
 
25
25
  def __init__(
26
- self, search=None, model=None, citation_handler=None, all_links_of_system=None
26
+ self,
27
+ search=None,
28
+ model=None,
29
+ citation_handler=None,
30
+ all_links_of_system=None,
27
31
  ):
28
32
  """Initialize with optional dependency injection for testing."""
29
33
  super().__init__(all_links_of_system=all_links_of_system)
@@ -90,7 +94,10 @@ class RapidSearchStrategy(BaseSearchStrategy):
90
94
  self._update_progress(
91
95
  f"Found {len(initial_results)} initial results",
92
96
  15,
93
- {"phase": "search_complete", "result_count": len(initial_results)},
97
+ {
98
+ "phase": "search_complete",
99
+ "result_count": len(initial_results),
100
+ },
94
101
  )
95
102
 
96
103
  # Extract snippets and links
@@ -123,7 +130,9 @@ class RapidSearchStrategy(BaseSearchStrategy):
123
130
 
124
131
  # Step 2: Generate a few follow-up questions (optional, can be skipped for ultimate speed)
125
132
  self._update_progress(
126
- "Generating follow-up questions", 25, {"phase": "question_generation"}
133
+ "Generating follow-up questions",
134
+ 25,
135
+ {"phase": "question_generation"},
127
136
  )
128
137
 
129
138
  questions = self.question_generator.generate_questions(
@@ -161,7 +170,10 @@ class RapidSearchStrategy(BaseSearchStrategy):
161
170
  self._update_progress(
162
171
  f"Found {len(search_results)} results for question: {question}",
163
172
  int(question_progress + 5),
164
- {"phase": "search_complete", "result_count": len(search_results)},
173
+ {
174
+ "phase": "search_complete",
175
+ "result_count": len(search_results),
176
+ },
165
177
  )
166
178
 
167
179
  # Extract snippets only
@@ -195,7 +207,9 @@ class RapidSearchStrategy(BaseSearchStrategy):
195
207
 
196
208
  # Step 4: Perform a single final synthesis with all collected snippets using the citation handler
197
209
  self._update_progress(
198
- "Synthesizing all collected information", 80, {"phase": "final_synthesis"}
210
+ "Synthesizing all collected information",
211
+ 80,
212
+ {"phase": "final_synthesis"},
199
213
  )
200
214
 
201
215
  try:
@@ -203,7 +217,9 @@ class RapidSearchStrategy(BaseSearchStrategy):
203
217
  # First, we need a stub of current knowledge
204
218
 
205
219
  # Use the citation handler to analyze the results
206
- result = self.citation_handler.analyze_initial(query, all_search_results)
220
+ result = self.citation_handler.analyze_initial(
221
+ query, all_search_results
222
+ )
207
223
 
208
224
  if result:
209
225
  synthesized_content = result["content"]
@@ -224,12 +240,16 @@ class RapidSearchStrategy(BaseSearchStrategy):
224
240
  )
225
241
 
226
242
  # Format the findings with search questions and sources
227
- formatted_findings = self.findings_repository.format_findings_to_text(
228
- findings, synthesized_content
243
+ formatted_findings = (
244
+ self.findings_repository.format_findings_to_text(
245
+ findings, synthesized_content
246
+ )
229
247
  )
230
248
 
231
249
  # Also add to the repository
232
- self.findings_repository.add_documents(result.get("documents", []))
250
+ self.findings_repository.add_documents(
251
+ result.get("documents", [])
252
+ )
233
253
  else:
234
254
  # Fallback if citation handler fails
235
255
  synthesized_content = (
@@ -0,0 +1,492 @@
1
+ """
2
+ Recursive Decomposition Strategy for hierarchical question answering.
3
+
4
+ This strategy decomposes complex questions into subtasks, recursively solving
5
+ each subtask and aggregating results to answer the original question.
6
+ """
7
+
8
+ from typing import Any, Dict, List
9
+
10
+ from langchain_core.language_models import BaseChatModel
11
+ from loguru import logger
12
+
13
+ from ...utilities.search_utilities import format_findings, remove_think_tags
14
+ from ..findings.repository import FindingsRepository
15
+ from ..questions.standard_question import StandardQuestionGenerator
16
+ from .base_strategy import BaseSearchStrategy
17
+ from .source_based_strategy import SourceBasedSearchStrategy
18
+
19
+
20
+ class RecursiveDecompositionStrategy(BaseSearchStrategy):
21
+ """
22
+ A strategy that recursively decomposes complex questions into subtasks.
23
+
24
+ Each subtask is either solved directly via search or further decomposed,
25
+ creating a hierarchical problem-solving approach.
26
+ """
27
+
28
+ def __init__(
29
+ self,
30
+ model: BaseChatModel,
31
+ search: Any,
32
+ all_links_of_system: List[str],
33
+ max_recursion_depth: int = 5,
34
+ source_search_iterations: int = 2,
35
+ source_questions_per_iteration: int = 20,
36
+ ):
37
+ """Initialize the recursive decomposition strategy.
38
+
39
+ Args:
40
+ model: The language model to use
41
+ search: The search engine instance
42
+ all_links_of_system: List to store all encountered links
43
+ max_recursion_depth: Maximum recursion depth to prevent infinite loops
44
+ source_search_iterations: Iterations for source-based searches
45
+ source_questions_per_iteration: Questions per iteration for source-based searches
46
+ """
47
+ super().__init__(all_links_of_system)
48
+ self.model = model
49
+ self.search = search
50
+ self.max_recursion_depth = max_recursion_depth
51
+ self.source_search_iterations = source_search_iterations
52
+ self.source_questions_per_iteration = source_questions_per_iteration
53
+ self.question_generator = StandardQuestionGenerator(model)
54
+ self.findings_repository = FindingsRepository(model)
55
+ self.current_depth = 0
56
+ self.original_query = None # Store the original query for context
57
+
58
+ def analyze_topic(self, query: str, recursion_depth: int = 0) -> Dict:
59
+ """Analyze a topic using recursive decomposition.
60
+
61
+ Args:
62
+ query: The research query to analyze
63
+ recursion_depth: Current recursion depth
64
+
65
+ Returns:
66
+ Dictionary containing analysis results
67
+ """
68
+ if recursion_depth >= self.max_recursion_depth:
69
+ logger.warning(
70
+ f"Max recursion depth {self.max_recursion_depth} reached"
71
+ )
72
+ return self._use_source_based_strategy(query)
73
+
74
+ # Initialize tracking at top level
75
+ if recursion_depth == 0:
76
+ self.all_links_of_system.clear()
77
+ self.questions_by_iteration = []
78
+ self.findings = []
79
+ self.original_query = query # Store the original query
80
+
81
+ # Progress callback for UI
82
+ if self.progress_callback:
83
+ self.progress_callback(
84
+ "Starting recursive decomposition analysis",
85
+ 1,
86
+ {"phase": "init", "strategy": "recursive_decomposition"},
87
+ )
88
+
89
+ logger.info(f"Analyzing query at depth {recursion_depth}: {query}")
90
+
91
+ # Decide whether to decompose or search directly
92
+ decomposition_decision = self._decide_decomposition(query)
93
+
94
+ if decomposition_decision["should_decompose"]:
95
+ # Add decomposition decision to findings for UI visibility
96
+ self.findings.append(
97
+ {
98
+ "phase": f"Decomposition Decision (Depth {recursion_depth})",
99
+ "content": decomposition_decision["reasoning"],
100
+ "subtasks": decomposition_decision["subtasks"],
101
+ "timestamp": self._get_timestamp(),
102
+ }
103
+ )
104
+
105
+ if self.progress_callback:
106
+ self.progress_callback(
107
+ f"Decomposing query into {len(decomposition_decision['subtasks'])} subtasks",
108
+ 10 + (recursion_depth * 10),
109
+ {
110
+ "phase": "decomposition",
111
+ "depth": recursion_depth,
112
+ "subtask_count": len(
113
+ decomposition_decision["subtasks"]
114
+ ),
115
+ },
116
+ )
117
+
118
+ return self._handle_decomposition(
119
+ query, decomposition_decision, recursion_depth
120
+ )
121
+ else:
122
+ # Add search decision to findings
123
+ self.findings.append(
124
+ {
125
+ "phase": f"Direct Search Decision (Depth {recursion_depth})",
126
+ "content": f"Searching directly for: {query}",
127
+ "reasoning": decomposition_decision["reasoning"],
128
+ "timestamp": self._get_timestamp(),
129
+ }
130
+ )
131
+
132
+ return self._use_source_based_strategy(query)
133
+
134
+ def _decide_decomposition(self, query: str) -> Dict:
135
+ """Decide whether to decompose the query or search directly.
136
+
137
+ Args:
138
+ query: The query to analyze
139
+
140
+ Returns:
141
+ Dictionary with decomposition decision and subtasks if applicable
142
+ """
143
+ # Include original query context when needed
144
+ context_info = ""
145
+ if self.original_query and query != self.original_query:
146
+ context_info = f"\nOriginal research topic: {self.original_query}"
147
+
148
+ prompt = f"""Analyze this research query and decide whether to decompose it into subtasks or search directly.
149
+
150
+ Query: {query}{context_info}
151
+
152
+ Consider:
153
+ 1. Is this a compound question with multiple distinct parts?
154
+ 2. Does it require finding specific information that builds on other information?
155
+ 3. Would breaking it down lead to more focused, answerable questions?
156
+ 4. Can this be answered with a straightforward web search?
157
+
158
+ Respond in this format:
159
+ DECISION: [DECOMPOSE or SEARCH_DIRECTLY]
160
+ REASONING: [Your reasoning in 1-2 sentences]
161
+ If DECOMPOSE, provide:
162
+ SUBTASKS:
163
+ 1. [First subtask - make it specific and searchable]
164
+ 2. [Second subtask - make it specific and searchable]
165
+ ...
166
+ DEPENDENCIES: [Explain which subtasks depend on others, if any]
167
+
168
+ When creating subtasks, ensure they maintain relevance to the original topic."""
169
+
170
+ response = self.model.invoke(prompt)
171
+ content = remove_think_tags(response.content)
172
+
173
+ # Log the decision for debugging
174
+ logger.info(f"Decomposition decision for '{query}': {content[:200]}...")
175
+
176
+ # Parse the response
177
+ lines = content.strip().split("\n")
178
+ decision = "SEARCH_DIRECTLY"
179
+ subtasks = []
180
+ dependencies = []
181
+ reasoning = ""
182
+
183
+ parsing_subtasks = False
184
+ for line in lines:
185
+ if line.startswith("DECISION:"):
186
+ decision = line.split(":", 1)[1].strip()
187
+ elif line.startswith("REASONING:"):
188
+ reasoning = line.split(":", 1)[1].strip()
189
+ elif line.startswith("SUBTASKS:"):
190
+ parsing_subtasks = True
191
+ elif line.startswith("DEPENDENCIES:"):
192
+ parsing_subtasks = False
193
+ dependencies_text = line.split(":", 1)[1].strip()
194
+ if dependencies_text:
195
+ dependencies = [dependencies_text]
196
+ elif parsing_subtasks and line.strip():
197
+ # Handle numbered subtasks
198
+ if line[0].isdigit() and "." in line:
199
+ subtasks.append(line.split(".", 1)[1].strip())
200
+ elif line.strip().startswith("-"):
201
+ subtasks.append(line.strip()[1:].strip())
202
+
203
+ return {
204
+ "should_decompose": decision == "DECOMPOSE",
205
+ "reasoning": reasoning,
206
+ "subtasks": subtasks,
207
+ "dependencies": dependencies,
208
+ }
209
+
210
+ def _handle_decomposition(
211
+ self, query: str, decomposition: Dict, recursion_depth: int
212
+ ) -> Dict:
213
+ """Handle the decomposition of a query into subtasks.
214
+
215
+ Args:
216
+ query: The original query
217
+ decomposition: The decomposition decision with subtasks
218
+ recursion_depth: Current recursion depth
219
+
220
+ Returns:
221
+ Aggregated results from all subtasks
222
+ """
223
+ subtasks = decomposition["subtasks"]
224
+ subtask_results = []
225
+
226
+ # Process each subtask
227
+ for i, subtask in enumerate(subtasks):
228
+ logger.info(
229
+ f"Processing subtask {i + 1}/{len(subtasks)} at depth {recursion_depth}: {subtask}"
230
+ )
231
+
232
+ # Update progress for UI
233
+ progress = (
234
+ 20 + (recursion_depth * 10) + ((i + 1) / len(subtasks) * 40)
235
+ )
236
+ if self.progress_callback:
237
+ self.progress_callback(
238
+ f"Processing subtask {i + 1}/{len(subtasks)}: {subtask[:50]}...",
239
+ int(progress),
240
+ {
241
+ "phase": "subtask_processing",
242
+ "depth": recursion_depth,
243
+ "current_subtask": i + 1,
244
+ "total_subtasks": len(subtasks),
245
+ "subtask_text": subtask,
246
+ },
247
+ )
248
+
249
+ # Recursively analyze subtask
250
+ result = self.analyze_topic(subtask, recursion_depth + 1)
251
+
252
+ # Store subtask result with metadata
253
+ subtask_results.append(
254
+ {
255
+ "subtask": subtask,
256
+ "result": result,
257
+ "depth": recursion_depth + 1,
258
+ "index": i + 1,
259
+ }
260
+ )
261
+
262
+ # Add subtask completion to findings
263
+ self.findings.append(
264
+ {
265
+ "phase": f"Subtask {i + 1} Complete (Depth {recursion_depth})",
266
+ "content": f"Completed: {subtask}",
267
+ "result_summary": result.get("current_knowledge", "")[:500],
268
+ "timestamp": self._get_timestamp(),
269
+ }
270
+ )
271
+
272
+ # Aggregate results
273
+ aggregated_result = self._aggregate_subtask_results(
274
+ query, subtask_results, recursion_depth
275
+ )
276
+
277
+ return aggregated_result
278
+
279
def _aggregate_subtask_results(
    self,
    original_query: str,
    subtask_results: List[Dict],
    recursion_depth: int,
) -> Dict:
    """Aggregate results from multiple subtasks to answer the original query.

    The subtask answers are concatenated into a context block, the model is
    asked to synthesize a single answer from it, and the links, findings and
    questions gathered by the subtasks are merged into one result dict.

    Args:
        original_query: The original query
        subtask_results: Results from all subtasks; each entry is read via
            its "subtask" and "result" keys
        recursion_depth: Current recursion depth

    Returns:
        Aggregated result answering the original query
    """
    # Update progress
    if self.progress_callback:
        self.progress_callback(
            f"Synthesizing results from {len(subtask_results)} subtasks",
            80 + (recursion_depth * 5),
            {"phase": "synthesis", "depth": recursion_depth},
        )

    # Prepare context from subtask results
    context_parts = []
    all_links = []
    all_findings = []
    question_groups = []

    for idx, result in enumerate(subtask_results):
        subtask = result["subtask"]
        subtask_result = result["result"]

        # Extract key information
        if "current_knowledge" in subtask_result:
            context_parts.append(
                f"### Subtask {idx + 1}: {subtask}\n"
                f"Result: {subtask_result['current_knowledge']}\n"
            )

        if "all_links_of_system" in subtask_result:
            all_links.extend(subtask_result["all_links_of_system"])

        if "findings" in subtask_result:
            all_findings.extend(subtask_result["findings"])

        questions = subtask_result.get("questions_by_iteration")
        if isinstance(questions, dict):
            # Iterating a dict yields its keys, so extend(questions) would
            # record the iteration numbers and drop the questions themselves.
            question_groups.extend(questions.values())
        elif isinstance(questions, (list, tuple)):
            question_groups.extend(questions)
        elif questions is not None:
            question_groups.append(questions)

    context = "\n".join(context_parts)

    # Include master context for better synthesis
    master_context_info = ""
    if self.original_query and original_query != self.original_query:
        master_context_info = (
            f"\nMaster Research Topic: {self.original_query}"
        )

    # Use LLM to synthesize final answer
    synthesis_prompt = f"""Based on the following subtask results, provide a comprehensive answer to the original query.

Original Query: {original_query}{master_context_info}

Subtask Results:
{context}

Synthesize the information to directly answer the original query. Be specific and reference the relevant information from the subtasks. Provide a clear, well-structured answer.
"""

    response = self.model.invoke(synthesis_prompt)
    synthesized_answer = remove_think_tags(response.content)

    # Add synthesis to findings
    synthesis_finding = {
        "phase": f"Final Synthesis (Depth {recursion_depth})",
        "content": synthesized_answer,
        "question": original_query,
        "timestamp": self._get_timestamp(),
    }
    self.findings.append(synthesis_finding)
    all_findings.append(synthesis_finding)

    # Remove duplicate links (can't use set() on the links themselves since
    # they might be dicts); the first occurrence of each link is kept.
    unique_links = []
    seen_links = set()
    for link in all_links:
        # If link is a dict, use its URL/link as the unique identifier
        if isinstance(link, dict):
            link_id = link.get("link") or link.get("url") or str(link)
        else:
            link_id = str(link)

        if link_id not in seen_links:
            seen_links.add(link_id)
            unique_links.append(link)

    # Convert the collected question groups to the expected dictionary
    # format. Groups are numbered sequentially from 1 so that subtasks which
    # each start at the same iteration number cannot overwrite one another.
    questions_dict = {}
    for group in question_groups:
        # A dict nested inside a list contributes one group per value
        entries = group.values() if isinstance(group, dict) else (group,)
        for entry in entries:
            if entry is None:
                continue
            if not isinstance(entry, list):
                # For any other type, make it a single-item list
                entry = [str(entry)]
            questions_dict[len(questions_dict) + 1] = entry

    # Format findings for UI using the standard utility
    formatted_findings = format_findings(
        all_findings,
        synthesized_answer,  # Pass the synthesized answer as the second argument
        questions_dict,  # Pass the questions dictionary as the third argument
    )

    # Compile final result matching source-based strategy format
    result = {
        "current_knowledge": synthesized_answer,
        "formatted_findings": formatted_findings,
        "findings": all_findings,
        "iterations": 0,  # Set to 0 since we don't track iterations
        "questions_by_iteration": questions_dict,
        "all_links_of_system": unique_links,
        "sources": unique_links,
        "subtask_results": subtask_results,
        "strategy": "recursive_decomposition",
        "recursion_depth": recursion_depth,
        "questions": {
            # Count individual questions, not iteration groups
            "total": sum(len(group) for group in questions_dict.values()),
            "by_iteration": questions_dict,
        },
    }

    # Final progress update (only the top-level call reports completion)
    if self.progress_callback and recursion_depth == 0:
        self.progress_callback(
            "Analysis complete",
            100,
            {"phase": "complete", "strategy": "recursive_decomposition"},
        )

    return result
426
+
427
def _use_source_based_strategy(self, query: str) -> Dict:
    """Run a direct search for the query through the source-based strategy.

    When a master query is stored on the instance and differs from ``query``,
    the search is run on a query that embeds the master query as context.

    Args:
        query: The query to search

    Returns:
        The result dict from the source-based strategy, with "findings" and
        "questions_by_iteration" filled in when the strategy omitted them
    """
    master_query = self.original_query
    if master_query and query != master_query:
        # Sub-query: carry the master topic along as context
        search_query = f"{query} (in the context of: {master_query})"
        log_line = f"Enhanced query for source-based search: {search_query}"
    else:
        search_query = query
        log_line = f"Using source-based strategy for direct search: {query}"
    logger.info(log_line)

    # Build the delegate strategy on top of this instance's model and search
    strategy_options = {
        "model": self.model,
        "search": self.search,
        "all_links_of_system": self.all_links_of_system,
        "include_text_content": True,
        "use_cross_engine_filter": True,
        "use_atomic_facts": True,
    }
    delegate = SourceBasedSearchStrategy(**strategy_options)

    # Apply this instance's iteration limits to the delegate
    delegate.max_iterations = self.source_search_iterations
    delegate.questions_per_iteration = self.source_questions_per_iteration

    # Share the progress callback to maintain UI integration
    callback = self.progress_callback
    if callback:
        delegate.set_progress_callback(callback)

    outcome = delegate.analyze_topic(search_query)

    # Fold the delegate's questions into our own tracking.
    # NOTE(review): if the delegate stores questions_by_iteration as a dict,
    # extend() adds only its keys -- confirm the expected shape.
    not_set = object()
    delegate_questions = getattr(delegate, "questions_by_iteration", not_set)
    if delegate_questions is not not_set:
        self.questions_by_iteration.extend(delegate_questions)

    # Ensure results have the fields the UI expects
    if "findings" not in outcome:
        outcome["findings"] = []
    if "questions_by_iteration" not in outcome:
        outcome["questions_by_iteration"] = self.questions_by_iteration

    return outcome
487
+
488
+ def _get_timestamp(self) -> str:
489
+ """Get current timestamp for findings."""
490
+ from datetime import datetime
491
+
492
+ return datetime.utcnow().isoformat()