local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +20 -3
  149. local_deep_research/web/database/models.py +74 -25
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +63 -83
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +192 -54
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +412 -251
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.2.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,537 @@
1
+ """
2
+ Focused Iteration Strategy - **PROVEN HIGH-PERFORMANCE STRATEGY FOR SIMPLEQA**
3
+
4
+ **PERFORMANCE RECORD:**
5
+ - SimpleQA Accuracy: 96.51% (CONFIRMED HIGH PERFORMER)
6
+ - Optimal Configuration: 8 iterations, 5 questions/iteration, GPT-4.1 Mini
7
+ - Status: PRESERVE THIS STRATEGY - Core SimpleQA implementation
8
+
9
+ This strategy achieves excellent SimpleQA performance by:
10
+ 1. Using simple, direct search execution (like source-based)
11
+ 2. Progressive entity-focused exploration
12
+ 3. No early filtering or complex constraint checking
13
+ 4. Trusting the LLM for final synthesis
14
+
15
+ IMPORTANT: This strategy works exceptionally well for SimpleQA. Any modifications
16
+ should preserve the core approach that achieves 96.51% accuracy.
17
+
18
+ **BrowseComp Enhancement:** Also includes BrowseComp-specific optimizations
19
+ when use_browsecomp_optimization=True, but SimpleQA performance is the priority.
20
+ """
21
+
22
+ import concurrent.futures
23
+ import logging
24
+ from typing import Dict, List
25
+
26
+ from ...citation_handler import CitationHandler
27
+ from ...config.llm_config import get_llm
28
+ from ...config.search_config import get_search
29
+ from ..candidate_exploration import ProgressiveExplorer
30
+ from ..findings.repository import FindingsRepository
31
+ from ..questions import BrowseCompQuestionGenerator
32
+ from .base_strategy import BaseSearchStrategy
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
+ class FocusedIterationStrategy(BaseSearchStrategy):
38
+ """
39
+ A hybrid strategy that combines the simplicity of source-based search
40
+ with BrowseComp-optimized progressive exploration.
41
+
42
+ Key principles:
43
+ 1. Start broad, then narrow progressively
44
+ 2. Extract and systematically search entities
45
+ 3. Keep all results without filtering
46
+ 4. Trust LLM for final constraint matching
47
+ 5. Use more iterations for thorough exploration
48
+ """
49
+
50
def __init__(
    self,
    model=None,
    search=None,
    citation_handler=None,
    all_links_of_system=None,
    max_iterations: int = 8,  # OPTIMAL FOR SIMPLEQA: 96.51% accuracy achieved
    questions_per_iteration: int = 5,  # OPTIMAL FOR SIMPLEQA: proven config
    use_browsecomp_optimization: bool = True,  # Can be False for pure SimpleQA
):
    """
    Set up the strategy and the helper components it drives.

    Args:
        model: LLM to use; when falsy, ``get_llm()`` supplies one.
        search: Search engine to use; when falsy, ``get_search()``
            supplies one.
        citation_handler: Handler used for the final synthesis; when
            falsy, a ``CitationHandler`` is created with handler type
            ``"forced_answer"`` (BrowseComp mode) or ``"standard"``.
        all_links_of_system: Forwarded unchanged to the base class
            initializer.
        max_iterations: Upper bound on search iterations.
        questions_per_iteration: Number of questions requested from the
            question generator in each iteration.
        use_browsecomp_optimization: Selects the BrowseComp question
            generator plus progressive explorer instead of the standard
            question generator with no explorer.
    """
    super().__init__(all_links_of_system)

    # Resolve the core collaborators (search first, then model).
    self.search = search or get_search()
    self.model = model or get_llm()
    self.progress_callback = None

    # Plain configuration values.
    self.max_iterations = max_iterations
    self.questions_per_iteration = questions_per_iteration
    self.use_browsecomp_optimization = use_browsecomp_optimization

    # Question generation / exploration components.
    if not use_browsecomp_optimization:
        # Standard components; imported lazily, only on this path.
        from ..questions import StandardQuestionGenerator

        self.question_generator = StandardQuestionGenerator(self.model)
        self.explorer = None
    else:
        self.question_generator = BrowseCompQuestionGenerator(self.model)
        self.explorer = ProgressiveExplorer(self.search, self.model)

    # BrowseComp mode uses the forced-answer citation handler.
    if use_browsecomp_optimization:
        handler_type = "forced_answer"
    else:
        handler_type = "standard"

    if citation_handler:
        self.citation_handler = citation_handler
    else:
        self.citation_handler = CitationHandler(
            self.model, handler_type=handler_type
        )
    self.findings_repository = FindingsRepository(self.model)

    # Accumulators filled in while analyze_topic runs.
    self.all_search_results = []
    self.questions_by_iteration = {}
95
def analyze_topic(self, query: str) -> Dict:
    """
    Analyze a topic using the focused iteration approach.

    Runs up to ``self.max_iterations`` rounds of question generation and
    searching, accumulates every search result without filtering, and then
    asks the citation handler for one final synthesis.

    Progress is reported through ``self._update_progress``: 5 at start,
    the 10-90 band split evenly across iterations, 90 for synthesis and
    100 on completion. The reported percentage never decreases.

    Args:
        query: The research question to investigate.

    Returns:
        On success, a dict with ``findings``, ``iterations``,
        ``questions_by_iteration``, ``formatted_findings``,
        ``current_knowledge``, ``all_links_of_system`` and ``sources``;
        ``candidates`` and ``entity_coverage`` are added when the explorer
        exposes a ``progress`` attribute. If the search engine fails
        validation or any exception escapes the main loop, the dict built
        by ``self._create_error_response`` is returned instead.
    """
    logger.info(f"Starting focused iteration search: {query}")

    self._update_progress(
        "Initializing focused iteration search",
        5,
        {
            "phase": "init",
            "strategy": "focused_iteration",
            "max_iterations": self.max_iterations,
            "browsecomp_optimized": self.use_browsecomp_optimization,
        },
    )

    # Validate search engine
    if not self._validate_search_engine():
        return self._create_error_response("No search engine available")

    findings = []
    extracted_entities = {}

    try:
        # Main iteration loop
        for iteration in range(1, self.max_iterations + 1):
            # Each iteration owns an equal slice of the 10-90% band.
            # Computed inside the loop so max_iterations == 0 never divides.
            iteration_span = 80 / self.max_iterations
            iteration_progress = 10 + (iteration - 1) * iteration_span

            self._update_progress(
                f"Iteration {iteration}/{self.max_iterations}",
                iteration_progress,
                {"phase": f"iteration_{iteration}", "iteration": iteration},
            )

            # Generate questions for this iteration. Only the
            # BrowseComp-aware generator receives the iteration number.
            question_kwargs = {
                "current_knowledge": self._get_current_knowledge_summary(),
                "query": query,
                "questions_per_iteration": self.questions_per_iteration,
                "questions_by_iteration": self.questions_by_iteration,
            }
            if self.use_browsecomp_optimization:
                question_kwargs["iteration"] = iteration
            questions = self.question_generator.generate_questions(
                **question_kwargs
            )

            # Extract entities on first iteration
            if (
                self.use_browsecomp_optimization
                and iteration == 1
                and hasattr(self.question_generator, "extracted_entities")
            ):
                extracted_entities = (
                    self.question_generator.extracted_entities
                )

            # Always include original query in first iteration
            if iteration == 1 and query not in questions:
                questions = [query] + questions

            self.questions_by_iteration[iteration] = questions
            logger.info(f"Iteration {iteration} questions: {questions}")

            # Report starting searches for this iteration. The value sits
            # above the iteration start so the progress bar never moves
            # backwards.
            self._update_progress(
                f"Executing {len(questions)} searches in iteration {iteration}",
                iteration_progress + iteration_span / 8,
                {
                    "phase": f"iteration_{iteration}_searching",
                    "queries": questions[:3],  # Show first 3 queries
                    "total_queries": len(questions),
                },
            )

            # Execute searches
            if not questions:
                # Nothing to search: a thread pool sized by len(questions)
                # would be created with zero workers, which raises and
                # would abort the whole run.
                logger.warning(
                    f"Iteration {iteration} produced no questions; skipping searches"
                )
                iteration_results = []
            elif self.explorer and self.use_browsecomp_optimization:
                # Use progressive explorer for better tracking
                iteration_results, search_progress = self.explorer.explore(
                    queries=questions,
                    max_workers=len(questions),
                    extracted_entities=extracted_entities,
                )

                candidates_found = len(search_progress.found_candidates)
                entities_covered = sum(
                    len(v) for v in search_progress.entity_coverage.values()
                )
                # Convert sets to lists for JSON serialization; only the
                # first three entries are reported.
                serializable_entity_coverage = {
                    k: list(v)
                    for k, v in list(
                        search_progress.entity_coverage.items()
                    )[:3]
                }

                self._update_progress(
                    f"Found {candidates_found} candidates, covered {entities_covered} entities",
                    iteration_progress + iteration_span / 4,
                    {
                        "phase": f"iteration_{iteration}_results",
                        "candidates_found": candidates_found,
                        "entities_covered": entities_covered,
                        "entity_coverage": serializable_entity_coverage,
                    },
                )

                # Check if we should generate verification searches
                if iteration > 3 and search_progress.found_candidates:
                    verification_searches = (
                        self.explorer.suggest_next_searches(
                            extracted_entities, max_suggestions=2
                        )
                    )
                    if verification_searches:
                        logger.info(
                            f"Adding verification searches: {verification_searches}"
                        )
                        self._update_progress(
                            f"Running {len(verification_searches)} verification searches",
                            iteration_progress + 3 * iteration_span / 8,
                            {
                                "phase": f"iteration_{iteration}_verification",
                                "verification_queries": verification_searches,
                            },
                        )
                        # The same list object is stored in
                        # questions_by_iteration, so the verification
                        # queries are recorded there as well.
                        questions.extend(verification_searches)
                        # Re-run with verification searches
                        verification_results, _ = self.explorer.explore(
                            queries=verification_searches,
                            max_workers=len(verification_searches),
                        )
                        iteration_results.extend(verification_results)
            else:
                # Simple parallel search (like source-based) with detailed reporting
                iteration_results = (
                    self._execute_parallel_searches_with_progress(
                        questions, iteration
                    )
                )

            # Accumulate all results (no filtering!)
            self.all_search_results.extend(iteration_results)

            # Update progress
            self._update_progress(
                f"Completed iteration {iteration} - {len(iteration_results)} results",
                iteration_progress + iteration_span / 2,
                {
                    "phase": f"iteration_{iteration}_complete",
                    "results_count": len(iteration_results),
                    "total_results": len(self.all_search_results),
                },
            )

            # Add iteration finding
            finding = {
                "phase": f"Iteration {iteration}",
                "content": f"Searched with {len(questions)} questions, found {len(iteration_results)} results.",
                "question": query,
                "documents": [],
            }
            findings.append(finding)

            # Early termination check for BrowseComp
            if self._should_terminate_early(iteration):
                logger.info(f"Early termination at iteration {iteration}")
                break

        # Final synthesis (like source-based - trust the LLM!)
        self._update_progress(
            "Generating final synthesis",
            90,
            {"phase": "synthesis"},
        )

        # Use citation handler for final synthesis
        final_result = self.citation_handler.analyze_followup(
            query,
            self.all_search_results,
            previous_knowledge="",
            nr_of_links=len(self.all_links_of_system),
        )

        synthesized_content = final_result.get(
            "content", "No relevant results found."
        )
        documents = final_result.get("documents", [])

        # Add final synthesis finding
        final_finding = {
            "phase": "Final synthesis",
            "content": synthesized_content,
            "question": query,
            "search_results": self.all_search_results,
            "documents": documents,
        }
        findings.append(final_finding)

        # Add documents to repository
        self.findings_repository.add_documents(documents)
        self.findings_repository.set_questions_by_iteration(
            self.questions_by_iteration
        )

        # Format findings
        formatted_findings = (
            self.findings_repository.format_findings_to_text(
                findings, synthesized_content
            )
        )

        self._update_progress(
            "Search complete",
            100,
            {"phase": "complete"},
        )

        # Return results
        result = {
            "findings": findings,
            "iterations": len(self.questions_by_iteration),
            "questions_by_iteration": self.questions_by_iteration,
            "formatted_findings": formatted_findings,
            "current_knowledge": synthesized_content,
            "all_links_of_system": self.all_links_of_system,
            "sources": self.all_links_of_system,
        }

        # Add BrowseComp-specific data if available
        if self.explorer and hasattr(self.explorer, "progress"):
            result["candidates"] = dict(
                self.explorer.progress.found_candidates
            )
            result["entity_coverage"] = {
                k: list(v)
                for k, v in self.explorer.progress.entity_coverage.items()
            }

        return result

    except Exception as e:
        # Top-level boundary: log message and traceback in one record,
        # then hand the caller a structured error instead of raising.
        logger.exception(f"Error in focused iteration search: {str(e)}")
        return self._create_error_response(str(e))
354
+
355
+ def _execute_parallel_searches(self, queries: List[str]) -> List[Dict]:
356
+ """Execute searches in parallel (like source-based strategy)."""
357
+ all_results = []
358
+
359
+ def search_question(q):
360
+ try:
361
+ result = self.search.run(q)
362
+ return {"question": q, "results": result or []}
363
+ except Exception as e:
364
+ logger.error(f"Error searching '{q}': {str(e)}")
365
+ return {"question": q, "results": [], "error": str(e)}
366
+
367
+ # Run searches in parallel
368
+ with concurrent.futures.ThreadPoolExecutor(
369
+ max_workers=len(queries)
370
+ ) as executor:
371
+ futures = [executor.submit(search_question, q) for q in queries]
372
+
373
+ for future in concurrent.futures.as_completed(futures):
374
+ result_dict = future.result()
375
+ all_results.extend(result_dict.get("results", []))
376
+
377
+ return all_results
378
+
379
+ def _execute_parallel_searches_with_progress(
380
+ self, queries: List[str], iteration: int
381
+ ) -> List[Dict]:
382
+ """Execute searches in parallel with detailed progress reporting."""
383
+ all_results = []
384
+ completed_searches = 0
385
+ total_searches = len(queries)
386
+
387
+ def search_question_with_progress(q):
388
+ nonlocal completed_searches
389
+ try:
390
+ # Report starting this search
391
+ self._update_progress(
392
+ f"Searching: {q[:50]}{'...' if len(q) > 50 else ''}",
393
+ None, # Don't update overall progress for individual searches
394
+ {
395
+ "phase": f"iteration_{iteration}_individual_search",
396
+ "current_query": q,
397
+ "search_progress": f"{completed_searches + 1}/{total_searches}",
398
+ },
399
+ )
400
+
401
+ result = self.search.run(q)
402
+ completed_searches += 1
403
+
404
+ # Report completion of this search
405
+ result_count = len(result) if result else 0
406
+ self._update_progress(
407
+ f"Completed search for '{q[:30]}{'...' if len(q) > 30 else ''}' - {result_count} results",
408
+ None,
409
+ {
410
+ "phase": f"iteration_{iteration}_search_complete",
411
+ "completed_query": q,
412
+ "results_found": result_count,
413
+ "search_progress": f"{completed_searches}/{total_searches}",
414
+ },
415
+ )
416
+
417
+ return {
418
+ "question": q,
419
+ "results": result or [],
420
+ "result_count": result_count,
421
+ }
422
+ except Exception as e:
423
+ completed_searches += 1
424
+ logger.error(f"Error searching '{q}': {str(e)}")
425
+ self._update_progress(
426
+ f"Search failed for '{q[:30]}{'...' if len(q) > 30 else ''}': {str(e)[:50]}",
427
+ None,
428
+ {
429
+ "phase": f"iteration_{iteration}_search_error",
430
+ "failed_query": q,
431
+ "error": str(e)[:100],
432
+ "search_progress": f"{completed_searches}/{total_searches}",
433
+ },
434
+ )
435
+ return {
436
+ "question": q,
437
+ "results": [],
438
+ "error": str(e),
439
+ "result_count": 0,
440
+ }
441
+
442
+ # Run searches in parallel
443
+ with concurrent.futures.ThreadPoolExecutor(
444
+ max_workers=min(len(queries), 5)
445
+ ) as executor:
446
+ futures = [
447
+ executor.submit(search_question_with_progress, q)
448
+ for q in queries
449
+ ]
450
+
451
+ total_results_found = 0
452
+ for future in concurrent.futures.as_completed(futures):
453
+ result_dict = future.result()
454
+ results = result_dict.get("results", [])
455
+ all_results.extend(results)
456
+ total_results_found += result_dict.get("result_count", 0)
457
+
458
+ # Report final iteration summary
459
+ self._update_progress(
460
+ f"Iteration {iteration} complete: {total_results_found} total results from {total_searches} searches",
461
+ None,
462
+ {
463
+ "phase": f"iteration_{iteration}_summary",
464
+ "total_searches": total_searches,
465
+ "total_results": total_results_found,
466
+ "average_results": (
467
+ round(total_results_found / total_searches, 1)
468
+ if total_searches > 0
469
+ else 0
470
+ ),
471
+ },
472
+ )
473
+
474
+ return all_results
475
+
476
+ def _get_current_knowledge_summary(self) -> str:
477
+ """Get summary of current knowledge for question generation."""
478
+ if not self.all_search_results:
479
+ return ""
480
+
481
+ # Simple summary of top results
482
+ summary_parts = []
483
+ for i, result in enumerate(self.all_search_results[:10]):
484
+ title = result.get("title", "")
485
+ snippet = result.get("snippet", "")
486
+ if title or snippet:
487
+ summary_parts.append(f"{i + 1}. {title}: {snippet[:200]}...")
488
+
489
+ return "\n".join(summary_parts)
490
+
491
+ def _should_terminate_early(self, iteration: int) -> bool:
492
+ """Check if we should terminate early based on findings."""
493
+ # For BrowseComp, continue if we're making progress
494
+ if self.explorer and hasattr(self.explorer, "progress"):
495
+ progress = self.explorer.progress
496
+
497
+ # Continue if we're still finding new candidates
498
+ if iteration > 3 and len(progress.found_candidates) > 0:
499
+ # Check if top candidate has very high confidence
500
+ if progress.found_candidates:
501
+ top_confidence = max(progress.found_candidates.values())
502
+ if top_confidence > 0.9:
503
+ return True
504
+
505
+ # Continue if we haven't covered all entities
506
+ if extracted_entities := getattr(
507
+ self.question_generator, "extracted_entities", {}
508
+ ):
509
+ total_entities = sum(
510
+ len(v) for v in extracted_entities.values()
511
+ )
512
+ covered_entities = sum(
513
+ len(v) for v in progress.entity_coverage.values()
514
+ )
515
+ coverage_ratio = (
516
+ covered_entities / total_entities
517
+ if total_entities > 0
518
+ else 0
519
+ )
520
+
521
+ # Continue if coverage is low
522
+ if coverage_ratio < 0.8 and iteration < 6:
523
+ return False
524
+
525
+ # Default: continue to max iterations for thoroughness
526
+ return False
527
+
528
+ def _create_error_response(self, error_msg: str) -> Dict:
529
+ """Create standardized error response."""
530
+ return {
531
+ "findings": [],
532
+ "iterations": 0,
533
+ "questions_by_iteration": {},
534
+ "formatted_findings": f"Error: {error_msg}",
535
+ "current_knowledge": "",
536
+ "error": error_msg,
537
+ }