local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +5 -3
  149. local_deep_research/web/database/models.py +51 -2
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +51 -61
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +227 -41
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +310 -103
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.0.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,760 @@
1
+ """
2
+ Iterative Reasoning Strategy for step-by-step search and reasoning.
3
+
4
+ This strategy maintains a persistent knowledge base and iteratively:
5
+ 1. Analyzes what we know so far
6
+ 2. Decides what to search next
7
+ 3. Performs the search
8
+ 4. Updates knowledge with findings
9
+ 5. Repeats until confident in answer
10
+ """
11
+
12
+ from dataclasses import dataclass, field
13
+ from datetime import datetime
14
+ from typing import Any, Dict, List, Optional
15
+
16
+ from langchain_core.language_models import BaseChatModel
17
+ from loguru import logger
18
+
19
+ from ...utilities.search_utilities import format_findings, remove_think_tags
20
+ from ..findings.repository import FindingsRepository
21
+ from .base_strategy import BaseSearchStrategy
22
+ from .source_based_strategy import SourceBasedSearchStrategy
23
+
24
+
25
@dataclass
class KnowledgeState:
    """Current state of our knowledge about the query.

    Holds the facts, open questions, executed searches and candidate answers
    accumulated so far, plus the confidence and iteration counters that the
    reasoning loop reads to decide whether to keep going.
    """

    # The query exactly as the caller supplied it.
    original_query: str
    # Facts gathered so far.
    key_facts: List[str] = field(default_factory=list)
    # Open questions still to be resolved.
    uncertainties: List[str] = field(default_factory=list)
    # One dict per executed search; ``to_string`` reads the "query" key.
    search_history: List[Dict] = field(default_factory=list)
    # One dict per candidate; ``to_string`` reads "answer" and "confidence".
    candidate_answers: List[Dict] = field(default_factory=list)
    confidence: float = 0.0
    iteration: int = 0
    # Number of searches that produced no measurable progress. Declared here
    # so it no longer has to be attached to the instance dynamically.
    low_progress_count: int = 0

    def to_string(self) -> str:
        """Convert knowledge state to readable string for LLM.

        Returns:
            A multi-section text block: query, known facts, uncertainties,
            the three most recent searches (numbered by their real position
            in the history), candidates sorted by descending confidence, and
            the current confidence.
        """

        def bullets(items, empty_text: str) -> str:
            # One "- item" line per entry, or the placeholder when empty.
            if not items:
                return empty_text
            return "\n".join(f"- {item}" for item in items)

        # Only the last three searches are shown, but they keep their true
        # ordinal so the labels agree with the total in the section header.
        recent = self.search_history[-3:]
        first_number = len(self.search_history) - len(recent) + 1
        history_text = (
            "\n".join(
                f"- Search {number}: {entry['query']}"
                for number, entry in enumerate(recent, start=first_number)
            )
            if recent
            else "- No searches yet"
        )

        ranked = sorted(
            self.candidate_answers,
            key=lambda candidate: candidate["confidence"],
            reverse=True,
        )
        candidates_text = (
            "\n".join(
                f"- {c['answer']} (confidence: {c['confidence']:.0%})"
                for c in ranked
            )
            if ranked
            else "- None yet"
        )

        sections = [
            "",
            f"Original Query: {self.original_query}",
            "",
            "What We Know:",
            bullets(self.key_facts, "- Nothing yet"),
            "",
            "What We're Uncertain About:",
            bullets(self.uncertainties, "- Nothing specific"),
            "",
            f"Search History ({len(self.search_history)} searches):",
            history_text,
            "",
            f"Candidate Answers ({len(self.candidate_answers)} total):",
            candidates_text,
            "",
            f"Current Confidence: {self.confidence:.1%}",
            "",
        ]
        return "\n".join(sections)
56
+
57
+
58
+ class IterativeReasoningStrategy(BaseSearchStrategy):
59
+ """
60
+ A strategy that iteratively searches and reasons, maintaining persistent knowledge.
61
+
62
+ Simple loop:
63
+ 1. Assess current knowledge
64
+ 2. Decide next search
65
+ 3. Execute search
66
+ 4. Update knowledge
67
+ 5. Check if we have answer
68
+ """
69
+
70
def __init__(
    self,
    model: BaseChatModel,
    search: Any,
    all_links_of_system: List[str],
    max_iterations: int = 10,
    confidence_threshold: float = 0.85,
    search_iterations_per_round: int = 1,
    questions_per_search: int = 15,
):
    """Set up the strategy; no search or model call happens here.

    Args:
        model: Language model used for planning and reasoning
        search: Search engine instance handed to the per-round searches
        all_links_of_system: Shared list of encountered links, forwarded
            to the base class
        max_iterations: Upper bound on reasoning iterations
        confidence_threshold: Confidence at which the loop stops
        search_iterations_per_round: Iterations for each search round
        questions_per_search: Questions generated for each search
    """
    super().__init__(all_links_of_system)

    # Collaborators.
    self.model = model
    self.search = search
    self.findings_repository = FindingsRepository(model)

    # Stopping criteria for the outer reasoning loop.
    self.max_iterations = max_iterations
    self.confidence_threshold = confidence_threshold

    # Sizing of each individual search round.
    self.search_iterations_per_round = search_iterations_per_round
    self.questions_per_search = questions_per_search

    # Created fresh at the start of every analyze_topic() run.
    self.knowledge_state: Optional[KnowledgeState] = None
100
+
101
def analyze_topic(self, query: str) -> Dict:
    """Analyze a topic using iterative reasoning.

    Resets the per-run state, then repeats "decide next search -> execute
    search -> update knowledge -> assess answer" until ``max_iterations``
    is reached, the confidence threshold is met, or no further search is
    proposed. A summary of every round is appended to ``self.findings``.

    Args:
        query: The research query to analyze

    Returns:
        The value produced by ``_synthesize_final_answer`` for the
        knowledge accumulated during the run.
    """
    # Per-step progress is kept below the value reported for final
    # synthesis (90) so the reported percentage never reaches 100 early
    # and then moves backwards.
    loop_progress_cap = 89

    def report(message: str, percent: int, metadata: Dict) -> None:
        # The callback is looked up on every call, so one that is attached
        # or removed mid-run is handled without relying on earlier guards.
        if self.progress_callback:
            self.progress_callback(message, percent, metadata)

    def bullets(items, empty_text: str) -> str:
        # One "- item" line per entry, or the placeholder when empty.
        if not items:
            return empty_text
        return "\n".join(f"- {item}" for item in items)

    def ranked_candidates() -> List[Dict]:
        # Candidates ordered from most to least confident.
        return sorted(
            self.knowledge_state.candidate_answers,
            key=lambda candidate: candidate["confidence"],
            reverse=True,
        )

    # Initialize
    self.all_links_of_system.clear()
    self.questions_by_iteration = []
    self.findings = []
    self.knowledge_state = KnowledgeState(original_query=query)

    report(
        "Starting iterative reasoning - will build knowledge step by step",
        1,
        {
            "phase": "init",
            "strategy": "iterative_reasoning",
            "reasoning_approach": "step-by-step",
        },
    )

    logger.info(f"Starting iterative reasoning for: {query}")
    logger.info(
        f"Max iterations: {self.max_iterations}, Confidence threshold: {self.confidence_threshold}"
    )

    # Add initial analysis to findings
    initial_content = "\n".join(
        [
            f"**Query**: {query}",
            "",
            "**Strategy**: Iterative Reasoning",
            "- Will build knowledge step-by-step",
            f"- Continue until {self.confidence_threshold:.0%} confident or {self.max_iterations} steps",
            "- Each step will search, analyze findings, and update knowledge",
            "",
            "**Starting Analysis**...",
        ]
    )
    self.findings.append(
        {
            "phase": "Initial Analysis",
            "content": initial_content.strip(),
            "timestamp": self._get_timestamp(),
        }
    )

    # Main reasoning loop
    while (
        self.knowledge_state.iteration < self.max_iterations
        and self.knowledge_state.confidence < self.confidence_threshold
    ):
        state = self.knowledge_state
        state.iteration += 1
        logger.info(
            f"Iteration {state.iteration}/{self.max_iterations}, Current confidence: {state.confidence:.1%}"
        )

        # Always computed, so later progress reports cannot hit an
        # unbound name when a callback is attached mid-iteration.
        progress = int((state.iteration / self.max_iterations) * 80) + 10

        facts_count = len(state.key_facts)
        candidates_count = len(state.candidate_answers)

        # Format top candidates for display
        top_candidates = ", ".join(
            f"{c['answer']} ({c['confidence']:.0%})"
            for c in ranked_candidates()[:2]
        )

        report(
            f"Step {state.iteration}: {facts_count} facts, {candidates_count} candidates. Top: {top_candidates if top_candidates else 'none yet'}",
            min(progress, loop_progress_cap),
            {
                "phase": "reasoning",
                "iteration": state.iteration,
                "confidence": state.confidence,
                "facts_found": facts_count,
                "uncertainties": len(state.uncertainties),
                "candidates_count": candidates_count,
                "top_candidates": top_candidates,
            },
        )

        # Step 1: Analyze current knowledge and decide next search
        report(
            f"Step {state.iteration}: Deciding what to search next",
            min(progress + 2, loop_progress_cap),
            {"phase": "planning", "step": "deciding_search"},
        )

        next_search = self._decide_next_search()

        if not next_search:
            logger.info("No more searches needed")
            report(
                "Sufficient information gathered - preparing final answer",
                min(progress + 5, loop_progress_cap),
                {"phase": "synthesis", "reason": "no_more_searches"},
            )
            break

        # Step 2: Execute the search
        report(
            f"Step {state.iteration}: Searching: {next_search[:50]}...",
            min(progress + 5, loop_progress_cap),
            {"phase": "searching", "query": next_search},
        )

        search_results = self._execute_search(next_search)

        # Step 3: Update knowledge with findings
        report(
            f"Step {self.knowledge_state.iteration}: Processing search results",
            min(progress + 8, loop_progress_cap),
            {"phase": "processing", "step": "updating_knowledge"},
        )

        # Snapshot taken before the update so progress can be measured.
        prev_confidence = self.knowledge_state.confidence
        prev_facts_count = len(self.knowledge_state.key_facts)

        self._update_knowledge(search_results)

        # Step 4: Check if we have a confident answer
        self._assess_answer()

        # Re-read the state in case the helpers replaced the object.
        state = self.knowledge_state

        # Check if we made progress
        confidence_change = state.confidence - prev_confidence
        new_facts = len(state.key_facts) - prev_facts_count

        if confidence_change < 0.01 and new_facts == 0:
            # No significant progress - add a finding about this
            self.findings.append(
                {
                    "phase": f"Low Progress Alert (Step {state.iteration})",
                    "content": f"Search '{next_search[:50]}...' yielded limited new information. May need to adjust search approach.",
                    "timestamp": self._get_timestamp(),
                }
            )

            # Counter read by _decide_next_search to request a different
            # approach; getattr keeps this working on state objects that
            # do not define the attribute yet.
            state.low_progress_count = (
                getattr(state, "low_progress_count", 0) + 1
            )

        candidates_count = len(state.candidate_answers)
        report(
            f"Step {state.iteration} complete: {len(state.key_facts)} facts, {candidates_count} candidates, {state.confidence:.0%} confident",
            min(progress + 10, loop_progress_cap),
            {
                "phase": "step_complete",
                "iteration": state.iteration,
                "facts": len(state.key_facts),
                "candidates": candidates_count,
                "confidence": state.confidence,
            },
        )

        # Only facts added during this iteration are listed as new.
        # NOTE(review): assumes _update_knowledge appends to key_facts;
        # confirm against its implementation.
        facts_this_round = state.key_facts[-new_facts:] if new_facts > 0 else []

        candidate_lines = [
            f"{c['answer']} (confidence: {c['confidence']:.0%})"
            for c in ranked_candidates()[:5]
        ]

        # Create detailed iteration findings
        iteration_summary = "\n".join(
            [
                f"**Search Query**: {next_search}",
                "",
                "**New Facts Discovered**:",
                bullets(facts_this_round, "- No new facts in this iteration"),
                "",
                f"**Current Candidates** ({len(state.candidate_answers)} total):",
                bullets(candidate_lines, "- No candidates yet"),
                "",
                "**Remaining Questions**:",
                bullets(state.uncertainties[:3], "- No specific uncertainties"),
                "",
                f"**Progress**: {state.confidence:.0%} confident after {state.iteration} steps",
            ]
        )

        # Add iteration summary to findings
        self.findings.append(
            {
                "phase": f"Iteration {state.iteration}",
                "content": iteration_summary.strip(),
                "search_query": next_search,
                "key_findings": state.key_facts[-3:],  # Last 3 facts
                "confidence": state.confidence,
                "timestamp": self._get_timestamp(),
            }
        )

    # Final synthesis
    report(
        f"Creating final answer based on {len(self.knowledge_state.key_facts)} facts discovered",
        90,
        {
            "phase": "final_synthesis",
            "facts_count": len(self.knowledge_state.key_facts),
            "total_searches": len(self.knowledge_state.search_history),
            "final_confidence": self.knowledge_state.confidence,
        },
    )

    final_result = self._synthesize_final_answer()

    report(
        f"Analysis complete - {self.knowledge_state.iteration} reasoning steps, {len(self.knowledge_state.key_facts)} facts found",
        100,
        {
            "phase": "complete",
            "strategy": "iterative_reasoning",
            "total_iterations": self.knowledge_state.iteration,
            "facts_discovered": len(self.knowledge_state.key_facts),
            "final_confidence": self.knowledge_state.confidence,
        },
    )

    return final_result
342
+
343
def _decide_next_search(self) -> Optional[str]:
    """Decide what to search next based on current knowledge.

    Sends the rendered knowledge state to the model and parses its reply.
    The reply is interpreted in this order:

    1. A line consisting only of ``DONE`` means stop.
    2. Otherwise a ``NEXT_SEARCH:`` line supplies the query.
    3. Otherwise the standalone word ``DONE`` anywhere means stop.
    4. Otherwise the first unlabeled line longer than 10 characters and
       without a question mark is used as a fallback query.

    Returns:
        Next search query, or None if done
    """
    # Local import: the file's top-level imports do not include ``re``.
    import re

    response_labels = ("NEXT_SEARCH:", "REASONING:", "EXPECTED_OUTCOME:")

    # Check for low progress
    low_progress_count = getattr(self.knowledge_state, "low_progress_count", 0)
    low_progress_warning = ""
    if low_progress_count > 1:
        low_progress_warning = f"\nNOTE: {low_progress_count} recent searches yielded limited new information. Try a significantly different search approach."

    prompt = f"""Based on our current knowledge, decide what to search next.

{self.knowledge_state.to_string()}{low_progress_warning}

Consider:
1. What specific information would help answer the original query?
2. What uncertainties should we resolve?
3. Should we verify any candidate answers?
4. Do we need more specific or broader information?
5. Can we combine multiple constraints into a more targeted search?
6. Are there multiple candidates with similar confidence? If so, what searches would help distinguish between them?
7. If recent searches haven't been productive, what completely different approach could we try?

For puzzle-like queries with specific clues, try to:
- Search for locations that match multiple criteria at once
- Use specific place names when possible
- Include relevant statistics or dates mentioned
- If multiple candidates exist, search for distinguishing features
- If searches are repetitive, try broader regional searches or different constraint combinations

If we have one clear candidate with high confidence and low uncertainty, respond with "DONE".
If multiple candidates have similar confidence, continue searching to distinguish between them.

Otherwise, provide:
NEXT_SEARCH: [specific search query that targets the constraints]
REASONING: [why this search will help]
EXPECTED_OUTCOME: [what we hope to learn]
"""

    response = self.model.invoke(prompt)
    content = remove_think_tags(response.content)

    logger.debug(f"LLM response for next search: {content[:200]}...")

    # Lines are stripped so indented labels are still recognised.
    lines = [line.strip() for line in content.strip().split("\n")]

    # Parse response
    next_search = None
    reasoning = ""
    for line in lines:
        if line.startswith("NEXT_SEARCH:"):
            next_search = line.split(":", 1)[1].strip()
        elif line.startswith("REASONING:"):
            reasoning = line.split(":", 1)[1].strip()

    # A bare "DONE" line (optionally quoted or punctuated) is an
    # unambiguous stop signal.
    if any(line.strip("\"'`*. ") == "DONE" for line in lines):
        logger.info("LLM decided no more searches needed")
        return None

    if next_search:
        # An explicit query wins over an incidental mention of "DONE"
        # elsewhere in the reply (e.g. inside the reasoning text).
        logger.info(f"Next search: {next_search}")
        logger.info(f"Reasoning: {reasoning}")
        return next_search

    # Whole-word match only, so words that merely contain the letters
    # (e.g. "ABANDONED") do not end the search.
    if re.search(r"\bDONE\b", content):
        logger.info("LLM decided no more searches needed")
        return None

    logger.warning("Could not extract next search from LLM response")
    # Try to extract any search-like query from the response. Labeled
    # lines are skipped so reasoning text is never used as a query.
    for line in lines:
        if (
            len(line) > 10
            and "?" not in line
            and not line.startswith(response_labels)
        ):  # Heuristic for search queries
            next_search = line
            logger.info(f"Extracted fallback search: {next_search}")
            break

    return next_search
421
+
422
def _execute_search(self, search_query: str) -> Dict:
    """Execute a search using the source-based strategy.

    A new ``SourceBasedSearchStrategy`` is created for every call and is
    given this strategy's model, search engine and shared link list. Its
    iteration and question limits are taken from
    ``search_iterations_per_round`` and ``questions_per_search``. The query is
    recorded in the knowledge state's search history together with the number
    of findings and sources that came back.

    Args:
        search_query: The query to search

    Returns:
        The results dictionary produced by ``analyze_topic``
    """
    logger.info(f"Executing search: {search_query}")

    # Use source-based strategy for actual search
    source_strategy = SourceBasedSearchStrategy(
        model=self.model,
        search=self.search,
        all_links_of_system=self.all_links_of_system,
        include_text_content=True,
        use_cross_engine_filter=True,
        use_atomic_facts=True,
    )

    source_strategy.max_iterations = self.search_iterations_per_round
    source_strategy.questions_per_iteration = self.questions_per_search

    if self.progress_callback:

        def wrapped_callback(message, progress, data):
            """Forward child progress, tagged with our own iteration info."""
            step = self.knowledge_state.iteration
            data["parent_iteration"] = step
            data["parent_strategy"] = "iterative_reasoning"

            # Make the message clearer for the GUI. Messages containing
            # "Searching" are always passed through verbatim; question and
            # finding updates are replaced by a fixed label.
            lowered = message.lower()
            if "Searching" not in message and "questions" in lowered:
                label = "Generating follow-up questions"
            elif "Searching" not in message and "findings" in lowered:
                label = "Processing findings"
            else:
                label = message

            self.progress_callback(f"Step {step}: {label}", progress, data)

        source_strategy.set_progress_callback(wrapped_callback)

    results = source_strategy.analyze_topic(search_query)

    # Track search history
    self.knowledge_state.search_history.append(
        {
            "query": search_query,
            "timestamp": self._get_timestamp(),
            "findings_count": len(results.get("findings", [])),
            "sources_count": len(results.get("sources", [])),
        }
    )

    # Update our tracking
    questions = results.get("questions_by_iteration")
    if questions:
        if isinstance(questions, dict):
            # list.extend() on a dict would add only its keys and silently
            # drop the questions themselves, so keep the mapping as one entry.
            self.questions_by_iteration.append(questions)
        else:
            self.questions_by_iteration.extend(questions)

    return results
488
+
489
+ def _update_knowledge(self, search_results: Dict):
490
+ """Update knowledge state with new findings.
491
+
492
+ Args:
493
+ search_results: Results from the search
494
+ """
495
+ current_knowledge = search_results.get("current_knowledge", "")
496
+
497
+ if not current_knowledge:
498
+ return
499
+
500
+ prompt = f"""Update our knowledge based on new search results.
501
+
502
+ Current Knowledge State:
503
+ {self.knowledge_state.to_string()}
504
+
505
+ New Information:
506
+ {current_knowledge[:2000]} # Truncate if too long
507
+
508
+ Extract:
509
+ 1. KEY_FACTS: Specific facts that help answer the original query
510
+ 2. NEW_UNCERTAINTIES: Questions that arose from this search
511
+ 3. CANDIDATE_ANSWERS: Possible answers to the original query with confidence (0-1)
512
+ 4. RESOLVED_UNCERTAINTIES: Which previous uncertainties were resolved
513
+
514
+ Format:
515
+ KEY_FACTS:
516
+ - [fact 1]
517
+ - [fact 2]
518
+
519
+ NEW_UNCERTAINTIES:
520
+ - [uncertainty 1]
521
+
522
+ CANDIDATE_ANSWERS:
523
+ - [answer]: [confidence]
524
+
525
+ RESOLVED_UNCERTAINTIES:
526
+ - [resolved uncertainty]
527
+ """
528
+
529
+ response = self.model.invoke(prompt)
530
+ content = remove_think_tags(response.content)
531
+
532
+ # Parse response and update knowledge state
533
+ lines = content.strip().split("\n")
534
+ current_section = None
535
+
536
+ for line in lines:
537
+ line = line.strip()
538
+ if not line:
539
+ continue
540
+
541
+ if line.startswith("KEY_FACTS:"):
542
+ current_section = "facts"
543
+ elif line.startswith("NEW_UNCERTAINTIES:"):
544
+ current_section = "uncertainties"
545
+ elif line.startswith("CANDIDATE_ANSWERS:"):
546
+ current_section = "candidates"
547
+ elif line.startswith("RESOLVED_UNCERTAINTIES:"):
548
+ current_section = "resolved"
549
+ elif line.startswith("-") and current_section:
550
+ item = line[1:].strip()
551
+
552
+ if current_section == "facts":
553
+ if item not in self.knowledge_state.key_facts:
554
+ self.knowledge_state.key_facts.append(item)
555
+ elif current_section == "uncertainties":
556
+ if item not in self.knowledge_state.uncertainties:
557
+ self.knowledge_state.uncertainties.append(item)
558
+ elif current_section == "candidates" and ":" in item:
559
+ answer, confidence_str = item.split(":", 1)
560
+ answer = answer.strip()
561
+
562
+ # Skip invalid answers
563
+ if (
564
+ answer.lower() in ["none", "unknown", "n/a", ""]
565
+ or len(answer) < 3
566
+ ):
567
+ continue
568
+
569
+ try:
570
+ confidence = float(confidence_str.strip())
571
+ except ValueError:
572
+ confidence = 0.5
573
+
574
+ # Clean up answer text (remove duplicated confidence info)
575
+ if "(confidence" in answer:
576
+ answer = answer.split("(confidence")[0].strip()
577
+
578
+ # Update or add candidate
579
+ existing = False
580
+ for candidate in self.knowledge_state.candidate_answers:
581
+ if candidate["answer"].lower() == answer.lower():
582
+ candidate["confidence"] = max(
583
+ candidate["confidence"], confidence
584
+ )
585
+ existing = True
586
+ break
587
+
588
+ if not existing:
589
+ self.knowledge_state.candidate_answers.append(
590
+ {"answer": answer, "confidence": confidence}
591
+ )
592
+ elif current_section == "resolved":
593
+ # Remove from uncertainties if present
594
+ self.knowledge_state.uncertainties = [
595
+ u
596
+ for u in self.knowledge_state.uncertainties
597
+ if item.lower() not in u.lower()
598
+ ]
599
+
600
+ logger.info(
601
+ f"Knowledge updated: {len(self.knowledge_state.key_facts)} facts, "
602
+ f"{len(self.knowledge_state.uncertainties)} uncertainties"
603
+ )
604
+
605
+ def _assess_answer(self):
606
+ """Assess our confidence in the current answer."""
607
+ if not self.knowledge_state.candidate_answers:
608
+ self.knowledge_state.confidence = 0.0
609
+ return
610
+
611
+ # Sort candidates by confidence
612
+ sorted_candidates = sorted(
613
+ self.knowledge_state.candidate_answers,
614
+ key=lambda x: x["confidence"],
615
+ reverse=True,
616
+ )
617
+
618
+ best_candidate = sorted_candidates[0]
619
+ base_confidence = best_candidate["confidence"]
620
+
621
+ # Check if multiple candidates have similar confidence
622
+ confidence_spread = 0.0
623
+ if len(sorted_candidates) > 1:
624
+ second_best = sorted_candidates[1]
625
+ confidence_spread = (
626
+ best_candidate["confidence"] - second_best["confidence"]
627
+ )
628
+
629
+ # If top two candidates are very close, reduce overall confidence
630
+ if confidence_spread < 0.1: # Less than 10% difference
631
+ base_confidence *= 0.8 # Reduce confidence by 20%
632
+ logger.info(
633
+ f"Multiple candidates with similar confidence: {best_candidate['answer']} ({best_candidate['confidence']:.0%}) vs {second_best['answer']} ({second_best['confidence']:.0%})"
634
+ )
635
+
636
+ # Consider multiple factors for overall confidence
637
+ # Boost confidence if we have supporting facts
638
+ fact_boost = min(len(self.knowledge_state.key_facts) * 0.05, 0.2)
639
+
640
+ # Reduce confidence if we have many uncertainties
641
+ uncertainty_penalty = min(
642
+ len(self.knowledge_state.uncertainties) * 0.05, 0.2
643
+ )
644
+
645
+ # Boost if we've done multiple searches that confirm
646
+ search_boost = min(len(self.knowledge_state.search_history) * 0.02, 0.1)
647
+
648
+ # Penalty for too many candidates
649
+ candidate_penalty = 0.0
650
+ if len(self.knowledge_state.candidate_answers) > 3:
651
+ candidate_penalty = min(
652
+ (len(self.knowledge_state.candidate_answers) - 3) * 0.05, 0.15
653
+ )
654
+
655
+ self.knowledge_state.confidence = min(
656
+ base_confidence
657
+ + fact_boost
658
+ + search_boost
659
+ - uncertainty_penalty
660
+ - candidate_penalty,
661
+ 0.95, # Cap at 95% to leave room for synthesis
662
+ )
663
+
664
+ logger.info(
665
+ f"Current confidence: {self.knowledge_state.confidence:.1%} (best: {best_candidate['answer']} at {best_candidate['confidence']:.0%}, spread: {confidence_spread:.1%})"
666
+ )
667
+
668
def _synthesize_final_answer(self) -> Dict:
    """Build the final answer and result payload from accumulated knowledge.

    The model is asked for a final answer based on the knowledge state. A
    "Final Synthesis" finding summarising the run is appended to
    ``self.findings``, the collected questions are merged into one mapping,
    and everything is returned in a single dictionary.

    Returns:
        Final results dictionary
    """
    state = self.knowledge_state

    # Ask the model for a comprehensive answer
    prompt = f"""Provide a final answer to the original query based on our research.

{state.to_string()}

Requirements:
1. Directly answer the original query
2. Reference the key facts that support the answer
3. Acknowledge any remaining uncertainties
4. Be concise but complete

Final Answer:"""

    final_answer = remove_think_tags(self.model.invoke(prompt).content)

    # Numbered lists for the summary (at most ten facts, every search)
    numbered_facts = "\n".join(
        f"{position}. {fact}"
        for position, fact in enumerate(state.key_facts[:10], start=1)
    )
    numbered_searches = "\n".join(
        f"{position}. {entry['query']}"
        for position, entry in enumerate(state.search_history, start=1)
    )

    # Add comprehensive final synthesis to findings
    synthesis_summary = f"""
**Final Answer**:
{final_answer}

**Research Summary**:
- Completed {state.iteration} reasoning steps
- Discovered {len(state.key_facts)} key facts
- Evaluated {len(state.candidate_answers)} candidate answers
- Final confidence: {state.confidence:.0%}

**Key Facts That Led to This Answer**:
{numbered_facts}

**Search History**:
{numbered_searches}
""".strip()

    final_finding = {
        "phase": "Final Synthesis",
        "content": synthesis_summary,
        "confidence": state.confidence,
        "iterations": state.iteration,
        "timestamp": self._get_timestamp(),
    }
    self.findings.append(final_finding)

    # Merge all questions: lists are keyed by their 1-based position, dicts
    # are merged in as they are, anything else is ignored.
    questions_dict = {}
    for position, entry in enumerate(self.questions_by_iteration, start=1):
        if isinstance(entry, list):
            questions_dict[position] = entry
        elif isinstance(entry, dict):
            questions_dict.update(entry)

    formatted_findings = format_findings(
        self.findings, final_answer, questions_dict
    )

    question_total = sum(
        len(group) if isinstance(group, list) else 1
        for group in questions_dict.values()
    )
    knowledge_snapshot = {
        "key_facts": state.key_facts,
        "uncertainties": state.uncertainties,
        "candidate_answers": state.candidate_answers,
        "confidence": state.confidence,
        "search_history": state.search_history,
    }

    return {
        "current_knowledge": final_answer,
        "formatted_findings": formatted_findings,
        "findings": self.findings,
        "iterations": state.iteration,
        "questions_by_iteration": questions_dict,
        "all_links_of_system": self.all_links_of_system,
        "sources": self.all_links_of_system,
        "knowledge_state": knowledge_snapshot,
        "strategy": "iterative_reasoning",
        "questions": {
            "total": question_total,
            "by_iteration": questions_dict,
        },
    }
757
+
758
+ def _get_timestamp(self) -> str:
759
+ """Get current timestamp for findings."""
760
+ return datetime.utcnow().isoformat()