local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +5 -3
  149. local_deep_research/web/database/models.py +51 -2
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +51 -61
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +227 -41
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +310 -103
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.0.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1248 @@
1
+ """
2
+ Evidence-based search strategy for complex query resolution.
3
+
4
+ This strategy decomposes queries into constraints, finds candidates,
5
+ and systematically gathers evidence to score each candidate.
6
+ """
7
+
8
+ from datetime import datetime
9
+ from typing import Any, Dict, List
10
+
11
+ from langchain_core.language_models import BaseChatModel
12
+ from loguru import logger
13
+
14
+ from ...utilities.search_utilities import format_findings, remove_think_tags
15
+ from ..candidates.base_candidate import Candidate
16
+ from ..constraints.base_constraint import Constraint, ConstraintType
17
+ from ..constraints.constraint_analyzer import ConstraintAnalyzer
18
+ from ..evidence.evaluator import EvidenceEvaluator
19
+ from ..findings.repository import FindingsRepository
20
+ from .base_strategy import BaseSearchStrategy
21
+ from .source_based_strategy import SourceBasedSearchStrategy
22
+
23
+
24
+ class EvidenceBasedStrategy(BaseSearchStrategy):
25
+ """
26
+ Evidence-based strategy for solving complex queries.
27
+
28
+ Key features:
29
+ 1. Decomposes queries into verifiable constraints
30
+ 2. Finds candidates that might satisfy constraints
31
+ 3. Gathers specific evidence for each candidate-constraint pair
32
+ 4. Scores candidates based on evidence quality
33
+ 5. Progressively refines the search
34
+ """
35
+
36
def __init__(
    self,
    model: BaseChatModel,
    search: Any,
    all_links_of_system: List[str],
    max_iterations: int = 20,
    confidence_threshold: float = 0.85,
    candidate_limit: int = 10,
    evidence_threshold: float = 0.6,
    max_search_iterations: int = 2,  # For source-based sub-searches
    questions_per_iteration: int = 3,
):
    """Initialize the evidence-based strategy.

    Args:
        model: Chat model used for constraint extraction, query generation
            and evidence evaluation.
        search: Search engine instance used to execute queries.
        all_links_of_system: Shared list collecting every link the system sees.
        max_iterations: Upper bound on evidence-gathering rounds.
        confidence_threshold: Candidate score treated as a sufficient answer.
        candidate_limit: Maximum number of candidates kept in the pool.
        evidence_threshold: Minimum evidence confidence for a constraint to
            count as satisfied.
        max_search_iterations: Iteration cap for source-based sub-searches.
        questions_per_iteration: Questions generated per search iteration.
    """
    super().__init__(all_links_of_system)

    # Core collaborators.
    self.model = model
    self.search = search

    # Tuning knobs.
    self.max_iterations = max_iterations
    self.confidence_threshold = confidence_threshold
    self.candidate_limit = candidate_limit
    self.evidence_threshold = evidence_threshold
    self.max_search_iterations = max_search_iterations
    self.questions_per_iteration = questions_per_iteration

    # Enable direct search by default for performance
    self.use_direct_search = True
    logger.info(
        f"EvidenceBasedStrategy init: use_direct_search={self.use_direct_search}"
    )

    # Helper components, all sharing the same model.
    self.constraint_analyzer = ConstraintAnalyzer(model)
    self.evidence_evaluator = EvidenceEvaluator(model)
    self.findings_repository = FindingsRepository(model)

    # Mutable state tracked across the run.
    self.constraints: List[Constraint] = []
    self.candidates: List[Candidate] = []
    self.search_history: List[Dict] = []
    self.iteration: int = 0
75
+
76
def analyze_topic(self, query: str) -> Dict:
    """Analyze a topic using the evidence-based approach.

    Pipeline: extract constraints from the query, find initial candidates,
    iteratively gather evidence and score/prune candidates, then run a
    final verification pass and synthesize the answer.

    Args:
        query: The user's research question.

    Returns:
        The synthesized final result dict.
    """
    # Reset per-run state so repeated calls start clean.
    self.all_links_of_system.clear()
    self.questions_by_iteration = []
    self.findings = []
    self.iteration = 0

    # Step 1: Extract constraints from query
    if self.progress_callback:
        self.progress_callback(
            "Analyzing query to extract constraints...",
            2,
            {
                "phase": "constraint_analysis",
                "status": "starting",
                "query_length": len(query),
            },
        )

    self.constraints = self.constraint_analyzer.extract_constraints(query)

    if self.progress_callback:
        # Tally constraints per type; dict.get() replaces the manual
        # "if key not in dict" initialization dance.
        constraint_summary: Dict[str, int] = {}
        for c in self.constraints:
            type_name = c.type.name
            constraint_summary[type_name] = (
                constraint_summary.get(type_name, 0) + 1
            )

        # Computed once and reused for both the message and the payload
        # (the original evaluated this filter twice).
        high_priority = len(
            [c for c in self.constraints if c.weight >= 0.9]
        )

        self.progress_callback(
            f"Extracted {len(self.constraints)} constraints ({high_priority} critical)",
            5,
            {
                "phase": "constraint_extraction",
                "constraints_count": len(self.constraints),
                "constraint_types": constraint_summary,
                "high_priority": high_priority,
            },
        )

    # Record the initial analysis as a finding.
    initial_finding = {
        "phase": "Initial Analysis",
        "content": self._format_initial_analysis(query),
        "timestamp": self._get_timestamp(),
    }
    self.findings.append(initial_finding)

    # Step 2: Find initial candidates
    self._find_initial_candidates()

    # Step 3: Main evidence-gathering loop
    while (
        self.iteration < self.max_iterations
        and not self._has_sufficient_answer()
    ):
        self.iteration += 1

        if self.progress_callback:
            progress = 15 + int((self.iteration / self.max_iterations) * 70)

            # Detailed metrics for the progress UI.
            evidence_coverage = self._calculate_evidence_coverage()
            top_score = self.candidates[0].score if self.candidates else 0

            # How many constraints the current best candidate satisfies.
            satisfied_constraints = 0
            if self.candidates:
                top_candidate = self.candidates[0]
                satisfied_constraints = len(
                    [
                        c
                        for c in self.constraints
                        if c.id in top_candidate.evidence
                        and top_candidate.evidence[c.id].confidence
                        >= self.evidence_threshold
                    ]
                )

            confidence_level = (
                "HIGH"
                if top_score >= self.confidence_threshold
                else "MEDIUM"
                if top_score >= 0.6
                else "LOW"
            )
            self.progress_callback(
                f"Iteration {self.iteration}/{self.max_iterations} - {self._get_iteration_status()} [{confidence_level}]",
                progress,
                {
                    "phase": "iteration_start",
                    "iteration": self.iteration,
                    "max_iterations": self.max_iterations,
                    "candidates_count": len(self.candidates),
                    "evidence_coverage": f"{evidence_coverage:.0%}",
                    "top_score": f"{top_score:.0%}",
                    "status": self._get_iteration_status(),
                    "constraints_satisfied": f"{satisfied_constraints}/{len(self.constraints)}",
                    "search_count": len(self.search_history),
                    "confidence_level": confidence_level,
                },
            )

        # Gather evidence for each candidate
        self._gather_evidence_round()

        # Score and prune candidates
        self._score_and_prune_candidates()

        # Record a per-iteration summary finding.
        iteration_finding = {
            "phase": f"Iteration {self.iteration}",
            "content": self._format_iteration_summary(),
            "timestamp": self._get_timestamp(),
            "metadata": {
                "candidates": len(self.candidates),
                "evidence_collected": sum(
                    len(c.evidence) for c in self.candidates
                ),
                "top_score": self.candidates[0].score
                if self.candidates
                else 0,
            },
        }
        self.findings.append(iteration_finding)

        # Replenish the candidate pool when it runs low.
        if len(self.candidates) < 3:
            if self.iteration <= 2:
                # Early iterations - try different search strategies
                self._find_initial_candidates()
            elif self.iteration < self.max_iterations / 2:
                # Mid iterations - look for additional candidates
                self._find_additional_candidates()

    # Step 4: Final verification of top candidates
    self._final_verification()

    # Step 5: Generate final answer
    final_result = self._synthesize_final_answer(query)

    if self.progress_callback:
        self.progress_callback(
            f"Analysis complete - evaluated {len(self.candidates)} candidates",
            100,
            {
                "phase": "complete",
                "strategy": "evidence_based",
                "total_iterations": self.iteration,
                "final_candidates": len(self.candidates),
            },
        )

    return final_result
233
+
234
def _find_initial_candidates(self):
    """Find initial candidates based on key constraints.

    Fix vs. original: this method is re-invoked by analyze_topic when the
    candidate pool runs low; previously new results were not deduplicated
    against candidates already in ``self.candidates``, and the pool could
    grow by up to ``candidate_limit`` on *every* call. Now the dedup set is
    seeded with existing names and the pool is capped at
    ``self.candidate_limit`` overall.
    """
    # Try multiple search strategies to find candidates
    all_candidates = []

    # Strategy 1: Use the most distinctive constraints
    distinctive_constraints = self._get_distinctive_constraints()

    if self.progress_callback:
        self.progress_callback(
            f"Prioritizing {len(distinctive_constraints)} key constraints from {len(self.constraints)} total",
            7,
            {
                "phase": "constraint_prioritization",
                "total_constraints": len(self.constraints),
                "selected_constraints": len(distinctive_constraints),
                "key_constraint_types": [
                    c.type.value for c in distinctive_constraints
                ],
            },
        )

    # Try first query with distinctive constraints
    search_query = self._create_candidate_search_query(
        distinctive_constraints
    )

    if self.progress_callback:
        self.progress_callback(
            f"Creating search query: {search_query[:50]}...",
            8,
            {
                "phase": "query_generation",
                "search_query": (
                    search_query[:100] + "..."
                    if len(search_query) > 100
                    else search_query
                ),
                "query_length": len(search_query),
                "constraint_count": len(distinctive_constraints),
            },
        )

    if self.progress_callback:
        self.progress_callback(
            f"Searching for candidates using {type(self.search).__name__ if self.search else 'Unknown'}",
            9,
            {
                "phase": "candidate_search",
                "status": "searching",
                "search_engine": (
                    type(self.search).__name__ if self.search else "Unknown"
                ),
            },
        )

    results = self._execute_search(search_query)
    candidates = self._extract_candidates_from_results(
        results, search_query
    )
    all_candidates.extend(candidates)

    # If no candidates found, try a different approach
    if not all_candidates:
        if self.progress_callback:
            self.progress_callback(
                "Primary search found 0 candidates - trying alternative search strategies",
                10,
                {"phase": "alternative_search", "status": "searching"},
            )

        # Strategy 2: Focus on name pattern constraints if available
        pattern_constraints = [
            c
            for c in self.constraints
            if c.type == ConstraintType.NAME_PATTERN
        ]
        location_constraints = [
            c for c in self.constraints if c.type == ConstraintType.LOCATION
        ]

        if pattern_constraints or location_constraints:
            combined_constraints = (
                pattern_constraints + location_constraints
            )[:3]
            search_query = self._create_candidate_search_query(
                combined_constraints
            )
            results = self._execute_search(search_query)
            candidates = self._extract_candidates_from_results(
                results, search_query
            )
            all_candidates.extend(candidates)

    # Deduplicate case-insensitively, against both this batch and any
    # candidates already in the pool from a previous call.
    seen = {c.name.lower() for c in self.candidates}
    unique_candidates = []
    for c in all_candidates:
        key = c.name.lower()
        if key not in seen:
            seen.add(key)
            unique_candidates.append(c)

    # Cap the overall pool size instead of growing it on each call.
    remaining_slots = max(0, self.candidate_limit - len(self.candidates))
    self.candidates.extend(unique_candidates[:remaining_slots])

    if self.progress_callback:
        status_msg = (
            f"Found {len(self.candidates)} candidates"
            if self.candidates
            else "No candidates found - will retry in next iteration"
        )
        self.progress_callback(
            status_msg,
            15,
            {
                "phase": "candidates_found",
                "count": len(self.candidates),
                "candidates": [
                    {"name": c.name, "initial_score": 0}
                    for c in self.candidates[:5]
                ],
                "status": (
                    "ready_for_evidence_gathering"
                    if self.candidates
                    else "no_candidates_found"
                ),
            },
        )

    logger.info(f"Found {len(self.candidates)} initial candidates")
363
+
364
def _gather_evidence_round(self):
    """Run one round of evidence gathering over the top candidates.

    For each of (up to) the top five candidates, picks the highest-weight
    constraint that still lacks evidence, asks the LLM for a targeted
    verification query, executes the search, and attaches the extracted
    evidence to the candidate. Progress callbacks report each step.
    """
    collected = 0
    batch_size = min(5, len(self.candidates))

    if self.progress_callback:
        if batch_size == 0:
            start_msg = (
                "No candidates to process - skipping evidence gathering"
            )
        else:
            start_msg = f"Gathering evidence for {batch_size} candidates x {len(self.constraints)} constraints"
        self.progress_callback(
            start_msg,
            None,
            {
                "phase": "evidence_round_start",
                "candidates_to_process": batch_size,
                "iteration": self.iteration,
                "total_evidence_needed": batch_size * len(self.constraints),
            },
        )

    # Focus effort on the current top candidates only.
    for rank, candidate in enumerate(self.candidates[:batch_size]):
        pending = candidate.get_unverified_constraints(self.constraints)

        if not pending:
            if self.progress_callback:
                self.progress_callback(
                    f"All constraints verified for {candidate.name}",
                    None,
                    {
                        "phase": "candidate_complete",
                        "candidate": candidate.name,
                        "evidence_count": len(candidate.evidence),
                    },
                )
            continue

        # Verify the heaviest-weighted unverified constraint first.
        constraint = max(pending, key=lambda c: c.weight)

        if self.progress_callback:
            have = sum(
                len(c.evidence) for c in self.candidates[:batch_size]
            )
            want = batch_size * len(self.constraints)
            pct = (have / want * 100) if want > 0 else 0

            self.progress_callback(
                f"Processing {candidate.name} [{rank + 1}/{batch_size}] - verifying {constraint.type.value}",
                None,
                {
                    "phase": "evidence_search",
                    "candidate": candidate.name,
                    "constraint": constraint.description,
                    "constraint_type": constraint.type.value,
                    "constraint_weight": constraint.weight,
                    "candidate_rank": rank + 1,
                    "evidence_progress": f"{pct:.0f}%",
                    "unverified_count": len(pending),
                },
            )

        # Ask the model for a targeted verification query.
        evidence_query_prompt = f"""Create a search query to verify if "{candidate.name}" satisfies this constraint:
Constraint: {constraint.description}
Type: {constraint.type.value}

Create a specific search query that would find evidence about whether this candidate meets this constraint.
Return only the search query, no explanation."""

        query_response = self.model.invoke(evidence_query_prompt)
        search_query = remove_think_tags(query_response.content).strip()

        # Fall back to a simple concatenated query when the LLM output is
        # empty or implausibly short.
        if not search_query or len(search_query) < 5:
            search_query = (
                f"{candidate.name} {constraint.to_search_terms()}"
            )

        results = self._execute_search(search_query)

        # Turn the raw search output into structured evidence.
        evidence = self.evidence_evaluator.extract_evidence(
            results.get("current_knowledge", ""), candidate.name, constraint
        )

        candidate.add_evidence(constraint.id, evidence)
        collected += 1

        if self.progress_callback:
            self.progress_callback(
                f"Evidence found: {evidence.confidence:.0%} confidence",
                None,
                {
                    "phase": "evidence_found",
                    "candidate": candidate.name,
                    "constraint": constraint.description,
                    "confidence": evidence.confidence,
                    "evidence_type": evidence.type.value,
                    "evidence_claim": (
                        evidence.claim[:100] + "..."
                        if len(evidence.claim) > 100
                        else evidence.claim
                    ),
                    "constraint_satisfied": evidence.confidence
                    >= self.evidence_threshold,
                },
            )

        logger.info(
            f"Added evidence for {candidate.name} - {constraint.id}: "
            f"{evidence.confidence:.2f} confidence"
        )

    if self.progress_callback and collected > 0:
        self.progress_callback(
            f"Gathered {collected} pieces of evidence",
            None,
            {
                "phase": "evidence_round_complete",
                "evidence_count": collected,
            },
        )
496
+ def _score_and_prune_candidates(self):
497
+ """Score candidates and remove low-scoring ones."""
498
+ if self.progress_callback:
499
+ self.progress_callback(
500
+ "Scoring candidates based on evidence",
501
+ None,
502
+ {
503
+ "phase": "scoring_start",
504
+ "candidate_count": len(self.candidates),
505
+ },
506
+ )
507
+
508
+ for i, candidate in enumerate(self.candidates):
509
+ old_score = candidate.score
510
+ candidate.calculate_score(self.constraints)
511
+
512
+ if self.progress_callback and i < 5: # Report top 5
513
+ self.progress_callback(
514
+ f"Scored {candidate.name}",
515
+ None,
516
+ {
517
+ "phase": "candidate_scored",
518
+ "candidate": candidate.name,
519
+ "old_score": old_score,
520
+ "new_score": candidate.score,
521
+ "evidence_count": len(candidate.evidence),
522
+ "satisfied_constraints": len(
523
+ [
524
+ c
525
+ for c in self.constraints
526
+ if c.id in candidate.evidence
527
+ ]
528
+ ),
529
+ "score_change": candidate.score - old_score,
530
+ },
531
+ )
532
+
533
+ # Sort by score
534
+ self.candidates.sort(key=lambda c: c.score, reverse=True)
535
+
536
+ # Prune low-scoring candidates
537
+ old_count = len(self.candidates)
538
+ min_score = (
539
+ max(0.2, self.candidates[0].score * 0.3) if self.candidates else 0.2
540
+ )
541
+ self.candidates = [c for c in self.candidates if c.score >= min_score]
542
+
543
+ # Keep only top candidates
544
+ self.candidates = self.candidates[: self.candidate_limit]
545
+
546
+ if self.progress_callback:
547
+ removed = old_count - len(self.candidates)
548
+ self.progress_callback(
549
+ f"Pruned {removed} low-scoring candidates - keeping top {len(self.candidates)}",
550
+ None,
551
+ {
552
+ "phase": "pruning_complete",
553
+ "candidates_removed": removed,
554
+ "min_score_threshold": min_score,
555
+ "top_score": self.candidates[0].score
556
+ if self.candidates
557
+ else 0,
558
+ "remaining_candidates": [
559
+ {"name": c.name, "score": c.score, "rank": i + 1}
560
+ for i, c in enumerate(self.candidates[:5])
561
+ ],
562
+ },
563
+ )
564
+
565
+ def _has_sufficient_answer(self) -> bool:
566
+ """Check if we have a sufficiently confident answer."""
567
+ if not self.candidates:
568
+ return False
569
+
570
+ top_candidate = self.candidates[0]
571
+
572
+ # Check if top candidate has high score
573
+ if top_candidate.score >= self.confidence_threshold:
574
+ # Verify it has evidence for all critical constraints
575
+ critical_constraints = [
576
+ c for c in self.constraints if c.weight >= 0.8
577
+ ]
578
+ critical_evidence = [
579
+ c.id
580
+ for c in critical_constraints
581
+ if c.id in top_candidate.evidence
582
+ and top_candidate.evidence[c.id].confidence
583
+ >= self.evidence_threshold
584
+ ]
585
+
586
+ if len(critical_evidence) == len(critical_constraints):
587
+ return True
588
+
589
+ return False
590
+
591
    def _final_verification(self):
        """Perform final verification on top candidates.

        For each of the top 3 candidates, re-search up to two weak-evidence
        constraints and up to two critical (weight >= 0.8) constraints with
        no evidence at all, keeping a new piece of evidence only when it is
        more confident than the existing one. Finishes by re-scoring and
        pruning the whole pool.
        """
        if not self.candidates:
            return

        if self.progress_callback:
            self.progress_callback(
                "Starting final verification of top candidates",
                85,
                {
                    "phase": "final_verification_start",
                    "top_candidates": [c.name for c in self.candidates[:3]],
                },
            )

        # Get top 3 candidates
        top_candidates = self.candidates[:3]

        for candidate in top_candidates:
            # Find weak evidence or missing critical constraints
            # NOTE(review): assumes get_weak_evidence returns a list of
            # constraint ids (it is concatenated with ids below) — confirm
            # against the Candidate class.
            weak_evidence = candidate.get_weak_evidence(self.evidence_threshold)
            critical_missing = [
                c
                for c in self.constraints
                if c.weight >= 0.8 and c.id not in candidate.evidence
            ]

            # Search for better evidence: at most 2 weak + 2 missing-critical
            # constraints per candidate to bound the number of searches.
            for constraint_id in weak_evidence[:2] + [
                c.id for c in critical_missing[:2]
            ]:
                constraint = next(
                    (c for c in self.constraints if c.id == constraint_id), None
                )
                if constraint:
                    search_query = f"{candidate.name} {constraint.value} exact verification"
                    results = self._execute_search(search_query)

                    evidence = self.evidence_evaluator.extract_evidence(
                        results.get("current_knowledge", ""),
                        candidate.name,
                        constraint,
                    )

                    # Update if better evidence (strictly higher confidence,
                    # or no prior evidence for this constraint at all).
                    if (
                        constraint_id not in candidate.evidence
                        or evidence.confidence
                        > candidate.evidence[constraint_id].confidence
                    ):
                        candidate.add_evidence(constraint.id, evidence)

        # Final scoring
        self._score_and_prune_candidates()
    def _execute_search(self, search_query: str) -> Dict:
        """Execute a search - optimized for direct queries.

        Records the query in ``search_history``, then either:

        - runs a single direct engine query when ``use_direct_search`` is
          set, synthesizing ``current_knowledge`` from up to 10 result
          snippets; or
        - delegates to a full ``SourceBasedSearchStrategy`` run, forwarding
          progress events through a message-shortening wrapper.

        Returns a dict with at least ``current_knowledge``, ``findings``,
        ``iterations``, ``questions_by_iteration`` and
        ``all_links_of_system``.
        """
        self.search_history.append(
            {
                "query": search_query,
                "timestamp": self._get_timestamp(),
                "iteration": self.iteration,
            }
        )

        # For candidate searches and verification queries, use direct search
        # This avoids the overhead of full source-based strategy
        logger.info(
            f"_execute_search called with use_direct_search={self.use_direct_search} for query: {search_query[:50]}..."
        )
        if self.use_direct_search:  # Always use direct search when flag is True
            # Direct search without question generation or iterations
            search_results = self.search.run(search_query)

            # Simple synthesis for knowledge extraction
            if search_results:
                content = "\n\n".join(
                    [
                        f"Result {i + 1}:\n{result.get('snippet', '')}"
                        for i, result in enumerate(search_results[:10])
                    ]
                )
            else:
                content = "No results found."

            # Shape mirrors SourceBasedSearchStrategy.analyze_topic() output
            # so callers can treat both paths identically.
            return {
                "current_knowledge": content,
                "findings": [],
                "iterations": 1,
                "questions_by_iteration": [[search_query]],
                "all_links_of_system": search_results or [],
            }

        # Use source-based strategy for complex searches if needed
        source_strategy = SourceBasedSearchStrategy(
            model=self.model,
            search=self.search,  # Pass the existing search instance
            all_links_of_system=self.all_links_of_system,
            include_text_content=True,
            use_cross_engine_filter=True,
            use_atomic_facts=True,
        )

        source_strategy.max_iterations = self.max_search_iterations
        source_strategy.questions_per_iteration = self.questions_per_iteration

        if self.progress_callback:

            def wrapped_callback(message, progress, data):
                # Add parent context
                data["parent_iteration"] = self.iteration
                data["parent_strategy"] = "evidence_based"
                data["search_query"] = search_query[:100]

                # Don't override parent progress percentage
                parent_progress = None

                # Make messages very short for frontend visibility, but keep query visible
                clean_message = message
                if "Generating questions" in message:
                    clean_message = f"Q: {search_query[:500]}"
                elif "Searching" in message:
                    clean_message = f"S: {search_query[:500]}"
                elif "Processing" in message:
                    clean_message = f"P: {search_query[:500]}"
                elif "Completed search" in message:
                    # Extract the actual search query from the message
                    parts = message.split(":", 2)
                    if len(parts) > 2:
                        query_part = parts[2].strip()
                        clean_message = f"✓ {query_part}"  # Show full query
                    else:
                        clean_message = f"✓ {search_query[:500]}"
                elif "iteration" in message.lower():
                    clean_message = (
                        f"I{data.get('iteration', '?')}: {search_query[:500]}"
                    )
                elif "Filtered" in message:
                    results = data.get("result_count", "?")
                    # NOTE(review): len() here assumes data["links_count"] is
                    # a sized collection; if the child strategy reports it as
                    # an int this raises TypeError — confirm the event schema.
                    filtered = (
                        len(data.get("links_count", []))
                        if "links_count" in data
                        else "?"
                    )
                    clean_message = f"Filter: {results}→{filtered}"

                self.progress_callback(
                    clean_message,
                    parent_progress,  # Let parent manage overall progress
                    data,
                )

            source_strategy.set_progress_callback(wrapped_callback)

        results = source_strategy.analyze_topic(search_query)

        # Fold the child strategy's question history into our own.
        if "questions_by_iteration" in results:
            self.questions_by_iteration.extend(
                results["questions_by_iteration"]
            )

        return results
+ def _create_candidate_search_query(
755
+ self, constraints: List[Constraint]
756
+ ) -> str:
757
+ """Create a search query to find candidates."""
758
+ # Use an LLM to create effective search queries from constraints
759
+ constraint_descriptions = []
760
+ for c in constraints[:5]: # Limit to top 5 constraints
761
+ constraint_descriptions.append(
762
+ f"- {c.type.value}: {c.value} (weight: {c.weight})"
763
+ )
764
+
765
+ prompt = f"""Given these constraints for finding a specific answer, create an effective search query.
766
+
767
+ Constraints:
768
+ {chr(10).join(constraint_descriptions)}
769
+
770
+ Your task is to create a search query that finds SPECIFIC NAMED ENTITIES that satisfy these constraints.
771
+
772
+ Key principle: Focus on finding actual names of things, not general information.
773
+
774
+ Guidelines:
775
+ 1. If the constraints describe properties, search for entities that have those properties
776
+ 2. If the constraints describe patterns, search for entities whose names match those patterns
777
+ 3. Combine the most distinctive constraints to narrow the search
778
+ 4. Use query operators (AND, OR, quotes) effectively
779
+
780
+ Important: The query should be designed to surface specific names/entities in the search results, not explanations or general information about the constraints.
781
+
782
+ Return only the search query, no explanation."""
783
+
784
+ response = self.model.invoke(prompt)
785
+ search_query = remove_think_tags(response.content).strip()
786
+
787
+ # Fallback if the query is too generic
788
+ if len(search_query.split()) < 3:
789
+ # Combine the most important constraint values
790
+ key_terms = []
791
+ for c in sorted(constraints, key=lambda x: x.weight, reverse=True)[
792
+ :3
793
+ ]:
794
+ key_terms.append(f'"{c.value}"')
795
+ search_query = " AND ".join(key_terms)
796
+
797
+ return search_query
798
+
799
+ def _get_distinctive_constraints(self) -> List[Constraint]:
800
+ """Get the most distinctive constraints for initial search."""
801
+ # Prioritize constraints that are most likely to identify specific entities
802
+ priority_order = [
803
+ ConstraintType.NAME_PATTERN,
804
+ ConstraintType.LOCATION,
805
+ ConstraintType.EVENT,
806
+ ConstraintType.STATISTIC,
807
+ ConstraintType.COMPARISON,
808
+ ConstraintType.PROPERTY,
809
+ ConstraintType.TEMPORAL,
810
+ ConstraintType.EXISTENCE,
811
+ ]
812
+
813
+ # Sort constraints by priority and weight
814
+ sorted_constraints = sorted(
815
+ self.constraints,
816
+ key=lambda c: (priority_order.index(c.type), -c.weight),
817
+ )
818
+
819
+ # Take top 3 constraints, ensuring we have at least one name/location constraint if available
820
+ distinctive = sorted_constraints[:3]
821
+
822
+ # Ensure we have at least one naming constraint if available
823
+ has_naming = any(
824
+ c.type in [ConstraintType.NAME_PATTERN, ConstraintType.LOCATION]
825
+ for c in distinctive
826
+ )
827
+ if not has_naming:
828
+ for c in sorted_constraints[3:]:
829
+ if c.type in [
830
+ ConstraintType.NAME_PATTERN,
831
+ ConstraintType.LOCATION,
832
+ ]:
833
+ distinctive[-1] = c # Replace the least important
834
+ break
835
+
836
+ return distinctive
837
+
838
    def _extract_candidates_from_results(
        self, results: Dict, search_query: str
    ) -> List[Candidate]:
        """Extract potential candidates from search results.

        Two-step LLM extraction: first classify what kind of entity the
        query is after, then pull specific named entities of that kind out
        of the combined knowledge + findings text. Lines in the model reply
        of the form ``CANDIDATE_n: name`` become Candidate objects, lightly
        validated, capped at ``candidate_limit``.
        """
        knowledge = results.get("current_knowledge", "")

        # Also check the raw findings
        findings = results.get("findings", [])
        all_content = knowledge

        for finding in findings:
            if isinstance(finding, dict) and "content" in finding:
                all_content += "\n" + finding["content"]

        # First, understand what type of entity we're looking for
        type_prompt = f"""Based on this search query, what type of entity are we looking for?
Search Query: {search_query}

Common types include: location, person, organization, product, concept, event, etc.
Answer with just the entity type, no explanation."""

        type_response = self.model.invoke(type_prompt)
        entity_type = remove_think_tags(type_response.content).strip().lower()

        # Now extract candidates based on the entity type
        # (content truncated to 4000 chars to stay inside the context window)
        prompt = f"""Extract potential {entity_type} candidates from these search results.

Search Query: {search_query}

Search Results:
{all_content[:4000]}

CRITICAL: Extract ONLY specific named entities that could answer the query.

For example:
- If looking for a character: extract "Sherlock Holmes", NOT "detective" or "fictional character"
- If looking for a place: extract "Mount Everest", NOT "mountain" or "high peak"
- If looking for a person: extract "Albert Einstein", NOT "scientist" or "physicist"

Extract ONLY:
1. Proper nouns and specific names
2. Individual entities that could be the answer
3. Concrete names mentioned in the text

DO NOT extract:
- Sources or websites (e.g., "Wikipedia", "IMDb")
- General descriptions (e.g., "TV show", "fictional character")
- Categories or types (e.g., "mountain", "scientist")
- Meta-information about search results

If the text mentions a work (book, movie, TV show) when looking for a character,
extract CHARACTER NAMES from that work, not the work's title.

Format each candidate as:
CANDIDATE_1: [specific name]
CANDIDATE_2: [specific name]
...

Limit to the 10 most relevant candidates."""

        response = self.model.invoke(prompt)
        content = remove_think_tags(response.content)

        candidates = []
        for line in content.strip().split("\n"):
            if line.startswith("CANDIDATE_"):
                name = line.split(":", 1)[1].strip().strip("\"'")
                # Basic validation - must be non-empty and reasonable length
                if name and 2 < len(name) < 100:
                    # Additional validation based on entity type
                    if entity_type in [
                        "location",
                        "place",
                        "person",
                        "organization",
                    ]:
                        # Should start with capital letter for these types
                        if name[0].isupper():
                            candidate = Candidate(name=name)
                            candidates.append(candidate)
                    else:
                        # Other types might not need capital letters
                        candidate = Candidate(name=name)
                        candidates.append(candidate)

        return candidates[: self.candidate_limit]
+ def _find_additional_candidates(self):
926
+ """Find additional candidates if we don't have enough."""
927
+ # Use different constraint combinations
928
+ unused_constraints = [
929
+ c
930
+ for c in self.constraints
931
+ if not any(c.id in cand.evidence for cand in self.candidates)
932
+ ]
933
+
934
+ if unused_constraints:
935
+ search_query = self._create_candidate_search_query(
936
+ unused_constraints[:3]
937
+ )
938
+ results = self._execute_search(search_query)
939
+ new_candidates = self._extract_candidates_from_results(
940
+ results, search_query
941
+ )
942
+
943
+ # Add only truly new candidates
944
+ existing_names = {c.name.lower() for c in self.candidates}
945
+ for candidate in new_candidates:
946
+ if candidate.name.lower() not in existing_names:
947
+ self.candidates.append(candidate)
948
+ existing_names.add(candidate.name.lower())
949
+
950
+ def _format_initial_analysis(self, query: str) -> str:
951
+ """Format initial analysis summary."""
952
+ # Group constraints by type
953
+ constraint_groups = {}
954
+ for c in self.constraints:
955
+ type_name = c.type.name.replace("_", " ").title()
956
+ if type_name not in constraint_groups:
957
+ constraint_groups[type_name] = []
958
+ constraint_groups[type_name].append(c)
959
+
960
+ constraints_formatted = ""
961
+ for type_name, constraints in constraint_groups.items():
962
+ constraints_formatted += f"\n**{type_name}**:\n"
963
+ for c in constraints:
964
+ constraints_formatted += (
965
+ f" • {c.description} (importance: {c.weight:.0%})\n"
966
+ )
967
+
968
+ return f"""
969
+ **Research Query**: {query}
970
+
971
+ **Strategy**: Evidence-Based Search
972
+ **Approach**: Systematically identify candidates and verify each constraint
973
+
974
+ **Identified Constraints**: {len(self.constraints)}
975
+ {constraints_formatted}
976
+
977
+ **Next Steps**:
978
+ 1. Search for candidates matching key constraints
979
+ 2. Gather evidence for each candidate-constraint pair
980
+ 3. Score candidates based on evidence quality
981
+ 4. Progressively refine until confident answer found
982
+
983
+ **Starting Research...**
984
+ """.strip()
985
+
986
    def _format_iteration_summary(self) -> str:
        """Format iteration summary.

        Builds a markdown progress report: top-3 candidate scores, overall
        evidence-collection counters, per-candidate constraint status for
        the top two, overall iteration progress, and the last 3 searches.
        """
        top_candidates = self.candidates[:3]

        # Calculate overall progress: evidence is only tracked for the
        # top-5 candidates, so "needed" is top-5 x constraint count.
        total_evidence_needed = (
            len(self.candidates[:5]) * len(self.constraints)
            if self.candidates
            else 0
        )
        evidence_collected = sum(len(c.evidence) for c in self.candidates[:5])
        evidence_progress = (
            evidence_collected / total_evidence_needed
            if total_evidence_needed > 0
            else 0
        )

        summary = f"""
**Iteration {self.iteration}**

**Top Candidates**:
{chr(10).join(f"{i + 1}. {c.name} (score: {c.score:.0%})" for i, c in enumerate(top_candidates)) if top_candidates else "No candidates found yet"}

**Evidence Collection Progress**:
- Total candidates: {len(self.candidates)}
- Evidence gathered: {evidence_collected}/{total_evidence_needed} ({evidence_progress:.0%})
- Constraints verified: {len([c for c in self.constraints if any(cand.evidence.get(c.id) for cand in self.candidates[:3])])}/{len(self.constraints)}

**Current Search Focus**:
"""

        if top_candidates:
            # Detail constraint satisfaction for the two leading candidates.
            for candidate in top_candidates[:2]:
                satisfied = len(
                    [
                        c
                        for c in self.constraints
                        if c.id in candidate.evidence
                        and candidate.evidence[c.id].confidence
                        >= self.evidence_threshold
                    ]
                )
                summary += f"\n{candidate.name} - {satisfied}/{len(self.constraints)} constraints satisfied:\n"

                for constraint in self.constraints[:3]:
                    evidence = candidate.evidence.get(constraint.id)
                    if evidence:
                        summary += f"  ✓ {constraint.description}: {evidence.confidence:.0%} confidence\n"
                    else:
                        summary += (
                            f"  ? {constraint.description}: Searching...\n"
                        )
        else:
            summary += "\nSearching for initial candidates..."
            summary += "\nFocus: " + ", ".join(
                [c.type.value for c in self.constraints[:3]]
            )

        summary += f"\n**Overall Progress**: {self.iteration}/{self.max_iterations} iterations ({self.iteration / self.max_iterations:.0%})"

        # Add recent searches
        if self.search_history:
            recent_searches = self.search_history[-3:]
            summary += "\n\n**Recent Searches**:\n"
            summary += chr(10).join(
                f"- {s['query'][:60]}..." for s in recent_searches
            )

        return summary.strip()
    def _synthesize_final_answer(self, original_query: str) -> Dict:
        """Generate final answer based on evidence.

        Takes the top-scoring candidate as the answer (or "Unable to
        determine"), asks the LLM for a justified write-up, records a
        final-synthesis finding, and returns the strategy result dict
        (answer text, formatted findings, candidates with their evidence,
        constraints, sources).
        """
        if not self.candidates:
            answer = "Unable to determine"
            confidence = 0
        else:
            top_candidate = self.candidates[0]
            answer = top_candidate.name
            # Candidate score is 0..1; report it as a percentage.
            confidence = int(top_candidate.score * 100)

        # Generate detailed explanation
        prompt = f"""
Based on our evidence-based research, provide a final answer to:
{original_query}

Top Answer: {answer}
Confidence: {confidence}%

Evidence Summary:
{self._format_evidence_summary()}

Provide a clear, concise answer with justification based on the evidence.
Include which constraints were satisfied and which weren't.
"""

        response = self.model.invoke(prompt)
        final_answer = remove_think_tags(response.content)

        # Add final synthesis finding
        synthesis_finding = {
            "phase": "Final Synthesis",
            "content": self._format_final_synthesis(answer, confidence),
            "timestamp": self._get_timestamp(),
        }
        self.findings.append(synthesis_finding)

        # Compile questions: iteration history may hold plain lists
        # (1-indexed here) or ready-made dicts (merged as-is).
        questions_dict = {}
        for i, questions in enumerate(self.questions_by_iteration):
            if isinstance(questions, list):
                questions_dict[i + 1] = questions
            elif isinstance(questions, dict):
                questions_dict.update(questions)

        # Format findings
        formatted_findings = format_findings(
            self.findings, final_answer, questions_dict
        )

        return {
            "current_knowledge": final_answer,
            "formatted_findings": formatted_findings,
            "findings": self.findings,
            "iterations": self.iteration,
            "questions_by_iteration": questions_dict,
            "all_links_of_system": self.all_links_of_system,
            "sources": self.all_links_of_system,
            # Serialize the top-5 candidates with their evidence for the UI.
            "candidates": [
                {
                    "name": c.name,
                    "score": c.score,
                    "evidence": {
                        k: {
                            "claim": e.claim,
                            "confidence": e.confidence,
                            "type": e.type.value,
                        }
                        for k, e in c.evidence.items()
                    },
                }
                for c in self.candidates[:5]
            ],
            "constraints": [
                {
                    "id": c.id,
                    "description": c.description,
                    "weight": c.weight,
                    "type": c.type.value,
                }
                for c in self.constraints
            ],
            "strategy": "evidence_based",
        }
+ def _format_evidence_summary(self) -> str:
1141
+ """Format evidence summary for top candidates."""
1142
+ if not self.candidates:
1143
+ return "No candidates found"
1144
+
1145
+ summary = ""
1146
+ for candidate in self.candidates[:2]:
1147
+ summary += f"\n{candidate.name} (score: {candidate.score:.2f}):\n"
1148
+ for constraint in self.constraints:
1149
+ evidence = candidate.evidence.get(constraint.id)
1150
+ if evidence:
1151
+ summary += (
1152
+ f" - {constraint.description}: {evidence.claim} "
1153
+ )
1154
+ summary += f"(confidence: {evidence.confidence:.2f}, type: {evidence.type.value})\n"
1155
+ else:
1156
+ summary += f" - {constraint.description}: No evidence\n"
1157
+
1158
+ return summary
1159
+
1160
    def _format_final_synthesis(self, answer: str, confidence: int) -> str:
        """Format final synthesis summary.

        Produces the markdown block stored as the "Final Synthesis"
        finding: answer + confidence, run statistics, per-constraint
        satisfaction breakdown for the top candidate (✓ / ⚠ below
        threshold / ✗ missing), evidence details, and search statistics.
        """
        if not self.candidates:
            evidence_summary = "No candidates found"
            constraint_breakdown = "Unable to verify constraints"
        else:
            top_candidate = self.candidates[0]
            # Count constraints whose evidence clears the threshold.
            satisfied = len(
                [
                    c
                    for c in self.constraints
                    if c.id in top_candidate.evidence
                    and top_candidate.evidence[c.id].confidence
                    >= self.evidence_threshold
                ]
            )
            evidence_summary = (
                f"Satisfied {satisfied}/{len(self.constraints)} constraints"
            )

            # Create constraint satisfaction breakdown
            constraint_breakdown = "\n**Constraint Satisfaction**:\n"
            for constraint in self.constraints:
                evidence = top_candidate.evidence.get(constraint.id)
                if evidence and evidence.confidence >= self.evidence_threshold:
                    constraint_breakdown += f"✓ {constraint.description} - {evidence.confidence:.0%} confidence\n"
                elif evidence:
                    constraint_breakdown += f"⚠ {constraint.description} - {evidence.confidence:.0%} confidence (below threshold)\n"
                else:
                    constraint_breakdown += (
                        f"✗ {constraint.description} - No evidence found\n"
                    )

        return f"""
**Final Answer**: {answer} ({confidence}% confidence)

**Research Summary**:
- Strategy: Evidence-Based Search
- Iterations completed: {self.iteration}/{self.max_iterations}
- Candidates evaluated: {len(self.candidates)}
- Evidence pieces collected: {sum(len(c.evidence) for c in self.candidates)}
- {evidence_summary}

**Top Candidates**:
{chr(10).join(f"{i + 1}. {c.name} (score: {c.score:.0%})" for i, c in enumerate(self.candidates[:3]))}

{constraint_breakdown}

**Evidence Details**:
{self._format_evidence_summary()}

**Search Strategy**:
- Total searches performed: {len(self.search_history)}
- Constraint-focused searches: {len([s for s in self.search_history if any(c.description in s["query"] for c in self.constraints)])}
- Candidate verification searches: {len([s for s in self.search_history if any(c.name in s["query"] for c in self.candidates)])}

**Recent Search Queries**:
{chr(10).join(f"{i + 1}. {s['query'][:80]}..." for i, s in enumerate(self.search_history[-5:]))}
""".strip()
+ def _get_timestamp(self) -> str:
1221
+ """Get current timestamp for findings."""
1222
+ return datetime.utcnow().isoformat()
1223
+
1224
+ def _calculate_evidence_coverage(self) -> float:
1225
+ """Calculate how much evidence we've collected across all candidates."""
1226
+ if not self.candidates or not self.constraints:
1227
+ return 0.0
1228
+
1229
+ total_possible = len(self.candidates[:5]) * len(self.constraints)
1230
+ total_collected = sum(len(c.evidence) for c in self.candidates[:5])
1231
+
1232
+ return total_collected / total_possible if total_possible > 0 else 0.0
1233
+
1234
+ def _get_iteration_status(self) -> str:
1235
+ """Get a human-readable status for the current iteration."""
1236
+ if not self.candidates:
1237
+ return "Searching for initial candidates"
1238
+
1239
+ top_score = self.candidates[0].score if self.candidates else 0
1240
+
1241
+ if top_score >= self.confidence_threshold:
1242
+ return "Verifying top candidate"
1243
+ elif top_score >= 0.7:
1244
+ return "Gathering final evidence"
1245
+ elif top_score >= 0.5:
1246
+ return "Refining candidate scores"
1247
+ else:
1248
+ return "Exploring candidate evidence"