local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +5 -3
  149. local_deep_research/web/database/models.py +51 -2
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +51 -61
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +227 -41
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +310 -103
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.0.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,782 @@
1
+ """
2
+ Improved evidence-based search strategy for complex query resolution.
3
+
4
+ Key improvements:
5
+ 1. Multi-stage candidate discovery with adaptive query generation
6
+ 2. Dynamic constraint combination for cross-constraint searches
7
+ 3. Query adaptation based on partial results
8
+ 4. Enhanced source diversity management
9
+ """
10
+
11
+ import itertools
12
+ from collections import defaultdict
13
+ from dataclasses import dataclass
14
+ from datetime import datetime
15
+ from typing import Any, Dict, List, Set
16
+
17
+ from langchain_core.language_models import BaseChatModel
18
+
19
+ from ...utilities.search_utilities import remove_think_tags
20
+ from ..candidates.base_candidate import Candidate
21
+ from ..constraints.base_constraint import Constraint, ConstraintType
22
+ from ..constraints.constraint_analyzer import ConstraintAnalyzer
23
+ from ..evidence.base_evidence import EvidenceType
24
+ from ..evidence.evaluator import EvidenceEvaluator
25
+ from ..findings.repository import FindingsRepository
26
+ from .base_strategy import BaseSearchStrategy
27
+
28
+
29
@dataclass
class SearchAttempt:
    """Track search attempts for query adaptation.

    One record describes a single executed search: what was asked, which
    constraints it was built from, and how productive it turned out to be.
    ``_execute_tracked_search`` builds one of these for each search it runs.
    """

    # The query string that was executed.
    query: str
    # IDs of the constraints the query was built from.
    constraint_ids: List[str]
    # Length of the "all_links_of_system" list in the search result.
    results_count: int
    # Number of candidates extracted from this search's results.
    candidates_found: int
    # ISO-8601 string taken from datetime.utcnow().isoformat().
    timestamp: str
    # Label of the code path that issued the search. The callers visible in
    # this file pass 'distinctive', 'combined', 'exploratory',
    # 'pattern_based', 'evidence', 'evidence_combined' and 'validation'.
    strategy_type: str
39
+
40
+
41
+ class ImprovedEvidenceBasedStrategy(BaseSearchStrategy):
42
+ """
43
+ Improved evidence-based strategy with adaptive search capabilities.
44
+
45
+ Key improvements:
46
+ 1. Multi-stage candidate discovery
47
+ 2. Adaptive query generation based on results
48
+ 3. Cross-constraint search optimization
49
+ 4. Source diversity tracking and enhancement
50
+ """
51
+
52
def __init__(
    self,
    model: BaseChatModel,
    search: Any,
    all_links_of_system: List[str],
    max_iterations: int = 20,
    confidence_threshold: float = 0.85,
    candidate_limit: int = 15,  # Increased for better diversity
    evidence_threshold: float = 0.6,
    max_search_iterations: int = 3,
    questions_per_iteration: int = 3,
    min_source_diversity: int = 3,  # Minimum different sources
    adaptive_query_count: int = 3,  # Number of adaptive queries per stage
):
    """Initialize the improved evidence-based strategy.

    Args:
        model: Chat model. It is handed to the constraint analyzer, the
            evidence evaluator and the findings repository, and is also
            invoked directly by this class to generate search queries.
        search: Search backend object; stored as ``self.search``.
        all_links_of_system: Link list forwarded to the base strategy.
        max_iterations: Upper bound on evidence-gathering iterations in
            ``analyze_topic``.
        confidence_threshold: Stored on the instance; its consumer is not
            part of this method.
        candidate_limit: Candidate budget; multi-stage discovery stops once
            half of this many candidates have been collected.
        evidence_threshold: Stored on the instance; its consumer is not
            part of this method.
        max_search_iterations: Stored on the instance; its consumer is not
            part of this method.
        questions_per_iteration: Stored on the instance; its consumer is
            not part of this method.
        min_source_diversity: Stored on the instance (presumably the
            minimum number of distinct sources -- confirm at the use site).
        adaptive_query_count: Stored on the instance (presumably the number
            of adaptive queries per stage -- confirm at the use site).
    """
    super().__init__(all_links_of_system)
    self.model = model
    self.search = search
    self.max_iterations = max_iterations
    self.confidence_threshold = confidence_threshold
    self.candidate_limit = candidate_limit
    self.evidence_threshold = evidence_threshold
    self.max_search_iterations = max_search_iterations
    self.questions_per_iteration = questions_per_iteration
    self.min_source_diversity = min_source_diversity
    self.adaptive_query_count = adaptive_query_count

    # Initialize components (all three share the same chat model)
    self.constraint_analyzer = ConstraintAnalyzer(model)
    self.evidence_evaluator = EvidenceEvaluator(model)
    self.findings_repository = FindingsRepository(model)

    # State tracking
    self.constraints: List[Constraint] = []
    self.candidates: List[Candidate] = []
    self.search_history: List[Dict] = []
    # One SearchAttempt per executed search.
    self.search_attempts: List[SearchAttempt] = []
    # Queries that discovery stages skip instead of re-running.
    self.failed_queries: Set[str] = set()
    # Dicts with "constraints", "query_pattern" and "candidates_found" keys.
    self.successful_patterns: List[Dict[str, Any]] = []
    # Candidate name -> set of evidence sources seen for that candidate.
    self.source_types: Dict[str, Set[str]] = defaultdict(set)
    self.iteration: int = 0
93
+
94
def analyze_topic(self, query: str) -> Dict:
    """Analyze a topic using improved evidence-based approach.

    The run proceeds in five steps: extract constraints from the query,
    discover candidates in several stages, iterate evidence gathering and
    scoring until an answer is sufficient or ``max_iterations`` is reached,
    cross-validate the leading candidates, and synthesize the final answer.

    Args:
        query: The user query to resolve.

    Returns:
        The result of ``_synthesize_final_answer(query)``.
    """
    # Initialize: reset every piece of per-run state so that a reused
    # strategy instance does not leak data from a previous query.
    self.all_links_of_system.clear()
    self.questions_by_iteration = []
    self.findings = []
    self.iteration = 0
    self.search_attempts.clear()
    self.failed_queries.clear()
    self.successful_patterns.clear()
    # Candidates and their source bookkeeping belong to one query only;
    # without this reset, stale candidates would be scored against the new
    # constraints and could suppress discovery.
    self.candidates = []
    self.source_types.clear()

    # Step 1: Extract initial constraints
    if self.progress_callback:
        self.progress_callback(
            "Analyzing query for constraint extraction...",
            2,
            {"phase": "constraint_analysis", "status": "starting"},
        )

    self.constraints = self.constraint_analyzer.extract_constraints(query)

    # Step 2: Multi-stage candidate discovery
    self._multi_stage_candidate_discovery()

    # Step 3: Main evidence-gathering loop with adaptive search
    while (
        self.iteration < self.max_iterations
        and not self._has_sufficient_answer()
    ):
        self.iteration += 1

        if self.progress_callback:
            # Iterations are mapped onto the 15..85 part of the progress bar.
            progress = 15 + int((self.iteration / self.max_iterations) * 70)
            self.progress_callback(
                f"Iteration {self.iteration}/{self.max_iterations} - {self._get_iteration_status()}",
                progress,
                {
                    "phase": "iteration_start",
                    "iteration": self.iteration,
                    "candidates_count": len(self.candidates),
                    "search_attempts": len(self.search_attempts),
                    "successful_patterns": len(self.successful_patterns),
                },
            )

        # Adaptive evidence gathering
        self._adaptive_evidence_gathering()

        # Score and prune with diversity consideration
        self._score_with_diversity()

        # Adaptive candidate discovery if needed
        if len(self.candidates) < 3 or self._needs_diversity():
            self._adaptive_candidate_search()

    # Step 4: Cross-validation and final verification
    self._cross_validate_candidates()

    # Step 5: Generate final answer
    return self._synthesize_final_answer(query)
154
+
155
def _multi_stage_candidate_discovery(self):
    """Multi-stage candidate discovery with different strategies.

    Runs the discovery stages in a fixed order and appends every candidate
    whose lower-cased name is not already known to ``self.candidates``.
    Later stages are skipped as soon as at least ``candidate_limit // 2``
    candidates have been collected.
    """
    stages = [
        ("distinctive", self._discover_with_distinctive_constraints),
        ("combined", self._discover_with_combined_constraints),
        ("exploratory", self._discover_with_exploratory_search),
        ("pattern_based", self._discover_with_pattern_matching),
    ]

    # enumerate() supplies the stage position directly instead of searching
    # the list for the current tuple on every iteration.
    for position, (stage_name, discovery_method) in enumerate(stages):
        if self.progress_callback:
            self.progress_callback(
                f"Stage {stage_name}: Discovering candidates...",
                5 + position * 3,
                {"phase": "candidate_discovery", "stage": stage_name},
            )

        new_candidates = discovery_method()

        # Add unique candidates (names are compared case-insensitively)
        existing_names = {c.name.lower() for c in self.candidates}
        for candidate in new_candidates:
            key = candidate.name.lower()
            if key not in existing_names:
                self.candidates.append(candidate)
                existing_names.add(key)

        # Stop if we have enough diverse candidates
        if len(self.candidates) >= self.candidate_limit // 2:
            break
184
+
185
def _discover_with_distinctive_constraints(self) -> List[Candidate]:
    """Discover candidates using most distinctive constraints.

    Builds a query for each combination (up to three constraints) of the
    distinctive constraints, skips queries already known to have failed,
    and records a success pattern for each query that itself produced
    candidates.

    Returns:
        All candidates extracted across the executed queries.
    """
    distinctive = self._get_adaptive_distinctive_constraints()
    candidates = []

    for constraint_combo in self._generate_constraint_combinations(
        distinctive, max_size=3
    ):
        query = self._create_adaptive_search_query(constraint_combo)
        if query in self.failed_queries:
            continue

        results = self._execute_tracked_search(
            query, constraint_combo, "distinctive"
        )
        found = self._extract_candidates_with_context(results, query)
        if not found:
            continue
        candidates.extend(found)

        # Track successful patterns. Success is judged on what THIS query
        # returned; testing the accumulated list would mark every query
        # after the first hit as successful, with an inflated count.
        self.successful_patterns.append(
            {
                "constraints": [c.id for c in constraint_combo],
                "query_pattern": query,
                "candidates_found": len(found),
            }
        )

    return candidates
212
+
213
def _discover_with_combined_constraints(self) -> List[Candidate]:
    """Discover candidates using strategic constraint combinations.

    Groups the constraints by type and, for every pair of distinct types,
    pairs up to two constraints from each group. Each pair gets a
    cross-constraint query; queries already recorded in
    ``self.failed_queries`` are skipped, as in the other discovery stages.

    Returns:
        All candidates extracted across the executed queries.
    """
    # Combine constraints from different types for better results
    type_groups = defaultdict(list)
    for c in self.constraints:
        type_groups[c.type].append(c)

    candidates = []
    # Cross-type combinations
    for type1, type2 in itertools.combinations(type_groups, 2):
        for c1, c2 in itertools.product(
            type_groups[type1][:2], type_groups[type2][:2]
        ):
            pair = [c1, c2]
            query = self._create_cross_constraint_query(pair)
            # Do not spend another search on a query known to be fruitless.
            if query in self.failed_queries:
                continue
            results = self._execute_tracked_search(query, pair, "combined")
            candidates.extend(
                self._extract_candidates_with_context(results, query)
            )

    return candidates
235
+
236
def _discover_with_exploratory_search(self) -> List[Candidate]:
    """Use exploratory searches to find unexpected candidates.

    Asks the model for exploratory queries based on the first five
    constraints, then runs up to three of them, each tagged with the first
    three constraints.

    Returns:
        All candidates extracted across the executed queries.
    """
    candidates = []

    # Generate exploratory queries
    exploratory_prompt = f"""
Based on these constraints, generate 3 exploratory search queries that might find relevant candidates:

Constraints:
{self._format_constraints_for_prompt(self.constraints[:5])}

Create queries that:
1. Use alternative phrasings or related concepts
2. Explore edge cases or unusual combinations
3. Look for historical or contextual matches

Return only the queries, one per line.
"""

    response = self.model.invoke(exploratory_prompt)
    raw_lines = remove_think_tags(response.content).splitlines()

    # Normalise first, cap afterwards: blank separator lines in the model
    # output must not consume the three-query budget, and the stripped text
    # is what gets compared with failed_queries and sent to the search.
    queries = [line.strip() for line in raw_lines if line.strip()]

    for query in queries[:3]:
        if query in self.failed_queries:
            continue
        results = self._execute_tracked_search(
            query, self.constraints[:3], "exploratory"
        )
        candidates.extend(
            self._extract_candidates_with_context(results, query)
        )

    return candidates
268
+
269
def _discover_with_pattern_matching(self) -> List[Candidate]:
    """Re-use the first three successful patterns to look for more candidates.

    For each pattern, the constraints whose ids it lists are looked up, the
    stored query is adapted to them, and the adapted query is searched.
    With no successful patterns recorded the result is an empty list.
    """
    discovered = []

    # Slicing an empty list yields nothing, so no separate empty check.
    for past_success in self.successful_patterns[:3]:
        wanted_ids = past_success["constraints"]
        matching = [
            constraint
            for constraint in self.constraints
            if constraint.id in wanted_ids
        ]

        # Derive a variation of the query that worked before.
        variant = self._adapt_successful_query(
            past_success["query_pattern"], matching
        )
        search_output = self._execute_tracked_search(
            variant, matching, "pattern_based"
        )
        discovered.extend(
            self._extract_candidates_with_context(search_output, variant)
        )

    return discovered
295
+
296
def _adaptive_evidence_gathering(self):
    """Collect evidence for the first five candidates.

    Each candidate's unverified constraints are ordered by descending
    weight and bucketed by type. Per bucket, the first two constraints are
    each searched on their own and the evidence is stored. If the bucket
    holds more than one constraint, a combined search over the first two
    follows, whose evidence is stored only when it is new or more
    confident than what the candidate already has.
    """
    for candidate in self.candidates[:5]:
        pending = candidate.get_unverified_constraints(self.constraints)
        if not pending:
            continue

        # Heaviest constraints first; buckets keep that order.
        pending.sort(key=lambda item: item.weight, reverse=True)
        buckets = {}
        for item in pending:
            buckets.setdefault(item.type, []).append(item)

        for same_type in buckets.values():
            leading = same_type[:2]

            # One search per individual constraint.
            for single in leading:
                single_query = self._create_evidence_query(candidate, [single])
                single_results = self._execute_tracked_search(
                    single_query, [single], "evidence"
                )
                candidate.add_evidence(
                    single.id,
                    self.evidence_evaluator.extract_evidence(
                        single_results.get("current_knowledge", ""),
                        candidate.name,
                        single,
                    ),
                )

            if len(same_type) <= 1:
                continue

            # One joint search for the leading pair of this type.
            joint_query = self._create_evidence_query(candidate, leading)
            joint_results = self._execute_tracked_search(
                joint_query, leading, "evidence_combined"
            )
            for member in leading:
                fresh = self.evidence_evaluator.extract_evidence(
                    joint_results.get("current_knowledge", ""),
                    candidate.name,
                    member,
                )
                already_known = member.id in candidate.evidence
                if (
                    not already_known
                    or fresh.confidence
                    > candidate.evidence[member.id].confidence
                ):
                    candidate.add_evidence(member.id, fresh)
344
+
345
def _create_adaptive_search_query(
    self, constraints: List[Constraint]
) -> str:
    """Create adaptive search queries based on past performance.

    A previously successful pattern is adapted when it shares enough
    constraints with the requested ones. Otherwise a base query is built,
    and rewritten if that exact query is already known to have failed.

    Args:
        constraints: Constraints the query should cover.

    Returns:
        The query string to execute.
    """
    # Check if similar constraint combinations have been successful
    constraint_ids = {c.id for c in constraints}

    # "At least half" must never drop to zero: with a single constraint
    # ``1 // 2 == 0`` would let any unrelated pattern match.
    required_overlap = max(1, len(constraint_ids) // 2)

    for pattern in self.successful_patterns:
        overlap = len(constraint_ids.intersection(pattern["constraints"]))
        if overlap >= required_overlap:
            # Adapt successful pattern
            return self._adapt_successful_query(
                pattern["query_pattern"], constraints
            )

    # Check failed queries to avoid repetition
    base_query = self._create_base_search_query(constraints)
    if base_query in self.failed_queries:
        # Modify query to avoid failure
        return self._modify_failed_query(base_query, constraints)

    return base_query
369
+
370
def _create_cross_constraint_query(
    self, constraints: List[Constraint]
) -> str:
    """Ask the model for one query that requires all given constraints.

    The constraints are rendered into the prompt; the model's reply, with
    think-tags removed and surrounding whitespace stripped, is returned.
    """
    rendered_constraints = self._format_constraints_for_prompt(constraints)

    prompt = f"""
Create a search query that finds candidates satisfying BOTH/ALL of these constraints:

{rendered_constraints}

The query should:
1. Find entities that match both/all constraints
2. Use operators to require both/all conditions
3. Focus on finding specific names or entities

Return only the search query.
"""

    reply = self.model.invoke(prompt)
    cleaned = remove_think_tags(reply.content)
    return cleaned.strip()
389
+
390
def _create_evidence_query(
    self, candidate: Candidate, constraints: List[Constraint]
) -> str:
    """Ask the model for a query that checks a candidate against constraints.

    The prompt names the candidate and lists the rendered constraints; the
    model's reply, with think-tags removed and surrounding whitespace
    stripped, is returned.
    """
    prompt = f"""
Create a search query to verify if "{candidate.name}" satisfies these constraints:

{self._format_constraints_for_prompt(constraints)}

The query should:
1. Include the candidate name
2. Target the specific constraint requirements
3. Find factual evidence, not opinions

Return only the search query.
"""

    reply = self.model.invoke(prompt)
    cleaned = remove_think_tags(reply.content)
    return cleaned.strip()
411
+
412
def _score_with_diversity(self):
    """Re-score, rank and prune candidates with a diversity component.

    Each candidate's evidence-based score is blended 80/20 with its
    diversity score, the sources of its evidence are recorded in
    ``self.source_types``, and the candidate list is then sorted by the
    blended score (highest first) and handed to ``_prune_with_diversity``.
    """
    for entry in self.candidates:
        # Evidence-based score first; calculate_score sets entry.score.
        entry.calculate_score(self.constraints)

        # Blend in the diversity bonus.
        bonus = self._calculate_diversity_score(entry)
        entry.score = 0.8 * entry.score + 0.2 * bonus

        # Remember which sources back this candidate.
        for item in entry.evidence.values():
            if not hasattr(item, "source"):
                continue
            self.source_types[entry.name].add(item.source)

    # Best blended score first.
    self.candidates.sort(key=lambda entry: entry.score, reverse=True)

    # Trim the list while keeping some diversity.
    self._prune_with_diversity()
432
+
433
+ def _cross_validate_candidates(self):
434
+ """Cross-validate top candidates using different approaches."""
435
+ if not self.candidates:
436
+ return
437
+
438
+ top_candidates = self.candidates[:3]
439
+
440
+ for candidate in top_candidates:
441
+ # Validate using different search engines or approaches
442
+ validation_queries = self._generate_validation_queries(candidate)
443
+
444
+ for query in validation_queries:
445
+ results = self._execute_tracked_search(
446
+ query, self.constraints, "validation"
447
+ )
448
+
449
+ # Update evidence if better found
450
+ for constraint in self.constraints:
451
+ evidence = self.evidence_evaluator.extract_evidence(
452
+ results.get("current_knowledge", ""),
453
+ candidate.name,
454
+ constraint,
455
+ )
456
+
457
+ if (
458
+ constraint.id not in candidate.evidence
459
+ or evidence.confidence
460
+ > candidate.evidence[constraint.id].confidence
461
+ ):
462
+ candidate.add_evidence(constraint.id, evidence)
463
+
464
def _execute_tracked_search(
    self, query: str, constraints: List[Constraint], strategy_type: str
) -> Dict:
    """Run a search and record the attempt for later adaptation.

    A ``SearchAttempt`` describing the query is appended to
    ``self.search_attempts``. When candidate extraction on the results
    yields nothing, the query is also added to ``self.failed_queries``.

    Args:
        query: Search query to execute.
        constraints: Constraints the query targets; their ids are logged.
        strategy_type: Label stored on the attempt (e.g. "validation").

    Returns:
        The raw results returned by ``_execute_search``.
    """
    results = self._execute_search(query)

    # Candidate extraction is used here only to measure query usefulness.
    extracted = self._extract_candidates_with_context(results, query)
    candidates_found = len(extracted)

    constraint_ids = [c.id for c in constraints]
    link_count = len(results.get("all_links_of_system", []))
    self.search_attempts.append(
        SearchAttempt(
            query=query,
            constraint_ids=constraint_ids,
            results_count=link_count,
            candidates_found=candidates_found,
            timestamp=datetime.utcnow().isoformat(),
            strategy_type=strategy_type,
        )
    )

    # A query that surfaces no candidates is remembered as failed.
    if not candidates_found:
        self.failed_queries.add(query)

    return results
489
+
490
+ def _needs_diversity(self) -> bool:
491
+ """Check if we need more diverse candidates."""
492
+ if len(self.candidates) < 3:
493
+ return True
494
+
495
+ # Check source diversity
496
+ top_sources = self.source_types.get(self.candidates[0].name, set())
497
+ return len(top_sources) < self.min_source_diversity
498
+
499
+ def _generate_constraint_combinations(
500
+ self, constraints: List[Constraint], max_size: int = 3
501
+ ) -> List[List[Constraint]]:
502
+ """Generate strategic constraint combinations."""
503
+ combinations = []
504
+
505
+ # Single constraints
506
+ combinations.extend([[c] for c in constraints])
507
+
508
+ # Pairs
509
+ for size in range(2, min(len(constraints), max_size) + 1):
510
+ for combo in itertools.combinations(constraints, size):
511
+ combinations.append(list(combo))
512
+
513
+ return combinations
514
+
515
+ def _format_constraints_for_prompt(
516
+ self, constraints: List[Constraint]
517
+ ) -> str:
518
+ """Format constraints for LLM prompts."""
519
+ formatted = []
520
+ for c in constraints:
521
+ formatted.append(
522
+ f"- {c.type.value}: {c.description} (weight: {c.weight:.2f})"
523
+ )
524
+ return "\n".join(formatted)
525
+
526
def _adapt_successful_query(
    self, pattern_query: str, constraints: List[Constraint]
) -> str:
    """Ask the model to reuse a query pattern that worked before.

    Args:
        pattern_query: Previously successful query used as the template.
        constraints: New constraints to substitute into that pattern.

    Returns:
        The adapted query, with think tags removed and whitespace stripped.
    """
    new_constraints = self._format_constraints_for_prompt(constraints)
    prompt = f"""
Adapt this successful search query pattern with new constraints:

Original query: {pattern_query}

New constraints:
{new_constraints}

Create a similar query structure but with the new constraint values.
Return only the adapted query.
"""

    reply = self.model.invoke(prompt)
    adapted = remove_think_tags(reply.content)
    return adapted.strip()
544
+
545
def _modify_failed_query(
    self, failed_query: str, constraints: List[Constraint]
) -> str:
    """Ask the model for an alternative to a query that found nothing.

    Args:
        failed_query: Query that previously returned no results.
        constraints: Constraints the replacement query must still target.

    Returns:
        The replacement query, with think tags removed and whitespace
        stripped.
    """
    target_constraints = self._format_constraints_for_prompt(constraints)
    prompt = f"""
This search query returned no results: {failed_query}

Constraints we're trying to satisfy:
{target_constraints}

Create an alternative query that:
1. Uses different keywords or phrases
2. Tries a different search approach
3. Still targets the same constraints

Return only the modified query.
"""

    reply = self.model.invoke(prompt)
    alternative = remove_think_tags(reply.content)
    return alternative.strip()
565
+
566
def _calculate_diversity_score(self, candidate: Candidate) -> float:
    """Compute the diversity score of a candidate's evidence.

    The score is the mean of three components:

    * source score: tracked sources for the candidate divided by
      ``self.min_source_diversity``, capped at 1.0;
    * type score: distinct evidence types divided by the number of
      ``EvidenceType`` members;
    * confidence score: ``1 / (1 + d)`` where ``d`` is the mean squared
      distance of the evidence confidences from 0.7.

    Args:
        candidate: Candidate whose ``evidence`` mapping is inspected.

    Returns:
        The averaged score, or 0.0 when the candidate has no evidence.
    """
    if not candidate.evidence:
        return 0.0

    # Source diversity, relative to the configured minimum.
    tracked_sources = self.source_types.get(candidate.name, set())
    source_score = min(
        len(tracked_sources) / self.min_source_diversity, 1.0
    )

    # Evidence type diversity, relative to all known evidence types.
    distinct_types = {item.type for item in candidate.evidence.values()}
    type_score = len(distinct_types) / len(EvidenceType)

    # Confidence spread around the 0.7 reference point.
    confidence_score = 0.0
    confidences = [item.confidence for item in candidate.evidence.values()]
    if confidences:
        mean_sq_distance = sum(
            (value - 0.7) ** 2 for value in confidences
        ) / len(confidences)
        confidence_score = 1.0 / (1.0 + mean_sq_distance)

    return (source_score + type_score + confidence_score) / 3.0
590
+
591
+ def _prune_with_diversity(self):
592
+ """Prune candidates while maintaining diversity."""
593
+ if len(self.candidates) <= self.candidate_limit:
594
+ return
595
+
596
+ # Keep top candidates
597
+ kept = self.candidates[: self.candidate_limit // 2]
598
+ remaining = self.candidates[self.candidate_limit // 2 :]
599
+
600
+ # Add diverse candidates from remaining
601
+ for candidate in remaining:
602
+ if len(kept) >= self.candidate_limit:
603
+ break
604
+
605
+ # Check if this candidate adds diversity
606
+ if self._adds_diversity(candidate, kept):
607
+ kept.append(candidate)
608
+
609
+ # Fill remaining slots with highest scoring
610
+ for candidate in remaining:
611
+ if len(kept) >= self.candidate_limit:
612
+ break
613
+ if candidate not in kept:
614
+ kept.append(candidate)
615
+
616
+ self.candidates = kept
617
+
618
+ def _adds_diversity(
619
+ self, candidate: Candidate, existing: List[Candidate]
620
+ ) -> bool:
621
+ """Check if a candidate adds diversity to the existing set."""
622
+ # Check source diversity
623
+ candidate_sources = self.source_types.get(candidate.name, set())
624
+ existing_sources = set()
625
+ for c in existing:
626
+ existing_sources.update(self.source_types.get(c.name, set()))
627
+
628
+ new_sources = candidate_sources - existing_sources
629
+ if new_sources:
630
+ return True
631
+
632
+ # Check constraint coverage
633
+ candidate_constraints = set(candidate.evidence.keys())
634
+ existing_constraints = set()
635
+ for c in existing:
636
+ existing_constraints.update(c.evidence.keys())
637
+
638
+ new_constraints = candidate_constraints - existing_constraints
639
+ return len(new_constraints) > 0
640
+
641
def _generate_validation_queries(self, candidate: Candidate) -> List[str]:
    """Build the queries used to cross-check a candidate.

    Args:
        candidate: Candidate to validate.

    Returns:
        A list holding, in order: a query quoting the candidate name
        followed by the search terms of the three highest-weight
        constraints; a model-written alternative phrasing; and, when
        sources are tracked for the candidate, a ``site:`` query using the
        first tracked source.
    """
    # Query combining the three heaviest constraints.
    by_weight = sorted(
        self.constraints, key=lambda c: c.weight, reverse=True
    )
    top_terms = [c.to_search_terms() for c in by_weight[:3]]
    queries = [f'"{candidate.name}" ' + " ".join(top_terms)]

    # Query with alternative phrasing, written by the model.
    alt_prompt = f"""
Create an alternative search query to validate "{candidate.name}" as the answer.
Use different keywords but same intent.

Return only the query.
"""
    reply = self.model.invoke(alt_prompt)
    queries.append(remove_think_tags(reply.content).strip())

    # Source-specific query, only when a source is known for the candidate.
    if self.source_types.get(candidate.name):
        first_source = next(iter(self.source_types[candidate.name]))
        queries.append(f'"{candidate.name}" site:{first_source}')

    return queries
670
+
671
+ def _extract_candidates_with_context(
672
+ self, results: Dict, query: str
673
+ ) -> List[Candidate]:
674
+ """Extract candidates with context awareness."""
675
+ # Use the original extraction method but with context
676
+ candidates = self._extract_candidates_from_results(results, query)
677
+
678
+ # Add context metadata
679
+ for candidate in candidates:
680
+ candidate.metadata["discovery_query"] = query
681
+ candidate.metadata["discovery_stage"] = self.iteration
682
+
683
+ return candidates
684
+
685
def _create_base_search_query(self, constraints: List[Constraint]) -> str:
    """Ask the model for a baseline entity-finding query.

    Args:
        constraints: Constraints to render into the prompt.

    Returns:
        The model reply with think tags removed and whitespace stripped.
    """
    constraint_block = self._format_constraints_for_prompt(constraints)
    prompt = f"""
Create a search query that finds specific entities satisfying these constraints:

{constraint_block}

Guidelines:
1. Focus on finding names/entities, not general information
2. Use the most distinctive constraints
3. Combine constraints effectively
4. Keep the query concise but comprehensive

Return only the search query.
"""

    reply = self.model.invoke(prompt)
    base_query = remove_think_tags(reply.content)
    return base_query.strip()
704
+
705
+ def _adaptive_candidate_search(self):
706
+ """Adaptively search for more candidates based on current state."""
707
+ # Analyze what types of candidates we're missing
708
+ covered_constraints = set()
709
+ for candidate in self.candidates[:5]:
710
+ covered_constraints.update(candidate.evidence.keys())
711
+
712
+ uncovered = [
713
+ c for c in self.constraints if c.id not in covered_constraints
714
+ ]
715
+
716
+ if uncovered:
717
+ # Search specifically for uncovered constraints
718
+ queries = []
719
+ for constraint_group in self._generate_constraint_combinations(
720
+ uncovered[:5], max_size=2
721
+ ):
722
+ query = self._create_adaptive_search_query(constraint_group)
723
+ queries.append((query, constraint_group))
724
+
725
+ for query, constraints in queries[: self.adaptive_query_count]:
726
+ results = self._execute_tracked_search(
727
+ query, constraints, "adaptive"
728
+ )
729
+ new_candidates = self._extract_candidates_with_context(
730
+ results, query
731
+ )
732
+
733
+ # Add unique candidates
734
+ existing_names = {c.name.lower() for c in self.candidates}
735
+ for candidate in new_candidates:
736
+ if candidate.name.lower() not in existing_names:
737
+ self.candidates.append(candidate)
738
+ existing_names.add(candidate.name.lower())
739
+
740
def _get_adaptive_distinctive_constraints(self) -> List[Constraint]:
    """Pick up to five constraints, ordered by adaptive type priority.

    Constraint types start in a fixed priority order. When
    ``self.successful_patterns`` is non-empty, the types are re-ranked by
    the total ``"candidates_found"`` of the patterns whose
    ``"constraints"`` ids map to a constraint of that type (highest first;
    ties keep the fixed order). Constraints are then sorted by type
    priority and, within a type, by descending weight.

    A constraint whose type is not in the priority list is ranked after
    all listed types instead of raising ``ValueError``.

    Returns:
        The first five constraints of the sorted list.
    """
    # Start with basic prioritization
    priority_order = [
        ConstraintType.NAME_PATTERN,
        ConstraintType.LOCATION,
        ConstraintType.EVENT,
        ConstraintType.STATISTIC,
        ConstraintType.COMPARISON,
        ConstraintType.PROPERTY,
        ConstraintType.TEMPORAL,
        ConstraintType.EXISTENCE,
    ]

    # Adjust based on successful patterns
    if self.successful_patterns:
        # Index constraints by id once; the first constraint with a given
        # id wins, matching a front-to-back scan.
        constraint_by_id = {}
        for constraint in self.constraints:
            constraint_by_id.setdefault(constraint.id, constraint)

        # Count successful constraint types
        type_success = defaultdict(int)
        for pattern in self.successful_patterns:
            for constraint_id in pattern["constraints"]:
                constraint = constraint_by_id.get(constraint_id)
                if constraint:
                    type_success[constraint.type] += pattern[
                        "candidates_found"
                    ]

        # Sort by success; sorted() is stable, so ties keep the base order.
        priority_order = sorted(
            priority_order,
            key=lambda t: type_success.get(t, 0),
            reverse=True,
        )

    # Precompute each type's rank; unlisted types sort last.
    rank = {ctype: position for position, ctype in enumerate(priority_order)}
    unlisted_rank = len(priority_order)

    # Sort constraints by adjusted priority, heavier weights first.
    sorted_constraints = sorted(
        self.constraints,
        key=lambda c: (rank.get(c.type, unlisted_rank), -c.weight),
    )

    return sorted_constraints[:5]