local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +20 -3
  149. local_deep_research/web/database/models.py +74 -25
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +63 -83
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +192 -54
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +412 -251
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.2.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
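The per-file counts above are line-level additions and removals between the two wheels. A minimal sketch of how such a listing can be reproduced locally with only the standard library, assuming both wheels have already been downloaded (for example with pip download local-deep-research==0.4.4 --no-deps, and the same for 0.5.2; the wheel filenames below are the assumed PyPI artifact names):

import difflib
import zipfile

# Assumed artifact names; adjust to wherever pip download placed the wheels.
OLD_WHEEL = "local_deep_research-0.4.4-py3-none-any.whl"
NEW_WHEEL = "local_deep_research-0.5.2-py3-none-any.whl"


def read_text_entries(wheel_path: str) -> dict[str, list[str]]:
    """Read every entry of a wheel (a zip archive) as a list of text lines."""
    entries = {}
    with zipfile.ZipFile(wheel_path) as zf:
        for name in zf.namelist():
            try:
                entries[name] = zf.read(name).decode("utf-8").splitlines()
            except UnicodeDecodeError:
                entries[name] = []  # treat binary entries as empty
    return entries


old, new = read_text_entries(OLD_WHEEL), read_text_entries(NEW_WHEEL)
for name in sorted(set(old) | set(new)):
    diff = difflib.unified_diff(old.get(name, []), new.get(name, []), lineterm="")
    added = removed = 0
    for line in diff:
        if line.startswith("+") and not line.startswith("+++"):
            added += 1
        elif line.startswith("-") and not line.startswith("---"):
            removed += 1
    if added or removed:
        print(f"{name} +{added} -{removed}")

This sketch does not reproduce the {old → new} rename notation used for the dist-info entries or the unchanged +0 -0 rows, but it yields the same added/removed counts for the files that differ.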
local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py (new file)
@@ -0,0 +1,1337 @@
+ """
+ Enhanced evidence-based search strategy for complex query resolution.
+
+ This strategy addresses common issues with candidate discovery and evidence gathering:
+ 1. Multi-stage candidate discovery with fallback mechanisms
+ 2. Adaptive query generation based on past performance
+ 3. Cross-constraint search capabilities
+ 4. Source diversity management
+ """
+
+ import math
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from typing import Any, Dict, List, Optional, Set, Tuple
+
+ from langchain_core.language_models import BaseChatModel
+
+ from ...utilities.search_utilities import remove_think_tags
+ from ..candidates.base_candidate import Candidate
+ from ..constraints.base_constraint import Constraint, ConstraintType
+ from ..evidence.base_evidence import Evidence
+ from .evidence_based_strategy import EvidenceBasedStrategy
+
+
+ @dataclass
+ class QueryPattern:
+     """Pattern for generating search queries."""
+
+     pattern: str
+     success_rate: float = 0.0
+     usage_count: int = 0
+     constraint_types: List[ConstraintType] = field(default_factory=list)
+
+
+ @dataclass
+ class SourceProfile:
+     """Profile for tracking source effectiveness."""
+
+     source_name: str
+     success_rate: float = 0.0
+     usage_count: int = 0
+     specialties: List[str] = field(default_factory=list)
+     last_used: Optional[datetime] = None
+
+
+ class EnhancedEvidenceBasedStrategy(EvidenceBasedStrategy):
+     """
+     Enhanced evidence-based strategy with improved candidate discovery.
+
+     Key improvements:
+     1. Multi-stage candidate discovery
+     2. Adaptive query patterns
+     3. Cross-constraint capabilities
+     4. Source diversity tracking
+     """
+
+     def __init__(
+         self,
+         model: BaseChatModel,
+         search: Any,
+         all_links_of_system: List[str],
+         max_iterations: int = 20,
+         confidence_threshold: float = 0.85,
+         candidate_limit: int = 20,  # Increased for better candidate variety
+         evidence_threshold: float = 0.6,
+         max_search_iterations: int = 2,
+         questions_per_iteration: int = 3,
+         min_candidates_threshold: int = 10,  # Increased to ensure we have enough candidates
+         enable_pattern_learning: bool = True,
+     ):
+         """Initialize the enhanced evidence-based strategy."""
+         # Call parent initializer with required arguments
+         super().__init__(
+             model=model,
+             search=search,
+             all_links_of_system=all_links_of_system,
+             max_iterations=max_iterations,
+             confidence_threshold=confidence_threshold,
+             candidate_limit=candidate_limit,
+             evidence_threshold=evidence_threshold,
+             max_search_iterations=max_search_iterations,
+             questions_per_iteration=questions_per_iteration,
+         )
+
+         # Enhanced-specific attributes
+         self.min_candidates_threshold = min_candidates_threshold
+         self.enable_pattern_learning = enable_pattern_learning
+
+         # Pattern learning
+         self.query_patterns: Dict[str, QueryPattern] = (
+             self._initialize_patterns()
+         )
+         self.source_profiles: Dict[str, SourceProfile] = (
+             self._initialize_sources()
+         )
+
+         # Failure tracking
+         self.failed_queries: Set[str] = set()
+         self.constraint_relationships: Dict[str, List[str]] = {}
+
+     def _initialize_patterns(self) -> Dict[str, QueryPattern]:
+         """Initialize query patterns for different constraint types."""
+         patterns = {
+             "property_basic": QueryPattern(
+                 pattern="{value} {constraint_type}",
+                 constraint_types=[ConstraintType.PROPERTY],
+             ),
+             "property_character": QueryPattern(
+                 pattern="character with {value}",
+                 constraint_types=[ConstraintType.PROPERTY],
+             ),
+             "event_show": QueryPattern(
+                 pattern="TV show {value}",
+                 constraint_types=[ConstraintType.EVENT],
+             ),
+             "statistic_episodes": QueryPattern(
+                 pattern="{value} episodes series",
+                 constraint_types=[ConstraintType.STATISTIC],
+             ),
+             "cross_constraint": QueryPattern(
+                 pattern="{value1} AND {value2}", constraint_types=[]
+             ),
+             "semantic_expansion": QueryPattern(
+                 pattern='"{value}" OR "{synonym1}" OR "{synonym2}"',
+                 constraint_types=[],
+             ),
+         }
+         return patterns
+
+     def _initialize_sources(self) -> Dict[str, SourceProfile]:
+         """Initialize source profiles for diversity tracking."""
+         sources = {
+             "wikipedia": SourceProfile(
+                 source_name="wikipedia",
+                 specialties=["characters", "properties", "general"],
+             ),
+             "imdb": SourceProfile(
+                 source_name="imdb",
+                 specialties=["tv_shows", "episodes", "statistics"],
+             ),
+             "fandom": SourceProfile(
+                 source_name="fandom",
+                 specialties=["characters", "properties", "backstory"],
+             ),
+             "tv_databases": SourceProfile(
+                 source_name="tv_databases",
+                 specialties=["shows", "episodes", "statistics"],
+             ),
+             "web": SourceProfile(source_name="web", specialties=["general"]),
+         }
+         return sources
+
+     def analyze_topic(self, query: str) -> Dict:
+         """Analyze a topic using enhanced evidence-based approach."""
+         # Initialize
+         self.all_links_of_system.clear()
+         self.questions_by_iteration = []
+         self.findings = []
+         self.iteration = 0
+
+         if self.progress_callback:
+             self.progress_callback(
+                 "Enhanced Evidence-Based Strategy initialized - beginning analysis",
+                 1,
+                 {
+                     "phase": "initialization",
+                     "strategy": "enhanced_evidence_based",
+                 },
+             )
+
+         # Extract constraints
+         if self.progress_callback:
+             self.progress_callback(
+                 "Extracting verifiable constraints from query...",
+                 3,
+                 {"phase": "constraint_extraction", "query_length": len(query)},
+             )
+
+         self.constraints = self.constraint_analyzer.extract_constraints(query)
+
+         if self.progress_callback:
+             self.progress_callback(
+                 f"Found {len(self.constraints)} constraints - analyzing relationships",
+                 5,
+                 {
+                     "phase": "constraint_analysis",
+                     "constraint_count": len(self.constraints),
+                     "constraint_types": {
+                         c.type.name: c.weight for c in self.constraints[:5]
+                     },
+                 },
+             )
+
+         # Analyze constraint relationships
+         self._analyze_constraint_relationships()
+
+         # Find initial candidates with enhanced discovery
+         if self.progress_callback:
+             self.progress_callback(
+                 "Starting enhanced candidate discovery (multi-stage)",
+                 8,
+                 {"phase": "candidate_discovery_start", "stages": 7},
+             )
+
+         self._enhanced_candidate_discovery()
+
+         # Main evidence-gathering loop
+         while (
+             self.iteration < self.max_iterations
+             and not self._has_sufficient_answer()
+         ):
+             self.iteration += 1
+
+             # Progress for iteration
+             if self.progress_callback:
+                 base_progress = 40
+                 iteration_progress = base_progress + int(
+                     (self.iteration / self.max_iterations) * 50
+                 )
+
+                 self.progress_callback(
+                     f"Iteration {self.iteration}/{self.max_iterations} - gathering evidence",
+                     iteration_progress,
+                     {
+                         "phase": "iteration_start",
+                         "iteration": self.iteration,
+                         "max_iterations": self.max_iterations,
+                         "candidates_count": len(self.candidates),
+                         "top_candidate": (
+                             self.candidates[0].name
+                             if self.candidates
+                             else "None"
+                         ),
+                     },
+                 )
+
+             # Adaptive evidence gathering
+             self._adaptive_evidence_gathering()
+
+             # Score and prune with pattern learning
+             if self.progress_callback:
+                 self.progress_callback(
+                     f"Scoring {len(self.candidates)} candidates based on evidence",
+                     None,
+                     {
+                         "phase": "scoring_start",
+                         "candidate_count": len(self.candidates),
+                     },
+                 )
+
+             self._score_and_prune_adaptive()
+
+             # Add iteration finding
+             iteration_finding = {
+                 "phase": f"Iteration {self.iteration}",
+                 "content": self._format_iteration_summary(),
+                 "timestamp": self._get_timestamp(),
+                 "metadata": {
+                     "candidates": len(self.candidates),
+                     "patterns_used": len(self.query_patterns),
+                     "source_diversity": self._calculate_source_diversity(),
+                 },
+             }
+             self.findings.append(iteration_finding)
+
+             # Adaptive candidate discovery if needed
+             if len(self.candidates) < self.min_candidates_threshold:
+                 if self.progress_callback:
+                     self.progress_callback(
+                         f"Too few candidates ({len(self.candidates)}) - searching for more",
+                         None,
+                         {
+                             "phase": "adaptive_discovery",
+                             "current_candidates": len(self.candidates),
+                             "threshold": self.min_candidates_threshold,
+                         },
+                     )
+                 self._adaptive_candidate_discovery()
+
+         # Final verification with source diversity
+         if self.progress_callback:
+             self.progress_callback(
+                 "Starting final verification of top candidates",
+                 90,
+                 {
+                     "phase": "final_verification_start",
+                     "top_candidates": [c.name for c in self.candidates[:3]],
+                 },
+             )
+
+         self._enhanced_final_verification()
+
+         # Generate final answer
+         if self.progress_callback:
+             self.progress_callback(
+                 "Synthesizing final answer based on evidence",
+                 95,
+                 {
+                     "phase": "synthesis_start",
+                     "candidates_evaluated": len(self.candidates),
+                     "evidence_pieces": sum(
+                         len(c.evidence) for c in self.candidates
+                     ),
+                 },
+             )
+
+         result = self._synthesize_final_answer(query)
+
+         if self.progress_callback:
+             top_candidate = self.candidates[0] if self.candidates else None
+             self.progress_callback(
+                 f"Analysis complete - {top_candidate.name if top_candidate else 'No answer found'}",
+                 100,
+                 {
+                     "phase": "complete",
+                     "final_answer": (
+                         top_candidate.name
+                         if top_candidate
+                         else "Unable to determine"
+                     ),
+                     "confidence": top_candidate.score if top_candidate else 0,
+                     "candidates_evaluated": len(self.candidates),
+                     "evidence_pieces": sum(
+                         len(c.evidence) for c in self.candidates
+                     ),
+                     "iterations_used": self.iteration,
+                 },
+             )
+
+         return result
+
+     def _enhanced_candidate_discovery(self):
+         """Enhanced multi-stage candidate discovery."""
+         all_candidates = []
+         discovery_stages = [
+             ("broad", "Broad discovery", self._broad_discovery_search),
+             ("focused", "Focused constraints", self._focused_constraint_search),
+             ("cross", "Cross-constraint", self._cross_constraint_search),
+             ("semantic", "Semantic expansion", self._semantic_expansion_search),
+             ("temporal", "Temporal search", self._temporal_search),
+             (
+                 "character_db",
+                 "Character databases",
+                 self._character_database_search,
+             ),
+             ("fallback", "Fallback search", self._fallback_search),
+         ]
+
+         stage_progress_base = 10
+         stage_progress_increment = 5
+
+         for i, (stage_name, stage_desc, stage_func) in enumerate(
+             discovery_stages
+         ):
+             if self.progress_callback:
+                 progress = stage_progress_base + (i * stage_progress_increment)
+                 self.progress_callback(
+                     f"{stage_desc} search - looking for candidates [{i + 1}/{len(discovery_stages)}]",
+                     progress,
+                     {
+                         "phase": f"discovery_{stage_name}",
+                         "stage": i + 1,
+                         "total_stages": len(discovery_stages),
+                         "candidates_found": len(all_candidates),
+                         "stage_description": stage_desc,
+                     },
+                 )
+
+             stage_candidates = stage_func()
+             new_candidates = len(stage_candidates)
+             all_candidates.extend(stage_candidates)
+
+             if self.progress_callback and new_candidates > 0:
+                 self.progress_callback(
+                     f"Found {new_candidates} candidates in {stage_desc} stage",
+                     None,
+                     {
+                         "phase": f"discovery_{stage_name}_results",
+                         "new_candidates": new_candidates,
+                         "total_candidates": len(all_candidates),
+                     },
+                 )
+
+             # Early exit if we have enough candidates
+             if len(all_candidates) >= self.min_candidates_threshold * 2:
+                 if self.progress_callback:
+                     self.progress_callback(
+                         f"Found sufficient candidates ({len(all_candidates)}) - ending discovery",
+                         30,
+                         {
+                             "phase": "discovery_complete",
+                             "total_candidates": len(all_candidates),
+                         },
+                     )
+                 break
+
+         # Deduplicate and add candidates
+         self._add_unique_candidates(all_candidates)
+
+         # If still not enough candidates, try more aggressive discovery
+         if (
+             len(self.candidates) < self.min_candidates_threshold
+             and all_candidates
+         ):
+             if self.progress_callback:
+                 self.progress_callback(
+                     f"Only {len(self.candidates)} candidates - trying more aggressive search",
+                     None,
+                     {"phase": "aggressive_discovery"},
+                 )
+             self._aggressive_supplemental_search()
+
+         if self.progress_callback:
+             self.progress_callback(
+                 f"Candidate discovery complete - {len(self.candidates)} unique candidates found",
+                 35,
+                 {
+                     "phase": "discovery_complete",
+                     "unique_candidates": len(self.candidates),
+                     "candidate_names": [c.name for c in self.candidates[:10]],
+                 },
+             )
+
+     def _broad_discovery_search(self) -> List[Candidate]:
+         """Initial broad search to discover potential candidates."""
+         candidates = []
+
+         # Extended patterns for broader discovery
+         extended_patterns = [
+             "{value}",
+             "character {value}",
+             "TV character {value}",
+             "fictional character {value}",
+             "{value} television",
+             "{value} TV show",
+             "{value} series",
+         ]
+
+         # Apply patterns to constraints
+         for constraint in self.constraints[:4]:  # Increased from 3
+             for pattern_template in extended_patterns:
+                 query = pattern_template.replace("{value}", constraint.value)
+
+                 if query not in self.failed_queries and len(query) > 3:
+                     if self.progress_callback:
+                         self.progress_callback(
+                             f"Broad search: {query[:50]}...",
+                             None,
+                             {
+                                 "phase": "broad_search_query",
+                                 "query": query,
+                                 "constraint_type": constraint.type.value,
+                             },
+                         )
+
+                     results = self._execute_adaptive_search(query)
+                     extracted = (
+                         self._extract_candidates_with_enhanced_validation(
+                             results, query
+                         )
+                     )
+                     candidates.extend(extracted)
+
+                     if not extracted:
+                         self.failed_queries.add(query)
+
+         return candidates
+
+     def _focused_constraint_search(self) -> List[Candidate]:
+         """Focused search using specific constraint values."""
+         candidates = []
+
+         # Get high-weight constraints
+         high_weight_constraints = sorted(
+             self.constraints, key=lambda c: c.weight, reverse=True
+         )[:5]
+
+         for i, constraint in enumerate(high_weight_constraints):
+             # Generate adaptive queries based on past performance
+             queries = self._generate_adaptive_queries(constraint)
+
+             if self.progress_callback:
+                 self.progress_callback(
+                     f"Focused search: {constraint.description[:40]}... [{i + 1}/{len(high_weight_constraints)}]",
+                     None,
+                     {
+                         "phase": "focused_constraint",
+                         "constraint": constraint.description,
+                         "constraint_weight": constraint.weight,
+                         "constraint_rank": i + 1,
+                     },
+                 )
+
+             for query in queries[:3]:  # Limit queries per constraint
+                 if query not in self.failed_queries:
+                     results = self._execute_adaptive_search(query)
+                     extracted = self._extract_candidates_from_results(
+                         results, query
+                     )
+
+                     if extracted:
+                         candidates.extend(extracted)
+                         # Learn from successful query
+                         self._learn_query_pattern(
+                             query, constraint, success=True
+                         )
+                     else:
+                         self.failed_queries.add(query)
+
+         return candidates
+
+     def _cross_constraint_search(self) -> List[Candidate]:
+         """Search using combinations of constraints."""
+         candidates = []
+
+         # Find related constraint pairs
+         constraint_pairs = self._find_related_constraint_pairs()
+
+         if self.progress_callback and constraint_pairs:
+             self.progress_callback(
+                 f"Cross-constraint search - {len(constraint_pairs)} pairs identified",
+                 None,
+                 {
+                     "phase": "cross_constraint_start",
+                     "pair_count": len(constraint_pairs),
+                 },
+             )
+
+         for i, (constraint1, constraint2) in enumerate(
+             constraint_pairs[:5]
+         ):  # Limit combinations
+             query = self._build_cross_constraint_query(constraint1, constraint2)
+
+             if self.progress_callback:
+                 self.progress_callback(
+                     f"Combining: {constraint1.type.value} + {constraint2.type.value}",
+                     None,
+                     {
+                         "phase": "cross_constraint_pair",
+                         "pair": i + 1,
+                         "constraint1": constraint1.description[:30],
+                         "constraint2": constraint2.description[:30],
+                     },
+                 )
+
+             if query not in self.failed_queries:
+                 results = self._execute_adaptive_search(query)
+                 extracted = self._extract_candidates_from_results(
+                     results, query
+                 )
+
+                 if extracted:
+                     candidates.extend(extracted)
+                     # Update constraint relationships
+                     self._update_constraint_relationships(
+                         constraint1, constraint2, True
+                     )
+                 else:
+                     self.failed_queries.add(query)
+
+         return candidates
+
+     def _semantic_expansion_search(self) -> List[Candidate]:
+         """Search using semantic variations of constraints."""
+         candidates = []
+
+         # Use general semantic variations without specific terms
+         for constraint in self.constraints[:4]:
+             variations = self._generate_semantic_variations(constraint)
+
+             for variation in variations[:3]:  # Limit variations
+                 if variation not in self.failed_queries:
+                     results = self._execute_adaptive_search(variation)
+                     extracted = (
+                         self._extract_candidates_with_enhanced_validation(
+                             results, variation
+                         )
+                     )
+
+                     if extracted:
+                         candidates.extend(extracted)
+                     else:
+                         self.failed_queries.add(variation)
+
+         return candidates
+
+     def _fallback_search(self) -> List[Candidate]:
+         """Fallback search with relaxed constraints."""
+         candidates = []
+
+         # Relax constraints by combining fewer requirements
+         relaxed_queries = self._generate_relaxed_queries()
+
+         for query in relaxed_queries[:3]:
+             if query not in self.failed_queries:
+                 results = self._execute_adaptive_search(query)
+                 extracted = self._extract_candidates_from_results(
+                     results, query
+                 )
+                 candidates.extend(extracted)
+
+         return candidates
+
+     def _adaptive_evidence_gathering(self):
+         """Enhanced evidence gathering with source diversity."""
+         evidence_gathered = 0
+         source_usage = {}
+         total_to_gather = (
+             min(5, len(self.candidates)) * 2
+         )  # Max 2 constraints per candidate
+
+         if self.progress_callback:
+             self.progress_callback(
+                 f"Gathering evidence for top {min(5, len(self.candidates))} candidates",
+                 None,
+                 {
+                     "phase": "evidence_gathering_start",
+                     "candidates_to_process": min(5, len(self.candidates)),
+                     "total_evidence_needed": total_to_gather,
+                 },
+             )
+
+         for i, candidate in enumerate(self.candidates[:5]):
+             unverified = candidate.get_unverified_constraints(self.constraints)
+
+             if self.progress_callback:
+                 self.progress_callback(
+                     f"Processing {candidate.name} [{i + 1}/{min(5, len(self.candidates))}]",
+                     None,
+                     {
+                         "phase": "candidate_evidence",
+                         "candidate": candidate.name,
+                         "candidate_rank": i + 1,
+                         "unverified_constraints": len(unverified),
+                     },
+                 )
+
+             for j, constraint in enumerate(
+                 unverified[:2]
+             ):  # Limit per candidate
+                 # Select diverse source
+                 source = self._select_diverse_source(
+                     source_usage, constraint.type
+                 )
+
+                 # Generate evidence query
+                 query = self._generate_evidence_query(candidate, constraint)
+
+                 if self.progress_callback:
+                     self.progress_callback(
+                         f"Searching {source} for: {query[:40]}...",
+                         None,
+                         {
+                             "phase": "evidence_search",
+                             "candidate": candidate.name,
+                             "constraint": constraint.description[:50],
+                             "source": source,
+                             "query": query,
+                         },
+                     )
+
+                 # Execute search with selected source
+                 results = self._execute_search_with_source(query, source)
+
+                 # Extract and evaluate evidence
+                 evidence = self.evidence_evaluator.extract_evidence(
+                     results.get("current_knowledge", ""),
+                     candidate.name,
+                     constraint,
+                 )
+
+                 candidate.add_evidence(constraint.id, evidence)
+                 evidence_gathered += 1
+
+                 if self.progress_callback:
+                     self.progress_callback(
+                         f"Evidence found: {evidence.confidence:.0%} confidence",
+                         None,
+                         {
+                             "phase": "evidence_result",
+                             "candidate": candidate.name,
+                             "constraint": constraint.description[:50],
+                             "confidence": evidence.confidence,
+                             "evidence_type": evidence.type.value,
+                             "progress": f"{evidence_gathered}/{total_to_gather}",
+                         },
+                     )
+
+                 # Update source profile
+                 self._update_source_profile(source, evidence.confidence)
+
+         if self.progress_callback:
+             self.progress_callback(
+                 f"Evidence gathering complete - {evidence_gathered} pieces collected",
+                 None,
+                 {
+                     "phase": "evidence_complete",
+                     "evidence_count": evidence_gathered,
+                     "source_diversity": self._calculate_source_diversity(),
+                     "sources_used": len(
+                         [s for s in source_usage.values() if s > 0]
+                     ),
+                 },
+             )
+
+     def _generate_adaptive_queries(self, constraint: Constraint) -> List[str]:
+         """Generate queries based on past performance."""
+         queries = []
+
+         # Get successful patterns for this constraint type
+         successful_patterns = sorted(
+             [
+                 p
+                 for p in self.query_patterns.values()
+                 if constraint.type in p.constraint_types
+                 and p.success_rate > 0.3
+             ],
+             key=lambda p: p.success_rate,
+             reverse=True,
+         )
+
+         # Apply patterns
+         for pattern in successful_patterns[:3]:
+             query = self._apply_pattern_to_constraint(pattern, constraint)
+             queries.append(query)
+
+         # Add semantic variations
+         semantic_queries = self._generate_semantic_variations(constraint)
+         queries.extend(semantic_queries[:2])
+
+         # Add fallback basic query
+         queries.append(f"{constraint.value} {constraint.type.value}")
+
+         return queries
+
+     def _apply_pattern_to_constraint(
+         self, pattern: QueryPattern, constraint: Constraint
+     ) -> str:
+         """Apply a pattern to a constraint to create a query."""
+         query = pattern.pattern
+
+         # Replace placeholders
+         query = query.replace("{value}", constraint.value)
+         query = query.replace("{constraint_type}", constraint.type.value)
+
+         # Add synonyms if pattern requires them
+         if "{synonym" in query:
+             synonyms = self._get_synonyms(constraint.value)
+             for i, synonym in enumerate(synonyms[:2], 1):
+                 query = query.replace(f"{{synonym{i}}}", synonym)
+
+         return query
+
+     def _build_cross_constraint_query(
+         self, constraint1: Constraint, constraint2: Constraint
+     ) -> str:
+         """Build query combining multiple constraints."""
+         # Identify common terms
+         common_terms = self._find_common_terms(constraint1, constraint2)
+
+         if common_terms:
+             base = " ".join(common_terms)
+             query = f"{base} {constraint1.value} {constraint2.value}"
+         else:
+             query = f"{constraint1.value} AND {constraint2.value}"
+
+         # Add type-specific context
+         if (
+             constraint1.type == ConstraintType.PROPERTY
+             and constraint2.type == ConstraintType.EVENT
+         ):
+             query += " TV show character"
+
+         return query
+
+     def _select_diverse_source(
+         self, source_usage: Dict[str, int], constraint_type: ConstraintType
+     ) -> str:
+         """Select source to ensure diversity."""
+         # Get specialized sources for constraint type
+         specialized = self._get_specialized_sources(constraint_type)
+
+         # Sort by usage (least used first) and success rate
+         available_sources = sorted(
+             self.source_profiles.values(),
+             key=lambda s: (source_usage.get(s.source_name, 0), -s.success_rate),
+         )
+
+         # Prefer specialized sources
+         for source in available_sources:
+             if source.source_name in specialized:
+                 source_usage[source.source_name] = (
+                     source_usage.get(source.source_name, 0) + 1
+                 )
+                 return source.source_name
+
+         # Fallback to least used source
+         selected = available_sources[0].source_name
+         source_usage[selected] = source_usage.get(selected, 0) + 1
+         return selected
+
+     def _get_specialized_sources(
+         self, constraint_type: ConstraintType
+     ) -> List[str]:
+         """Get specialized sources for constraint type."""
+         specialization_map = {
+             ConstraintType.PROPERTY: ["fandom", "wikipedia"],
+             ConstraintType.EVENT: ["imdb", "tv_databases"],
+             ConstraintType.STATISTIC: ["imdb", "tv_databases"],
+             ConstraintType.LOCATION: ["wikipedia"],
+         }
+         return specialization_map.get(constraint_type, ["web"])
+
+     def _update_pattern_success(self, pattern: QueryPattern, success: bool):
+         """Update pattern success rate."""
+         pattern.usage_count += 1
+         # Exponential moving average
+         alpha = 0.3
+         if success:
+             pattern.success_rate = (
+                 alpha * 1.0 + (1 - alpha) * pattern.success_rate
+             )
+         else:
+             pattern.success_rate = (1 - alpha) * pattern.success_rate
+
+     def _update_source_profile(self, source_name: str, confidence: float):
+         """Update source profile based on evidence quality."""
+         if source_name in self.source_profiles:
+             profile = self.source_profiles[source_name]
+             profile.usage_count += 1
+             profile.last_used = datetime.utcnow()
+
+             # Update success rate based on confidence
+             alpha = 0.3
+             success = 1.0 if confidence >= self.evidence_threshold else 0.0
+             profile.success_rate = (
+                 alpha * success + (1 - alpha) * profile.success_rate
+             )
+
+     def _analyze_constraint_relationships(self):
+         """Analyze relationships between constraints."""
+         for i, constraint1 in enumerate(self.constraints):
+             for constraint2 in self.constraints[i + 1 :]:
+                 # Check for common terms or semantic similarity
+                 common_terms = self._find_common_terms(constraint1, constraint2)
+                 if common_terms:
+                     self.constraint_relationships[constraint1.id] = (
+                         self.constraint_relationships.get(constraint1.id, [])
+                         + [constraint2.id]
+                     )
+                     self.constraint_relationships[constraint2.id] = (
+                         self.constraint_relationships.get(constraint2.id, [])
+                         + [constraint1.id]
+                     )
+
+     def _find_related_constraint_pairs(
+         self,
+     ) -> List[Tuple[Constraint, Constraint]]:
+         """Find constraint pairs that might work well together."""
+         pairs = []
+
+         # Use analyzed relationships
+         for constraint_id, related_ids in self.constraint_relationships.items():
+             constraint = next(
+                 (c for c in self.constraints if c.id == constraint_id), None
+             )
+             if constraint:
+                 for related_id in related_ids:
+                     related = next(
+                         (c for c in self.constraints if c.id == related_id),
+                         None,
+                     )
+                     if related and (constraint, related) not in pairs:
+                         pairs.append((constraint, related))
+
+         # Add type-based pairs
+         property_constraints = [
+             c for c in self.constraints if c.type == ConstraintType.PROPERTY
+         ]
+         event_constraints = [
+             c for c in self.constraints if c.type == ConstraintType.EVENT
+         ]
+
+         for prop in property_constraints[:2]:
+             for event in event_constraints[:2]:
+                 if (prop, event) not in pairs:
+                     pairs.append((prop, event))
+
+         return pairs
+
+     def _calculate_source_diversity(self) -> float:
+         """Calculate source diversity score."""
+         if not self.source_profiles:
+             return 0.0
+
+         used_sources = [
+             s for s in self.source_profiles.values() if s.usage_count > 0
+         ]
+         if not used_sources:
+             return 0.0
+
+         # Calculate entropy of source usage
+         total_usage = sum(s.usage_count for s in used_sources)
+         entropy = 0.0
+
+         for source in used_sources:
+             if source.usage_count > 0:
+                 p = source.usage_count / total_usage
+                 entropy -= p * (math.log2(p) if p > 0 else 0)
+
+         # Normalize by maximum possible entropy
+         max_entropy = -math.log2(1 / len(used_sources))
+         return entropy / max_entropy if max_entropy > 0 else 0.0
+
+     def _execute_adaptive_search(self, query: str) -> Dict:
+         """Execute search with adaptive source selection."""
+         # Simple wrapper for consistency - could be enhanced with source selection
+         return self._execute_search(query)
+
+     def _execute_search_with_source(self, query: str, source: str) -> Dict:
+         """Execute search with specific source."""
+         # For now, use the standard search - could be enhanced to use specific sources
+         return self._execute_search(query)
+
+     def _get_synonyms(self, term: str) -> List[str]:
+         """Get synonyms for a term - generic implementation."""
+         # Use LLM to generate synonyms dynamically
+         try:
+             prompt = f"List 3 synonyms or similar terms for '{term}'. Return only the synonyms, one per line."
+             response = self.model.invoke(prompt)
+             content = remove_think_tags(response.content)
+             synonyms = [
+                 line.strip() for line in content.split("\n") if line.strip()
+             ]
+             return synonyms[:3]
+         except Exception:
+             return []  # Return empty list on error
+
+     def _find_common_terms(
+         self, constraint1: Constraint, constraint2: Constraint
+     ) -> List[str]:
+         """Find common terms between constraints."""
+         terms1 = set(constraint1.value.lower().split())
+         terms2 = set(constraint2.value.lower().split())
+         return list(terms1.intersection(terms2))
+
+     def _generate_semantic_variations(
+         self, constraint: Constraint
+     ) -> List[str]:
+         """Generate semantically similar queries."""
+         variations = []
+
+         # Basic variations based on constraint type
+         base_value = (
+             constraint.value
+             if hasattr(constraint, "value")
+             else constraint.description
+         )
+
+         # Add contextual variations based on type
+         if constraint.type == ConstraintType.PROPERTY:
+             variations.extend(
+                 [
+                     f"entity with {base_value}",
+                     f"subject {base_value}",
+                     f"thing {base_value}",
+                     f"{base_value} entity",
+                 ]
+             )
+         elif constraint.type == ConstraintType.EVENT:
+             variations.extend(
+                 [
+                     f"event {base_value}",
+                     f"occurrence {base_value}",
+                     f"{base_value} happened",
+                     f"when {base_value}",
+                 ]
+             )
+         elif constraint.type == ConstraintType.STATISTIC:
+             variations.extend(
+                 [
+                     f"number {base_value}",
+                     f"count {base_value}",
+                     f"{base_value} total",
+                     f"quantity {base_value}",
+                 ]
+             )
+         else:
+             # Generic variations
+             variations.extend(
+                 [
+                     f"{base_value}",
+                     f"specific {base_value}",
+                     f"about {base_value}",
+                     f"regarding {base_value}",
+                 ]
+             )
+
+         return variations[:5]  # Limit variations
+
+     def _generate_evidence_query(
+         self, candidate: Candidate, constraint: Constraint
+     ) -> str:
+         """Generate evidence query for a candidate-constraint pair."""
+         prompt = f"""Create a search query to verify if "{candidate.name}" satisfies this constraint:
+ Constraint: {constraint.description}
+ Type: {constraint.type.value}
+
+ Create a specific search query that would find evidence about whether this candidate meets this constraint.
+ Return only the search query, no explanation."""
+
+         response = self.model.invoke(prompt)
+         query = remove_think_tags(response.content).strip()
+
+         # Fallback to simple query if needed
+         if not query or len(query) < 5:
+             query = f"{candidate.name} {constraint.value}"
+
+         return query
+
+     def _add_unique_candidates(self, candidates: List[Candidate]):
+         """Add unique candidates to the main list."""
+         existing_names = {c.name.lower() for c in self.candidates}
+
+         for candidate in candidates:
+             if candidate.name.lower() not in existing_names:
+                 self.candidates.append(candidate)
+                 existing_names.add(candidate.name.lower())
+
+         # Limit total candidates
+         self.candidates = self.candidates[: self.candidate_limit]
+
+     def _adaptive_candidate_discovery(self):
+         """Adaptive candidate discovery when we have too few candidates."""
+         # Use unused constraints or different strategies
+         unused_constraints = [
+             c
+             for c in self.constraints
+             if not any(c.id in cand.evidence for cand in self.candidates)
+         ]
+
+         if unused_constraints:
+             # Try cross-constraint search with unused constraints
+             candidates = self._cross_constraint_search()
+             self._add_unique_candidates(candidates)
+
+         # If still not enough, try semantic expansion
+         if len(self.candidates) < self.min_candidates_threshold:
+             candidates = self._semantic_expansion_search()
+             self._add_unique_candidates(candidates)
+
+     def _generate_relaxed_queries(self) -> List[str]:
+         """Generate relaxed queries for fallback search."""
+         queries = []
+
+         # Take most important constraints and relax them
+         important_constraints = sorted(
+             self.constraints, key=lambda c: c.weight, reverse=True
+         )[:3]
+
+         for constraint in important_constraints:
+             # Simple value-based query
+             queries.append(constraint.value)
+
+             # Type-based query
+             queries.append(f"{constraint.type.value} {constraint.value}")
+
+         return queries
+
+     def _learn_query_pattern(
+         self, query: str, constraint: Constraint, success: bool
+     ):
+         """Learn from query success/failure."""
+         if not self.enable_pattern_learning:
+             return
+
+         # Extract pattern from query
+         # This is a simplified version - could be enhanced with ML
+         pattern_key = f"{constraint.type.value}_custom"
+
+         if pattern_key not in self.query_patterns:
+             self.query_patterns[pattern_key] = QueryPattern(
+                 pattern=query, constraint_types=[constraint.type]
+             )
+
+         self._update_pattern_success(self.query_patterns[pattern_key], success)
+
+     def _update_constraint_relationships(
+         self, constraint1: Constraint, constraint2: Constraint, success: bool
+     ):
+         """Update constraint relationship tracking."""
+         if success:
+             # Strengthen the relationship
+             if constraint1.id not in self.constraint_relationships:
+                 self.constraint_relationships[constraint1.id] = []
+             if (
+                 constraint2.id
+                 not in self.constraint_relationships[constraint1.id]
+             ):
+                 self.constraint_relationships[constraint1.id].append(
+                     constraint2.id
+                 )
+
+             if constraint2.id not in self.constraint_relationships:
+                 self.constraint_relationships[constraint2.id] = []
+             if (
+                 constraint1.id
+                 not in self.constraint_relationships[constraint2.id]
+             ):
+                 self.constraint_relationships[constraint2.id].append(
+                     constraint1.id
+                 )
+
+     def _wikipedia_search(self) -> List[Candidate]:
+         """Search Wikipedia for general information."""
+         candidates = []
+
+         # Generic Wikipedia searches based on constraints
+         for constraint in self.constraints[:3]:
+             queries = [
+                 f"Wikipedia {constraint.value}",
+                 f"Wikipedia list {constraint.value}",
+                 f"Wikipedia {constraint.type.value} {constraint.value}",
+             ]
+
+             for query in queries:
+                 if query not in self.failed_queries:
+                     results = self._execute_adaptive_search(query)
+                     extracted = (
+                         self._extract_candidates_with_enhanced_validation(
+                             results, query
+                         )
+                     )
+                     candidates.extend(extracted)
+
+                     if not extracted:
+                         self.failed_queries.add(query)
+
+         return candidates
+
+     def _domain_specific_search(self) -> List[Candidate]:
+         """Search domain-specific sources based on constraint types."""
+         candidates = []
+
+         # Map constraint types to relevant domains
+         domain_map = {
+             ConstraintType.PROPERTY: ["database", "encyclopedia", "wiki"],
+             ConstraintType.EVENT: ["timeline", "history", "archive"],
+             ConstraintType.STATISTIC: ["data", "statistics", "numbers"],
+             ConstraintType.LOCATION: ["geography", "places", "maps"],
+         }
+
+         for constraint in self.constraints[:3]:
+             domains = domain_map.get(constraint.type, ["general"])
+
+             for domain in domains:
+                 query = f"{domain} {constraint.value}"
+
+                 if query not in self.failed_queries:
+                     results = self._execute_adaptive_search(query)
+                     extracted = (
+                         self._extract_candidates_with_enhanced_validation(
+                             results, query
+                         )
+                     )
+                     candidates.extend(extracted)
+
+                     if not extracted:
+                         self.failed_queries.add(query)
+
+         return candidates
+
+     def _aggressive_supplemental_search(self):
+         """Aggressive supplemental search when we need more candidates."""
+         additional_candidates = []
+
+         # Try different query formulations
+         query_templates = [
+             "list of {value}",
+             "examples of {value}",
+             "types of {value}",
+             "{value} instances",
+             "specific {value}",
+             "named {value}",
+         ]
+
+         for constraint in self.constraints:
+             for template in query_templates:
+                 query = template.replace("{value}", constraint.value)
+
+                 if query not in self.failed_queries:
+                     results = self._execute_adaptive_search(query)
+                     extracted = (
+                         self._extract_candidates_with_enhanced_validation(
+                             results, query
+                         )
+                     )
+                     additional_candidates.extend(extracted)
+
+                     if len(additional_candidates) > 50:  # Enough candidates
+                         break
+
+             if len(additional_candidates) > 50:
+                 break
+
+         self._add_unique_candidates(additional_candidates)
+
+     def _extract_candidates_with_enhanced_validation(
+         self, results: Dict, query: str
+     ) -> List[Candidate]:
+         """Extract candidates with better validation."""
+         candidates = self._extract_candidates_from_results(results, query)
+
+         # Additional validation to filter out non-candidates
+         validated = []
+         for candidate in candidates:
+             # Basic validation - not too short, not a common word
+             if len(candidate.name) > 2 and not self._is_common_word(
+                 candidate.name
+             ):
+                 validated.append(candidate)
+
+         return validated[:15]  # Limit per search
+
+     def _is_common_word(self, name: str) -> bool:
+         """Check if a name is likely a common word rather than a proper name."""
+         common_words = {
+             "the",
+             "and",
+             "or",
+             "but",
+             "in",
+             "on",
+             "at",
+             "to",
+             "for",
+             "of",
+             "with",
+             "by",
+             "from",
+             "up",
+             "about",
+             "into",
+             "it",
+             "is",
+             "was",
+             "are",
+         }
+         return name.lower() in common_words
+
+     def _score_and_prune_adaptive(self):
+         """Score and prune candidates with adaptive thresholds."""
+         old_count = len(self.candidates)
+
+         # Use the parent's scoring method
+         super()._score_and_prune_candidates()
+
+         # Additional adaptive pruning based on iteration
+         if self.iteration > self.max_iterations / 2:
+             # Be more aggressive in later iterations
+             min_score = (
+                 max(0.3, self.candidates[0].score * 0.4)
+                 if self.candidates
+                 else 0.3
+             )
+             self.candidates = [
+                 c for c in self.candidates if c.score >= min_score
+             ]
+
+         if self.progress_callback:
+             pruned = old_count - len(self.candidates)
+             self.progress_callback(
+                 f"Scored candidates - kept {len(self.candidates)}, pruned {pruned}",
+                 None,
+                 {
+                     "phase": "scoring_complete",
+                     "candidates_kept": len(self.candidates),
+                     "candidates_pruned": pruned,
+                     "top_score": self.candidates[0].score
+                     if self.candidates
+                     else 0,
+                     "min_score_threshold": (
+                         min_score
+                         if self.iteration > self.max_iterations / 2
+                         else "adaptive"
+                     ),
+                 },
+             )
+
+     def _enhanced_final_verification(self):
+         """Enhanced final verification with source diversity."""
+         # Use parent's verification as base
+         super()._final_verification()
+
+         # Additional verification with unused sources
+         if self.candidates:
+             top_candidate = self.candidates[0]
+
+             # Find least-used sources
+             unused_sources = [
+                 s for s in self.source_profiles.values() if s.usage_count < 2
+             ]
+
+             for source in unused_sources[:2]:
+                 weak_constraints = [
+                     c
+                     for c in self.constraints
+                     if c.id not in top_candidate.evidence
+                     or top_candidate.evidence[c.id].confidence
+                     < self.evidence_threshold
+                 ]
+
+                 if weak_constraints:
+                     constraint = weak_constraints[0]
+                     query = self._generate_evidence_query(
+                         top_candidate, constraint
+                     )
+
+                     # Use specific source
+                     results = self._execute_search_with_source(
+                         query, source.source_name
+                     )
+                     evidence = self.evidence_evaluator.extract_evidence(
+                         results.get("current_knowledge", ""),
+                         top_candidate.name,
+                         constraint,
+                     )
+
+                     if evidence.confidence > (
+                         top_candidate.evidence.get(
+                             constraint.id, Evidence(claim="", confidence=0)
+                         ).confidence
+                     ):
+                         top_candidate.add_evidence(constraint.id, evidence)
+
+             # Final re-scoring
+             self._score_and_prune_adaptive()
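For orientation, here is a minimal usage sketch of the strategy this hunk adds, assuming the hunk is evidence_based_strategy_v2.py (the only new file listed with +1337 -0). The constructor arguments come from the __init__ signature above; the ChatOllama backend, the None stand-in for the search engine, and the direct assignment of progress_callback are illustrative assumptions (search is typed as Any, and the methods above only ever read self.progress_callback):

from langchain_ollama import ChatOllama  # assumed backend; any BaseChatModel works

from local_deep_research.advanced_search_system.strategies.evidence_based_strategy_v2 import (
    EnhancedEvidenceBasedStrategy,
)

model = ChatOllama(model="llama3")  # hypothetical local model choice
search = None  # assumption: replace with the app's configured search engine

strategy = EnhancedEvidenceBasedStrategy(
    model=model,
    search=search,
    all_links_of_system=[],  # shared link list; analyze_topic() clears it
    max_iterations=10,  # shorter run than the default of 20
    min_candidates_threshold=10,
    enable_pattern_learning=True,
)

# Optional: observe the (message, progress, metadata) events emitted above.
strategy.progress_callback = lambda msg, pct, meta: print(pct, meta.get("phase"), msg)

result = strategy.analyze_topic(
    "Which TV character has a distinctive scar and appeared in over 100 episodes?"
)

analyze_topic() drives the whole pipeline shown in the hunk: constraint extraction, the seven-stage candidate discovery, iterative evidence gathering with source-diversity tracking, final verification, and synthesis of the returned result dictionary.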