local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +20 -3
  149. local_deep_research/web/database/models.py +74 -25
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +63 -83
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +192 -54
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +412 -251
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.2.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1142 @@
1
+ """
2
+ Modular strategy that demonstrates usage of the new constraint_checking and candidate_exploration modules.
3
+ Enhanced with LLM-driven constraint processing, early rejection, and immediate evaluation.
4
+ """
5
+
6
+ import asyncio
7
+ import json
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+ from dataclasses import dataclass
10
+ from typing import Dict, List, Optional, Tuple
11
+
12
+ from loguru import logger
13
+
14
+ from ...utilities.search_cache import get_search_cache, normalize_entity_query
15
+ from ..candidate_exploration import (
16
+ AdaptiveExplorer,
17
+ ConstraintGuidedExplorer,
18
+ DiversityExplorer,
19
+ ParallelExplorer,
20
+ )
21
+ from ..constraint_checking import (
22
+ DualConfidenceChecker,
23
+ StrictChecker,
24
+ ThresholdChecker,
25
+ )
26
+ from ..constraints import ConstraintAnalyzer
27
+ from ..questions import StandardQuestionGenerator
28
+ from .base_strategy import BaseSearchStrategy
29
+
30
+
31
@dataclass
class CandidateConfidence:
    """Track candidate confidence levels for early rejection"""

    # The candidate object under evaluation (opaque here; only passed through).
    candidate: object
    # Confidence (0.0-1.0) that the candidate matches the constraints.
    positive_confidence: float
    # Confidence (0.0-1.0) that the candidate does NOT match the constraints.
    negative_confidence: float
    # Human-readable reason, set when the candidate is rejected early.
    rejection_reason: Optional[str] = None
    # False once evaluation of this candidate should stop.
    should_continue: bool = True
40
+
41
+
42
class LLMConstraintProcessor:
    """LLM-driven intelligent constraint processing.

    Uses the configured chat model to (a) decompose query constraints into
    atomic, searchable elements and (b) generate deduplicated search-query
    variations (reformulations and range splits) from those elements.
    """

    def __init__(self, model):
        # Async chat model exposing `ainvoke(prompt)` returning an object
        # with a `.content` string (LangChain style) — TODO confirm interface.
        self.model = model

    async def decompose_constraints_intelligently(self, constraints):
        """Let LLM intelligently break down constraints into searchable elements.

        Args:
            constraints: Iterable of constraint objects exposing `.description`.

        Returns:
            Dict parsed from the LLM's JSON answer, or {} on parse failure.
        """
        constraint_text = "\n".join([f"- {c.description}" for c in constraints])

        prompt = f"""
I have these constraints from a search query:
{constraint_text}

Please intelligently decompose these constraints into atomic, searchable elements that can be combined in different ways.

For each constraint, provide:
1. **Atomic elements** - Break it into smallest meaningful parts
2. **Variations** - Different ways to express the same concept
3. **Granular specifics** - Specific values, years, numbers, etc.

Example for a time-based constraint:
- Atomic elements: Break down the main subject into searchable terms
- Time variations: Different ways to express time periods
- Granular specifics: Individual years, dates, or specific values mentioned

Return as valid JSON format:
{{
    "constraint_1": {{
        "atomic_elements": [...],
        "variations": [...],
        "granular_specifics": [...]
    }},
    "constraint_2": {{
        "atomic_elements": [...],
        "variations": [...],
        "granular_specifics": [...]
    }}
}}
"""

        response = await self.model.ainvoke(prompt)
        return self._parse_decomposition(response.content)

    async def generate_intelligent_combinations(
        self, decomposed_constraints, existing_queries=None, original_query=None
    ):
        """LLM generates smart combinations of atomic elements.

        Args:
            decomposed_constraints: Output of
                `decompose_constraints_intelligently` (not referenced in the
                prompt yet; kept for interface stability).
            existing_queries: Queries already issued, listed to avoid repeats.
            original_query: The user's original query, tracked first.

        Returns:
            List of query strings, or [] on parse failure.
        """
        if existing_queries is None:
            existing_queries = []

        # Track every query used so far (original first) so the LLM can
        # avoid repeating them.  NOTE: the original code computed
        # `existing_queries_str` twice and re-checked the None default; the
        # dead first pass has been removed.
        all_queries_used = (
            [original_query] + existing_queries
            if original_query
            else existing_queries
        )
        existing_queries_str = (
            "\n".join([f"- {q}" for q in all_queries_used])
            if all_queries_used
            else "None yet"
        )

        prompt = f"""
Create search query variations using TWO strategies:

ORIGINAL QUERY: "{original_query if original_query else "Not provided"}"

ALREADY USED QUERIES (DO NOT REPEAT):
{existing_queries_str}

**STRATEGY 1: QUERY REFORMULATION** (5-8 variations)
Keep ALL key information but rephrase the entire query:
- Change word order and sentence structure
- Use synonyms for key terms
- Convert questions to statements or keyword phrases
- Maintain all specific details (names, dates, numbers)

**STRATEGY 2: RANGE SPLITTING** (10-15 variations)
For any time periods, ranges, or multiple options, create separate specific searches:
- Split year ranges into individual years
- Split time periods into specific decades/years
- Split "between X and Y" into individual values
- Create one search per specific value in any range

**EXAMPLES:**
Original: "Who won Nobel Prize between 1960-1965?"
- Reformulations: "Nobel Prize winner 1960-1965", "Nobel laureate from 1960 to 1965"
- Range splits: "Nobel Prize winner 1960", "Nobel Prize winner 1961", "Nobel Prize winner 1962", "Nobel Prize winner 1963", "Nobel Prize winner 1964", "Nobel Prize winner 1965"

Generate 15-25 search queries total (reformulations + range splits).
Focus on maximum specificity through systematic coverage.

Return as a valid JSON list of search queries:
["query1", "query2", "query3"]
"""

        response = await self.model.ainvoke(prompt)
        return self._parse_combinations(response.content)

    def _parse_decomposition(self, content):
        """Parse LLM decomposition response (first {...} span) into a dict."""
        try:
            start = content.find("{")
            end = content.rfind("}") + 1
            # BUG FIX: rfind() returns -1 on failure, making `end` 0, so the
            # old `end != -1` guard was always true.  Require a closing brace
            # strictly after the opening one before slicing.
            if start != -1 and end > start:
                json_str = content[start:end]
                return json.loads(json_str)
        except Exception as e:
            logger.error(f"Failed to parse decomposition: {e}")

        # If parsing fails, return empty dict - let the system handle gracefully
        logger.warning(
            "Failed to parse constraint decomposition, returning empty dict"
        )
        return {}

    def _parse_combinations(self, content):
        """Parse LLM combinations response ([...] span) into a list."""
        try:
            start = content.find("[")
            end = content.rfind("]") + 1
            # BUG FIX: same rfind() pitfall as in _parse_decomposition.
            if start != -1 and end > start:
                json_str = content[start:end]
                return json.loads(json_str)
        except Exception as e:
            logger.error(f"Failed to parse combinations: {e}")

        # If parsing fails, return empty list - let the system handle gracefully
        logger.warning("Failed to parse LLM combinations, returning empty list")
        return []
184
+
185
+
186
class EarlyRejectionManager:
    """Manages early rejection and confidence tracking."""

    def __init__(self, model, positive_threshold=0.6, negative_threshold=0.3):
        # Async chat model used for quick confidence assessments.
        self.model = model
        # Threshold above which positive confidence counts as "high".
        self.positive_threshold = positive_threshold
        # Baseline negative confidence used by fallbacks.
        self.negative_threshold = negative_threshold
        # Candidates rejected so far (not read in this class; kept for
        # external bookkeeping — TODO confirm callers rely on it).
        self.rejected_candidates = set()

    async def quick_confidence_check(self, candidate, constraints):
        """Quick confidence assessment for early rejection.

        Args:
            candidate: Object exposing `.name` (and optionally `.metadata`).
            constraints: Iterable of constraint objects with `.description`.

        Returns:
            Dict with `positive_confidence`, `negative_confidence`, and
            `reasoning`; neutral fallback values if the LLM call fails.
        """

        prompt = f"""
Quickly assess if this candidate matches the search criteria:

Candidate: {candidate.name}
Available info: {getattr(candidate, "metadata", {})}

Constraints to match:
{[c.description for c in constraints]}

Provide:
1. **Positive confidence** (0.0-1.0): How likely this candidate matches
2. **Negative confidence** (0.0-1.0): How likely this candidate does NOT match
3. **Quick reasoning**: Brief explanation

Return as JSON:
{{
    "positive_confidence": 0.X,
    "negative_confidence": 0.X,
    "reasoning": "brief explanation"
}}
"""

        try:
            response = await self.model.ainvoke(prompt)
            return self._parse_confidence(response.content)
        except Exception as e:
            # Deliberate best-effort: a failed check must not abort the search.
            logger.error(f"Quick confidence check failed: {e}")
            return {
                "positive_confidence": 0.5,
                "negative_confidence": 0.3,
                "reasoning": "fallback",
            }

    def should_reject_early(self, confidence_result):
        """Determine if candidate should be rejected early.

        Only rejects on strong negative evidence (> 0.85), never on a mere
        lack of positive evidence.

        Returns:
            (True, reason) to reject, otherwise (False, None).
        """
        negative = confidence_result.get("negative_confidence", 0.3)

        # Only reject if we have strong negative evidence (not just lack of positive evidence)
        if negative > 0.85:
            return (
                True,
                f"High negative confidence ({negative:.2f})",
            )

        return False, None

    def should_continue_search(self, all_candidates, high_confidence_count):
        """Determine if we should continue searching.

        Returns:
            (False, reason) once enough high-confidence candidates exist, or
            when many candidates were found with none of high confidence;
            otherwise (True, None).
        """
        # Stop if we have enough high-confidence candidates
        if high_confidence_count >= 5:
            return False, "Found sufficient high-confidence candidates"

        # Stop if we have many candidates but low quality
        if len(all_candidates) > 50 and high_confidence_count == 0:
            return False, "Too many low-quality candidates"

        return True, None

    def _parse_confidence(self, content):
        """Parse the confidence-assessment JSON from an LLM response."""
        try:
            start = content.find("{")
            end = content.rfind("}") + 1
            # BUG FIX: rfind() returns -1 on failure, making `end` 0, so the
            # old `end != -1` guard was always true.  Require a closing brace
            # strictly after the opening one before slicing.
            if start != -1 and end > start:
                json_str = content[start:end]
                return json.loads(json_str)
        except Exception as e:
            logger.error(f"Failed to parse confidence: {e}")

        return {
            "positive_confidence": 0.5,
            "negative_confidence": 0.3,
            "reasoning": "parse_error",
        }
273
+
274
+
275
+ class ModularStrategy(BaseSearchStrategy):
276
+ """
277
+ A strategy that showcases the new modular architecture using:
278
+ - constraint_checking module for candidate evaluation
279
+ - candidate_exploration module for search execution
280
+ - constraints module for constraint analysis
281
+ - LLM-driven intelligent constraint processing
282
+ - Early rejection and confidence-based filtering
283
+ - Immediate candidate evaluation
284
+ """
285
+
286
+ def __init__(
287
+ self,
288
+ model,
289
+ search,
290
+ all_links_of_system=None,
291
+ constraint_checker_type: str = "dual_confidence", # dual_confidence, strict, threshold
292
+ exploration_strategy: str = "adaptive", # parallel, adaptive, constraint_guided, diversity
293
+ early_rejection: bool = True, # Enable early rejection by default
294
+ early_stopping: bool = True, # Enable early stopping by default
295
+ llm_constraint_processing: bool = True, # Enable LLM-driven constraint processing by default
296
+ immediate_evaluation: bool = True, # Enable immediate candidate evaluation by default
297
+ **kwargs,
298
+ ):
299
+ super().__init__(all_links_of_system=all_links_of_system)
300
+
301
+ self.model = model
302
+ self.search_engine = search
303
+ self.search_engines = getattr(search, "search_engines", [])
304
+
305
+ # Initialize constraint analyzer
306
+ self.constraint_analyzer = ConstraintAnalyzer(self.model)
307
+
308
+ # Initialize LLM constraint processor if enabled
309
+ self.llm_processor = (
310
+ LLMConstraintProcessor(self.model)
311
+ if llm_constraint_processing
312
+ else None
313
+ )
314
+
315
+ # Initialize early rejection manager if enabled
316
+ self.early_rejection_manager = (
317
+ EarlyRejectionManager(self.model) if early_rejection else None
318
+ )
319
+
320
+ # Initialize constraint checker based on type (default to dual confidence)
321
+ self.constraint_checker = self._create_constraint_checker(
322
+ constraint_checker_type
323
+ )
324
+
325
+ # Initialize candidate explorer based on strategy
326
+ self.candidate_explorer = self._create_candidate_explorer(
327
+ exploration_strategy
328
+ )
329
+
330
+ # Initialize question generator
331
+ self.question_generator = StandardQuestionGenerator(model=self.model)
332
+
333
+ # Strategy configuration
334
+ self.constraint_checker_type = constraint_checker_type
335
+ self.exploration_strategy = exploration_strategy
336
+ self.early_rejection = early_rejection
337
+ self.early_stopping = early_stopping
338
+ self.llm_constraint_processing = llm_constraint_processing
339
+ self.immediate_evaluation = immediate_evaluation
340
+
341
+ logger.info(
342
+ f"Initialized ModularStrategy with {constraint_checker_type} checker, {exploration_strategy} explorer, "
343
+ f"early_rejection={early_rejection}, early_stopping={early_stopping}, "
344
+ f"llm_processing={llm_constraint_processing}, immediate_eval={immediate_evaluation}"
345
+ )
346
+
347
+ def _create_constraint_checker(self, checker_type: str):
348
+ """Create the appropriate constraint checker."""
349
+ if checker_type == "dual_confidence":
350
+ return DualConfidenceChecker(
351
+ model=self.model,
352
+ evidence_gatherer=self._gather_evidence_for_constraint,
353
+ negative_threshold=0.75,
354
+ positive_threshold=0.2,
355
+ uncertainty_penalty=0.1,
356
+ negative_weight=1.5,
357
+ )
358
+ elif checker_type == "strict":
359
+ return StrictChecker(
360
+ model=self.model,
361
+ evidence_gatherer=self._gather_evidence_for_constraint,
362
+ )
363
+ elif checker_type == "threshold":
364
+ return ThresholdChecker(
365
+ model=self.model,
366
+ evidence_gatherer=self._gather_evidence_for_constraint,
367
+ acceptance_threshold=0.7,
368
+ )
369
+ else:
370
+ raise ValueError(f"Unknown constraint checker type: {checker_type}")
371
+
372
+ def _create_candidate_explorer(self, strategy_type: str):
373
+ """Create the appropriate candidate explorer."""
374
+ if strategy_type == "parallel":
375
+ return ParallelExplorer(
376
+ search_engine=self.search_engine,
377
+ model=self.model,
378
+ max_workers=4,
379
+ )
380
+ elif strategy_type == "adaptive":
381
+ return AdaptiveExplorer(
382
+ search_engine=self.search_engine,
383
+ model=self.model,
384
+ learning_rate=0.1,
385
+ )
386
+ elif strategy_type == "constraint_guided":
387
+ return ConstraintGuidedExplorer(
388
+ search_engine=self.search_engine, model=self.model
389
+ )
390
+ elif strategy_type == "diversity":
391
+ return DiversityExplorer(
392
+ search_engine=self.search_engine,
393
+ model=self.model,
394
+ diversity_factor=0.3,
395
+ )
396
+ else:
397
+ raise ValueError(f"Unknown exploration strategy: {strategy_type}")
398
+
399
+ async def search(
400
+ self,
401
+ query: str,
402
+ search_engines: List[str] = None,
403
+ progress_callback=None,
404
+ **kwargs,
405
+ ) -> Tuple[str, Dict]:
406
+ """
407
+ Execute the modular search strategy.
408
+ """
409
+ try:
410
+ logger.info(f"Starting enhanced modular search for: {query}")
411
+
412
+ # Phase 1: Extract base constraints
413
+ if progress_callback:
414
+ progress_callback(
415
+ {
416
+ "phase": "constraint_analysis",
417
+ "progress": 5,
418
+ "message": "Analyzing query constraints",
419
+ }
420
+ )
421
+
422
+ base_constraints = self.constraint_analyzer.extract_constraints(
423
+ query
424
+ )
425
+ logger.info(f"Extracted {len(base_constraints)} base constraints")
426
+
427
+ # Phase 2: LLM constraint processing (if enabled)
428
+ all_search_queries = []
429
+ if self.llm_constraint_processing and self.llm_processor:
430
+ if progress_callback:
431
+ progress_callback(
432
+ {
433
+ "phase": "llm_processing",
434
+ "progress": 15,
435
+ "message": "LLM processing constraints intelligently",
436
+ }
437
+ )
438
+
439
+ logger.info("🤖 LLM CONSTRAINT PROCESSING ACTIVATED")
440
+ # LLM decomposition and combination
441
+ decomposed = await self.llm_processor.decompose_constraints_intelligently(
442
+ base_constraints
443
+ )
444
+
445
+ # Pass existing base constraint queries to avoid duplication
446
+ existing_queries = [c.description for c in base_constraints]
447
+ logger.info("📋 BASE CONSTRAINT QUERIES:")
448
+ for i, base_query in enumerate(existing_queries, 1):
449
+ logger.info(f" BASE-{i:02d}: {base_query}")
450
+
451
+ intelligent_combinations = (
452
+ await self.llm_processor.generate_intelligent_combinations(
453
+ decomposed, existing_queries, query
454
+ )
455
+ )
456
+
457
+ logger.info("🧠 LLM-GENERATED INTELLIGENT QUERIES:")
458
+ logger.info("### START_LLM_QUERIES ###") # Grep-friendly marker
459
+ for i, llm_query in enumerate(intelligent_combinations, 1):
460
+ logger.info(f" LLM-{i:02d}: {llm_query}")
461
+ logger.info("### END_LLM_QUERIES ###") # Grep-friendly marker
462
+
463
+ # OPTIMIZATION: Start with original query, then use LLM-generated targeted queries
464
+ # This ensures we search for the exact question first, then explore variations
465
+ all_search_queries = (
466
+ [query] + intelligent_combinations
467
+ ) # Original query first, then LLM combinations
468
+ logger.info(
469
+ f"🎯 Using original query + {len(intelligent_combinations)} targeted LLM search combinations (skipping broad base constraints)"
470
+ )
471
+ logger.info(
472
+ f"📊 Optimized search strategies: {len(all_search_queries)} (original + {len(intelligent_combinations)} LLM queries)"
473
+ )
474
+ else:
475
+ logger.warning(
476
+ "⚠️ LLM constraint processing is DISABLED - falling back to basic searches"
477
+ )
478
+ # Fallback to basic constraint searches
479
+ all_search_queries = [c.description for c in base_constraints]
480
+
481
+ # Phase 3: Enhanced candidate exploration with immediate evaluation
482
+ if progress_callback:
483
+ progress_callback(
484
+ {
485
+ "phase": "candidate_exploration",
486
+ "progress": 25,
487
+ "message": f"🔍 Exploring with {len(all_search_queries)} enhanced search strategies",
488
+ }
489
+ )
490
+
491
+ all_candidates = []
492
+ high_confidence_count = 0
493
+ search_progress = 30
494
+
495
+ # DECOUPLED APPROACH: Separate search execution from candidate evaluation
496
+ candidate_evaluation_queue = asyncio.Queue()
497
+ evaluation_results = []
498
+ rejected_candidates = [] # Store rejected candidates for potential recovery
499
+
500
+ # Execute searches in parallel batches with decoupled evaluation
501
+ batch_size = 8 # Optimized for parallel execution
502
+ logger.info(
503
+ f"🚀 Starting enhanced exploration with {len(all_search_queries)} search queries (8 concurrent, decoupled evaluation)"
504
+ )
505
+
506
+ # Start background candidate evaluation task
507
+ evaluation_task = asyncio.create_task(
508
+ self._background_candidate_evaluation(
509
+ candidate_evaluation_queue,
510
+ base_constraints,
511
+ evaluation_results,
512
+ query,
513
+ rejected_candidates,
514
+ )
515
+ )
516
+
517
+ for i in range(0, len(all_search_queries), batch_size):
518
+ batch = all_search_queries[i : i + batch_size]
519
+
520
+ if progress_callback:
521
+ progress_callback(
522
+ {
523
+ "phase": "search_batch",
524
+ "progress": search_progress,
525
+ "message": f"🔍 Executing search batch {i // batch_size + 1}",
526
+ }
527
+ )
528
+
529
+ logger.info(
530
+ f"📦 Processing batch {i // batch_size + 1}: {batch}"
531
+ )
532
+
533
+ # Execute batch searches in parallel using ThreadPoolExecutor
534
+ batch_results = []
535
+ with ThreadPoolExecutor(max_workers=8) as executor:
536
+ # Submit all searches in the batch concurrently
537
+ future_to_query = {
538
+ executor.submit(
539
+ self.candidate_explorer._execute_search, query
540
+ ): query
541
+ for query in batch
542
+ }
543
+
544
+ # Collect results as they complete
545
+ for future in as_completed(future_to_query):
546
+ query = future_to_query[future]
547
+ try:
548
+ result = future.result()
549
+ batch_results.append(result)
550
+ except Exception as e:
551
+ logger.error(
552
+ f"❌ Parallel search failed for '{query[:30]}...': {e}"
553
+ )
554
+ batch_results.append(e)
555
+
556
+ # CRITICAL: Yield control to allow background evaluation task to run
557
+ await asyncio.sleep(0)
558
+
559
+ # Process batch results - QUEUE CANDIDATES FOR BACKGROUND EVALUATION
560
+ for j, result in enumerate(batch_results):
561
+ if isinstance(result, Exception):
562
+ logger.error(f"❌ Search failed: {batch[j]} - {result}")
563
+ continue
564
+
565
+ candidates = self.candidate_explorer._extract_candidates_from_results(
566
+ result, original_query=query
567
+ )
568
+
569
+ logger.info(
570
+ f"🎯 Found {len(candidates)} candidates from query: '{batch[j][:50]}...'"
571
+ )
572
+
573
+ # QUEUE CANDIDATES for background evaluation (non-blocking)
574
+ for candidate in candidates:
575
+ await candidate_evaluation_queue.put(candidate)
576
+
577
+ # Progress tracking without blocking on evaluation
578
+ total_candidates = sum(
579
+ len(
580
+ self.candidate_explorer._extract_candidates_from_results(
581
+ result, original_query=query
582
+ )
583
+ )
584
+ for result in batch_results
585
+ if not isinstance(result, Exception)
586
+ )
587
+
588
+ logger.info(
589
+ f"📦 Batch {i // batch_size + 1}: queued {total_candidates} candidates for evaluation"
590
+ )
591
+
592
+ # CRITICAL: Yield control after each batch to allow background evaluation
593
+ await asyncio.sleep(
594
+ 0.1
595
+ ) # Small delay to let background task process
596
+
597
+ search_progress = min(search_progress + 10, 75)
598
+
599
+ # Signal completion to background evaluation and wait for final results
600
+ await candidate_evaluation_queue.put(
601
+ None
602
+ ) # Sentinel to signal completion
603
+
604
+ # Wait for background evaluation to complete
605
+ try:
606
+ await asyncio.wait_for(
607
+ evaluation_task, timeout=30.0
608
+ ) # 30s timeout
609
+ except asyncio.TimeoutError:
610
+ logger.warning(
611
+ "⚠️ Background evaluation timed out, using partial results"
612
+ )
613
+ evaluation_task.cancel()
614
+
615
+ # Collect all evaluated candidates
616
+ all_candidates = [
617
+ result for result in evaluation_results if result is not None
618
+ ]
619
+
620
+ logger.info(
621
+ f"🏁 Search completed: {len(all_candidates)} total candidates, {high_confidence_count} high-confidence"
622
+ )
623
+
624
+ # Phase 4: Final candidate evaluation (if immediate evaluation was disabled)
625
+ evaluated_candidates = all_candidates
626
+ if not self.immediate_evaluation:
627
+ if progress_callback:
628
+ progress_callback(
629
+ {
630
+ "phase": "candidate_evaluation",
631
+ "progress": 80,
632
+ "message": f"🔍 Evaluating {len(all_candidates)} candidates",
633
+ }
634
+ )
635
+
636
+ evaluated_candidates = []
637
+ for candidate in all_candidates[:20]: # Limit to top 20
638
+ try:
639
+ result = self.constraint_checker.check_candidate(
640
+ candidate, base_constraints
641
+ )
642
+ candidate.evaluation_results = result.detailed_results
643
+ candidate.score = result.total_score
644
+ candidate.should_reject = result.should_reject
645
+
646
+ if not result.should_reject:
647
+ evaluated_candidates.append(candidate)
648
+
649
+ except Exception as e:
650
+ logger.error(
651
+ f"💥 Error evaluating candidate {candidate.name}: {e}"
652
+ )
653
+ continue
654
+
655
+ # Phase 5: Select best candidate
656
+ if progress_callback:
657
+ progress_callback(
658
+ {
659
+ "phase": "result_selection",
660
+ "progress": 90,
661
+ "message": "🏆 Selecting best result",
662
+ }
663
+ )
664
+
665
+ if not evaluated_candidates:
666
+ # Check all candidates including rejected ones
667
+ all_scored_candidates = []
668
+
669
+ # Add all candidates with scores
670
+ for c in all_candidates:
671
+ if hasattr(c, "score") and c.score > 0:
672
+ all_scored_candidates.append(c)
673
+
674
+ # Add rejected candidates with scores
675
+ for c in rejected_candidates:
676
+ if hasattr(c, "score") and c.score > 0:
677
+ all_scored_candidates.append(c)
678
+
679
+ if all_scored_candidates:
680
+ # Sort by score
681
+ all_scored_candidates.sort(
682
+ key=lambda x: x.score, reverse=True
683
+ )
684
+ best_candidate = all_scored_candidates[0]
685
+
686
+ # Accept if score is above minimum threshold (20%)
687
+ if best_candidate.score >= 0.20:
688
+ logger.info(
689
+ f"🎯 Accepting best available candidate (recovered from rejected): {best_candidate.name} with score {best_candidate.score:.2%}"
690
+ )
691
+ evaluated_candidates = [best_candidate]
692
+ else:
693
+ logger.warning(
694
+ f"❌ Best candidate {best_candidate.name} has too low score: {best_candidate.score:.2%}"
695
+ )
696
+
697
+ if not evaluated_candidates:
698
+ logger.warning(
699
+ "❌ No valid candidates found after evaluation"
700
+ )
701
+ return "No valid candidates found after evaluation", {
702
+ "strategy": "enhanced_modular",
703
+ "constraint_checker": self.constraint_checker_type,
704
+ "exploration_strategy": self.exploration_strategy,
705
+ "early_rejection": self.early_rejection,
706
+ "llm_processing": self.llm_constraint_processing,
707
+ "total_searches": len(all_search_queries),
708
+ "candidates_found": len(all_candidates),
709
+ "candidates_valid": 0,
710
+ "high_confidence_count": high_confidence_count,
711
+ }
712
+
713
+ # Sort by score and select best
714
+ evaluated_candidates.sort(
715
+ key=lambda x: getattr(x, "score", 0), reverse=True
716
+ )
717
+ best_candidate = evaluated_candidates[0]
718
+
719
+ logger.info(
720
+ f"🏆 Best candidate: {best_candidate.name} with score {getattr(best_candidate, 'score', 0):.2%}"
721
+ )
722
+
723
+ # Phase 6: Generate final answer
724
+ if progress_callback:
725
+ progress_callback(
726
+ {
727
+ "phase": "final_answer",
728
+ "progress": 95,
729
+ "message": "📝 Generating final answer",
730
+ }
731
+ )
732
+
733
+ answer = await self._generate_final_answer(
734
+ query, best_candidate, base_constraints
735
+ )
736
+
737
+ # Search Query Analysis Summary for easy analysis
738
+ logger.info("=" * 80)
739
+ logger.info("🔍 SEARCH QUERY ANALYSIS SUMMARY")
740
+ logger.info("=" * 80)
741
+ logger.info(
742
+ f"📊 TOTAL QUERIES GENERATED: {len(all_search_queries)}"
743
+ )
744
+ logger.info(
745
+ f"📋 BASE CONSTRAINT QUERIES: {len(existing_queries) if 'existing_queries' in locals() else 0}"
746
+ )
747
+ logger.info(
748
+ f"🧠 LLM INTELLIGENT QUERIES: {len(intelligent_combinations) if 'intelligent_combinations' in locals() else 0}"
749
+ )
750
+
751
+ if (
752
+ "intelligent_combinations" in locals()
753
+ and intelligent_combinations
754
+ ):
755
+ logger.info("\n🎯 SAMPLE LLM-GENERATED QUERIES (first 10):")
756
+ for i, query in enumerate(intelligent_combinations[:10], 1):
757
+ logger.info(f" SAMPLE-{i:02d}: {query}")
758
+
759
+ logger.info("=" * 80)
760
+
761
+ metadata = {
762
+ "strategy": "enhanced_modular",
763
+ "constraint_checker": self.constraint_checker_type,
764
+ "exploration_strategy": self.exploration_strategy,
765
+ "early_rejection_enabled": self.early_rejection,
766
+ "early_stopping_enabled": self.early_stopping,
767
+ "llm_processing_enabled": self.llm_constraint_processing,
768
+ "immediate_evaluation_enabled": self.immediate_evaluation,
769
+ "total_searches_generated": len(all_search_queries),
770
+ "candidates_found": len(all_candidates),
771
+ "candidates_evaluated": len(evaluated_candidates),
772
+ "high_confidence_count": high_confidence_count,
773
+ "best_candidate": best_candidate.name,
774
+ "best_score": getattr(best_candidate, "score", 0),
775
+ }
776
+
777
+ return answer, metadata
778
+
779
+ except Exception as e:
780
+ logger.error(f"💥 Error in enhanced modular search: {e}")
781
+ import traceback
782
+
783
+ logger.error(f"🔍 Traceback: {traceback.format_exc()}")
784
+ return f"Search failed: {str(e)}", {"error": str(e)}
785
+
786
+ async def _generate_final_answer(
787
+ self, query: str, best_candidate, constraints
788
+ ) -> str:
789
+ """Generate the final answer using the best candidate."""
790
+
791
+ constraint_info = "\n".join(
792
+ [f"- {c.description} (weight: {c.weight})" for c in constraints]
793
+ )
794
+
795
+ evaluation_info = ""
796
+ if hasattr(best_candidate, "evaluation_results"):
797
+ evaluation_info = "\n".join(
798
+ [
799
+ f"- {result.get('constraint', 'Unknown')}: {result.get('score', 0):.0%}"
800
+ for result in best_candidate.evaluation_results
801
+ ]
802
+ )
803
+
804
+ prompt = f"""Based on the search results, provide a comprehensive answer to: {query}
805
+
806
+ Best candidate found: {best_candidate.name}
807
+ Score: {best_candidate.score:.0%}
808
+
809
+ Constraints analyzed:
810
+ {constraint_info}
811
+
812
+ Constraint evaluation results:
813
+ {evaluation_info}
814
+
815
+ Evidence summary: {getattr(best_candidate, "summary", "No summary available")}
816
+
817
+ Provide a clear, factual answer that addresses the original question and explains how the candidate satisfies the constraints."""
818
+
819
+ response = await self.model.ainvoke(prompt)
820
+ return response.content
821
+
822
    def _gather_evidence_for_constraint(self, candidate, constraint):
        """Gather evidence for a constraint using actual search with caching.

        Builds a targeted search query from the quoted candidate name plus
        key terms extracted from ``constraint.value``, executes it against
        ``self.search_engine`` (whichever of ``run``/``search``/callable is
        available), and normalizes raw results into evidence dicts with
        ``text``, ``source``, ``confidence`` and ``title`` keys.  Results
        are cached for 30 minutes under a key derived from the candidate
        name and constraint value.

        Args:
            candidate: Object with a ``name`` attribute to search for.
            constraint: Object with ``value`` (str) and ``type`` (enum with
                a ``.value`` string) attributes.

        Returns:
            list[dict]: Up to five evidence items; empty list on any failure.
        """
        # Check cache first — skips the search entirely on a hit.
        cache = get_search_cache()
        cache_key = normalize_entity_query(candidate.name, constraint.value)

        cached_results = cache.get(cache_key, "modular_strategy")
        if cached_results is not None:
            logger.debug(
                f"Using cached evidence for {candidate.name} - {constraint.value[:30]}..."
            )
            return cached_results

        try:
            # Build search query intelligently based on constraint type
            query_parts = []

            # Add candidate name (quoted so engines treat it as a phrase)
            query_parts.append(f'"{candidate.name}"')

            # Parse constraint value for key terms
            constraint_value = constraint.value

            # Remove common boilerplate prefixes so only the substantive
            # part of the constraint goes into the search query.
            # NOTE(review): matching is case-sensitive and only the first
            # matching prefix is stripped.
            prefixes_to_remove = [
                "The individual is associated with",
                "The answer must be",
                "The character must be",
                "The entity must be",
                "Must be",
                "Should be",
                "Is",
            ]

            for prefix in prefixes_to_remove:
                if constraint_value.startswith(prefix):
                    constraint_value = constraint_value[len(prefix) :].strip()
                    break

            # Handle different constraint types
            if constraint.type.value == "TEMPORAL":
                # For temporal constraints, extract years/dates and search
                # specifically; fall back to the raw text when none found.
                import re

                years = re.findall(r"\b(19\d{2}|20\d{2})\b", constraint_value)
                decades = re.findall(
                    r"\b(19\d{2}s|20\d{2}s)\b", constraint_value
                )

                if years:
                    for year in years:
                        query_parts.append(year)
                elif decades:
                    for decade in decades:
                        query_parts.append(decade)
                else:
                    query_parts.append(constraint_value)

            elif constraint.type.value == "PROPERTY":
                # For properties, focus on the specific characteristic
                query_parts.append(constraint_value)

            elif constraint.type.value == "STATISTIC":
                # For statistics, include numbers and comparisons
                query_parts.append(constraint_value)

            else:
                # Default: use the constraint value as-is
                query_parts.append(constraint_value)

            # Build final query
            query = " ".join(query_parts)
            logger.debug(f"Evidence search query: {query}")

            # Execute search using the appropriate method
            results = None

            # Try different search methods based on what's available
            # (duck-typed: engine may expose run(), search(), or be callable)
            if hasattr(self.search_engine, "run"):
                results = self.search_engine.run(query)
            elif hasattr(self.search_engine, "search"):
                results = self.search_engine.search(query)
            elif callable(self.search_engine):
                results = self.search_engine(query)
            else:
                logger.error(
                    f"Search engine has no callable method: {type(self.search_engine)}"
                )
                return []

            # Process results
            evidence = []

            # Handle different result formats
            if results is None:
                logger.warning("Search returned None")
                return []

            if isinstance(results, list):
                result_list = results
            elif isinstance(results, dict):
                # Try common keys for results
                result_list = (
                    results.get("results")
                    or results.get("items")
                    or results.get("data")
                    or []
                )
            else:
                logger.warning(f"Unknown search result type: {type(results)}")
                result_list = []

            # Extract top evidence (limit to 5 for better quality)
            for i, result in enumerate(result_list[:5]):
                if isinstance(result, dict):
                    # Extract text content (first non-empty common field)
                    text = (
                        result.get("snippet")
                        or result.get("content")
                        or result.get("description")
                        or result.get("text")
                        or ""
                    )

                    # Extract source information
                    source = (
                        result.get("url")
                        or result.get("link")
                        or result.get("source")
                        or f"search_result_{i + 1}"
                    )

                    # Extract title
                    title = result.get("title", "")

                    # Calculate confidence based on result position and content
                    base_confidence = 0.8 - (i * 0.1)  # Decay by position

                    # Boost confidence if key terms are present
                    if candidate.name.lower() in text.lower():
                        base_confidence += 0.1
                    if any(
                        term.lower() in text.lower()
                        for term in constraint_value.split()
                    ):
                        base_confidence += 0.1

                    # Cap so no item claims near-certainty.
                    confidence = min(base_confidence, 0.95)

                    evidence.append(
                        {
                            "text": text[:500],  # Limit text length
                            "source": source,
                            "confidence": confidence,
                            "title": title,
                            "full_text": text,  # Keep full text for detailed analysis
                        }
                    )
                else:
                    # Handle string results with a fixed, lower confidence.
                    # NOTE(review): this branch omits the "full_text" key
                    # that dict results include — confirm consumers tolerate
                    # its absence.
                    evidence.append(
                        {
                            "text": str(result)[:500],
                            "source": f"search_result_{i + 1}",
                            "confidence": 0.6,
                            "title": "",
                        }
                    )

            logger.debug(
                f"Gathered {len(evidence)} evidence items for {candidate.name} - {constraint.value[:50]}..."
            )

            # Cache the results for future use (30 minutes TTL)
            cache.put(
                cache_key, evidence, "modular_strategy", ttl=1800
            )

            return evidence

        except Exception as e:
            logger.error(f"Error gathering evidence: {e}", exc_info=True)
            # Return empty list instead of mock evidence
            return []
1006
+
1007
    async def _background_candidate_evaluation(
        self,
        queue,
        constraints,
        results,
        original_query=None,
        rejected_candidates=None,
    ):
        """Background task to evaluate candidates without blocking search progress.

        Drains *queue* until a ``None`` sentinel arrives.  Each candidate is
        scored with ``self.constraint_checker.check_candidate``; accepted
        candidates are appended to *results*, rejected (or early-rejected)
        ones to *rejected_candidates* so callers can later recover the best
        of them.  Both lists are mutated in place — they are the outputs.

        Args:
            queue: asyncio.Queue of candidates; ``None`` signals completion.
            constraints: Constraints to evaluate each candidate against.
            results: Shared list receiving accepted candidates.
            original_query: Passed through to the constraint checker.
            rejected_candidates: Shared list for rejected candidates; a new
                list is created when None (the caller then can't see it).

        NOTE(review): check_candidate appears to be a synchronous call, so
        each evaluation blocks the event loop for its duration — confirm
        this is acceptable alongside the producer's asyncio.sleep() yields.
        """
        logger.info("🔄 Started background candidate evaluation")

        # Use provided rejected_candidates list or create new one
        if rejected_candidates is None:
            rejected_candidates = []

        while True:
            try:
                # Get candidate from queue (awaits until one is available)
                candidate = await queue.get()

                # Check for completion sentinel
                if candidate is None:
                    logger.info("🏁 Background evaluation completed")
                    break

                # Evaluate candidate with LLM pre-screening
                try:
                    # Always do full constraint evaluation to get scores
                    result = self.constraint_checker.check_candidate(
                        candidate, constraints, original_query=original_query
                    )
                    # Attach the outcome to the candidate so later phases
                    # (sorting, recovery) can read it.
                    candidate.evaluation_results = result.detailed_results
                    candidate.score = result.total_score
                    candidate.should_reject = result.should_reject

                    # Now check early rejection AFTER we have a score
                    if self.early_rejection_manager:
                        confidence = await self.early_rejection_manager.quick_confidence_check(
                            candidate, constraints
                        )

                        should_reject, reason = (
                            self.early_rejection_manager.should_reject_early(
                                confidence
                            )
                        )
                        # Only early reject if the full score is also low —
                        # a strong score overrides the quick check.
                        if (
                            should_reject and candidate.score < 0.5
                        ):
                            logger.debug(
                                f"⚡ Early rejected {candidate.name}: {reason} (score: {candidate.score:.2%})"
                            )
                            # Store the candidate anyway for potential best candidate recovery
                            rejected_candidates.append(candidate)
                            continue

                    if not result.should_reject:
                        results.append(candidate)
                        logger.info(
                            f"✅ Accepted: {candidate.name} (score: {result.total_score:.2%})"
                        )

                        # Check for excellent candidates (log-only; no early stop here)
                        if result.total_score > 0.9:
                            logger.info(
                                f"🏆 EXCELLENT: {candidate.name} with {result.total_score:.1%} score"
                            )
                    else:
                        # Store rejected candidates with scores for potential recovery
                        rejected_candidates.append(candidate)
                        logger.debug(
                            f"❌ Rejected: {candidate.name} (score: {candidate.score:.2%})"
                        )

                except Exception as e:
                    # Per-candidate failure: log and move to the next one.
                    logger.error(f"💥 Error evaluating {candidate.name}: {e}")

            except Exception as e:
                # NOTE(review): this outer handler swallows errors from
                # queue.get() itself and loops again — if queue.get() fails
                # persistently this task would spin without exiting; confirm
                # the intended failure mode.
                logger.error(f"💥 Background evaluation error: {e}")
1086
+
1087
+ def analyze_topic(self, query: str) -> Dict:
1088
+ """
1089
+ Analyze a topic using the modular strategy.
1090
+
1091
+ This is the main entry point that implements the BaseSearchStrategy interface.
1092
+ """
1093
+ try:
1094
+ # Run the search asynchronously
1095
+ import asyncio
1096
+
1097
+ # Create a new event loop if none exists or if the current loop is running
1098
+ try:
1099
+ loop = asyncio.get_event_loop()
1100
+ if loop.is_running():
1101
+ # If we're already in an async context, run in a new thread
1102
+ import concurrent.futures
1103
+
1104
+ with concurrent.futures.ThreadPoolExecutor() as executor:
1105
+ future = executor.submit(
1106
+ lambda: asyncio.run(self.search(query))
1107
+ )
1108
+ answer, metadata = future.result()
1109
+ else:
1110
+ # If not in async context, run directly
1111
+ answer, metadata = loop.run_until_complete(
1112
+ self.search(query)
1113
+ )
1114
+ except RuntimeError:
1115
+ # No event loop, create one
1116
+ answer, metadata = asyncio.run(self.search(query))
1117
+
1118
+ return {
1119
+ "findings": [{"content": answer}],
1120
+ "iterations": 1,
1121
+ "final_answer": answer,
1122
+ "current_knowledge": answer,
1123
+ "metadata": metadata,
1124
+ "links": getattr(self, "all_links_of_system", []),
1125
+ "questions_by_iteration": getattr(
1126
+ self, "questions_by_iteration", []
1127
+ ),
1128
+ }
1129
+
1130
+ except Exception as e:
1131
+ logger.error(f"Error in analyze_topic: {e}")
1132
+ import traceback
1133
+
1134
+ logger.error(f"Traceback: {traceback.format_exc()}")
1135
+ return {
1136
+ "findings": [],
1137
+ "iterations": 0,
1138
+ "final_answer": f"Analysis failed: {str(e)}",
1139
+ "metadata": {"error": str(e)},
1140
+ "links": [],
1141
+ "questions_by_iteration": [],
1142
+ }