local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +5 -3
  149. local_deep_research/web/database/models.py +51 -2
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +51 -61
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +227 -41
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +310 -103
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.0.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,522 @@
1
+ """
2
+ Constraint Parallel Strategy for search optimization.
3
+
4
+ Key features:
5
+ 1. Runs separate searches for each constraint in parallel
6
+ 2. Uses entity type detection to focus all searches
7
+ 3. Collects candidates from all constraint-specific searches
8
+ 4. Evaluates candidates using the existing evaluation system
9
+ 5. Early rejection of poor candidates for efficiency
10
+ """
11
+
12
+ import concurrent.futures
13
+ import threading
14
+ import time
15
+ from dataclasses import dataclass, field
16
+ from typing import Dict, List, Optional, Tuple
17
+
18
+ from loguru import logger
19
+
20
+ from ..candidates.base_candidate import Candidate
21
+ from ..constraints.base_constraint import Constraint
22
+ from .dual_confidence_with_rejection import DualConfidenceWithRejectionStrategy
23
+
24
+
25
@dataclass
class ConstraintSearchState:
    """Mutable bookkeeping shared by one run of the constraint-parallel search.

    A fresh instance is created for every search; worker threads and the
    coordinating thread both read and update it.
    """

    # Every candidate returned by any constraint search (repeats possible).
    all_candidates: List[Candidate] = field(default_factory=list)

    # (candidate, score) pairs recorded by the evaluation workers.
    evaluated_candidates: List[Tuple[Candidate, float]] = field(
        default_factory=list
    )

    # Number of candidates handed to the evaluation pool so far.
    total_evaluated: int = 0

    # Wall-clock timestamp (time.time()) taken when the state was created.
    start_time: float = field(default_factory=time.time)

    # Candidates found per constraint, keyed by the constraint's id.
    constraint_searches: Dict[str, List[Candidate]] = field(
        default_factory=dict
    )

    # Guards the candidate collections above.
    candidates_lock: threading.Lock = field(default_factory=threading.Lock)

    # Set once any stopping criterion fires; checked by the loops.
    stop_search: threading.Event = field(default_factory=threading.Event)

    # Futures of every evaluation task submitted during this search.
    evaluation_futures: List[concurrent.futures.Future] = field(
        default_factory=list
    )

    # Entity type used to focus queries; this default means "not detected".
    entity_type: str = "unknown entity"
44
+
45
+
46
+ class ConstraintParallelStrategy(DualConfidenceWithRejectionStrategy):
47
+ """
48
+ Strategy that runs parallel searches for each constraint independently.
49
+
50
+ Rather than combining constraints in queries, this approach:
51
+ 1. Runs a separate search for each constraint in parallel
52
+ 2. Uses entity type to focus all searches
53
+ 3. Collects candidates from all constraint searches
54
+ 4. Evaluates candidates as they're found
55
+ 5. Identifies candidates that match multiple constraints
56
+ """
57
+
58
def __init__(
    self,
    *args,
    # Concurrent execution settings
    max_workers: int = 10,
    # Candidate targets
    min_good_candidates: int = 3,
    target_candidates: int = 5,
    max_candidates: int = 10,
    # Quality thresholds
    min_score_threshold: float = 0.65,
    exceptional_score: float = 0.95,
    quality_plateau_threshold: float = 0.1,
    # Time and resource limits
    max_search_time: float = 30.0,
    max_evaluations: int = 30,
    # Search behavior
    initial_search_timeout: float = 5.0,
    **kwargs,
):
    """Store the search limits and create the two worker thread pools.

    Positional arguments and any keyword arguments not listed below are
    forwarded unchanged to the parent strategy's constructor.

    Args:
        max_workers: Thread count used for each of the two pools.
        min_good_candidates: Accepted candidates required before an
            exceptional score may end the search.
        target_candidates: Accepted candidates required before a high
            average score may end the search.
        max_candidates: Cap on accepted candidates and on the final result.
        min_score_threshold: Minimum score for a candidate to be accepted.
        exceptional_score: Top score that counts as exceptional.
        quality_plateau_threshold: Score range of the latest accepted
            candidates below which quality is considered to have plateaued.
        max_search_time: Time budget for one search, in seconds.
        max_evaluations: Maximum number of candidate evaluations to submit.
        initial_search_timeout: Stored on the instance; not read by the
            methods visible in this module.
    """
    super().__init__(*args, **kwargs)

    # Separate pools so constraint searches and candidate evaluations do not
    # compete for the same worker threads.
    new_pool = concurrent.futures.ThreadPoolExecutor
    self.search_executor = new_pool(max_workers=max_workers)
    self.evaluation_executor = new_pool(max_workers=max_workers)

    # How many accepted candidates we want.
    self.min_good_candidates = min_good_candidates
    self.target_candidates = target_candidates
    self.max_candidates = max_candidates

    # What counts as a good / outstanding / stagnating score.
    self.min_score_threshold = min_score_threshold
    self.exceptional_score = exceptional_score
    self.quality_plateau_threshold = quality_plateau_threshold

    # Budgets.
    self.max_search_time = max_search_time
    self.max_evaluations = max_evaluations
    self.initial_search_timeout = initial_search_timeout

    # Populated at the start of each search.
    self.state: Optional[ConstraintSearchState] = None
105
+
106
def find_relevant_information(self):
    """Run the constraint-parallel search and keep the best candidates.

    Detects the entity type, runs one search per constraint with concurrent
    candidate evaluation, then stores the highest-scoring accepted
    candidates (at most ``max_candidates``) in ``self.candidates``.
    Both thread pools are shut down afterwards, even if the search raises.
    """
    # Fresh per-search state.
    self.state = ConstraintSearchState(start_time=time.time())

    # The entity type is detected up front because every query uses it.
    detected = self._detect_entity_type()
    self.state.entity_type = detected
    logger.info(f"Detected entity type: {self.state.entity_type}")

    if self.progress_callback:
        details = {
            "phase": "entity_detection",
            "entity_type": self.state.entity_type,
        }
        self.progress_callback(
            f"Starting parallel searches for {self.state.entity_type}",
            10,
            details,
        )

    try:
        self._run_parallel_constraint_searches()
    finally:
        # Release the pools without blocking on tasks that are still running.
        for pool in (self.search_executor, self.evaluation_executor):
            pool.shutdown(wait=False)

    # Highest score first, truncated to the configured maximum.
    ranked = sorted(
        self.state.evaluated_candidates,
        key=lambda pair: pair[1],
        reverse=True,
    )
    self.candidates = [
        candidate for candidate, _score in ranked[: self.max_candidates]
    ]

    logger.info(
        f"Found {len(self.candidates)} candidates after parallel constraint searches"
    )
146
+
147
def _detect_entity_type(self) -> str:
    """Ask the LLM which kind of entity the ranked constraints describe.

    Returns:
        The model's stripped reply, or ``"unknown entity"`` when the call
        fails for any reason.
    """
    # One bullet per ranked constraint gives the model its context.
    bullets = []
    for ranked_constraint in self.constraint_ranking:
        bullets.append(f"- {ranked_constraint.value}")
    constraint_text = "\n".join(bullets)

    prompt = f"""
Analyze these search constraints and determine what type of entity is being searched for:

Constraints:
{constraint_text}

What is the primary entity type being searched for? Be specific.

Examples of entity types (but you can choose any appropriate type):
- fictional character
- TV show
- movie
- actor/actress
- historical figure
- company
- product
- location
- event

Respond with just the entity type.
"""

    try:
        reply = self.model.invoke(prompt)
        entity_type = reply.content.strip()
        logger.info(f"LLM determined entity type: {entity_type}")
        return entity_type
    except Exception as exc:
        # Best effort: detection failure must not abort the search.
        logger.error(f"Failed to detect entity type: {exc}")
        return "unknown entity"
183
+
184
def _run_parallel_constraint_searches(self):
    """Fan out one search per constraint and evaluate results as they arrive.

    Each ranked constraint is submitted to the search pool. As searches
    finish, their candidates are recorded in the shared state and handed to
    the evaluation pool. Processing stops early once the stop flag is set.
    Outstanding evaluations are finalized before returning.
    """
    ranking = self.constraint_ranking
    total = len(ranking)

    # Maps each search future back to the constraint it was started for.
    pending = {}
    for position, constraint in enumerate(ranking):
        if self.state.stop_search.is_set():
            break

        logger.info(
            f"Scheduling search for constraint {position + 1}/{total}: {constraint.value}"
        )
        task = self.search_executor.submit(
            self._run_constraint_search, constraint, position, total
        )
        pending[task] = constraint

    # Handle searches in completion order rather than submission order.
    for finished in concurrent.futures.as_completed(pending):
        if self.state.stop_search.is_set():
            break

        constraint = pending[finished]
        try:
            found = finished.result()

            with self.state.candidates_lock:
                # Keep the per-constraint view and the overall pool in step.
                self.state.constraint_searches[constraint.id] = found
                self.state.all_candidates.extend(found)

            logger.info(
                f"Constraint '{constraint.value[:30]}...' found {len(found)} candidates"
            )

            self._submit_candidates_for_evaluation(found)

        except Exception as exc:
            logger.error(
                f"Search failed for constraint {constraint.value}: {exc}"
            )

    self._finalize_evaluations()
235
+
236
def _run_constraint_search(
    self, constraint: Constraint, index: int, total: int
) -> List[Candidate]:
    """Search for candidates that satisfy a single constraint.

    Args:
        constraint: The constraint to search for.
        index: Zero-based position of this constraint among all searches.
        total: Total number of constraint searches being run.

    Returns:
        The candidates extracted from the search results, or an empty list
        if anything goes wrong (the error is logged with its traceback).
    """
    try:
        # The query combines the detected entity type with the constraint.
        query = self._build_constraint_query(constraint)

        if self.progress_callback:
            self.progress_callback(
                f"Searching for constraint {index + 1}/{total}: {constraint.value[:30]}...",
                # Constraint searches occupy the 20-50% band of progress.
                20 + int(30 * (index / total)),
                {
                    "phase": "constraint_search",
                    "constraint_index": index,
                    "constraint_total": total,
                    "constraint_value": constraint.value,
                },
            )

        search_results = self._execute_search(query)

        candidates = self._extract_relevant_candidates(
            search_results, constraint
        )

        logger.info(
            f"Found {len(candidates)} candidates for constraint: {constraint.value[:30]}..."
        )
        return candidates

    except Exception as e:
        # Loguru has no ``exc_info`` parameter: passing it as a keyword makes
        # loguru run str.format() on the message (which can itself raise when
        # the text contains braces) and no traceback is recorded.
        # ``logger.exception`` logs at ERROR level with the traceback.
        logger.exception(f"Error in constraint search: {e}")
        return []
272
+
273
+ def _build_constraint_query(self, constraint: Constraint) -> str:
274
+ """Build a query combining entity type and constraint."""
275
+ # Get entity type
276
+ entity_type = self.state.entity_type
277
+ query_parts = []
278
+
279
+ # Always include entity type if known
280
+ if entity_type and entity_type != "unknown entity":
281
+ # Add entity type as a search term
282
+ if " " in entity_type and not entity_type.startswith('"'):
283
+ query_parts.append(f'"{entity_type}"')
284
+ else:
285
+ query_parts.append(entity_type)
286
+
287
+ # Add constraint
288
+ value = constraint.value
289
+ if " " in value and not value.startswith('"'):
290
+ query_parts.append(f'"{value}"')
291
+ else:
292
+ query_parts.append(value)
293
+
294
+ # Convert constraint to search-friendly terms based on type
295
+ search_terms = constraint.to_search_terms()
296
+ if search_terms and search_terms != value:
297
+ query_parts.append(search_terms)
298
+
299
+ return " ".join(query_parts)
300
+
301
+ def _submit_candidates_for_evaluation(self, candidates: List[Candidate]):
302
+ """Submit candidates for concurrent evaluation."""
303
+ for candidate in candidates:
304
+ if self.state.stop_search.is_set():
305
+ break
306
+
307
+ if self.state.total_evaluated >= self.max_evaluations:
308
+ logger.info("Reached maximum evaluations limit")
309
+ self.state.stop_search.set()
310
+ break
311
+
312
+ # Check if we already evaluated this candidate
313
+ if self._is_candidate_evaluated(candidate):
314
+ continue
315
+
316
+ # Submit for evaluation
317
+ future = self.evaluation_executor.submit(
318
+ self._evaluate_candidate_thread, candidate
319
+ )
320
+ self.state.evaluation_futures.append(future)
321
+ self.state.total_evaluated += 1
322
+
323
def _evaluate_candidate_thread(
    self, candidate: Candidate
) -> Tuple[Candidate, float]:
    """Evaluate one candidate on a worker thread.

    The candidate is first checked against the detected entity type; a
    match score below 0.5 rejects it with score 0.0. Otherwise it is scored
    by ``_evaluate_candidate_immediately``. Candidates scoring at least
    ``min_score_threshold`` are recorded as accepted (once per name), after
    which the stopping criteria are re-checked.

    Args:
        candidate: The candidate to evaluate.

    Returns:
        ``(candidate, score)``; the score is 0.0 when the entity-type check
        fails or an exception occurs.
    """
    try:
        thread_name = threading.current_thread().name
        logger.info(
            f"[{thread_name}] Starting evaluation of {candidate.name}"
        )

        # FIRST CHECK: verify that the candidate is of the expected entity
        # type before spending effort on constraint evaluation.
        entity_match_score = self._verify_entity_type_match(candidate)
        if entity_match_score < 0.5:  # Threshold for entity type match
            logger.info(
                f"[{thread_name}] ❌ {candidate.name} rejected - Not a {self.state.entity_type} (score: {entity_match_score:.3f})"
            )
            return (candidate, 0.0)

        # Inherited evaluation (not defined in this class).
        score = self._evaluate_candidate_immediately(candidate)

        if score >= self.min_score_threshold:
            logger.info(
                f"[{thread_name}] ✓ {candidate.name} passed (score: {score:.3f})"
            )

            with self.state.candidates_lock:
                # The same entity can be evaluated concurrently when several
                # constraint searches return it; keep only the first result
                # so it is not counted twice toward the stopping criteria
                # or listed twice in the final candidates.
                already_recorded = any(
                    known.name == candidate.name
                    for known, _ in self.state.evaluated_candidates
                )
                if not already_recorded:
                    self.state.evaluated_candidates.append(
                        (candidate, score)
                    )

            # Check if we should stop
            if self._should_stop_search():
                logger.info("Stopping criteria met after evaluation")
                self.state.stop_search.set()
        else:
            logger.info(
                f"[{thread_name}] ❌ {candidate.name} rejected (score: {score:.3f})"
            )

        return (candidate, score)

    except Exception as e:
        # Loguru has no ``exc_info`` parameter: passing it as a keyword makes
        # loguru run str.format() on the message (which can itself raise when
        # the text contains braces) and no traceback is recorded.
        # ``logger.exception`` logs at ERROR level with the traceback.
        logger.exception(f"Error evaluating {candidate.name}: {e}")
        return (candidate, 0.0)
370
+
371
def _verify_entity_type_match(self, candidate: Candidate) -> float:
    """Verify that the candidate matches the expected entity type.

    Asks the LLM whether the candidate's name denotes a single, specific
    instance of the detected entity type rather than a general category.

    Args:
        candidate: The candidate whose name is checked.

    Returns:
        float: Score between 0.0 and 1.0 indicating confidence that the
        candidate matches the entity type. Returns 1.0 when no entity type
        is known (the check is skipped) and 0.5 when the model call fails
        or its reply contains no parseable score.
    """
    import re  # local import: only needed for the fallback parse below

    entity_type = self.state.entity_type
    candidate_name = candidate.name

    # Skip check if entity type is unknown
    if not entity_type or entity_type == "unknown entity":
        return 1.0

    # Use LLM to verify entity type match
    try:
        prompt = f"""
Determine whether "{candidate_name}" is a specific {entity_type} or a general category/collection.

Rules:
1. A specific {entity_type} refers to a single, identifiable instance (e.g., "Mount Rainier" is a specific mountain)
2. A general category refers to a group or collection (e.g., "U.S. national parks" is a category, not a specific location)
3. Be strict - answer must be a single, concrete {entity_type}

Return ONLY a score from 0.0 to 1.0 where:
- 1.0 = Definitely a specific {entity_type}
- 0.5 = Unclear or partially matches
- 0.0 = Definitely NOT a specific {entity_type} (too general or wrong type)

Score:
"""

        response = self.model.invoke(prompt).content.strip()

        # Extract numeric score from response
        try:
            score = float(response.split()[0].strip())
        except (ValueError, IndexError):
            # Models often echo the label or decorate the number
            # ("Score: 0.2", "0.8.", "**0.9**"). Accept a number that
            # follows only punctuation and an optional "score" label at the
            # very start; anything wordier stays unparseable so a number
            # buried in prose is never mistaken for the score.
            leading_number = re.match(
                r"\W*(?:score\W*)?(\d+\.?\d*|\.\d+)",
                response,
                re.IGNORECASE,
            )
            if leading_number is None:
                logger.warning(
                    f"Could not parse entity type score from: {response}"
                )
                return 0.5  # Default to middle value on parsing error
            score = float(leading_number.group(1))

        # Ensure score is in valid range
        score = max(0.0, min(score, 1.0))
        logger.info(
            f"Entity type check for {candidate_name}: {score:.2f} (entity type: {entity_type})"
        )
        return score

    except Exception as e:
        logger.error(
            f"Error verifying entity type for {candidate_name}: {e}"
        )
        return 0.5  # Default to middle value on error
424
+
425
+ def _is_candidate_evaluated(self, candidate: Candidate) -> bool:
426
+ """Check if we already evaluated this candidate."""
427
+ with self.state.candidates_lock:
428
+ return any(
429
+ c.name == candidate.name
430
+ for c, _ in self.state.evaluated_candidates
431
+ )
432
+
433
+ def _should_stop_search(self) -> bool:
434
+ """Determine if we should stop searching based on multiple criteria."""
435
+ # Always respect the stop flag
436
+ if self.state.stop_search.is_set():
437
+ return True
438
+
439
+ num_good = len(self.state.evaluated_candidates)
440
+
441
+ # 1. Maximum candidates reached
442
+ if num_good >= self.max_candidates:
443
+ logger.info(f"Maximum candidates reached ({self.max_candidates})")
444
+ return True
445
+
446
+ # 2. Target reached with good quality
447
+ if num_good >= self.target_candidates:
448
+ avg_score = (
449
+ sum(s for _, s in self.state.evaluated_candidates) / num_good
450
+ )
451
+ if avg_score >= 0.8:
452
+ logger.info(
453
+ f"Target reached with high quality (avg: {avg_score:.3f})"
454
+ )
455
+ return True
456
+
457
+ # 3. Minimum satisfied with exceptional candidates
458
+ if num_good >= self.min_good_candidates:
459
+ top_score = max(s for _, s in self.state.evaluated_candidates)
460
+ if top_score >= self.exceptional_score:
461
+ logger.info(
462
+ f"Exceptional candidate found (score: {top_score:.3f})"
463
+ )
464
+ return True
465
+
466
+ # 4. Time limit reached
467
+ elapsed = time.time() - self.state.start_time
468
+ if elapsed > self.max_search_time:
469
+ logger.info(f"Time limit reached ({elapsed:.1f}s)")
470
+ return True
471
+
472
+ # 5. Too many evaluations
473
+ if self.state.total_evaluated >= self.max_evaluations:
474
+ logger.info(f"Evaluation limit reached ({self.max_evaluations})")
475
+ return True
476
+
477
+ # 6. Quality plateau detection
478
+ if num_good >= 5:
479
+ recent_scores = [s for _, s in self.state.evaluated_candidates[-5:]]
480
+ score_range = max(recent_scores) - min(recent_scores)
481
+ if score_range < self.quality_plateau_threshold:
482
+ logger.info(
483
+ f"Quality plateau detected (range: {score_range:.3f})"
484
+ )
485
+ return True
486
+
487
+ return False
488
+
489
def _finalize_evaluations(self):
    """Wait briefly for outstanding evaluations, then cancel what is left.

    Unfinished evaluations get a grace period of up to five seconds,
    limited to what remains of ``max_search_time``. Evaluations that have
    not started by then are cancelled. A summary of the search is logged
    at the end.
    """
    futures = self.state.evaluation_futures
    if futures:
        logger.info(
            f"Finalizing {len(self.state.evaluation_futures)} remaining evaluations"
        )

        # Give them a short time to complete
        wait_time = min(
            5.0,
            self.max_search_time - (time.time() - self.state.start_time),
        )
        unfinished = [future for future in futures if not future.done()]
        if wait_time > 0 and unfinished:
            # ALL_COMPLETED, not FIRST_COMPLETED: the list also holds
            # evaluations that finished long ago, so waiting for the first
            # completion returned immediately and every pending evaluation
            # was cancelled without any grace period.
            concurrent.futures.wait(
                unfinished,
                timeout=wait_time,
                return_when=concurrent.futures.ALL_COMPLETED,
            )

        # Cancel whatever has not finished. Future.cancel() only prevents
        # queued tasks from starting; a task that is already running is not
        # interrupted and cancel() simply returns False for it.
        for future in futures:
            if not future.done():
                future.cancel()

    # Final report
    logger.info(
        f"""
Search completed:
- Total evaluated: {self.state.total_evaluated}
- Good candidates found: {len(self.state.evaluated_candidates)}
- Time taken: {time.time() - self.state.start_time:.1f}s
- Constraint searches: {len(self.state.constraint_searches)}
"""
    )