local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +20 -3
  149. local_deep_research/web/database/models.py +74 -25
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +63 -83
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +192 -54
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +412 -251
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.2.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1031 @@
1
+ """
2
+ BrowseComp Entity-Focused Search Strategy
3
+
4
+ This strategy is specifically designed for BrowseComp questions that require finding
5
+ specific entities (companies, people, events) that match multiple constraints.
6
+
7
+ Key features:
8
+ 1. Entity extraction and progressive search
9
+ 2. Knowledge graph building approach
10
+ 3. Multi-constraint verification with caching
11
+ 4. Specialized search patterns for different entity types
12
+ """
13
+
14
import asyncio
import json
import re
from collections import defaultdict
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple

from loguru import logger

from ...utilities.search_cache import get_search_cache
from ..candidate_exploration import ConstraintGuidedExplorer
from ..constraint_checking import DualConfidenceChecker
from ..constraints import Constraint, ConstraintAnalyzer
from ..questions import BrowseCompQuestionGenerator
from .base_strategy import BaseSearchStrategy
29
+
30
+
31
@dataclass
class EntityCandidate:
    """Candidate entity plus the metadata collected about it.

    The collection fields accept ``None`` (the default) and are replaced by
    fresh empty containers in ``__post_init__``, so instances never share
    mutable state with each other.
    """

    name: str
    entity_type: str  # company, person, event, etc.
    aliases: Optional[List[str]] = None  # other names / abbreviations
    properties: Optional[Dict[str, Any]] = None  # free-form key/value facts
    sources: Optional[List[str]] = None  # where the entity was found
    confidence: float = 0.0
    constraint_matches: Optional[Dict[str, float]] = None  # constraint -> score

    def __post_init__(self):
        """Replace missing (or empty) collections with new empty containers."""
        self.aliases = self.aliases or []
        self.properties = self.properties or {}
        self.sources = self.sources or []
        self.constraint_matches = self.constraint_matches or {}
48
+
49
+
50
class EntityKnowledgeGraph:
    """Build and maintain knowledge about discovered entities."""

    def __init__(self):
        """Create an empty graph with no entities, evidence or cached searches."""
        self.entities = {}  # name -> EntityCandidate
        # constraint -> entity name -> evidence dict
        self.constraint_evidence = defaultdict(dict)
        self.search_cache = {}  # query -> results

    def add_entity(self, entity: EntityCandidate):
        """Add an entity, or merge it into the entry stored under the same name.

        When merging, aliases and sources are combined without duplicates and
        keep their first-seen order; properties and constraint matches from
        the incoming entity overwrite existing keys.
        """
        existing = self.entities.get(entity.name)
        if existing is None:
            self.entities[entity.name] = entity
            return

        # dict.fromkeys de-duplicates while preserving insertion order, so the
        # merged lists are deterministic (a set round-trip would scramble them).
        existing.aliases = list(
            dict.fromkeys([*existing.aliases, *entity.aliases])
        )
        existing.properties.update(entity.properties)
        existing.sources = list(
            dict.fromkeys([*existing.sources, *entity.sources])
        )
        existing.constraint_matches.update(entity.constraint_matches)

    def add_constraint_evidence(
        self, constraint: str, entity_name: str, evidence: Dict
    ):
        """Store (or replace) the evidence for a constraint-entity pair."""
        self.constraint_evidence[constraint][entity_name] = evidence

    def get_entities_by_constraint(
        self, constraint: str, min_confidence: float = 0.5
    ) -> List[EntityCandidate]:
        """Return entities whose score for ``constraint`` is at least
        ``min_confidence``, ordered from highest to lowest score.

        Entities with no recorded score for the constraint are excluded.
        """
        matching = [
            entity
            for entity in self.entities.values()
            if constraint in entity.constraint_matches
            and entity.constraint_matches[constraint] >= min_confidence
        ]
        return sorted(
            matching,
            key=lambda e: e.constraint_matches.get(constraint, 0),
            reverse=True,
        )
94
+
95
+
96
+ class BrowseCompEntityStrategy(BaseSearchStrategy):
97
+ """
98
+ Entity-focused search strategy for BrowseComp questions.
99
+
100
+ This strategy:
101
+ 1. Extracts key entities from the query
102
+ 2. Performs broad entity discovery searches
103
+ 3. Builds a knowledge graph of candidates
104
+ 4. Progressively verifies constraints
105
+ 5. Uses caching to avoid redundant searches
106
+ """
107
+
108
def __init__(
    self, model=None, search=None, all_links_of_system=None, **kwargs
):
    """Set up the strategy and the helpers that need a model or search engine.

    Args:
        model: Language model handed to the analyzer and explorer. When it
            is falsy a warning is logged and the model-dependent
            attributes are not created.
        search: Search engine, stored as ``self.search_engine``.
        all_links_of_system: Forwarded unchanged to the base class.
        **kwargs: Accepted but not used by this constructor.
    """
    super().__init__(all_links_of_system=all_links_of_system)

    self.model = model
    self.search_engine = search
    model_available = bool(self.model)

    # Analyzer and question generator exist only when a model was supplied.
    if model_available:
        self.constraint_analyzer = ConstraintAnalyzer(model=self.model)
        self.question_generator = BrowseCompQuestionGenerator()
    else:
        logger.warning("No model provided to BrowseCompEntityStrategy")

    self.knowledge_graph = EntityKnowledgeGraph()

    # Constraint checker configured with entity-aware settings.
    if model_available:
        self.constraint_checker = DualConfidenceChecker(
            evidence_gatherer=self._gather_entity_evidence,
            negative_threshold=0.3,  # More lenient for entities
            positive_threshold=0.4,
            uncertainty_penalty=0.1,
            negative_weight=1.0,
        )

    # The explorer needs both a search engine and a model.
    if self.search_engine and model_available:
        self.explorer = ConstraintGuidedExplorer(
            search_engine=self.search_engine, model=self.model
        )

    # Keywords that signal which kind of entity a query is asking about.
    company_words = [
        "company",
        "corporation",
        "group",
        "firm",
        "business",
        "conglomerate",
    ]
    person_words = ["person", "individual", "character", "figure", "people"]
    event_words = [
        "event",
        "incident",
        "occurrence",
        "game",
        "match",
        "competition",
    ]
    location_words = [
        "place",
        "location",
        "city",
        "country",
        "region",
        "area",
    ]
    product_words = ["product", "item", "device", "software", "app", "tool"]
    self.entity_patterns = {
        "company": company_words,
        "person": person_words,
        "event": event_words,
        "location": location_words,
        "product": product_words,
    }
171
+
172
async def search(
    self,
    query: str,
    search_engines: List[str] = None,
    progress_callback=None,
    **kwargs,
) -> Tuple[str, Dict]:
    """Execute the entity-focused search strategy.

    Args:
        query: The question to answer.
        search_engines: Accepted for interface compatibility; not
            referenced in this method.
        progress_callback: Optional callable invoked with a dict holding
            ``phase``, ``progress`` and ``message`` keys.
        **kwargs: Accepted but not used.

    Returns:
        ``(answer, metadata)``. If anything raises, the error is logged
        with its traceback and ``("Search failed: ...", {"error": ...})``
        is returned instead of propagating the exception.
    """

    def report(phase, progress, message):
        # Forward a progress event only when the caller gave a callback.
        if progress_callback:
            progress_callback(
                {"phase": phase, "progress": progress, "message": message}
            )

    try:
        logger.info(f"🎯 Starting BrowseComp Entity Search for: {query}")

        # Phase 1: Constraint and entity analysis
        report(
            "entity_analysis",
            10,
            "Analyzing query for entities and constraints",
        )
        constraints = self.constraint_analyzer.extract_constraints(query)
        entity_type = self._identify_entity_type(query)
        logger.info(
            f"Identified entity type: {entity_type}, {len(constraints)} constraints"
        )

        # Phase 2: Initial entity discovery
        report(
            "entity_discovery",
            25,
            f"Searching for {entity_type} entities",
        )
        initial_entities = await self._discover_entities(
            query,
            entity_type,
            constraints[:2],  # Use first 2 constraints for initial search
        )
        logger.info(f"Discovered {len(initial_entities)} initial entities")

        # Seed the knowledge graph with the discovered entities; otherwise
        # they would only be counted in the log line above and never be
        # evaluated against the constraints.
        for entity in initial_entities:
            self.knowledge_graph.add_entity(entity)

        # Phase 3: Progressive constraint verification
        best_candidate = None
        evaluated = []
        iteration = 0
        max_iterations = 10

        while iteration < max_iterations:
            iteration += 1

            report(
                "constraint_verification",
                25 + (iteration * 50 / max_iterations),
                f"Verifying constraints (iteration {iteration}/{max_iterations})",
            )

            # Generate targeted searches based on current knowledge
            questions = self.question_generator.generate_questions(
                current_knowledge=self._summarize_knowledge(),
                query=query,
                questions_per_iteration=5,
                iteration=iteration,
            )

            # Search for evidence and fold it into the knowledge graph
            new_entities = await self._search_with_questions(
                questions, entity_type
            )
            for entity in new_entities:
                self.knowledge_graph.add_entity(entity)

            # Evaluate all entities against constraints
            evaluated = await self._evaluate_entities(constraints)

            # Stop as soon as the top candidate is a high-confidence match
            if evaluated:
                best_candidate = evaluated[0]
                if best_candidate.confidence > 0.8:
                    logger.info(
                        f"✅ Found high-confidence match: {best_candidate.name} ({best_candidate.confidence:.2%})"
                    )
                    break

            # Early stopping if no progress
            if iteration > 3 and not self.knowledge_graph.entities:
                logger.warning(
                    "No entities found after 3 iterations, stopping"
                )
                break

        # Phase 4: Generate final answer
        report("answer_generation", 90, "Generating final answer")

        if best_candidate and best_candidate.confidence > 0.5:
            answer = await self._generate_entity_answer(
                query, best_candidate, constraints
            )
        else:
            answer = await self._generate_uncertain_answer(
                query, evaluated[:3] if evaluated else []
            )

        # Prepare metadata
        metadata = {
            "strategy": "browsecomp_entity",
            "entity_type": entity_type,
            "entities_discovered": len(self.knowledge_graph.entities),
            "iterations": iteration,
            "best_candidate": best_candidate.name
            if best_candidate
            else None,
            "confidence": best_candidate.confidence
            if best_candidate
            else 0.0,
            "constraint_count": len(constraints),
            "cached_searches": len(self.knowledge_graph.search_cache),
        }

        return answer, metadata

    except Exception as e:
        # loguru has no ``exc_info`` flag: extra keyword arguments are
        # treated as str.format() arguments, so the traceback was never
        # logged and a message containing braces could raise here.
        # ``logger.exception`` records the active traceback.
        logger.exception(f"Error in BrowseComp entity search: {e}")
        return f"Search failed: {str(e)}", {"error": str(e)}
311
+
312
+ def _identify_entity_type(self, query: str) -> str:
313
+ """Identify what type of entity we're looking for."""
314
+ query_lower = query.lower()
315
+
316
+ for entity_type, keywords in self.entity_patterns.items():
317
+ if any(keyword in query_lower for keyword in keywords):
318
+ return entity_type
319
+
320
+ # Default based on common patterns
321
+ if "who" in query_lower:
322
+ return "person"
323
+ elif "which" in query_lower:
324
+ return "product"
325
+ elif "what" in query_lower and "company" in query_lower:
326
+ return "company"
327
+ else:
328
+ return "entity"
329
+
330
+ async def _discover_entities(
331
+ self,
332
+ query: str,
333
+ entity_type: str,
334
+ initial_constraints: List[Constraint],
335
+ ) -> List[EntityCandidate]:
336
+ """Discover initial entity candidates."""
337
+ entities = []
338
+
339
+ # Generate entity-focused search queries
340
+ search_queries = self._generate_entity_searches(
341
+ entity_type, initial_constraints
342
+ )
343
+
344
+ # Execute searches in parallel
345
+ search_tasks = []
346
+ for search_query in search_queries[:5]: # Limit initial searches
347
+ if search_query not in self.knowledge_graph.search_cache:
348
+ search_tasks.append(self._cached_search(search_query))
349
+
350
+ results = await asyncio.gather(*search_tasks)
351
+
352
+ # Extract entities from results
353
+ for query_results in results:
354
+ extracted = await self._extract_entities_from_results(
355
+ query_results, entity_type
356
+ )
357
+ entities.extend(extracted)
358
+
359
+ return entities
360
+
361
+ def _generate_entity_searches(
362
+ self, entity_type: str, constraints: List[Constraint]
363
+ ) -> List[str]:
364
+ """Generate search queries for entity discovery."""
365
+ searches = []
366
+
367
+ # Type-specific base queries
368
+ if entity_type == "company":
369
+ searches.extend(
370
+ [
371
+ "largest companies conglomerates groups",
372
+ "major corporation multinational business",
373
+ "company group founded",
374
+ ]
375
+ )
376
+ elif entity_type == "person":
377
+ searches.extend(
378
+ [
379
+ "famous people individuals",
380
+ "notable person character",
381
+ "who known for",
382
+ ]
383
+ )
384
+ elif entity_type == "event":
385
+ searches.extend(
386
+ [
387
+ "major events competitions",
388
+ "historical event game match",
389
+ "significant occurrence",
390
+ ]
391
+ )
392
+
393
+ # Add constraint-based searches
394
+ for constraint in constraints:
395
+ if constraint.type.value == "TEMPORAL":
396
+ # Extract years/dates
397
+ years = re.findall(r"\b(19\d{2}|20\d{2})\b", constraint.value)
398
+ for year in years:
399
+ searches.append(f"{entity_type} {year}")
400
+ elif constraint.type.value == "LOCATION":
401
+ # Extract location names
402
+ locations = re.findall(
403
+ r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b", constraint.value
404
+ )
405
+ for location in locations:
406
+ searches.append(f"{entity_type} {location}")
407
+ elif constraint.type.value == "STATISTIC":
408
+ # Extract numbers
409
+ numbers = re.findall(r"\b\d+\b", constraint.value)
410
+ for number in numbers:
411
+ searches.append(f"{entity_type} {number}")
412
+
413
+ return searches
414
+
415
async def _extract_entities_from_results(
    self, results: List[Dict], entity_type: str
) -> List[EntityCandidate]:
    """Ask the model to extract entity candidates from search results.

    Only the first ten results are shown to the model (title plus the
    first 200 characters of the snippet). The reply is parsed as JSON;
    if it is wrapped in extra text (for example a Markdown code fence)
    the outermost ``[...]`` span is tried as well. Entries that are not
    objects or lack a non-empty string ``name`` are skipped.

    Args:
        results: Search result dicts; ``title``, ``snippet`` and ``url``
            keys are read when present.
        entity_type: Entity type to request and to tag candidates with.

    Returns:
        Extracted candidates, each sourced from the URLs of the first
        three results. Empty when there are no results or the reply
        cannot be parsed.
    """
    if not results:
        return []

    # Use LLM to extract entities. ``or ''`` guards against a snippet
    # that is present but None, which would otherwise break the slice.
    results_text = "\n".join(
        f"- {r.get('title', '')}: {(r.get('snippet') or '')[:200]}"
        for r in results[:10]
    )

    prompt = f"""Extract {entity_type} entities from these search results.

Search Results:
{results_text}

For each entity found, provide:
1. Name (official/full name)
2. Aliases (other names, abbreviations)
3. Key properties (founding year, location, size, etc.)

Format as JSON:
[
    {{
        "name": "Entity Name",
        "aliases": ["alias1", "alias2"],
        "properties": {{"key": "value"}}
    }}
]

Return only entities that are clearly {entity_type} entities."""

    response = await self.model.ainvoke(prompt)

    # Accept both message-like objects with ``.content`` and plain strings.
    raw = getattr(response, "content", response)
    text = raw if isinstance(raw, str) else str(raw)

    # Try the whole reply first, then the outermost JSON array inside it.
    start, end = text.find("["), text.rfind("]")
    attempts = [text]
    if 0 <= start < end:
        attempts.append(text[start : end + 1])

    entities_data = None
    for attempt in attempts:
        try:
            entities_data = json.loads(attempt)
        except json.JSONDecodeError:
            continue
        break

    if isinstance(entities_data, dict):
        # A single object instead of a list of objects.
        entities_data = [entities_data]
    if not isinstance(entities_data, list):
        logger.warning("Failed to parse entity extraction response")
        return []

    source_urls = [r.get("url", "") for r in results[:3]]
    entities = []
    for data in entities_data:
        if not isinstance(data, dict):
            continue
        name = data.get("name")
        if not isinstance(name, str) or not name.strip():
            continue
        entities.append(
            EntityCandidate(
                name=name,
                entity_type=entity_type,
                aliases=data.get("aliases", []),
                properties=data.get("properties", {}),
                sources=list(source_urls),  # each entity gets its own list
            )
        )

    return entities
472
+
473
+ async def _search_with_questions(
474
+ self, questions: List[str], entity_type: str
475
+ ) -> List[EntityCandidate]:
476
+ """Search using generated questions and extract entities."""
477
+ all_entities = []
478
+
479
+ # Execute searches
480
+ search_tasks = []
481
+ for question in questions:
482
+ if question not in self.knowledge_graph.search_cache:
483
+ search_tasks.append(self._cached_search(question))
484
+
485
+ results = await asyncio.gather(*search_tasks)
486
+
487
+ # Extract entities
488
+ for query_results in results:
489
+ entities = await self._extract_entities_from_results(
490
+ query_results, entity_type
491
+ )
492
+ all_entities.extend(entities)
493
+
494
+ return all_entities
495
+
496
+ async def _evaluate_entities(
497
+ self, constraints: List[Constraint]
498
+ ) -> List[EntityCandidate]:
499
+ """Evaluate all entities against constraints."""
500
+ evaluated = []
501
+
502
+ for entity_name, entity in self.knowledge_graph.entities.items():
503
+ # Check each constraint
504
+ total_score = 0.0
505
+ constraint_scores = {}
506
+
507
+ for constraint in constraints:
508
+ # Check if we already have evidence for this constraint-entity pair
509
+ if constraint.value in self.knowledge_graph.constraint_evidence:
510
+ if (
511
+ entity_name
512
+ in self.knowledge_graph.constraint_evidence[
513
+ constraint.value
514
+ ]
515
+ ):
516
+ evidence = self.knowledge_graph.constraint_evidence[
517
+ constraint.value
518
+ ][entity_name]
519
+ score = evidence.get("score", 0.0)
520
+ else:
521
+ # Gather new evidence
522
+ score = await self._verify_entity_constraint(
523
+ entity, constraint
524
+ )
525
+ else:
526
+ score = await self._verify_entity_constraint(
527
+ entity, constraint
528
+ )
529
+
530
+ constraint_scores[constraint.value] = score
531
+ total_score += score * constraint.weight
532
+
533
+ # Update entity with scores
534
+ entity.constraint_matches = constraint_scores
535
+ entity.confidence = total_score / sum(c.weight for c in constraints)
536
+
537
+ if entity.confidence > 0.3: # Only keep reasonable candidates
538
+ evaluated.append(entity)
539
+
540
+ # Sort by confidence
541
+ return sorted(evaluated, key=lambda e: e.confidence, reverse=True)
542
+
543
+ async def _verify_entity_constraint(
544
+ self, entity: EntityCandidate, constraint: Constraint
545
+ ) -> float:
546
+ """Verify if an entity satisfies a constraint."""
547
+ # Build targeted search query
548
+ search_terms = [entity.name] + entity.aliases[:2]
549
+ constraint_terms = self._extract_constraint_terms(constraint)
550
+
551
+ best_score = 0.0
552
+ for term in search_terms:
553
+ query = f'"{term}" {" ".join(constraint_terms)}'
554
+
555
+ # Search for evidence
556
+ results = await self._cached_search(query)
557
+
558
+ if results:
559
+ # Quick verification with LLM
560
+ evidence_text = " ".join(
561
+ [r.get("snippet", "") for r in results[:3]]
562
+ )
563
+
564
+ prompt = f"""Does {entity.name} satisfy this constraint?
565
+
566
+ Constraint: {constraint.description}
567
+ Evidence: {evidence_text}
568
+
569
+ Answer with a confidence score from 0.0 to 1.0 and brief explanation.
570
+ Format: SCORE: X.X | REASON: explanation"""
571
+
572
+ response = await self.model.ainvoke(prompt)
573
+ content = response.content
574
+
575
+ # Extract score
576
+ score_match = re.search(r"SCORE:\s*([\d.]+)", content)
577
+ if score_match:
578
+ score = float(score_match.group(1))
579
+ best_score = max(best_score, score)
580
+
581
+ # Cache the evidence
582
+ self.knowledge_graph.add_constraint_evidence(
583
+ constraint.value,
584
+ entity.name,
585
+ {
586
+ "score": score,
587
+ "evidence": evidence_text,
588
+ "reason": content,
589
+ },
590
+ )
591
+
592
+ return best_score
593
+
594
+ def _extract_constraint_terms(self, constraint: Constraint) -> List[str]:
595
+ """Extract searchable terms from a constraint."""
596
+ terms = []
597
+
598
+ # Remove common prefixes
599
+ value = constraint.value
600
+ for prefix in ["The answer must", "Must be", "Should be", "Is"]:
601
+ if value.startswith(prefix):
602
+ value = value[len(prefix) :].strip()
603
+ break
604
+
605
+ # Extract specific terms based on constraint type
606
+ if constraint.type.value == "TEMPORAL":
607
+ # Extract years
608
+ terms.extend(re.findall(r"\b(19\d{2}|20\d{2})\b", value))
609
+ elif constraint.type.value == "STATISTIC":
610
+ # Extract numbers
611
+ terms.extend(re.findall(r"\b\d+\b", value))
612
+ elif constraint.type.value == "LOCATION":
613
+ # Extract proper nouns
614
+ terms.extend(
615
+ re.findall(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b", value)
616
+ )
617
+
618
+ # Add key descriptive words
619
+ words = value.split()
620
+ for word in words:
621
+ if len(word) > 4 and word.lower() not in [
622
+ "must",
623
+ "should",
624
+ "would",
625
+ "could",
626
+ ]:
627
+ terms.append(word)
628
+
629
+ return terms[:5] # Limit to avoid overly long queries
630
+
631
def extract_entity_candidates(
    self, constraints: List[Constraint]
) -> List[str]:
    """Extract potential entity names from the constraint texts.

    Each constraint's ``value`` is scanned for capitalised phrases, for
    names followed by a company suffix, and for upper-case acronyms.

    Args:
        constraints: Constraints whose ``value`` text is scanned.

    Returns:
        Distinct candidate names ordered by ``entity_specificity``, most
        specific first. Candidates with equal specificity keep the order
        in which they were first found, so the result is the same on
        every run.
    """
    proper_noun_pattern = r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b"
    company_patterns = (
        r"([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+(?:Group|Inc|Ltd|Corp|Company|Corporation)",
        r"([A-Z][a-z]+(?:-[A-Z][a-z]+)*)\s+(?:Group|Inc|Ltd|Corp)",
        r"([A-Z]{2,}(?:-[A-Z]{2,})*)",  # Acronyms like PRAN-RFL
    )

    candidates = []
    for constraint in constraints:
        # Proper nouns are likely entity names.
        candidates.extend(re.findall(proper_noun_pattern, constraint.value))
        for pattern in company_patterns:
            candidates.extend(re.findall(pattern, constraint.value))

    # dict.fromkeys removes duplicates while keeping first-seen order, and
    # sorted() is stable, so ties no longer depend on set iteration order.
    unique_candidates = dict.fromkeys(candidates)
    return sorted(
        unique_candidates, key=self.entity_specificity, reverse=True
    )
663
+
664
def entity_specificity(self, entity: str) -> float:
    """Score how specific an entity name is, for search prioritisation.

    Higher scores mean the name should be searched first. The score is the
    sum of five parts: 0.1 per character, 2.0 per whitespace-separated
    word, 10.0 if a company suffix occurs in the name, 5.0 if the name
    contains a hyphen, and 8.0 if it is upper-case and at least three
    characters long.

    Args:
        entity: Candidate entity name.

    Returns:
        The specificity score.
    """
    corporate_markers = (
        "Group",
        "Inc",
        "Ltd",
        "Corp",
        "Corporation",
        "Company",
        "Conglomerate",
    )
    has_corporate_marker = any(
        marker in entity for marker in corporate_markers
    )
    looks_like_acronym = entity.isupper() and len(entity) >= 3

    length_part = len(entity) * 0.1
    word_part = len(entity.split()) * 2.0
    suffix_part = 10.0 if has_corporate_marker else 0.0
    hyphen_part = 5.0 if "-" in entity else 0.0  # e.g. PRAN-RFL
    acronym_part = 8.0 if looks_like_acronym else 0.0

    # Added left to right on purpose, so the float result does not depend
    # on how a summation helper would accumulate.
    return (
        0.0
        + length_part
        + word_part
        + suffix_part
        + hyphen_part
        + acronym_part
    )
700
+
701
+ def _gather_entity_evidence(self, candidate, constraint):
702
+ """Evidence gatherer function for constraint checker."""
703
+ # Convert to EntityCandidate if needed
704
+ if not isinstance(candidate, EntityCandidate):
705
+ entity = EntityCandidate(
706
+ name=candidate.name
707
+ if hasattr(candidate, "name")
708
+ else str(candidate),
709
+ entity_type="unknown",
710
+ )
711
+ else:
712
+ entity = candidate
713
+
714
+ # Check cache first
715
+ if constraint.value in self.knowledge_graph.constraint_evidence:
716
+ if (
717
+ entity.name
718
+ in self.knowledge_graph.constraint_evidence[constraint.value]
719
+ ):
720
+ evidence_data = self.knowledge_graph.constraint_evidence[
721
+ constraint.value
722
+ ][entity.name]
723
+ return [
724
+ {
725
+ "text": evidence_data.get("evidence", ""),
726
+ "source": "cache",
727
+ "confidence": evidence_data.get("score", 0.5),
728
+ }
729
+ ]
730
+
731
+ # Generate search query
732
+ constraint_terms = self._extract_constraint_terms(constraint)
733
+ query = f'"{entity.name}" {" ".join(constraint_terms)}'
734
+
735
+ # Search
736
+ results = (
737
+ self.search_engine.run(query)
738
+ if hasattr(self.search_engine, "run")
739
+ else []
740
+ )
741
+
742
+ # Convert to evidence format
743
+ evidence = []
744
+ for i, result in enumerate(results[:3]):
745
+ evidence.append(
746
+ {
747
+ "text": result.get("snippet", ""),
748
+ "source": result.get("url", f"result_{i}"),
749
+ "confidence": 0.7 - (i * 0.1),
750
+ }
751
+ )
752
+
753
+ return evidence
754
+
755
+ def _summarize_knowledge(self) -> str:
756
+ """Summarize current knowledge for question generation."""
757
+ summary_parts = []
758
+
759
+ # Top entities by confidence
760
+ entities_by_confidence = sorted(
761
+ self.knowledge_graph.entities.values(),
762
+ key=lambda e: e.confidence,
763
+ reverse=True,
764
+ )[:5]
765
+
766
+ if entities_by_confidence:
767
+ summary_parts.append("Top candidates found:")
768
+ for entity in entities_by_confidence:
769
+ summary_parts.append(
770
+ f"- {entity.name} ({entity.entity_type}): {entity.confidence:.2%} confidence"
771
+ )
772
+ if entity.properties:
773
+ props = ", ".join(
774
+ f"{k}={v}"
775
+ for k, v in list(entity.properties.items())[:3]
776
+ )
777
+ summary_parts.append(f" Properties: {props}")
778
+
779
+ # Constraint satisfaction summary
780
+ if self.knowledge_graph.constraint_evidence:
781
+ summary_parts.append("\nConstraint verification status:")
782
+ for constraint, entities in list(
783
+ self.knowledge_graph.constraint_evidence.items()
784
+ )[:3]:
785
+ summary_parts.append(f"- {constraint[:50]}...")
786
+ for entity_name, evidence in list(entities.items())[:2]:
787
+ score = evidence.get("score", 0)
788
+ summary_parts.append(f" {entity_name}: {score:.2%}")
789
+
790
+ return "\n".join(summary_parts)
791
+
792
async def _cached_search(self, query: str) -> List[Dict]:
    """Perform a search, consulting the shared search cache first.

    Args:
        query: Search query string.

    Returns:
        The cached results when the cache has an entry for ``query`` under
        the ``"browsecomp_entity"`` namespace. Otherwise the search engine's
        results, normalised to a list and stored in the cache for 1800
        seconds. An empty list is returned when the engine offers no usable
        entry point or when the search raises.
    """
    cache = get_search_cache()

    hit = cache.get(query, "browsecomp_entity")
    if hit is not None:
        logger.debug(f"Using cached search results for: {query[:50]}...")
        return hit

    try:
        engine = self.search_engine

        # Entry points are tried in this order: .run, .search, then the
        # engine object itself if it is callable.
        if hasattr(engine, "run"):
            invoke = engine.run
        elif hasattr(engine, "search"):
            invoke = engine.search
        elif callable(engine):
            invoke = engine
        else:
            logger.warning("Search engine has no callable method")
            return []

        raw = invoke(query)

        # Accept a bare list or a dict wrapping it under "results".
        if isinstance(raw, list):
            normalized_results = raw
        elif isinstance(raw, dict):
            normalized_results = raw.get("results", [])
        else:
            normalized_results = []

        cache.put(
            query, normalized_results, "browsecomp_entity", ttl=1800
        )  # 30 minutes

        logger.debug(f"Cached new search results for: {query[:50]}...")
        return normalized_results

    except Exception as e:
        logger.error(f"Search failed for query '{query}': {e}")
        return []
833
+
834
+ async def _generate_entity_answer(
835
+ self,
836
+ query: str,
837
+ best_entity: EntityCandidate,
838
+ constraints: List[Constraint],
839
+ ) -> str:
840
+ """Generate answer for the best matching entity."""
841
+ constraint_details = []
842
+ for constraint in constraints:
843
+ score = best_entity.constraint_matches.get(constraint.value, 0)
844
+ constraint_details.append(
845
+ f"- {constraint.description}: {score:.2%} confidence"
846
+ )
847
+
848
+ prompt = f"""Based on the search results, provide the answer to: {query}
849
+
850
+ Best matching {best_entity.entity_type}: {best_entity.name}
851
+ Overall confidence: {best_entity.confidence:.2%}
852
+
853
+ Aliases/Other names: {", ".join(best_entity.aliases[:3]) if best_entity.aliases else "None found"}
854
+
855
+ Properties:
856
+ {json.dumps(best_entity.properties, indent=2) if best_entity.properties else "No properties found"}
857
+
858
+ Constraint satisfaction:
859
+ {chr(10).join(constraint_details)}
860
+
861
+ Provide a clear, confident answer that explains why this entity matches the constraints."""
862
+
863
+ response = await self.model.ainvoke(prompt)
864
+ return response.content
865
+
866
+ async def _generate_uncertain_answer(
867
+ self, query: str, top_entities: List[EntityCandidate]
868
+ ) -> str:
869
+ """Generate answer when no high-confidence match is found."""
870
+ if not top_entities:
871
+ return "Unable to find any entities matching the specified constraints."
872
+
873
+ candidates_info = []
874
+ for entity in top_entities:
875
+ candidates_info.append(
876
+ f"- {entity.name}: {entity.confidence:.2%} confidence"
877
+ )
878
+
879
+ prompt = f"""Based on the search results for: {query}
880
+
881
+ Found these potential matches but with low confidence:
882
+ {chr(10).join(candidates_info)}
883
+
884
+ The search was unable to find a definitive answer matching all constraints.
885
+
886
+ Provide a helpful response explaining what was found and why no definitive answer could be determined."""
887
+
888
+ response = await self.model.ainvoke(prompt)
889
+ return response.content
890
+
891
def analyze_topic(self, query: str) -> Dict:
    """
    Analyze a topic using entity-focused BrowseComp approach.

    This is the synchronous entry point around ``_analyze_topic_async``.
    When the calling thread has no running event loop the coroutine is run
    with ``asyncio.run``. When a loop is already running, the coroutine is
    run in its own loop on a worker thread and this call blocks until it
    finishes.

    Args:
        query: The research query to analyze

    Returns:
        Dict containing findings, iterations, and questions. If anything
        raises, an error dict with zero iterations and zero confidence is
        returned instead.
    """
    import asyncio
    import concurrent.futures

    try:
        # get_running_loop() is the supported probe for "am I inside a
        # loop?". get_event_loop() is deprecated when no loop is current.
        try:
            asyncio.get_running_loop()
            loop_is_running = True
        except RuntimeError:
            loop_is_running = False

        if not loop_is_running:
            return asyncio.run(self._analyze_topic_async(query))

        # A running loop cannot be re-entered from synchronous code, so
        # the coroutine gets a fresh loop on a worker thread.
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=1
        ) as executor:
            future = executor.submit(
                asyncio.run, self._analyze_topic_async(query)
            )
            return future.result()

    except Exception as e:
        logger.error(f"Error in analyze_topic: {e}")
        return {
            "findings": [f"Error analyzing query: {str(e)}"],
            "iterations": 0,
            "questions": {},
            "entities_found": 0,
            "confidence": 0.0,
        }
933
+
934
async def _analyze_topic_async(self, query: str) -> Dict:
    """Async implementation of topic analysis.

    Steps: parse constraints from the query, generate search questions,
    run up to three discovery passes (stopping early once a pass yields an
    entity with confidence above 0.8), evaluate the known entities against
    the constraints, then ask the model for a final answer.

    Args:
        query: The research query to analyze.

    Returns:
        Dict with ``findings``, ``iterations`` (discovery passes actually
        run), ``questions``, ``entities_found``, ``confidence`` and
        ``strategy``. On any exception an error dict with zero iterations
        and zero confidence is returned instead.
    """
    try:
        self._update_progress("Starting entity-focused analysis...", 0)

        # Parse constraints from query
        constraint_analyzer = ConstraintAnalyzer()
        constraints = constraint_analyzer.analyze_query(query)

        self._update_progress(
            f"Identified {len(constraints)} constraints", 10
        )

        # Generate initial search questions
        question_generator = BrowseCompQuestionGenerator()
        initial_questions = question_generator.generate_questions(
            query, constraints
        )

        self._update_progress("Generated initial questions", 20)

        # The query does not change between passes, so resolve it once.
        entity_type = self._determine_entity_type(query)

        # Progressive entity discovery.
        # NOTE(review): every pass reuses ``initial_questions``; no new
        # questions are generated between passes - confirm this is
        # intended.
        max_iterations = 3
        iterations_run = 0

        for iteration in range(max_iterations):
            self._update_progress(
                f"Iteration {iteration + 1}: Discovering entities...",
                30 + iteration * 20,
            )

            entities = await self._discover_entities_from_questions(
                initial_questions, entity_type
            )
            iterations_run = iteration + 1

            # Break if we found high-confidence entities
            if any(e.confidence > 0.8 for e in entities):
                logger.info(
                    f"Found high-confidence entities in iteration {iteration + 1}"
                )
                break

        self._update_progress(
            "Evaluating entities against constraints...", 80
        )

        # Evaluate entities
        evaluated_entities = await self._evaluate_entities(constraints)

        # Generate final answer
        best_entity = None
        if evaluated_entities:
            best_entity = max(
                evaluated_entities, key=lambda e: e.confidence
            )
            if best_entity.confidence > 0.6:
                answer = await self._generate_entity_answer(
                    query, best_entity, constraints
                )
            else:
                answer = await self._generate_uncertain_answer(
                    query, evaluated_entities[:3]
                )
        else:
            answer = (
                "No entities were found matching the specified constraints."
            )

        self._update_progress("Analysis complete", 100)

        # Report the passes actually run. A counter incremented after the
        # last pass would be one too high when no early exit happened.
        return {
            "findings": [answer],
            "iterations": iterations_run,
            "questions": {
                f"iteration_{i}": initial_questions
                for i in range(iterations_run)
            },
            "entities_found": len(evaluated_entities),
            "confidence": best_entity.confidence
            if best_entity is not None
            else 0.0,
            "strategy": "browsecomp_entity",
        }

    except Exception as e:
        logger.error(f"Error in async topic analysis: {e}")
        return {
            "findings": [f"Analysis failed: {str(e)}"],
            "iterations": 0,
            "questions": {},
            "entities_found": 0,
            "confidence": 0.0,
        }