local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +5 -3
  149. local_deep_research/web/database/models.py +51 -2
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +51 -61
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +227 -41
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +310 -103
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.0.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,329 @@
1
+ """
2
+ Adaptive candidate explorer implementation.
3
+
4
+ This explorer adapts its search strategy based on the success of different
5
+ approaches and the quality of candidates found.
6
+ """
7
+
8
+ import time
9
+ from collections import defaultdict
10
+ from typing import List, Optional
11
+
12
+ from loguru import logger
13
+
14
+ from ..candidates.base_candidate import Candidate
15
+ from ..constraints.base_constraint import Constraint
16
+ from .base_explorer import (
17
+ BaseCandidateExplorer,
18
+ ExplorationResult,
19
+ ExplorationStrategy,
20
+ )
21
+
22
+
23
class AdaptiveExplorer(BaseCandidateExplorer):
    """
    Adaptive candidate explorer that learns from search results.

    This explorer:
    1. Tries different search strategies
    2. Tracks which strategies work best
    3. Adapts future searches based on success rates
    4. Focuses effort on the most productive approaches

    Strategy names are plain strings. The ones this class knows how to turn
    into queries are "direct_search", "synonym_expansion",
    "category_exploration", "related_terms" and "constraint_focused"; any
    other name falls back to the direct-search behaviour.
    """

    def __init__(
        self,
        *args,
        initial_strategies: Optional[List[str]] = None,
        adaptation_threshold: int = 5,  # Adapt after this many searches
        **kwargs,
    ):
        """
        Initialize adaptive explorer.

        Args:
            *args: Positional arguments forwarded to the base explorer.
            initial_strategies: Starting search strategies to try, in the
                order they are cycled through. ``None`` or an empty list
                selects the built-in defaults.
            adaptation_threshold: Number of searches before adapting
            **kwargs: Keyword arguments forwarded to the base explorer.
        """
        super().__init__(*args, **kwargs)

        self.initial_strategies = initial_strategies or [
            "direct_search",
            "synonym_expansion",
            "category_exploration",
            "related_terms",
        ]

        self.adaptation_threshold = adaptation_threshold

        # Track strategy performance; entries are created lazily the first
        # time a strategy is recorded.
        self.strategy_stats = defaultdict(
            lambda: {"attempts": 0, "candidates_found": 0, "quality_sum": 0.0}
        )
        self.current_strategy = self.initial_strategies[0]

    def explore(
        self,
        initial_query: str,
        constraints: Optional[List[Constraint]] = None,
        entity_type: Optional[str] = None,
    ) -> ExplorationResult:
        """
        Explore candidates using adaptive strategy.

        Repeatedly picks a strategy, builds a query with it, runs the search
        and records how many candidates the strategy produced. After every
        ``adaptation_threshold`` searches the current strategy is switched
        to the best performer so far. The loop ends when the base class'
        ``_should_continue_exploration`` says so, or when every initial
        strategy has failed to produce a new, unexplored query.

        Args:
            initial_query: The query to start exploring from.
            constraints: Optional constraints, only used by the
                "constraint_focused" strategy.
            entity_type: Optional entity type passed through to candidate
                extraction and recorded in the result metadata.

        Returns:
            ExplorationResult with the ranked, de-duplicated candidates
            (truncated to ``self.max_candidates``) and per-strategy stats.
        """
        start_time = time.time()
        logger.info(f"Starting adaptive exploration for: {initial_query}")

        all_candidates = []
        exploration_paths = []
        total_searched = 0

        # Number of searches executed since the last adaptation
        search_count = 0

        while self._should_continue_exploration(
            start_time, len(all_candidates)
        ):
            # Choose strategy based on current performance
            strategy = self._choose_strategy(search_count)

            # Generate query using chosen strategy
            query = self._generate_query_with_strategy(
                initial_query, strategy, all_candidates, constraints
            )

            if not query or query.lower() in self.explored_queries:
                # Nothing new from this strategy: try next strategy or stop
                if not self._try_next_strategy():
                    break
                continue

            # Execute search
            logger.info(
                f"Using strategy '{strategy}' for query: {query[:50]}..."
            )
            results = self._execute_search(query)
            candidates = self._extract_candidates_from_results(
                results, entity_type
            )

            # Track strategy performance
            self._update_strategy_stats(strategy, candidates)

            # Add results
            all_candidates.extend(candidates)
            total_searched += 1
            search_count += 1

            exploration_paths.append(
                f"{strategy}: {query} -> {len(candidates)} candidates"
            )

            # Adapt strategy if threshold reached
            if search_count >= self.adaptation_threshold:
                self._adapt_strategy()
                search_count = 0

        # Process final results
        unique_candidates = self._deduplicate_candidates(all_candidates)
        ranked_candidates = self._rank_candidates_by_relevance(
            unique_candidates, initial_query
        )
        final_candidates = ranked_candidates[: self.max_candidates]

        elapsed_time = time.time() - start_time
        logger.info(
            f"Adaptive exploration completed: {len(final_candidates)} candidates in {elapsed_time:.1f}s"
        )

        # Copy the per-strategy counters so the returned result is a
        # snapshot and does not change if this explorer is run again.
        stats_snapshot = {
            name: dict(stats) for name, stats in self.strategy_stats.items()
        }

        return ExplorationResult(
            candidates=final_candidates,
            total_searched=total_searched,
            unique_candidates=len(unique_candidates),
            exploration_paths=exploration_paths,
            metadata={
                "strategy": "adaptive",
                "strategy_stats": stats_snapshot,
                "final_strategy": self.current_strategy,
                "entity_type": entity_type,
            },
            elapsed_time=elapsed_time,
            strategy_used=ExplorationStrategy.ADAPTIVE,
        )

    def generate_exploration_queries(
        self,
        base_query: str,
        found_candidates: List[Candidate],
        constraints: Optional[List[Constraint]] = None,
    ) -> List[str]:
        """
        Generate queries using adaptive approach.

        Builds one query with each of the (up to) three best performing
        strategies and returns the non-empty ones, best strategy first.
        """
        queries = []

        # Generate queries using best performing strategies
        for strategy in self._get_top_strategies(3):
            query = self._generate_query_with_strategy(
                base_query, strategy, found_candidates, constraints
            )
            if query:
                queries.append(query)

        return queries

    def _choose_strategy(self, search_count: int) -> str:
        """
        Choose the best strategy based on current performance.

        Args:
            search_count: Searches executed since the last adaptation.

        Returns:
            The current strategy while ``search_count`` is below the
            adaptation threshold, otherwise the top performing strategy.
        """
        if search_count < self.adaptation_threshold:
            # Use current strategy during initial phase
            return self.current_strategy

        # Choose best performing strategy
        best_strategies = self._get_top_strategies(1)
        return best_strategies[0] if best_strategies else self.current_strategy

    def _get_top_strategies(self, n: int) -> List[str]:
        """
        Get top N performing strategies.

        Strategies are ranked by candidates found per attempt. When no
        statistics have been recorded yet, the first ``n`` initial
        strategies are returned instead.
        """
        if not self.strategy_stats:
            return self.initial_strategies[:n]

        # Sort by candidates found per attempt; max(..., 1) guards against
        # division by zero for a strategy with no recorded attempts.
        sorted_strategies = sorted(
            self.strategy_stats.items(),
            key=lambda x: x[1]["candidates_found"] / max(x[1]["attempts"], 1),
            reverse=True,
        )

        return [strategy for strategy, _ in sorted_strategies[:n]]

    def _generate_query_with_strategy(
        self,
        base_query: str,
        strategy: str,
        found_candidates: List[Candidate],
        constraints: Optional[List[Constraint]] = None,
    ) -> Optional[str]:
        """
        Generate a query using specific strategy.

        Unknown strategy names, and "constraint_focused" without any
        constraints, fall back to the direct-search query.

        Returns:
            The generated query, or None if the strategy produced nothing
            or raised an error (the error is logged).
        """
        try:
            if strategy == "direct_search":
                return self._direct_search_query(base_query)
            elif strategy == "synonym_expansion":
                return self._synonym_expansion_query(base_query)
            elif strategy == "category_exploration":
                return self._category_exploration_query(
                    base_query, found_candidates
                )
            elif strategy == "related_terms":
                return self._related_terms_query(base_query, found_candidates)
            elif strategy == "constraint_focused" and constraints:
                return self._constraint_focused_query(base_query, constraints)
            else:
                return self._direct_search_query(base_query)

        except Exception as e:
            logger.error(
                f"Error generating query with strategy {strategy}: {e}"
            )
            return None

    def _direct_search_query(self, base_query: str) -> str:
        """
        Generate direct search variation.

        Returns the first fixed variation of ``base_query`` that is not yet
        in ``self.explored_queries`` (compared lower-cased), or
        ``base_query`` itself once every variation has been explored.
        """
        variations = [
            f'"{base_query}" examples',
            f"{base_query} list",
            f"{base_query} instances",
            f"types of {base_query}",
        ]

        # Choose variation not yet explored
        for variation in variations:
            if variation.lower() not in self.explored_queries:
                return variation

        return base_query

    def _query_from_model(
        self, prompt: str, base_query: str, purpose: str
    ) -> Optional[str]:
        """
        Ask the language model for a query and normalise its reply.

        Args:
            prompt: Full prompt sent to ``self.model``.
            base_query: Original query; a reply identical to it is rejected.
            purpose: Short label used in the log message on failure.

        Returns:
            The stripped model reply, or None when the reply is empty,
            equals ``base_query``, or the model call fails.
        """
        try:
            response = self.model.invoke(prompt).content.strip()
        except Exception as e:
            # Best effort: a failed model call just means "no query".
            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            logger.warning(f"Model call failed for {purpose} query: {e}")
            return None

        if not response or response == base_query:
            return None
        return response

    def _synonym_expansion_query(self, base_query: str) -> Optional[str]:
        """Generate query with synonym expansion."""
        prompt = f"""
Generate a search query that means the same as "{base_query}" but uses different words.
Focus on synonyms and alternative terminology.

Query:
"""
        return self._query_from_model(prompt, base_query, "synonym expansion")

    def _category_exploration_query(
        self, base_query: str, found_candidates: List[Candidate]
    ) -> Optional[str]:
        """
        Generate query exploring categories of found candidates.

        Without candidates this asks for categories of the base query;
        otherwise it asks for things similar to the first three candidates.
        """
        if not found_candidates:
            return f"categories of {base_query}"

        sample_names = [c.name for c in found_candidates[:3]]
        return f"similar to {', '.join(sample_names)}"

    def _related_terms_query(
        self, base_query: str, found_candidates: List[Candidate]
    ) -> Optional[str]:
        """
        Generate query using related terms.

        ``found_candidates`` is accepted for signature parity with the
        other strategy helpers but is not used.
        """
        prompt = f"""
Given the search topic "{base_query}", suggest a related search term that would find similar but different examples.

Related search term:
"""
        return self._query_from_model(prompt, base_query, "related terms")

    def _constraint_focused_query(
        self, base_query: str, constraints: List[Constraint]
    ) -> Optional[str]:
        """
        Generate query focused on a specific constraint.

        Currently always uses the first constraint in the list.
        """
        if not constraints:
            return None

        # Pick least explored constraint
        constraint = constraints[0]  # Simple selection
        return f"{base_query} {constraint.value}"

    def _update_strategy_stats(
        self, strategy: str, candidates: List[Candidate]
    ):
        """
        Update performance statistics for a strategy.

        Records one more attempt, the number of candidates it produced and
        a quantity-based quality score (0.1 per candidate).
        """
        stats = self.strategy_stats[strategy]
        stats["attempts"] += 1
        stats["candidates_found"] += len(candidates)

        # Simple quality assessment (could be more sophisticated)
        quality = len(candidates) * 0.1  # Basic quality based on quantity
        stats["quality_sum"] += quality

    def _adapt_strategy(self):
        """Adapt current strategy based on performance."""
        best_strategies = self._get_top_strategies(1)
        if best_strategies and best_strategies[0] != self.current_strategy:
            old_strategy = self.current_strategy
            self.current_strategy = best_strategies[0]
            logger.info(
                f"Adapted strategy from '{old_strategy}' to '{self.current_strategy}'"
            )

    def _try_next_strategy(self) -> bool:
        """
        Try the next available strategy.

        Advances ``self.current_strategy`` to the next entry of
        ``self.initial_strategies``. A current strategy that is not in the
        list is treated as if it were the first entry.

        Returns:
            True if the strategy was advanced, False if doing so would wrap
            around to the first strategy (all strategies have been tried).
        """
        current_index = (
            self.initial_strategies.index(self.current_strategy)
            if self.current_strategy in self.initial_strategies
            else 0
        )
        next_index = (current_index + 1) % len(self.initial_strategies)

        if next_index == 0:  # We've tried all strategies
            return False

        self.current_strategy = self.initial_strategies[next_index]
        return True
@@ -0,0 +1,341 @@
1
+ """
2
+ Base candidate explorer for inheritance-based exploration system.
3
+
4
+ This module provides the base interface and common functionality for
5
+ candidate exploration implementations.
6
+ """
7
+
8
+ from abc import ABC, abstractmethod
9
+ from dataclasses import dataclass
10
+ from enum import Enum
11
+ from typing import Dict, List, Optional, Set
12
+
13
+ from langchain_core.language_models import BaseChatModel
14
+ from loguru import logger
15
+
16
+ from ..candidates.base_candidate import Candidate
17
+ from ..constraints.base_constraint import Constraint
18
+
19
+
20
class ExplorationStrategy(Enum):
    """Named approaches an explorer can use when looking for candidates."""

    # Explore widely first
    BREADTH_FIRST = "breadth_first"
    # Deep dive into promising areas
    DEPTH_FIRST = "depth_first"
    # Let constraints guide exploration
    CONSTRAINT_GUIDED = "constraint_guided"
    # Maximize candidate diversity
    DIVERSITY_FOCUSED = "diversity_focused"
    # Adapt based on findings
    ADAPTIVE = "adaptive"
28
+
29
+
30
@dataclass
class ExplorationResult:
    """
    Outcome of one candidate-exploration run.

    Bundles the candidates that were found with bookkeeping about the run
    (counts, paths followed, timing and the strategy that produced it).
    """

    # Candidates discovered during the run
    candidates: List[Candidate]
    # Count of searches performed (as reported by the explorer)
    total_searched: int
    # Count of distinct candidates (as reported by the explorer)
    unique_candidates: int
    # Descriptions of the exploration paths that were followed
    exploration_paths: List[str]
    # Free-form, explorer-specific details
    metadata: Dict
    # Time spent exploring; presumably seconds -- confirm against explorers
    elapsed_time: float
    # Strategy that produced this result
    strategy_used: ExplorationStrategy
41
+
42
+
43
class BaseCandidateExplorer(ABC):
    """
    Base class for candidate exploration implementations.

    This provides the common interface and shared functionality that
    all candidate explorers should implement. Subclasses supply
    ``explore`` and ``generate_exploration_queries``; the helpers here
    cover running searches, turning results into candidates via the
    language model, and de-duplicating / ranking those candidates.
    """

    def __init__(
        self,
        model: BaseChatModel,
        search_engine,
        max_candidates: int = 50,
        max_search_time: float = 60.0,
        **kwargs,
    ):
        """
        Initialize the base candidate explorer.

        Args:
            model: Language model for analysis
            search_engine: Search engine for finding candidates
            max_candidates: Maximum number of candidates to find
            max_search_time: Maximum time to spend searching
            **kwargs: Additional parameters for specific implementations
                (accepted and ignored by the base class)
        """
        self.model = model
        self.search_engine = search_engine
        self.max_candidates = max_candidates
        self.max_search_time = max_search_time

        # Tracking
        # Lower-cased text of every query passed to _execute_search.
        self.explored_queries: Set[str] = set()
        # Candidates created so far, keyed by their exact answer text.
        self.found_candidates: Dict[str, Candidate] = {}

    @abstractmethod
    def explore(
        self,
        initial_query: str,
        constraints: Optional[List[Constraint]] = None,
        entity_type: Optional[str] = None,
    ) -> ExplorationResult:
        """
        Explore and discover candidates.

        Args:
            initial_query: Starting query for exploration
            constraints: Optional constraints to guide exploration
            entity_type: Optional entity type to focus on

        Returns:
            ExplorationResult: Complete exploration results
        """
        pass

    @abstractmethod
    def generate_exploration_queries(
        self,
        base_query: str,
        found_candidates: List[Candidate],
        constraints: Optional[List[Constraint]] = None,
    ) -> List[str]:
        """
        Generate new queries for continued exploration.

        Args:
            base_query: Original base query
            found_candidates: Candidates found so far
            constraints: Optional constraints to consider

        Returns:
            List[str]: New queries to explore
        """
        pass

    def _execute_search(self, query: str) -> Dict:
        """
        Execute a search query and normalize the result to a dict.

        Args:
            query: Query text handed to ``self.search_engine.run``.

        Returns:
            Dict: A list result is wrapped as
            ``{"results": <list>, "query": query}``; a dict result is
            returned unchanged; any other type, or any exception raised
            while searching, yields ``{"results": [], "query": query}``.
        """
        try:
            # Mark query as explored
            self.explored_queries.add(query.lower())

            # Execute search
            results = self.search_engine.run(query)

            # Handle different result formats
            if isinstance(results, list):
                # If results is a list, wrap it in the expected format
                formatted_results = {"results": results, "query": query}
                logger.info(
                    f"Search '{query[:50]}...' returned {len(results)} results"
                )
                return formatted_results
            elif isinstance(results, dict):
                # If results is already a dict, use it as is
                result_count = len(results.get("results", []))
                logger.info(
                    f"Search '{query[:50]}...' returned {result_count} results"
                )
                return results
            else:
                # Unknown format, return empty
                logger.warning(f"Unknown search result format: {type(results)}")
                return {"results": [], "query": query}

        except Exception as e:
            # Best effort: a failed search is reported as an empty result
            # set, in the same shape as the other empty-result path.
            logger.error(f"Error executing search '{query}': {e}")
            return {"results": [], "query": query}

    def _extract_candidates_from_results(
        self,
        results: Dict,
        original_query: Optional[str] = None,
        entity_type: Optional[str] = None,
    ) -> List[Candidate]:
        """
        Generate answer candidates directly from search results using LLM.

        Args:
            results: Search output with a ``"results"`` list whose items
                expose ``title`` / ``snippet`` through ``.get``.
            original_query: Question the candidates should answer; when
                missing or empty no candidates are generated.
            entity_type: Accepted for interface compatibility; not used
                by this implementation.

        Returns:
            List[Candidate]: Candidates not previously recorded in
            ``self.found_candidates``; each new one is also stored there
            under its answer text.
        """
        candidates: List[Candidate] = []
        search_hits = results.get("results", [])

        # Collect all search result content
        all_content = []
        for result in search_hits:
            title = result.get("title", "")
            snippet = result.get("snippet", "")
            if title or snippet:
                all_content.append(f"Title: {title}\nContent: {snippet}")

        if not all_content or not original_query:
            return candidates

        # Generate answer candidates using LLM
        answer_candidates = self._generate_answer_candidates(
            original_query,
            "\n\n".join(all_content[:10]),  # Limit to first 10 results
        )

        for answer in answer_candidates:
            if answer and answer not in self.found_candidates:
                candidate = Candidate(
                    name=answer,
                    metadata={
                        "source": "llm_answer_generation",
                        "query": results.get("query", ""),
                        "original_query": original_query,
                        "result_count": len(search_hits),
                    },
                )
                candidates.append(candidate)
                self.found_candidates[answer] = candidate

        return candidates

    def _generate_answer_candidates(
        self, question: str, search_content: str
    ) -> List[str]:
        """
        Generate multiple answer candidates from search results.

        Args:
            question: Question to answer.
            search_content: Concatenated search-result text shown to the
                model.

        Returns:
            List[str]: Up to five answers, one per non-empty response
            line, with a leading bullet or list number removed. Answers
            of two characters or fewer are dropped. An empty list is
            returned if the model call fails.
        """
        import re

        prompt = f"""
Question: {question}

Based on these search results, provide 3-5 possible answers:

{search_content}

Give me multiple possible answers, one per line:
"""

        # One leading list marker: a run of bullet characters, or a one- or
        # two-digit number followed by "." / ")". The lookahead keeps
        # decimals such as "3.14" intact, and bare numbers such as "1984"
        # never match, so numeric answers survive the clean-up.
        list_marker = re.compile(r"^\s*(?:[•*\-]+|\d{1,2}[.)](?!\d))\s*")

        try:
            response = self.model.invoke(prompt)
            content = response.content.strip()

            # Parse multiple answers
            answers = []
            for line in content.split("\n"):
                # Clean up common prefixes and formatting
                line = list_marker.sub("", line.strip()).strip()
                if len(line) > 2:  # Skip empty and very short answers
                    answers.append(line)

            return answers[:5]  # Limit to 5 candidates max

        except Exception as e:
            logger.error(f"Error generating answer candidates: {e}")
            return []

    def _extract_entity_names(
        self, text: str, entity_type: Optional[str] = None
    ) -> List[str]:
        """
        Extract entity names from text using LLM.

        Args:
            text: Source text; only its first 500 characters are sent to
                the model.
            entity_type: Optional kind of entity to focus the prompt on.

        Returns:
            List[str]: Up to five names, one per response line, skipping
            lines of two characters or fewer and lines starting with an
            article ("the ", "a ", "an "). Empty for blank input or when
            the model call fails.
        """
        if not text.strip():
            return []

        focus_instruction = (
            "Focus on: " + entity_type
            if entity_type
            else "Extract any named entities."
        )

        prompt = f"""
Extract specific entity names from this text.
{focus_instruction}

Text: {text[:500]}

Return only the names, one per line. Be selective - only include clear, specific names.
Do not include:
- Generic terms or categories
- Adjectives or descriptions
- Common words

Names:
"""

        try:
            response = self.model.invoke(prompt).content.strip()

            # Parse response into names
            names = []
            for line in response.split("\n"):
                name = line.strip()
                if (
                    name
                    and len(name) > 2
                    and not name.lower().startswith(("the ", "a ", "an "))
                ):
                    names.append(name)

            return names[:5]  # Limit to top 5 per text

        except Exception as e:
            logger.error(f"Error extracting entity names: {e}")
            return []

    def _should_continue_exploration(
        self, start_time: float, candidates_found: int
    ) -> bool:
        """
        Determine if exploration should continue.

        Args:
            start_time: ``time.time()`` value captured when exploration
                began.
            candidates_found: Number of candidates found so far.

        Returns:
            bool: False once more than ``max_search_time`` seconds have
            elapsed or ``max_candidates`` has been reached; True
            otherwise.
        """
        import time

        elapsed = time.time() - start_time

        # Stop if time limit reached
        if elapsed > self.max_search_time:
            logger.info(f"Time limit reached ({elapsed:.1f}s)")
            return False

        # Stop if candidate limit reached
        if candidates_found >= self.max_candidates:
            logger.info(f"Candidate limit reached ({candidates_found})")
            return False

        return True

    def _deduplicate_candidates(
        self, candidates: List[Candidate]
    ) -> List[Candidate]:
        """
        Remove duplicate candidates by case-insensitive, trimmed name.

        Args:
            candidates: Candidates in the order they were found.

        Returns:
            List[Candidate]: The first candidate seen for each distinct
            name, in the original order.
        """
        unique_candidates = []
        seen_names = set()

        for candidate in candidates:
            # Simple deduplication by exact (normalized) name match
            name_key = candidate.name.lower().strip()
            if name_key not in seen_names:
                seen_names.add(name_key)
                unique_candidates.append(candidate)

        return unique_candidates

    def _rank_candidates_by_relevance(
        self, candidates: List[Candidate], query: str
    ) -> List[Candidate]:
        """
        Rank candidates by relevance to original query.

        Each candidate gets a ``relevance_score`` attribute: 0.1 per word
        shared between ``query`` and the candidate's ``"query"`` metadata,
        plus 0.2 per word shared with its ``"result_title"`` metadata.

        Args:
            candidates: Candidates to score (modified in place).
            query: Original query to compare against.

        Returns:
            List[Candidate]: Candidates sorted by descending score; ties
            keep their input order.
        """
        if not candidates:
            return candidates

        # Computed once up front: both scoring branches need it, including
        # for candidates that only carry a "result_title".
        query_words = set(query.lower().split())

        # Simple relevance scoring based on metadata
        for candidate in candidates:
            score = 0.0

            # Score based on source query similarity
            if "query" in candidate.metadata:
                # Simple word overlap scoring
                candidate_query_words = set(
                    candidate.metadata["query"].lower().split()
                )
                overlap = len(query_words.intersection(candidate_query_words))
                score += overlap * 0.1

            # Score based on result title relevance
            if "result_title" in candidate.metadata:
                title_words = set(
                    candidate.metadata["result_title"].lower().split()
                )
                overlap = len(query_words.intersection(title_words))
                score += overlap * 0.2

            candidate.relevance_score = score

        # Sort by relevance
        return sorted(
            candidates,
            key=lambda c: getattr(c, "relevance_score", 0.0),
            reverse=True,
        )